summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xAndroid.mk8
-rwxr-xr-xcommon/arm/ih264_arm_memory_barrier.s77
-rwxr-xr-xcommon/arm/ih264_deblk_chroma_a9.s1337
-rwxr-xr-xcommon/arm/ih264_deblk_luma_a9.s1092
-rwxr-xr-xcommon/arm/ih264_default_weighted_pred_a9q.s359
-rwxr-xr-xcommon/arm/ih264_ihadamard_scaling_a9.s250
-rwxr-xr-xcommon/arm/ih264_inter_pred_chroma_a9q.s254
-rwxr-xr-xcommon/arm/ih264_inter_pred_filters_luma_horz_a9q.s245
-rwxr-xr-xcommon/arm/ih264_inter_pred_filters_luma_vert_a9q.s301
-rwxr-xr-xcommon/arm/ih264_inter_pred_luma_bilinear_a9q.s398
-rwxr-xr-xcommon/arm/ih264_inter_pred_luma_copy_a9q.s253
-rwxr-xr-xcommon/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s441
-rwxr-xr-xcommon/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s1044
-rwxr-xr-xcommon/arm/ih264_inter_pred_luma_horz_qpel_a9q.s266
-rwxr-xr-xcommon/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s505
-rwxr-xr-xcommon/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s355
-rwxr-xr-xcommon/arm/ih264_inter_pred_luma_vert_qpel_a9q.s330
-rwxr-xr-xcommon/arm/ih264_intra_pred_chroma_a9q.s551
-rwxr-xr-xcommon/arm/ih264_intra_pred_luma_16x16_a9q.s520
-rwxr-xr-xcommon/arm/ih264_intra_pred_luma_4x4_a9q.s842
-rwxr-xr-xcommon/arm/ih264_intra_pred_luma_8x8_a9q.s1037
-rwxr-xr-xcommon/arm/ih264_iquant_itrans_recon_a9.s871
-rwxr-xr-xcommon/arm/ih264_iquant_itrans_recon_dc_a9.s399
-rwxr-xr-xcommon/arm/ih264_itrans_recon_a9.s216
-rwxr-xr-xcommon/arm/ih264_mem_fns_neon.s268
-rwxr-xr-xcommon/arm/ih264_padding_neon.s646
-rwxr-xr-xcommon/arm/ih264_platform_macros.h152
-rwxr-xr-xcommon/arm/ih264_resi_trans_a9.s604
-rwxr-xr-xcommon/arm/ih264_resi_trans_quant_a9.s694
-rwxr-xr-xcommon/arm/ih264_weighted_bi_pred_a9q.s642
-rwxr-xr-xcommon/arm/ih264_weighted_pred_a9q.s479
-rwxr-xr-xcommon/armv8/ih264_deblk_chroma_av8.s585
-rwxr-xr-xcommon/armv8/ih264_deblk_luma_av8.s1084
-rwxr-xr-xcommon/armv8/ih264_default_weighted_pred_av8.s353
-rwxr-xr-xcommon/armv8/ih264_ihadamard_scaling_av8.s250
-rwxr-xr-xcommon/armv8/ih264_inter_pred_chroma_av8.s392
-rwxr-xr-xcommon/armv8/ih264_inter_pred_filters_luma_horz_av8.s530
-rwxr-xr-xcommon/armv8/ih264_inter_pred_filters_luma_vert_av8.s452
-rwxr-xr-xcommon/armv8/ih264_inter_pred_luma_copy_av8.s267
-rwxr-xr-xcommon/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s820
-rwxr-xr-xcommon/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s1120
-rwxr-xr-xcommon/armv8/ih264_inter_pred_luma_horz_qpel_av8.s597
-rwxr-xr-xcommon/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s910
-rwxr-xr-xcommon/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s958
-rwxr-xr-xcommon/armv8/ih264_inter_pred_luma_vert_qpel_av8.s511
-rwxr-xr-xcommon/armv8/ih264_intra_pred_chroma_av8.s574
-rwxr-xr-xcommon/armv8/ih264_intra_pred_luma_16x16_av8.s606
-rwxr-xr-xcommon/armv8/ih264_intra_pred_luma_4x4_av8.s876
-rwxr-xr-xcommon/armv8/ih264_intra_pred_luma_8x8_av8.s1084
-rwxr-xr-xcommon/armv8/ih264_iquant_itrans_recon_av8.s778
-rwxr-xr-xcommon/armv8/ih264_iquant_itrans_recon_dc_av8.s397
-rwxr-xr-xcommon/armv8/ih264_mem_fns_neon_av8.s274
-rwxr-xr-xcommon/armv8/ih264_neon_macros.s41
-rwxr-xr-xcommon/armv8/ih264_padding_neon_av8.s784
-rwxr-xr-xcommon/armv8/ih264_platform_macros.h152
-rwxr-xr-xcommon/armv8/ih264_resi_trans_quant_av8.s731
-rwxr-xr-xcommon/armv8/ih264_weighted_bi_pred_av8.s574
-rwxr-xr-xcommon/armv8/ih264_weighted_pred_av8.s471
-rwxr-xr-xcommon/ih264_buf_mgr.c696
-rwxr-xr-xcommon/ih264_buf_mgr.h122
-rwxr-xr-xcommon/ih264_cabac_tables.c10869
-rwxr-xr-xcommon/ih264_cabac_tables.h101
-rwxr-xr-xcommon/ih264_cavlc_tables.c282
-rwxr-xr-xcommon/ih264_cavlc_tables.h133
-rwxr-xr-xcommon/ih264_chroma_intra_pred_filters.c478
-rwxr-xr-xcommon/ih264_common_tables.c725
-rwxr-xr-xcommon/ih264_common_tables.h136
-rwxr-xr-xcommon/ih264_deblk_edge_filters.c2087
-rwxr-xr-xcommon/ih264_deblk_edge_filters.h195
-rwxr-xr-xcommon/ih264_deblk_tables.c119
-rwxr-xr-xcommon/ih264_deblk_tables.h73
-rwxr-xr-xcommon/ih264_debug.h61
-rwxr-xr-xcommon/ih264_defs.h690
-rwxr-xr-xcommon/ih264_disp_mgr.c186
-rwxr-xr-xcommon/ih264_disp_mgr.h70
-rwxr-xr-xcommon/ih264_dpb_mgr.c1176
-rwxr-xr-xcommon/ih264_dpb_mgr.h186
-rwxr-xr-xcommon/ih264_error.h68
-rwxr-xr-xcommon/ih264_ihadamard_scaling.c216
-rwxr-xr-xcommon/ih264_inter_pred_filters.c1042
-rwxr-xr-xcommon/ih264_inter_pred_filters.h241
-rwxr-xr-xcommon/ih264_intra_pred_filters.h331
-rwxr-xr-xcommon/ih264_iquant_itrans_recon.c873
-rwxr-xr-xcommon/ih264_itrans_recon.h71
-rwxr-xr-xcommon/ih264_list.c574
-rwxr-xr-xcommon/ih264_list.h93
-rwxr-xr-xcommon/ih264_luma_intra_pred_filters.c1933
-rwxr-xr-xcommon/ih264_macros.h110
-rwxr-xr-xcommon/ih264_mem_fns.c176
-rwxr-xr-xcommon/ih264_mem_fns.h126
-rwxr-xr-xcommon/ih264_padding.c331
-rwxr-xr-xcommon/ih264_padding.h74
-rwxr-xr-xcommon/ih264_resi_trans.h70
-rwxr-xr-xcommon/ih264_resi_trans_quant.c814
-rwxr-xr-xcommon/ih264_size_defs.h85
-rwxr-xr-xcommon/ih264_structs.h1722
-rwxr-xr-xcommon/ih264_trans_data.c312
-rwxr-xr-xcommon/ih264_trans_data.h125
-rwxr-xr-xcommon/ih264_trans_macros.h124
-rwxr-xr-xcommon/ih264_trans_quant_itrans_iquant.h232
-rwxr-xr-xcommon/ih264_typedefs.h64
-rwxr-xr-xcommon/ih264_weighted_pred.c495
-rwxr-xr-xcommon/ih264_weighted_pred.h164
-rwxr-xr-xcommon/ithread.c604
-rwxr-xr-xcommon/ithread.h104
-rwxr-xr-xcommon/mips/ih264_platform_macros.h102
-rwxr-xr-xcommon/x86/ih264_chroma_intra_pred_filters_ssse3.c433
-rwxr-xr-xcommon/x86/ih264_deblk_chroma_ssse3.c1087
-rwxr-xr-xcommon/x86/ih264_deblk_luma_ssse3.c2012
-rwxr-xr-xcommon/x86/ih264_ihadamard_scaling_sse42.c238
-rwxr-xr-xcommon/x86/ih264_ihadamard_scaling_ssse3.c200
-rwxr-xr-xcommon/x86/ih264_inter_pred_filters_ssse3.c4375
-rwxr-xr-xcommon/x86/ih264_iquant_itrans_recon_dc_ssse3.c437
-rwxr-xr-xcommon/x86/ih264_iquant_itrans_recon_sse42.c554
-rwxr-xr-xcommon/x86/ih264_iquant_itrans_recon_ssse3.c1035
-rwxr-xr-xcommon/x86/ih264_luma_intra_pred_filters_ssse3.c2282
-rwxr-xr-xcommon/x86/ih264_mem_fns_ssse3.c169
-rwxr-xr-xcommon/x86/ih264_padding_ssse3.c335
-rwxr-xr-xcommon/x86/ih264_platform_macros.h114
-rwxr-xr-xcommon/x86/ih264_resi_trans_quant_sse42.c984
-rwxr-xr-xcommon/x86/ih264_weighted_pred_sse42.c1349
-rwxr-xr-xdecoder.arm.mk44
-rwxr-xr-xdecoder.arm64.mk46
-rwxr-xr-xdecoder.mips.mk6
-rwxr-xr-xdecoder.mips64.mk6
-rwxr-xr-xdecoder.mk76
-rwxr-xr-xdecoder.x86.mk26
-rwxr-xr-xdecoder.x86_64.mk30
-rwxr-xr-xdecoder/arm/ih264d_function_selector.c101
-rwxr-xr-xdecoder/arm/ih264d_function_selector_a9q.c200
-rwxr-xr-xdecoder/arm/ih264d_function_selector_av8.c191
-rwxr-xr-xdecoder/ih264d.h482
-rwxr-xr-xdecoder/ih264d_api.c4680
-rwxr-xr-xdecoder/ih264d_bitstrm.c181
-rwxr-xr-xdecoder/ih264d_bitstrm.h195
-rwxr-xr-xdecoder/ih264d_cabac.c779
-rwxr-xr-xdecoder/ih264d_cabac.h267
-rwxr-xr-xdecoder/ih264d_cabac_init_tables.c9273
-rwxr-xr-xdecoder/ih264d_compute_bs.c2394
-rwxr-xr-xdecoder/ih264d_deblocking.c2134
-rwxr-xr-xdecoder/ih264d_deblocking.h173
-rwxr-xr-xdecoder/ih264d_debug.c40
-rwxr-xr-xdecoder/ih264d_debug.h135
-rwxr-xr-xdecoder/ih264d_defs.h671
-rwxr-xr-xdecoder/ih264d_dpb_manager.h173
-rwxr-xr-xdecoder/ih264d_dpb_mgr.c1987
-rwxr-xr-xdecoder/ih264d_error_handler.h115
-rwxr-xr-xdecoder/ih264d_format_conv.c838
-rwxr-xr-xdecoder/ih264d_format_conv.h120
-rwxr-xr-xdecoder/ih264d_function_selector.h75
-rwxr-xr-xdecoder/ih264d_function_selector_generic.c222
-rwxr-xr-xdecoder/ih264d_inter_pred.c1614
-rwxr-xr-xdecoder/ih264d_inter_pred.h93
-rwxr-xr-xdecoder/ih264d_mb_utils.c1496
-rwxr-xr-xdecoder/ih264d_mb_utils.h293
-rwxr-xr-xdecoder/ih264d_mem_request.h82
-rwxr-xr-xdecoder/ih264d_mvpred.c1193
-rwxr-xr-xdecoder/ih264d_mvpred.h153
-rwxr-xr-xdecoder/ih264d_nal.c393
-rwxr-xr-xdecoder/ih264d_nal.h56
-rwxr-xr-xdecoder/ih264d_parse_bslice.c1696
-rwxr-xr-xdecoder/ih264d_parse_cabac.c1607
-rwxr-xr-xdecoder/ih264d_parse_cabac.h60
-rwxr-xr-xdecoder/ih264d_parse_cavlc.c2694
-rwxr-xr-xdecoder/ih264d_parse_cavlc.h165
-rwxr-xr-xdecoder/ih264d_parse_headers.c1204
-rwxr-xr-xdecoder/ih264d_parse_headers.h46
-rwxr-xr-xdecoder/ih264d_parse_islice.c1479
-rwxr-xr-xdecoder/ih264d_parse_islice.h113
-rwxr-xr-xdecoder/ih264d_parse_mb_header.c1397
-rwxr-xr-xdecoder/ih264d_parse_mb_header.h88
-rwxr-xr-xdecoder/ih264d_parse_pslice.c1760
-rwxr-xr-xdecoder/ih264d_parse_slice.c1887
-rwxr-xr-xdecoder/ih264d_parse_slice.h47
-rwxr-xr-xdecoder/ih264d_process_bslice.c2345
-rwxr-xr-xdecoder/ih264d_process_bslice.h108
-rwxr-xr-xdecoder/ih264d_process_intra_mb.c2006
-rwxr-xr-xdecoder/ih264d_process_intra_mb.h65
-rwxr-xr-xdecoder/ih264d_process_pslice.c1139
-rwxr-xr-xdecoder/ih264d_process_pslice.h69
-rwxr-xr-xdecoder/ih264d_quant_scaling.c274
-rwxr-xr-xdecoder/ih264d_quant_scaling.h37
-rwxr-xr-xdecoder/ih264d_sei.c386
-rwxr-xr-xdecoder/ih264d_sei.h91
-rwxr-xr-xdecoder/ih264d_structs.h1582
-rwxr-xr-xdecoder/ih264d_tables.c872
-rwxr-xr-xdecoder/ih264d_tables.h157
-rwxr-xr-xdecoder/ih264d_thread_compute_bs.c802
-rwxr-xr-xdecoder/ih264d_thread_compute_bs.h34
-rwxr-xr-xdecoder/ih264d_thread_parse_decode.c732
-rwxr-xr-xdecoder/ih264d_thread_parse_decode.h48
-rwxr-xr-xdecoder/ih264d_transfer_address.h45
-rwxr-xr-xdecoder/ih264d_utils.c2625
-rwxr-xr-xdecoder/ih264d_utils.h101
-rwxr-xr-xdecoder/ih264d_vui.c233
-rwxr-xr-xdecoder/ih264d_vui.h96
-rwxr-xr-xdecoder/iv.h420
-rwxr-xr-xdecoder/ivd.h585
-rwxr-xr-xdecoder/mips/ih264d_function_selector.c66
-rwxr-xr-xdecoder/x86/ih264d_function_selector.c94
-rwxr-xr-xdecoder/x86/ih264d_function_selector_sse42.c95
-rwxr-xr-xdecoder/x86/ih264d_function_selector_ssse3.c181
-rwxr-xr-xencoder.arm.mk47
-rwxr-xr-xencoder.arm64.mk48
-rwxr-xr-xencoder.mips.mk7
-rwxr-xr-xencoder.mips64.mk7
-rwxr-xr-xencoder.mk90
-rwxr-xr-xencoder.x86.mk37
-rwxr-xr-xencoder.x86_64.mk35
-rwxr-xr-xencoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s313
-rwxr-xr-xencoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s529
-rwxr-xr-xencoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s346
-rwxr-xr-xencoder/arm/ih264e_fmt_conv.s329
-rwxr-xr-xencoder/arm/ih264e_function_selector.c170
-rwxr-xr-xencoder/arm/ih264e_function_selector_a9q.c252
-rwxr-xr-xencoder/arm/ih264e_function_selector_av8.c259
-rwxr-xr-xencoder/arm/ih264e_half_pel.s951
-rwxr-xr-xencoder/arm/ih264e_platform_macros.h143
-rwxr-xr-xencoder/arm/ime_distortion_metrics_a9q.s1353
-rwxr-xr-xencoder/arm/ime_platform_macros.h51
-rwxr-xr-xencoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s592
-rwxr-xr-xencoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s467
-rwxr-xr-xencoder/armv8/ih264e_half_pel_av8.s1024
-rwxr-xr-xencoder/armv8/ih264e_platform_macros.h143
-rwxr-xr-xencoder/armv8/ime_distortion_metrics_av8.s978
-rwxr-xr-xencoder/armv8/ime_platform_macros.h51
-rwxr-xr-xencoder/ih264e.h620
-rwxr-xr-xencoder/ih264e_api.c5559
-rwxr-xr-xencoder/ih264e_bitstream.c472
-rwxr-xr-xencoder/ih264e_bitstream.h401
-rwxr-xr-xencoder/ih264e_cavlc.c1448
-rwxr-xr-xencoder/ih264e_cavlc.h112
-rwxr-xr-xencoder/ih264e_config.h52
-rwxr-xr-xencoder/ih264e_core_coding.c2365
-rwxr-xr-xencoder/ih264e_core_coding.h653
-rwxr-xr-xencoder/ih264e_deblk.c854
-rwxr-xr-xencoder/ih264e_deblk.h99
-rwxr-xr-xencoder/ih264e_debug.h65
-rwxr-xr-xencoder/ih264e_defs.h538
-rwxr-xr-xencoder/ih264e_encode.c580
-rwxr-xr-xencoder/ih264e_encode_header.c1187
-rwxr-xr-xencoder/ih264e_encode_header.h278
-rwxr-xr-xencoder/ih264e_error.h229
-rwxr-xr-xencoder/ih264e_fmt_conv.c864
-rwxr-xr-xencoder/ih264e_fmt_conv.h142
-rwxr-xr-xencoder/ih264e_function_selector_generic.c259
-rwxr-xr-xencoder/ih264e_globals.c261
-rwxr-xr-xencoder/ih264e_globals.h192
-rwxr-xr-xencoder/ih264e_half_pel.c226
-rwxr-xr-xencoder/ih264e_half_pel.h162
-rwxr-xr-xencoder/ih264e_intra_modes_eval.c2296
-rwxr-xr-xencoder/ih264e_intra_modes_eval.h418
-rwxr-xr-xencoder/ih264e_list.h42
-rwxr-xr-xencoder/ih264e_master.h132
-rwxr-xr-xencoder/ih264e_mc.c320
-rwxr-xr-xencoder/ih264e_mc.h104
-rwxr-xr-xencoder/ih264e_me.c1153
-rwxr-xr-xencoder/ih264e_me.h278
-rwxr-xr-xencoder/ih264e_modify_frm_rate.c240
-rwxr-xr-xencoder/ih264e_modify_frm_rate.h182
-rwxr-xr-xencoder/ih264e_process.c2369
-rwxr-xr-xencoder/ih264e_process.h364
-rwxr-xr-xencoder/ih264e_rate_control.c801
-rwxr-xr-xencoder/ih264e_rate_control.h351
-rwxr-xr-xencoder/ih264e_rc_mem_interface.c395
-rwxr-xr-xencoder/ih264e_rc_mem_interface.h179
-rwxr-xr-xencoder/ih264e_statistics.h141
-rwxr-xr-xencoder/ih264e_structs.h2566
-rwxr-xr-xencoder/ih264e_time_stamp.c748
-rwxr-xr-xencoder/ih264e_time_stamp.h498
-rwxr-xr-xencoder/ih264e_trace.h161
-rwxr-xr-xencoder/ih264e_trace_support.h61
-rwxr-xr-xencoder/ih264e_utils.c1804
-rwxr-xr-xencoder/ih264e_utils.h327
-rwxr-xr-xencoder/ih264e_version.c143
-rwxr-xr-xencoder/ih264e_version.h64
-rwxr-xr-xencoder/ime.c836
-rwxr-xr-xencoder/ime.h209
-rwxr-xr-xencoder/ime_defs.h59
-rwxr-xr-xencoder/ime_distortion_metrics.c1262
-rwxr-xr-xencoder/ime_distortion_metrics.h170
-rwxr-xr-xencoder/ime_macros.h44
-rwxr-xr-xencoder/ime_statistics.h86
-rwxr-xr-xencoder/ime_structs.h305
-rwxr-xr-xencoder/ime_typedefs.h50
-rwxr-xr-xencoder/irc_bit_allocation.c859
-rwxr-xr-xencoder/irc_bit_allocation.h99
-rwxr-xr-xencoder/irc_cbr_buffer_control.c653
-rwxr-xr-xencoder/irc_cbr_buffer_control.h104
-rwxr-xr-xencoder/irc_cntrl_param.h59
-rwxr-xr-xencoder/irc_common.h104
-rwxr-xr-xencoder/irc_datatypes.h64
-rwxr-xr-xencoder/irc_est_sad.c260
-rwxr-xr-xencoder/irc_est_sad.h64
-rwxr-xr-xencoder/irc_fixed_point_error_bits.c185
-rwxr-xr-xencoder/irc_fixed_point_error_bits.h64
-rwxr-xr-xencoder/irc_frame_info_collector.c177
-rwxr-xr-xencoder/irc_frame_info_collector.h109
-rwxr-xr-xencoder/irc_mb_model_based.c157
-rwxr-xr-xencoder/irc_mb_model_based.h57
-rwxr-xr-xencoder/irc_mem_req_and_acq.h179
-rwxr-xr-xencoder/irc_picture_type.c1585
-rwxr-xr-xencoder/irc_picture_type.h95
-rwxr-xr-xencoder/irc_rate_control_api.c1600
-rwxr-xr-xencoder/irc_rate_control_api.h188
-rwxr-xr-xencoder/irc_rate_control_api_structs.h93
-rwxr-xr-xencoder/irc_rd_model.c565
-rwxr-xr-xencoder/irc_rd_model.h98
-rwxr-xr-xencoder/irc_rd_model_struct.h75
-rwxr-xr-xencoder/irc_trace_support.h61
-rwxr-xr-xencoder/irc_vbr_storage_vbv.c368
-rwxr-xr-xencoder/irc_vbr_storage_vbv.h119
-rwxr-xr-xencoder/irc_vbr_str_prms.c199
-rwxr-xr-xencoder/irc_vbr_str_prms.h65
-rwxr-xr-xencoder/ithread.h101
-rwxr-xr-xencoder/iv2.h386
-rwxr-xr-xencoder/ive2.h1445
-rwxr-xr-xencoder/mips/ih264e_function_selector.c110
-rwxr-xr-xencoder/mips/ih264e_platform_macros.h135
-rwxr-xr-xencoder/mips/ime_platform_macros.h52
-rwxr-xr-xencoder/x86/ih264e_function_selector.c141
-rwxr-xr-xencoder/x86/ih264e_function_selector_sse42.c146
-rwxr-xr-xencoder/x86/ih264e_function_selector_ssse3.c190
-rwxr-xr-xencoder/x86/ih264e_half_pel_ssse3.c487
-rwxr-xr-xencoder/x86/ih264e_intra_modes_eval_ssse3.c1259
-rwxr-xr-xencoder/x86/ih264e_platform_macros.h154
-rwxr-xr-xencoder/x86/ime_distortion_metrics_sse42.c1940
-rwxr-xr-xencoder/x86/ime_platform_macros.h52
-rwxr-xr-xtest/Android.mk8
-rwxr-xr-xtest/decoder.mk13
-rwxr-xr-xtest/decoder/main.c3196
-rwxr-xr-xtest/encoder.mk13
-rwxr-xr-xtest/encoder/app.h348
-rwxr-xr-xtest/encoder/input.c312
-rwxr-xr-xtest/encoder/main.c2512
-rwxr-xr-xtest/encoder/output.c109
-rwxr-xr-xtest/encoder/psnr.c242
-rwxr-xr-xtest/encoder/psnr.h62
-rwxr-xr-xtest/encoder/recon.c221
339 files changed, 204373 insertions, 0 deletions
diff --git a/Android.mk b/Android.mk
new file mode 100755
index 0000000..0085832
--- /dev/null
+++ b/Android.mk
@@ -0,0 +1,8 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+# encoder: pull in the build rules from encoder.mk
+include $(LOCAL_PATH)/encoder.mk
+
+# decoder: pull in the build rules from decoder.mk
+include $(LOCAL_PATH)/decoder.mk
diff --git a/common/arm/ih264_arm_memory_barrier.s b/common/arm/ih264_arm_memory_barrier.s
new file mode 100755
index 0000000..523218f
--- /dev/null
+++ b/common/arm/ih264_arm_memory_barrier.s
@@ -0,0 +1,77 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@*******************************************************************************
+@* @file
+@* ih264_arm_memory_barrier.s
+@*
+@* @brief
+@* Contains function definitions for data synchronization.
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@* ih264_arm_dsb(), ih264_arm_dmb()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+
+.text
+.p2align 2
+
+
+@*****************************************************************************
+@*
+@* Function Name : ih264_arm_dsb
+@* Description : Adds DSB
+@* Revision History :
+@* DD MM YYYY Author(s) Changes
+@* 03 07 2008 100355 First version
+@*
+@*****************************************************************************
+
+ .global ih264_arm_dsb
+ih264_arm_dsb:
+ dsb @Issue a data synchronization barrier
+ bx lr @Return to the caller
+
+
+
+@*****************************************************************************
+@*
+@* Function Name : ih264_arm_dmb
+@* Description : Adds DMB
+@* Revision History :
+@* DD MM YYYY Author(s) Changes
+@* 03 07 2008 100355 First version
+@*
+@*****************************************************************************
+
+ .global ih264_arm_dmb
+
+ih264_arm_dmb:
+ dmb @Issue a data memory barrier
+ bx lr @Return to the caller
+
+
+
diff --git a/common/arm/ih264_deblk_chroma_a9.s b/common/arm/ih264_deblk_chroma_a9.s
new file mode 100755
index 0000000..66102a7
--- /dev/null
+++ b/common/arm/ih264_deblk_chroma_a9.s
@@ -0,0 +1,1337 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/*****************************************************************************/
+@/* */
+@/* File Name : ih264_deblk_chroma_a9.s */
+@/* */
+@/* Description : Contains function definitions for deblocking chroma */
+@/* edge. Functions are coded in NEON assembly and can */
+@/* be compiled using ARM RVDS. */
+@/* */
+@/* List of Functions : ih264_deblk_chroma_vert_bs4_bp_a9() */
+@/* ih264_deblk_chroma_vert_bslt4_bp_a9() */
+@/* ih264_deblk_chroma_horz_bs4_bp_a9() */
+@/* ih264_deblk_chroma_horz_bslt4_bp_a9() */
+@/* ih264_deblk_chroma_vert_bs4_mbaff_bp_a9() */
+@/* ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9() */
+@/* ih264_deblk_chroma_vert_bs4_a9() */
+@/* ih264_deblk_chroma_vert_bslt4_a9() */
+@/* ih264_deblk_chroma_horz_bs4_a9() */
+@/* ih264_deblk_chroma_horz_bslt4_a9() */
+@/* ih264_deblk_chroma_vert_bs4_mbaff_a9() */
+@/* ih264_deblk_chroma_vert_bslt4_mbaff_a9() */
+@/* */
+@/* Issues / Problems : None */
+@/* */
+@/* Revision History : */
+@/* */
+@/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+@/* 28 11 2013 Ittiam Draft */
+@/* 05 01 2015 Kaushik Added double-call functions for */
+@/* Senthoor vertical deblocking, and high */
+@/* profile functions. */
+@/* */
+@/*****************************************************************************/
+
+
+.text
+.p2align 2
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block horizontal edge when the
+@* boundary strength is set to 4
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_horz_bs4_bp_a9
+
+ih264_deblk_chroma_horz_bs4_bp_a9:
+
+ stmfd sp!, {r4, lr} @Save r4 and the return address
+ vpush {d8 - d15} @Save d8-d15; q4, q5 and q7 are used as scratch below
+ sub r0, r0, r1, lsl #1 @R0 = uc_edgePixel pointing to p1 of chroma
+ vld2.8 {d6, d7}, [r0], r1 @D6 = p1u , D7 = p1v
+ mov r4, r0 @Keeping a backup of the pointer to p0 of chroma
+ vld2.8 {d4, d5}, [r0], r1 @D4 = p0u , D5 = p0v
+ vdup.8 q10, r2 @Q10 contains alpha
+ vld2.8 {d0, d1}, [r0], r1 @D0 = q0u , D1 = q0v
+ vaddl.u8 q4, d6, d0 @Q4 = p1u + q0u (widened to 16 bit)
+ vaddl.u8 q5, d7, d1 @Q4,Q5 = q0 + p1
+ vmov.i8 d31, #2 @D31 = 2 in every byte, multiplier for the X2() terms
+ vld2.8 {d2, d3}, [r0] @D2 = q1u , D3 = q1v
+ vabd.u8 q13, q3, q2 @Q13 = ABS(p1 - p0)
+ vmlal.u8 q4, d2, d31 @Q4 += 2 * q1u
+ vmlal.u8 q5, d3, d31 @Q5,Q4 = (X2(q1U) + q0U + p1U)
+ vabd.u8 q11, q2, q0 @Q11 = ABS(p0 - q0)
+ vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0)
+ vaddl.u8 q7, d4, d2 @Q7 = p0u + q1u (widened to 16 bit)
+ vaddl.u8 q14, d5, d3 @Q14,Q7 = P0 + Q1
+ vdup.8 q8, r3 @Q8 contains beta
+ vmlal.u8 q7, d6, d31 @Q7 += 2 * p1u
+ vmlal.u8 q14, d7, d31 @Q14,Q7 = (X2(p1U) + p0U + q1U)
+ vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha )
+ vcge.u8 q12, q12, q8 @Q12= ( ABS(q1 - q0) >= Beta )
+ vcge.u8 q13, q13, q8 @Q13= ( ABS(p1 - p0) >= Beta )
+ vrshrn.u16 d8, q4, #2 @D8 = rounded (Q4 >> 2), narrowed to 8 bit
+ vrshrn.u16 d9, q5, #2 @Q4 = (X2(q1U) + q0U + p1U + 2) >> 2
+ vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
+ vrshrn.u16 d10, q7, #2 @D10 = rounded (Q7 >> 2), narrowed to 8 bit
+ vrshrn.u16 d11, q14, #2 @Q5 = (X2(p1U) + p0U + q1U + 2) >> 2
+ vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
+ vbit q5, q2, q9 @Where Q9 is set (no filtering) put the original p0 back in Q5
+ vbit q4, q0, q9 @Where Q9 is set (no filtering) put the original q0 back in Q4
+ vst2.8 {d10, d11}, [r4], r1 @Store the p0 row (U and V interleaved again)
+ vst2.8 {d8, d9}, [r4] @Store the q0 row (U and V interleaved again)
+ vpop {d8 - d15} @Restore d8-d15
+ ldmfd sp!, {r4, pc} @Restore r4 and return (saved lr is loaded into pc)
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block vertical edge when the
+@* boundary strength is set to 4
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_vert_bs4_bp_a9
+
+ih264_deblk_chroma_vert_bs4_bp_a9:
+
+ stmfd sp!, {r12, r14} @Save r12 and the return address
+ vpush {d8 - d15} @Save d8-d15; q4-q7 are used as scratch below
+ sub r0, r0, #4 @point r0 to p1u of row0.
+ mov r12, r0 @keep a back up of r0 for buffer write
+
+ vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1 @Rows 0-3: the row's four 16-bit U/V pairs (p1, p0, q0, q1) go to one lane of d0, d2, d4, d6
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+ vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+ vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+
+ vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1 @Rows 4-7: same layout into d1, d3, d5, d7
+ vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+ vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+ vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1 @Now Q0 = p1, Q1 = p0, Q2 = q0, Q3 = q1
+
+ vdup.8 q11, r2 @Q11 = alpha
+ vdup.8 q12, r3 @Q12 = beta
+ vmov.i8 d31, #2 @D31 = 2 in every byte, multiplier for the 2*x terms
+
+ vabd.u8 q4, q1, q2 @|p0-q0|
+ vabd.u8 q5, q3, q2 @|q1-q0|
+ vabd.u8 q6, q0, q1 @|p1-p0|
+ vaddl.u8 q7, d2, d6
+ vaddl.u8 q8, d3, d7 @(p0 + q1)
+ vclt.u8 q4, q4, q11 @|p0-q0| < alpha ?
+ vclt.u8 q5, q5, q12 @|q1-q0| < beta ?
+ vclt.u8 q6, q6, q12 @|p1-p0| < beta ?
+ vmlal.u8 q7, d0, d31
+ vmlal.u8 q8, d1, d31 @2*p1 + (p0 + q1)
+ vaddl.u8 q9, d0, d4
+ vaddl.u8 q10, d1, d5 @(p1 + q0)
+ vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta
+ vmlal.u8 q9, d6, d31
+ vmlal.u8 q10, d7, d31 @2*q1 + (p1 + q0)
+
+ vrshrn.i16 d14, q7, #2
+ vrshrn.i16 d15, q8, #2 @(2*p1 + (p0 + q1) + 2) >> 2
+ vand.u8 q4, q4, q6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+ vrshrn.i16 d18, q9, #2
+ vrshrn.i16 d19, q10, #2 @(2*q1 + (p1 + q0) + 2) >> 2
+
+ vbit q1, q7, q4 @Where the Q4 condition holds, replace p0 with the filtered value
+ vbit q2, q9, q4 @Where the Q4 condition holds, replace q0 with the filtered value
+
+ vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r12], r1 @Write rows 0-3 back, one lane per row
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r12], r1
+ vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r12], r1
+ vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r12], r1
+
+ vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r12], r1 @Write rows 4-7 back, one lane per row
+ vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r12], r1
+ vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r12], r1
+ vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r12], r1
+ vpop {d8 - d15} @Restore d8-d15
+ ldmfd sp!, {r12, pc} @Restore r12 and return (saved r14 is loaded into pc)
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block horizontal edge for cases where the
+@* boundary strength is less than 4
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @param[in] sp(0) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(4) - pu1_cliptab
+@* tc0_table
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_horz_bslt4_bp_a9
+
+ih264_deblk_chroma_horz_bslt4_bp_a9:
+
+ stmfd sp!, {r4-r6, lr} @Save r4-r6 and the return address
+
+ ldrd r4, r5, [sp, #0x10] @r4 = u4_bs , r5 = pu1_cliptab
+ vpush {d8 - d15} @Save d8-d15; q4-q7 are used as scratch below
+ sub r0, r0, r1, lsl #1 @R0 = uc_edgePixelU pointing to p1 of chroma U
+ rev r4, r4 @Reverse the byte order of the packed Bs word
+ vmov.32 d12[0], r4 @d12[0] = ui_Bs
+ vld1.32 d16[0], [r5] @D16[0] contains cliptab
+ vld2.8 {d6, d7}, [r0], r1 @Q3=p1
+ vtbl.8 d14, {d16}, d12 @D14 = cliptab entry selected by each Bs byte of D12
+ vmovl.u8 q6, d12 @q6 = uc_Bs in each 16 bit scalar
+ mov r6, r0 @Keeping a backup of the pointer to chroma U P0
+ vld2.8 {d4, d5}, [r0], r1 @Q2=p0
+ vmov.i8 d30, #1 @D30 = 1 in every byte
+ vdup.8 q10, r2 @Q10 contains alpha
+ vld2.8 {d0, d1}, [r0], r1 @Q0=q0
+ vmovl.u8 q7, d14 @Q7 = C0 widened to one value per 16 bit scalar
+ vld2.8 {d2, d3}, [r0] @Q1=q1
+ vsubl.u8 q5, d1, d5 @Q5 = (q0 - p0) for V
+ vsubl.u8 q4, d0, d4 @Q5,Q4 = (q0 - p0)
+ vabd.u8 q13, q3, q2 @Q13 = ABS(p1 - p0)
+ vshl.i16 q5, q5, #2 @Q5 = (q0 - p0)<<2
+ vabd.u8 q11, q2, q0 @Q11 = ABS(p0 - q0)
+ vshl.i16 q4, q4, #2 @Q4 = (q0 - p0)<<2
+ vsli.16 q7, q7, #8 @Copy each C0 into both bytes of its 16 bit scalar
+ vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0)
+ vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha )
+ vsubl.u8 q10, d6, d2 @Q10 = (p1 - q1)L
+ vsubl.u8 q3, d7, d3 @Q3 = (p1 - q1)H
+ vdup.8 q8, r3 @Q8 contains beta
+ vadd.i16 q4, q4, q10 @Q4 = [ (q0 - p0)<<2 ] + (p1 - q1) for U
+ vadd.i16 q5, q5, q3 @Q5,Q4 = [ (q0 - p0)<<2 ] + (p1 - q1)
+ vcge.u8 q12, q12, q8 @Q12= ( ABS(q1 - q0) >= Beta )
+ vcgt.s16 d12, d12, #0 @D12 = (us_Bs > 0)
+ vqrshrn.s16 d8, q4, #3 @D8 = saturating rounded (Q4 >> 3), narrowed to 8 bit
+ vqrshrn.s16 d9, q5, #3 @Q4 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3
+ vadd.i8 d14, d14, d30 @D14 = C = C0+1
+ vcge.u8 q13, q13, q8 @Q13= ( ABS(p1 - p0) >= Beta )
+ vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
+ vabs.s8 q3, q4 @Q3 = ABS (i_macro)
+ vmov.i8 d15, d14 @D15 = D14, so Q7 holds C for both the U and V halves
+ vmov.i8 d13, d12 @D13 = D12, so Q6 holds the Bs mask for both halves
+ vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
+ vmin.u8 q7, q3, q7 @Q7 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro)
+ vbic q6, q6, q9 @final condition
+ vcge.s8 q4, q4, #0 @Q4 = (i_macro >= 0)
+ vand q7, q7, q6 @Making delta zero in places where values should not be filtered
+ vqadd.u8 q8, q2, q7 @Q8 = p0 + delta
+ vqsub.u8 q2, q2, q7 @Q2 = p0 - delta
+ vqadd.u8 q9, q0, q7 @Q9 = q0 + delta
+ vqsub.u8 q0, q0, q7 @Q0 = q0 - delta
+ vbif q8, q2, q4 @Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta)
+ vbif q0, q9, q4 @Q0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta)
+ vst2.8 {d16, d17}, [r6], r1 @Store the p0 row (U and V interleaved again)
+ vst2.8 {d0, d1}, [r6] @Store the q0 row (U and V interleaved again)
+ vpop {d8 - d15} @Restore d8-d15
+ ldmfd sp!, {r4-r6, pc} @Restore r4-r6 and return (saved lr is loaded into pc)
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block vertical edge for cases where the
+@* boundary strength is less than 4
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @param[in] sp(0) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(4) - pu1_cliptab
+@* tc0_table
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_vert_bslt4_bp_a9
+
+ih264_deblk_chroma_vert_bslt4_bp_a9:
+
+ stmfd sp!, {r10-r12, r14} @Save r10-r12 and the return address
+
+ sub r0, r0, #4 @point r0 to p1u of row0.
+ ldr r11, [sp, #16] @r11 = ui_Bs
+
+ ldr r10, [sp, #20] @r10 = puc_ClipTab
+ mov r12, r0 @keep a back up of r0 for buffer write
+ vpush {d8 - d15} @Save d8-d15; q4-q7 are used as scratch below
+ vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1 @Rows 0-3: the row's four 16-bit U/V pairs (p1, p0, q0, q1) go to one lane of d0, d2, d4, d6
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+ vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+ vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+
+ vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1 @Rows 4-7: same layout into d1, d3, d5, d7
+ vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+ vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+ vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1 @Now Q0 = p1, Q1 = p0, Q2 = q0, Q3 = q1
+
+
+ vdup.8 q11, r2 @Q11 = alpha
+ vabd.u8 q4, q1, q2 @|p0-q0|
+ vdup.8 q12, r3 @Q12 = beta
+ vabd.u8 q5, q3, q2 @|q1-q0|
+ vabd.u8 q6, q0, q1 @|p1-p0|
+ vclt.u8 q4, q4, q11 @|p0-q0| < alpha ?
+ vsubl.u8 q7, d0, d6
+ vclt.u8 q5, q5, q12 @|q1-q0| < beta ?
+ vsubl.u8 q8, d1, d7 @(p1 - q1)
+ vclt.u8 q6, q6, q12 @|p1-p0| < beta ?
+ vsubl.u8 q9, d4, d2
+ vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta
+ vsubl.u8 q10, d5, d3 @(q0 - p0)
+ vmov.u16 q14, #4 @Q14 = 4 in every 16 bit scalar, multiplier for (q0 - p0)
+ vld1.32 {d24[0]}, [r10] @Load ClipTable (overwrites beta in Q12, which is not read again)
+ rev r11, r11 @Blocking strengths
+ vand.u8 q4, q4, q6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+
+ vmov.32 d10[0], r11 @d10[0] = byte-reversed ui_Bs
+
+ vmla.s16 q7, q9, q14
+ vmla.s16 q8, q10, q14 @4*(q0 - p0) + (p1 - q1)
+
+ vmovl.u8 q5, d10 @Widen each Bs byte to 16 bit
+
+
+ vsli.u16 d10, d10, #8 @Copy each Bs into both bytes of its 16 bit scalar
+ vmovl.u16 q5, d10 @Widen each of those to 32 bit
+ vsli.u32 q5, q5, #16 @Q5 = each Bs repeated in all 4 bytes of its 32 bit scalar
+ vtbl.8 d12, {d24}, d10
+ vtbl.8 d13, {d24}, d11 @tC0
+ vmov.u8 q12, #1 @Q12 = 1 in every byte
+ vadd.u8 q6, q6, q12 @tC0 + 1
+ vcge.u8 q5, q5, q12 @u4_bS > 0 ?
+ vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0
+
+ @ Q0 - Q3(inputs),
+ @ Q4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0),
+ @ Q6 (tC)
+
+ vrshr.s16 q7, q7, #3
+ vrshr.s16 q8, q8, #3 @(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
+
+ vcgt.s16 q9, q7, #0
+ vcgt.s16 q10, q8, #0
+ vmovn.i16 d18, q9
+ vmovn.i16 d19, q10 @Q9 = sign(delta), set where delta > 0
+ vabs.s16 q7, q7
+ vabs.s16 q8, q8
+ vmovn.u16 d14, q7
+ vmovn.u16 d15, q8 @Q7 = ABS of the value above, narrowed to 8 bit
+ vmin.u8 q7, q7, q6 @Q7 = |delta|
+
+ vqadd.u8 q10, q1, q7 @p0+|delta|
+ vqadd.u8 q11, q2, q7 @q0+|delta|
+ vqsub.u8 q12, q1, q7 @p0-|delta|
+ vqsub.u8 q13, q2, q7 @q0-|delta|
+
+ vbit q12, q10, q9 @p0 + delta
+ vbit q11, q13, q9 @q0 - delta
+
+ vbit q1, q12, q4 @Where the Q4 condition holds, replace p0 with the filtered value
+ vbit q2, q11, q4 @Where the Q4 condition holds, replace q0 with the filtered value
+
+ vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r12], r1 @Write rows 0-3 back, one lane per row
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r12], r1
+ vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r12], r1
+ vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r12], r1
+
+ vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r12], r1 @Write rows 4-7 back, one lane per row
+ vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r12], r1
+ vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r12], r1
+ vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r12], r1
+ vpop {d8 - d15} @Restore d8-d15
+ ldmfd sp!, {r10-r12, pc} @Restore r10-r12 and return (saved r14 is loaded into pc)
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block vertical edge when the boundary
+@* strength is set to 4. Filters 4 rows per call; intended to be called twice
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_vert_bs4_mbaff_bp_a9
+
+ih264_deblk_chroma_vert_bs4_mbaff_bp_a9:
+
+ stmfd sp!, {r12, r14} @save r12 and the return address
+ vpush {d8 - d15} @save NEON registers d8-d15
+ sub r0, r0, #4 @point r0 to p1u of row0.
+ mov r12, r0 @keep a back up of r0 for buffer write
+
+ vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 @row 0: lane 0 of D0/D1/D2/D3 = p1/p0/q0/q1 (16 bits each, presumably one U,V byte pair)
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 @row 1 into lane 1
+ vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 @row 2 into lane 2
+ vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 @row 3 into lane 3
+
+ vdup.8 d11, r2 @D11 = alpha
+ vdup.8 d12, r3 @D12 = beta
+ vmov.i8 d31, #2 @D31 = 2 in every byte (multiplier for the 2*p1 and 2*q1 terms)
+
+ vabd.u8 d4, d1, d2 @|p0-q0|
+ vabd.u8 d5, d3, d2 @|q1-q0|
+ vabd.u8 d6, d0, d1 @|p1-p0|
+ vaddl.u8 q14, d1, d3 @(p0 + q1)
+ vclt.u8 d4, d4, d11 @|p0-q0| < alpha ?
+ vclt.u8 d5, d5, d12 @|q1-q0| < beta ?
+ vclt.u8 d6, d6, d12 @|p1-p0| < beta ?
+ vmlal.u8 q14, d0, d31 @2*p1 + (p0 + q1)
+ vaddl.u8 q13, d0, d2 @(p1 + q0)
+ vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta
+ vmlal.u8 q13, d3, d31 @2*q1 + (p1 + q0)
+
+ vrshrn.i16 d7, q14, #2 @D7 = filtered p0 = (2*p1 + (p0 + q1) + 2) >> 2
+ vand.u8 d4, d4, d6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+ vrshrn.i16 d9, q13, #2 @D9 = filtered q0 = (2*q1 + (p1 + q0) + 2) >> 2
+
+ vbit d1, d7, d4 @replace p0 with the filtered value only where the condition in D4 holds
+ vbit d2, d9, d4 @replace q0 with the filtered value only where the condition in D4 holds
+
+ vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r12], r1 @write back row 0 (r12 = saved start pointer)
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r12], r1 @write back row 1
+ vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r12], r1 @write back row 2
+ vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r12], r1 @write back row 3
+ vpop {d8 - d15} @restore NEON registers d8-d15
+ ldmfd sp!, {r12, pc} @restore r12 and return
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block vertical edge for cases where the boundary
+@* strength is less than 4. Filters 4 rows per call; intended to be called twice
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @param[in] sp(0) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(4) - pu1_cliptab
+@* tc0_table
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9
+
+ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9:
+
+ stmfd sp!, {r10-r12, r14} @save r10-r12 and the return address (16 bytes, so stack arguments start at sp + 16)
+
+ sub r0, r0, #4 @point r0 to p1u of row0.
+ ldr r11, [sp, #16] @r11 = u4_bs (first stack argument)
+
+ ldr r10, [sp, #20] @r10 = pu1_cliptab (second stack argument)
+ mov r12, r0 @keep a back up of r0 for buffer write
+ vpush {d8 - d15} @save NEON registers d8-d15 (done after the stack arguments are read)
+ vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 @row 0: lane 0 of D0/D1/D2/D3 = p1/p0/q0/q1 (16 bits each, presumably one U,V byte pair)
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 @row 1 into lane 1
+ vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 @row 2 into lane 2
+ vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 @row 3 into lane 3
+
+ vdup.8 d11, r2 @D11 = alpha
+ vabd.u8 d4, d1, d2 @|p0-q0|
+ vdup.8 d12, r3 @D12 = beta
+ vabd.u8 d5, d3, d2 @|q1-q0|
+ vabd.u8 d6, d0, d1 @|p1-p0|
+ vclt.u8 d4, d4, d11 @|p0-q0| < alpha ?
+ vclt.u8 d5, d5, d12 @|q1-q0| < beta ?
+ vsubl.u8 q14, d0, d3 @(p1 - q1)
+ vclt.u8 d6, d6, d12 @|p1-p0| < beta ?
+ vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta
+ vsubl.u8 q12, d2, d1 @(q0 - p0)
+ vmov.u16 q10, #4 @Q10 = 4 in every 16-bit lane (multiplier for (q0 - p0))
+
+ vld1.32 {d31[0]}, [r10] @Load 4 bytes of the clip table into the low half of D31
+ rev r11, r11 @Byte-reverse the packed blocking strengths
+ vand.u8 d4, d4, d6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+ vmov.32 d22[0], r11 @D22[0] = byte-reversed bS
+ vmla.s16 q14, q12, q10 @4*(q0 - p0) + (p1 - q1)
+ vmovl.u8 q11, d22 @widen each bS byte to 16 bits (the four bS values end up in D22)
+ vsli.u16 d22, d22, #8 @copy each bS into both bytes of its 16-bit lane
+ vtbl.8 d6, {d31}, d22 @tC0 = clip table entry selected by bS, for every byte
+ vmov.u8 d12, #1 @D12 = 1 in every byte (beta in D12 is not used after this point)
+ vadd.u8 d6, d6, d12 @tC0 + 1
+ vcge.u8 d5, d22, d12 @u4_bS > 0 ? (tested as bS >= 1)
+ vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0
+
+ @ D0 - D3(inputs),
+ @ D4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0),
+ @ D6 (tC)
+
+ vrshr.s16 q14, q14, #3 @(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
+
+ vcgt.s16 q13, q14, #0 @delta > 0 ?
+ vmovn.i16 d9, q13 @D9 = all ones where delta > 0
+ vabs.s16 q14, q14 @|delta| (16 bit)
+ vmovn.u16 d7, q14 @narrow |delta| to 8 bits
+ vmin.u8 d7, d7, d6 @D7 = min(|delta|, tC)
+
+ vqadd.u8 d10, d1, d7 @p0+|delta|
+ vqadd.u8 d11, d2, d7 @q0+|delta|
+ vqsub.u8 d12, d1, d7 @p0-|delta|
+ vqsub.u8 d13, d2, d7 @q0-|delta|
+
+ vbit d12, d10, d9 @p0 + delta (p0+|delta| where delta > 0, else p0-|delta|)
+ vbit d11, d13, d9 @q0 - delta (q0-|delta| where delta > 0, else q0+|delta|)
+
+ vbit d1, d12, d4 @replace p0 only where the condition in D4 holds
+ vbit d2, d11, d4 @replace q0 only where the condition in D4 holds
+
+ vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r12], r1 @write back row 0 (r12 = saved start pointer)
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r12], r1 @write back row 1
+ vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r12], r1 @write back row 2
+ vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r12], r1 @write back row 3
+ vpop {d8 - d15} @restore NEON registers d8-d15
+ ldmfd sp!, {r10-r12, pc} @restore r10-r12 and return
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block horizontal edge when the
+@* boundary strength is set to 4 in high profile
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha_cb
+@* Alpha Value for the boundary in U
+@*
+@* @param[in] r3 - beta_cb
+@* Beta Value for the boundary in U
+@*
+@* @param[in] sp(0) - alpha_cr
+@* Alpha Value for the boundary in V
+@*
+@* @param[in] sp(4) - beta_cr
+@* Beta Value for the boundary in V
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_horz_bs4_a9
+
+ih264_deblk_chroma_horz_bs4_a9:
+
+ stmfd sp!, {r4-r6, lr} @save r4-r6 and the return address (16 bytes, so stack arguments start at sp + 16)
+
+ ldr r5, [sp, #16] @R5 = alpha_cr
+ ldr r6, [sp, #20] @R6 = beta_cr
+ vpush {d8 - d15} @save NEON registers d8-d15
+ sub r0, r0, r1, lsl #1 @R0 = uc_edgePixel pointing to p1 of chroma
+ vld2.8 {d6, d7}, [r0], r1 @D6 = p1u , D7 = p1v
+ mov r4, r0 @Keeping a backup of the pointer p0 of chroma
+ vld2.8 {d4, d5}, [r0], r1 @D4 = p0u , D5 = p0v
+ vdup.8 d20, r2 @D20 contains alpha_cb
+ vdup.8 d21, r5 @D21 contains alpha_cr
+ vld2.8 {d0, d1}, [r0], r1 @D0 = q0u , D1 = q0v
+ vaddl.u8 q4, d6, d0 @Q4 = q0u + p1u
+ vaddl.u8 q5, d7, d1 @Q4,Q5 = q0 + p1
+ vmov.i8 d31, #2 @D31 = 2 in every byte (multiplier for the X2 terms)
+ vld2.8 {d2, d3}, [r0] @D2 = q1u , D3 = q1v
+ vabd.u8 q13, q3, q2 @Q13 = ABS(p1 - p0)
+ vmlal.u8 q4, d2, d31 @Q4 = (X2(q1U) + q0U + p1U)
+ vmlal.u8 q5, d3, d31 @Q5,Q4 = (X2(q1U) + q0U + p1U)
+ vabd.u8 q11, q2, q0 @Q11 = ABS(p0 - q0)
+ vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0)
+ vaddl.u8 q7, d4, d2 @Q7 = p0u + q1u
+ vaddl.u8 q14, d5, d3 @Q14,Q7 = P0 + Q1
+ vdup.8 d16, r3 @D16 contains beta_cb
+ vdup.8 d17, r6 @D17 contains beta_cr
+ vmlal.u8 q7, d6, d31 @Q7 = (X2(p1U) + p0U + q1U)
+ vmlal.u8 q14, d7, d31 @Q14,Q7 = (X2(p1U) + p0U + q1U)
+ vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha )
+ vcge.u8 q12, q12, q8 @Q12= ( ABS(q1 - q0) >= Beta )
+ vcge.u8 q13, q13, q8 @Q13= ( ABS(p1 - p0) >= Beta )
+ vrshrn.u16 d8, q4, #2 @D8 = filtered q0 for U
+ vrshrn.u16 d9, q5, #2 @Q4 = (X2(q1U) + q0U + p1U + 2) >> 2
+ vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
+ vrshrn.u16 d10, q7, #2 @D10 = filtered p0 for U
+ vrshrn.u16 d11, q14, #2 @Q5 = (X2(p1U) + p0U + q1U + 2) >> 2
+ vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
+ vbit q5, q2, q9 @put back the unfiltered p0 wherever any condition in Q9 is set
+ vbit q4, q0, q9 @put back the unfiltered q0 wherever any condition in Q9 is set
+ vst2.8 {d10, d11}, [r4], r1 @store the p0 row, re-interleaving U and V
+ vst2.8 {d8, d9}, [r4] @store the q0 row, re-interleaving U and V
+ vpop {d8 - d15} @restore NEON registers d8-d15
+ ldmfd sp!, {r4-r6, pc} @restore r4-r6 and return
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block vertical edge when the
+@* boundary strength is set to 4 in high profile
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha_cb
+@* Alpha Value for the boundary in U
+@*
+@* @param[in] r3 - beta_cb
+@* Beta Value for the boundary in U
+@*
+@* @param[in] sp(0) - alpha_cr
+@* Alpha Value for the boundary in V
+@*
+@* @param[in] sp(4) - beta_cr
+@* Beta Value for the boundary in V
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_vert_bs4_a9
+
+ih264_deblk_chroma_vert_bs4_a9:
+
+ stmfd sp!, {r4, r5, r12, r14} @save r4, r5, r12 and the return address (16 bytes, so stack arguments start at sp + 16)
+
+ sub r0, r0, #4 @point r0 to p1u of row0.
+ mov r12, r0 @keep a back up of r0 for buffer write
+
+ ldr r4, [sp, #16] @r4 = alpha_cr
+ ldr r5, [sp, #20] @r5 = beta_cr
+ add r2, r2, r4, lsl #8 @r2 = (alpha_cr,alpha_cb)
+ add r3, r3, r5, lsl #8 @r3 = (beta_cr,beta_cb)
+ vpush {d8 - d15} @save NEON registers d8-d15
+ vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1 @row 0: lane 0 of D0/D2/D4/D6 = p1/p0/q0/q1 (16 bits each)
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1 @row 1 into lane 1
+ vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1 @row 2 into lane 2
+ vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1 @row 3 into lane 3
+
+ vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1 @row 4: lane 0 of D1/D3/D5/D7 = p1/p0/q0/q1
+ vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1 @row 5 into lane 1
+ vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1 @row 6 into lane 2
+ vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1 @row 7 into lane 3
+
+ vdup.16 q11, r2 @Q11 = alpha (alpha_cb in the low byte, alpha_cr in the high byte of every 16-bit lane)
+ vdup.16 q12, r3 @Q12 = beta (beta_cb in the low byte, beta_cr in the high byte of every 16-bit lane)
+ vmov.i8 d31, #2 @D31 = 2 in every byte (multiplier for the 2*p1 and 2*q1 terms)
+
+ vabd.u8 q4, q1, q2 @|p0-q0|
+ vabd.u8 q5, q3, q2 @|q1-q0|
+ vabd.u8 q6, q0, q1 @|p1-p0|
+ vaddl.u8 q7, d2, d6 @(p0 + q1), rows 0-3
+ vaddl.u8 q8, d3, d7 @(p0 + q1)
+ vclt.u8 q4, q4, q11 @|p0-q0| < alpha ?
+ vclt.u8 q5, q5, q12 @|q1-q0| < beta ?
+ vclt.u8 q6, q6, q12 @|p1-p0| < beta ?
+ vmlal.u8 q7, d0, d31 @2*p1 + (p0 + q1), rows 0-3
+ vmlal.u8 q8, d1, d31 @2*p1 + (p0 + q1)
+ vaddl.u8 q9, d0, d4 @(p1 + q0), rows 0-3
+ vaddl.u8 q10, d1, d5 @(p1 + q0)
+ vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta
+ vmlal.u8 q9, d6, d31 @2*q1 + (p1 + q0), rows 0-3
+ vmlal.u8 q10, d7, d31 @2*q1 + (p1 + q0)
+
+ vrshrn.i16 d14, q7, #2 @filtered p0, rows 0-3
+ vrshrn.i16 d15, q8, #2 @(2*p1 + (p0 + q1) + 2) >> 2
+ vand.u8 q4, q4, q6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+ vrshrn.i16 d18, q9, #2 @filtered q0, rows 0-3
+ vrshrn.i16 d19, q10, #2 @(2*q1 + (p1 + q0) + 2) >> 2
+
+ vbit q1, q7, q4 @replace p0 with the filtered value only where the condition in Q4 holds
+ vbit q2, q9, q4 @replace q0 with the filtered value only where the condition in Q4 holds
+
+ vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r12], r1 @write back row 0 (r12 = saved start pointer)
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r12], r1 @write back row 1
+ vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r12], r1 @write back row 2
+ vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r12], r1 @write back row 3
+
+ vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r12], r1 @write back row 4
+ vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r12], r1 @write back row 5
+ vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r12], r1 @write back row 6
+ vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r12], r1 @write back row 7
+ vpop {d8 - d15} @restore NEON registers d8-d15
+ ldmfd sp!, {r4, r5, r12, pc} @restore r4, r5, r12 and return
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block horizontal edge for cases where the
+@* boundary strength is less than 4 in high profile
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha_cb
+@* Alpha Value for the boundary in U
+@*
+@* @param[in] r3 - beta_cb
+@* Beta Value for the boundary in U
+@*
+@* @param[in] sp(0) - alpha_cr
+@* Alpha Value for the boundary in V
+@*
+@* @param[in] sp(4) - beta_cr
+@* Beta Value for the boundary in V
+@*
+@* @param[in] sp(8) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(12) - pu1_cliptab_cb
+@* tc0_table for U
+@*
+@* @param[in] sp(16) - pu1_cliptab_cr
+@* tc0_table for V
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_horz_bslt4_a9
+
+ih264_deblk_chroma_horz_bslt4_a9:
+
+ stmfd sp!, {r4-r9, lr} @save r4-r9 and the return address (28 bytes, so stack arguments start at sp + 28)
+
+ ldrd r4, r5, [sp, #28] @R4 = alpha_cr , R5 = beta_cr
+ ldr r7, [sp, #36] @R7 = u4_bs
+ ldrd r8, r9, [sp, #40] @R8 = pu1_cliptab_cb , R9 = pu1_cliptab_cr
+ sub r0, r0, r1, lsl #1 @R0 = uc_edgePixelU pointing to p1 of chroma U
+ vpush {d8 - d15} @save NEON registers d8-d15 (done after the stack arguments are read)
+ rev r7, r7 @byte-reverse the packed bS
+ vmov.32 d12[0], r7 @D12[0] = ui_Bs
+
+ vld1.32 d16[0], [r8] @D16[0] contains cliptab_cb
+ vld1.32 d17[0], [r9] @D17[0] contains cliptab_cr
+ vld2.8 {d6, d7}, [r0], r1 @Q3=p1
+ vtbl.8 d14, {d16}, d12 @Retrieving cliptab values for U
+ vtbl.8 d28, {d17}, d12 @Retrieving cliptab values for V
+ vmovl.u8 q6, d12 @Q6 = uc_Bs in each 16 bit scalar
+ mov r6, r0 @Keeping a backup of the pointer to chroma U P0
+ vld2.8 {d4, d5}, [r0], r1 @Q2=p0
+ vmov.i8 d30, #1 @D30 = 1 in every byte (used for C0+1)
+ vdup.8 d20, r2 @D20 contains alpha_cb
+ vdup.8 d21, r4 @D21 contains alpha_cr
+ vld2.8 {d0, d1}, [r0], r1 @Q0=q0
+ vmovl.u8 q7, d14 @widen the U cliptab values to 16 bits (the four looked-up values land in D14)
+ vmovl.u8 q14, d28 @widen the V cliptab values to 16 bits (the four looked-up values land in D28)
+ vmov.i16 d15, d28 @D14 has cliptab values for U, D15 for V
+ vld2.8 {d2, d3}, [r0] @Q1=q1
+ vsubl.u8 q5, d1, d5 @Q5 = (q0 - p0) for V
+ vsubl.u8 q4, d0, d4 @Q5,Q4 = (q0 - p0)
+ vabd.u8 q13, q3, q2 @Q13 = ABS(p1 - p0)
+ vshl.i16 q5, q5, #2 @Q5 = (q0 - p0)<<2
+ vabd.u8 q11, q2, q0 @Q11 = ABS(p0 - q0)
+ vshl.i16 q4, q4, #2 @Q4 = (q0 - p0)<<2
+ vsli.16 q7, q7, #8 @copy each cliptab value into both bytes of its 16 bit lane
+ vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0)
+ vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha )
+ vsubl.u8 q10, d6, d2 @Q10 = (p1 - q1)L
+ vsubl.u8 q3, d7, d3 @Q3 = (p1 - q1)H
+ vdup.8 d16, r3 @D16 contains beta_cb
+ vdup.8 d17, r5 @D17 contains beta_cr
+ vadd.i16 q4, q4, q10 @Q4 = [ (q0 - p0)<<2 ] + (p1 - q1) for U
+ vadd.i16 q5, q5, q3 @Q5,Q4 = [ (q0 - p0)<<2 ] + (p1 - q1)
+ vcge.u8 q12, q12, q8 @Q12= ( ABS(q1 - q0) >= Beta )
+ vcgt.s16 d12, d12, #0 @D12 = (us_Bs > 0) in each 16 bit lane
+ vqrshrn.s16 d8, q4, #3 @D8 = i_macro for U (rounded, saturated to 8 bits)
+ vqrshrn.s16 d9, q5, #3 @Q4 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3
+ vadd.i8 d14, d14, d30 @D14 = C = C0+1 for U
+ vcge.u8 q13, q13, q8 @Q13= ( ABS(p1 - p0) >= Beta )
+ vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
+ vabs.s8 q3, q4 @Q3 = ABS (i_macro)
+ vadd.i8 d15, d15, d30 @D15 = C = C0+1 for V
+ vmov.i8 d13, d12 @D13 = D12, so Q6 holds the (us_Bs > 0) mask for both U and V
+ vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
+ vmin.u8 q7, q3, q7 @Q7 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro)
+ vbic q6, q6, q9 @final condition
+ vcge.s8 q4, q4, #0 @Q4 = (i_macro >= 0)
+ vand q7, q7, q6 @Making delta zero in places where values shouldn't be filtered
+ vqadd.u8 q8, q2, q7 @Q8 = p0 + delta
+ vqsub.u8 q2, q2, q7 @Q2 = p0 - delta
+ vqadd.u8 q9, q0, q7 @Q9 = q0 + delta
+ vqsub.u8 q0, q0, q7 @Q0 = q0 - delta
+ vbif q8, q2, q4 @Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta)
+ vbif q0, q9, q4 @Q0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta)
+ vst2.8 {d16, d17}, [r6], r1 @store the p0 row, re-interleaving U and V
+ vst2.8 {d0, d1}, [r6] @store the q0 row, re-interleaving U and V
+ vpop {d8 - d15} @restore NEON registers d8-d15
+ ldmfd sp!, {r4-r9, pc} @restore r4-r9 and return
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block vertical edge for cases where the
+@* boundary strength is less than 4 in high profile
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha_cb
+@* Alpha Value for the boundary in U
+@*
+@* @param[in] r3 - beta_cb
+@* Beta Value for the boundary in U
+@*
+@* @param[in] sp(0) - alpha_cr
+@* Alpha Value for the boundary in V
+@*
+@* @param[in] sp(4) - beta_cr
+@* Beta Value for the boundary in V
+@*
+@* @param[in] sp(8) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(12) - pu1_cliptab_cb
+@* tc0_table for U
+@*
+@* @param[in] sp(16) - pu1_cliptab_cr
+@* tc0_table for V
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_vert_bslt4_a9
+
+ih264_deblk_chroma_vert_bslt4_a9:
+
+ stmfd sp!, {r4-r7, r10-r12, r14} @save 8 registers (32 bytes, so stack arguments start at sp + 32)
+
+ sub r0, r0, #4 @point r0 to p1u of row0.
+ ldrd r4, r5, [sp, #32] @R4 = alpha_cr , R5 = beta_cr
+ add r2, r2, r4, lsl #8 @r2 = (alpha_cr,alpha_cb)
+ add r3, r3, r5, lsl #8 @r3 = (beta_cr,beta_cb)
+ ldr r6, [sp, #40] @R6 = u4_bs
+ ldrd r10, r11, [sp, #44] @R10 = pu1_cliptab_cb , R11 = pu1_cliptab_cr
+ vpush {d8 - d15} @save NEON registers d8-d15 (done after the stack arguments are read)
+ mov r12, r0 @keep a back up of R0 for buffer write
+
+ vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1 @row 0: lane 0 of D0/D2/D4/D6 = p1/p0/q0/q1 (16 bits each)
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1 @row 1 into lane 1
+ vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1 @row 2 into lane 2
+ vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1 @row 3 into lane 3
+
+ vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1 @row 4: lane 0 of D1/D3/D5/D7 = p1/p0/q0/q1
+ vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1 @row 5 into lane 1
+ vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1 @row 6 into lane 2
+ vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1 @row 7 into lane 3
+
+
+ vdup.16 q11, r2 @Q11 = alpha (alpha_cb in the low byte, alpha_cr in the high byte of every 16-bit lane)
+ vabd.u8 q4, q1, q2 @|p0-q0|
+ vdup.16 q12, r3 @Q12 = beta (beta_cb in the low byte, beta_cr in the high byte of every 16-bit lane)
+ vabd.u8 q5, q3, q2 @|q1-q0|
+ vabd.u8 q6, q0, q1 @|p1-p0|
+ vclt.u8 q4, q4, q11 @|p0-q0| < alpha ?
+ vsubl.u8 q7, d0, d6 @(p1 - q1), rows 0-3
+ vclt.u8 q5, q5, q12 @|q1-q0| < beta ?
+ vsubl.u8 q8, d1, d7 @(p1 - q1)
+ vclt.u8 q6, q6, q12 @|p1-p0| < beta ?
+ vsubl.u8 q9, d4, d2 @(q0 - p0), rows 0-3
+ vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta
+ vsubl.u8 q10, d5, d3 @(q0 - p0)
+ vmov.u16 q14, #4 @Q14 = 4 in every 16-bit lane (multiplier for (q0 - p0))
+ vld1.32 {d24[0]}, [r10] @Load ClipTable for U
+ vld1.32 {d25[0]}, [r11] @Load ClipTable for V
+ rev r6, r6 @Byte-reverse the packed blocking strengths
+ vand.u8 q4, q4, q6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+
+ vmov.32 d10[0], r6 @D10[0] = byte-reversed bS
+
+ vmla.s16 q7, q9, q14 @4*(q0 - p0) + (p1 - q1), rows 0-3
+ vmla.s16 q8, q10, q14 @4*(q0 - p0) + (p1 - q1)
+
+ vmovl.u8 q5, d10 @widen each bS byte to 16 bits (the four bS values end up in D10)
+ vsli.u16 d10, d10, #8 @copy each bS into both bytes of its 16-bit lane
+ vtbl.8 d12, {d24}, d10 @tC0 for U
+ vtbl.8 d13, {d25}, d10 @tC0 for V
+ vzip.8 d12, d13 @interleave the U and V tC0 bytes (U in even bytes, V in odd bytes of Q6)
+ vmovl.u16 q5, d10 @widen each 16-bit bS pair to 32 bits
+ vsli.u32 q5, q5, #16 @each bS now fills 4 consecutive bytes of Q5
+ vmov.u8 q12, #1 @Q12 = 1 in every byte (beta in Q12 is not used after this point)
+ vadd.u8 q6, q6, q12 @tC0 + 1
+ vcge.u8 q5, q5, q12 @u4_bS > 0 ? (tested as bS >= 1)
+ vand.u8 q4, q4, q5 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0
+
+ @ Q0 - Q3(inputs),
+ @ Q4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0),
+ @ Q6 (tC)
+
+ vrshr.s16 q7, q7, #3 @delta for rows 0-3
+ vrshr.s16 q8, q8, #3 @(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
+
+ vcgt.s16 q9, q7, #0 @delta > 0 ? (rows 0-3)
+ vcgt.s16 q10, q8, #0 @delta > 0 ? (rows 4-7)
+ vmovn.i16 d18, q9 @narrow the rows 0-3 mask to bytes
+ vmovn.i16 d19, q10 @Q9 = all ones where delta > 0
+ vabs.s16 q7, q7 @|delta| (16 bit), rows 0-3
+ vabs.s16 q8, q8 @|delta| (16 bit), rows 4-7
+ vmovn.u16 d14, q7 @narrow |delta| to 8 bits, rows 0-3
+ vmovn.u16 d15, q8 @narrow |delta| to 8 bits, rows 4-7
+ vmin.u8 q7, q7, q6 @Q7 = min(|delta|, tC)
+
+ vqadd.u8 q10, q1, q7 @p0+|delta|
+ vqadd.u8 q11, q2, q7 @q0+|delta|
+ vqsub.u8 q12, q1, q7 @p0-|delta|
+ vqsub.u8 q13, q2, q7 @q0-|delta|
+
+ vbit q12, q10, q9 @p0 + delta (p0+|delta| where delta > 0, else p0-|delta|)
+ vbit q11, q13, q9 @q0 - delta (q0-|delta| where delta > 0, else q0+|delta|)
+
+ vbit q1, q12, q4 @replace p0 only where the condition in Q4 holds
+ vbit q2, q11, q4 @replace q0 only where the condition in Q4 holds
+
+ vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r12], r1 @write back row 0 (r12 = saved start pointer)
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r12], r1 @write back row 1
+ vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r12], r1 @write back row 2
+ vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r12], r1 @write back row 3
+
+ vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r12], r1 @write back row 4
+ vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r12], r1 @write back row 5
+ vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r12], r1 @write back row 6
+ vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r12], r1 @write back row 7
+ vpop {d8 - d15} @restore NEON registers d8-d15
+ ldmfd sp!, {r4-r7, r10-r12, pc} @restore the saved registers and return
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block vertical edge when the boundary strength
+@* is set to 4, in high profile. Filters 4 rows per call; intended to be called twice
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha_cb
+@* Alpha Value for the boundary in U
+@*
+@* @param[in] r3 - beta_cb
+@* Beta Value for the boundary in U
+@*
+@* @param[in] sp(0) - alpha_cr
+@* Alpha Value for the boundary in V
+@*
+@* @param[in] sp(4) - beta_cr
+@* Beta Value for the boundary in V
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_vert_bs4_mbaff_a9
+
+ih264_deblk_chroma_vert_bs4_mbaff_a9:
+
+ stmfd sp!, {r4, r5, r12, r14} @save r4, r5, r12 and the return address (16 bytes, so stack arguments start at sp + 16)
+
+ sub r0, r0, #4 @point r0 to p1u of row0.
+ mov r12, r0 @keep a back up of r0 for buffer write
+ ldrd r4, r5, [sp, #16] @R4 = alpha_cr , R5 = beta_cr
+ add r2, r2, r4, lsl #8 @r2 = (alpha_cr,alpha_cb)
+ add r3, r3, r5, lsl #8 @r3 = (beta_cr,beta_cb)
+ vpush {d8 - d15} @save NEON registers d8-d15
+ vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 @row 0: lane 0 of D0/D1/D2/D3 = p1/p0/q0/q1 (16 bits each)
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 @row 1 into lane 1
+ vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 @row 2 into lane 2
+ vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 @row 3 into lane 3
+
+ vdup.16 d11, r2 @D11 = alpha (alpha_cb in the low byte, alpha_cr in the high byte of every 16-bit lane)
+ vdup.16 d12, r3 @D12 = beta (beta_cb in the low byte, beta_cr in the high byte of every 16-bit lane)
+ vmov.i8 d31, #2 @D31 = 2 in every byte (multiplier for the 2*p1 and 2*q1 terms)
+
+ vabd.u8 d4, d1, d2 @|p0-q0|
+ vabd.u8 d5, d3, d2 @|q1-q0|
+ vabd.u8 d6, d0, d1 @|p1-p0|
+ vaddl.u8 q14, d1, d3 @(p0 + q1)
+ vclt.u8 d4, d4, d11 @|p0-q0| < alpha ?
+ vclt.u8 d5, d5, d12 @|q1-q0| < beta ?
+ vclt.u8 d6, d6, d12 @|p1-p0| < beta ?
+ vmlal.u8 q14, d0, d31 @2*p1 + (p0 + q1)
+ vaddl.u8 q13, d0, d2 @(p1 + q0)
+ vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta
+ vmlal.u8 q13, d3, d31 @2*q1 + (p1 + q0)
+
+ vrshrn.i16 d7, q14, #2 @D7 = filtered p0 = (2*p1 + (p0 + q1) + 2) >> 2
+ vand.u8 d4, d4, d6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+ vrshrn.i16 d9, q13, #2 @D9 = filtered q0 = (2*q1 + (p1 + q0) + 2) >> 2
+
+ vbit d1, d7, d4 @replace p0 with the filtered value only where the condition in D4 holds
+ vbit d2, d9, d4 @replace q0 with the filtered value only where the condition in D4 holds
+
+ vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r12], r1 @write back row 0 (r12 = saved start pointer)
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r12], r1 @write back row 1
+ vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r12], r1 @write back row 2
+ vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r12], r1 @write back row 3
+ vpop {d8 - d15} @restore NEON registers d8-d15
+ ldmfd sp!, {r4, r5, r12, pc} @restore r4, r5, r12 and return
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a chroma block vertical edge for cases where the boundary strength
+@* is less than 4, in high profile. Filters 4 rows per call; intended to be called twice
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha_cb
+@* Alpha Value for the boundary in U
+@*
+@* @param[in] r3 - beta_cb
+@* Beta Value for the boundary in U
+@*
+@* @param[in] sp(0) - alpha_cr
+@* Alpha Value for the boundary in V
+@*
+@* @param[in] sp(4) - beta_cr
+@* Beta Value for the boundary in V
+@*
+@* @param[in] sp(8) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(12) - pu1_cliptab_cb
+@* tc0_table for U
+@*
+@* @param[in] sp(16) - pu1_cliptab_cr
+@* tc0_table for V
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_chroma_vert_bslt4_mbaff_a9
+
+ih264_deblk_chroma_vert_bslt4_mbaff_a9:
+
+ stmfd sp!, {r4-r6, r10-r12, r14} @save 7 registers (28 bytes, so stack arguments start at sp + 28)
+
+ sub r0, r0, #4 @point r0 to p1u of row0.
+ mov r12, r0 @keep a back up of r0 for buffer write
+
+ ldrd r4, r5, [sp, #28] @R4 = alpha_cr , R5 = beta_cr
+ add r2, r2, r4, lsl #8 @r2 = (alpha_cr,alpha_cb)
+ add r3, r3, r5, lsl #8 @r3 = (beta_cr,beta_cb)
+ ldr r6, [sp, #36] @R6 = u4_bs
+ ldrd r10, r11, [sp, #40] @R10 = pu1_cliptab_cb , R11 = pu1_cliptab_cr
+ vpush {d8 - d15} @save NEON registers d8-d15 (done after the stack arguments are read)
+ vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 @row 0: lane 0 of D0/D1/D2/D3 = p1/p0/q0/q1 (16 bits each)
+ vld4.16 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 @row 1 into lane 1
+ vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 @row 2 into lane 2
+ vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 @row 3 into lane 3
+
+ vdup.16 d11, r2 @D11 = alpha (alpha_cb in the low byte, alpha_cr in the high byte of every 16-bit lane)
+ vabd.u8 d4, d1, d2 @|p0-q0|
+ vdup.16 d12, r3 @D12 = beta (beta_cb in the low byte, beta_cr in the high byte of every 16-bit lane)
+ vabd.u8 d5, d3, d2 @|q1-q0|
+ vabd.u8 d6, d0, d1 @|p1-p0|
+ vclt.u8 d4, d4, d11 @|p0-q0| < alpha ?
+ vclt.u8 d5, d5, d12 @|q1-q0| < beta ?
+ vsubl.u8 q14, d0, d3 @(p1 - q1)
+ vclt.u8 d6, d6, d12 @|p1-p0| < beta ?
+ vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta
+ vsubl.u8 q12, d2, d1 @(q0 - p0)
+ vmov.u16 q10, #4 @Q10 = 4 in every 16-bit lane (multiplier for (q0 - p0))
+
+ vld1.32 {d31[1]}, [r10] @Load ClipTable for U (bytes 4-7 of D31)
+ vld1.32 {d31[0]}, [r11] @Load ClipTable for V (bytes 0-3 of D31)
+ rev r6, r6 @Byte-reverse the packed blocking strengths
+ vand.u8 d4, d4, d6 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+ vmov.32 d22[0], r6 @D22[0] = byte-reversed bS
+ vmla.s16 q14, q12, q10 @4*(q0 - p0) + (p1 - q1)
+ vmovl.u8 q11, d22 @widen each bS byte to 16 bits (the four bS values end up in D22)
+ vsli.u16 d22, d22, #8 @copy each bS into both bytes of its 16-bit lane
+ vmov.u16 d13, #4 @D13 = 4 in the low byte and 0 in the high byte of every 16-bit lane
+ vadd.u8 d22, d22, d13 @low-byte (U) indices become bS + 4 so they select the U table in bytes 4-7 of D31
+ vtbl.8 d6, {d31}, d22 @tC0 (U table for low bytes, V table for high bytes)
+ vmov.u8 d12, #1 @D12 = 1 in every byte (beta in D12 is not used after this point)
+ vsub.u8 d22, d22, d13 @restore the plain bS values in D22
+ vadd.u8 d6, d6, d12 @tC0 + 1
+ vcge.u8 d5, d22, d12 @u4_bS > 0 ? (tested as bS >= 1)
+ vand.u8 d4, d4, d5 @|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0
+
+ @ D0 - D3(inputs),
+ @ D4 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0),
+ @ D6 (tC)
+
+ vrshr.s16 q14, q14, #3 @(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
+
+ vcgt.s16 q13, q14, #0 @delta > 0 ?
+ vmovn.i16 d9, q13 @D9 = all ones where delta > 0
+ vabs.s16 q14, q14 @|delta| (16 bit)
+ vmovn.u16 d7, q14 @narrow |delta| to 8 bits
+ vmin.u8 d7, d7, d6 @D7 = min(|delta|, tC)
+
+ vqadd.u8 d10, d1, d7 @p0+|delta|
+ vqadd.u8 d11, d2, d7 @q0+|delta|
+ vqsub.u8 d12, d1, d7 @p0-|delta|
+ vqsub.u8 d13, d2, d7 @q0-|delta|
+
+ vbit d12, d10, d9 @p0 + delta (p0+|delta| where delta > 0, else p0-|delta|)
+ vbit d11, d13, d9 @q0 - delta (q0-|delta| where delta > 0, else q0+|delta|)
+
+ vbit d1, d12, d4 @replace p0 only where the condition in D4 holds
+ vbit d2, d11, d4 @replace q0 only where the condition in D4 holds
+
+ vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r12], r1 @write back row 0 (r12 = saved start pointer)
+ vst4.16 {d0[1], d1[1], d2[1], d3[1]}, [r12], r1 @write back row 1
+ vst4.16 {d0[2], d1[2], d2[2], d3[2]}, [r12], r1 @write back row 2
+ vst4.16 {d0[3], d1[3], d2[3], d3[3]}, [r12], r1 @write back row 3
+ vpop {d8 - d15} @restore NEON registers d8-d15
+ ldmfd sp!, {r4-r6, r10-r12, pc} @restore the saved registers and return
+
+
+
diff --git a/common/arm/ih264_deblk_luma_a9.s b/common/arm/ih264_deblk_luma_a9.s
new file mode 100755
index 0000000..3e6a4d9
--- /dev/null
+++ b/common/arm/ih264_deblk_luma_a9.s
@@ -0,0 +1,1092 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/*****************************************************************************/
+@/* */
+@/* File Name : ih264_deblk_luma_a9.s */
+@/* */
+@/* Description : Contains function definitions for deblocking luma */
+@/* edge. Functions are coded in NEON assembly and can */
+@/* be compiled using ARM RVDS. */
+@/* */
+@/* List of Functions : ih264_deblk_luma_vert_bs4_a9() */
+@/* ih264_deblk_luma_vert_bslt4_a9() */
+@/* ih264_deblk_luma_horz_bs4_a9() */
+@/* ih264_deblk_luma_horz_bslt4_a9() */
+@/* ih264_deblk_luma_vert_bs4_mbaff_a9() */
+@/* ih264_deblk_luma_vert_bslt4_mbaff_a9() */
+@/* */
+@/* Issues / Problems : None */
+@/* */
+@/* Revision History : */
+@/* */
+@/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+@/* 28 11 2013 Ittiam Draft */
+@/* 05 01 2015 Kaushik Added double-call functions for */
+@/* Senthoor vertical deblocking. */
+@/* */
+@/*****************************************************************************/
+
+
+.text
+.p2align 2
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a luma block horizontal edge for cases where the
+@* boundary strength is less than 4
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @param[in] sp(0) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(4) - pu1_cliptab
+@* tc0_table
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_luma_horz_bslt4_a9
+
+ih264_deblk_luma_horz_bslt4_a9:
+
+ stmfd sp!, {r4-r7, lr}
+
+ ldrd r4, r5, [sp, #0x14] @r4 = ui_Bs , r5 = *puc_ClpTab (stack args lie above the 5 pushed registers)
+ vpush {d8 - d15}
+ sub r0, r0, r1, lsl #1 @r0 = pu1_src - 2*src_strd
+ sub r0, r0, r1 @r0 pointer to p2
+ rev r4, r4 @byte-reverse the packed ui_Bs word
+ vld1.8 {q5}, [r0], r1 @p2 values are loaded into q5
+ vmov.32 d12[0], r4 @d12[0] = ui_Bs
+ mov r6, r0 @keeping backup of pointer to p1
+ vld1.8 {q4}, [r0], r1 @p1 values are loaded into q4
+ mov r7, r0 @keeping backup of pointer to p0
+ vld1.8 {q3}, [r0], r1 @p0 values are loaded into q3
+ vmovl.u8 q6, d12 @q6 = uc_Bs in each 16 bit scalar
+ vld1.8 {q0}, [r0], r1 @q0 values are loaded into q0
+ vabd.u8 q13, q4, q3 @Q13 = ABS(p1 - p0)
+ vld1.8 {q1}, [r0], r1 @q1 values are loaded into q1
+ vabd.u8 q11, q3, q0 @Q11 = ABS(p0 - q0)
+ vld1.32 d16[0], [r5] @D16[0] contains cliptab
+ vabd.u8 q12, q1, q0 @Q12 = ABS(q1 - q0)
+ vld1.8 {q2}, [r0], r1 @q2 values are loaded into q2
+ vtbl.8 d14, {d16}, d12 @D14 = puc_ClpTab[uc_Bs] (byte-wise table lookup)
+ vdup.8 q10, r2 @Q10 contains alpha
+ vdup.8 q8, r3 @Q8 contains beta
+ vmovl.u16 q6, d12 @Q6 = uc_Bs widened to 32 bit lanes
+ vmovl.u16 q7, d14 @Q7 = looked-up clip values widened to 32 bit lanes
+ vabd.u8 q14, q5, q3 @Q14 = Ap = ABS(p2 - p0)
+ vabd.u8 q15, q2, q0 @Q15 = Aq = ABS(q2 - q0)
+ vcgt.s32 q6, q6, #0 @Q6 = (us_Bs > 0)
+ vsli.32 q7, q7, #8 @copy the low byte of each 32 bit lane into byte 1
+ vcge.u8 q9, q11, q10 @Q9 = ( ABS(p0 - q0) >= Alpha )
+ vcge.u8 q12, q12, q8 @Q12=( ABS(q1 - q0) >= Beta )
+ vcge.u8 q13, q13, q8 @Q13=( ABS(p1 - p0) >= Beta )
+ vcgt.u8 q10, q8, q14 @Q10=(Ap<Beta)
+ vcgt.u8 q11, q8, q15 @Q11=(Aq<Beta)
+ vsli.32 q7, q7, #16 @Q7 = C0 (now replicated in all 4 bytes of each 32 bit lane)
+ vorr q9, q9, q12 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
+ vsubl.u8 q15, d1, d7 @(q0 - p0) H
+ vsubl.u8 q12, d0, d6 @Q15,Q12 = (q0 - p0)
+ vorr q9, q9, q13 @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
+ vsubl.u8 q14, d8, d2 @Q14 = (p1 - q1)L
+ vshl.i16 q13, q15, #2 @Q13 = (q0 - p0)<<2 H
+ vshl.i16 q12, q12, #2 @Q12 = (q0 - p0)<<2 L
+ vsubl.u8 q15, d9, d3 @Q15 = (p1 - q1)H
+ vbic q6, q6, q9 @final condition: (us_Bs > 0) and none of the alpha/beta rejections in Q9
+ vadd.i16 q12, q12, q14 @
+ vadd.i16 q13, q13, q15 @Q13,Q12 = [ (q0 - p0)<<2 ] + (p1 - q1)
+ vsub.i8 q9, q7, q10 @Q9 = C0 + (Ap < Beta) (subtracting the all-ones mask adds 1)
+ vrhadd.u8 q8, q3, q0 @Q8 = ((p0+q0+1) >> 1)
+ vqrshrn.s16 d24, q12, #3 @
+ vqrshrn.s16 d25, q13, #3 @Q12 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3
+ vsub.i8 q9, q9, q11 @Q9 = C0 + (Ap < Beta) + (Aq < Beta)
+ vand.i8 q10, q10, q6 @Q10 = (Ap < Beta) && final condition
+ vand.i8 q11, q11, q6 @Q11 = (Aq < Beta) && final condition
+ vabs.s8 q13, q12 @Q13 = ABS (i_macro)
+ vaddl.u8 q14, d17, d11 @
+ vaddl.u8 q5, d16, d10 @Q14,Q5 = p2 + (p0+q0+1)>>1
+ vaddl.u8 q15, d17, d5 @
+ vmin.u8 q9, q13, q9 @Q9 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro)
+ vshll.u8 q13, d9, #1 @
+ vaddl.u8 q2, d16, d4 @Q15,Q2 = q2 + (p0+q0+1)>>1
+ vshll.u8 q8, d8, #1 @Q13,Q8 = (p1<<1)
+ vand q9, q9, q6 @Making delta zero in places where values shouldn't be filtered
+ vsub.i16 q14, q14, q13 @Q14,Q5 = [p2 + (p0+q0+1)>>1] - (p1<<1)
+ vsub.i16 q5, q5, q8 @
+ vshll.u8 q8, d2, #1 @
+ vshll.u8 q13, d3, #1 @Q13,Q8 = (q1<<1)
+ vqshrn.s16 d29, q14, #1 @
+ vqshrn.s16 d28, q5, #1 @Q14 = i_macro_p1
+ vsub.i16 q2, q2, q8 @
+ vsub.i16 q15, q15, q13 @Q15,Q2 = [q2 + (p0+q0+1)>>1] - (q1<<1)
+ vneg.s8 q13, q7 @Q13 = -C0
+ vmin.s8 q14, q14, q7 @Q14 = min(C0,i_macro_p1)
+ vcge.s8 q12, q12, #0 @Q12 = (i_macro >= 0)
+ vqshrn.s16 d31, q15, #1 @
+ vqshrn.s16 d30, q2, #1 @Q15 = i_macro_q1
+ vmax.s8 q14, q14, q13 @Q14 = max( - C0 , min(C0, i_macro_p1) )
+ vqadd.u8 q8, q3, q9 @Q8 = p0 + delta
+ vqsub.u8 q3, q3, q9 @Q3 = p0 - delta
+ vmin.s8 q15, q15, q7 @Q15 = min(C0,i_macro_q1)
+ vand.i8 q14, q10, q14 @condition check Ap<beta
+ vqadd.u8 q7, q0, q9 @Q7 = q0 + delta
+ vqsub.u8 q0, q0, q9 @Q0 = q0 - delta
+ vmax.s8 q15, q15, q13 @Q15 = max( - C0 , min(C0, i_macro_q1) )
+ vbif q8, q3, q12 @Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta)
+ vbif q0, q7, q12 @Q0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta)
+ vadd.i8 q14, q14, q4 @Q14 = p1 + masked, clipped i_macro_p1
+ vand.i8 q15, q11, q15 @condition check Aq<beta
+ vst1.8 {q8}, [r7], r1 @writing back filtered value of p0
+ vadd.i8 q15, q15, q1 @Q15 = q1 + masked, clipped i_macro_q1
+ vst1.8 {q0}, [r7], r1 @writing back filtered value of q0
+ vst1.8 {q14}, [r6] @writing back filtered value of p1
+ vst1.8 {q15}, [r7], r1 @writing back filtered value of q1
+ vpop {d8 - d15}
+ ldmfd sp!, {r4-r7, pc}
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a luma block horizontal edge when the
+@* boundary strength is set to 4
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_luma_horz_bs4_a9
+
+ih264_deblk_luma_horz_bs4_a9:
+
+ @ Back up necessary registers on stack
+ stmfd sp!, {r12, r14}
+ vpush {d8 - d15}
+ @ Init
+ vdup.8 q0, r2 @duplicate alpha
+ sub r12, r0, r1 @pointer to p0 = q0 - src_strd
+ vdup.8 q1, r3 @duplicate beta
+ sub r14, r0, r1, lsl#1 @pointer to p1 = q0 - src_strd*2
+ sub r2, r0, r1, lsl#2 @pointer to p3 = q0 - src_strd*4
+ sub r3, r14, r1 @pointer to p2 = p1 - src_strd
+
+ @ Load Data
+ vld1.8 {d4, d5}, [r0], r1 @load q0 to Q2, r0 = r0 + src_strd
+ vld1.8 {d6, d7}, [r12] @load p0 to Q3
+ vld1.8 {d8, d9}, [r0], r1 @load q1 to Q4, r0 = r0 + src_strd
+ vld1.8 {d10, d11}, [r14] @load p1 to Q5
+
+ @ Filter Decision
+ vabd.u8 q6, q2, q3 @ABS(p0 - q0)
+ vabd.u8 q7, q4, q2 @ABS(q1 - q0)
+ vabd.u8 q8, q5, q3 @ABS(p1 - p0)
+ vcge.u8 q9, q6, q0 @ABS(p0 - q0) >= Alpha
+ vcge.u8 q7, q7, q1 @ABS(q1 - q0) >= Beta
+ vcge.u8 q8, q8, q1 @ABS(p1 - p0) >= Beta
+ vmov.i8 q10, #2
+ vorr q9, q9, q7 @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta
+ vld1.8 {d14, d15}, [r0], r1 @load q2 to Q7, r0 = r0 + src_strd
+ vorr q9, q9, q8 @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta
+ vsra.u8 q10, q0, #2 @((Alpha >> 2) + 2)
+ vabd.u8 q11, q7, q2 @Aq = ABS(q2 - q0)
+ vaddl.u8 q12, d4, d6 @p0+q0 L
+ vaddl.u8 q13, d5, d7 @p0+q0 H
+ vclt.u8 q11, q11, q1 @Aq < Beta
+ vclt.u8 q10, q6, q10 @(ABS(p0 - q0) <((Alpha >>2) + 2))
+
+ @ Deblock Filtering q0', q1', q2'
+ vaddw.u8 q14, q12, d8 @p0+q0+q1 L
+ vaddw.u8 q15, q13, d9 @p0+q0+q1 H
+ vand q11, q11, q10 @(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ @ q0' if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) TRUE
+ vadd.i16 q8, q14, q14 @2*(p0+q0+q1)L
+ vadd.i16 q0, q15, q15 @2*(p0+q0+q1)H
+ vaddw.u8 q8, q8, d14 @2*(p0+q0+q1)+q2 L
+ vaddw.u8 q0, q0, d15 @2*(p0+q0+q1)+q2 H
+ vaddw.u8 q8, q8, d10 @2*(p0+q0+q1)+q2 +p1 L
+ vaddw.u8 q0, q0, d11 @2*(p0+q0+q1)+q2 +p1 H
+ vrshrn.u16 d12, q8, #3 @(2*(p0+q0+q1)+q2 +p1 +4)>> 3 L [q0']
+ vrshrn.u16 d13, q0, #3 @(2*(p0+q0+q1)+q2 +p1 +4)>> 3 H [q0']
+ @ q0" if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) FALSE
+ vaddl.u8 q8, d8, d8 @2*q1 L
+ vaddl.u8 q0, d9, d9 @2*q1 H
+ vaddw.u8 q8, q8, d4 @2*q1+q0 L
+ vaddw.u8 q0, q0, d5 @2*q1+q0 H
+ vaddw.u8 q8, q8, d10 @2*q1+q0+p1 L
+ vaddw.u8 q0, q0, d11 @2*q1+q0+p1 H
+ vrshrn.u16 d16, q8, #2 @(2*q1+q0+p1+2)>>2 L [q0"]
+ vrshrn.u16 d17, q0, #2 @(2*q1+q0+p1+2)>>2 H [q0"]
+ @ q1'
+ vaddw.u8 q14, q14, d14 @p0+q0+q1+q2 L
+ vaddw.u8 q15, q15, d15 @p0+q0+q1+q2 H
+ vld1.8 {q0}, [r0], r1 @load q3 to Q0, r0 = r0 + src_strd
+ vbit q8, q6, q11 @choosing between q0' and q0" depending on condn
+ sub r0, r0, r1, lsl #2 @pointer to q0
+ vbic q11, q11, q9 @!((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta))
+ @ && (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ vrshrn.u16 d12, q14, #2 @(p0+q0+q1+q2+2)>>2 L [q1']
+ vrshrn.u16 d13, q15, #2 @(p0+q0+q1+q2+2)>>2 H [q1']
+ vbif q2, q8, q9 @choose q0 or filtered q0
+ @ q2'
+ vaddl.u8 q8, d14, d0 @q2+q3,L
+ vaddl.u8 q0, d15, d1 @q2+q3,H
+ vadd.i16 q14, q14, q8 @p0+q0+q1+2*q2+q3 L
+ vst1.8 {d4, d5}, [r0], r1 @store q0
+ vadd.i16 q15, q15, q0 @p0+q0+q1+2*q2+q3 H
+ vadd.i16 q14, q14, q8 @p0+q0+q1+3*q2+2*q3 L
+ vadd.i16 q15, q15, q0 @p0+q0+q1+3*q2+2*q3 H
+ vrshrn.u16 d0, q14, #3 @(p0+q0+q1+3*q2+2*q3+4)>>3 L [q2']
+ vrshrn.u16 d1, q15, #3 @(p0+q0+q1+3*q2+2*q3+4)>>3 H [q2']
+ vld1.8 {d30, d31}, [r3] @load p2 to Q15
+ vbif q6, q4, q11 @choose q1 or filtered value of q1
+
+ vabd.u8 q8, q15, q3 @Ap,ABS(p2 - p0)
+ vaddw.u8 q12, q12, d10 @p0+q0+p1 L
+ vbif q0, q7, q11 @choose q2 or filtered q2
+ vaddw.u8 q13, q13, d11 @p0+q0+p1 H
+ vst1.8 {d12, d13}, [r0], r1 @store q1
+ vclt.u8 q8, q8, q1 @Ap < Beta
+ vadd.i16 q14, q12, q12 @2*(p0+q0+p1) L
+ vadd.i16 q2, q13, q13 @2*(p0+q0+p1) H
+ vst1.8 {d0, d1}, [r0], r1 @store q2
+ vand q10, q10, q8 @((Ap < Beta) && (ABS(p0 - q0) <((Alpha >>2) + 2)))
+ vaddw.u8 q14, q14, d30 @2*(p0+q0+p1)+p2 L
+ vaddw.u8 q2, q2, d31 @2*(p0+q0+p1)+p2 H
+ vaddw.u8 q14, q14, d8 @2*(p0+q0+p1)+p2+q1 L
+ vaddw.u8 q2, q2, d9 @2*(p0+q0+p1)+p2+q1 H
+ vrshrn.u16 d28, q14, #3 @(2*(p0+q0+p1)+p2+q1+4)>>3 L,p0'
+ vrshrn.u16 d29, q2, #3 @(2*(p0+q0+p1)+p2+q1+4)>>3 H,p0'
+ vmov.i8 d0, #2 @constant 2 in 8 bit lanes (multiplier for vmlal.u8 below)
+ vmov.i16 d1, #2 @constant 2 in 16 bit lanes (scalar d1[0] for vmla.u16 below)
+ vaddl.u8 q1, d6, d8 @p0+q1 L
+ vmlal.u8 q1, d10, d0 @2*p1+p0+q1 L
+ vaddl.u8 q8, d7, d9 @p0+q1 H
+ vmlal.u8 q8, d11, d0 @2*p1+p0+q1 H
+ vaddw.u8 q6, q12, d30 @(p0+q0+p1) +p2 L
+ vld1.8 {d24, d25}, [r2] @load p3,Q12
+ vaddw.u8 q2, q13, d31 @(p0+q0+p1) +p2 H
+ vaddl.u8 q4, d30, d24 @p2+p3 L
+ vrshrn.u16 d26, q6, #2 @((p0+q0+p1)+p2 +2)>>2,p1' L
+ vrshrn.u16 d2, q1, #2 @(2*p1+p0+q1+2)>>2,p0"L
+ vrshrn.u16 d27, q2, #2 @((p0+q0+p1)+p2 +2)>>2,p1' H
+ vrshrn.u16 d3, q8, #2 @(2*p1+p0+q1+2)>>2,p0" H
+ vaddl.u8 q8, d31, d25 @p2+p3 H
+ vmla.u16 q6, q4, d1[0] @(p0+q0+p1)+3*p2+2*p3 L
+ vmla.u16 q2, q8, d1[0] @(p0+q0+p1)+3*p2+2*p3 H
+ vbic q8, q10, q9 @!((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta))
+ @&& (Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ vbit q1, q14, q10 @choosing between p0' and p0"
+ vrshrn.u16 d12, q6, #3 @((p0+q0+p1)+3*p2+2*p3+4)>>3 L p2'
+ vrshrn.u16 d13, q2, #3 @((p0+q0+p1)+3*p2+2*p3+4)>>3 H p2'
+ vbif q3, q1, q9 @choosing between p0 and filtered value of p0
+ vbit q5, q13, q8 @choosing between p1 and p1'
+ vbit q15, q6, q8 @choosing between p2 and p2'
+ vst1.8 {d6, d7}, [r12] @store p0
+ vst1.8 {d10, d11}, [r14] @store p1
+ vst1.8 {d30, d31}, [r3] @store p2
+ vpop {d8 - d15}
+ ldmfd sp!, {r12, pc}
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a luma block vertical edge for cases where the
+@* boundary strength is less than 4
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @param[in] sp(0) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(4) - pu1_cliptab
+@* tc0_table
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_luma_vert_bslt4_a9
+
+ih264_deblk_luma_vert_bslt4_a9:
+
+ stmfd sp!, {r12, lr}
+
+ sub r0, r0, #4 @pointer uc_edgePixel-4
+ ldr r12, [sp, #8] @r12 = ui_Bs
+ ldr r14, [sp, #12] @r14 = *puc_ClpTab
+ vpush {d8 - d15}
+ @loading p3:p2:p1:p0:q0:q1:q2:q3 for every row
+ vld1.8 {d0}, [r0], r1 @row1
+ vld1.8 d2, [r0], r1 @row2
+ vld1.8 d4, [r0], r1 @row3
+ rev r12, r12 @reversing ui_bs
+ vld1.8 d6, [r0], r1 @row4
+ vmov.32 d18[0], r12 @d18[0] = ui_Bs
+ vld1.32 d16[0], [r14] @D16[0] contains cliptab
+ vld1.8 d8, [r0], r1 @row5
+ vmovl.u8 q9, d18 @q9 = uc_Bs in each 16 bit scalar
+ vld1.8 d10, [r0], r1 @row6
+ vld1.8 d12, [r0], r1 @row7
+ vtbl.8 d16, {d16}, d18 @puc_ClipTab[uc_Bs]
+ vld1.8 d14, [r0], r1 @row8
+ vld1.8 d1, [r0], r1 @row9
+ vmovl.u16 q8, d16 @looked-up clip values widened to 32 bit lanes
+ vld1.8 d3, [r0], r1 @row10
+ vld1.8 d5, [r0], r1 @row11
+ vld1.8 d7, [r0], r1 @row12
+ vsli.32 q8, q8, #8 @copy the low byte of each 32 bit lane into byte 1
+ vld1.8 d9, [r0], r1 @row13
+ vld1.8 d11, [r0], r1 @row14
+ vld1.8 d13, [r0], r1 @row15
+ vsli.32 q8, q8, #16 @Q8 = C0 (now replicated in all 4 bytes of each 32 bit lane)
+ vld1.8 d15, [r0], r1 @row16
+
+ @taking two 8x8 transposes
+ @2X2 transposes
+ vtrn.8 d0, d2 @row1 &2
+ vtrn.8 d4, d6 @row3&row4
+ vtrn.8 d8, d10 @row5&6
+ vtrn.8 d12, d14 @row7 & 8
+ vtrn.8 d1, d3 @row9 &10
+ vtrn.8 d5, d7 @row11 & 12
+ vtrn.8 d9, d11 @row13 &14
+ vtrn.8 d13, d15 @row15 & 16
+ @4x4 transposes
+ vtrn.16 d2, d6 @row2 & row4
+ vtrn.16 d10, d14 @row6 & row8
+ vtrn.16 d3, d7 @row10 & 12
+ vtrn.16 d11, d15 @row14 & row16
+ vtrn.32 d6, d14 @row4 & 8
+ vtrn.32 d7, d15 @row 12 & 16
+
+ @now Q3 ->p0 and Q7->q3
+ vtrn.16 d0, d4 @row1 & 3
+ vtrn.16 d8, d12 @row 5 & 7
+ vtrn.16 d1, d5 @row9 & row11
+ vtrn.16 d9, d13 @row13 & row15
+ vtrn.32 d0, d8 @row1 & row5
+ vtrn.32 d1, d9 @row9 & 13
+
+ @now Q0->p3 & Q4->q0
+ @starting processing as p0 and q0 are now ready
+ vtrn.32 d2, d10 @row2 &6
+ vrhadd.u8 q10, q3, q4 @((p0 + q0 + 1) >> 1)
+ vtrn.32 d3, d11 @row10&row14
+ vmov.i8 d19, #2 @constant 2 in 8 bit lanes (multiplier for vmlsl.u8 below)
+ @now Q1->p2 & Q5->q1
+ vtrn.32 d4, d12 @row3 & 7
+ vabd.u8 q11, q3, q4 @ABS(p0 - q0)
+ vtrn.32 d5, d13 @row11 & row15
+ vaddl.u8 q12, d20, d2 @(p2 + ((p0 + q0 + 1) >> 1) L
+ @now Q2->p1,Q6->q2
+ vaddl.u8 q13, d21, d3 @(p2 + ((p0 + q0 + 1) >> 1) H
+ vmlsl.u8 q12, d4, d19 @(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) L
+ vmlsl.u8 q13, d5, d19 @(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) H
+ vdup.8 q14, r2 @alpha
+ vcle.u8 q11, q14, q11 @ABS(p0 - q0) >= Alpha(Alpha <=ABS(p0 - q0))
+ vdup.i8 q14, r3 @beta
+ vabd.u8 q15, q5, q4 @ABS(q1 - q0)
+ vqshrn.s16 d24, q12, #1 @((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) L
+ vqshrn.s16 d25 , q13, #1 @((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) H
+ vcge.u8 q15, q15, q14 @ABS(q1 - q0) >= Beta
+ vabd.u8 q13, q2, q3 @ABS(p1 - p0)
+ vmin.s8 q12, q12, q8 @min(deltap1 ,C0)
+ vorr q11, q11, q15 @ABS(q1 - q0) >= Beta ||ABS(p0 - q0) >= Alpha
+ vneg.s8 q15, q8 @-C0
+ vcge.u8 q13, q13, q14 @ABS(p1 - p0) >= Beta
+ vmax.s8 q12, q12, q15 @max(deltap1,-C0)
+ vorr q11, q11, q13 @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)
+ vmovl.u16 q13, d18 @ui_bs
+ vaddl.u8 q9, d20, d12 @q2 + ((p0 + q0 + 1) >> 1) L
+ vceq.u32 q13, q13, #0 @ui_bs == 0
+ vsubw.u8 q9, q9, d10 @(q2 + ((p0 + q0 + 1) >> 1) - q1) L
+ vaddl.u8 q10, d21, d13 @q2 + ((p0 + q0 + 1) >> 1) H
+ vsubw.u8 q9, q9, d10 @(q2 + ((p0 + q0 + 1) >> 1) - 2*q1)L
+ vsubw.u8 q10, q10, d11 @(q2 + ((p0 + q0 + 1) >> 1) - q1) H
+ vorr q13, q13, q11 @Q13 = skip mask: (ui_bs == 0) || ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta
+ vsubw.u8 q10, q10, d11 @(q2 + ((p0 + q0 + 1) >> 1) - 2*q1) H
+ vqshrn.s16 d18, q9, #1 @((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) L
+ vabd.u8 q11, q1, q3 @Ap = ABS(p2 - p0)
+ vqshrn.s16 d19, q10, #1 @((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) H
+ vabd.u8 q10, q6, q4 @Aq= ABS(q2 - q0)
+ vclt.u8 q11, q11, q14 @Ap < Beta
+ vmin.s8 q9, q9, q8 @min(deltaq1,C0)
+ vclt.u8 q10, q10, q14 @Aq <Beta
+ vsubl.u8 q14, d8, d6 @(q0 - p0) L
+ vmax.s8 q9, q9, q15 @max(deltaq1,-C0)
+ vsubl.u8 q15, d9, d7 @(q0 - p0) H
+ vshl.s16 q14, q14, #2 @(q0 - p0)<<2 L
+ vsub.u8 q8, q8, q11 @C0 + (Ap < Beta) (subtracting the all-ones mask adds 1)
+ vshl.s16 q15, q15, #2 @(q0 - p0) << 2) H
+ vaddw.u8 q14, q14, d4 @((q0 - p0) << 2) + (p1 L
+ vaddw.u8 q15, q15, d5 @((q0 - p0) << 2) + (p1 H
+ vsubw.u8 q14, q14, d10 @((q0 - p0) << 2) + (p1 - q1) L
+ vsubw.u8 q15, q15, d11 @((q0 - p0) << 2) + (p1 - q1) H
+ vbic q11, q11, q13 @final condition for p1
+ vrshrn.s16 d28, q14, #3 @delta = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3); L
+ vrshrn.s16 d29, q15, #3 @delta = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) H
+ vsub.u8 q8, q8, q10 @C0 + (Ap < Beta) + (Aq < Beta)
+ vbic q10, q10, q13 @final condition for q1
+ vabs.s8 q15, q14 @abs(delta)
+ vand q12, q12, q11 @deltap1
+ vand q9, q9, q10 @delta q1
+ vmin.u8 q15, q15, q8 @min((abs(delta),C)
+ vadd.i8 q2, q2, q12 @p1+deltap1
+ vadd.i8 q5, q5, q9 @q1+deltaq1
+ vbic q15, q15, q13 @abs(delta) of pixels to be changed only
+ vcge.s8 q14, q14, #0 @sign(delta)
+ vqsub.u8 q11, q3, q15 @clip(p0-delta)
+ vtrn.8 d0, d2 @row1 &2
+ vqadd.u8 q3, q3, q15 @clip(p0+delta)
+ vtrn.8 d1, d3 @row9 &10
+ vqadd.u8 q12, q4, q15 @clip(q0+delta)
+ vtrn.8 d12, d14 @row7 & 8
+ vqsub.u8 q4, q4, q15 @clip(q0-delta)
+ vtrn.8 d13, d15 @row15 & 16
+ vbif q3, q11, q14 @p0
+ vbif q4, q12, q14 @q0
+ vtrn.8 d4, d6 @row3&row4
+ vtrn.8 d8, d10 @row5&6
+ vtrn.8 d5, d7 @row11 & 12
+ vtrn.8 d9, d11 @row13 &14
+ vtrn.16 d2, d6 @row2 & row4
+ vtrn.16 d10, d14 @row6 & row8
+ vtrn.16 d3, d7 @row10 & 12
+ vtrn.16 d11, d15 @row14 & row16
+ vtrn.32 d6, d14 @row4 & 8
+ vtrn.32 d7, d15 @row 12 & 16
+ @now Q3 ->p0 and Q7->q3
+ vtrn.16 d0, d4 @row1 & 3
+ vtrn.16 d8, d12 @row 5 & 7
+ vtrn.16 d1, d5 @row9 & row11
+ vtrn.16 d9, d13 @row13 & row15
+ sub r0, r0, r1, lsl#4 @restore pointer
+ vtrn.32 d0, d8 @row1 & row5
+ vtrn.32 d1, d9 @row9 & 13
+ vtrn.32 d2, d10 @row2 &6
+ vtrn.32 d3, d11 @row10&row14
+ vtrn.32 d4, d12 @row3 & 7
+ vtrn.32 d5, d13 @row11 & row15
+ vst1.8 {d0}, [r0], r1 @row1
+ vst1.8 d2, [r0], r1 @row2
+ vst1.8 d4, [r0], r1 @row3
+ vst1.8 d6, [r0], r1 @row4
+ vst1.8 d8, [r0], r1 @row5
+ vst1.8 d10, [r0], r1 @row6
+ vst1.8 d12, [r0], r1 @row7
+ vst1.8 d14, [r0], r1 @row8
+ vst1.8 d1, [r0], r1 @row9
+ vst1.8 d3, [r0], r1 @row10
+ vst1.8 d5, [r0], r1 @row11
+ vst1.8 d7, [r0], r1 @row12
+ vst1.8 d9, [r0], r1 @row13
+ vst1.8 d11, [r0], r1 @row14
+ vst1.8 d13, [r0], r1 @row15
+ vst1.8 d15, [r0], r1 @row16
+ vpop {d8 - d15}
+ ldmfd sp!, {r12, pc}
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a luma block vertical edge when the
+@* boundary strength is set to 4
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_luma_vert_bs4_a9
+
+ih264_deblk_luma_vert_bs4_a9:
+
+ stmfd sp!, {r12, lr}
+ vpush {d8 - d15}
+ sub r0, r0, #4 @pointer uc_edgePixel-4
+ @loading p3:p2:p1:p0:q0:q1:q2:q3 for every row
+ vld1.8 d0, [r0], r1 @row1
+ vld1.8 d2, [r0], r1 @row2
+ vld1.8 d4, [r0], r1 @row3
+ vld1.8 d6, [r0], r1 @row4
+ vld1.8 d8, [r0], r1 @row5
+ vld1.8 d10, [r0], r1 @row6
+ vld1.8 d12, [r0], r1 @row7
+ vld1.8 d14, [r0], r1 @row8
+ vld1.8 d1, [r0], r1 @row9
+ vld1.8 d3, [r0], r1 @row10
+ vld1.8 d5, [r0], r1 @row11
+ vld1.8 d7, [r0], r1 @row12
+ vld1.8 d9, [r0], r1 @row13
+ vld1.8 d11, [r0], r1 @row14
+ vld1.8 d13, [r0], r1 @row15
+ vld1.8 d15, [r0], r1 @row16
+ @taking two 8x8 transposes
+ @2X2 transposes
+ vtrn.8 d0, d2 @row1 &2
+ vtrn.8 d4, d6 @row3&row4
+ vtrn.8 d8, d10 @row5&6
+ vtrn.8 d12, d14 @row7 & 8
+ vtrn.8 d1, d3 @row9 &10
+ vtrn.8 d5, d7 @row11 & 12
+ vtrn.8 d9, d11 @row13 &14
+ vtrn.8 d13, d15 @row15 & 16
+ @4x4 transposes
+ vtrn.16 d2, d6 @row2 & row4
+ vtrn.16 d10, d14 @row6 & row8
+ vtrn.16 d3, d7 @row10 & 12
+ vtrn.16 d11, d15 @row14 & row16
+ vtrn.32 d6, d14 @row4 & 8
+ vtrn.32 d7, d15 @row 12 & 16
+ @now Q3 ->p0 and Q7->q3
+ vtrn.16 d0, d4 @row1 & 3
+ vtrn.16 d8, d12 @row 5 & 7
+ vtrn.16 d1, d5 @row9 & row11
+ vtrn.16 d9, d13 @row13 & row15
+ vtrn.32 d0, d8 @row1 & row5
+ vtrn.32 d1, d9 @row9 & 13
+ @now Q0->p3 & Q4->q0
+ @starting processing as p0 and q0 are now ready
+ @now Q1->p2 & Q5->q1
+ vpush {q7} @saving Q7 (q3 pixels) on stack; Q7 is reused as scratch below
+ vtrn.32 d4, d12 @row3 & 7
+ vmov.i16 q14, #2
+ vtrn.32 d5, d13 @row11 & row15
+ vaddl.u8 q8, d6, d8 @p0+q0 L
+ vtrn.32 d2, d10 @row2 &6
+ vaddl.u8 q9, d7, d9 @p0+q0 H
+ vtrn.32 d3, d11 @row10&row14
+ vaddw.u8 q10, q8, d4 @p0+q0+p1 L
+ vaddw.u8 q11, q9, d5 @p0+q0+p1 H
+ vaddl.u8 q12, d2, d10 @p2+q1 L
+ vaddl.u8 q13, d3, d11 @p2+q1 H
+ vmla.u16 q12, q10, q14 @p2 + X2(p1) + X2(p0) + X2(q0) + q1 L
+ vmla.u16 q13, q11, q14 @p2 + X2(p1) + X2(p0) + X2(q0) + q1 H
+ vmov.i8 q14, #2
+ vaddw.u8 q8, q10, d2 @p0+q0+p1+p2 L
+ vaddw.u8 q9, q11, d3 @p0+q0+p1+p2 H
+ vdup.i8 q15, r2 @duplicate alpha
+ vrshrn.u16 d20, q8, #2 @(p2 + p1 + p0 + q0 + 2) >> 2)L p1'
+ vrshrn.u16 d21, q9, #2 @(p2 + p1 + p0 + q0 + 2) >> 2)H p1'
+ vabd.u8 q11, q3, q4 @ABD(p0-q0)
+ vsra.u8 q14, q15, #2 @alpha >>2 +2
+ vabd.u8 q15, q1, q3 @Ap = ABD(p2-p0)
+ vrshrn.u16 d24, q12, #3 @((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) L p0'
+ vrshrn.u16 d25, q13, #3 @((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) H p0'
+ vdup.i8 q13, r3 @beta
+ vcgt.u8 q14, q14, q11 @ABS(p0 - q0) <((Alpha >>2) + 2)
+ vaddl.u8 q11, d6, d10 @p0+q1 L
+ vcgt.u8 q7, q13, q15 @beta>Ap
+ vaddl.u8 q15, d7, d11 @p0+q1 H
+ vaddw.u8 q11, q11, d4 @p0+q1+p1 L
+ vaddw.u8 q15, q15, d5 @p0+q1+p1 H
+ vaddw.u8 q11, q11, d4 @p0+q1+2*p1 L
+ vaddw.u8 q15, q15, d5 @p0+q1+2*p1 H
+ vand q7, q7, q14 @(Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)
+ vrshrn.u16 d22, q11, #2 @((X2(p1) + p0 + q1 + 2) >> 2) L p0"
+ vrshrn.u16 d23, q15, #2 @((X2(p1) + p0 + q1 + 2) >> 2) H p0"
+ vaddl.u8 q15, d2, d0 @p2+p3 L
+ vbif q12, q11, q7 @p0' or p0 "
+ vaddl.u8 q11, d3, d1 @p2+p3 H
+ vadd.u16 q15, q15, q15 @2*(p2+p3) L
+ vadd.u16 q11, q11, q11 @2*(p2+p3)H
+ vadd.u16 q8, q8, q15 @(X2(p3) + X3(p2) + p1 + p0 + q0) L
+ vadd.u16 q9, q9, q11 @(X2(p3) + X3(p2) + p1 + p0 + q0) H
+ vabd.u8 q15, q6, q4 @Aq = abs(q2-q0)
+ vabd.u8 q11, q5, q4 @ABS(Q1-Q0)
+ vrshrn.u16 d16, q8, #3 @((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); L p2'
+ vrshrn.u16 d17, q9, #3 @((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); H p2'
+ vabd.u8 q9, q2, q3 @ABS(p1-p0)
+ vcgt.u8 q15, q13, q15 @Aq < Beta
+ vcge.u8 q11, q11, q13 @ABS(q1 - q0) >= Beta
+ vcge.u8 q9, q9, q13 @ABS(p1 - p0) >= beta
+ vdup.i8 q13, r2 @duplicate alpha
+ vand q15, q15, q14 @(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ vabd.u8 q14, q3, q4 @abs(p0-q0)
+ vorr q11, q11, q9 @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta
+ vaddl.u8 q9, d6, d8 @p0+q0 L
+ vcge.u8 q14, q14, q13 @ABS(p0 - q0) >= Alpha
+ vaddl.u8 q13, d7, d9 @p0+q0 H
+ vaddw.u8 q9, q9, d10 @p0+q0+q1 L
+ vorr q11, q11, q14 @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta||ABS(p0 - q0) >= Alpha
+ vaddw.u8 q13, q13, d11 @p0+q0+q1 H
+ vbic q7, q7, q11 @final condn for p's: Q7 && !Q11
+ vmov.i8 q14, #2
+ vbif q3, q12, q11 @final p0
+ vbit q1, q8, q7 @final p2
+ vbif q10, q2, q7 @final p1
+ vaddl.u8 q12, d8, d4 @q0+p1 L
+ vmlal.u8 q12, d10, d28 @X2(q1) + q0 + p1 L
+ vaddl.u8 q8, d9, d5 @q0+p1 H
+ vmlal.u8 q8, d11, d28 @X2(q1) + q0 + p1 H
+ vmov.i16 q14, #2
+ vaddl.u8 q7, d4, d12 @p1+q2 L
+ vmla.u16 q7, q9, q14 @p1 + X2(p0) + X2(q0) + X2(q1) + q2L
+ vaddl.u8 q2, d5, d13 @p1+q2H
+ vmla.u16 q2, q13, q14 @p1 + X2(p0) + X2(q0) + X2(q1) + q2H
+ vrshrn.u16 d24, q12, #2 @(X2(q1) + q0 + p1 + 2) >> 2; L q0'
+ vrshrn.u16 d25, q8, #2 @(X2(q1) + q0 + p1 + 2) >> 2; H q0'
+ vaddw.u8 q9, q9, d12 @p0 + q0 + q1 + q2 L
+ vaddw.u8 q13, q13, d13 @p0 + q0 + q1 + q2 H
+ vrshrn.u16 d16, q7, #3 @(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 L q0"
+ vpop {q7} @restore Q7 (q3 pixels) saved above
+ vrshrn.u16 d17, q2, #3 @(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 H q0"
+ vrshrn.u16 d4, q9, #2 @p0 + q0 + q1 + q2 + 2)>>2 L q1'
+ vrshrn.u16 d5, q13, #2 @p0 + q0 + q1 + q2 + 2)>>2 H q1'
+ vbit q12, q8, q15 @q0' or q0"
+ vbic q15, q15, q11 @final condn for q's: Q15 && !Q11
+ vtrn.8 d0, d2 @row1 &2
+ vbit q5, q2, q15 @final q1
+ vtrn.8 d1, d3 @row9 &10
+ vaddl.u8 q8, d12, d14 @q2+q3 L
+ vtrn.8 d20, d6 @row3&row4
+ vaddl.u8 q2, d13, d15 @q2+q3 H
+ vtrn.8 d21, d7 @row11 & 12
+ vmla.u16 q9, q8, q14 @X2(q3) + X3(q2) + q1 + q0 + p0 L
+ vtrn.16 d2, d6 @row2 & row4
+ vmla.u16 q13, q2, q14 @X2(q3) + X3(q2) + q1 + q0 + p0 H
+ vtrn.16 d3, d7 @row10 & 12
+ vbif q4, q12, q11 @final q0
+ vtrn.16 d0, d20 @row1 & 3
+ vrshrn.u16 d18, q9, #3 @(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; L
+ vtrn.16 d1, d21 @row9 & row11
+ vrshrn.u16 d19, q13, #3 @(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; H
+ vtrn.8 d8, d10 @row5&6
+ vbit q6, q9, q15 @final q2
+ vtrn.8 d9, d11 @row13 &14
+ vtrn.8 d12, d14 @row7 & 8
+ vtrn.8 d13, d15 @row15 & 16
+ vtrn.16 d10, d14 @row6 & row8
+ vtrn.16 d11, d15 @row14 & row16
+ @now Q3 ->p0 and Q7->q3
+ vtrn.16 d8, d12 @row 5 & 7
+ vtrn.16 d9, d13 @row13 & row15
+ sub r0, r0, r1, lsl#4 @restore pointer
+ vtrn.32 d6, d14 @row4 & 8
+ vtrn.32 d7, d15 @row 12 & 16
+ vtrn.32 d0, d8 @row1 & row5
+ vtrn.32 d1, d9 @row9 & 13
+ vtrn.32 d2, d10 @row2 &6
+ vtrn.32 d3, d11 @row10&row14
+ vtrn.32 d20, d12 @row3 & 7
+ vtrn.32 d21, d13 @row11 & row15
+ vst1.8 d0, [r0], r1 @row1
+ vst1.8 d2, [r0], r1 @row2
+ vst1.8 d20, [r0], r1 @row3
+ vst1.8 d6, [r0], r1 @row4
+ vst1.8 d8, [r0], r1 @row5
+ vst1.8 d10, [r0], r1 @row6
+ vst1.8 d12, [r0], r1 @row7
+ vst1.8 d14, [r0], r1 @row8
+ vst1.8 d1, [r0], r1 @row9
+ vst1.8 d3, [r0], r1 @row10
+ vst1.8 d21, [r0], r1 @row11
+ vst1.8 d7, [r0], r1 @row12
+ vst1.8 d9, [r0], r1 @row13
+ vst1.8 d11, [r0], r1 @row14
+ vst1.8 d13, [r0], r1 @row15
+ vst1.8 d15, [r0], r1 @row16
+ vpop {d8 - d15}
+ ldmfd sp!, {r12, pc}
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a luma block vertical edge when the
+@* boundary strength is set to 4 on calling twice
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.4 under the title
+@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_luma_vert_bs4_mbaff_a9
+
+ih264_deblk_luma_vert_bs4_mbaff_a9:
+
+ stmfd sp!, {lr}
+
+ sub r0, r0, #4 @pointer uc_edgePixel-4
+ vpush {d8 - d15}
+ @loading [p3:p2],[p1:p0]:[q0:q1]:[q2:q3] for every row
+ vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+ vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+ vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+ vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
+ vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+ vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+ vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
+
+ vuzp.8 d0, d1 @D0->p3, D1->p2
+ vuzp.8 d2, d3 @D2->p1, D3->p0
+ vuzp.8 d4, d5 @D4->q0, D5->q1
+ vuzp.8 d6, d7 @D6->q2, D7->q3
+
+ vmov.i16 q14, #2
+ vaddl.u8 q4, d3, d4 @p0+q0
+ vaddw.u8 q5, q4, d2 @p0+q0+p1
+ vaddl.u8 q6, d1, d5 @p2+q1
+ vmla.u16 q6, q5, q14 @p2 + X2(p1) + X2(p0) + X2(q0) + q1
+
+ vmov.i8 d14, #2
+ vaddw.u8 q4, q5, d1 @p0+q0+p1+p2
+ vdup.i8 d15, r2 @duplicate alpha
+ vrshrn.u16 d10, q4, #2 @(p2 + p1 + p0 + q0 + 2) >> 2) p1'
+ vabd.u8 d11, d3, d4 @ABD(p0-q0)
+ vsra.u8 d14, d15, #2 @alpha >>2 +2
+ vabd.u8 d15, d1, d3 @Ap = ABD(p2-p0)
+ vrshrn.u16 d12, q6, #3 @((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) p0'
+ vdup.i8 d13, r3 @beta
+ vcgt.u8 d14, d14, d11 @ABS(p0 - q0) <((Alpha >>2) + 2)
+ vaddl.u8 q8, d3, d5 @p0+q1
+ vcgt.u8 d26, d13, d15 @beta>Ap
+ vaddw.u8 q8, q8, d2 @p0+q1+p1
+ vaddw.u8 q8, q8, d2 @p0+q1+2*p1
+ vand d26, d26, d14 @(Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)
+ vrshrn.u16 d11, q8, #2 @((X2(p1) + p0 + q1 + 2) >> 2) p0"
+ vbif d12, d11, d26 @p0' or p0 "
+ vaddl.u8 q9, d1, d0 @p2+p3
+ vadd.u16 q9, q9, q9 @2*(p2+p3)
+ vadd.u16 q4, q4, q9 @(X2(p3) + X3(p2) + p1 + p0 + q0)
+ vabd.u8 d15, d6, d4 @Aq = abs(q2-q0)
+ vabd.u8 d11, d5, d4 @ABS(q1-q0)
+ vrshrn.u16 d8, q4, #3 @((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); p2'
+ vabd.u8 d9, d2, d3 @ABS(p1-p0)
+ vcgt.u8 d15, d13, d15 @Aq < Beta
+ vcge.u8 d11, d11, d13 @ABS(q1 - q0) >= Beta
+ vcge.u8 d9, d9, d13 @ABS(p1 - p0) >= beta
+ vdup.i8 d13, r2 @duplicate alpha
+ vand d15, d15, d14 @(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ vabd.u8 d14, d3, d4 @abs(p0-q0)
+ vorr d11, d11, d9 @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta
+ vcge.u8 d14, d14, d13 @ABS(p0 - q0) >= Alpha
+ vaddl.u8 q10, d3, d4 @p0+q0
+ vorr d11, d11, d14 @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta||ABS(p0 - q0) >= Alpha
+ vaddw.u8 q10, q10, d5 @p0+q0+q1
+ vbic d26, d26, d11 @final condn for p's: D26 && !D11
+ vmov.i8 d14, #2
+ vbif d3, d12, d11 @final p0
+ vbit d1, d8, d26 @final p2
+ vbif d10, d2, d26 @final p1
+ vaddl.u8 q6, d4, d2 @q0+p1
+ vmlal.u8 q6, d5, d14 @X2(q1) + q0 + p1
+
+ vaddl.u8 q11, d2, d6 @p1+q2
+ vmla.u16 q11, q10, q14 @p1 + X2(p0) + X2(q0) + X2(q1) + q2
+ vrshrn.u16 d12, q6, #2 @(X2(q1) + q0 + p1 + 2) >> 2; q0'
+ vaddw.u8 q10, q10, d6 @p0 + q0 + q1 + q2
+ vrshrn.u16 d8, q11, #3 @(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 q0"
+
+ vrshrn.u16 d2, q10, #2 @p0 + q0 + q1 + q2 + 2)>>2 q1'
+ vbit d12, d8, d15 @q0' or q0"
+ vbic d15, d15, d11 @final condn for q's: D15 && !D11
+ vbit d5, d2, d15 @final q1
+ vaddl.u8 q12, d6, d7 @q2+q3
+ vmla.u16 q10, q12, q14 @X2(q3) + X3(q2) + q1 + q0 + p0
+ vbif d4, d12, d11 @final q0
+ vrshrn.u16 d9, q10, #3 @(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3;
+ vbit d6, d9, d15 @final q2
+ vand d2, d10, d10 @copy final p1 (D10) into D2; now D0->p3, D1->p2, D2->p1, D3->p0, D4->q0, D5->q1, D6->q2, D7->q3
+
+ vzip.8 d0, d1 @D0,D1 -> [p3:p2]
+ vzip.8 d2, d3 @D2,D3 -> [p1:p0]
+ vzip.8 d4, d5 @D4,D5 -> [q0:q1]
+ vzip.8 d6, d7 @D6,D7 -> [q2:q3]
+
+ sub r0, r0, r1, lsl#3 @restore pointer
+
+ @storing [p3:p2],[p1:p0]:[q0:q1]:[q2:q3] in every row
+ vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+ vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+ vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+ vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
+ vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+ vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+ vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
+ vpop {d8 - d15}
+ ldmfd sp!, {pc}
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Performs filtering of a luma block vertical edge for cases where the
+@* boundary strength is less than 4 on calling twice
+@*
+@* @par Description:
+@* This operation is described in Sec. 8.7.2.3 under the title
+@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+@*
+@* @param[in] r0 - pu1_src
+@* Pointer to the src sample q0
+@*
+@* @param[in] r1 - src_strd
+@* Source stride
+@*
+@* @param[in] r2 - alpha
+@* Alpha Value for the boundary
+@*
+@* @param[in] r3 - beta
+@* Beta Value for the boundary
+@*
+@* @param[in] sp(0) - u4_bs
+@* Packed Boundary strength array
+@*
+@* @param[in] sp(4) - pu1_cliptab
+@* tc0_table
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+ .global ih264_deblk_luma_vert_bslt4_mbaff_a9
+
+ih264_deblk_luma_vert_bslt4_mbaff_a9:
+@ Filters 8 rows across a vertical luma edge for bS < 4; each of the 4 packed bS bytes controls 2 rows.
+ stmfd sp!, {r12, lr} @save r12 and lr (8 bytes), so the stack arguments are now at sp+8 and sp+12
+
+ sub r0, r0, #4 @pointer uc_edgePixel-4
+ ldr r12, [sp, #8] @r12 = ui_Bs
+ ldr r14, [sp, #12] @r14 = pu1_ClipTab
+ vpush {d8 - d15} @save d8-d15; they are restored by the vpop before returning
+ @loading [p3:p2],[p1:p0]:[q0:q1]:[q2:q3] for every row
+ vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
+ vld4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+ vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+ vld4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+ vld4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
+ vld4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+ vld4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+ vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
+
+ vuzp.8 d0, d1 @D0->p3, D1->p2
+ vuzp.8 d2, d3 @D2->p1, D3->p0
+ vuzp.8 d4, d5 @D4->q0, D5->q1
+ vuzp.8 d6, d7 @D6->q2, D7->q3
+
+ rev r12, r12 @reversing ui_bs
+ vmov.32 d8[0], r12 @D8[0] = ui_Bs
+ vld1.32 d9[0], [r14] @D9[0] = first four bytes of cliptab
+ vmovl.u8 q15, d8 @D30 = ui_Bs in each 16 bit scalar
+ vtbl.8 d8, {d9}, d30 @puc_ClipTab[ui_Bs]
+ vsli.16 d8, d8, #8 @copy the low byte of each 16 bit lane into its high byte: D8 = C0 for every row
+
+ vrhadd.u8 d10, d3, d4 @((p0 + q0 + 1) >> 1)
+ vmov.i8 d31, #2
+ vabd.u8 d11, d3, d4 @ABS(p0 - q0)
+ vaddl.u8 q6, d10, d1 @(p2 + ((p0 + q0 + 1) >> 1))
+ vmlsl.u8 q6, d2, d31 @(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1))
+ vdup.8 d14, r2 @alpha
+ vcle.u8 d11, d14, d11 @ABS(p0 - q0) >= Alpha(Alpha <=ABS(p0 - q0))
+ vdup.i8 d14, r3 @beta
+ vabd.u8 d15, d5, d4 @ABS(q1 - q0)
+ vqshrn.s16 d12, q6, #1 @((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1)
+ vcge.u8 d15, d15, d14 @ABS(q1 - q0) >= Beta
+ vabd.u8 d13, d2, d3 @ABS(p1 - p0)
+ vmin.s8 d12, d12, d8 @min(deltap1 ,C0)
+ vorr d11, d11, d15 @ABS(q1 - q0) >= Beta ||ABS(p0 - q0) >= Alpha
+ vneg.s8 d15, d8 @-C0
+ vcge.u8 d13, d13, d14 @ABS(p1 - p0) >= Beta
+ vmax.s8 d12, d12, d15 @max(deltap1,-C0)
+ vorr d11, d11, d13 @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)
+ vceq.u16 d13, d30, #0 @ui_bs == 0
+ vaddl.u8 q14, d10, d6 @q2 + ((p0 + q0 + 1) >> 1)
+ vsubw.u8 q14, q14, d5 @q2 + ((p0 + q0 + 1) >> 1) - q1
+ vsubw.u8 q14, q14, d5 @q2 + ((p0 + q0 + 1) >> 1) - 2*q1
+ vorr d13, d13, d11 @(ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta))
+ @|| (ui_bs == 0)
+ vqshrn.s16 d9, q14, #1 @(q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1
+ vabd.u8 d11, d1, d3 @Ap = ABS(p2 - p0)
+ vabd.u8 d10, d6, d4 @Aq= ABS(q2 - q0)
+ vclt.u8 d11, d11, d14 @Ap < Beta
+ vmin.s8 d9, d9, d8 @min(deltaq1,C0)
+ vclt.u8 d10, d10, d14 @Aq < Beta
+ vmax.s8 d9, d9, d15 @max(deltaq1,-C0)
+ vsubl.u8 q7, d4, d3 @q0 - p0
+ vshl.s16 q7, q7, #2 @(q0 - p0) << 2
+ vsub.u8 d8, d8, d11 @C0 + (Ap < Beta): the compare mask is 0xFF (-1), so subtracting it adds 1
+ vaddw.u8 q7, q7, d2 @((q0 - p0) << 2) + p1
+ vsubw.u8 q7, q7, d5 @((q0 - p0) << 2) + (p1 - q1)
+ vbic d11, d11, d13 @final condition for p1
+ vrshr.s16 q15, q7, #3 @delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3
+ vsub.u8 d8, d8, d10 @C0 + (Ap < Beta) + (Aq < Beta)
+ vbic d10, d10, d13 @final condition for q1
+ vabs.s16 q14, q15 @abs(delta) in 16 bit lanes
+ vmovn.i16 d15, q14 @abs(delta)
+ vand d12, d12, d11 @deltap1
+ vand d9, d9, d10 @deltaq1
+ vmin.u8 d15, d15, d8 @min(abs(delta),C)
+ vadd.i8 d2, d2, d12 @p1+deltap1
+ vadd.i8 d5, d5, d9 @q1+deltaq1
+ vbic d15, d15, d13 @abs(delta) of pixels to be changed only
+ vcge.s16 q14, q15, #0 @mask: delta >= 0
+ vmovn.i16 d14, q14 @sign(delta): 0xFF where delta >= 0, 0x00 where delta < 0
+ vqsub.u8 d11, d3, d15 @clip(p0-delta)
+ vqadd.u8 d3, d3, d15 @clip(p0+delta)
+ vqadd.u8 d12, d4, d15 @clip(q0+delta)
+ vqsub.u8 d4, d4, d15 @clip(q0-delta)
+ vbif d3, d11, d14 @p0: keep p0+abs(delta) where delta >= 0, else take p0-abs(delta)
+ vbif d4, d12, d14 @q0: keep q0-abs(delta) where delta >= 0, else take q0+abs(delta)
+
+ sub r0, r0, r1, lsl#3 @restore pointer
+ @D0->p3, D1->p2, D2->p1, D3->p0, D4->q0, D5->q1, D6->q2, D7->q3
+ vzip.8 d0, d1 @D0,D1 -> [p3:p2]
+ vzip.8 d2, d3 @D2,D3 -> [p1:p0]
+ vzip.8 d4, d5 @D4,D5 -> [q0:q1]
+ vzip.8 d6, d7 @D6,D7 -> [q2:q3]
+
+ @storing [p3:p2],[p1:p0]:[q0:q1]:[q2:q3] in every row
+ vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
+ vst4.16 {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
+ vst4.16 {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
+ vst4.16 {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
+ vst4.16 {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
+ vst4.16 {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
+ vst4.16 {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
+ vst4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
+ vpop {d8 - d15} @restore d8-d15
+ ldmfd sp!, {r12, pc} @restore r12 and return by loading the saved lr into pc
+
+
+
diff --git a/common/arm/ih264_default_weighted_pred_a9q.s b/common/arm/ih264_default_weighted_pred_a9q.s
new file mode 100755
index 0000000..94cda46
--- /dev/null
+++ b/common/arm/ih264_default_weighted_pred_a9q.s
@@ -0,0 +1,359 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_default_weighted_pred_a9q.s
+@*
+@* @brief
+@* Contains function definitions for default weighted prediction.
+@* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT
+@*
+@* @author
+@* Kaushik Senthoor R
+@*
+@* @par List of Functions:
+@*
+@* - ih264_default_weighted_pred_luma_a9q()
+@* - ih264_default_weighted_pred_chroma_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@*******************************************************************************
+@* @function
+@* ih264_default_weighted_pred_luma_a9q()
+@*
+@* @brief
+@* This routine performs the default weighted prediction as described in sec
+@* 8.4.2.3.1 titled "Default weighted sample prediction process" for luma.
+@*
+@* @par Description:
+@* This function gets two ht x wd blocks, calculates their rounded-average and
+@* stores it in the destination block.
+@*
+@* @param[in] pu1_src1:
+@* UWORD8 Pointer to the buffer containing the first input block.
+@*
+@* @param[in] pu1_src2:
+@* UWORD8 Pointer to the buffer containing the second input block.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output block is stored.
+@*
+@* @param[in] src_strd1
+@* Stride of the first input buffer
+@*
+@* @param[in] src_strd2
+@* Stride of the second input buffer
+@*
+@* @param[in] dst_strd
+@* Stride of the destination buffer
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+@*
+@*******************************************************************************
+@*/
+@void ih264_default_weighted_pred_luma_a9q(UWORD8 *pu1_src1,
+@ UWORD8 *pu1_src2,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd1,
+@ WORD32 src_strd2,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src1
+@ r1 => pu1_src2
+@ r2 => pu1_dst
+@ r3 => src_strd1
+@ [sp] => src_strd2 (r4)
+@ [sp+4] => dst_strd (r5)
+@ [sp+8] => ht (r6)
+@ [sp+12] => wd (r7)
+@
+.text
+.p2align 2
+
+ .global ih264_default_weighted_pred_luma_a9q
+
+ih264_default_weighted_pred_luma_a9q:
+@ Writes the rounded average of the two source blocks to pu1_dst; dispatches on wd (16, 8, otherwise 4 bytes per row).
+ stmfd sp!, {r4-r7, r14} @save r4-r7 and lr (20 bytes), so the stack arguments now start at sp+20
+ ldr r7, [sp, #32] @Load wd
+ ldr r4, [sp, #20] @Load src_strd2
+ ldr r5, [sp, #24] @Load dst_strd
+ cmp r7, #16
+ ldr r6, [sp, #28] @Load ht
+ vpush {d8-d15} @save d8-d15; restored by the vpop at end_loops
+ beq loop_16 @branch if wd is 16
+ cmp r7, #8
+ beq loop_8 @branch if wd is 8
+
+loop_4: @each iteration processes four rows
+
+ vld1.32 d0[0], [r0], r3 @load row 1 in source 1
+ vld1.32 d0[1], [r0], r3 @load row 2 in source 1
+ vld1.32 d2[0], [r1], r4 @load row 1 in source 2
+ vld1.32 d2[1], [r1], r4 @load row 2 in source 2
+
+ vld1.32 d1[0], [r0], r3 @load row 3 in source 1
+ vld1.32 d1[1], [r0], r3 @load row 4 in source 1
+ vrhadd.u8 d0, d0, d2 @(src1 + src2 + 1) >> 1 for rows 1 and 2
+ vld1.32 d3[0], [r1], r4 @load row 3 in source 2
+ vld1.32 d3[1], [r1], r4 @load row 4 in source 2
+
+ subs r6, r6, #4 @decrement ht by 4
+ vst1.32 d0[0], [r2], r5 @store row 1 in destination
+ vst1.32 d0[1], [r2], r5 @store row 2 in destination
+ vrhadd.u8 d1, d1, d3 @(src1 + src2 + 1) >> 1 for rows 3 and 4
+ vst1.32 d1[0], [r2], r5 @store row 3 in destination
+ vst1.32 d1[1], [r2], r5 @store row 4 in destination
+
+ bgt loop_4 @if greater than 0 repeat the loop again
+
+ b end_loops
+
+loop_8: @each iteration processes four rows
+
+ vld1.8 d0, [r0], r3 @load row 1 in source 1
+ vld1.8 d4, [r1], r4 @load row 1 in source 2
+ vld1.8 d1, [r0], r3 @load row 2 in source 1
+ vld1.8 d5, [r1], r4 @load row 2 in source 2
+ vld1.8 d2, [r0], r3 @load row 3 in source 1
+ vrhadd.u8 q0, q0, q2 @(src1 + src2 + 1) >> 1 for rows 1 and 2 (d0:d1 with d4:d5)
+ vld1.8 d6, [r1], r4 @load row 3 in source 2
+ vld1.8 d3, [r0], r3 @load row 4 in source 1
+ vrhadd.u8 d2, d2, d6 @(src1 + src2 + 1) >> 1 for row 3
+ vld1.8 d7, [r1], r4 @load row 4 in source 2
+
+ subs r6, r6, #4 @decrement ht by 4
+ vst1.8 d0, [r2], r5 @store row 1 in destination
+ vrhadd.u8 d3, d3, d7 @(src1 + src2 + 1) >> 1 for row 4
+ vst1.8 d1, [r2], r5 @store row 2 in destination
+ vst1.8 d2, [r2], r5 @store row 3 in destination
+ vst1.8 d3, [r2], r5 @store row 4 in destination
+
+ bgt loop_8 @if greater than 0 repeat the loop again
+
+ b end_loops
+
+loop_16: @each iteration processes eight rows
+
+ vld1.8 {q0}, [r0], r3 @load row 1 in source 1
+ vld1.8 {q8}, [r1], r4 @load row 1 in source 2
+ vld1.8 {q1}, [r0], r3 @load row 2 in source 1
+ vld1.8 {q9}, [r1], r4 @load row 2 in source 2
+ vrhadd.u8 q0, q0, q8 @(src1 + src2 + 1) >> 1 for row 1
+ vld1.8 {q2}, [r0], r3 @load row 3 in source 1
+ vld1.8 {q10}, [r1], r4 @load row 3 in source 2
+ vrhadd.u8 q1, q1, q9 @(src1 + src2 + 1) >> 1 for row 2
+ vld1.8 {q3}, [r0], r3 @load row 4 in source 1
+ vld1.8 {q11}, [r1], r4 @load row 4 in source 2
+ vrhadd.u8 q2, q2, q10 @(src1 + src2 + 1) >> 1 for row 3
+ vld1.8 {q4}, [r0], r3 @load row 5 in source 1
+ vld1.8 {q12}, [r1], r4 @load row 5 in source 2
+ vrhadd.u8 q3, q3, q11 @(src1 + src2 + 1) >> 1 for row 4
+ vld1.8 {q5}, [r0], r3 @load row 6 in source 1
+ vld1.8 {q13}, [r1], r4 @load row 6 in source 2
+ vrhadd.u8 q4, q4, q12 @(src1 + src2 + 1) >> 1 for row 5
+ vld1.8 {q6}, [r0], r3 @load row 7 in source 1
+ vld1.8 {q14}, [r1], r4 @load row 7 in source 2
+ vrhadd.u8 q5, q5, q13 @(src1 + src2 + 1) >> 1 for row 6
+ vld1.8 {q7}, [r0], r3 @load row 8 in source 1
+ vld1.8 {q15}, [r1], r4 @load row 8 in source 2
+
+ vrhadd.u8 q6, q6, q14 @(src1 + src2 + 1) >> 1 for row 7
+ vst1.8 {q0}, [r2], r5 @store row 1 in destination
+ vst1.8 {q1}, [r2], r5 @store row 2 in destination
+ vrhadd.u8 q7, q7, q15 @(src1 + src2 + 1) >> 1 for row 8
+ vst1.8 {q2}, [r2], r5 @store row 3 in destination
+ vst1.8 {q3}, [r2], r5 @store row 4 in destination
+ subs r6, r6, #8 @decrement ht by 8
+ vst1.8 {q4}, [r2], r5 @store row 5 in destination
+ vst1.8 {q5}, [r2], r5 @store row 6 in destination
+ vst1.8 {q6}, [r2], r5 @store row 7 in destination
+ vst1.8 {q7}, [r2], r5 @store row 8 in destination
+
+ bgt loop_16 @if greater than 0 repeat the loop again
+
+end_loops:
+
+ vpop {d8-d15} @restore d8-d15
+ ldmfd sp!, {r4-r7, r15} @restore r4-r7 and return by loading the saved lr into pc
+
+
+@*******************************************************************************
+@* @function
+@* ih264_default_weighted_pred_chroma_a9q()
+@*
+@* @brief
+@* This routine performs the default weighted prediction as described in sec
+@* 8.4.2.3.1 titled "Default weighted sample prediction process" for chroma.
+@*
+@* @par Description:
+@* This function gets two ht x wd blocks, calculates their rounded-average and
+@* stores it in the destination block for U and V.
+@*
+@* @param[in] pu1_src1:
+@* UWORD8 Pointer to the buffer containing the first input block.
+@*
+@* @param[in] pu1_src2:
+@* UWORD8 Pointer to the buffer containing the second input block.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output block is stored.
+@*
+@* @param[in] src_strd1
+@* Stride of the first input buffer
+@*
+@* @param[in] src_strd2
+@* Stride of the second input buffer
+@*
+@* @param[in] dst_strd
+@* Stride of the destination buffer
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+@*
+@*******************************************************************************
+@*/
+@void ih264_default_weighted_pred_chroma_a9q(UWORD8 *pu1_src1,
+@ UWORD8 *pu1_src2,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd1,
+@ WORD32 src_strd2,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src1
+@ r1 => pu1_src2
+@ r2 => pu1_dst
+@ r3 => src_strd1
+@ [sp] => src_strd2 (r4)
+@ [sp+4] => dst_strd (r5)
+@ [sp+8] => ht (r6)
+@ [sp+12] => wd (r7)
+@
+
+
+ .global ih264_default_weighted_pred_chroma_a9q
+
+ih264_default_weighted_pred_chroma_a9q:
+@ Writes the rounded average of the two source blocks to pu1_dst; dispatches on wd (8, 4, otherwise 2), moving 2*wd bytes per row.
+ stmfd sp!, {r4-r7, r14} @save r4-r7 and lr (20 bytes), so the stack arguments now start at sp+20
+ ldr r7, [sp, #32] @Load wd
+ ldr r4, [sp, #20] @Load src_strd2
+ ldr r5, [sp, #24] @Load dst_strd
+ cmp r7, #8
+ ldr r6, [sp, #28] @Load ht
+ vpush {d8-d15} @save d8-d15; restored by the vpop at end_loops_uv
+ beq loop_8_uv @branch if wd is 8
+ cmp r7, #4
+ beq loop_4_uv @branch if wd is 4
+
+loop_2_uv: @each iteration processes two rows
+
+ vld1.32 d0[0], [r0], r3 @load row 1 in source 1
+ vld1.32 d0[1], [r0], r3 @load row 2 in source 1
+
+ vld1.32 d1[0], [r1], r4 @load row 1 in source 2
+ vld1.32 d1[1], [r1], r4 @load row 2 in source 2
+
+ vrhadd.u8 d0, d0, d1 @(src1 + src2 + 1) >> 1 for rows 1 and 2
+
+ subs r6, r6, #2 @decrement ht by 2
+ vst1.32 d0[0], [r2], r5 @store row 1 in destination
+ vst1.32 d0[1], [r2], r5 @store row 2 in destination
+
+ bgt loop_2_uv @if greater than 0 repeat the loop again
+
+ b end_loops_uv
+
+loop_4_uv: @each iteration processes two rows
+
+ vld1.8 d0, [r0], r3 @load row 1 in source 1
+ vld1.8 d2, [r1], r4 @load row 1 in source 2
+ vld1.8 d1, [r0], r3 @load row 2 in source 1
+ vrhadd.u8 d0, d0, d2 @(src1 + src2 + 1) >> 1 for row 1
+ vld1.8 d3, [r1], r4 @load row 2 in source 2
+
+ vrhadd.u8 d1, d1, d3 @(src1 + src2 + 1) >> 1 for row 2
+ vst1.8 d0, [r2], r5 @store row 1 in destination
+ subs r6, r6, #2 @decrement ht by 2
+ vst1.8 d1, [r2], r5 @store row 2 in destination
+
+ bgt loop_4_uv @if greater than 0 repeat the loop again
+
+ b end_loops_uv
+
+loop_8_uv: @each iteration processes four rows
+
+ vld1.8 {q0}, [r0], r3 @load row 1 in source 1
+ vld1.8 {q4}, [r1], r4 @load row 1 in source 2
+ vld1.8 {q1}, [r0], r3 @load row 2 in source 1
+ vrhadd.u8 q0, q0, q4 @(src1 + src2 + 1) >> 1 for row 1
+ vld1.8 {q5}, [r1], r4 @load row 2 in source 2
+ vld1.8 {q2}, [r0], r3 @load row 3 in source 1
+ vrhadd.u8 q1, q1, q5 @(src1 + src2 + 1) >> 1 for row 2
+ vld1.8 {q6}, [r1], r4 @load row 3 in source 2
+ vld1.8 {q3}, [r0], r3 @load row 4 in source 1
+ vrhadd.u8 q2, q2, q6 @(src1 + src2 + 1) >> 1 for row 3
+ vld1.8 {q7}, [r1], r4 @load row 4 in source 2
+
+ vst1.8 {q0}, [r2], r5 @store row 1 in destination
+ vrhadd.u8 q3, q3, q7 @(src1 + src2 + 1) >> 1 for row 4
+ vst1.8 {q1}, [r2], r5 @store row 2 in destination
+ subs r6, r6, #4 @decrement ht by 4
+ vst1.8 {q2}, [r2], r5 @store row 3 in destination
+ vst1.8 {q3}, [r2], r5 @store row 4 in destination
+
+ bgt loop_8_uv @if greater than 0 repeat the loop again
+
+end_loops_uv:
+
+ vpop {d8-d15} @restore d8-d15
+ ldmfd sp!, {r4-r7, r15} @restore r4-r7 and return by loading the saved lr into pc
+
+
diff --git a/common/arm/ih264_ihadamard_scaling_a9.s b/common/arm/ih264_ihadamard_scaling_a9.s
new file mode 100755
index 0000000..687099a
--- /dev/null
+++ b/common/arm/ih264_ihadamard_scaling_a9.s
@@ -0,0 +1,250 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264_ihadamard_scaling_a9.s
+@ *
+@ * @brief
+@ * Contains function definitions for inverse hadamard transform on 4x4 DC outputs
+@ * of 16x16 intra-prediction
+@ *
+@ * @author
+@ * Mohit
+@ *
+@ * @par List of Functions:
+@ * - ih264_ihadamard_scaling_4x4_a9()
+@ * - ih264_ihadamard_scaling_2x2_uv_a9()
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+@ * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
+@ * of a 16x16 intra prediction macroblock, and then performs scaling.
+@ * prediction buffer
+@ *
+@ * @par Description:
+@ * The DC coefficients pass through a 2-stage inverse hadamard transform.
+@ * The inverse-transformed content is then scaled based on the Qp value.
+@ *
+@ * @param[in] pi2_src
+@ * input 4x4 block of DC coefficients
+@ *
+@ * @param[out] pi2_out
+@ * output 4x4 block
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * pointer to scaling list
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * pointer to weight matrix
+@ *
+@ * @param[in] u4_qp_div_6
+@ * Floor (qp/6)
+@ *
+@ * @param[in] pi4_tmp
+@ * temporary buffer of size 1*16
+@ *
+@ * @returns none
+@ *
+@ * @remarks none
+@ *
+@ *******************************************************************************
+@ */
+@ *
+@ *******************************************************************************
+@ */
+@ void ih264_ihadamard_scaling_4x4(WORD16* pi2_src,
+@ WORD16* pi2_out,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+@ WORD32* pi4_tmp)
+@**************Variables Vs Registers*****************************************
+@r0 => *pi2_src
+@r1 => *pi2_out
+@r2 => *pu2_iscal_mat
+@r3 => *pu2_weigh_mat
+@r4 => u4_qp_div_6
+
+.text
+.p2align 2
+
+ .global ih264_ihadamard_scaling_4x4_a9
+
+ih264_ihadamard_scaling_4x4_a9:
+
+@VLD4.S16 is used because the pointer is incremented by SUB_BLK_WIDTH_4x4
+@If the macro value changes need to change the instruction according to it.
+@The load de-interleaves with a stride of four, so d0 = src[0,4,8,12] ... d3 = src[3,7,11,15].
+@Scaling is a left shift by u4_qp_div_6 (VSHL) followed by a saturating, rounding
+@right shift by 6 that narrows each result to 16 bits (VQRSHRN).
+
+ stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (40 bytes), so the first stack argument is now at sp+40
+ ldr r4, [sp, #40] @ Loads u4_qp_div_6
+ vdup.s32 q10, r4 @ Populate the u4_qp_div_6 in Q10
+ ldrh r6, [r3] @ load pu2_weight_mat[0] , H for unsigned halfword load
+ ldrh r7, [r2] @ load pu2_iscal_mat[0] , H for unsigned halfword load
+ mul r6, r6, r7 @ pu2_iscal_mat[0]*pu2_weigh_mat[0]
+ vdup.s32 q9, r6 @ Populate pu2_iscal_mat[0]*pu2_weigh_mat[0] 32-bit in Q9
+ vpush {d8-d15} @ save d8-d15; restored by the vpop before returning
+@=======================INVERSE HADAMARD TRANSFORM================================
+
+ vld4.s16 {d0, d1, d2, d3}, [r0] @load x4,x5,x6,x7
+ vaddl.s16 q12, d0, d3 @x0 = x4 + x7
+ vaddl.s16 q13, d1, d2 @x1 = x5 + x6
+ vsubl.s16 q14, d1, d2 @x2 = x5 - x6
+ vsubl.s16 q15, d0, d3 @x3 = x4 - x7
+
+ vadd.s32 q2, q12, q13 @pi4_tmp_ptr[0] = x0 + x1
+ vadd.s32 q3, q15, q14 @pi4_tmp_ptr[1] = x3 + x2
+ vsub.s32 q4, q12, q13 @pi4_tmp_ptr[2] = x0 - x1
+ vsub.s32 q5, q15, q14 @pi4_tmp_ptr[3] = x3 - x2
+
+ vtrn.32 q2, q3 @Transpose the register for vertical transform
+ vtrn.32 q4, q5
+
+ vswp d5, d8 @Q2 = x4, Q4 = x6
+ vswp d7, d10 @Q3 = x5, Q5 = x7
+
+
+ vadd.s32 q12, q2, q5 @x0 = x4+x7
+ vadd.s32 q13, q3, q4 @x1 = x5+x6
+ vsub.s32 q14, q3, q4 @x2 = x5-x6
+ vsub.s32 q15, q2, q5 @x3 = x4-x7
+
+ vadd.s32 q0, q12, q13 @pi4_tmp_ptr[0] = x0 + x1
+ vadd.s32 q1, q15, q14 @pi4_tmp_ptr[1] = x3 + x2
+ vsub.s32 q2, q12, q13 @pi4_tmp_ptr[2] = x0 - x1
+ vsub.s32 q3, q15, q14 @pi4_tmp_ptr[3] = x3 - x2
+
+
+ vmul.s32 q0, q0, q9 @ Q0 = p[i] = (x[i] * pu2_iscal_mat[0] * pu2_weigh_mat[0]) where i = 0..3
+ vmul.s32 q1, q1, q9 @ Q1 = p[i] = (x[i] * pu2_iscal_mat[0] * pu2_weigh_mat[0]) where i = 4..7
+ vmul.s32 q2, q2, q9 @ Q2 = p[i] = (x[i] * pu2_iscal_mat[0] * pu2_weigh_mat[0]) where i = 8..11
+ vmul.s32 q3, q3, q9 @ Q3 = p[i] = (x[i] * pu2_iscal_mat[0] * pu2_weigh_mat[0]) where i = 12..15
+
+ vshl.s32 q0, q0, q10 @ Q0 = q[i] = (p[i] << (qP/6)) where i = 0..3
+ vshl.s32 q1, q1, q10 @ Q1 = q[i] = (p[i] << (qP/6)) where i = 4..7
+ vshl.s32 q2, q2, q10 @ Q2 = q[i] = (p[i] << (qP/6)) where i = 8..11
+ vshl.s32 q3, q3, q10 @ Q3 = q[i] = (p[i] << (qP/6)) where i = 12..15
+
+ vqrshrn.s32 d0, q0, #0x6 @ D0 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 0..3
+ vqrshrn.s32 d1, q1, #0x6 @ D1 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 4..7
+ vqrshrn.s32 d2, q2, #0x6 @ D2 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 8..11
+ vqrshrn.s32 d3, q3, #0x6 @ D3 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 12..15
+
+ vst1.s16 {d0, d1, d2, d3}, [r1] @store all 16 output coefficients to pi2_out
+
+ vpop {d8-d15} @ restore d8-d15
+ ldmfd sp!, {r4-r12, r15} @ restore r4-r12 and return by loading the saved lr into pc
+
+
+
+@ *******************************************************************************
+@ */
+@ * @brief This function performs a 2x2 inverse hadamard transform for chroma block
+@ *
+@ * @par Description:
+@ * The DC coefficients pass through a 2-stage inverse hadamard transform.
+@ * The inverse-transformed content is then scaled based on the Qp value.
+@ * The DC blocks of both U and V are processed.
+@ *
+@ * @param[in] pi2_src
+@ * input 1x8 block of coeffs. First 4 are from U and next 4 from V
+@ *
+@ * @param[out] pi2_out
+@ * output 1x8 block
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * pointer to scaling list
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * pointer to weight matrix
+@ *
+@ * @param[in] u4_qp_div_6
+@ * Floor (qp/6)
+@ *
+@ * @returns none
+@ *
+@ * @remarks none
+@ *
+@ *******************************************************************************
+@ */
+@ *
+@ *******************************************************************************
+@ */
+@ void ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src,
+@ WORD16* pi2_out,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+
+ .global ih264_ihadamard_scaling_2x2_uv_a9
+ih264_ihadamard_scaling_2x2_uv_a9:
+@ 2x2 inverse Hadamard transform and scaling of the four U and the four V DC coefficients.
+@Registers used
+@ r0 : *pi2_src
+@ r1 : *pi2_out
+@ r2 : *pu2_iscal_mat
+@ r3 : *pu2_weigh_mat
+
+ vld1.u16 d26[0], [r2] @pu2_iscal_mat[0] into lane 0
+ vld1.u16 d27[0], [r3] @pu2_weigh_mat[0] into lane 0
+ vmull.u16 q15, d26, d27 @pu2_iscal_mat[0] * pu2_weigh_mat[0]
+ vdup.u32 q15, d30[0] @broadcast the 32 bit product to all four lanes of Q15
+
+ vld1.u16 d28[0], [sp] @load qp/6 (low halfword at [sp]; read before vpush moves sp)
+
+ vpush {d8-d15} @save d8-d15; restored by the vpop before returning
+
+ vmov.u16 d29, #5
+ vsubl.u16 q14, d28, d29 @qp/6 - 5
+ vdup.s32 q14, d28[0] @broadcast (qp/6 - 5) to all four 32 bit lanes of Q14
+
+ vld2.s16 {d0, d1}, [r0] @load 8 dc coeffs
+ @i2_x4,i2_x6,i2_y4,i2_y6 -> d0
+ @i2_x5,i2_x7,i2_y5,i2_y7 -> d1
+
+ vaddl.s16 q1, d0, d1 @ i4_x0 = i4_x4 + i4_x5;...x2
+ vsubl.s16 q2, d0, d1 @ i4_x1 = i4_x4 - i4_x5;...x3
+
+ vtrn.s32 q1, q2 @i4_x0 i4_x1 -> q1
+
+ vadd.s32 q3, q1, q2 @i4_x4 = i4_x0+i4_x2;.. i4_x5
+ vsub.s32 q1, q1, q2 @i4_x6 = i4_x0-i4_x2;.. i4_x7
+
+ vmul.s32 q5, q3, q15 @multiply by pu2_iscal_mat[0] * pu2_weigh_mat[0]
+ vmul.s32 q6, q1, q15 @multiply by pu2_iscal_mat[0] * pu2_weigh_mat[0]
+
+ vshl.s32 q7, q5, q14 @shift left by (qp/6 - 5); VSHL treats a negative count as a right shift
+ vshl.s32 q8, q6, q14 @shift left by (qp/6 - 5); VSHL treats a negative count as a right shift
+
+ vmovn.s32 d18, q7 @i4_x4 i4_x5 i4_y4 i4_y5
+ vmovn.s32 d19, q8 @i4_x6 i4_x7 i4_y6 i4_y7
+
+ vst2.s32 {d18-d19}, [r1] @interleave 32 bit pairs so memory order is x4 x5 x6 x7 y4 y5 y6 y7
+
+ vpop {d8-d15} @restore d8-d15
+ bx lr @return; no core registers were saved
+
+
diff --git a/common/arm/ih264_inter_pred_chroma_a9q.s b/common/arm/ih264_inter_pred_chroma_a9q.s
new file mode 100755
index 0000000..afd2860
--- /dev/null
+++ b/common/arm/ih264_inter_pred_chroma_a9q.s
@@ -0,0 +1,254 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_chroma_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_chroma_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Interprediction chroma filter
+@*
+@* @par Description:
+@* Applies filtering to chroma samples as mentioned in
+@* sec 8.4.2.2.2 titled "chroma sample interpolation process"
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in]uc_dx
+@* dx value where the sample is to be produced(refer sec 8.4.2.2.2 )
+@*
+@* @param[in] uc_dy
+@* dy value where the sample is to be produced(refer sec 8.4.2.2.2 )
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@void ih264_inter_pred_chroma(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ UWORD8 u1_dx,
+@ UWORD8 u1_dy,
+@ WORD32 ht,
+@ WORD32 wd)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => u1_dx
+@ r5 => u1_dy
+@ r6 => height
+@ r7 => width
+@
+.text
+.p2align 2
+
+ .global ih264_inter_pred_chroma_a9q
+
+ih264_inter_pred_chroma_a9q:
+
+@ out = (A*(8-dx)*(8-dy) + B*dx*(8-dy) + C*(8-dx)*dy + D*dx*dy + 32) >> 6; B is 2 bytes right of A, C/D are one row below A/B.
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ vstmdb sp!, {d8-d15} @push neon registers to stack
+ ldr r4, [sp, #104] @u1_dx (40 bytes of core registers + 64 bytes of NEON registers were pushed)
+ ldr r5, [sp, #108] @u1_dy
+ ldr r6, [sp, #112] @ht
+ ldr r7, [sp, #116] @wd
+
+ rsb r8, r4, #8 @8-u1_dx
+ rsb r9, r5, #8 @8-u1_dy
+ mul r10, r8, r9 @(8-u1_dx)*(8-u1_dy)
+ mul r11, r4, r9 @u1_dx*(8-u1_dy)
+
+ vdup.u8 d28, r10 @d28 = (8-u1_dx)*(8-u1_dy)
+ vdup.u8 d29, r11 @d29 = u1_dx*(8-u1_dy)
+
+ mul r10, r8, r5 @(8-u1_dx)*u1_dy
+ mul r11, r4, r5 @u1_dx*u1_dy
+
+ vdup.u8 d30, r10 @d30 = (8-u1_dx)*u1_dy
+ vdup.u8 d31, r11 @d31 = u1_dx*u1_dy
+
+ subs r12, r7, #2 @if wd=2 branch to loop_2
+ beq loop_2
+ subs r12, r7, #4 @if wd=4 branch to loop_4
+ beq loop_4
+
+loop_8:
+ sub r6, #1 @ht-1 passes through inner_loop_8; the last row is finished after the loop
+ vld1.8 {d0, d1, d2}, [r0], r2 @ Load row0
+ vld1.8 {d5, d6, d7}, [r0], r2 @ Load row1
+ vext.8 d3, d0, d1, #2 @ row0 shifted by 2 bytes (first 8 outputs)
+ vext.8 d8, d5, d6, #2 @ row1 shifted by 2 bytes (first 8 outputs)
+
+ vmull.u8 q5, d0, d28 @ q5 = weighted sum for the first 8 output bytes
+ vmlal.u8 q5, d5, d30
+ vmlal.u8 q5, d3, d29
+ vmlal.u8 q5, d8, d31
+ vext.8 d9, d6, d7, #2 @ row1 shifted by 2 bytes (last 8 outputs)
+ vext.8 d4, d1, d2, #2 @ row0 shifted by 2 bytes (last 8 outputs)
+
+inner_loop_8:
+ vmull.u8 q6, d6, d30 @ q6 = weighted sum for the last 8 output bytes
+ vmlal.u8 q6, d1, d28
+ vmlal.u8 q6, d9, d31
+ vmlal.u8 q6, d4, d29
+ vmov d0, d5 @ the lower row becomes the upper row for the next output row
+ vmov d3, d8
+
+ vqrshrun.s16 d14, q5, #6 @ (sum + 32) >> 6, saturated to unsigned 8 bit
+ vmov d1, d6
+ vmov d4, d9
+
+ vld1.8 {d5, d6, d7}, [r0], r2 @ Load the next row
+ vqrshrun.s16 d15, q6, #6 @ (sum + 32) >> 6, saturated to unsigned 8 bit
+
+ vext.8 d8, d5, d6, #2
+ subs r6, #1
+ vext.8 d9, d6, d7, #2
+ vst1.8 {q7}, [r1], r3 @ Store dest row
+
+ vmull.u8 q5, d0, d28
+ vmlal.u8 q5, d5, d30
+ vmlal.u8 q5, d3, d29
+ vmlal.u8 q5, d8, d31
+ bne inner_loop_8
+
+ vmull.u8 q6, d6, d30 @ finish the last row: q5 was already computed inside the loop
+ vmlal.u8 q6, d1, d28
+ vmlal.u8 q6, d9, d31
+ vmlal.u8 q6, d4, d29
+
+ vqrshrun.s16 d14, q5, #6
+ vqrshrun.s16 d15, q6, #6
+
+ vst1.8 {q7}, [r1], r3 @ Store dest row
+
+ b end_func
+
+loop_4:
+ sub r6, #1 @ht-1 passes through inner_loop_4; the last row is stored after the loop
+ vld1.8 {d0, d1}, [r0], r2 @ Load row0
+ vld1.8 {d2, d3}, [r0], r2 @ Load row1
+ vext.8 d1, d0, d1, #2 @ row0 shifted by 2 bytes
+ vext.8 d3, d2, d3, #2 @ row1 shifted by 2 bytes
+
+ vmull.u8 q2, d2, d30 @ q2 = weighted sum for the 8 output bytes
+ vmlal.u8 q2, d0, d28
+ vmlal.u8 q2, d3, d31
+ vmlal.u8 q2, d1, d29
+
+inner_loop_4:
+ subs r6, #1
+ vmov d0, d2 @ the lower row becomes the upper row for the next output row
+ vmov d1, d3
+
+ vld1.8 {d2, d3}, [r0], r2 @ Load the next row
+ vqrshrun.s16 d6, q2, #6 @ (sum + 32) >> 6, saturated to unsigned 8 bit
+
+ vext.8 d3, d2, d3, #2
+ vst1.8 {d6}, [r1], r3 @ Store dest row
+
+ vmull.u8 q2, d0, d28
+ vmlal.u8 q2, d2, d30
+ vmlal.u8 q2, d1, d29
+ vmlal.u8 q2, d3, d31
+ bne inner_loop_4
+
+ vqrshrun.s16 d6, q2, #6
+ vst1.8 {d6}, [r1], r3 @ Store dest row
+
+ b end_func
+
+loop_2:
+ vld1.8 {d0}, [r0], r2 @ Load row0
+ vext.8 d1, d0, d0, #2 @ row0 shifted by 2 bytes
+ vld1.8 {d2}, [r0], r2 @ Load row1
+ vext.8 d3, d2, d2, #2 @ row1 shifted by 2 bytes
+ vmull.u8 q2, d0, d28
+ vmlal.u8 q2, d1, d29
+ vmlal.u8 q2, d2, d30
+ vmlal.u8 q2, d3, d31
+ vld1.8 {d6}, [r0] @ Load row2 (no pointer update, so the next pass reloads it as row0)
+ vqrshrun.s16 d4, q2, #6 @ (sum + 32) >> 6, saturated to unsigned 8 bit
+ vext.8 d7, d6, d6, #2 @ row2 shifted by 2 bytes
+ vst1.32 d4[0], [r1], r3 @ Store dest row0
+ vmull.u8 q4, d2, d28
+ vmlal.u8 q4, d3, d29
+ vmlal.u8 q4, d6, d30
+ vmlal.u8 q4, d7, d31
+ subs r6, #2 @ two rows are produced per pass
+ vqrshrun.s16 d8, q4, #6
+ vst1.32 d8[0], [r1], r3 @ Store dest row1
+ bne loop_2 @ repeat while rows remain (r6 != 0)
+
+end_func:
+ vldmia sp!, {d8-d15} @ Restore neon registers that were saved
+ ldmfd sp!, {r4-r12, pc} @ Restoring registers from stack
+
diff --git a/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
new file mode 100755
index 0000000..ea6bba0
--- /dev/null
+++ b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
@@ -0,0 +1,245 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_filters_luma_horz_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_horz_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Interprediction luma filter for horizontal input
+@*
+@* @par Description:
+@* Applies a 6 tap horizontal filter. The output is clipped to 8 bits
+@* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@void ih264_inter_pred_luma_horz (
+@ UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd )
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r5 => ht
+@ r6 => wd
+
+.text
+.p2align 2
+
+
+ .global ih264_inter_pred_luma_horz_a9q
+
+ih264_inter_pred_luma_horz_a9q:
+@ Entry: save registers, fetch ht/wd from the stack, then dispatch on wd.
+ stmfd sp!, {r4-r12, r14} @store register values to stack (10 words = 40 bytes)
+ vstmdb sp!, {d8-d15} @push neon registers to stack (8 doublewords = 64 bytes)
+ ldr r5, [sp, #104] @Loads ht; #104 skips the 40 + 64 bytes pushed above
+ sub r0, r0, #2 @pu1_src-2: the window a[0]..a[5] starts two pixels left of pu1_src
+ ldr r6, [sp, #108] @Loads wd
+ vmov.i8 d0, #5 @filter coeff 5 (subtracted for a1 and a4)
+ subs r12, r6, #8 @if wd=8 branch to loop_8
+ vmov.i8 d1, #20 @filter coeff 20 (added for a2 and a3)
+ beq loop_8
+
+ subs r12, r6, #4 @if wd=4 branch to loop_4
+ beq loop_4
+
+loop_16: @any other wd lands here; 16 pixels x 2 rows per iteration
+ @// Processing row0 and row1; column1 = pixels 0-7, column2 = pixels 8-15
+ vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0 (24 bytes; the taps reach up to byte 20)
+ vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0)
+ vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1
+ vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row0)
+ vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0)
+ vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1)
+ vaddl.u8 q5, d30, d3 @// a0 + a5 (column2,row0)
+ vext.8 d27, d6, d7, #5 @//extract a[5] (column2,row1)
+ vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1)
+ vext.8 d31, d2, d3, #2 @//extract a[2] (column1,row0)
+ vaddl.u8 q8, d27, d6 @// a0 + a5 (column2,row1)
+ vext.8 d30, d3, d4, #2 @//extract a[2] (column2,row0)
+ vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vext.8 d28, d5, d6, #2 @//extract a[2] (column1,row1)
+ vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vext.8 d27, d6, d7, #2 @//extract a[2] (column2,row1)
+ vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 (column1,row1)
+ vext.8 d31, d2, d3, #3 @//extract a[3] (column1,row0)
+ vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 (column2,row1)
+ vext.8 d30, d3, d4, #3 @//extract a[3] (column2,row0)
+ vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vext.8 d28, d5, d6, #3 @//extract a[3] (column1,row1)
+ vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vext.8 d27, d6, d7, #3 @//extract a[3] (column2,row1)
+ vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ vext.8 d31, d2, d3, #1 @//extract a[1] (column1,row0)
+ vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row1)
+ vext.8 d30, d3, d4, #1 @//extract a[1] (column2,row0)
+ vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vext.8 d28, d5, d6, #1 @//extract a[1] (column1,row1)
+ vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vext.8 d27, d6, d7, #1 @//extract a[1] (column2,row1)
+ vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ vext.8 d31, d2, d3, #4 @//extract a[4] (column1,row0)
+ vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+ vext.8 d30, d3, d4, #4 @//extract a[4] (column2,row0)
+ vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ vext.8 d28, d5, d6, #4 @//extract a[4] (column1,row1)
+ vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ vext.8 d27, d6, d7, #4 @//extract a[4] (column2,row1)
+ vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+ vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vqrshrun.s16 d21, q5, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.8 d31, d2, d3, #5 @// NOTE(review): d2/d3 still hold row0; d31 is rewritten at the top of loop_16 before any read
+ vst1.8 {d20, d21}, [r1], r3 @//Store dest row0
+ vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ vext.8 d30, d3, d4, #5 @// NOTE(review): same as above for d30; this result is never consumed
+ vqrshrun.s16 d24, q8, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+ vst1.8 {d23, d24}, [r1], r3 @//Store dest row1
+ subs r5, r5, #2 @ 2 rows done, decrement by 2
+
+ beq end_func @ exit once the row counter reaches zero
+ b loop_16 @ otherwise filter the next two rows
+
+loop_8:
+@// Processing row0 and row1 (8 pixels each); row0 is the first row loaded and the first stored
+ vld1.8 {d5, d6}, [r0], r2 @// Load row0
+ vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row0)
+ vld1.8 {d2, d3}, [r0], r2 @// Load row1
+ vext.8 d25, d5, d6, #2 @//extract a[2] (column1,row0)
+ vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row1)
+ vext.8 d24, d5, d6, #3 @//extract a[3] (column1,row0)
+ vext.8 d23, d5, d6, #1 @//extract a[1] (column1,row0)
+ vext.8 d22, d5, d6, #4 @//extract a[4] (column1,row0)
+ vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row0)
+ vext.8 d29, d2, d3, #3 @//extract a[3] (column1,row1)
+ vmlal.u8 q7, d25, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q7, d24, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q7, d23, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q7, d22, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ vext.8 d30, d2, d3, #2 @//extract a[2] (column1,row1)
+ vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row1)
+ vext.8 d27, d2, d3, #1 @//extract a[1] (column1,row1)
+ vext.8 d26, d2, d3, #4 @//extract a[4] (column1,row1)
+ vmlal.u8 q4, d29, d1 @// a0 + a5 + 20a3 (column1,row1)
+ vmlal.u8 q4, d30, d1 @// a0 + a5 + 20a3 + 20a2 (column1,row1)
+ vmlsl.u8 q4, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ vmlsl.u8 q4, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vst1.8 {d23}, [r1], r3 @//Store dest row0
+ vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ vst1.8 {d20}, [r1], r3 @//Store dest row1
+ subs r5, r5, #2 @ 2 rows done, decrement by 2
+
+ beq end_func @ exit once the row counter reaches zero
+
+ b loop_8 @ otherwise filter the next two rows
+
+loop_4:
+ vld1.8 {d5, d6}, [r0], r2 @// Load row0 (first row loaded, first row stored)
+ vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row0)
+ vld1.8 {d2, d3}, [r0], r2 @// Load row1
+ vext.8 d25, d5, d6, #2 @//extract a[2] (column1,row0)
+ vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row1)
+ vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row0)
+ vext.8 d24, d5, d6, #3 @//extract a[3] (column1,row0)
+ vext.8 d23, d5, d6, #1 @//extract a[1] (column1,row0)
+ vext.8 d22, d5, d6, #4 @//extract a[4] (column1,row0)
+ vext.8 d29, d2, d3, #3 @//extract a[3] (column1,row1)
+ vmlal.u8 q7, d25, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q7, d24, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q7, d23, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q7, d22, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row1)
+ vext.8 d30, d2, d3, #2 @//extract a[2] (column1,row1)
+ vext.8 d27, d2, d3, #1 @//extract a[1] (column1,row1)
+ vext.8 d26, d2, d3, #4 @//extract a[4] (column1,row1)
+ vmlal.u8 q4, d29, d1 @// a0 + a5 + 20a3 (column1,row1)
+ vmlal.u8 q4, d30, d1 @// a0 + a5 + 20a3 + 20a2 (column1,row1)
+ vmlsl.u8 q4, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ vmlsl.u8 q4, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vst1.32 d23[0], [r1], r3 @//Store dest row0 (lane 0 only: 4 pixels)
+ vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ vst1.32 d20[0], [r1], r3 @//Store dest row1 (lane 0 only: 4 pixels)
+ subs r5, r5, #2 @ 2 rows done, decrement by 2
+ beq end_func @ exit once the row counter reaches zero
+
+ b loop_4 @ otherwise filter the next two rows
+
+end_func:
+ vldmia sp!, {d8-d15} @ Restore neon registers that were saved
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack; loading pc returns to the caller
+
+
diff --git a/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
new file mode 100755
index 0000000..5b29e02
--- /dev/null
+++ b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
@@ -0,0 +1,301 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_filters_luma_vert_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_vert_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@ *******************************************************************************
+@ *
+@ * @brief
+@ * Interprediction luma filter for vertical input
+@ *
+@ * @par Description:
+@ * Applies a 6 tap vertical filter. The output is clipped to 8 bits
+@ * sec 8.4.2.2.1 titled "Luma sample interpolation process"
+@ *
+@ * @param[in] pu1_src
+@ * UWORD8 pointer to the source
+@ *
+@ * @param[out] pu1_dst
+@ * UWORD8 pointer to the destination
+@ *
+@ * @param[in] src_strd
+@ * integer source stride
+@ *
+@ * @param[in] dst_strd
+@ * integer destination stride
+@ *
+@ * @param[in] ht
+@ * integer height of the array
+@ *
+@ * @param[in] wd
+@ * integer width of the array
+@ *
+@ * @returns
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+
+@void ih264_inter_pred_luma_vert (
+@ UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd )
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r5 => ht
+@ r6 => wd
+
+.text
+.p2align 2
+
+
+ .global ih264_inter_pred_luma_vert_a9q
+
+ih264_inter_pred_luma_vert_a9q:
+@ Entry: save registers, fetch ht/wd from the stack, then dispatch on wd.
+ stmfd sp!, {r4-r12, r14} @store register values to stack (10 words = 40 bytes)
+ vstmdb sp!, {d8-d15} @push neon registers to stack (8 doublewords = 64 bytes)
+ ldr r5, [sp, #104] @Loads ht; #104 skips the 40 + 64 bytes pushed above
+ sub r0, r0, r2, lsl #1 @pu1_src-2*src_strd: first of the six rows in the filter window
+ ldr r6, [sp, #108] @Loads wd
+ vmov.u16 q11, #20 @ Filter coeff 0x14 into Q11
+
+ subs r12, r6, #8 @if wd=8 branch to loop_8
+ vmov.u16 q12, #5 @ Filter coeff 0x5 into Q12
+ beq loop_8
+
+ subs r12, r6, #4 @if wd=4 branch to loop_4
+ beq loop_4
+
+loop_16: @any other wd lands here; 16 pixels x 4 rows per iteration
+@ src[r_c] = source row r counted from r0 at loop entry, column c (0 = pixels 0-7, 8 = pixels 8-15)
+ vld1.u32 {q0}, [r0], r2 @ Vector load from src[0_0]
+ vld1.u32 {q1}, [r0], r2 @ Vector load from src[1_0]
+ vld1.u32 {q2}, [r0], r2 @ Vector load from src[2_0]
+ vld1.u32 {q3}, [r0], r2 @ Vector load from src[3_0]
+ vld1.u32 {q4}, [r0], r2 @ Vector load from src[4_0]
+ vaddl.u8 q6, d4, d6 @ temp1 = src[2_0] + src[3_0]
+ vld1.u32 {q5}, [r0], r2 @ Vector load from src[5_0]
+
+ vaddl.u8 q7, d0, d10 @ temp = src[0_0] + src[5_0]
+ vaddl.u8 q8, d2, d8 @ temp2 = src[1_0] + src[4_0]
+ vmla.u16 q7, q6, q11 @ temp += temp1 * 20
+ vaddl.u8 q10, d1, d11 @ temp4 = src[0_8] + src[5_8]
+ vaddl.u8 q9, d5, d7 @ temp3 = src[2_8] + src[3_8]
+ vmla.u16 q10, q9, q11 @ temp4 += temp3 * 20
+ vld1.u32 {q0}, [r0], r2 @ Vector load from src[6_0]; q0 (row 0) is no longer needed
+ vaddl.u8 q13, d3, d9 @ temp5 = src[1_8] + src[4_8]
+ vaddl.u8 q6, d6, d8 @ row1: src[3_0] + src[4_0]
+ vmls.u16 q7, q8, q12 @ temp -= temp2 * 5
+ vaddl.u8 q8, d2, d0 @ row1: src[1_0] + src[6_0]
+ vaddl.u8 q9, d4, d10 @ row1: src[2_0] + src[5_0]
+ vmla.u16 q8, q6, q11 @ row1: += 20 * (src[3_0] + src[4_0])
+ vmls.u16 q10, q13, q12 @ temp4 -= temp5 * 5
+ vaddl.u8 q13, d5, d11 @ row1: src[2_8] + src[5_8]
+ vaddl.u8 q6, d7, d9 @ row1: src[3_8] + src[4_8]
+ vqrshrun.s16 d30, q7, #5 @ dst[0_0] = CLIP_U8((temp +16) >> 5)
+ vaddl.u8 q7, d3, d1 @ row1: src[1_8] + src[6_8]
+ vld1.u32 {q1}, [r0], r2 @ Vector load from src[7_0]; q1 (row 1) is no longer needed
+ vmla.u16 q7, q6, q11 @ row1: += 20 * (src[3_8] + src[4_8])
+ vmls.u16 q8, q9, q12 @ row1: -= 5 * (src[2_0] + src[5_0])
+ vqrshrun.s16 d31, q10, #5 @ dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ vaddl.u8 q9, d4, d2 @ row2: src[2_0] + src[7_0]
+ vaddl.u8 q6, d8, d10 @ row2: src[4_0] + src[5_0]
+
+ vst1.u32 {q15}, [r1], r3 @ Vector store to dst[0_0]
+ vmla.u16 q9, q6, q11 @ row2: += 20 * (src[4_0] + src[5_0])
+ vaddl.u8 q10, d6, d0 @ row2: src[3_0] + src[6_0]
+ vmls.u16 q7, q13, q12 @ row1: -= 5 * (src[2_8] + src[5_8])
+ vqrshrun.s16 d30, q8, #5 @ dst[1_0] = CLIP_U8((sum + 16) >> 5)
+ vaddl.u8 q6, d9, d11 @ row2: src[4_8] + src[5_8]
+ vaddl.u8 q8, d5, d3 @ row2: src[2_8] + src[7_8]
+ vaddl.u8 q13, d7, d1 @ row2: src[3_8] + src[6_8]
+ vmla.u16 q8, q6, q11 @ row2: += 20 * (src[4_8] + src[5_8])
+ vmls.u16 q9, q10, q12 @ row2: -= 5 * (src[3_0] + src[6_0])
+ vld1.u32 {q2}, [r0], r2 @ Vector load from src[8_0]; q2 (row 2) is no longer needed
+
+ vqrshrun.s16 d31, q7, #5 @ dst[1_8] = CLIP_U8((sum + 16) >> 5)
+ vaddl.u8 q6, d10, d0 @ row3: src[5_0] + src[6_0]
+ vaddl.u8 q7, d6, d4 @ row3: src[3_0] + src[8_0]
+ vaddl.u8 q10, d8, d2 @ row3: src[4_0] + src[7_0]
+ vmla.u16 q7, q6, q11 @ row3: += 20 * (src[5_0] + src[6_0])
+ vmls.u16 q8, q13, q12 @ row2: -= 5 * (src[3_8] + src[6_8])
+ vst1.u32 {q15}, [r1], r3 @store row 1
+ vqrshrun.s16 d30, q9, #5 @ dst[2_0] = CLIP_U8((sum + 16) >> 5)
+ vaddl.u8 q9, d7, d5 @ row3: src[3_8] + src[8_8]
+ vaddl.u8 q6, d11, d1 @ row3: src[5_8] + src[6_8]
+ vmla.u16 q9, q6, q11 @ row3: += 20 * (src[5_8] + src[6_8])
+ vaddl.u8 q13, d9, d3 @ row3: src[4_8] + src[7_8]
+ vmls.u16 q7, q10, q12 @ row3: -= 5 * (src[4_0] + src[7_0])
+
+ vqrshrun.s16 d31, q8, #5 @ dst[2_8] = CLIP_U8((sum + 16) >> 5)
+ vmls.u16 q9, q13, q12 @ row3: -= 5 * (src[4_8] + src[7_8])
+ vaddl.u8 q6, d0, d2 @ src[6_0] + src[7_0]; NOTE(review): looks like a leftover start of a 5th row, never stored
+ vst1.u32 {q15}, [r1], r3 @store row 2
+ vaddl.u8 q8, d10, d4 @ src[5_0] + src[8_0]; NOTE(review): result is overwritten before any use
+ vaddl.u8 q10, d9, d7 @ src[4_8] + src[3_8]; NOTE(review): result is overwritten before any use
+ vqrshrun.s16 d30, q7, #5 @ dst[3_0] = CLIP_U8((sum + 16) >> 5)
+ vaddl.u8 q13, d5, d11 @ src[8_8] + src[5_8]; NOTE(review): result is overwritten before any use
+ vaddl.u8 q7, d8, d6 @ src[4_0] + src[3_0]; NOTE(review): result is overwritten before any use
+ vqrshrun.s16 d31, q9, #5 @ dst[3_8] = CLIP_U8((sum + 16) >> 5)
+ vmla.u16 q7, q6, q11 @ NOTE(review): q7 is rebuilt at the top of loop_16, so this sum is discarded
+ vaddl.u8 q9, d1, d3 @ src[6_8] + src[7_8]; NOTE(review): result is overwritten before any use
+ vst1.u32 {q15}, [r1], r3 @store row 3
+ subs r5, r5, #4 @ 4 rows processed, decrement by 4
+ subne r0, r0 , r2, lsl #2 @ rows remain: 9 rows were read, step back 4 of them ...
+ subne r0, r0, r2 @ ... and 1 more, so the next pass starts at src[4_0]
+ beq end_func @ exit once the row counter reaches zero
+
+ b loop_16 @ otherwise filter the next four rows
+
+loop_8:
+@// Processing rows 0-3 (8 pixels each); nine source rows src[0_0]..src[8_0] are read per pass
+
+ vld1.u32 d0, [r0], r2 @ Vector load from src[0_0]
+ vld1.u32 d1, [r0], r2 @ Vector load from src[1_0]
+ vld1.u32 d2, [r0], r2 @ Vector load from src[2_0]
+ vld1.u32 d3, [r0], r2 @ Vector load from src[3_0]
+ vld1.u32 d4, [r0], r2 @ Vector load from src[4_0]
+ vld1.u32 d5, [r0], r2 @ Vector load from src[5_0]
+
+ vaddl.u8 q3, d2, d3 @ temp1 = src[2_0] + src[3_0]
+ vaddl.u8 q4, d0, d5 @ temp = src[0_0] + src[5_0]
+ vaddl.u8 q5, d1, d4 @ temp2 = src[1_0] + src[4_0]
+ vmla.u16 q4, q3, q11 @ temp += temp1 * 20
+ vld1.u32 d6, [r0], r2 @ Vector load from src[6_0]; reuses q3 now that temp1 is consumed
+ vaddl.u8 q7, d3, d4 @ row1: src[3_0] + src[4_0]
+ vaddl.u8 q8, d1, d6 @ row1: src[1_0] + src[6_0]
+ vaddl.u8 q9, d2, d5 @ row1: src[2_0] + src[5_0]
+ vmls.u16 q4, q5, q12 @ temp -= temp2 * 5
+ vmla.u16 q8, q7, q11 @ row1: += 20 * (src[3_0] + src[4_0])
+ vld1.u32 d7, [r0], r2 @ Vector load from src[7_0]
+ vaddl.u8 q10, d4, d5 @ row2: src[4_0] + src[5_0]
+ vaddl.u8 q6, d2, d7 @ row2: src[2_0] + src[7_0]
+ vaddl.u8 q5, d3, d6 @ row2: src[3_0] + src[6_0]
+ vmls.u16 q8, q9, q12 @ row1: -= 5 * (src[2_0] + src[5_0])
+ vqrshrun.s16 d26, q4, #5 @ dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+ vmla.u16 q6, q10, q11 @ row2: += 20 * (src[4_0] + src[5_0])
+ vld1.u32 d0, [r0], r2 @ Vector load from src[8_0]; row 0 in d0 is no longer needed
+ vaddl.u8 q7, d5, d6 @ row3: src[5_0] + src[6_0]
+ vqrshrun.s16 d27, q8, #5 @ dst[1_0] = CLIP_U8( (sum + 16) >> 5)
+ vaddl.u8 q10, d3, d0 @ row3: src[3_0] + src[8_0]
+ vmls.u16 q6, q5, q12 @ row2: -= 5 * (src[3_0] + src[6_0])
+ vst1.u32 d26, [r1], r3 @ Vector store to dst[0_0]
+ vaddl.u8 q9, d4, d7 @ row3: src[4_0] + src[7_0]
+ vmla.u16 q10, q7, q11 @ row3: += 20 * (src[5_0] + src[6_0])
+ vst1.u32 d27, [r1], r3 @store row 1
+ vqrshrun.s16 d28, q6, #5 @ dst[2_0] = CLIP_U8( (sum + 16) >> 5)
+ vst1.u32 d28, [r1], r3 @store row 2
+ vmls.u16 q10, q9, q12 @ row3: -= 5 * (src[4_0] + src[7_0])
+ vqrshrun.s16 d29, q10, #5 @ dst[3_0] = CLIP_U8( (sum + 16) >> 5)
+ vst1.u32 d29, [r1], r3 @store row 3
+
+ subs r5, r5, #4 @ 4 rows processed, decrement by 4
+ subne r0, r0 , r2, lsl #2 @ rows remain: 9 rows were read, step back 4 of them ...
+ subne r0, r0, r2 @ ... and 1 more, so the next pass starts at src[4_0]
+ beq end_func @ exit once the row counter reaches zero
+
+ b loop_8 @ otherwise filter the next four rows
+
+
+loop_4:
+@// Processing rows 0-3 (4 pixels each, lane 0 of each d register); nine source rows are read per pass
+
+ vld1.u32 d0[0], [r0], r2 @ Vector load from src[0_0]
+ vld1.u32 d1[0], [r0], r2 @ Vector load from src[1_0]
+ vld1.u32 d2[0], [r0], r2 @ Vector load from src[2_0]
+ vld1.u32 d3[0], [r0], r2 @ Vector load from src[3_0]
+ vld1.u32 d4[0], [r0], r2 @ Vector load from src[4_0]
+ vld1.u32 d5[0], [r0], r2 @ Vector load from src[5_0]
+
+ vaddl.u8 q3, d2, d3 @ temp1 = src[2_0] + src[3_0]
+ vaddl.u8 q4, d0, d5 @ temp = src[0_0] + src[5_0]
+ vaddl.u8 q5, d1, d4 @ temp2 = src[1_0] + src[4_0]
+ vmla.u16 q4, q3, q11 @ temp += temp1 * 20
+ vld1.u32 d6[0], [r0], r2 @ Vector load from src[6_0]; reuses q3 now that temp1 is consumed
+ vaddl.u8 q7, d3, d4 @ row1: src[3_0] + src[4_0]
+ vaddl.u8 q8, d1, d6 @ row1: src[1_0] + src[6_0]
+ vaddl.u8 q9, d2, d5 @ row1: src[2_0] + src[5_0]
+ vmls.u16 q4, q5, q12 @ temp -= temp2 * 5
+ vld1.u32 d7[0], [r0], r2 @ Vector load from src[7_0]
+ vmla.u16 q8, q7, q11 @ row1: += 20 * (src[3_0] + src[4_0])
+ vaddl.u8 q10, d4, d5 @ row2: src[4_0] + src[5_0]
+ vaddl.u8 q6, d2, d7 @ row2: src[2_0] + src[7_0]
+ vaddl.u8 q5, d3, d6 @ row2: src[3_0] + src[6_0]
+ vmls.u16 q8, q9, q12 @ row1: -= 5 * (src[2_0] + src[5_0])
+ vqrshrun.s16 d26, q4, #5 @ dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+ vmla.u16 q6, q10, q11 @ row2: += 20 * (src[4_0] + src[5_0])
+ vld1.u32 d0[0], [r0], r2 @ Vector load from src[8_0]; row 0 in d0 is no longer needed
+ vaddl.u8 q7, d5, d6 @ row3: src[5_0] + src[6_0]
+ vqrshrun.s16 d27, q8, #5 @ dst[1_0] = CLIP_U8( (sum + 16) >> 5)
+ vaddl.u8 q10, d3, d0 @ row3: src[3_0] + src[8_0]
+ vmls.u16 q6, q5, q12 @ row2: -= 5 * (src[3_0] + src[6_0])
+ vst1.u32 d26[0], [r1], r3 @ Vector store to dst[0_0]
+ vaddl.u8 q9, d4, d7 @ row3: src[4_0] + src[7_0]
+ vmla.u16 q10, q7, q11 @ row3: += 20 * (src[5_0] + src[6_0])
+ vst1.u32 d27[0], [r1], r3 @store row 1
+ vqrshrun.s16 d28, q6, #5 @ dst[2_0] = CLIP_U8( (sum + 16) >> 5)
+ vst1.u32 d28[0], [r1], r3 @store row 2
+ vmls.u16 q10, q9, q12 @ row3: -= 5 * (src[4_0] + src[7_0])
+ vqrshrun.s16 d29, q10, #5 @ dst[3_0] = CLIP_U8( (sum + 16) >> 5)
+ vst1.u32 d29[0], [r1], r3 @store row 3
+
+ subs r5, r5, #8 @ r5 = ht - 8: zero only on the first pass of an ht == 8 call
+ subeq r0, r0, r2, lsl #2 @ ht == 8: 9 rows were read, step back 4 of them ...
+ subeq r0, r0, r2 @ ... and 1 more, so the second pass starts at src[4_0]
+ beq loop_4 @ Loop once more if height==8 (r5 is -8 after that pass, so it then falls through)
+
+end_func:
+ vldmia sp!, {d8-d15} @ Restore neon registers that were saved
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack; loading pc returns to the caller
+
+
diff --git a/common/arm/ih264_inter_pred_luma_bilinear_a9q.s b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
new file mode 100755
index 0000000..6a3c83d
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
@@ -0,0 +1,398 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_luma_bilinear_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_bilinear_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@ *******************************************************************************
+@ * function:ih264_inter_pred_luma_bilinear
+@ *
+@* @brief
+@* This routine applies the bilinear filter to the predictors .
+@* The filtering operation is described in
+@* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+@*
+@* @par Description:
+@\note
+@* This function is called to obtain pixels lying at the following
+@* locations (1/4,1), (3/4,1), (1,1/4), (1,3/4), (1/4,1/2), (3/4,1/2), (1/2,1/4), (1/2,3/4), (3/4,1/4), (1/4,3/4), (3/4,3/4) and (1/4,1/4).
+@* The function averages, with rounding, each sample of the first input array with the co-located sample of the second input array.
+@*
+@*
+@* @param[in] pu1_src1:
+@* UWORD8 Pointer to the buffer containing the first input array.
+@*
+@* @param[in] pu1_src2:
+@* UWORD8 Pointer to the buffer containing the second input array.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output of bilinear filter is stored.
+@*
+@* @param[in] src_strd1
+@* Stride of the first input buffer
+@*
+@* @param[in] src_strd2
+@* Stride of the second input buffer
+@*
+@* @param[in] dst_strd
+@* integer destination stride of pu1_dst
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
+@ UWORD8 *pu1_src2,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd1,
+@ WORD32 src_strd2,
+@ WORD32 dst_strd,
+@ WORD32 height,
+@ WORD32 width)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src1
+@ r1 => *pu1_src2
+@ r2 => *pu1_dst
+@ r3 => src_strd1
+@ r4 => src_strd2
+@ r5 => dst_strd
+@ r6 => height
+@ r7 => width
+@
+.text
+.p2align 2
+
+ .global ih264_inter_pred_luma_bilinear_a9q
+
+ih264_inter_pred_luma_bilinear_a9q:
+@ Entry: dst = (src1 + src2 + 1) >> 1 per pixel; dispatch on width to the wd=16 / wd=8 / wd=4 paths.
+@ In the comments below "sum" = src1 + src2 widened to 16 bit, "avg" = (sum + 1) >> 1 narrowed to 8 bit,
+@ "lo" = pixels 0-7 of a row and "hi" = pixels 8-15.
+ stmfd sp!, {r4-r12, r14} @store register values to stack (10 words = 40 bytes)
+ vstmdb sp!, {d8-d15} @push neon registers to stack (8 doublewords = 64 bytes)
+ ldr r4, [sp, #104] @ r4 = src_strd2 (5th argument; #104 skips the 40 + 64 bytes pushed above)
+ ldr r5, [sp, #108] @ r5 = dst_strd (6th argument)
+ ldr r6, [sp, #112] @ r6 = height (7th argument)
+ ldr r7, [sp, #116] @ r7 = width (8th argument)
+
+ subs r12, r7, #4 @if wd=4 branch to loop_4
+ beq loop_4
+ subs r12, r7, #8 @if wd=8 branch to loop_8
+ beq loop_8
+
+loop_16: @when wd=16; straight-line code: rows 0-7, then rows 8-15 unless ht=8
+
+ vld1.8 {q0}, [r0], r3 @// Load row0 ;src1
+ vld1.8 {q2}, [r1], r4 @// Load row0 ;src2
+ vld1.8 {q1}, [r0], r3 @// Load row1 ;src1
+ vaddl.u8 q10, d0, d4 @ sum row0 lo
+ vld1.8 {q3}, [r1], r4 @// Load row1 ;src2
+ vaddl.u8 q11, d1, d5 @ sum row0 hi
+ vld1.8 {q4}, [r0], r3 @// Load row2 ;src1
+ vaddl.u8 q12, d2, d6 @ sum row1 lo
+ vld1.8 {q5}, [r0], r3 @// Load row3 ;src1
+ vaddl.u8 q13, d3, d7 @ sum row1 hi
+ vld1.8 {q6}, [r1], r4 @// Load row2 ;src2
+ vaddl.u8 q8, d8, d12 @ sum row2 lo
+ vld1.8 {q7}, [r1], r4 @// Load row3 ;src2
+ vaddl.u8 q9, d9, d13 @ sum row2 hi
+ vqrshrun.s16 d28, q10, #1 @ avg row0 lo
+ vqrshrun.s16 d29, q11, #1 @ avg row0 hi
+ vaddl.u8 q10, d10, d14 @ sum row3 lo
+ vqrshrun.s16 d30, q12, #1 @ avg row1 lo
+ vqrshrun.s16 d31, q13, #1 @ avg row1 hi
+ vst1.8 {q14}, [r2], r5 @//Store dest row0
+ vaddl.u8 q11, d11, d15 @ sum row3 hi
+ vst1.8 {q15}, [r2], r5 @//Store dest row1
+ vqrshrun.s16 d28, q8, #1 @ avg row2 lo
+ vld1.8 {q0}, [r0], r3 @// Load row4 ;src1
+ vqrshrun.s16 d29, q9, #1 @ avg row2 hi
+ vld1.8 {q1}, [r0], r3 @// Load row5 ;src1
+ vqrshrun.s16 d30, q10, #1 @ avg row3 lo
+ vld1.8 {q2}, [r1], r4 @// Load row4 ;src2
+ vqrshrun.s16 d31, q11, #1 @ avg row3 hi
+ vld1.8 {q3}, [r1], r4 @// Load row5 ;src2
+ vaddl.u8 q10, d0, d4 @ sum row4 lo
+ vst1.8 {q14}, [r2], r5 @//Store dest row2
+ vaddl.u8 q13, d3, d7 @ sum row5 hi
+ vst1.8 {q15}, [r2], r5 @//Store dest row3
+ vaddl.u8 q11, d1, d5 @ sum row4 hi
+ vld1.8 {q4}, [r0], r3 @// Load row6 ;src1
+ vaddl.u8 q12, d2, d6 @ sum row5 lo
+ vld1.8 {q5}, [r0], r3 @// Load row7 ;src1
+ vqrshrun.s16 d28, q10, #1 @ avg row4 lo
+ vld1.8 {q6}, [r1], r4 @// Load row6 ;src2
+ vqrshrun.s16 d29, q11, #1 @ avg row4 hi
+ vld1.8 {q7}, [r1], r4 @// Load row7 ;src2
+ vaddl.u8 q8, d8, d12 @ sum row6 lo
+ vaddl.u8 q9, d9, d13 @ sum row6 hi
+ vaddl.u8 q10, d10, d14 @ sum row7 lo
+ vqrshrun.s16 d30, q12, #1 @ avg row5 lo
+ vqrshrun.s16 d31, q13, #1 @ avg row5 hi
+ vst1.8 {q14}, [r2], r5 @//Store dest row4
+ vaddl.u8 q11, d11, d15 @ sum row7 hi
+ vst1.8 {q15}, [r2], r5 @//Store dest row5
+ vqrshrun.s16 d28, q8, #1 @ avg row6 lo
+ vqrshrun.s16 d30, q10, #1 @ avg row7 lo
+ vqrshrun.s16 d29, q9, #1 @ avg row6 hi
+ vqrshrun.s16 d31, q11, #1 @ avg row7 hi
+ vst1.8 {q14}, [r2], r5 @//Store dest row6
+ subs r12, r6, #8 @ sets Z when ht=8; the NEON store below leaves the flags untouched
+ vst1.8 {q15}, [r2], r5 @//Store dest row7
+
+ beq end_func @ end function if ht=8
+
+ vld1.8 {q2}, [r1], r4 @// Load row8 ;src2 (kept after the ht=8 exit so an 8-row call never reads row 8)
+ vld1.8 {q0}, [r0], r3 @// Load row8 ;src1
+ vaddl.u8 q10, d0, d4 @ sum row8 lo
+ vld1.8 {q1}, [r0], r3 @// Load row9 ;src1
+ vaddl.u8 q11, d1, d5 @ sum row8 hi
+ vld1.8 {q3}, [r1], r4 @// Load row9 ;src2
+ vqrshrun.s16 d28, q10, #1 @ avg row8 lo
+ vld1.8 {q4}, [r0], r3 @// Load row10 ;src1
+ vqrshrun.s16 d29, q11, #1 @ avg row8 hi
+ vld1.8 {q5}, [r0], r3 @// Load row11 ;src1
+ vaddl.u8 q12, d2, d6 @ sum row9 lo
+ vld1.8 {q6}, [r1], r4 @// Load row10 ;src2
+ vaddl.u8 q13, d3, d7 @ sum row9 hi
+ vld1.8 {q7}, [r1], r4 @// Load row11 ;src2
+ vaddl.u8 q8, d8, d12 @ sum row10 lo
+ vaddl.u8 q9, d9, d13 @ sum row10 hi
+ vaddl.u8 q10, d10, d14 @ sum row11 lo
+ vqrshrun.s16 d30, q12, #1 @ avg row9 lo
+ vst1.8 {q14}, [r2], r5 @//Store dest row8
+ vqrshrun.s16 d31, q13, #1 @ avg row9 hi
+ vst1.8 {q15}, [r2], r5 @//Store dest row9
+ vqrshrun.s16 d28, q8, #1 @ avg row10 lo
+ vld1.8 {q0}, [r0], r3 @// Load row12 ;src1
+ vaddl.u8 q11, d11, d15 @ sum row11 hi
+ vld1.8 {q1}, [r0], r3 @// Load row13 ;src1
+ vqrshrun.s16 d29, q9, #1 @ avg row10 hi
+ vld1.8 {q2}, [r1], r4 @// Load row12 ;src2
+ vqrshrun.s16 d30, q10, #1 @ avg row11 lo
+ vld1.8 {q3}, [r1], r4 @// Load row13 ;src2
+ vqrshrun.s16 d31, q11, #1 @ avg row11 hi
+ vst1.8 {q14}, [r2], r5 @//Store dest row10
+ vaddl.u8 q10, d0, d4 @ sum row12 lo
+ vst1.8 {q15}, [r2], r5 @//Store dest row11
+ vaddl.u8 q11, d1, d5 @ sum row12 hi
+ vld1.8 {q4}, [r0], r3 @// Load row14 ;src1
+ vaddl.u8 q13, d3, d7 @ sum row13 hi
+ vld1.8 {q5}, [r0], r3 @// Load row15 ;src1
+ vaddl.u8 q12, d2, d6 @ sum row13 lo
+ vld1.8 {q6}, [r1], r4 @// Load row14 ;src2
+ vaddl.u8 q8, d8, d12 @ sum row14 lo
+ vld1.8 {q7}, [r1], r4 @// Load row15 ;src2
+ vaddl.u8 q9, d9, d13 @ sum row14 hi
+ vqrshrun.s16 d28, q10, #1 @ avg row12 lo
+ vqrshrun.s16 d29, q11, #1 @ avg row12 hi
+ vaddl.u8 q10, d10, d14 @ sum row15 lo
+ vst1.8 {q14}, [r2], r5 @//Store dest row12
+ vqrshrun.s16 d30, q12, #1 @ avg row13 lo
+ vqrshrun.s16 d31, q13, #1 @ avg row13 hi
+ vaddl.u8 q11, d11, d15 @ sum row15 hi
+ vst1.8 {q15}, [r2], r5 @//Store dest row13
+ vqrshrun.s16 d28, q8, #1 @ avg row14 lo
+ vqrshrun.s16 d29, q9, #1 @ avg row14 hi
+ vqrshrun.s16 d30, q10, #1 @ avg row15 lo
+ vst1.8 {q14}, [r2], r5 @//Store dest row14
+ vqrshrun.s16 d31, q11, #1 @ avg row15 hi
+ vst1.8 {q15}, [r2], r5 @//Store dest row15
+ b end_func
+
+
+
+loop_8: @wd=8; straight-line code: rows 0-3, rows 4-7 unless ht=4, rows 8-15 unless ht=8
+ vld1.8 {d0}, [r0], r3 @// Load row0 ;src1
+ vld1.8 {d4}, [r1], r4 @// Load row0 ;src2
+ vld1.8 {d1}, [r0], r3 @// Load row1 ;src1
+ vaddl.u8 q10, d0, d4 @ row0: src1 + src2, widened to 16 bit
+ vld1.8 {d5}, [r1], r4 @// Load row1 ;src2
+ vld1.8 {d2}, [r0], r3 @// Load row2 ;src1
+ vqrshrun.s16 d28, q10, #1 @ row0: (sum + 1) >> 1, narrowed to 8 bit
+ vld1.8 {d6}, [r1], r4 @// Load row2 ;src2
+ vaddl.u8 q11, d1, d5 @ row1: src1 + src2
+ vld1.8 {d3}, [r0], r3 @// Load row3 ;src1
+ vaddl.u8 q12, d2, d6 @ row2: src1 + src2
+ vst1.8 {d28}, [r2], r5 @//Store dest row0
+ vqrshrun.s16 d29, q11, #1 @ row1: (sum + 1) >> 1
+ vld1.8 {d7}, [r1], r4 @// Load row3 ;src2
+ vqrshrun.s16 d30, q12, #1 @ row2: (sum + 1) >> 1
+ vst1.8 {d29}, [r2], r5 @//Store dest row1
+ vaddl.u8 q13, d3, d7 @ row3: src1 + src2
+ vst1.8 {d30}, [r2], r5 @//Store dest row2
+ vqrshrun.s16 d31, q13, #1 @ row3: (sum + 1) >> 1
+ subs r12, r6, #4 @ sets Z when ht=4; the NEON store below leaves the flags untouched
+ vst1.8 {d31}, [r2], r5 @//Store dest row3
+ beq end_func @ end function if ht=4
+
+ vld1.8 {d12}, [r1], r4 @// Load row4 ;src2
+ vld1.8 {d8}, [r0], r3 @// Load row4 ;src1
+ vld1.8 {d9}, [r0], r3 @// Load row5 ;src1
+ vaddl.u8 q8, d8, d12 @ row4: src1 + src2
+ vld1.8 {d13}, [r1], r4 @// Load row5 ;src2
+ vld1.8 {d10}, [r0], r3 @// Load row6;src1
+ vaddl.u8 q9, d9, d13 @ row5: src1 + src2
+ vld1.8 {d14}, [r1], r4 @// Load row6 ;src2
+ vqrshrun.s16 d28, q8, #1 @ row4: (sum + 1) >> 1
+ vld1.8 {d11}, [r0], r3 @// Load row7 ;src1
+ vqrshrun.s16 d29, q9, #1 @ row5: (sum + 1) >> 1
+ vst1.8 {d28}, [r2], r5 @//Store dest row4
+ vaddl.u8 q10, d10, d14 @ row6: src1 + src2
+ vst1.8 {d29}, [r2], r5 @//Store dest row5
+ vqrshrun.s16 d30, q10, #1 @ row6: (sum + 1) >> 1
+ vld1.8 {d15}, [r1], r4 @// Load row7 ;src2
+ vaddl.u8 q11, d11, d15 @ row7: src1 + src2
+ vst1.8 {d30}, [r2], r5 @//Store dest row6
+ vqrshrun.s16 d31, q11, #1 @ row7: (sum + 1) >> 1
+ subs r12, r6, #8 @ sets Z when ht=8; the NEON store below leaves the flags untouched
+ vst1.8 {d31}, [r2], r5 @//Store dest row7
+ beq end_func @ end function if ht=8
+
+ vld1.8 {d0}, [r0], r3 @// Load row8 ;src1
+ vld1.8 {d4}, [r1], r4 @// Load row8 ;src2
+ vld1.8 {d1}, [r0], r3 @// Load row9 ;src1
+ vaddl.u8 q10, d0, d4 @ row8: src1 + src2
+ vld1.8 {d5}, [r1], r4 @// Load row9 ;src2
+ vld1.8 {d2}, [r0], r3 @// Load row10 ;src1
+ vaddl.u8 q11, d1, d5 @ row9: src1 + src2
+ vld1.8 {d6}, [r1], r4 @// Load row10 ;src2
+ vqrshrun.s16 d28, q10, #1 @ row8: (sum + 1) >> 1
+ vld1.8 {d3}, [r0], r3 @// Load row11 ;src1
+ vaddl.u8 q12, d2, d6 @ row10: src1 + src2
+ vld1.8 {d7}, [r1], r4 @// Load row11 ;src2
+ vqrshrun.s16 d29, q11, #1 @ row9: (sum + 1) >> 1
+ vld1.8 {d8}, [r0], r3 @// Load row12 ;src1
+ vaddl.u8 q13, d3, d7 @ row11: src1 + src2
+ vst1.8 {d28}, [r2], r5 @//Store dest row8
+ vqrshrun.s16 d30, q12, #1 @ row10: (sum + 1) >> 1
+ vld1.8 {d12}, [r1], r4 @// Load row12 ;src2
+ vqrshrun.s16 d31, q13, #1 @ row11: (sum + 1) >> 1
+ vst1.8 {d29}, [r2], r5 @//Store dest row9
+ vaddl.u8 q8, d8, d12 @ row12: src1 + src2
+ vld1.8 {d9}, [r0], r3 @// Load row13 ;src1
+ vqrshrun.s16 d28, q8, #1 @ row12: (sum + 1) >> 1 (d28 is free again: row8 was stored above)
+ vld1.8 {d13}, [r1], r4 @// Load row13 ;src2
+ vld1.8 {d10}, [r0], r3 @// Load row14;src1
+ vaddl.u8 q9, d9, d13 @ row13: src1 + src2
+ vld1.8 {d11}, [r0], r3 @// Load row15 ;src1
+ vld1.8 {d14}, [r1], r4 @// Load row14 ;src2
+ vqrshrun.s16 d29, q9, #1 @ row13: (sum + 1) >> 1 (d29 is free again: row9 was stored above)
+ vld1.8 {d15}, [r1], r4 @// Load row15 ;src2
+ vaddl.u8 q10, d10, d14 @ row14: src1 + src2
+ vst1.8 {d30}, [r2], r5 @//Store dest row10
+ vaddl.u8 q11, d11, d15 @ row15: src1 + src2
+ vst1.8 {d31}, [r2], r5 @//Store dest row11
+ vqrshrun.s16 d30, q10, #1 @ row14: (sum + 1) >> 1
+ vst1.8 {d28}, [r2], r5 @//Store dest row12
+ vqrshrun.s16 d31, q11, #1 @ row15: (sum + 1) >> 1
+ vst1.8 {d29}, [r2], r5 @//Store dest row13
+ vst1.8 {d30}, [r2], r5 @//Store dest row14
+ vst1.8 {d31}, [r2], r5 @//Store dest row15
+
+ b end_func
+
+
+
+loop_4: @wd=4; straight-line code: rows 0-3, then rows 4-7 unless ht=4; only lane 0 (4 pixels) is loaded and stored
+ vld1.32 d0[0], [r0], r3 @// Load row0 ;src1
+ vld1.32 d4[0], [r1], r4 @// Load row0 ;src2
+ vld1.32 d1[0], [r0], r3 @// Load row1 ;src1
+ vaddl.u8 q10, d0, d4 @ row0: src1 + src2, widened to 16 bit
+ vld1.32 d5[0], [r1], r4 @// Load row1 ;src2
+ vld1.32 d2[0], [r0], r3 @// Load row2 ;src1
+ vqrshrun.s16 d28, q10, #1 @ row0: (sum + 1) >> 1, narrowed to 8 bit
+ vld1.32 d6[0], [r1], r4 @// Load row2 ;src2
+ vaddl.u8 q11, d1, d5 @ row1: src1 + src2
+ vld1.32 d3[0], [r0], r3 @// Load row3 ;src1
+ vaddl.u8 q12, d2, d6 @ row2: src1 + src2
+ vst1.32 d28[0], [r2], r5 @//Store dest row0
+ vqrshrun.s16 d29, q11, #1 @ row1: (sum + 1) >> 1
+ vld1.32 d7[0], [r1], r4 @// Load row3 ;src2
+ vqrshrun.s16 d30, q12, #1 @ row2: (sum + 1) >> 1
+ vst1.32 d29[0], [r2], r5 @//Store dest row1
+ vaddl.u8 q13, d3, d7 @ row3: src1 + src2
+ vst1.32 d30[0], [r2], r5 @//Store dest row2
+ vqrshrun.s16 d31, q13, #1 @ row3: (sum + 1) >> 1
+ subs r12, r6, #4 @ sets Z when ht=4; the NEON store below leaves the flags untouched
+ vst1.32 d31[0], [r2], r5 @//Store dest row3
+ beq end_func @ end function if ht=4
+
+ vld1.32 d12[0], [r1], r4 @// Load row4 ;src2
+ vld1.32 d8[0], [r0], r3 @// Load row4 ;src1
+ vld1.32 d9[0], [r0], r3 @// Load row5 ;src1
+ vaddl.u8 q8, d8, d12 @ row4: src1 + src2
+ vld1.32 d13[0], [r1], r4 @// Load row5 ;src2
+ vld1.32 d10[0], [r0], r3 @// Load row6;src1
+ vaddl.u8 q9, d9, d13 @ row5: src1 + src2
+ vld1.32 d14[0], [r1], r4 @// Load row6 ;src2
+ vqrshrun.s16 d28, q8, #1 @ row4: (sum + 1) >> 1
+ vld1.32 d11[0], [r0], r3 @// Load row7 ;src1
+ vqrshrun.s16 d29, q9, #1 @ row5: (sum + 1) >> 1
+ vst1.32 d28[0], [r2], r5 @//Store dest row4
+ vaddl.u8 q10, d10, d14 @ row6: src1 + src2
+ vst1.32 d29[0], [r2], r5 @//Store dest row5
+ vqrshrun.s16 d30, q10, #1 @ row6: (sum + 1) >> 1
+ vld1.32 d15[0], [r1], r4 @// Load row7 ;src2
+ vaddl.u8 q11, d11, d15 @ row7: src1 + src2
+ vst1.32 d30[0], [r2], r5 @//Store dest row6
+ vqrshrun.s16 d31, q11, #1 @ row7: (sum + 1) >> 1
+ vst1.32 d31[0], [r2], r5 @//Store dest row7
+
+end_func:
+
+ vldmia sp!, {d8-d15} @ Restore neon registers that were saved
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack; loading pc returns to the caller
+
+
diff --git a/common/arm/ih264_inter_pred_luma_copy_a9q.s b/common/arm/ih264_inter_pred_luma_copy_a9q.s
new file mode 100755
index 0000000..8ba2fbf
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_copy_a9q.s
@@ -0,0 +1,253 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Interprediction luma function for copy
+@*
+@* @par Description:
+@* Copies the array of width 'wd' and height 'ht' from the location pointed
+@* by 'src' to the location pointed by 'dst'
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_inter_pred_luma_copy (
+@ UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd )
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r7 => ht
+@ r12 => wd
+
+.text
+.p2align 2
+
+ .global ih264_inter_pred_luma_copy_a9q
+
+ih264_inter_pred_luma_copy_a9q:
+ stmfd sp!, {r4-r12, r14} @save r4-r12 and lr (10 words, 40 bytes)
+ vstmdb sp!, {d8-d15} @save d8-d15 (64 bytes); stack arguments now start at sp + 104. NOTE(review): only d0-d7 are written below
+ ldr r12, [sp, #108] @r12 = wd
+ ldr r7, [sp, #104] @r7 = ht
+ cmp r7, #0 @nothing to copy when ht <= 0
+ ble end_loops
+ tst r12, #15 @wd a multiple of 16 -> copy 16 bytes per step
+ beq core_loop_wd_16
+ tst r12, #7 @wd a multiple of 8 -> copy 8 bytes per step
+ beq core_loop_wd_8
+ sub r11, r12, #4 @r11 = wd - 4, used to rewind to the start of a row
+
+outer_loop_wd_4:
+ subs r4, r12, #0 @r4 = wd, width still to copy for this group of 4 rows
+ ble end_inner_loop_wd_4
+
+inner_loop_wd_4:
+ vld1.32 {d0[0]}, [r0] @load 4 bytes of row 0
+ add r5, r0, r2 @r5 = source pointer for row 1 of this column
+ add r6, r1, r3 @r6 = destination pointer for row 1 of this column
+ vst1.32 {d0[0]}, [r1] @store 4 bytes of row 0
+ vld1.32 {d0[0]}, [r5], r2 @load row 1, r5 += src_strd
+ add r0, r0, #4 @pu1_src += 4 (next column)
+ vst1.32 {d0[0]}, [r6], r3 @store row 1, r6 += dst_strd
+ vld1.32 {d0[0]}, [r5], r2 @load row 2, r5 += src_strd
+ subs r4, r4, #4 @remaining width -= 4; sets the flags used by bgt below
+ vst1.32 {d0[0]}, [r6], r3 @store row 2, r6 += dst_strd
+ vld1.32 {d0[0]}, [r5], r2 @load row 3, r5 += src_strd
+ add r1, r1, #4 @pu1_dst += 4 (next column)
+ vst1.32 {d0[0]}, [r6], r3 @store row 3, r6 += dst_strd
+
+ bgt inner_loop_wd_4 @more columns left in this group of 4 rows
+
+end_inner_loop_wd_4:
+ subs r7, r7, #4 @ht -= 4, four rows were copied
+ sub r0, r5, r11 @r5 is 4 rows below the last column; step back wd - 4 to the row start
+ sub r1, r6, r11 @same rewind for the destination
+ bgt outer_loop_wd_4 @flags come from the subs on ht above
+
+end_loops:
+ vldmia sp!, {d8-d15} @ Restore neon registers that were saved
+ ldmfd sp!, {r4-r12, r15} @restore r4-r12 and return by loading pc
+
+
+
+core_loop_wd_8:
+ sub r11, r12, #8 @r11 = wd - 8, used to rewind to the start of a row
+
+outer_loop_wd_8:
+ subs r4, r12, #0 @r4 = wd, width still to copy for this group of 4 rows
+ ble end_inner_loop_wd_8
+
+inner_loop_wd_8:
+ add r5, r0, r2 @r5 = source pointer for row 1 of this column
+ vld1.8 {d0}, [r0]! @load 8 bytes of row 0, pu1_src += 8
+ add r6, r1, r3 @r6 = destination pointer for row 1 of this column
+ vst1.8 {d0}, [r1]! @store 8 bytes of row 0, pu1_dst += 8
+ vld1.8 {d1}, [r5], r2 @load row 1, r5 += src_strd
+ vst1.8 {d1}, [r6], r3 @store row 1, r6 += dst_strd
+ subs r4, r4, #8 @remaining width -= 8; sets the flags used by bgt below
+ vld1.8 {d2}, [r5], r2 @load row 2, r5 += src_strd
+ vst1.8 {d2}, [r6], r3 @store row 2, r6 += dst_strd
+ vld1.8 {d3}, [r5], r2 @load row 3, r5 += src_strd
+ vst1.8 {d3}, [r6], r3 @store row 3, r6 += dst_strd
+ bgt inner_loop_wd_8 @more columns left in this group of 4 rows
+
+end_inner_loop_wd_8:
+ subs r7, r7, #4 @ht -= 4
+ sub r0, r5, r11 @r5 is 4 rows below the last column; step back wd - 8 to the row start
+ sub r1, r6, r11 @same rewind for the destination
+ bgt outer_loop_wd_8 @flags come from the subs on ht above
+
+ vldmia sp!, {d8-d15} @ Restore neon registers that were saved
+ ldmfd sp!, {r4-r12, r15} @restore r4-r12 and return by loading pc
+
+core_loop_wd_16:
+ sub r11, r12, #16 @r11 = wd - 16, used to rewind to the start of a row
+
+outer_loop_wd_16:
+ subs r4, r12, #0 @r4 = wd, width still to copy for this group of 4 rows
+ ble end_inner_loop_wd_16
+
+inner_loop_wd_16:
+ add r5, r0, r2 @r5 = source pointer for row 1 of this column
+ vld1.8 {q0}, [r0]! @load 16 bytes of row 0, pu1_src += 16
+ add r6, r1, r3 @r6 = destination pointer for row 1 of this column
+ vst1.8 {q0}, [r1]! @store 16 bytes of row 0, pu1_dst += 16
+ vld1.8 {q1}, [r5], r2 @load row 1, r5 += src_strd
+ vst1.8 {q1}, [r6], r3 @store row 1, r6 += dst_strd
+ subs r4, r4, #16 @remaining width -= 16; sets the flags used by bgt below
+ vld1.8 {q2}, [r5], r2 @load row 2, r5 += src_strd
+ vst1.8 {q2}, [r6], r3 @store row 2, r6 += dst_strd
+ vld1.8 {q3}, [r5], r2 @load row 3, r5 += src_strd
+ vst1.8 {q3}, [r6], r3 @store row 3, r6 += dst_strd
+ bgt inner_loop_wd_16 @more columns left in this group of 4 rows
+
+end_inner_loop_wd_16:
+ subs r7, r7, #4 @ht -= 4
+ sub r0, r5, r11 @r5 is 4 rows below the last column; step back wd - 16 to the row start
+ sub r1, r6, r11 @same rewind for the destination
+ bgt outer_loop_wd_16 @flags come from the subs on ht above
+
+ vldmia sp!, {d8-d15} @ Restore neon registers that were saved
+ ldmfd sp!, {r4-r12, r15} @restore r4-r12 and return by loading pc
+
+
+@ /*
+@ ********************************************************************************
+@ *
+@ * @brief This function copies a 4x4 block to destination
+@ *
+@ * @par Description:
+@ * Copies a 4x4 block to destination, where both src and dst are interleaved
+@ *
+@ * @param[in] pi2_src
+@ * Source
+@ *
+@ * @param[in] pu1_out
+@ * Output pointer
+@ *
+@ * @param[in] pred_strd,
+@ * Prediction buffer stride
+@ *
+@ * @param[in] out_strd
+@ * Output buffer stride
+@ *
+@ * @returns none
+@ *
+@ * @remarks none
+@ * Currently wd and ht are not used, i.e. a 4x4 block is always copied
+@ *
+@ *******************************************************************************
+@ */
+@ void ih264_interleave_copy(WORD16 *pi2_src,
+@ UWORD8 *pu1_out,
+@ WORD32 pred_strd,
+@ WORD32 out_strd,
+@ WORD32 wd,
+@ WORD32 ht)
+@ Register Usage
+@ r0 : pi2_src
+@ r1 : pu1_out
+@ r2 : src_strd
+@ r3 : out_strd
+@ Neon registers d2-d5, d18-d21 and d30-d31 (q15) are used
+@ No need for pushing arm and neon registers
+
+ .global ih264_interleave_copy_a9
+ih264_interleave_copy_a9:
+
+ vld1.u8 d2, [r0], r2 @load 8 bytes of source row 0, r0 += src_strd
+ vld1.u8 d3, [r0], r2 @source row 1
+ vld1.u8 d4, [r0], r2 @source row 2
+ vld1.u8 d5, [r0], r2 @source row 3
+
+ mov r0, r1 @r0 = output base for the stores; r1 is advanced by the loads below
+
+ vld1.u8 d18, [r1], r3 @load 8 bytes of output row 0, r1 += out_strd
+ vld1.u8 d19, [r1], r3 @output row 1
+ vmov.u16 q15, #0x00ff @mask selecting the low byte of every 16-bit lane
+ vld1.u8 d20, [r1], r3 @output row 2
+ vld1.u8 d21, [r1], r3 @output row 3
+
+ vbit.u8 q9, q1, q15 @rows 0-1: take source bits where the mask is set, keep the output bits elsewhere
+ vbit.u8 q10, q2, q15 @rows 2-3: same merge
+
+ vst1.u8 d18, [r0], r3 @store merged row 0, r0 += out_strd
+ vst1.u8 d19, [r0], r3 @store merged row 1
+ vst1.u8 d20, [r0], r3 @store merged row 2
+ vst1.u8 d21, [r0], r3 @store merged row 3
+
+ bx lr @return; no registers were pushed
+
+
+
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
new file mode 100755
index 0000000..43321a8
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -0,0 +1,441 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@*
+@* @author
+@* Mohit
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* This function implements a two stage cascaded six tap filter. It
+@* applies the six tap filter in the vertical direction on the
+@* predictor values, followed by applying the same filter in the
+@* horizontal direction on the output of the first stage. The six tap
+@* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+@* interpolation process"
+@*
+@* @par Description:
+@* This function is called to obtain pixels lying at the following
+@* location (1/2,1/2). The function interpolates
+@* the predictors first in the vertical direction and then in the
+@* horizontal direction to output the (1/2,1/2) samples.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pu1_tmp: temporary buffer
+@*
+@* @param[in] dydx: x and y reference offset for qpel calculations: UNUSED in this function.
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/;
+
+@void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd,
+@ UWORD8* pu1_tmp,
+@ UWORD32 dydx)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r8 => ht
+@ r9 => wd
+
+.text
+.p2align 2
+
+ .global ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q
+
+ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q:
+
+ stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (40 bytes)
+ vstmdb sp!, {d8-d15} @ save d8-d15 (64 bytes); stack arguments now start at sp + 104
+ ldr r8, [sp, #104] @ r8 = ht
+ sub r0, r0, r2, lsl #1 @ pu1_src -= 2 * src_strd (two rows above)
+ sub r0, r0, #2 @ pu1_src -= 2 (two pixels to the left)
+ ldr r9, [sp, #108] @ r9 = wd
+
+ vmov.s16 d0, #20 @ Filter coeff 20 in every 16-bit lane of d0
+ vmov.s16 d1, #5 @ Filter coeff 5 in every 16-bit lane of d1
+ subs r12, r9, #4 @if wd=4 branch to loop_4
+ beq loop_4
+ subs r12, r9, #8 @if wd=8 branch to loop_8
+ beq loop_8
+
+ mov r10, #8 @ r10 = 8: step from the left to the right 8 pixels of a row
+ sub r7, r3, r10 @ r7 = dst_strd - 8: step from the right half to the next row
+ @ fall through: any wd other than 4 or 8 is handled as wd=16
+
+loop_16:
+ vld1.u32 {d2, d3, d4}, [r0], r2 @ row 0: load 24 bytes, r0 += src_strd
+ vld1.u32 {d5, d6, d7}, [r0], r2 @ row 1
+ vld1.u32 {d8, d9, d10}, [r0], r2 @ row 2
+ vld1.u32 {d11, d12, d13}, [r0], r2 @ row 3
+ vld1.u32 {d14, d15, d16}, [r0], r2 @ row 4
+ vld1.u32 {d17, d18, d19}, [r0], r2 @ row 5
+
+ @ VERTICAL FILTERING FOR OUTPUT ROW 0 (source rows 0-5)
+ vaddl.u8 q10, d8, d11 @ q10 = row2 + row3 (bytes 0-7)
+ vaddl.u8 q12, d2, d17 @ q12 = row0 + row5 (bytes 0-7)
+ vaddl.u8 q11, d5, d14 @ q11 = row1 + row4 (bytes 0-7)
+ vaddl.u8 q13, d3, d18 @ q13 = row0 + row5 (bytes 8-15)
+ vmla.u16 q12, q10, d0[0] @ q12 += 20 * (row2 + row3)
+ vmls.s16 q12, q11, d1[0] @ q12 -= 5 * (row1 + row4)
+ vaddl.u8 q10, d6, d15 @ q10 = row1 + row4 (bytes 8-15)
+ vaddl.u8 q11, d9, d12 @ q11 = row2 + row3 (bytes 8-15)
+ vaddl.u8 q14, d4, d19 @ q14 = row0 + row5 (bytes 16-23)
+ vmla.u16 q13, q11, d0[0] @ q13 += 20 * (row2 + row3)
+ vmls.s16 q13, q10, d1[0] @ q13 -= 5 * (row1 + row4)
+ vaddl.u8 q11, d10, d13 @ q11 = row2 + row3 (bytes 16-23)
+ vaddl.u8 q10, d7, d16 @ q10 = row1 + row4 (bytes 16-23)
+ vmla.u16 q14, q11, d0[0] @ q14 += 20 * (row2 + row3)
+ vmls.s16 q14, q10, d1[0] @ q14 -= 5 * (row1 + row4)
+ vext.16 q10, q12, q13, #5 @ q10 = a[5..12] (column 1)
+
+ @ q12, q13, q14 hold the 24 vertically filtered 16-bit values a[0..23]
+ @ CASCADED (HORIZONTAL) FILTERING FOR OUTPUT ROW 0
+ vext.16 q11, q12, q13, #2 @ q11 = a[2..9] (column 1)
+ vaddl.s16 q1, d20, d24 @ a0 + a5, outputs 0-3 (column 1)
+ vaddl.s16 q15, d21, d25 @ a0 + a5, outputs 4-7 (column 1)
+ vmlal.s16 q1, d22, d0[0] @ + 20*a2, outputs 0-3
+ vmlal.s16 q15, d23, d0[0] @ + 20*a2, outputs 4-7
+ vext.16 q11, q12, q13, #1 @ q11 = a[1..8] (column 1)
+ vext.16 q10, q12, q13, #3 @ q10 = a[3..10] (column 1)
+ vmlsl.s16 q1, d22, d1[0] @ - 5*a1, outputs 0-3
+ vmlsl.s16 q15, d23, d1[0] @ - 5*a1, outputs 4-7
+ vmlal.s16 q1, d20, d0[0] @ + 20*a3, outputs 0-3
+ vmlal.s16 q15, d21, d0[0] @ + 20*a3, outputs 4-7
+ vext.16 q11, q12, q13, #4 @ q11 = a[4..11] (column 1)
+ vext.16 q10, q13, q14, #5 @ q10 = a[5..12] of column 2, prepared early
+ vmlsl.s16 q1, d22, d1[0] @ - 5*a4, outputs 0-3
+ vmlsl.s16 q15, d23, d1[0] @ - 5*a4, outputs 4-7
+ vqrshrun.s32 d22, q1, #10 @ round, shift right by 10, saturating narrow to 16 bit
+ vqrshrun.s32 d23, q15, #10 @ same for outputs 4-7
+ vqshrun.s16 d22, q11, #0 @ saturating narrow to unsigned 8 bit
+ vst1.u8 {d22}, [r1], r10 @ store dest row 0, pixels 0-7; r1 += 8
+ vext.16 q11, q13, q14, #2 @ q11 = a[2..9] (column 2)
+ vaddl.s16 q1, d20, d26 @ a0 + a5, outputs 0-3 (column 2)
+ vaddl.s16 q15, d21, d27 @ a0 + a5, outputs 4-7 (column 2)
+ vmlal.s16 q1, d22, d0[0] @ + 20*a2, outputs 0-3
+ vmlal.s16 q15, d23, d0[0] @ + 20*a2, outputs 4-7
+ vext.16 q10, q13, q14, #3 @ q10 = a[3..10] (column 2)
+ vext.16 q11, q13, q14, #1 @ q11 = a[1..8] (column 2)
+ vmlal.s16 q1, d20, d0[0] @ + 20*a3, outputs 0-3
+ vmlal.s16 q15, d21, d0[0] @ + 20*a3, outputs 4-7
+ vext.16 q10, q13, q14, #4 @ q10 = a[4..11] (column 2)
+ vmlsl.s16 q1, d22, d1[0] @ - 5*a1, outputs 0-3
+ vmlsl.s16 q15, d23, d1[0] @ - 5*a1, outputs 4-7
+ vmlsl.s16 q1, d20, d1[0] @ - 5*a4, outputs 0-3
+ vmlsl.s16 q15, d21, d1[0] @ - 5*a4, outputs 4-7
+ vqrshrun.s32 d20, q1, #10 @ round, shift right by 10, saturating narrow to 16 bit
+ vqrshrun.s32 d21, q15, #10 @ same for outputs 4-7
+ vld1.u32 {d2, d3, d4}, [r0], r2 @ row 6: reuses d2-d4 now that q1 has been narrowed
+ vqshrun.s16 d22, q10, #0 @ saturating narrow to unsigned 8 bit
+ vst1.u8 {d22}, [r1], r7 @ store dest row 0, pixels 8-15; r1 += dst_strd - 8
+
+ @ VERTICAL FILTERING FOR OUTPUT ROW 1 (source rows 1-6)
+ vaddl.u8 q10, d11, d14 @ q10 = row3 + row4 (bytes 0-7)
+ vaddl.u8 q12, d5, d2 @ q12 = row1 + row6 (bytes 0-7)
+ vaddl.u8 q11, d8, d17 @ q11 = row2 + row5 (bytes 0-7)
+ vaddl.u8 q13, d6, d3 @ q13 = row1 + row6 (bytes 8-15)
+ vmla.u16 q12, q10, d0[0] @ q12 += 20 * (row3 + row4)
+ vaddl.u8 q10, d9, d18 @ q10 = row2 + row5 (bytes 8-15)
+ vmls.s16 q12, q11, d1[0] @ q12 -= 5 * (row2 + row5)
+ vaddl.u8 q11, d12, d15 @ q11 = row3 + row4 (bytes 8-15)
+ vaddl.u8 q14, d7, d4 @ q14 = row1 + row6 (bytes 16-23)
+ vmla.u16 q13, q11, d0[0] @ q13 += 20 * (row3 + row4)
+ vaddl.u8 q11, d13, d16 @ q11 = row3 + row4 (bytes 16-23)
+ vmls.s16 q13, q10, d1[0] @ q13 -= 5 * (row2 + row5)
+ vmla.u16 q14, q11, d0[0] @ q14 += 20 * (row3 + row4)
+ vaddl.u8 q10, d10, d19 @ q10 = row2 + row5 (bytes 16-23)
+ vmls.s16 q14, q10, d1[0] @ q14 -= 5 * (row2 + row5)
+ vext.16 q10, q12, q13, #5 @ q10 = a[5..12] (column 1)
+
+ @ q12, q13, q14 hold the 24 vertically filtered 16-bit values a[0..23]
+ @ CASCADED (HORIZONTAL) FILTERING FOR OUTPUT ROW 1
+ vext.16 q11, q12, q13, #2 @ q11 = a[2..9] (column 1)
+ vaddl.s16 q3, d20, d24 @ a0 + a5, outputs 0-3 (column 1)
+ vaddl.s16 q15, d21, d25 @ a0 + a5, outputs 4-7 (column 1)
+ vmlal.s16 q3, d22, d0[0] @ + 20*a2, outputs 0-3
+ vmlal.s16 q15, d23, d0[0] @ + 20*a2, outputs 4-7
+ vext.16 q11, q12, q13, #1 @ q11 = a[1..8] (column 1)
+ vext.16 q10, q12, q13, #3 @ q10 = a[3..10] (column 1)
+ vmlsl.s16 q3, d22, d1[0] @ - 5*a1, outputs 0-3
+ vmlsl.s16 q15, d23, d1[0] @ - 5*a1, outputs 4-7
+ vmlal.s16 q3, d20, d0[0] @ + 20*a3, outputs 0-3
+ vmlal.s16 q15, d21, d0[0] @ + 20*a3, outputs 4-7
+ vext.16 q11, q12, q13, #4 @ q11 = a[4..11] (column 1)
+ vext.16 q10, q13, q14, #5 @ q10 = a[5..12] of column 2, prepared early
+ vmlsl.s16 q3, d22, d1[0] @ - 5*a4, outputs 0-3
+ vmlsl.s16 q15, d23, d1[0] @ - 5*a4, outputs 4-7
+ vqrshrun.s32 d22, q3, #10 @ round, shift right by 10, saturating narrow to 16 bit
+ vqrshrun.s32 d23, q15, #10 @ same for outputs 4-7
+ vqshrun.s16 d22, q11, #0 @ saturating narrow to unsigned 8 bit
+ vst1.u8 {d22}, [r1], r10 @ store dest row 1, pixels 0-7; r1 += 8
+ vext.16 q11, q13, q14, #2 @ q11 = a[2..9] (column 2)
+ vaddl.s16 q3, d20, d26 @ a0 + a5, outputs 0-3 (column 2)
+ vaddl.s16 q15, d21, d27 @ a0 + a5, outputs 4-7 (column 2)
+ vmlal.s16 q3, d22, d0[0] @ + 20*a2, outputs 0-3
+ vmlal.s16 q15, d23, d0[0] @ + 20*a2, outputs 4-7
+ vext.16 q10, q13, q14, #3 @ q10 = a[3..10] (column 2)
+ vext.16 q11, q13, q14, #1 @ q11 = a[1..8] (column 2)
+ vmlal.s16 q3, d20, d0[0] @ + 20*a3, outputs 0-3
+ vmlal.s16 q15, d21, d0[0] @ + 20*a3, outputs 4-7
+ vext.16 q10, q13, q14, #4 @ q10 = a[4..11] (column 2)
+ vmlsl.s16 q3, d22, d1[0] @ - 5*a1, outputs 0-3
+ vmlsl.s16 q15, d23, d1[0] @ - 5*a1, outputs 4-7
+ vmlsl.s16 q3, d20, d1[0] @ - 5*a4, outputs 0-3
+ vmlsl.s16 q15, d21, d1[0] @ - 5*a4, outputs 4-7
+ vqrshrun.s32 d20, q3, #10 @ round, shift right by 10, saturating narrow to 16 bit
+ vqrshrun.s32 d21, q15, #10 @ same for outputs 4-7
+ vqshrun.s16 d22, q10, #0 @ saturating narrow to unsigned 8 bit
+ vst1.u8 {d22}, [r1], r7 @ store dest row 1, pixels 8-15; r1 += dst_strd - 8
+
+ subs r8, r8, #2 @ 2 rows processed, decrement by 2
+ subne r0, r0 , r2, lsl #2 @ rows remain: step the source back 4 rows ...
+ subne r0, r0, r2 @ ... and one more: 7 rows were loaded, the window moves down by 2
+ beq end_func @ all rows done (flags still from the subs above)
+
+ b loop_16 @ process the next pair of rows
+
+loop_8:
+ vld1.u32 {d2, d3}, [r0], r2 @ row 0: load 16 bytes, r0 += src_strd
+ vld1.u32 {d4, d5}, [r0], r2 @ row 1
+ vld1.u32 {d6, d7}, [r0], r2 @ row 2
+ vld1.u32 {d8, d9}, [r0], r2 @ row 3
+ vld1.u32 {d10, d11}, [r0], r2 @ row 4
+ vld1.u32 {d12, d13}, [r0], r2 @ row 5
+
+ @ VERTICAL FILTERING FOR OUTPUT ROW 0 (source rows 0-5)
+ vaddl.u8 q10, d6, d8 @ q10 = row2 + row3 (bytes 0-7)
+ vaddl.u8 q11, d4, d10 @ q11 = row1 + row4 (bytes 0-7)
+ vaddl.u8 q12, d2, d12 @ q12 = row0 + row5 (bytes 0-7)
+ vaddl.u8 q13, d3, d13 @ q13 = row0 + row5 (bytes 8-15)
+ vaddl.u8 q14, d7, d9 @ q14 = row2 + row3 (bytes 8-15)
+ vmla.u16 q12, q10, d0[0] @ q12 += 20 * (row2 + row3)
+ vmls.s16 q12, q11, d1[0] @ q12 -= 5 * (row1 + row4)
+ vaddl.u8 q15, d5, d11 @ q15 = row1 + row4 (bytes 8-15)
+ vmla.u16 q13, q14, d0[0] @ q13 += 20 * (row2 + row3)
+ vmls.s16 q13, q15, d1[0] @ q13 -= 5 * (row1 + row4)
+ @ q12, q13 hold the 16 vertically filtered 16-bit values a[0..15]
+ @ CASCADED (HORIZONTAL) FILTERING FOR OUTPUT ROW 0
+
+ vext.16 q10, q12, q13, #5 @ q10 = a[5..12]
+ vext.16 q11, q12, q13, #2 @ q11 = a[2..9]
+ vaddl.s16 q14, d20, d24 @ a0 + a5, outputs 0-3
+ vaddl.s16 q15, d21, d25 @ a0 + a5, outputs 4-7
+ vext.16 q9, q12, q13, #1 @ q9 = a[1..8]
+ vext.16 q10, q12, q13, #3 @ q10 = a[3..10]
+ vext.16 q1, q12, q13, #4 @ q1 = a[4..11] (row 0 bytes in d2/d3 are no longer needed)
+ vmlal.s16 q14, d22, d0[0] @ + 20*a2, outputs 0-3
+ vmlsl.s16 q14, d18, d1[0] @ - 5*a1, outputs 0-3
+ vmlal.s16 q14, d20, d0[0] @ + 20*a3, outputs 0-3
+ vmlsl.s16 q14, d2, d1[0] @ - 5*a4, outputs 0-3
+ vld1.u32 {d14, d15}, [r0], r2 @ row 6: load 16 bytes, r0 += src_strd
+ vmlal.s16 q15, d23, d0[0] @ + 20*a2, outputs 4-7
+ vmlal.s16 q15, d21, d0[0] @ + 20*a3, outputs 4-7
+ vmlsl.s16 q15, d19, d1[0] @ - 5*a1, outputs 4-7
+ vmlsl.s16 q15, d3, d1[0] @ - 5*a4, outputs 4-7
+
+ vaddl.u8 q12, d4, d14 @ row 1 vertical: q12 = row1 + row6 (bytes 0-7)
+ vaddl.u8 q13, d5, d15 @ row 1 vertical: q13 = row1 + row6 (bytes 8-15)
+ vqrshrun.s32 d18, q14, #10 @ row 0 outputs 0-3: round, shift right by 10, narrow to 16 bit
+ vaddl.u8 q14, d9, d11 @ q14 = row3 + row4 (bytes 8-15)
+ vaddl.u8 q10, d8, d10 @ q10 = row3 + row4 (bytes 0-7)
+ vaddl.u8 q11, d6, d12 @ q11 = row2 + row5 (bytes 0-7)
+ vqrshrun.s32 d19, q15, #10 @ row 0 outputs 4-7: round, shift right by 10, narrow to 16 bit
+ vmla.u16 q12, q10, d0[0] @ q12 += 20 * (row3 + row4)
+ vmls.s16 q12, q11, d1[0] @ q12 -= 5 * (row2 + row5)
+ vaddl.u8 q15, d7, d13 @ q15 = row2 + row5 (bytes 8-15)
+ vmla.u16 q13, q14, d0[0] @ q13 += 20 * (row3 + row4)
+ vmls.s16 q13, q15, d1[0] @ q13 -= 5 * (row2 + row5)
+ vqshrun.s16 d2, q9, #0 @ row 0: saturating narrow to unsigned 8 bit
+ @ vertical filtering for output row 1 was interleaved above
+
+ @ q12, q13 hold the 16 vertically filtered 16-bit values a[0..15]
+ @ CASCADED (HORIZONTAL) FILTERING FOR OUTPUT ROW 1
+ vext.16 q10, q12, q13, #5 @ q10 = a[5..12]
+ vext.16 q11, q12, q13, #2 @ q11 = a[2..9]
+ vaddl.s16 q14, d20, d24 @ a0 + a5, outputs 0-3
+ vaddl.s16 q15, d21, d25 @ a0 + a5, outputs 4-7
+ vst1.u8 {d2}, [r1], r3 @ store dest row 0, 8 pixels; r1 += dst_strd
+ vext.16 q9, q12, q13, #1 @ q9 = a[1..8]
+ vext.16 q10, q12, q13, #3 @ q10 = a[3..10]
+ vext.16 q2, q12, q13, #4 @ q2 = a[4..11] (row 1 bytes in d4/d5 are no longer needed)
+ vmlal.s16 q14, d22, d0[0] @ + 20*a2, outputs 0-3
+ vmlsl.s16 q14, d18, d1[0] @ - 5*a1, outputs 0-3
+ vmlal.s16 q14, d20, d0[0] @ + 20*a3, outputs 0-3
+ vmlsl.s16 q14, d4, d1[0] @ - 5*a4, outputs 0-3
+ vmlal.s16 q15, d23, d0[0] @ + 20*a2, outputs 4-7
+ vmlal.s16 q15, d21, d0[0] @ + 20*a3, outputs 4-7
+ vmlsl.s16 q15, d19, d1[0] @ - 5*a1, outputs 4-7
+ vmlsl.s16 q15, d5, d1[0] @ - 5*a4, outputs 4-7
+ vqrshrun.s32 d18, q14, #10 @ round, shift right by 10, saturating narrow to 16 bit
+ vqrshrun.s32 d19, q15, #10 @ same for outputs 4-7
+ vqshrun.s16 d3, q9, #0 @ saturating narrow to unsigned 8 bit
+ vst1.u8 {d3}, [r1], r3 @ store dest row 1, 8 pixels; r1 += dst_strd
+
+ subs r8, r8, #2 @ 2 rows processed, decrement by 2
+ subne r0, r0 , r2, lsl #2 @ rows remain: step the source back 4 rows ...
+ subne r0, r0, r2 @ ... and one more: 7 rows were loaded, the window moves down by 2
+ beq end_func @ all rows done (flags still from the subs above)
+
+ b loop_8 @ process the next pair of rows
+
+loop_4:
+ vld1.u32 {d2, d3}, [r0], r2 @ row 0: load 16 bytes, r0 += src_strd
+ vld1.u32 {d4, d5}, [r0], r2 @ row 1
+ vld1.u32 {d6, d7}, [r0], r2 @ row 2
+ vld1.u32 {d8, d9}, [r0], r2 @ row 3
+ vld1.u32 {d10, d11}, [r0], r2 @ row 4
+ vld1.u32 {d12, d13}, [r0], r2 @ row 5
+
+ @ VERTICAL FILTERING FOR OUTPUT ROW 0 (source rows 0-5)
+ vaddl.u8 q10, d6, d8 @ q10 = row2 + row3 (bytes 0-7)
+ vaddl.u8 q11, d4, d10 @ q11 = row1 + row4 (bytes 0-7)
+ vaddl.u8 q12, d2, d12 @ q12 = row0 + row5 (bytes 0-7)
+ vaddl.u8 q13, d3, d13 @ q13 = row0 + row5 (bytes 8-15)
+ vaddl.u8 q14, d7, d9 @ q14 = row2 + row3 (bytes 8-15)
+ vmla.u16 q12, q10, d0[0] @ q12 += 20 * (row2 + row3)
+ vmls.s16 q12, q11, d1[0] @ q12 -= 5 * (row1 + row4)
+ vaddl.u8 q15, d5, d11 @ q15 = row1 + row4 (bytes 8-15)
+ vmla.u16 q13, q14, d0[0] @ q13 += 20 * (row2 + row3)
+ vmls.s16 q13, q15, d1[0] @ q13 -= 5 * (row1 + row4)
+ @ q12, q13 hold the 16 vertically filtered 16-bit values a[0..15]
+ @ CASCADED (HORIZONTAL) FILTERING FOR OUTPUT ROW 0
+
+ vext.16 q10, q12, q13, #5 @ q10 = a[5..12]
+ vext.16 q11, q12, q13, #2 @ q11 = a[2..9]
+ vaddl.s16 q14, d20, d24 @ a0 + a5, outputs 0-3
+ vaddl.s16 q15, d21, d25 @ a0 + a5, outputs 4-7
+
+ vext.16 q1, q12, q13, #4 @ q1 = a[4..11] (row 0 bytes in d2/d3 are no longer needed)
+ vext.16 q9, q12, q13, #1 @ q9 = a[1..8]
+ vext.16 q10, q12, q13, #3 @ q10 = a[3..10]
+
+ vmlal.s16 q14, d22, d0[0] @ + 20*a2, outputs 0-3
+ vmlsl.s16 q14, d18, d1[0] @ - 5*a1, outputs 0-3
+ vmlal.s16 q14, d20, d0[0] @ + 20*a3, outputs 0-3
+ vmlsl.s16 q14, d2, d1[0] @ - 5*a4, outputs 0-3
+ vld1.u32 {d14, d15}, [r0], r2 @ row 6: load 16 bytes, r0 += src_strd
+ vmlal.s16 q15, d23, d0[0] @ + 20*a2, outputs 4-7
+ vmlal.s16 q15, d21, d0[0] @ + 20*a3, outputs 4-7
+ vmlsl.s16 q15, d19, d1[0] @ - 5*a1, outputs 4-7
+ vmlsl.s16 q15, d3, d1[0] @ - 5*a4, outputs 4-7
+ vaddl.u8 q12, d4, d14 @ row 1 vertical: q12 = row1 + row6 (bytes 0-7)
+ vaddl.u8 q13, d5, d15 @ row 1 vertical: q13 = row1 + row6 (bytes 8-15)
+ vqrshrun.s32 d18, q14, #10 @ row 0 outputs 0-3: round, shift right by 10, narrow to 16 bit
+ vaddl.u8 q14, d9, d11 @ q14 = row3 + row4 (bytes 8-15)
+ vaddl.u8 q11, d6, d12 @ q11 = row2 + row5 (bytes 0-7)
+ vaddl.u8 q10, d8, d10 @ q10 = row3 + row4 (bytes 0-7)
+ vqrshrun.s32 d19, q15, #10 @ row 0 outputs 4-7: round, shift right by 10, narrow to 16 bit
+ vmla.u16 q12, q10, d0[0] @ q12 += 20 * (row3 + row4)
+ vmls.s16 q12, q11, d1[0] @ q12 -= 5 * (row2 + row5)
+ vaddl.u8 q15, d7, d13 @ q15 = row2 + row5 (bytes 8-15)
+ vqshrun.s16 d2, q9, #0 @ row 0: saturating narrow to unsigned 8 bit
+ vmla.u16 q13, q14, d0[0] @ q13 += 20 * (row3 + row4)
+ vmls.s16 q13, q15, d1[0] @ q13 -= 5 * (row2 + row5)
+
+ @ vertical filtering for output row 1 was interleaved above
+
+ @ q12, q13 hold the 16 vertically filtered 16-bit values a[0..15]
+ @ CASCADED (HORIZONTAL) FILTERING FOR OUTPUT ROW 1
+ vext.16 q10, q12, q13, #5 @ q10 = a[5..12]
+ vext.16 q11, q12, q13, #2 @ q11 = a[2..9]
+ vst1.u32 {d2[0]}, [r1], r3 @ store dest row 0, first 4 pixels only; r1 += dst_strd
+ vaddl.s16 q14, d20, d24 @ a0 + a5, outputs 0-3
+ vaddl.s16 q15, d21, d25 @ a0 + a5, outputs 4-7
+ vext.16 q9, q12, q13, #1 @ q9 = a[1..8]
+ vext.16 q10, q12, q13, #3 @ q10 = a[3..10]
+ vext.16 q2, q12, q13, #4 @ q2 = a[4..11] (row 1 bytes in d4/d5 are no longer needed)
+ vmlal.s16 q14, d22, d0[0] @ + 20*a2, outputs 0-3
+ vmlsl.s16 q14, d18, d1[0] @ - 5*a1, outputs 0-3
+ vmlal.s16 q14, d20, d0[0] @ + 20*a3, outputs 0-3
+ vmlsl.s16 q14, d4, d1[0] @ - 5*a4, outputs 0-3
+ vmlal.s16 q15, d23, d0[0] @ + 20*a2, outputs 4-7
+ vmlal.s16 q15, d21, d0[0] @ + 20*a3, outputs 4-7
+ vmlsl.s16 q15, d19, d1[0] @ - 5*a1, outputs 4-7
+ vmlsl.s16 q15, d5, d1[0] @ - 5*a4, outputs 4-7
+ vqrshrun.s32 d18, q14, #10 @ round, shift right by 10, saturating narrow to 16 bit
+ vqrshrun.s32 d19, q15, #10 @ same for outputs 4-7
+ vqshrun.s16 d4, q9, #0 @ saturating narrow to unsigned 8 bit
+ vst1.u32 {d4[0]}, [r1], r3 @ store dest row 1, first 4 pixels only; r1 += dst_strd
+
+ subs r8, r8, #2 @ 2 rows processed, decrement by 2
+ subne r0, r0 , r2, lsl #2 @ rows remain: step the source back 4 rows ...
+ subne r0, r0, r2 @ ... and one more: 7 rows were loaded, the window moves down by 2
+ beq end_func @ all rows done (flags still from the subs above)
+
+ b loop_4 @ process the next pair of rows
+
+end_func:
+ vldmia sp!, {d8-d15} @ restore d8-d15 saved at entry
+ ldmfd sp!, {r4-r12, pc} @ restore r4-r12 and return by loading the saved lr into pc
+
+
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
new file mode 100755
index 0000000..65a6de7
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
@@ -0,0 +1,1044 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@*
+@* @author
+@* Mohit
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* This function implements a two stage cascaded six tap filter. It
+@* applies the six tap filter in the horizontal direction on the
+@* predictor values, followed by applying the same filter in the
+@* vertical direction on the output of the first stage. It then averages
+@* the output of the 1st stage and the output of the 2nd stage to obtain
+@* the quarter pel values. The six tap filtering operation is described
+@* in sec 8.4.2.2.1 titled "Luma sample interpolation process".
+@*
+@* @par Description:
+@* This function is called to obtain pixels lying at the following
+@* location (1/2,1/4) or (1/2,3/4). The function interpolates
+@* the predictors first in the horizontal direction and then in the
+@* vertical direction to output the (1/2,1/2) value. It then averages
+@* the output of the 1st stage, (1/2,0) or (1/2,1), with the (1/2,1/2)
+@* value to obtain (1/2,1/4) or (1/2,3/4) depending on the offset.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pu1_tmp: temporary buffer
+@*
+@* @param[in] dydx: x and y reference offset for qpel calculations
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/;
+
+@void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd,
+@ UWORD8* pu1_tmp,
+@ UWORD32 dydx)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ht
+@ r5 => wd
+@ r7 => dydx
+@ r9 => *pu1_tmp
+
+.text
+.p2align 2
+
+ .global ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q
+
+ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q:
+
+ stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (10 words = 40 bytes)
+ vstmdb sp!, {d8-d15} @ save d8-d15 (64 bytes); stack arguments now start at sp + 104
+ ldr r4, [sp, #104] @ r4 = ht
+ sub r0, r0, r2, lsl #1 @ pu1_src - 2*src_strd
+ sub r0, r0, #2 @ pu1_src - 2*src_strd - 2
+ ldr r5, [sp, #108] @ r5 = wd
+ ldr r7, [sp, #116] @ r7 = dydx
+ lsr r7, r7, #3 @ deciding bit = dydx >> 3; equals ((dydx >> 2) & 0x3) >> 1 only while dydx < 16 - TODO confirm with caller
+ ldr r9, [sp, #112] @ r9 = pu1_tmp
+ add r7, r7, #2 @ 2 + deciding bit = index of the temp-buffer row averaged with output row 0
+ mov r6, #48 @ r6 = 48, the byte step used for every temp-buffer row access
+ mla r7, r7, r6, r9 @ r7 = pu1_tmp + (2 + deciding bit) * 48
+
+ subs r12, r5, #4 @ if wd == 4 branch to loop_4_start
+ beq loop_4_start
+
+ subs r12, r5, #8 @ if wd == 8 branch to loop_8_start
+ beq loop_8_start
+
+ @ wd is neither 4 nor 8: handled as wd = 16, in two halves
+ vmov.u16 q11, #20 @ Filter coeff 0x14 into Q11
+ vmov.u16 q12, #5 @ Filter coeff 0x5 into Q12
+ add r8, r0, #8 @ r8 = source pointer + 8 bytes, read by the high-half loops
+ add r14, r1, #8 @ r14 = pu1_dst + 8, written by the high-half loop
+ add r10, r9, #8 @ r10 = pu1_tmp + 8, temp-buffer store pointer of the high half
+ mov r12, r4 @ r12 = copy of ht, row counter of the high half
+ add r11, r7, #8 @ r11 = r7 + 8, temp-buffer load pointer of the high half
+
+loop_16_lowhalf_start:
+ vld1.32 {q0}, [r0], r2 @ row -2 load for horizontal filter
+ vext.8 d5, d0, d1, #5 @ a[5]
+ vaddl.u8 q3, d0, d5 @ a0 + a5
+
+ vext.8 d2, d0, d1, #2 @ a[2]
+ vext.8 d3, d0, d1, #3 @ a[3]
+ vaddl.u8 q4, d2, d3 @ a2 + a3
+ vext.8 d4, d0, d1, #4 @ a[4]
+ vmla.u16 q3, q4, q11 @ a0 + a5 + 20*(a2 + a3)
+ vext.8 d1, d0, d1, #1 @ a[1]
+ vaddl.u8 q4, d1, d4 @ a1 + a4
+ vld1.32 {q0}, [r0], r2 @ row -1 load for horizontal filter
+ vmls.u16 q3, q4, q12 @ q3 = a0 + a5 + 20*(a2 + a3) - 5*(a1 + a4) for row -2 (16 bit, no rounding or shift yet)
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q4, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q5, d2, d3
+
+ vst1.32 {q3}, [r9], r6 @ store temp buffer 0
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q4, q5, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q5, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 0 load for horizontal filter
+ vmls.u16 q4, q5, q12 @ q4 = horizontally filtered row -1
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q5, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q6, d2, d3
+
+ vst1.32 {q4}, [r9], r6 @ store temp buffer 1
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q5, q6, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q6, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 1 load for horizontal filter
+ vmls.u16 q5, q6, q12 @ q5 = horizontally filtered row 0
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q6, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q7, d2, d3
+
+ vst1.32 {q5}, [r9], r6 @ store temp buffer 2
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q6, q7, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q7, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 2 load for horizontal filter
+ vmls.u16 q6, q7, q12 @ q6 = horizontally filtered row 1
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q7, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q8, d2, d3
+
+ vst1.32 {q6}, [r9], r6 @ store temp buffer 3
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q7, q8, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q8, d1, d4
+
+ vmls.u16 q7, q8, q12 @ q7 = horizontally filtered row 2 (stored at the top of loop_16_lowhalf)
+loop_16_lowhalf:
+ @ Row numbers in the comments below are those of the first pass; every pass consumes four more source rows.
+ vld1.32 {q0}, [r0], r2 @ row 3 load for horizontal filter
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q8, d0, d5
+
+ vst1.32 {q7}, [r9], r6 @ store temp buffer 4
+ vaddl.u8 q9, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q8, q9, q11
+ vext.8 d1, d0, d1, #1
+ vadd.s16 q14, q4, q7 @ pair of filtered rows that receives the -5 weight
+ vaddl.u8 q9, d1, d4
+ vadd.s16 q15, q5, q6 @ pair of filtered rows that receives the +20 weight
+ vmls.u16 q8, q9, q12 @ q8 = horizontally filtered row 3
+ vld1.32 {q0}, [r0], r2 @ row 4 load for horizontal filter
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q10, d0, d5
+
+ vst1.32 {q8}, [r9], r6 @ store temp buffer 5
+
+ vaddl.s16 q9, d6, d16 @ outer taps of the vertical filter, widened to 32 bit (low 4 columns)
+
+ vld1.32 {q13}, [r7], r6 @ reload the horizontally filtered row to average with output row 0 (temp row chosen through r7)
+
+ vaddl.s16 q3, d7, d17 @ outer taps, high 4 columns
+
+ vqrshrun.s16 d26, q13, #5 @ rounding shift by 5, saturated to 8 bit: horizontal half-pel row
+
+ vmlal.s16 q9, d30, d22 @ + 20 * centre pair
+ vmlsl.s16 q9, d28, d24 @ - 5 * inner pair
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d29, d24
+ vaddl.u8 q1, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q10, q1, q11
+ vqrshrun.s32 d18, q9, #10 @ rounding shift by 10, saturated to unsigned 16 bit
+ vext.8 d1, d0, d1, #1
+ vqrshrun.s32 d19, q3, #10
+ vadd.s16 q14, q5, q8
+ vaddl.u8 q1, d1, d4
+ vadd.s16 q15, q6, q7
+ vmls.u16 q10, q1, q12 @ q10 = horizontally filtered row 4
+ vqmovn.u16 d18, q9 @ narrow to 8 bit: the (1/2,1/2) values
+ vld1.32 {q0}, [r0], r2 @ row 5 load for horizontal filter
+
+ vrhadd.u8 d26, d18, d26 @ rounded average of the two half-pel results = output row 0
+
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+
+ vst1.32 {q10}, [r9], r6 @ store temp buffer 6
+
+ vaddl.s16 q9, d8, d20
+
+ vaddl.s16 q3, d9, d21
+
+ vld1.32 {q4}, [r7], r6 @ reload the horizontally filtered row to average with output row 1
+
+
+ vst1.32 d26, [r1], r3 @ store row 0
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d28, d24
+
+ vqrshrun.s16 d28, q4, #5
+
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d29, d24
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q4, d0, d5
+ vaddl.u8 q1, d2, d3
+ vqrshrun.s32 d18, q9, #10
+ vext.8 d4, d0, d1, #4
+ vqrshrun.s32 d19, q3, #10
+ vmla.u16 q4, q1, q11
+ vext.8 d1, d0, d1, #1
+ vadd.s16 q13, q6, q10
+ vaddl.u8 q1, d1, d4
+ vqmovn.u16 d18, q9
+ vadd.s16 q15, q7, q8
+ vmls.u16 q4, q1, q12 @ q4 = horizontally filtered row 5
+ vld1.32 {q0}, [r0], r2 @ row 6 load for horizontal filter
+
+ vrhadd.u8 d28, d28, d18
+
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+
+ vst1.32 d28, [r1], r3 @ store row 1
+
+ vaddl.u8 q14, d0, d5
+
+ vst1.32 {q4}, [r9], r6 @ store temp buffer 7
+
+ vaddl.s16 q9, d10, d8
+ vaddl.s16 q3, d11, d9
+
+ vld1.32 {q5}, [r7], r6 @ reload the horizontally filtered row to average with output row 2
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d26, d24
+ vmlal.s16 q3, d31, d22
+
+ vqrshrun.s16 d26, q5, #5 @ overwrites d26 only; d27 is still needed by the next instruction
+
+ vmlsl.s16 q3, d27, d24
+ vaddl.u8 q1, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q14, q1, q11
+ vqrshrun.s32 d18, q9, #10
+ vext.8 d1, d0, d1, #1
+ vqrshrun.s32 d19, q3, #10
+ vadd.s16 q5, q7, q4
+ vaddl.u8 q1, d1, d4
+ vadd.s16 q15, q8, q10
+ vmls.u16 q14, q1, q12 @ q14 = horizontally filtered row 6
+ vqmovn.u16 d27, q9
+
+ vaddl.s16 q9, d12, d28
+ vaddl.s16 q3, d13, d29
+
+ vrhadd.u8 d26, d26, d27
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d10, d24
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d11, d24
+
+ vst1.32 d26, [r1], r3 @ store row 2
+
+ vst1.32 {q14}, [r9] @ no post-increment: the next pass stores the same values (then in q7) here again
+
+
+ vqrshrun.s32 d18, q9, #10
+ vmov q5, q10 @ q3..q7 are reloaded with the five newest filtered rows for the next pass
+ vld1.32 {q15}, [r7], r6 @ reload the horizontally filtered row to average with output row 3
+
+ vqrshrun.s32 d19, q3, #10
+ subs r4, r4, #4 @ four rows done
+
+ vqrshrun.s16 d30, q15, #5
+
+ vqmovn.u16 d18, q9
+ vmov q6, q4
+ vmov q3, q7
+ vrhadd.u8 d30, d18, d30
+ vmov q4, q8
+ vmov q7, q14
+ vst1.32 d30, [r1], r3 @ store row 3
+
+ bgt loop_16_lowhalf @ loop while rows remain
+
+
+loop_16_highhalf_start:
+ vld1.32 {q0}, [r8], r2 @ first row load of the high half (r8 = low-half start pointer + 8)
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q3, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q4, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q3, q4, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q4, d1, d4
+ vld1.32 {q0}, [r8], r2 @ second row load
+ vmls.u16 q3, q4, q12 @ q3 = horizontally filtered first row
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q4, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q5, d2, d3
+
+ vst1.32 {q3}, [r10], r6 @ store to the temp buffer through r10, step 48 bytes
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q4, q5, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q5, d1, d4
+ vld1.32 {q0}, [r8], r2 @ third row load
+ vmls.u16 q4, q5, q12
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q5, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q6, d2, d3
+
+ vst1.32 {q4}, [r10], r6
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q5, q6, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q6, d1, d4
+ vld1.32 {q0}, [r8], r2 @ fourth row load
+ vmls.u16 q5, q6, q12
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q6, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q7, d2, d3
+
+ vst1.32 {q5}, [r10], r6
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q6, q7, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q7, d1, d4
+ vld1.32 {q0}, [r8], r2 @ fifth row load
+ vmls.u16 q6, q7, q12
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q7, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q8, d2, d3
+
+ vst1.32 {q6}, [r10], r6
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q7, q8, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q8, d1, d4
+
+ vmls.u16 q7, q8, q12 @ q7 = horizontally filtered fifth row (stored at the top of loop_16_highhalf)
+
+
+loop_16_highhalf:
+ @ Same instruction sequence as loop_16_lowhalf, using r8 (source), r10/r11 (temp store/load), r14 (dest) and r12 (row counter).
+ vld1.32 {q0}, [r8], r2
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q8, d0, d5
+
+ vst1.32 {q7}, [r10], r6
+
+ vaddl.u8 q9, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q8, q9, q11
+ vext.8 d1, d0, d1, #1
+ vadd.s16 q14, q4, q7 @ pair of filtered rows that receives the -5 weight
+ vaddl.u8 q9, d1, d4
+ vadd.s16 q15, q5, q6 @ pair of filtered rows that receives the +20 weight
+ vmls.u16 q8, q9, q12
+ vld1.32 {q0}, [r8], r2
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q10, d0, d5
+
+ vst1.32 {q8}, [r10], r6
+
+ vaddl.s16 q9, d6, d16 @ outer taps of the vertical filter, widened to 32 bit
+
+ vld1.32 {q13}, [r11], r6 @ reload the horizontally filtered row to average with output row 0
+
+ vaddl.s16 q3, d7, d17
+
+ vqrshrun.s16 d26, q13, #5 @ rounding shift by 5, saturated to 8 bit: horizontal half-pel row
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d28, d24
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d29, d24
+ vaddl.u8 q1, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q10, q1, q11
+ vqrshrun.s32 d18, q9, #10 @ rounding shift by 10, saturated to unsigned 16 bit
+ vext.8 d1, d0, d1, #1
+ vqrshrun.s32 d19, q3, #10
+ vadd.s16 q14, q5, q8
+ vaddl.u8 q1, d1, d4
+ vadd.s16 q15, q6, q7
+ vmls.u16 q10, q1, q12
+ vqmovn.u16 d18, q9 @ narrow to 8 bit: the (1/2,1/2) values
+ vld1.32 {q0}, [r8], r2
+
+ vrhadd.u8 d26, d18, d26 @ rounded average of the two half-pel results = output row 0
+
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+
+ vst1.32 {q10}, [r10], r6
+
+ vaddl.s16 q9, d8, d20
+ vaddl.s16 q3, d9, d21
+
+ vld1.32 {q4}, [r11], r6 @ reload the horizontally filtered row to average with output row 1
+
+
+ vst1.32 d26, [r14], r3 @store row 0
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d28, d24
+
+ vqrshrun.s16 d28, q4, #5
+
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d29, d24
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q4, d0, d5
+ vaddl.u8 q1, d2, d3
+ vqrshrun.s32 d18, q9, #10
+ vext.8 d4, d0, d1, #4
+ vqrshrun.s32 d19, q3, #10
+ vmla.u16 q4, q1, q11
+ vext.8 d1, d0, d1, #1
+ vadd.s16 q13, q6, q10
+ vaddl.u8 q1, d1, d4
+ vqmovn.u16 d18, q9
+ vadd.s16 q15, q7, q8
+ vmls.u16 q4, q1, q12
+ vld1.32 {q0}, [r8], r2
+
+ vrhadd.u8 d28, d28, d18
+
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+
+ vst1.32 d28, [r14], r3 @store row 1
+
+ vaddl.u8 q14, d0, d5
+
+ vst1.32 {q4}, [r10], r6
+
+ vaddl.s16 q9, d10, d8
+ vaddl.s16 q3, d11, d9
+
+ vld1.32 {q5}, [r11], r6 @ reload the horizontally filtered row to average with output row 2
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d26, d24
+ vmlal.s16 q3, d31, d22
+
+ vqrshrun.s16 d26, q5, #5 @ overwrites d26 only; d27 is still needed by the next instruction
+
+ vmlsl.s16 q3, d27, d24
+ vaddl.u8 q1, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q14, q1, q11
+ vqrshrun.s32 d18, q9, #10
+ vext.8 d1, d0, d1, #1
+ vqrshrun.s32 d19, q3, #10
+ vadd.s16 q5, q7, q4
+ vaddl.u8 q1, d1, d4
+ vadd.s16 q15, q8, q10
+ vmls.u16 q14, q1, q12
+ vqmovn.u16 d27, q9
+
+
+ vaddl.s16 q9, d12, d28
+ vaddl.s16 q3, d13, d29
+
+ vrhadd.u8 d26, d26, d27
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d10, d24
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d11, d24
+
+ vst1.32 d26, [r14], r3 @ store row 2
+
+ vst1.32 {q14}, [r10] @ no post-increment: the next pass stores the same values (then in q7) here again
+
+ vqrshrun.s32 d18, q9, #10
+ vmov q5, q10 @ q3..q7 are reloaded with the five newest filtered rows for the next pass
+ vld1.32 {q15}, [r11], r6 @ reload the horizontally filtered row to average with output row 3
+
+ vqrshrun.s32 d19, q3, #10
+ subs r12, r12, #4 @ four rows done
+
+ vqrshrun.s16 d30, q15, #5
+
+ vqmovn.u16 d18, q9
+ vmov q6, q4
+ vmov q3, q7
+ vrhadd.u8 d30, d18, d30
+ vmov q4, q8
+ vmov q7, q14
+ vst1.32 d30, [r14], r3 @ store row 3
+
+ bgt loop_16_highhalf @ loop while rows remain
+ b end_func
+
+loop_8_start:
+ @ wd == 8: horizontally filter rows -2..2 before entering loop_8
+ vmov.u16 q11, #20 @ Filter coeff 20 into Q11
+ vmov.u16 q12, #5 @ Filter coeff 5 into Q12
+ vld1.32 {q0}, [r0], r2 @ row -2 load for horizontal filter
+ vext.8 d5, d0, d1, #5 @ a[5]
+ vaddl.u8 q3, d0, d5 @ a0 + a5
+
+ vext.8 d2, d0, d1, #2 @ a[2]
+ vext.8 d3, d0, d1, #3 @ a[3]
+ vaddl.u8 q4, d2, d3 @ a2 + a3
+ vext.8 d4, d0, d1, #4 @ a[4]
+ vmla.u16 q3, q4, q11 @ a0 + a5 + 20*(a2 + a3)
+ vext.8 d1, d0, d1, #1 @ a[1]
+ vaddl.u8 q4, d1, d4 @ a1 + a4
+ vld1.32 {q0}, [r0], r2 @ row -1 load for horizontal filter
+ vmls.u16 q3, q4, q12 @ q3 = a0 + a5 + 20*(a2 + a3) - 5*(a1 + a4) for row -2
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q4, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q5, d2, d3
+
+ vst1.32 {q3}, [r9], r6 @ store temp buffer 0
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q4, q5, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q5, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 0 load for horizontal filter
+ vmls.u16 q4, q5, q12 @ q4 = horizontally filtered row -1
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q5, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q6, d2, d3
+
+ vst1.32 {q4}, [r9], r6 @ store temp buffer 1
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q5, q6, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q6, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 1 load for horizontal filter
+ vmls.u16 q5, q6, q12 @ q5 = horizontally filtered row 0
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q6, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q7, d2, d3
+
+ vst1.32 {q5}, [r9], r6 @ store temp buffer 2
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q6, q7, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q7, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 2 load for horizontal filter
+ vmls.u16 q6, q7, q12 @ q6 = horizontally filtered row 1
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q7, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q8, d2, d3
+
+ vst1.32 {q6}, [r9], r6 @ store temp buffer 3
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q7, q8, q11
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q8, d1, d4
+
+ vmls.u16 q7, q8, q12 @ q7 = horizontally filtered row 2 (stored at the top of loop_8)
+loop_8:
+ @ Row numbers in the comments below are those of the first pass; every pass consumes four more source rows.
+ vld1.32 {q0}, [r0], r2 @ row 3 load for horizontal filter
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q8, d0, d5
+
+ vst1.32 {q7}, [r9], r6 @ store temp buffer 4
+
+ vaddl.u8 q9, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q8, q9, q11
+ vext.8 d1, d0, d1, #1
+ vadd.s16 q14, q4, q7 @ pair of filtered rows that receives the -5 weight
+ vaddl.u8 q9, d1, d4
+ vadd.s16 q15, q5, q6 @ pair of filtered rows that receives the +20 weight
+ vmls.u16 q8, q9, q12 @ q8 = horizontally filtered row 3
+ vld1.32 {q0}, [r0], r2 @ row 4 load for horizontal filter
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q10, d0, d5
+
+ vst1.32 {q8}, [r9], r6 @ store temp buffer 5
+
+ vaddl.s16 q9, d6, d16 @ outer taps of the vertical filter, widened to 32 bit (low 4 columns)
+
+ vld1.32 {q13}, [r7], r6 @ reload the horizontally filtered row to average with output row 0 (temp row chosen through r7)
+
+ vaddl.s16 q3, d7, d17 @ outer taps, high 4 columns
+
+ vqrshrun.s16 d26, q13, #5 @ rounding shift by 5, saturated to 8 bit: horizontal half-pel row
+
+ vmlal.s16 q9, d30, d22 @ + 20 * centre pair
+ vmlsl.s16 q9, d28, d24 @ - 5 * inner pair
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d29, d24
+ vaddl.u8 q1, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q10, q1, q11
+ vqrshrun.s32 d18, q9, #10 @ rounding shift by 10, saturated to unsigned 16 bit
+ vext.8 d1, d0, d1, #1
+ vqrshrun.s32 d19, q3, #10
+ vadd.s16 q14, q5, q8
+ vaddl.u8 q1, d1, d4
+ vadd.s16 q15, q6, q7
+ vmls.u16 q10, q1, q12 @ q10 = horizontally filtered row 4
+ vqmovn.u16 d18, q9 @ narrow to 8 bit: the (1/2,1/2) values
+ vld1.32 {q0}, [r0], r2 @ row 5 load for horizontal filter
+
+ vrhadd.u8 d26, d18, d26 @ rounded average of the two half-pel results = output row 0
+
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+
+ vst1.32 {q10}, [r9], r6 @ store temp buffer 6
+
+ vaddl.s16 q9, d8, d20
+
+ vaddl.s16 q3, d9, d21
+
+ vld1.32 {q4}, [r7], r6 @ reload the horizontally filtered row to average with output row 1
+
+
+ vst1.32 d26, [r1], r3 @ store row 0
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d28, d24
+
+ vqrshrun.s16 d28, q4, #5
+
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d29, d24
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q4, d0, d5
+ vaddl.u8 q1, d2, d3
+ vqrshrun.s32 d18, q9, #10
+ vext.8 d4, d0, d1, #4
+ vqrshrun.s32 d19, q3, #10
+ vmla.u16 q4, q1, q11
+ vext.8 d1, d0, d1, #1
+ vadd.s16 q13, q6, q10
+ vaddl.u8 q1, d1, d4
+ vqmovn.u16 d18, q9
+ vadd.s16 q15, q7, q8
+ vmls.u16 q4, q1, q12 @ q4 = horizontally filtered row 5
+ vld1.32 {q0}, [r0], r2 @ row 6 load for horizontal filter
+
+ vrhadd.u8 d28, d28, d18
+
+ vext.8 d5, d0, d1, #5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+
+ vst1.32 d28, [r1], r3 @ store row 1
+
+ vaddl.u8 q14, d0, d5
+
+ vst1.32 {q4}, [r9], r6 @ store temp buffer 7
+
+ vaddl.s16 q9, d10, d8
+ vaddl.s16 q3, d11, d9
+
+ vld1.32 {q5}, [r7], r6 @ reload the horizontally filtered row to average with output row 2
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d26, d24
+ vmlal.s16 q3, d31, d22
+
+ vqrshrun.s16 d26, q5, #5 @ overwrites d26 only; d27 is still needed by the next instruction
+
+ vmlsl.s16 q3, d27, d24
+ vaddl.u8 q1, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 q14, q1, q11
+ vqrshrun.s32 d18, q9, #10
+ vext.8 d1, d0, d1, #1
+ vqrshrun.s32 d19, q3, #10
+ vadd.s16 q5, q7, q4
+ vaddl.u8 q1, d1, d4
+ vadd.s16 q15, q8, q10
+ vmls.u16 q14, q1, q12 @ q14 = horizontally filtered row 6
+ vqmovn.u16 d27, q9
+
+ vaddl.s16 q9, d12, d28
+ vaddl.s16 q3, d13, d29
+
+ vrhadd.u8 d26, d26, d27
+
+ vmlal.s16 q9, d30, d22
+ vmlsl.s16 q9, d10, d24
+ vmlal.s16 q3, d31, d22
+ vmlsl.s16 q3, d11, d24
+
+ vst1.32 d26, [r1], r3 @ store row 2
+
+ vst1.32 {q14}, [r9] @ no post-increment: the next pass stores the same values (then in q7) here again
+
+
+ vqrshrun.s32 d18, q9, #10
+ vmov q5, q10 @ q3..q7 are reloaded with the five newest filtered rows for the next pass
+ vld1.32 {q15}, [r7], r6 @ reload the horizontally filtered row to average with output row 3
+
+ vqrshrun.s32 d19, q3, #10
+ subs r4, r4, #4 @ four rows done
+
+ vqrshrun.s16 d30, q15, #5
+
+ vqmovn.u16 d18, q9
+ vmov q6, q4
+ vmov q3, q7
+ vrhadd.u8 d30, d18, d30
+ vmov q4, q8
+ vmov q7, q14
+ vst1.32 d30, [r1], r3 @ store row 3
+
+ bgt loop_8 @ loop while rows remain
+ b end_func
+
+loop_4_start:
+ vmov.u16 d22, #20 @ Filter coeff 20 into D22
+ vmov.u16 d23, #5 @ Filter coeff 5 into D23
+ @ wd == 4: only the low D half of each widened sum is filtered and stored (4 columns)
+ vld1.32 {q0}, [r0], r2 @row -2 load
+ vext.8 d5, d0, d1, #5 @ a[5]
+ vaddl.u8 q3, d0, d5 @ a0 + a5
+ vext.8 d2, d0, d1, #2 @ a[2]
+ vext.8 d3, d0, d1, #3 @ a[3]
+ vaddl.u8 q4, d2, d3 @ a2 + a3
+ vext.8 d4, d0, d1, #4 @ a[4]
+ vmla.u16 d6, d8, d22 @ a0 + a5 + 20*(a2 + a3)
+ vext.8 d1, d0, d1, #1 @ a[1]
+ vaddl.u8 q4, d1, d4 @ a1 + a4
+ vld1.32 {q0}, [r0], r2 @ row -1 load
+ vmls.u16 d6, d8, d23 @ d6 = horizontally filtered row -2
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q4, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q5, d2, d3
+
+ vst1.32 d6, [r9], r6 @ store temp buffer 0
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 d8, d10, d22
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q5, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 0 load
+ vmls.u16 d8, d10, d23 @ d8 = horizontally filtered row -1
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q5, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q6, d2, d3
+
+ vst1.32 d8, [r9], r6 @ store temp buffer 1
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 d10, d12, d22
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q6, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 1 load
+ vmls.u16 d10, d12, d23 @ d10 = horizontally filtered row 0
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q6, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q7, d2, d3
+
+ vst1.32 d10, [r9], r6 @ store temp buffer 2
+
+ vext.8 d4, d0, d1, #4
+ vmla.u16 d12, d14, d22
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q7, d1, d4
+ vld1.32 {q0}, [r0], r2 @ row 2 load
+ vmls.u16 d12, d14, d23 @ d12 = horizontally filtered row 1
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q7, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q8, d2, d3
+ vext.8 d4, d0, d1, #4
+ vmla.u16 d14, d16, d22
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q8, d1, d4
+
+ vst1.32 d12, [r9], r6 @ store temp buffer 3
+
+ vmls.u16 d14, d16, d23 @ d14 = horizontally filtered row 2 (stored at the top of loop_4)
+
+
+loop_4:
+ @ Row numbers in the comments below are those of the first pass; every pass consumes four more source rows.
+ vld1.32 {q0}, [r0], r2 @ row 3 load
+ vext.8 d5, d0, d1, #5
+ vaddl.u8 q8, d0, d5
+ vext.8 d2, d0, d1, #2
+ vext.8 d3, d0, d1, #3
+ vaddl.u8 q9, d2, d3
+ vst1.32 d14, [r9], r6 @ store temp buffer 4
+ vext.8 d4, d0, d1, #4
+ vmla.u16 d16, d18, d22
+ vext.8 d1, d0, d1, #1
+ vaddl.u8 q9, d1, d4
+ vadd.s16 d2, d10, d12 @ pair of filtered rows that receives the +20 weight
+ vmls.u16 d16, d18, d23 @ d16 = horizontally filtered row 3
+ vadd.s16 d3, d8, d14 @ pair of filtered rows that receives the -5 weight
+ vld1.32 {q9}, [r0], r2 @ row 4 load
+ vext.8 d25, d18, d19, #5
+ vaddl.u8 q13, d18, d25
+ vext.8 d20, d18, d19, #2
+
+ vst1.32 d16, [r9], r6 @ store temp buffer 5
+
+ vaddl.s16 q0, d6, d16 @ outer taps of the vertical filter, widened to 32 bit
+ vmlal.s16 q0, d2, d22
+ vext.8 d21, d18, d19, #3
+ vaddl.u8 q14, d20, d21
+ vext.8 d24, d18, d19, #4
+ vmlsl.s16 q0, d3, d23
+ vmla.u16 d26, d28, d22
+ vext.8 d19, d18, d19, #1
+ vaddl.u8 q14, d19, d24
+ vadd.s16 d2, d12, d14
+ vmls.u16 d26, d28, d23 @ d26 = horizontally filtered row 4
+ vqrshrun.s32 d0, q0, #0xa @ rounding shift by 10, saturated to unsigned 16 bit
+ vadd.s16 d3, d10, d16
+ vld1.32 {q9}, [r0], r2 @ row 5 load
+ vext.8 d25, d18, d19, #5
+ vqmovn.u16 d11, q0 @ narrow to 8 bit; low 4 bytes = (1/2,1/2) values of output row 0
+ vaddl.u8 q14, d18, d25
+
+ vst1.32 d26, [r9], r6 @ store temp buffer 6
+
+ @Q3 available here
+ vld1.32 d6, [r7], r6 @ reload the horizontally filtered row to average with output row 0
+ vld1.32 d7, [r7], r6 @ reload the horizontally filtered row to average with output row 1
+ vqrshrun.s16 d9, q3, #5 @ d9 = horizontal half-pel bytes: row 0 in the low word, row 1 in the high word
+
+ vext.8 d20, d18, d19, #2
+
+ vaddl.s16 q0, d8, d26
+ vmlal.s16 q0, d2, d22
+ vext.8 d21, d18, d19, #3
+ vaddl.u8 q3, d20, d21
+ vext.8 d24, d18, d19, #4
+ vmlsl.s16 q0, d3, d23
+ vmla.u16 d28, d6, d22
+ vext.8 d19, d18, d19, #1
+ vaddl.u8 q3, d19, d24
+ vadd.s16 d2, d14, d16
+ vmls.u16 d28, d6, d23 @ d28 = horizontally filtered row 5
+ vqrshrun.s32 d0, q0, #0xa
+ vadd.s16 d3, d12, d26
+ vld1.32 {q9}, [r0], r2 @ row 6 load
+ vext.8 d25, d18, d19, #5
+ vqmovn.u16 d13, q0
+
+ vtrn.32 d11, d13 @ d11 = (1/2,1/2) bytes: row 0 in the low word, row 1 in the high word
+ vaddl.s16 q0, d10, d28
+ vrhadd.u8 d9, d9, d11 @ rounded average = output rows 0 and 1
+
+ vst1.32 d28, [r9], r6 @ store temp buffer 7
+
+ vmlal.s16 q0, d2, d22
+ vaddl.u8 q15, d18, d25
+
+ vst1.32 d9[0], [r1], r3 @ store row 0
+
+ vext.8 d20, d18, d19, #2
+
+ vst1.32 d9[1], [r1], r3 @ store row 1
+
+ vext.8 d21, d18, d19, #3
+ vmlsl.s16 q0, d3, d23
+ vaddl.u8 q4, d20, d21
+ vext.8 d24, d18, d19, #4
+ vmla.u16 d30, d8, d22
+ vext.8 d19, d18, d19, #1
+ vaddl.u8 q4, d19, d24
+ vqrshrun.s32 d0, q0, #0xa
+ vadd.s16 d2, d16, d26
+ vmls.u16 d30, d8, d23 @ d30 = horizontally filtered row 6
+ vqmovn.u16 d4, q0
+
+ vadd.s16 d3, d14, d28
+
+
+ vaddl.s16 q0, d12, d30
+
+ vst1.32 d30, [r9] @ no post-increment: the next pass stores the same values (then in d14) here again
+
+ vmlal.s16 q0, d2, d22
+
+ vld1.32 d8, [r7], r6 @ reload the horizontally filtered row to average with output row 2
+ vld1.32 d9, [r7], r6 @ reload the horizontally filtered row to average with output row 3
+ vmlsl.s16 q0, d3, d23
+ subs r4, r4, #4 @ four rows done
+ vqrshrun.s16 d10, q4, #5
+
+ vmov d12, d28 @ d6, d8, d10, d12, d14 are reloaded with the five newest filtered rows for the next pass
+
+ vqrshrun.s32 d0, q0, #0xa
+ vmov d6, d14
+ vmov d8, d16
+
+ vqmovn.u16 d5, q0
+
+ vtrn.32 d4, d5 @ d4 = (1/2,1/2) bytes: row 2 in the low word, row 3 in the high word
+ vrhadd.u8 d4, d4, d10
+ vmov d10, d26
+ vmov d14, d30
+
+ vst1.32 d4[0], [r1], r3 @ store row 2
+ vst1.32 d4[1], [r1], r3 @ store row 3
+
+ bgt loop_4
+
+end_func:
+ vldmia sp!, {d8-d15} @ Restore the NEON registers saved on entry
+ ldmfd sp!, {r4-r12, pc} @ Restore r4-r12 and return by loading the saved lr into pc
+
+
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
new file mode 100755
index 0000000..c39ae01
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -0,0 +1,266 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_luma_horz_qpel_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction horizontal quarter pel interpolation.
+@*
+@* @author
+@* Mohit
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_horz_qpel_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Quarter pel interprediction luma filter for horizontal input
+@*
+@* @par Description:
+@* Applies a 6 tap horizontal filter. The output is clipped to 8 bits and averaged
+@* with the nearest full-pel sample; see sec 8.4.2.2.1 titled "Luma sample interpolation process"
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pu1_tmp: temporary buffer: UNUSED in this function
+@*
+@* @param[in] dydx: x and y reference offset for qpel calculations.
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@void ih264_inter_pred_luma_horz_qpel (
+@ UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd,
+@ UWORD8* pu1_tmp,
+@ UWORD32 dydx)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r5 => ht
+@ r6 => wd
+@ r7 => dydx
+
+.text
+.p2align 2
+
+
+ .global ih264_inter_pred_luma_horz_qpel_a9q
+
+ih264_inter_pred_luma_horz_qpel_a9q:
+
+ stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (10 words = 40 bytes)
+ vstmdb sp!, {d8-d15} @ save d8-d15 (64 bytes); stack arguments now start at sp + 104
+ ldr r5, [sp, #104] @Loads ht
+ ldr r6, [sp, #108] @Loads wd
+ ldr r7, [sp, #116] @Loads dydx
+ and r7, r7, #3 @ x-offset = dydx & 3
+ add r7, r0, r7, lsr #1 @ r7 = pu1_src + (x_offset>>1): samples averaged with the filter output
+ sub r0, r0, #2 @pu1_src-2
+ vmov.i8 d0, #5 @ filter coeff 5 (taps a1 and a4)
+ subs r12, r6, #8 @if wd=8 branch to loop_8
+ vmov.i8 d1, #20 @ filter coeff 20 (taps a2 and a3)
+
+ beq loop_8
+
+ subs r12, r6, #4 @if wd=4 branch to loop_4
+ beq loop_4
+
+loop_16: @ reached when wd is neither 8 nor 4 (wd=16); two rows per pass
+ @// Processing row0 and row1
+ vld1.8 {d2, d3, d4}, [r0], r2 @// Load row0
+ vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0)
+ vld1.8 {d5, d6, d7}, [r0], r2 @// Load row1
+ vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row0)
+ vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0)
+ vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1)
+ vaddl.u8 q5, d30, d3 @// a0 + a5 (column2,row0)
+ vext.8 d27, d6, d7, #5 @//extract a[5] (column2,row1)
+ vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1)
+ vext.8 d31, d2, d3, #2 @//extract a[2] (column1,row0)
+ vaddl.u8 q8, d27, d6 @// a0 + a5 (column2,row1)
+ vext.8 d30, d3, d4, #2 @//extract a[2] (column2,row0)
+ vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vext.8 d28, d5, d6, #2 @//extract a[2] (column1,row1)
+ vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vext.8 d27, d6, d7, #2 @//extract a[2] (column2,row1)
+ vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 (column1,row1)
+ vext.8 d31, d2, d3, #3 @//extract a[3] (column1,row0)
+ vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 (column2,row1)
+ vext.8 d30, d3, d4, #3 @//extract a[3] (column2,row0)
+ vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vext.8 d28, d5, d6, #3 @//extract a[3] (column1,row1)
+ vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vext.8 d27, d6, d7, #3 @//extract a[3] (column2,row1)
+ vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ vext.8 d31, d2, d3, #1 @//extract a[1] (column1,row0)
+ vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row1)
+ vext.8 d30, d3, d4, #1 @//extract a[1] (column2,row0)
+ vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vext.8 d28, d5, d6, #1 @//extract a[1] (column1,row1)
+ vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vext.8 d27, d6, d7, #1 @//extract a[1] (column2,row1)
+ vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ vext.8 d31, d2, d3, #4 @//extract a[4] (column1,row0)
+ vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+ vext.8 d30, d3, d4, #4 @//extract a[4] (column2,row0)
+ vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ vext.8 d28, d5, d6, #4 @//extract a[4] (column1,row1)
+ vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ vext.8 d27, d6, d7, #4 @//extract a[4] (column2,row1)
+ vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+ vld1.32 {d12, d13}, [r7], r2 @Load 16 values for interpolation (both columns, row0)
+ vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vqrshrun.s16 d21, q5, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.8 d31, d2, d3, #5 @ NOTE(review): still row0 data; d31 is rewritten after the next load before it is read - looks redundant
+ vrhadd.u8 q10, q6, q10 @Interpolation step for qpel calculation
+ vqrshrun.s16 d18, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ vst1.8 {d20, d21}, [r1], r3 @//Store dest row0
+ vext.8 d30, d3, d4, #5 @ NOTE(review): still row0 data; d30 is rewritten after the next load before it is read - looks redundant
+ vqrshrun.s16 d19, q8, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+ vld1.32 {d12, d13}, [r7], r2 @Load 16 values for interpolation (both columns, row1)
+ vrhadd.u8 q9, q6, q9 @Interpolation step for qpel calculation
+ vst1.8 {d18, d19}, [r1], r3 @//Store dest row1
+ subs r5, r5, #2 @ 2 rows done, decrement by 2
+
+ beq end_func @ all rows done
+ b loop_16 @ otherwise process the next two rows
+
+loop_8:
+@// Processing row0 and row1 (d5,d6 hold row0 and produce dest row0; d2,d3 hold row1)
+
+ vld1.8 {d5, d6}, [r0], r2 @// Load row0
+ vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row0)
+ vld1.8 {d2, d3}, [r0], r2 @// Load row1
+ vext.8 d25, d5, d6, #2 @//extract a[2] (column1,row0)
+ vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row1)
+ vext.8 d24, d5, d6, #3 @//extract a[3] (column1,row0)
+ vext.8 d23, d5, d6, #1 @//extract a[1] (column1,row0)
+ vext.8 d22, d5, d6, #4 @//extract a[4] (column1,row0)
+ vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row0)
+ vext.8 d29, d2, d3, #3 @//extract a[3] (column1,row1)
+ vmlal.u8 q7, d25, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q7, d24, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q7, d23, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q7, d22, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ vext.8 d30, d2, d3, #2 @//extract a[2] (column1,row1)
+ vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row1)
+ vext.8 d27, d2, d3, #1 @//extract a[1] (column1,row1)
+ vext.8 d26, d2, d3, #4 @//extract a[4] (column1,row1)
+ vmlal.u8 q4, d29, d1 @// a0 + a5 + 20a3 (column1,row1)
+ vmlal.u8 q4, d30, d1 @// a0 + a5 + 20a3 + 20a2 (column1,row1)
+ vmlsl.u8 q4, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ vmlsl.u8 q4, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ vqrshrun.s16 d18, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vld1.32 d12, [r7], r2 @Load value for interpolation (column1,row0)
+ vld1.32 d13, [r7], r2 @Load value for interpolation (column1,row1)
+ vqrshrun.s16 d19, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ vrhadd.u8 q9, q6, q9 @Interpolation step for qpel calculation
+ vst1.8 {d18}, [r1], r3 @//Store dest row0
+ vst1.8 {d19}, [r1], r3 @//Store dest row1
+ subs r5, r5, #2 @ 2 rows done, decrement by 2
+
+ beq end_func @ all rows done
+ b loop_8 @ otherwise process the next two rows
+
+loop_4:
+ vld1.8 {d5, d6}, [r0], r2 @// Load row0
+ vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row0)
+ vld1.8 {d2, d3}, [r0], r2 @// Load row1
+ vext.8 d25, d5, d6, #2 @//extract a[2] (column1,row0)
+ vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row1)
+ vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row0)
+ vext.8 d24, d5, d6, #3 @//extract a[3] (column1,row0)
+ vext.8 d23, d5, d6, #1 @//extract a[1] (column1,row0)
+ vext.8 d22, d5, d6, #4 @//extract a[4] (column1,row0)
+ vext.8 d29, d2, d3, #3 @//extract a[3] (column1,row1)
+ vmlal.u8 q7, d25, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q7, d24, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q7, d23, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q7, d22, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row1)
+ vext.8 d30, d2, d3, #2 @//extract a[2] (column1,row1)
+ vld1.32 d12, [r7], r2 @Load value for interpolation (column1,row0); 8 bytes are read, only the low 4 reach the output
+ vld1.32 d13, [r7], r2 @Load value for interpolation (column1,row1); 8 bytes are read, only the low 4 reach the output
+ vext.8 d27, d2, d3, #1 @//extract a[1] (column1,row1)
+ vext.8 d26, d2, d3, #4 @//extract a[4] (column1,row1)
+ vmlal.u8 q4, d29, d1 @// a0 + a5 + 20a3 (column1,row1)
+ vmlal.u8 q4, d30, d1 @// a0 + a5 + 20a3 + 20a2 (column1,row1)
+ vmlsl.u8 q4, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ vmlsl.u8 q4, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ vqrshrun.s16 d18, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vqrshrun.s16 d19, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ vrhadd.u8 q9, q6, q9 @Interpolation step for qpel calculation
+ vst1.32 d18[0], [r1], r3 @//Store dest row0 (4 bytes)
+ vst1.32 d19[0], [r1], r3 @//Store dest row1 (4 bytes)
+
+ subs r5, r5, #2 @ 2 rows done, decrement by 2
+ beq end_func
+
+ b loop_4
+
+end_func:
+ vldmia sp!, {d8-d15} @ Restore the NEON registers saved on entry
+ ldmfd sp!, {r4-r12, pc} @ Restore r4-r12 and return by loading the saved lr into pc
+
+
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
new file mode 100755
index 0000000..565cc80
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
@@ -0,0 +1,505 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@*
+@* @author
+@* Mohit
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* This function implements a two stage cascaded six tap filter. It
+@* applies the six tap filter in the vertical direction on the
+@* predictor values, followed by applying the same filter in the
+@* horizontal direction on the output of the first stage. It then averages
+@* the output of the 1st stage and the final stage to obtain the quarter
+@* pel values. The six tap filtering operation is described in sec 8.4.2.2.1
+@* titled "Luma sample interpolation process".
+@*
+@* @par Description:
+@* This function is called to obtain pixels lying at the following
+@* location (1/4,1/2) or (3/4,1/2). The function interpolates
+@* the predictors first in the vertical direction and then in the
+@* horizontal direction to output the (1/2,1/2) value. It then averages
+@* the output of the 1st stage and the (1/2,1/2) value to obtain (1/4,1/2)
+@* or (3/4,1/2) depending on the offset.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pu1_tmp: temporary buffer
+@*
+@* @param[in] dydx: x and y reference offset for qpel calculations
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/;
+
+@void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd,
+@ UWORD8* pu1_tmp,
+@ UWORD32 dydx)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ht
+@ r5 => wd
+@ r6 => dydx
+@ r9 => *pu1_tmp
+
+.text
+.p2align 2
+
+ .global ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q
+
+ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q:
+
+ stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (10 words, 40 bytes)
+ vstmdb sp!, {d8-d15} @ save d8-d15 (64 bytes); stack arguments now start at sp+104
+ ldr r4, [sp, #104] @ r4 = ht
+ sub r0, r0, r2, lsl #1 @ pu1_src -= 2*src_strd (start two rows above)
+ sub r0, r0, #2 @ pu1_src -= 2 (start two bytes to the left)
+ ldr r5, [sp, #108] @ r5 = wd
+ ldr r6, [sp, #116] @ r6 = dydx
+ and r6, r6, #2 @ keep bit 1 only: same as ((dydx & 0x3) >> 1) << 1, i.e. a byte offset of 0 or 2
+ ldr r9, [sp, #112] @ r9 = pu1_tmp, write pointer for the first stage (vertical filter) output
+ add r7, r9, #4 @ r7 = pu1_tmp + 4 bytes
+ add r6, r7, r6 @ r6 = pu1_tmp + 4 + (dydx & 2): read pointer (pi16_pred1_temp) for the final averaging
+
+ vmov.u16 q13, #0x14 @ Filter coeff 20 into Q13 (used by loop_8 and loop_4)
+ vmov.u16 q12, #0x5 @ Filter coeff 5 into Q12 (used by loop_8 and loop_4)
+ mov r7, #0x20 @ r7 = 32, post-increment step for temp buffer accesses
+ mov r8, #0x30 @ r8 = 48, post-increment step for temp buffer reads in loop_8 and loop_4
+ subs r12, r5, #4 @if wd=4 branch to loop_4
+ beq loop_4
+
+ subs r12, r5, #8 @if wd=8 branch to loop_8
+ beq loop_8
+
+ @when wd=16
+ vmov.u16 q14, #0x14 @ Filter coeff 20 into Q14 (loop_16 reuses q12/q13 as scratch)
+ vmov.u16 q15, #0x5 @ Filter coeff 5 into Q15
+ add r14, r2, #0 @ r14 = unmodified src_strd, used to rewind r0 at the end of loop_16
+ sub r2, r2, #16 @ r2 = src_strd - 16: loop_16 reads 16 bytes with writeback, then 8 bytes post-incremented by r2
+
+
+loop_16:
+
+ vld1.u32 {q0}, [r0]! @ row 0, bytes 0-15 (r0 += 16)
+ vld1.u32 d12, [r0], r2 @ row 0, bytes 16-23; r0 moves on to row 1
+ vld1.u32 {q1}, [r0]! @ row 1, bytes 0-15
+ vld1.u32 d13, [r0], r2 @ row 1, bytes 16-23
+ vld1.u32 {q2}, [r0]! @ row 2, bytes 0-15
+ vld1.u32 d14, [r0], r2 @ row 2, bytes 16-23
+ vld1.u32 {q3}, [r0]! @ row 3, bytes 0-15
+ vld1.u32 d15, [r0], r2 @ row 3, bytes 16-23
+ vld1.u32 {q4}, [r0]! @ row 4, bytes 0-15
+ vld1.u32 d16, [r0], r2 @ row 4, bytes 16-23
+
+ vld1.u32 {q5}, [r0]! @ row 5, bytes 0-15
+ vld1.u32 d17, [r0], r2 @ row 5, bytes 16-23
+
+ vaddl.u8 q10, d4, d6 @ row2 + row3 (bytes 0-7)
+ vaddl.u8 q9, d0, d10 @ row0 + row5 (bytes 0-7)
+ vaddl.u8 q11, d2, d8 @ row1 + row4 (bytes 0-7)
+ vmla.u16 q9, q10, q14 @ += 20 * (row2 + row3)
+ vaddl.u8 q12, d5, d7 @ row2 + row3 (bytes 8-15)
+ vaddl.u8 q10, d1, d11 @ row0 + row5 (bytes 8-15)
+ vaddl.u8 q13, d3, d9 @ row1 + row4 (bytes 8-15)
+ vmla.u16 q10, q12, q14 @ += 20 * (row2 + row3)
+ vaddl.u8 q12, d14, d15 @ row2 + row3 (bytes 16-23)
+ vmls.u16 q9, q11, q15 @ -= 5 * (row1 + row4): q9 = vertical filter sum, bytes 0-7
+ vaddl.u8 q11, d12, d17 @ row0 + row5 (bytes 16-23)
+ vmls.u16 q10, q13, q15 @ -= 5 * (row1 + row4): q10 = vertical filter sum, bytes 8-15
+ vaddl.u8 q13, d13, d16 @ row1 + row4 (bytes 16-23)
+ vmla.u16 q11, q12, q14 @ += 20 * (row2 + row3)
+ vmls.u16 q11, q13, q15 @ -= 5 * (row1 + row4): q11 = vertical filter sum, bytes 16-23
+ vst1.32 {q9}, [r9]! @ temp buffer: first stage sums 0-7 of output row 0
+ vst1.32 {q10}, [r9]! @ temp buffer: first stage sums 8-15
+ vext.16 q12, q9, q10, #2 @ a2
+ vext.16 q13, q9, q10, #3 @ a3
+ vst1.32 {q11}, [r9] @ temp buffer: first stage sums 16-23 (no writeback, reloaded below)
+ vext.16 q11, q9, q10, #5 @ a5
+ vadd.s16 q0, q12, q13 @ a2 + a3
+ vext.16 q12, q9, q10, #1 @ a1
+ vext.16 q13, q9, q10, #4 @ a4
+ vadd.s16 q12, q12, q13 @ a1 + a4
+
+ vaddl.s16 q13, d18, d22 @ a0 + a5, widened to 32 bits (outputs 0-3)
+ vmlal.s16 q13, d0, d28 @ += 20 * (a2 + a3)
+ vmlsl.s16 q13, d24, d30 @ -= 5 * (a1 + a4)
+
+ vaddl.s16 q11, d19, d23 @ a0 + a5 (outputs 4-7)
+ vmlal.s16 q11, d1, d28 @ += 20 * (a2 + a3)
+ vmlsl.s16 q11, d25, d30 @ -= 5 * (a1 + a4)
+
+ vqrshrun.s32 d18, q13, #10 @ (sum + 512) >> 10, saturated to unsigned 16 bits
+ vqrshrun.s32 d19, q11, #10
+ vld1.32 {q11}, [r9]! @ reload first stage sums 16-23; r9 now points at the next temp row
+ vqmovn.u16 d18, q9 @ narrow to 8 bits: second stage pixels 0-7
+
+ vext.16 q12, q10, q11, #2 @ a2 for outputs 8-15
+ vext.16 q13, q10, q11, #3 @ a3
+ vext.16 q0, q10, q11, #5 @ a5
+ vst1.32 d18, [r1] @ park pixels 0-7 in the destination row (reloaded before averaging)
+ vadd.s16 q9, q12, q13 @ a2 + a3
+ vext.16 q12, q10, q11, #1 @ a1
+ vext.16 q13, q10, q11, #4 @ a4
+ vadd.s16 q12, q12, q13 @ a1 + a4
+
+ vaddl.s16 q13, d0, d20 @ a5 + a0 (outputs 8-11)
+ vmlal.s16 q13, d18, d28 @ += 20 * (a2 + a3)
+ vmlsl.s16 q13, d24, d30 @ -= 5 * (a1 + a4)
+
+ vaddl.s16 q11, d1, d21 @ a5 + a0 (outputs 12-15)
+ vmlal.s16 q11, d19, d28 @ += 20 * (a2 + a3)
+ vmlsl.s16 q11, d25, d30 @ -= 5 * (a1 + a4)
+
+ vqrshrun.s32 d18, q13, #10 @ (sum + 512) >> 10, saturated to unsigned 16 bits
+ vqrshrun.s32 d19, q11, #10
+
+ vaddl.u8 q12, d7, d9 @ row3 + row4 (bytes 8-15), for the second output row
+ vld1.32 {q10}, [r6]! @ first stage sums for averaging, 8 values from the temp buffer
+ vld1.32 {q11}, [r6], r7 @ next 8 values; r6 advances 16 + 32 = 48 bytes per temp row
+
+ vqmovn.u16 d19, q9 @ narrow to 8 bits: second stage pixels 8-15
+
+ vld1.32 d18, [r1] @ reload parked pixels 0-7
+ vqrshrun.s16 d20, q10, #5 @ first stage pixels: (sum + 16) >> 5, saturated to 8 bits
+ vqrshrun.s16 d21, q11, #5
+ vaddl.u8 q11, d4, d10 @ row2 + row5 (bytes 0-7), for the second output row
+ vld1.u32 {q0}, [r0]! @ row 6, bytes 0-15
+ vrhadd.u8 q9, q9, q10 @ rounded average of second stage and first stage pixels
+ vld1.u32 d12, [r0], r2 @ row 6, bytes 16-23
+ vaddl.u8 q10, d6, d8 @ row3 + row4 (bytes 0-7)
+ vaddl.u8 q13, d5, d11 @ row2 + row5 (bytes 8-15)
+ vst1.32 {q9}, [r1], r3 @ store row 0
+
+@ROW_2
+
+ vaddl.u8 q9, d2, d0 @ row1 + row6 (bytes 0-7)
+
+ vmla.u16 q9, q10, q14 @ += 20 * (row3 + row4)
+
+ vaddl.u8 q10, d3, d1 @ row1 + row6 (bytes 8-15)
+
+ vmla.u16 q10, q12, q14 @ += 20 * (row3 + row4)
+ vaddl.u8 q12, d15, d16 @ row3 + row4 (bytes 16-23)
+ vmls.u16 q9, q11, q15 @ -= 5 * (row2 + row5)
+ vaddl.u8 q11, d13, d12 @ row1 + row6 (bytes 16-23)
+ vmls.u16 q10, q13, q15 @ -= 5 * (row2 + row5)
+ vaddl.u8 q13, d14, d17 @ row2 + row5 (bytes 16-23)
+ vmla.u16 q11, q12, q14 @ += 20 * (row3 + row4)
+ vmls.u16 q11, q13, q15 @ -= 5 * (row2 + row5)
+ vst1.32 {q9}, [r9]! @ temp buffer: first stage sums 0-7 of output row 1
+ vst1.32 {q10}, [r9]! @ temp buffer: first stage sums 8-15
+ vext.16 q12, q9, q10, #2 @ a2
+ vext.16 q13, q9, q10, #3 @ a3
+ vst1.32 {q11}, [r9] @ temp buffer: first stage sums 16-23 (no writeback, reloaded below)
+ vext.16 q11, q9, q10, #5 @ a5
+ vadd.s16 q1, q12, q13 @ a2 + a3 (q1 is free: row 1 is no longer needed)
+ vext.16 q12, q9, q10, #1 @ a1
+ vext.16 q13, q9, q10, #4 @ a4
+ vadd.s16 q12, q12, q13 @ a1 + a4
+
+ vaddl.s16 q13, d18, d22 @ a0 + a5 (outputs 0-3)
+ vmlal.s16 q13, d2, d28 @ += 20 * (a2 + a3)
+ vmlsl.s16 q13, d24, d30 @ -= 5 * (a1 + a4)
+
+ vaddl.s16 q11, d19, d23 @ a0 + a5 (outputs 4-7)
+ vmlal.s16 q11, d3, d28 @ += 20 * (a2 + a3)
+ vmlsl.s16 q11, d25, d30 @ -= 5 * (a1 + a4)
+
+ vqrshrun.s32 d18, q13, #10 @ (sum + 512) >> 10, saturated to unsigned 16 bits
+ vqrshrun.s32 d19, q11, #10
+ vld1.32 {q11}, [r9]! @ reload first stage sums 16-23; r9 now points at the next temp row
+ vqmovn.u16 d18, q9 @ narrow to 8 bits: second stage pixels 0-7
+
+ vext.16 q12, q10, q11, #2 @ a2 for outputs 8-15
+ vext.16 q13, q10, q11, #3 @ a3
+ vext.16 q1, q10, q11, #5 @ a5
+ vst1.32 d18, [r1] @ park pixels 0-7 in the destination row (reloaded before averaging)
+ vadd.s16 q9, q12, q13 @ a2 + a3
+ vext.16 q12, q10, q11, #1 @ a1
+ vext.16 q13, q10, q11, #4 @ a4
+ vadd.s16 q12, q12, q13 @ a1 + a4
+
+ vaddl.s16 q13, d2, d20 @ a5 + a0 (outputs 8-11)
+ vmlal.s16 q13, d18, d28 @ += 20 * (a2 + a3)
+ vmlsl.s16 q13, d24, d30 @ -= 5 * (a1 + a4)
+
+ vaddl.s16 q11, d3, d21 @ a5 + a0 (outputs 12-15)
+ vmlal.s16 q11, d19, d28 @ += 20 * (a2 + a3)
+ vmlsl.s16 q11, d25, d30 @ -= 5 * (a1 + a4)
+
+ vqrshrun.s32 d18, q13, #10 @ (sum + 512) >> 10, saturated to unsigned 16 bits
+ vqrshrun.s32 d19, q11, #10
+ vaddl.u8 q12, d9, d11 @ row4 + row5 (bytes 8-15); NOTE(review): q12 is overwritten before any later read in this loop
+ vld1.32 {q10}, [r6]! @ first stage sums for averaging, 8 values from the temp buffer
+ vld1.32 {q11}, [r6], r7 @ next 8 values; r6 advances to the next temp row
+ vqmovn.u16 d19, q9 @ narrow to 8 bits: second stage pixels 8-15
+ vld1.32 d18, [r1] @ reload parked pixels 0-7
+ vqrshrun.s16 d20, q10, #5 @ first stage pixels: (sum + 16) >> 5, saturated to 8 bits
+ vqrshrun.s16 d21, q11, #5
+
+ vrhadd.u8 q9, q9, q10 @ rounded average of second stage and first stage pixels
+
+ vst1.32 {q9}, [r1], r3 @ store row 1
+
+ subs r4, r4, #2 @ two output rows done
+ subne r0, r0 , r14, lsl #2 @ more rows left: rewind src by 4*src_strd ...
+ subne r0, r0, r14 @ ... plus 1*src_strd: 7 rows were read, so the net advance is 2 rows
+
+ beq end_func @ all rows done (flags still from the subs above)
+ b loop_16 @ otherwise process the next two rows
+
+loop_8:
+ vld1.u32 {q0}, [r0], r2 @ row 0, 16 bytes
+ vld1.u32 {q1}, [r0], r2 @ row 1
+ vld1.u32 {q2}, [r0], r2 @ row 2
+ vld1.u32 {q3}, [r0], r2 @ row 3
+ vld1.u32 {q4}, [r0], r2 @ row 4
+
+ vld1.u32 {q5}, [r0], r2 @ row 5
+ vaddl.u8 q7, d4, d6 @ row2 + row3 (bytes 0-7)
+ vaddl.u8 q6, d0, d10 @ row0 + row5 (bytes 0-7)
+ vaddl.u8 q8, d2, d8 @ row1 + row4 (bytes 0-7)
+ vmla.u16 q6, q7, q13 @ += 20 * (row2 + row3)
+ vaddl.u8 q9, d5, d7 @ row2 + row3 (bytes 8-15)
+ vaddl.u8 q7, d1, d11 @ row0 + row5 (bytes 8-15)
+ vaddl.u8 q11, d3, d9 @ row1 + row4 (bytes 8-15)
+ vmla.u16 q7, q9, q13 @ += 20 * (row2 + row3)
+ vmls.u16 q6, q8, q12 @ -= 5 * (row1 + row4): q6 = vertical filter sum, bytes 0-7
+ vld1.32 {q0}, [r0], r2 @ row 6 (replaces row 0)
+ vaddl.u8 q8, d6, d8 @ row3 + row4 (bytes 0-7), for output row 1
+ vmls.u16 q7, q11, q12 @ -= 5 * (row1 + row4): q7 = vertical filter sum, bytes 8-15
+ vaddl.u8 q14, d2, d0 @ row1 + row6 (bytes 0-7)
+ vst1.32 {q6}, [r9]! @ store row 0 to temp buffer: col 0
+ vext.16 q11, q6, q7, #5 @ a5
+ vaddl.u8 q9, d4, d10 @ row2 + row5 (bytes 0-7)
+ vmla.u16 q14, q8, q13 @ += 20 * (row3 + row4)
+ vaddl.s16 q15, d12, d22 @ a0 + a5, widened to 32 bits (outputs 0-3)
+ vst1.32 {q7}, [r9], r7 @ store row 0 to temp buffer: col 1 (16 + 32 = 48 bytes per temp row)
+ vaddl.s16 q11, d13, d23 @ a0 + a5 (outputs 4-7)
+ vext.16 q8, q6, q7, #2 @ a2
+ vmls.u16 q14, q9, q12 @ -= 5 * (row2 + row5): q14 = row 1 vertical sum, bytes 0-7
+ vext.16 q9, q6, q7, #3 @ a3
+ vext.16 q10, q6, q7, #4 @ a4
+ vext.16 q7, q6, q7, #1 @ a1
+ vadd.s16 q8, q8, q9 @ a2 + a3
+ vadd.s16 q9, q7, q10 @ a1 + a4
+ vaddl.u8 q10, d7, d9 @ row3 + row4 (bytes 8-15)
+ vmlal.s16 q15, d16, d26 @ += 20 * (a2 + a3)
+ vmlsl.s16 q15, d18, d24 @ -= 5 * (a1 + a4)
+ vmlal.s16 q11, d17, d26 @ += 20 * (a2 + a3)
+ vmlsl.s16 q11, d19, d24 @ -= 5 * (a1 + a4)
+ vaddl.u8 q7, d3, d1 @ row1 + row6 (bytes 8-15)
+ vst1.32 {q14}, [r9]! @ store row 1 to temp buffer: col 0
+ vmla.u16 q7, q10, q13 @ += 20 * (row3 + row4)
+ vqrshrun.s32 d12, q15, #10 @ (sum + 512) >> 10, saturated to unsigned 16 bits
+ vaddl.u8 q8, d5, d11 @ row2 + row5 (bytes 8-15)
+ vqrshrun.s32 d13, q11, #10
+ vmls.u16 q7, q8, q12 @ -= 5 * (row2 + row5): q7 = row 1 vertical sum, bytes 8-15
+@ vld1.32 {q1},[r0],r2 ; Vector load from src[7_0]
+ vqmovn.u16 d25, q6 @ row 0 second stage pixels; borrows d25 (upper half of coeff register q12), restored below
+ vaddl.u8 q8, d8, d10 @ row4 + row5 (bytes 0-7)
+
+
+ vext.16 q11, q14, q7, #5 @ a5 for row 1
+ vaddl.u8 q10, d4, d2 @ row2 + row1 (bytes 0-7)
+ vaddl.s16 q15, d28, d22 @ a0 + a5 (outputs 0-3)
+ vmla.u16 q10, q8, q13 @ NOTE(review): q10 is overwritten before any later read in this loop
+ vst1.32 {q7}, [r9], r7 @ store row 1 to temp buffer: col 1
+ vaddl.s16 q11, d29, d23 @ a0 + a5 (outputs 4-7)
+ vext.16 q8, q14, q7, #2 @ a2
+ vext.16 q9, q14, q7, #3 @ a3
+ vext.16 q6, q14, q7, #4 @ a4
+ vext.16 q7, q14, q7, #1 @ a1
+ vadd.s16 q8, q8, q9 @ a2 + a3
+ vadd.s16 q9, q6, q7 @ a4 + a1
+ vld1.32 {q7}, [r6], r8 @ load row 0 from temp buffer
+ vmlal.s16 q15, d16, d26 @ += 20 * (a2 + a3)
+ vmlsl.s16 q15, d18, d24 @ -= 5 * (a1 + a4)
+ vmlal.s16 q11, d17, d26 @ += 20 * (a2 + a3)
+ vmlsl.s16 q11, d19, d24 @ -= 5 * (a1 + a4)
+ vqrshrun.s16 d14, q7, #0x5 @ row 0 first stage pixels: (sum + 16) >> 5, saturated to 8 bits
+ vld1.32 {q14}, [r6], r8 @ load row 1 from temp buffer
+ vaddl.u8 q9, d6, d0 @ row3 + row6; NOTE(review): q9 is overwritten before any later read in this loop
+ vqrshrun.s32 d16, q15, #10 @ (sum + 512) >> 10, saturated to unsigned 16 bits
+ vqrshrun.s16 d15, q14, #0x5 @ row 1 first stage pixels
+ vqrshrun.s32 d17, q11, #10
+ vmov d12, d25 @ d12 = row 0 second stage pixels
+ vmov d25, d24 @ restore the upper half of q12 to the coefficient 5
+
+ vqmovn.u16 d13, q8 @ d13 = row 1 second stage pixels
+ vrhadd.u8 q6, q6, q7 @ rounded average of second stage and first stage pixels
+
+ vst1.32 d12, [r1], r3 @ store row 0
+ vst1.32 d13, [r1], r3 @ store row 1
+
+ subs r4, r4, #2 @ two output rows done
+ subne r0, r0 , r2, lsl #2 @ more rows left: rewind src by 4*src_strd ...
+ subne r0, r0, r2 @ ... plus 1*src_strd: 7 rows were read, so the net advance is 2 rows
+
+ beq end_func @ all rows done (flags still from the subs above)
+ b loop_8 @ otherwise process the next two rows
+
+loop_4:
+ vld1.u32 {q0}, [r0], r2 @ row 0, 16 bytes
+ vld1.u32 {q1}, [r0], r2 @ row 1
+ vld1.u32 {q2}, [r0], r2 @ row 2
+ vld1.u32 {q3}, [r0], r2 @ row 3
+ vld1.u32 {q4}, [r0], r2 @ row 4
+ vld1.u32 {q5}, [r0], r2 @ row 5
+
+ vaddl.u8 q7, d4, d6 @ temp1 = src[2_0] + src[3_0]
+ vaddl.u8 q6, d0, d10 @ temp = src[0_0] + src[5_0]
+ vaddl.u8 q8, d2, d8 @ temp2 = src[1_0] + src[4_0]
+ vmla.u16 q6, q7, q13 @ temp += temp1 * 20
+ vaddl.u8 q9, d5, d7 @ temp1 = src[2_0] + src[3_0] (bytes 8-15)
+ vaddl.u8 q7, d1, d11 @ temp = src[0_0] + src[5_0] (bytes 8-15)
+ vaddl.u8 q11, d3, d9 @ temp2 = src[1_0] + src[4_0] (bytes 8-15)
+ vmla.u16 q7, q9, q13 @ temp += temp1 * 20
+ vmls.u16 q6, q8, q12 @ temp -= temp2 * 5
+ vld1.32 {q0}, [r0], r2 @ row 6 (replaces row 0)
+ vaddl.u8 q8, d6, d8 @ row3 + row4 (bytes 0-7), for output row 1
+ vmls.u16 q7, q11, q12 @ temp -= temp2 * 5
+ @Q6 and Q7 have filtered values
+ vaddl.u8 q14, d2, d0 @ row1 + row6 (bytes 0-7)
+ vst1.32 {q6}, [r9]! @ store row 0 to temp buffer: col 0
+ vext.16 q11, q6, q7, #5 @ a5
+ vaddl.u8 q9, d4, d10 @ row2 + row5 (bytes 0-7)
+ vmla.u16 q14, q8, q13 @ += 20 * (row3 + row4)
+ vaddl.s16 q15, d12, d22 @ a0 + a5, widened to 32 bits (outputs 0-3)
+ vst1.32 {q7}, [r9], r7 @ store row 0 to temp buffer: col 1 (16 + 32 = 48 bytes per temp row)
+ vaddl.s16 q11, d13, d23 @ a0 + a5 (outputs 4-7)
+ vext.16 q8, q6, q7, #2 @ a2
+ vmls.u16 q14, q9, q12 @ -= 5 * (row2 + row5): q14 = row 1 vertical sum, bytes 0-7
+ vext.16 q9, q6, q7, #3 @ a3
+ vext.16 q10, q6, q7, #4 @ a4
+ vext.16 q7, q6, q7, #1 @ a1
+ vadd.s16 q8, q8, q9 @ a2 + a3
+ vadd.s16 q9, q7, q10 @ a1 + a4
+ vaddl.u8 q10, d7, d9 @ row3 + row4 (bytes 8-15)
+ vmlal.s16 q15, d16, d26 @ += 20 * (a2 + a3)
+ vmlsl.s16 q15, d18, d24 @ -= 5 * (a1 + a4)
+ vmlal.s16 q11, d17, d26 @ += 20 * (a2 + a3)
+ vmlsl.s16 q11, d19, d24 @ -= 5 * (a1 + a4)
+ vaddl.u8 q7, d3, d1 @ row1 + row6 (bytes 8-15)
+ vst1.32 {q14}, [r9]! @ store row 1 to temp buffer: col 0
+ vmla.u16 q7, q10, q13 @ += 20 * (row3 + row4)
+ vqrshrun.s32 d12, q15, #10 @ (sum + 512) >> 10, saturated to unsigned 16 bits
+ vaddl.u8 q8, d5, d11 @ row2 + row5 (bytes 8-15)
+ vqrshrun.s32 d13, q11, #10
+ vmls.u16 q7, q8, q12 @ -= 5 * (row2 + row5): q7 = row 1 vertical sum, bytes 8-15
+ vqmovn.u16 d25, q6 @ row 0 second stage pixels; borrows d25 (upper half of coeff register q12), restored below
+ vaddl.u8 q8, d8, d10 @ row4 + row5 (bytes 0-7)
+
+ vext.16 q11, q14, q7, #5 @ a5 for row 1
+ vaddl.u8 q10, d4, d2 @ row2 + row1 (bytes 0-7)
+ vaddl.s16 q15, d28, d22 @ a0 + a5 (outputs 0-3)
+ vmla.u16 q10, q8, q13 @ NOTE(review): q10 is overwritten before any later read in this loop
+ vst1.32 {q7}, [r9], r7 @ store row 1 to temp buffer: col 1
+ vaddl.s16 q11, d29, d23 @ a0 + a5 (outputs 4-7)
+ vext.16 q8, q14, q7, #2 @ a2
+ vext.16 q9, q14, q7, #3 @ a3
+ vext.16 q6, q14, q7, #4 @ a4
+ vext.16 q7, q14, q7, #1 @ a1
+ vadd.s16 q8, q8, q9 @ a2 + a3
+ vadd.s16 q9, q6, q7 @ a4 + a1
+ vld1.32 d14, [r6], r8 @load row 0 from temp buffer (4 first stage sums are enough for wd=4)
+ vmlal.s16 q15, d16, d26 @ += 20 * (a2 + a3)
+ vmlsl.s16 q15, d18, d24 @ -= 5 * (a1 + a4)
+ vmlal.s16 q11, d17, d26 @ += 20 * (a2 + a3)
+ vmlsl.s16 q11, d19, d24 @ -= 5 * (a1 + a4)
+ vqrshrun.s16 d14, q7, #0x5 @ row 0 first stage pixels: (sum + 16) >> 5; only the low 4 bytes are stored
+ vld1.32 d28, [r6], r8 @load row 1 from temp buffer
+ vaddl.u8 q9, d6, d0 @ row3 + row6; NOTE(review): q9 is overwritten before any later read in this loop
+ vqrshrun.s32 d16, q15, #10 @ (sum + 512) >> 10, saturated to unsigned 16 bits
+ vqrshrun.s16 d15, q14, #0x5 @ row 1 first stage pixels
+ vqrshrun.s32 d17, q11, #10
+ vmov d12, d25 @ d12 = row 0 second stage pixels
+ vmov d25, d24 @ restore the upper half of q12 to the coefficient 5
+
+ vqmovn.u16 d13, q8 @ d13 = row 1 second stage pixels
+ vrhadd.u8 q6, q6, q7 @ rounded average of second stage and first stage pixels
+ vst1.32 d12[0], [r1], r3 @ store row 0 (4 bytes)
+ vst1.32 d13[0], [r1], r3 @store row 1 (4 bytes)
+
+ subs r4, r4, #2 @ two output rows done
+ subne r0, r0 , r2, lsl #2 @ more rows left: rewind src by 4*src_strd ...
+ subne r0, r0, r2 @ ... plus 1*src_strd: 7 rows were read, so the net advance is 2 rows
+
+ beq end_func @ all rows done (flags still from the subs above)
+ b loop_4 @ otherwise process the next two rows
+
+end_func:
+ vldmia sp!, {d8-d15} @ restore d8-d15 saved in the prologue
+ ldmfd sp!, {r4-r12, pc} @ restore r4-r12 and return by loading the saved lr into pc
+
+
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
new file mode 100755
index 0000000..3c8b60a
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
@@ -0,0 +1,355 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction interpolation.
+@*
+@* @author
+@* Mohit
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* This function implements two six tap filters. It
+@* applies the six tap filter in the horizontal direction on the
+@* predictor values, then applies the same filter in the
+@* vertical direction on the predictor values. It then averages these
+@* two outputs to obtain quarter pel values in horizontal and vertical direction.
+@* The six tap filtering operation is described in sec 8.4.2.2.1 titled
+@* "Luma sample interpolation process"
+@*
+@* @par Description:
+@* This function is called to obtain pixels lying at the following
+@* location (1/4,1/4) or (3/4,1/4) or (1/4,3/4) or (3/4,3/4).
+@* The function interpolates the predictors first in the horizontal direction
+@* and then in the vertical direction, and then averages these two
+@* values.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pu1_tmp: temporary buffer
+@*
+@* @param[in] dydx: x and y reference offset for qpel calculations
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/;
+
+@void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd,
+@ UWORD8* pu1_tmp,
+@ UWORD32 dydx)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ht
+@ r5 => wd
+@ r6 => dydx
+
+.text
+.p2align 2
+
+ .global ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q
+
+ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q:
+
+ stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (10 words, 40 bytes)
+ vstmdb sp!, {d8-d15} @ save d8-d15 (64 bytes); stack arguments now start at sp+104
+ ldr r4, [sp, #104] @ r4 = ht
+ ldr r5, [sp, #108] @ r5 = wd
+ ldr r6, [sp, #116] @ r6 = dydx (pu1_tmp at sp+112 is not loaded by this function)
+ and r7, r6, #3 @ x_offset = dydx & 0x3
+ add r7, r0, r7, lsr #1 @pu1_pred_vert = pu1_src + (x_offset>>1)
+
+ and r6, r6, #12 @ isolate bits 2-3 of dydx (y_offset << 2)
+ lsr r6, r6, #3 @ r6 = y_offset >> 1
+ mul r6, r2, r6 @ r6 = (y_offset >> 1) * src_strd
+ add r6, r0, r6 @pu1_pred_horz = pu1_src + (y_offset>>1)*src_strd
+ sub r7, r7, r2, lsl #1 @ pu1_pred_vert -= 2*src_strd (vertical filter starts two rows above)
+ sub r6, r6, #2 @ pu1_pred_horz -= 2 (horizontal filter starts two bytes to the left)
+ vmov.u8 d30, #20 @ Filter coeff 20
+ vmov.u8 d31, #5 @ Filter coeff 5
+
+ subs r12, r5, #4 @if wd=4 branch to loop_4
+ beq loop_4
+ subs r12, r5, #8 @if wd=8 branch to loop_8
+ beq loop_8
+
+loop_16:
+ vld1.32 {q0}, [r7], r2 @ vertical source row 0, 16 bytes
+ vld1.32 {q1}, [r7], r2 @ vertical source row 1
+ vld1.32 {q2}, [r7], r2 @ vertical source row 2
+ vld1.32 {q3}, [r7], r2 @ vertical source row 3
+ vld1.32 {q4}, [r7], r2 @ vertical source row 4
+ add r11, r6, #8 @ r11 = horizontal source pointer for output columns 8-15
+ vld1.32 {q5}, [r7], r2 @ vertical source row 5
+ vld1.32 {q9}, [r6], r2 @ horz row0, col 0
+ vaddl.u8 q12, d0, d10 @ vert, cols 0-7: row0 + row5
+ vmlal.u8 q12, d4, d30 @ += 20 * row2
+ vmlal.u8 q12, d6, d30 @ += 20 * row3
+ vmlsl.u8 q12, d2, d31 @ -= 5 * row1
+ vmlsl.u8 q12, d8, d31 @ -= 5 * row4
+ vext.8 d23, d18, d19, #5 @ a5
+ vext.8 d20, d18, d19, #2 @ a2
+ vext.8 d21, d18, d19, #3 @ a3
+ vext.8 d22, d18, d19, #4 @ a4
+ vext.8 d19, d18, d19, #1 @ a1
+ vqrshrun.s16 d26, q12, #5 @ vert row 0, cols 0-7: (sum + 16) >> 5, saturated to 8 bits
+ vaddl.u8 q14, d18, d23 @ horz, cols 0-7: a0 + a5
+ vmlal.u8 q14, d20, d30 @ += 20 * a2
+ vmlal.u8 q14, d21, d30 @ += 20 * a3
+ vmlsl.u8 q14, d19, d31 @ -= 5 * a1
+ vmlsl.u8 q14, d22, d31 @ -= 5 * a4
+ vld1.32 {q9}, [r11], r2 @ horz row 0, col 1
+ vaddl.u8 q12, d1, d11 @ vert, cols 8-15: row0 + row5
+ vmlal.u8 q12, d5, d30 @ += 20 * row2
+ vmlal.u8 q12, d7, d30 @ += 20 * row3
+ vmlsl.u8 q12, d3, d31 @ -= 5 * row1
+ vmlsl.u8 q12, d9, d31 @ -= 5 * row4
+ vqrshrun.s16 d28, q14, #5 @ horz row 0, cols 0-7
+ vext.8 d23, d18, d19, #5 @ a5
+ vext.8 d20, d18, d19, #2 @ a2
+ vext.8 d21, d18, d19, #3 @ a3
+ vext.8 d22, d18, d19, #4 @ a4
+ vext.8 d19, d18, d19, #1 @ a1
+ vqrshrun.s16 d27, q12, #5 @ vert row 0, cols 8-15
+ vld1.32 {q6}, [r7], r2 @ src[6_0]
+
+ vaddl.u8 q12, d18, d23 @ horz, cols 8-15: a0 + a5
+ vmlal.u8 q12, d20, d30 @ += 20 * a2
+ vmlal.u8 q12, d21, d30 @ += 20 * a3
+ vmlsl.u8 q12, d19, d31 @ -= 5 * a1
+ vmlsl.u8 q12, d22, d31 @ -= 5 * a4
+
+ vaddl.u8 q8, d2, d12 @ vert row 1, cols 0-7: row1 + row6
+ vmlal.u8 q8, d6, d30 @ += 20 * row3
+ vmlal.u8 q8, d8, d30 @ += 20 * row4
+ vmlsl.u8 q8, d4, d31 @ -= 5 * row2
+ vmlsl.u8 q8, d10, d31 @ -= 5 * row5
+ vqrshrun.s16 d29, q12, #5 @ horz row 0, cols 8-15
+ vld1.32 {q9}, [r6], r2 @ horz row 1, col 0
+
+ vaddl.u8 q12, d3, d13 @ vert row 1, cols 8-15: row1 + row6
+ vmlal.u8 q12, d7, d30 @ += 20 * row3
+ vmlal.u8 q12, d9, d30 @ += 20 * row4
+ vmlsl.u8 q12, d5, d31 @ -= 5 * row2
+ vmlsl.u8 q12, d11, d31 @ -= 5 * row5
+ vrhadd.u8 q14, q14, q13 @ row 0: rounded average of horz and vert results
+ vqrshrun.s16 d26, q8, #5 @ vert row 1, cols 0-7
+ vext.8 d23, d18, d19, #5 @ a5
+ vext.8 d20, d18, d19, #2 @ a2
+ vext.8 d21, d18, d19, #3 @ a3
+ vext.8 d22, d18, d19, #4 @ a4
+ vst1.32 {q14}, [r1], r3 @ store row 0
+ vext.8 d19, d18, d19, #1 @ a1
+ vqrshrun.s16 d27, q12, #5 @ vert row 1, cols 8-15
+
+ vaddl.u8 q14, d18, d23 @ horz row 1, cols 0-7: a0 + a5
+ vmlal.u8 q14, d20, d30 @ += 20 * a2
+ vmlal.u8 q14, d21, d30 @ += 20 * a3
+ vmlsl.u8 q14, d19, d31 @ -= 5 * a1
+ vmlsl.u8 q14, d22, d31 @ -= 5 * a4
+
+ vld1.32 {q9}, [r11], r2 @ horz row 1, col 1
+
+ vext.8 d23, d18, d19, #5 @ a5
+ vext.8 d20, d18, d19, #2 @ a2
+ vext.8 d21, d18, d19, #3 @ a3
+ vext.8 d22, d18, d19, #4 @ a4
+ vext.8 d19, d18, d19, #1 @ a1
+
+ vqrshrun.s16 d28, q14, #5 @ horz row 1, cols 0-7
+ vaddl.u8 q12, d18, d23 @ horz row 1, cols 8-15: a0 + a5
+ vmlal.u8 q12, d20, d30 @ += 20 * a2
+ vmlal.u8 q12, d21, d30 @ += 20 * a3
+ vmlsl.u8 q12, d19, d31 @ -= 5 * a1
+ vmlsl.u8 q12, d22, d31 @ -= 5 * a4
+
+ vqrshrun.s16 d29, q12, #5 @ horz row 1, cols 8-15
+ vrhadd.u8 q14, q14, q13 @ row 1: rounded average of horz and vert results
+ vst1.32 {q14}, [r1], r3 @ store row 1
+
+ subs r4, r4, #2 @ 2 rows processed, decrement by 2
+ subne r7, r7 , r2, lsl #2 @ more rows left: rewind the vertical pointer by 4*src_strd ...
+ subne r7, r7, r2 @ ... plus 1*src_strd: 7 rows were read, so the net advance is 2 rows
+ beq end_func @ all rows done (flags still from the subs above)
+
+ b loop_16 @ otherwise process the next two rows
+
+
+loop_8:
+ vld1.32 d0, [r7], r2 @ vertical source row 0, 8 bytes
+ vld1.32 d1, [r7], r2 @ vertical source row 1
+ vld1.32 d2, [r7], r2 @ vertical source row 2
+ vld1.32 d3, [r7], r2 @ vertical source row 3
+ vld1.32 d4, [r7], r2 @ vertical source row 4
+ vld1.32 d5, [r7], r2 @ vertical source row 5
+ vaddl.u8 q5, d0, d5 @ vert row 0: row0 + row5
+ vmlal.u8 q5, d2, d30 @ += 20 * row2
+ vmlal.u8 q5, d3, d30 @ += 20 * row3
+ vmlsl.u8 q5, d1, d31 @ -= 5 * row1
+ vmlsl.u8 q5, d4, d31 @ -= 5 * row4
+ vld1.32 {q6}, [r6], r2 @horz row 0
+ vext.8 d17, d12, d13, #5 @ a5
+ vext.8 d14, d12, d13, #2 @ a2
+ vext.8 d15, d12, d13, #3 @ a3
+ vext.8 d16, d12, d13, #4 @ a4
+ vext.8 d13, d12, d13, #1 @ a1
+ vqrshrun.s16 d26, q5, #5 @ vert row 0: (sum + 16) >> 5, saturated to 8 bits
+ vld1.32 d6, [r7], r2 @ src[6_0]
+ vaddl.u8 q5, d12, d17 @ horz row 0: a0 + a5
+ vmlal.u8 q5, d14, d30 @ += 20 * a2
+ vmlal.u8 q5, d15, d30 @ += 20 * a3
+ vmlsl.u8 q5, d13, d31 @ -= 5 * a1
+ vmlsl.u8 q5, d16, d31 @ -= 5 * a4
+ vld1.32 {q6}, [r6], r2 @ horz row 1
+ vaddl.u8 q9, d1, d6 @ vert row 1: row1 + row6
+ vmlal.u8 q9, d3, d30 @ += 20 * row3
+ vmlal.u8 q9, d4, d30 @ += 20 * row4
+ vmlsl.u8 q9, d2, d31 @ -= 5 * row2
+ vmlsl.u8 q9, d5, d31 @ -= 5 * row5
+ vqrshrun.s16 d28, q5, #5 @ horz row 0
+ vext.8 d17, d12, d13, #5 @ a5
+ vext.8 d14, d12, d13, #2 @ a2
+ vext.8 d15, d12, d13, #3 @ a3
+ vext.8 d16, d12, d13, #4 @ a4
+ vext.8 d13, d12, d13, #1 @ a1
+ vqrshrun.s16 d27, q9, #5 @ vert row 1
+ vaddl.u8 q5, d12, d17 @ horz row 1: a0 + a5
+ vmlal.u8 q5, d14, d30 @ += 20 * a2
+ vmlal.u8 q5, d15, d30 @ += 20 * a3
+ vmlsl.u8 q5, d13, d31 @ -= 5 * a1
+ vmlsl.u8 q5, d16, d31 @ -= 5 * a4
+ vqrshrun.s16 d29, q5, #5 @ horz row 1
+ vrhadd.u8 q13, q13, q14 @ rounded average of vert (q13) and horz (q14) for both rows
+ vst1.32 d26, [r1], r3 @ store row 0
+ vst1.32 d27, [r1], r3 @ store row 1
+
+ subs r4, r4, #2 @ 2 rows processed, decrement by 2
+ subne r7, r7 , r2, lsl #2 @ more rows left: rewind the vertical pointer by 4*src_strd ...
+ subne r7, r7, r2 @ ... plus 1*src_strd: 7 rows were read, so the net advance is 2 rows
+ beq end_func @ all rows done (flags still from the subs above)
+ b loop_8 @ otherwise process the next two rows
+
+loop_4:
+ vld1.32 d0[0], [r7], r2 @ vertical source row 0, 4 bytes into lane 0
+ vld1.32 d1[0], [r7], r2 @ vertical source row 1
+ vld1.32 d2[0], [r7], r2 @ vertical source row 2
+ vld1.32 d3[0], [r7], r2 @ vertical source row 3
+ vld1.32 d4[0], [r7], r2 @ vertical source row 4
+ vld1.32 d5[0], [r7], r2 @ vertical source row 5
+ vaddl.u8 q5, d0, d5 @ vert row 0: row0 + row5 (only the low 4 results are stored)
+ vmlal.u8 q5, d2, d30 @ += 20 * row2
+ vmlal.u8 q5, d3, d30 @ += 20 * row3
+ vmlsl.u8 q5, d1, d31 @ -= 5 * row1
+ vmlsl.u8 q5, d4, d31 @ -= 5 * row4
+ vld1.32 {q6}, [r6], r2 @load for horz filter row 0
+ vext.8 d17, d12, d13, #5 @ a5
+ vext.8 d14, d12, d13, #2 @ a2
+ vext.8 d15, d12, d13, #3 @ a3
+ vext.8 d16, d12, d13, #4 @ a4
+ vext.8 d13, d12, d13, #1 @ a1
+ vqrshrun.s16 d26, q5, #5 @ vert row 0: (sum + 16) >> 5, saturated to 8 bits
+ vld1.32 d6[0], [r7], r2 @ Vector load from src[6_0]
+ vaddl.u8 q5, d12, d17 @ horz row 0: a0 + a5
+ vmlal.u8 q5, d14, d30 @ += 20 * a2
+ vmlal.u8 q5, d15, d30 @ += 20 * a3
+ vmlsl.u8 q5, d13, d31 @ -= 5 * a1
+ vmlsl.u8 q5, d16, d31 @ -= 5 * a4
+ vld1.32 {q6}, [r6], r2 @horz row 1
+ vaddl.u8 q9, d1, d6 @ vert row 1: row1 + row6
+ vmlal.u8 q9, d3, d30 @ += 20 * row3
+ vmlal.u8 q9, d4, d30 @ += 20 * row4
+ vmlsl.u8 q9, d2, d31 @ -= 5 * row2
+ vmlsl.u8 q9, d5, d31 @ -= 5 * row5
+ vqrshrun.s16 d28, q5, #5 @ horz row 0
+ vext.8 d17, d12, d13, #5 @ a5
+ vext.8 d14, d12, d13, #2 @ a2
+ vext.8 d15, d12, d13, #3 @ a3
+ vext.8 d16, d12, d13, #4 @ a4
+ vext.8 d13, d12, d13, #1 @ a1
+ vqrshrun.s16 d27, q9, #5 @ vert row 1
+ vaddl.u8 q5, d12, d17 @ horz row 1: a0 + a5
+ vmlal.u8 q5, d14, d30 @ += 20 * a2
+ vmlal.u8 q5, d15, d30 @ += 20 * a3
+ vmlsl.u8 q5, d13, d31 @ -= 5 * a1
+ vmlsl.u8 q5, d16, d31 @ -= 5 * a4
+ vqrshrun.s16 d29, q5, #5 @ horz row 1
+ vrhadd.u8 q13, q13, q14 @ rounded average of vert (q13) and horz (q14) for both rows
+ vst1.32 d26[0], [r1], r3 @ store row 0 (4 bytes)
+ vst1.32 d27[0], [r1], r3 @ store row 1 (4 bytes)
+
+ subs r4, r4, #2 @ 2 rows processed, decrement by 2
+ subne r7, r7 , r2, lsl #2 @ more rows left: rewind the vertical pointer by 4*src_strd ...
+ subne r7, r7, r2 @ ... plus 1*src_strd: 7 rows were read, so the net advance is 2 rows
+ beq end_func @ all rows done (flags still from the subs above)
+ b loop_4 @ otherwise process the next two rows
+end_func:
+ vldmia sp!, {d8-d15} @ restore d8-d15 saved in the prologue
+ ldmfd sp!, {r4-r12, pc} @ restore r4-r12 and return by loading the saved lr into pc
+
+
diff --git a/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
new file mode 100755
index 0000000..d45055e
--- /dev/null
+++ b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
@@ -0,0 +1,330 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_inter_pred_luma_vert_qpel_a9q.s
+@*
+@* @brief
+@* Contains function definitions for inter prediction vertical quarter pel interpolation.
+@*
+@* @author
+@* Mohit
+@*
+@* @par List of Functions:
+@*
+@* - ih264_inter_pred_luma_vert_qpel_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@
+
+@/**
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Quarter pel interprediction luma filter for vertical input
+@*
+@* @par Description:
+@* Applies a 6 tap vertical filter. The output is clipped to 8 bits.
+@* See sec 8.4.2.2.1 titled "Luma sample interpolation process"
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pu1_tmp: temporary buffer: UNUSED in this function
+@*
+@* @param[in] dydx: x and y reference offset for qpel calculations.
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@void ih264_inter_pred_luma_vert_qpel (
+@ UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ht,
+@ WORD32 wd,
+@ UWORD8* pu1_tmp,
+@ UWORD32 dydx)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r5 => ht
+@ r6 => wd
+@ r7 => dydx
+
+.text
+.p2align 2
+
+ .global ih264_inter_pred_luma_vert_qpel_a9q
+
+ih264_inter_pred_luma_vert_qpel_a9q:
+
+ stmfd sp!, {r4-r12, r14} @ save r4-r12 and lr (10 words, 40 bytes)
+ vstmdb sp!, {d8-d15} @ save d8-d15 (64 bytes); stack arguments now start at sp+104
+ ldr r5, [sp, #104] @Loads ht
+
+ ldr r6, [sp, #108] @Loads wd
+ ldr r7, [sp, #116] @Loads dydx (pu1_tmp at sp+112 is not loaded by this function)
+ and r7, r7, #12 @ isolate bits 2-3 of dydx (y_offset << 2)
+ lsr r7, r7, #3 @ r7 = y_offset >> 1
+ mul r7, r2, r7 @ r7 = (y_offset >> 1) * src_strd
+ add r7, r0, r7 @ r7 = pu1_src + (y_offset>>1)*src_strd: unfiltered rows averaged with the filter output
+ vmov.u16 q11, #20 @ Filter coeff 0x14 into Q11
+ sub r0, r0, r2, lsl #1 @ pu1_src -= 2*src_strd (vertical filter starts two rows above)
+ subs r12, r6, #8 @if wd=8 branch to loop_8
+ vmov.u16 q12, #5 @ Filter coeff 0x5 into Q12
+ beq loop_8 @ flags still from the subs above (vmov does not change them)
+
+ subs r12, r6, #4 @if wd=4 branch to loop_4
+ beq loop_4
+
+loop_16: @when wd=16; produces four output rows per pass
+
+ vld1.u32 {q0}, [r0], r2 @ row 0, 16 bytes
+ vld1.u32 {q1}, [r0], r2 @ row 1
+ vld1.u32 {q2}, [r0], r2 @ row 2
+ vld1.u32 {q3}, [r0], r2 @ row 3
+ vld1.u32 {q4}, [r0], r2 @ row 4
+ vaddl.u8 q6, d4, d6 @ temp1 = src[2_0] + src[3_0]
+ vld1.u32 {q5}, [r0], r2 @ row 5
+ vaddl.u8 q7, d0, d10 @ temp = src[0_0] + src[5_0]
+ vaddl.u8 q8, d2, d8 @ temp2 = src[1_0] + src[4_0]
+ vmla.u16 q7, q6, q11 @ temp += temp1 * 20
+ vaddl.u8 q10, d1, d11 @ temp4 = src[0_8] + src[5_8]
+ vaddl.u8 q9, d5, d7 @ temp3 = src[2_8] + src[3_8]
+ vmla.u16 q10, q9, q11 @ temp4 += temp3 * 20
+ vld1.u32 {q0}, [r0], r2 @ row 6 (replaces row 0)
+ vaddl.u8 q13, d3, d9 @ temp5 = src[1_8] + src[4_8]
+ vaddl.u8 q6, d6, d8 @ output row 1, cols 0-7: row3 + row4
+ vmls.u16 q7, q8, q12 @ temp -= temp2 * 5
+ vaddl.u8 q8, d2, d0 @ output row 1, cols 0-7: row1 + row6
+ vaddl.u8 q9, d4, d10 @ output row 1, cols 0-7: row2 + row5
+ vmla.u16 q8, q6, q11 @ += 20 * (row3 + row4)
+ vmls.u16 q10, q13, q12 @ temp4 -= temp5 * 5
+ vaddl.u8 q13, d5, d11 @ output row 1, cols 8-15: row2 + row5
+ vaddl.u8 q6, d7, d9 @ output row 1, cols 8-15: row3 + row4
+ vqrshrun.s16 d30, q7, #5 @ dst[0_0] = CLIP_U8((temp +16) >> 5)
+ vaddl.u8 q7, d3, d1 @ output row 1, cols 8-15: row1 + row6
+ vld1.u32 {q1}, [r0], r2 @ row 7 (replaces row 1)
+ vmla.u16 q7, q6, q11 @ += 20 * (row3 + row4)
+ vmls.u16 q8, q9, q12 @ -= 5 * (row2 + row5)
+ vqrshrun.s16 d31, q10, #5 @ dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ vld1.u32 {q10}, [r7], r2 @ Load for interpolation row 0
+ vrhadd.u8 q15, q10, q15 @ Interpolation to obtain qpel value
+ vaddl.u8 q9, d4, d2 @ output row 2, cols 0-7: row2 + row7
+ vaddl.u8 q6, d8, d10 @ output row 2, cols 0-7: row4 + row5
+
+ vst1.u32 {q15}, [r1], r3 @ Vector store to dst[0_0]
+ vmla.u16 q9, q6, q11 @ += 20 * (row4 + row5)
+ vaddl.u8 q10, d6, d0 @ output row 2, cols 0-7: row3 + row6
+ vmls.u16 q7, q13, q12 @ output row 1, cols 8-15: -= 5 * (row2 + row5)
+ vqrshrun.s16 d30, q8, #5 @ output row 1, cols 0-7
+ vaddl.u8 q6, d9, d11 @ output row 2, cols 8-15: row4 + row5
+ vaddl.u8 q8, d5, d3 @ output row 2, cols 8-15: row2 + row7
+ vaddl.u8 q13, d7, d1 @ output row 2, cols 8-15: row3 + row6
+ vmla.u16 q8, q6, q11 @ += 20 * (row4 + row5)
+ vmls.u16 q9, q10, q12 @ -= 5 * (row3 + row6)
+ vld1.u32 {q2}, [r0], r2 @ row 8 (replaces row 2)
+
+ vqrshrun.s16 d31, q7, #5 @ output row 1, cols 8-15
+ vld1.u32 {q7}, [r7], r2 @ Load for interpolation row 1
+ vaddl.u8 q6, d10, d0 @ output row 3, cols 0-7: row5 + row6
+ vrhadd.u8 q15, q7, q15 @ Interpolation to obtain qpel value
+ vaddl.u8 q7, d6, d4 @ output row 3, cols 0-7: row3 + row8
+ vaddl.u8 q10, d8, d2 @ output row 3, cols 0-7: row4 + row7
+ vmla.u16 q7, q6, q11 @ += 20 * (row5 + row6)
+ vmls.u16 q8, q13, q12 @ output row 2, cols 8-15: -= 5 * (row3 + row6)
+ vst1.u32 {q15}, [r1], r3 @store row 1
+ vqrshrun.s16 d30, q9, #5 @ output row 2, cols 0-7
+ vaddl.u8 q9, d7, d5 @ output row 3, cols 8-15: row3 + row8
+ vaddl.u8 q6, d11, d1 @ output row 3, cols 8-15: row5 + row6
+ vmla.u16 q9, q6, q11 @ += 20 * (row5 + row6)
+ vaddl.u8 q13, d9, d3 @ output row 3, cols 8-15: row4 + row7
+ vmls.u16 q7, q10, q12 @ output row 3, cols 0-7: -= 5 * (row4 + row7)
+ vqrshrun.s16 d31, q8, #5 @ output row 2, cols 8-15
+ vld1.u32 {q8}, [r7], r2 @ Load for interpolation row 2
+ vmls.u16 q9, q13, q12 @ output row 3, cols 8-15: -= 5 * (row4 + row7)
+ vrhadd.u8 q15, q8, q15 @ Interpolation to obtain qpel value
+ vaddl.u8 q6, d0, d2 @ row6 + row7; NOTE(review): from here on the q6/q7/q8/q9/q10/q13 sums are overwritten before any later read
+ vst1.u32 {q15}, [r1], r3 @store row 2
+ vaddl.u8 q8, d10, d4 @ row5 + row8 (not consumed, see note above)
+ vaddl.u8 q10, d9, d7 @ row4 + row3, cols 8-15 (not consumed)
+ vqrshrun.s16 d30, q7, #5 @ output row 3, cols 0-7
+ vaddl.u8 q13, d5, d11 @ row8 + row5, cols 8-15 (not consumed)
+ vaddl.u8 q7, d8, d6 @ row4 + row3, cols 0-7 (not consumed)
+ vqrshrun.s16 d31, q9, #5 @ output row 3, cols 8-15
+ vld1.u32 {q9}, [r7], r2 @ Load for interpolation row 3
+ vmla.u16 q7, q6, q11 @ += 20 * (row6 + row7) (not consumed)
+ vrhadd.u8 q15, q9, q15 @ Interpolation to obtain qpel value
+ vaddl.u8 q9, d1, d3 @ row6 + row7, cols 8-15 (not consumed)
+ vst1.u32 {q15}, [r1], r3 @store row 3
+ subs r5, r5, #4 @ 4 rows processed, decrement by 4
+ subne r0, r0 , r2, lsl #2 @ more rows left: rewind src by 4*src_strd ...
+ subne r0, r0, r2 @ ... plus 1*src_strd: 9 rows were read, so the net advance is 4 rows
+ beq end_func @ all rows done (flags still from the subs above)
+
+ b loop_16 @ otherwise process the next four rows
+
+
+loop_8:
+
+ @// Produces four output rows of 8 pixels per pass
+ vld1.u32 d0, [r0], r2 @ row 0, 8 bytes
+ vld1.u32 d1, [r0], r2 @ row 1
+ vld1.u32 d2, [r0], r2 @ row 2
+ vld1.u32 d3, [r0], r2 @ row 3
+ vld1.u32 d4, [r0], r2 @ row 4
+ vld1.u32 d5, [r0], r2 @ row 5
+
+ vaddl.u8 q3, d2, d3 @ temp1 = src[2_0] + src[3_0]
+ vaddl.u8 q4, d0, d5 @ temp = src[0_0] + src[5_0]
+ vaddl.u8 q5, d1, d4 @ temp2 = src[1_0] + src[4_0]
+ vmla.u16 q4, q3, q11 @ temp += temp1 * 20
+ vld1.u32 d6, [r0], r2 @ row 6 (q3 has been consumed, so d6 is free)
+ vaddl.u8 q7, d3, d4 @ output row 1: row3 + row4
+ vaddl.u8 q8, d1, d6 @ output row 1: row1 + row6
+ vaddl.u8 q9, d2, d5 @ output row 1: row2 + row5
+ vmls.u16 q4, q5, q12 @ temp -= temp2 * 5
+ vmla.u16 q8, q7, q11 @ += 20 * (row3 + row4)
+ vld1.u32 d7, [r0], r2 @ row 7
+ vaddl.u8 q10, d4, d5 @ output row 2: row4 + row5
+ vaddl.u8 q6, d2, d7 @ output row 2: row2 + row7
+ vaddl.u8 q5, d3, d6 @ output row 2: row3 + row6
+ vmls.u16 q8, q9, q12 @ output row 1: -= 5 * (row2 + row5)
+ vqrshrun.s16 d26, q4, #5 @ dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+ vmla.u16 q6, q10, q11 @ output row 2: += 20 * (row4 + row5)
+ vld1.32 d8, [r7], r2 @Load value for interpolation (row0)
+ vld1.32 d9, [r7], r2 @Load value for interpolation (row1)
+ vld1.u32 d0, [r0], r2 @ row 8 (replaces row 0)
+ vaddl.u8 q7, d5, d6 @ output row 3: row5 + row6
+ vqrshrun.s16 d27, q8, #5 @ output row 1: (sum + 16) >> 5, saturated to 8 bits
+ vrhadd.u8 q13, q4, q13 @ Interpolation step for qpel calculation
+ vaddl.u8 q10, d3, d0 @ output row 3: row3 + row8
+ vmls.u16 q6, q5, q12 @ output row 2: -= 5 * (row3 + row6)
+ vst1.u32 d26, [r1], r3 @ Vector store to dst[0_0]
+ vaddl.u8 q9, d4, d7 @ output row 3: row4 + row7
+ vmla.u16 q10, q7, q11 @ output row 3: += 20 * (row5 + row6)
+ vst1.u32 d27, [r1], r3 @ Vector store to dst[1_0]
+ vqrshrun.s16 d28, q6, #5 @ output row 2
+ vmls.u16 q10, q9, q12 @ output row 3: -= 5 * (row4 + row7)
+ vld1.32 d12, [r7], r2 @Load value for interpolation (row2)
+ vld1.32 d13, [r7], r2 @Load value for interpolation (row3)
+ vqrshrun.s16 d29, q10, #5 @ output row 3
+ subs r9, r5, #4 @ NOTE(review): r9 is not read in this function and the flags are recomputed by the subs below
+ vrhadd.u8 q14, q6, q14 @ Interpolation step for qpel calculation (rows 2 and 3)
+ vst1.u32 d28, [r1], r3 @store row 2
+ vst1.u32 d29, [r1], r3 @store row 3
+
+ subs r5, r5, #4 @ 4 rows processed, decrement by 4
+ subne r0, r0 , r2, lsl #2 @ more rows left: rewind src by 4*src_strd ...
+ subne r0, r0, r2 @ ... plus 1*src_strd: 9 rows were read, so the net advance is 4 rows
+ beq end_func @ all rows done (flags still from the subs above)
+ b loop_8 @ otherwise process the next four rows
+
+loop_4:
+@// Produces four output rows of 4 pixels per pass
+
+ vld1.u32 d0[0], [r0], r2 @ row 0, 4 bytes into lane 0
+ vld1.u32 d1[0], [r0], r2 @ row 1
+ vld1.u32 d2[0], [r0], r2 @ row 2
+ vld1.u32 d3[0], [r0], r2 @ row 3
+ vld1.u32 d4[0], [r0], r2 @ row 4
+ vld1.u32 d5[0], [r0], r2 @ row 5
+
+ vaddl.u8 q3, d2, d3 @ temp1 = src[2_0] + src[3_0]
+ vaddl.u8 q4, d0, d5 @ temp = src[0_0] + src[5_0]
+ vaddl.u8 q5, d1, d4 @ temp2 = src[1_0] + src[4_0]
+ vmla.u16 q4, q3, q11 @ temp += temp1 * 20
+ vld1.u32 d6, [r0], r2 @ row 6; NOTE(review): reads 8 bytes, unlike the 4-byte lane loads used for the other rows
+ vaddl.u8 q7, d3, d4 @ output row 1: row3 + row4
+ vaddl.u8 q8, d1, d6 @ output row 1: row1 + row6
+ vaddl.u8 q9, d2, d5 @ output row 1: row2 + row5
+ vmls.u16 q4, q5, q12 @ temp -= temp2 * 5
+ vld1.u32 d7[0], [r0], r2 @ row 7
+ vmla.u16 q8, q7, q11 @ output row 1: += 20 * (row3 + row4)
+ vaddl.u8 q10, d4, d5 @ output row 2: row4 + row5
+ vaddl.u8 q6, d2, d7 @ output row 2: row2 + row7
+ vaddl.u8 q5, d3, d6 @ output row 2: row3 + row6
+ vmls.u16 q8, q9, q12 @ output row 1: -= 5 * (row2 + row5)
+ vqrshrun.s16 d26, q4, #5 @ dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+ vld1.u32 d8[0], [r7], r2 @Load value for interpolation - row 0
+ vld1.u32 d9[0], [r7], r2 @Load value for interpolation - row 1
+ vmla.u16 q6, q10, q11 @ output row 2: += 20 * (row4 + row5)
+ vld1.u32 d0[0], [r0], r2 @ row 8 (replaces row 0)
+ vaddl.u8 q7, d5, d6 @ output row 3: row5 + row6
+ vqrshrun.s16 d27, q8, #5 @ output row 1: (sum + 16) >> 5, saturated to 8 bits
+ vaddl.u8 q10, d3, d0 @ output row 3: row3 + row8
+ vrhadd.u8 q13, q13, q4 @Interpolation step for qpel calculation
+ vmls.u16 q6, q5, q12 @ output row 2: -= 5 * (row3 + row6)
+ vst1.u32 d26[0], [r1], r3 @ Vector store to dst[0_0]
+ vaddl.u8 q9, d4, d7 @ output row 3: row4 + row7
+ vmla.u16 q10, q7, q11 @ output row 3: += 20 * (row5 + row6)
+ vst1.u32 d27[0], [r1], r3 @ store row 1
+ vqrshrun.s16 d28, q6, #5 @ output row 2
+ vld1.u32 d12[0], [r7], r2 @Load value for interpolation - row 2
+ vld1.u32 d13[0], [r7], r2 @Load value for interpolation - row 3
+
+ vmls.u16 q10, q9, q12 @ output row 3: -= 5 * (row4 + row7)
+ vqrshrun.s16 d29, q10, #5 @ output row 3
+ vrhadd.u8 q14, q6, q14 @Interpolation step for qpel calculation
+ vst1.u32 d28[0], [r1], r3 @store row 2
+ vst1.u32 d29[0], [r1], r3 @store row 3
+
+ subs r5, r5, #8 @ zero only when r5 was 8 on entry to this pass, i.e. four rows still remain
+ subeq r0, r0, r2, lsl #2 @ in that case rewind src by 4*src_strd ...
+ subeq r0, r0, r2 @ ... plus 1*src_strd: 9 rows were read, so the net advance is 4 rows
+ beq loop_4 @ Loop if height==8; any other value falls through to end_func
+
+end_func:
+ vldmia sp!, {d8-d15} @ restore d8-d15 saved in the prologue
+ ldmfd sp!, {r4-r12, pc} @ restore r4-r12 and return by loading the saved lr into pc
+
+
diff --git a/common/arm/ih264_intra_pred_chroma_a9q.s b/common/arm/ih264_intra_pred_chroma_a9q.s
new file mode 100755
index 0000000..d03fc55
--- /dev/null
+++ b/common/arm/ih264_intra_pred_chroma_a9q.s
@@ -0,0 +1,551 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_intra_pred_chroma_a9q.s
+@*
+@* @brief
+@* Contains function definitions for intra chroma prediction .
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_intra_pred_chroma_8x8_mode_horz_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_vert_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_dc_a9q()
+@* - ih264_intra_pred_chroma_8x8_mode_plane_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@
+.text
+.p2align 2
+
+ .extern ih264_gai1_intrapred_chroma_plane_coeffs1 @ coefficient table defined outside this file
+.hidden ih264_gai1_intrapred_chroma_plane_coeffs1
+ .extern ih264_gai1_intrapred_chroma_plane_coeffs2 @ second coefficient table defined outside this file
+.hidden ih264_gai1_intrapred_chroma_plane_coeffs2
+scratch_chroma_intrapred_addr1:
+ .long ih264_gai1_intrapred_chroma_plane_coeffs1 - scrlblc1 - 8 @ PC-relative offset: 'add r12, r12, pc' at scrlblc1 turns it into the table address; the -8 matches pc reading 8 bytes ahead (assumes ARM state)
+@ Same scheme for the second table: the offset is resolved by the 'add r12, r12, pc' at scrlblc2.
+scratch_intrapred_chroma_plane_addr1:
+ .long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_dc
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:DC ,described in sec 8.3.4.1
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_dc_a9q
+
+ih264_intra_pred_chroma_8x8_mode_dc_a9q:
+@ Writes 8 rows of 16 bytes to pu1_dst: q4 goes to rows 0-3 and q5 to rows 4-7 (see str_pred).
+ stmfd sp!, {r4, r14} @ save r4 and the return address (8 bytes)
+ ldr r4, [sp, #8] @ r4 = ui_neighboravailability: the stack word just above the 8 bytes pushed
+ vpush {d8-d15} @ q4-q7 (d8-d15) are overwritten below
+
+ ands r2, r4, #0x01 @ bit 0 clear (left not available) -> top_available; r2 is only a scratch destination here
+ beq top_available
+ ands r2, r4, #0x04 @ bit 2 clear (top not available) -> left_available
+ beq left_available
+@ Both left and top available
+ vld1.u8 {q0}, [r0] @ q0 = 16 bytes at pu1_src (left neighbours)
+ add r0, r0, #18
+ vld1.u8 {q1}, [r0] @ q1 = 16 bytes at pu1_src + 18 (top neighbours)
+ vaddl.u8 q2, d1, d2 @ q2 = left bytes 8..15 + top bytes 0..7 (16-bit lanes)
+ vaddl.u8 q3, d0, d3 @ q3 = left bytes 0..7 + top bytes 8..15
+ vmovl.u8 q1, d3 @ q1 = top bytes 8..15 widened to 16 bit
+ vmovl.u8 q0, d0 @ q0 = left bytes 0..7 widened to 16 bit
+
+ vadd.u16 d12, d4, d5 @ fold q2 to 4 lanes (even/odd byte sums stay in alternating lanes)
+ vadd.u16 d13, d2, d3 @ fold q1 (top 8..15)
+ vadd.u16 d15, d6, d7 @ fold q3
+ vadd.u16 d14, d0, d1 @ fold q0 (left 0..7)
+@ Each 32-bit lane holds one (even, odd) pair of 16-bit sums, so a 32-bit pairwise add totals both at once.
+ vpadd.u32 d12, d12, d15 @ d12 = { pair sum of q2, pair sum of q3 }
+ vpadd.u32 d14, d13, d14 @ d14 = { pair sum of top 8..15, pair sum of left 0..7 }
+ vqrshrun.s16 d12, q6, #3 @ (sum + 4) >> 3 narrowed to bytes: 8 samples per component
+ vqrshrun.s16 d14, q7, #2 @ (sum + 2) >> 2 narrowed to bytes: 4 samples per component
+ vdup.u16 d8, d12[0] @ rows 0-3, bytes 0..7: DC of left 8..15 + top 0..7
+ vdup.u16 d9, d14[0] @ rows 0-3, bytes 8..15: DC of top 8..15 only
+ vdup.u16 d10, d14[1] @ rows 4-7, bytes 0..7: DC of left 0..7 only
+ vdup.u16 d11, d12[1] @ rows 4-7, bytes 8..15: DC of left 0..7 + top 8..15
+ b str_pred
+
+top_available: @ONLY TOP AVAILABLE
+ ands r2, r4, #0x04 @ bit 2 clear as well (top not available either) -> none_available
+ beq none_available
+
+ add r0, r0, #18
+ vld1.u8 {q0}, [r0] @ q0 = 16 bytes at pu1_src + 18 (top neighbours)
+ vmovl.u8 q1, d0 @ widen top bytes 0..7
+ vmovl.u8 q2, d1 @ widen top bytes 8..15
+ vadd.u16 d0, d2, d3 @ fold first half to 4 lanes
+ vadd.u16 d1, d4, d5 @ fold second half to 4 lanes
+ vpaddl.u32 q0, q0 @ per half: total of the two (even, odd) 16-bit sum pairs, left in the low 32 bits of each d register
+ vqrshrun.s16 d0, q0, #2 @ (sum + 2) >> 2: bytes 0,1 = DC pair of first half, bytes 4,5 = DC pair of second half
+ vdup.u16 d8, d0[0] @ bytes 0..7 of every row: DC pair of top 0..7
+ vdup.u16 d9, d0[2] @ bytes 8..15 of every row: DC pair of top 8..15
+ vmov q5, q4 @ rows 4-7 use the same values as rows 0-3
+ b str_pred
+
+left_available: @ONLY LEFT AVAILABLE
+ vld1.u8 {q0}, [r0] @ q0 = 16 bytes at pu1_src (left neighbours)
+ vmovl.u8 q1, d0 @ widen left bytes 0..7
+ vmovl.u8 q2, d1 @ widen left bytes 8..15
+ vadd.u16 d0, d2, d3 @ fold first half to 4 lanes
+ vadd.u16 d1, d4, d5 @ fold second half to 4 lanes
+ vpaddl.u32 q0, q0 @ per half: total of the two (even, odd) 16-bit sum pairs
+ vqrshrun.s16 d0, q0, #2 @ (sum + 2) >> 2: bytes 0,1 from left 0..7, bytes 4,5 from left 8..15
+ vdup.u16 q5, d0[0] @ rows 4-7: DC pair of left bytes 0..7
+ vdup.u16 q4, d0[2] @ rows 0-3: DC pair of left bytes 8..15
+ b str_pred
+
+none_available: @NONE AVAILABLE
+ vmov.u8 q4, #128 @ no neighbours: every byte is 128
+ vmov.u8 q5, #128
+
+str_pred:
+ vst1.8 {q4}, [r1], r3 @ rows 0-3 <- q4, advancing pu1_dst by dst_strd each row
+ vst1.8 {q4}, [r1], r3
+ vst1.8 {q4}, [r1], r3
+ vst1.8 {q4}, [r1], r3
+ vst1.8 {q5}, [r1], r3 @ rows 4-7 <- q5
+ vst1.8 {q5}, [r1], r3
+ vst1.8 {q5}, [r1], r3
+ vst1.8 {q5}, [r1], r3
+
+ vpop {d8-d15} @ restore the NEON registers saved on entry
+ ldmfd sp!, {r4, pc} @ restore r4 and return
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_horz
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:Horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_horz_a9q
+
+ih264_intra_pred_chroma_8x8_mode_horz_a9q:
+@ Each of the 8 output rows is one 2-byte pair from the 16 bytes at pu1_src, replicated 8 times; pairs are consumed from the last (bytes 14,15) to the first.
+ stmfd sp!, {r14} @ save the return address
+
+ vld1.u8 {q0}, [r0] @ q0 = 16 bytes at pu1_src (8 two-byte pairs)
+ mov r2, #6 @ loop counter: decremented by 2 per iteration, so the loop body runs 3 times
+
+ vdup.u16 q1, d1[3] @ q1 = pair at bytes 14,15 replicated (row 0)
+ vdup.u16 q2, d1[2] @ q2 = pair at bytes 12,13 replicated (row 1)
+ vst1.8 {q1}, [r1], r3 @ store row 0
+
+loop_8x8_horz:
+ vext.8 q0, q0, q0, #12 @ rotate q0 so the next two unused pairs land in d1[3] and d1[2]
+ vst1.8 {q2}, [r1], r3 @ store the row prepared before the rotate
+ vdup.u16 q1, d1[3] @ next row value
+ subs r2, #2 @ two rows handled per iteration
+ vdup.u16 q2, d1[2] @ row after that
+ vst1.8 {q1}, [r1], r3
+ bne loop_8x8_horz
+
+ vext.8 q0, q0, q0, #12 @ NOTE(review): q0 is not read after this rotate
+ vst1.8 {q2}, [r1], r3 @ store row 7
+
+ ldmfd sp!, {pc} @ return
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_vert
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:vertical
+@*
+@* @par Description:
+@*Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_vert_a9q
+
+ih264_intra_pred_chroma_8x8_mode_vert_a9q:
+@ Copies the 16 bytes at pu1_src + 18 into each of the 8 destination rows.
+ stmfd sp!, {r4-r12, r14} @ NOTE(review): r4-r12 are saved but never written in this routine
+
+ add r0, r0, #18 @ r0 = pu1_src + 18
+ vld1.8 {q0}, [r0] @ q0 = 16 bytes to replicate
+
+ vst1.8 {q0}, [r1], r3 @ 8 identical rows, pu1_dst advanced by dst_strd after each
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+
+ ldmfd sp!, {r4-r12, pc} @ restore registers and return
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_chroma_8x8_mode_plane
+@*
+@* @brief
+@* Perform Intra prediction for chroma_8x8 mode:PLANE
+@*
+@* @par Description:
+@* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source containing alternate U and V samples
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination with alternate U and V samples
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_chroma_8x8_mode_plane_a9q
+ih264_intra_pred_chroma_8x8_mode_plane_a9q:
+@ Builds pred = sat_u8((a + b*t(x) + c*t(y) + 16) >> 5) per interleaved component; t() values come from the coeffs2 table.
+ stmfd sp!, {r4-r10, r12, lr}
+ vpush {d8-d15} @ q4-q7 (d8-d15) are overwritten below
+
+@ Gather the neighbour bytes used for the two weighted difference sums.
+ vld1.32 d0, [r0] @ d0 = src bytes 0..7
+ add r10, r0, #10
+ vld1.32 d1, [r10] @ d1 = src bytes 10..17
+ add r10, r10, #6 @ r10 = src + 16
+ vrev64.16 d5, d0 @ d5 = d0 with its four 2-byte pairs in reverse order
+ vld1.32 d2, [r10]! @ d2 = src bytes 16..23; writeback moves r10 past the 8 bytes loaded
+ add r10, r10, #2 @ r10 = src + 26
+ vrev64.16 d7, d2 @ d7 = d2 with its four 2-byte pairs in reverse order
+ vld1.32 d3, [r10] @ d3 = src bytes 26..33
+ sub r5, r3, #8 @ NOTE(review): r5 is not read again in this routine
+ ldr r12, scratch_chroma_intrapred_addr1 @ PC-relative offset of the coeffs1 table
+scrlblc1:
+ add r12, r12, pc @ r12 = address of ih264_gai1_intrapred_chroma_plane_coeffs1
+ vsubl.u8 q5, d5, d1 @ q5 = reversed(src 0..7) - src 10..17 (16-bit differences)
+ vld1.64 {q4}, [r12] @ q4 = 16 bytes of coeffs1, used below as eight 16-bit multiplication factors
+ vsubl.u8 q6, d3, d7 @ q6 = src 26..33 - reversed(src 16..23)
+ vmul.s16 q7, q5, q4 @ weight the differences taken from src 0..17
+ vmul.s16 q8, q6, q4 @ weight the differences taken from src 16..33
+ vuzp.16 q7, q8 @ de-interleave: q7 = even lanes (d14 from old q7, d15 from old q8), q8 = odd lanes (d16, d17)
+@ Two pairwise adds reduce each d register to the sum of its four lanes (replicated in every lane).
+ vpadd.s16 d14, d14
+ vpadd.s16 d15, d15
+ vpadd.s16 d16, d16
+ vpadd.s16 d17, d17
+ vpadd.s16 d14, d14
+ vpadd.s16 d15, d15
+ vpadd.s16 d16, d16
+ vpadd.s16 d17, d17
+
+ mov r6, #34
+ vdup.16 q9, r6 @ scale factor 34 in every lane
+
+ vmull.s16 q11, d14, d18 @ 34 * sum, widened to 32 bit
+ vmull.s16 q12, d15, d18
+ vmull.s16 q13, d16, d18
+ vmull.s16 q14, d17, d18
+
+ vrshrn.s32 d10, q11, #6 @ d10 = (34*sum + 32) >> 6: row slope, even component
+ vrshrn.s32 d12, q12, #6 @ d12: column slope, even component
+ vrshrn.s32 d13, q13, #6 @ d13: row slope, odd component
+ vrshrn.s32 d14, q14, #6 @ d14: column slope, odd component
+
+@ a = 16 * (src[0] + src[32]) for the even component, 16 * (src[1] + src[33]) for the odd one.
+ ldrb r6, [r0], #1 @ r6 = src[0]
+ add r10, r0, #31 @ r10 = src + 32 (r0 has already advanced by 1)
+ ldrb r8, [r0], #1 @ r8 = src[1]
+ ldrb r7, [r10], #1 @ r7 = src[32]
+ ldrb r9, [r10], #1 @ r9 = src[33]
+
+ add r6, r6, r7
+ add r8, r8, r9
+ lsl r6, r6, #4
+ lsl r8, r8, #4
+
+ vdup.16 q0, r6 @ a, even component
+ vdup.16 q1, r8 @ a, odd component
+ vdup.16 q2, d12[0] @ column slope b, even component
+ vdup.16 q3, d10[0] @ row slope c, even component
+
+ vdup.16 q12, d14[0] @ column slope b, odd component
+ vdup.16 q13, d13[0] @ row slope c, odd component
+ vzip.16 q2, q12 @ q2 = b interleaved even/odd, matching the output byte order
+ vzip.16 q3, q13 @ q3 = c interleaved even/odd
+ vzip.16 q0, q1 @ q0 = a interleaved even/odd
+
+ ldr r12, scratch_intrapred_chroma_plane_addr1 @ PC-relative offset of the coeffs2 table
+scrlblc2:
+ add r12, r12, pc @ r12 = address of ih264_gai1_intrapred_chroma_plane_coeffs2
+ vld1.64 {q4}, [r12] @ q4 = eight 16-bit position factors t(0..7) (values defined outside this file)
+ vmov.16 q5, q4
+ vmov q11, q4 @ q11 keeps the un-zipped factors for the per-row scalars below
+ vzip.16 q4, q5 @ q4 = t0,t0,t1,t1,t2,t2,t3,t3 ; q5 = t4,t4,...,t7,t7
+
+ vmul.s16 q6, q2, q4 @ b * t(x), columns 0..3
+ vmul.s16 q8, q2, q5 @ b * t(x), columns 4..7
+ vadd.s16 q6, q0, q6 @ q6 = a + b*t(x), left half of a row
+ vadd.s16 q8, q0, q8 @ q8 = a + b*t(x), right half of a row
+
+@ Rows are produced two at a time; q10 and q15 alternate as the per-row factor t(y).
+ vdup.16 q10, d22[0] @ t(0)
+ vmul.s16 q2, q3, q10 @ c * t(0)
+ vdup.16 q15, d22[1] @ t(1)
+ vmul.s16 q9, q3, q10 @ same product again, kept separately for the right half
+ vmul.s16 q7, q3, q15 @ c * t(1)
+ vmul.s16 q4, q3, q15
+ vadd.s16 q12, q6, q2 @ row 0, left half
+ vadd.s16 q0, q8, q9 @ row 0, right half
+ vadd.s16 q1, q6, q7 @ row 1, left half
+ vqrshrun.s16 d28, q12, #5 @ (value + 16) >> 5, saturated to 0..255
+ vadd.s16 q13, q8, q4 @ row 1, right half
+ vqrshrun.s16 d29, q0, #5
+ vdup.16 q10, d22[2] @ t(2)
+ vst1.8 {q14}, [r1], r3 @ store row 0
+ vqrshrun.s16 d28, q1, #5
+ vqrshrun.s16 d29, q13, #5
+ vmul.s16 q2, q3, q10 @ c * t(2)
+ vmul.s16 q9, q3, q10
+ vst1.8 {q14}, [r1], r3 @ store row 1
+ vadd.s16 q12, q6, q2
+ vadd.s16 q0, q8, q9
+ vdup.16 q15, d22[3] @ t(3)
+ vqrshrun.s16 d28, q12, #5
+ vqrshrun.s16 d29, q0, #5
+ vmul.s16 q7, q3, q15 @ c * t(3)
+ vmul.s16 q4, q3, q15
+ vst1.8 {q14}, [r1], r3 @ store row 2
+ vadd.s16 q1, q6, q7
+ vadd.s16 q13, q8, q4
+ vdup.16 q10, d23[0] @ t(4)
+ vqrshrun.s16 d28, q1, #5
+ vqrshrun.s16 d29, q13, #5
+ vmul.s16 q2, q3, q10 @ c * t(4)
+ vmul.s16 q9, q3, q10
+ vst1.8 {q14}, [r1], r3 @ store row 3
+ vadd.s16 q12, q6, q2
+ vadd.s16 q0, q8, q9
+ vdup.16 q15, d23[1] @ t(5)
+ vqrshrun.s16 d28, q12, #5
+ vqrshrun.s16 d29, q0, #5
+ vmul.s16 q7, q3, q15 @ c * t(5)
+ vmul.s16 q4, q3, q15
+ vst1.8 {q14}, [r1], r3 @ store row 4
+ vadd.s16 q1, q6, q7
+ vadd.s16 q13, q8, q4
+ vdup.16 q10, d23[2] @ t(6)
+ vqrshrun.s16 d28, q1, #5
+ vqrshrun.s16 d29, q13, #5
+ vmul.s16 q2, q3, q10 @ c * t(6)
+ vmul.s16 q9, q3, q10
+ vst1.8 {q14}, [r1], r3 @ store row 5
+ vadd.s16 q12, q6, q2
+ vadd.s16 q0, q8, q9
+ vdup.16 q15, d23[3] @ t(7)
+ vqrshrun.s16 d28, q12, #5
+ vqrshrun.s16 d29, q0, #5
+ vmul.s16 q7, q3, q15 @ c * t(7)
+ vmul.s16 q4, q3, q15
+ vst1.8 {q14}, [r1], r3 @ store row 6
+ vadd.s16 q1, q6, q7
+ vadd.s16 q13, q8, q4
+ vqrshrun.s16 d28, q1, #5
+ vqrshrun.s16 d29, q13, #5
+ vst1.8 {q14}, [r1], r3 @ store row 7
+
+
+
+end_func_plane:
+
+
+ vpop {d8-d15} @ restore the NEON registers saved on entry
+ ldmfd sp!, {r4-r10, r12, pc} @ restore core registers and return
+
+
+
+
diff --git a/common/arm/ih264_intra_pred_luma_16x16_a9q.s b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
new file mode 100755
index 0000000..e38e203
--- /dev/null
+++ b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
@@ -0,0 +1,520 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_intra_pred_luma_16x16_a9q.s
+@*
+@* @brief
+@* Contains function definitions for intra 16x16 Luma prediction .
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* - ih264_intra_pred_luma_16x16_mode_vert_a9q()
+@* - ih264_intra_pred_luma_16x16_mode_horz_a9q()
+@* - ih264_intra_pred_luma_16x16_mode_dc_a9q()
+@* - ih264_intra_pred_luma_16x16_mode_plane_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@
+
+.text
+.p2align 2
+
+
+ .extern ih264_gai1_intrapred_luma_plane_coeffs @ coefficient table defined outside this file
+.hidden ih264_gai1_intrapred_luma_plane_coeffs
+scratch_intrapred_addr1:
+ .long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8 @ PC-relative offset: 'add r7, r7, pc' at scrlbl1 turns it into the table address; the -8 matches pc reading 8 bytes ahead (assumes ARM state)
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_16x16_mode_vert_a9q
+@*
+@* @brief
+@* Perform Intra prediction for luma_16x16 mode:vertical
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_16x16 mode:Vertical ,described in sec 8.3.3.1
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_16x16_mode_vert_a9q
+
+ih264_intra_pred_luma_16x16_mode_vert_a9q:
+@ Copies the 16 bytes at pu1_src + 17 into each of the 16 destination rows.
+ stmfd sp!, {r4-r12, r14} @ NOTE(review): r4-r12 are saved but never written in this routine
+
+ add r0, r0, #17 @ r0 = pu1_src + 17
+ vld1.8 {q0}, [r0] @ q0 = 16 bytes to replicate
+
+ vst1.8 {q0}, [r1], r3 @ 16 identical rows, pu1_dst advanced by dst_strd after each
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+
+ ldmfd sp!, {r4-r12, pc} @ restore registers and return
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_16x16_mode_horz_a9q
+@*
+@* @brief
+@* Perform Intra prediction for luma_16x16 mode:horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_16x16 mode:horizontal ,described in sec 8.3.3.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_16x16_mode_horz_a9q
+
+ih264_intra_pred_luma_16x16_mode_horz_a9q:
+@ Each of the 16 output rows is one byte of the 16 at pu1_src, replicated 16 times; bytes are consumed from index 15 down to 0.
+ stmfd sp!, {r14} @ save the return address
+
+ vld1.u8 {q0}, [r0] @ q0 = 16 bytes at pu1_src
+ mov r2, #14 @ loop counter: decremented by 2 per iteration, so the loop body runs 7 times
+
+ vdup.u8 q1, d1[7] @ q1 = byte 15 replicated (row 0)
+ vdup.u8 q2, d1[6] @ q2 = byte 14 replicated (row 1)
+ vst1.8 {q1}, [r1], r3 @ store row 0
+
+loop_16x16_horz:
+ vext.8 q0, q0, q0, #14 @ rotate q0 so the next two unused bytes land in d1[7] and d1[6]
+ vst1.8 {q2}, [r1], r3 @ store the row prepared before the rotate
+ vdup.u8 q1, d1[7] @ next row value
+ subs r2, #2 @ two rows handled per iteration
+ vdup.u8 q2, d1[6] @ row after that
+ vst1.8 {q1}, [r1], r3
+ bne loop_16x16_horz
+
+ vext.8 q0, q0, q0, #14 @ NOTE(review): q0 is not read after this rotate
+ vst1.8 {q2}, [r1], r3 @ store row 15
+
+ ldmfd sp!, {pc} @ return
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_16x16_mode_dc_a9q
+@*
+@* @brief
+@* Perform Intra prediction for luma_16x16 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_16x16 mode:DC ,described in sec 8.3.3.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_16x16_mode_dc_a9q
+
+ih264_intra_pred_luma_16x16_mode_dc_a9q:
+@ Fills 16 rows of 16 bytes at pu1_dst with a single DC byte chosen by the availability flags.
+ stmfd sp!, {r4, r14} @ save r4 and the return address (8 bytes)
+ ldr r4, [sp, #8] @ r4 = ui_neighboravailability: the stack word just above the 8 bytes pushed
+
+ ands r2, r4, #0x01 @ bit 0 clear (left not available) -> top_available; r2 is only a scratch destination here
+ beq top_available
+ ands r2, r4, #0x04 @ bit 2 clear (top not available) -> left_available
+ beq left_available
+
+ vld1.u8 {q0}, [r0] @ both available: q0 = 16 bytes at pu1_src (left neighbours)
+ add r0, r0, #17
+ vpaddl.u8 q0, q0 @ 16 bytes -> 8 pairwise 16-bit sums
+ vld1.u8 {q1}, [r0] @ q1 = 16 bytes at pu1_src + 17 (top neighbours)
+ vpaddl.u8 q1, q1
+ vadd.u16 q0, q0, q1 @ left + top partial sums
+ vadd.u16 d0, d0, d1 @ 8 lanes -> 4
+ vpaddl.u16 d0, d0 @ 4 -> 2 (32-bit)
+ vpaddl.u32 d0, d0 @ 2 -> 1: total of all 32 samples
+ vqrshrun.s16 d0, q0, #5 @ byte 0 = (total + 16) >> 5
+ vdup.u8 q0, d0[0] @ replicate the DC byte across the row
+ b str_pred
+
+top_available: @ONLY TOP AVAILABLE
+ ands r2, r4, #0x04 @ bit 2 clear as well (top not available either) -> none_available
+ beq none_available
+
+ add r0, r0, #17
+ vld1.u8 {q0}, [r0] @ q0 = 16 bytes at pu1_src + 17 (top neighbours)
+ vpaddl.u8 q0, q0 @ 16 bytes -> 8 pairwise sums
+ vadd.u16 d0, d0, d1 @ 8 -> 4
+ vpaddl.u16 d0, d0 @ 4 -> 2
+ vpaddl.u32 d0, d0 @ 2 -> 1: total of the 16 samples
+ vqrshrun.s16 d0, q0, #4 @ byte 0 = (total + 8) >> 4
+ vdup.u8 q0, d0[0]
+ b str_pred
+
+left_available: @ONLY LEFT AVAILABLE
+ vld1.u8 {q0}, [r0] @ q0 = 16 bytes at pu1_src (left neighbours)
+ vpaddl.u8 q0, q0 @ same reduction as the top-only case
+ vadd.u16 d0, d0, d1
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vqrshrun.s16 d0, q0, #4 @ byte 0 = (total + 8) >> 4
+ vdup.u8 q0, d0[0]
+ b str_pred
+
+none_available: @NONE AVAILABLE
+ vmov.u8 q0, #128 @ no neighbours: every byte is 128
+
+str_pred:
+ vst1.8 {q0}, [r1], r3 @ 16 identical rows, pu1_dst advanced by dst_strd after each
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+ vst1.8 {q0}, [r1], r3
+
+ ldmfd sp!, {r4, pc} @ restore r4 and return
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_16x16_mode_plane_a9q
+@*
+@* @brief
+@* Perform Intra prediction for luma_16x16 mode:PLANE
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_16x16 mode:PLANE ,described in sec 8.3.3.4
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_16x16_mode_plane_a9q
+ih264_intra_pred_luma_16x16_mode_plane_a9q:
+@ Scalar code (r12) accumulates the weighted sum over src[0..16] while NEON code computes the one over src[16..32]; the two streams are interleaved.
+ stmfd sp!, {r4-r10, r12, lr}
+
+ mov r2, r1 @ r2 = pu1_dst (r1 is reused as a source pointer below)
+ add r1, r0, #17
+ add r0, r0, #15 @ r0 = src + 15
+
+ mov r8, #9 @ post-increment applied by the first vector load
+ sub r1, r1, #1 @ r1 = src + 16
+ mov r10, r1 @top_left
+ mov r4, #-1 @ step for the descending byte loads
+ vld1.32 d2, [r1], r8 @ d2 = src[16..23]; r1 = src + 25
+ ldr r7, scratch_intrapred_addr1 @ PC-relative offset of the coefficient table
+scrlbl1:
+ add r7, r7, pc @ r7 = address of ih264_gai1_intrapred_luma_plane_coeffs
+
+ vld1.32 d0, [r1] @ d0 = src[25..32]
+ vrev64.8 d2, d2 @ d2 = src[23..16] (byte order reversed)
+ vld1.32 {q3}, [r7] @ q3 = 16 coefficient bytes (values defined outside this file)
+ vsubl.u8 q0, d0, d2 @ q0[i] = src[25+i] - src[23-i], i = 0..7
+ vmovl.u8 q8, d6 @ q8 = coefficient bytes 0..7 widened to 16 bit
+ vmul.s16 q0, q0, q8 @ weighted differences
+ vmovl.u8 q9, d7 @ q9 = coefficient bytes 8..15 widened to 16 bit
+
+ add r7, r0, r4, lsl #3 @ r7 = src + 7 (walks down to src[0])
+ sub r0, r7, r4, lsl #1 @ r0 = src + 9 (walks up to src[15])
+ rsb lr, r4, #0x0 @ lr = +1, step for the ascending byte loads
+
+ vpadd.s16 d0, d0, d1 @ 8 weighted differences -> 4 partial sums
+
+ ldrb r8, [r7], r4 @ src[7]
+ ldrb r9, [r0], lr @ src[9]
+
+ vpaddl.s16 d0, d0 @ 4 -> 2 (32-bit)
+ sub r12, r8, r9 @ r12 = 1 * (src[7] - src[9])
+
+ ldrb r8, [r7], r4 @ src[6]
+
+ vpaddl.s32 d0, d0 @ 2 -> 1: total weighted sum over src[16..32]
+ ldrb r9, [r0], lr @ src[10]
+ sub r8, r8, r9
+ vshl.s32 d2, d0, #2 @ 4 * sum
+ add r12, r12, r8, lsl #1 @ + 2 * (src[6] - src[10])
+
+ vadd.s32 d0, d0, d2 @ 5 * sum
+ ldrb r8, [r7], r4 @ src[5]
+ ldrb r9, [r0], lr @ src[11]
+ vrshr.s32 d0, d0, #6 @ i_b = (5 * sum + 32) >> 6, in d0[0]
+ sub r8, r8, r9
+ ldrb r5, [r7], r4 @ src[4]
+ add r8, r8, r8, lsl #1 @ 3 * (src[5] - src[11])
+
+ vdup.16 q2, d0[0] @ q2 = i_b in every lane
+ add r12, r12, r8
+ ldrb r9, [r0], lr @ src[12]
+ vmul.s16 q0, q2, q8 @ i_b * coefficients 0..7 (columns 0..7)
+ sub r5, r5, r9
+ vmul.s16 q1, q2, q9 @ i_b * coefficients 8..15 (columns 8..15)
+ add r12, r12, r5, lsl #2 @ + 4 * (src[4] - src[12])
+
+ ldrb r8, [r7], r4 @ src[3]
+ ldrb r9, [r0], lr @ src[13]
+ sub r8, r8, r9
+ ldrb r5, [r7], r4 @ src[2]
+ add r8, r8, r8, lsl #2 @ 5 * (src[3] - src[13])
+ ldrb r6, [r0], lr @ src[14]
+ add r12, r12, r8
+ ldrb r8, [r7], r4 @ src[1]
+ ldrb r9, [r0], lr @ src[15]
+
+ sub r5, r5, r6
+ sub r8, r8, r9
+ add r5, r5, r5, lsl #1 @ 3 * (src[2] - src[14])
+ rsb r8, r8, r8, lsl #3 @ 7 * (src[1] - src[15])
+ add r12, r12, r5, lsl #1 @ + 6 * (src[2] - src[14])
+ ldrb r5, [r7], r4 @ src[0]
+ ldrb r6, [r10] @top_left
+ add r12, r12, r8 @ + 7 * (src[1] - src[15])
+ sub r9, r5, r6 @ src[0] - src[16]
+ ldrb r6, [r1, #7] @ src[32] (r1 = src + 25)
+ add r12, r12, r9, lsl #3 @ + 8 * (src[0] - src[16]): r12 = complete weighted sum (not yet scaled to i_c)
+ add r8, r5, r6 @ src[0] + src[32]
+
+ add r12, r12, r12, lsl #2 @ 5 * sum
+ lsl r8, r8, #4 @ i_a = r8
+
+ add r12, r12, #0x20 @ + 32 for rounding
+ lsr r12, r12, #6 @ i_c = r12; logical shift is enough because vdup.16 below keeps only the low 16 bits
+
+ vshl.s16 q14, q2, #3 @ 8 * i_b
+ vdup.16 q3, r12 @ q3 = i_c in every lane
+
+ vdup.16 q15, r8 @ q15 = i_a in every lane
+ vshl.s16 q13, q3, #3 @ 8 * i_c
+ vsub.s16 q15, q15, q14 @ i_a - 8*i_b
+ vsub.s16 q15, q15, q13 @ i_a - 8*i_b - 8*i_c
+ vadd.s16 q14, q15, q3 @ i_a - 8*i_b - 7*i_c: row-0 base before the per-column term
+
+ mov r0, #14 @ loop counter: 2 rows per iteration, 7 iterations
+ vadd.s16 q13, q14, q0 @ row 0, columns 0..7
+ vadd.s16 q14, q14, q1 @ row 0, columns 8..15
+ vqrshrun.s16 d20, q13, #5 @ (value + 16) >> 5, saturated to 0..255
+ vqrshrun.s16 d21, q14, #5
+
+loop_16x16_plane:
+@ Each new row adds i_c to the previous row; q10 and q11 alternate as the store buffer.
+ vadd.s16 q13, q13, q3
+ vadd.s16 q14, q14, q3
+ vqrshrun.s16 d22, q13, #5
+ vst1.32 {q10}, [r2], r3 @ store the row narrowed earlier
+ vqrshrun.s16 d23, q14, #5
+
+ vadd.s16 q13, q13, q3
+ subs r0, #2
+ vadd.s16 q14, q14, q3
+ vqrshrun.s16 d20, q13, #5
+ vst1.32 {q11}, [r2], r3
+ vqrshrun.s16 d21, q14, #5
+ bne loop_16x16_plane
+
+ vadd.s16 q13, q13, q3 @ last row (row 15)
+ vadd.s16 q14, q14, q3
+ vqrshrun.s16 d22, q13, #5
+ vst1.32 {q10}, [r2], r3 @ store row 14
+ vqrshrun.s16 d23, q14, #5
+ vst1.32 {q11}, [r2], r3 @ store row 15
+
+ ldmfd sp!, {r4-r10, r12, pc} @ restore registers and return
+
+
+
diff --git a/common/arm/ih264_intra_pred_luma_4x4_a9q.s b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
new file mode 100755
index 0000000..cb386ea
--- /dev/null
+++ b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
@@ -0,0 +1,842 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_intra_pred_luma_4x4_a9q.s
+@*
+@* @brief
+@* Contains function definitions for intra 4x4 Luma prediction .
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* -ih264_intra_pred_luma_4x4_mode_vert_a9q
+@* -ih264_intra_pred_luma_4x4_mode_horz_a9q
+@* -ih264_intra_pred_luma_4x4_mode_dc_a9q
+@* -ih264_intra_pred_luma_4x4_mode_diag_dl_a9q
+@* -ih264_intra_pred_luma_4x4_mode_diag_dr_a9q
+@* -ih264_intra_pred_luma_4x4_mode_vert_r_a9q
+@* -ih264_intra_pred_luma_4x4_mode_horz_d_a9q
+@* -ih264_intra_pred_luma_4x4_mode_vert_l_a9q
+@* -ih264_intra_pred_luma_4x4_mode_horz_u_a9q
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@
+
+.text
+.p2align 2
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_vert
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:vertical
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_4x4_mode_vert_a9q
+
+ih264_intra_pred_luma_4x4_mode_vert_a9q:
+@ Copies the 4 bytes at pu1_src + 5 into each of the 4 destination rows.
+
+ stmfd sp!, {r4-r12, r14} @ NOTE(review): r4-r12 are saved but never written in this routine
+
+ add r0, r0, #5 @ r0 = pu1_src + 5
+
+ vld1.32 d0[0], [r0] @ d0[0] = 4 bytes to replicate
+
+ vst1.32 d0[0], [r1], r3 @ 4 identical rows, pu1_dst advanced by dst_strd after each
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+
+
+
+ ldmfd sp!, {r4-r12, pc} @ restore registers and return
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_horz
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+
+ .global ih264_intra_pred_luma_4x4_mode_horz_a9q
+
+ih264_intra_pred_luma_4x4_mode_horz_a9q:
+@ Row k of the 4x4 output is the byte src[3 - k] replicated 4 times.
+
+ stmfd sp!, {r4-r12, r14} @ save registers (r5-r8 are used as scratch)
+ add r0, r0, #3 @ r0 = pu1_src + 3
+ mov r2 , #-1 @ step: bytes are read in descending address order
+
+ ldrb r5, [r0], r2 @ src[3]
+ vdup.u8 d0, r5
+ ldrb r6, [r0], r2 @ src[2]
+ vst1.32 d0[0], [r1], r3 @ store row 0
+ vdup.u8 d1, r6
+ ldrb r7, [r0], r2 @ src[1]
+ vst1.32 d1[0], [r1], r3 @ store row 1
+ vdup.u8 d2, r7
+ ldrb r8, [r0], r2 @ src[0]
+ vst1.32 d2[0], [r1], r3 @ store row 2
+ vdup.u8 d3, r8
+ vst1.32 d3[0], [r1], r3 @ store row 3
+
+
+ ldmfd sp!, {r4-r12, pc} @ restore registers and return
+
+
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_dc
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+
+ .global ih264_intra_pred_luma_4x4_mode_dc_a9q
+
+ih264_intra_pred_luma_4x4_mode_dc_a9q:
+@ Fills a 4x4 block at pu1_dst with one DC byte: the rounded mean of src[0..3] (left) and/or src[5..8] (top), or 128.
+
+ stmfd sp!, {r4-r12, r14} @ save 10 registers (40 bytes)
+ ldr r4, [sp, #40] @ r4 = ui_neighboravailability: the stack word just above the 40 bytes pushed
+
+ ands r5, r4, #0x01 @ test bit 0 (left available)
+ beq top_available @LEFT NOT AVAILABLE
+
+ add r10, r0, #3 @ r10 = pu1_src + 3
+ mov r2, #-1 @ step: left bytes are read in descending address order
+ ldrb r5, [r10], r2 @ src[3]
+ ldrb r6, [r10], r2 @ src[2]
+ ldrb r7, [r10], r2 @ src[1]
+ add r5, r5, r6
+ ldrb r8, [r10], r2 @ src[0]
+ add r5, r5, r7
+ ands r11, r4, #0x04 @ test bit 2 (top available); the branch is taken after the last left add
+ add r5, r5, r8 @ r5 = src[0] + src[1] + src[2] + src[3]
+ beq left_available
+ add r10, r0, #5 @ r10 = pu1_src + 5
+ @ BOTH LEFT AND TOP AVAILABLE
+ ldrb r6, [r10], #1 @ src[5]
+ ldrb r7, [r10], #1 @ src[6]
+ add r5, r5, r6
+ ldrb r8, [r10], #1 @ src[7]
+ add r5, r5, r7
+ ldrb r9, [r10], #1 @ src[8]
+ add r5, r5, r8
+ add r5, r5, r9 @ r5 = sum of all 8 neighbours
+ add r5, r5, #4
+ lsr r5, r5, #3 @ DC = (sum + 4) >> 3
+ vdup.u8 d0, r5
+ vst1.32 d0[0], [r1], r3 @ 4 identical rows
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ b end_func
+
+top_available: @ ONLY TOP AVAILABLE
+ ands r11, r4, #0x04 @ bit 2 clear as well (top not available either) -> none_available
+ beq none_available
+
+ add r10, r0, #5 @ r10 = pu1_src + 5
+ ldrb r6, [r10], #1 @ src[5]
+ ldrb r7, [r10], #1 @ src[6]
+ ldrb r8, [r10], #1 @ src[7]
+ add r5, r6, r7
+ ldrb r9, [r10], #1 @ src[8]
+ add r5, r5, r8
+ add r5, r5, r9 @ r5 = src[5] + src[6] + src[7] + src[8]
+ add r5, r5, #2
+ lsr r5, r5, #2 @ DC = (sum + 2) >> 2
+ vdup.u8 d0, r5
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ b end_func
+
+left_available: @ONLY LEFT AVAILABLE
+ add r5, r5, #2 @ r5 already holds the sum of src[0..3]
+ lsr r5, r5, #2 @ DC = (sum + 2) >> 2
+ vdup.u8 d0, r5
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ b end_func
+
+none_available: @NONE AVAILABLE
+ mov r5, #128 @ no neighbours: every byte is 128
+ vdup.u8 d0, r5
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ vst1.32 d0[0], [r1], r3
+ b end_func @ NOTE(review): branches to the label that immediately follows
+
+
+end_func:
+ ldmfd sp!, {r4-r12, pc} @ restore registers and return
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_diag_dl
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_diag_dl_a9q @ export the entry point
+
+ih264_intra_pred_luma_4x4_mode_diag_dl_a9q: @ 4x4 Diagonal_Down_Left: row k = f[k..k+3], f being the 1-2-1 filter over p0..p7 = src[5..12]
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+ add r0, r0, #5 @ r0 -> src[5] (p0), first of the 8 bytes read below
+ sub r5, r3, #2 @ r5 = dst_strd - 2: steps from column 2 of one row to column 0 of the next
+ add r6, r0, #7 @ r6 -> src[12] (p7), the last of those 8 bytes
+ vld1.8 {d0}, [r0] @ d0 = p0..p7
+ vext.8 d1, d0, d0, #1 @ d1[i] = p[i+1] (rotation: d1[7] = p0)
+ vext.8 d2, d0, d0, #2 @ d2[i] = p[i+2] (rotation: d2[6] = p0, d2[7] = p1)
+ vld1.8 {d2[6]}, [r6] @ d2[6] = p7, so lane 6 becomes p6 + 3*p7 instead of wrapping round to p0
+ vaddl.u8 q10, d0, d1 @ q10[i] = p[i] + p[i+1], widened to 16 bit
+ vaddl.u8 q11, d1, d2 @ q11[i] = p[i+1] + p[i+2]
+ vadd.u16 q12, q10, q11 @ q12[i] = p[i] + 2*p[i+1] + p[i+2]
+ vqrshrun.s16 d3, q12, #2 @ d3[i] = f[i] = (q12[i] + 2) >> 2 narrowed to u8; lane 7 holds wrapped data and is never stored
+ vst1.32 {d3[0]}, [r1], r3 @ row 0 = f[0..3]; r1 += dst_strd
+ vext.8 d4, d3, d3, #1 @ d4[i] = f[i+1]
+ vst1.32 {d4[0]}, [r1], r3 @ row 1 = f[1..4]
+ vst1.16 {d3[1]}, [r1]! @ row 2, columns 0-1 = f[2..3]; r1 += 2
+ vst1.16 {d3[2]}, [r1], r5 @ row 2, columns 2-3 = f[4..5]; r1 -> start of row 3
+ vst1.16 {d4[1]}, [r1]! @ row 3, columns 0-1 = f[3..4]
+ vst1.16 {d4[2]}, [r1] @ row 3, columns 2-3 = f[5..6]
+
+end_func_diag_dl: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_diag_dr
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_diag_dr_a9q @ export the entry point
+
+ih264_intra_pred_luma_4x4_mode_diag_dr_a9q: @ 4x4 Diagonal_Down_Right: row k = f[3-k..6-k], f[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+
+ vld1.u8 {d0}, [r0] @ d0 = src[0..7]
+ add r0, r0, #1 @ advance one byte for the shifted load
+ vld1.u8 {d1}, [r0] @ d1 = src[1..8]
+ vext.8 d2, d1, d1, #1 @ d2[i] = src[i+2] for i = 0..6 (rotation: d2[7] = src[1])
+ vaddl.u8 q10, d0, d1 @ q10[i] = src[i] + src[i+1], widened to 16 bit
+ vaddl.u8 q11, d1, d2 @ q11[i] = src[i+1] + src[i+2]
+ vadd.u16 q12, q10, q11 @ q12[i] = src[i] + 2*src[i+1] + src[i+2]
+ vqrshrun.s16 d3, q12, #2 @ d3[i] = f[i]; lane 7 holds wrapped data and is never stored
+
+ vext.8 d4, d3, d3, #1 @ d4[i] = f[i+1]
+ sub r5, r3, #2 @ r5 = dst_strd - 2: steps from column 2 of one row to column 0 of the next
+ vst1.16 {d4[1]}, [r1]! @ row 0, columns 0-1 = f[3..4]; r1 += 2
+ vst1.16 {d4[2]}, [r1], r5 @ row 0, columns 2-3 = f[5..6]; r1 -> start of row 1
+ vst1.16 {d3[1]}, [r1]! @ row 1, columns 0-1 = f[2..3]
+ vst1.16 {d3[2]}, [r1], r5 @ row 1, columns 2-3 = f[4..5]; r1 -> start of row 2
+ vst1.32 {d4[0]}, [r1], r3 @ row 2 = f[1..4]
+ vst1.32 {d3[0]}, [r1], r3 @ row 3 = f[0..3]
+
+end_func_diag_dr: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_vert_r
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:Vertical_Right
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_vert_r_a9q @ export the entry point
+
+ih264_intra_pred_luma_4x4_mode_vert_r_a9q: @ 4x4 Vertical_Right, built from a[i] = (src[i] + src[i+1] + 1) >> 1 and f[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+
+ vld1.u8 {d0}, [r0] @ d0 = src[0..7]
+ add r0, r0, #1 @ advance one byte for the shifted load
+ vld1.u8 {d1}, [r0] @ d1 = src[1..8]
+ vext.8 d2, d1, d1, #1 @ d2[i] = src[i+2] for i = 0..6 (rotation: d2[7] = src[1])
+ vaddl.u8 q10, d0, d1 @ q10[i] = src[i] + src[i+1], widened to 16 bit
+ vaddl.u8 q11, d1, d2 @ q11[i] = src[i+1] + src[i+2]
+ vadd.u16 q12, q10, q11 @ q12[i] = src[i] + 2*src[i+1] + src[i+2]
+ vqrshrun.s16 d4, q10, #1 @ d4[i] = a[i] (2-tap rounded average)
+ vqrshrun.s16 d3, q12, #2 @ d3[i] = f[i] (1-2-1 filter); lane 7 holds wrapped data and is never stored
+ sub r5, r3, #2 @ r5 = dst_strd - 2 (not used by the stores below)
+ vext.8 d5, d3, d3, #3 @ d5[i] = f[i+3]
+ vst1.32 {d4[1]}, [r1], r3 @ row 0 = a[4..7]; r1 += dst_strd
+ vst1.32 {d5[0]}, [r1], r3 @ row 1 = f[3..6]
+ sub r8, r3, #3 @ r8 = dst_strd - 3: steps from column 3 of one row to column 0 of the next
+ vst1.u8 {d3[2]}, [r1]! @ row 2, column 0 = f[2]
+ vst1.16 {d4[2]}, [r1]! @ row 2, columns 1-2 = a[4..5]
+ vst1.u8 {d4[6]}, [r1], r8 @ row 2, column 3 = a[6]; r1 -> start of row 3
+ vst1.u8 {d3[1]}, [r1]! @ row 3, column 0 = f[1]
+ vst1.16 {d5[0]}, [r1]! @ row 3, columns 1-2 = f[3..4]
+ vst1.u8 {d5[2]}, [r1] @ row 3, column 3 = f[5]
+
+
+end_func_vert_r: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_horz_d
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:Horizontal_Down
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_horz_d_a9q @ export the entry point
+
+ih264_intra_pred_luma_4x4_mode_horz_d_a9q: @ 4x4 Horizontal_Down, built from a[i] = (src[i] + src[i+1] + 1) >> 1 and f[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+ vld1.u8 {d0}, [r0] @ d0 = src[0..7]
+ add r0, r0, #1 @ advance one byte for the shifted load
+ vld1.u8 {d1}, [r0] @ d1 = src[1..8]
+ vext.8 d2, d1, d0, #1 @ d2[i] = src[i+2] for i = 0..6 (d2[7] = src[0] taken from d0)
+ vaddl.u8 q10, d0, d1 @ q10[i] = src[i] + src[i+1], widened to 16 bit
+ vaddl.u8 q11, d1, d2 @ q11[i] = src[i+1] + src[i+2]
+ vadd.u16 q12, q10, q11 @ q12[i] = src[i] + 2*src[i+1] + src[i+2]
+ vqrshrun.s16 d4, q10, #1 @ d4[i] = a[i] (2-tap rounded average)
+ vqrshrun.s16 d5, q12, #2 @ d5[i] = f[i] (1-2-1 filter)
+ sub r5, r3, #2 @ r5 = dst_strd - 2: steps from column 2 of one row to column 0 of the next
+ vmov.8 d6, d5 @ keep an untransposed copy of f[] for row 0
+ vtrn.8 d4, d5 @ interleave: d4 = a0 f0 a2 f2 a4 f4 a6 f6, d5 = a1 f1 a3 f3 a5 f5 a7 f7
+ vst1.u16 {d5[1]}, [r1]! @ row 0, columns 0-1 = a[3] f[3]; r1 += 2
+ vst1.16 {d6[2]}, [r1], r5 @ row 0, columns 2-3 = f[4..5]; r1 -> start of row 1
+ vst1.u16 {d4[1]}, [r1]! @ row 1, columns 0-1 = a[2] f[2]
+ vst1.16 {d5[1]}, [r1], r5 @ row 1, columns 2-3 = a[3] f[3]
+ vst1.u16 {d5[0]}, [r1]! @ row 2, columns 0-1 = a[1] f[1]
+ vst1.16 {d4[1]}, [r1], r5 @ row 2, columns 2-3 = a[2] f[2]
+ vst1.u16 {d4[0]}, [r1]! @ row 3, columns 0-1 = a[0] f[0]
+ vst1.16 {d5[0]}, [r1], r5 @ row 3, columns 2-3 = a[1] f[1] (the final r1 update is not used)
+
+end_func_horz_d: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_vert_l
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:Vertical_Left
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_vert_l_a9q @ export the entry point
+
+ih264_intra_pred_luma_4x4_mode_vert_l_a9q: @ 4x4 Vertical_Left over p[i] = src[4+i]: a[i] = (p[i] + p[i+1] + 1) >> 1, f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ add r0, r0, #4 @ r0 -> src[4] (p0)
+ vld1.u8 {d0}, [r0] @ d0 = p0..p7 = src[4..11]
+ add r0, r0, #1 @ advance one byte for the shifted load
+ vld1.u8 {d1}, [r0] @ d1 = p1..p8 = src[5..12]
+ vext.8 d2, d1, d0, #1 @ d2[i] = p[i+2] for i = 0..6 (d2[7] = p0 taken from d0)
+ vaddl.u8 q10, d0, d1 @ q10[i] = p[i] + p[i+1], widened to 16 bit
+ vaddl.u8 q11, d1, d2 @ q11[i] = p[i+1] + p[i+2]
+ vadd.u16 q12, q10, q11 @ q12[i] = p[i] + 2*p[i+1] + p[i+2]
+ vqrshrun.s16 d4, q10, #1 @ d4[i] = a[i] (2-tap rounded average)
+ vqrshrun.s16 d5, q12, #2 @ d5[i] = f[i] (1-2-1 filter)
+ vext.8 d6, d4, d4, #1 @ d6[i] = a[i+1]
+ vext.8 d7, d5, d5, #1 @ d7[i] = f[i+1]
+ vst1.32 {d6[0]}, [r1], r3 @ row 0 = a[1..4]; r1 += dst_strd
+ vext.8 d16, d4, d4, #2 @ d16[i] = a[i+2]
+ vext.8 d17, d5, d5, #2 @ d17[i] = f[i+2]
+ vst1.32 {d7[0]}, [r1], r3 @ row 1 = f[1..4]
+ vst1.32 {d16[0]}, [r1], r3 @ row 2 = a[2..5]
+ vst1.32 {d17[0]}, [r1], r3 @ row 3 = f[2..5]
+
+
+
+end_func_vert_l: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_4x4_mode_horz_u
+@*
+@* @brief
+@* Perform Intra prediction for luma_4x4 mode:Horizontal_Up
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_horz_u_a9q @ export the entry point
+
+ih264_intra_pred_luma_4x4_mode_horz_u_a9q: @ 4x4 Horizontal_Up, built from a[i] = (src[i] + src[i+1] + 1) >> 1, f[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2 and src[0] replicated
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ mov r10, r0 @ keep a pointer to src[0] for the lane reload below
+ vld1.u8 {d0}, [r0] @ d0 = src[0..7]
+ ldrb r9, [r0], #1 @ r9 = src[0], the value used to pad the bottom-right of the block
+ vext.8 d1, d0, d0, #1 @ d1[i] = src[i+1] (rotation: d1[7] = src[0])
+ vld1.u8 {d0[7]}, [r10] @ d0[7] = src[0], so lane 7 of the filter below evaluates to (3*src[0] + src[1] + 2) >> 2
+ vext.8 d2, d1, d1, #1 @ d2[i] = src[i+2] (rotation: d2[6] = src[0], d2[7] = src[1])
+ vaddl.u8 q10, d0, d1 @ q10[i] = d0[i] + d1[i], widened to 16 bit
+ vaddl.u8 q11, d1, d2 @ q11[i] = d1[i] + d2[i]
+ vadd.u16 q12, q10, q11 @ q12[i] = d0[i] + 2*d1[i] + d2[i]
+ vqrshrun.s16 d4, q10, #1 @ d4[i] = a[i] (2-tap rounded average)
+ vqrshrun.s16 d5, q12, #2 @ d5[i] = f[i] (1-2-1 filter); d5[7] is the special 3:1 value described above
+ vmov d6, d4 @ redundant: d6 is fully overwritten by the next instruction
+ vext.8 d6, d5, d4, #1 @ d6 = f[1..7], a[0]
+ vst1.8 {d4[2]}, [r1]! @ row 0, column 0 = a[2]
+ vst1.8 {d6[0]}, [r1]! @ row 0, column 1 = f[1]
+ vtrn.8 d6, d5 @ d6 = f1 f0 f3 f2 f5 f4 f7 f6, d5 = f2 f1 f4 f3 f6 f5 a0 f7
+ sub r5, r3, #2 @ r5 = dst_strd - 2: steps from column 2 of one row to column 0 of the next
+ vtrn.8 d4, d6 @ d4 = a0 f1 a2 f3 ..., d6 = a1 f0 a3 f2 a5 f4 a7 f6
+ vdup.8 d7, r9 @ d7 = src[0] in every lane
+ vst1.16 {d6[0]}, [r1], r5 @ row 0, columns 2-3 = a[1] f[0]; r1 -> start of row 1
+ vst1.16 {d6[0]}, [r1]! @ row 1, columns 0-1 = a[1] f[0]
+ vst1.16 {d5[3]}, [r1], r5 @ row 1, columns 2-3 = a[0] f[7]; r1 -> start of row 2
+ vst1.16 {d5[3]}, [r1]! @ row 2, columns 0-1 = a[0] f[7]
+ vst1.16 {d7[3]}, [r1], r5 @ row 2, columns 2-3 = src[0] src[0]; r1 -> start of row 3
+ vst1.32 {d7[0]}, [r1], r3 @ row 3 = src[0] x 4
+
+end_func_horz_u: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
diff --git a/common/arm/ih264_intra_pred_luma_8x8_a9q.s b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
new file mode 100755
index 0000000..6da1c95
--- /dev/null
+++ b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
@@ -0,0 +1,1037 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_intra_pred_luma_8x8_a9q.s
+@*
+@* @brief
+@* Contains function definitions for intra 8x8 Luma prediction .
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@*
+@* -ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
+@* -ih264_intra_pred_luma_8x8_mode_vert_a9q
+@* -ih264_intra_pred_luma_8x8_mode_horz_a9q
+@* -ih264_intra_pred_luma_8x8_mode_dc_a9q
+@* -ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
+@* -ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
+@* -ih264_intra_pred_luma_8x8_mode_vert_r_a9q
+@* -ih264_intra_pred_luma_8x8_mode_horz_d_a9q
+@* -ih264_intra_pred_luma_8x8_mode_vert_l_a9q
+@* -ih264_intra_pred_luma_8x8_mode_horz_u_a9q
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@
+
+@/**
+@/**
+@/**
+@
+
+
+.text @ everything below is assembled into the code section
+.p2align 2 @ align to 2^2 = 4 bytes
+
+ .extern ih264_gai1_intrapred_luma_8x8_horz_u @ table defined in another translation unit
+.hidden ih264_gai1_intrapred_luma_8x8_horz_u @ hidden visibility: the symbol is resolved inside this shared object
+scratch_intrapred_addr_8x8: @ literal holding a link-time offset to the table instead of its absolute address
+ .long ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8 @ offset of the table from label scrlb8x8l2 plus 8; NOTE(review): presumably the 8 is the ARM-state pc read-ahead at scrlb8x8l2 - confirm at the label's use site (outside this chunk)
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_ref_filtering
+@*
+@* @brief
+@* Reference sample filtering process for Intra_8x8 sample prediction
+@*
+@* @par Description:
+@* Perform Reference sample filtering process for Intra_8x8 sample prediction ,described in sec 8.3.2.2.1
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride [Not used]
+@*
+@* @param[in] dst_strd
+@* integer destination stride[Not used]
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels[Not used]
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+
+
+ .global ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q @ export the entry point
+
+ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q: @ writes 25 filtered bytes dst[0..24]: 1-2-1 filter over src[0..24], with 3:1 weighting at both ends
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ vpush {d8-d15} @ d8-d15 (q4-q7) are used as scratch below and restored at exit
+
+
+ vld1.u8 {q0}, [r0]! @ q0 = src[0..15]; r0 += 16
+ vld1.u8 {q1}, [r0] @ q1 = src[16..31] (16-byte load; only src[16..24] influence the stored result)
+ add r0, r0, #8 @ r0 -> src[24]
+ vext.8 q2, q0, q1, #1 @ q2 = src[1..16]
+ vext.8 q3, q1, q1, #1 @ q3 = src[17..31], src[16] (rotation)
+ vext.8 q4, q2, q3, #1 @ q4 = src[2..17]
+ vext.8 q5, q3, q3, #1 @ q5 low half (d10) = src[18..25]
+ vld1.8 {d10[7]}, [r0] @ LOADING SRC[24] AGAIN TO THE END FOR p'[ 15, -1 ] = ( p[ 14, -1 ] + 3 * p[ 15, -1 ] + 2 ) >> 2
+ vaddl.u8 q10, d0, d4 @ q10[i] = src[i] + src[i+1], i = 0..7
+ vaddl.u8 q7, d0, d0 @ SPECIAL CASE FOR p'[ -1 ,7 ] = ( p[ -1, 6 ] + 3 * p[ -1, 7 ] + 2 ) >> 2
+ vadd.u16 q7, q10, q7 @ q7[i] = 3*src[i] + src[i+1]; only lane 0 is stored
+ vaddl.u8 q11, d1, d5 @ q11[i] = src[8+i] + src[9+i]
+ vqrshrun.s16 d14, q7, #2 @ d14[0] = (3*src[0] + src[1] + 2) >> 2
+ vaddl.u8 q12, d4, d8 @ q12[i] = src[1+i] + src[2+i]
+ vaddl.u8 q13, d5, d9 @ q13[i] = src[9+i] + src[10+i]
+ vst1.8 {d14[0]}, [r1]! @ dst[0] = end-of-array special case; r1 += 1
+ vadd.u16 q12, q10, q12 @ q12[i] = src[i] + 2*src[i+1] + src[i+2], i = 0..7
+ vadd.u16 q13, q11, q13 @ q13[i] = same 1-2-1 sum for i = 8..15
+ vaddl.u8 q9, d2, d6 @ q9[i] = src[16+i] + src[17+i]
+ vaddl.u8 q8, d6, d10 @ q8[i] = src[17+i] + src[18+i]; lane 7 = 2*src[24] thanks to the reload above
+ vqrshrun.s16 d4, q12, #2 @ d4 = filtered bytes centred on src[1..8]
+ vqrshrun.s16 d5, q13, #2 @ d5 = filtered bytes centred on src[9..16]
+ vadd.u16 q6, q8, q9 @ q6[i] = src[16+i] + 2*src[17+i] + src[18+i] (lane 7: src[23] + 3*src[24])
+ vst1.8 {q2}, [r1]! @ dst[1..16]; r1 += 16
+ vqrshrun.s16 d6, q6, #2 @ d6 = filtered bytes centred on src[17..24]
+ vst1.8 {d6}, [r1] @ dst[17..24]
+
+
+end_func_ref_filt: @ label is not branched to within this function
+ vpop {d8-d15} @ restore the NEON registers pushed at entry
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_vert
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:vertical
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_a9q @ export the entry point
+
+ih264_intra_pred_luma_8x8_mode_vert_a9q: @ 8x8 vertical prediction: copy the 8 bytes src[9..16] into each of the 8 output rows
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+ add r0, r0, #9 @ r0 -> src[9], first of the 8 bytes that are replicated
+ vld1.8 d0, [r0] @ d0 = src[9..16]
+
+ vst1.8 d0, [r1], r3 @ row 0: store 8 bytes, then r1 += dst_strd
+ vst1.8 d0, [r1], r3 @ row 1
+ vst1.8 d0, [r1], r3 @ row 2
+ vst1.8 d0, [r1], r3 @ row 3
+ vst1.8 d0, [r1], r3 @ row 4
+ vst1.8 d0, [r1], r3 @ row 5
+ vst1.8 d0, [r1], r3 @ row 6
+ vst1.8 d0, [r1], r3 @ row 7
+
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_horz
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:horizontal
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.3
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels(Not used in this function)
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_a9q @ export the entry point
+
+ih264_intra_pred_luma_8x8_mode_horz_a9q: @ 8x8 horizontal prediction: output row k is the byte src[7-k] replicated 8 times
+
+ stmfd sp!, {r14} @store register values to stack
+
+ vld1.u8 {d0}, [r0] @ d0 = src[0..7]
+ mov r2, #6 @ loop counter: decremented by 2 per pass, so the loop runs 3 times (incoming r2 is discarded)
+
+ vdup.u8 d1, d0[7] @ d1 = src[7] replicated (row 0)
+ vdup.u8 d2, d0[6] @ d2 = src[6] replicated (row 1)
+ vst1.8 {d1}, [r1], r3 @ row 0: store 8 bytes, then r1 += dst_strd
+
+loop_8x8_horz: @ each pass stores one odd row and the following even row (rows 1-6 in total)
+ vext.8 d0, d0, d0, #6 @ rotate d0 so the next two source bytes land in lanes 7 and 6
+ vst1.8 {d2}, [r1], r3 @ store the odd row prepared earlier
+ vdup.u8 d1, d0[7] @ replicate the byte for the next even row
+ subs r2, #2 @ count down and set the flags for the bne below
+ vdup.u8 d2, d0[6] @ replicate the byte for the next odd row
+ vst1.8 {d1}, [r1], r3 @ store the even row
+ bne loop_8x8_horz @ repeat until the counter reaches zero
+
+ vext.8 d0, d0, d0, #6 @ result is not used afterwards
+ vst1.8 {d2}, [r1], r3 @ row 7 (src[0] replicated)
+
+ ldmfd sp!, {pc} @restoring registers from stack
+
+
+
+
+
+@/******************************************************************************
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_dc
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:DC
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.4
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_dc_a9q @ export the entry point
+
+ih264_intra_pred_luma_8x8_mode_dc_a9q: @ 8x8 DC prediction: fill the block with the rounded mean of the available neighbours (128 if none)
+
+ stmfd sp!, {r4, r14} @store register values to stack
+ ldr r4, [sp, #8] @r4 => ui_neighboravailability (5th argument, found above the 2 registers = 8 bytes just pushed)
+
+ ands r2, r4, #0x01 @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE
+ beq top_available
+ ands r2, r4, #0x04 @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
+ beq left_available
+
+ vld1.u8 {d0}, [r0] @BOTH LEFT AND TOP AVAILABLE
+ add r0, r0, #9 @ r0 -> src[9], first top neighbour
+ vld1.u8 {d1}, [r0] @ d1 = src[9..16]; d0 still holds src[0..7]
+ vpaddl.u8 q0, q0 @ pairwise add: 8 x 16-bit partial sums (4 per group)
+ vadd.u16 d0, d0, d1 @ merge the two groups: 4 x 16-bit partial sums
+ vpaddl.u16 d0, d0 @ 2 x 32-bit partial sums
+ vpaddl.u32 d0, d0 @ d0 = total of all 16 neighbours
+ vqrshrun.s16 d0, q0, #4 @ lane 0 = (total + 8) >> 4; the other lanes are ignored
+ vdup.u8 d0, d0[0] @ replicate the DC value into every lane
+ b str_pred
+
+top_available: @ONLY TOP AVAILABLE
+ ands r2, r4, #0x04 @CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
+ beq none_available
+
+ add r0, r0, #9 @ r0 -> src[9], first top neighbour
+ vld1.u8 {d0}, [r0] @ d0 = src[9..16]
+ vpaddl.u8 d0, d0 @ 4 x 16-bit partial sums
+ vpaddl.u16 d0, d0 @ 2 x 32-bit partial sums
+ vpaddl.u32 d0, d0 @ d0 = total of the 8 top bytes
+ vqrshrun.s16 d0, q0, #3 @ lane 0 = (total + 4) >> 3; d1 is uninitialised here but its lanes are ignored
+ vdup.u8 d0, d0[0] @ replicate the DC value
+ b str_pred
+
+left_available: @ONLY LEFT AVAILABLE
+ vld1.u8 {d0}, [r0] @ d0 = src[0..7]
+ vpaddl.u8 d0, d0 @ 4 x 16-bit partial sums
+ vpaddl.u16 d0, d0 @ 2 x 32-bit partial sums
+ vpaddl.u32 d0, d0 @ d0 = total of the 8 left bytes
+ vqrshrun.s16 d0, q0, #3 @ lane 0 = (total + 4) >> 3; d1 is uninitialised here but its lanes are ignored
+ vdup.u8 d0, d0[0] @ replicate the DC value
+ b str_pred
+
+none_available: @NONE AVAILABLE
+ vmov.u8 q0, #128 @ fixed mid-grey value in every lane
+
+str_pred: @ common tail: d0 holds the fill value in all 8 lanes
+ vst1.8 {d0}, [r1], r3 @ row 0: store 8 bytes, then r1 += dst_strd
+ vst1.8 {d0}, [r1], r3 @ row 1
+ vst1.8 {d0}, [r1], r3 @ row 2
+ vst1.8 {d0}, [r1], r3 @ row 3
+ vst1.8 {d0}, [r1], r3 @ row 4
+ vst1.8 {d0}, [r1], r3 @ row 5
+ vst1.8 {d0}, [r1], r3 @ row 6
+ vst1.8 {d0}, [r1], r3 @ row 7
+
+ ldmfd sp!, {r4, pc} @Restoring registers from stack
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_diag_dl
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.5
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_diag_dl_a9q @ export the entry point
+
+ih264_intra_pred_luma_8x8_mode_diag_dl_a9q: @ 8x8 Diagonal_Down_Left: row k = f[k..k+7], f being the 1-2-1 filter over p0..p15 = src[9..24]
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+ add r0, r0, #9 @ r0 -> src[9] (p0), first of the 16 bytes read below
+ sub r5, r3, #4 @ r5 = dst_strd - 4: steps from column 4 of one row to column 0 of the next
+ add r6, r0, #15 @ r6 -> src[24] (p15), the last of those 16 bytes
+ vld1.8 {q0}, [r0] @ q0 = p0..p15
+ vext.8 q2, q0, q0, #2 @ q2[i] = p[i+2] (rotation wraps in lanes 14-15)
+ vext.8 q1, q0, q0, #1 @ q1[i] = p[i+1] (rotation wraps in lane 15)
+ vld1.8 {d5[6]}, [r6] @ q2 lane 14 = p15, so f[14] = (p14 + 3*p15 + 2) >> 2 instead of wrapping to p0
+ @ q1 = q0 shifted to left once
+ @ q2 = q1 shifted to left once
+ vaddl.u8 q10, d0, d2 @Adding for FILT121
+ vaddl.u8 q11, d1, d3 @ q10/q11[i] = p[i] + p[i+1], widened to 16 bit
+ vaddl.u8 q12, d2, d4 @ q12[i] = p[i+1] + p[i+2], low half
+ vaddl.u8 q13, d3, d5 @ same, high half
+ vadd.u16 q12, q10, q12 @ q12[i] = p[i] + 2*p[i+1] + p[i+2], i = 0..7
+ vadd.u16 q13, q11, q13 @ same for i = 8..15
+
+ vqrshrun.s16 d4, q12, #2 @ d4 = f[0..7]
+ vqrshrun.s16 d5, q13, #2 @ d5 = f[8..15]; f[15] holds wrapped data and is never stored
+ @Q2 has all FILT121 values
+ vst1.8 {d4}, [r1], r3 @ row 0 = f[0..7]; r1 += dst_strd
+ vext.8 q9, q2, q2, #1 @ q9[i] = f[i+1]
+ vext.8 q8, q9, q9, #1 @ q8[i] = f[i+2]
+ vst1.8 {d18}, [r1], r3 @ row 1 = f[1..8]
+ vext.8 q15, q8, q8, #1 @ q15[i] = f[i+3]
+ vst1.8 {d16}, [r1], r3 @ row 2 = f[2..9]
+ vst1.8 {d30}, [r1], r3 @ row 3 = f[3..10]
+ vst1.32 {d4[1]}, [r1]! @ row 4, columns 0-3 = f[4..7]; r1 += 4
+ vst1.32 {d5[0]}, [r1], r5 @ row 4, columns 4-7 = f[8..11]; r1 -> start of row 5
+ vst1.32 {d18[1]}, [r1]! @ row 5, columns 0-3 = f[5..8]
+ vst1.32 {d19[0]}, [r1], r5 @ row 5, columns 4-7 = f[9..12]
+ vst1.32 {d16[1]}, [r1]! @ row 6, columns 0-3 = f[6..9]
+ vst1.32 {d17[0]}, [r1], r5 @ row 6, columns 4-7 = f[10..13]
+ vst1.32 {d30[1]}, [r1]! @ row 7, columns 0-3 = f[7..10]
+ vst1.32 {d31[0]}, [r1], r5 @ row 7, columns 4-7 = f[11..14] (the final r1 update is not used)
+
+
+end_func_diag_dl: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_diag_dr
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.6
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_diag_dr_a9q @ export the entry point
+
+ih264_intra_pred_luma_8x8_mode_diag_dr_a9q: @ 8x8 Diagonal_Down_Right: row k = f[7-k..14-k], f[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+
+ vld1.u8 {q0}, [r0] @ q0 = src[0..15]
+ add r0, r0, #1 @ advance one byte for the shifted load
+ vld1.u8 {q1}, [r0] @ q1 = src[1..16]
+ vext.8 q2, q1, q1, #1 @ q2[i] = src[i+2] for i = 0..14 (rotation wraps in lane 15)
+ @ q1 = q0 shifted to left once
+ @ q2 = q1 shifted to left once
+ vaddl.u8 q10, d0, d2 @Adding for FILT121
+ vaddl.u8 q11, d1, d3 @ q10/q11[i] = src[i] + src[i+1], widened to 16 bit
+ vaddl.u8 q12, d2, d4 @ q12[i] = src[i+1] + src[i+2], low half
+ vaddl.u8 q13, d3, d5 @ same, high half
+ vadd.u16 q12, q10, q12 @ q12[i] = src[i] + 2*src[i+1] + src[i+2], i = 0..7
+ vadd.u16 q13, q11, q13 @ same for i = 8..15
+ vqrshrun.s16 d4, q12, #2 @ d4 = f[0..7]
+ vqrshrun.s16 d5, q13, #2 @ d5 = f[8..15]; f[15] holds wrapped data and is never stored
+ @Q2 has all FILT121 values
+ sub r5, r3, #4 @ r5 = dst_strd - 4: steps from column 4 of one row to column 0 of the next
+ vext.8 q9, q2, q2, #15 @ q9[i] = f[i-1] (rotate right by one byte)
+ vst1.8 {d19}, [r1], r3 @ row 0 = f[7..14]; r1 += dst_strd
+ vext.8 q8, q9, q9, #15 @ q8[i] = f[i-2]
+ vst1.8 {d17}, [r1], r3 @ row 1 = f[6..13]
+ vext.8 q15, q8, q8, #15 @ q15[i] = f[i-3]
+ vst1.8 {d31}, [r1], r3 @ row 2 = f[5..12]
+ vst1.32 {d4[1]}, [r1]! @ row 3, columns 0-3 = f[4..7]; r1 += 4
+ vst1.32 {d5[0]}, [r1], r5 @ row 3, columns 4-7 = f[8..11]; r1 -> start of row 4
+ vst1.32 {d18[1]}, [r1]! @ row 4, columns 0-3 = f[3..6]
+ vst1.32 {d19[0]}, [r1], r5 @ row 4, columns 4-7 = f[7..10]
+ vst1.32 {d16[1]}, [r1]! @ row 5, columns 0-3 = f[2..5]
+ vst1.32 {d17[0]}, [r1], r5 @ row 5, columns 4-7 = f[6..9]
+ vst1.32 {d30[1]}, [r1]! @ row 6, columns 0-3 = f[1..4]
+ vst1.32 {d31[0]}, [r1], r5 @ row 6, columns 4-7 = f[5..8]
+ vst1.8 {d4}, [r1], r3 @ row 7 = f[0..7]
+
+end_func_diag_dr: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_vert_r
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Vertical_Right
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.7
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_r_a9q @ export the entry point
+
+ih264_intra_pred_luma_8x8_mode_vert_r_a9q: @ 8x8 Vertical_Right, built from a[i] = (src[i] + src[i+1] + 1) >> 1 and f[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+ vld1.u8 {q0}, [r0] @ q0 = src[0..15]
+ add r0, r0, #1 @ advance one byte for the shifted load
+ vld1.u8 {q1}, [r0] @ q1 = src[1..16]
+ vext.8 q2, q1, q1, #1 @ q2[i] = src[i+2] for i = 0..14 (rotation wraps in lane 15)
+ @ q1 = q0 shifted to left once
+ @ q2 = q1 shifted to left once
+ vaddl.u8 q10, d0, d2 @ q10[i] = src[i] + src[i+1], i = 0..7, widened to 16 bit
+ vaddl.u8 q11, d1, d3 @ same for i = 8..15
+ vaddl.u8 q12, d2, d4 @ q12[i] = src[i+1] + src[i+2], low half
+ vaddl.u8 q13, d3, d5 @ same, high half
+ vadd.u16 q12, q10, q12 @ q12[i] = src[i] + 2*src[i+1] + src[i+2], i = 0..7
+ vadd.u16 q13, q11, q13 @ same for i = 8..15
+
+ vqrshrun.s16 d4, q10, #1 @ d4 = a[0..7]
+ vqrshrun.s16 d5, q11, #1 @ d5 = a[8..15]
+ vqrshrun.s16 d6, q12, #2 @ d6 = f[0..7]
+ vqrshrun.s16 d7, q13, #2 @ d7 = f[8..15]
+ @Q2 has all FILT11 values
+ @Q3 has all FILT121 values
+ sub r5, r3, #6 @ r5 = dst_strd - 6: steps from column 6 of one row to column 0 of the next
+ sub r6, r3, #4 @ r6 = dst_strd - 4: steps from column 4 of one row to column 0 of the next
+ vst1.8 {d5}, [r1], r3 @ row 0
+ vext.8 q9, q3, q3, #15 @ q9[i] = f[i-1] (rotate right by one byte)
+ vmov.8 q11, q9 @ q11 = copy of q9 taken before the vuzp below overwrites it
+ vext.8 q8, q2, q2, #1 @ q8[i] = a[i+1]
+ vst1.8 {d19}, [r1], r3 @row 1
+
+ vmov.8 q15, q8 @ q15 = copy of q8 taken before the vuzp below overwrites it
+ vext.8 q10, q2, q2, #15 @ q10[i] = a[i-1]; d21 = a[7..14]
+ vuzp.8 q8, q9 @ de-interleave: d16 = a1 a3 .. a15, d17 = f15 f1 f3 .. f13, d18 = a2 a4 .. a14 a0, d19 = f0 f2 .. f14
+ @row 2
+ vext.8 q14, q8, q8, #1 @ d29 = f1 f3 f5 f7 f9 f11 f13 a1
+ vst1.8 {d21}, [r1] @ row 2, all 8 columns = a[7..14] (no write-back)
+ vst1.8 {d6[6]}, [r1], r3 @ row 2, column 0 overwritten with f[6]; r1 += dst_strd
+ @row 3
+
+ vst1.16 {d29[1]}, [r1]! @ row 3, columns 0-1 = f[5] f[7]
+ vst1.32 {d7[0]}, [r1]! @ row 3, columns 2-5 = f[8..11]
+ vst1.16 {d7[2]}, [r1], r5 @ row 3, columns 6-7 = f[12..13]; r1 -> start of row 4
+@row 4
+ vst1.16 {d19[1]}, [r1]! @ row 4, columns 0-1 = f[4] f[6]
+ vst1.32 {d5[0]}, [r1]! @ row 4, columns 2-5 = a[8..11]
+ vst1.16 {d5[2]}, [r1], r5 @ row 4, columns 6-7 = a[12..13]; r1 -> start of row 5
+
+@row 5
+ vext.8 q13, q9, q9, #1 @ d27 = f2 f4 f6 f8 f10 f12 f14 a2 (used for row 6)
+ vst1.16 {d17[1]}, [r1]! @ row 5, columns 0-1 = f[3] f[5]
+ vst1.32 {d23[0]}, [r1]! @ row 5, columns 2-5 = f[7..10] (from the pre-vuzp copy in q11)
+ vst1.16 {d23[2]}, [r1], r5 @ row 5, columns 6-7 = f[11..12]; r1 -> start of row 6
+
+
+@row 6
+ vst1.16 {d27[0]}, [r1]! @ row 6, columns 0-1 = f[2] f[4]
+ vst1.8 {d27[2]}, [r1]! @ row 6, column 2 = f[6]
+ vst1.8 {d5[0]}, [r1]! @ row 6, column 3 = a[8]
+ vst1.32 {d31[0]}, [r1], r6 @ row 6, columns 4-7 = a[9..12] (from the pre-vuzp copy in q15); r1 -> start of row 7
+@row 7
+ vst1.32 {d29[0]}, [r1]! @ row 7, columns 0-3 = f[1] f[3] f[5] f[7]
+ vst1.32 {d7[0]}, [r1]! @ row 7, columns 4-7 = f[8..11]
+
+
+
+end_func_vert_r: @ label is not branched to within this function
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_horz_d
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Horizontal_Down
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.8
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_d_a9q @ export the entry point
+
+ih264_intra_pred_luma_8x8_mode_horz_d_a9q: @ 8x8 Horizontal_Down, built from a[i] = (src[i] + src[i+1] + 1) >> 1 and f[i] = (src[i] + 2*src[i+1] + src[i+2] + 2) >> 2
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ vpush {d8-d15} @ d8-d15 (q4-q7) are used as scratch below and restored at exit
+
+ vld1.u8 {q0}, [r0] @ q0 = src[0..15]
+ add r0, r0, #1 @ advance one byte for the shifted load
+ vld1.u8 {q1}, [r0] @ q1 = src[1..16]
+ vext.8 q2, q1, q1, #1 @ q2[i] = src[i+2] for i = 0..14 (rotation wraps in lane 15)
+ @ q1 = q0 shifted to left once
+ @ q2 = q1 shifted to left once
+ vaddl.u8 q10, d0, d2 @ q10[i] = src[i] + src[i+1], i = 0..7, widened to 16 bit
+ vaddl.u8 q11, d1, d3 @ same for i = 8..15
+ vaddl.u8 q12, d2, d4 @ q12[i] = src[i+1] + src[i+2], low half
+ vaddl.u8 q13, d3, d5 @ same, high half
+ vadd.u16 q12, q10, q12 @ q12[i] = src[i] + 2*src[i+1] + src[i+2], i = 0..7
+ vadd.u16 q13, q11, q13 @ same for i = 8..15
+
+ vqrshrun.s16 d4, q10, #1 @ d4 = a[0..7]
+ vqrshrun.s16 d5, q11, #1 @ d5 = a[8..15]
+ vqrshrun.s16 d6, q12, #2 @ d6 = f[0..7]
+ vqrshrun.s16 d7, q13, #2 @ d7 = f[8..15]
+ @Q2 has all FILT11 values
+ @Q3 has all FILT121 values
+ vmov.8 q4, q2 @ q4 = working copy of a[]
+ vmov.8 q5, q3 @ q5 = working copy of f[]
+ sub r6, r3, #6 @ r6 = dst_strd - 6: steps from column 6 of one row to column 0 of the next
+ vtrn.8 q4, q5 @ interleave bytes: d8 = a0 f0 a2 f2 a4 f4 a6 f6, d10 = a1 f1 a3 f3 a5 f5 a7 f7
+ vmov.8 q6, q4 @ q6 = copy for the 16-bit transpose
+ vmov.8 q7, q5 @ q7 = copy for the 16-bit transpose
+ sub r5, r3, #4 @ r5 = dst_strd - 4: steps from column 4 of one row to column 0 of the next
+ vtrn.16 q6, q7 @ d12 = a0 f0 a1 f1 a4 f4 a5 f5, d14 = a2 f2 a3 f3 a6 f6 a7 f7
+ vext.8 q8, q3, q3, #14 @ q8[i] = f[i-2]; d17 = f[6..13]
+ @ROW 0
+ vst1.8 {d17}, [r1] @ row 0, all 8 columns = f[6..13] (no write-back)
+ vst1.16 {d10[3]}, [r1], r3 @ row 0, columns 0-1 overwritten with a[7] f[7]; r1 += dst_strd
+
+ @ROW 1
+ vst1.32 {d14[1]}, [r1]! @ columns 0-3 = a6 f6 a7 f7
+ vst1.32 {d7[0]}, [r1], r5 @ columns 4-7 = f[8..11]; r1 -> start of row 2
+ @ROW 2
+ vst1.16 {d10[2]}, [r1]! @ columns 0-1 = a5 f5
+ vst1.32 {d14[1]}, [r1]! @ columns 2-5 = a6 f6 a7 f7
+ vst1.16 {d7[0]}, [r1], r6 @ columns 6-7 = f[8..9]; r1 -> start of row 3
+ @ROW 3
+ vst1.32 {d12[1]}, [r1]! @ columns 0-3 = a4 f4 a5 f5
+ vst1.32 {d14[1]}, [r1], r5 @ columns 4-7 = a6 f6 a7 f7; r1 -> start of row 4
+ @ROW 4
+ vst1.16 {d14[1]}, [r1]! @ columns 0-1 = a3 f3
+ vst1.32 {d12[1]}, [r1]! @ columns 2-5 = a4 f4 a5 f5
+ vst1.16 {d14[2]}, [r1], r6 @ columns 6-7 = a6 f6; r1 -> start of row 5
+ @ROW 5
+ vst1.32 {d14[0]}, [r1]! @ columns 0-3 = a2 f2 a3 f3
+ vst1.32 {d12[1]}, [r1], r5 @ columns 4-7 = a4 f4 a5 f5; r1 -> start of row 6
+ @ROW 6
+ vst1.16 {d10[0]}, [r1]! @ columns 0-1 = a1 f1
+ vst1.16 {d8[1]}, [r1]! @ columns 2-3 = a2 f2
+ vst1.16 {d14[1]}, [r1]! @ columns 4-5 = a3 f3
+ vst1.16 {d12[2]}, [r1], r6 @ columns 6-7 = a4 f4; r1 -> start of row 7
+ @ROW 7
+ vst1.32 {d12[0]}, [r1]! @ columns 0-3 = a0 f0 a1 f1
+ vst1.32 {d14[0]}, [r1], r5 @ columns 4-7 = a2 f2 a3 f3 (the final r1 update is not used)
+
+end_func_horz_d: @ label is not branched to within this function
+ vpop {d8-d15} @ restore the NEON registers pushed at entry
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_vert_l
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Vertical_Left
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_l_a9q
+@ Vertical_Left 8x8 luma prediction: reads pu1_src[9..25] and writes eight 8-byte rows at pu1_dst (row pitch dst_strd)
+ih264_intra_pred_luma_8x8_mode_vert_l_a9q:
+@ Only r0, r1 and r3 are read as arguments
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ vpush {d8-d15} @q4-q7 (d8-d15) are overwritten below
+ add r0, r0, #9 @presumably skips the left column and top-left sample to reach the top row - confirm against caller
+ vld1.u8 {q0}, [r0] @q0 = t[0..15] (pu1_src[9..24])
+ add r0, r0, #1
+ vld1.u8 {q1}, [r0] @q1 = t[1..16]; NOTE(review): touches pu1_src[25] - confirm the buffer is at least 26 bytes
+ vext.8 q2, q1, q1, #1 @q2 = q1 rotated by one byte (last lane wraps around)
+ vaddl.u8 q10, d0, d2 @t[i] + t[i+1], widened to 16 bit, i = 0..7
+ vaddl.u8 q11, d1, d3 @t[i] + t[i+1], i = 8..15
+ vaddl.u8 q12, d2, d4 @t[i+1] + t[i+2], i = 0..7
+ vaddl.u8 q13, d3, d5 @t[i+1] + t[i+2], i = 8..15
+ vadd.u16 q12, q10, q12 @t[i] + 2*t[i+1] + t[i+2], i = 0..7
+ vadd.u16 q13, q11, q13 @t[i] + 2*t[i+1] + t[i+2], i = 8..15
+
+ vqrshrun.s16 d4, q10, #1 @FILT11[i] = (t[i] + t[i+1] + 1) >> 1
+ vqrshrun.s16 d5, q11, #1
+ vqrshrun.s16 d6, q12, #2 @FILT121[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2
+ vext.8 q4, q2, q2, #1 @d8 = FILT11[1..8]
+ vqrshrun.s16 d7, q13, #2
+ @Q2 has all FILT11 values
+ @Q3 has all FILT121 values
+
+ vext.8 q5, q3, q3, #1 @d10 = FILT121[1..8]
+ @ROW 0,1
+ vst1.8 {d4}, [r1], r3 @FILT11[0..7]
+ vst1.8 {d6}, [r1], r3 @FILT121[0..7]
+
+ vext.8 q6, q4, q4, #1 @d12 = FILT11[2..9]
+ vext.8 q7, q5, q5, #1 @d14 = FILT121[2..9]
+ @ROW 2,3
+ vst1.8 {d8}, [r1], r3
+ vst1.8 {d10}, [r1], r3
+
+ vext.8 q8, q6, q6, #1 @d16 = FILT11[3..10]
+ vext.8 q9, q7, q7, #1 @d18 = FILT121[3..10]
+ @ROW 4,5
+ vst1.8 {d12}, [r1], r3
+ vst1.8 {d14}, [r1], r3
+ @ROW 6,7
+ vst1.8 {d16}, [r1], r3
+ vst1.8 {d18}, [r1], r3
+
+end_func_vert_l:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@*ih264_intra_pred_luma_8x8_mode_horz_u
+@*
+@* @brief
+@* Perform Intra prediction for luma_8x8 mode:Horizontal_Up
+@*
+@* @par Description:
+@* Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] ui_neighboravailability
+@* availability of neighbouring pixels
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************/
+@void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 ui_neighboravailability)
+
+@**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ r4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_u_a9q
+@ Horizontal_Up 8x8 luma prediction: reads pu1_src[0..15] plus a 16-byte shuffle table, writes eight 8-byte rows at pu1_dst
+ih264_intra_pred_luma_8x8_mode_horz_u_a9q:
+@ Only r0, r1 and r3 are read as arguments; r12 is scratch for the table address
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ vpush {d8-d15} @q4-q7 (d8-d15) are overwritten below
+
+ vld1.u8 {q0}, [r0] @q0 = pu1_src[0..15]
+ vld1.u8 {d1[7]}, [r0] @replace q0[15] with pu1_src[0]
+ vext.8 q1, q0, q0, #1 @q1 = q0 rotated by one byte, so q1[15] = pu1_src[0] as well
+ vext.8 q2, q1, q1, #1 @q2 = q0 rotated by two bytes
+ @ LOADING V TABLE
+ ldr r12, scratch_intrapred_addr_8x8 @word stored at a label defined outside this routine; presumably a pc-relative offset to the table
+scrlb8x8l2:
+ add r12, r12, pc @r12 = address of the shuffle table
+ vaddl.u8 q10, d0, d2 @s[i] + s[i+1], widened to 16 bit, i = 0..7
+ vaddl.u8 q11, d1, d3 @s[i] + s[i+1], i = 8..15
+ vaddl.u8 q12, d2, d4 @s[i+1] + s[i+2], i = 0..7
+ vaddl.u8 q13, d3, d5 @s[i+1] + s[i+2], i = 8..15
+ vadd.u16 q12, q10, q12 @s[i] + 2*s[i+1] + s[i+2], i = 0..7
+ vadd.u16 q13, q11, q13 @s[i] + 2*s[i+1] + s[i+2], i = 8..15
+ vld1.u8 {q5}, [r12] @q5 = 16 byte indices for the vtbl lookups below
+ vqrshrun.s16 d4, q10, #1 @FILT11[i] = (s[i] + s[i+1] + 1) >> 1
+ vqrshrun.s16 d5, q11, #1 @d5[7] = FILT11[15] = pu1_src[0], since both of its inputs are pu1_src[0]
+ vqrshrun.s16 d6, q12, #2 @FILT121[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2
+ vqrshrun.s16 d7, q13, #2
+ @Q2 has all FILT11 values
+ @Q3 has all FILT121 values
+ vtbl.u8 d12, {q2, q3}, d10 @gather from the 32-byte set {FILT11 = indices 0-15, FILT121 = indices 16-31}
+ vdup.u8 q7, d5[7] @q7 = FILT11[15] in every lane, used as right-hand fill
+ vtbl.u8 d13, {q2, q3}, d11 @q6 = 16 gathered bytes: d12 is row 0, d13 is row 4
+ vext.8 q8, q6, q7, #2 @q8 = q6 advanced by two bytes, filled from q7
+ vext.8 q9, q8, q7, #2 @q9 = q6 advanced by four bytes
+ vst1.8 {d12}, [r1], r3 @row 0
+ vext.8 q10, q9, q7, #2 @q10 = q6 advanced by six bytes
+ vst1.8 {d16}, [r1], r3 @row 1
+ vst1.8 {d18}, [r1], r3 @row 2
+ vst1.8 {d20}, [r1], r3 @row 3
+ vst1.8 {d13}, [r1], r3 @row 4
+ vst1.8 {d17}, [r1], r3 @row 5
+ vst1.8 {d19}, [r1], r3 @row 6
+ vst1.8 {d21}, [r1], r3 @row 7
+
+
+end_func_horz_u:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
+
+
+
diff --git a/common/arm/ih264_iquant_itrans_recon_a9.s b/common/arm/ih264_iquant_itrans_recon_a9.s
new file mode 100755
index 0000000..f71ca69
--- /dev/null
+++ b/common/arm/ih264_iquant_itrans_recon_a9.s
@@ -0,0 +1,871 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264_iquant_itrans_recon_a9.s
+@ *
+@ * @brief
+@ * Contains function definitions for single stage inverse transform
+@ *
+@ * @author
+@ * Mohit
+@ * Harinarayanaan
+@ *
+@ * @par List of Functions:
+@ * - ih264_iquant_itrans_recon_4x4_a9()
+@ * - ih264_iquant_itrans_recon_8x8_a9()
+@ * - ih264_iquant_itrans_recon_chroma_4x4_a9()
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@*/
+@/**
+@ *******************************************************************************
+@ *
+@ * @brief
+@ * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block
+@ *
+@ * @par Description:
+@ * Performs inverse transform Ci4 and adds the residue to get the
+@ * reconstructed block
+@ *
+@ * @param[in] pi2_src
+@ * Input 4x4 coefficients
+@ *
+@ * @param[in] pu1_pred
+@ * Prediction 4x4 block
+@ *
+@ * @param[out] pu1_out
+@ * Output 4x4 block
+@ *
+@ * @param[in] u4_qp_div_6
+@ * QP
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * Pointer to weight matrix
+@ *
+@ * @param[in] pred_strd,
+@ * Prediction stride
+@ *
+@ * @param[in] out_strd
+@ * Output Stride
+@ *
+@ *@param[in] pi2_tmp
+@ * temporary buffer of size 1*16
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * Pointer to the inverse quantization matrix
+@ *
+@ * @returns Void
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+@void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
+@ UWORD8 *pu1_pred,
+@ UWORD8 *pu1_out,
+@ WORD32 pred_strd,
+@ WORD32 out_strd,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+@ WORD32 *pi4_tmp,
+@ WORD32 iq_start_idx,
+@ WORD16 *pi2_dc_ld_addr)
+@**************Variables Vs Registers*****************************************
+@r0 => *pi2_src
+@r1 => *pu1_pred
+@r2 => *pu1_out
+@r3 => pred_strd
+@r4 => out_strd
+@r5 => *pu2_iscal_mat
+@r6 => *pu2_weigh_mat
+@r7 => u4_qp_div_6
+@r8 => iq_start_idx
+@r10=> pi2_dc_ld_addr
+.text
+.p2align 2
+
+ .global ih264_iquant_itrans_recon_4x4_a9
+@ Dequantises a 4x4 coefficient block, applies the two-pass inverse transform, adds pu1_pred and stores the saturated 8-bit result at pu1_out
+ih264_iquant_itrans_recon_4x4_a9:
+
+@VLD4.S16 is used because the pointer is incremented by SUB_BLK_WIDTH_4x4
+@If the macro value changes need to change the instruction according to it.
+@Only one shift is done in horizontal inverse because,
+@if u4_qp_div_6 is lesser than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value
+@if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0
+
+ stmfd sp!, {r4-r12, r14} @save r4-r12 and lr (40 bytes); stack arguments now start at sp+40
+ ldr r7, [sp, #52] @Loads u4_qp_div_6
+ ldr r4, [sp, #40] @Loads out_strd
+ vdup.s32 q15, r7 @Populate the u4_qp_div_6 in Q15
+ ldr r5, [sp, #44] @Loads *pu2_iscal_mat
+
+ ldr r6, [sp, #48] @Loads *pu2_weigh_mat
+
+ ldr r8, [sp, #60] @Loads iq_start_idx
+
+ ldr r10, [sp, #64] @Load alternate dc address
+
+ vpush {d8-d15} @q4-q7 (d8-d15) are overwritten below; stack args were read before this push
+@=======================DEQUANT FROM HERE===================================
+
+ vld4.s16 {d20, d21, d22, d23}, [r5] @Load pu2_iscal_mat[i], i =0..15, de-interleaved: d20 = i%4==0 .. d23 = i%4==3
+ vld4.s16 {d26, d27, d28, d29}, [r6] @pu2_weigh_mat[i], i =0..15, same de-interleaved layout
+ vmul.s16 q10, q10, q13 @x[i]=(scale[i] * dequant[i]) for i%4 = 0 (d20) and i%4 = 1 (d21)
+ vld4.s16 {d16, d17, d18, d19}, [r0] @pi2_src_tmp[i], i =0..15, same de-interleaved layout
+
+ vmul.s16 q11, q11, q14 @x[i]=(scale[i] * dequant[i]) for i%4 = 2 (d22) and i%4 = 3 (d23)
+
+ subs r8, r8, #1 @ if r8 == 1 => intra case , so result of subtraction is zero and Z flag is set
+ ldreqsh r9, [r10] @ Loads signed halfword pi2_dc_ld_addr[0], if r8==1
+
+ vmull.s16 q0, d16, d20 @ Q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0,4,8,12
+ vmull.s16 q1, d17, d21 @ Q1 = p[i] = (x[i] * trns_coeff[i]) where i = 1,5,9,13
+ vmull.s16 q2, d18, d22 @ Q2 = p[i] = (x[i] * trns_coeff[i]) where i = 2,6,10,14
+ vmull.s16 q3, d19, d23 @ Q3 = p[i] = (x[i] * trns_coeff[i]) where i = 3,7,11,15
+
+ vshl.s32 q0, q0, q15 @ Q0 = q[i] = (p[i] << (qP/6)) where i = 0,4,8,12
+ vshl.s32 q1, q1, q15 @ Q1 = q[i] = (p[i] << (qP/6)) where i = 1,5,9,13
+ vshl.s32 q2, q2, q15 @ Q2 = q[i] = (p[i] << (qP/6)) where i = 2,6,10,14
+ vshl.s32 q3, q3, q15 @ Q3 = q[i] = (p[i] << (qP/6)) where i = 3,7,11,15
+
+ vqrshrn.s32 d0, q0, #0x4 @ D0 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, i = 0,4,8,12
+ vqrshrn.s32 d1, q1, #0x4 @ D1 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, i = 1,5,9,13
+ vqrshrn.s32 d2, q2, #0x4 @ D2 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, i = 2,6,10,14
+ vqrshrn.s32 d3, q3, #0x4 @ D3 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, i = 3,7,11,15
+
+ vmoveq.16 d0[0], r9 @ Restore dc value in case of intra, i.e. r8 == 1 (uses the Z flag set by the subs above)
+
+@========= PROCESS IDCT FROM HERE =======
+@Steps for Stage 1:
+@------------------
+ vld1.32 d30[0], [r1], r3 @I row Load pu1_pred buffer
+ vadd.s16 d4, d0, d2 @x0 = q0 + q1;
+
+ vsub.s16 d5, d0, d2 @x1 = q0 - q1;
+
+ vshr.s16 d8, d1, #1 @q0>>1
+ vshr.s16 d9, d3, #1 @q1>>1
+
+ vsub.s16 d6, d8, d3 @x2 = (q0 >> 1) - q1;
+ vadd.s16 d7, d1, d9 @x3 = q0+ (q1 >> 1);
+ vld1.32 d30[1], [r1], r3 @II row Load pu1_pred buffer
+
+ vswp d6, d7 @Reverse positions of x2 and x3
+
+ vsub.s16 q6, q2, q3 @x0-x3 and x1-x2 combined
+ vadd.s16 q5, q2, q3 @x0 + x3 and x1+x2 combined
+
+ vld1.32 d31[0], [r1], r3 @III row Load pu1_pred buf
+
+ vswp d12, d13 @q6 becomes {x1-x2, x0-x3}, so d10..d13 are in output order
+@Steps for Stage 2:
+@------------------
+ vtrn.16 d10, d11 @4x4 transpose of d10..d13 (16-bit then 32-bit vtrn)
+ vtrn.16 d12, d13
+ vtrn.32 d10, d12
+ vtrn.32 d11, d13
+ vadd.s16 d14, d10, d12 @x0 = q0 + q1;
+
+ vsub.s16 d15, d10, d12 @x1 = q0 - q1;
+
+ vshr.s16 d18, d11, #1 @q0>>1
+ vshr.s16 d19, d13, #1 @q1>>1
+
+ vsub.s16 d16, d18, d13 @x2 = (q0 >> 1) - q1;
+ vadd.s16 d17, d11, d19 @x3 = q0+ (q1 >> 1);
+
+ vld1.32 d31[1], [r1], r3 @IV row Load pu1_pred buffer
+ vswp d16, d17 @Reverse positions of x2 and x3
+
+ vsub.s16 q11, q7, q8 @x0-x3 and x1-x2 combined
+ vadd.s16 q10, q7, q8 @x0 + x3 and x1+x2 combined
+
+ vswp d22, d23 @q11 becomes {x1-x2, x0-x3}: rows III and IV
+
+ vrshr.s16 q10, q10, #6 @(x + 32) >> 6 for rows I and II
+ vrshr.s16 q11, q11, #6 @(x + 32) >> 6 for rows III and IV
+
+ vaddw.u8 q10, q10, d30 @add prediction rows I and II
+ vaddw.u8 q11, q11, d31 @add prediction rows III and IV
+
+ vqmovun.s16 d0, q10 @saturate to unsigned 8 bit
+ vqmovun.s16 d1, q11
+
+ vst1.32 d0[0], [r2], r4 @I row store the value
+ vst1.32 d0[1], [r2], r4 @II row store the value
+ vst1.32 d1[0], [r2], r4 @III row store the value
+ vst1.32 d1[1], [r2] @IV row store the value
+
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, r15} @Restore r4-r12 and return (saved lr is loaded into pc)
+
+
+ @/**
+@ *******************************************************************************
+@ *
+@ * @brief
+@ * This function performs inverse quant and Inverse transform type Ci4 for a 4*4 chroma block; the DC coefficient is taken from pi2_dc_src
+@ *
+@ * @par Description:
+@ * Performs inverse transform Ci4 and adds the residue to get the
+@ * reconstructed block
+@ *
+@ * @param[in] pi2_src
+@ * Input 4x4 coefficients
+@ *
+@ * @param[in] pu1_pred
+@ * Prediction 4x4 block
+@ *
+@ * @param[out] pu1_out
+@ * Output 4x4 block
+@ *
+@ * @param[in] u4_qp_div_6
+@ * QP
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * Pointer to weight matrix
+@ *
+@ * @param[in] pred_strd,
+@ * Prediction stride
+@ *
+@ * @param[in] out_strd
+@ * Output Stride
+@ *
+@ *@param[in] pi2_tmp
+@ * temporary buffer of size 1*16
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * Pointer to the inverse quantization matrix
+@ *
+@ * @returns Void
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+@void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
+@ UWORD8 *pu1_pred,
+@ UWORD8 *pu1_out,
+@ WORD32 pred_strd,
+@ WORD32 out_strd,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+@ WORD32 *pi4_tmp,
+@ WORD16 *pi2_dc_src)
+@**************Variables Vs Registers*****************************************
+@r0 => *pi2_src
+@r1 => *pu1_pred
+@r2 => *pu1_out
+@r3 => pred_strd
+@r4 => out_strd
+@r5 => *pu2_iscal_mat
+@r6 => *pu2_weigh_mat
+@r7 => u4_qp_div_6
+
+ .global ih264_iquant_itrans_recon_chroma_4x4_a9
+ih264_iquant_itrans_recon_chroma_4x4_a9:
+@ Dequantises a 4x4 block (DC taken from pi2_dc_src), inverse transforms it, adds every second byte of pu1_pred and writes every second byte of pu1_out
+@VLD4.S16 is used because the pointer is incremented by SUB_BLK_WIDTH_4x4
+@If the macro value changes need to change the instruction according to it.
+@Only one shift is done in horizontal inverse because,
+@if u4_qp_div_6 is lesser than 4 then shift value will be negative and do negative left shift, in this case rnd_factor has value
+@if u4_qp_div_6 is greater than 4 then shift value will be positive and do left shift, here rnd_factor is 0
+
+ stmfd sp!, {r4-r12, r14} @save r4-r12 and lr (40 bytes); stack arguments now start at sp+40
+ ldr r7, [sp, #52] @Loads u4_qp_div_6
+ ldr r4, [sp, #40] @Loads out_strd
+ vdup.s32 q15, r7 @Populate the u4_qp_div_6 in Q15
+ ldr r5, [sp, #44] @Loads *pu2_iscal_mat
+ ldr r6, [sp, #48] @Loads *pu2_weigh_mat
+ ldr r8, [sp, #60] @loads *pi2_dc_src
+
+ vpush {d8-d15} @q4-q7 (d8-d15) are overwritten below; stack args were read before this push
+@=======================DEQUANT FROM HERE===================================
+
+ vld4.s16 {d20, d21, d22, d23}, [r5] @Load pu2_iscal_mat[i], i =0..15, de-interleaved: d20 = i%4==0 .. d23 = i%4==3
+ vld4.s16 {d26, d27, d28, d29}, [r6] @pu2_weigh_mat[i], i =0..15, same de-interleaved layout
+ vmul.s16 q10, q10, q13 @x[i]=(scale[i] * dequant[i]) for i%4 = 0 (d20) and i%4 = 1 (d21)
+ vld4.s16 {d16, d17, d18, d19}, [r0] @pi2_src_tmp[i], i =0..15, same de-interleaved layout
+
+ vmul.s16 q11, q11, q14 @x[i]=(scale[i] * dequant[i]) for i%4 = 2 (d22) and i%4 = 3 (d23)
+
+ vmull.s16 q0, d16, d20 @ Q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0,4,8,12
+ vmull.s16 q1, d17, d21 @ Q1 = p[i] = (x[i] * trns_coeff[i]) where i = 1,5,9,13
+ vmull.s16 q2, d18, d22 @ Q2 = p[i] = (x[i] * trns_coeff[i]) where i = 2,6,10,14
+ vmull.s16 q3, d19, d23 @ Q3 = p[i] = (x[i] * trns_coeff[i]) where i = 3,7,11,15
+
+ vshl.s32 q0, q0, q15 @ Q0 = q[i] = (p[i] << (qP/6)) where i = 0,4,8,12
+ vshl.s32 q1, q1, q15 @ Q1 = q[i] = (p[i] << (qP/6)) where i = 1,5,9,13
+ vshl.s32 q2, q2, q15 @ Q2 = q[i] = (p[i] << (qP/6)) where i = 2,6,10,14
+ vshl.s32 q3, q3, q15 @ Q3 = q[i] = (p[i] << (qP/6)) where i = 3,7,11,15
+
+ vqrshrn.s32 d0, q0, #0x4 @ D0 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, i = 0,4,8,12
+ vqrshrn.s32 d1, q1, #0x4 @ D1 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, i = 1,5,9,13
+ vqrshrn.s32 d2, q2, #0x4 @ D2 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, i = 2,6,10,14
+ vqrshrn.s32 d3, q3, #0x4 @ D3 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bit, i = 3,7,11,15
+
+ ldrsh r9, [r8] @ Loads signed halfword pi2_dc_src[0]
+ vmov.16 d0[0], r9 @ Restore dc value since its chroma iq-it
+
+@========= PROCESS IDCT FROM HERE =======
+@Steps for Stage 1:
+@------------------
+ vld2.8 {d28, d29}, [r1], r3 @I row Load pu1_pred buffer: 16 bytes de-interleaved, d28 = even bytes
+ vadd.s16 d4, d0, d2 @x0 = q0 + q1;
+
+ vsub.s16 d5, d0, d2 @x1 = q0 - q1;
+
+ vshr.s16 d8, d1, #1 @q0>>1
+ vshr.s16 d9, d3, #1 @q1>>1
+
+ vsub.s16 d6, d8, d3 @x2 = (q0 >> 1) - q1;
+ vadd.s16 d7, d1, d9 @x3 = q0+ (q1 >> 1);
+ vld2.8 {d29, d30}, [r1], r3 @II row Load pu1_pred buffer: d29 = even bytes
+
+ vswp d6, d7 @Reverse positions of x2 and x3
+
+ vsub.s16 q6, q2, q3 @x0-x3 and x1-x2 combined
+ vtrn.32 d28, d29 @ D28 -- row I and II of pu1_pred_buffer
+ vadd.s16 q5, q2, q3 @x0 + x3 and x1+x2 combined
+
+ vld2.8 {d29, d30}, [r1], r3 @III row Load pu1_pred buf: d29 = even bytes
+
+ vswp d12, d13 @q6 becomes {x1-x2, x0-x3}, so d10..d13 are in output order
+@Steps for Stage 2:
+@------------------
+ vtrn.16 d10, d11 @4x4 transpose of d10..d13 (16-bit then 32-bit vtrn)
+ vtrn.16 d12, d13
+ vtrn.32 d10, d12
+ vtrn.32 d11, d13
+ vadd.s16 d14, d10, d12 @x0 = q0 + q1;
+
+ vsub.s16 d15, d10, d12 @x1 = q0 - q1;
+
+ vshr.s16 d18, d11, #1 @q0>>1
+ vshr.s16 d19, d13, #1 @q1>>1
+
+ vsub.s16 d16, d18, d13 @x2 = (q0 >> 1) - q1;
+ vadd.s16 d17, d11, d19 @x3 = q0+ (q1 >> 1);
+
+ vld2.8 {d30, d31}, [r1], r3 @IV row Load pu1_pred buffer: d30 = even bytes
+ vswp d16, d17 @Reverse positions of x2 and x3
+
+ vsub.s16 q11, q7, q8 @x0-x3 and x1-x2 combined
+ vtrn.32 d29, d30 @ D29 -- row III and IV of pu1_pred_buf
+ vadd.s16 q10, q7, q8 @x0 + x3 and x1+x2 combined
+
+ vswp d22, d23 @q11 becomes {x1-x2, x0-x3}: rows III and IV
+
+ vrshr.s16 q10, q10, #6 @(x + 32) >> 6 for rows I and II
+ vrshr.s16 q11, q11, #6 @(x + 32) >> 6 for rows III and IV
+
+ vaddw.u8 q10, q10, d28 @add prediction rows I and II
+ vaddw.u8 q11, q11, d29 @add prediction rows III and IV
+
+ vld1.u8 d0, [r2], r4 @Load the current 8 bytes of each of the four output rows
+ vld1.u8 d1, [r2], r4
+ vld1.u8 d2, [r2], r4
+ vld1.u8 d3, [r2], r4
+
+ sub r2, r2, r4, lsl #2 @rewind r2 by 4*out_strd to the first output row
+
+ vqmovun.s16 d20, q10 @Saturate the reconstructed samples to unsigned 8 bit (rows I, II)
+ vqmovun.s16 d22, q11 @rows III, IV
+
+ vmovl.u8 q10, d20 @Widen back to 16 bit: each sample sits in the low byte of a halfword
+ vmovl.u8 q11, d22 @so that we can use vbit to copy
+
+ vmov.u16 q14, #0x00ff @Mask selecting the low byte of every halfword
+
+ vbit.u8 q0, q10, q14 @Insert samples into the even bytes of the output rows; odd bytes keep their loaded values
+ vbit.u8 q1, q11, q14
+
+ vst1.u8 d0, [r2], r4 @Store the four merged output rows
+ vst1.u8 d1, [r2], r4
+ vst1.u8 d2, [r2], r4
+ vst1.u8 d3, [r2]
+
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, r15} @Restore r4-r12 and return (saved lr is loaded into pc)
+
+
+@/*
+@ *******************************************************************************
+@ *
+@ * @brief
+@ * This function performs inverse quant and Inverse transform type Ci8 for 8*8 block
+@ *
+@ * @par Description:
+@ * Performs inverse transform Ci8 and adds the residue to get the
+@ * reconstructed block
+@ *
+@ * @param[in] pi2_src
+@ * Input 8x8 coefficients
+@ *
+@ * @param[in] pu1_pred
+@ * Prediction 8x8 block
+@ *
+@ * @param[out] pu1_out
+@ * Output 8x8 block
+@ *
+@ * @param[in] u4_qp_div_6
+@ * QP
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * Pointer to weight matrix
+@ *
+@ * @param[in] pred_strd,
+@ * Prediction stride
+@ *
+@ * @param[in] out_strd
+@ * Output Stride
+@ *
+@ *@param[in] pi2_tmp
+@ * temporary buffer of size 1*64
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * Pointer to the inverse quantization matrix
+@ *
+@ * @returns Void
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+@void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
+@ UWORD8 *pu1_pred,
+@ UWORD8 *pu1_out,
+@ WORD32 pred_strd,
+@ WORD32 out_strd,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+@ WORD32 *pi4_tmp,
+@ WORD32 iq_start_idx)
+@**************Variables Vs Registers*****************************************
+@r0 => *pi2_src
+@r1 => *pu1_pred
+@r2 => *pu1_out
+@r3 => pred_strd
+@r4 => out_strd
+@r5 => *pu2_iscal_mat
+@r6 => *pu2_weigh_mat
+@r7 => u4_qp_div_6
+
+
+ .global ih264_iquant_itrans_recon_8x8_a9
+ih264_iquant_itrans_recon_8x8_a9:
+@ Dequantises an 8x8 coefficient block, applies the two-pass inverse transform, adds pu1_pred and stores eight saturated 8-byte rows at pu1_out
+ stmfd sp!, {r4-r12, r14} @save r4-r12 and lr (40 bytes); stack arguments now start at sp+40
+ ldr r7, [sp, #52] @Loads u4_qp_div_6
+ ldr r4, [sp, #40] @Loads out_strd
+
+ ldr r5, [sp, #44] @Loads *pu2_iscal_mat
+ ldr r6, [sp, #48] @Loads *pu2_weigh_mat
+ vdup.s32 q15, r7 @Populate the u4_qp_div_6 in Q15
+ vpush {d8-d15} @q4-q7 (d8-d15) are overwritten below
+
+idct_8x8_begin:
+
+@========= DEQUANT FROM HERE ===========
+
+ vld1.32 {q13}, [r5]! @ Q13 = dequant values row 0
+ vld1.32 {q10}, [r6]! @ Q10 = scaling factors row 0
+ vld1.32 {q14}, [r5]! @ Q14 = dequant values row 1
+ vmul.s16 q10, q10, q13 @ Q10 = x[i] = (scale[i] * dequant[i]) where i = 0..7
+ vld1.32 {q11}, [r6]! @ Q11 = scaling factors row 1
+ vld1.32 {q8}, [r0]! @ Q8 = Source row 0
+ vmul.s16 q11, q11, q14 @ Q11 = x[i] = (scale[i] * dequant[i]) where i = 8..15
+ vmull.s16 q0, d16, d20 @ Q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3
+ vld1.32 {q9}, [r0]! @ Q9 = Source row 1
+ vmull.s16 q1, d17, d21 @ Q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7
+ vmull.s16 q2, d18, d22 @ Q2 = p[i] = (x[i] * trns_coeff[i]) where i = 8..11
+ vld1.32 {q13}, [r6]! @ Scaling factors row 2
+ vmull.s16 q3, d19, d23 @ Q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15
+ vld1.32 {q14}, [r6]! @ Scaling factors row 3
+ vshl.s32 q0, q0, q15 @ Q0 = q[i] = (p[i] << (qP/6)) where i = 0..3
+ vld1.32 {q10}, [r5]! @ Q10 = Dequant values row 2
+ vshl.s32 q1, q1, q15 @ Q1 = q[i] = (p[i] << (qP/6)) where i = 4..7
+ vld1.32 {q8}, [r0]! @ Source Row 2
+ vshl.s32 q2, q2, q15 @ Q2 = q[i] = (p[i] << (qP/6)) where i = 8..11
+ vld1.32 {q11}, [r5]! @ Q11 = Dequant values row 3
+ vshl.s32 q3, q3, q15 @ Q3 = q[i] = (p[i] << (qP/6)) where i = 12..15
+ vld1.32 {q9}, [r0]! @ Source Row 3
+ vmul.s16 q10, q10, q13 @ Dequant row2*scale matrix row 2
+ vmul.s16 q11, q11, q14 @ Dequant row 3*scale matrix row 3
+ vld1.32 {q4}, [r6]! @ Scaling factors row 4
+ vqrshrn.s32 d0, q0, #0x6 @ D0 = c[i] = ((q[i] + 32) >> 6) where i = 0..3
+ vqrshrn.s32 d1, q1, #0x6 @ D1 = c[i] = ((q[i] + 32) >> 6) where i = 4..7
+ vld1.32 {q5}, [r6]! @ Scaling factors row 5
+ vqrshrn.s32 d2, q2, #0x6 @ D2 = c[i] = ((q[i] + 32) >> 6) where i = 8..11
+ vqrshrn.s32 d3, q3, #0x6 @ D3 = c[i] = ((q[i] + 32) >> 6) where i = 12..15
+ vld1.32 {q13}, [r5]! @ Q13 = Dequant values row 4
+ vmull.s16 q2, d16, d20 @ p[i] = (x[i] * trns_coeff[i]) where i=16..19
+ vmull.s16 q3, d17, d21 @ p[i] = (x[i] * trns_coeff[i]) where i=20..23
+ vld1.32 {q12}, [r5]! @ Q12 = Dequant values row 5
+ vmull.s16 q6, d18, d22 @ p[i] = (x[i] * trns_coeff[i]) where i=24..27
+ vmull.s16 q7, d19, d23 @ p[i] = (x[i] * trns_coeff[i]) where i=28..31
+
+ vld1.32 {q14}, [r0]! @ Source row 4
+ vmul.s16 q10, q4, q13 @ Dequant row4*scale matrix row 4
+ vmul.s16 q11, q5, q12 @ Dequant row5*scale matrix row 5
+ vld1.32 {q9}, [r0]! @ Source row 5
+ vshl.s32 q2, q2, q15 @
+ vshl.s32 q3, q3, q15 @
+ vld1.32 {q13}, [r6]! @ Scaling factors row 6
+ vshl.s32 q6, q6, q15 @
+ vshl.s32 q7, q7, q15 @
+ vmull.s16 q4, d28, d20 @ i = 32..35
+ vqrshrn.s32 d4, q2, #0x6 @ D4 = c[i] = ((q[i] + 32) >> 6) where i = 16..19
+ vqrshrn.s32 d5, q3, #0x6 @ D5 = c[i] = ((q[i] + 32) >> 6) where i = 20..23
+ vmull.s16 q5, d29, d21 @ i =36..39
+ vld1.32 {q10}, [r5]! @ Dequant values row 6
+ vqrshrn.s32 d6, q6, #0x6 @ D6 = c[i] = ((q[i] + 32) >> 6) where i = 24..27
+ vqrshrn.s32 d7, q7, #0x6 @ D7 = c[i] = ((q[i] + 32) >> 6) where i = 28..31
+ vld1.32 {q14}, [r6]! @ Scaling factors row 7
+ vmull.s16 q6, d18, d22 @ i = 40..43
+ vld1.32 {q8}, [r0]! @ Source row 6
+ vmull.s16 q7, d19, d23 @ i = 44..47
+ vld1.32 {q11}, [r5]! @ Dequant values row 7
+ vshl.s32 q4, q4, q15 @
+ vld1.32 {q9}, [r0]! @ Source row 7
+ vshl.s32 q5, q5, q15 @
+
+ vshl.s32 q6, q6, q15 @
+ vshl.s32 q7, q7, q15 @
+ vmul.s16 q10, q10, q13 @ Dequant*scaling row 6
+ vmul.s16 q11, q11, q14 @ Dequant*scaling row 7
+ vqrshrn.s32 d8, q4, #0x6 @ D8 = c[i] = ((q[i] + 32) >> 6) where i = 32..35
+ vqrshrn.s32 d9, q5, #0x6 @ D9 = c[i] = ((q[i] + 32) >> 6) where i = 36..39
+ vqrshrn.s32 d10, q6, #0x6 @ D10 = c[i] = ((q[i] + 32) >> 6) where i = 40..43
+ vqrshrn.s32 d11, q7, #0x6 @ D11 = c[i] = ((q[i] + 32) >> 6) where i = 44..47
+ vmull.s16 q6, d16, d20 @ i= 48..51
+ vmull.s16 q7, d17, d21 @ i= 52..55
+ vmull.s16 q8, d18, d22 @ i=56..59
+ vmull.s16 q9, d19, d23 @ i=60..63
+ vshl.s32 q6, q6, q15 @
+ vzip.s16 q0, q1 @Transpose
+ vshl.s32 q7, q7, q15 @
+ vshl.s32 q8, q8, q15 @
+ vzip.s16 q2, q3 @
+ vshl.s32 q9, q9, q15 @
+ vqrshrn.s32 d12, q6, #0x6 @ D12 = c[i] = ((q[i] + 32) >> 6) where i = 48..51
+ vzip.s16 q4, q5 @Transpose
+ vqrshrn.s32 d13, q7, #0x6 @ D13 = c[i] = ((q[i] + 32) >> 6) where i = 52..55
+ vqrshrn.s32 d14, q8, #0x6 @ D14 = c[i] = ((q[i] + 32) >> 6) where i = 56..59
+ vzip.s32 q0, q2 @Transpose
+ vqrshrn.s32 d15, q9, #0x6 @ D15 = c[i] = ((q[i] + 32) >> 6) where i = 60..63
+
+@========= PROCESS IDCT FROM HERE =======
+
+@Transpose ahead of Stage 1 (started above, interleaved with the dequant):
+@------------------
+
+@ TRANSPOSE 8x8 coeffs to actual order
+
+ vzip.s16 q6, q7 @
+
+ vzip.s32 q1, q3 @
+ vzip.s32 q4, q6 @
+ vzip.s32 q5, q7 @
+
+ vswp d1, d8 @ Q0/Q1 = Row order x0/x1
+ vswp d3, d10 @ Q2/Q3 = Row order x2/x3
+ vswp d5, d12 @ Q4/Q5 = Row order x4/x5
+ vswp d7, d14 @ Q6/Q7 = Row order x6/x7
+
+ vswp q1, q4 @
+ vshr.s16 q10, q2, #0x1 @
+ vswp q3, q6 @
+
+@Steps for Stage 1:
+@------------------
+
+ vadd.s16 q8, q0, q4 @ Q8 = y0
+ vsub.s16 q9, q0, q4 @ Q9 = y2
+
+ vsra.s16 q2, q6, #0x1 @ Q2 = y6
+ vsub.s16 q6, q10, q6 @ Q6 = y4
+
+ vaddl.s16 q12, d14, d2 @ y3 (0-3) 1+7
+ vaddl.s16 q13, d15, d3 @ y3 (4-7) 1+7
+
+ vsubl.s16 q10, d14, d2 @ y5 (0-3) 7-1
+ vsubl.s16 q11, d15, d3 @ y5 (4-7) 7-1
+
+ vadd.s16 q0, q8, q2 @ Q0 = z0
+ vsub.s16 q4, q8, q2 @ Q4 = z6
+
+ vadd.s16 q8, q9, q6 @ Q8 = z2
+ vsub.s16 q2, q9, q6 @ Q2 = z4
+
+ vsubw.s16 q12, q12, d6 @ y3 (0-3) 1+7-3
+ vsubw.s16 q13, q13, d7 @ y3 (4-7) 1+7-3
+
+ vshr.s16 q6, q3, #0x1 @
+
+ vaddw.s16 q10, q10, d10 @
+ vaddw.s16 q11, q11, d11 @
+
+ vshr.s16 q9, q5, #0x1 @
+
+ vsubw.s16 q12, q12, d12 @
+ vsubw.s16 q13, q13, d13 @
+
+ vaddw.s16 q10, q10, d18 @
+ vaddw.s16 q11, q11, d19 @
+
+ vqmovn.s32 d12, q12 @
+ vaddl.s16 q12, d10, d6 @
+ vqmovn.s32 d13, q13 @ Q6 = y3
+ vaddl.s16 q13, d11, d7 @
+ vqmovn.s32 d18, q10 @
+ vsubl.s16 q10, d10, d6 @
+ vqmovn.s32 d19, q11 @ Q9 = y5
+ vsubl.s16 q11, d11, d7 @
+
+ vshr.s16 q3, q6, #0x2 @
+
+ vsra.s16 q6, q9, #0x2 @ Q6 = z3
+
+ vaddw.s16 q12, q12, d2 @
+ vaddw.s16 q13, q13, d3 @
+
+ vshr.s16 q1, #0x1 @
+
+ vsub.s16 q5, q3, q9 @ Q5 = z5
+
+ vsubw.s16 q10, q10, d14 @
+ vsubw.s16 q11, q11, d15 @
+
+ vshr.s16 q7, #0x1 @
+
+ vaddw.s16 q12, q12, d2 @
+ vaddw.s16 q13, q13, d3 @
+
+ vsubw.s16 q10, q10, d14 @
+ vsubw.s16 q11, q11, d15 @
+
+
+ vqmovn.s32 d14, q12 @
+ vadd.s16 q1, q8, q5 @ Q1 = x1
+ vqmovn.s32 d15, q13 @ Q7 = y7
+ vsub.s16 q3, q8, q5 @ Q3 = x6
+ vqmovn.s32 d18, q10 @
+ vsub.s16 q5, q2, q6 @ Q5 = x5
+ vqmovn.s32 d19, q11 @ Q9 = y1
+ vadd.s16 q2, q2, q6 @ Q2 = x2
+
+ vshr.s16 q12, q9, #0x2 @
+ vsra.s16 q9, q7, #0x2 @ Q9 = z1
+
+ vsub.s16 q11, q7, q12 @ Q11 = z7
+
+ vadd.s16 q6, q4, q9 @ Q6 = x3
+ vsub.s16 q4, q4, q9 @ Q4 = x4
+
+ vsub.s16 q7, q0, q11 @ Q7 = x7
+ vadd.s16 q0, q0, q11 @ Q0 = x0
+
+ vswp.s16 q3, q6 @ Q3 = x3, Q6 = x6
+
+
+@Steps for Stage 2:
+@------------------
+
+@ TRANSPOSE 8x8 coeffs to actual order
+
+ vzip.s16 q0, q1 @
+ vzip.s16 q2, q3 @
+ vzip.s16 q4, q5 @
+ vzip.s16 q6, q7 @
+
+ vzip.s32 q0, q2 @
+ vzip.s32 q1, q3 @
+ vzip.s32 q4, q6 @
+ vzip.s32 q5, q7 @
+
+ vswp d1, d8 @ Q0/Q1 = Row order x0/x1
+ vswp d3, d10 @ Q2/Q3 = Row order x2/x3
+ vswp d5, d12 @ Q4/Q5 = Row order x4/x5
+ vswp d7, d14 @ Q6/Q7 = Row order x6/x7
+
+ vswp q1, q4 @
+ vshr.s16 q10, q2, #0x1 @
+ vswp q3, q6 @
+
+@Steps for Stage 3:
+@------------------
+
+@Repeat stage 1 again for vertical transform
+
+ vadd.s16 q8, q0, q4 @ Q8 = y0
+ vld1.32 d28, [r1], r3 @ D28 = prediction row 0 (8 bytes)
+ vsub.s16 q9, q0, q4 @ Q9 = y2
+
+ vsra.s16 q2, q6, #0x1 @ Q2 = y6
+ vsub.s16 q6, q10, q6 @ Q6 = y4
+
+ vaddl.s16 q12, d14, d2 @
+ vld1.32 d29, [r1], r3 @ D29 = prediction row 1
+ vaddl.s16 q13, d15, d3 @
+
+ vsubl.s16 q10, d14, d2 @
+ vld1.32 d30, [r1], r3 @ D30 = prediction row 2
+ vsubl.s16 q11, d15, d3 @
+
+ vadd.s16 q0, q8, q2 @ Q0 = z0
+ vld1.32 d31, [r1], r3 @ D31 = prediction row 3
+ vsub.s16 q4, q8, q2 @ Q4 = z6
+
+ vadd.s16 q8, q9, q6 @ Q8 = z2
+ vsub.s16 q2, q9, q6 @ Q2 = z4
+
+ vsubw.s16 q12, q12, d6 @
+ vsubw.s16 q13, q13, d7 @
+
+ vshr.s16 q6, q3, #0x1 @
+
+ vaddw.s16 q10, q10, d10 @
+ vaddw.s16 q11, q11, d11 @
+
+ vshr.s16 q9, q5, #0x1 @
+
+ vsubw.s16 q12, q12, d12 @
+ vsubw.s16 q13, q13, d13 @
+
+ vaddw.s16 q10, q10, d18 @
+ vaddw.s16 q11, q11, d19 @
+
+ vqmovn.s32 d12, q12 @
+ vaddl.s16 q12, d10, d6 @
+ vqmovn.s32 d13, q13 @ Q6 = y3
+ vaddl.s16 q13, d11, d7 @
+ vqmovn.s32 d18, q10 @
+ vsubl.s16 q10, d10, d6 @
+ vqmovn.s32 d19, q11 @ Q9 = y5
+ vsubl.s16 q11, d11, d7 @
+
+ vshr.s16 q3, q6, #0x2 @
+
+ vsra.s16 q6, q9, #0x2 @ Q6 = z3
+
+ vaddw.s16 q12, q12, d2 @
+ vaddw.s16 q13, q13, d3 @
+
+ vshr.s16 q1, #0x1 @
+
+ vsub.s16 q5, q3, q9 @ Q5 = z5
+
+ vsubw.s16 q10, q10, d14 @
+ vsubw.s16 q11, q11, d15 @
+
+ vshr.s16 q7, #0x1 @
+
+ vaddw.s16 q12, q12, d2 @
+ vaddw.s16 q13, q13, d3 @
+
+ vsubw.s16 q10, q10, d14 @
+ vsubw.s16 q11, q11, d15 @
+
+ vqmovn.s32 d14, q12 @
+ vadd.s16 q1, q8, q5 @ Q1 = x1
+ vqmovn.s32 d15, q13 @ Q7 = y7
+ vsub.s16 q3, q8, q5 @ Q3 = x6
+ vqmovn.s32 d18, q10 @
+ vsub.s16 q5, q2, q6 @ Q5 = x5
+ vqmovn.s32 d19, q11 @ Q9 = y1
+ vadd.s16 q2, q2, q6 @ Q2 = x2
+
+ vshr.s16 q12, q9, #0x2 @
+ vsra.s16 q9, q7, #0x2 @ Q9 = z1
+
+ vsub.s16 q11, q7, q12 @ Q11 = z7
+
+ vadd.s16 q6, q4, q9 @ Q6 = x3
+ vsub.s16 q4, q4, q9 @ Q4 = x4
+
+ vsub.s16 q7, q0, q11 @ Q7 = x7
+ vadd.s16 q0, q0, q11 @ Q0 = x0
+
+ vswp.s16 q3, q6 @ Q3 <-> Q6
+
+ vrshr.s16 q1, q1, #6 @ (x + 32) >> 6 on every residual row
+ vld1.32 d16, [r1], r3 @ D16 = prediction row 4
+ vrshr.s16 q2, q2, #6 @
+ vrshr.s16 q4, q4, #6 @
+ vld1.32 d17, [r1], r3 @ D17 = prediction row 5
+ vrshr.s16 q5, q5, #6 @
+ vrshr.s16 q7, q7, #6 @
+ vld1.32 d18, [r1], r3 @ D18 = prediction row 6
+ vrshr.s16 q0, q0, #6 @
+ vrshr.s16 q3, q3, #6 @
+ vld1.32 d19, [r1], r3 @ D19 = prediction row 7
+ vrshr.s16 q6, q6, #6 @
+
+@ Add the prediction to each residual row and saturate to unsigned 8 bit
+
+ vaddw.u8 q0, q0, d28
+ vaddw.u8 q1, q1, d29
+ vaddw.u8 q2, q2, d30
+ vaddw.u8 q3, q3, d31
+ vqmovun.s16 d0, q0
+ vaddw.u8 q4, q4, d16
+ vqmovun.s16 d1, q1
+ vaddw.u8 q5, q5, d17
+ vqmovun.s16 d2, q2
+ vaddw.u8 q6, q6, d18
+ vqmovun.s16 d3, q3
+ vaddw.u8 q7, q7, d19
+
+ vqmovun.s16 d4, q4
+ vst1.32 d0, [r2], r4 @ Store output row 0
+ vqmovun.s16 d5, q5
+ vst1.32 d1, [r2], r4 @ Store output row 1
+ vqmovun.s16 d6, q6
+ vst1.32 d2, [r2], r4 @ Store output row 2
+ vqmovun.s16 d7, q7
+ vst1.32 d3, [r2], r4 @ Store output row 3
+ vst1.32 d4, [r2], r4 @ Store output row 4
+
+ vst1.32 d5, [r2], r4 @ Store output row 5
+
+
+ vst1.32 d6, [r2], r4 @ Store output row 6
+
+
+ vst1.32 d7, [r2], r4 @ Store output row 7
+
+idct_8x8_end:
+
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, r15} @Restore r4-r12 and return (saved lr is loaded into pc)
+
diff --git a/common/arm/ih264_iquant_itrans_recon_dc_a9.s b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
new file mode 100755
index 0000000..8d71bdb
--- /dev/null
+++ b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
@@ -0,0 +1,399 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264_iquant_itrans_recon_dc_a9.s
+@ *
+@ * @brief
+@ * Contains function definitions for single stage inverse transform
+@ *
+@ * @author
+@ * Mohit
+@ *
+@ * @par List of Functions:
+@ * - ih264_iquant_itrans_recon_4x4_dc_a9()
+@ * - ih264_iquant_itrans_recon_8x8_dc_a9()
+@ * - ih264_iquant_itrans_recon_chroma_4x4_dc_a9()
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@*/
+@/**
+@ *******************************************************************************
+@ *
+@ * @brief
+@ * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block
+@ * for dc input pattern only, i.e. only the (0,0) element of the input 4x4 block is
+@ * non-zero. For complete function, refer ih264_iquant_itrans_recon_a9.s
+@ *
+@ * @par Description:
+@ * Performs inverse transform Ci4 and adds the residue to get the
+@ * reconstructed block
+@ *
+@ * @param[in] pi2_src
+@ * Input 4x4 coefficients
+@ *
+@ * @param[in] pu1_pred
+@ * Prediction 4x4 block
+@ *
+@ * @param[out] pu1_out
+@ * Output 4x4 block
+@ *
+@ * @param[in] u4_qp_div_6
+@ * QP
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * Pointer to weight matrix
+@ *
+@ * @param[in] pred_strd,
+@ * Prediction stride
+@ *
+@ * @param[in] out_strd
+@ * Output Stride
+@ *
+@ *@param[in] pi2_tmp
+@ * temporary buffer of size 1*16
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * Pointer to the inverse quantization matrix
+@ *
+@ * @returns Void
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+@void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
+@ UWORD8 *pu1_pred,
+@ UWORD8 *pu1_out,
+@ WORD32 pred_strd,
+@ WORD32 out_strd,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+@ WORD32 *pi4_tmp,
+@ WORD32 iq_start_idx,
+@ WORD16 *pi2_dc_ld_addr)
+@**************Variables Vs Registers*****************************************
+@r0 => *pi2_src
+@r1 => *pu1_pred
+@r2 => *pu1_out
+@r3 => pred_strd
+@r4 => out_strd
+@r5 => *pu2_iscal_mat
+@r6 => *pu2_weigh_mat
+@r7 => u4_qp_div_6
+@r9 => iq_start_idx
+@unused => pi2_dc_ld_addr
+
+.text
+.p2align 2
+
+ .global ih264_iquant_itrans_recon_4x4_dc_a9
+
+ih264_iquant_itrans_recon_4x4_dc_a9:
+
+@Only one shift is done in horizontal inverse because,
+@if u4_qp_div_6 is less than 4 then the shift value will be negative (a negative left shift); in this case rnd_factor has a value
+@if u4_qp_div_6 is greater than 4 then the shift value will be positive (a left shift); here rnd_factor is 0
+
+ stmfd sp!, {r4-r10, r14} @save r4-r10 and lr (8 words), so the stack arguments now start at [sp, #32]
+ ldr r5, [sp, #36] @Loads *pu2_iscal_mat
+ ldr r6, [sp, #40] @Loads *pu2_weigh_mat
+ ldrsh r8, [r0] @load pi2_src[0], SH for signed halfword load
+ ldrh r6, [r6] @load pu2_weigh_mat[0] , H for unsigned halfword load
+ ldrh r5, [r5] @load pu2_iscal_mat[0] , H for unsigned halfword load
+@=======================DEQUANT FROM HERE===================================
+ mul r6, r6, r5 @pu2_iscal_mat[0]*pu2_weigh_mat[0]
+ ldr r7, [sp, #44] @Loads u4_qp_div_6
+ mul r6, r6, r8 @pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0]
+ ldr r4, [sp, #32] @Loads out_strd
+ ldr r9, [sp, #52] @Loads iq_start_idx
+
+ lsl r6, r6, r7 @(pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0])<<u4_qp_div_6
+ add r6, r6, #8 @(pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0])<<u4_qp_div_6 + rnd_fact (8)
+ asr r6, r6, #4 @q0 = ((pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0] << u4_qp_div_6) + rnd_fact) >> 4
+
+ subs r9, r9, #1 @ r9 = iq_start_idx - 1; Z flag is set when iq_start_idx == 1 (intra case)
+ ldreqsh r10, [r0] @ Loads signed halfword pi2_src[0], only if iq_start_idx == 1
+ moveq r6, r10 @ Use pi2_src[0] as the dc value in case of intra, i.e. iq_start_idx == 1 (replaces the dequantized value)
+
+ add r6, r6, #32 @i_macro = q0 + 32
+ asr r6, r6, #6 @i_macro >>6 = DC output of 2-stage transform
+ vdup.s16 q0, r6 @copy transform output to all 8 lanes of Q0
+
+ vld1.32 d30[0], [r1], r3 @I row Load pu1_pred buffer
+
+ vld1.32 d30[1], [r1], r3 @II row Load pu1_pred buffer
+
+ vld1.32 d31[0], [r1], r3 @III row Load pu1_pred buf
+
+ vld1.32 d31[1], [r1], r3 @IV row Load pu1_pred buffer
+ vaddw.u8 q10, q0, d30 @I, II rows: widen pred to 16 bit and add the DC value
+
+ vaddw.u8 q11, q0, d31 @III, IV rows: widen pred to 16 bit and add the DC value
+
+ vqmovun.s16 d0, q10 @I, II rows: saturate to unsigned 8 bit
+
+ vst1.32 d0[0], [r2], r4 @I row store the value
+ vqmovun.s16 d1, q11 @III, IV rows: saturate to unsigned 8 bit
+ vst1.32 d0[1], [r2], r4 @II row store the value
+ vst1.32 d1[0], [r2], r4 @III row store the value
+ vst1.32 d1[1], [r2] @IV row store the value
+
+ ldmfd sp!, {r4-r10, r15} @Reload the registers from SP and return (saved lr is popped into pc)
+
+
+
+
+@/*
+@ *******************************************************************************
+@ *
+@ * @brief
+@ * This function performs inverse quant and Inverse transform type Ci4 for 8*8 block
+@ * for dc input pattern only, i.e. only the (0,0) element of the input 8x8 block is
+@ * non-zero. For complete function, refer ih264_iquant_itrans_recon_a9.s
+@ *
+@ * @par Description:
+@ * Performs inverse transform Ci8 and adds the residue to get the
+@ * reconstructed block
+@ *
+@ * @param[in] pi2_src
+@ * Input 4x4 coefficients
+@ *
+@ * @param[in] pu1_pred
+@ * Prediction 4x4 block
+@ *
+@ * @param[out] pu1_out
+@ * Output 4x4 block
+@ *
+@ * @param[in] u4_qp_div_6
+@ * QP
+@ *
+@ * @param[in] pu2_weigh_mat
+@ * Pointer to weight matrix
+@ *
+@ * @param[in] pred_strd,
+@ * Prediction stride
+@ *
+@ * @param[in] out_strd
+@ * Output Stride
+@ *
+@ *@param[in] pi2_tmp
+@ * temporary buffer of size 1*64
+@ *
+@ * @param[in] pu2_iscal_mat
+@ * Pointer to the inverse quantization matrix
+@ *
+@ * @returns Void
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+@void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
+@ UWORD8 *pu1_pred,
+@ UWORD8 *pu1_out,
+@ WORD32 pred_strd,
+@ WORD32 out_strd,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+@ WORD32 *pi4_tmp,
+@ WORD32 iq_start_idx)
+@**************Variables Vs Registers*****************************************
+@r0 => *pi2_src
+@r1 => *pu1_pred
+@r2 => *pu1_out
+@r3 => pred_strd
+@r4 => out_strd
+@r5 => *pu2_iscal_mat
+@r6 => *pu2_weigh_mat
+@r7 => u4_qp_div_6
+
+
+ .global ih264_iquant_itrans_recon_8x8_dc_a9
+ih264_iquant_itrans_recon_8x8_dc_a9:
+
+ stmfd sp!, {r4-r8, r14} @save r4-r8 and lr (6 words), so the stack arguments now start at [sp, #24]
+ ldr r5, [sp, #28] @Loads *pu2_iscal_mat
+ ldr r6, [sp, #32] @Loads *pu2_weigh_mat
+ ldrsh r8, [r0] @load pi2_src[0], SH for signed halfword load
+ ldrh r6, [r6] @load pu2_weigh_mat[0] , H for unsigned halfword load
+ ldrh r5, [r5] @load pu2_iscal_mat[0] , H for unsigned halfword load
+@=======================DEQUANT FROM HERE===================================
+ mul r6, r6, r5 @pu2_iscal_mat[0]*pu2_weigh_mat[0]
+ ldr r7, [sp, #36] @Loads u4_qp_div_6
+ mul r6, r6, r8 @pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0]
+ ldr r4, [sp, #24] @Loads out_strd (last stack access before vpush moves sp)
+
+ vpush {d8-d15} @save d8-d15 (q4-q7), which are overwritten below
+ lsl r6, r6, r7 @(pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0])<<u4_qp_div_6
+ add r6, r6, #32 @(pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0])<<u4_qp_div_6 + rnd_fact (32)
+ asr r6, r6, #6 @q0 = ((pi2_src[0]*pu2_iscal_mat[0]*pu2_weigh_mat[0] << u4_qp_div_6) + rnd_fact) >> 6
+ add r6, r6, #32 @i_macro = q0 + 32
+ asr r6, r6, #6 @i_macro >>6 = DC output of 2-stage transform
+ vdup.s16 q8, r6 @copy transform output to all 8 lanes of Q8
+
+ vld1.32 d24, [r1], r3 @ load pred row 0 (8 bytes)
+
+ vld1.32 d25, [r1], r3 @ load pred row 1
+
+ vld1.32 d26, [r1], r3 @ load pred row 2
+ vaddw.u8 q0, q8, d24 @ row 0: widen pred to 16 bit and add the DC value
+ vld1.32 d27, [r1], r3 @ load pred row 3
+ vaddw.u8 q1, q8, d25 @ row 1: pred + DC
+ vld1.32 d28, [r1], r3 @ load pred row 4
+ vaddw.u8 q2, q8, d26 @ row 2: pred + DC
+ vld1.32 d29, [r1], r3 @ load pred row 5
+ vaddw.u8 q3, q8, d27 @ row 3: pred + DC
+ vld1.32 d30, [r1], r3 @ load pred row 6
+ vaddw.u8 q4, q8, d28 @ row 4: pred + DC
+ vld1.32 d31, [r1], r3 @ load pred row 7
+
+@ Saturate the 16-bit sums to unsigned 8 bit and store the 8 output rows
+
+
+ vqmovun.s16 d0, q0 @ row 0: clip to 8 bit
+ vaddw.u8 q5, q8, d29 @ row 5: pred + DC
+ vqmovun.s16 d1, q1 @ row 1: clip to 8 bit
+ vaddw.u8 q6, q8, d30 @ row 6: pred + DC
+ vqmovun.s16 d2, q2 @ row 2: clip to 8 bit
+ vqmovun.s16 d3, q3 @ row 3: clip to 8 bit
+ vaddw.u8 q7, q8, d31 @ row 7: pred + DC
+ vqmovun.s16 d4, q4 @ row 4: clip to 8 bit
+ vqmovun.s16 d5, q5 @ row 5: clip to 8 bit
+ vst1.32 d0, [r2], r4 @ store output row 0
+ vqmovun.s16 d6, q6 @ row 6: clip to 8 bit
+ vst1.32 d1, [r2], r4 @ store output row 1
+ vqmovun.s16 d7, q7 @ row 7: clip to 8 bit
+ vst1.32 d2, [r2], r4 @ store output row 2
+ vst1.32 d3, [r2], r4 @ store output row 3
+ vst1.32 d4, [r2], r4 @ store output row 4
+ vst1.32 d5, [r2], r4 @ store output row 5
+ vst1.32 d6, [r2], r4 @ store output row 6
+ vst1.32 d7, [r2], r4 @ store output row 7
+
+ vpop {d8-d15} @ restore d8-d15
+ ldmfd sp!, {r4-r8, r15} @ restore r4-r8 and return (saved lr is popped into pc)
+
+
+@ /*
+@ ********************************************************************************
+@ *
+@ * @brief This function reconstructs a 4x4 sub block from quantized residue and
+@ * prediction buffer if only dc value is present for residue
+@ *
+@ * @par Description:
+@ * The quantized residue is first inverse quantized,
+@ * This inverse quantized content is added to the prediction buffer to recon-
+@ * struct the end output
+@ *
+@ * @param[in] pi2_src
+@ * quantized dc coefficient
+@ *
+@ * @param[in] pu1_pred
+@ * prediction 4x4 block in interleaved format
+@ *
+@ * @param[in] pred_strd,
+@ * Prediction buffer stride in interleaved format
+@ *
+@ * @param[in] out_strd
+@ * recon buffer Stride
+@ *
+@ * @returns none
+@ *
+@ * @remarks none
+@ *
+@ *******************************************************************************
+@ */
+@ void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
+@ UWORD8 *pu1_pred,
+@ UWORD8 *pu1_out,
+@ WORD32 pred_strd,
+@ WORD32 out_strd,
+@ const UWORD16 *pu2_iscal_mat,
+@ const UWORD16 *pu2_weigh_mat,
+@ UWORD32 u4_qp_div_6,
+@ WORD16 *pi2_tmp,
+@ WORD16 *pi2_dc_src)
+@ Register Usage
+@ r0 : pi2_src
+@ r1 : pu1_pred
+@ r2 : pu1_out
+@ r3 : pred_strd
+@ Neon registers d0-d7, d16-d30 are used
+@ No need for pushing arm and neon registers
+ .global ih264_iquant_itrans_recon_chroma_4x4_dc_a9
+ih264_iquant_itrans_recon_chroma_4x4_dc_a9:
+
+ ldr r0, [sp, #20] @r0 = pi2_dc_src (nothing is pushed, so stack args start at [sp]); pi2_src in r0 is not used
+ vld1.s16 d0, [r0] @load pi2_dc_src (only lane 0 is used below)
+
+ ldr r0, [sp] @load out_strd
+
+ vld2.s8 {d2, d3}, [r1], r3 @load pred row 0 de-interleaved: plane 1 => d2 & plane 2 => d3
+ vld2.s8 {d3, d4}, [r1], r3 @row 1: plane 1 => d3 (overwrites row 0 plane 2), plane 2 => d4
+ vrshr.s16 d0, d0, #6 @i_macro = ((q0 + 32) >> 6);
+ vld2.s8 {d4, d5}, [r1], r3 @row 2: plane 1 => d4, plane 2 => d5
+ vld2.s8 {d5, d6}, [r1], r3 @row 3: plane 1 => d5, plane 2 => d6; d2-d5 now hold plane 1 of rows 0-3
+
+ vdup.s16 q0, d0[0] @broadcast the rounded dc value to all 8 lanes of q0
+ mov r1, r2 @backup pu1_out
+
+ vtrn.32 d2, d3 @d2 = first 4 plane-1 bytes of row 0 followed by those of row 1
+ vtrn.32 d4, d5 @d4 = first 4 plane-1 bytes of row 2 followed by those of row 3
+
+ vmov.u16 q15, #0x00ff @mask selecting the low byte of every 16-bit lane
+
+ vld1.u8 d18, [r2], r0 @load out row 0 (8 bytes, both planes interleaved)
+ vaddw.u8 q1, q0, d2 @rows 0, 1: widen pred to 16 bit and add the dc value
+ vld1.u8 d19, [r2], r0 @load out row 1
+ vaddw.u8 q2, q0, d4 @rows 2, 3: pred + dc
+ vld1.u8 d20, [r2], r0 @load out row 2
+ vld1.u8 d21, [r2], r0 @load out row 3
+
+ vqmovun.s16 d2, q1 @rows 0, 1: clip to unsigned 8 bit
+ vqmovun.s16 d4, q2 @rows 2, 3: clip to unsigned 8 bit
+
+ vmovl.u8 q1, d2 @widen again: result sits in the low byte of each 16-bit lane, high byte is 0
+ vmovl.u8 q2, d4
+
+ vbit.u8 q9, q1, q15 @insert result bytes where the mask is set; the other bytes of out rows 0, 1 are kept
+ vbit.u8 q10, q2, q15 @same for out rows 2, 3
+
+ vst1.u8 d18, [r1], r0 @store out
+ vst1.u8 d19, [r1], r0
+ vst1.u8 d20, [r1], r0
+ vst1.u8 d21, [r1], r0
+
+ bx lr
+
+
+
+
+
+
+
diff --git a/common/arm/ih264_itrans_recon_a9.s b/common/arm/ih264_itrans_recon_a9.s
new file mode 100755
index 0000000..1d74da5
--- /dev/null
+++ b/common/arm/ih264_itrans_recon_a9.s
@@ -0,0 +1,216 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264_itrans_recon_a9.s
+@ *
+@ * @brief
+@ * Contains function definitions for single stage inverse transform
+@ *
+@ *
+@ * @par List of Functions:
+@ * - ih264_itrans_recon_4x4_a9()
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@*/
+@/**
+@ *******************************************************************************
+@ *
+@ * @brief
+@ * This function performs Inverse transform type Ci4 for 4*4 block
+@ *
+@ * @par Description:
+@ * Performs inverse transform Ci4 and adds the residue to get the
+@ * reconstructed block
+@ *
+@ * @param[in] pi16_levelBlock
+@ * Input 4x4 coefficients
+@ *
+@ * @param[in] puc_predBuffer
+@ * Prediction 4x4 block
+@ *
+@ * @param[out] puc_reconPic
+@ * Output 4x4 block
+@ *
+@ * @param[in] ui16_picWidth
+@ * Input stride
+@ *
+@ * @param[in] pred_strd
+@ * Prediction stride
+@ *
+@ * @param[in] dst_strd
+@ * Output Stride
+@ *
+@ * @param[in] zero_cols
+@ * Zero columns in pi2_src
+@ *
+@ * @returns Void
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *
+@ *******************************************************************************
+@ */
+@void ih264_itrans_recon_4x4(
+@ WORD16 *pi2_src,
+@ UWORD8 *pu1_pred,
+@ UWORD8 *pu1_recon,
+@ WORD32 src_strd,
+@ WORD32 pred_strd,
+@ WORD32 dst_strd,
+@ UWORD32 q_lev, //quantizer level
+@ WORD32 *pi4_tmp)
+@**************Variables Vs Registers*****************************************
+@r0 => *pi2_src
+@r1 => *pu1_pred
+@r2 => *pu1_recon
+@r3 => src_strd
+@r4 => pred_strd
+@r5 => dst_strd
+@r6 => q_lev
+@r7 => *pi4_tmp
+
+.text
+.p2align 2
+
+
+ .global ih264_itrans_recon_4x4_a9
+
+ih264_itrans_recon_4x4_a9:
+ stmfd sp!, {r4-r12, r14} @save r4-r12 and lr (10 words), so the stack arguments now start at [sp, #40]
+ lsl r3, r3, #1 @src_strd * 2: stride in bytes for the 16-bit source rows
+
+ vld1.16 d0, [r0], r3 @0th row pi2_src_tmp[0]
+ ldr r4, [sp, #40] @Loads pred_strd
+
+ vld1.16 d1, [r0], r3 @I row pi2_src_tmp[0]
+ ldr r5, [sp, #44] @Loads dst_strd
+
+ vld1.16 d2, [r0], r3 @II row pi2_src_tmp[0]
+
+ vld1.16 d3, [r0] @III row pi2_src_tmp[0]
+ ldr r7, [sp, #52] @Loads *pi4_tmp (r7 is not referenced again in this function)
+
+ vpush {d8-d15} @save d8-d15 (q4-q7), which are overwritten below
+
+ vtrn.16 d0, d1 @Transpose to get all the 0th element in the single D register
+ vtrn.16 d2, d3
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3 @D0 --> pi2_src_tmp[0], D1 --> pi2_src_tmp[1]
+ @D2 --> pi2_src_tmp[2], D3 --> pi2_src_tmp[3]
+
+ vaddl.s16 q3, d0, d2 @x0 = (pi2_src_tmp[0] + pi2_src_tmp[2])
+ vsubl.s16 q4, d0, d2 @x1 = (pi2_src_tmp[0] - pi2_src_tmp[2])
+ vshr.s16 d4, d1, #1 @pi2_src_tmp[1] >> 1
+ vshr.s16 d5, d3, #1 @pi2_src_tmp[3] >> 1
+
+ vsubl.s16 q5, d4, d3 @x2 = D_SHIFT(pi2_src_tmp[1],1,shft) - pi2_src_tmp[3]
+
+ vaddl.s16 q6, d1, d5 @x3 = pi2_src_tmp[1] + D_SHIFT(pi2_src_tmp[3],1,shft)
+
+ vadd.s32 q8, q4, q5 @x1 + x2
+ vsub.s32 q9, q4, q5 @x1 - x2
+
+ vadd.s32 q7, q3, q6 @x0 + x3
+ vsub.s32 q10, q3, q6 @x0 - x3
+
+ vtrn.32 q7, q8 @Transpose the register to have the adjacent values
+
+ vtrn.32 q9, q10
+ vadd.s32 d6, d14, d15 @x0(0,1) = (pi4_tblk[0,1] + pi4_tblk[8,9])
+
+ vsub.s32 d7, d14, d15 @x1(0,1) = (pi4_tblk[0,1] - pi4_tblk[8,9])
+
+ vshr.s32 d4, d16, #1 @pi4_tblk[4,5] >> 1
+ vshr.s32 d5, d17, #1 @pi4_tblk[12,13] >> 1
+
+ vsub.s32 d8, d4, d17 @x2(0,1) = D_SHIFT(pi4_tblk[4,5],1,shft) - pi4_tblk[12,13]
+ vadd.s32 d9, d16, d5 @x3(0,1) = pi4_tblk[4,5] + D_SHIFT(pi4_tblk[12,13],1,shft)
+
+ vadd.s32 d10, d18, d19 @x0(2,3) = (pi4_tblk[2,3] + pi4_tblk[10,11])
+ vsub.s32 d11, d18, d19 @x1(2,3) = (pi4_tblk[2,3] - pi4_tblk[10,11])
+ vshr.s32 d4, d20, #1 @pi4_tblk[6,7] >> 1
+ vshr.s32 d5, d21, #1 @pi4_tblk[14,15] >> 1
+
+ vld1.32 d30[0], [r1], r4 @I row Load pu1_pred buffer
+ vsub.s32 d12, d4, d21 @x2(2,3) = D_SHIFT(pi4_tblk[6,7],1,shft) - pi4_tblk[14,15]
+
+ vmovl.u8 q15, d30 @I row Convert 8 bit pred buffer to 16 bit
+ vadd.s32 d13, d20, d5 @x3(2,3) = pi4_tblk[6,7] + D_SHIFT(pi4_tblk[14,15],1,shft)
+
+ vadd.s32 d16, d6, d9 @I row i_macro(0,1) = x0(0,1) + x3(0,1)
+
+ vld1.32 d28[0], [r1], r4 @II row Load pu1_pred buffer
+ vadd.s32 d17, d10, d13 @I row i_macro(2,3) = x0(2,3) + x3(2,3)
+
+ vqrshrn.s32 d16, q8, #6 @I row i_macro = D_SHIFT(i_macro,6,shft), rounded and narrowed to 16 bit
+
+ vmovl.u8 q14, d28 @II row Convert 8 bit pred buffer to 16 bit
+ vadd.u16 d16, d16, d30 @I row i_macro += *pu1_pred_tmp
+
+ vqmovun.s16 d16, q8 @I row CLIP_U8(i_macro); only the first 4 bytes of d16 are stored
+ vadd.s32 d18, d7, d8 @II row i_macro(0,1) = x1(0,1) + x2(0,1)
+
+ vld1.32 d26[0], [r1], r4 @III row Load pu1_pred buffer
+ vadd.s32 d19, d11, d12 @II row i_macro(2,3) = x1(2,3) + x2(2,3)
+
+ vqrshrn.s32 d18, q9, #6 @II row i_macro = D_SHIFT(i_macro,6,shft)
+
+ vmovl.u8 q13, d26 @III row Convert 8 bit pred buffer to 16 bit
+ vadd.u16 d18, d18, d28 @II row i_macro += *pu1_pred_tmp
+
+ vst1.32 d16[0], [r2], r5 @I row store the value
+ vsub.s32 d20, d7, d8 @III row i_macro(0,1) = x1(0,1) - x2(0,1)
+
+ vqmovun.s16 d18, q9 @II row CLIP_U8(i_macro)
+ vsub.s32 d21, d11, d12 @III row i_macro(2,3) = x1(2,3) - x2(2,3)
+
+ vld1.32 d24[0], [r1], r4 @IV row Load pu1_pred buffer
+ vqrshrn.s32 d20, q10, #6 @III row i_macro = D_SHIFT(i_macro,6,shft)
+
+ vmovl.u8 q12, d24 @IV row Convert 8 bit pred buffer to 16 bit
+ vadd.u16 d20, d20, d26 @III row i_macro += *pu1_pred_tmp
+
+ vqmovun.s16 d20, q10 @III row CLIP_U8(i_macro)
+ vsub.s32 d22, d6, d9 @IV row i_macro(0,1) = x0(0,1) - x3(0,1)
+
+ vst1.32 d18[0], [r2], r5 @II row store the value
+ vsub.s32 d23, d10, d13 @IV row i_macro(2,3) = x0(2,3) - x3(2,3)
+
+ vqrshrn.s32 d22, q11, #6 @IV row i_macro = D_SHIFT(i_macro,6,shft)
+
+ vst1.32 d20[0], [r2], r5 @III row store the value
+ vadd.u16 d22, d22, d24 @IV row i_macro += *pu1_pred_tmp
+
+ vqmovun.s16 d22, q11 @IV row CLIP_U8(i_macro)
+ vst1.32 d22[0], [r2], r5 @IV row store the value
+
+
+ vpop {d8-d15} @restore d8-d15
+ ldmfd sp!, {r4-r12, r15} @Reload the registers from SP and return (saved lr is popped into pc)
+
+
+
+
diff --git a/common/arm/ih264_mem_fns_neon.s b/common/arm/ih264_mem_fns_neon.s
new file mode 100755
index 0000000..2808897
--- /dev/null
+++ b/common/arm/ih264_mem_fns_neon.s
@@ -0,0 +1,268 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264_mem_fns_neon.s
+@ *
+@ * @brief
+@ * Contains function definitions for memory manipulation
+@ *
+@ * @author
+@ * Naveen SR
+@ *
+@ * @par List of Functions:
+@ * - ih264_memcpy_mul_8_a9q()
+@ * - ih264_memcpy_a9q()
+@ * - ih264_memset_mul_8_a9q()
+@ * - ih264_memset_a9q()
+@ * - ih264_memset_16bit_mul_8_a9q()
+@ * - ih264_memset_16bit_a9q()
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@*/
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* memcpy of a 1d array
+@*
+@* @par Description:
+@* Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes
+@*
+@* @param[in] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] num_bytes
+@* number of bytes to copy
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
+@ UWORD8 *pu1_src,
+@ UWORD8 num_bytes)
+@**************Variables Vs Registers*************************
+@ r0 => *pu1_dst
+@ r1 => *pu1_src
+@ r2 => num_bytes
+
+.text
+.p2align 2
+
+
+ .global ih264_memcpy_mul_8_a9q
+
+ih264_memcpy_mul_8_a9q: @ copy num_bytes (r2) bytes from pu1_src (r1) to pu1_dst (r0), 8 bytes per iteration
+
+loop_neon_memcpy_mul_8:
+ @ Memcpy 8 bytes
+ vld1.8 d0, [r1]! @ load 8 bytes and advance the source pointer
+ vst1.8 d0, [r0]! @ store 8 bytes and advance the destination pointer
+
+ subs r2, r2, #8
+ bne loop_neon_memcpy_mul_8 @ ends only when the count reaches exactly 0, so num_bytes must be a non-zero multiple of 8
+ bx lr
+
+
+
+@*******************************************************************************
+@*/
+@void ih264_memcpy(UWORD8 *pu1_dst,
+@ UWORD8 *pu1_src,
+@ UWORD8 num_bytes)
+@**************Variables Vs Registers*************************
+@ r0 => *pu1_dst
+@ r1 => *pu1_src
+@ r2 => num_bytes
+
+
+
+ .global ih264_memcpy_a9q
+
+ih264_memcpy_a9q: @ copy num_bytes (r2) bytes from pu1_src (r1) to pu1_dst (r0); r3 and d0 are scratch
+ subs r2, #8 @ r2 = num_bytes - 8
+ blt memcpy @ fewer than 8 bytes: skip the NEON loop
+loop_neon_memcpy:
+ @ Memcpy 8 bytes
+ vld1.8 d0, [r1]!
+ vst1.8 d0, [r0]!
+
+ subs r2, #8
+ bge loop_neon_memcpy @ at least 8 more bytes remain
+ cmp r2, #-8 @ r2 == -8 means no bytes are left over
+ bxeq lr
+
+memcpy:
+ adds r2, #8 @ r2 = leftover byte count (0..7); Z is set when it is 0
+ bxeq lr @ num_bytes == 0: return before the byte loop, which would copy a byte and wrap its counter
+loop_memcpy:
+ ldrb r3, [r1], #1 @ copy the leftover bytes one at a time
+ strb r3, [r0], #1
+ subs r2, #1
+ bne loop_memcpy
+ bx lr
+
+
+
+
+@void ih264_memset_mul_8(UWORD8 *pu1_dst,
+@ UWORD8 value,
+@ UWORD8 num_bytes)
+@**************Variables Vs Registers*************************
+@ r0 => *pu1_dst
+@ r1 => value
+@ r2 => num_bytes
+
+
+
+ .global ih264_memset_mul_8_a9q
+
+ih264_memset_mul_8_a9q: @ fill num_bytes (r2) bytes at pu1_dst (r0) with the byte value in r1
+
+@ Assumptions: numbytes is either 8, 16 or 32
+ vdup.8 d0, r1 @ replicate the fill byte into all 8 lanes of d0
+loop_memset_mul_8:
+ @ Memset 8 bytes
+ vst1.8 d0, [r0]!
+
+ subs r2, r2, #8
+ bne loop_memset_mul_8 @ ends only when the count reaches exactly 0 (see the assumption above)
+
+ bx lr
+
+
+
+
+@void ih264_memset(UWORD8 *pu1_dst,
+@ UWORD8 value,
+@ UWORD8 num_bytes)
+@**************Variables Vs Registers*************************
+@ r0 => *pu1_dst
+@ r1 => value
+@ r2 => num_bytes
+
+
+
+ .global ih264_memset_a9q
+
+ih264_memset_a9q: @ fill num_bytes (r2) bytes at pu1_dst (r0) with the byte value in r1; d0 is scratch
+ subs r2, #8 @ r2 = num_bytes - 8
+ blt memset @ fewer than 8 bytes: skip the NEON loop
+ vdup.8 d0, r1 @ replicate the fill byte into all 8 lanes of d0
+loop_neon_memset:
+ @ Memset 8 bytes
+ vst1.8 d0, [r0]!
+
+ subs r2, #8
+ bge loop_neon_memset @ at least 8 more bytes remain
+ cmp r2, #-8 @ r2 == -8 means no bytes are left over
+ bxeq lr
+
+memset:
+ adds r2, #8 @ r2 = leftover byte count (0..7); Z is set when it is 0
+ bxeq lr @ num_bytes == 0: return before the byte loop, which would store a byte and wrap its counter
+loop_memset:
+ strb r1, [r0], #1 @ store the leftover bytes one at a time
+ subs r2, #1
+ bne loop_memset
+ bx lr
+
+
+
+
+@void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
+@ UWORD16 value,
+@ UWORD8 num_words)
+@**************Variables Vs Registers*************************
+@ r0 => *pu2_dst
+@ r1 => value
+@ r2 => num_words
+
+
+
+ .global ih264_memset_16bit_mul_8_a9q
+
+ih264_memset_16bit_mul_8_a9q: @ fill num_words (r2) 16-bit words at pu2_dst (r0) with the value in r1
+
+@ Assumptions: num_words is either 8, 16 or 32
+
+ @ Memset 8 words
+ vdup.16 d0, r1 @ replicate the 16-bit value into all 4 lanes of d0
+loop_memset_16bit_mul_8:
+ vst1.16 d0, [r0]! @ two 4-word stores = 8 words per iteration
+ vst1.16 d0, [r0]!
+
+ subs r2, r2, #8
+ bne loop_memset_16bit_mul_8 @ ends only when the count reaches exactly 0 (see the assumption above)
+
+ bx lr
+
+
+
+
+@void ih264_memset_16bit(UWORD16 *pu2_dst,
+@ UWORD16 value,
+@ UWORD8 num_words)
+@**************Variables Vs Registers*************************
+@ r0 => *pu2_dst
+@ r1 => value
+@ r2 => num_words
+
+
+
+ .global ih264_memset_16bit_a9q
+
+ih264_memset_16bit_a9q: @ fill num_words (r2) 16-bit words at pu2_dst (r0) with the value in r1; d0 is scratch
+ subs r2, #8 @ r2 = num_words - 8
+ blt memset_16bit @ fewer than 8 words: skip the NEON loop
+ vdup.16 d0, r1 @ replicate the 16-bit value into all 4 lanes of d0
+loop_neon_memset_16bit:
+ @ Memset 8 words
+ vst1.16 d0, [r0]! @ two 4-word stores = 8 words per iteration
+ vst1.16 d0, [r0]!
+
+ subs r2, #8
+ bge loop_neon_memset_16bit @ at least 8 more words remain
+ cmp r2, #-8 @ r2 == -8 means no words are left over
+ bxeq lr
+
+memset_16bit:
+ adds r2, #8 @ r2 = leftover word count (0..7); Z is set when it is 0
+ bxeq lr @ num_words == 0: return before the word loop, which would store a word and wrap its counter
+loop_memset_16bit:
+ strh r1, [r0], #2 @ store the leftover words one at a time
+ subs r2, #1
+ bne loop_memset_16bit
+ bx lr
+
+
+
+
diff --git a/common/arm/ih264_padding_neon.s b/common/arm/ih264_padding_neon.s
new file mode 100755
index 0000000..9bab268
--- /dev/null
+++ b/common/arm/ih264_padding_neon.s
@@ -0,0 +1,646 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264_padding_neon.s
+@ *
+@ * @brief
+@ * Contains function definitions padding
+@ *
+@ * @author
+@ * Ittiam
+@ *
+@ * @par List of Functions:
+@ * - ih264_pad_top_a9q()
+@ * - ih264_pad_left_luma_a9q()
+@ * - ih264_pad_left_chroma_a9q()
+@ * - ih264_pad_right_luma_a9q()
+@ * - ih264_pad_right_chroma_a9q()
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@*/
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief pad at the top of a 2d array
+@*
+@* @par Description:
+@* The top row of a 2d array is replicated for pad_size times at the top
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pad_size
+@* integer -padding size of the array
+@*
+@* @returns none
+@*
+@* @remarks none
+@*
+@*******************************************************************************
+@*/
+@void ih264_pad_top(UWORD8 *pu1_src,
+@ WORD32 src_strd,
+@ WORD32 wd,
+@ WORD32 pad_size)
+@**************Variables Vs Registers*************************
+@ r0 => *pu1_src
+@ r1 => src_strd
+@ r2 => wd
+@ r3 => pad_size
+
+.text
+.p2align 2
+
+ .global ih264_pad_top_a9q
+
+ih264_pad_top_a9q: @ replicate the row at pu1_src (r0) into the pad_size (r3) rows above it, 16 columns at a time
+
+ stmfd sp!, {r4-r11, lr} @save r4-r11 and lr
+
+ sub r5, r0, r1 @ r5 = pu1_src - src_strd: the row directly above the source row
+ rsb r6, r1, #0 @ r6 = -src_strd: step that moves one row up
+
+loop_neon_memcpy_mul_16:
+ @ Load 16 bytes
+ vld1.8 {d0, d1}, [r0]!
+ mov r4, r5 @ r4 = write pointer for this 16-byte column
+ mov r7, r3 @ r7 = pad_size, counts the rows still to write
+ add r5, r5, #16 @ advance to the next 16-byte column
+
+loop_neon_pad_top:
+ vst1.8 {d0, d1}, [r4], r6 @ store 16 bytes, then move one row up
+ subs r7, r7, #1
+ bne loop_neon_pad_top @ ends only at exactly 0, so pad_size must be non-zero
+
+ subs r2, r2, #16
+ bne loop_neon_memcpy_mul_16 @ ends only at exactly 0, so wd must be a non-zero multiple of 16
+
+ ldmfd sp!, {r4-r11, pc} @Reload the registers from SP and return
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Padding (luma block) at the left of a 2d array
+@*
+@* @par Description:
+@* The left column of a 2d array is replicated for pad_size times at the left
+@*
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pad_size
+@* integer -padding size of the array
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@#if PAD_LEFT_LUMA == C
+@void ih264_pad_left_luma(UWORD8 *pu1_src,
+@ WORD32 src_strd,
+@ WORD32 ht,
+@ WORD32 pad_size)
+@**************Variables Vs Registers*************************
+@ r0 => *pu1_src
+@ r1 => src_strd
+@ r2 => ht
+@ r3 => pad_size
+
+
+ .global ih264_pad_left_luma_a9q
+
+ih264_pad_left_luma_a9q: @ replicate the byte at the start of each of ht (r2) rows into the pad_size (r3) bytes to its left
+
+ stmfd sp!, {r4-r11, lr} @save r4-r11 and lr
+
+
+ sub r4, r0, r3 @ r4 = pu1_src - pad_size: start of the padding area in the first row
+ sub r6, r1, #16 @ r6 = src_strd - 16: step to the next row after a 16-byte post-incremented store
+ subs r5, r3, #16 @ Z is set when pad_size == 16
+ bne loop_32 @ any other pad_size takes the 32-byte path
+loop_16: @ /*hard coded for width=16 ,height =8,16*/
+ ldrb r8, [r0], r1 @ each ldrb reads the first byte of a row and steps to the next row
+ ldrb r9, [r0], r1
+ vdup.u8 q0, r8 @ each vdup replicates that byte into 16 lanes
+ ldrb r10, [r0], r1
+ vst1.8 {q0}, [r4], r1 @ 16 bytes store
+ vdup.u8 q1, r9
+ vst1.8 {q1}, [r4], r1 @ 16 bytes store
+ ldrb r11, [r0], r1
+ vdup.u8 q2, r10
+ vdup.u8 q3, r11
+ vst1.8 {q2}, [r4], r1 @ 16 bytes store
+ ldrb r8, [r0], r1
+ vst1.8 {q3}, [r4], r1 @ 16 bytes store
+ ldrb r9, [r0], r1
+ vdup.u8 q0, r8
+ ldrb r10, [r0], r1
+ vst1.8 {q0}, [r4], r1 @ 16 bytes store
+ vdup.u8 q1, r9
+ ldrb r11, [r0], r1
+ vst1.8 {q1}, [r4], r1 @ 16 bytes store
+ vdup.u8 q2, r10
+ vdup.u8 q3, r11
+ subs r2, r2, #8 @ 8 rows are handled per iteration
+ vst1.8 {q2}, [r4], r1 @ 16 bytes store
+ vst1.8 {q3}, [r4], r1 @ 16 bytes store
+ bne loop_16 @ ends only at exactly 0, so ht must be a non-zero multiple of 8
+ b end_func
+
+loop_32: @ /*hard coded for width=32 ,height =8,16*/
+ ldrb r8, [r0], r1
+ ldrb r9, [r0], r1
+ vdup.u8 q0, r8
+ ldrb r10, [r0], r1
+ vst1.8 {q0}, [r4]! @ 16 bytes store
+ vdup.u8 q1, r9
+ vst1.8 {q0}, [r4], r6 @ second 16 bytes of the row, then step to the next row
+ vst1.8 {q1}, [r4]! @ 16 bytes store
+ vdup.u8 q2, r10
+ vst1.8 {q1}, [r4], r6 @ 16 bytes store
+ ldrb r11, [r0], r1
+ vst1.8 {q2}, [r4]! @ 16 bytes store
+ vdup.u8 q3, r11
+ vst1.8 {q2}, [r4], r6 @ 16 bytes store
+ ldrb r8, [r0], r1
+ vst1.8 {q3}, [r4]! @ 16 bytes store
+ vdup.u8 q0, r8
+ ldrb r9, [r0], r1
+ vst1.8 {q3}, [r4], r6 @ 16 bytes store
+ ldrb r10, [r0], r1
+ vst1.8 {q0}, [r4]! @ 16 bytes store
+ vdup.u8 q1, r9
+ vst1.8 {q0}, [r4], r6 @ 16 bytes store
+ ldrb r11, [r0], r1
+ vst1.8 {q1}, [r4]! @ 16 bytes store
+ vdup.u8 q2, r10
+ vst1.8 {q1}, [r4], r6 @ 16 bytes store
+ vst1.8 {q2}, [r4]! @ 16 bytes store
+ vdup.u8 q3, r11
+ vst1.8 {q2}, [r4], r6 @ 16 bytes store
+ subs r2, r2, #8 @ 8 rows are handled per iteration
+ vst1.8 {q3}, [r4]! @ 16 bytes store
+ vst1.8 {q3}, [r4], r6 @ 16 bytes store
+ bne loop_32 @ ends only at exactly 0, so ht must be a non-zero multiple of 8
+
+
+
+end_func:
+ ldmfd sp!, {r4-r11, pc} @Reload the registers from SP and return
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Padding (chroma block) at the left of a 2d array
+@*
+@* @par Description:
+@* The left column of a 2d array is replicated for pad_size times at the left
+@*
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array (each colour component)
+@*
+@* @param[in] pad_size
+@* integer -padding size of the array
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@#if PAD_LEFT_CHROMA == C
+@void ih264_pad_left_chroma(UWORD8 *pu1_src,
+@ WORD32 src_strd,
+@ WORD32 ht,
+@ WORD32 pad_size)
+@{
+@ r0 => *pu1_src
+@ r1 => src_strd
+@ r2 => ht
+@ r3 => pad_size
+
+
+
+ .global ih264_pad_left_chroma_a9q
+
+ih264_pad_left_chroma_a9q: @ replicate the 2-byte pair at the start of each of ht (r2) rows into the 32 bytes starting at pu1_src - pad_size
+
+ stmfd sp!, {r4-r11, lr} @save r4-r11 and lr
+
+ sub r4, r0, r3 @ r4 = pu1_src - pad_size: start of the padding area in the first row
+ sub r6, r1, #16 @ r6 = src_strd - 16: step to the next row after a 16-byte post-incremented store
+
+
+loop_32_l_c: @ /*hard coded for width=32 ,height =4,8,12*/
+ ldrh r8, [r0], r1 @ each ldrh reads the first 2 bytes of a row and steps to the next row
+ ldrh r9, [r0], r1
+ vdup.u16 q0, r8 @ each vdup replicates that 2-byte pair into 8 lanes
+ ldrh r10, [r0], r1
+ vst1.8 {q0}, [r4]! @ 16 bytes store
+ vdup.u16 q1, r9
+ vst1.8 {q0}, [r4], r6 @ 16 bytes store
+ ldrh r11, [r0], r1
+ vst1.8 {q1}, [r4]! @ 16 bytes store
+ vdup.u16 q2, r10
+ vst1.8 {q1}, [r4], r6 @ 16 bytes store
+ vdup.u16 q3, r11
+ vst1.8 {q2}, [r4]! @ 16 bytes store
+ vst1.8 {q2}, [r4], r6 @ 16 bytes store
+ subs r2, r2, #4 @ 4 rows are handled per section
+ vst1.8 {q3}, [r4]! @ 16 bytes store
+ vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+
+ beq end_func_l_c @/* Branching when ht=4*/
+
+ ldrh r8, [r0], r1
+ ldrh r9, [r0], r1
+ vdup.u16 q0, r8
+ ldrh r10, [r0], r1
+ vst1.8 {q0}, [r4]! @ 16 bytes store
+ vdup.u16 q1, r9
+ vst1.8 {q0}, [r4], r6 @ 16 bytes store
+ ldrh r11, [r0], r1
+ vst1.8 {q1}, [r4]! @ 16 bytes store
+ vdup.u16 q2, r10
+ vst1.8 {q1}, [r4], r6 @ 16 bytes store
+ vdup.u16 q3, r11
+ vst1.8 {q2}, [r4]! @ 16 bytes store
+ vst1.8 {q2}, [r4], r6 @ 16 bytes store
+ subs r2, r2, #4 @ 4 more rows done
+ vst1.8 {q3}, [r4]! @ 16 bytes store
+ vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+ beq end_func_l_c @/* Branching when ht=8*/
+ bne loop_32_l_c @ taken whenever the beq above is not, so the unrolled copy below is never executed
+
+ ldrh r8, [r0], r1 @ unreachable from here down to end_func_l_c (see the bne above)
+ ldrh r9, [r0], r1
+ vdup.u16 q0, r8
+ ldrh r10, [r0], r1
+ vst1.8 {q0}, [r4]! @ 16 bytes store
+ vdup.u16 q1, r9
+ vst1.8 {q0}, [r4], r6 @ 16 bytes store
+ ldrh r11, [r0], r1
+ vst1.8 {q1}, [r4]! @ 16 bytes store
+ vdup.u16 q2, r10
+ vst1.8 {q1}, [r4], r6 @ 16 bytes store
+ vdup.u16 q3, r11
+ vst1.8 {q2}, [r4]! @ 16 bytes store
+ vst1.8 {q2}, [r4], r6 @ 16 bytes store
+ vst1.8 {q3}, [r4]! @ 16 bytes store
+ vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+end_func_l_c:
+ ldmfd sp!, {r4-r11, pc} @Reload the registers from SP and return
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Padding (luma block) at the right of a 2d array
+@*
+@* @par Description:
+@* The right column of a 2d array is replicated for pad_size times at the right
+@*
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @param[in] pad_size
+@* integer -padding size of the array
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@#if PAD_RIGHT_LUMA == C
+@void ih264_pad_right_luma(UWORD8 *pu1_src,
+@ WORD32 src_strd,
+@ WORD32 ht,
+@ WORD32 pad_size)
+@{
+@ WORD32 row;
+@
+@ for(row = 0; row < ht; row++)
+@ {
+@ memset(pu1_src, *(pu1_src -1), pad_size);
+@
+@ pu1_src += src_strd;
+@ }
+@}
+@
+@ r0 => *pu1_src
+@ r1 => src_strd
+@ r2 => ht
+@ r3 => pad_size
+
+
+
+    .global ih264_pad_right_luma_a9q
+@ r0 = read pointer (pixel to replicate), r4 = write pointer, r2 = rows left, q0-q3 = replicated pixels
+ih264_pad_right_luma_a9q:
+
+    stmfd sp!, {r4-r11, lr} @ save r4-r11 and the return address
+
+    mov r4, r0 @ r4 = pu1_src, first byte written in each row
+    sub r6, r1, #16 @ r6 = src_strd - 16, used by the 32-bytes-per-row loop
+    sub r0, r0, #1 @ r0 = pu1_src - 1, the pixel that gets replicated
+    subs r5, r3, #16 @ Z is set when pad_size == 16
+    bne loop_32_r @ any other pad_size uses this function's own 32-byte loop
+loop_16_r: @ /*hard coded for width=16 ,height =8,16*/
+    ldrb r8, [r0], r1
+    ldrb r9, [r0], r1
+    vdup.u8 q0, r8
+    ldrb r10, [r0], r1
+    vst1.8 {q0}, [r4], r1 @ 16 bytes store
+    vdup.u8 q1, r9
+    vst1.8 {q1}, [r4], r1 @ 16 bytes store
+    ldrb r11, [r0], r1
+    vdup.u8 q2, r10
+    vdup.u8 q3, r11
+    vst1.8 {q2}, [r4], r1 @ 16 bytes store
+    ldrb r8, [r0], r1
+    vst1.8 {q3}, [r4], r1 @ 16 bytes store
+    ldrb r9, [r0], r1
+    vdup.u8 q0, r8
+    ldrb r10, [r0], r1
+    vst1.8 {q0}, [r4], r1 @ 16 bytes store
+    vdup.u8 q1, r9
+    ldrb r11, [r0], r1
+    vst1.8 {q1}, [r4], r1 @ 16 bytes store
+    vdup.u8 q2, r10
+    vdup.u8 q3, r11
+    subs r2, r2, #8
+    vst1.8 {q2}, [r4], r1 @ 16 bytes store
+    vst1.8 {q3}, [r4], r1 @ 16 bytes store
+    bne loop_16_r @ eight rows per pass; ht must be a non-zero multiple of 8
+    b end_func_r
+
+loop_32_r: @ /*hard coded for width=32 ,height =8,16*/
+    ldrb r8, [r0], r1
+    ldrb r9, [r0], r1
+    vdup.u8 q0, r8
+    ldrb r10, [r0], r1
+    vst1.8 {q0}, [r4]! @ 16 bytes store
+    vdup.u8 q1, r9
+    vst1.8 {q0}, [r4], r6 @ 16 bytes store
+    vst1.8 {q1}, [r4]! @ 16 bytes store
+    vdup.u8 q2, r10
+    vst1.8 {q1}, [r4], r6 @ 16 bytes store
+    ldrb r11, [r0], r1
+    vst1.8 {q2}, [r4]! @ 16 bytes store
+    vdup.u8 q3, r11
+    vst1.8 {q2}, [r4], r6 @ 16 bytes store
+    ldrb r8, [r0], r1
+    vst1.8 {q3}, [r4]! @ 16 bytes store
+    ldrb r9, [r0], r1
+    vdup.u8 q0, r8
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+    ldrb r10, [r0], r1
+    vst1.8 {q0}, [r4]! @ 16 bytes store
+    vdup.u8 q1, r9
+    vst1.8 {q0}, [r4], r6 @ 16 bytes store
+    ldrb r11, [r0], r1
+    vst1.8 {q1}, [r4]! @ 16 bytes store
+    vdup.u8 q2, r10
+    vst1.8 {q1}, [r4], r6 @ 16 bytes store
+    vst1.8 {q2}, [r4]! @ 16 bytes store
+    vdup.u8 q3, r11
+    vst1.8 {q2}, [r4], r6 @ 16 bytes store
+    subs r2, r2, #8
+    vst1.8 {q3}, [r4]! @ 16 bytes store
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+    bne loop_32_r @ eight rows per pass; ht must be a non-zero multiple of 8
+
+
+
+end_func_r:
+    ldmfd sp!, {r4-r11, pc} @ restore r4-r11 and return
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* Padding (chroma block) at the right of a 2d array
+@*
+@* @par Description:
+@* The right column of a 2d array is replicated for pad_size times at the right
+@*
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array (each colour component)
+@*
+@* @param[in] pad_size
+@* integer -padding size of the array
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@#if PAD_RIGHT_CHROMA == C
+@void ih264_pad_right_chroma(UWORD8 *pu1_src,
+@ WORD32 src_strd,
+@ WORD32 ht,
+@ WORD32 pad_size)
+@ r0 => *pu1_src
+@ r1 => src_strd
+@ r2 => ht
+@ r3 => pad_size
+
+
+
+    .global ih264_pad_right_chroma_a9q
+
+@ For each of the ht rows, the two bytes just before the row's pu1_src
+@ position are read as one halfword and stored 16 times starting at pu1_src.
+@
+@ Register roles:
+@ r0 - read pointer (pu1_src - 2), advanced by src_strd after every load
+@ r1 - src_strd
+@ r2 - rows still to pad, decremented by 4 after each group of four rows
+@ r3 - pad_size, not read by this routine
+@ r4 - write pointer, starts at pu1_src
+@ r6 - src_strd - 16, moves r4 from the second store of a row to the next row
+@ r8-r11 - halfword read from each of the four rows of a group
+@ q0-q3 - that halfword replicated into all eight 16-bit lanes
+@
+@ Two 16-byte stores are issued per row, so 32 bytes are always written.
+@ ht must be a non-zero multiple of 4, because the loop only stops when the
+@ row counter reaches exactly zero.
+ih264_pad_right_chroma_a9q:
+
+    stmfd sp!, {r4-r11, lr} @ save r4-r11 and the return address
+
+    mov r4, r0 @ r4 = pu1_src, first byte written in each row
+    sub r6, r1, #16 @ r6 = src_strd - 16
+    sub r0, r0, #2 @ r0 = pu1_src - 2, the byte pair that gets replicated
+loop_32_r_c: @ /*hard coded for width=32 ,height =8,4*/
+    ldrh r8, [r0], r1
+    ldrh r9, [r0], r1
+    vdup.u16 q0, r8
+    ldrh r10, [r0], r1
+    vst1.8 {q0}, [r4]! @ 16 bytes store
+    vdup.u16 q1, r9
+    vst1.8 {q0}, [r4], r6 @ 16 bytes store
+    vst1.8 {q1}, [r4]! @ 16 bytes store
+    vdup.u16 q2, r10
+    vst1.8 {q1}, [r4], r6 @ 16 bytes store
+    subs r2, r2, #4
+    ldrh r11, [r0], r1
+    vst1.8 {q2}, [r4]! @ 16 bytes store
+    vdup.u16 q3, r11
+    vst1.8 {q2}, [r4], r6 @ 16 bytes store
+    vst1.8 {q3}, [r4]! @ 16 bytes store
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+@ Flags come from the subs above; the loads and NEON stores do not alter them.
+    beq end_func_r_c @ no rows left after the first group of this pass
+
+@ Second group of four rows.
+    ldrh r8, [r0], r1
+    vdup.u16 q0, r8
+    ldrh r9, [r0], r1
+    ldrh r10, [r0], r1
+    vst1.8 {q0}, [r4]! @ 16 bytes store
+    vdup.u16 q1, r9
+    vst1.8 {q0}, [r4], r6 @ 16 bytes store
+    ldrh r11, [r0], r1
+    vst1.8 {q1}, [r4]! @ 16 bytes store
+    vdup.u16 q2, r10
+    vst1.8 {q1}, [r4], r6 @ 16 bytes store
+    vst1.8 {q2}, [r4]! @ 16 bytes store
+    vdup.u16 q3, r11
+    vst1.8 {q2}, [r4], r6 @ 16 bytes store
+    subs r2, r2, #4
+    vst1.8 {q3}, [r4]! @ 16 bytes store
+    vst1.8 {q3}, [r4], r6 @ 16 bytes store
+
+    bne loop_32_r_c @ rows remain: run another pass, else fall into the epilogue
+
+end_func_r_c:
+    ldmfd sp!, {r4-r11, pc} @ restore r4-r11 and return
+
+
+
+
+
diff --git a/common/arm/ih264_platform_macros.h b/common/arm/ih264_platform_macros.h
new file mode 100755
index 0000000..1f67403
--- /dev/null
+++ b/common/arm/ih264_platform_macros.h
@@ -0,0 +1,152 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IHEVC_PLATFORM_MACROS_H_
+#define _IHEVC_PLATFORM_MACROS_H_
+
+#ifndef ARMV8
+void ih264_arm_dsb(void);
+/* DATA_SYNC() forwards to ih264_arm_dsb(), which is only declared (not defined) in this header. */
+#define DATA_SYNC() ih264_arm_dsb()
+/* Clamps x to the unsigned 8-bit range [0, 255] with one USAT instruction. */
+static __inline WORD32 CLIP_U8(WORD32 x) {
+    __asm__("usat %0, #8, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+
+/* Clamps x to the signed 8-bit range [-128, 127] with one SSAT instruction. */
+static __inline WORD32 CLIP_S8(WORD32 x) {
+    __asm__("ssat %0, #8, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+
+/* Clamps x to the unsigned 10-bit range [0, 1023] with one USAT instruction. */
+static __inline WORD32 CLIP_U10(WORD32 x) {
+    __asm__("usat %0, #10, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+
+/* Clamps x to the signed 10-bit range [-512, 511] with one SSAT instruction. */
+static __inline WORD32 CLIP_S10(WORD32 x) {
+    __asm__("ssat %0, #10, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+
+/* Clamps x to the unsigned 12-bit range [0, 4095] with one USAT instruction. */
+static __inline WORD32 CLIP_U12(WORD32 x) {
+    __asm__("usat %0, #12, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+
+/* Clamps x to the signed 12-bit range [-2048, 2047] with one SSAT instruction. */
+static __inline WORD32 CLIP_S12(WORD32 x) {
+    __asm__("ssat %0, #12, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+
+/* Clamps x to the unsigned 16-bit range [0, 65535] with one USAT instruction. */
+static __inline WORD32 CLIP_U16(WORD32 x) {
+    __asm__("usat %0, #16, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+/* Clamps x to the signed 16-bit range [-32768, 32767] with one SSAT instruction. */
+static __inline WORD32 CLIP_S16(WORD32 x) {
+    __asm__("ssat %0, #16, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+
+
+/* Reverses the byte order of a 32-bit word with one REV instruction. */
+static __inline UWORD32 ITT_BIG_ENDIAN(UWORD32 x) {
+    __asm__("rev %0, %1" : "=r"(x) : "r"(x));
+    return x;
+}
+#else
+#define DATA_SYNC() ((void)0) /* expands to a no-op expression when ARMV8 is defined; NOTE(review): confirm no barrier is needed on that build */
+/* Portable clip macros; each forwards to CLIP3, which is not defined in this header. */
+#define CLIP_U8(x) CLIP3(0, 255, (x))
+#define CLIP_S8(x) CLIP3(-128, 127, (x))
+
+#define CLIP_U10(x) CLIP3(0, 1023, (x))
+#define CLIP_S10(x) CLIP3(-512, 511, (x))
+
+#define CLIP_U12(x) CLIP3(0, 4095, (x))
+#define CLIP_S12(x) CLIP3(-2048, 2047, (x))
+
+#define CLIP_U16(x) CLIP3(0, 65535, (x))
+#define CLIP_S16(x) CLIP3(-32768, 32767, (x))
+
+#define ITT_BIG_ENDIAN(x) ((((UWORD32)(x) & 0x000000ffu) << 24) | \
+ (((UWORD32)(x) & 0x0000ff00u) << 8) | \
+ (((UWORD32)(x) & 0x00ff0000u) >> 8) | \
+ ((UWORD32)(x) >> 24)) /* byte-reversed x as one expression; x is evaluated four times, so pass no side effects */
+#endif
+
+#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0) /* yields 0 when y is 32 or more */
+#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0) /* yields 0 when y is 32 or more */
+/* Shift direction flips for a negative shift; arguments are parenthesised so expressions can be passed. */
+#define SHR_NEG(val,shift) (((shift) > 0) ? ((val) >> (shift)) : ((val) << (-(shift))))
+#define SHL_NEG(val,shift) (((shift) < 0) ? ((val) >> (-(shift))) : ((val) << (shift)))
+
+#define INLINE inline
+/* Number of leading zero bits in u4_word; 32 for a zero word, which is never passed to __builtin_clz. */
+static INLINE UWORD32 CLZ(UWORD32 u4_word)
+{
+    UWORD32 u4_lead_zeros = 32;
+    if(0 != u4_word)
+        u4_lead_zeros = (UWORD32)__builtin_clz(u4_word);
+    return u4_lead_zeros;
+}
+/* Number of trailing zero bits in u4_word. A zero word returns 31 (not 32); */
+/* __builtin_ctz is only reached with a non-zero argument. */
+static INLINE UWORD32 CTZ(UWORD32 u4_word)
+{
+    UWORD32 u4_trail_zeros = 31;
+
+    if(0 != u4_word)
+        u4_trail_zeros = (UWORD32)__builtin_ctz(u4_word);
+
+    return u4_trail_zeros;
+}
+
+
+#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < (nop_cnt); nop_i++);} /* empty counting loop; expands to a brace block, so brace it when it precedes an else */
+
+
+#define MEM_ALIGN8 __attribute__ ((aligned (8))) /* requests 8-byte alignment via the aligned attribute */
+#define MEM_ALIGN16 __attribute__ ((aligned (16))) /* requests 16-byte alignment */
+#define MEM_ALIGN32 __attribute__ ((aligned (32))) /* requests 32-byte alignment */
+
+#endif /* _IHEVC_PLATFORM_MACROS_H_ */
diff --git a/common/arm/ih264_resi_trans_a9.s b/common/arm/ih264_resi_trans_a9.s
new file mode 100755
index 0000000..08821f5
--- /dev/null
+++ b/common/arm/ih264_resi_trans_a9.s
@@ -0,0 +1,604 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@*******************************************************************************
+@* @file
+@* ih264_resi_trans_a9.s
+@*
+@* @brief
+@* Contains function definitions for residual and forward trans
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@* ih264_resi_trans_4x4_a9
+@* ih264_resi_trans_8x8_a9
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+
+
+.text
+.p2align 2
+@*****************************************************************************
+@*
+@* Function Name : ih264_resi_trans_4x4_a9
+@* Description : This function does cf4 of H264 followed by an approximate scaling
+@*
+@* Arguments :
+@ R0 :pointer to src buffer
+@ R1 :pointer to pred buffer
+@ R2 :pointer to dst buffer
+@ R3 :src_stride
+@ STACk :pred_stride,dst_stride
+
+@* Values Returned : NONE
+@*
+@* Register Usage :
+@* Stack Usage :
+@* Cycles : Around
+@* Interruptibility : Interruptible
+@*
+@* Known Limitations
+@* \Assumptions :
+@*
+@* Revision History :
+@* DD MM YYYY Author(s) Changes
+@* 30 12 2009 100633 First version
+@*
+@*****************************************************************************
+
+
+    .global ih264_resi_trans_4x4_a9
+    .extern g_scal_coff_h264_4x4
+g_scal_coff_h264_4x4_addr:
+    .long g_scal_coff_h264_4x4 - 4x4lbl - 8 @ distance from the pc value read at 4x4lbl (4x4lbl + 8) to the table
+
+ih264_resi_trans_4x4_a9:
+
+    @R0 :pointer to src buffer
+    @R1 :pointer to pred buffer
+    @R2 :pointer to dst buffer
+    @R3 :src_stride
+    @STACK :pred_stride,dst_stride
+
+    push {r4-r12, lr} @save the core registers: 10 words, 40 bytes
+    vpush {d8-d15} @q4-q7 are written below and are callee-saved: 64 more bytes
+    mov r6, sp
+    add r6, r6, #104 @step over the 40 + 64 saved bytes to reach the stack arguments
+    ldmfd r6, {r4-r5} @load the strides into registers
+    @R4 pred_stride
+    @R5 dst_stride
+
+
+    @r3 and r4 are used unchanged as the post-increment of the row loads:
+    @a register post-index adds the register to the pointer, it does not
+    @add the number of bytes that were loaded, so each load starts at the
+    @beginning of the next row
+    @
+    @each output row is stored as four 32-bit values and dst_stride is
+    @multiplied by 4 further down (lsl r5, r5, #2) before it is used as
+    @the post-increment of the row stores
+    @--------------------function loading done------------------------
+
+    @lets find residual
+    @each row load fills a d register with 8 bytes, of which the first 4 are used
+    @ a b c d # # # #
+    vld1.u8 d30, [r0], r3 @load row 1 of src
+    vld1.u8 d31, [r1], r4 @load row 1 of pred
+    @vsubl.u8 widens each difference to 16 bits, so a row of residuals
+    @occupies a q register and the 4 useful values sit in its low d half
+    @ a b c d # # # #
+
+    vld1.u8 d28, [r0], r3 @load row 2 of src
+    vld1.u8 d29, [r1], r4 @load row 2 of pred
+
+    vld1.u8 d26, [r0], r3 @load row 3 of src
+    vsubl.u8 q0, d30, d31 @row 1: src - pred into q0
+
+    vld1.u8 d27, [r1], r4 @load row 3 of pred
+    vsubl.u8 q1, d28, d29 @row 2: src - pred into q1
+
+    vld1.u8 d24, [r0], r3 @load row 4 of src
+
+    vld1.u8 d25, [r1], r4 @load row 4 of pred
+    vsubl.u8 q2, d26, d27 @row 3: src - pred into q2
+
+    lsl r5, r5, #2 @ multiply dst stride by 4 since we are storing 32 bit values
+    ldr r6, g_scal_coff_h264_4x4_addr
+4x4lbl:
+    add r6, r6, pc @ r6 = address of the global scaling array
+
+    vsubl.u8 q3, d24, d25 @row 4: src - pred into q3
+
+    @after this
+    @D0 -> 1a
+    @D2 -> 2a
+    @D4 -> 3a
+    @D6 -> 4a
+
+    @transpose the matrix so that we can do the horizontal transform first
+    @#1 #2 #3 #4
+    @a b c d ---- D0
+    @e f g h -----D2
+    @i j k l -----D4
+    @m n o p -----D6
+    @transpose the inner 2x2 blocks
+    vtrn.16 d0, d2
+    vld1.s16 {q10}, [r6]! @ load the scaling values 0-7 and advance r6 past them
+    vtrn.16 d4, d6
+    @a e c g
+    @b f d h
+    @i m k o
+    @j n l p
+    vtrn.32 d0, d4
+    vtrn.32 d2, d6
+    @a e i m #1 -- D0 --- x4
+    @b f j n #2 -- D2 --- x5
+    @c g k o #3 -- D4 ----x6
+    @d h l p #4 -- D6 ----x7
+
+    @we have loaded the residuals into the registers , now we need to add and subtract them
+    @let us do the horiz transform first
+
+    vsub.s16 d5, d2, d4 @x2 = x5-x6
+    vsub.s16 d7, d0, d6 @x3 = x4-x7;
+
+    vadd.s16 d3, d2, d4 @x1 = x5+x6
+    vadd.s16 d1, d0, d6 @x0 = x4+x7
+
+
+    vshl.s16 d31, d7, #1 @ U_SHIFT(x3,1,shft)
+    vshl.s16 d30, d5, #1 @ U_SHIFT(x2,1,shft)
+
+    vadd.s16 d0, d1, d3 @x0 + x1;
+    vsub.s16 d4, d1, d3 @x0 - x1;
+
+    vadd.s16 d2, d31, d5 @U_SHIFT(x3,1,shft) + x2;
+    vsub.s16 d6, d7, d30 @x3 - U_SHIFT(x2,1,shft);
+
+    @transpose again so that the same butterfly performs the vertical transform
+    vtrn.16 d0, d2
+    vtrn.16 d4, d6
+
+    vtrn.32 d0, d4
+    vtrn.32 d2, d6
+
+    @let us do vertical transform
+    @same code as horiz
+
+    vadd.s16 d1, d0, d6 @x0 = x4+x7
+    vadd.s16 d3, d2, d4 @x1 = x5+x6
+    vsub.s16 d7, d0, d6 @x3 = x4-x7;
+    vsub.s16 d5, d2, d4 @x2 = x5-x6
+
+
+@The vertical pass leaves x0..x3 in d1, d3, d5, d7. The output rows
+@x4..x7 are formed from them in 16 bits and widened only by vmull.s16,
+@so no separate 32-bit expansion step is needed.
+
+@scaling follows
+
+@q10 (d20, d21) already holds scaling values 0-7, loaded during the
+@first transpose; they scale output rows 0 and 1.
+@Values 8-15 are loaded into q10 again after those two multiplies
+@and scale output rows 2 and 3.
+
+@The 32-bit products land in q4-q7 (d8-d15), the registers saved by
+@the vpush in the prologue, and are stored one row per vst1.
+
+@x4 = x0 + x1, x5 = 2*x3 + x2, x6 = x0 - x1, x7 = x3 - 2*x2
+@(U_SHIFT(v,1,shft) in the comments below is the left shift by one)
+
+    vadd.s16 d24, d3, d1 @x4 = x0 + x1
+    vsub.s16 d28, d1, d3 @x6 = x0 - x1
+
+    vshl.s16 d0, d7, #1 @ U_SHIFT(x3,1,shft)
+    vmull.s16 q4, d24, d20 @x4*s0
+
+    vshl.s16 d2, d5, #1 @ U_SHIFT(x2,1,shft)
+
+    vadd.s16 d26, d0, d5 @x5 = U_SHIFT(x3,1,shft) + x2
+    vmull.s16 q5, d26, d21 @x5*s1
+
+    vst1.s32 {q4}, [r2], r5 @store output row 1 and advance dst by the scaled stride
+
+    vld1.s16 {q10}, [r6] @load scaling coefficients 8-15
+
+    vsub.s16 d30, d7, d2 @x7 = x3 - U_SHIFT(x2,1,shft)
+
+    vmull.s16 q6, d28, d20 @x6*s2
+    vst1.s32 {q5}, [r2], r5 @store output row 2
+
+    vmull.s16 q7, d30, d21 @x7*s3
+
+
+    vst1.s32 {q6}, [r2], r5 @store output row 3
+    vst1.s32 {q7}, [r2] @store output row 4
+    vpop {d8-d15} @restore the callee-saved NEON registers
+    pop {r4-r12, pc} @pop back all variables
+
+
+
+
+@*****************************************************************************
+@* Function Name : ih264_resi_trans_8x8_a9
+@* Description : This function does cf8 followed by an approximate normalization of H264
+@*
+@* Arguments :
+@* R0 :pointer to src buffer
+@ R1 :pointer to pred buffer
+@ R2 :pointer to dst buffer
+@ R3 :src_stride
+@ STACK :pred_stride,dst_stride
+@*
+@*
+@* Values Returned : NONE
+@*
+@* Register Usage :
+@* Stack Usage :
+@* Cycles : Around
+@* Interruptibility : Interruptible
+@*
+@* Known Limitations
+@* \Assumptions :
+@*
+@* Revision History :
+@* DD MM YYYY Author(s) Changes
+@* 30 12 2009 100633 First version
+@*
+@*****************************************************************************
+
+
+    .global ih264_resi_trans_8x8_a9
+    .extern g_scal_coff_h264_8x8
+g_scal_coff_h264_8x8_addr:
+    .long g_scal_coff_h264_8x8 - 8x8lbl - 8 @ distance from the pc value read at 8x8lbl (8x8lbl + 8) to the table
+
+
+ih264_resi_trans_8x8_a9:
+
+    @R0 :pointer to src buffer
+    @R1 :pointer to pred buffer
+    @R2 :pointer to dst buffer
+    @R3 :src_stride
+    @STACK :pred_stride,dst_stride
+
+    push {r4-r12, lr} @save the core registers: 10 words, 40 bytes
+    vpush {d8-d15} @q4-q7 are written below and are callee-saved: 64 more bytes
+    mov r6, sp
+    add r6, r6, #104 @step over the 40 + 64 saved bytes to reach the stack arguments
+    ldmfd r6, {r4-r5} @load the strides into registers
+    @R4 pred_stride
+    @R5 dst_stride
+
+    @r3 and r4 are used unchanged as the post-increment of the row loads
+    @each output row is stored as eight 32-bit values and dst_stride is
+    @multiplied by 4 (lsl r5, r5, #2) before it post-increments r2
+    @--------------------function loading done------------------------
+
+    @lets find residual
+    @each vld1 reads one 8-pixel row; vsubl.u8 widens src - pred to
+    @eight 16-bit residuals, one row per q register
+    vld1.u8 d30, [r0], r3 @src rw1
+    vld1.u8 d31, [r1], r4 @pred rw1
+
+    vld1.u8 d28, [r0], r3 @src rw2
+    vld1.u8 d29, [r1], r4 @pred rw2
+    vsubl.u8 q0, d30, d31 @src-pred rw1
+
+    vld1.u8 d26, [r0], r3 @src rw3
+    vld1.u8 d27, [r1], r4 @pred rw3
+    vsubl.u8 q1, d28, d29 @src-pred rw2
+
+    vld1.u8 d24, [r0], r3 @src rw4
+    vld1.u8 d25, [r1], r4 @pred rw4
+    vsubl.u8 q2, d26, d27 @src-pred rw3
+
+    vld1.u8 d22, [r0], r3 @src rw5
+    vld1.u8 d23, [r1], r4 @pred rw5
+    vsubl.u8 q3, d24, d25 @src-pred rw4
+
+    vld1.u8 d20, [r0], r3 @src rw6
+    vld1.u8 d21, [r1], r4 @pred rw6
+    vsubl.u8 q4, d22, d23 @src-pred rw5
+
+    vld1.u8 d18, [r0], r3 @src rw7
+    vld1.u8 d19, [r1], r4 @pred rw7
+    vsubl.u8 q5, d20, d21 @src-pred rw6
+
+    vld1.u8 d16, [r0], r3 @src rw8
+    vld1.u8 d17, [r1], r4 @pred rw8
+    vsubl.u8 q6, d18, d19 @src-pred rw7
+
+    lsl r5, r5, #2 @dst stride times 4: the outputs are 32-bit values
+
+
+    vsubl.u8 q7, d16, d17 @src-pred rw8
+
+    @after this
+    @Q0 -> 1a
+    @Q1 -> 2a
+    @Q2 -> 3a
+    @Q3 -> 4a
+    @Q4 -> 5a
+    @Q5 -> 6a
+    @Q6 -> 7a
+    @Q7 -> 8a
+
+    @transpose the matrix so that we can do the horizontal transform first
+
+    @transpose the inner 2x2 blocks
+    vtrn.16 q0, q1
+    vtrn.16 q2, q3
+    vtrn.16 q4, q5
+    vtrn.16 q6, q7
+
+    @transpose the inner 4x4 blocks
+    vtrn.32 q0, q2
+    vtrn.32 q1, q3
+
+    vtrn.32 q4, q6
+    vtrn.32 q5, q7
+
+    @transpose the outer 8x8 blocks
+    vswp d1, d8
+    vswp d7, d14
+    vswp d3, d10
+    vswp d5, d12
+    @transpose done
+
+@at this point the transposed residuals are in Q0-Q7
+@(the original scheduling note here said Q7 becomes available
+@slightly later than the other registers)
+
+    @we have loaded the residuals into the registers , now we need to add and subtract them
+    @let us do the horiz transform first
+
+    vadd.s16 q8, q0, q7 @ a0 = r0 + r7;
+    vadd.s16 q9, q1, q6 @ a1 = r1 + r6;
+    vadd.s16 q10, q2, q5 @ a2 = r2 + r5;
+    vadd.s16 q11, q3, q4 @ a3 = r3 + r4;
+
+    vsub.s16 q12, q0, q7 @ b0 = r0 - r7;
+    vsub.s16 q13, q1, q6 @ b1 = r1 - r6;
+    vsub.s16 q15, q3, q4 @ b3 = r3 - r4;
+    vsub.s16 q14, q2, q5 @ b2 = r2 - r5;
+
+    vadd.s16 q1, q8, q11 @ a4 = a0 + a3;
+    vadd.s16 q3, q9, q10 @ a5 = a1 + a2;
+    vsub.s16 q7, q9, q10 @ a7 = a1 - a2;
+    vsub.s16 q5, q8, q11 @ a6 = a0 - a3;
+
+    ldr r6, g_scal_coff_h264_8x8_addr
+8x8lbl:
+    add r6, r6, pc @ r6 = address of the global scaling array
+
+    vadd.s16 q0, q1, q3 @ pi2_res[0] = a4 + a5;
+    vshr.s16 q8, q7, #1 @ D_SHIFT(a7,1,shft), used for pi2_res[2]
+
+    vsub.s16 q4, q1, q3 @ pi2_res[4] = a4 - a5;
+
+    vadd.s16 q2, q5, q8 @ pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
+
+
+    vshr.s16 q9, q5, #1 @ D_SHIFT(a6,1,shft), used for pi2_res[6]
+    vsub.s16 q6, q9, q7 @ pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
+
+@do not change Q0,Q2.Q4,Q6 they contain results
+@Q1,Q3,Q5,Q7 TO STORE RESULTS
+@Q8 Q9 Q10 Q11 USE @WILL
+
+    vshr.s16 q1, q12, #1 @ D_SHIFT(b0,1,shft)
+    vshr.s16 q3, q13, #1 @ D_SHIFT(b1,1,shft)
+    vshr.s16 q5, q14, #1 @ D_SHIFT(b2,1,shft)
+    vshr.s16 q7, q15, #1 @ D_SHIFT(b3,1,shft)
+
+    vadd.s16 q8, q1, q12 @ (D_SHIFT(b0,1,shft) + b0);
+    vadd.s16 q9, q3, q13 @ (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q10, q5, q14 @ (D_SHIFT(b2,1,shft) + b2);
+    vadd.s16 q11, q7, q15 @ (D_SHIFT(b3,1,shft) + b3);
+
+    vadd.s16 q1, q14, q8 @ b2 + (D_SHIFT(b0,1,shft) + b0);
+    vsub.s16 q5, q15, q9 @ b3 - (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q3, q15, q10 @ b3 + (D_SHIFT(b2,1,shft) + b2);
+    vsub.s16 q7, q11, q14 @ -b2 + (D_SHIFT(b3,1,shft) + b3);
+
+    vadd.s16 q8, q13, q1 @ b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
+    vsub.s16 q9, q12, q3 @ b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
+    vadd.s16 q10, q12, q5 @ b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q11, q13, q7 @ b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
+
+    vshr.s16 q15, q8, #2 @ D_SHIFT(b4,2,shft)
+    vshr.s16 q14, q9, #2 @ D_SHIFT(b5,2,shft);
+    vshr.s16 q13, q10, #2 @ D_SHIFT(b6,2,shft);
+    vshr.s16 q12, q11, #2 @ D_SHIFT(b7,2,shft);
+
+
+    vadd.s16 q3, q9, q13 @ pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
+    vsub.s16 q5, q10, q14 @ pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
+    vadd.s16 q1, q8, q12 @ pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
+    vsub.s16 q7, q15, q11 @ pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
+
+    @------------horiz transform done-------------------------
+    @results are in Q0-Q7
+    @all other neon registers can be used at will
+
+@doing vertical transform
+@same butterfly as the horizontal transform above
+
+    @transpose the inner 2x2 blocks
+    vtrn.16 q0, q1
+    vtrn.16 q2, q3
+    vtrn.16 q4, q5
+    vtrn.16 q6, q7
+
+    @transpose the inner 4x4 blocks
+    vtrn.32 q0, q2
+    vtrn.32 q1, q3
+
+    vtrn.32 q4, q6
+    vtrn.32 q5, q7
+
+    @transpose the outer 8x8 blocks
+    vswp d1, d8
+    vswp d3, d10
+    vswp d5, d12
+    vswp d7, d14
+
+    @transpose done
+
+    vadd.s16 q8, q0, q7 @ a0 = r0 + r7;
+    vadd.s16 q9, q1, q6 @ a1 = r1 + r6;
+    vadd.s16 q10, q2, q5 @ a2 = r2 + r5;
+    vadd.s16 q11, q3, q4 @ a3 = r3 + r4;
+
+    vsub.s16 q12, q0, q7 @ b0 = r0 - r7;
+    vsub.s16 q13, q1, q6 @ b1 = r1 - r6;
+    vsub.s16 q14, q2, q5 @ b2 = r2 - r5;
+    vsub.s16 q15, q3, q4 @ b3 = r3 - r4;
+
+    vadd.s16 q1, q8, q11 @ a4 = a0 + a3;
+    vadd.s16 q3, q9, q10 @ a5 = a1 + a2;
+    vsub.s16 q5, q8, q11 @ a6 = a0 - a3;
+    vsub.s16 q7, q9, q10 @ a7 = a1 - a2;
+
+
+    vadd.s16 q0, q1, q3 @ pi2_res[0] = a4 + a5;
+
+    vshr.s16 q8, q7, #1 @ D_SHIFT(a7,1,shft), used for pi2_res[2]
+    @the shift above is a plain arithmetic shift right by one
+    vadd.s16 q2, q5, q8 @ pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
+
+    vsub.s16 q4, q1, q3 @ pi2_res[4] = a4 - a5;
+
+    vshr.s16 q9, q5, #1 @ D_SHIFT(a6,1,shft), used for pi2_res[6]
+    vsub.s16 q6, q9, q7 @ pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
+
+@do not change Q0,Q2.Q4,Q6 they contain results
+@Q1,Q3,Q5,Q7 TO STORE RESULTS
+@Q8 Q9 Q10 Q11 USE @WILL
+
+    vshr.s16 q1, q12, #1 @ D_SHIFT(b0,1,shft)
+    vshr.s16 q3, q13, #1 @ D_SHIFT(b1,1,shft)
+    vshr.s16 q5, q14, #1 @ D_SHIFT(b2,1,shft)
+    vshr.s16 q7, q15, #1 @ D_SHIFT(b3,1,shft)
+
+
+    vadd.s16 q8, q1, q12 @ (D_SHIFT(b0,1,shft) + b0);
+    vadd.s16 q9, q3, q13 @ (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q10, q5, q14 @ (D_SHIFT(b2,1,shft) + b2);
+    vadd.s16 q11, q7, q15 @ (D_SHIFT(b3,1,shft) + b3);
+
+    vadd.s16 q1, q14, q8 @ b2 + (D_SHIFT(b0,1,shft) + b0);
+    vadd.s16 q3, q15, q10 @ b3 + (D_SHIFT(b2,1,shft) + b2);
+    vsub.s16 q5, q15, q9 @ b3 - (D_SHIFT(b1,1,shft) + b1);
+    vsub.s16 q7, q11, q14 @ -b2 + (D_SHIFT(b3,1,shft) + b3);
+
+    vadd.s16 q8, q13, q1 @ b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
+    vsub.s16 q9, q12, q3 @ b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
+    vadd.s16 q10, q12, q5 @ b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
+    vadd.s16 q11, q13, q7 @ b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
+
+    vshr.s16 q15, q8, #2 @ D_SHIFT(b4,2,shft)
+    vshr.s16 q14, q9, #2 @ D_SHIFT(b5,2,shft);
+    vshr.s16 q13, q10, #2 @ D_SHIFT(b6,2,shft);
+    vshr.s16 q12, q11, #2 @ D_SHIFT(b7,2,shft);
+
+
+@the results stay 16 bits wide here; vmull.s16 widens them to 32 bits during scaling
+    vsub.s16 q5, q10, q14 @ pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
+    vsub.s16 q7, q15, q11 @ pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
+    vadd.s16 q3, q9, q13 @ pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
+    vadd.s16 q1, q8, q12 @ pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
+
+    @------------vert transform done-------------------------
+    @results are in Q0-Q7
+    @all other neon registers can be used at will
+
+    @scaling
+    @16 scaling values are loaded: d28, d29, d30, d31 hold 4 values each
+    @rows 1-4 use d28-d31 for both row halves, rows 5-8 reuse d28-d31
+    vld1.s16 {q14-q15}, [r6] @ load the 16 scaling values
+
+    @since we need to get a 32 bit o/p for two 16 bit multiplications
+    @we need a VMULL instruction
+@-----------------------------first and second row
+
+    vmull.s16 q8, d0, d28 @scale row 1, first 4 elements
+    vmull.s16 q9, d28, d1 @scale row 1, last 4 elements
+
+    vmull.s16 q10, d2, d29 @scale row 2, first 4 elements
+    vmull.s16 q11, d29, d3 @scale row 2, last 4 elements
+    vmull.s16 q12, d4, d30 @scale row 3, first 4 elements
+
+    vst1.s32 {q8, q9}, [r2], r5 @store row 1
+
+    vmull.s16 q13, d30, d5 @scale row 3, last 4 elements
+    vmull.s16 q8, d6, d31 @scale row 4, first 4 elements
+
+
+    vst1.s32 {q10, q11}, [r2], r5 @store row 2
+
+@------------------------------- 3rd and 4th row
+
+    vmull.s16 q9, d31, d7 @scale row 4, last 4 elements
+
+    vst1.s32 {q12, q13}, [r2], r5 @store row 3
+
+    vmull.s16 q10, d8, d28 @scale row 5, first 4 elements
+    vmull.s16 q11, d28, d9 @scale row 5, last 4 elements
+
+    vmull.s16 q12, d10, d29 @scale row 6, first 4 elements
+
+
+    vst1.s32 {q8, q9}, [r2], r5 @store row 4
+
+@--------------------------------5th and 6th row
+
+    vmull.s16 q13, d29, d11 @scale row 6, last 4 elements
+
+    vmull.s16 q8, d12, d30 @scale row 7, first 4 elements
+
+    vst1.s32 {q10, q11}, [r2], r5 @store row 5
+
+    vmull.s16 q9, d30, d13 @scale row 7, last 4 elements
+    vmull.s16 q10, d14, d31 @scale row 8, first 4 elements
+
+
+    vst1.s32 {q12, q13}, [r2], r5 @store row 6
+
+@----------------------------------7th and 8th row
+    vmull.s16 q11, d31, d15 @scale row 8, last 4 elements
+
+    vst1.s32 {q8, q9}, [r2], r5 @store row 7
+    vst1.s32 {q10, q11}, [r2], r5 @store row 8
+
+@----------------------------------done writing
+    vpop {d8-d15} @restore the callee-saved NEON registers
+    pop {r4-r12, pc} @pop back all variables
+
+
+
+
+
+
diff --git a/common/arm/ih264_resi_trans_quant_a9.s b/common/arm/ih264_resi_trans_quant_a9.s
new file mode 100755
index 0000000..caf362e
--- /dev/null
+++ b/common/arm/ih264_resi_trans_quant_a9.s
@@ -0,0 +1,694 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@*******************************************************************************
+@* @file
+@* ih264_resi_trans_quant_a9.s
+@*
+@* @brief
+@* Contains function definitions for residual and forward trans
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@* ih264_resi_trans_quant_4x4_a9
+@* ih264_resi_trans_quant_8x8_a9 (listed here, but no such label is defined in this file)
+@* ih264_resi_trans_quant_chroma_4x4_a9
+@* ih264_hadamard_quant_4x4_a9
+@* ih264_hadamard_quant_2x2_uv_a9
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+
+
+.text
+.p2align 2
+@*****************************************************************************
+@*
+@* Function Name : ih264_resi_trans_quant_4x4_a9
+@* Description : This function does cf4 of H264
+@*
+@* Arguments : R0 :pointer to src buffer
+@ R1 :pointer to pred buffer
+@ R2 :pointer to dst buffer
+@ R3 :source stride
+@ STACK : pred stride,
+@ (no dst stride: the code loads the next stack word as the scaling matrix pointer),
+@ pointer to scaling matrix,
+@ pointer to threshold matrix,
+@ qbits,
+@ rounding factor,
+@ pointer to store nnz
+@ pointer to store non quantized dc value
+@ Values Returned : NONE
+@
+@ Register Usage :
+@ Stack Usage : 104 bytes (40 for r4-r12 and lr, 64 for d8-d15)
+@ Cycles : Around
+@ Interruptibility : Interruptible
+@
+@ Known Limitations
+@ \Assumptions :
+@
+@ Revision History :
+@ DD MM YYYY Author(s) Changes
+@ 1 12 2013 100633 First version
+@ 20 1 2014 100633 Changes the API, Optimization
+@
+@*****************************************************************************
+
+ .global ih264_resi_trans_quant_4x4_a9
+ih264_resi_trans_quant_4x4_a9:
+@ 4x4 block: residue (src - pred), forward core transform, quantization; also writes the nnz count and the unquantized dc value
+ @R0 :pointer to src buffer
+ @R1 :pointer to pred buffer
+ @R2 :pointer to dst buffer
+ @R3 :Source stride
+ @STACK :pred stride
+ @ :scale matrix,
+ @ :threshold matrix
+ @ :qbits
+ @ :round factor
+ @ :nnz
+ @ :address for the unquantized dc value
+ push {r4-r12, lr} @save r4-r12 and lr (10 words = 40 bytes)
+
+ add r11, sp, #40 @r11 = address of the first stack argument (just above the 40 bytes pushed)
+ ldmfd r11, {r4-r10} @load the seven stack arguments into r4-r10
+
+ @R0 :pointer to src buffer
+ @R1 :pointer to pred buffer
+ @R2 :pointer to dst buffer
+ @R3 :Source stride
+ @R4 :Pred stride
+ @R5 :scale matrix,
+ @R6 :threshold matrix (loaded, but not referenced again in this routine)
+ @R7 :qbits
+ @R8 :round factor
+ @R9 :nnz
+ @R10 :address for the unquantized dc value
+ vpush {d8-d15} @save d8-d15 (64 bytes); restored by the vpop before returning
+
+ mov r11, #0
+ sub r7, r11, r7 @r7 = -qbits: vshl with a negative per-lane count shifts right
+
+ @------------Function Loading done----------------;
+
+ vld1.u8 d30, [r0], r3 @load 8 bytes of src row 1, then advance r0 by the src stride
+
+ vld1.u8 d31, [r1], r4 @load 8 bytes of pred row 1, then advance r1 by the pred stride
+
+ vld1.u8 d28, [r0], r3 @load 8 bytes of src row 2
+
+ vld1.u8 d29, [r1], r4 @load 8 bytes of pred row 2
+
+ vld1.u8 d26, [r0], r3 @load 8 bytes of src row 3
+
+ vld1.u8 d27, [r1], r4 @load 8 bytes of pred row 3
+ vsubl.u8 q0, d30, d31 @16-bit residue row 1 (only the low half, d0, is used below)
+
+ vld1.u8 d24, [r0], r3 @load 8 bytes of src row 4
+
+ vld1.u8 d25, [r1], r4 @load 8 bytes of pred row 4
+ vsubl.u8 q1, d28, d29 @16-bit residue row 2 (only d2 is used below)
+
+ vsubl.u8 q2, d26, d27 @16-bit residue row 3 (only d4 is used below)
+ vsubl.u8 q3, d24, d25 @16-bit residue row 4 (only d6 is used below)
+@transpose the 4x4 residue so that d0, d2, d4, d6 hold columns 0..3
+ vtrn.16 d0, d2 @T12
+ vtrn.16 d4, d6 @T34
+ vtrn.32 d0, d4 @T13
+ vtrn.32 d2, d6 @T24
+
+ vadd.s16 d8 , d0, d6 @x0 = x4+x7
+ vadd.s16 d9 , d2, d4 @x1 = x5+x6
+ vsub.s16 d10, d2, d4 @x2 = x5-x6
+ vsub.s16 d11, d0, d6 @x3 = x4-x7
+
+ vshl.s16 d12, d10, #1 @U_SHIFT(x2,1,shft), i.e. x2 << 1
+ vshl.s16 d13, d11, #1 @U_SHIFT(x3,1,shft), i.e. x3 << 1
+
+ vadd.s16 d14, d8, d9 @x4 = x0 + x1;
+ vsub.s16 d16, d8, d9 @x6 = x0 - x1;
+ vadd.s16 d15, d13, d10 @x5 = U_SHIFT(x3,1,shft) + x2;
+ vsub.s16 d17, d11, d12 @x7 = x3 - U_SHIFT(x2,1,shft);
+
+ @transpose again so that the same butterfly performs the vertical transform
+ vtrn.16 d14, d15 @T12
+ vtrn.16 d16, d17 @T34
+ vtrn.32 d14, d16 @T13
+ vtrn.32 d15, d17 @T24
+
+ @let us do vertical transform
+ @same code as horiz
+ vadd.s16 d18, d14, d17 @x0 = x4+x7
+ vadd.s16 d19, d15, d16 @x1 = x5+x6
+ vsub.s16 d20, d15, d16 @x2 = x5-x6
+ vsub.s16 d21, d14, d17 @x3 = x4-x7
+
+ vshl.s16 d22, d20, #1 @U_SHIFT(x2,1,shft), i.e. x2 << 1
+ vshl.s16 d23, d21, #1 @U_SHIFT(x3,1,shft), i.e. x3 << 1
+
+ vdup.s32 q4, r8 @q4 = round factor in every 32-bit lane (accumulator for row 1)
+
+ vadd.s16 d24, d18, d19 @x5 = x0 + x1; (coefficient row 1)
+ vsub.s16 d26, d18, d19 @x7 = x0 - x1; (coefficient row 3)
+ vadd.s16 d25, d23, d20 @x6 = U_SHIFT(x3,1,shft) + x2; (coefficient row 2)
+ vsub.s16 d27, d21, d22 @x8 = x3 - U_SHIFT(x2,1,shft); (coefficient row 4)
+ vdup.s32 q10, r7 @q10 = -qbits in every 32-bit lane
+
+ vst1.s16 d24[0], [r10] @Store the unquantized dc value (lane 0 of row 1) to the alternate dc address
+
+@core transform is done: q12 holds coefficient rows 1-2 and q13 rows 3-4
+ vld1.s16 {q14-q15}, [r5] @load the 16 scaling values (d28..d31 = rows 1..4)
+
+ vabs.s16 q0, q12 @Abs val of rows 1 and 2
+
+ vabs.s16 q1, q13 @Abs val of rows 3 and 4
+
+ vmov.s32 q5, q4 @copy round fact for row 2
+
+ vmov.s32 q6, q4 @copy round fact for row 3
+ vclt.s16 q2, q12, #0 @Sign mask (all ones where negative) of rows 1 and 2
+
+ vmov.s32 q7, q4 @copy round fact for row 4
+ vclt.s16 q3, q13, #0 @Sign mask (all ones where negative) of rows 3 and 4
+
+ vmlal.s16 q4, d0, d28 @row 1: round + abs * scale
+ vmlal.s16 q5, d1, d29 @row 2: round + abs * scale
+ vmlal.s16 q6, d2, d30 @row 3: round + abs * scale
+ vmlal.s16 q7, d3, d31 @row 4: round + abs * scale
+
+ vshl.s32 q11, q4, q10 @Shift row 1 right by qbits (q10 holds -qbits)
+ vshl.s32 q12, q5, q10 @Shift row 2 right by qbits
+ vshl.s32 q13, q6, q10 @Shift row 3 right by qbits
+ vshl.s32 q14, q7, q10 @Shift row 4 right by qbits
+
+ vmovn.s32 d30, q11 @Narrow row 1 to 16 bit
+ vmovn.s32 d31, q12 @Narrow row 2 to 16 bit
+ vmovn.s32 d0 , q13 @Narrow row 3 to 16 bit
+ vmovn.s32 d1 , q14 @Narrow row 4 to 16 bit
+
+ vneg.s16 q1, q15 @Negated quantized magnitudes of rows 1 and 2
+ vneg.s16 q4, q0 @Negated quantized magnitudes of rows 3 and 4
+
+ vceq.s16 q5, q15, #0 @I mask of zero coefficients in rows 1 and 2
+ vceq.s16 q6, q0 , #0 @I mask of zero coefficients in rows 3 and 4
+
+ vbsl.s16 q2, q1, q15 @Restore sign of rows 1 and 2: negated value where the sign mask is set
+ vbsl.s16 q3, q4, q0 @Restore sign of rows 3 and 4
+
+
+ vmovn.u16 d14, q5 @I Narrow the zero mask of rows 1 and 2 to bytes
+ vmovn.u16 d15, q6 @I Narrow the zero mask of rows 3 and 4 to bytes
+
+ vshr.u8 q8, q7, #7 @I Reduce each mask byte to a single bit: 1 for every zero coefficient
+
+ vpadd.u8 d18, d16, d17 @I pair add zero flags, step 1 (16 -> 8 partial sums)
+ vpadd.u8 d20, d18, d19 @I pair add step 2; only the low half of the result is meaningful from here on
+ vpadd.u8 d22, d20, d21 @I pair add step 3
+ vpadd.u8 d24, d22, d23 @I pair add step 4: byte 0 = number of zero coefficients
+ vst1.s16 {q2-q3}, [r2] @Store the 16 quantized coefficients contiguously at dst
+
+ vmov.u8 d25, #16 @I Get max nnz
+ vsub.u8 d26, d25, d24 @I nnz = 16 - number of zero coefficients
+
+ vst1.u8 d26[0], [r9] @I Write nnz (one byte)
+
+ vpop {d8-d15}
+ pop {r4-r12, pc}
+
+
+
+@*****************************************************************************
+@*
+@* Function Name : ih264_resi_trans_quant_chroma_4x4_a9
+@* Description : This function does residue calculation, forward transform
+@* and quantization for 4x4 chroma block.
+@*
+@* Arguments : R0 :pointer to src buffer
+@ R1 :pointer to pred buffer
+@ R2 :pointer to dst buffer
+@ R3 :source stride
+@ STACK : pred stride,
+@ (no dst stride: the code loads the next stack word as the scaling matrix pointer),
+@ pointer to scaling matrix,
+@ pointer to threshold matrix,
+@ qbits,
+@ rounding factor,
+@ pointer to store nnz
+@ pointer to store unquantized dc values
+@ Values Returned : NONE
+@
+@ Register Usage :
+@ Stack Usage : 104 bytes (40 for r4-r12 and lr, 64 for d8-d15)
+@ Cycles : Around
+@ Interruptibility : Interruptible
+@
+@ Known Limitations
+@ \Assumptions :
+@
+@ Revision History :
+@ DD MM YYYY Author(s) Changes
+@ 11 2 2015 100664 First version
+@
+@*****************************************************************************
+
+ .global ih264_resi_trans_quant_chroma_4x4_a9
+ih264_resi_trans_quant_chroma_4x4_a9:
+@ 4x4 block taken from every second byte of src/pred: residue, forward core transform, quantization; also writes nnz and the unquantized dc value
+ @R0 :pointer to src buffer
+ @R1 :pointer to pred buffer
+ @R2 :pointer to dst buffer
+ @R3 :Source stride
+ @STACK :pred stride
+ @ :scale matrix,
+ @ :threshold matrix
+ @ :qbits
+ @ :round factor
+ @ :nnz
+ @ :pu1_dc_alt_addr
+ push {r4-r12, lr} @save r4-r12 and lr (10 words = 40 bytes)
+
+ add r11, sp, #40 @r11 = address of the first stack argument (just above the 40 bytes pushed)
+ ldmfd r11, {r4-r10} @load the seven stack arguments into r4-r10
+
+ @R0 :pointer to src buffer
+ @R1 :pointer to pred buffer
+ @R2 :pointer to dst buffer
+ @R3 :Source stride
+ @R4 :Pred stride
+ @R5 :scale matrix,
+ @R6 :threshold matrix (loaded, but not referenced again in this routine)
+ @R7 :qbits
+ @R8 :round factor
+ @R9 :nnz, R10 :pu1_dc_alt_addr
+ vpush {d8-d15} @save d8-d15 (64 bytes); restored by the vpop before returning
+ mov r11, #0
+ sub r7, r11, r7 @r7 = -qbits: vshl with a negative per-lane count shifts right
+
+ @------------Function Loading done----------------;
+
+ vld2.u8 {d10, d11}, [r0], r3 @de-interleave 16 bytes of src row 1: d10 = even bytes, d11 = odd bytes
+
+ vld2.u8 {d11, d12}, [r1], r4 @de-interleave pred row 1: d11 = even bytes (replaces the unused src odd bytes), d12 = odd bytes
+
+ vld2.u8 {d28, d29}, [r0], r3 @de-interleave src row 2: d28 = even bytes
+
+ vld2.u8 {d29, d30}, [r1], r4 @de-interleave pred row 2: d29 = even bytes
+
+ vld2.u8 {d25, d26}, [r0], r3 @de-interleave src row 3: d25 = even bytes
+
+ vld2.u8 {d26, d27}, [r1], r4 @de-interleave pred row 3: d26 = even bytes
+ vsubl.u8 q0, d10, d11 @16-bit residue row 1 from the even bytes (only d0 is used below)
+
+ vld2.u8 {d22, d23}, [r0], r3 @de-interleave src row 4: d22 = even bytes
+
+ vld2.u8 {d23, d24}, [r1], r4 @de-interleave pred row 4: d23 = even bytes
+ vsubl.u8 q1, d28, d29 @16-bit residue row 2 (only d2 is used below)
+
+ vsubl.u8 q2, d25, d26 @16-bit residue row 3 (only d4 is used below)
+ vsubl.u8 q3, d22, d23 @16-bit residue row 4 (only d6 is used below)
+@transpose the 4x4 residue so that d0, d2, d4, d6 hold columns 0..3
+ vtrn.16 d0, d2 @T12
+ vtrn.16 d4, d6 @T34
+ vtrn.32 d0, d4 @T13
+ vtrn.32 d2, d6 @T24
+
+ vadd.s16 d8 , d0, d6 @x0 = x4+x7
+ vadd.s16 d9 , d2, d4 @x1 = x5+x6
+ vsub.s16 d10, d2, d4 @x2 = x5-x6
+ vsub.s16 d11, d0, d6 @x3 = x4-x7
+
+ vshl.s16 d12, d10, #1 @U_SHIFT(x2,1,shft), i.e. x2 << 1
+ vshl.s16 d13, d11, #1 @U_SHIFT(x3,1,shft), i.e. x3 << 1
+
+ vadd.s16 d14, d8, d9 @x4 = x0 + x1;
+ vsub.s16 d16, d8, d9 @x6 = x0 - x1;
+ vadd.s16 d15, d13, d10 @x5 = U_SHIFT(x3,1,shft) + x2;
+ vsub.s16 d17, d11, d12 @x7 = x3 - U_SHIFT(x2,1,shft);
+
+ @transpose again so that the same butterfly performs the vertical transform
+ vtrn.16 d14, d15 @T12
+ vtrn.16 d16, d17 @T34
+ vtrn.32 d14, d16 @T13
+ vtrn.32 d15, d17 @T24
+
+ @let us do vertical transform
+ @same code as horiz
+ vadd.s16 d18, d14, d17 @x0 = x4+x7
+ vadd.s16 d19, d15, d16 @x1 = x5+x6
+ vsub.s16 d20, d15, d16 @x2 = x5-x6
+ vsub.s16 d21, d14, d17 @x3 = x4-x7
+
+ vshl.s16 d22, d20, #1 @U_SHIFT(x2,1,shft), i.e. x2 << 1
+ vshl.s16 d23, d21, #1 @U_SHIFT(x3,1,shft), i.e. x3 << 1
+
+ vdup.s32 q4, r8 @q4 = round factor in every 32-bit lane (accumulator for row 1)
+
+ vadd.s16 d24, d18, d19 @x5 = x0 + x1; (coefficient row 1)
+ vsub.s16 d26, d18, d19 @x7 = x0 - x1; (coefficient row 3)
+ vadd.s16 d25, d23, d20 @x6 = U_SHIFT(x3,1,shft) + x2; (coefficient row 2)
+ vsub.s16 d27, d21, d22 @x8 = x3 - U_SHIFT(x2,1,shft); (coefficient row 4)
+ vdup.s32 q10, r7 @q10 = -qbits in every 32-bit lane
+
+ vst1.s16 d24[0], [r10] @Store the unquantized dc value (lane 0 of row 1) to the alternate dc address
+
+@core transform is done: q12 holds coefficient rows 1-2 and q13 rows 3-4
+ vld1.s16 {q14-q15}, [r5] @load the 16 scaling values (d28..d31 = rows 1..4)
+
+ vabs.s16 q0, q12 @Abs val of rows 1 and 2
+
+ vabs.s16 q1, q13 @Abs val of rows 3 and 4
+
+ vmov.s32 q5, q4 @copy round fact for row 2
+
+ vmov.s32 q6, q4 @copy round fact for row 3
+ vclt.s16 q2, q12, #0 @Sign mask (all ones where negative) of rows 1 and 2
+
+ vmov.s32 q7, q4 @copy round fact for row 4
+ vclt.s16 q3, q13, #0 @Sign mask (all ones where negative) of rows 3 and 4
+
+ vmlal.s16 q4, d0, d28 @row 1: round + abs * scale
+ vmlal.s16 q5, d1, d29 @row 2: round + abs * scale
+ vmlal.s16 q6, d2, d30 @row 3: round + abs * scale
+ vmlal.s16 q7, d3, d31 @row 4: round + abs * scale
+
+ vshl.s32 q11, q4, q10 @Shift row 1 right by qbits (q10 holds -qbits)
+ vshl.s32 q12, q5, q10 @Shift row 2 right by qbits
+ vshl.s32 q13, q6, q10 @Shift row 3 right by qbits
+ vshl.s32 q14, q7, q10 @Shift row 4 right by qbits
+
+ vmovn.s32 d30, q11 @Narrow row 1 to 16 bit
+ vmovn.s32 d31, q12 @Narrow row 2 to 16 bit
+ vmovn.s32 d0 , q13 @Narrow row 3 to 16 bit
+ vmovn.s32 d1 , q14 @Narrow row 4 to 16 bit
+
+ vneg.s16 q1, q15 @Negated quantized magnitudes of rows 1 and 2
+ vneg.s16 q4, q0 @Negated quantized magnitudes of rows 3 and 4
+
+ vceq.s16 q5, q15, #0 @I mask of zero coefficients in rows 1 and 2
+ vceq.s16 q6, q0 , #0 @I mask of zero coefficients in rows 3 and 4
+
+ vbsl.s16 q2, q1, q15 @Restore sign of rows 1 and 2: negated value where the sign mask is set
+ vbsl.s16 q3, q4, q0 @Restore sign of rows 3 and 4
+
+ vmovn.u16 d14, q5 @I Narrow the zero mask of rows 1 and 2 to bytes
+ vmovn.u16 d15, q6 @I Narrow the zero mask of rows 3 and 4 to bytes
+
+ vshr.u8 q8, q7, #7 @I Reduce each mask byte to a single bit: 1 for every zero coefficient
+
+ vpadd.u8 d18, d16, d17 @I pair add zero flags, step 1 (16 -> 8 partial sums)
+ vpadd.u8 d20, d18, d19 @I pair add step 2; only the low half of the result is meaningful from here on
+ vpadd.u8 d22, d20, d21 @I pair add step 3
+ vpadd.u8 d24, d22, d23 @I pair add step 4: byte 0 = number of zero coefficients
+ vst1.s16 {q2-q3}, [r2] @Store the 16 quantized coefficients contiguously at dst
+
+ vmov.u8 d25, #16 @I Get max nnz
+ vsub.u8 d26, d25, d24 @I nnz = 16 - number of zero coefficients
+
+ vst1.u8 d26[0], [r9] @I Write nnz (one byte)
+
+ vpop {d8-d15}
+ pop {r4-r12, pc}
+
+
+
+@*****************************************************************************
+@*
+@* Function Name : ih264_hadamard_quant_4x4_a9
+@* Description : This function does forward hadamard transform and
+@* quantization for luma dc block
+@*
+@* Arguments : R0 :pointer to src buffer
+@ R1 :pointer to dst buffer
+@ R2 :pu2_scale_matrix
+@ R3 :pu2_threshold_matrix
+@ STACK : u4_qbits
+@ u4_round_factor
+@ pu1_nnz
+@ Values Returned : NONE
+@
+@ Register Usage :
+@ Stack Usage : 64 bytes (d8-d15 are saved with vpush)
+@ Cycles : Around
+@ Interruptibility : Interruptible
+@
+@ Known Limitations
+@ \Assumptions :
+@
+@ Revision History :
+@ DD MM YYYY Author(s) Changes
+@ 20 2 2015 100633 First version
+@
+@*****************************************************************************
+@ih264_hadamard_quant_4x4_a9(WORD16 *pi2_src, WORD16 *pi2_dst,
+@ const UWORD16 *pu2_scale_matrix,
+@ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+@ UWORD32 u4_round_factor,UWORD8 *pu1_nnz
+@ )
+ .global ih264_hadamard_quant_4x4_a9
+ih264_hadamard_quant_4x4_a9:
+@ 4x4 forward hadamard transform of 16-bit inputs, quantization with pu2_scale_matrix[0], and nnz count
+@Register usage
+@ r0 : src
+@ r1 : dst
+@ r2 : *pu2_scale_matrix
+@ r3 : *pu2_threshold_matrix (never read; r3 is reused as a scratch pointer below)
+
+ vld4.s16 {d0, d1, d2, d3}, [r0]! @Load 4x4 block de-interleaved: d0..d3 = columns 0..3
+ vpush {d8-d15} @save d8-d15 (64 bytes): the stack arguments now start at sp+64
+
+ vld1.u16 d30[0], [r2] @load pu2_scale_matrix[0]
+
+ vaddl.s16 q3, d0, d3 @x0 = x4 + x7;
+ vaddl.s16 q4, d1, d2 @x1 = x5 + x6;
+ vsubl.s16 q5, d1, d2 @x2 = x5 - x6;
+ vsubl.s16 q6, d0, d3 @x3 = x4 - x7;
+
+ vdup.u16 d30, d30[0] @pu2_scale_matrix[0] in every 16-bit lane of d30
+
+ vadd.s32 q7, q3, q4 @pi2_dst[0] = x0 + x1;
+ vadd.s32 q8, q6, q5 @pi2_dst[1] = x3 + x2;
+ add r3, sp, #68 @Get address of u4_round_factor
+ vsub.s32 q9, q3, q4 @pi2_dst[2] = x0 - x1;
+ vsub.s32 q10, q6, q5 @pi2_dst[3] = x3 - x2;
+
+ vtrn.s32 q7, q8 @transpose 4x4 block
+ vtrn.s32 q9, q10 @(transpose, continued)
+ vld1.s32 d0[0], [r3] @load u4_round_factor
+ vswp d15, d18 @(transpose, continued: exchange the off-diagonal 2x2 quadrants)
+ vswp d17, d20 @(transpose, completed)
+
+ add r3, sp, #64 @Get address of u4_qbits
+ vadd.s32 q11, q7, q10 @x0 = x4 + x7;
+ vadd.s32 q12, q8, q9 @x1 = x5 + x6;
+ vld1.s32 d31[0], [r3] @load u4_qbits
+ vsub.s32 q13, q8, q9 @x2 = x5 - x6;
+ vsub.s32 q14, q7, q10 @x3 = x4 - x7;
+
+ vdup.s32 q7, d0[0] @u4_round_factor in every 32-bit lane
+
+ vadd.s32 q0, q11, q12 @(x0 + x1)
+ vadd.s32 q1, q14, q13 @(x3 + x2)
+ vsub.s32 q2, q11, q12 @(x0 - x1)
+ vsub.s32 q3, q14, q13 @(x3 - x2)
+
+ vdup.s32 q11, d31[0] @u4_qbits in every 32-bit lane
+
+ vshrn.s32 d0, q0, #1 @i4_value = (x0 + x1) >> 1;
+ vshrn.s32 d1, q1, #1 @i4_value = (x3 + x2) >> 1;
+ vshrn.s32 d2, q2, #1 @i4_value = (x0 - x1) >> 1;
+ vshrn.s32 d3, q3, #1 @i4_value = (x3 - x2) >> 1;
+
+ vabs.s16 q5, q0 @absolute values of the first 8 transformed values
+ vabs.s16 q6, q1 @absolute values of the last 8 transformed values
+
+ vmov.s32 q8, q7 @Get the round fact
+ vmov.s32 q9, q7 @(round factor copy for the third group of 4)
+ vmov.s32 q10, q7 @(round factor copy for the fourth group of 4)
+
+ vclt.s16 q3, q0, #0 @sign mask (all ones where negative) of the first 8 values
+ vclt.s16 q4, q1, #0 @sign mask of the last 8 values
+
+ vneg.s32 q11, q11 @-u4_qbits: vshl with a negative count shifts right
+
+ vmlal.u16 q7, d10, d30 @round + abs * scale[0], values 0-3
+ vmlal.u16 q8, d11, d30 @round + abs * scale[0], values 4-7
+ vmlal.u16 q9, d12, d30 @round + abs * scale[0], values 8-11
+ vmlal.u16 q10, d13, d30 @round + abs * scale[0], values 12-15
+
+ vshl.u32 q7, q7, q11 @>> qbits
+ vshl.u32 q8, q8, q11 @>> qbits
+ vshl.u32 q9, q9, q11 @>> qbits
+ vshl.u32 q10, q10, q11 @>> qbits
+
+ vqmovn.u32 d22, q7 @saturating narrow to 16 bit (q11 is free again after the shifts)
+ vqmovn.u32 d23, q8 @saturating narrow to 16 bit
+ vqmovn.u32 d24, q9 @saturating narrow to 16 bit
+ vqmovn.u32 d25, q10 @saturating narrow to 16 bit
+
+ vneg.s16 q13, q11 @negated quantized magnitudes, first 8
+ vneg.s16 q14, q12 @negated quantized magnitudes, last 8
+
+ vbsl.s16 q3, q13, q11 @restore sign: negated value where the sign mask is set
+ vbsl.s16 q4, q14, q12 @restore sign, last 8
+
+ vceq.s16 q5, q11, #0 @mask of zero coefficients, first 8
+ vceq.s16 q6, q12, #0 @mask of zero coefficients, last 8
+
+ vst1.s16 {q3}, [r1]! @store the first 8 coefficients and advance dst
+
+ vshrn.u16 d14, q5, #8 @narrow the zero mask to bytes, first 8
+ vshrn.u16 d15, q6, #8 @narrow the zero mask to bytes, last 8
+
+ ldr r3, [sp, #72] @Load *pu1_nnz
+
+ vshr.u8 q7, q7, #7 @1 for every zero coefficient
+
+ vst1.s16 {q4}, [r1]! @store the last 8 coefficients
+
+ vadd.u8 d16, d14, d15 @add the zero flags of both halves
+ vmov.u8 d20, #16 @maximum nnz
+ vpadd.u8 d17, d16, d16 @pairwise sums (8 -> 4)
+ vpadd.u8 d18, d17, d17 @pairwise sums (4 -> 2)
+ vpadd.u8 d19, d18, d18 @pairwise sums (2 -> 1): number of zero coefficients
+ vsub.u8 d20, d20, d19 @nnz = 16 - number of zero coefficients
+ vst1.u8 d20[0], [r3] @write nnz (one byte)
+
+ vpop {d8-d15}
+ bx lr
+
+
+
+
+@*****************************************************************************
+@*
+@* Function Name : ih264_hadamard_quant_2x2_uv_a9
+@* Description : This function does forward hadamard transform and
+@* quantization for dc block of chroma for both planes
+@*
+@* Arguments : R0 :pointer to src buffer
+@ R1 :pointer to dst buffer
+@ R2 :pu2_scale_matrix
+@ R3 :pu2_threshold_matrix
+@ STACK : u4_qbits
+@ u4_round_factor
+@ pu1_nnz
+@ Values Returned : NONE
+@
+@ Register Usage :
+@ Stack Usage : 64 bytes (d8-d15 are saved with vpush)
+@ Cycles : Around
+@ Interruptibility : Interruptible
+@
+@ Known Limitations
+@ \Assumptions :
+@
+@ Revision History :
+@ DD MM YYYY Author(s) Changes
+@ 20 2 2015 100633 First version
+@
+@*****************************************************************************
+@ ih264_hadamard_quant_2x2_uv_a9(WORD16 *pi2_src, WORD16 *pi2_dst,
+@ const UWORD16 *pu2_scale_matrix,
+@ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+@ UWORD32 u4_round_factor,UWORD8 *pu1_nnz
+@ )
+
+ .global ih264_hadamard_quant_2x2_uv_a9
+ih264_hadamard_quant_2x2_uv_a9:
+@ Two 2x2 forward hadamard transforms (src[0..3] and src[4..7]), quantization with pu2_scale_matrix[0], and one nnz byte per 2x2 block
+ vpush {d8-d15} @save d8-d15 (64 bytes): the stack arguments now start at sp+64
+ vld2.s16 {d0-d1}, [r0] @load src de-interleaved: d0 = even elements, d1 = odd elements
+
+ add r3, sp, #68 @Get address of u4_round_factor
+
+ vaddl.s16 q3, d0, d1 @x0 = x4 + x5;, x2 = x6 + x7;
+ vld1.u16 d30[0], [r2] @load pu2_scale_matrix[0]
+ vsubl.s16 q4, d0, d1 @x1 = x4 - x5; x3 = x6 - x7;
+
+ add r0, sp, #64 @Get address of u4_qbits
+ vld1.s32 d28[0], [r3] @load u4_round_factor
+ vtrn.s32 q3, q4 @q3 -> x0 x1, q4 -> x2 x3 (for each of the two 2x2 blocks)
+
+ vadd.s32 q0, q3, q4 @ (x0 + x2) (x1 + x3) (y0 + y2); (y1 + y3);
+ vld1.s32 d24[0], [r0] @load u4_qbits
+ vsub.s32 q1, q3, q4 @ (x0 - x2) (x1 - x3) (y0 - y2); (y1 - y3);
+
+ vdup.u16 d30, d30[0] @pu2_scale_matrix[0] in every 16-bit lane of d30
+
+ vabs.s32 q2, q0 @absolute values of the sums
+ vabs.s32 q3, q1 @absolute values of the differences
+
+ vdup.s32 q14, d28[0] @u4_round_factor in every 32-bit lane
+
+ vmovl.u16 q15, d30 @widen pu2_scale_matrix[0] to 32-bit lanes
+
+ vclt.s32 q4, q0, #0 @sign mask (all ones where negative) of the sums
+ vdup.s32 q12, d24[0] @u4_qbits in every 32-bit lane
+ vclt.s32 q5, q1, #0 @sign mask of the differences
+
+ vqmovn.u32 d8, q4 @narrow the sign mask to 16 bit (all-ones saturates to 0xFFFF)
+ vqmovn.s32 d9, q5 @narrow the sign mask to 16 bit (-1 stays 0xFFFF)
+
+ vmov.s32 q13, q14 @Get the round fact
+ vneg.s32 q12, q12 @-u4_qbits: vshl with a negative count shifts right
+
+ vmla.u32 q13, q2, q15 @round + abs * scale[0] (sums)
+ vmla.u32 q14, q3, q15 @round + abs * scale[0] (differences)
+
+ vshl.u32 q13, q13, q12 @>>qbit
+ vshl.u32 q14, q14, q12 @>>qbit
+
+ vqmovn.u32 d10, q13 @saturating narrow to 16 bit
+ vqmovn.u32 d11, q14 @saturating narrow to 16 bit
+
+ vneg.s16 q6, q5 @negated quantized magnitudes
+
+ vbsl.s16 q4, q6, q5 @*sign
+
+ vtrn.s32 d8, d9 @gather the four results of src[0..3] in d8 and those of src[4..7] in d9
+
+ vceq.s16 q7, q4, #0 @Compute nnz
+
+ vshrn.u16 d14, q7, #8 @narrow the zero mask to one byte per coefficient
+
+ ldr r3, [sp, #72] @Load *pu1_nnz
+ vshr.u8 d14, d14, #7 @reduce nnz comparison to 1 bit
+ vmov.u8 d20, #4 @Since we add zeros, we need to subtract from 4 to get nnz
+ vpadd.u8 d17, d14, d14 @Sum up nnz
+
+ vst1.s16 {q4}, [r1]! @Store the block
+
+ vpadd.u8 d17, d17, d17 @Sum up nnz: byte 0 = zeros in the first block, byte 1 = zeros in the second
+ vsub.u8 d20, d20, d17 @4- numzeros
+ vst1.u16 d20[0], [r3] @store two nnz bytes: pu1_nnz[0] for the first block, pu1_nnz[1] for the second
+
+ vpop {d8-d15}
+ bx lr
+
+
+
+
+
diff --git a/common/arm/ih264_weighted_bi_pred_a9q.s b/common/arm/ih264_weighted_bi_pred_a9q.s
new file mode 100755
index 0000000..ccae779
--- /dev/null
+++ b/common/arm/ih264_weighted_bi_pred_a9q.s
@@ -0,0 +1,642 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_weighted_bi_pred_a9q.s
+@*
+@* @brief
+@* Contains function definitions for weighted biprediction.
+@*
+@* @author
+@* Kaushik Senthoor R
+@*
+@* @par List of Functions:
+@*
+@* - ih264_weighted_bi_pred_luma_a9q()
+@* - ih264_weighted_bi_pred_chroma_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@*******************************************************************************
+@* @function
+@* ih264_weighted_bi_pred_luma_a9q()
+@*
+@* @brief
+@* This routine performs the weighted biprediction as described in sec
+@* 8.4.2.3.2 titled "Weighted sample prediction process" for luma.
+@*
+@* @par Description:
+@* This function gets two ht x wd blocks, calculates the weighted samples,
+@* rounds off, adds offset and stores it in the destination block.
+@*
+@* @param[in] pu1_src1
+@* UWORD8 Pointer to the buffer containing the input block 1.
+@*
+@* @param[in] pu1_src2
+@* UWORD8 Pointer to the buffer containing the input block 2.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output block is stored.
+@*
+@* @param[in] src_strd1
+@* Stride of the input buffer 1
+@*
+@* @param[in] src_strd2
+@* Stride of the input buffer 2
+@*
+@* @param[in] dst_strd
+@* Stride of the destination buffer
+@*
+@* @param[in] log_wd
+@* number of bits to be rounded off
+@*
+@* @param[in] wt1
+@* weight for the weighted prediction
+@*
+@* @param[in] wt2
+@* weight for the weighted prediction
+@*
+@* @param[in] ofst1
+@* offset 1 used after rounding off
+@*
+@* @param[in] ofst2
+@* offset 2 used after rounding off
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+@*
+@*******************************************************************************
+@*/
+@void ih264_weighted_bi_pred_luma_a9q(UWORD8 *pu1_src1,
+@ UWORD8 *pu1_src2,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd1,
+@ WORD32 src_strd2,
+@ WORD32 dst_strd,
+@ WORD32 log_wd,
+@ WORD32 wt1,
+@ WORD32 wt2,
+@ WORD32 ofst1,
+@ WORD32 ofst2,
+@ WORD32 ht,
+@ WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src1
+@ r1 => pu1_src2
+@ r2 => pu1_dst
+@ r3 => src_strd1
+@ [sp] => src_strd2 (r4)
+@ [sp+4] => dst_strd (r5)
+@ [sp+8] => log_wd (r6)
+@ [sp+12] => wt1 (r7)
+@ [sp+16] => wt2 (r8)
+@ [sp+20] => ofst1 (r9)
+@ [sp+24] => ofst2 (r10)
+@ [sp+28] => ht (r11)
+@ [sp+32] => wd (r12)
+@
+.text
+.p2align 2
+
+ .global ih264_weighted_bi_pred_luma_a9q
+@ dst = saturate_u8(rounding_shift_right(src1 * wt1 + src2 * wt2, log_wd + 1) + ((ofst1 + ofst2 + 1) >> 1)) for wd = 4, 8 or 16
+ih264_weighted_bi_pred_luma_a9q:
+
+ stmfd sp!, {r4-r12, r14} @save r4-r12 and lr (40 bytes): the stack arguments now start at sp+40
+ ldr r6, [sp, #48] @Load log_wd in r6
+ ldr r7, [sp, #52] @Load wt1 in r7
+ ldr r8, [sp, #56] @Load wt2 in r8
+ ldr r9, [sp, #60] @Load ofst1 in r9
+
+ add r6, r6, #1 @r6 = log_wd + 1
+ sxtb r7, r7 @sign-extend the low 8 bits of wt1 to 32-bit
+ ldr r4, [sp, #40] @Load src_strd2 in r4
+ ldr r5, [sp, #44] @Load dst_strd in r5
+ sxtb r9, r9 @sign-extend 8-bit ofst1 to 32-bit
+ rsb r10, r6, #0 @r10 = -(log_wd + 1)
+ ldr r11, [sp, #68] @Load ht in r11
+ ldr r12, [sp, #72] @Load wd in r12
+ vdup.16 q0, r10 @Q0 = -(log_wd + 1) in every 16-bit lane
+ add r9, r9, #1 @r9 = ofst1 + 1
+
+ ldr r10, [sp, #64] @Load ofst2 in r10
+ sxtb r8, r8 @sign-extend the low 8 bits of wt2 to 32-bit
+ cmp r12, #16 @check if wd is 16
+ vpush {d8-d15} @save d8-d15; all stack arguments have been read by now
+ sxtb r10, r10 @sign-extend 8-bit ofst2 to 32-bit
+ add r9, r9, r10 @r9 = ofst1 + ofst2 + 1
+ vmov d2, r7, r8 @D2 = {wt1(32-bit), wt2(32-bit)}; 16-bit lanes 0 and 2 are used as the weights
+ asr r9, r9, #1 @r9 = ofst = (ofst1 + ofst2 + 1) >> 1
+ vdup.8 d3, r9 @D3 = ofst (8-bit)
+ beq loop_16 @branch if wd is 16
+
+ cmp r12, #8 @check if wd is 8
+ beq loop_8 @branch if wd is 8
+
+loop_4: @each iteration processes four rows
+
+ vld1.32 d4[0], [r0], r3 @load row 1 in source 1
+ vld1.32 d4[1], [r0], r3 @load row 2 in source 1
+ vld1.32 d6[0], [r1], r4 @load row 1 in source 2
+ vld1.32 d6[1], [r1], r4 @load row 2 in source 2
+
+ vmovl.u8 q2, d4 @converting rows 1,2 in source 1 to 16-bit
+ vld1.32 d8[0], [r0], r3 @load row 3 in source 1
+ vld1.32 d8[1], [r0], r3 @load row 4 in source 1
+ vmovl.u8 q3, d6 @converting rows 1,2 in source 2 to 16-bit
+ vld1.32 d10[0], [r1], r4 @load row 3 in source 2
+ vld1.32 d10[1], [r1], r4 @load row 4 in source 2
+
+ vmovl.u8 q4, d8 @converting rows 3,4 in source 1 to 16-bit
+ vmovl.u8 q5, d10 @converting rows 3,4 in source 2 to 16-bit
+
+ vmul.s16 q2, q2, d2[0] @weight 1 mult. for rows 1,2
+ vmla.s16 q2, q3, d2[2] @weight 2 mult. for rows 1,2
+ vmul.s16 q4, q4, d2[0] @weight 1 mult. for rows 3,4
+ vmla.s16 q4, q5, d2[2] @weight 2 mult. for rows 3,4
+
+ subs r11, r11, #4 @decrement ht by 4 (flags are consumed by the bgt below)
+ vrshl.s16 q2, q2, q0 @rounds off the weighted samples from rows 1,2 (rounding right shift by log_wd + 1)
+ vrshl.s16 q4, q4, q0 @rounds off the weighted samples from rows 3,4
+
+ vaddw.s8 q2, q2, d3 @adding offset for rows 1,2
+ vaddw.s8 q4, q4, d3 @adding offset for rows 3,4
+
+ vqmovun.s16 d4, q2 @saturating rows 1,2 to unsigned 8-bit
+ vqmovun.s16 d8, q4 @saturating rows 3,4 to unsigned 8-bit
+
+ vst1.32 d4[0], [r2], r5 @store row 1 in destination
+ vst1.32 d4[1], [r2], r5 @store row 2 in destination
+ vst1.32 d8[0], [r2], r5 @store row 3 in destination
+ vst1.32 d8[1], [r2], r5 @store row 4 in destination
+
+ bgt loop_4 @if greater than 0 repeat the loop again
+
+ b end_loops
+
+loop_8: @each iteration processes four rows
+
+ vld1.8 d4, [r0], r3 @load row 1 in source 1
+ vld1.8 d6, [r1], r4 @load row 1 in source 2
+ vld1.8 d8, [r0], r3 @load row 2 in source 1
+ vld1.8 d10, [r1], r4 @load row 2 in source 2
+ vmovl.u8 q2, d4 @converting row 1 in source 1 to 16-bit
+ vld1.8 d12, [r0], r3 @load row 3 in source 1
+ vld1.8 d14, [r1], r4 @load row 3 in source 2
+ vmovl.u8 q3, d6 @converting row 1 in source 2 to 16-bit
+ vld1.8 d16, [r0], r3 @load row 4 in source 1
+ vld1.8 d18, [r1], r4 @load row 4 in source 2
+
+ vmovl.u8 q4, d8 @converting row 2 in source 1 to 16-bit
+ vmovl.u8 q5, d10 @converting row 2 in source 2 to 16-bit
+
+ vmul.s16 q2, q2, d2[0] @weight 1 mult. for row 1
+ vmla.s16 q2, q3, d2[2] @weight 2 mult. for row 1
+ vmovl.u8 q6, d12 @converting row 3 in source 1 to 16-bit
+ vmovl.u8 q7, d14 @converting row 3 in source 2 to 16-bit
+ vmul.s16 q4, q4, d2[0] @weight 1 mult. for row 2
+ vmla.s16 q4, q5, d2[2] @weight 2 mult. for row 2
+ vmovl.u8 q8, d16 @converting row 4 in source 1 to 16-bit
+ vmovl.u8 q9, d18 @converting row 4 in source 2 to 16-bit
+
+ vmul.s16 q6, q6, d2[0] @weight 1 mult. for row 3
+ vmla.s16 q6, q7, d2[2] @weight 2 mult. for row 3
+ vmul.s16 q8, q8, d2[0] @weight 1 mult. for row 4
+ vmla.s16 q8, q9, d2[2] @weight 2 mult. for row 4
+
+ vrshl.s16 q2, q2, q0 @rounds off the weighted samples from row 1
+ vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 2
+ vrshl.s16 q6, q6, q0 @rounds off the weighted samples from row 3
+ vaddw.s8 q2, q2, d3 @adding offset for row 1
+ vrshl.s16 q8, q8, q0 @rounds off the weighted samples from row 4
+ vaddw.s8 q4, q4, d3 @adding offset for row 2
+
+ vaddw.s8 q6, q6, d3 @adding offset for row 3
+ vqmovun.s16 d4, q2 @saturating row 1 to unsigned 8-bit
+ vaddw.s8 q8, q8, d3 @adding offset for row 4
+ vqmovun.s16 d8, q4 @saturating row 2 to unsigned 8-bit
+
+ vqmovun.s16 d12, q6 @saturating row 3 to unsigned 8-bit
+ vqmovun.s16 d16, q8 @saturating row 4 to unsigned 8-bit
+
+ vst1.8 d4, [r2], r5 @store row 1 in destination
+ vst1.8 d8, [r2], r5 @store row 2 in destination
+ subs r11, r11, #4 @decrement ht by 4 (flags are consumed by the bgt below)
+ vst1.8 d12, [r2], r5 @store row 3 in destination
+ vst1.8 d16, [r2], r5 @store row 4 in destination
+
+ bgt loop_8 @if greater than 0 repeat the loop again
+
+ b end_loops
+
+loop_16: @each iteration processes four rows (L = low 8 pixels, H = high 8 pixels of a row)
+
+ vld1.8 {q2}, [r0], r3 @load row 1 in source 1
+ vld1.8 {q3}, [r1], r4 @load row 1 in source 2
+ vld1.8 {q4}, [r0], r3 @load row 2 in source 1
+ vld1.8 {q5}, [r1], r4 @load row 2 in source 2
+ vmovl.u8 q10, d4 @converting row 1L in source 1 to 16-bit
+ vld1.8 {q6}, [r0], r3 @load row 3 in source 1
+ vld1.8 {q7}, [r1], r4 @load row 3 in source 2
+ vmovl.u8 q11, d6 @converting row 1L in source 2 to 16-bit
+ vld1.8 {q8}, [r0], r3 @load row 4 in source 1
+ vld1.8 {q9}, [r1], r4 @load row 4 in source 2
+
+ vmovl.u8 q2, d5 @converting row 1H in source 1 to 16-bit
+ vmovl.u8 q3, d7 @converting row 1H in source 2 to 16-bit
+
+ vmul.s16 q10, q10, d2[0] @weight 1 mult. for row 1L
+ vmla.s16 q10, q11, d2[2] @weight 2 mult. for row 1L
+ vmovl.u8 q12, d8 @converting row 2L in source 1 to 16-bit
+ vmovl.u8 q13, d10 @converting row 2L in source 2 to 16-bit
+
+ vmul.s16 q2, q2, d2[0] @weight 1 mult. for row 1H
+ vmla.s16 q2, q3, d2[2] @weight 2 mult. for row 1H
+ vmovl.u8 q4, d9 @converting row 2H in source 1 to 16-bit
+ vmovl.u8 q5, d11 @converting row 2H in source 2 to 16-bit
+
+ vmul.s16 q12, q12, d2[0] @weight 1 mult. for row 2L
+ vmla.s16 q12, q13, d2[2] @weight 2 mult. for row 2L
+ vmovl.u8 q14, d12 @converting row 3L in source 1 to 16-bit
+ vmovl.u8 q15, d14 @converting row 3L in source 2 to 16-bit
+
+ vmul.s16 q4, q4, d2[0] @weight 1 mult. for row 2H
+ vmla.s16 q4, q5, d2[2] @weight 2 mult. for row 2H
+ vmovl.u8 q6, d13 @converting row 3H in source 1 to 16-bit
+ vmovl.u8 q7, d15 @converting row 3H in source 2 to 16-bit
+
+ vmul.s16 q14, q14, d2[0] @weight 1 mult. for row 3L
+ vmla.s16 q14, q15, d2[2] @weight 2 mult. for row 3L
+ vmovl.u8 q11, d16 @converting row 4L in source 1 to 16-bit (q11 is free after the row 1L multiply)
+ vmovl.u8 q3, d18 @converting row 4L in source 2 to 16-bit (q3 is free after the row 1H multiply)
+
+ vmul.s16 q6, q6, d2[0] @weight 1 mult. for row 3H
+ vmla.s16 q6, q7, d2[2] @weight 2 mult. for row 3H
+ vmovl.u8 q8, d17 @converting row 4H in source 1 to 16-bit
+ vmovl.u8 q9, d19 @converting row 4H in source 2 to 16-bit
+
+ vmul.s16 q11, q11, d2[0] @weight 1 mult. for row 4L
+ vmla.s16 q11, q3, d2[2] @weight 2 mult. for row 4L
+ vrshl.s16 q10, q10, q0 @rounds off the weighted samples from row 1L
+
+ vmul.s16 q8, q8, d2[0] @weight 1 mult. for row 4H
+ vmla.s16 q8, q9, d2[2] @weight 2 mult. for row 4H
+ vrshl.s16 q2, q2, q0 @rounds off the weighted samples from row 1H
+
+ vrshl.s16 q12, q12, q0 @rounds off the weighted samples from row 2L
+ vaddw.s8 q10, q10, d3 @adding offset for row 1L
+ vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 2H
+ vaddw.s8 q2, q2, d3 @adding offset for row 1H
+ vrshl.s16 q14, q14, q0 @rounds off the weighted samples from row 3L
+ vaddw.s8 q12, q12, d3 @adding offset for row 2L
+ vrshl.s16 q6, q6, q0 @rounds off the weighted samples from row 3H
+ vaddw.s8 q4, q4, d3 @adding offset for row 2H
+ vrshl.s16 q11, q11, q0 @rounds off the weighted samples from row 4L
+ vaddw.s8 q14, q14, d3 @adding offset for row 3L
+ vrshl.s16 q8, q8, q0 @rounds off the weighted samples from row 4H
+ vaddw.s8 q6, q6, d3 @adding offset for row 3H
+
+ vqmovun.s16 d26, q10 @saturating row 1L to unsigned 8-bit
+ vaddw.s8 q11, q11, d3 @adding offset for row 4L
+ vqmovun.s16 d27, q2 @saturating row 1H to unsigned 8-bit
+ vaddw.s8 q8, q8, d3 @adding offset for row 4H
+
+ vqmovun.s16 d10, q12 @saturating row 2L to unsigned 8-bit
+ vqmovun.s16 d11, q4 @saturating row 2H to unsigned 8-bit
+ vqmovun.s16 d30, q14 @saturating row 3L to unsigned 8-bit
+ vqmovun.s16 d31, q6 @saturating row 3H to unsigned 8-bit
+ vst1.8 {q13}, [r2], r5 @store row 1 in destination
+ vqmovun.s16 d14, q11 @saturating row 4L to unsigned 8-bit
+ vqmovun.s16 d15, q8 @saturating row 4H to unsigned 8-bit
+
+ vst1.8 {q5}, [r2], r5 @store row 2 in destination
+ subs r11, r11, #4 @decrement ht by 4 (flags are consumed by the bgt below)
+ vst1.8 {q15}, [r2], r5 @store row 3 in destination
+ vst1.8 {q7}, [r2], r5 @store row 4 in destination
+
+ bgt loop_16 @if greater than 0 repeat the loop again
+
+end_loops:
+
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, r15} @Restore r4-r12 and return by loading the saved lr into pc
+
+
+@*******************************************************************************
+@* @function
+@* ih264_weighted_bi_pred_chroma_a9q()
+@*
+@* @brief
+@* This routine performs the bi-directional weighted prediction as described in sec
+@* 8.4.2.3.2 titled "Weighted sample prediction process" for chroma.
+@*
+@* @par Description:
+@* This function gets two ht x wd blocks, calculates the weighted samples,
+@* rounds off, adds offset and stores it in the destination block for U and V.
+@*
+@* @param[in] pu1_src1
+@* UWORD8 Pointer to the buffer containing the input block 1.
+@*
+@* @param[in] pu1_src2
+@* UWORD8 Pointer to the buffer containing the input block 2.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output block is stored.
+@*
+@* @param[in] src_strd1
+@* Stride of the input buffer 1
+@*
+@* @param[in] src_strd2
+@* Stride of the input buffer 2
+@*
+@* @param[in] dst_strd
+@* Stride of the destination buffer
+@*
+@* @param[in] log_wd
+@* number of bits to be rounded off
+@*
+@* @param[in] wt1
+@* weights for the weighted prediction in U and V
+@*
+@* @param[in] wt2
+@* weights for the weighted prediction in U and V
+@*
+@* @param[in] ofst1
+@* offset 1 used after rounding off for U and V
+@*
+@* @param[in] ofst2
+@* offset 2 used after rounding off for U and V
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+@*
+@*******************************************************************************
+@*/
+@void ih264_weighted_bi_pred_chroma_a9q(UWORD8 *pu1_src1,
+@ UWORD8 *pu1_src2,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd1,
+@ WORD32 src_strd2,
+@ WORD32 dst_strd,
+@ WORD32 log_wd,
+@ WORD32 wt1,
+@ WORD32 wt2,
+@ WORD32 ofst1,
+@ WORD32 ofst2,
+@ WORD32 ht,
+@ WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src1
+@ r1 => pu1_src2
+@ r2 => pu1_dst
+@ r3 => src_strd1
+@ [sp] => src_strd2 (r4)
+@ [sp+4] => dst_strd (r5)
+@ [sp+8] => log_wd (r6)
+@ [sp+12] => wt1 (r7)
+@ [sp+16] => wt2 (r8)
+@ [sp+20] => ofst1 (r9)
+@ [sp+24] => ofst2 (r10)
+@ [sp+28] => ht (r11)
+@ [sp+32] => wd (r12)
+@
+
+
+ .global ih264_weighted_bi_pred_chroma_a9q
+
+ih264_weighted_bi_pred_chroma_a9q:
+
+ stmfd sp!, {r4-r12, r14} @save r4-r12 and lr (10 regs = 40 bytes); stack arguments now start at [sp, #40]
+
+ ldr r6, [sp, #48] @Load log_wd in r6
+ ldr r7, [sp, #52] @Load wt1 in r7
+ ldr r8, [sp, #56] @Load wt2 in r8
+ add r6, r6, #1 @r6 = log_wd + 1
+ ldr r9, [sp, #60] @Load ofst1 in r9
+ ldr r10, [sp, #64] @Load ofst2 in r10
+
+ rsb r12, r6, #0 @r12 = -(log_wd + 1)
+ ldr r4, [sp, #40] @Load src_strd2 in r4
+ ldr r5, [sp, #44] @Load dst_strd in r5
+ vdup.16 q0, r12 @Q0 = -(log_wd + 1) (16-bit); negative count makes vrshl a rounding right shift
+
+ ldr r11, [sp, #68] @Load ht in r11
+ vdup.32 q1, r7 @Q1 = (wt1_u, wt1_v) (32-bit)
+ ldr r12, [sp, #72] @Load wd in r12 (r12 is reused; the shift count already lives in Q0)
+ vdup.32 q2, r8 @Q2 = (wt2_u, wt2_v) (32-bit)
+ asr r7, r9, #8 @r7 = ofst1_v
+ asr r8, r10, #8 @r8 = ofst2_v
+ vpush {d8-d15} @save d8-d15 (all stack arguments were loaded above, before sp moves)
+ sxtb r9, r9 @sign-extend 8-bit ofst1_u to 32-bit
+ sxtb r10, r10 @sign-extend 8-bit ofst2_u to 32-bit
+ sxtb r7, r7 @sign-extend 8-bit ofst1_v to 32-bit
+ sxtb r8, r8 @sign-extend 8-bit ofst2_v to 32-bit
+
+ add r9, r9, #1 @r9 = ofst1_u + 1
+ add r7, r7, #1 @r7 = ofst1_v + 1
+ add r9, r9, r10 @r9 = ofst1_u + ofst2_u + 1
+ add r7, r7, r8 @r7 = ofst1_v + ofst2_v + 1
+ asr r9, r9, #1 @r9 = ofst_u = (ofst1_u + ofst2_u + 1) >> 1
+ asr r7, r7, #1 @r7 = ofst_v = (ofst1_v + ofst2_v + 1) >> 1
+ cmp r12, #8 @check if wd is 8
+ pkhbt r9, r9, r7, lsl #16 @r9 = {ofst_u(16-bit), ofst_v(16-bit)}
+ vdup.32 q3, r9 @Q3 = {ofst_u(16-bit), ofst_v(16-bit)}
+ beq loop_8_uv @branch if wd is 8
+
+ cmp r12, #4 @check if wd is 4
+ beq loop_4_uv @branch if wd is 4
+
+loop_2_uv: @each iteration processes two rows (fall-through case: wd is neither 8 nor 4)
+
+ vld1.32 d8[0], [r0], r3 @load row 1 in source 1
+ vld1.32 d8[1], [r0], r3 @load row 2 in source 1
+ vld1.32 d10[0], [r1], r4 @load row 1 in source 2
+ vld1.32 d10[1], [r1], r4 @load row 2 in source 2
+
+ vmovl.u8 q4, d8 @converting rows 1,2 in source 1 to 16-bit
+ vmovl.u8 q5, d10 @converting rows 1,2 in source 2 to 16-bit
+
+ vmul.s16 q4, q4, q1 @weight 1 mult. for rows 1,2
+ vmla.s16 q4, q5, q2 @weight 2 mult. for rows 1,2
+
+ vrshl.s16 q4, q4, q0 @rounds off the weighted samples from rows 1,2
+
+ vadd.s16 q4, q4, q3 @adding offset for rows 1,2
+
+ vqmovun.s16 d8, q4 @saturating rows 1,2 to unsigned 8-bit
+
+ vst1.32 d8[0], [r2], r5 @store row 1 in destination
+ vst1.32 d8[1], [r2], r5 @store row 2 in destination
+
+ subs r11, r11, #2 @decrement ht by 2
+ bgt loop_2_uv @if greater than 0 repeat the loop again
+
+ b end_loops_uv @done; skip the wd = 4 and wd = 8 paths
+
+loop_4_uv: @each iteration processes two rows
+
+ vld1.8 d8, [r0], r3 @load row 1 in source 1
+ vld1.8 d10, [r1], r4 @load row 1 in source 2
+ vmovl.u8 q4, d8 @converting row 1 in source 1 to 16-bit
+ vld1.8 d12, [r0], r3 @load row 2 in source 1
+ vmovl.u8 q5, d10 @converting row 1 in source 2 to 16-bit
+ vld1.8 d14, [r1], r4 @load row 2 in source 2
+
+ vmovl.u8 q6, d12 @converting row 2 in source 1 to 16-bit
+ vmul.s16 q4, q4, q1 @weight 1 mult. for row 1
+ vmla.s16 q4, q5, q2 @weight 2 mult. for row 1
+ vmovl.u8 q7, d14 @converting row 2 in source 2 to 16-bit
+
+ vmul.s16 q6, q6, q1 @weight 1 mult. for row 2
+ vmla.s16 q6, q7, q2 @weight 2 mult. for row 2
+
+ subs r11, r11, #2 @decrement ht by 2 (flags are consumed by the bgt below; NEON ops do not touch them)
+ vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 1
+ vrshl.s16 q6, q6, q0 @rounds off the weighted samples from row 2
+ vadd.s16 q4, q4, q3 @adding offset for row 1
+ vadd.s16 q6, q6, q3 @adding offset for row 2
+
+ vqmovun.s16 d8, q4 @saturating row 1 to unsigned 8-bit
+ vqmovun.s16 d12, q6 @saturating row 2 to unsigned 8-bit
+
+ vst1.8 d8, [r2], r5 @store row 1 in destination
+ vst1.8 d12, [r2], r5 @store row 2 in destination
+
+ bgt loop_4_uv @if greater than 0 repeat the loop again
+
+ b end_loops_uv @done; skip the wd = 8 path
+
+loop_8_uv: @each iteration processes four rows
+
+ vld1.8 {q4}, [r0], r3 @load row 1 in source 1
+ vld1.8 {q5}, [r1], r4 @load row 1 in source 2
+ vld1.8 {q6}, [r0], r3 @load row 2 in source 1
+ vld1.8 {q7}, [r1], r4 @load row 2 in source 2
+ vmovl.u8 q12, d8 @converting row 1L in source 1 to 16-bit
+ vld1.8 {q8}, [r0], r3 @load row 3 in source 1
+ vld1.8 {q9}, [r1], r4 @load row 3 in source 2
+ vmovl.u8 q13, d10 @converting row 1L in source 2 to 16-bit
+ vld1.8 {q10}, [r0], r3 @load row 4 in source 1
+ vld1.8 {q11}, [r1], r4 @load row 4 in source 2
+
+ vmovl.u8 q4, d9 @converting row 1H in source 1 to 16-bit
+ vmovl.u8 q5, d11 @converting row 1H in source 2 to 16-bit
+
+ vmul.s16 q12, q12, q1 @weight 1 mult. for row 1L
+ vmla.s16 q12, q13, q2 @weight 2 mult. for row 1L
+ vmovl.u8 q14, d12 @converting row 2L in source 1 to 16-bit
+ vmovl.u8 q15, d14 @converting row 2L in source 2 to 16-bit
+
+ vmul.s16 q4, q4, q1 @weight 1 mult. for row 1H
+ vmla.s16 q4, q5, q2 @weight 2 mult. for row 1H
+ vmovl.u8 q6, d13 @converting row 2H in source 1 to 16-bit
+ vmovl.u8 q7, d15 @converting row 2H in source 2 to 16-bit
+
+ vmul.s16 q14, q14, q1 @weight 1 mult. for row 2L
+ vmla.s16 q14, q15, q2 @weight 2 mult. for row 2L
+ vmovl.u8 q13, d16 @converting row 3L in source 1 to 16-bit
+ vmovl.u8 q5, d18 @converting row 3L in source 2 to 16-bit
+
+ vmul.s16 q6, q6, q1 @weight 1 mult. for row 2H
+ vmla.s16 q6, q7, q2 @weight 2 mult. for row 2H
+ vmovl.u8 q8, d17 @converting row 3H in source 1 to 16-bit
+ vmovl.u8 q9, d19 @converting row 3H in source 2 to 16-bit
+
+ vmul.s16 q13, q13, q1 @weight 1 mult. for row 3L
+ vmla.s16 q13, q5, q2 @weight 2 mult. for row 3L
+ vmovl.u8 q15, d20 @converting row 4L in source 1 to 16-bit
+ vmovl.u8 q7, d22 @converting row 4L in source 2 to 16-bit
+
+ vmul.s16 q8, q8, q1 @weight 1 mult. for row 3H
+ vmla.s16 q8, q9, q2 @weight 2 mult. for row 3H
+ vmovl.u8 q10, d21 @converting row 4H in source 1 to 16-bit
+ vmovl.u8 q11, d23 @converting row 4H in source 2 to 16-bit
+
+ vmul.s16 q15, q15, q1 @weight 1 mult. for row 4L
+ vmla.s16 q15, q7, q2 @weight 2 mult. for row 4L
+ vrshl.s16 q12, q12, q0 @rounds off the weighted samples from row 1L
+
+ vmul.s16 q10, q10, q1 @weight 1 mult. for row 4H
+ vmla.s16 q10, q11, q2 @weight 2 mult. for row 4H
+ vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 1H
+
+ vrshl.s16 q14, q14, q0 @rounds off the weighted samples from row 2L
+ vadd.s16 q12, q12, q3 @adding offset for row 1L
+ vrshl.s16 q6, q6, q0 @rounds off the weighted samples from row 2H
+ vadd.s16 q4, q4, q3 @adding offset for row 1H
+ vrshl.s16 q13, q13, q0 @rounds off the weighted samples from row 3L
+ vadd.s16 q14, q14, q3 @adding offset for row 2L
+ vrshl.s16 q8, q8, q0 @rounds off the weighted samples from row 3H
+ vadd.s16 q6, q6, q3 @adding offset for row 2H
+ vrshl.s16 q15, q15, q0 @rounds off the weighted samples from row 4L
+ vadd.s16 q13, q13, q3 @adding offset for row 3L
+ vrshl.s16 q10, q10, q0 @rounds off the weighted samples from row 4H
+ vadd.s16 q8, q8, q3 @adding offset for row 3H
+
+ vqmovun.s16 d10, q12 @saturating row 1L to unsigned 8-bit
+ vadd.s16 q15, q15, q3 @adding offset for row 4L
+ vqmovun.s16 d11, q4 @saturating row 1H to unsigned 8-bit
+ vadd.s16 q10, q10, q3 @adding offset for row 4H
+
+ vqmovun.s16 d18, q14 @saturating row 2L to unsigned 8-bit
+ vqmovun.s16 d19, q6 @saturating row 2H to unsigned 8-bit
+ vqmovun.s16 d14, q13 @saturating row 3L to unsigned 8-bit
+ vqmovun.s16 d15, q8 @saturating row 3H to unsigned 8-bit
+ vst1.8 {q5}, [r2], r5 @store row 1 in destination
+ vqmovun.s16 d22, q15 @saturating row 4L to unsigned 8-bit
+ vqmovun.s16 d23, q10 @saturating row 4H to unsigned 8-bit
+
+ vst1.8 {q9}, [r2], r5 @store row 2 in destination
+ subs r11, r11, #4 @decrement ht by 4
+ vst1.8 {q7}, [r2], r5 @store row 3 in destination
+ vst1.8 {q11}, [r2], r5 @store row 4 in destination
+
+ bgt loop_8_uv @if greater than 0 repeat the loop again
+
+end_loops_uv:
+
+ vpop {d8-d15} @restore d8-d15 saved by the vpush above
+ ldmfd sp!, {r4-r12, r15} @restore r4-r12 and return (the saved lr is popped into pc)
+
+
diff --git a/common/arm/ih264_weighted_pred_a9q.s b/common/arm/ih264_weighted_pred_a9q.s
new file mode 100755
index 0000000..1ce94d0
--- /dev/null
+++ b/common/arm/ih264_weighted_pred_a9q.s
@@ -0,0 +1,479 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@******************************************************************************
+@* @file
+@* ih264_weighted_pred_a9q.s
+@*
+@* @brief
+@* Contains function definitions for weighted prediction.
+@*
+@* @author
+@* Kaushik Senthoor R
+@*
+@* @par List of Functions:
+@*
+@* - ih264_weighted_pred_luma_a9q()
+@* - ih264_weighted_pred_chroma_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@*******************************************************************************
+@* @function
+@* ih264_weighted_pred_luma_a9q()
+@*
+@* @brief
+@* This routine performs the weighted prediction as described in sec
+@* 8.4.2.3.2 titled "Weighted sample prediction process" for luma.
+@*
+@* @par Description:
+@* This function gets a ht x wd block, calculates the weighted sample, rounds
+@* off, adds offset and stores it in the destination block.
+@*
+@* @param[in] pu1_src:
+@* UWORD8 Pointer to the buffer containing the input block.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output block is stored.
+@*
+@* @param[in] src_strd
+@* Stride of the input buffer
+@*
+@* @param[in] dst_strd
+@* Stride of the destination buffer
+@*
+@* @param[in] log_wd
+@* number of bits to be rounded off
+@*
+@* @param[in] wt
+@* weight for the weighted prediction
+@*
+@* @param[in] ofst
+@* offset used after rounding off
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+@*
+@*******************************************************************************
+@*/
+@void ih264_weighted_pred_luma_a9q(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 log_wd,
+@ WORD32 wt,
+@ WORD32 ofst,
+@ WORD32 ht,
+@ WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src
+@ r1 => pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ [sp] => log_wd (r4)
+@ [sp+4] => wt (r5)
+@ [sp+8] => ofst (r6)
+@ [sp+12] => ht (r7)
+@ [sp+16] => wd (r8)
+@
+.text
+.p2align 2
+
+ .global ih264_weighted_pred_luma_a9q
+
+ih264_weighted_pred_luma_a9q:
+
+ stmfd sp!, {r4-r9, r14} @save r4-r9 and lr (7 regs = 28 bytes); stack arguments now start at [sp, #28]
+ ldr r5, [sp, #32] @Load wt
+ ldr r4, [sp, #28] @Load log_wd in r4
+ ldr r6, [sp, #36] @Load ofst
+ ldr r7, [sp, #40] @Load ht
+ ldr r8, [sp, #44] @Load wd
+ vpush {d8-d15} @save d8-d15 (all stack arguments were loaded above, before sp moves)
+
+ vdup.16 d2, r5 @D2 = wt (16-bit)
+ rsb r9, r4, #0 @r9 = -log_wd
+ vdup.8 d3, r6 @D3 = ofst (8-bit)
+ cmp r8, #16 @check if wd is 16
+ vdup.16 q0, r9 @Q0 = -log_wd (16-bit); negative count makes vrshl a rounding right shift
+ beq loop_16 @branch if wd is 16
+
+ cmp r8, #8 @check if wd is 8
+ beq loop_8 @branch if wd is 8
+
+loop_4: @each iteration processes four rows (fall-through case: wd is neither 16 nor 8)
+
+ vld1.32 d4[0], [r0], r2 @load row 1 in source
+ vld1.32 d4[1], [r0], r2 @load row 2 in source
+ vld1.32 d6[0], [r0], r2 @load row 3 in source
+ vld1.32 d6[1], [r0], r2 @load row 4 in source
+
+ vmovl.u8 q2, d4 @converting rows 1,2 to 16-bit
+ vmovl.u8 q3, d6 @converting rows 3,4 to 16-bit
+
+ vmul.s16 q2, q2, d2[0] @weight mult. for rows 1,2
+ vmul.s16 q3, q3, d2[0] @weight mult. for rows 3,4
+
+ subs r7, r7, #4 @decrement ht by 4 (flags are consumed by the bgt below; NEON ops do not touch them)
+ vrshl.s16 q2, q2, q0 @rounds off the weighted samples from rows 1,2
+ vrshl.s16 q3, q3, q0 @rounds off the weighted samples from rows 3,4
+
+ vaddw.s8 q2, q2, d3 @adding offset for rows 1,2
+ vaddw.s8 q3, q3, d3 @adding offset for rows 3,4
+
+ vqmovun.s16 d4, q2 @saturating rows 1,2 to unsigned 8-bit
+ vqmovun.s16 d6, q3 @saturating rows 3,4 to unsigned 8-bit
+
+ vst1.32 d4[0], [r1], r3 @store row 1 in destination
+ vst1.32 d4[1], [r1], r3 @store row 2 in destination
+ vst1.32 d6[0], [r1], r3 @store row 3 in destination
+ vst1.32 d6[1], [r1], r3 @store row 4 in destination
+
+ bgt loop_4 @if greater than 0 repeat the loop again
+
+ b end_loops @done; skip the wd = 8 and wd = 16 paths
+
+loop_8: @each iteration processes four rows
+
+ vld1.8 d4, [r0], r2 @load row 1 in source
+ vld1.8 d6, [r0], r2 @load row 2 in source
+ vld1.8 d8, [r0], r2 @load row 3 in source
+ vmovl.u8 q2, d4 @converting row 1 to 16-bit
+ vld1.8 d10, [r0], r2 @load row 4 in source
+ vmovl.u8 q3, d6 @converting row 2 to 16-bit
+
+ vmovl.u8 q4, d8 @converting row 3 to 16-bit
+ vmul.s16 q2, q2, d2[0] @weight mult. for row 1
+ vmovl.u8 q5, d10 @converting row 4 to 16-bit
+ vmul.s16 q3, q3, d2[0] @weight mult. for row 2
+ vmul.s16 q4, q4, d2[0] @weight mult. for row 3
+ vmul.s16 q5, q5, d2[0] @weight mult. for row 4
+
+ vrshl.s16 q2, q2, q0 @rounds off the weighted samples from row 1
+ vrshl.s16 q3, q3, q0 @rounds off the weighted samples from row 2
+ vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 3
+ vaddw.s8 q2, q2, d3 @adding offset for row 1
+ vrshl.s16 q5, q5, q0 @rounds off the weighted samples from row 4
+ vaddw.s8 q3, q3, d3 @adding offset for row 2
+
+ vaddw.s8 q4, q4, d3 @adding offset for row 3
+ vqmovun.s16 d4, q2 @saturating row 1 to unsigned 8-bit
+ vaddw.s8 q5, q5, d3 @adding offset for row 4
+ vqmovun.s16 d6, q3 @saturating row 2 to unsigned 8-bit
+ vqmovun.s16 d8, q4 @saturating row 3 to unsigned 8-bit
+ vqmovun.s16 d10, q5 @saturating row 4 to unsigned 8-bit
+
+ vst1.8 d4, [r1], r3 @store row 1 in destination
+ vst1.8 d6, [r1], r3 @store row 2 in destination
+ subs r7, r7, #4 @decrement ht by 4
+ vst1.8 d8, [r1], r3 @store row 3 in destination
+ vst1.8 d10, [r1], r3 @store row 4 in destination
+
+ bgt loop_8 @if greater than 0 repeat the loop again
+
+ b end_loops @done; skip the wd = 16 path
+
+loop_16: @each iteration processes four rows
+
+ vld1.8 {q2}, [r0], r2 @load row 1 in source
+ vld1.8 {q3}, [r0], r2 @load row 2 in source
+ vmovl.u8 q6, d4 @converting row 1L to 16-bit
+ vld1.8 {q4}, [r0], r2 @load row 3 in source
+ vmovl.u8 q7, d5 @converting row 1H to 16-bit
+ vld1.8 {q5}, [r0], r2 @load row 4 in source
+
+ vmovl.u8 q8, d6 @converting row 2L to 16-bit
+ vmul.s16 q6, q6, d2[0] @weight mult. for row 1L
+ vmovl.u8 q9, d7 @converting row 2H to 16-bit
+ vmul.s16 q7, q7, d2[0] @weight mult. for row 1H
+ vmovl.u8 q10, d8 @converting row 3L to 16-bit
+ vmul.s16 q8, q8, d2[0] @weight mult. for row 2L
+ vmovl.u8 q11, d9 @converting row 3H to 16-bit
+ vmul.s16 q9, q9, d2[0] @weight mult. for row 2H
+ vmovl.u8 q12, d10 @converting row 4L to 16-bit
+ vmul.s16 q10, q10, d2[0] @weight mult. for row 3L
+ vmovl.u8 q13, d11 @converting row 4H to 16-bit
+ vmul.s16 q11, q11, d2[0] @weight mult. for row 3H
+
+ vmul.s16 q12, q12, d2[0] @weight mult. for row 4L
+ vrshl.s16 q6, q6, q0 @rounds off the weighted samples from row 1L
+ vmul.s16 q13, q13, d2[0] @weight mult. for row 4H
+
+ vrshl.s16 q7, q7, q0 @rounds off the weighted samples from row 1H
+ vrshl.s16 q8, q8, q0 @rounds off the weighted samples from row 2L
+ vaddw.s8 q6, q6, d3 @adding offset for row 1L
+ vrshl.s16 q9, q9, q0 @rounds off the weighted samples from row 2H
+ vaddw.s8 q7, q7, d3 @adding offset for row 1H
+ vqmovun.s16 d4, q6 @saturating row 1L to unsigned 8-bit
+ vrshl.s16 q10, q10, q0 @rounds off the weighted samples from row 3L
+ vaddw.s8 q8, q8, d3 @adding offset for row 2L
+ vqmovun.s16 d5, q7 @saturating row 1H to unsigned 8-bit
+ vrshl.s16 q11, q11, q0 @rounds off the weighted samples from row 3H
+ vaddw.s8 q9, q9, d3 @adding offset for row 2H
+ vqmovun.s16 d6, q8 @saturating row 2L to unsigned 8-bit
+ vrshl.s16 q12, q12, q0 @rounds off the weighted samples from row 4L
+ vaddw.s8 q10, q10, d3 @adding offset for row 3L
+ vqmovun.s16 d7, q9 @saturating row 2H to unsigned 8-bit
+ vrshl.s16 q13, q13, q0 @rounds off the weighted samples from row 4H
+ vaddw.s8 q11, q11, d3 @adding offset for row 3H
+
+ vqmovun.s16 d8, q10 @saturating row 3L to unsigned 8-bit
+ vaddw.s8 q12, q12, d3 @adding offset for row 4L
+ vqmovun.s16 d9, q11 @saturating row 3H to unsigned 8-bit
+ vaddw.s8 q13, q13, d3 @adding offset for row 4H
+
+ vqmovun.s16 d10, q12 @saturating row 4L to unsigned 8-bit
+ vst1.8 {q2}, [r1], r3 @store row 1 in destination
+ vqmovun.s16 d11, q13 @saturating row 4H to unsigned 8-bit
+ vst1.8 {q3}, [r1], r3 @store row 2 in destination
+ subs r7, r7, #4 @decrement ht by 4
+ vst1.8 {q4}, [r1], r3 @store row 3 in destination
+ vst1.8 {q5}, [r1], r3 @store row 4 in destination
+
+ bgt loop_16 @if greater than 0 repeat the loop again
+
+end_loops:
+
+ vpop {d8-d15} @restore d8-d15 saved by the vpush above
+ ldmfd sp!, {r4-r9, r15} @restore r4-r9 and return (the saved lr is popped into pc)
+
+
+@*******************************************************************************
+@* @function
+@* ih264_weighted_pred_chroma_a9q()
+@*
+@* @brief
+@* This routine performs the weighted prediction as described in sec
+@* 8.4.2.3.2 titled "Weighted sample prediction process" for chroma.
+@*
+@* @par Description:
+@* This function gets a ht x wd block, calculates the weighted sample, rounds
+@* off, adds offset and stores it in the destination block for U and V.
+@*
+@* @param[in] pu1_src:
+@* UWORD8 Pointer to the buffer containing the input block.
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination where the output block is stored.
+@*
+@* @param[in] src_strd
+@* Stride of the input buffer
+@*
+@* @param[in] dst_strd
+@* Stride of the destination buffer
+@*
+@* @param[in] log_wd
+@* number of bits to be rounded off
+@*
+@* @param[in] wt
+@* weights for the weighted prediction for U and V
+@*
+@* @param[in] ofst
+@* offsets used after rounding off for U and V
+@*
+@* @param[in] ht
+@* integer height of the array
+@*
+@* @param[in] wd
+@* integer width of the array
+@*
+@* @returns
+@* None
+@*
+@* @remarks
+@* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+@*
+@*******************************************************************************
+@*/
+@void ih264_weighted_pred_chroma_a9q(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 log_wd,
+@ WORD32 wt,
+@ WORD32 ofst,
+@ WORD32 ht,
+@ WORD32 wd)
+@
+@**************Variables Vs Registers*****************************************
+@ r0 => pu1_src
+@ r1 => pu1_dst
+@ r2 => src_strd
+@ r3 => dst_strd
+@ [sp] => log_wd (r4)
+@ [sp+4] => wt (r5)
+@ [sp+8] => ofst (r6)
+@ [sp+12] => ht (r7)
+@ [sp+16] => wd (r8)
+@
+
+
+ .global ih264_weighted_pred_chroma_a9q
+
+ih264_weighted_pred_chroma_a9q:
+
+ stmfd sp!, {r4-r9, r14} @save r4-r9 and lr (7 regs = 28 bytes); stack arguments now start at [sp, #28]
+
+ ldr r4, [sp, #28] @Load log_wd in r4
+ ldr r5, [sp, #32] @Load wt = {wt_u (16-bit), wt_v (16-bit)}
+ ldr r6, [sp, #36] @Load ofst = {ofst_u (8-bit), ofst_v (8-bit)}
+ ldr r8, [sp, #44] @Load wd
+
+ rsb r9, r4, #0 @r9 = -log_wd
+ vdup.32 q1, r5 @Q1 = {wt_u (16-bit), wt_v (16-bit)}
+ ldr r7, [sp, #40] @Load ht
+ vpush {d8-d15} @save d8-d15 (all stack arguments were loaded above, before sp moves)
+ vdup.16 d4, r6 @D4 = {ofst_u (8-bit), ofst_v (8-bit)}
+ cmp r8, #8 @check if wd is 8
+ vdup.16 q0, r9 @Q0 = -log_wd (16-bit); negative count makes vrshl a rounding right shift
+ beq loop_8_uv @branch if wd is 8
+
+ cmp r8, #4 @check if wd is 4
+ beq loop_4_uv @branch if wd is 4
+
+loop_2_uv: @each iteration processes two rows (fall-through case: wd is neither 8 nor 4)
+
+ vld1.32 d6[0], [r0], r2 @load row 1 in source
+ vld1.32 d6[1], [r0], r2 @load row 2 in source
+
+ vmovl.u8 q3, d6 @converting rows 1,2 to 16-bit
+
+ vmul.s16 q3, q3, q1 @weight mult. for rows 1,2
+
+ vrshl.s16 q3, q3, q0 @rounds off the weighted samples from rows 1,2
+
+ vaddw.s8 q3, q3, d4 @adding offset for rows 1,2
+
+ vqmovun.s16 d6, q3 @saturating rows 1,2 to unsigned 8-bit
+
+ subs r7, r7, #2 @decrement ht by 2
+ vst1.32 d6[0], [r1], r3 @store row 1 in destination
+ vst1.32 d6[1], [r1], r3 @store row 2 in destination
+
+ bgt loop_2_uv @if greater than 0 repeat the loop again
+
+ b end_loops_uv @done; skip the wd = 4 and wd = 8 paths
+
+loop_4_uv: @each iteration processes two rows
+
+ vld1.8 d6, [r0], r2 @load row 1 in source
+ vld1.8 d8, [r0], r2 @load row 2 in source
+
+ vmovl.u8 q3, d6 @converting row 1 to 16-bit
+ vmovl.u8 q4, d8 @converting row 2 to 16-bit
+
+ vmul.s16 q3, q3, q1 @weight mult. for row 1
+ vmul.s16 q4, q4, q1 @weight mult. for row 2
+
+ subs r7, r7, #2 @decrement ht by 2 (flags are consumed by the bgt below; NEON ops do not touch them)
+ vrshl.s16 q3, q3, q0 @rounds off the weighted samples from row 1
+ vrshl.s16 q4, q4, q0 @rounds off the weighted samples from row 2
+
+ vaddw.s8 q3, q3, d4 @adding offset for row 1
+ vaddw.s8 q4, q4, d4 @adding offset for row 2
+
+ vqmovun.s16 d6, q3 @saturating row 1 to unsigned 8-bit
+ vqmovun.s16 d8, q4 @saturating row 2 to unsigned 8-bit
+
+ vst1.8 d6, [r1], r3 @store row 1 in destination
+ vst1.8 d8, [r1], r3 @store row 2 in destination
+
+ bgt loop_4_uv @if greater than 0 repeat the loop again
+
+ b end_loops_uv @done; skip the wd = 8 path
+
+loop_8_uv: @each iteration processes four rows
+
+ vld1.8 {q3}, [r0], r2 @load row 1 in source
+ vld1.8 {q4}, [r0], r2 @load row 2 in source
+ vmovl.u8 q7, d6 @converting row 1L to 16-bit
+ vld1.8 {q5}, [r0], r2 @load row 3 in source
+ vmovl.u8 q8, d7 @converting row 1H to 16-bit
+ vld1.8 {q6}, [r0], r2 @load row 4 in source
+
+ vmul.s16 q7, q7, q1 @weight mult. for row 1L
+ vmovl.u8 q9, d8 @converting row 2L to 16-bit
+ vmul.s16 q8, q8, q1 @weight mult. for row 1H
+ vmovl.u8 q10, d9 @converting row 2H to 16-bit
+ vmul.s16 q9, q9, q1 @weight mult. for row 2L
+ vmovl.u8 q11, d10 @converting row 3L to 16-bit
+ vmul.s16 q10, q10, q1 @weight mult. for row 2H
+ vmovl.u8 q12, d11 @converting row 3H to 16-bit
+ vmul.s16 q11, q11, q1 @weight mult. for row 3L
+ vmovl.u8 q13, d12 @converting row 4L to 16-bit
+ vmul.s16 q12, q12, q1 @weight mult. for row 3H
+ vmovl.u8 q14, d13 @converting row 4H to 16-bit
+
+ vmul.s16 q13, q13, q1 @weight mult. for row 4L
+ vrshl.s16 q7, q7, q0 @rounds off the weighted samples from row 1L
+ vmul.s16 q14, q14, q1 @weight mult. for row 4H
+
+ vrshl.s16 q8, q8, q0 @rounds off the weighted samples from row 1H
+ vrshl.s16 q9, q9, q0 @rounds off the weighted samples from row 2L
+ vaddw.s8 q7, q7, d4 @adding offset for row 1L
+ vrshl.s16 q10, q10, q0 @rounds off the weighted samples from row 2H
+ vaddw.s8 q8, q8, d4 @adding offset for row 1H
+ vqmovun.s16 d6, q7 @saturating row 1L to unsigned 8-bit
+ vrshl.s16 q11, q11, q0 @rounds off the weighted samples from row 3L
+ vaddw.s8 q9, q9, d4 @adding offset for row 2L
+ vqmovun.s16 d7, q8 @saturating row 1H to unsigned 8-bit
+ vrshl.s16 q12, q12, q0 @rounds off the weighted samples from row 3H
+ vaddw.s8 q10, q10, d4 @adding offset for row 2H
+ vqmovun.s16 d8, q9 @saturating row 2L to unsigned 8-bit
+ vrshl.s16 q13, q13, q0 @rounds off the weighted samples from row 4L
+ vaddw.s8 q11, q11, d4 @adding offset for row 3L
+ vqmovun.s16 d9, q10 @saturating row 2H to unsigned 8-bit
+ vrshl.s16 q14, q14, q0 @rounds off the weighted samples from row 4H
+ vaddw.s8 q12, q12, d4 @adding offset for row 3H
+
+ vqmovun.s16 d10, q11 @saturating row 3L to unsigned 8-bit
+ vaddw.s8 q13, q13, d4 @adding offset for row 4L
+ vqmovun.s16 d11, q12 @saturating row 3H to unsigned 8-bit
+ vaddw.s8 q14, q14, d4 @adding offset for row 4H
+
+ vqmovun.s16 d12, q13 @saturating row 4L to unsigned 8-bit
+ vst1.8 {q3}, [r1], r3 @store row 1 in destination
+ vqmovun.s16 d13, q14 @saturating row 4H to unsigned 8-bit
+ vst1.8 {q4}, [r1], r3 @store row 2 in destination
+ subs r7, r7, #4 @decrement ht by 4
+ vst1.8 {q5}, [r1], r3 @store row 3 in destination
+ vst1.8 {q6}, [r1], r3 @store row 4 in destination
+
+ bgt loop_8_uv @if greater than 0 repeat the loop again
+
+end_loops_uv:
+
+ vpop {d8-d15} @restore d8-d15 saved by the vpush above
+ ldmfd sp!, {r4-r9, r15} @restore r4-r9 and return (the saved lr is popped into pc)
+
+
diff --git a/common/armv8/ih264_deblk_chroma_av8.s b/common/armv8/ih264_deblk_chroma_av8.s
new file mode 100755
index 0000000..3021556
--- /dev/null
+++ b/common/armv8/ih264_deblk_chroma_av8.s
@@ -0,0 +1,585 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///*****************************************************************************/
+///* */
+///* File Name : ih264_deblk_chroma_av8.s */
+///* */
+///* Description : Contains function definitions for deblocking chroma */
+///* edge. Functions are coded in NEON assembly and can */
+///* be compiled using ARM RVDS. */
+///* */
+///* List of Functions : ih264_deblk_chroma_vert_bs4_av8() */
+///* ih264_deblk_chroma_vert_bslt4_av8() */
+///* ih264_deblk_chroma_horz_bs4_av8() */
+///* ih264_deblk_chroma_horz_bslt4_av8() */
+///* Issues / Problems : None */
+///* */
+///* Revision History : */
+///* */
+///* DD MM YYYY Author(s) Changes (Describe the changes made) */
+///* 28 11 2013 Ittiam Draft */
+///*****************************************************************************/
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Performs filtering of a chroma block horizontal edge when the
+//* boundary strength is set to 4 in high profile
+//*
+//* @par Description:
+//* This operation is described in Sec. 8.7.2.4 under the title
+//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+//*
+//* @param[in] x0 - pu1_src
+//* Pointer to the src sample q0
+//*
+//* @param[in] x1 - src_strd
+//* Source stride
+//*
+//* @param[in] x2 - alpha_cb
+//* Alpha Value for the boundary in U
+//*
+//* @param[in] x3 - beta_cb
+//* Beta Value for the boundary in U
+//*
+//* @param[in] x4 - alpha_cr
+//* Alpha Value for the boundary in V
+//*
+//* @param[in] x5 - beta_cr
+//* Beta Value for the boundary in V
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+ .global ih264_deblk_chroma_horz_bs4_av8
+
+ih264_deblk_chroma_horz_bs4_av8:
+
+ // ARMv7 original prologue, kept for reference: STMFD sp!,{x4-x6,x14}
+ push_v_regs //macro from ih264_neon_macros.s (not visible here); presumably saves the callee-saved SIMD registers - confirm
+ stp x19, x20, [sp, #-16]! //x19/x20 are saved here and restored before ret; not otherwise referenced in this function
+ mov x6, x5 //x6 = beta_cr (passed in x5)
+ mov x5, x4 //x5 = alpha_cr (passed in x4); x4 is reused below as the store pointer
+ sub x0, x0, x1, lsl #1 //x0 = pu1_src - 2*src_strd, i.e. the p1 row of chroma
+ ld2 {v6.8b, v7.8b}, [x0], x1 //v6 = p1u , v7 = p1v (ld2 de-interleaves alternate bytes)
+ mov x4, x0 //Keeping a backup of the pointer to the p0 row for the stores
+ ld2 {v4.8b, v5.8b}, [x0], x1 //v4 = p0u , v5 = p0v
+ dup v20.8b, w2 //low half of v20 contains alpha_cb
+ dup v21.8b, w5 //v21 contains alpha_cr
+ mov v20.d[1], v21.d[0] //v20 = {alpha_cb x8, alpha_cr x8}
+ ld2 {v0.8b, v1.8b}, [x0], x1 //v0 = q0u , v1 = q0v
+ uaddl v8.8h, v6.8b, v0.8b //v8 = p1u + q0u (16-bit)
+ uaddl v10.8h, v7.8b, v1.8b //v10 = p1v + q0v (16-bit)
+ movi v31.8b, #2 //constant 2 used as the multiplier in the umlal steps
+ ld2 {v2.8b, v3.8b}, [x0] //v2 = q1u , v3 = q1v
+ mov v0.d[1], v1.d[0] //v0 = {q0u, q0v}
+ mov v2.d[1], v3.d[0] //v2 = {q1u, q1v}
+ mov v4.d[1], v5.d[0] //v4 = {p0u, p0v}
+ mov v6.d[1], v7.d[0] //v6 = {p1u, p1v}
+ uabd v26.16b, v6.16b , v4.16b //v26 = ABS(p1 - p0)
+ umlal v8.8h, v2.8b, v31.8b //v8 = 2*q1u + q0u + p1u
+ umlal v10.8h, v3.8b, v31.8b //v10 = 2*q1v + q0v + p1v
+ uabd v22.16b, v4.16b , v0.16b //v22 = ABS(p0 - q0)
+ uabd v24.16b, v2.16b , v0.16b //v24 = ABS(q1 - q0)
+ uaddl v14.8h, v4.8b, v2.8b //v14 = p0u + q1u
+ uaddl v28.8h, v5.8b, v3.8b //v28 = p0v + q1v
+ dup v16.8b, w3 //low half of v16 contains beta_cb
+ dup v17.8b, w6 //v17 contains beta_cr
+ mov v16.d[1], v17.d[0] //v16 = {beta_cb x8, beta_cr x8}
+ umlal v14.8h, v6.8b, v31.8b //v14 = 2*p1u + p0u + q1u
+ umlal v28.8h, v7.8b, v31.8b //v28 = 2*p1v + p0v + q1v
+ cmhs v18.16b, v22.16b, v20.16b //v18 = ( ABS(p0 - q0) >= Alpha )
+ cmhs v24.16b, v24.16b, v16.16b //v24 = ( ABS(q1 - q0) >= Beta )
+ cmhs v26.16b, v26.16b, v16.16b //v26 = ( ABS(p1 - p0) >= Beta )
+ rshrn v8.8b, v8.8h, #2 //filtered q0u = (2*q1u + q0u + p1u + 2) >> 2
+ rshrn v9.8b, v10.8h, #2 //filtered q0v = (2*q1v + q0v + p1v + 2) >> 2
+ mov v8.d[1], v9.d[0] //v8 = {filtered q0u, filtered q0v}
+ orr v18.16b, v18.16b , v24.16b //v18 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
+ rshrn v10.8b, v14.8h, #2 //filtered p0u = (2*p1u + p0u + q1u + 2) >> 2
+ rshrn v11.8b, v28.8h, #2 //filtered p0v = (2*p1v + p0v + q1v + 2) >> 2
+ mov v10.d[1], v11.d[0] //v10 = {filtered p0u, filtered p0v}
+ orr v18.16b, v18.16b , v26.16b //v18 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
+ bit v10.16b, v4.16b , v18.16b //where v18 is set (filter disabled) put back the unfiltered p0
+ bit v8.16b, v0.16b , v18.16b //where v18 is set (filter disabled) put back the unfiltered q0
+ mov v11.d[0], v10.d[1] //split into U (v10) and V (v11) halves for the interleaving store
+ mov v9.d[0], v8.d[1] //split into U (v8) and V (v9) halves for the interleaving store
+ st2 {v10.8b, v11.8b}, [x4], x1 //store the p0 row (st2 re-interleaves U/V)
+ st2 {v8.8b, v9.8b}, [x4] //store the q0 row
+ // ARMv7 original epilogue, kept for reference: LDMFD sp!,{x4-x6,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Performs filtering of a chroma block vertical edge when the
+//* boundary strength is set to 4 in high profile
+//*
+//* @par Description:
+//* This operation is described in Sec. 8.7.2.4 under the title
+//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+//*
+//* @param[in] x0 - pu1_src
+//* Pointer to the src sample q0
+//*
+//* @param[in] x1 - src_strd
+//* Source stride
+//*
+//* @param[in] x2 - alpha_cb
+//* Alpha Value for the boundary in U
+//*
+//* @param[in] x3 - beta_cb
+//* Beta Value for the boundary in U
+//*
+//* @param[in] x4 - alpha_cr
+//* Alpha Value for the boundary in V
+//*
+//* @param[in] x5 - beta_cr
+//* Beta Value for the boundary in V
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+ .global ih264_deblk_chroma_vert_bs4_av8
+
+ih264_deblk_chroma_vert_bs4_av8:
+
+ // ARMv7 original prologue, kept for reference: STMFD sp!,{x4,x5,x12,x14}
+ push_v_regs //macro from ih264_neon_macros.s (not visible here); presumably saves the callee-saved SIMD registers - confirm
+ stp x19, x20, [sp, #-16]! //x19/x20 are saved here and restored before ret; not otherwise referenced in this function
+
+ sub x0, x0, #4 //point x0 to p1u of row0.
+ mov x12, x0 //keep a back up of x0 for buffer write
+
+ add x2, x2, x4, lsl #8 //x2 = (alpha_cr,alpha_cb) packed as two bytes
+ add x3, x3, x5, lsl #8 //x3 = (beta_cr,beta_cb) packed as two bytes
+
+ ld4 {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1 //rows 0-3: each row supplies four 16-bit (u,v) pairs,
+ ld4 {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1 //lane i of v0 = p1, v1 = p0, v2 = q0, v3 = q1
+ ld4 {v0.h, v1.h, v2.h, v3.h}[2], [x0], x1
+ ld4 {v0.h, v1.h, v2.h, v3.h}[3], [x0], x1
+
+ ld4 {v4.h, v5.h, v6.h, v7.h}[0], [x0], x1 //rows 4-7: v4 = p1, v5 = p0, v6 = q0, v7 = q1
+ ld4 {v4.h, v5.h, v6.h, v7.h}[1], [x0], x1
+ ld4 {v4.h, v5.h, v6.h, v7.h}[2], [x0], x1
+ ld4 {v4.h, v5.h, v6.h, v7.h}[3], [x0], x1
+
+ mov v10.16b, v2.16b //regroup (v10 is scratch) so that afterwards
+ mov v2.16b, v1.16b //v0/v1 = p1, v2/v3 = p0, v4/v5 = q0, v6/v7 = q1
+ mov v1.16b, v4.16b //(even register = rows 0-3, odd register = rows 4-7)
+ mov v4.16b, v10.16b
+ mov v10.16b, v6.16b
+ mov v6.16b, v3.16b
+ mov v3.16b, v5.16b
+ mov v5.16b, v10.16b
+
+ dup v22.8h, w2 //v22 = alpha, bytes alternate alpha_cb, alpha_cr
+ dup v24.8h, w3 //v24 = beta, bytes alternate beta_cb, beta_cr
+ movi v31.8b, #2 //constant 2 used as the multiplier in the umlal steps
+
+ mov v0.d[1], v1.d[0] //v0 = p1 for rows 0-7
+ mov v2.d[1], v3.d[0] //v2 = p0 for rows 0-7
+ mov v4.d[1], v5.d[0] //v4 = q0 for rows 0-7
+ mov v6.d[1], v7.d[0] //v6 = q1 for rows 0-7
+
+ uabd v8.16b, v2.16b , v4.16b //|p0-q0|
+ uabd v10.16b, v6.16b , v4.16b //|q1-q0|
+ uabd v12.16b, v0.16b , v2.16b //|p1-p0|
+ uaddl v14.8h, v2.8b, v6.8b //(p0 + q1), rows 0-3
+ uaddl v16.8h, v3.8b, v7.8b //(p0 + q1), rows 4-7
+ cmhi v8.16b, v22.16b , v8.16b //|p0-q0| < alpha ?
+ cmhi v10.16b, v24.16b , v10.16b //|q1-q0| < beta ?
+ cmhi v12.16b, v24.16b , v12.16b //|p1-p0| < beta ?
+ umlal v14.8h, v0.8b, v31.8b //2*p1 + (p0 + q1), rows 0-3
+ umlal v16.8h, v1.8b, v31.8b //2*p1 + (p0 + q1), rows 4-7
+ uaddl v18.8h, v0.8b, v4.8b //(p1 + q0), rows 0-3
+ uaddl v20.8h, v1.8b, v5.8b //(p1 + q0), rows 4-7
+ and v8.16b, v8.16b , v10.16b //|p0-q0| < alpha && |q1-q0| < beta
+ umlal v18.8h, v6.8b, v31.8b //2*q1 + (p1 + q0), rows 0-3
+ umlal v20.8h, v7.8b, v31.8b //2*q1 + (p1 + q0), rows 4-7
+
+ rshrn v14.8b, v14.8h, #2
+ rshrn v15.8b, v16.8h, #2 //(2*p1 + (p0 + q1) + 2) >> 2
+ mov v14.d[1], v15.d[0] //v14 = filtered p0 for rows 0-7
+ and v8.16b, v8.16b , v12.16b //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+ rshrn v18.8b, v18.8h, #2
+ rshrn v19.8b, v20.8h, #2 //(2*q1 + (p1 + q0) + 2) >> 2
+ mov v18.d[1], v19.d[0] //v18 = filtered q0 for rows 0-7
+ bit v2.16b, v14.16b , v8.16b //take filtered p0 only where the filter condition holds
+ bit v4.16b, v18.16b , v8.16b //take filtered q0 only where the filter condition holds
+
+ mov v1.d[0], v0.d[1] //split each 128-bit vector back into rows 0-3 / rows 4-7
+ mov v3.d[0], v2.d[1]
+ mov v5.d[0], v4.d[1]
+ mov v7.d[0], v6.d[1]
+
+ mov v10.16b, v1.16b //undo the regrouping (v10 is scratch) so that
+ mov v1.16b, v2.16b //v0-v3 = p1,p0,q0,q1 of rows 0-3 and
+ mov v2.16b, v4.16b //v4-v7 = p1,p0,q0,q1 of rows 4-7, as st4 expects
+ mov v4.16b, v10.16b
+ mov v10.16b, v3.16b
+ mov v3.16b, v6.16b
+ mov v6.16b, v5.16b
+ mov v5.16b, v10.16b
+
+ st4 {v0.h, v1.h, v2.h, v3.h}[0], [x12], x1 //write back rows 0-3 through the saved pointer
+ st4 {v0.h, v1.h, v2.h, v3.h}[1], [x12], x1
+ st4 {v0.h, v1.h, v2.h, v3.h}[2], [x12], x1
+ st4 {v0.h, v1.h, v2.h, v3.h}[3], [x12], x1
+
+ st4 {v4.h, v5.h, v6.h, v7.h}[0], [x12], x1 //write back rows 4-7
+ st4 {v4.h, v5.h, v6.h, v7.h}[1], [x12], x1
+ st4 {v4.h, v5.h, v6.h, v7.h}[2], [x12], x1
+ st4 {v4.h, v5.h, v6.h, v7.h}[3], [x12], x1
+
+ // ARMv7 original epilogue, kept for reference: LDMFD sp!,{x4,x5,x12,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Performs filtering of a chroma block horizontal edge for cases where the
+//* boundary strength is less than 4 in high profile
+//*
+//* @par Description:
+//* This operation is described in Sec. 8.7.2.3 under the title
+//* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+//*
+//* @param[in] x0 - pu1_src
+//* Pointer to the src sample q0
+//*
+//* @param[in] x1 - src_strd
+//* Source stride
+//*
+//* @param[in] x2 - alpha_cb
+//* Alpha Value for the boundary in U
+//*
+//* @param[in] x3 - beta_cb
+//* Beta Value for the boundary in U
+//*
+//* @param[in] x4 - alpha_cr
+//* Alpha Value for the boundary in V
+//*
+//* @param[in] x5 - beta_cr
+//* Beta Value for the boundary in V
+//*
+//* @param[in] x6 - u4_bs
+//* Packed Boundary strength array
+//*
+//* @param[in] x7 - pu1_cliptab_cb
+//* tc0_table for U
+//*
+//* @param[in] sp(0) - pu1_cliptab_cr
+//* tc0_table for V
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+ .global ih264_deblk_chroma_horz_bslt4_av8
+
+ih264_deblk_chroma_horz_bslt4_av8:
+
+ // ARMv7 original prologue, kept for reference: STMFD sp!,{x4-x9,x14}
+ push_v_regs //macro from ih264_neon_macros.s (not visible here); presumably saves the callee-saved SIMD registers - confirm
+ stp x19, x20, [sp, #-16]! //x19/x20 are saved here and restored before ret; not otherwise referenced in this function
+ mov x8, x7 //x8 = pu1_cliptab_cb (passed in x7)
+ mov x7, x6 //x7 = u4_bs (passed in x6); x6 is reused below as the store pointer
+ ldr x9, [sp, #80] //x9 = pu1_cliptab_cr from the caller's stack; offset 80 assumes push_v_regs pushed 64 bytes (plus 16 for x19/x20) - confirm
+ sub x0, x0, x1, lsl #1 //x0 = pu1_src - 2*src_strd, i.e. the p1 row of chroma
+ rev w7, w7 //byte-reverse the packed boundary strengths
+ mov v12.2s[0], w7 //lane 0 of v12 = ui_Bs (4 bytes)
+ ld1 {v16.s}[0], [x8] //lane 0 of v16 contains 4 bytes of cliptab_cb
+ ld1 {v17.s}[0], [x9] //lane 0 of v17 contains 4 bytes of cliptab_cr
+ ld2 {v6.8b, v7.8b}, [x0], x1 //v6 = p1u , v7 = p1v
+ tbl v14.8b, {v16.16b}, v12.8b //Retrieving cliptab values for U, indexed by the Bs bytes
+ tbl v28.8b, {v17.16b}, v12.8b //Retrieving cliptab values for V, indexed by the Bs bytes
+ uxtl v12.8h, v12.8b //v12 = uc_Bs widened to one 16 bit element each
+ mov x6, x0 //Keeping a backup of the pointer to the p0 row for the stores
+ ld2 {v4.8b, v5.8b}, [x0], x1 //v4 = p0u , v5 = p0v
+ movi v30.8b, #1 //constant 1, added to the clip values below
+ dup v20.8b, w2 //low half of v20 contains alpha_cb
+ dup v21.8b, w4 //v21 contains alpha_cr
+ mov v20.d[1], v21.d[0] //v20 = {alpha_cb x8, alpha_cr x8}
+ ld2 {v0.8b, v1.8b}, [x0], x1 //v0 = q0u , v1 = q0v
+ uxtl v14.8h, v14.8b //widen U clip values to 16 bits
+ uxtl v28.8h, v28.8b //widen V clip values to 16 bits
+ mov v15.d[0], v28.d[0] //low half of v14 has cliptab values for U, v15 for V
+ mov v14.d[1], v28.d[0]
+ ld2 {v2.8b, v3.8b}, [x0] //v2 = q1u , v3 = q1v
+ usubl v10.8h, v1.8b, v5.8b //v10 = (q0v - p0v)
+ usubl v8.8h, v0.8b, v4.8b //v8 = (q0u - p0u)
+ mov v6.d[1], v7.d[0] //v6 = {p1u, p1v}
+ mov v4.d[1], v5.d[0] //v4 = {p0u, p0v}
+ uabd v26.16b, v6.16b , v4.16b //v26 = ABS(p1 - p0)
+ shl v10.8h, v10.8h, #2 //v10 = (q0v - p0v)<<2
+ mov v0.d[1], v1.d[0] //v0 = {q0u, q0v}
+ uabd v22.16b, v4.16b , v0.16b //v22 = ABS(p0 - q0)
+ shl v8.8h, v8.8h, #2 //v8 = (q0u - p0u)<<2
+ mov v14.d[1], v15.d[0]
+ sli v14.8h, v14.8h, #8 //copy each clip value into both bytes of its 16-bit element
+ mov v15.d[0], v14.d[1]
+ mov v2.d[1], v3.d[0] //v2 = {q1u, q1v}
+ uabd v24.16b, v2.16b , v0.16b //v24 = ABS(q1 - q0)
+ cmhs v18.16b, v22.16b, v20.16b //v18 = ( ABS(p0 - q0) >= Alpha )
+ usubl v20.8h, v6.8b, v2.8b //v20 = (p1u - q1u)
+ usubl v6.8h, v7.8b, v3.8b //v6 = (p1v - q1v); v6 no longer holds p1 from here on
+ dup v16.8b, w3 //low half of v16 contains beta_cb
+ dup v17.8b, w5 //v17 contains beta_cr
+ mov v16.d[1], v17.d[0] //v16 = {beta_cb x8, beta_cr x8}
+ add v8.8h, v8.8h , v20.8h //
+ add v10.8h, v10.8h , v6.8h //v8,v10 = [ (q0 - p0)<<2 ] + (p1 - q1)
+ cmhs v24.16b, v24.16b, v16.16b //v24 = ( ABS(q1 - q0) >= Beta )
+ cmgt v12.4h, v12.4h, #0 //per-Bs mask: Bs > 0
+ sqrshrn v8.8b, v8.8h, #3 //
+ sqrshrn v9.8b, v10.8h, #3 //i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3, saturated to 8 bits
+ mov v8.d[1], v9.d[0] //v8 = {i_macro U, i_macro V}
+ add v14.8b, v14.8b , v30.8b //low half of v14 = C = C0+1 for U
+ cmhs v26.16b, v26.16b, v16.16b //v26 = ( ABS(p1 - p0) >= Beta )
+ orr v18.16b, v18.16b , v24.16b //v18 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
+ abs v6.16b, v8.16b //v6 = ABS (i_macro)
+ add v15.8b, v15.8b , v30.8b //v15 = C = C0+1 for V
+ mov v14.d[1], v15.d[0] //v14 = {C for U, C for V}
+ mov v13.8b, v12.8b
+ mov v12.d[1], v13.d[0] //duplicate the Bs > 0 mask into the V half
+ orr v18.16b, v18.16b , v26.16b //v18 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
+ umin v14.16b, v6.16b , v14.16b //v14 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro)
+ bic v12.16b, v12.16b , v18.16b //final condition: Bs > 0 and none of the threshold tests tripped
+ cmge v8.16b, v8.16b, #0 //v8 = sign mask (i_macro >= 0)
+ and v14.16b, v14.16b , v12.16b //Making delta zero in places where values should not be filtered
+ uqadd v16.16b, v4.16b , v14.16b //v16 = p0 + delta
+ uqsub v4.16b, v4.16b , v14.16b //v4 = p0 - delta
+ uqadd v18.16b, v0.16b , v14.16b //v18 = q0 + delta
+ uqsub v0.16b, v0.16b , v14.16b //v0 = q0 - delta
+ bif v16.16b, v4.16b , v8.16b //v16 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta)
+ bif v0.16b, v18.16b , v8.16b //v0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta)
+ mov v17.d[0], v16.d[1] //split into U (v16) and V (v17) halves for the interleaving store
+ mov v1.d[0], v0.d[1] //split into U (v0) and V (v1) halves for the interleaving store
+ st2 {v16.8b, v17.8b}, [x6], x1 //store the p0 row
+ st2 {v0.8b, v1.8b}, [x6] //store the q0 row
+
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Performs filtering of a chroma block vertical edge for cases where the
+//* boundary strength is less than 4 in high profile
+//*
+//* @par Description:
+//* This operation is described in Sec. 8.7.2.3 under the title
+//* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+//*
+//* @param[in] x0 - pu1_src
+//* Pointer to the src sample q0
+//*
+//* @param[in] x1 - src_strd
+//* Source stride
+//*
+//* @param[in] x2 - alpha_cb
+//* Alpha Value for the boundary in U
+//*
+//* @param[in] x3 - beta_cb
+//* Beta Value for the boundary in U
+//*
+//* @param[in] x4 - alpha_cr
+//* Alpha Value for the boundary in V
+//*
+//* @param[in] x5 - beta_cr
+//* Beta Value for the boundary in V
+//*
+//* @param[in] x6 - u4_bs
+//* Packed Boundary strength array
+//*
+//* @param[in] x7 - pu1_cliptab_cb
+//* tc0_table for U
+//*
+//* @param[in] sp(0) - pu1_cliptab_cr
+//* tc0_table for V
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+ .global ih264_deblk_chroma_vert_bslt4_av8
+
+ih264_deblk_chroma_vert_bslt4_av8:
+
+ // ARMv7 original prologue, kept for reference: STMFD sp!,{x4-x7,x10-x12,x14}
+ push_v_regs //macro from ih264_neon_macros.s (not visible here); presumably saves the callee-saved SIMD registers - confirm
+ stp x19, x20, [sp, #-16]! //x19/x20 are saved here and restored before ret; not otherwise referenced in this function
+ mov x10, x7 //x10 = pu1_cliptab_cb (passed in x7)
+ ldr x11, [sp, #80] //x11 = pu1_cliptab_cr from the caller's stack; offset 80 assumes push_v_regs pushed 64 bytes (plus 16 for x19/x20) - confirm
+ sub x0, x0, #4 //point x0 to p1u of row0.
+ add x2, x2, x4, lsl #8 //x2 = (alpha_cr,alpha_cb) packed as two bytes
+ add x3, x3, x5, lsl #8 //x3 = (beta_cr,beta_cb) packed as two bytes
+ mov x12, x0 //keep a back up of x0 for buffer write
+ ld4 {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1 //rows 0-3: each row supplies four 16-bit (u,v) pairs,
+ ld4 {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1 //lane i of v0 = p1, v1 = p0, v2 = q0, v3 = q1
+ ld4 {v0.h, v1.h, v2.h, v3.h}[2], [x0], x1
+ ld4 {v0.h, v1.h, v2.h, v3.h}[3], [x0], x1
+
+ ld4 {v4.h, v5.h, v6.h, v7.h}[0], [x0], x1 //rows 4-7: v4 = p1, v5 = p0, v6 = q0, v7 = q1
+ ld4 {v4.h, v5.h, v6.h, v7.h}[1], [x0], x1
+ ld4 {v4.h, v5.h, v6.h, v7.h}[2], [x0], x1
+ ld4 {v4.h, v5.h, v6.h, v7.h}[3], [x0], x1
+
+ mov v10.16b, v2.16b //regroup (v10 is scratch) so that afterwards
+ mov v2.16b, v1.16b //v0/v1 = p1, v2/v3 = p0, v4/v5 = q0, v6/v7 = q1
+ mov v1.16b, v4.16b //(even register = rows 0-3, odd register = rows 4-7)
+ mov v4.16b, v10.16b
+ mov v10.16b, v6.16b
+ mov v6.16b, v3.16b
+ mov v3.16b, v5.16b
+ mov v5.16b, v10.16b
+ dup v22.8h, w2 //v22 = alpha, bytes alternate alpha_cb, alpha_cr
+ mov v2.d[1], v3.d[0] //v2 = p0 for rows 0-7
+ mov v4.d[1], v5.d[0] //v4 = q0 for rows 0-7
+ uabd v8.16b, v2.16b , v4.16b //|p0-q0|
+ dup v24.8h, w3 //v24 = beta, bytes alternate beta_cb, beta_cr
+ mov v25.d[0], v24.d[1]
+ mov v6.d[1], v7.d[0] //v6 = q1 for rows 0-7
+ mov v0.d[1], v1.d[0] //v0 = p1 for rows 0-7
+ uabd v10.16b, v6.16b , v4.16b //|q1-q0|
+ uabd v12.16b, v0.16b , v2.16b //|p1-p0|
+ cmhi v8.16b, v22.16b , v8.16b //|p0-q0| < alpha ?
+ usubl v14.8h, v0.8b, v6.8b //(p1 - q1), rows 0-3
+ cmhi v10.16b, v24.16b , v10.16b //|q1-q0| < beta ?
+ usubl v16.8h, v1.8b, v7.8b //(p1 - q1), rows 4-7
+ cmhi v12.16b, v24.16b , v12.16b //|p1-p0| < beta ?
+ usubl v18.8h, v4.8b, v2.8b //(q0 - p0), rows 0-3
+ and v8.16b, v8.16b , v10.16b //|p0-q0| < alpha && |q1-q0| < beta
+ usubl v20.8h, v5.8b, v3.8b //(q0 - p0), rows 4-7
+ movi v28.8h, #4 //constant 4 used as the multiplier in the mla steps
+ ld1 {v24.s}[0], [x10] //Load ClipTable for U (4 bytes into lane 0; beta in v24 is no longer needed)
+ ld1 {v25.s}[0], [x11] //Load ClipTable for V (4 bytes into lane 0)
+ rev w6, w6 //Blocking strengths, byte-reversed
+ and v8.16b, v8.16b , v12.16b //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
+ mov v10.s[0], w6 //lane 0 of v10 = the 4 Bs bytes
+ mla v14.8h, v18.8h , v28.8h
+ mla v16.8h, v20.8h , v28.8h //4*(q0 - p0) + (p1 - q1)
+ uxtl v10.8h, v10.8b
+ sli v10.4h, v10.4h, #8 //each Bs byte now appears twice in the low 8 bytes of v10
+ tbl v12.8b, {v24.16b}, v10.8b //tC0 for U
+ tbl v13.8b, {v25.16b}, v10.8b //tC0 for V
+ zip1 v31.8b, v12.8b, v13.8b //interleave U and V tC0 to match the (u,v) byte order of the pixels
+ zip2 v13.8b, v12.8b, v13.8b
+ mov v12.8b, v31.8b
+ mov v12.d[1], v13.d[0] //v12 = tC0 for all 16 bytes
+ uxtl v10.4s, v10.4h
+ sli v10.4s, v10.4s, #16 //each Bs byte now fills one 32-bit element of v10
+ movi v24.16b, #1
+ add v12.16b, v12.16b , v24.16b //tC0 + 1
+ cmhs v10.16b, v10.16b , v24.16b //u4_bs >= 1, i.e. u4_bs != 0
+ and v8.16b, v8.16b , v10.16b //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0
+ // v0, v2, v4, v6 (p1, p0, q0, q1 inputs),
+ // v8 (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0),
+ // v12 (tC = tC0 + 1)
+ srshr v14.8h, v14.8h, #3
+ srshr v16.8h, v16.8h, #3 //(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
+ cmgt v18.8h, v14.8h , #0
+ cmgt v20.8h, v16.8h , #0
+ xtn v18.8b, v18.8h
+ xtn v19.8b, v20.8h //v18/v19 = mask of (delta > 0)
+ mov v18.d[1], v19.d[0]
+ abs v14.8h, v14.8h
+ abs v16.8h, v16.8h
+ xtn v14.8b, v14.8h
+ xtn v15.8b, v16.8h
+ mov v14.d[1], v15.d[0]
+ umin v14.16b, v14.16b , v12.16b //v14 = |delta| clipped to tC
+ uqadd v20.16b, v2.16b , v14.16b //p0+|delta|
+ uqadd v22.16b, v4.16b , v14.16b //q0+|delta|
+ uqsub v24.16b, v2.16b , v14.16b //p0-|delta|
+ uqsub v26.16b, v4.16b , v14.16b //q0-|delta|
+ bit v24.16b, v20.16b , v18.16b //p0 + delta
+ bit v22.16b, v26.16b , v18.16b //q0 - delta
+ bit v2.16b, v24.16b , v8.16b //take filtered p0 only where the filter condition holds
+ bit v4.16b, v22.16b , v8.16b //take filtered q0 only where the filter condition holds
+ mov v1.d[0], v0.d[1] //split each 128-bit vector back into rows 0-3 / rows 4-7
+ mov v3.d[0], v2.d[1]
+ mov v5.d[0], v4.d[1]
+ mov v7.d[0], v6.d[1]
+ mov v10.16b, v1.16b //undo the regrouping (v10 is scratch) so that
+ mov v1.16b, v2.16b //v0-v3 = p1,p0,q0,q1 of rows 0-3 and
+ mov v2.16b, v4.16b //v4-v7 = p1,p0,q0,q1 of rows 4-7, as st4 expects
+ mov v4.16b, v10.16b
+ mov v10.16b, v3.16b
+ mov v3.16b, v6.16b
+ mov v6.16b, v5.16b
+ mov v5.16b, v10.16b
+ st4 {v0.h, v1.h, v2.h, v3.h}[0], [x12], x1 //write back rows 0-3 through the saved pointer
+ st4 {v0.h, v1.h, v2.h, v3.h}[1], [x12], x1
+ st4 {v0.h, v1.h, v2.h, v3.h}[2], [x12], x1
+ st4 {v0.h, v1.h, v2.h, v3.h}[3], [x12], x1
+
+ st4 {v4.h, v5.h, v6.h, v7.h}[0], [x12], x1 //write back rows 4-7
+ st4 {v4.h, v5.h, v6.h, v7.h}[1], [x12], x1
+ st4 {v4.h, v5.h, v6.h, v7.h}[2], [x12], x1
+ st4 {v4.h, v5.h, v6.h, v7.h}[3], [x12], x1
+
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
diff --git a/common/armv8/ih264_deblk_luma_av8.s b/common/armv8/ih264_deblk_luma_av8.s
new file mode 100755
index 0000000..bcdb03f
--- /dev/null
+++ b/common/armv8/ih264_deblk_luma_av8.s
@@ -0,0 +1,1084 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///*****************************************************************************/
+///* */
+///* File Name : ih264_deblk_luma_av8.s */
+///* */
+///* Description : Contains function definitions for deblocking luma */
+///* edge. Functions are coded in NEON assembly and can */
+///* be compiled using ARM RVDS. */
+///* */
+///* List of Functions : ih264_deblk_luma_vert_bs4_av8() */
+///* ih264_deblk_luma_vert_bslt4_av8() */
+///* ih264_deblk_luma_horz_bs4_av8() */
+///* ih264_deblk_luma_horz_bslt4_av8() */
+///* */
+///* Issues / Problems : None */
+///* */
+///* Revision History : */
+///* */
+///* DD MM YYYY Author(s) Changes (Describe the changes made) */
+///* 28 11 2013 Ittiam Draft */
+///* */
+///*****************************************************************************/
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Performs filtering of a luma block horizontal edge for cases where the
+//* boundary strength is less than 4
+//*
+//* @par Description:
+//* This operation is described in Sec. 8.7.2.3 under the title
+//* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+//*
+//* @param[in] x0 - pu1_src
+//* Pointer to the src sample q0
+//*
+//* @param[in] x1 - src_strd
+//* Source stride
+//*
+//* @param[in] x2 - alpha
+//* Alpha Value for the boundary
+//*
+//* @param[in] x3 - beta
+//* Beta Value for the boundary
+//*
+//* @param[in] x4 - u4_bs
+//* Packed Boundary strength array
+//*
+//* @param[in] x5 - pu1_cliptab
+//* tc0_table
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+ .global ih264_deblk_luma_horz_bslt4_av8
+
+ih264_deblk_luma_horz_bslt4_av8:
+
+ // ARMv7 original prologue, kept for reference: STMFD sp!,{x4-x7,x14}
+ push_v_regs //macro from ih264_neon_macros.s (not visible here); presumably saves the callee-saved SIMD registers - confirm
+ stp x19, x20, [sp, #-16]! //x19/x20 are saved here and restored before ret; not otherwise referenced in this function
+
+ //x4 = ui_Bs , x5 = *puc_ClpTab are used directly from registers (ARMv7 original: LDRD x4,x5,[SP,#0x14])
+ sub x0, x0, x1, lsl #1 //x0 = pu1_src - 2*src_strd
+ sub x0, x0, x1 //x0 pointer to p2
+ rev w4, w4 //byte-reverse the packed boundary strengths
+ ld1 {v10.8b, v11.8b}, [x0], x1 //p2 values are loaded into v10/v11
+ mov v12.2s[0], w4 //lane 0 of v12 = ui_Bs
+ mov x6, x0 //keeping backup of pointer to p1
+ ld1 {v8.8b, v9.8b}, [x0], x1 //p1 values are loaded into v8/v9
+ mov x7, x0 //keeping backup of pointer to p0
+ ld1 {v6.8b, v7.8b}, [x0], x1 //p0 values are loaded into v6/v7
+ uxtl v12.8h, v12.8b //v12 = uc_Bs in each 16 bit element
+ ld1 {v0.8b, v1.8b}, [x0], x1 //q0 values are loaded into v0/v1
+ mov v10.d[1], v11.d[0] //v10 = p2 (16 pixels)
+ mov v8.d[1], v9.d[0] //v8 = p1 (16 pixels)
+ mov v6.d[1], v7.d[0] //v6 = p0 (16 pixels)
+ uabd v26.16b, v8.16b, v6.16b //v26 = ABS(p1 - p0)
+ ld1 {v2.8b, v3.8b}, [x0], x1 //q1 values are loaded into v2/v3
+ mov v0.d[1], v1.d[0] //v0 = q0 (16 pixels)
+ mov v2.d[1], v3.d[0] //v2 = q1 (16 pixels)
+ uabd v22.16b, v6.16b, v0.16b //v22 = ABS(p0 - q0)
+ ld1 {v16.s}[0], [x5] //lane 0 of v16 contains 4 bytes of cliptab
+ uabd v24.16b, v2.16b, v0.16b //v24 = ABS(q1 - q0)
+ ld1 {v4.8b, v5.8b}, [x0], x1 //q2 values are loaded into v4/v5
+ tbl v14.8b, {v16.16b}, v12.8b //look up cliptab (C0) using the widened Bs bytes as indices
+ mov v4.d[1], v5.d[0] //v4 = q2 (16 pixels)
+ dup v20.16b, w2 //v20 contains alpha
+ dup v16.16b, w3 //v16 contains beta
+ uxtl v12.4s, v12.4h //Bs widened to one 32-bit element each
+ uxtl v14.4s, v14.4h //C0 widened to 32-bit elements
+ uabd v28.16b, v10.16b, v6.16b //v28 = Ap = ABS(p2 - p0)
+ uabd v30.16b, v4.16b, v0.16b //v30 = Aq = ABS(q2 - q0)
+ cmgt v12.4s, v12.4s, #0 //per-Bs mask: Bs > 0
+ sli v14.4s, v14.4s, #8 //together with the #16 step below, replicates C0 into all 4 bytes of its element
+ cmhs v18.16b, v22.16b, v20.16b //v18 = ( ABS(p0 - q0) >= Alpha )
+ cmhs v24.16b, v24.16b, v16.16b //v24 = ( ABS(q1 - q0) >= Beta )
+ cmhs v26.16b, v26.16b, v16.16b //v26 = ( ABS(p1 - p0) >= Beta )
+ cmhi v20.16b, v16.16b , v28.16b //v20=(Ap<Beta)
+ cmhi v22.16b, v16.16b , v30.16b //v22=(Aq<Beta)
+ sli v14.4s, v14.4s, #16
+ orr v18.16b, v18.16b , v24.16b //v18 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
+ usubl v30.8h, v1.8b, v7.8b //
+ usubl v24.8h, v0.8b, v6.8b //v30,v24 = (q0 - p0) high/low
+ orr v18.16b, v18.16b , v26.16b //v18 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
+ usubl v28.8h, v8.8b, v2.8b //v28 = (p1 - q1)L
+ shl v26.8h, v30.8h, #2 //v26 = (q0 - p0)<<2 high
+ shl v24.8h, v24.8h, #2 //v24 = (q0 - p0)<<2 low
+ usubl v30.8h, v9.8b, v3.8b //v30 = (p1 - q1)H
+ bic v12.16b, v12.16b , v18.16b //final condition: Bs > 0 and none of the threshold tests tripped
+ add v24.8h, v24.8h , v28.8h //
+ add v26.8h, v26.8h , v30.8h //v26,v24 = [ (q0 - p0)<<2 ] + (p1 - q1)
+ sub v18.16b, v14.16b , v20.16b //v18 = C0 + (Ap < Beta); subtracting the all-ones mask adds 1
+ urhadd v16.16b, v6.16b , v0.16b //v16 = ((p0+q0+1) >> 1)
+ mov v17.d[0], v16.d[1]
+ sqrshrn v24.8b, v24.8h, #3 //
+ sqrshrn v25.8b, v26.8h, #3 //i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3, saturated to 8 bits
+ mov v24.d[1], v25.d[0] //v24 = i_macro (16 pixels)
+ sub v18.16b, v18.16b , v22.16b //v18 = C0 + (Ap < Beta) + (Aq < Beta)
+ and v20.16b, v20.16b , v12.16b //(Ap < Beta) && final condition
+ and v22.16b, v22.16b , v12.16b //(Aq < Beta) && final condition
+ abs v26.16b, v24.16b //v26 = ABS (i_macro)
+ uaddl v28.8h, v17.8b, v11.8b //
+ uaddl v10.8h, v16.8b, v10.8b //v28,v10 = p2 + (p0+q0+1)>>1
+ uaddl v30.8h, v17.8b, v5.8b //
+ umin v18.16b, v26.16b , v18.16b //v18 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro)
+ ushll v26.8h, v9.8b, #1 //
+ uaddl v4.8h, v16.8b, v4.8b //v30,v4 = q2 + (p0+q0+1)>>1
+ ushll v16.8h, v8.8b, #1 //v26,v16 = (p1<<1)
+ and v18.16b, v18.16b , v12.16b //Making delta zero in places where values should not be filtered
+ sub v28.8h, v28.8h , v26.8h //v28,v10 = [p2 + (p0+q0+1)>>1] - (p1<<1)
+ sub v10.8h, v10.8h , v16.8h //
+ ushll v16.8h, v2.8b, #1 //
+ ushll v26.8h, v3.8b, #1 //v26,v16 = (q1<<1)
+ sqshrn v29.8b, v28.8h, #1 //
+ sqshrn v28.8b, v10.8h, #1 //v28 = i_macro_p1
+ mov v28.d[1], v29.d[0]
+ sub v4.8h, v4.8h , v16.8h //
+ sub v30.8h, v30.8h , v26.8h //v30,v4 = [q2 + (p0+q0+1)>>1] - (q1<<1)
+ neg v26.16b, v14.16b //v26 = -C0
+ smin v28.16b, v28.16b , v14.16b //v28 = min(C0,i_macro_p1)
+ cmge v24.16b, v24.16b, #0 //v24 = sign mask (i_macro >= 0)
+ sqshrn v31.8b, v30.8h, #1 //
+ sqshrn v30.8b, v4.8h, #1 //v30 = i_macro_q1
+ mov v30.d[1], v31.d[0]
+ smax v28.16b, v28.16b , v26.16b //v28 = max( - C0 , min(C0, i_macro_p1) )
+ uqadd v16.16b, v6.16b , v18.16b //v16 = p0 + delta
+ uqsub v6.16b, v6.16b , v18.16b //v6 = p0 - delta
+ smin v30.16b, v30.16b , v14.16b //v30 = min(C0,i_macro_q1)
+ and v28.16b, v20.16b , v28.16b //condition check Ap<beta
+ uqadd v14.16b, v0.16b , v18.16b //v14 = q0 + delta
+ uqsub v0.16b, v0.16b , v18.16b //v0 = q0 - delta
+ smax v30.16b, v30.16b , v26.16b //v30 = max( - C0 , min(C0, i_macro_q1) )
+ bif v16.16b, v6.16b , v24.16b //v16 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta)
+ bif v0.16b, v14.16b , v24.16b //v0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta)
+ add v28.16b, v28.16b , v8.16b //filtered p1 = p1 + clipped i_macro_p1
+ and v30.16b, v22.16b , v30.16b //condition check Aq<beta
+ st1 {v16.16b}, [x7], x1 //writing back filtered value of p0
+ add v30.16b, v30.16b , v2.16b //filtered q1 = q1 + clipped i_macro_q1
+ st1 {v0.16b}, [x7], x1 //writing back filtered value of q0
+ st1 {v28.16b}, [x6] //writing back filtered value of p1
+ st1 {v30.16b}, [x7], x1 //writing back filtered value of q1
+
+ // ARMv7 original epilogue, kept for reference: LDMFD sp!,{x4-x7,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Performs filtering of a luma block horizontal edge when the
+//* boundary strength is set to 4
+//*
+//* @par Description:
+//* This operation is described in Sec. 8.7.2.4 under the title
+//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+//*
+//* @param[in] x0 - pu1_src
+//* Pointer to the src sample q0
+//*
+//* @param[in] x1 - src_strd
+//* Source stride
+//*
+//* @param[in] x2 - alpha
+//* Alpha Value for the boundary
+//*
+//* @param[in] x3 - beta
+//* Beta Value for the boundary
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+ .global ih264_deblk_luma_horz_bs4_av8
+
+ih264_deblk_luma_horz_bs4_av8:
+
+ // Back up necessary registers on stack
+ // ARMv7 original prologue, kept for reference: STMFD sp!,{x12,x14}
+ push_v_regs //macro from ih264_neon_macros.s (not visible here); presumably saves the callee-saved SIMD registers - confirm
+ stp x19, x20, [sp, #-16]! //x19/x20 are saved here and restored before ret; not otherwise referenced in this function
+
+ // Init
+ dup v0.16b, w2 //duplicate alpha
+ sub x12, x0, x1 //pointer to p0 = q0 - src_strd
+ dup v2.16b, w3 //duplicate beta
+ sub x14, x0, x1, lsl#1 //pointer to p1 = q0 - src_strd*2
+ sub x2, x0, x1, lsl#2 //pointer to p3 = q0 - src_strd*4 (x2 reused; alpha is already in v0)
+ sub x3, x14, x1 //pointer to p2 = p1 - src_strd (x3 reused; beta is already in v2)
+
+ // Load Data
+ ld1 {v4.8b, v5.8b}, [x0], x1 //load q0 to v4/v5, q0 = q0 + src_strd
+ ld1 {v6.8b, v7.8b}, [x12] //load p0 to v6/v7
+ ld1 {v8.8b, v9.8b}, [x0], x1 //load q1 to v8/v9, q0 = q0 + src_strd
+ ld1 {v10.8b, v11.8b}, [x14] //load p1 to v10/v11
+ mov v4.d[1] , v5.d[0] //v4 = q0 (16 pixels)
+ mov v6.d[1] , v7.d[0] //v6 = p0 (16 pixels)
+ mov v8.d[1] , v9.d[0] //v8 = q1 (16 pixels)
+ mov v10.d[1] , v11.d[0] //v10 = p1 (16 pixels)
+
+ // Filter Decision
+ uabd v12.16b , v4.16b, v6.16b //ABS(p0 - q0)
+ uabd v14.16b , v8.16b, v4.16b //ABS(q1 - q0)
+ uabd v16.16b , v10.16b, v6.16b //ABS(p1 - p0)
+ cmhs v18.16b, v12.16b , v0.16b //ABS(p0 - q0) >= Alpha
+ cmhs v14.16b, v14.16b , v2.16b //ABS(q1 - q0) >= Beta
+ cmhs v16.16b, v16.16b , v2.16b //ABS(p1 - p0) >= Beta
+ movi v20.16b, #2
+ orr v18.16b, v18.16b , v14.16b //ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta
+ ld1 {v14.8b, v15.8b}, [x0], x1 //load q2 to v14/v15, q0 = q0 + src_strd
+ mov v14.d[1] , v15.d[0]
+ orr v18.16b, v18.16b , v16.16b //ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta
+ usra v20.16b, v0.16b, #2 //v20 = (alpha >> 2) + 2
+ uabd v22.16b , v14.16b, v4.16b //Aq = ABS(q2 - q0)
+ uaddl v24.8h, v4.8b, v6.8b //p0+q0 L
+ uaddl v26.8h, v5.8b, v7.8b //p0+q0 H
+ cmhi v22.16b, v2.16b , v22.16b //Aq < Beta
+ cmhi v20.16b, v20.16b , v12.16b //(ABS(p0 - q0) <((Alpha >>2) + 2))
+ // Deblock Filtering q0', q1', q2'
+ uaddw v28.8h, v24.8h , v8.8b //p0+q0+q1 L
+ uaddw v30.8h, v26.8h , v9.8b //p0+q0+q1 H
+ and v22.16b, v22.16b , v20.16b //(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ // q0' if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) TRUE
+ add v16.8h, v28.8h , v28.8h //2*(p0+q0+q1)L
+ add v0.8h, v30.8h , v30.8h //2*(p0+q0+q1)H (v0 reused; alpha no longer needed)
+ uaddw v16.8h, v16.8h , v14.8b //2*(p0+q0+q1)+q2 L
+ uaddw v0.8h, v0.8h , v15.8b //2*(p0+q0+q1)+q2 H
+ uaddw v16.8h, v16.8h , v10.8b //2*(p0+q0+q1)+q2 +p1 L
+ uaddw v0.8h, v0.8h , v11.8b //2*(p0+q0+q1)+q2 +p1 H
+ rshrn v12.8b, v16.8h, #3 //(2*(p0+q0+q1)+q2 +p1 +4)>> 3 L [q0']
+ rshrn v13.8b, v0.8h, #3 //(2*(p0+q0+q1)+q2 +p1 +4)>> 3 H [q0']
+ mov v12.d[1] , v13.d[0]
+ // q0" if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) FALSE
+ uaddl v16.8h, v8.8b, v8.8b //2*q1 L
+ uaddl v0.8h, v9.8b, v9.8b //2*q1 H
+ uaddw v16.8h, v16.8h , v4.8b //2*q1+q0 L
+ uaddw v0.8h, v0.8h , v5.8b //2*q1+q0 H
+ uaddw v16.8h, v16.8h , v10.8b //2*q1+q0+p1 L
+ uaddw v0.8h, v0.8h , v11.8b //2*q1+q0+p1 H
+ rshrn v16.8b, v16.8h, #2 //(2*q1+q0+p1+2)>>2 L [q0"]
+ rshrn v17.8b, v0.8h, #2 //(2*q1+q0+p1+2)>>2 H [q0"]
+ mov v16.d[1] , v17.d[0]
+ uaddw v28.8h, v28.8h , v14.8b //p0+q0+q1+q2 L
+ uaddw v30.8h, v30.8h , v15.8b //p0+q0+q1+q2 H
+ ld1 {v0.8b, v1.8b}, [x0], x1 //load q3 to v0/v1, q0 = q0 + src_strd
+ mov v0.d[1] , v1.d[0]
+ bit v16.16b, v12.16b , v22.16b //choosing between q0' and q0" depending on condn
+ sub x0, x0, x1, lsl #2 //pointer to q0
+ bic v22.16b, v22.16b , v18.16b //!((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta))
+ // && (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ rshrn v12.8b, v28.8h, #2 //(p0+q0+q1+q2+2)>>2 L [q1']
+ rshrn v13.8b, v30.8h, #2 //(p0+q0+q1+q2+2)>>2 H [q1']
+ mov v12.d[1] , v13.d[0]
+ bif v4.16b, v16.16b , v18.16b //choose q0 or filtered q0 (filtered where v18 is clear)
+ mov v5.d[0] , v4.d[1]
+ uaddl v16.8h, v14.8b, v0.8b //q2+q3,L
+ uaddl v0.8h, v15.8b, v1.8b //q2+q3,H
+ add v28.8h, v28.8h , v16.8h //p0+q0+q1+2*q2+q3 L
+ st1 {v4.8b, v5.8b}, [x0], x1 //store q0
+ add v30.8h, v30.8h , v0.8h //p0+q0+q1+2*q2+q3 H
+ add v28.8h, v28.8h , v16.8h //p0+q0+q1+3*q2+2*q3 L
+ add v30.8h, v30.8h , v0.8h //p0+q0+q1+3*q2+2*q3 H
+ rshrn v0.8b, v28.8h, #3 //(p0+q0+q1+3*q2+2*q3+4)>>3 L [q2']
+ rshrn v1.8b, v30.8h, #3 //(p0+q0+q1+3*q2+2*q3+4)>>3 H [q2']
+ mov v0.d[1] , v1.d[0]
+ ld1 {v30.8b, v31.8b}, [x3] //load p2 to v30/v31
+ mov v30.d[1] , v31.d[0]
+ bif v12.16b, v8.16b , v22.16b //choose q1 or filtered value of q1
+ mov v13.d[0] , v12.d[1]
+ uabd v16.16b , v30.16b, v6.16b //Ap = ABS(p2 - p0)
+ uaddw v24.8h, v24.8h , v10.8b //p0+q0+p1 L
+ bif v0.16b, v14.16b , v22.16b //choose q2 or filtered q2
+ mov v1.d[0] , v0.d[1]
+ uaddw v26.8h, v26.8h , v11.8b //p0+q0+p1 H
+ st1 {v12.8b, v13.8b}, [x0], x1 //store q1
+ cmhi v16.16b, v2.16b , v16.16b //Ap < Beta
+ add v28.8h, v24.8h , v24.8h //2*(p0+q0+p1) L
+ add v4.8h, v26.8h , v26.8h //2*(p0+q0+p1) H
+ st1 {v0.8b, v1.8b}, [x0], x1 //store q2
+ and v20.16b, v20.16b , v16.16b //((Ap < Beta) && (ABS(p0 - q0) <((Alpha >>2) + 2)))
+ uaddw v28.8h, v28.8h , v30.8b //2*(p0+q0+p1)+p2 l
+ uaddw v4.8h, v4.8h , v31.8b //2*(p0+q0+p1)+p2 H
+ uaddw v28.8h, v28.8h , v8.8b //2*(p0+q0+p1)+p2+q1 L
+ uaddw v4.8h, v4.8h , v9.8b //2*(p0+q0+p1)+p2+q1 H
+ rshrn v28.8b, v28.8h, #3 //(2*(p0+q0+p1)+p2+q1+4)>>3 L,p0'
+ rshrn v29.8b, v4.8h, #3 //(2*(p0+q0+p1)+p2+q1+4)>>3 H,p0'
+ mov v28.d[1] , v29.d[0]
+ movi v0.8b, #2 //constant 2 (bytes) for the umlal steps
+ movi v1.4h, #2 //constant 2 (16-bit) for the by-element mla steps
+ uaddl v2.8h, v6.8b, v8.8b //p0+q1 L (v2 reused; beta no longer needed)
+ umlal v2.8h, v10.8b, v0.8b //2*p1+p0+q1 L
+ uaddl v16.8h, v7.8b, v9.8b //p0+q1 H
+ umlal v16.8h, v11.8b, v0.8b //2*p1+p0+q1 H
+ uaddw v12.8h, v24.8h , v30.8b //(p0+q0+p1) +p2 L
+ ld1 {v24.8b, v25.8b}, [x2] //load p3 to v24/v25
+ mov v24.d[1] , v25.d[0]
+ uaddw v4.8h, v26.8h , v31.8b //(p0+q0+p1) +p2 H
+ uaddl v8.8h, v30.8b, v24.8b //p2+p3 L
+ rshrn v26.8b, v12.8h, #2 //((p0+q0+p1)+p2 +2)>>2,p1' L
+ rshrn v2.8b, v2.8h, #2 //(2*p1+p0+q1+2)>>2,p0"L
+ rshrn v27.8b, v4.8h, #2 //((p0+q0+p1)+p2 +2)>>2,p1' H
+ rshrn v3.8b, v16.8h, #2 //(2*p1+p0+q1+2)>>2,p0" H
+ mov v26.d[1] , v27.d[0]
+ mov v2.d[1] , v3.d[0]
+ uaddl v16.8h, v31.8b, v25.8b //p2+p3 H
+ mla v12.8h, v8.8h , v1.4h[0] //(p0+q0+p1)+3*p2+2*p3 L
+ mla v4.8h, v16.8h , v1.4h[0] //(p0+q0+p1)+3*p2+2*p3 H
+ bic v16.16b, v20.16b , v18.16b //!((ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta))
+ mov v17.d[0] , v16.d[1] //&& (Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ bit v2.16b, v28.16b , v20.16b //choosing between p0' and p0"
+ mov v3.d[0] , v2.d[1]
+ rshrn v12.8b, v12.8h, #3 //((p0+q0+p1)+3*p2+2*p3+4)>>3 L p2'
+ rshrn v13.8b, v4.8h, #3 //((p0+q0+p1)+3*p2+2*p3+4)>>3 H p2'
+ mov v12.d[1] , v13.d[0]
+ bif v6.16b, v2.16b , v18.16b //choosing between p0 and filtered value of p0
+ bit v10.16b, v26.16b , v16.16b //choosing between p1 and p1'
+ bit v30.16b, v12.16b , v16.16b //choosing between p2 and p2'
+ st1 {v6.16b}, [x12] //store p0
+ st1 {v10.16b}, [x14] //store p1
+ st1 {v30.16b}, [x3] //store p2
+
+ // ARMv7 original epilogue, kept for reference: LDMFD sp!,{x12,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Performs filtering of a luma block vertical edge for cases where the
+//* boundary strength is less than 4
+//*
+//* @par Description:
+//* This operation is described in Sec. 8.7.2.3 under the title
+//* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
+//*
+//* @param[in] x0 - pu1_src
+//* Pointer to the src sample q0
+//*
+//* @param[in] x1 - src_strd
+//* Source stride
+//*
+//* @param[in] x2 - alpha
+//* Alpha Value for the boundary
+//*
+//* @param[in] x3 - beta
+//* Beta Value for the boundary
+//*
+//* @param[in] x4 - u4_bs
+//* Packed Boundary strength array
+//*
+//* @param[in] x5 - pu1_cliptab
+//* tc0_table
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+ .global ih264_deblk_luma_vert_bslt4_av8
+
+ih264_deblk_luma_vert_bslt4_av8:
+ //in: x0 = pu1_src (q0), w1 = src_strd, w2 = alpha, w3 = beta, w4 = u4_bs, x5 = pu1_cliptab
+ sxtw x1, w1 //src_strd is a 32-bit argument; x1 is used below as a 64-bit post-index/offset, so sign-extend it first
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ sub x0, x0, #4 //pointer uc_edgePixel-4
+ mov x12, x4 //ui_bs
+ mov x14, x5 //pu1_cliptab
+ mov x17, x0 //copy of the start pointer (x17 is not read again in this routine)
+ //loading p3:p2:p1:p0:q0:q1:q2:q3 for every row
+ ld1 {v0.8b}, [x0], x1 //row1
+ ld1 {v2.8b}, [x0], x1 //row2
+ ld1 {v4.8b}, [x0], x1 //row3
+ rev w12, w12 //reversing ui_bs
+ ld1 {v6.8b}, [x0], x1 //row4
+ mov v18.2s[0], w12 //v18.s[0] = ui_Bs
+ ld1 {v16.s}[0], [x14] //v16.s[0] contains cliptab
+ ld1 {v8.8b}, [x0], x1 //row5
+ uxtl v18.8h, v18.8b //v18 = uc_Bs in each 16 bit lane
+ ld1 {v10.8b}, [x0], x1 //row6
+ ld1 {v12.8b}, [x0], x1 //row7
+ tbl v16.8b, {v16.16b}, v18.8b //puc_ClipTab[uc_Bs]
+ ld1 {v14.8b}, [x0], x1 //row8
+ ld1 {v1.8b}, [x0], x1 //row9
+ uxtl v16.4s, v16.4h //one 32 bit lane per boundary strength value
+ ld1 {v3.8b}, [x0], x1 //row10
+ ld1 {v5.8b}, [x0], x1 //row11
+ ld1 {v7.8b}, [x0], x1 //row12
+ sli v16.4s, v16.4s, #8 //duplicate the low byte (C0) into byte 1 of each lane
+ ld1 {v9.8b}, [x0], x1 //row13
+ ld1 {v11.8b}, [x0], x1 //row14
+ ld1 {v13.8b}, [x0], x1 //row15
+ sli v16.4s, v16.4s, #16 //duplicate the low halfword: C0 now fills all 4 bytes of each lane
+ ld1 {v15.8b}, [x0], x1 //row16
+
+
+ //taking two 8x8 transposes
+ //2X2 transposes
+ trn1 v21.8b, v0.8b, v2.8b
+ trn2 v2.8b, v0.8b, v2.8b //row1 &2
+ mov v0.8b, v21.8b
+ trn1 v21.8b, v4.8b, v6.8b
+ trn2 v6.8b, v4.8b, v6.8b //row3&row4
+ mov v4.8b, v21.8b
+ trn1 v21.8b, v8.8b, v10.8b
+ trn2 v10.8b, v8.8b, v10.8b //row5&6
+ mov v8.8b, v21.8b
+ trn1 v21.8b, v12.8b, v14.8b
+ trn2 v14.8b, v12.8b, v14.8b //row7 & 8
+ mov v12.8b, v21.8b
+ trn1 v21.8b, v1.8b, v3.8b
+ trn2 v3.8b, v1.8b, v3.8b //row9 &10
+ mov v1.8b, v21.8b
+ trn1 v21.8b, v5.8b, v7.8b
+ trn2 v7.8b, v5.8b, v7.8b //row11 & 12
+ mov v5.8b, v21.8b
+ trn1 v21.8b, v9.8b, v11.8b
+ trn2 v11.8b, v9.8b, v11.8b //row13 &14
+ mov v9.8b, v21.8b
+ trn1 v21.8b, v13.8b, v15.8b
+ trn2 v15.8b, v13.8b, v15.8b //row15 & 16
+ mov v13.8b, v21.8b
+ //4x4 transposes
+ trn1 v21.4h, v2.4h, v6.4h
+ trn2 v6.4h, v2.4h, v6.4h //row2 & row4
+ mov v2.8b, v21.8b
+ trn1 v21.4h, v10.4h, v14.4h
+ trn2 v14.4h, v10.4h, v14.4h //row6 & row8
+ mov v10.8b, v21.8b
+ trn1 v21.4h, v3.4h, v7.4h
+ trn2 v7.4h, v3.4h, v7.4h //row10 & 12
+ mov v3.8b, v21.8b
+ trn1 v21.4h, v11.4h, v15.4h
+ trn2 v15.4h, v11.4h, v15.4h //row14 & row16
+ mov v11.8b, v21.8b
+ trn1 v21.2s, v6.2s, v14.2s
+ trn2 v14.2s, v6.2s, v14.2s //row4 & 8
+ mov v6.8b, v21.8b
+ trn1 v21.2s, v7.2s, v15.2s
+ trn2 v15.2s, v7.2s, v15.2s //row 12 & 16
+ mov v7.8b, v21.8b
+ //now v6/v7 -> p0 and v14/v15 -> q3
+ trn1 v21.4h, v0.4h, v4.4h
+ trn2 v4.4h, v0.4h, v4.4h //row1 & 3
+ mov v0.8b, v21.8b
+ trn1 v21.4h, v8.4h, v12.4h
+ trn2 v12.4h, v8.4h, v12.4h //row 5 & 7
+ mov v8.8b, v21.8b
+ trn1 v21.4h, v1.4h, v5.4h
+ trn2 v5.4h, v1.4h, v5.4h //row9 & row11
+ mov v1.8b, v21.8b
+ trn1 v21.4h, v9.4h, v13.4h
+ trn2 v13.4h, v9.4h, v13.4h //row13 & row15
+ mov v9.8b, v21.8b
+ trn1 v21.2s, v0.2s, v8.2s
+ trn2 v8.2s, v0.2s, v8.2s //row1 & row5
+ mov v0.8b, v21.8b
+ trn1 v21.2s, v1.2s, v9.2s
+ trn2 v9.2s, v1.2s, v9.2s //row9 & 13
+ mov v1.8b, v21.8b
+ //now v0/v1 -> p3 & v8/v9 -> q0
+ //starting processing as p0 and q0 are now ready
+ trn1 v21.2s, v2.2s, v10.2s
+ trn2 v10.2s, v2.2s, v10.2s //row2 &6
+ mov v2.8b, v21.8b
+ mov v6.d[1] , v7.d[0]
+ mov v8.d[1] , v9.d[0]
+ urhadd v20.16b, v6.16b , v8.16b //((p0 + q0 + 1) >> 1)
+ mov v21.d[0], v20.d[1]
+ trn1 v31.2s, v3.2s, v11.2s
+ trn2 v11.2s, v3.2s, v11.2s //row10&row14
+ mov v3.8b, v31.8b
+ movi v19.8b, #2 //constant 2, used as the multiplier for (p1 << 1)
+ mov v18.d[1], v19.d[0]
+ //now v2/v3 -> p2 & v10/v11 -> q1
+ trn1 v31.2s, v4.2s, v12.2s
+ trn2 v12.2s, v4.2s, v12.2s //row3 & 7
+ mov v4.8b, v31.8b
+ uabd v22.16b , v6.16b, v8.16b //ABS(p0 - q0)
+ trn1 v31.2s, v5.2s, v13.2s
+ trn2 v13.2s, v5.2s, v13.2s //row11 & row15
+ mov v5.8b, v31.8b
+ mov v0.d[1] , v1.d[0]
+ mov v2.d[1] , v3.d[0]
+ mov v4.d[1] , v5.d[0]
+ mov v10.d[1] , v11.d[0]
+ mov v12.d[1] , v13.d[0]
+ mov v14.d[1] , v15.d[0]
+ uaddl v24.8h, v20.8b, v2.8b //(p2 + ((p0 + q0 + 1) >> 1) L
+ //now v4/v5 -> p1, v12/v13 -> q2
+ uaddl v26.8h, v21.8b, v3.8b //(p2 + ((p0 + q0 + 1) >> 1) H
+ umlsl v24.8h, v4.8b, v19.8b //(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) L
+ umlsl v26.8h, v5.8b, v19.8b //(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) H
+ dup v28.16b, w2 //alpha
+ cmhs v22.16b, v22.16b , v28.16b //ABS(p0 - q0) >= Alpha(Alpha <=ABS(p0 - q0))
+ dup v28.16b, w3 //beta
+ uabd v30.16b , v10.16b, v8.16b //ABS(q1 - q0)
+ sqshrn v24.8b, v24.8h, #1 //((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) L
+ sqshrn v25.8b, v26.8h, #1 //((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) H
+ mov v24.d[1], v25.d[0]
+ cmhs v30.16b, v30.16b , v28.16b //ABS(q1 - q0) >= Beta
+ uabd v26.16b , v4.16b, v6.16b //ABS(p1 - p0)
+
+ smin v24.16b, v24.16b , v16.16b //min(deltap1 ,C0)
+ orr v22.16b, v22.16b , v30.16b //ABS(q1 - q0) >= Beta ||ABS(p0 - q0) >= Alpha
+ neg v30.16b, v16.16b //-C0
+ cmhs v26.16b, v26.16b , v28.16b //ABS(p1 - p0) >= Beta
+ smax v24.16b, v24.16b , v30.16b //max(deltap1,-C0)
+ orr v22.16b, v22.16b , v26.16b //ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)
+ uxtl v26.4s, v18.4h //ui_bs
+ uaddl v18.8h, v20.8b, v12.8b //q2 + ((p0 + q0 + 1) >> 1) L
+ cmeq v26.4s, v26.4s , #0 //ui_bs == 0
+ usubw v18.8h, v18.8h , v10.8b //(q2 + ((p0 + q0 + 1) >> 1) - q1) L
+ uaddl v20.8h, v21.8b, v13.8b //q2 + ((p0 + q0 + 1) >> 1) H
+ usubw v18.8h, v18.8h , v10.8b //(q2 + ((p0 + q0 + 1) >> 1) - 2*q1)L
+ usubw v20.8h, v20.8h , v11.8b //(q2 + ((p0 + q0 + 1) >> 1) - q1) H
+ orr v26.16b, v26.16b , v22.16b //skip mask: (ui_bs == 0) || ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta
+ usubw v20.8h, v20.8h , v11.8b //(q2 + ((p0 + q0 + 1) >> 1) - 2*q1) H
+ sqshrn v18.8b, v18.8h, #1 //((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) L
+ uabd v22.16b , v2.16b, v6.16b //Ap = ABS(p2 - p0)
+ sqshrn v19.8b, v20.8h, #1 //((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) H
+ mov v18.d[1], v19.d[0]
+ uabd v20.16b , v12.16b, v8.16b //Aq = ABS(q2 - q0)
+ cmhi v22.16b, v28.16b , v22.16b //Ap < Beta
+ smin v18.16b, v18.16b , v16.16b //min(delatq1,C0)
+ cmhi v20.16b, v28.16b , v20.16b //Aq <Beta
+ usubl v28.8h, v8.8b, v6.8b //(q0 - p0) L
+ smax v18.16b, v18.16b , v30.16b //max(deltaq1,-C0)
+ usubl v30.8h, v9.8b, v7.8b //(q0 - p0) H
+ shl v28.8h, v28.8h, #2 //(q0 - p0)<<2 L
+ sub v16.16b, v16.16b , v22.16b //C0 + (Ap < Beta): a true mask byte is 0xFF (-1), so subtracting it adds 1
+ shl v30.8h, v30.8h, #2 //(q0 - p0) << 2) H
+ uaddw v28.8h, v28.8h , v4.8b //((q0 - p0) << 2) + (p1 L
+ uaddw v30.8h, v30.8h , v5.8b //((q0 - p0) << 2) + (p1 H
+ usubw v28.8h, v28.8h , v10.8b //((q0 - p0) << 2) + (p1 - q1) L
+ usubw v30.8h, v30.8h , v11.8b //((q0 - p0) << 2) + (p1 - q1) H
+ bic v22.16b, v22.16b , v26.16b //final condition for p1
+ rshrn v28.8b, v28.8h, #3 //delta = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3); L
+ rshrn v29.8b, v30.8h, #3 //delta = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) H
+ mov v28.d[1], v29.d[0]
+ sub v16.16b, v16.16b , v20.16b //C0 + (Ap < Beta) + (Aq < Beta)
+ bic v20.16b, v20.16b , v26.16b //final condition for q1
+ abs v30.16b, v28.16b //abs(delta)
+ and v24.16b, v24.16b , v22.16b //delatp1
+ and v18.16b, v18.16b , v20.16b //delta q1
+ umin v30.16b, v30.16b , v16.16b //min((abs(delta),C)
+ add v4.16b, v4.16b , v24.16b //p1+deltap1
+ add v10.16b, v10.16b , v18.16b //q1+deltaq1
+ mov v5.d[0], v4.d[1]
+ mov v11.d[0], v10.d[1]
+ bic v30.16b, v30.16b , v26.16b //abs(delta) of pixels to be changed only
+ // VCGE.S8 Q14, Q14,#0 //sign(delta)
+ cmge v28.16b, v28.16b , #0 //mask: delta >= 0
+ uqsub v22.16b, v6.16b , v30.16b //clip(p0-delta)
+
+ trn1 v21.8b, v0.8b, v2.8b
+ trn2 v2.8b, v0.8b, v2.8b //row1 &2
+ mov v0.8b, v21.8b
+ uqadd v6.16b, v6.16b , v30.16b //clip(p0+delta)
+
+ trn1 v21.8b, v1.8b, v3.8b
+ trn2 v3.8b, v1.8b, v3.8b //row9 &10
+ mov v1.8b, v21.8b
+ uqadd v24.16b, v8.16b , v30.16b //clip(q0+delta)
+ trn1 v21.8b, v12.8b, v14.8b
+ trn2 v14.8b, v12.8b, v14.8b //row7 & 8
+ mov v12.8b, v21.8b
+ uqsub v8.16b, v8.16b , v30.16b //clip(q0-delta)
+ trn1 v21.8b, v13.8b, v15.8b
+ trn2 v15.8b, v13.8b, v15.8b //row15 & 16
+ mov v13.8b, v21.8b
+ bif v6.16b, v22.16b , v28.16b //p0
+ bif v8.16b, v24.16b , v28.16b //q0
+ mov v7.d[0], v6.d[1]
+ mov v9.d[0], v8.d[1]
+ trn1 v21.8b, v4.8b, v6.8b
+ trn2 v6.8b, v4.8b, v6.8b //row3&row4
+ mov v4.8b, v21.8b
+ trn1 v21.8b, v8.8b, v10.8b
+ trn2 v10.8b, v8.8b, v10.8b //row5&6
+ mov v8.8b, v21.8b
+ trn1 v21.8b, v5.8b, v7.8b
+ trn2 v7.8b, v5.8b, v7.8b //row11 & 12
+ mov v5.8b, v21.8b
+ trn1 v21.8b, v9.8b, v11.8b
+ trn2 v11.8b, v9.8b, v11.8b //row13 &14
+ mov v9.8b, v21.8b
+ trn1 v21.4h, v2.4h, v6.4h
+ trn2 v6.4h, v2.4h, v6.4h //row2 & row4
+ mov v2.8b, v21.8b
+ trn1 v21.4h, v10.4h, v14.4h
+ trn2 v14.4h, v10.4h, v14.4h //row6 & row8
+ mov v10.8b, v21.8b
+ trn1 v21.4h, v3.4h, v7.4h
+ trn2 v7.4h, v3.4h, v7.4h //row10 & 12
+ mov v3.8b, v21.8b
+ trn1 v21.4h, v11.4h, v15.4h
+ trn2 v15.4h, v11.4h, v15.4h //row14 & row16
+ mov v11.8b, v21.8b
+ trn1 v21.2s, v6.2s, v14.2s
+ trn2 v14.2s, v6.2s, v14.2s //row4 & 8
+ mov v6.8b, v21.8b
+ trn1 v21.2s, v7.2s, v15.2s
+ trn2 v15.2s, v7.2s, v15.2s //row 12 & 16
+ mov v7.8b, v21.8b
+ //rows 4, 8, 12 and 16 are now back in row order
+ trn1 v21.4h, v0.4h, v4.4h
+ trn2 v4.4h, v0.4h, v4.4h //row1 & 3
+ mov v0.8b, v21.8b
+ trn1 v21.4h, v8.4h, v12.4h
+ trn2 v12.4h, v8.4h, v12.4h //row 5 & 7
+ mov v8.8b, v21.8b
+ trn1 v21.4h, v1.4h, v5.4h
+ trn2 v5.4h, v1.4h, v5.4h //row9 & row11
+ mov v1.8b, v21.8b
+ trn1 v21.4h, v9.4h, v13.4h
+ trn2 v13.4h, v9.4h, v13.4h //row13 & row15
+ mov v9.8b, v21.8b
+ sub x0, x0, x1, lsl#4 //restore pointer: step back over the 16 rows that were loaded
+ trn1 v21.2s, v0.2s, v8.2s
+ trn2 v8.2s, v0.2s, v8.2s //row1 & row5
+ mov v0.8b, v21.8b
+ trn1 v21.2s, v1.2s, v9.2s
+ trn2 v9.2s, v1.2s, v9.2s //row9 & 13
+ mov v1.8b, v21.8b
+ trn1 v21.2s, v2.2s, v10.2s
+ trn2 v10.2s, v2.2s, v10.2s //row2 &6
+ mov v2.8b, v21.8b
+ trn1 v21.2s, v3.2s, v11.2s
+ trn2 v11.2s, v3.2s, v11.2s //row10&row14
+ mov v3.8b, v21.8b
+ trn1 v21.2s, v4.2s, v12.2s
+ trn2 v12.2s, v4.2s, v12.2s //row3 & 7
+ mov v4.8b, v21.8b
+ trn1 v21.2s, v5.2s, v13.2s
+ trn2 v13.2s, v5.2s, v13.2s //row11 & row15
+ mov v5.8b, v21.8b
+ st1 {v0.8b}, [x0], x1 //row1
+ st1 {v2.8b}, [x0], x1 //row2
+ st1 {v4.8b}, [x0], x1 //row3
+ st1 {v6.8b}, [x0], x1 //row4
+ st1 {v8.8b}, [x0], x1 //row5
+ st1 {v10.8b}, [x0], x1 //row6
+ st1 {v12.8b}, [x0], x1 //row7
+ st1 {v14.8b}, [x0], x1 //row8
+ st1 {v1.8b}, [x0], x1 //row9
+ st1 {v3.8b}, [x0], x1 //row10
+ st1 {v5.8b}, [x0], x1 //row11
+ st1 {v7.8b}, [x0], x1 //row12
+ st1 {v9.8b}, [x0], x1 //row13
+ st1 {v11.8b}, [x0], x1 //row14
+ st1 {v13.8b}, [x0], x1 //row15
+ st1 {v15.8b}, [x0], x1 //row16
+
+ // LDMFD sp!,{x12,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Performs filtering of a luma block vertical edge when the
+//* boundary strength is set to 4
+//*
+//* @par Description:
+//* This operation is described in Sec. 8.7.2.4 under the title
+//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
+//*
+//* @param[in] x0 - pu1_src
+//* Pointer to the src sample q0
+//*
+//* @param[in] x1 - src_strd
+//* Source stride
+//*
+//* @param[in] x2 - alpha
+//* Alpha Value for the boundary
+//*
+//* @param[in] x3 - beta
+//* Beta Value for the boundary
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+ .global ih264_deblk_luma_vert_bs4_av8
+
+ih264_deblk_luma_vert_bs4_av8:
+ //in: x0 = pu1_src (q0), w1 = src_strd, w2 = alpha, w3 = beta
+ sxtw x1, w1 //src_strd is a 32-bit argument; x1 is used below as a 64-bit post-index/offset, so sign-extend it first
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ sub x0, x0, #4 //pointer uc_edgePixel-4
+ mov x17, x0 //copy of the start pointer (x17 is not read again in this routine)
+ //loading p3:p2:p1:p0:q0:q1:q2:q3 for every row
+ ld1 {v0.8b}, [x0], x1 //row1
+ ld1 {v2.8b}, [x0], x1 //row2
+ ld1 {v4.8b}, [x0], x1 //row3
+ ld1 {v6.8b}, [x0], x1 //row4
+ ld1 {v8.8b}, [x0], x1 //row5
+ ld1 {v10.8b}, [x0], x1 //row6
+ ld1 {v12.8b}, [x0], x1 //row7
+ ld1 {v14.8b}, [x0], x1 //row8
+ ld1 {v1.8b}, [x0], x1 //row9
+ ld1 {v3.8b}, [x0], x1 //row10
+ ld1 {v5.8b}, [x0], x1 //row11
+ ld1 {v7.8b}, [x0], x1 //row12
+ ld1 {v9.8b}, [x0], x1 //row13
+ ld1 {v11.8b}, [x0], x1 //row14
+ ld1 {v13.8b}, [x0], x1 //row15
+ ld1 {v15.8b}, [x0], x1 //row16
+
+ //taking two 8x8 transposes
+ //2X2 transposes
+ trn1 v21.8b, v0.8b, v2.8b
+ trn2 v2.8b, v0.8b, v2.8b //row1 &2
+ mov v0.8b, v21.8b
+ trn1 v21.8b, v4.8b, v6.8b
+ trn2 v6.8b, v4.8b, v6.8b //row3&row4
+ mov v4.8b, v21.8b
+ trn1 v21.8b, v8.8b, v10.8b
+ trn2 v10.8b, v8.8b, v10.8b //row5&6
+ mov v8.8b, v21.8b
+ trn1 v21.8b, v12.8b, v14.8b
+ trn2 v14.8b, v12.8b, v14.8b //row7 & 8
+ mov v12.8b, v21.8b
+ trn1 v21.8b, v1.8b, v3.8b
+ trn2 v3.8b, v1.8b, v3.8b //row9 &10
+ mov v1.8b , v21.8b
+ trn1 v21.8b, v5.8b, v7.8b
+ trn2 v7.8b, v5.8b, v7.8b //row11 & 12
+ mov v5.8b , v21.8b
+ trn1 v21.8b, v9.8b, v11.8b
+ trn2 v11.8b, v9.8b, v11.8b //row13 &14
+ mov v9.8b , v21.8b
+ trn1 v21.8b, v13.8b, v15.8b
+ trn2 v15.8b, v13.8b, v15.8b //row15 & 16
+ mov v13.8b , v21.8b
+ //4x4 transposes
+ trn1 v21.4h, v2.4h, v6.4h
+ trn2 v6.4h, v2.4h, v6.4h //row2 & row4
+ mov v2.8b, v21.8b
+ trn1 v21.4h, v10.4h, v14.4h
+ trn2 v14.4h, v10.4h, v14.4h //row6 & row8
+ mov v10.8b , v21.8b
+ trn1 v21.4h, v3.4h, v7.4h
+ trn2 v7.4h, v3.4h, v7.4h //row10 & 12
+ mov v3.8b, v21.8b
+ trn1 v21.4h, v11.4h, v15.4h
+ trn2 v15.4h, v11.4h, v15.4h //row14 & row16
+ mov v11.8b, v21.8b
+ trn1 v21.2s, v6.2s, v14.2s
+ trn2 v14.2s, v6.2s, v14.2s //row4 & 8
+ mov v6.8b, v21.8b
+ trn1 v21.2s, v7.2s, v15.2s
+ trn2 v15.2s, v7.2s, v15.2s //row 12 & 16
+ mov v7.8b, v21.8b
+ //now v6/v7 -> p0 and v14/v15 -> q3
+ trn1 v21.4h, v0.4h, v4.4h
+ trn2 v4.4h, v0.4h, v4.4h //row1 & 3
+ mov v0.8b , v21.8b
+ trn1 v21.4h, v8.4h, v12.4h
+ trn2 v12.4h, v8.4h, v12.4h //row 5 & 7
+ mov v8.8b, v21.8b
+ trn1 v21.4h, v1.4h, v5.4h
+ trn2 v5.4h, v1.4h, v5.4h //row9 & row11
+ mov v1.8b, v21.8b
+ trn1 v21.4h, v9.4h, v13.4h
+ trn2 v13.4h, v9.4h, v13.4h //row13 & row15
+ mov v9.8b , v21.8b
+ trn1 v21.2s, v0.2s, v8.2s
+ trn2 v8.2s, v0.2s, v8.2s //row1 & row5
+ mov v0.8b, v21.8b
+ trn1 v21.2s, v1.2s, v9.2s
+ trn2 v9.2s, v1.2s, v9.2s //row9 & 13
+ mov v1.8b, v21.8b
+ //now v0/v1 -> p3 & v8/v9 -> q0
+ //starting processing as p0 and q0 are now ready
+ //v2/v3 -> p2 & v10/v11 -> q1 once the row2/6 and row10/14 transposes below are done
+ mov v31.d[0], v14.d[0] //save q3 (L) : v14 is reused as a scratch mask below
+ mov v31.d[1], v15.d[0] //save q3 (H)
+ trn1 v21.2s, v4.2s, v12.2s
+ trn2 v12.2s, v4.2s, v12.2s //row3 & 7
+ mov v4.8b, v21.8b
+ movi v28.8h, #2
+ trn1 v21.2s, v5.2s, v13.2s
+ trn2 v13.2s, v5.2s, v13.2s //row11 & row15
+ mov v5.8b, v21.8b
+ uaddl v16.8h, v6.8b, v8.8b //p0+q0 L
+ trn1 v21.2s, v2.2s, v10.2s
+ trn2 v10.2s, v2.2s, v10.2s //row2 &6
+ mov v2.8b, v21.8b
+ uaddl v18.8h, v7.8b, v9.8b //p0+q0 H
+ trn1 v21.2s, v3.2s, v11.2s
+ trn2 v11.2s, v3.2s, v11.2s //row10&row14
+ mov v3.8b, v21.8b
+ uaddw v20.8h, v16.8h , v4.8b //p0+q0+p1 L
+ uaddw v22.8h, v18.8h , v5.8b //p0+q0+p1 H
+ uaddl v24.8h, v2.8b, v10.8b //p2+q1 L
+ uaddl v26.8h, v3.8b, v11.8b //p2+q1 H
+ mla v24.8h, v20.8h , v28.8h //p2 + X2(p1) + X2(p0) + X2(q0) + q1 L
+ mla v26.8h, v22.8h , v28.8h //p2 + X2(p1) + X2(p0) + X2(q0) + q1 H
+ movi v28.16b, #2
+ uaddw v16.8h, v20.8h , v2.8b //p0+q0+p1+p2 L
+ uaddw v18.8h, v22.8h , v3.8b //p0+q0+p1+p2 H
+ dup v30.16b, w2 //duplicate alpha
+ rshrn v20.8b, v16.8h, #2 //(p2 + p1 + p0 + q0 + 2) >> 2)L p1'
+ rshrn v21.8b, v18.8h, #2 //(p2 + p1 + p0 + q0 + 2) >> 2)H p1'
+ mov v20.d[1] , v21.d[0]
+ mov v0.d[1] , v1.d[0]
+ mov v2.d[1] , v3.d[0]
+ mov v4.d[1] , v5.d[0]
+ mov v6.d[1] , v7.d[0]
+ mov v8.d[1] , v9.d[0]
+ mov v10.d[1] , v11.d[0]
+ mov v12.d[1] , v13.d[0]
+ mov v14.d[1] , v15.d[0]
+ uabd v22.16b , v6.16b, v8.16b //ABS(p0 - q0)
+ usra v28.16b, v30.16b, #2 //alpha >>2 +2
+ uabd v30.16b , v2.16b, v6.16b //Ap = ABS(p2 - p0)
+ rshrn v24.8b, v24.8h, #3 //((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) L p0'
+ rshrn v25.8b, v26.8h, #3 //((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) H p0'
+ mov v24.d[1] , v25.d[0]
+ dup v26.16b, w3 //beta
+ cmhi v28.16b, v28.16b , v22.16b //ABS(p0 - q0) <((Alpha >>2) + 2)
+ uaddl v22.8h, v6.8b, v10.8b //p0+q1 L
+ cmhi v14.16b, v26.16b , v30.16b //beta>Ap
+ uaddl v30.8h, v7.8b, v11.8b //p0+q1 H
+ uaddw v22.8h, v22.8h , v4.8b //p0+q1+p1 L
+ uaddw v30.8h, v30.8h , v5.8b //p0+q1+p1 H
+ uaddw v22.8h, v22.8h , v4.8b //p0+q1+2*p1 L
+ uaddw v30.8h, v30.8h , v5.8b //p0+q1+2*p1 H
+ and v14.16b, v14.16b , v28.16b //(Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)
+ rshrn v22.8b, v22.8h, #2 //((X2(p1) + p0 + q1 + 2) >> 2) L p0"
+ rshrn v23.8b, v30.8h, #2 //((X2(p1) + p0 + q1 + 2) >> 2) H p0"
+ mov v22.d[1] , v23.d[0]
+ uaddl v30.8h, v2.8b, v0.8b //p2+p3 L
+ bif v24.16b, v22.16b , v14.16b //p0' or p0 "
+ uaddl v22.8h, v3.8b, v1.8b //p2+p3 H
+ add v30.8h, v30.8h , v30.8h //2*(p2+p3) L
+ add v22.8h, v22.8h , v22.8h //2*(p2+p3)H
+ add v16.8h, v16.8h , v30.8h //(X2(p3) + X3(p2) + p1 + p0 + q0) L
+ add v18.8h, v18.8h , v22.8h //(X2(p3) + X3(p2) + p1 + p0 + q0) H
+ uabd v30.16b , v12.16b, v8.16b //Aq = ABS(q2 - q0)
+ uabd v22.16b , v10.16b, v8.16b //ABS(q1 - q0)
+ rshrn v16.8b, v16.8h, #3 //((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); L p2'
+ rshrn v17.8b, v18.8h, #3 //((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); H p2'
+ mov v16.d[1] , v17.d[0]
+ uabd v18.16b , v4.16b, v6.16b //ABS(p1 - p0)
+ cmhi v30.16b, v26.16b , v30.16b //Aq < Beta
+ cmhs v22.16b, v22.16b, v26.16b //ABS(q1 - q0) >= Beta
+ cmhs v18.16b, v18.16b, v26.16b //ABS(p1 - p0) >= Beta
+ dup v26.16b, w2 //duplicate alpha
+ and v30.16b, v30.16b , v28.16b //(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
+ uabd v28.16b , v6.16b, v8.16b //ABS(p0 - q0)
+ orr v22.16b, v22.16b , v18.16b //ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta
+ uaddl v18.8h, v6.8b, v8.8b //p0+q0 L
+ cmhs v28.16b, v28.16b, v26.16b //ABS(p0 - q0) >= Alpha
+ uaddl v26.8h, v7.8b, v9.8b //p0+q0 H
+ uaddw v18.8h, v18.8h , v10.8b //p0+q0+q1 L
+ orr v22.16b, v22.16b , v28.16b //ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta||ABS(p0 - q0) >= Alpha
+ uaddw v26.8h, v26.8h , v11.8b //p0+q0+q1 H
+ bic v14.16b, v14.16b , v22.16b //final condn for p's
+ movi v28.16b, #2
+ bif v6.16b, v24.16b , v22.16b //final p0
+ bit v2.16b, v16.16b , v14.16b //final p2
+ bif v20.16b, v4.16b , v14.16b //final p1
+ mov v7.d[0] , v6.d[1]
+ mov v3.d[0] , v2.d[1]
+ mov v21.d[0] , v20.d[1]
+ uaddl v24.8h, v8.8b, v4.8b //q0+p1 L
+ umlal v24.8h, v10.8b, v28.8b //X2(q1) + q0 + p1 L
+ uaddl v16.8h, v9.8b, v5.8b //q0+p1 H
+ umlal v16.8h, v11.8b, v28.8b //X2(q1) + q0 + p1 H
+ movi v28.8h, #2
+ uaddl v14.8h, v4.8b, v12.8b //p1+q2 L
+ mla v14.8h, v18.8h , v28.8h //p1 + X2(p0) + X2(q0) + X2(q1) + q2L
+ uaddl v4.8h, v5.8b, v13.8b //p1+q2H
+ mla v4.8h, v26.8h , v28.8h //p1 + X2(p0) + X2(q0) + X2(q1) + q2H
+ rshrn v24.8b, v24.8h, #2 //(X2(q1) + q0 + p1 + 2) >> 2; L q0'
+ rshrn v25.8b, v16.8h, #2 //(X2(q1) + q0 + p1 + 2) >> 2; H q0'
+ mov v24.d[1] , v25.d[0]
+ uaddw v18.8h, v18.8h , v12.8b //p0 + q0 + q1 + q2 L
+ uaddw v26.8h, v26.8h , v13.8b //p0 + q0 + q1 + q2 H
+ rshrn v16.8b, v14.8h, #3 //(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 L qo"
+ mov v14.16b, v31.16b //restore q3 saved in v31 before v14 was used as scratch
+ rshrn v17.8b, v4.8h, #3 //(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 H qo"
+ mov v16.d[1] , v17.d[0]
+ rshrn v4.8b, v18.8h, #2 //p0 + q0 + q1 + q2 + 2)>>2 L q1'
+ rshrn v5.8b, v26.8h, #2 //p0 + q0 + q1 + q2 + 2)>>2 H q1'
+ mov v4.d[1] , v5.d[0]
+ bit v24.16b, v16.16b , v30.16b //q0' or q0"
+ bic v30.16b, v30.16b , v22.16b //final condn for q's
+ trn1 v31.8b, v0.8b, v2.8b
+ trn2 v2.8b, v0.8b, v2.8b //row1 &2
+ mov v0.8b, v31.8b
+ bit v10.16b, v4.16b , v30.16b //final q1
+ mov v11.d[0] , v10.d[1]
+ mov v25.d[0] , v24.d[1]
+ mov v31.d[0] , v30.d[1] //v31 is overwritten by the next trn1 before it is read
+ trn1 v31.8b, v1.8b, v3.8b
+ trn2 v3.8b, v1.8b, v3.8b //row9 &10
+ mov v1.8b, v31.8b
+ uaddl v16.8h, v12.8b, v14.8b //q2+q3 L
+ trn1 v31.8b, v20.8b, v6.8b
+ trn2 v6.8b, v20.8b, v6.8b //row3&row4
+ mov v20.8b , v31.8b
+ uaddl v4.8h, v13.8b, v15.8b //q2+q3 H
+ trn1 v31.8b, v21.8b, v7.8b
+ trn2 v7.8b, v21.8b, v7.8b //row11 & 12
+ mov v21.8b , v31.8b
+ mla v18.8h, v16.8h , v28.8h //X2(q3) + X3(q2) + q1 + q0 + p0 L
+ trn1 v31.4h, v2.4h, v6.4h
+ trn2 v6.4h, v2.4h, v6.4h //row2 & row4
+ mov v2.8b, v31.8b
+ mla v26.8h, v4.8h , v28.8h //X2(q3) + X3(q2) + q1 + q0 + p0 H
+ trn1 v31.4h, v3.4h, v7.4h
+ trn2 v7.4h, v3.4h, v7.4h //row10 & 12
+ mov v3.8b , v31.8b
+ bif v8.16b, v24.16b , v22.16b //final q0
+ mov v9.d[0] , v8.d[1]
+ trn1 v31.4h, v0.4h, v20.4h
+ trn2 v20.4h, v0.4h, v20.4h //row1 & 3
+ mov v0.8b , v31.8b
+ rshrn v18.8b, v18.8h, #3 //(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; L
+ trn1 v31.4h, v1.4h, v21.4h
+ trn2 v21.4h, v1.4h, v21.4h //row9 & row11
+ mov v1.8b, v31.8b
+ rshrn v19.8b, v26.8h, #3 //(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; H
+ mov v18.d[1] , v19.d[0]
+ trn1 v31.8b, v8.8b, v10.8b
+ trn2 v10.8b, v8.8b, v10.8b //row5&6
+ mov v8.8b, v31.8b
+ bit v12.16b, v18.16b , v30.16b //final q2
+ mov v13.d[0] , v12.d[1]
+ trn1 v31.8b, v9.8b, v11.8b
+ trn2 v11.8b, v9.8b, v11.8b //row13 &14
+ mov v9.8b, v31.8b
+ trn1 v31.8b, v12.8b, v14.8b
+ trn2 v14.8b, v12.8b, v14.8b //row7 & 8
+ mov v12.8b, v31.8b
+ trn1 v31.8b, v13.8b, v15.8b
+ trn2 v15.8b, v13.8b, v15.8b //row15 & 16
+ mov v13.8b , v31.8b
+ trn1 v31.4h, v10.4h, v14.4h
+ trn2 v14.4h, v10.4h, v14.4h //row6 & row8
+ mov v10.8b, v31.8b
+ trn1 v31.4h, v11.4h, v15.4h
+ trn2 v15.4h, v11.4h, v15.4h //row14 & row16
+ mov v11.8b, v31.8b
+ //remaining steps of the transpose back to row order
+ trn1 v31.4h, v8.4h, v12.4h
+ trn2 v12.4h, v8.4h, v12.4h //row 5 & 7
+ mov v8.8b, v31.8b
+ trn1 v31.4h, v9.4h, v13.4h
+ trn2 v13.4h, v9.4h, v13.4h //row13 & row15
+ mov v9.8b, v31.8b
+ sub x0, x0, x1, lsl#4 //restore pointer: step back over the 16 rows that were loaded
+ trn1 v31.2s, v6.2s, v14.2s
+ trn2 v14.2s, v6.2s, v14.2s //row4 & 8
+ mov v6.8b , v31.8b
+ trn1 v31.2s, v7.2s, v15.2s
+ trn2 v15.2s, v7.2s, v15.2s //row 12 & 16
+ mov v7.8b, v31.8b
+ trn1 v31.2s, v0.2s, v8.2s
+ trn2 v8.2s, v0.2s, v8.2s //row1 & row5
+ mov v0.8b , v31.8b
+ trn1 v31.2s, v1.2s, v9.2s
+ trn2 v9.2s, v1.2s, v9.2s //row9 & 13
+ mov v1.8b , v31.8b
+ trn1 v31.2s, v2.2s, v10.2s
+ trn2 v10.2s, v2.2s, v10.2s //row2 &6
+ mov v2.8b , v31.8b
+ trn1 v31.2s, v3.2s, v11.2s
+ trn2 v11.2s, v3.2s, v11.2s //row10&row14
+ mov v3.8b , v31.8b
+ trn1 v31.2s, v20.2s, v12.2s
+ trn2 v12.2s, v20.2s, v12.2s //row3 & 7
+ mov v20.8b , v31.8b
+ trn1 v31.2s, v21.2s, v13.2s
+ trn2 v13.2s, v21.2s, v13.2s //row11 & row15
+ mov v21.8b, v31.8b
+ st1 {v0.8b}, [x0], x1 //row1
+ st1 {v2.8b}, [x0], x1 //row2
+ st1 {v20.8b}, [x0], x1 //row3
+ st1 {v6.8b}, [x0], x1 //row4
+ st1 {v8.8b}, [x0], x1 //row5
+ st1 {v10.8b}, [x0], x1 //row6
+ st1 {v12.8b}, [x0], x1 //row7
+ st1 {v14.8b}, [x0], x1 //row8
+ st1 {v1.8b}, [x0], x1 //row9
+ st1 {v3.8b}, [x0], x1 //row10
+ st1 {v21.8b}, [x0], x1 //row11
+ st1 {v7.8b}, [x0], x1 //row12
+ st1 {v9.8b}, [x0], x1 //row13
+ st1 {v11.8b}, [x0], x1 //row14
+ st1 {v13.8b}, [x0], x1 //row15
+ st1 {v15.8b}, [x0], x1 //row16
+
+ // LDMFD sp!,{x12,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
diff --git a/common/armv8/ih264_default_weighted_pred_av8.s b/common/armv8/ih264_default_weighted_pred_av8.s
new file mode 100755
index 0000000..aefb902
--- /dev/null
+++ b/common/armv8/ih264_default_weighted_pred_av8.s
@@ -0,0 +1,353 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_default_weighted_pred_av8.s
+//*
+//* @brief
+//* Contains function definitions for default weighted prediction.
+//* Functions are coded using NEON assembly instructions and can be compiled using ARM RVCT
+//*
+//* @author
+//* Kaushik Senthoor R
+//*
+//* @par List of Functions:
+//*
+//* - ih264_default_weighted_pred_luma_av8()
+//* - ih264_default_weighted_pred_chroma_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//*******************************************************************************
+//* @function
+//* ih264_default_weighted_pred_luma_av8()
+//*
+//* @brief
+//* This routine performs the default weighted prediction as described in sec
+//* 8.4.2.3.1 titled "Default weighted sample prediction process" for luma.
+//*
+//* @par Description:
+//* This function gets two ht x wd blocks, calculates their rounded-average and
+//* stores it in the destination block.
+//*
+//* @param[in] puc_src1:
+//* UWORD8 Pointer to the buffer containing the first input block.
+//*
+//* @param[in] puc_src2:
+//* UWORD8 Pointer to the buffer containing the second input block.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd1
+//* Stride of the first input buffer
+//*
+//* @param[in] src_strd2
+//* Stride of the second input buffer
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+//*
+//*******************************************************************************
+//*/
+//void ih264_default_weighted_pred_luma_av8(UWORD8 *puc_src1,
+// UWORD8 *puc_src2,
+// UWORD8 *puc_dst,
+// WORD32 src_strd1,
+// WORD32 src_strd2,
+// WORD32 dst_strd,
+// UWORD8 ht,
+// UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0 => puc_src1
+// x1 => puc_src2
+// x2 => puc_dst
+// x3 => src_strd1
+// x4 => src_strd2
+// x5 => dst_strd
+// w6 => ht
+// w7 => wd
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_default_weighted_pred_luma_av8
+//in: x0 = puc_src1, x1 = puc_src2, x2 = puc_dst, w3 = src_strd1, w4 = src_strd2, w5 = dst_strd, w6 = ht, w7 = wd
+ih264_default_weighted_pred_luma_av8:
+ sxtw x3, w3 //strides are WORD32: sign-extend before they are used as 64-bit post-index registers
+ sxtw x4, w4
+ sxtw x5, w5
+ uxtb w6, w6 //ht and wd are UWORD8: clear the bits above bit 7, which the caller need not have zeroed
+ uxtb w7, w7
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ cmp w7, #16
+ beq loop_16 //branch if wd is 16
+ cmp w7, #8
+ beq loop_8 //branch if wd is 8
+
+loop_4: //each iteration processes four rows; two 4-byte rows are packed per 64-bit register
+ ld1 {v0.s}[0], [x0], x3 //load row 1 in source 1
+ ld1 {v0.s}[1], [x0], x3 //load row 2 in source 1
+ ld1 {v2.s}[0], [x1], x4 //load row 1 in source 2
+ ld1 {v2.s}[1], [x1], x4 //load row 2 in source 2
+ ld1 {v1.s}[0], [x0], x3 //load row 3 in source 1
+ ld1 {v1.s}[1], [x0], x3 //load row 4 in source 1
+ urhadd v0.8b, v0.8b , v2.8b //rounded average of rows 1 and 2
+ ld1 {v3.s}[0], [x1], x4 //load row 3 in source 2
+ ld1 {v3.s}[1], [x1], x4 //load row 4 in source 2
+ subs w6, w6, #4 //decrement ht by 4
+ st1 {v0.s}[0], [x2], x5 //store row 1 in destination
+ st1 {v0.s}[1], [x2], x5 //store row 2 in destination
+ urhadd v1.8b, v1.8b , v3.8b //rounded average of rows 3 and 4
+ st1 {v1.s}[0], [x2], x5 //store row 3 in destination
+ st1 {v1.s}[1], [x2], x5 //store row 4 in destination
+ bgt loop_4 //if greater than 0 repeat the loop again
+ b end_loops
+
+loop_8: //each iteration processes four rows
+ ld1 {v0.8b}, [x0], x3 //load row 1 in source 1
+ ld1 {v4.8b}, [x1], x4 //load row 1 in source 2
+ ld1 {v1.8b}, [x0], x3 //load row 2 in source 1
+ ld1 {v5.8b}, [x1], x4 //load row 2 in source 2
+ ld1 {v2.8b}, [x0], x3 //load row 3 in source 1
+ urhadd v0.8b, v0.8b , v4.8b //only 8 bytes per row are valid, so average 8 lanes
+ urhadd v1.8b, v1.8b , v5.8b
+ ld1 {v6.8b}, [x1], x4 //load row 3 in source 2
+ ld1 {v3.8b}, [x0], x3 //load row 4 in source 1
+ urhadd v2.8b, v2.8b , v6.8b
+ ld1 {v7.8b}, [x1], x4 //load row 4 in source 2
+ subs w6, w6, #4 //decrement ht by 4
+ st1 {v0.8b}, [x2], x5 //store row 1 in destination
+ urhadd v3.8b, v3.8b , v7.8b
+ st1 {v1.8b}, [x2], x5 //store row 2 in destination
+ st1 {v2.8b}, [x2], x5 //store row 3 in destination
+ st1 {v3.8b}, [x2], x5 //store row 4 in destination
+ bgt loop_8 //if greater than 0 repeat the loop again
+ b end_loops
+
+loop_16: //each iteration processes eight rows; each 16-byte row is split over two 8-byte registers
+ ld1 {v0.8b, v1.8b}, [x0], x3 //load row 1 in source 1
+ ld1 {v16.8b, v17.8b}, [x1], x4 //load row 1 in source 2
+ ld1 {v2.8b, v3.8b}, [x0], x3 //load row 2 in source 1
+ ld1 {v18.8b, v19.8b}, [x1], x4 //load row 2 in source 2
+ urhadd v0.8b, v0.8b , v16.8b
+ urhadd v1.8b, v1.8b , v17.8b
+ ld1 {v4.8b, v5.8b}, [x0], x3 //load row 3 in source 1
+ ld1 {v20.8b, v21.8b}, [x1], x4 //load row 3 in source 2
+ urhadd v2.8b, v2.8b , v18.8b
+ urhadd v3.8b, v3.8b , v19.8b
+ ld1 {v6.8b, v7.8b}, [x0], x3 //load row 4 in source 1
+ ld1 {v22.8b, v23.8b}, [x1], x4 //load row 4 in source 2
+ urhadd v4.8b, v4.8b , v20.8b
+ urhadd v5.8b, v5.8b , v21.8b
+ ld1 {v8.8b, v9.8b}, [x0], x3 //load row 5 in source 1
+ ld1 {v24.8b, v25.8b}, [x1], x4 //load row 5 in source 2
+ urhadd v6.8b, v6.8b , v22.8b
+ urhadd v7.8b, v7.8b , v23.8b
+ ld1 {v10.8b, v11.8b}, [x0], x3 //load row 6 in source 1
+ ld1 {v26.8b, v27.8b}, [x1], x4 //load row 6 in source 2
+ urhadd v8.8b, v8.8b , v24.8b
+ urhadd v9.8b, v9.8b , v25.8b
+ ld1 {v12.8b, v13.8b}, [x0], x3 //load row 7 in source 1
+ ld1 {v28.8b, v29.8b}, [x1], x4 //load row 7 in source 2
+ urhadd v10.8b, v10.8b , v26.8b
+ urhadd v11.8b, v11.8b , v27.8b
+ ld1 {v14.8b, v15.8b}, [x0], x3 //load row 8 in source 1
+ ld1 {v30.8b, v31.8b}, [x1], x4 //load row 8 in source 2
+ urhadd v12.8b, v12.8b , v28.8b
+ urhadd v13.8b, v13.8b , v29.8b
+ st1 {v0.8b, v1.8b}, [x2], x5 //store row 1 in destination
+ st1 {v2.8b, v3.8b}, [x2], x5 //store row 2 in destination
+ urhadd v14.8b, v14.8b , v30.8b
+ urhadd v15.8b, v15.8b , v31.8b
+ st1 {v4.8b, v5.8b}, [x2], x5 //store row 3 in destination
+ st1 {v6.8b, v7.8b}, [x2], x5 //store row 4 in destination
+ subs w6, w6, #8 //decrement ht by 8
+ st1 {v8.8b, v9.8b}, [x2], x5 //store row 5 in destination
+ st1 {v10.8b, v11.8b}, [x2], x5 //store row 6 in destination
+ st1 {v12.8b, v13.8b}, [x2], x5 //store row 7 in destination
+ st1 {v14.8b, v15.8b}, [x2], x5 //store row 8 in destination
+ bgt loop_16 //if greater than 0 repeat the loop again
+
+end_loops:
+
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+//*******************************************************************************
+//* @function
+//* ih264_default_weighted_pred_chroma_av8()
+//*
+//* @brief
+//* This routine performs the default weighted prediction as described in sec
+//* 8.4.2.3.1 titled "Default weighted sample prediction process" for chroma.
+//*
+//* @par Description:
+//* This function gets two ht x wd blocks, calculates their rounded-average and
+//* stores it in the destination block for U and V.
+//*
+//* @param[in] puc_src1:
+//* UWORD8 Pointer to the buffer containing the first input block.
+//*
+//* @param[in] puc_src2:
+//* UWORD8 Pointer to the buffer containing the second input block.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd1
+//* Stride of the first input buffer
+//*
+//* @param[in] src_strd2
+//* Stride of the second input buffer
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+//*
+//*******************************************************************************
+//*/
+//void ih264_default_weighted_pred_chroma_av8(UWORD8 *puc_src1,
+// UWORD8 *puc_src2,
+// UWORD8 *puc_dst,
+// WORD32 src_strd1,
+// WORD32 src_strd2,
+// WORD32 dst_strd,
+// UWORD8 ht,
+// UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0 => puc_src1
+// x1 => puc_src2
+// x2 => puc_dst
+// x3 => src_strd1
+// x4 => src_strd2
+// x5 => dst_strd
+// x6 => ht
+// x7 => wd
+//
+
+
+
+
+ .global ih264_default_weighted_pred_chroma_av8
+
+ih264_default_weighted_pred_chroma_av8:
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 //src_strd1 is a 32-bit int: sign-extend it before x3 is used as a 64-bit post-index
+ sxtw x4, w4 //same for src_strd2
+ sxtw x5, w5 //same for dst_strd
+ cmp w7, #8 //NOTE(review): wd is UWORD8; assumes the caller zero-extended it to 32 bits -- confirm
+ beq loop_8_uv //branch if wd is 8
+ cmp w7, #4
+ beq loop_4_uv //branch if wd is 4
+ //any other wd falls through to the 2-wide loop below
+loop_2_uv: //each iteration processes two rows
+
+ ld1 {v0.s}[0], [x0], x3 //load row 1 in source 1
+ ld1 {v0.s}[1], [x0], x3 //load row 2 in source 1
+ ld1 {v1.s}[0], [x1], x4 //load row 1 in source 2
+ ld1 {v1.s}[1], [x1], x4 //load row 2 in source 2
+ urhadd v0.8b, v0.8b , v1.8b //rounded average of both rows at once
+ subs w6, w6, #2 //decrement ht by 2
+ st1 {v0.s}[0], [x2], x5 //store row 1 in destination
+ st1 {v0.s}[1], [x2], x5 //store row 2 in destination
+ bgt loop_2_uv //if greater than 0 repeat the loop again
+ b end_loops_uv
+
+loop_4_uv: //each iteration processes two rows
+
+ ld1 {v0.8b}, [x0], x3 //load row 1 in source 1
+ ld1 {v2.8b}, [x1], x4 //load row 1 in source 2
+ ld1 {v1.8b}, [x0], x3 //load row 2 in source 1
+ urhadd v0.8b, v0.8b , v2.8b //rounded average, row 1
+ ld1 {v3.8b}, [x1], x4 //load row 2 in source 2
+ urhadd v1.8b, v1.8b , v3.8b //rounded average, row 2
+ st1 {v0.8b}, [x2], x5 //store row 1 in destination
+ subs w6, w6, #2 //decrement ht by 2
+ st1 {v1.8b}, [x2], x5 //store row 2 in destination
+ bgt loop_4_uv //if greater than 0 repeat the loop again
+ b end_loops_uv
+
+loop_8_uv: //each iteration processes four rows
+
+ ld1 {v0.8b, v1.8b}, [x0], x3 //load row 1 in source 1
+ ld1 {v8.8b, v9.8b}, [x1], x4 //load row 1 in source 2
+ ld1 {v2.8b, v3.8b}, [x0], x3 //load row 2 in source 1
+ urhadd v0.16b, v0.16b , v8.16b //only the low 8 bytes of each result are stored
+ urhadd v1.16b, v1.16b , v9.16b
+ ld1 {v10.8b, v11.8b}, [x1], x4 //load row 2 in source 2
+ ld1 {v4.8b, v5.8b}, [x0], x3 //load row 3 in source 1
+ urhadd v2.16b, v2.16b , v10.16b
+ urhadd v3.16b, v3.16b , v11.16b
+ ld1 {v12.8b, v13.8b}, [x1], x4 //load row 3 in source 2
+ ld1 {v6.8b, v7.8b}, [x0], x3 //load row 4 in source 1
+ urhadd v4.16b, v4.16b , v12.16b
+ urhadd v5.16b, v5.16b , v13.16b
+ ld1 {v14.8b, v15.8b}, [x1], x4 //load row 4 in source 2
+ st1 {v0.8b, v1.8b}, [x2], x5 //store row 1 in destination
+ urhadd v6.16b, v6.16b , v14.16b
+ urhadd v7.16b, v7.16b , v15.16b
+ st1 {v2.8b, v3.8b}, [x2], x5 //store row 2 in destination
+ subs w6, w6, #4 //decrement ht by 4
+ st1 {v4.8b, v5.8b}, [x2], x5 //store row 3 in destination
+ st1 {v6.8b, v7.8b}, [x2], x5 //store row 4 in destination
+ bgt loop_8_uv //if greater than 0 repeat the loop again
+
+end_loops_uv:
+ ldp x19, x20, [sp], #16 //undo the prologue in reverse order
+ pop_v_regs
+ ret
+
diff --git a/common/armv8/ih264_ihadamard_scaling_av8.s b/common/armv8/ih264_ihadamard_scaling_av8.s
new file mode 100755
index 0000000..712c9ae
--- /dev/null
+++ b/common/armv8/ih264_ihadamard_scaling_av8.s
@@ -0,0 +1,250 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// * ih264_ihadamard_scaling_av8.s
+// *
+// * @brief
+// * Contains function definitions for inverse hadamard transform on 4x4 DC outputs
+// * of 16x16 intra-prediction
+// *
+// * @author
+// * Mohit
+// *
+// * @par List of Functions:
+// * - ih264_ihadamard_scaling_4x4_av8()
+// *
+// * @remarks
+// * None
+// *
+.include "ih264_neon_macros.s"
+
+// *******************************************************************************
+// */
+// * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
+// * of a 16x16 intra prediction macroblock, and then performs scaling.
+// * prediction buffer
+// *
+// * @par Description:
+// * The DC coefficients pass through a 2-stage inverse hadamard transform.
+// * This inverse transformed content is scaled based on the Qp value.
+// *
+// * @param[in] pi2_src
+// * input 4x4 block of DC coefficients
+// *
+// * @param[out] pi2_out
+// * output 4x4 block
+// *
+// * @param[in] pu2_iscal_mat
+// * pointer to scaling list
+// *
+// * @param[in] pu2_weigh_mat
+// * pointer to weight matrix
+// *
+// * @param[in] u4_qp_div_6
+// * Floor (qp/6)
+// *
+// * @param[in] pi4_tmp
+// * temporary buffer of size 1*16
+// *
+// * @returns none
+// *
+// * @remarks none
+// *
+// *******************************************************************************
+// */
+// *
+// *******************************************************************************
+// */
+// void ih264_ihadamard_scaling_4x4(word16* pi2_src,
+// word16* pi2_out,
+// const uword16 *pu2_iscal_mat,
+// const uword16 *pu2_weigh_mat,
+// uword32 u4_qp_div_6,
+// word32* pi4_tmp)
+//**************variables vs registers*****************************************
+//x0 => *pi2_src
+//x1 => *pi2_out
+//x2 => *pu2_iscal_mat
+//x3 => *pu2_weigh_mat
+//x4=> u4_qp_div_6
+
+.text
+.p2align 2
+
+ .global ih264_ihadamard_scaling_4x4_av8
+ih264_ihadamard_scaling_4x4_av8:
+
+//scaling is done in two steps after the transform: the products are first
+//shifted left by u4_qp_div_6 (sshl with the shift count broadcast in v14), and then
+//sqrshrn #6 applies a rounding right shift by 6 while narrowing (with saturation) to 16 bits
+ push_v_regs
+
+//=======================inverse hadamard transform================================
+
+ ld4 {v0.4h-v3.4h}, [x0] //de-interleaving load of the 16 coefficients: x4,x5,x6,x7
+
+ dup v14.4s, w4 // broadcast u4_qp_div_6, used later as the sshl shift count
+ ld1 {v15.h}[0], [x3] // pu2_weigh_mat[0]
+ ld1 {v16.h}[0], [x2] //pu2_iscal_mat[0]
+
+ saddl v4.4s, v0.4h, v3.4h //x0 = x4 + x7
+ saddl v5.4s, v1.4h, v2.4h //x1 = x5 + x6
+ ssubl v6.4s, v1.4h, v2.4h //x2 = x5 - x6
+ ssubl v7.4s, v0.4h, v3.4h //x3 = x4 - x7
+
+ add v0.4s, v4.4s, v5.4s //pi4_tmp_ptr[0] = x0 + x1
+ add v1.4s, v7.4s, v6.4s //pi4_tmp_ptr[1] = x3 + x2
+ sub v2.4s, v4.4s, v5.4s //pi4_tmp_ptr[2] = x0 - x1
+ sub v3.4s, v7.4s, v6.4s //pi4_tmp_ptr[3] = x3 - x2
+
+ umull v15.4s, v15.4h, v16.4h //32-bit product; only lane 0 holds loaded data
+ dup v15.4s, v15.s[0] //pu2_weigh_mat[0]*pu2_iscal_mat[0]
+
+ //transpose
+ trn1 v4.4s, v0.4s, v1.4s
+ trn2 v5.4s, v0.4s, v1.4s
+ trn1 v6.4s, v2.4s, v3.4s
+ trn2 v7.4s, v2.4s, v3.4s
+
+ trn1 v0.2d, v4.2d, v6.2d
+ trn2 v2.2d, v4.2d, v6.2d
+ trn1 v1.2d, v5.2d, v7.2d
+ trn2 v3.2d, v5.2d, v7.2d
+ //end transpose
+
+ add v4.4s, v0.4s, v3.4s //x0 = x4+x7
+ add v5.4s, v1.4s, v2.4s //x1 = x5+x6
+ sub v6.4s, v1.4s, v2.4s //x2 = x5-x6
+ sub v7.4s, v0.4s, v3.4s //x3 = x4-x7
+
+ add v0.4s, v4.4s, v5.4s //pi4_tmp_ptr[0] = x0 + x1
+ add v1.4s, v7.4s, v6.4s //pi4_tmp_ptr[1] = x3 + x2
+ sub v2.4s, v4.4s, v5.4s //pi4_tmp_ptr[2] = x0 - x1
+ sub v3.4s, v7.4s, v6.4s //pi4_tmp_ptr[3] = x3 - x2
+
+ mul v0.4s, v0.4s, v15.4s // q0 = p[i] = (x[i] * trns_coeff[i]) where i = 0..3
+ mul v1.4s, v1.4s, v15.4s // q1 = p[i] = (x[i] * trns_coeff[i]) where i = 4..7
+ mul v2.4s, v2.4s, v15.4s // q2 = p[i] = (x[i] * trns_coeff[i]) where i = 8..11
+ mul v3.4s, v3.4s, v15.4s // q3 = p[i] = (x[i] * trns_coeff[i]) where i = 12..15
+
+ sshl v0.4s, v0.4s, v14.4s // q0 = q[i] = (p[i] << (qp/6)) where i = 0..3
+ sshl v1.4s, v1.4s, v14.4s // q1 = q[i] = (p[i] << (qp/6)) where i = 4..7
+ sshl v2.4s, v2.4s, v14.4s // q2 = q[i] = (p[i] << (qp/6)) where i = 8..11
+ sshl v3.4s, v3.4s, v14.4s // q3 = q[i] = (p[i] << (qp/6)) where i = 12..15
+
+ sqrshrn v0.4h, v0.4s, #6 // d0 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 0..3
+ sqrshrn v1.4h, v1.4s, #6 // d1 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 4..7
+ sqrshrn v2.4h, v2.4s, #6 // d2 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 8..11
+ sqrshrn v3.4h, v3.4s, #6 // d3 = c[i] = ((q[i] + 32) >> 6), saturated to 16 bits, where i = 12..15
+
+ st1 {v0.4h-v3.4h}, [x1] //store the 16 16-bit results to pi2_out
+
+ pop_v_regs
+ ret
+
+
+// *******************************************************************************
+// */
+// * @brief This function performs a 2x2 inverse hadamard transform for chroma block
+// *
+// * @par Description:
+// * The DC coefficients pass through a 2-stage inverse hadamard transform.
+// * This inverse transformed content is scaled based on the Qp value.
+// * The DC blocks of both the U and V planes are processed.
+// *
+// * @param[in] pi2_src
+// * input 1x8 block of coeffs. The first 4 are from U and the next 4 from V
+// *
+// * @param[out] pi2_out
+// * output 1x8 block
+// *
+// * @param[in] pu2_iscal_mat
+// * pointer to scaling list
+// *
+// * @param[in] pu2_weigh_mat
+// * pointer to weight matrix
+// *
+// * @param[in] u4_qp_div_6
+// * Floor (qp/6)
+// *
+// * @returns none
+// *
+// * @remarks none
+// *
+// *******************************************************************************
+// */
+// *
+// *******************************************************************************
+// */
+// void ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src,
+// WORD16* pi2_out,
+// const UWORD16 *pu2_iscal_mat,
+// const UWORD16 *pu2_weigh_mat,
+// UWORD32 u4_qp_div_6,
+
+ .global ih264_ihadamard_scaling_2x2_uv_av8
+ih264_ihadamard_scaling_2x2_uv_av8:
+
+//Registers used
+// x0 : *pi2_src
+// x1 : *pi2_out
+// x2 : *pu2_iscal_mat
+// x3 : *pu2_weigh_mat
+// x4 : u4_qp_div_6
+ push_v_regs
+ ld1 {v26.h}[0], [x2] //pu2_iscal_mat[0]
+ ld1 {v27.h}[0], [x3] //pu2_weigh_mat[0]
+
+ sub w4, w4, #5 //qp/6 - 5
+ dup v28.4s, w4 //sshl shift count; a negative count shifts right
+
+ ld2 {v0.4h, v1.4h}, [x0] //load 8 dc coeffs
+ //i2_x4,i2_x6,i2_y4,i2_y6 -> v0
+ //i2_x5,i2_x7,i2_y5,i2_y7 -> v1
+
+ saddl v2.4s, v0.4h, v1.4h //i4_x0 = i4_x4 + i4_x5;...x2
+ ssubl v4.4s, v0.4h, v1.4h //i4_x1 = i4_x4 - i4_x5;...x3
+
+ umull v30.4s, v26.4h, v27.4h //pu2_iscal_mat[0]*pu2_weigh_mat[0]
+ dup v30.4s, v30.s[0] //broadcast the product to all lanes
+
+ trn1 v0.4s, v2.4s, v4.4s
+ trn2 v1.4s, v2.4s, v4.4s //i4_x0 i4_x1 -> q1
+
+ add v2.4s, v0.4s, v1.4s //i4_x4 = i4_x0+i4_x2;.. i4_x5
+ sub v3.4s, v0.4s, v1.4s //i4_x6 = i4_x0-i4_x2;.. i4_x7
+
+ mul v2.4s, v2.4s, v30.4s
+ mul v3.4s, v3.4s, v30.4s
+
+ sshl v2.4s, v2.4s, v28.4s
+ sshl v3.4s, v3.4s, v28.4s
+
+ xtn v0.4h, v2.4s //i4_x4 i4_x5 i4_y4 i4_y5
+ xtn v1.4h, v3.4s //i4_x6 i4_x7 i4_y6 i4_y7
+
+ st2 {v0.2s, v1.2s}, [x1] //store exactly 8 halfwords (16 bytes); the .4s form also wrote 16 zero bytes past pi2_out
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_chroma_av8.s b/common/armv8/ih264_inter_pred_chroma_av8.s
new file mode 100755
index 0000000..714e271
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_chroma_av8.s
@@ -0,0 +1,392 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_chroma_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction interpolation.
+//*
+//* @author
+//* Ittiam
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_chroma_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+///**
+//
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Interprediction chroma filter
+//*
+//* @par Description:
+//* Applies filtering to chroma samples as mentioned in
+//* sec 8.4.2.2.2 titled "chroma sample interpolation process"
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source containing alternate U and V samples
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in]uc_dx
+//* dx value where the sample is to be produced(refer sec 8.4.2.2.2 )
+//*
+//* @param[in] uc_dy
+//* dy value where the sample is to be produced(refer sec 8.4.2.2.2 )
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+//void ih264_inter_pred_chroma(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// UWORD8 u1_dx,
+// UWORD8 u1_dy,
+// WORD32 ht,
+// WORD32 wd)
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => u1_dx
+// x5 => u1_dy
+// x6 => height
+// x7 => width
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_inter_pred_chroma_av8
+
+ih264_inter_pred_chroma_av8:
+ //Bilinear chroma interpolation on interleaved UV samples:
+ //out = (A*(8-dx)*(8-dy) + B*dx*(8-dy) + C*(8-dx)*dy + D*dx*dy + 32) >> 6
+ //where B/D are the samples 2 bytes (one UV pair) to the right of A/C, and C/D are one row below A/B.
+ // STMFD sp!, {x4-x12, x14} //store register values to stack
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ sxtw x2, w2 //src_strd is a 32-bit int: sign-extend before x2 is used as a 64-bit post-index
+ sxtw x3, w3 //same for dst_strd
+ sxtw x6, w6 //ht is compared with 64-bit subs below
+ sxtw x7, w7 //wd is compared with 64-bit subs below
+
+ sub x20, x4, #8 //u1_dx - 8
+ neg x8, x20 //x8 = 8 - u1_dx
+ sub x20, x5, #8 //u1_dy - 8
+ neg x9, x20 //x9 = 8 - u1_dy
+ mul x10, x8, x9 //(8-dx)*(8-dy)
+ mul x11, x4, x9 //dx*(8-dy)
+
+ dup v28.8b, w10 //only the low byte of each product is broadcast
+ dup v29.8b, w11
+
+ mul x10, x8, x5 //(8-dx)*dy
+ mul x11, x4, x5 //dx*dy
+
+ dup v30.8b, w10
+ dup v31.8b, w11
+
+ subs x12, x7, #2 //if wd=2 branch to loop_2
+ beq loop_2
+ subs x12, x7, #4 //if wd=4 branch to loop_4
+ beq loop_4
+ //any other wd falls through to loop_8
+loop_8:
+ ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2 //// Load row0 ;
+ ext v3.8b, v0.8b , v1.8b , #2
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1;
+ umull v20.8h, v0.8b, v28.8b
+ ext v8.8b, v5.8b , v6.8b , #2
+ umlal v20.8h, v3.8b, v29.8b
+ ext v9.8b, v6.8b , v7.8b , #2
+ umlal v20.8h, v5.8b, v30.8b
+ ext v4.8b, v1.8b , v2.8b , #2
+ umlal v20.8h, v8.8b, v31.8b
+ sqrshrun v26.8b, v20.8h, #6
+ umull v22.8h, v1.8b, v28.8b
+ ld1 {v10.8b, v11.8b, v12.8b}, [x0], x2 //// Load row2 ;
+ umlal v22.8h, v4.8b, v29.8b
+ ext v13.8b, v10.8b , v11.8b , #2
+ umlal v22.8h, v6.8b, v30.8b
+ ext v14.8b, v11.8b , v12.8b , #2
+ umlal v22.8h, v9.8b, v31.8b
+ sqrshrun v27.8b, v22.8h, #6
+ umull v24.8h, v5.8b, v28.8b
+ st1 { v26.8b, v27.8b}, [x1], x3 ////Store dest row0
+ umlal v24.8h, v8.8b, v29.8b
+ ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2 //// Load row3 ;
+ umlal v24.8h, v10.8b, v30.8b
+ ext v3.8b, v0.8b , v1.8b , #2
+ umlal v24.8h, v13.8b, v31.8b
+ ext v4.8b, v1.8b , v2.8b , #2
+ umull v16.8h, v6.8b, v28.8b
+ sqrshrun v18.8b, v24.8h, #6
+ umlal v16.8h, v9.8b, v29.8b
+ umlal v16.8h, v11.8b, v30.8b
+ umlal v16.8h, v14.8b, v31.8b
+ sqrshrun v19.8b, v16.8h, #6
+ st1 {v18.8b, v19.8b}, [x1], x3 // store row 1
+ umull v20.8h, v10.8b, v28.8b
+ umlal v20.8h, v13.8b, v29.8b
+ umlal v20.8h, v0.8b, v30.8b
+ umlal v20.8h, v3.8b, v31.8b
+ sqrshrun v26.8b, v20.8h, #6
+ umull v24.8h, v11.8b, v28.8b
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row4;
+ umlal v24.8h, v14.8b, v29.8b
+ ext v8.8b, v5.8b , v6.8b , #2
+ umlal v24.8h, v1.8b, v30.8b
+ ext v9.8b, v6.8b , v7.8b , #2
+ umlal v24.8h, v4.8b, v31.8b
+ umull v20.8h, v0.8b, v28.8b
+ sqrshrun v27.8b, v24.8h, #6
+ umlal v20.8h, v3.8b, v29.8b
+ st1 { v26.8b, v27.8b}, [x1], x3 ////Store dest row2
+ umlal v20.8h, v5.8b, v30.8b
+ umlal v20.8h, v8.8b, v31.8b
+ umull v22.8h, v1.8b, v28.8b
+ umlal v22.8h, v4.8b, v29.8b
+ umlal v22.8h, v6.8b, v30.8b
+ sqrshrun v26.8b, v20.8h, #6
+ umlal v22.8h, v9.8b, v31.8b
+ subs x12, x6, #4
+ sqrshrun v27.8b, v22.8h, #6
+ st1 { v26.8b, v27.8b}, [x1], x3 ////Store dest row3
+
+ beq end_func //If ht=4
+
+ ld1 {v10.8b, v11.8b, v12.8b}, [x0], x2 //// Load row5
+ ext v13.8b, v10.8b , v11.8b , #2
+ umull v24.8h, v5.8b, v28.8b
+ ext v14.8b, v11.8b , v12.8b , #2
+ ld1 {v0.8b, v1.8b, v2.8b}, [x0], x2 //// Load row6;
+ umlal v24.8h, v8.8b, v29.8b
+ umlal v24.8h, v10.8b, v30.8b
+ umlal v24.8h, v13.8b, v31.8b
+ ext v3.8b, v0.8b , v1.8b , #2
+ umull v16.8h, v6.8b, v28.8b
+ sqrshrun v18.8b, v24.8h, #6
+ umlal v16.8h, v9.8b, v29.8b
+ umlal v16.8h, v11.8b, v30.8b
+ umlal v16.8h, v14.8b, v31.8b
+ ext v4.8b, v1.8b , v2.8b , #2
+ sqrshrun v19.8b, v16.8h, #6
+ st1 { v18.8b, v19.8b}, [x1], x3 // store row 4
+ umull v20.8h, v10.8b, v28.8b
+ umlal v20.8h, v13.8b, v29.8b
+ umlal v20.8h, v0.8b, v30.8b
+ umlal v20.8h, v3.8b, v31.8b
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row7;
+ sqrshrun v26.8b, v20.8h, #6
+ umull v24.8h, v11.8b, v28.8b
+ umlal v24.8h, v14.8b, v29.8b
+ ext v8.8b, v5.8b , v6.8b , #2
+ umlal v24.8h, v1.8b, v30.8b
+ umlal v24.8h, v4.8b, v31.8b
+ ext v9.8b, v6.8b , v7.8b , #2
+ sqrshrun v27.8b, v24.8h, #6
+ st1 {v26.8b, v27.8b}, [x1], x3 ////Store dest row5
+ umull v20.8h, v0.8b, v28.8b
+ umlal v20.8h, v3.8b, v29.8b
+ umlal v20.8h, v5.8b, v30.8b
+ umlal v20.8h, v8.8b, v31.8b
+ ld1 {v10.8b, v11.8b, v12.8b}, [x0], x2 //// Load row8 ;
+ sqrshrun v26.8b, v20.8h, #6
+ umull v22.8h, v1.8b, v28.8b
+ umlal v22.8h, v4.8b, v29.8b
+ umlal v22.8h, v6.8b, v30.8b
+ ext v13.8b, v10.8b , v11.8b , #2
+ umlal v22.8h, v9.8b, v31.8b
+ ext v14.8b, v11.8b , v12.8b , #2
+ sqrshrun v27.8b, v22.8h, #6
+ st1 { v26.8b, v27.8b}, [x1], x3 ////Store dest row6
+ umull v24.8h, v5.8b, v28.8b
+ umlal v24.8h, v8.8b, v29.8b
+ umlal v24.8h, v10.8b, v30.8b
+ umlal v24.8h, v13.8b, v31.8b
+ umull v16.8h, v6.8b, v28.8b
+ sqrshrun v18.8b, v24.8h, #6
+ umlal v16.8h, v9.8b, v29.8b
+ umlal v16.8h, v11.8b, v30.8b
+ umlal v16.8h, v14.8b, v31.8b
+ sqrshrun v19.8b, v16.8h, #6
+ st1 { v18.8b, v19.8b}, [x1], x3 // store row 7
+ b end_func
+
+loop_4:
+ ld1 {v0.8b, v1.8b}, [x0], x2 //// Load row0 ;
+ ext v2.8b, v0.8b , v1.8b , #2
+ ld1 {v3.8b, v4.8b}, [x0], x2 //// Load row1;
+ ext v5.8b, v3.8b , v4.8b , #2
+ umull v20.8h, v0.8b, v28.8b
+ umlal v20.8h, v2.8b, v29.8b
+ umlal v20.8h, v3.8b, v30.8b
+ umlal v20.8h, v5.8b, v31.8b
+ ld1 {v6.8b, v7.8b}, [x0], x2 //// Load row2
+ sqrshrun v26.8b, v20.8h, #6
+ ext v8.8b, v6.8b , v7.8b , #2
+ st1 {v26.8b}, [x1], x3 ////Store dest row0
+ umull v22.8h, v3.8b, v28.8b
+ umlal v22.8h, v5.8b, v29.8b
+ umlal v22.8h, v6.8b, v30.8b
+ umlal v22.8h, v8.8b, v31.8b
+ subs x12, x6, #2
+ sqrshrun v27.8b, v22.8h, #6
+ st1 {v27.8b}, [x1], x3 ////Store dest row1
+ beq end_func //If ht=2
+
+ ld1 {v9.8b, v10.8b}, [x0], x2 //// Load row3;
+ ext v11.8b, v9.8b , v10.8b , #2
+ umull v24.8h, v6.8b, v28.8b
+ umlal v24.8h, v8.8b, v29.8b
+ umlal v24.8h, v9.8b, v30.8b
+ umlal v24.8h, v11.8b, v31.8b
+ ld1 {v0.8b, v1.8b}, [x0], x2 //// Load row4 ;
+ sqrshrun v16.8b, v24.8h, #6
+ ext v2.8b, v0.8b , v1.8b , #2
+ st1 {v16.8b}, [x1], x3 ////Store dest row2
+ umull v18.8h, v9.8b, v28.8b
+ umlal v18.8h, v11.8b, v29.8b
+ umlal v18.8h, v0.8b, v30.8b
+ umlal v18.8h, v2.8b, v31.8b
+ subs x12, x6, #4
+ sqrshrun v17.8b, v18.8h, #6
+ st1 {v17.8b}, [x1], x3 ////Store dest row3
+ beq end_func //If ht=4
+
+ ld1 {v3.8b, v4.8b}, [x0], x2 //// Load row5;
+ ext v5.8b, v3.8b , v4.8b , #2
+ umull v20.8h, v0.8b, v28.8b
+ umlal v20.8h, v2.8b, v29.8b
+ umlal v20.8h, v3.8b, v30.8b
+ umlal v20.8h, v5.8b, v31.8b
+ ld1 {v6.8b, v7.8b}, [x0], x2 //// Load row6 ;
+ sqrshrun v26.8b, v20.8h, #6
+ ext v8.8b, v6.8b , v7.8b , #2
+ st1 {v26.8b}, [x1], x3 ////Store dest row4
+ umull v22.8h, v3.8b, v28.8b
+ umlal v22.8h, v5.8b, v29.8b
+ umlal v22.8h, v6.8b, v30.8b
+ umlal v22.8h, v8.8b, v31.8b
+ ld1 {v9.8b, v10.8b}, [x0], x2 //// Load row7;
+ sqrshrun v27.8b, v22.8h, #6
+ ext v11.8b, v9.8b , v10.8b , #2
+ st1 {v27.8b}, [x1], x3 ////Store dest row5
+ umull v24.8h, v6.8b, v28.8b
+ umlal v24.8h, v8.8b, v29.8b
+ umlal v24.8h, v9.8b, v30.8b
+ umlal v24.8h, v11.8b, v31.8b
+ ld1 {v0.8b, v1.8b}, [x0], x2 //// Load row8;
+ sqrshrun v16.8b, v24.8h, #6
+ ext v2.8b, v0.8b , v1.8b , #2
+ st1 {v16.8b}, [x1], x3 ////Store dest row6
+ umull v18.8h, v9.8b, v28.8b
+ umlal v18.8h, v11.8b, v29.8b
+ umlal v18.8h, v0.8b, v30.8b
+ umlal v18.8h, v2.8b, v31.8b
+ sqrshrun v17.8b, v18.8h, #6
+ st1 {v17.8b}, [x1], x3 ////Store dest row7
+ b end_func
+
+loop_2:
+ ld1 {v0.8b}, [x0], x2 //// Load row0 ;
+ ext v2.8b, v0.8b , v0.8b , #2
+ ld1 {v3.8b}, [x0], x2 //// Load row1;
+ ext v5.8b, v3.8b , v3.8b , #2
+ umull v20.8h, v0.8b, v28.8b
+ umlal v20.8h, v2.8b, v29.8b
+ umlal v20.8h, v3.8b, v30.8b
+ umlal v20.8h, v5.8b, v31.8b
+ ld1 {v6.8b}, [x0], x2 //// Load row2
+ sqrshrun v26.8b, v20.8h, #6
+ ext v8.8b, v6.8b , v6.8b , #2
+ st1 {v26.s}[0], [x1], x3 ////Store dest row0
+ umull v22.8h, v3.8b, v28.8b
+ umlal v22.8h, v5.8b, v29.8b
+ umlal v22.8h, v6.8b, v30.8b
+ umlal v22.8h, v8.8b, v31.8b
+ subs x12, x6, #2
+ sqrshrun v27.8b, v22.8h, #6
+ st1 {v27.s}[0], [x1], x3 ////Store dest row1
+ beq end_func //If ht=2
+
+ ld1 {v9.8b}, [x0], x2 //// Load row3;
+ ext v11.8b, v9.8b , v9.8b , #2
+ umull v24.8h, v6.8b, v28.8b
+ umlal v24.8h, v8.8b, v29.8b
+ umlal v24.8h, v9.8b, v30.8b
+ umlal v24.8h, v11.8b, v31.8b
+ ld1 {v0.8b}, [x0], x2 //// Load row4 ;
+ sqrshrun v16.8b, v24.8h, #6
+ ext v2.8b, v0.8b , v0.8b , #2
+ st1 {v16.s}[0], [x1], x3 ////Store dest row2
+ umull v18.8h, v9.8b, v28.8b
+ umlal v18.8h, v11.8b, v29.8b
+ umlal v18.8h, v0.8b, v30.8b
+ umlal v18.8h, v2.8b, v31.8b
+ sqrshrun v17.8b, v18.8h, #6
+ st1 {v17.s}[0], [x1], x3 ////Store dest row3
+
+
+end_func:
+ // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
diff --git a/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s b/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
new file mode 100755
index 0000000..6ad463a
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
@@ -0,0 +1,530 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_filters_luma_horz_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction interpolation.
+//*
+//* @author
+//* Ittiam
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_luma_horz_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Interprediction luma filter for horizontal input
+//*
+//* @par Description:
+//* Applies a 6 tap horizontal filter. The output is clipped to 8 bits
+//* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+//void ih264_inter_pred_luma_horz_av8 (
+// UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd )
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
+
+.text
+.p2align 2
+
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_inter_pred_luma_horz_av8
+
+ih264_inter_pred_luma_horz_av8:
+
+ // Entry point. Arguments as listed in the register table above:
+ // x0 = pu1_src, x1 = pu1_dst, w2 = src_strd, w3 = dst_strd, w4 = ht, w5 = wd.
+ // Dispatches on wd: 8 -> loop_8, 4 -> loop_4, anything else falls through
+ // to loop_16.
+
+ // (ARMv7 original: STMFD sp!, {x4-x12, x14} - store register values to stack)
+ push_v_regs //macro from ih264_neon_macros.s; presumably saves the callee-saved SIMD registers - confirm there
+ stp x19, x20, [sp, #-16]! //save x19/x20; restored at end_func
+
+ // src_strd, dst_strd, ht and wd are WORD32 in the prototype, but below they
+ // are used as full 64-bit registers (x2/x3 as post-index offsets, x4/x5 in
+ // 64-bit sub/subs compares). AAPCS64 leaves the upper 32 bits of a 32-bit
+ // argument register unspecified, so sign-extend them once here.
+ sxtw x2, w2 //src_strd
+ sxtw x3, w3 //dst_strd
+ sxtw x4, w4 //ht
+ sxtw x5, w5 //wd
+
+ sub x0, x0, #2 //pu1_src-2
+ sub x14, x4, #16 //x14 = ht-16, loop control for loop_16/loop_8
+ movi v0.8b, #5 //filter coeff
+ subs x12, x5, #8 //if wd=8 branch to loop_8
+ movi v1.8b, #20 //filter coeff
+ beq loop_8
+
+ subs x12, x5, #4 //if wd=4 branch to loop_4
+ beq loop_4
+
+loop_16: //when wd=16
+ // Each pass filters 8 rows of 16 pixels, two rows at a time. The even row of
+ // a pair is loaded into v2-v4 and the odd row into v5-v7. "column1" and
+ // "column2" in the comments are the left and right 8 output pixels of a row:
+ // v8/v10 accumulate them for the even row, v14/v16 for the odd row.
+ // ext #k builds the vector for tap a[k]; v1 holds 20 and v0 holds 5.
+ //// Processing row0 and row1
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row0
+ add x14, x14, #1 //for checking loop: x14 started as ht-16, so it is 1 here only on the first pass with ht==16
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row0)
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1
+ ext v30.8b, v3.8b , v4.8b, #5 ////extract a[5] (column2,row0)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row1)
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b, #5 ////extract a[5] (column2,row1)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1)
+ ext v31.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row0)
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row1)
+ ext v30.8b, v3.8b , v4.8b, #2 ////extract a[2] (column2,row0)
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row1)
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b, #2 ////extract a[2] (column2,row1)
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1)
+ ext v31.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row0)
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row1)
+ ext v30.8b, v3.8b , v4.8b, #3 ////extract a[3] (column2,row0)
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row1)
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b, #3 ////extract a[3] (column2,row1)
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ ext v31.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row0)
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row1)
+ ext v30.8b, v3.8b , v4.8b, #1 ////extract a[1] (column2,row0)
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row1)
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b, #1 ////extract a[1] (column2,row1)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ ext v31.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row0)
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+ ext v30.8b, v3.8b , v4.8b, #4 ////extract a[4] (column2,row0)
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row1)
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b, #4 ////extract a[4] (column2,row1)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row2
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row3
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row2)
+ st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row0
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ ext v30.8b, v3.8b , v4.8b, #5 ////extract a[5] (column2,row2)
+ sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+
+
+
+//// Processing row2 and row3
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row3)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2)
+ st1 {v23.8b, v24.8b}, [x1], x3 ////Store dest row1
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row2)
+ ext v27.8b, v6.8b , v7.8b, #5 ////extract a[5] (column2,row3)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3)
+ ext v31.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row2)
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row3)
+ ext v30.8b, v3.8b , v4.8b, #2 ////extract a[2] (column2,row2)
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row2)
+ ext v27.8b, v6.8b , v7.8b, #2 ////extract a[2] (column2,row3)
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row2)
+ ext v28.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row3)
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3)
+ ext v31.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row2)
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row3)
+ ext v30.8b, v3.8b , v4.8b, #3 ////extract a[3] (column2,row2)
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2)
+ ext v28.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row3)
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row2)
+ ext v27.8b, v6.8b , v7.8b, #3 ////extract a[3] (column2,row3)
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3)
+ ext v31.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row2)
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row3)
+ ext v30.8b, v3.8b , v4.8b, #1 ////extract a[1] (column2,row2)
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2)
+ ext v28.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row3)
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row2)
+ ext v27.8b, v6.8b , v7.8b, #1 ////extract a[1] (column2,row3)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3)
+ ext v31.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row2)
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row3)
+ ext v30.8b, v3.8b , v4.8b, #4 ////extract a[4] (column2,row2)
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2)
+ ext v28.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row3)
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row2)
+ ext v27.8b, v6.8b , v7.8b, #4 ////extract a[4] (column2,row3)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3)
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row4
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row3)
+
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2)
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row5
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row2)
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row4)
+ st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row2
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3)
+ ext v30.8b, v3.8b , v4.8b, #5 ////extract a[5] (column2,row4)
+ sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row3)
+
+
+//// Processing row4 and row5
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row5)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row4)
+ st1 {v23.8b, v24.8b}, [x1], x3 ////Store dest row3
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row4)
+ ext v27.8b, v6.8b , v7.8b, #5 ////extract a[5] (column2,row5)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row5)
+ ext v31.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row4)
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row5)
+ ext v30.8b, v3.8b , v4.8b, #2 ////extract a[2] (column2,row4)
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row4)
+ ext v27.8b, v6.8b , v7.8b, #2 ////extract a[2] (column2,row5)
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row4)
+ ext v28.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row5)
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row5)
+ ext v31.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row4)
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row5)
+ ext v30.8b, v3.8b , v4.8b, #3 ////extract a[3] (column2,row4)
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row4)
+ ext v28.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row5)
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row4)
+ ext v27.8b, v6.8b , v7.8b, #3 ////extract a[3] (column2,row5)
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row5)
+ ext v31.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row4)
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row5)
+ ext v30.8b, v3.8b , v4.8b, #1 ////extract a[1] (column2,row4)
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4)
+ ext v28.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row5)
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row4)
+ ext v27.8b, v6.8b , v7.8b, #1 ////extract a[1] (column2,row5)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row5)
+ ext v31.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row4)
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row5)
+ ext v30.8b, v3.8b , v4.8b, #4 ////extract a[4] (column2,row4)
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row4)
+ ext v28.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row5)
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row4)
+ ext v27.8b, v6.8b , v7.8b, #4 ////extract a[4] (column2,row5)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row5)
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row6
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row5)
+
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row4)
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row7
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row4)
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row6)
+ st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row4
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row5)
+ ext v30.8b, v3.8b , v4.8b, #5 ////extract a[5] (column2,row6)
+ sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row5)
+
+
+
+ //// Processing row6 and row7
+
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row7)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row6)
+ st1 {v23.8b, v24.8b}, [x1], x3 ////Store dest row5
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row6)
+ ext v27.8b, v6.8b , v7.8b, #5 ////extract a[5] (column2,row7)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row7)
+ ext v31.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row6)
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row7)
+ ext v30.8b, v3.8b , v4.8b, #2 ////extract a[2] (column2,row6)
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row6)
+ ext v27.8b, v6.8b , v7.8b, #2 ////extract a[2] (column2,row7)
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row6)
+ ext v28.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row7)
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row7)
+ ext v31.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row6)
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row7)
+ ext v30.8b, v3.8b , v4.8b, #3 ////extract a[3] (column2,row6)
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row6)
+ ext v28.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row7)
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row6)
+ ext v27.8b, v6.8b , v7.8b, #3 ////extract a[3] (column2,row7)
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row7)
+ ext v31.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row6)
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row7)
+ ext v30.8b, v3.8b , v4.8b, #1 ////extract a[1] (column2,row6)
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6)
+ ext v28.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row7)
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row6)
+ ext v27.8b, v6.8b , v7.8b, #1 ////extract a[1] (column2,row7)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row7)
+ ext v31.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row6)
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row7)
+ ext v30.8b, v3.8b , v4.8b, #4 ////extract a[4] (column2,row6)
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row6)
+ ext v28.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row7)
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row6)
+ ext v27.8b, v6.8b , v7.8b, #4 ////extract a[4] (column2,row7)
+
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row6)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row7)
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row6)
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row7)
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row7)
+ st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row6
+ sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row7)
+ subs x12, x14, #1 // if height==16 - looping (Z is set only when x14 == 1, i.e. after the first pass)
+ st1 {v23.8b, v24.8b}, [x1], x3 ////Store dest row7
+
+
+
+ beq loop_16 //second pass over the next 8 rows when ht == 16
+ b end_func
+
+
+
+loop_8:
+ // wd=8 path. Each row is loaded as 16 source bytes (two 8-byte registers)
+ // and produces one 8-pixel output. Rows are handled in pairs: v14
+ // accumulates the row held in v5/v6 and v8 the row held in v2/v3.
+ // For the first pair the first row loaded goes into v5/v6, so v14/v23 is
+ // dest row0 and v8/v20 is dest row1; from row2 onward v2/v3 hold the even row.
+//// Processing row0 and row1
+
+
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row0
+ add x14, x14, #1 //for checking loop: x14 started as ht-16, so it is 1 here only on the first pass with ht==16
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row0)
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row1
+ ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row0)
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row1)
+ ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row0)
+ ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row0)
+ ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row0)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row0)
+ ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row1)
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row1)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row1)
+ ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row1)
+ ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row1)
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row2
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row1)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+
+ //// Processing row2 and row3
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row3)
+ ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row3)
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row2)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3)
+ st1 {v23.8b}, [x1], x3 ////Store dest row0
+ ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row3)
+ ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row3)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row3)
+ ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row2)
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3)
+ st1 {v20.8b}, [x1], x3 ////Store dest row1
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2)
+ ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row2)
+ ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row2)
+ ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row2)
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row4
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row2)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2)
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row5
+ subs x9, x4, #4 //set Z when ht==4; consumed by the beq end_func below (no flag-setting instruction in between)
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3)
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row5)
+ ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row5)
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row4)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row5)
+ ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row5)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2)
+ ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row5)
+ ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row4)
+ st1 {v20.8b}, [x1], x3 ////Store dest row2
+ ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row4)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row4)
+ st1 {v23.8b}, [x1], x3 ////Store dest row3
+ beq end_func // Branch if height==4 (note: source rows 4 and 5 have already been loaded above)
+
+//// Processing row4 and row5
+ ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row5)
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row5)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row5)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row5)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row5)
+ ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row4)
+ ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row4)
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row6
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row4)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row4)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row4)
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row5)
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row7
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row6)
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row7)
+ ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row7)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row7)
+ ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row7)
+ ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row7)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row4)
+ ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row6)
+ ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row6)
+ st1 {v20.8b}, [x1], x3 ////Store dest row4
+ ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row6)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row6)
+ ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row6)
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row6)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row6)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row6)
+ //// Processing row6 and row7
+ st1 {v23.8b}, [x1], x3 ////Store dest row5
+ ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row7)
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row7)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row7)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row7)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row7)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row6)
+ subs x12, x14, #1 //Z is set only when x14 == 1, i.e. after the first pass with ht==16
+ st1 {v20.8b}, [x1], x3 ////Store dest row6
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row7)
+ st1 {v23.8b}, [x1], x3 ////Store dest row7
+
+ beq loop_8 //looping if height ==16
+
+ b end_func
+loop_4:
+ // wd=4 path. Each pass filters 4 rows; the arithmetic is done on 8 lanes
+ // but only the low 4 bytes of each result are stored (st1 {vN.s}[0]).
+ // v14 accumulates the row held in v5/v6 and v8 the row held in v2/v3.
+ // For the first pair the first row loaded goes into v5/v6, so v14/v23 is
+ // dest row0 and v8/v20 is dest row1.
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row0
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row0)
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row1
+ ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row0)
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row1)
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row0)
+ ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row0)
+ ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row0)
+ ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row0)
+ ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row1)
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row1)
+ ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row1)
+ ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row1)
+ ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row1)
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row2
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row1)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3
+ ext v28.8b, v5.8b , v6.8b, #5 ////extract a[5] (column1,row3)
+ ext v25.8b, v5.8b , v6.8b, #2 ////extract a[2] (column1,row3)
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v2.8b , v3.8b, #5 ////extract a[5] (column1,row2)
+ ext v24.8b, v5.8b , v6.8b, #3 ////extract a[3] (column1,row3)
+ st1 {v23.s}[0], [x1], x3 ////Store dest row0
+ ext v23.8b, v5.8b , v6.8b, #1 ////extract a[1] (column1,row3)
+ ext v22.8b, v5.8b , v6.8b, #4 ////extract a[4] (column1,row3)
+ ext v29.8b, v2.8b , v3.8b, #3 ////extract a[3] (column1,row2)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ ext v30.8b, v2.8b , v3.8b, #2 ////extract a[2] (column1,row2)
+ ext v27.8b, v2.8b , v3.8b, #1 ////extract a[1] (column1,row2)
+
+ //// Processing row2 and row3
+ st1 {v20.s}[0], [x1], x3 ////Store dest row1
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3)
+ ext v26.8b, v2.8b , v3.8b, #4 ////extract a[4] (column1,row2)
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2)
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row2)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2)
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2)
+ st1 {v20.s}[0], [x1], x3 ////Store dest row2
+ subs x4, x4, #8 // Loop if height =8 (x4 becomes 0 only on the first pass with ht==8)
+ st1 {v23.s}[0], [x1], x3 ////Store dest row3
+ beq loop_4 //second pass over the next 4 rows; otherwise fall through to end_func
+
+end_func:
+ // Common exit for all width paths: undoes the entry sequence in reverse order.
+ // (ARMv7 original: LDMFD sp!,{x4-x12,PC} - restoring registers from stack)
+ ldp x19, x20, [sp], #16 //restore x19/x20 saved by the stp at function entry
+ pop_v_regs //macro from ih264_neon_macros.s; presumably the inverse of push_v_regs - confirm there
+ ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s b/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
new file mode 100755
index 0000000..38934c9
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
@@ -0,0 +1,452 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_filters_luma_vert_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction interpolation.
+//*
+//* @author
+//* Ittiam
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_luma_vert_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+///**
+// *******************************************************************************
+// *
+// * @brief
+// * Interprediction luma filter for vertical input
+// *
+// * @par Description:
+// * Applies a 6-tap vertical filter. The output is clipped to 8 bits
+// * sec 8.4.2.2.1 titled "Luma sample interpolation process"
+// *
+// * @param[in] pu1_src
+// * UWORD8 pointer to the source
+// *
+// * @param[out] pu1_dst
+// * UWORD8 pointer to the destination
+// *
+// * @param[in] src_strd
+// * integer source stride
+// *
+// * @param[in] dst_strd
+// * integer destination stride
+// *
+// * @param[in] ht
+// * integer height of the array
+// *
+// * @param[in] wd
+// * integer width of the array
+// *
+// * @returns
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+
+//void ih264_inter_pred_luma_vert (
+// UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd )
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+
+ .global ih264_inter_pred_luma_vert_av8
+
+ih264_inter_pred_luma_vert_av8:
+ // In: x0 = pu1_src, x1 = pu1_dst, w2 = src_strd, w3 = dst_strd, w4 = ht, w5 = wd
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ // The WORD32 arguments arrive in w2-w5; AAPCS64 leaves bits 63:32 of x2-x5 unspecified, so sign-extend before any 64-bit use
+ sxtw x2, w2 // src_strd, used below as a 64-bit post-index
+ sxtw x3, w3 // dst_strd, used below as a 64-bit post-index
+ sxtw x4, w4 // ht
+ sxtw x5, w5 // wd
+ sub x0, x0, x2, lsl #1 // pu1_src - 2*src_strd: top row of the 6-row filter window
+ sub x14, x4, #16 // x14 = ht - 16; the loop tails test it to detect ht == 16
+ movi v22.8h, #20 // filter coeff 20 (0x14) in every 16-bit lane
+ subs x12, x5, #8 // wd == 8 ?
+ movi v24.8h, #5 // filter coeff 5 in every 16-bit lane
+ beq loop_8_start
+
+ subs x12, x5, #4 // wd == 4 ?
+ beq loop_4_start
+ // any other wd falls through to the 16-wide path: preload source rows 0-5
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0]
+ add x14, x14, #1 // x14 = ht - 15: equals 1 only when ht == 16
+ ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0]
+ uaddl v12.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0]
+ ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0]
+
+loop_16: // wd == 16: each pass filters 8 output rows of 16 pixels (v30 = cols 0-7, v31 = cols 8-15)
+ // On entry v0/v1, v2/v3, v4/v5, v6/v7, v8/v9, v10/v11 hold six consecutive source rows (cols 0-7 / 8-15) and v12 = v4 + v6
+ uaddl v14.8h, v0.8b, v10.8b // temp = src[0_0] + src[5_0]
+ uaddl v16.8h, v2.8b, v8.8b // temp2 = src[1_0] + src[4_0]
+ mla v14.8h, v12.8h, v22.8h // temp += temp1 * 20
+ uaddl v20.8h, v1.8b, v11.8b // temp4 = src[0_8] + src[5_8]
+ uaddl v18.8h, v5.8b, v7.8b // temp3 = src[2_8] + src[3_8]
+ mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ ld1 {v0.2s, v1.2s}, [x0], x2 // next source row replaces the oldest row of the window
+ uaddl v26.8h, v3.8b, v9.8b // temp5 = src[1_8] + src[4_8]
+ uaddl v12.8h, v6.8b, v8.8b // row 1, cols 0-7: centre pair (x20 taps)
+ mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v16.8h, v2.8b, v0.8b // row 1, cols 0-7: outer pair (x1 taps)
+ uaddl v18.8h, v4.8b, v10.8b // row 1, cols 0-7: inner pair (x5 taps, subtracted)
+ mla v16.8h, v12.8h , v22.8h
+ mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ uaddl v26.8h, v5.8b, v11.8b // row 1, cols 8-15: inner pair
+ uaddl v12.8h, v7.8b, v9.8b // row 1, cols 8-15: centre pair
+ sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ uaddl v14.8h, v3.8b, v1.8b // row 1, cols 8-15: outer pair
+ ld1 {v2.2s, v3.2s}, [x0], x2 // next source row
+ mla v14.8h, v12.8h , v22.8h
+ mls v16.8h, v18.8h , v24.8h
+ sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ uaddl v18.8h, v4.8b, v2.8b // row 2, cols 0-7: outer pair
+ uaddl v12.8h, v8.8b, v10.8b // row 2, cols 0-7: centre pair
+
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 0
+ mla v18.8h, v12.8h , v22.8h
+ uaddl v20.8h, v6.8b, v0.8b // row 2, cols 0-7: inner pair
+ mls v14.8h, v26.8h , v24.8h
+ sqrshrun v30.8b, v16.8h, #5 // row 1, cols 0-7: round, shift by 5, saturate to 8 bits
+ uaddl v12.8h, v9.8b, v11.8b
+ uaddl v16.8h, v5.8b, v3.8b
+ uaddl v26.8h, v7.8b, v1.8b
+ mla v16.8h, v12.8h , v22.8h
+ mls v18.8h, v20.8h , v24.8h
+ ld1 {v4.2s, v5.2s}, [x0], x2 // next source row
+
+ sqrshrun v31.8b, v14.8h, #5 // row 1, cols 8-15
+ uaddl v12.8h, v10.8b, v0.8b
+ uaddl v14.8h, v6.8b, v4.8b
+ uaddl v20.8h, v8.8b, v2.8b
+ mla v14.8h, v12.8h , v22.8h
+ mls v16.8h, v26.8h , v24.8h
+ st1 {v30.2s, v31.2s}, [x1], x3 //store row 1
+ sqrshrun v30.8b, v18.8h, #5 // row 2, cols 0-7
+ uaddl v18.8h, v7.8b, v5.8b
+ uaddl v12.8h, v11.8b, v1.8b
+ mla v18.8h, v12.8h , v22.8h
+ uaddl v26.8h, v9.8b, v3.8b
+ mls v14.8h, v20.8h , v24.8h
+ ld1 {v6.2s, v7.2s}, [x0], x2 // next source row
+ sqrshrun v31.8b, v16.8h, #5 // row 2, cols 8-15
+ mls v18.8h, v26.8h , v24.8h
+ uaddl v12.8h, v0.8b, v2.8b // temp1 = src[2_0] + src[3_0]
+ st1 {v30.2s, v31.2s}, [x1], x3 //store row 2
+ uaddl v16.8h, v10.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+ uaddl v20.8h, v9.8b, v7.8b // temp4 = src[0_8] + src[5_8]
+ sqrshrun v30.8b, v14.8h, #5 // row 3, cols 0-7
+ uaddl v26.8h, v5.8b, v11.8b // temp5 = src[1_8] + src[4_8]
+ uaddl v14.8h, v8.8b, v6.8b // temp = src[0_0] + src[5_0]
+ sqrshrun v31.8b, v18.8h, #5 // row 3, cols 8-15
+ mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+ uaddl v18.8h, v1.8b, v3.8b // temp3 = src[2_8] + src[3_8]
+ st1 {v30.2s, v31.2s}, [x1], x3 //store row 3
+ // 4 rows processed
+ mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ ld1 {v8.2s, v9.2s}, [x0], x2 // next source row
+ uaddl v12.8h, v2.8b, v4.8b
+ uaddl v18.8h, v3.8b, v5.8b
+ mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v28.8h, v9.8b, v11.8b
+ uaddl v16.8h, v6.8b, v0.8b
+ mla v28.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ uaddl v26.8h, v1.8b, v7.8b
+ uaddl v18.8h, v5.8b, v7.8b
+ sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ uaddl v14.8h, v8.8b, v10.8b
+
+ sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ ld1 {v10.2s, v11.2s}, [x0], x2 // next source row
+ mls v28.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 4
+ mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+ uaddl v20.8h, v11.8b, v1.8b
+ uaddl v26.8h, v3.8b, v9.8b
+ mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ uaddl v12.8h, v6.8b, v4.8b
+ uaddl v18.8h, v7.8b, v9.8b
+ sqrshrun v31.8b, v28.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v16.8h, v8.8b, v2.8b
+ sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ uaddl v14.8h, v10.8b, v0.8b
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 5
+ mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+ ld1 {v0.2s, v1.2s}, [x0], x2 // next source row
+ uaddl v26.8h, v5.8b, v11.8b
+ uaddl v12.8h, v8.8b, v6.8b
+ uaddl v28.8h, v0.8b, v2.8b
+ sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ mla v28.8h, v12.8h , v22.8h // temp += temp1 * 20
+ uaddl v20.8h, v1.8b, v3.8b
+ mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ uaddl v16.8h, v10.8b, v4.8b
+ sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ mov v2.8b, v6.8b // start rotating the row window back into v0..v11 order for a possible second pass
+ mov v3.8b, v7.8b
+ mls v28.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 6
+ sqrshrun v30.8b, v28.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ // swp is a macro from ih264_neon_macros.s (not shown here); presumably it exchanges the two registers
+ swp v0.8b v4.8b
+ swp v1.8b v5.8b
+
+
+
+ mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ mov v6.8b, v10.8b
+ mov v7.8b, v11.8b
+ subs x12, x14, #1 // Z is set only when x14 == 1, i.e. on the first pass of an ht == 16 call
+
+ swp v4.8b v8.8b
+ swp v5.8b v9.8b
+
+
+ sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 7
+ bne end_func // ht == 8, or second pass of ht == 16: done
+ add x14, x14, #1 // makes the next subs non-zero so the second pass exits
+ ld1 {v10.2s, v11.2s}, [x0], x2 // sixth row of the new window
+ uaddl v12.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0]
+
+ b loop_16 // second pass for ht == 16
+
+loop_8_start:
+//// wd == 8: preload source rows 0-5, 8 pixels each
+
+ ld1 {v0.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v1.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v2.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v3.2s}, [x0], x2 // Vector load from src[3_0]
+ add x14, x14, #1 // x14 = ht - 15: equals 1 only when ht == 16
+ ld1 {v4.2s}, [x0], x2 // Vector load from src[4_0]
+ ld1 {v5.2s}, [x0], x2 // Vector load from src[5_0]
+
+loop_8:
+ // rows 0-3 of this pass; v0..v5 hold six consecutive source rows
+ uaddl v6.8h, v2.8b, v3.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v8.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0]
+ uaddl v10.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+ mla v8.8h, v6.8h , v22.8h // temp += temp1 * 20
+ ld1 {v6.2s}, [x0], x2 // next source row (7th)
+ uaddl v14.8h, v3.8b, v4.8b // row 1: centre pair (x20 taps)
+ uaddl v16.8h, v1.8b, v6.8b // row 1: outer pair (x1 taps)
+ uaddl v18.8h, v2.8b, v5.8b // row 1: inner pair (x5 taps, subtracted)
+ mls v8.8h, v10.8h , v24.8h // temp -= temp2 * 5
+ mla v16.8h, v14.8h , v22.8h
+ ld1 {v7.2s}, [x0], x2 // next source row (8th)
+ uaddl v20.8h, v4.8b, v5.8b // row 2: centre pair
+ uaddl v12.8h, v2.8b, v7.8b // row 2: outer pair
+ uaddl v10.8h, v3.8b, v6.8b // row 2: inner pair
+ mls v16.8h, v18.8h , v24.8h
+ sqrshrun v26.8b, v8.8h, #5 // dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+ mla v12.8h, v20.8h , v22.8h
+ ld1 {v0.2s}, [x0], x2 // next source row (9th), reusing v0
+ uaddl v14.8h, v5.8b, v6.8b // row 3: centre pair
+ sqrshrun v27.8b, v16.8h, #5 // row 1 result
+ uaddl v20.8h, v3.8b, v0.8b // row 3: outer pair
+ mls v12.8h, v10.8h , v24.8h
+ st1 {v26.2s}, [x1], x3 // store row 0
+ uaddl v18.8h, v4.8b, v7.8b // row 3: inner pair
+ mla v20.8h, v14.8h , v22.8h
+ st1 {v27.2s}, [x1], x3 // store row 1
+ sqrshrun v28.8b, v12.8h, #5 // row 2 result
+ st1 {v28.2s}, [x1], x3 // store row 2
+ mls v20.8h, v18.8h , v24.8h
+ ld1 {v1.2s}, [x0], x2 // one more source row is fetched before the height check below
+ sqrshrun v29.8b, v20.8h, #5 // row 3 result
+ subs x9, x4, #4 // ht == 4 ?
+ st1 {v29.2s}, [x1], x3 //store row 3
+
+
+ beq end_func // Branch if height==4
+ // rows 4-7: the window is now v4, v5, v6, v7, v0, v1 (oldest to newest)
+
+ uaddl v14.8h, v6.8b, v7.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v16.8h, v0.8b, v5.8b // temp2 = src[1_0] + src[4_0]
+ uaddl v18.8h, v1.8b, v4.8b // temp = src[0_0] + src[5_0]
+ mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+ ld1 {v2.2s}, [x0], x2 // next source row
+ mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v8.8h, v0.8b, v7.8b // row 5: centre pair
+ uaddl v10.8h, v1.8b, v6.8b // row 5: inner pair
+ uaddl v12.8h, v2.8b, v5.8b // row 5: outer pair
+ sqrshrun v26.8b, v18.8h, #5 // row 4 result
+ mla v12.8h, v8.8h , v22.8h
+ ld1 {v3.2s}, [x0], x2 // next source row
+ mls v12.8h, v10.8h , v24.8h
+ st1 {v26.2s}, [x1], x3 // store row 4
+ sqrshrun v27.8b, v12.8h, #5 // row 5 result
+ st1 {v27.2s}, [x1], x3 // store row 5
+ uaddl v14.8h, v0.8b, v1.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v16.8h, v2.8b, v7.8b // temp2 = src[1_0] + src[4_0]
+ uaddl v18.8h, v3.8b, v6.8b // temp = src[0_0] + src[5_0]
+ mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+ ld1 {v4.2s}, [x0], x2 // next source row
+ mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v8.8h, v2.8b, v1.8b // row 7: centre pair
+ uaddl v10.8h, v3.8b, v0.8b // row 7: inner pair
+ uaddl v12.8h, v4.8b, v7.8b // row 7: outer pair
+ sqrshrun v26.8b, v18.8h, #5 // row 6 result
+ mla v12.8h, v8.8h , v22.8h
+ ld1 {v5.2s}, [x0], x2 // next source row; v0..v5 again hold six consecutive rows
+ mls v12.8h, v10.8h , v24.8h
+ st1 {v26.2s}, [x1], x3 // store row 6
+ sqrshrun v27.8b, v12.8h, #5 // row 7 result
+ subs x12, x14, #1 // Z is set only when x14 == 1, i.e. first pass of an ht == 16 call
+ st1 {v27.2s}, [x1], x3 // store row 7
+ add x14, x14, #1 // does not touch the flags; makes the second pass exit
+ beq loop_8 //looping if height ==16
+
+ b end_func
+
+
+loop_4_start:
+//// wd == 4: straight-line code for 4 or 8 output rows; only lane s[0] (4 pixels) of each row is loaded and stored
+
+
+ ld1 {v0.s}[0], [x0], x2 // Vector load from src[0_0]
+ ld1 {v1.s}[0], [x0], x2 // Vector load from src[1_0]
+ ld1 {v2.s}[0], [x0], x2 // Vector load from src[2_0]
+ ld1 {v3.s}[0], [x0], x2 // Vector load from src[3_0]
+ ld1 {v4.s}[0], [x0], x2 // Vector load from src[4_0]
+ ld1 {v5.s}[0], [x0], x2 // Vector load from src[5_0]
+
+ uaddl v6.8h, v2.8b, v3.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v8.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0]
+ uaddl v10.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+ mla v8.8h, v6.8h , v22.8h // temp += temp1 * 20
+ ld1 {v6.s}[0], [x0], x2 // next source row (7th): 4-byte lane load like every other row, so nothing past the 4-pixel column is read
+ uaddl v14.8h, v3.8b, v4.8b // row 1: centre pair (x20 taps)
+ uaddl v16.8h, v1.8b, v6.8b // row 1: outer pair (x1 taps)
+ uaddl v18.8h, v2.8b, v5.8b // row 1: inner pair (x5 taps, subtracted)
+ mls v8.8h, v10.8h , v24.8h // temp -= temp2 * 5
+ ld1 {v7.s}[0], [x0], x2 // next source row (8th)
+ mla v16.8h, v14.8h , v22.8h
+ uaddl v20.8h, v4.8b, v5.8b // row 2: centre pair
+ uaddl v12.8h, v2.8b, v7.8b // row 2: outer pair
+ uaddl v10.8h, v3.8b, v6.8b // row 2: inner pair
+ mls v16.8h, v18.8h , v24.8h
+ sqrshrun v26.8b, v8.8h, #5 // dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+ mla v12.8h, v20.8h , v22.8h
+ ld1 {v0.s}[0], [x0], x2 // next source row (9th), reusing v0
+ uaddl v14.8h, v5.8b, v6.8b // row 3: centre pair
+ sqrshrun v27.8b, v16.8h, #5 // row 1 result
+ uaddl v20.8h, v3.8b, v0.8b // row 3: outer pair
+ mls v12.8h, v10.8h , v24.8h
+ st1 {v26.s}[0], [x1], x3 // store row 0
+ uaddl v18.8h, v4.8b, v7.8b // row 3: inner pair
+ mla v20.8h, v14.8h , v22.8h
+ st1 {v27.s}[0], [x1], x3 // store row 1
+ sqrshrun v28.8b, v12.8h, #5 // row 2 result
+ st1 {v28.s}[0], [x1], x3 // store row 2
+ mls v20.8h, v18.8h , v24.8h
+ ld1 {v1.s}[0], [x0], x2 // one more source row is fetched before the height check below
+ sqrshrun v29.8b, v20.8h, #5 // row 3 result
+ st1 {v29.s}[0], [x1], x3 //store row 3
+
+ subs x9, x4, #4 // ht == 4 ?
+ beq end_func // Branch if height==4
+ // rows 4-7: the window is now v4, v5, v6, v7, v0, v1 (oldest to newest)
+
+ uaddl v14.8h, v6.8b, v7.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v16.8h, v0.8b, v5.8b // temp2 = src[1_0] + src[4_0]
+ uaddl v18.8h, v1.8b, v4.8b // temp = src[0_0] + src[5_0]
+ mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+ ld1 {v2.s}[0], [x0], x2 // next source row
+ mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v8.8h, v0.8b, v7.8b // row 5: centre pair
+ uaddl v10.8h, v1.8b, v6.8b // row 5: inner pair
+ uaddl v12.8h, v2.8b, v5.8b // row 5: outer pair
+ sqrshrun v26.8b, v18.8h, #5 // row 4 result
+ mla v12.8h, v8.8h , v22.8h
+ ld1 {v3.s}[0], [x0], x2 // next source row
+ mls v12.8h, v10.8h , v24.8h
+ st1 {v26.s}[0], [x1], x3 // store row 4
+ sqrshrun v27.8b, v12.8h, #5 // row 5 result
+ st1 {v27.s}[0], [x1], x3 // store row 5
+ uaddl v14.8h, v0.8b, v1.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v16.8h, v2.8b, v7.8b // temp2 = src[1_0] + src[4_0]
+ uaddl v18.8h, v3.8b, v6.8b // temp = src[0_0] + src[5_0]
+ mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+ ld1 {v4.s}[0], [x0], x2 // next source row
+ mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v8.8h, v2.8b, v1.8b // row 7: centre pair
+ uaddl v10.8h, v3.8b, v0.8b // row 7: inner pair
+ uaddl v12.8h, v4.8b, v7.8b // row 7: outer pair
+ sqrshrun v26.8b, v18.8h, #5 // row 6 result
+ mla v12.8h, v8.8h , v22.8h
+ ld1 {v5.s}[0], [x0], x2 // loaded but not consumed: execution falls through to end_func
+ mls v12.8h, v10.8h , v24.8h
+ st1 {v26.s}[0], [x1], x3 // store row 6
+ sqrshrun v27.8b, v12.8h, #5 // row 7 result
+ st1 {v27.s}[0], [x1], x3 // store row 7
+
+
+end_func:
+ // Common exit for the wd = 16 / 8 / 4 paths: undo the prologue in reverse order (x19/x20 first, then pop_v_regs)
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_copy_av8.s b/common/armv8/ih264_inter_pred_luma_copy_av8.s
new file mode 100755
index 0000000..1a76c1c
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_copy_av8.s
@@ -0,0 +1,267 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Interprediction luma function for copy
+//*
+//* @par Description:
+//* Copies the array of width 'wd' and height 'ht' from the location pointed
+//* by 'src' to the location pointed by 'dst'
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264_inter_pred_luma_copy (
+// UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd )
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x7 => ht
+// x12 => wd
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_inter_pred_luma_copy_av8
+
+ih264_inter_pred_luma_copy_av8:
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ sxtw x2, w2 // src_strd is a WORD32: AAPCS64 leaves bits 63:32 unspecified, so sign-extend before using it as a 64-bit offset
+ sxtw x3, w3 // dst_strd, same reason
+ sxtw x12, w5 // x12 = wd (sign-extended)
+ sxtw x7, w4 // x7 = ht (sign-extended)
+ cmp x7, #0 // nothing to copy when ht <= 0
+ ble end_loops
+ tst x12, #15 // wd a multiple of 16 ?
+ beq core_loop_wd_16
+ tst x12, #7 // wd a multiple of 8 ?
+ beq core_loop_wd_8
+ sub x11, x12, #4 // x11 = wd - 4: rewind applied after each 4-row band
+
+outer_loop_wd_4:
+ subs x4, x12, #0 // x4 = columns left in this band; skip the inner loop when wd <= 0
+ ble end_inner_loop_wd_4
+
+inner_loop_wd_4:
+ ld1 {v0.s}[0], [x0] // row 0: load 4 bytes
+ add x5, x0, x2 // x5 = source pointer for rows 1-3
+ add x6, x1, x3 // x6 = destination pointer for rows 1-3
+ st1 {v0.s}[0], [x1] // row 0: store 4 bytes
+ ld1 {v0.s}[0], [x5], x2 // row 1
+ add x0, x0, #4 // pu1_src += 4
+ st1 {v0.s}[0], [x6], x3
+ ld1 {v0.s}[0], [x5], x2 // row 2
+ subs x4, x4, #4 // 4 columns done; flags are consumed by the bgt below
+ st1 {v0.s}[0], [x6], x3
+ ld1 {v0.s}[0], [x5], x2 // row 3
+ add x1, x1, #4 // pu1_dst += 4
+ st1 {v0.s}[0], [x6], x3
+
+ bgt inner_loop_wd_4
+
+end_inner_loop_wd_4:
+ subs x7, x7, #4 // 4 rows done
+ sub x0, x5, x11 // x5/x6 point 4 rows below the last column group; step back to column 0
+ sub x1, x6, x11
+ bgt outer_loop_wd_4
+
+end_loops:
+ // Restore the registers saved at entry, in reverse order
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+core_loop_wd_8:
+ sub x11, x12, #8 // x11 = wd - 8: rewind applied after each 4-row band
+
+outer_loop_wd_8:
+ subs x4, x12, #0 // x4 = columns left in this band
+ ble end_inner_loop_wd_8
+
+inner_loop_wd_8:
+ add x5, x0, x2 // x5 = source pointer for rows 1-3
+ add x6, x1, x3 // x6 = destination pointer for rows 1-3
+ ld1 {v0.8b}, [x0], #8 // row 0: copy 8 bytes and advance both base pointers by 8 columns
+ st1 {v0.8b}, [x1], #8
+ ld1 {v1.8b}, [x5], x2 // row 1
+ st1 {v1.8b}, [x6], x3
+ ld1 {v2.8b}, [x5], x2 // row 2
+ st1 {v2.8b}, [x6], x3
+ ld1 {v3.8b}, [x5], x2 // row 3
+ st1 {v3.8b}, [x6], x3
+ subs x4, x4, #8 // 8 columns done
+ bgt inner_loop_wd_8
+
+end_inner_loop_wd_8:
+ sub x0, x5, x11 // x5/x6 point 4 rows below the last column group; step back to column 0
+ sub x1, x6, x11
+ subs x7, x7, #4 // 4 rows done
+ bgt outer_loop_wd_8
+
+ // Restore the registers saved at entry, in reverse order
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+core_loop_wd_16:
+ sub x11, x12, #16 // x11 = wd - 16: rewind applied after each 4-row band
+
+outer_loop_wd_16:
+ subs x4, x12, #0 // x4 = columns left in this band
+ ble end_inner_loop_wd_16
+
+inner_loop_wd_16:
+ add x5, x0, x2 // x5 = source pointer for rows 1-3
+ add x6, x1, x3 // x6 = destination pointer for rows 1-3
+ ld1 {v0.16b}, [x0], #16 // row 0: copy 16 bytes and advance both base pointers by 16 columns
+ st1 {v0.16b}, [x1], #16
+ ld1 {v2.16b}, [x5], x2 // row 1
+ st1 {v2.16b}, [x6], x3
+ ld1 {v4.16b}, [x5], x2 // row 2
+ st1 {v4.16b}, [x6], x3
+ ld1 {v6.16b}, [x5], x2 // row 3
+ st1 {v6.16b}, [x6], x3
+ subs x4, x4, #16 // 16 columns done
+ bgt inner_loop_wd_16
+
+end_inner_loop_wd_16:
+ sub x0, x5, x11 // x5/x6 point 4 rows below the last column group; step back to column 0
+ sub x1, x6, x11
+ subs x7, x7, #4 // 4 rows done
+ bgt outer_loop_wd_16
+
+ // Restore the registers saved at entry, in reverse order
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+// /*
+// ********************************************************************************
+// *
+// * @brief This function copies a 4x4 block to destination
+// *
+// * @par Description:
+// * Copies a 4x4 block to destination, where both src and dst are interleaved
+// *
+// * @param[in] pi2_src
+// * Source
+// *
+// * @param[in] pu1_out
+// * Output pointer
+// *
+// * @param[in] pred_strd,
+// * Prediction buffer stride
+// *
+// * @param[in] out_strd
+// * Output buffer stride
+// *
+// * @returns none
+// *
+// * @remarks none
+// * Currently wd and height is not used, ie a 4x4 block is always copied
+// *
+// *******************************************************************************
+// */
+// void ih264_interleave_copy(WORD16 *pi2_src,
+// UWORD8 *pu1_out,
+// WORD32 pred_strd,
+// WORD32 out_strd,
+// WORD32 wd,
+// WORD32 ht)
+// Register Usage
+// x0 : pi2_src
+// x1 : pu1_out
+// x2 : src_strd
+// x3 : out_strd
+// Neon registers v2-v5, v18-v21 and v30 are used
+// No need for pushing arm and neon registers
+
+ .global ih264_interleave_copy_av8
+ih264_interleave_copy_av8:
+ push_v_regs
+ sxtw x2, w2 // source stride is a WORD32: AAPCS64 leaves bits 63:32 unspecified, so sign-extend before 64-bit use
+ sxtw x3, w3 // out_strd, same reason
+ ld1 {v2.8b}, [x0], x2 // source rows 0 and 1 (8 bytes each) are packed into v2
+ ld1 {v3.8b}, [x0], x2
+ mov v2.d[1], v3.d[0]
+ ld1 {v4.8b}, [x0], x2 // source rows 2 and 3 are packed into v4
+ ld1 {v5.8b}, [x0], x2
+ mov v4.d[1], v5.d[0]
+ mov x0, x1 // keep the start of the output block for the write-back below
+ ld1 {v18.8b}, [x1], x3 // output rows 0 and 1 (8 bytes each) are packed into v18
+ ld1 {v19.8b}, [x1], x3
+ mov v18.d[1], v19.d[0]
+ movi v30.8h, #0x00ff // mask: 0xff in the even-indexed byte of every 16-bit lane
+ ld1 {v20.8b}, [x1], x3 // output rows 2 and 3 are packed into v20
+ ld1 {v21.8b}, [x1], x3
+ mov v20.d[1], v21.d[0]
+
+ bit v18.16b, v2.16b , v30.16b // take even-indexed bytes from the source, keep the odd-indexed output bytes
+ bit v20.16b, v4.16b , v30.16b
+
+ st1 {v18.8b}, [x0], x3 // write the four merged rows back to the output
+ st1 {v18.d}[1], [x0], x3
+ st1 {v20.8b}, [x0], x3
+ st1 {v20.d}[1], [x0], x3
+
+ pop_v_regs
+ ret
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
new file mode 100755
index 0000000..ea7645e
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
@@ -0,0 +1,820 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction interpolation.
+//*
+//* @author
+//* Mohit
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_luma_horz_hpel_vert_hpel_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+
+
+//void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd,
+// UWORD8* pu1_tmp,
+// UWORD32 dydx)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
+
+ih264_inter_pred_luma_horz_hpel_vert_hpel_av8:
+
+ //store register values to stack
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd
+ sub x0, x0, #2 //pu1_src-2
+
+ movi v26.8h, #0x14 // Filter coeff 20 into Q13
+ movi v24.8h, #0x5 // Filter coeff 5 into Q12
+ movi v27.8h, #0x14 // Filter coeff 20 into Q13
+ movi v25.8h, #0x5 // Filter coeff 5 into Q12
+ mov x7, #0x20
+ mov x8, #0x30
+ subs x12, x5, #4 //if wd=4 branch to loop_4
+ beq loop_4_start
+
+ subs x12, x5, #8 //if wd=8 branch to loop_8
+ beq loop_8_start
+
+ //when wd=16
+ movi v28.8h, #0x14 // Filter coeff 20 into Q13
+ movi v30.8h, #0x5 // Filter coeff 5 into Q12
+ sub x2, x2, #16
+ ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[0_0]
+ ld1 {v12.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v2.2s, v3.2s}, [x0], #16 // Vector load from src[1_0]
+ ld1 {v13.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x0], #16 // Vector load from src[2_0]
+ ld1 {v14.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x0], #16 // Vector load from src[3_0]
+ ld1 {v15.2s}, [x0], x2 // Vector load from src[3_0]
+ ld1 {v8.2s, v9.2s}, [x0], #16 // Vector load from src[4_0]
+ ld1 {v16.2s}, [x0], x2 // Vector load from src[4_0]
+loop_16:
+
+ ld1 {v10.2s, v11.2s}, [x0], #16 // Vector load from src[5_0]
+ ld1 {v17.2s}, [x0], x2 // Vector load from src[5_0]
+
+
+ uaddl v20.8h, v4.8b, v6.8b
+ uaddl v18.8h, v0.8b, v10.8b
+ uaddl v22.8h, v2.8b, v8.8b
+ mla v18.8h, v20.8h , v28.8h
+ uaddl v24.8h, v5.8b, v7.8b
+ uaddl v20.8h, v1.8b, v11.8b
+ uaddl v26.8h, v3.8b, v9.8b
+ mla v20.8h, v24.8h , v28.8h
+ uaddl v24.8h, v14.8b, v15.8b
+ mls v18.8h, v22.8h , v30.8h
+ uaddl v22.8h, v12.8b, v17.8b
+ mls v20.8h, v26.8h , v30.8h
+ uaddl v26.8h, v13.8b, v16.8b
+ mla v22.8h, v24.8h , v28.8h
+ mls v22.8h, v26.8h , v30.8h
+
+ ext v24.16b, v18.16b , v20.16b , #4
+ ext v26.16b, v18.16b , v20.16b , #6
+
+ ext v23.16b, v18.16b , v20.16b , #10
+ add v0.8h, v24.8h , v26.8h
+ ext v24.16b, v18.16b , v20.16b , #2
+ ext v26.16b, v18.16b , v20.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v18.4h, v23.4h
+ smlal v26.4s, v0.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v23.4s, v18.8h, v23.8h
+ smlal2 v23.4s, v0.8h, v28.8h
+ smlsl2 v23.4s, v24.8h, v30.8h
+
+ sqrshrun v18.4h, v26.4s, #10
+ sqrshrun v19.4h, v23.4s, #10
+
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+ ext v24.16b, v20.16b , v22.16b , #4
+ ext v26.16b, v20.16b , v22.16b , #6
+ ext v0.16b, v20.16b , v22.16b , #10
+
+ add v25.8h, v24.8h , v26.8h
+ ext v24.16b, v20.16b , v22.16b , #2
+ ext v26.16b, v20.16b , v22.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v0.4h, v20.4h
+ smlal v26.4s, v25.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v0.8h, v20.8h
+ smlal2 v22.4s, v25.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v19.4h, v26.4s, #10
+ sqrshrun v25.4h, v22.4s, #10
+
+ uaddl v24.8h, v7.8b, v9.8b
+
+
+
+ uqxtn v19.8b, v19.8h
+ uqxtn v25.8b, v25.8h
+ mov v19.2s[1], v25.2s[0]
+
+ uaddl v22.8h, v4.8b, v10.8b
+ ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[6_0]
+
+
+ ld1 {v12.2s}, [x0], x2 // Vector load from src[6_0]
+ uaddl v20.8h, v6.8b, v8.8b
+ uaddl v26.8h, v5.8b, v11.8b
+ st1 {v18.2s, v19.2s}, [x1], x3 // store row 0
+
+
+//ROW_2
+
+
+ uaddl v18.8h, v2.8b, v0.8b
+
+ mla v18.8h, v20.8h , v28.8h
+
+ uaddl v20.8h, v3.8b, v1.8b
+
+ mla v20.8h, v24.8h , v28.8h
+ uaddl v24.8h, v15.8b, v16.8b
+ mls v18.8h, v22.8h , v30.8h
+ uaddl v22.8h, v13.8b, v12.8b
+ mls v20.8h, v26.8h , v30.8h
+ uaddl v26.8h, v14.8b, v17.8b
+ mla v22.8h, v24.8h , v28.8h
+ mls v22.8h, v26.8h , v30.8h
+
+ ext v24.16b, v18.16b , v20.16b , #4
+ ext v26.16b, v18.16b , v20.16b , #6
+
+ ext v23.16b, v18.16b , v20.16b , #10
+ add v2.8h, v24.8h , v26.8h
+ ext v24.16b, v18.16b , v20.16b , #2
+ ext v26.16b, v18.16b , v20.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v18.4h, v23.4h
+ smlal v26.4s, v2.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v23.4s, v18.8h, v23.8h
+ smlal2 v23.4s, v2.8h, v28.8h
+ smlsl2 v23.4s, v24.8h, v30.8h
+
+ sqrshrun v18.4h, v26.4s, #10
+ sqrshrun v19.4h, v23.4s, #10
+
+
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+ ext v24.16b, v20.16b , v22.16b , #4
+ ext v26.16b, v20.16b , v22.16b , #6
+ ext v2.16b, v20.16b , v22.16b , #10
+
+ add v25.8h, v24.8h , v26.8h
+ ext v24.16b, v20.16b , v22.16b , #2
+ ext v26.16b, v20.16b , v22.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v2.4h, v20.4h
+ smlal v26.4s, v25.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v2.8h, v20.8h
+ smlal2 v22.4s, v25.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v19.4h, v26.4s, #10
+ sqrshrun v25.4h, v22.4s, #10
+ uaddl v24.8h, v9.8b, v11.8b
+
+ uqxtn v19.8b, v19.8h
+ uqxtn v25.8b, v25.8h
+ mov v19.2s[1], v25.2s[0]
+
+
+ uaddl v22.8h, v6.8b, v0.8b
+ ld1 {v2.2s, v3.2s}, [x0], #16 // Vector load from src[7_0]
+
+
+ ld1 {v13.2s}, [x0], x2 // Vector load from src[7_0]
+ uaddl v20.8h, v8.8b, v10.8b
+ uaddl v26.8h, v7.8b, v1.8b
+ st1 {v18.2s, v19.2s}, [x1], x3 // store row 1
+
+//ROW_3
+
+
+ uaddl v18.8h, v4.8b, v2.8b
+
+ mla v18.8h, v20.8h , v28.8h
+
+ uaddl v20.8h, v5.8b, v3.8b
+
+ mla v20.8h, v24.8h , v28.8h
+ uaddl v24.8h, v16.8b, v17.8b
+ mls v18.8h, v22.8h , v30.8h
+ uaddl v22.8h, v14.8b, v13.8b
+ mls v20.8h, v26.8h , v30.8h
+ uaddl v26.8h, v15.8b, v12.8b
+ mla v22.8h, v24.8h , v28.8h
+ mls v22.8h, v26.8h , v30.8h
+
+ ext v24.16b, v18.16b , v20.16b , #4
+ ext v26.16b, v18.16b , v20.16b , #6
+
+ ext v23.16b, v18.16b , v20.16b , #10
+ add v4.8h, v24.8h , v26.8h
+ ext v24.16b, v18.16b , v20.16b , #2
+ ext v26.16b, v18.16b , v20.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v18.4h, v23.4h
+ smlal v26.4s, v4.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v23.4s, v18.8h, v23.8h
+ smlal2 v23.4s, v4.8h, v28.8h
+ smlsl2 v23.4s, v24.8h, v30.8h
+
+ sqrshrun v18.4h, v26.4s, #10
+ sqrshrun v19.4h, v23.4s, #10
+
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+
+ ext v24.16b, v20.16b , v22.16b , #4
+ ext v26.16b, v20.16b , v22.16b , #6
+ ext v4.16b, v20.16b , v22.16b , #10
+
+ add v25.8h, v24.8h , v26.8h
+ ext v24.16b, v20.16b , v22.16b , #2
+ ext v26.16b, v20.16b , v22.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v4.4h, v20.4h
+ smlal v26.4s, v25.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v4.8h, v20.8h
+ smlal2 v22.4s, v25.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v19.4h, v26.4s, #10
+ sqrshrun v25.4h, v22.4s, #10
+
+ uaddl v24.8h, v11.8b, v1.8b
+
+
+ uqxtn v19.8b, v19.8h
+ uqxtn v25.8b, v25.8h
+ mov v19.2s[1], v25.2s[0]
+
+
+
+ uaddl v22.8h, v8.8b, v2.8b
+ ld1 {v4.2s, v5.2s}, [x0], #16 // Vector load from src[8_0]
+
+
+ ld1 {v14.2s}, [x0], x2 // Vector load from src[8_0]
+ uaddl v20.8h, v10.8b, v0.8b
+ uaddl v26.8h, v9.8b, v3.8b
+ st1 {v18.2s, v19.2s}, [x1], x3 // store row 2
+
+
+//ROW_4
+
+ uaddl v18.8h, v6.8b, v4.8b
+
+ mla v18.8h, v20.8h , v28.8h
+
+ uaddl v20.8h, v7.8b, v5.8b
+
+ mla v20.8h, v24.8h , v28.8h
+ uaddl v24.8h, v17.8b, v12.8b
+ mls v18.8h, v22.8h , v30.8h
+ uaddl v22.8h, v15.8b, v14.8b
+ mls v20.8h, v26.8h , v30.8h
+ uaddl v26.8h, v16.8b, v13.8b
+ mla v22.8h, v24.8h , v28.8h
+ mls v22.8h, v26.8h , v30.8h
+
+ ext v24.16b, v18.16b , v20.16b , #4
+ ext v26.16b, v18.16b , v20.16b , #6
+
+ ext v23.16b, v18.16b , v20.16b , #10
+ add v6.8h, v24.8h , v26.8h
+ ext v24.16b, v18.16b , v20.16b , #2
+ ext v26.16b, v18.16b , v20.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v18.4h, v23.4h
+ smlal v26.4s, v6.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v23.4s, v18.8h, v23.8h
+ smlal2 v23.4s, v6.8h, v28.8h
+ smlsl2 v23.4s, v24.8h, v30.8h
+
+ sqrshrun v18.4h, v26.4s, #10
+ sqrshrun v19.4h, v23.4s, #10
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+
+ ext v24.16b, v20.16b , v22.16b , #4
+ ext v26.16b, v20.16b , v22.16b , #6
+ ext v6.16b, v20.16b , v22.16b , #10
+
+ add v25.8h, v24.8h , v26.8h
+ ext v24.16b, v20.16b , v22.16b , #2
+ ext v26.16b, v20.16b , v22.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v6.4h, v20.4h
+ smlal v26.4s, v25.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v6.8h, v20.8h
+ smlal2 v22.4s, v25.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ mov v6.16b, v2.16b
+ mov v7.16b, v3.16b
+
+ mov v2.16b, v10.16b
+ mov v3.16b, v11.16b
+
+ subs x4, x4, #4
+ sqrshrun v19.4h, v26.4s, #10
+ sqrshrun v25.4h, v22.4s, #10
+ mov v10.16b, v0.16b
+ mov v11.16b, v1.16b
+
+ mov v24.8b, v14.8b
+
+ mov v14.16b, v12.16b
+ mov v15.16b, v13.16b
+
+
+ uqxtn v19.8b, v19.8h
+ uqxtn v25.8b, v25.8h
+ mov v19.2s[1], v25.2s[0]
+
+
+
+ mov v0.16b, v8.16b
+ mov v1.16b, v9.16b
+
+ mov v8.16b, v4.16b
+ mov v9.16b, v5.16b
+
+ mov v12.16b, v16.16b
+ mov v13.16b, v17.16b
+
+ mov v4.16b, v10.16b
+ mov v5.16b, v11.16b
+
+ mov v16.8b, v24.8b
+ st1 {v18.2s, v19.2s}, [x1], x3 // store row 3
+
+ bgt loop_16 // looping if height =16
+ b end_func
+
+loop_8_start:
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0]
+ ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0]
+
+loop_8:
+
+ ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0]
+ uaddl v14.8h, v4.8b, v6.8b
+ uaddl v12.8h, v0.8b, v10.8b
+ uaddl v16.8h, v2.8b, v8.8b
+ mla v12.8h, v14.8h , v26.8h
+ uaddl v18.8h, v5.8b, v7.8b
+ uaddl v14.8h, v1.8b, v11.8b
+ uaddl v22.8h, v3.8b, v9.8b
+ mla v14.8h, v18.8h , v26.8h
+ mls v12.8h, v16.8h , v24.8h
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[6_0]
+ uaddl v16.8h, v6.8b, v8.8b
+ mls v14.8h, v22.8h , v24.8h
+ uaddl v28.8h, v2.8b, v0.8b
+
+ ext v22.16b, v12.16b , v14.16b , #10
+ uaddl v18.8h, v4.8b, v10.8b
+ mla v28.8h, v16.8h , v26.8h
+ saddl v30.4s, v12.4h, v22.4h
+
+ saddl2 v22.4s, v12.8h, v22.8h
+ ext v16.16b, v12.16b , v14.16b , #4
+ mls v28.8h, v18.8h , v24.8h
+ ext v18.16b, v12.16b , v14.16b , #6
+ ext v20.16b, v12.16b , v14.16b , #8
+ ext v14.16b, v12.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v14.8h , v20.8h
+ uaddl v20.8h, v7.8b, v9.8b
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal2 v22.4s, v16.8h, v26.8h
+ smlsl2 v22.4s, v18.8h, v24.8h
+ uaddl v14.8h, v3.8b, v1.8b
+
+ mla v14.8h, v20.8h , v26.8h
+ sqrshrun v12.4h, v30.4s, #10
+ uaddl v16.8h, v5.8b, v11.8b
+ sqrshrun v13.4h, v22.4s, #10
+ mls v14.8h, v16.8h , v24.8h
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0]
+ uqxtn v25.8b, v12.8h
+ uqxtn v13.8b, v13.8h
+ mov v25.2s[1], v13.2s[0]
+ uaddl v16.8h, v8.8b, v10.8b
+
+
+ ext v22.16b, v28.16b , v14.16b , #10
+ uaddl v20.8h, v4.8b, v2.8b
+ saddl v30.4s, v28.4h, v22.4h
+ mla v20.8h, v16.8h , v26.8h
+
+ saddl2 v22.4s, v28.8h, v22.8h
+ ext v16.16b, v28.16b , v14.16b , #4
+ ext v18.16b, v28.16b , v14.16b , #6
+ ext v12.16b, v28.16b , v14.16b , #8
+ ext v14.16b, v28.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v12.8h , v14.8h
+
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal2 v22.4s, v16.8h, v26.8h
+ smlsl2 v22.4s, v18.8h, v24.8h
+
+
+ uaddl v18.8h, v6.8b, v0.8b
+ sqrshrun v16.4h, v30.4s, #10
+
+ sqrshrun v17.4h, v22.4s, #10
+
+ mov v12.8b, v25.8b
+ mov v25.8b, v24.8b
+
+ uaddl v28.8h, v9.8b, v11.8b
+ uqxtn v13.8b, v16.8h
+ uqxtn v17.8b, v17.8h
+ mov v13.2s[1], v17.2s[0]
+
+
+ uaddl v14.8h, v5.8b, v3.8b
+ uaddl v22.8h, v7.8b, v1.8b
+ mls v20.8h, v18.8h , v24.8h
+ st1 {v12.2s}, [x1], x3 // store row 0
+ mla v14.8h, v28.8h , v26.8h
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[8_0]
+ uaddl v30.8h, v10.8b, v0.8b
+ uaddl v28.8h, v6.8b, v4.8b
+ mls v14.8h, v22.8h , v24.8h
+ st1 {v13.2s}, [x1], x3 // store row 1
+ mla v28.8h, v30.8h , v26.8h
+
+ ext v22.16b, v20.16b , v14.16b , #10
+ saddl v30.4s, v20.4h, v22.4h
+
+ saddl2 v22.4s, v20.8h, v22.8h
+ ext v16.16b, v20.16b , v14.16b , #4
+ ext v18.16b, v20.16b , v14.16b , #6
+ ext v12.16b, v20.16b , v14.16b , #8
+ ext v14.16b, v20.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v14.8h , v12.8h
+ uaddl v20.8h, v8.8b, v2.8b
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal2 v22.4s, v16.8h, v26.8h
+ smlsl2 v22.4s, v18.8h, v24.8h
+ uaddl v18.8h, v11.8b, v1.8b
+ uaddl v16.8h, v7.8b, v5.8b
+ sqrshrun v12.4h, v30.4s, #10
+ uaddl v30.8h, v9.8b, v3.8b
+ mla v16.8h, v18.8h , v26.8h
+ sqrshrun v13.4h, v22.4s, #10
+ mls v28.8h, v20.8h , v24.8h
+
+ mls v16.8h, v30.8h , v24.8h
+ uqxtn v27.8b, v12.8h
+ uqxtn v13.8b, v13.8h
+ mov v27.2s[1], v13.2s[0]
+
+
+ ext v22.16b, v28.16b , v16.16b , #10
+
+ saddl v30.4s, v28.4h, v22.4h
+
+ saddl2 v22.4s, v28.8h, v22.8h
+ ext v12.16b, v28.16b , v16.16b , #4
+ ext v18.16b, v28.16b , v16.16b , #6
+ ext v20.16b, v28.16b , v16.16b , #8
+ ext v28.16b, v28.16b , v16.16b , #2
+ add v12.8h, v12.8h , v18.8h
+ add v18.8h, v28.8h , v20.8h
+
+ smlal v30.4s, v12.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal2 v22.4s, v12.8h, v26.8h
+ smlsl2 v22.4s, v18.8h, v24.8h
+
+
+ mov v12.8b, v27.8b
+ mov v27.8b, v26.8b
+
+ sqrshrun v16.4h, v30.4s, #10
+
+ mov v6.16b, v2.16b
+ mov v7.16b, v3.16b
+
+ sqrshrun v17.4h, v22.4s, #10
+
+ mov v2.16b, v10.16b
+ mov v3.16b, v11.16b
+
+ mov v10.16b, v0.16b
+ mov v11.16b, v1.16b
+
+ subs x4, x4, #4
+ uqxtn v13.8b, v16.8h
+ uqxtn v17.8b, v17.8h
+ mov v13.2s[1], v17.2s[0]
+
+
+ mov v0.16b, v8.16b
+ mov v1.16b, v9.16b
+
+ mov v8.16b, v4.16b
+ mov v9.16b, v5.16b
+
+ mov v4.16b, v10.16b
+ mov v5.16b, v11.16b
+
+ st1 {v12.2s}, [x1], x3 // store row 2
+ st1 {v13.2s}, [x1], x3 // store row 3
+
+ bgt loop_8 //if height =8 loop
+ b end_func
+
+loop_4_start:
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0]
+ ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0]
+
+loop_4:
+ ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0]
+ uaddl v14.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v12.8h, v0.8b, v10.8b // temp = src[0_0] + src[5_0]
+ uaddl v16.8h, v2.8b, v8.8b // temp2 = src[1_0] + src[4_0]
+ mla v12.8h, v14.8h , v26.8h // temp += temp1 * 20
+ uaddl v18.8h, v5.8b, v7.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v14.8h, v1.8b, v11.8b // temp = src[0_0] + src[5_0]
+ uaddl v22.8h, v3.8b, v9.8b // temp2 = src[1_0] + src[4_0]
+ mla v14.8h, v18.8h , v26.8h // temp += temp1 * 20
+ mls v12.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[6_0]
+ uaddl v16.8h, v6.8b, v8.8b
+ mls v14.8h, v22.8h , v24.8h // temp -= temp2 * 5
+ //Q6 and Q7 have filtered values
+ uaddl v28.8h, v2.8b, v0.8b
+
+ ext v22.16b, v12.16b , v14.16b , #10
+ uaddl v18.8h, v4.8b, v10.8b
+ mla v28.8h, v16.8h , v26.8h
+ saddl v30.4s, v12.4h, v22.4h
+
+ saddl v22.4s, v13.4h, v23.4h
+ ext v16.16b, v12.16b , v14.16b , #4
+ mls v28.8h, v18.8h , v24.8h
+ ext v18.16b, v12.16b , v14.16b , #6
+ ext v20.16b, v12.16b , v14.16b , #8
+ ext v14.16b, v12.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v14.8h , v20.8h
+ uaddl v20.8h, v7.8b, v9.8b
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v17.4h, v26.4h
+ smlsl v22.4s, v19.4h, v24.4h
+ uaddl v14.8h, v3.8b, v1.8b
+
+ mla v14.8h, v20.8h , v26.8h
+ sqrshrun v12.4h, v30.4s, #10
+ uaddl v16.8h, v5.8b, v11.8b
+ sqrshrun v13.4h, v22.4s, #10
+ mls v14.8h, v16.8h , v24.8h
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0]
+ uqxtn v25.8b, v12.8h
+ uaddl v16.8h, v8.8b, v10.8b
+
+ ext v22.16b, v28.16b , v14.16b , #10
+ uaddl v20.8h, v4.8b, v2.8b
+ saddl v30.4s, v28.4h, v22.4h
+ mla v20.8h, v16.8h , v26.8h
+
+ saddl v22.4s, v29.4h, v23.4h
+ ext v16.16b, v28.16b , v14.16b , #4
+ ext v18.16b, v28.16b , v14.16b , #6
+ ext v12.16b, v28.16b , v14.16b , #8
+ ext v14.16b, v28.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v12.8h , v14.8h
+
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v17.4h, v26.4h
+ smlsl v22.4s, v19.4h, v24.4h
+
+
+ uaddl v18.8h, v6.8b, v0.8b
+ sqrshrun v16.4h, v30.4s, #10
+
+ sqrshrun v17.4h, v22.4s, #10
+
+ mov v12.8b, v25.8b
+ mov v25.8b, v24.8b
+
+ uaddl v28.8h, v9.8b, v11.8b
+ uqxtn v13.8b, v16.8h
+
+
+
+ uaddl v14.8h, v5.8b, v3.8b
+ uaddl v22.8h, v7.8b, v1.8b
+ mls v20.8h, v18.8h , v24.8h
+ st1 {v12.s}[0], [x1], x3 // store row 0
+ mla v14.8h, v28.8h , v26.8h
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[8_0]
+ uaddl v30.8h, v10.8b, v0.8b
+ uaddl v28.8h, v6.8b, v4.8b
+ mls v14.8h, v22.8h , v24.8h
+ st1 {v13.s}[0], [x1], x3 //store row 1
+ mla v28.8h, v30.8h , v26.8h
+
+ ext v22.16b, v20.16b , v14.16b , #10
+ saddl v30.4s, v20.4h, v22.4h
+
+ saddl v22.4s, v21.4h, v23.4h
+ ext v16.16b, v20.16b , v14.16b , #4
+ ext v18.16b, v20.16b , v14.16b , #6
+ ext v12.16b, v20.16b , v14.16b , #8
+ ext v14.16b, v20.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v14.8h , v12.8h
+ uaddl v20.8h, v8.8b, v2.8b
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v17.4h, v26.4h
+ smlsl v22.4s, v19.4h, v24.4h
+ uaddl v18.8h, v11.8b, v1.8b
+ uaddl v16.8h, v7.8b, v5.8b
+ sqrshrun v12.4h, v30.4s, #10
+ uaddl v30.8h, v9.8b, v3.8b
+ mla v16.8h, v18.8h , v26.8h
+ sqrshrun v13.4h, v22.4s, #10
+ mls v28.8h, v20.8h , v24.8h
+
+ mls v16.8h, v30.8h , v24.8h
+ uqxtn v27.8b, v12.8h
+
+ ext v22.16b, v28.16b , v16.16b , #10
+
+ saddl v30.4s, v28.4h, v22.4h
+
+ saddl v22.4s, v29.4h, v23.4h
+ ext v12.16b, v28.16b , v16.16b , #4
+ ext v18.16b, v28.16b , v16.16b , #6
+ ext v20.16b, v28.16b , v16.16b , #8
+ ext v28.16b, v28.16b , v16.16b , #2
+ add v12.8h, v12.8h , v18.8h
+ add v18.8h, v28.8h , v20.8h
+
+ smlal v30.4s, v12.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v13.4h, v26.4h
+ smlsl v22.4s, v19.4h, v24.4h
+
+
+ mov v12.8b, v27.8b
+ mov v27.8b, v26.8b
+
+ sqrshrun v16.4h, v30.4s, #10
+
+ mov v6.16b, v2.16b
+ mov v7.16b, v3.16b
+
+ sqrshrun v17.4h, v22.4s, #10
+
+ mov v2.16b, v10.16b
+ mov v3.16b, v11.16b
+
+ mov v10.16b, v0.16b
+ mov v11.16b, v1.16b
+
+ subs x4, x4, #4
+ uqxtn v13.8b, v16.8h
+
+ mov v0.16b, v8.16b
+ mov v1.16b, v9.16b
+
+ mov v8.16b, v4.16b
+ mov v9.16b, v5.16b
+
+
+ mov v4.16b, v10.16b
+ mov v5.16b, v11.16b
+
+
+ st1 {v12.s}[0], [x1], x3 // store row 2
+ st1 {v13.s}[0], [x1], x3 // store row 3
+
+ bgt loop_4
+
+end_func:
+ //Restoring registers from stack
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
new file mode 100755
index 0000000..3737e3f
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
@@ -0,0 +1,1120 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction interpolation.
+//*
+//* @author
+//* Mohit
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_luma_horz_hpel_vert_qpel_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* This function implements a two stage cascaded six tap filter. It
+//* applies the six tap filter in the horizontal direction on the
+//* predictor values, followed by applying the same filter in the
+//* vertical direction on the output of the first stage. It then averages
+//* the output of the 1st stage and the output of the 2nd stage to obtain
+//* the quarter pel values. The six tap filtering operation is described
+//* in sec 8.4.2.2.1 titled "Luma sample interpolation process".
+//*
+//* @par Description:
+//* This function is called to obtain pixels lying at the following
+//* location (1/2,1/4) or (1/2,3/4). The function interpolates
+//* the predictors first in the horizontal direction and then in the
+//* vertical direction to output the (1/2,1/2) value. It then averages
+//* the output of the 1st stage and the (1/2,1/2) value to obtain (1/2,1/4)
+//* or (1/2,3/4) depending on the offset.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pu1_tmp: temporary buffer
+//*
+//* @param[in] dydx: x and y reference offset for qpel calculations
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/;
+
+//void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd,
+// UWORD8* pu1_tmp,
+// UWORD32 dydx)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
+// x7 => dydx
+// x9 => *pu1_tmp (copied from x6 at function entry)
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
+
+ih264_inter_pred_luma_horz_hpel_vert_qpel_av8:
+
+
+ // store register values to stack
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+
+
+ sub x0, x0, x2, lsl #1 // pu1_src-2*src_strd
+ sub x0, x0, #2 // pu1_src-2
+
+ mov x9, x6
+
+ lsr x7, x7, #3 // dydx >> 2 followed by dydx & 0x3 and dydx>>1 to obtain the deciding bit
+
+ add x7, x7, #2
+ mov x6, #48
+ madd x7, x7, x6, x9
+
+ subs x12, x5, #4 //if wd=4 branch to loop_4
+ beq loop_4_start
+
+ subs x12, x5, #8 //if wd=8 branch to loop_8
+ beq loop_8_start
+
+ //when wd=16
+ movi v22.8h, #20 // Filter coeff 0x14 into Q11
+ movi v24.8h, #5 // Filter coeff 0x5 into Q12
+ add x8, x0, #8
+ add x14, x1, #8
+ add x10, x9, #8
+ mov x12, x4
+ add x11, x7, #8
+loop_16_lowhalf_start:
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row -2 load for horizontal filter
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v6.8h, v0.8b, v5.8b
+
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v8.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v6.8h, v8.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v8.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row -1 load for horizontal filter
+ mls v6.8h, v8.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v8.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v10.8h, v2.8b, v3.8b
+
+ st1 {v6.4s}, [x9], x6 // store temp buffer 0
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v8.8h, v10.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v10.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 0 load for horizontal filter
+ mls v8.8h, v10.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v10.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v12.8h, v2.8b, v3.8b
+
+ st1 {v8.4s}, [x9], x6 // store temp buffer 1
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v10.8h, v12.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v12.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 1 load for horizontal filter
+ mls v10.8h, v12.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v12.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v14.8h, v2.8b, v3.8b
+
+ st1 {v10.4s}, [x9], x6 // store temp buffer 2
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v12.8h, v14.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v14.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 2 load for horizontal filter
+ mls v12.8h, v14.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v14.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v16.8h, v2.8b, v3.8b
+
+ st1 {v12.4s}, [x9], x6 // store temp buffer 3
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v14.8h, v16.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v16.8h, v1.8b, v4.8b
+
+ mls v14.8h, v16.8h , v24.8h
+loop_16_lowhalf:
+
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 3 load for horizontal filter
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v16.8h, v0.8b, v5.8b
+
+ st1 {v14.4s}, [x9], x6 // store temp buffer 4
+
+ uaddl v18.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v16.8h, v18.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ add v28.8h, v8.8h , v14.8h
+ uaddl v18.8h, v1.8b, v4.8b
+ add v30.8h, v10.8h , v12.8h
+ mls v16.8h, v18.8h , v24.8h
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 4 load for hoorizontal filter
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v20.8h, v0.8b, v5.8b
+
+ st1 {v16.4s}, [x9], x6 // store temp buffer x5
+
+ saddl v18.4s, v6.4h, v16.4h
+
+ ld1 {v26.4s}, [x7], x6 // load from temp buffer 0
+
+ saddl2 v6.4s, v6.8h, v16.8h
+
+ sqrshrun v26.8b, v26.8h, #5
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v28.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v28.8h, v24.8h
+ uaddl v2.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v20.8h, v2.8h , v22.8h
+ sqrshrun v18.4h, v18.4s, #10
+ ext v1.8b, v0.8b , v1.8b , #1
+ sqrshrun v19.4h, v6.4s, #10
+ add v28.8h, v10.8h , v16.8h
+ uaddl v2.8h, v1.8b, v4.8b
+ add v30.8h, v12.8h , v14.8h
+ mls v20.8h, v2.8h , v24.8h
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 5 load for horizontal filter
+
+ urhadd v26.8b, v18.8b , v26.8b
+
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+
+ st1 {v20.4s}, [x9], x6 // store temp buffer x6
+
+ saddl v18.4s, v8.4h, v20.4h
+
+ saddl2 v6.4s, v8.8h, v20.8h
+
+ ld1 {v8.4s}, [x7], x6 //load from temp buffer 1
+
+
+ st1 {v26.2s}, [x1], x3 // store row 0
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v28.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v28.8h, v24.8h
+
+ sqrshrun v28.8b, v8.8h, #5
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v8.8h, v0.8b, v5.8b
+ uaddl v2.8h, v2.8b, v3.8b
+ sqrshrun v18.4h, v18.4s, #10
+ ext v4.8b, v0.8b , v1.8b , #4
+ sqrshrun v19.4h, v6.4s, #10
+ mla v8.8h, v2.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ add v26.8h, v12.8h , v20.8h
+ uaddl v2.8h, v1.8b, v4.8b
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+ add v30.8h, v14.8h , v16.8h
+ mls v8.8h, v2.8h , v24.8h
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 6 load for horizontal filter
+
+ urhadd v28.8b, v28.8b , v18.8b
+
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+
+ st1 {v28.2s}, [x1], x3 // store row 1
+
+ uaddl v28.8h, v0.8b, v5.8b
+
+ st1 {v8.4s}, [x9], x6 // store temp buffer x7
+
+ saddl v18.4s, v10.4h, v8.4h
+ saddl2 v6.4s, v10.8h, v8.8h
+
+ ld1 {v10.4s}, [x7], x6 // load from temp buffer 2
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v26.4h, v24.4h
+
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v26.8h, v24.8h
+
+ sqrshrun v26.8b, v10.8h, #5
+
+ uaddl v2.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v28.8h, v2.8h , v22.8h
+ sqrshrun v18.4h, v18.4s, #10
+ ext v1.8b, v0.8b , v1.8b , #1
+ sqrshrun v19.4h, v6.4s, #10
+ add v10.8h, v14.8h , v8.8h
+ uaddl v2.8h, v1.8b, v4.8b
+ add v30.8h, v16.8h , v20.8h
+ mls v28.8h, v2.8h , v24.8h
+ uqxtn v27.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v27.2s[1], v19.2s[0]
+ saddl v18.4s, v12.4h, v28.4h
+ saddl2 v6.4s, v12.8h, v28.8h
+
+ urhadd v26.8b, v26.8b , v27.8b
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v10.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v10.8h, v24.8h
+
+ st1 {v26.2s}, [x1], x3 // store row 2
+
+ st1 {v28.2s, v29.2s}, [x9]
+
+
+ sqrshrun v18.4h, v18.4s, #10
+
+ mov v10.16b, v20.16b
+ mov v11.16b, v21.16b
+ ld1 {v30.4s}, [x7], x6 // load from temp buffer 3
+
+ sqrshrun v19.4h, v6.4s, #10
+ subs x4, x4, #4
+
+ sqrshrun v30.8b, v30.8h, #5
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+ mov v12.16b, v8.16b
+ mov v13.16b, v9.16b
+ mov v6.16b, v14.16b
+ mov v7.16b, v15.16b
+
+ urhadd v30.8b, v18.8b , v30.8b
+
+ mov v8.16b, v16.16b
+ mov v9.16b, v17.16b
+ mov v14.16b, v28.16b
+ mov v15.16b, v29.16b
+
+ st1 {v30.2s}, [x1], x3 // store row 3
+
+ bgt loop_16_lowhalf // looping if height =16
+
+
+loop_16_highhalf_start:
+ ld1 {v0.2s, v1.2s}, [x8], x2
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v6.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v8.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v6.8h, v8.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v8.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x8], x2
+ mls v6.8h, v8.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v8.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v10.8h, v2.8b, v3.8b
+
+ st1 {v6.4s}, [x10], x6
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v8.8h, v10.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v10.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x8], x2
+ mls v8.8h, v10.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v10.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v12.8h, v2.8b, v3.8b
+
+ st1 {v8.4s}, [x10], x6
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v10.8h, v12.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v12.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x8], x2
+ mls v10.8h, v12.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v12.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v14.8h, v2.8b, v3.8b
+
+ st1 {v10.4s}, [x10], x6
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v12.8h, v14.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v14.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x8], x2
+ mls v12.8h, v14.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v14.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v16.8h, v2.8b, v3.8b
+
+ st1 {v12.4s}, [x10], x6
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v14.8h, v16.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v16.8h, v1.8b, v4.8b
+
+ mls v14.8h, v16.8h , v24.8h
+
+loop_16_highhalf:
+
+ ld1 {v0.2s, v1.2s}, [x8], x2
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v16.8h, v0.8b, v5.8b
+
+ st1 {v14.4s}, [x10], x6
+
+ uaddl v18.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v16.8h, v18.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ add v28.8h, v8.8h , v14.8h
+ uaddl v18.8h, v1.8b, v4.8b
+ add v30.8h, v10.8h , v12.8h
+ mls v16.8h, v18.8h , v24.8h
+ ld1 {v0.2s, v1.2s}, [x8], x2
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v20.8h, v0.8b, v5.8b
+
+ st1 {v16.4s}, [x10], x6
+
+ saddl v18.4s, v6.4h, v16.4h
+
+ ld1 {v26.4s}, [x11], x6
+
+ saddl2 v6.4s, v6.8h, v16.8h
+
+ sqrshrun v26.8b, v26.8h, #5
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v28.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v28.8h, v24.8h
+ uaddl v2.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v20.8h, v2.8h , v22.8h
+ sqrshrun v18.4h, v18.4s, #10
+ ext v1.8b, v0.8b , v1.8b , #1
+ sqrshrun v19.4h, v6.4s, #10
+ add v28.8h, v10.8h , v16.8h
+ uaddl v2.8h, v1.8b, v4.8b
+ add v30.8h, v12.8h , v14.8h
+ mls v20.8h, v2.8h , v24.8h
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+ ld1 {v0.2s, v1.2s}, [x8], x2
+
+ urhadd v26.8b, v18.8b , v26.8b
+
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+
+ st1 {v20.4s}, [x10], x6
+
+ saddl v18.4s, v8.4h, v20.4h
+ saddl2 v6.4s, v8.8h, v20.8h
+
+ ld1 {v8.4s}, [x11], x6
+
+
+ st1 {v26.2s}, [x14], x3 //store row 0
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v28.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v28.8h, v24.8h
+ sqrshrun v28.8b, v8.8h, #5
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v8.8h, v0.8b, v5.8b
+ uaddl v2.8h, v2.8b, v3.8b
+ sqrshrun v18.4h, v18.4s, #10
+ ext v4.8b, v0.8b , v1.8b , #4
+ sqrshrun v19.4h, v6.4s, #10
+ mla v8.8h, v2.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ add v26.8h, v12.8h , v20.8h
+ uaddl v2.8h, v1.8b, v4.8b
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+ add v30.8h, v14.8h , v16.8h
+ mls v8.8h, v2.8h , v24.8h
+ ld1 {v0.2s, v1.2s}, [x8], x2
+
+ urhadd v28.8b, v28.8b , v18.8b
+
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+
+ st1 {v28.2s}, [x14], x3 //store row 1
+
+ uaddl v28.8h, v0.8b, v5.8b
+
+ st1 {v8.4s}, [x10], x6
+
+ saddl v18.4s, v10.4h, v8.4h
+ saddl2 v6.4s, v10.8h, v8.8h
+
+ ld1 {v10.4s}, [x11], x6
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v26.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v26.8h, v24.8h
+
+ sqrshrun v26.8b, v10.8h, #5
+ uaddl v2.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v28.8h, v2.8h , v22.8h
+ sqrshrun v18.4h, v18.4s, #10
+ ext v1.8b, v0.8b , v1.8b , #1
+ sqrshrun v19.4h, v6.4s, #10
+ add v10.8h, v14.8h , v8.8h
+ uaddl v2.8h, v1.8b, v4.8b
+ add v30.8h, v16.8h , v20.8h
+ mls v28.8h, v2.8h , v24.8h
+ uqxtn v27.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v27.2s[1], v19.2s[0]
+
+
+ saddl v18.4s, v12.4h, v28.4h
+ saddl2 v6.4s, v12.8h, v28.8h
+
+ urhadd v26.8b, v26.8b , v27.8b
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v10.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v10.8h, v24.8h
+
+ st1 {v26.2s}, [x14], x3 // store row 2
+
+ st1 {v28.4s}, [x10]
+
+ sqrshrun v18.4h, v18.4s, #10
+ mov v10.16b, v20.16b
+ mov v11.16b, v21.16b
+ ld1 {v30.4s}, [x11], x6
+
+ sqrshrun v19.4h, v6.4s, #10
+ subs x12, x12, #4
+
+ sqrshrun v30.8b, v30.8h, #5
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+ mov v12.16b, v8.16b
+ mov v13.16b, v9.16b
+ mov v6.16b, v14.16b
+ mov v7.16b, v15.16b
+ urhadd v30.8b, v18.8b , v30.8b
+
+ mov v8.16b, v16.16b
+ mov v9.16b, v17.16b
+ mov v14.16b, v28.16b
+ mov v15.16b, v29.16b
+ st1 {v30.2s}, [x14], x3 // store row 3
+
+ bgt loop_16_highhalf // looping if height = 8 or 16
+ b end_func
+
+loop_8_start:
+
+ movi v22.8h, #0x14 // Filter coeff 20 into Q11
+ movi v24.8h, #5 // Filter coeff 5 into Q12
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row -2 load for horizontal filter
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v6.8h, v0.8b, v5.8b
+
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v8.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v6.8h, v8.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v8.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row -1 load for horizontal filter
+ mls v6.8h, v8.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v8.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v10.8h, v2.8b, v3.8b
+
+ st1 {v6.4s}, [x9], x6 // store temp buffer 0
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v8.8h, v10.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v10.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 0 load for horizontal filter
+ mls v8.8h, v10.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v10.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v12.8h, v2.8b, v3.8b
+
+ st1 {v8.4s}, [x9], x6 // store temp buffer 1
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v10.8h, v12.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v12.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 1 load for horizontal filter
+ mls v10.8h, v12.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v12.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v14.8h, v2.8b, v3.8b
+
+ st1 {v10.4s}, [x9], x6 // store temp buffer 2
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v12.8h, v14.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v14.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 2 load for horizontal filter
+ mls v12.8h, v14.8h , v24.8h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v14.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v16.8h, v2.8b, v3.8b
+
+ st1 {v12.4s}, [x9], x6 // store temp buffer 3
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v14.8h, v16.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v16.8h, v1.8b, v4.8b
+
+ mls v14.8h, v16.8h , v24.8h
+loop_8:
+
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 3 load for horizontal filter
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v16.8h, v0.8b, v5.8b
+
+ st1 {v14.4s}, [x9], x6 // store temp buffer 4
+
+ uaddl v18.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v16.8h, v18.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ add v28.8h, v8.8h , v14.8h
+ uaddl v18.8h, v1.8b, v4.8b
+ add v30.8h, v10.8h , v12.8h
+ mls v16.8h, v18.8h , v24.8h
+ ld1 {v0.2s, v1.2s} , [x0], x2 // row 4 load for hoorizontal filter
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v20.8h, v0.8b, v5.8b
+
+ st1 {v16.4s}, [x9], x6 // store temp buffer x5
+
+ saddl v18.4s, v6.4h, v16.4h
+
+ ld1 {v26.4s}, [x7], x6 // load from temp buffer 0
+
+ saddl2 v6.4s, v6.8h, v16.8h
+
+ sqrshrun v26.8b, v26.8h, #5
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v28.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v28.8h, v24.8h
+ uaddl v2.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v20.8h, v2.8h , v22.8h
+ sqrshrun v18.4h, v18.4s, #10
+ ext v1.8b, v0.8b , v1.8b , #1
+ sqrshrun v19.4h, v6.4s, #10
+ add v28.8h, v10.8h , v16.8h
+ uaddl v2.8h, v1.8b, v4.8b
+ add v30.8h, v12.8h , v14.8h
+ mls v20.8h, v2.8h , v24.8h
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 5 load for horizontal filter
+
+ urhadd v26.8b, v18.8b , v26.8b
+
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+
+ st1 {v20.4s}, [x9], x6 // store temp buffer x6
+
+ saddl v18.4s, v8.4h, v20.4h
+
+ saddl2 v6.4s, v8.8h, v20.8h
+
+ ld1 {v8.4s}, [x7], x6 //load from temp buffer 1
+
+
+ st1 {v26.2s}, [x1], x3 // store row 0
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v28.4h, v24.4h
+
+
+
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v28.8h, v24.8h
+
+ sqrshrun v28.8b, v8.8h, #5
+
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v8.8h, v0.8b, v5.8b
+ uaddl v2.8h, v2.8b, v3.8b
+ sqrshrun v18.4h, v18.4s, #10
+ ext v4.8b, v0.8b , v1.8b , #4
+ sqrshrun v19.4h, v6.4s, #10
+ mla v8.8h, v2.8h , v22.8h
+ ext v1.8b, v0.8b , v1.8b , #1
+ add v26.8h, v12.8h , v20.8h
+ uaddl v2.8h, v1.8b, v4.8b
+
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+ add v30.8h, v14.8h , v16.8h
+ mls v8.8h, v2.8h , v24.8h
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 6 load for horizontal filter
+
+ urhadd v28.8b, v28.8b , v18.8b
+
+ ext v5.8b, v0.8b , v1.8b , #5
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+
+ st1 {v28.2s}, [x1], x3 // store row 1
+
+ uaddl v28.8h, v0.8b, v5.8b
+
+ st1 {v8.4s}, [x9], x6 // store temp buffer x7
+
+ saddl v18.4s, v10.4h, v8.4h
+ saddl2 v6.4s, v10.8h, v8.8h
+
+ ld1 {v10.4s}, [x7], x6 // load from temp buffer 2
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v26.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v26.8h, v24.8h
+
+ sqrshrun v26.8b, v10.8h, #5
+ uaddl v2.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v28.8h, v2.8h , v22.8h
+ sqrshrun v18.4h, v18.4s, #10
+ ext v1.8b, v0.8b , v1.8b , #1
+ sqrshrun v19.4h, v6.4s, #10
+ add v10.8h, v14.8h , v8.8h
+ uaddl v2.8h, v1.8b, v4.8b
+ add v30.8h, v16.8h , v20.8h
+ mls v28.8h, v2.8h , v24.8h
+
+ uqxtn v27.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+
+ mov v27.2s[1], v19.2s[0]
+
+ saddl v18.4s, v12.4h, v28.4h
+ saddl2 v6.4s, v12.8h, v28.8h
+
+ urhadd v26.8b, v26.8b , v27.8b
+
+ smlal v18.4s, v30.4h, v22.4h
+ smlsl v18.4s, v10.4h, v24.4h
+ smlal2 v6.4s, v30.8h, v22.8h
+ smlsl2 v6.4s, v10.8h, v24.8h
+
+ st1 {v26.2s}, [x1], x3 // store row 2
+
+ st1 {v28.2s, v29.2s}, [x9]
+
+
+ sqrshrun v18.4h, v18.4s, #10
+ mov v10.16b, v20.16b
+ mov v11.16b, v21.16b
+ ld1 {v30.4s}, [x7], x6 // load from temp buffer 3
+
+ sqrshrun v19.4h, v6.4s, #10
+ subs x4, x4, #4
+
+ sqrshrun v30.8b, v30.8h, #5
+
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+
+ mov v12.16b, v8.16b
+ mov v13.16b, v9.16b
+ mov v6.16b, v14.16b
+ mov v7.16b, v15.16b
+
+ urhadd v30.8b, v18.8b , v30.8b
+ mov v8.16b, v16.16b
+ mov v9.16b, v17.16b
+ mov v14.16b, v28.16b
+ mov v15.16b, v29.16b
+ st1 {v30.2s}, [x1], x3 // store row 3
+
+ bgt loop_8 //if height =8 or 16 loop
+ b end_func
+
+loop_4_start:
+ movi v22.8h, #20 // Filter coeff 20 into D22
+ movi v23.8h, #5 // Filter coeff 5 into D23
+
+ ld1 {v0.2s, v1.2s}, [x0], x2 //row -2 load
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v6.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v8.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v6.4h, v8.4h , v22.4h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v8.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row -1 load
+ mls v6.4h, v8.4h , v23.4h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v8.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v10.8h, v2.8b, v3.8b
+
+ st1 {v6.2s}, [x9], x6 // store temp buffer 0
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v8.4h, v10.4h , v22.4h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v10.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 0 load
+ mls v8.4h, v10.4h , v23.4h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v10.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v12.8h, v2.8b, v3.8b
+
+ st1 {v8.2s}, [x9], x6 // store temp buffer 1
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v10.4h, v12.4h , v22.4h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v12.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 1 load
+ mls v10.4h, v12.4h , v23.4h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v12.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v14.8h, v2.8b, v3.8b
+
+ st1 {v10.2s}, [x9], x6 // store temp buffer 2
+
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v12.4h, v14.4h , v22.4h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v14.8h, v1.8b, v4.8b
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 2 load
+ mls v12.4h, v14.4h , v23.4h
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v14.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v16.8h, v2.8b, v3.8b
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v14.4h, v16.4h , v22.4h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v16.8h, v1.8b, v4.8b
+
+ st1 {v12.2s}, [x9], x6 // store temp buffer 3
+
+ mls v14.4h, v16.4h , v23.4h
+
+loop_4:
+
+ ld1 {v0.2s, v1.2s}, [x0], x2 // row 3 load
+ ext v5.8b, v0.8b , v1.8b , #5
+ uaddl v16.8h, v0.8b, v5.8b
+ ext v2.8b, v0.8b , v1.8b , #2
+ ext v3.8b, v0.8b , v1.8b , #3
+ uaddl v18.8h, v2.8b, v3.8b
+ st1 {v14.2s}, [x9], x6 // store temp buffer 4
+ ext v4.8b, v0.8b , v1.8b , #4
+ mla v16.4h, v18.4h , v22.4h
+ ext v1.8b, v0.8b , v1.8b , #1
+ uaddl v18.8h, v1.8b, v4.8b
+ add v2.4h, v10.4h , v12.4h
+ mls v16.4h, v18.4h , v23.4h
+ add v3.4h, v8.4h , v14.4h
+ ld1 {v18.2s, v19.2s}, [x0], x2 // row 4 load
+ ext v25.8b, v18.8b , v19.8b , #5
+ uaddl v26.8h, v18.8b, v25.8b
+ ext v20.8b, v18.8b , v19.8b , #2
+
+ st1 {v16.2s}, [x9], x6 // store temp buffer 5
+
+ saddl v0.4s, v6.4h, v16.4h
+ smlal v0.4s, v2.4h, v22.4h
+ ext v21.8b, v18.8b , v19.8b , #3
+ uaddl v28.8h, v20.8b, v21.8b
+ ext v24.8b, v18.8b , v19.8b , #4
+ smlsl v0.4s, v3.4h, v23.4h
+ mla v26.4h, v28.4h , v22.4h
+ ext v19.8b, v18.8b , v19.8b , #1
+ uaddl v28.8h, v19.8b, v24.8b
+ add v2.4h, v12.4h , v14.4h
+ mls v26.4h, v28.4h , v23.4h
+ sqrshrun v0.4h, v0.4s, #0xa
+ add v3.4h, v10.4h , v16.4h
+ ld1 {v18.2s, v19.2s}, [x0], x2 // row 5 load
+ ext v25.8b, v18.8b , v19.8b , #5
+ uqxtn v11.8b, v0.8h
+ uaddl v28.8h, v18.8b, v25.8b
+
+ st1 {v26.2s}, [x9], x6 // store temp buffer 6
+
+ //Q3 available here
+ ld1 {v6.2s}, [x7], x6 // load from temp buffer 0
+ ld1 {v7.2s}, [x7], x6 // load from temp buffer 1
+
+ sqrshrun v9.8b, v6.8h, #5
+ sqrshrun v7.8b, v7.8h, #5
+ mov v9.2s[1], v7.2s[0]
+
+ ext v20.8b, v18.8b , v19.8b , #2
+
+ saddl v0.4s, v8.4h, v26.4h
+ smlal v0.4s, v2.4h, v22.4h
+ ext v21.8b, v18.8b , v19.8b , #3
+ uaddl v6.8h, v20.8b, v21.8b
+ ext v24.8b, v18.8b , v19.8b , #4
+ smlsl v0.4s, v3.4h, v23.4h
+ mla v28.4h, v6.4h , v22.4h
+ ext v19.8b, v18.8b , v19.8b , #1
+ uaddl v6.8h, v19.8b, v24.8b
+ add v2.4h, v14.4h , v16.4h
+ mls v28.4h, v6.4h , v23.4h
+ sqrshrun v0.4h, v0.4s, #0xa
+ add v3.4h, v12.4h , v26.4h
+ ld1 {v18.2s, v19.2s}, [x0], x2 // row 6 load
+ ext v25.8b, v18.8b , v19.8b , #5
+ uqxtn v13.8b, v0.8h
+
+ trn1 v11.2s, v11.2s, v13.2s
+ trn2 v13.2s, v11.2s, v13.2s
+ saddl v0.4s, v10.4h, v28.4h
+ urhadd v9.8b, v9.8b , v11.8b
+
+ st1 {v28.2s}, [x9], x6 // store temp buffer 7
+
+ smlal v0.4s, v2.4h, v22.4h
+ uaddl v30.8h, v18.8b, v25.8b
+
+ st1 {v9.s}[0], [x1], x3 // store row 0
+
+ ext v20.8b, v18.8b , v19.8b , #2
+
+ st1 {v9.s}[1], [x1], x3 // store row 1
+
+ ext v21.8b, v18.8b , v19.8b , #3
+ smlsl v0.4s, v3.4h, v23.4h
+ uaddl v8.8h, v20.8b, v21.8b
+ ext v24.8b, v18.8b , v19.8b , #4
+ mla v30.4h, v8.4h , v22.4h
+ ext v19.8b, v18.8b , v19.8b , #1
+ uaddl v8.8h, v19.8b, v24.8b
+ sqrshrun v0.4h, v0.4s, #0xa
+ add v2.4h, v16.4h , v26.4h
+ mls v30.4h, v8.4h , v23.4h
+ uqxtn v4.8b, v0.8h
+
+ add v3.4h, v14.4h , v28.4h
+
+
+ saddl v0.4s, v12.4h, v30.4h
+
+ st1 {v30.2s}, [x9]
+
+ smlal v0.4s, v2.4h, v22.4h
+
+ ld1 {v8.2s}, [x7], x6 // load from temp buffer 2
+ ld1 {v9.2s}, [x7], x6 // load from temp buffer 3
+ smlsl v0.4s, v3.4h, v23.4h
+ subs x4, x4, #4
+
+ sqrshrun v10.8b, v8.8h, #5
+ sqrshrun v9.8b, v9.8h, #5
+ mov v10.2s[1], v9.2s[0]
+
+ mov v12.8b, v28.8b
+
+ sqrshrun v0.4h, v0.4s, #0xa
+ mov v6.8b, v14.8b
+ mov v8.8b, v16.8b
+
+ uqxtn v5.8b, v0.8h
+
+ trn1 v4.2s, v4.2s, v5.2s
+ trn2 v5.2s, v4.2s, v5.2s
+ urhadd v4.8b, v4.8b , v10.8b
+ mov v10.8b, v26.8b
+ mov v14.8b, v30.8b
+
+ st1 {v4.s}[0], [x1], x3 // store row 2
+ st1 {v4.s}[1], [x1], x3 // store row 3
+
+ bgt loop_4
+
+end_func:
+ //Restoring registers from stack
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
new file mode 100755
index 0000000..39e3253
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
@@ -0,0 +1,597 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_luma_horz_qpel_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction horizontal quarter pel interpolation.
+//*
+//* @author
+//* Mohit
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_luma_horz_qpel_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Quarter pel interprediction luma filter for horizontal input
+//*
+//* @par Description:
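+//* Applies a 6 tap horizontal filter. The output is clipped to 8 bits and then
+//* averaged (with rounding) with the source sample at pu1_src + (x_offset >> 1)
+//* to give the quarter pel value; see sec 8.4.2.2.1 titled
+//* "Luma sample interpolation process"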
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pu1_tmp
+//* temporary buffer: UNUSED in this function
+//*
+//* @param[in] dydx
+//* x and y reference offset for qpel calculations; only the x offset
+//* (dydx & 3) is used in this function
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+//void ih264_inter_pred_luma_horz_qpel_av8 (
+// UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd,
+// UWORD8* pu1_tmp,
+// UWORD32 dydx)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
+// x6 => *pu1_tmp (unused)
+// x7 => dydx
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+
+ .global ih264_inter_pred_luma_horz_qpel_av8
+
+ih264_inter_pred_luma_horz_qpel_av8:
+
+ // Entry point: saves registers, prepares the source pointers and filter
+ // coefficients, then dispatches on the block width.
+ // In: x0 = pu1_src, x1 = pu1_dst, w2 = src_strd, w3 = dst_strd,
+ // w4 = ht, w5 = wd, w7 = dydx
+
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ // src_strd, dst_strd, ht and wd are 32-bit (WORD32) arguments. AAPCS64 leaves
+ // bits [63:32] of such argument registers unspecified, and the code below
+ // uses them as full 64-bit X registers (post-index increments, 64-bit
+ // compares), so sign-extend them before first use.
+ sxtw x2, w2 //src_strd
+ sxtw x3, w3 //dst_strd
+ sxtw x4, w4 //ht
+ sxtw x5, w5 //wd
+
+ and x7, x7, #3 //Finds x-offset (the mask also discards the upper bits of dydx)
+ add x7, x0, x7, lsr #1 //pu1_src + (x_offset>>1): samples averaged with the filter output
+ sub x0, x0, #2 //pu1_src-2: leftmost tap of the 6 tap filter
+ sub x14, x4, #16 //ht - 16, used by the repeat checks of loop_16 and loop_8
+ movi v0.16b, #5 //filter coeff
+ subs x12, x5, #8 //if wd=8 branch to loop_8
+ movi v1.16b, #20 //filter coeff
+
+ beq loop_8
+
+ subs x12, x5, #4 //if wd=4 branch to loop_4
+ beq loop_4
+ //any other width falls through to loop_16
+
+loop_16: //when wd=16
+ // One pass filters 8 rows of a 16 pixel wide block, two rows at a time.
+ // Per row: 24 source bytes are loaded from x0 (v2-v4 or v5-v7); ext builds the
+ // byte-shifted tap vectors a1..a5; v8/v10 (or v14/v16) accumulate
+ // a0 + a5 + 20*(a2 + a3) - 5*(a1 + a4) for the left/right 8 pixels using the
+ // coefficients in v1 (20) and v0 (5); sqrshrun #5 rounds and saturates to
+ // 8 bits; urhadd averages the result with the 16 source pixels loaded from x7.
+ // Loads and ext for the next row pair are interleaved with the narrowing and
+ // stores of the current pair, so the instruction order matters.
+ //// Processing row0 and row1
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row0
+ add x14, x14, #1 //pass counter for the loop check: x14 = (ht - 16) + passes started
+ ext v31.8b, v2.8b , v3.8b , #5
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1
+ ext v30.8b, v3.8b , v4.8b , #5
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b , #5
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1)
+ ext v31.8b, v2.8b , v3.8b , #2
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row1)
+ ext v30.8b, v3.8b , v4.8b , #2
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b , #2
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b , #2
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1)
+ ext v31.8b, v2.8b , v3.8b , #3
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row1)
+ ext v30.8b, v3.8b , v4.8b , #3
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b , #3
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b , #3
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ ext v31.8b, v2.8b , v3.8b , #1
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row1)
+ ext v30.8b, v3.8b , v4.8b , #1
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b , #1
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b , #1
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ ext v31.8b, v2.8b , v3.8b , #4
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+ ext v30.8b, v3.8b , v4.8b , #4
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ ext v28.8b, v5.8b , v6.8b , #4
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ ext v27.8b, v6.8b , v7.8b , #4
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row2
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+
+ ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row0)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row3
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v31.8b, v2.8b , v3.8b , #5
+ urhadd v20.16b, v12.16b , v20.16b //Interpolation step for qpel calculation
+ urhadd v21.16b, v13.16b , v21.16b //Interpolation step for qpel calculation
+
+ sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row0
+ ext v30.8b, v3.8b , v4.8b , #5
+ sqrshrun v19.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+
+
+
+//// Processing row2 and row3
+ ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row1)
+ ext v28.8b, v5.8b , v6.8b , #5
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2)
+ st1 {v18.8b, v19.8b}, [x1], x3 ////Store dest row1
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row2)
+ ext v27.8b, v6.8b , v7.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3)
+ ext v31.8b, v2.8b , v3.8b , #2
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row3)
+ ext v30.8b, v3.8b , v4.8b , #2
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row2)
+ ext v27.8b, v6.8b , v7.8b , #2
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row2)
+ ext v28.8b, v5.8b , v6.8b , #2
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3)
+ ext v31.8b, v2.8b , v3.8b , #3
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row3)
+ ext v30.8b, v3.8b , v4.8b , #3
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2)
+ ext v28.8b, v5.8b , v6.8b , #3
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row2)
+ ext v27.8b, v6.8b , v7.8b , #3
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3)
+ ext v31.8b, v2.8b , v3.8b , #1
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row3)
+ ext v30.8b, v3.8b , v4.8b , #1
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2)
+ ext v28.8b, v5.8b , v6.8b , #1
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row2)
+ ext v27.8b, v6.8b , v7.8b , #1
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3)
+ ext v31.8b, v2.8b , v3.8b , #4
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row3)
+ ext v30.8b, v3.8b , v4.8b , #4
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2)
+ ext v28.8b, v5.8b , v6.8b , #4
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row2)
+ ext v27.8b, v6.8b , v7.8b , #4
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3)
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row4
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row3)
+
+ ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row2)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2)
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row5
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row2)
+ ext v31.8b, v2.8b , v3.8b , #5
+ urhadd v20.16b, v12.16b , v20.16b //Interpolation step for qpel calculation
+ urhadd v21.16b, v13.16b , v21.16b //Interpolation step for qpel calculation
+
+ sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3)
+ ext v30.8b, v3.8b , v4.8b , #5
+ st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row2
+ sqrshrun v19.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row3)
+ ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row3)
+
+//// Processing row4 and row5
+ ext v28.8b, v5.8b , v6.8b , #5
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row4)
+ st1 {v18.8b, v19.8b}, [x1], x3 ////Store dest row3
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row4)
+ ext v27.8b, v6.8b , v7.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row5)
+ ext v31.8b, v2.8b , v3.8b , #2
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row5)
+ ext v30.8b, v3.8b , v4.8b , #2
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row4)
+ ext v27.8b, v6.8b , v7.8b , #2
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row4)
+ ext v28.8b, v5.8b , v6.8b , #2
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row5)
+ ext v31.8b, v2.8b , v3.8b , #3
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row5)
+ ext v30.8b, v3.8b , v4.8b , #3
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row4)
+ ext v28.8b, v5.8b , v6.8b , #3
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row4)
+ ext v27.8b, v6.8b , v7.8b , #3
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row5)
+ ext v31.8b, v2.8b , v3.8b , #1
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row5)
+ ext v30.8b, v3.8b , v4.8b , #1
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4)
+ ext v28.8b, v5.8b , v6.8b , #1
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row4)
+ ext v27.8b, v6.8b , v7.8b , #1
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row5)
+ ext v31.8b, v2.8b , v3.8b , #4
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row5)
+ ext v30.8b, v3.8b , v4.8b , #4
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row4)
+ ext v28.8b, v5.8b , v6.8b , #4
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row4)
+ ext v27.8b, v6.8b , v7.8b , #4
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row5)
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row6
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row5)
+ ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row4)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row4)
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row7
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row4)
+ ext v31.8b, v2.8b , v3.8b , #5
+ urhadd v20.16b, v12.16b , v20.16b //Interpolation step for qpel calculation
+ urhadd v21.16b, v13.16b , v21.16b //Interpolation step for qpel calculation
+
+ sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row5)
+ st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row4
+ ext v30.8b, v3.8b , v4.8b , #5
+ sqrshrun v19.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row5)
+ ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row5)
+
+
+ //// Processing row6 and row7
+
+ ext v28.8b, v5.8b , v6.8b , #5
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row6)
+ st1 {v18.8b, v19.8b}, [x1], x3 ////Store dest row5
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row6)
+ ext v27.8b, v6.8b , v7.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row7)
+ ext v31.8b, v2.8b , v3.8b , #2
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row7)
+ ext v30.8b, v3.8b , v4.8b , #2
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row6)
+ ext v27.8b, v6.8b , v7.8b , #2
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row6)
+ ext v28.8b, v5.8b , v6.8b , #2
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row7)
+ ext v31.8b, v2.8b , v3.8b , #3
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row7)
+ ext v30.8b, v3.8b , v4.8b , #3
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row6)
+ ext v28.8b, v5.8b , v6.8b , #3
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row6)
+ ext v27.8b, v6.8b , v7.8b , #3
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row7)
+ ext v31.8b, v2.8b , v3.8b , #1
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row7)
+ ext v30.8b, v3.8b , v4.8b , #1
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6)
+ ext v28.8b, v5.8b , v6.8b , #1
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row6)
+ ext v27.8b, v6.8b , v7.8b , #1
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row7)
+ ext v31.8b, v2.8b , v3.8b , #4
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row7)
+ ext v30.8b, v3.8b , v4.8b , #4
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row6)
+ ext v28.8b, v5.8b , v6.8b , #4
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row6)
+ ext v27.8b, v6.8b , v7.8b , #4
+ ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row6)
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row6)
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row7)
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row6)
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row7)
+ urhadd v20.16b, v12.16b , v20.16b //Interpolation step for qpel calculation
+ urhadd v21.16b, v13.16b , v21.16b //Interpolation step for qpel calculation
+
+ ld1 {v12.2s, v13.2s}, [x7], x2 //Load value for interpolation (column1,row7)
+ sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row7)
+ st1 {v20.8b, v21.8b}, [x1], x3 ////Store dest row6
+ sqrshrun v19.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row7)
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ subs x12, x14, #1 // zero only after the first pass when ht == 16 (x14 is then ht - 16 + 1)
+ st1 {v18.8b, v19.8b}, [x1], x3 ////Store dest row7
+
+
+
+ beq loop_16 //second 8-row pass when ht == 16
+ b end_func
+
+loop_8:
+ // wd=8: one pass filters up to 8 rows of an 8 pixel wide block, two rows at a time.
+ // Per row: 16 source bytes are loaded from x0; ext builds the tap vectors
+ // a1..a5; v8/v14 accumulate a0 + a5 + 20*(a2 + a3) - 5*(a1 + a4) using the
+ // coefficients in v1 (20) and v0 (5); sqrshrun #5 rounds and saturates to
+ // 8 bits; urhadd averages with the 8 source pixels loaded from x7.
+ // In the first pair the first-loaded row (row0) is in v5/v6 -> v14 -> v18;
+ // from row2 onwards even rows use v2/v3 -> v8 -> v18 and odd rows
+ // v5/v6 -> v14 -> v19.
+//// Processing row0 and row1
+
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row0 (first row in memory)
+ add x14, x14, #1 //pass counter for the loop check: x14 = (ht - 16) + passes started
+ ext v28.8b, v5.8b , v6.8b , #5
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row1
+ ext v25.8b, v5.8b , v6.8b , #2
+ ext v31.8b, v2.8b , v3.8b , #5
+ ext v24.8b, v5.8b , v6.8b , #3
+ ext v23.8b, v5.8b , v6.8b , #1
+ ext v22.8b, v5.8b , v6.8b , #4
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row0)
+ ext v29.8b, v2.8b , v3.8b , #3
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ ext v30.8b, v2.8b , v3.8b , #2
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row1)
+ ext v27.8b, v2.8b , v3.8b , #1
+ ext v26.8b, v2.8b , v3.8b , #4
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row2
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row1)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3
+ sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+
+ //// Processing row2 and row3
+ ext v28.8b, v5.8b , v6.8b , #5
+ ext v25.8b, v5.8b , v6.8b , #2
+ ext v31.8b, v2.8b , v3.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3)
+ ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row0)
+ ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row1)
+ ext v24.8b, v5.8b , v6.8b , #3
+ ext v23.8b, v5.8b , v6.8b , #1
+ sqrshrun v19.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ ext v22.8b, v5.8b , v6.8b , #4
+ ext v29.8b, v2.8b , v3.8b , #3
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3)
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ st1 {v18.8b}, [x1], x3 ////Store dest row0
+ st1 {v19.8b}, [x1], x3 ////Store dest row1
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2)
+ ext v30.8b, v2.8b , v3.8b , #2
+ ext v27.8b, v2.8b , v3.8b , #1
+ ext v26.8b, v2.8b , v3.8b , #4
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row4
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row2)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2)
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row5
+ subs x9, x4, #4 //ht - 4: sets Z when only four rows are wanted (tested by "beq end_func" below; nothing in between alters the flags)
+ sqrshrun v19.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3)
+ ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row2)
+ ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row3)
+ ext v28.8b, v5.8b , v6.8b , #5
+ ext v25.8b, v5.8b , v6.8b , #2
+ ext v31.8b, v2.8b , v3.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row5)
+ ext v24.8b, v5.8b , v6.8b , #3
+ sqrshrun v18.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2)
+ ext v22.8b, v5.8b , v6.8b , #4
+ ext v29.8b, v2.8b , v3.8b , #3
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ st1 {v18.8b}, [x1], x3 ////Store dest row2
+ ext v30.8b, v2.8b , v3.8b , #2
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row4)
+ st1 {v19.8b}, [x1], x3 ////Store dest row3
+ // NOTE(review): source rows 4 and 5 have already been loaded above even when ht == 4
+ beq end_func // Branch if height==4
+
+//// Processing row4 and row5
+ ext v23.8b, v5.8b , v6.8b , #1
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row5)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row5)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row5)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row5)
+ ext v27.8b, v2.8b , v3.8b , #1
+ ext v26.8b, v2.8b , v3.8b , #4
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row6
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row4)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row4)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row4)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row4)
+ sqrshrun v19.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row5)
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row7
+ ext v31.8b, v2.8b , v3.8b , #5
+ ext v28.8b, v5.8b , v6.8b , #5
+ ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row4)
+ ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row5)
+ ext v25.8b, v5.8b , v6.8b , #2
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row7)
+ ext v24.8b, v5.8b , v6.8b , #3
+ ext v22.8b, v5.8b , v6.8b , #4
+ sqrshrun v18.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row4)
+ ext v29.8b, v2.8b , v3.8b , #3
+ ext v30.8b, v2.8b , v3.8b , #2
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ st1 {v18.8b}, [x1], x3 ////Store dest row4
+ ext v27.8b, v2.8b , v3.8b , #1
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row6)
+ ext v26.8b, v2.8b , v3.8b , #4
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row6)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row6)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row6)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row6)
+ //// Processing row6 and row7
+ st1 {v19.8b}, [x1], x3 ////Store dest row5
+ ext v23.8b, v5.8b , v6.8b , #1
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row7)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row7)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row7)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row7)
+ ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row6)
+ ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row7)
+ sqrshrun v18.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row6)
+ subs x12, x14, #1 //zero only after the first pass when ht == 16 (x14 is then ht - 16 + 1)
+ sqrshrun v19.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row7)
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ st1 {v18.8b}, [x1], x3 ////Store dest row6
+ st1 {v19.8b}, [x1], x3 ////Store dest row7
+
+ beq loop_8 //looping if height ==16
+
+ b end_func
+
+loop_4:
+ // wd=4: one pass filters 4 rows of a 4 pixel wide block, two rows at a time.
+ // The arithmetic is the 8-lane form (16 source bytes loaded from x0, 8 bytes
+ // from x7 per row), but only the low 32 bits of each result are stored.
+ // In the first pair the first-loaded row (row0) is in v5/v6 -> v14 -> v18;
+ // in the second pair row2 uses v2/v3 -> v8 -> v18 and row3 v5/v6 -> v14 -> v19.
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row0 (first row in memory)
+ ext v28.8b, v5.8b , v6.8b , #5
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row1
+ ext v25.8b, v5.8b , v6.8b , #2
+ ext v31.8b, v2.8b , v3.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row0)
+ ext v24.8b, v5.8b , v6.8b , #3
+ ext v23.8b, v5.8b , v6.8b , #1
+ ext v22.8b, v5.8b , v6.8b , #4
+ ext v29.8b, v2.8b , v3.8b , #3
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row1)
+ ext v30.8b, v2.8b , v3.8b , #2
+ ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row0)
+ ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row1)
+ ext v27.8b, v2.8b , v3.8b , #1
+ ext v26.8b, v2.8b , v3.8b , #4
+ ld1 {v2.8b, v3.8b}, [x0], x2 //// Load row2
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row1)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ ld1 {v5.8b, v6.8b}, [x0], x2 //// Load row3
+ ext v28.8b, v5.8b , v6.8b , #5
+ ext v25.8b, v5.8b , v6.8b , #2
+ sqrshrun v18.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v2.8b , v3.8b , #5
+ ext v24.8b, v5.8b , v6.8b , #3
+
+ ext v23.8b, v5.8b , v6.8b , #1
+ ext v22.8b, v5.8b , v6.8b , #4
+ ext v29.8b, v2.8b , v3.8b , #3
+ sqrshrun v19.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ ext v30.8b, v2.8b , v3.8b , #2
+ ext v27.8b, v2.8b , v3.8b , #1
+
+ //// Processing row2 and row3
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ st1 {v18.s}[0], [x1], x3 ////Store dest row0
+ st1 {v19.s}[0], [x1], x3 ////Store dest row1
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row3)
+ ext v26.8b, v2.8b , v3.8b , #4
+ ld1 {v12.2s}, [x7], x2 //Load value for interpolation (column1,row2)
+ ld1 {v13.2s}, [x7], x2 //Load value for interpolation (column1,row3)
+
+ umlal v14.8h, v25.8b, v1.8b //// a0 + a5 + 20a2 (column1,row3)
+ umlal v14.8h, v24.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row3)
+ umlsl v14.8h, v23.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row3)
+ umlsl v14.8h, v22.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row3)
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row2)
+ umlal v8.8h, v29.8b, v1.8b //// a0 + a5 + 20a3 (column1,row2)
+ umlal v8.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row2)
+ umlsl v8.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row2)
+ umlsl v8.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row2)
+ sqrshrun v19.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row3)
+ sqrshrun v18.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row2)
+ urhadd v18.16b, v12.16b , v18.16b //Interpolation step for qpel calculation
+ urhadd v19.16b, v13.16b , v19.16b //Interpolation step for qpel calculation
+
+ st1 {v18.s}[0], [x1], x3 ////Store dest row2
+ subs x4, x4, #8 // Loop if height =8: x4 becomes 0 after the first pass only when ht == 8
+ st1 {v19.s}[0], [x1], x3 ////Store dest row3
+
+ beq loop_4 //second 4-row pass when ht == 8 (x4 is -8 afterwards, so the loop ends)
+
+end_func:
+ // Common exit for all widths: undo the prologue's saves in reverse order and return.
+
+ ldp x19, x20, [sp], #16 //reload x19/x20 stored by the prologue and release their 16 bytes of stack
+ pop_v_regs //macro from ih264_neon_macros.s; presumably the counterpart of push_v_regs - body not visible here
+ ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
new file mode 100755
index 0000000..b1e4866
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
@@ -0,0 +1,910 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction interpolation.
+//*
+//* @author
+//* Mohit
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_luma_horz_qpel_vert_hpel_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* This function implements a two stage cascaded six tap filter. It
+//* applies the six tap filter in the vertical direction on the
+//* predictor values, followed by applying the same filter in the
+//* horizontal direction on the output of the first stage. It then averages
+//* the output of the 1st stage and the final stage to obtain the quarter
+//* pel values. The six tap filtering operation is described in sec 8.4.2.2.1
+//* titled "Luma sample interpolation process".
+//*
+//* @par Description:
+//* This function is called to obtain pixels lying at the following
+//* location (1/4,1/2) or (3/4,1/2). The function interpolates
+//* the predictors first in the vertical direction and then in the
+//* horizontal direction to output the (1/2,1/2) value. It then averages
+//* the output of the 1st stage with the (1/2,1/2) value to obtain (1/4,1/2)
+//* or (3/4,1/2) depending on the offset.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pu1_tmp: temporary buffer
+//*
+//* @param[in] dydx: x and y reference offset for qpel calculations
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/;
+
+//void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd,
+// UWORD8* pu1_tmp,
+// UWORD32 dydx)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
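+// x6 => dydx (copied from x7 at entry; then reused as the temp-buffer read pointer)
+// x9 => *pu1_tmp (copied from x6 at entry; temp-buffer write pointer)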
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
+
+ih264_inter_pred_luma_horz_qpel_vert_hpel_av8:
+// Entry: set up the source/temp pointers and filter constants, then dispatch on wd (4, 8, otherwise 16).
+ // STMFD sp!, {x4-x12, x14} //store register values to stack
+ push_v_regs // macro, presumably from ih264_neon_macros.s included above (assumed to save SIMD registers -- confirm)
+ stp x19, x20, [sp, #-16]! // push x19/x20; restored at end_func
+
+ sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd
+ sub x0, x0, #2 //pu1_src-2
+ mov x9, x6 // x9 = pu1_tmp (7th argument in the prototype above): temp-buffer write pointer
+ mov x6, x7 // x6 = dydx (8th argument in the prototype above)
+
+ and x6, x6, #2 // (dydx & 0x3) >> 1 << 1, i.e. dydx & 2: byte offset of 0 or 2 (0 or 1 16-bit entry)
+
+ add x7, x9, #4 // x7 = pu1_tmp + 4 bytes (skips the first two 16-bit entries of a temp row)
+ add x6, x7, x6 // pi16_pred1_temp += (x_offset>>1): x6 = temp-buffer read pointer
+
+ movi v26.8h, #0x14 // Filter coeff 20 into v26
+ movi v24.8h, #0x5 // Filter coeff 5 into v24
+ movi v27.8h, #0x14 // Filter coeff 20 into v27
+ movi v25.8h, #0x5 // Filter coeff 5 into v25
+ mov x7, #0x20 // x7 = 32: post-increment applied after the second 16-byte temp access of a row
+ mov x8, #0x30 // x8 = 48: step between temp-buffer rows for the x6 reads in the wd=4/8 paths
+ subs x12, x5, #4 //if wd=4 branch to loop_4_start
+ beq loop_4_start
+
+ subs x12, x5, #8 //if wd=8 branch to loop_8_start
+ beq loop_8_start
+
+ //when wd=16
+ movi v28.8h, #0x14 // Filter coeff 20 into v28 (v24/v26 are overwritten as scratch inside loop_16)
+ movi v30.8h, #0x5 // Filter coeff 5 into v30
+ sub x2, x2, #16 // each row is read as 16 + 8 bytes; the first load already advances x0 by 16
+ ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[0_0]: bytes 0-15 of row 0
+ ld1 {v12.2s}, [x0], x2 // Vector load from src[0_0]: bytes 16-23 of row 0, then step to the next row
+ ld1 {v2.2s, v3.2s}, [x0], #16 // Vector load from src[1_0]
+ ld1 {v13.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x0], #16 // Vector load from src[2_0]
+ ld1 {v14.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x0], #16 // Vector load from src[3_0]
+ ld1 {v15.2s}, [x0], x2 // Vector load from src[3_0]
+ ld1 {v8.2s, v9.2s}, [x0], #16 // Vector load from src[4_0]
+ ld1 {v16.2s}, [x0], x2 // Vector load from src[4_0]
+
+loop_16:
+// One pass produces 4 output rows of 16 pixels. v28 = 20 and v30 = 5 are the filter coefficients.
+ ld1 {v10.2s, v11.2s}, [x0], #16 // Vector load from src[5_0]
+ ld1 {v17.2s}, [x0], x2 // Vector load from src[5_0]
+
+// First row: vertical 6-tap sums as 16-bit lanes in v18 (bytes 0-7), v20 (bytes 8-15), v22 (bytes 16-23)
+ uaddl v20.8h, v4.8b, v6.8b // row2 + row3
+ uaddl v18.8h, v0.8b, v10.8b // row0 + row5
+ uaddl v22.8h, v2.8b, v8.8b // row1 + row4
+ mla v18.8h, v20.8h , v28.8h // += 20 * (row2 + row3)
+ uaddl v24.8h, v5.8b, v7.8b
+ uaddl v20.8h, v1.8b, v11.8b
+ uaddl v26.8h, v3.8b, v9.8b
+ mla v20.8h, v24.8h , v28.8h
+ uaddl v24.8h, v14.8b, v15.8b
+ mls v18.8h, v22.8h , v30.8h // -= 5 * (row1 + row4)
+ uaddl v22.8h, v12.8b, v17.8b
+ mls v20.8h, v26.8h , v30.8h
+ uaddl v26.8h, v13.8b, v16.8b
+ mla v22.8h, v24.8h , v28.8h
+ mls v22.8h, v26.8h , v30.8h
+ st1 {v18.4s }, [x9], #16 // write the vertical sums of this row to the temp buffer
+ st1 {v20.4s}, [x9], #16
+ ext v24.16b, v18.16b , v20.16b , #4
+ ext v26.16b, v18.16b , v20.16b , #6
+ st1 {v22.4s}, [x9] // no post-increment: these 16 bytes are reloaded from [x9] below
+ ext v22.16b, v18.16b , v20.16b , #10
+ add v0.8h, v24.8h , v26.8h // lanes shifted by 2 and 3 (multiplied by 20 below)
+ ext v24.16b, v18.16b , v20.16b , #2
+ ext v26.16b, v18.16b , v20.16b , #8
+ add v24.8h, v24.8h , v26.8h // lanes shifted by 1 and 4 (multiplied by 5 below)
+
+ saddl v26.4s, v18.4h, v22.4h // lane 0 + lane shifted by 5, widened to 32 bits (low 4 lanes)
+ smlal v26.4s, v0.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v18.8h, v22.8h // same for the high 4 lanes
+ smlal2 v22.4s, v0.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v18.4h, v26.4s, #10 // rounding shift right by 10 with unsigned saturation
+ sqrshrun v19.4h, v22.4s, #10
+ ld1 {v22.4s}, [x9], #16 // reload the third 16-byte segment; x9 has now advanced 48 bytes for this row
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0] // pack pixels 0-3 and 4-7 into the low 8 bytes of v18
+
+ ext v24.16b, v20.16b , v22.16b , #4
+ ext v26.16b, v20.16b , v22.16b , #6
+ ext v0.16b, v20.16b , v22.16b , #10
+ st1 {v18.2s}, [x1] // park pixels 0-7 in dst; reloaded from [x1] before the averaging step
+ add v18.8h, v24.8h , v26.8h
+ ext v24.16b, v20.16b , v22.16b , #2
+ ext v26.16b, v20.16b , v22.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v0.4h, v20.4h
+ smlal v26.4s, v18.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v0.8h, v20.8h
+ smlal2 v22.4s, v18.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v19.4h, v26.4s, #10
+ sqrshrun v18.4h, v22.4s, #10
+
+ uaddl v24.8h, v7.8b, v9.8b // row3 + row4 (bytes 8-15), consumed by ROW_2 below
+ ld1 {v20.4s}, [x6], #16 // vertical sums of this row from the temp buffer, starting at the x6 offset
+ ld1 {v22.4s}, [x6], x7 // x6 += 32, i.e. 48 bytes in total for this row
+
+
+ uqxtn v19.8b, v19.8h
+ uqxtn v18.8b, v18.8h
+ mov v19.2s[1], v18.2s[0] // pack pixels 8-11 and 12-15 into the low 8 bytes of v19
+
+ ld1 {v18.2s}, [x1] // fetch back pixels 0-7 parked in dst
+ sqrshrun v20.8b, v20.8h, #5 // vertical sums -> pixels: rounding shift right by 5 with unsigned saturation
+ sqrshrun v21.8b, v22.8h, #5
+ uaddl v22.8h, v4.8b, v10.8b
+ ld1 {v0.2s, v1.2s}, [x0], #16 // Vector load from src[6_0]
+ urhadd v18.16b, v18.16b , v20.16b // rounding average of the two-stage result and the vertical-only result
+ urhadd v19.16b, v19.16b , v21.16b
+
+ ld1 {v12.2s}, [x0], x2 // Vector load from src[6_0]
+ uaddl v20.8h, v6.8b, v8.8b
+ uaddl v26.8h, v5.8b, v11.8b
+ st1 {v18.2s, v19.2s}, [x1], x3 // store row 0
+
+
+//ROW_2
+
+
+ uaddl v18.8h, v2.8b, v0.8b
+
+ mla v18.8h, v20.8h , v28.8h
+
+ uaddl v20.8h, v3.8b, v1.8b
+
+ mla v20.8h, v24.8h , v28.8h
+ uaddl v24.8h, v15.8b, v16.8b
+ mls v18.8h, v22.8h , v30.8h
+ uaddl v22.8h, v13.8b, v12.8b
+ mls v20.8h, v26.8h , v30.8h
+ uaddl v26.8h, v14.8b, v17.8b
+ mla v22.8h, v24.8h , v28.8h
+ mls v22.8h, v26.8h , v30.8h
+ st1 {v18.4s}, [x9], #16
+ st1 {v20.4s}, [x9], #16
+ ext v24.16b, v18.16b , v20.16b , #4
+ ext v26.16b, v18.16b , v20.16b , #6
+ st1 {v22.4s}, [x9]
+ ext v22.16b, v18.16b , v20.16b , #10
+ add v2.8h, v24.8h , v26.8h
+ ext v24.16b, v18.16b , v20.16b , #2
+ ext v26.16b, v18.16b , v20.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v18.4h, v22.4h
+ smlal v26.4s, v2.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v18.8h, v22.8h
+ smlal2 v22.4s, v2.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v18.4h, v26.4s, #10
+ sqrshrun v19.4h, v22.4s, #10
+
+ ld1 {v22.4s}, [x9], #16
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+ ext v24.16b, v20.16b , v22.16b , #4
+ ext v26.16b, v20.16b , v22.16b , #6
+ ext v2.16b, v20.16b , v22.16b , #10
+ st1 {v18.2s}, [x1]
+ add v18.8h, v24.8h , v26.8h
+ ext v24.16b, v20.16b , v22.16b , #2
+ ext v26.16b, v20.16b , v22.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v2.4h, v20.4h
+ smlal v26.4s, v18.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v2.8h, v20.8h
+ smlal2 v22.4s, v18.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v19.4h, v26.4s, #10
+ sqrshrun v18.4h, v22.4s, #10
+ uaddl v24.8h, v9.8b, v11.8b
+ ld1 {v20.4s}, [x6], #16
+ ld1 {v22.4s}, [x6], x7
+ uqxtn v19.8b, v19.8h
+ uqxtn v18.8b, v18.8h
+ mov v19.2s[1], v18.2s[0]
+ ld1 {v18.4s}, [x1] // NOTE(review): loads 16 bytes here, whereas the other rows reload with {v18.2s} (8 bytes) -- confirm intent
+ sqrshrun v20.8b, v20.8h, #5
+ sqrshrun v21.8b, v22.8h, #5
+
+ uaddl v22.8h, v6.8b, v0.8b
+ ld1 {v2.2s, v3.2s}, [x0], #16 // Vector load from src[7_0]
+
+ urhadd v18.16b, v18.16b , v20.16b
+ urhadd v19.16b, v19.16b , v21.16b
+ ld1 {v13.2s}, [x0], x2 // Vector load from src[7_0]
+ uaddl v20.8h, v8.8b, v10.8b
+ uaddl v26.8h, v7.8b, v1.8b
+ st1 {v18.2s, v19.2s}, [x1], x3 // store row 1
+
+//ROW_3
+
+
+ uaddl v18.8h, v4.8b, v2.8b
+
+ mla v18.8h, v20.8h , v28.8h
+
+ uaddl v20.8h, v5.8b, v3.8b
+
+ mla v20.8h, v24.8h , v28.8h
+ uaddl v24.8h, v16.8b, v17.8b
+ mls v18.8h, v22.8h , v30.8h
+ uaddl v22.8h, v14.8b, v13.8b
+ mls v20.8h, v26.8h , v30.8h
+ uaddl v26.8h, v15.8b, v12.8b
+ mla v22.8h, v24.8h , v28.8h
+ mls v22.8h, v26.8h , v30.8h
+ st1 {v18.4s}, [x9], #16
+ st1 {v20.4s}, [x9], #16
+ ext v24.16b, v18.16b , v20.16b , #4
+ ext v26.16b, v18.16b , v20.16b , #6
+ st1 {v22.4s}, [x9]
+ ext v22.16b, v18.16b , v20.16b , #10
+ add v4.8h, v24.8h , v26.8h
+ ext v24.16b, v18.16b , v20.16b , #2
+ ext v26.16b, v18.16b , v20.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v18.4h, v22.4h
+ smlal v26.4s, v4.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v18.8h, v22.8h
+ smlal2 v22.4s, v4.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v18.4h, v26.4s, #10
+ sqrshrun v19.4h, v22.4s, #10
+ ld1 {v22.4s}, [x9], #16
+
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+
+ ext v24.16b, v20.16b , v22.16b , #4
+ ext v26.16b, v20.16b , v22.16b , #6
+ ext v4.16b, v20.16b , v22.16b , #10
+ st1 {v18.2s}, [x1]
+ add v18.8h, v24.8h , v26.8h
+ ext v24.16b, v20.16b , v22.16b , #2
+ ext v26.16b, v20.16b , v22.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v4.4h, v20.4h
+ smlal v26.4s, v18.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v4.8h, v20.8h
+ smlal2 v22.4s, v18.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v19.4h, v26.4s, #10
+ sqrshrun v18.4h, v22.4s, #10
+
+ uaddl v24.8h, v11.8b, v1.8b
+ ld1 {v20.4s}, [x6], #16
+ ld1 {v22.4s}, [x6], x7
+
+ uqxtn v19.8b, v19.8h
+ uqxtn v18.8b, v18.8h
+ mov v19.2s[1], v18.2s[0]
+
+ ld1 {v18.2s}, [x1]
+ sqrshrun v20.8b, v20.8h, #5
+ sqrshrun v21.8b, v22.8h, #5
+
+ uaddl v22.8h, v8.8b, v2.8b
+ ld1 {v4.2s, v5.2s}, [x0], #16 // Vector load from src[8_0]
+
+ urhadd v18.16b, v18.16b , v20.16b
+ urhadd v19.16b, v19.16b , v21.16b
+ ld1 {v14.2s}, [x0], x2 // Vector load from src[8_0]
+ uaddl v20.8h, v10.8b, v0.8b
+ uaddl v26.8h, v9.8b, v3.8b
+ st1 {v18.2s, v19.2s}, [x1], x3 // store row 2
+
+
+//ROW_4
+
+ uaddl v18.8h, v6.8b, v4.8b
+
+ mla v18.8h, v20.8h , v28.8h
+
+ uaddl v20.8h, v7.8b, v5.8b
+
+ mla v20.8h, v24.8h , v28.8h
+ uaddl v24.8h, v17.8b, v12.8b
+ mls v18.8h, v22.8h , v30.8h
+ uaddl v22.8h, v15.8b, v14.8b
+ mls v20.8h, v26.8h , v30.8h
+ uaddl v26.8h, v16.8b, v13.8b
+ mla v22.8h, v24.8h , v28.8h
+ mls v22.8h, v26.8h , v30.8h
+ st1 {v18.4s}, [x9], #16
+ st1 {v20.4s}, [x9], #16
+ ext v24.16b, v18.16b , v20.16b , #4
+ ext v26.16b, v18.16b , v20.16b , #6
+ st1 {v22.4s}, [x9]
+ ext v22.16b, v18.16b , v20.16b , #10
+ add v6.8h, v24.8h , v26.8h
+ ext v24.16b, v18.16b , v20.16b , #2
+ ext v26.16b, v18.16b , v20.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v18.4h, v22.4h
+ smlal v26.4s, v6.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v18.8h, v22.8h
+ smlal2 v22.4s, v6.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ sqrshrun v18.4h, v26.4s, #10
+ sqrshrun v19.4h, v22.4s, #10
+ ld1 {v22.4s}, [x9], #16
+ uqxtn v18.8b, v18.8h
+ uqxtn v19.8b, v19.8h
+ mov v18.2s[1], v19.2s[0]
+
+
+ ext v24.16b, v20.16b , v22.16b , #4
+ ext v26.16b, v20.16b , v22.16b , #6
+ ext v6.16b, v20.16b , v22.16b , #10
+ st1 {v18.2s}, [x1]
+ add v18.8h, v24.8h , v26.8h
+ ext v24.16b, v20.16b , v22.16b , #2
+ ext v26.16b, v20.16b , v22.16b , #8
+ add v24.8h, v24.8h , v26.8h
+
+ saddl v26.4s, v6.4h, v20.4h
+ smlal v26.4s, v18.4h, v28.4h
+ smlsl v26.4s, v24.4h, v30.4h
+
+ saddl2 v22.4s, v6.8h, v20.8h
+ smlal2 v22.4s, v18.8h, v28.8h
+ smlsl2 v22.4s, v24.8h, v30.8h
+
+ mov v6.16b, v2.16b // start rotating the row registers so the 5 newest rows feed the next pass
+ mov v7.16b, v3.16b
+
+ mov v2.16b, v10.16b
+ mov v3.16b, v11.16b
+
+ subs x4, x4, #4 // 4 rows done; the flags set here are consumed by the bgt at the end of the loop
+ sqrshrun v19.4h, v26.4s, #10
+ sqrshrun v18.4h, v22.4s, #10
+ mov v10.16b, v0.16b // v10/v11 hold the src[6_0] row temporarily; copied into v4/v5 below
+ mov v11.16b, v1.16b
+
+ mov v24.8b, v14.8b // v24 temporarily keeps bytes 16-23 of the src[8_0] row; moved into v16 below
+
+ mov v14.16b, v12.16b
+ mov v15.16b, v13.16b
+
+
+ uqxtn v19.8b, v19.8h
+ uqxtn v18.8b, v18.8h
+ mov v19.2s[1], v18.2s[0]
+
+ ld1 {v20.4s}, [x6], #16
+ ld1 {v22.4s}, [x6], x7
+ ld1 {v18.2s}, [x1]
+ sqrshrun v20.8b, v20.8h, #5
+ sqrshrun v21.8b, v22.8h, #5
+
+ mov v0.16b, v8.16b
+ mov v1.16b, v9.16b
+
+ mov v8.16b, v4.16b
+ mov v9.16b, v5.16b
+
+ mov v12.16b, v16.16b
+ mov v13.16b, v17.16b
+ urhadd v18.16b, v18.16b , v20.16b
+ urhadd v19.16b, v19.16b , v21.16b
+
+ mov v4.16b, v10.16b
+ mov v5.16b, v11.16b
+
+ mov v16.8b, v24.8b
+ st1 {v18.2s, v19.2s}, [x1], x3 // store row 3
+
+ bgt loop_16 // loop again while rows remain (x4 > 0 after the subs above)
+ b end_func
+
+loop_8_start:
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0]
+ ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0]
+
+loop_8:
+// One pass produces 4 output rows of 8 pixels. v26 = 20 and v24 = 5 are the filter coefficients.
+ ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0]
+ uaddl v14.8h, v4.8b, v6.8b // row2 + row3
+ uaddl v12.8h, v0.8b, v10.8b // row0 + row5
+ uaddl v16.8h, v2.8b, v8.8b // row1 + row4
+ mla v12.8h, v14.8h , v26.8h // += 20 * (row2 + row3)
+ uaddl v18.8h, v5.8b, v7.8b
+ uaddl v14.8h, v1.8b, v11.8b
+ uaddl v22.8h, v3.8b, v9.8b
+ mla v14.8h, v18.8h , v26.8h
+ mls v12.8h, v16.8h , v24.8h // -= 5 * (row1 + row4)
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[6_0]
+ uaddl v16.8h, v6.8b, v8.8b
+ mls v14.8h, v22.8h , v24.8h
+ uaddl v28.8h, v2.8b, v0.8b
+ st1 {v12.4s}, [x9], #16 // store row 0 to temp buffer: col 0
+ ext v22.16b, v12.16b , v14.16b , #10
+ uaddl v18.8h, v4.8b, v10.8b
+ mla v28.8h, v16.8h , v26.8h
+ saddl v30.4s, v12.4h, v22.4h // lane 0 + lane shifted by 5, widened to 32 bits (low 4 lanes)
+ st1 {v14.4s}, [x9], x7 // store row 0 to temp buffer: col 1
+ saddl2 v22.4s, v12.8h, v22.8h // same for the high 4 lanes
+ ext v16.16b, v12.16b , v14.16b , #4
+ mls v28.8h, v18.8h , v24.8h
+ ext v18.16b, v12.16b , v14.16b , #6
+ ext v20.16b, v12.16b , v14.16b , #8
+ ext v14.16b, v12.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h // lanes shifted by 2 and 3 (multiplied by 20 below)
+ add v18.8h, v14.8h , v20.8h // lanes shifted by 1 and 4 (multiplied by 5 below)
+ uaddl v20.8h, v7.8b, v9.8b
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal2 v22.4s, v16.8h, v26.8h
+ smlsl2 v22.4s, v18.8h, v24.8h
+ uaddl v14.8h, v3.8b, v1.8b
+ st1 {v28.4s}, [x9], #16 // store row 1 to temp buffer: col 0
+ mla v14.8h, v20.8h , v26.8h
+ sqrshrun v12.4h, v30.4s, #10 // rounding shift right by 10 with unsigned saturation
+ uaddl v16.8h, v5.8b, v11.8b
+ sqrshrun v13.4h, v22.4s, #10
+ mls v14.8h, v16.8h , v24.8h
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0]
+ uqxtn v25.8b, v12.8h // v25 is used as scratch for the row 0 pixels
+ uqxtn v13.8b, v13.8h
+ mov v25.2s[1], v13.2s[0] // pack pixels 0-3 and 4-7 into the low 8 bytes of v25
+ uaddl v16.8h, v8.8b, v10.8b
+
+
+ ext v22.16b, v28.16b , v14.16b , #10
+ uaddl v20.8h, v4.8b, v2.8b
+ saddl v30.4s, v28.4h, v22.4h
+ mla v20.8h, v16.8h , v26.8h
+ st1 {v14.4s}, [x9], x7 // store row 1 to temp buffer: col 1
+ saddl2 v22.4s, v28.8h, v22.8h
+ ext v16.16b, v28.16b , v14.16b , #4
+ ext v18.16b, v28.16b , v14.16b , #6
+ ext v12.16b, v28.16b , v14.16b , #8
+ ext v14.16b, v28.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v12.8h , v14.8h
+ ld1 {v14.4s, v15.4s}, [x6], x8 // load row 0 from temp buffer
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal2 v22.4s, v16.8h, v26.8h
+ smlsl2 v22.4s, v18.8h, v24.8h
+ sqrshrun v14.8b, v14.8h, #0x5 // vertical sums -> pixels: rounding shift right by 5 with unsigned saturation
+ ld1 {v28.4s, v29.4s}, [x6], x8 // load row 1 from temp buffer
+ uaddl v18.8h, v6.8b, v0.8b
+ sqrshrun v16.4h, v30.4s, #10
+ sqrshrun v15.8b, v28.8h, #0x5
+ sqrshrun v17.4h, v22.4s, #10
+
+ mov v12.8b, v25.8b // v12 = row 0 pixels that were parked in v25
+ mov v25.8b, v24.8b // refill v25 from the coefficient register v24 after its use as scratch
+
+ uaddl v28.8h, v9.8b, v11.8b
+ uqxtn v13.8b, v16.8h
+ uqxtn v17.8b, v17.8h
+ mov v13.2s[1], v17.2s[0]
+
+ urhadd v12.16b, v12.16b , v14.16b // rounding average of the two-stage result and the vertical-only result
+ urhadd v13.16b, v13.16b , v15.16b
+ uaddl v14.8h, v5.8b, v3.8b
+ uaddl v22.8h, v7.8b, v1.8b
+ mls v20.8h, v18.8h , v24.8h
+ st1 {v12.2s}, [x1], x3 // store row 0
+ mla v14.8h, v28.8h , v26.8h
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[8_0]
+ uaddl v30.8h, v10.8b, v0.8b
+ uaddl v28.8h, v6.8b, v4.8b
+ mls v14.8h, v22.8h , v24.8h
+ st1 {v13.2s}, [x1], x3 // store row 1
+ mla v28.8h, v30.8h , v26.8h
+ st1 {v20.4s}, [x9], #16 // store row 2 to temp buffer: col 0
+ ext v22.16b, v20.16b , v14.16b , #10
+ saddl v30.4s, v20.4h, v22.4h
+ st1 {v14.2s, v15.2s}, [x9], x7 // store row 2 to temp buffer: col 1 -- NOTE(review): writes the low 8 bytes of v14 then of v15, unlike {v14.4s} for rows 0/1; confirm
+ saddl2 v22.4s, v20.8h, v22.8h
+ ext v16.16b, v20.16b , v14.16b , #4
+ ext v18.16b, v20.16b , v14.16b , #6
+ ext v12.16b, v20.16b , v14.16b , #8
+ ext v14.16b, v20.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v14.8h , v12.8h
+ uaddl v20.8h, v8.8b, v2.8b
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal2 v22.4s, v16.8h, v26.8h
+ smlsl2 v22.4s, v18.8h, v24.8h
+ uaddl v18.8h, v11.8b, v1.8b
+ uaddl v16.8h, v7.8b, v5.8b
+ sqrshrun v12.4h, v30.4s, #10
+ uaddl v30.8h, v9.8b, v3.8b
+ mla v16.8h, v18.8h , v26.8h
+ sqrshrun v13.4h, v22.4s, #10
+ mls v28.8h, v20.8h , v24.8h
+ ld1 {v14.4s, v15.4s}, [x6], x8 // load row 2 from temp buffer
+ mls v16.8h, v30.8h , v24.8h
+ uqxtn v27.8b, v12.8h // v27 is used as scratch for the row 2 pixels
+ uqxtn v13.8b, v13.8h
+ mov v27.2s[1], v13.2s[0]
+
+ sqrshrun v14.8b, v14.8h, #5
+ ext v22.16b, v28.16b , v16.16b , #10
+ st1 {v28.4s}, [x9], #16 // store row 3 to temp buffer: col 0
+ saddl v30.4s, v28.4h, v22.4h
+ st1 {v16.2s, v17.2s}, [x9], x7 // store row 3 to temp buffer: col 1 -- NOTE(review): writes the low 8 bytes of v16 then of v17, not {v16.4s}; confirm
+ saddl2 v22.4s, v28.8h, v22.8h
+ ext v12.16b, v28.16b , v16.16b , #4
+ ext v18.16b, v28.16b , v16.16b , #6
+ ext v20.16b, v28.16b , v16.16b , #8
+ ext v28.16b, v28.16b , v16.16b , #2
+ add v12.8h, v12.8h , v18.8h
+ add v18.8h, v28.8h , v20.8h
+ ld1 {v16.4s, v17.4s}, [x6], x8 // load row 3 from temp buffer
+ smlal v30.4s, v12.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal2 v22.4s, v12.8h, v26.8h
+ smlsl2 v22.4s, v18.8h, v24.8h
+ sqrshrun v15.8b, v16.8h, #0x5
+
+ mov v12.8b, v27.8b // v12 = row 2 pixels that were parked in v27
+ mov v27.8b, v26.8b // refill v27 from the coefficient register v26 after its use as scratch
+
+ sqrshrun v16.4h, v30.4s, #10
+
+ mov v6.16b, v2.16b // start rotating the row registers so the 5 newest rows feed the next pass
+ mov v7.16b, v3.16b
+
+ sqrshrun v17.4h, v22.4s, #10
+
+ mov v2.16b, v10.16b
+ mov v3.16b, v11.16b
+
+ mov v10.16b, v0.16b // v10/v11 hold the src[6_0] row temporarily; copied into v4/v5 below
+ mov v11.16b, v1.16b
+
+ subs x4, x4, #4 // 4 rows done; the flags set here are consumed by the bgt at the end of the loop
+ uqxtn v13.8b, v16.8h
+ uqxtn v17.8b, v17.8h
+ mov v13.2s[1], v17.2s[0]
+ urhadd v12.16b, v12.16b , v14.16b
+ urhadd v13.16b, v13.16b , v15.16b
+
+ mov v0.16b, v8.16b
+ mov v1.16b, v9.16b
+
+ mov v8.16b, v4.16b
+ mov v9.16b, v5.16b
+
+ mov v4.16b, v10.16b
+ mov v5.16b, v11.16b
+
+ st1 {v12.2s}, [x1], x3 // store row 2
+ st1 {v13.2s}, [x1], x3 // store row 3
+
+ bgt loop_8 // loop again while rows remain (x4 > 0 after the subs above)
+ b end_func
+
+loop_4_start:
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0]
+ ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0]
+
+loop_4:
+ ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0]
+ uaddl v14.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v12.8h, v0.8b, v10.8b // temp = src[0_0] + src[5_0]
+ uaddl v16.8h, v2.8b, v8.8b // temp2 = src[1_0] + src[4_0]
+ mla v12.8h, v14.8h , v26.8h // temp += temp1 * 20
+ uaddl v18.8h, v5.8b, v7.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v14.8h, v1.8b, v11.8b // temp = src[0_0] + src[5_0]
+ uaddl v22.8h, v3.8b, v9.8b // temp2 = src[1_0] + src[4_0]
+ mla v14.8h, v18.8h , v26.8h // temp += temp1 * 20
+ mls v12.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[6_0]
+ uaddl v16.8h, v6.8b, v8.8b
+ mls v14.8h, v22.8h , v24.8h // temp -= temp2 * 5
+ //v12 and v14 now hold the vertically filtered 16-bit values of row 0
+ uaddl v28.8h, v2.8b, v0.8b
+ st1 {v12.4s}, [x9], #16 // store row 0 to temp buffer: col 0
+ ext v22.16b, v12.16b , v14.16b , #10
+ uaddl v18.8h, v4.8b, v10.8b
+ mla v28.8h, v16.8h , v26.8h
+ saddl v30.4s, v12.4h, v22.4h // lane 0 + lane shifted by 5, widened to 32 bits (the 4 lanes that are stored)
+ st1 {v14.4s}, [x9], x7 // store row 0 to temp buffer: col 1
+ saddl v22.4s, v13.4h, v23.4h // NOTE(review): reads v13/v23, not the upper halves of v12/v22; this chain does not appear to reach the 4-byte stores -- confirm
+ ext v16.16b, v12.16b , v14.16b , #4
+ mls v28.8h, v18.8h , v24.8h
+ ext v18.16b, v12.16b , v14.16b , #6
+ ext v20.16b, v12.16b , v14.16b , #8
+ ext v14.16b, v12.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h // lanes shifted by 2 and 3 (multiplied by 20 below)
+ add v18.8h, v14.8h , v20.8h // lanes shifted by 1 and 4 (multiplied by 5 below)
+ uaddl v20.8h, v7.8b, v9.8b
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v17.4h, v26.4h
+ smlsl v22.4s, v19.4h, v24.4h
+ uaddl v14.8h, v3.8b, v1.8b
+ st1 {v28.4s}, [x9], #16 // store row 1 to temp buffer: col 0
+ mla v14.8h, v20.8h , v26.8h
+ sqrshrun v12.4h, v30.4s, #10 // rounding shift right by 10 with unsigned saturation
+ uaddl v16.8h, v5.8b, v11.8b
+ sqrshrun v13.4h, v22.4s, #10
+ mls v14.8h, v16.8h , v24.8h
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[7_0]
+ uqxtn v25.8b, v12.8h // v25 is used as scratch for the row 0 pixels
+ uaddl v16.8h, v8.8b, v10.8b
+
+ ext v22.16b, v28.16b , v14.16b , #10
+ uaddl v20.8h, v4.8b, v2.8b
+ saddl v30.4s, v28.4h, v22.4h
+ mla v20.8h, v16.8h , v26.8h
+ st1 {v14.4s}, [x9], x7 // store row 1 to temp buffer: col 1
+ saddl v22.4s, v29.4h, v23.4h
+ ext v16.16b, v28.16b , v14.16b , #4
+ ext v18.16b, v28.16b , v14.16b , #6
+ ext v12.16b, v28.16b , v14.16b , #8
+ ext v14.16b, v28.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v12.8h , v14.8h
+ ld1 {v14.2s}, [x6], x8 //load row 0 from temp buffer
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v17.4h, v26.4h
+ smlsl v22.4s, v19.4h, v24.4h
+ sqrshrun v14.8b, v14.8h, #0x5 // vertical sums -> pixels: rounding shift right by 5 with unsigned saturation
+ ld1 {v28.2s}, [x6], x8 //load row 1 from temp buffer
+ uaddl v18.8h, v6.8b, v0.8b
+ sqrshrun v16.4h, v30.4s, #10
+ sqrshrun v15.8b, v28.8h, #0x5
+ sqrshrun v17.4h, v22.4s, #10
+
+ mov v12.8b, v25.8b // v12 = row 0 pixels that were parked in v25
+ mov v25.8b, v24.8b // refill v25 from the coefficient register v24 after its use as scratch
+
+ uaddl v28.8h, v9.8b, v11.8b
+ uqxtn v13.8b, v16.8h
+
+ urhadd v12.16b, v12.16b , v14.16b // rounding average of the two-stage result and the vertical-only result
+ urhadd v13.16b, v13.16b , v15.16b
+
+ uaddl v14.8h, v5.8b, v3.8b
+ uaddl v22.8h, v7.8b, v1.8b
+ mls v20.8h, v18.8h , v24.8h
+ st1 {v12.s}[0], [x1], x3 // store row 0
+ mla v14.8h, v28.8h , v26.8h
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[8_0]
+ uaddl v30.8h, v10.8b, v0.8b
+ uaddl v28.8h, v6.8b, v4.8b
+ mls v14.8h, v22.8h , v24.8h
+ st1 {v13.s}[0], [x1], x3 //store row 1
+ mla v28.8h, v30.8h , v26.8h
+ st1 {v20.4s}, [x9], #16 // store row 2 to temp buffer: col 0
+ ext v22.16b, v20.16b , v14.16b , #10
+ saddl v30.4s, v20.4h, v22.4h
+ st1 {v14.4s}, [x9], x7 // store row 2 to temp buffer: col 1
+ saddl v22.4s, v21.4h, v23.4h
+ ext v16.16b, v20.16b , v14.16b , #4
+ ext v18.16b, v20.16b , v14.16b , #6
+ ext v12.16b, v20.16b , v14.16b , #8
+ ext v14.16b, v20.16b , v14.16b , #2
+ add v16.8h, v16.8h , v18.8h
+ add v18.8h, v14.8h , v12.8h
+ uaddl v20.8h, v8.8b, v2.8b
+ smlal v30.4s, v16.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v17.4h, v26.4h
+ smlsl v22.4s, v19.4h, v24.4h
+ uaddl v18.8h, v11.8b, v1.8b
+ uaddl v16.8h, v7.8b, v5.8b
+ sqrshrun v12.4h, v30.4s, #10
+ uaddl v30.8h, v9.8b, v3.8b
+ mla v16.8h, v18.8h , v26.8h
+ sqrshrun v13.4h, v22.4s, #10
+ mls v28.8h, v20.8h , v24.8h
+ ld1 {v14.2s}, [x6], x8 //load row 2 from temp buffer
+ mls v16.8h, v30.8h , v24.8h
+ uqxtn v27.8b, v12.8h // v27 is used as scratch for the row 2 pixels
+ sqrshrun v14.8b, v14.8h, #5
+ ext v22.16b, v28.16b , v16.16b , #10
+ st1 {v28.4s}, [x9], #16 // store row 3 to temp buffer: col 0
+ saddl v30.4s, v28.4h, v22.4h
+ st1 {v16.4s}, [x9], x7 // store row 3 to temp buffer: col 1
+ saddl v22.4s, v29.4h, v23.4h
+ ext v12.16b, v28.16b , v16.16b , #4
+ ext v18.16b, v28.16b , v16.16b , #6
+ ext v20.16b, v28.16b , v16.16b , #8
+ ext v28.16b, v28.16b , v16.16b , #2
+ add v12.8h, v12.8h , v18.8h
+ add v18.8h, v28.8h , v20.8h
+ ld1 {v16.2s}, [x6], x8 //load row 3 from temp buffer
+ smlal v30.4s, v12.4h, v26.4h
+ smlsl v30.4s, v18.4h, v24.4h
+ smlal v22.4s, v13.4h, v26.4h
+ smlsl v22.4s, v19.4h, v24.4h
+ sqrshrun v15.8b, v16.8h, #0x5
+
+ mov v12.8b, v27.8b // v12 = row 2 pixels that were parked in v27
+ mov v27.8b, v26.8b // refill v27 from the coefficient register v26 after its use as scratch
+
+ sqrshrun v16.4h, v30.4s, #10
+
+ mov v6.16b, v2.16b // start rotating the row registers so the 5 newest rows feed the next pass
+ mov v7.16b, v3.16b
+
+ sqrshrun v17.4h, v22.4s, #10
+
+ mov v2.16b, v10.16b
+ mov v3.16b, v11.16b
+
+ mov v10.16b, v0.16b // v10/v11 hold the src[6_0] row temporarily; copied into v4/v5 below
+ mov v11.16b, v1.16b
+
+ subs x4, x4, #4 // 4 rows done; the flags set here are consumed by the bgt at the end of the loop
+ uqxtn v13.8b, v16.8h
+ urhadd v12.16b, v12.16b , v14.16b
+ urhadd v13.16b, v13.16b , v15.16b
+
+ mov v0.16b, v8.16b
+ mov v1.16b, v9.16b
+
+ mov v8.16b, v4.16b
+ mov v9.16b, v5.16b
+
+
+ mov v4.16b, v10.16b
+ mov v5.16b, v11.16b
+
+
+ st1 {v12.s}[0], [x1], x3 // store row 2
+ st1 {v13.s}[0], [x1], x3 // store row 3
+
+ bgt loop_4 // loop again while rows remain (x4 > 0 after the subs above); otherwise fall through to end_func
+
+end_func:
+ // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack
+ ldp x19, x20, [sp], #16 // pop x19/x20 pushed by the stp at function entry
+ pop_v_regs // counterpart of push_v_regs at entry (macro presumably from ih264_neon_macros.s -- confirm what it restores)
+ ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
new file mode 100755
index 0000000..ab663d0
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
@@ -0,0 +1,958 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction interpolation.
+//*
+//* @author
+//* Mohit
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_luma_horz_qpel_vert_qpel_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* This function implements two six tap filters. It
+//* applies the six tap filter in the horizontal direction on the
+//* predictor values, then applies the same filter in the
+//* vertical direction on the predictor values. It then averages these
+//* two outputs to obtain quarter pel values in horizontal and vertical direction.
+//* The six tap filtering operation is described in sec 8.4.2.2.1 titled
+//* "Luma sample interpolation process"
+//*
+//* @par Description:
+//* This function is called to obtain pixels lying at the following
+//* location (1/4,1/4) or (3/4,1/4) or (1/4,3/4) or (3/4,3/4).
+//* The function interpolates the predictors first in the horizontal direction
+//* and then in the vertical direction, and then averages these two
+//* values.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pu1_tmp: temporary buffer
+//*
+//* @param[in] dydx: x and y reference offset for qpel calculations
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/;
+
+//void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd,
+// UWORD8* pu1_tmp,
+// UWORD32 dydx)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
+// x6 => *pu1_tmp on entry (never read); x7 => dydx, copied into x6 at function entry
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
+
+ih264_inter_pred_luma_horz_qpel_vert_qpel_av8:
+ // In: x0 pu1_src, x1 pu1_dst, w2 src_strd, w3 dst_strd, w4 ht, w5 wd, x6 pu1_tmp (never read), w7 dydx
+ push_v_regs // macro from ih264_neon_macros.s (body not shown here); paired with pop_v_regs at end_func
+ stp x19, x20, [sp, #-16]! // x19/x20 are saved here and restored at end_func
+ sxtw x2, w2 // src_strd, dst_strd, ht and wd are WORD32: AAPCS64 leaves bits 63:32
+ sxtw x3, w3 // of x2-x5 unspecified, so sign-extend them before they are used as
+ sxtw x4, w4 // 64-bit post-index strides (x2, x3) and in 64-bit subs instructions
+ sxtw x5, w5 // (x4, x5)
+ mov x6, x7 // x6 = dydx (only bits 3:0 are consumed by the masks below)
+ and x7, x6, #3 // x_offset = dydx & 3
+ add x7, x0, x7, lsr #1 //pu1_pred_vert = pu1_src + (x_offset>>1)
+ and x6, x6, #12 //Finds y-offset
+ lsr x6, x6, #3 //(dydx & 12)>>3 = y_offset>>1
+ mul x6, x2, x6
+ add x6, x0, x6 //pu1_pred_horz = pu1_src + (y_offset>>1)*src_strd
+ sub x7, x7, x2, lsl #1 //pu1_pred_vert-2*src_strd
+ sub x6, x6, #2 //pu1_pred_horz-2
+ movi v30.8b, #20 // Filter coeff 20
+ movi v31.8b, #5 // Filter coeff 5
+ subs x12, x5, #4 //if wd=4 branch to loop_4
+ beq loop_4_start
+ subs x12, x5, #8 //if wd=8 branch to loop_8
+ beq loop_8_start
+ ld1 {v0.2s, v1.2s}, [x7], x2 // remaining (16-wide) path: Vector load from src[0_0]
+ ld1 {v2.2s, v3.2s}, [x7], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x7], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x7], x2 // Vector load from src[3_0]
+ ld1 {v8.2s, v9.2s}, [x7], x2 // Vector load from src[4_0]
+ add x11, x6, #8 // x11 = horizontal-filter source pointer for columns 8-15
+loop_16:
+ ld1 {v10.2s, v11.2s}, [x7], x2 // Vector load from src[5_0]
+ ld1 {v18.2s, v19.2s}, [x6], x2 // horz row0, col 0
+ uaddl v24.8h, v0.8b, v10.8b // vert row 0, cols 0-7: src[0] + src[5]
+ umlal v24.8h, v4.8b, v30.8b // + 20*src[2]
+ umlal v24.8h, v6.8b, v30.8b // + 20*src[3]
+ umlsl v24.8h, v2.8b, v31.8b // - 5*src[1]
+ umlsl v24.8h, v8.8b, v31.8b // - 5*src[4]
+ ext v23.8b, v18.8b , v19.8b , #5 // horizontal taps: loaded row shifted by 5, 2, 3, 4 and 1 bytes
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+ sqrshrun v26.8b, v24.8h, #5 // vert result: rounding shift right by 5, saturate to unsigned 8 bit
+ uaddl v28.8h, v18.8b, v23.8b // horz row 0, cols 0-7: p[0] + p[5]
+ umlal v28.8h, v20.8b, v30.8b // + 20*p[2]
+ umlal v28.8h, v21.8b, v30.8b // + 20*p[3]
+ umlsl v28.8h, v19.8b, v31.8b // - 5*p[1]
+ umlsl v28.8h, v22.8b, v31.8b // - 5*p[4]
+ ld1 {v18.2s, v19.2s}, [x11], x2 // horz row 0, col 1
+ uaddl v24.8h, v1.8b, v11.8b // vert row 0, cols 8-15
+ umlal v24.8h, v5.8b, v30.8b
+ umlal v24.8h, v7.8b, v30.8b
+ umlsl v24.8h, v3.8b, v31.8b
+ umlsl v24.8h, v9.8b, v31.8b
+ sqrshrun v28.8b, v28.8h, #5 // horz result row 0, cols 0-7
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v27.8b, v24.8h, #5 // vert result row 0, cols 8-15
+ ld1 {v12.2s, v13.2s}, [x7], x2 // src[6_0]
+
+ uaddl v24.8h, v18.8b, v23.8b // horz row 0, cols 8-15
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ uaddl v16.8h, v2.8b, v12.8b // vert row 1, cols 0-7
+ umlal v16.8h, v6.8b, v30.8b
+ umlal v16.8h, v8.8b, v30.8b
+ umlsl v16.8h, v4.8b, v31.8b
+ umlsl v16.8h, v10.8b, v31.8b
+
+ sqrshrun v29.8b, v24.8h, #5 // horz result row 0, cols 8-15
+ ld1 {v18.2s, v19.2s}, [x6], x2 // horz row 1, col 0
+
+ uaddl v24.8h, v3.8b, v13.8b // vert row 1, cols 8-15
+ umlal v24.8h, v7.8b, v30.8b
+ umlal v24.8h, v9.8b, v30.8b
+ umlsl v24.8h, v5.8b, v31.8b
+ umlsl v24.8h, v11.8b, v31.8b
+ urhadd v28.16b, v28.16b , v26.16b // rounding average of horz and vert results (only the low 8 bytes are stored)
+ urhadd v29.16b, v29.16b , v27.16b
+ sqrshrun v26.8b, v16.8h, #5 // vert result row 1, cols 0-7
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ st1 {v28.2s, v29.2s}, [x1], x3 // store row 0
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v27.8b, v24.8h, #5 // vert result row 1, cols 8-15
+
+ uaddl v28.8h, v18.8b, v23.8b // horz row 1, cols 0-7
+ umlal v28.8h, v20.8b, v30.8b
+ umlal v28.8h, v21.8b, v30.8b
+ umlsl v28.8h, v19.8b, v31.8b
+ umlsl v28.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x11], x2 // horz row 1, col 1
+ ld1 {v14.2s, v15.2s}, [x7], x2 // src[7_0]
+
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v28.8b, v28.8h, #5 // horz result row 1, cols 0-7
+ uaddl v24.8h, v18.8b, v23.8b // horz row 1, cols 8-15
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x6], x2 // horz row 2, col 0
+ uaddl v16.8h, v4.8b, v14.8b // vert row 2, cols 0-7
+ umlal v16.8h, v8.8b, v30.8b
+ umlal v16.8h, v10.8b, v30.8b
+ umlsl v16.8h, v6.8b, v31.8b
+ umlsl v16.8h, v12.8b, v31.8b
+
+ sqrshrun v29.8b, v24.8h, #5 // horz result row 1, cols 8-15
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+ urhadd v28.16b, v28.16b , v26.16b // average horz and vert results for row 1
+ urhadd v29.16b, v29.16b , v27.16b
+ sqrshrun v26.8b, v16.8h, #5 // vert result row 2, cols 0-7
+
+ uaddl v24.8h, v5.8b, v15.8b // vert row 2, cols 8-15
+ umlal v24.8h, v9.8b, v30.8b
+ umlal v24.8h, v11.8b, v30.8b
+ umlsl v24.8h, v7.8b, v31.8b
+ umlsl v24.8h, v13.8b, v31.8b
+
+ st1 {v28.2s, v29.2s}, [x1], x3 // store row 1
+
+ uaddl v28.8h, v18.8b, v23.8b // horz row 2, cols 0-7
+ umlal v28.8h, v20.8b, v30.8b
+ umlal v28.8h, v21.8b, v30.8b
+ umlsl v28.8h, v19.8b, v31.8b
+ umlsl v28.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x11], x2 // horz row 2, col 1
+ sqrshrun v27.8b, v24.8h, #5 // vert result row 2, cols 8-15
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v28.8b, v28.8h, #5 // horz result row 2, cols 0-7
+ ld1 {v16.2s, v17.2s}, [x7], x2 // src[8_0]
+ uaddl v24.8h, v18.8b, v23.8b // horz row 2, cols 8-15
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x6], x2 // horz row 3, col 0
+ uaddl v0.8h, v6.8b, v16.8b // vert row 3, cols 0-7 (v0 reused as accumulator; it is reloaded from v8 below)
+ umlal v0.8h, v10.8b, v30.8b
+ umlal v0.8h, v12.8b, v30.8b
+ umlsl v0.8h, v8.8b, v31.8b
+ umlsl v0.8h, v14.8b, v31.8b
+
+ sqrshrun v29.8b, v24.8h, #5 // horz result row 2, cols 8-15
+
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ urhadd v28.16b, v28.16b , v26.16b // average horz and vert results for row 2
+ urhadd v29.16b, v29.16b , v27.16b
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+ sqrshrun v26.8b, v0.8h, #5 // vert result row 3, cols 0-7
+ st1 {v28.2s, v29.2s}, [x1], x3 // store row 2
+
+ uaddl v24.8h, v18.8b, v23.8b // horz row 3, cols 0-7
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x11], x2 // horz row 3, col 1
+
+ uaddl v0.8h, v7.8b, v17.8b // vert row 3, cols 8-15
+ umlal v0.8h, v11.8b, v30.8b
+ umlal v0.8h, v13.8b, v30.8b
+ umlsl v0.8h, v9.8b, v31.8b
+ umlsl v0.8h, v15.8b, v31.8b
+
+ sqrshrun v28.8b, v24.8h, #5 // horz result row 3, cols 0-7
+
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v27.8b, v0.8h, #5 // vert result row 3, cols 8-15
+
+ uaddl v24.8h, v18.8b, v23.8b // horz row 3, cols 8-15
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ mov v0.16b, v8.16b // slide the vertical window down 4 rows: v0-v9 <- v8-v17
+ mov v1.16b, v9.16b
+
+ mov v2.16b, v10.16b
+ mov v3.16b, v11.16b
+
+ mov v4.16b, v12.16b
+ mov v5.16b, v13.16b
+
+ mov v6.16b, v14.16b
+ mov v7.16b, v15.16b
+
+ mov v8.16b, v16.16b
+ mov v9.16b, v17.16b
+
+ sqrshrun v29.8b, v24.8h, #5 // horz result row 3, cols 8-15
+ urhadd v28.16b, v28.16b , v26.16b // average horz and vert results for row 3
+ urhadd v29.16b, v29.16b , v27.16b
+ st1 {v28.2s, v29.2s}, [x1], x3 // store row 3
+
+ ld1 {v10.2s, v11.2s}, [x7], x2 // Vector load from src[9_0]
+ ld1 {v18.2s, v19.2s}, [x6], x2 // horz row4, col 0
+ uaddl v24.8h, v0.8b, v10.8b // vert row 4, cols 0-7
+ umlal v24.8h, v4.8b, v30.8b
+ umlal v24.8h, v6.8b, v30.8b
+ umlsl v24.8h, v2.8b, v31.8b
+ umlsl v24.8h, v8.8b, v31.8b
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+ sqrshrun v26.8b, v24.8h, #5 // vert result row 4, cols 0-7
+ uaddl v28.8h, v18.8b, v23.8b // horz row 4, cols 0-7
+ umlal v28.8h, v20.8b, v30.8b
+ umlal v28.8h, v21.8b, v30.8b
+ umlsl v28.8h, v19.8b, v31.8b
+ umlsl v28.8h, v22.8b, v31.8b
+ ld1 {v18.2s, v19.2s}, [x11], x2 // horz row 4, col 1
+ uaddl v24.8h, v1.8b, v11.8b // vert row 4, cols 8-15
+ umlal v24.8h, v5.8b, v30.8b
+ umlal v24.8h, v7.8b, v30.8b
+ umlsl v24.8h, v3.8b, v31.8b
+ umlsl v24.8h, v9.8b, v31.8b
+ sqrshrun v28.8b, v28.8h, #5 // horz result row 4, cols 0-7
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v27.8b, v24.8h, #5 // vert result row 4, cols 8-15
+ ld1 {v12.2s, v13.2s}, [x7], x2 // src[10_0]
+ uaddl v24.8h, v18.8b, v23.8b // horz row 4, cols 8-15
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+ uaddl v16.8h, v2.8b, v12.8b // vert row 5, cols 0-7
+ umlal v16.8h, v6.8b, v30.8b
+ umlal v16.8h, v8.8b, v30.8b
+ umlsl v16.8h, v4.8b, v31.8b
+ umlsl v16.8h, v10.8b, v31.8b
+ sqrshrun v29.8b, v24.8h, #5 // horz result row 4, cols 8-15
+ ld1 {v18.2s, v19.2s}, [x6], x2 // horz row 5, col 0
+ uaddl v24.8h, v3.8b, v13.8b // vert row 5, cols 8-15
+ umlal v24.8h, v7.8b, v30.8b
+ umlal v24.8h, v9.8b, v30.8b
+ umlsl v24.8h, v5.8b, v31.8b
+ umlsl v24.8h, v11.8b, v31.8b
+ urhadd v28.16b, v28.16b , v26.16b // average horz and vert results for row 4
+ urhadd v29.16b, v29.16b , v27.16b
+ sqrshrun v26.8b, v16.8h, #5 // vert result row 5, cols 0-7
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ st1 {v28.2s, v29.2s}, [x1], x3 // store row 4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v27.8b, v24.8h, #5 // vert result row 5, cols 8-15
+
+ uaddl v28.8h, v18.8b, v23.8b // horz row 5, cols 0-7
+ umlal v28.8h, v20.8b, v30.8b
+ umlal v28.8h, v21.8b, v30.8b
+ umlsl v28.8h, v19.8b, v31.8b
+ umlsl v28.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x11], x2 // horz row 5, col 1
+ ld1 {v14.2s, v15.2s}, [x7], x2 // src[11_0]
+
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v28.8b, v28.8h, #5 // horz result row 5, cols 0-7
+ uaddl v24.8h, v18.8b, v23.8b // horz row 5, cols 8-15
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x6], x2 // horz row 6, col 0
+ uaddl v16.8h, v4.8b, v14.8b // vert row 6, cols 0-7
+ umlal v16.8h, v8.8b, v30.8b
+ umlal v16.8h, v10.8b, v30.8b
+ umlsl v16.8h, v6.8b, v31.8b
+ umlsl v16.8h, v12.8b, v31.8b
+
+ sqrshrun v29.8b, v24.8h, #5 // horz result row 5, cols 8-15
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+ urhadd v28.16b, v28.16b , v26.16b // average horz and vert results for row 5
+ urhadd v29.16b, v29.16b , v27.16b
+ sqrshrun v26.8b, v16.8h, #5 // vert result row 6, cols 0-7
+
+ uaddl v24.8h, v5.8b, v15.8b // vert row 6, cols 8-15
+ umlal v24.8h, v9.8b, v30.8b
+ umlal v24.8h, v11.8b, v30.8b
+ umlsl v24.8h, v7.8b, v31.8b
+ umlsl v24.8h, v13.8b, v31.8b
+
+ st1 {v28.2s, v29.2s}, [x1], x3 // store row 5
+
+ uaddl v28.8h, v18.8b, v23.8b // horz row 6, cols 0-7
+ umlal v28.8h, v20.8b, v30.8b
+ umlal v28.8h, v21.8b, v30.8b
+ umlsl v28.8h, v19.8b, v31.8b
+ umlsl v28.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x11], x2 // horz row 6, col 1
+ sqrshrun v27.8b, v24.8h, #5 // vert result row 6, cols 8-15
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v28.8b, v28.8h, #5 // horz result row 6, cols 0-7
+ ld1 {v16.2s, v17.2s}, [x7], x2 // src[12_0]
+ uaddl v24.8h, v18.8b, v23.8b // horz row 6, cols 8-15
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x6], x2 // horz row 7, col 0
+ uaddl v0.8h, v6.8b, v16.8b // vert row 7, cols 0-7 (v0 reused as accumulator; it is reloaded from v8 below)
+ umlal v0.8h, v10.8b, v30.8b
+ umlal v0.8h, v12.8b, v30.8b
+ umlsl v0.8h, v8.8b, v31.8b
+ umlsl v0.8h, v14.8b, v31.8b
+
+ sqrshrun v29.8b, v24.8h, #5 // horz result row 6, cols 8-15
+
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ urhadd v28.16b, v28.16b , v26.16b // average horz and vert results for row 6
+ urhadd v29.16b, v29.16b , v27.16b
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+ sqrshrun v26.8b, v0.8h, #5 // vert result row 7, cols 0-7
+ st1 {v28.2s, v29.2s}, [x1], x3 // store row 6
+
+ uaddl v24.8h, v18.8b, v23.8b // horz row 7, cols 0-7
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ ld1 {v18.2s, v19.2s}, [x11], x2 // horz row 7, col 1
+
+ uaddl v0.8h, v7.8b, v17.8b // vert row 7, cols 8-15
+ umlal v0.8h, v11.8b, v30.8b
+ umlal v0.8h, v13.8b, v30.8b
+ umlsl v0.8h, v9.8b, v31.8b
+ umlsl v0.8h, v15.8b, v31.8b
+
+ sqrshrun v28.8b, v24.8h, #5 // horz result row 7, cols 0-7
+
+ ext v23.8b, v18.8b , v19.8b , #5
+ ext v20.8b, v18.8b , v19.8b , #2
+ ext v21.8b, v18.8b , v19.8b , #3
+ ext v22.8b, v18.8b , v19.8b , #4
+ ext v19.8b, v18.8b , v19.8b , #1
+
+ sqrshrun v27.8b, v0.8h, #5 // vert result row 7, cols 8-15
+
+ uaddl v24.8h, v18.8b, v23.8b // horz row 7, cols 8-15
+ umlal v24.8h, v20.8b, v30.8b
+ umlal v24.8h, v21.8b, v30.8b
+ umlsl v24.8h, v19.8b, v31.8b
+ umlsl v24.8h, v22.8b, v31.8b
+
+ mov v0.16b, v8.16b // slide the vertical window down 4 rows for the next iteration
+ mov v1.16b, v9.16b
+
+ mov v2.16b, v10.16b
+ mov v3.16b, v11.16b
+
+ mov v4.16b, v12.16b
+ mov v5.16b, v13.16b
+
+ mov v6.16b, v14.16b
+ mov v7.16b, v15.16b
+
+ mov v8.16b, v16.16b
+ mov v9.16b, v17.16b
+
+ sqrshrun v29.8b, v24.8h, #5 // horz result row 7, cols 8-15
+ subs x4, x4, #8 // 8 rows produced per iteration; flags are consumed by the beq below
+ urhadd v28.16b, v28.16b , v26.16b // average horz and vert results for row 7
+ urhadd v29.16b, v29.16b , v27.16b
+ st1 {v28.2s, v29.2s}, [x1], x3 // store row 7
+
+ beq end_func // stop looping when no rows remain
+ b loop_16
+
+
+loop_8_start:
+ ld1 {v0.2s}, [x7], x2 // Vector load from src[0_0]
+ ld1 {v1.2s}, [x7], x2 // Vector load from src[1_0]
+ ld1 {v2.2s}, [x7], x2 // Vector load from src[2_0]
+ ld1 {v3.2s}, [x7], x2 // Vector load from src[3_0]
+ ld1 {v4.2s}, [x7], x2 // Vector load from src[4_0]
+ // 8-wide path: each loop_8 iteration produces up to 8 rows; v0-v4 hold the 5 previous source rows
+loop_8:
+ ld1 {v5.2s}, [x7], x2 // Vector load from src[5_0]
+ uaddl v10.8h, v0.8b, v5.8b // vert row 0: src[0] + src[5]
+ umlal v10.8h, v2.8b, v30.8b // + 20*src[2]
+ umlal v10.8h, v3.8b, v30.8b // + 20*src[3]
+ umlsl v10.8h, v1.8b, v31.8b // - 5*src[1]
+ umlsl v10.8h, v4.8b, v31.8b // - 5*src[4]
+ ld1 {v12.2s, v13.2s}, [x6], x2 //horz row 0
+ ext v17.8b, v12.8b , v13.8b , #5 // horizontal taps: loaded row shifted by 5, 2, 3, 4 and 1 bytes
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v26.8b, v10.8h, #5 // vert result row 0: rounding shift right by 5, saturate to unsigned 8 bit
+ ld1 {v6.2s}, [x7], x2 // src[6_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 0: p[0] + p[5]
+ umlal v10.8h, v14.8b, v30.8b // + 20*p[2]
+ umlal v10.8h, v15.8b, v30.8b // + 20*p[3]
+ umlsl v10.8h, v13.8b, v31.8b // - 5*p[1]
+ umlsl v10.8h, v16.8b, v31.8b // - 5*p[4]
+ ld1 {v12.2s, v13.2s}, [x6], x2 // horz row 1
+ uaddl v18.8h, v1.8b, v6.8b // vert row 1
+ umlal v18.8h, v3.8b, v30.8b
+ umlal v18.8h, v4.8b, v30.8b
+ umlsl v18.8h, v2.8b, v31.8b
+ umlsl v18.8h, v5.8b, v31.8b
+ sqrshrun v28.8b, v10.8h, #5 // horz result row 0
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v27.8b, v18.8h, #5 // vert result row 1
+ ld1 {v7.2s}, [x7], x2 // src[7_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 1
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 // horz row 2
+ uaddl v18.8h, v2.8b, v7.8b // vert row 2
+ umlal v18.8h, v4.8b, v30.8b
+ umlal v18.8h, v5.8b, v30.8b
+ umlsl v18.8h, v3.8b, v31.8b
+ umlsl v18.8h, v6.8b, v31.8b
+ sqrshrun v29.8b, v10.8h, #5 // horz result row 1
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ urhadd v26.16b, v26.16b , v28.16b // row 0: rounding average of vert and horz results (low 8 bytes are stored)
+ urhadd v27.16b, v27.16b , v29.16b // row 1: rounding average of vert and horz results
+ sqrshrun v28.8b, v18.8h, #5 // vert result row 2
+ ld1 {v8.2s}, [x7], x2 // src[8_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 2
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 // horz row 3
+ uaddl v18.8h, v3.8b, v8.8b // vert row 3
+ umlal v18.8h, v5.8b, v30.8b
+ umlal v18.8h, v6.8b, v30.8b
+ umlsl v18.8h, v4.8b, v31.8b
+ umlsl v18.8h, v7.8b, v31.8b
+ sqrshrun v24.8b, v10.8h, #5 // horz result row 2
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v29.8b, v18.8h, #5 // vert result row 3
+ uaddl v10.8h, v12.8b, v17.8b // horz row 3
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ st1 {v26.2s}, [x1], x3 // store row 0
+
+ mov v0.16b, v4.16b // slide the vertical window down 4 rows: v0-v4 <- v4-v8
+ mov v1.16b, v5.16b
+
+ st1 {v27.2s}, [x1], x3 // store row 1
+
+ mov v2.16b, v6.16b
+ mov v3.16b, v7.16b
+
+ mov v4.8b, v8.8b
+
+ sqrshrun v25.8b, v10.8h, #5 // horz result row 3
+ subs x9, x4, #4 // x9 is scratch: only the flags (remaining ht == 4?) are used; x4 is not modified here
+ urhadd v24.16b, v24.16b , v28.16b // row 2: average of horz and vert results
+ urhadd v25.16b, v25.16b , v29.16b // row 3: average of horz and vert results
+ st1 {v24.2s}, [x1], x3 // store row 2
+ st1 {v25.2s}, [x1], x3 // store row 3
+ beq end_func // Branch if remaining height==4
+
+ ld1 {v5.2s}, [x7], x2 // Vector load from src[9_0]
+ uaddl v10.8h, v0.8b, v5.8b // vert row 4
+ umlal v10.8h, v2.8b, v30.8b
+ umlal v10.8h, v3.8b, v30.8b
+ umlsl v10.8h, v1.8b, v31.8b
+ umlsl v10.8h, v4.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 //horz row 4
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v26.8b, v10.8h, #5 // vert result row 4
+ ld1 {v6.2s}, [x7], x2 // src[10_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 4
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 // horz row 5
+ uaddl v18.8h, v1.8b, v6.8b // vert row 5
+ umlal v18.8h, v3.8b, v30.8b
+ umlal v18.8h, v4.8b, v30.8b
+ umlsl v18.8h, v2.8b, v31.8b
+ umlsl v18.8h, v5.8b, v31.8b
+ sqrshrun v28.8b, v10.8h, #5 // horz result row 4
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v27.8b, v18.8h, #5 // vert result row 5
+ ld1 {v7.2s}, [x7], x2 // src[11_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 5
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 // horz row 6
+ uaddl v18.8h, v2.8b, v7.8b // vert row 6
+ umlal v18.8h, v4.8b, v30.8b
+ umlal v18.8h, v5.8b, v30.8b
+ umlsl v18.8h, v3.8b, v31.8b
+ umlsl v18.8h, v6.8b, v31.8b
+ sqrshrun v29.8b, v10.8h, #5 // horz result row 5
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ urhadd v26.16b, v26.16b , v28.16b // row 4: average of vert and horz results
+ urhadd v27.16b, v27.16b , v29.16b // row 5: average of vert and horz results
+ sqrshrun v28.8b, v18.8h, #5 // vert result row 6
+ ld1 {v8.2s}, [x7], x2 // src[12_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 6
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 // horz row 7
+ uaddl v18.8h, v3.8b, v8.8b // vert row 7
+ umlal v18.8h, v5.8b, v30.8b
+ umlal v18.8h, v6.8b, v30.8b
+ umlsl v18.8h, v4.8b, v31.8b
+ umlsl v18.8h, v7.8b, v31.8b
+ sqrshrun v24.8b, v10.8h, #5 // horz result row 6
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v29.8b, v18.8h, #5 // vert result row 7
+ uaddl v10.8h, v12.8b, v17.8b // horz row 7
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ st1 {v26.2s}, [x1], x3 // store row 4
+
+ mov v0.16b, v4.16b // slide the vertical window down 4 rows for the next iteration
+ mov v1.16b, v5.16b
+ st1 {v27.2s}, [x1], x3 // store row 5
+
+ mov v2.16b, v6.16b
+ mov v3.16b, v7.16b
+
+ mov v4.8b, v8.8b
+ // no copy into v5 is needed: v5 is reloaded by the ld1 at the top of loop_8
+
+ sqrshrun v25.8b, v10.8h, #5 // horz result row 7
+ subs x4, x4, #8 // 8 rows produced per full iteration
+ urhadd v24.16b, v24.16b , v28.16b // row 6: average of horz and vert results
+ urhadd v25.16b, v25.16b , v29.16b // row 7: average of horz and vert results
+ st1 {v24.2s}, [x1], x3 // store row 6
+ st1 {v25.2s}, [x1], x3 // store row 7
+ bgt loop_8 //loop again while rows remain
+ b end_func
+
+loop_4_start:
+ ld1 {v0.s}[0], [x7], x2 // Vector load from src[0_0]
+ ld1 {v1.s}[0], [x7], x2 // Vector load from src[1_0]
+
+ ld1 {v2.s}[0], [x7], x2 // Vector load from src[2_0]
+ ld1 {v3.s}[0], [x7], x2 // Vector load from src[3_0]
+ ld1 {v4.s}[0], [x7], x2 // Vector load from src[4_0]
+
+ ld1 {v5.s}[0], [x7], x2 // Vector load from src[5_0]
+ uaddl v10.8h, v0.8b, v5.8b // vert row 0: src[0] + src[5] (only lane s[0] of each row was loaded and only s[0] is stored)
+ umlal v10.8h, v2.8b, v30.8b // + 20*src[2]
+ umlal v10.8h, v3.8b, v30.8b // + 20*src[3]
+ umlsl v10.8h, v1.8b, v31.8b // - 5*src[1]
+ umlsl v10.8h, v4.8b, v31.8b // - 5*src[4]
+ ld1 {v12.2s, v13.2s}, [x6], x2 //load for horz filter row 0
+ ext v17.8b, v12.8b , v13.8b , #5 // horizontal taps: loaded row shifted by 5, 2, 3, 4 and 1 bytes
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v26.8b, v10.8h, #5 // vert result row 0: rounding shift right by 5, saturate to unsigned 8 bit
+ ld1 {v6.s}[0], [x7], x2 // Vector load from src[6_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 0: p[0] + p[5]
+ umlal v10.8h, v14.8b, v30.8b // + 20*p[2]
+ umlal v10.8h, v15.8b, v30.8b // + 20*p[3]
+ umlsl v10.8h, v13.8b, v31.8b // - 5*p[1]
+ umlsl v10.8h, v16.8b, v31.8b // - 5*p[4]
+ ld1 {v12.2s, v13.2s}, [x6], x2 //horz row 1
+ uaddl v18.8h, v1.8b, v6.8b // vert row 1
+ umlal v18.8h, v3.8b, v30.8b
+ umlal v18.8h, v4.8b, v30.8b
+ umlsl v18.8h, v2.8b, v31.8b
+ umlsl v18.8h, v5.8b, v31.8b
+ sqrshrun v28.8b, v10.8h, #5 // horz result row 0
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v27.8b, v18.8h, #5 // vert result row 1
+ ld1 {v7.s}[0], [x7], x2 // Vector load from src[7_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 1
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 //horz row 2
+ uaddl v18.8h, v2.8b, v7.8b // vert row 2
+ umlal v18.8h, v4.8b, v30.8b
+ umlal v18.8h, v5.8b, v30.8b
+ umlsl v18.8h, v3.8b, v31.8b
+ umlsl v18.8h, v6.8b, v31.8b
+ sqrshrun v29.8b, v10.8h, #5 // horz result row 1
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ urhadd v26.16b, v26.16b , v28.16b // row 0: rounding average of vert and horz results
+ urhadd v27.16b, v27.16b , v29.16b // row 1: rounding average of vert and horz results
+ sqrshrun v28.8b, v18.8h, #5 // vert result row 2
+ ld1 {v8.s}[0], [x7], x2 // Vector load from src[8_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 2
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 //horz row 3
+ uaddl v18.8h, v3.8b, v8.8b // vert row 3
+ umlal v18.8h, v5.8b, v30.8b
+ umlal v18.8h, v6.8b, v30.8b
+ umlsl v18.8h, v4.8b, v31.8b
+ umlsl v18.8h, v7.8b, v31.8b
+ sqrshrun v24.8b, v10.8h, #5 // horz result row 2
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v29.8b, v18.8h, #5 // vert result row 3
+ uaddl v10.8h, v12.8b, v17.8b // horz row 3
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ st1 {v26.s}[0], [x1], x3 // store row 0 (4 bytes)
+
+ mov v0.16b, v4.16b // slide the vertical window down 4 rows: v0-v4 <- v4-v8
+ mov v1.16b, v5.16b
+
+ st1 {v27.s}[0], [x1], x3 // store row 1
+
+ mov v2.16b, v6.16b
+ mov v3.16b, v7.16b
+ mov v4.8b, v8.8b
+
+ sqrshrun v25.8b, v10.8h, #5 // horz result row 3
+ subs x4, x4, #4 // 4 rows done; flags are consumed by the beq below
+ urhadd v24.16b, v24.16b , v28.16b // row 2: average of horz and vert results
+ urhadd v25.16b, v25.16b , v29.16b // row 3: average of horz and vert results
+ st1 {v24.s}[0], [x1], x3 // store row 2
+ st1 {v25.s}[0], [x1], x3 // store row 3
+ beq end_func // Branch if height==4
+ // otherwise produce 4 more rows, then fall through to end_func (there is no loop back)
+ ld1 {v5.s}[0], [x7], x2 // Vector load from src[9_0]
+ uaddl v10.8h, v0.8b, v5.8b // vert row 4
+ umlal v10.8h, v2.8b, v30.8b
+ umlal v10.8h, v3.8b, v30.8b
+ umlsl v10.8h, v1.8b, v31.8b
+ umlsl v10.8h, v4.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 //load for horz filter row 4
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v26.8b, v10.8h, #5 // vert result row 4
+ ld1 {v6.s}[0], [x7], x2 // Vector load from src[10_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 4
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 //horz row 5
+ uaddl v18.8h, v1.8b, v6.8b // vert row 5
+ umlal v18.8h, v3.8b, v30.8b
+ umlal v18.8h, v4.8b, v30.8b
+ umlsl v18.8h, v2.8b, v31.8b
+ umlsl v18.8h, v5.8b, v31.8b
+ sqrshrun v28.8b, v10.8h, #5 // horz result row 4
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v27.8b, v18.8h, #5 // vert result row 5
+ ld1 {v7.s}[0], [x7], x2 // Vector load from src[11_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 5
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 //horz row 6
+ uaddl v18.8h, v2.8b, v7.8b // vert row 6
+ umlal v18.8h, v4.8b, v30.8b
+ umlal v18.8h, v5.8b, v30.8b
+ umlsl v18.8h, v3.8b, v31.8b
+ umlsl v18.8h, v6.8b, v31.8b
+ sqrshrun v29.8b, v10.8h, #5 // horz result row 5
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ urhadd v26.16b, v26.16b , v28.16b // row 4: average of vert and horz results
+ urhadd v27.16b, v27.16b , v29.16b // row 5: average of vert and horz results
+ sqrshrun v28.8b, v18.8h, #5 // vert result row 6
+ ld1 {v8.s}[0], [x7], x2 // Vector load from src[12_0]
+ uaddl v10.8h, v12.8b, v17.8b // horz row 6
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ ld1 {v12.2s, v13.2s}, [x6], x2 //horz row 7
+ uaddl v18.8h, v3.8b, v8.8b // vert row 7
+ umlal v18.8h, v5.8b, v30.8b
+ umlal v18.8h, v6.8b, v30.8b
+ umlsl v18.8h, v4.8b, v31.8b
+ umlsl v18.8h, v7.8b, v31.8b
+ sqrshrun v24.8b, v10.8h, #5 // horz result row 6
+ ext v17.8b, v12.8b , v13.8b , #5
+ ext v14.8b, v12.8b , v13.8b , #2
+ ext v15.8b, v12.8b , v13.8b , #3
+ ext v16.8b, v12.8b , v13.8b , #4
+ ext v13.8b, v12.8b , v13.8b , #1
+ sqrshrun v29.8b, v18.8h, #5 // vert result row 7
+ uaddl v10.8h, v12.8b, v17.8b // horz row 7
+ umlal v10.8h, v14.8b, v30.8b
+ umlal v10.8h, v15.8b, v30.8b
+ umlsl v10.8h, v13.8b, v31.8b
+ umlsl v10.8h, v16.8b, v31.8b
+ st1 {v26.s}[0], [x1], x3 // store row 4
+ st1 {v27.s}[0], [x1], x3 // store row 5
+ sqrshrun v25.8b, v10.8h, #5 // horz result row 7
+ urhadd v24.16b, v24.16b , v28.16b // row 6: average of horz and vert results
+ urhadd v25.16b, v25.16b , v29.16b // row 7: average of horz and vert results
+ st1 {v24.s}[0], [x1], x3 // store row 6
+ st1 {v25.s}[0], [x1], x3 // store row 7
+
+end_func:
+ ldp x19, x20, [sp], #16 // common exit: restore the x19/x20 pair pushed at function entry and release its 16-byte slot
+ pop_v_regs // counterpart of push_v_regs at function entry (macro from ih264_neon_macros.s; body not shown here)
+ ret
+
+
+
diff --git a/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s b/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
new file mode 100755
index 0000000..9d19a2d
--- /dev/null
+++ b/common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
@@ -0,0 +1,511 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_inter_pred_luma_vert_qpel_av8.s
+//*
+//* @brief
+//* Contains function definitions for inter prediction vertical quarter pel interpolation.
+//*
+//* @author
+//* Mohit
+//*
+//* @par List of Functions:
+//*
+//* - ih264_inter_pred_luma_vert_qpel_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_inter_pred_filters.c
+//
+
+///**
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Quarter pel interprediction luma filter for vertical input
+//*
+//* @par Description:
+//* Applies a 6 tap vertical filter. The output is clipped to 8 bits and averaged with a full-pel row chosen by dydx
+//* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pu1_tmp: temporary buffer: UNUSED in this function
+//*
+//* @param[in] dydx: x and y reference offset for qpel calculations.
+//* @returns
+//*
+// @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+//void ih264_inter_pred_luma_vert_qpel_av8 (
+// UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ht,
+// WORD32 wd,
+// UWORD8* pu1_tmp,
+// UWORD32 dydx)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ht
+// x5 => wd
+// x7 => dydx
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_inter_pred_luma_vert_qpel_av8
+
+ih264_inter_pred_luma_vert_qpel_av8:
+// In: x0=pu1_src, x1=pu1_dst, x2=src_strd, x3=dst_strd, x4=ht, x5=wd, x7=dydx (x6/pu1_tmp is never read)
+ push_v_regs // macro from ih264_neon_macros.s; presumably saves the NEON registers clobbered below - TODO confirm
+ stp x19, x20, [sp, #-16]! // push x19/x20 (popped again at end_func)
+
+// x7 is turned into the pointer to the full-pel rows that get averaged with the filtered rows
+ and x7, x7, #12 // keep bits 2-3 of dydx (the y-offset)
+ lsr x7, x7, #3 // (dydx & 12) >> 3 = y_offset >> 1
+ mul x7, x2, x7 // (y_offset >> 1) * src_strd
+ add x7, x0, x7 //pu1_src + (y_offset>>1)*src_strd
+ sub x14, x4, #16 // loop control value: ht - 16
+ movi v22.8h, #20 // Filter coeff 20 (0x14) in every halfword of v22
+ sub x0, x0, x2, lsl #1 //pu1_src-2*src_strd
+ subs x12, x5, #8 //if wd=8 branch to loop_8
+ movi v24.8h, #5 // Filter coeff 5 in every halfword of v24
+ beq loop_8_start
+
+ subs x12, x5, #4 //if wd=4 branch to loop_4
+ beq loop_4_start
+
+// Any other wd falls through to the 16-wide path: preload six source rows (two 8-byte halves per row)
+ ld1 {v0.2s, v1.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v2.2s, v3.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v4.2s, v5.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v6.2s, v7.2s}, [x0], x2 // Vector load from src[3_0]
+ add x14, x14, #1 //for checking loop
+ ld1 {v8.2s, v9.2s}, [x0], x2 // Vector load from src[4_0]
+ uaddl v12.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0]
+ ld1 {v10.2s, v11.2s}, [x0], x2 // Vector load from src[5_0]
+
+loop_16: //when wd=16
+// One pass emits 8 rows of 16 pixels. v0..v11 hold a sliding window of six source rows (even reg = columns 0-7, odd reg = columns 8-15)
+ uaddl v14.8h, v0.8b, v10.8b // temp = src[0_0] + src[5_0]
+ uaddl v16.8h, v2.8b, v8.8b // temp2 = src[1_0] + src[4_0]
+ mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+ uaddl v20.8h, v1.8b, v11.8b // temp4 = src[0_8] + src[5_8]
+ uaddl v18.8h, v5.8b, v7.8b // temp3 = src[2_8] + src[3_8]
+ mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ ld1 {v0.2s, v1.2s}, [x0], x2 // next source row overwrites the oldest row of the window
+ uaddl v26.8h, v3.8b, v9.8b // temp5 = src[1_8] + src[4_8]
+ uaddl v12.8h, v6.8b, v8.8b
+ mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v16.8h, v2.8b, v0.8b
+ uaddl v18.8h, v4.8b, v10.8b
+ mla v16.8h, v12.8h , v22.8h
+ mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ uaddl v26.8h, v5.8b, v11.8b
+ uaddl v12.8h, v7.8b, v9.8b
+ sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ uaddl v14.8h, v3.8b, v1.8b
+ ld1 {v2.2s, v3.2s}, [x0], x2
+ mla v14.8h, v12.8h , v22.8h
+ mls v16.8h, v18.8h , v24.8h
+ sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ ld1 {v20.2s, v21.2s}, [x7], x2 // Load for interpolation row 0
+ urhadd v30.16b, v20.16b , v30.16b // Interpolation to obtain qpel value
+ urhadd v31.16b, v21.16b , v31.16b // Interpolation to obtain qpel value
+ uaddl v18.8h, v4.8b, v2.8b
+ uaddl v12.8h, v8.8b, v10.8b
+ st1 {v30.2s, v31.2s}, [x1], x3 // Vector store to dst[0_0]
+ mla v18.8h, v12.8h , v22.8h
+ uaddl v20.8h, v6.8b, v0.8b
+ mls v14.8h, v26.8h , v24.8h
+ sqrshrun v30.8b, v16.8h, #5
+ uaddl v12.8h, v9.8b, v11.8b
+ uaddl v16.8h, v5.8b, v3.8b
+ uaddl v26.8h, v7.8b, v1.8b
+ mla v16.8h, v12.8h , v22.8h
+ mls v18.8h, v20.8h , v24.8h
+ ld1 {v4.2s, v5.2s}, [x0], x2
+ sqrshrun v31.8b, v14.8h, #5
+ ld1 {v14.2s, v15.2s}, [x7], x2 // Load for interpolation row 1
+ uaddl v12.8h, v10.8b, v0.8b
+ urhadd v30.16b, v14.16b , v30.16b // Interpolation to obtain qpel value
+ urhadd v31.16b, v15.16b , v31.16b // Interpolation to obtain qpel value
+ uaddl v14.8h, v6.8b, v4.8b
+ uaddl v20.8h, v8.8b, v2.8b
+ mla v14.8h, v12.8h , v22.8h
+ mls v16.8h, v26.8h , v24.8h
+ st1 {v30.2s, v31.2s}, [x1], x3 //store row 1
+ sqrshrun v30.8b, v18.8h, #5
+ uaddl v18.8h, v7.8b, v5.8b
+ uaddl v12.8h, v11.8b, v1.8b
+ mla v18.8h, v12.8h , v22.8h
+ uaddl v26.8h, v9.8b, v3.8b
+ mls v14.8h, v20.8h , v24.8h
+ ld1 {v6.2s, v7.2s}, [x0], x2
+ sqrshrun v31.8b, v16.8h, #5
+ ld1 {v16.2s, v17.2s}, [x7], x2 // Load for interpolation row 2
+ mls v18.8h, v26.8h , v24.8h
+ urhadd v30.16b, v16.16b , v30.16b // Interpolation to obtain qpel value
+ urhadd v31.16b, v17.16b , v31.16b // Interpolation to obtain qpel value
+ uaddl v12.8h, v0.8b, v2.8b // temp1 = src[2_0] + src[3_0]
+ st1 {v30.2s, v31.2s}, [x1], x3 //store row 2
+ uaddl v16.8h, v10.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+ uaddl v20.8h, v9.8b, v7.8b // temp4 = src[0_8] + src[5_8]
+ sqrshrun v30.8b, v14.8h, #5
+ uaddl v26.8h, v5.8b, v11.8b // temp5 = src[1_8] + src[4_8]
+ uaddl v14.8h, v8.8b, v6.8b // temp = src[0_0] + src[5_0]
+ sqrshrun v31.8b, v18.8h, #5
+ ld1 {v18.2s, v19.2s}, [x7], x2 // Load for interpolation row 3
+ mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+ urhadd v30.16b, v18.16b , v30.16b // Interpolation to obtain qpel value
+ urhadd v31.16b, v19.16b , v31.16b // Interpolation to obtain qpel value
+ uaddl v18.8h, v1.8b, v3.8b // temp3 = src[2_8] + src[3_8]
+ st1 {v30.2s, v31.2s}, [x1], x3 //store row 3
+ // 4 rows processed
+ mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ ld1 {v8.2s, v9.2s}, [x0], x2
+ uaddl v12.8h, v2.8b, v4.8b
+ uaddl v18.8h, v3.8b, v5.8b
+ mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v28.8h, v9.8b, v11.8b
+ uaddl v16.8h, v6.8b, v0.8b
+ mla v28.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ uaddl v26.8h, v1.8b, v7.8b
+ uaddl v18.8h, v5.8b, v7.8b
+ sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ uaddl v14.8h, v8.8b, v10.8b
+ sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ ld1 {v20.2s, v21.2s}, [x7], x2 // Load for interpolation row 4
+ ld1 {v10.2s, v11.2s}, [x0], x2
+ urhadd v30.16b, v20.16b , v30.16b // Interpolation to obtain qpel value
+ urhadd v31.16b, v21.16b , v31.16b // Interpolation to obtain qpel value
+ mls v28.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 4
+ mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+ uaddl v20.8h, v11.8b, v1.8b
+ uaddl v26.8h, v3.8b, v9.8b
+ mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ uaddl v12.8h, v6.8b, v4.8b
+ uaddl v18.8h, v7.8b, v9.8b
+ sqrshrun v31.8b, v28.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v16.8h, v8.8b, v2.8b
+ sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ ld1 {v14.2s, v15.2s}, [x7], x2 // Load for interpolation row 5
+ mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ urhadd v30.16b, v14.16b , v30.16b // Interpolation to obtain qpel value
+ urhadd v31.16b, v15.16b , v31.16b // Interpolation to obtain qpel value
+ uaddl v14.8h, v10.8b, v0.8b
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 5
+ mla v14.8h, v12.8h , v22.8h // temp += temp1 * 20
+ ld1 {v0.2s, v1.2s}, [x0], x2
+ uaddl v26.8h, v5.8b, v11.8b
+ uaddl v12.8h, v8.8b, v6.8b
+ uaddl v28.8h, v0.8b, v2.8b
+ sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ mla v28.8h, v12.8h , v22.8h // temp += temp1 * 20
+ uaddl v20.8h, v1.8b, v3.8b
+ mls v14.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ mla v20.8h, v18.8h , v22.8h // temp4 += temp3 * 20
+ uaddl v16.8h, v10.8b, v4.8b
+ sqrshrun v30.8b, v14.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ ld1 {v14.2s, v15.2s}, [x7], x2 // Load for interpolation row 6
+ mov v2.8b, v6.8b
+ mov v3.8b, v7.8b
+ urhadd v30.16b, v14.16b , v30.16b // Interpolation to obtain qpel value
+ urhadd v31.16b, v15.16b , v31.16b // Interpolation to obtain qpel value
+// The mov/swp steps from here on rotate the window so v0..v9 again hold five consecutive rows in order for the next pass
+ mls v28.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 6
+ sqrshrun v30.8b, v28.8h, #5 // dst[0_0] = CLIP_U8((temp +16) >> 5)
+ swp v0.8b, v4.8b // swapping registers to put it in order
+ swp v1.8b, v5.8b // swapping registers to put it in order
+// swp is not an A64 SIMD instruction; presumably a macro from ih264_neon_macros.s - TODO confirm
+ mls v20.8h, v26.8h , v24.8h // temp4 -= temp5 * 5
+ mov v6.8b, v10.8b
+ mov v7.8b, v11.8b
+ subs x12, x14, #1 // x14 - 1 is zero only on the first pass when ht == 16
+ swp v4.8b, v8.8b
+ swp v5.8b, v9.8b
+ sqrshrun v31.8b, v20.8h, #5 // dst[0_8] = CLIP_U8((temp4 +16) >> 5)
+ ld1 {v20.2s, v21.2s}, [x7], x2 // Load for interpolation row 7
+ urhadd v30.16b, v20.16b , v30.16b // Interpolation to obtain qpel value
+ urhadd v31.16b, v21.16b , v31.16b // Interpolation to obtain qpel value
+ st1 {v30.2s, v31.2s}, [x1], x3 // store row 7
+ bne end_func // uses the flags from the subs above: leave unless a second 8-row pass is needed
+ add x14, x14, #1 // bump the counter so the second pass exits at the bne above
+ ld1 {v10.2s, v11.2s}, [x0], x2 // sixth row of the window for the next pass
+ uaddl v12.8h, v4.8b, v6.8b // temp1 = src[2_0] + src[3_0]
+
+ b loop_16 // looping if height =16
+
+loop_8_start:
+//// wd == 8: preload six source rows of 8 bytes into v0..v5
+
+ ld1 {v0.2s}, [x0], x2 // Vector load from src[0_0]
+ ld1 {v1.2s}, [x0], x2 // Vector load from src[1_0]
+ ld1 {v2.2s}, [x0], x2 // Vector load from src[2_0]
+ ld1 {v3.2s}, [x0], x2 // Vector load from src[3_0]
+ add x14, x14, #1 //for checking loop
+ ld1 {v4.2s}, [x0], x2 // Vector load from src[4_0]
+ ld1 {v5.2s}, [x0], x2 // Vector load from src[5_0]
+
+loop_8:
+ // One pass emits up to 8 rows; v0..v7 are reused as a ring of source rows
+ uaddl v6.8h, v2.8b, v3.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v8.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0]
+ uaddl v10.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+ mla v8.8h, v6.8h , v22.8h // temp += temp1 * 20
+ ld1 {v6.2s}, [x0], x2
+ uaddl v14.8h, v3.8b, v4.8b
+ uaddl v16.8h, v1.8b, v6.8b
+ uaddl v18.8h, v2.8b, v5.8b
+ mls v8.8h, v10.8h , v24.8h // temp -= temp2 * 5
+ mla v16.8h, v14.8h , v22.8h
+ ld1 {v7.2s}, [x0], x2
+ uaddl v20.8h, v4.8b, v5.8b
+ uaddl v12.8h, v2.8b, v7.8b
+ uaddl v10.8h, v3.8b, v6.8b
+ mls v16.8h, v18.8h , v24.8h
+ sqrshrun v26.8b, v8.8h, #5 // dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+ mla v12.8h, v20.8h , v22.8h
+ ld1 {v8.2s}, [x7], x2 //Load value for interpolation (row0)
+ ld1 {v9.2s}, [x7], x2 //Load value for interpolation (row1)
+ ld1 {v0.2s}, [x0], x2
+ uaddl v14.8h, v5.8b, v6.8b
+ sqrshrun v27.8b, v16.8h, #5
+ urhadd v26.16b, v8.16b , v26.16b // Interpolation step for qpel calculation
+ urhadd v27.16b, v9.16b , v27.16b // Interpolation step for qpel calculation
+
+ uaddl v20.8h, v3.8b, v0.8b
+ mls v12.8h, v10.8h , v24.8h
+ st1 {v26.2s}, [x1], x3 // Vector store to dst[0_0]
+ uaddl v18.8h, v4.8b, v7.8b
+ mla v20.8h, v14.8h , v22.8h
+ st1 {v27.2s}, [x1], x3 // Vector store to dst[1_0]
+ sqrshrun v28.8b, v12.8h, #5
+ mls v20.8h, v18.8h , v24.8h
+ ld1 {v12.2s}, [x7], x2 //Load value for interpolation (row2)
+ ld1 {v13.2s}, [x7], x2 //Load value for interpolation (row3)
+ ld1 {v1.2s}, [x0], x2
+ sqrshrun v29.8b, v20.8h, #5
+ subs x9, x4, #4 // Z set when ht == 4 (the stores below do not touch the flags)
+ urhadd v28.16b, v12.16b , v28.16b
+ urhadd v29.16b, v13.16b , v29.16b
+ st1 {v28.2s}, [x1], x3 //store row 2
+ st1 {v29.2s}, [x1], x3 //store row 3
+ beq end_func // Branch if height==4
+ uaddl v14.8h, v6.8b, v7.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v16.8h, v0.8b, v5.8b // temp2 = src[1_0] + src[4_0] (inner tap pair)
+ uaddl v18.8h, v1.8b, v4.8b // temp = src[0_0] + src[5_0] (outer tap pair)
+ mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+ ld1 {v2.2s}, [x0], x2
+ mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v8.8h, v0.8b, v7.8b
+ uaddl v10.8h, v1.8b, v6.8b
+ uaddl v12.8h, v2.8b, v5.8b
+ sqrshrun v26.8b, v18.8h, #5
+ mla v12.8h, v8.8h , v22.8h
+ ld1 {v18.2s}, [x7], x2 //Load value for interpolation (row4)
+ ld1 {v19.2s}, [x7], x2 //Load value for interpolation (row5)
+ ld1 {v3.2s}, [x0], x2
+ mls v12.8h, v10.8h , v24.8h
+ sqrshrun v27.8b, v12.8h, #5
+ urhadd v26.16b, v18.16b , v26.16b // Interpolation step for qpel calculation
+ urhadd v27.16b, v19.16b , v27.16b // Interpolation step for qpel calculation
+
+ st1 {v26.2s}, [x1], x3 // store row 4
+ st1 {v27.2s}, [x1], x3 // store row 5
+ uaddl v14.8h, v0.8b, v1.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v16.8h, v2.8b, v7.8b // temp2 = src[1_0] + src[4_0] (inner tap pair)
+ uaddl v18.8h, v3.8b, v6.8b // temp = src[0_0] + src[5_0] (outer tap pair)
+ mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+ ld1 {v4.2s}, [x0], x2
+ mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v8.8h, v2.8b, v1.8b
+ uaddl v10.8h, v3.8b, v0.8b
+ uaddl v12.8h, v4.8b, v7.8b
+ sqrshrun v26.8b, v18.8h, #5
+ mla v12.8h, v8.8h , v22.8h
+ ld1 {v18.2s}, [x7], x2 //Load value for interpolation (row6)
+ ld1 {v19.2s}, [x7], x2 //Load value for interpolation (row7)
+ ld1 {v5.2s}, [x0], x2
+ mls v12.8h, v10.8h , v24.8h
+ sqrshrun v27.8b, v12.8h, #5
+ urhadd v26.16b, v18.16b , v26.16b // Interpolation step for qpel calculation
+ urhadd v27.16b, v19.16b , v27.16b // Interpolation step for qpel calculation
+
+ subs x12, x14, #1 // x14 - 1 is zero only on the first pass when ht == 16
+ st1 {v26.2s}, [x1], x3 // store row 6
+ st1 {v27.2s}, [x1], x3 // store row 7
+ add x14, x14, #1 // plain add leaves the flags from the subs intact
+ beq loop_8 //looping if height ==16
+
+ b end_func
+
+
+loop_4_start:
+//// wd == 4: straight-line code, no loop. Emits 4 rows when ht == 4, otherwise exactly 8 rows
+// Each row is loaded and stored as one 32-bit lane (.s[0]); the upper vector lanes are
+// computed along with the rest but never stored, so their contents do not matter
+ ld1 {v0.s}[0], [x0], x2 // Vector load from src[0_0]
+ ld1 {v1.s}[0], [x0], x2 // Vector load from src[1_0]
+ ld1 {v2.s}[0], [x0], x2 // Vector load from src[2_0]
+ ld1 {v3.s}[0], [x0], x2 // Vector load from src[3_0]
+ ld1 {v4.s}[0], [x0], x2 // Vector load from src[4_0]
+ ld1 {v5.s}[0], [x0], x2 // Vector load from src[5_0]
+
+ uaddl v6.8h, v2.8b, v3.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v8.8h, v0.8b, v5.8b // temp = src[0_0] + src[5_0]
+ uaddl v10.8h, v1.8b, v4.8b // temp2 = src[1_0] + src[4_0]
+ mla v8.8h, v6.8h , v22.8h // temp += temp1 * 20
+ ld1 {v6.s}[0], [x0], x2 // 4-byte lane load like every other row here (was an 8-byte .2s load that read past the 4-pixel row)
+ uaddl v14.8h, v3.8b, v4.8b
+ uaddl v16.8h, v1.8b, v6.8b
+ uaddl v18.8h, v2.8b, v5.8b
+ mls v8.8h, v10.8h , v24.8h // temp -= temp2 * 5
+ ld1 {v7.s}[0], [x0], x2
+ mla v16.8h, v14.8h , v22.8h
+ uaddl v20.8h, v4.8b, v5.8b
+ uaddl v12.8h, v2.8b, v7.8b
+ uaddl v10.8h, v3.8b, v6.8b
+ mls v16.8h, v18.8h , v24.8h
+ sqrshrun v26.8b, v8.8h, #5 // dst[0_0] = CLIP_U8( (temp + 16) >> 5)
+ ld1 {v8.s}[0], [x7], x2 //Load value for interpolation - row 0
+ ld1 {v9.s}[0], [x7], x2 //Load value for interpolation - row 1
+ mla v12.8h, v20.8h , v22.8h
+ ld1 {v0.s}[0], [x0], x2
+ uaddl v14.8h, v5.8b, v6.8b
+ sqrshrun v27.8b, v16.8h, #5
+ uaddl v20.8h, v3.8b, v0.8b
+ urhadd v26.16b, v26.16b , v8.16b //Interpolation step for qpel calculation
+ urhadd v27.16b, v27.16b , v9.16b //Interpolation step for qpel calculation
+
+ mls v12.8h, v10.8h , v24.8h
+ st1 {v26.s}[0], [x1], x3 // Vector store to dst[0_0]
+ uaddl v18.8h, v4.8b, v7.8b
+ mla v20.8h, v14.8h , v22.8h
+ st1 {v27.s}[0], [x1], x3 // store row 1
+ sqrshrun v28.8b, v12.8h, #5
+ ld1 {v12.s}[0], [x7], x2 //Load value for interpolation - row 2
+ ld1 {v13.s}[0], [x7], x2 //Load value for interpolation - row 3
+
+ mls v20.8h, v18.8h , v24.8h
+ ld1 {v1.s}[0], [x0], x2
+ sqrshrun v29.8b, v20.8h, #5
+ urhadd v28.16b, v12.16b , v28.16b //Interpolation step for qpel calculation
+ urhadd v29.16b, v13.16b , v29.16b //Interpolation step for qpel calculation
+
+ st1 {v28.s}[0], [x1], x3 //store row 2
+ st1 {v29.s}[0], [x1], x3 //store row 3
+
+ subs x9, x4, #4 // Z set when ht == 4
+ beq end_func // Branch if height==4
+
+// Rows 4-7: v0..v7 now act as a ring, so the tap pairs are picked by register rather than by ascending number
+ uaddl v14.8h, v6.8b, v7.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v16.8h, v0.8b, v5.8b // temp2 = src[1_0] + src[4_0] (inner tap pair)
+ uaddl v18.8h, v1.8b, v4.8b // temp = src[0_0] + src[5_0] (outer tap pair)
+ mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+ ld1 {v2.s}[0], [x0], x2
+ mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v8.8h, v0.8b, v7.8b
+ uaddl v10.8h, v1.8b, v6.8b
+ uaddl v12.8h, v2.8b, v5.8b
+ sqrshrun v26.8b, v18.8h, #5
+ ld1 {v18.s}[0], [x7], x2 //Load value for interpolation - row 4
+ ld1 {v19.s}[0], [x7], x2 //Load value for interpolation - row 5
+ mla v12.8h, v8.8h , v22.8h
+ ld1 {v3.s}[0], [x0], x2
+ mls v12.8h, v10.8h , v24.8h
+ sqrshrun v27.8b, v12.8h, #5
+ urhadd v26.16b, v18.16b , v26.16b //Interpolation step for qpel calculation
+ urhadd v27.16b, v27.16b , v19.16b //Interpolation step for qpel calculation
+
+ st1 {v26.s}[0], [x1], x3 //store row 4
+ st1 {v27.s}[0], [x1], x3 // store row 5
+ uaddl v14.8h, v0.8b, v1.8b // temp1 = src[2_0] + src[3_0]
+ uaddl v16.8h, v2.8b, v7.8b // temp2 = src[1_0] + src[4_0] (inner tap pair)
+ uaddl v18.8h, v3.8b, v6.8b // temp = src[0_0] + src[5_0] (outer tap pair)
+ mla v18.8h, v14.8h , v22.8h // temp += temp1 * 20
+ ld1 {v4.s}[0], [x0], x2
+ mls v18.8h, v16.8h , v24.8h // temp -= temp2 * 5
+ uaddl v8.8h, v2.8b, v1.8b
+ uaddl v10.8h, v3.8b, v0.8b
+ uaddl v12.8h, v4.8b, v7.8b
+ sqrshrun v26.8b, v18.8h, #5
+ ld1 {v18.s}[0], [x7], x2 //Load value for interpolation - row 6
+ ld1 {v19.s}[0], [x7], x2 //Load value for interpolation - row 7
+ mla v12.8h, v8.8h , v22.8h
+ ld1 {v5.s}[0], [x0], x2
+ mls v12.8h, v10.8h , v24.8h
+ sqrshrun v27.8b, v12.8h, #5
+ urhadd v26.16b, v18.16b , v26.16b //Interpolation step for qpel calculation
+ urhadd v27.16b, v19.16b , v27.16b //Interpolation step for qpel calculation
+
+ st1 {v26.s}[0], [x1], x3 // store row 6
+ st1 {v27.s}[0], [x1], x3 // store row 7
+
+
+end_func:
+ // Common exit for all three width paths: undo the prologue in reverse order
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_intra_pred_chroma_av8.s b/common/armv8/ih264_intra_pred_chroma_av8.s
new file mode 100755
index 0000000..62edfdc
--- /dev/null
+++ b/common/armv8/ih264_intra_pred_chroma_av8.s
@@ -0,0 +1,574 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_intra_pred_chroma_av8.s
+//*
+//* @brief
+//* Contains function definitions for intra chroma prediction .
+//*
+//* @author
+//* Ittiam
+//*
+//* @par List of Functions:
+//*
+//* - ih264_intra_pred_chroma_8x8_mode_vert_av8()
+//* - ih264_intra_pred_chroma_8x8_mode_horz_av8()
+//* - ih264_intra_pred_chroma_8x8_mode_dc_av8()
+//* - ih264_intra_pred_chroma_8x8_mode_plane_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
+//
+
+///**
+///**
+///**
+//
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.extern ih264_gai1_intrapred_chroma_plane_coeffs1
+.extern ih264_gai1_intrapred_chroma_plane_coeffs2
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_chroma_8x8_mode_dc
+//*
+//* @brief
+//* Perform Intra prediction for chroma_8x8 mode:DC
+//*
+//* @par Description:
+//* Perform Intra prediction for chroma_8x8 mode:DC ,described in sec 8.3.4.1
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source containing alternate U and V samples
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination with alternate U and V samples
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_dc_av8
+
+ih264_intra_pred_chroma_8x8_mode_dc_av8:
+// In: x0=pu1_src, x1=pu1_dst, x3=dst_strd, x4=ui_neighboravailability. Writes 8 rows of 16 bytes to pu1_dst
+// v20 is the row pattern stored to rows 0-3, v21 the pattern stored to rows 4-7 (see the store label)
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ mov x19, #5 // mask for bits 0 and 2 of the availability word
+ ands x6, x4, x19
+ beq none_available
+ cmp x6, #1
+ beq left_only_available
+ cmp x6, #4
+ beq top_only_available
+
+all_available:
+ ld1 {v0.8b, v1.8b}, [x0] // src bytes 0-15
+ add x6, x0, #18
+ ld1 {v2.8b, v3.8b}, [x6] // src bytes 18-33
+ uxtl v0.8h, v0.8b // widen each byte to 16 bits
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s // 32-bit lanes = one 16-bit U/V pair each, so U adds to U and V to V
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s // after two pairwise adds every lane holds the sum of the 4 pairs
+ addp v1.4s, v1.4s , v1.4s
+ uxtl v2.8h, v2.8b
+ uxtl v3.8h, v3.8b
+ addp v2.4s, v2.4s , v2.4s
+ addp v3.4s, v3.4s , v3.4s
+ addp v2.4s, v2.4s , v2.4s
+ addp v3.4s, v3.4s , v3.4s
+ rshrn v5.8b, v0.8h, #2 // rounded sum/4, narrowed back to bytes
+ dup v21.8h, v5.h[0] // replicate the resulting byte pair across the register
+ rshrn v6.8b, v3.8h, #2
+ dup v20.8h, v6.h[0]
+ add v1.8h, v1.8h, v2.8h // combine two 4-pair sums
+ rshrn v1.8b, v1.8h, #3 // rounded sum/8
+ dup v23.8h, v1.h[0]
+ mov v20.d[0], v23.d[0] // v20: low half from v1+v2, high half from v3 only
+ add v0.8h, v0.8h, v3.8h
+ rshrn v0.8b, v0.8h, #3
+ dup v23.8h, v0.h[0]
+ mov v21.d[1], v23.d[0] // v21: low half from v0 only, high half from v0+v3
+ b store
+left_only_available:
+ ld1 {v0.8b, v1.8b}, [x0] // src bytes 0-15 only
+ uxtl v0.8h, v0.8b
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ rshrn v0.8b, v0.8h, #2
+ rshrn v1.8b, v1.8h, #2
+ dup v20.8h , v1.h[0] // rows 0-3 use the average of src bytes 8-15
+ dup v21.8h, v0.h[0] // rows 4-7 use the average of src bytes 0-7
+ b store
+
+top_only_available:
+ add x6, x0, #18
+ ld1 {v0.8b, v1.8b}, [x6] // src bytes 18-33 only
+ uxtl v0.8h, v0.8b
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ rshrn v0.8b, v0.8h, #2
+ rshrn v1.8b, v1.8h, #2
+ dup v20.8h , v0.h[0]
+ dup v21.8h, v1.h[0]
+ mov v20.d[1], v21.d[1] // v20 = [avg of bytes 18-25 | avg of bytes 26-33]
+ mov v21.d[0], v20.d[0] // v21 becomes identical to v20
+ b store
+none_available:
+ mov w15, #128
+ dup v20.16b, w15 // every output byte is 128
+ dup v21.16b, w15
+
+
+store:
+// rows 0-3 take v20, rows 4-7 take v21
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v21.16b}, [x1], x3
+ st1 { v21.16b}, [x1], x3
+ st1 { v21.16b}, [x1], x3
+ st1 { v21.16b}, [x1], x3
+end_func:
+// nothing in this routine branches here; execution falls through from store
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_chroma_8x8_mode_horz
+//*
+//* @brief
+//* Perform Intra prediction for chroma_8x8 mode:Horizontal
+//*
+//* @par Description:
+//* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source containing alternate U and V samples
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination with alternate U and V samples
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_horz_av8
+
+ih264_intra_pred_chroma_8x8_mode_horz_av8:
+// In: x0=pu1_src, x1=pu1_dst, x3=dst_strd. x2 and x4 are not read
+// Each output row is one 16-bit element of the first 16 source bytes replicated 8 times
+// (presumably one U/V pair of the left neighbours per element - TODO confirm the neighbour buffer layout)
+ push_v_regs
+ ld1 {v0.8h}, [x0] // eight halfwords from src bytes 0-15
+
+ dup v10.8h, v0.h[7] // the last element feeds the first output row
+ dup v11.8h, v0.h[6]
+ dup v12.8h, v0.h[5]
+ dup v13.8h, v0.h[4]
+ st1 {v10.8h}, [x1], x3 // row 0
+ dup v14.8h, v0.h[3]
+ st1 {v11.8h}, [x1], x3 // row 1
+ dup v15.8h, v0.h[2]
+ st1 {v12.8h}, [x1], x3 // row 2
+ dup v16.8h, v0.h[1]
+ st1 {v13.8h}, [x1], x3 // row 3
+ dup v17.8h, v0.h[0] // the first element feeds the last output row
+ st1 {v14.8h}, [x1], x3 // row 4
+ st1 {v15.8h}, [x1], x3 // row 5
+ st1 {v16.8h}, [x1], x3 // row 6
+ st1 {v17.8h}, [x1], x3 // row 7
+
+
+ pop_v_regs
+ ret
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_chroma_8x8_mode_vert
+//*
+//* @brief
+//* Perform Intra prediction for chroma_8x8 mode:vertical
+//*
+//* @par Description:
+//*Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source containing alternate U and V samples
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination with alternate U and V samples
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_chroma_8x8_mode_vert_av8
+
+ih264_intra_pred_chroma_8x8_mode_vert_av8:
+// In: x0=pu1_src, x1=pu1_dst, x3=dst_strd. Copies src bytes 18-33 to each of the 8 output rows
+ push_v_regs
+
+ add x0, x0, #18 // byte offset 18 is also where the DC routine reads its top samples
+ ld1 {v0.8b, v1.8b}, [x0] // 16 bytes, loaded once
+
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 0
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 1
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 2
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 3
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 4
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 5
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 6
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 7
+
+ pop_v_regs
+ ret
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_chroma_8x8_mode_plane
+//*
+//* @brief
+//* Perform Intra prediction for chroma_8x8 mode:PLANE
+//*
+//* @par Description:
+//* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source containing alternate U and V samples
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination with alternate U and V samples
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+ .global ih264_intra_pred_chroma_8x8_mode_plane_av8
+ih264_intra_pred_chroma_8x8_mode_plane_av8:
+// In: x0=pu1_src, x1=pu1_dst, x3=dst_strd. Writes 8 rows of 16 bytes (interleaved U/V) to pu1_dst
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+// Step 1: gather neighbour samples and form the differences that feed the two gradients
+ ld1 {v0.2s}, [x0] // src bytes 0-7
+ add x10, x0, #10
+ ld1 {v1.2s}, [x10] // src bytes 10-17
+ add x10, x10, #6
+ rev64 v5.4h, v0.4h // reverse the order of the four 16-bit (U,V) pairs
+ ld1 {v2.2s}, [x10], #8 // src bytes 16-23
+ add x10, x10, #2
+ rev64 v7.4h, v2.4h
+ ld1 {v3.2s}, [x10] // src bytes 26-33
+ sub x5, x3, #8 // x5 is not read again in this routine
+ adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1
+ ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1]
+ usubl v10.8h, v5.8b, v1.8b // differences from src bytes 0-17
+ ld1 {v8.8b, v9.8b}, [x12] // 16 bytes of the coeffs1 table into v8/v9
+ mov v8.d[1], v9.d[0] // join them into one 128-bit register
+ usubl v12.8h, v3.8b, v7.8b // differences from src bytes 16-33
+ mul v14.8h, v10.8h , v8.8h
+ mul v16.8h, v12.8h , v8.8h
+ uzp1 v15.8h, v14.8h, v16.8h // even elements (first sample of each pair)
+ uzp2 v16.8h, v14.8h, v16.8h // odd elements (second sample of each pair)
+ mov v14.16b, v15.16b
+ mov v15.d[0], v14.d[1]
+ mov v17.d[0], v16.d[1]
+ addp v14.4h, v14.4h, v14.4h // two rounds of pairwise adds reduce each 4-element group to its sum
+ addp v15.4h, v15.4h, v15.4h
+ addp v16.4h, v16.4h, v16.4h
+ addp v17.4h, v17.4h, v17.4h
+ addp v14.4h, v14.4h, v14.4h
+ addp v15.4h, v15.4h, v15.4h
+ addp v16.4h, v16.4h, v16.4h
+ addp v17.4h, v17.4h, v17.4h
+ mov x6, #34
+ dup v18.8h, w6
+ smull v22.4s, v14.4h, v18.4h // each sum * 34 ...
+ smull v24.4s, v15.4h, v18.4h
+ smull v26.4s, v16.4h, v18.4h
+ smull v28.4s, v17.4h, v18.4h
+ rshrn v10.4h, v22.4s, #6 // ... then rounded >> 6
+ rshrn v12.4h, v24.4s, #6
+ rshrn v13.4h, v26.4s, #6
+ rshrn v14.4h, v28.4s, #6
+ ldrb w6, [x0], #1 // src[0]
+ sxtw x6, w6
+ add x10, x0, #31 // x0 was already advanced by 1, so this is src + 32
+ ldrb w8, [x0], #1 // src[1]
+ sxtw x8, w8
+ ldrb w7, [x10], #1 // src[32]
+ sxtw x7, w7
+ ldrb w9, [x10], #1 // src[33]
+ sxtw x9, w9
+ add x6, x6, x7
+ add x8, x8, x9
+ lsl x6, x6, #4 // 16 * (src[0] + src[32])
+ lsl x8, x8, #4 // 16 * (src[1] + src[33])
+ dup v0.8h, w6
+ dup v2.8h, w8
+ dup v4.8h, v12.h[0]
+ dup v6.8h, v10.h[0]
+ dup v24.8h, v14.h[0]
+ dup v26.8h, v13.h[0]
+ zip1 v5.8h, v4.8h, v24.8h // interleave the two per-component values back into pair order
+ zip2 v24.8h, v4.8h, v24.8h
+ mov v4.16b, v5.16b
+ zip1 v7.8h, v6.8h, v26.8h
+ zip2 v26.8h, v6.8h, v26.8h
+ mov v6.16b, v7.16b
+ zip1 v1.8h, v0.8h, v2.8h
+ zip2 v2.8h, v0.8h, v2.8h
+ mov v0.16b, v1.16b
+// Step 2: v12/v16 = v0 + v4 * coeffs2 (per-column part); each row then adds v6 * coeffs2[row]
+ adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2
+ ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2]
+
+ ld1 {v8.2s, v9.2s}, [x12]
+ mov v8.d[1], v9.d[0]
+ mov v10.16b, v8.16b
+ mov v22.16b, v8.16b // v22 keeps the 8 halfword coefficients for the per-row dup below
+ zip1 v9.8h, v8.8h, v10.8h // duplicate each coefficient so both samples of a pair share it
+ zip2 v10.8h, v8.8h, v10.8h
+ mov v8.16b, v9.16b
+ mul v12.8h, v4.8h , v8.8h
+ mul v16.8h, v4.8h , v10.8h
+ add v12.8h, v0.8h , v12.8h // columns 0-3 (8 interleaved samples)
+ add v16.8h, v0.8h , v16.8h // columns 4-7
+ dup v20.8h, v22.h[0] // row 0 coefficient
+ mul v4.8h, v6.8h , v20.8h
+ dup v30.8h, v22.h[1] // row 1 coefficient (canonical Vn.h[i] lane form, was v22.4h[1])
+ mul v18.8h, v6.8h , v20.8h
+ mul v14.8h, v6.8h , v30.8h
+ mul v8.8h, v6.8h , v30.8h
+ add v24.8h, v12.8h , v4.8h
+ add v0.8h, v16.8h , v18.8h
+ add v2.8h, v12.8h , v14.8h
+ sqrshrun v28.8b, v24.8h, #5 // rounded >> 5 with unsigned saturation to 8 bits
+ add v26.8h, v16.8h , v8.8h
+ sqrshrun v29.8b, v0.8h, #5
+ dup v20.8h, v22.h[2] // row 2 coefficient (was v22.4h[2])
+ st1 {v28.8b, v29.8b}, [x1], x3 // row 0
+ sqrshrun v28.8b, v2.8h, #5
+ sqrshrun v29.8b, v26.8h, #5
+ mul v4.8h, v6.8h , v20.8h
+ mul v18.8h, v6.8h , v20.8h
+ st1 {v28.8b, v29.8b}, [x1], x3 // row 1
+ add v24.8h, v12.8h , v4.8h
+ add v0.8h, v16.8h , v18.8h
+ dup v30.8h, v22.h[3] // row 3 coefficient (was v22.4h[3])
+ sqrshrun v28.8b, v24.8h, #5
+ sqrshrun v29.8b, v0.8h, #5
+ mul v14.8h, v6.8h , v30.8h
+ mul v8.8h, v6.8h , v30.8h
+ st1 {v28.8b, v29.8b}, [x1], x3 // row 2
+ add v2.8h, v12.8h , v14.8h
+ add v26.8h, v16.8h , v8.8h
+ dup v20.8h, v22.h[4]
+ sqrshrun v28.8b, v2.8h, #5
+ sqrshrun v29.8b, v26.8h, #5
+ mul v4.8h, v6.8h , v20.8h
+ mul v18.8h, v6.8h , v20.8h
+ st1 {v28.8b, v29.8b}, [x1], x3 // row 3
+ add v24.8h, v12.8h , v4.8h
+ add v0.8h, v16.8h , v18.8h
+ dup v30.8h, v22.h[5]
+ sqrshrun v28.8b, v24.8h, #5
+ sqrshrun v29.8b, v0.8h, #5
+ mul v14.8h, v6.8h , v30.8h
+ mul v8.8h, v6.8h , v30.8h
+ st1 {v28.8b, v29.8b}, [x1], x3 // row 4
+ add v2.8h, v12.8h , v14.8h
+ add v26.8h, v16.8h , v8.8h
+ dup v20.8h, v22.h[6]
+ sqrshrun v28.8b, v2.8h, #5
+ sqrshrun v29.8b, v26.8h, #5
+ mul v4.8h, v6.8h , v20.8h
+ mul v18.8h, v6.8h , v20.8h
+ st1 {v28.8b, v29.8b}, [x1], x3 // row 5
+ add v24.8h, v12.8h , v4.8h
+ add v0.8h, v16.8h , v18.8h
+ dup v30.8h, v22.h[7]
+ sqrshrun v28.8b, v24.8h, #5
+ sqrshrun v29.8b, v0.8h, #5
+ mul v14.8h, v6.8h , v30.8h
+ mul v8.8h, v6.8h , v30.8h
+ st1 {v28.8b, v29.8b}, [x1], x3 // row 6
+ add v2.8h, v12.8h , v14.8h
+ add v26.8h, v16.8h , v8.8h
+ sqrshrun v28.8b, v2.8h, #5
+ sqrshrun v29.8b, v26.8h, #5
+ st1 {v28.8b, v29.8b}, [x1], x3 // row 7
+
+end_func_plane:
+// restore in the reverse order of the prologue
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
new file mode 100755
index 0000000..a9eb165
--- /dev/null
+++ b/common/armv8/ih264_intra_pred_luma_16x16_av8.s
@@ -0,0 +1,606 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_intra_pred_luma_16x16_av8.s
+//*
+//* @brief
+//* Contains function definitions for intra 16x16 Luma prediction .
+//*
+//* @author
+//* Ittiam
+//*
+//* @par List of Functions:
+//*
+//* - ih264_intra_pred_luma_16x16_mode_vert_av8()
+//* - ih264_intra_pred_luma_16x16_mode_horz_av8()
+//* - ih264_intra_pred_luma_16x16_mode_dc_av8()
+//* - ih264_intra_pred_luma_16x16_mode_plane_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_intra_pred_filters.c
+//
+
+///**
+///**
+///**
+//
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+.extern ih264_gai1_intrapred_luma_plane_coeffs
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_16x16_mode_vert
+//*
+//* @brief
+//* Perform Intra prediction for luma_16x16 mode:vertical
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_16x16 mode:Vertical ,described in sec 8.3.3.1
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_16x16_mode_vert_av8
+// Copies the 16 bytes found at pu1_src + 17 into 16 consecutive destination rows.
+ih264_intra_pred_luma_16x16_mode_vert_av8:
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before x3 is used as a 64-bit post-index
+
+ add x0, x0, #17 // x0 = pu1_src + 17
+ ld1 {v0.8b, v1.8b}, [x0] // v0:v1 = the 16 source bytes that every row repeats
+
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 0; x1 advances by dst_strd after each store
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3
+ st1 {v0.8b, v1.8b}, [x1], x3 // row 15
+
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_16x16_mode_horz
+//*
+//* @brief
+//* Perform Intra prediction for luma_16x16 mode:horizontal
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_16x16 mode:horizontal ,described in sec 8.3.3.2
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_16x16_mode_horz_av8
+// Fills destination row r (r = 0..15) with byte (15 - r) of the 16 bytes at pu1_src.
+ih264_intra_pred_luma_16x16_mode_horz_av8:
+
+
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here); v10-v15 are written below
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before x3 is used as a 64-bit post-index
+ ld1 {v0.16b}, [x0] // v0 = 16 bytes at pu1_src
+
+
+
+ dup v10.16b, v0.b[15] // row 0 value
+ dup v11.16b, v0.b[14]
+ dup v12.16b, v0.b[13]
+ dup v13.16b, v0.b[12]
+ st1 {v10.16b}, [x1], x3 // stores are interleaved with the remaining broadcasts
+ dup v14.16b, v0.b[11]
+ st1 {v11.16b}, [x1], x3
+ dup v15.16b, v0.b[10]
+ st1 {v12.16b}, [x1], x3
+ dup v16.16b, v0.b[9]
+ st1 {v13.16b}, [x1], x3
+ dup v17.16b, v0.b[8]
+ st1 {v14.16b}, [x1], x3
+ dup v18.16b, v0.b[7]
+ st1 {v15.16b}, [x1], x3
+ dup v19.16b, v0.b[6]
+ st1 {v16.16b}, [x1], x3
+ dup v20.16b, v0.b[5]
+ st1 {v17.16b}, [x1], x3
+ dup v21.16b, v0.b[4]
+ st1 {v18.16b}, [x1], x3
+ dup v22.16b, v0.b[3]
+ st1 {v19.16b}, [x1], x3
+ dup v23.16b, v0.b[2]
+ st1 {v20.16b}, [x1], x3
+ dup v24.16b, v0.b[1]
+ st1 {v21.16b}, [x1], x3
+ dup v25.16b, v0.b[0] // row 15 value
+ st1 {v22.16b}, [x1], x3
+ st1 {v23.16b}, [x1], x3
+ st1 {v24.16b}, [x1], x3
+ st1 {v25.16b}, [x1], x3
+
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_16x16_mode_dc
+//*
+//* @brief
+//* Perform Intra prediction for luma_16x16 mode:DC
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_16x16 mode:DC ,described in sec 8.3.3.3
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_16x16_mode_dc_av8
+// Fills the 16x16 block with one value: the rounded mean of the available neighbours, or 128 if none.
+ih264_intra_pred_luma_16x16_mode_dc_av8:
+
+
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before x3 is used as a 64-bit post-index
+ sub v0.16b, v0.16b, v0.16b // v0 = 0 (left samples stay zero if not loaded)
+ sub v1.16b, v1.16b, v1.16b // v1 = 0 (top samples stay zero if not loaded)
+ mov w10, #0 // w10 = rounding term, +8 per neighbour set that is loaded
+ mov w11 , #3 // w11 = shift count, +1 per neighbour set that is loaded
+ ands x6, x4, #0x01 // bit 0x01 of ui_neighboravailability: left
+ beq top_available //LEFT NOT AVAILABLE
+ ld1 {v0.16b}, [x0] // 16 left samples at pu1_src
+ add w10, w10, #8
+ add w11, w11, #1
+top_available:
+ ands x6, x4, #0x04 // bit 0x04 of ui_neighboravailability: top
+ beq none_available
+ add x6, x0, #17
+ ld1 {v1.16b}, [x6] // 16 top samples at pu1_src + 17
+ add w10, w10, #8
+ add w11, w11, #1
+ b summation
+none_available: // reached whenever top is not available
+ cmp w10, #0 // w10 != 0 only if left was loaded; x4 is not tested because its upper half is unspecified and other bits may be set
+ bne summation
+ mov w15, #128 // neither left nor top: constant 128
+ dup v20.16b, w15
+ b store
+summation:
+ uaddl v2.8h, v0.8b, v1.8b // widen and add low halves of left and top
+ uaddl2 v3.8h, v0.16b, v1.16b // widen and add high halves
+ dup v10.8h, w10
+ neg w11, w11 // negative count makes uqshl shift right
+ dup v20.8h, w11
+ add v0.8h, v2.8h, v3.8h
+ mov v1.d[0], v0.d[1]
+ add v0.4h, v0.4h, v1.4h
+ addp v0.4h, v0.4h , v0.4h
+ addp v0.4h, v0.4h , v0.4h // every lane now holds the total of all loaded samples
+ add v0.4h, v0.4h, v10.4h // add rounding term
+ uqshl v0.8h, v0.8h, v20.8h // shift right by w11's original magnitude (4 or 5)
+ sqxtun v0.8b, v0.8h
+ dup v20.16b, v0.b[0] // broadcast the DC value
+
+store:
+
+ st1 { v20.16b}, [x1], x3 // 16 identical rows
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+ st1 { v20.16b}, [x1], x3
+
+
+
+end_func:
+
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_16x16_mode_plane
+//*
+//* @brief
+//* Perform Intra prediction for luma_16x16 mode:PLANE
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_16x16 mode:PLANE ,described in sec 8.3.3.4
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_16x16_mode_plane_av8
+ih264_intra_pred_luma_16x16_mode_plane_av8:
+// Plane prediction: derives i_a, i_b, i_c from the neighbours and writes 16 rows of 16 clipped samples.
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ stp x19, x20, [sp, #-16]! // x20 is used as a scratch register below
+ mov x2, x1 // x2 = pu1_dst (the src_strd argument is not used)
+ add x1, x0, #17
+ add x0, x0, #15 // x0 = pu1_src + 15
+ mov x8, #9
+ sub x1, x1, #1 // x1 = pu1_src + 16
+ mov x10, x1 //top_left
+ mov x4, #-1 // ui_neighboravailability is not used; x4 becomes the constant -1
+ ld1 {v2.2s}, [x1], x8 // 8 bytes from pu1_src + 16, then x1 = pu1_src + 25
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before x3 is used as a 64-bit post-index
+ adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs
+ ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs]
+
+ ld1 {v0.2s}, [x1] // 8 bytes from pu1_src + 25
+ rev64 v2.8b, v2.8b // reverse the first group so lanes pair up for the differences
+ ld1 {v6.2s, v7.2s}, [x7] // 16 coefficient bytes from the external table
+ usubl v0.8h, v0.8b, v2.8b
+ uxtl v16.8h, v6.8b
+ mul v0.8h, v0.8h , v16.8h
+ uxtl v18.8h, v7.8b
+ add x7, x0, x4, lsl #3 // x7 = pu1_src + 7, walked downwards
+ sub x0, x7, x4, lsl #1 // x0 = pu1_src + 9, walked upwards
+ sub x20, x4, #0x0
+ neg x14, x20
+ addp v0.8h, v0.8h, v1.8h // only the low four lanes are consumed by the saddlp below
+ ldrb w8, [x7], #-1
+ sxtw x8, w8
+ ldrb w9, [x0], #1
+ sxtw x9, w9
+ saddlp v0.2s, v0.4h
+ sub x12, x8, x9 // x12 accumulates the weighted byte differences (weight 1 here)
+ ldrb w8, [x7], #-1
+ sxtw x8, w8
+ saddlp v0.1d, v0.2s
+ ldrb w9, [x0], #1
+ sxtw x9, w9
+ sub x8, x8, x9
+ shl v2.2s, v0.2s, #2
+ add x12, x12, x8, lsl #1 // weight 2
+ add v0.2s, v0.2s , v2.2s // sum * 5
+ ldrb w8, [x7], #-1
+ sxtw x8, w8
+ ldrb w9, [x0], #1
+ sxtw x9, w9
+ srshr v0.2s, v0.2s, #6 // i_b = D0[0]
+ sub x8, x8, x9
+ ldrb w5, [x7], #-1
+ sxtw x5, w5
+ add x8, x8, x8, lsl #1 // weight 3
+ dup v4.8h, v0.4h[0] // v4 = i_b in every lane
+ add x12, x12, x8
+ ldrb w9, [x0], #1
+ sxtw x9, w9
+ mul v0.8h, v4.8h , v16.8h // i_b * coefficients 0..7
+ sub x5, x5, x9
+ mul v2.8h, v4.8h , v18.8h // i_b * coefficients 8..15
+ add x12, x12, x5, lsl #2 // weight 4
+ ldrb w8, [x7], #-1
+ sxtw x8, w8
+ ldrb w9, [x0], #1
+ sxtw x9, w9
+ sub x8, x8, x9
+ ldrb w5, [x7], #-1
+ sxtw x5, w5
+ add x8, x8, x8, lsl #2 // weight 5
+ ldrb w6, [x0], #1
+ sxtw x6, w6
+ add x12, x12, x8
+ ldrb w8, [x7], #-1
+ sxtw x8, w8
+ ldrb w9, [x0], #1
+ sxtw x9, w9
+ sub x5, x5, x6
+ sub x8, x8, x9
+ add x5, x5, x5, lsl #1
+ sub x20, x8, x8, lsl #3 // x20 = -7 * difference
+ neg x8, x20 // weight 7
+ add x12, x12, x5, lsl #1 // weight 6
+ ldrb w5, [x7], #-1 // byte at pu1_src + 0
+ sxtw x5, w5
+ ldrb w6, [x10] //top_left
+ sxtw x6, w6
+ add x12, x12, x8
+ sub x9, x5, x6
+ ldrb w6, [x1, #7] // byte at pu1_src + 32
+ sxtw x6, w6
+ add x12, x12, x9, lsl #3 // i_c = x12
+ add x8, x5, x6
+ add x12, x12, x12, lsl #2 // * 5
+ lsl x8, x8, #4 // i_a = x8
+ add x12, x12, #0x20 // + 32 for rounding
+ lsr x12, x12, #6 // only the low 16 bits are used by the dup below
+ shl v28.8h, v4.8h, #3 // 8 * i_b
+ dup v6.8h, w12 // v6 = i_c in every lane
+ dup v30.8h, w8
+ shl v26.8h, v6.8h, #3 // 8 * i_c
+ sub v30.8h, v30.8h , v28.8h
+ sub v30.8h, v30.8h , v26.8h // i_a - 8*i_b - 8*i_c
+ add v28.8h, v30.8h , v6.8h
+ add v26.8h, v28.8h , v0.8h // row 0, columns 0..7
+ add v28.8h, v28.8h , v2.8h // row 0, columns 8..15
+ sqrshrun v20.8b, v26.8h, #5 // rounding shift right by 5, narrowed with unsigned saturation
+ sqrshrun v21.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h // each following row adds i_c
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v22.8b, v26.8h, #5
+ st1 {v20.2s, v21.2s}, [x2], x3
+ sqrshrun v23.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v20.8b, v26.8h, #5
+ st1 {v22.2s, v23.2s}, [x2], x3
+ sqrshrun v21.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v22.8b, v26.8h, #5
+ st1 {v20.2s, v21.2s}, [x2], x3
+ sqrshrun v23.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v20.8b, v26.8h, #5
+ st1 {v22.2s, v23.2s}, [x2], x3
+ sqrshrun v21.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v22.8b, v26.8h, #5
+ st1 {v20.2s, v21.2s}, [x2], x3
+ sqrshrun v23.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v20.8b, v26.8h, #5
+ st1 {v22.2s, v23.2s}, [x2], x3
+ sqrshrun v21.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v22.8b, v26.8h, #5
+ st1 {v20.2s, v21.2s}, [x2], x3
+ sqrshrun v23.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v20.8b, v26.8h, #5
+ st1 {v22.2s, v23.2s}, [x2], x3
+ sqrshrun v21.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v22.8b, v26.8h, #5
+ st1 {v20.2s, v21.2s}, [x2], x3
+ sqrshrun v23.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v20.8b, v26.8h, #5
+ st1 {v22.2s, v23.2s}, [x2], x3
+ sqrshrun v21.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v22.8b, v26.8h, #5
+ st1 {v20.2s, v21.2s}, [x2], x3
+ sqrshrun v23.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v20.8b, v26.8h, #5
+ st1 {v22.2s, v23.2s}, [x2], x3
+ sqrshrun v21.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v22.8b, v26.8h, #5
+ st1 {v20.2s, v21.2s}, [x2], x3
+ sqrshrun v23.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v20.8b, v26.8h, #5
+ st1 {v22.2s, v23.2s}, [x2], x3
+ sqrshrun v21.8b, v28.8h, #5
+ add v26.8h, v26.8h , v6.8h
+ add v28.8h, v28.8h , v6.8h
+ sqrshrun v22.8b, v26.8h, #5
+ st1 {v20.2s, v21.2s}, [x2], x3
+ sqrshrun v23.8b, v28.8h, #5
+ st1 {v22.2s, v23.2s}, [x2], x3 // row 15
+
+end_func_plane:
+
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
diff --git a/common/armv8/ih264_intra_pred_luma_4x4_av8.s b/common/armv8/ih264_intra_pred_luma_4x4_av8.s
new file mode 100755
index 0000000..62e8cee
--- /dev/null
+++ b/common/armv8/ih264_intra_pred_luma_4x4_av8.s
@@ -0,0 +1,876 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_intra_pred_luma_4x4_av8.s
+//*
+//* @brief
+//* Contains function definitions for intra 4x4 Luma prediction .
+//*
+//* @author
+//* Ittiam
+//*
+//* @par List of Functions:
+//*
+//* -ih264_intra_pred_luma_4x4_mode_vert_av8
+//* -ih264_intra_pred_luma_4x4_mode_horz_av8
+//* -ih264_intra_pred_luma_4x4_mode_dc_av8
+//* -ih264_intra_pred_luma_4x4_mode_diag_dl_av8
+//* -ih264_intra_pred_luma_4x4_mode_diag_dr_av8
+//* -ih264_intra_pred_luma_4x4_mode_vert_r_av8
+//* -ih264_intra_pred_luma_4x4_mode_horz_d_av8
+//* -ih264_intra_pred_luma_4x4_mode_vert_l_av8
+//* -ih264_intra_pred_luma_4x4_mode_horz_u_av8
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_intra_pred_filters.c
+//
+
+///**
+///**
+///**
+//
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_vert
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:vertical
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_4x4_mode_vert_av8
+// Copies the 4 bytes found at pu1_src + 5 into 4 consecutive destination rows.
+ih264_intra_pred_luma_4x4_mode_vert_av8:
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before x3 is used as a 64-bit post-index
+ add x0, x0, #5 // x0 = pu1_src + 5
+
+ ld1 {v0.s}[0], [x0] // 4 source bytes in lane s[0]
+ st1 {v0.s}[0], [x1], x3 // row 0; x1 advances by dst_strd after each store
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3 // row 3
+
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_horz
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:horizontal
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+
+ .global ih264_intra_pred_luma_4x4_mode_horz_av8
+// Fills destination row r (r = 0..3) with byte (3 - r) of the 4 bytes at pu1_src.
+ih264_intra_pred_luma_4x4_mode_horz_av8:
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before x3 is used as a 64-bit post-index
+ ld1 {v1.s}[0], [x0] // 4 bytes at pu1_src
+ dup v0.8b, v1.b[3] // row 0 value
+ dup v2.8b, v1.b[2] // row 1 value
+ st1 {v0.s}[0], [x1], x3
+ dup v3.8b, v1.b[1] // row 2 value
+ st1 {v2.s}[0], [x1], x3
+ dup v4.8b, v1.b[0] // row 3 value
+ st1 {v3.s}[0], [x1], x3
+ st1 {v4.s}[0], [x1], x3
+
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_dc
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:DC
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+
+ .global ih264_intra_pred_luma_4x4_mode_dc_av8
+// Fills the 4x4 block with one value: the rounded mean of the available neighbours, or 128 if none.
+ih264_intra_pred_luma_4x4_mode_dc_av8:
+
+
+
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before x3 is used as a 64-bit post-index
+ ands x5, x4, #0x01 // bit 0x01 of ui_neighboravailability: left
+ beq top_available //LEFT NOT AVAILABLE
+
+ add x10, x0, #3 // left samples are read from pu1_src + 3 down to pu1_src + 0
+ mov x2, #-1 // x2 is not read again in this function
+ ldrb w5, [x10], #-1
+ sxtw x5, w5
+ ldrb w6, [x10], #-1
+ sxtw x6, w6
+ ldrb w7, [x10], #-1
+ sxtw x7, w7
+ add x5, x5, x6
+ ldrb w8, [x10], #-1
+ sxtw x8, w8
+ add x5, x5, x7
+ ands x11, x4, #0x04 // CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
+ add x5, x5, x8 // x5 = sum of the 4 left samples (add does not change the flags set by ands)
+ beq left_available
+ add x10, x0, #5 // top samples are read from pu1_src + 5 .. pu1_src + 8
+ // BOTH LEFT AND TOP AVAILABLE
+ ldrb w6, [x10], #1
+ sxtw x6, w6
+ ldrb w7, [x10], #1
+ sxtw x7, w7
+ add x5, x5, x6
+ ldrb w8, [x10], #1
+ sxtw x8, w8
+ add x5, x5, x7
+ ldrb w9, [x10], #1
+ sxtw x9, w9
+ add x5, x5, x8
+ add x5, x5, x9
+ add x5, x5, #4 // (sum of 8 samples + 4) >> 3
+ lsr x5, x5, #3
+ dup v0.8b, w5
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ b end_func
+
+top_available: // ONLY TOP AVAILABLE
+ ands x11, x4, #0x04 // CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
+ beq none_available
+
+ add x10, x0, #5 // top samples are read from pu1_src + 5 .. pu1_src + 8
+ ldrb w6, [x10], #1
+ sxtw x6, w6
+ ldrb w7, [x10], #1
+ sxtw x7, w7
+ ldrb w8, [x10], #1
+ sxtw x8, w8
+ add x5, x6, x7
+ ldrb w9, [x10], #1
+ sxtw x9, w9
+ add x5, x5, x8
+ add x5, x5, x9
+ add x5, x5, #2 // (sum of 4 samples + 2) >> 2
+ lsr x5, x5, #2
+ dup v0.8b, w5
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ b end_func
+
+left_available: //ONLY LEFT AVAILABLE
+ add x5, x5, #2 // (sum of 4 left samples + 2) >> 2
+ lsr x5, x5, #2
+ dup v0.8b, w5
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ b end_func
+
+none_available: //NONE AVAILABLE
+ mov x5, #128 // constant 128 when neither left nor top is available
+ dup v0.8b, w5
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ st1 {v0.s}[0], [x1], x3
+ b end_func
+
+
+end_func:
+
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_diag_dl
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8
+// Diagonal-down-left: applies a (a + 2b + c + 2) >> 2 filter to the 8 bytes at pu1_src + 5 and writes 4 shifted rows.
+ih264_intra_pred_luma_4x4_mode_diag_dl_av8:
+
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before the 64-bit uses of x3 below
+ add x0, x0, #5 // x0 = pu1_src + 5
+ sub x5, x3, #2 // x5 = dst_strd - 2: second half-row store advances to the next row
+ add x6, x0, #7 // x6 = address of the last of the 8 bytes
+ ld1 {v0.8b}, [x0]
+ ext v1.8b, v0.8b , v0.8b , #1 // v1 = v0 rotated by one byte
+ ext v2.8b, v0.8b , v0.8b , #2 // v2 = v0 rotated by two bytes
+ ld1 {v2.b}[6], [x6] // replace the wrapped-around lane 6 with the last source byte
+ uaddl v20.8h, v0.8b, v1.8b
+ uaddl v22.8h, v1.8b, v2.8b
+ add v24.8h, v20.8h , v22.8h // v0 + 2*v1 + v2 per lane
+ sqrshrun v3.8b, v24.8h, #2 // rounding shift right by 2, narrowed with unsigned saturation
+ st1 {v3.s}[0], [x1], x3 // row 0 = filtered bytes 0..3
+ ext v4.8b, v3.8b , v3.8b , #1
+ st1 {v4.s}[0], [x1], x3 // row 1 = filtered bytes 1..4
+ st1 {v3.h}[1], [x1], #2 // row 2 = filtered bytes 2..5, written as two halfwords
+ st1 {v3.h}[2], [x1], x5
+ st1 {v4.h}[1], [x1], #2 // row 3 = filtered bytes 3..6
+ st1 {v4.h}[2], [x1]
+
+end_func_diag_dl:
+
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_diag_dr
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8
+// Diagonal-down-right: applies a (a + 2b + c + 2) >> 2 filter to the bytes starting at pu1_src and writes 4 shifted rows.
+ih264_intra_pred_luma_4x4_mode_diag_dr_av8:
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before the 64-bit uses of x3 below
+
+ ld1 {v0.8b}, [x0] // bytes 0..7 from pu1_src
+ add x0, x0, #1
+ ld1 {v1.8b}, [x0] // bytes 1..8 from pu1_src
+ ext v2.8b, v1.8b , v1.8b , #1 // v1 rotated by one byte
+ uaddl v20.8h, v0.8b, v1.8b
+ uaddl v22.8h, v1.8b, v2.8b
+ add v24.8h, v20.8h , v22.8h // v0 + 2*v1 + v2 per lane
+ sqrshrun v3.8b, v24.8h, #2 // rounding shift right by 2, narrowed with unsigned saturation
+
+ ext v4.8b, v3.8b , v3.8b , #1
+ sub x5, x3, #2 // x5 = dst_strd - 2: second half-row store advances to the next row
+ st1 {v4.h}[1], [x1], #2 // row 0 = filtered bytes 3..6, written as two halfwords
+ st1 {v4.h}[2], [x1], x5
+ st1 {v3.h}[1], [x1], #2 // row 1 = filtered bytes 2..5
+ st1 {v3.h}[2], [x1], x5
+ st1 {v4.s}[0], [x1], x3 // row 2 = filtered bytes 1..4
+ st1 {v3.s}[0], [x1], x3 // row 3 = filtered bytes 0..3
+
+end_func_diag_dr:
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_vert_r
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:Vertical_Right
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_vert_r_av8
+// Vertical-right: builds 2-tap and 3-tap rounded averages of the bytes starting at pu1_src and scatters them into 4 rows.
+ih264_intra_pred_luma_4x4_mode_vert_r_av8:
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here)
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before the 64-bit uses of x3 below
+
+ ld1 {v0.8b}, [x0] // bytes 0..7 from pu1_src
+ add x0, x0, #1
+ ld1 {v1.8b}, [x0] // bytes 1..8 from pu1_src
+ ext v2.8b, v1.8b , v1.8b , #1 // v1 rotated by one byte
+ uaddl v20.8h, v0.8b, v1.8b
+ uaddl v22.8h, v1.8b, v2.8b
+ add v24.8h, v20.8h , v22.8h // v0 + 2*v1 + v2 per lane
+ sqrshrun v4.8b, v20.8h, #1 // 2-tap: (a + b + 1) >> 1
+ sqrshrun v3.8b, v24.8h, #2 // 3-tap: (a + 2b + c + 2) >> 2
+ sub x5, x3, #2 // x5 is not read again in this function
+ ext v5.8b, v3.8b , v3.8b , #3 // 3-tap results rotated by three bytes
+ st1 {v4.s}[1], [x1], x3 // row 0 = 2-tap bytes 4..7
+ st1 {v5.s}[0], [x1], x3 // row 1 = 3-tap bytes 3..6
+ sub x8, x3, #3 // x8 = dst_strd - 3: last store of a 1+2+1 byte row advances to the next row
+ st1 {v3.b}[2], [x1], #1 // row 2 = 3-tap byte 2, then 2-tap bytes 4..6
+ st1 {v4.h}[2], [x1], #2
+ st1 {v4.b}[6], [x1], x8
+ st1 {v3.b}[1], [x1], #1 // row 3 = 3-tap byte 1, then 3-tap bytes 3..5
+ st1 {v5.h}[0], [x1], #2
+ st1 {v5.b}[2], [x1]
+
+
+end_func_vert_r:
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_horz_d
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:Horizontal_Down
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_horz_d_av8
+// Horizontal-down: interleaves 2-tap and 3-tap rounded averages of the bytes starting at pu1_src into 4 rows.
+ih264_intra_pred_luma_4x4_mode_horz_d_av8:
+
+ push_v_regs // macro from ih264_neon_macros.s (not visible here); v10 is written below
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before the 64-bit use of x3 below
+ ld1 {v0.8b}, [x0] // bytes 0..7 from pu1_src
+ add x0, x0, #1
+ ld1 {v1.8b}, [x0] // bytes 1..8 from pu1_src
+ ext v2.8b, v1.8b , v0.8b , #1 // v1 bytes 1..7 followed by v0 byte 0
+ uaddl v20.8h, v0.8b, v1.8b
+ uaddl v22.8h, v1.8b, v2.8b
+ add v24.8h, v20.8h , v22.8h // v0 + 2*v1 + v2 per lane
+ sqrshrun v4.8b, v20.8h, #1 // 2-tap: (a + b + 1) >> 1
+ sqrshrun v5.8b, v24.8h, #2 // 3-tap: (a + 2b + c + 2) >> 2
+ sub x5, x3, #2 // x5 = dst_strd - 2: second half-row store advances to the next row
+ mov v6.8b, v5.8b // keep an untransposed copy of the 3-tap results
+ trn1 v10.8b, v4.8b, v5.8b
+ trn2 v5.8b, v4.8b, v5.8b //
+ mov v4.8b, v10.8b // v4/v5 now hold the interleaved even/odd byte pairs
+ st1 {v5.h}[1], [x1], #2 // row 0
+ st1 {v6.h}[2], [x1], x5
+ st1 {v4.h}[1], [x1], #2 // row 1
+ st1 {v5.h}[1], [x1], x5
+ st1 {v5.h}[0], [x1], #2 // row 2
+ st1 {v4.h}[1], [x1], x5
+ st1 {v4.h}[0], [x1], #2 // row 3
+ st1 {v5.h}[0], [x1], x5
+
+end_func_horz_d:
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_vert_l
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:Vertical_Left
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_vert_l_av8
+// Vertical-left: alternates 2-tap and 3-tap rounded averages of the bytes starting at pu1_src + 4 over 4 rows.
+ih264_intra_pred_luma_4x4_mode_vert_l_av8:
+ push_v_regs // macro from ih264_neon_macros.s (not visible here); v8 and v9 are written below
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before x3 is used as a 64-bit post-index
+ add x0, x0, #4
+ ld1 {v0.8b}, [x0] // 8 bytes from pu1_src + 4
+ add x0, x0, #1
+ ld1 {v1.8b}, [x0] // 8 bytes from pu1_src + 5
+ ext v2.8b, v1.8b , v0.8b , #1 // v1 bytes 1..7 followed by v0 byte 0
+ uaddl v20.8h, v0.8b, v1.8b
+ uaddl v22.8h, v1.8b, v2.8b
+ add v24.8h, v20.8h , v22.8h // v0 + 2*v1 + v2 per lane
+ sqrshrun v4.8b, v20.8h, #1 // 2-tap: (a + b + 1) >> 1
+ sqrshrun v5.8b, v24.8h, #2 // 3-tap: (a + 2b + c + 2) >> 2
+ ext v6.8b, v4.8b , v4.8b , #1
+ ext v7.8b, v5.8b , v5.8b , #1
+ st1 {v6.s}[0], [x1], x3 // row 0 = 2-tap bytes 1..4
+ ext v8.8b, v4.8b , v4.8b , #2
+ ext v9.8b, v5.8b , v5.8b , #2
+ st1 {v7.s}[0], [x1], x3 // row 1 = 3-tap bytes 1..4
+ st1 {v8.s}[0], [x1], x3 // row 2 = 2-tap bytes 2..5
+ st1 {v9.s}[0], [x1], x3 // row 3 = 3-tap bytes 2..5
+
+end_func_vert_l:
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_4x4_mode_horz_u
+//*
+//* @brief
+//* Perform Intra prediction for luma_4x4 mode:Horizontal_Up
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_4x4_mode_horz_u_av8
+// Horizontal-up: interleaves 2-tap and 3-tap rounded averages of the bytes at pu1_src, padding with the byte at pu1_src[0].
+ih264_intra_pred_luma_4x4_mode_horz_u_av8:
+ push_v_regs // macro from ih264_neon_macros.s (not visible here); v10 is written below
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a 32-bit WORD32: sign-extend before the 64-bit uses of x3 below
+ mov x10, x0 // x10 keeps pu1_src for the lane reload below
+ ld1 {v0.8b}, [x0] // bytes 0..7 from pu1_src
+ ldrb w9, [x0], #1 // w9 = pu1_src[0], broadcast later as the padding value
+ sxtw x9, w9
+ ext v1.8b, v0.8b , v0.8b , #1 // v0 rotated by one byte
+ ld1 {v0.b}[7], [x10] // overwrite lane 7 of v0 with pu1_src[0]
+ ext v2.8b, v1.8b , v1.8b , #1
+ uaddl v20.8h, v0.8b, v1.8b
+ uaddl v22.8h, v1.8b, v2.8b
+ add v24.8h, v20.8h , v22.8h // v0 + 2*v1 + v2 per lane
+ sqrshrun v4.8b, v20.8h, #1 // 2-tap: (a + b + 1) >> 1
+ sqrshrun v5.8b, v24.8h, #2 // 3-tap: (a + 2b + c + 2) >> 2
+ mov v6.8b, v4.8b // overwritten by the ext on the next line
+ ext v6.8b, v5.8b , v4.8b , #1
+ st1 {v4.b}[2], [x1], #1 // row 0, byte 0
+ st1 {v6.b}[0], [x1], #1 // row 0, byte 1
+ trn1 v10.8b, v6.8b, v5.8b
+ trn2 v5.8b, v6.8b, v5.8b //
+ mov v6.8b , v10.8b
+ sub x5, x3, #2 // x5 = dst_strd - 2: second half-row store advances to the next row
+ trn1 v10.8b, v4.8b, v6.8b
+ trn2 v6.8b, v4.8b, v6.8b //
+ mov v4.8b , v10.8b
+ dup v7.8b, w9 // v7 = pu1_src[0] in every lane
+ st1 {v6.h}[0], [x1], x5 // row 0, bytes 2..3
+ st1 {v6.h}[0], [x1], #2 // row 1
+ st1 {v5.h}[3], [x1], x5
+ st1 {v5.h}[3], [x1], #2 // row 2
+ st1 {v7.h}[3], [x1], x5
+ st1 {v7.s}[0], [x1], x3 // row 3 = padding value only
+
+end_func_horz_u:
+ ldp x19, x20, [sp], #16
+ pop_v_regs // counterpart of push_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
new file mode 100755
index 0000000..2b972ca
--- /dev/null
+++ b/common/armv8/ih264_intra_pred_luma_8x8_av8.s
@@ -0,0 +1,1084 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_intra_pred_luma_8x8_av8.s
+//*
+//* @brief
+//* Contains function definitions for intra 8x8 Luma prediction .
+//*
+//* @author
+//* Ittiam
+//*
+//* @par List of Functions:
+//*
+//* -ih264_intra_pred_luma_8x8_mode_vert_av8
+//* -ih264_intra_pred_luma_8x8_mode_horz_av8
+//* -ih264_intra_pred_luma_8x8_mode_dc_av8
+//* -ih264_intra_pred_luma_8x8_mode_diag_dl_av8
+//* -ih264_intra_pred_luma_8x8_mode_diag_dr_av8
+//* -ih264_intra_pred_luma_8x8_mode_vert_r_av8
+//* -ih264_intra_pred_luma_8x8_mode_horz_d_av8
+//* -ih264_intra_pred_luma_8x8_mode_vert_l_av8
+//* -ih264_intra_pred_luma_8x8_mode_horz_u_av8
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+
+///* All the functions here are replicated from ih264_intra_pred_filters.c
+//
+
+///**
+///**
+///**
+
+.text // everything below is emitted into the code section
+.p2align 2 // align to a 2^2 = 4 byte boundary
+.include "ih264_neon_macros.s"
+// Byte table defined outside this file; ih264_intra_pred_luma_8x8_mode_horz_u_av8 loads it as TBL indices.
+.extern ih264_gai1_intrapred_luma_8x8_horz_u
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_vert
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:vertical
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_av8 // 8x8 luma intra prediction, vertical mode
+
+ih264_intra_pred_luma_8x8_mode_vert_av8:
+
+ // Copies the 8 bytes at pu1_src + 9 into each of the 8 destination rows.
+ push_v_regs
+ // x19/x20 are not spilled here; only x0, x1, x3 and v0 are touched below.
+
+ add x0, x0, #9 // x0 = pu1_src + 9
+ ld1 {v0.8b}, [x0] // v0 = pu1_src[9..16]
+
+ st1 {v0.8b}, [x1], x3 // row 0, then x1 += dst_strd
+ st1 {v0.8b}, [x1], x3 // row 1
+ st1 {v0.8b}, [x1], x3 // row 2
+ st1 {v0.8b}, [x1], x3 // row 3
+ st1 {v0.8b}, [x1], x3 // row 4
+ st1 {v0.8b}, [x1], x3 // row 5
+ st1 {v0.8b}, [x1], x3 // row 6
+ st1 {v0.8b}, [x1], x3 // row 7
+
+ // Undo the prologue and return.
+ // (no x19/x20 reload: they were never pushed in this routine)
+ pop_v_regs
+ ret
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_horz
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:horizontal
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels(Not used in this function)
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_av8 // 8x8 luma intra prediction, horizontal mode
+
+ih264_intra_pred_luma_8x8_mode_horz_av8:
+
+
+
+ // Row r of the output is pu1_src[7 - r] replicated across 8 bytes; loads and stores are interleaved.
+ push_v_regs
+ stp x19, x20, [sp, #-16]! // x19/x20 are spilled although nothing below touches them
+ add x0, x0, #7 // x0 = pu1_src + 7; walked downwards by the post-decrementing loads
+ mov x2 , #-1 // NOTE(review): x2 is not read again in this routine
+
+ ldrb w5, [x0], #-1 // w5 = pu1_src[7]
+ sxtw x5, w5 // value is 0..255 after LDRB, so this does not change it
+ ldrb w6, [x0], #-1 // w6 = pu1_src[6]
+ sxtw x6, w6
+ dup v0.8b, w5
+ st1 {v0.8b}, [x1], x3 // row 0
+ ldrb w7, [x0], #-1 // w7 = pu1_src[5]
+ sxtw x7, w7
+ dup v1.8b, w6
+ st1 {v1.8b}, [x1], x3 // row 1
+ dup v2.8b, w7
+ ldrb w8, [x0], #-1 // w8 = pu1_src[4]
+ sxtw x8, w8
+ dup v3.8b, w8
+ st1 {v2.8b}, [x1], x3 // row 2
+ ldrb w5, [x0], #-1 // w5 = pu1_src[3]
+ sxtw x5, w5
+ st1 {v3.8b}, [x1], x3 // row 3
+ dup v0.8b, w5
+ ldrb w6, [x0], #-1 // w6 = pu1_src[2]
+ sxtw x6, w6
+ st1 {v0.8b}, [x1], x3 // row 4
+ ldrb w7, [x0], #-1 // w7 = pu1_src[1]
+ sxtw x7, w7
+ dup v1.8b, w6
+ dup v2.8b, w7
+ st1 {v1.8b}, [x1], x3 // row 5
+ ldrb w8, [x0], #-1 // w8 = pu1_src[0]
+ sxtw x8, w8
+ dup v3.8b, w8
+ st1 {v2.8b}, [x1], x3 // row 6
+ st1 {v3.8b}, [x1], x3 // row 7
+
+ // Undo the prologue (reverse order of the pushes) and return.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+
+
+///******************************************************************************
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_dc
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:DC
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.3
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_dc_av8 // 8x8 luma intra prediction, DC mode
+
+ih264_intra_pred_luma_8x8_mode_dc_av8:
+
+
+
+ // Fills the 8x8 block with one DC value chosen from bits 0 (left) and 2 (top) of ui_neighboravailability.
+ push_v_regs
+ stp x19, x20, [sp, #-16]! // x19/x20 are spilled although nothing below touches them
+
+ ands x6, x4, #0x01 // test bit 0 of ui_neighboravailability
+ beq top_available // bit 0 clear: left neighbours not available
+
+ add x10, x0, #7 // x10 = pu1_src + 7; walked downwards to pu1_src[0]
+ mov x2, #-1 // NOTE(review): x2 is not read again in this routine
+ ldrb w5, [x10], -1 // x5 accumulates pu1_src[7] + ... + pu1_src[0]
+ sxtw x5, w5
+ ldrb w6, [x10], -1
+ sxtw x6, w6
+ ldrb w7, [x10], -1
+ sxtw x7, w7
+ add x5, x5, x6
+ ldrb w8, [x10], -1
+ sxtw x8, w8
+ add x5, x5, x7
+ ldrb w6, [x10], -1
+ sxtw x6, w6
+ add x5, x5, x8
+ ldrb w7, [x10], -1
+ sxtw x7, w7
+ add x5, x5, x6
+ ldrb w8, [x10], -1
+ sxtw x8, w8
+ add x5, x5, x7
+ ands x11, x4, #0x04 // test bit 2 (top); flags are consumed by the BEQ below, nothing in between sets flags
+ add x5, x5, x8
+ ldrb w6, [x10], -1
+ sxtw x6, w6
+ add x5, x5, x6 // x5 = sum of the 8 bytes pu1_src[0..7]
+ beq left_available // bit 2 clear: only the left sum is usable
+ add x10, x0, #9 // x10 = pu1_src + 9
+ // BOTH LEFT AND TOP AVAILABLE
+ ld1 {v0.8b}, [x10] // v0 = pu1_src[9..16]
+ uaddlp v1.4h, v0.8b // pairwise widening adds: 8 bytes -> 4 halfwords
+ uaddlp v3.2s, v1.4h // -> 2 words
+ uaddlp v2.1d, v3.2s // -> v2.d[0] = sum of the 8 bytes
+ dup v10.8h, w5 // every lane = sum of pu1_src[0..7]
+ dup v8.8h, v2.4h[0] // every lane = sum of pu1_src[9..16]
+ add v12.8h, v8.8h , v10.8h // sum of all 16 neighbours
+ sqrshrun v31.8b, v12.8h, #4 // DC = (sum + 8) >> 4
+ st1 {v31.8b}, [x1], x3 // 8 identical rows
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ b end_func
+
+top_available: // ONLY TOP AVAILABLE (reached when bit 0 is clear)
+ ands x11, x4, #0x04 // CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE
+ beq none_available
+
+ add x10, x0, #9 // x10 = pu1_src + 9
+ ld1 {v10.8b}, [x10] // v10 = pu1_src[9..16]
+ uaddlp v14.4h, v10.8b // pairwise widening adds down to a single sum
+ uaddlp v13.2s, v14.4h
+ uaddlp v12.1d, v13.2s // v12.d[0] = sum of the 8 bytes
+ rshrn v4.8b, v12.8h, #3 // lane 0 = (sum + 4) >> 3
+ dup v31.8b, v4.8b[0] // replicate lane 0 across the row
+ st1 {v31.8b}, [x1], x3 // 8 identical rows
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ st1 {v31.8b}, [x1], x3
+ b end_func
+
+
+left_available: //ONLY LEFT AVAILABLE
+ add x5, x5, #4 // rounding term
+ lsr x5, x5, #3 // DC = (sum of pu1_src[0..7] + 4) >> 3
+ dup v0.8b, w5
+ st1 {v0.8b}, [x1], x3 // 8 identical rows
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ b end_func
+
+none_available: //NONE AVAILABLE
+ mov x9, #128 // fixed DC value used when neither bit is set
+ dup v0.8b, w9
+ st1 {v0.8b}, [x1], x3 // 8 identical rows
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+ st1 {v0.8b}, [x1], x3
+
+
+end_func:
+
+ // Common exit: undo the prologue (reverse order of the pushes) and return.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_diag_dl
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_diag_dl_av8 // 8x8 luma intra prediction, Diagonal_Down_Left mode
+
+ih264_intra_pred_luma_8x8_mode_diag_dl_av8:
+
+ // Builds 15 FILT121 values from the 16 bytes at pu1_src + 9; row r of the output is values r..r+7.
+ push_v_regs
+ stp x19, x20, [sp, #-16]! // x19/x20 are spilled although nothing below touches them
+
+ add x0, x0, #9 // x0 = pu1_src + 9
+ sub x5, x3, #4 // x5 = dst_strd - 4: step after the second 4-byte half of a row
+ add x6, x0, #15 // x6 = address of the last of the 16 bytes loaded below
+ ld1 { v0.16b}, [x0] // v0 = p[0..15], with p = pu1_src + 9
+ mov v1.d[0], v0.d[1] // v1 = upper half of v0
+ ext v4.16b, v0.16b , v0.16b , #2 // v4[i] = v0[(i+2) mod 16]
+ mov v5.d[0], v4.d[1] // v5 = upper half of v4
+ ext v2.16b, v0.16b , v0.16b , #1 // v2[i] = v0[(i+1) mod 16]
+ mov v3.d[0], v2.d[1] // v3 = upper half of v2
+ ld1 {v5.b}[6], [x6] // replace the wrapped-around p[0] in lane 6 with p[15]
+ // v2/v3 = input shifted by one byte
+ // v4/v5 = input shifted by two bytes
+ uaddl v20.8h, v0.8b, v2.8b //Adding for FILT121
+ uaddl v22.8h, v1.8b, v3.8b
+ uaddl v24.8h, v2.8b, v4.8b
+ uaddl v26.8h, v3.8b, v5.8b
+ add v24.8h, v20.8h , v24.8h // p[i] + 2*p[i+1] + p[i+2], lanes 0-7
+ add v26.8h, v22.8h , v26.8h // same for lanes 8-15
+
+ sqrshrun v4.8b, v24.8h, #2 // (sum + 2) >> 2, narrowed to bytes
+ sqrshrun v5.8b, v26.8h, #2
+ mov v4.d[1], v5.d[0]
+ // v4 (all 16 lanes) now holds the FILT121 values; lane 15 is never stored
+ st1 {v4.8b}, [x1], x3 // row 0 = v4[0..7]
+ ext v18.16b, v4.16b , v4.16b , #1 // v18 = v4 shifted by 1
+ ext v16.16b, v18.16b , v18.16b , #1 // v16 = v4 shifted by 2
+ st1 {v18.8b}, [x1], x3 // row 1 = v4[1..8]
+ ext v14.16b, v16.16b , v16.16b , #1 // v14 = v4 shifted by 3
+ st1 {v16.8b}, [x1], x3 // row 2 = v4[2..9]
+ st1 {v14.8b}, [x1], x3 // row 3 = v4[3..10]
+ st1 {v4.s}[1], [x1], #4 // row 4 = v4[4..11], written as two words
+ st1 {v5.s}[0], [x1], x5
+ st1 {v18.s}[1], [x1], #4 // row 5 = v4[5..12]
+ st1 {v18.s}[2], [x1], x5
+ st1 {v16.s}[1], [x1], #4 // row 6 = v4[6..13]
+ st1 {v16.s}[2], [x1], x5
+ st1 {v14.s}[1], [x1], #4 // row 7 = v4[7..14]
+ st1 {v14.s}[2], [x1], x5
+
+
+end_func_diag_dl:
+ // Undo the prologue (reverse order of the pushes) and return.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_diag_dr
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_diag_dr_av8 // 8x8 luma intra prediction, Diagonal_Down_Right mode
+
+ih264_intra_pred_luma_8x8_mode_diag_dr_av8:
+
+ // Builds FILT121 values from pu1_src[0..16]; row r of the output is values (7-r)..(14-r).
+ push_v_regs
+ stp x19, x20, [sp, #-16]! // x19/x20 are spilled although nothing below touches them
+
+
+ ld1 { v0.16b}, [x0] // v0 = pu1_src[0..15]
+ mov v1.d[0], v0.d[1] // v1 = upper half of v0
+ add x0, x0, #1
+ ld1 { v2.16b}, [x0] // v2 = pu1_src[1..16]
+ mov v3.d[0], v2.d[1] // v3 = upper half of v2
+ ext v4.16b, v2.16b , v2.16b , #1 // v4[i] = v2[(i+1) mod 16]; lane 15 wraps around to v2[0]
+ mov v5.d[0], v4.d[1] // v5 = upper half of v4
+ // v2/v3 = input shifted by one byte
+ // v4/v5 = input shifted by two bytes
+ uaddl v20.8h, v0.8b, v2.8b //Adding for FILT121
+ uaddl v22.8h, v1.8b, v3.8b
+ uaddl v24.8h, v2.8b, v4.8b
+ uaddl v26.8h, v3.8b, v5.8b
+ add v24.8h, v20.8h , v24.8h // s[i] + 2*s[i+1] + s[i+2], lanes 0-7
+ add v26.8h, v22.8h , v26.8h // same for lanes 8-15
+ sqrshrun v4.8b, v24.8h, #2 // (sum + 2) >> 2, narrowed to bytes
+ sqrshrun v5.8b, v26.8h, #2
+ mov v4.d[1], v5.d[0]
+ // v4 (all 16 lanes) now holds the FILT121 values; lane 15 is never stored
+ sub x5, x3, #4 // x5 = dst_strd - 4: step after the second 4-byte half of a row
+ ext v18.16b, v4.16b , v4.16b , #15 // v18[i] = v4[(i-1) mod 16]
+ st1 {v18.d}[1], [x1], x3 // row 0 = v4[7..14]
+ ext v16.16b, v18.16b , v18.16b , #15 // v16[i] = v4[(i-2) mod 16]
+ st1 {v16.d}[1], [x1], x3 // row 1 = v4[6..13]
+ ext v14.16b, v16.16b , v16.16b , #15 // v14[i] = v4[(i-3) mod 16]
+ st1 {v14.d}[1], [x1], x3 // row 2 = v4[5..12]
+ st1 {v4.s}[1], [x1], #4 // row 3 = v4[4..11], written as two words
+ st1 {v5.s}[0], [x1], x5
+ st1 {v18.s}[1], [x1], #4 // row 4 = v4[3..10]
+ st1 {v18.s}[2], [x1], x5
+ st1 {v16.s}[1], [x1], #4 // row 5 = v4[2..9]
+ st1 {v16.s}[2], [x1], x5
+ st1 {v14.s}[1], [x1], #4 // row 6 = v4[1..8]
+ st1 {v14.s}[2], [x1], x5
+ st1 {v4.8b}, [x1], x3 // row 7 = v4[0..7]
+
+end_func_diag_dr:
+ // Undo the prologue (reverse order of the pushes) and return.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_vert_r
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:Vertical_Right
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_r_av8 // 8x8 luma intra prediction, Vertical_Right mode
+
+ih264_intra_pred_luma_8x8_mode_vert_r_av8:
+
+ // Computes FILT11 and FILT121 vectors from pu1_src[0..16], then assembles each row from pieces of them.
+ push_v_regs
+ stp x19, x20, [sp, #-16]! // x19/x20 are spilled although nothing below touches them
+
+ ld1 { v0.16b}, [x0] // v0 = pu1_src[0..15]
+ mov v1.d[0], v0.d[1] // v1 = upper half of v0
+ add x0, x0, #1
+ ld1 { v2.16b}, [x0] // v2 = pu1_src[1..16]
+ mov v3.d[0], v2.d[1] // v3 = upper half of v2
+ ext v4.16b, v2.16b , v2.16b , #1 // v4[i] = v2[(i+1) mod 16]; lane 15 wraps around to v2[0]
+ mov v5.d[0], v4.d[1] // v5 = upper half of v4
+ // v2/v3 = input shifted by one byte
+ // v4/v5 = input shifted by two bytes
+ uaddl v20.8h, v0.8b, v2.8b // s[i] + s[i+1], lanes 0-7
+ uaddl v22.8h, v1.8b, v3.8b // same for lanes 8-15
+ uaddl v24.8h, v2.8b, v4.8b
+ uaddl v26.8h, v3.8b, v5.8b
+ add v24.8h, v20.8h , v24.8h // s[i] + 2*s[i+1] + s[i+2], lanes 0-7
+ add v26.8h, v22.8h , v26.8h // same for lanes 8-15
+
+ sqrshrun v4.8b, v20.8h, #1 // FILT11: (s[i] + s[i+1] + 1) >> 1
+ sqrshrun v5.8b, v22.8h, #1
+ mov v4.d[1], v5.d[0]
+ sqrshrun v6.8b, v24.8h, #2 // FILT121: (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2
+ sqrshrun v7.8b, v26.8h, #2
+ mov v6.d[1], v7.d[0]
+ // v4 (16 lanes) has all FILT11 values
+ // v6 (16 lanes) has all FILT121 values
+ sub x5, x3, #6 // x5 = dst_strd - 6: step after a 2+4+2 byte row
+ sub x6, x3, #4 // x6 = dst_strd - 4: step after the last 4 bytes of row 6
+ st1 {v5.8b}, [x1], x3 // row 0 = FILT11 lanes 8-15
+ ext v18.16b, v6.16b , v6.16b , #15 // v18[i] = v6[(i-1) mod 16]
+ mov v22.16b , v18.16b // keep a copy: v18 is overwritten by UZP2 below
+ ext v16.16b, v4.16b , v4.16b , #1 // v16[i] = v4[(i+1) mod 16]
+ st1 {v18.d}[1], [x1], x3 // row 1 = FILT121 lanes 7-14
+ mov v14.16b , v16.16b // keep a copy: v16 is overwritten after the UZP pair
+ ext v20.16b, v4.16b , v4.16b , #15 // v20[i] = v4[(i-1) mod 16]
+ uzp1 v17.16b, v16.16b, v18.16b // even-indexed bytes of v16:v18
+ uzp2 v18.16b, v16.16b, v18.16b // odd-indexed bytes of v16:v18
+ mov v16.16b , v17.16b // v16/v18 now hold the de-interleaved pair
+ //row 2
+ ext v12.16b, v16.16b , v16.16b , #1
+ st1 {v20.d}[1], [x1] // write 8 bytes without advancing x1 ...
+ st1 {v6.b}[6], [x1], x3 // ... then overwrite the first byte of the row and advance
+ //row 3
+
+ st1 {v12.h}[5], [x1], #2
+ st1 {v6.s}[2], [x1], #4
+ st1 {v6.h}[6], [x1], x5
+ //row 4
+ st1 {v18.h}[5], [x1], #2
+ st1 {v4.s}[2], [x1], #4
+ st1 {v4.h}[6], [x1], x5
+ //row 5
+ ext v26.16b, v18.16b , v18.16b , #1
+ st1 {v16.h}[5], [x1], #2
+ st1 {v22.s}[2], [x1], #4
+ st1 {v22.h}[6], [x1], x5
+ //row 6
+ st1 {v26.h}[4], [x1], #2
+ st1 {v26.b}[10], [x1], #1
+ st1 {v4.b}[8], [x1], #1
+ st1 {v14.s}[2], [x1], x6
+ //row 7
+ st1 {v12.s}[2], [x1], #4
+ st1 {v6.s}[2], [x1], #4
+
+end_func_vert_r:
+ // Undo the prologue (reverse order of the pushes) and return.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_horz_d
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:Horizontal_Down
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_d_av8 // 8x8 luma intra prediction, Horizontal_Down mode
+
+ih264_intra_pred_luma_8x8_mode_horz_d_av8:
+
+ // Computes FILT11 and FILT121 vectors from pu1_src[0..16], interleaves them, then assembles each row.
+ push_v_regs
+ stp x19, x20, [sp, #-16]! // x19/x20 are spilled although nothing below touches them
+
+ ld1 { v0.16b}, [x0] // v0 = pu1_src[0..15]
+ mov v1.d[0], v0.d[1] // v1 = upper half of v0
+ add x0, x0, #1
+ ld1 { v2.16b}, [x0] // v2 = pu1_src[1..16]
+ mov v3.d[0], v2.d[1] // v3 = upper half of v2
+ ext v4.16b, v2.16b , v2.16b , #1 // v4[i] = v2[(i+1) mod 16]; lane 15 wraps around to v2[0]
+ mov v5.d[0], v4.d[1] // v5 = upper half of v4
+ // v2/v3 = input shifted by one byte
+ // v4/v5 = input shifted by two bytes
+ uaddl v20.8h, v0.8b, v2.8b // s[i] + s[i+1], lanes 0-7
+ uaddl v22.8h, v1.8b, v3.8b // same for lanes 8-15
+ uaddl v24.8h, v2.8b, v4.8b
+ uaddl v26.8h, v3.8b, v5.8b
+ add v24.8h, v20.8h , v24.8h // s[i] + 2*s[i+1] + s[i+2], lanes 0-7
+ add v26.8h, v22.8h , v26.8h // same for lanes 8-15
+
+ sqrshrun v4.8b, v20.8h, #1 // FILT11: (s[i] + s[i+1] + 1) >> 1
+ sqrshrun v5.8b, v22.8h, #1
+ mov v4.d[1], v5.d[0]
+ sqrshrun v6.8b, v24.8h, #2 // FILT121: (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2
+ sqrshrun v7.8b, v26.8h, #2
+ mov v6.d[1], v7.d[0]
+ // v4 (16 lanes) has all FILT11 values
+ // v6 (16 lanes) has all FILT121 values
+ mov v8.16b, v4.16b // work on copies so v4/v6 stay intact
+ mov v10.16b, v6.16b
+ sub x6, x3, #6 // x6 = dst_strd - 6: step after the last 2 bytes of a row
+ trn1 v9.16b, v8.16b, v10.16b
+ trn2 v10.16b, v8.16b, v10.16b // trn1/trn2/mov: byte-wise transpose of v8 and v10 in place
+ mov v8.16b, v9.16b
+ mov v12.16b, v8.16b
+ mov v14.16b, v10.16b
+ sub x5, x3, #4 // x5 = dst_strd - 4: step after the last 4 bytes of a row
+ trn1 v13.8h, v12.8h, v14.8h
+ trn2 v14.8h, v12.8h, v14.8h // halfword-wise transpose of v12 and v14 in place
+ mov v12.16b, v13.16b
+ ext v16.16b, v6.16b , v6.16b , #14 // v16[i] = v6[(i-2) mod 16]
+ //ROW 0
+ st1 {v16.d}[1], [x1] // write 8 bytes without advancing x1 ...
+ st1 {v10.h}[3], [x1], x3 // ... then overwrite the first 2 bytes of the row and advance
+
+ //ROW 1
+ st1 {v14.s}[1], [x1], #4
+ st1 {v6.s}[2], [x1], x5
+ //ROW 2
+ st1 {v10.h}[2], [x1], #2
+ st1 {v14.s}[1], [x1], #4
+ st1 {v7.h}[0], [x1], x6
+ //ROW 3
+ st1 {v12.s}[1], [x1], #4
+ st1 {v14.s}[1], [x1], x5
+ //ROW 4
+ st1 {v14.h}[1], [x1], #2
+ st1 {v12.s}[1], [x1], #4
+ st1 {v14.h}[2], [x1], x6
+ //ROW 5
+ st1 {v14.s}[0], [x1], #4
+ st1 {v12.s}[1], [x1], x5
+ //ROW 6
+ st1 {v10.h}[0], [x1], #2
+ st1 {v8.h}[1], [x1], #2
+ st1 {v14.h}[1], [x1], #2
+ st1 {v12.h}[2], [x1], x6
+ //ROW 7
+ st1 {v12.s}[0], [x1], #4
+ st1 {v14.s}[0], [x1], x5
+
+end_func_horz_d:
+ // Undo the prologue (reverse order of the pushes) and return.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_vert_l
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:Vertical_Left
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+
+ .global ih264_intra_pred_luma_8x8_mode_vert_l_av8 // 8x8 luma intra prediction, Vertical_Left mode
+
+ih264_intra_pred_luma_8x8_mode_vert_l_av8:
+
+ // Even rows come from FILT11, odd rows from FILT121, each shifted by (row / 2) bytes.
+ push_v_regs
+ stp x19, x20, [sp, #-16]! // x19/x20 are spilled although nothing below touches them
+ add x0, x0, #9 // x0 = pu1_src + 9
+ ld1 { v0.16b}, [x0] // v0 = p[0..15], with p = pu1_src + 9
+ mov v1.d[0], v0.d[1] // v1 = upper half of v0
+ add x0, x0, #1
+ ld1 { v2.16b}, [x0] // v2 = p[1..16]
+ mov v3.d[0], v2.d[1] // v3 = upper half of v2
+ ext v4.16b, v2.16b , v2.16b , #1 // v4[i] = v2[(i+1) mod 16]; lane 15 wraps around to v2[0]
+ mov v5.d[0], v4.d[1] // v5 = upper half of v4
+ uaddl v20.8h, v0.8b, v2.8b // p[i] + p[i+1], lanes 0-7
+ uaddl v22.8h, v1.8b, v3.8b // same for lanes 8-15
+ uaddl v24.8h, v2.8b, v4.8b
+ uaddl v26.8h, v3.8b, v5.8b
+ add v24.8h, v20.8h , v24.8h // p[i] + 2*p[i+1] + p[i+2], lanes 0-7
+ add v26.8h, v22.8h , v26.8h // same for lanes 8-15
+
+ sqrshrun v4.8b, v20.8h, #1 // FILT11: (p[i] + p[i+1] + 1) >> 1
+ sqrshrun v5.8b, v22.8h, #1
+ mov v4.d[1], v5.d[0]
+ sqrshrun v6.8b, v24.8h, #2 // FILT121: (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2
+ ext v8.16b, v4.16b , v4.16b , #1 // v8 = FILT11 shifted by 1
+ sqrshrun v7.8b, v26.8h, #2
+ mov v6.d[1], v7.d[0]
+ // v4 (16 lanes) has all FILT11 values
+ // v6 (16 lanes) has all FILT121 values
+
+ ext v10.16b, v6.16b , v6.16b , #1 // v10 = FILT121 shifted by 1
+ //ROW 0,1
+ st1 {v4.8b}, [x1], x3
+ st1 {v6.8b}, [x1], x3
+
+ ext v12.16b, v8.16b , v8.16b , #1 // v12 = FILT11 shifted by 2
+ ext v14.16b, v10.16b , v10.16b , #1 // v14 = FILT121 shifted by 2
+ //ROW 2,3
+ st1 {v8.8b}, [x1], x3
+ st1 {v10.8b}, [x1], x3
+
+ ext v16.16b, v12.16b , v12.16b , #1 // v16 = FILT11 shifted by 3
+ ext v18.16b, v14.16b , v14.16b , #1 // v18 = FILT121 shifted by 3
+ //ROW 4,5
+ st1 {v12.8b}, [x1], x3
+ st1 {v14.8b}, [x1], x3
+ //ROW 6,7
+ st1 {v16.8b}, [x1], x3
+ st1 {v18.8b}, [x1], x3
+
+end_func_vert_l:
+ // Undo the prologue (reverse order of the pushes) and return.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//*ih264_intra_pred_luma_8x8_mode_horz_u
+//*
+//* @brief
+//* Perform Intra prediction for luma_8x8 mode:Horizontal_Up
+//*
+//* @par Description:
+//* Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] ui_neighboravailability
+//* availability of neighbouring pixels
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************/
+//void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 ui_neighboravailability)
+
+//**************Variables Vs Registers*****************************************
+// x0 => *pu1_src
+// x1 => *pu1_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => ui_neighboravailability
+
+ .global ih264_intra_pred_luma_8x8_mode_horz_u_av8 // 8x8 luma intra prediction, Horizontal_Up mode
+
+ih264_intra_pred_luma_8x8_mode_horz_u_av8:
+
+ // Computes FILT11/FILT121 from pu1_src[0..7], reorders them through a lookup table, then slides a 2-byte window per row.
+ push_v_regs
+ stp x19, x20, [sp, #-16]! // x19/x20 are spilled although nothing below touches them
+
+ ld1 {v0.8b}, [x0] // v0 lanes 0-7 = pu1_src[0..7]
+ ld1 {v1.b}[7], [x0] // v1 lane 7 = pu1_src[0]; NOTE(review): lanes 0-6 of v1 are not initialised here
+ mov v0.d[1], v1.d[0] // v0 lanes 8-15 = v1; presumably only lane 15 feeds stored results - confirm against the table
+ ext v2.16b, v0.16b , v0.16b , #1 // v2[i] = v0[(i+1) mod 16]
+ mov v3.d[0], v2.d[1] // v3 = upper half of v2
+ ext v4.16b, v2.16b , v2.16b , #1 // v4[i] = v0[(i+2) mod 16]
+ mov v5.d[0], v4.d[1] // v5 = upper half of v4
+
+ adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u // x12 = address of the table, fetched through the GOT
+ ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
+ uaddl v20.8h, v0.8b, v2.8b // v0 + v2, lanes 0-7
+ uaddl v22.8h, v1.8b, v3.8b // same for lanes 8-15
+ uaddl v24.8h, v2.8b, v4.8b
+ uaddl v26.8h, v3.8b, v5.8b
+ add v24.8h, v20.8h , v24.8h // v0 + 2*v2 + v4, lanes 0-7
+ add v26.8h, v22.8h , v26.8h // same for lanes 8-15
+ ld1 { v10.16b}, [x12] // v10 = 16 table bytes, used as TBL indices below
+ mov v11.d[0], v10.d[1] // v11 = upper 8 indices
+ sqrshrun v4.8b, v20.8h, #1 // FILT11: (a + b + 1) >> 1
+ sqrshrun v5.8b, v22.8h, #1
+ mov v4.d[1], v5.d[0]
+ sqrshrun v6.8b, v24.8h, #2 // FILT121: (a + 2*b + c + 2) >> 2
+ sqrshrun v7.8b, v26.8h, #2
+ mov v6.d[1], v7.d[0]
+ // v4 (16 lanes) has all FILT11 values
+ // v6 (16 lanes) has all FILT121 values
+ mov v30.16b, v4.16b // TBL needs its table in consecutive registers: v30 = FILT11, v31 = FILT121
+ mov v31.16b, v6.16b
+ tbl v12.8b, {v30.16b, v31.16b}, v10.8b // first 8 reordered bytes
+ dup v14.16b, v5.8b[7] // v14 = FILT11 lane 15 replicated; used as fill for the EXTs below
+ tbl v13.8b, {v30.16b, v31.16b}, v11.8b // next 8 reordered bytes
+ mov v12.d[1], v13.d[0] // v12 = all 16 reordered bytes
+ ext v16.16b, v12.16b , v14.16b , #2 // v16 = v12 shifted by 2 bytes, filled from v14
+ ext v18.16b, v16.16b , v14.16b , #2 // v18 = v12 shifted by 4 bytes
+ st1 {v12.8b}, [x1], x3 //0
+ ext v20.16b, v18.16b , v14.16b , #2 // v20 = v12 shifted by 6 bytes
+ st1 {v16.8b}, [x1], x3 //1
+ st1 {v18.8b}, [x1], x3 //2
+ st1 {v20.8b}, [x1], x3 //3
+ st1 {v13.8b}, [x1], x3 //4
+ st1 {v16.d}[1], [x1], x3 //5
+ st1 {v18.d}[1], [x1], x3 //6
+ st1 {v20.d}[1], [x1], x3 //7
+
+
+end_func_horz_u:
+ // Undo the prologue (reverse order of the pushes) and return.
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
diff --git a/common/armv8/ih264_iquant_itrans_recon_av8.s b/common/armv8/ih264_iquant_itrans_recon_av8.s
new file mode 100755
index 0000000..4c83036
--- /dev/null
+++ b/common/armv8/ih264_iquant_itrans_recon_av8.s
@@ -0,0 +1,778 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+///*******************************************************************************
+// * //file
+// * ih264_iquant_itrans_recon_av8.s
+// *
+// * //brief
+// * Contains function definitions for single stage inverse transform
+// *
+// * //author
+// * Parthiban V
+// * Mohit
+// * Harinarayanaan
+// *
+// * //par List of Functions:
+// * - ih264_iquant_itrans_recon_4x4_av8()
+// * - ih264_iquant_itrans_recon_8x8_av8()
+// * - ih264_iquant_itrans_recon_chroma_4x4_av8()
+// *
+// * //remarks
+// * None
+// *
+// *******************************************************************************
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+///*
+// *******************************************************************************
+// *
+// * //brief
+// * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block
+// *
+// * //par Description:
+// * Performs inverse transform Ci4 and adds the residue to get the
+// * reconstructed block
+// *
+// * //param[in] pi2_src
+// * Input 4x4 coefficients
+// *
+// * //param[in] pu1_pred
+// * Prediction 4x4 block
+// *
+// * //param[out] pu1_out
+// * Output 4x4 block
+// *
+// * //param[in] u4_qp_div_6
+// * QP
+// *
+// * //param[in] pu2_weigh_mat
+// * Pointer to weight matrix
+// *
+// * //param[in] pred_strd,
+// * Prediction stride
+// *
+// * //param[in] out_strd
+// * Output Stride
+// *
+// *//param[in] pi2_tmp
+// * temporary buffer of size 1*16
+// *
+// * //param[in] pu2_iscal_mat
+// * Pointer to the inverse quantization matrix
+// *
+// * //returns Void
+// *
+// * //remarks
+// * None
+// *
+// *******************************************************************************
+// */
+//void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
+// UWORD8 *pu1_pred,
+// UWORD8 *pu1_out,
+// WORD32 pred_strd,
+// WORD32 out_strd,
+// const UWORD16 *pu2_iscal_mat,
+// const UWORD16 *pu2_weigh_mat,
+// UWORD32 u4_qp_div_6,
+// WORD32 *pi4_tmp,
+// WORD32 iq_start_idx
+// WORD16 *pi2_dc_ld_addr)
+//**************Variables Vs Registers*****************************************
+//x0 => *pi2_src
+//x1 => *pu1_pred
+//x2 => *pu1_out
+//x3 => pred_strd
+//x4 => out_strd
+//x5 => *pu2_iscal_mat
+//x6 => *pu2_weigh_mat
+//x7 => u4_qp_div_6
+//[sp] => pi4_tmp (offsets are relative to sp on entry, before push_v_regs)
+//[sp, #8] => iq_start_idx
+//[sp, #16] => pi2_dc_ld_addr
+//Only one shift is done in horizontal inverse because,
+//if u4_qp_div_6 is less than 4 then the shift value will be negative and a negative left shift is done, in this case rnd_factor has a value
+//if u4_qp_div_6 is greater than 4 then the shift value will be positive and a left shift is done, here rnd_factor is 0
+
+ .global ih264_iquant_itrans_recon_4x4_av8
+ih264_iquant_itrans_recon_4x4_av8:
+// Dequantises a 4x4 block, runs two 1-D inverse-transform passes, adds the prediction and stores 4 rows of 4 bytes.
+ push_v_regs
+
+ dup v30.4s, w7 //Populate the u4_qp_div_6 in all four lanes of v30
+
+ ldr w8, [sp, #72] //Loads iq_start_idx (offset is relative to sp after push_v_regs)
+ sxtw x8, w8
+
+ ldr x10, [sp, #80] //Load alternate dc address (pi2_dc_ld_addr)
+
+ subs x8, x8, #1 // if iq_start_idx == 1 => intra case , so result of subtraction is zero and z flag is set; the flag is consumed by the bne further down
+
+
+//=======================DEQUANT FROM HERE===================================
+
+ ld4 {v20.4h - v23.4h}, [x5] // load pu2_iscal_mat[i], i =0..15; ld4 de-interleaves, so register k holds i = k, k+4, k+8, k+12
+ ld4 {v26.4h - v29.4h}, [x6] // pu2_weigh_mat[i], i =0..15, de-interleaved the same way
+ ld4 {v16.4h - v19.4h}, [x0] // pi2_src_tmp[i], i =0..15, de-interleaved the same way
+
+
+ mul v20.4h, v20.4h, v26.4h // x[i]=(scale[i] * dequant[i]) where i = 0,4,8,12
+ mul v21.4h, v21.4h, v27.4h // x[i]=(scale[i] * dequant[i]) where i = 1,5,9,13
+ mul v22.4h, v22.4h, v28.4h // x[i]=(scale[i] * dequant[i]) where i = 2,6,10,14
+ mul v23.4h, v23.4h, v29.4h // x[i]=(scale[i] * dequant[i]) where i = 3,7,11,15
+
+ smull v0.4s, v16.4h, v20.4h // v0 = p[i] = (x[i] * trns_coeff[i]) where i = 0,4,8,12
+ smull v2.4s, v17.4h, v21.4h // v2 = p[i] = (x[i] * trns_coeff[i]) where i = 1,5,9,13
+ smull v4.4s, v18.4h, v22.4h // v4 = p[i] = (x[i] * trns_coeff[i]) where i = 2,6,10,14
+ smull v6.4s, v19.4h, v23.4h // v6 = p[i] = (x[i] * trns_coeff[i]) where i = 3,7,11,15
+
+ sshl v0.4s, v0.4s, v30.4s // v0 = q[i] = (p[i] << (qp/6)) where i = 0,4,8,12
+ sshl v2.4s, v2.4s, v30.4s // v2 = q[i] = (p[i] << (qp/6)) where i = 1,5,9,13
+ sshl v4.4s, v4.4s, v30.4s // v4 = q[i] = (p[i] << (qp/6)) where i = 2,6,10,14
+ sshl v6.4s, v6.4s, v30.4s // v6 = q[i] = (p[i] << (qp/6)) where i = 3,7,11,15
+
+ sqrshrn v0.4h, v0.4s, #0x4 // v0 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bits, where i = 0,4,8,12
+ sqrshrn v1.4h, v2.4s, #0x4 // v1 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bits, where i = 1,5,9,13
+ sqrshrn v2.4h, v4.4s, #0x4 // v2 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bits, where i = 2,6,10,14
+ sqrshrn v3.4h, v6.4s, #0x4 // v3 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bits, where i = 3,7,11,15
+
+ bne skip_loading_luma_dc_src // flags still come from the subs above; none of the NEON instructions in between write them
+ ld1 {v0.h}[0], [x10] // loads signed halfword pi2_dc_ld_addr[0] over c[0], if iq_start_idx == 1
+skip_loading_luma_dc_src:
+
+ //========= PROCESS IDCT FROM HERE =======
+ //Steps for Stage 1:
+ //------------------
+ ld1 {v30.s}[0], [x1], x3 // i row load pu1_pred buffer
+
+ sshr v8.4h, v1.4h, #1 // d1>>1
+ sshr v9.4h, v3.4h, #1 // d3>>1
+
+ add v4.4h, v0.4h, v2.4h // x0 = d0 + d2//
+ sub v5.4h, v0.4h, v2.4h // x1 = d0 - d2//
+ sub v6.4h, v8.4h, v3.4h // x2 = (d1 >> 1) - d3//
+ add v7.4h, v1.4h, v9.4h // x3 = d1 + (d3 >> 1)//
+
+ ld1 {v30.s}[1], [x1], x3 // ii row load pu1_pred buffer
+
+ add v10.4h, v4.4h , v7.4h // x0+x3
+ add v11.4h, v5.4h , v6.4h // x1+x2
+ sub v12.4h, v5.4h , v6.4h // x1-x2
+ sub v13.4h, v4.4h , v7.4h // x0-x3
+
+ ld1 {v31.s}[0], [x1], x3 // iii row load pu1_pred buf
+
+
+ //Steps for Stage 2:
+ //transpose
+ trn1 v4.4h, v10.4h, v11.4h
+ trn2 v5.4h, v10.4h, v11.4h
+ trn1 v6.4h, v12.4h, v13.4h
+ trn2 v7.4h, v12.4h, v13.4h
+
+ trn1 v10.2s, v4.2s, v6.2s // lane 0 of old v10..v13
+ trn1 v11.2s, v5.2s, v7.2s // lane 1 of old v10..v13
+ trn2 v12.2s, v4.2s, v6.2s // lane 2 of old v10..v13
+ trn2 v13.2s, v5.2s, v7.2s // lane 3 of old v10..v13
+ //end transpose
+
+ sshr v18.4h, v11.4h, #1 // q1>>1
+ sshr v19.4h, v13.4h, #1 // q3>>1
+
+ add v14.4h, v10.4h, v12.4h // x0 = q0 + q2//
+ sub v15.4h, v10.4h, v12.4h // x1 = q0 - q2//
+ sub v16.4h, v18.4h, v13.4h // x2 = (q1 >> 1) - q3//
+ add v17.4h, v11.4h, v19.4h // x3 = q1+ (q3 >> 1)//
+
+
+ ld1 {v31.s}[1], [x1], x3 // iv row load pu1_pred buffer
+
+ add v20.4h, v14.4h, v17.4h // x0 + x3
+ add v21.4h, v15.4h, v16.4h // x1 + x2
+ sub v22.4h, v15.4h, v16.4h // x1 - x2
+ sub v23.4h, v14.4h, v17.4h // x0 - x3
+
+ mov v20.d[1], v21.d[0] // pack output rows i, ii into v20
+ mov v22.d[1], v23.d[0] // pack output rows iii, iv into v22
+
+ srshr v20.8h, v20.8h, #6 // (x + 32) >> 6
+ srshr v22.8h, v22.8h, #6
+
+ uaddw v20.8h, v20.8h , v30.8b // add prediction rows i, ii
+ uaddw v22.8h, v22.8h , v31.8b // add prediction rows iii, iv
+
+ sqxtun v0.8b, v20.8h // saturate to unsigned 8 bits
+ sqxtun v1.8b, v22.8h
+
+ st1 {v0.s}[0], [x2], x4 //i row store the value
+ st1 {v0.s}[1], [x2], x4 //ii row store the value
+ st1 {v1.s}[0], [x2], x4 //iii row store the value
+ st1 {v1.s}[1], [x2] //iv row store the value
+
+ pop_v_regs
+ ret
+
+
+///**
+// *******************************************************************************
+// *
+// * @brief
+// * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block
+// *
+// * @par Description:
+// * Performs inverse transform Ci4 and adds the residue to get the
+// * reconstructed block
+// *
+// * @param[in] pi2_src
+// * Input 4x4 coefficients
+// *
+// * @param[in] pu1_pred
+// * Prediction 4x4 block
+// *
+// * @param[out] pu1_out
+// * Output 4x4 block
+// *
+// * @param[in] u4_qp_div_6
+// * QP
+// *
+// * @param[in] pu2_weigh_mat
+// * Pointer to weight matrix
+// *
+// * @param[in] pred_strd,
+// * Prediction stride
+// *
+// * @param[in] out_strd
+// * Output Stride
+// *
+// *@param[in] pi2_tmp
+// * temporary buffer of size 1*16
+// *
+// * @param[in] pu2_iscal_mat
+// * Pointer to the inverse quantization matrix
+// *
+// * @returns Void
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+// */
+//void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
+// UWORD8 *pu1_pred,
+// UWORD8 *pu1_out,
+// WORD32 pred_strd,
+// WORD32 out_strd,
+// const UWORD16 *pu2_iscal_mat,
+// const UWORD16 *pu2_weigh_mat,
+// UWORD32 u4_qp_div_6,
+// WORD32 *pi4_tmp
+// WORD16 *pi2_dc_src)
+//**************Variables Vs Registers*****************************************
+//x0 => *pi2_src
+//x1 => *pu1_pred
+//x2 => *pu1_out
+//x3 => pred_strd
+//x4 => out_strd
+//x5 => *pu2_iscal_mat
+//x6 => *pu2_weigh_mat
+//x7 => u4_qp_div_6
+//sp => pi4_tmp
+//sp#8 => *pi2_dc_src
+
+ .global ih264_iquant_itrans_recon_chroma_4x4_av8
+ih264_iquant_itrans_recon_chroma_4x4_av8:
+// Dequantises and inverse-transforms a 4x4 chroma block, then merges the result into every second byte of the interleaved pu1_out rows.
+//ld4 on 16-bit lanes (VLD4.S16) is used because the pointer is incremented by SUB_BLK_WIDTH_4x4
+//If the macro value changes, the instruction needs to be changed accordingly.
+//Only one shift is done in horizontal inverse because,
+//if u4_qp_div_6 is less than 4 then the shift value will be negative and a negative left shift is done, in this case rnd_factor has a value
+//if u4_qp_div_6 is greater than 4 then the shift value will be positive and a left shift is done, here rnd_factor is 0
+
+//at the end of the function, we could have moved 64 bits into the higher 64 bits of a register and done further processing
+//but that seems to reduce the number of instructions by only 1. [Since on a15 we saw add and sub to be very high throughput,
+//all instructions were taken as equal]
+
+ //reduce sp by 64
+ push_v_regs
+
+ dup v30.4s, w7 //Populate the u4_qp_div_6 in all four lanes of v30
+
+ //was at sp + 8, hence now at sp+64+8 = sp+72
+ ldr x10, [sp, #72] //Load alternate dc address (pi2_dc_src)
+
+//=======================DEQUANT FROM HERE===================================
+
+ ld4 {v20.4h - v23.4h}, [x5] // load pu2_iscal_mat[i], i =0..15; ld4 de-interleaves, so register k holds i = k, k+4, k+8, k+12
+ ld4 {v26.4h - v29.4h}, [x6] // pu2_weigh_mat[i], i =0..15, de-interleaved the same way
+ ld4 {v16.4h - v19.4h}, [x0] // pi2_src_tmp[i], i =0..15, de-interleaved the same way
+
+
+ mul v20.4h, v20.4h, v26.4h // x[i]=(scale[i] * dequant[i]) where i = 0,4,8,12
+ mul v21.4h, v21.4h, v27.4h // x[i]=(scale[i] * dequant[i]) where i = 1,5,9,13
+ mul v22.4h, v22.4h, v28.4h // x[i]=(scale[i] * dequant[i]) where i = 2,6,10,14
+ mul v23.4h, v23.4h, v29.4h // x[i]=(scale[i] * dequant[i]) where i = 3,7,11,15
+
+ smull v0.4s, v16.4h, v20.4h // v0 = p[i] = (x[i] * trns_coeff[i]) where i = 0,4,8,12
+ smull v2.4s, v17.4h, v21.4h // v2 = p[i] = (x[i] * trns_coeff[i]) where i = 1,5,9,13
+ smull v4.4s, v18.4h, v22.4h // v4 = p[i] = (x[i] * trns_coeff[i]) where i = 2,6,10,14
+ smull v6.4s, v19.4h, v23.4h // v6 = p[i] = (x[i] * trns_coeff[i]) where i = 3,7,11,15
+
+ sshl v0.4s, v0.4s, v30.4s // v0 = q[i] = (p[i] << (qp/6)) where i = 0,4,8,12
+ sshl v2.4s, v2.4s, v30.4s // v2 = q[i] = (p[i] << (qp/6)) where i = 1,5,9,13
+ sshl v4.4s, v4.4s, v30.4s // v4 = q[i] = (p[i] << (qp/6)) where i = 2,6,10,14
+ sshl v6.4s, v6.4s, v30.4s // v6 = q[i] = (p[i] << (qp/6)) where i = 3,7,11,15
+
+ sqrshrn v0.4h, v0.4s, #0x4 // v0 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bits, where i = 0,4,8,12
+ sqrshrn v1.4h, v2.4s, #0x4 // v1 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bits, where i = 1,5,9,13
+ sqrshrn v2.4h, v4.4s, #0x4 // v2 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bits, where i = 2,6,10,14
+ sqrshrn v3.4h, v6.4s, #0x4 // v3 = c[i] = ((q[i] + 8) >> 4), saturated to 16 bits, where i = 3,7,11,15
+
+ ld1 {v0.h}[0], [x10] // loads signed halfword pi2_dc_src[0]; it always replaces c[0]
+
+ //========= PROCESS IDCT FROM HERE =======
+ //Steps for Stage 1:
+ //------------------
+
+ sshr v8.4h, v1.4h, #1 // d1>>1
+ sshr v9.4h, v3.4h, #1 // d3>>1
+
+ add v4.4h, v0.4h, v2.4h // x0 = d0 + d2//
+ sub v5.4h, v0.4h, v2.4h // x1 = d0 - d2//
+ sub v6.4h, v8.4h, v3.4h // x2 = (d1 >> 1) - d3//
+ add v7.4h, v1.4h, v9.4h // x3 = d1 + (d3 >> 1)//
+
+
+ add v10.4h, v4.4h , v7.4h // x0+x3
+ add v11.4h, v5.4h , v6.4h // x1+x2
+ sub v12.4h, v5.4h , v6.4h // x1-x2
+ sub v13.4h, v4.4h , v7.4h // x0-x3
+
+ ld1 {v26.8b}, [x1], x3 // i row load pu1_pred buffer
+ ld1 {v27.8b}, [x1], x3 // ii row load pu1_pred buffer
+ ld1 {v28.8b}, [x1], x3 // iii row load pu1_pred buf
+ ld1 {v29.8b}, [x1], x3 // iv row load pu1_pred buffer
+
+ //Steps for Stage 2:
+ //transpose
+ trn1 v4.4h, v10.4h, v11.4h
+ trn2 v5.4h, v10.4h, v11.4h
+ trn1 v6.4h, v12.4h, v13.4h
+ trn2 v7.4h, v12.4h, v13.4h
+
+ trn1 v10.2s, v4.2s, v6.2s // lane 0 of old v10..v13
+ trn1 v11.2s, v5.2s, v7.2s // lane 1 of old v10..v13
+ trn2 v12.2s, v4.2s, v6.2s // lane 2 of old v10..v13
+ trn2 v13.2s, v5.2s, v7.2s // lane 3 of old v10..v13
+ //end transpose
+
+ sshr v18.4h, v11.4h, #1 // q1>>1
+ sshr v19.4h, v13.4h, #1 // q3>>1
+
+ add v14.4h, v10.4h, v12.4h // x0 = q0 + q2//
+ sub v15.4h, v10.4h, v12.4h // x1 = q0 - q2//
+ sub v16.4h, v18.4h, v13.4h // x2 = (q1 >> 1) - q3//
+ add v17.4h, v11.4h, v19.4h // x3 = q1+ (q3 >> 1)//
+
+ //Backup the output addr
+ mov x0, x2
+
+ //load output buffer for interleaving
+ ld1 {v10.8b}, [x2], x4
+ ld1 {v11.8b}, [x2], x4
+ ld1 {v12.8b}, [x2], x4
+ ld1 {v13.8b}, [x2]
+
+ add v20.4h, v14.4h, v17.4h // x0 + x3
+ add v21.4h, v15.4h, v16.4h // x1 + x2
+ sub v22.4h, v15.4h, v16.4h // x1 - x2
+ sub v23.4h, v14.4h, v17.4h // x0 - x3
+
+ srshr v20.4h, v20.4h, #6 // (x + 32) >> 6
+ srshr v21.4h, v21.4h, #6
+ srshr v22.4h, v22.4h, #6
+ srshr v23.4h, v23.4h, #6
+
+ //v30 (still the dup of u4_qp_div_6) is only a dummy second operand for the uzp1 de-interleave below
+ movi v31.4h, #0x00ff //mask for interleaving [copy lower 8 bits]
+
+ //Extract one plane from the interleaved prediction: uzp1 keeps the even-indexed bytes in the low 4 lanes
+ uzp1 v26.8b, v26.8b, v30.8b
+ uzp1 v27.8b, v27.8b, v30.8b
+ uzp1 v28.8b, v28.8b, v30.8b
+ uzp1 v29.8b, v29.8b, v30.8b
+
+ uaddw v20.8h, v20.8h, v26.8b // add prediction; only the low 4 lanes feed the final result
+ uaddw v21.8h, v21.8h, v27.8b
+ uaddw v22.8h, v22.8h, v28.8b
+ uaddw v23.8h, v23.8h, v29.8b
+
+ sqxtun v0.8b, v20.8h // saturate to unsigned 8 bits
+ sqxtun v1.8b, v21.8h
+ sqxtun v2.8b, v22.8h
+ sqxtun v3.8b, v23.8h
+
+ //widen the output so that we have 0 at msb and value at lsb
+ uxtl v6.8h, v0.8b
+ uxtl v7.8h, v1.8b
+ uxtl v8.8h, v2.8b
+ uxtl v9.8h, v3.8b
+
+ //select lsbs from processed data and msbs from pu1_out loaded data
+ bit v10.8b, v6.8b, v31.8b
+ bit v11.8b, v7.8b, v31.8b
+ bit v12.8b, v8.8b, v31.8b
+ bit v13.8b, v9.8b, v31.8b
+
+ //store the interleaved result
+ st1 {v10.8b}, [x0], x4
+ st1 {v11.8b}, [x0], x4
+ st1 {v12.8b}, [x0], x4
+ st1 {v13.8b}, [x0]
+
+ pop_v_regs
+ ret
+
+///*
+// *******************************************************************************
+// *
+// * //brief
+// * This function performs inverse quant and Inverse transform type Ci8 for 8*8 block
+// *
+// * //par Description:
+// * Performs inverse transform Ci8 and adds the residue to get the
+// * reconstructed block
+// *
+// * //param[in] pi2_src
+// * Input 8x8 coefficients
+// *
+// * //param[in] pu1_pred
+// * Prediction 8x8 block
+// *
+// * //param[out] pu1_out
+// * Output 8x8 block
+// *
+// * //param[in] u4_qp_div_6
+// * QP
+// *
+// * //param[in] pu2_weigh_mat
+// * Pointer to weight matrix
+// *
+// * //param[in] pred_strd,
+// * Prediction stride
+// *
+// * //param[in] out_strd
+// * Output Stride
+// *
+// *//param[in] pi2_tmp
+// * temporary buffer of size 1*64
+// *
+// * //param[in] pu2_iscal_mat
+// * Pointer to the inverse quantization matrix
+// *
+// * //returns Void
+// *
+// * //remarks
+// * None
+// *
+// *******************************************************************************
+// */
+//void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
+// UWORD8 *pu1_pred,
+// UWORD8 *pu1_out,
+// WORD32 pred_strd,
+// WORD32 out_strd,
+// const UWORD16 *pu2_iscal_mat,
+// const UWORD16 *pu2_weigh_mat,
+// UWORD32 u4_qp_div_6,
+// WORD32 *pi4_tmp,
+// WORD32 iq_start_idx
+// WORD16 *pi2_dc_ld_addr)
+//**************Variables Vs Registers*****************************************
+//x0 => *pi2_src
+//x1 => *pu1_pred
+//x2 => *pu1_out
+//x3 => pred_strd
+//x4 => out_strd
+//x5 => *pu2_iscal_mat
+//x6 => *pu2_weigh_mat
+//x7 => u4_qp_div_6
+//NOT USED => pi4_tmp
+//NOT USED => iq_start_idx
+//NOT USED => pi2_dc_ld_addr
+
+ .global ih264_iquant_itrans_recon_8x8_av8
+ih264_iquant_itrans_recon_8x8_av8:
+// Dequantises an 8x8 block, applies the 1-D 8-point inverse transform twice (a transpose precedes each pass), adds the prediction and stores 8 rows of 8 bytes.
+ push_v_regs
+
+ ld1 {v8.8h -v11.8h}, [x5], #64 // pu2_iscal_mat[0..31]
+ ld1 {v12.8h-v15.8h}, [x5] // pu2_iscal_mat[32..63]
+
+ ld1 {v16.8h -v19.8h}, [x6], #64 // pu2_weigh_mat[0..31]
+ ld1 {v20.8h -v23.8h}, [x6] // pu2_weigh_mat[32..63]
+
+ mov x8, #16 // 16 bytes = 8 halfwords per load
+ ld1 {v0.8h}, [x0], x8 // v0..v7 = pi2_src[0..63], 8 coefficients per register
+ ld1 {v1.8h}, [x0], x8
+ ld1 {v2.8h}, [x0], x8
+ ld1 {v3.8h}, [x0], x8
+ ld1 {v4.8h}, [x0], x8
+ ld1 {v5.8h}, [x0], x8
+ ld1 {v6.8h}, [x0], x8
+ ld1 {v7.8h}, [x0]
+
+ mul v8.8h, v8.8h, v16.8h // iscal_mat[i] * weigh_mat[i], low 16 bits of each product
+ mul v9.8h, v9.8h, v17.8h
+ mul v10.8h, v10.8h, v18.8h
+ mul v11.8h, v11.8h, v19.8h
+ mul v12.8h, v12.8h, v20.8h
+ mul v13.8h, v13.8h, v21.8h
+ mul v14.8h, v14.8h, v22.8h
+ mul v15.8h, v15.8h, v23.8h
+
+ smull v16.4s, v0.4h, v8.4h // src[i] * (iscal * weigh) widened to 32 bits, low 4 lanes
+ smull2 v17.4s, v0.8h, v8.8h // same, high 4 lanes
+ smull v18.4s, v1.4h, v9.4h
+ smull2 v19.4s, v1.8h, v9.8h
+ smull v20.4s, v2.4h, v10.4h
+ smull2 v21.4s, v2.8h, v10.8h
+ smull v22.4s, v3.4h, v11.4h
+ smull2 v23.4s, v3.8h, v11.8h
+ smull v24.4s, v4.4h, v12.4h
+ smull2 v25.4s, v4.8h, v12.8h
+ smull v26.4s, v5.4h, v13.4h
+ smull2 v27.4s, v5.8h, v13.8h
+ smull v28.4s, v6.4h, v14.4h
+ smull2 v29.4s, v6.8h, v14.8h
+ smull v30.4s, v7.4h, v15.4h
+ smull2 v31.4s, v7.8h, v15.8h
+
+ dup v0.4s, w7 // u4_qp_div_6 in every lane, used as the shift amount
+
+ sshl v16.4s, v16.4s, v0.4s // << u4_qp_div_6
+ sshl v17.4s, v17.4s, v0.4s
+ sshl v18.4s, v18.4s, v0.4s
+ sshl v19.4s, v19.4s, v0.4s
+ sshl v20.4s, v20.4s, v0.4s
+ sshl v21.4s, v21.4s, v0.4s
+ sshl v22.4s, v22.4s, v0.4s
+ sshl v23.4s, v23.4s, v0.4s
+ sshl v24.4s, v24.4s, v0.4s
+ sshl v25.4s, v25.4s, v0.4s
+ sshl v26.4s, v26.4s, v0.4s
+ sshl v27.4s, v27.4s, v0.4s
+ sshl v28.4s, v28.4s, v0.4s
+ sshl v29.4s, v29.4s, v0.4s
+ sshl v30.4s, v30.4s, v0.4s
+ sshl v31.4s, v31.4s, v0.4s
+
+ sqrshrn v0.4h, v16.4s, #6 // (x + 32) >> 6, saturated to 16 bits; v0..v7 = dequantised coefficients
+ sqrshrn2 v0.8h, v17.4s, #6
+ sqrshrn v1.4h, v18.4s, #6
+ sqrshrn2 v1.8h, v19.4s, #6
+ sqrshrn v2.4h, v20.4s, #6
+ sqrshrn2 v2.8h, v21.4s, #6
+ sqrshrn v3.4h, v22.4s, #6
+ sqrshrn2 v3.8h, v23.4s, #6
+ sqrshrn v4.4h, v24.4s, #6
+ sqrshrn2 v4.8h, v25.4s, #6
+ sqrshrn v5.4h, v26.4s, #6
+ sqrshrn2 v5.8h, v27.4s, #6
+ sqrshrn v6.4h, v28.4s, #6
+ sqrshrn2 v6.8h, v29.4s, #6
+ sqrshrn v7.4h, v30.4s, #6
+ sqrshrn2 v7.8h, v31.4s, #6
+
+ //loop counter: the transpose + 1-D transform below runs twice
+ mov x8, #2
+//1x8 transform
+trans_1x8_1d:
+
+ //transpose 8x8
+ trn1 v8.8h, v0.8h, v1.8h
+ trn2 v9.8h, v0.8h, v1.8h
+ trn1 v10.8h, v2.8h, v3.8h
+ trn2 v11.8h, v2.8h, v3.8h
+ trn1 v12.8h, v4.8h, v5.8h
+ trn2 v13.8h, v4.8h, v5.8h
+ trn1 v14.8h, v6.8h, v7.8h
+ trn2 v15.8h, v6.8h, v7.8h
+
+ trn1 v0.4s, v8.4s, v10.4s
+ trn2 v2.4s, v8.4s, v10.4s
+ trn1 v1.4s, v9.4s, v11.4s
+ trn2 v3.4s, v9.4s, v11.4s
+ trn1 v4.4s, v12.4s, v14.4s
+ trn2 v6.4s, v12.4s, v14.4s
+ trn1 v5.4s, v13.4s, v15.4s
+ trn2 v7.4s, v13.4s, v15.4s
+
+ trn1 v8.2d, v0.2d, v4.2d //transposed row 0
+ trn2 v12.2d, v0.2d, v4.2d //transposed row 4
+ trn1 v9.2d, v1.2d, v5.2d //transposed row 1
+ trn2 v13.2d, v1.2d, v5.2d //transposed row 5
+ trn1 v10.2d, v2.2d, v6.2d //transposed row 2
+ trn2 v14.2d, v2.2d, v6.2d //transposed row 6
+ trn1 v11.2d, v3.2d, v7.2d //transposed row 3
+ trn2 v15.2d, v3.2d, v7.2d //transposed row 7
+
+ // halved copies of rows 1 2 3 5 6 7
+ sshr v16.8h, v9.8h, #1 //(pi2_tmp_ptr[1] >> 1)
+ sshr v17.8h, v10.8h, #1 //(pi2_tmp_ptr[2] >> 1)
+ sshr v18.8h, v11.8h, #1 //(pi2_tmp_ptr[3] >> 1)
+ sshr v19.8h, v13.8h, #1 //(pi2_tmp_ptr[5] >> 1)
+ sshr v20.8h, v14.8h, #1 //(pi2_tmp_ptr[6] >> 1)
+ sshr v21.8h, v15.8h, #1 //(pi2_tmp_ptr[7] >> 1)
+
+ add v0.8h, v8.8h, v12.8h // i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
+ sub v2.8h, v8.8h, v12.8h // i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
+
+ sub v4.8h, v17.8h, v14.8h //i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
+ add v6.8h, v10.8h, v20.8h //i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
+
+ //-w3 + w5
+ ssubl v22.4s, v13.4h, v11.4h
+ ssubl2 v23.4s, v13.8h, v11.8h
+ //w3 + w5
+ saddl v24.4s, v13.4h, v11.4h
+ saddl2 v25.4s, v13.8h, v11.8h
+ //-w1 + w7
+ ssubl v26.4s, v15.4h, v9.4h
+ ssubl2 v27.4s, v15.8h, v9.8h
+ //w1 + w7
+ saddl v28.4s, v15.4h, v9.4h
+ saddl2 v29.4s, v15.8h, v9.8h
+
+ //-w3 + w5 - w7
+ ssubw v22.4s, v22.4s, v15.4h
+ ssubw2 v23.4s, v23.4s, v15.8h
+ //w3 + w5 + w1
+ saddw v24.4s, v24.4s, v9.4h
+ saddw2 v25.4s, v25.4s, v9.8h
+ //-w1 + w7 + w5
+ saddw v26.4s, v26.4s, v13.4h
+ saddw2 v27.4s, v27.4s, v13.8h
+ //w1 + w7 - w3
+ ssubw v28.4s, v28.4s, v11.4h
+ ssubw2 v29.4s, v29.4s, v11.8h
+
+ //-w3 + w5 - w7 - (w7 >> 1)
+ ssubw v22.4s, v22.4s, v21.4h
+ ssubw2 v23.4s, v23.4s, v21.8h
+ //w3 + w5 + w1 + (w1 >> 1)
+ saddw v24.4s, v24.4s, v16.4h
+ saddw2 v25.4s, v25.4s, v16.8h
+ //-w1 + w7 + w5 + (w5 >> 1)
+ saddw v26.4s, v26.4s, v19.4h
+ saddw2 v27.4s, v27.4s, v19.8h
+ //w1 + w7 - w3 - (w3 >> 1)
+ ssubw v28.4s, v28.4s, v18.4h
+ ssubw2 v29.4s, v29.4s, v18.8h
+
+ xtn v1.4h, v22.4s // y1 = -w3 + w5 - w7 - (w7 >> 1), truncated back to 16 bits
+ xtn2 v1.8h, v23.4s
+ xtn v3.4h, v28.4s // y3 = w1 + w7 - w3 - (w3 >> 1)
+ xtn2 v3.8h, v29.4s
+ xtn v5.4h, v26.4s // y5 = -w1 + w7 + w5 + (w5 >> 1)
+ xtn2 v5.8h, v27.4s
+ xtn v7.4h, v24.4s // y7 = w3 + w5 + w1 + (w1 >> 1)
+ xtn2 v7.8h, v25.4s
+
+ sshr v16.8h, v1.8h, #2 //(y1 >> 2)
+ sshr v17.8h, v3.8h, #2 //(y3 >> 2)
+ sshr v18.8h, v5.8h, #2 //(y5 >> 2)
+ sshr v19.8h, v7.8h, #2 //(y7 >> 2)
+
+ add v8.8h, v0.8h, v6.8h // z0 = y0 + y6
+ add v9.8h, v1.8h, v19.8h // z1 = y1 + (y7 >> 2)
+ add v10.8h, v2.8h, v4.8h // z2 = y2 + y4
+ add v11.8h, v3.8h, v18.8h // z3 = y3 + (y5 >> 2)
+ sub v12.8h, v2.8h, v4.8h // z4 = y2 - y4
+ sub v13.8h, v17.8h, v5.8h // z5 = (y3 >> 2) - y5
+ sub v14.8h, v0.8h, v6.8h // z6 = y0 - y6
+ sub v15.8h, v7.8h, v16.8h // z7 = y7 - (y1 >> 2)
+
+ add v0.8h, v8.8h, v15.8h // out0 = z0 + z7
+ add v1.8h, v10.8h, v13.8h // out1 = z2 + z5
+ add v2.8h, v12.8h, v11.8h // out2 = z4 + z3
+ add v3.8h, v14.8h, v9.8h // out3 = z6 + z1
+ sub v4.8h, v14.8h, v9.8h // out4 = z6 - z1
+ sub v5.8h, v12.8h, v11.8h // out5 = z4 - z3
+ sub v6.8h, v10.8h, v13.8h // out6 = z2 - z5
+ sub v7.8h, v8.8h, v15.8h // out7 = z0 - z7
+
+ subs x8, x8, #1
+ bne trans_1x8_1d // second pass transposes the first-pass output and transforms it again
+
+ ld1 {v22.8b}, [x1], x3 // 8 rows of pu1_pred, 8 bytes each
+ ld1 {v23.8b}, [x1], x3
+ ld1 {v24.8b}, [x1], x3
+ ld1 {v25.8b}, [x1], x3
+ ld1 {v26.8b}, [x1], x3
+ ld1 {v27.8b}, [x1], x3
+ ld1 {v28.8b}, [x1], x3
+ ld1 {v29.8b}, [x1]
+
+ srshr v0.8h, v0.8h, #6 // (x + 32) >> 6
+ srshr v1.8h, v1.8h, #6
+ srshr v2.8h, v2.8h, #6
+ srshr v3.8h, v3.8h, #6
+ srshr v4.8h, v4.8h, #6
+ srshr v5.8h, v5.8h, #6
+ srshr v6.8h, v6.8h, #6
+ srshr v7.8h, v7.8h, #6
+
+ uaddw v0.8h, v0.8h, v22.8b // residual + prediction
+ uaddw v1.8h, v1.8h, v23.8b
+ uaddw v2.8h, v2.8h, v24.8b
+ uaddw v3.8h, v3.8h, v25.8b
+ uaddw v4.8h, v4.8h, v26.8b
+ uaddw v5.8h, v5.8h, v27.8b
+ uaddw v6.8h, v6.8h, v28.8b
+ uaddw v7.8h, v7.8h, v29.8b
+
+ sqxtun v0.8b, v0.8h // saturate to unsigned 8 bits
+ sqxtun v1.8b, v1.8h
+ sqxtun v2.8b, v2.8h
+ sqxtun v3.8b, v3.8h
+ sqxtun v4.8b, v4.8h
+ sqxtun v5.8b, v5.8h
+ sqxtun v6.8b, v6.8h
+ sqxtun v7.8b, v7.8h
+
+ st1 {v0.8b}, [x2], x4 // 8 output rows to pu1_out, stride out_strd
+ st1 {v1.8b}, [x2], x4
+ st1 {v2.8b}, [x2], x4
+ st1 {v3.8b}, [x2], x4
+ st1 {v4.8b}, [x2], x4
+ st1 {v5.8b}, [x2], x4
+ st1 {v6.8b}, [x2], x4
+ st1 {v7.8b}, [x2]
+
+ pop_v_regs
+ ret
+
+
+
+
diff --git a/common/armv8/ih264_iquant_itrans_recon_dc_av8.s b/common/armv8/ih264_iquant_itrans_recon_dc_av8.s
new file mode 100755
index 0000000..8bb9c32
--- /dev/null
+++ b/common/armv8/ih264_iquant_itrans_recon_dc_av8.s
@@ -0,0 +1,397 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// * ih264_iquant_itrans_recon_dc_av8.s
+// *
+// * @brief
+// * Contains function definitions for single stage inverse transform
+// *
+// * @author
+// * Mohit
+// *
+// * @par List of Functions:
+// * - ih264_iquant_itrans_recon_4x4_dc_av8()
+// * - ih264_iquant_itrans_recon_8x8_dc_av8()
+// * - ih264_iquant_itrans_recon_chroma_4x4_dc_av8()
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+//*/
+
+
+.include "ih264_neon_macros.s"
+
+
+///**
+// *******************************************************************************
+// *
+// * @brief
+// * This function performs inverse quant and Inverse transform type Ci4 for 4*4 block
+// * for dc input pattern only, i.e. only the (0,0) element of the input 4x4 block is
+// * non-zero. For complete function, refer ih264_iquant_itrans_recon_av8.s
+// *
+// * @par Description:
+// * Performs inverse transform Ci4 and adds the residue to get the
+// * reconstructed block
+// *
+// * @param[in] pi2_src
+// * Input 4x4 coefficients
+// *
+// * @param[in] pu1_pred
+// * Prediction 4x4 block
+// *
+// * @param[out] pu1_out
+// * Output 4x4 block
+// *
+// * @param[in] u4_qp_div_6
+// * QP
+// *
+// * @param[in] pu2_weigh_mat
+// * Pointer to weight matrix
+// *
+// * @param[in] pred_strd,
+// * Prediction stride
+// *
+// * @param[in] out_strd
+// * Output Stride
+// *
+// *@param[in] pi2_tmp
+// * temporary buffer of size 1*16
+// *
+// * @param[in] pu2_iscal_mat
+// * Pointer to the inverse quantization matrix
+// *
+// * @returns Void
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+// */
+//void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
+// UWORD8 *pu1_pred,
+// UWORD8 *pu1_out,
+// WORD32 pred_strd,
+// WORD32 out_strd,
+// const UWORD16 *pu2_iscal_mat,
+// const UWORD16 *pu2_weigh_mat,
+// UWORD32 u4_qp_div_6,
+// WORD32 *pi4_tmp,
+// WORD32 iq_start_idx
+// WORD16 *pi2_dc_ld_addr)
+//**************Variables Vs Registers*****************************************
+//x0 => *pi2_src
+//x1 => *pu1_pred
+//x2 => *pu1_out
+//x3 => pred_strd
+//x4 => out_strd
+//x5 => *pu2_iscal_mat
+//x6 => *pu2_weigh_mat
+//x7 => u4_qp_div_6
+//[sp] => pi4_tmp (offsets are relative to sp on entry, before push_v_regs)
+//[sp, #8] => iq_start_idx
+//[sp, #16] => pi2_dc_ld_addr
+
+.text
+.p2align 2
+
+ .global ih264_iquant_itrans_recon_4x4_dc_av8
+ih264_iquant_itrans_recon_4x4_dc_av8:
+// DC-only 4x4 reconstruction: one rounded DC value is added to every prediction pixel and the result is stored as 4 rows of 4 bytes.
+ ldr w8, [sp, #8] //Loads iq_start_idx
+ subs w8, w8, #1 // if iq_start_idx == 1 => intra case , so result of subtraction is zero and z flag is set
+
+ ldr x10, [sp, #16] //Load alternate dc address (read before push_v_regs, so the entry offset is used)
+ push_v_regs
+ dup v30.4s, w7 //Populate the u4_qp_div_6 in all four lanes of v30
+
+
+ bne donot_use_pi2_dc_ld_addr_luma_dc
+ ld1 {v0.h}[0], [x10] // intra case: take the DC value from pi2_dc_ld_addr[0]
+donot_use_pi2_dc_ld_addr_luma_dc:
+
+ beq donot_use_pi2_src_luma_dc // intra case: skip the dequantisation (flags still come from the subs above)
+ ld1 {v0.h}[0], [x5] // pu2_iscal_mat[0]
+ ld1 {v1.h}[0], [x6] // pu2_weigh_mat[0]
+ ld1 {v2.h}[0], [x0] // pi2_src[0]
+ mul v0.4h, v1.4h, v0.4h // only lane 0 is meaningful from here until the dup below
+ smull v0.4s, v0.4h, v2.4h
+ sshl v0.4s, v0.4s, v30.4s // << u4_qp_div_6
+ sqrshrn v0.4h, v0.4s, #4 // (x + 8) >> 4, saturated to 16 bits
+donot_use_pi2_src_luma_dc:
+
+
+ dup v0.8h, v0.h[0] // broadcast the DC value to all 8 lanes
+ srshr v0.8h, v0.8h, #6 // (dc + 32) >> 6
+
+ ld1 {v1.s}[0], [x1], x3 // prediction rows i, ii into v1
+ ld1 {v1.s}[1], [x1], x3
+ ld1 {v2.s}[0], [x1], x3 // prediction rows iii, iv into v2
+ ld1 {v2.s}[1], [x1]
+
+ uxtl v1.8h, v1.8b // widen prediction to 16 bits
+ uxtl v2.8h, v2.8b
+
+ add v1.8h, v0.8h, v1.8h // prediction + dc
+ add v2.8h, v0.8h, v2.8h
+
+ sqxtun v1.8b, v1.8h // saturate to unsigned 8 bits
+ sqxtun v2.8b, v2.8h
+
+ st1 {v1.s}[0], [x2], x4 // 4 output rows to pu1_out, stride out_strd
+ st1 {v1.s}[1], [x2], x4
+ st1 {v2.s}[0], [x2], x4
+ st1 {v2.s}[1], [x2]
+ pop_v_regs
+ ret
+
+// /*
+// ********************************************************************************
+// *
+// * @brief This function reconstructs a 4x4 sub block from quantized resiude and
+// * prediction buffer if only dc value is present for residue
+// *
+// * @par Description:
+// * The quantized residue is first inverse quantized,
+// * This inverse quantized content is added to the prediction buffer to recon-
+// * struct the end output
+// *
+// * @param[in] pi2_src
+// * quantized dc coeffiient
+// *
+// * @param[in] pu1_pred
+// * prediction 4x4 block in interleaved format
+// *
+// * @param[in] pred_strd,
+// * Prediction buffer stride in interleaved format
+// *
+// * @param[in] out_strd
+// * recon buffer Stride
+// *
+// * @returns none
+// *
+// * @remarks none
+// *
+// *******************************************************************************
+// */
+// void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
+// UWORD8 *pu1_pred,
+// UWORD8 *pu1_out,
+// WORD32 pred_strd,
+// WORD32 out_strd,
+// const UWORD16 *pu2_iscal_mat,
+// const UWORD16 *pu2_weigh_mat,
+// UWORD32 u4_qp_div_6,
+// WORD16 *pi2_tmp,
+// WORD16 *pi2_dc_src)
+// Register Usage
+// x0 : pi2_src
+// x1 : pu1_pred
+// x2 : pu1_out
+// x3 : pred_strd
+// x4 : out_strd
+// x5 : pu2_iscal_mat
+// x6 : pu2_weigh_mat
+// x7 : u4_qp_div_6
+// : pi2_tmp
+// : pi2_dc_src
+// NEON registers v0-v3, v11, v12 and v31 are used
+// Registers are saved and restored with push_v_regs / pop_v_regs
+
+
+ .global ih264_iquant_itrans_recon_chroma_4x4_dc_av8
+ih264_iquant_itrans_recon_chroma_4x4_dc_av8:
+// DC-only chroma reconstruction: adds the rounded DC value to every second byte of 4 interleaved prediction rows and merges the result into pu1_out.
+ ldr x0, [sp, #8] // x0 = pi2_dc_src (second stack argument); the incoming pi2_src in x0 is not used
+ push_v_regs
+ ld1 {v0.h}[0], [x0] // dc value
+ dup v0.8h, v0.h[0] // broadcast to all 8 lanes
+ srshr v0.8h, v0.8h, #6 // (dc + 32) >> 6
+
+
+ //backup pu1_out
+ mov x0, x2
+
+ //v3 is never written in this routine; it is only a dummy second operand for the uzp1 de-interleave below
+ movi v31.8h, #0x00ff //mask for interleaving [copy lower 8 bits]
+
+ ld1 {v1.d}[0], [x1], x3 // prediction rows i, ii (8 interleaved bytes each) into v1
+ ld1 {v1.d}[1], [x1], x3
+ ld1 {v2.d}[0], [x1], x3 // prediction rows iii, iv into v2
+ ld1 {v2.d}[1], [x1], x3
+
+ ld1 {v11.d}[0], [x2], x4 //load pu1_out for interleaving
+ ld1 {v11.d}[1], [x2], x4
+ ld1 {v12.d}[0], [x2], x4
+ ld1 {v12.d}[1], [x2]
+
+ uzp1 v1.16b, v1.16b, v3.16b // even-indexed bytes of rows i, ii end up in the low 8 bytes
+ uzp1 v2.16b, v2.16b, v3.16b // even-indexed bytes of rows iii, iv end up in the low 8 bytes
+
+ uaddw v1.8h, v0.8h, v1.8b // dc + prediction (only the low 8 bytes of the uzp1 result are read)
+ uaddw v2.8h, v0.8h, v2.8b
+
+ sqxtun v1.8b, v1.8h // saturate to unsigned 8 bits
+ sqxtun v2.8b, v2.8h
+
+ uxtl v1.8h, v1.8b // widen so each value sits in the low byte of a 16-bit pair
+ uxtl v2.8h, v2.8b
+
+ bit v11.16b, v1.16b, v31.16b // insert result bytes where the mask is 0xff, keep the loaded pu1_out bytes elsewhere
+ bit v12.16b, v2.16b, v31.16b
+
+ st1 {v11.d}[0], [x0], x4 // write the 4 merged rows back through the saved pu1_out pointer
+ st1 {v11.d}[1], [x0], x4
+ st1 {v12.d}[0], [x0], x4
+ st1 {v12.d}[1], [x0]
+ pop_v_regs
+ ret
+
+///*
+// *******************************************************************************
+// *
+// * //brief
+// * This function performs inverse quant and Inverse transform type Ci8 for 8*8 block
+// * [Only for Dc coeff]
+// * //par Description:
+// * Performs inverse transform Ci8 and adds the residue to get the
+// * reconstructed block
+// *
+// * //param[in] pi2_src
+// * Input 8x8 coefficients
+// *
+// * //param[in] pu1_pred
+// * Prediction 8x8 block
+// *
+// * //param[out] pu1_out
+// * Output 8x8 block
+// *
+// * //param[in] u4_qp_div_6
+// * QP
+// *
+// * //param[in] pu2_weigh_mat
+// * Pointer to weight matrix
+// *
+// * //param[in] pred_strd,
+// * Prediction stride
+// *
+// * //param[in] out_strd
+// * Output Stride
+// *
+// *//param[in] pi2_tmp
+// * temporary buffer of size 1*64
+// *
+// * //param[in] pu2_iscal_mat
+// * Pointer to the inverse quantization matrix
+// *
+// * //returns Void
+// *
+// * //remarks
+// * None
+// *
+// *******************************************************************************
+// */
+//void ih264_iquant_itrans_recon_dc_8x8(WORD16 *pi2_src,
+// UWORD8 *pu1_pred,
+// UWORD8 *pu1_out,
+// WORD32 pred_strd,
+// WORD32 out_strd,
+// const UWORD16 *pu2_iscal_mat,
+// const UWORD16 *pu2_weigh_mat,
+// UWORD32 u4_qp_div_6,
+// WORD32 *pi4_tmp,
+// WORD32 iq_start_idx
+// WORD16 *pi2_dc_ld_addr)
+//**************Variables Vs Registers*****************************************
+//x0 => *pi2_src
+//x1 => *pu1_pred
+//x2 => *pu1_out
+//x3 => pred_strd
+//x4 => out_strd
+//x5 => *pu2_iscal_mat
+//x6 => *pu2_weigh_mat
+//x7 => u4_qp_div_6
+//NOT USED => pi4_tmp
+//NOT USED => iq_start_idx
+//NOT USED => pi2_dc_ld_addr
+
+ .global ih264_iquant_itrans_recon_8x8_dc_av8
+ih264_iquant_itrans_recon_8x8_dc_av8:
+// DC-only 8x8 reconstruction: dequantises coefficient 0, rounds it, adds it to every prediction pixel and stores 8 rows of 8 bytes.
+ push_v_regs
+
+ ld1 {v1.h}[0], [x5] // pu2_iscal_mat[0]
+ ld1 {v2.h}[0], [x6] // pu2_weigh_mat[0]
+ ld1 {v0.h}[0], [x0] // pi2_src[0]
+ dup v3.4s, w7 // u4_qp_div_6 as shift amount
+
+
+ mul v1.8h, v1.8h, v2.8h // only lane 0 is meaningful until the dup below
+ smull v0.4s, v0.4h, v1.4h
+ sshl v0.4s, v0.4s, v3.4s // << u4_qp_div_6
+
+ sqrshrn v0.4h, v0.4s, #6 // (x + 32) >> 6, saturated to 16 bits
+ srshr v0.8h, v0.8h, #6 // (dc + 32) >> 6
+ dup v0.8h, v0.h[0] // broadcast lane 0 to all 8 lanes
+
+ ld1 {v22.8b}, [x1], x3 // 8 rows of pu1_pred, 8 bytes each
+ ld1 {v23.8b}, [x1], x3
+ ld1 {v24.8b}, [x1], x3
+ ld1 {v25.8b}, [x1], x3
+ ld1 {v26.8b}, [x1], x3
+ ld1 {v27.8b}, [x1], x3
+ ld1 {v28.8b}, [x1], x3
+ ld1 {v29.8b}, [x1]
+
+ uaddw v1.8h, v0.8h, v22.8b // dc + prediction
+ uaddw v2.8h, v0.8h, v23.8b
+ uaddw v3.8h, v0.8h, v24.8b
+ uaddw v8.8h, v0.8h, v25.8b
+ uaddw v9.8h, v0.8h, v26.8b
+ uaddw v10.8h, v0.8h, v27.8b
+ uaddw v11.8h, v0.8h, v28.8b
+ uaddw v12.8h, v0.8h, v29.8b
+
+ sqxtun v1.8b, v1.8h // saturate to unsigned 8 bits
+ sqxtun v2.8b, v2.8h
+ sqxtun v3.8b, v3.8h
+ sqxtun v8.8b, v8.8h
+ sqxtun v9.8b, v9.8h
+ sqxtun v10.8b, v10.8h
+ sqxtun v11.8b, v11.8h
+ sqxtun v12.8b, v12.8h
+
+ st1 {v1.8b}, [x2], x4 // 8 output rows to pu1_out, stride out_strd
+ st1 {v2.8b}, [x2], x4
+ st1 {v3.8b}, [x2], x4
+ st1 {v8.8b}, [x2], x4
+ st1 {v9.8b}, [x2], x4
+ st1 {v10.8b}, [x2], x4
+ st1 {v11.8b}, [x2], x4
+ st1 {v12.8b}, [x2]
+
+ pop_v_regs
+ ret
+
+
diff --git a/common/armv8/ih264_mem_fns_neon_av8.s b/common/armv8/ih264_mem_fns_neon_av8.s
new file mode 100755
index 0000000..f5c2e29
--- /dev/null
+++ b/common/armv8/ih264_mem_fns_neon_av8.s
@@ -0,0 +1,274 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// * ih264_mem_fns_neon_av8.s
+// *
+// * @brief
+// * Contains function definitions for memory manipulation
+// *
+// * @author
+// * Naveen SR
+// *
+// * @par List of Functions:
+// * - ih264_memcpy_av8()
+// * - ih264_memcpy_mul_8_av8()
+// * - ih264_memset_av8()
+// * - ih264_memset_mul_8_av8()
+// * - ih264_memset_16bit_mul_8_av8()
+// * - ih264_memset_16bit_av8()
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+//*/
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* memcpy of a 1d array
+//*
+//* @par Description:
+//* Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes
+//*
+//* @param[in] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] num_bytes
+//* number of bytes to copy
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
+// UWORD8 *pu1_src,
+// UWORD8 num_bytes)
+//**************Variables Vs Registers*************************
+// x0 => *pu1_dst
+// x1 => *pu1_src
+// x2 => num_bytes
+
+
+
+
+
+ .global ih264_memcpy_mul_8_av8
+
+// Copies x2 bytes from x1 (pu1_src) to x0 (pu1_dst), eight bytes per pass.
+// The loop only stops when the remaining count is exactly zero, so the
+// count has to be a non-zero multiple of 8.
+ih264_memcpy_mul_8_av8:
+
+copy_next_8_bytes:
+ ld1 {v0.8b}, [x1], #8 // fetch 8 source bytes, advance src
+ st1 {v0.8b}, [x0], #8 // write them out, advance dst
+
+ sub x2, x2, #8 // 8 fewer bytes remaining
+ cbnz x2, copy_next_8_bytes
+ ret
+
+
+
+//*******************************************************************************
+//*/
+//void ih264_memcpy(UWORD8 *pu1_dst,
+// UWORD8 *pu1_src,
+// UWORD8 num_bytes)
+//**************Variables Vs Registers*************************
+// x0 => *pu1_dst
+// x1 => *pu1_src
+// x2 => num_bytes
+
+
+
+ .global ih264_memcpy_av8
+
+// Copies x2 bytes from x1 (pu1_src) to x0 (pu1_dst).
+// Whole groups of 8 bytes go through a NEON register; the remaining
+// 1..7 bytes are copied one at a time. A count of zero copies nothing.
+ih264_memcpy_av8:
+ cbz x2, end_func1 // zero bytes: the byte loop below would wrap its counter
+ subs x2, x2, #8
+ blt arm_memcpy // fewer than 8 bytes in total
+loop_neon_memcpy:
+ // copy 8 bytes
+ ld1 {v0.8b}, [x1], #8
+ st1 {v0.8b}, [x0], #8
+
+ subs x2, x2, #8
+ bge loop_neon_memcpy
+ cmp x2, #-8 // count was an exact multiple of 8: no tail
+ beq end_func1
+
+arm_memcpy:
+ add x2, x2, #8 // undo the bias: x2 = 1..7 tail bytes
+
+loop_arm_memcpy:
+ ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ bne loop_arm_memcpy
+end_func1:
+ ret
+
+
+//void ih264_memset_mul_8(UWORD8 *pu1_dst,
+// UWORD8 value,
+// UWORD8 num_bytes)
+//**************Variables Vs Registers*************************
+// x0 => *pu1_dst
+// x1 => value
+// x2 => num_bytes
+
+
+ .global ih264_memset_mul_8_av8
+
+// Fills x2 bytes at x0 (pu1_dst) with the low byte of w1 (value),
+// eight bytes per pass. Callers are expected to pass 8, 16 or 32; the
+// loop only stops when the remaining count is exactly zero.
+ih264_memset_mul_8_av8:
+
+ dup v0.8b, w1 // replicate the fill byte across 8 lanes
+fill_next_8_bytes:
+ st1 {v0.8b}, [x0], #8 // write 8 bytes, advance dst
+
+ sub x2, x2, #8
+ cbnz x2, fill_next_8_bytes
+
+ ret
+
+
+//void ih264_memset(UWORD8 *pu1_dst,
+// UWORD8 value,
+// UWORD8 num_bytes)
+//**************Variables Vs Registers*************************
+// x0 => *pu1_dst
+// x1 => value
+// x2 => num_bytes
+
+
+
+ .global ih264_memset_av8
+
+// Fills x2 bytes at x0 (pu1_dst) with the low byte of w1 (value).
+// Whole groups of 8 bytes are written from a NEON register; the
+// remaining 1..7 bytes are written one at a time. A count of zero
+// writes nothing.
+ih264_memset_av8:
+ cbz x2, end_func2 // zero bytes: the byte loop below would wrap its counter
+ subs x2, x2, #8
+ blt arm_memset // fewer than 8 bytes in total
+ dup v0.8b, w1
+loop_neon_memset:
+ // set 8 bytes
+ st1 {v0.8b}, [x0], #8
+
+ subs x2, x2, #8
+ bge loop_neon_memset
+ cmp x2, #-8 // count was an exact multiple of 8: no tail
+ beq end_func2
+
+arm_memset:
+ add x2, x2, #8 // undo the bias: x2 = 1..7 tail bytes
+
+loop_arm_memset:
+ strb w1, [x0], #1
+ subs x2, x2, #1
+ bne loop_arm_memset
+end_func2:
+ ret
+
+
+
+
+
+//void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
+// UWORD16 value,
+// UWORD8 num_words)
+//**************Variables Vs Registers*************************
+// x0 => *pu2_dst
+// x1 => value
+// x2 => num_words
+
+
+ .global ih264_memset_16bit_mul_8_av8
+
+// Fills x2 16-bit words at x0 (pu2_dst) with the low halfword of w1
+// (value), eight words (16 bytes) per pass. Callers are expected to pass
+// 8, 16 or 32; the loop only stops when the remaining count is exactly
+// zero.
+ih264_memset_16bit_mul_8_av8:
+
+ dup v0.8h, w1 // replicate the fill value across 8 halfword lanes
+fill_next_8_words:
+ st1 {v0.8h}, [x0], #16 // write 8 words, advance dst by 16 bytes
+
+ sub x2, x2, #8
+ cbnz x2, fill_next_8_words
+
+ ret
+
+
+
+//void ih264_memset_16bit(UWORD16 *pu2_dst,
+// UWORD16 value,
+// UWORD8 num_words)
+//**************Variables Vs Registers*************************
+// x0 => *pu2_dst
+// x1 => value
+// x2 => num_words
+
+
+
+ .global ih264_memset_16bit_av8
+
+// Fills x2 16-bit words at x0 (pu2_dst) with the low halfword of w1
+// (value). Whole groups of 8 words are written from a NEON register;
+// the remaining 1..7 words are written one at a time. A count of zero
+// writes nothing.
+ih264_memset_16bit_av8:
+ cbz x2, end_func3 // zero words: the tail loop below would wrap its counter
+ subs x2, x2, #8
+ blt arm_memset_16bit // fewer than 8 words in total
+ dup v0.4h, w1
+loop_neon_memset_16bit:
+ // set 8 words (2 x 4 halfwords)
+ st1 {v0.4h}, [x0], #8
+ st1 {v0.4h}, [x0], #8
+
+ subs x2, x2, #8
+ bge loop_neon_memset_16bit
+ cmp x2, #-8 // count was an exact multiple of 8: no tail
+ beq end_func3
+
+arm_memset_16bit:
+ add x2, x2, #8 // undo the bias: x2 = 1..7 tail words
+
+loop_arm_memset_16bit:
+ strh w1, [x0], #2
+ subs x2, x2, #1
+ bne loop_arm_memset_16bit
+
+end_func3:
+ ret
+
+
+
diff --git a/common/armv8/ih264_neon_macros.s b/common/armv8/ih264_neon_macros.s
new file mode 100755
index 0000000..6ff5b91
--- /dev/null
+++ b/common/armv8/ih264_neon_macros.s
@@ -0,0 +1,41 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+//*******************************************************************************
+
+
+// Spill the callee-saved halves d8-d15 into a fresh 64-byte stack frame.
+// Layout (low to high address): d14,d15 | d12,d13 | d10,d11 | d8,d9.
+.macro push_v_regs
+ sub sp, sp, #64
+ stp d14, d15, [sp]
+ stp d12, d13, [sp, #16]
+ stp d10, d11, [sp, #32]
+ stp d8, d9, [sp, #48]
+.endm
+// Reload d8-d15 from the frame written by push_v_regs and release it.
+.macro pop_v_regs
+ ldp d14, d15, [sp]
+ ldp d12, d13, [sp, #16]
+ ldp d10, d11, [sp, #32]
+ ldp d8, d9, [sp, #48]
+ add sp, sp, #64
+.endm
+
+// Exchange the contents of two registers with three XORs, without using a
+// scratch register. The two arguments must name different registers: if
+// both are the same register the first EOR clears it and the value is lost.
+.macro swp reg1, reg2
+ eor \reg1, \reg1, \reg2
+ eor \reg2, \reg1, \reg2
+ eor \reg1, \reg1, \reg2
+.endm
+
diff --git a/common/armv8/ih264_padding_neon_av8.s b/common/armv8/ih264_padding_neon_av8.s
new file mode 100755
index 0000000..35d9c8a
--- /dev/null
+++ b/common/armv8/ih264_padding_neon_av8.s
@@ -0,0 +1,784 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// * ih264_padding_neon_av8.s
+// *
+// * @brief
+// * Contains function definitions for padding
+// *
+// * @author
+// * Ittiam
+// *
+// * @par List of Functions:
+// * - ih264_pad_top_av8()
+// * - ih264_pad_left_luma_av8()
+// * - ih264_pad_left_chroma_av8()
+// * - ih264_pad_right_luma_av8()
+// * - ih264_pad_right_chroma_av8()
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+//*/
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+///**
+//*******************************************************************************
+//*
+//* @brief pad at the top of a 2d array
+//*
+//* @par Description:
+//* The top row of a 2d array is replicated for pad_size times at the top
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pad_size
+//* integer -padding size of the array
+//*
+//* @returns none
+//*
+//* @remarks none
+//*
+//*******************************************************************************
+//*/
+//void ih264_pad_top(UWORD8 *pu1_src,
+// WORD32 src_strd,
+// WORD32 wd,
+// WORD32 pad_size)
+//**************Variables Vs Registers*************************
+// x0 => *pu1_src
+// x1 => src_strd
+// x2 => wd
+// x3 => pad_size
+
+ .global ih264_pad_top_av8
+
+ih264_pad_top_av8:
+
+ // Replicates the row at pu1_src into the pad_size rows above it.
+ // x0 = pu1_src, w1 = src_strd, w2 = wd, w3 = pad_size
+ // The row is handled in 16-byte columns. Both loops stop only when
+ // their counter reaches exactly zero, so wd has to be a non-zero
+ // multiple of 16 and pad_size has to be non-zero.
+ // Only x0-x7 and v0/v1 are written, none of which are callee-saved,
+ // so nothing is spilled to the stack.
+
+ // The int arguments arrive in w1-w3; AAPCS64 leaves the upper halves
+ // of x1-x3 unspecified, so widen them before any 64-bit use.
+ sxtw x1, w1
+ sxtw x2, w2
+ sxtw x3, w3
+
+ sub x5, x0, x1 // x5 = first pad row (directly above the source row)
+ neg x6, x1 // x6 = -src_strd: each store steps one row up
+
+loop_neon_memcpy_mul_16:
+ // Load 16 bytes of the source row
+ ld1 {v0.8b, v1.8b}, [x0], #16
+ mov x4, x5 // store pointer for this column
+ mov x7, x3 // rows left to fill in this column
+ add x5, x5, #16 // next column of the first pad row
+
+loop_neon_pad_top:
+ st1 {v0.8b, v1.8b}, [x4], x6
+ subs x7, x7, #1
+ bne loop_neon_pad_top
+
+ subs x2, x2, #16
+ bne loop_neon_memcpy_mul_16
+
+ ret
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Padding (luma block) at the left of a 2d array
+//*
+//* @par Description:
+//* The left column of a 2d array is replicated for pad_size times at the left
+//*
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pad_size
+//* integer -padding size of the array
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//#if PAD_LEFT_LUMA == C
+//void ih264_pad_left_luma(UWORD8 *pu1_src,
+// WORD32 src_strd,
+// WORD32 ht,
+// WORD32 pad_size)
+//**************Variables Vs Registers*************************
+// x0 => *pu1_src
+// x1 => src_strd
+// x2 => ht
+// x3 => pad_size
+
+
+
+ .global ih264_pad_left_luma_av8
+
+ih264_pad_left_luma_av8:
+
+ // For every row, replicates the byte at pu1_src into the bytes that
+ // start pad_size bytes before it.
+ // x0 = pu1_src, w1 = src_strd, w2 = ht, w3 = pad_size
+ // pad_size == 16 writes 16 bytes per row; any other value takes the
+ // path that writes 32 bytes per row. Eight rows are handled per pass
+ // and the loops stop only when the row count reaches exactly zero,
+ // so ht has to be a non-zero multiple of 8.
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ // The int arguments arrive in w1-w3; AAPCS64 leaves the upper halves
+ // of x1-x3 unspecified, so widen them before any 64-bit use.
+ sxtw x1, w1
+ sxtw x2, w2
+ sxtw x3, w3
+
+ sub x4, x0, x3 // x4 = first byte of the left padding
+ sub x6, x1, #16 // stride minus the 16 bytes already stepped (32-byte path)
+ cmp x3, #16
+ bne loop_32
+loop_16: // /*hard coded for width=16 ,height =8,16*/
+ ldrb w8, [x0]
+ add x0, x0, x1
+ ldrb w9, [x0]
+ add x0, x0, x1
+ dup v0.16b, w8
+ ldrb w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], x1 // 16 bytes store
+ dup v2.16b, w9
+ st1 {v2.16b}, [x4], x1 // 16 bytes store
+ ldrb w11, [x0]
+ add x0, x0, x1
+ dup v4.16b, w10
+ dup v6.16b, w11
+ st1 {v4.16b}, [x4], x1 // 16 bytes store
+ ldrb w8, [x0]
+ add x0, x0, x1
+ st1 {v6.16b}, [x4], x1 // 16 bytes store
+ ldrb w9, [x0]
+ add x0, x0, x1
+ dup v0.16b, w8
+ ldrb w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], x1 // 16 bytes store
+ dup v2.16b, w9
+ ldrb w11, [x0]
+ add x0, x0, x1
+ st1 {v2.16b}, [x4], x1 // 16 bytes store
+ dup v4.16b, w10
+ dup v6.16b, w11
+ subs x2, x2, #8 // flags are consumed by the bne after the stores
+ st1 {v4.16b}, [x4], x1 // 16 bytes store
+ st1 {v6.16b}, [x4], x1 // 16 bytes store
+ bne loop_16
+ b end_func
+
+loop_32: // /*hard coded for width=32 ,height =8,16*/
+ // each row: 16 bytes, step #16, 16 bytes, step (stride - 16)
+ ldrb w8, [x0]
+ add x0, x0, x1
+ ldrb w9, [x0]
+ add x0, x0, x1
+ dup v0.16b, w8
+ ldrb w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], #16 // 16 bytes store
+ dup v2.16b, w9
+ st1 {v0.16b}, [x4], x6 // 16 bytes store
+ st1 {v2.16b}, [x4], #16 // 16 bytes store
+ dup v4.16b, w10
+ st1 {v2.16b}, [x4], x6 // 16 bytes store
+ ldrb w11, [x0]
+ add x0, x0, x1
+ st1 {v4.16b}, [x4], #16 // 16 bytes store
+ dup v6.16b, w11
+ st1 {v4.16b}, [x4], x6 // 16 bytes store
+ ldrb w8, [x0]
+ add x0, x0, x1
+ st1 {v6.16b}, [x4], #16 // 16 bytes store
+ dup v0.16b, w8
+ ldrb w9, [x0]
+ add x0, x0, x1
+ st1 {v6.16b}, [x4], x6 // 16 bytes store
+ ldrb w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], #16 // 16 bytes store
+ dup v2.16b, w9
+ st1 {v0.16b}, [x4], x6 // 16 bytes store
+ ldrb w11, [x0]
+ add x0, x0, x1
+ st1 {v2.16b}, [x4], #16 // 16 bytes store
+ dup v4.16b, w10
+ st1 {v2.16b}, [x4], x6 // 16 bytes store
+ st1 {v4.16b}, [x4], #16 // 16 bytes store
+ dup v6.16b, w11
+ st1 {v4.16b}, [x4], x6 // 16 bytes store
+ subs x2, x2, #8 // flags are consumed by the bne after the stores
+ st1 {v6.16b}, [x4], #16 // 16 bytes store
+ st1 {v6.16b}, [x4], x6 // 16 bytes store
+ bne loop_32
+
+
+
+end_func:
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Padding (chroma block) at the left of a 2d array
+//*
+//* @par Description:
+//* The left column of a 2d array is replicated for pad_size times at the left
+//*
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array (each colour component)
+//*
+//* @param[in] pad_size
+//* integer -padding size of the array
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//#if PAD_LEFT_CHROMA == C
+//void ih264_pad_left_chroma(UWORD8 *pu1_src,
+// WORD32 src_strd,
+// WORD32 ht,
+// WORD32 pad_size)
+//{
+// x0 => *pu1_src
+// x1 => src_strd
+// x2 => ht
+// x3 => pad_size
+
+
+
+ .global ih264_pad_left_chroma_av8
+
+ih264_pad_left_chroma_av8:
+
+ // For every row, replicates the 16-bit pair at pu1_src into 32 bytes
+ // that start pad_size bytes before it.
+ // x0 = pu1_src, w1 = src_strd, w2 = ht, w3 = pad_size
+ // Rows are handled four at a time and the loop stops only when the
+ // row count reaches exactly zero, so ht has to be a non-zero multiple
+ // of 4.
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ // The int arguments arrive in w1-w3; AAPCS64 leaves the upper halves
+ // of x1-x3 unspecified, so widen them before any 64-bit use.
+ sxtw x1, w1
+ sxtw x2, w2
+ sxtw x3, w3
+
+ sub x4, x0, x3 // x4 = first byte of the left padding
+ sub x6, x1, #16 // stride minus the 16 bytes already stepped
+
+
+loop_32_l_c: // /*hard coded for width=32 ,height =4,8,12*/
+ ldrh w8, [x0]
+ add x0, x0, x1
+ ldrh w9, [x0]
+ add x0, x0, x1
+ dup v0.8h, w8
+ ldrh w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], #16 // 16 bytes store
+ dup v2.8h, w9
+ st1 {v0.16b}, [x4], x6 // 16 bytes store
+ ldrh w11, [x0]
+ add x0, x0, x1
+ st1 {v2.16b}, [x4], #16 // 16 bytes store
+ dup v4.8h, w10
+ st1 {v2.16b}, [x4], x6 // 16 bytes store
+ dup v6.8h, w11
+ st1 {v4.16b}, [x4], #16 // 16 bytes store
+ st1 {v4.16b}, [x4], x6 // 16 bytes store
+ subs x2, x2, #4 // flags are consumed by the beq after the stores
+ st1 {v6.16b}, [x4], #16 // 16 bytes store
+ st1 {v6.16b}, [x4], x6 // 16 bytes store
+
+
+ beq end_func_l_c ///* Branching when ht=4*/
+
+ ldrh w8, [x0]
+ add x0, x0, x1
+ ldrh w9, [x0]
+ add x0, x0, x1
+ dup v0.8h, w8
+ ldrh w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], #16 // 16 bytes store
+ dup v2.8h, w9
+ st1 {v0.16b}, [x4], x6 // 16 bytes store
+ ldrh w11, [x0]
+ add x0, x0, x1
+ st1 {v2.16b}, [x4], #16 // 16 bytes store
+ dup v4.8h, w10
+ st1 {v2.16b}, [x4], x6 // 16 bytes store
+ dup v6.8h, w11
+ st1 {v4.16b}, [x4], #16 // 16 bytes store
+ st1 {v4.16b}, [x4], x6 // 16 bytes store
+ subs x2, x2, #4 // flags are consumed by the bne after the stores
+ st1 {v6.16b}, [x4], #16 // 16 bytes store
+ st1 {v6.16b}, [x4], x6 // 16 bytes store
+
+ // rows remain: go round again; otherwise fall through to the exit
+ bne loop_32_l_c
+
+end_func_l_c:
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Padding (luma block) at the right of a 2d array
+//*
+//* @par Description:
+//* The right column of a 2d array is replicated for pad_size times at the right
+//*
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @param[in] pad_size
+//* integer -padding size of the array
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//#if PAD_RIGHT_LUMA == C
+//void ih264_pad_right_luma(UWORD8 *pu1_src,
+// WORD32 src_strd,
+// WORD32 ht,
+// WORD32 pad_size)
+//{
+// WORD32 row;
+//
+// for(row = 0; row < ht; row++)
+// {
+// memset(pu1_src, *(pu1_src -1), pad_size);
+//
+// pu1_src += src_strd;
+// }
+//}
+//
+// x0 => *pu1_src
+// x1 => src_strd
+// x2 => ht
+// x3 => pad_size
+
+
+
+ .global ih264_pad_right_luma_av8
+
+ih264_pad_right_luma_av8:
+
+ // For every row, replicates the byte just before pu1_src into the
+ // bytes starting at pu1_src.
+ // x0 = pu1_src, w1 = src_strd, w2 = ht, w3 = pad_size
+ // pad_size == 16 writes 16 bytes per row; any other value takes the
+ // path that writes 32 bytes per row. Eight rows are handled per pass
+ // and the loops stop only when the row count reaches exactly zero,
+ // so ht has to be a non-zero multiple of 8.
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ // The int arguments arrive in w1-w3; AAPCS64 leaves the upper halves
+ // of x1-x3 unspecified, so widen them before any 64-bit use.
+ sxtw x1, w1
+ sxtw x2, w2
+ sxtw x3, w3
+
+ mov x4, x0 // x4 = first byte of the right padding
+ sub x6, x1, #16 // stride minus the 16 bytes already stepped (32-byte path)
+ sub x0, x0, #1 // x0 = last valid pixel of the row
+ cmp x3, #16
+ bne loop_32_r // use this function's own 32-byte loop
+loop_16_r: // /*hard coded for width=16 ,height =8,16*/
+ ldrb w8, [x0]
+ add x0, x0, x1
+ ldrb w9, [x0]
+ add x0, x0, x1
+ dup v0.16b, w8
+ ldrb w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], x1 // 16 bytes store
+ dup v2.16b, w9
+ st1 {v2.16b}, [x4], x1 // 16 bytes store
+ ldrb w11, [x0]
+ add x0, x0, x1
+ dup v4.16b, w10
+ dup v6.16b, w11
+ st1 {v4.16b}, [x4], x1 // 16 bytes store
+ ldrb w8, [x0]
+ add x0, x0, x1
+ st1 {v6.16b}, [x4], x1 // 16 bytes store
+ ldrb w9, [x0]
+ add x0, x0, x1
+ dup v0.16b, w8
+ ldrb w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], x1 // 16 bytes store
+ dup v2.16b, w9
+ ldrb w11, [x0]
+ add x0, x0, x1
+ st1 {v2.16b}, [x4], x1 // 16 bytes store
+ dup v4.16b, w10
+ dup v6.16b, w11
+ subs x2, x2, #8 // flags are consumed by the bne after the stores
+ st1 {v4.16b}, [x4], x1 // 16 bytes store
+ st1 {v6.16b}, [x4], x1 // 16 bytes store
+ bne loop_16_r
+ b end_func_r
+
+loop_32_r: // /*hard coded for width=32 ,height =8,16*/
+ // each row: 16 bytes, step #16, 16 bytes, step (stride - 16)
+ ldrb w8, [x0]
+ add x0, x0, x1
+ ldrb w9, [x0]
+ add x0, x0, x1
+ dup v0.16b, w8
+ ldrb w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], #16 // 16 bytes store
+ dup v2.16b, w9
+ st1 {v0.16b}, [x4], x6 // 16 bytes store
+ st1 {v2.16b}, [x4], #16 // 16 bytes store
+ dup v4.16b, w10
+ st1 {v2.16b}, [x4], x6 // 16 bytes store
+ ldrb w11, [x0]
+ add x0, x0, x1
+ st1 {v4.16b}, [x4], #16 // 16 bytes store
+ dup v6.16b, w11
+ st1 {v4.16b}, [x4], x6 // 16 bytes store
+ ldrb w8, [x0]
+ add x0, x0, x1
+ st1 {v6.16b}, [x4], #16 // 16 bytes store
+ ldrb w9, [x0]
+ add x0, x0, x1
+ dup v0.16b, w8
+ st1 {v6.16b}, [x4], x6 // 16 bytes store
+ ldrb w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], #16 // 16 bytes store
+ dup v2.16b, w9
+ st1 {v0.16b}, [x4], x6 // 16 bytes store
+ ldrb w11, [x0]
+ add x0, x0, x1
+ st1 {v2.16b}, [x4], #16 // 16 bytes store
+ dup v4.16b, w10
+ st1 {v2.16b}, [x4], x6 // 16 bytes store
+ st1 {v4.16b}, [x4], #16 // 16 bytes store
+ dup v6.16b, w11
+ st1 {v4.16b}, [x4], x6 // 16 bytes store
+ subs x2, x2, #8 // flags are consumed by the bne after the stores
+ st1 {v6.16b}, [x4], #16 // 16 bytes store
+ st1 {v6.16b}, [x4], x6 // 16 bytes store
+ bne loop_32_r
+
+
+
+end_func_r:
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* Padding (chroma block) at the right of a 2d array
+//*
+//* @par Description:
+//* The right column of a 2d array is replicated for pad_size times at the right
+//*
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array (each colour component)
+//*
+//* @param[in] pad_size
+//* integer -padding size of the array
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//#if PAD_RIGHT_CHROMA == C
+//void ih264_pad_right_chroma(UWORD8 *pu1_src,
+// WORD32 src_strd,
+// WORD32 ht,
+// WORD32 pad_size)
+// x0 => *pu1_src
+// x1 => src_strd
+// x2 => ht
+// x3 => pad_size
+
+
+
+ .global ih264_pad_right_chroma_av8
+
+ih264_pad_right_chroma_av8:
+
+ // For every row, replicates the 16-bit pair just before pu1_src into
+ // the 32 bytes starting at pu1_src.
+ // x0 = pu1_src, w1 = src_strd, w2 = ht, w3 = pad_size
+ // pad_size is not read: 32 bytes are always written per row.
+ // Rows are handled four at a time and the loop stops only when the
+ // row count reaches exactly zero, so ht has to be a non-zero multiple
+ // of 4.
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ // The int arguments arrive in w1/w2; AAPCS64 leaves the upper halves
+ // of x1/x2 unspecified, so widen them before any 64-bit use.
+ sxtw x1, w1
+ sxtw x2, w2
+
+ mov x4, x0 // x4 = first byte of the right padding
+ sub x6, x1, #16 // stride minus the 16 bytes already stepped
+ sub x0, x0, #2 // x0 = last valid 16-bit pair of the row
+loop_32_r_c: // /*hard coded for width=32 ,height =8,4*/
+ ldrh w8, [x0]
+ add x0, x0, x1
+ ldrh w9, [x0]
+ add x0, x0, x1
+ dup v0.8h, w8
+ ldrh w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], #16 // 16 bytes store
+ dup v2.8h, w9
+ st1 {v0.16b}, [x4], x6 // 16 bytes store
+ st1 {v2.16b}, [x4], #16 // 16 bytes store
+ dup v4.8h, w10
+ st1 {v2.16b}, [x4], x6 // 16 bytes store
+ subs x2, x2, #4 // flags are consumed by the beq after the stores
+ ldrh w11, [x0]
+ add x0, x0, x1
+ st1 {v4.16b}, [x4], #16 // 16 bytes store
+ dup v6.8h, w11
+ st1 {v4.16b}, [x4], x6 // 16 bytes store
+ st1 {v6.16b}, [x4], #16 // 16 bytes store
+ st1 {v6.16b}, [x4], x6 // 16 bytes store
+
+ beq end_func_r_c ///* Branching when ht=4*/
+
+ ldrh w8, [x0]
+ add x0, x0, x1
+ dup v0.8h, w8
+ ldrh w9, [x0]
+ add x0, x0, x1
+ ldrh w10, [x0]
+ add x0, x0, x1
+ st1 {v0.16b}, [x4], #16 // 16 bytes store
+ dup v2.8h, w9
+ st1 {v0.16b}, [x4], x6 // 16 bytes store
+ ldrh w11, [x0]
+ add x0, x0, x1
+ st1 {v2.16b}, [x4], #16 // 16 bytes store
+ dup v4.8h, w10
+ st1 {v2.16b}, [x4], x6 // 16 bytes store
+ st1 {v4.16b}, [x4], #16 // 16 bytes store
+ dup v6.8h, w11
+ st1 {v4.16b}, [x4], x6 // 16 bytes store
+ subs x2, x2, #4 // flags are consumed by the bne after the stores
+ st1 {v6.16b}, [x4], #16 // 16 bytes store
+ st1 {v6.16b}, [x4], x6 // 16 bytes store
+
+ // rows remain: go round again; otherwise fall through to the exit
+ bne loop_32_r_c
+
+end_func_r_c:
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+
diff --git a/common/armv8/ih264_platform_macros.h b/common/armv8/ih264_platform_macros.h
new file mode 100755
index 0000000..1f67403
--- /dev/null
+++ b/common/armv8/ih264_platform_macros.h
@@ -0,0 +1,152 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IHEVC_PLATFORM_MACROS_H_
+#define _IHEVC_PLATFORM_MACROS_H_
+
+#ifndef ARMV8
+void ih264_arm_dsb(void);
+
+#define DATA_SYNC() ih264_arm_dsb()
+/* Saturating clips implemented with the ARM USAT/SSAT instructions.
+ * Each helper takes a WORD32 and returns it saturated to the stated
+ * bit width; x is used as both the input and the output operand. */
+
+/* Saturate x to an unsigned 8-bit value (usat #8). */
+static __inline WORD32 CLIP_U8(WORD32 x)
+{
+ asm("usat %0, #8, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+
+/* Saturate x to a signed 8-bit value (ssat #8). */
+static __inline WORD32 CLIP_S8(WORD32 x)
+{
+ asm("ssat %0, #8, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+
+/* Saturate x to an unsigned 10-bit value (usat #10). */
+static __inline WORD32 CLIP_U10(WORD32 x)
+{
+ asm("usat %0, #10, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+
+/* Saturate x to a signed 10-bit value (ssat #10). */
+static __inline WORD32 CLIP_S10(WORD32 x)
+{
+ asm("ssat %0, #10, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+
+/* Saturate x to an unsigned 12-bit value (usat #12). */
+static __inline WORD32 CLIP_U12(WORD32 x)
+{
+ asm("usat %0, #12, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+
+/* Saturate x to a signed 12-bit value (ssat #12). */
+static __inline WORD32 CLIP_S12(WORD32 x)
+{
+ asm("ssat %0, #12, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+
+/* Saturate x to an unsigned 16-bit value (usat #16). */
+static __inline WORD32 CLIP_U16(WORD32 x)
+{
+ asm("usat %0, #16, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+/* Saturate x to a signed 16-bit value (ssat #16). */
+static __inline WORD32 CLIP_S16(WORD32 x)
+{
+ asm("ssat %0, #16, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+
+
+/* Reverse the byte order of a 32-bit word using the ARM REV instruction. */
+static __inline UWORD32 ITT_BIG_ENDIAN(UWORD32 x)
+{
+ asm("rev %0, %1" : "=r"(x) : "r"(x));
+ return x;
+}
+#else
+/* Full memory barrier. The non-ARMV8 branch maps DATA_SYNC() to
+ * ih264_arm_dsb(); here the GCC/Clang builtin emits an equivalent full
+ * barrier instead of expanding to nothing. Use as a statement:
+ * DATA_SYNC(); */
+#define DATA_SYNC() __sync_synchronize()
+
+/* Portable clips for the ARMV8 build: each one forwards to CLIP3 (not
+ * defined in this header) with the bounds of the named bit width. */
+#define CLIP_U8(x) CLIP3(0, 255, (x))
+#define CLIP_S8(x) CLIP3(-128, 127, (x))
+
+#define CLIP_U10(x) CLIP3(0, 1023, (x))
+#define CLIP_S10(x) CLIP3(-512, 511, (x))
+
+#define CLIP_U12(x) CLIP3(0, 4095, (x))
+#define CLIP_S12(x) CLIP3(-2048, 2047, (x))
+
+#define CLIP_U16(x) CLIP3(0, 65535, (x))
+#define CLIP_S16(x) CLIP3(-32768, 32767, (x))
+
+/* Reverse the byte order of a 32-bit value. The result is a single
+ * parenthesised UWORD32 expression with no trailing semicolon, so it can
+ * be used inside larger expressions. The argument is evaluated more than
+ * once and must not have side effects. */
+#define ITT_BIG_ENDIAN(x) ((((UWORD32)(x) & 0x000000ffU) << 24) | \
+ (((UWORD32)(x) & 0x0000ff00U) << 8) | \
+ (((UWORD32)(x) & 0x00ff0000U) >> 8) | \
+ ((UWORD32)(x) >> 24))
+#endif
+
+/* Shifts that evaluate to 0 when the shift count is 32 or more. */
+#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0)
+#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0)
+
+/* Shifts that reverse direction when the count is negative. Arguments are
+ * fully parenthesised so that expressions such as (a | b) or (n - 1) can
+ * be passed safely; they are still evaluated more than once. */
+#define SHR_NEG(val,shift) (((shift) > 0) ? ((val) >> (shift)) : ((val) << (-(shift))))
+#define SHL_NEG(val,shift) (((shift) < 0) ? ((val) >> (-(shift))) : ((val) << (shift)))
+
+#define INLINE inline
+
+/* Number of leading zero bits in u4_word; a zero word reports 32. */
+static INLINE UWORD32 CLZ(UWORD32 u4_word)
+{
+ /* zero is answered here and never passed to __builtin_clz */
+ return (0 == u4_word) ? 32 : (UWORD32)__builtin_clz(u4_word);
+}
+/* Number of trailing zero bits in u4_word. Note that a zero word reports
+ * 31 (not 32). */
+static INLINE UWORD32 CTZ(UWORD32 u4_word)
+{
+ if(u4_word != 0)
+ return (UWORD32)__builtin_ctz(u4_word);
+
+ /* zero is answered here and never passed to __builtin_ctz */
+ return 31;
+}
+
+
+/* Empty counting loop of nop_cnt iterations. The count is parenthesised so
+ * that an expression argument is compared as a whole.
+ * NOTE(review): the loop has no side effects, so an optimising compiler
+ * may remove it - confirm before relying on it as a delay. */
+#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < (nop_cnt); nop_i++);}
+
+
+#define MEM_ALIGN8 __attribute__ ((aligned (8)))
+#define MEM_ALIGN16 __attribute__ ((aligned (16)))
+#define MEM_ALIGN32 __attribute__ ((aligned (32)))
+
+#endif /* _IHEVC_PLATFORM_MACROS_H_ */
diff --git a/common/armv8/ih264_resi_trans_quant_av8.s b/common/armv8/ih264_resi_trans_quant_av8.s
new file mode 100755
index 0000000..dc1c680
--- /dev/null
+++ b/common/armv8/ih264_resi_trans_quant_av8.s
@@ -0,0 +1,731 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///*****************************************************************************/
+///**
+//*******************************************************************************
+//* @file
+//* ih264_resi_trans_quant_av8.s
+//*
+//* @brief
+//* contains function definitions for residual and forward trans
+//*
+//* @author
+//* ittiam
+//*
+//* @par list of functions:
+//* ih264_resi_trans_quant_4x4_av8
+//* ih264_resi_trans_quant_chroma_4x4_av8
+//* ih264_hadamard_quant_4x4_av8
+//* ih264_hadamard_quant_2x2_uv_av8
+//* @remarks
+//* none
+//*
+//*******************************************************************************
+.include "ih264_neon_macros.s"
+.text
+.p2align 2
+//*****************************************************************************
+//*
+//* function name : ih264_resi_trans_quant_4x4
+//* description : this function does cf4 of h264
+//*
+//* arguments : x0 :pointer to src buffer
+// x1 :pointer to pred buffer
+// x2 :pointer to dst buffer
+// x3 :source stride
+// x4 :pred stride,
+// x5 :pointer to scaling matrix,
+// x6 :pointer to threshold matrix,
+// x7 :qbits,
+// stack rounding factor,
+// pointer to store nnz
+// pointer to store non quantized dc value
+// values returned : none
+//
+// register usage :
+// stack usage : 64 bytes
+// cycles :
+// interruptiaility : interruptable
+//
+// known limitations
+// \assumptions :
+//
+// revision history :
+// dd mm yyyy author(s) changes
+// 1 12 2013 100633 first version
+// 20 1 2014 100633 changes the api, optimization
+//
+//*****************************************************************************
+
+ .global ih264_resi_trans_quant_4x4_av8
+ih264_resi_trans_quant_4x4_av8:
+
+ //computes residue = src - pred for a 4x4 block, transposes it, applies
+ //the 1-d forward core transform, transposes again and applies the
+ //transform a second time, then quantizes the coefficients with the
+ //scaling matrix and stores the block, the count of non zero
+ //coefficients and the unquantized dc value
+ push_v_regs
+ //register usage once the stack arguments below are loaded
+ //x0 :pointer to src buffer
+ //x1 :pointer to pred buffer
+ //x2 :pointer to dst buffer
+ //x3 :source stride
+ //x4 :pred stride
+ //x5 :scale matrix,
+ //x6 :threshold matrix (not read in this function)
+ //x7 :qbits
+ //x8 :round factor
+ //x9 :nnz
+ //x10 :pointer to store non quantized dc value
+ //NOTE(review): the #64 based offsets assume push_v_regs lowered sp by
+ //64 bytes - confirm against ih264_neon_macros.s
+
+ ldr w8, [sp, #64] //load round factor
+ ldr x10, [sp, #80] //load address for the non quantized dc value
+ neg x7, x7 //negate qbits so that sshl below shifts right
+ ldr x9, [sp, #72] //load address for nnz
+
+ //------------function argument loading done----------------;
+
+ ld1 {v30.8b}, [x0], x3 //load 8 bytes of src row 1
+ ld1 {v31.8b}, [x1], x4 //load 8 bytes of pred row 1
+ ld1 {v28.8b}, [x0], x3 //load 8 bytes of src row 2
+ ld1 {v29.8b}, [x1], x4 //load 8 bytes of pred row 2
+ ld1 {v26.8b}, [x0], x3 //load 8 bytes of src row 3
+ ld1 {v27.8b}, [x1], x4 //load 8 bytes of pred row 3
+ ld1 {v24.8b}, [x0] //load 8 bytes of src row 4
+ ld1 {v25.8b}, [x1] //load 8 bytes of pred row 4
+
+ usubl v0.8h, v30.8b, v31.8b //find residue row 1
+ usubl v2.8h, v28.8b, v29.8b //find residue row 2
+ usubl v4.8h, v26.8b, v27.8b //find residue row 3
+ usubl v6.8h, v24.8b, v25.8b //find residue row 4
+
+ //transpose the 4x4 residue; only the low four lanes of each row are used
+ trn1 v1.4h, v0.4h, v2.4h
+ trn2 v3.4h, v0.4h, v2.4h //t12
+ trn1 v5.4h, v4.4h, v6.4h
+ trn2 v7.4h, v4.4h, v6.4h //t23
+
+ trn1 v0.2s, v1.2s, v5.2s
+ trn2 v4.2s, v1.2s, v5.2s //t13
+ trn1 v2.2s, v3.2s, v7.2s
+ trn2 v6.2s, v3.2s, v7.2s //t14
+
+ //first transform pass
+ add v8.4h, v0.4h, v6.4h //x0 = x4+x7
+ add v9.4h, v2.4h, v4.4h //x1 = x5+x6
+ sub v10.4h, v2.4h, v4.4h //x2 = x5-x6
+ sub v11.4h, v0.4h, v6.4h //x3 = x4-x7
+
+ shl v12.4h, v10.4h, #1 //u_shift(x2,1,shft)
+ shl v13.4h, v11.4h, #1 //u_shift(x3,1,shft)
+
+ add v14.4h, v8.4h, v9.4h //x4 = x0 + x1;
+ sub v16.4h, v8.4h, v9.4h //x6 = x0 - x1;
+ add v15.4h, v13.4h, v10.4h //x5 = u_shift(x3,1,shft) + x2;
+ sub v17.4h, v11.4h, v12.4h //x7 = x3 - u_shift(x2,1,shft);
+
+ //transpose again so that the second pass runs along the other direction
+ trn1 v0.4h, v14.4h, v15.4h
+ trn2 v1.4h, v14.4h, v15.4h //t12
+ trn1 v2.4h, v16.4h, v17.4h
+ trn2 v3.4h, v16.4h, v17.4h //t23
+
+ trn1 v14.2s, v0.2s, v2.2s
+ trn2 v16.2s, v0.2s, v2.2s //t13
+ trn1 v15.2s, v1.2s, v3.2s
+ trn2 v17.2s, v1.2s, v3.2s //t24
+
+ //second (vertical) transform pass
+ //same butterfly as the first pass
+ add v18.4h, v14.4h , v17.4h //x0 = x4+x7
+ add v19.4h, v15.4h , v16.4h //x1 = x5+x6
+ sub v20.4h, v15.4h , v16.4h //x2 = x5-x6
+ sub v21.4h, v14.4h , v17.4h //x3 = x4-x7
+
+ shl v22.4h, v20.4h, #1 //u_shift(x2,1,shft)
+ shl v23.4h, v21.4h, #1 //u_shift(x3,1,shft)
+
+ dup v8.4s, w8 //round factor in all lanes (v8 is not read again below; v23 holds the copy that is used)
+
+ add v24.4h, v18.4h , v19.4h //x5 = x0 + x1;
+ sub v26.4h, v18.4h , v19.4h //x7 = x0 - x1;
+ add v25.4h, v23.4h , v20.4h //x6 = u_shift(x3,1,shft) + x2;
+ sub v27.4h, v21.4h , v22.4h //x8 = x3 - u_shift(x2,1,shft);
+
+ dup v23.4s, w8 //round factor in all lanes
+
+ st1 {v24.h}[0], [x10] //store the unquantized dc value to the alternate dc address
+//core transform is done for the 4x4 block
+ ld1 {v28.4h-v31.4h}, [x5] //load the scaling values
+
+ abs v0.4h, v24.4h //abs val of row 1
+ abs v1.4h, v25.4h //abs val of row 2
+ abs v2.4h, v26.4h //abs val of row 3
+ abs v3.4h, v27.4h //abs val of row 4
+
+ //sign masks: all ones where the coefficient is greater than zero
+ cmgt v4.4h, v24.4h, #0
+ cmgt v5.4h, v25.4h, #0
+ cmgt v6.4h, v26.4h, #0
+ cmgt v7.4h, v27.4h, #0
+
+ smull v0.4s, v0.4h, v28.4h //abs(coeff) * scale, widened to 32 bit, row 1
+ smull v1.4s, v1.4h, v29.4h //abs(coeff) * scale, widened to 32 bit, row 2
+ smull v2.4s, v2.4h, v30.4h //abs(coeff) * scale, widened to 32 bit, row 3
+ smull v3.4s, v3.4h, v31.4h //abs(coeff) * scale, widened to 32 bit, row 4
+
+ //add the round factor; v23 is overwritten last because it is the source
+ add v20.4s, v0.4s, v23.4s
+ add v21.4s, v1.4s, v23.4s
+ add v22.4s, v2.4s, v23.4s
+ add v23.4s, v3.4s, v23.4s
+
+ dup v24.4s, w7 //-qbits in all lanes
+
+ sshl v20.4s, v20.4s, v24.4s //shift row 1 right by qbits
+ sshl v21.4s, v21.4s, v24.4s //shift row 2 right by qbits
+ sshl v22.4s, v22.4s, v24.4s //shift row 3 right by qbits
+ sshl v23.4s, v23.4s, v24.4s //shift row 4 right by qbits
+
+ xtn v20.4h, v20.4s //narrow row 1
+ xtn v21.4h, v21.4s //narrow row 2
+ xtn v22.4h, v22.4s //narrow row 3
+ xtn v23.4h, v23.4s //narrow row 4
+
+ neg v24.8h, v20.8h //negated quantized row 1
+ neg v25.8h, v21.8h //negated quantized row 2
+ neg v26.8h, v22.8h //negated quantized row 3
+ neg v27.8h, v23.8h //negated quantized row 4
+
+ //compare with zero for computing nnz (all ones where the level is zero)
+ cmeq v0.4h, v20.4h, #0
+ cmeq v1.4h, v21.4h, #0
+ cmeq v2.4h, v22.4h, #0
+ cmeq v3.4h, v23.4h, #0
+
+ //restore the sign: keep the level where the coefficient was positive,
+ //take the negated level otherwise
+ bsl v4.8b, v20.8b, v24.8b //restore sign of row 1
+ bsl v5.8b, v21.8b, v25.8b //restore sign of row 2
+ bsl v6.8b, v22.8b, v26.8b //restore sign of row 3
+ bsl v7.8b, v23.8b, v27.8b //restore sign of row 4
+
+ //narrow the comparison result
+ mov v0.d[1], v2.d[0] //v0 = zero flags of rows 1 and 3
+ mov v1.d[1], v3.d[0] //v1 = zero flags of rows 2 and 4
+
+ xtn v0.8b, v0.8h
+ xtn v1.8b, v1.8h
+
+ ushr v0.8b, v0.8b, #7 //reduce each zero flag to a single bit, rows 1 and 3
+ ushr v1.8b, v1.8b, #7 //reduce each zero flag to a single bit, rows 2 and 4
+
+ //sum the sixteen flags; lane 0 ends up with the number of zero levels
+ add v0.8b, v0.8b, v1.8b
+ addp v0.8b, v0.8b, v0.8b
+ addp v0.8b, v0.8b, v0.8b
+ addp v0.8b, v0.8b, v0.8b
+
+ st1 {v4.4h-v7.4h}, [x2] //store the quantized block
+
+ movi v25.8b, #16 //number of coefficients in the block
+ sub v26.8b, v25.8b , v0.8b //nnz = 16 - number of zero levels
+ st1 {v26.b}[0], [x9] //write nnz
+
+ pop_v_regs
+ ret
+
+
+//*****************************************************************************
+//*
+//* function name : ih264_resi_trans_quant_chroma_4x4
+//* description : this function does residue calculation, forward transform
+//* and quantization for 4x4 chroma block.
+//*
+//* arguments : x0 :pointer to src buffer
+// x1 :pointer to pred buffer
+// x2 :pointer to dst buffer
+// x3 :source stride
+// x4 :pred stride,
+// x5 :pointer to scaling matrix,
+// x6 :pointer to threshold matrix,
+// x7 :qbits,
+// stack rounding factor,
+// pointer to store nnz
+// pointer to store unquantized dc values
+// values returned : none
+//
+// register usage :
+// stack usage : 64 bytes
+// cycles :
+// interruptiaility : interruptable
+//
+// known limitations
+// \assumptions :
+//
+// revision history :
+// dd mm yyyy author(s) changes
+// 11 2 2015 100664 first version
+// 25 2 2015 100633 first av8 version
+//*****************************************************************************
+
+ .global ih264_resi_trans_quant_chroma_4x4_av8
+ih264_resi_trans_quant_chroma_4x4_av8:
+
+ //same processing as the luma 4x4 function, except that every loaded
+ //row is first deinterleaved so that only the even bytes (one plane of
+ //the interleaved chroma data) take part in the residue, transform and
+ //quantization
+ push_v_regs
+ //register usage once the stack arguments below are loaded
+ //x0 :pointer to src buffer
+ //x1 :pointer to pred buffer
+ //x2 :pointer to dst buffer
+ //x3 :source stride
+ //x4 :pred stride
+ //x5 :scale matrix,
+ //x6 :threshold matrix (not read in this function)
+ //x7 :qbits
+ //x8 :round factor
+ //x9 :nnz
+ //x10 :pointer to store non quantized dc value
+ //NOTE(review): the #64 based offsets assume push_v_regs lowered sp by
+ //64 bytes - confirm against ih264_neon_macros.s
+
+ ldr w8, [sp, #64] //load round factor
+ ldr x10, [sp, #80] //load address for the non quantized dc value
+ neg x7, x7 //negate qbits so that sshl below shifts right
+ ldr x9, [sp, #72] //load address for nnz
+ //------------function argument loading done----------------;
+
+ ld1 {v30.8b}, [x0], x3 //load 8 bytes of src row 1
+ ld1 {v31.8b}, [x1], x4 //load 8 bytes of pred row 1
+ ld1 {v28.8b}, [x0], x3 //load 8 bytes of src row 2
+ ld1 {v29.8b}, [x1], x4 //load 8 bytes of pred row 2
+ ld1 {v26.8b}, [x0], x3 //load 8 bytes of src row 3
+ ld1 {v27.8b}, [x1], x4 //load 8 bytes of pred row 3
+ ld1 {v24.8b}, [x0] //load 8 bytes of src row 4
+ ld1 {v25.8b}, [x1] //load 8 bytes of pred row 4
+
+
+ //deinterleave the loaded values: keep the even bytes of each row
+ uzp1 v30.8b, v30.8b, v30.8b
+ uzp1 v31.8b, v31.8b, v31.8b
+ uzp1 v28.8b, v28.8b, v28.8b
+ uzp1 v29.8b, v29.8b, v29.8b
+ uzp1 v26.8b, v26.8b, v26.8b
+ uzp1 v27.8b, v27.8b, v27.8b
+ uzp1 v24.8b, v24.8b, v24.8b
+ uzp1 v25.8b, v25.8b, v25.8b
+ //this deinterleaving is the only difference between the chroma and luma functions
+
+ usubl v0.8h, v30.8b, v31.8b //find residue row 1
+ usubl v2.8h, v28.8b, v29.8b //find residue row 2
+ usubl v4.8h, v26.8b, v27.8b //find residue row 3
+ usubl v6.8h, v24.8b, v25.8b //find residue row 4
+
+ //transpose the 4x4 residue; only the low four lanes of each row are used
+ trn1 v1.4h, v0.4h, v2.4h
+ trn2 v3.4h, v0.4h, v2.4h //t12
+ trn1 v5.4h, v4.4h, v6.4h
+ trn2 v7.4h, v4.4h, v6.4h //t23
+
+ trn1 v0.2s, v1.2s, v5.2s
+ trn2 v4.2s, v1.2s, v5.2s //t13
+ trn1 v2.2s, v3.2s, v7.2s
+ trn2 v6.2s, v3.2s, v7.2s //t14
+
+ //first transform pass
+ add v8.4h, v0.4h, v6.4h //x0 = x4+x7
+ add v9.4h, v2.4h, v4.4h //x1 = x5+x6
+ sub v10.4h, v2.4h, v4.4h //x2 = x5-x6
+ sub v11.4h, v0.4h, v6.4h //x3 = x4-x7
+
+ shl v12.4h, v10.4h, #1 //u_shift(x2,1,shft)
+ shl v13.4h, v11.4h, #1 //u_shift(x3,1,shft)
+
+ add v14.4h, v8.4h, v9.4h //x4 = x0 + x1;
+ sub v16.4h, v8.4h, v9.4h //x6 = x0 - x1;
+ add v15.4h, v13.4h, v10.4h //x5 = u_shift(x3,1,shft) + x2;
+ sub v17.4h, v11.4h, v12.4h //x7 = x3 - u_shift(x2,1,shft);
+
+ //transpose again so that the second pass runs along the other direction
+ trn1 v0.4h, v14.4h, v15.4h
+ trn2 v1.4h, v14.4h, v15.4h //t12
+ trn1 v2.4h, v16.4h, v17.4h
+ trn2 v3.4h, v16.4h, v17.4h //t23
+
+ trn1 v14.2s, v0.2s, v2.2s
+ trn2 v16.2s, v0.2s, v2.2s //t13
+ trn1 v15.2s, v1.2s, v3.2s
+ trn2 v17.2s, v1.2s, v3.2s //t24
+
+ //second (vertical) transform pass
+ //same butterfly as the first pass
+ add v18.4h, v14.4h , v17.4h //x0 = x4+x7
+ add v19.4h, v15.4h , v16.4h //x1 = x5+x6
+ sub v20.4h, v15.4h , v16.4h //x2 = x5-x6
+ sub v21.4h, v14.4h , v17.4h //x3 = x4-x7
+
+ shl v22.4h, v20.4h, #1 //u_shift(x2,1,shft)
+ shl v23.4h, v21.4h, #1 //u_shift(x3,1,shft)
+
+ dup v8.4s, w8 //round factor in all lanes (v8 is not read again below; v23 holds the copy that is used)
+
+ add v24.4h, v18.4h , v19.4h //x5 = x0 + x1;
+ sub v26.4h, v18.4h , v19.4h //x7 = x0 - x1;
+ add v25.4h, v23.4h , v20.4h //x6 = u_shift(x3,1,shft) + x2;
+ sub v27.4h, v21.4h , v22.4h //x8 = x3 - u_shift(x2,1,shft);
+
+ dup v23.4s, w8 //round factor in all lanes
+
+ st1 {v24.h}[0], [x10] //store the unquantized dc value to the alternate dc address
+//core transform is done for the 4x4 block
+ ld1 {v28.4h-v31.4h}, [x5] //load the scaling values
+
+ abs v0.4h, v24.4h //abs val of row 1
+ abs v1.4h, v25.4h //abs val of row 2
+ abs v2.4h, v26.4h //abs val of row 3
+ abs v3.4h, v27.4h //abs val of row 4
+
+ //sign masks: all ones where the coefficient is greater than zero
+ cmgt v4.4h, v24.4h, #0
+ cmgt v5.4h, v25.4h, #0
+ cmgt v6.4h, v26.4h, #0
+ cmgt v7.4h, v27.4h, #0
+
+ smull v0.4s, v0.4h, v28.4h //abs(coeff) * scale, widened to 32 bit, row 1
+ smull v1.4s, v1.4h, v29.4h //abs(coeff) * scale, widened to 32 bit, row 2
+ smull v2.4s, v2.4h, v30.4h //abs(coeff) * scale, widened to 32 bit, row 3
+ smull v3.4s, v3.4h, v31.4h //abs(coeff) * scale, widened to 32 bit, row 4
+
+ //add the round factor; v23 is overwritten last because it is the source
+ add v20.4s, v0.4s, v23.4s
+ add v21.4s, v1.4s, v23.4s
+ add v22.4s, v2.4s, v23.4s
+ add v23.4s, v3.4s, v23.4s
+
+ dup v24.4s, w7 //-qbits in all lanes
+
+ sshl v20.4s, v20.4s, v24.4s //shift row 1 right by qbits
+ sshl v21.4s, v21.4s, v24.4s //shift row 2 right by qbits
+ sshl v22.4s, v22.4s, v24.4s //shift row 3 right by qbits
+ sshl v23.4s, v23.4s, v24.4s //shift row 4 right by qbits
+
+ xtn v20.4h, v20.4s //narrow row 1
+ xtn v21.4h, v21.4s //narrow row 2
+ xtn v22.4h, v22.4s //narrow row 3
+ xtn v23.4h, v23.4s //narrow row 4
+
+ neg v24.8h, v20.8h //negated quantized row 1
+ neg v25.8h, v21.8h //negated quantized row 2
+ neg v26.8h, v22.8h //negated quantized row 3
+ neg v27.8h, v23.8h //negated quantized row 4
+
+ //compare with zero for computing nnz (all ones where the level is zero)
+ cmeq v0.4h, v20.4h, #0
+ cmeq v1.4h, v21.4h, #0
+ cmeq v2.4h, v22.4h, #0
+ cmeq v3.4h, v23.4h, #0
+
+ //restore the sign: keep the level where the coefficient was positive,
+ //take the negated level otherwise
+ bsl v4.8b, v20.8b, v24.8b //restore sign of row 1
+ bsl v5.8b, v21.8b, v25.8b //restore sign of row 2
+ bsl v6.8b, v22.8b, v26.8b //restore sign of row 3
+ bsl v7.8b, v23.8b, v27.8b //restore sign of row 4
+
+ //narrow the comparison result
+ mov v0.d[1], v2.d[0] //v0 = zero flags of rows 1 and 3
+ mov v1.d[1], v3.d[0] //v1 = zero flags of rows 2 and 4
+
+ xtn v0.8b, v0.8h
+ xtn v1.8b, v1.8h
+
+ ushr v0.8b, v0.8b, #7 //reduce each zero flag to a single bit, rows 1 and 3
+ ushr v1.8b, v1.8b, #7 //reduce each zero flag to a single bit, rows 2 and 4
+
+ //sum the sixteen flags; lane 0 ends up with the number of zero levels
+ add v0.8b, v0.8b, v1.8b
+ addp v0.8b, v0.8b, v0.8b
+ addp v0.8b, v0.8b, v0.8b
+ addp v0.8b, v0.8b, v0.8b
+
+ st1 {v4.4h-v7.4h}, [x2] //store the quantized block
+
+ movi v25.8b, #16 //number of coefficients in the block
+ sub v26.8b, v25.8b , v0.8b //nnz = 16 - number of zero levels
+ st1 {v26.b}[0], [x9] //write nnz
+
+ pop_v_regs
+ ret
+
+
+//*****************************************************************************
+//*
+//* function name : ih264_hadamard_quant_4x4_av8
+//* description : this function does forward hadamard transform and
+//* quantization for luma dc block
+//*
+//* arguments : x0 :pointer to src buffer
+// x1 :pointer to dst buffer
+// x2 :pu2_scale_matrix
+// x3 :pu2_threshold_matrix
+// x4 :u4_qbits
+// x5 :u4_round_factor
+// x6 :pu1_nnz
+// values returned : none
+//
+// register usage :
+// stack usage : 0 bytes
+// cycles : around
+// interruptiaility : interruptable
+//
+// known limitations
+// \assumptions :
+//
+// revision history :
+// dd mm yyyy author(s) changes
+// 20 2 2015 100633 first version
+//
+//*****************************************************************************
+//ih264_hadamard_quant_4x4_av8(word16 *pi2_src, word16 *pi2_dst,
+// const uword16 *pu2_scale_matrix,
+// const uword16 *pu2_threshold_matrix, uword32 u4_qbits,
+// uword32 u4_round_factor,uword8 *pu1_nnz
+// )
+ .global ih264_hadamard_quant_4x4_av8
+ih264_hadamard_quant_4x4_av8:
+
+//forward 4x4 hadamard transform of the 16 values at x0, followed by
+//quantization of every result with pu2_scale_matrix[0]; writes the 16
+//quantized values to x1 and the count of non zero values to x6
+//
+//x0 :pointer to src buffer
+//x1 :pointer to dst buffer
+//x2 :pu2_scale_matrix
+//x3 :pu2_threshold_matrix (not read in this function)
+//x4 :u4_qbits
+//x5 :u4_round_factor
+//x6 :pu1_nnz
+
+ push_v_regs
+
+ ld4 {v0.4h-v3.4h}, [x0] //load 4x4 block de-interleaved: v0..v3 each take every fourth element
+ ld1 {v30.h}[0], [x2] //load pu2_scale_matrix[0]
+
+ //first butterfly pass, widened to 32 bit
+ saddl v4.4s, v0.4h, v3.4h //x0 = x4 + x7;
+ saddl v5.4s, v1.4h, v2.4h //x1 = x5 + x6;
+ ssubl v6.4s, v1.4h, v2.4h //x2 = x5 - x6;
+ ssubl v7.4s, v0.4h, v3.4h //x3 = x4 - x7;
+
+ dup v30.8h, v30.h[0] //pu2_scale_matrix[0] in all lanes
+
+ add v14.4s, v4.4s, v5.4s //pi2_dst[0] = x0 + x1;
+ add v15.4s, v7.4s, v6.4s //pi2_dst[1] = x3 + x2;
+ sub v16.4s, v4.4s, v5.4s //pi2_dst[2] = x0 - x1;
+ sub v17.4s, v7.4s, v6.4s //pi2_dst[3] = x3 - x2;
+
+ //transpose 4x4 block
+ trn1 v18.4s, v14.4s, v15.4s
+ trn2 v19.4s, v14.4s, v15.4s
+ trn1 v20.4s, v16.4s, v17.4s
+ trn2 v21.4s, v16.4s, v17.4s
+
+ trn1 v14.2d, v18.2d, v20.2d
+ trn2 v16.2d, v18.2d, v20.2d
+ trn1 v15.2d, v19.2d, v21.2d
+ trn2 v17.2d, v19.2d, v21.2d
+ //end transpose
+
+ //second butterfly pass
+ add v18.4s, v14.4s, v17.4s //x0 = x4 + x7;
+ add v19.4s, v15.4s, v16.4s //x1 = x5 + x6;
+ sub v20.4s, v15.4s, v16.4s //x2 = x5 - x6;
+ sub v21.4s, v14.4s, v17.4s //x3 = x4 - x7;
+
+ //v14..v17 are seeded with the round factor and later accumulate abs * scale
+ dup v14.4s, w5 //round factor
+ dup v15.4s, v14.s[0]
+ dup v16.4s, v14.s[0]
+ dup v17.4s, v14.s[0]
+
+ add v22.4s, v18.4s, v19.4s //(x0 + x1)
+ add v23.4s, v21.4s, v20.4s //(x3 + x2)
+ sub v24.4s, v18.4s, v19.4s //(x0 - x1)
+ sub v25.4s, v21.4s, v20.4s //(x3 - x2)
+
+ //halve and narrow back to 16 bit
+ shrn v0.4h, v22.4s, #1 //i4_value = (x0 + x1) >> 1;
+ shrn2 v0.8h, v23.4s, #1 //i4_value = (x3 + x2) >> 1;
+ shrn v1.4h, v24.4s, #1 //i4_value = (x0 - x1) >> 1;
+ shrn2 v1.8h, v25.4s, #1 //i4_value = (x3 - x2) >> 1;
+
+ abs v2.8h, v0.8h //magnitudes, first eight values
+ abs v3.8h, v1.8h //magnitudes, last eight values
+
+ //sign masks: all ones where the value is greater than zero
+ cmgt v4.8h, v0.8h, #0
+ cmgt v5.8h, v1.8h, #0
+
+ neg w4, w4 //-u4_qbits
+ dup v22.4s, w4 //-u4_qbits in all lanes, so ushl below shifts right
+
+ //round factor + abs(value) * scale
+ umlal v14.4s, v2.4h, v30.4h
+ umlal2 v15.4s, v2.8h, v30.8h
+ umlal v16.4s, v3.4h, v30.4h
+ umlal2 v17.4s, v3.8h, v30.8h
+
+ //>> u4_qbits
+ ushl v14.4s, v14.4s, v22.4s
+ ushl v15.4s, v15.4s, v22.4s
+ ushl v16.4s, v16.4s, v22.4s
+ ushl v17.4s, v17.4s, v22.4s
+
+ //narrow the levels to 16 bit with unsigned saturation
+ uqxtn v14.4h, v14.4s
+ uqxtn2 v14.8h, v15.4s
+ uqxtn v16.4h, v16.4s
+ uqxtn2 v16.8h, v17.4s
+
+ neg v15.8h, v14.8h //negated levels, first eight values
+ neg v17.8h, v16.8h //negated levels, last eight values
+
+ //restore the sign: keep the level where the value was positive,
+ //take the negated level otherwise
+ bsl v4.16b, v14.16b, v15.16b
+ bsl v5.16b, v16.16b, v17.16b
+
+ //all ones where the level is zero
+ cmeq v0.8h, v14.8h, #0
+ cmeq v1.8h, v16.8h, #0
+
+ st1 {v4.8h-v5.8h}, [x1] //store the 16 quantized values
+
+ movi v20.8b, #16 //number of values in the block
+
+ xtn v2.8b, v0.8h
+ xtn v3.8b, v1.8h
+
+ ushr v2.8b, v2.8b, #7 //reduce each zero flag to a single bit
+ ushr v3.8b, v3.8b, #7 //reduce each zero flag to a single bit
+
+ //sum the sixteen flags; lane 0 ends up with the number of zero levels
+ add v2.8b, v2.8b, v3.8b
+ addp v2.8b, v2.8b, v2.8b
+ addp v2.8b, v2.8b, v2.8b
+ addp v2.8b, v2.8b, v2.8b
+ sub v20.8b, v20.8b, v2.8b //nnz = 16 - number of zero levels
+ st1 {v20.b}[0], [x6] //write nnz
+
+ pop_v_regs
+ ret
+
+
+//*****************************************************************************
+//*
+//* function name : ih264_hadamard_quant_2x2_uv
+//* description : this function does forward hadamard transform and
+//* quantization for dc block of chroma for both planes
+//*
+//* arguments : x0 :pointer to src buffer
+// x1 :pointer to dst buffer
+// x2 :pu2_scale_matrix
+// x3 :pu2_threshold_matrix
+// x4 :u4_qbits
+// x5 :u4_round_factor
+// x6 :pu1_nnz
+// values returned : none
+//
+// register usage :
+// stack usage : 0 bytes
+// cycles : around
+// interruptiaility : interruptable
+//
+// known limitations
+// \assumptions :
+//
+// revision history :
+// dd mm yyyy author(s) changes
+// 20 2 2015 100633 first version
+//
+//*****************************************************************************
+// ih264_hadamard_quant_2x2_uv_av8(word16 *pi2_src, word16 *pi2_dst,
+// const uword16 *pu2_scale_matrix,
+// const uword16 *pu2_threshold_matrix, uword32 u4_qbits,
+// uword32 u4_round_factor,uword8 *pu1_nnz
+// )
+
+ .global ih264_hadamard_quant_2x2_uv_av8
+ih264_hadamard_quant_2x2_uv_av8:
+
+//forward 2x2 hadamard transform and quantization of the eight 16 bit
+//values at x0 (two groups of four), using pu2_scale_matrix[0] for every
+//value; writes eight quantized values to x1 and two nnz bytes to x6
+//
+//x0 :pointer to src buffer
+//x1 :pointer to dst buffer
+//x2 :pu2_scale_matrix
+//x3 :pu2_threshold_matrix (not read in this function)
+//x4 :u4_qbits
+//x5 :u4_round_factor
+//x6 :pu1_nnz
+
+ push_v_regs
+
+ ld2 {v0.4h-v1.4h}, [x0] //load src, even elements in v0 and odd elements in v1
+
+ ld1 {v30.h}[0], [x2] //load pu2_scale_matrix[0]
+ dup v30.4h, v30.h[0] //pu2_scale_matrix[0] in all lanes
+ uxtl v30.4s, v30.4h //widen the scale to 32 bit
+
+ neg w4, w4 //-u4_qbits, so ushl below shifts right
+ dup v24.4s, w4 //-u4_qbits in all lanes
+
+ //v25 and v26 are seeded with the round factor and later accumulate abs * scale
+ dup v25.4s, w5 //round fact
+ dup v26.4s, v25.s[0]
+
+ saddl v2.4s, v0.4h, v1.4h //x0 = x4 + x5;, x2 = x6 + x7;
+ ssubl v3.4s, v0.4h, v1.4h //x1 = x4 - x5; x3 = x6 - x7;
+
+ trn1 v4.4s, v2.4s, v3.4s
+ trn2 v5.4s, v2.4s, v3.4s //q1 -> x0 x1, q2 -> x2 x3
+
+ add v0.4s, v4.4s , v5.4s // (x0 + x2) (x1 + x3) (y0 + y2); (y1 + y3);
+ sub v1.4s, v4.4s , v5.4s // (x0 - x2) (x1 - x3) (y0 - y2); (y1 - y3);
+
+ abs v2.4s, v0.4s
+ abs v3.4s, v1.4s
+
+ //sign masks: all ones where the value is greater than zero
+ cmgt v4.4s, v0.4s, #0
+ cmgt v5.4s, v1.4s, #0
+
+ //narrow the masks to 16 bit; an all ones or all zeros lane narrows to
+ //0xffff or 0 with either the unsigned or the signed saturating form
+ uqxtn v4.4h, v4.4s
+ sqxtn2 v4.8h, v5.4s
+
+ //round factor + abs(value) * scale
+ mla v25.4s, v2.4s, v30.4s
+ mla v26.4s, v3.4s, v30.4s
+
+ ushl v2.4s, v25.4s, v24.4s //>>qbit
+ ushl v3.4s, v26.4s, v24.4s //>>qbit
+
+ //narrow the levels to 16 bit with unsigned saturation
+ uqxtn v2.4h, v2.4s
+ uqxtn2 v2.8h, v3.4s
+
+ neg v5.8h, v2.8h //negated levels
+
+ //restore the sign: keep the level where the value was positive,
+ //take the negated level otherwise
+ bsl v4.16b, v2.16b, v5.16b //*sign
+
+ //rearrange (swap 32 bit lanes 1 and 2) such that each plane's coeffs are contiguous
+ mov v5.s[0], v4.s[1]
+ mov v4.s[1], v4.s[2]
+ mov v4.s[2], v5.s[0]
+
+ cmeq v5.8h, v4.8h, #0 //all ones where the level is zero
+ xtn v5.8b, v5.8h //narrow the zero flags to bytes
+ ushr v5.8b, v5.8b, #7 //reduce each zero flag to a single bit
+ movi v20.8b, #4 //since we add zeros, we need to subtract from 4 to get nnz
+ addp v5.8b, v5.8b, v5.8b //sum up zero flags
+ addp v5.8b, v5.8b, v5.8b //byte 0 and byte 1 now hold the zero count of one group of four each
+
+ st1 {v4.8h}, [x1] //store the block
+
+ sub v20.8b, v20.8b, v5.8b //4 - numzeros
+
+ st1 {v20.h}[0], [x6] //store nnz: two bytes, one per group of four coeffs
+
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_weighted_bi_pred_av8.s b/common/armv8/ih264_weighted_bi_pred_av8.s
new file mode 100755
index 0000000..f7d0846
--- /dev/null
+++ b/common/armv8/ih264_weighted_bi_pred_av8.s
@@ -0,0 +1,574 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_weighted_bi_pred_av8.s
+//*
+//* @brief
+//* Contains function definitions for weighted biprediction.
+//* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT
+//*
+//* @author
+//* Kaushik Senthoor R
+//*
+//* @par List of Functions:
+//*
+//* - ih264_weighted_bi_pred_luma_av8()
+//* - ih264_weighted_bi_pred_chroma_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//*******************************************************************************
+//* @function
+//* ih264_weighted_bi_pred_luma_av8()
+//*
+//* @brief
+//* This routine performs the default weighted prediction as described in sec
+//* 8.4.2.3.2 titled "Weighted sample prediction process" for luma.
+//*
+//* @par Description:
+//* This function gets two ht x wd blocks, calculates the weighted samples,
+//* rounds off, adds offset and stores it in the destination block.
+//*
+//* @param[in] puc_src1
+//* UWORD8 Pointer to the buffer containing the input block 1.
+//*
+//* @param[in] puc_src2
+//* UWORD8 Pointer to the buffer containing the input block 2.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd1
+//* Stride of the input buffer 1
+//*
+//* @param[in] src_strd2
+//* Stride of the input buffer 2
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] log_WD
+//* number of bits to be rounded off
+//*
+//* @param[in] wt1
+//* weight for the weighted prediction
+//*
+//* @param[in] wt2
+//* weight for the weighted prediction
+//*
+//* @param[in] ofst1
+//* offset 1 used after rounding off
+//*
+//* @param[in] ofst2
+//* offset 2 used after rounding off
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+//*
+//*******************************************************************************
+//*/
+//void ih264_weighted_bi_pred_luma_av8(UWORD8 *puc_src1,
+// UWORD8 *puc_src2,
+// UWORD8 *puc_dst,
+// WORD32 src_strd1,
+// WORD32 src_strd2,
+// WORD32 dst_strd,
+// UWORD16 log_WD,
+// UWORD32 wt1,
+// UWORD32 wt2,
+// UWORD16 ofst1,
+// UWORD16 ofst2,
+// UWORD8 ht,
+// UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0 => puc_src1
+// x1 => puc_src2
+// x2 => puc_dst
+// x3 => src_strd1
+// [sp] => src_strd2 (x4)
+// [sp+4] => dst_strd (x5)
+// [sp+8] => log_WD (x6)
+// [sp+12] => wt1 (x7)
+// [sp+16] => wt2 (x8)
+// [sp+20] => ofst1 (x9)
+// [sp+24] => ofst2 (x10)
+// [sp+28] => ht (x11)
+// [sp+32] => wd (x12)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_weighted_bi_pred_luma_av8
+
+ih264_weighted_bi_pred_luma_av8:
+
+ // STMFD sp!, {x4-x12,x14} //stack stores the values of the arguments
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ ldr x8, [sp, #80] //Load wt2 in x8
+ ldr x9, [sp, #88] //Load ofst1 in x9
+ add x6, x6, #1 //x6 = log_WD + 1
+ sub x20, x6, #0 //x13 = -(log_WD + 1)
+ neg x10, x20
+ dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
+ ldr x10, [sp, #96] //Load ofst2 in x10
+ ldr x11, [sp, #104] //Load ht in x11
+ ldr x12, [sp, #112] //Load wd in x12
+ add x9, x9, #1 //x9 = ofst1 + 1
+ add x9, x9, x10 //x9 = ofst1 + ofst2 + 1
+ mov v2.s[0], w7
+ mov v2.s[1], w8 //D2 = {wt1(32-bit), wt2(32-bit)}
+ asr x9, x9, #1 //x9 = ofst = (ofst1 + ofst2 + 1) >> 1
+ dup v3.8b, w9 //D3 = ofst (8-bit)
+ cmp w12, #16
+ beq loop_16 //branch if wd is 16
+ cmp w12, #8 //check if wd is 8
+ beq loop_8 //branch if wd is 8
+
+loop_4: //each iteration processes four rows
+
+ ld1 {v4.s}[0], [x0], x3 //load row 1 in source 1
+ ld1 {v4.s}[1], [x0], x3 //load row 2 in source 1
+ ld1 {v6.s}[0], [x1], x4 //load row 1 in source 2
+ ld1 {v6.s}[1], [x1], x4 //load row 2 in source 2
+ uxtl v4.8h, v4.8b //converting rows 1,2 in source 1 to 16-bit
+ ld1 {v8.s}[0], [x0], x3 //load row 3 in source 1
+ ld1 {v8.s}[1], [x0], x3 //load row 4 in source 1
+ uxtl v6.8h, v6.8b //converting rows 1,2 in source 2 to 16-bit
+ ld1 {v10.s}[0], [x1], x4 //load row 3 in source 2
+ ld1 {v10.s}[1], [x1], x4 //load row 4 in source 2
+ uxtl v8.8h, v8.8b //converting rows 3,4 in source 1 to 16-bit
+ uxtl v10.8h, v10.8b //converting rows 3,4 in source 2 to 16-bit
+ mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for rows 1,2
+ mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for rows 1,2
+ mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for rows 3,4
+ mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for rows 3,4
+ subs w11, w11, #4 //decrement ht by 4
+ srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from rows 1,2
+ srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from rows 3,4
+ saddw v4.8h, v4.8h , v3.8b //adding offset for rows 1,2
+ saddw v8.8h, v8.8h , v3.8b //adding offset for rows 3,4
+ sqxtun v4.8b, v4.8h //saturating rows 1,2 to unsigned 8-bit
+ sqxtun v8.8b, v8.8h //saturating rows 3,4 to unsigned 8-bit
+ st1 {v4.s}[0], [x2], x5 //store row 1 in destination
+ st1 {v4.s}[1], [x2], x5 //store row 2 in destination
+ st1 {v8.s}[0], [x2], x5 //store row 3 in destination
+ st1 {v8.s}[1], [x2], x5 //store row 4 in destination
+ bgt loop_4 //if greater than 0 repeat the loop again
+ b end_loops
+
+loop_8: //each iteration processes four rows
+
+ ld1 {v4.8b}, [x0], x3 //load row 1 in source 1
+ ld1 {v6.8b}, [x1], x4 //load row 1 in source 2
+ ld1 {v8.8b}, [x0], x3 //load row 2 in source 1
+ ld1 {v10.8b}, [x1], x4 //load row 2 in source 2
+ uxtl v4.8h, v4.8b //converting row 1 in source 1 to 16-bit
+ ld1 {v12.8b}, [x0], x3 //load row 3 in source 1
+ ld1 {v14.8b}, [x1], x4 //load row 3 in source 2
+ uxtl v6.8h, v6.8b //converting row 1 in source 2 to 16-bit
+ ld1 {v16.8b}, [x0], x3 //load row 4 in source 1
+ ld1 {v18.8b}, [x1], x4 //load row 4 in source 2
+ uxtl v8.8h, v8.8b //converting row 2 in source 1 to 16-bit
+ uxtl v10.8h, v10.8b //converting row 2 in source 2 to 16-bit
+ mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for row 1
+ mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 1
+ uxtl v12.8h, v12.8b //converting row 3 in source 1 to 16-bit
+ uxtl v14.8h, v14.8b //converting row 3 in source 2 to 16-bit
+ mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for row 2
+ mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for row 2
+ uxtl v16.8h, v16.8b //converting row 4 in source 1 to 16-bit
+ uxtl v18.8h, v18.8b //converting row 4 in source 2 to 16-bit
+ mul v12.8h, v12.8h , v2.4h[0] //weight 1 mult. for row 3
+ mla v12.8h, v14.8h , v2.4h[2] //weight 2 mult. for row 3
+ mul v16.8h, v16.8h , v2.4h[0] //weight 1 mult. for row 4
+ mla v16.8h, v18.8h , v2.4h[2] //weight 2 mult. for row 4
+ srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1
+ srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from row 2
+ srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 3
+ saddw v4.8h, v4.8h , v3.8b //adding offset for row 1
+ srshl v16.8h, v16.8h , v0.8h //rounds off the weighted samples from row 4
+ saddw v8.8h, v8.8h , v3.8b //adding offset for row 2
+ saddw v12.8h, v12.8h , v3.8b //adding offset for row 3
+ sqxtun v4.8b, v4.8h //saturating row 1 to unsigned 8-bit
+ saddw v16.8h, v16.8h , v3.8b //adding offset for row 4
+ sqxtun v8.8b, v8.8h //saturating row 2 to unsigned 8-bit
+ sqxtun v12.8b, v12.8h //saturating row 3 to unsigned 8-bit
+ sqxtun v16.8b, v16.8h //saturating row 4 to unsigned 8-bit
+ st1 {v4.8b}, [x2], x5 //store row 1 in destination
+ st1 {v8.8b}, [x2], x5 //store row 2 in destination
+ subs w11, w11, #4 //decrement ht by 4
+ st1 {v12.8b}, [x2], x5 //store row 3 in destination
+ st1 {v16.8b}, [x2], x5 //store row 4 in destination
+ bgt loop_8 //if greater than 0 repeat the loop again
+ b end_loops
+
+loop_16: //each iteration processes two rows
+
+ ld1 {v4.8b, v5.8b}, [x0], x3 //load row 1 in source 1
+ ld1 {v6.8b, v7.8b}, [x1], x4 //load row 1 in source 2
+ ld1 {v8.8b, v9.8b}, [x0], x3 //load row 2 in source 1
+ ld1 {v10.8b, v11.8b}, [x1], x4 //load row 2 in source 2
+ uxtl v20.8h, v4.8b //converting row 1L in source 1 to 16-bit
+ ld1 {v12.8b, v13.8b}, [x0], x3 //load row 3 in source 1
+ ld1 {v14.8b, v15.8b}, [x1], x4 //load row 3 in source 2
+ uxtl v22.8h, v6.8b //converting row 1L in source 2 to 16-bit
+ ld1 {v16.8b, v17.8b}, [x0], x3 //load row 4 in source 1
+ ld1 {v18.8b, v19.8b}, [x1], x4 //load row 4 in source 2
+ uxtl v4.8h, v5.8b //converting row 1H in source 1 to 16-bit
+ uxtl v6.8h, v7.8b //converting row 1H in source 2 to 16-bit
+ mul v20.8h, v20.8h , v2.4h[0] //weight 1 mult. for row 1L
+ mla v20.8h, v22.8h , v2.4h[2] //weight 2 mult. for row 1L
+ uxtl v24.8h, v8.8b //converting row 2L in source 1 to 16-bit
+ uxtl v26.8h, v10.8b //converting row 2L in source 2 to 16-bit
+ mul v4.8h, v4.8h , v2.4h[0] //weight 1 mult. for row 1H
+ mla v4.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 1H
+ uxtl v8.8h, v9.8b //converting row 2H in source 1 to 16-bit
+ uxtl v10.8h, v11.8b //converting row 2H in source 2 to 16-bit
+ mul v24.8h, v24.8h , v2.4h[0] //weight 1 mult. for row 2L
+ mla v24.8h, v26.8h , v2.4h[2] //weight 2 mult. for row 2L
+ uxtl v28.8h, v12.8b //converting row 3L in source 1 to 16-bit
+ uxtl v30.8h, v14.8b //converting row 3L in source 2 to 16-bit
+ mul v8.8h, v8.8h , v2.4h[0] //weight 1 mult. for row 2H
+ mla v8.8h, v10.8h , v2.4h[2] //weight 2 mult. for row 2H
+ uxtl v12.8h, v13.8b //converting row 3H in source 1 to 16-bit
+ uxtl v14.8h, v15.8b //converting row 3H in source 2 to 16-bit
+ mul v28.8h, v28.8h , v2.4h[0] //weight 1 mult. for row 3L
+ mla v28.8h, v30.8h , v2.4h[2] //weight 2 mult. for row 3L
+ uxtl v22.8h, v16.8b //converting row 4L in source 1 to 16-bit
+ uxtl v6.8h, v18.8b //converting row 4L in source 2 to 16-bit
+ mul v12.8h, v12.8h , v2.4h[0] //weight 1 mult. for row 3H
+ mla v12.8h, v14.8h , v2.4h[2] //weight 2 mult. for row 3H
+ uxtl v16.8h, v17.8b //converting row 4H in source 1 to 16-bit
+ uxtl v18.8h, v19.8b //converting row 4H in source 2 to 16-bit
+ mul v22.8h, v22.8h , v2.4h[0] //weight 1 mult. for row 4L
+ mla v22.8h, v6.8h , v2.4h[2] //weight 2 mult. for row 4L
+ srshl v20.8h, v20.8h , v0.8h //rounds off the weighted samples from row 1L
+ mul v16.8h, v16.8h , v2.4h[0] //weight 1 mult. for row 4H
+ mla v16.8h, v18.8h , v2.4h[2] //weight 2 mult. for row 4H
+ srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1H
+ srshl v24.8h, v24.8h , v0.8h //rounds off the weighted samples from row 2L
+ saddw v20.8h, v20.8h , v3.8b //adding offset for row 1L
+ srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from row 2H
+ saddw v4.8h, v4.8h , v3.8b //adding offset for row 1H
+ srshl v28.8h, v28.8h , v0.8h //rounds off the weighted samples from row 3L
+ saddw v24.8h, v24.8h , v3.8b //adding offset for row 2L
+ srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 3H
+ saddw v8.8h, v8.8h , v3.8b //adding offset for row 2H
+ srshl v22.8h, v22.8h , v0.8h //rounds off the weighted samples from row 4L
+ saddw v28.8h, v28.8h , v3.8b //adding offset for row 3L
+ srshl v16.8h, v16.8h , v0.8h //rounds off the weighted samples from row 4H
+ saddw v12.8h, v12.8h , v3.8b //adding offset for row 3H
+ sqxtun v26.8b, v20.8h //saturating row 1L to unsigned 8-bit
+ saddw v22.8h, v22.8h , v3.8b //adding offset for row 4L
+ sqxtun v27.8b, v4.8h //saturating row 1H to unsigned 8-bit
+ saddw v16.8h, v16.8h , v3.8b //adding offset for row 4H
+ sqxtun v10.8b, v24.8h //saturating row 2L to unsigned 8-bit
+ sqxtun v11.8b, v8.8h //saturating row 2H to unsigned 8-bit
+ sqxtun v30.8b, v28.8h //saturating row 3L to unsigned 8-bit
+ sqxtun v31.8b, v12.8h //saturating row 3H to unsigned 8-bit
+ st1 {v26.8b, v27.8b}, [x2], x5 //store row 1 in destination
+ sqxtun v14.8b, v22.8h //saturating row 4L to unsigned 8-bit
+ sqxtun v15.8b, v16.8h //saturating row 4H to unsigned 8-bit
+ st1 {v10.8b, v11.8b}, [x2], x5 //store row 2 in destination
+ subs w11, w11, #4 //decrement ht by 4
+ st1 {v30.8b, v31.8b}, [x2], x5 //store row 3 in destination
+ st1 {v14.8b, v15.8b}, [x2], x5 //store row 4 in destination
+ bgt loop_16 //if greater than 0 repeat the loop again
+
+end_loops:
+
+ // LDMFD sp!,{x4-x12,x15} //Reload the registers from sp
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+//*******************************************************************************
+//* @function
+//* ih264_weighted_bi_pred_chroma_av8()
+//*
+//* @brief
+//* This routine performs weighted bi-prediction as described in sec
+//* 8.4.2.3.2 titled "Weighted sample prediction process" for chroma.
+//*
+//* @par Description:
+//* This function gets two ht x wd blocks, calculates the weighted samples,
+//* rounds off, adds offset and stores it in the destination block for U and V.
+//*
+//* @param[in] puc_src1
+//* UWORD8 Pointer to the buffer containing the input block 1.
+//*
+//* @param[in] puc_src2
+//* UWORD8 Pointer to the buffer containing the input block 2.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd1
+//* Stride of the input buffer 1
+//*
+//* @param[in] src_strd2
+//* Stride of the input buffer 2
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] log_WD
+//* number of bits to be rounded off
+//*
+//* @param[in] wt1
+//* weights for the weighted prediction in U and V
+//*
+//* @param[in] wt2
+//* weights for the weighted prediction in U and V
+//*
+//* @param[in] ofst1
+//* offset 1 used after rounding off for U and V
+//*
+//* @param[in] ofst2
+//* offset 2 used after rounding off for U and V
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+//*
+//*******************************************************************************
+//*/
+//void ih264_weighted_bi_pred_chroma_av8(UWORD8 *puc_src1,
+// UWORD8 *puc_src2,
+// UWORD8 *puc_dst,
+// WORD32 src_strd1,
+// WORD32 src_strd2,
+// WORD32 dst_strd,
+// UWORD16 log_WD,
+// UWORD32 wt1,
+// UWORD32 wt2,
+// UWORD16 ofst1,
+// UWORD16 ofst2,
+// UWORD8 ht,
+// UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0 => puc_src1
+// x1 => puc_src2
+// x2 => puc_dst
+// x3 => src_strd1
+// x4 => src_strd2
+// x5 => dst_strd
+// x6 => log_WD
+// x7 => wt1
+// [sp+80] => wt2 (x8)
+// [sp+88] => ofst1 (x9)
+// [sp+96] => ofst2 (x10)
+// [sp+104] => ht (x11)
+// [sp+112] => wd (x12)
+// (stack offsets are relative to sp after the prologue, as used by the loads below)
+
+
+
+
+
+ .global ih264_weighted_bi_pred_chroma_av8
+
+ih264_weighted_bi_pred_chroma_av8:
+
+ // Prologue: push_v_regs (macro defined outside this view), then save x19/x20; stack arguments are read from sp+80 upwards
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+
+ ldr x8, [sp, #80] //Load wt2 in x8 (only w8 is used below)
+ dup v4.4s, w8 //v4 = wt2 replicated in every 32-bit lane: (wt2_u, wt2_v) as 16-bit halves
+ dup v2.4s, w7 //v2 = wt1 replicated in every 32-bit lane: (wt1_u, wt1_v) as 16-bit halves
+ add x6, x6, #1 //x6 = log_WD + 1
+ ldr w9, [sp, #88] //Load ofst1 in x9
+ sxtw x9, w9
+ ldr w10, [sp, #96] //Load ofst2 in x10
+ sxtw x10, w10
+ sub x20, x6, #0 //x20 = log_WD + 1 (plain copy; negated on the next line)
+ neg x20, x20 //x20 = -(log_WD + 1)
+ dup v0.8h, w20 //v0 = -(log_WD + 1) in every 16-bit lane (negative count => srshl shifts right with rounding)
+ ldr w11, [sp, #104] //Load ht in x11
+ ldr w12, [sp, #112] //Load wd in x12
+ sxtw x11, w11
+ sxtw x12, w12
+ dup v20.8h, w9 //offset1 replicated in every 16-bit lane
+ dup v21.8h, w10 //offset2 replicated in every 16-bit lane
+ srhadd v6.8b, v20.8b, v21.8b //v6 = signed rounding halving add of the two offsets, per byte
+ sxtl v6.8h, v6.8b //sign-extend the combined offsets to 16-bit
+ cmp w12, #8 //check if wd is 8
+ beq loop_8_uv //branch if wd is 8
+ cmp w12, #4 //check if wd is 4
+ beq loop_4_uv //branch if wd is 4
+
+loop_2_uv: //each iteration processes two rows
+
+ ld1 {v8.s}[0], [x0], x3 //load row 1 in source 1
+ ld1 {v8.s}[1], [x0], x3 //load row 2 in source 1
+ ld1 {v10.s}[0], [x1], x4 //load row 1 in source 2
+ ld1 {v10.s}[1], [x1], x4 //load row 2 in source 2
+ uxtl v8.8h, v8.8b //converting rows 1,2 in source 1 to 16-bit
+ uxtl v10.8h, v10.8b //converting rows 1,2 in source 2 to 16-bit
+ mul v8.8h, v8.8h , v2.8h //weight 1 mult. for rows 1,2
+ mla v8.8h, v10.8h , v4.8h //weight 2 mult. for rows 1,2
+ srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from rows 1,2
+ add v8.8h, v8.8h , v6.8h //adding offset for rows 1,2
+ sqxtun v8.8b, v8.8h //saturating rows 1,2 to unsigned 8-bit
+ st1 {v8.s}[0], [x2], x5 //store row 1 in destination
+ st1 {v8.s}[1], [x2], x5 //store row 2 in destination
+ subs w11, w11, #2 //decrement ht by 2
+ bgt loop_2_uv //if greater than 0 repeat the loop again
+ b end_loops_uv
+
+loop_4_uv: //each iteration processes two rows
+
+ ld1 {v8.8b}, [x0], x3 //load row 1 in source 1
+ ld1 {v10.8b}, [x1], x4 //load row 1 in source 2
+ uxtl v8.8h, v8.8b //converting row 1 in source 1 to 16-bit
+ ld1 {v12.8b}, [x0], x3 //load row 2 in source 1
+ uxtl v10.8h, v10.8b //converting row 1 in source 2 to 16-bit
+ ld1 {v14.8b}, [x1], x4 //load row 2 in source 2
+ uxtl v12.8h, v12.8b //converting row 2 in source 1 to 16-bit
+ mul v8.8h, v8.8h , v2.8h //weight 1 mult. for row 1
+ mla v8.8h, v10.8h , v4.8h //weight 2 mult. for row 1
+ uxtl v14.8h, v14.8b //converting row 2 in source 2 to 16-bit
+ mul v12.8h, v12.8h , v2.8h //weight 1 mult. for row 2
+ mla v12.8h, v14.8h , v4.8h //weight 2 mult. for row 2
+ subs w11, w11, #2 //decrement ht by 2 (flags are consumed by the bgt at the end of the loop)
+ srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from row 1
+ srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 2
+ add v8.8h, v8.8h , v6.8h //adding offset for row 1
+ add v12.8h, v12.8h , v6.8h //adding offset for row 2
+ sqxtun v8.8b, v8.8h //saturating row 1 to unsigned 8-bit
+ sqxtun v12.8b, v12.8h //saturating row 2 to unsigned 8-bit
+ st1 {v8.8b}, [x2], x5 //store row 1 in destination
+ st1 {v12.8b}, [x2], x5 //store row 2 in destination
+ bgt loop_4_uv //if greater than 0 repeat the loop again
+ b end_loops_uv
+
+loop_8_uv: //each iteration processes four rows
+
+ ld1 {v8.8b, v9.8b}, [x0], x3 //load row 1 in source 1
+ ld1 {v10.8b, v11.8b}, [x1], x4 //load row 1 in source 2
+ ld1 {v12.8b, v13.8b}, [x0], x3 //load row 2 in source 1
+ ld1 {v14.8b, v15.8b}, [x1], x4 //load row 2 in source 2
+ uxtl v24.8h, v8.8b //converting row 1L in source 1 to 16-bit
+ ld1 {v16.8b, v17.8b}, [x0], x3 //load row 3 in source 1
+ ld1 {v18.8b, v19.8b}, [x1], x4 //load row 3 in source 2
+ uxtl v26.8h, v10.8b //converting row 1L in source 2 to 16-bit
+ ld1 {v20.8b, v21.8b}, [x0], x3 //load row 4 in source 1
+ ld1 {v22.8b, v23.8b}, [x1], x4 //load row 4 in source 2
+ uxtl v8.8h, v9.8b //converting row 1H in source 1 to 16-bit
+ uxtl v10.8h, v11.8b //converting row 1H in source 2 to 16-bit
+ mul v24.8h, v24.8h , v2.8h //weight 1 mult. for row 1L
+ mla v24.8h, v26.8h , v4.8h //weight 2 mult. for row 1L
+ uxtl v28.8h, v12.8b //converting row 2L in source 1 to 16-bit
+ uxtl v30.8h, v14.8b //converting row 2L in source 2 to 16-bit
+ mul v8.8h, v8.8h , v2.8h //weight 1 mult. for row 1H
+ mla v8.8h, v10.8h , v4.8h //weight 2 mult. for row 1H
+ uxtl v12.8h, v13.8b //converting row 2H in source 1 to 16-bit
+ uxtl v14.8h, v15.8b //converting row 2H in source 2 to 16-bit
+ mul v28.8h, v28.8h , v2.8h //weight 1 mult. for row 2L
+ mla v28.8h, v30.8h , v4.8h //weight 2 mult. for row 2L
+ uxtl v26.8h, v16.8b //converting row 3L in source 1 to 16-bit
+ uxtl v10.8h, v18.8b //converting row 3L in source 2 to 16-bit
+ mul v12.8h, v12.8h , v2.8h //weight 1 mult. for row 2H
+ mla v12.8h, v14.8h , v4.8h //weight 2 mult. for row 2H
+ uxtl v16.8h, v17.8b //converting row 3H in source 1 to 16-bit
+ uxtl v18.8h, v19.8b //converting row 3H in source 2 to 16-bit
+ mul v26.8h, v26.8h , v2.8h //weight 1 mult. for row 3L
+ mla v26.8h, v10.8h , v4.8h //weight 2 mult. for row 3L
+ uxtl v30.8h, v20.8b //converting row 4L in source 1 to 16-bit
+ uxtl v14.8h, v22.8b //converting row 4L in source 2 to 16-bit
+ mul v16.8h, v16.8h , v2.8h //weight 1 mult. for row 3H
+ mla v16.8h, v18.8h , v4.8h //weight 2 mult. for row 3H
+ uxtl v20.8h, v21.8b //converting row 4H in source 1 to 16-bit
+ uxtl v22.8h, v23.8b //converting row 4H in source 2 to 16-bit
+ mul v30.8h, v30.8h , v2.8h //weight 1 mult. for row 4L
+ mla v30.8h, v14.8h , v4.8h //weight 2 mult. for row 4L
+ srshl v24.8h, v24.8h , v0.8h //rounds off the weighted samples from row 1L
+ mul v20.8h, v20.8h , v2.8h //weight 1 mult. for row 4H
+ mla v20.8h, v22.8h , v4.8h //weight 2 mult. for row 4H
+ srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from row 1H
+ srshl v28.8h, v28.8h , v0.8h //rounds off the weighted samples from row 2L
+ add v24.8h, v24.8h , v6.8h //adding offset for row 1L
+ srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 2H
+ add v8.8h, v8.8h , v6.8h //adding offset for row 1H
+ srshl v26.8h, v26.8h , v0.8h //rounds off the weighted samples from row 3L
+ add v28.8h, v28.8h , v6.8h //adding offset for row 2L
+ srshl v16.8h, v16.8h , v0.8h //rounds off the weighted samples from row 3H
+ add v12.8h, v12.8h , v6.8h //adding offset for row 2H
+ srshl v30.8h, v30.8h , v0.8h //rounds off the weighted samples from row 4L
+ add v26.8h, v26.8h , v6.8h //adding offset for row 3L
+ srshl v20.8h, v20.8h , v0.8h //rounds off the weighted samples from row 4H
+ add v16.8h, v16.8h , v6.8h //adding offset for row 3H
+ sqxtun v10.8b, v24.8h //saturating row 1L to unsigned 8-bit
+ add v30.8h, v30.8h , v6.8h //adding offset for row 4L
+ sqxtun v11.8b, v8.8h //saturating row 1H to unsigned 8-bit
+ add v20.8h, v20.8h , v6.8h //adding offset for row 4H
+ sqxtun v18.8b, v28.8h //saturating row 2L to unsigned 8-bit
+ sqxtun v19.8b, v12.8h //saturating row 2H to unsigned 8-bit
+ sqxtun v14.8b, v26.8h //saturating row 3L to unsigned 8-bit
+ sqxtun v15.8b, v16.8h //saturating row 3H to unsigned 8-bit
+ st1 {v10.8b, v11.8b}, [x2], x5 //store row 1 in destination
+ sqxtun v22.8b, v30.8h //saturating row 4L to unsigned 8-bit
+ sqxtun v23.8b, v20.8h //saturating row 4H to unsigned 8-bit
+ st1 {v18.8b, v19.8b}, [x2], x5 //store row 2 in destination
+ subs w11, w11, #4 //decrement ht by 4
+ st1 {v14.8b, v15.8b}, [x2], x5 //store row 3 in destination
+ st1 {v22.8b, v23.8b}, [x2], x5 //store row 4 in destination
+ bgt loop_8_uv //if greater than 0 repeat the loop again
+
+end_loops_uv:
+
+ // Epilogue: restore x19/x20, then pop_v_regs (counterpart of push_v_regs) and return
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/armv8/ih264_weighted_pred_av8.s b/common/armv8/ih264_weighted_pred_av8.s
new file mode 100755
index 0000000..6a03875
--- /dev/null
+++ b/common/armv8/ih264_weighted_pred_av8.s
@@ -0,0 +1,471 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//******************************************************************************
+//* @file
+//* ih264_weighted_pred_av8.s
+//*
+//* @brief
+//* Contains function definitions for weighted prediction.
+//* Functions are written in ARMv8 (AArch64) NEON assembly (GNU assembler syntax)
+//*
+//* @author
+//* Kaushik Senthoor R
+//*
+//* @par List of Functions:
+//*
+//* - ih264_weighted_pred_luma_av8()
+//* - ih264_weighted_pred_chroma_av8()
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//*******************************************************************************
+//* @function
+//* ih264_weighted_pred_luma_av8()
+//*
+//* @brief
+//* This routine performs weighted prediction as described in sec
+//* 8.4.2.3.2 titled "Weighted sample prediction process" for luma.
+//*
+//* @par Description:
+//* This function gets a ht x wd block, calculates the weighted sample, rounds
+//* off, adds offset and stores it in the destination block.
+//*
+//* @param[in] puc_src:
+//* UWORD8 Pointer to the buffer containing the input block.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd
+//* Stride of the input buffer
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] log_WD
+//* number of bits to be rounded off
+//*
+//* @param[in] wt
+//* weight for the weighted prediction
+//*
+//* @param[in] ofst
+//* offset used after rounding off
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
+//*
+//*******************************************************************************
+//*/
+//void ih264_weighted_pred_luma_av8(UWORD8 *puc_src,
+// UWORD8 *puc_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// UWORD8 log_WD,
+// UWORD32 wt,
+// UWORD16 ofst,
+// UWORD8 ht,
+// UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0 => puc_src
+// x1 => puc_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => log_WD
+// x5 => wt
+// x6 => ofst
+// x7 => ht
+// [sp+80] => wd (x8)
+// (stack offset is relative to sp after the prologue, as used by the load below)
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+
+
+ .global ih264_weighted_pred_luma_av8
+
+ih264_weighted_pred_luma_av8:
+
+ // Prologue: push_v_regs (macro from ih264_neon_macros.s), then save x19/x20; the stack argument wd is read from sp+80
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ ldr w8, [sp, #80] //Load wd
+ sxtw x8, w8
+
+ dup v2.4h, w5 //v2 = wt in every 16-bit lane
+ sub x20, x4, #0 //x20 = log_WD (plain copy; negated on the next line)
+ neg x9, x20 //x9 = -log_WD
+ dup v3.8b, w6 //v3 = ofst in every 8-bit lane
+ cmp w8, #16 //check if wd is 16
+ dup v0.8h, w9 //v0 = -log_WD in every 16-bit lane (negative count => srshl shifts right with rounding)
+ beq loop_16 //branch if wd is 16
+
+ cmp w8, #8 //check if wd is 8
+ beq loop_8 //branch if wd is 8
+
+loop_4: //each iteration processes four rows
+
+ ld1 {v4.s}[0], [x0], x2 //load row 1 in source
+ ld1 {v4.s}[1], [x0], x2 //load row 2 in source
+ ld1 {v6.s}[0], [x0], x2 //load row 3 in source
+ ld1 {v6.s}[1], [x0], x2 //load row 4 in source
+
+ uxtl v4.8h, v4.8b //converting rows 1,2 to 16-bit
+ uxtl v6.8h, v6.8b //converting rows 3,4 to 16-bit
+
+ mul v4.8h, v4.8h , v2.4h[0] //weight mult. for rows 1,2
+ mul v6.8h, v6.8h , v2.4h[0] //weight mult. for rows 3,4
+
+ subs w7, w7, #4 //decrement ht by 4 (flags are consumed by the bgt at the end of the loop)
+ srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from rows 1,2
+ srshl v6.8h, v6.8h , v0.8h //rounds off the weighted samples from rows 3,4
+
+ saddw v4.8h, v4.8h , v3.8b //adding offset for rows 1,2
+ saddw v6.8h, v6.8h , v3.8b //adding offset for rows 3,4
+
+ sqxtun v4.8b, v4.8h //saturating rows 1,2 to unsigned 8-bit
+ sqxtun v6.8b, v6.8h //saturating rows 3,4 to unsigned 8-bit
+
+ st1 {v4.s}[0], [x1], x3 //store row 1 in destination
+ st1 {v4.s}[1], [x1], x3 //store row 2 in destination
+ st1 {v6.s}[0], [x1], x3 //store row 3 in destination
+ st1 {v6.s}[1], [x1], x3 //store row 4 in destination
+
+ bgt loop_4 //if greater than 0 repeat the loop again
+
+ b end_loops
+
+loop_8: //each iteration processes four rows
+
+ ld1 {v4.8b}, [x0], x2 //load row 1 in source
+ ld1 {v6.8b}, [x0], x2 //load row 2 in source
+ ld1 {v8.8b}, [x0], x2 //load row 3 in source
+ uxtl v4.8h, v4.8b //converting row 1 to 16-bit
+ ld1 {v10.8b}, [x0], x2 //load row 4 in source
+ uxtl v6.8h, v6.8b //converting row 2 to 16-bit
+
+ uxtl v8.8h, v8.8b //converting row 3 to 16-bit
+ mul v4.8h, v4.8h , v2.4h[0] //weight mult. for row 1
+ uxtl v10.8h, v10.8b //converting row 4 to 16-bit
+ mul v6.8h, v6.8h , v2.4h[0] //weight mult. for row 2
+ mul v8.8h, v8.8h , v2.4h[0] //weight mult. for row 3
+ mul v10.8h, v10.8h , v2.4h[0] //weight mult. for row 4
+
+ srshl v4.8h, v4.8h , v0.8h //rounds off the weighted samples from row 1
+ srshl v6.8h, v6.8h , v0.8h //rounds off the weighted samples from row 2
+ srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from row 3
+ saddw v4.8h, v4.8h , v3.8b //adding offset for row 1
+ srshl v10.8h, v10.8h , v0.8h //rounds off the weighted samples from row 4
+ saddw v6.8h, v6.8h , v3.8b //adding offset for row 2
+
+ saddw v8.8h, v8.8h , v3.8b //adding offset for row 3
+ sqxtun v4.8b, v4.8h //saturating row 1 to unsigned 8-bit
+ saddw v10.8h, v10.8h , v3.8b //adding offset for row 4
+ sqxtun v6.8b, v6.8h //saturating row 2 to unsigned 8-bit
+ sqxtun v8.8b, v8.8h //saturating row 3 to unsigned 8-bit
+ sqxtun v10.8b, v10.8h //saturating row 4 to unsigned 8-bit
+
+ st1 {v4.8b}, [x1], x3 //store row 1 in destination
+ st1 {v6.8b}, [x1], x3 //store row 2 in destination
+ subs w7, w7, #4 //decrement ht by 4
+ st1 {v8.8b}, [x1], x3 //store row 3 in destination
+ st1 {v10.8b}, [x1], x3 //store row 4 in destination
+
+ bgt loop_8 //if greater than 0 repeat the loop again
+
+ b end_loops
+
+loop_16: //each iteration processes four rows
+
+ ld1 {v4.8b, v5.8b}, [x0], x2 //load row 1 in source
+ ld1 {v6.8b, v7.8b}, [x0], x2 //load row 2 in source
+ uxtl v12.8h, v4.8b //converting row 1L to 16-bit
+ ld1 {v8.8b, v9.8b}, [x0], x2 //load row 3 in source
+ uxtl v14.8h, v5.8b //converting row 1H to 16-bit
+ ld1 {v10.8b, v11.8b}, [x0], x2 //load row 4 in source
+ uxtl v16.8h, v6.8b //converting row 2L to 16-bit
+ mul v12.8h, v12.8h , v2.4h[0] //weight mult. for row 1L
+ uxtl v18.8h, v7.8b //converting row 2H to 16-bit
+ mul v14.8h, v14.8h , v2.4h[0] //weight mult. for row 1H
+ uxtl v20.8h, v8.8b //converting row 3L to 16-bit
+ mul v16.8h, v16.8h , v2.4h[0] //weight mult. for row 2L
+ uxtl v22.8h, v9.8b //converting row 3H to 16-bit
+ mul v18.8h, v18.8h , v2.4h[0] //weight mult. for row 2H
+ uxtl v24.8h, v10.8b //converting row 4L to 16-bit
+ mul v20.8h, v20.8h , v2.4h[0] //weight mult. for row 3L
+ uxtl v26.8h, v11.8b //converting row 4H to 16-bit
+ mul v22.8h, v22.8h , v2.4h[0] //weight mult. for row 3H
+ mul v24.8h, v24.8h , v2.4h[0] //weight mult. for row 4L
+ srshl v12.8h, v12.8h , v0.8h //rounds off the weighted samples from row 1L
+ mul v26.8h, v26.8h , v2.4h[0] //weight mult. for row 4H
+ srshl v14.8h, v14.8h , v0.8h //rounds off the weighted samples from row 1H
+ srshl v16.8h, v16.8h , v0.8h //rounds off the weighted samples from row 2L
+ saddw v12.8h, v12.8h , v3.8b //adding offset for row 1L
+ srshl v18.8h, v18.8h , v0.8h //rounds off the weighted samples from row 2H
+ saddw v14.8h, v14.8h , v3.8b //adding offset for row 1H
+ sqxtun v4.8b, v12.8h //saturating row 1L to unsigned 8-bit
+ srshl v20.8h, v20.8h , v0.8h //rounds off the weighted samples from row 3L
+ saddw v16.8h, v16.8h , v3.8b //adding offset for row 2L
+ sqxtun v5.8b, v14.8h //saturating row 1H to unsigned 8-bit
+ srshl v22.8h, v22.8h , v0.8h //rounds off the weighted samples from row 3H
+ saddw v18.8h, v18.8h , v3.8b //adding offset for row 2H
+ sqxtun v6.8b, v16.8h //saturating row 2L to unsigned 8-bit
+ srshl v24.8h, v24.8h , v0.8h //rounds off the weighted samples from row 4L
+ saddw v20.8h, v20.8h , v3.8b //adding offset for row 3L
+ sqxtun v7.8b, v18.8h //saturating row 2H to unsigned 8-bit
+ srshl v26.8h, v26.8h , v0.8h //rounds off the weighted samples from row 4H
+ saddw v22.8h, v22.8h , v3.8b //adding offset for row 3H
+ sqxtun v8.8b, v20.8h //saturating row 3L to unsigned 8-bit
+ saddw v24.8h, v24.8h , v3.8b //adding offset for row 4L
+ sqxtun v9.8b, v22.8h //saturating row 3H to unsigned 8-bit
+ saddw v26.8h, v26.8h , v3.8b //adding offset for row 4H
+ sqxtun v10.8b, v24.8h //saturating row 4L to unsigned 8-bit
+ st1 {v4.8b, v5.8b}, [x1], x3 //store row 1 in destination
+ sqxtun v11.8b, v26.8h //saturating row 4H to unsigned 8-bit
+ st1 {v6.8b, v7.8b}, [x1], x3 //store row 2 in destination
+ subs w7, w7, #4 //decrement ht by 4
+ st1 {v8.8b, v9.8b}, [x1], x3 //store row 3 in destination
+ st1 {v10.8b, v11.8b}, [x1], x3 //store row 4 in destination
+
+ bgt loop_16 //if greater than 0 repeat the loop again
+
+end_loops:
+
+ // Epilogue: restore x19/x20, then pop_v_regs (counterpart of push_v_regs) and return
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+//*******************************************************************************
+//* @function
+//* ih264_weighted_pred_chroma_av8()
+//*
+//* @brief
+//* This routine performs weighted prediction as described in sec
+//* 8.4.2.3.2 titled "Weighted sample prediction process" for chroma.
+//*
+//* @par Description:
+//* This function gets a ht x wd block, calculates the weighted sample, rounds
+//* off, adds offset and stores it in the destination block for U and V.
+//*
+//* @param[in] puc_src:
+//* UWORD8 Pointer to the buffer containing the input block.
+//*
+//* @param[out] puc_dst
+//* UWORD8 pointer to the destination where the output block is stored.
+//*
+//* @param[in] src_strd
+//* Stride of the input buffer
+//*
+//* @param[in] dst_strd
+//* Stride of the destination buffer
+//*
+//* @param[in] log_WD
+//* number of bits to be rounded off
+//*
+//* @param[in] wt
+//* weights for the weighted prediction for U and V
+//*
+//* @param[in] ofst
+//* offsets used after rounding off for U and V
+//*
+//* @param[in] ht
+//* integer height of the array
+//*
+//* @param[in] wd
+//* integer width of the array
+//*
+//* @returns
+//* None
+//*
+//* @remarks
+//* (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
+//*
+//*******************************************************************************
+//*/
+//void ih264_weighted_pred_chroma_av8(UWORD8 *puc_src,
+// UWORD8 *puc_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// UWORD8 log_WD,
+// UWORD32 wt,
+// UWORD16 ofst,
+// UWORD8 ht,
+// UWORD8 wd)
+//
+//**************Variables Vs Registers*****************************************
+// x0 => puc_src
+// x1 => puc_dst
+// x2 => src_strd
+// x3 => dst_strd
+// x4 => log_WD
+// x5 => wt
+// x6 => ofst
+// x7 => ht
+// [sp+80] => wd (x8)
+// (stack offset is relative to sp after the prologue, as used by the load below)
+
+
+
+
+ .global ih264_weighted_pred_chroma_av8
+
+ih264_weighted_pred_chroma_av8:
+
+ // Prologue: push_v_regs (macro from ih264_neon_macros.s), then save x19/x20; the stack argument wd is read from sp+80
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ ldr w8, [sp, #80] //Load wd
+ sxtw x8, w8
+
+ sub x20, x4, #0 //x20 = log_WD (plain copy; negated on the next line)
+ neg x9, x20 //x9 = -log_WD
+ dup v2.4s, w5 //v2 = wt replicated in every 32-bit lane: {wt_u (16-bit), wt_v (16-bit)}
+
+
+ dup v4.4h, w6 //v4 = ofst replicated in every 16-bit lane: {ofst_u (8-bit), ofst_v (8-bit)}
+ cmp w8, #8 //check if wd is 8
+ dup v0.8h, w9 //v0 = -log_WD in every 16-bit lane (negative count => srshl shifts right with rounding)
+ beq loop_8_uv //branch if wd is 8
+
+ cmp w8, #4 //check if wd is 4
+ beq loop_4_uv //branch if wd is 4
+
+loop_2_uv: //each iteration processes two rows
+
+ ld1 {v6.s}[0], [x0], x2 //load row 1 in source
+ ld1 {v6.s}[1], [x0], x2 //load row 2 in source
+ uxtl v6.8h, v6.8b //converting rows 1,2 to 16-bit
+ mul v6.8h, v6.8h , v2.8h //weight mult. for rows 1,2
+ srshl v6.8h, v6.8h , v0.8h //rounds off the weighted samples from rows 1,2
+ saddw v6.8h, v6.8h , v4.8b //adding offset for rows 1,2
+ sqxtun v6.8b, v6.8h //saturating rows 1,2 to unsigned 8-bit
+ subs w7, w7, #2 //decrement ht by 2
+ st1 {v6.s}[0], [x1], x3 //store row 1 in destination
+ st1 {v6.s}[1], [x1], x3 //store row 2 in destination
+ bgt loop_2_uv //if greater than 0 repeat the loop again
+ b end_loops_uv
+
+loop_4_uv: //each iteration processes two rows
+
+ ld1 {v6.8b}, [x0], x2 //load row 1 in source
+ ld1 {v8.8b}, [x0], x2 //load row 2 in source
+ uxtl v6.8h, v6.8b //converting row 1 to 16-bit
+ uxtl v8.8h, v8.8b //converting row 2 to 16-bit
+ mul v6.8h, v6.8h , v2.8h //weight mult. for row 1
+ mul v8.8h, v8.8h , v2.8h //weight mult. for row 2
+ subs w7, w7, #2 //decrement ht by 2 (flags are consumed by the bgt at the end of the loop)
+ srshl v6.8h, v6.8h , v0.8h //rounds off the weighted samples from row 1
+ srshl v8.8h, v8.8h , v0.8h //rounds off the weighted samples from row 2
+ saddw v6.8h, v6.8h , v4.8b //adding offset for row 1
+ saddw v8.8h, v8.8h , v4.8b //adding offset for row 2
+ sqxtun v6.8b, v6.8h //saturating row 1 to unsigned 8-bit
+ sqxtun v8.8b, v8.8h //saturating row 2 to unsigned 8-bit
+ st1 {v6.8b}, [x1], x3 //store row 1 in destination
+ st1 {v8.8b}, [x1], x3 //store row 2 in destination
+
+ bgt loop_4_uv //if greater than 0 repeat the loop again
+
+ b end_loops_uv
+
+loop_8_uv: //each iteration processes four rows
+
+ ld1 {v6.8b, v7.8b}, [x0], x2 //load row 1 in source
+ ld1 {v8.8b, v9.8b}, [x0], x2 //load row 2 in source
+ uxtl v14.8h, v6.8b //converting row 1L to 16-bit
+ ld1 {v10.8b, v11.8b}, [x0], x2 //load row 3 in source
+ uxtl v16.8h, v7.8b //converting row 1H to 16-bit
+ ld1 {v12.8b, v13.8b}, [x0], x2 //load row 4 in source
+
+ mul v14.8h, v14.8h , v2.8h //weight mult. for row 1L
+ uxtl v18.8h, v8.8b //converting row 2L to 16-bit
+ mul v16.8h, v16.8h , v2.8h //weight mult. for row 1H
+ uxtl v20.8h, v9.8b //converting row 2H to 16-bit
+ mul v18.8h, v18.8h , v2.8h //weight mult. for row 2L
+ uxtl v22.8h, v10.8b //converting row 3L to 16-bit
+ mul v20.8h, v20.8h , v2.8h //weight mult. for row 2H
+ uxtl v24.8h, v11.8b //converting row 3H to 16-bit
+ mul v22.8h, v22.8h , v2.8h //weight mult. for row 3L
+ uxtl v26.8h, v12.8b //converting row 4L to 16-bit
+ mul v24.8h, v24.8h , v2.8h //weight mult. for row 3H
+ uxtl v28.8h, v13.8b //converting row 4H to 16-bit
+
+ mul v26.8h, v26.8h , v2.8h //weight mult. for row 4L
+ srshl v14.8h, v14.8h , v0.8h //rounds off the weighted samples from row 1L
+ mul v28.8h, v28.8h , v2.8h //weight mult. for row 4H
+
+ srshl v16.8h, v16.8h , v0.8h //rounds off the weighted samples from row 1H
+ srshl v18.8h, v18.8h , v0.8h //rounds off the weighted samples from row 2L
+ saddw v14.8h, v14.8h , v4.8b //adding offset for row 1L
+ srshl v20.8h, v20.8h , v0.8h //rounds off the weighted samples from row 2H
+ saddw v16.8h, v16.8h , v4.8b //adding offset for row 1H
+ sqxtun v6.8b, v14.8h //saturating row 1L to unsigned 8-bit
+ srshl v22.8h, v22.8h , v0.8h //rounds off the weighted samples from row 3L
+ saddw v18.8h, v18.8h , v4.8b //adding offset for row 2L
+ sqxtun v7.8b, v16.8h //saturating row 1H to unsigned 8-bit
+ srshl v24.8h, v24.8h , v0.8h //rounds off the weighted samples from row 3H
+ saddw v20.8h, v20.8h , v4.8b //adding offset for row 2H
+ sqxtun v8.8b, v18.8h //saturating row 2L to unsigned 8-bit
+ srshl v26.8h, v26.8h , v0.8h //rounds off the weighted samples from row 4L
+ saddw v22.8h, v22.8h , v4.8b //adding offset for row 3L
+ sqxtun v9.8b, v20.8h //saturating row 2H to unsigned 8-bit
+ srshl v28.8h, v28.8h , v0.8h //rounds off the weighted samples from row 4H
+ saddw v24.8h, v24.8h , v4.8b //adding offset for row 3H
+
+ sqxtun v10.8b, v22.8h //saturating row 3L to unsigned 8-bit
+ saddw v26.8h, v26.8h , v4.8b //adding offset for row 4L
+ sqxtun v11.8b, v24.8h //saturating row 3H to unsigned 8-bit
+ saddw v28.8h, v28.8h , v4.8b //adding offset for row 4H
+
+ sqxtun v12.8b, v26.8h //saturating row 4L to unsigned 8-bit
+ st1 {v6.8b, v7.8b}, [x1], x3 //store row 1 in destination
+ sqxtun v13.8b, v28.8h //saturating row 4H to unsigned 8-bit
+ st1 {v8.8b, v9.8b}, [x1], x3 //store row 2 in destination
+ subs w7, w7, #4 //decrement ht by 4
+ st1 {v10.8b, v11.8b}, [x1], x3 //store row 3 in destination
+ st1 {v12.8b, v13.8b}, [x1], x3 //store row 4 in destination
+
+ bgt loop_8_uv //if greater than 0 repeat the loop again
+
+end_loops_uv:
+
+ // Epilogue: restore x19/x20, then pop_v_regs (counterpart of push_v_regs) and return
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
diff --git a/common/ih264_buf_mgr.c b/common/ih264_buf_mgr.c
new file mode 100755
index 0000000..ea4333e
--- /dev/null
+++ b/common/ih264_buf_mgr.c
@@ -0,0 +1,696 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_buf_mgr.c
+*
+* @brief
+* Contains function definitions for buffer management
+*
+* @author
+* Srinivas T
+*
+* @par List of Functions:
+* - ih264_buf_mgr_size()
+* - ih264_buf_mgr_lock()
+* - ih264_buf_mgr_unlock()
+* - ih264_buf_mgr_yield()
+* - ih264_buf_mgr_free()
+* - ih264_buf_mgr_init()
+* - ih264_buf_mgr_add()
+* - ih264_buf_mgr_get_next_free()
+* - ih264_buf_mgr_check_free()
+* - ih264_buf_mgr_release()
+* - ih264_buf_mgr_set_status()
+* - ih264_buf_mgr_get_status()
+* - ih264_buf_mgr_get_buf()
+* - ih264_buf_mgr_get_bufid()
+* - ih264_buf_mgr_get_num_active_buf()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_buf_mgr.h"
+
+#include "ithread.h"
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Reports the memory needed for the buffer manager context
+*
+* @par Description
+*  Adds the size of buf_mgr_t to the size reported by
+*  ithread_get_mutex_lock_size(). Memory for the buffers that are handed to
+*  the manager is not part of this figure and must be allocated separately.
+*
+* @returns Size in bytes of the buffer manager context, mutex included
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 ih264_buf_mgr_size(void)
+{
+    WORD32 ctxt_size = sizeof(buf_mgr_t);
+
+    /* room for the mutex that accompanies the context */
+    ctxt_size += ithread_get_mutex_lock_size();
+
+    return ctxt_size;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Acquires the mutex of the buffer manager
+*
+* @par Description
+*  Calls ithread_mutex_lock() on the mutex stored in the context
+*
+* @param[in] ps_buf_mgr
+*  Buffer manager context
+*
+* @returns IH264_SUCCESS when ithread_mutex_lock() returns zero, IH264_FAIL
+*  for any other return value
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_buf_mgr_lock(buf_mgr_t *ps_buf_mgr)
+{
+    WORD32 lock_status = ithread_mutex_lock(ps_buf_mgr->pv_mutex);
+
+    /* any non-zero status is reported as a failure */
+    return (0 != lock_status) ? IH264_FAIL : IH264_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Releases the mutex of the buffer manager
+*
+* @par Description
+*  Calls ithread_mutex_unlock() on the mutex stored in the context
+*
+* @param[in] ps_buf_mgr
+*  Buffer manager context
+*
+* @returns IH264_SUCCESS when ithread_mutex_unlock() returns zero, IH264_FAIL
+*  for any other return value
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_buf_mgr_unlock(buf_mgr_t *ps_buf_mgr)
+{
+    WORD32 unlock_status = ithread_mutex_unlock(ps_buf_mgr->pv_mutex);
+
+    /* any non-zero status is reported as a failure */
+    return (0 != unlock_status) ? IH264_FAIL : IH264_SUCCESS;
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Yields the calling thread while temporarily giving up the context lock
+*
+* @par Description
+*  Calls ih264_buf_mgr_unlock(), ithread_yield() and ih264_buf_mgr_lock(), in
+*  that order. The context is unlocked before yielding so that other threads
+*  can enter the buf_mgr functions and update the buf_mgr in the meantime.
+*
+* @param[in] ps_buf_mgr
+*  Buffer manager context
+*
+* @returns The error of the unlock or lock step that failed, else
+*  IH264_SUCCESS
+*
+* @remarks
+*  When the unlock step fails the function returns without yielding.
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_buf_mgr_yield(buf_mgr_t *ps_buf_mgr)
+{
+    IH264_ERROR_T step_status = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((IH264_SUCCESS != step_status), step_status);
+
+    ithread_yield();
+
+    /* take the context back before returning to the caller */
+    step_status = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((IH264_SUCCESS != step_status), step_status);
+
+    return IH264_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Tears down the buffer manager
+*
+* @par Description
+*  Destroys the mutex of the buffer manager by calling
+*  ithread_mutex_destroy(). No memory is released here.
+*
+* @param[in] ps_buf_mgr
+*  Buffer manager context
+*
+* @returns IH264_SUCCESS when ithread_mutex_destroy() returns zero, else
+*  IH264_FAIL
+*
+* @remarks
+*  Since it will be called only once by master thread this is not thread safe.
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_buf_mgr_free(buf_mgr_t *ps_buf_mgr)
+{
+    WORD32 destroy_status = ithread_mutex_destroy(ps_buf_mgr->pv_mutex);
+
+    if(0 != destroy_status)
+    {
+        return IH264_FAIL;
+    }
+
+    return IH264_SUCCESS;
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Buffer manager initialization function.
+*
+* @par Description:
+*  Builds a buffer manager inside the memory passed in. The buf_mgr_t
+*  structure sits at the start of the memory and the mutex directly behind
+*  it. The mutex is initialized, the maximum count is set to BUF_MGR_MAX_CNT,
+*  the active count to zero, and every slot is cleared.
+*
+* @param[in] pv_buf
+*  Memory for the context; ih264_buf_mgr_size() gives the number of bytes
+*  this function lays out in it
+*
+* @returns Pointer to the initialized buffer manager (same address as pv_buf)
+*
+* @remarks
+*  The return value of ithread_mutex_init() is not checked.
+*
+*******************************************************************************
+*/
+
+
+void *ih264_buf_mgr_init(void *pv_buf)
+{
+    WORD32 slot;
+    UWORD8 *pu1_mem = (UWORD8 *)pv_buf;
+    buf_mgr_t *ps_ctxt = (buf_mgr_t *)pu1_mem;
+
+    /* the mutex occupies the bytes right behind the context structure */
+    ps_ctxt->pv_mutex = pu1_mem + sizeof(buf_mgr_t);
+    ithread_mutex_init(ps_ctxt->pv_mutex);
+
+    ps_ctxt->i4_max_buf_cnt = BUF_MGR_MAX_CNT;
+    ps_ctxt->i4_active_buf_cnt = 0;
+
+    /* every slot starts out without a buffer and with a cleared status */
+    slot = 0;
+    while(slot < BUF_MGR_MAX_CNT)
+    {
+        ps_ctxt->apv_ptr[slot] = NULL;
+        ps_ctxt->au4_status[slot] = 0;
+        slot++;
+    }
+
+    return ps_ctxt;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Adds a buffer to the buffer manager under the given id.
+*
+* @par Description:
+*  Stores the buffer pointer in the slot selected by buf_id. The active buffer
+*  count is incremented only when that slot was empty, so adding the same
+*  buffer under the same id again leaves the count unchanged.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] pv_ptr
+*  Pointer to the buffer to be added
+*
+* @param[in] buf_id
+*  Slot in which the buffer is stored; valid range is 0 to i4_max_buf_cnt - 1
+*
+* @returns IH264_SUCCESS when the buffer is stored. IH264_FAIL when buf_id is
+*  out of range or the slot already holds a different buffer. The error of the
+*  lock or unlock call when one of them fails.
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_buf_mgr_add(buf_mgr_t *ps_buf_mgr,
+                                void *pv_ptr,
+                                WORD32 buf_id)
+{
+    IH264_ERROR_T ret;
+    IH264_ERROR_T unlock_ret;
+
+    ret = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), ret);
+
+    if((buf_id < 0) || (buf_id >= ps_buf_mgr->i4_max_buf_cnt))
+    {
+        /* a negative or too large id would index outside apv_ptr[] */
+        ret = IH264_FAIL;
+    }
+    else if((ps_buf_mgr->apv_ptr[buf_id] != NULL) &&
+            (ps_buf_mgr->apv_ptr[buf_id] != pv_ptr))
+    {
+        /* the id is already being used to hold some other buffer */
+        ret = IH264_FAIL;
+    }
+    else
+    {
+        /* Count the buffer only when the slot was empty. The count bounds */
+        /* the id scans of the other functions and must not grow when the  */
+        /* same buffer is added again.                                      */
+        if(ps_buf_mgr->apv_ptr[buf_id] == NULL)
+        {
+            ps_buf_mgr->i4_active_buf_cnt++;
+        }
+        ps_buf_mgr->apv_ptr[buf_id] = pv_ptr;
+    }
+
+    unlock_ret = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((unlock_ret != IH264_SUCCESS), unlock_ret);
+
+    return ret;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets the next free buffer.
+*
+* @par Description:
+*  Scans the ids below the active buffer count for the first slot that holds
+*  a buffer and whose status is zero. That slot's status is set to 1 (bit 0,
+*  listed as IN USE in ih264_buf_mgr.h) and its buffer is returned.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[out] pi4_buf_id
+*  Receives the id of the buffer returned; left untouched when no buffer is
+*  found
+*
+* @returns Pointer to the free buffer; NULL when none is free or when locking
+*  or unlocking the context fails
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void* ih264_buf_mgr_get_next_free(buf_mgr_t *ps_buf_mgr, WORD32 *pi4_buf_id)
+{
+    WORD32 slot;
+    void *pv_free_buf = NULL;
+    IH264_ERROR_T status;
+
+    status = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((status != IH264_SUCCESS), NULL);
+
+    for(slot = 0; slot < ps_buf_mgr->i4_active_buf_cnt; slot++)
+    {
+        /* skip slots that are busy or that hold no buffer */
+        if((ps_buf_mgr->au4_status[slot] != 0) ||
+           (NULL == ps_buf_mgr->apv_ptr[slot]))
+        {
+            continue;
+        }
+
+        /* claim the slot by setting its status to 1 */
+        ps_buf_mgr->au4_status[slot] = 1;
+        *pi4_buf_id = slot;
+        pv_free_buf = ps_buf_mgr->apv_ptr[slot];
+        break;
+    }
+
+    status = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((status != IH264_SUCCESS), NULL);
+
+    return pv_free_buf;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Checks the buffer manager for free buffers available.
+*
+* @par Description:
+*  Scans the ids below the active buffer count for a slot that holds a buffer
+*  and whose status is zero. Nothing is modified.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @returns IH264_SUCCESS if a free buffer exists and IH264_FAIL if none does.
+*  If locking or unlocking the context fails, the error of that call is
+*  returned, so a lock failure is never reported as "buffer available".
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_buf_mgr_check_free(buf_mgr_t *ps_buf_mgr)
+{
+    WORD32 id;
+    IH264_ERROR_T ret = IH264_FAIL;
+    IH264_ERROR_T rettmp;
+
+    /* without the lock the slots cannot be inspected: report the failure */
+    rettmp = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((rettmp != IH264_SUCCESS), rettmp);
+
+    for(id = 0; id < ps_buf_mgr->i4_active_buf_cnt; id++)
+    {
+        if((ps_buf_mgr->au4_status[id] == 0) &&
+           (ps_buf_mgr->apv_ptr[id]))
+        {
+            ret = IH264_SUCCESS;
+            break;
+        }
+    }
+
+    rettmp = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((rettmp != IH264_SUCCESS), rettmp);
+
+    return ret;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Resets the status bits.
+*
+* @par Description:
+*  Clears the bits given in mask from the status of the buffer with the given
+*  id. If the status that remains is exactly 1 (only bit 0, listed as IN USE
+*  in ih264_buf_mgr.h), the status is cleared to zero, which makes the buffer
+*  free again.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] buf_id
+*  ID of the buffer status to be released; valid range is 0 to
+*  i4_active_buf_cnt - 1
+*
+* @param[in] mask
+*  Contains the bits that are to be reset
+*
+* @returns IH264_SUCCESS on success. IH264_FAIL when buf_id is negative or
+*  not below the active buffer count. The error of the lock or unlock call
+*  when one of them fails.
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_buf_mgr_release(buf_mgr_t *ps_buf_mgr,
+                                    WORD32 buf_id,
+                                    UWORD32 mask)
+{
+    IH264_ERROR_T ret;
+    IH264_ERROR_T unlock_ret;
+
+    ret = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), ret);
+
+    if((buf_id < 0) || (buf_id >= ps_buf_mgr->i4_active_buf_cnt))
+    {
+        /* negative id, or an id which is not yet added */
+        ret = IH264_FAIL;
+    }
+    else
+    {
+        ps_buf_mgr->au4_status[buf_id] &= ~mask;
+
+        /* only bit 0 is left: nobody holds the buffer any more */
+        if(ps_buf_mgr->au4_status[buf_id] == 1)
+        {
+            ps_buf_mgr->au4_status[buf_id] = 0;
+        }
+    }
+
+    unlock_ret = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((unlock_ret != IH264_SUCCESS), unlock_ret);
+
+    return ret;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets the status bit.
+*
+* @par Description:
+*  Sets the bits given in mask in the status of the buffer with the given id.
+*  The call is rejected when any of those bits is already set.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] buf_id
+*  ID of the buffer whose status needs to be modified; valid range is 0 to
+*  i4_active_buf_cnt - 1
+*
+* @param[in] mask
+*  Contains the bits that are to be set
+*
+* @returns IH264_SUCCESS on success. IH264_FAIL when buf_id is negative or
+*  not below the active buffer count, or when a bit of mask is already set.
+*  The error of the lock or unlock call when one of them fails.
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_buf_mgr_set_status(buf_mgr_t *ps_buf_mgr,
+                                       WORD32 buf_id,
+                                       UWORD32 mask)
+{
+    IH264_ERROR_T ret;
+    IH264_ERROR_T unlock_ret;
+
+    ret = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), ret);
+
+    if((buf_id < 0) || (buf_id >= ps_buf_mgr->i4_active_buf_cnt))
+    {
+        /* negative id, or an id which is not yet added */
+        ret = IH264_FAIL;
+    }
+    else if((ps_buf_mgr->au4_status[buf_id] & mask) != 0)
+    {
+        /* at least one of the requested bits is already set */
+        ret = IH264_FAIL;
+    }
+    else
+    {
+        ps_buf_mgr->au4_status[buf_id] |= mask;
+    }
+
+    unlock_ret = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((unlock_ret != IH264_SUCCESS), unlock_ret);
+
+    return ret;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Returns the status of the buffer.
+*
+* @par Description:
+*  Returns the status word of the buffer corresponding to the id
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] buf_id
+*  ID of the buffer status required; valid range is 0 to i4_max_buf_cnt - 1
+*
+* @returns Status of the buffer corresponding to the id. IH264_FAIL when
+*  buf_id is outside the valid range. The error of the lock or unlock call
+*  when one of them fails.
+*
+* @remarks
+*  Error codes and status words share the return value.
+*
+*******************************************************************************
+*/
+WORD32 ih264_buf_mgr_get_status( buf_mgr_t *ps_buf_mgr, WORD32 buf_id )
+{
+    IH264_ERROR_T ret;
+    WORD32 status = (WORD32)IH264_FAIL;
+
+    ret = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), ret);
+
+    /* read the status only for ids that address a slot of au4_status[] */
+    if((buf_id >= 0) && (buf_id < ps_buf_mgr->i4_max_buf_cnt))
+    {
+        status = (WORD32)ps_buf_mgr->au4_status[buf_id];
+    }
+
+    ret = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), ret);
+
+    return status;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets the buffer from the buffer manager
+*
+* @par Description:
+*  Returns the pointer to the buffer corresponding to the id
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] buf_id
+*  ID of the buffer required; valid range is 0 to i4_max_buf_cnt - 1
+*
+* @returns Pointer to the buffer required. NULL when buf_id is outside the
+*  valid range, when no buffer is stored under the id, or when locking or
+*  unlocking the context fails.
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void* ih264_buf_mgr_get_buf(buf_mgr_t *ps_buf_mgr, WORD32 buf_id)
+{
+    IH264_ERROR_T ret;
+    void *pv_buf = NULL;
+
+    ret = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), NULL);
+
+    /* read the pointer only for ids that address a slot of apv_ptr[] */
+    if((buf_id >= 0) && (buf_id < ps_buf_mgr->i4_max_buf_cnt))
+    {
+        pv_buf = ps_buf_mgr->apv_ptr[buf_id];
+    }
+
+    ret = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), NULL);
+
+    return pv_buf;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets the buffer id from the buffer manager if the buffer is added to the
+*  buffer manager
+*
+* @par Description:
+*  Scans the ids below the active buffer count and returns the first id whose
+*  slot holds the given buffer
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] pv_buf
+*  Pointer to the buffer
+*
+* @returns Buffer id if exists, else -1. -1 is also returned when locking or
+*  unlocking the context fails, so an error code is never handed back as an
+*  id.
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 ih264_buf_mgr_get_bufid(buf_mgr_t *ps_buf_mgr, void *pv_buf)
+{
+    WORD32 id;
+    WORD32 buf_id = -1;
+    IH264_ERROR_T ret;
+
+    ret = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), -1);
+
+    for(id = 0; id < ps_buf_mgr->i4_active_buf_cnt; id++)
+    {
+        if(ps_buf_mgr->apv_ptr[id] == pv_buf)
+        {
+            buf_id = id;
+            break;
+        }
+    }
+
+    ret = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), -1);
+
+    return buf_id;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets the no.of active buffer
+*
+* @par Description:
+*  Return the number of active buffers in the buffer manager
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @returns Number of active buffers. 0 when locking or unlocking the context
+*  fails, so an error code is never handed back as a buffer count.
+*
+* @remarks
+*  A return value of 0 does not distinguish an empty manager from a failure.
+*
+*******************************************************************************
+*/
+UWORD32 ih264_buf_mgr_get_num_active_buf(buf_mgr_t *ps_buf_mgr)
+{
+    UWORD32 u4_buf_cnt;
+    IH264_ERROR_T ret;
+
+    ret = ih264_buf_mgr_lock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), 0);
+
+    u4_buf_cnt = ps_buf_mgr->i4_active_buf_cnt;
+
+    ret = ih264_buf_mgr_unlock(ps_buf_mgr);
+    RETURN_IF((ret != IH264_SUCCESS), 0);
+
+    return u4_buf_cnt;
+}
diff --git a/common/ih264_buf_mgr.h b/common/ih264_buf_mgr.h
new file mode 100755
index 0000000..52efa70
--- /dev/null
+++ b/common/ih264_buf_mgr.h
@@ -0,0 +1,122 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_buf_mgr.h
+*
+* @brief
+* Function declarations used for buffer management
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IH264_BUF_MGR_H_
+#define _IH264_BUF_MGR_H_
+
+#define BUF_MGR_MAX_CNT 64 /* number of entries in au4_status[] and apv_ptr[] of buf_mgr_t */
+
+/** Flag for use by the codec (encoder or decoder) - status bit 1 */
+#define BUF_MGR_CODEC (1 << 1)
+
+/** Flag for reference status - status bit 2 */
+#define BUF_MGR_REF (1 << 2)
+
+/** Flag for I/O - Display/output in case of decoder, capture/input in case of encoder - status bit 3 */
+#define BUF_MGR_IO (1 << 3)
+
+typedef struct
+{
+ /**
+ * Mutex used to keep the functions thread-safe
+ */
+ void *pv_mutex;
+
+ /**
+ * Maximum number of buffers; ih264_buf_mgr_init() sets it to BUF_MGR_MAX_CNT
+ */
+ WORD32 i4_max_buf_cnt;
+
+ /**
+ * Count of buffers added; ids below this count are the ones searched for free buffers
+ */
+ WORD32 i4_active_buf_cnt;
+
+ /**
+ * Status word of each buffer, indexed by buffer id; zero means the buffer is free
+ */
+ UWORD32 au4_status[BUF_MGR_MAX_CNT];
+
+ /* The low four bits of a status word are: */
+
+ /* Bit 0 - IN USE */
+ /* Bit 1 - CODEC */
+ /* Bit 2 - REF */
+ /* Bit 3 - DISP/IO/RECON */
+ void *apv_ptr[BUF_MGR_MAX_CNT]; /* Buffer pointer of each id; NULL until a buffer is added */
+
+}buf_mgr_t;
+
+// Returns size of the buffer manager context (buf_mgr_t plus its mutex)
+WORD32 ih264_buf_mgr_size(void);
+
+// Destroys the mutex of the buffer manager
+IH264_ERROR_T ih264_buf_mgr_free(buf_mgr_t *ps_buf_mgr);
+
+// Initializes the buffer manager inside the memory at pv_buf and returns it
+void *ih264_buf_mgr_init(void *pv_buf);
+
+// Add buffer to buffer manager under the given id. Fails if the id is beyond the maximum count or already holds a different buffer
+IH264_ERROR_T ih264_buf_mgr_add(buf_mgr_t *ps_buf_mgr,
+ void *pv_ptr,
+ WORD32 buf_id);
+
+// returns the next free buffer and sets its status to 1 (in use); NULL if none is free
+void* ih264_buf_mgr_get_next_free(buf_mgr_t *ps_buf_mgr, WORD32 *pi4_id);
+
+// this function will check if there are any free buffers (IH264_SUCCESS if there is one)
+IH264_ERROR_T ih264_buf_mgr_check_free(buf_mgr_t *ps_buf_mgr);
+
+// mask has the status bits to be cleared; a buffer left with only bit 0 set becomes free
+IH264_ERROR_T ih264_buf_mgr_release(buf_mgr_t *ps_buf_mgr,
+ WORD32 id,
+ UWORD32 mask);
+
+// mask has the status bits to be set; fails if any of them is already set
+IH264_ERROR_T ih264_buf_mgr_set_status(buf_mgr_t *ps_buf_mgr,
+ WORD32 id,
+ UWORD32 mask);
+
+// Gets status of the buffer
+WORD32 ih264_buf_mgr_get_status(buf_mgr_t *ps_buf_mgr, WORD32 id);
+
+// pass the ID - buffer will be returned
+void* ih264_buf_mgr_get_buf(buf_mgr_t *ps_buf_mgr, WORD32 id);
+// pass the buffer - ID will be returned, -1 if the buffer is not found
+WORD32 ih264_buf_mgr_get_bufid(buf_mgr_t *ps_buf_mgr, void *pv_buf);
+
+// will return number of active buffers
+UWORD32 ih264_buf_mgr_get_num_active_buf(buf_mgr_t *ps_buf_mgr);
+
+
+
+#endif /* _IH264_BUF_MGR_H_ */
diff --git a/common/ih264_cabac_tables.c b/common/ih264_cabac_tables.c
new file mode 100755
index 0000000..118ca12
--- /dev/null
+++ b/common/ih264_cabac_tables.c
@@ -0,0 +1,10869 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+
+/**
+******************************************************************************
+* @file
+* ih264_cabac_tables.c
+*
+* @brief
+* This file contains H264 cabac tables for init contexts, rlps and
+* cabac state transitions
+*
+* @author
+* Ittiam
+*
+* @par List of Tables
+* - gau1_ih264_cabac_rlps[][]
+* - gau1_ih264_next_state[]
+* - gau1_ih264_cab_ctxts[][][]
+*
+******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_cabac_tables.h"
+
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief Table for rangeTabLPS depending on pStateIdx and qCodIRangeIdx
+ * input : pStateIdx(0-63) as first index and qCodIRangeIdx(0-3)
+ * [(Range >> 6) & 0x3] as second index
+ * output : RLPS
+ *
+ * @remarks See Table 9-35 of H264 spec for rangeTabLPS
+ ******************************************************************************
+ */
+const UWORD8 gau1_ih264_cabac_rlps[64][4] =
+{
+ { 128, 176, 208, 240}, /* pStateIdx = 0 */
+ { 128, 167, 197, 227},
+ { 128, 158, 187, 216},
+ { 123, 150, 178, 205},
+ { 116, 142, 169, 195},
+ { 111, 135, 160, 185},
+ { 105, 128, 152, 175},
+ { 100, 122, 144, 166},
+ { 95, 116, 137, 158},
+ { 90, 110, 130, 150},
+ { 85, 104, 123, 142},
+ { 81, 99, 117, 135},
+ { 77, 94, 111, 128},
+ { 73, 89, 105, 122},
+ { 69, 85, 100, 116},
+ { 66, 80, 95, 110},
+ { 62, 76, 90, 104},
+ { 59, 72, 86, 99},
+ { 56, 69, 81, 94},
+ { 53, 65, 77, 89},
+ { 51, 62, 73, 85},
+ { 48, 59, 69, 80},
+ { 46, 56, 66, 76},
+ { 43, 53, 63, 72},
+ { 41, 50, 59, 69},
+ { 39, 48, 56, 65},
+ { 37, 45, 54, 62},
+ { 35, 43, 51, 59},
+ { 33, 41, 48, 56},
+ { 32, 39, 46, 53},
+ { 30, 37, 43, 50},
+ { 29, 35, 41, 48},
+ { 27, 33, 39, 45},
+ { 26, 31, 37, 43},
+ { 24, 30, 35, 41},
+ { 23, 28, 33, 39},
+ { 22, 27, 32, 37},
+ { 21, 26, 30, 35},
+ { 20, 24, 29, 33},
+ { 19, 23, 27, 31},
+ { 18, 22, 26, 30},
+ { 17, 21, 25, 28},
+ { 16, 20, 23, 27},
+ { 15, 19, 22, 25},
+ { 14, 18, 21, 24},
+ { 14, 17, 20, 23},
+ { 13, 16, 19, 22},
+ { 12, 15, 18, 21},
+ { 12, 14, 17, 20},
+ { 11, 14, 16, 19},
+ { 11, 13, 15, 18},
+ { 10, 12, 15, 17},
+ { 10, 12, 14, 16},
+ { 9, 11, 13, 15},
+ { 9, 11, 12, 14},
+ { 8, 10, 12, 14},
+ { 8, 9, 11, 13},
+ { 7, 9, 11, 12},
+ { 7, 9, 10, 12},
+ { 7, 8, 10, 11},
+ { 6, 8, 9, 11},
+ { 6, 7, 9, 10},
+ { 6, 7, 8, 9},
+ { 2, 2, 2, 2} /* pStateIdx = 63 */
+};
+
+/**
+ ******************************************************************************
+ * @brief Probability + MPS state transition table based on current state and bin
+ * input : curpState[bits7-2] | curMPS[bit1] | decodedBin[bit0]
+ * output : nextpState[bits6-1] | nextMPS[bit0]
+ * @remarks Modified form of Table-9-36 State Transition table in H264 spec
+ ******************************************************************************
+ */
+const UWORD8 gau1_ih264_next_state[64 * 2 * 2] =
+{
+/*****************************************************************************/
+/* One row per curpState (0-63); the four columns of a row are: */
+/* m=0,b=0 | m=0,b=1 | m=1,b=0 | m=1,b=1 */
+/*****************************************************************************/
+ 2, 1, 0, 3,/* mps reversal for m=0,b=1 / m=1,b=0 */
+ 4, 0, 1, 5,
+ 6, 2, 3, 7,
+ 8, 4, 5, 9,
+ 10, 4, 5, 11,
+ 12, 8, 9, 13,
+ 14, 8, 9, 15,
+ 16, 10, 11, 17,
+ 18, 12, 13, 19,
+ 20, 14, 15, 21,
+ 22, 16, 17, 23,
+ 24, 18, 19, 25,
+ 26, 18, 19, 27,
+ 28, 22, 23, 29,
+ 30, 22, 23, 31,
+ 32, 24, 25, 33,
+ 34, 26, 27, 35,
+ 36, 26, 27, 37,
+ 38, 30, 31, 39,
+ 40, 30, 31, 41,
+ 42, 32, 33, 43,
+ 44, 32, 33, 45,
+ 46, 36, 37, 47,
+ 48, 36, 37, 49,
+ 50, 38, 39, 51,
+ 52, 38, 39, 53,
+ 54, 42, 43, 55,
+ 56, 42, 43, 57,
+ 58, 44, 45, 59,
+ 60, 44, 45, 61,
+ 62, 46, 47, 63,
+ 64, 48, 49, 65,
+ 66, 48, 49, 67,
+ 68, 50, 51, 69,
+ 70, 52, 53, 71,
+ 72, 52, 53, 73,
+ 74, 54, 55, 75,
+ 76, 54, 55, 77,
+ 78, 56, 57, 79,
+ 80, 58, 59, 81,
+ 82, 58, 59, 83,
+ 84, 60, 61, 85,
+ 86, 60, 61, 87,
+ 88, 60, 61, 89,
+ 90, 62, 63, 91,
+ 92, 64, 65, 93,
+ 94, 64, 65, 95,
+ 96, 66, 67, 97,
+ 98, 66, 67, 99,
+ 100, 66, 67, 101,
+ 102, 68, 69, 103,
+ 104, 68, 69, 105,
+ 106, 70, 71, 107,
+ 108, 70, 71, 109,
+ 110, 70, 71, 111,
+ 112, 72, 73, 113,
+ 114, 72, 73, 115,
+ 116, 72, 73, 117,
+ 118, 74, 75, 119,
+ 120, 74, 75, 121,
+ 122, 74, 75, 123,
+ 124, 76, 77, 125,
+ 124, 76, 77, 125,
+ 126, 126, 127, 127
+};
+
+
+/*
+******************************************************************************
+* As per H264 standard the cabac initialization of context variables
+* are generated using following logic
+* (ref: section 9.3.1.1 of ITU-T Rec. H.264 (03/2005))
+*
+* The two values assigned to pStateIdx and valMPS during this initialization
+* are derived from SliceQPY
+*
+* Given the two table entries [m, n] (for a given slice type, context index and
+* cabac_init_idc), the initialization is specified by the following pseudo-code process
+*
+* preCtxState = Clip3( 1, 126, ( ( m * Clip3( 0, 51, SliceQPY ) ) >> 4 ) + n )
+* if( preCtxState <= 63 ) {
+* pStateIdx = 63 - preCtxState
+* valMPS = 0
+* } else {
+* pStateIdx = preCtxState - 64
+* valMPS = 1
+* }
+******************************************************************************
+*/
+
+/**
+ ******************************************************************************
+ * @brief Init context tables for all combinations of qp and cabac_init_idc
+ * @remarks Packing format MPS in lsb and pState in bits[1-6]
+ ******************************************************************************
+ */
+const UWORD8 gau1_ih264_cab_ctxts[IH264_NUM_CABAC_INIT_IDC_PLUS_ONE][IH264_MAX_QP][IH264_NUM_CABAC_CTXTS] =
+{
+
+ {
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 0 */
+
+ 124, 18, 21, 124, 18, 21, 125, 81, 20, 18,
+ 24, 60, 122, 124, 108, 28, 109, 12, 29, 3,
+ 2, 28, 19, 26, 1, 40, 124, 7, 53, 81,
+ 125, 81, 7, 29, 3, 2, 45, 63, 4, 36,
+ 11, 35, 65, 16, 7, 45, 49, 10, 25, 61,
+ 18, 11, 35, 49, 7, 21, 21, 33, 17, 10,
+ 44, 0, 0, 0, 39, 45, 67, 17, 44, 2,
+ 36, 29, 65, 125, 69, 75, 7, 37, 61, 39,
+ 93, 55, 77, 59, 125, 57, 51, 65, 89, 34,
+ 3, 12, 59, 21, 57, 47, 125, 18, 6, 8,
+ 11, 30, 9, 11, 49, 43, 29, 23, 27, 18,
+ 26, 9, 26, 42, 35, 0, 13, 7, 12, 25,
+ 56, 1, 4, 56, 76, 78, 68, 54, 59, 19,
+ 19, 34, 28, 73, 20, 20, 20, 4, 14, 14,
+ 0, 6, 2, 12, 11, 12, 48, 24, 9, 1,
+ 4, 0, 26, 48, 38, 22, 30, 6, 8, 8,
+ 60, 38, 40, 29, 6, 11, 70, 46, 38, 28,
+ 34, 38, 24, 32, 48, 2, 34, 18, 18, 10,
+ 0, 24, 12, 20, 22, 16, 36, 54, 20, 37,
+ 16, 29, 34, 64, 41, 112, 124, 120, 118, 124,
+ 124, 114, 114, 108, 88, 72, 66, 86, 58, 13,
+ 7, 8, 7, 66, 62, 56, 68, 64, 50, 40,
+ 44, 0, 8, 1, 61, 51, 89, 25, 38, 36,
+ 22, 1, 8, 13, 23, 37, 77, 27, 78, 42,
+ 30, 16, 8, 15, 39, 47, 111, 10, 68, 54,
+ 50, 40, 16, 10, 1, 21, 53, 13, 68, 64,
+ 42, 8, 10, 17, 35, 67, 10, 116, 98, 90,
+ 72, 46, 10, 13, 31, 43, 124, 85, 85, 47,
+ 101, 93, 69, 93, 85, 79, 87, 89, 97, 65,
+ 63, 55, 59, 61, 45, 7, 33, 43, 13, 6,
+ 10, 4, 26, 26, 28, 18, 44, 34, 24, 28,
+ 22, 44, 32, 16, 44, 38, 26, 20, 28, 0,
+ 1, 11, 8, 13, 38, 64, 40, 20, 58, 50,
+ 22, 46, 62, 38, 50, 26, 12, 40, 104, 98,
+ 104, 104, 108, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 68, 124, 124, 124, 124, 124, 124, 108,
+ 74, 72, 12, 37, 23, 67, 123, 124, 124, 124,
+ 114, 110, 106, 82, 88, 62, 64, 44, 38, 32,
+ 3, 15, 6, 0, 3, 78, 86, 80, 62, 80,
+ 78, 46, 62, 68, 42, 12, 20, 4, 45, 46,
+ 24, 8, 31, 15, 11, 13, 5, 9, 19, 11,
+ 13, 7, 2, 13, 5, 3, 0, 124, 124, 124,
+ 124, 124, 120, 108, 72, 8, 5, 56, 42, 36,
+ 30, 14, 6, 2, 5, 25, 43, 35, 27, 35,
+ 33, 19, 21, 39, 15, 7, 4, 5, 5, 8,
+ 8, 124, 124, 124, 124, 122, 114, 92, 58, 2,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 1 */
+
+ 124, 18, 21, 124, 18, 21, 123, 77, 22, 20,
+ 24, 58, 120, 124, 108, 28, 103, 12, 27, 1,
+ 2, 28, 17, 24, 3, 40, 124, 9, 55, 81,
+ 121, 77, 7, 27, 1, 2, 43, 59, 6, 36,
+ 9, 33, 63, 16, 7, 43, 49, 10, 23, 59,
+ 18, 11, 33, 49, 5, 19, 19, 31, 15, 10,
+ 44, 0, 0, 0, 37, 45, 67, 15, 44, 2,
+ 36, 27, 63, 121, 65, 71, 3, 33, 57, 37,
+ 89, 51, 73, 57, 123, 55, 49, 63, 87, 36,
+ 1, 14, 57, 19, 55, 45, 121, 18, 6, 8,
+ 11, 32, 9, 9, 47, 41, 27, 21, 25, 18,
+ 26, 7, 26, 42, 33, 0, 11, 7, 12, 23,
+ 56, 1, 4, 56, 74, 78, 68, 54, 57, 17,
+ 17, 34, 28, 71, 20, 20, 20, 6, 14, 14,
+ 2, 8, 4, 12, 9, 12, 48, 24, 9, 1,
+ 4, 0, 26, 46, 38, 22, 30, 8, 10, 8,
+ 58, 38, 40, 27, 6, 11, 70, 46, 38, 28,
+ 34, 38, 24, 32, 48, 2, 34, 18, 18, 10,
+ 0, 24, 12, 20, 22, 16, 36, 54, 20, 35,
+ 16, 27, 34, 62, 39, 110, 124, 118, 116, 122,
+ 124, 112, 112, 104, 86, 70, 64, 82, 56, 15,
+ 7, 8, 7, 64, 60, 54, 66, 62, 48, 38,
+ 42, 0, 8, 1, 59, 49, 87, 23, 40, 36,
+ 22, 0, 10, 11, 21, 35, 73, 25, 78, 42,
+ 30, 16, 10, 13, 37, 45, 107, 10, 70, 56,
+ 50, 40, 18, 10, 1, 19, 51, 13, 70, 64,
+ 42, 8, 12, 15, 33, 65, 10, 116, 98, 90,
+ 72, 46, 10, 11, 29, 41, 124, 83, 83, 45,
+ 97, 89, 67, 89, 81, 75, 83, 85, 93, 63,
+ 61, 53, 57, 57, 43, 7, 31, 41, 11, 6,
+ 10, 4, 26, 26, 26, 16, 44, 34, 26, 28,
+ 22, 44, 32, 16, 44, 38, 26, 20, 28, 0,
+ 1, 9, 10, 13, 38, 64, 40, 20, 58, 50,
+ 24, 46, 60, 38, 50, 26, 12, 38, 104, 98,
+ 104, 102, 106, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 66, 124, 124, 124, 124, 124, 124, 106,
+ 72, 70, 12, 35, 21, 63, 117, 124, 124, 124,
+ 112, 106, 104, 80, 84, 60, 62, 42, 36, 30,
+ 5, 15, 6, 0, 5, 76, 84, 78, 60, 78,
+ 76, 44, 60, 66, 40, 10, 18, 2, 45, 46,
+ 24, 8, 29, 13, 9, 11, 3, 7, 15, 9,
+ 11, 5, 6, 9, 3, 0, 4, 124, 124, 124,
+ 124, 124, 116, 102, 68, 4, 3, 58, 44, 38,
+ 32, 16, 8, 4, 3, 23, 41, 33, 25, 33,
+ 29, 15, 19, 37, 13, 5, 6, 3, 3, 8,
+ 8, 124, 124, 124, 124, 116, 108, 86, 52, 1,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 2 */
+
+ 124, 18, 21, 124, 18, 21, 119, 75, 22, 20,
+ 24, 56, 118, 122, 108, 28, 99, 12, 25, 0,
+ 2, 26, 17, 22, 5, 38, 120, 13, 57, 83,
+ 115, 75, 7, 25, 0, 2, 43, 57, 6, 34,
+ 9, 33, 61, 16, 7, 43, 49, 10, 23, 57,
+ 18, 11, 33, 49, 5, 19, 19, 31, 15, 10,
+ 44, 0, 0, 0, 35, 45, 67, 15, 42, 2,
+ 36, 27, 63, 117, 61, 67, 1, 29, 55, 35,
+ 87, 49, 71, 55, 119, 55, 49, 63, 85, 36,
+ 1, 14, 55, 19, 53, 45, 119, 18, 6, 8,
+ 11, 32, 9, 9, 47, 41, 27, 21, 25, 18,
+ 26, 7, 26, 42, 33, 0, 11, 7, 12, 23,
+ 54, 1, 4, 54, 72, 76, 66, 52, 55, 17,
+ 17, 32, 26, 71, 18, 20, 20, 6, 14, 14,
+ 4, 8, 4, 12, 9, 12, 46, 24, 11, 1,
+ 4, 1, 26, 44, 38, 22, 28, 8, 10, 8,
+ 56, 38, 38, 27, 6, 13, 68, 46, 38, 28,
+ 34, 38, 24, 32, 48, 2, 34, 18, 18, 10,
+ 0, 24, 12, 20, 22, 16, 34, 52, 18, 35,
+ 16, 27, 32, 60, 39, 106, 124, 114, 112, 118,
+ 120, 108, 108, 100, 82, 66, 60, 78, 52, 17,
+ 7, 8, 9, 62, 58, 52, 64, 58, 46, 36,
+ 40, 1, 6, 3, 59, 49, 85, 23, 40, 36,
+ 22, 0, 10, 11, 21, 35, 71, 23, 78, 42,
+ 30, 16, 10, 13, 35, 43, 103, 10, 70, 56,
+ 50, 40, 18, 10, 1, 19, 49, 13, 70, 64,
+ 42, 8, 12, 15, 33, 63, 10, 114, 96, 88,
+ 70, 46, 10, 11, 29, 41, 124, 81, 81, 43,
+ 95, 87, 65, 87, 79, 73, 81, 83, 89, 61,
+ 59, 53, 55, 55, 43, 9, 31, 39, 11, 6,
+ 8, 4, 24, 24, 24, 14, 42, 34, 26, 28,
+ 20, 42, 32, 16, 42, 36, 26, 20, 26, 0,
+ 1, 9, 10, 13, 36, 62, 38, 20, 56, 48,
+ 24, 44, 58, 38, 50, 24, 10, 34, 102, 96,
+ 102, 100, 104, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 64, 124, 124, 124, 124, 124, 124, 102,
+ 70, 68, 12, 33, 21, 61, 113, 120, 120, 124,
+ 108, 102, 100, 76, 80, 58, 58, 40, 32, 28,
+ 7, 17, 4, 0, 7, 74, 82, 74, 56, 74,
+ 72, 42, 56, 62, 38, 8, 16, 0, 47, 44,
+ 22, 6, 29, 13, 9, 9, 3, 5, 13, 7,
+ 9, 3, 8, 7, 1, 2, 6, 124, 124, 124,
+ 124, 120, 110, 96, 62, 0, 3, 58, 44, 38,
+ 32, 18, 8, 4, 3, 23, 41, 33, 23, 33,
+ 27, 13, 19, 35, 11, 3, 6, 3, 1, 8,
+ 8, 124, 124, 124, 120, 110, 100, 78, 46, 7,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 3 */
+
+ 124, 18, 21, 124, 18, 21, 115, 71, 24, 20,
+ 22, 52, 114, 120, 108, 28, 95, 12, 23, 2,
+ 2, 24, 17, 20, 7, 38, 116, 15, 59, 83,
+ 109, 73, 7, 23, 2, 2, 41, 55, 8, 34,
+ 9, 31, 59, 14, 9, 43, 49, 10, 23, 57,
+ 18, 11, 33, 49, 3, 19, 19, 31, 13, 10,
+ 44, 0, 0, 0, 35, 45, 67, 13, 40, 2,
+ 36, 27, 63, 113, 57, 65, 2, 25, 53, 33,
+ 83, 47, 69, 53, 115, 53, 49, 61, 83, 36,
+ 1, 14, 55, 19, 53, 43, 115, 18, 4, 6,
+ 13, 32, 9, 9, 45, 41, 25, 21, 23, 18,
+ 26, 7, 26, 40, 33, 0, 11, 7, 12, 23,
+ 52, 1, 4, 52, 70, 74, 64, 50, 55, 15,
+ 17, 30, 26, 69, 18, 20, 20, 6, 14, 14,
+ 6, 8, 4, 12, 7, 12, 44, 24, 13, 1,
+ 4, 1, 24, 42, 38, 22, 26, 8, 10, 8,
+ 52, 38, 36, 27, 6, 13, 66, 46, 38, 28,
+ 34, 38, 24, 32, 48, 2, 32, 18, 18, 10,
+ 0, 22, 10, 18, 20, 14, 32, 50, 18, 35,
+ 14, 27, 30, 56, 39, 104, 124, 110, 108, 114,
+ 116, 104, 104, 96, 78, 64, 58, 74, 48, 19,
+ 7, 8, 9, 60, 56, 50, 60, 56, 42, 34,
+ 38, 3, 6, 3, 59, 49, 85, 21, 40, 36,
+ 22, 0, 10, 11, 21, 33, 69, 23, 78, 42,
+ 30, 16, 12, 11, 33, 41, 99, 10, 70, 56,
+ 50, 40, 20, 10, 1, 19, 49, 13, 70, 64,
+ 40, 8, 12, 15, 33, 61, 10, 114, 96, 86,
+ 68, 46, 10, 11, 27, 39, 124, 79, 79, 43,
+ 93, 85, 63, 83, 77, 71, 79, 79, 87, 61,
+ 57, 53, 55, 51, 43, 9, 31, 39, 11, 4,
+ 8, 4, 22, 22, 22, 12, 42, 32, 26, 26,
+ 20, 42, 30, 16, 40, 36, 24, 20, 24, 0,
+ 3, 9, 10, 15, 36, 62, 36, 20, 54, 48,
+ 24, 42, 56, 36, 48, 22, 10, 32, 100, 94,
+ 102, 98, 102, 122, 124, 124, 124, 124, 124, 124,
+ 124, 124, 62, 124, 124, 124, 124, 124, 124, 98,
+ 68, 66, 12, 31, 21, 59, 109, 116, 116, 124,
+ 104, 98, 96, 74, 76, 54, 56, 38, 30, 24,
+ 9, 19, 4, 1, 9, 72, 78, 72, 52, 70,
+ 68, 38, 54, 58, 34, 6, 12, 3, 49, 42,
+ 20, 4, 29, 11, 9, 9, 1, 5, 11, 5,
+ 7, 1, 10, 5, 0, 6, 8, 124, 124, 124,
+ 124, 116, 104, 90, 56, 3, 1, 60, 46, 40,
+ 32, 20, 10, 4, 1, 21, 41, 31, 23, 31,
+ 25, 11, 19, 35, 11, 3, 6, 1, 0, 8,
+ 8, 124, 124, 124, 114, 104, 92, 70, 38, 11,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 4 */
+
+ 124, 18, 21, 124, 18, 21, 113, 69, 24, 20,
+ 22, 50, 112, 116, 108, 28, 89, 10, 21, 2,
+ 2, 22, 17, 18, 9, 36, 112, 19, 61, 85,
+ 103, 71, 7, 21, 2, 2, 41, 53, 8, 32,
+ 9, 31, 59, 14, 9, 41, 49, 10, 23, 55,
+ 16, 13, 33, 49, 3, 17, 19, 29, 13, 10,
+ 44, 0, 0, 0, 33, 47, 67, 13, 38, 2,
+ 36, 27, 63, 111, 55, 61, 4, 23, 51, 31,
+ 81, 43, 67, 51, 111, 53, 47, 61, 81, 36,
+ 1, 14, 53, 19, 51, 43, 113, 16, 4, 6,
+ 13, 32, 9, 9, 45, 41, 25, 21, 23, 18,
+ 24, 7, 26, 40, 33, 0, 11, 7, 12, 23,
+ 52, 3, 4, 52, 68, 72, 62, 48, 53, 15,
+ 17, 28, 24, 69, 16, 20, 18, 6, 14, 14,
+ 8, 10, 4, 10, 7, 10, 42, 22, 15, 1,
+ 4, 3, 24, 40, 36, 20, 26, 10, 10, 8,
+ 50, 36, 34, 27, 6, 15, 66, 46, 38, 28,
+ 34, 38, 24, 32, 46, 2, 32, 18, 18, 10,
+ 1, 22, 10, 18, 20, 14, 32, 48, 16, 35,
+ 14, 27, 28, 54, 39, 100, 124, 106, 104, 110,
+ 112, 100, 100, 92, 74, 60, 54, 68, 44, 21,
+ 7, 6, 11, 58, 54, 48, 58, 52, 40, 32,
+ 34, 3, 4, 5, 59, 49, 83, 21, 40, 36,
+ 22, 0, 10, 11, 21, 33, 67, 21, 78, 42,
+ 30, 16, 12, 11, 33, 41, 95, 10, 70, 56,
+ 50, 40, 20, 10, 1, 19, 47, 13, 70, 62,
+ 40, 8, 12, 15, 33, 61, 10, 112, 94, 84,
+ 66, 46, 10, 11, 27, 39, 124, 77, 77, 41,
+ 89, 83, 61, 81, 73, 69, 75, 77, 83, 59,
+ 57, 51, 53, 49, 41, 11, 31, 37, 11, 4,
+ 6, 2, 20, 20, 20, 10, 40, 32, 26, 26,
+ 18, 40, 30, 16, 38, 34, 24, 18, 22, 1,
+ 3, 9, 10, 15, 34, 60, 34, 20, 52, 46,
+ 24, 40, 54, 36, 48, 20, 8, 28, 98, 94,
+ 100, 96, 98, 120, 124, 124, 124, 124, 124, 124,
+ 124, 124, 58, 124, 124, 124, 124, 124, 124, 94,
+ 66, 62, 12, 29, 19, 57, 105, 114, 112, 120,
+ 102, 94, 92, 70, 72, 52, 52, 34, 26, 22,
+ 11, 21, 2, 1, 11, 68, 76, 68, 50, 66,
+ 64, 36, 50, 54, 32, 4, 10, 5, 49, 40,
+ 20, 2, 29, 11, 7, 7, 1, 3, 9, 5,
+ 5, 0, 12, 3, 2, 8, 10, 124, 124, 124,
+ 122, 110, 98, 84, 50, 9, 1, 60, 46, 40,
+ 34, 20, 10, 6, 1, 21, 39, 31, 21, 31,
+ 23, 9, 19, 33, 9, 1, 6, 1, 2, 8,
+ 8, 124, 124, 122, 108, 98, 84, 62, 32, 17,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 5 */
+
+ 124, 18, 21, 124, 18, 21, 109, 65, 24, 20,
+ 20, 46, 108, 114, 108, 28, 85, 10, 19, 4,
+ 2, 22, 15, 16, 11, 36, 108, 23, 63, 85,
+ 97, 67, 7, 19, 4, 2, 41, 51, 8, 32,
+ 9, 31, 57, 14, 11, 41, 49, 10, 23, 53,
+ 16, 13, 33, 49, 1, 17, 17, 29, 11, 10,
+ 44, 0, 0, 0, 33, 47, 67, 11, 36, 2,
+ 36, 25, 63, 107, 51, 59, 8, 19, 47, 29,
+ 79, 41, 65, 49, 107, 51, 47, 59, 79, 36,
+ 1, 14, 53, 19, 51, 41, 109, 16, 4, 6,
+ 13, 32, 9, 7, 43, 41, 25, 21, 21, 18,
+ 24, 7, 26, 40, 31, 0, 9, 7, 12, 23,
+ 50, 3, 4, 50, 66, 72, 60, 46, 51, 13,
+ 17, 26, 24, 67, 16, 20, 18, 6, 14, 14,
+ 10, 10, 4, 10, 7, 10, 40, 22, 17, 1,
+ 4, 3, 22, 38, 36, 20, 24, 10, 10, 8,
+ 48, 36, 32, 27, 6, 15, 64, 46, 38, 28,
+ 34, 38, 24, 32, 46, 2, 32, 18, 18, 10,
+ 1, 22, 10, 16, 20, 14, 30, 46, 16, 35,
+ 12, 27, 26, 52, 39, 98, 122, 104, 102, 106,
+ 108, 96, 96, 88, 70, 56, 50, 64, 42, 23,
+ 7, 6, 11, 56, 52, 46, 56, 50, 36, 30,
+ 32, 5, 4, 5, 59, 49, 83, 21, 40, 36,
+ 22, 0, 10, 9, 19, 31, 65, 21, 78, 42,
+ 30, 16, 12, 9, 31, 39, 91, 10, 70, 56,
+ 50, 40, 20, 10, 1, 19, 45, 13, 72, 62,
+ 38, 8, 12, 15, 33, 59, 10, 112, 92, 82,
+ 64, 46, 10, 11, 27, 37, 124, 75, 75, 39,
+ 87, 81, 59, 79, 71, 67, 73, 73, 79, 57,
+ 55, 51, 53, 47, 41, 11, 29, 35, 11, 2,
+ 6, 2, 20, 18, 18, 8, 38, 30, 26, 24,
+ 18, 40, 30, 16, 36, 32, 24, 18, 20, 1,
+ 3, 9, 10, 15, 32, 60, 34, 20, 50, 44,
+ 24, 38, 52, 34, 46, 18, 6, 24, 96, 92,
+ 100, 94, 96, 116, 124, 124, 124, 124, 124, 124,
+ 124, 124, 56, 124, 124, 124, 124, 124, 122, 90,
+ 64, 60, 12, 27, 19, 55, 101, 110, 110, 116,
+ 98, 90, 88, 68, 68, 50, 48, 32, 22, 18,
+ 13, 23, 2, 1, 13, 66, 72, 64, 46, 64,
+ 62, 32, 48, 52, 28, 2, 8, 7, 51, 40,
+ 18, 0, 27, 9, 7, 7, 0, 1, 7, 3,
+ 3, 2, 16, 1, 4, 10, 14, 124, 124, 124,
+ 116, 106, 92, 78, 44, 13, 1, 62, 48, 42,
+ 34, 22, 10, 6, 0, 19, 39, 31, 19, 29,
+ 21, 7, 17, 31, 9, 1, 6, 0, 4, 8,
+ 8, 124, 124, 116, 102, 92, 78, 54, 24, 23,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 6 */
+
+ 124, 18, 23, 124, 18, 23, 105, 63, 26, 20,
+ 20, 44, 106, 112, 108, 28, 81, 10, 19, 6,
+ 2, 20, 15, 14, 13, 34, 106, 25, 65, 87,
+ 91, 65, 7, 19, 6, 2, 39, 49, 10, 30,
+ 7, 29, 55, 12, 11, 41, 49, 10, 21, 53,
+ 16, 13, 31, 49, 1, 17, 17, 29, 11, 10,
+ 44, 0, 0, 0, 31, 47, 67, 11, 36, 0,
+ 36, 25, 61, 103, 47, 55, 10, 15, 45, 27,
+ 75, 39, 63, 49, 105, 51, 47, 59, 79, 38,
+ 1, 14, 51, 17, 49, 41, 107, 16, 2, 4,
+ 15, 32, 9, 7, 43, 41, 23, 21, 21, 18,
+ 24, 5, 26, 38, 31, 0, 9, 7, 12, 23,
+ 48, 3, 4, 48, 64, 70, 60, 46, 51, 13,
+ 17, 26, 22, 67, 14, 20, 18, 6, 14, 14,
+ 10, 10, 4, 10, 5, 10, 38, 22, 17, 3,
+ 4, 5, 22, 36, 36, 20, 22, 10, 10, 8,
+ 44, 36, 30, 27, 6, 17, 62, 46, 36, 28,
+ 34, 38, 24, 32, 46, 2, 30, 18, 16, 10,
+ 1, 20, 8, 16, 18, 12, 28, 44, 14, 35,
+ 12, 25, 24, 48, 39, 94, 118, 100, 98, 102,
+ 104, 92, 92, 84, 66, 54, 48, 60, 38, 25,
+ 7, 6, 13, 54, 50, 44, 52, 46, 34, 28,
+ 30, 7, 2, 7, 59, 49, 81, 19, 40, 36,
+ 22, 2, 10, 9, 19, 31, 63, 19, 76, 42,
+ 30, 16, 14, 9, 29, 37, 87, 10, 72, 56,
+ 50, 40, 22, 10, 1, 17, 45, 13, 72, 62,
+ 38, 8, 12, 13, 31, 57, 10, 110, 92, 80,
+ 64, 46, 10, 9, 25, 37, 124, 75, 73, 39,
+ 85, 79, 57, 75, 69, 65, 71, 71, 77, 57,
+ 53, 51, 51, 43, 41, 13, 29, 35, 11, 2,
+ 4, 2, 18, 16, 16, 6, 38, 30, 26, 24,
+ 16, 38, 28, 16, 36, 32, 22, 18, 20, 1,
+ 5, 9, 10, 17, 32, 58, 32, 18, 48, 44,
+ 26, 38, 50, 34, 46, 18, 6, 22, 94, 90,
+ 98, 92, 94, 114, 124, 124, 124, 124, 124, 124,
+ 124, 122, 54, 124, 124, 124, 124, 124, 118, 86,
+ 62, 58, 12, 25, 19, 51, 95, 106, 106, 112,
+ 94, 86, 84, 64, 64, 46, 46, 30, 20, 16,
+ 15, 25, 0, 3, 15, 64, 70, 62, 42, 60,
+ 58, 30, 44, 48, 26, 1, 4, 11, 53, 38,
+ 16, 1, 27, 9, 7, 5, 0, 1, 3, 1,
+ 1, 4, 18, 2, 6, 14, 16, 124, 124, 120,
+ 112, 100, 88, 72, 40, 17, 0, 62, 48, 42,
+ 34, 24, 12, 6, 0, 19, 39, 29, 19, 29,
+ 19, 5, 17, 31, 7, 0, 6, 0, 6, 8,
+ 8, 124, 124, 112, 96, 84, 70, 48, 18, 27,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 7 */
+
+ 124, 18, 23, 124, 18, 23, 101, 59, 26, 20,
+ 18, 40, 102, 108, 108, 28, 75, 8, 17, 6,
+ 2, 18, 15, 12, 15, 34, 102, 29, 67, 87,
+ 85, 63, 7, 17, 6, 2, 39, 47, 10, 30,
+ 7, 29, 55, 12, 13, 39, 49, 10, 21, 51,
+ 14, 13, 31, 49, 0, 15, 17, 27, 9, 10,
+ 44, 0, 0, 0, 31, 47, 67, 9, 34, 0,
+ 36, 25, 61, 101, 43, 53, 14, 11, 43, 25,
+ 73, 35, 61, 47, 101, 49, 45, 57, 77, 38,
+ 1, 14, 51, 17, 49, 39, 103, 14, 2, 4,
+ 15, 32, 9, 7, 41, 41, 23, 21, 19, 18,
+ 22, 5, 26, 38, 31, 0, 9, 7, 12, 23,
+ 48, 3, 4, 48, 62, 68, 58, 44, 49, 11,
+ 17, 24, 22, 65, 14, 20, 16, 6, 14, 14,
+ 12, 12, 4, 10, 5, 10, 36, 22, 19, 3,
+ 4, 5, 20, 34, 34, 20, 22, 12, 10, 8,
+ 42, 34, 28, 27, 6, 17, 62, 46, 36, 28,
+ 34, 38, 24, 32, 46, 2, 30, 18, 16, 10,
+ 1, 20, 8, 14, 18, 12, 28, 42, 14, 35,
+ 10, 25, 22, 46, 39, 92, 114, 96, 94, 98,
+ 100, 88, 88, 80, 62, 50, 44, 54, 34, 27,
+ 7, 4, 13, 52, 48, 42, 50, 44, 30, 26,
+ 28, 7, 2, 7, 59, 49, 81, 19, 40, 36,
+ 22, 2, 10, 9, 19, 29, 61, 19, 76, 42,
+ 30, 16, 14, 7, 27, 37, 83, 10, 72, 56,
+ 50, 40, 22, 10, 1, 17, 43, 13, 72, 60,
+ 36, 8, 12, 13, 31, 57, 10, 110, 90, 78,
+ 62, 46, 10, 9, 25, 35, 124, 73, 71, 37,
+ 81, 77, 55, 73, 65, 63, 67, 67, 73, 55,
+ 51, 49, 51, 41, 39, 13, 29, 33, 11, 0,
+ 4, 0, 16, 14, 14, 4, 36, 28, 26, 22,
+ 16, 38, 28, 16, 34, 30, 22, 16, 18, 1,
+ 5, 9, 10, 17, 30, 58, 30, 18, 46, 42,
+ 26, 36, 48, 32, 44, 16, 4, 18, 92, 90,
+ 98, 90, 90, 110, 124, 124, 124, 124, 124, 124,
+ 124, 118, 50, 124, 124, 124, 124, 124, 112, 82,
+ 60, 56, 12, 23, 17, 49, 91, 104, 102, 108,
+ 92, 82, 80, 62, 60, 44, 42, 26, 16, 12,
+ 17, 27, 0, 3, 17, 60, 66, 58, 40, 56,
+ 54, 26, 42, 44, 22, 3, 2, 13, 53, 36,
+ 16, 3, 27, 7, 5, 5, 2, 0, 1, 0,
+ 0, 6, 20, 4, 8, 16, 18, 124, 122, 116,
+ 106, 96, 82, 66, 34, 21, 0, 64, 50, 44,
+ 36, 26, 12, 8, 2, 17, 37, 29, 17, 27,
+ 17, 3, 17, 29, 7, 0, 6, 2, 8, 8,
+ 8, 124, 124, 106, 90, 78, 62, 40, 10, 33,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 8 */
+
+ 124, 16, 23, 124, 16, 23, 99, 57, 26, 20,
+ 18, 38, 100, 106, 108, 28, 71, 8, 15, 8,
+ 2, 16, 15, 10, 19, 32, 98, 33, 69, 89,
+ 81, 61, 7, 15, 8, 2, 39, 45, 10, 28,
+ 7, 29, 53, 10, 13, 39, 51, 10, 21, 51,
+ 14, 15, 31, 49, 0, 15, 17, 27, 9, 10,
+ 44, 0, 0, 0, 29, 49, 67, 9, 32, 0,
+ 36, 25, 61, 97, 41, 49, 16, 9, 41, 23,
+ 71, 33, 59, 45, 97, 49, 45, 57, 75, 38,
+ 1, 14, 49, 17, 47, 39, 101, 14, 0, 2,
+ 17, 32, 9, 7, 41, 41, 23, 21, 19, 16,
+ 22, 5, 26, 36, 31, 0, 9, 7, 10, 23,
+ 46, 5, 4, 46, 58, 66, 56, 42, 49, 11,
+ 17, 22, 20, 65, 12, 18, 16, 6, 14, 14,
+ 14, 12, 4, 8, 5, 8, 34, 20, 21, 3,
+ 4, 7, 20, 32, 34, 18, 20, 12, 10, 8,
+ 38, 34, 26, 27, 6, 19, 60, 44, 36, 28,
+ 34, 36, 22, 32, 44, 0, 28, 18, 16, 8,
+ 3, 18, 6, 14, 16, 10, 26, 40, 12, 35,
+ 10, 25, 20, 42, 39, 88, 110, 92, 90, 94,
+ 94, 84, 84, 76, 58, 46, 40, 50, 30, 29,
+ 7, 4, 15, 50, 44, 38, 46, 40, 28, 22,
+ 24, 9, 0, 9, 59, 49, 79, 19, 40, 36,
+ 22, 2, 10, 9, 19, 29, 59, 17, 76, 42,
+ 30, 16, 14, 7, 27, 35, 81, 10, 72, 56,
+ 50, 38, 22, 10, 1, 17, 43, 13, 72, 60,
+ 36, 8, 12, 13, 31, 55, 10, 108, 88, 76,
+ 60, 44, 10, 9, 25, 35, 124, 71, 69, 37,
+ 79, 75, 55, 71, 63, 61, 65, 65, 71, 55,
+ 51, 49, 49, 39, 39, 15, 29, 33, 11, 0,
+ 2, 0, 14, 12, 10, 2, 34, 28, 26, 22,
+ 14, 36, 26, 14, 32, 28, 20, 16, 16, 3,
+ 7, 9, 10, 19, 28, 56, 28, 18, 44, 40,
+ 26, 34, 46, 32, 44, 14, 2, 14, 90, 88,
+ 96, 86, 88, 108, 124, 124, 124, 124, 124, 124,
+ 124, 112, 48, 124, 124, 124, 124, 122, 108, 78,
+ 56, 52, 12, 23, 17, 47, 87, 100, 98, 104,
+ 88, 76, 76, 58, 56, 40, 38, 24, 12, 10,
+ 19, 29, 1, 5, 19, 58, 64, 54, 36, 52,
+ 50, 24, 38, 40, 20, 5, 1, 17, 55, 34,
+ 14, 5, 27, 7, 5, 3, 2, 0, 0, 0,
+ 2, 8, 22, 6, 10, 18, 20, 122, 118, 112,
+ 102, 90, 76, 60, 28, 27, 0, 64, 50, 44,
+ 36, 26, 12, 8, 2, 17, 37, 29, 17, 27,
+ 15, 1, 17, 29, 5, 2, 6, 2, 8, 8,
+ 6, 124, 122, 102, 84, 72, 54, 32, 4, 39,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 9 */
+
+ 124, 16, 23, 124, 16, 23, 95, 55, 28, 20,
+ 18, 36, 98, 104, 108, 28, 67, 8, 13, 10,
+ 2, 16, 13, 8, 21, 30, 94, 35, 71, 91,
+ 75, 57, 7, 13, 10, 2, 37, 43, 12, 26,
+ 7, 27, 51, 10, 13, 39, 51, 10, 21, 49,
+ 14, 15, 31, 49, 0, 15, 15, 27, 9, 10,
+ 44, 0, 0, 0, 27, 49, 67, 9, 30, 0,
+ 36, 23, 61, 93, 37, 45, 18, 5, 37, 21,
+ 67, 31, 55, 43, 93, 49, 45, 57, 73, 38,
+ 1, 14, 47, 17, 45, 37, 99, 14, 0, 2,
+ 17, 32, 9, 5, 39, 39, 21, 21, 19, 16,
+ 22, 5, 26, 36, 29, 0, 7, 7, 10, 21,
+ 44, 5, 4, 44, 56, 66, 54, 40, 47, 11,
+ 15, 20, 18, 65, 10, 18, 16, 8, 14, 14,
+ 16, 12, 4, 8, 3, 8, 34, 20, 23, 3,
+ 4, 9, 20, 30, 34, 18, 18, 12, 10, 8,
+ 36, 34, 26, 27, 6, 21, 58, 44, 36, 28,
+ 34, 36, 22, 32, 44, 0, 28, 18, 16, 8,
+ 3, 18, 6, 14, 16, 10, 24, 40, 12, 35,
+ 10, 25, 18, 40, 39, 84, 108, 90, 88, 90,
+ 90, 82, 82, 72, 54, 44, 38, 46, 28, 31,
+ 7, 4, 17, 48, 42, 36, 44, 38, 26, 20,
+ 22, 11, 1, 11, 59, 47, 77, 17, 42, 36,
+ 22, 2, 12, 7, 17, 27, 57, 15, 76, 42,
+ 30, 16, 16, 7, 25, 33, 77, 10, 72, 56,
+ 50, 38, 24, 10, 1, 17, 41, 13, 74, 60,
+ 36, 8, 14, 13, 31, 53, 10, 108, 88, 76,
+ 58, 44, 10, 9, 23, 33, 124, 69, 67, 35,
+ 77, 71, 53, 67, 61, 57, 63, 63, 67, 53,
+ 49, 49, 47, 35, 39, 17, 27, 31, 11, 0,
+ 0, 0, 14, 10, 8, 0, 34, 28, 26, 22,
+ 14, 34, 26, 14, 30, 28, 20, 16, 14, 3,
+ 7, 7, 12, 19, 28, 54, 28, 18, 44, 40,
+ 26, 32, 44, 32, 44, 12, 2, 12, 90, 86,
+ 94, 84, 86, 106, 120, 120, 124, 124, 124, 124,
+ 124, 108, 46, 124, 124, 124, 124, 116, 104, 76,
+ 54, 50, 12, 21, 17, 45, 83, 96, 96, 100,
+ 84, 72, 74, 56, 52, 38, 36, 22, 10, 8,
+ 21, 29, 1, 5, 21, 56, 62, 52, 32, 50,
+ 48, 22, 36, 38, 18, 7, 3, 19, 57, 34,
+ 12, 5, 25, 7, 5, 1, 4, 2, 2, 2,
+ 4, 10, 26, 8, 12, 22, 24, 120, 116, 108,
+ 98, 84, 70, 54, 22, 31, 2, 64, 50, 46,
+ 36, 28, 14, 8, 4, 15, 37, 27, 15, 27,
+ 13, 2, 15, 27, 3, 4, 6, 4, 10, 8,
+ 6, 124, 118, 98, 80, 66, 48, 24, 1, 43,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 10 */
+
+ 124, 16, 23, 124, 16, 23, 91, 51, 28, 20,
+ 16, 32, 94, 100, 108, 28, 61, 6, 11, 10,
+ 2, 14, 13, 6, 23, 30, 90, 39, 73, 91,
+ 69, 55, 7, 11, 10, 2, 37, 41, 12, 26,
+ 7, 27, 51, 10, 15, 37, 51, 10, 21, 47,
+ 12, 15, 31, 49, 2, 13, 15, 25, 7, 10,
+ 44, 0, 0, 0, 27, 49, 67, 7, 28, 0,
+ 36, 23, 61, 91, 33, 43, 22, 1, 35, 19,
+ 65, 27, 53, 41, 89, 47, 43, 55, 71, 38,
+ 1, 14, 47, 17, 45, 37, 95, 12, 0, 2,
+ 17, 32, 9, 5, 39, 39, 21, 21, 17, 16,
+ 20, 5, 26, 36, 29, 0, 7, 7, 10, 21,
+ 44, 5, 4, 44, 54, 64, 52, 38, 45, 9,
+ 15, 18, 18, 63, 10, 18, 14, 8, 14, 14,
+ 18, 14, 4, 8, 3, 8, 32, 20, 25, 3,
+ 4, 9, 18, 28, 32, 18, 18, 14, 10, 8,
+ 34, 32, 24, 27, 6, 21, 58, 44, 36, 28,
+ 34, 36, 22, 32, 44, 0, 28, 18, 16, 8,
+ 3, 18, 6, 12, 16, 10, 24, 38, 10, 35,
+ 8, 25, 16, 38, 39, 82, 104, 86, 84, 86,
+ 86, 78, 78, 68, 50, 40, 34, 40, 24, 33,
+ 7, 2, 17, 46, 40, 34, 42, 34, 22, 18,
+ 20, 11, 1, 11, 59, 47, 77, 17, 42, 36,
+ 22, 2, 12, 7, 17, 27, 55, 15, 76, 42,
+ 30, 16, 16, 5, 23, 33, 73, 10, 72, 56,
+ 50, 38, 24, 10, 1, 17, 39, 13, 74, 58,
+ 34, 8, 14, 13, 31, 53, 10, 106, 86, 74,
+ 56, 44, 10, 9, 23, 33, 124, 67, 65, 33,
+ 73, 69, 51, 65, 57, 55, 59, 59, 63, 51,
+ 47, 47, 47, 33, 37, 17, 27, 29, 11, 1,
+ 0, 1, 12, 8, 6, 1, 32, 26, 26, 20,
+ 12, 34, 26, 14, 28, 26, 20, 14, 12, 3,
+ 7, 7, 12, 19, 26, 54, 26, 18, 42, 38,
+ 26, 30, 42, 30, 42, 10, 0, 8, 88, 86,
+ 94, 82, 82, 102, 116, 116, 124, 124, 124, 124,
+ 124, 104, 42, 118, 124, 118, 124, 112, 98, 72,
+ 52, 48, 12, 19, 15, 43, 79, 94, 92, 96,
+ 82, 68, 70, 52, 48, 36, 32, 18, 6, 4,
+ 23, 31, 3, 5, 23, 52, 58, 48, 30, 46,
+ 44, 18, 32, 34, 14, 9, 5, 21, 57, 32,
+ 12, 7, 25, 5, 3, 1, 4, 4, 4, 4,
+ 6, 12, 28, 10, 14, 24, 26, 120, 112, 104,
+ 92, 80, 64, 48, 16, 35, 2, 66, 52, 46,
+ 38, 30, 14, 10, 4, 15, 35, 27, 13, 25,
+ 11, 4, 15, 25, 3, 4, 6, 4, 12, 8,
+ 6, 124, 114, 92, 74, 60, 40, 16, 9, 49,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 11 */
+
+ 124, 16, 25, 124, 16, 25, 87, 49, 30, 20,
+ 16, 30, 92, 98, 108, 28, 57, 6, 11, 12,
+ 2, 12, 13, 4, 25, 28, 88, 41, 75, 93,
+ 63, 53, 7, 11, 12, 2, 35, 39, 14, 24,
+ 5, 25, 49, 8, 15, 37, 51, 10, 19, 47,
+ 12, 15, 29, 49, 2, 13, 15, 25, 7, 10,
+ 44, 0, 0, 0, 25, 49, 67, 7, 28, 1,
+ 36, 23, 59, 87, 29, 39, 24, 2, 33, 17,
+ 61, 25, 51, 41, 87, 47, 43, 55, 71, 40,
+ 1, 14, 45, 15, 43, 35, 93, 12, 1, 0,
+ 19, 32, 9, 5, 37, 39, 19, 21, 17, 16,
+ 20, 3, 26, 34, 29, 0, 7, 7, 10, 21,
+ 42, 5, 4, 42, 52, 62, 52, 38, 45, 9,
+ 15, 18, 16, 63, 8, 18, 14, 8, 14, 14,
+ 18, 14, 4, 8, 1, 8, 30, 20, 25, 5,
+ 4, 11, 18, 26, 32, 18, 16, 14, 10, 8,
+ 30, 32, 22, 27, 6, 23, 56, 44, 34, 28,
+ 34, 36, 22, 32, 44, 0, 26, 18, 14, 8,
+ 3, 16, 4, 12, 14, 8, 22, 36, 10, 35,
+ 8, 23, 14, 34, 39, 78, 100, 82, 80, 82,
+ 82, 74, 74, 64, 46, 38, 32, 36, 20, 35,
+ 7, 2, 19, 44, 38, 32, 38, 32, 20, 16,
+ 18, 13, 3, 13, 59, 47, 75, 15, 42, 36,
+ 22, 4, 12, 7, 17, 25, 53, 13, 74, 42,
+ 30, 16, 18, 5, 21, 31, 69, 10, 74, 56,
+ 50, 38, 26, 10, 1, 15, 39, 13, 74, 58,
+ 34, 8, 14, 11, 29, 51, 10, 106, 86, 72,
+ 56, 44, 10, 7, 21, 31, 124, 67, 63, 33,
+ 71, 67, 49, 61, 55, 53, 57, 57, 61, 51,
+ 45, 47, 45, 29, 37, 19, 27, 29, 11, 1,
+ 1, 1, 10, 6, 4, 3, 32, 26, 26, 20,
+ 12, 32, 24, 14, 28, 26, 18, 14, 12, 3,
+ 9, 7, 12, 21, 26, 52, 24, 16, 40, 38,
+ 28, 30, 40, 30, 42, 10, 0, 6, 86, 84,
+ 92, 80, 80, 100, 112, 112, 122, 120, 124, 124,
+ 120, 98, 40, 114, 124, 112, 124, 106, 94, 68,
+ 50, 46, 12, 17, 15, 39, 73, 90, 88, 92,
+ 78, 64, 66, 50, 44, 32, 30, 16, 4, 2,
+ 25, 33, 3, 7, 25, 50, 56, 46, 26, 42,
+ 40, 16, 30, 30, 12, 13, 9, 25, 59, 30,
+ 10, 9, 25, 5, 3, 0, 6, 4, 8, 6,
+ 8, 14, 30, 14, 16, 28, 28, 118, 110, 100,
+ 88, 74, 60, 42, 12, 39, 4, 66, 52, 48,
+ 38, 32, 16, 10, 6, 13, 35, 25, 13, 25,
+ 9, 6, 15, 25, 1, 6, 6, 6, 14, 8,
+ 6, 124, 110, 88, 68, 52, 32, 10, 15, 53,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 12 */
+
+ 124, 16, 25, 124, 16, 25, 85, 45, 30, 20,
+ 14, 26, 88, 96, 108, 28, 53, 6, 9, 14,
+ 2, 10, 13, 2, 27, 28, 84, 45, 77, 93,
+ 57, 51, 7, 9, 14, 2, 35, 37, 14, 24,
+ 5, 25, 47, 8, 17, 37, 51, 10, 19, 45,
+ 12, 17, 29, 49, 4, 13, 15, 25, 5, 10,
+ 44, 0, 0, 0, 25, 51, 67, 5, 26, 1,
+ 36, 23, 59, 83, 27, 37, 28, 4, 31, 15,
+ 59, 23, 49, 39, 83, 45, 43, 53, 69, 40,
+ 1, 14, 45, 15, 43, 35, 89, 12, 1, 0,
+ 19, 32, 9, 5, 37, 39, 19, 21, 15, 16,
+ 20, 3, 26, 34, 29, 0, 7, 7, 10, 21,
+ 40, 7, 4, 40, 50, 60, 50, 36, 43, 7,
+ 15, 16, 16, 61, 8, 18, 14, 8, 14, 14,
+ 20, 14, 4, 6, 1, 6, 28, 18, 27, 5,
+ 4, 11, 16, 24, 32, 16, 14, 14, 10, 8,
+ 28, 32, 20, 27, 6, 23, 54, 44, 34, 28,
+ 34, 36, 22, 32, 42, 0, 26, 18, 14, 8,
+ 5, 16, 4, 10, 14, 8, 20, 34, 8, 35,
+ 6, 23, 12, 32, 39, 76, 96, 78, 76, 78,
+ 78, 70, 70, 60, 42, 34, 28, 32, 16, 37,
+ 7, 2, 19, 42, 36, 30, 36, 28, 16, 14,
+ 14, 15, 3, 13, 59, 47, 75, 15, 42, 36,
+ 22, 4, 12, 7, 17, 25, 51, 13, 74, 42,
+ 30, 16, 18, 3, 21, 29, 65, 10, 74, 56,
+ 50, 38, 26, 10, 1, 15, 37, 13, 74, 58,
+ 32, 8, 14, 11, 29, 49, 10, 104, 84, 70,
+ 54, 44, 10, 7, 21, 31, 124, 65, 61, 31,
+ 69, 65, 47, 59, 53, 51, 55, 53, 57, 49,
+ 45, 47, 45, 27, 37, 19, 27, 27, 11, 3,
+ 1, 1, 8, 4, 2, 5, 30, 24, 26, 18,
+ 10, 32, 24, 14, 26, 24, 18, 14, 10, 5,
+ 9, 7, 12, 21, 24, 52, 22, 16, 38, 36,
+ 28, 28, 38, 28, 40, 8, 1, 2, 84, 82,
+ 92, 78, 78, 96, 108, 108, 118, 114, 124, 124,
+ 114, 94, 38, 108, 124, 106, 116, 100, 88, 64,
+ 48, 42, 12, 15, 15, 37, 69, 86, 84, 88,
+ 74, 60, 62, 46, 40, 30, 26, 14, 0, 1,
+ 27, 35, 5, 7, 27, 48, 52, 42, 22, 38,
+ 36, 12, 26, 26, 8, 15, 11, 27, 61, 28,
+ 8, 11, 25, 3, 3, 0, 6, 6, 10, 6,
+ 10, 16, 32, 16, 18, 30, 30, 118, 106, 96,
+ 82, 70, 54, 36, 6, 45, 4, 68, 54, 48,
+ 38, 32, 16, 10, 6, 13, 35, 25, 11, 23,
+ 7, 8, 15, 23, 1, 6, 6, 6, 16, 8,
+ 6, 122, 106, 82, 62, 46, 24, 2, 23, 59,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 13 */
+
+ 124, 16, 25, 124, 16, 25, 81, 43, 30, 20,
+ 14, 24, 86, 92, 108, 28, 47, 4, 7, 14,
+ 2, 10, 11, 0, 29, 26, 80, 49, 79, 95,
+ 51, 47, 7, 7, 14, 2, 35, 35, 14, 22,
+ 5, 25, 47, 8, 17, 35, 51, 10, 19, 43,
+ 10, 17, 29, 49, 4, 11, 13, 23, 5, 10,
+ 44, 0, 0, 0, 23, 51, 67, 5, 24, 1,
+ 36, 21, 59, 81, 23, 33, 30, 8, 27, 13,
+ 57, 19, 47, 37, 79, 45, 41, 53, 67, 40,
+ 1, 14, 43, 15, 41, 33, 87, 10, 1, 0,
+ 19, 32, 9, 3, 35, 39, 19, 21, 15, 16,
+ 18, 3, 26, 34, 27, 0, 5, 7, 10, 21,
+ 40, 7, 4, 40, 48, 60, 48, 34, 41, 7,
+ 15, 14, 14, 61, 6, 18, 12, 8, 14, 14,
+ 22, 16, 4, 6, 1, 6, 26, 18, 29, 5,
+ 4, 13, 16, 22, 30, 16, 14, 16, 10, 8,
+ 26, 30, 18, 27, 6, 25, 54, 44, 34, 28,
+ 34, 36, 22, 32, 42, 0, 26, 18, 14, 8,
+ 5, 16, 4, 10, 14, 8, 20, 32, 8, 35,
+ 6, 23, 10, 30, 39, 72, 92, 76, 74, 74,
+ 74, 66, 66, 56, 38, 30, 24, 26, 14, 39,
+ 7, 0, 21, 40, 34, 28, 34, 26, 14, 12,
+ 12, 15, 5, 15, 59, 47, 73, 15, 42, 36,
+ 22, 4, 12, 5, 15, 23, 49, 11, 74, 42,
+ 30, 16, 18, 3, 19, 29, 61, 10, 74, 56,
+ 50, 38, 26, 10, 1, 15, 35, 13, 76, 56,
+ 32, 8, 14, 11, 29, 49, 10, 104, 82, 68,
+ 52, 44, 10, 7, 21, 29, 124, 63, 59, 29,
+ 65, 63, 45, 57, 49, 49, 51, 51, 53, 47,
+ 43, 45, 43, 25, 35, 21, 25, 25, 11, 3,
+ 3, 3, 8, 2, 0, 7, 28, 24, 26, 18,
+ 10, 30, 24, 14, 24, 22, 18, 12, 8, 5,
+ 9, 7, 12, 21, 22, 50, 22, 16, 36, 34,
+ 28, 26, 36, 28, 40, 6, 3, 1, 82, 82,
+ 90, 76, 74, 94, 104, 104, 114, 110, 124, 122,
+ 108, 90, 34, 102, 124, 100, 108, 96, 84, 60,
+ 46, 40, 12, 13, 13, 35, 65, 84, 82, 84,
+ 72, 56, 58, 44, 36, 28, 22, 10, 3, 3,
+ 29, 37, 5, 7, 29, 44, 50, 38, 20, 36,
+ 34, 10, 24, 24, 6, 17, 13, 29, 61, 28,
+ 8, 13, 23, 3, 1, 2, 8, 8, 12, 8,
+ 12, 18, 36, 18, 20, 32, 34, 116, 102, 92,
+ 78, 64, 48, 30, 0, 49, 4, 68, 54, 50,
+ 40, 34, 16, 12, 8, 11, 33, 25, 9, 23,
+ 5, 10, 13, 21, 0, 8, 6, 8, 18, 8,
+ 6, 118, 102, 78, 56, 40, 18, 5, 29, 65,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 14 */
+
+ 122, 16, 25, 122, 16, 25, 77, 39, 32, 20,
+ 12, 20, 82, 90, 108, 28, 43, 4, 5, 16,
+ 2, 8, 11, 1, 31, 26, 76, 51, 81, 95,
+ 45, 45, 7, 5, 16, 2, 33, 33, 16, 22,
+ 5, 23, 45, 6, 19, 35, 51, 10, 19, 43,
+ 10, 17, 29, 49, 6, 11, 13, 23, 3, 10,
+ 44, 0, 0, 0, 23, 51, 67, 3, 22, 1,
+ 36, 21, 59, 77, 19, 31, 34, 12, 25, 11,
+ 53, 17, 45, 35, 75, 43, 41, 51, 65, 40,
+ 1, 14, 43, 15, 41, 33, 83, 10, 3, 1,
+ 21, 32, 9, 3, 35, 39, 17, 21, 13, 16,
+ 18, 3, 26, 32, 27, 0, 5, 7, 10, 21,
+ 38, 7, 4, 38, 46, 58, 46, 32, 41, 5,
+ 15, 12, 14, 59, 6, 18, 12, 8, 14, 14,
+ 24, 16, 4, 6, 0, 6, 24, 18, 31, 5,
+ 4, 13, 14, 20, 30, 16, 12, 16, 10, 8,
+ 22, 30, 16, 27, 6, 25, 52, 44, 34, 28,
+ 34, 36, 22, 32, 42, 0, 24, 18, 14, 8,
+ 5, 14, 2, 8, 12, 6, 18, 30, 6, 35,
+ 4, 23, 8, 26, 39, 70, 88, 72, 70, 70,
+ 70, 62, 62, 52, 34, 28, 22, 22, 10, 41,
+ 7, 0, 21, 38, 32, 26, 30, 22, 10, 10,
+ 10, 17, 5, 15, 59, 47, 73, 13, 42, 36,
+ 22, 4, 12, 5, 15, 23, 47, 11, 74, 42,
+ 30, 16, 20, 1, 17, 27, 57, 10, 74, 56,
+ 50, 38, 28, 10, 1, 15, 35, 13, 76, 56,
+ 30, 8, 14, 11, 29, 47, 10, 102, 82, 66,
+ 50, 44, 10, 7, 19, 29, 124, 61, 57, 29,
+ 63, 61, 43, 53, 47, 47, 49, 47, 51, 47,
+ 41, 45, 43, 21, 35, 21, 25, 25, 11, 5,
+ 3, 3, 6, 0, 1, 9, 28, 22, 26, 16,
+ 8, 30, 22, 14, 22, 22, 16, 12, 6, 5,
+ 11, 7, 12, 23, 22, 50, 20, 16, 34, 34,
+ 28, 24, 34, 26, 38, 4, 3, 3, 80, 80,
+ 90, 74, 72, 90, 100, 100, 110, 104, 120, 118,
+ 102, 84, 32, 96, 124, 94, 100, 90, 78, 56,
+ 44, 38, 12, 11, 13, 33, 61, 80, 78, 80,
+ 68, 52, 54, 40, 32, 24, 20, 8, 5, 7,
+ 31, 39, 7, 9, 31, 42, 46, 36, 16, 32,
+ 30, 6, 20, 20, 2, 19, 17, 33, 63, 26,
+ 6, 15, 23, 1, 1, 2, 8, 8, 14, 10,
+ 14, 20, 38, 20, 22, 36, 36, 116, 100, 88,
+ 72, 60, 42, 24, 5, 53, 6, 70, 56, 50,
+ 40, 36, 18, 12, 8, 11, 33, 23, 9, 21,
+ 3, 12, 13, 21, 0, 8, 6, 8, 20, 8,
+ 6, 116, 98, 72, 50, 34, 10, 13, 37, 69,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 15 */
+
+ 120, 16, 25, 120, 16, 25, 73, 37, 32, 20,
+ 12, 18, 80, 88, 108, 28, 39, 4, 3, 18,
+ 2, 6, 11, 3, 33, 24, 72, 55, 83, 97,
+ 39, 43, 7, 3, 18, 2, 33, 31, 16, 20,
+ 5, 23, 43, 6, 19, 35, 51, 10, 19, 41,
+ 10, 17, 29, 49, 6, 11, 13, 23, 3, 10,
+ 44, 0, 0, 0, 21, 51, 67, 3, 20, 1,
+ 36, 21, 59, 73, 15, 27, 36, 16, 23, 9,
+ 51, 15, 43, 33, 71, 43, 41, 51, 63, 40,
+ 1, 14, 41, 15, 39, 31, 81, 10, 3, 1,
+ 21, 32, 9, 3, 33, 39, 17, 21, 13, 16,
+ 18, 3, 26, 32, 27, 0, 5, 7, 10, 21,
+ 36, 7, 4, 36, 44, 56, 44, 30, 39, 5,
+ 15, 10, 12, 59, 4, 18, 12, 8, 14, 14,
+ 26, 16, 4, 6, 0, 6, 22, 18, 33, 5,
+ 4, 15, 14, 18, 30, 16, 10, 16, 10, 8,
+ 20, 30, 14, 27, 6, 27, 50, 44, 34, 28,
+ 34, 36, 22, 32, 42, 0, 24, 18, 14, 8,
+ 5, 14, 2, 8, 12, 6, 16, 28, 6, 35,
+ 4, 23, 6, 24, 39, 66, 84, 68, 66, 66,
+ 66, 58, 58, 48, 30, 24, 18, 18, 6, 43,
+ 7, 0, 23, 36, 30, 24, 28, 20, 8, 8,
+ 8, 19, 7, 17, 59, 47, 71, 13, 42, 36,
+ 22, 4, 12, 5, 15, 21, 45, 9, 74, 42,
+ 30, 16, 20, 1, 15, 25, 53, 10, 74, 56,
+ 50, 38, 28, 10, 1, 15, 33, 13, 76, 56,
+ 30, 8, 14, 11, 29, 45, 10, 102, 80, 64,
+ 48, 44, 10, 7, 19, 27, 124, 59, 55, 27,
+ 61, 59, 41, 51, 45, 45, 47, 45, 47, 45,
+ 39, 45, 41, 19, 35, 23, 25, 23, 11, 5,
+ 5, 3, 4, 1, 3, 11, 26, 22, 26, 16,
+ 8, 28, 22, 14, 20, 20, 16, 12, 4, 5,
+ 11, 7, 12, 23, 20, 48, 18, 16, 32, 32,
+ 28, 22, 32, 26, 38, 2, 5, 7, 78, 78,
+ 88, 72, 70, 88, 96, 96, 106, 100, 114, 112,
+ 96, 80, 30, 90, 118, 88, 92, 84, 74, 52,
+ 42, 36, 12, 9, 13, 31, 57, 76, 74, 76,
+ 64, 48, 50, 38, 28, 22, 16, 6, 9, 9,
+ 33, 41, 7, 9, 33, 40, 44, 32, 12, 28,
+ 26, 4, 18, 16, 0, 21, 19, 35, 65, 24,
+ 4, 17, 23, 1, 1, 4, 10, 10, 16, 12,
+ 16, 22, 40, 22, 24, 38, 38, 114, 96, 84,
+ 68, 54, 36, 18, 11, 57, 6, 70, 56, 52,
+ 40, 38, 18, 12, 10, 9, 33, 23, 7, 21,
+ 1, 14, 13, 19, 2, 10, 6, 10, 22, 8,
+ 6, 114, 94, 68, 44, 28, 2, 21, 43, 75,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 16 */
+
+ 116, 14, 27, 116, 14, 27, 71, 35, 32, 20,
+ 10, 14, 76, 84, 106, 28, 35, 2, 3, 18,
+ 0, 4, 11, 7, 37, 22, 68, 59, 85, 99,
+ 35, 41, 9, 3, 18, 0, 33, 29, 16, 18,
+ 5, 23, 43, 4, 21, 35, 53, 10, 19, 41,
+ 8, 19, 29, 49, 6, 11, 13, 23, 3, 8,
+ 44, 0, 0, 0, 21, 53, 67, 3, 18, 3,
+ 36, 21, 59, 71, 13, 25, 38, 18, 21, 7,
+ 49, 13, 41, 33, 69, 43, 41, 51, 63, 40,
+ 1, 14, 41, 15, 39, 31, 79, 8, 5, 3,
+ 23, 32, 9, 3, 33, 39, 17, 21, 13, 14,
+ 16, 3, 24, 30, 27, 1, 5, 7, 8, 21,
+ 34, 9, 2, 34, 40, 54, 42, 28, 39, 5,
+ 15, 8, 10, 59, 2, 16, 10, 8, 14, 14,
+ 26, 16, 4, 4, 0, 4, 20, 16, 35, 7,
+ 2, 17, 12, 16, 28, 14, 8, 16, 10, 8,
+ 16, 28, 12, 27, 6, 29, 48, 42, 32, 28,
+ 34, 34, 20, 32, 40, 1, 22, 18, 12, 6,
+ 7, 12, 0, 6, 10, 4, 14, 26, 4, 35,
+ 2, 23, 4, 20, 39, 62, 80, 64, 62, 62,
+ 60, 54, 54, 44, 26, 20, 14, 12, 2, 47,
+ 9, 1, 25, 34, 26, 20, 24, 16, 4, 4,
+ 4, 21, 9, 19, 59, 47, 71, 13, 42, 36,
+ 22, 4, 12, 5, 15, 21, 43, 9, 72, 42,
+ 30, 16, 20, 1, 15, 25, 51, 8, 74, 56,
+ 48, 36, 28, 10, 1, 15, 33, 13, 76, 54,
+ 28, 6, 14, 11, 29, 45, 10, 100, 78, 62,
+ 46, 42, 10, 7, 19, 27, 124, 59, 53, 27,
+ 59, 57, 41, 49, 43, 43, 45, 43, 45, 45,
+ 39, 45, 41, 17, 35, 25, 25, 23, 11, 7,
+ 7, 5, 2, 3, 7, 15, 24, 20, 26, 14,
+ 6, 26, 20, 12, 18, 18, 14, 10, 2, 7,
+ 13, 7, 12, 25, 18, 46, 16, 14, 30, 30,
+ 28, 20, 28, 24, 36, 0, 7, 11, 76, 76,
+ 86, 68, 66, 84, 92, 92, 100, 94, 108, 106,
+ 90, 74, 26, 84, 110, 82, 82, 78, 68, 48,
+ 38, 32, 12, 9, 13, 29, 53, 72, 70, 72,
+ 60, 42, 46, 34, 22, 18, 12, 2, 13, 13,
+ 35, 43, 9, 11, 37, 36, 40, 28, 8, 24,
+ 22, 0, 14, 12, 3, 25, 23, 39, 67, 22,
+ 2, 19, 23, 1, 1, 4, 10, 10, 18, 12,
+ 18, 22, 42, 24, 26, 40, 40, 112, 92, 78,
+ 62, 48, 30, 10, 17, 63, 6, 70, 56, 52,
+ 40, 38, 18, 12, 10, 9, 33, 23, 7, 21,
+ 0, 16, 13, 19, 2, 10, 6, 10, 22, 8,
+ 4, 110, 88, 62, 38, 20, 5, 29, 51, 81,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 17 */
+
+ 114, 14, 27, 114, 14, 27, 67, 31, 34, 22,
+ 10, 12, 74, 82, 106, 28, 29, 2, 1, 20,
+ 0, 4, 9, 9, 39, 22, 66, 61, 87, 99,
+ 29, 37, 9, 1, 20, 0, 31, 25, 18, 18,
+ 3, 21, 41, 4, 21, 33, 53, 10, 17, 39,
+ 8, 19, 27, 49, 8, 9, 11, 21, 1, 8,
+ 44, 0, 0, 0, 19, 53, 67, 1, 18, 3,
+ 36, 19, 57, 67, 9, 21, 42, 22, 17, 5,
+ 45, 9, 37, 31, 65, 41, 39, 49, 61, 42,
+ 0, 16, 39, 13, 37, 29, 75, 8, 5, 3,
+ 23, 34, 9, 1, 31, 37, 15, 19, 11, 14,
+ 16, 1, 24, 30, 25, 1, 3, 7, 8, 19,
+ 34, 9, 2, 34, 38, 54, 42, 28, 37, 3,
+ 13, 8, 10, 57, 2, 16, 10, 10, 14, 14,
+ 28, 18, 6, 4, 2, 4, 20, 16, 35, 7,
+ 2, 17, 12, 14, 28, 14, 8, 18, 12, 8,
+ 14, 28, 12, 25, 6, 29, 48, 42, 32, 28,
+ 34, 34, 20, 32, 40, 1, 22, 18, 12, 6,
+ 7, 12, 0, 6, 10, 4, 14, 26, 4, 33,
+ 2, 21, 4, 18, 37, 60, 78, 62, 60, 58,
+ 56, 52, 52, 40, 24, 18, 12, 8, 0, 49,
+ 9, 1, 25, 32, 24, 18, 22, 14, 2, 2,
+ 2, 21, 9, 19, 57, 45, 69, 11, 44, 36,
+ 22, 6, 14, 3, 13, 19, 39, 7, 72, 42,
+ 30, 16, 22, 0, 13, 23, 47, 8, 76, 58,
+ 48, 36, 30, 10, 1, 13, 31, 13, 78, 54,
+ 28, 6, 16, 9, 27, 43, 10, 100, 78, 62,
+ 46, 42, 10, 5, 17, 25, 124, 57, 51, 25,
+ 55, 53, 39, 45, 39, 39, 41, 39, 41, 43,
+ 37, 43, 39, 13, 33, 25, 23, 21, 9, 7,
+ 7, 5, 2, 3, 9, 17, 24, 20, 28, 14,
+ 6, 26, 20, 12, 18, 18, 14, 10, 2, 7,
+ 13, 5, 14, 25, 18, 46, 16, 14, 30, 30,
+ 30, 20, 26, 24, 36, 0, 7, 13, 76, 76,
+ 86, 66, 64, 82, 88, 88, 96, 90, 104, 102,
+ 86, 70, 24, 80, 104, 76, 74, 74, 64, 46,
+ 36, 30, 12, 7, 11, 25, 47, 70, 68, 70,
+ 58, 38, 44, 32, 18, 16, 10, 0, 15, 15,
+ 37, 43, 9, 11, 39, 34, 38, 26, 6, 22,
+ 20, 1, 12, 10, 5, 27, 25, 41, 67, 22,
+ 2, 19, 21, 0, 0, 6, 12, 12, 22, 14,
+ 20, 24, 46, 28, 28, 44, 44, 112, 90, 74,
+ 58, 44, 26, 4, 21, 67, 8, 72, 58, 54,
+ 42, 40, 20, 14, 12, 7, 31, 21, 5, 19,
+ 4, 20, 11, 17, 4, 12, 8, 12, 24, 8,
+ 4, 108, 84, 58, 34, 14, 11, 35, 57, 85,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 18 */
+
+ 112, 14, 27, 112, 14, 27, 63, 29, 34, 22,
+ 10, 10, 72, 80, 106, 28, 25, 2, 0, 22,
+ 0, 2, 9, 11, 41, 20, 62, 65, 89, 101,
+ 23, 35, 9, 0, 22, 0, 31, 23, 18, 16,
+ 3, 21, 39, 4, 21, 33, 53, 10, 17, 37,
+ 8, 19, 27, 49, 8, 9, 11, 21, 1, 8,
+ 44, 0, 0, 0, 17, 53, 67, 1, 16, 3,
+ 36, 19, 57, 63, 5, 17, 44, 26, 15, 3,
+ 43, 7, 35, 29, 61, 41, 39, 49, 59, 42,
+ 0, 16, 37, 13, 35, 29, 73, 8, 5, 3,
+ 23, 34, 9, 1, 31, 37, 15, 19, 11, 14,
+ 16, 1, 24, 30, 25, 1, 3, 7, 8, 19,
+ 32, 9, 2, 32, 36, 52, 40, 26, 35, 3,
+ 13, 6, 8, 57, 0, 16, 10, 10, 14, 14,
+ 30, 18, 6, 4, 2, 4, 18, 16, 37, 7,
+ 2, 19, 12, 12, 28, 14, 6, 18, 12, 8,
+ 12, 28, 10, 25, 6, 31, 46, 42, 32, 28,
+ 34, 34, 20, 32, 40, 1, 22, 18, 12, 6,
+ 7, 12, 0, 6, 10, 4, 12, 24, 2, 33,
+ 2, 21, 2, 16, 37, 56, 74, 58, 56, 54,
+ 52, 48, 48, 36, 20, 14, 8, 4, 3, 51,
+ 9, 1, 27, 30, 22, 16, 20, 10, 0, 0,
+ 0, 23, 11, 21, 57, 45, 67, 11, 44, 36,
+ 22, 6, 14, 3, 13, 19, 37, 5, 72, 42,
+ 30, 16, 22, 0, 11, 21, 43, 8, 76, 58,
+ 48, 36, 30, 10, 1, 13, 29, 13, 78, 54,
+ 28, 6, 16, 9, 27, 41, 10, 98, 76, 60,
+ 44, 42, 10, 5, 17, 25, 124, 55, 49, 23,
+ 53, 51, 37, 43, 37, 37, 39, 37, 37, 41,
+ 35, 43, 37, 11, 33, 27, 23, 19, 9, 7,
+ 9, 5, 0, 5, 11, 19, 22, 20, 28, 14,
+ 4, 24, 20, 12, 16, 16, 14, 10, 0, 7,
+ 13, 5, 14, 25, 16, 44, 14, 14, 28, 28,
+ 30, 18, 24, 24, 36, 1, 9, 17, 74, 74,
+ 84, 64, 62, 80, 84, 84, 92, 86, 98, 96,
+ 80, 66, 22, 74, 98, 70, 66, 68, 60, 42,
+ 34, 28, 12, 5, 11, 23, 43, 66, 64, 66,
+ 54, 34, 40, 28, 14, 14, 6, 1, 19, 17,
+ 39, 45, 11, 11, 41, 32, 36, 22, 2, 18,
+ 16, 3, 8, 6, 7, 29, 27, 43, 69, 20,
+ 0, 21, 21, 0, 0, 8, 12, 14, 24, 16,
+ 22, 26, 48, 30, 30, 46, 46, 110, 86, 70,
+ 54, 38, 20, 1, 27, 71, 8, 72, 58, 54,
+ 42, 42, 20, 14, 12, 7, 31, 21, 3, 19,
+ 6, 22, 11, 15, 6, 14, 8, 12, 26, 8,
+ 4, 106, 80, 54, 28, 8, 19, 43, 63, 91,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 19 */
+
+ 110, 14, 27, 110, 14, 27, 59, 25, 36, 22,
+ 8, 6, 68, 78, 106, 28, 21, 2, 2, 24,
+ 0, 0, 9, 13, 43, 20, 58, 67, 91, 101,
+ 17, 33, 9, 2, 24, 0, 29, 21, 20, 16,
+ 3, 19, 37, 2, 23, 33, 53, 10, 17, 37,
+ 8, 19, 27, 49, 10, 9, 11, 21, 0, 8,
+ 44, 0, 0, 0, 17, 53, 67, 0, 14, 3,
+ 36, 19, 57, 59, 1, 15, 48, 30, 13, 1,
+ 39, 5, 33, 27, 57, 39, 39, 47, 57, 42,
+ 0, 16, 37, 13, 35, 27, 69, 8, 7, 5,
+ 25, 34, 9, 1, 29, 37, 13, 19, 9, 14,
+ 16, 1, 24, 28, 25, 1, 3, 7, 8, 19,
+ 30, 9, 2, 30, 34, 50, 38, 24, 35, 1,
+ 13, 4, 8, 55, 0, 16, 10, 10, 14, 14,
+ 32, 18, 6, 4, 4, 4, 16, 16, 39, 7,
+ 2, 19, 10, 10, 28, 14, 4, 18, 12, 8,
+ 8, 28, 8, 25, 6, 31, 44, 42, 32, 28,
+ 34, 34, 20, 32, 40, 1, 20, 18, 12, 6,
+ 7, 10, 1, 4, 8, 2, 10, 22, 2, 33,
+ 0, 21, 0, 12, 37, 54, 70, 54, 52, 50,
+ 48, 44, 44, 32, 16, 12, 6, 0, 7, 53,
+ 9, 1, 27, 28, 20, 14, 16, 8, 3, 1,
+ 1, 25, 11, 21, 57, 45, 67, 9, 44, 36,
+ 22, 6, 14, 3, 13, 17, 35, 5, 72, 42,
+ 30, 16, 24, 2, 9, 19, 39, 8, 76, 58,
+ 48, 36, 32, 10, 1, 13, 29, 13, 78, 54,
+ 26, 6, 16, 9, 27, 39, 10, 98, 76, 58,
+ 42, 42, 10, 5, 15, 23, 124, 53, 47, 23,
+ 51, 49, 35, 39, 35, 35, 37, 33, 35, 41,
+ 33, 43, 37, 7, 33, 27, 23, 19, 9, 9,
+ 9, 5, 1, 7, 13, 21, 22, 18, 28, 12,
+ 4, 24, 18, 12, 14, 16, 12, 10, 1, 7,
+ 15, 5, 14, 27, 16, 44, 12, 14, 26, 28,
+ 30, 16, 22, 22, 34, 3, 9, 19, 72, 72,
+ 84, 62, 60, 76, 80, 80, 88, 80, 94, 92,
+ 74, 60, 20, 68, 92, 64, 58, 62, 54, 38,
+ 32, 26, 12, 3, 11, 21, 39, 62, 60, 62,
+ 50, 30, 36, 26, 10, 10, 4, 3, 21, 21,
+ 41, 47, 11, 13, 43, 30, 32, 20, 1, 14,
+ 12, 7, 6, 2, 11, 31, 31, 47, 71, 18,
+ 1, 23, 21, 2, 0, 8, 14, 14, 26, 18,
+ 24, 28, 50, 32, 32, 50, 48, 110, 84, 66,
+ 48, 34, 14, 7, 33, 75, 10, 74, 60, 56,
+ 42, 44, 22, 14, 14, 5, 31, 19, 3, 17,
+ 8, 24, 11, 15, 6, 14, 8, 14, 28, 8,
+ 4, 104, 76, 48, 22, 2, 27, 51, 71, 95,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 20 */
+
+ 106, 14, 27, 106, 14, 27, 57, 23, 36, 22,
+ 8, 4, 66, 74, 106, 28, 15, 0, 4, 24,
+ 0, 1, 9, 15, 45, 18, 54, 71, 93, 103,
+ 11, 31, 9, 4, 24, 0, 29, 19, 20, 14,
+ 3, 19, 37, 2, 23, 31, 53, 10, 17, 35,
+ 6, 21, 27, 49, 10, 7, 11, 19, 0, 8,
+ 44, 0, 0, 0, 15, 55, 67, 0, 12, 3,
+ 36, 19, 57, 57, 0, 11, 50, 32, 11, 0,
+ 37, 1, 31, 25, 53, 39, 37, 47, 55, 42,
+ 0, 16, 35, 13, 33, 27, 67, 6, 7, 5,
+ 25, 34, 9, 1, 29, 37, 13, 19, 9, 14,
+ 14, 1, 24, 28, 25, 1, 3, 7, 8, 19,
+ 30, 11, 2, 30, 32, 48, 36, 22, 33, 1,
+ 13, 2, 6, 55, 1, 16, 8, 10, 14, 14,
+ 34, 20, 6, 2, 4, 2, 14, 14, 41, 7,
+ 2, 21, 10, 8, 26, 12, 4, 20, 12, 8,
+ 6, 26, 6, 25, 6, 33, 44, 42, 32, 28,
+ 34, 34, 20, 32, 38, 1, 20, 18, 12, 6,
+ 9, 10, 1, 4, 8, 2, 10, 20, 0, 33,
+ 0, 21, 1, 10, 37, 50, 66, 50, 48, 46,
+ 44, 40, 40, 28, 12, 8, 2, 5, 11, 55,
+ 9, 3, 29, 26, 18, 12, 14, 4, 5, 3,
+ 5, 25, 13, 23, 57, 45, 65, 9, 44, 36,
+ 22, 6, 14, 3, 13, 17, 33, 3, 72, 42,
+ 30, 16, 24, 2, 9, 19, 35, 8, 76, 58,
+ 48, 36, 32, 10, 1, 13, 27, 13, 78, 52,
+ 26, 6, 16, 9, 27, 39, 10, 96, 74, 56,
+ 40, 42, 10, 5, 15, 23, 124, 51, 45, 21,
+ 47, 47, 33, 37, 31, 33, 33, 31, 31, 39,
+ 33, 41, 35, 5, 31, 29, 23, 17, 9, 9,
+ 11, 7, 3, 9, 15, 23, 20, 18, 28, 12,
+ 2, 22, 18, 12, 12, 14, 12, 8, 3, 9,
+ 15, 5, 14, 27, 14, 42, 10, 14, 24, 26,
+ 30, 14, 20, 22, 34, 5, 11, 23, 70, 72,
+ 82, 60, 56, 74, 76, 76, 84, 76, 88, 86,
+ 68, 56, 16, 62, 84, 58, 50, 58, 50, 34,
+ 30, 22, 12, 1, 9, 19, 35, 60, 56, 58,
+ 48, 26, 32, 22, 6, 8, 0, 7, 25, 23,
+ 43, 49, 13, 13, 45, 26, 30, 16, 3, 10,
+ 8, 9, 2, 1, 13, 33, 33, 49, 71, 16,
+ 1, 25, 21, 2, 2, 10, 14, 16, 28, 18,
+ 26, 30, 52, 34, 34, 52, 50, 108, 80, 62,
+ 44, 28, 8, 13, 39, 81, 10, 74, 60, 56,
+ 44, 44, 22, 16, 14, 5, 29, 19, 1, 17,
+ 10, 26, 11, 13, 8, 16, 8, 14, 30, 8,
+ 4, 100, 72, 44, 16, 3, 35, 59, 77, 101,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 21 */
+
+ 104, 14, 27, 104, 14, 27, 53, 19, 36, 22,
+ 6, 0, 62, 72, 106, 28, 11, 0, 6, 26,
+ 0, 1, 7, 17, 47, 18, 50, 75, 95, 103,
+ 5, 27, 9, 6, 26, 0, 29, 17, 20, 14,
+ 3, 19, 35, 2, 25, 31, 53, 10, 17, 33,
+ 6, 21, 27, 49, 12, 7, 9, 19, 2, 8,
+ 44, 0, 0, 0, 15, 55, 67, 2, 10, 3,
+ 36, 17, 57, 53, 4, 9, 54, 36, 7, 2,
+ 35, 0, 29, 23, 49, 37, 37, 45, 53, 42,
+ 0, 16, 35, 13, 33, 25, 63, 6, 7, 5,
+ 25, 34, 9, 0, 27, 37, 13, 19, 7, 14,
+ 14, 1, 24, 28, 23, 1, 1, 7, 8, 19,
+ 28, 11, 2, 28, 30, 48, 34, 20, 31, 0,
+ 13, 0, 6, 53, 1, 16, 8, 10, 14, 14,
+ 36, 20, 6, 2, 4, 2, 12, 14, 43, 7,
+ 2, 21, 8, 6, 26, 12, 2, 20, 12, 8,
+ 4, 26, 4, 25, 6, 33, 42, 42, 32, 28,
+ 34, 34, 20, 32, 38, 1, 20, 18, 12, 6,
+ 9, 10, 1, 2, 8, 2, 8, 18, 0, 33,
+ 1, 21, 3, 8, 37, 48, 62, 48, 46, 42,
+ 40, 36, 36, 24, 8, 4, 1, 9, 13, 57,
+ 9, 3, 29, 24, 16, 10, 12, 2, 9, 5,
+ 7, 27, 13, 23, 57, 45, 65, 9, 44, 36,
+ 22, 6, 14, 1, 11, 15, 31, 3, 72, 42,
+ 30, 16, 24, 4, 7, 17, 31, 8, 76, 58,
+ 48, 36, 32, 10, 1, 13, 25, 13, 80, 52,
+ 24, 6, 16, 9, 27, 37, 10, 96, 72, 54,
+ 38, 42, 10, 5, 15, 21, 124, 49, 43, 19,
+ 45, 45, 31, 35, 29, 31, 31, 27, 27, 37,
+ 31, 41, 35, 3, 31, 29, 21, 15, 9, 11,
+ 11, 7, 3, 11, 17, 25, 18, 16, 28, 10,
+ 2, 22, 18, 12, 10, 12, 12, 8, 5, 9,
+ 15, 5, 14, 27, 12, 42, 10, 14, 22, 24,
+ 30, 12, 18, 20, 32, 7, 13, 27, 68, 70,
+ 82, 58, 54, 70, 72, 72, 80, 70, 82, 82,
+ 62, 52, 14, 56, 78, 52, 42, 52, 44, 30,
+ 28, 20, 12, 0, 9, 17, 31, 56, 54, 54,
+ 44, 22, 28, 20, 2, 6, 3, 9, 29, 27,
+ 45, 51, 13, 13, 47, 24, 26, 12, 7, 8,
+ 6, 13, 0, 3, 17, 35, 35, 51, 73, 16,
+ 3, 27, 19, 4, 2, 10, 16, 18, 30, 20,
+ 28, 32, 56, 36, 36, 54, 54, 108, 76, 58,
+ 38, 24, 2, 19, 45, 85, 10, 76, 62, 58,
+ 44, 46, 22, 16, 16, 3, 29, 19, 0, 15,
+ 12, 28, 9, 11, 8, 16, 8, 16, 32, 8,
+ 4, 98, 68, 38, 10, 9, 41, 67, 85, 107,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 22 */
+
+ 102, 14, 29, 102, 14, 29, 49, 17, 38, 22,
+ 6, 1, 60, 70, 106, 28, 7, 0, 6, 28,
+ 0, 3, 7, 19, 49, 16, 48, 77, 97, 105,
+ 0, 25, 9, 6, 28, 0, 27, 15, 22, 12,
+ 1, 17, 33, 0, 25, 31, 53, 10, 15, 33,
+ 6, 21, 25, 49, 12, 7, 9, 19, 2, 8,
+ 44, 0, 0, 0, 13, 55, 67, 2, 10, 5,
+ 36, 17, 55, 49, 8, 5, 56, 40, 5, 4,
+ 31, 2, 27, 23, 47, 37, 37, 45, 53, 44,
+ 0, 16, 33, 11, 31, 25, 61, 6, 9, 7,
+ 27, 34, 9, 0, 27, 37, 11, 19, 7, 14,
+ 14, 0, 24, 26, 23, 1, 1, 7, 8, 19,
+ 26, 11, 2, 26, 28, 46, 34, 20, 31, 0,
+ 13, 0, 4, 53, 3, 16, 8, 10, 14, 14,
+ 36, 20, 6, 2, 6, 2, 10, 14, 43, 9,
+ 2, 23, 8, 4, 26, 12, 0, 20, 12, 8,
+ 0, 26, 2, 25, 6, 35, 40, 42, 30, 28,
+ 34, 34, 20, 32, 38, 1, 18, 18, 10, 6,
+ 9, 8, 3, 2, 6, 0, 6, 16, 1, 33,
+ 1, 19, 5, 4, 37, 44, 58, 44, 42, 38,
+ 36, 32, 32, 20, 4, 2, 3, 13, 17, 59,
+ 9, 3, 31, 22, 14, 8, 8, 1, 11, 7,
+ 9, 29, 15, 25, 57, 45, 63, 7, 44, 36,
+ 22, 8, 14, 1, 11, 15, 29, 1, 70, 42,
+ 30, 16, 26, 4, 5, 15, 27, 8, 78, 58,
+ 48, 36, 34, 10, 1, 11, 25, 13, 80, 52,
+ 24, 6, 16, 7, 25, 35, 10, 94, 72, 52,
+ 38, 42, 10, 3, 13, 21, 124, 49, 41, 19,
+ 43, 43, 29, 31, 27, 29, 29, 25, 25, 37,
+ 29, 41, 33, 0, 31, 31, 21, 15, 9, 11,
+ 13, 7, 5, 13, 19, 27, 18, 16, 28, 10,
+ 0, 20, 16, 12, 10, 12, 10, 8, 5, 9,
+ 17, 5, 14, 29, 12, 40, 8, 12, 20, 24,
+ 32, 12, 16, 20, 32, 7, 13, 29, 66, 68,
+ 80, 56, 52, 68, 68, 68, 76, 66, 78, 76,
+ 56, 46, 12, 52, 72, 46, 34, 46, 40, 26,
+ 26, 18, 12, 2, 9, 13, 25, 52, 50, 50,
+ 40, 18, 24, 16, 1, 2, 5, 11, 31, 29,
+ 47, 53, 15, 15, 49, 22, 24, 10, 11, 4,
+ 2, 15, 3, 7, 19, 39, 39, 55, 75, 14,
+ 5, 29, 19, 4, 2, 12, 16, 18, 34, 22,
+ 30, 34, 58, 40, 38, 58, 56, 106, 74, 54,
+ 34, 18, 1, 25, 49, 89, 12, 76, 62, 58,
+ 44, 48, 24, 16, 16, 3, 29, 17, 0, 15,
+ 14, 30, 9, 11, 10, 18, 8, 16, 34, 8,
+ 4, 96, 64, 34, 4, 17, 49, 73, 91, 111,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 23 */
+
+ 100, 14, 29, 100, 14, 29, 45, 13, 38, 22,
+ 4, 5, 56, 66, 106, 28, 1, 1, 8, 28,
+ 0, 5, 7, 21, 51, 16, 44, 81, 99, 105,
+ 6, 23, 9, 8, 28, 0, 27, 13, 22, 12,
+ 1, 17, 33, 0, 27, 29, 53, 10, 15, 31,
+ 4, 21, 25, 49, 14, 5, 9, 17, 4, 8,
+ 44, 0, 0, 0, 13, 55, 67, 4, 8, 5,
+ 36, 17, 55, 47, 12, 3, 60, 44, 3, 6,
+ 29, 6, 25, 21, 43, 35, 35, 43, 51, 44,
+ 0, 16, 33, 11, 31, 23, 57, 4, 9, 7,
+ 27, 34, 9, 0, 25, 37, 11, 19, 5, 14,
+ 12, 0, 24, 26, 23, 1, 1, 7, 8, 19,
+ 26, 11, 2, 26, 26, 44, 32, 18, 29, 2,
+ 13, 1, 4, 51, 3, 16, 6, 10, 14, 14,
+ 38, 22, 6, 2, 6, 2, 8, 14, 45, 9,
+ 2, 23, 6, 2, 24, 12, 0, 22, 12, 8,
+ 1, 24, 0, 25, 6, 35, 40, 42, 30, 28,
+ 34, 34, 20, 32, 38, 1, 18, 18, 10, 6,
+ 9, 8, 3, 0, 6, 0, 6, 14, 1, 33,
+ 3, 19, 7, 2, 37, 42, 54, 40, 38, 34,
+ 32, 28, 28, 16, 0, 1, 7, 19, 21, 61,
+ 9, 5, 31, 20, 12, 6, 6, 3, 15, 9,
+ 11, 29, 15, 25, 57, 45, 63, 7, 44, 36,
+ 22, 8, 14, 1, 11, 13, 27, 1, 70, 42,
+ 30, 16, 26, 6, 3, 15, 23, 8, 78, 58,
+ 48, 36, 34, 10, 1, 11, 23, 13, 80, 50,
+ 22, 6, 16, 7, 25, 35, 10, 94, 70, 50,
+ 36, 42, 10, 3, 13, 19, 124, 47, 39, 17,
+ 39, 41, 27, 29, 23, 27, 25, 21, 21, 35,
+ 27, 39, 33, 2, 29, 31, 21, 13, 9, 13,
+ 13, 9, 7, 15, 21, 29, 16, 14, 28, 8,
+ 0, 20, 16, 12, 8, 10, 10, 6, 7, 9,
+ 17, 5, 14, 29, 10, 40, 6, 12, 18, 22,
+ 32, 10, 14, 18, 30, 9, 15, 33, 64, 68,
+ 80, 54, 48, 64, 64, 64, 72, 60, 72, 72,
+ 50, 42, 8, 46, 64, 40, 26, 42, 34, 22,
+ 24, 16, 12, 4, 7, 11, 21, 50, 46, 46,
+ 38, 14, 20, 14, 5, 0, 9, 15, 35, 33,
+ 49, 55, 15, 15, 51, 18, 20, 6, 13, 0,
+ 1, 19, 5, 11, 23, 41, 41, 57, 75, 12,
+ 5, 31, 19, 6, 4, 12, 18, 20, 36, 24,
+ 32, 36, 60, 42, 40, 60, 58, 106, 70, 50,
+ 28, 14, 7, 31, 55, 93, 12, 78, 64, 60,
+ 46, 50, 24, 18, 18, 1, 27, 17, 2, 13,
+ 16, 32, 9, 9, 10, 18, 8, 18, 36, 8,
+ 4, 92, 60, 28, 1, 23, 57, 81, 99, 117,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 24 */
+
+ 96, 12, 29, 96, 12, 29, 43, 11, 38, 22,
+ 4, 7, 54, 64, 106, 28, 2, 1, 10, 30,
+ 0, 7, 7, 23, 55, 14, 40, 85, 101, 107,
+ 10, 21, 9, 10, 30, 0, 27, 11, 22, 10,
+ 1, 17, 31, 1, 27, 29, 55, 10, 15, 31,
+ 4, 23, 25, 49, 14, 5, 9, 17, 4, 8,
+ 44, 0, 0, 0, 11, 57, 67, 4, 6, 5,
+ 36, 17, 55, 43, 14, 0, 62, 46, 1, 8,
+ 27, 8, 23, 19, 39, 35, 35, 43, 49, 44,
+ 0, 16, 31, 11, 29, 23, 55, 4, 11, 9,
+ 29, 34, 9, 0, 25, 37, 11, 19, 5, 12,
+ 12, 0, 24, 24, 23, 1, 1, 7, 6, 19,
+ 24, 13, 2, 24, 22, 42, 30, 16, 29, 2,
+ 13, 3, 2, 51, 5, 14, 6, 10, 14, 14,
+ 40, 22, 6, 0, 6, 0, 6, 12, 47, 9,
+ 2, 25, 6, 0, 24, 10, 1, 22, 12, 8,
+ 5, 24, 1, 25, 6, 37, 38, 40, 30, 28,
+ 34, 32, 18, 32, 36, 3, 16, 18, 10, 4,
+ 11, 6, 5, 0, 4, 1, 4, 12, 3, 33,
+ 3, 19, 9, 1, 37, 38, 50, 36, 34, 30,
+ 26, 24, 24, 12, 3, 5, 11, 23, 25, 63,
+ 9, 5, 33, 18, 8, 2, 2, 7, 17, 13,
+ 15, 31, 17, 27, 57, 45, 61, 7, 44, 36,
+ 22, 8, 14, 1, 11, 13, 25, 0, 70, 42,
+ 30, 16, 26, 6, 3, 13, 21, 8, 78, 58,
+ 48, 34, 34, 10, 1, 11, 23, 13, 80, 50,
+ 22, 6, 16, 7, 25, 33, 10, 92, 68, 48,
+ 34, 40, 10, 3, 13, 19, 124, 45, 37, 17,
+ 37, 39, 27, 27, 21, 25, 23, 19, 19, 35,
+ 27, 39, 31, 4, 29, 33, 21, 13, 9, 13,
+ 15, 9, 9, 17, 25, 31, 14, 14, 28, 8,
+ 1, 18, 14, 10, 6, 8, 8, 6, 9, 11,
+ 19, 5, 14, 31, 8, 38, 4, 12, 16, 20,
+ 32, 8, 12, 18, 30, 11, 17, 37, 62, 66,
+ 78, 50, 46, 62, 60, 60, 66, 56, 66, 66,
+ 44, 36, 6, 40, 58, 34, 18, 36, 30, 18,
+ 20, 12, 12, 4, 7, 9, 17, 46, 42, 42,
+ 34, 8, 16, 10, 9, 3, 13, 17, 39, 35,
+ 51, 57, 17, 17, 53, 16, 18, 2, 17, 3,
+ 5, 21, 9, 15, 25, 43, 45, 61, 77, 10,
+ 7, 33, 19, 6, 4, 14, 18, 20, 38, 24,
+ 34, 38, 62, 44, 42, 62, 60, 104, 66, 46,
+ 24, 8, 13, 37, 61, 99, 12, 78, 64, 60,
+ 46, 50, 24, 18, 18, 1, 27, 17, 2, 13,
+ 18, 34, 9, 9, 12, 20, 8, 18, 36, 8,
+ 2, 90, 56, 24, 7, 29, 65, 89, 105, 123,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 25 */
+
+ 94, 12, 29, 94, 12, 29, 39, 9, 40, 22,
+ 4, 9, 52, 62, 106, 28, 6, 1, 12, 32,
+ 0, 7, 5, 25, 57, 12, 36, 87, 103, 109,
+ 16, 17, 9, 12, 32, 0, 25, 9, 24, 8,
+ 1, 15, 29, 1, 27, 29, 55, 10, 15, 29,
+ 4, 23, 25, 49, 14, 5, 7, 17, 4, 8,
+ 44, 0, 0, 0, 9, 57, 67, 4, 4, 5,
+ 36, 15, 55, 39, 18, 4, 64, 50, 2, 10,
+ 23, 10, 19, 17, 35, 35, 35, 43, 47, 44,
+ 0, 16, 29, 11, 27, 21, 53, 4, 11, 9,
+ 29, 34, 9, 2, 23, 35, 9, 19, 5, 12,
+ 12, 0, 24, 24, 21, 1, 0, 7, 6, 17,
+ 22, 13, 2, 22, 20, 42, 28, 14, 27, 2,
+ 11, 5, 0, 51, 7, 14, 6, 12, 14, 14,
+ 42, 22, 6, 0, 8, 0, 6, 12, 49, 9,
+ 2, 27, 6, 1, 24, 10, 3, 22, 12, 8,
+ 7, 24, 1, 25, 6, 39, 36, 40, 30, 28,
+ 34, 32, 18, 32, 36, 3, 16, 18, 10, 4,
+ 11, 6, 5, 0, 4, 1, 2, 12, 3, 33,
+ 3, 19, 11, 3, 37, 34, 48, 34, 32, 26,
+ 22, 22, 22, 8, 7, 7, 13, 27, 27, 65,
+ 9, 5, 35, 16, 6, 0, 0, 9, 19, 15,
+ 17, 33, 19, 29, 57, 43, 59, 5, 46, 36,
+ 22, 8, 16, 0, 9, 11, 23, 2, 70, 42,
+ 30, 16, 28, 6, 1, 11, 17, 8, 78, 58,
+ 48, 34, 36, 10, 1, 11, 21, 13, 82, 50,
+ 22, 6, 18, 7, 25, 31, 10, 92, 68, 48,
+ 32, 40, 10, 3, 11, 17, 124, 43, 35, 15,
+ 35, 35, 25, 23, 19, 21, 21, 17, 15, 33,
+ 25, 39, 29, 8, 29, 35, 19, 11, 9, 13,
+ 17, 9, 9, 19, 27, 33, 14, 14, 28, 8,
+ 1, 16, 14, 10, 4, 8, 8, 6, 11, 11,
+ 19, 3, 16, 31, 8, 36, 4, 12, 16, 20,
+ 32, 6, 10, 18, 30, 13, 17, 39, 62, 64,
+ 76, 48, 44, 60, 56, 56, 62, 52, 62, 60,
+ 40, 32, 4, 34, 52, 28, 10, 30, 26, 16,
+ 18, 10, 12, 6, 7, 7, 13, 42, 40, 38,
+ 30, 4, 14, 8, 13, 5, 15, 19, 41, 37,
+ 53, 57, 17, 17, 55, 14, 16, 0, 21, 5,
+ 7, 23, 11, 17, 27, 45, 47, 63, 79, 10,
+ 9, 33, 17, 6, 4, 16, 20, 22, 40, 26,
+ 36, 40, 66, 46, 44, 66, 64, 102, 64, 42,
+ 20, 2, 19, 43, 67, 103, 14, 78, 64, 62,
+ 46, 52, 26, 18, 20, 0, 27, 15, 4, 13,
+ 20, 38, 7, 7, 14, 22, 8, 20, 38, 8,
+ 2, 88, 52, 20, 11, 35, 71, 97, 111, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 26 */
+
+ 92, 12, 29, 92, 12, 29, 35, 5, 40, 22,
+ 2, 13, 48, 58, 106, 28, 12, 3, 14, 32,
+ 0, 9, 5, 27, 59, 12, 32, 91, 105, 109,
+ 22, 15, 9, 14, 32, 0, 25, 7, 24, 8,
+ 1, 15, 29, 1, 29, 27, 55, 10, 15, 27,
+ 2, 23, 25, 49, 16, 3, 7, 15, 6, 8,
+ 44, 0, 0, 0, 9, 57, 67, 6, 2, 5,
+ 36, 15, 55, 37, 22, 6, 68, 54, 4, 12,
+ 21, 14, 17, 15, 31, 33, 33, 41, 45, 44,
+ 0, 16, 29, 11, 27, 21, 49, 2, 11, 9,
+ 29, 34, 9, 2, 23, 35, 9, 19, 3, 12,
+ 10, 0, 24, 24, 21, 1, 0, 7, 6, 17,
+ 22, 13, 2, 22, 18, 40, 26, 12, 25, 4,
+ 11, 7, 0, 49, 7, 14, 4, 12, 14, 14,
+ 44, 24, 6, 0, 8, 0, 4, 12, 51, 9,
+ 2, 27, 4, 3, 22, 10, 3, 24, 12, 8,
+ 9, 22, 3, 25, 6, 39, 36, 40, 30, 28,
+ 34, 32, 18, 32, 36, 3, 16, 18, 10, 4,
+ 11, 6, 5, 1, 4, 1, 2, 10, 5, 33,
+ 5, 19, 13, 5, 37, 32, 44, 30, 28, 22,
+ 18, 18, 18, 4, 11, 11, 17, 33, 31, 67,
+ 9, 7, 35, 14, 4, 1, 1, 13, 23, 17,
+ 19, 33, 19, 29, 57, 43, 59, 5, 46, 36,
+ 22, 8, 16, 0, 9, 11, 21, 2, 70, 42,
+ 30, 16, 28, 8, 0, 11, 13, 8, 78, 58,
+ 48, 34, 36, 10, 1, 11, 19, 13, 82, 48,
+ 20, 6, 18, 7, 25, 31, 10, 90, 66, 46,
+ 30, 40, 10, 3, 11, 17, 124, 41, 33, 13,
+ 31, 33, 23, 21, 15, 19, 17, 13, 11, 31,
+ 23, 37, 29, 10, 27, 35, 19, 9, 9, 15,
+ 17, 11, 11, 21, 29, 35, 12, 12, 28, 6,
+ 3, 16, 14, 10, 2, 6, 8, 4, 13, 11,
+ 19, 3, 16, 31, 6, 36, 2, 12, 14, 18,
+ 32, 4, 8, 16, 28, 15, 19, 43, 60, 64,
+ 76, 46, 40, 56, 52, 52, 58, 46, 56, 56,
+ 34, 28, 0, 28, 44, 22, 2, 26, 20, 12,
+ 16, 8, 12, 8, 5, 5, 9, 40, 36, 34,
+ 28, 0, 10, 4, 17, 7, 19, 23, 45, 41,
+ 55, 59, 19, 17, 57, 10, 12, 3, 23, 9,
+ 11, 27, 15, 21, 31, 47, 49, 65, 79, 8,
+ 9, 35, 17, 8, 6, 16, 20, 24, 42, 28,
+ 38, 42, 68, 48, 46, 68, 66, 102, 60, 38,
+ 14, 1, 25, 49, 73, 107, 14, 80, 66, 62,
+ 48, 54, 26, 20, 20, 0, 25, 15, 6, 11,
+ 22, 40, 7, 5, 14, 22, 8, 20, 40, 8,
+ 2, 84, 48, 14, 17, 41, 79, 105, 119, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 27 */
+
+ 90, 12, 31, 90, 12, 31, 31, 3, 42, 22,
+ 2, 15, 46, 56, 106, 28, 16, 3, 14, 34,
+ 0, 11, 5, 29, 61, 10, 30, 93, 107, 111,
+ 28, 13, 9, 14, 34, 0, 23, 5, 26, 6,
+ 0, 13, 27, 3, 29, 27, 55, 10, 13, 27,
+ 2, 23, 23, 49, 16, 3, 7, 15, 6, 8,
+ 44, 0, 0, 0, 7, 57, 67, 6, 2, 7,
+ 36, 15, 53, 33, 26, 10, 70, 58, 6, 14,
+ 17, 16, 15, 15, 29, 33, 33, 41, 45, 46,
+ 0, 16, 27, 9, 25, 19, 47, 2, 13, 11,
+ 31, 34, 9, 2, 21, 35, 7, 19, 3, 12,
+ 10, 2, 24, 22, 21, 1, 0, 7, 6, 17,
+ 20, 13, 2, 20, 16, 38, 26, 12, 25, 4,
+ 11, 7, 1, 49, 9, 14, 4, 12, 14, 14,
+ 44, 24, 6, 0, 10, 0, 2, 12, 51, 11,
+ 2, 29, 4, 5, 22, 10, 5, 24, 12, 8,
+ 13, 22, 5, 25, 6, 41, 34, 40, 28, 28,
+ 34, 32, 18, 32, 36, 3, 14, 18, 8, 4,
+ 11, 4, 7, 1, 2, 3, 0, 8, 5, 33,
+ 5, 17, 15, 9, 37, 28, 40, 26, 24, 18,
+ 14, 14, 14, 0, 15, 13, 19, 37, 35, 69,
+ 9, 7, 37, 12, 2, 3, 5, 15, 25, 19,
+ 21, 35, 21, 31, 57, 43, 57, 3, 46, 36,
+ 22, 10, 16, 0, 9, 9, 19, 4, 68, 42,
+ 30, 16, 30, 8, 2, 9, 9, 8, 80, 58,
+ 48, 34, 38, 10, 1, 9, 19, 13, 82, 48,
+ 20, 6, 18, 5, 23, 29, 10, 90, 66, 44,
+ 30, 40, 10, 1, 9, 15, 124, 41, 31, 13,
+ 29, 31, 21, 17, 13, 17, 15, 11, 9, 31,
+ 21, 37, 27, 14, 27, 37, 19, 9, 9, 15,
+ 19, 11, 13, 23, 31, 37, 12, 12, 28, 6,
+ 3, 14, 12, 10, 2, 6, 6, 4, 13, 11,
+ 21, 3, 16, 33, 6, 34, 0, 10, 12, 18,
+ 34, 4, 6, 16, 28, 15, 19, 45, 58, 62,
+ 74, 44, 38, 54, 48, 48, 54, 42, 52, 50,
+ 28, 22, 1, 24, 38, 16, 5, 20, 16, 8,
+ 14, 6, 12, 10, 5, 1, 3, 36, 32, 30,
+ 24, 3, 6, 2, 21, 11, 21, 25, 47, 43,
+ 57, 61, 19, 19, 59, 8, 10, 5, 27, 13,
+ 15, 29, 17, 25, 33, 51, 53, 69, 81, 6,
+ 11, 37, 17, 8, 6, 18, 22, 24, 46, 30,
+ 40, 44, 70, 52, 48, 72, 68, 100, 58, 34,
+ 10, 7, 29, 55, 77, 111, 16, 80, 66, 64,
+ 48, 56, 28, 20, 22, 2, 25, 13, 6, 11,
+ 24, 42, 7, 5, 16, 24, 8, 22, 42, 8,
+ 2, 82, 44, 10, 23, 49, 87, 111, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 28 */
+
+ 86, 12, 31, 86, 12, 31, 29, 0, 42, 22,
+ 0, 19, 42, 54, 106, 28, 20, 3, 16, 36,
+ 0, 13, 5, 31, 63, 10, 26, 97, 109, 111,
+ 34, 11, 9, 16, 36, 0, 23, 3, 26, 6,
+ 0, 13, 25, 3, 31, 27, 55, 10, 13, 25,
+ 2, 25, 23, 49, 18, 3, 7, 15, 8, 8,
+ 44, 0, 0, 0, 7, 59, 67, 8, 0, 7,
+ 36, 15, 53, 29, 28, 12, 74, 60, 8, 16,
+ 15, 18, 13, 13, 25, 31, 33, 39, 43, 46,
+ 0, 16, 27, 9, 25, 19, 43, 2, 13, 11,
+ 31, 34, 9, 2, 21, 35, 7, 19, 1, 12,
+ 10, 2, 24, 22, 21, 1, 0, 7, 6, 17,
+ 18, 15, 2, 18, 14, 36, 24, 10, 23, 6,
+ 11, 9, 1, 47, 9, 14, 4, 12, 14, 14,
+ 46, 24, 6, 1, 10, 1, 0, 10, 53, 11,
+ 2, 29, 2, 7, 22, 8, 7, 24, 12, 8,
+ 15, 22, 7, 25, 6, 41, 32, 40, 28, 28,
+ 34, 32, 18, 32, 34, 3, 14, 18, 8, 4,
+ 13, 4, 7, 3, 2, 3, 1, 6, 7, 33,
+ 7, 17, 17, 11, 37, 26, 36, 22, 20, 14,
+ 10, 10, 10, 3, 19, 17, 23, 41, 39, 71,
+ 9, 7, 37, 10, 0, 5, 7, 19, 29, 21,
+ 25, 37, 21, 31, 57, 43, 57, 3, 46, 36,
+ 22, 10, 16, 0, 9, 9, 17, 4, 68, 42,
+ 30, 16, 30, 10, 2, 7, 5, 8, 80, 58,
+ 48, 34, 38, 10, 1, 9, 17, 13, 82, 48,
+ 18, 6, 18, 5, 23, 27, 10, 88, 64, 42,
+ 28, 40, 10, 1, 9, 15, 124, 39, 29, 11,
+ 27, 29, 19, 15, 11, 15, 13, 7, 5, 29,
+ 21, 37, 27, 16, 27, 37, 19, 7, 9, 17,
+ 19, 11, 15, 25, 33, 39, 10, 10, 28, 4,
+ 5, 14, 12, 10, 0, 4, 6, 4, 15, 13,
+ 21, 3, 16, 33, 4, 34, 1, 10, 10, 16,
+ 34, 2, 4, 14, 26, 17, 21, 49, 56, 60,
+ 74, 42, 36, 50, 44, 44, 50, 36, 46, 46,
+ 22, 18, 3, 18, 32, 10, 13, 14, 10, 4,
+ 12, 2, 12, 12, 5, 0, 0, 32, 28, 26,
+ 20, 7, 2, 1, 25, 13, 25, 27, 51, 47,
+ 59, 63, 21, 19, 61, 6, 6, 9, 31, 17,
+ 19, 33, 21, 29, 37, 53, 55, 71, 83, 4,
+ 13, 39, 17, 10, 6, 18, 22, 26, 48, 30,
+ 42, 46, 72, 54, 50, 74, 70, 100, 54, 30,
+ 4, 11, 35, 61, 83, 117, 16, 82, 68, 64,
+ 48, 56, 28, 20, 22, 2, 25, 13, 8, 9,
+ 26, 44, 7, 3, 16, 24, 8, 22, 44, 8,
+ 2, 80, 40, 4, 29, 55, 95, 119, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 29 */
+
+ 84, 12, 31, 84, 12, 31, 25, 2, 42, 22,
+ 0, 21, 40, 50, 106, 28, 26, 5, 18, 36,
+ 0, 13, 3, 33, 65, 8, 22, 101, 111, 113,
+ 40, 7, 9, 18, 36, 0, 23, 1, 26, 4,
+ 0, 13, 25, 3, 31, 25, 55, 10, 13, 23,
+ 0, 25, 23, 49, 18, 1, 5, 13, 8, 8,
+ 44, 0, 0, 0, 5, 59, 67, 8, 1, 7,
+ 36, 13, 53, 27, 32, 16, 76, 64, 12, 18,
+ 13, 22, 11, 11, 21, 31, 31, 39, 41, 46,
+ 0, 16, 25, 9, 23, 17, 41, 0, 13, 11,
+ 31, 34, 9, 4, 19, 35, 7, 19, 1, 12,
+ 8, 2, 24, 22, 19, 1, 2, 7, 6, 17,
+ 18, 15, 2, 18, 12, 36, 22, 8, 21, 6,
+ 11, 11, 3, 47, 11, 14, 2, 12, 14, 14,
+ 48, 26, 6, 1, 10, 1, 1, 10, 55, 11,
+ 2, 31, 2, 9, 20, 8, 7, 26, 12, 8,
+ 17, 20, 9, 25, 6, 43, 32, 40, 28, 28,
+ 34, 32, 18, 32, 34, 3, 14, 18, 8, 4,
+ 13, 4, 7, 3, 2, 3, 1, 4, 7, 33,
+ 7, 17, 19, 13, 37, 22, 32, 20, 18, 10,
+ 6, 6, 6, 7, 23, 21, 27, 47, 41, 73,
+ 9, 9, 39, 8, 1, 7, 9, 21, 31, 23,
+ 27, 37, 23, 33, 57, 43, 55, 3, 46, 36,
+ 22, 10, 16, 2, 7, 7, 15, 6, 68, 42,
+ 30, 16, 30, 10, 4, 7, 1, 8, 80, 58,
+ 48, 34, 38, 10, 1, 9, 15, 13, 84, 46,
+ 18, 6, 18, 5, 23, 27, 10, 88, 62, 40,
+ 26, 40, 10, 1, 9, 13, 124, 37, 27, 9,
+ 23, 27, 17, 13, 7, 13, 9, 5, 1, 27,
+ 19, 35, 25, 18, 25, 39, 17, 5, 9, 17,
+ 21, 13, 15, 27, 35, 41, 8, 10, 28, 4,
+ 5, 12, 12, 10, 1, 2, 6, 2, 17, 13,
+ 21, 3, 16, 33, 2, 32, 1, 10, 8, 14,
+ 34, 0, 2, 14, 26, 19, 23, 53, 54, 60,
+ 72, 40, 32, 48, 40, 40, 46, 32, 40, 40,
+ 16, 14, 7, 12, 24, 4, 21, 10, 6, 0,
+ 10, 0, 12, 14, 3, 2, 4, 30, 26, 22,
+ 18, 11, 1, 3, 29, 15, 29, 31, 55, 49,
+ 61, 65, 21, 19, 63, 2, 4, 13, 33, 19,
+ 21, 35, 23, 31, 39, 55, 57, 73, 83, 4,
+ 13, 41, 15, 10, 8, 20, 24, 28, 50, 32,
+ 44, 48, 76, 56, 52, 76, 74, 98, 50, 26,
+ 0, 17, 41, 67, 89, 121, 16, 82, 68, 66,
+ 50, 58, 28, 22, 24, 4, 23, 13, 10, 9,
+ 28, 46, 5, 1, 18, 26, 8, 24, 46, 8,
+ 2, 76, 36, 0, 35, 61, 101, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 30 */
+
+ 82, 12, 31, 82, 12, 31, 21, 6, 44, 22,
+ 1, 25, 36, 48, 106, 28, 30, 5, 20, 38,
+ 0, 15, 3, 35, 67, 8, 18, 103, 113, 113,
+ 46, 5, 9, 20, 38, 0, 21, 0, 28, 4,
+ 0, 11, 23, 5, 33, 25, 55, 10, 13, 23,
+ 0, 25, 23, 49, 20, 1, 5, 13, 10, 8,
+ 44, 0, 0, 0, 5, 59, 67, 10, 3, 7,
+ 36, 13, 53, 23, 36, 18, 80, 68, 14, 20,
+ 9, 24, 9, 9, 17, 29, 31, 37, 39, 46,
+ 0, 16, 25, 9, 23, 17, 37, 0, 15, 13,
+ 33, 34, 9, 4, 19, 35, 5, 19, 0, 12,
+ 8, 2, 24, 20, 19, 1, 2, 7, 6, 17,
+ 16, 15, 2, 16, 10, 34, 20, 6, 21, 8,
+ 11, 13, 3, 45, 11, 14, 2, 12, 14, 14,
+ 50, 26, 6, 1, 12, 1, 3, 10, 57, 11,
+ 2, 31, 0, 11, 20, 8, 9, 26, 12, 8,
+ 21, 20, 11, 25, 6, 43, 30, 40, 28, 28,
+ 34, 32, 18, 32, 34, 3, 12, 18, 8, 4,
+ 13, 2, 9, 5, 0, 5, 3, 2, 9, 33,
+ 9, 17, 21, 17, 37, 20, 28, 16, 14, 6,
+ 2, 2, 2, 11, 27, 23, 29, 51, 45, 75,
+ 9, 9, 39, 6, 3, 9, 13, 25, 35, 25,
+ 29, 39, 23, 33, 57, 43, 55, 1, 46, 36,
+ 22, 10, 16, 2, 7, 7, 13, 6, 68, 42,
+ 30, 16, 32, 12, 6, 5, 2, 8, 80, 58,
+ 48, 34, 40, 10, 1, 9, 15, 13, 84, 46,
+ 16, 6, 18, 5, 23, 25, 10, 86, 62, 38,
+ 24, 40, 10, 1, 7, 13, 124, 35, 25, 9,
+ 21, 25, 15, 9, 5, 11, 7, 1, 0, 27,
+ 17, 35, 25, 22, 25, 39, 17, 5, 9, 19,
+ 21, 13, 17, 29, 37, 43, 8, 8, 28, 2,
+ 7, 12, 10, 10, 3, 2, 4, 2, 19, 13,
+ 23, 3, 16, 35, 2, 32, 3, 10, 6, 14,
+ 34, 1, 0, 12, 24, 21, 23, 55, 52, 58,
+ 72, 38, 30, 44, 36, 36, 42, 26, 36, 36,
+ 10, 8, 9, 6, 18, 1, 29, 4, 0, 3,
+ 8, 1, 12, 16, 3, 4, 8, 26, 22, 18,
+ 14, 15, 5, 7, 33, 19, 31, 33, 57, 53,
+ 63, 67, 23, 21, 65, 0, 0, 15, 37, 23,
+ 25, 39, 27, 35, 43, 57, 61, 77, 85, 2,
+ 15, 43, 15, 12, 8, 20, 24, 28, 52, 34,
+ 46, 50, 78, 58, 54, 80, 76, 98, 48, 22,
+ 5, 21, 47, 73, 95, 125, 18, 84, 70, 66,
+ 50, 60, 30, 22, 24, 4, 23, 11, 10, 7,
+ 30, 48, 5, 1, 18, 26, 8, 24, 48, 8,
+ 2, 74, 32, 5, 41, 67, 109, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 31 */
+
+ 80, 12, 31, 80, 12, 31, 17, 8, 44, 22,
+ 1, 27, 34, 46, 106, 28, 34, 5, 22, 40,
+ 0, 17, 3, 37, 69, 6, 14, 107, 115, 115,
+ 52, 3, 9, 22, 40, 0, 21, 2, 28, 2,
+ 0, 11, 21, 5, 33, 25, 55, 10, 13, 21,
+ 0, 25, 23, 49, 20, 1, 5, 13, 10, 8,
+ 44, 0, 0, 0, 3, 59, 67, 10, 5, 7,
+ 36, 13, 53, 19, 40, 22, 82, 72, 16, 22,
+ 7, 26, 7, 7, 13, 29, 31, 37, 37, 46,
+ 0, 16, 23, 9, 21, 15, 35, 0, 15, 13,
+ 33, 34, 9, 4, 17, 35, 5, 19, 0, 12,
+ 8, 2, 24, 20, 19, 1, 2, 7, 6, 17,
+ 14, 15, 2, 14, 8, 32, 18, 4, 19, 8,
+ 11, 15, 5, 45, 13, 14, 2, 12, 14, 14,
+ 52, 26, 6, 1, 12, 1, 5, 10, 59, 11,
+ 2, 33, 0, 13, 20, 8, 11, 26, 12, 8,
+ 23, 20, 13, 25, 6, 45, 28, 40, 28, 28,
+ 34, 32, 18, 32, 34, 3, 12, 18, 8, 4,
+ 13, 2, 9, 5, 0, 5, 5, 0, 9, 33,
+ 9, 17, 23, 19, 37, 16, 24, 12, 10, 2,
+ 1, 1, 1, 15, 31, 27, 33, 55, 49, 77,
+ 9, 9, 41, 4, 5, 11, 15, 27, 37, 27,
+ 31, 41, 25, 35, 57, 43, 53, 1, 46, 36,
+ 22, 10, 16, 2, 7, 5, 11, 8, 68, 42,
+ 30, 16, 32, 12, 8, 3, 6, 8, 80, 58,
+ 48, 34, 40, 10, 1, 9, 13, 13, 84, 46,
+ 16, 6, 18, 5, 23, 23, 10, 86, 60, 36,
+ 22, 40, 10, 1, 7, 11, 124, 33, 23, 7,
+ 19, 23, 13, 7, 3, 9, 5, 0, 4, 25,
+ 15, 35, 23, 24, 25, 41, 17, 3, 9, 19,
+ 23, 13, 19, 31, 39, 45, 6, 8, 28, 2,
+ 7, 10, 10, 10, 5, 0, 4, 2, 21, 13,
+ 23, 3, 16, 35, 0, 30, 5, 10, 4, 12,
+ 34, 3, 1, 12, 24, 23, 25, 59, 50, 56,
+ 70, 36, 28, 42, 32, 32, 38, 22, 30, 30,
+ 4, 4, 11, 0, 12, 7, 37, 1, 3, 7,
+ 6, 3, 12, 18, 3, 6, 12, 22, 18, 14,
+ 10, 19, 9, 9, 37, 21, 35, 35, 61, 55,
+ 65, 69, 23, 21, 67, 1, 1, 19, 41, 27,
+ 29, 41, 29, 39, 45, 59, 63, 79, 87, 0,
+ 17, 45, 15, 12, 8, 22, 26, 30, 54, 36,
+ 48, 52, 80, 60, 56, 82, 78, 96, 44, 18,
+ 9, 27, 53, 79, 101, 125, 18, 84, 70, 68,
+ 50, 62, 30, 22, 26, 6, 23, 11, 12, 7,
+ 32, 50, 5, 0, 20, 28, 8, 26, 50, 8,
+ 2, 72, 28, 9, 47, 73, 117, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 32 */
+
+ 76, 10, 33, 76, 10, 33, 15, 10, 44, 22,
+ 3, 31, 30, 42, 104, 28, 38, 7, 22, 40,
+ 1, 19, 3, 41, 73, 4, 10, 111, 117, 117,
+ 56, 1, 11, 22, 40, 1, 21, 4, 28, 0,
+ 0, 11, 21, 7, 35, 25, 57, 10, 13, 21,
+ 1, 27, 23, 49, 20, 1, 5, 13, 10, 6,
+ 44, 0, 0, 0, 3, 61, 67, 10, 7, 9,
+ 36, 13, 53, 17, 42, 24, 84, 74, 18, 24,
+ 5, 28, 5, 7, 11, 29, 31, 37, 37, 46,
+ 0, 16, 23, 9, 21, 15, 33, 1, 17, 15,
+ 35, 34, 9, 4, 17, 35, 5, 19, 0, 10,
+ 6, 2, 22, 18, 19, 3, 2, 7, 4, 17,
+ 12, 17, 0, 12, 4, 30, 16, 2, 19, 8,
+ 11, 17, 7, 45, 15, 12, 0, 12, 14, 14,
+ 52, 26, 6, 3, 12, 3, 7, 8, 61, 13,
+ 0, 35, 1, 15, 18, 6, 13, 26, 12, 8,
+ 27, 18, 15, 25, 6, 47, 26, 38, 26, 28,
+ 34, 30, 16, 32, 32, 5, 10, 18, 6, 2,
+ 15, 0, 11, 7, 1, 7, 7, 1, 11, 33,
+ 11, 17, 25, 23, 37, 12, 20, 8, 6, 1,
+ 7, 5, 5, 19, 35, 31, 37, 61, 53, 81,
+ 11, 11, 43, 2, 9, 15, 19, 31, 41, 31,
+ 35, 43, 27, 37, 57, 43, 53, 1, 46, 36,
+ 22, 10, 16, 2, 7, 5, 9, 8, 66, 42,
+ 30, 16, 32, 12, 8, 3, 8, 6, 80, 58,
+ 46, 32, 40, 10, 1, 9, 13, 13, 84, 44,
+ 14, 4, 18, 5, 23, 23, 10, 84, 58, 34,
+ 20, 38, 10, 1, 7, 11, 124, 33, 21, 7,
+ 17, 21, 13, 5, 1, 7, 3, 2, 6, 25,
+ 15, 35, 23, 26, 25, 43, 17, 3, 9, 21,
+ 25, 15, 21, 33, 43, 49, 4, 6, 28, 0,
+ 9, 8, 8, 8, 7, 1, 2, 0, 23, 15,
+ 25, 3, 16, 37, 1, 28, 7, 8, 2, 10,
+ 34, 5, 5, 10, 22, 25, 27, 63, 48, 54,
+ 68, 32, 24, 38, 28, 28, 32, 16, 24, 24,
+ 1, 1, 15, 5, 4, 13, 47, 7, 9, 11,
+ 2, 7, 12, 18, 3, 8, 16, 18, 14, 10,
+ 6, 25, 13, 13, 43, 25, 39, 39, 65, 59,
+ 67, 71, 25, 23, 71, 5, 5, 23, 45, 31,
+ 33, 45, 33, 43, 49, 63, 67, 83, 89, 1,
+ 19, 47, 15, 12, 8, 22, 26, 30, 56, 36,
+ 50, 52, 82, 62, 58, 84, 80, 94, 40, 12,
+ 15, 33, 59, 87, 107, 125, 18, 84, 70, 68,
+ 50, 62, 30, 22, 26, 6, 23, 11, 12, 7,
+ 34, 52, 5, 0, 20, 28, 8, 26, 50, 8,
+ 0, 68, 22, 15, 53, 81, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 33 */
+
+ 74, 10, 33, 74, 10, 33, 11, 14, 46, 24,
+ 3, 33, 28, 40, 104, 28, 44, 7, 24, 42,
+ 1, 19, 1, 43, 75, 4, 8, 113, 119, 117,
+ 62, 2, 11, 24, 42, 1, 19, 8, 30, 0,
+ 2, 9, 19, 7, 35, 23, 57, 10, 11, 19,
+ 1, 27, 21, 49, 22, 0, 3, 11, 12, 6,
+ 44, 0, 0, 0, 1, 61, 67, 12, 7, 9,
+ 36, 11, 51, 13, 46, 28, 88, 78, 22, 26,
+ 1, 32, 1, 5, 7, 27, 29, 35, 35, 48,
+ 2, 18, 21, 7, 19, 13, 29, 1, 17, 15,
+ 35, 36, 9, 6, 15, 33, 3, 17, 2, 10,
+ 6, 4, 22, 18, 17, 3, 4, 7, 4, 15,
+ 12, 17, 0, 12, 2, 30, 16, 2, 17, 10,
+ 9, 17, 7, 43, 15, 12, 0, 14, 14, 14,
+ 54, 28, 8, 3, 14, 3, 7, 8, 61, 13,
+ 0, 35, 1, 17, 18, 6, 13, 28, 14, 8,
+ 29, 18, 15, 23, 6, 47, 26, 38, 26, 28,
+ 34, 30, 16, 32, 32, 5, 10, 18, 6, 2,
+ 15, 0, 11, 7, 1, 7, 7, 1, 11, 31,
+ 11, 15, 25, 25, 35, 10, 18, 6, 4, 5,
+ 11, 7, 7, 23, 37, 33, 39, 65, 55, 83,
+ 11, 11, 43, 0, 11, 17, 21, 33, 43, 33,
+ 37, 43, 27, 37, 55, 41, 51, 0, 48, 36,
+ 22, 12, 18, 4, 5, 3, 5, 10, 66, 42,
+ 30, 16, 34, 14, 10, 1, 12, 6, 82, 60,
+ 46, 32, 42, 10, 1, 7, 11, 13, 86, 44,
+ 14, 4, 20, 3, 21, 21, 10, 84, 58, 34,
+ 20, 38, 10, 0, 5, 9, 124, 31, 19, 5,
+ 13, 17, 11, 1, 2, 3, 0, 6, 10, 23,
+ 13, 33, 21, 30, 23, 43, 15, 1, 7, 21,
+ 25, 15, 21, 33, 45, 51, 4, 6, 30, 0,
+ 9, 8, 8, 8, 7, 1, 2, 0, 23, 15,
+ 25, 1, 18, 37, 1, 28, 7, 8, 2, 10,
+ 36, 5, 7, 10, 22, 25, 27, 65, 48, 54,
+ 68, 30, 22, 36, 24, 24, 28, 12, 20, 20,
+ 5, 5, 17, 9, 1, 19, 55, 11, 13, 13,
+ 0, 9, 12, 20, 1, 12, 22, 16, 12, 8,
+ 4, 29, 15, 15, 47, 27, 41, 41, 67, 61,
+ 69, 71, 25, 23, 73, 7, 7, 25, 47, 33,
+ 35, 47, 35, 45, 51, 65, 69, 85, 89, 1,
+ 19, 47, 13, 14, 10, 24, 28, 32, 60, 38,
+ 52, 54, 86, 66, 60, 88, 84, 94, 38, 8,
+ 19, 37, 63, 93, 111, 125, 20, 86, 72, 70,
+ 52, 64, 32, 24, 28, 8, 21, 9, 14, 5,
+ 38, 56, 3, 2, 22, 30, 10, 28, 52, 8,
+ 0, 66, 18, 19, 57, 87, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 34 */
+
+ 72, 10, 33, 72, 10, 33, 7, 16, 46, 24,
+ 3, 35, 26, 38, 104, 28, 48, 7, 26, 44,
+ 1, 21, 1, 45, 77, 2, 4, 117, 121, 119,
+ 68, 4, 11, 26, 44, 1, 19, 10, 30, 1,
+ 2, 9, 17, 7, 35, 23, 57, 10, 11, 17,
+ 1, 27, 21, 49, 22, 0, 3, 11, 12, 6,
+ 44, 0, 0, 0, 0, 61, 67, 12, 9, 9,
+ 36, 11, 51, 9, 50, 32, 90, 82, 24, 28,
+ 0, 34, 0, 3, 3, 27, 29, 35, 33, 48,
+ 2, 18, 19, 7, 17, 13, 27, 1, 17, 15,
+ 35, 36, 9, 6, 15, 33, 3, 17, 2, 10,
+ 6, 4, 22, 18, 17, 3, 4, 7, 4, 15,
+ 10, 17, 0, 10, 0, 28, 14, 0, 15, 10,
+ 9, 19, 9, 43, 17, 12, 0, 14, 14, 14,
+ 56, 28, 8, 3, 14, 3, 9, 8, 63, 13,
+ 0, 37, 1, 19, 18, 6, 15, 28, 14, 8,
+ 31, 18, 17, 23, 6, 49, 24, 38, 26, 28,
+ 34, 30, 16, 32, 32, 5, 10, 18, 6, 2,
+ 15, 0, 11, 7, 1, 7, 9, 3, 13, 31,
+ 11, 15, 27, 27, 35, 6, 14, 2, 0, 9,
+ 15, 11, 11, 27, 41, 37, 43, 69, 59, 85,
+ 11, 11, 45, 1, 13, 19, 23, 37, 45, 35,
+ 39, 45, 29, 39, 55, 41, 49, 0, 48, 36,
+ 22, 12, 18, 4, 5, 3, 3, 12, 66, 42,
+ 30, 16, 34, 14, 12, 0, 16, 6, 82, 60,
+ 46, 32, 42, 10, 1, 7, 9, 13, 86, 44,
+ 14, 4, 20, 3, 21, 19, 10, 82, 56, 32,
+ 18, 38, 10, 0, 5, 9, 124, 29, 17, 3,
+ 11, 15, 9, 0, 4, 1, 2, 8, 14, 21,
+ 11, 33, 19, 32, 23, 45, 15, 0, 7, 21,
+ 27, 15, 23, 35, 47, 53, 2, 6, 30, 0,
+ 11, 6, 8, 8, 9, 3, 2, 0, 25, 15,
+ 25, 1, 18, 37, 3, 26, 9, 8, 0, 8,
+ 36, 7, 9, 10, 22, 27, 29, 69, 46, 52,
+ 66, 28, 20, 34, 20, 20, 24, 8, 14, 14,
+ 11, 9, 19, 15, 7, 25, 63, 17, 17, 17,
+ 1, 11, 12, 22, 1, 14, 26, 12, 8, 4,
+ 0, 33, 19, 19, 51, 29, 45, 43, 71, 63,
+ 71, 73, 27, 23, 75, 9, 9, 29, 51, 37,
+ 39, 49, 39, 49, 53, 67, 71, 87, 91, 3,
+ 21, 49, 13, 14, 10, 26, 28, 34, 62, 40,
+ 54, 56, 88, 68, 62, 90, 86, 92, 34, 4,
+ 23, 43, 69, 99, 117, 125, 20, 86, 72, 70,
+ 52, 66, 32, 24, 28, 8, 21, 9, 16, 5,
+ 40, 58, 3, 4, 24, 32, 10, 28, 54, 8,
+ 0, 64, 14, 23, 63, 93, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 35 */
+
+ 70, 10, 33, 70, 10, 33, 3, 20, 48, 24,
+ 5, 39, 22, 36, 104, 28, 52, 7, 28, 46,
+ 1, 23, 1, 47, 79, 2, 0, 119, 123, 119,
+ 74, 6, 11, 28, 46, 1, 17, 12, 32, 1,
+ 2, 7, 15, 9, 37, 23, 57, 10, 11, 17,
+ 1, 27, 21, 49, 24, 0, 3, 11, 14, 6,
+ 44, 0, 0, 0, 0, 61, 67, 14, 11, 9,
+ 36, 11, 51, 5, 54, 34, 94, 86, 26, 30,
+ 4, 36, 2, 1, 0, 25, 29, 33, 31, 48,
+ 2, 18, 19, 7, 17, 11, 23, 1, 19, 17,
+ 37, 36, 9, 6, 13, 33, 1, 17, 4, 10,
+ 6, 4, 22, 16, 17, 3, 4, 7, 4, 15,
+ 8, 17, 0, 8, 1, 26, 12, 1, 15, 12,
+ 9, 21, 9, 41, 17, 12, 0, 14, 14, 14,
+ 58, 28, 8, 3, 16, 3, 11, 8, 65, 13,
+ 0, 37, 3, 21, 18, 6, 17, 28, 14, 8,
+ 35, 18, 19, 23, 6, 49, 22, 38, 26, 28,
+ 34, 30, 16, 32, 32, 5, 8, 18, 6, 2,
+ 15, 1, 13, 9, 3, 9, 11, 5, 13, 31,
+ 13, 15, 29, 31, 35, 4, 10, 1, 3, 13,
+ 19, 15, 15, 31, 45, 39, 45, 73, 63, 87,
+ 11, 11, 45, 3, 15, 21, 27, 39, 49, 37,
+ 41, 47, 29, 39, 55, 41, 49, 2, 48, 36,
+ 22, 12, 18, 4, 5, 1, 1, 12, 66, 42,
+ 30, 16, 36, 16, 14, 2, 20, 6, 82, 60,
+ 46, 32, 44, 10, 1, 7, 9, 13, 86, 44,
+ 12, 4, 20, 3, 21, 17, 10, 82, 56, 30,
+ 16, 38, 10, 0, 3, 7, 124, 27, 15, 3,
+ 9, 13, 7, 4, 6, 0, 4, 12, 16, 21,
+ 9, 33, 19, 36, 23, 45, 15, 0, 7, 23,
+ 27, 15, 25, 37, 49, 55, 2, 4, 30, 1,
+ 11, 6, 6, 8, 11, 3, 0, 0, 27, 15,
+ 27, 1, 18, 39, 3, 26, 11, 8, 1, 8,
+ 36, 9, 11, 8, 20, 29, 29, 71, 44, 50,
+ 66, 26, 18, 30, 16, 16, 20, 2, 10, 10,
+ 17, 15, 21, 21, 13, 31, 71, 23, 23, 21,
+ 3, 13, 12, 24, 1, 16, 30, 8, 4, 0,
+ 3, 37, 23, 21, 55, 33, 47, 45, 73, 67,
+ 73, 75, 27, 25, 77, 11, 13, 31, 55, 41,
+ 43, 53, 41, 53, 57, 69, 75, 91, 93, 5,
+ 23, 51, 13, 16, 10, 26, 30, 34, 64, 42,
+ 56, 58, 90, 70, 64, 94, 88, 92, 32, 0,
+ 29, 47, 75, 105, 123, 125, 22, 88, 74, 72,
+ 52, 68, 34, 24, 30, 10, 21, 7, 16, 3,
+ 42, 60, 3, 4, 24, 32, 10, 30, 56, 8,
+ 0, 62, 10, 29, 69, 99, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 36 */
+
+ 66, 10, 33, 66, 10, 33, 1, 22, 48, 24,
+ 5, 41, 20, 32, 104, 28, 58, 9, 30, 46,
+ 1, 25, 1, 49, 81, 0, 3, 123, 125, 121,
+ 80, 8, 11, 30, 46, 1, 17, 14, 32, 3,
+ 2, 7, 15, 9, 37, 21, 57, 10, 11, 15,
+ 3, 29, 21, 49, 24, 2, 3, 9, 14, 6,
+ 44, 0, 0, 0, 2, 63, 67, 14, 13, 9,
+ 36, 11, 51, 3, 56, 38, 96, 88, 28, 32,
+ 6, 40, 4, 0, 4, 25, 27, 33, 29, 48,
+ 2, 18, 17, 7, 15, 11, 21, 3, 19, 17,
+ 37, 36, 9, 6, 13, 33, 1, 17, 4, 10,
+ 4, 4, 22, 16, 17, 3, 4, 7, 4, 15,
+ 8, 19, 0, 8, 3, 24, 10, 3, 13, 12,
+ 9, 23, 11, 41, 19, 12, 1, 14, 14, 14,
+ 60, 30, 8, 5, 16, 5, 13, 6, 67, 13,
+ 0, 39, 3, 23, 16, 4, 17, 30, 14, 8,
+ 37, 16, 21, 23, 6, 51, 22, 38, 26, 28,
+ 34, 30, 16, 32, 30, 5, 8, 18, 6, 2,
+ 17, 1, 13, 9, 3, 9, 11, 7, 15, 31,
+ 13, 15, 31, 33, 35, 0, 6, 5, 7, 17,
+ 23, 19, 19, 35, 49, 43, 49, 79, 67, 89,
+ 11, 13, 47, 5, 17, 23, 29, 43, 51, 39,
+ 45, 47, 31, 41, 55, 41, 47, 2, 48, 36,
+ 22, 12, 18, 4, 5, 1, 0, 14, 66, 42,
+ 30, 16, 36, 16, 14, 2, 24, 6, 82, 60,
+ 46, 32, 44, 10, 1, 7, 7, 13, 86, 42,
+ 12, 4, 20, 3, 21, 17, 10, 80, 54, 28,
+ 14, 38, 10, 0, 3, 7, 124, 25, 13, 1,
+ 5, 11, 5, 6, 10, 2, 8, 14, 20, 19,
+ 9, 31, 17, 38, 21, 47, 15, 2, 7, 23,
+ 29, 17, 27, 39, 51, 57, 0, 4, 30, 1,
+ 13, 4, 6, 8, 13, 5, 0, 1, 29, 17,
+ 27, 1, 18, 39, 5, 24, 13, 8, 3, 6,
+ 36, 11, 13, 8, 20, 31, 31, 75, 42, 50,
+ 64, 24, 14, 28, 12, 12, 16, 1, 4, 4,
+ 23, 19, 25, 27, 21, 37, 79, 27, 27, 25,
+ 5, 17, 12, 26, 0, 18, 34, 6, 0, 3,
+ 5, 41, 27, 25, 59, 35, 51, 49, 77, 69,
+ 75, 77, 29, 25, 79, 15, 15, 35, 57, 45,
+ 47, 55, 45, 57, 59, 71, 77, 93, 93, 7,
+ 23, 53, 13, 16, 12, 28, 30, 36, 66, 42,
+ 58, 60, 92, 72, 66, 96, 90, 90, 28, 3,
+ 33, 53, 81, 111, 125, 125, 22, 88, 74, 72,
+ 54, 68, 34, 26, 30, 10, 19, 7, 18, 3,
+ 44, 62, 3, 6, 26, 34, 10, 30, 58, 8,
+ 0, 58, 6, 33, 75, 105, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 37 */
+
+ 64, 10, 33, 64, 10, 33, 2, 26, 48, 24,
+ 7, 45, 16, 30, 104, 28, 62, 9, 32, 48,
+ 1, 25, 0, 51, 83, 0, 7, 125, 125, 121,
+ 86, 12, 11, 32, 48, 1, 17, 16, 32, 3,
+ 2, 7, 13, 9, 39, 21, 57, 10, 11, 13,
+ 3, 29, 21, 49, 26, 2, 1, 9, 16, 6,
+ 44, 0, 0, 0, 2, 63, 67, 16, 15, 9,
+ 36, 9, 51, 0, 60, 40, 100, 92, 32, 34,
+ 8, 42, 6, 2, 8, 23, 27, 31, 27, 48,
+ 2, 18, 17, 7, 15, 9, 17, 3, 19, 17,
+ 37, 36, 9, 8, 11, 33, 1, 17, 6, 10,
+ 4, 4, 22, 16, 15, 3, 6, 7, 4, 15,
+ 6, 19, 0, 6, 5, 24, 8, 5, 11, 14,
+ 9, 25, 11, 39, 19, 12, 1, 14, 14, 14,
+ 62, 30, 8, 5, 16, 5, 15, 6, 69, 13,
+ 0, 39, 5, 25, 16, 4, 19, 30, 14, 8,
+ 39, 16, 23, 23, 6, 51, 20, 38, 26, 28,
+ 34, 30, 16, 32, 30, 5, 8, 18, 6, 2,
+ 17, 1, 13, 11, 3, 9, 13, 9, 15, 31,
+ 15, 15, 33, 35, 35, 1, 2, 7, 9, 21,
+ 27, 23, 23, 39, 53, 47, 53, 83, 69, 91,
+ 11, 13, 47, 7, 19, 25, 31, 45, 55, 41,
+ 47, 49, 31, 41, 55, 41, 47, 2, 48, 36,
+ 22, 12, 18, 6, 3, 0, 2, 14, 66, 42,
+ 30, 16, 36, 18, 16, 4, 28, 6, 82, 60,
+ 46, 32, 44, 10, 1, 7, 5, 13, 88, 42,
+ 10, 4, 20, 3, 21, 15, 10, 80, 52, 26,
+ 12, 38, 10, 0, 3, 5, 124, 23, 11, 0,
+ 3, 9, 3, 8, 12, 4, 10, 18, 24, 17,
+ 7, 31, 17, 40, 21, 47, 13, 4, 7, 25,
+ 29, 17, 27, 41, 53, 59, 1, 2, 30, 3,
+ 13, 4, 6, 8, 15, 7, 0, 1, 31, 17,
+ 27, 1, 18, 39, 7, 24, 13, 8, 5, 4,
+ 36, 13, 15, 6, 18, 33, 33, 79, 40, 48,
+ 64, 22, 12, 24, 8, 8, 12, 7, 1, 0,
+ 29, 23, 27, 33, 27, 43, 87, 33, 33, 29,
+ 7, 19, 12, 28, 0, 20, 38, 2, 1, 7,
+ 9, 45, 31, 27, 63, 37, 55, 51, 81, 73,
+ 77, 79, 29, 25, 81, 17, 19, 39, 61, 47,
+ 49, 59, 47, 59, 63, 73, 79, 95, 95, 7,
+ 25, 55, 11, 18, 12, 28, 32, 38, 68, 44,
+ 60, 62, 96, 74, 68, 98, 94, 90, 24, 7,
+ 39, 57, 87, 117, 125, 125, 22, 90, 76, 74,
+ 54, 70, 34, 26, 32, 12, 19, 7, 20, 1,
+ 46, 64, 1, 8, 26, 34, 10, 32, 60, 8,
+ 0, 56, 2, 39, 81, 111, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 38 */
+
+ 62, 10, 35, 62, 10, 35, 6, 28, 50, 24,
+ 7, 47, 14, 28, 104, 28, 66, 9, 32, 50,
+ 1, 27, 0, 53, 85, 1, 9, 125, 125, 123,
+ 92, 14, 11, 32, 50, 1, 15, 18, 34, 5,
+ 4, 5, 11, 11, 39, 21, 57, 10, 9, 13,
+ 3, 29, 19, 49, 26, 2, 1, 9, 16, 6,
+ 44, 0, 0, 0, 4, 63, 67, 16, 15, 11,
+ 36, 9, 49, 4, 64, 44, 102, 96, 34, 36,
+ 12, 44, 8, 2, 10, 23, 27, 31, 27, 50,
+ 2, 18, 15, 5, 13, 9, 15, 3, 21, 19,
+ 39, 36, 9, 8, 11, 33, 0, 17, 6, 10,
+ 4, 6, 22, 14, 15, 3, 6, 7, 4, 15,
+ 4, 19, 0, 4, 7, 22, 8, 5, 11, 14,
+ 9, 25, 13, 39, 21, 12, 1, 14, 14, 14,
+ 62, 30, 8, 5, 18, 5, 17, 6, 69, 15,
+ 0, 41, 5, 27, 16, 4, 21, 30, 14, 8,
+ 43, 16, 25, 23, 6, 53, 18, 38, 24, 28,
+ 34, 30, 16, 32, 30, 5, 6, 18, 4, 2,
+ 17, 3, 15, 11, 5, 11, 15, 11, 17, 31,
+ 15, 13, 35, 39, 35, 5, 1, 11, 13, 25,
+ 31, 27, 27, 43, 57, 49, 55, 87, 73, 93,
+ 11, 13, 49, 9, 21, 27, 35, 49, 57, 43,
+ 49, 51, 33, 43, 55, 41, 45, 4, 48, 36,
+ 22, 14, 18, 6, 3, 0, 4, 16, 64, 42,
+ 30, 16, 38, 18, 18, 6, 32, 6, 84, 60,
+ 46, 32, 46, 10, 1, 5, 5, 13, 88, 42,
+ 10, 4, 20, 1, 19, 13, 10, 78, 52, 24,
+ 12, 38, 10, 2, 1, 5, 124, 23, 9, 0,
+ 1, 7, 1, 12, 14, 6, 12, 20, 26, 17,
+ 5, 31, 15, 44, 21, 49, 13, 4, 7, 25,
+ 31, 17, 29, 43, 55, 61, 1, 2, 30, 3,
+ 15, 2, 4, 8, 15, 7, 1, 1, 31, 17,
+ 29, 1, 18, 41, 7, 22, 15, 6, 7, 4,
+ 38, 13, 17, 6, 18, 33, 33, 81, 38, 46,
+ 62, 20, 10, 22, 4, 4, 8, 11, 5, 5,
+ 35, 29, 29, 37, 33, 49, 95, 39, 37, 33,
+ 9, 21, 12, 30, 0, 24, 44, 1, 5, 11,
+ 13, 49, 35, 31, 67, 41, 57, 53, 83, 75,
+ 79, 81, 31, 27, 83, 19, 21, 41, 65, 51,
+ 53, 61, 51, 63, 65, 77, 83, 99, 97, 9,
+ 27, 57, 11, 18, 12, 30, 32, 38, 72, 46,
+ 62, 64, 98, 78, 70, 102, 96, 88, 22, 11,
+ 43, 63, 91, 123, 125, 125, 24, 90, 76, 74,
+ 54, 72, 36, 26, 32, 12, 19, 5, 20, 1,
+ 48, 66, 1, 8, 28, 36, 10, 32, 62, 8,
+ 0, 54, 1, 43, 87, 119, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 39 */
+
+ 60, 10, 35, 60, 10, 35, 10, 32, 50, 24,
+ 9, 51, 10, 24, 104, 28, 72, 11, 34, 50,
+ 1, 29, 0, 55, 87, 1, 13, 125, 125, 123,
+ 98, 16, 11, 34, 50, 1, 15, 20, 34, 5,
+ 4, 5, 11, 11, 41, 19, 57, 10, 9, 11,
+ 5, 29, 19, 49, 28, 4, 1, 7, 18, 6,
+ 44, 0, 0, 0, 4, 63, 67, 18, 17, 11,
+ 36, 9, 49, 6, 68, 46, 106, 100, 36, 38,
+ 14, 48, 10, 4, 14, 21, 25, 29, 25, 50,
+ 2, 18, 15, 5, 13, 7, 11, 5, 21, 19,
+ 39, 36, 9, 8, 9, 33, 0, 17, 8, 10,
+ 2, 6, 22, 14, 15, 3, 6, 7, 4, 15,
+ 4, 19, 0, 4, 9, 20, 6, 7, 9, 16,
+ 9, 27, 13, 37, 21, 12, 3, 14, 14, 14,
+ 64, 32, 8, 5, 18, 5, 19, 6, 71, 15,
+ 0, 41, 7, 29, 14, 4, 21, 32, 14, 8,
+ 45, 14, 27, 23, 6, 53, 18, 38, 24, 28,
+ 34, 30, 16, 32, 30, 5, 6, 18, 4, 2,
+ 17, 3, 15, 13, 5, 11, 15, 13, 17, 31,
+ 17, 13, 37, 41, 35, 7, 5, 15, 17, 29,
+ 35, 31, 31, 47, 61, 53, 59, 93, 77, 95,
+ 11, 15, 49, 11, 23, 29, 37, 51, 61, 45,
+ 51, 51, 33, 43, 55, 41, 45, 4, 48, 36,
+ 22, 14, 18, 6, 3, 2, 6, 16, 64, 42,
+ 30, 16, 38, 20, 20, 6, 36, 6, 84, 60,
+ 46, 32, 46, 10, 1, 5, 3, 13, 88, 40,
+ 8, 4, 20, 1, 19, 13, 10, 78, 50, 22,
+ 10, 38, 10, 2, 1, 3, 124, 21, 7, 2,
+ 2, 5, 0, 14, 18, 8, 16, 24, 30, 15,
+ 3, 29, 15, 46, 19, 49, 13, 6, 7, 27,
+ 31, 19, 31, 45, 57, 63, 3, 0, 30, 5,
+ 15, 2, 4, 8, 17, 9, 1, 3, 33, 17,
+ 29, 1, 18, 41, 9, 22, 17, 6, 9, 2,
+ 38, 15, 19, 4, 16, 35, 35, 85, 36, 46,
+ 62, 18, 6, 18, 0, 0, 4, 17, 11, 9,
+ 41, 33, 33, 43, 41, 55, 103, 43, 43, 37,
+ 11, 23, 12, 32, 2, 26, 48, 3, 9, 15,
+ 15, 53, 39, 33, 71, 43, 61, 57, 87, 79,
+ 81, 83, 31, 27, 85, 23, 25, 45, 67, 55,
+ 57, 65, 53, 67, 69, 79, 85, 101, 97, 11,
+ 27, 59, 11, 20, 14, 30, 34, 40, 74, 48,
+ 64, 66, 100, 80, 72, 104, 98, 88, 18, 15,
+ 49, 67, 97, 125, 125, 125, 24, 92, 78, 76,
+ 56, 74, 36, 28, 34, 14, 17, 5, 22, 0,
+ 50, 68, 1, 10, 28, 36, 10, 34, 64, 8,
+ 0, 50, 5, 49, 93, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 40 */
+
+ 56, 8, 35, 56, 8, 35, 12, 34, 50, 24,
+ 9, 53, 8, 22, 104, 28, 76, 11, 36, 52,
+ 1, 31, 0, 57, 91, 3, 17, 125, 125, 125,
+ 102, 18, 11, 36, 52, 1, 15, 22, 34, 7,
+ 4, 5, 9, 13, 41, 19, 59, 10, 9, 11,
+ 5, 31, 19, 49, 28, 4, 1, 7, 18, 6,
+ 44, 0, 0, 0, 6, 65, 67, 18, 19, 11,
+ 36, 9, 49, 10, 70, 50, 108, 102, 38, 40,
+ 16, 50, 12, 6, 18, 21, 25, 29, 23, 50,
+ 2, 18, 13, 5, 11, 7, 9, 5, 23, 21,
+ 41, 36, 9, 8, 9, 33, 0, 17, 8, 8,
+ 2, 6, 22, 12, 15, 3, 6, 7, 2, 15,
+ 2, 21, 0, 2, 13, 18, 4, 9, 9, 16,
+ 9, 29, 15, 37, 23, 10, 3, 14, 14, 14,
+ 66, 32, 8, 7, 18, 7, 21, 4, 73, 15,
+ 0, 43, 7, 31, 14, 2, 23, 32, 14, 8,
+ 49, 14, 29, 23, 6, 55, 16, 36, 24, 28,
+ 34, 28, 14, 32, 28, 7, 4, 18, 4, 0,
+ 19, 5, 17, 13, 7, 13, 17, 15, 19, 31,
+ 17, 13, 39, 45, 35, 11, 9, 19, 21, 33,
+ 41, 35, 35, 51, 65, 57, 63, 97, 81, 97,
+ 11, 15, 51, 13, 27, 33, 41, 55, 63, 49,
+ 55, 53, 35, 45, 55, 41, 43, 4, 48, 36,
+ 22, 14, 18, 6, 3, 2, 8, 18, 64, 42,
+ 30, 16, 38, 20, 20, 8, 38, 6, 84, 60,
+ 46, 30, 46, 10, 1, 5, 3, 13, 88, 40,
+ 8, 4, 20, 1, 19, 11, 10, 76, 48, 20,
+ 8, 36, 10, 2, 1, 3, 124, 19, 5, 2,
+ 4, 3, 0, 16, 20, 10, 18, 26, 32, 15,
+ 3, 29, 13, 48, 19, 51, 13, 6, 7, 27,
+ 33, 19, 33, 47, 61, 65, 5, 0, 30, 5,
+ 17, 0, 2, 6, 19, 11, 3, 3, 35, 19,
+ 31, 1, 18, 43, 11, 20, 19, 6, 11, 0,
+ 38, 17, 21, 4, 16, 37, 37, 89, 34, 44,
+ 60, 14, 4, 16, 3, 3, 1, 21, 17, 15,
+ 47, 39, 35, 49, 47, 61, 111, 49, 47, 41,
+ 15, 27, 12, 32, 2, 28, 52, 7, 13, 19,
+ 19, 59, 43, 37, 75, 47, 65, 59, 91, 81,
+ 83, 85, 33, 29, 87, 25, 27, 49, 71, 59,
+ 61, 67, 57, 71, 71, 81, 89, 105, 99, 13,
+ 29, 61, 11, 20, 14, 32, 34, 40, 76, 48,
+ 66, 68, 102, 82, 74, 106, 100, 86, 14, 19,
+ 53, 73, 103, 125, 125, 125, 24, 92, 78, 76,
+ 56, 74, 36, 28, 34, 14, 17, 5, 22, 0,
+ 52, 70, 1, 10, 30, 38, 10, 34, 64, 8,
+ 1, 48, 9, 53, 99, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 41 */
+
+ 54, 8, 35, 54, 8, 35, 16, 36, 52, 24,
+ 9, 55, 6, 20, 104, 28, 80, 11, 38, 54,
+ 1, 31, 2, 59, 93, 5, 21, 125, 125, 125,
+ 108, 22, 11, 38, 54, 1, 13, 24, 36, 9,
+ 4, 3, 7, 13, 41, 19, 59, 10, 9, 9,
+ 5, 31, 19, 49, 28, 4, 0, 7, 18, 6,
+ 44, 0, 0, 0, 8, 65, 67, 18, 21, 11,
+ 36, 7, 49, 14, 74, 54, 110, 106, 42, 42,
+ 20, 52, 16, 8, 22, 21, 25, 29, 21, 50,
+ 2, 18, 11, 5, 9, 5, 7, 5, 23, 21,
+ 41, 36, 9, 10, 7, 31, 2, 17, 8, 8,
+ 2, 6, 22, 12, 13, 3, 8, 7, 2, 13,
+ 0, 21, 0, 0, 15, 18, 2, 11, 7, 16,
+ 7, 31, 17, 37, 25, 10, 3, 16, 14, 14,
+ 68, 32, 8, 7, 20, 7, 21, 4, 75, 15,
+ 0, 45, 7, 33, 14, 2, 25, 32, 14, 8,
+ 51, 14, 29, 23, 6, 57, 14, 36, 24, 28,
+ 34, 28, 14, 32, 28, 7, 4, 18, 4, 0,
+ 19, 5, 17, 13, 7, 13, 19, 15, 19, 31,
+ 17, 13, 41, 47, 35, 15, 11, 21, 23, 37,
+ 45, 37, 37, 55, 69, 59, 65, 101, 83, 99,
+ 11, 15, 53, 15, 29, 35, 43, 57, 65, 51,
+ 57, 55, 37, 47, 55, 39, 41, 6, 50, 36,
+ 22, 14, 20, 8, 1, 4, 10, 20, 64, 42,
+ 30, 16, 40, 20, 22, 10, 42, 6, 84, 60,
+ 46, 30, 48, 10, 1, 5, 1, 13, 90, 40,
+ 8, 4, 22, 1, 19, 9, 10, 76, 48, 20,
+ 6, 36, 10, 2, 0, 1, 124, 17, 3, 4,
+ 6, 0, 2, 20, 22, 14, 20, 28, 36, 13,
+ 1, 29, 11, 52, 19, 53, 11, 8, 7, 27,
+ 35, 19, 33, 49, 63, 67, 5, 0, 30, 5,
+ 17, 1, 2, 6, 21, 11, 3, 3, 37, 19,
+ 31, 0, 20, 43, 11, 18, 19, 6, 11, 0,
+ 38, 19, 23, 4, 16, 39, 37, 91, 34, 42,
+ 58, 12, 2, 14, 7, 7, 5, 25, 21, 21,
+ 51, 43, 37, 55, 53, 67, 119, 55, 51, 43,
+ 17, 29, 12, 34, 2, 30, 56, 11, 15, 23,
+ 23, 63, 45, 39, 79, 49, 67, 61, 93, 83,
+ 85, 85, 33, 29, 89, 27, 29, 51, 75, 61,
+ 63, 69, 59, 73, 73, 83, 91, 107, 101, 13,
+ 31, 61, 9, 20, 14, 34, 36, 42, 78, 50,
+ 68, 70, 106, 84, 76, 110, 104, 84, 12, 23,
+ 57, 79, 109, 125, 125, 125, 26, 92, 78, 78,
+ 56, 76, 38, 28, 36, 16, 17, 3, 24, 0,
+ 54, 74, 0, 12, 32, 40, 10, 36, 66, 8,
+ 1, 46, 13, 57, 103, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 42 */
+
+ 52, 8, 35, 52, 8, 35, 20, 40, 52, 24,
+ 11, 59, 2, 16, 104, 28, 86, 13, 40, 54,
+ 1, 33, 2, 61, 95, 5, 25, 125, 125, 125,
+ 114, 24, 11, 40, 54, 1, 13, 26, 36, 9,
+ 4, 3, 7, 13, 43, 17, 59, 10, 9, 7,
+ 7, 31, 19, 49, 30, 6, 0, 5, 20, 6,
+ 44, 0, 0, 0, 8, 65, 67, 20, 23, 11,
+ 36, 7, 49, 16, 78, 56, 114, 110, 44, 44,
+ 22, 56, 18, 10, 26, 19, 23, 27, 19, 50,
+ 2, 18, 11, 5, 9, 5, 3, 7, 23, 21,
+ 41, 36, 9, 10, 7, 31, 2, 17, 10, 8,
+ 0, 6, 22, 12, 13, 3, 8, 7, 2, 13,
+ 0, 21, 0, 0, 17, 16, 0, 13, 5, 18,
+ 7, 33, 17, 35, 25, 10, 5, 16, 14, 14,
+ 70, 34, 8, 7, 20, 7, 23, 4, 77, 15,
+ 0, 45, 9, 35, 12, 2, 25, 34, 14, 8,
+ 53, 12, 31, 23, 6, 57, 14, 36, 24, 28,
+ 34, 28, 14, 32, 28, 7, 4, 18, 4, 0,
+ 19, 5, 17, 15, 7, 13, 19, 17, 21, 31,
+ 19, 13, 43, 49, 35, 17, 15, 25, 27, 41,
+ 49, 41, 41, 59, 73, 63, 69, 107, 87, 101,
+ 11, 17, 53, 17, 31, 37, 45, 61, 69, 53,
+ 59, 55, 37, 47, 55, 39, 41, 6, 50, 36,
+ 22, 14, 20, 8, 1, 4, 12, 20, 64, 42,
+ 30, 16, 40, 22, 24, 10, 46, 6, 84, 60,
+ 46, 30, 48, 10, 1, 5, 0, 13, 90, 38,
+ 6, 4, 22, 1, 19, 9, 10, 74, 46, 18,
+ 4, 36, 10, 2, 0, 1, 124, 15, 1, 6,
+ 10, 2, 4, 22, 26, 16, 24, 32, 40, 11,
+ 0, 27, 11, 54, 17, 53, 11, 10, 7, 29,
+ 35, 21, 35, 51, 65, 69, 7, 1, 30, 7,
+ 19, 1, 2, 6, 23, 13, 3, 5, 39, 19,
+ 31, 0, 20, 43, 13, 18, 21, 6, 13, 1,
+ 38, 21, 25, 2, 14, 41, 39, 95, 32, 42,
+ 58, 10, 1, 10, 11, 11, 9, 31, 27, 25,
+ 57, 47, 41, 61, 61, 73, 125, 59, 57, 47,
+ 19, 31, 12, 36, 4, 32, 60, 13, 19, 27,
+ 25, 67, 49, 43, 83, 51, 71, 65, 97, 87,
+ 87, 87, 35, 29, 91, 31, 33, 55, 77, 65,
+ 67, 73, 63, 77, 77, 85, 93, 109, 101, 15,
+ 31, 63, 9, 22, 16, 34, 36, 44, 80, 52,
+ 70, 72, 108, 86, 78, 112, 106, 84, 8, 27,
+ 63, 83, 115, 125, 125, 125, 26, 94, 80, 78,
+ 58, 78, 38, 30, 36, 16, 15, 3, 26, 2,
+ 56, 76, 0, 14, 32, 40, 10, 36, 68, 8,
+ 1, 42, 17, 63, 109, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 43 */
+
+ 50, 8, 37, 50, 8, 37, 24, 42, 54, 24,
+ 11, 61, 0, 14, 104, 28, 90, 13, 40, 56,
+ 1, 35, 2, 63, 97, 7, 27, 125, 125, 125,
+ 120, 26, 11, 40, 56, 1, 11, 28, 38, 11,
+ 6, 1, 5, 15, 43, 17, 59, 10, 7, 7,
+ 7, 31, 17, 49, 30, 6, 0, 5, 20, 6,
+ 44, 0, 0, 0, 10, 65, 67, 20, 23, 13,
+ 36, 7, 47, 20, 82, 60, 116, 114, 46, 46,
+ 26, 58, 20, 10, 28, 19, 23, 27, 19, 52,
+ 2, 18, 9, 3, 7, 3, 1, 7, 25, 23,
+ 43, 36, 9, 10, 5, 31, 4, 17, 10, 8,
+ 0, 8, 22, 10, 13, 3, 8, 7, 2, 13,
+ 1, 21, 0, 1, 19, 14, 0, 13, 5, 18,
+ 7, 33, 19, 35, 27, 10, 5, 16, 14, 14,
+ 70, 34, 8, 7, 22, 7, 25, 4, 77, 17,
+ 0, 47, 9, 37, 12, 2, 27, 34, 14, 8,
+ 57, 12, 33, 23, 6, 59, 12, 36, 22, 28,
+ 34, 28, 14, 32, 28, 7, 2, 18, 2, 0,
+ 19, 7, 19, 15, 9, 15, 21, 19, 21, 31,
+ 19, 11, 45, 53, 35, 21, 19, 29, 31, 45,
+ 53, 45, 45, 63, 77, 65, 71, 111, 91, 103,
+ 11, 17, 55, 19, 33, 39, 49, 63, 71, 55,
+ 61, 57, 39, 49, 55, 39, 39, 8, 50, 36,
+ 22, 16, 20, 8, 1, 6, 14, 22, 62, 42,
+ 30, 16, 42, 22, 26, 12, 50, 6, 86, 60,
+ 46, 30, 50, 10, 1, 3, 0, 13, 90, 38,
+ 6, 4, 22, 0, 17, 7, 10, 74, 46, 16,
+ 4, 36, 10, 4, 2, 0, 124, 15, 0, 6,
+ 12, 4, 6, 26, 28, 18, 26, 34, 42, 11,
+ 2, 27, 9, 58, 17, 55, 11, 10, 7, 29,
+ 37, 21, 37, 53, 67, 71, 7, 1, 30, 7,
+ 19, 3, 0, 6, 23, 13, 5, 5, 39, 19,
+ 33, 0, 20, 45, 13, 16, 23, 4, 15, 1,
+ 40, 21, 27, 2, 14, 41, 39, 97, 30, 40,
+ 56, 8, 3, 8, 15, 15, 13, 35, 31, 31,
+ 63, 53, 43, 65, 67, 79, 125, 65, 61, 51,
+ 21, 33, 12, 38, 4, 36, 66, 17, 23, 31,
+ 29, 71, 53, 45, 87, 55, 73, 67, 99, 89,
+ 89, 89, 35, 31, 93, 33, 35, 57, 81, 69,
+ 71, 75, 65, 81, 79, 89, 97, 113, 103, 17,
+ 33, 65, 9, 22, 16, 36, 38, 44, 84, 54,
+ 72, 74, 110, 90, 80, 116, 108, 82, 6, 31,
+ 67, 89, 119, 125, 125, 125, 28, 94, 80, 80,
+ 58, 80, 40, 30, 38, 18, 15, 1, 26, 2,
+ 58, 78, 0, 14, 34, 42, 10, 38, 70, 8,
+ 1, 40, 21, 67, 115, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 44 */
+
+ 46, 8, 37, 46, 8, 37, 26, 46, 54, 24,
+ 13, 65, 3, 12, 104, 28, 94, 13, 42, 58,
+ 1, 37, 2, 65, 99, 7, 31, 125, 125, 125,
+ 124, 28, 11, 42, 58, 1, 11, 30, 38, 11,
+ 6, 1, 3, 15, 45, 17, 59, 10, 7, 5,
+ 7, 33, 17, 49, 32, 6, 0, 5, 22, 6,
+ 44, 0, 0, 0, 10, 67, 67, 22, 25, 13,
+ 36, 7, 47, 24, 84, 62, 120, 116, 48, 48,
+ 28, 60, 22, 12, 32, 17, 23, 25, 17, 52,
+ 2, 18, 9, 3, 7, 3, 2, 7, 25, 23,
+ 43, 36, 9, 10, 5, 31, 4, 17, 12, 8,
+ 0, 8, 22, 10, 13, 3, 8, 7, 2, 13,
+ 3, 23, 0, 3, 21, 12, 1, 15, 3, 20,
+ 7, 35, 19, 33, 27, 10, 5, 16, 14, 14,
+ 72, 34, 8, 9, 22, 9, 27, 2, 79, 17,
+ 0, 47, 11, 39, 12, 0, 29, 34, 14, 8,
+ 59, 12, 35, 23, 6, 59, 10, 36, 22, 28,
+ 34, 28, 14, 32, 26, 7, 2, 18, 2, 0,
+ 21, 7, 19, 17, 9, 15, 23, 21, 23, 31,
+ 21, 11, 47, 55, 35, 23, 23, 33, 35, 49,
+ 57, 49, 49, 67, 81, 69, 75, 115, 95, 105,
+ 11, 17, 55, 21, 35, 41, 51, 67, 75, 57,
+ 65, 59, 39, 49, 55, 39, 39, 8, 50, 36,
+ 22, 16, 20, 8, 1, 6, 16, 22, 62, 42,
+ 30, 16, 42, 24, 26, 14, 54, 6, 86, 60,
+ 46, 30, 50, 10, 1, 3, 2, 13, 90, 38,
+ 4, 4, 22, 0, 17, 5, 10, 72, 44, 14,
+ 2, 36, 10, 4, 2, 0, 124, 13, 2, 8,
+ 14, 6, 8, 28, 30, 20, 28, 38, 46, 9,
+ 2, 27, 9, 60, 17, 55, 11, 12, 7, 31,
+ 37, 21, 39, 55, 69, 73, 9, 3, 30, 9,
+ 21, 3, 0, 6, 25, 15, 5, 5, 41, 21,
+ 33, 0, 20, 45, 15, 16, 25, 4, 17, 3,
+ 40, 23, 29, 0, 12, 43, 41, 101, 28, 38,
+ 56, 6, 5, 4, 19, 19, 17, 41, 37, 35,
+ 69, 57, 45, 71, 73, 85, 125, 71, 67, 55,
+ 23, 37, 12, 40, 4, 38, 70, 21, 27, 35,
+ 33, 75, 57, 49, 91, 57, 77, 69, 103, 93,
+ 91, 91, 37, 31, 95, 35, 39, 61, 85, 73,
+ 75, 79, 69, 85, 83, 91, 99, 115, 105, 19,
+ 35, 67, 9, 24, 16, 36, 38, 46, 86, 54,
+ 74, 76, 112, 92, 82, 118, 110, 82, 2, 35,
+ 73, 93, 125, 125, 125, 125, 28, 96, 82, 80,
+ 58, 80, 40, 30, 38, 18, 15, 1, 28, 4,
+ 60, 80, 0, 16, 34, 42, 10, 38, 72, 8,
+ 1, 38, 25, 73, 121, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 45 */
+
+ 44, 8, 37, 44, 8, 37, 30, 48, 54, 24,
+ 13, 67, 5, 8, 104, 28, 100, 15, 44, 58,
+ 1, 37, 4, 67, 101, 9, 35, 125, 125, 125,
+ 124, 32, 11, 44, 58, 1, 11, 32, 38, 13,
+ 6, 1, 3, 15, 45, 15, 59, 10, 7, 3,
+ 9, 33, 17, 49, 32, 8, 2, 3, 22, 6,
+ 44, 0, 0, 0, 12, 67, 67, 22, 27, 13,
+ 36, 5, 47, 26, 88, 66, 122, 120, 52, 50,
+ 30, 64, 24, 14, 36, 17, 21, 25, 15, 52,
+ 2, 18, 7, 3, 5, 1, 4, 9, 25, 23,
+ 43, 36, 9, 12, 3, 31, 4, 17, 12, 8,
+ 1, 8, 22, 10, 11, 3, 10, 7, 2, 13,
+ 3, 23, 0, 3, 23, 12, 3, 17, 1, 20,
+ 7, 37, 21, 33, 29, 10, 7, 16, 14, 14,
+ 74, 36, 8, 9, 22, 9, 29, 2, 81, 17,
+ 0, 49, 11, 41, 10, 0, 29, 36, 14, 8,
+ 61, 10, 37, 23, 6, 61, 10, 36, 22, 28,
+ 34, 28, 14, 32, 26, 7, 2, 18, 2, 0,
+ 21, 7, 19, 17, 9, 15, 23, 23, 23, 31,
+ 21, 11, 49, 57, 35, 27, 27, 35, 37, 53,
+ 61, 53, 53, 71, 85, 73, 79, 121, 97, 107,
+ 11, 19, 57, 23, 37, 43, 53, 69, 77, 59,
+ 67, 59, 41, 51, 55, 39, 37, 8, 50, 36,
+ 22, 16, 20, 10, 0, 8, 18, 24, 62, 42,
+ 30, 16, 42, 24, 28, 14, 58, 6, 86, 60,
+ 46, 30, 50, 10, 1, 3, 4, 13, 92, 36,
+ 4, 4, 22, 0, 17, 5, 10, 72, 42, 12,
+ 0, 36, 10, 4, 2, 2, 124, 11, 4, 10,
+ 18, 8, 10, 30, 34, 22, 32, 40, 50, 7,
+ 4, 25, 7, 62, 15, 57, 9, 14, 7, 31,
+ 39, 23, 39, 57, 71, 75, 11, 3, 30, 9,
+ 21, 5, 0, 6, 27, 17, 5, 7, 43, 21,
+ 33, 0, 20, 45, 17, 14, 25, 4, 19, 5,
+ 40, 25, 31, 0, 12, 45, 43, 105, 26, 38,
+ 54, 4, 9, 2, 23, 23, 21, 45, 43, 41,
+ 75, 61, 49, 77, 81, 91, 125, 75, 71, 59,
+ 25, 39, 12, 42, 6, 40, 74, 23, 29, 39,
+ 35, 79, 61, 51, 95, 59, 81, 73, 107, 95,
+ 93, 93, 37, 31, 97, 39, 41, 65, 87, 75,
+ 77, 81, 71, 87, 85, 93, 101, 117, 105, 19,
+ 35, 69, 7, 24, 18, 38, 40, 48, 88, 56,
+ 76, 78, 116, 94, 84, 120, 114, 80, 1, 39,
+ 77, 99, 125, 125, 125, 125, 28, 96, 82, 82,
+ 60, 82, 40, 32, 40, 20, 13, 1, 30, 4,
+ 62, 82, 2, 18, 36, 44, 10, 40, 74, 8,
+ 1, 34, 29, 77, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 46 */
+
+ 42, 8, 37, 42, 8, 37, 34, 52, 56, 24,
+ 15, 71, 9, 6, 104, 28, 104, 15, 46, 60,
+ 1, 39, 4, 69, 103, 9, 39, 125, 125, 125,
+ 124, 34, 11, 46, 60, 1, 9, 34, 40, 13,
+ 6, 0, 1, 17, 47, 15, 59, 10, 7, 3,
+ 9, 33, 17, 49, 34, 8, 2, 3, 24, 6,
+ 44, 0, 0, 0, 12, 67, 67, 24, 29, 13,
+ 36, 5, 47, 30, 92, 68, 124, 124, 54, 52,
+ 34, 66, 26, 16, 40, 15, 21, 23, 13, 52,
+ 2, 18, 7, 3, 5, 1, 8, 9, 27, 25,
+ 45, 36, 9, 12, 3, 31, 6, 17, 14, 8,
+ 1, 8, 22, 8, 11, 3, 10, 7, 2, 13,
+ 5, 23, 0, 5, 25, 10, 5, 19, 1, 22,
+ 7, 39, 21, 31, 29, 10, 7, 16, 14, 14,
+ 76, 36, 8, 9, 24, 9, 31, 2, 83, 17,
+ 0, 49, 13, 43, 10, 0, 31, 36, 14, 8,
+ 65, 10, 39, 23, 6, 61, 8, 36, 22, 28,
+ 34, 28, 14, 32, 26, 7, 0, 18, 2, 0,
+ 21, 9, 21, 19, 11, 17, 25, 25, 25, 31,
+ 23, 11, 51, 61, 35, 29, 31, 39, 41, 57,
+ 65, 57, 57, 75, 89, 75, 81, 125, 101, 109,
+ 11, 19, 57, 25, 39, 45, 57, 73, 81, 61,
+ 69, 61, 41, 51, 55, 39, 37, 10, 50, 36,
+ 22, 16, 20, 10, 0, 8, 20, 24, 62, 42,
+ 30, 16, 44, 26, 30, 16, 62, 6, 86, 60,
+ 46, 30, 52, 10, 1, 3, 4, 13, 92, 36,
+ 2, 4, 22, 0, 17, 3, 10, 70, 42, 10,
+ 1, 36, 10, 4, 4, 2, 124, 9, 6, 10,
+ 20, 10, 12, 34, 36, 24, 34, 44, 52, 7,
+ 6, 25, 7, 66, 15, 57, 9, 14, 7, 33,
+ 39, 23, 41, 59, 73, 77, 11, 5, 30, 11,
+ 23, 5, 1, 6, 29, 17, 7, 7, 45, 21,
+ 35, 0, 20, 47, 17, 14, 27, 4, 21, 5,
+ 40, 27, 33, 1, 10, 47, 43, 107, 24, 36,
+ 54, 2, 11, 1, 27, 27, 25, 51, 47, 45,
+ 81, 67, 51, 83, 87, 97, 125, 81, 77, 63,
+ 27, 41, 12, 44, 6, 42, 78, 27, 33, 43,
+ 39, 83, 65, 55, 99, 63, 83, 75, 109, 99,
+ 95, 95, 39, 33, 99, 41, 45, 67, 91, 79,
+ 81, 85, 75, 91, 89, 95, 105, 121, 107, 21,
+ 37, 71, 7, 26, 18, 38, 40, 48, 90, 58,
+ 78, 80, 118, 96, 86, 124, 116, 80, 3, 43,
+ 83, 103, 125, 125, 125, 125, 30, 98, 84, 82,
+ 60, 84, 42, 32, 40, 20, 13, 0, 30, 6,
+ 64, 84, 2, 18, 36, 44, 10, 40, 76, 8,
+ 1, 32, 33, 83, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 47 */
+
+ 40, 8, 37, 40, 8, 37, 38, 54, 56, 24,
+ 15, 73, 11, 4, 104, 28, 108, 15, 48, 62,
+ 1, 41, 4, 71, 105, 11, 43, 125, 125, 125,
+ 124, 36, 11, 48, 62, 1, 9, 36, 40, 15,
+ 6, 0, 0, 17, 47, 15, 59, 10, 7, 1,
+ 9, 33, 17, 49, 34, 8, 2, 3, 24, 6,
+ 44, 0, 0, 0, 14, 67, 67, 24, 31, 13,
+ 36, 5, 47, 34, 96, 72, 124, 124, 56, 54,
+ 36, 68, 28, 18, 44, 15, 21, 23, 11, 52,
+ 2, 18, 5, 3, 3, 0, 10, 9, 27, 25,
+ 45, 36, 9, 12, 1, 31, 6, 17, 14, 8,
+ 1, 8, 22, 8, 11, 3, 10, 7, 2, 13,
+ 7, 23, 0, 7, 27, 8, 7, 21, 0, 22,
+ 7, 41, 23, 31, 31, 10, 7, 16, 14, 14,
+ 78, 36, 8, 9, 24, 9, 33, 2, 85, 17,
+ 0, 51, 13, 45, 10, 0, 33, 36, 14, 8,
+ 67, 10, 41, 23, 6, 63, 6, 36, 22, 28,
+ 34, 28, 14, 32, 26, 7, 0, 18, 2, 0,
+ 21, 9, 21, 19, 11, 17, 27, 27, 25, 31,
+ 23, 11, 53, 63, 35, 33, 35, 43, 45, 61,
+ 69, 61, 61, 79, 93, 79, 85, 125, 105, 111,
+ 11, 19, 59, 27, 41, 47, 59, 75, 83, 63,
+ 71, 63, 43, 53, 55, 39, 35, 10, 50, 36,
+ 22, 16, 20, 10, 0, 10, 22, 26, 62, 42,
+ 30, 16, 44, 26, 32, 18, 66, 6, 86, 60,
+ 46, 30, 52, 10, 1, 3, 6, 13, 92, 36,
+ 2, 4, 22, 0, 17, 1, 10, 70, 40, 8,
+ 3, 36, 10, 4, 4, 4, 124, 7, 8, 12,
+ 22, 12, 14, 36, 38, 26, 36, 46, 56, 5,
+ 8, 25, 5, 68, 15, 59, 9, 16, 7, 33,
+ 41, 23, 43, 61, 75, 79, 13, 5, 30, 11,
+ 23, 7, 1, 6, 31, 19, 7, 7, 47, 21,
+ 35, 0, 20, 47, 19, 12, 29, 4, 23, 7,
+ 40, 29, 35, 1, 10, 49, 45, 111, 22, 34,
+ 52, 0, 13, 3, 31, 31, 29, 55, 53, 51,
+ 87, 71, 53, 89, 93, 103, 125, 87, 81, 67,
+ 29, 43, 12, 46, 6, 44, 82, 31, 37, 47,
+ 43, 87, 69, 57, 103, 65, 87, 77, 113, 101,
+ 97, 97, 39, 33, 101, 43, 47, 71, 95, 83,
+ 85, 87, 77, 95, 91, 97, 107, 123, 109, 23,
+ 39, 73, 7, 26, 18, 40, 42, 50, 92, 60,
+ 80, 82, 120, 98, 88, 124, 118, 78, 7, 47,
+ 87, 109, 125, 125, 125, 125, 30, 98, 84, 84,
+ 60, 86, 42, 32, 42, 22, 13, 0, 32, 6,
+ 66, 86, 2, 20, 38, 46, 10, 42, 78, 8,
+ 1, 30, 37, 87, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 48 */
+
+ 36, 6, 39, 36, 6, 39, 40, 56, 56, 24,
+ 17, 77, 15, 0, 102, 28, 112, 17, 48, 62,
+ 3, 43, 4, 75, 109, 13, 47, 125, 125, 125,
+ 124, 38, 13, 48, 62, 3, 9, 38, 40, 17,
+ 6, 0, 0, 19, 49, 15, 61, 10, 7, 1,
+ 11, 35, 17, 49, 34, 8, 2, 3, 24, 4,
+ 44, 0, 0, 0, 14, 69, 67, 24, 33, 15,
+ 36, 5, 47, 36, 98, 74, 124, 124, 58, 56,
+ 38, 70, 30, 18, 46, 15, 21, 23, 11, 52,
+ 2, 18, 5, 3, 3, 0, 12, 11, 29, 27,
+ 47, 36, 9, 12, 1, 31, 6, 17, 14, 6,
+ 3, 8, 20, 6, 11, 5, 10, 7, 0, 13,
+ 9, 25, 1, 9, 31, 6, 9, 23, 0, 22,
+ 7, 43, 25, 31, 33, 8, 9, 16, 14, 14,
+ 78, 36, 8, 11, 24, 11, 35, 0, 87, 19,
+ 1, 53, 15, 47, 8, 1, 35, 36, 14, 8,
+ 71, 8, 43, 23, 6, 65, 4, 34, 20, 28,
+ 34, 26, 12, 32, 24, 9, 1, 18, 0, 1,
+ 23, 11, 23, 21, 13, 19, 29, 29, 27, 31,
+ 25, 11, 55, 67, 35, 37, 39, 47, 49, 65,
+ 75, 65, 65, 83, 97, 83, 89, 125, 109, 115,
+ 13, 21, 61, 29, 45, 51, 63, 79, 87, 67,
+ 75, 65, 45, 55, 55, 39, 35, 10, 50, 36,
+ 22, 16, 20, 10, 0, 10, 24, 26, 60, 42,
+ 30, 16, 44, 26, 32, 18, 68, 4, 86, 60,
+ 44, 28, 52, 10, 1, 3, 6, 13, 92, 34,
+ 0, 2, 22, 0, 17, 1, 10, 68, 38, 6,
+ 5, 34, 10, 4, 4, 4, 124, 7, 10, 12,
+ 24, 14, 14, 38, 40, 28, 38, 48, 58, 5,
+ 8, 25, 5, 70, 15, 61, 9, 16, 7, 35,
+ 43, 25, 45, 63, 79, 83, 15, 7, 30, 13,
+ 25, 9, 3, 4, 33, 21, 9, 9, 49, 23,
+ 37, 0, 20, 49, 21, 10, 31, 2, 25, 9,
+ 40, 31, 39, 3, 8, 51, 47, 115, 20, 32,
+ 50, 3, 17, 7, 35, 35, 35, 61, 59, 57,
+ 93, 77, 57, 95, 101, 109, 125, 93, 87, 71,
+ 33, 47, 12, 46, 6, 46, 86, 35, 41, 51,
+ 47, 93, 73, 61, 109, 69, 91, 81, 117, 105,
+ 99, 99, 41, 35, 105, 47, 51, 75, 99, 87,
+ 89, 91, 81, 99, 95, 101, 111, 125, 111, 25,
+ 41, 75, 7, 26, 18, 40, 42, 50, 94, 60,
+ 82, 82, 122, 100, 90, 124, 120, 76, 11, 53,
+ 93, 115, 125, 125, 125, 125, 30, 98, 84, 84,
+ 60, 86, 42, 32, 42, 22, 13, 0, 32, 6,
+ 68, 88, 2, 20, 38, 46, 10, 42, 78, 8,
+ 3, 26, 43, 93, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 49 */
+
+ 34, 6, 39, 34, 6, 39, 44, 60, 58, 26,
+ 17, 79, 17, 1, 102, 28, 118, 17, 50, 64,
+ 3, 43, 6, 77, 111, 13, 49, 125, 125, 125,
+ 124, 42, 13, 50, 64, 3, 7, 42, 42, 17,
+ 8, 2, 2, 19, 49, 13, 61, 10, 5, 0,
+ 11, 35, 15, 49, 36, 10, 4, 1, 26, 4,
+ 44, 0, 0, 0, 16, 69, 67, 26, 33, 15,
+ 36, 3, 45, 40, 102, 78, 124, 124, 62, 58,
+ 42, 74, 34, 20, 50, 13, 19, 21, 9, 54,
+ 4, 20, 3, 1, 1, 2, 16, 11, 29, 27,
+ 47, 38, 9, 14, 0, 29, 8, 15, 16, 6,
+ 3, 10, 20, 6, 9, 5, 12, 7, 0, 11,
+ 9, 25, 1, 9, 33, 6, 9, 23, 2, 24,
+ 5, 43, 25, 29, 33, 8, 9, 18, 14, 14,
+ 80, 38, 10, 11, 26, 11, 35, 0, 87, 19,
+ 1, 53, 15, 49, 8, 1, 35, 38, 16, 8,
+ 73, 8, 43, 21, 6, 65, 4, 34, 20, 28,
+ 34, 26, 12, 32, 24, 9, 1, 18, 0, 1,
+ 23, 11, 23, 21, 13, 19, 29, 29, 27, 29,
+ 25, 9, 55, 69, 33, 39, 41, 49, 51, 69,
+ 79, 67, 67, 87, 99, 85, 91, 125, 111, 117,
+ 13, 21, 61, 31, 47, 53, 65, 81, 89, 69,
+ 77, 65, 45, 55, 53, 37, 33, 12, 52, 36,
+ 22, 18, 22, 12, 2, 12, 28, 28, 60, 42,
+ 30, 16, 46, 28, 34, 20, 72, 4, 88, 62,
+ 44, 28, 54, 10, 1, 1, 8, 13, 94, 34,
+ 0, 2, 24, 2, 15, 0, 10, 68, 38, 6,
+ 5, 34, 10, 6, 6, 6, 124, 5, 12, 14,
+ 28, 18, 16, 42, 44, 32, 42, 52, 62, 3,
+ 10, 23, 3, 74, 13, 61, 7, 18, 5, 35,
+ 43, 25, 45, 63, 81, 85, 15, 7, 32, 13,
+ 25, 9, 3, 4, 33, 21, 9, 9, 49, 23,
+ 37, 2, 22, 49, 21, 10, 31, 2, 25, 9,
+ 42, 31, 41, 3, 8, 51, 47, 117, 20, 32,
+ 50, 5, 19, 9, 39, 39, 39, 65, 63, 61,
+ 97, 81, 59, 99, 107, 115, 125, 97, 91, 73,
+ 35, 49, 12, 48, 8, 50, 92, 37, 43, 53,
+ 49, 97, 75, 63, 113, 71, 93, 83, 119, 107,
+ 101, 99, 41, 35, 107, 49, 53, 77, 101, 89,
+ 91, 93, 83, 101, 97, 103, 113, 125, 111, 25,
+ 41, 75, 5, 28, 20, 42, 44, 52, 98, 62,
+ 84, 84, 124, 104, 92, 124, 124, 76, 13, 57,
+ 97, 119, 125, 125, 125, 125, 32, 100, 86, 86,
+ 62, 88, 44, 34, 44, 24, 11, 2, 34, 8,
+ 72, 92, 4, 22, 40, 48, 12, 44, 80, 8,
+ 3, 24, 47, 97, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 50 */
+
+ 32, 6, 39, 32, 6, 39, 48, 62, 58, 26,
+ 17, 81, 19, 3, 102, 28, 122, 17, 52, 66,
+ 3, 45, 6, 79, 113, 15, 53, 125, 125, 125,
+ 124, 44, 13, 52, 66, 3, 7, 44, 42, 19,
+ 8, 2, 4, 19, 49, 13, 61, 10, 5, 2,
+ 11, 35, 15, 49, 36, 10, 4, 1, 26, 4,
+ 44, 0, 0, 0, 18, 69, 67, 26, 35, 15,
+ 36, 3, 45, 44, 106, 82, 124, 124, 64, 60,
+ 44, 76, 36, 22, 54, 13, 19, 21, 7, 54,
+ 4, 20, 1, 1, 0, 2, 18, 11, 29, 27,
+ 47, 38, 9, 14, 0, 29, 8, 15, 16, 6,
+ 3, 10, 20, 6, 9, 5, 12, 7, 0, 11,
+ 11, 25, 1, 11, 35, 4, 11, 25, 4, 24,
+ 5, 45, 27, 29, 35, 8, 9, 18, 14, 14,
+ 82, 38, 10, 11, 26, 11, 37, 0, 89, 19,
+ 1, 55, 15, 51, 8, 1, 37, 38, 16, 8,
+ 75, 8, 45, 21, 6, 67, 2, 34, 20, 28,
+ 34, 26, 12, 32, 24, 9, 1, 18, 0, 1,
+ 23, 11, 23, 21, 13, 19, 31, 31, 29, 29,
+ 25, 9, 57, 71, 33, 43, 45, 53, 55, 73,
+ 83, 71, 71, 91, 103, 89, 95, 125, 115, 119,
+ 13, 21, 63, 33, 49, 55, 67, 85, 91, 71,
+ 79, 67, 47, 57, 53, 37, 31, 12, 52, 36,
+ 22, 18, 22, 12, 2, 12, 30, 30, 60, 42,
+ 30, 16, 46, 28, 36, 22, 76, 4, 88, 62,
+ 44, 28, 54, 10, 1, 1, 10, 13, 94, 34,
+ 0, 2, 24, 2, 15, 2, 10, 66, 36, 4,
+ 7, 34, 10, 6, 6, 6, 124, 3, 14, 16,
+ 30, 20, 18, 44, 46, 34, 44, 54, 66, 1,
+ 12, 23, 1, 76, 13, 63, 7, 20, 5, 35,
+ 45, 25, 47, 65, 83, 87, 17, 7, 32, 13,
+ 27, 11, 3, 4, 35, 23, 9, 9, 51, 23,
+ 37, 2, 22, 49, 23, 8, 33, 2, 27, 11,
+ 42, 33, 43, 3, 8, 53, 49, 121, 18, 30,
+ 48, 7, 21, 11, 43, 43, 43, 69, 69, 67,
+ 103, 85, 61, 105, 113, 121, 125, 103, 95, 77,
+ 37, 51, 12, 50, 8, 52, 96, 41, 47, 57,
+ 53, 101, 79, 67, 117, 73, 97, 85, 123, 109,
+ 103, 101, 43, 35, 109, 51, 55, 81, 105, 93,
+ 95, 95, 87, 105, 99, 105, 115, 125, 113, 27,
+ 43, 77, 5, 28, 20, 44, 44, 54, 100, 64,
+ 86, 86, 124, 106, 94, 124, 124, 74, 17, 61,
+ 101, 125, 125, 125, 125, 125, 32, 100, 86, 86,
+ 62, 90, 44, 34, 44, 24, 11, 2, 36, 8,
+ 74, 94, 4, 24, 42, 50, 12, 44, 82, 8,
+ 3, 22, 51, 101, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 0, qp = 51 */
+
+ 30, 6, 39, 30, 6, 39, 52, 66, 60, 26,
+ 19, 85, 23, 5, 102, 28, 124, 17, 54, 68,
+ 3, 47, 6, 81, 115, 15, 57, 125, 125, 125,
+ 124, 46, 13, 54, 68, 3, 5, 46, 44, 19,
+ 8, 4, 6, 21, 51, 13, 61, 10, 5, 2,
+ 11, 35, 15, 49, 38, 10, 4, 1, 28, 4,
+ 44, 0, 0, 0, 18, 69, 67, 28, 37, 15,
+ 36, 3, 45, 48, 110, 84, 124, 124, 66, 62,
+ 48, 78, 38, 24, 58, 11, 19, 19, 5, 54,
+ 4, 20, 1, 1, 0, 4, 22, 11, 31, 29,
+ 49, 38, 9, 14, 2, 29, 10, 15, 18, 6,
+ 3, 10, 20, 4, 9, 5, 12, 7, 0, 11,
+ 13, 25, 1, 13, 37, 2, 13, 27, 4, 26,
+ 5, 47, 27, 27, 35, 8, 9, 18, 14, 14,
+ 84, 38, 10, 11, 28, 11, 39, 0, 91, 19,
+ 1, 55, 17, 53, 8, 1, 39, 38, 16, 8,
+ 79, 8, 47, 21, 6, 67, 0, 34, 20, 28,
+ 34, 26, 12, 32, 24, 9, 3, 18, 0, 1,
+ 23, 13, 25, 23, 15, 21, 33, 33, 29, 29,
+ 27, 9, 59, 75, 33, 45, 49, 57, 59, 77,
+ 87, 75, 75, 95, 107, 91, 97, 125, 119, 121,
+ 13, 21, 63, 35, 51, 57, 71, 87, 95, 73,
+ 81, 69, 47, 57, 53, 37, 31, 14, 52, 36,
+ 22, 18, 22, 12, 2, 14, 32, 30, 60, 42,
+ 30, 16, 48, 30, 38, 24, 80, 4, 88, 62,
+ 44, 28, 56, 10, 1, 1, 10, 13, 94, 34,
+ 1, 2, 24, 2, 15, 4, 10, 66, 36, 2,
+ 9, 34, 10, 6, 8, 8, 124, 1, 16, 16,
+ 32, 22, 20, 48, 48, 36, 46, 58, 68, 1,
+ 14, 23, 1, 80, 13, 63, 7, 20, 5, 37,
+ 45, 25, 49, 67, 85, 89, 17, 9, 32, 15,
+ 27, 11, 5, 4, 37, 23, 11, 9, 53, 23,
+ 39, 2, 22, 51, 23, 8, 35, 2, 29, 11,
+ 42, 35, 45, 5, 6, 55, 49, 123, 16, 28,
+ 48, 9, 23, 15, 47, 47, 47, 75, 73, 71,
+ 109, 91, 63, 111, 119, 125, 125, 109, 101, 81,
+ 39, 53, 12, 52, 8, 54, 100, 45, 51, 61,
+ 57, 105, 83, 69, 121, 77, 99, 87, 125, 113,
+ 105, 103, 43, 37, 111, 53, 59, 83, 109, 97,
+ 99, 99, 89, 109, 103, 107, 119, 125, 115, 29,
+ 45, 79, 5, 30, 20, 44, 46, 54, 102, 66,
+ 88, 88, 124, 108, 96, 124, 124, 74, 19, 65,
+ 107, 125, 125, 125, 125, 125, 34, 102, 88, 88,
+ 62, 92, 46, 34, 46, 26, 11, 4, 36, 10,
+ 76, 96, 4, 24, 42, 50, 12, 46, 84, 8,
+ 3, 20, 55, 107, 125, 125, 125, 125, 125, 125,
+ },
+
+ },
+
+ {
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 0 */
+
+ 124, 18, 21, 124, 18, 21, 125, 81, 20, 18,
+ 24, 76, 124, 124, 108, 44, 109, 3, 15, 31,
+ 22, 26, 13, 18, 58, 82, 124, 122, 54, 11,
+ 125, 75, 25, 15, 31, 22, 11, 53, 22, 40,
+ 11, 37, 65, 8, 23, 47, 73, 14, 21, 43,
+ 8, 35, 45, 63, 5, 27, 13, 45, 17, 4,
+ 44, 0, 0, 0, 39, 45, 67, 17, 44, 2,
+ 96, 24, 33, 125, 55, 65, 35, 69, 77, 67,
+ 111, 71, 93, 77, 125, 33, 51, 61, 57, 48,
+ 3, 41, 125, 19, 81, 55, 125, 16, 14, 16,
+ 4, 20, 9, 21, 49, 79, 55, 51, 57, 25,
+ 47, 93, 83, 29, 97, 71, 125, 125, 125, 125,
+ 5, 29, 15, 17, 8, 16, 13, 23, 51, 111,
+ 23, 86, 82, 125, 18, 4, 10, 6, 4, 7,
+ 41, 21, 3, 22, 12, 4, 11, 13, 16, 15,
+ 10, 4, 44, 76, 62, 40, 32, 38, 24, 34,
+ 50, 5, 50, 42, 58, 51, 36, 70, 64, 124,
+ 124, 96, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 106, 124, 124, 124, 124, 124, 124, 124,
+ 112, 124, 124, 124, 54, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 106, 90, 76, 44,
+ 23, 17, 27, 56, 64, 56, 66, 36, 42, 36,
+ 74, 18, 5, 14, 19, 7, 105, 97, 15, 4,
+ 20, 5, 27, 33, 41, 47, 125, 75, 48, 20,
+ 4, 23, 27, 55, 87, 95, 117, 25, 38, 22,
+ 12, 10, 17, 11, 11, 21, 45, 5, 58, 62,
+ 64, 22, 16, 7, 19, 51, 22, 118, 110, 110,
+ 88, 52, 4, 19, 13, 29, 124, 125, 121, 93,
+ 125, 121, 83, 115, 107, 77, 107, 105, 117, 63,
+ 73, 63, 95, 101, 51, 33, 37, 43, 35, 17,
+ 1, 7, 14, 11, 11, 11, 11, 7, 27, 1,
+ 4, 7, 1, 12, 3, 5, 2, 24, 5, 15,
+ 23, 13, 17, 6, 52, 32, 56, 52, 44, 44,
+ 30, 44, 44, 8, 26, 46, 5, 26, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 108, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 98, 74, 52, 16, 3, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 86,
+ 66, 38, 30, 28, 36, 82, 82, 84, 86, 70,
+ 78, 58, 42, 48, 26, 13, 18, 15, 39, 62,
+ 28, 18, 43, 35, 27, 35, 33, 19, 21, 39,
+ 15, 7, 4, 5, 5, 8, 8, 124, 124, 124,
+ 124, 124, 120, 106, 72, 12, 15, 78, 54, 42,
+ 22, 12, 0, 3, 7, 37, 35, 25, 17, 29,
+ 17, 9, 13, 25, 5, 2, 12, 4, 6, 18,
+ 10, 124, 124, 124, 124, 124, 120, 106, 72, 12,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 1 */
+
+ 124, 18, 21, 124, 18, 21, 123, 77, 22, 20,
+ 24, 74, 122, 124, 110, 44, 105, 3, 13, 29,
+ 22, 26, 11, 18, 56, 80, 122, 116, 50, 13,
+ 121, 73, 23, 13, 29, 22, 11, 51, 22, 40,
+ 9, 35, 63, 8, 23, 45, 71, 14, 19, 41,
+ 8, 33, 43, 61, 3, 25, 13, 43, 15, 4,
+ 44, 0, 0, 0, 37, 45, 67, 15, 44, 2,
+ 96, 24, 33, 121, 51, 61, 31, 63, 73, 63,
+ 107, 67, 89, 73, 121, 33, 49, 59, 55, 48,
+ 3, 39, 121, 17, 79, 53, 123, 16, 14, 16,
+ 4, 22, 9, 19, 47, 77, 53, 49, 55, 23,
+ 45, 89, 79, 27, 93, 67, 117, 117, 119, 121,
+ 3, 27, 13, 15, 8, 18, 11, 21, 49, 105,
+ 21, 82, 80, 121, 18, 6, 10, 8, 6, 5,
+ 37, 19, 1, 22, 12, 4, 9, 11, 14, 13,
+ 10, 4, 44, 74, 62, 40, 32, 38, 24, 34,
+ 48, 3, 50, 42, 58, 51, 36, 70, 64, 124,
+ 124, 94, 124, 124, 124, 122, 124, 124, 124, 124,
+ 124, 124, 104, 124, 124, 124, 124, 124, 124, 124,
+ 108, 124, 120, 124, 52, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 122, 104, 88, 74, 42,
+ 23, 17, 27, 56, 62, 54, 64, 34, 40, 34,
+ 72, 16, 5, 12, 19, 7, 103, 93, 13, 6,
+ 20, 3, 25, 31, 39, 45, 121, 71, 50, 22,
+ 6, 21, 25, 51, 83, 91, 113, 23, 40, 24,
+ 14, 12, 15, 9, 9, 19, 43, 5, 60, 62,
+ 64, 22, 18, 5, 19, 49, 22, 118, 110, 108,
+ 86, 52, 6, 17, 11, 27, 124, 121, 117, 89,
+ 121, 117, 79, 111, 103, 73, 103, 101, 111, 61,
+ 71, 61, 91, 97, 49, 31, 35, 41, 33, 15,
+ 1, 7, 14, 11, 11, 11, 9, 5, 25, 0,
+ 4, 5, 0, 12, 1, 3, 2, 24, 3, 13,
+ 21, 11, 15, 6, 50, 32, 54, 52, 44, 44,
+ 30, 44, 44, 8, 26, 44, 5, 24, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 104, 124, 124, 124, 124, 124, 124, 124,
+ 122, 124, 96, 72, 50, 16, 3, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 84,
+ 64, 36, 30, 28, 34, 80, 80, 82, 82, 68,
+ 76, 56, 40, 46, 24, 13, 16, 15, 39, 60,
+ 26, 16, 41, 33, 25, 33, 29, 15, 19, 37,
+ 13, 5, 6, 3, 3, 8, 8, 124, 124, 124,
+ 124, 120, 112, 98, 64, 8, 13, 78, 56, 44,
+ 24, 14, 2, 1, 5, 35, 33, 23, 15, 27,
+ 15, 7, 11, 23, 3, 4, 12, 6, 8, 18,
+ 10, 124, 124, 124, 124, 120, 112, 98, 64, 8,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 2 */
+
+ 124, 18, 21, 124, 18, 21, 119, 75, 22, 20,
+ 24, 72, 118, 122, 110, 44, 101, 3, 13, 27,
+ 22, 24, 11, 16, 52, 78, 116, 108, 44, 17,
+ 115, 71, 23, 13, 27, 22, 11, 49, 22, 38,
+ 9, 35, 61, 8, 23, 45, 71, 14, 19, 41,
+ 8, 33, 43, 61, 3, 25, 13, 43, 15, 4,
+ 44, 0, 0, 0, 35, 45, 67, 15, 42, 2,
+ 94, 24, 33, 117, 49, 59, 27, 59, 71, 61,
+ 103, 65, 87, 71, 117, 33, 49, 59, 55, 48,
+ 3, 37, 117, 17, 77, 51, 119, 16, 14, 16,
+ 2, 22, 9, 19, 45, 75, 51, 47, 53, 23,
+ 43, 87, 77, 25, 91, 65, 107, 109, 113, 115,
+ 3, 27, 13, 15, 8, 18, 11, 21, 49, 101,
+ 21, 78, 76, 115, 18, 6, 10, 8, 6, 5,
+ 33, 17, 1, 22, 12, 4, 7, 9, 12, 13,
+ 10, 4, 42, 72, 60, 40, 30, 38, 24, 34,
+ 46, 3, 48, 40, 56, 51, 36, 68, 62, 124,
+ 124, 92, 120, 124, 124, 118, 124, 124, 124, 124,
+ 124, 124, 100, 124, 124, 124, 124, 124, 124, 124,
+ 104, 124, 116, 124, 48, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 118, 100, 84, 70, 38,
+ 23, 17, 29, 54, 60, 52, 62, 32, 38, 32,
+ 68, 14, 5, 10, 21, 9, 101, 91, 11, 6,
+ 20, 3, 23, 29, 37, 43, 117, 69, 50, 22,
+ 6, 19, 23, 49, 79, 87, 109, 21, 42, 26,
+ 16, 14, 13, 9, 9, 19, 41, 5, 62, 62,
+ 62, 22, 18, 5, 19, 49, 22, 118, 108, 106,
+ 84, 52, 6, 17, 11, 27, 124, 119, 115, 87,
+ 117, 113, 77, 107, 99, 71, 99, 97, 107, 59,
+ 69, 61, 89, 93, 49, 31, 35, 39, 33, 15,
+ 1, 7, 12, 11, 11, 11, 9, 5, 23, 0,
+ 4, 5, 0, 12, 1, 3, 2, 22, 3, 13,
+ 21, 11, 13, 4, 48, 32, 52, 50, 42, 42,
+ 30, 42, 42, 8, 26, 42, 5, 22, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 100, 124, 124, 124, 124, 124, 124, 124,
+ 118, 118, 92, 68, 48, 14, 5, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 80,
+ 60, 32, 28, 26, 30, 78, 78, 78, 78, 64,
+ 72, 52, 38, 42, 22, 15, 14, 17, 41, 56,
+ 24, 14, 41, 33, 23, 33, 27, 13, 19, 35,
+ 11, 3, 6, 3, 1, 8, 8, 124, 124, 124,
+ 124, 114, 104, 90, 56, 2, 13, 78, 56, 44,
+ 24, 16, 2, 1, 5, 35, 33, 23, 15, 27,
+ 13, 5, 11, 23, 3, 4, 12, 6, 10, 18,
+ 10, 124, 124, 124, 124, 114, 104, 90, 56, 2,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 3 */
+
+ 124, 18, 21, 124, 18, 21, 115, 71, 24, 20,
+ 22, 68, 114, 120, 110, 44, 97, 3, 11, 25,
+ 22, 24, 11, 16, 50, 76, 112, 102, 40, 19,
+ 109, 69, 23, 11, 25, 22, 13, 47, 22, 38,
+ 9, 35, 61, 8, 23, 45, 71, 14, 19, 39,
+ 8, 33, 41, 61, 3, 25, 13, 43, 15, 4,
+ 44, 0, 0, 0, 35, 45, 67, 13, 40, 2,
+ 92, 22, 33, 111, 47, 57, 25, 55, 67, 57,
+ 99, 61, 85, 69, 113, 33, 49, 57, 55, 48,
+ 3, 35, 113, 17, 75, 51, 115, 16, 12, 14,
+ 2, 22, 9, 17, 45, 73, 49, 47, 51, 21,
+ 41, 83, 73, 25, 89, 63, 97, 99, 107, 109,
+ 3, 27, 13, 13, 8, 18, 9, 19, 47, 97,
+ 21, 74, 72, 109, 18, 6, 10, 8, 6, 3,
+ 31, 15, 1, 22, 12, 4, 7, 7, 10, 13,
+ 10, 2, 42, 70, 60, 40, 30, 38, 24, 34,
+ 44, 3, 46, 38, 56, 51, 36, 68, 62, 124,
+ 124, 90, 116, 124, 124, 114, 124, 124, 124, 124,
+ 124, 122, 96, 124, 124, 124, 124, 124, 124, 120,
+ 100, 124, 112, 124, 44, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 114, 96, 80, 68, 34,
+ 23, 17, 29, 52, 58, 50, 60, 30, 36, 30,
+ 64, 12, 7, 8, 23, 9, 101, 87, 9, 8,
+ 20, 3, 21, 29, 37, 43, 113, 67, 50, 22,
+ 8, 17, 21, 47, 77, 85, 105, 19, 42, 26,
+ 16, 14, 11, 7, 9, 19, 41, 5, 62, 62,
+ 60, 22, 18, 5, 19, 47, 22, 116, 108, 104,
+ 82, 52, 6, 17, 11, 27, 124, 117, 111, 85,
+ 115, 111, 75, 103, 95, 69, 97, 93, 103, 59,
+ 67, 59, 87, 89, 47, 31, 35, 39, 31, 15,
+ 1, 7, 12, 11, 11, 13, 7, 3, 21, 0,
+ 4, 3, 0, 12, 1, 3, 2, 22, 3, 13,
+ 21, 11, 13, 2, 46, 32, 50, 48, 40, 42,
+ 30, 40, 40, 8, 26, 40, 5, 20, 124, 124,
+ 122, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 96, 124, 124, 124, 124, 124, 124, 124,
+ 114, 114, 88, 64, 44, 12, 7, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 118, 120, 76,
+ 56, 30, 26, 24, 28, 74, 74, 74, 74, 62,
+ 68, 48, 36, 40, 20, 17, 12, 19, 43, 54,
+ 22, 12, 41, 31, 23, 31, 25, 11, 19, 35,
+ 11, 3, 6, 1, 0, 8, 8, 124, 124, 124,
+ 118, 108, 96, 82, 48, 3, 13, 78, 56, 44,
+ 24, 16, 4, 1, 5, 33, 33, 23, 13, 25,
+ 11, 3, 11, 21, 3, 4, 12, 6, 10, 18,
+ 10, 124, 124, 124, 118, 108, 96, 82, 48, 3,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 4 */
+
+ 124, 18, 21, 124, 18, 21, 113, 69, 24, 20,
+ 22, 66, 110, 118, 110, 42, 93, 3, 11, 23,
+ 20, 22, 11, 14, 46, 74, 106, 94, 34, 23,
+ 103, 67, 23, 11, 23, 20, 13, 45, 22, 36,
+ 9, 33, 59, 8, 23, 45, 71, 14, 19, 39,
+ 8, 33, 41, 59, 3, 25, 13, 43, 13, 4,
+ 44, 0, 0, 0, 33, 47, 67, 13, 38, 2,
+ 90, 22, 33, 107, 45, 55, 21, 51, 65, 55,
+ 97, 59, 81, 67, 109, 33, 47, 57, 55, 48,
+ 3, 33, 109, 17, 75, 49, 111, 16, 12, 14,
+ 0, 22, 9, 17, 43, 71, 47, 45, 49, 21,
+ 41, 81, 71, 23, 87, 61, 87, 91, 101, 103,
+ 3, 25, 13, 13, 8, 18, 9, 19, 47, 93,
+ 21, 70, 68, 105, 18, 8, 10, 8, 6, 3,
+ 27, 13, 0, 20, 12, 4, 5, 7, 8, 13,
+ 10, 2, 40, 68, 58, 38, 28, 38, 24, 34,
+ 42, 3, 44, 36, 54, 51, 34, 66, 60, 124,
+ 124, 88, 112, 124, 124, 110, 124, 124, 124, 124,
+ 124, 118, 92, 118, 124, 124, 124, 124, 124, 114,
+ 96, 124, 108, 124, 42, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 120, 110, 92, 76, 64, 30,
+ 23, 17, 31, 50, 56, 48, 56, 28, 32, 28,
+ 62, 10, 7, 6, 23, 11, 99, 85, 7, 8,
+ 20, 1, 21, 27, 35, 41, 109, 63, 50, 24,
+ 8, 17, 19, 45, 73, 81, 103, 19, 44, 28,
+ 18, 16, 9, 7, 9, 17, 39, 5, 64, 62,
+ 60, 20, 18, 5, 19, 47, 22, 116, 106, 102,
+ 80, 52, 6, 15, 11, 27, 124, 113, 109, 83,
+ 111, 107, 73, 101, 93, 67, 93, 91, 99, 57,
+ 65, 59, 85, 87, 47, 31, 35, 37, 31, 15,
+ 3, 7, 10, 11, 11, 13, 7, 3, 19, 0,
+ 4, 3, 0, 12, 1, 3, 2, 20, 3, 13,
+ 21, 11, 11, 0, 44, 32, 48, 48, 38, 40,
+ 30, 38, 38, 8, 26, 38, 5, 18, 124, 124,
+ 120, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 92, 124, 124, 124, 124, 124, 124, 124,
+ 108, 108, 84, 60, 42, 10, 7, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 114, 114, 72,
+ 52, 26, 24, 24, 24, 72, 72, 72, 70, 58,
+ 64, 46, 34, 36, 18, 19, 8, 21, 43, 50,
+ 18, 8, 39, 31, 21, 31, 23, 9, 19, 33,
+ 9, 1, 6, 1, 2, 8, 8, 124, 124, 124,
+ 112, 100, 88, 72, 40, 9, 11, 78, 56, 44,
+ 24, 18, 4, 1, 5, 33, 33, 23, 13, 25,
+ 11, 1, 11, 21, 1, 6, 12, 6, 12, 18,
+ 10, 124, 124, 124, 112, 100, 88, 72, 40, 9,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 5 */
+
+ 124, 18, 21, 124, 18, 21, 109, 65, 24, 20,
+ 20, 64, 106, 116, 110, 42, 89, 3, 11, 21,
+ 20, 22, 11, 12, 42, 72, 102, 88, 30, 27,
+ 97, 65, 21, 11, 21, 20, 13, 43, 22, 36,
+ 9, 33, 57, 8, 23, 45, 71, 14, 19, 39,
+ 8, 33, 39, 59, 3, 25, 13, 43, 13, 4,
+ 44, 0, 0, 0, 33, 47, 67, 11, 36, 2,
+ 88, 20, 33, 101, 43, 53, 17, 47, 61, 51,
+ 93, 55, 79, 65, 103, 33, 47, 55, 53, 48,
+ 3, 31, 105, 17, 73, 49, 107, 16, 10, 12,
+ 0, 22, 9, 15, 43, 69, 45, 45, 47, 19,
+ 39, 77, 67, 21, 83, 59, 77, 83, 95, 97,
+ 1, 25, 11, 11, 8, 18, 7, 19, 45, 89,
+ 21, 66, 64, 99, 18, 8, 10, 8, 6, 1,
+ 25, 11, 0, 20, 12, 4, 5, 5, 6, 11,
+ 10, 0, 40, 66, 58, 38, 28, 38, 24, 34,
+ 40, 1, 42, 36, 54, 51, 34, 64, 58, 124,
+ 124, 86, 110, 124, 124, 106, 124, 124, 124, 124,
+ 122, 114, 88, 114, 124, 120, 124, 124, 124, 110,
+ 92, 124, 104, 124, 38, 124, 124, 124, 124, 124,
+ 124, 124, 124, 122, 116, 106, 88, 74, 60, 26,
+ 23, 17, 31, 48, 54, 46, 54, 26, 30, 26,
+ 58, 8, 9, 4, 25, 13, 97, 81, 5, 10,
+ 20, 1, 19, 27, 35, 39, 105, 61, 50, 24,
+ 10, 15, 17, 43, 71, 79, 99, 17, 46, 30,
+ 20, 16, 7, 5, 7, 17, 39, 5, 64, 62,
+ 58, 20, 18, 5, 19, 45, 22, 114, 104, 100,
+ 78, 52, 6, 15, 11, 25, 124, 111, 105, 79,
+ 107, 105, 71, 97, 89, 65, 89, 87, 95, 55,
+ 63, 57, 83, 83, 47, 31, 33, 37, 29, 15,
+ 3, 7, 10, 11, 11, 15, 5, 3, 17, 0,
+ 4, 3, 0, 12, 1, 3, 2, 20, 3, 13,
+ 21, 11, 11, 1, 42, 32, 46, 46, 38, 38,
+ 30, 38, 36, 8, 26, 36, 5, 16, 124, 124,
+ 118, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 88, 124, 124, 124, 124, 124, 124, 122,
+ 104, 104, 80, 58, 38, 10, 9, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 122, 110, 108, 68,
+ 48, 24, 24, 22, 20, 70, 68, 68, 66, 54,
+ 60, 42, 32, 34, 16, 19, 6, 23, 45, 48,
+ 16, 6, 39, 31, 19, 29, 21, 7, 17, 31,
+ 9, 1, 6, 0, 4, 8, 8, 124, 124, 118,
+ 106, 94, 80, 64, 32, 15, 11, 78, 56, 44,
+ 24, 18, 4, 0, 3, 31, 33, 23, 11, 25,
+ 9, 0, 11, 21, 1, 6, 12, 8, 12, 18,
+ 10, 124, 124, 118, 106, 94, 80, 64, 32, 15,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 6 */
+
+ 124, 18, 23, 124, 18, 23, 105, 63, 26, 20,
+ 20, 60, 102, 114, 110, 42, 87, 3, 9, 21,
+ 20, 20, 9, 12, 40, 68, 96, 80, 24, 29,
+ 93, 63, 21, 9, 21, 20, 15, 43, 22, 34,
+ 9, 33, 57, 8, 23, 43, 69, 14, 17, 37,
+ 8, 31, 39, 59, 3, 25, 13, 43, 13, 4,
+ 44, 0, 0, 0, 31, 47, 67, 11, 36, 0,
+ 88, 20, 33, 97, 41, 51, 15, 41, 59, 49,
+ 89, 53, 77, 63, 99, 33, 47, 55, 53, 48,
+ 3, 29, 99, 17, 71, 47, 103, 14, 10, 12,
+ 1, 24, 9, 15, 41, 69, 45, 43, 45, 19,
+ 37, 75, 65, 21, 81, 57, 67, 73, 89, 91,
+ 1, 25, 11, 11, 8, 18, 7, 17, 45, 85,
+ 19, 62, 60, 93, 18, 8, 10, 8, 8, 1,
+ 21, 9, 0, 20, 12, 4, 3, 3, 4, 11,
+ 10, 0, 38, 64, 56, 38, 26, 38, 24, 34,
+ 36, 1, 40, 34, 52, 51, 34, 64, 58, 124,
+ 124, 84, 106, 124, 124, 102, 124, 124, 124, 124,
+ 114, 110, 86, 110, 124, 116, 124, 124, 124, 104,
+ 88, 124, 100, 124, 34, 124, 124, 124, 124, 124,
+ 124, 124, 124, 118, 112, 100, 84, 70, 58, 24,
+ 23, 17, 33, 46, 52, 44, 52, 24, 28, 24,
+ 54, 6, 9, 2, 27, 13, 97, 79, 3, 10,
+ 20, 1, 17, 25, 33, 39, 101, 59, 52, 24,
+ 10, 13, 15, 41, 67, 75, 95, 15, 46, 30,
+ 20, 18, 5, 5, 7, 17, 37, 5, 66, 62,
+ 56, 20, 18, 5, 19, 45, 20, 114, 104, 98,
+ 76, 50, 6, 15, 11, 25, 124, 109, 103, 77,
+ 105, 101, 69, 93, 85, 63, 87, 83, 91, 55,
+ 61, 57, 81, 79, 45, 31, 33, 35, 29, 15,
+ 3, 7, 8, 11, 11, 15, 5, 1, 15, 0,
+ 4, 1, 2, 12, 0, 1, 2, 18, 3, 13,
+ 21, 11, 9, 3, 40, 32, 44, 44, 36, 38,
+ 30, 36, 36, 8, 24, 32, 7, 14, 124, 124,
+ 116, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 84, 124, 124, 124, 124, 124, 124, 116,
+ 100, 98, 76, 54, 36, 8, 11, 124, 124, 124,
+ 124, 124, 124, 124, 124, 122, 116, 104, 102, 64,
+ 46, 20, 22, 20, 18, 66, 66, 64, 62, 52,
+ 56, 38, 30, 30, 14, 21, 4, 25, 47, 44,
+ 14, 4, 39, 29, 19, 29, 19, 5, 17, 31,
+ 7, 0, 6, 0, 6, 8, 8, 124, 124, 114,
+ 100, 88, 72, 56, 24, 21, 11, 78, 56, 44,
+ 24, 20, 6, 0, 3, 31, 31, 21, 11, 23,
+ 7, 2, 9, 19, 1, 6, 12, 8, 14, 18,
+ 10, 124, 124, 114, 100, 88, 72, 56, 24, 21,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 7 */
+
+ 124, 18, 23, 124, 18, 23, 101, 59, 26, 20,
+ 18, 58, 98, 112, 110, 42, 83, 3, 9, 19,
+ 18, 20, 9, 10, 36, 66, 92, 74, 20, 33,
+ 87, 61, 21, 9, 19, 18, 15, 41, 22, 34,
+ 9, 31, 55, 8, 23, 43, 69, 14, 17, 37,
+ 8, 31, 37, 57, 3, 25, 13, 43, 11, 4,
+ 44, 0, 0, 0, 31, 47, 67, 9, 34, 0,
+ 86, 18, 33, 91, 39, 49, 11, 37, 55, 45,
+ 87, 49, 73, 61, 95, 33, 45, 53, 53, 48,
+ 3, 27, 95, 17, 69, 47, 99, 14, 8, 10,
+ 1, 24, 9, 13, 41, 67, 43, 43, 43, 17,
+ 35, 71, 61, 19, 79, 55, 57, 65, 83, 85,
+ 1, 23, 11, 9, 8, 18, 5, 17, 43, 81,
+ 19, 58, 56, 87, 18, 10, 10, 8, 8, 0,
+ 19, 7, 2, 18, 12, 4, 3, 3, 2, 11,
+ 10, 1, 38, 62, 56, 36, 26, 38, 24, 34,
+ 34, 1, 38, 32, 52, 51, 34, 62, 56, 120,
+ 124, 82, 102, 124, 124, 98, 124, 122, 124, 124,
+ 108, 106, 82, 104, 124, 110, 124, 124, 124, 98,
+ 84, 124, 96, 124, 32, 124, 124, 124, 124, 124,
+ 124, 124, 124, 114, 106, 96, 80, 66, 54, 20,
+ 23, 17, 33, 44, 50, 42, 48, 22, 26, 22,
+ 52, 4, 11, 0, 27, 15, 95, 75, 1, 12,
+ 20, 0, 17, 25, 33, 37, 97, 55, 52, 26,
+ 12, 13, 13, 39, 65, 73, 91, 15, 48, 32,
+ 22, 18, 3, 3, 7, 15, 37, 5, 66, 62,
+ 56, 18, 18, 5, 19, 43, 20, 112, 102, 96,
+ 74, 50, 6, 13, 11, 25, 124, 105, 99, 75,
+ 101, 99, 67, 91, 83, 61, 83, 81, 87, 53,
+ 59, 55, 79, 75, 45, 31, 33, 35, 27, 15,
+ 5, 7, 8, 11, 11, 17, 3, 1, 13, 0,
+ 4, 1, 2, 12, 0, 1, 2, 18, 3, 13,
+ 21, 11, 9, 5, 38, 32, 42, 44, 34, 36,
+ 30, 34, 34, 8, 24, 30, 7, 12, 122, 124,
+ 114, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 80, 124, 124, 124, 124, 124, 124, 112,
+ 96, 94, 72, 50, 32, 6, 11, 124, 124, 124,
+ 124, 124, 124, 124, 124, 118, 112, 100, 96, 60,
+ 42, 18, 20, 20, 14, 64, 62, 62, 58, 48,
+ 52, 36, 28, 28, 12, 23, 0, 27, 47, 42,
+ 10, 0, 37, 29, 17, 27, 17, 3, 17, 29,
+ 7, 0, 6, 2, 8, 8, 8, 124, 124, 108,
+ 94, 80, 64, 48, 16, 27, 9, 78, 56, 44,
+ 24, 20, 6, 0, 3, 29, 31, 21, 9, 23,
+ 5, 4, 9, 19, 0, 8, 12, 8, 14, 18,
+ 10, 124, 124, 108, 94, 80, 64, 48, 16, 27,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 8 */
+
+ 124, 16, 23, 124, 16, 23, 99, 57, 26, 20,
+ 18, 54, 92, 110, 110, 40, 79, 5, 9, 17,
+ 18, 18, 9, 8, 32, 64, 86, 66, 14, 37,
+ 81, 59, 21, 9, 17, 18, 17, 39, 22, 32,
+ 9, 31, 55, 6, 25, 43, 69, 14, 17, 37,
+ 8, 31, 37, 57, 3, 25, 13, 43, 11, 4,
+ 44, 0, 0, 0, 29, 49, 67, 9, 32, 0,
+ 84, 18, 35, 87, 37, 47, 9, 33, 53, 43,
+ 83, 47, 71, 59, 91, 33, 45, 53, 53, 48,
+ 3, 25, 91, 17, 69, 45, 95, 14, 8, 10,
+ 3, 24, 9, 13, 39, 65, 41, 41, 43, 17,
+ 35, 69, 59, 19, 77, 53, 49, 57, 77, 81,
+ 1, 23, 11, 9, 6, 18, 5, 17, 43, 77,
+ 19, 54, 52, 83, 18, 10, 10, 8, 8, 0,
+ 15, 7, 2, 18, 10, 4, 1, 1, 1, 11,
+ 10, 1, 36, 58, 54, 36, 24, 38, 24, 32,
+ 32, 1, 36, 30, 50, 51, 32, 60, 54, 116,
+ 124, 78, 98, 124, 124, 92, 124, 118, 124, 124,
+ 100, 102, 78, 100, 124, 106, 124, 124, 124, 92,
+ 80, 124, 92, 124, 28, 124, 124, 124, 124, 124,
+ 124, 124, 120, 110, 102, 92, 76, 62, 50, 16,
+ 23, 19, 35, 42, 46, 40, 46, 20, 22, 18,
+ 48, 2, 11, 1, 29, 17, 95, 73, 0, 12,
+ 20, 0, 15, 23, 31, 37, 93, 53, 52, 26,
+ 12, 11, 11, 37, 61, 69, 89, 13, 48, 32,
+ 22, 20, 1, 3, 7, 15, 35, 7, 68, 62,
+ 54, 18, 18, 5, 19, 43, 20, 112, 100, 94,
+ 72, 50, 6, 13, 11, 25, 124, 103, 97, 73,
+ 99, 95, 65, 87, 79, 59, 81, 77, 83, 53,
+ 59, 55, 77, 73, 45, 31, 33, 33, 27, 15,
+ 5, 7, 6, 11, 11, 17, 3, 1, 11, 0,
+ 2, 1, 2, 10, 0, 1, 2, 16, 3, 13,
+ 21, 11, 7, 7, 36, 32, 38, 42, 32, 34,
+ 28, 32, 32, 8, 24, 28, 7, 8, 120, 120,
+ 112, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 120, 76, 124, 124, 124, 124, 124, 124, 106,
+ 90, 88, 68, 46, 30, 4, 13, 124, 124, 124,
+ 124, 124, 124, 124, 124, 112, 106, 94, 90, 56,
+ 38, 14, 18, 18, 10, 60, 60, 58, 54, 44,
+ 48, 32, 24, 24, 8, 25, 1, 29, 49, 38,
+ 8, 1, 37, 29, 17, 27, 15, 1, 17, 29,
+ 5, 2, 6, 2, 8, 8, 6, 124, 120, 102,
+ 88, 74, 56, 38, 6, 33, 9, 78, 56, 44,
+ 24, 22, 6, 0, 3, 29, 31, 21, 9, 23,
+ 5, 4, 9, 19, 0, 8, 12, 8, 16, 18,
+ 8, 124, 120, 102, 88, 74, 56, 38, 6, 33,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 9 */
+
+ 124, 16, 23, 124, 16, 23, 95, 55, 28, 20,
+ 18, 52, 88, 108, 112, 40, 75, 5, 7, 15,
+ 18, 16, 9, 8, 30, 62, 82, 58, 8, 39,
+ 75, 57, 19, 7, 15, 18, 17, 37, 22, 32,
+ 7, 31, 53, 6, 25, 43, 69, 14, 17, 35,
+ 8, 31, 37, 57, 3, 25, 13, 41, 11, 4,
+ 44, 0, 0, 0, 27, 49, 67, 9, 30, 0,
+ 82, 18, 35, 83, 33, 45, 5, 29, 49, 41,
+ 79, 43, 69, 55, 85, 33, 45, 53, 51, 48,
+ 3, 23, 87, 15, 67, 43, 91, 14, 8, 10,
+ 3, 24, 9, 13, 37, 63, 39, 39, 41, 15,
+ 33, 67, 55, 17, 73, 51, 39, 47, 69, 75,
+ 0, 23, 9, 7, 6, 18, 5, 15, 41, 71,
+ 19, 50, 50, 77, 18, 10, 10, 8, 8, 2,
+ 11, 5, 2, 18, 10, 4, 0, 0, 3, 9,
+ 10, 1, 34, 56, 52, 36, 22, 38, 24, 32,
+ 30, 0, 34, 30, 48, 51, 32, 60, 54, 112,
+ 124, 76, 96, 124, 124, 88, 120, 114, 124, 124,
+ 94, 98, 74, 96, 124, 102, 124, 124, 124, 88,
+ 76, 124, 88, 124, 24, 124, 124, 124, 124, 124,
+ 124, 120, 116, 106, 98, 88, 74, 60, 48, 12,
+ 23, 19, 35, 42, 44, 38, 44, 18, 20, 16,
+ 44, 0, 11, 3, 31, 17, 93, 71, 2, 12,
+ 20, 0, 13, 21, 29, 35, 87, 51, 52, 26,
+ 12, 9, 9, 35, 57, 65, 85, 11, 50, 34,
+ 24, 22, 0, 3, 5, 15, 33, 7, 70, 62,
+ 52, 18, 20, 3, 19, 41, 20, 112, 100, 92,
+ 70, 50, 6, 13, 11, 23, 124, 101, 95, 69,
+ 95, 91, 63, 83, 75, 57, 77, 73, 79, 51,
+ 57, 53, 75, 69, 43, 29, 31, 31, 25, 15,
+ 5, 7, 4, 11, 11, 17, 3, 0, 9, 2,
+ 2, 0, 2, 10, 0, 1, 2, 14, 3, 11,
+ 19, 11, 5, 7, 34, 32, 36, 40, 32, 34,
+ 28, 32, 30, 8, 24, 26, 7, 6, 118, 118,
+ 112, 122, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 114, 72, 124, 124, 124, 124, 124, 124, 100,
+ 86, 84, 66, 44, 28, 4, 15, 124, 124, 124,
+ 124, 124, 124, 124, 124, 108, 102, 90, 86, 52,
+ 34, 10, 18, 16, 8, 58, 58, 54, 50, 42,
+ 46, 28, 22, 20, 6, 25, 3, 29, 51, 34,
+ 6, 3, 37, 27, 15, 27, 13, 2, 15, 27,
+ 3, 4, 6, 4, 10, 8, 6, 124, 116, 98,
+ 82, 68, 48, 30, 1, 39, 9, 78, 56, 46,
+ 26, 24, 8, 2, 1, 29, 31, 21, 9, 21,
+ 3, 6, 9, 17, 0, 8, 12, 10, 18, 18,
+ 8, 124, 116, 98, 82, 68, 48, 30, 1, 39,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 10 */
+
+ 124, 16, 23, 124, 16, 23, 91, 51, 28, 20,
+ 16, 50, 84, 106, 112, 40, 71, 5, 7, 13,
+ 16, 16, 9, 6, 26, 60, 76, 52, 4, 43,
+ 69, 55, 19, 7, 13, 16, 17, 35, 22, 30,
+ 7, 29, 51, 6, 25, 43, 69, 14, 17, 35,
+ 8, 31, 35, 55, 3, 25, 13, 41, 9, 4,
+ 44, 0, 0, 0, 27, 49, 67, 7, 28, 0,
+ 80, 16, 35, 77, 31, 43, 1, 25, 47, 37,
+ 77, 41, 65, 53, 81, 33, 43, 51, 51, 48,
+ 3, 21, 83, 15, 65, 43, 87, 14, 6, 8,
+ 5, 24, 9, 11, 37, 61, 37, 39, 39, 15,
+ 31, 63, 53, 15, 71, 49, 29, 39, 63, 69,
+ 0, 21, 9, 7, 6, 18, 3, 15, 41, 67,
+ 19, 46, 46, 71, 18, 12, 10, 8, 8, 2,
+ 9, 3, 4, 16, 10, 4, 0, 0, 5, 9,
+ 10, 3, 34, 54, 52, 34, 22, 38, 24, 32,
+ 28, 0, 32, 28, 48, 51, 32, 58, 52, 108,
+ 124, 74, 92, 124, 124, 84, 114, 110, 124, 124,
+ 86, 94, 70, 90, 122, 96, 124, 124, 124, 82,
+ 72, 116, 84, 124, 22, 124, 124, 124, 124, 124,
+ 120, 116, 112, 102, 92, 84, 70, 56, 44, 8,
+ 23, 19, 37, 40, 42, 36, 40, 16, 18, 14,
+ 42, 1, 13, 5, 31, 19, 91, 67, 4, 14,
+ 20, 2, 13, 21, 29, 33, 83, 47, 52, 28,
+ 14, 9, 7, 33, 55, 63, 81, 11, 52, 36,
+ 26, 22, 2, 1, 5, 13, 33, 7, 70, 62,
+ 52, 16, 20, 3, 19, 41, 20, 110, 98, 90,
+ 68, 50, 6, 11, 11, 23, 124, 97, 91, 67,
+ 91, 89, 61, 81, 73, 55, 73, 71, 75, 49,
+ 55, 53, 73, 65, 43, 29, 31, 31, 25, 15,
+ 7, 7, 4, 11, 11, 19, 1, 0, 7, 2,
+ 2, 0, 2, 10, 0, 1, 2, 14, 3, 11,
+ 19, 11, 5, 9, 32, 32, 34, 40, 30, 32,
+ 28, 30, 28, 8, 24, 24, 7, 4, 116, 116,
+ 110, 118, 120, 124, 124, 124, 124, 124, 124, 124,
+ 124, 110, 68, 124, 124, 124, 124, 124, 124, 96,
+ 82, 78, 62, 40, 24, 2, 15, 124, 124, 124,
+ 124, 124, 124, 124, 124, 104, 96, 86, 80, 48,
+ 30, 8, 16, 16, 4, 56, 54, 52, 46, 38,
+ 42, 26, 20, 18, 4, 27, 7, 31, 51, 32,
+ 2, 7, 35, 27, 13, 25, 11, 4, 15, 25,
+ 3, 4, 6, 4, 12, 8, 6, 124, 112, 92,
+ 76, 60, 40, 22, 9, 45, 7, 78, 56, 46,
+ 26, 24, 8, 2, 1, 27, 31, 21, 7, 21,
+ 1, 8, 9, 17, 2, 10, 12, 10, 18, 18,
+ 8, 124, 112, 92, 76, 60, 40, 22, 9, 45,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 11 */
+
+ 124, 16, 25, 124, 16, 25, 87, 49, 30, 20,
+ 16, 46, 80, 104, 112, 40, 69, 5, 5, 13,
+ 16, 14, 7, 6, 24, 56, 72, 44, 1, 45,
+ 65, 53, 19, 5, 13, 16, 19, 35, 22, 30,
+ 7, 29, 51, 6, 25, 41, 67, 14, 15, 33,
+ 8, 29, 35, 55, 3, 25, 13, 41, 9, 4,
+ 44, 0, 0, 0, 25, 49, 67, 7, 28, 1,
+ 80, 16, 35, 73, 29, 41, 0, 19, 43, 35,
+ 73, 37, 63, 51, 77, 33, 43, 51, 51, 48,
+ 3, 19, 77, 15, 63, 41, 83, 12, 6, 8,
+ 5, 26, 9, 11, 35, 61, 37, 37, 37, 13,
+ 29, 61, 49, 15, 69, 47, 19, 29, 57, 63,
+ 0, 21, 9, 5, 6, 18, 3, 13, 39, 63,
+ 17, 42, 42, 65, 18, 12, 10, 8, 10, 4,
+ 5, 1, 4, 16, 10, 4, 2, 2, 7, 9,
+ 10, 3, 32, 52, 50, 34, 20, 38, 24, 32,
+ 24, 0, 30, 26, 46, 51, 32, 58, 52, 104,
+ 124, 72, 88, 122, 124, 80, 110, 106, 124, 124,
+ 80, 90, 68, 86, 114, 92, 124, 124, 124, 76,
+ 68, 110, 80, 124, 18, 124, 124, 124, 124, 124,
+ 116, 110, 108, 98, 88, 78, 66, 52, 42, 6,
+ 23, 19, 37, 38, 40, 34, 38, 14, 16, 12,
+ 38, 3, 13, 7, 33, 19, 91, 65, 6, 14,
+ 20, 2, 11, 19, 27, 33, 79, 45, 54, 28,
+ 14, 7, 5, 31, 51, 59, 77, 9, 52, 36,
+ 26, 24, 4, 1, 5, 13, 31, 7, 72, 62,
+ 50, 16, 20, 3, 19, 39, 18, 110, 98, 88,
+ 66, 48, 6, 11, 11, 23, 124, 95, 89, 65,
+ 89, 85, 59, 77, 69, 53, 71, 67, 71, 49,
+ 53, 51, 71, 61, 41, 29, 31, 29, 23, 15,
+ 7, 7, 2, 11, 11, 19, 1, 2, 5, 2,
+ 2, 2, 4, 10, 2, 0, 2, 12, 3, 11,
+ 19, 11, 3, 11, 30, 32, 32, 38, 28, 32,
+ 28, 28, 28, 8, 22, 20, 9, 2, 112, 114,
+ 108, 116, 116, 124, 124, 124, 124, 124, 124, 124,
+ 124, 104, 64, 124, 124, 124, 124, 124, 124, 90,
+ 78, 74, 58, 36, 22, 0, 17, 124, 124, 124,
+ 124, 124, 124, 120, 118, 98, 92, 80, 74, 44,
+ 28, 4, 14, 14, 2, 52, 52, 48, 42, 36,
+ 38, 22, 18, 14, 2, 29, 9, 33, 53, 28,
+ 0, 9, 35, 25, 13, 25, 9, 6, 15, 25,
+ 1, 6, 6, 6, 14, 8, 6, 124, 108, 88,
+ 70, 54, 32, 14, 17, 51, 7, 78, 56, 46,
+ 26, 26, 10, 2, 1, 27, 29, 19, 7, 19,
+ 0, 10, 7, 15, 2, 10, 12, 10, 20, 18,
+ 8, 124, 108, 88, 70, 54, 32, 14, 17, 51,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 12 */
+
+ 124, 16, 25, 124, 16, 25, 85, 45, 30, 20,
+ 14, 44, 76, 102, 112, 38, 65, 5, 5, 11,
+ 16, 14, 7, 4, 20, 54, 66, 38, 5, 49,
+ 59, 51, 19, 5, 11, 16, 19, 33, 22, 28,
+ 7, 29, 49, 6, 25, 41, 67, 14, 15, 33,
+ 8, 29, 33, 55, 3, 25, 13, 41, 9, 4,
+ 44, 0, 0, 0, 25, 51, 67, 5, 26, 1,
+ 78, 14, 35, 67, 27, 39, 4, 15, 41, 31,
+ 69, 35, 61, 49, 73, 33, 43, 49, 51, 48,
+ 3, 17, 73, 15, 63, 41, 79, 12, 4, 6,
+ 7, 26, 9, 9, 35, 59, 35, 37, 35, 13,
+ 29, 57, 47, 13, 67, 45, 9, 21, 51, 57,
+ 0, 21, 9, 5, 6, 18, 1, 13, 39, 59,
+ 17, 38, 38, 61, 18, 12, 10, 8, 10, 4,
+ 3, 0, 4, 16, 10, 4, 2, 4, 9, 9,
+ 10, 5, 32, 50, 50, 34, 20, 38, 24, 32,
+ 22, 0, 28, 24, 46, 51, 30, 56, 50, 100,
+ 124, 70, 84, 118, 120, 76, 104, 102, 124, 124,
+ 72, 86, 64, 82, 108, 86, 116, 124, 124, 70,
+ 64, 102, 76, 124, 14, 124, 124, 124, 124, 124,
+ 112, 106, 104, 94, 84, 74, 62, 48, 38, 2,
+ 23, 19, 39, 36, 38, 32, 36, 12, 12, 10,
+ 34, 5, 15, 9, 35, 21, 89, 61, 8, 16,
+ 20, 2, 9, 19, 27, 31, 75, 43, 54, 28,
+ 16, 5, 3, 29, 49, 57, 75, 7, 54, 38,
+ 28, 24, 6, 0, 5, 13, 31, 7, 72, 62,
+ 48, 16, 20, 3, 19, 39, 18, 108, 96, 86,
+ 64, 48, 6, 11, 11, 23, 124, 93, 85, 63,
+ 85, 83, 57, 73, 65, 51, 67, 63, 67, 47,
+ 51, 51, 69, 59, 41, 29, 31, 29, 23, 15,
+ 7, 7, 2, 11, 11, 21, 0, 2, 3, 2,
+ 2, 2, 4, 10, 2, 0, 2, 12, 3, 11,
+ 19, 11, 3, 13, 28, 32, 30, 36, 26, 30,
+ 28, 26, 26, 8, 22, 18, 9, 0, 110, 112,
+ 106, 112, 112, 124, 122, 124, 124, 124, 124, 124,
+ 122, 100, 60, 124, 124, 124, 124, 124, 118, 86,
+ 72, 68, 54, 32, 18, 1, 19, 124, 124, 124,
+ 124, 124, 124, 114, 112, 94, 86, 76, 68, 40,
+ 24, 2, 12, 12, 1, 50, 48, 44, 38, 32,
+ 34, 18, 16, 12, 0, 31, 11, 35, 55, 26,
+ 1, 11, 35, 25, 11, 23, 7, 8, 15, 23,
+ 1, 6, 6, 6, 16, 8, 6, 122, 104, 82,
+ 64, 48, 24, 4, 25, 57, 7, 78, 56, 46,
+ 26, 26, 10, 2, 1, 25, 29, 19, 5, 19,
+ 0, 12, 7, 15, 2, 10, 12, 10, 20, 18,
+ 8, 122, 104, 82, 64, 48, 24, 4, 25, 57,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 13 */
+
+ 124, 16, 25, 124, 16, 25, 81, 43, 30, 20,
+ 14, 42, 72, 100, 112, 38, 61, 5, 5, 9,
+ 14, 12, 7, 2, 16, 52, 62, 30, 11, 53,
+ 53, 49, 17, 5, 9, 14, 19, 31, 22, 28,
+ 7, 27, 47, 6, 25, 41, 67, 14, 15, 33,
+ 8, 29, 33, 53, 3, 25, 13, 41, 7, 4,
+ 44, 0, 0, 0, 23, 51, 67, 5, 24, 1,
+ 76, 14, 35, 63, 25, 37, 8, 11, 37, 29,
+ 67, 31, 57, 47, 67, 33, 41, 49, 49, 48,
+ 3, 15, 69, 15, 61, 39, 75, 12, 4, 6,
+ 7, 26, 9, 9, 33, 57, 33, 35, 33, 11,
+ 27, 55, 43, 11, 63, 43, 0, 13, 45, 51,
+ 2, 19, 7, 3, 6, 18, 1, 13, 37, 55,
+ 17, 34, 34, 55, 18, 14, 10, 8, 10, 6,
+ 0, 2, 6, 14, 10, 4, 4, 4, 11, 7,
+ 10, 5, 30, 48, 48, 32, 18, 38, 24, 32,
+ 20, 2, 26, 24, 44, 51, 30, 54, 48, 96,
+ 124, 68, 82, 114, 116, 72, 100, 98, 124, 124,
+ 66, 82, 60, 76, 102, 82, 110, 124, 124, 66,
+ 60, 96, 72, 124, 12, 124, 124, 124, 122, 120,
+ 108, 102, 100, 90, 78, 70, 58, 46, 34, 1,
+ 23, 19, 39, 34, 36, 30, 32, 10, 10, 8,
+ 32, 7, 15, 11, 35, 23, 87, 59, 10, 16,
+ 20, 4, 9, 17, 25, 29, 71, 39, 54, 30,
+ 16, 5, 1, 27, 45, 53, 71, 7, 56, 40,
+ 30, 26, 8, 0, 3, 11, 29, 7, 74, 62,
+ 48, 14, 20, 3, 19, 37, 18, 108, 94, 84,
+ 62, 48, 6, 9, 11, 21, 124, 89, 83, 59,
+ 81, 79, 55, 71, 63, 49, 63, 61, 63, 45,
+ 49, 49, 67, 55, 41, 29, 29, 27, 21, 15,
+ 9, 7, 0, 11, 11, 21, 0, 2, 1, 2,
+ 2, 2, 4, 10, 2, 0, 2, 10, 3, 11,
+ 19, 11, 1, 15, 26, 32, 28, 36, 26, 28,
+ 28, 26, 24, 8, 22, 16, 9, 1, 108, 110,
+ 104, 108, 108, 124, 118, 122, 124, 118, 124, 124,
+ 116, 94, 56, 124, 124, 124, 124, 118, 112, 80,
+ 68, 64, 50, 30, 16, 1, 19, 124, 124, 124,
+ 124, 118, 118, 110, 106, 90, 82, 72, 62, 36,
+ 20, 1, 12, 12, 5, 48, 46, 42, 34, 28,
+ 30, 16, 14, 8, 1, 31, 15, 37, 55, 22,
+ 5, 15, 33, 25, 9, 23, 5, 10, 13, 21,
+ 0, 8, 6, 8, 18, 8, 6, 120, 100, 76,
+ 58, 40, 16, 3, 33, 63, 5, 78, 56, 46,
+ 26, 28, 10, 4, 0, 25, 29, 19, 5, 19,
+ 2, 14, 7, 15, 4, 12, 12, 12, 22, 18,
+ 8, 120, 100, 76, 58, 40, 16, 3, 33, 63,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 14 */
+
+ 122, 16, 25, 122, 16, 25, 77, 39, 32, 20,
+ 12, 38, 68, 98, 112, 38, 57, 5, 3, 7,
+ 14, 12, 7, 2, 14, 50, 56, 24, 15, 55,
+ 47, 47, 17, 3, 7, 14, 21, 29, 22, 26,
+ 7, 27, 47, 6, 25, 41, 67, 14, 15, 31,
+ 8, 29, 31, 53, 3, 25, 13, 41, 7, 4,
+ 44, 0, 0, 0, 23, 51, 67, 3, 22, 1,
+ 74, 12, 35, 57, 23, 35, 10, 7, 35, 25,
+ 63, 29, 55, 45, 63, 33, 41, 47, 49, 48,
+ 3, 13, 65, 15, 59, 39, 71, 12, 2, 4,
+ 9, 26, 9, 7, 33, 55, 31, 35, 31, 11,
+ 25, 51, 41, 11, 61, 41, 10, 3, 39, 45,
+ 2, 19, 7, 3, 6, 18, 0, 11, 37, 51,
+ 17, 30, 30, 49, 18, 14, 10, 8, 10, 6,
+ 2, 4, 6, 14, 10, 4, 4, 6, 13, 7,
+ 10, 7, 30, 46, 48, 32, 18, 38, 24, 32,
+ 18, 2, 24, 22, 44, 51, 30, 54, 48, 92,
+ 122, 66, 78, 110, 110, 68, 94, 94, 124, 124,
+ 58, 78, 56, 72, 96, 76, 104, 122, 124, 60,
+ 56, 88, 68, 124, 8, 120, 124, 120, 116, 114,
+ 104, 98, 96, 86, 74, 66, 54, 42, 32, 5,
+ 23, 19, 41, 32, 34, 28, 30, 8, 8, 6,
+ 28, 9, 17, 13, 37, 23, 87, 55, 12, 18,
+ 20, 4, 7, 17, 25, 29, 67, 37, 54, 30,
+ 18, 3, 0, 25, 43, 51, 67, 5, 56, 40,
+ 30, 26, 10, 2, 3, 11, 29, 7, 74, 62,
+ 46, 14, 20, 3, 19, 37, 18, 106, 94, 82,
+ 60, 48, 6, 9, 11, 21, 124, 87, 79, 57,
+ 79, 77, 53, 67, 59, 47, 61, 57, 59, 45,
+ 47, 49, 65, 51, 39, 29, 29, 27, 21, 15,
+ 9, 7, 0, 11, 11, 23, 2, 4, 0, 2,
+ 2, 4, 4, 10, 2, 0, 2, 10, 3, 11,
+ 19, 11, 1, 17, 24, 32, 26, 34, 24, 28,
+ 28, 24, 22, 8, 22, 14, 9, 3, 106, 108,
+ 102, 106, 104, 120, 114, 118, 118, 114, 124, 120,
+ 110, 90, 52, 124, 124, 124, 124, 110, 106, 76,
+ 64, 58, 46, 26, 12, 3, 21, 124, 124, 124,
+ 120, 112, 114, 104, 100, 84, 76, 66, 56, 32,
+ 16, 3, 10, 10, 7, 44, 42, 38, 30, 26,
+ 26, 12, 12, 6, 3, 33, 17, 39, 57, 20,
+ 7, 17, 33, 23, 9, 21, 3, 12, 13, 21,
+ 0, 8, 6, 8, 20, 8, 6, 118, 96, 72,
+ 52, 34, 8, 11, 41, 69, 5, 78, 56, 46,
+ 26, 28, 12, 4, 0, 23, 29, 19, 3, 17,
+ 4, 16, 7, 13, 4, 12, 12, 12, 22, 18,
+ 8, 118, 96, 72, 52, 34, 8, 11, 41, 69,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 15 */
+
+ 120, 16, 25, 120, 16, 25, 73, 37, 32, 20,
+ 12, 36, 64, 96, 112, 38, 53, 5, 3, 5,
+ 14, 10, 7, 0, 10, 48, 52, 16, 21, 59,
+ 41, 45, 17, 3, 5, 14, 21, 27, 22, 26,
+ 7, 27, 45, 6, 25, 41, 67, 14, 15, 31,
+ 8, 29, 31, 53, 3, 25, 13, 41, 7, 4,
+ 44, 0, 0, 0, 21, 51, 67, 3, 20, 1,
+ 72, 12, 35, 53, 21, 33, 14, 3, 31, 23,
+ 59, 25, 53, 43, 59, 33, 41, 47, 49, 48,
+ 3, 11, 61, 15, 57, 37, 67, 12, 2, 4,
+ 9, 26, 9, 7, 31, 53, 29, 33, 29, 9,
+ 23, 49, 37, 9, 59, 39, 20, 4, 33, 39,
+ 2, 19, 7, 1, 6, 18, 0, 11, 35, 47,
+ 17, 26, 26, 43, 18, 14, 10, 8, 10, 8,
+ 6, 6, 6, 14, 10, 4, 6, 8, 15, 7,
+ 10, 7, 28, 44, 46, 32, 16, 38, 24, 32,
+ 16, 2, 22, 20, 42, 51, 30, 52, 46, 88,
+ 116, 64, 74, 106, 106, 64, 90, 90, 124, 124,
+ 52, 74, 52, 68, 90, 72, 98, 114, 124, 54,
+ 52, 82, 64, 124, 4, 116, 124, 116, 112, 110,
+ 100, 94, 92, 82, 70, 62, 50, 38, 28, 9,
+ 23, 19, 41, 30, 32, 26, 28, 6, 6, 4,
+ 24, 11, 17, 15, 39, 25, 85, 53, 14, 18,
+ 20, 4, 5, 15, 23, 27, 63, 35, 54, 30,
+ 18, 1, 2, 23, 39, 47, 63, 3, 58, 42,
+ 32, 28, 12, 2, 3, 11, 27, 7, 76, 62,
+ 44, 14, 20, 3, 19, 35, 18, 106, 92, 80,
+ 58, 48, 6, 9, 11, 21, 124, 85, 77, 55,
+ 75, 73, 51, 63, 55, 45, 57, 53, 55, 43,
+ 45, 47, 63, 47, 39, 29, 29, 25, 19, 15,
+ 9, 7, 1, 11, 11, 23, 2, 4, 2, 2,
+ 2, 4, 4, 10, 2, 0, 2, 8, 3, 11,
+ 19, 11, 0, 19, 22, 32, 24, 32, 22, 26,
+ 28, 22, 20, 8, 22, 12, 9, 5, 104, 106,
+ 100, 102, 100, 116, 110, 114, 114, 108, 122, 114,
+ 104, 84, 48, 124, 124, 124, 124, 104, 100, 70,
+ 60, 54, 42, 22, 10, 5, 23, 124, 124, 124,
+ 116, 106, 108, 100, 94, 80, 72, 62, 50, 28,
+ 12, 7, 8, 8, 11, 42, 40, 34, 26, 22,
+ 22, 8, 10, 2, 5, 35, 19, 41, 59, 16,
+ 9, 19, 33, 23, 7, 21, 1, 14, 13, 19,
+ 2, 10, 6, 10, 22, 8, 6, 116, 92, 66,
+ 46, 28, 0, 19, 49, 75, 5, 78, 56, 46,
+ 26, 30, 12, 4, 0, 23, 29, 19, 3, 17,
+ 6, 18, 7, 13, 4, 12, 12, 12, 24, 18,
+ 8, 116, 92, 66, 46, 28, 0, 19, 49, 75,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 16 */
+
+ 116, 14, 27, 116, 14, 27, 71, 35, 32, 20,
+ 10, 32, 58, 94, 112, 36, 51, 7, 3, 5,
+ 12, 8, 7, 1, 6, 44, 46, 8, 27, 63,
+ 37, 45, 17, 3, 5, 12, 23, 27, 22, 24,
+ 7, 27, 45, 4, 27, 41, 67, 12, 15, 31,
+ 8, 29, 31, 53, 3, 25, 15, 41, 7, 4,
+ 44, 0, 0, 0, 21, 53, 67, 3, 18, 3,
+ 70, 10, 37, 49, 19, 31, 16, 0, 29, 21,
+ 57, 23, 51, 41, 55, 33, 41, 47, 49, 48,
+ 3, 11, 57, 15, 57, 37, 65, 10, 0, 2,
+ 11, 26, 9, 7, 31, 53, 29, 33, 29, 9,
+ 23, 47, 35, 9, 57, 37, 28, 12, 27, 35,
+ 2, 19, 7, 1, 4, 18, 0, 11, 35, 43,
+ 17, 22, 22, 39, 18, 14, 10, 8, 10, 8,
+ 8, 6, 6, 12, 8, 4, 6, 8, 19, 7,
+ 10, 9, 26, 40, 44, 30, 14, 38, 24, 30,
+ 12, 2, 20, 18, 40, 51, 28, 50, 44, 82,
+ 108, 60, 70, 100, 100, 58, 84, 86, 110, 124,
+ 44, 68, 48, 62, 82, 66, 90, 104, 118, 48,
+ 48, 74, 60, 124, 0, 110, 118, 110, 106, 104,
+ 94, 88, 86, 78, 64, 56, 46, 34, 24, 13,
+ 23, 21, 43, 28, 28, 22, 24, 2, 2, 0,
+ 20, 13, 19, 17, 41, 27, 85, 51, 14, 18,
+ 20, 4, 5, 15, 23, 27, 59, 33, 54, 30,
+ 18, 1, 2, 21, 37, 45, 61, 3, 58, 42,
+ 32, 28, 14, 2, 3, 11, 27, 9, 76, 60,
+ 42, 12, 20, 3, 19, 35, 16, 104, 90, 76,
+ 56, 46, 6, 9, 11, 21, 124, 83, 75, 53,
+ 73, 71, 49, 61, 53, 43, 55, 51, 51, 43,
+ 45, 47, 61, 45, 39, 29, 29, 25, 19, 15,
+ 11, 9, 3, 11, 13, 25, 2, 4, 4, 2,
+ 0, 4, 4, 8, 2, 0, 2, 6, 3, 11,
+ 19, 11, 0, 21, 20, 32, 20, 30, 20, 24,
+ 26, 20, 18, 8, 20, 8, 11, 9, 100, 102,
+ 98, 98, 96, 110, 104, 108, 108, 102, 116, 108,
+ 96, 78, 44, 124, 124, 122, 120, 96, 92, 64,
+ 54, 48, 38, 18, 6, 7, 25, 118, 120, 120,
+ 110, 100, 102, 94, 86, 74, 66, 56, 44, 24,
+ 8, 11, 6, 6, 15, 38, 36, 30, 20, 18,
+ 18, 4, 6, 1, 9, 37, 23, 43, 61, 12,
+ 13, 23, 33, 23, 7, 21, 0, 16, 13, 19,
+ 2, 10, 6, 10, 22, 8, 4, 112, 88, 60,
+ 38, 20, 7, 29, 59, 81, 5, 78, 56, 46,
+ 26, 30, 12, 4, 0, 23, 29, 19, 3, 17,
+ 6, 18, 7, 13, 4, 12, 12, 12, 24, 16,
+ 6, 112, 88, 60, 38, 20, 7, 29, 59, 81,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 17 */
+
+ 114, 14, 27, 114, 14, 27, 67, 31, 34, 22,
+ 10, 30, 54, 92, 114, 36, 47, 7, 1, 3,
+ 12, 8, 5, 1, 4, 42, 42, 2, 31, 65,
+ 31, 43, 15, 1, 3, 12, 23, 25, 22, 24,
+ 5, 25, 43, 4, 27, 39, 65, 12, 13, 29,
+ 8, 27, 29, 51, 1, 23, 15, 39, 5, 4,
+ 44, 0, 0, 0, 19, 53, 67, 1, 18, 3,
+ 70, 10, 37, 43, 15, 27, 20, 6, 25, 17,
+ 53, 19, 47, 37, 49, 33, 39, 45, 47, 48,
+ 3, 9, 51, 13, 55, 35, 61, 10, 0, 2,
+ 11, 28, 9, 5, 29, 51, 27, 31, 27, 7,
+ 21, 43, 31, 7, 53, 33, 38, 22, 19, 29,
+ 4, 17, 5, 0, 4, 20, 2, 9, 33, 37,
+ 15, 18, 20, 33, 18, 16, 10, 10, 12, 10,
+ 12, 8, 8, 12, 8, 4, 8, 10, 21, 5,
+ 10, 9, 26, 38, 44, 30, 14, 38, 24, 30,
+ 10, 4, 20, 18, 40, 51, 28, 50, 44, 78,
+ 102, 58, 68, 96, 96, 54, 80, 82, 98, 124,
+ 38, 64, 46, 58, 76, 62, 84, 96, 110, 44,
+ 44, 68, 56, 124, 1, 106, 114, 106, 102, 100,
+ 90, 84, 82, 74, 60, 52, 44, 32, 22, 15,
+ 23, 21, 43, 28, 26, 20, 22, 0, 0, 1,
+ 18, 15, 19, 19, 41, 27, 83, 47, 16, 20,
+ 20, 6, 3, 13, 21, 25, 53, 29, 56, 32,
+ 20, 0, 4, 17, 33, 41, 57, 1, 60, 44,
+ 34, 30, 16, 4, 1, 9, 25, 9, 78, 60,
+ 42, 12, 22, 1, 19, 33, 16, 104, 90, 74,
+ 54, 46, 8, 7, 9, 19, 124, 79, 71, 49,
+ 69, 67, 45, 57, 49, 39, 51, 47, 45, 41,
+ 43, 45, 57, 41, 37, 27, 27, 23, 17, 13,
+ 11, 9, 3, 11, 13, 25, 4, 6, 6, 4,
+ 0, 6, 6, 8, 4, 2, 2, 6, 1, 9,
+ 17, 9, 2, 21, 18, 32, 18, 30, 20, 24,
+ 26, 20, 18, 8, 20, 6, 11, 11, 98, 100,
+ 98, 96, 94, 106, 100, 104, 104, 98, 112, 104,
+ 90, 74, 40, 122, 120, 114, 112, 90, 86, 60,
+ 50, 44, 36, 16, 4, 7, 25, 114, 116, 116,
+ 106, 96, 98, 90, 80, 70, 62, 52, 40, 22,
+ 6, 13, 6, 6, 17, 36, 34, 28, 16, 16,
+ 16, 2, 4, 3, 11, 37, 25, 43, 61, 10,
+ 15, 25, 31, 21, 5, 19, 4, 20, 11, 17,
+ 4, 12, 8, 12, 24, 8, 4, 110, 84, 56,
+ 32, 14, 15, 37, 67, 85, 3, 78, 58, 48,
+ 28, 32, 14, 6, 2, 21, 27, 17, 1, 15,
+ 8, 20, 5, 11, 6, 14, 12, 14, 26, 16,
+ 6, 110, 84, 56, 32, 14, 15, 37, 67, 85,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 18 */
+
+ 112, 14, 27, 112, 14, 27, 63, 29, 34, 22,
+ 10, 28, 50, 90, 114, 36, 43, 7, 1, 1,
+ 12, 6, 5, 3, 0, 40, 36, 5, 37, 69,
+ 25, 41, 15, 1, 1, 12, 23, 23, 22, 22,
+ 5, 25, 41, 4, 27, 39, 65, 12, 13, 29,
+ 8, 27, 29, 51, 1, 23, 15, 39, 5, 4,
+ 44, 0, 0, 0, 17, 53, 67, 1, 16, 3,
+ 68, 10, 37, 39, 13, 25, 24, 10, 23, 15,
+ 49, 17, 45, 35, 45, 33, 39, 45, 47, 48,
+ 3, 7, 47, 13, 53, 33, 57, 10, 0, 2,
+ 13, 28, 9, 5, 27, 49, 25, 29, 25, 7,
+ 19, 41, 29, 5, 51, 31, 48, 30, 13, 23,
+ 4, 17, 5, 0, 4, 20, 2, 9, 33, 33,
+ 15, 14, 16, 27, 18, 16, 10, 10, 12, 10,
+ 16, 10, 8, 12, 8, 4, 10, 12, 23, 5,
+ 10, 9, 24, 36, 42, 30, 12, 38, 24, 30,
+ 8, 4, 18, 16, 38, 51, 28, 48, 42, 74,
+ 96, 56, 64, 92, 92, 50, 76, 78, 86, 124,
+ 30, 60, 42, 54, 70, 58, 78, 88, 102, 38,
+ 40, 62, 52, 124, 5, 102, 110, 102, 98, 96,
+ 86, 80, 78, 70, 56, 48, 40, 28, 18, 19,
+ 23, 21, 45, 26, 24, 18, 20, 1, 1, 3,
+ 14, 17, 19, 21, 43, 29, 81, 45, 18, 20,
+ 20, 6, 1, 11, 19, 23, 49, 27, 56, 32,
+ 20, 2, 6, 15, 29, 37, 53, 0, 62, 46,
+ 36, 32, 18, 4, 1, 9, 23, 9, 80, 60,
+ 40, 12, 22, 1, 19, 33, 16, 104, 88, 72,
+ 52, 46, 8, 7, 9, 19, 124, 77, 69, 47,
+ 65, 63, 43, 53, 45, 37, 47, 43, 41, 39,
+ 41, 45, 55, 37, 37, 27, 27, 21, 17, 13,
+ 11, 9, 5, 11, 13, 25, 4, 6, 8, 4,
+ 0, 6, 6, 8, 4, 2, 2, 4, 1, 9,
+ 17, 9, 4, 23, 16, 32, 16, 28, 18, 22,
+ 26, 18, 16, 8, 20, 4, 11, 13, 96, 98,
+ 96, 92, 90, 102, 96, 100, 100, 92, 106, 98,
+ 84, 68, 36, 114, 112, 106, 102, 84, 80, 54,
+ 46, 38, 32, 12, 2, 9, 27, 110, 112, 110,
+ 102, 90, 92, 84, 74, 66, 56, 48, 34, 18,
+ 2, 17, 4, 4, 21, 34, 32, 24, 12, 12,
+ 12, 1, 2, 7, 13, 39, 27, 45, 63, 6,
+ 17, 27, 31, 21, 3, 19, 6, 22, 11, 15,
+ 6, 14, 8, 12, 26, 8, 4, 108, 80, 50,
+ 26, 8, 23, 45, 75, 91, 3, 78, 58, 48,
+ 28, 34, 14, 6, 2, 21, 27, 17, 1, 15,
+ 10, 22, 5, 11, 6, 14, 12, 14, 28, 16,
+ 6, 108, 80, 50, 26, 8, 23, 45, 75, 91,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 19 */
+
+ 110, 14, 27, 110, 14, 27, 59, 25, 36, 22,
+ 8, 24, 46, 88, 114, 36, 39, 7, 0, 0,
+ 12, 6, 5, 3, 1, 38, 32, 11, 41, 71,
+ 19, 39, 15, 0, 0, 12, 25, 21, 22, 22,
+ 5, 25, 41, 4, 27, 39, 65, 12, 13, 27,
+ 8, 27, 27, 51, 1, 23, 15, 39, 5, 4,
+ 44, 0, 0, 0, 17, 53, 67, 0, 14, 3,
+ 66, 8, 37, 33, 11, 23, 26, 14, 19, 11,
+ 45, 13, 43, 33, 41, 33, 39, 43, 47, 48,
+ 3, 5, 43, 13, 51, 33, 53, 10, 1, 0,
+ 13, 28, 9, 3, 27, 47, 23, 29, 23, 5,
+ 17, 37, 25, 5, 49, 29, 58, 40, 7, 17,
+ 4, 17, 5, 2, 4, 20, 4, 7, 31, 29,
+ 15, 10, 12, 21, 18, 16, 10, 10, 12, 12,
+ 18, 12, 8, 12, 8, 4, 10, 14, 25, 5,
+ 10, 11, 24, 34, 42, 30, 12, 38, 24, 30,
+ 6, 4, 16, 14, 38, 51, 28, 48, 42, 70,
+ 90, 54, 60, 88, 86, 46, 70, 74, 72, 124,
+ 24, 56, 38, 50, 64, 52, 72, 80, 94, 32,
+ 36, 54, 48, 124, 9, 98, 106, 98, 92, 90,
+ 82, 76, 74, 66, 52, 44, 36, 24, 16, 23,
+ 23, 21, 45, 24, 22, 16, 18, 3, 3, 5,
+ 10, 19, 21, 23, 45, 29, 81, 41, 20, 22,
+ 20, 6, 0, 11, 19, 23, 45, 25, 56, 32,
+ 22, 4, 8, 13, 27, 35, 49, 2, 62, 46,
+ 36, 32, 20, 6, 1, 9, 23, 9, 80, 60,
+ 38, 12, 22, 1, 19, 31, 16, 102, 88, 70,
+ 50, 46, 8, 7, 9, 19, 124, 75, 65, 45,
+ 63, 61, 41, 49, 41, 35, 45, 39, 37, 39,
+ 39, 43, 53, 33, 35, 27, 27, 21, 15, 13,
+ 11, 9, 5, 11, 13, 27, 6, 8, 10, 4,
+ 0, 8, 6, 8, 4, 2, 2, 4, 1, 9,
+ 17, 9, 4, 25, 14, 32, 14, 26, 16, 22,
+ 26, 16, 14, 8, 20, 2, 11, 15, 94, 96,
+ 94, 90, 86, 98, 92, 96, 94, 88, 100, 92,
+ 78, 64, 32, 106, 104, 98, 92, 76, 74, 50,
+ 42, 34, 28, 8, 1, 11, 29, 106, 106, 106,
+ 96, 84, 88, 80, 68, 60, 52, 42, 28, 14,
+ 1, 19, 2, 2, 23, 30, 28, 20, 8, 10,
+ 8, 5, 0, 9, 15, 41, 29, 47, 65, 4,
+ 19, 29, 31, 19, 3, 17, 8, 24, 11, 15,
+ 6, 14, 8, 14, 28, 8, 4, 106, 76, 46,
+ 20, 2, 31, 53, 83, 97, 3, 78, 58, 48,
+ 28, 34, 16, 6, 2, 19, 27, 17, 0, 13,
+ 12, 24, 5, 9, 6, 14, 12, 14, 28, 16,
+ 6, 106, 76, 46, 20, 2, 31, 53, 83, 97,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 20 */
+
+ 106, 14, 27, 106, 14, 27, 57, 23, 36, 22,
+ 8, 22, 42, 86, 114, 34, 35, 7, 0, 2,
+ 10, 4, 5, 5, 5, 36, 26, 19, 47, 75,
+ 13, 37, 15, 0, 2, 10, 25, 19, 22, 20,
+ 5, 23, 39, 4, 27, 39, 65, 12, 13, 27,
+ 8, 27, 27, 49, 1, 23, 15, 39, 3, 4,
+ 44, 0, 0, 0, 15, 55, 67, 0, 12, 3,
+ 64, 8, 37, 29, 9, 21, 30, 18, 17, 9,
+ 43, 11, 39, 31, 37, 33, 37, 43, 47, 48,
+ 3, 3, 39, 13, 51, 31, 49, 10, 1, 0,
+ 15, 28, 9, 3, 25, 45, 21, 27, 21, 5,
+ 17, 35, 23, 3, 47, 27, 68, 48, 1, 11,
+ 4, 15, 5, 2, 4, 20, 4, 7, 31, 25,
+ 15, 6, 8, 17, 18, 18, 10, 10, 12, 12,
+ 22, 14, 10, 10, 8, 4, 12, 14, 27, 5,
+ 10, 11, 22, 32, 40, 28, 10, 38, 24, 30,
+ 4, 4, 14, 12, 36, 51, 26, 46, 40, 66,
+ 82, 52, 56, 84, 82, 42, 66, 70, 60, 124,
+ 16, 52, 34, 44, 58, 48, 64, 70, 86, 26,
+ 32, 48, 44, 124, 11, 94, 102, 92, 88, 86,
+ 78, 72, 70, 62, 46, 40, 32, 20, 12, 27,
+ 23, 21, 47, 22, 20, 14, 14, 5, 7, 7,
+ 8, 21, 21, 25, 45, 31, 79, 39, 22, 22,
+ 20, 8, 0, 9, 17, 21, 41, 21, 56, 34,
+ 22, 4, 10, 11, 23, 31, 47, 2, 64, 48,
+ 38, 34, 22, 6, 1, 7, 21, 9, 82, 60,
+ 38, 10, 22, 1, 19, 31, 16, 102, 86, 68,
+ 48, 46, 8, 5, 9, 19, 124, 71, 63, 43,
+ 59, 57, 39, 47, 39, 33, 41, 37, 33, 37,
+ 37, 43, 51, 31, 35, 27, 27, 19, 15, 13,
+ 13, 9, 7, 11, 13, 27, 6, 8, 12, 4,
+ 0, 8, 6, 8, 4, 2, 2, 2, 1, 9,
+ 17, 9, 6, 27, 12, 32, 12, 26, 14, 20,
+ 26, 14, 12, 8, 20, 0, 11, 17, 92, 94,
+ 92, 86, 82, 94, 88, 90, 90, 82, 94, 86,
+ 72, 58, 28, 96, 96, 90, 82, 70, 66, 44,
+ 36, 28, 24, 4, 3, 13, 29, 100, 102, 100,
+ 92, 78, 82, 74, 62, 56, 46, 38, 22, 10,
+ 5, 23, 0, 2, 27, 28, 26, 18, 4, 6,
+ 4, 7, 1, 13, 17, 43, 33, 49, 65, 0,
+ 23, 33, 29, 19, 1, 17, 10, 26, 11, 13,
+ 8, 16, 8, 14, 30, 8, 4, 104, 72, 40,
+ 14, 5, 39, 63, 91, 103, 1, 78, 58, 48,
+ 28, 36, 16, 6, 2, 19, 27, 17, 0, 13,
+ 12, 26, 5, 9, 8, 16, 12, 14, 30, 16,
+ 6, 104, 72, 40, 14, 5, 39, 63, 91, 103,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 21 */
+
+ 104, 14, 27, 104, 14, 27, 53, 19, 36, 22,
+ 6, 20, 38, 84, 114, 34, 31, 7, 0, 4,
+ 10, 4, 5, 7, 9, 34, 22, 25, 51, 79,
+ 7, 35, 13, 0, 4, 10, 25, 17, 22, 20,
+ 5, 23, 37, 4, 27, 39, 65, 12, 13, 27,
+ 8, 27, 25, 49, 1, 23, 15, 39, 3, 4,
+ 44, 0, 0, 0, 15, 55, 67, 2, 10, 3,
+ 62, 6, 37, 23, 7, 19, 34, 22, 13, 5,
+ 39, 7, 37, 29, 31, 33, 37, 41, 45, 48,
+ 3, 1, 35, 13, 49, 31, 45, 10, 3, 1,
+ 15, 28, 9, 1, 25, 43, 19, 27, 19, 3,
+ 15, 31, 19, 1, 43, 25, 78, 56, 4, 5,
+ 6, 15, 3, 4, 4, 20, 6, 7, 29, 21,
+ 15, 2, 4, 11, 18, 18, 10, 10, 12, 14,
+ 24, 16, 10, 10, 8, 4, 12, 16, 29, 3,
+ 10, 13, 22, 30, 40, 28, 10, 38, 24, 30,
+ 2, 6, 12, 12, 36, 51, 26, 44, 38, 62,
+ 76, 50, 54, 80, 78, 38, 60, 66, 48, 124,
+ 10, 48, 30, 40, 52, 42, 58, 62, 78, 22,
+ 28, 40, 40, 124, 15, 90, 98, 88, 84, 82,
+ 74, 68, 66, 58, 42, 36, 28, 18, 8, 31,
+ 23, 21, 47, 20, 18, 12, 12, 7, 9, 9,
+ 4, 23, 23, 27, 47, 33, 77, 35, 24, 24,
+ 20, 8, 2, 9, 17, 19, 37, 19, 56, 34,
+ 24, 6, 12, 9, 21, 29, 43, 4, 66, 50,
+ 40, 34, 24, 8, 0, 7, 21, 9, 82, 60,
+ 36, 10, 22, 1, 19, 29, 16, 100, 84, 66,
+ 46, 46, 8, 5, 9, 17, 124, 69, 59, 39,
+ 55, 55, 37, 43, 35, 31, 37, 33, 29, 35,
+ 35, 41, 49, 27, 35, 27, 25, 19, 13, 13,
+ 13, 9, 7, 11, 13, 29, 8, 8, 14, 4,
+ 0, 8, 6, 8, 4, 2, 2, 2, 1, 9,
+ 17, 9, 6, 29, 10, 32, 10, 24, 14, 18,
+ 26, 14, 10, 8, 20, 1, 11, 19, 90, 92,
+ 90, 82, 78, 90, 84, 86, 84, 76, 88, 80,
+ 66, 54, 24, 88, 88, 82, 72, 64, 60, 40,
+ 32, 24, 20, 2, 7, 13, 31, 96, 96, 96,
+ 88, 72, 76, 70, 56, 52, 42, 34, 16, 6,
+ 9, 25, 0, 0, 31, 26, 22, 14, 0, 2,
+ 0, 11, 3, 15, 19, 43, 35, 51, 67, 1,
+ 25, 35, 29, 19, 0, 15, 12, 28, 9, 11,
+ 8, 16, 8, 16, 32, 8, 4, 102, 68, 34,
+ 8, 11, 47, 71, 99, 109, 1, 78, 58, 48,
+ 28, 36, 16, 8, 4, 17, 27, 17, 2, 13,
+ 14, 28, 5, 9, 8, 16, 12, 16, 30, 16,
+ 6, 102, 68, 34, 8, 11, 47, 71, 99, 109,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 22 */
+
+ 102, 14, 29, 102, 14, 29, 49, 17, 38, 22,
+ 6, 16, 34, 82, 114, 34, 29, 7, 2, 4,
+ 10, 2, 3, 7, 11, 30, 16, 33, 57, 81,
+ 3, 33, 13, 2, 4, 10, 27, 17, 22, 18,
+ 5, 23, 37, 4, 27, 37, 63, 12, 11, 25,
+ 8, 25, 25, 49, 1, 23, 15, 39, 3, 4,
+ 44, 0, 0, 0, 13, 55, 67, 2, 10, 5,
+ 62, 6, 37, 19, 5, 17, 36, 28, 11, 3,
+ 35, 5, 35, 27, 27, 33, 37, 41, 45, 48,
+ 3, 0, 29, 13, 47, 29, 41, 8, 3, 1,
+ 17, 30, 9, 1, 23, 43, 19, 25, 17, 3,
+ 13, 29, 17, 1, 41, 23, 88, 66, 10, 0,
+ 6, 15, 3, 4, 4, 20, 6, 5, 29, 17,
+ 13, 1, 0, 5, 18, 18, 10, 10, 14, 14,
+ 28, 18, 10, 10, 8, 4, 14, 18, 31, 3,
+ 10, 13, 20, 28, 38, 28, 8, 38, 24, 30,
+ 1, 6, 10, 10, 34, 51, 26, 44, 38, 58,
+ 70, 48, 50, 74, 72, 34, 56, 62, 34, 124,
+ 2, 44, 28, 36, 44, 38, 52, 54, 68, 16,
+ 24, 34, 36, 124, 19, 86, 94, 84, 78, 76,
+ 70, 62, 62, 54, 38, 30, 24, 14, 6, 33,
+ 23, 21, 49, 18, 16, 10, 10, 9, 11, 11,
+ 0, 25, 23, 29, 49, 33, 77, 33, 26, 24,
+ 20, 8, 4, 7, 15, 19, 33, 17, 58, 34,
+ 24, 8, 14, 7, 17, 25, 39, 6, 66, 50,
+ 40, 36, 26, 8, 0, 7, 19, 9, 84, 60,
+ 34, 10, 22, 1, 19, 29, 14, 100, 84, 64,
+ 44, 44, 8, 5, 9, 17, 124, 67, 57, 37,
+ 53, 51, 35, 39, 31, 29, 35, 29, 25, 35,
+ 33, 41, 47, 23, 33, 27, 25, 17, 13, 13,
+ 13, 9, 9, 11, 13, 29, 8, 10, 16, 4,
+ 0, 10, 8, 8, 6, 4, 2, 0, 1, 9,
+ 17, 9, 8, 31, 8, 32, 8, 22, 12, 18,
+ 26, 12, 10, 8, 18, 5, 13, 21, 86, 90,
+ 88, 80, 74, 86, 80, 82, 80, 72, 82, 76,
+ 60, 48, 20, 80, 80, 74, 64, 56, 54, 34,
+ 28, 18, 16, 1, 9, 15, 33, 92, 92, 90,
+ 82, 66, 72, 64, 50, 46, 36, 28, 10, 2,
+ 11, 29, 1, 1, 33, 22, 20, 10, 3, 0,
+ 3, 15, 5, 19, 21, 45, 37, 53, 69, 5,
+ 27, 37, 29, 17, 0, 15, 14, 30, 9, 11,
+ 10, 18, 8, 16, 34, 8, 4, 100, 64, 30,
+ 2, 17, 55, 79, 107, 115, 1, 78, 58, 48,
+ 28, 38, 18, 8, 4, 17, 25, 15, 2, 11,
+ 16, 30, 3, 7, 8, 16, 12, 16, 32, 16,
+ 6, 100, 64, 30, 2, 17, 55, 79, 107, 115,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 23 */
+
+ 100, 14, 29, 100, 14, 29, 45, 13, 38, 22,
+ 4, 14, 30, 80, 114, 34, 25, 7, 2, 6,
+ 8, 2, 3, 9, 15, 28, 12, 39, 61, 85,
+ 2, 31, 13, 2, 6, 8, 27, 15, 22, 18,
+ 5, 21, 35, 4, 27, 37, 63, 12, 11, 25,
+ 8, 25, 23, 47, 1, 23, 15, 39, 1, 4,
+ 44, 0, 0, 0, 13, 55, 67, 4, 8, 5,
+ 60, 4, 37, 13, 3, 15, 40, 32, 7, 0,
+ 33, 1, 31, 25, 23, 33, 35, 39, 45, 48,
+ 3, 2, 25, 13, 45, 29, 37, 8, 5, 3,
+ 17, 30, 9, 0, 23, 41, 17, 25, 15, 1,
+ 11, 25, 13, 0, 39, 21, 98, 74, 16, 6,
+ 6, 13, 3, 6, 4, 20, 8, 5, 27, 13,
+ 13, 5, 3, 0, 18, 20, 10, 10, 14, 16,
+ 30, 20, 12, 8, 8, 4, 14, 18, 33, 3,
+ 10, 15, 20, 26, 38, 26, 8, 38, 24, 30,
+ 3, 6, 8, 8, 34, 51, 26, 42, 36, 54,
+ 64, 46, 46, 70, 68, 30, 50, 58, 22, 124,
+ 3, 40, 24, 30, 38, 32, 46, 44, 60, 10,
+ 20, 26, 32, 124, 21, 82, 90, 80, 74, 72,
+ 66, 58, 58, 50, 32, 26, 20, 10, 2, 37,
+ 23, 21, 49, 16, 14, 8, 6, 11, 13, 13,
+ 1, 27, 25, 31, 49, 35, 75, 29, 28, 26,
+ 20, 10, 4, 7, 15, 17, 29, 13, 58, 36,
+ 26, 8, 16, 5, 15, 23, 35, 6, 68, 52,
+ 42, 36, 28, 10, 0, 5, 19, 9, 84, 60,
+ 34, 8, 22, 1, 19, 27, 14, 98, 82, 62,
+ 42, 44, 8, 3, 9, 17, 124, 63, 53, 35,
+ 49, 49, 33, 37, 29, 27, 31, 27, 21, 33,
+ 31, 39, 45, 19, 33, 27, 25, 17, 11, 13,
+ 15, 9, 9, 11, 13, 31, 10, 10, 18, 4,
+ 0, 10, 8, 8, 6, 4, 2, 0, 1, 9,
+ 17, 9, 8, 33, 6, 32, 6, 22, 10, 16,
+ 26, 10, 8, 8, 18, 7, 13, 23, 84, 88,
+ 86, 76, 70, 82, 76, 76, 74, 66, 76, 70,
+ 54, 44, 16, 70, 72, 66, 54, 50, 48, 30,
+ 24, 14, 12, 5, 13, 17, 33, 86, 86, 86,
+ 78, 60, 66, 60, 44, 42, 32, 24, 4, 1,
+ 15, 31, 3, 1, 37, 20, 16, 8, 7, 3,
+ 7, 17, 7, 21, 23, 47, 41, 55, 69, 7,
+ 31, 41, 27, 17, 2, 13, 16, 32, 9, 9,
+ 10, 18, 8, 18, 36, 8, 4, 98, 60, 24,
+ 3, 25, 63, 87, 115, 121, 0, 78, 58, 48,
+ 28, 38, 18, 8, 4, 15, 25, 15, 4, 11,
+ 18, 32, 3, 7, 10, 18, 12, 16, 32, 16,
+ 6, 98, 60, 24, 3, 25, 63, 87, 115, 121,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 24 */
+
+ 96, 12, 29, 96, 12, 29, 43, 11, 38, 22,
+ 4, 10, 24, 78, 114, 32, 21, 9, 2, 8,
+ 8, 0, 3, 11, 19, 26, 6, 47, 67, 89,
+ 8, 29, 13, 2, 8, 8, 29, 13, 22, 16,
+ 5, 21, 35, 2, 29, 37, 63, 12, 11, 25,
+ 8, 25, 23, 47, 1, 23, 15, 39, 1, 4,
+ 44, 0, 0, 0, 11, 57, 67, 4, 6, 5,
+ 58, 4, 39, 9, 1, 13, 42, 36, 5, 2,
+ 29, 0, 29, 23, 19, 33, 35, 39, 45, 48,
+ 3, 4, 21, 13, 45, 27, 33, 8, 5, 3,
+ 19, 30, 9, 0, 21, 39, 15, 23, 15, 1,
+ 11, 23, 11, 0, 37, 19, 106, 82, 22, 10,
+ 6, 13, 3, 6, 2, 20, 8, 5, 27, 9,
+ 13, 9, 7, 4, 18, 20, 10, 10, 14, 16,
+ 34, 20, 12, 8, 6, 4, 16, 20, 37, 3,
+ 10, 15, 18, 22, 36, 26, 6, 38, 24, 28,
+ 5, 6, 6, 6, 32, 51, 24, 40, 34, 50,
+ 56, 42, 42, 66, 62, 24, 46, 54, 8, 124,
+ 11, 36, 20, 26, 32, 28, 38, 36, 52, 4,
+ 16, 20, 28, 124, 25, 78, 84, 74, 68, 66,
+ 60, 54, 52, 46, 28, 22, 16, 6, 1, 41,
+ 23, 23, 51, 14, 10, 6, 4, 13, 17, 17,
+ 5, 29, 25, 33, 51, 37, 75, 27, 30, 26,
+ 20, 10, 6, 5, 13, 17, 25, 11, 58, 36,
+ 26, 10, 18, 3, 11, 19, 33, 8, 68, 52,
+ 42, 38, 30, 10, 0, 5, 17, 11, 86, 60,
+ 32, 8, 22, 1, 19, 27, 14, 98, 80, 60,
+ 40, 44, 8, 3, 9, 17, 124, 61, 51, 33,
+ 47, 45, 31, 33, 25, 25, 29, 23, 17, 33,
+ 31, 39, 43, 17, 33, 27, 25, 15, 11, 13,
+ 15, 9, 11, 11, 13, 31, 10, 10, 20, 4,
+ 1, 10, 8, 6, 6, 4, 2, 1, 1, 9,
+ 17, 9, 10, 35, 4, 32, 2, 20, 8, 14,
+ 24, 8, 6, 8, 18, 9, 13, 27, 82, 84,
+ 84, 72, 66, 78, 72, 72, 70, 60, 70, 64,
+ 48, 38, 12, 62, 64, 56, 44, 42, 40, 24,
+ 18, 8, 8, 9, 15, 19, 35, 82, 82, 80,
+ 72, 54, 60, 54, 38, 36, 26, 18, 1, 5,
+ 19, 35, 5, 3, 41, 16, 14, 4, 11, 7,
+ 11, 21, 11, 25, 27, 49, 43, 57, 71, 11,
+ 33, 43, 27, 17, 2, 13, 18, 34, 9, 9,
+ 12, 20, 8, 18, 36, 8, 2, 96, 56, 18,
+ 9, 31, 71, 97, 125, 125, 0, 78, 58, 48,
+ 28, 40, 18, 8, 4, 15, 25, 15, 4, 11,
+ 18, 32, 3, 7, 10, 18, 12, 16, 34, 16,
+ 4, 96, 56, 18, 9, 31, 71, 97, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 25 */
+
+ 94, 12, 29, 94, 12, 29, 39, 9, 40, 22,
+ 4, 8, 20, 76, 116, 32, 17, 9, 4, 10,
+ 8, 1, 3, 11, 21, 24, 2, 55, 73, 91,
+ 14, 27, 11, 4, 10, 8, 29, 11, 22, 16,
+ 3, 21, 33, 2, 29, 37, 63, 12, 11, 23,
+ 8, 25, 23, 47, 1, 23, 15, 37, 1, 4,
+ 44, 0, 0, 0, 9, 57, 67, 4, 4, 5,
+ 56, 4, 39, 5, 2, 11, 46, 40, 1, 4,
+ 25, 4, 27, 19, 13, 33, 35, 39, 43, 48,
+ 3, 6, 17, 11, 43, 25, 29, 8, 5, 3,
+ 19, 30, 9, 0, 19, 37, 13, 21, 13, 0,
+ 9, 21, 7, 2, 33, 17, 116, 92, 30, 16,
+ 8, 13, 1, 8, 2, 20, 8, 3, 25, 3,
+ 13, 13, 9, 10, 18, 20, 10, 10, 14, 18,
+ 38, 22, 12, 8, 6, 4, 18, 22, 39, 1,
+ 10, 15, 16, 20, 34, 26, 4, 38, 24, 28,
+ 7, 8, 4, 6, 30, 51, 24, 40, 34, 46,
+ 50, 40, 40, 62, 58, 20, 42, 50, 3, 124,
+ 17, 32, 16, 22, 26, 24, 32, 28, 44, 0,
+ 12, 14, 24, 124, 29, 74, 80, 70, 64, 62,
+ 56, 50, 48, 42, 24, 18, 14, 4, 3, 45,
+ 23, 23, 51, 14, 8, 4, 2, 15, 19, 19,
+ 9, 31, 25, 35, 53, 37, 73, 25, 32, 26,
+ 20, 10, 8, 3, 11, 15, 19, 9, 58, 36,
+ 26, 12, 20, 1, 7, 15, 29, 10, 70, 54,
+ 44, 40, 32, 10, 2, 5, 15, 11, 88, 60,
+ 30, 8, 24, 0, 19, 25, 14, 98, 80, 58,
+ 38, 44, 8, 3, 9, 15, 124, 59, 49, 29,
+ 43, 41, 29, 29, 21, 23, 25, 19, 13, 31,
+ 29, 37, 41, 13, 31, 25, 23, 13, 9, 13,
+ 15, 9, 13, 11, 13, 31, 10, 12, 22, 6,
+ 1, 12, 8, 6, 6, 4, 2, 3, 1, 7,
+ 15, 9, 12, 35, 2, 32, 0, 18, 8, 14,
+ 24, 8, 4, 8, 18, 11, 13, 29, 80, 82,
+ 84, 70, 62, 74, 68, 68, 66, 56, 64, 58,
+ 42, 32, 8, 54, 56, 48, 34, 36, 34, 18,
+ 14, 4, 6, 11, 17, 19, 37, 78, 78, 76,
+ 68, 50, 56, 50, 32, 32, 22, 14, 5, 9,
+ 23, 39, 5, 5, 43, 14, 12, 0, 15, 9,
+ 13, 25, 13, 29, 29, 49, 45, 57, 73, 15,
+ 35, 45, 27, 15, 4, 13, 20, 38, 7, 7,
+ 14, 22, 8, 20, 38, 8, 2, 94, 52, 14,
+ 15, 37, 79, 105, 125, 125, 0, 78, 58, 50,
+ 30, 42, 20, 10, 6, 15, 25, 15, 4, 9,
+ 20, 34, 3, 5, 10, 18, 12, 18, 36, 16,
+ 4, 94, 52, 14, 15, 37, 79, 105, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 26 */
+
+ 92, 12, 29, 92, 12, 29, 35, 5, 40, 22,
+ 2, 6, 16, 74, 116, 32, 13, 9, 4, 12,
+ 6, 1, 3, 13, 25, 22, 3, 61, 77, 95,
+ 20, 25, 11, 4, 12, 6, 29, 9, 22, 14,
+ 3, 19, 31, 2, 29, 37, 63, 12, 11, 23,
+ 8, 25, 21, 45, 1, 23, 15, 37, 0, 4,
+ 44, 0, 0, 0, 9, 57, 67, 6, 2, 5,
+ 54, 2, 39, 0, 4, 9, 50, 44, 0, 8,
+ 23, 6, 23, 17, 9, 33, 33, 37, 43, 48,
+ 3, 8, 13, 11, 41, 25, 25, 8, 7, 5,
+ 21, 30, 9, 2, 19, 35, 11, 21, 11, 0,
+ 7, 17, 5, 4, 31, 15, 124, 100, 36, 22,
+ 8, 11, 1, 8, 2, 20, 10, 3, 25, 0,
+ 13, 17, 13, 16, 18, 22, 10, 10, 14, 18,
+ 40, 24, 14, 6, 6, 4, 18, 22, 41, 1,
+ 10, 17, 16, 18, 34, 24, 4, 38, 24, 28,
+ 9, 8, 2, 4, 30, 51, 24, 38, 32, 42,
+ 44, 38, 36, 58, 54, 16, 36, 46, 15, 124,
+ 25, 28, 12, 16, 20, 18, 26, 18, 36, 5,
+ 8, 6, 20, 124, 31, 70, 76, 66, 60, 58,
+ 52, 46, 44, 38, 18, 14, 10, 0, 7, 49,
+ 23, 23, 53, 12, 6, 2, 1, 17, 21, 21,
+ 11, 33, 27, 37, 53, 39, 71, 21, 34, 28,
+ 20, 12, 8, 3, 11, 13, 15, 5, 58, 38,
+ 28, 12, 22, 0, 5, 13, 25, 10, 72, 56,
+ 46, 40, 34, 12, 2, 3, 15, 11, 88, 60,
+ 30, 6, 24, 0, 19, 25, 14, 96, 78, 56,
+ 36, 44, 8, 1, 9, 15, 124, 55, 45, 27,
+ 39, 39, 27, 27, 19, 21, 21, 17, 9, 29,
+ 27, 37, 39, 9, 31, 25, 23, 13, 9, 13,
+ 17, 9, 13, 11, 13, 33, 12, 12, 24, 6,
+ 1, 12, 8, 6, 6, 4, 2, 3, 1, 7,
+ 15, 9, 12, 37, 0, 32, 1, 18, 6, 12,
+ 24, 6, 2, 8, 18, 13, 13, 31, 78, 80,
+ 82, 66, 58, 70, 64, 62, 60, 50, 58, 52,
+ 36, 28, 4, 44, 48, 40, 24, 30, 28, 14,
+ 10, 1, 2, 15, 21, 21, 37, 72, 72, 70,
+ 64, 44, 50, 44, 26, 28, 16, 10, 11, 13,
+ 27, 41, 7, 5, 47, 12, 8, 1, 19, 13,
+ 17, 27, 15, 31, 31, 51, 49, 59, 73, 17,
+ 39, 49, 25, 15, 6, 11, 22, 40, 7, 5,
+ 14, 22, 8, 20, 40, 8, 2, 92, 48, 8,
+ 21, 45, 87, 113, 125, 125, 2, 78, 58, 50,
+ 30, 42, 20, 10, 6, 13, 25, 15, 6, 9,
+ 22, 36, 3, 5, 12, 20, 12, 18, 36, 16,
+ 4, 92, 48, 8, 21, 45, 87, 113, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 27 */
+
+ 90, 12, 31, 90, 12, 31, 31, 3, 42, 22,
+ 2, 2, 12, 72, 116, 32, 11, 9, 6, 12,
+ 6, 3, 1, 13, 27, 18, 7, 69, 83, 97,
+ 24, 23, 11, 6, 12, 6, 31, 9, 22, 14,
+ 3, 19, 31, 2, 29, 35, 61, 12, 9, 21,
+ 8, 23, 21, 45, 1, 23, 15, 37, 0, 4,
+ 44, 0, 0, 0, 7, 57, 67, 6, 2, 7,
+ 54, 2, 39, 4, 6, 7, 52, 50, 4, 10,
+ 19, 10, 21, 15, 5, 33, 33, 37, 43, 48,
+ 3, 10, 7, 11, 39, 23, 21, 6, 7, 5,
+ 21, 32, 9, 2, 17, 35, 11, 19, 9, 2,
+ 5, 15, 1, 4, 29, 13, 124, 110, 42, 28,
+ 8, 11, 1, 10, 2, 20, 10, 1, 23, 4,
+ 11, 21, 17, 22, 18, 22, 10, 10, 16, 20,
+ 44, 26, 14, 6, 6, 4, 20, 24, 43, 1,
+ 10, 17, 14, 16, 32, 24, 2, 38, 24, 28,
+ 13, 8, 0, 2, 28, 51, 24, 38, 32, 38,
+ 38, 36, 32, 52, 48, 12, 32, 42, 29, 124,
+ 31, 24, 10, 12, 12, 14, 20, 10, 26, 11,
+ 4, 0, 16, 124, 35, 66, 72, 62, 54, 52,
+ 48, 40, 40, 34, 14, 8, 6, 3, 9, 51,
+ 23, 23, 53, 10, 4, 0, 3, 19, 23, 23,
+ 15, 35, 27, 39, 55, 39, 71, 19, 36, 28,
+ 20, 12, 10, 1, 9, 13, 11, 3, 60, 38,
+ 28, 14, 24, 2, 1, 9, 21, 12, 72, 56,
+ 46, 42, 36, 12, 2, 3, 13, 11, 90, 60,
+ 28, 6, 24, 0, 19, 23, 12, 96, 78, 54,
+ 34, 42, 8, 1, 9, 15, 124, 53, 43, 25,
+ 37, 35, 25, 23, 15, 19, 19, 13, 5, 29,
+ 25, 35, 37, 5, 29, 25, 23, 11, 7, 13,
+ 17, 9, 15, 11, 13, 33, 12, 14, 26, 6,
+ 1, 14, 10, 6, 8, 6, 2, 5, 1, 7,
+ 15, 9, 14, 39, 1, 32, 3, 16, 4, 12,
+ 24, 4, 2, 8, 16, 17, 15, 33, 74, 78,
+ 80, 64, 54, 66, 60, 58, 56, 46, 52, 48,
+ 30, 22, 0, 36, 40, 32, 16, 22, 22, 8,
+ 6, 5, 1, 19, 23, 23, 39, 68, 68, 66,
+ 58, 38, 46, 40, 20, 22, 12, 4, 17, 17,
+ 29, 45, 9, 7, 49, 8, 6, 5, 23, 15,
+ 21, 31, 17, 35, 33, 53, 51, 61, 75, 21,
+ 41, 51, 25, 13, 6, 11, 24, 42, 7, 5,
+ 16, 24, 8, 22, 42, 8, 2, 90, 44, 4,
+ 27, 51, 95, 121, 125, 125, 2, 78, 58, 50,
+ 30, 44, 22, 10, 6, 13, 23, 13, 6, 7,
+ 24, 38, 1, 3, 12, 20, 12, 18, 38, 16,
+ 4, 90, 44, 4, 27, 51, 95, 121, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 28 */
+
+ 86, 12, 31, 86, 12, 31, 29, 0, 42, 22,
+ 0, 0, 8, 70, 116, 30, 7, 9, 6, 14,
+ 6, 3, 1, 15, 31, 16, 13, 75, 87, 101,
+ 30, 21, 11, 6, 14, 6, 31, 7, 22, 12,
+ 3, 19, 29, 2, 29, 35, 61, 12, 9, 21,
+ 8, 23, 19, 45, 1, 23, 15, 37, 0, 4,
+ 44, 0, 0, 0, 7, 59, 67, 8, 0, 7,
+ 52, 0, 39, 10, 8, 5, 56, 54, 6, 14,
+ 15, 12, 19, 13, 1, 33, 33, 35, 43, 48,
+ 3, 12, 3, 11, 39, 23, 17, 6, 9, 7,
+ 23, 32, 9, 4, 17, 33, 9, 19, 7, 2,
+ 5, 11, 0, 6, 27, 11, 124, 118, 48, 34,
+ 8, 11, 1, 10, 2, 20, 12, 1, 23, 8,
+ 11, 25, 21, 26, 18, 22, 10, 10, 16, 20,
+ 46, 28, 14, 6, 6, 4, 20, 26, 45, 1,
+ 10, 19, 14, 14, 32, 24, 2, 38, 24, 28,
+ 15, 8, 1, 0, 28, 51, 22, 36, 30, 34,
+ 30, 34, 28, 48, 44, 8, 26, 38, 41, 124,
+ 39, 20, 6, 8, 6, 8, 12, 2, 18, 17,
+ 0, 7, 12, 124, 39, 62, 68, 56, 50, 48,
+ 44, 36, 36, 30, 10, 4, 2, 7, 13, 55,
+ 23, 23, 55, 8, 2, 1, 5, 21, 27, 25,
+ 19, 37, 29, 41, 57, 41, 69, 15, 38, 30,
+ 20, 12, 12, 1, 9, 11, 7, 1, 60, 38,
+ 30, 16, 26, 4, 0, 7, 19, 14, 74, 58,
+ 48, 42, 38, 14, 2, 3, 13, 11, 90, 60,
+ 26, 6, 24, 0, 19, 23, 12, 94, 76, 52,
+ 32, 42, 8, 1, 9, 15, 124, 51, 39, 23,
+ 33, 33, 23, 19, 11, 17, 15, 9, 1, 27,
+ 23, 35, 35, 3, 29, 25, 23, 11, 7, 13,
+ 17, 9, 15, 11, 13, 35, 14, 14, 28, 6,
+ 1, 14, 10, 6, 8, 6, 2, 5, 1, 7,
+ 15, 9, 14, 41, 3, 32, 5, 14, 2, 10,
+ 24, 2, 0, 8, 16, 19, 15, 35, 72, 76,
+ 78, 60, 50, 62, 56, 54, 50, 40, 46, 42,
+ 24, 18, 3, 28, 32, 24, 6, 16, 14, 4,
+ 0, 11, 5, 23, 27, 25, 41, 64, 62, 60,
+ 54, 32, 40, 34, 14, 18, 6, 0, 23, 21,
+ 33, 47, 11, 9, 53, 6, 2, 9, 27, 19,
+ 25, 35, 19, 37, 35, 55, 53, 63, 77, 23,
+ 43, 53, 25, 13, 8, 9, 26, 44, 7, 3,
+ 16, 24, 8, 22, 44, 8, 2, 88, 40, 1,
+ 33, 57, 103, 125, 125, 125, 2, 78, 58, 50,
+ 30, 44, 22, 10, 6, 11, 23, 13, 8, 7,
+ 24, 40, 1, 3, 12, 20, 12, 18, 38, 16,
+ 4, 88, 40, 1, 33, 57, 103, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 29 */
+
+ 84, 12, 31, 84, 12, 31, 25, 2, 42, 22,
+ 0, 1, 4, 68, 116, 30, 3, 9, 6, 16,
+ 4, 5, 1, 17, 35, 14, 17, 83, 93, 105,
+ 36, 19, 9, 6, 16, 4, 31, 5, 22, 12,
+ 3, 17, 27, 2, 29, 35, 61, 12, 9, 21,
+ 8, 23, 19, 43, 1, 23, 15, 37, 2, 4,
+ 44, 0, 0, 0, 5, 59, 67, 8, 1, 7,
+ 50, 0, 39, 14, 10, 3, 60, 58, 10, 16,
+ 13, 16, 15, 11, 4, 33, 31, 35, 41, 48,
+ 3, 14, 0, 11, 37, 21, 13, 6, 9, 7,
+ 23, 32, 9, 4, 15, 31, 7, 17, 5, 4,
+ 3, 9, 4, 8, 23, 9, 124, 124, 54, 40,
+ 10, 9, 0, 12, 2, 20, 12, 1, 21, 12,
+ 11, 29, 25, 32, 18, 24, 10, 10, 16, 22,
+ 50, 30, 16, 4, 6, 4, 22, 26, 47, 0,
+ 10, 19, 12, 12, 30, 22, 0, 38, 24, 28,
+ 17, 10, 3, 0, 26, 51, 22, 34, 28, 30,
+ 24, 32, 26, 44, 40, 4, 22, 34, 53, 124,
+ 45, 16, 2, 2, 0, 4, 6, 7, 10, 21,
+ 3, 13, 8, 124, 41, 58, 64, 52, 46, 44,
+ 40, 32, 32, 26, 4, 0, 1, 9, 17, 59,
+ 23, 23, 55, 6, 0, 3, 9, 23, 29, 27,
+ 21, 39, 29, 43, 57, 43, 67, 13, 40, 30,
+ 20, 14, 12, 0, 7, 9, 3, 2, 60, 40,
+ 30, 16, 28, 6, 4, 3, 15, 14, 76, 60,
+ 50, 44, 40, 14, 4, 1, 11, 11, 92, 60,
+ 26, 4, 24, 0, 19, 21, 12, 94, 74, 50,
+ 30, 42, 8, 0, 9, 13, 124, 47, 37, 19,
+ 29, 29, 21, 17, 9, 15, 11, 7, 2, 25,
+ 21, 33, 33, 0, 29, 25, 21, 9, 5, 13,
+ 19, 9, 17, 11, 13, 35, 14, 14, 30, 6,
+ 1, 14, 10, 6, 8, 6, 2, 7, 1, 7,
+ 15, 9, 16, 43, 5, 32, 7, 14, 2, 8,
+ 24, 2, 1, 8, 16, 21, 15, 37, 70, 74,
+ 76, 56, 46, 58, 52, 48, 46, 34, 40, 36,
+ 18, 12, 7, 18, 24, 16, 3, 10, 8, 1,
+ 3, 15, 9, 25, 29, 25, 41, 58, 58, 56,
+ 50, 26, 34, 30, 8, 14, 2, 3, 29, 25,
+ 37, 51, 11, 9, 57, 4, 0, 11, 31, 23,
+ 29, 37, 21, 41, 37, 55, 57, 65, 77, 27,
+ 47, 57, 23, 13, 10, 9, 28, 46, 5, 1,
+ 18, 26, 8, 24, 46, 8, 2, 86, 36, 7,
+ 39, 65, 111, 125, 125, 125, 4, 78, 58, 50,
+ 30, 46, 22, 12, 8, 11, 23, 13, 8, 7,
+ 26, 42, 1, 3, 14, 22, 12, 20, 40, 16,
+ 4, 86, 36, 7, 39, 65, 111, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 30 */
+
+ 82, 12, 31, 82, 12, 31, 21, 6, 44, 22,
+ 1, 5, 0, 66, 116, 30, 0, 9, 8, 18,
+ 4, 5, 1, 17, 37, 12, 23, 89, 97, 107,
+ 42, 17, 9, 8, 18, 4, 33, 3, 22, 10,
+ 3, 17, 27, 2, 29, 35, 61, 12, 9, 19,
+ 8, 23, 17, 43, 1, 23, 15, 37, 2, 4,
+ 44, 0, 0, 0, 5, 59, 67, 10, 3, 7,
+ 48, 1, 39, 20, 12, 1, 62, 62, 12, 20,
+ 9, 18, 13, 9, 8, 33, 31, 33, 41, 48,
+ 3, 16, 4, 11, 35, 21, 9, 6, 11, 9,
+ 25, 32, 9, 6, 15, 29, 5, 17, 3, 4,
+ 1, 5, 6, 8, 21, 7, 124, 124, 60, 46,
+ 10, 9, 0, 12, 2, 20, 14, 0, 21, 16,
+ 11, 33, 29, 38, 18, 24, 10, 10, 16, 22,
+ 52, 32, 16, 4, 6, 4, 22, 28, 49, 0,
+ 10, 21, 12, 10, 30, 22, 0, 38, 24, 28,
+ 19, 10, 5, 1, 26, 51, 22, 34, 28, 26,
+ 18, 30, 22, 40, 34, 0, 16, 30, 67, 124,
+ 53, 12, 1, 1, 5, 1, 0, 15, 2, 27,
+ 7, 21, 4, 124, 45, 54, 60, 48, 40, 38,
+ 36, 28, 28, 22, 0, 3, 5, 13, 19, 63,
+ 23, 23, 57, 4, 1, 5, 11, 25, 31, 29,
+ 25, 41, 31, 45, 59, 43, 67, 9, 42, 32,
+ 20, 14, 14, 0, 7, 9, 0, 4, 60, 40,
+ 32, 18, 30, 8, 6, 1, 11, 16, 76, 60,
+ 50, 44, 42, 16, 4, 1, 11, 11, 92, 60,
+ 24, 4, 24, 0, 19, 21, 12, 92, 74, 48,
+ 28, 42, 8, 0, 9, 13, 124, 45, 33, 17,
+ 27, 27, 19, 13, 5, 13, 9, 3, 6, 25,
+ 19, 33, 31, 4, 27, 25, 21, 9, 5, 13,
+ 19, 9, 17, 11, 13, 37, 16, 16, 32, 6,
+ 1, 16, 10, 6, 8, 6, 2, 7, 1, 7,
+ 15, 9, 16, 45, 7, 32, 9, 12, 0, 8,
+ 24, 0, 3, 8, 16, 23, 15, 39, 68, 72,
+ 74, 54, 42, 54, 48, 44, 40, 30, 34, 30,
+ 12, 8, 11, 10, 16, 8, 13, 2, 2, 5,
+ 7, 21, 13, 29, 33, 27, 43, 54, 52, 50,
+ 44, 20, 30, 24, 2, 8, 3, 9, 35, 29,
+ 41, 53, 13, 11, 59, 0, 3, 15, 35, 25,
+ 33, 41, 23, 43, 39, 57, 59, 67, 79, 29,
+ 49, 59, 23, 11, 10, 7, 30, 48, 5, 1,
+ 18, 26, 8, 24, 48, 8, 2, 84, 32, 11,
+ 45, 71, 119, 125, 125, 125, 4, 78, 58, 50,
+ 30, 46, 24, 12, 8, 9, 23, 13, 10, 5,
+ 28, 44, 1, 1, 14, 22, 12, 20, 40, 16,
+ 4, 84, 32, 11, 45, 71, 119, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 31 */
+
+ 80, 12, 31, 80, 12, 31, 17, 8, 44, 22,
+ 1, 7, 3, 64, 116, 30, 4, 9, 8, 20,
+ 4, 7, 1, 19, 41, 10, 27, 97, 103, 111,
+ 48, 15, 9, 8, 20, 4, 33, 1, 22, 10,
+ 3, 17, 25, 2, 29, 35, 61, 12, 9, 19,
+ 8, 23, 17, 43, 1, 23, 15, 37, 2, 4,
+ 44, 0, 0, 0, 3, 59, 67, 10, 5, 7,
+ 46, 1, 39, 24, 14, 0, 66, 66, 16, 22,
+ 5, 22, 11, 7, 12, 33, 31, 33, 41, 48,
+ 3, 18, 8, 11, 33, 19, 5, 6, 11, 9,
+ 25, 32, 9, 6, 13, 27, 3, 15, 1, 6,
+ 0, 3, 10, 10, 19, 5, 124, 124, 66, 52,
+ 10, 9, 0, 14, 2, 20, 14, 0, 19, 20,
+ 11, 37, 33, 44, 18, 24, 10, 10, 16, 24,
+ 56, 34, 16, 4, 6, 4, 24, 30, 51, 0,
+ 10, 21, 10, 8, 28, 22, 1, 38, 24, 28,
+ 21, 10, 7, 3, 24, 51, 22, 32, 26, 22,
+ 12, 28, 18, 36, 30, 3, 12, 26, 79, 124,
+ 59, 8, 5, 5, 11, 5, 5, 23, 5, 33,
+ 11, 27, 0, 124, 49, 50, 56, 44, 36, 34,
+ 32, 24, 24, 18, 3, 7, 9, 17, 23, 67,
+ 23, 23, 57, 2, 3, 7, 13, 27, 33, 31,
+ 29, 43, 31, 47, 61, 45, 65, 7, 44, 32,
+ 20, 14, 16, 2, 5, 7, 4, 6, 60, 40,
+ 32, 20, 32, 10, 10, 2, 7, 18, 78, 62,
+ 52, 46, 44, 16, 4, 1, 9, 11, 94, 60,
+ 22, 4, 24, 0, 19, 19, 12, 92, 72, 46,
+ 26, 42, 8, 0, 9, 13, 124, 43, 31, 15,
+ 23, 23, 17, 9, 1, 11, 5, 0, 10, 23,
+ 17, 31, 29, 8, 27, 25, 21, 7, 3, 13,
+ 19, 9, 19, 11, 13, 37, 16, 16, 34, 6,
+ 1, 16, 10, 6, 8, 6, 2, 9, 1, 7,
+ 15, 9, 18, 47, 9, 32, 11, 10, 1, 6,
+ 24, 1, 5, 8, 16, 25, 15, 41, 66, 70,
+ 72, 50, 38, 50, 44, 40, 36, 24, 28, 24,
+ 6, 2, 15, 2, 8, 0, 23, 3, 3, 11,
+ 11, 25, 17, 33, 35, 29, 45, 50, 48, 46,
+ 40, 14, 24, 20, 3, 4, 7, 13, 41, 33,
+ 45, 57, 15, 13, 63, 1, 5, 19, 39, 29,
+ 37, 45, 25, 47, 41, 59, 61, 69, 81, 33,
+ 51, 61, 23, 11, 12, 7, 32, 50, 5, 0,
+ 20, 28, 8, 26, 50, 8, 2, 82, 28, 17,
+ 51, 77, 125, 125, 125, 125, 4, 78, 58, 50,
+ 30, 48, 24, 12, 8, 9, 23, 13, 10, 5,
+ 30, 46, 1, 1, 14, 22, 12, 20, 42, 16,
+ 4, 82, 28, 17, 51, 77, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 32 */
+
+ 76, 10, 33, 76, 10, 33, 15, 10, 44, 22,
+ 3, 11, 9, 62, 116, 28, 6, 11, 8, 20,
+ 2, 9, 1, 21, 45, 6, 33, 105, 109, 115,
+ 52, 15, 9, 8, 20, 2, 35, 1, 22, 8,
+ 3, 17, 25, 0, 31, 35, 61, 10, 9, 19,
+ 8, 23, 17, 43, 1, 23, 17, 37, 2, 4,
+ 44, 0, 0, 0, 3, 61, 67, 10, 7, 9,
+ 44, 3, 41, 28, 16, 2, 68, 70, 18, 24,
+ 3, 24, 9, 5, 16, 33, 31, 33, 41, 48,
+ 3, 18, 12, 11, 33, 19, 3, 4, 13, 11,
+ 27, 32, 9, 6, 13, 27, 3, 15, 1, 6,
+ 0, 1, 12, 10, 17, 3, 124, 124, 72, 56,
+ 10, 9, 0, 14, 0, 20, 14, 0, 19, 24,
+ 11, 41, 37, 48, 18, 24, 10, 10, 16, 24,
+ 58, 34, 16, 2, 4, 4, 24, 30, 55, 0,
+ 10, 23, 8, 4, 26, 20, 3, 38, 24, 26,
+ 25, 10, 9, 5, 22, 51, 20, 30, 24, 16,
+ 4, 24, 14, 30, 24, 9, 6, 22, 93, 124,
+ 67, 2, 9, 11, 19, 11, 13, 33, 15, 39,
+ 15, 35, 3, 124, 53, 44, 50, 38, 30, 28,
+ 26, 18, 18, 14, 9, 13, 13, 21, 27, 71,
+ 23, 25, 59, 0, 7, 11, 17, 31, 37, 35,
+ 33, 45, 33, 49, 63, 47, 65, 5, 44, 32,
+ 20, 14, 16, 2, 5, 7, 8, 8, 60, 40,
+ 32, 20, 32, 12, 12, 4, 5, 18, 78, 62,
+ 52, 46, 46, 16, 4, 1, 9, 13, 94, 58,
+ 20, 2, 24, 0, 19, 19, 10, 90, 70, 42,
+ 24, 40, 8, 0, 9, 13, 124, 41, 29, 13,
+ 21, 21, 15, 7, 0, 9, 3, 2, 14, 23,
+ 17, 31, 27, 10, 27, 25, 21, 7, 3, 13,
+ 21, 11, 21, 11, 15, 39, 16, 16, 36, 6,
+ 3, 16, 10, 4, 8, 6, 2, 11, 1, 7,
+ 15, 9, 18, 49, 11, 32, 15, 8, 3, 4,
+ 22, 3, 7, 8, 14, 29, 17, 45, 62, 66,
+ 70, 46, 34, 44, 38, 34, 30, 18, 22, 18,
+ 1, 3, 19, 7, 0, 9, 33, 11, 11, 17,
+ 17, 31, 21, 37, 39, 31, 47, 44, 42, 40,
+ 34, 8, 18, 14, 11, 1, 13, 19, 47, 37,
+ 49, 61, 17, 15, 67, 5, 9, 23, 45, 33,
+ 41, 49, 29, 51, 45, 61, 65, 71, 83, 37,
+ 55, 65, 23, 11, 12, 7, 34, 52, 5, 0,
+ 20, 28, 8, 26, 50, 8, 0, 78, 24, 23,
+ 59, 85, 125, 125, 125, 125, 4, 78, 58, 50,
+ 30, 48, 24, 12, 8, 9, 23, 13, 10, 5,
+ 30, 46, 1, 1, 14, 22, 12, 20, 42, 14,
+ 2, 78, 24, 23, 59, 85, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 33 */
+
+ 74, 10, 33, 74, 10, 33, 11, 14, 46, 24,
+ 3, 13, 13, 60, 118, 28, 10, 11, 10, 22,
+ 2, 9, 0, 21, 47, 4, 37, 111, 113, 117,
+ 58, 13, 7, 10, 22, 2, 35, 0, 22, 8,
+ 1, 15, 23, 0, 31, 33, 59, 10, 7, 17,
+ 8, 21, 15, 41, 0, 21, 17, 35, 4, 4,
+ 44, 0, 0, 0, 1, 61, 67, 12, 7, 9,
+ 44, 3, 41, 34, 20, 6, 72, 76, 22, 28,
+ 0, 28, 5, 1, 22, 33, 29, 31, 39, 48,
+ 3, 20, 18, 9, 31, 17, 0, 4, 13, 11,
+ 27, 34, 9, 8, 11, 25, 1, 13, 0, 8,
+ 2, 2, 16, 12, 13, 0, 124, 124, 80, 62,
+ 12, 7, 2, 16, 0, 22, 16, 2, 17, 30,
+ 9, 45, 39, 54, 18, 26, 10, 12, 18, 26,
+ 62, 36, 18, 2, 4, 4, 26, 32, 57, 2,
+ 10, 23, 8, 2, 26, 20, 3, 38, 24, 26,
+ 27, 12, 9, 5, 22, 51, 20, 30, 24, 12,
+ 1, 22, 12, 26, 20, 13, 2, 18, 105, 124,
+ 73, 1, 11, 15, 25, 15, 19, 41, 23, 43,
+ 19, 41, 7, 124, 55, 40, 46, 34, 26, 24,
+ 22, 14, 14, 10, 13, 17, 15, 23, 29, 73,
+ 23, 25, 59, 0, 9, 13, 19, 33, 39, 37,
+ 35, 47, 33, 51, 63, 47, 63, 1, 46, 34,
+ 20, 16, 18, 4, 3, 5, 14, 12, 62, 42,
+ 34, 22, 34, 16, 16, 8, 1, 20, 80, 64,
+ 54, 48, 48, 18, 6, 0, 7, 13, 96, 58,
+ 20, 2, 26, 2, 19, 17, 10, 90, 70, 40,
+ 22, 40, 10, 2, 7, 11, 124, 37, 25, 9,
+ 17, 17, 11, 3, 4, 5, 0, 6, 20, 21,
+ 15, 29, 23, 14, 25, 23, 19, 5, 1, 11,
+ 21, 11, 21, 11, 15, 39, 18, 18, 38, 8,
+ 3, 18, 12, 4, 10, 8, 2, 11, 0, 5,
+ 13, 7, 20, 49, 13, 32, 17, 8, 3, 4,
+ 22, 3, 7, 8, 14, 31, 17, 47, 60, 64,
+ 70, 44, 32, 40, 34, 30, 26, 14, 18, 14,
+ 7, 7, 23, 15, 5, 17, 41, 17, 17, 21,
+ 21, 35, 23, 39, 41, 31, 47, 40, 38, 36,
+ 30, 4, 14, 10, 17, 5, 17, 23, 51, 39,
+ 51, 63, 17, 15, 69, 7, 11, 25, 49, 35,
+ 43, 51, 31, 53, 47, 61, 67, 71, 83, 39,
+ 57, 67, 21, 9, 14, 5, 38, 56, 3, 2,
+ 22, 30, 10, 28, 52, 8, 0, 76, 20, 27,
+ 65, 91, 125, 125, 125, 125, 6, 78, 60, 52,
+ 32, 50, 26, 14, 10, 7, 21, 11, 12, 3,
+ 32, 48, 0, 0, 16, 24, 12, 22, 44, 14,
+ 2, 76, 20, 27, 65, 91, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 34 */
+
+ 72, 10, 33, 72, 10, 33, 7, 16, 46, 24,
+ 3, 15, 17, 58, 118, 28, 14, 11, 10, 24,
+ 2, 11, 0, 23, 51, 2, 43, 119, 119, 121,
+ 64, 11, 7, 10, 24, 2, 35, 2, 22, 6,
+ 1, 15, 21, 0, 31, 33, 59, 10, 7, 17,
+ 8, 21, 15, 41, 0, 21, 17, 35, 4, 4,
+ 44, 0, 0, 0, 0, 61, 67, 12, 9, 9,
+ 42, 3, 41, 38, 22, 8, 76, 80, 24, 30,
+ 4, 30, 3, 0, 26, 33, 29, 31, 39, 48,
+ 3, 22, 22, 9, 29, 15, 4, 4, 13, 11,
+ 29, 34, 9, 8, 9, 23, 0, 11, 2, 8,
+ 4, 4, 18, 14, 11, 2, 124, 124, 86, 68,
+ 12, 7, 2, 16, 0, 22, 16, 2, 17, 34,
+ 9, 49, 43, 60, 18, 26, 10, 12, 18, 26,
+ 66, 38, 18, 2, 4, 4, 28, 34, 59, 2,
+ 10, 23, 6, 0, 24, 20, 5, 38, 24, 26,
+ 29, 12, 11, 7, 20, 51, 20, 28, 22, 8,
+ 7, 20, 8, 22, 16, 17, 1, 14, 117, 124,
+ 81, 5, 15, 19, 31, 19, 25, 49, 31, 49,
+ 23, 47, 11, 124, 59, 36, 42, 30, 22, 20,
+ 18, 10, 10, 6, 17, 21, 19, 27, 33, 77,
+ 23, 25, 61, 1, 11, 15, 21, 35, 41, 39,
+ 39, 49, 33, 53, 65, 49, 61, 0, 48, 34,
+ 20, 16, 20, 6, 1, 3, 18, 14, 62, 42,
+ 34, 24, 36, 18, 20, 12, 2, 22, 82, 66,
+ 56, 50, 50, 18, 6, 0, 5, 13, 98, 58,
+ 18, 2, 26, 2, 19, 17, 10, 90, 68, 38,
+ 20, 40, 10, 2, 7, 11, 124, 35, 23, 7,
+ 13, 13, 9, 0, 8, 3, 4, 10, 24, 19,
+ 13, 29, 21, 18, 25, 23, 19, 3, 1, 11,
+ 21, 11, 23, 11, 15, 39, 18, 18, 40, 8,
+ 3, 18, 12, 4, 10, 8, 2, 13, 0, 5,
+ 13, 7, 22, 51, 15, 32, 19, 6, 5, 2,
+ 22, 5, 9, 8, 14, 33, 17, 49, 58, 62,
+ 68, 40, 28, 36, 30, 26, 22, 8, 12, 8,
+ 13, 13, 27, 23, 13, 25, 51, 23, 23, 27,
+ 25, 41, 27, 43, 43, 33, 49, 36, 34, 30,
+ 26, 1, 8, 4, 23, 9, 23, 27, 57, 43,
+ 55, 67, 19, 17, 73, 9, 13, 29, 53, 39,
+ 47, 55, 33, 57, 49, 63, 69, 73, 85, 43,
+ 59, 69, 21, 9, 16, 5, 40, 58, 3, 4,
+ 24, 32, 10, 28, 54, 8, 0, 74, 16, 33,
+ 71, 97, 125, 125, 125, 125, 6, 78, 60, 52,
+ 32, 52, 26, 14, 10, 7, 21, 11, 12, 3,
+ 34, 50, 0, 0, 16, 24, 12, 22, 46, 14,
+ 2, 74, 16, 33, 71, 97, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 35 */
+
+ 70, 10, 33, 70, 10, 33, 3, 20, 48, 24,
+ 5, 19, 21, 56, 118, 28, 18, 11, 12, 26,
+ 2, 11, 0, 23, 53, 0, 47, 125, 123, 123,
+ 70, 9, 7, 12, 26, 2, 37, 4, 22, 6,
+ 1, 15, 21, 0, 31, 33, 59, 10, 7, 15,
+ 8, 21, 13, 41, 0, 21, 17, 35, 4, 4,
+ 44, 0, 0, 0, 0, 61, 67, 14, 11, 9,
+ 40, 5, 41, 44, 24, 10, 78, 84, 28, 34,
+ 8, 34, 1, 2, 30, 33, 29, 29, 39, 48,
+ 3, 24, 26, 9, 27, 15, 8, 4, 15, 13,
+ 29, 34, 9, 10, 9, 21, 2, 11, 4, 10,
+ 6, 8, 22, 14, 9, 4, 124, 124, 92, 74,
+ 12, 7, 2, 18, 0, 22, 18, 4, 15, 38,
+ 9, 53, 47, 66, 18, 26, 10, 12, 18, 28,
+ 68, 40, 18, 2, 4, 4, 28, 36, 61, 2,
+ 10, 25, 6, 1, 24, 20, 5, 38, 24, 26,
+ 31, 12, 13, 9, 20, 51, 20, 28, 22, 4,
+ 13, 18, 4, 18, 10, 21, 7, 10, 125, 124,
+ 87, 9, 19, 23, 37, 25, 31, 57, 39, 55,
+ 27, 55, 15, 124, 63, 32, 38, 26, 16, 14,
+ 14, 6, 6, 2, 21, 25, 23, 31, 35, 81,
+ 23, 25, 61, 3, 13, 17, 23, 37, 43, 41,
+ 43, 51, 35, 55, 67, 49, 61, 4, 50, 36,
+ 20, 16, 22, 6, 1, 3, 22, 16, 62, 42,
+ 36, 26, 38, 20, 22, 14, 6, 24, 82, 66,
+ 56, 50, 52, 20, 6, 0, 5, 13, 98, 58,
+ 16, 2, 26, 2, 19, 15, 10, 88, 68, 36,
+ 18, 40, 10, 2, 7, 11, 124, 33, 19, 5,
+ 11, 11, 7, 4, 12, 1, 6, 14, 28, 19,
+ 11, 27, 19, 22, 23, 23, 19, 3, 0, 11,
+ 21, 11, 23, 11, 15, 41, 20, 20, 42, 8,
+ 3, 20, 12, 4, 10, 8, 2, 13, 0, 5,
+ 13, 7, 22, 53, 17, 32, 21, 4, 7, 2,
+ 22, 7, 11, 8, 14, 35, 17, 51, 56, 60,
+ 66, 38, 24, 32, 26, 22, 16, 4, 6, 2,
+ 19, 17, 31, 31, 21, 33, 61, 31, 29, 31,
+ 29, 45, 31, 47, 47, 35, 51, 32, 28, 26,
+ 20, 7, 4, 0, 29, 15, 27, 33, 63, 47,
+ 59, 69, 21, 19, 75, 13, 17, 33, 57, 41,
+ 51, 59, 35, 59, 51, 65, 71, 75, 87, 45,
+ 61, 71, 21, 7, 16, 3, 42, 60, 3, 4,
+ 24, 32, 10, 30, 56, 8, 0, 72, 12, 37,
+ 77, 103, 125, 125, 125, 125, 6, 78, 60, 52,
+ 32, 52, 28, 14, 10, 5, 21, 11, 14, 1,
+ 36, 52, 0, 2, 16, 24, 12, 22, 46, 14,
+ 2, 72, 12, 37, 77, 103, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 36 */
+
+ 66, 10, 33, 66, 10, 33, 1, 22, 48, 24,
+ 5, 21, 25, 54, 118, 26, 22, 11, 12, 28,
+ 0, 13, 0, 25, 57, 1, 53, 125, 125, 125,
+ 76, 7, 7, 12, 28, 0, 37, 6, 22, 4,
+ 1, 13, 19, 0, 31, 33, 59, 10, 7, 15,
+ 8, 21, 13, 39, 0, 21, 17, 35, 6, 4,
+ 44, 0, 0, 0, 2, 63, 67, 14, 13, 9,
+ 38, 5, 41, 48, 26, 12, 82, 88, 30, 36,
+ 10, 36, 2, 4, 34, 33, 27, 29, 39, 48,
+ 3, 26, 30, 9, 27, 13, 12, 4, 15, 13,
+ 31, 34, 9, 10, 7, 19, 4, 9, 6, 10,
+ 6, 10, 24, 16, 7, 6, 124, 124, 98, 80,
+ 12, 5, 2, 18, 0, 22, 18, 4, 15, 42,
+ 9, 57, 51, 70, 18, 28, 10, 12, 18, 28,
+ 72, 42, 20, 0, 4, 4, 30, 36, 63, 2,
+ 10, 25, 4, 3, 22, 18, 7, 38, 24, 26,
+ 33, 12, 15, 11, 18, 51, 18, 26, 20, 0,
+ 21, 16, 0, 14, 6, 25, 11, 6, 125, 124,
+ 95, 13, 23, 29, 43, 29, 39, 67, 47, 61,
+ 31, 61, 19, 124, 65, 28, 34, 20, 12, 10,
+ 10, 2, 2, 1, 27, 29, 27, 35, 39, 85,
+ 23, 25, 63, 5, 15, 19, 27, 39, 47, 43,
+ 45, 53, 35, 57, 67, 51, 59, 6, 52, 36,
+ 20, 18, 22, 8, 0, 1, 26, 20, 62, 44,
+ 36, 26, 40, 22, 26, 18, 8, 24, 84, 68,
+ 58, 52, 54, 20, 6, 2, 3, 13, 100, 58,
+ 16, 0, 26, 2, 19, 15, 10, 88, 66, 34,
+ 16, 40, 10, 4, 7, 11, 124, 29, 17, 3,
+ 7, 7, 5, 6, 14, 0, 10, 16, 32, 17,
+ 9, 27, 17, 24, 23, 23, 19, 1, 0, 11,
+ 23, 11, 25, 11, 15, 41, 20, 20, 44, 8,
+ 3, 20, 12, 4, 10, 8, 2, 15, 0, 5,
+ 13, 7, 24, 55, 19, 32, 23, 4, 9, 0,
+ 22, 9, 13, 8, 14, 37, 17, 53, 54, 58,
+ 64, 34, 20, 28, 22, 16, 12, 1, 0, 3,
+ 25, 23, 35, 41, 29, 41, 71, 37, 37, 37,
+ 35, 51, 35, 51, 49, 37, 51, 26, 24, 20,
+ 16, 13, 1, 5, 35, 19, 33, 37, 69, 51,
+ 63, 73, 23, 19, 79, 15, 19, 35, 61, 45,
+ 55, 61, 37, 63, 53, 67, 75, 77, 87, 49,
+ 65, 75, 19, 7, 18, 3, 44, 62, 3, 6,
+ 26, 34, 10, 30, 58, 8, 0, 70, 8, 43,
+ 83, 111, 125, 125, 125, 125, 8, 78, 60, 52,
+ 32, 54, 28, 14, 10, 5, 21, 11, 14, 1,
+ 36, 54, 0, 2, 18, 26, 12, 22, 48, 14,
+ 2, 70, 8, 43, 83, 111, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 37 */
+
+ 64, 10, 33, 64, 10, 33, 2, 26, 48, 24,
+ 7, 23, 29, 52, 118, 26, 26, 11, 12, 30,
+ 0, 13, 0, 27, 61, 3, 57, 125, 125, 125,
+ 82, 5, 5, 12, 30, 0, 37, 8, 22, 4,
+ 1, 13, 17, 0, 31, 33, 59, 10, 7, 15,
+ 8, 21, 11, 39, 0, 21, 17, 35, 6, 4,
+ 44, 0, 0, 0, 2, 63, 67, 16, 15, 9,
+ 36, 7, 41, 54, 28, 14, 86, 92, 34, 40,
+ 14, 40, 4, 6, 40, 33, 27, 27, 37, 48,
+ 3, 28, 34, 9, 25, 13, 16, 4, 17, 15,
+ 31, 34, 9, 12, 7, 17, 6, 9, 8, 12,
+ 8, 14, 28, 18, 3, 8, 124, 124, 104, 86,
+ 14, 5, 4, 20, 0, 22, 20, 4, 13, 46,
+ 9, 61, 55, 76, 18, 28, 10, 12, 18, 30,
+ 74, 44, 20, 0, 4, 4, 30, 38, 65, 4,
+ 10, 27, 4, 5, 22, 18, 7, 38, 24, 26,
+ 35, 14, 17, 11, 18, 51, 18, 24, 18, 3,
+ 27, 14, 1, 10, 2, 29, 17, 2, 125, 124,
+ 101, 17, 27, 33, 49, 35, 45, 75, 55, 65,
+ 35, 69, 23, 124, 69, 24, 30, 16, 8, 6,
+ 6, 1, 1, 5, 31, 33, 31, 37, 43, 89,
+ 23, 25, 63, 7, 17, 21, 29, 41, 49, 45,
+ 49, 55, 37, 59, 69, 53, 57, 10, 54, 38,
+ 20, 18, 24, 8, 0, 0, 30, 22, 62, 44,
+ 38, 28, 42, 24, 28, 20, 12, 26, 86, 70,
+ 60, 52, 56, 22, 8, 2, 3, 13, 100, 58,
+ 14, 0, 26, 2, 19, 13, 10, 86, 64, 32,
+ 14, 40, 10, 4, 7, 9, 124, 27, 13, 0,
+ 3, 5, 3, 10, 18, 2, 14, 20, 36, 15,
+ 7, 25, 15, 28, 23, 23, 17, 1, 2, 11,
+ 23, 11, 25, 11, 15, 43, 22, 20, 46, 8,
+ 3, 20, 12, 4, 10, 8, 2, 15, 0, 5,
+ 13, 7, 24, 57, 21, 32, 25, 2, 9, 1,
+ 22, 9, 15, 8, 14, 39, 17, 55, 52, 56,
+ 62, 30, 16, 24, 18, 12, 6, 7, 5, 9,
+ 31, 27, 39, 49, 37, 49, 81, 43, 43, 41,
+ 39, 55, 39, 53, 53, 37, 53, 22, 18, 16,
+ 12, 19, 7, 9, 41, 23, 37, 41, 75, 55,
+ 67, 75, 23, 21, 83, 17, 23, 39, 65, 49,
+ 59, 65, 39, 65, 55, 67, 77, 79, 89, 51,
+ 67, 77, 19, 7, 20, 1, 46, 64, 1, 8,
+ 26, 34, 10, 32, 60, 8, 0, 68, 4, 49,
+ 89, 117, 125, 125, 125, 125, 8, 78, 60, 52,
+ 32, 54, 28, 16, 12, 3, 21, 11, 16, 1,
+ 38, 56, 0, 2, 18, 26, 12, 24, 48, 14,
+ 2, 68, 4, 49, 89, 117, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 38 */
+
+ 62, 10, 35, 62, 10, 35, 6, 28, 50, 24,
+ 7, 27, 33, 50, 118, 26, 28, 11, 14, 30,
+ 0, 15, 2, 27, 63, 7, 63, 125, 125, 125,
+ 86, 3, 5, 14, 30, 0, 39, 8, 22, 2,
+ 1, 13, 17, 0, 31, 31, 57, 10, 5, 13,
+ 8, 19, 11, 39, 0, 21, 17, 35, 6, 4,
+ 44, 0, 0, 0, 4, 63, 67, 16, 15, 11,
+ 36, 7, 41, 58, 30, 16, 88, 98, 36, 42,
+ 18, 42, 6, 8, 44, 33, 27, 27, 37, 48,
+ 3, 30, 40, 9, 23, 11, 20, 2, 17, 15,
+ 33, 36, 9, 12, 5, 17, 6, 7, 10, 12,
+ 10, 16, 30, 18, 1, 10, 124, 124, 110, 92,
+ 14, 5, 4, 20, 0, 22, 20, 6, 13, 50,
+ 7, 65, 59, 82, 18, 28, 10, 12, 20, 30,
+ 78, 46, 20, 0, 4, 4, 32, 40, 67, 4,
+ 10, 27, 2, 7, 20, 18, 9, 38, 24, 26,
+ 39, 14, 19, 13, 16, 51, 18, 24, 18, 7,
+ 33, 12, 5, 4, 3, 33, 21, 1, 125, 124,
+ 109, 21, 29, 37, 57, 39, 51, 83, 65, 71,
+ 39, 75, 27, 124, 73, 20, 26, 12, 2, 0,
+ 2, 7, 5, 9, 35, 39, 35, 41, 45, 91,
+ 23, 25, 65, 9, 19, 23, 31, 43, 51, 47,
+ 53, 57, 37, 61, 71, 53, 57, 12, 56, 38,
+ 20, 18, 26, 10, 2, 0, 34, 24, 64, 44,
+ 38, 30, 44, 26, 32, 24, 16, 28, 86, 70,
+ 60, 54, 58, 22, 8, 2, 1, 13, 102, 58,
+ 12, 0, 26, 2, 19, 13, 8, 86, 64, 30,
+ 12, 38, 10, 4, 7, 9, 124, 25, 11, 2,
+ 1, 1, 1, 14, 22, 4, 16, 24, 40, 15,
+ 5, 25, 13, 32, 21, 23, 17, 0, 2, 11,
+ 23, 11, 27, 11, 15, 43, 22, 22, 48, 8,
+ 3, 22, 14, 4, 12, 10, 2, 17, 0, 5,
+ 13, 7, 26, 59, 23, 32, 27, 0, 11, 1,
+ 22, 11, 15, 8, 12, 43, 19, 57, 48, 54,
+ 60, 28, 12, 20, 14, 8, 2, 11, 11, 13,
+ 37, 33, 43, 57, 45, 57, 89, 51, 49, 47,
+ 43, 61, 43, 57, 55, 39, 55, 18, 14, 10,
+ 6, 25, 11, 15, 47, 29, 43, 47, 81, 59,
+ 69, 79, 25, 23, 85, 21, 25, 43, 69, 51,
+ 63, 69, 41, 69, 57, 69, 79, 81, 91, 55,
+ 69, 79, 19, 5, 20, 1, 48, 66, 1, 8,
+ 28, 36, 10, 32, 62, 8, 0, 66, 0, 53,
+ 95, 123, 125, 125, 125, 125, 8, 78, 60, 52,
+ 32, 56, 30, 16, 12, 3, 19, 9, 16, 0,
+ 40, 58, 2, 4, 18, 26, 12, 24, 50, 14,
+ 2, 66, 0, 53, 95, 123, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 39 */
+
+ 60, 10, 35, 60, 10, 35, 10, 32, 50, 24,
+ 9, 29, 37, 48, 118, 26, 32, 11, 14, 32,
+ 1, 15, 2, 29, 67, 9, 67, 125, 125, 125,
+ 92, 1, 5, 14, 32, 1, 39, 10, 22, 2,
+ 1, 11, 15, 0, 31, 31, 57, 10, 5, 13,
+ 8, 19, 9, 37, 0, 21, 17, 35, 8, 4,
+ 44, 0, 0, 0, 4, 63, 67, 18, 17, 11,
+ 34, 9, 41, 64, 32, 18, 92, 102, 40, 46,
+ 20, 46, 10, 10, 48, 33, 25, 25, 37, 48,
+ 3, 32, 44, 9, 21, 11, 24, 2, 19, 17,
+ 33, 36, 9, 14, 5, 15, 8, 7, 12, 14,
+ 12, 20, 34, 20, 0, 12, 124, 124, 116, 98,
+ 14, 3, 4, 22, 0, 22, 22, 6, 11, 54,
+ 7, 69, 63, 88, 18, 30, 10, 12, 20, 32,
+ 80, 48, 22, 1, 4, 4, 32, 40, 69, 4,
+ 10, 29, 2, 9, 20, 16, 9, 38, 24, 26,
+ 41, 14, 21, 15, 16, 51, 18, 22, 16, 11,
+ 39, 10, 9, 0, 7, 37, 27, 5, 125, 124,
+ 115, 25, 33, 43, 63, 45, 57, 93, 73, 77,
+ 43, 83, 31, 124, 75, 16, 22, 8, 1, 3,
+ 1, 11, 9, 13, 41, 43, 39, 45, 49, 95,
+ 23, 25, 65, 11, 21, 25, 35, 45, 53, 49,
+ 55, 59, 39, 63, 71, 55, 55, 16, 58, 40,
+ 20, 20, 26, 10, 2, 2, 38, 28, 64, 46,
+ 40, 30, 46, 28, 34, 26, 20, 28, 88, 72,
+ 62, 54, 60, 24, 8, 4, 1, 13, 102, 58,
+ 12, 1, 26, 2, 19, 11, 8, 84, 62, 28,
+ 10, 38, 10, 6, 7, 9, 124, 21, 7, 4,
+ 2, 0, 0, 16, 24, 6, 20, 26, 44, 13,
+ 3, 23, 11, 36, 21, 23, 17, 0, 4, 11,
+ 25, 11, 27, 11, 15, 45, 24, 22, 50, 8,
+ 3, 22, 14, 4, 12, 10, 2, 17, 0, 5,
+ 13, 7, 26, 61, 25, 32, 29, 0, 13, 3,
+ 22, 13, 17, 8, 12, 45, 19, 59, 46, 52,
+ 58, 24, 8, 16, 10, 2, 3, 17, 17, 19,
+ 43, 37, 47, 67, 53, 65, 99, 57, 55, 51,
+ 47, 65, 47, 61, 59, 41, 55, 12, 8, 6,
+ 2, 31, 17, 19, 53, 33, 47, 51, 87, 63,
+ 73, 81, 27, 23, 89, 23, 29, 45, 73, 55,
+ 67, 71, 43, 71, 59, 71, 83, 83, 91, 57,
+ 73, 83, 17, 5, 22, 0, 50, 68, 1, 10,
+ 28, 36, 10, 34, 64, 8, 0, 64, 3, 59,
+ 101, 125, 125, 125, 125, 125, 10, 78, 60, 52,
+ 32, 56, 30, 16, 12, 1, 19, 9, 18, 0,
+ 42, 60, 2, 4, 20, 28, 12, 24, 50, 14,
+ 2, 64, 3, 59, 101, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 40 */
+
+ 56, 8, 35, 56, 8, 35, 12, 34, 50, 24,
+ 9, 33, 43, 46, 118, 24, 36, 13, 14, 34,
+ 1, 17, 2, 31, 71, 11, 73, 125, 125, 125,
+ 98, 0, 5, 14, 34, 1, 41, 12, 22, 0,
+ 1, 11, 15, 1, 33, 31, 57, 10, 5, 13,
+ 8, 19, 9, 37, 0, 21, 17, 35, 8, 4,
+ 44, 0, 0, 0, 6, 65, 67, 18, 19, 11,
+ 32, 9, 43, 68, 34, 20, 94, 106, 42, 48,
+ 24, 48, 12, 12, 52, 33, 25, 25, 37, 48,
+ 3, 34, 48, 9, 21, 9, 28, 2, 19, 17,
+ 35, 36, 9, 14, 3, 13, 10, 5, 12, 14,
+ 12, 22, 36, 20, 2, 14, 124, 124, 122, 102,
+ 14, 3, 4, 22, 1, 22, 22, 6, 11, 58,
+ 7, 73, 67, 92, 18, 30, 10, 12, 20, 32,
+ 84, 48, 22, 1, 2, 4, 34, 42, 73, 4,
+ 10, 29, 0, 13, 18, 16, 11, 38, 24, 24,
+ 43, 14, 23, 17, 14, 51, 16, 20, 14, 15,
+ 47, 6, 13, 3, 13, 43, 31, 9, 125, 124,
+ 123, 29, 37, 47, 69, 49, 65, 101, 81, 83,
+ 47, 89, 35, 124, 79, 12, 16, 2, 7, 9,
+ 7, 15, 15, 17, 45, 47, 43, 49, 53, 99,
+ 23, 27, 67, 13, 25, 27, 37, 47, 57, 53,
+ 59, 61, 39, 65, 73, 57, 55, 18, 60, 40,
+ 20, 20, 28, 12, 4, 2, 42, 30, 64, 46,
+ 40, 32, 48, 30, 38, 30, 22, 30, 88, 72,
+ 62, 56, 62, 24, 8, 4, 0, 15, 104, 58,
+ 10, 1, 26, 2, 19, 11, 8, 84, 60, 26,
+ 8, 38, 10, 6, 7, 9, 124, 19, 5, 6,
+ 4, 4, 2, 20, 28, 8, 22, 30, 48, 13,
+ 3, 23, 9, 38, 21, 23, 17, 2, 4, 11,
+ 25, 11, 29, 11, 15, 45, 24, 22, 52, 8,
+ 5, 22, 14, 2, 12, 10, 2, 19, 0, 5,
+ 13, 7, 28, 63, 27, 32, 33, 1, 15, 5,
+ 20, 15, 19, 8, 12, 47, 19, 63, 44, 48,
+ 56, 20, 4, 12, 6, 1, 7, 23, 23, 25,
+ 49, 43, 51, 75, 61, 75, 109, 65, 63, 57,
+ 53, 71, 51, 65, 61, 43, 57, 8, 4, 0,
+ 3, 37, 23, 25, 59, 39, 53, 57, 93, 67,
+ 77, 85, 29, 25, 93, 27, 31, 49, 77, 59,
+ 71, 75, 47, 75, 63, 73, 85, 85, 93, 61,
+ 75, 85, 17, 5, 22, 0, 52, 70, 1, 10,
+ 30, 38, 10, 34, 64, 8, 1, 62, 7, 65,
+ 107, 125, 125, 125, 125, 125, 10, 78, 60, 52,
+ 32, 58, 30, 16, 12, 1, 19, 9, 18, 0,
+ 42, 60, 2, 4, 20, 28, 12, 24, 52, 14,
+ 0, 62, 7, 65, 107, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 41 */
+
+ 54, 8, 35, 54, 8, 35, 16, 36, 52, 24,
+ 9, 35, 47, 44, 120, 24, 40, 13, 16, 36,
+ 1, 19, 2, 31, 73, 13, 77, 125, 125, 125,
+ 104, 2, 3, 16, 36, 1, 41, 14, 22, 0,
+ 0, 11, 13, 1, 33, 31, 57, 10, 5, 11,
+ 8, 19, 9, 37, 0, 21, 17, 33, 8, 4,
+ 44, 0, 0, 0, 8, 65, 67, 18, 21, 11,
+ 30, 9, 43, 72, 38, 22, 98, 110, 46, 50,
+ 28, 52, 14, 16, 58, 33, 25, 25, 35, 48,
+ 3, 36, 52, 7, 19, 7, 32, 2, 19, 17,
+ 35, 36, 9, 14, 1, 11, 12, 3, 14, 16,
+ 14, 24, 40, 22, 6, 16, 124, 124, 124, 108,
+ 16, 3, 6, 24, 1, 22, 22, 8, 9, 64,
+ 7, 77, 69, 98, 18, 30, 10, 12, 20, 34,
+ 88, 50, 22, 1, 2, 4, 36, 44, 75, 6,
+ 10, 29, 1, 15, 16, 16, 13, 38, 24, 24,
+ 45, 16, 25, 17, 12, 51, 16, 20, 14, 19,
+ 53, 4, 15, 7, 17, 47, 35, 13, 125, 124,
+ 125, 33, 41, 51, 75, 53, 71, 109, 89, 87,
+ 51, 95, 39, 124, 83, 8, 12, 1, 11, 13,
+ 11, 19, 19, 21, 49, 51, 45, 51, 55, 103,
+ 23, 27, 67, 13, 27, 29, 39, 49, 59, 55,
+ 63, 63, 39, 67, 75, 57, 53, 20, 62, 40,
+ 20, 20, 30, 14, 6, 4, 48, 32, 64, 46,
+ 40, 34, 50, 32, 42, 34, 26, 32, 90, 74,
+ 64, 58, 64, 24, 10, 4, 2, 15, 106, 58,
+ 8, 1, 28, 4, 19, 9, 8, 84, 60, 24,
+ 6, 38, 10, 6, 7, 7, 124, 17, 3, 10,
+ 8, 8, 4, 24, 32, 10, 26, 34, 52, 11,
+ 1, 21, 7, 42, 19, 21, 15, 4, 6, 11,
+ 25, 11, 31, 11, 15, 45, 24, 24, 54, 10,
+ 5, 24, 14, 2, 12, 10, 2, 21, 0, 3,
+ 11, 7, 30, 63, 29, 32, 35, 3, 15, 5,
+ 20, 15, 21, 8, 12, 49, 19, 65, 42, 46,
+ 56, 18, 0, 8, 2, 5, 11, 27, 29, 31,
+ 55, 49, 55, 83, 69, 83, 119, 71, 69, 63,
+ 57, 75, 53, 67, 63, 43, 59, 4, 0, 3,
+ 7, 41, 27, 29, 65, 43, 57, 61, 97, 71,
+ 81, 89, 29, 27, 95, 29, 33, 53, 81, 61,
+ 73, 79, 49, 79, 65, 73, 87, 85, 95, 65,
+ 77, 87, 17, 3, 24, 0, 54, 74, 0, 12,
+ 32, 40, 10, 36, 66, 8, 1, 60, 11, 69,
+ 113, 125, 125, 125, 125, 125, 10, 78, 60, 54,
+ 34, 60, 32, 18, 14, 1, 19, 9, 18, 2,
+ 44, 62, 2, 6, 20, 28, 12, 26, 54, 14,
+ 0, 60, 11, 69, 113, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 42 */
+
+ 52, 8, 35, 52, 8, 35, 20, 40, 52, 24,
+ 11, 37, 51, 42, 120, 24, 44, 13, 16, 38,
+ 3, 19, 2, 33, 77, 15, 83, 125, 125, 125,
+ 110, 4, 3, 16, 38, 3, 41, 16, 22, 1,
+ 0, 9, 11, 1, 33, 31, 57, 10, 5, 11,
+ 8, 19, 7, 35, 0, 21, 17, 33, 10, 4,
+ 44, 0, 0, 0, 8, 65, 67, 20, 23, 11,
+ 28, 11, 43, 78, 40, 24, 102, 114, 48, 54,
+ 30, 54, 18, 18, 62, 33, 23, 23, 35, 48,
+ 3, 38, 56, 7, 17, 7, 36, 2, 21, 19,
+ 37, 36, 9, 16, 1, 9, 14, 3, 16, 16,
+ 16, 28, 42, 24, 8, 18, 124, 124, 124, 114,
+ 16, 1, 6, 24, 1, 22, 24, 8, 9, 68,
+ 7, 81, 73, 104, 18, 32, 10, 12, 20, 34,
+ 90, 52, 24, 3, 2, 4, 36, 44, 77, 6,
+ 10, 31, 1, 17, 16, 14, 13, 38, 24, 24,
+ 47, 16, 27, 19, 12, 51, 16, 18, 12, 23,
+ 59, 2, 19, 11, 21, 51, 41, 17, 125, 124,
+ 125, 37, 45, 57, 81, 59, 77, 119, 97, 93,
+ 55, 103, 43, 124, 85, 4, 8, 5, 15, 17,
+ 15, 23, 23, 25, 55, 55, 49, 55, 59, 107,
+ 23, 27, 69, 15, 29, 31, 43, 51, 61, 57,
+ 65, 65, 41, 69, 75, 59, 51, 24, 64, 42,
+ 20, 22, 30, 14, 6, 6, 52, 36, 64, 48,
+ 42, 34, 52, 34, 44, 36, 30, 32, 92, 76,
+ 66, 58, 66, 26, 10, 6, 2, 15, 106, 58,
+ 8, 3, 28, 4, 19, 9, 8, 82, 58, 22,
+ 4, 38, 10, 8, 7, 7, 124, 13, 0, 12,
+ 12, 10, 6, 26, 34, 12, 30, 36, 56, 9,
+ 0, 21, 5, 46, 19, 21, 15, 4, 6, 11,
+ 27, 11, 31, 11, 15, 47, 26, 24, 56, 10,
+ 5, 24, 14, 2, 12, 10, 2, 21, 0, 3,
+ 11, 7, 30, 65, 31, 32, 37, 3, 17, 7,
+ 20, 17, 23, 8, 12, 51, 19, 67, 40, 44,
+ 54, 14, 3, 4, 1, 11, 17, 33, 35, 37,
+ 61, 53, 59, 93, 77, 91, 125, 77, 75, 67,
+ 61, 81, 57, 71, 67, 45, 59, 1, 5, 9,
+ 11, 47, 33, 35, 71, 47, 63, 65, 103, 75,
+ 85, 91, 31, 27, 99, 31, 37, 55, 85, 65,
+ 77, 81, 51, 81, 67, 75, 91, 87, 95, 67,
+ 81, 91, 15, 3, 26, 2, 56, 76, 0, 14,
+ 32, 40, 10, 36, 68, 8, 1, 58, 15, 75,
+ 119, 125, 125, 125, 125, 125, 12, 78, 60, 54,
+ 34, 60, 32, 18, 14, 0, 19, 9, 20, 2,
+ 46, 64, 2, 6, 22, 30, 12, 26, 54, 14,
+ 0, 58, 15, 75, 119, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 43 */
+
+ 50, 8, 37, 50, 8, 37, 24, 42, 54, 24,
+ 11, 41, 55, 40, 120, 24, 46, 13, 18, 38,
+ 3, 21, 4, 33, 79, 19, 87, 125, 125, 125,
+ 114, 6, 3, 18, 38, 3, 43, 16, 22, 1,
+ 0, 9, 11, 1, 33, 29, 55, 10, 3, 9,
+ 8, 17, 7, 35, 0, 21, 17, 33, 10, 4,
+ 44, 0, 0, 0, 10, 65, 67, 20, 23, 13,
+ 28, 11, 43, 82, 42, 26, 104, 120, 52, 56,
+ 34, 58, 20, 20, 66, 33, 23, 23, 35, 48,
+ 3, 40, 62, 7, 15, 5, 40, 0, 21, 19,
+ 37, 38, 9, 16, 0, 9, 14, 1, 18, 18,
+ 18, 30, 46, 24, 10, 20, 124, 124, 124, 120,
+ 16, 1, 6, 26, 1, 22, 24, 10, 7, 72,
+ 5, 85, 77, 110, 18, 32, 10, 12, 22, 36,
+ 94, 54, 24, 3, 2, 4, 38, 46, 79, 6,
+ 10, 31, 3, 19, 14, 14, 15, 38, 24, 24,
+ 51, 16, 29, 21, 10, 51, 16, 18, 12, 27,
+ 65, 0, 23, 17, 27, 55, 45, 21, 125, 124,
+ 125, 41, 47, 61, 89, 63, 83, 125, 107, 99,
+ 59, 109, 47, 124, 89, 0, 4, 9, 21, 23,
+ 19, 29, 27, 29, 59, 61, 53, 59, 61, 109,
+ 23, 27, 69, 17, 31, 33, 45, 53, 63, 59,
+ 69, 67, 41, 71, 77, 59, 51, 26, 66, 42,
+ 20, 22, 32, 16, 8, 6, 56, 38, 66, 48,
+ 42, 36, 54, 36, 48, 40, 34, 34, 92, 76,
+ 66, 60, 68, 26, 10, 6, 4, 15, 108, 58,
+ 6, 3, 28, 4, 19, 7, 6, 82, 58, 20,
+ 2, 36, 10, 8, 7, 7, 124, 11, 2, 14,
+ 14, 14, 8, 30, 38, 14, 32, 40, 60, 9,
+ 2, 19, 3, 50, 17, 21, 15, 6, 8, 11,
+ 27, 11, 33, 11, 15, 47, 26, 26, 58, 10,
+ 5, 26, 16, 2, 14, 12, 2, 23, 0, 3,
+ 11, 7, 32, 67, 33, 32, 39, 5, 19, 7,
+ 20, 19, 23, 8, 10, 55, 21, 69, 36, 42,
+ 52, 12, 7, 0, 5, 15, 21, 37, 41, 41,
+ 67, 59, 63, 101, 85, 99, 125, 85, 81, 73,
+ 65, 85, 61, 75, 69, 47, 61, 5, 9, 13,
+ 17, 53, 37, 39, 77, 53, 67, 71, 109, 79,
+ 87, 95, 33, 29, 101, 35, 39, 59, 89, 67,
+ 81, 85, 53, 85, 69, 77, 93, 89, 97, 71,
+ 83, 93, 15, 1, 26, 2, 58, 78, 0, 14,
+ 34, 42, 10, 38, 70, 8, 1, 56, 19, 79,
+ 125, 125, 125, 125, 125, 125, 12, 78, 60, 54,
+ 34, 62, 34, 18, 14, 0, 17, 7, 20, 4,
+ 48, 66, 4, 8, 22, 30, 12, 26, 56, 14,
+ 0, 56, 19, 79, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 44 */
+
+ 46, 8, 37, 46, 8, 37, 26, 46, 54, 24,
+ 13, 43, 59, 38, 120, 22, 50, 13, 18, 40,
+ 3, 21, 4, 35, 83, 21, 93, 125, 125, 125,
+ 120, 8, 3, 18, 40, 3, 43, 18, 22, 3,
+ 0, 9, 9, 1, 33, 29, 55, 10, 3, 9,
+ 8, 17, 5, 35, 0, 21, 17, 33, 10, 4,
+ 44, 0, 0, 0, 10, 67, 67, 22, 25, 13,
+ 26, 13, 43, 88, 44, 28, 108, 124, 54, 60,
+ 38, 60, 22, 22, 70, 33, 23, 21, 35, 48,
+ 3, 42, 66, 7, 15, 5, 44, 0, 23, 21,
+ 39, 38, 9, 18, 0, 7, 16, 1, 20, 18,
+ 18, 34, 48, 26, 12, 22, 124, 124, 124, 124,
+ 16, 1, 6, 26, 1, 22, 26, 10, 7, 76,
+ 5, 89, 81, 114, 18, 32, 10, 12, 22, 36,
+ 96, 56, 24, 3, 2, 4, 38, 48, 81, 6,
+ 10, 33, 3, 21, 14, 14, 15, 38, 24, 24,
+ 53, 16, 31, 23, 10, 51, 14, 16, 10, 31,
+ 73, 1, 27, 21, 31, 59, 51, 25, 125, 124,
+ 125, 45, 51, 65, 95, 69, 91, 125, 115, 105,
+ 63, 117, 51, 124, 93, 3, 0, 15, 25, 27,
+ 23, 33, 31, 33, 63, 65, 57, 63, 65, 113,
+ 23, 27, 71, 19, 33, 35, 47, 55, 67, 61,
+ 73, 69, 43, 73, 79, 61, 49, 30, 68, 44,
+ 20, 22, 34, 16, 8, 8, 60, 40, 66, 48,
+ 44, 38, 56, 38, 50, 42, 36, 36, 94, 78,
+ 68, 60, 70, 28, 10, 6, 4, 15, 108, 58,
+ 4, 3, 28, 4, 19, 7, 6, 80, 56, 18,
+ 0, 36, 10, 8, 7, 7, 124, 9, 6, 16,
+ 18, 16, 10, 34, 42, 16, 36, 44, 64, 7,
+ 4, 19, 1, 52, 17, 21, 15, 6, 8, 11,
+ 27, 11, 33, 11, 15, 49, 28, 26, 60, 10,
+ 5, 26, 16, 2, 14, 12, 2, 23, 0, 3,
+ 11, 7, 32, 69, 35, 32, 41, 7, 21, 9,
+ 20, 21, 25, 8, 10, 57, 21, 71, 34, 40,
+ 50, 8, 11, 3, 9, 19, 27, 43, 47, 47,
+ 73, 63, 67, 109, 93, 107, 125, 91, 89, 77,
+ 71, 91, 65, 79, 73, 49, 63, 9, 15, 19,
+ 21, 59, 43, 45, 83, 57, 73, 75, 115, 83,
+ 91, 97, 35, 31, 105, 37, 43, 63, 93, 71,
+ 85, 89, 55, 87, 71, 79, 95, 91, 99, 73,
+ 85, 95, 15, 1, 28, 4, 60, 80, 0, 16,
+ 34, 42, 10, 38, 72, 8, 1, 54, 23, 85,
+ 125, 125, 125, 125, 125, 125, 12, 78, 60, 54,
+ 34, 62, 34, 18, 14, 2, 17, 7, 22, 4,
+ 48, 68, 4, 8, 22, 30, 12, 26, 56, 14,
+ 0, 54, 23, 85, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 45 */
+
+ 44, 8, 37, 44, 8, 37, 30, 48, 54, 24,
+ 13, 45, 63, 36, 120, 22, 54, 13, 18, 42,
+ 5, 23, 4, 37, 87, 23, 97, 125, 125, 125,
+ 124, 10, 1, 18, 42, 5, 43, 20, 22, 3,
+ 0, 7, 7, 1, 33, 29, 55, 10, 3, 9,
+ 8, 17, 5, 33, 0, 21, 17, 33, 12, 4,
+ 44, 0, 0, 0, 12, 67, 67, 22, 27, 13,
+ 24, 13, 43, 92, 46, 30, 112, 124, 58, 62,
+ 40, 64, 26, 24, 76, 33, 21, 21, 33, 48,
+ 3, 44, 70, 7, 13, 3, 48, 0, 23, 21,
+ 39, 38, 9, 18, 2, 5, 18, 0, 22, 20,
+ 20, 36, 52, 28, 16, 24, 124, 124, 124, 124,
+ 18, 0, 8, 28, 1, 22, 26, 10, 5, 80,
+ 5, 93, 85, 120, 18, 34, 10, 12, 22, 38,
+ 100, 58, 26, 5, 2, 4, 40, 48, 83, 8,
+ 10, 33, 5, 23, 12, 12, 17, 38, 24, 24,
+ 55, 18, 33, 23, 8, 51, 14, 14, 8, 35,
+ 79, 3, 29, 25, 35, 63, 55, 29, 125, 124,
+ 125, 49, 55, 71, 101, 73, 97, 125, 123, 109,
+ 67, 123, 55, 124, 95, 7, 3, 19, 29, 31,
+ 27, 37, 35, 37, 69, 69, 61, 65, 69, 117,
+ 23, 27, 71, 21, 35, 37, 51, 57, 69, 63,
+ 75, 71, 43, 75, 79, 63, 47, 32, 70, 44,
+ 20, 24, 34, 18, 10, 10, 64, 44, 66, 50,
+ 44, 38, 58, 40, 54, 46, 40, 36, 96, 80,
+ 70, 62, 72, 28, 12, 8, 6, 15, 110, 58,
+ 4, 5, 28, 4, 19, 5, 6, 80, 54, 16,
+ 1, 36, 10, 10, 7, 5, 124, 5, 8, 20,
+ 22, 20, 12, 36, 44, 18, 40, 46, 68, 5,
+ 6, 17, 0, 56, 17, 21, 13, 8, 10, 11,
+ 29, 11, 35, 11, 15, 49, 28, 26, 62, 10,
+ 5, 26, 16, 2, 14, 12, 2, 25, 0, 3,
+ 11, 7, 34, 71, 37, 32, 43, 7, 21, 11,
+ 20, 21, 27, 8, 10, 59, 21, 73, 32, 38,
+ 48, 4, 15, 7, 13, 25, 31, 49, 53, 53,
+ 79, 69, 71, 119, 101, 115, 125, 97, 95, 83,
+ 75, 95, 69, 81, 75, 49, 63, 15, 19, 23,
+ 25, 65, 49, 49, 89, 61, 77, 79, 121, 87,
+ 95, 101, 35, 31, 109, 39, 45, 65, 97, 75,
+ 89, 91, 57, 91, 73, 79, 99, 93, 99, 77,
+ 89, 99, 13, 1, 30, 4, 62, 82, 2, 18,
+ 36, 44, 10, 40, 74, 8, 1, 52, 27, 91,
+ 125, 125, 125, 125, 125, 125, 14, 78, 60, 54,
+ 34, 64, 34, 20, 16, 2, 17, 7, 22, 4,
+ 50, 70, 4, 8, 24, 32, 12, 28, 58, 14,
+ 0, 52, 27, 91, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 46 */
+
+ 42, 8, 37, 42, 8, 37, 34, 52, 56, 24,
+ 15, 49, 67, 34, 120, 22, 58, 13, 20, 44,
+ 5, 23, 4, 37, 89, 25, 103, 125, 125, 125,
+ 124, 12, 1, 20, 44, 5, 45, 22, 22, 5,
+ 0, 7, 7, 1, 33, 29, 55, 10, 3, 7,
+ 8, 17, 3, 33, 0, 21, 17, 33, 12, 4,
+ 44, 0, 0, 0, 12, 67, 67, 24, 29, 13,
+ 22, 15, 43, 98, 48, 32, 114, 124, 60, 66,
+ 44, 66, 28, 26, 80, 33, 21, 19, 33, 48,
+ 3, 46, 74, 7, 11, 3, 52, 0, 25, 23,
+ 41, 38, 9, 20, 2, 3, 20, 0, 24, 20,
+ 22, 40, 54, 28, 18, 26, 124, 124, 124, 124,
+ 18, 0, 8, 28, 1, 22, 28, 12, 5, 84,
+ 5, 97, 89, 124, 18, 34, 10, 12, 22, 38,
+ 102, 60, 26, 5, 2, 4, 40, 50, 85, 8,
+ 10, 35, 5, 25, 12, 12, 17, 38, 24, 24,
+ 57, 18, 35, 25, 8, 51, 14, 14, 8, 39,
+ 85, 5, 33, 29, 41, 67, 61, 33, 125, 124,
+ 125, 53, 59, 75, 107, 79, 103, 125, 125, 115,
+ 71, 125, 59, 124, 99, 11, 7, 23, 35, 37,
+ 31, 41, 39, 41, 73, 73, 65, 69, 71, 121,
+ 23, 27, 73, 23, 37, 39, 53, 59, 71, 65,
+ 79, 73, 45, 77, 81, 63, 47, 36, 72, 46,
+ 20, 24, 36, 18, 10, 10, 68, 46, 66, 50,
+ 46, 40, 60, 42, 56, 48, 44, 38, 96, 80,
+ 70, 62, 74, 30, 12, 8, 6, 15, 110, 58,
+ 2, 5, 28, 4, 19, 5, 6, 78, 54, 14,
+ 3, 36, 10, 10, 7, 5, 124, 3, 12, 22,
+ 24, 22, 14, 40, 48, 20, 42, 50, 72, 5,
+ 8, 17, 2, 60, 15, 21, 13, 8, 10, 11,
+ 29, 11, 35, 11, 15, 51, 30, 28, 64, 10,
+ 5, 28, 16, 2, 14, 12, 2, 25, 0, 3,
+ 11, 7, 34, 73, 39, 32, 45, 9, 23, 11,
+ 20, 23, 29, 8, 10, 61, 21, 75, 30, 36,
+ 46, 2, 19, 11, 17, 29, 37, 53, 59, 59,
+ 85, 73, 75, 125, 109, 123, 125, 105, 101, 87,
+ 79, 101, 73, 85, 79, 51, 65, 19, 25, 29,
+ 31, 71, 53, 55, 95, 67, 83, 85, 125, 91,
+ 99, 103, 37, 33, 111, 43, 49, 69, 101, 77,
+ 93, 95, 59, 93, 75, 81, 101, 95, 101, 79,
+ 91, 101, 13, 0, 30, 6, 64, 84, 2, 18,
+ 36, 44, 10, 40, 76, 8, 1, 50, 31, 95,
+ 125, 125, 125, 125, 125, 125, 14, 78, 60, 54,
+ 34, 64, 36, 20, 16, 4, 17, 7, 24, 6,
+ 52, 72, 4, 10, 24, 32, 12, 28, 58, 14,
+ 0, 50, 31, 95, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 47 */
+
+ 40, 8, 37, 40, 8, 37, 38, 54, 56, 24,
+ 15, 51, 71, 32, 120, 22, 62, 13, 20, 46,
+ 5, 25, 4, 39, 93, 27, 107, 125, 125, 125,
+ 124, 14, 1, 20, 46, 5, 45, 24, 22, 5,
+ 0, 7, 5, 1, 33, 29, 55, 10, 3, 7,
+ 8, 17, 3, 33, 0, 21, 17, 33, 12, 4,
+ 44, 0, 0, 0, 14, 67, 67, 24, 31, 13,
+ 20, 15, 43, 102, 50, 34, 118, 124, 64, 68,
+ 48, 70, 30, 28, 84, 33, 21, 19, 33, 48,
+ 3, 48, 78, 7, 9, 1, 56, 0, 25, 23,
+ 41, 38, 9, 20, 4, 1, 22, 2, 26, 22,
+ 24, 42, 58, 30, 20, 28, 124, 124, 124, 124,
+ 18, 0, 8, 30, 1, 22, 28, 12, 3, 88,
+ 5, 101, 93, 124, 18, 34, 10, 12, 22, 40,
+ 106, 62, 26, 5, 2, 4, 42, 52, 87, 8,
+ 10, 35, 7, 27, 10, 12, 19, 38, 24, 24,
+ 59, 18, 37, 27, 6, 51, 14, 12, 6, 43,
+ 91, 7, 37, 33, 45, 71, 65, 37, 125, 124,
+ 125, 57, 63, 79, 113, 83, 109, 125, 125, 121,
+ 75, 125, 63, 124, 103, 15, 11, 27, 39, 41,
+ 35, 45, 43, 45, 77, 77, 69, 73, 75, 125,
+ 23, 27, 73, 25, 39, 41, 55, 61, 73, 67,
+ 83, 75, 45, 79, 83, 65, 45, 38, 74, 46,
+ 20, 24, 38, 20, 12, 12, 72, 48, 66, 50,
+ 46, 42, 62, 44, 60, 52, 48, 40, 98, 82,
+ 72, 64, 76, 30, 12, 8, 8, 15, 112, 58,
+ 0, 5, 28, 4, 19, 3, 6, 78, 52, 12,
+ 5, 36, 10, 10, 7, 5, 124, 1, 14, 24,
+ 28, 26, 16, 44, 52, 22, 46, 54, 76, 3,
+ 10, 15, 4, 64, 15, 21, 13, 10, 12, 11,
+ 29, 11, 37, 11, 15, 51, 30, 28, 66, 10,
+ 5, 28, 16, 2, 14, 12, 2, 27, 0, 3,
+ 11, 7, 36, 75, 41, 32, 47, 11, 25, 13,
+ 20, 25, 31, 8, 10, 63, 21, 77, 28, 34,
+ 44, 1, 23, 15, 21, 33, 41, 59, 65, 65,
+ 91, 79, 79, 125, 117, 125, 125, 111, 107, 93,
+ 83, 105, 77, 89, 81, 53, 67, 23, 29, 33,
+ 35, 77, 59, 59, 101, 71, 87, 89, 125, 95,
+ 103, 107, 39, 35, 115, 45, 51, 73, 105, 81,
+ 97, 99, 61, 97, 77, 83, 103, 97, 103, 83,
+ 93, 103, 13, 0, 32, 6, 66, 86, 2, 20,
+ 38, 46, 10, 42, 78, 8, 1, 48, 35, 101,
+ 125, 125, 125, 125, 125, 125, 14, 78, 60, 54,
+ 34, 66, 36, 20, 16, 4, 17, 7, 24, 6,
+ 54, 74, 4, 10, 24, 32, 12, 28, 60, 14,
+ 0, 48, 35, 101, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 48 */
+
+ 36, 6, 39, 36, 6, 39, 40, 56, 56, 24,
+ 17, 55, 77, 30, 120, 20, 64, 15, 20, 46,
+ 7, 27, 4, 41, 97, 31, 113, 125, 125, 125,
+ 124, 14, 1, 20, 46, 7, 47, 24, 22, 7,
+ 0, 7, 5, 3, 35, 29, 55, 8, 3, 7,
+ 8, 17, 3, 33, 0, 21, 19, 33, 12, 4,
+ 44, 0, 0, 0, 14, 69, 67, 24, 33, 15,
+ 18, 17, 45, 106, 52, 36, 120, 124, 66, 70,
+ 50, 72, 32, 30, 88, 33, 21, 19, 33, 48,
+ 3, 48, 82, 7, 9, 1, 58, 1, 27, 25,
+ 43, 38, 9, 20, 4, 1, 22, 2, 26, 22,
+ 24, 44, 60, 30, 22, 30, 124, 124, 124, 124,
+ 18, 0, 8, 30, 3, 22, 28, 12, 3, 92,
+ 5, 105, 97, 124, 18, 34, 10, 12, 22, 40,
+ 108, 62, 26, 7, 0, 4, 42, 52, 91, 8,
+ 10, 37, 9, 31, 8, 10, 21, 38, 24, 22,
+ 63, 18, 39, 29, 4, 51, 12, 10, 4, 49,
+ 99, 11, 41, 39, 51, 77, 71, 41, 125, 124,
+ 125, 63, 67, 85, 121, 89, 117, 125, 125, 125,
+ 79, 125, 67, 124, 107, 21, 17, 33, 45, 47,
+ 41, 51, 49, 49, 83, 83, 73, 77, 79, 125,
+ 23, 29, 75, 27, 43, 45, 59, 65, 77, 71,
+ 87, 77, 47, 81, 85, 67, 45, 40, 74, 46,
+ 20, 24, 38, 20, 12, 12, 76, 50, 66, 50,
+ 46, 42, 62, 46, 62, 54, 50, 40, 98, 82,
+ 72, 64, 78, 30, 12, 8, 8, 17, 112, 56,
+ 1, 7, 28, 4, 19, 3, 4, 76, 50, 8,
+ 7, 34, 10, 10, 7, 5, 124, 0, 16, 26,
+ 30, 28, 18, 46, 54, 24, 48, 56, 80, 3,
+ 10, 15, 6, 66, 15, 21, 13, 10, 12, 11,
+ 31, 13, 39, 11, 17, 53, 30, 28, 68, 10,
+ 7, 28, 16, 0, 14, 12, 2, 29, 0, 3,
+ 11, 7, 36, 77, 43, 32, 51, 13, 27, 15,
+ 18, 27, 33, 8, 8, 67, 23, 81, 24, 30,
+ 42, 5, 27, 21, 27, 39, 47, 65, 71, 71,
+ 99, 85, 83, 125, 125, 125, 125, 119, 115, 99,
+ 89, 111, 81, 93, 85, 55, 69, 29, 35, 39,
+ 41, 83, 65, 65, 109, 77, 93, 95, 125, 99,
+ 107, 111, 41, 37, 119, 49, 55, 77, 111, 85,
+ 101, 103, 65, 101, 81, 85, 107, 99, 105, 87,
+ 97, 107, 13, 0, 32, 6, 68, 88, 2, 20,
+ 38, 46, 10, 42, 78, 8, 3, 44, 39, 107,
+ 125, 125, 125, 125, 125, 125, 14, 78, 60, 54,
+ 34, 66, 36, 20, 16, 4, 17, 7, 24, 6,
+ 54, 74, 4, 10, 24, 32, 12, 28, 60, 12,
+ 1, 44, 39, 107, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 49 */
+
+ 34, 6, 39, 34, 6, 39, 44, 60, 58, 26,
+ 17, 57, 81, 28, 122, 20, 68, 15, 22, 48,
+ 7, 27, 6, 41, 99, 33, 117, 125, 125, 125,
+ 124, 16, 0, 22, 48, 7, 47, 26, 22, 7,
+ 2, 5, 3, 3, 35, 27, 53, 8, 1, 5,
+ 8, 15, 1, 31, 2, 19, 19, 31, 14, 4,
+ 44, 0, 0, 0, 16, 69, 67, 26, 33, 15,
+ 18, 17, 45, 112, 56, 40, 124, 124, 70, 74,
+ 54, 76, 36, 34, 94, 33, 19, 17, 31, 48,
+ 3, 50, 88, 5, 7, 0, 62, 1, 27, 25,
+ 43, 40, 9, 22, 6, 0, 24, 4, 28, 24,
+ 26, 48, 64, 32, 26, 34, 124, 124, 124, 124,
+ 20, 2, 10, 32, 3, 24, 30, 14, 1, 98,
+ 3, 109, 99, 124, 18, 36, 10, 14, 24, 42,
+ 112, 64, 28, 7, 0, 4, 44, 54, 93, 10,
+ 10, 37, 9, 33, 8, 10, 21, 38, 24, 22,
+ 65, 20, 39, 29, 4, 51, 12, 10, 4, 53,
+ 105, 13, 43, 43, 55, 81, 75, 45, 125, 124,
+ 125, 67, 69, 89, 125, 93, 123, 125, 125, 125,
+ 83, 125, 71, 124, 109, 25, 21, 37, 49, 51,
+ 45, 55, 53, 53, 87, 87, 75, 79, 81, 125,
+ 23, 29, 75, 27, 45, 47, 61, 67, 79, 73,
+ 89, 79, 47, 83, 85, 67, 43, 44, 76, 48,
+ 20, 26, 40, 22, 14, 14, 82, 54, 68, 52,
+ 48, 44, 64, 50, 66, 58, 54, 42, 100, 84,
+ 74, 66, 80, 32, 14, 10, 10, 17, 114, 56,
+ 1, 7, 30, 6, 19, 1, 4, 76, 50, 6,
+ 9, 34, 12, 12, 5, 3, 124, 4, 20, 30,
+ 34, 32, 22, 50, 58, 28, 52, 60, 86, 1,
+ 12, 13, 10, 70, 13, 19, 11, 12, 14, 9,
+ 31, 13, 39, 11, 17, 53, 32, 30, 70, 12,
+ 7, 30, 18, 0, 16, 14, 2, 29, 2, 1,
+ 9, 5, 38, 77, 45, 32, 53, 13, 27, 15,
+ 18, 27, 33, 8, 8, 69, 23, 83, 22, 28,
+ 42, 7, 29, 25, 31, 43, 51, 69, 75, 75,
+ 105, 89, 87, 125, 125, 125, 125, 125, 121, 103,
+ 93, 115, 83, 95, 87, 55, 69, 33, 39, 43,
+ 45, 87, 69, 69, 115, 81, 97, 99, 125, 101,
+ 109, 113, 41, 37, 121, 51, 57, 79, 115, 87,
+ 103, 105, 67, 103, 83, 85, 109, 99, 105, 89,
+ 99, 109, 11, 2, 34, 8, 72, 92, 4, 22,
+ 40, 48, 12, 44, 80, 8, 3, 42, 43, 111,
+ 125, 125, 125, 125, 125, 125, 16, 78, 62, 56,
+ 36, 68, 38, 22, 18, 6, 15, 5, 26, 8,
+ 56, 76, 6, 12, 26, 34, 12, 30, 62, 12,
+ 1, 42, 43, 111, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 50 */
+
+ 32, 6, 39, 32, 6, 39, 48, 62, 58, 26,
+ 17, 59, 85, 26, 122, 20, 72, 15, 22, 50,
+ 7, 29, 6, 43, 103, 35, 123, 125, 125, 125,
+ 124, 18, 0, 22, 50, 7, 47, 28, 22, 9,
+ 2, 5, 1, 3, 35, 27, 53, 8, 1, 5,
+ 8, 15, 1, 31, 2, 19, 19, 31, 14, 4,
+ 44, 0, 0, 0, 18, 69, 67, 26, 35, 15,
+ 16, 17, 45, 116, 58, 42, 124, 124, 72, 76,
+ 58, 78, 38, 36, 98, 33, 19, 17, 31, 48,
+ 3, 52, 92, 5, 5, 2, 66, 1, 27, 25,
+ 45, 40, 9, 22, 8, 2, 26, 6, 30, 24,
+ 28, 50, 66, 34, 28, 36, 124, 124, 124, 124,
+ 20, 2, 10, 32, 3, 24, 30, 14, 1, 102,
+ 3, 113, 103, 124, 18, 36, 10, 14, 24, 42,
+ 116, 66, 28, 7, 0, 4, 46, 56, 95, 10,
+ 10, 37, 11, 35, 6, 10, 23, 38, 24, 22,
+ 67, 20, 41, 31, 2, 51, 12, 8, 2, 57,
+ 111, 15, 47, 47, 59, 85, 79, 49, 125, 124,
+ 125, 71, 73, 93, 125, 97, 125, 125, 125, 125,
+ 87, 125, 75, 124, 113, 29, 25, 41, 53, 55,
+ 49, 59, 57, 57, 91, 91, 79, 83, 85, 125,
+ 23, 29, 77, 29, 47, 49, 63, 69, 81, 75,
+ 93, 81, 47, 85, 87, 69, 41, 46, 78, 48,
+ 20, 26, 42, 24, 16, 16, 86, 56, 68, 52,
+ 48, 46, 66, 52, 70, 62, 58, 44, 102, 86,
+ 76, 68, 82, 32, 14, 10, 12, 17, 116, 56,
+ 3, 7, 30, 6, 19, 1, 4, 76, 48, 4,
+ 11, 34, 12, 12, 5, 3, 124, 6, 22, 32,
+ 38, 36, 24, 54, 62, 30, 56, 64, 90, 0,
+ 14, 13, 12, 74, 13, 19, 11, 14, 14, 9,
+ 31, 13, 41, 11, 17, 53, 32, 30, 72, 12,
+ 7, 30, 18, 0, 16, 14, 2, 31, 2, 1,
+ 9, 5, 40, 79, 47, 32, 55, 15, 29, 17,
+ 18, 29, 35, 8, 8, 71, 23, 85, 20, 26,
+ 40, 11, 33, 29, 35, 47, 55, 75, 81, 81,
+ 111, 95, 91, 125, 125, 125, 125, 125, 125, 109,
+ 97, 121, 87, 99, 89, 57, 71, 37, 43, 49,
+ 49, 93, 75, 75, 121, 85, 103, 103, 125, 105,
+ 113, 117, 43, 39, 125, 53, 59, 83, 119, 91,
+ 107, 109, 69, 107, 85, 87, 111, 101, 107, 93,
+ 101, 111, 11, 2, 36, 8, 74, 94, 4, 24,
+ 42, 50, 12, 44, 82, 8, 3, 40, 47, 117,
+ 125, 125, 125, 125, 125, 125, 16, 78, 62, 56,
+ 36, 70, 38, 22, 18, 6, 15, 5, 26, 8,
+ 58, 78, 6, 12, 26, 34, 12, 30, 64, 12,
+ 1, 40, 47, 117, 125, 125, 125, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 1, qp = 51 */
+
+ 30, 6, 39, 30, 6, 39, 52, 66, 60, 26,
+ 19, 63, 89, 24, 122, 20, 76, 15, 24, 52,
+ 7, 29, 6, 43, 105, 37, 125, 125, 125, 125,
+ 124, 20, 0, 24, 52, 7, 49, 30, 22, 9,
+ 2, 5, 1, 3, 35, 27, 53, 8, 1, 3,
+ 8, 15, 0, 31, 2, 19, 19, 31, 14, 4,
+ 44, 0, 0, 0, 18, 69, 67, 28, 37, 15,
+ 14, 19, 45, 122, 60, 44, 124, 124, 76, 80,
+ 62, 82, 40, 38, 102, 33, 19, 15, 31, 48,
+ 3, 54, 96, 5, 3, 2, 70, 1, 29, 27,
+ 45, 40, 9, 24, 8, 4, 28, 6, 32, 26,
+ 30, 54, 70, 34, 30, 38, 124, 124, 124, 124,
+ 20, 2, 10, 34, 3, 24, 32, 16, 0, 106,
+ 3, 117, 107, 124, 18, 36, 10, 14, 24, 44,
+ 118, 68, 28, 7, 0, 4, 46, 58, 97, 10,
+ 10, 39, 11, 37, 6, 10, 23, 38, 24, 22,
+ 69, 20, 43, 33, 2, 51, 12, 8, 2, 61,
+ 117, 17, 51, 51, 65, 89, 85, 53, 125, 124,
+ 125, 75, 77, 97, 125, 103, 125, 125, 125, 125,
+ 91, 125, 79, 124, 117, 33, 29, 45, 59, 61,
+ 53, 63, 61, 61, 95, 95, 83, 87, 87, 125,
+ 23, 29, 77, 31, 49, 51, 65, 71, 83, 77,
+ 97, 83, 49, 87, 89, 69, 41, 50, 80, 50,
+ 20, 26, 44, 24, 16, 16, 90, 58, 68, 52,
+ 50, 48, 68, 54, 72, 64, 62, 46, 102, 86,
+ 76, 68, 84, 34, 14, 10, 12, 17, 116, 56,
+ 5, 7, 30, 6, 19, 0, 4, 74, 48, 2,
+ 13, 34, 12, 12, 5, 3, 124, 8, 26, 34,
+ 40, 38, 26, 58, 66, 32, 58, 68, 94, 0,
+ 16, 11, 14, 78, 11, 19, 11, 14, 16, 9,
+ 31, 13, 41, 11, 17, 55, 34, 32, 74, 12,
+ 7, 32, 18, 0, 16, 14, 2, 31, 2, 1,
+ 9, 5, 40, 81, 49, 32, 57, 17, 31, 17,
+ 18, 31, 37, 8, 8, 73, 23, 87, 18, 24,
+ 38, 13, 37, 33, 39, 51, 61, 79, 87, 87,
+ 117, 99, 95, 125, 125, 125, 125, 125, 125, 113,
+ 101, 125, 91, 103, 93, 59, 73, 41, 49, 53,
+ 55, 99, 79, 79, 125, 91, 107, 109, 125, 109,
+ 117, 119, 45, 41, 125, 57, 63, 87, 123, 93,
+ 111, 113, 71, 109, 87, 89, 113, 103, 109, 95,
+ 103, 113, 11, 4, 36, 10, 76, 96, 4, 24,
+ 42, 50, 12, 46, 84, 8, 3, 38, 51, 121,
+ 125, 125, 125, 125, 125, 125, 16, 78, 62, 56,
+ 36, 70, 40, 22, 18, 8, 15, 5, 28, 10,
+ 60, 80, 6, 14, 26, 34, 12, 30, 64, 12,
+ 1, 38, 51, 121, 125, 125, 125, 125, 125, 125,
+ },
+
+ },
+
+ {
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 0 */
+
+ 124, 18, 21, 124, 18, 21, 125, 81, 20, 18,
+ 24, 94, 124, 124, 24, 2, 71, 94, 43, 77,
+ 12, 12, 19, 12, 46, 106, 124, 124, 42, 67,
+ 125, 107, 21, 43, 77, 12, 59, 49, 38, 16,
+ 51, 79, 105, 12, 10, 41, 65, 0, 43, 85,
+ 0, 23, 53, 75, 16, 31, 23, 67, 26, 6,
+ 44, 0, 0, 0, 39, 45, 67, 17, 44, 2,
+ 58, 49, 125, 125, 55, 63, 41, 45, 51, 55,
+ 125, 25, 79, 53, 125, 33, 25, 41, 29, 16,
+ 4, 39, 125, 31, 81, 55, 125, 3, 31, 17,
+ 57, 14, 9, 15, 69, 45, 49, 37, 17, 7,
+ 17, 51, 11, 8, 5, 12, 15, 15, 10, 21,
+ 38, 11, 2, 24, 32, 42, 44, 20, 25, 29,
+ 39, 22, 7, 53, 7, 17, 23, 33, 39, 1,
+ 64, 1, 61, 23, 0, 21, 56, 72, 55, 3,
+ 11, 27, 5, 2, 9, 35, 66, 112, 80, 21,
+ 5, 121, 52, 124, 124, 125, 48, 42, 58, 68,
+ 64, 52, 42, 46, 60, 40, 54, 32, 16, 10,
+ 6, 38, 38, 42, 30, 14, 22, 52, 28, 10,
+ 30, 36, 11, 60, 0, 124, 124, 124, 106, 124,
+ 124, 124, 124, 92, 76, 68, 60, 96, 86, 19,
+ 58, 64, 38, 94, 54, 54, 70, 84, 86, 102,
+ 94, 42, 59, 14, 12, 50, 125, 103, 37, 2,
+ 20, 8, 43, 51, 61, 57, 125, 73, 12, 7,
+ 15, 27, 43, 49, 81, 69, 125, 37, 30, 4,
+ 5, 13, 23, 31, 39, 57, 89, 31, 11, 23,
+ 10, 10, 29, 39, 35, 71, 35, 50, 2, 10,
+ 8, 19, 25, 45, 39, 47, 124, 125, 125, 113,
+ 125, 101, 107, 109, 107, 99, 109, 113, 121, 61,
+ 77, 71, 85, 125, 57, 12, 45, 61, 55, 27,
+ 15, 19, 1, 35, 1, 12, 7, 9, 7, 9,
+ 27, 1, 9, 29, 16, 8, 3, 18, 38, 6,
+ 13, 25, 45, 13, 1, 13, 16, 14, 11, 3,
+ 21, 18, 18, 25, 37, 27, 27, 42, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 104, 124, 124, 124, 124, 124, 124, 96,
+ 124, 124, 92, 50, 36, 18, 31, 124, 124, 124,
+ 124, 96, 96, 76, 82, 94, 90, 70, 44, 70,
+ 32, 2, 64, 74, 78, 80, 94, 66, 68, 44,
+ 42, 6, 22, 6, 29, 119, 20, 14, 4, 60,
+ 26, 4, 29, 21, 17, 17, 23, 15, 0, 13,
+ 23, 17, 7, 20, 8, 22, 9, 124, 124, 124,
+ 124, 112, 102, 80, 50, 1, 15, 52, 38, 28,
+ 14, 8, 0, 7, 9, 31, 29, 21, 17, 17,
+ 23, 15, 0, 13, 23, 17, 7, 20, 8, 22,
+ 9, 124, 124, 124, 124, 112, 102, 80, 50, 1,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 1 */
+
+ 124, 18, 21, 124, 18, 21, 123, 77, 22, 20,
+ 24, 92, 124, 124, 26, 4, 67, 92, 41, 73,
+ 12, 12, 15, 12, 44, 104, 124, 120, 38, 67,
+ 123, 103, 19, 41, 73, 12, 57, 47, 40, 16,
+ 49, 77, 101, 10, 8, 41, 65, 0, 41, 83,
+ 0, 23, 51, 73, 16, 29, 21, 65, 28, 6,
+ 44, 0, 0, 0, 37, 45, 67, 15, 44, 2,
+ 58, 47, 123, 121, 51, 61, 37, 41, 49, 51,
+ 123, 23, 75, 51, 121, 33, 25, 41, 29, 18,
+ 4, 37, 121, 29, 79, 53, 123, 3, 29, 17,
+ 55, 16, 9, 13, 67, 43, 47, 35, 15, 5,
+ 15, 49, 9, 10, 5, 12, 13, 13, 10, 19,
+ 40, 9, 2, 26, 34, 44, 46, 22, 25, 27,
+ 37, 22, 7, 51, 7, 15, 21, 31, 35, 2,
+ 66, 2, 57, 23, 1, 19, 58, 74, 55, 3,
+ 9, 27, 3, 2, 7, 31, 66, 112, 82, 17,
+ 7, 117, 50, 124, 124, 123, 48, 42, 58, 68,
+ 64, 52, 42, 46, 60, 40, 54, 32, 16, 10,
+ 6, 38, 38, 42, 30, 14, 22, 52, 28, 8,
+ 30, 36, 11, 58, 0, 124, 124, 124, 104, 124,
+ 124, 124, 124, 90, 74, 64, 58, 92, 84, 21,
+ 56, 62, 36, 92, 54, 54, 68, 82, 84, 100,
+ 92, 40, 59, 14, 12, 48, 123, 99, 33, 4,
+ 20, 8, 41, 49, 59, 55, 123, 69, 14, 5,
+ 13, 25, 39, 47, 77, 67, 121, 35, 32, 6,
+ 3, 11, 21, 29, 37, 55, 85, 29, 7, 21,
+ 12, 10, 27, 37, 33, 69, 33, 52, 4, 12,
+ 10, 17, 23, 43, 37, 45, 124, 123, 123, 109,
+ 123, 97, 103, 105, 103, 95, 105, 109, 115, 59,
+ 75, 69, 83, 119, 55, 10, 43, 59, 53, 25,
+ 15, 17, 1, 33, 1, 12, 7, 9, 5, 9,
+ 27, 1, 9, 27, 16, 8, 3, 18, 38, 6,
+ 13, 23, 41, 13, 1, 11, 16, 14, 11, 3,
+ 19, 18, 18, 23, 35, 25, 25, 40, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 100, 124, 124, 124, 124, 124, 124, 94,
+ 120, 120, 90, 48, 34, 18, 31, 124, 124, 124,
+ 120, 92, 94, 74, 78, 92, 86, 68, 40, 66,
+ 30, 0, 62, 72, 74, 78, 92, 64, 66, 42,
+ 40, 4, 22, 6, 29, 117, 18, 12, 2, 58,
+ 24, 2, 27, 19, 15, 15, 19, 13, 2, 11,
+ 19, 15, 5, 22, 10, 24, 7, 124, 124, 124,
+ 124, 108, 100, 76, 48, 3, 13, 54, 40, 30,
+ 16, 10, 2, 5, 7, 29, 27, 19, 15, 15,
+ 19, 13, 2, 11, 19, 15, 5, 22, 10, 24,
+ 7, 124, 124, 124, 124, 108, 100, 76, 48, 3,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 2 */
+
+ 124, 18, 21, 124, 18, 21, 119, 75, 22, 20,
+ 24, 88, 120, 124, 28, 4, 63, 88, 41, 71,
+ 12, 12, 13, 10, 42, 102, 120, 114, 34, 69,
+ 119, 101, 19, 41, 71, 12, 57, 45, 40, 16,
+ 47, 75, 99, 8, 6, 41, 65, 0, 41, 81,
+ 0, 23, 51, 73, 16, 29, 21, 63, 28, 6,
+ 44, 0, 0, 0, 35, 45, 67, 15, 42, 2,
+ 58, 45, 121, 117, 49, 59, 33, 37, 47, 49,
+ 119, 21, 73, 49, 117, 35, 25, 41, 29, 18,
+ 4, 35, 117, 29, 77, 51, 119, 3, 29, 17,
+ 55, 16, 9, 13, 65, 43, 45, 35, 15, 5,
+ 15, 47, 7, 10, 5, 12, 13, 13, 10, 19,
+ 40, 9, 2, 26, 34, 44, 46, 22, 27, 25,
+ 35, 20, 7, 51, 7, 13, 21, 31, 33, 4,
+ 68, 6, 53, 25, 3, 19, 58, 74, 57, 3,
+ 9, 29, 1, 2, 7, 29, 66, 112, 82, 15,
+ 9, 115, 48, 124, 124, 121, 48, 42, 58, 66,
+ 62, 52, 42, 46, 58, 38, 52, 32, 16, 10,
+ 6, 36, 36, 40, 30, 14, 22, 50, 26, 6,
+ 28, 34, 11, 56, 1, 124, 124, 124, 100, 120,
+ 124, 124, 124, 88, 70, 60, 54, 88, 80, 23,
+ 54, 60, 32, 90, 52, 52, 66, 78, 80, 96,
+ 88, 36, 59, 12, 10, 44, 121, 97, 31, 6,
+ 20, 8, 39, 47, 57, 53, 119, 67, 16, 3,
+ 11, 23, 37, 45, 75, 65, 117, 33, 32, 6,
+ 3, 11, 19, 27, 35, 53, 83, 29, 5, 19,
+ 12, 10, 25, 35, 33, 67, 31, 52, 6, 12,
+ 10, 15, 21, 41, 35, 43, 124, 121, 119, 105,
+ 119, 95, 101, 101, 99, 93, 101, 105, 111, 57,
+ 73, 67, 81, 113, 55, 8, 43, 57, 51, 25,
+ 15, 17, 1, 33, 1, 10, 7, 9, 3, 9,
+ 27, 1, 9, 27, 16, 8, 3, 16, 36, 6,
+ 13, 23, 39, 15, 1, 9, 14, 14, 11, 3,
+ 19, 18, 18, 23, 33, 25, 25, 36, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 96, 124, 124, 124, 124, 124, 122, 90,
+ 116, 116, 86, 46, 32, 16, 31, 124, 124, 124,
+ 116, 88, 90, 70, 74, 88, 82, 64, 36, 62,
+ 26, 1, 60, 70, 70, 74, 88, 60, 62, 40,
+ 38, 2, 20, 4, 29, 115, 16, 10, 1, 56,
+ 22, 0, 27, 19, 13, 13, 17, 11, 4, 11,
+ 17, 13, 3, 22, 12, 26, 5, 124, 124, 124,
+ 120, 104, 96, 72, 44, 5, 11, 54, 40, 32,
+ 18, 12, 2, 3, 7, 27, 27, 19, 13, 13,
+ 17, 11, 4, 11, 17, 13, 3, 22, 12, 26,
+ 5, 124, 124, 124, 120, 104, 96, 72, 44, 5,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 3 */
+
+ 124, 18, 21, 124, 18, 21, 115, 71, 24, 20,
+ 22, 84, 118, 122, 28, 4, 59, 86, 41, 67,
+ 12, 10, 11, 8, 40, 100, 116, 106, 30, 71,
+ 115, 97, 19, 41, 67, 12, 55, 43, 42, 16,
+ 45, 73, 97, 6, 4, 41, 67, 0, 41, 79,
+ 0, 25, 51, 73, 16, 29, 21, 61, 30, 6,
+ 44, 0, 0, 0, 35, 45, 67, 13, 40, 2,
+ 56, 45, 119, 113, 47, 57, 31, 35, 45, 47,
+ 115, 19, 71, 47, 113, 37, 25, 41, 29, 20,
+ 4, 33, 113, 29, 75, 49, 115, 3, 29, 17,
+ 55, 18, 9, 11, 63, 43, 43, 35, 15, 5,
+ 13, 45, 7, 10, 5, 12, 13, 13, 10, 19,
+ 40, 9, 2, 28, 34, 46, 46, 24, 27, 25,
+ 33, 20, 7, 51, 7, 11, 21, 29, 31, 6,
+ 70, 8, 49, 25, 5, 17, 58, 74, 59, 3,
+ 7, 29, 1, 2, 7, 27, 66, 112, 82, 13,
+ 11, 111, 46, 124, 124, 117, 48, 42, 56, 64,
+ 62, 50, 40, 46, 58, 36, 50, 32, 16, 10,
+ 4, 36, 34, 38, 28, 14, 22, 48, 26, 4,
+ 28, 32, 11, 54, 1, 124, 124, 122, 98, 116,
+ 124, 124, 124, 86, 66, 56, 52, 84, 76, 27,
+ 52, 58, 28, 88, 50, 50, 64, 76, 76, 92,
+ 84, 34, 59, 10, 8, 42, 117, 93, 27, 6,
+ 20, 8, 37, 45, 55, 51, 115, 65, 18, 1,
+ 9, 23, 35, 43, 71, 63, 113, 33, 34, 8,
+ 1, 9, 17, 27, 35, 51, 81, 29, 1, 17,
+ 12, 10, 23, 35, 33, 65, 29, 54, 8, 14,
+ 10, 13, 21, 39, 35, 43, 124, 117, 117, 103,
+ 115, 93, 97, 99, 97, 89, 97, 101, 107, 57,
+ 71, 67, 79, 107, 55, 6, 43, 55, 49, 25,
+ 15, 17, 1, 31, 1, 8, 7, 9, 3, 9,
+ 27, 1, 9, 27, 14, 8, 3, 14, 34, 6,
+ 13, 23, 37, 17, 1, 7, 12, 14, 11, 3,
+ 17, 18, 16, 21, 31, 25, 25, 34, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 92, 124, 124, 124, 124, 124, 118, 86,
+ 112, 110, 82, 44, 30, 14, 31, 124, 124, 124,
+ 112, 84, 86, 68, 70, 84, 78, 60, 32, 58,
+ 22, 3, 58, 68, 66, 72, 84, 58, 58, 36,
+ 34, 0, 18, 2, 29, 113, 14, 6, 3, 54,
+ 20, 1, 27, 17, 13, 13, 15, 9, 6, 11,
+ 15, 11, 1, 24, 14, 26, 3, 124, 124, 124,
+ 116, 100, 92, 68, 40, 7, 11, 56, 42, 34,
+ 18, 14, 4, 3, 5, 27, 27, 17, 13, 13,
+ 15, 9, 6, 11, 15, 11, 1, 24, 14, 26,
+ 3, 124, 124, 124, 116, 100, 92, 68, 40, 7,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 4 */
+
+ 124, 18, 21, 124, 18, 21, 113, 69, 24, 20,
+ 22, 80, 114, 120, 30, 4, 57, 82, 41, 65,
+ 10, 10, 9, 6, 36, 96, 112, 100, 24, 73,
+ 111, 95, 19, 41, 65, 10, 55, 41, 42, 14,
+ 45, 71, 93, 4, 0, 43, 67, 0, 39, 77,
+ 1, 25, 51, 73, 16, 29, 21, 61, 30, 6,
+ 44, 0, 0, 0, 33, 47, 67, 13, 38, 2,
+ 56, 43, 117, 109, 45, 55, 27, 31, 45, 45,
+ 111, 17, 69, 45, 107, 37, 27, 41, 31, 20,
+ 2, 31, 107, 27, 75, 49, 111, 3, 29, 17,
+ 55, 18, 9, 11, 61, 43, 43, 33, 15, 5,
+ 13, 43, 5, 10, 7, 10, 13, 13, 10, 19,
+ 40, 9, 2, 28, 34, 46, 46, 24, 29, 23,
+ 33, 18, 7, 49, 7, 9, 19, 29, 27, 10,
+ 72, 12, 45, 27, 7, 17, 60, 74, 61, 3,
+ 7, 31, 0, 2, 7, 25, 66, 112, 82, 9,
+ 13, 109, 44, 124, 124, 115, 46, 42, 56, 64,
+ 60, 50, 40, 46, 56, 34, 48, 30, 16, 10,
+ 4, 34, 34, 36, 28, 12, 20, 46, 24, 2,
+ 26, 30, 11, 50, 3, 124, 124, 118, 94, 114,
+ 124, 124, 124, 84, 62, 50, 48, 80, 72, 29,
+ 48, 56, 26, 86, 48, 48, 60, 72, 72, 88,
+ 82, 30, 59, 8, 6, 38, 115, 91, 25, 8,
+ 20, 8, 35, 43, 53, 51, 111, 61, 20, 1,
+ 9, 21, 31, 41, 69, 61, 107, 31, 34, 8,
+ 1, 9, 15, 25, 33, 51, 79, 29, 0, 15,
+ 12, 10, 21, 33, 33, 63, 27, 54, 10, 14,
+ 10, 11, 19, 37, 33, 41, 124, 115, 113, 99,
+ 113, 91, 95, 95, 93, 87, 95, 97, 101, 55,
+ 69, 65, 77, 101, 53, 4, 41, 53, 49, 25,
+ 15, 17, 3, 31, 3, 6, 7, 9, 1, 9,
+ 27, 1, 9, 25, 14, 6, 3, 12, 32, 4,
+ 13, 23, 35, 19, 3, 7, 12, 12, 11, 3,
+ 17, 16, 16, 21, 31, 25, 25, 30, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 88, 124, 124, 124, 124, 124, 114, 82,
+ 108, 106, 78, 40, 28, 12, 31, 124, 124, 124,
+ 108, 80, 82, 64, 66, 80, 74, 56, 28, 52,
+ 20, 7, 56, 66, 60, 68, 82, 54, 54, 34,
+ 32, 1, 16, 0, 29, 111, 10, 4, 7, 50,
+ 18, 3, 27, 17, 11, 11, 13, 9, 6, 9,
+ 13, 9, 0, 24, 16, 28, 3, 124, 124, 120,
+ 112, 96, 88, 62, 36, 11, 9, 56, 42, 34,
+ 20, 14, 4, 1, 5, 25, 27, 17, 11, 11,
+ 13, 9, 6, 9, 13, 9, 0, 24, 16, 28,
+ 3, 124, 124, 120, 112, 96, 88, 62, 36, 11,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 5 */
+
+ 124, 18, 21, 124, 18, 21, 109, 65, 24, 20,
+ 20, 76, 112, 118, 32, 4, 53, 78, 39, 61,
+ 10, 10, 7, 4, 34, 94, 108, 94, 20, 73,
+ 107, 93, 19, 39, 61, 10, 55, 39, 42, 14,
+ 43, 69, 91, 2, 1, 43, 67, 0, 39, 75,
+ 1, 25, 51, 73, 16, 27, 21, 59, 32, 6,
+ 44, 0, 0, 0, 33, 47, 67, 11, 36, 2,
+ 54, 43, 113, 103, 43, 53, 25, 29, 43, 43,
+ 107, 15, 67, 43, 103, 39, 27, 41, 31, 20,
+ 2, 29, 103, 27, 73, 47, 107, 3, 29, 17,
+ 53, 18, 9, 9, 59, 41, 41, 33, 15, 3,
+ 11, 41, 5, 10, 7, 10, 11, 13, 10, 19,
+ 42, 9, 2, 30, 36, 46, 46, 24, 29, 23,
+ 31, 18, 7, 49, 7, 7, 19, 27, 25, 12,
+ 74, 14, 41, 27, 9, 15, 60, 74, 63, 3,
+ 5, 31, 2, 2, 7, 21, 66, 112, 82, 7,
+ 15, 105, 42, 124, 124, 113, 46, 42, 54, 62,
+ 60, 50, 38, 46, 56, 32, 46, 30, 16, 10,
+ 4, 34, 32, 34, 26, 12, 20, 44, 24, 0,
+ 24, 30, 11, 48, 3, 124, 124, 116, 92, 110,
+ 124, 124, 124, 82, 58, 46, 46, 76, 68, 31,
+ 46, 54, 22, 84, 46, 46, 58, 70, 68, 84,
+ 78, 28, 59, 6, 4, 34, 111, 87, 23, 8,
+ 20, 8, 33, 41, 51, 49, 107, 59, 22, 0,
+ 7, 19, 29, 39, 65, 59, 103, 29, 36, 10,
+ 0, 7, 13, 23, 33, 49, 77, 27, 2, 13,
+ 12, 10, 19, 33, 31, 61, 25, 54, 12, 14,
+ 10, 9, 17, 35, 33, 39, 124, 113, 111, 97,
+ 109, 89, 91, 93, 89, 83, 91, 93, 97, 53,
+ 67, 63, 75, 95, 53, 2, 41, 51, 47, 25,
+ 15, 17, 3, 29, 3, 4, 7, 9, 0, 9,
+ 27, 1, 9, 25, 12, 6, 3, 10, 30, 4,
+ 13, 23, 33, 19, 3, 5, 10, 12, 11, 3,
+ 17, 16, 14, 21, 29, 25, 25, 28, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 84, 124, 124, 124, 124, 124, 110, 80,
+ 104, 100, 74, 38, 26, 10, 31, 124, 124, 124,
+ 104, 76, 78, 62, 62, 76, 70, 52, 24, 48,
+ 16, 9, 54, 64, 56, 66, 78, 52, 50, 32,
+ 30, 3, 14, 1, 29, 109, 8, 2, 9, 48,
+ 16, 5, 27, 15, 11, 9, 11, 7, 8, 9,
+ 11, 7, 2, 26, 18, 28, 1, 124, 124, 116,
+ 108, 92, 84, 58, 32, 13, 9, 58, 44, 36,
+ 22, 16, 6, 1, 5, 23, 27, 15, 11, 9,
+ 11, 7, 8, 9, 11, 7, 2, 26, 18, 28,
+ 1, 124, 124, 116, 108, 92, 84, 58, 32, 13,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 6 */
+
+ 124, 18, 23, 124, 18, 23, 105, 63, 26, 20,
+ 20, 74, 108, 116, 32, 6, 49, 76, 39, 59,
+ 10, 8, 5, 2, 32, 92, 106, 86, 16, 75,
+ 103, 89, 19, 39, 59, 10, 53, 37, 44, 14,
+ 41, 67, 89, 1, 3, 43, 69, 0, 39, 75,
+ 1, 27, 49, 73, 14, 27, 21, 57, 32, 6,
+ 44, 0, 0, 0, 31, 47, 67, 11, 36, 0,
+ 54, 41, 111, 99, 41, 51, 21, 25, 41, 41,
+ 103, 13, 65, 43, 99, 41, 27, 41, 31, 22,
+ 2, 27, 99, 27, 71, 45, 103, 3, 29, 17,
+ 53, 20, 11, 9, 59, 41, 39, 33, 13, 3,
+ 11, 39, 3, 10, 7, 10, 11, 13, 10, 19,
+ 42, 9, 2, 30, 36, 48, 48, 26, 31, 21,
+ 29, 16, 7, 49, 7, 5, 19, 27, 23, 14,
+ 74, 18, 39, 29, 11, 15, 60, 74, 63, 5,
+ 5, 33, 2, 0, 5, 19, 66, 112, 84, 5,
+ 17, 103, 40, 124, 124, 109, 46, 42, 54, 60,
+ 58, 48, 38, 44, 54, 32, 46, 30, 14, 10,
+ 2, 32, 30, 32, 26, 12, 20, 44, 22, 3,
+ 24, 28, 11, 46, 5, 124, 124, 112, 88, 106,
+ 124, 124, 124, 78, 54, 42, 42, 72, 64, 35,
+ 44, 50, 18, 80, 44, 44, 56, 66, 64, 80,
+ 74, 24, 59, 4, 2, 32, 109, 85, 19, 10,
+ 20, 8, 31, 41, 51, 47, 105, 57, 24, 2,
+ 5, 19, 27, 37, 63, 57, 99, 29, 36, 10,
+ 0, 7, 11, 23, 31, 47, 75, 27, 6, 11,
+ 12, 10, 19, 31, 31, 61, 25, 56, 12, 16,
+ 10, 7, 17, 35, 31, 39, 124, 109, 107, 93,
+ 105, 85, 89, 89, 87, 81, 87, 89, 93, 53,
+ 65, 63, 75, 89, 53, 0, 41, 51, 45, 25,
+ 15, 17, 3, 29, 3, 2, 7, 9, 0, 9,
+ 27, 1, 9, 25, 12, 6, 3, 8, 28, 4,
+ 13, 23, 31, 21, 3, 3, 8, 12, 11, 3,
+ 15, 16, 14, 19, 27, 25, 25, 24, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 80, 124, 124, 124, 124, 124, 104, 76,
+ 100, 96, 70, 36, 24, 8, 31, 124, 124, 124,
+ 100, 72, 76, 58, 58, 72, 64, 48, 20, 44,
+ 12, 11, 52, 60, 52, 62, 74, 48, 46, 28,
+ 26, 5, 12, 3, 31, 107, 6, 1, 13, 46,
+ 12, 7, 25, 15, 9, 9, 9, 5, 10, 9,
+ 9, 5, 4, 26, 20, 30, 0, 124, 124, 112,
+ 104, 88, 80, 54, 28, 15, 7, 58, 44, 38,
+ 22, 18, 6, 0, 3, 23, 25, 15, 9, 9,
+ 9, 5, 10, 9, 9, 5, 4, 26, 20, 30,
+ 0, 124, 124, 112, 104, 88, 80, 54, 28, 15,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 7 */
+
+ 124, 18, 23, 124, 18, 23, 101, 59, 26, 20,
+ 18, 70, 106, 114, 34, 6, 47, 72, 39, 55,
+ 8, 8, 3, 0, 30, 90, 102, 80, 10, 77,
+ 99, 87, 19, 39, 55, 8, 53, 35, 44, 14,
+ 41, 65, 85, 3, 5, 43, 69, 0, 37, 73,
+ 3, 27, 49, 73, 14, 27, 21, 55, 34, 6,
+ 44, 0, 0, 0, 31, 47, 67, 9, 34, 0,
+ 52, 41, 109, 95, 39, 49, 19, 23, 39, 39,
+ 99, 11, 63, 41, 93, 41, 29, 41, 33, 22,
+ 2, 25, 93, 25, 71, 45, 99, 3, 29, 17,
+ 53, 20, 11, 7, 57, 41, 37, 31, 13, 3,
+ 9, 37, 3, 10, 9, 10, 11, 13, 10, 19,
+ 42, 9, 2, 32, 36, 48, 48, 26, 31, 21,
+ 29, 16, 7, 47, 7, 3, 17, 25, 19, 18,
+ 76, 20, 35, 29, 13, 13, 62, 74, 65, 5,
+ 3, 33, 4, 0, 5, 17, 66, 112, 84, 1,
+ 19, 99, 38, 124, 124, 107, 46, 42, 52, 60,
+ 58, 48, 36, 44, 54, 30, 44, 30, 14, 10,
+ 2, 32, 30, 30, 24, 12, 20, 42, 22, 5,
+ 22, 26, 11, 44, 5, 124, 124, 108, 86, 104,
+ 124, 124, 124, 76, 50, 38, 40, 68, 60, 37,
+ 42, 48, 16, 78, 42, 42, 52, 64, 60, 76,
+ 72, 22, 59, 2, 0, 28, 105, 81, 17, 10,
+ 20, 8, 29, 39, 49, 47, 101, 53, 26, 4,
+ 5, 17, 23, 35, 59, 55, 93, 27, 38, 12,
+ 2, 5, 9, 21, 31, 45, 73, 27, 8, 9,
+ 12, 10, 17, 31, 31, 59, 23, 56, 14, 16,
+ 10, 5, 15, 33, 31, 37, 124, 107, 105, 91,
+ 103, 83, 85, 87, 83, 77, 83, 85, 87, 51,
+ 63, 61, 73, 83, 51, 1, 39, 49, 43, 25,
+ 15, 17, 3, 27, 5, 0, 7, 9, 2, 9,
+ 27, 1, 9, 23, 10, 4, 3, 6, 26, 2,
+ 13, 23, 29, 23, 5, 1, 8, 10, 11, 3,
+ 15, 14, 12, 19, 27, 25, 25, 22, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 124, 76, 124, 124, 124, 124, 124, 100, 72,
+ 96, 90, 66, 34, 22, 6, 31, 124, 122, 124,
+ 96, 68, 72, 56, 54, 68, 60, 44, 16, 40,
+ 10, 15, 50, 58, 48, 60, 72, 46, 42, 26,
+ 24, 7, 10, 5, 31, 105, 2, 3, 15, 42,
+ 10, 9, 25, 13, 9, 7, 7, 3, 10, 7,
+ 7, 3, 6, 28, 22, 30, 0, 124, 120, 108,
+ 100, 84, 76, 48, 24, 17, 7, 60, 46, 38,
+ 24, 20, 8, 0, 3, 21, 25, 13, 9, 7,
+ 7, 3, 10, 7, 7, 3, 6, 28, 22, 30,
+ 0, 124, 120, 108, 100, 84, 76, 48, 24, 17,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 8 */
+
+ 124, 16, 23, 124, 16, 23, 99, 57, 26, 20,
+ 18, 66, 102, 112, 34, 6, 43, 68, 39, 53,
+ 8, 6, 1, 1, 26, 86, 98, 72, 6, 79,
+ 95, 85, 19, 39, 53, 8, 53, 35, 44, 12,
+ 39, 63, 83, 5, 9, 45, 71, 0, 37, 71,
+ 3, 29, 49, 73, 14, 27, 21, 55, 34, 6,
+ 44, 0, 0, 0, 29, 49, 67, 9, 32, 0,
+ 52, 39, 107, 91, 37, 49, 15, 19, 39, 37,
+ 95, 11, 61, 39, 89, 43, 29, 43, 33, 22,
+ 0, 25, 89, 25, 69, 43, 97, 3, 29, 17,
+ 53, 20, 11, 7, 55, 41, 37, 31, 13, 3,
+ 9, 35, 1, 10, 9, 8, 11, 13, 8, 19,
+ 42, 9, 2, 32, 36, 48, 48, 26, 33, 19,
+ 27, 14, 7, 47, 7, 1, 17, 25, 17, 20,
+ 78, 24, 31, 31, 15, 13, 62, 74, 67, 5,
+ 3, 35, 4, 0, 5, 15, 66, 112, 84, 0,
+ 21, 97, 36, 118, 124, 105, 44, 42, 52, 58,
+ 56, 46, 36, 44, 52, 28, 42, 28, 14, 8,
+ 0, 30, 28, 28, 24, 10, 18, 40, 20, 7,
+ 20, 24, 11, 40, 7, 124, 124, 104, 82, 100,
+ 120, 124, 124, 74, 46, 32, 36, 62, 56, 41,
+ 38, 46, 12, 76, 40, 40, 50, 60, 56, 72,
+ 68, 18, 59, 0, 1, 24, 103, 79, 15, 12,
+ 20, 8, 29, 37, 47, 45, 97, 51, 26, 4,
+ 3, 17, 21, 33, 57, 53, 89, 27, 38, 12,
+ 2, 5, 9, 21, 29, 45, 71, 27, 10, 7,
+ 12, 10, 15, 29, 31, 57, 21, 56, 16, 16,
+ 10, 3, 15, 31, 29, 37, 124, 105, 101, 87,
+ 99, 81, 83, 83, 81, 75, 81, 81, 83, 51,
+ 61, 61, 71, 77, 51, 3, 39, 47, 43, 25,
+ 15, 17, 5, 27, 5, 1, 7, 9, 2, 9,
+ 27, 3, 9, 23, 10, 4, 5, 4, 24, 2,
+ 15, 23, 27, 25, 5, 1, 6, 10, 11, 5,
+ 15, 14, 12, 19, 25, 25, 25, 18, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 122, 72, 124, 124, 124, 124, 122, 96, 68,
+ 90, 86, 62, 30, 18, 4, 31, 122, 118, 124,
+ 92, 62, 68, 52, 48, 64, 56, 40, 12, 34,
+ 6, 17, 46, 56, 42, 56, 68, 42, 38, 22,
+ 20, 9, 8, 7, 31, 103, 0, 7, 19, 40,
+ 8, 11, 25, 13, 7, 7, 5, 3, 12, 7,
+ 5, 3, 8, 28, 22, 32, 2, 122, 116, 104,
+ 96, 80, 72, 44, 20, 21, 5, 60, 46, 40,
+ 24, 20, 8, 2, 3, 21, 25, 13, 7, 7,
+ 5, 3, 12, 7, 5, 3, 8, 28, 22, 32,
+ 2, 122, 116, 104, 96, 80, 72, 44, 20, 21,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 9 */
+
+ 124, 16, 23, 124, 16, 23, 95, 55, 28, 20,
+ 18, 62, 98, 112, 36, 6, 39, 66, 37, 49,
+ 8, 6, 0, 1, 24, 84, 94, 66, 2, 79,
+ 91, 81, 17, 37, 49, 8, 51, 33, 46, 12,
+ 37, 61, 81, 7, 11, 45, 71, 0, 37, 69,
+ 3, 29, 49, 73, 14, 25, 19, 53, 34, 6,
+ 44, 0, 0, 0, 27, 49, 67, 9, 30, 0,
+ 52, 37, 103, 85, 35, 47, 11, 15, 37, 35,
+ 91, 9, 57, 37, 85, 45, 29, 43, 33, 24,
+ 0, 23, 85, 25, 67, 41, 93, 3, 27, 17,
+ 51, 22, 11, 5, 53, 39, 35, 31, 13, 1,
+ 7, 33, 0, 10, 9, 8, 9, 11, 8, 19,
+ 44, 9, 2, 32, 38, 50, 48, 28, 33, 17,
+ 25, 12, 7, 47, 7, 0, 17, 23, 15, 22,
+ 80, 28, 27, 33, 17, 11, 62, 76, 69, 5,
+ 3, 35, 6, 0, 5, 11, 66, 112, 84, 2,
+ 23, 95, 34, 114, 124, 101, 44, 42, 52, 56,
+ 56, 46, 36, 44, 52, 26, 40, 28, 14, 8,
+ 0, 30, 26, 28, 24, 10, 18, 38, 18, 9,
+ 20, 24, 11, 38, 7, 124, 124, 102, 80, 96,
+ 116, 124, 124, 72, 42, 28, 34, 58, 54, 43,
+ 36, 44, 8, 74, 38, 38, 48, 56, 54, 68,
+ 64, 16, 59, 0, 3, 22, 99, 75, 11, 14,
+ 20, 8, 27, 35, 45, 43, 93, 49, 28, 6,
+ 1, 15, 19, 31, 55, 51, 85, 25, 40, 14,
+ 4, 5, 7, 19, 27, 43, 67, 25, 14, 5,
+ 14, 10, 13, 27, 29, 55, 19, 58, 18, 18,
+ 12, 1, 13, 29, 27, 35, 124, 101, 97, 83,
+ 95, 79, 81, 79, 77, 71, 77, 77, 79, 49,
+ 59, 59, 69, 69, 51, 5, 39, 45, 41, 23,
+ 15, 17, 5, 27, 5, 3, 7, 9, 4, 9,
+ 27, 3, 9, 23, 10, 4, 5, 4, 22, 2,
+ 15, 21, 23, 25, 5, 0, 4, 10, 11, 5,
+ 13, 14, 12, 17, 23, 23, 23, 14, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 116, 68, 124, 124, 124, 124, 116, 92, 66,
+ 86, 82, 60, 28, 16, 2, 31, 118, 114, 120,
+ 88, 58, 64, 50, 44, 60, 52, 36, 8, 30,
+ 2, 19, 44, 54, 38, 54, 64, 40, 34, 20,
+ 18, 11, 6, 7, 31, 101, 1, 9, 23, 38,
+ 6, 13, 25, 11, 5, 5, 1, 1, 14, 7,
+ 3, 1, 10, 30, 24, 34, 4, 120, 114, 100,
+ 92, 76, 68, 40, 16, 23, 3, 60, 48, 42,
+ 26, 22, 10, 4, 1, 19, 25, 11, 5, 5,
+ 1, 1, 14, 7, 3, 1, 10, 30, 24, 34,
+ 4, 120, 114, 100, 92, 76, 68, 40, 16, 23,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 10 */
+
+ 124, 16, 23, 124, 16, 23, 91, 51, 28, 20,
+ 16, 58, 96, 110, 38, 6, 37, 62, 37, 47,
+ 6, 6, 2, 3, 22, 82, 90, 60, 3, 81,
+ 87, 79, 17, 37, 47, 6, 51, 31, 46, 12,
+ 37, 59, 77, 9, 13, 45, 71, 0, 35, 67,
+ 5, 29, 49, 73, 14, 25, 19, 51, 36, 6,
+ 44, 0, 0, 0, 27, 49, 67, 7, 28, 0,
+ 50, 37, 101, 81, 33, 45, 9, 13, 35, 33,
+ 87, 7, 55, 35, 79, 45, 31, 43, 35, 24,
+ 0, 21, 79, 23, 67, 41, 89, 3, 27, 17,
+ 51, 22, 11, 5, 51, 39, 33, 29, 13, 1,
+ 7, 31, 0, 10, 11, 8, 9, 11, 8, 19,
+ 44, 9, 2, 34, 38, 50, 48, 28, 35, 17,
+ 25, 12, 7, 45, 7, 2, 15, 23, 11, 26,
+ 82, 30, 23, 33, 19, 11, 64, 76, 71, 5,
+ 1, 37, 8, 0, 5, 9, 66, 112, 84, 6,
+ 25, 91, 32, 108, 124, 99, 44, 42, 50, 56,
+ 54, 46, 34, 44, 50, 24, 38, 28, 14, 8,
+ 0, 28, 26, 26, 22, 10, 18, 36, 18, 11,
+ 18, 22, 11, 36, 9, 120, 124, 98, 76, 94,
+ 112, 124, 124, 70, 38, 24, 30, 54, 50, 45,
+ 34, 42, 6, 72, 36, 36, 44, 54, 50, 64,
+ 62, 12, 59, 1, 5, 18, 97, 73, 9, 14,
+ 20, 8, 25, 33, 43, 43, 89, 45, 30, 8,
+ 1, 13, 15, 29, 51, 49, 79, 23, 40, 14,
+ 4, 3, 5, 17, 27, 41, 65, 25, 16, 3,
+ 14, 10, 11, 27, 29, 53, 17, 58, 20, 18,
+ 12, 0, 11, 27, 27, 33, 124, 99, 95, 81,
+ 93, 77, 77, 77, 73, 69, 73, 73, 73, 47,
+ 57, 57, 67, 63, 49, 7, 37, 43, 39, 23,
+ 15, 17, 5, 25, 7, 5, 7, 9, 6, 9,
+ 27, 3, 9, 21, 8, 2, 5, 2, 20, 0,
+ 15, 21, 21, 27, 7, 2, 4, 8, 11, 5,
+ 13, 12, 10, 17, 23, 23, 23, 12, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 112, 64, 124, 124, 124, 124, 110, 88, 62,
+ 82, 76, 56, 26, 14, 0, 31, 114, 108, 114,
+ 84, 54, 60, 46, 40, 56, 48, 32, 4, 26,
+ 0, 23, 42, 52, 34, 50, 62, 36, 30, 18,
+ 16, 13, 4, 9, 31, 99, 5, 11, 25, 34,
+ 4, 15, 25, 11, 5, 3, 0, 0, 14, 5,
+ 1, 0, 12, 30, 26, 34, 4, 120, 110, 96,
+ 88, 72, 64, 34, 12, 25, 3, 62, 48, 42,
+ 28, 24, 10, 4, 1, 17, 25, 11, 5, 3,
+ 0, 0, 14, 5, 1, 0, 12, 30, 26, 34,
+ 4, 120, 110, 96, 88, 72, 64, 34, 12, 25,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 11 */
+
+ 124, 16, 25, 124, 16, 25, 87, 49, 30, 20,
+ 16, 56, 92, 108, 38, 8, 33, 60, 37, 43,
+ 6, 4, 4, 5, 20, 80, 88, 52, 7, 83,
+ 83, 75, 17, 37, 43, 6, 49, 29, 48, 12,
+ 35, 57, 75, 13, 15, 45, 73, 0, 35, 67,
+ 5, 31, 47, 73, 12, 25, 19, 49, 36, 6,
+ 44, 0, 0, 0, 25, 49, 67, 7, 28, 1,
+ 50, 35, 99, 77, 31, 43, 5, 9, 33, 31,
+ 83, 5, 53, 35, 75, 47, 31, 43, 35, 26,
+ 0, 19, 75, 23, 65, 39, 85, 3, 27, 17,
+ 51, 24, 13, 3, 51, 39, 31, 29, 11, 1,
+ 5, 29, 2, 10, 11, 8, 9, 11, 8, 19,
+ 44, 9, 2, 34, 38, 52, 50, 30, 35, 15,
+ 23, 10, 7, 45, 7, 4, 15, 21, 9, 28,
+ 82, 34, 21, 35, 21, 9, 64, 76, 71, 7,
+ 1, 37, 8, 1, 3, 7, 66, 112, 86, 8,
+ 27, 89, 30, 102, 124, 95, 44, 42, 50, 54,
+ 54, 44, 34, 42, 50, 24, 38, 28, 12, 8,
+ 1, 28, 24, 24, 22, 10, 18, 36, 16, 15,
+ 18, 20, 11, 34, 9, 114, 124, 94, 74, 90,
+ 108, 124, 122, 66, 34, 20, 28, 50, 46, 49,
+ 32, 38, 2, 68, 34, 34, 42, 50, 46, 60,
+ 58, 10, 59, 3, 7, 16, 93, 69, 5, 16,
+ 20, 8, 23, 33, 43, 41, 87, 43, 32, 10,
+ 0, 13, 13, 27, 49, 47, 75, 23, 42, 16,
+ 6, 3, 3, 17, 25, 39, 63, 25, 20, 1,
+ 14, 10, 11, 25, 29, 53, 17, 60, 20, 20,
+ 12, 2, 11, 27, 25, 33, 124, 95, 91, 77,
+ 89, 73, 75, 73, 71, 65, 69, 69, 69, 47,
+ 55, 57, 67, 57, 49, 9, 37, 43, 37, 23,
+ 15, 17, 5, 25, 7, 7, 7, 9, 6, 9,
+ 27, 3, 9, 21, 8, 2, 5, 0, 18, 0,
+ 15, 21, 19, 29, 7, 4, 2, 8, 11, 5,
+ 11, 12, 10, 15, 21, 23, 23, 8, 124, 122,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 106, 60, 124, 124, 124, 124, 106, 82, 58,
+ 78, 72, 52, 24, 12, 1, 31, 110, 104, 110,
+ 80, 50, 58, 44, 36, 52, 42, 28, 0, 22,
+ 3, 25, 40, 48, 30, 48, 58, 34, 26, 14,
+ 12, 15, 2, 11, 33, 97, 7, 15, 29, 32,
+ 0, 17, 23, 9, 3, 3, 2, 2, 16, 5,
+ 0, 2, 14, 32, 28, 36, 6, 118, 106, 92,
+ 84, 68, 60, 30, 8, 27, 1, 62, 50, 44,
+ 28, 26, 12, 6, 0, 17, 23, 9, 3, 3,
+ 2, 2, 16, 5, 0, 2, 14, 32, 28, 36,
+ 6, 118, 106, 92, 84, 68, 60, 30, 8, 27,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 12 */
+
+ 124, 16, 25, 124, 16, 25, 85, 45, 30, 20,
+ 14, 52, 90, 106, 40, 8, 29, 56, 37, 41,
+ 6, 4, 6, 7, 16, 76, 84, 46, 11, 85,
+ 79, 73, 17, 37, 41, 6, 49, 27, 48, 10,
+ 33, 55, 73, 15, 19, 47, 73, 0, 35, 65,
+ 5, 31, 47, 73, 12, 25, 19, 49, 38, 6,
+ 44, 0, 0, 0, 25, 51, 67, 5, 26, 1,
+ 48, 35, 97, 73, 29, 41, 3, 7, 33, 29,
+ 79, 3, 51, 33, 71, 49, 31, 43, 35, 26,
+ 1, 17, 71, 23, 63, 37, 81, 3, 27, 17,
+ 51, 24, 13, 3, 49, 39, 31, 29, 11, 1,
+ 5, 27, 2, 10, 11, 6, 9, 11, 8, 19,
+ 44, 9, 2, 36, 38, 52, 50, 30, 37, 15,
+ 21, 10, 7, 45, 7, 6, 15, 21, 7, 30,
+ 84, 36, 17, 35, 23, 9, 64, 76, 73, 7,
+ 0, 39, 10, 1, 3, 5, 66, 112, 86, 10,
+ 29, 85, 28, 96, 120, 93, 42, 42, 48, 52,
+ 52, 44, 32, 42, 48, 22, 36, 26, 12, 8,
+ 1, 26, 22, 22, 20, 8, 16, 34, 16, 17,
+ 16, 18, 11, 30, 11, 110, 124, 90, 70, 86,
+ 104, 124, 116, 64, 30, 14, 24, 46, 42, 51,
+ 28, 36, 1, 66, 32, 32, 40, 48, 42, 56,
+ 54, 6, 59, 5, 9, 12, 91, 67, 3, 16,
+ 20, 8, 21, 31, 41, 39, 83, 41, 34, 10,
+ 2, 11, 11, 25, 45, 45, 71, 21, 42, 16,
+ 6, 1, 1, 15, 25, 39, 61, 25, 22, 0,
+ 14, 10, 9, 25, 29, 51, 15, 60, 22, 20,
+ 12, 4, 9, 25, 25, 31, 124, 93, 89, 75,
+ 85, 71, 71, 71, 67, 63, 67, 65, 65, 45,
+ 53, 55, 65, 51, 49, 11, 37, 41, 37, 23,
+ 15, 17, 7, 23, 7, 9, 7, 9, 8, 9,
+ 27, 3, 9, 21, 6, 2, 5, 1, 16, 0,
+ 15, 21, 17, 31, 7, 4, 0, 8, 11, 5,
+ 11, 12, 8, 15, 19, 23, 23, 6, 124, 120,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 124, 100, 56, 124, 124, 124, 124, 100, 78, 54,
+ 74, 66, 48, 20, 10, 3, 31, 104, 100, 106,
+ 76, 46, 54, 40, 32, 48, 38, 24, 3, 16,
+ 7, 27, 38, 46, 24, 44, 54, 30, 22, 12,
+ 10, 17, 0, 13, 33, 95, 9, 17, 31, 30,
+ 1, 19, 23, 9, 3, 1, 4, 2, 18, 5,
+ 2, 4, 16, 32, 30, 36, 8, 118, 102, 88,
+ 80, 64, 56, 26, 4, 31, 1, 64, 50, 46,
+ 30, 26, 12, 6, 0, 15, 23, 9, 3, 1,
+ 4, 2, 18, 5, 2, 4, 16, 32, 30, 36,
+ 8, 118, 102, 88, 80, 64, 56, 26, 4, 31,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 13 */
+
+ 124, 16, 25, 124, 16, 25, 81, 43, 30, 20,
+ 14, 48, 86, 104, 42, 8, 27, 52, 35, 37,
+ 4, 4, 8, 9, 14, 74, 80, 40, 17, 85,
+ 75, 71, 17, 35, 37, 4, 49, 25, 48, 10,
+ 33, 53, 69, 17, 21, 47, 73, 0, 33, 63,
+ 7, 31, 47, 73, 12, 23, 19, 47, 38, 6,
+ 44, 0, 0, 0, 23, 51, 67, 5, 24, 1,
+ 48, 33, 93, 67, 27, 39, 0, 3, 31, 27,
+ 75, 1, 49, 31, 65, 49, 33, 43, 37, 26,
+ 1, 15, 65, 21, 63, 37, 77, 3, 27, 17,
+ 49, 24, 13, 1, 47, 37, 29, 27, 11, 0,
+ 3, 25, 4, 10, 13, 6, 7, 11, 8, 19,
+ 46, 9, 2, 36, 40, 52, 50, 30, 37, 13,
+ 21, 8, 7, 43, 7, 8, 13, 19, 3, 34,
+ 86, 40, 13, 37, 25, 7, 66, 76, 75, 7,
+ 0, 39, 12, 1, 3, 1, 66, 112, 86, 14,
+ 31, 83, 26, 92, 114, 91, 42, 42, 48, 52,
+ 52, 44, 32, 42, 48, 20, 34, 26, 12, 8,
+ 1, 26, 22, 20, 20, 8, 16, 32, 14, 19,
+ 14, 18, 11, 28, 11, 106, 124, 88, 68, 84,
+ 100, 124, 112, 62, 26, 10, 22, 42, 38, 53,
+ 26, 34, 3, 64, 30, 30, 36, 44, 38, 52,
+ 52, 4, 59, 7, 11, 8, 87, 63, 1, 18,
+ 20, 8, 19, 29, 39, 39, 79, 37, 36, 12,
+ 2, 9, 7, 23, 43, 43, 65, 19, 44, 18,
+ 8, 1, 0, 13, 23, 37, 59, 23, 24, 2,
+ 14, 10, 7, 23, 27, 49, 13, 60, 24, 20,
+ 12, 6, 7, 23, 23, 29, 124, 91, 85, 71,
+ 83, 69, 69, 67, 63, 59, 63, 61, 59, 43,
+ 51, 53, 63, 45, 47, 13, 35, 39, 35, 23,
+ 15, 17, 7, 23, 9, 11, 7, 9, 10, 9,
+ 27, 3, 9, 19, 6, 0, 5, 3, 14, 1,
+ 15, 21, 15, 31, 9, 6, 0, 6, 11, 5,
+ 11, 10, 8, 15, 19, 23, 23, 2, 124, 118,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+ 120, 96, 52, 124, 124, 124, 124, 94, 74, 52,
+ 70, 62, 44, 18, 8, 5, 31, 100, 94, 100,
+ 72, 42, 50, 38, 28, 44, 34, 20, 7, 12,
+ 9, 31, 36, 44, 20, 42, 52, 28, 18, 10,
+ 8, 19, 1, 15, 33, 93, 13, 19, 35, 26,
+ 3, 21, 23, 7, 1, 0, 6, 4, 18, 3,
+ 4, 6, 18, 34, 32, 38, 8, 116, 98, 84,
+ 76, 60, 52, 20, 0, 33, 0, 64, 52, 46,
+ 32, 28, 14, 8, 0, 13, 23, 7, 1, 0,
+ 6, 4, 18, 3, 4, 6, 18, 34, 32, 38,
+ 8, 116, 98, 84, 76, 60, 52, 20, 0, 33,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 14 */
+
+ 122, 16, 25, 122, 16, 25, 77, 39, 32, 20,
+ 12, 44, 84, 102, 42, 8, 23, 50, 35, 35,
+ 4, 2, 10, 11, 12, 72, 76, 32, 21, 87,
+ 71, 67, 17, 35, 35, 4, 47, 23, 50, 10,
+ 31, 51, 67, 19, 23, 47, 75, 0, 33, 61,
+ 7, 33, 47, 73, 12, 23, 19, 45, 40, 6,
+ 44, 0, 0, 0, 23, 51, 67, 3, 22, 1,
+ 46, 33, 91, 63, 25, 37, 2, 1, 29, 25,
+ 71, 0, 47, 29, 61, 51, 33, 43, 37, 28,
+ 1, 13, 61, 21, 61, 35, 73, 3, 27, 17,
+ 49, 26, 13, 1, 45, 37, 27, 27, 11, 0,
+ 3, 23, 4, 10, 13, 6, 7, 11, 8, 19,
+ 46, 9, 2, 38, 40, 54, 50, 32, 39, 13,
+ 19, 8, 7, 43, 7, 10, 13, 19, 1, 36,
+ 88, 42, 9, 37, 27, 7, 66, 76, 77, 7,
+ 2, 41, 12, 1, 3, 0, 66, 112, 86, 16,
+ 33, 79, 24, 86, 108, 87, 42, 42, 46, 50,
+ 50, 42, 30, 42, 46, 18, 32, 26, 12, 8,
+ 3, 24, 20, 18, 18, 8, 16, 30, 14, 21,
+ 14, 16, 11, 26, 13, 102, 120, 84, 64, 80,
+ 96, 124, 106, 60, 22, 6, 18, 38, 34, 57,
+ 24, 32, 7, 62, 28, 28, 34, 42, 34, 48,
+ 48, 0, 59, 9, 13, 6, 85, 61, 2, 18,
+ 20, 8, 17, 27, 37, 37, 75, 35, 38, 14,
+ 4, 9, 5, 21, 39, 41, 61, 19, 44, 18,
+ 8, 0, 2, 13, 23, 35, 57, 23, 28, 4,
+ 14, 10, 5, 23, 27, 47, 11, 62, 26, 22,
+ 12, 8, 7, 21, 23, 29, 124, 87, 83, 69,
+ 79, 67, 65, 65, 61, 57, 59, 57, 55, 43,
+ 49, 53, 61, 39, 47, 15, 35, 37, 33, 23,
+ 15, 17, 7, 21, 9, 13, 7, 9, 10, 9,
+ 27, 3, 9, 19, 4, 0, 5, 5, 12, 1,
+ 15, 21, 13, 33, 9, 8, 1, 6, 11, 5,
+ 9, 10, 6, 13, 17, 23, 23, 0, 124, 116,
+ 122, 122, 122, 124, 124, 124, 122, 124, 124, 124,
+ 114, 90, 48, 124, 120, 118, 120, 88, 70, 48,
+ 66, 56, 40, 16, 6, 7, 31, 96, 90, 96,
+ 68, 38, 46, 34, 24, 40, 30, 16, 11, 8,
+ 13, 33, 34, 42, 16, 38, 48, 24, 14, 6,
+ 4, 21, 3, 17, 33, 91, 15, 23, 37, 24,
+ 5, 23, 23, 7, 1, 0, 8, 6, 20, 3,
+ 6, 8, 20, 34, 34, 38, 10, 116, 94, 80,
+ 72, 56, 48, 16, 3, 35, 0, 66, 52, 48,
+ 32, 30, 14, 8, 2, 13, 23, 7, 1, 0,
+ 8, 6, 20, 3, 6, 8, 20, 34, 34, 38,
+ 10, 116, 94, 80, 72, 56, 48, 16, 3, 35,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 15 */
+
+ 120, 16, 25, 120, 16, 25, 73, 37, 32, 20,
+ 12, 40, 80, 100, 44, 8, 19, 46, 35, 31,
+ 4, 2, 12, 13, 10, 70, 72, 26, 25, 89,
+ 67, 65, 17, 35, 31, 4, 47, 21, 50, 10,
+ 29, 49, 65, 21, 25, 47, 75, 0, 33, 59,
+ 7, 33, 47, 73, 12, 23, 19, 43, 40, 6,
+ 44, 0, 0, 0, 21, 51, 67, 3, 20, 1,
+ 46, 31, 89, 59, 23, 35, 6, 2, 27, 23,
+ 67, 2, 45, 27, 57, 53, 33, 43, 37, 28,
+ 1, 11, 57, 21, 59, 33, 69, 3, 27, 17,
+ 49, 26, 13, 0, 43, 37, 25, 27, 11, 0,
+ 1, 21, 6, 10, 13, 6, 7, 11, 8, 19,
+ 46, 9, 2, 38, 40, 54, 50, 32, 39, 11,
+ 17, 6, 7, 43, 7, 12, 13, 17, 0, 38,
+ 90, 46, 5, 39, 29, 5, 66, 76, 79, 7,
+ 2, 41, 14, 1, 3, 2, 66, 112, 86, 18,
+ 35, 77, 22, 80, 102, 85, 42, 42, 46, 48,
+ 50, 42, 30, 42, 46, 16, 30, 26, 12, 8,
+ 3, 24, 18, 16, 18, 8, 16, 28, 12, 23,
+ 12, 14, 11, 24, 13, 98, 116, 80, 62, 76,
+ 92, 118, 102, 58, 18, 2, 16, 34, 30, 59,
+ 22, 30, 11, 60, 26, 26, 32, 38, 30, 44,
+ 44, 1, 59, 11, 15, 2, 81, 57, 4, 20,
+ 20, 8, 15, 25, 35, 35, 71, 33, 40, 16,
+ 6, 7, 3, 19, 37, 39, 57, 17, 46, 20,
+ 10, 0, 4, 11, 21, 33, 55, 23, 30, 6,
+ 14, 10, 3, 21, 27, 45, 9, 62, 28, 22,
+ 12, 10, 5, 19, 21, 27, 124, 85, 79, 65,
+ 75, 65, 63, 61, 57, 53, 55, 53, 51, 41,
+ 47, 51, 59, 33, 47, 17, 35, 35, 31, 23,
+ 15, 17, 7, 21, 9, 15, 7, 9, 12, 9,
+ 27, 3, 9, 19, 4, 0, 5, 7, 10, 1,
+ 15, 21, 11, 35, 9, 10, 3, 6, 11, 5,
+ 9, 10, 6, 13, 15, 23, 23, 3, 122, 114,
+ 120, 118, 118, 124, 124, 124, 118, 120, 124, 122,
+ 108, 84, 44, 122, 114, 110, 110, 82, 66, 44,
+ 62, 52, 36, 14, 4, 9, 31, 92, 86, 92,
+ 64, 34, 42, 32, 20, 36, 26, 12, 15, 4,
+ 17, 35, 32, 40, 12, 36, 44, 22, 10, 4,
+ 2, 23, 5, 19, 33, 89, 17, 25, 41, 22,
+ 7, 25, 23, 5, 0, 2, 10, 8, 22, 3,
+ 8, 10, 22, 36, 36, 40, 12, 114, 90, 76,
+ 68, 52, 44, 12, 7, 37, 2, 66, 54, 50,
+ 34, 32, 16, 10, 2, 11, 23, 5, 0, 2,
+ 10, 8, 22, 3, 8, 10, 22, 36, 36, 40,
+ 12, 114, 90, 76, 68, 52, 44, 12, 7, 37,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 16 */
+
+ 116, 14, 27, 116, 14, 27, 71, 35, 32, 20,
+ 10, 36, 76, 98, 44, 8, 17, 42, 35, 29,
+ 2, 0, 14, 15, 6, 66, 68, 18, 31, 91,
+ 63, 63, 17, 35, 29, 2, 47, 21, 50, 8,
+ 29, 49, 63, 25, 29, 49, 77, 1, 33, 59,
+ 9, 35, 47, 73, 10, 23, 19, 43, 40, 4,
+ 44, 0, 0, 0, 21, 53, 67, 3, 18, 3,
+ 44, 31, 87, 55, 21, 35, 8, 4, 27, 21,
+ 65, 2, 43, 27, 53, 55, 35, 45, 39, 28,
+ 3, 11, 53, 21, 59, 33, 67, 3, 27, 17,
+ 49, 26, 15, 0, 43, 37, 25, 27, 11, 0,
+ 1, 19, 6, 10, 15, 4, 7, 11, 6, 19,
+ 46, 9, 2, 38, 40, 54, 50, 32, 41, 11,
+ 17, 4, 7, 43, 9, 12, 13, 17, 2, 40,
+ 90, 48, 3, 41, 33, 5, 66, 76, 81, 9,
+ 2, 43, 14, 3, 3, 4, 66, 110, 86, 20,
+ 37, 75, 18, 74, 94, 83, 40, 42, 44, 46,
+ 48, 40, 28, 40, 44, 14, 28, 24, 10, 6,
+ 5, 22, 16, 14, 16, 6, 14, 26, 10, 27,
+ 10, 12, 11, 20, 15, 92, 110, 76, 58, 72,
+ 86, 110, 96, 54, 14, 3, 12, 28, 26, 63,
+ 18, 26, 15, 56, 24, 24, 28, 34, 26, 40,
+ 40, 5, 59, 13, 17, 1, 79, 55, 6, 20,
+ 20, 8, 15, 25, 35, 35, 69, 31, 40, 16,
+ 6, 7, 1, 17, 35, 39, 53, 17, 46, 20,
+ 10, 0, 4, 11, 21, 33, 53, 23, 32, 8,
+ 14, 8, 3, 21, 27, 45, 9, 62, 28, 22,
+ 12, 12, 5, 19, 21, 27, 124, 83, 77, 63,
+ 73, 63, 61, 59, 55, 51, 53, 51, 47, 41,
+ 47, 51, 59, 27, 47, 21, 35, 35, 31, 23,
+ 15, 17, 9, 21, 11, 17, 9, 9, 12, 11,
+ 27, 5, 9, 19, 2, 1, 7, 9, 8, 3,
+ 17, 21, 9, 37, 11, 10, 5, 4, 11, 7,
+ 9, 8, 4, 13, 15, 23, 23, 7, 118, 112,
+ 116, 114, 112, 124, 124, 124, 112, 114, 124, 116,
+ 100, 78, 40, 114, 106, 102, 98, 76, 60, 40,
+ 56, 46, 32, 10, 0, 11, 31, 86, 80, 86,
+ 60, 28, 38, 28, 14, 32, 20, 8, 21, 1,
+ 21, 39, 28, 36, 6, 32, 40, 18, 6, 0,
+ 1, 25, 7, 21, 35, 87, 21, 29, 45, 18,
+ 11, 29, 23, 5, 0, 2, 12, 8, 22, 3,
+ 10, 10, 24, 36, 36, 40, 12, 112, 86, 72,
+ 62, 46, 40, 6, 11, 41, 2, 66, 54, 50,
+ 34, 32, 16, 10, 2, 11, 23, 5, 0, 2,
+ 12, 8, 22, 3, 10, 10, 24, 36, 36, 40,
+ 12, 112, 86, 72, 62, 46, 40, 6, 11, 41,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 17 */
+
+ 114, 14, 27, 114, 14, 27, 67, 31, 34, 22,
+ 10, 34, 74, 98, 46, 10, 13, 40, 33, 25,
+ 2, 0, 18, 15, 4, 64, 66, 12, 35, 91,
+ 59, 59, 15, 33, 25, 2, 45, 19, 52, 8,
+ 27, 47, 59, 27, 31, 49, 77, 1, 31, 57,
+ 9, 35, 45, 71, 10, 21, 17, 41, 42, 4,
+ 44, 0, 0, 0, 19, 53, 67, 1, 18, 3,
+ 44, 29, 83, 49, 17, 33, 12, 8, 25, 17,
+ 61, 4, 39, 25, 47, 55, 35, 45, 39, 30,
+ 3, 9, 47, 19, 57, 31, 63, 3, 25, 17,
+ 47, 28, 15, 2, 41, 35, 23, 25, 9, 2,
+ 0, 17, 8, 12, 15, 4, 5, 9, 6, 17,
+ 48, 7, 2, 40, 42, 56, 52, 34, 41, 9,
+ 15, 4, 7, 41, 9, 14, 11, 15, 6, 44,
+ 92, 52, 0, 41, 35, 3, 68, 78, 81, 9,
+ 4, 43, 16, 3, 1, 8, 66, 110, 88, 24,
+ 39, 71, 16, 70, 88, 79, 40, 42, 44, 46,
+ 48, 40, 28, 40, 44, 14, 28, 24, 10, 6,
+ 5, 22, 16, 14, 16, 6, 14, 26, 10, 29,
+ 10, 12, 11, 18, 15, 88, 106, 74, 56, 70,
+ 82, 104, 92, 52, 12, 7, 10, 24, 24, 65,
+ 16, 24, 17, 54, 24, 24, 26, 32, 24, 38,
+ 38, 7, 59, 13, 17, 3, 75, 51, 10, 22,
+ 20, 8, 13, 23, 33, 33, 65, 27, 42, 18,
+ 8, 5, 2, 15, 31, 37, 47, 15, 48, 22,
+ 12, 2, 6, 9, 19, 31, 49, 21, 36, 10,
+ 16, 8, 1, 19, 25, 43, 7, 64, 30, 24,
+ 14, 14, 3, 17, 19, 25, 124, 79, 73, 59,
+ 69, 59, 57, 55, 51, 47, 49, 47, 41, 39,
+ 45, 49, 57, 19, 45, 23, 33, 33, 29, 21,
+ 15, 15, 9, 19, 11, 17, 9, 9, 14, 11,
+ 27, 5, 9, 17, 2, 1, 7, 9, 8, 3,
+ 17, 19, 5, 37, 11, 12, 5, 4, 11, 7,
+ 7, 8, 4, 11, 13, 21, 21, 9, 116, 110,
+ 114, 112, 108, 120, 120, 118, 108, 110, 118, 112,
+ 94, 74, 36, 108, 100, 96, 88, 72, 56, 38,
+ 52, 42, 30, 8, 1, 11, 31, 82, 76, 82,
+ 56, 24, 36, 26, 10, 30, 16, 6, 25, 5,
+ 23, 41, 26, 34, 2, 30, 38, 16, 4, 1,
+ 3, 27, 7, 21, 35, 85, 23, 31, 47, 16,
+ 13, 31, 21, 3, 2, 4, 16, 10, 24, 1,
+ 14, 12, 26, 38, 38, 42, 14, 112, 84, 70,
+ 58, 42, 38, 2, 13, 43, 4, 68, 56, 52,
+ 36, 34, 18, 12, 4, 9, 21, 3, 2, 4,
+ 16, 10, 24, 1, 14, 12, 26, 38, 38, 42,
+ 14, 112, 84, 70, 58, 42, 38, 2, 13, 43,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 18 */
+
+ 112, 14, 27, 112, 14, 27, 63, 29, 34, 22,
+ 10, 30, 70, 96, 48, 10, 9, 36, 33, 23,
+ 2, 0, 20, 17, 2, 62, 62, 6, 39, 93,
+ 55, 57, 15, 33, 23, 2, 45, 17, 52, 8,
+ 25, 45, 57, 29, 33, 49, 77, 1, 31, 55,
+ 9, 35, 45, 71, 10, 21, 17, 39, 42, 4,
+ 44, 0, 0, 0, 17, 53, 67, 1, 16, 3,
+ 44, 27, 81, 45, 15, 31, 16, 12, 23, 15,
+ 57, 6, 37, 23, 43, 57, 35, 45, 39, 30,
+ 3, 7, 43, 19, 55, 29, 59, 3, 25, 17,
+ 47, 28, 15, 2, 39, 35, 21, 25, 9, 2,
+ 0, 15, 10, 12, 15, 4, 5, 9, 6, 17,
+ 48, 7, 2, 40, 42, 56, 52, 34, 43, 7,
+ 13, 2, 7, 41, 9, 16, 11, 15, 8, 46,
+ 94, 56, 4, 43, 37, 3, 68, 78, 83, 9,
+ 4, 45, 18, 3, 1, 10, 66, 110, 88, 26,
+ 41, 69, 14, 64, 82, 77, 40, 42, 44, 44,
+ 46, 40, 28, 40, 42, 12, 26, 24, 10, 6,
+ 5, 20, 14, 12, 16, 6, 14, 24, 8, 31,
+ 8, 10, 11, 16, 17, 84, 102, 70, 52, 66,
+ 78, 98, 88, 50, 8, 11, 6, 20, 20, 67,
+ 14, 22, 21, 52, 22, 22, 24, 28, 20, 34,
+ 34, 11, 59, 15, 19, 7, 73, 49, 12, 24,
+ 20, 8, 11, 21, 31, 31, 61, 25, 44, 20,
+ 10, 3, 4, 13, 29, 35, 43, 13, 48, 22,
+ 12, 2, 8, 7, 17, 29, 47, 21, 38, 12,
+ 16, 8, 0, 17, 25, 41, 5, 64, 32, 24,
+ 14, 16, 1, 15, 17, 23, 124, 77, 69, 55,
+ 65, 57, 55, 51, 47, 45, 45, 43, 37, 37,
+ 43, 47, 55, 13, 45, 25, 33, 31, 27, 21,
+ 15, 15, 9, 19, 11, 19, 9, 9, 16, 11,
+ 27, 5, 9, 17, 2, 1, 7, 11, 6, 3,
+ 17, 19, 3, 39, 11, 14, 7, 4, 11, 7,
+ 7, 8, 4, 11, 11, 21, 21, 13, 114, 108,
+ 112, 108, 104, 114, 114, 112, 104, 104, 112, 106,
+ 88, 68, 32, 100, 92, 88, 78, 66, 52, 34,
+ 48, 38, 26, 6, 3, 13, 31, 78, 72, 78,
+ 52, 20, 32, 22, 6, 26, 12, 2, 29, 9,
+ 27, 43, 24, 32, 1, 26, 34, 12, 0, 3,
+ 5, 29, 9, 23, 35, 83, 25, 33, 51, 14,
+ 15, 33, 21, 3, 4, 6, 18, 12, 26, 1,
+ 16, 14, 28, 38, 40, 44, 16, 110, 80, 66,
+ 54, 38, 34, 1, 17, 45, 6, 68, 56, 54,
+ 38, 36, 18, 14, 4, 7, 21, 3, 4, 6,
+ 18, 12, 26, 1, 16, 14, 28, 38, 40, 44,
+ 16, 110, 80, 66, 54, 38, 34, 1, 17, 45,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 19 */
+
+ 110, 14, 27, 110, 14, 27, 59, 25, 36, 22,
+ 8, 26, 68, 94, 48, 10, 5, 34, 33, 19,
+ 2, 1, 22, 19, 0, 60, 58, 1, 43, 95,
+ 51, 53, 15, 33, 19, 2, 43, 15, 54, 8,
+ 23, 43, 55, 31, 35, 49, 79, 1, 31, 53,
+ 9, 37, 45, 71, 10, 21, 17, 37, 44, 4,
+ 44, 0, 0, 0, 17, 53, 67, 0, 14, 3,
+ 42, 27, 79, 41, 13, 29, 18, 14, 21, 13,
+ 53, 8, 35, 21, 39, 59, 35, 45, 39, 32,
+ 3, 5, 39, 19, 53, 27, 55, 3, 25, 17,
+ 47, 30, 15, 4, 37, 35, 19, 25, 9, 2,
+ 2, 13, 10, 12, 15, 4, 5, 9, 6, 17,
+ 48, 7, 2, 42, 42, 58, 52, 36, 43, 7,
+ 11, 2, 7, 41, 9, 18, 11, 13, 10, 48,
+ 96, 58, 8, 43, 39, 1, 68, 78, 85, 9,
+ 6, 45, 18, 3, 1, 12, 66, 110, 88, 28,
+ 43, 65, 12, 58, 76, 73, 40, 42, 42, 42,
+ 46, 38, 26, 40, 42, 10, 24, 24, 10, 6,
+ 7, 20, 12, 10, 14, 6, 14, 22, 8, 33,
+ 8, 8, 11, 14, 17, 80, 98, 66, 50, 62,
+ 74, 92, 82, 48, 4, 15, 4, 16, 16, 71,
+ 12, 20, 25, 50, 20, 20, 22, 26, 16, 30,
+ 30, 13, 59, 17, 21, 9, 69, 45, 16, 24,
+ 20, 8, 9, 19, 29, 29, 57, 23, 46, 22,
+ 12, 3, 6, 11, 25, 33, 39, 13, 50, 24,
+ 14, 4, 10, 7, 17, 27, 45, 21, 42, 14,
+ 16, 8, 2, 17, 25, 39, 3, 66, 34, 26,
+ 14, 18, 1, 13, 17, 23, 124, 73, 67, 53,
+ 61, 55, 51, 49, 45, 41, 41, 39, 33, 37,
+ 41, 47, 53, 7, 45, 27, 33, 29, 25, 21,
+ 15, 15, 9, 17, 11, 21, 9, 9, 16, 11,
+ 27, 5, 9, 17, 0, 1, 7, 13, 4, 3,
+ 17, 19, 1, 41, 11, 16, 9, 4, 11, 7,
+ 5, 8, 2, 9, 9, 21, 21, 15, 112, 106,
+ 110, 104, 100, 110, 110, 106, 98, 98, 106, 100,
+ 82, 62, 28, 92, 86, 80, 68, 60, 48, 30,
+ 44, 32, 22, 4, 5, 15, 31, 74, 68, 74,
+ 48, 16, 28, 20, 2, 22, 8, 1, 33, 13,
+ 31, 45, 22, 30, 5, 24, 30, 10, 3, 7,
+ 9, 31, 11, 25, 35, 81, 27, 37, 53, 12,
+ 17, 35, 21, 1, 4, 6, 20, 14, 28, 1,
+ 18, 16, 30, 40, 42, 44, 18, 110, 76, 62,
+ 50, 34, 30, 5, 21, 47, 6, 70, 58, 56,
+ 38, 38, 20, 14, 6, 7, 21, 1, 4, 6,
+ 20, 14, 28, 1, 18, 16, 30, 40, 42, 44,
+ 18, 110, 76, 62, 50, 34, 30, 5, 21, 47,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 20 */
+
+ 106, 14, 27, 106, 14, 27, 57, 23, 36, 22,
+ 8, 22, 64, 92, 50, 10, 3, 30, 33, 17,
+ 0, 1, 24, 21, 3, 56, 54, 7, 49, 97,
+ 47, 51, 15, 33, 17, 0, 43, 13, 54, 6,
+ 23, 41, 51, 33, 39, 51, 79, 1, 29, 51,
+ 11, 37, 45, 71, 10, 21, 17, 37, 44, 4,
+ 44, 0, 0, 0, 15, 55, 67, 0, 12, 3,
+ 42, 25, 77, 37, 11, 27, 22, 18, 21, 11,
+ 49, 10, 33, 19, 33, 59, 37, 45, 41, 32,
+ 5, 3, 33, 17, 53, 27, 51, 3, 25, 17,
+ 47, 30, 15, 4, 35, 35, 19, 23, 9, 2,
+ 2, 11, 12, 12, 17, 2, 5, 9, 6, 17,
+ 48, 7, 2, 42, 42, 58, 52, 36, 45, 5,
+ 11, 0, 7, 39, 9, 20, 9, 13, 14, 52,
+ 98, 62, 12, 45, 41, 1, 70, 78, 87, 9,
+ 6, 47, 20, 3, 1, 14, 66, 110, 88, 32,
+ 45, 63, 10, 52, 70, 71, 38, 42, 42, 42,
+ 44, 38, 26, 40, 40, 8, 22, 22, 10, 6,
+ 7, 18, 12, 8, 14, 4, 12, 20, 6, 35,
+ 6, 6, 11, 10, 19, 76, 94, 62, 46, 60,
+ 70, 84, 78, 46, 0, 21, 0, 12, 12, 73,
+ 8, 18, 27, 48, 18, 18, 18, 22, 12, 26,
+ 28, 17, 59, 19, 23, 13, 67, 43, 18, 26,
+ 20, 8, 7, 17, 27, 29, 53, 19, 48, 22,
+ 12, 1, 10, 9, 23, 31, 33, 11, 50, 24,
+ 14, 4, 12, 5, 15, 27, 43, 21, 44, 16,
+ 16, 8, 4, 15, 25, 37, 1, 66, 36, 26,
+ 14, 20, 0, 11, 15, 21, 124, 71, 63, 49,
+ 59, 53, 49, 45, 41, 39, 39, 35, 27, 35,
+ 39, 45, 51, 1, 43, 29, 31, 27, 25, 21,
+ 15, 15, 11, 17, 13, 23, 9, 9, 18, 11,
+ 27, 5, 9, 15, 0, 3, 7, 15, 2, 5,
+ 17, 19, 0, 43, 13, 16, 9, 2, 11, 7,
+ 5, 6, 2, 9, 9, 21, 21, 19, 110, 104,
+ 108, 102, 94, 104, 104, 100, 94, 92, 98, 94,
+ 74, 58, 24, 84, 78, 72, 58, 54, 44, 26,
+ 40, 28, 18, 0, 7, 17, 31, 68, 62, 68,
+ 44, 12, 24, 16, 1, 18, 4, 5, 37, 19,
+ 33, 49, 20, 28, 11, 20, 28, 6, 7, 9,
+ 11, 33, 13, 27, 35, 79, 31, 39, 57, 8,
+ 19, 37, 21, 1, 6, 8, 22, 14, 28, 0,
+ 20, 18, 32, 40, 44, 46, 18, 108, 72, 58,
+ 46, 30, 26, 11, 25, 51, 8, 70, 58, 56,
+ 40, 38, 20, 16, 6, 5, 21, 1, 6, 8,
+ 22, 14, 28, 0, 20, 18, 32, 40, 44, 46,
+ 18, 108, 72, 58, 46, 30, 26, 11, 25, 51,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 21 */
+
+ 104, 14, 27, 104, 14, 27, 53, 19, 36, 22,
+ 6, 18, 62, 90, 52, 10, 0, 26, 31, 13,
+ 0, 1, 26, 23, 5, 54, 50, 13, 53, 97,
+ 43, 49, 15, 31, 13, 0, 43, 11, 54, 6,
+ 21, 39, 49, 35, 41, 51, 79, 1, 29, 49,
+ 11, 37, 45, 71, 10, 19, 17, 35, 46, 4,
+ 44, 0, 0, 0, 15, 55, 67, 2, 10, 3,
+ 40, 25, 73, 31, 9, 25, 24, 20, 19, 9,
+ 45, 12, 31, 17, 29, 61, 37, 45, 41, 32,
+ 5, 1, 29, 17, 51, 25, 47, 3, 25, 17,
+ 45, 30, 15, 6, 33, 33, 17, 23, 9, 4,
+ 4, 9, 12, 12, 17, 2, 3, 9, 6, 17,
+ 50, 7, 2, 44, 44, 58, 52, 36, 45, 5,
+ 9, 0, 7, 39, 9, 22, 9, 11, 16, 54,
+ 100, 64, 16, 45, 43, 0, 70, 78, 89, 9,
+ 8, 47, 22, 3, 1, 18, 66, 110, 88, 34,
+ 47, 59, 8, 48, 64, 69, 38, 42, 40, 40,
+ 44, 38, 24, 40, 40, 6, 20, 22, 10, 6,
+ 7, 18, 10, 6, 12, 4, 12, 18, 6, 37,
+ 4, 6, 11, 8, 19, 72, 90, 60, 44, 56,
+ 66, 78, 72, 44, 3, 25, 1, 8, 8, 75,
+ 6, 16, 31, 46, 16, 16, 16, 20, 8, 22,
+ 24, 19, 59, 21, 25, 17, 63, 39, 20, 26,
+ 20, 8, 5, 15, 25, 27, 49, 17, 50, 24,
+ 14, 0, 12, 7, 19, 29, 29, 9, 52, 26,
+ 16, 6, 14, 3, 15, 25, 41, 19, 46, 18,
+ 16, 8, 6, 15, 23, 35, 0, 66, 38, 26,
+ 14, 22, 2, 9, 15, 19, 124, 69, 61, 47,
+ 55, 51, 45, 43, 37, 35, 35, 31, 23, 33,
+ 37, 43, 49, 4, 43, 31, 31, 25, 23, 21,
+ 15, 15, 11, 15, 13, 25, 9, 9, 20, 11,
+ 27, 5, 9, 15, 1, 3, 7, 17, 0, 5,
+ 17, 19, 2, 43, 13, 18, 11, 2, 11, 7,
+ 5, 6, 0, 9, 7, 21, 21, 21, 108, 102,
+ 106, 98, 90, 100, 98, 94, 88, 86, 92, 88,
+ 68, 52, 20, 76, 72, 64, 48, 48, 40, 24,
+ 36, 22, 14, 1, 9, 19, 31, 64, 58, 64,
+ 40, 8, 20, 14, 5, 14, 0, 9, 41, 23,
+ 37, 51, 18, 26, 15, 18, 24, 4, 11, 11,
+ 13, 35, 15, 29, 35, 77, 33, 41, 59, 6,
+ 21, 39, 21, 0, 6, 10, 24, 16, 30, 0,
+ 22, 20, 34, 42, 46, 46, 20, 108, 68, 54,
+ 42, 26, 22, 15, 29, 53, 8, 72, 60, 58,
+ 42, 40, 22, 16, 6, 3, 21, 0, 6, 10,
+ 24, 16, 30, 0, 22, 20, 34, 42, 46, 46,
+ 20, 108, 68, 54, 42, 26, 22, 15, 29, 53,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 22 */
+
+ 102, 14, 29, 102, 14, 29, 49, 17, 38, 22,
+ 6, 16, 58, 88, 52, 12, 4, 24, 31, 11,
+ 0, 3, 28, 25, 7, 52, 48, 21, 57, 99,
+ 39, 45, 15, 31, 11, 0, 41, 9, 56, 6,
+ 19, 37, 47, 39, 43, 51, 81, 1, 29, 49,
+ 11, 39, 43, 71, 8, 19, 17, 33, 46, 4,
+ 44, 0, 0, 0, 13, 55, 67, 2, 10, 5,
+ 40, 23, 71, 27, 7, 23, 28, 24, 17, 7,
+ 41, 14, 29, 17, 25, 63, 37, 45, 41, 34,
+ 5, 0, 25, 17, 49, 23, 43, 3, 25, 17,
+ 45, 32, 17, 6, 33, 33, 15, 23, 7, 4,
+ 4, 7, 14, 12, 17, 2, 3, 9, 6, 17,
+ 50, 7, 2, 44, 44, 60, 54, 38, 47, 3,
+ 7, 1, 7, 39, 9, 24, 9, 11, 18, 56,
+ 100, 68, 18, 47, 45, 0, 70, 78, 89, 11,
+ 8, 49, 22, 5, 0, 20, 66, 110, 90, 36,
+ 49, 57, 6, 42, 58, 65, 38, 42, 40, 38,
+ 42, 36, 24, 38, 38, 6, 20, 22, 8, 6,
+ 9, 16, 8, 4, 12, 4, 12, 18, 4, 41,
+ 4, 4, 11, 6, 21, 66, 86, 56, 40, 52,
+ 62, 72, 68, 40, 7, 29, 5, 4, 4, 79,
+ 4, 12, 35, 42, 14, 14, 14, 16, 4, 18,
+ 20, 23, 59, 23, 27, 19, 61, 37, 24, 28,
+ 20, 8, 3, 15, 25, 25, 47, 15, 52, 26,
+ 16, 0, 14, 5, 17, 27, 25, 9, 52, 26,
+ 16, 6, 16, 3, 13, 23, 39, 19, 50, 20,
+ 16, 8, 6, 13, 23, 35, 0, 68, 38, 28,
+ 14, 24, 2, 9, 13, 19, 124, 65, 57, 43,
+ 51, 47, 43, 39, 35, 33, 31, 27, 19, 33,
+ 35, 43, 49, 10, 43, 33, 31, 25, 21, 21,
+ 15, 15, 11, 15, 13, 27, 9, 9, 20, 11,
+ 27, 5, 9, 15, 1, 3, 7, 19, 1, 5,
+ 17, 19, 4, 45, 13, 20, 13, 2, 11, 7,
+ 3, 6, 0, 7, 5, 21, 21, 25, 106, 100,
+ 104, 94, 86, 94, 94, 88, 84, 80, 86, 82,
+ 62, 46, 16, 70, 64, 56, 38, 44, 34, 20,
+ 32, 18, 10, 3, 11, 21, 31, 60, 54, 60,
+ 36, 4, 18, 10, 9, 10, 5, 13, 45, 27,
+ 41, 53, 16, 22, 19, 14, 20, 0, 15, 15,
+ 17, 37, 17, 31, 37, 75, 35, 45, 63, 4,
+ 25, 41, 19, 0, 8, 10, 26, 18, 32, 0,
+ 24, 22, 36, 42, 48, 48, 22, 106, 64, 50,
+ 38, 22, 18, 19, 33, 55, 10, 72, 60, 60,
+ 42, 42, 22, 18, 8, 3, 19, 0, 8, 10,
+ 26, 18, 32, 0, 24, 22, 36, 42, 48, 48,
+ 22, 106, 64, 50, 38, 22, 18, 19, 33, 55,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 23 */
+
+ 100, 14, 29, 100, 14, 29, 45, 13, 38, 22,
+ 4, 12, 56, 86, 54, 12, 6, 20, 31, 7,
+ 1, 3, 30, 27, 9, 50, 44, 27, 63, 101,
+ 35, 43, 15, 31, 7, 1, 41, 7, 56, 6,
+ 19, 35, 43, 41, 45, 51, 81, 1, 27, 47,
+ 13, 39, 43, 71, 8, 19, 17, 31, 48, 4,
+ 44, 0, 0, 0, 13, 55, 67, 4, 8, 5,
+ 38, 23, 69, 23, 5, 21, 30, 26, 15, 5,
+ 37, 16, 27, 15, 19, 63, 39, 45, 43, 34,
+ 5, 2, 19, 15, 49, 23, 39, 3, 25, 17,
+ 45, 32, 17, 8, 31, 33, 13, 21, 7, 4,
+ 6, 5, 14, 12, 19, 2, 3, 9, 6, 17,
+ 50, 7, 2, 46, 44, 60, 54, 38, 47, 3,
+ 7, 1, 7, 37, 9, 26, 7, 9, 22, 60,
+ 102, 70, 22, 47, 47, 2, 72, 78, 91, 11,
+ 10, 49, 24, 5, 0, 22, 66, 110, 90, 40,
+ 51, 53, 4, 36, 52, 63, 38, 42, 38, 38,
+ 42, 36, 22, 38, 38, 4, 18, 22, 8, 6,
+ 9, 16, 8, 2, 10, 4, 12, 16, 4, 43,
+ 2, 2, 11, 4, 21, 62, 82, 52, 38, 50,
+ 58, 66, 62, 38, 11, 33, 7, 0, 0, 81,
+ 2, 10, 37, 40, 12, 12, 10, 14, 0, 14,
+ 18, 25, 59, 25, 29, 23, 57, 33, 26, 28,
+ 20, 8, 1, 13, 23, 25, 43, 11, 54, 28,
+ 16, 2, 18, 3, 13, 25, 19, 7, 54, 28,
+ 18, 8, 18, 1, 13, 21, 37, 19, 52, 22,
+ 16, 8, 8, 13, 23, 33, 2, 68, 40, 28,
+ 14, 26, 4, 7, 13, 17, 124, 63, 55, 41,
+ 49, 45, 39, 37, 31, 29, 27, 23, 13, 31,
+ 33, 41, 47, 16, 41, 35, 29, 23, 19, 21,
+ 15, 15, 11, 13, 15, 29, 9, 9, 22, 11,
+ 27, 5, 9, 13, 3, 5, 7, 21, 3, 7,
+ 17, 19, 6, 47, 15, 22, 13, 0, 11, 7,
+ 3, 4, 1, 7, 5, 21, 21, 27, 104, 98,
+ 102, 92, 80, 90, 88, 82, 78, 74, 80, 76,
+ 56, 42, 12, 62, 58, 48, 28, 38, 30, 16,
+ 28, 12, 6, 5, 13, 23, 31, 56, 48, 54,
+ 32, 0, 14, 8, 13, 6, 9, 17, 49, 31,
+ 43, 57, 14, 20, 23, 12, 18, 1, 19, 17,
+ 19, 39, 19, 33, 37, 73, 39, 47, 65, 0,
+ 27, 43, 19, 2, 8, 12, 28, 20, 32, 2,
+ 26, 24, 38, 44, 50, 48, 22, 106, 60, 46,
+ 34, 18, 14, 25, 37, 57, 10, 74, 62, 60,
+ 44, 44, 24, 18, 8, 1, 19, 2, 8, 12,
+ 28, 20, 32, 2, 26, 24, 38, 44, 50, 48,
+ 22, 106, 60, 46, 34, 18, 14, 25, 37, 57,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 24 */
+
+ 96, 12, 29, 96, 12, 29, 43, 11, 38, 22,
+ 4, 8, 52, 84, 54, 12, 10, 16, 31, 5,
+ 1, 5, 32, 29, 13, 46, 40, 35, 67, 103,
+ 31, 41, 15, 31, 5, 1, 41, 7, 56, 4,
+ 17, 33, 41, 43, 49, 53, 83, 1, 27, 45,
+ 13, 41, 43, 71, 8, 19, 17, 31, 48, 4,
+ 44, 0, 0, 0, 11, 57, 67, 4, 6, 5,
+ 38, 21, 67, 19, 3, 21, 34, 30, 15, 3,
+ 33, 16, 25, 13, 15, 65, 39, 47, 43, 34,
+ 7, 2, 15, 15, 47, 21, 37, 3, 25, 17,
+ 45, 32, 17, 8, 29, 33, 13, 21, 7, 4,
+ 6, 3, 16, 12, 19, 0, 3, 9, 4, 17,
+ 50, 7, 2, 46, 44, 60, 54, 38, 49, 1,
+ 5, 3, 7, 37, 9, 28, 7, 9, 24, 62,
+ 104, 74, 26, 49, 49, 2, 72, 78, 93, 11,
+ 10, 51, 24, 5, 0, 24, 66, 110, 90, 42,
+ 53, 51, 2, 30, 44, 61, 36, 42, 38, 36,
+ 40, 34, 22, 38, 36, 2, 16, 20, 8, 4,
+ 11, 14, 6, 0, 10, 2, 10, 14, 2, 45,
+ 0, 0, 11, 0, 23, 58, 78, 48, 34, 46,
+ 52, 58, 58, 36, 15, 39, 11, 5, 3, 85,
+ 1, 8, 41, 38, 10, 10, 8, 10, 3, 10,
+ 14, 29, 59, 27, 31, 27, 55, 31, 28, 30,
+ 20, 8, 1, 11, 21, 23, 39, 9, 54, 28,
+ 18, 2, 20, 1, 11, 23, 15, 7, 54, 28,
+ 18, 8, 18, 1, 11, 21, 35, 19, 54, 24,
+ 16, 8, 10, 11, 23, 31, 4, 68, 42, 28,
+ 14, 28, 4, 5, 11, 17, 124, 61, 51, 37,
+ 45, 43, 37, 33, 29, 27, 25, 19, 9, 31,
+ 31, 41, 45, 22, 41, 37, 29, 21, 19, 21,
+ 15, 15, 13, 13, 15, 31, 9, 9, 22, 11,
+ 27, 7, 9, 13, 3, 5, 9, 23, 5, 7,
+ 19, 19, 8, 49, 15, 22, 15, 0, 11, 9,
+ 3, 4, 1, 7, 3, 21, 21, 31, 102, 96,
+ 100, 88, 76, 84, 82, 76, 74, 68, 72, 70,
+ 48, 36, 8, 54, 50, 40, 18, 32, 26, 12,
+ 22, 8, 2, 9, 17, 25, 31, 50, 44, 50,
+ 28, 5, 10, 4, 19, 2, 13, 21, 53, 37,
+ 47, 59, 10, 18, 29, 8, 14, 5, 23, 21,
+ 23, 41, 21, 35, 37, 71, 41, 51, 69, 1,
+ 29, 45, 19, 2, 10, 12, 30, 20, 34, 2,
+ 28, 24, 40, 44, 50, 50, 24, 104, 56, 42,
+ 30, 14, 10, 29, 41, 61, 12, 74, 62, 62,
+ 44, 44, 24, 20, 8, 1, 19, 2, 10, 12,
+ 30, 20, 34, 2, 28, 24, 40, 44, 50, 50,
+ 24, 104, 56, 42, 30, 14, 10, 29, 41, 61,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 25 */
+
+ 94, 12, 29, 94, 12, 29, 39, 9, 40, 22,
+ 4, 4, 48, 84, 56, 12, 14, 14, 29, 1,
+ 1, 5, 34, 29, 15, 44, 36, 41, 71, 103,
+ 27, 37, 13, 29, 1, 1, 39, 5, 58, 4,
+ 15, 31, 39, 45, 51, 53, 83, 1, 27, 43,
+ 13, 41, 43, 71, 8, 17, 15, 29, 48, 4,
+ 44, 0, 0, 0, 9, 57, 67, 4, 4, 5,
+ 38, 19, 63, 13, 1, 19, 38, 34, 13, 1,
+ 29, 18, 21, 11, 11, 67, 39, 47, 43, 36,
+ 7, 4, 11, 15, 45, 19, 33, 3, 23, 17,
+ 43, 34, 17, 10, 27, 31, 11, 21, 7, 6,
+ 8, 1, 18, 12, 19, 0, 1, 7, 4, 17,
+ 52, 7, 2, 46, 46, 62, 54, 40, 49, 0,
+ 3, 5, 7, 37, 9, 30, 7, 7, 26, 64,
+ 106, 78, 30, 51, 51, 4, 72, 80, 95, 11,
+ 10, 51, 26, 5, 0, 28, 66, 110, 90, 44,
+ 55, 49, 0, 26, 38, 57, 36, 42, 38, 34,
+ 40, 34, 22, 38, 36, 0, 14, 20, 8, 4,
+ 11, 14, 4, 0, 10, 2, 10, 12, 0, 47,
+ 0, 0, 11, 1, 23, 54, 74, 46, 32, 42,
+ 48, 52, 54, 34, 19, 43, 13, 9, 5, 87,
+ 3, 6, 45, 36, 8, 8, 6, 6, 5, 6,
+ 10, 31, 59, 27, 33, 29, 51, 27, 32, 32,
+ 20, 8, 0, 9, 19, 21, 35, 7, 56, 30,
+ 20, 4, 22, 0, 9, 21, 11, 5, 56, 30,
+ 20, 8, 20, 0, 9, 19, 31, 17, 58, 26,
+ 18, 8, 12, 9, 21, 29, 6, 70, 44, 30,
+ 16, 30, 6, 3, 9, 15, 124, 57, 47, 33,
+ 41, 41, 35, 29, 25, 23, 21, 15, 5, 29,
+ 29, 39, 43, 30, 41, 39, 29, 19, 17, 19,
+ 15, 15, 13, 13, 15, 33, 9, 9, 24, 11,
+ 27, 7, 9, 13, 3, 5, 9, 23, 7, 7,
+ 19, 17, 12, 49, 15, 24, 17, 0, 11, 9,
+ 1, 4, 1, 5, 1, 19, 19, 35, 100, 94,
+ 98, 84, 72, 78, 78, 70, 70, 64, 66, 66,
+ 42, 30, 4, 46, 44, 34, 8, 26, 22, 10,
+ 18, 4, 0, 11, 19, 27, 31, 46, 40, 46,
+ 24, 9, 6, 2, 23, 1, 17, 25, 57, 41,
+ 51, 61, 8, 16, 33, 6, 10, 7, 27, 23,
+ 25, 43, 23, 35, 37, 69, 43, 53, 73, 3,
+ 31, 47, 19, 4, 12, 14, 34, 22, 36, 2,
+ 30, 26, 42, 46, 52, 52, 26, 102, 54, 38,
+ 26, 10, 6, 33, 45, 63, 14, 74, 64, 64,
+ 46, 46, 26, 22, 10, 0, 19, 4, 12, 14,
+ 34, 22, 36, 2, 30, 26, 42, 46, 52, 52,
+ 26, 102, 54, 38, 26, 10, 6, 33, 45, 63,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 26 */
+
+ 92, 12, 29, 92, 12, 29, 35, 5, 40, 22,
+ 2, 0, 46, 82, 58, 12, 16, 10, 29, 0,
+ 3, 5, 36, 31, 17, 42, 32, 47, 77, 105,
+ 23, 35, 13, 29, 0, 3, 39, 3, 58, 4,
+ 15, 29, 35, 47, 53, 53, 83, 1, 25, 41,
+ 15, 41, 43, 71, 8, 17, 15, 27, 50, 4,
+ 44, 0, 0, 0, 9, 57, 67, 6, 2, 5,
+ 36, 19, 61, 9, 0, 17, 40, 36, 11, 0,
+ 25, 20, 19, 9, 5, 67, 41, 47, 45, 36,
+ 7, 6, 5, 13, 45, 19, 29, 3, 23, 17,
+ 43, 34, 17, 10, 25, 31, 9, 19, 7, 6,
+ 8, 0, 18, 12, 21, 0, 1, 7, 4, 17,
+ 52, 7, 2, 48, 46, 62, 54, 40, 51, 0,
+ 3, 5, 7, 35, 9, 32, 5, 7, 30, 68,
+ 108, 80, 34, 51, 53, 4, 74, 80, 97, 11,
+ 12, 53, 28, 5, 0, 30, 66, 110, 90, 48,
+ 57, 45, 1, 20, 32, 55, 36, 42, 36, 34,
+ 38, 34, 20, 38, 34, 1, 12, 20, 8, 4,
+ 11, 12, 4, 1, 8, 2, 10, 10, 0, 49,
+ 1, 1, 11, 3, 25, 50, 70, 42, 28, 40,
+ 44, 46, 48, 32, 23, 47, 17, 13, 9, 89,
+ 5, 4, 47, 34, 6, 6, 2, 4, 9, 2,
+ 8, 35, 59, 29, 35, 33, 49, 25, 34, 32,
+ 20, 8, 2, 7, 17, 21, 31, 3, 58, 32,
+ 20, 6, 26, 2, 5, 19, 5, 3, 56, 30,
+ 20, 10, 22, 2, 9, 17, 29, 17, 60, 28,
+ 18, 8, 14, 9, 21, 27, 8, 70, 46, 30,
+ 16, 32, 8, 1, 9, 13, 124, 55, 45, 31,
+ 39, 39, 31, 27, 21, 21, 17, 11, 0, 27,
+ 27, 37, 41, 36, 39, 41, 27, 17, 15, 19,
+ 15, 15, 13, 11, 17, 35, 9, 9, 26, 11,
+ 27, 7, 9, 11, 5, 7, 9, 25, 9, 9,
+ 19, 17, 14, 51, 17, 26, 17, 1, 11, 9,
+ 1, 2, 3, 5, 1, 19, 19, 37, 98, 92,
+ 96, 82, 66, 74, 72, 64, 64, 58, 60, 60,
+ 36, 26, 0, 38, 36, 26, 1, 20, 18, 6,
+ 14, 1, 3, 13, 21, 29, 31, 42, 34, 40,
+ 20, 13, 2, 1, 27, 5, 21, 29, 61, 45,
+ 53, 65, 6, 14, 37, 2, 8, 11, 31, 25,
+ 27, 45, 25, 37, 37, 67, 47, 55, 75, 7,
+ 33, 49, 19, 4, 12, 16, 36, 24, 36, 4,
+ 32, 28, 44, 46, 54, 52, 26, 102, 50, 34,
+ 22, 6, 2, 39, 49, 65, 14, 76, 64, 64,
+ 48, 48, 26, 22, 10, 2, 19, 4, 12, 16,
+ 36, 24, 36, 4, 32, 28, 44, 46, 54, 52,
+ 26, 102, 50, 34, 22, 6, 2, 39, 49, 65,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 27 */
+
+ 90, 12, 31, 90, 12, 31, 31, 3, 42, 22,
+ 2, 1, 42, 80, 58, 14, 20, 8, 29, 4,
+ 3, 7, 38, 33, 19, 40, 30, 55, 81, 107,
+ 19, 31, 13, 29, 4, 3, 37, 1, 60, 4,
+ 13, 27, 33, 51, 55, 53, 85, 1, 25, 41,
+ 15, 43, 41, 71, 6, 17, 15, 25, 50, 4,
+ 44, 0, 0, 0, 7, 57, 67, 6, 2, 7,
+ 36, 17, 59, 5, 2, 15, 44, 40, 9, 2,
+ 21, 22, 17, 9, 1, 69, 41, 47, 45, 38,
+ 7, 8, 1, 13, 43, 17, 25, 3, 23, 17,
+ 43, 36, 19, 12, 25, 31, 7, 19, 5, 6,
+ 10, 2, 20, 12, 21, 0, 1, 7, 4, 17,
+ 52, 7, 2, 48, 46, 64, 56, 42, 51, 2,
+ 1, 7, 7, 35, 9, 34, 5, 5, 32, 70,
+ 108, 84, 36, 53, 55, 6, 74, 80, 97, 13,
+ 12, 53, 28, 7, 2, 32, 66, 110, 92, 50,
+ 59, 43, 3, 14, 26, 51, 36, 42, 36, 32,
+ 38, 32, 20, 36, 34, 1, 12, 20, 6, 4,
+ 13, 12, 2, 3, 8, 2, 10, 10, 1, 53,
+ 1, 3, 11, 5, 25, 44, 66, 38, 26, 36,
+ 40, 40, 44, 28, 27, 51, 19, 17, 13, 93,
+ 7, 0, 51, 30, 4, 4, 0, 0, 13, 1,
+ 4, 37, 59, 31, 37, 35, 45, 21, 38, 34,
+ 20, 8, 4, 7, 17, 19, 29, 1, 60, 34,
+ 22, 6, 28, 4, 3, 17, 1, 3, 58, 32,
+ 22, 10, 24, 2, 7, 15, 27, 17, 64, 30,
+ 18, 8, 14, 7, 21, 27, 8, 72, 46, 32,
+ 16, 34, 8, 1, 7, 13, 124, 51, 41, 27,
+ 35, 35, 29, 23, 19, 17, 13, 7, 4, 27,
+ 25, 37, 41, 42, 39, 43, 27, 17, 13, 19,
+ 15, 15, 13, 11, 17, 37, 9, 9, 26, 11,
+ 27, 7, 9, 11, 5, 7, 9, 27, 11, 9,
+ 19, 17, 16, 53, 17, 28, 19, 1, 11, 9,
+ 0, 2, 3, 3, 0, 19, 19, 41, 96, 90,
+ 94, 78, 62, 68, 68, 58, 60, 52, 54, 54,
+ 30, 20, 3, 32, 30, 18, 11, 16, 12, 2,
+ 10, 5, 7, 15, 23, 31, 31, 38, 30, 36,
+ 16, 17, 0, 3, 31, 9, 27, 33, 65, 49,
+ 57, 67, 4, 10, 41, 0, 4, 13, 35, 29,
+ 31, 47, 27, 39, 39, 65, 49, 59, 79, 9,
+ 37, 51, 17, 6, 14, 16, 38, 26, 38, 4,
+ 34, 30, 46, 48, 56, 54, 28, 100, 46, 30,
+ 18, 2, 1, 43, 53, 67, 16, 76, 66, 66,
+ 48, 50, 28, 24, 12, 2, 17, 6, 14, 16,
+ 38, 26, 38, 4, 34, 30, 46, 48, 56, 54,
+ 28, 100, 46, 30, 18, 2, 1, 43, 53, 67,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 28 */
+
+ 86, 12, 31, 86, 12, 31, 29, 0, 42, 22,
+ 0, 5, 40, 78, 60, 14, 24, 4, 29, 6,
+ 3, 7, 40, 35, 23, 36, 26, 61, 85, 109,
+ 15, 29, 13, 29, 6, 3, 37, 0, 60, 2,
+ 11, 25, 31, 53, 59, 55, 85, 1, 25, 39,
+ 15, 43, 41, 71, 6, 17, 15, 25, 52, 4,
+ 44, 0, 0, 0, 7, 59, 67, 8, 0, 7,
+ 34, 17, 57, 1, 4, 13, 46, 42, 9, 4,
+ 17, 24, 15, 7, 2, 71, 41, 47, 45, 38,
+ 9, 10, 2, 13, 41, 15, 21, 3, 23, 17,
+ 43, 36, 19, 12, 23, 31, 7, 19, 5, 6,
+ 10, 4, 20, 12, 21, 1, 1, 7, 4, 17,
+ 52, 7, 2, 50, 46, 64, 56, 42, 53, 2,
+ 0, 7, 7, 35, 9, 36, 5, 5, 34, 72,
+ 110, 86, 40, 53, 57, 6, 74, 80, 99, 13,
+ 14, 55, 30, 7, 2, 34, 66, 110, 92, 52,
+ 61, 39, 5, 8, 20, 49, 34, 42, 34, 30,
+ 36, 32, 18, 36, 32, 3, 10, 18, 6, 4,
+ 13, 10, 0, 5, 6, 0, 8, 8, 1, 55,
+ 3, 5, 11, 9, 27, 40, 62, 34, 22, 32,
+ 36, 32, 38, 26, 31, 57, 23, 21, 17, 95,
+ 11, 1, 55, 28, 2, 2, 1, 1, 17, 5,
+ 0, 41, 59, 33, 39, 39, 43, 19, 40, 34,
+ 20, 8, 6, 5, 15, 17, 25, 0, 62, 34,
+ 24, 8, 30, 6, 0, 15, 2, 1, 58, 32,
+ 22, 12, 26, 4, 7, 15, 25, 17, 66, 32,
+ 18, 8, 16, 7, 21, 25, 10, 72, 48, 32,
+ 16, 36, 10, 0, 7, 11, 124, 49, 39, 25,
+ 31, 33, 25, 21, 15, 15, 11, 3, 8, 25,
+ 23, 35, 39, 48, 39, 45, 27, 15, 13, 19,
+ 15, 15, 15, 9, 17, 39, 9, 9, 28, 11,
+ 27, 7, 9, 11, 7, 7, 9, 29, 13, 9,
+ 19, 17, 18, 55, 17, 28, 21, 1, 11, 9,
+ 0, 2, 5, 3, 2, 19, 19, 43, 94, 88,
+ 92, 74, 58, 64, 62, 52, 54, 46, 46, 48,
+ 22, 14, 7, 24, 22, 10, 21, 10, 8, 1,
+ 6, 11, 11, 19, 25, 33, 31, 32, 26, 32,
+ 12, 21, 3, 7, 35, 13, 31, 37, 69, 55,
+ 61, 69, 2, 8, 47, 3, 0, 17, 39, 31,
+ 33, 49, 29, 41, 39, 63, 51, 61, 81, 11,
+ 39, 53, 17, 6, 14, 18, 40, 26, 40, 4,
+ 36, 32, 48, 48, 58, 54, 30, 100, 42, 26,
+ 14, 1, 5, 47, 57, 71, 16, 78, 66, 68,
+ 50, 50, 28, 24, 12, 4, 17, 6, 14, 18,
+ 40, 26, 40, 4, 36, 32, 48, 48, 58, 54,
+ 30, 100, 42, 26, 14, 1, 5, 47, 57, 71,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 29 */
+
+ 84, 12, 31, 84, 12, 31, 25, 2, 42, 22,
+ 0, 9, 36, 76, 62, 14, 26, 0, 27, 10,
+ 5, 7, 42, 37, 25, 34, 22, 67, 91, 109,
+ 11, 27, 13, 27, 10, 5, 37, 2, 60, 2,
+ 11, 23, 27, 55, 61, 55, 85, 1, 23, 37,
+ 17, 43, 41, 71, 6, 15, 15, 23, 52, 4,
+ 44, 0, 0, 0, 5, 59, 67, 8, 1, 7,
+ 34, 15, 53, 4, 6, 11, 50, 46, 7, 6,
+ 13, 26, 13, 5, 8, 71, 43, 47, 47, 38,
+ 9, 12, 8, 11, 41, 15, 17, 3, 23, 17,
+ 41, 36, 19, 14, 21, 29, 5, 17, 5, 8,
+ 12, 6, 22, 12, 23, 1, 0, 7, 4, 17,
+ 54, 7, 2, 50, 48, 64, 56, 42, 53, 4,
+ 0, 9, 7, 33, 9, 38, 3, 3, 38, 76,
+ 112, 90, 44, 55, 59, 8, 76, 80, 101, 13,
+ 14, 55, 32, 7, 2, 38, 66, 110, 92, 56,
+ 63, 37, 7, 4, 14, 47, 34, 42, 34, 30,
+ 36, 32, 18, 36, 32, 5, 8, 18, 6, 4,
+ 13, 10, 0, 7, 6, 0, 8, 6, 3, 57,
+ 5, 5, 11, 11, 27, 36, 58, 32, 20, 30,
+ 32, 26, 34, 24, 35, 61, 25, 25, 21, 97,
+ 13, 3, 57, 26, 0, 0, 5, 5, 21, 9,
+ 1, 43, 59, 35, 41, 43, 39, 15, 42, 36,
+ 20, 8, 8, 3, 13, 17, 21, 4, 64, 36,
+ 24, 10, 34, 8, 2, 13, 8, 0, 60, 34,
+ 24, 12, 28, 6, 5, 13, 23, 15, 68, 34,
+ 18, 8, 18, 5, 19, 23, 12, 72, 50, 32,
+ 16, 38, 12, 2, 5, 9, 124, 47, 35, 21,
+ 29, 31, 23, 17, 11, 11, 7, 0, 14, 23,
+ 21, 33, 37, 54, 37, 47, 25, 13, 11, 19,
+ 15, 15, 15, 9, 19, 41, 9, 9, 30, 11,
+ 27, 7, 9, 9, 7, 9, 9, 31, 15, 11,
+ 19, 17, 20, 55, 19, 30, 21, 3, 11, 9,
+ 0, 0, 5, 3, 2, 19, 19, 47, 92, 86,
+ 90, 72, 52, 58, 56, 46, 50, 40, 40, 42,
+ 16, 10, 11, 16, 16, 2, 31, 4, 4, 3,
+ 2, 15, 15, 21, 27, 35, 31, 28, 20, 26,
+ 8, 25, 7, 9, 39, 17, 35, 41, 73, 59,
+ 63, 73, 0, 6, 51, 5, 1, 19, 43, 33,
+ 35, 51, 31, 43, 39, 61, 55, 63, 85, 15,
+ 41, 55, 17, 8, 16, 20, 42, 28, 40, 6,
+ 38, 34, 50, 50, 60, 56, 30, 98, 38, 22,
+ 10, 5, 9, 53, 61, 73, 18, 78, 68, 68,
+ 52, 52, 30, 26, 12, 6, 17, 8, 16, 20,
+ 42, 28, 40, 6, 38, 34, 50, 50, 60, 56,
+ 30, 98, 38, 22, 10, 5, 9, 53, 61, 73,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 30 */
+
+ 82, 12, 31, 82, 12, 31, 21, 6, 44, 22,
+ 1, 13, 34, 74, 62, 14, 30, 1, 27, 12,
+ 5, 9, 44, 39, 27, 32, 18, 75, 95, 111,
+ 7, 23, 13, 27, 12, 5, 35, 4, 62, 2,
+ 9, 21, 25, 57, 63, 55, 87, 1, 23, 35,
+ 17, 45, 41, 71, 6, 15, 15, 21, 54, 4,
+ 44, 0, 0, 0, 5, 59, 67, 10, 3, 7,
+ 32, 15, 51, 8, 8, 9, 52, 48, 5, 8,
+ 9, 28, 11, 3, 12, 73, 43, 47, 47, 40,
+ 9, 14, 12, 11, 39, 13, 13, 3, 23, 17,
+ 41, 38, 19, 14, 19, 29, 3, 17, 5, 8,
+ 12, 8, 22, 12, 23, 1, 0, 7, 4, 17,
+ 54, 7, 2, 52, 48, 66, 56, 44, 55, 4,
+ 2, 9, 7, 33, 9, 40, 3, 3, 40, 78,
+ 114, 92, 48, 55, 61, 8, 76, 80, 103, 13,
+ 16, 57, 32, 7, 2, 40, 66, 110, 92, 58,
+ 65, 33, 9, 1, 8, 43, 34, 42, 32, 28,
+ 34, 30, 16, 36, 30, 7, 6, 18, 6, 4,
+ 15, 8, 1, 9, 4, 0, 8, 4, 3, 59,
+ 5, 7, 11, 13, 29, 32, 54, 28, 16, 26,
+ 28, 20, 28, 22, 39, 65, 29, 29, 25, 101,
+ 15, 5, 61, 24, 1, 1, 7, 7, 25, 13,
+ 5, 47, 59, 37, 43, 45, 37, 13, 46, 36,
+ 20, 8, 10, 1, 11, 15, 17, 6, 66, 38,
+ 26, 10, 36, 10, 6, 11, 12, 0, 60, 34,
+ 24, 14, 30, 6, 5, 11, 21, 15, 72, 36,
+ 18, 8, 20, 5, 19, 21, 14, 74, 52, 34,
+ 16, 40, 12, 4, 5, 9, 124, 43, 33, 19,
+ 25, 29, 19, 15, 9, 9, 3, 4, 18, 23,
+ 19, 33, 35, 60, 37, 49, 25, 11, 9, 19,
+ 15, 15, 15, 7, 19, 43, 9, 9, 30, 11,
+ 27, 7, 9, 9, 9, 9, 9, 33, 17, 11,
+ 19, 17, 22, 57, 19, 32, 23, 3, 11, 9,
+ 2, 0, 7, 1, 4, 19, 19, 49, 90, 84,
+ 88, 68, 48, 54, 52, 40, 44, 34, 34, 36,
+ 10, 4, 15, 8, 8, 5, 41, 1, 0, 7,
+ 1, 21, 19, 23, 29, 37, 31, 24, 16, 22,
+ 4, 29, 11, 13, 43, 21, 39, 45, 77, 63,
+ 67, 75, 1, 4, 55, 9, 5, 23, 47, 37,
+ 39, 53, 33, 45, 39, 59, 57, 67, 87, 17,
+ 43, 57, 17, 8, 16, 20, 44, 30, 42, 6,
+ 40, 36, 52, 50, 62, 56, 32, 98, 34, 18,
+ 6, 9, 13, 57, 65, 75, 18, 80, 68, 70,
+ 52, 54, 30, 26, 14, 6, 17, 8, 16, 20,
+ 44, 30, 42, 6, 40, 36, 52, 50, 62, 56,
+ 32, 98, 34, 18, 6, 9, 13, 57, 65, 75,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 31 */
+
+ 80, 12, 31, 80, 12, 31, 17, 8, 44, 22,
+ 1, 17, 30, 72, 64, 14, 34, 5, 27, 16,
+ 5, 9, 46, 41, 29, 30, 14, 81, 99, 113,
+ 3, 21, 13, 27, 16, 5, 35, 6, 62, 2,
+ 7, 19, 23, 59, 65, 55, 87, 1, 23, 33,
+ 17, 45, 41, 71, 6, 15, 15, 19, 54, 4,
+ 44, 0, 0, 0, 3, 59, 67, 10, 5, 7,
+ 32, 13, 49, 12, 10, 7, 56, 52, 3, 10,
+ 5, 30, 9, 1, 16, 75, 43, 47, 47, 40,
+ 9, 16, 16, 11, 37, 11, 9, 3, 23, 17,
+ 41, 38, 19, 16, 17, 29, 1, 17, 5, 8,
+ 14, 10, 24, 12, 23, 1, 0, 7, 4, 17,
+ 54, 7, 2, 52, 48, 66, 56, 44, 55, 6,
+ 4, 11, 7, 33, 9, 42, 3, 1, 42, 80,
+ 116, 96, 52, 57, 63, 10, 76, 80, 105, 13,
+ 16, 57, 34, 7, 2, 42, 66, 110, 92, 60,
+ 67, 31, 11, 7, 2, 41, 34, 42, 32, 26,
+ 34, 30, 16, 36, 30, 9, 4, 18, 6, 4,
+ 15, 8, 3, 11, 4, 0, 8, 2, 5, 61,
+ 7, 9, 11, 15, 29, 28, 50, 24, 14, 22,
+ 24, 14, 24, 20, 43, 69, 31, 33, 29, 103,
+ 17, 7, 65, 22, 3, 3, 9, 11, 29, 17,
+ 9, 49, 59, 39, 45, 49, 33, 9, 48, 38,
+ 20, 8, 12, 0, 9, 13, 13, 8, 68, 40,
+ 28, 12, 38, 12, 8, 9, 16, 2, 62, 36,
+ 26, 14, 32, 8, 3, 9, 19, 15, 74, 38,
+ 18, 8, 22, 3, 19, 19, 16, 74, 54, 34,
+ 16, 42, 14, 6, 3, 7, 124, 41, 29, 15,
+ 21, 27, 17, 11, 5, 5, 0, 8, 22, 21,
+ 17, 31, 33, 66, 37, 51, 25, 9, 7, 19,
+ 15, 15, 15, 7, 19, 45, 9, 9, 32, 11,
+ 27, 7, 9, 9, 9, 9, 9, 35, 19, 11,
+ 19, 17, 24, 59, 19, 34, 25, 3, 11, 9,
+ 2, 0, 7, 1, 6, 19, 19, 53, 88, 82,
+ 86, 64, 44, 48, 46, 34, 40, 28, 28, 30,
+ 4, 1, 19, 0, 2, 13, 51, 7, 3, 11,
+ 5, 25, 23, 25, 31, 39, 31, 20, 12, 18,
+ 0, 33, 15, 15, 47, 25, 43, 49, 81, 67,
+ 71, 77, 3, 2, 59, 11, 9, 25, 51, 39,
+ 41, 55, 35, 47, 39, 57, 59, 69, 91, 19,
+ 45, 59, 17, 10, 18, 22, 46, 32, 44, 6,
+ 42, 38, 54, 52, 64, 58, 34, 96, 30, 14,
+ 2, 13, 17, 61, 69, 77, 20, 80, 70, 72,
+ 54, 56, 32, 28, 14, 8, 17, 10, 18, 22,
+ 46, 32, 44, 6, 42, 38, 54, 52, 64, 58,
+ 34, 96, 30, 14, 2, 13, 17, 61, 69, 77,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 32 */
+
+ 76, 10, 33, 76, 10, 33, 15, 10, 44, 22,
+ 3, 21, 26, 70, 64, 14, 36, 9, 27, 18,
+ 7, 11, 48, 43, 33, 26, 10, 89, 105, 115,
+ 0, 19, 13, 27, 18, 7, 35, 6, 62, 0,
+ 7, 19, 21, 63, 69, 57, 89, 3, 23, 33,
+ 19, 47, 41, 71, 4, 15, 15, 19, 54, 2,
+ 44, 0, 0, 0, 3, 61, 67, 10, 7, 9,
+ 30, 13, 47, 16, 12, 7, 58, 54, 3, 12,
+ 3, 30, 7, 1, 20, 77, 45, 49, 49, 40,
+ 11, 16, 20, 11, 37, 11, 7, 3, 23, 17,
+ 41, 38, 21, 16, 17, 29, 1, 17, 5, 8,
+ 14, 12, 24, 12, 25, 3, 0, 7, 2, 17,
+ 54, 7, 2, 52, 48, 66, 56, 44, 57, 6,
+ 4, 13, 7, 33, 11, 42, 3, 1, 44, 82,
+ 116, 98, 54, 59, 67, 10, 76, 80, 107, 15,
+ 16, 59, 34, 9, 2, 44, 66, 108, 92, 62,
+ 69, 29, 15, 13, 5, 39, 32, 42, 30, 24,
+ 32, 28, 14, 34, 28, 11, 2, 16, 4, 2,
+ 17, 6, 5, 13, 2, 1, 6, 0, 7, 65,
+ 9, 11, 11, 19, 31, 22, 44, 20, 10, 18,
+ 18, 6, 18, 16, 47, 75, 35, 39, 33, 107,
+ 21, 11, 69, 18, 5, 5, 13, 15, 33, 21,
+ 13, 53, 59, 41, 47, 53, 31, 7, 50, 38,
+ 20, 8, 12, 0, 9, 13, 11, 10, 68, 40,
+ 28, 12, 40, 14, 10, 9, 20, 2, 62, 36,
+ 26, 14, 32, 8, 3, 9, 17, 15, 76, 40,
+ 18, 6, 22, 3, 19, 19, 16, 74, 54, 34,
+ 16, 44, 14, 6, 3, 7, 124, 39, 27, 13,
+ 19, 25, 15, 9, 3, 3, 2, 10, 26, 21,
+ 17, 31, 33, 72, 37, 55, 25, 9, 7, 19,
+ 15, 15, 17, 7, 21, 47, 11, 9, 32, 13,
+ 27, 9, 9, 9, 11, 11, 11, 37, 21, 13,
+ 21, 17, 26, 61, 21, 34, 27, 5, 11, 11,
+ 2, 1, 9, 1, 6, 19, 19, 57, 84, 80,
+ 82, 60, 38, 42, 40, 28, 34, 22, 20, 24,
+ 3, 7, 23, 7, 5, 21, 63, 13, 9, 15,
+ 11, 31, 27, 29, 35, 41, 31, 14, 6, 12,
+ 3, 39, 19, 19, 53, 29, 49, 53, 87, 73,
+ 75, 81, 7, 1, 65, 15, 13, 29, 55, 43,
+ 45, 57, 37, 49, 41, 55, 63, 73, 95, 23,
+ 49, 63, 17, 10, 18, 22, 48, 32, 44, 6,
+ 44, 38, 56, 52, 64, 58, 34, 94, 26, 10,
+ 3, 19, 21, 67, 73, 81, 20, 80, 70, 72,
+ 54, 56, 32, 28, 14, 8, 17, 10, 18, 22,
+ 48, 32, 44, 6, 44, 38, 56, 52, 64, 58,
+ 34, 94, 26, 10, 3, 19, 21, 67, 73, 81,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 33 */
+
+ 74, 10, 33, 74, 10, 33, 11, 14, 46, 24,
+ 3, 23, 24, 70, 66, 16, 40, 11, 25, 22,
+ 7, 11, 52, 43, 35, 24, 8, 95, 109, 115,
+ 4, 15, 11, 25, 22, 7, 33, 8, 64, 0,
+ 5, 17, 17, 65, 71, 57, 89, 3, 21, 31,
+ 19, 47, 39, 69, 4, 13, 13, 17, 56, 2,
+ 44, 0, 0, 0, 1, 61, 67, 12, 7, 9,
+ 30, 11, 43, 22, 16, 5, 62, 58, 1, 16,
+ 0, 32, 3, 0, 26, 77, 45, 49, 49, 42,
+ 11, 18, 26, 9, 35, 9, 3, 3, 21, 17,
+ 39, 40, 21, 18, 15, 27, 0, 15, 3, 10,
+ 16, 14, 26, 14, 25, 3, 2, 5, 2, 15,
+ 56, 5, 2, 54, 50, 68, 58, 46, 57, 8,
+ 6, 13, 7, 31, 11, 44, 1, 0, 48, 86,
+ 118, 102, 58, 59, 69, 12, 78, 82, 107, 15,
+ 18, 59, 36, 9, 4, 48, 66, 108, 94, 66,
+ 71, 25, 17, 17, 11, 35, 32, 42, 30, 24,
+ 32, 28, 14, 34, 28, 11, 2, 16, 4, 2,
+ 17, 6, 5, 13, 2, 1, 6, 0, 7, 67,
+ 9, 11, 11, 21, 31, 18, 40, 18, 8, 16,
+ 14, 0, 14, 14, 49, 79, 37, 43, 35, 109,
+ 23, 13, 71, 16, 5, 5, 15, 17, 35, 23,
+ 15, 55, 59, 41, 47, 55, 27, 3, 54, 40,
+ 20, 8, 14, 2, 7, 11, 7, 14, 70, 42,
+ 30, 14, 44, 16, 14, 7, 26, 4, 64, 38,
+ 28, 16, 34, 10, 1, 7, 13, 13, 80, 42,
+ 20, 6, 24, 1, 17, 17, 18, 76, 56, 36,
+ 18, 46, 16, 8, 1, 5, 124, 35, 23, 9,
+ 15, 21, 11, 5, 0, 0, 6, 14, 32, 19,
+ 15, 29, 31, 80, 35, 57, 23, 7, 5, 17,
+ 15, 13, 17, 5, 21, 47, 11, 9, 34, 13,
+ 27, 9, 9, 7, 11, 11, 11, 37, 21, 13,
+ 21, 15, 30, 61, 21, 36, 27, 5, 11, 11,
+ 4, 1, 9, 0, 8, 17, 17, 59, 82, 78,
+ 80, 58, 34, 38, 36, 22, 30, 18, 14, 20,
+ 9, 11, 27, 13, 11, 27, 73, 17, 13, 17,
+ 15, 35, 29, 31, 37, 41, 31, 10, 2, 8,
+ 7, 43, 21, 21, 57, 31, 53, 55, 91, 77,
+ 77, 83, 9, 3, 69, 17, 15, 31, 57, 45,
+ 47, 59, 37, 49, 41, 53, 65, 75, 97, 25,
+ 51, 65, 15, 12, 20, 24, 52, 34, 46, 8,
+ 48, 40, 58, 54, 66, 60, 36, 94, 24, 8,
+ 7, 23, 23, 71, 75, 83, 22, 82, 72, 74,
+ 56, 58, 34, 30, 16, 10, 15, 12, 20, 24,
+ 52, 34, 46, 8, 48, 40, 58, 54, 66, 60,
+ 36, 94, 24, 8, 7, 23, 23, 71, 75, 83,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 34 */
+
+ 72, 10, 33, 72, 10, 33, 7, 16, 46, 24,
+ 3, 27, 20, 68, 68, 16, 44, 15, 25, 24,
+ 7, 11, 54, 45, 37, 22, 4, 101, 113, 117,
+ 8, 13, 11, 25, 24, 7, 33, 10, 64, 0,
+ 3, 15, 15, 67, 73, 57, 89, 3, 21, 29,
+ 19, 47, 39, 69, 4, 13, 13, 15, 56, 2,
+ 44, 0, 0, 0, 0, 61, 67, 12, 9, 9,
+ 30, 9, 41, 26, 18, 3, 66, 62, 0, 18,
+ 4, 34, 1, 2, 30, 79, 45, 49, 49, 42,
+ 11, 20, 30, 9, 33, 7, 0, 3, 21, 17,
+ 39, 40, 21, 18, 13, 27, 2, 15, 3, 10,
+ 16, 16, 28, 14, 25, 3, 2, 5, 2, 15,
+ 56, 5, 2, 54, 50, 68, 58, 46, 59, 10,
+ 8, 15, 7, 31, 11, 46, 1, 0, 50, 88,
+ 120, 106, 62, 61, 71, 12, 78, 82, 109, 15,
+ 18, 61, 38, 9, 4, 50, 66, 108, 94, 68,
+ 73, 23, 19, 23, 17, 33, 32, 42, 30, 22,
+ 30, 28, 14, 34, 26, 13, 0, 16, 4, 2,
+ 17, 4, 7, 15, 2, 1, 6, 1, 9, 69,
+ 11, 13, 11, 23, 33, 14, 36, 14, 4, 12,
+ 10, 5, 10, 12, 53, 83, 41, 47, 39, 111,
+ 25, 15, 75, 14, 7, 7, 17, 21, 39, 27,
+ 19, 59, 59, 43, 49, 59, 25, 1, 56, 42,
+ 20, 8, 16, 4, 5, 9, 3, 16, 72, 44,
+ 32, 16, 46, 18, 16, 5, 30, 6, 64, 38,
+ 28, 16, 36, 12, 0, 5, 11, 13, 82, 44,
+ 20, 6, 26, 0, 17, 15, 20, 76, 58, 36,
+ 18, 48, 18, 10, 0, 3, 124, 33, 19, 5,
+ 11, 19, 9, 1, 4, 2, 10, 18, 36, 17,
+ 13, 27, 29, 86, 35, 59, 23, 5, 3, 17,
+ 15, 13, 17, 5, 21, 49, 11, 9, 36, 13,
+ 27, 9, 9, 7, 11, 11, 11, 39, 23, 13,
+ 21, 15, 32, 63, 21, 38, 29, 5, 11, 11,
+ 4, 1, 9, 0, 10, 17, 17, 63, 80, 76,
+ 78, 54, 30, 32, 30, 16, 26, 12, 8, 14,
+ 15, 17, 31, 21, 19, 35, 83, 23, 17, 21,
+ 19, 39, 33, 33, 39, 43, 31, 6, 1, 4,
+ 11, 47, 25, 25, 61, 35, 57, 59, 95, 81,
+ 81, 85, 11, 5, 73, 21, 19, 35, 61, 47,
+ 49, 61, 39, 51, 41, 51, 67, 77, 101, 27,
+ 53, 67, 15, 12, 22, 26, 54, 36, 48, 8,
+ 50, 42, 60, 54, 68, 62, 38, 92, 20, 4,
+ 11, 27, 27, 75, 79, 85, 24, 82, 72, 76,
+ 58, 60, 34, 32, 16, 12, 15, 12, 22, 26,
+ 54, 36, 48, 8, 50, 42, 60, 54, 68, 62,
+ 38, 92, 20, 4, 11, 27, 27, 75, 79, 85,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 35 */
+
+ 70, 10, 33, 70, 10, 33, 3, 20, 48, 24,
+ 5, 31, 18, 66, 68, 16, 48, 17, 25, 28,
+ 7, 13, 56, 47, 39, 20, 0, 109, 117, 119,
+ 12, 9, 11, 25, 28, 7, 31, 12, 66, 0,
+ 1, 13, 13, 69, 75, 57, 91, 3, 21, 27,
+ 19, 49, 39, 69, 4, 13, 13, 13, 58, 2,
+ 44, 0, 0, 0, 0, 61, 67, 14, 11, 9,
+ 28, 9, 39, 30, 20, 1, 68, 64, 2, 20,
+ 8, 36, 0, 4, 34, 81, 45, 49, 49, 44,
+ 11, 22, 34, 9, 31, 5, 4, 3, 21, 17,
+ 39, 42, 21, 20, 11, 27, 4, 15, 3, 10,
+ 18, 18, 28, 14, 25, 3, 2, 5, 2, 15,
+ 56, 5, 2, 56, 50, 70, 58, 48, 59, 10,
+ 10, 15, 7, 31, 11, 48, 1, 2, 52, 90,
+ 122, 108, 66, 61, 73, 14, 78, 82, 111, 15,
+ 20, 61, 38, 9, 4, 52, 66, 108, 94, 70,
+ 75, 19, 21, 29, 23, 29, 32, 42, 28, 20,
+ 30, 26, 12, 34, 26, 15, 1, 16, 4, 2,
+ 19, 4, 9, 17, 0, 1, 6, 3, 9, 71,
+ 11, 15, 11, 25, 33, 10, 32, 10, 2, 8,
+ 6, 11, 4, 10, 57, 87, 43, 51, 43, 115,
+ 27, 17, 79, 12, 9, 9, 19, 23, 43, 31,
+ 23, 61, 59, 45, 51, 61, 21, 2, 60, 42,
+ 20, 8, 18, 6, 3, 7, 0, 18, 74, 46,
+ 34, 16, 48, 20, 20, 3, 34, 6, 66, 40,
+ 30, 18, 38, 12, 0, 3, 9, 13, 86, 46,
+ 20, 6, 28, 0, 17, 13, 22, 78, 60, 38,
+ 18, 50, 18, 12, 0, 3, 124, 29, 17, 3,
+ 7, 17, 5, 0, 6, 6, 14, 22, 40, 17,
+ 11, 27, 27, 92, 35, 61, 23, 3, 1, 17,
+ 15, 13, 17, 3, 21, 51, 11, 9, 36, 13,
+ 27, 9, 9, 7, 13, 11, 11, 41, 25, 13,
+ 21, 15, 34, 65, 21, 40, 31, 5, 11, 11,
+ 6, 1, 11, 2, 12, 17, 17, 65, 78, 74,
+ 76, 50, 26, 28, 26, 10, 20, 6, 2, 8,
+ 21, 23, 35, 29, 25, 43, 93, 29, 21, 25,
+ 23, 45, 37, 35, 41, 45, 31, 2, 5, 0,
+ 15, 51, 29, 27, 65, 39, 61, 63, 99, 85,
+ 85, 87, 13, 7, 77, 23, 23, 37, 65, 51,
+ 53, 63, 41, 53, 41, 49, 69, 81, 103, 29,
+ 55, 69, 15, 14, 22, 26, 56, 38, 50, 8,
+ 52, 44, 62, 56, 70, 62, 40, 92, 16, 0,
+ 15, 31, 31, 79, 83, 87, 24, 84, 74, 78,
+ 58, 62, 36, 32, 18, 12, 15, 14, 22, 26,
+ 56, 38, 50, 8, 52, 44, 62, 56, 70, 62,
+ 40, 92, 16, 0, 15, 31, 31, 79, 83, 87,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 36 */
+
+ 66, 10, 33, 66, 10, 33, 1, 22, 48, 24,
+ 5, 35, 14, 64, 70, 16, 50, 21, 25, 30,
+ 9, 13, 58, 49, 43, 16, 3, 115, 123, 121,
+ 16, 7, 11, 25, 30, 9, 31, 14, 66, 1,
+ 1, 11, 9, 71, 79, 59, 91, 3, 19, 25,
+ 21, 49, 39, 69, 4, 13, 13, 13, 58, 2,
+ 44, 0, 0, 0, 2, 63, 67, 14, 13, 9,
+ 28, 7, 37, 34, 22, 0, 72, 68, 2, 22,
+ 12, 38, 2, 6, 40, 81, 47, 49, 51, 44,
+ 13, 24, 40, 7, 31, 5, 8, 3, 21, 17,
+ 39, 42, 21, 20, 9, 27, 4, 13, 3, 10,
+ 18, 20, 30, 14, 27, 5, 2, 5, 2, 15,
+ 56, 5, 2, 56, 50, 70, 58, 48, 61, 12,
+ 10, 17, 7, 29, 11, 50, 0, 2, 56, 94,
+ 124, 112, 70, 63, 75, 14, 80, 82, 113, 15,
+ 20, 63, 40, 9, 4, 54, 66, 108, 94, 74,
+ 77, 17, 23, 35, 29, 27, 30, 42, 28, 20,
+ 28, 26, 12, 34, 24, 17, 3, 14, 4, 2,
+ 19, 2, 9, 19, 0, 3, 4, 5, 11, 73,
+ 13, 17, 11, 29, 35, 6, 28, 6, 1, 6,
+ 2, 19, 0, 8, 61, 93, 47, 55, 47, 117,
+ 31, 19, 81, 10, 11, 11, 23, 27, 47, 35,
+ 25, 65, 59, 47, 53, 65, 19, 4, 62, 44,
+ 20, 8, 20, 8, 1, 7, 4, 22, 76, 46,
+ 34, 18, 52, 22, 22, 1, 40, 8, 66, 40,
+ 30, 18, 40, 14, 2, 3, 7, 13, 88, 48,
+ 20, 6, 30, 2, 17, 11, 24, 78, 62, 38,
+ 18, 52, 20, 14, 2, 1, 124, 27, 13, 0,
+ 5, 15, 3, 4, 10, 8, 16, 26, 46, 15,
+ 9, 25, 25, 98, 33, 63, 21, 1, 1, 17,
+ 15, 13, 19, 3, 23, 53, 11, 9, 38, 13,
+ 27, 9, 9, 5, 13, 13, 11, 43, 27, 15,
+ 21, 15, 36, 67, 23, 40, 31, 7, 11, 11,
+ 6, 3, 11, 2, 12, 17, 17, 69, 76, 72,
+ 74, 48, 20, 22, 20, 4, 16, 0, 5, 2,
+ 29, 27, 39, 37, 33, 51, 103, 35, 25, 29,
+ 27, 49, 41, 39, 43, 47, 31, 3, 11, 5,
+ 19, 55, 33, 31, 69, 43, 65, 67, 103, 91,
+ 87, 91, 15, 9, 83, 27, 25, 41, 69, 53,
+ 55, 65, 43, 55, 41, 47, 73, 83, 107, 33,
+ 57, 71, 15, 14, 24, 28, 58, 38, 50, 10,
+ 54, 46, 64, 56, 72, 64, 40, 90, 12, 3,
+ 19, 35, 35, 85, 87, 91, 26, 84, 74, 78,
+ 60, 62, 36, 34, 18, 14, 15, 14, 24, 28,
+ 58, 38, 50, 10, 54, 46, 64, 56, 72, 64,
+ 40, 90, 12, 3, 19, 35, 35, 85, 87, 91,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 37 */
+
+ 64, 10, 33, 64, 10, 33, 2, 26, 48, 24,
+ 7, 39, 12, 62, 72, 16, 54, 25, 23, 34,
+ 9, 13, 60, 51, 45, 14, 7, 121, 125, 121,
+ 20, 5, 11, 23, 34, 9, 31, 16, 66, 1,
+ 0, 9, 7, 73, 81, 59, 91, 3, 19, 23,
+ 21, 49, 39, 69, 4, 11, 13, 11, 60, 2,
+ 44, 0, 0, 0, 2, 63, 67, 16, 15, 9,
+ 26, 7, 33, 40, 24, 2, 74, 70, 4, 24,
+ 16, 40, 4, 8, 44, 83, 47, 49, 51, 44,
+ 13, 26, 44, 7, 29, 3, 12, 3, 21, 17,
+ 37, 42, 21, 22, 7, 25, 6, 13, 3, 12,
+ 20, 22, 30, 14, 27, 5, 4, 5, 2, 15,
+ 58, 5, 2, 58, 52, 70, 58, 48, 61, 12,
+ 12, 17, 7, 29, 11, 52, 0, 4, 58, 96,
+ 124, 114, 74, 63, 77, 16, 80, 82, 115, 15,
+ 22, 63, 42, 9, 4, 58, 66, 108, 94, 76,
+ 79, 13, 25, 39, 35, 25, 30, 42, 26, 18,
+ 28, 26, 10, 34, 24, 19, 5, 14, 4, 2,
+ 19, 2, 11, 21, 1, 3, 4, 7, 11, 75,
+ 15, 17, 11, 31, 35, 2, 24, 4, 3, 2,
+ 1, 25, 5, 6, 65, 97, 49, 59, 51, 119,
+ 33, 21, 85, 8, 13, 13, 25, 29, 51, 39,
+ 29, 67, 59, 49, 55, 69, 15, 8, 64, 44,
+ 20, 8, 22, 10, 0, 5, 8, 24, 78, 48,
+ 36, 20, 54, 24, 26, 0, 44, 10, 68, 42,
+ 32, 20, 42, 16, 2, 1, 5, 11, 90, 50,
+ 20, 6, 32, 2, 15, 9, 26, 78, 64, 38,
+ 18, 54, 22, 16, 2, 0, 124, 25, 11, 2,
+ 1, 13, 0, 6, 14, 12, 20, 30, 50, 13,
+ 7, 23, 23, 104, 33, 65, 21, 0, 0, 17,
+ 15, 13, 19, 1, 23, 55, 11, 9, 40, 13,
+ 27, 9, 9, 5, 15, 13, 11, 45, 29, 15,
+ 21, 15, 38, 67, 23, 42, 33, 7, 11, 11,
+ 6, 3, 13, 2, 14, 17, 17, 71, 74, 70,
+ 72, 44, 16, 18, 14, 1, 10, 5, 11, 3,
+ 35, 33, 43, 45, 39, 59, 113, 41, 29, 31,
+ 31, 55, 45, 41, 45, 49, 31, 7, 15, 9,
+ 23, 59, 37, 33, 73, 47, 69, 71, 107, 95,
+ 91, 93, 17, 11, 87, 29, 29, 43, 73, 55,
+ 57, 67, 45, 57, 41, 45, 75, 85, 109, 35,
+ 59, 73, 15, 16, 24, 30, 60, 40, 52, 10,
+ 56, 48, 66, 58, 74, 64, 42, 90, 8, 7,
+ 23, 39, 39, 89, 91, 93, 26, 86, 76, 80,
+ 62, 64, 38, 34, 18, 16, 15, 16, 24, 30,
+ 60, 40, 52, 10, 56, 48, 66, 58, 74, 64,
+ 42, 90, 8, 7, 23, 39, 39, 89, 91, 93,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 38 */
+
+ 62, 10, 35, 62, 10, 35, 6, 28, 50, 24,
+ 7, 41, 8, 60, 72, 18, 58, 27, 23, 36,
+ 9, 15, 62, 53, 47, 12, 9, 125, 125, 123,
+ 24, 1, 11, 23, 36, 9, 29, 18, 68, 1,
+ 2, 7, 5, 77, 83, 59, 93, 3, 19, 23,
+ 21, 51, 37, 69, 2, 11, 13, 9, 60, 2,
+ 44, 0, 0, 0, 4, 63, 67, 16, 15, 11,
+ 26, 5, 31, 44, 26, 4, 78, 74, 6, 26,
+ 20, 42, 6, 8, 48, 85, 47, 49, 51, 46,
+ 13, 28, 48, 7, 27, 1, 16, 3, 21, 17,
+ 37, 44, 23, 22, 7, 25, 8, 13, 1, 12,
+ 20, 24, 32, 14, 27, 5, 4, 5, 2, 15,
+ 58, 5, 2, 58, 52, 72, 60, 50, 63, 14,
+ 14, 19, 7, 29, 11, 54, 0, 4, 60, 98,
+ 124, 118, 76, 65, 79, 16, 80, 82, 115, 17,
+ 22, 65, 42, 11, 6, 60, 66, 108, 96, 78,
+ 81, 11, 27, 45, 41, 21, 30, 42, 26, 16,
+ 26, 24, 10, 32, 22, 19, 5, 14, 2, 2,
+ 21, 0, 13, 23, 1, 3, 4, 7, 13, 79,
+ 15, 19, 11, 33, 37, 3, 20, 0, 7, 1,
+ 5, 31, 9, 2, 69, 101, 53, 63, 55, 123,
+ 35, 25, 89, 4, 15, 15, 27, 33, 55, 43,
+ 33, 71, 59, 51, 57, 71, 13, 10, 68, 46,
+ 20, 8, 24, 10, 0, 3, 10, 26, 80, 50,
+ 38, 20, 56, 26, 28, 2, 48, 10, 68, 42,
+ 32, 20, 44, 16, 4, 0, 3, 11, 94, 52,
+ 20, 6, 32, 4, 15, 9, 26, 80, 64, 40,
+ 18, 56, 22, 16, 4, 0, 124, 21, 7, 6,
+ 2, 9, 2, 10, 16, 14, 24, 34, 54, 13,
+ 5, 23, 23, 110, 33, 67, 21, 0, 2, 17,
+ 15, 13, 19, 1, 23, 57, 11, 9, 40, 13,
+ 27, 9, 9, 5, 15, 13, 11, 47, 31, 15,
+ 21, 15, 40, 69, 23, 44, 35, 7, 11, 11,
+ 8, 3, 13, 4, 16, 17, 17, 75, 72, 68,
+ 70, 40, 12, 12, 10, 7, 6, 11, 17, 9,
+ 41, 39, 47, 51, 47, 67, 123, 45, 35, 35,
+ 35, 59, 49, 43, 47, 51, 31, 11, 19, 13,
+ 27, 63, 39, 37, 77, 51, 75, 75, 111, 99,
+ 95, 95, 19, 15, 91, 33, 33, 47, 77, 59,
+ 61, 69, 47, 59, 43, 43, 77, 89, 113, 37,
+ 63, 75, 13, 16, 26, 30, 62, 42, 54, 10,
+ 58, 50, 68, 58, 76, 66, 44, 88, 4, 11,
+ 27, 43, 43, 93, 95, 95, 28, 86, 76, 82,
+ 62, 66, 38, 36, 20, 16, 13, 16, 26, 30,
+ 62, 42, 54, 10, 58, 50, 68, 58, 76, 66,
+ 44, 88, 4, 11, 27, 43, 43, 93, 95, 95,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 39 */
+
+ 60, 10, 35, 60, 10, 35, 10, 32, 50, 24,
+ 9, 45, 6, 58, 74, 18, 60, 31, 23, 40,
+ 11, 15, 64, 55, 49, 10, 13, 125, 125, 125,
+ 28, 0, 11, 23, 40, 11, 29, 20, 68, 1,
+ 2, 5, 1, 79, 85, 59, 93, 3, 17, 21,
+ 23, 51, 37, 69, 2, 11, 13, 7, 62, 2,
+ 44, 0, 0, 0, 4, 63, 67, 18, 17, 11,
+ 24, 5, 29, 48, 28, 6, 80, 76, 8, 28,
+ 24, 44, 8, 10, 54, 85, 49, 49, 53, 46,
+ 13, 30, 54, 5, 27, 1, 20, 3, 21, 17,
+ 37, 44, 23, 24, 5, 25, 10, 11, 1, 12,
+ 22, 26, 32, 14, 29, 5, 4, 5, 2, 15,
+ 58, 5, 2, 60, 52, 72, 60, 50, 63, 14,
+ 14, 19, 7, 27, 11, 56, 2, 6, 64, 102,
+ 124, 120, 80, 65, 81, 18, 82, 82, 117, 17,
+ 24, 65, 44, 11, 6, 62, 66, 108, 96, 82,
+ 83, 7, 29, 51, 47, 19, 30, 42, 24, 16,
+ 26, 24, 8, 32, 22, 21, 7, 14, 2, 2,
+ 21, 0, 13, 25, 3, 3, 4, 9, 13, 81,
+ 17, 21, 11, 35, 37, 7, 16, 3, 9, 3,
+ 9, 37, 15, 0, 73, 105, 55, 67, 59, 125,
+ 37, 27, 91, 2, 17, 17, 31, 35, 59, 47,
+ 35, 73, 59, 53, 59, 75, 9, 14, 70, 46,
+ 20, 8, 26, 12, 2, 3, 14, 30, 82, 52,
+ 38, 22, 60, 28, 32, 4, 54, 12, 70, 44,
+ 34, 22, 46, 18, 4, 2, 1, 11, 96, 54,
+ 20, 6, 34, 4, 15, 7, 28, 80, 66, 40,
+ 18, 58, 24, 18, 4, 2, 124, 19, 5, 8,
+ 4, 7, 6, 12, 20, 18, 28, 38, 60, 11,
+ 3, 21, 21, 116, 31, 69, 19, 2, 4, 17,
+ 15, 13, 19, 0, 25, 59, 11, 9, 42, 13,
+ 27, 9, 9, 3, 17, 15, 11, 49, 33, 17,
+ 21, 15, 42, 71, 25, 46, 35, 9, 11, 11,
+ 8, 5, 15, 4, 16, 17, 17, 77, 70, 66,
+ 68, 38, 6, 8, 4, 13, 0, 17, 23, 15,
+ 47, 43, 51, 59, 53, 75, 125, 51, 39, 39,
+ 39, 65, 53, 45, 49, 53, 31, 15, 25, 19,
+ 31, 67, 43, 39, 81, 55, 79, 79, 115, 103,
+ 97, 99, 21, 17, 95, 35, 35, 49, 81, 61,
+ 63, 71, 49, 61, 43, 41, 81, 91, 115, 41,
+ 65, 77, 13, 18, 26, 32, 64, 44, 54, 12,
+ 60, 52, 70, 60, 78, 66, 44, 88, 0, 15,
+ 31, 47, 47, 99, 99, 97, 28, 88, 78, 82,
+ 64, 68, 40, 36, 20, 18, 13, 18, 26, 32,
+ 64, 44, 54, 12, 60, 52, 70, 60, 78, 66,
+ 44, 88, 0, 15, 31, 47, 47, 99, 99, 97,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 40 */
+
+ 56, 8, 35, 56, 8, 35, 12, 34, 50, 24,
+ 9, 49, 2, 56, 74, 18, 64, 35, 23, 42,
+ 11, 17, 66, 57, 53, 6, 17, 125, 125, 125,
+ 32, 2, 11, 23, 42, 11, 29, 20, 68, 3,
+ 4, 3, 0, 81, 89, 61, 95, 3, 17, 19,
+ 23, 53, 37, 69, 2, 11, 13, 7, 62, 2,
+ 44, 0, 0, 0, 6, 65, 67, 18, 19, 11,
+ 24, 3, 27, 52, 30, 6, 84, 80, 8, 30,
+ 28, 44, 10, 12, 58, 87, 49, 51, 53, 46,
+ 15, 30, 58, 5, 25, 0, 22, 3, 21, 17,
+ 37, 44, 23, 24, 3, 25, 10, 11, 1, 12,
+ 22, 28, 34, 14, 29, 7, 4, 5, 0, 15,
+ 58, 5, 2, 60, 52, 72, 60, 50, 65, 16,
+ 16, 21, 7, 27, 11, 58, 2, 6, 66, 104,
+ 124, 124, 84, 67, 83, 18, 82, 82, 119, 17,
+ 24, 67, 44, 11, 6, 64, 66, 108, 96, 84,
+ 85, 5, 31, 57, 55, 17, 28, 42, 24, 14,
+ 24, 22, 8, 32, 20, 23, 9, 12, 2, 0,
+ 23, 1, 15, 27, 3, 5, 2, 11, 15, 83,
+ 19, 23, 11, 39, 39, 11, 12, 7, 13, 7,
+ 15, 45, 19, 1, 77, 111, 59, 73, 63, 125,
+ 41, 29, 95, 0, 19, 19, 33, 39, 63, 51,
+ 39, 77, 59, 55, 61, 79, 7, 16, 72, 48,
+ 20, 8, 26, 14, 4, 1, 18, 32, 82, 52,
+ 40, 22, 62, 30, 34, 6, 58, 12, 70, 44,
+ 34, 22, 46, 18, 6, 2, 0, 11, 98, 56,
+ 20, 6, 36, 6, 15, 5, 30, 80, 68, 40,
+ 18, 60, 24, 20, 6, 2, 124, 17, 1, 12,
+ 8, 5, 8, 16, 22, 20, 30, 42, 64, 11,
+ 1, 21, 19, 122, 31, 71, 19, 4, 4, 17,
+ 15, 13, 21, 0, 25, 61, 11, 9, 42, 13,
+ 27, 11, 9, 3, 17, 15, 13, 51, 35, 17,
+ 23, 15, 44, 73, 25, 46, 37, 9, 11, 13,
+ 8, 5, 15, 4, 18, 17, 17, 81, 68, 64,
+ 66, 34, 2, 2, 1, 19, 3, 23, 31, 21,
+ 55, 49, 55, 67, 61, 83, 125, 57, 43, 43,
+ 45, 69, 57, 49, 53, 55, 31, 21, 29, 23,
+ 35, 73, 47, 43, 87, 59, 83, 83, 119, 109,
+ 101, 101, 25, 19, 101, 39, 39, 53, 85, 65,
+ 67, 73, 51, 63, 43, 39, 83, 95, 119, 43,
+ 67, 79, 13, 18, 28, 32, 66, 44, 56, 12,
+ 62, 52, 72, 60, 78, 68, 46, 86, 3, 19,
+ 35, 51, 51, 103, 103, 101, 30, 88, 78, 84,
+ 64, 68, 40, 38, 20, 18, 13, 18, 28, 32,
+ 66, 44, 56, 12, 62, 52, 72, 60, 78, 68,
+ 46, 86, 3, 19, 35, 51, 51, 103, 103, 101,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 41 */
+
+ 54, 8, 35, 54, 8, 35, 16, 36, 52, 24,
+ 9, 53, 1, 56, 76, 18, 68, 37, 21, 46,
+ 11, 17, 68, 57, 55, 4, 21, 125, 125, 125,
+ 36, 6, 9, 21, 46, 11, 27, 22, 70, 3,
+ 6, 1, 2, 83, 91, 61, 95, 3, 17, 17,
+ 23, 53, 37, 69, 2, 9, 11, 5, 62, 2,
+ 44, 0, 0, 0, 8, 65, 67, 18, 21, 11,
+ 24, 1, 23, 58, 32, 8, 88, 84, 10, 32,
+ 32, 46, 14, 14, 62, 89, 49, 51, 53, 48,
+ 15, 32, 62, 5, 23, 2, 26, 3, 19, 17,
+ 35, 46, 23, 26, 1, 23, 12, 11, 1, 14,
+ 24, 30, 36, 14, 29, 7, 6, 3, 0, 15,
+ 60, 5, 2, 60, 54, 74, 60, 52, 65, 18,
+ 18, 23, 7, 27, 11, 60, 2, 8, 68, 106,
+ 124, 124, 88, 69, 85, 20, 82, 84, 121, 17,
+ 24, 67, 46, 11, 6, 68, 66, 108, 96, 86,
+ 87, 3, 33, 61, 61, 13, 28, 42, 24, 12,
+ 24, 22, 8, 32, 20, 25, 11, 12, 2, 0,
+ 23, 1, 17, 27, 3, 5, 2, 13, 17, 85,
+ 19, 23, 11, 41, 39, 15, 8, 9, 15, 11,
+ 19, 51, 23, 3, 81, 115, 61, 77, 65, 125,
+ 43, 31, 99, 1, 21, 21, 35, 43, 65, 55,
+ 43, 79, 59, 55, 63, 81, 3, 20, 76, 50,
+ 20, 8, 28, 16, 6, 0, 22, 34, 84, 54,
+ 42, 24, 64, 32, 36, 8, 62, 14, 72, 46,
+ 36, 22, 48, 20, 8, 4, 4, 9, 102, 58,
+ 22, 6, 38, 8, 13, 3, 32, 82, 70, 42,
+ 20, 62, 26, 22, 8, 4, 124, 13, 2, 16,
+ 12, 3, 10, 20, 26, 24, 34, 46, 68, 9,
+ 0, 19, 17, 124, 31, 73, 19, 6, 6, 15,
+ 15, 13, 21, 0, 25, 63, 11, 9, 44, 13,
+ 27, 11, 9, 3, 17, 15, 13, 51, 37, 17,
+ 23, 13, 48, 73, 25, 48, 39, 9, 11, 13,
+ 10, 5, 15, 6, 20, 15, 15, 85, 66, 62,
+ 64, 30, 1, 3, 5, 25, 7, 27, 37, 25,
+ 61, 55, 59, 75, 67, 89, 125, 63, 47, 45,
+ 49, 73, 59, 51, 55, 57, 31, 25, 33, 27,
+ 39, 77, 51, 45, 91, 63, 87, 87, 123, 113,
+ 105, 103, 27, 21, 105, 41, 43, 55, 89, 67,
+ 69, 75, 53, 63, 43, 37, 85, 97, 123, 45,
+ 69, 81, 13, 20, 30, 34, 70, 46, 58, 12,
+ 64, 54, 74, 62, 80, 70, 48, 84, 5, 23,
+ 39, 55, 55, 107, 107, 103, 32, 88, 80, 86,
+ 66, 70, 42, 40, 22, 20, 13, 20, 30, 34,
+ 70, 46, 58, 12, 64, 54, 74, 62, 80, 70,
+ 48, 84, 5, 23, 39, 55, 55, 107, 107, 103,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 42 */
+
+ 52, 8, 35, 52, 8, 35, 20, 40, 52, 24,
+ 11, 57, 3, 54, 78, 18, 70, 41, 21, 48,
+ 13, 17, 70, 59, 57, 2, 25, 125, 125, 125,
+ 40, 8, 9, 21, 48, 13, 27, 24, 70, 3,
+ 6, 0, 6, 85, 93, 61, 95, 3, 15, 15,
+ 25, 53, 37, 69, 2, 9, 11, 3, 64, 2,
+ 44, 0, 0, 0, 8, 65, 67, 20, 23, 11,
+ 22, 1, 21, 62, 34, 10, 90, 86, 12, 34,
+ 36, 48, 16, 16, 68, 89, 51, 51, 55, 48,
+ 15, 34, 68, 3, 23, 2, 30, 3, 19, 17,
+ 35, 46, 23, 26, 0, 23, 14, 9, 1, 14,
+ 24, 32, 36, 14, 31, 7, 6, 3, 0, 15,
+ 60, 5, 2, 62, 54, 74, 60, 52, 67, 18,
+ 18, 23, 7, 25, 11, 62, 4, 8, 72, 110,
+ 124, 124, 92, 69, 87, 20, 84, 84, 123, 17,
+ 26, 69, 48, 11, 6, 70, 66, 108, 96, 90,
+ 89, 0, 35, 67, 67, 11, 28, 42, 22, 12,
+ 22, 22, 6, 32, 18, 27, 13, 12, 2, 0,
+ 23, 3, 17, 29, 5, 5, 2, 15, 17, 87,
+ 21, 25, 11, 43, 41, 19, 4, 13, 19, 13,
+ 23, 57, 29, 5, 85, 119, 65, 81, 69, 125,
+ 45, 33, 101, 3, 23, 23, 39, 45, 69, 59,
+ 45, 83, 59, 57, 65, 85, 1, 22, 78, 50,
+ 20, 8, 30, 18, 8, 0, 26, 38, 86, 56,
+ 42, 26, 68, 34, 40, 10, 68, 16, 72, 46,
+ 36, 24, 50, 22, 8, 6, 6, 9, 104, 60,
+ 22, 6, 40, 8, 13, 1, 34, 82, 72, 42,
+ 20, 64, 28, 24, 8, 6, 124, 11, 4, 18,
+ 14, 1, 14, 22, 30, 26, 38, 50, 74, 7,
+ 2, 17, 15, 124, 29, 75, 17, 8, 8, 15,
+ 15, 13, 21, 2, 27, 65, 11, 9, 46, 13,
+ 27, 11, 9, 1, 19, 17, 13, 53, 39, 19,
+ 23, 13, 50, 75, 27, 50, 39, 11, 11, 13,
+ 10, 7, 17, 6, 20, 15, 15, 87, 64, 60,
+ 62, 28, 7, 7, 11, 31, 13, 33, 43, 31,
+ 67, 59, 63, 83, 75, 97, 125, 69, 51, 49,
+ 53, 79, 63, 53, 57, 59, 31, 29, 39, 33,
+ 43, 81, 55, 49, 95, 67, 91, 91, 125, 117,
+ 107, 107, 29, 23, 109, 45, 45, 59, 93, 69,
+ 71, 77, 55, 65, 43, 35, 89, 99, 125, 49,
+ 71, 83, 13, 20, 30, 36, 72, 48, 58, 14,
+ 66, 56, 76, 62, 82, 70, 48, 84, 9, 27,
+ 43, 59, 59, 113, 111, 105, 32, 90, 80, 86,
+ 68, 72, 42, 40, 22, 22, 13, 20, 30, 36,
+ 72, 48, 58, 14, 66, 56, 76, 62, 82, 70,
+ 48, 84, 9, 27, 43, 59, 59, 113, 111, 105,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 43 */
+
+ 50, 8, 37, 50, 8, 37, 24, 42, 54, 24,
+ 11, 59, 7, 52, 78, 20, 74, 43, 21, 52,
+ 13, 19, 72, 61, 59, 0, 27, 125, 125, 125,
+ 44, 12, 9, 21, 52, 13, 25, 26, 72, 3,
+ 8, 2, 8, 89, 95, 61, 97, 3, 15, 15,
+ 25, 55, 35, 69, 0, 9, 11, 1, 64, 2,
+ 44, 0, 0, 0, 10, 65, 67, 20, 23, 13,
+ 22, 0, 19, 66, 36, 12, 94, 90, 14, 36,
+ 40, 50, 18, 16, 72, 91, 51, 51, 55, 50,
+ 15, 36, 72, 3, 21, 4, 34, 3, 19, 17,
+ 35, 48, 25, 28, 0, 23, 16, 9, 0, 14,
+ 26, 34, 38, 14, 31, 7, 6, 3, 0, 15,
+ 60, 5, 2, 62, 54, 76, 62, 54, 67, 20,
+ 20, 25, 7, 25, 11, 64, 4, 10, 74, 112,
+ 124, 124, 94, 71, 89, 22, 84, 84, 123, 19,
+ 26, 69, 48, 13, 8, 72, 66, 108, 98, 92,
+ 91, 2, 37, 73, 73, 7, 28, 42, 22, 10,
+ 22, 20, 6, 30, 18, 27, 13, 12, 0, 0,
+ 25, 3, 19, 31, 5, 5, 2, 15, 19, 91,
+ 21, 27, 11, 45, 41, 25, 0, 17, 21, 17,
+ 27, 63, 33, 9, 89, 123, 67, 85, 73, 125,
+ 47, 37, 105, 7, 25, 25, 41, 49, 73, 63,
+ 49, 85, 59, 59, 67, 87, 2, 26, 82, 52,
+ 20, 8, 32, 18, 8, 2, 28, 40, 88, 58,
+ 44, 26, 70, 36, 42, 12, 72, 16, 74, 48,
+ 38, 24, 52, 22, 10, 8, 8, 9, 108, 62,
+ 22, 6, 40, 10, 13, 1, 34, 84, 72, 44,
+ 20, 66, 28, 24, 10, 6, 124, 7, 8, 22,
+ 18, 2, 16, 26, 32, 30, 42, 54, 78, 7,
+ 4, 17, 15, 124, 29, 77, 17, 8, 10, 15,
+ 15, 13, 21, 2, 27, 67, 11, 9, 46, 13,
+ 27, 11, 9, 1, 19, 17, 13, 55, 41, 19,
+ 23, 13, 52, 77, 27, 52, 41, 11, 11, 13,
+ 12, 7, 17, 8, 22, 15, 15, 91, 62, 58,
+ 60, 24, 11, 13, 15, 37, 17, 39, 49, 37,
+ 73, 65, 67, 89, 81, 105, 125, 73, 57, 53,
+ 57, 83, 67, 55, 59, 61, 31, 33, 43, 37,
+ 47, 85, 57, 51, 99, 71, 97, 95, 125, 121,
+ 111, 109, 31, 27, 113, 47, 49, 61, 97, 73,
+ 75, 79, 57, 67, 45, 33, 91, 103, 125, 51,
+ 75, 85, 11, 22, 32, 36, 74, 50, 60, 14,
+ 68, 58, 78, 64, 84, 72, 50, 82, 13, 31,
+ 47, 63, 63, 117, 115, 107, 34, 90, 82, 88,
+ 68, 74, 44, 42, 24, 22, 11, 22, 32, 36,
+ 74, 50, 60, 14, 68, 58, 78, 64, 84, 72,
+ 50, 82, 13, 31, 47, 63, 63, 117, 115, 107,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 44 */
+
+ 46, 8, 37, 46, 8, 37, 26, 46, 54, 24,
+ 13, 63, 9, 50, 80, 20, 78, 47, 21, 54,
+ 13, 19, 74, 63, 63, 3, 31, 125, 125, 125,
+ 48, 14, 9, 21, 54, 13, 25, 28, 72, 5,
+ 10, 4, 10, 91, 99, 63, 97, 3, 15, 13,
+ 25, 55, 35, 69, 0, 9, 11, 1, 66, 2,
+ 44, 0, 0, 0, 10, 67, 67, 22, 25, 13,
+ 20, 0, 17, 70, 38, 14, 96, 92, 14, 38,
+ 44, 52, 20, 18, 76, 93, 51, 51, 55, 50,
+ 17, 38, 76, 3, 19, 6, 38, 3, 19, 17,
+ 35, 48, 25, 28, 2, 23, 16, 9, 0, 14,
+ 26, 36, 38, 14, 31, 9, 6, 3, 0, 15,
+ 60, 5, 2, 64, 54, 76, 62, 54, 69, 20,
+ 22, 25, 7, 25, 11, 66, 4, 10, 76, 114,
+ 124, 124, 98, 71, 91, 22, 84, 84, 125, 19,
+ 28, 71, 50, 13, 8, 74, 66, 108, 98, 94,
+ 93, 6, 39, 79, 79, 5, 26, 42, 20, 8,
+ 20, 20, 4, 30, 16, 29, 15, 10, 0, 0,
+ 25, 5, 21, 33, 7, 7, 0, 17, 19, 93,
+ 23, 29, 11, 49, 43, 29, 3, 21, 25, 21,
+ 31, 71, 39, 11, 93, 125, 71, 89, 77, 125,
+ 51, 39, 109, 9, 27, 27, 43, 51, 77, 67,
+ 53, 89, 59, 61, 69, 91, 4, 28, 84, 52,
+ 20, 8, 34, 20, 10, 4, 32, 42, 90, 58,
+ 46, 28, 72, 38, 46, 14, 76, 18, 74, 48,
+ 38, 26, 54, 24, 10, 8, 10, 9, 110, 64,
+ 22, 6, 42, 10, 13, 0, 36, 84, 74, 44,
+ 20, 68, 30, 26, 10, 8, 124, 5, 10, 24,
+ 22, 4, 20, 28, 36, 32, 44, 58, 82, 5,
+ 6, 15, 13, 124, 29, 79, 17, 10, 10, 15,
+ 15, 13, 23, 4, 27, 69, 11, 9, 48, 13,
+ 27, 11, 9, 1, 21, 17, 13, 57, 43, 19,
+ 23, 13, 54, 79, 27, 52, 43, 11, 11, 13,
+ 12, 7, 19, 8, 24, 15, 15, 93, 60, 56,
+ 58, 20, 15, 17, 21, 43, 23, 45, 57, 43,
+ 81, 71, 71, 97, 89, 113, 125, 79, 61, 57,
+ 61, 89, 71, 59, 61, 63, 31, 39, 47, 41,
+ 51, 89, 61, 55, 103, 75, 101, 99, 125, 125,
+ 115, 111, 33, 29, 119, 51, 53, 65, 101, 75,
+ 77, 81, 59, 69, 45, 31, 93, 105, 125, 53,
+ 77, 87, 11, 22, 32, 38, 76, 50, 62, 14,
+ 70, 60, 80, 64, 86, 72, 52, 82, 17, 35,
+ 51, 67, 67, 121, 119, 111, 34, 92, 82, 90,
+ 70, 74, 44, 42, 24, 24, 11, 22, 32, 38,
+ 76, 50, 62, 14, 70, 60, 80, 64, 86, 72,
+ 52, 82, 17, 35, 51, 67, 67, 121, 119, 111,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 45 */
+
+ 44, 8, 37, 44, 8, 37, 30, 48, 54, 24,
+ 13, 67, 13, 48, 82, 20, 80, 51, 19, 58,
+ 15, 19, 76, 65, 65, 5, 35, 125, 125, 125,
+ 52, 16, 9, 19, 58, 15, 25, 30, 72, 5,
+ 10, 6, 14, 93, 101, 63, 97, 3, 13, 11,
+ 27, 55, 35, 69, 0, 7, 11, 0, 66, 2,
+ 44, 0, 0, 0, 12, 67, 67, 22, 27, 13,
+ 20, 2, 13, 76, 40, 16, 100, 96, 16, 40,
+ 48, 54, 22, 20, 82, 93, 53, 51, 57, 50,
+ 17, 40, 82, 1, 19, 6, 42, 3, 19, 17,
+ 33, 48, 25, 30, 4, 21, 18, 7, 0, 16,
+ 28, 38, 40, 14, 33, 9, 8, 3, 0, 15,
+ 62, 5, 2, 64, 56, 76, 62, 54, 69, 22,
+ 22, 27, 7, 23, 11, 68, 6, 12, 80, 118,
+ 124, 124, 102, 73, 93, 24, 86, 84, 125, 19,
+ 28, 71, 52, 13, 8, 78, 66, 108, 98, 98,
+ 95, 8, 41, 83, 85, 3, 26, 42, 20, 8,
+ 20, 20, 4, 30, 16, 31, 17, 10, 0, 0,
+ 25, 5, 21, 35, 7, 7, 0, 19, 21, 95,
+ 25, 29, 11, 51, 43, 33, 7, 23, 27, 23,
+ 35, 77, 43, 13, 97, 125, 73, 93, 81, 125,
+ 53, 41, 111, 11, 29, 29, 47, 55, 81, 71,
+ 55, 91, 59, 63, 71, 95, 8, 32, 86, 54,
+ 20, 8, 36, 22, 12, 4, 36, 46, 92, 60,
+ 46, 30, 76, 40, 48, 16, 82, 20, 76, 50,
+ 40, 26, 56, 26, 12, 10, 12, 7, 112, 66,
+ 22, 6, 44, 12, 11, 2, 38, 84, 76, 44,
+ 20, 70, 32, 28, 12, 10, 124, 3, 14, 28,
+ 24, 6, 22, 32, 40, 36, 48, 62, 88, 3,
+ 8, 13, 11, 124, 27, 81, 15, 12, 12, 15,
+ 15, 13, 23, 4, 29, 71, 11, 9, 50, 13,
+ 27, 11, 9, 0, 21, 19, 13, 59, 45, 21,
+ 23, 13, 56, 79, 29, 54, 43, 13, 11, 13,
+ 12, 9, 19, 8, 24, 15, 15, 97, 58, 54,
+ 56, 18, 21, 23, 27, 49, 27, 51, 63, 49,
+ 87, 75, 75, 105, 95, 121, 125, 85, 65, 59,
+ 65, 93, 75, 61, 63, 65, 31, 43, 53, 47,
+ 55, 93, 65, 57, 107, 79, 105, 103, 125, 125,
+ 117, 115, 35, 31, 123, 53, 55, 67, 105, 77,
+ 79, 83, 61, 71, 45, 29, 97, 107, 125, 57,
+ 79, 89, 11, 24, 34, 40, 78, 52, 62, 16,
+ 72, 62, 82, 66, 88, 74, 52, 80, 21, 39,
+ 55, 71, 71, 125, 123, 113, 36, 92, 84, 90,
+ 72, 76, 46, 44, 24, 26, 11, 24, 34, 40,
+ 78, 52, 62, 16, 72, 62, 82, 66, 88, 74,
+ 52, 80, 21, 39, 55, 71, 71, 125, 123, 113,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 46 */
+
+ 42, 8, 37, 42, 8, 37, 34, 52, 56, 24,
+ 15, 71, 15, 46, 82, 20, 84, 53, 19, 60,
+ 15, 21, 78, 67, 67, 7, 39, 125, 125, 125,
+ 56, 20, 9, 19, 60, 15, 23, 32, 74, 5,
+ 12, 8, 16, 95, 103, 63, 99, 3, 13, 9,
+ 27, 57, 35, 69, 0, 7, 11, 2, 68, 2,
+ 44, 0, 0, 0, 12, 67, 67, 24, 29, 13,
+ 18, 2, 11, 80, 42, 18, 102, 98, 18, 42,
+ 52, 56, 24, 22, 86, 95, 53, 51, 57, 52,
+ 17, 42, 86, 1, 17, 8, 46, 3, 19, 17,
+ 33, 50, 25, 30, 6, 21, 20, 7, 0, 16,
+ 28, 40, 40, 14, 33, 9, 8, 3, 0, 15,
+ 62, 5, 2, 66, 56, 78, 62, 56, 71, 22,
+ 24, 27, 7, 23, 11, 70, 6, 12, 82, 120,
+ 124, 124, 106, 73, 95, 24, 86, 84, 125, 19,
+ 30, 73, 52, 13, 8, 80, 66, 108, 98, 100,
+ 97, 12, 43, 89, 91, 0, 26, 42, 18, 6,
+ 18, 18, 2, 30, 14, 33, 19, 10, 0, 0,
+ 27, 7, 23, 37, 9, 7, 0, 21, 21, 97,
+ 25, 31, 11, 53, 45, 37, 11, 27, 31, 27,
+ 39, 83, 49, 15, 101, 125, 77, 97, 85, 125,
+ 55, 43, 115, 13, 31, 31, 49, 57, 85, 75,
+ 59, 95, 59, 65, 73, 97, 10, 34, 90, 54,
+ 20, 8, 38, 24, 14, 6, 40, 48, 94, 62,
+ 48, 30, 78, 42, 52, 18, 86, 20, 76, 50,
+ 40, 28, 58, 26, 12, 12, 14, 7, 116, 68,
+ 22, 6, 46, 12, 11, 4, 40, 86, 78, 46,
+ 20, 72, 32, 30, 12, 10, 124, 0, 16, 30,
+ 28, 8, 26, 34, 42, 38, 52, 66, 92, 3,
+ 10, 13, 9, 124, 27, 83, 15, 14, 14, 15,
+ 15, 13, 23, 6, 29, 73, 11, 9, 50, 13,
+ 27, 11, 9, 0, 23, 19, 13, 61, 47, 21,
+ 23, 13, 58, 81, 29, 56, 45, 13, 11, 13,
+ 14, 9, 21, 10, 26, 15, 15, 99, 56, 52,
+ 54, 14, 25, 27, 31, 55, 33, 57, 69, 55,
+ 93, 81, 79, 113, 103, 125, 125, 91, 69, 63,
+ 69, 99, 79, 63, 65, 67, 31, 47, 57, 51,
+ 59, 97, 69, 61, 111, 83, 109, 107, 125, 125,
+ 121, 117, 37, 33, 125, 57, 59, 71, 109, 81,
+ 83, 85, 63, 73, 45, 27, 99, 111, 125, 59,
+ 81, 91, 11, 24, 34, 40, 80, 54, 64, 16,
+ 74, 64, 84, 66, 90, 74, 54, 80, 25, 43,
+ 59, 75, 75, 125, 125, 115, 36, 94, 84, 92,
+ 72, 78, 46, 44, 26, 26, 11, 24, 34, 40,
+ 80, 54, 64, 16, 74, 64, 84, 66, 90, 74,
+ 54, 80, 25, 43, 59, 75, 75, 125, 125, 115,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 47 */
+
+ 40, 8, 37, 40, 8, 37, 38, 54, 56, 24,
+ 15, 75, 19, 44, 84, 20, 88, 57, 19, 64,
+ 15, 21, 80, 69, 69, 9, 43, 125, 125, 125,
+ 60, 22, 9, 19, 64, 15, 23, 34, 74, 5,
+ 14, 10, 18, 97, 105, 63, 99, 3, 13, 7,
+ 27, 57, 35, 69, 0, 7, 11, 4, 68, 2,
+ 44, 0, 0, 0, 14, 67, 67, 24, 31, 13,
+ 18, 4, 9, 84, 44, 20, 106, 102, 20, 44,
+ 56, 58, 26, 24, 90, 97, 53, 51, 57, 52,
+ 17, 44, 90, 1, 15, 10, 50, 3, 19, 17,
+ 33, 50, 25, 32, 8, 21, 22, 7, 0, 16,
+ 30, 42, 42, 14, 33, 9, 8, 3, 0, 15,
+ 62, 5, 2, 66, 56, 78, 62, 56, 71, 24,
+ 26, 29, 7, 23, 11, 72, 6, 14, 84, 122,
+ 124, 124, 110, 75, 97, 26, 86, 84, 125, 19,
+ 30, 73, 54, 13, 8, 82, 66, 108, 98, 102,
+ 99, 14, 45, 95, 97, 2, 26, 42, 18, 4,
+ 18, 18, 2, 30, 14, 35, 21, 10, 0, 0,
+ 27, 7, 25, 39, 9, 7, 0, 23, 23, 99,
+ 27, 33, 11, 55, 45, 41, 15, 31, 33, 31,
+ 43, 89, 53, 17, 105, 125, 79, 101, 89, 125,
+ 57, 45, 119, 15, 33, 33, 51, 61, 89, 79,
+ 63, 97, 59, 67, 75, 101, 14, 38, 92, 56,
+ 20, 8, 40, 26, 16, 8, 44, 50, 96, 64,
+ 50, 32, 80, 44, 54, 20, 90, 22, 78, 52,
+ 42, 28, 60, 28, 14, 14, 16, 7, 118, 70,
+ 22, 6, 48, 14, 11, 6, 42, 86, 80, 46,
+ 20, 74, 34, 32, 14, 12, 124, 2, 20, 34,
+ 32, 10, 28, 38, 46, 42, 56, 70, 96, 1,
+ 12, 11, 7, 124, 27, 85, 15, 16, 16, 15,
+ 15, 13, 23, 6, 29, 75, 11, 9, 52, 13,
+ 27, 11, 9, 0, 23, 19, 13, 63, 49, 21,
+ 23, 13, 60, 83, 29, 58, 47, 13, 11, 13,
+ 14, 9, 21, 10, 28, 15, 15, 103, 54, 50,
+ 52, 10, 29, 33, 37, 61, 37, 63, 75, 61,
+ 99, 87, 83, 121, 109, 125, 125, 97, 73, 67,
+ 73, 103, 83, 65, 67, 69, 31, 51, 61, 55,
+ 63, 101, 73, 63, 115, 87, 113, 111, 125, 125,
+ 125, 119, 39, 35, 125, 59, 63, 73, 113, 83,
+ 85, 87, 65, 75, 45, 25, 101, 113, 125, 61,
+ 83, 93, 11, 26, 36, 42, 82, 56, 66, 16,
+ 76, 66, 86, 68, 92, 76, 56, 78, 29, 47,
+ 63, 79, 79, 125, 125, 117, 38, 94, 86, 94,
+ 74, 80, 48, 46, 26, 28, 11, 26, 36, 42,
+ 82, 56, 66, 16, 76, 66, 86, 68, 92, 76,
+ 56, 78, 29, 47, 63, 79, 79, 125, 125, 117,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 48 */
+
+ 36, 6, 39, 36, 6, 39, 40, 56, 56, 24,
+ 17, 79, 23, 42, 84, 20, 90, 61, 19, 66,
+ 17, 23, 82, 71, 73, 13, 47, 125, 125, 125,
+ 64, 24, 9, 19, 66, 17, 23, 34, 74, 7,
+ 14, 10, 20, 101, 109, 65, 101, 5, 13, 7,
+ 29, 59, 35, 69, 1, 7, 11, 4, 68, 0,
+ 44, 0, 0, 0, 14, 69, 67, 24, 33, 15,
+ 16, 4, 7, 88, 46, 20, 108, 104, 20, 46,
+ 58, 58, 28, 24, 94, 99, 55, 53, 59, 52,
+ 19, 44, 94, 1, 15, 10, 52, 3, 19, 17,
+ 33, 50, 27, 32, 8, 21, 22, 7, 0, 16,
+ 30, 44, 42, 14, 35, 11, 8, 3, 1, 15,
+ 62, 5, 2, 66, 56, 78, 62, 56, 73, 24,
+ 26, 31, 7, 23, 13, 72, 6, 14, 86, 124,
+ 124, 124, 112, 77, 101, 26, 86, 84, 125, 21,
+ 30, 75, 54, 15, 8, 84, 66, 106, 98, 104,
+ 101, 16, 49, 101, 105, 4, 24, 42, 16, 2,
+ 16, 16, 0, 28, 12, 37, 23, 8, 1, 1,
+ 29, 9, 27, 41, 11, 9, 1, 25, 25, 103,
+ 29, 35, 11, 59, 47, 47, 21, 35, 37, 35,
+ 49, 97, 59, 21, 109, 125, 83, 107, 93, 125,
+ 61, 49, 123, 19, 35, 35, 55, 65, 93, 83,
+ 67, 101, 59, 69, 77, 105, 16, 40, 94, 56,
+ 20, 8, 40, 26, 16, 8, 46, 52, 96, 64,
+ 50, 32, 82, 46, 56, 20, 94, 22, 78, 52,
+ 42, 28, 60, 28, 14, 14, 18, 7, 120, 72,
+ 22, 4, 48, 14, 11, 6, 42, 86, 80, 46,
+ 20, 76, 34, 32, 14, 12, 124, 4, 22, 36,
+ 34, 12, 30, 40, 48, 44, 58, 72, 100, 1,
+ 12, 11, 7, 124, 27, 89, 15, 16, 16, 15,
+ 15, 13, 25, 6, 31, 77, 13, 9, 52, 15,
+ 27, 13, 9, 0, 25, 21, 15, 65, 51, 23,
+ 25, 13, 62, 85, 31, 58, 49, 15, 11, 15,
+ 14, 11, 23, 10, 28, 15, 15, 107, 50, 48,
+ 48, 6, 35, 39, 43, 67, 43, 69, 83, 67,
+ 107, 93, 87, 125, 117, 125, 125, 103, 79, 71,
+ 79, 109, 87, 69, 71, 71, 31, 57, 67, 61,
+ 67, 107, 77, 67, 121, 91, 119, 115, 125, 125,
+ 125, 123, 43, 39, 125, 63, 67, 77, 117, 87,
+ 89, 89, 67, 77, 47, 23, 105, 117, 125, 65,
+ 87, 97, 11, 26, 36, 42, 84, 56, 66, 16,
+ 78, 66, 88, 68, 92, 76, 56, 76, 33, 51,
+ 69, 85, 83, 125, 125, 121, 38, 94, 86, 94,
+ 74, 80, 48, 46, 26, 28, 11, 26, 36, 42,
+ 84, 56, 66, 16, 78, 66, 88, 68, 92, 76,
+ 56, 76, 33, 51, 69, 85, 83, 125, 125, 121,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 49 */
+
+ 34, 6, 39, 34, 6, 39, 44, 60, 58, 26,
+ 17, 81, 25, 42, 86, 22, 94, 63, 17, 70,
+ 17, 23, 86, 71, 75, 15, 49, 125, 125, 125,
+ 68, 28, 7, 17, 70, 17, 21, 36, 76, 7,
+ 16, 12, 24, 103, 111, 65, 101, 5, 11, 5,
+ 29, 59, 33, 67, 1, 5, 9, 6, 70, 0,
+ 44, 0, 0, 0, 16, 69, 67, 26, 33, 15,
+ 16, 6, 3, 94, 50, 22, 112, 108, 22, 50,
+ 62, 60, 32, 26, 100, 99, 55, 53, 59, 54,
+ 19, 46, 100, 0, 13, 12, 56, 3, 17, 17,
+ 31, 52, 27, 34, 10, 19, 24, 5, 2, 18,
+ 32, 46, 44, 16, 35, 11, 10, 1, 1, 13,
+ 64, 3, 2, 68, 58, 80, 64, 58, 73, 26,
+ 28, 31, 7, 21, 13, 74, 8, 16, 90, 124,
+ 124, 124, 116, 77, 103, 28, 88, 86, 125, 21,
+ 32, 75, 56, 15, 10, 88, 66, 106, 100, 108,
+ 103, 20, 51, 105, 111, 8, 24, 42, 16, 2,
+ 16, 16, 0, 28, 12, 37, 23, 8, 1, 1,
+ 29, 9, 27, 41, 11, 9, 1, 25, 25, 105,
+ 29, 35, 11, 61, 47, 51, 25, 37, 39, 37,
+ 53, 103, 63, 23, 111, 125, 85, 111, 95, 125,
+ 63, 51, 125, 21, 35, 35, 57, 67, 95, 85,
+ 69, 103, 59, 69, 77, 107, 20, 44, 98, 58,
+ 20, 8, 42, 28, 18, 10, 50, 56, 98, 66,
+ 52, 34, 86, 48, 60, 22, 100, 24, 80, 54,
+ 44, 30, 62, 30, 16, 16, 22, 5, 124, 74,
+ 24, 4, 50, 16, 9, 8, 44, 88, 82, 48,
+ 22, 78, 36, 34, 16, 14, 124, 8, 26, 40,
+ 38, 16, 34, 44, 52, 48, 62, 76, 106, 0,
+ 14, 9, 5, 124, 25, 91, 13, 18, 18, 13,
+ 15, 11, 25, 8, 31, 77, 13, 9, 54, 15,
+ 27, 13, 9, 2, 25, 21, 15, 65, 51, 23,
+ 25, 11, 66, 85, 31, 60, 49, 15, 11, 15,
+ 16, 11, 23, 12, 30, 13, 13, 109, 48, 46,
+ 46, 4, 39, 43, 47, 73, 47, 73, 89, 71,
+ 113, 97, 91, 125, 123, 125, 125, 107, 83, 73,
+ 83, 113, 89, 71, 73, 71, 31, 61, 71, 65,
+ 71, 111, 79, 69, 125, 93, 123, 117, 125, 125,
+ 125, 125, 45, 41, 125, 65, 69, 79, 119, 89,
+ 91, 91, 67, 77, 47, 21, 107, 119, 125, 67,
+ 89, 99, 9, 28, 38, 44, 88, 58, 68, 18,
+ 82, 68, 90, 70, 94, 78, 58, 76, 35, 53,
+ 73, 89, 85, 125, 125, 123, 40, 96, 88, 96,
+ 76, 82, 50, 48, 28, 30, 9, 28, 38, 44,
+ 88, 58, 68, 18, 82, 68, 90, 70, 94, 78,
+ 58, 76, 35, 53, 73, 89, 85, 125, 125, 123,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 50 */
+
+ 32, 6, 39, 32, 6, 39, 48, 62, 58, 26,
+ 17, 85, 29, 40, 88, 22, 98, 67, 17, 72,
+ 17, 23, 88, 73, 77, 17, 53, 125, 125, 125,
+ 72, 30, 7, 17, 72, 17, 21, 38, 76, 7,
+ 18, 14, 26, 105, 113, 65, 101, 5, 11, 3,
+ 29, 59, 33, 67, 1, 5, 9, 8, 70, 0,
+ 44, 0, 0, 0, 18, 69, 67, 26, 35, 15,
+ 16, 8, 1, 98, 52, 24, 116, 112, 24, 52,
+ 66, 62, 34, 28, 104, 101, 55, 53, 59, 54,
+ 19, 48, 104, 0, 11, 14, 60, 3, 17, 17,
+ 31, 52, 27, 34, 12, 19, 26, 5, 2, 18,
+ 32, 48, 46, 16, 35, 11, 10, 1, 1, 13,
+ 64, 3, 2, 68, 58, 80, 64, 58, 75, 28,
+ 30, 33, 7, 21, 13, 76, 8, 16, 92, 124,
+ 124, 124, 120, 79, 105, 28, 88, 86, 125, 21,
+ 32, 77, 58, 15, 10, 90, 66, 106, 100, 110,
+ 105, 22, 53, 111, 117, 10, 24, 42, 16, 0,
+ 14, 16, 0, 28, 10, 39, 25, 8, 1, 1,
+ 29, 11, 29, 43, 11, 9, 1, 27, 27, 107,
+ 31, 37, 11, 63, 49, 55, 29, 41, 43, 41,
+ 57, 109, 67, 25, 115, 125, 89, 115, 99, 125,
+ 65, 53, 125, 23, 37, 37, 59, 71, 99, 89,
+ 73, 107, 59, 71, 79, 111, 22, 46, 100, 60,
+ 20, 8, 44, 30, 20, 12, 54, 58, 100, 68,
+ 54, 36, 88, 50, 62, 24, 104, 26, 80, 54,
+ 44, 30, 64, 32, 18, 18, 24, 5, 124, 76,
+ 24, 4, 52, 18, 9, 10, 46, 88, 84, 48,
+ 22, 80, 38, 36, 18, 16, 124, 10, 30, 44,
+ 42, 18, 36, 48, 56, 50, 66, 80, 110, 2,
+ 16, 7, 3, 124, 25, 93, 13, 20, 20, 13,
+ 15, 11, 25, 8, 31, 79, 13, 9, 56, 15,
+ 27, 13, 9, 2, 25, 21, 15, 67, 53, 23,
+ 25, 11, 68, 87, 31, 62, 51, 15, 11, 15,
+ 16, 11, 23, 12, 32, 13, 13, 113, 46, 44,
+ 44, 0, 43, 49, 53, 79, 51, 79, 95, 77,
+ 119, 103, 95, 125, 125, 125, 125, 113, 87, 77,
+ 87, 117, 93, 73, 75, 73, 31, 65, 75, 69,
+ 75, 115, 83, 73, 125, 97, 125, 121, 125, 125,
+ 125, 125, 47, 43, 125, 69, 73, 83, 123, 91,
+ 93, 93, 69, 79, 47, 19, 109, 121, 125, 69,
+ 91, 101, 9, 28, 40, 46, 90, 60, 70, 18,
+ 84, 70, 92, 70, 96, 80, 60, 74, 39, 57,
+ 77, 93, 89, 125, 125, 125, 42, 96, 88, 98,
+ 78, 84, 50, 50, 28, 32, 9, 28, 40, 46,
+ 90, 60, 70, 18, 84, 70, 92, 70, 96, 80,
+ 60, 74, 39, 57, 77, 93, 89, 125, 125, 125,
+ },
+
+ {
+ /* Context Tables for P, SP, B Slices :: cabac_init_idc = 2, qp = 51 */
+
+ 30, 6, 39, 30, 6, 39, 52, 66, 60, 26,
+ 19, 89, 31, 38, 88, 22, 102, 69, 17, 76,
+ 17, 25, 90, 75, 79, 19, 57, 125, 125, 125,
+ 76, 34, 7, 17, 76, 17, 19, 40, 78, 7,
+ 20, 16, 28, 107, 115, 65, 103, 5, 11, 1,
+ 29, 61, 33, 67, 1, 5, 9, 10, 72, 0,
+ 44, 0, 0, 0, 18, 69, 67, 28, 37, 15,
+ 14, 8, 0, 102, 54, 26, 118, 114, 26, 54,
+ 70, 64, 36, 30, 108, 103, 55, 53, 59, 56,
+ 19, 50, 108, 0, 9, 16, 64, 3, 17, 17,
+ 31, 54, 27, 36, 14, 19, 28, 5, 2, 18,
+ 34, 50, 46, 16, 35, 11, 10, 1, 1, 13,
+ 64, 3, 2, 70, 58, 82, 64, 60, 75, 28,
+ 32, 33, 7, 21, 13, 78, 8, 18, 94, 124,
+ 124, 124, 124, 79, 107, 30, 88, 86, 125, 21,
+ 34, 77, 58, 15, 10, 92, 66, 106, 100, 112,
+ 107, 26, 55, 117, 123, 14, 24, 42, 14, 1,
+ 14, 14, 1, 28, 10, 41, 27, 8, 1, 1,
+ 31, 11, 31, 45, 13, 9, 1, 29, 27, 109,
+ 31, 39, 11, 65, 49, 59, 33, 45, 45, 45,
+ 61, 115, 73, 27, 119, 125, 91, 119, 103, 125,
+ 67, 55, 125, 25, 39, 39, 61, 73, 103, 93,
+ 77, 109, 59, 73, 81, 113, 26, 50, 104, 60,
+ 20, 8, 46, 32, 22, 14, 58, 60, 102, 70,
+ 56, 36, 90, 52, 66, 26, 108, 26, 82, 56,
+ 46, 32, 66, 32, 18, 20, 26, 5, 124, 78,
+ 24, 4, 54, 18, 9, 12, 48, 90, 86, 50,
+ 22, 82, 38, 38, 18, 16, 124, 14, 32, 46,
+ 46, 20, 40, 50, 58, 54, 70, 84, 114, 2,
+ 18, 7, 1, 124, 25, 95, 13, 22, 22, 13,
+ 15, 11, 25, 10, 31, 81, 13, 9, 56, 15,
+ 27, 13, 9, 2, 27, 21, 15, 69, 55, 23,
+ 25, 11, 70, 89, 31, 64, 53, 15, 11, 15,
+ 18, 11, 25, 14, 34, 13, 13, 115, 44, 42,
+ 42, 3, 47, 53, 57, 85, 57, 85, 101, 83,
+ 125, 109, 99, 125, 125, 125, 125, 119, 91, 81,
+ 91, 123, 97, 75, 77, 75, 31, 69, 79, 73,
+ 79, 119, 87, 75, 125, 101, 125, 125, 125, 125,
+ 125, 125, 49, 45, 125, 71, 77, 85, 125, 95,
+ 97, 95, 71, 81, 47, 17, 111, 125, 125, 71,
+ 93, 103, 9, 30, 40, 46, 92, 62, 72, 18,
+ 86, 72, 94, 72, 98, 80, 62, 74, 43, 61,
+ 81, 97, 93, 125, 125, 125, 42, 98, 90, 100,
+ 78, 86, 52, 50, 30, 32, 9, 30, 40, 46,
+ 92, 62, 72, 18, 86, 72, 94, 72, 98, 80,
+ 62, 74, 43, 61, 81, 97, 93, 125, 125, 125,
+ },
+
+ },
+
+ {
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 0 */
+
+ 124, 18, 21, 124, 18, 21, 125, 81, 20, 18,
+ 24, 60, 122, 124, 108, 28, 109, 12, 29, 3,
+ 2, 28, 19, 26, 1, 40, 124, 7, 53, 81,
+ 125, 81, 7, 29, 3, 2, 45, 63, 4, 36,
+ 11, 35, 65, 16, 7, 45, 49, 10, 25, 61,
+ 18, 11, 35, 49, 7, 21, 21, 33, 17, 10,
+ 44, 0, 0, 0, 39, 45, 67, 17, 44, 2,
+ 104, 16, 11, 125, 77, 37, 21, 87, 125, 125,
+ 125, 63, 125, 101, 125, 119, 103, 117, 103, 0,
+ 9, 41, 81, 13, 59, 53, 125, 21, 67, 55,
+ 125, 14, 37, 25, 123, 59, 47, 27, 15, 0,
+ 9, 41, 2, 3, 4, 14, 5, 1, 4, 29,
+ 26, 22, 56, 38, 50, 36, 34, 38, 92, 24,
+ 26, 88, 60, 2, 89, 73, 75, 55, 61, 49,
+ 41, 45, 39, 47, 61, 13, 17, 21, 8, 77,
+ 73, 63, 23, 17, 23, 15, 34, 11, 2, 3,
+ 52, 17, 12, 18, 2, 17, 124, 108, 76, 90,
+ 108, 88, 52, 90, 68, 60, 66, 36, 10, 2,
+ 4, 50, 36, 48, 42, 38, 36, 44, 28, 58,
+ 42, 16, 24, 34, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 124, 92,
+ 124, 120, 82, 124, 124, 124, 124, 120, 116, 124,
+ 94, 82, 30, 52, 6, 9, 67, 15, 42, 26,
+ 18, 2, 10, 0, 17, 21, 55, 7, 72, 48,
+ 38, 34, 1, 9, 29, 27, 45, 57, 16, 6,
+ 2, 3, 19, 25, 33, 49, 93, 67, 41, 31,
+ 19, 21, 45, 65, 67, 107, 29, 60, 30, 20,
+ 2, 15, 31, 45, 53, 67, 124, 59, 41, 31,
+ 5, 15, 2, 6, 8, 23, 2, 10, 5, 31,
+ 15, 9, 38, 2, 54, 46, 72, 68, 38, 54,
+ 62, 42, 30, 2, 34, 1, 81, 67, 65, 49,
+ 43, 43, 43, 49, 5, 27, 25, 25, 10, 25,
+ 39, 71, 63, 63, 25, 21, 13, 23, 9, 3,
+ 19, 2, 2, 9, 23, 16, 1, 13, 114, 88,
+ 94, 98, 100, 104, 96, 94, 80, 80, 86, 74,
+ 38, 46, 32, 92, 84, 82, 72, 68, 56, 26,
+ 12, 0, 27, 37, 61, 11, 91, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 122, 100,
+ 56, 10, 124, 124, 66, 124, 124, 124, 120, 124,
+ 116, 104, 116, 102, 104, 68, 74, 48, 5, 84,
+ 64, 26, 113, 97, 101, 43, 57, 51, 15, 35,
+ 33, 9, 13, 14, 9, 26, 21, 124, 124, 124,
+ 124, 120, 114, 58, 18, 37, 23, 80, 58, 40,
+ 18, 16, 4, 1, 9, 57, 85, 67, 53, 53,
+ 49, 19, 31, 45, 19, 13, 11, 5, 1, 10,
+ 8, 124, 124, 124, 124, 120, 108, 86, 54, 7,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 1 */
+
+ 124, 18, 21, 124, 18, 21, 123, 77, 22, 20,
+ 24, 58, 120, 124, 108, 28, 103, 12, 27, 1,
+ 2, 28, 17, 24, 3, 40, 124, 9, 55, 81,
+ 121, 77, 7, 27, 1, 2, 43, 59, 6, 36,
+ 9, 33, 63, 16, 7, 43, 49, 10, 23, 59,
+ 18, 11, 33, 49, 5, 19, 19, 31, 15, 10,
+ 44, 0, 0, 0, 37, 45, 67, 15, 44, 2,
+ 104, 16, 11, 123, 75, 37, 19, 83, 123, 123,
+ 123, 59, 123, 97, 123, 115, 101, 115, 101, 2,
+ 7, 39, 79, 11, 57, 51, 123, 19, 65, 53,
+ 123, 16, 35, 23, 119, 57, 45, 25, 13, 2,
+ 7, 39, 4, 1, 4, 14, 3, 1, 4, 27,
+ 26, 22, 56, 38, 50, 36, 34, 38, 90, 24,
+ 26, 86, 58, 2, 87, 71, 73, 53, 59, 47,
+ 39, 43, 37, 45, 57, 13, 17, 19, 6, 75,
+ 71, 63, 21, 17, 21, 13, 34, 9, 2, 3,
+ 50, 15, 12, 16, 2, 17, 124, 108, 76, 90,
+ 108, 88, 52, 90, 68, 58, 66, 36, 10, 2,
+ 4, 50, 36, 48, 42, 38, 34, 44, 28, 56,
+ 40, 16, 22, 32, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 120, 88,
+ 124, 118, 80, 124, 124, 124, 124, 116, 112, 122,
+ 90, 78, 30, 50, 4, 9, 67, 13, 44, 28,
+ 20, 4, 10, 0, 15, 19, 53, 5, 74, 50,
+ 40, 34, 0, 7, 27, 25, 43, 55, 18, 8,
+ 4, 1, 17, 23, 31, 47, 89, 65, 37, 29,
+ 17, 19, 43, 63, 65, 103, 27, 62, 32, 22,
+ 4, 13, 29, 43, 51, 65, 124, 57, 39, 29,
+ 5, 13, 2, 8, 10, 21, 4, 12, 3, 29,
+ 15, 9, 38, 4, 54, 46, 70, 68, 38, 52,
+ 60, 42, 30, 2, 32, 1, 79, 65, 63, 47,
+ 41, 41, 41, 47, 5, 25, 23, 23, 10, 23,
+ 37, 69, 61, 63, 25, 19, 13, 21, 9, 3,
+ 17, 2, 2, 7, 21, 16, 1, 13, 114, 88,
+ 94, 98, 98, 104, 96, 94, 80, 80, 86, 74,
+ 38, 44, 30, 90, 82, 80, 70, 66, 54, 26,
+ 12, 0, 25, 35, 59, 11, 89, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 122, 118, 96,
+ 54, 10, 124, 124, 64, 124, 124, 124, 116, 124,
+ 112, 100, 112, 98, 100, 66, 70, 46, 7, 82,
+ 62, 24, 109, 93, 97, 41, 55, 49, 11, 33,
+ 31, 9, 11, 18, 5, 30, 19, 124, 124, 124,
+ 124, 116, 110, 54, 14, 39, 21, 82, 58, 40,
+ 18, 18, 4, 1, 9, 55, 83, 65, 51, 51,
+ 45, 17, 29, 43, 17, 11, 9, 3, 0, 12,
+ 8, 124, 124, 124, 124, 118, 106, 82, 52, 7,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 2 */
+
+ 124, 18, 21, 124, 18, 21, 119, 75, 22, 20,
+ 24, 56, 118, 122, 108, 28, 99, 12, 25, 0,
+ 2, 26, 17, 22, 5, 38, 120, 13, 57, 83,
+ 115, 75, 7, 25, 0, 2, 43, 57, 6, 34,
+ 9, 33, 61, 16, 7, 43, 49, 10, 23, 57,
+ 18, 11, 33, 49, 5, 19, 19, 31, 15, 10,
+ 44, 0, 0, 0, 35, 45, 67, 15, 42, 2,
+ 104, 16, 11, 121, 73, 37, 19, 81, 119, 119,
+ 121, 57, 119, 95, 119, 113, 99, 113, 99, 4,
+ 7, 37, 77, 11, 57, 49, 119, 19, 65, 53,
+ 121, 16, 35, 23, 117, 57, 43, 25, 13, 2,
+ 7, 37, 4, 1, 2, 14, 3, 1, 4, 27,
+ 26, 22, 54, 38, 48, 36, 34, 38, 86, 24,
+ 26, 82, 56, 0, 85, 69, 71, 51, 57, 45,
+ 37, 41, 37, 43, 55, 13, 17, 19, 4, 75,
+ 69, 63, 21, 17, 19, 13, 32, 7, 2, 3,
+ 48, 13, 10, 14, 2, 19, 120, 106, 74, 88,
+ 106, 86, 50, 88, 68, 56, 64, 36, 10, 2,
+ 4, 48, 34, 46, 40, 36, 32, 42, 26, 52,
+ 38, 14, 20, 30, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 116, 82,
+ 124, 114, 76, 124, 124, 124, 124, 112, 108, 116,
+ 86, 74, 28, 46, 2, 11, 67, 13, 44, 28,
+ 20, 4, 10, 0, 15, 19, 51, 5, 74, 50,
+ 40, 34, 2, 7, 25, 25, 41, 53, 20, 10,
+ 4, 1, 15, 23, 31, 45, 87, 63, 35, 27,
+ 17, 19, 41, 61, 63, 101, 27, 62, 32, 22,
+ 4, 11, 27, 41, 49, 63, 124, 57, 39, 29,
+ 5, 13, 2, 8, 10, 21, 4, 12, 1, 29,
+ 15, 9, 36, 4, 52, 44, 68, 66, 38, 50,
+ 58, 42, 30, 0, 30, 3, 77, 63, 61, 47,
+ 41, 41, 39, 45, 5, 25, 23, 23, 8, 23,
+ 37, 69, 59, 63, 25, 19, 13, 19, 9, 3,
+ 15, 2, 2, 7, 19, 14, 1, 15, 112, 88,
+ 94, 96, 96, 102, 94, 92, 78, 78, 84, 72,
+ 36, 42, 28, 86, 80, 76, 66, 64, 52, 24,
+ 10, 0, 25, 35, 59, 13, 87, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 118, 114, 92,
+ 52, 8, 124, 120, 62, 124, 124, 124, 112, 120,
+ 108, 96, 108, 94, 96, 62, 66, 42, 9, 78,
+ 58, 20, 107, 91, 95, 39, 53, 47, 7, 31,
+ 29, 9, 9, 20, 3, 32, 17, 124, 124, 124,
+ 124, 110, 104, 48, 10, 41, 21, 82, 58, 40,
+ 18, 18, 4, 1, 9, 53, 81, 63, 49, 49,
+ 43, 15, 27, 41, 15, 9, 7, 3, 2, 12,
+ 8, 124, 124, 124, 122, 114, 102, 78, 48, 9,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 3 */
+
+ 124, 18, 21, 124, 18, 21, 115, 71, 24, 20,
+ 22, 52, 114, 120, 108, 28, 95, 12, 23, 2,
+ 2, 24, 17, 20, 7, 38, 116, 15, 59, 83,
+ 109, 73, 7, 23, 2, 2, 41, 55, 8, 34,
+ 9, 31, 59, 14, 9, 43, 49, 10, 23, 57,
+ 18, 11, 33, 49, 3, 19, 19, 31, 13, 10,
+ 44, 0, 0, 0, 35, 45, 67, 13, 40, 2,
+ 104, 16, 11, 119, 71, 37, 17, 79, 115, 115,
+ 117, 55, 115, 93, 115, 111, 97, 111, 97, 6,
+ 7, 35, 75, 11, 55, 49, 115, 19, 63, 51,
+ 119, 16, 35, 21, 113, 55, 41, 25, 13, 2,
+ 7, 35, 6, 0, 2, 14, 3, 1, 4, 27,
+ 26, 20, 54, 38, 46, 36, 34, 38, 82, 24,
+ 24, 78, 54, 1, 83, 67, 69, 49, 55, 45,
+ 35, 41, 35, 41, 53, 13, 17, 19, 2, 73,
+ 67, 63, 21, 17, 17, 13, 30, 5, 2, 3,
+ 46, 11, 10, 12, 2, 21, 118, 104, 74, 86,
+ 104, 84, 50, 86, 66, 54, 62, 36, 10, 2,
+ 2, 46, 32, 44, 38, 34, 30, 40, 26, 48,
+ 36, 14, 18, 28, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 124, 110, 78,
+ 124, 110, 74, 124, 122, 124, 118, 108, 102, 112,
+ 82, 68, 26, 42, 0, 13, 67, 13, 46, 28,
+ 20, 4, 10, 0, 15, 19, 51, 5, 74, 50,
+ 40, 34, 4, 5, 25, 23, 41, 51, 22, 10,
+ 6, 1, 13, 21, 29, 45, 85, 61, 33, 25,
+ 15, 19, 39, 59, 61, 99, 25, 62, 32, 22,
+ 4, 9, 27, 39, 47, 61, 124, 55, 37, 27,
+ 5, 13, 2, 8, 10, 21, 4, 12, 1, 29,
+ 15, 9, 36, 6, 50, 42, 66, 64, 38, 48,
+ 56, 42, 30, 0, 28, 3, 75, 61, 59, 45,
+ 39, 39, 39, 43, 5, 25, 23, 21, 8, 23,
+ 37, 67, 57, 63, 25, 19, 13, 17, 9, 3,
+ 13, 2, 2, 7, 17, 12, 1, 17, 110, 86,
+ 92, 94, 94, 100, 92, 90, 76, 76, 82, 70,
+ 34, 40, 26, 84, 78, 74, 62, 60, 50, 22,
+ 10, 1, 25, 35, 59, 13, 85, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 114, 108, 88,
+ 48, 6, 122, 118, 58, 124, 124, 120, 108, 116,
+ 104, 92, 104, 90, 90, 58, 62, 38, 11, 74,
+ 54, 18, 105, 89, 93, 37, 51, 45, 5, 29,
+ 27, 9, 7, 24, 0, 36, 15, 124, 124, 124,
+ 124, 104, 98, 42, 6, 43, 21, 82, 58, 40,
+ 18, 18, 4, 1, 9, 53, 79, 61, 47, 47,
+ 41, 15, 27, 39, 15, 9, 7, 3, 2, 12,
+ 8, 124, 124, 124, 118, 110, 98, 74, 44, 11,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 4 */
+
+ 124, 18, 21, 124, 18, 21, 113, 69, 24, 20,
+ 22, 50, 112, 116, 108, 28, 89, 10, 21, 2,
+ 2, 22, 17, 18, 9, 36, 112, 19, 61, 85,
+ 103, 71, 7, 21, 2, 2, 41, 53, 8, 32,
+ 9, 31, 59, 14, 9, 41, 49, 10, 23, 55,
+ 16, 13, 33, 49, 3, 17, 19, 29, 13, 10,
+ 44, 0, 0, 0, 33, 47, 67, 13, 38, 2,
+ 104, 16, 11, 117, 69, 37, 17, 75, 113, 111,
+ 115, 53, 113, 89, 111, 109, 97, 109, 97, 6,
+ 7, 33, 73, 11, 55, 47, 111, 19, 63, 51,
+ 117, 16, 33, 21, 111, 55, 41, 25, 11, 2,
+ 7, 35, 6, 0, 0, 12, 3, 1, 4, 27,
+ 26, 20, 52, 38, 46, 36, 34, 36, 78, 24,
+ 24, 74, 52, 3, 81, 65, 67, 47, 55, 43,
+ 33, 39, 35, 39, 51, 13, 17, 17, 0, 73,
+ 65, 63, 21, 17, 17, 13, 28, 3, 2, 3,
+ 42, 9, 8, 10, 2, 23, 114, 102, 72, 84,
+ 102, 82, 48, 84, 66, 50, 60, 34, 10, 2,
+ 2, 44, 32, 42, 38, 32, 28, 38, 24, 44,
+ 34, 12, 16, 26, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 118, 106, 72,
+ 124, 108, 70, 124, 118, 124, 114, 102, 98, 106,
+ 78, 64, 24, 40, 3, 15, 67, 13, 46, 30,
+ 20, 4, 10, 0, 15, 19, 49, 3, 76, 50,
+ 40, 34, 6, 5, 23, 23, 39, 51, 24, 12,
+ 6, 1, 13, 21, 29, 43, 83, 61, 31, 25,
+ 15, 19, 37, 57, 61, 97, 25, 64, 32, 22,
+ 4, 7, 25, 39, 45, 59, 124, 55, 37, 27,
+ 5, 13, 2, 8, 10, 19, 4, 12, 0, 29,
+ 15, 9, 34, 6, 48, 40, 64, 62, 38, 44,
+ 54, 40, 30, 1, 26, 5, 75, 61, 57, 45,
+ 39, 39, 37, 41, 7, 25, 23, 21, 6, 23,
+ 37, 67, 55, 63, 25, 17, 13, 17, 9, 3,
+ 11, 2, 0, 7, 15, 12, 3, 19, 108, 86,
+ 92, 92, 92, 98, 90, 88, 74, 74, 80, 68,
+ 32, 38, 24, 80, 74, 70, 58, 58, 48, 20,
+ 8, 1, 25, 35, 59, 15, 85, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 120, 110, 104, 84,
+ 46, 4, 118, 114, 56, 124, 124, 116, 104, 110,
+ 100, 88, 100, 86, 86, 54, 58, 34, 13, 70,
+ 50, 14, 103, 87, 91, 37, 49, 43, 1, 27,
+ 25, 9, 5, 26, 2, 38, 15, 124, 124, 124,
+ 124, 98, 92, 36, 2, 45, 21, 82, 58, 40,
+ 18, 18, 4, 1, 9, 51, 77, 59, 45, 47,
+ 39, 13, 25, 37, 13, 7, 5, 1, 4, 14,
+ 8, 124, 124, 124, 114, 106, 94, 70, 40, 13,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 5 */
+
+ 124, 18, 21, 124, 18, 21, 109, 65, 24, 20,
+ 20, 46, 108, 114, 108, 28, 85, 10, 19, 4,
+ 2, 22, 15, 16, 11, 36, 108, 23, 63, 85,
+ 97, 67, 7, 19, 4, 2, 41, 51, 8, 32,
+ 9, 31, 57, 14, 11, 41, 49, 10, 23, 53,
+ 16, 13, 33, 49, 1, 17, 17, 29, 11, 10,
+ 44, 0, 0, 0, 33, 47, 67, 11, 36, 2,
+ 104, 16, 11, 115, 67, 37, 15, 73, 109, 107,
+ 111, 51, 109, 87, 107, 107, 95, 107, 95, 8,
+ 7, 31, 71, 11, 53, 45, 107, 19, 63, 49,
+ 113, 18, 33, 19, 109, 53, 39, 25, 11, 4,
+ 5, 33, 8, 2, 0, 12, 3, 1, 4, 27,
+ 26, 18, 50, 38, 44, 36, 34, 36, 74, 24,
+ 22, 72, 50, 5, 79, 63, 65, 45, 53, 41,
+ 31, 37, 33, 37, 49, 13, 17, 17, 1, 71,
+ 63, 63, 19, 17, 15, 13, 26, 1, 2, 3,
+ 40, 7, 8, 8, 2, 23, 112, 100, 72, 82,
+ 100, 80, 46, 84, 66, 48, 58, 34, 10, 2,
+ 0, 44, 30, 40, 36, 30, 26, 38, 22, 40,
+ 32, 10, 14, 24, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 114, 102, 68,
+ 120, 104, 66, 124, 114, 120, 110, 98, 94, 100,
+ 74, 58, 22, 36, 5, 15, 67, 13, 46, 30,
+ 20, 4, 10, 0, 15, 19, 49, 3, 76, 50,
+ 40, 34, 8, 3, 21, 23, 37, 49, 26, 14,
+ 6, 0, 11, 19, 27, 43, 81, 59, 27, 23,
+ 15, 17, 35, 55, 59, 95, 23, 64, 34, 22,
+ 4, 5, 23, 37, 43, 57, 124, 55, 37, 25,
+ 5, 13, 2, 8, 10, 19, 4, 14, 0, 29,
+ 15, 9, 32, 8, 46, 38, 62, 62, 38, 42,
+ 52, 40, 30, 3, 24, 5, 73, 59, 55, 43,
+ 37, 37, 37, 39, 7, 25, 23, 21, 4, 23,
+ 37, 65, 53, 63, 25, 17, 13, 15, 9, 3,
+ 9, 2, 0, 7, 13, 10, 3, 19, 106, 86,
+ 90, 92, 90, 96, 88, 86, 74, 72, 78, 66,
+ 30, 36, 22, 78, 72, 68, 54, 56, 46, 18,
+ 6, 3, 25, 33, 59, 15, 83, 124, 124, 124,
+ 124, 124, 124, 124, 124, 120, 116, 106, 100, 80,
+ 42, 2, 114, 110, 54, 122, 124, 112, 100, 106,
+ 96, 84, 96, 82, 80, 50, 54, 30, 15, 66,
+ 46, 12, 101, 83, 89, 35, 47, 41, 2, 25,
+ 23, 9, 3, 30, 6, 42, 13, 124, 124, 124,
+ 124, 94, 86, 32, 1, 47, 21, 82, 58, 40,
+ 18, 18, 4, 1, 9, 51, 75, 57, 43, 45,
+ 37, 11, 25, 35, 11, 5, 3, 1, 4, 14,
+ 8, 124, 124, 124, 112, 102, 90, 66, 36, 15,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 6 */
+
+ 124, 18, 23, 124, 18, 23, 105, 63, 26, 20,
+ 20, 44, 106, 112, 108, 28, 81, 10, 19, 6,
+ 2, 20, 15, 14, 13, 34, 106, 25, 65, 87,
+ 91, 65, 7, 19, 6, 2, 39, 49, 10, 30,
+ 7, 29, 55, 12, 11, 41, 49, 10, 21, 53,
+ 16, 13, 31, 49, 1, 17, 17, 29, 11, 10,
+ 44, 0, 0, 0, 31, 47, 67, 11, 36, 0,
+ 104, 16, 11, 113, 67, 37, 15, 71, 105, 103,
+ 109, 49, 105, 85, 103, 105, 93, 105, 93, 10,
+ 7, 29, 71, 9, 53, 45, 103, 19, 61, 49,
+ 111, 18, 33, 19, 105, 53, 37, 23, 11, 4,
+ 5, 31, 8, 2, 1, 12, 3, 1, 4, 27,
+ 26, 18, 50, 38, 42, 36, 34, 36, 70, 24,
+ 22, 68, 48, 7, 79, 61, 65, 45, 51, 41,
+ 29, 37, 33, 37, 45, 13, 17, 17, 3, 71,
+ 61, 63, 19, 17, 13, 11, 24, 1, 2, 3,
+ 38, 5, 6, 6, 2, 25, 108, 98, 70, 82,
+ 98, 80, 46, 82, 64, 46, 56, 34, 10, 2,
+ 0, 42, 28, 38, 34, 30, 24, 36, 22, 36,
+ 30, 10, 12, 22, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 124, 124, 108, 96, 62,
+ 116, 100, 64, 124, 108, 114, 104, 94, 88, 96,
+ 68, 54, 20, 32, 7, 17, 67, 11, 48, 30,
+ 22, 4, 10, 0, 15, 19, 47, 3, 76, 52,
+ 40, 34, 10, 3, 21, 21, 37, 47, 28, 14,
+ 8, 0, 9, 19, 27, 41, 79, 57, 25, 21,
+ 13, 17, 35, 55, 57, 91, 23, 64, 34, 22,
+ 6, 5, 23, 35, 43, 55, 124, 53, 35, 25,
+ 5, 13, 2, 8, 10, 19, 6, 14, 2, 29,
+ 15, 11, 32, 8, 44, 36, 60, 60, 38, 40,
+ 50, 40, 30, 3, 22, 7, 71, 57, 53, 43,
+ 37, 37, 35, 39, 7, 23, 21, 19, 4, 23,
+ 37, 65, 51, 63, 25, 17, 13, 13, 9, 3,
+ 7, 0, 0, 7, 13, 8, 3, 21, 104, 84,
+ 90, 90, 88, 96, 88, 84, 72, 72, 76, 64,
+ 28, 34, 20, 74, 70, 64, 50, 52, 42, 16,
+ 6, 3, 25, 33, 57, 17, 81, 124, 124, 124,
+ 124, 124, 124, 124, 124, 116, 110, 102, 94, 76,
+ 40, 2, 112, 108, 50, 118, 124, 108, 96, 102,
+ 92, 80, 90, 78, 76, 46, 50, 28, 19, 62,
+ 42, 8, 99, 81, 87, 33, 45, 39, 4, 23,
+ 21, 9, 1, 32, 8, 44, 11, 124, 124, 124,
+ 118, 88, 82, 26, 5, 51, 19, 82, 58, 40,
+ 18, 18, 4, 1, 9, 49, 73, 57, 41, 43,
+ 35, 11, 23, 33, 11, 5, 3, 1, 6, 14,
+ 8, 124, 124, 122, 108, 100, 88, 60, 34, 17,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 7 */
+
+ 124, 18, 23, 124, 18, 23, 101, 59, 26, 20,
+ 18, 40, 102, 108, 108, 28, 75, 8, 17, 6,
+ 2, 18, 15, 12, 15, 34, 102, 29, 67, 87,
+ 85, 63, 7, 17, 6, 2, 39, 47, 10, 30,
+ 7, 29, 55, 12, 13, 39, 49, 10, 21, 51,
+ 14, 13, 31, 49, 0, 15, 17, 27, 9, 10,
+ 44, 0, 0, 0, 31, 47, 67, 9, 34, 0,
+ 104, 16, 11, 111, 65, 37, 13, 67, 103, 99,
+ 105, 47, 103, 81, 99, 103, 91, 103, 93, 12,
+ 7, 27, 69, 9, 51, 43, 99, 19, 61, 47,
+ 109, 18, 31, 17, 103, 51, 37, 23, 9, 4,
+ 5, 29, 10, 4, 1, 10, 3, 1, 4, 27,
+ 26, 16, 48, 38, 42, 36, 34, 34, 66, 24,
+ 20, 64, 46, 9, 77, 59, 63, 43, 49, 39,
+ 27, 35, 31, 35, 43, 13, 17, 15, 5, 69,
+ 59, 63, 19, 17, 13, 11, 22, 0, 2, 3,
+ 34, 3, 6, 4, 2, 27, 106, 96, 70, 80,
+ 96, 78, 44, 80, 64, 44, 54, 34, 10, 2,
+ 1, 40, 28, 36, 34, 28, 22, 34, 20, 32,
+ 28, 8, 10, 20, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 124, 120, 122, 104, 92, 58,
+ 112, 98, 60, 124, 104, 110, 100, 88, 84, 90,
+ 64, 48, 18, 30, 11, 19, 67, 11, 48, 32,
+ 22, 4, 10, 0, 15, 19, 47, 1, 78, 52,
+ 40, 34, 12, 1, 19, 21, 35, 45, 30, 16,
+ 8, 0, 7, 17, 25, 41, 77, 57, 23, 21,
+ 13, 17, 33, 53, 57, 89, 21, 66, 34, 22,
+ 6, 3, 21, 33, 41, 53, 124, 53, 35, 23,
+ 5, 13, 2, 8, 10, 17, 6, 14, 2, 29,
+ 15, 11, 30, 10, 42, 34, 58, 58, 38, 38,
+ 48, 38, 30, 5, 20, 7, 69, 57, 51, 41,
+ 35, 35, 35, 37, 7, 23, 21, 19, 2, 23,
+ 37, 63, 49, 63, 25, 15, 13, 13, 9, 3,
+ 5, 0, 0, 7, 11, 8, 5, 23, 102, 84,
+ 88, 88, 86, 94, 86, 82, 70, 70, 74, 62,
+ 26, 32, 18, 72, 66, 62, 46, 50, 40, 14,
+ 4, 5, 25, 33, 57, 17, 79, 124, 124, 124,
+ 124, 124, 124, 124, 122, 112, 106, 98, 90, 72,
+ 36, 0, 108, 104, 48, 114, 124, 104, 92, 98,
+ 88, 76, 86, 74, 70, 42, 46, 24, 21, 58,
+ 38, 6, 97, 79, 85, 33, 43, 37, 8, 21,
+ 19, 9, 0, 36, 12, 48, 11, 124, 124, 122,
+ 112, 82, 76, 20, 9, 53, 19, 82, 58, 40,
+ 18, 18, 4, 1, 9, 49, 71, 55, 39, 41,
+ 33, 9, 23, 31, 9, 3, 1, 0, 6, 16,
+ 8, 124, 124, 118, 104, 96, 84, 56, 30, 19,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 8 */
+
+ 124, 16, 23, 124, 16, 23, 99, 57, 26, 20,
+ 18, 38, 100, 106, 108, 28, 71, 8, 15, 8,
+ 2, 16, 15, 10, 19, 32, 98, 33, 69, 89,
+ 81, 61, 7, 15, 8, 2, 39, 45, 10, 28,
+ 7, 29, 53, 10, 13, 39, 51, 10, 21, 51,
+ 14, 15, 31, 49, 0, 15, 17, 27, 9, 10,
+ 44, 0, 0, 0, 29, 49, 67, 9, 32, 0,
+ 104, 16, 11, 109, 63, 37, 13, 65, 99, 95,
+ 103, 45, 99, 79, 97, 101, 91, 101, 91, 12,
+ 7, 25, 67, 9, 51, 43, 97, 19, 61, 47,
+ 107, 18, 31, 17, 101, 51, 35, 23, 9, 4,
+ 5, 29, 10, 4, 3, 10, 3, 1, 4, 27,
+ 26, 16, 46, 38, 40, 36, 34, 34, 62, 24,
+ 20, 60, 44, 11, 75, 57, 61, 41, 49, 39,
+ 25, 35, 31, 33, 41, 13, 17, 15, 9, 69,
+ 57, 63, 19, 19, 11, 11, 20, 2, 2, 3,
+ 32, 1, 4, 2, 2, 29, 102, 94, 68, 78,
+ 94, 76, 42, 78, 62, 40, 52, 32, 10, 2,
+ 1, 38, 26, 34, 32, 26, 20, 32, 18, 28,
+ 24, 6, 8, 18, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 122, 116, 116, 98, 86, 52,
+ 108, 94, 56, 122, 100, 104, 94, 84, 78, 84,
+ 60, 44, 16, 26, 13, 21, 69, 11, 48, 32,
+ 22, 4, 10, 0, 15, 19, 45, 1, 78, 52,
+ 40, 34, 14, 1, 19, 21, 35, 45, 32, 16,
+ 8, 0, 7, 17, 25, 39, 75, 55, 21, 19,
+ 13, 17, 31, 51, 55, 87, 21, 66, 34, 22,
+ 6, 1, 21, 33, 39, 53, 124, 53, 35, 23,
+ 5, 13, 2, 8, 10, 17, 6, 14, 4, 29,
+ 15, 11, 28, 10, 40, 32, 56, 56, 38, 34,
+ 44, 38, 30, 7, 18, 9, 69, 55, 49, 41,
+ 35, 35, 33, 35, 9, 23, 21, 19, 0, 23,
+ 37, 63, 49, 65, 25, 15, 13, 11, 9, 3,
+ 5, 0, 1, 7, 9, 6, 5, 25, 100, 82,
+ 88, 86, 82, 92, 84, 80, 68, 68, 72, 60,
+ 24, 30, 16, 68, 64, 58, 42, 46, 38, 12,
+ 2, 5, 25, 33, 57, 19, 79, 124, 124, 124,
+ 124, 124, 124, 122, 116, 108, 102, 94, 84, 68,
+ 34, 1, 104, 100, 44, 110, 122, 98, 86, 92,
+ 82, 72, 82, 68, 66, 38, 40, 20, 23, 54,
+ 34, 2, 95, 77, 83, 31, 41, 37, 10, 19,
+ 19, 9, 0, 38, 14, 50, 9, 124, 124, 116,
+ 106, 76, 70, 14, 13, 55, 19, 82, 58, 40,
+ 18, 18, 4, 1, 9, 47, 71, 53, 37, 41,
+ 31, 9, 21, 31, 9, 3, 1, 0, 8, 16,
+ 6, 124, 124, 114, 100, 92, 80, 52, 26, 21,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 9 */
+
+ 124, 16, 23, 124, 16, 23, 95, 55, 28, 20,
+ 18, 36, 98, 104, 108, 28, 67, 8, 13, 10,
+ 2, 16, 13, 8, 21, 30, 94, 35, 71, 91,
+ 75, 57, 7, 13, 10, 2, 37, 43, 12, 26,
+ 7, 27, 51, 10, 13, 39, 51, 10, 21, 49,
+ 14, 15, 31, 49, 0, 15, 15, 27, 9, 10,
+ 44, 0, 0, 0, 27, 49, 67, 9, 30, 0,
+ 104, 16, 11, 107, 61, 37, 13, 63, 95, 91,
+ 99, 41, 95, 77, 93, 99, 89, 99, 89, 14,
+ 5, 23, 65, 9, 49, 41, 93, 19, 59, 47,
+ 103, 20, 31, 17, 97, 51, 33, 23, 9, 6,
+ 3, 27, 10, 4, 3, 10, 1, 1, 4, 25,
+ 26, 16, 46, 38, 38, 36, 34, 34, 58, 24,
+ 20, 58, 42, 11, 73, 55, 59, 39, 47, 37,
+ 23, 33, 31, 31, 39, 13, 17, 15, 11, 67,
+ 55, 63, 17, 19, 9, 11, 18, 4, 2, 3,
+ 30, 0, 2, 0, 2, 29, 100, 92, 68, 76,
+ 92, 74, 42, 78, 62, 38, 50, 32, 10, 2,
+ 1, 38, 24, 32, 30, 24, 18, 32, 18, 26,
+ 22, 6, 6, 16, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 118, 112, 112, 92, 82, 46,
+ 106, 90, 54, 118, 96, 100, 90, 80, 74, 80,
+ 56, 40, 16, 22, 15, 21, 69, 11, 50, 32,
+ 22, 6, 10, 0, 13, 19, 43, 1, 78, 52,
+ 42, 34, 16, 0, 17, 19, 33, 43, 34, 18,
+ 10, 2, 5, 15, 25, 37, 73, 53, 17, 17,
+ 11, 15, 29, 49, 53, 85, 19, 66, 36, 24,
+ 6, 0, 19, 31, 37, 51, 124, 51, 33, 21,
+ 5, 13, 2, 10, 12, 17, 6, 16, 6, 29,
+ 15, 11, 28, 10, 38, 32, 54, 56, 38, 32,
+ 42, 38, 30, 7, 16, 11, 67, 53, 47, 41,
+ 33, 35, 31, 33, 9, 23, 21, 17, 0, 23,
+ 37, 63, 47, 65, 25, 15, 13, 9, 9, 3,
+ 3, 0, 1, 7, 7, 4, 5, 25, 98, 82,
+ 88, 86, 80, 90, 82, 78, 68, 66, 70, 60,
+ 24, 28, 14, 66, 62, 54, 38, 44, 36, 12,
+ 2, 5, 23, 31, 57, 21, 77, 124, 124, 124,
+ 124, 124, 124, 118, 112, 104, 98, 90, 80, 64,
+ 32, 3, 100, 98, 42, 106, 118, 94, 82, 88,
+ 78, 68, 78, 64, 62, 36, 36, 16, 25, 50,
+ 30, 1, 93, 73, 79, 29, 39, 35, 14, 17,
+ 17, 9, 2, 42, 16, 54, 7, 124, 124, 112,
+ 100, 72, 64, 10, 17, 57, 19, 82, 58, 40,
+ 18, 20, 4, 1, 9, 45, 69, 51, 35, 39,
+ 27, 7, 19, 29, 7, 1, 0, 0, 10, 16,
+ 6, 124, 122, 112, 98, 88, 76, 48, 22, 21,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 10 */
+
+ 124, 16, 23, 124, 16, 23, 91, 51, 28, 20,
+ 16, 32, 94, 100, 108, 28, 61, 6, 11, 10,
+ 2, 14, 13, 6, 23, 30, 90, 39, 73, 91,
+ 69, 55, 7, 11, 10, 2, 37, 41, 12, 26,
+ 7, 27, 51, 10, 15, 37, 51, 10, 21, 47,
+ 12, 15, 31, 49, 2, 13, 15, 25, 7, 10,
+ 44, 0, 0, 0, 27, 49, 67, 7, 28, 0,
+ 104, 16, 11, 105, 59, 37, 11, 59, 93, 87,
+ 97, 39, 93, 73, 89, 97, 87, 97, 89, 16,
+ 5, 21, 63, 9, 49, 39, 89, 19, 59, 45,
+ 101, 20, 29, 15, 95, 49, 33, 23, 7, 6,
+ 3, 25, 12, 6, 5, 8, 1, 1, 4, 25,
+ 26, 14, 44, 38, 38, 36, 34, 32, 54, 24,
+ 18, 54, 40, 13, 71, 53, 57, 37, 45, 35,
+ 21, 31, 29, 29, 37, 13, 17, 13, 13, 67,
+ 53, 63, 17, 19, 9, 11, 16, 6, 2, 3,
+ 26, 2, 2, 1, 2, 31, 96, 90, 66, 74,
+ 90, 72, 40, 76, 62, 36, 48, 32, 10, 2,
+ 3, 36, 24, 30, 30, 22, 16, 30, 16, 22,
+ 20, 4, 4, 14, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 114, 108, 106, 88, 78, 42,
+ 102, 88, 50, 112, 92, 96, 86, 74, 70, 74,
+ 52, 34, 14, 20, 19, 23, 69, 11, 50, 34,
+ 22, 6, 10, 0, 13, 19, 43, 0, 80, 52,
+ 42, 34, 18, 0, 15, 19, 31, 41, 36, 20,
+ 10, 2, 3, 15, 23, 37, 71, 53, 15, 17,
+ 11, 15, 27, 47, 53, 83, 19, 68, 36, 24,
+ 6, 2, 17, 29, 35, 49, 124, 51, 33, 21,
+ 5, 13, 2, 10, 12, 15, 6, 16, 6, 29,
+ 15, 11, 26, 12, 36, 30, 52, 54, 38, 30,
+ 40, 36, 30, 9, 14, 11, 65, 53, 45, 39,
+ 33, 33, 31, 31, 9, 23, 21, 17, 1, 23,
+ 37, 61, 45, 65, 25, 13, 13, 9, 9, 3,
+ 1, 0, 1, 7, 5, 4, 7, 27, 96, 82,
+ 86, 84, 78, 88, 80, 76, 66, 64, 68, 58,
+ 22, 26, 12, 62, 58, 52, 34, 42, 34, 10,
+ 0, 7, 23, 31, 57, 21, 75, 124, 124, 124,
+ 124, 124, 120, 114, 106, 100, 94, 86, 76, 60,
+ 28, 5, 96, 94, 40, 102, 114, 90, 78, 84,
+ 74, 64, 74, 60, 56, 32, 32, 12, 27, 46,
+ 26, 3, 91, 71, 77, 29, 37, 33, 18, 15,
+ 15, 9, 4, 44, 20, 56, 7, 124, 120, 106,
+ 94, 66, 58, 4, 21, 59, 19, 82, 58, 40,
+ 18, 20, 4, 1, 9, 45, 67, 49, 33, 37,
+ 25, 5, 19, 27, 5, 0, 2, 2, 10, 18,
+ 6, 120, 118, 108, 94, 84, 72, 44, 18, 23,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 11 */
+
+ 124, 16, 25, 124, 16, 25, 87, 49, 30, 20,
+ 16, 30, 92, 98, 108, 28, 57, 6, 11, 12,
+ 2, 12, 13, 4, 25, 28, 88, 41, 75, 93,
+ 63, 53, 7, 11, 12, 2, 35, 39, 14, 24,
+ 5, 25, 49, 8, 15, 37, 51, 10, 19, 47,
+ 12, 15, 29, 49, 2, 13, 15, 25, 7, 10,
+ 44, 0, 0, 0, 25, 49, 67, 7, 28, 1,
+ 104, 16, 11, 103, 59, 37, 11, 57, 89, 83,
+ 93, 37, 89, 71, 85, 95, 85, 95, 87, 18,
+ 5, 19, 63, 7, 47, 39, 85, 19, 57, 45,
+ 99, 20, 29, 15, 91, 49, 31, 21, 7, 6,
+ 3, 23, 12, 6, 5, 8, 1, 1, 4, 25,
+ 26, 14, 44, 38, 36, 36, 34, 32, 50, 24,
+ 18, 50, 38, 15, 71, 51, 57, 37, 43, 35,
+ 19, 31, 29, 29, 33, 13, 17, 13, 15, 65,
+ 51, 63, 17, 19, 7, 9, 14, 6, 2, 3,
+ 24, 4, 0, 3, 2, 33, 94, 88, 66, 74,
+ 88, 72, 40, 74, 60, 34, 46, 32, 10, 2,
+ 3, 34, 22, 28, 28, 22, 14, 28, 16, 18,
+ 18, 4, 2, 12, 51, 124, 124, 124, 124, 124,
+ 124, 124, 124, 124, 108, 104, 102, 82, 72, 36,
+ 98, 84, 48, 108, 86, 90, 80, 70, 64, 70,
+ 46, 30, 12, 16, 21, 25, 69, 9, 52, 34,
+ 24, 6, 10, 0, 13, 19, 41, 0, 80, 54,
+ 42, 34, 20, 2, 15, 17, 31, 39, 38, 20,
+ 12, 2, 1, 13, 23, 35, 69, 51, 13, 15,
+ 9, 15, 27, 47, 51, 79, 17, 68, 36, 24,
+ 8, 2, 17, 27, 35, 47, 124, 49, 31, 19,
+ 5, 13, 2, 10, 12, 15, 8, 16, 8, 29,
+ 15, 13, 26, 12, 34, 28, 50, 52, 38, 28,
+ 38, 36, 30, 9, 12, 13, 63, 51, 43, 39,
+ 31, 33, 29, 31, 9, 21, 19, 15, 1, 23,
+ 37, 61, 43, 65, 25, 13, 13, 7, 9, 3,
+ 0, 1, 1, 7, 5, 2, 7, 29, 94, 80,
+ 86, 82, 76, 88, 80, 74, 64, 64, 66, 56,
+ 20, 24, 10, 60, 56, 48, 30, 38, 30, 8,
+ 0, 7, 23, 31, 55, 23, 73, 124, 124, 124,
+ 124, 124, 116, 110, 102, 96, 88, 82, 70, 56,
+ 26, 5, 94, 92, 36, 98, 108, 86, 74, 80,
+ 70, 60, 68, 56, 52, 28, 28, 10, 31, 42,
+ 22, 7, 89, 69, 75, 27, 35, 31, 20, 13,
+ 13, 9, 6, 48, 22, 60, 5, 122, 118, 102,
+ 88, 60, 54, 1, 25, 63, 17, 82, 58, 40,
+ 18, 20, 4, 1, 9, 43, 65, 49, 31, 35,
+ 23, 5, 17, 25, 5, 0, 2, 2, 12, 18,
+ 6, 118, 116, 104, 90, 82, 70, 38, 16, 25,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 12 */
+
+ 124, 16, 25, 124, 16, 25, 85, 45, 30, 20,
+ 14, 26, 88, 96, 108, 28, 53, 6, 9, 14,
+ 2, 10, 13, 2, 27, 28, 84, 45, 77, 93,
+ 57, 51, 7, 9, 14, 2, 35, 37, 14, 24,
+ 5, 25, 47, 8, 17, 37, 51, 10, 19, 45,
+ 12, 17, 29, 49, 4, 13, 15, 25, 5, 10,
+ 44, 0, 0, 0, 25, 51, 67, 5, 26, 1,
+ 104, 16, 11, 101, 57, 37, 9, 55, 85, 79,
+ 91, 35, 85, 69, 81, 93, 85, 93, 85, 18,
+ 5, 17, 61, 7, 47, 37, 81, 19, 57, 43,
+ 97, 20, 29, 13, 89, 47, 29, 21, 7, 6,
+ 3, 23, 14, 8, 7, 8, 1, 1, 4, 25,
+ 26, 12, 42, 38, 34, 36, 34, 32, 46, 24,
+ 16, 46, 36, 17, 69, 49, 55, 35, 43, 33,
+ 17, 29, 27, 27, 31, 13, 17, 13, 17, 65,
+ 49, 63, 17, 19, 5, 9, 12, 8, 2, 3,
+ 22, 6, 0, 5, 2, 35, 90, 86, 64, 72,
+ 86, 70, 38, 72, 60, 30, 44, 30, 10, 2,
+ 5, 32, 20, 26, 26, 20, 12, 26, 14, 14,
+ 16, 2, 0, 10, 51, 124, 124, 122, 124, 124,
+ 124, 124, 124, 122, 104, 100, 96, 78, 68, 32,
+ 94, 80, 44, 104, 82, 86, 76, 66, 60, 64,
+ 42, 24, 10, 12, 23, 27, 69, 9, 52, 34,
+ 24, 6, 10, 0, 13, 19, 41, 0, 80, 54,
+ 42, 34, 22, 2, 13, 17, 29, 39, 40, 22,
+ 12, 2, 1, 13, 21, 35, 67, 49, 11, 13,
+ 9, 15, 25, 45, 49, 77, 17, 68, 36, 24,
+ 8, 4, 15, 27, 33, 45, 124, 49, 31, 19,
+ 5, 13, 2, 10, 12, 15, 8, 16, 8, 29,
+ 15, 13, 24, 14, 32, 26, 48, 50, 38, 24,
+ 36, 36, 30, 11, 10, 13, 63, 49, 41, 37,
+ 31, 31, 29, 29, 11, 21, 19, 15, 3, 23,
+ 37, 59, 41, 65, 25, 13, 13, 5, 9, 3,
+ 2, 1, 3, 7, 3, 0, 7, 31, 92, 80,
+ 84, 80, 74, 86, 78, 72, 62, 62, 64, 54,
+ 18, 22, 8, 56, 54, 46, 26, 36, 28, 6,
+ 1, 9, 23, 31, 55, 23, 73, 124, 124, 124,
+ 124, 124, 112, 106, 96, 92, 84, 78, 66, 52,
+ 22, 7, 90, 88, 34, 94, 104, 82, 70, 74,
+ 66, 56, 64, 52, 46, 24, 24, 6, 33, 38,
+ 18, 9, 87, 67, 73, 25, 33, 29, 24, 11,
+ 11, 9, 8, 50, 26, 62, 3, 118, 114, 96,
+ 82, 54, 48, 7, 29, 65, 17, 82, 58, 40,
+ 18, 20, 4, 1, 9, 43, 63, 47, 29, 35,
+ 21, 3, 17, 23, 3, 2, 4, 2, 12, 18,
+ 6, 116, 112, 100, 86, 78, 66, 34, 12, 27,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 13 */
+
+ 124, 16, 25, 124, 16, 25, 81, 43, 30, 20,
+ 14, 24, 86, 92, 108, 28, 47, 4, 7, 14,
+ 2, 10, 11, 0, 29, 26, 80, 49, 79, 95,
+ 51, 47, 7, 7, 14, 2, 35, 35, 14, 22,
+ 5, 25, 47, 8, 17, 35, 51, 10, 19, 43,
+ 10, 17, 29, 49, 4, 11, 13, 23, 5, 10,
+ 44, 0, 0, 0, 23, 51, 67, 5, 24, 1,
+ 104, 16, 11, 99, 55, 37, 9, 51, 83, 75,
+ 87, 33, 83, 65, 77, 91, 83, 91, 85, 20,
+ 5, 15, 59, 7, 45, 35, 77, 19, 57, 43,
+ 93, 22, 27, 13, 87, 47, 29, 21, 5, 8,
+ 1, 21, 14, 8, 7, 6, 1, 1, 4, 25,
+ 26, 12, 40, 38, 34, 36, 34, 30, 42, 24,
+ 16, 44, 34, 19, 67, 47, 53, 33, 41, 31,
+ 15, 27, 27, 25, 29, 13, 17, 11, 19, 63,
+ 47, 63, 15, 19, 5, 9, 10, 10, 2, 3,
+ 18, 8, 1, 7, 2, 35, 88, 84, 64, 70,
+ 84, 68, 36, 72, 60, 28, 42, 30, 10, 2,
+ 5, 32, 20, 24, 26, 18, 10, 26, 12, 10,
+ 14, 0, 1, 8, 51, 122, 124, 118, 124, 122,
+ 120, 120, 120, 118, 100, 96, 92, 72, 64, 26,
+ 90, 78, 40, 98, 78, 82, 72, 60, 56, 58,
+ 38, 20, 8, 10, 27, 27, 69, 9, 52, 36,
+ 24, 6, 10, 0, 13, 19, 39, 2, 82, 54,
+ 42, 34, 24, 4, 11, 17, 27, 37, 42, 24,
+ 12, 4, 0, 11, 21, 33, 65, 49, 7, 13,
+ 9, 13, 23, 43, 49, 75, 15, 70, 38, 24,
+ 8, 6, 13, 25, 31, 43, 124, 49, 31, 17,
+ 5, 13, 2, 10, 12, 13, 8, 18, 10, 29,
+ 15, 13, 22, 14, 30, 24, 46, 50, 38, 22,
+ 34, 34, 30, 13, 8, 15, 61, 49, 39, 37,
+ 29, 31, 27, 27, 11, 21, 19, 15, 5, 23,
+ 37, 59, 39, 65, 25, 11, 13, 5, 9, 3,
+ 4, 1, 3, 7, 1, 0, 9, 31, 90, 80,
+ 84, 80, 72, 84, 76, 70, 62, 60, 62, 52,
+ 16, 20, 6, 54, 50, 42, 22, 34, 26, 4,
+ 3, 9, 23, 29, 55, 25, 71, 124, 124, 124,
+ 124, 120, 108, 102, 92, 88, 80, 74, 62, 48,
+ 20, 9, 86, 84, 32, 90, 100, 78, 66, 70,
+ 62, 52, 60, 48, 42, 20, 20, 2, 35, 34,
+ 14, 13, 85, 63, 71, 25, 31, 27, 28, 9,
+ 9, 9, 10, 54, 28, 66, 3, 116, 110, 92,
+ 76, 50, 42, 11, 33, 67, 17, 82, 58, 40,
+ 18, 20, 4, 1, 9, 41, 61, 45, 27, 33,
+ 19, 1, 15, 21, 1, 4, 6, 4, 14, 20,
+ 6, 112, 110, 98, 84, 74, 62, 30, 8, 29,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 14 */
+
+ 122, 16, 25, 122, 16, 25, 77, 39, 32, 20,
+ 12, 20, 82, 90, 108, 28, 43, 4, 5, 16,
+ 2, 8, 11, 1, 31, 26, 76, 51, 81, 95,
+ 45, 45, 7, 5, 16, 2, 33, 33, 16, 22,
+ 5, 23, 45, 6, 19, 35, 51, 10, 19, 43,
+ 10, 17, 29, 49, 6, 11, 13, 23, 3, 10,
+ 44, 0, 0, 0, 23, 51, 67, 3, 22, 1,
+ 104, 16, 11, 97, 53, 37, 7, 49, 79, 71,
+ 85, 31, 79, 63, 73, 89, 81, 89, 83, 22,
+ 5, 13, 57, 7, 45, 35, 73, 19, 55, 41,
+ 91, 22, 27, 11, 83, 45, 27, 21, 5, 8,
+ 1, 19, 16, 10, 9, 6, 1, 1, 4, 25,
+ 26, 10, 40, 38, 32, 36, 34, 30, 38, 24,
+ 14, 40, 32, 21, 65, 45, 51, 31, 39, 31,
+ 13, 27, 25, 23, 27, 13, 17, 11, 21, 63,
+ 45, 63, 15, 19, 3, 9, 8, 12, 2, 3,
+ 16, 10, 1, 9, 2, 37, 84, 82, 62, 68,
+ 82, 66, 36, 70, 58, 26, 40, 30, 10, 2,
+ 7, 30, 18, 22, 24, 16, 8, 24, 12, 6,
+ 12, 0, 3, 6, 51, 120, 122, 116, 124, 118,
+ 116, 116, 116, 112, 94, 92, 86, 68, 58, 22,
+ 86, 74, 38, 94, 74, 76, 66, 56, 50, 54,
+ 34, 14, 6, 6, 29, 29, 69, 9, 54, 36,
+ 24, 6, 10, 0, 13, 19, 39, 2, 82, 54,
+ 42, 34, 26, 4, 11, 15, 27, 35, 44, 24,
+ 14, 4, 2, 11, 19, 33, 63, 47, 5, 11,
+ 7, 13, 21, 41, 47, 73, 15, 70, 38, 24,
+ 8, 8, 13, 23, 29, 41, 124, 47, 29, 17,
+ 5, 13, 2, 10, 12, 13, 8, 18, 10, 29,
+ 15, 13, 22, 16, 28, 22, 44, 48, 38, 20,
+ 32, 34, 30, 13, 6, 15, 59, 47, 37, 35,
+ 29, 29, 27, 25, 11, 21, 19, 13, 5, 23,
+ 37, 57, 37, 65, 25, 11, 13, 3, 9, 3,
+ 6, 1, 3, 7, 0, 1, 9, 33, 88, 78,
+ 82, 78, 70, 82, 74, 68, 60, 58, 60, 50,
+ 14, 18, 4, 50, 48, 40, 18, 30, 24, 2,
+ 3, 11, 23, 29, 55, 25, 69, 124, 124, 122,
+ 122, 114, 104, 98, 86, 84, 76, 70, 56, 44,
+ 16, 11, 82, 82, 28, 86, 96, 74, 62, 66,
+ 58, 48, 56, 44, 36, 16, 16, 1, 37, 30,
+ 10, 15, 83, 61, 69, 23, 29, 25, 30, 7,
+ 7, 9, 12, 56, 32, 68, 1, 112, 108, 86,
+ 70, 44, 36, 17, 37, 69, 17, 82, 58, 40,
+ 18, 20, 4, 1, 9, 41, 59, 43, 25, 31,
+ 17, 1, 15, 19, 1, 4, 6, 4, 14, 20,
+ 6, 110, 106, 94, 80, 70, 58, 26, 4, 31,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 15 */
+
+ 120, 16, 25, 120, 16, 25, 73, 37, 32, 20,
+ 12, 18, 80, 88, 108, 28, 39, 4, 3, 18,
+ 2, 6, 11, 3, 33, 24, 72, 55, 83, 97,
+ 39, 43, 7, 3, 18, 2, 33, 31, 16, 20,
+ 5, 23, 43, 6, 19, 35, 51, 10, 19, 41,
+ 10, 17, 29, 49, 6, 11, 13, 23, 3, 10,
+ 44, 0, 0, 0, 21, 51, 67, 3, 20, 1,
+ 104, 16, 11, 95, 51, 37, 7, 47, 75, 67,
+ 81, 29, 75, 61, 69, 87, 79, 87, 81, 24,
+ 5, 11, 55, 7, 43, 33, 69, 19, 55, 41,
+ 89, 22, 27, 11, 81, 45, 25, 21, 5, 8,
+ 1, 17, 16, 10, 9, 6, 1, 1, 4, 25,
+ 26, 10, 38, 38, 30, 36, 34, 30, 34, 24,
+ 14, 36, 30, 23, 63, 43, 49, 29, 37, 29,
+ 11, 25, 25, 21, 25, 13, 17, 11, 23, 61,
+ 43, 63, 15, 19, 1, 9, 6, 14, 2, 3,
+ 14, 12, 3, 11, 2, 39, 82, 80, 62, 66,
+ 80, 64, 34, 68, 58, 24, 38, 30, 10, 2,
+ 7, 28, 16, 20, 22, 14, 6, 22, 10, 2,
+ 10, 1, 5, 4, 51, 116, 120, 112, 120, 114,
+ 112, 112, 112, 108, 90, 88, 82, 62, 54, 16,
+ 82, 70, 34, 90, 70, 72, 62, 52, 46, 48,
+ 30, 10, 4, 2, 31, 31, 69, 9, 54, 36,
+ 24, 6, 10, 0, 13, 19, 37, 2, 82, 54,
+ 42, 34, 28, 6, 9, 15, 25, 33, 46, 26,
+ 14, 4, 4, 9, 19, 31, 61, 45, 3, 9,
+ 7, 13, 19, 39, 45, 71, 13, 70, 38, 24,
+ 8, 10, 11, 21, 27, 39, 124, 47, 29, 15,
+ 5, 13, 2, 10, 12, 13, 8, 18, 12, 29,
+ 15, 13, 20, 16, 26, 20, 42, 46, 38, 18,
+ 30, 34, 30, 15, 4, 17, 57, 45, 35, 35,
+ 27, 29, 25, 23, 11, 21, 19, 13, 7, 23,
+ 37, 57, 35, 65, 25, 11, 13, 1, 9, 3,
+ 8, 1, 3, 7, 2, 3, 9, 35, 86, 78,
+ 82, 76, 68, 80, 72, 66, 58, 56, 58, 48,
+ 12, 16, 2, 48, 46, 36, 14, 28, 22, 0,
+ 5, 11, 23, 29, 55, 27, 67, 124, 124, 118,
+ 118, 108, 100, 94, 82, 80, 72, 66, 52, 40,
+ 14, 13, 78, 78, 26, 82, 92, 70, 58, 62,
+ 54, 44, 52, 40, 32, 12, 12, 5, 39, 26,
+ 6, 19, 81, 59, 67, 21, 27, 23, 34, 5,
+ 5, 9, 14, 60, 34, 72, 0, 110, 104, 82,
+ 64, 38, 30, 23, 41, 71, 17, 82, 58, 40,
+ 18, 20, 4, 1, 9, 39, 57, 41, 23, 29,
+ 15, 0, 13, 17, 0, 6, 8, 4, 16, 20,
+ 6, 108, 104, 90, 76, 66, 54, 22, 0, 33,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 16 */
+
+ 116, 14, 27, 116, 14, 27, 71, 35, 32, 20,
+ 10, 14, 76, 84, 106, 28, 35, 2, 3, 18,
+ 0, 4, 11, 7, 37, 22, 68, 59, 85, 99,
+ 35, 41, 9, 3, 18, 0, 33, 29, 16, 18,
+ 5, 23, 43, 4, 21, 35, 53, 10, 19, 41,
+ 8, 19, 29, 49, 6, 11, 13, 23, 3, 8,
+ 44, 0, 0, 0, 21, 53, 67, 3, 18, 3,
+ 104, 14, 11, 93, 51, 37, 7, 45, 73, 65,
+ 79, 27, 73, 59, 67, 85, 79, 85, 81, 24,
+ 5, 11, 55, 7, 43, 33, 67, 19, 55, 41,
+ 87, 22, 27, 11, 79, 45, 25, 21, 5, 8,
+ 1, 17, 16, 10, 11, 4, 1, 3, 4, 25,
+ 24, 8, 36, 38, 28, 34, 34, 28, 30, 22,
+ 12, 32, 28, 25, 63, 43, 49, 29, 37, 29,
+ 9, 25, 25, 21, 23, 15, 17, 11, 27, 61,
+ 43, 63, 15, 21, 1, 9, 4, 14, 2, 3,
+ 10, 12, 5, 13, 2, 41, 78, 78, 60, 64,
+ 78, 62, 32, 66, 56, 20, 36, 28, 8, 2,
+ 9, 26, 14, 18, 20, 12, 4, 20, 8, 1,
+ 6, 3, 9, 0, 51, 112, 116, 108, 116, 110,
+ 106, 106, 106, 102, 84, 82, 76, 56, 48, 10,
+ 78, 66, 30, 84, 64, 66, 56, 46, 40, 42,
+ 24, 4, 2, 1, 35, 33, 71, 9, 54, 36,
+ 24, 6, 10, 1, 13, 19, 37, 2, 82, 54,
+ 42, 34, 30, 6, 9, 15, 25, 33, 46, 26,
+ 14, 4, 4, 9, 19, 31, 59, 45, 1, 9,
+ 7, 13, 19, 39, 45, 69, 13, 70, 38, 24,
+ 8, 10, 11, 21, 27, 39, 124, 47, 29, 15,
+ 5, 13, 2, 10, 12, 13, 8, 18, 12, 29,
+ 15, 15, 18, 16, 24, 18, 40, 44, 36, 14,
+ 26, 32, 28, 17, 0, 19, 57, 45, 33, 35,
+ 27, 29, 25, 23, 13, 21, 19, 13, 9, 23,
+ 37, 57, 35, 67, 25, 11, 13, 1, 11, 3,
+ 8, 3, 5, 7, 2, 5, 11, 37, 84, 76,
+ 80, 74, 64, 78, 70, 64, 56, 54, 56, 46,
+ 10, 12, 1, 44, 42, 32, 10, 24, 18, 1,
+ 7, 13, 23, 29, 55, 29, 67, 124, 122, 114,
+ 112, 102, 94, 88, 76, 74, 66, 60, 46, 34,
+ 10, 15, 74, 74, 22, 78, 86, 64, 52, 56,
+ 48, 40, 46, 34, 26, 8, 6, 9, 43, 22,
+ 2, 23, 79, 57, 65, 21, 27, 23, 36, 5,
+ 5, 9, 14, 62, 36, 74, 0, 106, 100, 76,
+ 56, 32, 24, 29, 47, 75, 17, 82, 56, 38,
+ 18, 20, 4, 3, 9, 39, 57, 41, 23, 29,
+ 13, 0, 13, 17, 0, 6, 8, 4, 16, 20,
+ 4, 104, 100, 86, 72, 62, 50, 16, 3, 35,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 17 */
+
+ 114, 14, 27, 114, 14, 27, 67, 31, 34, 22,
+ 10, 12, 74, 82, 106, 28, 29, 2, 1, 20,
+ 0, 4, 9, 9, 39, 22, 66, 61, 87, 99,
+ 29, 37, 9, 1, 20, 0, 31, 25, 18, 18,
+ 3, 21, 41, 4, 21, 33, 53, 10, 17, 39,
+ 8, 19, 27, 49, 8, 9, 11, 21, 1, 8,
+ 44, 0, 0, 0, 19, 53, 67, 1, 18, 3,
+ 104, 14, 11, 89, 49, 37, 5, 41, 69, 61,
+ 75, 23, 69, 55, 63, 81, 77, 83, 79, 26,
+ 3, 9, 53, 5, 41, 31, 63, 17, 53, 39,
+ 83, 24, 25, 9, 75, 43, 23, 19, 3, 10,
+ 0, 15, 18, 12, 11, 4, 0, 3, 4, 23,
+ 24, 8, 36, 38, 28, 34, 34, 28, 28, 22,
+ 12, 30, 26, 25, 61, 41, 47, 27, 35, 27,
+ 7, 23, 23, 19, 19, 15, 17, 9, 29, 59,
+ 41, 63, 13, 21, 0, 7, 4, 16, 2, 3,
+ 8, 14, 5, 15, 2, 41, 76, 78, 60, 64,
+ 78, 62, 32, 66, 56, 18, 36, 28, 8, 2,
+ 9, 26, 14, 18, 20, 12, 2, 20, 8, 3,
+ 4, 3, 11, 1, 51, 110, 114, 106, 114, 108,
+ 102, 102, 102, 98, 80, 78, 72, 52, 44, 6,
+ 76, 64, 28, 80, 60, 62, 52, 42, 36, 38,
+ 20, 0, 2, 3, 37, 33, 71, 7, 56, 38,
+ 26, 8, 10, 1, 11, 17, 35, 4, 84, 56,
+ 44, 34, 32, 8, 7, 13, 23, 31, 48, 28,
+ 16, 6, 6, 7, 17, 29, 55, 43, 2, 7,
+ 5, 11, 17, 37, 43, 65, 11, 72, 40, 26,
+ 10, 12, 9, 19, 25, 37, 124, 45, 27, 13,
+ 5, 11, 2, 12, 14, 11, 10, 20, 14, 27,
+ 15, 15, 18, 18, 24, 18, 38, 44, 36, 12,
+ 24, 32, 28, 17, 1, 19, 55, 43, 31, 33,
+ 25, 27, 23, 21, 13, 19, 17, 11, 9, 21,
+ 35, 55, 33, 67, 25, 9, 13, 0, 11, 3,
+ 10, 3, 5, 5, 4, 5, 11, 37, 84, 76,
+ 80, 74, 62, 78, 70, 64, 56, 54, 56, 46,
+ 10, 10, 3, 42, 40, 30, 8, 22, 16, 1,
+ 7, 13, 21, 27, 53, 29, 65, 120, 118, 110,
+ 108, 98, 90, 84, 72, 70, 62, 56, 42, 30,
+ 8, 15, 72, 72, 20, 76, 82, 60, 48, 52,
+ 44, 36, 42, 30, 22, 6, 2, 11, 45, 20,
+ 0, 25, 75, 53, 61, 19, 25, 21, 40, 3,
+ 3, 9, 16, 66, 40, 78, 2, 104, 98, 72,
+ 50, 28, 20, 33, 51, 77, 15, 84, 56, 38,
+ 18, 22, 4, 3, 9, 37, 55, 39, 21, 27,
+ 9, 2, 11, 15, 2, 8, 10, 6, 18, 22,
+ 4, 102, 98, 84, 70, 60, 48, 12, 5, 35,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 18 */
+
+ 112, 14, 27, 112, 14, 27, 63, 29, 34, 22,
+ 10, 10, 72, 80, 106, 28, 25, 2, 0, 22,
+ 0, 2, 9, 11, 41, 20, 62, 65, 89, 101,
+ 23, 35, 9, 0, 22, 0, 31, 23, 18, 16,
+ 3, 21, 39, 4, 21, 33, 53, 10, 17, 37,
+ 8, 19, 27, 49, 8, 9, 11, 21, 1, 8,
+ 44, 0, 0, 0, 17, 53, 67, 1, 16, 3,
+ 104, 14, 11, 87, 47, 37, 5, 39, 65, 57,
+ 73, 21, 65, 53, 59, 79, 75, 81, 77, 28,
+ 3, 7, 51, 5, 41, 29, 59, 17, 53, 39,
+ 81, 24, 25, 9, 73, 43, 21, 19, 3, 10,
+ 0, 13, 18, 12, 13, 4, 0, 3, 4, 23,
+ 24, 8, 34, 38, 26, 34, 34, 28, 24, 22,
+ 12, 26, 24, 27, 59, 39, 45, 25, 33, 25,
+ 5, 21, 23, 17, 17, 15, 17, 9, 31, 59,
+ 39, 63, 13, 21, 2, 7, 2, 18, 2, 3,
+ 6, 16, 7, 17, 2, 43, 72, 76, 58, 62,
+ 76, 60, 30, 64, 56, 16, 34, 28, 8, 2,
+ 9, 24, 12, 16, 18, 10, 0, 18, 6, 7,
+ 2, 5, 13, 3, 51, 106, 112, 102, 110, 104,
+ 98, 98, 98, 92, 76, 74, 66, 46, 40, 0,
+ 72, 60, 24, 76, 56, 58, 48, 38, 32, 32,
+ 16, 3, 0, 7, 39, 35, 71, 7, 56, 38,
+ 26, 8, 10, 1, 11, 17, 33, 4, 84, 56,
+ 44, 34, 34, 8, 5, 13, 21, 29, 50, 30,
+ 16, 6, 8, 7, 17, 27, 53, 41, 4, 5,
+ 5, 11, 15, 35, 41, 63, 11, 72, 40, 26,
+ 10, 14, 7, 17, 23, 35, 124, 45, 27, 13,
+ 5, 11, 2, 12, 14, 11, 10, 20, 16, 27,
+ 15, 15, 16, 18, 22, 16, 36, 42, 36, 10,
+ 22, 32, 28, 19, 3, 21, 53, 41, 29, 33,
+ 25, 27, 21, 19, 13, 19, 17, 11, 11, 21,
+ 35, 55, 31, 67, 25, 9, 13, 2, 11, 3,
+ 12, 3, 5, 5, 6, 7, 11, 39, 82, 76,
+ 80, 72, 60, 76, 68, 62, 54, 52, 54, 44,
+ 8, 8, 5, 38, 38, 26, 4, 20, 14, 3,
+ 9, 13, 21, 27, 53, 31, 63, 116, 114, 106,
+ 104, 92, 86, 80, 66, 66, 58, 52, 38, 26,
+ 6, 17, 68, 68, 18, 72, 78, 56, 44, 48,
+ 40, 32, 38, 26, 18, 2, 1, 15, 47, 16,
+ 3, 29, 73, 51, 59, 17, 23, 19, 44, 1,
+ 1, 9, 18, 68, 42, 80, 4, 102, 94, 66,
+ 44, 22, 14, 39, 55, 79, 15, 84, 56, 38,
+ 18, 22, 4, 3, 9, 35, 53, 37, 19, 25,
+ 7, 4, 9, 13, 4, 10, 12, 6, 20, 22,
+ 4, 100, 94, 80, 66, 56, 44, 8, 9, 37,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 19 */
+
+ 110, 14, 27, 110, 14, 27, 59, 25, 36, 22,
+ 8, 6, 68, 78, 106, 28, 21, 2, 2, 24,
+ 0, 0, 9, 13, 43, 20, 58, 67, 91, 101,
+ 17, 33, 9, 2, 24, 0, 29, 21, 20, 16,
+ 3, 19, 37, 2, 23, 33, 53, 10, 17, 37,
+ 8, 19, 27, 49, 10, 9, 11, 21, 0, 8,
+ 44, 0, 0, 0, 17, 53, 67, 0, 14, 3,
+ 104, 14, 11, 85, 45, 37, 3, 37, 61, 53,
+ 69, 19, 61, 51, 55, 77, 73, 79, 75, 30,
+ 3, 5, 49, 5, 39, 29, 55, 17, 51, 37,
+ 79, 24, 25, 7, 69, 41, 19, 19, 3, 10,
+ 0, 11, 20, 14, 13, 4, 0, 3, 4, 23,
+ 24, 6, 34, 38, 24, 34, 34, 28, 20, 22,
+ 10, 22, 22, 29, 57, 37, 43, 23, 31, 25,
+ 3, 21, 21, 15, 15, 15, 17, 9, 33, 57,
+ 37, 63, 13, 21, 4, 7, 0, 20, 2, 3,
+ 4, 18, 7, 19, 2, 45, 70, 74, 58, 60,
+ 74, 58, 30, 62, 54, 14, 32, 28, 8, 2,
+ 11, 22, 10, 14, 16, 8, 1, 16, 6, 11,
+ 0, 5, 15, 5, 51, 104, 108, 100, 106, 100,
+ 94, 94, 94, 88, 70, 70, 62, 42, 34, 3,
+ 68, 56, 22, 72, 52, 52, 42, 34, 26, 28,
+ 12, 9, 1, 11, 41, 37, 71, 7, 58, 38,
+ 26, 8, 10, 1, 11, 17, 33, 4, 84, 56,
+ 44, 34, 36, 10, 5, 11, 21, 27, 52, 30,
+ 18, 6, 10, 5, 15, 27, 51, 39, 6, 3,
+ 3, 11, 13, 33, 39, 61, 9, 72, 40, 26,
+ 10, 16, 7, 15, 21, 33, 124, 43, 25, 11,
+ 5, 11, 2, 12, 14, 11, 10, 20, 16, 27,
+ 15, 15, 16, 20, 20, 14, 34, 40, 36, 8,
+ 20, 32, 28, 19, 5, 21, 51, 39, 27, 31,
+ 23, 25, 21, 17, 13, 19, 17, 9, 11, 21,
+ 35, 53, 29, 67, 25, 9, 13, 4, 11, 3,
+ 14, 3, 5, 5, 8, 9, 11, 41, 80, 74,
+ 78, 70, 58, 74, 66, 60, 52, 50, 52, 42,
+ 6, 6, 7, 36, 36, 24, 0, 16, 12, 5,
+ 9, 15, 21, 27, 53, 31, 61, 112, 110, 102,
+ 100, 86, 82, 76, 62, 62, 54, 48, 32, 22,
+ 2, 19, 64, 66, 14, 68, 74, 52, 40, 44,
+ 36, 28, 34, 22, 12, 1, 5, 19, 49, 12,
+ 7, 31, 71, 49, 57, 15, 21, 17, 46, 0,
+ 0, 9, 20, 72, 46, 84, 6, 98, 92, 62,
+ 38, 16, 8, 45, 59, 81, 15, 84, 56, 38,
+ 18, 22, 4, 3, 9, 35, 51, 35, 17, 23,
+ 5, 4, 9, 11, 4, 10, 12, 6, 20, 22,
+ 4, 98, 92, 76, 62, 52, 40, 4, 13, 39,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 20 */
+
+ 106, 14, 27, 106, 14, 27, 57, 23, 36, 22,
+ 8, 4, 66, 74, 106, 28, 15, 0, 4, 24,
+ 0, 1, 9, 15, 45, 18, 54, 71, 93, 103,
+ 11, 31, 9, 4, 24, 0, 29, 19, 20, 14,
+ 3, 19, 37, 2, 23, 31, 53, 10, 17, 35,
+ 6, 21, 27, 49, 10, 7, 11, 19, 0, 8,
+ 44, 0, 0, 0, 15, 55, 67, 0, 12, 3,
+ 104, 14, 11, 83, 43, 37, 3, 33, 59, 49,
+ 67, 17, 59, 47, 51, 75, 73, 77, 75, 30,
+ 3, 3, 47, 5, 39, 27, 51, 17, 51, 37,
+ 77, 24, 23, 7, 67, 41, 19, 19, 1, 10,
+ 0, 11, 20, 14, 15, 2, 0, 3, 4, 23,
+ 24, 6, 32, 38, 24, 34, 34, 26, 16, 22,
+ 10, 18, 20, 31, 55, 35, 41, 21, 31, 23,
+ 1, 19, 21, 13, 13, 15, 17, 7, 35, 57,
+ 35, 63, 13, 21, 4, 7, 1, 22, 2, 3,
+ 0, 20, 9, 21, 2, 47, 66, 72, 56, 58,
+ 72, 56, 28, 60, 54, 10, 30, 26, 8, 2,
+ 11, 20, 10, 12, 16, 6, 3, 14, 4, 15,
+ 1, 7, 17, 7, 51, 100, 106, 96, 102, 96,
+ 90, 88, 90, 82, 66, 66, 56, 36, 30, 9,
+ 64, 54, 18, 66, 48, 48, 38, 28, 22, 22,
+ 8, 13, 3, 13, 45, 39, 71, 7, 58, 40,
+ 26, 8, 10, 1, 11, 17, 31, 6, 86, 56,
+ 44, 34, 38, 10, 3, 11, 19, 27, 54, 32,
+ 18, 6, 10, 5, 15, 25, 49, 39, 8, 3,
+ 3, 11, 11, 31, 39, 59, 9, 74, 40, 26,
+ 10, 18, 5, 15, 19, 31, 124, 43, 25, 11,
+ 5, 11, 2, 12, 14, 9, 10, 20, 18, 27,
+ 15, 15, 14, 20, 18, 12, 32, 38, 36, 4,
+ 18, 30, 28, 21, 7, 23, 51, 39, 25, 31,
+ 23, 25, 19, 15, 15, 19, 17, 9, 13, 21,
+ 35, 53, 27, 67, 25, 7, 13, 4, 11, 3,
+ 16, 3, 7, 5, 10, 9, 13, 43, 78, 74,
+ 78, 68, 56, 72, 64, 58, 50, 48, 50, 40,
+ 4, 4, 9, 32, 32, 20, 3, 14, 10, 7,
+ 11, 15, 21, 27, 53, 33, 61, 106, 104, 98,
+ 94, 80, 78, 72, 56, 58, 50, 44, 28, 18,
+ 0, 21, 60, 62, 12, 64, 70, 48, 36, 38,
+ 32, 24, 30, 18, 8, 5, 9, 23, 51, 8,
+ 11, 35, 69, 47, 55, 15, 19, 15, 50, 2,
+ 2, 9, 22, 74, 48, 86, 6, 96, 88, 56,
+ 32, 10, 2, 51, 63, 83, 15, 84, 56, 38,
+ 18, 22, 4, 3, 9, 33, 49, 33, 15, 23,
+ 3, 6, 7, 9, 6, 12, 14, 8, 22, 24,
+ 4, 94, 88, 72, 58, 48, 36, 0, 17, 41,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 21 */
+
+ 104, 14, 27, 104, 14, 27, 53, 19, 36, 22,
+ 6, 0, 62, 72, 106, 28, 11, 0, 6, 26,
+ 0, 1, 7, 17, 47, 18, 50, 75, 95, 103,
+ 5, 27, 9, 6, 26, 0, 29, 17, 20, 14,
+ 3, 19, 35, 2, 25, 31, 53, 10, 17, 33,
+ 6, 21, 27, 49, 12, 7, 9, 19, 2, 8,
+ 44, 0, 0, 0, 15, 55, 67, 2, 10, 3,
+ 104, 14, 11, 81, 41, 37, 1, 31, 55, 45,
+ 63, 15, 55, 45, 47, 73, 71, 75, 73, 32,
+ 3, 1, 45, 5, 37, 25, 47, 17, 51, 35,
+ 73, 26, 23, 5, 65, 39, 17, 19, 1, 12,
+ 2, 9, 22, 16, 15, 2, 0, 3, 4, 23,
+ 24, 4, 30, 38, 22, 34, 34, 26, 12, 22,
+ 8, 16, 18, 33, 53, 33, 39, 19, 29, 21,
+ 0, 17, 19, 11, 11, 15, 17, 7, 37, 55,
+ 33, 63, 11, 21, 6, 7, 3, 24, 2, 3,
+ 1, 22, 9, 23, 2, 47, 64, 70, 56, 56,
+ 70, 54, 26, 60, 54, 8, 28, 26, 8, 2,
+ 13, 20, 8, 10, 14, 4, 5, 14, 2, 19,
+ 3, 9, 19, 9, 51, 96, 104, 92, 98, 94,
+ 86, 84, 86, 78, 62, 62, 52, 32, 26, 13,
+ 60, 50, 14, 62, 44, 44, 34, 24, 18, 16,
+ 4, 19, 5, 17, 47, 39, 71, 7, 58, 40,
+ 26, 8, 10, 1, 11, 17, 31, 6, 86, 56,
+ 44, 34, 40, 12, 1, 11, 17, 25, 56, 34,
+ 18, 8, 12, 3, 13, 25, 47, 37, 12, 1,
+ 3, 9, 9, 29, 37, 57, 7, 74, 42, 26,
+ 10, 20, 3, 13, 17, 29, 124, 43, 25, 9,
+ 5, 11, 2, 12, 14, 9, 10, 22, 18, 27,
+ 15, 15, 12, 22, 16, 10, 30, 38, 36, 2,
+ 16, 30, 28, 23, 9, 23, 49, 37, 23, 29,
+ 21, 23, 19, 13, 15, 19, 17, 9, 15, 21,
+ 35, 51, 25, 67, 25, 7, 13, 6, 11, 3,
+ 18, 3, 7, 5, 12, 11, 13, 43, 76, 74,
+ 76, 68, 54, 70, 62, 56, 50, 46, 48, 38,
+ 2, 2, 11, 30, 30, 18, 7, 12, 8, 9,
+ 13, 17, 21, 25, 53, 33, 59, 102, 100, 94,
+ 90, 76, 74, 68, 52, 54, 46, 40, 24, 14,
+ 3, 23, 56, 58, 10, 60, 66, 44, 32, 34,
+ 28, 20, 26, 14, 2, 9, 13, 27, 53, 4,
+ 15, 37, 67, 43, 53, 13, 17, 13, 54, 4,
+ 4, 9, 24, 78, 52, 90, 8, 92, 84, 52,
+ 26, 6, 3, 55, 67, 85, 15, 84, 56, 38,
+ 18, 22, 4, 3, 9, 33, 47, 31, 13, 21,
+ 1, 8, 7, 7, 8, 14, 16, 8, 22, 24,
+ 4, 92, 86, 70, 56, 44, 32, 3, 21, 43,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 22 */
+
+ 102, 14, 29, 102, 14, 29, 49, 17, 38, 22,
+ 6, 1, 60, 70, 106, 28, 7, 0, 6, 28,
+ 0, 3, 7, 19, 49, 16, 48, 77, 97, 105,
+ 0, 25, 9, 6, 28, 0, 27, 15, 22, 12,
+ 1, 17, 33, 0, 25, 31, 53, 10, 15, 33,
+ 6, 21, 25, 49, 12, 7, 9, 19, 2, 8,
+ 44, 0, 0, 0, 13, 55, 67, 2, 10, 5,
+ 104, 14, 11, 79, 41, 37, 1, 29, 51, 41,
+ 61, 13, 51, 43, 43, 71, 69, 73, 71, 34,
+ 3, 0, 45, 3, 37, 25, 43, 17, 49, 35,
+ 71, 26, 23, 5, 61, 39, 15, 17, 1, 12,
+ 2, 7, 22, 16, 17, 2, 0, 3, 4, 23,
+ 24, 4, 30, 38, 20, 34, 34, 26, 8, 22,
+ 8, 12, 16, 35, 53, 31, 39, 19, 27, 21,
+ 2, 17, 19, 11, 7, 15, 17, 7, 39, 55,
+ 31, 63, 11, 21, 8, 5, 5, 24, 2, 3,
+ 3, 24, 11, 25, 2, 49, 60, 68, 54, 56,
+ 68, 54, 26, 58, 52, 6, 26, 26, 8, 2,
+ 13, 18, 6, 8, 12, 4, 7, 12, 2, 23,
+ 5, 9, 21, 11, 51, 94, 100, 90, 94, 90,
+ 82, 80, 82, 72, 56, 58, 46, 26, 20, 19,
+ 56, 46, 12, 58, 38, 38, 28, 20, 12, 12,
+ 1, 23, 7, 21, 49, 41, 71, 5, 60, 40,
+ 28, 8, 10, 1, 11, 17, 29, 6, 86, 58,
+ 44, 34, 42, 12, 1, 9, 17, 23, 58, 34,
+ 20, 8, 14, 3, 13, 23, 45, 35, 14, 0,
+ 1, 9, 9, 29, 35, 53, 7, 74, 42, 26,
+ 12, 20, 3, 11, 17, 27, 124, 41, 23, 9,
+ 5, 11, 2, 12, 14, 9, 12, 22, 20, 27,
+ 15, 17, 12, 22, 14, 8, 28, 36, 36, 0,
+ 14, 30, 28, 23, 11, 25, 47, 35, 21, 29,
+ 21, 23, 17, 13, 15, 17, 15, 7, 15, 21,
+ 35, 51, 23, 67, 25, 7, 13, 8, 11, 3,
+ 20, 5, 7, 5, 12, 13, 13, 45, 74, 72,
+ 76, 66, 52, 70, 62, 54, 48, 46, 46, 36,
+ 0, 0, 13, 26, 28, 14, 11, 8, 4, 11,
+ 13, 17, 21, 25, 51, 35, 57, 98, 96, 90,
+ 86, 70, 70, 64, 46, 50, 40, 36, 18, 10,
+ 5, 23, 54, 56, 6, 56, 60, 40, 28, 30,
+ 24, 16, 20, 10, 1, 13, 17, 29, 57, 0,
+ 19, 41, 65, 41, 51, 11, 15, 11, 56, 6,
+ 6, 9, 26, 80, 54, 92, 10, 90, 82, 46,
+ 20, 0, 7, 61, 71, 89, 13, 84, 56, 38,
+ 18, 22, 4, 3, 9, 31, 45, 31, 11, 19,
+ 0, 8, 5, 5, 8, 14, 16, 8, 24, 24,
+ 4, 90, 82, 66, 52, 42, 30, 9, 23, 45,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 23 */
+
+ 100, 14, 29, 100, 14, 29, 45, 13, 38, 22,
+ 4, 5, 56, 66, 106, 28, 1, 1, 8, 28,
+ 0, 5, 7, 21, 51, 16, 44, 81, 99, 105,
+ 6, 23, 9, 8, 28, 0, 27, 13, 22, 12,
+ 1, 17, 33, 0, 27, 29, 53, 10, 15, 31,
+ 4, 21, 25, 49, 14, 5, 9, 17, 4, 8,
+ 44, 0, 0, 0, 13, 55, 67, 4, 8, 5,
+ 104, 14, 11, 77, 39, 37, 0, 25, 49, 37,
+ 57, 11, 49, 39, 39, 69, 67, 71, 71, 36,
+ 3, 2, 43, 3, 35, 23, 39, 17, 49, 33,
+ 69, 26, 21, 3, 59, 37, 15, 17, 0, 12,
+ 2, 5, 24, 18, 17, 0, 0, 3, 4, 23,
+ 24, 2, 28, 38, 20, 34, 34, 24, 4, 22,
+ 6, 8, 14, 37, 51, 29, 37, 17, 25, 19,
+ 4, 15, 17, 9, 5, 15, 17, 5, 41, 53,
+ 29, 63, 11, 21, 8, 5, 7, 26, 2, 3,
+ 7, 26, 11, 27, 2, 51, 58, 66, 54, 54,
+ 66, 52, 24, 56, 52, 4, 24, 26, 8, 2,
+ 15, 16, 6, 6, 12, 2, 9, 10, 0, 27,
+ 7, 11, 23, 13, 51, 90, 98, 86, 90, 86,
+ 78, 74, 78, 68, 52, 54, 42, 22, 16, 23,
+ 52, 44, 8, 52, 34, 34, 24, 14, 8, 6,
+ 5, 29, 9, 23, 53, 43, 71, 5, 60, 42,
+ 28, 8, 10, 1, 11, 17, 29, 8, 88, 58,
+ 44, 34, 44, 14, 0, 9, 15, 21, 60, 36,
+ 20, 8, 16, 1, 11, 23, 43, 35, 16, 0,
+ 1, 9, 7, 27, 35, 51, 5, 76, 42, 26,
+ 12, 22, 1, 9, 15, 25, 124, 41, 23, 7,
+ 5, 11, 2, 12, 14, 7, 12, 22, 20, 27,
+ 15, 17, 10, 24, 12, 6, 26, 34, 36, 1,
+ 12, 28, 28, 25, 13, 25, 45, 35, 19, 27,
+ 19, 21, 17, 11, 15, 17, 15, 7, 17, 21,
+ 35, 49, 21, 67, 25, 5, 13, 8, 11, 3,
+ 22, 5, 7, 5, 14, 13, 15, 47, 72, 72,
+ 74, 64, 50, 68, 60, 52, 46, 44, 44, 34,
+ 1, 1, 15, 24, 24, 12, 15, 6, 2, 13,
+ 15, 19, 21, 25, 51, 35, 55, 94, 92, 86,
+ 80, 64, 66, 60, 42, 46, 36, 32, 14, 6,
+ 9, 25, 50, 52, 4, 52, 56, 36, 24, 26,
+ 20, 12, 16, 6, 7, 17, 21, 33, 59, 3,
+ 23, 43, 63, 39, 49, 11, 13, 9, 60, 8,
+ 8, 9, 28, 84, 58, 96, 10, 86, 78, 42,
+ 14, 5, 13, 67, 75, 91, 13, 84, 56, 38,
+ 18, 22, 4, 3, 9, 31, 43, 29, 9, 17,
+ 2, 10, 5, 3, 10, 16, 18, 10, 24, 26,
+ 4, 86, 80, 62, 48, 38, 26, 13, 27, 47,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 24 */
+
+ 96, 12, 29, 96, 12, 29, 43, 11, 38, 22,
+ 4, 7, 54, 64, 106, 28, 2, 1, 10, 30,
+ 0, 7, 7, 23, 55, 14, 40, 85, 101, 107,
+ 10, 21, 9, 10, 30, 0, 27, 11, 22, 10,
+ 1, 17, 31, 1, 27, 29, 55, 10, 15, 31,
+ 4, 23, 25, 49, 14, 5, 9, 17, 4, 8,
+ 44, 0, 0, 0, 11, 57, 67, 4, 6, 5,
+ 104, 14, 11, 75, 37, 37, 0, 23, 45, 33,
+ 55, 9, 45, 37, 37, 67, 67, 69, 69, 36,
+ 3, 4, 41, 3, 35, 23, 37, 17, 49, 33,
+ 67, 26, 21, 3, 57, 37, 13, 17, 0, 12,
+ 2, 5, 24, 18, 19, 0, 0, 3, 4, 23,
+ 24, 2, 26, 38, 18, 34, 34, 24, 0, 22,
+ 6, 4, 12, 39, 49, 27, 35, 15, 25, 19,
+ 6, 15, 17, 7, 3, 15, 17, 5, 45, 53,
+ 27, 63, 11, 23, 10, 5, 9, 28, 2, 3,
+ 9, 28, 13, 29, 2, 53, 54, 64, 52, 52,
+ 64, 50, 22, 54, 50, 0, 22, 24, 8, 2,
+ 15, 14, 4, 4, 10, 0, 11, 8, 1, 31,
+ 11, 13, 25, 15, 51, 86, 94, 82, 86, 82,
+ 74, 70, 74, 62, 46, 50, 36, 16, 10, 29,
+ 48, 40, 4, 48, 30, 28, 18, 10, 2, 0,
+ 9, 33, 11, 27, 55, 45, 73, 5, 60, 42,
+ 28, 8, 10, 1, 11, 17, 27, 8, 88, 58,
+ 44, 34, 46, 14, 0, 9, 15, 21, 62, 36,
+ 20, 8, 16, 1, 11, 21, 41, 33, 18, 2,
+ 1, 9, 5, 25, 33, 49, 5, 76, 42, 26,
+ 12, 24, 1, 9, 13, 25, 124, 41, 23, 7,
+ 5, 11, 2, 12, 14, 7, 12, 22, 22, 27,
+ 15, 17, 8, 24, 10, 4, 24, 32, 36, 5,
+ 8, 28, 28, 27, 15, 27, 45, 33, 17, 27,
+ 19, 21, 15, 9, 17, 17, 15, 7, 19, 21,
+ 35, 49, 21, 69, 25, 5, 13, 10, 11, 3,
+ 22, 5, 9, 5, 16, 15, 15, 49, 70, 70,
+ 74, 62, 46, 66, 58, 50, 44, 42, 42, 32,
+ 3, 3, 17, 20, 22, 8, 19, 2, 0, 15,
+ 17, 19, 21, 25, 51, 37, 55, 88, 86, 82,
+ 76, 58, 60, 54, 36, 42, 32, 28, 8, 2,
+ 11, 27, 46, 48, 0, 48, 52, 30, 18, 20,
+ 14, 8, 12, 0, 11, 21, 27, 37, 61, 7,
+ 27, 47, 61, 37, 47, 9, 11, 9, 62, 10,
+ 8, 9, 28, 86, 60, 98, 12, 84, 74, 36,
+ 8, 11, 19, 73, 79, 93, 13, 84, 56, 38,
+ 18, 22, 4, 3, 9, 29, 43, 27, 7, 17,
+ 4, 10, 3, 3, 10, 16, 18, 10, 26, 26,
+ 2, 84, 76, 58, 44, 34, 22, 17, 31, 49,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 25 */
+
+ 94, 12, 29, 94, 12, 29, 39, 9, 40, 22,
+ 4, 9, 52, 62, 106, 28, 6, 1, 12, 32,
+ 0, 7, 5, 25, 57, 12, 36, 87, 103, 109,
+ 16, 17, 9, 12, 32, 0, 25, 9, 24, 8,
+ 1, 15, 29, 1, 27, 29, 55, 10, 15, 29,
+ 4, 23, 25, 49, 14, 5, 7, 17, 4, 8,
+ 44, 0, 0, 0, 9, 57, 67, 4, 4, 5,
+ 104, 14, 11, 73, 35, 37, 0, 21, 41, 29,
+ 51, 5, 41, 35, 33, 65, 65, 67, 67, 38,
+ 1, 6, 39, 3, 33, 21, 33, 17, 47, 33,
+ 63, 28, 21, 3, 53, 37, 11, 17, 0, 14,
+ 4, 3, 24, 18, 19, 0, 2, 3, 4, 21,
+ 24, 2, 26, 38, 16, 34, 34, 24, 3, 22,
+ 6, 2, 10, 39, 47, 25, 33, 13, 23, 17,
+ 8, 13, 17, 5, 1, 15, 17, 5, 47, 51,
+ 25, 63, 9, 23, 12, 5, 11, 30, 2, 3,
+ 11, 30, 15, 31, 2, 53, 52, 62, 52, 50,
+ 62, 48, 22, 54, 50, 1, 20, 24, 8, 2,
+ 15, 14, 2, 2, 8, 1, 13, 8, 1, 33,
+ 13, 13, 27, 17, 51, 84, 92, 80, 84, 80,
+ 70, 66, 70, 58, 42, 46, 32, 10, 6, 35,
+ 46, 36, 2, 44, 26, 24, 14, 6, 1, 3,
+ 13, 37, 11, 31, 57, 45, 73, 5, 62, 42,
+ 28, 10, 10, 1, 9, 17, 25, 8, 88, 58,
+ 46, 34, 48, 16, 2, 7, 13, 19, 64, 38,
+ 22, 10, 18, 0, 11, 19, 39, 31, 22, 4,
+ 0, 7, 3, 23, 31, 47, 3, 76, 44, 28,
+ 12, 26, 0, 7, 11, 23, 124, 39, 21, 5,
+ 5, 11, 2, 14, 16, 7, 12, 24, 24, 27,
+ 15, 17, 8, 24, 8, 4, 22, 32, 36, 7,
+ 6, 28, 28, 27, 17, 29, 43, 31, 15, 27,
+ 17, 21, 13, 7, 17, 17, 15, 5, 19, 21,
+ 35, 49, 19, 69, 25, 5, 13, 12, 11, 3,
+ 24, 5, 9, 5, 18, 17, 15, 49, 68, 70,
+ 74, 62, 44, 64, 56, 48, 44, 40, 40, 32,
+ 3, 5, 19, 18, 20, 4, 23, 0, 1, 15,
+ 17, 19, 19, 23, 51, 39, 53, 84, 82, 78,
+ 72, 54, 56, 50, 32, 38, 28, 24, 4, 1,
+ 13, 29, 42, 46, 1, 44, 48, 26, 14, 16,
+ 10, 4, 8, 3, 15, 23, 31, 41, 63, 11,
+ 31, 51, 59, 33, 43, 7, 9, 7, 66, 12,
+ 10, 9, 30, 90, 62, 102, 14, 82, 72, 32,
+ 2, 15, 25, 77, 83, 95, 13, 84, 56, 38,
+ 18, 24, 4, 3, 9, 27, 41, 25, 5, 15,
+ 8, 12, 1, 1, 12, 18, 20, 10, 28, 26,
+ 2, 82, 74, 56, 42, 30, 18, 21, 35, 49,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 26 */
+
+ 92, 12, 29, 92, 12, 29, 35, 5, 40, 22,
+ 2, 13, 48, 58, 106, 28, 12, 3, 14, 32,
+ 0, 9, 5, 27, 59, 12, 32, 91, 105, 109,
+ 22, 15, 9, 14, 32, 0, 25, 7, 24, 8,
+ 1, 15, 29, 1, 29, 27, 55, 10, 15, 27,
+ 2, 23, 25, 49, 16, 3, 7, 15, 6, 8,
+ 44, 0, 0, 0, 9, 57, 67, 6, 2, 5,
+ 104, 14, 11, 71, 33, 37, 2, 17, 39, 25,
+ 49, 3, 39, 31, 29, 63, 63, 65, 67, 40,
+ 1, 8, 37, 3, 33, 19, 29, 17, 47, 31,
+ 61, 28, 19, 1, 51, 35, 11, 17, 2, 14,
+ 4, 1, 26, 20, 21, 1, 2, 3, 4, 21,
+ 24, 0, 24, 38, 16, 34, 34, 22, 7, 22,
+ 4, 1, 8, 41, 45, 23, 31, 11, 21, 15,
+ 10, 11, 15, 3, 0, 15, 17, 3, 49, 51,
+ 23, 63, 9, 23, 12, 5, 13, 32, 2, 3,
+ 15, 32, 15, 33, 2, 55, 48, 60, 50, 48,
+ 60, 46, 20, 52, 50, 3, 18, 24, 8, 2,
+ 17, 12, 2, 0, 8, 3, 15, 6, 3, 37,
+ 15, 15, 29, 19, 51, 80, 90, 76, 80, 76,
+ 66, 60, 66, 52, 38, 42, 26, 6, 2, 39,
+ 42, 34, 1, 38, 22, 20, 10, 0, 5, 9,
+ 17, 43, 13, 33, 61, 47, 73, 5, 62, 44,
+ 28, 10, 10, 1, 9, 17, 25, 10, 90, 58,
+ 46, 34, 50, 16, 4, 7, 11, 17, 66, 40,
+ 22, 10, 20, 0, 9, 19, 37, 31, 24, 4,
+ 0, 7, 1, 21, 31, 45, 3, 78, 44, 28,
+ 12, 28, 2, 5, 9, 21, 124, 39, 21, 5,
+ 5, 11, 2, 14, 16, 5, 12, 24, 24, 27,
+ 15, 17, 6, 26, 6, 2, 20, 30, 36, 9,
+ 4, 26, 28, 29, 19, 29, 41, 31, 13, 25,
+ 17, 19, 13, 5, 17, 17, 15, 5, 21, 21,
+ 35, 47, 17, 69, 25, 3, 13, 12, 11, 3,
+ 26, 5, 9, 5, 20, 17, 17, 51, 66, 70,
+ 72, 60, 42, 62, 54, 46, 42, 38, 38, 30,
+ 5, 7, 21, 14, 16, 2, 27, 1, 3, 17,
+ 19, 21, 19, 23, 51, 39, 51, 80, 78, 74,
+ 66, 48, 52, 46, 26, 34, 24, 20, 0, 5,
+ 17, 31, 38, 42, 3, 40, 44, 22, 10, 12,
+ 6, 0, 4, 7, 21, 27, 35, 45, 65, 15,
+ 35, 53, 57, 31, 41, 7, 7, 5, 70, 14,
+ 12, 9, 32, 92, 66, 104, 14, 78, 68, 26,
+ 3, 21, 31, 83, 87, 97, 13, 84, 56, 38,
+ 18, 24, 4, 3, 9, 27, 39, 23, 3, 13,
+ 10, 14, 1, 0, 14, 20, 22, 12, 28, 28,
+ 2, 78, 70, 52, 38, 26, 14, 25, 39, 51,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 27 */
+
+ 90, 12, 31, 90, 12, 31, 31, 3, 42, 22,
+ 2, 15, 46, 56, 106, 28, 16, 3, 14, 34,
+ 0, 11, 5, 29, 61, 10, 30, 93, 107, 111,
+ 28, 13, 9, 14, 34, 0, 23, 5, 26, 6,
+ 0, 13, 27, 3, 29, 27, 55, 10, 13, 27,
+ 2, 23, 23, 49, 16, 3, 7, 15, 6, 8,
+ 44, 0, 0, 0, 7, 57, 67, 6, 2, 7,
+ 104, 14, 11, 69, 33, 37, 2, 15, 35, 21,
+ 45, 1, 35, 29, 25, 61, 61, 63, 65, 42,
+ 1, 10, 37, 1, 31, 19, 25, 17, 45, 31,
+ 59, 28, 19, 1, 47, 35, 9, 15, 2, 14,
+ 4, 0, 26, 20, 21, 1, 2, 3, 4, 21,
+ 24, 0, 24, 38, 14, 34, 34, 22, 11, 22,
+ 4, 5, 6, 43, 45, 21, 31, 11, 19, 15,
+ 12, 11, 15, 3, 4, 15, 17, 3, 51, 49,
+ 21, 63, 9, 23, 14, 3, 15, 32, 2, 3,
+ 17, 34, 17, 35, 2, 57, 46, 58, 50, 48,
+ 58, 46, 20, 50, 48, 5, 16, 24, 8, 2,
+ 17, 10, 0, 1, 6, 3, 17, 4, 3, 41,
+ 17, 15, 31, 21, 51, 78, 86, 74, 76, 72,
+ 62, 56, 62, 48, 32, 38, 22, 0, 3, 45,
+ 38, 30, 3, 34, 16, 14, 4, 3, 11, 13,
+ 23, 47, 15, 37, 63, 49, 73, 3, 64, 44,
+ 30, 10, 10, 1, 9, 17, 23, 10, 90, 60,
+ 46, 34, 52, 18, 4, 5, 11, 15, 68, 40,
+ 24, 10, 22, 2, 9, 17, 35, 29, 26, 6,
+ 2, 7, 1, 21, 29, 41, 1, 78, 44, 28,
+ 14, 28, 2, 3, 9, 19, 124, 37, 19, 3,
+ 5, 11, 2, 14, 16, 5, 14, 24, 26, 27,
+ 15, 19, 6, 26, 4, 0, 18, 28, 36, 11,
+ 2, 26, 28, 29, 21, 31, 39, 29, 11, 25,
+ 15, 19, 11, 5, 17, 15, 13, 3, 21, 21,
+ 35, 47, 15, 69, 25, 3, 13, 14, 11, 3,
+ 28, 7, 9, 5, 20, 19, 17, 53, 64, 68,
+ 72, 58, 40, 62, 54, 44, 40, 38, 36, 28,
+ 7, 9, 23, 12, 14, 1, 31, 5, 7, 19,
+ 19, 21, 19, 23, 49, 41, 49, 76, 74, 70,
+ 62, 42, 48, 42, 22, 30, 18, 16, 5, 9,
+ 19, 31, 36, 40, 7, 36, 38, 18, 6, 8,
+ 2, 3, 1, 11, 25, 31, 39, 47, 69, 19,
+ 39, 57, 55, 29, 39, 5, 5, 3, 72, 16,
+ 14, 9, 34, 96, 68, 108, 16, 76, 66, 22,
+ 9, 27, 35, 89, 91, 101, 11, 84, 56, 38,
+ 18, 24, 4, 3, 9, 25, 37, 23, 1, 11,
+ 12, 14, 0, 2, 14, 20, 22, 12, 30, 28,
+ 2, 76, 68, 48, 34, 24, 12, 31, 41, 53,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 28 */
+
+ 86, 12, 31, 86, 12, 31, 29, 0, 42, 22,
+ 0, 19, 42, 54, 106, 28, 20, 3, 16, 36,
+ 0, 13, 5, 31, 63, 10, 26, 97, 109, 111,
+ 34, 11, 9, 16, 36, 0, 23, 3, 26, 6,
+ 0, 13, 25, 3, 31, 27, 55, 10, 13, 25,
+ 2, 25, 23, 49, 18, 3, 7, 15, 8, 8,
+ 44, 0, 0, 0, 7, 59, 67, 8, 0, 7,
+ 104, 14, 11, 67, 31, 37, 4, 13, 31, 17,
+ 43, 0, 31, 27, 21, 59, 61, 61, 63, 42,
+ 1, 12, 35, 1, 31, 17, 21, 17, 45, 29,
+ 57, 28, 19, 0, 45, 33, 7, 15, 2, 14,
+ 4, 0, 28, 22, 23, 1, 2, 3, 4, 21,
+ 24, 1, 22, 38, 12, 34, 34, 22, 15, 22,
+ 2, 9, 4, 45, 43, 19, 29, 9, 19, 13,
+ 14, 9, 13, 1, 6, 15, 17, 3, 53, 49,
+ 19, 63, 9, 23, 16, 3, 17, 34, 2, 3,
+ 19, 36, 17, 37, 2, 59, 42, 56, 48, 46,
+ 56, 44, 18, 48, 48, 9, 14, 22, 8, 2,
+ 19, 8, 1, 3, 4, 5, 19, 2, 5, 45,
+ 19, 17, 33, 23, 51, 74, 84, 70, 72, 68,
+ 58, 52, 58, 42, 28, 34, 16, 3, 7, 49,
+ 34, 26, 7, 30, 12, 10, 0, 7, 15, 19,
+ 27, 53, 17, 41, 65, 51, 73, 3, 64, 44,
+ 30, 10, 10, 1, 9, 17, 23, 10, 90, 60,
+ 46, 34, 54, 18, 6, 5, 9, 15, 70, 42,
+ 24, 10, 22, 2, 7, 17, 33, 27, 28, 8,
+ 2, 7, 0, 19, 27, 39, 1, 78, 44, 28,
+ 14, 30, 4, 3, 7, 17, 124, 37, 19, 3,
+ 5, 11, 2, 14, 16, 5, 14, 24, 26, 27,
+ 15, 19, 4, 28, 2, 1, 16, 26, 36, 15,
+ 0, 26, 28, 31, 23, 31, 39, 27, 9, 23,
+ 15, 17, 11, 3, 19, 15, 13, 3, 23, 21,
+ 35, 45, 13, 69, 25, 3, 13, 16, 11, 3,
+ 30, 7, 11, 5, 22, 21, 17, 55, 62, 68,
+ 70, 56, 38, 60, 52, 42, 38, 36, 34, 26,
+ 9, 11, 25, 8, 12, 3, 35, 7, 9, 21,
+ 21, 23, 19, 23, 49, 41, 49, 70, 68, 66,
+ 58, 36, 44, 38, 16, 26, 14, 12, 9, 13,
+ 23, 33, 32, 36, 9, 32, 34, 14, 2, 2,
+ 1, 7, 5, 15, 31, 35, 43, 51, 71, 23,
+ 43, 59, 53, 27, 37, 3, 3, 1, 76, 18,
+ 16, 9, 36, 98, 72, 110, 18, 72, 62, 16,
+ 15, 33, 41, 95, 95, 103, 11, 84, 56, 38,
+ 18, 24, 4, 3, 9, 25, 35, 21, 0, 11,
+ 14, 16, 0, 4, 16, 22, 24, 12, 30, 28,
+ 2, 74, 64, 44, 30, 20, 8, 35, 45, 55,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 29 */
+
+ 84, 12, 31, 84, 12, 31, 25, 2, 42, 22,
+ 0, 21, 40, 50, 106, 28, 26, 5, 18, 36,
+ 0, 13, 3, 33, 65, 8, 22, 101, 111, 113,
+ 40, 7, 9, 18, 36, 0, 23, 1, 26, 4,
+ 0, 13, 25, 3, 31, 25, 55, 10, 13, 23,
+ 0, 25, 23, 49, 18, 1, 5, 13, 8, 8,
+ 44, 0, 0, 0, 5, 59, 67, 8, 1, 7,
+ 104, 14, 11, 65, 29, 37, 4, 9, 29, 13,
+ 39, 2, 29, 23, 17, 57, 59, 59, 63, 44,
+ 1, 14, 33, 1, 29, 15, 17, 17, 45, 29,
+ 53, 30, 17, 0, 43, 33, 7, 15, 4, 16,
+ 6, 2, 28, 22, 23, 3, 2, 3, 4, 21,
+ 24, 1, 20, 38, 12, 34, 34, 20, 19, 22,
+ 2, 11, 2, 47, 41, 17, 27, 7, 17, 11,
+ 16, 7, 13, 0, 8, 15, 17, 1, 55, 47,
+ 17, 63, 7, 23, 16, 3, 19, 36, 2, 3,
+ 23, 38, 19, 39, 2, 59, 40, 54, 48, 44,
+ 54, 42, 16, 48, 48, 11, 12, 22, 8, 2,
+ 19, 8, 1, 5, 4, 7, 21, 2, 7, 49,
+ 21, 19, 35, 25, 51, 70, 82, 66, 68, 66,
+ 54, 46, 54, 38, 24, 30, 12, 9, 11, 55,
+ 30, 24, 11, 24, 8, 6, 3, 13, 19, 25,
+ 31, 57, 19, 43, 69, 51, 73, 3, 64, 46,
+ 30, 10, 10, 1, 9, 17, 21, 12, 92, 60,
+ 46, 34, 56, 20, 8, 5, 7, 13, 72, 44,
+ 24, 12, 24, 4, 7, 15, 31, 27, 32, 8,
+ 2, 5, 2, 17, 27, 37, 0, 80, 46, 28,
+ 14, 32, 6, 1, 5, 15, 124, 37, 19, 1,
+ 5, 11, 2, 14, 16, 3, 14, 26, 28, 27,
+ 15, 19, 2, 28, 0, 3, 14, 26, 36, 17,
+ 1, 24, 28, 33, 25, 33, 37, 27, 7, 23,
+ 13, 17, 9, 1, 19, 15, 13, 3, 25, 21,
+ 35, 45, 11, 69, 25, 1, 13, 16, 11, 3,
+ 32, 7, 11, 5, 24, 21, 19, 55, 60, 68,
+ 70, 56, 36, 58, 50, 40, 38, 34, 32, 24,
+ 11, 13, 27, 6, 8, 7, 39, 9, 11, 23,
+ 23, 23, 19, 21, 49, 43, 47, 66, 64, 62,
+ 52, 32, 40, 34, 12, 22, 10, 8, 13, 17,
+ 25, 35, 28, 32, 11, 28, 30, 10, 1, 1,
+ 5, 11, 9, 19, 35, 39, 47, 55, 73, 27,
+ 47, 63, 51, 23, 35, 3, 1, 0, 80, 20,
+ 18, 9, 38, 102, 74, 114, 18, 70, 58, 12,
+ 21, 37, 47, 99, 99, 105, 11, 84, 56, 38,
+ 18, 24, 4, 3, 9, 23, 33, 19, 2, 9,
+ 16, 18, 2, 6, 18, 24, 26, 14, 32, 30,
+ 2, 70, 62, 42, 28, 16, 4, 39, 49, 57,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 30 */
+
+ 82, 12, 31, 82, 12, 31, 21, 6, 44, 22,
+ 1, 25, 36, 48, 106, 28, 30, 5, 20, 38,
+ 0, 15, 3, 35, 67, 8, 18, 103, 113, 113,
+ 46, 5, 9, 20, 38, 0, 21, 0, 28, 4,
+ 0, 11, 23, 5, 33, 25, 55, 10, 13, 23,
+ 0, 25, 23, 49, 20, 1, 5, 13, 10, 8,
+ 44, 0, 0, 0, 5, 59, 67, 10, 3, 7,
+ 104, 14, 11, 63, 27, 37, 6, 7, 25, 9,
+ 37, 4, 25, 21, 13, 55, 57, 57, 61, 46,
+ 1, 16, 31, 1, 29, 15, 13, 17, 43, 27,
+ 51, 30, 17, 2, 39, 31, 5, 15, 4, 16,
+ 6, 4, 30, 24, 25, 3, 2, 3, 4, 21,
+ 24, 3, 20, 38, 10, 34, 34, 20, 23, 22,
+ 0, 15, 0, 49, 39, 15, 25, 5, 15, 11,
+ 18, 7, 11, 2, 10, 15, 17, 1, 57, 47,
+ 15, 63, 7, 23, 18, 3, 21, 38, 2, 3,
+ 25, 40, 19, 41, 2, 61, 36, 52, 46, 42,
+ 52, 40, 16, 46, 46, 13, 10, 22, 8, 2,
+ 21, 6, 3, 7, 2, 9, 23, 0, 7, 53,
+ 23, 19, 37, 27, 51, 68, 78, 64, 64, 62,
+ 50, 42, 50, 32, 18, 26, 6, 13, 17, 59,
+ 26, 20, 13, 20, 4, 0, 9, 17, 25, 29,
+ 35, 63, 21, 47, 71, 53, 73, 3, 66, 46,
+ 30, 10, 10, 1, 9, 17, 21, 12, 92, 60,
+ 46, 34, 58, 20, 8, 3, 7, 11, 74, 44,
+ 26, 12, 26, 4, 5, 15, 29, 25, 34, 10,
+ 4, 5, 4, 15, 25, 35, 0, 80, 46, 28,
+ 14, 34, 6, 0, 3, 13, 124, 35, 17, 1,
+ 5, 11, 2, 14, 16, 3, 14, 26, 28, 27,
+ 15, 19, 2, 30, 1, 5, 12, 24, 36, 19,
+ 3, 24, 28, 33, 27, 33, 35, 25, 5, 21,
+ 13, 15, 9, 0, 19, 15, 13, 1, 25, 21,
+ 35, 43, 9, 69, 25, 1, 13, 18, 11, 3,
+ 34, 7, 11, 5, 26, 23, 19, 57, 58, 66,
+ 68, 54, 34, 56, 48, 38, 36, 32, 30, 22,
+ 13, 15, 29, 2, 6, 9, 43, 13, 13, 25,
+ 23, 25, 19, 21, 49, 43, 45, 62, 60, 58,
+ 48, 26, 36, 30, 6, 18, 6, 4, 19, 21,
+ 29, 37, 24, 30, 15, 24, 26, 6, 5, 5,
+ 9, 15, 13, 23, 41, 43, 51, 59, 75, 31,
+ 51, 65, 49, 21, 33, 1, 0, 2, 82, 22,
+ 20, 9, 40, 104, 78, 116, 20, 66, 56, 6,
+ 27, 43, 53, 105, 103, 107, 11, 84, 56, 38,
+ 18, 24, 4, 3, 9, 23, 31, 17, 4, 7,
+ 18, 18, 2, 8, 18, 24, 26, 14, 32, 30,
+ 2, 68, 58, 38, 24, 12, 0, 43, 53, 59,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 31 */
+
+ 80, 12, 31, 80, 12, 31, 17, 8, 44, 22,
+ 1, 27, 34, 46, 106, 28, 34, 5, 22, 40,
+ 0, 17, 3, 37, 69, 6, 14, 107, 115, 115,
+ 52, 3, 9, 22, 40, 0, 21, 2, 28, 2,
+ 0, 11, 21, 5, 33, 25, 55, 10, 13, 21,
+ 0, 25, 23, 49, 20, 1, 5, 13, 10, 8,
+ 44, 0, 0, 0, 3, 59, 67, 10, 5, 7,
+ 104, 14, 11, 61, 25, 37, 6, 5, 21, 5,
+ 33, 6, 21, 19, 9, 53, 55, 55, 59, 48,
+ 1, 18, 29, 1, 27, 13, 9, 17, 43, 27,
+ 49, 30, 17, 2, 37, 31, 3, 15, 4, 16,
+ 6, 6, 30, 24, 25, 3, 2, 3, 4, 21,
+ 24, 3, 18, 38, 8, 34, 34, 20, 27, 22,
+ 0, 19, 1, 51, 37, 13, 23, 3, 13, 9,
+ 20, 5, 11, 4, 12, 15, 17, 1, 59, 45,
+ 13, 63, 7, 23, 20, 3, 23, 40, 2, 3,
+ 27, 42, 21, 43, 2, 63, 34, 50, 46, 40,
+ 50, 38, 14, 44, 46, 15, 8, 22, 8, 2,
+ 21, 4, 5, 9, 0, 11, 25, 1, 9, 57,
+ 25, 21, 39, 29, 51, 64, 76, 60, 60, 58,
+ 46, 38, 46, 28, 14, 22, 2, 19, 21, 65,
+ 22, 16, 17, 16, 0, 3, 13, 21, 29, 35,
+ 39, 67, 23, 51, 73, 55, 73, 3, 66, 46,
+ 30, 10, 10, 1, 9, 17, 19, 12, 92, 60,
+ 46, 34, 60, 22, 10, 3, 5, 9, 76, 46,
+ 26, 12, 28, 6, 5, 13, 27, 23, 36, 12,
+ 4, 5, 6, 13, 23, 33, 2, 80, 46, 28,
+ 14, 36, 8, 2, 1, 11, 124, 35, 17, 0,
+ 5, 11, 2, 14, 16, 3, 14, 26, 30, 27,
+ 15, 19, 0, 30, 3, 7, 10, 22, 36, 21,
+ 5, 24, 28, 35, 29, 35, 33, 23, 3, 21,
+ 11, 15, 7, 2, 19, 15, 13, 1, 27, 21,
+ 35, 43, 7, 69, 25, 1, 13, 20, 11, 3,
+ 36, 7, 11, 5, 28, 25, 19, 59, 56, 66,
+ 68, 52, 32, 54, 46, 36, 34, 30, 28, 20,
+ 15, 17, 31, 0, 4, 13, 47, 15, 15, 27,
+ 25, 25, 19, 21, 49, 45, 43, 58, 56, 54,
+ 44, 20, 32, 26, 2, 14, 2, 0, 23, 25,
+ 31, 39, 20, 26, 17, 20, 22, 2, 9, 9,
+ 13, 19, 17, 27, 45, 47, 55, 63, 77, 35,
+ 55, 69, 47, 19, 31, 0, 2, 4, 86, 24,
+ 22, 9, 42, 108, 80, 120, 22, 64, 52, 2,
+ 33, 49, 59, 111, 107, 109, 11, 84, 56, 38,
+ 18, 24, 4, 3, 9, 21, 29, 15, 6, 5,
+ 20, 20, 4, 10, 20, 26, 28, 14, 34, 30,
+ 2, 66, 56, 34, 20, 8, 3, 47, 57, 61,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 32 */
+
+ 76, 10, 33, 76, 10, 33, 15, 10, 44, 22,
+ 3, 31, 30, 42, 104, 28, 38, 7, 22, 40,
+ 1, 19, 3, 41, 73, 4, 10, 111, 117, 117,
+ 56, 1, 11, 22, 40, 1, 21, 4, 28, 0,
+ 0, 11, 21, 7, 35, 25, 57, 10, 13, 21,
+ 1, 27, 23, 49, 20, 1, 5, 13, 10, 6,
+ 44, 0, 0, 0, 3, 61, 67, 10, 7, 9,
+ 104, 12, 11, 59, 25, 37, 6, 3, 19, 3,
+ 31, 8, 19, 17, 7, 51, 55, 53, 59, 48,
+ 1, 18, 29, 1, 27, 13, 7, 17, 43, 27,
+ 47, 30, 17, 2, 35, 31, 3, 15, 4, 16,
+ 6, 6, 30, 24, 27, 5, 2, 5, 4, 21,
+ 22, 5, 16, 38, 6, 32, 34, 18, 31, 20,
+ 1, 23, 3, 53, 37, 13, 23, 3, 13, 9,
+ 22, 5, 11, 4, 14, 17, 17, 1, 63, 45,
+ 13, 63, 7, 25, 20, 3, 25, 40, 2, 3,
+ 31, 42, 23, 45, 2, 65, 30, 48, 44, 38,
+ 48, 36, 12, 42, 44, 19, 6, 20, 6, 2,
+ 23, 2, 7, 11, 1, 13, 27, 3, 11, 61,
+ 29, 23, 43, 33, 51, 60, 72, 56, 56, 54,
+ 40, 32, 40, 22, 8, 16, 3, 25, 27, 71,
+ 18, 12, 21, 10, 5, 9, 19, 27, 35, 41,
+ 45, 73, 25, 55, 77, 57, 75, 3, 66, 46,
+ 30, 10, 10, 3, 9, 17, 19, 12, 92, 60,
+ 46, 34, 62, 22, 10, 3, 5, 9, 76, 46,
+ 26, 12, 28, 6, 5, 13, 25, 23, 38, 12,
+ 4, 5, 6, 13, 23, 31, 2, 80, 46, 28,
+ 14, 36, 8, 2, 1, 11, 124, 35, 17, 0,
+ 5, 11, 2, 14, 16, 3, 14, 26, 30, 27,
+ 15, 21, 1, 30, 5, 9, 8, 20, 34, 25,
+ 9, 22, 26, 37, 33, 37, 33, 23, 1, 21,
+ 11, 15, 7, 2, 21, 15, 13, 1, 29, 21,
+ 35, 43, 7, 71, 25, 1, 13, 20, 13, 3,
+ 36, 9, 13, 5, 28, 27, 21, 61, 54, 64,
+ 66, 50, 28, 52, 44, 34, 32, 28, 26, 18,
+ 17, 21, 35, 3, 0, 17, 51, 19, 19, 29,
+ 27, 27, 19, 21, 49, 47, 43, 52, 50, 50,
+ 38, 14, 26, 20, 3, 8, 3, 5, 29, 31,
+ 35, 41, 16, 22, 21, 16, 16, 3, 15, 15,
+ 19, 23, 23, 33, 51, 51, 61, 67, 81, 39,
+ 59, 73, 45, 17, 29, 0, 2, 4, 88, 24,
+ 22, 9, 42, 110, 82, 122, 22, 60, 48, 3,
+ 41, 55, 65, 117, 113, 113, 11, 84, 54, 36,
+ 18, 24, 4, 5, 9, 21, 29, 15, 6, 5,
+ 22, 20, 4, 10, 20, 26, 28, 14, 34, 30,
+ 0, 62, 52, 30, 16, 4, 7, 53, 61, 63,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 33 */
+
+ 74, 10, 33, 74, 10, 33, 11, 14, 46, 24,
+ 3, 33, 28, 40, 104, 28, 44, 7, 24, 42,
+ 1, 19, 1, 43, 75, 4, 8, 113, 119, 117,
+ 62, 2, 11, 24, 42, 1, 19, 8, 30, 0,
+ 2, 9, 19, 7, 35, 23, 57, 10, 11, 19,
+ 1, 27, 21, 49, 22, 0, 3, 11, 12, 6,
+ 44, 0, 0, 0, 1, 61, 67, 12, 7, 9,
+ 104, 12, 11, 55, 23, 37, 8, 0, 15, 0,
+ 27, 12, 15, 13, 3, 47, 53, 51, 57, 50,
+ 0, 20, 27, 0, 25, 11, 3, 15, 41, 25,
+ 43, 32, 15, 4, 31, 29, 1, 13, 6, 18,
+ 8, 8, 32, 26, 27, 5, 4, 5, 4, 19,
+ 22, 5, 16, 38, 6, 32, 34, 18, 33, 20,
+ 1, 25, 5, 53, 35, 11, 21, 1, 11, 7,
+ 24, 3, 9, 6, 18, 17, 17, 0, 65, 43,
+ 11, 63, 5, 25, 22, 1, 25, 42, 2, 3,
+ 33, 44, 23, 47, 2, 65, 28, 48, 44, 38,
+ 48, 36, 12, 42, 44, 21, 6, 20, 6, 2,
+ 23, 2, 7, 11, 1, 13, 29, 3, 11, 63,
+ 31, 23, 45, 35, 51, 58, 70, 54, 54, 52,
+ 36, 28, 36, 18, 4, 12, 7, 29, 31, 75,
+ 16, 10, 23, 6, 9, 13, 23, 31, 39, 45,
+ 49, 77, 25, 57, 79, 57, 75, 1, 68, 48,
+ 32, 12, 10, 3, 7, 15, 17, 14, 94, 62,
+ 48, 34, 64, 24, 12, 1, 3, 7, 78, 48,
+ 28, 14, 30, 8, 3, 11, 21, 21, 42, 14,
+ 6, 3, 8, 11, 21, 27, 4, 82, 48, 30,
+ 16, 38, 10, 4, 0, 9, 124, 33, 15, 2,
+ 5, 9, 2, 16, 18, 1, 16, 28, 32, 25,
+ 15, 21, 1, 32, 5, 9, 6, 20, 34, 27,
+ 11, 22, 26, 37, 35, 37, 31, 21, 0, 19,
+ 9, 13, 5, 4, 21, 13, 11, 0, 29, 19,
+ 33, 41, 5, 71, 25, 0, 13, 22, 13, 3,
+ 38, 9, 13, 3, 30, 27, 21, 61, 54, 64,
+ 66, 50, 26, 52, 44, 34, 32, 28, 26, 18,
+ 17, 23, 37, 5, 1, 19, 53, 21, 21, 29,
+ 27, 27, 17, 19, 47, 47, 41, 48, 46, 46,
+ 34, 10, 22, 16, 7, 4, 7, 9, 33, 35,
+ 37, 41, 14, 20, 23, 14, 12, 7, 19, 19,
+ 23, 27, 27, 37, 55, 53, 65, 69, 83, 41,
+ 61, 75, 41, 13, 25, 2, 4, 6, 92, 26,
+ 24, 9, 44, 114, 86, 124, 24, 58, 46, 7,
+ 47, 59, 69, 121, 117, 115, 9, 86, 54, 36,
+ 18, 26, 4, 5, 9, 19, 27, 13, 8, 3,
+ 26, 22, 6, 12, 22, 28, 30, 16, 36, 32,
+ 0, 60, 50, 28, 14, 2, 9, 57, 63, 63,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 34 */
+
+ 72, 10, 33, 72, 10, 33, 7, 16, 46, 24,
+ 3, 35, 26, 38, 104, 28, 48, 7, 26, 44,
+ 1, 21, 1, 45, 77, 2, 4, 117, 121, 119,
+ 68, 4, 11, 26, 44, 1, 19, 10, 30, 1,
+ 2, 9, 17, 7, 35, 23, 57, 10, 11, 17,
+ 1, 27, 21, 49, 22, 0, 3, 11, 12, 6,
+ 44, 0, 0, 0, 0, 61, 67, 12, 9, 9,
+ 104, 12, 11, 53, 21, 37, 8, 2, 11, 4,
+ 25, 14, 11, 11, 0, 45, 51, 49, 55, 52,
+ 0, 22, 25, 0, 25, 9, 0, 15, 41, 25,
+ 41, 32, 15, 4, 29, 29, 0, 13, 6, 18,
+ 8, 10, 32, 26, 29, 5, 4, 5, 4, 19,
+ 22, 5, 14, 38, 4, 32, 34, 18, 37, 20,
+ 1, 29, 7, 55, 33, 9, 19, 0, 9, 5,
+ 26, 1, 9, 8, 20, 17, 17, 0, 67, 43,
+ 9, 63, 5, 25, 24, 1, 27, 44, 2, 3,
+ 35, 46, 25, 49, 2, 67, 24, 46, 42, 36,
+ 46, 34, 10, 40, 44, 23, 4, 20, 6, 2,
+ 23, 0, 9, 13, 3, 15, 31, 5, 13, 67,
+ 33, 25, 47, 37, 51, 54, 68, 50, 50, 48,
+ 32, 24, 32, 12, 0, 8, 13, 35, 35, 81,
+ 12, 6, 27, 2, 13, 17, 27, 35, 43, 51,
+ 53, 81, 27, 61, 81, 59, 75, 1, 68, 48,
+ 32, 12, 10, 3, 7, 15, 15, 14, 94, 62,
+ 48, 34, 66, 24, 14, 1, 1, 5, 80, 50,
+ 28, 14, 32, 8, 3, 9, 19, 19, 44, 16,
+ 6, 3, 10, 9, 19, 25, 4, 82, 48, 30,
+ 16, 40, 12, 6, 2, 7, 124, 33, 15, 2,
+ 5, 9, 2, 16, 18, 1, 16, 28, 34, 25,
+ 15, 21, 3, 32, 7, 11, 4, 18, 34, 29,
+ 13, 22, 26, 39, 37, 39, 29, 19, 2, 19,
+ 9, 13, 3, 6, 21, 13, 11, 0, 31, 19,
+ 33, 41, 3, 71, 25, 0, 13, 24, 13, 3,
+ 40, 9, 13, 3, 32, 29, 21, 63, 52, 64,
+ 66, 48, 24, 50, 42, 32, 30, 26, 24, 16,
+ 19, 25, 39, 9, 3, 23, 57, 23, 23, 31,
+ 29, 27, 17, 19, 47, 49, 39, 44, 42, 42,
+ 30, 4, 18, 12, 13, 0, 11, 13, 37, 39,
+ 39, 43, 10, 16, 25, 10, 8, 11, 23, 23,
+ 27, 31, 31, 41, 59, 57, 69, 73, 85, 45,
+ 65, 79, 39, 11, 23, 4, 6, 8, 96, 28,
+ 26, 9, 46, 116, 88, 124, 26, 56, 42, 13,
+ 53, 65, 75, 125, 121, 117, 9, 86, 54, 36,
+ 18, 26, 4, 5, 9, 17, 25, 11, 10, 1,
+ 28, 24, 8, 14, 24, 30, 32, 16, 38, 32,
+ 0, 58, 46, 24, 10, 1, 13, 61, 67, 65,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 35 */
+
+ 70, 10, 33, 70, 10, 33, 3, 20, 48, 24,
+ 5, 39, 22, 36, 104, 28, 52, 7, 28, 46,
+ 1, 23, 1, 47, 79, 2, 0, 119, 123, 119,
+ 74, 6, 11, 28, 46, 1, 17, 12, 32, 1,
+ 2, 7, 15, 9, 37, 23, 57, 10, 11, 17,
+ 1, 27, 21, 49, 24, 0, 3, 11, 14, 6,
+ 44, 0, 0, 0, 0, 61, 67, 14, 11, 9,
+ 104, 12, 11, 51, 19, 37, 10, 4, 7, 8,
+ 21, 16, 7, 9, 4, 43, 49, 47, 53, 54,
+ 0, 24, 23, 0, 23, 9, 4, 15, 39, 23,
+ 39, 32, 15, 6, 25, 27, 2, 13, 6, 18,
+ 8, 12, 34, 28, 29, 5, 4, 5, 4, 19,
+ 22, 7, 14, 38, 2, 32, 34, 18, 41, 20,
+ 3, 33, 9, 57, 31, 7, 17, 2, 7, 5,
+ 28, 1, 7, 10, 22, 17, 17, 0, 69, 41,
+ 7, 63, 5, 25, 26, 1, 29, 46, 2, 3,
+ 37, 48, 25, 51, 2, 69, 22, 44, 42, 34,
+ 44, 32, 10, 38, 42, 25, 2, 20, 6, 2,
+ 25, 1, 11, 15, 5, 17, 33, 7, 13, 71,
+ 35, 25, 49, 39, 51, 52, 64, 48, 46, 44,
+ 28, 20, 28, 8, 5, 4, 17, 39, 41, 85,
+ 8, 2, 29, 1, 17, 23, 33, 39, 49, 55,
+ 57, 87, 29, 65, 83, 61, 75, 1, 70, 48,
+ 32, 12, 10, 3, 7, 15, 15, 14, 94, 62,
+ 48, 34, 68, 26, 14, 0, 1, 3, 82, 50,
+ 30, 14, 34, 10, 1, 9, 17, 17, 46, 18,
+ 8, 3, 12, 7, 17, 23, 6, 82, 48, 30,
+ 16, 42, 12, 8, 4, 5, 124, 31, 13, 4,
+ 5, 9, 2, 16, 18, 1, 16, 28, 34, 25,
+ 15, 21, 3, 34, 9, 13, 2, 16, 34, 31,
+ 15, 22, 26, 39, 39, 39, 27, 17, 4, 17,
+ 7, 11, 3, 8, 21, 13, 11, 2, 31, 19,
+ 33, 39, 1, 71, 25, 0, 13, 26, 13, 3,
+ 42, 9, 13, 3, 34, 31, 21, 65, 50, 62,
+ 64, 46, 22, 48, 40, 30, 28, 24, 22, 14,
+ 21, 27, 41, 11, 5, 25, 61, 27, 25, 33,
+ 29, 29, 17, 19, 47, 49, 37, 40, 38, 38,
+ 26, 1, 14, 8, 17, 3, 15, 17, 43, 43,
+ 43, 45, 6, 14, 29, 6, 4, 15, 27, 27,
+ 31, 35, 35, 45, 65, 61, 73, 77, 87, 49,
+ 69, 81, 37, 9, 21, 6, 8, 10, 98, 30,
+ 28, 9, 48, 120, 92, 124, 28, 52, 40, 17,
+ 59, 71, 81, 125, 125, 119, 9, 86, 54, 36,
+ 18, 26, 4, 5, 9, 17, 23, 9, 12, 0,
+ 30, 24, 8, 16, 24, 30, 32, 16, 38, 32,
+ 0, 56, 44, 20, 6, 5, 17, 65, 71, 67,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 36 */
+
+ 66, 10, 33, 66, 10, 33, 1, 22, 48, 24,
+ 5, 41, 20, 32, 104, 28, 58, 9, 30, 46,
+ 1, 25, 1, 49, 81, 0, 3, 123, 125, 121,
+ 80, 8, 11, 30, 46, 1, 17, 14, 32, 3,
+ 2, 7, 15, 9, 37, 21, 57, 10, 11, 15,
+ 3, 29, 21, 49, 24, 2, 3, 9, 14, 6,
+ 44, 0, 0, 0, 2, 63, 67, 14, 13, 9,
+ 104, 12, 11, 49, 17, 37, 10, 8, 5, 12,
+ 19, 18, 5, 5, 8, 41, 49, 45, 53, 54,
+ 0, 26, 21, 0, 23, 7, 8, 15, 39, 23,
+ 37, 32, 13, 6, 23, 27, 2, 13, 8, 18,
+ 8, 12, 34, 28, 31, 7, 4, 5, 4, 19,
+ 22, 7, 12, 38, 2, 32, 34, 16, 45, 20,
+ 3, 37, 11, 59, 29, 5, 15, 4, 7, 3,
+ 30, 0, 7, 12, 24, 17, 17, 2, 71, 41,
+ 5, 63, 5, 25, 26, 1, 31, 48, 2, 3,
+ 41, 50, 27, 53, 2, 71, 18, 42, 40, 32,
+ 42, 30, 8, 36, 42, 29, 0, 18, 6, 2,
+ 25, 3, 11, 17, 5, 19, 35, 9, 15, 75,
+ 37, 27, 51, 41, 51, 48, 62, 44, 42, 40,
+ 24, 14, 24, 2, 9, 0, 23, 45, 45, 91,
+ 4, 0, 33, 7, 21, 27, 37, 45, 53, 61,
+ 61, 91, 31, 67, 87, 63, 75, 1, 70, 50,
+ 32, 12, 10, 3, 7, 15, 13, 16, 96, 62,
+ 48, 34, 70, 26, 16, 0, 0, 3, 84, 52,
+ 30, 14, 34, 10, 1, 7, 15, 17, 48, 18,
+ 8, 3, 14, 5, 17, 21, 6, 84, 48, 30,
+ 16, 44, 14, 8, 6, 3, 124, 31, 13, 4,
+ 5, 9, 2, 16, 18, 0, 16, 28, 36, 25,
+ 15, 21, 5, 34, 11, 15, 0, 14, 34, 35,
+ 17, 20, 26, 41, 41, 41, 27, 17, 6, 17,
+ 7, 11, 1, 10, 23, 13, 11, 2, 33, 19,
+ 33, 39, 0, 71, 25, 2, 13, 26, 13, 3,
+ 44, 9, 15, 3, 36, 31, 23, 67, 48, 62,
+ 64, 44, 20, 46, 38, 28, 26, 22, 20, 12,
+ 23, 29, 43, 15, 9, 29, 65, 29, 27, 35,
+ 31, 29, 17, 19, 47, 51, 37, 34, 32, 34,
+ 20, 7, 10, 4, 23, 7, 19, 21, 47, 47,
+ 45, 47, 2, 10, 31, 2, 0, 19, 31, 33,
+ 35, 39, 39, 49, 69, 65, 77, 81, 89, 53,
+ 73, 85, 35, 7, 19, 6, 10, 12, 102, 32,
+ 30, 9, 50, 122, 94, 124, 28, 50, 36, 23,
+ 65, 77, 87, 125, 125, 121, 9, 86, 54, 36,
+ 18, 26, 4, 5, 9, 15, 21, 7, 14, 0,
+ 32, 26, 10, 18, 26, 32, 34, 18, 40, 34,
+ 0, 52, 40, 16, 2, 9, 21, 69, 75, 69,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 37 */
+
+ 64, 10, 33, 64, 10, 33, 2, 26, 48, 24,
+ 7, 45, 16, 30, 104, 28, 62, 9, 32, 48,
+ 1, 25, 0, 51, 83, 0, 7, 125, 125, 121,
+ 86, 12, 11, 32, 48, 1, 17, 16, 32, 3,
+ 2, 7, 13, 9, 39, 21, 57, 10, 11, 13,
+ 3, 29, 21, 49, 26, 2, 1, 9, 16, 6,
+ 44, 0, 0, 0, 2, 63, 67, 16, 15, 9,
+ 104, 12, 11, 47, 15, 37, 12, 10, 1, 16,
+ 15, 20, 1, 3, 12, 39, 47, 43, 51, 56,
+ 0, 28, 19, 0, 21, 5, 12, 15, 39, 21,
+ 33, 34, 13, 8, 21, 25, 4, 13, 8, 20,
+ 10, 14, 36, 30, 31, 7, 4, 5, 4, 19,
+ 22, 9, 10, 38, 0, 32, 34, 16, 49, 20,
+ 5, 39, 13, 61, 27, 3, 13, 6, 5, 1,
+ 32, 2, 5, 14, 26, 17, 17, 2, 73, 39,
+ 3, 63, 3, 25, 28, 1, 33, 50, 2, 3,
+ 43, 52, 27, 55, 2, 71, 16, 40, 40, 30,
+ 40, 28, 6, 36, 42, 31, 1, 18, 6, 2,
+ 27, 3, 13, 19, 7, 21, 37, 9, 17, 79,
+ 39, 29, 53, 43, 51, 44, 60, 40, 38, 38,
+ 20, 10, 20, 1, 13, 3, 27, 49, 49, 95,
+ 0, 3, 37, 11, 25, 31, 41, 49, 57, 67,
+ 65, 97, 33, 71, 89, 63, 75, 1, 70, 50,
+ 32, 12, 10, 3, 7, 15, 13, 16, 96, 62,
+ 48, 34, 72, 28, 18, 0, 2, 1, 86, 54,
+ 30, 16, 36, 12, 0, 7, 13, 15, 52, 20,
+ 8, 1, 16, 3, 15, 19, 8, 84, 50, 30,
+ 16, 46, 16, 10, 8, 1, 124, 31, 13, 6,
+ 5, 9, 2, 16, 18, 0, 16, 30, 36, 25,
+ 15, 21, 7, 36, 13, 17, 1, 14, 34, 37,
+ 19, 20, 26, 43, 43, 41, 25, 15, 8, 15,
+ 5, 9, 1, 12, 23, 13, 11, 2, 35, 19,
+ 33, 37, 2, 71, 25, 2, 13, 28, 13, 3,
+ 46, 9, 15, 3, 38, 33, 23, 67, 46, 62,
+ 62, 44, 18, 44, 36, 26, 26, 20, 18, 10,
+ 25, 31, 45, 17, 11, 31, 69, 31, 29, 37,
+ 33, 31, 17, 17, 47, 51, 35, 30, 28, 30,
+ 16, 11, 6, 0, 27, 11, 23, 25, 51, 51,
+ 49, 49, 1, 6, 33, 1, 3, 23, 35, 37,
+ 39, 43, 43, 53, 75, 69, 81, 85, 91, 57,
+ 77, 87, 33, 3, 17, 8, 12, 14, 106, 34,
+ 32, 9, 52, 124, 98, 124, 30, 46, 32, 27,
+ 71, 81, 93, 125, 125, 123, 9, 86, 54, 36,
+ 18, 26, 4, 5, 9, 15, 19, 5, 16, 2,
+ 34, 28, 10, 20, 28, 34, 36, 18, 40, 34,
+ 0, 50, 38, 14, 0, 13, 25, 73, 79, 71,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 38 */
+
+ 62, 10, 35, 62, 10, 35, 6, 28, 50, 24,
+ 7, 47, 14, 28, 104, 28, 66, 9, 32, 50,
+ 1, 27, 0, 53, 85, 1, 9, 125, 125, 123,
+ 92, 14, 11, 32, 50, 1, 15, 18, 34, 5,
+ 4, 5, 11, 11, 39, 21, 57, 10, 9, 13,
+ 3, 29, 19, 49, 26, 2, 1, 9, 16, 6,
+ 44, 0, 0, 0, 4, 63, 67, 16, 15, 11,
+ 104, 12, 11, 45, 15, 37, 12, 12, 2, 20,
+ 13, 22, 2, 1, 16, 37, 45, 41, 49, 58,
+ 0, 30, 19, 2, 21, 5, 16, 15, 37, 21,
+ 31, 34, 13, 8, 17, 25, 6, 11, 8, 20,
+ 10, 16, 36, 30, 33, 7, 4, 5, 4, 19,
+ 22, 9, 10, 38, 1, 32, 34, 16, 53, 20,
+ 5, 43, 15, 63, 27, 1, 13, 6, 3, 1,
+ 34, 2, 5, 14, 30, 17, 17, 2, 75, 39,
+ 1, 63, 3, 25, 30, 0, 35, 50, 2, 3,
+ 45, 54, 29, 57, 2, 73, 12, 38, 38, 30,
+ 38, 28, 6, 34, 40, 33, 3, 18, 6, 2,
+ 27, 5, 15, 21, 9, 21, 39, 11, 17, 83,
+ 41, 29, 55, 45, 51, 42, 56, 38, 34, 34,
+ 16, 6, 16, 7, 19, 7, 33, 55, 55, 101,
+ 3, 7, 39, 15, 31, 37, 47, 53, 63, 71,
+ 71, 101, 35, 75, 91, 65, 75, 0, 72, 50,
+ 34, 12, 10, 3, 7, 15, 11, 16, 96, 64,
+ 48, 34, 74, 28, 18, 2, 2, 0, 88, 54,
+ 32, 16, 38, 12, 0, 5, 11, 13, 54, 22,
+ 10, 1, 16, 3, 13, 15, 8, 84, 50, 30,
+ 18, 46, 16, 12, 8, 0, 124, 29, 11, 6,
+ 5, 9, 2, 16, 18, 0, 18, 30, 38, 25,
+ 15, 23, 7, 36, 15, 19, 3, 12, 34, 39,
+ 21, 20, 26, 43, 45, 43, 23, 13, 10, 15,
+ 5, 9, 0, 12, 23, 11, 9, 4, 35, 19,
+ 33, 37, 4, 71, 25, 2, 13, 30, 13, 3,
+ 48, 11, 15, 3, 38, 35, 23, 69, 44, 60,
+ 62, 42, 16, 44, 36, 24, 24, 20, 16, 8,
+ 27, 33, 47, 21, 13, 35, 73, 35, 33, 39,
+ 33, 31, 17, 17, 45, 53, 33, 26, 24, 26,
+ 12, 17, 2, 3, 33, 15, 29, 29, 57, 55,
+ 51, 49, 3, 4, 37, 5, 9, 27, 39, 41,
+ 43, 47, 49, 57, 79, 73, 85, 87, 95, 61,
+ 81, 91, 31, 1, 15, 10, 14, 16, 108, 36,
+ 34, 9, 54, 124, 100, 124, 32, 44, 30, 33,
+ 77, 87, 97, 125, 125, 125, 7, 86, 54, 36,
+ 18, 26, 4, 5, 9, 13, 17, 5, 18, 4,
+ 36, 28, 12, 22, 28, 34, 36, 18, 42, 34,
+ 0, 48, 34, 10, 3, 15, 27, 79, 81, 73,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 39 */
+
+ 60, 10, 35, 60, 10, 35, 10, 32, 50, 24,
+ 9, 51, 10, 24, 104, 28, 72, 11, 34, 50,
+ 1, 29, 0, 55, 87, 1, 13, 125, 125, 123,
+ 98, 16, 11, 34, 50, 1, 15, 20, 34, 5,
+ 4, 5, 11, 11, 41, 19, 57, 10, 9, 11,
+ 5, 29, 19, 49, 28, 4, 1, 7, 18, 6,
+ 44, 0, 0, 0, 4, 63, 67, 18, 17, 11,
+ 104, 12, 11, 43, 13, 37, 14, 16, 4, 24,
+ 9, 24, 4, 2, 20, 35, 43, 39, 49, 60,
+ 0, 32, 17, 2, 19, 3, 20, 15, 37, 19,
+ 29, 34, 11, 10, 15, 23, 6, 11, 10, 20,
+ 10, 18, 38, 32, 33, 9, 4, 5, 4, 19,
+ 22, 11, 8, 38, 1, 32, 34, 14, 57, 20,
+ 7, 47, 17, 65, 25, 0, 11, 8, 1, 0,
+ 36, 4, 3, 16, 32, 17, 17, 4, 77, 37,
+ 0, 63, 3, 25, 30, 0, 37, 52, 2, 3,
+ 49, 56, 29, 59, 2, 75, 10, 36, 38, 28,
+ 36, 26, 4, 32, 40, 35, 5, 18, 6, 2,
+ 29, 7, 15, 23, 9, 23, 41, 13, 19, 87,
+ 43, 31, 57, 47, 51, 38, 54, 34, 30, 30,
+ 12, 0, 12, 11, 23, 11, 37, 59, 59, 105,
+ 7, 9, 43, 21, 35, 41, 51, 59, 67, 77,
+ 75, 107, 37, 77, 95, 67, 75, 0, 72, 52,
+ 34, 12, 10, 3, 7, 15, 11, 18, 98, 64,
+ 48, 34, 76, 30, 20, 2, 4, 2, 90, 56,
+ 32, 16, 40, 14, 2, 5, 9, 13, 56, 22,
+ 10, 1, 18, 1, 13, 13, 10, 86, 50, 30,
+ 18, 48, 18, 14, 10, 2, 124, 29, 11, 8,
+ 5, 9, 2, 16, 18, 2, 18, 30, 38, 25,
+ 15, 23, 9, 38, 17, 21, 5, 10, 34, 41,
+ 23, 18, 26, 45, 47, 43, 21, 13, 12, 13,
+ 3, 7, 0, 14, 23, 11, 9, 4, 37, 19,
+ 33, 35, 6, 71, 25, 4, 13, 30, 13, 3,
+ 50, 11, 15, 3, 40, 35, 25, 71, 42, 60,
+ 60, 40, 14, 42, 34, 22, 22, 18, 14, 6,
+ 29, 35, 49, 23, 17, 37, 77, 37, 35, 41,
+ 35, 33, 17, 17, 45, 53, 31, 22, 20, 22,
+ 6, 23, 1, 7, 37, 19, 33, 33, 61, 59,
+ 55, 51, 7, 0, 39, 9, 13, 31, 43, 45,
+ 47, 51, 53, 61, 85, 77, 89, 91, 97, 65,
+ 85, 93, 29, 0, 13, 10, 16, 18, 112, 38,
+ 36, 9, 56, 124, 104, 124, 32, 40, 26, 37,
+ 83, 93, 103, 125, 125, 125, 7, 86, 54, 36,
+ 18, 26, 4, 5, 9, 13, 15, 3, 20, 6,
+ 38, 30, 12, 24, 30, 36, 38, 20, 42, 36,
+ 0, 44, 32, 6, 7, 19, 31, 83, 85, 75,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 40 */
+
+ 56, 8, 35, 56, 8, 35, 12, 34, 50, 24,
+ 9, 53, 8, 22, 104, 28, 76, 11, 36, 52,
+ 1, 31, 0, 57, 91, 3, 17, 125, 125, 125,
+ 102, 18, 11, 36, 52, 1, 15, 22, 34, 7,
+ 4, 5, 9, 13, 41, 19, 59, 10, 9, 11,
+ 5, 31, 19, 49, 28, 4, 1, 7, 18, 6,
+ 44, 0, 0, 0, 6, 65, 67, 18, 19, 11,
+ 104, 12, 11, 41, 11, 37, 14, 18, 8, 28,
+ 7, 26, 8, 4, 22, 33, 43, 37, 47, 60,
+ 0, 34, 15, 2, 19, 3, 22, 15, 37, 19,
+ 27, 34, 11, 10, 13, 23, 8, 11, 10, 20,
+ 10, 18, 38, 32, 35, 9, 4, 5, 4, 19,
+ 22, 11, 6, 38, 3, 32, 34, 14, 61, 20,
+ 7, 51, 19, 67, 23, 2, 9, 10, 1, 0,
+ 38, 4, 3, 18, 34, 17, 17, 4, 81, 37,
+ 2, 63, 3, 27, 32, 0, 39, 54, 2, 3,
+ 51, 58, 31, 61, 2, 77, 6, 34, 36, 26,
+ 34, 24, 2, 30, 38, 39, 7, 16, 6, 2,
+ 29, 9, 17, 25, 11, 25, 43, 15, 21, 91,
+ 47, 33, 59, 49, 51, 34, 50, 30, 26, 26,
+ 8, 3, 8, 17, 29, 15, 43, 65, 65, 111,
+ 11, 13, 47, 25, 39, 47, 57, 63, 73, 83,
+ 79, 111, 39, 81, 97, 69, 77, 0, 72, 52,
+ 34, 12, 10, 3, 7, 15, 9, 18, 98, 64,
+ 48, 34, 78, 30, 20, 2, 4, 2, 92, 56,
+ 32, 16, 40, 14, 2, 3, 7, 11, 58, 24,
+ 10, 1, 20, 0, 11, 11, 10, 86, 50, 30,
+ 18, 50, 18, 14, 12, 2, 124, 29, 11, 8,
+ 5, 9, 2, 16, 18, 2, 18, 30, 40, 25,
+ 15, 23, 11, 38, 19, 23, 7, 8, 34, 45,
+ 27, 18, 26, 47, 49, 45, 21, 11, 14, 13,
+ 3, 7, 2, 16, 25, 11, 9, 4, 39, 19,
+ 33, 35, 6, 73, 25, 4, 13, 32, 13, 3,
+ 50, 11, 17, 3, 42, 37, 25, 73, 40, 58,
+ 60, 38, 10, 40, 32, 20, 20, 16, 12, 4,
+ 31, 37, 51, 27, 19, 41, 81, 41, 37, 43,
+ 37, 33, 17, 17, 45, 55, 31, 16, 14, 18,
+ 2, 29, 7, 13, 43, 23, 37, 37, 67, 63,
+ 57, 53, 11, 3, 43, 13, 17, 37, 49, 51,
+ 53, 55, 57, 67, 89, 81, 95, 95, 99, 69,
+ 89, 97, 27, 2, 11, 12, 18, 18, 114, 40,
+ 36, 9, 56, 124, 106, 124, 34, 38, 22, 43,
+ 89, 99, 109, 125, 125, 125, 7, 86, 54, 36,
+ 18, 26, 4, 5, 9, 11, 15, 1, 22, 6,
+ 40, 30, 14, 24, 30, 36, 38, 20, 44, 36,
+ 1, 42, 28, 2, 11, 23, 35, 87, 89, 77,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 41 */
+
+ 54, 8, 35, 54, 8, 35, 16, 36, 52, 24,
+ 9, 55, 6, 20, 104, 28, 80, 11, 38, 54,
+ 1, 31, 2, 59, 93, 5, 21, 125, 125, 125,
+ 108, 22, 11, 38, 54, 1, 13, 24, 36, 9,
+ 4, 3, 7, 13, 41, 19, 59, 10, 9, 9,
+ 5, 31, 19, 49, 28, 4, 0, 7, 18, 6,
+ 44, 0, 0, 0, 8, 65, 67, 18, 21, 11,
+ 104, 12, 11, 39, 9, 37, 14, 20, 12, 32,
+ 3, 30, 12, 6, 26, 31, 41, 35, 45, 62,
+ 2, 36, 13, 2, 17, 1, 26, 15, 35, 19,
+ 23, 36, 11, 10, 9, 23, 10, 11, 10, 22,
+ 12, 20, 38, 32, 35, 9, 6, 5, 4, 17,
+ 22, 11, 6, 38, 5, 32, 34, 14, 65, 20,
+ 7, 53, 21, 67, 21, 4, 7, 12, 0, 2,
+ 40, 6, 3, 20, 36, 17, 17, 4, 83, 35,
+ 4, 63, 1, 27, 34, 0, 41, 56, 2, 3,
+ 53, 60, 33, 63, 2, 77, 4, 32, 36, 24,
+ 32, 22, 2, 30, 38, 41, 9, 16, 6, 2,
+ 29, 9, 19, 27, 13, 27, 45, 15, 21, 93,
+ 49, 33, 61, 51, 51, 32, 48, 28, 24, 24,
+ 4, 7, 4, 21, 33, 19, 47, 71, 69, 117,
+ 13, 17, 49, 29, 43, 51, 61, 67, 77, 87,
+ 83, 115, 39, 85, 99, 69, 77, 0, 74, 52,
+ 34, 14, 10, 3, 5, 15, 7, 18, 98, 64,
+ 50, 34, 80, 32, 22, 4, 6, 4, 94, 58,
+ 34, 18, 42, 16, 2, 1, 5, 9, 62, 26,
+ 12, 0, 22, 2, 9, 9, 12, 86, 52, 32,
+ 18, 52, 20, 16, 14, 4, 124, 27, 9, 10,
+ 5, 9, 2, 18, 20, 2, 18, 32, 42, 25,
+ 15, 23, 11, 38, 21, 23, 9, 8, 34, 47,
+ 29, 18, 26, 47, 51, 47, 19, 9, 16, 13,
+ 1, 7, 4, 18, 25, 11, 9, 6, 39, 19,
+ 33, 35, 8, 73, 25, 4, 13, 34, 13, 3,
+ 52, 11, 17, 3, 44, 39, 25, 73, 38, 58,
+ 60, 38, 8, 38, 30, 18, 20, 14, 10, 4,
+ 31, 39, 53, 29, 21, 45, 85, 43, 39, 43,
+ 37, 33, 15, 15, 45, 57, 29, 12, 10, 14,
+ 1, 33, 11, 17, 47, 27, 41, 41, 71, 67,
+ 59, 55, 15, 5, 45, 17, 21, 41, 53, 55,
+ 57, 59, 61, 71, 93, 83, 99, 99, 101, 73,
+ 93, 101, 25, 6, 7, 14, 20, 20, 118, 42,
+ 38, 9, 58, 124, 108, 124, 36, 36, 20, 47,
+ 95, 103, 115, 125, 125, 125, 7, 86, 54, 36,
+ 18, 28, 4, 5, 9, 9, 13, 0, 24, 8,
+ 44, 32, 16, 26, 32, 38, 40, 20, 46, 36,
+ 1, 40, 26, 0, 13, 27, 39, 91, 93, 77,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 42 */
+
+ 52, 8, 35, 52, 8, 35, 20, 40, 52, 24,
+ 11, 59, 2, 16, 104, 28, 86, 13, 40, 54,
+ 1, 33, 2, 61, 95, 5, 25, 125, 125, 125,
+ 114, 24, 11, 40, 54, 1, 13, 26, 36, 9,
+ 4, 3, 7, 13, 43, 17, 59, 10, 9, 7,
+ 7, 31, 19, 49, 30, 6, 0, 5, 20, 6,
+ 44, 0, 0, 0, 8, 65, 67, 20, 23, 11,
+ 104, 12, 11, 37, 7, 37, 16, 24, 14, 36,
+ 1, 32, 14, 10, 30, 29, 39, 33, 45, 64,
+ 2, 38, 11, 2, 17, 0, 30, 15, 35, 17,
+ 21, 36, 9, 12, 7, 21, 10, 11, 12, 22,
+ 12, 22, 40, 34, 37, 11, 6, 5, 4, 17,
+ 22, 13, 4, 38, 5, 32, 34, 12, 69, 20,
+ 9, 57, 23, 69, 19, 6, 5, 14, 2, 4,
+ 42, 8, 1, 22, 38, 17, 17, 6, 85, 35,
+ 6, 63, 1, 27, 34, 0, 43, 58, 2, 3,
+ 57, 62, 33, 65, 2, 79, 0, 30, 34, 22,
+ 30, 20, 0, 28, 38, 43, 11, 16, 6, 2,
+ 31, 11, 19, 29, 13, 29, 47, 17, 23, 97,
+ 51, 35, 63, 53, 51, 28, 46, 24, 20, 20,
+ 0, 13, 0, 27, 37, 23, 53, 75, 73, 121,
+ 17, 19, 53, 35, 47, 55, 65, 73, 81, 93,
+ 87, 121, 41, 87, 103, 71, 77, 0, 74, 54,
+ 34, 14, 10, 3, 5, 15, 7, 20, 100, 64,
+ 50, 34, 82, 32, 24, 4, 8, 6, 96, 60,
+ 34, 18, 44, 16, 4, 1, 3, 9, 64, 26,
+ 12, 0, 24, 4, 9, 7, 12, 88, 52, 32,
+ 18, 54, 22, 18, 16, 6, 124, 27, 9, 10,
+ 5, 9, 2, 18, 20, 4, 18, 32, 42, 25,
+ 15, 23, 13, 40, 23, 25, 11, 6, 34, 49,
+ 31, 16, 26, 49, 53, 47, 17, 9, 18, 11,
+ 1, 5, 4, 20, 25, 11, 9, 6, 41, 19,
+ 33, 33, 10, 73, 25, 6, 13, 34, 13, 3,
+ 54, 11, 17, 3, 46, 39, 27, 75, 36, 58,
+ 58, 36, 6, 36, 28, 16, 18, 12, 8, 2,
+ 33, 41, 55, 33, 25, 47, 89, 45, 41, 45,
+ 39, 35, 15, 15, 45, 57, 27, 8, 6, 10,
+ 7, 39, 15, 21, 53, 31, 45, 45, 75, 71,
+ 63, 57, 19, 9, 47, 21, 25, 45, 57, 59,
+ 61, 63, 65, 75, 99, 87, 103, 103, 103, 77,
+ 97, 103, 23, 8, 5, 14, 22, 22, 122, 44,
+ 40, 9, 60, 124, 112, 124, 36, 32, 16, 53,
+ 101, 109, 121, 125, 125, 125, 7, 86, 54, 36,
+ 18, 28, 4, 5, 9, 9, 11, 2, 26, 10,
+ 46, 34, 16, 28, 34, 40, 42, 22, 46, 38,
+ 1, 36, 22, 3, 17, 31, 43, 95, 97, 79,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 43 */
+
+ 50, 8, 37, 50, 8, 37, 24, 42, 54, 24,
+ 11, 61, 0, 14, 104, 28, 90, 13, 40, 56,
+ 1, 35, 2, 63, 97, 7, 27, 125, 125, 125,
+ 120, 26, 11, 40, 56, 1, 11, 28, 38, 11,
+ 6, 1, 5, 15, 43, 17, 59, 10, 7, 7,
+ 7, 31, 17, 49, 30, 6, 0, 5, 20, 6,
+ 44, 0, 0, 0, 10, 65, 67, 20, 23, 13,
+ 104, 12, 11, 35, 7, 37, 16, 26, 18, 40,
+ 2, 34, 18, 12, 34, 27, 37, 31, 43, 66,
+ 2, 40, 11, 4, 15, 0, 34, 15, 33, 17,
+ 19, 36, 9, 12, 3, 21, 12, 9, 12, 22,
+ 12, 24, 40, 34, 37, 11, 6, 5, 4, 17,
+ 22, 13, 4, 38, 7, 32, 34, 12, 73, 20,
+ 9, 61, 25, 71, 19, 8, 5, 14, 4, 4,
+ 44, 8, 1, 22, 42, 17, 17, 6, 87, 33,
+ 8, 63, 1, 27, 36, 2, 45, 58, 2, 3,
+ 59, 64, 35, 67, 2, 81, 1, 28, 34, 22,
+ 28, 20, 0, 26, 36, 45, 13, 16, 6, 2,
+ 31, 13, 21, 31, 15, 29, 49, 19, 23, 101,
+ 53, 35, 65, 55, 51, 26, 42, 22, 16, 16,
+ 3, 17, 3, 31, 43, 27, 57, 81, 79, 125,
+ 21, 23, 55, 39, 53, 61, 71, 77, 87, 97,
+ 93, 125, 43, 91, 105, 73, 77, 2, 76, 54,
+ 36, 14, 10, 3, 5, 15, 5, 20, 100, 66,
+ 50, 34, 84, 34, 24, 6, 8, 8, 98, 60,
+ 36, 18, 46, 18, 4, 0, 1, 7, 66, 28,
+ 14, 0, 24, 4, 7, 3, 14, 88, 52, 32,
+ 20, 54, 22, 20, 16, 8, 124, 25, 7, 12,
+ 5, 9, 2, 18, 20, 4, 20, 32, 44, 25,
+ 15, 25, 13, 40, 25, 27, 13, 4, 34, 51,
+ 33, 16, 26, 49, 55, 49, 15, 7, 20, 11,
+ 0, 5, 6, 20, 25, 9, 7, 8, 41, 19,
+ 33, 33, 12, 73, 25, 6, 13, 36, 13, 3,
+ 56, 13, 17, 3, 46, 41, 27, 77, 34, 56,
+ 58, 34, 4, 36, 28, 14, 16, 12, 6, 0,
+ 35, 43, 57, 35, 27, 51, 93, 49, 45, 47,
+ 39, 35, 15, 15, 43, 59, 25, 4, 2, 6,
+ 11, 45, 19, 25, 57, 35, 51, 49, 81, 75,
+ 65, 57, 21, 11, 51, 25, 31, 49, 61, 63,
+ 65, 67, 71, 79, 103, 91, 107, 105, 107, 81,
+ 101, 107, 21, 10, 3, 16, 24, 24, 124, 46,
+ 42, 9, 62, 124, 114, 124, 38, 30, 14, 57,
+ 107, 115, 125, 125, 125, 125, 5, 86, 54, 36,
+ 18, 28, 4, 5, 9, 7, 9, 2, 28, 12,
+ 48, 34, 18, 30, 34, 40, 42, 22, 48, 38,
+ 1, 34, 20, 7, 21, 33, 45, 101, 99, 81,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 44 */
+
+ 46, 8, 37, 46, 8, 37, 26, 46, 54, 24,
+ 13, 65, 3, 12, 104, 28, 94, 13, 42, 58,
+ 1, 37, 2, 65, 99, 7, 31, 125, 125, 125,
+ 124, 28, 11, 42, 58, 1, 11, 30, 38, 11,
+ 6, 1, 3, 15, 45, 17, 59, 10, 7, 5,
+ 7, 33, 17, 49, 32, 6, 0, 5, 22, 6,
+ 44, 0, 0, 0, 10, 67, 67, 22, 25, 13,
+ 104, 12, 11, 33, 5, 37, 18, 28, 22, 44,
+ 4, 36, 22, 14, 38, 25, 37, 29, 41, 66,
+ 2, 42, 9, 4, 15, 2, 38, 15, 33, 15,
+ 17, 36, 9, 14, 1, 19, 14, 9, 12, 22,
+ 12, 24, 42, 36, 39, 11, 6, 5, 4, 17,
+ 22, 15, 2, 38, 9, 32, 34, 12, 77, 20,
+ 11, 65, 27, 73, 17, 10, 3, 16, 4, 6,
+ 46, 10, 0, 24, 44, 17, 17, 6, 89, 33,
+ 10, 63, 1, 27, 38, 2, 47, 60, 2, 3,
+ 61, 66, 35, 69, 2, 83, 5, 26, 32, 20,
+ 26, 18, 1, 24, 36, 49, 15, 14, 6, 2,
+ 33, 15, 23, 33, 17, 31, 51, 21, 25, 105,
+ 55, 37, 67, 57, 51, 22, 40, 18, 12, 12,
+ 7, 21, 7, 37, 47, 31, 63, 85, 83, 125,
+ 25, 27, 59, 43, 57, 65, 75, 81, 91, 103,
+ 97, 125, 45, 95, 107, 75, 77, 2, 76, 54,
+ 36, 14, 10, 3, 5, 15, 5, 20, 100, 66,
+ 50, 34, 86, 34, 26, 6, 10, 8, 100, 62,
+ 36, 18, 46, 18, 6, 0, 0, 5, 68, 30,
+ 14, 0, 26, 6, 5, 1, 14, 88, 52, 32,
+ 20, 56, 24, 20, 18, 10, 124, 25, 7, 12,
+ 5, 9, 2, 18, 20, 4, 20, 32, 44, 25,
+ 15, 25, 15, 42, 27, 29, 15, 2, 34, 55,
+ 35, 16, 26, 51, 57, 49, 15, 5, 22, 9,
+ 0, 3, 6, 22, 27, 9, 7, 8, 43, 19,
+ 33, 31, 14, 73, 25, 6, 13, 38, 13, 3,
+ 58, 13, 19, 3, 48, 43, 27, 79, 32, 56,
+ 56, 32, 2, 34, 26, 12, 14, 10, 4, 1,
+ 37, 45, 59, 39, 29, 53, 97, 51, 47, 49,
+ 41, 37, 15, 15, 43, 59, 25, 1, 3, 2,
+ 15, 51, 23, 29, 63, 39, 55, 53, 85, 79,
+ 69, 59, 25, 15, 53, 29, 35, 53, 65, 69,
+ 69, 71, 75, 83, 109, 95, 111, 109, 109, 85,
+ 105, 109, 19, 12, 1, 18, 26, 26, 124, 48,
+ 44, 9, 64, 124, 118, 124, 40, 26, 10, 63,
+ 113, 121, 125, 125, 125, 125, 5, 86, 54, 36,
+ 18, 28, 4, 5, 9, 7, 7, 4, 30, 12,
+ 50, 36, 18, 32, 36, 42, 44, 22, 48, 38,
+ 1, 32, 16, 11, 25, 37, 49, 105, 103, 83,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 45 */
+
+ 44, 8, 37, 44, 8, 37, 30, 48, 54, 24,
+ 13, 67, 5, 8, 104, 28, 100, 15, 44, 58,
+ 1, 37, 4, 67, 101, 9, 35, 125, 125, 125,
+ 124, 32, 11, 44, 58, 1, 11, 32, 38, 13,
+ 6, 1, 3, 15, 45, 15, 59, 10, 7, 3,
+ 9, 33, 17, 49, 32, 8, 2, 3, 22, 6,
+ 44, 0, 0, 0, 12, 67, 67, 22, 27, 13,
+ 104, 12, 11, 31, 3, 37, 18, 32, 24, 48,
+ 8, 38, 24, 18, 42, 23, 35, 27, 41, 68,
+ 2, 44, 7, 4, 13, 4, 42, 15, 33, 15,
+ 13, 38, 7, 14, 0, 19, 14, 9, 14, 24,
+ 14, 26, 42, 36, 39, 13, 6, 5, 4, 17,
+ 22, 15, 0, 38, 9, 32, 34, 10, 81, 20,
+ 11, 67, 29, 75, 15, 12, 1, 18, 6, 8,
+ 48, 12, 0, 26, 46, 17, 17, 8, 91, 31,
+ 12, 63, 0, 27, 38, 2, 49, 62, 2, 3,
+ 65, 68, 37, 71, 2, 83, 7, 24, 32, 18,
+ 24, 16, 3, 24, 36, 51, 17, 14, 6, 2,
+ 33, 15, 23, 35, 17, 33, 53, 21, 27, 109,
+ 57, 39, 69, 59, 51, 18, 38, 14, 8, 10,
+ 11, 27, 11, 41, 51, 35, 67, 91, 87, 125,
+ 29, 29, 63, 49, 61, 69, 79, 87, 95, 109,
+ 101, 125, 47, 97, 111, 75, 77, 2, 76, 56,
+ 36, 14, 10, 3, 5, 15, 3, 22, 102, 66,
+ 50, 34, 88, 36, 28, 6, 12, 10, 102, 64,
+ 36, 20, 48, 20, 6, 2, 2, 5, 72, 30,
+ 14, 2, 28, 8, 5, 0, 16, 90, 54, 32,
+ 20, 58, 26, 22, 20, 12, 124, 25, 7, 14,
+ 5, 9, 2, 18, 20, 6, 20, 34, 46, 25,
+ 15, 25, 17, 42, 29, 31, 17, 2, 34, 57,
+ 37, 14, 26, 53, 59, 51, 13, 5, 24, 9,
+ 2, 3, 8, 24, 27, 9, 7, 8, 45, 19,
+ 33, 31, 16, 73, 25, 8, 13, 38, 13, 3,
+ 60, 13, 19, 3, 50, 43, 29, 79, 30, 56,
+ 56, 32, 0, 32, 24, 10, 14, 8, 2, 3,
+ 39, 47, 61, 41, 33, 57, 101, 53, 49, 51,
+ 43, 37, 15, 13, 43, 61, 23, 5, 7, 1,
+ 21, 55, 27, 33, 67, 43, 59, 57, 89, 83,
+ 71, 61, 29, 19, 55, 33, 39, 57, 69, 73,
+ 73, 75, 79, 87, 113, 99, 115, 113, 111, 89,
+ 109, 113, 17, 16, 0, 18, 28, 28, 124, 50,
+ 46, 9, 66, 124, 120, 124, 40, 24, 6, 67,
+ 119, 125, 125, 125, 125, 125, 5, 86, 54, 36,
+ 18, 28, 4, 5, 9, 5, 5, 6, 32, 14,
+ 52, 38, 20, 34, 38, 44, 46, 24, 50, 40,
+ 1, 28, 14, 13, 27, 41, 53, 109, 107, 85,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 46 */
+
+ 42, 8, 37, 42, 8, 37, 34, 52, 56, 24,
+ 15, 71, 9, 6, 104, 28, 104, 15, 46, 60,
+ 1, 39, 4, 69, 103, 9, 39, 125, 125, 125,
+ 124, 34, 11, 46, 60, 1, 9, 34, 40, 13,
+ 6, 0, 1, 17, 47, 15, 59, 10, 7, 3,
+ 9, 33, 17, 49, 34, 8, 2, 3, 24, 6,
+ 44, 0, 0, 0, 12, 67, 67, 24, 29, 13,
+ 104, 12, 11, 29, 1, 37, 20, 34, 28, 52,
+ 10, 40, 28, 20, 46, 21, 33, 25, 39, 70,
+ 2, 46, 5, 4, 13, 4, 46, 15, 31, 13,
+ 11, 38, 7, 16, 4, 17, 16, 9, 14, 24,
+ 14, 28, 44, 38, 41, 13, 6, 5, 4, 17,
+ 22, 17, 0, 38, 11, 32, 34, 10, 85, 20,
+ 13, 71, 31, 77, 13, 14, 0, 20, 8, 8,
+ 50, 12, 2, 28, 48, 17, 17, 8, 93, 31,
+ 14, 63, 0, 27, 40, 2, 51, 64, 2, 3,
+ 67, 70, 37, 73, 2, 85, 11, 22, 30, 16,
+ 22, 14, 3, 22, 34, 53, 19, 14, 6, 2,
+ 35, 17, 25, 37, 19, 35, 55, 23, 27, 113,
+ 59, 39, 71, 61, 51, 16, 34, 12, 4, 6,
+ 15, 31, 15, 47, 57, 39, 73, 95, 93, 125,
+ 33, 33, 65, 53, 65, 75, 85, 91, 101, 113,
+ 105, 125, 49, 101, 113, 77, 77, 2, 78, 56,
+ 36, 14, 10, 3, 5, 15, 3, 22, 102, 66,
+ 50, 34, 90, 36, 28, 8, 12, 12, 104, 64,
+ 38, 20, 50, 20, 8, 2, 4, 3, 74, 32,
+ 16, 2, 30, 10, 3, 2, 16, 90, 54, 32,
+ 20, 60, 26, 24, 22, 14, 124, 23, 5, 14,
+ 5, 9, 2, 18, 20, 6, 20, 34, 46, 25,
+ 15, 25, 17, 44, 31, 33, 19, 0, 34, 59,
+ 39, 14, 26, 53, 61, 51, 11, 3, 26, 7,
+ 2, 1, 8, 26, 27, 9, 7, 10, 45, 19,
+ 33, 29, 18, 73, 25, 8, 13, 40, 13, 3,
+ 62, 13, 19, 3, 52, 45, 29, 81, 28, 54,
+ 54, 30, 1, 30, 22, 8, 12, 6, 0, 5,
+ 41, 49, 63, 45, 35, 59, 105, 57, 51, 53,
+ 43, 39, 15, 13, 43, 61, 21, 9, 11, 5,
+ 25, 61, 31, 37, 73, 47, 63, 61, 95, 87,
+ 75, 63, 33, 21, 59, 37, 43, 61, 73, 77,
+ 77, 79, 83, 91, 119, 103, 119, 117, 113, 93,
+ 113, 115, 15, 18, 2, 20, 30, 30, 124, 52,
+ 48, 9, 68, 124, 124, 124, 42, 20, 4, 73,
+ 125, 125, 125, 125, 125, 125, 5, 86, 54, 36,
+ 18, 28, 4, 5, 9, 5, 3, 8, 34, 16,
+ 54, 38, 20, 36, 38, 44, 46, 24, 50, 40,
+ 1, 26, 10, 17, 31, 45, 57, 113, 111, 87,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 47 */
+
+ 40, 8, 37, 40, 8, 37, 38, 54, 56, 24,
+ 15, 73, 11, 4, 104, 28, 108, 15, 48, 62,
+ 1, 41, 4, 71, 105, 11, 43, 125, 125, 125,
+ 124, 36, 11, 48, 62, 1, 9, 36, 40, 15,
+ 6, 0, 0, 17, 47, 15, 59, 10, 7, 1,
+ 9, 33, 17, 49, 34, 8, 2, 3, 24, 6,
+ 44, 0, 0, 0, 14, 67, 67, 24, 31, 13,
+ 104, 12, 11, 27, 0, 37, 20, 36, 32, 56,
+ 14, 42, 32, 22, 50, 19, 31, 23, 37, 72,
+ 2, 48, 3, 4, 11, 6, 50, 15, 31, 13,
+ 9, 38, 7, 16, 6, 17, 18, 9, 14, 24,
+ 14, 30, 44, 38, 41, 13, 6, 5, 4, 17,
+ 22, 17, 1, 38, 13, 32, 34, 10, 89, 20,
+ 13, 75, 33, 79, 11, 16, 2, 22, 10, 10,
+ 52, 14, 2, 30, 50, 17, 17, 8, 95, 29,
+ 16, 63, 0, 27, 42, 2, 53, 66, 2, 3,
+ 69, 72, 39, 75, 2, 87, 13, 20, 30, 14,
+ 20, 12, 5, 20, 34, 55, 21, 14, 6, 2,
+ 35, 19, 27, 39, 21, 37, 57, 25, 29, 117,
+ 61, 41, 73, 63, 51, 12, 32, 8, 0, 2,
+ 19, 35, 19, 51, 61, 43, 77, 101, 97, 125,
+ 37, 37, 69, 57, 69, 79, 89, 95, 105, 119,
+ 109, 125, 51, 105, 115, 79, 77, 2, 78, 56,
+ 36, 14, 10, 3, 5, 15, 1, 22, 102, 66,
+ 50, 34, 92, 38, 30, 8, 14, 14, 106, 66,
+ 38, 20, 52, 22, 8, 4, 6, 1, 76, 34,
+ 16, 2, 32, 12, 1, 4, 18, 90, 54, 32,
+ 20, 62, 28, 26, 24, 16, 124, 23, 5, 16,
+ 5, 9, 2, 18, 20, 6, 20, 34, 48, 25,
+ 15, 25, 19, 44, 33, 35, 21, 1, 34, 61,
+ 41, 14, 26, 55, 63, 53, 9, 1, 28, 7,
+ 4, 1, 10, 28, 27, 9, 7, 10, 47, 19,
+ 33, 29, 20, 73, 25, 8, 13, 42, 13, 3,
+ 64, 13, 19, 3, 54, 47, 29, 83, 26, 54,
+ 54, 28, 3, 28, 20, 6, 10, 4, 1, 7,
+ 43, 51, 65, 47, 37, 63, 109, 59, 53, 55,
+ 45, 39, 15, 13, 43, 63, 19, 13, 15, 9,
+ 29, 67, 35, 41, 77, 51, 67, 65, 99, 91,
+ 77, 65, 37, 25, 61, 41, 47, 65, 77, 81,
+ 81, 83, 87, 95, 123, 107, 123, 121, 115, 97,
+ 117, 119, 13, 20, 4, 22, 32, 32, 124, 54,
+ 50, 9, 70, 124, 124, 124, 44, 18, 0, 77,
+ 125, 125, 125, 125, 125, 125, 5, 86, 54, 36,
+ 18, 28, 4, 5, 9, 3, 1, 10, 36, 18,
+ 56, 40, 22, 38, 40, 46, 48, 24, 52, 40,
+ 1, 24, 8, 21, 35, 49, 61, 117, 115, 89,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 48 */
+
+ 36, 6, 39, 36, 6, 39, 40, 56, 56, 24,
+ 17, 77, 15, 0, 102, 28, 112, 17, 48, 62,
+ 3, 43, 4, 75, 109, 13, 47, 125, 125, 125,
+ 124, 38, 13, 48, 62, 3, 9, 38, 40, 17,
+ 6, 0, 0, 19, 49, 15, 61, 10, 7, 1,
+ 11, 35, 17, 49, 34, 8, 2, 3, 24, 4,
+ 44, 0, 0, 0, 14, 69, 67, 24, 33, 15,
+ 104, 10, 11, 25, 0, 37, 20, 38, 34, 58,
+ 16, 44, 34, 24, 52, 17, 31, 21, 37, 72,
+ 2, 48, 3, 4, 11, 6, 52, 15, 31, 13,
+ 7, 38, 7, 16, 8, 17, 18, 9, 14, 24,
+ 14, 30, 44, 38, 43, 15, 6, 7, 4, 17,
+ 20, 19, 3, 38, 15, 30, 34, 8, 93, 18,
+ 15, 79, 35, 81, 11, 16, 2, 22, 10, 10,
+ 54, 14, 2, 30, 52, 19, 17, 8, 99, 29,
+ 16, 63, 0, 29, 42, 2, 55, 66, 2, 3,
+ 73, 72, 41, 77, 2, 89, 17, 18, 28, 12,
+ 18, 10, 7, 18, 32, 59, 23, 12, 4, 2,
+ 37, 21, 29, 41, 23, 39, 59, 27, 31, 121,
+ 65, 43, 77, 67, 51, 8, 28, 4, 3, 1,
+ 25, 41, 25, 57, 67, 49, 83, 107, 103, 125,
+ 41, 41, 73, 63, 75, 85, 95, 101, 111, 125,
+ 115, 125, 53, 109, 119, 81, 79, 2, 78, 56,
+ 36, 14, 10, 5, 5, 15, 1, 22, 102, 66,
+ 50, 34, 94, 38, 30, 8, 14, 14, 106, 66,
+ 38, 20, 52, 22, 8, 4, 8, 1, 78, 34,
+ 16, 2, 32, 12, 1, 6, 18, 90, 54, 32,
+ 20, 62, 28, 26, 24, 16, 124, 23, 5, 16,
+ 5, 9, 2, 18, 20, 6, 20, 34, 48, 25,
+ 15, 27, 21, 44, 35, 37, 23, 3, 32, 65,
+ 45, 12, 24, 57, 67, 55, 9, 1, 30, 7,
+ 4, 1, 10, 28, 29, 9, 7, 10, 49, 19,
+ 33, 29, 20, 75, 25, 8, 13, 42, 15, 3,
+ 64, 15, 21, 3, 54, 49, 31, 85, 24, 52,
+ 52, 26, 7, 26, 18, 4, 8, 2, 3, 9,
+ 45, 55, 69, 51, 41, 67, 113, 63, 57, 57,
+ 47, 41, 15, 13, 43, 65, 19, 19, 21, 13,
+ 35, 73, 41, 47, 83, 57, 73, 71, 105, 97,
+ 81, 67, 41, 29, 65, 45, 53, 71, 83, 87,
+ 87, 87, 93, 101, 125, 111, 125, 125, 119, 101,
+ 121, 123, 11, 22, 6, 22, 32, 32, 124, 54,
+ 50, 9, 70, 124, 124, 124, 44, 14, 3, 83,
+ 125, 125, 125, 125, 125, 125, 5, 86, 52, 34,
+ 18, 28, 4, 7, 9, 3, 1, 10, 36, 18,
+ 58, 40, 22, 38, 40, 46, 48, 24, 52, 40,
+ 3, 20, 4, 25, 39, 53, 65, 123, 119, 91,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 49 */
+
+ 34, 6, 39, 34, 6, 39, 44, 60, 58, 26,
+ 17, 79, 17, 1, 102, 28, 118, 17, 50, 64,
+ 3, 43, 6, 77, 111, 13, 49, 125, 125, 125,
+ 124, 42, 13, 50, 64, 3, 7, 42, 42, 17,
+ 8, 2, 2, 19, 49, 13, 61, 10, 5, 0,
+ 11, 35, 15, 49, 36, 10, 4, 1, 26, 4,
+ 44, 0, 0, 0, 16, 69, 67, 26, 33, 15,
+ 104, 10, 11, 21, 2, 37, 22, 42, 38, 62,
+ 20, 48, 38, 28, 56, 13, 29, 19, 35, 74,
+ 4, 50, 1, 6, 9, 8, 56, 13, 29, 11,
+ 3, 40, 5, 18, 12, 15, 20, 7, 16, 26,
+ 16, 32, 46, 40, 43, 15, 8, 7, 4, 15,
+ 20, 19, 3, 38, 15, 30, 34, 8, 95, 18,
+ 15, 81, 37, 81, 9, 18, 4, 24, 12, 12,
+ 56, 16, 4, 32, 56, 19, 17, 10, 101, 27,
+ 18, 63, 2, 29, 44, 4, 55, 68, 2, 3,
+ 75, 74, 41, 79, 2, 89, 19, 18, 28, 12,
+ 18, 10, 7, 18, 32, 61, 23, 12, 4, 2,
+ 37, 21, 29, 41, 23, 39, 61, 27, 31, 123,
+ 67, 43, 79, 69, 51, 6, 26, 2, 5, 3,
+ 29, 45, 29, 61, 71, 53, 87, 111, 107, 125,
+ 43, 43, 75, 67, 79, 89, 99, 105, 115, 125,
+ 119, 125, 53, 111, 121, 81, 79, 4, 80, 58,
+ 38, 16, 10, 5, 3, 13, 0, 24, 104, 68,
+ 52, 34, 96, 40, 32, 10, 16, 16, 108, 68,
+ 40, 22, 54, 24, 10, 6, 12, 0, 82, 36,
+ 18, 4, 34, 14, 0, 10, 20, 92, 56, 34,
+ 22, 64, 30, 28, 26, 18, 124, 21, 3, 18,
+ 5, 7, 2, 20, 22, 8, 22, 36, 50, 23,
+ 15, 27, 21, 46, 35, 37, 25, 3, 32, 67,
+ 47, 12, 24, 57, 69, 55, 7, 0, 32, 5,
+ 6, 0, 12, 30, 29, 7, 5, 12, 49, 17,
+ 31, 27, 22, 75, 25, 10, 13, 44, 15, 3,
+ 66, 15, 21, 1, 56, 49, 31, 85, 24, 52,
+ 52, 26, 9, 26, 18, 4, 8, 2, 3, 9,
+ 45, 57, 71, 53, 43, 69, 115, 65, 59, 57,
+ 47, 41, 13, 11, 41, 65, 17, 23, 25, 17,
+ 39, 77, 45, 51, 87, 61, 77, 75, 109, 101,
+ 83, 67, 43, 31, 67, 47, 57, 75, 87, 91,
+ 91, 91, 97, 105, 125, 113, 125, 125, 121, 103,
+ 123, 125, 7, 26, 10, 24, 34, 34, 124, 56,
+ 52, 9, 72, 124, 124, 124, 46, 12, 5, 87,
+ 125, 125, 125, 125, 125, 125, 3, 88, 52, 34,
+ 18, 30, 4, 7, 9, 1, 0, 12, 38, 20,
+ 62, 42, 24, 40, 42, 48, 50, 26, 54, 42,
+ 3, 18, 2, 27, 41, 55, 67, 125, 121, 91,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 50 */
+
+ 32, 6, 39, 32, 6, 39, 48, 62, 58, 26,
+ 17, 81, 19, 3, 102, 28, 122, 17, 52, 66,
+ 3, 45, 6, 79, 113, 15, 53, 125, 125, 125,
+ 124, 44, 13, 52, 66, 3, 7, 44, 42, 19,
+ 8, 2, 4, 19, 49, 13, 61, 10, 5, 2,
+ 11, 35, 15, 49, 36, 10, 4, 1, 26, 4,
+ 44, 0, 0, 0, 18, 69, 67, 26, 35, 15,
+ 104, 10, 11, 19, 4, 37, 22, 44, 42, 66,
+ 22, 50, 42, 30, 60, 11, 27, 17, 33, 76,
+ 4, 52, 0, 6, 9, 10, 60, 13, 29, 11,
+ 1, 40, 5, 18, 14, 15, 22, 7, 16, 26,
+ 16, 34, 46, 40, 45, 15, 8, 7, 4, 15,
+ 20, 19, 5, 38, 17, 30, 34, 8, 99, 18,
+ 15, 85, 39, 83, 7, 20, 6, 26, 14, 14,
+ 58, 18, 4, 34, 58, 19, 17, 10, 103, 27,
+ 20, 63, 2, 29, 46, 4, 57, 70, 2, 3,
+ 77, 76, 43, 81, 2, 91, 23, 16, 26, 10,
+ 16, 8, 9, 16, 32, 63, 25, 12, 4, 2,
+ 37, 23, 31, 43, 25, 41, 63, 29, 33, 125,
+ 69, 45, 81, 71, 51, 2, 24, 1, 9, 7,
+ 33, 49, 33, 67, 75, 57, 93, 117, 111, 125,
+ 47, 47, 79, 71, 83, 93, 103, 109, 119, 125,
+ 123, 125, 55, 115, 123, 83, 79, 4, 80, 58,
+ 38, 16, 10, 5, 3, 13, 2, 24, 104, 68,
+ 52, 34, 98, 40, 34, 10, 18, 18, 110, 70,
+ 40, 22, 56, 24, 10, 8, 14, 2, 84, 38,
+ 18, 4, 36, 16, 2, 12, 20, 92, 56, 34,
+ 22, 66, 32, 30, 28, 20, 124, 21, 3, 18,
+ 5, 7, 2, 20, 22, 8, 22, 36, 52, 23,
+ 15, 27, 23, 46, 37, 39, 27, 5, 32, 69,
+ 49, 12, 24, 59, 71, 57, 5, 2, 34, 5,
+ 6, 0, 14, 32, 29, 7, 5, 12, 51, 17,
+ 31, 27, 24, 75, 25, 10, 13, 46, 15, 3,
+ 68, 15, 21, 1, 58, 51, 31, 87, 22, 52,
+ 52, 24, 11, 24, 16, 2, 6, 0, 5, 11,
+ 47, 59, 73, 57, 45, 73, 119, 67, 61, 59,
+ 49, 41, 13, 11, 41, 67, 15, 27, 29, 21,
+ 43, 83, 49, 55, 93, 65, 81, 79, 113, 105,
+ 85, 69, 47, 35, 69, 51, 61, 79, 91, 95,
+ 95, 95, 101, 109, 125, 117, 125, 125, 123, 107,
+ 125, 125, 5, 28, 12, 26, 36, 36, 124, 58,
+ 54, 9, 74, 124, 124, 124, 48, 10, 9, 93,
+ 125, 125, 125, 125, 125, 125, 3, 88, 52, 34,
+ 18, 30, 4, 7, 9, 0, 2, 14, 40, 22,
+ 64, 44, 26, 42, 44, 50, 52, 26, 56, 42,
+ 3, 16, 1, 31, 45, 59, 71, 125, 125, 93,
+ },
+
+ {
+ /* Context Tables for I, SI Slices :: qp = 51 */
+
+ 30, 6, 39, 30, 6, 39, 52, 66, 60, 26,
+ 19, 85, 23, 5, 102, 28, 124, 17, 54, 68,
+ 3, 47, 6, 81, 115, 15, 57, 125, 125, 125,
+ 124, 46, 13, 54, 68, 3, 5, 46, 44, 19,
+ 8, 4, 6, 21, 51, 13, 61, 10, 5, 2,
+ 11, 35, 15, 49, 38, 10, 4, 1, 28, 4,
+ 44, 0, 0, 0, 18, 69, 67, 28, 37, 15,
+ 104, 10, 11, 17, 6, 37, 24, 46, 46, 70,
+ 26, 52, 46, 32, 64, 9, 25, 15, 31, 78,
+ 4, 54, 2, 6, 7, 10, 64, 13, 27, 9,
+ 0, 40, 5, 20, 18, 13, 24, 7, 16, 26,
+ 16, 36, 48, 42, 45, 15, 8, 7, 4, 15,
+ 20, 21, 5, 38, 19, 30, 34, 8, 103, 18,
+ 17, 89, 41, 85, 5, 22, 8, 28, 16, 14,
+ 60, 18, 6, 36, 60, 19, 17, 10, 105, 25,
+ 22, 63, 2, 29, 48, 4, 59, 72, 2, 3,
+ 79, 78, 43, 83, 2, 93, 25, 14, 26, 8,
+ 14, 6, 9, 14, 30, 65, 27, 12, 4, 2,
+ 39, 25, 33, 45, 27, 43, 65, 31, 33, 125,
+ 71, 45, 83, 73, 51, 0, 20, 3, 13, 11,
+ 37, 53, 37, 71, 81, 61, 97, 121, 117, 125,
+ 51, 51, 81, 75, 87, 99, 109, 113, 125, 125,
+ 125, 125, 57, 119, 125, 85, 79, 4, 82, 58,
+ 38, 16, 10, 5, 3, 13, 2, 24, 104, 68,
+ 52, 34, 100, 42, 34, 12, 18, 20, 112, 70,
+ 42, 22, 58, 26, 12, 8, 16, 4, 86, 40,
+ 20, 4, 38, 18, 4, 14, 22, 92, 56, 34,
+ 22, 68, 32, 32, 30, 22, 124, 19, 1, 20,
+ 5, 7, 2, 20, 22, 8, 22, 36, 52, 23,
+ 15, 27, 23, 48, 39, 41, 29, 7, 32, 71,
+ 51, 12, 24, 59, 73, 57, 3, 4, 36, 3,
+ 8, 2, 14, 34, 29, 7, 5, 14, 51, 17,
+ 31, 25, 26, 75, 25, 10, 13, 48, 15, 3,
+ 70, 15, 21, 1, 60, 53, 31, 89, 20, 50,
+ 50, 22, 13, 22, 14, 0, 4, 1, 7, 13,
+ 49, 61, 75, 59, 47, 75, 123, 71, 63, 61,
+ 49, 43, 13, 11, 41, 67, 13, 31, 33, 25,
+ 47, 89, 53, 59, 97, 69, 85, 83, 119, 109,
+ 89, 71, 51, 37, 73, 55, 65, 83, 95, 99,
+ 99, 99, 105, 113, 125, 121, 125, 125, 125, 111,
+ 125, 125, 3, 30, 14, 28, 38, 38, 124, 60,
+ 56, 9, 76, 124, 124, 124, 50, 6, 11, 97,
+ 125, 125, 125, 125, 125, 125, 3, 88, 52, 34,
+ 18, 30, 4, 7, 9, 0, 4, 16, 42, 24,
+ 66, 44, 26, 44, 44, 50, 52, 26, 56, 42,
+ 3, 14, 3, 35, 49, 63, 75, 125, 125, 95,
+ },
+
+ },
+
+};
diff --git a/common/ih264_cabac_tables.h b/common/ih264_cabac_tables.h
new file mode 100755
index 0000000..0cef51e
--- /dev/null
+++ b/common/ih264_cabac_tables.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ih264_cabac_tables.h
+*
+* @brief
+* This file contains enumerations, macros and extern declarations of H264
+* cabac tables
+*
+* @author
+* Ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264_CABAC_TABLES_H_
+#define IH264_CABAC_TABLES_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief number of context table sets : cabac_init_idc values (0-2) plus one (I, SI slices)
+******************************************************************************
+ */
+#define IH264_NUM_CABAC_INIT_IDC_PLUS_ONE 4
+
+/**
+******************************************************************************
+ * @brief number of qp values in H264 (0-51), i.e. largest valid qp plus one
+******************************************************************************
+ */
+#define IH264_MAX_QP 52
+
+/**
+******************************************************************************
+ * @brief max range of cabac contexts in H264 (0-459)
+******************************************************************************
+ */
+#define IH264_NUM_CABAC_CTXTS 460
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief Table for rangeTabLPS depending on pStateIdx and qCodIRangeIdx
+ * input : pStateIdx(0-63) and qCodIRangeIdx(0-3) [(Range >> 6) & 0x3]
+ * output : RLps
+ *
+ * @remarks See Table 9-35 of H264 spec for rangeTabLPS
+ *******************************************************************************
+ */
+extern const UWORD8 gau1_ih264_cabac_rlps[64][4];
+
+
+/**
+ ******************************************************************************
+ * @brief probability+MPS state transition tables based on cur State and bin
+ * input : curpState[bits7-2] | curMPS[bit1] | decodedBin[bit0]
+ * output : nextpState[bits6-1] | nextMPS[bit0]
+ * @remarks Modified form of Table-9-36 State Transition table in H264 spec
+ ******************************************************************************
+ */
+extern const UWORD8 gau1_ih264_next_state[128*2];
+
+
+/**
+ ******************************************************************************
+ * @brief Init context tables for all combinations of qp and cabac_init_idc
+ * @remarks Packing format MPS in lsb and pState in bits[1-6]
+ ******************************************************************************
+ */
+extern const UWORD8 gau1_ih264_cab_ctxts[IH264_NUM_CABAC_INIT_IDC_PLUS_ONE][IH264_MAX_QP][IH264_NUM_CABAC_CTXTS];
+
+
+#endif /* IH264_CABAC_TABLES_H_ */
diff --git a/common/ih264_cavlc_tables.c b/common/ih264_cavlc_tables.c
new file mode 100755
index 0000000..f122ab9
--- /dev/null
+++ b/common/ih264_cavlc_tables.c
@@ -0,0 +1,282 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+
+/**
+******************************************************************************
+* @file
+* ih264_cavlc_tables.c
+*
+* @brief
+* This file contains H264 cavlc tables for encoding coeff_tokens, levels, total
+* zeros and runs before zeros
+*
+* @author
+* Ittiam
+*
+* @par List of Tables
+* - gu1_code_coeff_token_table
+* - gu1_size_coeff_token_table
+* - gu1_code_coeff_token_table_chroma
+* - gu1_size_coeff_token_table_chroma
+* - gu1_threshold_vlc_level
+* - gu1_size_zero_table
+* - gu1_code_zero_table
+* - gu1_size_zero_table_chroma
+* - gu1_code_zero_table_chroma
+* - gu1_index_zero_table
+* - gu1_size_run_table
+* - gu1_code_run_table
+* - gu4_codeword_level_tables
+* - gu1_codesize_level_tables
+*
+* @remarks
+* none
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_cavlc_tables.h"
+
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief Assignment of cbp to a codenum for intra and inter prediction modes
+ * chroma format idc != 0
+ * input : cbp, intra - 0/inter - 1
+ * output : codenum
+ * @remarks Table 9-4 – Assignment of codeNum to values of coded_block_pattern
+ * for macroblock prediction modes in H264 spec
+ ******************************************************************************
+ */
+const UWORD8 gu1_cbp_map_tables[48][2]=
+{
+ { 3, 0}, {29, 2}, {30, 3}, {17, 7}, {31, 4}, {18, 8}, {37, 17}, { 8, 13},
+ {32, 5}, {38, 18}, {19, 9}, { 9, 14}, {20, 10}, {10, 15}, {11, 16}, { 2, 11},
+ {16, 1}, {33, 32}, {34, 33}, {21, 36}, {35, 34}, {22, 37}, {39, 44}, { 4, 40},
+ {36, 35}, {40, 45}, {23, 38}, { 5, 41}, {24, 39}, { 6, 42}, { 7, 43}, { 1, 19},
+ {41, 6}, {42, 24}, {43, 25}, {25, 20}, {44, 26}, {26, 21}, {46, 46}, {12, 28},
+ {45, 27}, {47, 47}, {27, 22}, {13, 29}, {28, 23}, {14, 30}, {15, 31}, { 0, 12},
+};
+
+
+/**
+ ******************************************************************************
+ * @brief total non-zero coefficients and numbers of trailing ones of a residual
+ * block are mapped to coeff_token using the tables given below.
+ * input : VLC-Num | Trailing ones | Total coeffs
+ * output : coeff_token (code word, size of the code word)
+ * @remarks Table-9-5 coeff_token mapping to TotalCoeff( coeff_token )
+ * and TrailingOnes( coeff_token ) in H264 spec
+ ******************************************************************************
+ */
+const UWORD8 gu1_code_coeff_token_table[3][4][16] =
+{
+ {
+ { 5, 7, 7, 7, 7, 15, 11, 8, 15, 11, 15, 11, 15, 11, 7, 4, },
+ { 1, 4, 6, 6, 6, 6, 14, 10, 14, 10, 14, 10, 1, 14, 10, 6, },
+ { 0, 1, 5, 5, 5, 5, 5, 13, 9, 13, 9, 13, 9, 13, 9, 5, },
+ { 0, 0, 3, 3, 4, 4, 4, 4, 4, 12, 12, 8, 12, 8, 12, 8, },
+ },
+ {
+ {11, 7, 7, 7, 4, 7, 15, 11, 15, 11, 8, 15, 11, 7, 9, 7, },
+ { 2, 7, 10, 6, 6, 6, 6, 14, 10, 14, 10, 14, 10, 11, 8, 6, },
+ { 0, 3, 9, 5, 5, 5, 5, 13, 9, 13, 9, 13, 9, 6, 10, 5, },
+ { 0, 0, 5, 4, 6, 8, 4, 4, 4, 12, 8, 12, 12, 8, 1, 4, },
+ },
+ {
+ {15, 11, 8, 15, 11, 9, 8, 15, 11, 15, 11, 8, 13, 9, 5, 1, },
+ {14, 15, 12, 10, 8, 14, 10, 14, 14, 10, 14, 10, 7, 12, 8, 4, },
+ { 0, 13, 14, 11, 9, 13, 9, 13, 10, 13, 9, 13, 9, 11, 7, 3, },
+ { 0, 0, 12, 11, 10, 9, 8, 13, 12, 12, 12, 8, 12, 10, 6, 2, },
+ },
+};
+
+const UWORD8 gu1_size_coeff_token_table[3][4][16] =
+{
+ {
+ { 6, 8, 9, 10, 11, 13, 13, 13, 14, 14, 15, 15, 16, 16, 16, 16, },
+ { 2, 6, 8, 9, 10, 11, 13, 13, 14, 14, 15, 15, 15, 16, 16, 16, },
+ { 0, 3, 7, 8, 9, 10, 11, 13, 13, 14, 14, 15, 15, 16, 16, 16, },
+ { 0, 0, 5, 6, 7, 8, 9, 10, 11, 13, 14, 14, 15, 15, 16, 16, },
+ },
+ {
+ { 6, 6, 7, 8, 8, 9, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, },
+ { 2, 5, 6, 6, 7, 8, 9, 11, 11, 12, 12, 13, 13, 14, 14, 14, },
+ { 0, 3, 6, 6, 7, 8, 9, 11, 11, 12, 12, 13, 13, 13, 14, 14, },
+ { 0, 0, 4, 4, 5, 6, 6, 7, 9, 11, 11, 12, 13, 13, 13, 14, },
+ },
+ {
+ { 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9, 10, 10, 10, 10, },
+ { 4, 5, 5, 5, 5, 6, 6, 7, 8, 8, 9, 9, 9, 10, 10, 10, },
+ { 0, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 10, },
+ { 0, 0, 4, 4, 4, 4, 4, 5, 6, 7, 8, 8, 9, 10, 10, 10, },
+ },
+};
+const UWORD8 gu1_code_coeff_token_table_chroma[4][4] =
+{
+ { 7, 4, 3, 2, },
+ { 1, 6, 3, 3, },
+ { 0, 1, 2, 2, },
+ { 0, 0, 5, 0, },
+};
+
+const UWORD8 gu1_size_coeff_token_table_chroma[4][4] =
+{
+ { 6, 6, 6, 6, },
+ { 1, 6, 7, 8, },
+ { 0, 3, 7, 8, },
+ { 0, 0, 6, 7, },
+};
+
+/**
+ ******************************************************************************
+ * @brief After encoding the current Level, to encode the next level, the choice
+ * of VLC table needs to be updated. The update is carried out based on a set of thresholds.
+ * These thresholds are listed in the table below for lookup.
+ * input : suffix_length
+ * output : threshold
+ ******************************************************************************
+ */
+const UWORD8 gu1_threshold_vlc_level[6] =
+{
+ 0, 3, 6, 12, 24, 48
+};
+
+
+/**
+ ******************************************************************************
+ * @brief table for encoding total number of zeros
+ * input : coeff_token, total zeros
+ * output : code word, size of the code word
+ * @remarks Table-9-7, 9-8 total_zeros tables for 4x4 blocks with
+ * TotalCoeff( coeff_token ) in H264 spec
+ ******************************************************************************
+ */
+const UWORD8 gu1_size_zero_table[135] =
+{
+ 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9,
+ 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6,
+ 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6,
+ 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5,
+ 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5,
+ 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6,
+ 6, 5, 3, 3, 3, 2, 3, 4, 3, 6,
+ 6, 4, 5, 3, 2, 2, 3, 3, 6,
+ 6, 6, 4, 2, 2, 3, 2, 5,
+ 5, 5, 3, 2, 2, 2, 4,
+ 4, 4, 3, 3, 1, 3,
+ 4, 4, 2, 1, 3,
+ 3, 3, 1, 2,
+ 2, 2, 1,
+ 1, 1,
+};
+const UWORD8 gu1_code_zero_table[135] =
+{
+ 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1,
+ 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0,
+ 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0,
+ 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0,
+ 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0,
+ 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0,
+ 1, 1, 5, 4, 3, 3, 2, 1, 1, 0,
+ 1, 1, 1, 3, 3, 2, 2, 1, 0,
+ 1, 0, 1, 3, 2, 1, 1, 1,
+ 1, 0, 1, 3, 2, 1, 1,
+ 0, 1, 1, 2, 1, 3,
+ 0, 1, 1, 1, 1,
+ 0, 1, 1, 1,
+ 0, 1, 1,
+ 0, 1,
+};
+const UWORD8 gu1_size_zero_table_chroma[9] =
+{
+ 1, 2, 3, 3,
+ 1, 2, 2,
+ 1, 1,
+};
+const UWORD8 gu1_code_zero_table_chroma[9] =
+{
+ 1, 1, 1, 0,
+ 1, 1, 0,
+ 1, 0,
+};
+
+/**
+ ******************************************************************************
+ * @brief index to access zero table (look up)
+ * input : TotalCoeff( coeff_token )
+ * output : index to access zero table
+ ******************************************************************************
+ */
+const UWORD8 gu1_index_zero_table[15] =
+{
+ 0, 16, 31, 45, 58, 70, 81, 91, 100, 108, 115, 121, 126, 130, 133,
+};
+
+/**
+ ******************************************************************************
+ * @brief table for encoding runs of zeros before
+ * input : zeros left, runs of zeros before
+ * output : code word, size of the code word
+ * @remarks Table-9-10 table for run_before in H264 spec
+ ******************************************************************************
+ */
+const UWORD8 gu1_size_run_table[42] =
+{
+ 1, 1,
+ 1, 2, 2,
+ 2, 2, 2, 2,
+ 2, 2, 2, 3, 3,
+ 2, 2, 3, 3, 3, 3,
+ 2, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+};
+const UWORD8 gu1_code_run_table[42] =
+{
+ 1, 0,
+ 1, 1, 0,
+ 3, 2, 1, 0,
+ 3, 2, 1, 1, 0,
+ 3, 2, 3, 2, 1, 0,
+ 3, 0, 1, 3, 2, 5, 4,
+ 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+/**
+ ******************************************************************************
+ * @brief index to access run table (look up)
+ * input : zeros left
+ * output : index to access run table (offset of the row for that zeros left)
+ ******************************************************************************
+ */
+const UWORD8 gu1_index_run_table[7] =
+{
+ 0, 2, 5, 9, 14, 20, 27,
+};
diff --git a/common/ih264_cavlc_tables.h b/common/ih264_cavlc_tables.h
new file mode 100755
index 0000000..78057b5
--- /dev/null
+++ b/common/ih264_cavlc_tables.h
@@ -0,0 +1,133 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ih264_cavlc_tables.h
+*
+* @brief
+* This file contains enumerations, macros and extern declarations of H264
+* cavlc tables
+*
+* @author
+* Ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264_CAVLC_TABLES_H_
+#define IH264_CAVLC_TABLES_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/**
+******************************************************************************
+ * @brief maximum zeros left
+******************************************************************************
+ */
+#define MAX_ZERO_LEFT 6
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief Assignment of cbp to a codenum for intra and inter prediction modes
+ * chroma format idc != 0
+ * input : cbp, intra - 0/inter - 1
+ * output : codenum
+ * @remarks Table 9-4 – Assignment of codeNum to values of coded_block_pattern
+ * for macroblock prediction modes in H264 spec
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_cbp_map_tables[48][2];
+
+/**
+ ******************************************************************************
+ * @brief total non-zero coefficients and numbers of trailing ones of a residual
+ * block are mapped to coefftoken using the tables given below.
+ * input : VLC-Num | Trailing ones | Total coeffs
+ * output : coeff_token (code word, size of the code word)
+ * @remarks Table-9-5 coeff_token mapping to TotalCoeff( coeff_token )
+ * and TrailingOnes( coeff_token ) in H264 spec
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_code_coeff_token_table[3][4][16];
+extern const UWORD8 gu1_size_coeff_token_table[3][4][16];
+extern const UWORD8 gu1_code_coeff_token_table_chroma[4][4];
+extern const UWORD8 gu1_size_coeff_token_table_chroma[4][4];
+
+/**
+ ******************************************************************************
+ * @brief Thresholds for determining whether to increment Level table number.
+ * input : suffix_length
+ * output : threshold
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_threshold_vlc_level[6];
+
+/**
+ ******************************************************************************
+ * @brief table for encoding total number of zeros
+ * input : coeff_token, total zeros
+ * output : code word, size of the code word
+ * @remarks Table-9-7, 9-8 total_zeros tables for 4x4 blocks with
+ * TotalCoeff( coeff_token ) in H264 spec
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_size_zero_table[135];
+extern const UWORD8 gu1_code_zero_table[135];
+extern const UWORD8 gu1_size_zero_table_chroma[9];
+extern const UWORD8 gu1_code_zero_table_chroma[9];
+
+/**
+ ******************************************************************************
+ * @brief index to access zero table (for speed)
+ * input : TotalCoeff( coeff_token )
+ * output : index to access zero table
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_index_zero_table[15];
+
+/**
+ ******************************************************************************
+ * @brief table for encoding runs of zeros before
+ * input : zeros left, runs of zeros before
+ * output : code word, size of the code word
+ * @remarks Table-9-10 table for run_before in H264 spec
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_size_run_table[42];
+extern const UWORD8 gu1_code_run_table[42];
+
+/**
+ ******************************************************************************
+ * @brief index to access run table (look up)
+ * input : zeros left
+ * output : index to access run table
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_index_run_table[7];
+
+#endif /* IH264_CAVLC_TABLES_H_ */
diff --git a/common/ih264_chroma_intra_pred_filters.c b/common/ih264_chroma_intra_pred_filters.c
new file mode 100755
index 0000000..ee145e5
--- /dev/null
+++ b/common/ih264_chroma_intra_pred_filters.c
@@ -0,0 +1,478 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_chroma_intra_pred_filters.c
+*
+* @brief
+* Contains function definitions for chroma intra prediction filters
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* -ih264_intra_pred_chroma_8x8_mode_dc
+* -ih264_intra_pred_chroma_8x8_mode_horz
+* -ih264_intra_pred_chroma_8x8_mode_vert
+* -ih264_intra_pred_chroma_8x8_mode_plane
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+
+/* User include files */
+#include "ih264_defs.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+
+/* Global variables used only in assembly files*/
+/* 16 bytes; taken two at a time as little-endian 16-bit values they read
+ * 1, 1, 2, 2, 3, 3, 4, 4.
+ * NOTE(review): presumably per-lane weights for the interleaved U/V gradient
+ * sums of plane prediction - confirm against the assembly that loads it. */
+const WORD8 ih264_gai1_intrapred_chroma_plane_coeffs1[] =
+{ 0x01,0x00,0x01,0x00,
+ 0x02,0x00,0x02,0x00,
+ 0x03,0x00,0x03,0x00,
+ 0x04,0x00,0x04,0x00
+};
+ /* 16 bytes; taken two at a time as little-endian 16-bit two's-complement
+  * values they read -3, -2, -1, 0, 1, 2, 3, 4.
+  * NOTE(review): the initialisers 0xfd..0xff exceed 0x7f, so they rely on the
+  * conversion to WORD8 keeping the bit pattern (assumes WORD8 is a signed
+  * 8-bit type) - confirm. Presumably used only by assembly, like coeffs1. */
+ const WORD8 ih264_gai1_intrapred_chroma_plane_coeffs2[] =
+ { 0xfd,0xff,0xfe,0xff,
+ 0xff,0xff,0x00,0x00,
+ 0x01,0x00,0x02,0x00,
+ 0x03,0x00,0x04,0x00,
+ };
+
+/*****************************************************************************/
+/* Chroma Intra prediction 8x8 filters */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* ih264_intra_pred_chroma_8x8_mode_dc
+*
+* @brief
+*  DC intra prediction for an 8x8 chroma block with interleaved U and V samples
+*
+* @par Description:
+*  Each 4x4 quadrant of the destination gets its own DC value, computed
+*  separately for U and V from the left and/or top neighbours that are flagged
+*  available (sec 8.3.4.1). A quadrant with no usable neighbour is set to 128.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the neighbour buffer. Left neighbours are read as U/V
+*  pairs at decreasing addresses starting from pu1_src + 2 * BLK8x8SIZE - 2,
+*  top neighbours as U/V pairs at increasing addresses starting from
+*  pu1_src + 2 * BLK8x8SIZE + 2
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination; 8 rows of 16 bytes (alternate U and V
+*  samples) are written
+*
+* @param[in] src_strd
+*  integer source stride (not used in this function)
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] ngbr_avail
+*  availability of neighbouring pixels: bit 0 - left neighbours of rows 0-3,
+*  bit 4 - left neighbours of rows 4-7, bit 2 - top neighbours
+*
+* @returns
+*  none
+*
+* @remarks
+*  Neighbour samples are only read when the matching availability bit is set
+*
+******************************************************************************
+*/
+void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_dst,
+                                         WORD32 src_strd,
+                                         WORD32 dst_strd,
+                                         WORD32 ngbr_avail)
+{
+    /* availability flags, each 0 or 1 */
+    WORD32 upper_left_ok = ngbr_avail & 1;
+    WORD32 lower_left_ok = (ngbr_avail >> 4) & 1;
+    WORD32 top_ok = (ngbr_avail >> 2) & 1;
+
+    /* first U/V pair of the left column and of the top row */
+    UWORD8 *pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
+    UWORD8 *pu1_top = pu1_src + 2 * BLK8x8SIZE + 2;
+
+    /* 4-sample sums (plus rounding term 2); index 0 = U, index 1 = V */
+    WORD32 ai4_left_upper[2] = { 0, 0 };
+    WORD32 ai4_left_lower[2] = { 0, 0 };
+    WORD32 ai4_top_left[2] = { 0, 0 };
+    WORD32 ai4_top_right[2] = { 0, 0 };
+
+    /* DC per quadrant (0 top-left, 1 top-right, 2 bottom-left, 3 bottom-right)
+     * and per plane (0 = U, 1 = V) */
+    WORD32 ai4_dc[4][2];
+
+    WORD32 plane, tap, row, pix;
+
+    UNUSED(src_strd);
+
+    if(!(upper_left_ok | lower_left_ok | top_ok))
+    {
+        /* Both left and top are unavailable, set the block to 128 */
+        for(row = 0; row < 8; row++)
+        {
+            memset(pu1_dst + row * dst_strd, 128, 8 * sizeof(UWORD16));
+        }
+        return;
+    }
+
+    for(plane = 0; plane < 2; plane++)
+    {
+        /* accumulate only the neighbours that are available */
+        if(upper_left_ok)
+        {
+            for(tap = 0; tap < 4; tap++)
+            {
+                ai4_left_upper[plane] += pu1_left[plane - 2 * tap];
+            }
+            ai4_left_upper[plane] += 2;
+        }
+
+        if(lower_left_ok)
+        {
+            for(tap = 4; tap < 8; tap++)
+            {
+                ai4_left_lower[plane] += pu1_left[plane - 2 * tap];
+            }
+            ai4_left_lower[plane] += 2;
+        }
+
+        if(top_ok)
+        {
+            for(tap = 0; tap < 4; tap++)
+            {
+                ai4_top_left[plane] += pu1_top[plane + 2 * tap];
+                ai4_top_right[plane] += pu1_top[plane + 8 + 2 * tap];
+            }
+            ai4_top_left[plane] += 2;
+            ai4_top_right[plane] += 2;
+        }
+
+        /* top-left quadrant: mean over upper-left and/or top-left samples */
+        if(upper_left_ok + top_ok)
+        {
+            ai4_dc[0][plane] = (ai4_left_upper[plane] + ai4_top_left[plane])
+                            >> (1 + upper_left_ok + top_ok);
+        }
+        else
+        {
+            ai4_dc[0][plane] = 128;
+        }
+
+        /* top-right quadrant: top samples preferred, then upper-left ones */
+        if(top_ok)
+        {
+            ai4_dc[1][plane] = ai4_top_right[plane] >> 2;
+        }
+        else if(upper_left_ok)
+        {
+            ai4_dc[1][plane] = ai4_left_upper[plane] >> 2;
+        }
+        else
+        {
+            ai4_dc[1][plane] = 128;
+        }
+
+        /* bottom-left quadrant: lower-left samples preferred, then top ones */
+        if(lower_left_ok)
+        {
+            ai4_dc[2][plane] = ai4_left_lower[plane] >> 2;
+        }
+        else if(top_ok)
+        {
+            ai4_dc[2][plane] = ai4_top_left[plane] >> 2;
+        }
+        else
+        {
+            ai4_dc[2][plane] = 128;
+        }
+
+        /* bottom-right quadrant: mean over lower-left and/or top-right samples */
+        if(lower_left_ok + top_ok)
+        {
+            ai4_dc[3][plane] = (ai4_left_lower[plane] + ai4_top_right[plane])
+                            >> (1 + lower_left_ok + top_ok);
+        }
+        else
+        {
+            ai4_dc[3][plane] = 128;
+        }
+    }
+
+    /* fill the 8x8 block quadrant by quadrant, U and V interleaved */
+    for(row = 0; row < 8; row++)
+    {
+        UWORD8 *pu1_out = pu1_dst + row * dst_strd;
+
+        for(pix = 0; pix < 8; pix++)
+        {
+            WORD32 quad = 2 * (row >> 2) + (pix >> 2);
+
+            pu1_out[2 * pix] = ai4_dc[quad][0];
+            pu1_out[2 * pix + 1] = ai4_dc[quad][1];
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* ih264_intra_pred_chroma_8x8_mode_horz
+*
+* @brief
+*  Horizontal intra prediction for an 8x8 chroma block
+*
+* @par Description:
+*  Every destination row is filled with the U/V pair of its left neighbour
+*  (sec 8.3.4.2)
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the neighbour buffer. The left U/V pair of row r is read
+*  from pu1_src + 2 * BLK8x8SIZE - 2 - 2 * r
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination; 8 rows of 16 bytes (alternate U and V
+*  samples) are written
+*
+* @param[in] src_strd
+*  integer source stride (not used in this function)
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] ngbr_avail
+*  availability of neighbouring pixels (not used in this function)
+*
+* @returns
+*  none
+*
+* @remarks
+*  None
+*
+******************************************************************************
+*/
+void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_dst,
+                                           WORD32 src_strd,
+                                           WORD32 dst_strd,
+                                           WORD32 ngbr_avail)
+{
+    /* U/V pair left of row 0; the pair of each further row lies 2 bytes lower */
+    UWORD8 *pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
+    WORD32 row, pix;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(row = 0; row < 8; row++)
+    {
+        UWORD8 *pu1_out = pu1_dst + row * dst_strd;
+        UWORD8 *pu1_pair = pu1_left - 2 * row;
+
+        /* replicate the left U/V pair across the 8 pixels of this row */
+        for(pix = 0; pix < 8; pix++)
+        {
+            pu1_out[2 * pix] = pu1_pair[0];
+            pu1_out[2 * pix + 1] = pu1_pair[1];
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* ih264_intra_pred_chroma_8x8_mode_vert
+*
+* @brief
+*  Vertical intra prediction for an 8x8 chroma block
+*
+* @par Description:
+*  The 16 bytes of the top neighbour row (8 U/V pairs) are copied into each of
+*  the 8 destination rows (sec 8.3.4.3)
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the neighbour buffer. The top row is read from
+*  pu1_src + 2 * BLK8x8SIZE + 2
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination; 8 rows of 16 bytes (alternate U and V
+*  samples) are written
+*
+* @param[in] src_strd
+*  integer source stride (not used in this function)
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] ngbr_avail
+*  availability of neighbouring pixels (not used in this function)
+*
+* @returns
+*  none
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_dst,
+                                           WORD32 src_strd,
+                                           WORD32 dst_strd,
+                                           WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_top = pu1_src + 2 * BLK8x8SIZE + 2;
+    UWORD8 *pu1_row = pu1_dst;
+    WORD32 row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    /* 16 bytes (8 U/V pairs) are copied from the top row into every dst row */
+    for(row = 0; row < 8; row++)
+    {
+        memcpy(pu1_row, pu1_top, 16);
+        pu1_row += dst_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* ih264_intra_pred_chroma_8x8_mode_plane
+*
+* @brief
+*  Plane intra prediction for an 8x8 chroma block
+*
+* @par Description:
+*  Fits a plane a + b * (x - 3) + c * (y - 3) through the top and left
+*  neighbours, separately for U and V, and writes the clipped result
+*  (sec 8.3.4.4)
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the neighbour buffer. Top neighbours are read relative to
+*  pu1_src + 2 * BLK8x8SIZE + 2 (index -2/-1 is the pair just before the top
+*  row), left neighbours relative to pu1_src + 2 * BLK8x8SIZE - 2 at
+*  decreasing addresses
+*
+* @param[out] pu1_dst
+*  UWORD8 pointer to the destination; 8 rows of 16 bytes (alternate U and V
+*  samples) are written
+*
+* @param[in] src_strd
+*  integer source stride (not used in this function)
+*
+* @param[in] dst_strd
+*  integer destination stride
+*
+* @param[in] ngbr_avail
+*  availability of neighbouring pixels (not used in this function)
+*
+* @returns
+*  none
+*
+* @remarks
+*  None
+*
+******************************************************************************
+*/
+void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
+                                            UWORD8 *pu1_dst,
+                                            WORD32 src_strd,
+                                            WORD32 dst_strd,
+                                            WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_top = pu1_src + 2 * BLK8x8SIZE + 2;
+    UWORD8 *pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
+
+    /* variables H, V, a, b, c of sec 8.3.4.4; index 0 = U, index 1 = V */
+    WORD32 ai4_h[2] = { 0, 0 };
+    WORD32 ai4_v[2] = { 0, 0 };
+    WORD32 ai4_a[2], ai4_b[2], ai4_c[2];
+
+    WORD32 plane, k, x, y, pel;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(plane = 0; plane < 2; plane++)
+    {
+        /* weighted differences of neighbours mirrored about the block centre */
+        for(k = 0; k < 4; k++)
+        {
+            ai4_h[plane] += (k + 1) * (pu1_top[8 + 2 * k + plane]
+                            - pu1_top[4 - 2 * k + plane]);
+            ai4_v[plane] += (k + 1) * (pu1_left[(4 + k) * (-2) + plane]
+                            - pu1_left[(2 - k) * (-2) + plane]);
+        }
+
+        /* offset from the last left and last top neighbour, then the slopes */
+        ai4_a[plane] = 16 * (pu1_left[7 * (-2) + plane] + pu1_top[14 + plane]);
+        ai4_b[plane] = (34 * ai4_h[plane] + 32) >> 6;
+        ai4_c[plane] = (34 * ai4_v[plane] + 32) >> 6;
+    }
+
+    for(y = 0; y < 8; y++)
+    {
+        UWORD8 *pu1_out = pu1_dst + y * dst_strd;
+
+        for(x = 0; x < 8; x++)
+        {
+            /* U sample first, then V sample of the same pixel */
+            for(plane = 0; plane < 2; plane++)
+            {
+                pel = ai4_a[plane] + ai4_b[plane] * (x - 3) + ai4_c[plane] * (y - 3);
+                pel = (pel + 16) >> 5;
+                pu1_out[2 * x + plane] = CLIP_U8(pel);
+            }
+        }
+    }
+}
+
diff --git a/common/ih264_common_tables.c b/common/ih264_common_tables.c
new file mode 100755
index 0000000..c53c276
--- /dev/null
+++ b/common/ih264_common_tables.c
@@ -0,0 +1,725 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_common_tables.c
+*
+* @brief
+* Contains common global tables
+*
+* @author
+* Harish M
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief while encoding, based on the input configuration parameters, the
+ * level of the bitstream is computed from the table below.
+ * input : table_idx
+ * output : level_idc or cpb size
+ * @remarks Table A-1 – level table limits. Rows follow the order of Table A-1
+ * (1, 1b, 1.1, 1.2, ...): the second row holds the Level 1b limits and
+ * the third row the Level 1.1 limits, so each row is tagged accordingly.
+ * NOTE(review): the meaning of the numeric columns is defined by
+ * level_tables_t, which is declared elsewhere - confirm there.
+ ******************************************************************************
+ */
+const level_tables_t gas_ih264_lvl_tbl[16] =
+{
+ { IH264_LEVEL_10, 1485, 99, 297, 64, 175, 64 },
+ { IH264_LEVEL_1B, 1485, 99, 297, 128, 350, 64 },
+ { IH264_LEVEL_11, 3000, 396, 675, 192, 500, 128 },
+ { IH264_LEVEL_12, 6000, 396, 1782, 384, 1000, 128 },
+ { IH264_LEVEL_13, 11880, 396, 1782, 768, 2000, 128 },
+ { IH264_LEVEL_20, 11880, 396, 1782, 2000, 2000, 128 },
+ { IH264_LEVEL_21, 19800, 792, 3564, 4000, 4000, 256 },
+ { IH264_LEVEL_22, 20250, 1620, 6075, 4000, 4000, 256 },
+ { IH264_LEVEL_30, 40500, 1620, 6075, 10000, 10000, 256 },
+ { IH264_LEVEL_31, 108000, 3600, 13500, 14000, 14000, 512 },
+ { IH264_LEVEL_32, 216000, 5120, 15360, 20000, 20000, 512 },
+ { IH264_LEVEL_40, 245760, 8192, 24576, 20000, 25000, 512 },
+ { IH264_LEVEL_41, 245760, 8192, 24576, 50000, 62500, 512 },
+ { IH264_LEVEL_42, 522240, 8704, 26112, 50000, 62500, 512 },
+ { IH264_LEVEL_50, 589824, 22080, 82800, 135000, 135000, 512 },
+ { IH264_LEVEL_51, 983040, 36864, 138240, 240000, 240000, 512 },
+};
+
+
+/**
+ * Array containing supported levels
+ *
+ * 15 entries in increasing level order. IH264_LEVEL_1B is not listed here
+ * although gas_ih264_lvl_tbl has a row for it, so an index into this array is
+ * not an index into gas_ih264_lvl_tbl. The per-level arrays
+ * gai4_ih264_max_luma_pic_size, gai4_ih264_max_wd_ht and gai4_ih264_min_wd_ht
+ * have 15 entries in this same order.
+ */
+const WORD32 gai4_ih264_levels[] =
+{
+ IH264_LEVEL_10,
+ IH264_LEVEL_11,
+ IH264_LEVEL_12,
+ IH264_LEVEL_13,
+ IH264_LEVEL_20,
+ IH264_LEVEL_21,
+ IH264_LEVEL_22,
+ IH264_LEVEL_30,
+ IH264_LEVEL_31,
+ IH264_LEVEL_32,
+ IH264_LEVEL_40,
+ IH264_LEVEL_41,
+ IH264_LEVEL_42,
+ IH264_LEVEL_50,
+ IH264_LEVEL_51,
+};
+
+
+/**
+ * Array giving size of max luma samples in a picture for a given level
+ * (one entry per level, in the order of gai4_ih264_levels)
+ */
+const WORD32 gai4_ih264_max_luma_pic_size[] =
+{
+ /* Level 1 */
+ 25344,
+ /* Level 1.1 */
+ 101376,
+ /* Level 1.2 */
+ 101376,
+ /* Level 1.3 */
+ 101376,
+ /* Level 2 */
+ 101376,
+ /* Level 2.1 */
+ 202752,
+ /* Level 2.2 */
+ 414720,
+ /* Level 3 */
+ 414720,
+ /* Level 3.1 */
+ 921600,
+ /* Level 3.2 */
+ 1310720,
+ /* Level 4 */
+ 2097152,
+ /* Level 4.1 */
+ 2097152,
+ /* Level 4.2 */
+ 2228224,
+ /* Level 5 */
+ 5652480,
+ /* Level 5.1 */
+ 9437184
+};
+
+
+/** Max width and height allowed for a given level */
+/** This is derived as SQRT(8 * gai4_ih264_max_luma_pic_size[]) */
+/* The square root is rounded up to the next integer, e.g.
+ * SQRT(8 * 25344) = 450.28 is stored as 451 */
+const WORD32 gai4_ih264_max_wd_ht[] =
+{
+ /* Level 1 */
+ 451,
+ /* Level 1.1 */
+ 901,
+ /* Level 1.2 */
+ 901,
+ /* Level 1.3 */
+ 901,
+ /* Level 2 */
+ 901,
+ /* Level 2.1 */
+ 1274,
+ /* Level 2.2 */
+ 1822,
+ /* Level 3 */
+ 1822,
+ /* Level 3.1 */
+ 2716,
+ /* Level 3.2 */
+ 3239,
+ /* Level 4 */
+ 4096,
+ /* Level 4.1 */
+ 4096,
+ /* Level 4.2 */
+ 4223,
+ /* Level 5 */
+ 6725,
+ /* Level 5.1 */
+ 8689
+};
+
+/** Min width and height allowed for a given level */
+/** This is derived as gai4_ih264_max_luma_pic_size[]/gai4_ih264_max_wd_ht[] */
+/* The quotient is rounded up to the next integer, e.g.
+ * 25344 / 451 = 56.2 is stored as 57 */
+const WORD32 gai4_ih264_min_wd_ht[] =
+{
+ /* Level 1 */
+ 57,
+ /* Level 1.1 */
+ 113,
+ /* Level 1.2 */
+ 113,
+ /* Level 1.3 */
+ 113,
+ /* Level 2 */
+ 113,
+ /* Level 2.1 */
+ 160,
+ /* Level 2.2 */
+ 228,
+ /* Level 3 */
+ 228,
+ /* Level 3.1 */
+ 340,
+ /* Level 3.2 */
+ 405,
+ /* Level 4 */
+ 512,
+ /* Level 4.1 */
+ 512,
+ /* Level 4.2 */
+ 528,
+ /* Level 5 */
+ 841,
+ /* Level 5.1 */
+ 1087
+
+};
+
+
+/** Table 7-11 Macroblock types for I slices */
+/* 26 entries, one per row of the table. Unlike the level tables above, this
+ * array is not const-qualified.
+ * NOTE(review): the field order comes from intra_mbtype_info_t, declared
+ * elsewhere. The third column looks like the Intra16x16 prediction mode, the
+ * fourth (0-2) like the chroma coded block pattern and the fifth (0 or 15)
+ * like the luma coded block pattern - confirm against the struct. */
+intra_mbtype_info_t gas_ih264_i_mbtype_info[] =
+{
+ /* For first entry, if transform_size_8x8_flag is 1, mode will be MBPART_I8x8 */
+ /* This has to be taken care while accessing the table */
+ {0, MBPART_I4x4, VERT_I16x16, 0, 0},
+ {0, MBPART_I16x16, VERT_I16x16, 0, 0},
+ {0, MBPART_I16x16, HORZ_I16x16, 0, 0},
+ {0, MBPART_I16x16, DC_I16x16, 0, 0},
+ {0, MBPART_I16x16, PLANE_I16x16, 0, 0},
+ {0, MBPART_I16x16, VERT_I16x16, 1, 0},
+ {0, MBPART_I16x16, HORZ_I16x16, 1, 0},
+ {0, MBPART_I16x16, DC_I16x16, 1, 0},
+ {0, MBPART_I16x16, PLANE_I16x16, 1, 0},
+ {0, MBPART_I16x16, VERT_I16x16, 2, 0},
+ {0, MBPART_I16x16, HORZ_I16x16, 2, 0},
+ {0, MBPART_I16x16, DC_I16x16, 2, 0},
+ {0, MBPART_I16x16, PLANE_I16x16, 2, 0},
+ {0, MBPART_I16x16, VERT_I16x16, 0, 15},
+ {0, MBPART_I16x16, HORZ_I16x16, 0, 15},
+ {0, MBPART_I16x16, DC_I16x16, 0, 15},
+ {0, MBPART_I16x16, PLANE_I16x16, 0, 15},
+ {0, MBPART_I16x16, VERT_I16x16, 1, 15},
+ {0, MBPART_I16x16, HORZ_I16x16, 1, 15},
+ {0, MBPART_I16x16, DC_I16x16, 1, 15},
+ {0, MBPART_I16x16, PLANE_I16x16, 1, 15},
+ {0, MBPART_I16x16, VERT_I16x16, 2, 15},
+ {0, MBPART_I16x16, HORZ_I16x16, 2, 15},
+ {0, MBPART_I16x16, DC_I16x16, 2, 15},
+ {0, MBPART_I16x16, PLANE_I16x16, 2, 15},
+ {0, MBPART_IPCM, VERT_I16x16, 0, 0}
+};
+
+/** Table 7-13 Macroblock types for P slices */
+/* 5 entries, one per row of the table; the last two rows are identical.
+ * NOTE(review): the field order comes from inter_mbtype_info_t, declared
+ * elsewhere. The columns look like {number of partitions, prediction mode of
+ * partition 0, prediction mode of partition 1, partition width, partition
+ * height} - confirm against the struct. */
+inter_mbtype_info_t gas_ih264_p_mbtype_info[] =
+{
+ {1, MBPART_L0, MBPART_NA, 16, 16},
+ {2, MBPART_L0, MBPART_L0, 16, 8},
+ {2, MBPART_L0, MBPART_L0, 8, 16},
+ {4, MBPART_NA, MBPART_NA, 8, 8},
+ {4, MBPART_NA, MBPART_NA, 8, 8},
+};
+
+/** Table 7-14 Macroblock types for B slices */
+/* 23 entries, one per row of the table.
+ * NOTE(review): the field order comes from inter_mbtype_info_t, declared
+ * elsewhere. The columns look like {number of partitions, prediction mode of
+ * partition 0, prediction mode of partition 1, partition width, partition
+ * height} - confirm against the struct. */
+inter_mbtype_info_t gas_ih264_b_mbtype_info[] =
+{
+ {0, MBPART_DIRECT, MBPART_NA, 8, 8, },
+ {1, MBPART_L0, MBPART_NA, 16, 16, },
+ {1, MBPART_L1, MBPART_NA, 16, 16, },
+ {1, MBPART_BI, MBPART_NA, 16, 16, },
+ {2, MBPART_L0, MBPART_L0, 16, 8, },
+ {2, MBPART_L0, MBPART_L0, 8, 16, },
+ {2, MBPART_L1, MBPART_L1, 16, 8, },
+ {2, MBPART_L1, MBPART_L1, 8, 16, },
+ {2, MBPART_L0, MBPART_L1, 16, 8, },
+ {2, MBPART_L0, MBPART_L1, 8, 16, },
+ {2, MBPART_L1, MBPART_L0, 16, 8, },
+ {2, MBPART_L1, MBPART_L0, 8, 16, },
+ {2, MBPART_L0, MBPART_BI, 16, 8, },
+ {2, MBPART_L0, MBPART_BI, 8, 16, },
+ {2, MBPART_L1, MBPART_BI, 16, 8, },
+ {2, MBPART_L1, MBPART_BI, 8, 16, },
+ {2, MBPART_BI, MBPART_L0, 16, 8, },
+ {2, MBPART_BI, MBPART_L0, 8, 16, },
+ {2, MBPART_BI, MBPART_L1, 16, 8, },
+ {2, MBPART_BI, MBPART_L1, 8, 16, },
+ {2, MBPART_BI, MBPART_BI, 16, 8, },
+ {2, MBPART_BI, MBPART_BI, 8, 16, },
+ {4, MBPART_NA, MBPART_NA, 8, 8, },
+};
+
+/** Table 7-17 – Sub-macroblock types in P macroblocks */
+/* 4 entries, one per row of the table.
+ * NOTE(review): the field order comes from submbtype_info_t, declared
+ * elsewhere. The columns look like {number of sub-partitions, prediction
+ * mode, sub-partition width, sub-partition height} - confirm. */
+submbtype_info_t gas_ih264_p_submbtype_info[] =
+{
+ {1, MBPART_L0, 8, 8},
+ {2, MBPART_L0, 8, 4},
+ {2, MBPART_L0, 4, 8},
+ {4, MBPART_L0, 4, 4},
+};
+
+/** Table 7-18 – Sub-macroblock types in B macroblocks */
+/* 13 entries, one per row of the table.
+ * NOTE(review): the field order comes from submbtype_info_t, declared
+ * elsewhere. The columns look like {number of sub-partitions, prediction
+ * mode, sub-partition width, sub-partition height} - confirm. */
+submbtype_info_t gas_ih264_b_submbtype_info[] =
+{
+ {4, MBPART_DIRECT, 4, 4},
+ {1, MBPART_L0, 8, 8},
+ {1, MBPART_L1, 8, 8},
+ {1, MBPART_BI, 8, 8},
+ {2, MBPART_L0, 8, 4},
+ {2, MBPART_L0, 4, 8},
+ {2, MBPART_L1, 8, 4},
+ {2, MBPART_L1, 4, 8},
+ {2, MBPART_BI, 8, 4},
+ {2, MBPART_BI, 4, 8},
+ {4, MBPART_L0, 4, 4},
+ {4, MBPART_L1, 4, 4},
+ {4, MBPART_BI, 4, 4},
+};
+
+
+
+
+/* Inverse scan for a progressive 4x4 block: entry i is the raster position
+ * (4 * row + column) of the i-th coefficient in zig-zag scan order. */
+const UWORD8 gau1_ih264_inv_scan_prog4x4[] =
+{
+ 0, 1, 4, 8,
+ 5, 2, 3, 6,
+ 9, 12, 13, 10,
+ 7, 11, 14, 15
+};
+
+/* Inverse scan for a 4x4 block, counterpart of gau1_ih264_inv_scan_prog4x4:
+ * entry i is the raster position (4 * row + column) of the i-th scanned
+ * coefficient.
+ * NOTE(review): "int" presumably stands for interlaced (field scan), as in
+ * the "interlace" 8x8 tables of this file - confirm. */
+const UWORD8 gau1_ih264_inv_scan_int4x4[] =
+{
+ 0, 4, 1, 8,
+ 12, 5, 9, 13,
+ 2, 6, 10, 14,
+ 3, 7, 11, 15
+};
+
+/** Inverse scan tables for individual 4x4 blocks of 8x8 transform coeffs of CAVLC */
+/* progressive */
+/* Four groups of 16 entries, one group per 4x4 block. Entry j of group n
+ * equals entry 4 * j + n of gau1_ih264_inv_scan_prog8x8_cabac, i.e. the 8x8
+ * scan is split by taking every fourth coefficient. */
+const UWORD8 gau1_ih264_inv_scan_prog8x8_cavlc[64] =
+{
+ 0, 9, 17, 18, 12, 40, 27, 7,
+ 35, 57, 29, 30, 58, 38, 53, 47,
+ 1, 2, 24, 11, 19, 48, 20, 14,
+ 42, 50, 22, 37, 59, 31, 60, 55,
+ 8, 3, 32, 4, 26, 41, 13, 21,
+ 49, 43, 15, 44, 52, 39, 61, 62,
+ 16, 10, 25, 5, 33, 34, 6, 28,
+ 56, 36, 23, 51, 45, 46, 54, 63
+};
+
+/* interlace */
+/* Four groups of 16 entries, one group per 4x4 block. Entry j of group n
+ * equals entry 4 * j + n of gau1_ih264_inv_scan_int8x8_cabac. */
+const UWORD8 gau1_ih264_inv_scan_int8x8_cavlc[64] =
+{
+ 0, 9, 2, 56, 18, 26, 34, 27,
+ 35, 28, 36, 29, 45, 7, 54, 39,
+ 8, 24, 25, 33, 41, 11, 42, 12,
+ 43, 13, 44, 14, 53, 15, 62, 47,
+ 16, 32, 40, 10, 49, 4, 50, 5,
+ 51, 6, 52, 22, 61, 38, 23, 55,
+ 1, 17, 48, 3, 57, 19, 58, 20,
+ 59, 21, 60, 37, 30, 46, 31, 63
+};
+
+
+
+/*Inverse scan tables for individual 8x8 blocks of 8x8 transform coeffs of CABAC */
+/* progressive */
+/* Entry i is the raster position (8 * row + column) of the i-th coefficient
+ * in 8x8 zig-zag scan order. */
+
+const UWORD8 gau1_ih264_inv_scan_prog8x8_cabac[64] =
+{
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+
+/* interlace */
+/* Entry i is the raster position (8 * row + column) of the i-th coefficient
+ * in the 8x8 scan order used for interlaced content. */
+
+const UWORD8 gau1_ih264_inv_scan_int8x8_cabac[64] =
+{
+ 0, 8, 16, 1, 9, 24, 32, 17,
+ 2, 25, 40, 48, 56, 33, 10, 3,
+ 18, 41, 49, 57, 26, 11, 4, 19,
+ 34, 42, 50, 58, 27, 12, 5, 20,
+ 35, 43, 51, 59, 28, 13, 6, 21,
+ 36, 44, 52, 60, 29, 14, 22, 37,
+ 45, 53, 61, 30, 7, 15, 38, 46,
+ 54, 62, 23, 31, 39, 47, 55, 63
+};
+
+
+/* The four 8x8 inverse scan tables in one array. Index 0/1 select the CAVLC
+ * progressive/interlace tables, index 2/3 the CABAC progressive/interlace
+ * tables. */
+const UWORD8 *gpau1_ih264_inv_scan8x8[] =
+{
+ gau1_ih264_inv_scan_prog8x8_cavlc,
+ gau1_ih264_inv_scan_int8x8_cavlc,
+ gau1_ih264_inv_scan_prog8x8_cabac,
+ gau1_ih264_inv_scan_int8x8_cabac
+};
+
+/* The two 4x4 inverse scan tables in one array: index 0 selects the
+ * progressive table, index 1 the "int" table. */
+const UWORD8 *gpau1_ih264_inv_scan4x4[] =
+{
+ gau1_ih264_inv_scan_prog4x4,
+ gau1_ih264_inv_scan_int4x4,
+};
+
+/* Permutation of 0..15 that swaps 2<->4, 3<->5, 10<->12 and 11<->13 and leaves
+ * every other index unchanged, so applying it twice gives the identity.
+ * NOTE(review): presumably converts a 4x4 sub-block index between raster
+ * order and the order that groups the sub-blocks by 8x8 quadrant - confirm
+ * against the callers. */
+const UWORD8 gau1_ih264_8x8_subblk_idx[] =
+{
+ 0, 1, 4, 5,
+ 2, 3, 6, 7,
+ 8, 9, 12, 13,
+ 10, 11, 14, 15
+};
+
+
+/* Table 8-15 Chroma QP offset table */
+/* 52 entries, indexed 0..51. Indices 0..29 map to themselves; from index 30
+ * on the output grows more slowly than the index and ends at 39 for indices
+ * 48..51. */
+const UWORD8 gau1_ih264_chroma_qp[] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 29, 30,
+ 31, 32, 32, 33, 34, 34, 35, 35,
+ 36, 36, 37, 37, 37, 38, 38, 38,
+ 39, 39, 39, 39
+};
+
+
+/**
+******************************************************************************
+* @brief look up table to compute neighbour availability of 4x4 blocks
+* input : subblk idx, mb neighbor availability
+* output : sub blk neighbor availability
+* @remarks 16 rows of 16 entries.
+* NOTE(review): by analogy with gau1_ih264_8x8_ngbr_avbl[16][4], the first
+* index is presumably the mb neighbor availability and the second the
+* sub-block index - confirm against the callers.
+******************************************************************************
+*/
+const UWORD8 gau1_ih264_4x4_ngbr_avbl[16][16] =
+{
+ { 0x0, 0x1, 0xc, 0x7, 0x1, 0x1, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0x1, 0x1, 0xf, 0x7, 0x1, 0x1, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0x2, 0x1, 0xc, 0x7, 0x1, 0x1, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0x3, 0x1, 0xf, 0x7, 0x1, 0x1, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 },
+
+ { 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0xd, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0xe, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 },
+
+ { 0x0, 0x1, 0xc, 0x7, 0x1, 0x9, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0x1, 0x1, 0xf, 0x7, 0x1, 0x9, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0x2, 0x1, 0xc, 0x7, 0x1, 0x9, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0x3, 0x1, 0xf, 0x7, 0x1, 0x9, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 },
+
+ { 0xc, 0xf, 0xc, 0x7, 0xf, 0xf, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0xd, 0xf, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0xe, 0xf, 0xc, 0x7, 0xf, 0xf, 0xf, 0x7, 0xc, 0xf, 0xc, 0x7, 0xf, 0x7, 0xf, 0x7 },
+ { 0xf, 0xf, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0xf, 0xf, 0x7, 0xf, 0x7, 0xf, 0x7 },
+};
+
+
+/**
+******************************************************************************
+* @brief look up table to compute neighbour availability of 8x8 blocks
+* input : subblk idx, mb neighbor availability
+* output : sub blk neighbor availability
+* @remarks 16 rows of 4 entries.
+* NOTE(review): with 4 entries per row, the second index is presumably the
+* 8x8 sub-block index and the first the mb neighbor availability - confirm
+* against the callers.
+******************************************************************************
+*/
+const UWORD8 gau1_ih264_8x8_ngbr_avbl[16][4] =
+{
+ { 0x0, 0x1, 0xc, 0x7 },
+ { 0x1, 0x1, 0xf, 0x7 },
+ { 0x2, 0x1, 0xc, 0x7 },
+ { 0x3, 0x1, 0xf, 0x7 },
+
+ { 0xc, 0x7, 0xc, 0x7 },
+ { 0xd, 0x7, 0xf, 0x7 },
+ { 0xe, 0x7, 0xc, 0x7 },
+ { 0xf, 0x7, 0xf, 0x7 },
+
+ { 0x0, 0x9, 0xc, 0x7 },
+ { 0x1, 0x9, 0xf, 0x7 },
+ { 0x2, 0x9, 0xc, 0x7 },
+ { 0x3, 0x9, 0xf, 0x7 },
+
+ { 0xc, 0xf, 0xc, 0x7 },
+ { 0xd, 0xf, 0xf, 0x7 },
+ { 0xe, 0xf, 0xc, 0x7 },
+ { 0xf, 0xf, 0xf, 0x7 },
+};
+
+/** Table 7-3 Default intra 4x4 scaling list */
+const UWORD16 gau2_ih264_default_intra4x4_scaling_list[] = /* 16 entries; gau2_ih264_default_intra4x4_weight_scale holds the inverse-scanned form */
+{
+ 6, 13, 13, 20,
+ 20, 20, 28, 28,
+ 28, 28, 32, 32,
+ 32, 37, 37, 42
+};
+
+/** Table 7-3 Default inter 4x4 scaling list */
+const UWORD16 gau2_ih264_default_inter4x4_scaling_list[] = /* 16 entries; gau2_ih264_default_inter4x4_weight_scale holds the inverse-scanned form */
+{
+ 10, 14, 14, 20,
+ 20, 20, 24, 24,
+ 24, 24, 27, 27,
+ 27, 30, 30, 34
+};
+
+/* Inverse scanned output of gau2_ih264_default_intra4x4_scaling_list */
+const UWORD16 gau2_ih264_default_intra4x4_weight_scale[] = /* 16 entries laid out as a 4x4 matrix; the matrix is symmetric */
+{
+ 6, 13, 20, 28,
+ 13, 20, 28, 32,
+ 20, 28, 32, 37,
+ 28, 32, 37, 42
+};
+
+/* Inverse scanned output of gau2_ih264_default_inter4x4_scaling_list */
+const UWORD16 gau2_ih264_default_inter4x4_weight_scale[] = /* 16 entries laid out as a 4x4 matrix; the matrix is symmetric */
+{
+ 10, 14, 20, 24,
+ 14, 20, 24, 27,
+ 20, 24, 27, 30,
+ 24, 27, 30, 34
+};
+
+/** Table 7-4 Default intra 8x8 scaling list */
+const UWORD16 gau2_ih264_default_intra8x8_scaling_list[] = /* 64 entries; gau2_ih264_default_intra8x8_weight_scale holds the inverse-scanned form */
+{
+ 6, 10, 10, 13, 11, 13, 16, 16,
+ 16, 16, 18, 18, 18, 18, 18, 23,
+ 23, 23, 23, 23, 23, 25, 25, 25,
+ 25, 25, 25, 25, 27, 27, 27, 27,
+ 27, 27, 27, 27, 29, 29, 29, 29,
+ 29, 29, 29, 31, 31, 31, 31, 31,
+ 31, 33, 33, 33, 33, 33, 36, 36,
+ 36, 36, 38, 38, 38, 40, 40, 42
+};
+
+/** Table 7-4 Default inter 8x8 scaling list */
+const UWORD16 gau2_ih264_default_inter8x8_scaling_list[] = /* 64 entries; gau2_ih264_default_inter8x8_weight_scale holds the inverse-scanned form */
+{
+ 9, 13, 13, 15, 13, 15, 17, 17,
+ 17, 17, 19, 19, 19, 19, 19, 21,
+ 21, 21, 21, 21, 21, 22, 22, 22,
+ 22, 22, 22, 22, 24, 24, 24, 24,
+ 24, 24, 24, 24, 25, 25, 25, 25,
+ 25, 25, 25, 27, 27, 27, 27, 27,
+ 27, 28, 28, 28, 28, 28, 30, 30,
+ 30, 30, 32, 32, 32, 33, 33, 35
+};
+
+/* Inverse scanned output of gau2_ih264_default_intra8x8_scaling_list */
+const UWORD16 gau2_ih264_default_intra8x8_weight_scale[] = /* 64 entries laid out as an 8x8 matrix; the matrix is symmetric */
+{
+ 6, 10, 13, 16, 18, 23, 25, 27,
+ 10, 11, 16, 18, 23, 25, 27, 29,
+ 13, 16, 18, 23, 25, 27, 29, 31,
+ 16, 18, 23, 25, 27, 29, 31, 33,
+ 18, 23, 25, 27, 29, 31, 33, 36,
+ 23, 25, 27, 29, 31, 33, 36, 38,
+ 25, 27, 29, 31, 33, 36, 38, 40,
+ 27, 29, 31, 33, 36, 38, 40, 42
+};
+
+/* Inverse scanned output of gau2_ih264_default_inter8x8_scaling_list */
+const UWORD16 gau2_ih264_default_inter8x8_weight_scale[] = /* 64 entries laid out as an 8x8 matrix; the matrix is symmetric */
+{
+ 9, 13, 15, 17, 19, 21, 22, 24,
+ 13, 13, 17, 19, 21, 22, 24, 25,
+ 15, 17, 19, 21, 22, 24, 25, 27,
+ 17, 19, 21, 22, 24, 25, 27, 28,
+ 19, 21, 22, 24, 25, 27, 28, 30,
+ 21, 22, 24, 25, 27, 28, 30, 32,
+ 22, 24, 25, 27, 28, 30, 32, 33,
+ 24, 25, 27, 28, 30, 32, 33, 35
+};
+/* Eq 7-8 Flat scaling matrix for 4x4 */
+const UWORD16 gau2_ih264_flat_4x4_weight_scale[] = /* 16 entries, every weight equal to 16 */
+{
+ 16, 16, 16, 16,
+ 16, 16, 16, 16,
+ 16, 16, 16, 16,
+ 16, 16, 16, 16
+};
+
+/* Eq 7-9 Flat scaling matrix for 8x8 */
+const UWORD16 gau2_ih264_flat_8x8_weight_scale[] = /* 64 entries, every weight equal to 16 */
+{
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16
+};
+
+
+/**
+ ******************************************************************************
+ * @brief Scale table for inverse quantizing a 4x4 subblock. To inverse quantize
+ * a given 4x4 quantized block, the coefficient at index location (i,j) is scaled
+ * by one of the constants in this table and the result is right-shifted by
+ * abs(4 - floor(qp/6)), where qp is the quantization parameter used for the mb.
+ *
+ * input : 16 * qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 16 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+
+const UWORD16 gau2_ih264_iquant_scale_matrix_4x4[96] = /* 6 groups (one per qp % 6) of 16 constants each */
+{
+ 10, 13, 10, 13, /* qp % 6 == 0 : entries 0..15 */
+ 13, 16, 13, 16,
+ 10, 13, 10, 13,
+ 13, 16, 13, 16,
+
+ 11, 14, 11, 14, /* qp % 6 == 1 : entries 16..31 */
+ 14, 18, 14, 18,
+ 11, 14, 11, 14,
+ 14, 18, 14, 18,
+
+ 13, 16, 13, 16, /* qp % 6 == 2 : entries 32..47 */
+ 16, 20, 16, 20,
+ 13, 16, 13, 16,
+ 16, 20, 16, 20,
+
+ 14, 18, 14, 18, /* qp % 6 == 3 : entries 48..63 */
+ 18, 23, 18, 23,
+ 14, 18, 14, 18,
+ 18, 23, 18, 23,
+
+ 16, 20, 16, 20, /* qp % 6 == 4 : entries 64..79 */
+ 20, 25, 20, 25,
+ 16, 20, 16, 20,
+ 20, 25, 20, 25,
+
+ 18, 23, 18, 23, /* qp % 6 == 5 : entries 80..95 */
+ 23, 29, 23, 29,
+ 18, 23, 18, 23,
+ 23, 29, 23, 29,
+
+};
+
+/**
+ ******************************************************************************
+ * @brief Scale table for inverse quantizing an 8x8 subblock. To inverse quantize
+ * a given 8x8 quantized block, the coefficient at index location (i,j) is scaled
+ * by one of the constants in this table and the result is right-shifted by
+ * abs(4 - floor(qp/6)), where qp is the quantization parameter used for the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 64 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+const UWORD16 gau2_ih264_iquant_scale_matrix_8x8 [384] = /* 6 groups (one per qp % 6) of 64 constants each */
+{
+ 20, 19, 25, 19, 20, 19, 25, 19, /* qp % 6 == 0 : entries 0..63 */
+ 19, 18, 24, 18, 19, 18, 24, 18,
+ 25, 24, 32, 24, 25, 24, 32, 24,
+ 19, 18, 24, 18, 19, 18, 24, 18,
+ 20, 19, 25, 19, 20, 19, 25, 19,
+ 19, 18, 24, 18, 19, 18, 24, 18,
+ 25, 24, 32, 24, 25, 24, 32, 24,
+ 19, 18, 24, 18, 19, 18, 24, 18,
+
+ 22, 21, 28, 21, 22, 21, 28, 21, /* qp % 6 == 1 : entries 64..127 */
+ 21, 19, 26, 19, 21, 19, 26, 19,
+ 28, 26, 35, 26, 28, 26, 35, 26,
+ 21, 19, 26, 19, 21, 19, 26, 19,
+ 22, 21, 28, 21, 22, 21, 28, 21,
+ 21, 19, 26, 19, 21, 19, 26, 19,
+ 28, 26, 35, 26, 28, 26, 35, 26,
+ 21, 19, 26, 19, 21, 19, 26, 19,
+
+ 26, 24, 33, 24, 26, 24, 33, 24, /* qp % 6 == 2 : entries 128..191 */
+ 24, 23, 31, 23, 24, 23, 31, 23,
+ 33, 31, 42, 31, 33, 31, 42, 31,
+ 24, 23, 31, 23, 24, 23, 31, 23,
+ 26, 24, 33, 24, 26, 24, 33, 24,
+ 24, 23, 31, 23, 24, 23, 31, 23,
+ 33, 31, 42, 31, 33, 31, 42, 31,
+ 24, 23, 31, 23, 24, 23, 31, 23,
+
+ 28, 26, 35, 26, 28, 26, 35, 26, /* qp % 6 == 3 : entries 192..255 */
+ 26, 25, 33, 25, 26, 25, 33, 25,
+ 35, 33, 45, 33, 35, 33, 45, 33,
+ 26, 25, 33, 25, 26, 25, 33, 25,
+ 28, 26, 35, 26, 28, 26, 35, 26,
+ 26, 25, 33, 25, 26, 25, 33, 25,
+ 35, 33, 45, 33, 35, 33, 45, 33,
+ 26, 25, 33, 25, 26, 25, 33, 25,
+
+ 32, 30, 40, 30, 32, 30, 40, 30, /* qp % 6 == 4 : entries 256..319 */
+ 30, 28, 38, 28, 30, 28, 38, 28,
+ 40, 38, 51, 38, 40, 38, 51, 38,
+ 30, 28, 38, 28, 30, 28, 38, 28,
+ 32, 30, 40, 30, 32, 30, 40, 30,
+ 30, 28, 38, 28, 30, 28, 38, 28,
+ 40, 38, 51, 38, 40, 38, 51, 38,
+ 30, 28, 38, 28, 30, 28, 38, 28,
+
+ 36, 34, 46, 34, 36, 34, 46, 34, /* qp % 6 == 5 : entries 320..383 */
+ 34, 32, 43, 32, 34, 32, 43, 32,
+ 46, 43, 58, 43, 46, 43, 58, 43,
+ 34, 32, 43, 32, 34, 32, 43, 32,
+ 36, 34, 46, 34, 36, 34, 46, 34,
+ 34, 32, 43, 32, 34, 32, 43, 32,
+ 46, 43, 58, 43, 46, 43, 58, 43,
+ 34, 32, 43, 32, 34, 32, 43, 32,
+
+};
diff --git a/common/ih264_common_tables.h b/common/ih264_common_tables.h
new file mode 100755
index 0000000..3127a2c
--- /dev/null
+++ b/common/ih264_common_tables.h
@@ -0,0 +1,136 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_common_tables.h
+*
+* @brief
+* Common tables
+*
+* @author
+* Harish
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IH264_COMMON_TABLES_H_
+#define _IH264_COMMON_TABLES_H_
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief level tables
+******************************************************************************
+ */
+typedef struct /* one entry of the level limits table gas_ih264_lvl_tbl */
+{
+ /* level identifier; NOTE(review): IH264_LEVEL_T is not declared in this header and must already be in scope where it is included */
+ IH264_LEVEL_T u4_level_idc;
+
+ /* max macroblock processing rate */
+ UWORD32 u4_max_mbps;
+
+ /* max frame size in mbs */
+ UWORD32 u4_max_fs;
+
+ /* max dpb size / 768 (stored pre-divided by 768) */
+ UWORD32 u4_max_dpb_size;
+
+ /* max bit rate */
+ UWORD32 u4_max_br;
+
+ /* max cpb size */
+ UWORD32 u4_max_cpb_size;
+
+ /* max vertical MV component range */
+ UWORD32 u4_max_mv_y;
+
+}level_tables_t;
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief while encoding, the level of the bitstream is computed from the
+ * input configuration parameters using the table below.
+ * input : table_idx
+ * output : level_idc or cpb size
+ * @remarks Table A-1 – level table limits
+ ******************************************************************************
+ */
+extern const level_tables_t gas_ih264_lvl_tbl[16]; /* 16 level entries */
+
+extern const WORD32 gai4_ih264_levels[];
+extern const WORD32 gai4_ih264_max_luma_pic_size[];
+extern const WORD32 gai4_ih264_max_wd_ht[];
+extern const WORD32 gai4_ih264_min_wd_ht[];
+
+extern intra_mbtype_info_t gas_ih264_i_mbtype_info[]; /* NOTE(review): the mbtype/submbtype tables are declared non-const, unlike the other tables here, and their types are not declared in this header */
+extern inter_mbtype_info_t gas_ih264_p_mbtype_info[];
+extern inter_mbtype_info_t gas_ih264_b_mbtype_info[];
+extern submbtype_info_t gas_ih264_p_submbtype_info[];
+extern submbtype_info_t gas_ih264_b_submbtype_info[];
+
+
+extern const UWORD8 gau1_ih264_inv_scan_prog4x4[];
+extern const UWORD8 gau1_ih264_inv_scan_int4x4[];
+extern const UWORD8 gau1_ih264_inv_scan_prog8x8_cavlc[64];
+extern const UWORD8 gau1_ih264_inv_scan_int8x8_cavlc[64];
+extern const UWORD8 gau1_ih264_inv_scan_prog8x8_cabac[64];
+extern const UWORD8 gau1_ih264_inv_scan_int8x8_cabac[64];
+
+extern const UWORD8 *gpau1_ih264_inv_scan8x8[]; /* arrays of pointers to UWORD8 tables */
+extern const UWORD8 *gpau1_ih264_inv_scan4x4[];
+
+extern const UWORD8 gau1_ih264_8x8_subblk_idx[];
+
+extern const UWORD8 gau1_ih264_chroma_qp[];
+
+extern const UWORD8 gau1_ih264_4x4_ngbr_avbl[16][16]; /* neighbour availability lookup for 4x4 sub-blocks */
+extern const UWORD8 gau1_ih264_8x8_ngbr_avbl[16][4]; /* neighbour availability lookup for 8x8 blocks */
+
+
+extern const UWORD16 gau2_ih264_default_inter4x4_weight_scale[]; /* *_weight_scale: inverse-scanned form of the matching *_scaling_list */
+extern const UWORD16 gau2_ih264_default_intra4x4_weight_scale[];
+extern const UWORD16 gau2_ih264_default_intra4x4_scaling_list[];
+extern const UWORD16 gau2_ih264_default_inter4x4_scaling_list[];
+extern const UWORD16 gau2_ih264_default_intra8x8_scaling_list[];
+extern const UWORD16 gau2_ih264_default_inter8x8_scaling_list[];
+extern const UWORD16 gau2_ih264_default_intra8x8_weight_scale[];
+extern const UWORD16 gau2_ih264_default_inter8x8_weight_scale[];
+extern const UWORD16 gau2_ih264_flat_4x4_weight_scale[]; /* flat matrices: every weight is 16 */
+extern const UWORD16 gau2_ih264_flat_8x8_weight_scale[];
+
+extern const UWORD16 gau2_ih264_iquant_scale_matrix_4x4 [96]; /* 6 (qp % 6) x 16 constants */
+extern const UWORD16 gau2_ih264_iquant_scale_matrix_8x8 [384]; /* 6 (qp % 6) x 64 constants */
+
+#endif /*_IH264_COMMON_TABLES_H_*/
diff --git a/common/ih264_deblk_edge_filters.c b/common/ih264_deblk_edge_filters.c
new file mode 100755
index 0000000..d2ffefd
--- /dev/null
+++ b/common/ih264_deblk_edge_filters.c
@@ -0,0 +1,2087 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**************************************************************************** */
+/* */
+/* File Name : ih264_deblk_edge_filters.c */
+/* */
+/* Description : Contains function definitions for deblocking */
+/* */
+/* List of Functions : ih264_deblk_luma_vert_bs4() */
+/* ih264_deblk_luma_horz_bs4() */
+/* ih264_deblk_luma_vert_bslt4() */
+/* ih264_deblk_luma_horz_bslt4() */
+/* ih264_deblk_luma_vert_bs4_mbaff() */
+/* ih264_deblk_luma_vert_bslt4_mbaff() */
+/* ih264_deblk_chroma_vert_bs4_bp() */
+/* ih264_deblk_chroma_horz_bs4_bp() */
+/* ih264_deblk_chroma_vert_bslt4_bp() */
+/* ih264_deblk_chroma_horz_bslt4_bp() */
+/* ih264_deblk_chroma_vert_bs4_mbaff_bp() */
+/* ih264_deblk_chroma_vert_bslt4_mbaff_bp() */
+/* ih264_deblk_chroma_vert_bs4() */
+/* ih264_deblk_chroma_horz_bs4() */
+/* ih264_deblk_chroma_vert_bslt4() */
+/* ih264_deblk_chroma_horz_bslt4() */
+/* ih264_deblk_chroma_vert_bs4_mbaff() */
+/* ih264_deblk_chroma_vert_bslt4_mbaff() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* 29 12 2014 Kaushik Added double-call vertical */
+/* Senthoor deblocking and high profile */
+/* deblocking functions */
+/* */
+/******************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_vert_bs4() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* vertical edge when the boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_vert_bs4(UWORD8 *pu1_src, /* q0 sample of the first row; filtered in place */
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta)
+{
+ UWORD8 p3, p2, p1, p0, q0, q1, q2, q3;
+ WORD32 pos_p3, pos_p2, pos_p1, pos_p0;
+ WORD32 pos_q0, pos_q1, pos_q2,pos_q3;
+ UWORD8 a_p, a_q; /* threshold variables: |p2 - p0| and |q2 - q0| */
+ WORD32 blk_strd = src_strd << 2; /* block_increment = src_strd * 4 */
+ UWORD8 *pu1_src_temp;
+ WORD8 i = 0, edge;
+
+ pos_q0 = 0; /* vertical edge: the eight samples of a row are adjacent bytes, q side at offsets 0..3 */
+ pos_q1 = 1;
+ pos_q2 = 2;
+ pos_q3 = 3;
+ pos_p0 = -1; /* p side at offsets -1..-4 */
+ pos_p1 = -2;
+ pos_p2 = -3;
+ pos_p3 = -4;
+
+ for(edge = 0; edge < 4; edge++, pu1_src += blk_strd) /* 4 groups of 4 rows = 16 rows in total */
+ {
+ pu1_src_temp = pu1_src;
+ for(i = 0; i < 4; ++i, pu1_src_temp += src_strd) /* one row per iteration */
+ {
+ q0 = pu1_src_temp[pos_q0];
+ q1 = pu1_src_temp[pos_q1];
+ p0 = pu1_src_temp[pos_p0];
+ p1 = pu1_src_temp[pos_p1];
+
+ /* Filter Decision: leave the row untouched if any of the three differences reaches its threshold */
+ if((ABS(p0 - q0) >= alpha) ||
+ (ABS(q1 - q0) >= beta) ||
+ (ABS(p1 - p0) >= beta))
+ continue;
+
+ p2 = pu1_src_temp[pos_p2];
+ p3 = pu1_src_temp[pos_p3];
+ q2 = pu1_src_temp[pos_q2];
+ q3 = pu1_src_temp[pos_q3];
+
+ if(ABS(p0 - q0) < ((alpha >> 2) + 2)) /* small step across the edge: each side may get the 3-sample filter */
+ {
+ /* Threshold Variables */
+ a_p = (UWORD8)ABS(p2 - p0);
+ a_q = (UWORD8)ABS(q2 - q0);
+
+ if(a_p < beta)
+ {
+ /* p0', p1', p2' (all computed from the unfiltered samples read above) */
+ pu1_src_temp[pos_p0] = ((p2 + X2(p1) + X2(p0) + X2(q0) + q1
+ + 4) >> 3);
+ pu1_src_temp[pos_p1] = ((p2 + p1 + p0 + q0 + 2) >> 2);
+ pu1_src_temp[pos_p2] =
+ ((X2(p3) + X3(p2) + p1 + p0 + q0
+ + 4) >> 3);
+ }
+ else
+ {
+ /* p0' only */
+ pu1_src_temp[pos_p0] = ((X2(p1) + p0 + q1 + 2) >> 2);
+ }
+
+ if(a_q < beta)
+ {
+ /* q0', q1', q2' (all computed from the unfiltered samples read above) */
+ pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1) + q2
+ + 4) >> 3;
+ pu1_src_temp[pos_q1] = (p0 + q0 + q1 + q2 + 2) >> 2;
+ pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0 + 4)
+ >> 3;
+ }
+ else
+ {
+ /* q0' only */
+ pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2;
+ }
+ }
+ else
+ {
+ /* p0', q0' only: the step across the edge is too large for the 3-sample filter */
+ pu1_src_temp[pos_p0] = ((X2(p1) + p0 + q1 + 2) >> 2);
+ pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_horz_bs4() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* horizontal edge when the boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_horz_bs4(UWORD8 *pu1_src, /* q0 sample of the first column; filtered in place */
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta)
+{
+ UWORD8 p3, p2, p1, p0, q0, q1, q2, q3;
+ WORD32 pos_p3, pos_p2, pos_p1, pos_p0, pos_q0, pos_q1,
+ pos_q2, pos_q3;
+ UWORD8 a_p, a_q; /* threshold variables: |p2 - p0| and |q2 - q0| */
+ UWORD8 *pu1_p3; /* pointer to the src sample p3 */
+ UWORD8 *pu1_p3_temp;
+ UWORD8 *pu1_src_temp;
+ WORD8 i = 0, edge;
+
+ pu1_p3 = pu1_src - (src_strd << 2); /* four rows above q0 */
+ pos_q0 = 0; /* q offsets are relative to pu1_src, one row apart */
+ pos_q1 = src_strd;
+ pos_q2 = X2(src_strd);
+ pos_q3 = X3(src_strd);
+ pos_p0 = X3(src_strd); /* p offsets are relative to pu1_p3, so p0 is the farthest row from it */
+ pos_p1 = X2(src_strd);
+ pos_p2 = src_strd;
+ pos_p3 = 0;
+
+ for(edge = 0; edge < 4; edge++, pu1_src += 4, pu1_p3 += 4) /* 4 groups of 4 columns = 16 columns in total */
+ {
+ pu1_src_temp = pu1_src;
+ pu1_p3_temp = pu1_p3;
+ for(i = 0; i < 4; ++i, pu1_src_temp++, pu1_p3_temp++) /* one column per iteration */
+ {
+ q0 = pu1_src_temp[pos_q0];
+ q1 = pu1_src_temp[pos_q1];
+ p0 = pu1_p3_temp[pos_p0];
+ p1 = pu1_p3_temp[pos_p1];
+
+ /* Filter Decision: leave the column untouched if any of the three differences reaches its threshold */
+ if((ABS(p0 - q0) >= alpha) ||
+ (ABS(q1 - q0) >= beta) ||
+ (ABS(p1 - p0) >= beta))
+ continue;
+
+ p2 = pu1_p3_temp[pos_p2];
+ p3 = pu1_p3_temp[pos_p3];
+ q2 = pu1_src_temp[pos_q2];
+ q3 = pu1_src_temp[pos_q3];
+
+ if(ABS(p0 - q0) < ((alpha >> 2) + 2)) /* small step across the edge: each side may get the 3-sample filter */
+ {
+ /* Threshold Variables */
+ a_p = ABS(p2 - p0);
+ a_q = ABS(q2 - q0);
+
+ if((a_p < beta))
+ {
+ /* p0', p1', p2' (all computed from the unfiltered samples read above) */
+ pu1_p3_temp[pos_p0] = (p2 + X2(p1) + X2(p0) + X2(q0) + q1
+ + 4) >> 3;
+ pu1_p3_temp[pos_p1] = (p2 + p1 + p0 + q0 + 2) >> 2;
+ pu1_p3_temp[pos_p2] =
+ (X2(p3) + X3(p2) + p1 + p0 + q0
+ + 4) >> 3;
+ }
+ else
+ {
+ /* p0' only */
+ pu1_p3_temp[pos_p0] = (X2(p1) + p0 + q1 + 2) >> 2;
+ }
+
+ if(a_q < beta)
+ {
+ /* q0', q1', q2' (all computed from the unfiltered samples read above) */
+ pu1_src_temp[pos_q0] = (p1 + X2(p0) + X2(q0) + X2(q1)
+ + q2 + 4) >> 3;
+ pu1_src_temp[pos_q1] = (p0 + q0 + q1 + q2 + 2) >> 2;
+ pu1_src_temp[pos_q2] = (X2(q3) + X3(q2) + q1 + q0 + p0
+ + 4) >> 3;
+ }
+ else
+ {
+ /* q0' only */
+ pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2;
+ }
+ }
+ else
+ {
+ /* p0', q0' only: the step across the edge is too large for the 3-sample filter */
+ pu1_p3_temp[pos_p0] = (X2(p1) + p0 + q1 + 2) >> 2;
+ pu1_src_temp[pos_q0] = (X2(q1) + q0 + p1 + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bs4_bp() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when the boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bs4_bp(UWORD8 *pu1_src, /* q0 sample of U in the first row; V is read from the next byte; filtered in place */
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta)
+{
+ UWORD8 *pu1_src_u = pu1_src; /* pointer to the src sample q0 of U */
+ UWORD8 *pu1_src_v = pu1_src + 1; /* pointer to the src sample q0 of V */
+ UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+ WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * 2 */
+ WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+ UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+ WORD8 i = 0, edge;
+
+ pos_q0 = 0; /* U and V bytes alternate, so neighbouring samples of one plane are 2 bytes apart */
+ pos_q1 = 2;
+ pos_p0 = -2;
+ pos_p1 = -4;
+
+ for(edge = 0; edge < 4; /* 4 groups of 2 rows = 8 rows in total */
+ edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
+ {
+ pu1_src_temp_u = pu1_src_u;
+ pu1_src_temp_v = pu1_src_v;
+ for(i = 0; i < 2; ++i, pu1_src_temp_u += src_strd, pu1_src_temp_v +=
+ src_strd)
+ {
+ q0_u = pu1_src_temp_u[pos_q0];
+ q1_u = pu1_src_temp_u[pos_q1];
+ p0_u = pu1_src_temp_u[pos_p0];
+ p1_u = pu1_src_temp_u[pos_p1];
+ q0_v = pu1_src_temp_v[pos_q0];
+ q1_v = pu1_src_temp_v[pos_q1];
+ p0_v = pu1_src_temp_v[pos_p0];
+ p1_v = pu1_src_temp_v[pos_p1];
+
+ /* Filter Decision for U: all three differences must be below their thresholds */
+ if((ABS(p0_u - q0_u) < alpha) &&
+ (ABS(q1_u - q0_u) < beta) &&
+ (ABS(p1_u - p0_u) < beta))
+ {
+ /* p0' */
+ pu1_src_temp_u[pos_p0] = ((X2(p1_u) + p0_u + q1_u + 2) >> 2);
+ /* q0' */
+ pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2;
+ }
+
+ /* Filter Decision for V: taken independently of the U decision */
+ if((ABS(p0_v - q0_v) < alpha) &&
+ (ABS(q1_v - q0_v) < beta) &&
+ (ABS(p1_v - p0_v) < beta))
+ {
+ /* p0' */
+ pu1_src_temp_v[pos_p0] = ((X2(p1_v) + p0_v + q1_v + 2) >> 2);
+ /* q0' */
+ pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_horz_bs4_bp() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* horizontal edge when the boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_horz_bs4_bp(UWORD8 *pu1_src, /* q0 sample of U in the first column; V is read from the next byte; filtered in place */
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta)
+{
+ UWORD8 *pu1_src_u = pu1_src; /* pointer to the src sample q0 of U */
+ UWORD8 *pu1_src_v = pu1_src + 1; /* pointer to the src sample q0 of V */
+ UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+ WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+ UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+ UWORD8 *pu1_p1_u; /* pointer to the src sample p1 of U */
+ UWORD8 *pu1_p1_v; /* pointer to the src sample p1 of V */
+ UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v;
+ WORD8 i = 0, edge;
+
+ pu1_p1_u = pu1_src_u - (src_strd << 1); /* two rows above q0 */
+ pu1_p1_v = pu1_src_v - (src_strd << 1);
+ pos_q0 = 0; /* q offsets are relative to the q0 pointers */
+ pos_q1 = src_strd;
+ pos_p0 = src_strd; /* p offsets are relative to the p1 pointers */
+ pos_p1 = 0;
+
+ for(edge = 0; edge < 4; edge++, pu1_src_u += 4, pu1_p1_u += 4, /* 4 groups of 2 U/V pairs = 8 pairs in total */
+ pu1_src_v += 4, pu1_p1_v += 4)
+ {
+ pu1_src_temp_u = pu1_src_u;
+ pu1_p1_temp_u = pu1_p1_u;
+ pu1_src_temp_v = pu1_src_v;
+ pu1_p1_temp_v = pu1_p1_v;
+ for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2, /* step 2 bytes: U and V bytes alternate */
+ pu1_src_temp_v += 2, pu1_p1_temp_v += 2)
+ {
+ q0_u = pu1_src_temp_u[pos_q0];
+ q1_u = pu1_src_temp_u[pos_q1];
+ p0_u = pu1_p1_temp_u[pos_p0];
+ p1_u = pu1_p1_temp_u[pos_p1];
+
+ q0_v = pu1_src_temp_v[pos_q0];
+ q1_v = pu1_src_temp_v[pos_q1];
+ p0_v = pu1_p1_temp_v[pos_p0];
+ p1_v = pu1_p1_temp_v[pos_p1];
+
+ /* Filter Decision for U: all three differences must be below their thresholds */
+ if((ABS(p0_u - q0_u) < alpha) &&
+ (ABS(q1_u - q0_u) < beta) &&
+ (ABS(p1_u - p0_u) < beta))
+ {
+ /* p0' */
+ pu1_p1_temp_u[pos_p0] = (X2(p1_u) + p0_u + q1_u + 2) >> 2;
+ /* q0' */
+ pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2;
+ }
+
+ /* Filter Decision for V: taken independently of the U decision */
+ if((ABS(p0_v - q0_v) < alpha) &&
+ (ABS(q1_v - q0_v) < beta) &&
+ (ABS(p1_v - p0_v) < beta))
+ {
+ /* p0' */
+ pu1_p1_temp_v[pos_p0] = (X2(p1_v) + p0_v + q1_v + 2) >> 2;
+ /* q0' */
+ pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_vert_bslt4() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* vertical edge when the boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_vert_bslt4(UWORD8 *pu1_src, /* q0 sample of the first row; filtered in place */
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta,
+ UWORD32 u4_bs, /* four 8-bit boundary strengths; the most significant byte is used first */
+ const UWORD8 *pu1_cliptab) /* indexed by boundary strength to obtain tc0 */
+{
+ WORD8 i = 0, edge;
+ UWORD8 p2, p1, p0, q0, q1, q2;
+ WORD32 pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, pos_q2;
+ UWORD8 a_p, a_q; /* threshold variables: |p2 - p0| and |q2 - q0| */
+ WORD32 blk_strd = src_strd << 2; /* block_increment = src_strd * 4 */
+ UWORD8 *pu1_src_temp;
+ WORD8 delta;
+ WORD8 tc;
+ WORD16 val;
+ UWORD8 tc0, u1_bs;
+
+ pos_q0 = 0; /* vertical edge: the samples of a row are adjacent bytes, q side at offsets 0..2 */
+ pos_q1 = 1;
+ pos_q2 = 2;
+ pos_p0 = -1; /* p side at offsets -1..-3 */
+ pos_p1 = -2;
+ pos_p2 = -3;
+
+ for(edge = 0; edge < 4; edge++, pu1_src += blk_strd) /* 4 groups of 4 rows, one boundary strength per group */
+ {
+ pu1_src_temp = pu1_src;
+ /* Filter Decision: a boundary strength of 0 leaves this group of rows unfiltered */
+ u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+ if(!u1_bs)
+ continue;
+ /* tc0 */
+ tc0 = pu1_cliptab[u1_bs];
+ for(i = 0; i < 4; ++i, pu1_src_temp += src_strd) /* one row per iteration */
+ {
+ q0 = pu1_src_temp[pos_q0];
+ q1 = pu1_src_temp[pos_q1];
+ p0 = pu1_src_temp[pos_p0];
+ p1 = pu1_src_temp[pos_p1];
+
+ /* Filter Decision: leave the row untouched if any of the three differences reaches its threshold */
+ if((ABS(p0 - q0) >= alpha) ||
+ (ABS(q1 - q0) >= beta) ||
+ (ABS(p1 - p0) >= beta))
+ continue;
+
+ q2 = pu1_src_temp[pos_q2];
+ p2 = pu1_src_temp[pos_p2];
+
+ a_p = ABS(p2 - p0);
+ a_q = ABS(q2 - q0);
+
+ /* tc: tc0 plus one for each side that also gets its second sample filtered */
+ tc = tc0 + (a_p < beta) + (a_q < beta);
+
+ val = ((((q0 - p0) * 4) + (p1 - q1) + 4) >> 3); /* "* 4", not "<< 2": q0 - p0 may be negative and left-shifting a negative int is undefined */
+ delta = CLIP3(-tc, tc, val);
+
+ /* p0' */
+ val = p0 + delta;
+ pu1_src_temp[pos_p0] = CLIP_U8(val);
+ /* q0' */
+ val = q0 - delta;
+ pu1_src_temp[pos_q0] = CLIP_U8(val);
+
+ /* Luma only: p1/q1 correction, clipped to +/- tc0 and computed from the unfiltered p0/q0 */
+ if(a_p < beta)
+ {
+ /* p1' */
+ val = ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1);
+ pu1_src_temp[pos_p1] += CLIP3(-tc0, tc0, val);
+ }
+
+ if(a_q < beta)
+ {
+ /* q1' */
+ val = ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1);
+ pu1_src_temp[pos_q1] += CLIP3(-tc0, tc0, val);
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bslt4_bp() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when the boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bslt4_bp(UWORD8 *pu1_src, /* q0 sample of U in the first row; V is read from the next byte; filtered in place */
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta,
+ UWORD32 u4_bs, /* four 8-bit boundary strengths; the most significant byte is used first */
+ const UWORD8 *pu1_cliptab) /* indexed by boundary strength to obtain tc0 */
+{
+ UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
+ UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
+ UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+ WORD32 blk_strd = src_strd << 1; /* block_increment = src_strd * (4 >> 1)*/
+ WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+ UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+ WORD8 i = 0, edge;
+ WORD8 delta;
+ WORD8 tc;
+ WORD16 val;
+ UWORD8 tc0, u1_bs;
+
+ pos_q0 = 0; /* U and V bytes alternate, so neighbouring samples of one plane are 2 bytes apart */
+ pos_q1 = 2;
+ pos_p0 = -2;
+ pos_p1 = -4;
+
+ for(edge = 0; edge < 4; /* 4 groups of 2 rows, one boundary strength per group */
+ edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
+ {
+ pu1_src_temp_u = pu1_src_u;
+ pu1_src_temp_v = pu1_src_v;
+ /* Filter Decision: a boundary strength of 0 leaves this group of rows unfiltered */
+ u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+ if(!u1_bs)
+ continue;
+ /* tc0 */
+ tc0 = pu1_cliptab[u1_bs];
+ tc = tc0 + 1; /* fixed clip range for chroma, shared by U and V */
+ for(i = 0; i < 2; ++i, pu1_src_temp_u += src_strd, pu1_src_temp_v +=
+ src_strd)
+ {
+ q0_u = pu1_src_temp_u[pos_q0];
+ q1_u = pu1_src_temp_u[pos_q1];
+ p0_u = pu1_src_temp_u[pos_p0];
+ p1_u = pu1_src_temp_u[pos_p1];
+
+ q0_v = pu1_src_temp_v[pos_q0];
+ q1_v = pu1_src_temp_v[pos_q1];
+ p0_v = pu1_src_temp_v[pos_p0];
+ p1_v = pu1_src_temp_v[pos_p1];
+
+ /* Filter Decision for U: all three differences must be below their thresholds */
+ if((ABS(p0_u - q0_u) < alpha) &&
+ (ABS(q1_u - q0_u) < beta) &&
+ (ABS(p1_u - p0_u) < beta))
+ {
+ val = ((((q0_u - p0_u) * 4) + (p1_u - q1_u) + 4) >> 3); /* "* 4", not "<< 2": the difference may be negative and left-shifting a negative int is undefined */
+ delta = CLIP3(-tc, tc, val);
+ /* p0' */
+ val = p0_u + delta;
+ pu1_src_temp_u[pos_p0] = CLIP_U8(val);
+ /* q0' */
+ val = q0_u - delta;
+ pu1_src_temp_u[pos_q0] = CLIP_U8(val);
+ }
+
+ /* Filter Decision for V: taken independently of the U decision */
+ if((ABS(p0_v - q0_v) < alpha) &&
+ (ABS(q1_v - q0_v) < beta) &&
+ (ABS(p1_v - p0_v) < beta))
+ {
+ val = ((((q0_v - p0_v) * 4) + (p1_v - q1_v) + 4) >> 3); /* "* 4", not "<< 2": the difference may be negative and left-shifting a negative int is undefined */
+ delta = CLIP3(-tc, tc, val);
+ /* p0' */
+ val = p0_v + delta;
+ pu1_src_temp_v[pos_p0] = CLIP_U8(val);
+ /* q0' */
+ val = q0_v - delta;
+ pu1_src_temp_v[pos_q0] = CLIP_U8(val);
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_horz_bslt4() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* horizontal edge when boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_horz_bslt4(UWORD8 *pu1_src,
+                                 WORD32 src_strd,
+                                 WORD32 alpha,
+                                 WORD32 beta,
+                                 UWORD32 u4_bs,
+                                 const UWORD8 *pu1_cliptab)
+{
+    UWORD8 p2, p1, p0, q0, q1, q2;
+    WORD32 pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, pos_q2;
+    UWORD8 a_p, a_q; /* Threshold variables */
+    /* Base pointer for the p samples. It is placed 4 rows above q0 (one  */
+    /* row above p2) so that every p offset below is a non-negative       */
+    /* multiple of src_strd.                                              */
+    UWORD8 *pu1_p2;
+    UWORD8 *pu1_p2_temp;
+    UWORD8 *pu1_src_temp;
+    WORD8 i = 0, edge;
+    WORD8 delta;
+    WORD8 tc;
+    WORD16 val;
+    UWORD8 tc0, u1_bs;
+
+    pu1_p2 = pu1_src - (src_strd * 4);
+    /* q offsets are relative to pu1_src, p offsets relative to pu1_p2.   */
+    /* NOTE(review): assumes X2()/X3() multiply by 2 and 3 - confirm      */
+    /* against their definition.                                          */
+    pos_q0 = 0;
+    pos_q1 = src_strd;
+    pos_q2 = X2(src_strd);
+    pos_p0 = X3(src_strd);
+    pos_p1 = X2(src_strd);
+    pos_p2 = src_strd;
+
+    /* Four segments of 4 columns each along the horizontal edge */
+    for(edge = 0; edge < 4; edge++, pu1_src += 4, pu1_p2 += 4)
+    {
+        pu1_src_temp = pu1_src;
+        pu1_p2_temp = pu1_p2;
+
+        /* bS of this segment: the most significant byte of u4_bs is used */
+        /* first, the least significant byte last                         */
+        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+        if(!u1_bs)
+            continue;
+        /* tc0 */
+        tc0 = pu1_cliptab[u1_bs];
+
+        for(i = 0; i < 4; ++i, pu1_src_temp++, pu1_p2_temp++)
+        {
+            q0 = pu1_src_temp[pos_q0];
+            q1 = pu1_src_temp[pos_q1];
+            p0 = pu1_p2_temp[pos_p0];
+            p1 = pu1_p2_temp[pos_p1];
+
+            /* Filter Decision */
+            if((ABS(p0 - q0) >= alpha) ||
+               (ABS(q1 - q0) >= beta) ||
+               (ABS(p1 - p0) >= beta))
+                continue;
+
+            /* p2/q2 are only read for columns that are actually filtered */
+            q2 = pu1_src_temp[pos_q2];
+            p2 = pu1_p2_temp[pos_p2];
+
+            a_p = ABS(p2 - p0);
+            a_q = ABS(q2 - q0);
+
+            /* tc: tc0 plus one for each side that is smooth enough */
+            tc = tc0 + (a_p < beta) + (a_q < beta);
+            /* (q0 - p0) may be negative; multiply by 4 rather than       */
+            /* shifting left, since left-shifting a negative value is     */
+            /* undefined behaviour in C                                   */
+            val = ((((q0 - p0) * 4) + (p1 - q1) + 4) >> 3);
+            delta = CLIP3(-tc, tc, val);
+            /* p0' */
+            val = p0 + delta;
+            pu1_p2_temp[pos_p0] = CLIP_U8(val);
+            /* q0' */
+            val = q0 - delta;
+            pu1_src_temp[pos_q0] = CLIP_U8(val);
+
+            /* Luma only: p1/q1 are adjusted with the unmodified tc0 and  */
+            /* the original (pre-filter) p0/q0 values                     */
+            if(a_p < beta)
+            {
+                /* p1' */
+                val = ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1);
+                pu1_p2_temp[pos_p1] += CLIP3(-tc0, tc0, val);
+            }
+
+            if(a_q < beta)
+            {
+                /* q1' */
+                val = ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1);
+                pu1_src_temp[pos_q1] += CLIP3(-tc0, tc0, val);
+            }
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_horz_bslt4_bp() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* horizontal edge when boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 11 2013 Ittiam Draft */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_horz_bslt4_bp(UWORD8 *pu1_src,
+                                      WORD32 src_strd,
+                                      WORD32 alpha,
+                                      WORD32 beta,
+                                      UWORD32 u4_bs,
+                                      const UWORD8 *pu1_cliptab)
+{
+    /* U samples are addressed from pu1_src and V samples one byte later, */
+    /* so one U/V pair occupies 2 bytes along a row.                      */
+    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
+    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
+    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+    UWORD8 *pu1_p1_u; /* Pointer to the src sample p1 of plane U*/
+    UWORD8 *pu1_p1_v; /* Pointer to the src sample p1 of plane V*/
+    UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v;
+    WORD8 i = 0, edge;
+    WORD8 delta;
+    WORD8 tc;
+    WORD16 val;
+    UWORD8 u1_bs;
+    UWORD8 tc0;
+
+    /* p1 lies two rows above q0 */
+    pu1_p1_u = pu1_src_u - (src_strd * 2);
+    pu1_p1_v = pu1_src_v - (src_strd * 2);
+    /* q offsets are relative to the q0 pointers, p offsets relative to   */
+    /* the p1 pointers                                                    */
+    pos_q0 = 0;
+    pos_q1 = src_strd;
+    pos_p0 = src_strd;
+    pos_p1 = 0;
+
+    /* Four segments of two U/V pairs (4 bytes) each along the edge */
+    for(edge = 0; edge < 4; edge++, pu1_src_u += 4, pu1_p1_u += 4,
+                    pu1_src_v += 4, pu1_p1_v += 4)
+    {
+        pu1_src_temp_u = pu1_src_u;
+        pu1_p1_temp_u = pu1_p1_u;
+        pu1_src_temp_v = pu1_src_v;
+        pu1_p1_temp_v = pu1_p1_v;
+
+        /* bS of this segment: the most significant byte of u4_bs is used */
+        /* first, the least significant byte last                         */
+        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+        if(!u1_bs)
+            continue;
+        /* tc0 */
+        tc0 = pu1_cliptab[u1_bs];
+        /* tc: depends only on tc0, so it is computed once per segment */
+        tc = tc0 + 1;
+
+        for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2,
+                        pu1_src_temp_v += 2, pu1_p1_temp_v += 2)
+        {
+            /* All eight samples are read before any of them is written */
+            q0_u = pu1_src_temp_u[pos_q0];
+            q1_u = pu1_src_temp_u[pos_q1];
+            p0_u = pu1_p1_temp_u[pos_p0];
+            p1_u = pu1_p1_temp_u[pos_p1];
+
+            q0_v = pu1_src_temp_v[pos_q0];
+            q1_v = pu1_src_temp_v[pos_q1];
+            p0_v = pu1_p1_temp_v[pos_p0];
+            p1_v = pu1_p1_temp_v[pos_p1];
+
+            /* Filter Decision (U) */
+            if(ABS(p0_u - q0_u) < alpha && ABS(q1_u - q0_u) < beta
+                            && ABS(p1_u - p0_u) < beta)
+            {
+                /* (q0 - p0) may be negative; multiply by 4 rather than   */
+                /* shifting left, since left-shifting a negative value is */
+                /* undefined behaviour in C                               */
+                val = ((((q0_u - p0_u) * 4) + (p1_u - q1_u) + 4) >> 3);
+                delta = CLIP3(-tc, tc, val);
+                /* p0' */
+                val = p0_u + delta;
+                pu1_p1_temp_u[pos_p0] = CLIP_U8(val);
+                /* q0' */
+                val = q0_u - delta;
+                pu1_src_temp_u[pos_q0] = CLIP_U8(val);
+            }
+            /* Filter Decision (V) */
+            if(ABS(p0_v - q0_v) < alpha && ABS(q1_v - q0_v) < beta
+                            && ABS(p1_v - p0_v) < beta)
+            {
+                /* multiply instead of shift: see the U plane above */
+                val = ((((q0_v - p0_v) * 4) + (p1_v - q1_v) + 4) >> 3);
+                delta = CLIP3(-tc, tc, val);
+                /* p0' */
+                val = p0_v + delta;
+                pu1_p1_temp_v[pos_p0] = CLIP_U8(val);
+                /* q0' */
+                val = q0_v - delta;
+                pu1_src_temp_v[pos_q0] = CLIP_U8(val);
+            }
+        }
+    }
+}
+
+/*****************************************************************************/
+/* Function Definitions for vertical edge deblocking for double-call */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_vert_bs4_mbaff() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* vertical edge when boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.4 under the title "Filtering */
+/* process for edges for bS equal to 4" in ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_vert_bs4_mbaff(UWORD8 *pu1_src,
+                                     WORD32 src_strd,
+                                     WORD32 alpha,
+                                     WORD32 beta)
+{
+    UWORD8 *pu1_row = pu1_src; /* q0 sample of the row being filtered */
+    WORD32 row;
+
+    /* Four edge segments of two rows each, i.e. 8 consecutive rows. In   */
+    /* every row p0..p3 sit at offsets -1..-4 and q0..q3 at offsets 0..3. */
+    for(row = 0; row < 8; row++, pu1_row += src_strd)
+    {
+        UWORD8 p3, p2, p1, p0, q0, q1, q2, q3;
+        WORD32 strong_p = 0, strong_q = 0;
+
+        p1 = pu1_row[-2];
+        p0 = pu1_row[-1];
+        q0 = pu1_row[0];
+        q1 = pu1_row[1];
+
+        /* Filter decision: the row is left untouched unless all three    */
+        /* differences are below their thresholds                         */
+        if(!((ABS(p0 - q0) < alpha) &&
+             (ABS(q1 - q0) < beta) &&
+             (ABS(p1 - p0) < beta)))
+            continue;
+
+        /* Outer samples are only fetched for rows that get filtered */
+        p3 = pu1_row[-4];
+        p2 = pu1_row[-3];
+        q2 = pu1_row[2];
+        q3 = pu1_row[3];
+
+        /* The 3-sample filter is used on a side only when the step       */
+        /* across the edge is small and that side is smooth               */
+        if(ABS(p0 - q0) < ((alpha >> 2) + 2))
+        {
+            strong_p = ((UWORD8)ABS(p2 - p0) < beta);
+            strong_q = ((UWORD8)ABS(q2 - q0) < beta);
+        }
+
+        if(strong_p)
+        {
+            /* p0', p1', p2' */
+            pu1_row[-1] = ((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3);
+            pu1_row[-2] = ((p2 + p1 + p0 + q0 + 2) >> 2);
+            pu1_row[-3] = ((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3);
+        }
+        else
+        {
+            /* p0' only */
+            pu1_row[-1] = ((X2(p1) + p0 + q1 + 2) >> 2);
+        }
+
+        if(strong_q)
+        {
+            /* q0', q1', q2' */
+            pu1_row[0] = (p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3;
+            pu1_row[1] = (p0 + q0 + q1 + q2 + 2) >> 2;
+            pu1_row[2] = (X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3;
+        }
+        else
+        {
+            /* q0' only */
+            pu1_row[0] = (X2(q1) + q0 + p1 + 2) >> 2;
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bs4_mbaff_bp() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.4 under the title "Filtering */
+/* process for edges for bS equal to 4" in ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bs4_mbaff_bp(UWORD8 *pu1_src,
+                                          WORD32 src_strd,
+                                          WORD32 alpha,
+                                          WORD32 beta)
+{
+    /* U q0 of the current row; the matching V sample is one byte later. */
+    /* U samples p1, p0, q0, q1 are at offsets -4, -2, 0, 2 and the V    */
+    /* samples at -3, -1, 1, 3.                                          */
+    UWORD8 *pu1_row = pu1_src;
+    WORD32 row;
+
+    /* One row per edge segment, four rows in total */
+    for(row = 0; row < 4; row++, pu1_row += src_strd)
+    {
+        /* Every sample is fetched before anything is written back */
+        const UWORD8 p1_u = pu1_row[-4];
+        const UWORD8 p0_u = pu1_row[-2];
+        const UWORD8 q0_u = pu1_row[0];
+        const UWORD8 q1_u = pu1_row[2];
+        const UWORD8 p1_v = pu1_row[-3];
+        const UWORD8 p0_v = pu1_row[-1];
+        const UWORD8 q0_v = pu1_row[1];
+        const UWORD8 q1_v = pu1_row[3];
+
+        /* U plane: filter p0 and q0 when all three thresholds hold */
+        if((ABS(p0_u - q0_u) < alpha) &&
+           (ABS(q1_u - q0_u) < beta) &&
+           (ABS(p1_u - p0_u) < beta))
+        {
+            pu1_row[-2] = ((X2(p1_u) + p0_u + q1_u + 2) >> 2); /* p0' */
+            pu1_row[0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2; /* q0' */
+        }
+
+        /* V plane: same decision and filter, evaluated independently */
+        if((ABS(p0_v - q0_v) < alpha) &&
+           (ABS(q1_v - q0_v) < beta) &&
+           (ABS(p1_v - p0_v) < beta))
+        {
+            pu1_row[-1] = ((X2(p1_v) + p0_v + q1_v + 2) >> 2); /* p0' */
+            pu1_row[1] = (X2(q1_v) + q0_v + p1_v + 2) >> 2; /* q0' */
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_vert_bslt4_mbaff() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* vertical edge when boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.3 under the title "Filtering */
+/* process for edges for bS less than 4" in ITU T Rec H.264.*/
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_vert_bslt4_mbaff(UWORD8 *pu1_src,
+                                       WORD32 src_strd,
+                                       WORD32 alpha,
+                                       WORD32 beta,
+                                       UWORD32 u4_bs,
+                                       const UWORD8 *pu1_cliptab)
+{
+    WORD8 i = 0, edge;
+    UWORD8 p2, p1, p0, q0, q1, q2;
+    WORD32 pos_p2, pos_p1, pos_p0, pos_q0, pos_q1, pos_q2;
+    UWORD8 a_p, a_q; /* Threshold variables */
+    WORD32 blk_strd = src_strd * 2; /* each bS byte covers two rows */
+    UWORD8 *pu1_src_temp;
+    WORD8 delta;
+    WORD8 tc;
+    WORD16 val;
+    UWORD8 tc0, u1_bs;
+
+    /* Byte offsets of the samples across the vertical edge, relative to q0 */
+    pos_q0 = 0;
+    pos_q1 = 1;
+    pos_q2 = 2;
+    pos_p0 = -1;
+    pos_p1 = -2;
+    pos_p2 = -3;
+
+    for(edge = 0; edge < 4; edge++, pu1_src += blk_strd)
+    {
+        pu1_src_temp = pu1_src;
+        /* bS of this segment: the most significant byte of u4_bs is used */
+        /* first, the least significant byte last                         */
+        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+        if(!u1_bs)
+            continue;
+        /* tc0 */
+        tc0 = pu1_cliptab[u1_bs];
+        for(i = 0; i < 2; ++i, pu1_src_temp += src_strd)
+        {
+            q0 = pu1_src_temp[pos_q0];
+            q1 = pu1_src_temp[pos_q1];
+            p0 = pu1_src_temp[pos_p0];
+            p1 = pu1_src_temp[pos_p1];
+
+            /* Filter Decision */
+            if((ABS(p0 - q0) >= alpha) ||
+               (ABS(q1 - q0) >= beta) ||
+               (ABS(p1 - p0) >= beta))
+                continue;
+
+            /* p2/q2 are only read for rows that are actually filtered */
+            q2 = pu1_src_temp[pos_q2];
+            p2 = pu1_src_temp[pos_p2];
+
+            a_p = ABS(p2 - p0);
+            a_q = ABS(q2 - q0);
+
+            /* tc: tc0 plus one for each side that is smooth enough */
+            tc = tc0 + (a_p < beta) + (a_q < beta);
+
+            /* (q0 - p0) may be negative; multiply by 4 rather than       */
+            /* shifting left, since left-shifting a negative value is     */
+            /* undefined behaviour in C                                   */
+            val = ((((q0 - p0) * 4) + (p1 - q1) + 4) >> 3);
+            delta = CLIP3(-tc, tc, val);
+            /* p0' */
+            val = p0 + delta;
+            pu1_src_temp[pos_p0] = CLIP_U8(val);
+            /* q0' */
+            val = q0 - delta;
+            pu1_src_temp[pos_q0] = CLIP_U8(val);
+
+            /* Luma only: p1/q1 are adjusted with the unmodified tc0 and  */
+            /* the original (pre-filter) p0/q0 values                     */
+            if(a_p < beta)
+            {
+                /* p1' */
+                val = ((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1);
+                pu1_src_temp[pos_p1] += CLIP3(-tc0, tc0, val);
+            }
+
+            if(a_q < beta)
+            {
+                /* q1' */
+                val = ((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1);
+                pu1_src_temp[pos_q1] += CLIP3(-tc0, tc0, val);
+            }
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bslt4_mbaff_bp() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.3 under the title "Filtering */
+/* process for edges for bS less than 4" in ITU T Rec H.264.*/
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bslt4_mbaff_bp(UWORD8 *pu1_src,
+                                            WORD32 src_strd,
+                                            WORD32 alpha,
+                                            WORD32 beta,
+                                            UWORD32 u4_bs,
+                                            const UWORD8 *pu1_cliptab)
+{
+    /* U samples are addressed from pu1_src and V samples one byte later, */
+    /* so neighbouring samples of the same plane are 2 bytes apart.       */
+    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
+    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
+    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+    WORD32 blk_strd = src_strd; /* each bS byte covers a single row */
+    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+    WORD8 edge;
+    WORD8 delta;
+    WORD8 tc;
+    WORD16 val;
+    UWORD8 tc0, u1_bs;
+
+    /* Byte offsets of the samples across the vertical edge, relative to q0 */
+    pos_q0 = 0;
+    pos_q1 = 2;
+    pos_p0 = -2;
+    pos_p1 = -4;
+
+    for(edge = 0; edge < 4;
+                    edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
+    {
+        pu1_src_temp_u = pu1_src_u;
+        pu1_src_temp_v = pu1_src_v;
+        /* bS of this row: the most significant byte of u4_bs is used     */
+        /* first, the least significant byte last                         */
+        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+        if(!u1_bs)
+            continue;
+        /* tc0 */
+        tc0 = pu1_cliptab[u1_bs];
+        /* tc: chroma always uses tc0 + 1 */
+        tc = tc0 + 1;
+
+        /* All eight samples are read before any of them is written */
+        q0_u = pu1_src_temp_u[pos_q0];
+        q1_u = pu1_src_temp_u[pos_q1];
+        p0_u = pu1_src_temp_u[pos_p0];
+        p1_u = pu1_src_temp_u[pos_p1];
+
+        q0_v = pu1_src_temp_v[pos_q0];
+        q1_v = pu1_src_temp_v[pos_q1];
+        p0_v = pu1_src_temp_v[pos_p0];
+        p1_v = pu1_src_temp_v[pos_p1];
+
+        /* Filter Decision (U) */
+        if((ABS(p0_u - q0_u) < alpha) &&
+           (ABS(q1_u - q0_u) < beta) &&
+           (ABS(p1_u - p0_u) < beta))
+        {
+            /* (q0 - p0) may be negative; multiply by 4 rather than       */
+            /* shifting left, since left-shifting a negative value is     */
+            /* undefined behaviour in C                                   */
+            val = ((((q0_u - p0_u) * 4) + (p1_u - q1_u) + 4) >> 3);
+            delta = CLIP3(-tc, tc, val);
+            /* p0' */
+            val = p0_u + delta;
+            pu1_src_temp_u[pos_p0] = CLIP_U8(val);
+            /* q0' */
+            val = q0_u - delta;
+            pu1_src_temp_u[pos_q0] = CLIP_U8(val);
+        }
+
+        /* Filter Decision (V) */
+        if((ABS(p0_v - q0_v) < alpha) &&
+           (ABS(q1_v - q0_v) < beta) &&
+           (ABS(p1_v - p0_v) < beta))
+        {
+            /* multiply instead of shift: see the U plane above */
+            val = ((((q0_v - p0_v) * 4) + (p1_v - q1_v) + 4) >> 3);
+            delta = CLIP3(-tc, tc, val);
+            /* p0' */
+            val = p0_v + delta;
+            pu1_src_temp_v[pos_p0] = CLIP_U8(val);
+            /* q0' */
+            val = q0_v - delta;
+            pu1_src_temp_v[pos_q0] = CLIP_U8(val);
+        }
+    }
+}
+
+/*****************************************************************************/
+/* Function Definitions for chroma deblocking in high profile */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bs4() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when the boundary strength is set to 4 in */
+/* high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264 with alpha and beta values different in */
+/* U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bs4(UWORD8 *pu1_src,
+                                 WORD32 src_strd,
+                                 WORD32 alpha_cb,
+                                 WORD32 beta_cb,
+                                 WORD32 alpha_cr,
+                                 WORD32 beta_cr)
+{
+    /* U q0 of the current row; the matching V sample is one byte later. */
+    /* U samples p1, p0, q0, q1 are at offsets -4, -2, 0, 2 and the V    */
+    /* samples at -3, -1, 1, 3.                                          */
+    UWORD8 *pu1_row = pu1_src;
+    WORD32 row;
+
+    /* Four edge segments of two rows each, i.e. 8 consecutive rows */
+    for(row = 0; row < 8; row++, pu1_row += src_strd)
+    {
+        /* Every sample is fetched before anything is written back */
+        const UWORD8 p1_u = pu1_row[-4];
+        const UWORD8 p0_u = pu1_row[-2];
+        const UWORD8 q0_u = pu1_row[0];
+        const UWORD8 q1_u = pu1_row[2];
+        const UWORD8 p1_v = pu1_row[-3];
+        const UWORD8 p0_v = pu1_row[-1];
+        const UWORD8 q0_v = pu1_row[1];
+        const UWORD8 q1_v = pu1_row[3];
+
+        /* U plane: decided with the Cb thresholds */
+        if((ABS(p0_u - q0_u) < alpha_cb) &&
+           (ABS(q1_u - q0_u) < beta_cb) &&
+           (ABS(p1_u - p0_u) < beta_cb))
+        {
+            pu1_row[-2] = ((X2(p1_u) + p0_u + q1_u + 2) >> 2); /* p0' */
+            pu1_row[0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2; /* q0' */
+        }
+
+        /* V plane: decided with the Cr thresholds */
+        if((ABS(p0_v - q0_v) < alpha_cr) &&
+           (ABS(q1_v - q0_v) < beta_cr) &&
+           (ABS(p1_v - p0_v) < beta_cr))
+        {
+            pu1_row[-1] = ((X2(p1_v) + p0_v + q1_v + 2) >> 2); /* p0' */
+            pu1_row[1] = (X2(q1_v) + q0_v + p1_v + 2) >> 2; /* q0' */
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_horz_bs4() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* horizontal edge when the boundary strength is set to 4 */
+/* in high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264 with alpha and beta values different in */
+/* U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_horz_bs4(UWORD8 *pu1_src,
+                                 WORD32 src_strd,
+                                 WORD32 alpha_cb,
+                                 WORD32 beta_cb,
+                                 WORD32 alpha_cr,
+                                 WORD32 beta_cr)
+{
+    /* Row pointers address the U sample of the current U/V pair; the V  */
+    /* sample of the same pair is one byte later.                        */
+    UWORD8 *pu1_q_row = pu1_src; /* row holding q0 */
+    UWORD8 *pu1_p_row = pu1_src - (src_strd << 1); /* row holding p1 */
+    WORD32 pair;
+
+    /* Four edge segments of two U/V pairs each: 8 pairs, 2 bytes apart */
+    for(pair = 0; pair < 8; pair++, pu1_q_row += 2, pu1_p_row += 2)
+    {
+        /* q1 is one row below q0, p0 one row below p1. Every sample is  */
+        /* fetched before anything is written back.                      */
+        const UWORD8 q0_u = pu1_q_row[0];
+        const UWORD8 q1_u = pu1_q_row[src_strd];
+        const UWORD8 p0_u = pu1_p_row[src_strd];
+        const UWORD8 p1_u = pu1_p_row[0];
+        const UWORD8 q0_v = pu1_q_row[1];
+        const UWORD8 q1_v = pu1_q_row[src_strd + 1];
+        const UWORD8 p0_v = pu1_p_row[src_strd + 1];
+        const UWORD8 p1_v = pu1_p_row[1];
+
+        /* U plane: decided with the Cb thresholds */
+        if((ABS(p0_u - q0_u) < alpha_cb) &&
+           (ABS(q1_u - q0_u) < beta_cb) &&
+           (ABS(p1_u - p0_u) < beta_cb))
+        {
+            pu1_p_row[src_strd] = (X2(p1_u) + p0_u + q1_u + 2) >> 2; /* p0' */
+            pu1_q_row[0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2; /* q0' */
+        }
+
+        /* V plane: decided with the Cr thresholds */
+        if((ABS(p0_v - q0_v) < alpha_cr) &&
+           (ABS(q1_v - q0_v) < beta_cr) &&
+           (ABS(p1_v - p0_v) < beta_cr))
+        {
+            pu1_p_row[src_strd + 1] = (X2(p1_v) + p0_v + q1_v + 2) >> 2; /* p0' */
+            pu1_q_row[1] = (X2(q1_v) + q0_v + p1_v + 2) >> 2; /* q0' */
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bslt4() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when the boundary strength is less than 4 */
+/* in high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab_cb - tc0_table for U */
+/* pu1_cliptab_cr - tc0_table for V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264 with alpha and beta values different */
+/* in U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bslt4(UWORD8 *pu1_src,
+                                   WORD32 src_strd,
+                                   WORD32 alpha_cb,
+                                   WORD32 beta_cb,
+                                   WORD32 alpha_cr,
+                                   WORD32 beta_cr,
+                                   UWORD32 u4_bs,
+                                   const UWORD8 *pu1_cliptab_cb,
+                                   const UWORD8 *pu1_cliptab_cr)
+{
+    /* U samples are addressed from pu1_src and V samples one byte later, */
+    /* so neighbouring samples of the same plane are 2 bytes apart.       */
+    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
+    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
+    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+    WORD32 blk_strd = src_strd * 2; /* each bS byte covers two rows */
+    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+    WORD8 i = 0, edge;
+    WORD8 delta;
+    WORD8 tcb, tcr;
+    WORD16 val;
+    UWORD8 tcb0, tcr0, u1_bs;
+
+    /* Byte offsets of the samples across the vertical edge, relative to q0 */
+    pos_q0 = 0;
+    pos_q1 = 2;
+    pos_p0 = -2;
+    pos_p1 = -4;
+
+    for(edge = 0; edge < 4;
+                    edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
+    {
+        pu1_src_temp_u = pu1_src_u;
+        pu1_src_temp_v = pu1_src_v;
+        /* bS of this segment: the most significant byte of u4_bs is used */
+        /* first, the least significant byte last                         */
+        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+        if(!u1_bs)
+            continue;
+        /* tc0: looked up separately for U and V */
+        tcb0 = pu1_cliptab_cb[u1_bs];
+        tcr0 = pu1_cliptab_cr[u1_bs];
+        /* tc: chroma always uses tc0 + 1 */
+        tcb = tcb0 + 1;
+        tcr = tcr0 + 1;
+        for(i = 0; i < 2; ++i, pu1_src_temp_u += src_strd, pu1_src_temp_v +=
+                        src_strd)
+        {
+            /* All eight samples are read before any of them is written */
+            q0_u = pu1_src_temp_u[pos_q0];
+            q1_u = pu1_src_temp_u[pos_q1];
+            p0_u = pu1_src_temp_u[pos_p0];
+            p1_u = pu1_src_temp_u[pos_p1];
+
+            q0_v = pu1_src_temp_v[pos_q0];
+            q1_v = pu1_src_temp_v[pos_q1];
+            p0_v = pu1_src_temp_v[pos_p0];
+            p1_v = pu1_src_temp_v[pos_p1];
+
+            /* Filter Decision (U, Cb thresholds) */
+            if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
+                            && ABS(p1_u - p0_u) < beta_cb)
+            {
+                /* (q0 - p0) may be negative; multiply by 4 rather than   */
+                /* shifting left, since left-shifting a negative value is */
+                /* undefined behaviour in C                               */
+                val = ((((q0_u - p0_u) * 4) + (p1_u - q1_u) + 4) >> 3);
+                delta = CLIP3(-tcb, tcb, val);
+                /* p0' */
+                val = p0_u + delta;
+                pu1_src_temp_u[pos_p0] = CLIP_U8(val);
+                /* q0' */
+                val = q0_u - delta;
+                pu1_src_temp_u[pos_q0] = CLIP_U8(val);
+            }
+
+            /* Filter Decision (V, Cr thresholds) */
+            if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
+                            && ABS(p1_v - p0_v) < beta_cr)
+            {
+                /* multiply instead of shift: see the U plane above */
+                val = ((((q0_v - p0_v) * 4) + (p1_v - q1_v) + 4) >> 3);
+                delta = CLIP3(-tcr, tcr, val);
+                /* p0' */
+                val = p0_v + delta;
+                pu1_src_temp_v[pos_p0] = CLIP_U8(val);
+                /* q0' */
+                val = q0_v - delta;
+                pu1_src_temp_v[pos_q0] = CLIP_U8(val);
+            }
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_horz_bslt4() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* horizontal edge when the boundary strength is less than */
+/* 4 in high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab_cb - tc0_table for U */
+/* pu1_cliptab_cr - tc0_table for V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264 with alpha and beta values different */
+/* in U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_horz_bslt4(UWORD8 *pu1_src,
+                                   WORD32 src_strd,
+                                   WORD32 alpha_cb,
+                                   WORD32 beta_cb,
+                                   WORD32 alpha_cr,
+                                   WORD32 beta_cr,
+                                   UWORD32 u4_bs,
+                                   const UWORD8 *pu1_cliptab_cb,
+                                   const UWORD8 *pu1_cliptab_cr)
+{
+    /* U samples are addressed from pu1_src and V samples one byte later, */
+    /* so one U/V pair occupies 2 bytes along a row.                      */
+    UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
+    UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
+    UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+    WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+    UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+    UWORD8 *pu1_p1_u; /* Pointer to the src sample p1 of plane U*/
+    UWORD8 *pu1_p1_v; /* Pointer to the src sample p1 of plane V*/
+    UWORD8 *pu1_p1_temp_u, *pu1_p1_temp_v;
+    WORD8 i = 0, edge;
+    WORD8 delta;
+    WORD8 tcb, tcr;
+    WORD16 val;
+    UWORD8 u1_bs;
+    UWORD8 tcb0, tcr0;
+
+    /* p1 lies two rows above q0 */
+    pu1_p1_u = pu1_src_u - (src_strd * 2);
+    pu1_p1_v = pu1_src_v - (src_strd * 2);
+    /* q offsets are relative to the q0 pointers, p offsets relative to   */
+    /* the p1 pointers                                                    */
+    pos_q0 = 0;
+    pos_q1 = src_strd;
+    pos_p0 = src_strd;
+    pos_p1 = 0;
+
+    /* Four segments of two U/V pairs (4 bytes) each along the edge */
+    for(edge = 0; edge < 4; edge++, pu1_src_u += 4, pu1_p1_u += 4,
+                    pu1_src_v += 4, pu1_p1_v += 4)
+    {
+        pu1_src_temp_u = pu1_src_u;
+        pu1_p1_temp_u = pu1_p1_u;
+        pu1_src_temp_v = pu1_src_v;
+        pu1_p1_temp_v = pu1_p1_v;
+
+        /* bS of this segment: the most significant byte of u4_bs is used */
+        /* first, the least significant byte last                         */
+        u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+        if(!u1_bs)
+            continue;
+        /* tc0: looked up separately for U and V */
+        tcb0 = pu1_cliptab_cb[u1_bs];
+        tcr0 = pu1_cliptab_cr[u1_bs];
+        /* tc: depends only on tc0, so it is computed once per segment */
+        tcb = tcb0 + 1;
+        tcr = tcr0 + 1;
+
+        for(i = 0; i < 2; ++i, pu1_src_temp_u += 2, pu1_p1_temp_u += 2,
+                        pu1_src_temp_v += 2, pu1_p1_temp_v += 2)
+        {
+            /* All eight samples are read before any of them is written */
+            q0_u = pu1_src_temp_u[pos_q0];
+            q1_u = pu1_src_temp_u[pos_q1];
+            p0_u = pu1_p1_temp_u[pos_p0];
+            p1_u = pu1_p1_temp_u[pos_p1];
+
+            q0_v = pu1_src_temp_v[pos_q0];
+            q1_v = pu1_src_temp_v[pos_q1];
+            p0_v = pu1_p1_temp_v[pos_p0];
+            p1_v = pu1_p1_temp_v[pos_p1];
+
+            /* Filter Decision (U, Cb thresholds) */
+            if(ABS(p0_u - q0_u) < alpha_cb && ABS(q1_u - q0_u) < beta_cb
+                            && ABS(p1_u - p0_u) < beta_cb)
+            {
+                /* (q0 - p0) may be negative; multiply by 4 rather than   */
+                /* shifting left, since left-shifting a negative value is */
+                /* undefined behaviour in C                               */
+                val = ((((q0_u - p0_u) * 4) + (p1_u - q1_u) + 4) >> 3);
+                delta = CLIP3(-tcb, tcb, val);
+                /* p0' */
+                val = p0_u + delta;
+                pu1_p1_temp_u[pos_p0] = CLIP_U8(val);
+                /* q0' */
+                val = q0_u - delta;
+                pu1_src_temp_u[pos_q0] = CLIP_U8(val);
+            }
+            /* Filter Decision (V, Cr thresholds) */
+            if(ABS(p0_v - q0_v) < alpha_cr && ABS(q1_v - q0_v) < beta_cr
+                            && ABS(p1_v - p0_v) < beta_cr)
+            {
+                /* multiply instead of shift: see the U plane above */
+                val = ((((q0_v - p0_v) * 4) + (p1_v - q1_v) + 4) >> 3);
+                delta = CLIP3(-tcr, tcr, val);
+                /* p0' */
+                val = p0_v + delta;
+                pu1_p1_temp_v[pos_p0] = CLIP_U8(val);
+                /* q0' */
+                val = q0_v - delta;
+                pu1_src_temp_v[pos_q0] = CLIP_U8(val);
+            }
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bs4_mbaff() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when boundary strength is set to 4 in high */
+/* profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* (this variant takes no u4_bs, pu1_cliptab_cb or */
+/* pu1_cliptab_cr argument: the filter below uses neither */
+/* a boundary strength word nor a tc0 table) */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.4 under the title "Filtering */
+/* process for edges for bS equal to 4" in ITU T Rec H.264 */
+/* with alpha and beta values different in U and V. */
+/* */
+/* Outputs : p0 and q0 of U and V are updated in place through pu1_src */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bs4_mbaff(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr)
+{
+ UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of U */
+ UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of V */
+ UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+ WORD32 blk_strd = src_strd;
+ WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+ UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+ WORD8 edge;
+ /* Byte offsets within a row relative to q0; same-plane samples are 2 bytes apart because V starts at pu1_src + 1 */
+ pos_q0 = 0;
+ pos_q1 = 2;
+ pos_p0 = -2;
+ pos_p1 = -4;
+ /* One row per iteration: 4 rows in total, each blk_strd (= src_strd) bytes after the previous one */
+ for(edge = 0; edge < 4;
+ edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
+ {
+ pu1_src_temp_u = pu1_src_u;
+ pu1_src_temp_v = pu1_src_v;
+ q0_u = pu1_src_temp_u[pos_q0];
+ q1_u = pu1_src_temp_u[pos_q1];
+ p0_u = pu1_src_temp_u[pos_p0];
+ p1_u = pu1_src_temp_u[pos_p1];
+ q0_v = pu1_src_temp_v[pos_q0];
+ q1_v = pu1_src_temp_v[pos_q1];
+ p0_v = pu1_src_temp_v[pos_p0];
+ p1_v = pu1_src_temp_v[pos_p1];
+ /* All eight samples are read before any write, so each output depends only on unfiltered values */
+ /* Filter Decision (U): filter only if all three differences are below their alpha_cb / beta_cb thresholds */
+ if((ABS(p0_u - q0_u) < alpha_cb) &&
+ (ABS(q1_u - q0_u) < beta_cb) &&
+ (ABS(p1_u - p0_u) < beta_cb))
+ {
+ /* p0' (U) */
+ pu1_src_temp_u[pos_p0] = ((X2(p1_u) + p0_u + q1_u + 2) >> 2);
+ /* q0' (U) */
+ pu1_src_temp_u[pos_q0] = (X2(q1_u) + q0_u + p1_u + 2) >> 2;
+ }
+ /* V is decided independently of U, using alpha_cr / beta_cr */
+ /* Filter Decision (V) */
+ if((ABS(p0_v - q0_v) < alpha_cr) &&
+ (ABS(q1_v - q0_v) < beta_cr) &&
+ (ABS(p1_v - p0_v) < beta_cr))
+ {
+ /* p0' (V) */
+ pu1_src_temp_v[pos_p0] = ((X2(p1_v) + p0_v + q1_v + 2) >> 2);
+ /* q0' (V) */
+ pu1_src_temp_v[pos_q0] = (X2(q1_v) + q0_v + p1_v + 2) >> 2;
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bslt4_mbaff() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when boundary strength is less than 4 in */
+/* high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab_cb - tc0_table for U */
+/* pu1_cliptab_cr - tc0_table for V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.4 under the title "Filtering */
+/* process for edges for bS less than 4" in ITU T Rec H.264 */
+/* with alpha and beta values different in U and V. */
+/* */
+/* Outputs : p0 and q0 of U and V are updated in place through pu1_src */
+/* */
+/* Returns : None */
+/* */
+/* Issues : Relies on >> of a negative int being an arithmetic shift */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 29 12 2014 Kaushik Draft */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bslt4_mbaff(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab_cb,
+ const UWORD8 *pu1_cliptab_cr)
+{
+ UWORD8 *pu1_src_u = pu1_src; /* Pointer to the src sample q0 of plane U*/
+ UWORD8 *pu1_src_v = pu1_src + 1; /* Pointer to the src sample q0 of plane V*/
+ UWORD8 p1_u, p0_u, q0_u, q1_u, p1_v, p0_v, q0_v, q1_v;
+ WORD32 blk_strd = src_strd;
+ WORD32 pos_p1, pos_p0, pos_q0, pos_q1;
+ UWORD8 *pu1_src_temp_u, *pu1_src_temp_v;
+ WORD8 edge;
+ WORD8 delta;
+ WORD8 tcb, tcr;
+ WORD16 val;
+ UWORD8 tcb0, tcr0, u1_bs;
+ /* Byte offsets within a row relative to q0; same-plane samples are 2 bytes apart because V starts at pu1_src + 1 */
+ pos_q0 = 0;
+ pos_q1 = 2;
+ pos_p0 = -2;
+ pos_p1 = -4;
+ /* One row per iteration: 4 rows in total, each blk_strd (= src_strd) bytes after the previous one */
+ for(edge = 0; edge < 4;
+ edge++, pu1_src_u += blk_strd, pu1_src_v += blk_strd)
+ {
+ pu1_src_temp_u = pu1_src_u;
+ pu1_src_temp_v = pu1_src_v;
+ /* bS of this row: one byte of u4_bs per row, row 0 in the most significant byte */
+ u1_bs = (UWORD8)((u4_bs >> ((3 - edge) << 3)) & 0x0ff);
+ if(!u1_bs)
+ continue; /* bS == 0: this row is left untouched */
+ /* tc0 comes from the clip tables (indexed by bS); the clipping bound used below is tc0 + 1 */
+ tcb0 = pu1_cliptab_cb[u1_bs];
+ tcr0 = pu1_cliptab_cr[u1_bs];
+ tcb = tcb0 + 1;
+ tcr = tcr0 + 1;
+ q0_u = pu1_src_temp_u[pos_q0];
+ q1_u = pu1_src_temp_u[pos_q1];
+ p0_u = pu1_src_temp_u[pos_p0];
+ p1_u = pu1_src_temp_u[pos_p1];
+ /* V samples of the same row */
+ q0_v = pu1_src_temp_v[pos_q0];
+ q1_v = pu1_src_temp_v[pos_q1];
+ p0_v = pu1_src_temp_v[pos_p0];
+ p1_v = pu1_src_temp_v[pos_p1];
+ /* All eight samples are read before any write, so each output depends only on unfiltered values */
+ /* Filter Decision (U): filter only if all three differences are below their alpha_cb / beta_cb thresholds */
+ if((ABS(p0_u - q0_u) < alpha_cb) &&
+ (ABS(q1_u - q0_u) < beta_cb) &&
+ (ABS(p1_u - p0_u) < beta_cb))
+ {
+ val = ((((q0_u - p0_u) * 4) + (p1_u - q1_u) + 4) >> 3); /* * 4, not << 2: q0 - p0 may be negative and left-shifting a negative int is undefined */
+ delta = CLIP3(-tcb, tcb, val);
+ /* p0' = p0 + delta, clipped to 8 bits */
+ val = p0_u + delta;
+ pu1_src_temp_u[pos_p0] = CLIP_U8(val);
+ /* q0' = q0 - delta, clipped to 8 bits */
+ val = q0_u - delta;
+ pu1_src_temp_u[pos_q0] = CLIP_U8(val);
+ }
+ /* V is decided independently of U, using alpha_cr / beta_cr and tcr */
+ /* Filter Decision (V) */
+ if((ABS(p0_v - q0_v) < alpha_cr) &&
+ (ABS(q1_v - q0_v) < beta_cr) &&
+ (ABS(p1_v - p0_v) < beta_cr))
+ {
+ val = ((((q0_v - p0_v) * 4) + (p1_v - q1_v) + 4) >> 3); /* * 4, not << 2: see the U branch */
+ delta = CLIP3(-tcr, tcr, val);
+ /* p0' = p0 + delta, clipped to 8 bits */
+ val = p0_v + delta;
+ pu1_src_temp_v[pos_p0] = CLIP_U8(val);
+ /* q0' = q0 - delta, clipped to 8 bits */
+ val = q0_v - delta;
+ pu1_src_temp_v[pos_q0] = CLIP_U8(val);
+ }
+ }
+}
diff --git a/common/ih264_deblk_edge_filters.h b/common/ih264_deblk_edge_filters.h
new file mode 100755
index 0000000..4079dd2
--- /dev/null
+++ b/common/ih264_deblk_edge_filters.h
@@ -0,0 +1,195 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_deblk_edge_filters.h
+ *
+ * @brief
+ * This file contains declarations of functions used for deblocking
+ *
+ * @author
+ * Ittiam
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264_DEBLK_H_
+#define IH264_DEBLK_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+typedef void ih264_deblk_edge_bslt4_ft(UWORD8 *pu1_src, /* function type of the luma and "_bp" chroma bslt4 filters declared below: one alpha/beta pair, packed bS word, one clip table */
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab );
+/* Function type of the luma and "_bp" chroma bs4 filters declared below: one alpha/beta pair, no bS word and no clip table */
+typedef void ih264_deblk_edge_bs4_ft(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta );
+/* Function type of the non-"_bp" chroma bslt4 filters declared below: separate cb/cr thresholds and separate cb/cr clip tables */
+typedef void ih264_deblk_chroma_edge_bslt4_ft(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab_cb,
+ const UWORD8 *pu1_cliptab_cr);
+/* Function type of the non-"_bp" chroma bs4 filters declared below: separate cb/cr thresholds, no bS word and no clip tables */
+typedef void ih264_deblk_chroma_edge_bs4_ft(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr);
+
+
+
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4;
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4;
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff;
+
+
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp;
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp;
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp;
+
+
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4;
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4;
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff;
+
+
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp;
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp;
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp;
+
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff;
+
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff;
+
+
+/*A9*/
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_a9;
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_a9;
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_a9;
+
+
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_a9;
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_a9;
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_a9;
+
+
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_a9;
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_a9;
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_a9;
+
+
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_a9;
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_a9;
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9;
+
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_a9;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_a9;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_a9;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_a9;
+
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_a9;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_a9;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_a9;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_a9;
+
+/*AV8*/
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_av8;
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_av8;
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_av8;
+
+
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_av8;
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_av8;
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_av8;
+
+
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_av8;
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_av8;
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_av8;
+
+
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_av8;
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_av8;
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_av8;
+
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_av8;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_av8;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_av8;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_av8;
+
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_av8;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_av8;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_av8;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_av8;
+
+/*SSSE3*/
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_ssse3;
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_ssse3;
+ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_ssse3;
+
+
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_ssse3;
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_ssse3;
+ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_ssse3;
+
+
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_ssse3;
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_ssse3;
+ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_ssse3;
+
+
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_ssse3;
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_ssse3;
+ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_ssse3;
+
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_ssse3;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_ssse3;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_ssse3;
+ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_ssse3;
+
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_ssse3;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_ssse3;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_ssse3;
+ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_ssse3;
+
+#endif /* IH264_DEBLK_H_ */
diff --git a/common/ih264_deblk_tables.c b/common/ih264_deblk_tables.c
new file mode 100755
index 0000000..91e28e0
--- /dev/null
+++ b/common/ih264_deblk_tables.c
@@ -0,0 +1,119 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_deblk_tables.c
+*
+* @brief
+* Contains tables used for deblocking
+*
+* @author
+* Ittiam
+*
+* @par List of Tables:
+* - gu1_ih264_alpha_table[]
+* - gu1_ih264_beta_table[]
+* - gu1_ih264_clip_table[][]
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_deblk_tables.h"
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief alpha & beta tables for deblocking
+ * input : indexA [0-51] & indexB [0-51]
+ * output : alpha & beta
+ *
+ * @remarks Table 8-16 – in H264 Specification,
+ * Derivation of offset dependent threshold variables
+ * alpha and beta from indexA and indexB
+ ******************************************************************************
+ */
+const UWORD8 gu1_ih264_alpha_table[52] =
+{
+ /* alpha, one entry per indexA :: 0-51 inclusive (entries 0-15 are zero) */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 4, 5, 6, 7, 8, 9, 10,
+ 12, 13, 15, 17, 20, 22, 25, 28,
+ 32, 36, 40, 45, 50, 56, 63, 71,
+ 80, 90, 101, 113, 127, 144, 162, 182,
+ 203, 226, 255, 255,
+};
+
+const UWORD8 gu1_ih264_beta_table[52] =
+{
+ /* beta, one entry per indexB :: 0-51 inclusive (entries 0-15 are zero) */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 2, 2, 2, 3, 3, 3, 3, 4,
+ 4, 4, 6, 6, 7, 7, 8, 8,
+ 9, 9, 10, 10, 11, 11, 12, 12,
+ 13, 13, 14, 14, 15, 15, 16, 16,
+ 17, 17, 18, 18,
+};
+
+/**
+ ******************************************************************************
+ * @brief t'C0 table for deblocking
+ * input : indexA [0-51] and bS [1,3]
+ * output : t'C0
+ *
+ * @remarks Table 8-17 – in H264 Specification,
+ * Value of variable t'C0 as a function of indexA and bS
+ ******************************************************************************
+ */
+const UWORD8 gu1_ih264_clip_table[52][4] =
+{
+ /* one row per indexA :: 0-51 inclusive; the 4 entries of a row are selected by bS (entry 0 is zero in every row) */
+ { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0},
+ { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0},
+ { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0},
+ { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0}, { 0, 0, 0, 0},
+ { 0, 0, 0, 0}, { 0, 0, 0, 1}, { 0, 0, 0, 1}, { 0, 0, 0, 1},
+ { 0, 0, 0, 1}, { 0, 0, 1, 1}, { 0, 0, 1, 1}, { 0, 1, 1, 1},
+ { 0, 1, 1, 1}, { 0, 1, 1, 1}, { 0, 1, 1, 1}, { 0, 1, 1, 2},
+ { 0, 1, 1, 2}, { 0, 1, 1, 2}, { 0, 1, 1, 2}, { 0, 1, 2, 3},
+ { 0, 1, 2, 3}, { 0, 2, 2, 3}, { 0, 2, 2, 4}, { 0, 2, 3, 4},
+ { 0, 2, 3, 4}, { 0, 3, 3, 5}, { 0, 3, 4, 6}, { 0, 3, 4, 6},
+ { 0, 4, 5, 7}, { 0, 4, 5, 8}, { 0, 4, 6, 9}, { 0, 5, 7,10},
+ { 0, 6, 8,11}, { 0, 6, 8,13}, { 0, 7,10,14}, { 0, 8,11,16},
+ { 0, 9,12,18}, { 0,10,13,20}, { 0,11,15,23}, { 0,13,17,25},
+};
diff --git a/common/ih264_deblk_tables.h b/common/ih264_deblk_tables.h
new file mode 100755
index 0000000..3935dcb
--- /dev/null
+++ b/common/ih264_deblk_tables.h
@@ -0,0 +1,73 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_deblk_tables.h
+ *
+ * @brief
+ * This file contains declarations of tables used for deblocking
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264_DEBLK_TABLES_H_
+#define IH264_DEBLK_TABLES_H_
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief alpha & beta tables for deblocking
+ * input : indexA [0-51] & indexB [0-51]
+ * output : alpha & beta
+ *
+ * @remarks Table 8-16 – in H264 Specification,
+ * Derivation of offset dependent threshold variables
+ * alpha and beta from indexA and indexB
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_ih264_alpha_table[52];
+
+extern const UWORD8 gu1_ih264_beta_table[52];
+
+/**
+ ******************************************************************************
+ * @brief t'C0 table for deblocking
+ * input : indexA [0-51] and bS [1,3]
+ * output : t'C0
+ *
+ * @remarks Table 8-17 – in H264 Specification,
+ * Value of variable t'C0 as a function of indexA and bS
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_ih264_clip_table[52][4];
+
+#endif /* IH264_DEBLK_TABLES_H_ */
diff --git a/common/ih264_debug.h b/common/ih264_debug.h
new file mode 100755
index 0000000..96ff2a7
--- /dev/null
+++ b/common/ih264_debug.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_debug.h
+*
+* @brief
+* Definitions for codec debugging
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IH264_DEBUG_H_
+#define _IH264_DEBUG_H_
+
+
+#if DEBUG_PRINT
+/* DEBUG_PRINT non-zero: DEBUG(fmt, ...) prints a "[H264 DBG] <function>/<line>:: " prefix, then the formatted message */
+#define DEBUG(...) \
+{ \
+ printf("\n[H264 DBG] %s/%d:: ", __FUNCTION__, __LINE__); \
+ printf(__VA_ARGS__); \
+}
+/* NOTE(review): both DEBUG variants expand to a bare { } block, so "if(c) DEBUG(x); else ..." does not compile - confirm no caller needs that form */
+#else
+/* DEBUG_PRINT zero or undefined: DEBUG() expands to an empty block and its arguments are dropped */
+#define DEBUG(...) {}
+
+#endif
+/* ASSERT forwards to assert(). This header includes neither <assert.h> nor <stdio.h>, */
+/* so a file using ASSERT or DEBUG has to include them itself. */
+#define ASSERT(x) assert((x))
+
+
+#endif /* _IH264_DEBUG_H_ */
+
diff --git a/common/ih264_defs.h b/common/ih264_defs.h
new file mode 100755
index 0000000..8d7e387
--- /dev/null
+++ b/common/ih264_defs.h
@@ -0,0 +1,690 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_defs.h
+*
+* @brief
+* Definitions used in the codec
+*
+* @author
+* Ittiam
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264_DEFS_H_
+#define IH264_DEFS_H_
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Profile and Levels */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @enum PROFILE_IDC
+ * @brief Defines the set of possible profiles
+******************************************************************************
+*/
+enum
+{
+ IH264_PROFILE_BASELINE = 66,
+ IH264_PROFILE_MAIN = 77,
+ IH264_PROFILE_EXTENDED = 88,
+ IH264_PROFILE_HIGH = 100,
+ IH264_PROFILE_HIGH10 = 110,
+ IH264_PROFILE_HIGH422 = 122,
+ IH264_PROFILE_HIGH444 = 144,
+};
+
+/**
+******************************************************************************
+ * @enum LEVEL_IDC
+ * @brief Defines the set of possible levels
+******************************************************************************
+*/
+typedef enum
+{
+ IH264_LEVEL_10 = 10,
+ IH264_LEVEL_1B = 9,
+ IH264_LEVEL_11 = 11,
+ IH264_LEVEL_12 = 12,
+ IH264_LEVEL_13 = 13,
+ IH264_LEVEL_20 = 20,
+ IH264_LEVEL_21 = 21,
+ IH264_LEVEL_22 = 22,
+ IH264_LEVEL_30 = 30,
+ IH264_LEVEL_31 = 31,
+ IH264_LEVEL_32 = 32,
+ IH264_LEVEL_40 = 40,
+ IH264_LEVEL_41 = 41,
+ IH264_LEVEL_42 = 42,
+ IH264_LEVEL_50 = 50,
+ IH264_LEVEL_51 = 51,
+}IH264_LEVEL_T;
+
+
+/**
+******************************************************************************
+ * @enum PIC TYPES
+ * @brief Defines the set of possible picture type - not signaled in bitstream
+******************************************************************************
+*/
+typedef enum
+{
+ PIC_NA = 0x7FFFFFFF,
+ PIC_IDR = 0,
+ PIC_I = 1,
+ PIC_P = 2,
+ PIC_B = 3,
+ PIC_P_NONREF = 4,
+ PIC_B_NONREF = 5,
+ PIC_MAX,
+}PIC_TYPE_T;
+
+/**
+******************************************************************************
+ * @enum FRAME-FIELD types
+ * @brief Defines the set of possible field types.
+******************************************************************************
+*/
+enum
+{
+ TOP_FIELD,
+ BOTTOM_FIELD,
+ FRAME,
+};
+
+/**
+******************************************************************************
+ * @enum SLICE TYPES
+ * @brief Defines the set of possible SLICE TYPES
+******************************************************************************
+*/
+enum
+{
+ PSLICE = 0,
+ BSLICE = 1,
+ ISLICE = 2,
+ SPSLICE = 3,
+ SISLICE = 4,
+ MAXSLICE_TYPE,
+};
+
+/**
+******************************************************************************
+ * @enum NAL_UNIT_TYPE
+ * @brief Defines the set of possible nal unit types
+******************************************************************************
+*/
+enum
+{
+ NAL_UNSPEC_0 = 0,
+ NAL_SLICE_NON_IDR = 1,
+ NAL_SLICE_DPA = 2,
+ NAL_SLICE_DPB = 3,
+ NAL_SLICE_DPC = 4,
+ NAL_SLICE_IDR = 5,
+ NAL_SEI = 6,
+ NAL_SPS = 7,
+ NAL_PPS = 8,
+ NAL_AUD = 9,
+ NAL_EOSEQ = 10,
+ NAL_EOSTR = 11,
+ NAL_FILLER = 12,
+ NAL_SPSE = 13,
+ NAL_RES_18 = 14,
+ NAL_AUX_PIC = 19,
+ NAL_RES_23 = 20,
+ NAL_UNSPEC_31 = 24,
+};
+
+/**
+******************************************************************************
+ * @enum CHROMA_FORMAT_IDC
+ * @brief Defines the set of possible chroma formats
+ * Note: Chroma format - do not change enum values
+******************************************************************************
+*/
+enum
+{
+ CHROMA_FMT_IDC_MONOCHROME = 0,
+ CHROMA_FMT_IDC_YUV420 = 1,
+ CHROMA_FMT_IDC_YUV422 = 2,
+ CHROMA_FMT_IDC_YUV444 = 3,
+ CHROMA_FMT_IDC_YUV444_PLANES = 4,
+};
+
+
+/**
+******************************************************************************
+ * @enum MBMODES_I16x16
+ * @brief Defines the set of possible intra 16x16 mb modes
+******************************************************************************
+*/
+typedef enum
+{
+ VERT_I16x16 = 0,
+ HORZ_I16x16 = 1,
+ DC_I16x16 = 2,
+ PLANE_I16x16 = 3,
+ MAX_I16x16 = 4,
+}MBMODES_I16x16;
+
+/**
+******************************************************************************
+ * @enum MBMODES_I4x4
+ * @brief Defines the set of possible intra 4x4 mb modes
+******************************************************************************
+*/
+typedef enum
+{
+ VERT_I4x4 = 0,
+ HORZ_I4x4 = 1,
+ DC_I4x4 = 2,
+ DIAG_DL_I4x4 = 3,
+ DIAG_DR_I4x4 = 4,
+ VERT_R_I4x4 = 5,
+ HORZ_D_I4x4 = 6,
+ VERT_L_I4x4 = 7,
+ HORZ_U_I4x4 = 8,
+ MAX_I4x4 = 9,
+}MBMODES_I4x4;
+
+/**
+******************************************************************************
+ * @enum MBMODES_I8x8
+ * @brief Defines the set of possible intra 8x8 mb modes
+******************************************************************************
+*/
+typedef enum
+{
+ VERT_I8x8 = 0,
+ HORZ_I8x8 = 1,
+ DC_I8x8 = 2,
+ DIAG_DL_I8x8 = 3,
+ DIAG_DR_I8x8 = 4,
+ VERT_R_I8x8 = 5,
+ HORZ_D_I8x8 = 6,
+ VERT_L_I8x8 = 7,
+ HORZ_U_I8x8 = 8,
+ MAX_I8x8 = 9,
+}MBMODES_I8x8;
+
+/**
+******************************************************************************
+ * @enum MBMODES_CHROMA_I8x8 (Chroma)
+ * @brief Defines the set of possible intra 8x8 mb modes for chroma
+******************************************************************************
+*/
+typedef enum
+{
+ DC_CH_I8x8 = 0,
+ HORZ_CH_I8x8 = 1,
+ VERT_CH_I8x8 = 2,
+ PLANE_CH_I8x8 = 3,
+ MAX_CH_I8x8 = 4,
+}MBMODES_CHROMA_I8x8;
+
+/**
+******************************************************************************
+ * @enum MBTYPES
+ * @brief Defines the set of possible macro block types
+******************************************************************************
+*/
+typedef enum
+{
+ I16x16 = 0,
+ I4x4 = 1,
+ I8x8 = 2,
+ P16x16 = 3,
+ P16x8 = 4,
+ P8x16 = 5,
+ P8x8 = 6,
+ PSKIP = 7,
+ IPCM = 8,
+ MAX_MBTYPES,
+}MBTYPES_T;
+
+/* Prediction list */
+/* Do not change enum values */
+enum
+{
+ PRED_L0 = 0,
+ PRED_L1 = 1,
+ PRED_BI = 2
+};
+
+
+/**
+******************************************************************************
+ * @enum ENTROPY_BLK_TYPE
+ * @brief Defines the nature of blocks employed in entropy coding
+******************************************************************************
+*/
+typedef enum
+{
+ ENTROPY_BLK_INVALID = -1,
+ CAVLC_LUMA_4x4_DC = 0,
+ CAVLC_LUMA_4x4_AC = 1,
+ CAVLC_LUMA_4x4 = 2,
+ CAVLC_CHROMA_4x4_DC = 3,
+ CAVLC_CHROMA_4x4_AC = 4,
+} ENTROPY_BLK_TYPE;
+
+/**
+******************************************************************************
+ * @enum ENTROPY_MODE
+ * @brief Entropy coding modes
+******************************************************************************
+*/
+typedef enum
+{
+ CAVLC = 0,
+ CABAC = 1,
+} ENTROPY_MODE;
+
+/**
+******************************************************************************
+ * @enum COMPONENT_TYPE
+ * @brief components Y, U & V
+******************************************************************************
+*/
+typedef enum
+{
+ Y,
+ U,
+ V,
+} COMPONENT_TYPE;
+
+
+/**
+******************************************************************************
+ * @enum MBPART_PREDMODE_T
+ * @brief MbPartPredMode Table 7-11 to 7-14
+******************************************************************************
+*/
+typedef enum
+{
+ MBPART_NA,
+ MBPART_I4x4,
+ MBPART_I8x8,
+ MBPART_I16x16,
+ MBPART_L0,
+ MBPART_L1,
+ MBPART_BI,
+ MBPART_DIRECT,
+ MBPART_IPCM,
+}MBPART_PREDMODE_T;
+
+
+typedef enum /* NOTE(review): presumably the mb_type codes of I slices in H.264 Table 7-11 order - confirm against the parser */
+{
+ I_NxN, /* 0; every following enumerator is the previous one plus 1 */
+ I_16x16_0_0_0, /* 1; in I_16x16_a_b_c, a cycles 0-3 fastest, then b 0-2, then c 0-1 */
+ I_16x16_1_0_0,
+ I_16x16_2_0_0,
+ I_16x16_3_0_0,
+ I_16x16_0_1_0,
+ I_16x16_1_1_0,
+ I_16x16_2_1_0,
+ I_16x16_3_1_0,
+ I_16x16_0_2_0,
+ I_16x16_1_2_0,
+ I_16x16_2_2_0,
+ I_16x16_3_2_0,
+ I_16x16_0_0_1, /* 13 */
+ I_16x16_1_0_1,
+ I_16x16_2_0_1,
+ I_16x16_3_0_1,
+ I_16x16_0_1_1,
+ I_16x16_1_1_1,
+ I_16x16_2_1_1,
+ I_16x16_3_1_1,
+ I_16x16_0_2_1,
+ I_16x16_1_2_1,
+ I_16x16_2_2_1,
+ I_16x16_3_2_1, /* 24 */
+ I_PCM, /* 25 */
+}MBTYPE_ISLICE_T;
+
+typedef enum /* NOTE(review): presumably the mb_type codes of P slices (H.264 Table 7-13) - confirm against the parser */
+{
+ P_L0_16x16, /* 0; every following enumerator is the previous one plus 1 */
+ P_L0_L0_16x8,
+ P_L0_L0_8x16,
+ P_8x8,
+ P_8x8REF0,
+ P_SKIP /* 5 */
+}MBTYPE_PSLICE_T;
+
+typedef enum /* NOTE(review): presumably the mb_type codes of B slices (H.264 Table 7-14) - confirm against the parser */
+{
+ B_DIRECT_16x16, /* 0; every following enumerator is the previous one plus 1 */
+ B_L0_16x16,
+ B_L1_16x16,
+ B_BI_16x16,
+ B_L0_L0_16x8, /* 4; from here on 16x8 and 8x16 entries alternate up to B_BI_BI_8x16 */
+ B_L0_L0_8x16,
+ B_L1_L1_16x8,
+ B_L1_L1_8x16,
+ B_L0_L1_16x8,
+ B_L0_L1_8x16,
+ B_L1_L0_16x8,
+ B_L1_L0_8x16,
+ B_L0_BI_16x8,
+ B_L0_BI_8x16,
+ B_L1_BI_16x8,
+ B_L1_BI_8x16,
+ B_BI_L0_16x8,
+ B_BI_L0_8x16,
+ B_BI_L1_16x8,
+ B_BI_L1_8x16,
+ B_BI_BI_16x8,
+ B_BI_BI_8x16, /* 21 */
+ B_8x8, /* 22 */
+ B_SKIP, /* 23 */
+}MBTYPE_BSLICE_T;
+
+
+typedef enum   /* values spelled out; they are consecutive from 0 */
+{
+    P_L0_8x8 = 0,
+    P_L0_8x4 = 1,
+    P_L0_4x8 = 2,
+    P_L0_4x4 = 3
+} SUBMBTYPE_PSLICE_T;
+
+typedef enum   /* values spelled out; they are consecutive from 0 */
+{
+    B_DIRECT_8x8 = 0,
+    B_L0_8x8     = 1,
+    B_L1_8x8     = 2,
+    B_BI_8x8     = 3,
+    B_L0_8x4     = 4,
+    B_L0_4x8     = 5,
+    B_L1_8x4     = 6,
+    B_L1_4x8     = 7,
+    B_BI_8x4     = 8,
+    B_BI_4x8     = 9,
+    B_L0_4x4     = 10,
+    B_L1_4x4     = 11,
+    B_BI_4x4     = 12
+} SUBMBTYPE_BSLICE_T;
+
+/**
+ * DC mode for the 4 4x4 sub blocks in an MB row, one per byte (parenthesized for safe expansion)
+ */
+#define DC_I16X16_MB_ROW ((DC_I16x16 << 24) | (DC_I16x16 << 16) | \
+                          (DC_I16x16 << 8) | DC_I16x16)
+
+
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Reference frame defs */
+/*****************************************************************************/
+/* Maximum DPB size */
+#define MAX_DPB_SIZE 16
+
+/* Maximum mmco commands in slice header */
+#define MAX_MMCO_COMMANDS 32
+
+/* Maximum reference reorder idc */
+#define MAX_MODICATION_IDC 32
+
+/*****************************************************************************/
+/* SPS restrictions */
+/*****************************************************************************/
+
+/* Number of SPS allowed */
+/* An extra buffer is allocated to write the parsed data
+ * It is copied to the appropriate location later */
+#define MAX_SPS_CNT (32 + 1)
+
+/* Maximum long term reference pics */
+#define MAX_LTREF_PICS_SPS 16
+
+/* Maximum short term reference pics */
+#define MAX_STREF_PICS_SPS 64
+
+
+/*****************************************************************************/
+/* PPS restrictions */
+/*****************************************************************************/
+
+/* Number of PPS allowed */
+/* An extra buffer is allocated to write the parsed data
+ * It is copied to the appropriate location later */
+#define MAX_PPS_CNT (256 + 1)
+
+/*****************************************************************************/
+/* Macro definitions for sizes of MB, PU, TU, CU */
+/*****************************************************************************/
+#define MB_SIZE 16
+#define BLK8x8SIZE 8
+#define BLK_SIZE 4
+
+
+/* TU Size Range */
+#define MAX_TU_SIZE 8
+#define MIN_TU_SIZE 4
+
+/* Max Transform Size */
+#define MAX_TRANS_SIZE (MAX_TU_SIZE*MAX_TU_SIZE)
+
+/* PU Size Range */
+#define MAX_PU_SIZE 16
+#define MIN_PU_SIZE 4
+
+/* Number of max TU in a MB row */
+#define MAX_TU_IN_MB_ROW ((MB_SIZE / MIN_TU_SIZE))
+
+/* Number of max PU in a MB row */
+#define MAX_PU_IN_MB_ROW ((MB_SIZE / MIN_PU_SIZE))
+
+
+/* Number of max PU in a MB */
+/*****************************************************************************/
+/* Note though for 64 x 64 MB, Max PU in MB is 128, in order to store */
+/* intra pred info, 256 entries are needed */
+/*****************************************************************************/
+#define MAX_PU_IN_MB ((MB_SIZE / MIN_PU_SIZE) * \
+ (MB_SIZE / MIN_PU_SIZE))
+
+/* Number of max TU in a MB */
+#define MAX_TU_IN_MB ((MB_SIZE / MIN_TU_SIZE) * \
+ (MB_SIZE / MIN_TU_SIZE))
+
+
+
+/**
+ * Maximum transform depths
+ */
+#define MAX_TRAFO_DEPTH 5
+
+#define MAX_DC_4x4_SUBBLK_LUMA 1
+#define MAX_AC_4x4_SUBBLK_LUMA 16
+#define MAX_DC_4x4_SUBBLK_CHROMA 2
+#define MAX_AC_4x4_SUBBLK_CHROMA 8
+
+#define MAX_4x4_SUBBLKS (MAX_DC_4x4_SUBBLK_LUMA + MAX_DC_4x4_SUBBLK_CHROMA +\
+ MAX_AC_4x4_SUBBLK_LUMA + MAX_AC_4x4_SUBBLK_CHROMA)
+
+/* Max number of deblocking edges */
+#define MAX_VERT_DEBLK_EDGES ((MB_SIZE/8) * (MB_SIZE/4))
+#define MAX_HORZ_DEBLK_EDGES ((MB_SIZE/4) * (MB_SIZE/8))
+
+/* Qp can not change below 8x8 level */
+#define MAX_DEBLK_QP_CNT ((MB_SIZE/8) * (MB_SIZE/8))
+
+/*****************************************************************************/
+/* Parsing related macros */
+/*****************************************************************************/
+#define SUBBLK_COEFF_CNT 16
+
+/* Quant and Trans defs */
+
+/*****************************************************************************/
+/* Sizes for Transform functions */
+/*****************************************************************************/
+#define TRANS_SIZE_4 4
+#define TRANS_SIZE_8 8
+#define TRANS_SIZE_16 16
+#define TRANS_SIZE_32 32
+
+
+#define IT_SHIFT_STAGE_1 7
+#define IT_SHIFT_STAGE_2 12
+
+/**
+ * @brief Maximum transform dynamic range (excluding sign bit)
+ */
+#define MAX_TR_DYNAMIC_RANGE 15
+
+/**
+ * @brief Q(QP%6) * IQ(QP%6) = 2^20
+ */
+#define QUANT_IQUANT_SHIFT 20
+
+/**
+ * @brief Q factor for Qp%6 multiplication
+ */
+#define QUANT_SHIFT 14
+
+/**
+ * @brief Q shift factor for flat rescale matrix weights
+ */
+#define FLAT_RESCALE_MAT_Q_SHIFT 11
+
+/**
+ * @brief Scaling matrix is represented in Q15 format
+ */
+#define SCALING_Q_SHIFT 15
+
+/**
+ * @brief rounding factor for quantization represented in Q9 format
+ */
+#define QUANT_ROUND_FACTOR_Q 9
+
+/**
+ * @brief Minimum qp supported in H264 spec
+ */
+#define MIN_H264_QP 0
+
+/**
+ * @brief Maximum qp supported in H264 spec
+ */
+#define MAX_H264_QP 51
+
+/**
+ * @brief Total number of transform sizes
+ * used for sizeID while getting scale matrix
+ */
+#define NUM_UNIQUE_TRANS_SIZE 4
+
+/**
+ * @brief Maximum number of bits in frameNumber signaling
+ */
+#define MAX_BITS_IN_FRAME_NUM 16
+
+/**
+ * @brief Maximum number of bits in POC LSB signaling
+ */
+#define MAX_BITS_IN_POC_LSB 16
+
+
+/**
+ * @brief Maximum PIC Order Count type
+ */
+#define MAX_PIC_ORDER_COUNT_TYPE 2
+
+
+/**
+ * @brief Maximum Weighted bipred idc
+ */
+#define MAX_WEIGHT_BIPRED_IDC 2
+
+/*****************************************************************************/
+/* Number of scaling matrices for each transform size */
+/*****************************************************************************/
+#define SCALE_MAT_CNT_TRANS_SIZE_4 6
+#define SCALE_MAT_CNT_TRANS_SIZE_8 6
+#define SCALE_MAT_CNT_TRANS_SIZE_16 6
+#define SCALE_MAT_CNT_TRANS_SIZE_32 2
+
+/* Maximum number of scale matrices for a given transform size */
+#define SCALE_MAT_CNT_MAX_PER_TRANS_SIZE 6
+
+/* Total number of scale matrices */
+#define TOTAL_SCALE_MAT_COUNT (SCALE_MAT_CNT_TRANS_SIZE_4 + \
+ SCALE_MAT_CNT_TRANS_SIZE_8 + \
+ SCALE_MAT_CNT_TRANS_SIZE_16 + \
+ SCALE_MAT_CNT_TRANS_SIZE_32)
+
+
+/*****************************************************************************/
+/* Intra pred Macros */
+/*****************************************************************************/
+/** Planar Intra prediction mode */
+#define INTRA_PLANAR 0
+
+/** DC Intra prediction mode */
+#define INTRA_DC 1
+
+/** Gives angular mode for intra prediction */
+#define INTRA_ANGULAR(x) (x)
+
+/** Following is used to signal no intra prediction in case of pcm blocks
+ */
+#define INTRA_PRED_NONE 63
+
+
+/** Following is used to signal no intra prediction is needed for first three
+ * 4x4 luma blocks in case of 4x4 TU sizes
+ * Also used in pcm cases
+ */
+#define INTRA_PRED_CHROMA_IDX_NONE 7
+
+
+/**
+******************************************************************************
+ * @brief neighbor availability masks
+******************************************************************************
+ */
+#define LEFT_MB_AVAILABLE_MASK 0x01
+#define TOP_LEFT_MB_AVAILABLE_MASK 0x02
+#define TOP_MB_AVAILABLE_MASK 0x04
+#define TOP_RIGHT_MB_AVAILABLE_MASK 0x08
+
+#endif /* IH264_DEFS_H_ */
diff --git a/common/ih264_disp_mgr.c b/common/ih264_disp_mgr.c
new file mode 100755
index 0000000..2bdb524
--- /dev/null
+++ b/common/ih264_disp_mgr.c
@@ -0,0 +1,186 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_disp_mgr.c
+*
+* @brief
+* Contains function definitions for display management
+*
+* @author
+* Srinivas T
+*
+* @par List of Functions:
+* - ih264_disp_mgr_init()
+* - ih264_disp_mgr_add()
+* - ih264_disp_mgr_get()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#include <stdlib.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_disp_mgr.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Initialization function for display buffer manager
+*
+* @par Description:
+* Initializes the display buffer management structure
+*
+* @param[in] ps_disp_mgr
+* Pointer to the display buffer management structure
+*
+* @returns none
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void ih264_disp_mgr_init(disp_mgr_t *ps_disp_mgr)
+{
+    WORD32 slot = 0;
+
+    ps_disp_mgr->u4_last_abs_poc = DEFAULT_POC;
+    /* Every slot starts out empty: DEFAULT_POC as POC and a NULL buffer pointer */
+    while(slot < DISP_MGR_MAX_CNT)
+    {
+        ps_disp_mgr->ai4_abs_poc[slot] = DEFAULT_POC;
+        ps_disp_mgr->apv_ptr[slot++] = NULL;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Adds a buffer to the display manager
+*
+* @par Description:
+* Stores the buffer pointer and its absolute POC in slot buf_id
+*
+* @param[in] ps_disp_mgr
+* Pointer to the display buffer management structure
+*
+* @param[in] buf_id
+* ID of the display buffer; must be in [0, DISP_MGR_MAX_CNT)
+*
+* @param[in] abs_poc
+* Absolute POC of the display buffer
+*
+* @param[in] pv_ptr
+* Pointer to the display buffer
+*
+* @returns 0 if success, -1 if buf_id is out of range or the slot is occupied
+*
+* @remarks
+* A slot counts as occupied while its stored pointer is non-NULL
+*
+*******************************************************************************
+*/
+WORD32 ih264_disp_mgr_add(disp_mgr_t *ps_disp_mgr,
+                          WORD32 buf_id,
+                          WORD32 abs_poc,
+                          void *pv_ptr)
+{
+    /* buf_id indexes fixed-size arrays: reject negative ids as well as too-large ones */
+    if((buf_id < 0) || (buf_id >= DISP_MGR_MAX_CNT))
+    {
+        return (-1);
+    }
+    if(ps_disp_mgr->apv_ptr[buf_id] != NULL)
+    {
+        return (-1);
+    }
+
+    ps_disp_mgr->apv_ptr[buf_id] = pv_ptr;
+    ps_disp_mgr->ai4_abs_poc[buf_id] = abs_poc;
+    return 0;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Gets the next buffer
+*
+* @par Description:
+* Gets the next display buffer
+*
+* @param[in] ps_disp_mgr
+* Pointer to the display buffer structure
+*
+* @param[out] pi4_buf_id
+* Pointer to hold buffer id of the display buffer being returned
+*
+* @returns Pointer to the next display buffer
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void* ih264_disp_mgr_get(disp_mgr_t *ps_disp_mgr, WORD32 *pi4_buf_id)
+{
+    WORD32 slot;
+    WORD32 best_slot = -1;
+    WORD32 best_poc = 0x7FFFFFFF;
+    void *pv_buf;
+
+    /* Scan all slots for the smallest POC; on ties the highest slot index wins */
+    for(slot = 0; slot < DISP_MGR_MAX_CNT; slot++)
+    {
+        WORD32 poc = ps_disp_mgr->ai4_abs_poc[slot];
+
+        /* Slots holding DEFAULT_POC are empty */
+        if(DEFAULT_POC == poc)
+            continue;
+
+        if(poc <= best_poc)
+        {
+            best_poc = poc;
+            best_slot = slot;
+        }
+    }
+
+    /* The id is reported even when nothing was found (-1 in that case) */
+    *pi4_buf_id = best_slot;
+    if(best_slot < 0)
+    {
+        return NULL;
+    }
+
+    /* Hand out the buffer and clear the slot so it is not returned again */
+    pv_buf = ps_disp_mgr->apv_ptr[best_slot];
+    ps_disp_mgr->apv_ptr[best_slot] = NULL;
+    ps_disp_mgr->ai4_abs_poc[best_slot] = DEFAULT_POC;
+    return pv_buf;
+}
diff --git a/common/ih264_disp_mgr.h b/common/ih264_disp_mgr.h
new file mode 100755
index 0000000..6f56493
--- /dev/null
+++ b/common/ih264_disp_mgr.h
@@ -0,0 +1,70 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_disp_mgr.h
+*
+* @brief
+* Function declarations used for display management
+*
+* @author
+* Srinivas T
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _DISP_MGR_H_
+#define _DISP_MGR_H_
+
+#define DISP_MGR_MAX_CNT 64
+#define DEFAULT_POC 0x7FFFFFFF
+
+typedef struct
+{
+ /**
+ * last_abs_poc; set to DEFAULT_POC by ih264_disp_mgr_init()
+ */
+ UWORD32 u4_last_abs_poc;
+
+ /**
+ * ai4_abs_poc[DISP_MGR_MAX_CNT]: absolute POC per slot, DEFAULT_POC when the slot is empty
+ */
+ WORD32 ai4_abs_poc[DISP_MGR_MAX_CNT];
+
+ /**
+ * apv_ptr[DISP_MGR_MAX_CNT]: display buffer pointer per slot, NULL when the slot is empty
+ */
+ void *apv_ptr[DISP_MGR_MAX_CNT];
+}disp_mgr_t;
+
+void ih264_disp_mgr_init(disp_mgr_t *ps_disp_mgr);
+/* Stores pv_ptr and abs_poc in slot buf_id; returns 0 on success, -1 otherwise */
+WORD32 ih264_disp_mgr_add(disp_mgr_t *ps_disp_mgr,
+                          WORD32 buf_id,
+                          WORD32 abs_poc,
+                          void *pv_ptr);
+/* Returns the buffer with the smallest POC (NULL if none) and writes its id to *pi4_buf_id */
+void* ih264_disp_mgr_get(disp_mgr_t *ps_disp_mgr, WORD32 *pi4_buf_id);
+
+#endif //_DISP_MGR_H_
diff --git a/common/ih264_dpb_mgr.c b/common/ih264_dpb_mgr.c
new file mode 100755
index 0000000..8e087d3
--- /dev/null
+++ b/common/ih264_dpb_mgr.c
@@ -0,0 +1,1176 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_dpb_mgr.c
+ *
+ * @brief
+ * Function definitions used for decoded picture buffer management
+ *
+ * @author
+ * Srinivas T
+ *
+ * @par List of Functions:
+ * - ih264_dpb_mgr_init()
+ * - ih264_dpb_mgr_sort_short_term_fields_by_frame_num()
+ * - ih264_dpb_mgr_sort_short_term_fields_by_poc_l0()
+ * - ih264_dpb_mgr_sort_short_term_fields_by_poc_l1()
+ * - ih264_dpb_mgr_sort_long_term_fields_by_frame_idx()
+ * - ih264_dpb_mgr_alternate_ref_fields()
+ * - ih264_dpb_mgr_insert_ref_field()
+ * - ih264_dpb_mgr_insert_ref_frame()
+ * - ih264_dpb_mgr_count_ref_frames()
+ * - ih264_dpb_mgr_delete_ref_frame()
+ * - ih264_dpb_mgr_delete_long_ref_fields_max_frame_idx()
+ * - ih264_dpb_mgr_delete_short_ref_frame()
+ * - ih264_dpb_mgr_delete_all_ref_frames()
+ * - ih264_dpb_mgr_reset()
+ * - ih264_dpb_mgr_release_pics()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264_debug.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * DPB manager initializer
+ *
+ * @par Description:
+ * Initialises the DPB manager structure
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @returns
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+
+void ih264_dpb_mgr_init(dpb_mgr_t *ps_dpb_mgr)
+{
+    UWORD32 idx;
+    /* Both reference lists start empty: no head node and a zero count */
+    ps_dpb_mgr->ps_dpb_short_term_head = NULL;
+    ps_dpb_mgr->ps_dpb_long_term_head = NULL;
+    ps_dpb_mgr->u1_num_short_term_ref_bufs = 0;
+    ps_dpb_mgr->u1_num_long_term_ref_bufs = 0;
+    for(idx = 0; idx < MAX_DPB_BUFS; idx++)
+    {
+        dpb_info_t *ps_node = &ps_dpb_mgr->as_dpb_info[idx];
+        ps_node->ps_prev_dpb = NULL;
+        ps_node->ps_pic_buf = NULL;
+        ps_dpb_mgr->as_top_field_pics[idx].i4_used_as_ref = INVALID;
+        ps_dpb_mgr->as_top_field_pics[idx].i1_field_type = INVALID;
+        ps_dpb_mgr->as_top_field_pics[idx].i4_long_term_frame_idx = -1;
+        ps_dpb_mgr->as_bottom_field_pics[idx].i4_used_as_ref = INVALID;
+        ps_dpb_mgr->as_bottom_field_pics[idx].i1_field_type = INVALID;
+        ps_dpb_mgr->as_bottom_field_pics[idx].i4_long_term_frame_idx = -1;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Function to sort short term pics by frame_num.
+ *
+ * @par Description:
+ * Sorts short term fields in descending order of (adjusted) frame_num. For
+ * 2 fields having same frame_num, orders them by requested first field type.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] curr_frame_num
+ * frame_num of the current pic
+ *
+ * @param[in] first_field_type
+ * For complementary fields, required first field
+ *
+ * @param[in] max_frame_num
+ * Subtracted from any frame_num greater than curr_frame_num before comparing
+ *
+ * @returns 0 on success, -1 if the short term list is empty
+ *
+ * @remarks
+ * Only the ps_pic_buf pointers of the list nodes are exchanged
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_frame_num(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 curr_frame_num,
+ WORD32 first_field_type,
+ WORD32 max_frame_num)
+{
+ dpb_info_t *ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head;
+ dpb_info_t *ps_dpb_node2;
+ WORD32 frame_num_node1;
+ WORD32 frame_num_node2;
+ pic_buf_t *ps_pic_buf;
+
+ if(ps_dpb_node1 == NULL)
+ return -1;
+
+ for (; ps_dpb_node1 != NULL; ps_dpb_node1 = ps_dpb_node1->ps_prev_dpb)
+ {
+ for (ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb; ps_dpb_node2 != NULL; ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb)
+ {
+ frame_num_node1 = ps_dpb_node1->ps_pic_buf->i4_frame_num;
+ frame_num_node2 = ps_dpb_node2->ps_pic_buf->i4_frame_num;
+ /* Values above curr_frame_num are lowered by max_frame_num before comparing */
+ if(frame_num_node1 > curr_frame_num)
+ frame_num_node1 = frame_num_node1 - max_frame_num;
+ if(frame_num_node2 > curr_frame_num)
+ frame_num_node2 = frame_num_node2 - max_frame_num;
+ /* The larger adjusted frame_num is moved towards the head */
+ if(frame_num_node1 < frame_num_node2)
+ {
+ ps_pic_buf = ps_dpb_node1->ps_pic_buf;
+ ps_dpb_node1->ps_pic_buf = ps_dpb_node2->ps_pic_buf;
+ ps_dpb_node2->ps_pic_buf = ps_pic_buf;
+ }
+ }
+ }
+
+ /**
+ * For frames and complementary field pairs,
+ * ensure first_field_type appears first in the list
+ */
+ ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head;
+ ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb;
+ while(ps_dpb_node2 != NULL)
+ {
+ pic_buf_t *ps_pic_node1 = ps_dpb_node1->ps_pic_buf;
+ pic_buf_t *ps_pic_node2 = ps_dpb_node2->ps_pic_buf;
+ frame_num_node1 = ps_pic_node1->i4_frame_num;
+ frame_num_node2 = ps_pic_node2->i4_frame_num;
+ if(frame_num_node1 == frame_num_node2)
+ {
+ ASSERT(ps_pic_node1->i1_field_type != ps_pic_node2->i1_field_type);
+ if(ps_pic_node1->i1_field_type != first_field_type)
+ {
+ ps_dpb_node1->ps_pic_buf = ps_pic_node2;
+ ps_dpb_node2->ps_pic_buf = ps_pic_node1;
+ }
+ }
+ ps_dpb_node1 = ps_dpb_node2;
+ ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb;
+ }
+ return 0;
+
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Function to sort short term pics by poc for list 0.
+ *
+ * @par Description:
+ * Orders all the pocs less than current poc in the descending order.
+ * Then orders all the pocs greater than current poc in the ascending order.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] curr_poc
+ * Poc of the current pic
+ *
+ * @param[in] first_field_type
+ * For complementary fields, required first field
+ *
+ * @returns 0 on success, -1 if the short term list is empty
+ *
+ * @remarks
+ * Pictures are compared by i4_abs_poc; only ps_pic_buf pointers are exchanged
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_poc_l0(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 curr_poc,
+ WORD32 first_field_type)
+{
+ dpb_info_t *ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head;
+ dpb_info_t *ps_dpb_node2;
+ WORD32 poc_node1;
+ WORD32 poc_node2;
+ WORD32 frame_num_node1;
+ WORD32 frame_num_node2;
+ pic_buf_t *ps_pic_buf;
+
+ if(ps_dpb_node1 == NULL)
+ return -1;
+
+ /**
+ * Sort the fields by poc.
+ * All POCs less than current poc are first placed in the descending order.
+ * Then all POCs greater than current poc are placed in the ascending order.
+ */
+ for (; ps_dpb_node1 != NULL; ps_dpb_node1 = ps_dpb_node1->ps_prev_dpb)
+ {
+ for (ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb; ps_dpb_node2 != NULL; ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb)
+ {
+ poc_node1 = ps_dpb_node1->ps_pic_buf->i4_abs_poc;
+ poc_node2 = ps_dpb_node2->ps_pic_buf->i4_abs_poc;
+ ASSERT(poc_node1 != curr_poc);
+ ASSERT(poc_node2 != curr_poc);
+ if(((poc_node1 < curr_poc) && (poc_node2 > curr_poc)) ||
+ ((poc_node1 < curr_poc) && (poc_node2 < curr_poc) && (poc_node1 > poc_node2)) ||
+ ((poc_node1 > curr_poc) && (poc_node2 > curr_poc) && (poc_node1 < poc_node2)))
+ continue;
+ /* The pair is not in the required order: exchange the pictures */
+ ps_pic_buf = ps_dpb_node1->ps_pic_buf;
+ ps_dpb_node1->ps_pic_buf = ps_dpb_node2->ps_pic_buf;
+ ps_dpb_node2->ps_pic_buf = ps_pic_buf;
+ }
+ }
+ /* For adjacent fields with the same frame_num, put first_field_type first */
+ ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head;
+ ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb;
+ while(ps_dpb_node2 != NULL)
+ {
+ pic_buf_t *ps_pic_node1 = ps_dpb_node1->ps_pic_buf;
+ pic_buf_t *ps_pic_node2 = ps_dpb_node2->ps_pic_buf;
+ frame_num_node1 = ps_pic_node1->i4_frame_num;
+ frame_num_node2 = ps_pic_node2->i4_frame_num;
+ if(frame_num_node1 == frame_num_node2)
+ {
+ ASSERT(ps_pic_node1->i1_field_type != ps_pic_node2->i1_field_type);
+ if(ps_pic_node1->i1_field_type != first_field_type)
+ {
+ ps_dpb_node1->ps_pic_buf = ps_pic_node2;
+ ps_dpb_node2->ps_pic_buf = ps_pic_node1;
+ }
+ }
+ ps_dpb_node1 = ps_dpb_node2;
+ ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb;
+ }
+ return 0;
+
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Function to sort short term pics by poc for list 1.
+ *
+ * @par Description:
+ * Orders all the pocs greater than current poc in the ascending order.
+ * Then orders all the pocs less than current poc in the descending order.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] curr_poc
+ * Poc of the current pic
+ *
+ * @param[in] first_field_type
+ * For complementary fields, required first field
+ *
+ * @returns 0 on success, -1 if the short term list is empty
+ *
+ * @remarks
+ * Pictures are compared by i4_abs_poc; only ps_pic_buf pointers are exchanged
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_poc_l1(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 curr_poc,
+ WORD32 first_field_type)
+{
+ dpb_info_t *ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head;
+ dpb_info_t *ps_dpb_node2;
+ WORD32 poc_node1;
+ WORD32 poc_node2;
+ WORD32 frame_num_node1;
+ WORD32 frame_num_node2;
+ pic_buf_t *ps_pic_buf;
+
+ if(ps_dpb_node1 == NULL)
+ return -1;
+
+ /**
+ * Sort the fields by poc.
+ * All POCs greater than current poc are first placed in the ascending order.
+ * Then all POCs less than current poc are placed in the descending order.
+ */
+ for (; ps_dpb_node1 != NULL; ps_dpb_node1 = ps_dpb_node1->ps_prev_dpb)
+ {
+ for (ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb; ps_dpb_node2 != NULL; ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb)
+ {
+ poc_node1 = ps_dpb_node1->ps_pic_buf->i4_abs_poc;
+ poc_node2 = ps_dpb_node2->ps_pic_buf->i4_abs_poc;
+ ASSERT(poc_node1 != curr_poc);
+ ASSERT(poc_node2 != curr_poc);
+ if(((poc_node1 > curr_poc) && (poc_node2 < curr_poc)) ||
+ ((poc_node1 < curr_poc) && (poc_node2 < curr_poc) && (poc_node1 > poc_node2)) ||
+ ((poc_node1 > curr_poc) && (poc_node2 > curr_poc) && (poc_node1 < poc_node2)))
+ continue;
+ /* The pair is not in the required order: exchange the pictures */
+ ps_pic_buf = ps_dpb_node1->ps_pic_buf;
+ ps_dpb_node1->ps_pic_buf = ps_dpb_node2->ps_pic_buf;
+ ps_dpb_node2->ps_pic_buf = ps_pic_buf;
+ }
+ }
+ /* For adjacent fields with the same frame_num, put first_field_type first */
+ ps_dpb_node1 = ps_dpb_mgr->ps_dpb_short_term_head;
+ ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb;
+ while(ps_dpb_node2 != NULL)
+ {
+ pic_buf_t *ps_pic_node1 = ps_dpb_node1->ps_pic_buf;
+ pic_buf_t *ps_pic_node2 = ps_dpb_node2->ps_pic_buf;
+ frame_num_node1 = ps_pic_node1->i4_frame_num;
+ frame_num_node2 = ps_pic_node2->i4_frame_num;
+ if(frame_num_node1 == frame_num_node2)
+ {
+ ASSERT(ps_pic_node1->i1_field_type != ps_pic_node2->i1_field_type);
+ if(ps_pic_node1->i1_field_type != first_field_type)
+ {
+ ps_dpb_node1->ps_pic_buf = ps_pic_node2;
+ ps_dpb_node2->ps_pic_buf = ps_pic_node1;
+ }
+ }
+ ps_dpb_node1 = ps_dpb_node2;
+ ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb;
+ }
+ return 0;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Function to sort long term pics by long term frame idx.
+ *
+ * @par Description:
+ * Sorts long term fields in ascending order of long term frame idx. For 2
+ * fields having same long term frame idx, orders them by first field type.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] first_field_type
+ * For complementary fields, required first field
+ *
+ * @returns 0 on success, -1 if the long term list is empty
+ *
+ * @remarks
+ * Only the ps_pic_buf pointers of the list nodes are exchanged
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_sort_long_term_fields_by_frame_idx(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 first_field_type)
+{
+ dpb_info_t *ps_dpb_node1 = ps_dpb_mgr->ps_dpb_long_term_head;
+ dpb_info_t *ps_dpb_node2;
+ WORD32 frame_idx_node1;
+ WORD32 frame_idx_node2;
+ pic_buf_t *ps_pic_buf;
+
+ if(ps_dpb_node1 == NULL)
+ return -1;
+
+ /* Sort the fields by frame idx */
+ for (; ps_dpb_node1 != NULL; ps_dpb_node1 = ps_dpb_node1->ps_prev_dpb)
+ {
+ for (ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb; ps_dpb_node2 != NULL; ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb)
+ {
+ frame_idx_node1 = ps_dpb_node1->ps_pic_buf->i4_long_term_frame_idx;
+ frame_idx_node2 = ps_dpb_node2->ps_pic_buf->i4_long_term_frame_idx;
+ /* The smaller long term frame idx is moved towards the head */
+ if(frame_idx_node1 > frame_idx_node2)
+ {
+ ps_pic_buf = ps_dpb_node1->ps_pic_buf;
+ ps_dpb_node1->ps_pic_buf = ps_dpb_node2->ps_pic_buf;
+ ps_dpb_node2->ps_pic_buf = ps_pic_buf;
+ }
+ }
+ }
+
+ /**
+ * For frames and complementary field pairs,
+ * ensure first_field_type appears first in the list
+ */
+ ps_dpb_node1 = ps_dpb_mgr->ps_dpb_long_term_head;
+ ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb;
+ while(ps_dpb_node2 != NULL)
+ {
+ pic_buf_t *ps_pic_node1 = ps_dpb_node1->ps_pic_buf;
+ pic_buf_t *ps_pic_node2 = ps_dpb_node2->ps_pic_buf;
+ frame_idx_node1 = ps_pic_node1->i4_long_term_frame_idx;
+ frame_idx_node2 = ps_pic_node2->i4_long_term_frame_idx;
+ if(frame_idx_node1 == frame_idx_node2)
+ {
+ ASSERT(ps_pic_node1->i1_field_type != ps_pic_node2->i1_field_type);
+ if(ps_pic_node1->i1_field_type != first_field_type)
+ {
+ ps_dpb_node1->ps_pic_buf = ps_pic_node2;
+ ps_dpb_node2->ps_pic_buf = ps_pic_node1;
+ }
+ }
+ ps_dpb_node1 = ps_dpb_node2;
+ ps_dpb_node2 = ps_dpb_node2->ps_prev_dpb;
+ }
+ return 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Function to alternate fields.
+ *
+ * @par Description:
+ * In the ordered list of fields, alternate fields starting with
+ * first_field_type
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] reference_type
+ * SHORT_TERM_REF selects the short-term list, any other value the long-term list.
+ *
+ * @param[in] first_field_type
+ * For complementary fields, required first field
+ *
+ * @returns 0 always
+ *
+ * @remarks
+ * List nodes are relinked and the list head is rewritten. Processing stops
+ * as soon as no field of the expected type is left further down the list.
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_alternate_ref_fields(dpb_mgr_t *ps_dpb_mgr,
+                                          WORD32 reference_type,
+                                          WORD32 first_field_type)
+{
+    dpb_info_t s_dpb_head;
+    dpb_info_t *ps_dpb_head;
+    dpb_info_t *ps_dpb_node1;
+    dpb_info_t *ps_dpb_node2;
+    dpb_info_t *ps_dpb_node3;
+    dpb_info_t *ps_dpb_node4;
+    WORD32 expected_field;
+
+    expected_field = first_field_type;
+    /* Dummy node placed before the real head; only its link is ever used */
+    ps_dpb_head = &s_dpb_head;
+
+    ps_dpb_head->ps_prev_dpb = (reference_type == SHORT_TERM_REF) ?
+                    ps_dpb_mgr->ps_dpb_short_term_head:
+                    ps_dpb_mgr->ps_dpb_long_term_head;
+    /* node1 is the predecessor of node2 so a node can be spliced in between */
+    ps_dpb_node1 = ps_dpb_head;
+    ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb;
+    while(ps_dpb_node2 != NULL)
+    {
+        pic_buf_t *ps_pic_node2 = ps_dpb_node2->ps_pic_buf;
+        if(ps_pic_node2->i1_field_type != expected_field)
+        {
+            /*
+             * Not the expected field: search ahead for the first node (node4)
+             * of the expected field and splice it in front of node2.
+             */
+            ps_dpb_node3 = ps_dpb_node2;
+            ps_dpb_node4 = ps_dpb_node2->ps_prev_dpb;
+            while((ps_dpb_node4 != NULL) &&
+                  (ps_dpb_node4->ps_pic_buf->i1_field_type != expected_field))
+            {
+                ps_dpb_node3 = ps_dpb_node4;
+                ps_dpb_node4 = ps_dpb_node4->ps_prev_dpb;
+            }
+            if(ps_dpb_node4 != NULL)
+            {
+                ps_dpb_node1->ps_prev_dpb = ps_dpb_node4;
+                ps_dpb_node3->ps_prev_dpb = ps_dpb_node4->ps_prev_dpb;
+                ps_dpb_node4->ps_prev_dpb = ps_dpb_node2;
+            }
+            else
+            {
+                /* node4 null means we have reached the end */
+                break;
+            }
+        }
+        ps_dpb_node1 = ps_dpb_node1->ps_prev_dpb;
+        ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb;
+        expected_field = (ps_dpb_node1->ps_pic_buf->i1_field_type == TOP_FIELD)?
+                        BOTTOM_FIELD:TOP_FIELD;
+    }
+    /* The first node may have changed: write the head back to the manager */
+    if(reference_type == SHORT_TERM_REF)
+    {
+        ps_dpb_mgr->ps_dpb_short_term_head = ps_dpb_head->ps_prev_dpb;
+    }
+    else
+    {
+        ps_dpb_mgr->ps_dpb_long_term_head = ps_dpb_head->ps_prev_dpb;
+    }
+
+    return 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Add a ref field to short-term or long-term linked list.
+ *
+ * @par Description:
+ * This function adds a ref field to either short-term or long-term linked
+ * list. It picks up memory for the link from the array of dpb_info in
+ * dpb_mgr. The field is added to the beginning of the linked list and the
+ * head is set to the field.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] ps_pic_buf
+ * Pic buf structure for the field being added. Must not be NULL.
+ *
+ * @param[in] reference_type
+ * This is used to select between short-term and long-term linked list.
+ *
+ * @param[in] frame_num
+ * frame_num for the field.
+ *
+ * @param[in] long_term_frame_idx
+ * If the ref being added is long-term, long_term_frame_idx of the field.
+ * Ignored for short-term refs (the stored index is forced to -1).
+ *
+ * @returns 0 on success, -1 on NULL/duplicate picture or when no slot is free
+ *
+ * @remarks
+ * On success the picture's i4_used_as_ref, i4_frame_num and
+ * i4_long_term_frame_idx fields are overwritten.
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_insert_ref_field(dpb_mgr_t *ps_dpb_mgr,
+                                      pic_buf_t *ps_pic_buf,
+                                      WORD32 reference_type,
+                                      UWORD32 frame_num,
+                                      WORD32 long_term_frame_idx)
+{
+    WORD32 i;
+    dpb_info_t *ps_dpb_info;
+    dpb_info_t *ps_dpb_head;
+
+    /* A NULL picture would be dereferenced below, so reject it up front */
+    if(NULL == ps_pic_buf)
+        return (-1);
+
+    ps_dpb_info = ps_dpb_mgr->as_dpb_info;
+
+    /* Return error if buffer is already present in the DPB */
+    for(i = 0; i < MAX_DPB_BUFS; i++)
+    {
+        if( (ps_dpb_info[i].ps_pic_buf == ps_pic_buf)
+            && (ps_dpb_info[i].ps_pic_buf->i4_used_as_ref == reference_type) )
+        {
+            return (-1);
+        }
+    }
+
+    /* Find an unused DPB location */
+    for(i = 0; i < MAX_DPB_BUFS; i++)
+    {
+        if(NULL == ps_dpb_info[i].ps_pic_buf)
+        {
+            break;
+        }
+    }
+    if(i == MAX_DPB_BUFS)
+    {
+        return (-1);
+    }
+
+    ps_dpb_head = (reference_type == SHORT_TERM_REF)
+                    ?ps_dpb_mgr->ps_dpb_short_term_head
+                    :ps_dpb_mgr->ps_dpb_long_term_head;
+
+    /* Short-term pictures carry no long-term index */
+    if(reference_type == SHORT_TERM_REF)
+        long_term_frame_idx = -1;
+
+    /* Create DPB info */
+    ps_dpb_info[i].ps_pic_buf = ps_pic_buf;
+    ps_dpb_info[i].ps_prev_dpb = ps_dpb_head;
+    ps_dpb_info[i].ps_pic_buf->i4_used_as_ref = reference_type;
+    ps_dpb_info[i].ps_pic_buf->i4_frame_num = frame_num;
+    ps_dpb_info[i].ps_pic_buf->i4_long_term_frame_idx = long_term_frame_idx;
+
+    /* Make the new node the head of its list and bump that list's count */
+    if(reference_type == SHORT_TERM_REF)
+    {
+        ps_dpb_mgr->ps_dpb_short_term_head = ps_dpb_info + i;
+        ps_dpb_mgr->u1_num_short_term_ref_bufs++;
+    }
+    else
+    {
+        ps_dpb_mgr->ps_dpb_long_term_head = ps_dpb_info + i;
+        ps_dpb_mgr->u1_num_long_term_ref_bufs++;
+    }
+
+    return 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Add a ref frame to short-term or long-term linked list.
+ *
+ * @par Description:
+ * This function adds a ref frame to either short-term or long-term linked
+ * list. Internally it calls add ref field twice to add top and bottom field.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] ps_pic_buf
+ * Pic buf structure of the top field of the frame being added. The bottom
+ * field structure is looked up from the DPB manager using its buf id.
+ *
+ * @param[in] reference_type
+ * This is used to select between short-term and long-term linked list.
+ *
+ * @param[in] frame_num
+ * frame_num for the field.
+ *
+ * @param[in] long_term_frame_idx
+ * If the ref being added is long-term, long_term_frame_idx of the field.
+ * Otherwise invalid.
+ *
+ * @returns
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_insert_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                      pic_buf_t *ps_pic_buf,
+                                      WORD32 reference_type,
+                                      UWORD32 frame_num,
+                                      WORD32 long_term_frame_idx)
+{
+    pic_buf_t *aps_fields[2];
+    WORD32 fld;
+    WORD32 status = 0;
+
+    /*
+     * ps_pic_buf describes the top field; the matching bottom field lives in
+     * the manager's bottom-field array at the same buf id.
+     */
+    aps_fields[0] = ps_pic_buf;
+    aps_fields[1] = &ps_dpb_mgr->as_bottom_field_pics[ps_pic_buf->i4_buf_id];
+
+    /* Insert top then bottom; stop at the first failure and report it */
+    for(fld = 0; (fld < 2) && (0 == status); fld++)
+    {
+        status = ih264_dpb_mgr_insert_ref_field(ps_dpb_mgr,
+                                                aps_fields[fld],
+                                                reference_type,
+                                                frame_num,
+                                                long_term_frame_idx);
+    }
+
+    return status;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Returns the number of ref frames in both linked lists.
+ *
+ * @par Description:
+ * Returns the count of number of frames, number of complementary field pairs
+ * and number of unpaired fields.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] curr_frame_num
+ * frame_num for the field.
+ *
+ * @param[in] max_frame_num
+ * Maximum frame_num allowed
+ *
+ * @returns
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_count_ref_frames(dpb_mgr_t *ps_dpb_mgr,
+                                      WORD32 curr_frame_num,
+                                      WORD32 max_frame_num)
+{
+    dpb_info_t *ps_head;
+    dpb_info_t *ps_node;
+    WORD32 i4_last_frame_num = 0;
+    WORD32 i4_total = 0;
+
+    /*
+     * Short-term list: sort by frame_num, then count one entry per run of
+     * consecutive nodes that share a frame_num (a frame / complementary field
+     * pair contributes two nodes, an unpaired field contributes one).
+     */
+    if(NULL != ps_dpb_mgr->ps_dpb_short_term_head)
+    {
+        ih264_dpb_mgr_sort_short_term_fields_by_frame_num(ps_dpb_mgr,
+                                                          curr_frame_num,
+                                                          TOP_FIELD,
+                                                          max_frame_num);
+
+        ps_head = ps_dpb_mgr->ps_dpb_short_term_head;
+        for(ps_node = ps_head; NULL != ps_node; ps_node = ps_node->ps_prev_dpb)
+        {
+            WORD32 i4_frame_num = ps_node->ps_pic_buf->i4_frame_num;
+
+            if((ps_node == ps_head) || (i4_frame_num != i4_last_frame_num))
+            {
+                i4_total++;
+            }
+            i4_last_frame_num = i4_frame_num;
+        }
+    }
+
+    /*
+     * Long-term list: sort by long-term frame idx, then apply the same
+     * run counting (runs are still detected through frame_num).
+     */
+    if(NULL != ps_dpb_mgr->ps_dpb_long_term_head)
+    {
+        ih264_dpb_mgr_sort_long_term_fields_by_frame_idx(ps_dpb_mgr,
+                                                         TOP_FIELD);
+
+        ps_head = ps_dpb_mgr->ps_dpb_long_term_head;
+        for(ps_node = ps_head; NULL != ps_node; ps_node = ps_node->ps_prev_dpb)
+        {
+            WORD32 i4_frame_num = ps_node->ps_pic_buf->i4_frame_num;
+
+            if((ps_node == ps_head) || (i4_frame_num != i4_last_frame_num))
+            {
+                i4_total++;
+            }
+            i4_last_frame_num = i4_frame_num;
+        }
+    }
+
+    return i4_total;
+}
+
+/*
+ * Helper for ih264_dpb_mgr_delete_ref_frame(): marks the picture held by
+ * ps_node as unused for reference, frees the node (NULL ps_pic_buf) and
+ * decrements the per-field counter of the list selected by reference_type.
+ * The caller is responsible for unlinking the node from the list.
+ */
+static void ih264_dpb_mgr_release_node(dpb_mgr_t *ps_dpb_mgr,
+                                       dpb_info_t *ps_node,
+                                       WORD32 reference_type)
+{
+    UWORD8 *pu1_num_refs = (reference_type == SHORT_TERM_REF)
+                    ? &ps_dpb_mgr->u1_num_short_term_ref_bufs
+                    : &ps_dpb_mgr->u1_num_long_term_ref_bufs;
+
+    ps_node->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+    ps_node->ps_pic_buf = NULL;
+
+    /* The counter is unsigned; never let it wrap below zero */
+    if(*pu1_num_refs > 0)
+    {
+        (*pu1_num_refs)--;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Deletes the ref frame at the end of the linked list.
+ *
+ * @par Description:
+ *  Deletes the ref frame at the end of the linked list. For unpaired fields,
+ *  it deletes just the last node. For frame or complementary field pair (last
+ *  two nodes sharing the same frame_num), it deletes the last two nodes.
+ *  The short-term / long-term field counter is decremented once per deleted
+ *  node, mirroring the per-field increment done on insertion, and is forced
+ *  to zero when the list becomes empty.
+ *
+ * @param[in] ps_dpb_mgr
+ *  Pointer to the DPB manager structure
+ *
+ * @param[in] reference_type
+ *  This is used to select between short-term and long-term linked list.
+ *
+ * @returns
+ *  Always 0 (an empty list is not an error).
+ *
+ * @remarks
+ *  Assumes the nodes of the selected list are already sorted so that the
+ *  fields of one frame are adjacent.
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_delete_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                      WORD32 reference_type)
+{
+    dpb_info_t *ps_dpb_node1;
+    dpb_info_t *ps_dpb_node2;
+    dpb_info_t *ps_dpb_node3;
+
+    /* Select between short-term and long-term list. */
+    ps_dpb_node1 = (reference_type == SHORT_TERM_REF)
+                    ? ps_dpb_mgr->ps_dpb_short_term_head
+                    : ps_dpb_mgr->ps_dpb_long_term_head;
+
+    /* If null, no entries in the list. Hence return. */
+    if(ps_dpb_node1 == NULL)
+        return 0;
+
+    /* Only one node in the list: release it and leave the list empty. */
+    if(ps_dpb_node1->ps_prev_dpb == NULL)
+    {
+        ih264_dpb_mgr_release_node(ps_dpb_mgr, ps_dpb_node1, reference_type);
+
+        if(reference_type == SHORT_TERM_REF)
+        {
+            ps_dpb_mgr->ps_dpb_short_term_head = NULL;
+            /* List is empty: reset short-term buffer count */
+            ps_dpb_mgr->u1_num_short_term_ref_bufs = 0;
+        }
+        else
+        {
+            ps_dpb_mgr->ps_dpb_long_term_head = NULL;
+            /* List is empty: reset long-term buffer count */
+            ps_dpb_mgr->u1_num_long_term_ref_bufs = 0;
+        }
+        return 0;
+    }
+
+    /*
+     * Exactly two nodes in the list: the second node is always released.
+     * If both nodes share a frame_num they form one frame, so the first node
+     * is released as well and the list becomes empty.
+     */
+    ps_dpb_node2 = ps_dpb_node1->ps_prev_dpb;
+    if(ps_dpb_node2->ps_prev_dpb == NULL)
+    {
+        if(ps_dpb_node2->ps_pic_buf->i4_frame_num == ps_dpb_node1->ps_pic_buf->i4_frame_num)
+        {
+            ih264_dpb_mgr_release_node(ps_dpb_mgr, ps_dpb_node1, reference_type);
+
+            if(reference_type == SHORT_TERM_REF)
+            {
+                ps_dpb_mgr->ps_dpb_short_term_head = NULL;
+                /* List is empty: reset short-term buffer count */
+                ps_dpb_mgr->u1_num_short_term_ref_bufs = 0;
+            }
+            else
+            {
+                ps_dpb_mgr->ps_dpb_long_term_head = NULL;
+                /* List is empty: reset long-term buffer count */
+                ps_dpb_mgr->u1_num_long_term_ref_bufs = 0;
+            }
+        }
+        ih264_dpb_mgr_release_node(ps_dpb_mgr, ps_dpb_node2, reference_type);
+        ps_dpb_node1->ps_prev_dpb = NULL;
+        return 0;
+    }
+
+    /*
+     * More than two nodes: walk until node3 is the last node, node2 the one
+     * before it and node1 the one before node2.
+     */
+    ps_dpb_node3 = ps_dpb_node2->ps_prev_dpb;
+    while(ps_dpb_node3->ps_prev_dpb != NULL)
+    {
+        ps_dpb_node1 = ps_dpb_node2;
+        ps_dpb_node2 = ps_dpb_node3;
+        ps_dpb_node3 = ps_dpb_node3->ps_prev_dpb;
+    }
+
+    /*
+     * If node2 and node3 share a frame_num they belong to the same frame:
+     * release node2 too and terminate the list at node1.
+     */
+    if(ps_dpb_node2->ps_pic_buf->i4_frame_num == ps_dpb_node3->ps_pic_buf->i4_frame_num)
+    {
+        ih264_dpb_mgr_release_node(ps_dpb_mgr, ps_dpb_node2, reference_type);
+        ps_dpb_node1->ps_prev_dpb = NULL;
+    }
+
+    /* The last node is always released */
+    ih264_dpb_mgr_release_node(ps_dpb_mgr, ps_dpb_node3, reference_type);
+    ps_dpb_node2->ps_prev_dpb = NULL;
+
+    return 0;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Delete long-term ref fields above max frame idx.
+ *
+ * @par Description:
+ * Deletes all the long-term ref fields having idx greater than max_frame_idx
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] max_frame_idx
+ * Max long-term frame idx allowed.
+ *
+ * @returns
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_delete_long_ref_fields_max_frame_idx(dpb_mgr_t *ps_dpb_mgr,
+                                                          WORD32 max_frame_idx)
+{
+    dpb_info_t *ps_kept;
+    dpb_info_t *ps_cand;
+
+    /*
+     * Drop nodes from the head of the list until a node that has to be kept
+     * (long_term_frame_idx <= max_frame_idx) becomes the head.
+     */
+    while((ps_dpb_mgr->ps_dpb_long_term_head != NULL)
+                    && (ps_dpb_mgr->ps_dpb_long_term_head->ps_pic_buf->i4_long_term_frame_idx
+                                    > max_frame_idx))
+    {
+        ps_cand = ps_dpb_mgr->ps_dpb_long_term_head;
+        ps_dpb_mgr->ps_dpb_long_term_head = ps_cand->ps_prev_dpb;
+
+        ps_cand->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+        ps_cand->ps_pic_buf = NULL;
+
+        /* One field less in the long-term list (counter is unsigned) */
+        if(ps_dpb_mgr->u1_num_long_term_ref_bufs > 0)
+            ps_dpb_mgr->u1_num_long_term_ref_bufs--;
+    }
+
+    if(ps_dpb_mgr->ps_dpb_long_term_head == NULL)
+    {
+        /* Nothing left in the list */
+        ps_dpb_mgr->u1_num_long_term_ref_bufs = 0;
+        return 0;
+    }
+
+    /*
+     * ps_kept is the last node known to stay in the list, ps_cand the node
+     * right after it. When ps_cand is unlinked, ps_kept must NOT advance:
+     * its new successor has not been examined yet. Advancing unconditionally
+     * would let the second of two consecutive over-limit fields (e.g. both
+     * fields of one frame) survive.
+     */
+    ps_kept = ps_dpb_mgr->ps_dpb_long_term_head;
+    ps_cand = ps_kept->ps_prev_dpb;
+    while(ps_cand != NULL)
+    {
+        if(ps_cand->ps_pic_buf->i4_long_term_frame_idx > max_frame_idx)
+        {
+            ps_kept->ps_prev_dpb = ps_cand->ps_prev_dpb;
+
+            ps_cand->ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+            ps_cand->ps_pic_buf = NULL;
+
+            if(ps_dpb_mgr->u1_num_long_term_ref_bufs > 0)
+                ps_dpb_mgr->u1_num_long_term_ref_bufs--;
+        }
+        else
+        {
+            ps_kept = ps_cand;
+        }
+        ps_cand = ps_kept->ps_prev_dpb;
+    }
+
+    return 0;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Deletes the short-term with least frame_num
+ *
+ * @par Description:
+ * Deletes the short-term ref frame with the least frame_num. It sorts the
+ * short-term linked list by frame_num and then calls the function that
+ * deletes the last frame in the linked list.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @param[in] curr_frame_num
+ * frame_num of the current pic
+ *
+ * @param[in] max_frame_num
+ * Maximum frame_num allowed
+ *
+ * @returns
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_delete_short_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+                                            WORD32 curr_frame_num,
+                                            WORD32 max_frame_num)
+{
+    WORD32 status;
+
+    /*
+     * Bring the short-term list into frame_num order first. The result of
+     * the sort is not part of this function's return value.
+     */
+    (void)ih264_dpb_mgr_sort_short_term_fields_by_frame_num(ps_dpb_mgr,
+                                                            curr_frame_num,
+                                                            TOP_FIELD,
+                                                            max_frame_num);
+
+    /* Remove the frame (or unpaired field) sitting at the tail of the list */
+    status = ih264_dpb_mgr_delete_ref_frame(ps_dpb_mgr, SHORT_TERM_REF);
+    if(status != 0)
+    {
+        ASSERT(0);
+    }
+
+    return status;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Deletes all the ref frames.
+ *
+ * @par Description:
+ * Deletes all of the ref frames/fields in the short-term and long-term linked
+ * list.
+ *
+ * @param[in] ps_dpb_mgr
+ * Pointer to the DPB manager structure
+ *
+ * @returns
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ih264_dpb_mgr_delete_all_ref_frames(dpb_mgr_t *ps_dpb_mgr)
+{
+    WORD32 ai4_ref_types[2];
+    dpb_info_t **apps_heads[2];
+    WORD32 list;
+
+    /* Short-term list is emptied first, then the long-term list */
+    ai4_ref_types[0] = SHORT_TERM_REF;
+    apps_heads[0] = &ps_dpb_mgr->ps_dpb_short_term_head;
+    ai4_ref_types[1] = LONG_TERM_REF;
+    apps_heads[1] = &ps_dpb_mgr->ps_dpb_long_term_head;
+
+    for(list = 0; list < 2; list++)
+    {
+        /* Keep deleting the tail frame until the list head becomes NULL */
+        while(NULL != *apps_heads[list])
+        {
+            ih264_dpb_mgr_delete_ref_frame(ps_dpb_mgr, ai4_ref_types[list]);
+        }
+    }
+
+    return 0;
+}
+
+
+/*
+ * Releases every picture of the node pool that is still marked as used for
+ * reference and empties both reference lists.
+ *
+ * ps_dpb_mgr : DPB manager to reset
+ * ps_buf_mgr : buffer manager from which the reference hold (BUF_MGR_REF) of
+ *              each released picture is dropped
+ *
+ * The function starts with ASSERT(0), i.e. it is currently flagged as not
+ * expected to be called.
+ */
+void ih264_dpb_mgr_reset(dpb_mgr_t *ps_dpb_mgr, buf_mgr_t *ps_buf_mgr)
+{
+    WORD32 i;
+    dpb_info_t *ps_dpb_info;
+    ASSERT(0);
+
+    ps_dpb_info = ps_dpb_mgr->as_dpb_info;
+
+    for(i = 0; i < MAX_DPB_BUFS; i++)
+    {
+        pic_buf_t *ps_pic_buf = ps_dpb_info[i].ps_pic_buf;
+
+        /*
+         * Free nodes of the pool carry a NULL ps_pic_buf; they must be
+         * skipped before looking at i4_used_as_ref.
+         */
+        if((NULL != ps_pic_buf) && ps_pic_buf->i4_used_as_ref)
+        {
+            ps_pic_buf->i4_used_as_ref = UNUSED_FOR_REF;
+            ps_dpb_info[i].ps_prev_dpb = NULL;
+
+            /* Release physical buffer */
+            ih264_buf_mgr_release(ps_buf_mgr, ps_pic_buf->i4_buf_id,
+                                  BUF_MGR_REF);
+
+            ps_dpb_info[i].ps_pic_buf = NULL;
+        }
+    }
+
+    ps_dpb_mgr->u1_num_short_term_ref_bufs = 0;
+    ps_dpb_mgr->u1_num_long_term_ref_bufs = 0;
+    ps_dpb_mgr->ps_dpb_short_term_head = NULL;
+    ps_dpb_mgr->ps_dpb_long_term_head = NULL;
+
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * deletes all pictures from DPB
+ *
+ * @par Description:
+ * Deletes all pictures present in the DPB manager
+ *
+ * @param[in] ps_buf_mgr
+ * Pointer to buffer manager structure
+ *
+ * @param[in] u1_disp_bufs
+ * Number of buffers to be deleted
+ *
+ * @returns
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+
+void ih264_dpb_mgr_release_pics(buf_mgr_t *ps_buf_mgr, UWORD8 u1_disp_bufs)
+{
+    /*
+     * The index must be wide enough to reach every value of the UWORD8 bound;
+     * an 8-bit signed index cannot count past 127 and would never terminate
+     * the loop for larger buffer counts.
+     */
+    WORD32 i;
+    UWORD32 buf_status;
+
+    /* Function is currently flagged as not expected to be called */
+    ASSERT(0);
+
+    for(i = 0; i < u1_disp_bufs; i++)
+    {
+        /* Only buffers with a non-zero status are released as reference */
+        buf_status = ih264_buf_mgr_get_status(ps_buf_mgr, i);
+        if(0 != buf_status)
+        {
+            ih264_buf_mgr_release(ps_buf_mgr, i, BUF_MGR_REF);
+        }
+    }
+}
diff --git a/common/ih264_dpb_mgr.h b/common/ih264_dpb_mgr.h
new file mode 100755
index 0000000..b0cf0fd
--- /dev/null
+++ b/common/ih264_dpb_mgr.h
@@ -0,0 +1,186 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264_dpb_mgr.h
+ *
+ * @brief
+ * Function declarations used for decoded picture buffer management
+ *
+ * @author
+ * Srinivas T
+ *
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+#ifndef _IH264_DPB_MGR_H_
+#define _IH264_DPB_MGR_H_
+
+/* Temporary definitions. Have to be defined later */
+
+/*
+ * Number of entries in the dpb_info node pool and in the per-field pic buf
+ * arrays of dpb_mgr_t.
+ * NOTE(review): MAX_DPB_SIZE is defined outside this header; the factor 4 is
+ * not explained here - confirm the intended sizing.
+ */
+#define MAX_DPB_BUFS (MAX_DPB_SIZE * 4)
+
+/*
+ * NOTE(review): the values below look like H.264
+ * memory_management_control_operation codes (1, 2, 3 and 5) - confirm
+ * against the code that consumes them.
+ */
+#define MARK_ST_PICNUM_AS_NONREF 1
+#define MARK_LT_INDEX_AS_NONREF 2
+#define MARK_ST_PICNUM_AS_LT_INDEX 3
+#define RESET_REF_PICTURES 5
+
+typedef struct dpb_info_t dpb_info_t;
+
+/**
+ * Reference marking of a picture. These values are passed as reference_type
+ * to the DPB manager functions and stored in pic_buf_t::i4_used_as_ref.
+ * UNUSED_FOR_REF is zero, so i4_used_as_ref can be tested as a boolean.
+ */
+enum
+{
+ INVALID = -1,
+ UNUSED_FOR_REF = 0 ,
+ LONG_TERM_REF ,
+ SHORT_TERM_REF ,
+};
+/**
+ * Node of the singly linked short-term / long-term reference lists.
+ */
+struct dpb_info_t
+{
+ /**
+ * Pointer to picture buffer structure.
+ * NULL marks the node as free in the node pool.
+ */
+ pic_buf_t *ps_pic_buf;
+
+ /**
+ * Link to the next node of the list (the node that was the head when
+ * this one was inserted); NULL terminates the list.
+ */
+ dpb_info_t *ps_prev_dpb;
+
+};
+
+/**
+ * DPB manager state: two singly linked reference lists (short-term and
+ * long-term) whose nodes are taken from a fixed pool.
+ */
+typedef struct
+{
+ /**
+ * Head of the short-term reference list; NULL when the list is empty.
+ * New fields are inserted at the head.
+ */
+ dpb_info_t *ps_dpb_short_term_head;
+
+ /**
+ * Head of the long-term reference list; NULL when the list is empty.
+ * New fields are inserted at the head.
+ */
+ dpb_info_t *ps_dpb_long_term_head;
+
+ /**
+ * Physical storage for dpbInfo for ref bufs (node pool shared by both
+ * lists; a node with NULL ps_pic_buf is free)
+ */
+ dpb_info_t as_dpb_info[MAX_DPB_BUFS];
+
+ /**
+ * Array of structures for top field.
+ */
+ pic_buf_t as_top_field_pics[MAX_DPB_BUFS];
+
+ /**
+ * Array of structures for bottom field, indexed by the buf id of the
+ * corresponding top field pic buf.
+ */
+ pic_buf_t as_bottom_field_pics[MAX_DPB_BUFS];
+
+ /**
+ * Number of short-term reference buffers (incremented once per field
+ * inserted into the short-term list)
+ */
+ UWORD8 u1_num_short_term_ref_bufs;
+
+ /**
+ * Number of long-term reference buffers (incremented once per field
+ * inserted into the long-term list)
+ */
+ UWORD8 u1_num_long_term_ref_bufs;
+
+ /**
+ * buffer ID current frame
+ */
+ WORD32 i4_cur_frame_buf_id;
+
+} dpb_mgr_t;
+
+void ih264_dpb_mgr_init(dpb_mgr_t *ps_dpb_mgr);
+
+/*
+ * Inserts the top field (ps_pic_buf) and its bottom field into the list
+ * selected by reference_type. Returns 0 on success, non-zero on failure.
+ */
+WORD32 ih264_dpb_mgr_insert_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+ pic_buf_t *ps_pic_buf,
+ WORD32 reference_type,
+ UWORD32 frame_num,
+ WORD32 long_term_frame_idx);
+
+/*
+ * Removes the frame at the tail of the selected list: the last node, plus the
+ * node before it when both share the same frame_num.
+ */
+WORD32 ih264_dpb_mgr_delete_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 reference_type);
+
+/* Empties the short-term and the long-term list. */
+WORD32 ih264_dpb_mgr_delete_all_ref_frames(dpb_mgr_t *ps_dpb_mgr);
+
+/*
+ * Sorts both lists and returns the number of frames / complementary field
+ * pairs / unpaired fields held in them.
+ */
+WORD32 ih264_dpb_mgr_count_ref_frames(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 curr_frame_num,
+ WORD32 max_frame_num);
+
+/* Sorts the short-term list by frame_num and deletes the frame at its tail. */
+WORD32 ih264_dpb_mgr_delete_short_ref_frame(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 curr_frame_num,
+ WORD32 max_frame_num);
+
+/*
+ * Inserts a single field at the head of the selected list. Returns -1 when
+ * the field is already present with that reference type or when no free node
+ * is left, 0 otherwise.
+ */
+WORD32 ih264_dpb_mgr_insert_ref_field(dpb_mgr_t *ps_dpb_mgr,
+ pic_buf_t *ps_pic_buf,
+ WORD32 reference_type,
+ UWORD32 frame_num,
+ WORD32 long_term_frame_idx);
+
+/* NOTE(review): no definition of this function was visible during review - confirm it exists. */
+WORD32 ih264_dpb_mgr_delete_ref_field(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 reference_type);
+
+WORD32 ih264_dpb_mgr_alternate_ref_fields(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 reference_type,
+ WORD32 first_field_type);
+
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_frame_num(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 curr_frame_num,
+ WORD32 first_field_type,
+ WORD32 max_frame_num);
+
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_poc_l0(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 curr_poc,
+ WORD32 first_field_type);
+
+WORD32 ih264_dpb_mgr_sort_short_term_fields_by_poc_l1(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 curr_poc,
+ WORD32 first_field_type);
+
+WORD32 ih264_dpb_mgr_sort_long_term_fields_by_frame_idx(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 first_field_type);
+
+/* Removes every long-term field whose long_term_frame_idx exceeds max_frame_idx. */
+WORD32 ih264_dpb_mgr_delete_long_ref_fields_max_frame_idx(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 max_frame_idx);
+
+void ih264_dpb_mgr_del_ref(dpb_mgr_t *ps_dpb_mgr,
+ buf_mgr_t *ps_buf_mgr,
+ WORD32 u4_abs_poc);
+
+pic_buf_t *ih264_dpb_mgr_get_ref_by_nearest_poc(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 cur_abs_poc);
+
+pic_buf_t *ih264_dpb_mgr_get_ref_by_poc(dpb_mgr_t *ps_dpb_mgr, WORD32 abs_poc);
+
+pic_buf_t *ih264_dpb_mgr_get_ref_by_poc_lsb(dpb_mgr_t *ps_dpb_mgr,
+ WORD32 poc_lsb);
+
+/* The definitions of the two functions below start with ASSERT(0). */
+void ih264_dpb_mgr_reset(dpb_mgr_t *ps_dpb_mgr, buf_mgr_t *ps_buf_mgr);
+
+void ih264_dpb_mgr_release_pics(buf_mgr_t *ps_buf_mgr, UWORD8 u1_disp_bufs);
+
+#endif /* _IH264_DPB_MGR_H_ */
diff --git a/common/ih264_error.h b/common/ih264_error.h
new file mode 100755
index 0000000..ff1662d
--- /dev/null
+++ b/common/ih264_error.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_error.h
+*
+* @brief
+* Definitions related to error handling for common modules
+*
+* @author
+* Harish
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IH264_ERROR_H_
+#define _IH264_ERROR_H_
+
+/**
+ * Enumerations for error codes used in the codec.
+ * Not all these are expected to be returned to the application.
+ * Only select few will be exported
+ */
+typedef enum
+{
+ /**
+ * No error
+ */
+ IH264_SUCCESS = 0,
+ /**
+ * Start error code for decoder (decoder-specific codes begin at 0x100)
+ */
+ IH264_DEC_ERROR_START = 0x100,
+
+ /**
+ * Start error code for encoder (encoder-specific codes begin at 0x200)
+ */
+ IH264_ENC_ERROR_START = 0x200,
+ /**
+ * Generic failure (largest positive 32-bit signed value)
+ */
+ IH264_FAIL = 0x7FFFFFFF
+}IH264_ERROR_T;
+
+#endif /* _IH264_ERROR_H_ */
diff --git a/common/ih264_ihadamard_scaling.c b/common/ih264_ihadamard_scaling.c
new file mode 100755
index 0000000..e4729c8
--- /dev/null
+++ b/common/ih264_ihadamard_scaling.c
@@ -0,0 +1,216 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_ihadamard_scaling.c
+ *
+ * @brief
+ * Contains definition of functions for h264 inverse hadamard 4x4 transform and scaling
+ *
+ * @author
+ * Mohit
+ *
+ * @par List of Functions:
+ * - ih264_ihadamard_scaling_4x4()
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
+ * of a 16x16 intra prediction macroblock, and then performs scaling.
+ *
+ * @par Description:
+ * The DC coefficients pass through a 2-stage inverse hadamard transform.
+ * This inverse transformed content is then scaled based on the Qp value.
+ *
+ * @param[in] pi2_src
+ * input 4x4 block of DC coefficients
+ *
+ * @param[out] pi2_out
+ * output 4x4 block
+ *
+ * @param[in] pu2_iscal_mat
+ * pointer to scaling list
+ *
+ * @param[in] pu2_weigh_mat
+ * pointer to weight matrix
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6)
+ *
+ * @param[in] pi4_tmp
+ * temporary buffer of size 1*16
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void ih264_ihadamard_scaling_4x4(WORD16* pi2_src,
+                                 WORD16* pi2_out,
+                                 const UWORD16 *pu2_iscal_mat,
+                                 const UWORD16 *pu2_weigh_mat,
+                                 UWORD32 u4_qp_div_6,
+                                 WORD32* pi4_tmp)
+{
+    WORD32 row, col, idx;
+    WORD32 a, b, c, d;
+    WORD32 sum_outer, sum_inner, diff_inner, diff_outer;
+    WORD32 rnd_fact;
+
+    /* Rounding offset applied before the final shift by 6; zero for qp/6 >= 6 */
+    if(u4_qp_div_6 < 6)
+    {
+        rnd_fact = 1 << (5 - u4_qp_div_6);
+    }
+    else
+    {
+        rnd_fact = 0;
+    }
+
+    /* Stage 1: butterfly along each row, source -> temporary buffer */
+    for(row = 0; row < SUB_BLK_WIDTH_4x4; row++)
+    {
+        WORD16 *pi2_row = pi2_src + row * SUB_BLK_WIDTH_4x4;
+        WORD32 *pi4_row = pi4_tmp + row * SUB_BLK_WIDTH_4x4;
+
+        a = pi2_row[0];
+        b = pi2_row[1];
+        c = pi2_row[2];
+        d = pi2_row[3];
+
+        sum_outer = a + d;
+        sum_inner = b + c;
+        diff_inner = b - c;
+        diff_outer = a - d;
+
+        pi4_row[0] = sum_outer + sum_inner;
+        pi4_row[1] = diff_inner + diff_outer;
+        pi4_row[2] = sum_outer - sum_inner;
+        pi4_row[3] = diff_outer - diff_inner;
+    }
+
+    /* Stage 2: same butterfly along each column, in place in the temporary buffer */
+    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
+    {
+        WORD32 *pi4_col = pi4_tmp + col;
+
+        a = pi4_col[0];
+        b = pi4_col[4];
+        c = pi4_col[8];
+        d = pi4_col[12];
+
+        sum_outer = a + d;
+        sum_inner = b + c;
+        diff_inner = b - c;
+        diff_outer = a - d;
+
+        pi4_col[0] = sum_outer + sum_inner;
+        pi4_col[4] = diff_inner + diff_outer;
+        pi4_col[8] = sum_outer - sum_inner;
+        pi4_col[12] = diff_outer - diff_inner;
+    }
+
+    /*
+     * Stage 3: scale every coefficient with the first entry of the scaling
+     * and weight matrices and store the result in the 16-bit output block.
+     */
+    for(idx = 0; idx < (SUB_BLK_WIDTH_4x4 * SUB_BLK_WIDTH_4x4); idx++)
+    {
+        INV_QUANT(pi4_tmp[idx], pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6,
+                  rnd_fact, 6);
+        pi2_out[idx] = pi4_tmp[idx];
+    }
+}
+
+/*
+ * 2x2 inverse hadamard transform followed by scaling, applied independently
+ * to two consecutive groups of four coefficients: pi2_src[0..3] -> pi2_out[0..3]
+ * and pi2_src[4..7] -> pi2_out[4..7].
+ * NOTE(review): presumably the two groups are the U and V chroma DC blocks -
+ * confirm against the callers.
+ *
+ * Scaling uses the first entry of pu2_iscal_mat / pu2_weigh_mat, no rounding
+ * offset and a final shift of 5. pi4_tmp is not used.
+ */
+void ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src,
+                                    WORD16* pi2_out,
+                                    const UWORD16 *pu2_iscal_mat,
+                                    const UWORD16 *pu2_weigh_mat,
+                                    UWORD32 u4_qp_div_6,
+                                    WORD32* pi4_tmp)
+{
+    WORD32 plane;
+
+    UNUSED(pi4_tmp);
+
+    for(plane = 0; plane < 2; plane++)
+    {
+        WORD16 *pi2_in = pi2_src + 4 * plane;
+        WORD16 *pi2_dst = pi2_out + 4 * plane;
+
+        WORD32 i4_c0 = pi2_in[0];
+        WORD32 i4_c1 = pi2_in[1];
+        WORD32 i4_c2 = pi2_in[2];
+        WORD32 i4_c3 = pi2_in[3];
+
+        /* First butterfly: sum / difference inside each coefficient pair */
+        WORD32 i4_top_sum = i4_c0 + i4_c1;
+        WORD32 i4_top_diff = i4_c0 - i4_c1;
+        WORD32 i4_bot_sum = i4_c2 + i4_c3;
+        WORD32 i4_bot_diff = i4_c2 - i4_c3;
+
+        /* Second butterfly: combine the two pairs */
+        WORD32 i4_r0 = i4_top_sum + i4_bot_sum;
+        WORD32 i4_r1 = i4_top_diff + i4_bot_diff;
+        WORD32 i4_r2 = i4_top_sum - i4_bot_sum;
+        WORD32 i4_r3 = i4_top_diff - i4_bot_diff;
+
+        INV_QUANT(i4_r0,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5);
+        INV_QUANT(i4_r1,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5);
+        INV_QUANT(i4_r2,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5);
+        INV_QUANT(i4_r3,pu2_iscal_mat[0],pu2_weigh_mat[0],u4_qp_div_6,0,5);
+
+        pi2_dst[0] = i4_r0;
+        pi2_dst[1] = i4_r1;
+        pi2_dst[2] = i4_r2;
+        pi2_dst[3] = i4_r3;
+    }
+}
diff --git a/common/ih264_inter_pred_filters.c b/common/ih264_inter_pred_filters.c
new file mode 100755
index 0000000..7d1e407
--- /dev/null
+++ b/common/ih264_inter_pred_filters.c
@@ -0,0 +1,1042 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_inter_pred_filters.c
+ *
+ * @brief
+ * Contains function definitions for inter prediction interpolation filters
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ih264_inter_pred_luma_copy
+ * - ih264_interleave_copy
+ * - ih264_inter_pred_luma_horz
+ * - ih264_inter_pred_luma_vert
+ * - ih264_inter_pred_luma_horz_hpel_vert_hpel
+ * - ih264_inter_pred_luma_horz_qpel
+ * - ih264_inter_pred_luma_vert_qpel
+ * - ih264_inter_pred_luma_horz_qpel_vert_qpel
+ * - ih264_inter_pred_luma_horz_hpel_vert_qpel
+ * - ih264_inter_pred_luma_horz_qpel_vert_hpel
+ * - ih264_inter_pred_luma_bilinear
+ * - ih264_inter_pred_chroma
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_inter_pred_filters.h"
+
+
+/*****************************************************************************/
+/* Constant Data variables */
+/*****************************************************************************/
+
+/* Coefficients for 6 tap filtering, stored as one half of a symmetric kernel
+ * (1, -5, 20, 20, -5, 1): the filters below that use this table apply
+ * entry 0 to the two outermost taps, entry 1 to the next pair inwards and
+ * entry 2 to the two centre taps. */
+const WORD32 ih264_g_six_tap[3] ={1,-5,20};
+
+
+/*****************************************************************************/
+/* Function definitions . */
+/*****************************************************************************/
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Interprediction luma function for copy
+ *
+ * @par Description:
+ * Copies the array of width 'wd' and height 'ht' from the location pointed
+ * by 'src' to the location pointed by 'dst'
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ *
+ * @param[in] ht
+ * integer height of the array
+ *
+ * @param[in] wd
+ * integer width of the array
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+void ih264_inter_pred_luma_copy(UWORD8 *pu1_src,
+                                UWORD8 *pu1_dst,
+                                WORD32 src_strd,
+                                WORD32 dst_strd,
+                                WORD32 ht,
+                                WORD32 wd,
+                                UWORD8* pu1_tmp,
+                                WORD32 dydx)
+{
+    /*
+     * Plain block copy: 'ht' rows of 'wd' bytes each are moved from pu1_src
+     * to pu1_dst, and each pointer advances by its own stride after every
+     * row.  pu1_tmp and dydx are accepted but not used.
+     */
+    WORD32 i4_rows_left;
+    WORD32 i4_bytes_left;
+
+    UNUSED(pu1_tmp);
+    UNUSED(dydx);
+
+    for(i4_rows_left = ht; i4_rows_left > 0; i4_rows_left--)
+    {
+        UWORD8 *pu1_in = pu1_src;
+        UWORD8 *pu1_out = pu1_dst;
+
+        /* copy one row byte by byte, left to right */
+        for(i4_bytes_left = wd; i4_bytes_left > 0; i4_bytes_left--)
+        {
+            *pu1_out++ = *pu1_in++;
+        }
+
+        pu1_src += src_strd;
+        pu1_dst += dst_strd;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Function for copying to an interleaved destination
+ *
+ * @par Description:
+ * Copies the array of width 'wd' and height 'ht' from the location pointed
+ * by 'src' to the location pointed by 'dst'
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ht
+ * integer height of the array
+ *
+ * @param[in] wd
+ * integer width of the array
+ *
+ * @returns
+ *
+ * @remarks
+ * The alternate elements of src will be copied to alternate locations in dst
+ * Other locations are not touched
+ *
+ *******************************************************************************
+ */
+void ih264_interleave_copy(UWORD8 *pu1_src,
+                           UWORD8 *pu1_dst,
+                           WORD32 src_strd,
+                           WORD32 dst_strd,
+                           WORD32 ht,
+                           WORD32 wd)
+{
+    /*
+     * Copies every second byte of each row: the bytes at offsets
+     * 0, 2, 4, ... 2 * (wd - 1) are read from pu1_src and written to the
+     * same offsets of pu1_dst.  The odd offsets of the destination are
+     * never written.
+     */
+    WORD32 i4_y, i4_x;
+    const WORD32 i4_row_bytes = 2 * wd;
+
+    for(i4_y = 0; i4_y < ht; i4_y++, pu1_src += src_strd, pu1_dst += dst_strd)
+    {
+        i4_x = 0;
+        while(i4_x < i4_row_bytes)
+        {
+            pu1_dst[i4_x] = pu1_src[i4_x];
+            i4_x += 2;
+        }
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Interprediction luma filter for horizontal input
+ *
+ * @par Description:
+ * Applies a 6 tap horizontal filter .The output is clipped to 8 bits
+ * sec 8.4.2.2.1 titled "Luma sample interpolation process"
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ht
+ * integer height of the array
+ *
+ * @param[in] wd
+ * integer width of the array
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_inter_pred_luma_horz(UWORD8 *pu1_src,
+                                UWORD8 *pu1_dst,
+                                WORD32 src_strd,
+                                WORD32 dst_strd,
+                                WORD32 ht,
+                                WORD32 wd,
+                                UWORD8* pu1_tmp,
+                                WORD32 dydx)
+{
+    /*
+     * Horizontal 6-tap filter.  For every output sample the six source
+     * bytes at offsets -2 .. +3 of the same row are weighted with
+     * ih264_g_six_tap (outer pair, middle pair, centre pair), the sum is
+     * rounded with (sum + 16) >> 5 and the result goes through CLIP_U8.
+     * The source must therefore be readable 2 bytes to the left and
+     * 3 bytes to the right of each row.  pu1_tmp and dydx are not used.
+     */
+    WORD32 i4_y, i4_x;
+
+    UNUSED(pu1_tmp);
+    UNUSED(dydx);
+
+    for(i4_y = 0; i4_y < ht; i4_y++, pu1_src += src_strd, pu1_dst += dst_strd)
+    {
+        for(i4_x = 0; i4_x < wd; i4_x++)
+        {
+            const UWORD8 *pu1_c = pu1_src + i4_x;
+            WORD16 i2_filt;
+
+            i2_filt = ih264_g_six_tap[0] * (pu1_c[-2] + pu1_c[3])
+                    + ih264_g_six_tap[1] * (pu1_c[-1] + pu1_c[2])
+                    + ih264_g_six_tap[2] * (pu1_c[0] + pu1_c[1]);
+            i2_filt = (i2_filt + 16) >> 5;
+            pu1_dst[i4_x] = CLIP_U8(i2_filt);
+        }
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Interprediction luma filter for vertical input
+ *
+ * @par Description:
+ * Applies a 6 tap vertical filter.The output is clipped to 8 bits
+ * sec 8.4.2.2.1 titled "Luma sample interpolation process"
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ht
+ * integer height of the array
+ *
+ * @param[in] wd
+ * integer width of the array
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_inter_pred_luma_vert(UWORD8 *pu1_src,
+                                UWORD8 *pu1_dst,
+                                WORD32 src_strd,
+                                WORD32 dst_strd,
+                                WORD32 ht,
+                                WORD32 wd,
+                                UWORD8* pu1_tmp,
+                                WORD32 dydx)
+{
+    /*
+     * Vertical 6-tap filter.  For every output sample the six source bytes
+     * of the same column in rows -2 .. +3 (relative to the current row) are
+     * weighted with ih264_g_six_tap, the sum is rounded with
+     * (sum + 16) >> 5 and the result goes through CLIP_U8.  The source must
+     * therefore be readable 2 rows above and 3 rows below the block.
+     * pu1_tmp and dydx are not used.
+     */
+    WORD32 i4_y, i4_x;
+
+    UNUSED(pu1_tmp);
+    UNUSED(dydx);
+
+    for(i4_y = 0; i4_y < ht; i4_y++, pu1_src += src_strd, pu1_dst += dst_strd)
+    {
+        for(i4_x = 0; i4_x < wd; i4_x++)
+        {
+            const UWORD8 *pu1_c = pu1_src + i4_x;
+            WORD16 i2_filt;
+
+            i2_filt = ih264_g_six_tap[0]
+                          * (pu1_c[-2 * src_strd] + pu1_c[3 * src_strd])
+                    + ih264_g_six_tap[1]
+                          * (pu1_c[-1 * src_strd] + pu1_c[2 * src_strd])
+                    + ih264_g_six_tap[2]
+                          * (pu1_c[0] + pu1_c[1 * src_strd]);
+            i2_filt = (i2_filt + 16) >> 5;
+            pu1_dst[i4_x] = CLIP_U8(i2_filt);
+        }
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_hpel \endif
+ *
+ * \brief
+ * This function implements a two stage cascaded six tap filter. It
+ * applies the six tap filter in the vertical direction on the
+ * predictor values, followed by applying the same filter in the
+ * horizontal direction on the output of the first stage. The six tap
+ * filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+ * interpolation process"
+ *
+ * \param pu1_src: Pointer to the buffer containing the predictor values.
+ * pu1_src could point to the frame buffer or the predictor buffer.
+ * \param pu1_dst: Pointer to the destination buffer where the output of
+ * the six tap filter is stored.
+ * \param ht: Height of the rectangular pixel grid to be interpolated
+ * \param wd: Width of the rectangular pixel grid to be interpolated
+ * \param src_strd: Width of the buffer pointed to by pu1_src.
+ * \param dst_strd: Width of the destination buffer
+ * \param pu1_tmp: temporary buffer.
+ * \param dydx: x and y reference offset for qpel calculations: UNUSED in this function.
+ *
+ * \return
+ * None.
+ *
+ * \note
+ * This function takes the 8 bit predictor values, applies the six tap
+ * filter in the horizontal direction and outputs the result clipped to
+ * 8 bit precision. The input is stored in the buffer pointed to by
+ * pu1_src while the output is stored in the buffer pointed by pu1_dst.
+ * Both pu1_src and pu1_dst could point to the same buffer i.e. the
+ * six tap filter could be done in place.
+ *
+ **************************************************************************
+ */
+void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 ht,
+                                               WORD32 wd,
+                                               UWORD8* pu1_tmp,
+                                               WORD32 dydx)
+{
+    /*
+     * Two passes over a WORD16 scratch area carved out of pu1_tmp:
+     *   pass 1 - unrounded vertical 6-tap sums for columns -2 .. wd + 2 of
+     *            every output row, stored with a row pitch of wd + 5;
+     *   pass 2 - horizontal 6-tap filter over those sums, rounded with
+     *            (sum + 512) >> 10 and passed through CLIP_U8.
+     * dydx is not used.
+     * NOTE(review): pu1_tmp is reinterpreted as WORD16 *, so it presumably
+     * has to be suitably aligned and must hold ht * (wd + 5) WORD16
+     * entries - confirm against the callers.
+     */
+    WORD32 i4_y, i4_x;
+    WORD32 i4_sum;
+    const WORD32 i4_pitch = wd + 5;
+    WORD16 *pi2_col0;   /* column 0 of the first scratch row */
+    WORD16 *pi2_line;   /* column 0 of the scratch row being processed */
+
+    UNUSED(dydx);
+
+    pi2_col0 = ((WORD16 *)pu1_tmp) + 2;
+
+    /* pass 1: vertical sums, including the 2 + 3 extra columns pass 2 needs */
+    pi2_line = pi2_col0;
+    for(i4_y = 0; i4_y < ht; i4_y++)
+    {
+        for(i4_x = -2; i4_x < wd + 3; i4_x++)
+        {
+            const UWORD8 *pu1_c = pu1_src + i4_x;
+
+            i4_sum = ih264_g_six_tap[0]
+                         * (pu1_c[-2 * src_strd] + pu1_c[3 * src_strd])
+                   + ih264_g_six_tap[1]
+                         * (pu1_c[-1 * src_strd] + pu1_c[2 * src_strd])
+                   + ih264_g_six_tap[2]
+                         * (pu1_c[0] + pu1_c[1 * src_strd]);
+            pi2_line[i4_x] = i4_sum;
+        }
+        pu1_src += src_strd;
+        pi2_line += i4_pitch;
+    }
+
+    /* pass 2: horizontal filter on the intermediate sums */
+    pi2_line = pi2_col0;
+    for(i4_y = 0; i4_y < ht; i4_y++)
+    {
+        for(i4_x = 0; i4_x < wd; i4_x++)
+        {
+            const WORD16 *pi2_c = pi2_line + i4_x;
+
+            i4_sum = ih264_g_six_tap[0] * (pi2_c[-2] + pi2_c[3])
+                   + ih264_g_six_tap[1] * (pi2_c[-1] + pi2_c[2])
+                   + ih264_g_six_tap[2] * (pi2_c[0] + pi2_c[1]);
+            i4_sum = (i4_sum + 512) >> 10;
+            pu1_dst[i4_x] = CLIP_U8(i4_sum);
+        }
+        pi2_line += i4_pitch;
+        pu1_dst += dst_strd;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264_inter_pred_luma_horz_qpel \endif
+ *
+ * \brief
+ * This routine applies the six tap filter to the predictors in the
+ * horizontal direction. The six tap filtering operation is described in
+ * sec 8.4.2.2.1 titled "Luma sample interpolation process"
+ *
+ * \param pu1_src: Pointer to the buffer containing the predictor values.
+ * pu1_src could point to the frame buffer or the predictor buffer.
+ * \param pu1_dst: Pointer to the destination buffer where the output of
+ * the six tap filter is stored.
+ * \param ht: Height of the rectangular pixel grid to be interpolated
+ * \param wd: Width of the rectangular pixel grid to be interpolated
+ * \param src_strd: Width of the buffer pointed to by pu1_src.
+ * \param dst_strd: Width of the destination buffer
+ * \param pu1_tmp: temporary buffer: UNUSED in this function
+ * \param dydx: x and y reference offset for qpel calculations.
+ *
+ * \return
+ * None.
+ *
+ * \note
+ * This function takes the 8 bit predictor values, applies the six tap
+ * filter in the horizontal direction and outputs the result clipped to
+ * 8 bit precision. The input is stored in the buffer pointed to by
+ * pu1_src while the output is stored in the buffer pointed by pu1_dst.
+ * Both pu1_src and pu1_dst could point to the same buffer i.e. the
+ * six tap filter could be done in place.
+ *
+ **************************************************************************
+ */
+void ih264_inter_pred_luma_horz_qpel(UWORD8 *pu1_src,
+                                     UWORD8 *pu1_dst,
+                                     WORD32 src_strd,
+                                     WORD32 dst_strd,
+                                     WORD32 ht,
+                                     WORD32 wd,
+                                     UWORD8* pu1_tmp,
+                                     WORD32 dydx)
+{
+    /*
+     * Horizontal quarter-sample interpolation: each output is the rounded
+     * average of the horizontally 6-tap filtered value and one unfiltered
+     * source sample.  The low two bits of dydx select that sample: the
+     * current column when bit 1 is clear, the next column when it is set.
+     * pu1_tmp is not used.
+     * NOTE(review): the loop stores to pu1_dst before later columns read
+     * pu1_src[-1] / pu1_src[-2], so this looks unsafe for
+     * pu1_dst == pu1_src - confirm no caller relies on in-place operation.
+     */
+    WORD32 row, col;
+    UWORD8 *pu1_pred1;
+    WORD32 x_offset = dydx & 0x3;
+    UNUSED(pu1_tmp);
+    pu1_pred1 = pu1_src + (x_offset >> 1);
+
+    for(row = 0; row < ht; row++)
+    {
+        for(col = 0; col < wd; col++, pu1_src++, pu1_dst++)
+        {
+            WORD16 i2_temp;
+            /* 6-tap kernel (1, -5, 20, 20, -5, 1).  Written with
+               multiplications: the previous shift-based form left-shifted
+               an intermediate that can be negative, which is undefined
+               behaviour in C. */
+            i2_temp = pu1_src[-2] + pu1_src[3]
+                    - 5 * (pu1_src[-1] + pu1_src[2])
+                    + 20 * (pu1_src[0] + pu1_src[1]);
+            i2_temp = (i2_temp + 16) >> 5;
+            i2_temp = CLIP_U8(i2_temp);
+            /* average with the selected full-sample neighbour */
+            *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
+
+            pu1_pred1++;
+        }
+        /* the inner loop advanced every pointer by wd; finish the stride */
+        pu1_dst += dst_strd - wd;
+        pu1_src += src_strd - wd;
+        pu1_pred1 += src_strd - wd;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264_inter_pred_luma_vert_qpel \endif
+ *
+ * \brief
+ * This routine applies the six tap filter to the predictors in the
+ * vertical direction and interpolates them to obtain pixels at quarter vertical
+ * positions (0, 1/4) and (0, 3/4). The six tap filtering operation is
+ * described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
+ *
+ * \param pu1_src: Pointer to the buffer containing the predictor values.
+ * pu1_src could point to the frame buffer or the predictor buffer.
+ * \param pu1_dst: Pointer to the destination buffer where the output of
+ * the six tap filter is stored.
+ * \param ht: Height of the rectangular pixel grid to be interpolated
+ * \param wd: Width of the rectangular pixel grid to be interpolated
+ * \param src_strd: Width of the buffer pointed to by puc_pred.
+ * \param dst_strd: Width of the destination buffer
+ * \param pu1_tmp: temporary buffer: UNUSED in this function
+ * \param dydx: x and y reference offset for qpel calculations.
+ *
+ * \return
+ * void
+ *
+ * \note
+ * This function takes the 8 bit predictor values, applies the six tap
+ * filter in the vertical direction and outputs the result clipped to
+ * 8 bit precision. The input is stored in the buffer pointed to by
+ * puc_pred while the output is stored in the buffer pointed by puc_dest.
+ * Both puc_pred and puc_dest could point to the same buffer i.e. the
+ * six tap filter could be done in place.
+ *
+ * \para <title>
+ * <paragraph>
+ * ...
+ **************************************************************************
+ */
+void ih264_inter_pred_luma_vert_qpel(UWORD8 *pu1_src,
+                                     UWORD8 *pu1_dst,
+                                     WORD32 src_strd,
+                                     WORD32 dst_strd,
+                                     WORD32 ht,
+                                     WORD32 wd,
+                                     UWORD8* pu1_tmp,
+                                     WORD32 dydx)
+{
+    /*
+     * Vertical quarter-sample interpolation: each output is the rounded
+     * average of the vertically 6-tap filtered value and one unfiltered
+     * source sample.  Bits 2..3 of dydx select that sample: the current row
+     * when bit 3 is clear, the next row when it is set.
+     * pu1_tmp is not used.
+     * NOTE(review): rows already written to pu1_dst are read again as
+     * pu1_src[-off1] / pu1_src[-off2] by the following rows, so this looks
+     * unsafe for pu1_dst == pu1_src - confirm no caller relies on in-place
+     * operation.
+     */
+    WORD32 row, col;
+    WORD32 y_offset = dydx >> 2;
+    WORD32 off1, off2, off3;
+    UWORD8 *pu1_pred1;
+    UNUSED(pu1_tmp);
+    y_offset = y_offset & 0x3;
+
+    /* one, two and three rows; multiplication instead of a left shift keeps
+       this well defined for a negative stride as well */
+    off1 = src_strd;
+    off2 = 2 * src_strd;
+    off3 = off1 + off2;
+
+    pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd;
+
+    for(row = 0; row < ht; row++)
+    {
+        for(col = 0; col < wd; col++, pu1_dst++, pu1_src++, pu1_pred1++)
+        {
+            WORD16 i2_temp;
+            /* 6-tap kernel (1, -5, 20, 20, -5, 1) down the column.  Written
+               with multiplications: the previous shift-based form
+               left-shifted an intermediate that can be negative, which is
+               undefined behaviour in C. */
+            i2_temp = pu1_src[-off2] + pu1_src[off3]
+                    - 5 * (pu1_src[-off1] + pu1_src[off2])
+                    + 20 * (pu1_src[0] + pu1_src[off1]);
+            i2_temp = (i2_temp + 16) >> 5;
+            i2_temp = CLIP_U8(i2_temp);
+
+            /* average with the selected full-sample neighbour */
+            *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
+        }
+        /* the inner loop advanced every pointer by wd; finish the stride */
+        pu1_src += src_strd - wd;
+        pu1_pred1 += src_strd - wd;
+        pu1_dst += dst_strd - wd;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_qpel \endif
+ *
+ * \brief
+ * This routine applies the six tap filter to the predictors in the
+ * vertical and horizontal direction and averages them to get pixels at locations
+ * (1/4,1/4), (1/4, 3/4), (3/4, 1/4) & (3/4, 3/4). The six tap filtering operation
+ * is described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
+ *
+ * \param pu1_src: Pointer to the buffer containing the predictor values.
+ * pu1_src could point to the frame buffer or the predictor buffer.
+ * \param pu1_dst: Pointer to the destination buffer where the output of
+ * the six tap filter is stored.
+ * \param wd: Width of the rectangular pixel grid to be interpolated
+ * \param ht: Height of the rectangular pixel grid to be interpolated
+ * \param src_strd: Width of the buffer pointed to by puc_pred.
+ * \param dst_strd: Width of the destination buffer
+ * \param pu1_tmp: temporary buffer, UNUSED in this function
+ * \param dydx: x and y reference offset for qpel calculations.
+ *
+ * \return
+ * void
+ *
+ * \note
+ * This function takes the 8 bit predictor values, applies the six tap
+ * filter in the vertical direction and outputs the result clipped to
+ * 8 bit precision. The input is stored in the buffer pointed to by
+ * puc_pred while the output is stored in the buffer pointed by puc_dest.
+ * Both puc_pred and puc_dest could point to the same buffer i.e. the
+ * six tap filter could be done in place.
+ *
+ * \para <title>
+ * <paragraph>
+ * ...
+ **************************************************************************
+ */
+void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 ht,
+                                               WORD32 wd,
+                                               UWORD8* pu1_tmp,
+                                               WORD32 dydx)
+{
+    /*
+     * Diagonal quarter-sample interpolation: each output is the rounded
+     * average of a vertically filtered and a horizontally filtered value,
+     * both rounded with (sum + 16) >> 5 and passed through CLIP_U8 first.
+     * dydx selects where the two filters are anchored: the vertical filter
+     * runs on the current column or the next one (bit 1 of dydx), the
+     * horizontal filter on the current row or the next one (bit 3 of dydx).
+     * pu1_tmp is not used.
+     */
+    WORD32 row, col;
+    WORD32 x_offset = dydx & 0x3;
+    WORD32 y_offset = dydx >> 2;
+
+    WORD32 off1, off2, off3;
+    UWORD8* pu1_pred_vert, *pu1_pred_horz;
+    UNUSED(pu1_tmp);
+    y_offset = y_offset & 0x3;
+
+    /* one, two and three rows; multiplication instead of a left shift keeps
+       this well defined for a negative stride as well */
+    off1 = src_strd;
+    off2 = 2 * src_strd;
+    off3 = off1 + off2;
+
+    pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd;
+    pu1_pred_vert = pu1_src + (x_offset >> 1);
+
+    for(row = 0; row < ht; row++)
+    {
+        for(col = 0; col < wd;
+                        col++, pu1_dst++, pu1_pred_vert++, pu1_pred_horz++)
+        {
+            WORD16 i2_temp_vert, i2_temp_horz;
+            /* Both filters use the kernel (1, -5, 20, 20, -5, 1).  They are
+               written with multiplications: the previous shift-based form
+               left-shifted intermediates that can be negative, which is
+               undefined behaviour in C. */
+
+            /* vertical filter down the selected column */
+            i2_temp_vert = pu1_pred_vert[-off2] + pu1_pred_vert[off3]
+                         - 5 * (pu1_pred_vert[-off1] + pu1_pred_vert[off2])
+                         + 20 * (pu1_pred_vert[0] + pu1_pred_vert[off1]);
+            i2_temp_vert = (i2_temp_vert + 16) >> 5;
+            i2_temp_vert = CLIP_U8(i2_temp_vert);
+
+            /* horizontal filter along the selected row */
+            i2_temp_horz = pu1_pred_horz[-2] + pu1_pred_horz[3]
+                         - 5 * (pu1_pred_horz[-1] + pu1_pred_horz[2])
+                         + 20 * (pu1_pred_horz[0] + pu1_pred_horz[1]);
+            i2_temp_horz = (i2_temp_horz + 16) >> 5;
+            i2_temp_horz = CLIP_U8(i2_temp_horz);
+
+            *pu1_dst = (i2_temp_vert + i2_temp_horz + 1) >> 1;
+        }
+        /* the inner loop advanced every pointer by wd; finish the stride */
+        pu1_pred_vert += (src_strd - wd);
+        pu1_pred_horz += (src_strd - wd);
+        pu1_dst += (dst_strd - wd);
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_hpel \endif
+ *
+ * \brief
+ * This routine applies the six tap filter to the predictors in the vertical
+ * and horizontal direction to obtain the pixel at (1/2,1/2). It then interpolates
+ * pixel at (0,1/2) and (1/2,1/2) to obtain pixel at (1/4,1/2). Similarly for (3/4,1/2).
+ * The six tap filtering operation is described in sec 8.4.2.2.1 titled
+ * "Luma sample interpolation process"
+ *
+ * \param pu1_src: Pointer to the buffer containing the predictor values.
+ * pu1_src could point to the frame buffer or the predictor buffer.
+ * \param pu1_dst: Pointer to the destination buffer where the output of
+ * the six tap filter followed by interpolation is stored.
+ * \param wd: Width of the rectangular pixel grid to be interpolated
+ * \param ht: Height of the rectangular pixel grid to be interpolated
+ * \param src_strd: Width of the buffer pointed to by puc_pred.
+ * \param dst_strd: Width of the destination buffer
+ * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
+ * \param dydx: x and y reference offset for qpel calculations.
+ *
+ * \return
+ * void
+ *
+ * \note
+ * This function takes the 8 bit predictor values, applies the six tap
+ * filter in the vertical direction and outputs the result clipped to
+ * 8 bit precision. The input is stored in the buffer pointed to by
+ * puc_pred while the output is stored in the buffer pointed by puc_dest.
+ * Both puc_pred and puc_dest could point to the same buffer i.e. the
+ * six tap filter could be done in place.
+ *
+ * \para <title>
+ * <paragraph>
+ * ...
+ **************************************************************************
+ */
+void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 ht,
+                                               WORD32 wd,
+                                               UWORD8* pu1_tmp,
+                                               WORD32 dydx)
+{
+    /*
+     * Three passes, using pu1_tmp as a WORD16 scratch area with a row pitch
+     * of wd + 5:
+     *   pass 1 - unrounded vertical 6-tap sums for columns -2 .. wd + 2;
+     *   pass 2 - horizontal 6-tap filter over those sums, rounded with
+     *            (sum + 512) >> 10, clipped and written to pu1_dst;
+     *   pass 3 - the vertical sums of the column selected by bit 1 of dydx
+     *            (current or next column) are rounded with (sum + 16) >> 5,
+     *            clipped and averaged into pu1_dst.
+     * Pass 3 also writes the rounded value back into the scratch area.
+     * The passes must stay separate: pass 2 reads scratch entries that
+     * pass 3 later overwrites.
+     * NOTE(review): pu1_tmp presumably has to be suitably aligned for
+     * WORD16 access and must hold ht * (wd + 5) WORD16 entries - confirm
+     * against the callers.
+     */
+    WORD32 i4_y, i4_x;
+    WORD32 i4_sum;
+    const WORD32 i4_pitch = wd + 5;
+    const WORD32 i4_col_sel = (dydx & 0x3) >> 1;
+    WORD16 *pi2_col0;   /* column 0 of the first scratch row */
+    WORD16 *pi2_line;
+    UWORD8 *pu1_out;
+
+    pi2_col0 = ((WORD16 *)pu1_tmp) + 2;
+
+    /* pass 1: vertical sums, including the 2 + 3 extra columns pass 2 needs */
+    pi2_line = pi2_col0;
+    for(i4_y = 0; i4_y < ht; i4_y++)
+    {
+        for(i4_x = -2; i4_x < wd + 3; i4_x++)
+        {
+            const UWORD8 *pu1_c = pu1_src + i4_x;
+
+            i4_sum = ih264_g_six_tap[0]
+                         * (pu1_c[-2 * src_strd] + pu1_c[3 * src_strd])
+                   + ih264_g_six_tap[1]
+                         * (pu1_c[-1 * src_strd] + pu1_c[2 * src_strd])
+                   + ih264_g_six_tap[2]
+                         * (pu1_c[0] + pu1_c[1 * src_strd]);
+            pi2_line[i4_x] = i4_sum;
+        }
+
+        pu1_src += src_strd;
+        pi2_line += i4_pitch;
+    }
+
+    /* pass 2: horizontal filter on the sums gives the (1/2, 1/2) samples */
+    pi2_line = pi2_col0;
+    pu1_out = pu1_dst;
+    for(i4_y = 0; i4_y < ht; i4_y++)
+    {
+        for(i4_x = 0; i4_x < wd; i4_x++)
+        {
+            const WORD16 *pi2_c = pi2_line + i4_x;
+
+            i4_sum = ih264_g_six_tap[0] * (pi2_c[-2] + pi2_c[3])
+                   + ih264_g_six_tap[1] * (pi2_c[-1] + pi2_c[2])
+                   + ih264_g_six_tap[2] * (pi2_c[0] + pi2_c[1]);
+            i4_sum = (i4_sum + 512) >> 10;
+            pu1_out[i4_x] = CLIP_U8(i4_sum);
+        }
+        pi2_line += i4_pitch;
+        pu1_out += dst_strd;
+    }
+
+    /* pass 3: average with the vertically filtered sample of the chosen
+       column */
+    pi2_line = pi2_col0 + i4_col_sel;
+    pu1_out = pu1_dst;
+    i4_y = ht;
+    while(i4_y != 0)
+    {
+        i4_x = wd;
+        while(i4_x != 0)
+        {
+            WORD16 i2_vert;
+            UWORD8 u1_vert;
+
+            /* round the stored sum in place, then clip a copy of it */
+            *pi2_line = (*pi2_line + 16) >> 5;
+            i2_vert = *pi2_line;
+            u1_vert = CLIP_U8(i2_vert);
+            *pu1_out = (*pu1_out + u1_vert + 1) >> 1;
+
+            pi2_line++;
+            pu1_out++;
+            i4_x--;
+        }
+        /* skip the 5 border entries that complete one scratch row */
+        pi2_line += 5;
+        pu1_out += dst_strd - wd;
+        i4_y--;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_qpel \endif
+ *
+ * \brief
+ * This routine applies the six tap filter to the predictors in the horizontal
+ * and vertical direction to obtain the pixel at (1/2,1/2). It then interpolates
+ * pixel at (1/2,0) and (1/2,1/2) to obtain pixel at (1/2,1/4). Similarly for (1/2,3/4).
+ * The six tap filtering operation is described in sec 8.4.2.2.1 titled
+ * "Luma sample interpolation process"
+ *
+ * \param pu1_src: Pointer to the buffer containing the predictor values.
+ * pu1_src could point to the frame buffer or the predictor buffer.
+ * \param pu1_dst: Pointer to the destination buffer where the output of
+ * the six tap filter followed by interpolation is stored.
+ * \param wd: Width of the rectangular pixel grid to be interpolated
+ * \param ht: Height of the rectangular pixel grid to be interpolated
+ * \param src_strd: Width of the buffer pointed to by puc_pred.
+ * \param dst_strd: Width of the destination buffer
+ * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
+ * \param dydx: x and y reference offset for qpel calculations.
+ *
+ * \return
+ * void
+ *
+ * \note
+ * This function takes the 8 bit predictor values, applies the six tap
+ * filter in the vertical direction and outputs the result clipped to
+ * 8 bit precision. The input is stored in the buffer pointed to by
+ * puc_pred while the output is stored in the buffer pointed by puc_dest.
+ * Both puc_pred and puc_dest could point to the same buffer i.e. the
+ * six tap filter could be done in place.
+ *
+ * \para <title>
+ * <paragraph>
+ * ...
+ **************************************************************************
+ */
+void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 ht,
+                                               WORD32 wd,
+                                               UWORD8* pu1_tmp,
+                                               WORD32 dydx)
+{
+    /*
+     * Three passes, using pu1_tmp as a WORD16 scratch area with a row pitch
+     * of wd:
+     *   pass 1 - unrounded horizontal 6-tap sums for rows -2 .. ht + 2;
+     *   pass 2 - vertical 6-tap filter over those sums, rounded with
+     *            (sum + 512) >> 10, clipped and written to pu1_dst;
+     *   pass 3 - the horizontal sums of the row selected by bit 3 of dydx
+     *            (current or next row) are rounded with (sum + 16) >> 5,
+     *            clipped and averaged into pu1_dst.
+     * Pass 3 also writes the rounded value back into the scratch area.
+     * The passes must stay separate: pass 2 reads scratch entries that
+     * pass 3 later overwrites.
+     * NOTE(review): pu1_tmp presumably has to be suitably aligned for
+     * WORD16 access and must hold (ht + 5) * wd WORD16 entries - confirm
+     * against the callers.
+     */
+    WORD32 i4_y, i4_x;
+    WORD32 i4_sum;
+    const WORD32 i4_row_sel = ((dydx >> 2) & 0x3) >> 1;
+    WORD16 *pi2_base;   /* scratch row holding source row -2 */
+    WORD16 *pi2_row0;   /* scratch row holding source row 0 */
+    WORD16 *pi2_line;
+    UWORD8 *pu1_out;
+
+    pi2_base = (WORD16 *)pu1_tmp;
+    pi2_row0 = pi2_base + 2 * wd;
+
+    /* pass 1: horizontal sums, including the 2 + 3 extra rows pass 2 needs */
+    pu1_src -= 2 * src_strd;
+    pi2_line = pi2_base;
+    for(i4_y = -2; i4_y < ht + 3; i4_y++)
+    {
+        for(i4_x = 0; i4_x < wd; i4_x++)
+        {
+            const UWORD8 *pu1_c = pu1_src + i4_x;
+
+            i4_sum = ih264_g_six_tap[0] * (pu1_c[-2] + pu1_c[3])
+                   + ih264_g_six_tap[1] * (pu1_c[-1] + pu1_c[2])
+                   + ih264_g_six_tap[2] * (pu1_c[0] + pu1_c[1]);
+            pi2_line[i4_x] = i4_sum;
+        }
+
+        pu1_src += src_strd;
+        pi2_line += wd;
+    }
+
+    /* pass 2: vertical filter on the sums gives the (1/2, 1/2) samples */
+    pi2_line = pi2_row0;
+    pu1_out = pu1_dst;
+    for(i4_y = 0; i4_y < ht; i4_y++)
+    {
+        for(i4_x = 0; i4_x < wd; i4_x++)
+        {
+            const WORD16 *pi2_c = pi2_line + i4_x;
+
+            i4_sum = ih264_g_six_tap[0] * (pi2_c[-2 * wd] + pi2_c[3 * wd])
+                   + ih264_g_six_tap[1] * (pi2_c[-1 * wd] + pi2_c[2 * wd])
+                   + ih264_g_six_tap[2] * (pi2_c[0] + pi2_c[1 * wd]);
+            i4_sum = (i4_sum + 512) >> 10;
+            pu1_out[i4_x] = CLIP_U8(i4_sum);
+        }
+        pi2_line += wd;
+        pu1_out += dst_strd;
+    }
+
+    /* pass 3: average with the horizontally filtered sample of the chosen
+       row */
+    pi2_line = pi2_row0 + i4_row_sel * wd;
+    pu1_out = pu1_dst;
+    i4_y = ht;
+    while(i4_y != 0)
+    {
+        i4_x = wd;
+        while(i4_x != 0)
+        {
+            WORD16 i2_horz;
+            UWORD8 u1_horz;
+
+            /* round the stored sum in place, then clip a copy of it */
+            *pi2_line = (*pi2_line + 16) >> 5;
+            i2_horz = *pi2_line;
+            u1_horz = CLIP_U8(i2_horz);
+            *pu1_out = (*pu1_out + u1_horz + 1) >> 1;
+
+            pi2_line++;
+            pu1_out++;
+            i4_x--;
+        }
+        /* scratch rows are exactly wd entries long, so pi2_line already
+           points at the next row */
+        pu1_out += dst_strd - wd;
+        i4_y--;
+    }
+}
+
+/**
+ *******************************************************************************
+ * function:ih264_inter_pred_luma_bilinear
+ *
+ * @brief
+ * This routine applies the bilinear filter to the predictors .
+ * The filtering operation is described in
+ * sec 8.4.2.2.1 titled "Luma sample interpolation process"
+ *
+ * @par Description:
+\note
+ * This function is called to obtain pixels lying at the following
+ * locations: (1/4,1), (3/4,1), (1,1/4), (1,3/4), (1/4,1/2), (3/4,1/2), (1/2,1/4), (1/2,3/4), (3/4,1/4), (1/4,3/4), (3/4,3/4) and (1/4,1/4).
+ * For every position, the function computes the rounded average of the corresponding values of the two input arrays.
+ *
+ *
+ * @param[in] pu1_src1:
+ * UWORD8 Pointer to the buffer containing the first input array.
+ *
+ * @param[in] pu1_src2:
+ * UWORD8 Pointer to the buffer containing the second input array.
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination where the output of bilinear filter is stored.
+ *
+ * @param[in] src_strd1
+ * Stride of the first input buffer
+ *
+ * @param[in] src_strd2
+ * Stride of the second input buffer
+ *
+ * @param[in] dst_strd
+ * integer destination stride of pu1_dst
+ *
+ * @param[in] ht
+ * integer height of the array
+ *
+ * @param[in] wd
+ * integer width of the array
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
+                                    UWORD8 *pu1_src2,
+                                    UWORD8 *pu1_dst,
+                                    WORD32 src_strd1,
+                                    WORD32 src_strd2,
+                                    WORD32 dst_strd,
+                                    WORD32 ht,
+                                    WORD32 wd)
+{
+    /*
+     * Writes, for every position of the ht x wd block, the rounded average
+     * (a + b + 1) >> 1 of the co-located samples of the two inputs.  Each
+     * of the three buffers is walked with its own stride.
+     */
+    WORD32 i4_rows_left, i4_x;
+
+    for(i4_rows_left = ht; i4_rows_left > 0; i4_rows_left--)
+    {
+        for(i4_x = 0; i4_x < wd; i4_x++)
+        {
+            WORD16 i2_avg;
+
+            i2_avg = (pu1_src1[i4_x] + pu1_src2[i4_x] + 1) >> 1;
+            pu1_dst[i4_x] = CLIP_U8(i2_avg);
+        }
+
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Interprediction chroma filter
+ *
+ * @par Description:
+ * Applies filtering to chroma samples as mentioned in
+ * sec 8.4.2.2.2 titled "chroma sample interpolation process"
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source containing alternate U and V samples
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] dx
+ * dx value where the sample is to be produced(refer sec 8.4.2.2.2 )
+ *
+ * @param[in] dy
+ * dy value where the sample is to be produced(refer sec 8.4.2.2.2 )
+ *
+ * @param[in] ht
+ * integer height of the array
+ *
+ * @param[in] wd
+ * integer width of the array
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_inter_pred_chroma(UWORD8 *pu1_src,
+                             UWORD8 *pu1_dst,
+                             WORD32 src_strd,
+                             WORD32 dst_strd,
+                             WORD32 dx,
+                             WORD32 dy,
+                             WORD32 ht,
+                             WORD32 wd)
+{
+    /*
+     * Weighted average of four source bytes per output byte, as in
+     * equation (8-266) of section 8.4.2.2.2: the byte at the current
+     * position, the byte 2 positions to its right, and the same two
+     * positions one row below.  The weights are products of (8 - dx) or dx
+     * with (8 - dy) or dy; the sum is rounded with (sum + 32) >> 6 and
+     * passed through CLIP_U8.  Each row produces 2 * wd output bytes, and
+     * the horizontal neighbour is 2 bytes away, which matches the
+     * alternating U and V layout described in the function header.
+     */
+    WORD32 i4_y, i4_x;
+    const WORD32 i4_w_cur = (8 - dx) * (8 - dy);
+    const WORD32 i4_w_right = (dx) * (8 - dy);
+    const WORD32 i4_w_below = (8 - dx) * (dy);
+    const WORD32 i4_w_diag = (dx) * (dy);
+
+    for(i4_y = 0; i4_y < ht; i4_y++)
+    {
+        const UWORD8 *pu1_next_row = pu1_src + src_strd;
+
+        for(i4_x = 0; i4_x < 2 * wd; i4_x++)
+        {
+            WORD16 i2_val;
+
+            i2_val = i4_w_cur * pu1_src[i4_x]
+                   + i4_w_right * pu1_src[i4_x + 2]
+                   + i4_w_below * pu1_next_row[i4_x]
+                   + i4_w_diag * pu1_next_row[i4_x + 2];
+            i2_val = (i2_val + 32) >> 6;
+            pu1_dst[i4_x] = CLIP_U8(i2_val);
+        }
+
+        pu1_src += src_strd;
+        pu1_dst += dst_strd;
+    }
+}
diff --git a/common/ih264_inter_pred_filters.h b/common/ih264_inter_pred_filters.h
new file mode 100755
index 0000000..c439ab8
--- /dev/null
+++ b/common/ih264_inter_pred_filters.h
@@ -0,0 +1,241 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264_inter_pred_filters.h
+ *
+ * @brief
+ * Declarations of functions used for inter prediction
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * -ih264_inter_pred_luma_copy
+ * -ih264_interleave_copy
+ * -ih264_inter_pred_luma_horz
+ * -ih264_inter_pred_luma_vert
+ * -ih264_inter_pred_luma_horz_hpel_vert_hpel
+ * -ih264_inter_pred_luma_vert_qpel
+ * -ih264_inter_pred_luma_horz_qpel
+ * -ih264_inter_pred_luma_horz_qpel_vert_qpel
+ * -ih264_inter_pred_luma_horz_qpel_vert_hpel
+ * -ih264_inter_pred_luma_horz_hpel_vert_qpel
+ * -ih264_inter_pred_luma_bilinear
+ * -ih264_inter_pred_chroma
+ * -ih264_inter_pred_luma_copy_a9q
+ * -ih264_interleave_copy_a9
+ * -ih264_inter_pred_luma_horz_a9q
+ * -ih264_inter_pred_luma_vert_a9q
+ * -ih264_inter_pred_luma_bilinear_a9q
+ * -ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q
+ * -ih264_inter_pred_luma_horz_qpel_a9q
+ * -ih264_inter_pred_luma_vert_qpel_a9q
+ * -ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q
+ * -ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q
+ * -ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q
+ * -ih264_inter_pred_chroma_a9q
+ * -ih264_inter_pred_luma_copy_av8
+ * -ih264_interleave_copy_av8
+ * -ih264_inter_pred_luma_horz_av8
+ * -ih264_inter_pred_luma_vert_av8
+ * -ih264_inter_pred_luma_bilinear_av8
+ * -ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
+ * -ih264_inter_pred_luma_horz_qpel_av8
+ * -ih264_inter_pred_luma_vert_qpel_av8
+ * -ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
+ * -ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
+ * -ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
+ * -ih264_inter_pred_chroma_av8
+ * -ih264_inter_pred_chroma_dx_zero_av8
+ * -ih264_inter_pred_chroma_dy_zero_av8
+ * -ih264_inter_pred_luma_copy_ssse3
+ * -ih264_inter_pred_luma_horz_ssse3
+ * -ih264_inter_pred_luma_vert_ssse3
+ * -ih264_inter_pred_luma_bilinear_ssse3
+ * -ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3
+ * -ih264_inter_pred_luma_horz_qpel_ssse3
+ * -ih264_inter_pred_luma_vert_qpel_ssse3
+ * -ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3
+ * -ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3
+ * -ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3
+ * -ih264_inter_pred_chroma_ssse3
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#ifndef _IH264_INTER_PRED_H_
+#define _IH264_INTER_PRED_H_
+
+/*****************************************************************************/
+/* Constant Data variables */
+/*****************************************************************************/
+
+/* Table of three WORD32 filter coefficients; only declared here, the values */
+/* live in the translation unit that defines it.                             */
+extern const WORD32 ih264_g_six_tap[3];/* coefficients for 6 tap filtering*/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/* Function type used by every luma inter prediction routine declared in     */
+/* this header except the bilinear ones.                                      */
+/* NOTE(review): the parameters are not described in this header. By name,    */
+/* pu1_src/src_strd and pu1_dst/dst_strd look like the source and destination */
+/* buffers with their strides, ht/wd the block size, pu1_tmp a scratch buffer */
+/* and dydx the fractional sample position - confirm against the              */
+/* implementations.                                                           */
+typedef void ih264_inter_pred_luma_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx);
+
+/* Function type of the interleave copy routines (ih264_interleave_copy and  */
+/* its _a9 / _av8 variants declared below).                                   */
+/* NOTE(review): the parameters are not described in this header; by name     */
+/* they are source and destination buffers, their strides and the block       */
+/* height/width - confirm against the implementation.                         */
+typedef void ih264_interleave_copy_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd);
+
+/* Function type of the luma bilinear routines. Unlike the other luma type it */
+/* takes two source pointers, each with its own stride, and names the block   */
+/* size height/width instead of ht/wd.                                        */
+/* NOTE(review): presumably the output is the rounded average of the two      */
+/* sources - confirm against ih264_inter_pred_luma_bilinear.                  */
+typedef void ih264_inter_pred_luma_bilinear_ft(UWORD8 *pu1_src1,
+ UWORD8 *pu1_src2,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd1,
+ WORD32 src_strd2,
+ WORD32 dst_strd,
+ WORD32 height,
+ WORD32 width);
+
+/* Function type of the chroma inter prediction routines.                     */
+/* In the C version (ih264_inter_pred_chroma) pu1_src holds alternate U and V */
+/* samples, dx and dy are the interpolation weights (paired with 8 - dx and   */
+/* 8 - dy) and 2 * wd bytes are written for each of the ht rows.              */
+typedef void ih264_inter_pred_chroma_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 dx,
+ WORD32 dy,
+ WORD32 ht,
+ WORD32 wd);
+
+/* No NEON Declarations */
+/* Functions without a platform suffix. Each one is declared through a       */
+/* function typedef, so its parameter list is exactly that typedef's.        */
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy;
+
+ih264_interleave_copy_ft ih264_interleave_copy;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel;
+
+ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear;
+
+ih264_inter_pred_chroma_ft ih264_inter_pred_chroma;
+
+/* A9 NEON Declarations */
+/* Same set of functions as the unsuffixed group, with an _a9q suffix.       */
+/* NOTE(review): ih264_interleave_copy_a9 is the only name in this group that */
+/* ends in _a9 rather than _a9q - confirm it matches the assembly symbol.     */
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_a9q;
+
+ih264_interleave_copy_ft ih264_interleave_copy_a9;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_a9q;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_a9q;
+
+ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear_a9q;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_a9q;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_a9q;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q;
+
+ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_a9q;
+
+/* AV8 NEON Declarations */
+/* Adds two chroma entry points (_dx_zero / _dy_zero) that have no           */
+/* counterpart in the other groups.                                          */
+/* NOTE(review): the function list at the top of this file names             */
+/* ih264_inter_pred_luma_bilinear_av8, but no such declaration exists in     */
+/* this group - confirm whether it is missing or the list is stale.          */
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_av8;
+
+ih264_interleave_copy_ft ih264_interleave_copy_av8;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_av8;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_av8;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_av8;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_av8;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_av8;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_av8;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_av8;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_av8;
+
+ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_av8;
+
+ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dx_zero_av8;
+
+ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dy_zero_av8;
+
+
+/* SSSE3 Intrinsic Declarations */
+/* Same set as the unsuffixed group with an _ssse3 suffix, except that no    */
+/* interleave copy variant is declared for this platform.                    */
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_ssse3;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_ssse3;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_ssse3;
+
+ih264_inter_pred_luma_bilinear_ft ih264_inter_pred_luma_bilinear_ssse3;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_ssse3;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_ssse3;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
+
+ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
+
+ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_ssse3;
+
+#endif
+
+/** Nothing past this point */
diff --git a/common/ih264_intra_pred_filters.h b/common/ih264_intra_pred_filters.h
new file mode 100755
index 0000000..caf6b33
--- /dev/null
+++ b/common/ih264_intra_pred_filters.h
@@ -0,0 +1,331 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_intra_pred_filters.h
+ *
+ * @brief
+ * Declarations of functions used for intra prediction
+ *
+ * @author
+ * Ittiam
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264_INTRA_PRED_FILTERS_H_
+
+#define IH264_INTRA_PRED_FILTERS_H_
+
+/*****************************************************************************/
+/* Macro Expansion */
+/*****************************************************************************/
+
+/*! Filter (1,2,1) with rounding, i.e (a + 2b + c + 2) >> 2.               */
+/*! Arguments are parenthesized so that expressions such as x | y can be   */
+/*! passed safely; each argument is evaluated exactly once.                */
+#define FILT121(a,b,c) (((a) + ((b)<<1) + (c) + 2)>>2)
+/*! Filter (1,1) with rounding, i.e (a + b + 1) >> 1.                      */
+/*! Arguments are parenthesized so that expressions such as x | y can be   */
+/*! passed safely; each argument is evaluated exactly once.                */
+#define FILT11(a,b) (((a) + (b) + 1)>>1)
+/*****************************************************************************/
+/* Global Variables */
+/*****************************************************************************/
+
+/* Global variables used only in assembly files*/
+/* Declared with incomplete array types: the element counts are fixed by the */
+/* definitions, which are not part of this header.                           */
+extern const WORD8 ih264_gai1_intrapred_luma_plane_coeffs[];
+extern const WORD8 ih264_gai1_intrapred_chroma_plane_coeffs1[];
+extern const WORD8 ih264_gai1_intrapred_chroma_plane_coeffs2[];
+extern const WORD8 ih264_gai1_intrapred_luma_8x8_horz_u[];
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+
+/* Function type of the 8x8 luma reference filtering routines               */
+/* (ih264_intra_pred_luma_8x8_mode_ref_filtering and its _a9q variant).      */
+/* NOTE(review): the parameters are not described in this header; by name    */
+/* they are the left, top-left and top neighbour samples, the destination,   */
+/* the stride of the left column and a neighbour availability value -        */
+/* confirm against the implementation.                                       */
+typedef void ih264_intra_pred_ref_filtering_ft(UWORD8 *pu1_left,
+ UWORD8 *pu1_topleft,
+ UWORD8 *pu1_top,
+ UWORD8 *pu1_dst,
+ WORD32 left_strd,
+ WORD32 ngbr_avail);
+
+/* Function type shared by all luma intra prediction modes declared below;  */
+/* the chroma type ih264_intra_pred_chroma_ft is a typedef of this one.      */
+/* NOTE(review): the parameters are not described in this header; by name    */
+/* they are source and destination buffers with their strides and a          */
+/* neighbour availability value - confirm against the implementations.       */
+typedef void ih264_intra_pred_luma_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail);
+
+/* No Neon Declarations */
+/* Functions without a platform suffix. Each one is declared through a       */
+/* function typedef, so its parameter list is exactly that typedef's.        */
+
+/* Luma 4x4 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u;
+
+/* Luma 8x8 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u;
+
+/* Luma 16x16 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane;
+
+/* Chroma 8x8 Intra pred filters */
+
+/* The chroma modes reuse the luma function type unchanged */
+typedef ih264_intra_pred_luma_ft ih264_intra_pred_chroma_ft;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane;
+
+
+/* Reference sample filtering for the luma 8x8 modes */
+ih264_intra_pred_ref_filtering_ft ih264_intra_pred_luma_8x8_mode_ref_filtering;
+
+/* A9 Declarations */
+/* Same set of functions as the unsuffixed group, with an _a9q suffix.       */
+
+/* Luma 4x4 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
+
+/* Luma 8x8 Intra pred filters */
+
+/* Reference sample filtering; the only platform variant of it in this file */
+ih264_intra_pred_ref_filtering_ft ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
+
+/* Luma 16x16 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_a9q;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_a9q;
+
+/* Chroma 8x8 Intra pred filters */
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_a9q;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_a9q;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_a9q;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_a9q;
+
+/* X86 (SSSE3) Intrinsic Declarations */
+/* Same prediction modes as the unsuffixed group, with an _ssse3 suffix.     */
+/* No reference filtering variant is declared for this platform.             */
+
+/* Luma 4x4 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_ssse3;
+
+/* Luma 8x8 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_ssse3;
+
+/* Luma 16x16 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_ssse3;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_ssse3;
+
+/* Chroma 8x8 Intra pred filters */
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_ssse3;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_ssse3;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_ssse3;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_ssse3;
+
+/* AV8 Declarations */
+/* Same prediction modes as the unsuffixed group, with an _av8 suffix.       */
+/* No reference filtering variant is declared for this platform.             */
+
+/* Luma 4x4 Intra pred filters */
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_av8;
+
+/* Luma 8x8 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_av8;
+
+/* Luma 16x16 Intra pred filters */
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_av8;
+
+ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_av8;
+
+/* Chroma 8x8 Intra pred filters */
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_av8;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_av8;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_av8;
+
+ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_av8;
+
+#endif /* IH264_INTRA_PRED_FILTERS_H_ */
diff --git a/common/ih264_iquant_itrans_recon.c b/common/ih264_iquant_itrans_recon.c
new file mode 100755
index 0000000..3c14046
--- /dev/null
+++ b/common/ih264_iquant_itrans_recon.c
@@ -0,0 +1,873 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_iquant_itrans_recon.c
+ *
+ * @brief
+ * Contains definition of functions for h264 inverse quantization inverse transformation and recon
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ih264_iquant_itrans_recon_4x4()
+ * - ih264_iquant_itrans_recon_8x8()
+ * - ih264_iquant_itrans_recon_4x4_dc()
+ * - ih264_iquant_itrans_recon_8x8_dc()
+ * - ih264_iquant_itrans_recon_chroma_4x4()
+ * -ih264_iquant_itrans_recon_chroma_4x4_dc()
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Reconstructs a 4x4 sub block from its quantized residue and the
+ *  prediction buffer
+ *
+ * @par Description:
+ *  Every coefficient is passed through INV_QUANT. A horizontal and then a
+ *  vertical 4 point inverse transform are applied, each result is rounded
+ *  with (x + 32) >> 6, added to the matching prediction sample and clipped
+ *  with CLIP_U8 before it is stored.
+ *
+ * @param[in] pi2_src
+ *  quantized 4x4 block, read as 16 contiguous coefficients
+ *
+ * @param[in] pu1_pred
+ *  prediction 4x4 block
+ *
+ * @param[out] pu1_out
+ *  reconstructed 4x4 block
+ *
+ * @param[in] pred_strd
+ *  prediction buffer stride
+ *
+ * @param[in] out_strd
+ *  recon buffer stride
+ *
+ * @param[in] pu2_iscal_mat
+ *  16 entry table; entry k is the second argument of INV_QUANT for
+ *  coefficient k
+ *
+ * @param[in] pu2_weigh_mat
+ *  16 entry table; entry k is the third argument of INV_QUANT for
+ *  coefficient k
+ *
+ * @param[in] u4_qp_div_6
+ *  Floor (qp/6)
+ *
+ * @param[in] pi2_tmp
+ *  temporary buffer of 16 WORD16 entries, receives the output of the
+ *  horizontal pass
+ *
+ * @param[in] iq_start_idx
+ *  when equal to 1 the DC term is taken from pi2_dc_ld_addr[0] instead of
+ *  the dequantized pi2_src[0]
+ *
+ * @param[in] pi2_dc_ld_addr
+ *  location of the DC value used when iq_start_idx is 1
+ *
+ * @returns none
+ *
+ * @remarks
+ *  Intermediate values of both passes are stored in WORD16 variables.
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
+                                   UWORD8 *pu1_pred,
+                                   UWORD8 *pu1_out,
+                                   WORD32 pred_strd,
+                                   WORD32 out_strd,
+                                   const UWORD16 *pu2_iscal_mat,
+                                   const UWORD16 *pu2_weigh_mat,
+                                   UWORD32 u4_qp_div_6,
+                                   WORD16 *pi2_tmp,
+                                   WORD32 iq_start_idx,
+                                   WORD16 *pi2_dc_ld_addr
+)
+{
+    WORD32 row, col, k;
+    WORD32 c0, c1, c2, c3;
+    WORD16 even_sum, even_diff, odd_a, odd_b;
+    WORD16 ai2_col[4];
+    WORD16 i2_pel;
+    WORD16 rnd_fact = 0;
+
+    if(u4_qp_div_6 < 4)
+    {
+        rnd_fact = 1 << (3 - u4_qp_div_6);
+    }
+
+    /* inverse quant followed by the horizontal inverse transform, row wise */
+    for(row = 0; row < SUB_BLK_WIDTH_4x4; row++)
+    {
+        const WORD32 base = row * SUB_BLK_WIDTH_4x4;
+        const WORD16 *pi2_coef = pi2_src + base;
+        const UWORD16 *pu2_iscal = pu2_iscal_mat + base;
+        const UWORD16 *pu2_weigh = pu2_weigh_mat + base;
+        WORD16 *pi2_row = pi2_tmp + base;
+
+        c0 = pi2_coef[0];
+        INV_QUANT(c0, pu2_iscal[0], pu2_weigh[0], u4_qp_div_6, rnd_fact, 4);
+        if((0 == row) && (1 == iq_start_idx))
+        {
+            /* Restoring dc value for intra case */
+            c0 = pi2_dc_ld_addr[0];
+        }
+
+        c2 = pi2_coef[2];
+        INV_QUANT(c2, pu2_iscal[2], pu2_weigh[2], u4_qp_div_6, rnd_fact, 4);
+
+        even_sum = c0 + c2;
+        even_diff = c0 - c2;
+
+        c1 = pi2_coef[1];
+        INV_QUANT(c1, pu2_iscal[1], pu2_weigh[1], u4_qp_div_6, rnd_fact, 4);
+
+        c3 = pi2_coef[3];
+        INV_QUANT(c3, pu2_iscal[3], pu2_weigh[3], u4_qp_div_6, rnd_fact, 4);
+
+        odd_a = (c1 >> 1) - c3;
+        odd_b = c1 + (c3 >> 1);
+
+        pi2_row[0] = even_sum + odd_b;
+        pi2_row[1] = even_diff + odd_a;
+        pi2_row[2] = even_diff - odd_a;
+        pi2_row[3] = even_sum - odd_b;
+    }
+
+    /* vertical inverse transform and reconstruction, one column at a time */
+    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
+    {
+        const WORD16 *pi2_c = pi2_tmp + col;
+
+        even_sum = (pi2_c[0] + pi2_c[8]);
+        even_diff = (pi2_c[0] - pi2_c[8]);
+        odd_a = (pi2_c[4] >> 1) - pi2_c[12];
+        odd_b = pi2_c[4] + (pi2_c[12] >> 1);
+
+        /* the four residues of this column, top to bottom */
+        ai2_col[0] = even_sum + odd_b;
+        ai2_col[1] = even_diff + odd_a;
+        ai2_col[2] = even_diff - odd_a;
+        ai2_col[3] = even_sum - odd_b;
+
+        /* inverse prediction: round, add the prediction sample and clip */
+        for(k = 0; k < 4; k++)
+        {
+            i2_pel = ((ai2_col[k] + 32) >> 6);
+            i2_pel += pu1_pred[k * pred_strd + col];
+            pu1_out[k * out_strd + col] = CLIP_U8(i2_pel);
+        }
+    }
+
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Reconstructs a 4x4 sub block from a single DC value and the prediction
+ *
+ * @par Description:
+ *  When iq_start_idx is 0 the DC value is pi2_src[0] passed through
+ *  INV_QUANT, otherwise it is read from pi2_dc_ld_addr[0] as is. The value
+ *  is rounded with (dc + 32) >> 6, added to each of the 16 prediction
+ *  samples and the sums are clipped with CLIP_U8.
+ *
+ * @param[in] pi2_src
+ *  quantized block; only element 0 is read
+ *
+ * @param[in] pu1_pred
+ *  prediction 4x4 block
+ *
+ * @param[out] pu1_out
+ *  reconstructed 4x4 block
+ *
+ * @param[in] pred_strd
+ *  prediction buffer stride
+ *
+ * @param[in] out_strd
+ *  recon buffer stride
+ *
+ * @param[in] pu2_iscal_mat
+ *  only element 0 is read (second argument of INV_QUANT)
+ *
+ * @param[in] pu2_weigh_mat
+ *  only element 0 is read (third argument of INV_QUANT)
+ *
+ * @param[in] u4_qp_div_6
+ *  Floor (qp/6)
+ *
+ * @param[in] pi2_tmp
+ *  unused
+ *
+ * @param[in] iq_start_idx
+ *  0 selects the dequantized pi2_src[0], any other value selects
+ *  pi2_dc_ld_addr[0]
+ *
+ * @param[in] pi2_dc_ld_addr
+ *  location of the DC value used when iq_start_idx is not 0
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
+                                      UWORD8 *pu1_pred,
+                                      UWORD8 *pu1_out,
+                                      WORD32 pred_strd,
+                                      WORD32 out_strd,
+                                      const UWORD16 *pu2_iscal_mat,
+                                      const UWORD16 *pu2_weigh_mat,
+                                      UWORD32 u4_qp_div_6,
+                                      WORD16 *pi2_tmp,
+                                      WORD32 iq_start_idx,
+                                      WORD16 *pi2_dc_ld_addr)
+{
+    WORD32 i4_dc;
+    WORD32 col, row;
+    WORD16 i2_resi, i2_pel;
+    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+    UNUSED(pi2_tmp);
+
+    if(0 != iq_start_idx)
+    {
+        /* Restoring dc value for intra case */
+        i4_dc = pi2_dc_ld_addr[0];
+    }
+    else
+    {
+        i4_dc = pi2_src[0];
+        INV_QUANT(i4_dc, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
+    }
+
+    /* the same rounded residue is added to every sample of the block */
+    i2_resi = ((i4_dc + 32) >> 6);
+
+    /* inverse prediction, one column at a time, top to bottom */
+    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
+    {
+        for(row = 0; row < 4; row++)
+        {
+            i2_pel = i2_resi + pu1_pred[row * pred_strd + col];
+            pu1_out[row * out_strd + col] = CLIP_U8(i2_pel);
+        }
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs inverse quant and inverse transform type Ci8 for an 8x8 block
+ *
+ * @par Description:
+ * Performs inverse transform Ci8 and adds the residue to get the
+ * reconstructed block
+ *
+ * @param[in] pi2_src
+ * Input 8x8coefficients
+ *
+ * @param[in] pu1_pred
+ * Prediction 8x8 block
+ *
+ * @param[out] pu1_recon
+ * Output 8x8 block
+ *
+ * @param[in] q_div
+ * QP/6
+ *
+ * @param[in] q_rem
+ * QP%6
+ *
+ * @param[in] q_lev
+ * Quantizer level
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd,
+ * Prediction stride
+ *
+ * @param[in] out_strd
+ * Output Stride
+ *
+ * @param[in] pi2_tmp
+ * temporary buffer; all 8x8 (64) dequantized coefficients are stored in it,
+ * so it must hold at least 64 WORD16 entries
+ *
+ * @param[in] pu4_iquant_mat
+ * Pointer to the inverse quantization matrix
+ *
+ * @returns Void
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div,
+ WORD16 *pi2_tmp,
+ WORD32 iq_start_idx,
+ WORD16 *pi2_dc_ld_addr
+)
+{
+ WORD32 i;
+ WORD16 *pi2_tmp_ptr = pi2_tmp;
+ UWORD8 *pu1_pred_ptr = pu1_pred;
+ UWORD8 *pu1_out_ptr = pu1_out;
+ WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
+ WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
+ WORD16 i_macro;
+ WORD32 q;
+ WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
+ UNUSED(iq_start_idx);
+ UNUSED(pi2_dc_ld_addr);
+ /*************************************************************/
+ /* De quantization of coefficients. Will be replaced by SIMD */
+ /* operations on platform. Note : DC coeff is not scaled */
+ /*************************************************************/
+ for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
+ {
+ q = pi2_src[i];
+ INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
+ pi2_tmp_ptr[i] = q;
+ }
+ /* Perform Inverse transform */
+ /*--------------------------------------------------------------------*/
+ /* IDCT [ Horizontal transformation ] */
+ /*--------------------------------------------------------------------*/
+ for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
+ {
+ /*------------------------------------------------------------------*/
+ /* y0 = w0 + w4 */
+ /* y1 = -w3 + w5 - w7 - (w7 >> 1) */
+ /* y2 = w0 - w4 */
+ /* y3 = w1 + w7 - w3 - (w3 >> 1) */
+ /* y4 = (w2 >> 1) - w6 */
+ /* y5 = -w1 + w7 + w5 + (w5 >> 1) */
+ /* y6 = w2 + (w6 >> 1) */
+ /* y7 = w3 + w5 + w1 + (w1 >> 1) */
+ /*------------------------------------------------------------------*/
+ i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
+
+ i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7]
+ - (pi2_tmp_ptr[7] >> 1));
+
+ i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
+
+ i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3]
+ - (pi2_tmp_ptr[3] >> 1));
+
+ i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
+
+ i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5]
+ + (pi2_tmp_ptr[5] >> 1));
+
+ i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
+
+ i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1]
+ + (pi2_tmp_ptr[1] >> 1));
+
+ /*------------------------------------------------------------------*/
+ /* z0 = y0 + y6 */
+ /* z1 = y1 + (y7 >> 2) */
+ /* z2 = y2 + y4 */
+ /* z3 = y3 + (y5 >> 2) */
+ /* z4 = y2 - y4 */
+ /* z5 = (y3 >> 2) - y5 */
+ /* z6 = y0 - y6 */
+ /* z7 = y7 - (y1 >> 2) */
+ /*------------------------------------------------------------------*/
+ i_z0 = i_y0 + i_y6;
+ i_z1 = i_y1 + (i_y7 >> 2);
+ i_z2 = i_y2 + i_y4;
+ i_z3 = i_y3 + (i_y5 >> 2);
+ i_z4 = i_y2 - i_y4;
+ i_z5 = (i_y3 >> 2) - i_y5;
+ i_z6 = i_y0 - i_y6;
+ i_z7 = i_y7 - (i_y1 >> 2);
+
+ /*------------------------------------------------------------------*/
+ /* x0 = z0 + z7 */
+ /* x1 = z2 + z5 */
+ /* x2 = z4 + z3 */
+ /* x3 = z6 + z1 */
+ /* x4 = z6 - z1 */
+ /* x5 = z4 - z3 */
+ /* x6 = z2 - z5 */
+ /* x7 = z0 - z7 */
+ /*------------------------------------------------------------------*/
+ pi2_tmp_ptr[0] = i_z0 + i_z7;
+ pi2_tmp_ptr[1] = i_z2 + i_z5;
+ pi2_tmp_ptr[2] = i_z4 + i_z3;
+ pi2_tmp_ptr[3] = i_z6 + i_z1;
+ pi2_tmp_ptr[4] = i_z6 - i_z1;
+ pi2_tmp_ptr[5] = i_z4 - i_z3;
+ pi2_tmp_ptr[6] = i_z2 - i_z5;
+ pi2_tmp_ptr[7] = i_z0 - i_z7;
+
+ /* move to the next row */
+ //pi2_src_ptr += SUB_BLK_WIDTH_8x8;
+ pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
+ }
+ /*--------------------------------------------------------------------*/
+ /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
+ /* */
+ /* Add the prediction and store it back to reconstructed frame buffer */
+ /* [Prediction buffer itself in this case] */
+ /*--------------------------------------------------------------------*/
+
+ pi2_tmp_ptr = pi2_tmp;
+ for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
+ {
+ pu1_pred_ptr = pu1_pred;
+ pu1_out = pu1_out_ptr;
+ /*------------------------------------------------------------------*/
+ /* y0j = w0j + w4j */
+ /* y1j = -w3j + w5j -w7j -(w7j >> 1) */
+ /* y2j = w0j -w4j */
+ /* y3j = w1j + w7j -w3j -(w3j >> 1) */
+ /* y4j = ( w2j >> 1 ) -w6j */
+ /* y5j = -w1j + w7j + w5j + (w5j >> 1) */
+ /* y6j = w2j + ( w6j >> 1 ) */
+ /* y7j = w3j + w5j + w1j + (w1j >> 1) */
+ /*------------------------------------------------------------------*/
+ i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
+
+ i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56]
+ - (pi2_tmp_ptr[56] >> 1);
+
+ i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
+
+ i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24]
+ - (pi2_tmp_ptr[24] >> 1);
+
+ i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
+
+ i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40]
+ + (pi2_tmp_ptr[40] >> 1);
+
+ i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
+
+ i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8]
+ + (pi2_tmp_ptr[8] >> 1);
+
+ /*------------------------------------------------------------------*/
+ /* z0j = y0j + y6j */
+ /* z1j = y1j + (y7j >> 2) */
+ /* z2j = y2j + y4j */
+ /* z3j = y3j + (y5j >> 2) */
+ /* z4j = y2j -y4j */
+ /* z5j = (y3j >> 2) -y5j */
+ /* z6j = y0j -y6j */
+ /* z7j = y7j -(y1j >> 2) */
+ /*------------------------------------------------------------------*/
+ i_z0 = i_y0 + i_y6;
+ i_z1 = i_y1 + (i_y7 >> 2);
+ i_z2 = i_y2 + i_y4;
+ i_z3 = i_y3 + (i_y5 >> 2);
+ i_z4 = i_y2 - i_y4;
+ i_z5 = (i_y3 >> 2) - i_y5;
+ i_z6 = i_y0 - i_y6;
+ i_z7 = i_y7 - (i_y1 >> 2);
+
+ /*------------------------------------------------------------------*/
+ /* x0j = z0j + z7j */
+ /* x1j = z2j + z5j */
+ /* x2j = z4j + z3j */
+ /* x3j = z6j + z1j */
+ /* x4j = z6j -z1j */
+ /* x5j = z4j -z3j */
+ /* x6j = z2j -z5j */
+ /* x7j = z0j -z7j */
+ /*------------------------------------------------------------------*/
+ i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ /* Change uc_recBuffer to Point to next element in the same column*/
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+
+ pi2_tmp_ptr++;
+ pu1_out_ptr++;
+ pu1_pred++;
+ }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief Reconstructs an 8x8 block whose residue consists of a DC term only
+ *
+ * @par Description:
+ * pi2_src[0] is inverse quantized with INV_QUANT and rounded with
+ * (q + 32) >> 6. That single value is added to each of the 64 prediction
+ * samples and every sum is passed through CLIP_U8 before being stored.
+ * The block is walked column by column, top to bottom.
+ *
+ * pi2_tmp, iq_start_idx and pi2_dc_ld_addr are marked UNUSED.
+ *
+ * @returns none
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
+                                      UWORD8 *pu1_pred,
+                                      UWORD8 *pu1_out,
+                                      WORD32 pred_strd,
+                                      WORD32 out_strd,
+                                      const UWORD16 *pu2_iscale_mat,
+                                      const UWORD16 *pu2_weigh_mat,
+                                      UWORD32 qp_div,
+                                      WORD16 *pi2_tmp,
+                                      WORD32 iq_start_idx,
+                                      WORD16 *pi2_dc_ld_addr)
+{
+    WORD16 sum, col, row, dc_resi;
+    WORD32 q;
+    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
+
+    UNUSED(pi2_tmp);
+    UNUSED(iq_start_idx);
+    UNUSED(pi2_dc_ld_addr);
+
+    /* Inverse quantize the lone coefficient and round it to the residue */
+    /* value that is shared by every sample of the block                 */
+    q = pi2_src[0];
+    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
+    dc_resi = (q + 32) >> 6;
+
+    /* Add the prediction and store the clipped result, one column at a time */
+    for(col = 0; col < SUB_BLK_WIDTH_8x8; col++)
+    {
+        UWORD8 *pu1_pred_col = pu1_pred + col;
+        UWORD8 *pu1_out_col = pu1_out + col;
+
+        for(row = 0; row < SUB_BLK_WIDTH_8x8; row++)
+        {
+            sum = dc_resi + pu1_pred_col[row * pred_strd];
+            pu1_out_col[row * out_strd] = CLIP_U8(sum);
+        }
+    }
+}
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 chroma sub block from quantized
+ * residue and a prediction buffer
+ *
+ * @par Description:
+ * The quantized residue is first inverse quantized, then inverse transformed
+ * (horizontal pass into pi2_tmp, followed by a vertical pass). Each result is
+ * rounded with (x + 32) >> 6, added to the prediction sample and passed
+ * through CLIP_U8 to form the output.
+ * The first coefficient of the block is not read from pi2_src: it is taken
+ * from pi2_dc_src[0] and is not passed through INV_QUANT.
+ * Prediction and output samples of a row are accessed two bytes apart.
+ *
+ * @param[in] pi2_src
+ * quantized 4x4 block, 4 coefficients per row
+ *
+ * @param[in] pu1_pred
+ * prediction 4x4 block, one sample every 2 bytes
+ *
+ * @param[out] pu1_out
+ * reconstructed 4x4 block, one sample every 2 bytes
+ *
+ * @param[in] pred_strd,
+ * Prediction buffer stride
+ *
+ * @param[in] out_strd
+ * recon buffer Stride
+ *
+ * @param[in] pu2_iscal_mat
+ * first table handed to INV_QUANT, one entry per coefficient
+ *
+ * @param[in] pu2_weigh_mat
+ * second table handed to INV_QUANT, one entry per coefficient
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6)
+ *
+ * @param[in] pi2_tmp
+ * temporary buffer holding 16 WORD16 values
+ *
+ * @param[in] pi2_dc_src
+ * location of the value used as the first (DC) coefficient
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dc_src)
+{
+ WORD16 *pi2_src_ptr = pi2_src;
+ WORD16 *pi2_tmp_ptr = pi2_tmp;
+ UWORD8 *pu1_pred_ptr = pu1_pred;
+ UWORD8 *pu1_out_ptr = pu1_out;
+ WORD16 x0, x1, x2, x3, i;
+ WORD32 q0, q1, q2, q3;
+ WORD16 i_macro;
+ WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0; /* rounding term handed to INV_QUANT; zero once u4_qp_div_6 >= 4 */
+
+ /* inverse quantization, fused with the row pass below */
+ /* horizontal inverse transform: one row of 4 coefficients per iteration */
+ for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+ {
+ if(i==0)
+ {
+ q0 = pi2_dc_src[0]; /* DC term comes from pi2_dc_src and skips INV_QUANT */
+ }
+ else
+ {
+ q0 = pi2_src_ptr[0];
+ INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
+ }
+
+ q2 = pi2_src_ptr[2];
+ INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact,
+ 4);
+
+ x0 = q0 + q2;
+ x1 = q0 - q2;
+
+ q1 = pi2_src_ptr[1];
+ INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact,
+ 4);
+
+ q3 = pi2_src_ptr[3];
+ INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact,
+ 4);
+
+ x2 = (q1 >> 1) - q3;
+ x3 = q1 + (q3 >> 1);
+
+ pi2_tmp_ptr[0] = x0 + x3;
+ pi2_tmp_ptr[1] = x1 + x2;
+ pi2_tmp_ptr[2] = x1 - x2;
+ pi2_tmp_ptr[3] = x0 - x3;
+
+ pi2_src_ptr += SUB_BLK_WIDTH_4x4; /* advance source, scratch and both tables to the next row */
+ pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
+ pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
+ pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
+ }
+
+ /* vertical inverse transform: one column of pi2_tmp per iteration */
+ pi2_tmp_ptr = pi2_tmp;
+ for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+ {
+ pu1_pred_ptr = pu1_pred;
+ pu1_out = pu1_out_ptr;
+
+ x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]); /* column entries sit 4 apart in pi2_tmp */
+ x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
+ x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
+ x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
+
+ /* round the residue, add the prediction sample and clip; rows 0..3 */
+ i_macro = x0 + x3;
+ i_macro = ((i_macro + 32) >> 6);
+ i_macro += *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = x1 + x2;
+ i_macro = ((i_macro + 32) >> 6);
+ i_macro += *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = x1 - x2;
+ i_macro = ((i_macro + 32) >> 6);
+ i_macro += *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+ pu1_pred_ptr += pred_strd;
+ pu1_out += out_strd;
+
+ i_macro = x0 - x3;
+ i_macro = ((i_macro + 32) >> 6);
+ i_macro += *pu1_pred_ptr;
+ *pu1_out = CLIP_U8(i_macro);
+
+ pi2_tmp_ptr++;
+ pu1_out_ptr+= 2; //Interleaved store for output: next sample of this plane is 2 bytes on
+ pu1_pred+= 2; //Interleaved load for pred buffer: next sample of this plane is 2 bytes on
+ }
+}
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 chroma sub block from the prediction
+ * buffer when only a dc value is present for the residue
+ *
+ * @par Description:
+ * The value at pi2_dc_src[0] is rounded with (q + 32) >> 6 and added to each
+ * of the 16 prediction samples; every sum is passed through CLIP_U8 before it
+ * is stored. No inverse quantization is performed here.
+ * pi2_src, pu2_iscal_mat, pu2_weigh_mat, u4_qp_div_6 and pi2_tmp are marked
+ * UNUSED.
+ *
+ * @param[in] pu1_pred
+ * prediction 4x4 block in interleaved format (one sample every 2 bytes)
+ *
+ * @param[out] pu1_out
+ * reconstructed 4x4 block in interleaved format (one sample every 2 bytes)
+ *
+ * @param[in] pred_strd,
+ * Prediction buffer stride in interleaved format
+ *
+ * @param[in] out_strd
+ * recon buffer Stride
+ *
+ * @param[in] pi2_dc_src
+ * location of the dc value
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+
+void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
+                                             UWORD8 *pu1_pred,
+                                             UWORD8 *pu1_out,
+                                             WORD32 pred_strd,
+                                             WORD32 out_strd,
+                                             const UWORD16 *pu2_iscal_mat,
+                                             const UWORD16 *pu2_weigh_mat,
+                                             UWORD32 u4_qp_div_6,
+                                             WORD16 *pi2_tmp,
+                                             WORD16 *pi2_dc_src)
+{
+    WORD32 dc_val;
+    WORD16 sum, dc_resi, col, row;
+
+    UNUSED(pi2_src);
+    UNUSED(pu2_iscal_mat);
+    UNUSED(pu2_weigh_mat);
+    UNUSED(u4_qp_div_6);
+    UNUSED(pi2_tmp);
+
+    /* Residue shared by all 16 samples */
+    dc_val = pi2_dc_src[0];
+    dc_resi = ((dc_val + 32) >> 6);
+
+    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
+    {
+        /* Consecutive samples of this plane are two bytes apart */
+        UWORD8 *pu1_pred_col = pu1_pred + (2 * col);
+        UWORD8 *pu1_out_col = pu1_out + (2 * col);
+
+        for(row = 0; row < SUB_BLK_WIDTH_4x4; row++)
+        {
+            sum = dc_resi + pu1_pred_col[row * pred_strd];
+            pu1_out_col[row * out_strd] = CLIP_U8(sum);
+        }
+    }
+}
diff --git a/common/ih264_itrans_recon.h b/common/ih264_itrans_recon.h
new file mode 100755
index 0000000..fd1f239
--- /dev/null
+++ b/common/ih264_itrans_recon.h
@@ -0,0 +1,71 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_itrans_recon.h
+*
+* @brief
+* Contains function declarations for inverse transform and reconstruction of
+* the quantized macro blocks
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264_itrans_recon_ft
+* - ih264_itrans_recon_4x4
+* - ih264_itrans_recon_8x8
+* - ih264_itrans_recon_4x4_a9
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264_ITRANS_RECON_H_
+#define IH264_ITRANS_RECON_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+typedef void ih264_itrans_recon_ft(WORD16 *pi2_src, /* a function type (not a pointer type): used below to declare functions */
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_recon,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ UWORD32 q_lev,
+ WORD32 *pi4_tmp);
+
+/* C declarations: each name below is a function with the ih264_itrans_recon_ft signature */
+
+ih264_itrans_recon_ft ih264_itrans_recon_4x4;
+
+ih264_itrans_recon_ft ih264_itrans_recon_8x8;
+
+/* A9 declarations (presumably the ARM Cortex-A9 optimised variant - confirm) */
+
+ih264_itrans_recon_ft ih264_itrans_recon_4x4_a9;
+
+#endif /* IH264_ITRANS_RECON_H_ */
diff --git a/common/ih264_list.c b/common/ih264_list.c
new file mode 100755
index 0000000..736b41c
--- /dev/null
+++ b/common/ih264_list.c
@@ -0,0 +1,574 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_list.c
+*
+* @brief
+* Contains functions for buf queue
+*
+* @author
+* Harish
+*
+* @par List of Functions:
+* ih264_list_size()
+* ih264_list_lock()
+* ih264_list_unlock()
+* ih264_list_yield()
+* ih264_list_free()
+* ih264_list_init()
+* ih264_list_reset()
+* ih264_list_deinit()
+* ih264_list_terminate()
+* ih264_list_queue()
+* ih264_list_dequeue()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "ih264_typedefs.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_macros.h"
+#include "ih264_debug.h"
+#include "ih264_error.h"
+#include "ih264_list.h"
+
+/**
+*******************************************************************************
+*
+* @brief Returns the memory needed for a list: context, mutex and entry storage
+*
+* @par Description
+* Sums sizeof(list_t), the value reported by ithread_get_mutex_lock_size() and
+* the entry storage. The entry count used for the storage term is
+* 1 << (32 - CLZ(num_entries)) rather than num_entries itself.
+*
+* @param[in] num_entries
+* Number of entries requested
+*
+* @param[in] entry_size
+* Size of one entry
+*
+* @returns Total size required
+*
+* @remarks
+* NOTE(review): assuming CLZ counts the leading zero bits of a 32-bit value,
+* an exact power of two is rounded to the next one (8 -> 16) - confirm that
+* this over-allocation is intended.
+*
+*******************************************************************************
+*/
+WORD32 ih264_list_size(WORD32 num_entries, WORD32 entry_size)
+{
+    WORD32 total_size = sizeof(list_t);
+    WORD32 rounded_entries;
+
+    total_size += ithread_get_mutex_lock_size();
+
+    /* Entry storage is sized for a power-of-two number of entries */
+    rounded_entries = 1 << (32 - CLZ(num_entries));
+    total_size += rounded_entries * entry_size;
+
+    return total_size;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Acquires the mutex that guards the list context
+*
+* @par Description
+* Calls ithread_mutex_lock() on ps_list->pv_mutex and maps its result to an
+* IH264_ERROR_T value
+*
+* @param[in] ps_list
+* List context
+*
+* @returns IH264_FAIL if ithread_mutex_lock() returns non-zero, else
+* IH264_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_list_lock(list_t *ps_list)
+{
+    if(0 != ithread_mutex_lock(ps_list->pv_mutex))
+    {
+        return IH264_FAIL;
+    }
+
+    return IH264_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Releases the mutex that guards the list context
+*
+* @par Description
+* Calls ithread_mutex_unlock() on ps_list->pv_mutex and maps its result to an
+* IH264_ERROR_T value
+*
+* @param[in] ps_list
+* List context
+*
+* @returns IH264_FAIL if ithread_mutex_unlock() returns non-zero, else
+* IH264_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+IH264_ERROR_T ih264_list_unlock(list_t *ps_list)
+{
+    if(0 != ithread_mutex_unlock(ps_list->pv_mutex))
+    {
+        return IH264_FAIL;
+    }
+
+    return IH264_SUCCESS;
+}
+/**
+*******************************************************************************
+*
+* @brief
+* Gives other threads a chance to use the list
+*
+* @par Description
+* Releases the list mutex with ih264_list_unlock(), calls ithread_yield(),
+* optionally sleeps for i4_yeild_interval_us microseconds when that value is
+* positive, and finally re-acquires the mutex with ih264_list_lock().
+* The mutex is released first so that other threads can access and update
+* the list while this thread is yielding.
+*
+* @param[in] ps_list
+* List context
+*
+* @returns The error reported by ih264_list_unlock() or ih264_list_lock() if
+* either fails, else IH264_SUCCESS
+*
+* @remarks
+* When the unlock fails the function returns before yielding.
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_list_yield(list_t *ps_list)
+{
+    IH264_ERROR_T status;
+
+    /* Let go of the mutex so that others can make progress on the list */
+    status = ih264_list_unlock(ps_list);
+    RETURN_IF((status != IH264_SUCCESS), status);
+
+    ithread_yield();
+
+    /* Optional additional back-off chosen when the list was initialised */
+    if(ps_list->i4_yeild_interval_us > 0)
+    {
+        ithread_usleep(ps_list->i4_yeild_interval_us);
+    }
+
+    /* Take the mutex back before handing control to the caller */
+    status = ih264_list_lock(ps_list);
+    RETURN_IF((status != IH264_SUCCESS), status);
+
+    return IH264_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Destroys the mutex owned by the list
+*
+* @par Description
+* Calls ithread_mutex_destroy() on ps_list->pv_mutex. No memory is released
+* by this function.
+*
+* @param[in] ps_list
+* List context
+*
+* @returns IH264_SUCCESS if ithread_mutex_destroy() returns 0, else IH264_FAIL
+*
+* @remarks
+* Since it will be called only once by master thread this is not thread safe.
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_list_free(list_t *ps_list)
+{
+    WORD32 destroy_status = ithread_mutex_destroy(ps_list->pv_mutex);
+
+    return (0 == destroy_status) ? IH264_SUCCESS : IH264_FAIL;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the list
+*
+* @par Description
+* Lays out the list inside the caller supplied memory as: list_t context,
+* followed by the mutex, followed by the entry storage. Initializes the mutex
+* and sets the read and write indices to zero.
+*
+* @param[in] pv_buf
+* Memory for the list context, the mutex and the entry storage
+*
+* @param[in] buf_size
+* Size of the total memory allocated at pv_buf
+*
+* @param[in] num_entries
+* Number of entries in the list; asserted to be a power of two
+*
+* @param[in] entry_size
+* Size of one entry
+*
+* @param[in] yeild_interval_us
+* Sleep interval in microseconds used by ih264_list_yield(); no sleep is done
+* when it is not positive
+*
+* @returns Pointer to the list context, or NULL when pv_buf is NULL or
+* buf_size does not exceed the space taken by the context and the mutex
+*
+* @remarks
+* Since it will be called only once by master thread this is not thread safe.
+* The buffer is validated before anything is written to it, so an undersized
+* buffer is left untouched.
+* The result of ithread_mutex_init() is not checked.
+*
+*******************************************************************************
+*/
+void* ih264_list_init(void *pv_buf,
+                      WORD32 buf_size,
+                      WORD32 num_entries,
+                      WORD32 entry_size,
+                      WORD32 yeild_interval_us)
+{
+    list_t *ps_list;
+    UWORD8 *pu1_buf;
+    WORD32 ctxt_size;
+
+    /* Space taken by the context and by the mutex placed right after it */
+    ctxt_size = sizeof(list_t);
+    ctxt_size += ithread_get_mutex_lock_size();
+
+    /* Reject unusable memory before the first write into it */
+    if((NULL == pv_buf) || (buf_size <= ctxt_size))
+        return NULL;
+
+    pu1_buf = (UWORD8 *)pv_buf;
+
+    ps_list = (list_t *)pu1_buf;
+    pu1_buf += sizeof(list_t);
+
+    ps_list->pv_mutex = pu1_buf;
+    pu1_buf += ithread_get_mutex_lock_size();
+
+    /* What is left over is available for the entries */
+    buf_size -= ctxt_size;
+
+    ithread_mutex_init(ps_list->pv_mutex);
+
+    /* Ensure num_entries is power of two */
+    ASSERT(0 == (num_entries & (num_entries - 1)));
+
+    /* Ensure remaining buffer is large enough to hold given number of entries */
+    ASSERT((num_entries * entry_size) <= buf_size);
+
+    ps_list->pv_buf_base = pu1_buf;
+    ps_list->i4_terminate = 0;
+    ps_list->i4_entry_size = entry_size;
+    ps_list->i4_buf_rd_idx = 0;
+    ps_list->i4_buf_wr_idx = 0;
+    /* NOTE(review): for a power-of-two num_entries this looks like */
+    /* log2(num_entries) + 1 if CLZ counts leading zeros of a 32-bit value; */
+    /* value kept as in the original - confirm against users of the field */
+    ps_list->i4_log2_buf_max_idx = 32 - CLZ(num_entries);
+    ps_list->i4_buf_max_idx = num_entries;
+    ps_list->i4_yeild_interval_us = yeild_interval_us;
+
+    return ps_list;
+}
+/**
+*******************************************************************************
+*
+* @brief
+* Resets the list context
+*
+* @par Description
+* With the list mutex held, clears the terminate flag and sets the read and
+* write indices back to zero
+*
+* @param[in] ps_list
+* List context
+*
+* @returns The error reported by ih264_list_lock() or ih264_list_unlock() if
+* either fails, else IH264_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_list_reset(list_t *ps_list)
+{
+    IH264_ERROR_T status;
+
+    status = ih264_list_lock(ps_list);
+    RETURN_IF((status != IH264_SUCCESS), status);
+
+    /* Back to the freshly initialised, empty state */
+    ps_list->i4_terminate = 0;
+    ps_list->i4_buf_rd_idx = 0;
+    ps_list->i4_buf_wr_idx = 0;
+
+    status = ih264_list_unlock(ps_list);
+    RETURN_IF((status != IH264_SUCCESS), status);
+
+    return status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Deinitializes the list context
+*
+* @par Description
+* Calls ih264_list_reset() and, if that succeeds, destroys the list mutex with
+* ithread_mutex_destroy()
+*
+* @param[in] ps_list
+* List context
+*
+* @returns The error reported by ih264_list_reset() if it fails, IH264_FAIL if
+* ithread_mutex_destroy() returns non-zero, else IH264_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_list_deinit(list_t *ps_list)
+{
+    IH264_ERROR_T reset_status;
+    WORD32 destroy_status;
+
+    reset_status = ih264_list_reset(ps_list);
+    RETURN_IF((reset_status != IH264_SUCCESS), reset_status);
+
+    /* The mutex is only destroyed once the reset has gone through */
+    destroy_status = ithread_mutex_destroy(ps_list->pv_mutex);
+
+    return destroy_status ? IH264_FAIL : IH264_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Terminates the list
+*
+* @par Description
+* With the list mutex held, sets the i4_terminate flag in the context.
+* ih264_list_dequeue() checks this flag when it finds the list empty.
+*
+* @param[in] ps_list
+* List context
+*
+* @returns The error reported by ih264_list_lock() or ih264_list_unlock() if
+* either fails, else IH264_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+IH264_ERROR_T ih264_list_terminate(list_t *ps_list)
+{
+    IH264_ERROR_T status;
+
+    status = ih264_list_lock(ps_list);
+    RETURN_IF((status != IH264_SUCCESS), status);
+
+    ps_list->i4_terminate = 1;
+
+    status = ih264_list_unlock(ps_list);
+    RETURN_IF((status != IH264_SUCCESS), status);
+
+    return status;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Adds a buf to the queue
+*
+* @par Description
+* Copies one entry of i4_entry_size bytes from pv_buf into the list storage at
+* the current write position and advances the write index.
+* Format/content of the buf structure is abstracted; the number of bytes
+* copied is the entry size given to ih264_list_init().
+* If the list is full and the call is blocking, the function yields through
+* ih264_list_yield() and retries until there is room.
+*
+* @param[in] ps_list
+* List context
+*
+* @param[in] pv_buf
+* Pointer to the location that contains details of the buf to be added
+*
+* @param[in] blocking
+* To signal if the write is blocking or non-blocking.
+*
+* @returns IH264_SUCCESS when the entry was copied in; IH264_FAIL when the list
+* is full and blocking is zero; the error reported by ih264_list_lock() or
+* ih264_list_unlock() if either fails
+*
+* @remarks
+* The write index grows monotonically and is masked with
+* (i4_buf_max_idx - 1) to find the storage slot, so slots are reused
+* circularly.
+* The terminate flag is not examined while waiting for room.
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_list_queue(list_t *ps_list, void *pv_buf, WORD32 blocking)
+{
+ IH264_ERROR_T ret = IH264_SUCCESS;
+ IH264_ERROR_T rettmp;
+
+ WORD32 diff;
+ void *pv_buf_wr;
+
+ volatile WORD32 *pi4_wr_idx, *pi4_rd_idx;
+ WORD32 buf_size = ps_list->i4_entry_size;
+
+
+ rettmp = ih264_list_lock(ps_list);
+ RETURN_IF((rettmp != IH264_SUCCESS), rettmp);
+
+
+
+ while(1)
+ {
+ /* Ensure wr idx does not go beyond rd idx by more than number of entries;
+ * diff is the number of entries currently held in the list
+ */
+ pi4_wr_idx = &ps_list->i4_buf_wr_idx;
+ pi4_rd_idx = &ps_list->i4_buf_rd_idx;
+ diff = *pi4_wr_idx - *pi4_rd_idx;
+
+ if(diff < ps_list->i4_buf_max_idx)
+ {
+ WORD32 wr_idx;
+ wr_idx = ps_list->i4_buf_wr_idx & (ps_list->i4_buf_max_idx - 1); /* wrap into the storage; relies on a power-of-two entry count */
+ pv_buf_wr = (UWORD8 *)ps_list->pv_buf_base + wr_idx * buf_size;
+
+ memcpy(pv_buf_wr, pv_buf, buf_size);
+ ps_list->i4_buf_wr_idx++;
+ break;
+ }
+ else
+ {
+ /* list is full, so wait for a reader to consume an entry */
+ if(blocking)
+ {
+ ih264_list_yield(ps_list); /* NOTE(review): return value ignored - a failed unlock/lock inside yield goes unnoticed */
+ }
+ else
+ {
+ ret = IH264_FAIL;
+ break;
+ }
+ }
+
+ }
+ ps_list->i4_terminate = 0; /* NOTE(review): clears a pending terminate request on every call, including a failed non-blocking one - confirm intended */
+
+ rettmp = ih264_list_unlock(ps_list);
+ RETURN_IF((rettmp != IH264_SUCCESS), rettmp);
+
+ return ret;
+}
+/**
+*******************************************************************************
+*
+* @brief Gets the next buf from the queue
+*
+* @par Description
+* Copies one entry of i4_entry_size bytes from the current read position of
+* the list storage into pv_buf and advances the read index.
+* Format/content of the buf structure is abstracted; the number of bytes
+* copied is the entry size given to ih264_list_init(). If it is a blocking
+* call and if there is no new buf then this function unlocks the mutex, calls
+* yield and then locks it back (via ih264_list_yield()), and continues till a
+* buf is available or terminate is set
+*
+* @param[in] ps_list
+* List context
+*
+* @param[out] pv_buf
+* Pointer to the location that receives the details of the dequeued buf
+*
+* @param[in] blocking
+* To signal if the read is blocking or non-blocking.
+*
+* @returns IH264_SUCCESS when an entry was copied out; IH264_FAIL when the list
+* is empty and either terminate is set or blocking is zero; the error reported
+* by ih264_list_lock() or ih264_list_unlock() if either fails
+*
+* @remarks
+* The read index grows monotonically and is masked with
+* (i4_buf_max_idx - 1) to find the storage slot, so slots are reused
+* circularly.
+*
+*******************************************************************************
+*/
+IH264_ERROR_T ih264_list_dequeue(list_t *ps_list, void *pv_buf, WORD32 blocking)
+{
+ IH264_ERROR_T ret = IH264_SUCCESS;
+ IH264_ERROR_T rettmp;
+ WORD32 buf_size = ps_list->i4_entry_size;
+ WORD32 diff;
+
+ void *pv_buf_rd;
+ volatile WORD32 *pi4_wr_idx, *pi4_rd_idx;
+
+ rettmp = ih264_list_lock(ps_list);
+ RETURN_IF((rettmp != IH264_SUCCESS), rettmp);
+
+ while(1)
+ {
+ /* Ensure wr idx is ahead of rd idx;
+ * diff is the number of entries currently held in the list
+ */
+ pi4_wr_idx = &ps_list->i4_buf_wr_idx;
+ pi4_rd_idx = &ps_list->i4_buf_rd_idx;
+ diff = *pi4_wr_idx - *pi4_rd_idx;
+
+
+ if(diff > 0)
+ {
+ WORD32 rd_idx;
+ rd_idx = ps_list->i4_buf_rd_idx & (ps_list->i4_buf_max_idx - 1); /* wrap into the storage; relies on a power-of-two entry count */
+ pv_buf_rd = (UWORD8 *)ps_list->pv_buf_base + rd_idx * buf_size;
+
+ memcpy(pv_buf, pv_buf_rd, buf_size);
+ ps_list->i4_buf_rd_idx++;
+ break;
+ }
+ else
+ {
+ /* If terminate is signaled then break (only checked when the list is empty) */
+ if(ps_list->i4_terminate)
+ {
+ ret = IH264_FAIL;
+ break;
+ }
+ /* list is empty, so wait for a writer to queue an entry */
+ if(blocking)
+ {
+ ih264_list_yield(ps_list); /* NOTE(review): return value ignored - a failed unlock/lock inside yield goes unnoticed */
+ }
+ else
+ {
+ ret = IH264_FAIL;
+ break;
+ }
+ }
+
+ }
+
+
+ rettmp = ih264_list_unlock(ps_list);
+ RETURN_IF((rettmp != IH264_SUCCESS), rettmp);
+
+ return ret;
+}
diff --git a/common/ih264_list.h b/common/ih264_list.h
new file mode 100755
index 0000000..fc59d95
--- /dev/null
+++ b/common/ih264_list.h
@@ -0,0 +1,93 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_list.h
+*
+* @brief
+* Contains functions for buf queue
+*
+* @author
+* Harish
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IH264_LIST_H_
+#define _IH264_LIST_H_
+
+typedef struct
+{
+ /** Pointer to buffer base which contains the bufs (entry storage) */
+ void *pv_buf_base;
+
+ /** Mutex used to keep the functions thread-safe */
+ void *pv_mutex;
+
+ /** Current write index; only ever incremented, masked on use */
+ volatile WORD32 i4_buf_wr_idx;
+
+ /** Current read index; only ever incremented, masked on use */
+ volatile WORD32 i4_buf_rd_idx;
+
+ /** Number of entries the list can hold (num_entries given to ih264_list_init) */
+ WORD32 i4_buf_max_idx;
+
+ /** Set by ih264_list_init to 32 - CLZ(num_entries).
+ * The number of entries is required to be a power of two;
+ * this makes it easier to wrap around by using AND with buf_max_idx - 1
+ * */
+ WORD32 i4_log2_buf_max_idx;
+
+ /** Flag to indicate list has to be terminated */
+ WORD32 i4_terminate;
+
+ /** Size of each entry */
+ WORD32 i4_entry_size;
+
+ /** Interval in microseconds slept by ih264_list_yield when positive.
+ * If the list is to be used frequently send this as zero, else send a large value
+ * to ensure cores are not loaded unnecessarily.
+ * For eg: For picture level queues this can be a large value like 100us
+ * but for jobq this will be zero.
+ */
+ WORD32 i4_yeild_interval_us;
+
+}list_t;
+
+WORD32 ih264_list_size(WORD32 num_entries, WORD32 entry_size);
+void* ih264_list_init(void *pv_buf,
+ WORD32 buf_size,
+ WORD32 num_entries,
+ WORD32 entry_size,
+ WORD32 yeild_interval_us);
+IH264_ERROR_T ih264_list_free(list_t *ps_list);
+IH264_ERROR_T ih264_list_reset(list_t *ps_list);
+IH264_ERROR_T ih264_list_deinit(list_t *ps_list);
+IH264_ERROR_T ih264_list_terminate(list_t *ps_list);
+IH264_ERROR_T ih264_list_queue(list_t *ps_list, void *pv_buf, WORD32 blocking); /* blocking != 0: wait while the list is full */
+IH264_ERROR_T ih264_list_dequeue(list_t *ps_list, void *pv_buf, WORD32 blocking); /* blocking != 0: wait while the list is empty */
+
+#endif /* _IH264_LIST_H_ */
diff --git a/common/ih264_luma_intra_pred_filters.c b/common/ih264_luma_intra_pred_filters.c
new file mode 100755
index 0000000..4a5b143
--- /dev/null
+++ b/common/ih264_luma_intra_pred_filters.c
@@ -0,0 +1,1933 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_luma_intra_pred_filters.c
+ *
+ * @brief
+ * Contains function definitions for intra prediction filters
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ih264_intra_pred_luma_4x4_mode_vert
+ * - ih264_intra_pred_luma_4x4_mode_horz
+ * - ih264_intra_pred_luma_4x4_mode_dc
+ * - ih264_intra_pred_luma_4x4_mode_diag_dl
+ * - ih264_intra_pred_luma_4x4_mode_diag_dr
+ * - ih264_intra_pred_luma_4x4_mode_vert_r
+ * - ih264_intra_pred_luma_4x4_mode_horz_d
+ * - ih264_intra_pred_luma_4x4_mode_vert_l
+ * - ih264_intra_pred_luma_4x4_mode_horz_u
+ * - ih264_intra_pred_luma_8x8_mode_ref_filtering
+ * - ih264_intra_pred_luma_8x8_mode_vert
+ * - ih264_intra_pred_luma_8x8_mode_horz
+ * - ih264_intra_pred_luma_8x8_mode_dc
+ * - ih264_intra_pred_luma_8x8_mode_diag_dl
+ * - ih264_intra_pred_luma_8x8_mode_diag_dr
+ * - ih264_intra_pred_luma_8x8_mode_vert_r
+ * - ih264_intra_pred_luma_8x8_mode_horz_d
+ * - ih264_intra_pred_luma_8x8_mode_vert_l
+ * - ih264_intra_pred_luma_8x8_mode_horz_u
+ * - ih264_intra_pred_luma_16x16_mode_vert
+ * - ih264_intra_pred_luma_16x16_mode_horz
+ * - ih264_intra_pred_luma_16x16_mode_dc
+ * - ih264_intra_pred_luma_16x16_mode_plane
+ *
+ *
+ * @remarks
+ * None
+ *
+ ******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+
+/* User include files */
+#include "ih264_defs.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+
+/* Global tables used only in assembly files */
+const WORD8 ih264_gai1_intrapred_luma_plane_coeffs[] = /* the sixteen consecutive values 1..16 */
+{ 0x01, 0x02, 0x03, 0x04,
+ 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0A, 0x0B, 0x0C,
+ 0x0D, 0x0E, 0x0F, 0x10, };
+
+const WORD8 ih264_gai1_intrapred_luma_8x8_horz_u[] = /* NOTE(review): looks like a byte-index table for the 8x8 horizontal-up assembly kernel - confirm there */
+{ 0x06,0x15,0x05,0x14,
+ 0x04,0x13,0x03,0x12,
+ 0x02,0x11,0x01,0x10,
+ 0x00,0x1F,0x0F,0x0F
+};
+
+/******************* LUMA INTRAPREDICTION *******************/
+
+/******************* 4x4 Modes *******************/
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_vert
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:vertical
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ UNUSED(src_strd); /* pu1_src is indexed directly, no stride is applied to it */
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src + BLK_SIZE + 1; /* top predictors start BLK_SIZE + 1 bytes into pu1_src */
+
+ memcpy(pu1_dst, pu1_top, 4); /* every output row is a copy of the same 4 top bytes */
+ memcpy(pu1_dst + dst_strd, pu1_top, 4);
+ memcpy(pu1_dst + 2 * dst_strd, pu1_top, 4);
+ memcpy(pu1_dst + 3 * dst_strd, pu1_top, 4);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_horz
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:horizontal
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+
+ UNUSED(src_strd); /* pu1_src is indexed directly, no stride is applied to it */
+ UNUSED(ngbr_avail);
+ pu1_left = pu1_src + BLK_SIZE - 1; /* first left predictor; the following ones sit at decreasing addresses */
+
+ memset(pu1_dst, *pu1_left, 4); /* row 0 is filled with the first left byte */
+ memset(pu1_dst + dst_strd, *(pu1_left - 1), 4); /* each further row uses the next lower address */
+ memset(pu1_dst + 2 * dst_strd, *(pu1_left - 2), 4);
+ memset(pu1_dst + 3 * dst_strd, *(pu1_left - 3), 4);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_dc
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:DC
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
+ UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ WORD32 val = 0; /* running sum of the available predictors, then the DC value */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail); /* NOTE(review): ngbr_avail is in fact read on the next two lines, so this UNUSED() looks redundant */
+ u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
+ u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
+ pu1_top = pu1_src + BLK_SIZE + 1;
+ pu1_left = pu1_src + BLK_SIZE - 1; /* left predictors are walked towards lower addresses */
+
+ if(u1_useleft)
+ {
+ val += *pu1_left--;
+ val += *pu1_left--;
+ val += *pu1_left--;
+ val += *pu1_left + 2; /* fourth left byte plus a rounding term of 2 */
+ }
+ if(u1_usetop)
+ {
+ val += *pu1_top + *(pu1_top + 1) + *(pu1_top + 2) + *(pu1_top + 3)
+ + 2;
+ }
+ /* Since 2 is added if either left/top pred is there, val still being
+ zero implies both preds are not there; 128 is used in that case */
+ val = (val) ? (val >> (1 + u1_useleft + u1_usetop)) : 128; /* shift is 2 for one side, 3 for both, assuming BOOLEAN() yields 0 or 1 */
+
+ /* Each of the 4 output rows is filled with the DC value */
+ memset(pu1_dst, val, 4);
+ memset(pu1_dst + dst_strd, val, 4);
+ memset(pu1_dst + 2 * dst_strd, val, 4);
+ memset(pu1_dst + 3 * dst_strd, val, 4);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_diag_dl
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h; /* eight consecutive bytes read from pu1_top */
+ UWORD8 predicted_pixels[7]; /* the 7 filtered values shared by all four rows */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src +BLK_SIZE + 1;
+
+ ui4_a = *pu1_top++;
+ ui4_b = *pu1_top++;
+ ui4_c = *pu1_top++;
+ ui4_d = *pu1_top++;
+ ui4_e = *pu1_top++; /* from here on the bytes lie beyond the first four top predictors */
+ ui4_f = *pu1_top++;
+ ui4_g = *pu1_top++;
+ ui4_h = *pu1_top;
+
+ predicted_pixels[0] = FILT121(ui4_a, ui4_b, ui4_c);
+ predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d);
+ predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e);
+ predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f);
+ predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g);
+ predicted_pixels[5] = FILT121(ui4_f, ui4_g, ui4_h);
+ predicted_pixels[6] = FILT121(ui4_g, ui4_h, ui4_h); /* last tap repeats h, no byte after h is read */
+
+ memcpy(pu1_dst, predicted_pixels, 4); /* row r copies 4 bytes starting at predicted_pixels[r] */
+ memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 4);
+ memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 4);
+ memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 3, 4);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_diag_dr
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */
+ UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_i, ui4_j, ui4_k, ui4_l, ui4_m; /* a..d: top, i..l: left, m: top left */
+ UWORD8 predicted_pixels[7]; /* the 7 filtered values shared by all four rows */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src + BLK_SIZE + 1;
+ pu1_left = pu1_src + BLK_SIZE - 1; /* left predictors are walked towards lower addresses */
+ pu1_topleft = pu1_src +BLK_SIZE;
+
+ ui4_a = *pu1_top++;
+ ui4_b = *pu1_top++;
+ ui4_c = *pu1_top++;
+ ui4_d = *pu1_top++;
+ ui4_i = *pu1_left--;
+ ui4_j = *pu1_left--;
+ ui4_k = *pu1_left--;
+ ui4_l = *pu1_left;
+ ui4_m = *pu1_topleft;
+
+ predicted_pixels[2] = FILT121(ui4_j, ui4_i, ui4_m); /* entries 0..2 are built from the left side */
+ predicted_pixels[1] = FILT121(ui4_k, ui4_j, ui4_i);
+ predicted_pixels[0] = FILT121(ui4_l, ui4_k, ui4_j);
+ predicted_pixels[3] = FILT121(ui4_i, ui4_m, ui4_a); /* entry 3 is centred on the top left byte */
+ predicted_pixels[4] = FILT121(ui4_m, ui4_a, ui4_b); /* entries 4..6 are built from the top side */
+ predicted_pixels[5] = FILT121(ui4_a, ui4_b, ui4_c);
+ predicted_pixels[6] = FILT121(ui4_b, ui4_c, ui4_d);
+
+ memcpy(pu1_dst, predicted_pixels + 3, 4); /* row r copies 4 bytes starting at predicted_pixels[3 - r] */
+ memcpy(pu1_dst + dst_strd, predicted_pixels + 2, 4);
+ memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 1, 4);
+ memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_vert_r
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Vertical_Right
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+
+ UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_i, ui4_j, ui4_k, ui4_m; /* a..d: top, i..k: left, m: top left */
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */
+ UWORD8 predicted_pixels[10]; /* laid out so that each row is one contiguous 4-byte run */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src +BLK_SIZE + 1;
+ pu1_left = pu1_src + BLK_SIZE - 1; /* left predictors are walked towards lower addresses */
+ pu1_topleft = pu1_src + BLK_SIZE;
+
+ ui4_a = *pu1_top++;
+ ui4_b = *pu1_top++;
+ ui4_c = *pu1_top++;
+ ui4_d = *pu1_top++;
+ ui4_i = *pu1_left--;
+ ui4_j = *pu1_left--;
+ ui4_k = *pu1_left; /* only three left bytes are used by this mode */
+ ui4_m = *pu1_topleft;
+
+ predicted_pixels[6] = FILT11(ui4_m, ui4_a); /* [6..9]: row 0 */
+ predicted_pixels[7] = FILT11(ui4_a, ui4_b);
+ predicted_pixels[8] = FILT11(ui4_b, ui4_c);
+ predicted_pixels[9] = FILT11(ui4_c, ui4_d);
+ predicted_pixels[1] = FILT121(ui4_i, ui4_m, ui4_a); /* [1..4]: row 1 */
+ predicted_pixels[2] = FILT121(ui4_m, ui4_a, ui4_b);
+ predicted_pixels[3] = FILT121(ui4_a, ui4_b, ui4_c);
+ predicted_pixels[4] = FILT121(ui4_b, ui4_c, ui4_d);
+ predicted_pixels[5] = FILT121(ui4_j, ui4_i, ui4_m); /* [5] leads row 2, followed by [6..8] of row 0 */
+ predicted_pixels[0] = FILT121(ui4_k, ui4_j, ui4_i); /* [0] leads row 3, followed by [1..3] of row 1 */
+
+ memcpy(pu1_dst, predicted_pixels + 6, 4);
+ memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 4);
+ memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 5, 4);
+ memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_horz_d
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Horizontal_Down
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD8 *pu1_topleft = NULL;/* Pointer to top left predictor */
+ UWORD32 ui4_a, ui4_b, ui4_c, ui4_i, ui4_j, ui4_k, ui4_l, ui4_m; /* a..c: top, i..l: left, m: top left */
+ UWORD8 predicted_pixels[10]; /* each row is a 4-byte window, two entries apart from the next */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src + BLK_SIZE + 1;
+ pu1_left = pu1_src + BLK_SIZE - 1; /* left predictors are walked towards lower addresses */
+ pu1_topleft = pu1_src + BLK_SIZE;
+
+ ui4_a = *pu1_top++;
+ ui4_b = *pu1_top++;
+ ui4_c = *pu1_top++; /* only three top bytes are used by this mode */
+ ui4_i = *pu1_left--;
+ ui4_j = *pu1_left--;
+ ui4_k = *pu1_left--;
+ ui4_l = *pu1_left--; /* pu1_left is not read again after this decrement */
+ ui4_m = *pu1_topleft;
+
+ predicted_pixels[6] = FILT11(ui4_i, ui4_m);
+ predicted_pixels[7] = FILT121(ui4_i, ui4_m, ui4_a);
+ predicted_pixels[8] = FILT121(ui4_m, ui4_a, ui4_b);
+ predicted_pixels[9] = FILT121(ui4_a, ui4_b, ui4_c);
+ predicted_pixels[1] = FILT121(ui4_l, ui4_k, ui4_j);
+ predicted_pixels[2] = FILT11(ui4_k, ui4_j);
+ predicted_pixels[3] = FILT121(ui4_k, ui4_j, ui4_i);
+ predicted_pixels[4] = FILT11(ui4_j, ui4_i);
+ predicted_pixels[5] = FILT121(ui4_j, ui4_i, ui4_m);
+ predicted_pixels[0] = FILT11(ui4_l, ui4_k);
+
+ memcpy(pu1_dst, predicted_pixels + 6, 4); /* row r copies 4 bytes starting at predicted_pixels[6 - 2 * r] */
+ memcpy(pu1_dst + dst_strd, predicted_pixels + 4, 4);
+ memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 4);
+ memcpy(pu1_dst + 3 * dst_strd, predicted_pixels, 4);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_vert_l
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Vertical_Left
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g; /* seven consecutive bytes read from pu1_top */
+ UWORD8 predicted_pixels[10]; /* [0..4]: FILT121 results, [5..9]: FILT11 results */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src + BLK_SIZE + 1;
+
+ ui4_a = *pu1_top++;
+ ui4_b = *pu1_top++;
+ ui4_c = *pu1_top++;
+ ui4_d = *pu1_top++;
+ ui4_e = *pu1_top++; /* from here on the bytes lie beyond the first four top predictors */
+ ui4_f = *pu1_top++;
+ ui4_g = *pu1_top;
+
+ predicted_pixels[5] = FILT11(ui4_a, ui4_b);
+ predicted_pixels[6] = FILT11(ui4_b, ui4_c);
+ predicted_pixels[7] = FILT11(ui4_c, ui4_d);
+ predicted_pixels[8] = FILT11(ui4_d, ui4_e);
+ predicted_pixels[0] = FILT121(ui4_a, ui4_b, ui4_c);
+ predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d);
+ predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e);
+ predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f);
+ predicted_pixels[9] = FILT11(ui4_e, ui4_f);
+ predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g);
+
+ memcpy(pu1_dst, predicted_pixels + 5, 4); /* rows 0 and 2 read the FILT11 half, at offsets 5 and 6 */
+ memcpy(pu1_dst + dst_strd, predicted_pixels, 4); /* rows 1 and 3 read the FILT121 half, at offsets 0 and 1 */
+ memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 6, 4);
+ memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 1, 4);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_4x4_mode_horz_u
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Horizontal_Up
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD32 ui4_i, ui4_j, ui4_k, ui4_l; /* the four left bytes, i read first and l last */
+ UWORD8 predicted_pixels[10]; /* each row is a 4-byte window, two entries apart from the next */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+ pu1_left = pu1_src + BLK_SIZE - 1; /* left predictors are walked towards lower addresses */
+
+ ui4_i = *pu1_left--;
+ ui4_j = *pu1_left--;
+ ui4_k = *pu1_left--;
+ ui4_l = *pu1_left--; /* pu1_left is not read again after this decrement */
+
+ predicted_pixels[0] = FILT11(ui4_j, ui4_i);
+ predicted_pixels[1] = FILT121(ui4_k, ui4_j, ui4_i);
+ predicted_pixels[2] = FILT11(ui4_k, ui4_j);
+ predicted_pixels[3] = FILT121(ui4_l, ui4_k, ui4_j);
+ predicted_pixels[4] = FILT11(ui4_l, ui4_k);
+ predicted_pixels[5] = FILT121(ui4_l, ui4_l, ui4_k); /* l is passed twice, no byte beyond l is read */
+ predicted_pixels[6] = ui4_l; /* the remaining entries are plain copies of l */
+ predicted_pixels[7] = ui4_l;
+ predicted_pixels[8] = ui4_l;
+ predicted_pixels[9] = ui4_l;
+
+ memcpy(pu1_dst, predicted_pixels, 4); /* row r copies 4 bytes starting at predicted_pixels[2 * r] */
+ memcpy(pu1_dst + dst_strd, predicted_pixels + 2, 4);
+ memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 4, 4);
+ memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 6, 4);
+}
+
+/******************* 8x8 Modes *******************/
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_8x8_mode_ref_filtering
+ *
+ * @brief
+ * Reference sample filtering process for Intra_8x8 sample prediction
+ *
+ * @par Description:
+ * Perform Reference sample filtering process for Intra_8x8 sample prediction ,described in sec 8.3.2.2.1
+ *
+ * @param[in] pu1_left
+ * UWORD8 pointer to the left neighbours, read at multiples of left_strd
+ *
+ * @param[in] pu1_topleft
+ * UWORD8 pointer to the top-left neighbour
+ *
+ * @param[in] pu1_top
+ * UWORD8 pointer to the top neighbours (top-right ones follow at pu1_top + 8)
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the filtered output: [0..7] left, [8] top-left, [9..24] top
+ *
+ * @param[in] left_strd, ngbr_avail
+ * stride between left neighbours; neighbour availability flags (used here)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_left,
+ UWORD8 *pu1_topleft,
+ UWORD8 *pu1_top,
+ UWORD8 *pu1_dst,
+ WORD32 left_strd,
+ WORD32 ngbr_avail)
+{
+ WORD32 top_avail, left_avail, top_left_avail, top_right_avail; /* one flag per neighbour, decoded from ngbr_avail */
+
+ left_avail = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
+ top_avail = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
+ top_left_avail = BOOLEAN(ngbr_avail & TOP_LEFT_MB_AVAILABLE_MASK);
+ top_right_avail = BOOLEAN(ngbr_avail & TOP_RIGHT_MB_AVAILABLE_MASK);
+
+ if(top_avail) /* top and top-right outputs go to pu1_dst[9..24] */
+ {
+ WORD32 i;
+ UWORD32 u4_xm1; /* un-filtered value of the previous position */
+
+ if(!top_right_avail)
+ {
+ memset(pu1_dst + 8 + 1 + 8, pu1_top[7], 8); /* top-right missing: replicate the last top byte */
+ top_right_avail = 1; /* NOTE(review): the flag is not read again after this store */
+ }
+ else
+ {
+ memcpy(pu1_dst + 8 + 1 + 8, pu1_top + 8, 8); /* stage the un-filtered top-right bytes in the output */
+ }
+
+ if(top_left_avail)
+ {
+ pu1_dst[8 + 1 + 0] = FILT121((*pu1_topleft), pu1_top[0],
+ pu1_top[1]);
+
+ }
+ else
+ {
+ pu1_dst[8 + 1] = ((3 * pu1_top[0]) + pu1_top[1] + 2) >> 2; /* no top-left: weight 3 on top[0] instead */
+ }
+
+ for(i = 1; i <= 6; i++)
+ {
+ pu1_dst[8 + 1 + i] = FILT121(pu1_top[i - 1], pu1_top[i],
+ pu1_top[i + 1]);
+
+ }
+ /* First byte of Top Right input is in pu1_dst[8 + 1 + 8] */
+ pu1_dst[8 + 1 + 7] = FILT121(pu1_top[6], pu1_top[7],
+ pu1_dst[8 + 1 + 8]);
+
+ /* filtered output and source share the same buffer here, so the
+ un-filtered value at (x - 1) is carried in u4_xm1 before it is overwritten */
+ u4_xm1 = pu1_top[7];
+
+ for(i = 8; i <= 14; i++)
+ {
+ UWORD32 u4_x;
+ u4_x = (u4_xm1 + (pu1_dst[8 + 1 + i] << 1) + pu1_dst[8 + 1 + i + 1]
+ + 2) >> 2;
+ /* assigning u4_xm1 from the un-filtered values for the next iteration */
+ u4_xm1 = pu1_dst[8 + 1 + i];
+ pu1_dst[8 + 1 + i] = u4_x;
+ }
+
+ pu1_dst[8 + 1 + 15] = (u4_xm1 + (3 * pu1_dst[8 + 1 + 15]) + 2) >> 2; /* last position has no right neighbour: weight 3 on itself */
+
+ }
+
+ /* Top-left output, pu1_dst[8]. This branch is gated by the */
+ /* top_left_avail flag, whereas the left-column code below tests */
+ /* pu1_topleft against 0 instead. NOTE(review): confirm that */
+ /* callers keep the flag and the pointer's null-ness in agreement. */
+ if(top_left_avail)
+ {
+ if((!top_avail) || (!left_avail))
+ {
+ if(top_avail)
+ pu1_dst[8] = (3 * pu1_topleft[0] + pu1_top[0] + 2) >> 2;
+ else if(left_avail) /* with neither top nor left available, pu1_dst[8] is not written */
+ pu1_dst[8] = (3 * pu1_topleft[0] + pu1_left[0] + 2) >> 2;
+ }
+ else
+ {
+ pu1_dst[8] = FILT121(pu1_top[0], (*pu1_topleft), pu1_left[0]);
+ }
+ }
+
+ if(left_avail) /* left outputs go to pu1_dst[7] down to pu1_dst[0] */
+ {
+ UWORD32 idx; /* NOTE(review): idx is unsigned, so idx * left_strd is presumably evaluated as unsigned - confirm left_strd is never negative */
+ if(0 != pu1_topleft)
+ {
+ pu1_dst[7] = FILT121((*pu1_topleft), pu1_left[0],
+ pu1_left[left_strd]);
+ }
+ else
+ {
+ pu1_dst[7] = ((3 * pu1_left[0]) + pu1_left[left_strd] + 2) >> 2;
+ }
+
+ for(idx = 1; idx <= 6; idx++)
+ {
+ pu1_dst[7 - idx] = FILT121(pu1_left[(idx - 1) * left_strd],
+ pu1_left[idx * left_strd],
+ pu1_left[(idx + 1) * left_strd]);
+
+ }
+ pu1_dst[0] = (pu1_left[6 * left_strd] + 3 * pu1_left[7 * left_strd] + 2)
+ >> 2;
+
+ }
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_8x8_mode_vert
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:vertical
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ UNUSED(src_strd); /* pu1_src is indexed directly, no stride is applied to it */
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src + BLK8x8SIZE + 1; /* top predictors start BLK8x8SIZE + 1 bytes into pu1_src */
+
+ memcpy(pu1_dst, pu1_top, 8); /* every output row is a copy of the same 8 top bytes */
+ memcpy(pu1_dst + dst_strd, pu1_top, 8);
+ memcpy(pu1_dst + 2 * dst_strd, pu1_top, 8);
+ memcpy(pu1_dst + 3 * dst_strd, pu1_top, 8);
+ memcpy(pu1_dst + 4 * dst_strd, pu1_top, 8);
+ memcpy(pu1_dst + 5 * dst_strd, pu1_top, 8);
+ memcpy(pu1_dst + 6 * dst_strd, pu1_top, 8);
+ memcpy(pu1_dst + 7 * dst_strd, pu1_top, 8);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_8x8_mode_horz
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:horizontal
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.3
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1; /* first left predictor; the following ones sit at decreasing addresses */
+ UNUSED(src_strd); /* pu1_src is indexed directly, no stride is applied to it */
+ UNUSED(ngbr_avail);
+ memset(pu1_dst, *pu1_left, 8); /* row r is filled with the byte at pu1_left - r */
+ memset(pu1_dst + dst_strd, *(pu1_left - 1), 8);
+ memset(pu1_dst + 2 * dst_strd, *(pu1_left - 2), 8);
+ memset(pu1_dst + 3 * dst_strd, *(pu1_left - 3), 8);
+ memset(pu1_dst + 4 * dst_strd, *(pu1_left - 4), 8);
+ memset(pu1_dst + 5 * dst_strd, *(pu1_left - 5), 8);
+ memset(pu1_dst + 6 * dst_strd, *(pu1_left - 6), 8);
+ memset(pu1_dst + 7 * dst_strd, *(pu1_left - 7), 8);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_8x8_mode_dc
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:DC
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.4
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
+ UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ WORD32 row; /* index over the BLK8x8SIZE predictors of one side */
+ WORD32 val = 0; /* running sum of the available predictors, then the DC value */
+ UNUSED(src_strd);
+
+ u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
+ u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
+ pu1_top = pu1_src + BLK8x8SIZE + 1;
+ pu1_left = pu1_src + BLK8x8SIZE - 1;
+
+ if(u1_useleft)
+ {
+ for(row = 0; row < BLK8x8SIZE; row++)
+ val += *(pu1_left - row); /* left predictors are read towards lower addresses */
+ val += 4; /* rounding term for this side */
+ }
+ if(u1_usetop)
+ {
+ for(row = 0; row < BLK8x8SIZE; row++)
+ val += *(pu1_top + row);
+ val += 4; /* rounding term for this side */
+ }
+
+ /* Since 4 is added if either left/top pred is there, val still being
+ zero implies both preds are not there; 128 is used in that case */
+ val = (val) ? (val >> (2 + u1_useleft + u1_usetop)) : 128; /* shift is 3 for one side, 4 for both, assuming BOOLEAN() yields 0 or 1 */
+
+ memset(pu1_dst, val, 8); /* each of the 8 output rows is filled with the DC value */
+ memset(pu1_dst + dst_strd, val, 8);
+ memset(pu1_dst + 2 * dst_strd, val, 8);
+ memset(pu1_dst + 3 * dst_strd, val, 8);
+ memset(pu1_dst + 4 * dst_strd, val, 8);
+ memset(pu1_dst + 5 * dst_strd, val, 8);
+ memset(pu1_dst + 6 * dst_strd, val, 8);
+ memset(pu1_dst + 7 * dst_strd, val, 8);
+}
+
+/**
+ *******************************************************************************
+ *
+ *ih264_intra_pred_luma_8x8_mode_diag_dl
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.5
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ UWORD32 ui4_a, ui4_b, ui4_c, ui4_d, ui4_e, ui4_f, ui4_g, ui4_h; /* first eight bytes read from pu1_top */
+ UWORD32 ui4_i, ui4_j, ui4_k, ui4_l, ui4_m, ui4_n, ui4_o, ui4_p; /* the eight bytes that follow them */
+ UWORD8 predicted_pixels[15]; /* the 15 filtered values shared by all eight rows */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src + BLK8x8SIZE + 1;
+
+ ui4_a = *pu1_top++;
+ ui4_b = *pu1_top++;
+ ui4_c = *pu1_top++;
+ ui4_d = *pu1_top++;
+ ui4_e = *pu1_top++;
+ ui4_f = *pu1_top++;
+ ui4_g = *pu1_top++;
+ ui4_h = *pu1_top++;
+ ui4_i = *pu1_top++; /* from here on the bytes lie beyond the first eight top predictors */
+ ui4_j = *pu1_top++;
+ ui4_k = *pu1_top++;
+ ui4_l = *pu1_top++;
+ ui4_m = *pu1_top++;
+ ui4_n = *pu1_top++;
+ ui4_o = *pu1_top++;
+ ui4_p = *pu1_top;
+
+ predicted_pixels[0] = FILT121(ui4_a, ui4_b, ui4_c);
+ predicted_pixels[1] = FILT121(ui4_b, ui4_c, ui4_d);
+ predicted_pixels[2] = FILT121(ui4_c, ui4_d, ui4_e);
+ predicted_pixels[3] = FILT121(ui4_d, ui4_e, ui4_f);
+ predicted_pixels[4] = FILT121(ui4_e, ui4_f, ui4_g);
+ predicted_pixels[5] = FILT121(ui4_f, ui4_g, ui4_h);
+ predicted_pixels[6] = FILT121(ui4_g, ui4_h, ui4_i);
+ predicted_pixels[7] = FILT121(ui4_h, ui4_i, ui4_j);
+ predicted_pixels[8] = FILT121(ui4_i, ui4_j, ui4_k);
+ predicted_pixels[9] = FILT121(ui4_j, ui4_k, ui4_l);
+ predicted_pixels[10] = FILT121(ui4_k, ui4_l, ui4_m);
+ predicted_pixels[11] = FILT121(ui4_l, ui4_m, ui4_n);
+ predicted_pixels[12] = FILT121(ui4_m, ui4_n, ui4_o);
+ predicted_pixels[13] = FILT121(ui4_n, ui4_o, ui4_p);
+ predicted_pixels[14] = FILT121(ui4_o, ui4_p, ui4_p); /* last tap repeats p, no byte after p is read */
+
+ memcpy(pu1_dst, predicted_pixels, 8); /* row r copies 8 bytes starting at predicted_pixels[r] */
+ memcpy(pu1_dst + dst_strd, predicted_pixels + 1, 8);
+ memcpy(pu1_dst + 2 * dst_strd, predicted_pixels + 2, 8);
+ memcpy(pu1_dst + 3 * dst_strd, predicted_pixels + 3, 8);
+ memcpy(pu1_dst + 4 * dst_strd, predicted_pixels + 4, 8);
+ memcpy(pu1_dst + 5 * dst_strd, predicted_pixels + 5, 8);
+ memcpy(pu1_dst + 6 * dst_strd, predicted_pixels + 6, 8);
+ memcpy(pu1_dst + 7 * dst_strd, predicted_pixels + 7, 8);
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_diag_dr
+ *
+ * @brief
+ *  Intra prediction for luma_8x8 mode: Diagonal_Down_Right
+ *
+ * @par Description:
+ *  Builds the Diagonal_Down_Right prediction (sec 8.3.2.2.6). Seventeen
+ *  neighbours are read from one contiguous run of pu1_src: eight left pixels
+ *  ending at pu1_src[BLK8x8SIZE - 1], the top-left pixel at
+ *  pu1_src[BLK8x8SIZE] and eight top pixels from pu1_src[BLK8x8SIZE + 1].
+ *  Fifteen FILT121 values are computed along that run; each output row is an
+ *  8-byte window into them, sliding one entry per row.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels (layout as described above)
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 8 rows of 8 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
+                                            UWORD8 *pu1_dst,
+                                            WORD32 src_strd,
+                                            WORD32 dst_strd,
+                                            WORD32 ngbr_avail)
+{
+    /* Address order: [0..7] left, [8] top-left, [9..16] top */
+    UWORD8 *pu1_nbr = pu1_src + BLK8x8SIZE - 8;
+    UWORD32 au4_nbr[17];
+    UWORD8 au1_diag[15];
+    WORD32 idx, row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(idx = 0; idx < 17; idx++)
+    {
+        au4_nbr[idx] = pu1_nbr[idx];
+    }
+
+    /* Entries 0..7: taps listed from the higher address to the lower one */
+    for(idx = 0; idx < 8; idx++)
+    {
+        au1_diag[idx] = FILT121(au4_nbr[idx + 2], au4_nbr[idx + 1], au4_nbr[idx]);
+    }
+
+    /* Entries 8..14: taps listed from the lower address to the higher one */
+    for(idx = 8; idx < 15; idx++)
+    {
+        au1_diag[idx] = FILT121(au4_nbr[idx], au4_nbr[idx + 1], au4_nbr[idx + 2]);
+    }
+
+    /* Row r starts at entry (7 - r) */
+    for(row = 0; row < 8; row++)
+    {
+        memcpy(pu1_dst + row * dst_strd, au1_diag + 7 - row, 8);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_vert_r
+ *
+ * @brief
+ *  Intra prediction for luma_8x8 mode: Vertical_Right
+ *
+ * @par Description:
+ *  Builds the Vertical_Right prediction (sec 8.3.2.2.7). Sixteen neighbours
+ *  are read from one contiguous run of pu1_src: seven left pixels ending at
+ *  pu1_src[BLK8x8SIZE - 1], the top-left pixel at pu1_src[BLK8x8SIZE] and
+ *  eight top pixels from pu1_src[BLK8x8SIZE + 1]. Even output rows are
+ *  windows into entries 0..10 of the scratch array, odd rows are windows
+ *  into entries 11..21.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels (layout as described above)
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 8 rows of 8 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_dst,
+                                           WORD32 src_strd,
+                                           WORD32 dst_strd,
+                                           WORD32 ngbr_avail)
+{
+    /* Address order: [0..6] left, [7] top-left, [8..15] top */
+    UWORD8 *pu1_nbr = pu1_src + BLK8x8SIZE - 7;
+    UWORD32 au4_nbr[16];
+    UWORD8 au1_pred[22];
+    WORD32 idx, row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(idx = 0; idx < 16; idx++)
+    {
+        au4_nbr[idx] = pu1_nbr[idx];
+    }
+
+    /* Leading entries of both runs: 3-tap values stepping two neighbours at a time */
+    for(idx = 0; idx < 3; idx++)
+    {
+        WORD32 base = 2 * idx;
+
+        au1_pred[idx] = FILT121(au4_nbr[base + 1], au4_nbr[base + 2], au4_nbr[base + 3]);
+        au1_pred[11 + idx] = FILT121(au4_nbr[base], au4_nbr[base + 1], au4_nbr[base + 2]);
+    }
+
+    /* Entries 3..10: 2-tap values over top-left and top neighbours */
+    for(idx = 0; idx < 8; idx++)
+    {
+        au1_pred[3 + idx] = FILT11(au4_nbr[7 + idx], au4_nbr[8 + idx]);
+    }
+
+    /* Entry 14: 3-tap value centred on the top-left pixel */
+    au1_pred[14] = FILT121(au4_nbr[8], au4_nbr[7], au4_nbr[6]);
+
+    /* Entries 15..21: 3-tap values over top-left and top neighbours */
+    for(idx = 0; idx < 7; idx++)
+    {
+        au1_pred[15 + idx] = FILT121(au4_nbr[7 + idx], au4_nbr[8 + idx], au4_nbr[9 + idx]);
+    }
+
+    /* Even row 2k starts at entry (3 - k); odd row 2k+1 starts at entry (14 - k) */
+    for(row = 0; row < 8; row++)
+    {
+        WORD32 half = row >> 1;
+        WORD32 start = (row & 1) ? (14 - half) : (3 - half);
+
+        memcpy(pu1_dst + row * dst_strd, au1_pred + start, 8);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_horz_d
+ *
+ * @brief
+ *  Intra prediction for luma_8x8 mode: Horizontal_Down
+ *
+ * @par Description:
+ *  Builds the Horizontal_Down prediction (sec 8.3.2.2.8). Sixteen neighbours
+ *  are read from one contiguous run of pu1_src: eight left pixels ending at
+ *  pu1_src[BLK8x8SIZE - 1], the top-left pixel at pu1_src[BLK8x8SIZE] and
+ *  seven top pixels from pu1_src[BLK8x8SIZE + 1]. The scratch array
+ *  alternates 2-tap and 3-tap values for its first 16 entries, followed by
+ *  six 3-tap values; each output row is an 8-byte window that moves two
+ *  entries per row.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels (layout as described above)
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 8 rows of 8 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+
+void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_dst,
+                                           WORD32 src_strd,
+                                           WORD32 dst_strd,
+                                           WORD32 ngbr_avail)
+{
+    /* Address order: [0..7] left, [8] top-left, [9..15] top */
+    UWORD8 *pu1_nbr = pu1_src + BLK8x8SIZE - 8;
+    UWORD32 au4_nbr[16];
+    UWORD8 au1_pred[22];
+    WORD32 idx, row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(idx = 0; idx < 16; idx++)
+    {
+        au4_nbr[idx] = pu1_nbr[idx];
+    }
+
+    /* Entries 0..15: interleaved 2-tap (even index) and 3-tap (odd index) values */
+    for(idx = 0; idx < 8; idx++)
+    {
+        au1_pred[2 * idx] = FILT11(au4_nbr[idx + 1], au4_nbr[idx]);
+        au1_pred[2 * idx + 1] = FILT121(au4_nbr[idx + 2], au4_nbr[idx + 1], au4_nbr[idx]);
+    }
+
+    /* Entries 16..21: 3-tap values running along the top neighbours */
+    for(idx = 0; idx < 6; idx++)
+    {
+        au1_pred[16 + idx] = FILT121(au4_nbr[10 + idx], au4_nbr[9 + idx], au4_nbr[8 + idx]);
+    }
+
+    /* Row r starts at entry (14 - 2r) */
+    for(row = 0; row < 8; row++)
+    {
+        memcpy(pu1_dst + row * dst_strd, au1_pred + 14 - 2 * row, 8);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_vert_l
+ *
+ * @brief
+ *  Intra prediction for luma_8x8 mode: Vertical_Left
+ *
+ * @par Description:
+ *  Builds the Vertical_Left prediction (sec 8.3.2.2.9) from the thirteen top
+ *  pixels starting at pu1_src[BLK8x8SIZE + 1]. Entries 0..10 of the scratch
+ *  array hold 2-tap values and feed the even rows; entries 11..21 hold 3-tap
+ *  values and feed the odd rows.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels (layout as described above)
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 8 rows of 8 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+
+void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_dst,
+                                           WORD32 src_strd,
+                                           WORD32 dst_strd,
+                                           WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_top = pu1_src + BLK8x8SIZE + 1;
+    UWORD32 au4_top[13];
+    UWORD8 au1_pred[22];
+    WORD32 idx, pair;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(idx = 0; idx < 13; idx++)
+    {
+        au4_top[idx] = pu1_top[idx];
+    }
+
+    for(idx = 0; idx < 11; idx++)
+    {
+        au1_pred[idx] = FILT11(au4_top[idx], au4_top[idx + 1]);
+        au1_pred[11 + idx] = FILT121(au4_top[idx], au4_top[idx + 1], au4_top[idx + 2]);
+    }
+
+    /* Even rows first (0, 2, 4, 6), each shifted one entry further */
+    for(pair = 0; pair < 4; pair++)
+    {
+        memcpy(pu1_dst + (2 * pair) * dst_strd, au1_pred + pair, 8);
+    }
+
+    /* Then the odd rows (1, 3, 5, 7) from the 3-tap run */
+    for(pair = 0; pair < 4; pair++)
+    {
+        memcpy(pu1_dst + (2 * pair + 1) * dst_strd, au1_pred + 11 + pair, 8);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_horz_u
+ *
+ * @brief
+ *  Intra prediction for luma_8x8 mode: Horizontal_Up
+ *
+ * @par Description:
+ *  Builds the Horizontal_Up prediction (sec 8.3.2.2.10) from eight left
+ *  pixels, read from pu1_src[BLK8x8SIZE - 1] downwards in address. The
+ *  scratch array alternates 2-tap and 3-tap values for entries 0..13 and is
+ *  padded with the last left pixel for entries 14..21; each output row is
+ *  an 8-byte window that moves two entries per row.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels (layout as described above)
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 8 rows of 8 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+
+void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_dst,
+                                           WORD32 src_strd,
+                                           WORD32 dst_strd,
+                                           WORD32 ngbr_avail)
+
+{
+    UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1;
+    UWORD32 au4_left[8];    /* [0] is read first; [7] is the lowest address */
+    UWORD8 au1_pred[22];
+    WORD32 idx, row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(idx = 0; idx < 8; idx++)
+    {
+        au4_left[idx] = *(pu1_left - idx);
+    }
+
+    /* Entries 0..11: interleaved 2-tap (even index) and 3-tap (odd index) values */
+    for(idx = 0; idx < 6; idx++)
+    {
+        au1_pred[2 * idx] = FILT11(au4_left[idx], au4_left[idx + 1]);
+        au1_pred[2 * idx + 1] = FILT121(au4_left[idx], au4_left[idx + 1], au4_left[idx + 2]);
+    }
+
+    /* The final pair repeats the last left pixel in place of a missing neighbour */
+    au1_pred[12] = FILT11(au4_left[6], au4_left[7]);
+    au1_pred[13] = FILT121(au4_left[6], au4_left[7], au4_left[7]);
+
+    /* Everything beyond that is the last left pixel itself */
+    memset(au1_pred + 14, au4_left[7], 8);
+
+    /* Row r starts at entry 2r */
+    for(row = 0; row < 8; row++)
+    {
+        memcpy(pu1_dst + row * dst_strd, au1_pred + 2 * row, 8);
+    }
+}
+
+
+/******************* 16x16 Modes *******************/
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_16x16_mode_vert
+ *
+ * @brief
+ *  Intra prediction for luma_16x16 mode: Vertical
+ *
+ * @par Description:
+ *  Vertical prediction (sec 8.3.3.1): the 16 top pixels starting at
+ *  pu1_src[MB_SIZE + 1] are copied into each of the 16 output rows.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 16 rows of 16 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+
+void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_dst,
+                                           WORD32 src_strd,
+                                           WORD32 dst_strd,
+                                           WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_top_row = pu1_src + MB_SIZE + 1;
+    WORD32 row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(row = 0; row < 16; row++)
+    {
+        memcpy(pu1_dst, pu1_top_row, 16);
+        pu1_dst += dst_strd;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_16x16_mode_horz
+ *
+ * @brief
+ *  Intra prediction for luma_16x16 mode: Horizontal
+ *
+ * @par Description:
+ *  Horizontal prediction (sec 8.3.3.2): output row r is filled with the left
+ *  pixel at pu1_src[MB_SIZE - 1 - r].
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 16 rows of 16 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+
+void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
+                                           UWORD8 *pu1_dst,
+                                           WORD32 src_strd,
+                                           WORD32 dst_strd,
+                                           WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_left_px = pu1_src + MB_SIZE - 1; /* left predictor for row 0 */
+    WORD32 row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    for(row = 0; row < 16; row++)
+    {
+        /* Replicate this row's left pixel across the whole row */
+        memset(pu1_dst, *pu1_left_px, 16);
+        pu1_left_px--;
+        pu1_dst += dst_strd;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_16x16_mode_dc
+ *
+ * @brief
+ *  Intra prediction for luma_16x16 mode: DC
+ *
+ * @par Description:
+ *  DC prediction (sec 8.3.3.3). The block is filled with the rounded mean of
+ *  whichever of the 16 left pixels (pu1_src[MB_SIZE - 1] downwards) and the
+ *  16 top pixels (pu1_src[MB_SIZE + 1] upwards) are flagged available in
+ *  ngbr_avail. If the accumulated sum is zero (as when neither side is
+ *  available) the block is filled with 128.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 16 rows of 16 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels; tested against
+ *  LEFT_MB_AVAILABLE_MASK and TOP_MB_AVAILABLE_MASK
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+
+void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
+                                         UWORD8 *pu1_dst,
+                                         WORD32 src_strd,
+                                         WORD32 dst_strd,
+                                         WORD32 ngbr_avail)
+{
+    WORD32 left_avail = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
+    WORD32 top_avail = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
+    WORD32 dc_sum = 0;
+    WORD32 dc_val;
+    WORD32 idx;
+
+    UNUSED(src_strd);
+
+    if(left_avail)
+    {
+        for(idx = 0; idx < 16; idx++)
+        {
+            dc_sum += pu1_src[MB_SIZE - 1 - idx];
+        }
+        dc_sum += 8; /* rounding term for this side */
+    }
+
+    if(top_avail)
+    {
+        for(idx = 0; idx < 16; idx++)
+        {
+            dc_sum += pu1_src[MB_SIZE + 1 + idx];
+        }
+        dc_sum += 8; /* rounding term for this side */
+    }
+
+    /* Each available side contributed its rounding term, so a zero sum
+       means nothing was accumulated; fall back to 128 */
+    if(dc_sum != 0)
+    {
+        dc_val = dc_sum >> (3 + left_avail + top_avail);
+    }
+    else
+    {
+        dc_val = 128;
+    }
+
+    for(idx = 0; idx < 16; idx++)
+    {
+        memset(pu1_dst, dc_val, 16);
+        pu1_dst += dst_strd;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_16x16_mode_plane
+ *
+ * @brief
+ *  Intra prediction for luma_16x16 mode: PLANE
+ *
+ * @par Description:
+ *  Plane prediction (sec 8.3.3.4). With top[x] = pu1_src[MB_SIZE + 1 + x],
+ *  left[y] = pu1_src[MB_SIZE - 1 - y] and the top-left pixel at
+ *  pu1_src[MB_SIZE] (used as top[-1] and left[-1]):
+ *
+ *    H = Sum(k * (top[7 + k]  - top[7 - k])),  k = 1..8
+ *    V = Sum(k * (left[7 + k] - left[7 - k])), k = 1..8
+ *    a = 16 * (top[15] + left[15])
+ *    b = (5 * H + 32) >> 6
+ *    c = (5 * V + 32) >> 6
+ *    dst[y][x] = CLIP_U8((a + b * (x - 7) + c * (y - 7) + 16) >> 5)
+ *
+ *  The gradients may be negative, so they are scaled with multiplications:
+ *  left-shifting a negative value is undefined behaviour in C. The right
+ *  shifts rely on the compiler shifting negative values arithmetically.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour pixels (layout as described above)
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination; 16 rows of 16 bytes are written
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride, in bytes, between output rows
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************/
+
+void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
+                                            UWORD8 *pu1_dst,
+                                            WORD32 src_strd,
+                                            WORD32 dst_strd,
+                                            WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_left = pu1_src + MB_SIZE - 1;   /* left[0]; left[y] is pu1_left[-y] */
+    UWORD8 *pu1_top = pu1_src + MB_SIZE + 1;    /* top[0] */
+    UWORD8 *pu1_topleft = pu1_src + MB_SIZE;    /* corner pixel */
+    WORD32 a, b, c;
+    WORD32 h_grad = 0, v_grad = 0;
+    WORD32 k, i, j;
+    WORD32 row_term, col_term, val;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    a = 16 * (pu1_top[15] + pu1_left[-15]);
+
+    /* Weighted differences for k = 1..7, symmetric about index 7 */
+    for(k = 1; k < 8; k++)
+    {
+        h_grad += k * (pu1_top[7 + k] - pu1_top[7 - k]);
+        v_grad += k * (pu1_left[-(7 + k)] - pu1_left[-(7 - k)]);
+    }
+
+    /* k = 8 pairs the far end of each edge with the top-left corner */
+    h_grad += 8 * (pu1_top[15] - *pu1_topleft);
+    v_grad += 8 * (pu1_left[-15] - *pu1_topleft);
+
+    b = (5 * h_grad + 32) >> 6;     /* (5*H + 32) >> 6 */
+    c = (5 * v_grad + 32) >> 6;     /* (5*V + 32) >> 6 */
+
+    /* Walk the plane incrementally: row_term = a + 16 + c*(y - 7),
+       col_term = b*(x - 7) */
+    row_term = a + 16 - 8 * c;
+    for(i = 0; i < 16; i++)
+    {
+        row_term += c;
+        col_term = -8 * b;
+        for(j = 0; j < 16; j++)
+        {
+            col_term += b;
+            val = (row_term + col_term) >> 5;
+            *pu1_dst++ = CLIP_U8(val);
+        }
+        pu1_dst += (dst_strd - 16);
+    }
+}
diff --git a/common/ih264_macros.h b/common/ih264_macros.h
new file mode 100755
index 0000000..6e4cb16
--- /dev/null
+++ b/common/ih264_macros.h
@@ -0,0 +1,110 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*********************************************************************************
+* @file
+* ih264_macros.h
+*
+* @brief
+* Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IH264_MACROS_H_
+#define _IH264_MACROS_H_
+
+/*****************************************************************************/
+/* Function Macros                                                           */
+/*****************************************************************************/
+/* Expands to a bare if-statement: do not follow a use of it with `else` */
+#define RETURN_IF(cond, retval) if(cond) {return (retval);}
+#define UNUSED(x) ((void)(x))
+
+/* Round x up to the next multiple of 128/64/32/16/8/4 */
+#define ALIGN128(x) ((((x) + 127) >> 7) << 7)
+#define ALIGN64(x) ((((x) + 63) >> 6) << 6)
+#define ALIGN32(x) ((((x) + 31) >> 5) << 5)
+#define ALIGN16(x) ((((x) + 15) >> 4) << 4)
+#define ALIGN8(x) ((((x) + 7) >> 3) << 3)
+#define ALIGN4(x) ((((x) + 3) >> 2) << 2)
+
+
+/**
+******************************************************************************
+ * @brief Min, Max
+ *        Arguments and results are fully parenthesized so the macros can be
+ *        used with, and inside, arbitrary expressions. Arguments may be
+ *        evaluated more than once: do not pass expressions with side effects.
+******************************************************************************
+ */
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#define MIN3(a,b,c) (((a) < (b)) ? (((a) < (c)) ? (a) : (c)) : (((b) < (c)) ? (b) : (c)))
+#define MAX3(a,b,c) (((a) > (b)) ? (((a) > (c)) ? (a) : (c)) : (((b) > (c)) ? (b) : (c)))
+/**
+******************************************************************************
+ * @brief Div, Mod
+******************************************************************************
+ */
+#define MOD(x,y) ((x)%(y))
+#define DIV(x,y) ((x)/(y))
+
+/**
+******************************************************************************
+ * @brief Clip y to the range [miny, maxy]
+******************************************************************************
+ */
+#define CLIP3(miny, maxy, y) (((y) < (miny))?(miny):(((y) > (maxy))?(maxy):(y)))
+
+/**
+******************************************************************************
+ * @brief True, False (maps any non-zero value to 1)
+******************************************************************************
+ */
+#define BOOLEAN(x) (!!(x))
+
+/**
+******************************************************************************
+ * @brief Frequently used multiplications x2. x3, and x4
+******************************************************************************
+ */
+#define X2(a) ((a) << 1)
+#define X3(a) (((a) << 1) + (a))
+#define X4(a) ((a) << 2)
+
+/**
+******************************************************************************
+ * @brief Misc
+******************************************************************************
+ */
+#define ABS(x) ((x) < 0 ? (-(x)) : (x))
+#define SIGNXY(x,y) (((y) < 0) ? (-1 * (x)) : (x))
+
+#define SIGN(x) (((x) >= 0) ? (((x) > 0) ? 1 : 0) : -1)
+
+/* RESET_BIT and SET_BIT are statements that carry their own trailing
+   semicolon (kept for existing call sites); avoid them in unbraced if/else */
+#define RESET_BIT(x, pos) (x) = (x) & ~(1 << (pos));
+#define SET_BIT(x, pos) (x) = (x) | (1 << (pos));
+#define GET_BIT(x, pos) ((((x) >> (pos))) & 0x1)
+
+#define INSERT_BIT(x, pos, bit) { RESET_BIT(x, pos); (x) = (x) | ((bit) << (pos)); }
+#endif /*_IH264_MACROS_H_*/
+
+
diff --git a/common/ih264_mem_fns.c b/common/ih264_mem_fns.c
new file mode 100755
index 0000000..1c1f328
--- /dev/null
+++ b/common/ih264_mem_fns.c
@@ -0,0 +1,176 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_mem_fns.c
+ *
+ * @brief
+ * Functions used for memory operations
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * ih264_memcpy()
+ * ih264_memcpy_mul_8()
+ * ih264_memset()
+ * ih264_memset_mul_8()
+ * ih264_memset_16bit()
+ * ih264_memset_16bit_mul_8()
+ *
+ * @remarks
+ * None
+ *
+ ******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_mem_fns.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Copy num_bytes bytes from source to destination
+ *
+ * @par Description:
+ *  Thin wrapper that forwards directly to the C library memcpy(), so the
+ *  usual memcpy() rule applies: the two regions must not overlap.
+ *
+ * @param[in] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the source
+ *
+ * @param[in] num_bytes
+ *  number of bytes to copy
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************
+ */
+
+void ih264_memcpy(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes)
+{
+ memcpy(pu1_dst, pu1_src, num_bytes);
+}
+
+
+/* Same contract as ih264_memcpy(): forwards to the C library memcpy().
+ * NOTE(review): the name suggests num_bytes is expected to be a multiple of 8;
+ * this C version does not depend on that - confirm against the callers. */
+void ih264_memcpy_mul_8(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes)
+{
+ memcpy(pu1_dst, pu1_src, num_bytes);
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Fill num_bytes bytes of the destination with an 8-bit value
+ *
+ * @par Description:
+ *  Thin wrapper that forwards directly to the C library memset().
+ *
+ * @param[in] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] value
+ *  UWORD8 value written to every byte
+ *
+ * @param[in] num_bytes
+ *  number of bytes to set
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************
+ */
+
+void ih264_memset(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes)
+{
+ memset(pu1_dst, value, num_bytes);
+}
+
+
+/* Same contract as ih264_memset(): forwards to the C library memset().
+ * NOTE(review): the name suggests num_bytes is expected to be a multiple of 8;
+ * this C version does not depend on that - confirm against the callers. */
+void ih264_memset_mul_8(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes)
+{
+ memset(pu1_dst, value, num_bytes);
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Fill a run of 16-bit words with one value
+ *
+ * @par Description:
+ *  Writes value into num_words consecutive UWORD16 elements starting at
+ *  pu2_dst. Nothing is written when num_words is 0.
+ *
+ * @param[in] pu2_dst
+ *  UWORD16 pointer to the destination
+ *
+ * @param[in] value
+ *  UWORD16 value written to every element
+ *
+ * @param[in] num_words
+ *  number of 16-bit words to set
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************
+ */
+
+void ih264_memset_16bit(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words)
+{
+    UWORD16 *pu2_end = pu2_dst + num_words;
+
+    while(pu2_dst != pu2_end)
+    {
+        *pu2_dst = value;
+        pu2_dst++;
+    }
+}
+
+/* Writes value into num_words consecutive UWORD16 elements starting at
+ * pu2_dst; this C version does the same work as ih264_memset_16bit(). */
+void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
+                              UWORD16 value,
+                              UWORD32 num_words)
+{
+    UWORD32 remaining = num_words;
+
+    while(remaining > 0)
+    {
+        *pu2_dst = value;
+        pu2_dst++;
+        remaining--;
+    }
+}
+
diff --git a/common/ih264_mem_fns.h b/common/ih264_mem_fns.h
new file mode 100755
index 0000000..e0167f4
--- /dev/null
+++ b/common/ih264_mem_fns.h
@@ -0,0 +1,126 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_mem_fns.h
+*
+* @brief
+* Function declarations used for memory functions
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IH264_MEM_FNS_H_
+#define _IH264_MEM_FNS_H_
+/* Function types shared by the C and platform-specific memory routines declared below */
+typedef void ih264_memcpy_ft(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+
+typedef void ih264_memcpy_mul_8_ft(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Fills a byte buffer with a constant 8bit value
+ *
+ * @par Description:
+ * Function type for routines that set num_bytes bytes at pu1_dst to value
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] value
+ * UWORD8 value used for memset
+ *
+ * @param[in] num_bytes
+ * number of bytes to set
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+typedef void ih264_memset_ft(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+typedef void ih264_memset_mul_8_ft(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Fills a buffer of 16bit elements with a constant 16bit value
+ *
+ * @par Description:
+ * Function type for routines that set num_words UWORD16 elements at pu2_dst to value
+ *
+ * @param[out] pu2_dst
+ * UWORD16 pointer to the destination
+ *
+ * @param[in] value
+ * UWORD16 value used for memset
+ *
+ * @param[in] num_words
+ * number of 16bit elements to set (not a byte count)
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+typedef void ih264_memset_16bit_ft(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words);
+
+typedef void ih264_memset_16bit_mul_8_ft(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words);
+
+/* C function declarations */
+ih264_memcpy_ft ih264_memcpy;
+ih264_memcpy_mul_8_ft ih264_memcpy_mul_8;
+ih264_memset_ft ih264_memset;
+ih264_memset_mul_8_ft ih264_memset_mul_8;
+ih264_memset_16bit_ft ih264_memset_16bit;
+ih264_memset_16bit_mul_8_ft ih264_memset_16bit_mul_8;
+
+/* A9 Q function declarations */
+ih264_memcpy_ft ih264_memcpy_a9q;
+ih264_memcpy_mul_8_ft ih264_memcpy_mul_8_a9q;
+ih264_memset_ft ih264_memset_a9q;
+ih264_memset_mul_8_ft ih264_memset_mul_8_a9q;
+ih264_memset_16bit_ft ih264_memset_16bit_a9q;
+ih264_memset_16bit_mul_8_ft ih264_memset_16bit_mul_8_a9q;
+
+/* AV8 function declarations */
+ih264_memcpy_ft ih264_memcpy_av8;
+ih264_memcpy_mul_8_ft ih264_memcpy_mul_8_av8;
+ih264_memset_ft ih264_memset_av8;
+ih264_memset_mul_8_ft ih264_memset_mul_8_av8;
+ih264_memset_16bit_ft ih264_memset_16bit_av8;
+ih264_memset_16bit_mul_8_ft ih264_memset_16bit_mul_8_av8;
+
+/* SSSE3 function declarations (only the mul_8 variants are declared) */
+ih264_memcpy_mul_8_ft ih264_memcpy_mul_8_ssse3;
+ih264_memset_mul_8_ft ih264_memset_mul_8_ssse3;
+ih264_memset_16bit_mul_8_ft ih264_memset_16bit_mul_8_ssse3;
+#endif /* _IH264_MEM_FNS_H_ */
diff --git a/common/ih264_padding.c b/common/ih264_padding.c
new file mode 100755
index 0000000..8e8f3e2
--- /dev/null
+++ b/common/ih264_padding.c
@@ -0,0 +1,331 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264_padding.c
+*
+* @brief
+* Contains function definitions for Padding
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264_pad_top()
+* - ih264_pad_bottom()
+* - ih264_pad_left_luma()
+* - ih264_pad_left_chroma()
+* - ih264_pad_right_luma()
+* - ih264_pad_right_chroma()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stddef.h>
+#include <string.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_padding.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief pad at the top of a 2d array
+*
+* @par Description:
+* The row at pu1_src is copied into each of the pad_size rows above it
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the row that is replicated (the top row of the array)
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] wd
+* integer width of the array (number of bytes copied per row)
+*
+* @param[in] pad_size
+* integer number of rows written above pu1_src
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264_pad_top(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 wd,
+ WORD32 pad_size)
+{
+ WORD32 row;
+ /* padding row k is written k strides above the source row */
+ for(row = 1; row <= pad_size; row++)
+ {
+ memcpy(pu1_src - row * src_strd, pu1_src, wd);
+ }
+}
+
+
+
+/**
+*******************************************************************************
+*
+* @brief pad at the bottom of a 2d array
+*
+* @par Description:
+* The row one stride above pu1_src is copied into pad_size rows starting at pu1_src
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the first row to be written; the row replicated is at pu1_src - src_strd
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] wd
+* integer width of the array (number of bytes copied per row)
+*
+* @param[in] pad_size
+* integer number of rows written, starting at pu1_src and going down
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264_pad_bottom(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 wd,
+ WORD32 pad_size)
+{
+ WORD32 row;
+ /* unlike ih264_pad_top, pu1_src is the first destination row, not the source row */
+ for(row = 1; row <= pad_size; row++)
+ {
+ memcpy(pu1_src + (row - 1) * src_strd, pu1_src - 1 * src_strd, wd);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief pad (luma block) at the left of a 2d array
+*
+* @par Description:
+* For each of ht rows, the byte at pu1_src is written to the pad_size bytes on its left
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the first byte of the top row (the value that is replicated)
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] ht
+* integer height of the array (number of rows processed)
+*
+* @param[in] pad_size
+* integer number of bytes written to the left of each row
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+ */
+void ih264_pad_left_luma(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 ht,
+ WORD32 pad_size)
+{
+ WORD32 row;
+
+ for(row = 0; row < ht; row++)
+ {
+ /* fill [pu1_src - pad_size, pu1_src) with the first byte of this row */
+ memset(pu1_src - pad_size, *pu1_src, pad_size);
+
+ pu1_src += src_strd;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief pad (chroma block) at the left of a 2d array
+*
+* @par Description:
+* For each of ht rows, the first 16bit element of the row is written to the elements on its left
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the start of the top row; it is accessed through a UWORD16 pointer
+*
+* @param[in] src_strd
+* integer source stride; halved internally and applied to the UWORD16 pointer
+*
+* @param[in] ht
+* integer height of the array (number of rows processed)
+*
+* @param[in] pad_size
+* integer padding size; halved internally to the number of 16bit elements written per row
+*
+* @returns none
+*
+* @remarks NOTE(review): assumes pu1_src is suitably aligned for UWORD16 access -- confirm with callers
+*
+*******************************************************************************
+*/
+void ih264_pad_left_chroma(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 ht,
+ WORD32 pad_size)
+{
+ /* loop counters and the 16bit value being replicated */
+ WORD32 row, col;
+ UWORD16 u2_uv_val;
+
+ /* view of the source as 16bit elements (presumably one interleaved U/V pair each -- TODO confirm) */
+ UWORD16 *pu2_src = (UWORD16 *)pu1_src;
+ /* from here on stride and pad size count 16bit elements */
+ src_strd >>= 1;
+ pad_size >>= 1;
+
+ for(row = 0; row < ht; row++)
+ {
+ u2_uv_val = pu2_src[0];
+ /* negative indices address the elements to the left of the row start */
+ for (col = -pad_size; col < 0; col++)
+ {
+ pu2_src[col] = u2_uv_val;
+ }
+
+ pu2_src += src_strd;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief pad (luma block) at the right of a 2d array
+*
+* @par Description:
+* For each of ht rows, the byte just before pu1_src is written to pad_size bytes starting at pu1_src
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the first byte to be written in the top row; the value replicated is pu1_src[-1]
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] ht
+* integer height of the array (number of rows processed)
+*
+* @param[in] pad_size
+* integer number of bytes written per row
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264_pad_right_luma(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 ht,
+ WORD32 pad_size)
+{
+ WORD32 row;
+
+ for(row = 0; row < ht; row++)
+ {
+ memset(pu1_src, *(pu1_src -1), pad_size); /* replicate the byte immediately left of pu1_src */
+
+ pu1_src += src_strd;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief pad (chroma block) at the right of a 2d array
+*
+* @par Description:
+* For each of ht rows, the 16bit element just before pu1_src is written to the elements starting at pu1_src
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the first location to be written in the top row; accessed through a UWORD16 pointer
+*
+* @param[in] src_strd
+* integer source stride; halved internally and applied to the UWORD16 pointer
+*
+* @param[in] ht
+* integer height of the array (number of rows processed)
+*
+* @param[in] pad_size
+* integer padding size; halved internally to the number of 16bit elements written per row
+*
+* @returns none
+*
+* @remarks NOTE(review): assumes pu1_src is suitably aligned for UWORD16 access -- confirm with callers
+*
+*******************************************************************************
+*/
+void ih264_pad_right_chroma(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 ht,
+ WORD32 pad_size)
+{
+ WORD32 row, col;
+ UWORD16 u2_uv_val;
+ UWORD16 *pu2_src = (UWORD16 *)pu1_src;
+ /* from here on stride and pad size count 16bit elements */
+ src_strd >>= 1;
+ pad_size >>= 1;
+
+ for(row = 0; row < ht; row++)
+ {
+ u2_uv_val = pu2_src[-1];
+ /* replicate the element immediately left of the write position */
+ for (col = 0; col < pad_size; col++)
+ {
+ pu2_src[col] = u2_uv_val;
+ }
+
+ pu2_src += src_strd;
+ }
+}
+
diff --git a/common/ih264_padding.h b/common/ih264_padding.h
new file mode 100755
index 0000000..e4e18fb
--- /dev/null
+++ b/common/ih264_padding.h
@@ -0,0 +1,74 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264_padding.h
+*
+* @brief
+* Declarations for padding functions
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IH264_PADDING_H_
+#define _IH264_PADDING_H_
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+/* Common signature; ih264_padding.c names the arguments (pu1_src, src_strd, wd or ht, pad_size) */
+typedef void ih264_pad(UWORD8 *, WORD32, WORD32, WORD32);
+
+/* C function declarations */
+ih264_pad ih264_pad_top;
+ih264_pad ih264_pad_bottom;
+ih264_pad ih264_pad_left_luma;
+ih264_pad ih264_pad_left_chroma;
+ih264_pad ih264_pad_right_luma;
+ih264_pad ih264_pad_right_chroma;
+
+/* A9 Q function declarations (no pad_bottom variant is declared) */
+ih264_pad ih264_pad_top_a9q;
+ih264_pad ih264_pad_left_luma_a9q;
+ih264_pad ih264_pad_left_chroma_a9q;
+ih264_pad ih264_pad_right_luma_a9q;
+ih264_pad ih264_pad_right_chroma_a9q;
+
+/* AV8 function declarations (no pad_bottom variant is declared) */
+ih264_pad ih264_pad_top_av8;
+ih264_pad ih264_pad_left_luma_av8;
+ih264_pad ih264_pad_left_chroma_av8;
+ih264_pad ih264_pad_right_luma_av8;
+ih264_pad ih264_pad_right_chroma_av8;
+
+/* SSSE3 function declarations (only the left/right variants are declared) */
+ih264_pad ih264_pad_left_luma_ssse3;
+ih264_pad ih264_pad_left_chroma_ssse3;
+ih264_pad ih264_pad_right_luma_ssse3;
+ih264_pad ih264_pad_right_chroma_ssse3;
+
+#endif /*_IH264_PADDING_H_*/
diff --git a/common/ih264_resi_trans.h b/common/ih264_resi_trans.h
new file mode 100755
index 0000000..ee0add3
--- /dev/null
+++ b/common/ih264_resi_trans.h
@@ -0,0 +1,70 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_resi_trans.h
+*
+* @brief
+* Functions declarations for residue and forward transform
+*
+* @par List of Functions:
+* - ih264_resi_trans_ft
+* - ih264_resi_trans_4x4
+* - ih264_resi_trans_8x8
+* - ih264_resi_trans_4x4_a9
+* - ih264_resi_trans_8x8_a9
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264_RESI_TRANS_H_
+#define IH264_RESI_TRANS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+/* Function type shared by every residue and forward transform variant declared below */
+typedef void ih264_resi_trans_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 out_strd);
+
+/* C function declarations */
+
+ih264_resi_trans_ft ih264_resi_trans_4x4;
+
+ih264_resi_trans_ft ih264_resi_trans_8x8;
+
+/* A9 function declarations */
+
+ih264_resi_trans_ft ih264_resi_trans_4x4_a9;
+
+ih264_resi_trans_ft ih264_resi_trans_8x8_a9;
+
+#endif /* IH264_RESI_TRANS_H_ */
diff --git a/common/ih264_resi_trans_quant.c b/common/ih264_resi_trans_quant.c
new file mode 100755
index 0000000..cf1d43c
--- /dev/null
+++ b/common/ih264_resi_trans_quant.c
@@ -0,0 +1,814 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_resi_trans_quant.c
+ *
+ * @brief
+ * Contains function definitions for single stage forward transform for H.264
+ * It will calculate the residue, do the cf and then do quantization
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ih264_resi_trans_quant_4x4()
+ * - ih264_resi_trans_quant_chroma_4x4
+ * - ih264_hadamard_quant_4x4
+ * - ih264_hadamard_quant_2x2_uv
+ * - ih264_resi_trans_quant_8x8
+ *
+ * @remarks
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stddef.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_macros.h"
+#include "ih264_trans_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs forward transform and quantization on a 4*4 block
+ *
+ * @par Description:
+ * The function accepts source buffer and estimation buffer. From these, it
+ * computes the residue. This residue is then transformed and quantized.
+ * The transform and quantization are computed in place. They use the output
+ * buffer (pi2_out) for this.
+ *
+ * @param[in] pu1_src
+ * Pointer to source sub-block
+ *
+ * @param[in] pu1_pred
+ * Pointer to prediction sub-block
+ *
+ * @param[out] pi2_out
+ * Pointer to the 16 output coefficients, written as 4 rows of 4
+ *
+ * @param[in] src_strd
+ * Source stride
+ *
+ * @param[in] pred_strd
+ * Prediction stride
+ *
+ * @param[out] pi2_alt_dc_addr
+ * Receives the DC term of the transform before it is passed to FWD_QUANT
+ *
+ * @param[in] u4_qbits
+ * QP_BITS_h264_4x4 + floor(QP/6)
+ *
+ * @param[in] pu2_threshold_matrix
+ * Pointer to Forward Quant Threshold Matrix (16 entries, read as row * 4 + column)
+ *
+ * @param[in] pu2_scale_matrix
+ * Pointer to Forward Quant Scale Matrix (16 entries, read as row * 4 + column)
+ *
+ * @param[in] u4_round_factor
+ * Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ * Total non-zero coefficients in the current sub-block
+ *
+ * @returns
+ *
+ * @remarks
+ * Doubling is written as * 2 (not << 1) because the operands can be negative
+ *
+ *******************************************************************************
+ */
+void ih264_resi_trans_quant_4x4(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz,
+ WORD16 *pi2_alt_dc_addr)
+{
+ UWORD32 i;
+ WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
+ WORD32 i4_value, i4_sign;
+ UWORD32 u4_abs_value;
+ WORD16 *pi2_out_tmp = pi2_out;
+ UWORD32 u4_nonzero_coeff = 0;
+
+ for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+ {
+ /* computing prediction error (residue) */
+ x4 = pu1_src[0] - pu1_pred[0];
+ x5 = pu1_src[1] - pu1_pred[1];
+ x6 = pu1_src[2] - pu1_pred[2];
+ x7 = pu1_src[3] - pu1_pred[3];
+
+ /* Horizontal transform */
+ x0 = x4 + x7;
+ x1 = x5 + x6;
+ x2 = x5 - x6;
+ x3 = x4 - x7;
+ /* x2 and x3 may be negative: multiply instead of left-shifting a negative value */
+ pi2_out_tmp[0] = x0 + x1;
+ pi2_out_tmp[1] = (x3 * 2) + x2;
+ pi2_out_tmp[2] = x0 - x1;
+ pi2_out_tmp[3] = x3 - (x2 * 2);
+
+ /* pointing to next row; */
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ pi2_out_tmp += 4;
+
+ }
+ pi2_out_tmp = pi2_out;
+ for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+ {
+ /* iteration i handles column i: elements 0, 4, 8 and 12 relative to pi2_out_tmp */
+ /* Vertical transform and quantization */
+ x4 = pi2_out_tmp[0];
+ x5 = pi2_out_tmp[4];
+ x6 = pi2_out_tmp[8];
+ x7 = pi2_out_tmp[12];
+
+
+ x0 = x4 + x7;
+ x1 = x5 + x6;
+ x2 = x5 - x6;
+ x3 = x4 - x7;
+
+ /* quantization is done in place */
+
+ i4_value = x0 + x1;
+ /* first column only: store the DC term ahead of the FWD_QUANT call below */
+ if(i==0)
+ {
+ (*pi2_alt_dc_addr) = i4_value;
+ }
+ /* FWD_QUANT is defined elsewhere; presumably it quantizes i4_value in place and counts non-zeros -- TODO confirm */
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits, u4_nonzero_coeff);
+ pi2_out_tmp[0] = i4_value;
+
+
+ i4_value = (x3 * 2) + x2;
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits, u4_nonzero_coeff);
+ pi2_out_tmp[4] = i4_value;
+
+
+ i4_value = x0 - x1;
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits, u4_nonzero_coeff);
+ pi2_out_tmp[8] = i4_value;
+
+
+ i4_value = x3 - (x2 * 2);
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor, u4_qbits, u4_nonzero_coeff);
+ pi2_out_tmp[12] = i4_value;
+
+ pi2_out_tmp ++;
+ pu2_scale_matrix++;
+ pu2_threshold_matrix++;
+ }
+
+ /* Return total nonzero coefficients in the current sub block */
+ *pu1_nnz = u4_nonzero_coeff;
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs forward transform and quantization on a 4*4 chroma block
+ * with interleaved values
+ *
+ * @par Description:
+ * The function accepts source buffer and estimation buffer. From these, it
+ * computes the residue. This residue is then transformed and quantized.
+ * The transform and quantization are computed in place. They use the output
+ * buffer (pi2_out) for this.
+ *
+ * @param[in] pu1_src
+ * Pointer to source sub-block; bytes 0, 2, 4 and 6 of each row are read
+ *
+ * @param[in] pu1_pred
+ * Pointer to prediction sub-block; bytes 0, 2, 4 and 6 of each row are read
+ *
+ * @param[out] pi2_out
+ * Pointer to the 16 output coefficients, written as 4 rows of 4
+ *
+ * @param[in] src_strd
+ * Source stride
+ *
+ * @param[in] pred_strd
+ * Prediction stride
+ *
+ * @param[out] pu1_dc_alt_addr
+ * Receives the DC term of the transform before it is passed to FWD_QUANT
+ *
+ * @param[in] u4_qbits
+ * QP_BITS_h264_4x4 + floor(QP/6)
+ *
+ * @param[in] pu2_threshold_matrix
+ * Pointer to Forward Quant Threshold Matrix (16 entries, read as row * 4 + column)
+ *
+ * @param[in] pu2_scale_matrix
+ * Pointer to Forward Quant Scale Matrix (16 entries, read as row * 4 + column)
+ *
+ * @param[in] u4_round_factor
+ * Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ * Total non-zero coefficients in the current sub-block
+ *
+ * @returns
+ *
+ * @remarks
+ * Doubling is written as * 2 (not << 1) because the operands can be negative
+ *
+ *******************************************************************************
+ */
+void ih264_resi_trans_quant_chroma_4x4(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz,
+ WORD16 *pu1_dc_alt_addr)
+{
+ UWORD32 i;
+ WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
+ WORD32 i4_value, i4_sign;
+ UWORD32 u4_abs_value;
+ WORD16 *pi2_out_tmp = pi2_out;
+ UWORD32 u4_nonzero_coeff = 0;
+
+ for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+ {
+ /* computing prediction error (residue) */
+ x4 = pu1_src[0] - pu1_pred[0];
+ x5 = pu1_src[2] - pu1_pred[2];
+ x6 = pu1_src[4] - pu1_pred[4];
+ x7 = pu1_src[6] - pu1_pred[6];
+
+ /* Horizontal transform */
+ x0 = x4 + x7;
+ x1 = x5 + x6;
+ x2 = x5 - x6;
+ x3 = x4 - x7;
+ /* x2 and x3 may be negative: multiply instead of left-shifting a negative value */
+ pi2_out_tmp[0] = x0 + x1;
+ pi2_out_tmp[1] = (x3 * 2) + x2;
+ pi2_out_tmp[2] = x0 - x1;
+ pi2_out_tmp[3] = x3 - (x2 * 2);
+
+ /* pointing to next row; */
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ pi2_out_tmp += 4;
+
+ }
+ pi2_out_tmp = pi2_out;
+ for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+ {
+ /* iteration i handles column i: elements 0, 4, 8 and 12 relative to pi2_out_tmp */
+ /* Vertical transform and quantization */
+ x4 = pi2_out_tmp[0];
+ x5 = pi2_out_tmp[4];
+ x6 = pi2_out_tmp[8];
+ x7 = pi2_out_tmp[12];
+
+
+ x0 = x4 + x7;
+ x1 = x5 + x6;
+ x2 = x5 - x6;
+ x3 = x4 - x7;
+
+ /* quantization is done in place */
+
+ i4_value = x0 + x1;
+ /* first column only: store the DC term ahead of the FWD_QUANT call below */
+ if(i==0)
+ {
+ *pu1_dc_alt_addr = i4_value;
+ }
+ /* FWD_QUANT is defined elsewhere; presumably it quantizes i4_value in place and counts non-zeros -- TODO confirm */
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[0] = i4_value;
+
+ i4_value = (x3 * 2) + x2;
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[4],
+ pu2_scale_matrix[4], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[4] = i4_value;
+
+ i4_value = x0 - x1;
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[8],
+ pu2_scale_matrix[8], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[8] = i4_value;
+
+ i4_value = x3 - (x2 * 2);
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[12],
+ pu2_scale_matrix[12], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[12] = i4_value;
+
+ pi2_out_tmp ++;
+ pu2_scale_matrix++;
+ pu2_threshold_matrix++;
+ }
+
+ /* Return total nonzero coefficients in the current sub block */
+ *pu1_nnz = u4_nonzero_coeff;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs forward hadamard transform and quantization on a 4*4 block
+ *
+ * @par Description:
+ * The function reads a 4x4 block of 16bit values from pi2_src. A horizontal
+ * pass stores sums and differences of each row in pi2_dst. A vertical pass
+ * then combines each column, halves the result (>> 1) and quantizes it in
+ * place in pi2_dst. No residue is computed and no stride is taken.
+ *
+ * @param[in] pi2_src
+ * Pointer to the 16 input values, read as 4 rows of 4
+ *
+ * @param[out] pi2_dst
+ * Pointer to the 16 output coefficients, written as 4 rows of 4
+ * Also holds the intermediate results of the horizontal pass
+ *
+ * @param[in] pu2_scale_matrix
+ * Pointer to Forward Quant Scale Matrix
+ * Only entry [0] is read; it is used for every coefficient
+ *
+ * @param[in] pu2_threshold_matrix
+ * Pointer to Forward Quant Threshold Matrix
+ * Only entry [0] is read; it is used for every coefficient
+ *
+ * @param[in] u4_qbits
+ * QP_BITS_h264_4x4 + floor(QP/6)
+ *
+ * @param[in] u4_round_factor
+ * Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ * pu1_nnz[0] is set to 0 on entry and is then handed to every FWD_QUANT
+ * call as the non-zero coefficient counter
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * FWD_QUANT is a macro defined outside this file. Presumably it quantizes
+ * i4_value in place and increments the counter passed as its last argument
+ * for each non-zero result -- TODO confirm against the macro definition.
+ * u4_abs_value and i4_sign are only used as arguments to that macro.
+ *
+ * pi2_dst is advanced by 4 per row during the horizontal pass and is then
+ * rewound by SUB_BLK_WIDTH_4x4 << 2 before the vertical pass.
+ *
+ *******************************************************************************
+ */
+
+void ih264_hadamard_quant_4x4(WORD16 *pi2_src,
+ WORD16 *pi2_dst,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz)
+{
+ WORD32 i;
+ WORD32 x0,x1,x2,x3,x4,x5,x6,x7,i4_value;
+ UWORD32 u4_abs_value;
+ WORD32 i4_sign;
+
+ *pu1_nnz = 0;
+ /* Horizontal transform: one row of 4 inputs per iteration */
+ for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+ {
+ x4 = pi2_src[0];
+ x5 = pi2_src[1];
+ x6 = pi2_src[2];
+ x7 = pi2_src[3];
+
+ x0 = x4 + x7;
+ x1 = x5 + x6;
+ x2 = x5 - x6;
+ x3 = x4 - x7;
+
+ pi2_dst[0] = x0 + x1;
+ pi2_dst[1] = x3 + x2;
+ pi2_dst[2] = x0 - x1;
+ pi2_dst[3] = x3 - x2;
+
+ pi2_src += 4;
+ pi2_dst += 4;
+ }
+
+ /* Vertical transform and quantization */
+ pi2_dst -= SUB_BLK_WIDTH_4x4<<2;
+ /* iteration i handles column i: elements 0, 4, 8 and 12 relative to pi2_dst */
+ for (i = 0; i < SUB_BLK_WIDTH_4x4; i++)
+ {
+ x4 = pi2_dst[0];
+ x5 = pi2_dst[4];
+ x6 = pi2_dst[8];
+ x7 = pi2_dst[12] ;
+
+ x0 = x4 + x7;
+ x1 = x5 + x6;
+ x2 = x5 - x6;
+ x3 = x4 - x7;
+
+ /* each combined value is halved before it is handed to FWD_QUANT */
+ i4_value = (x0 + x1) >> 1;
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]);
+ pi2_dst[0] = i4_value;
+
+ i4_value = (x3 + x2) >> 1;
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]);
+ pi2_dst[4] = i4_value;
+
+ i4_value = (x0 - x1) >> 1;
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]);
+ pi2_dst[8] = i4_value;
+
+ i4_value = (x3 - x2) >> 1;
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits, pu1_nnz[0]);
+ pi2_dst[12] = i4_value;
+
+ pi2_dst ++;
+ }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs forward hadamard transform and quantization on a 2*2 block
+ * for both U and V planes
+ *
+ * @par Description:
+ * The function processes two planes in turn. For each plane it reads four
+ * 16bit values from pi2_src, forms their 2x2 sums and differences, quantizes
+ * each of the four results and writes them to pi2_dst. Both pointers advance
+ * by 4 elements per plane. No residue is computed and no stride is taken.
+ *
+ * @param[in] pi2_src
+ * Pointer to 8 input values: 4 for the first plane followed by 4 for the second
+ *
+ * @param[out] pi2_dst
+ * Pointer to 8 output coefficients, laid out like pi2_src
+ *
+ * @param[in] pu2_scale_matrix
+ * Pointer to Forward Quant Scale Matrix
+ * Only entry [0] is read; it is used for every coefficient
+ *
+ * @param[in] pu2_threshold_matrix
+ * Pointer to Forward Quant Threshold Matrix
+ * Only entry [0] is read; it is used for every coefficient
+ *
+ * @param[in] u4_qbits
+ * QP_BITS_h264_4x4 + floor(QP/6)
+ *
+ * @param[in] u4_round_factor
+ * Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ * Two counters: pu1_nnz[0] for the first plane, pu1_nnz[1] for the second
+ * Each is set to 0 and then handed to FWD_QUANT for that plane
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * NNZ for dc is written to pu1_nnz[0] and pu1_nnz[1] (consecutive entries).
+ * NOTE(review): which plane (U or V) comes first is not visible here.
+ * Output order per plane: [0] = x0 + x2, [1] = x1 + x3, [2] = x0 - x2, [3] = x1 - x3.
+ *
+ * FWD_QUANT is a macro defined outside this file. Presumably it quantizes
+ * i4_value in place and increments the counter passed as its last argument
+ * for each non-zero result -- TODO confirm against the macro definition.
+ * u4_abs_value and i4_sign are only used as arguments to that macro.
+ *
+ *******************************************************************************
+ */
+
+void ih264_hadamard_quant_2x2_uv(WORD16 *pi2_src,
+ WORD16 *pi2_dst,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz)
+{
+ WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
+ WORD32 i4_value, i4_sign, plane;
+ UWORD32 u4_abs_value;
+
+ for(plane = 0; plane < 2; plane++)
+ {
+ pu1_nnz[plane] = 0;
+
+ /* Horizontal transform */
+ x4 = pi2_src[0];
+ x5 = pi2_src[1];
+ x6 = pi2_src[2];
+ x7 = pi2_src[3];
+
+ x0 = x4 + x5;
+ x1 = x4 - x5;
+ x2 = x6 + x7;
+ x3 = x6 - x7;
+
+ /* Vertical transform and quantization */
+ i4_value = (x0 + x2);
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+ pu1_nnz[plane]);
+ pi2_dst[0] = i4_value;
+
+ i4_value = (x0 - x2);
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+ pu1_nnz[plane]);
+ pi2_dst[2] = i4_value;
+
+ i4_value = (x1 - x3);
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+ pu1_nnz[plane]);
+ pi2_dst[3] = i4_value;
+
+ i4_value = (x1 + x3);
+ FWD_QUANT(i4_value, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+ pu1_nnz[plane]);
+ pi2_dst[1] = i4_value;
+ /* step both pointers to the 4 values of the next plane */
+ pi2_dst += 4;
+ pi2_src += 4;
+
+ }
+}
+
+/*
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs Single stage forward transform CF8 and quantization on 8*8 blocks
+ * for h.264
+ *
+ * @par Description:
+ * Performs single stage 8x8 forward transform CF8 after calculating the residue
+ * The result is then quantized
+ *
+ * @param[in] pu1_src
+ * Input 8x8 pixels
+ *
+ * @param[in] pu1_pred
+ * Input 8x8 pixels
+ *
+ * @param[in] pi1_out
+ * Output 8x8 pixels
+ *
+ * @param[in] u4_thresh
+ * Threshold under which the coeffs are not quantized
+ *
+ * @param[in] u4_qp_div
+ * QP/6
+ *
+ * @param[in] u4_qp_rem
+ * QP%6
+ *
+ * @param[in] u2_src_stride
+ * Source stride
+ *
+ * @param[in] pred_strd
+ * stride for prediction buffer
+ *
+ * @param[in] dst_strd
+ * stride for destination buffer
+ *
+ * @param[in] pu4_quant_mat
+ * Pointer to the 4x4 quantization matrix
+ *
+ * @returns Void
+ *
+ *
+ *******************************************************************************
+ */
+void ih264_resi_trans_quant_8x8(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz,
+ WORD16 *pu1_dc_alt_addr)
+
+{
+ WORD16 *pi2_out_tmp = pi2_out;
+ UWORD32 i;
+ WORD32 a0, a1, a2, a3, a4, a5, a6, a7;
+ WORD32 r0, r1, r2, r3, r4, r5, r6, r7;
+ WORD32 i4_sign;
+ UWORD32 u4_abs_value;
+ UWORD32 u4_nonzero_coeff = 0;
+
+ UNUSED(pu1_dc_alt_addr);
+
+ /*Horizontal transform */
+ /* we are going to use the a's and r's in a twisted way since */
+ /*i dont want to declare more variables */
+ for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
+ {
+ r0 = pu1_src[0];
+ r0 -= pu1_pred[0];
+ r1 = pu1_src[1];
+ r1 -= pu1_pred[1];
+ r2 = pu1_src[2];r2 -= pu1_pred[2];
+ r3 = pu1_src[3];r3 -= pu1_pred[3];
+ r4 = pu1_src[4];r4 -= pu1_pred[4];
+ r5 = pu1_src[5];r5 -= pu1_pred[5];
+ r6 = pu1_src[6];r6 -= pu1_pred[6];
+ r7 = pu1_src[7];r7 -= pu1_pred[7];
+
+
+ a0 = r0 + r7;
+ a1 = r1 + r6;
+ a2 = r2 + r5;
+ a3 = r3 + r4;
+
+ a4 = a0 + a3;
+ a5 = a1 + a2;
+ a6 = a0 - a3;
+ a7 = a1 - a2;
+
+ pi2_out_tmp[0] = a4 + a5;
+
+ pi2_out_tmp[2] = a6 + (a7>>1);
+ pi2_out_tmp[4] = a4 - a5;
+ pi2_out_tmp[6] = (a6>>1) - a7;
+
+ a0 = r0 - r7;
+ a1 = r1 - r6;
+ a2 = r2 - r5;
+ a3 = r3 - r4;
+
+ a4 = a1 + a2 + ((a0>>1) + a0);
+ a5 = a0 - a3 - ((a2>>1) + a2);
+ a6 = a0 + a3 - ((a1>>1) + a1);
+ a7 = a1 - a2 + ((a3>>1) + a3);
+
+ pi2_out_tmp[1] = a4 + (a7>>2);
+ pi2_out_tmp[3] = a5 + (a6>>2);
+ pi2_out_tmp[5] = a6 - (a5>>2);
+ pi2_out_tmp[7] = (a4>>2) - a7;
+
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ pi2_out_tmp += 8;
+ }
+
+ /*vertical transform and quant */
+
+ pi2_out_tmp = pi2_out;
+
+ for (i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
+ {
+
+ r0 = pi2_out_tmp[0];
+ r1 = pi2_out_tmp[8];
+ r2 = pi2_out_tmp[16];
+ r3 = pi2_out_tmp[24];
+ r4 = pi2_out_tmp[32];
+ r5 = pi2_out_tmp[40];
+ r6 = pi2_out_tmp[48];
+ r7 = pi2_out_tmp[56];
+
+ a0 = r0 + r7;
+ a1 = r1 + r6;
+ a2 = r2 + r5;
+ a3 = r3 + r4;
+
+ a4 = a0 + a3;
+ a5 = a1 + a2;
+ a6 = a0 - a3;
+ a7 = a1 - a2;
+
+ a0 = r0 - r7;
+ a1 = r1 - r6;
+ a2 = r2 - r5;
+ a3 = r3 - r4;
+
+ r0 = a4 + a5;
+ r2 = a6 + (a7>>1);
+ r4 = a4 - a5;
+ r6 = (a6>>1) - a7;
+
+ a4 = a1 + a2 + ((a0>>1) + a0);
+ a5 = a0 - a3 - ((a2>>1) + a2);
+ a6 = a0 + a3 - ((a1>>1) + a1);
+ a7 = a1 - a2 + ((a3>>1) + a3);
+
+ r1 = a4 + (a7>>2);
+ r3 = a5 + (a6>>2);
+ r5 = a6 - (a5>>2);
+ r7 = (a4>>2) - a7;
+
+ FWD_QUANT(r0, u4_abs_value, i4_sign, pu2_threshold_matrix[0],
+ pu2_scale_matrix[0], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[0] = r0;
+
+ FWD_QUANT(r1, u4_abs_value, i4_sign, pu2_threshold_matrix[8],
+ pu2_scale_matrix[8], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[8] = r1;
+
+ FWD_QUANT(r2, u4_abs_value, i4_sign, pu2_threshold_matrix[16],
+ pu2_scale_matrix[16], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[16] = r2;
+
+ FWD_QUANT(r3, u4_abs_value, i4_sign, pu2_threshold_matrix[24],
+ pu2_scale_matrix[24], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[24] = r3;
+
+ FWD_QUANT(r4, u4_abs_value, i4_sign, pu2_threshold_matrix[32],
+ pu2_scale_matrix[32], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[32] = r4;
+
+ FWD_QUANT(r5, u4_abs_value, i4_sign, pu2_threshold_matrix[40],
+ pu2_scale_matrix[40], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[40] = r5;
+
+ FWD_QUANT(r6, u4_abs_value, i4_sign, pu2_threshold_matrix[48],
+ pu2_scale_matrix[48], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[48] = r6;
+
+ FWD_QUANT(r7, u4_abs_value, i4_sign, pu2_threshold_matrix[56],
+ pu2_scale_matrix[56], u4_round_factor, u4_qbits,
+ u4_nonzero_coeff);
+ pi2_out_tmp[56] = r7;
+
+ pi2_out_tmp++;
+ pu2_scale_matrix++;
+ pu2_threshold_matrix++;
+ }
+ /* Return total nonzero coefficients in the current sub block */
+ *pu1_nnz = u4_nonzero_coeff;
+}
diff --git a/common/ih264_size_defs.h b/common/ih264_size_defs.h
new file mode 100755
index 0000000..e2a8b76
--- /dev/null
+++ b/common/ih264_size_defs.h
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_size_defs.h
+ *
+ * @brief
+ * Contains block size and count macros used by the H264 transform, quant and inverse quant code
+ *
+ * @author
+ * Ittiam
+ *
+ * @remarks
+ *
+ ********************************************************************************/
+
+#ifndef IH264_SIZE_DEFS_H_
+#define IH264_SIZE_DEFS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*-----------------------Primary defs--------------------------*/
+
+/*Width of a 4x4 block*/
+#define SUB_BLK_WIDTH_4x4 4
+
+/*Width of an 8x8 block*/
+#define SUB_BLK_WIDTH_8x8 8
+
+/*Number of chroma 4x4 blocks in a row of coeffs (4:2:0, per the macro name)*/
+#define SUB_BLK_COUNT_CHROMA_4x4_420 2
+
+/*Number of luma 4x4 blocks in a row of coeffs*/
+#define SUB_BLK_COUNT_LUMA_4x4 4
+
+/*Number of chroma planes*/
+#define NUM_CHROMA_PLANES 2
+
+/*Constant bit shifts*/
+#define QP_BITS_h264_4x4 15
+#define QP_BITS_h264_8x8 16
+
+
+/*---------------------------Derived defs------------------------*/
+
+/*Number of coefficients in a 4x4 block (parenthesized, no trailing ';', so it is usable in expressions)*/
+#define COFF_CNT_SUB_BLK_4x4 (SUB_BLK_WIDTH_4x4 * SUB_BLK_WIDTH_4x4)
+
+/*Number of luma 4x4 blocks in an MB*/
+#define SUB_BLK_LUMA_4X4_CNT_MB (SUB_BLK_COUNT_LUMA_4x4 * SUB_BLK_COUNT_LUMA_4x4)
+
+/*Number of chroma 4x4 blocks in an MB: per plane, then for both planes*/
+#define SUB_BLK_CHROMA_4X4_CNT_MB (SUB_BLK_COUNT_CHROMA_4x4_420 * SUB_BLK_COUNT_CHROMA_4x4_420)
+#define SUB_BLK_CHROMA_4X4_CNT_MB_BIPLANE (SUB_BLK_CHROMA_4X4_CNT_MB * NUM_CHROMA_PLANES)
+
+/*Size of trans buff. NOTE(review): intent reads as 4x4 DC block + 16 4x4 AC blocks (272), but the "*+" below makes it a pure product (4096); value left unchanged so no buffer shrinks - confirm*/
+#define SIZE_TRANS_BUFF (SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4*+ \
+ SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4* \
+ SUB_BLK_COUNT_LUMA_4x4*SUB_BLK_COUNT_LUMA_4x4)
+
+/*memory size = memory size of 4x4 block of resi coeff + 4x4 for DC coeff block */
+#define SIZE_TMP_BUFF_ITRANS ((SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4) +\
+ (SUB_BLK_WIDTH_4x4*SUB_BLK_WIDTH_4x4))
+
+#endif /* IH264_SIZE_DEFS_H_ */
diff --git a/common/ih264_structs.h b/common/ih264_structs.h
new file mode 100755
index 0000000..fa4e142
--- /dev/null
+++ b/common/ih264_structs.h
@@ -0,0 +1,1722 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264_structs.h
+ *
+ * @brief
+ * Structure definitions used in the code
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#ifndef _IH264_STRUCTS_H_
+#define _IH264_STRUCTS_H_
+
+/** MB Type info for Intra MBs. Field notes below are inferred from names only - confirm against users */
+typedef struct
+{
+ UWORD32 u4_num_mbpart; /* presumably number of MB partitions */
+ MBPART_PREDMODE_T e_mbpart_predmode; /* presumably prediction mode of the partition */
+ MBMODES_I16x16 e_intra_predmode; /* presumably the intra 16x16 prediction mode */
+ UWORD32 u4_cpb_chroma; /* NOTE(review): likely chroma coded block pattern ("cpb" may be a typo of "cbp") */
+ UWORD32 u4_cpb_luma; /* NOTE(review): likely luma coded block pattern ("cpb" may be a typo of "cbp") */
+}intra_mbtype_info_t;
+
+/** MB Type info for Inter MBs. Field notes below are inferred from names only - confirm against users */
+typedef struct
+{
+ UWORD32 u4_num_mbpart; /* presumably number of MB partitions */
+ MBPART_PREDMODE_T e_mbpart_predmode_0; /* presumably prediction mode of partition 0 */
+ MBPART_PREDMODE_T e_mbpart_predmode_1; /* presumably prediction mode of partition 1 */
+ UWORD32 u4_mbpart_wd; /* presumably partition width; units not visible here */
+ UWORD32 u4_mbpart_ht; /* presumably partition height; units not visible here */
+}inter_mbtype_info_t;
+
+
+/** Sub MB Type info for Inter MBs. Field notes below are inferred from names only - confirm against users */
+typedef struct
+{
+ UWORD32 u4_num_mbpart; /* presumably number of sub-MB partitions */
+ MBPART_PREDMODE_T e_mbpart_predmode; /* presumably prediction mode of the sub-MB partition */
+ UWORD32 u4_mbpart_wd; /* presumably partition width; units not visible here */
+ UWORD32 u4_mbpart_ht; /* presumably partition height; units not visible here */
+}submbtype_info_t;
+
+/**
+ * Picture buffer: plane pointers plus POC, time stamp and reference bookkeeping
+ */
+typedef struct
+{
+ UWORD8* pu1_luma; /* luma plane pointer (by name) */
+ UWORD8* pu1_chroma; /* chroma plane pointer (by name); plane layout not visible here */
+
+ WORD32 i4_abs_poc; /* presumably absolute picture order count - confirm */
+ WORD32 i4_poc_lsb; /* presumably pic_order_cnt_lsb of the picture - confirm */
+
+
+ /** Lower 32 bits of the time stamp */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32 bits of the time stamp */
+ UWORD32 u4_timestamp_high;
+
+ WORD32 i4_used_as_ref; /* reference usage marker; value encoding not visible here */
+
+ /**
+ * frame_num in the slice header
+ */
+ WORD32 i4_frame_num;
+
+ /**
+ * Long-term frame idx
+ * TODO: store in frame_num
+ */
+ WORD32 i4_long_term_frame_idx;
+
+ /*
+ * Field type - 0: Top Field
+ * 1: Bottom Field
+ */
+ WORD8 i1_field_type;
+
+ /**
+ * buffer ID from frame buffer manager
+ */
+ WORD32 i4_buf_id;
+
+} pic_buf_t;
+
+
+/**
+ * Reference list entry: two untyped pointers (pointee types are not visible here)
+ */
+typedef struct
+{
+ void *pv_pic_buf; /* presumably points to the picture buffer of this entry - confirm */
+
+ void *pv_mv_buf; /* presumably points to the motion vector buffer of this entry - confirm */
+
+} ref_list_t;
+
+
+/**
+ * Motion vector: a pair of WORD16 components
+ */
+typedef struct
+{
+ /**
+ * Horizontal component of the motion vector
+ */
+ WORD16 i2_mvx;
+
+ /**
+ * Vertical component of the motion vector
+ */
+ WORD16 i2_mvy;
+} mv_t;
+
+/*****************************************************************************/
+/* NOTE(review): this banner originally claimed a "packed 48 bit structure". */
+/* The layout below holds two mv_t (two WORD16 each) plus four WORD8 */
+/* members, which does not add up to 48 bits - the figure is presumably */
+/* inherited from another codec; confirm. mv_t is embedded by value rather */
+/* than referenced through pointers to the l0 and l1 mvs, since a pointer */
+/* would not be smaller than the MV itself (original note: 4 bytes each). */
+/*****************************************************************************/
+
+/**
+ * PU Motion Vector info
+ */
+typedef struct
+{
+ /**
+ * L0 Motion Vector
+ */
+ mv_t s_l0_mv;
+
+ /**
+ * L1 Motion Vector
+ */
+ mv_t s_l1_mv;
+
+ /**
+ * L0 Ref index
+ */
+ WORD8 i1_l0_ref_idx;
+
+ /**
+ * L1 Ref index
+ */
+ WORD8 i1_l1_ref_idx;
+
+ /**
+ * L0 Ref Pic Buf ID
+ */
+ WORD8 i1_l0_ref_pic_buf_id;
+
+ /**
+ * L1 Ref Pic Buf ID
+ */
+ WORD8 i1_l1_ref_pic_buf_id;
+
+} pu_mv_t;
+
+/**
+ * PU information: motion vectors followed by 11 bits of packed position, size and mode fields
+ */
+typedef struct
+{
+
+ /**
+ * Motion Vectors (L0/L1 vectors, ref indices and ref pic buf ids)
+ */
+ pu_mv_t s_mv;
+
+ /**
+ * PU X position in terms of min PU (4x4) units; 2 bits, so 0..3
+ */
+ UWORD32 b2_pos_x : 2;
+
+ /**
+ * PU Y position in terms of min PU (4x4) units; 2 bits, so 0..3
+ */
+ UWORD32 b2_pos_y : 2;
+
+ /**
+ * PU width in pixels = (b2_wd + 1) << 2, i.e. 4, 8, 12 or 16
+ */
+ UWORD32 b2_wd : 2;
+
+ /**
+ * PU height in pixels = (b2_ht + 1) << 2, i.e. 4, 8, 12 or 16
+ */
+ UWORD32 b2_ht : 2;
+
+ /**
+ * Intra or Inter flag for each partition - 0 or 1 (which value means intra is not visible here)
+ */
+ UWORD32 b1_intra_flag : 1;
+
+ /**
+ * PRED_L0, PRED_L1, PRED_BI
+ */
+ UWORD32 b2_pred_mode : 2;
+
+} pu_t;
+
+
+/**
+ * MB information to be stored for entire frame (10 bits of packed flags)
+ */
+typedef struct
+{
+ /**
+ * Transform size - 0: 4x4, 1: 8x8
+ */
+ UWORD32 b1_trans_size : 1;
+
+ /**
+ * CBP - 4 bits for Y, 1 for U and 1 for V (bit order not visible here)
+ */
+ UWORD32 b6_cbp: 6;
+
+ /**
+ * Intra pred size - 0: 4x4, 1: 8x8, 2: 16x16
+ */
+ UWORD32 b2_intra_pred_size : 2;
+
+ /**
+ * Flag to signal if the current MB is IPCM
+ */
+ UWORD32 b1_ipcm : 1;
+
+}mb_t;
+
+/*****************************************************************************/
+/* Info from last TU row of MB is stored in a row level neighbour buffer */
+/* , which will be used for Boundary Strength computation */
+/*****************************************************************************/
+/**
+ * MB neighbor info (NOTE(review): "TU"/"CU" wording below is presumably inherited from another codec - confirm)
+ */
+typedef struct
+{
+ /**
+ * Slice index of the mb
+ */
+ UWORD16 u2_slice_idx;
+
+ /*************************************************************************/
+ /* CBF of bottom TU row (replicated in 4 pixel boundary) */
+ /* MSB contains CBF of first TU in the last row and LSB contains CBF */
+ /* of last TU in the last row */
+ /*************************************************************************/
+ /**
+ * CBF of bottom TU row, packed one flag per TU as described above
+ */
+ UWORD16 u2_packed_cbf;
+
+ /*************************************************************************/
+ /* QP of bottom TU row (replicated at 8 pixel boundary (Since QP can */
+ /* not change at less than min CU granularity) */
+ /*************************************************************************/
+ /**
+ * QP of bottom TU row
+ */
+ UWORD8 u1_qp;
+
+} mb_top_ny_info_t;
+
+/**
+ * MB level context: neighbour pointers, PU/coeff data pointers, position and deblock inputs
+ */
+typedef struct _mb_ctxt_t
+{
+ /*************************************************************************/
+ /* Based on the tile, slice and frame boundary the following neighbour */
+ /* pointers will be initialized. NOTE(review): the "tile" wording is */
+ /* presumably inherited from another codec and may not apply - confirm */
+ /*************************************************************************/
+ /**
+ * Pointer to left MB
+ */
+ /* If not available, this will be set to NULL */
+ struct _mb_ctxt_t *ps_mb_left;
+
+ /**
+ * Pointer to top-left MB
+ */
+ /* If not available, this will be set to NULL */
+ mb_top_ny_info_t *ps_mb_ny_topleft;
+
+ /**
+ * Pointer to top MB
+ */
+ /* If not available, this will be set to NULL */
+ mb_top_ny_info_t *ps_mb_ny_top;
+
+ /**
+ * Pointer to top-right MB
+ */
+ /* If not available, this will be set to NULL */
+ mb_top_ny_info_t *ps_mb_ny_topright;
+
+ /*************************************************************************/
+ /* Pointer to PU data. */
+ /* This points to a MV Bank stored at frame level. Though this */
+ /* pointer can be derived by reading offset at frame level, it is */
+ /* stored here for faster access. Can be removed if storage of MB */
+ /* structure is critical */
+ /*************************************************************************/
+ /**
+ * Pointer to PU data
+ */
+ pu_t *ps_pu;
+
+ /*************************************************************************/
+ /* Pointer to a PU map stored at frame level, */
+ /* Though this pointer can be derived by multiplying MB address with */
+ /* number of minTUs in a MB, it is stored here for faster access. */
+ /* Can be removed if storage of MB structure is critical */
+ /*************************************************************************/
+ /**
+ * Pointer to a PU map stored at frame level
+ */
+ UWORD8 *pu1_pu_map;
+
+ /**
+ * Number of TUs filled in as_tu (NOTE(review): no as_tu member exists in this struct - confirm)
+ */
+ /*************************************************************************/
+ /* Having the first entry as 32 bit data, helps in keeping each of */
+ /* the structures aligned to 32 bits at MB level */
+ /*************************************************************************/
+ WORD32 i4_tu_cnt;
+
+ /**
+ * Pointer to transform coeff data
+ */
+ /*************************************************************************/
+ /* Following format is repeated for every coded TU */
+ /* Luma Block */
+ /* num_coeffs : 16 bits */
+ /* zero_cols : 8 bits ( 1 bit per 4 columns) */
+ /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */
+ /* coeff_data : Non zero coefficients */
+ /* Cb Block (only for last TU in 4x4 case else for every luma TU) */
+ /* num_coeffs : 16 bits */
+ /* zero_cols : 8 bits ( 1 bit per 4 columns) */
+ /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */
+ /* coeff_data : Non zero coefficients */
+ /* Cr Block (only for last TU in 4x4 case else for every luma TU) */
+ /* num_coeffs : 16 bits */
+ /* zero_cols : 8 bits ( 1 bit per 4 columns) */
+ /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */
+ /* coeff_data : Non zero coefficients */
+ /*************************************************************************/
+ void *pv_coeff_data;
+
+ /**
+ * Index of the slice to which the MB belongs
+ */
+ WORD32 i4_slice_idx;
+
+ /**
+ * MB column position
+ */
+ WORD32 i4_pos_x;
+
+ /**
+ * MB row position
+ */
+ WORD32 i4_pos_y;
+
+ /**
+ * Number of PUs filled in ps_pu
+ */
+ WORD32 i4_pu_cnt;
+
+ /**
+ * Index of current PU being processed in ps_pu
+ */
+ /* Scratch variable set to 0 at the start of any PU processing function */
+ WORD32 i4_pu_idx;
+
+ /**
+ * Vertical Boundary strength
+ */
+ /* Two bits per edge.
+ Stored in format. BS[15] | BS[14] | .. |BS[0]*/
+ UWORD32 *pu4_vert_bs;
+
+ /**
+ * Horizontal Boundary strength
+ */
+
+ /* Two bits per edge.
+ Stored in format. BS[15] | BS[14] | .. |BS[0]*/
+ UWORD32 *pu4_horz_bs;
+
+ /**
+ * Qp array stored for each 8x8 pixels
+ */
+ UWORD8 *pu1_qp;
+
+ /**
+ * Pointer to current frame's pu_t array
+ */
+ pu_t *ps_frm_pu;
+
+ /**
+ * Pointer to current frame's pu_t index array, which stores starting index
+ * of pu_t for every MB
+ */
+ UWORD32 *pu4_frm_pu_idx;
+
+ /**
+ * Pointer to current frame's pu map array
+ */
+ UWORD8 *pu1_frm_pu_map;
+
+ /*************************************************************************/
+ /* Need to add encoder specific elements for identifying the order of */
+ /* coding for CU, TU and PU if any */
+ /*************************************************************************/
+} mb_ctxt_t;
+
+/*************************************************************************/
+/* The following describes how each of the MB cases are handled */
+/*************************************************************************/
+
+/*************************************************************************/
+/* For SKIP MB */
+/* One Inter PU with appropriate MV */
+/* One TU which says CBP is zero and size is 16x16 */
+/*************************************************************************/
+
+/*************************************************************************/
+/* For Inter MB */
+/* M Inter PU with appropriate MVs (M between 1 to 4) */
+/* Number of TUs derived based on transform size */
+/*************************************************************************/
+
+/*************************************************************************/
+/* For Intra MB */
+/* Number of TUs derived based on transform size */
+/* N Intra Modes are signaled along with coeff data at the start */
+/*************************************************************************/
+
+/*************************************************************************/
+/* For Intra PCM MB */
+/* One TU which says ipcm is 1 */
+/*************************************************************************/
+
+
+
+/**
+ * Structure to hold quantization and inverse quantization parameters of an mb
+ */
+typedef struct
+{
+
+ /*
+ * mb qp
+ */
+ UWORD8 u1_mb_qp;
+
+ /*
+ * mb qp / 6
+ */
+ UWORD8 u1_qp_div;
+
+ /*
+ * mb qp mod 6
+ */
+ UWORD8 u1_qp_rem;
+
+ /*
+ * QP bits (presumably the shift applied during forward quantization - confirm)
+ */
+ UWORD8 u1_qbits;
+
+ /*
+ * forward scale matrix (read-only through this pointer)
+ */
+ const UWORD16 *pu2_scale_mat;
+
+ /*
+ * threshold matrix for quantization
+ */
+ UWORD16 *pu2_thres_mat;
+
+ /*
+ * Threshold(s) to compare the sad with
+ */
+ UWORD16 *pu2_sad_thrsh;
+
+ /*
+ * qp dependent rounding constant
+ */
+ UWORD32 u4_dead_zone;
+
+ /*
+ * inverse scale matrix (read-only through this pointer)
+ */
+ const UWORD16 *pu2_iscale_mat;
+
+ /*
+ * Weight matrix in iquant
+ */
+ UWORD16 *pu2_weigh_mat;
+
+}quant_params_t;
+
+/**
+ * Structure to hold the NAL unit header fields
+ */
+
+typedef struct
+{
+ /**
+ * NAL unit type
+ */
+ WORD8 i1_nal_unit_type;
+
+ /**
+ * NAL ref idc
+ */
+ WORD8 i1_nal_ref_idc;
+
+
+} nal_header_t;
+
+/**
+ * HRD parameters Info
+ */
+typedef struct
+{
+ /**
+ * cpb_cnt_minus1; plus 1 specifies the number of alternative CPB
+ * specifications in the bitstream
+ */
+ UWORD8 u1_cpb_cnt_minus1;
+
+ /**
+ * (together with bit_rate_value_minus1) specifies the
+ * maximum input bit rate of the i-th CPB
+ */
+ UWORD32 u4_bit_rate_scale;
+
+ /**
+ * (together with cpb_size_value_minus1) specifies
+ * CPB size of the i-th CPB when the CPB operates
+ * at the access unit level
+ */
+ UWORD32 u4_cpb_size_scale;
+
+ /**
+ * (together with bit_rate_scale) specifies the
+ * maximum input bit rate for the i-th CPB
+ */
+ UWORD32 au4_bit_rate_value_minus1[32];
+ /**
+ * together with cpb_size_scale to specify the
+ * CPB size when the CPB operates at the access unit level.
+ */
+ UWORD32 au4_cpb_size_value_minus1[32];
+
+ /**
+ * if 1, specifies that the HSS operates in a constant bit rate (CBR) mode
+ * if 0, specifies that the HSS operates in an intermittent bit rate mode
+ */
+ UWORD8 au1_cbr_flag[32];
+
+
+ /**
+ * specifies the length, in bits for initial cpb delay (nal/vcl)syntax in bp sei
+ */
+ UWORD8 u1_initial_cpb_removal_delay_length_minus1;
+
+ /**
+ * specifies the length, in bits for the cpb delay syntax in pt_sei
+ */
+ UWORD8 u1_cpb_removal_delay_length_minus1;
+
+ /**
+ * specifies the length, in bits, of the pic_dpb_output_delay syntax element in the pt SEI message
+ */
+ UWORD8 u1_dpb_output_delay_length_minus1;
+
+ /**
+ * Specifies length of the time offset parameter
+ */
+ UWORD8 u1_time_offset_length;
+
+}hrd_params_t;
+
+
+/**
+ * Structure to hold VUI parameters Info
+ */
+typedef struct
+{
+ /**
+ * indicates the presence of aspect_ratio
+ */
+ UWORD8 u1_aspect_ratio_info_present_flag;
+
+ /**
+ * specifies the aspect ratio of the luma samples
+ */
+ UWORD8 u1_aspect_ratio_idc;
+
+ /**
+ * width of the luma samples. user dependent
+ */
+ UWORD16 u2_sar_width;
+
+ /**
+ * Height of the luma samples. user dependent
+ */
+ UWORD16 u2_sar_height;
+
+ /**
+ * if 1, specifies that the overscan_appropriate_flag is present
+ * if 0, the preferred display method for the video signal is unspecified
+ */
+ UWORD8 u1_overscan_info_present_flag;
+
+ /**
+ * if 1,indicates that the cropped decoded pictures output
+ * are suitable for display using overscan
+ */
+ UWORD8 u1_overscan_appropriate_flag;
+
+ /**
+ * if 1 specifies that video_format, video_full_range_flag and
+ * colour_description_present_flag are present
+ */
+ UWORD8 u1_video_signal_type_present_flag;
+
+ /**
+ * pal, secam, ntsc, ...
+ */
+ UWORD8 u1_video_format;
+
+ /**
+ * indicates the black level and range of the luma and chroma signals
+ */
+ UWORD8 u1_video_full_range_flag;
+
+ /**
+ * if 1, specifies that colour_primaries, transfer_characteristics
+ * and matrix_coefficients are present
+ */
+ UWORD8 u1_colour_description_present_flag;
+
+ /**
+ * indicates the chromaticity coordinates of the source primaries
+ */
+ UWORD8 u1_colour_primaries;
+
+ /**
+ * indicates the opto-electronic transfer characteristic of the source picture
+ */
+ UWORD8 u1_transfer_characteristics;
+
+ /**
+ * the matrix coefficients used in deriving luma and chroma signals
+ * from the green, blue, and red primaries
+ */
+ UWORD8 u1_matrix_coefficients;
+
+ /**
+ * if 1, specifies that chroma_sample_loc_type_top_field and
+ * chroma_sample_loc_type_bottom_field are present
+ */
+ UWORD8 u1_chroma_loc_info_present_flag;
+
+ /**
+ * location of chroma samples (top field, then bottom field)
+ */
+ UWORD8 u1_chroma_sample_loc_type_top_field;
+
+ UWORD8 u1_chroma_sample_loc_type_bottom_field;
+
+ /**
+ * Indicates the presence of the
+ * num_units_in_ticks, time_scale flag
+ */
+ UWORD8 u1_vui_timing_info_present_flag;
+
+ /**
+ * Number of units that
+ * correspond to one increment of the
+ * clock. Indicates the resolution
+ */
+ UWORD32 u4_vui_num_units_in_tick;
+
+ /**
+ * The number of time units that pass in one second
+ */
+ UWORD32 u4_vui_time_scale;
+
+ /**
+ * Flag indicating that time difference between two frames is a constant
+ */
+ UWORD8 u1_fixed_frame_rate_flag;
+
+ /**
+ * Indicates the presence of NAL HRD parameters
+ */
+ UWORD8 u1_nal_hrd_parameters_present_flag;
+
+ /**
+ * NAL level HRD parameters
+ */
+ hrd_params_t s_nal_hrd_parameters;
+
+ /**
+ * Indicates the presence of VCL HRD parameters
+ */
+ UWORD8 u1_vcl_hrd_parameters_present_flag;
+
+ /**
+ * VCL level HRD parameters
+ */
+ hrd_params_t s_vcl_hrd_parameters;
+
+ /**
+ * Specifies the HRD operational mode
+ */
+ UWORD8 u1_low_delay_hrd_flag;
+
+ /**
+ * Indicates presence of SEI messages which include pic_struct syntax element
+ */
+ UWORD8 u1_pic_struct_present_flag;
+
+ /**
+ * if 1, specifies that the following bitstream restriction parameters are present
+ */
+ UWORD8 u1_bitstream_restriction_flag;
+
+ /**
+ * if 0, indicates that no pel outside the pic boundaries and
+ * no sub-pels derived using pels outside the pic boundaries is used for inter prediction
+ */
+ UWORD8 u1_motion_vectors_over_pic_boundaries_flag;
+
+ /**
+ * Indicates a number of bytes not exceeded by the sum of the sizes of the VCL NAL units
+ * associated with any coded picture
+ */
+ UWORD8 u1_max_bytes_per_pic_denom;
+
+ /**
+ * Indicates an upper bound for the number of bits of macroblock_layer() data
+ */
+ UWORD8 u1_max_bits_per_mb_denom;
+
+ /**
+ * Indicate the maximum absolute value of a decoded horizontal MV component
+ * in quarter-pel luma units
+ */
+ UWORD8 u1_log2_max_mv_length_horizontal;
+
+ /**
+ * Indicate the maximum absolute value of a decoded vertical MV component
+ * in quarter-pel luma units
+ */
+ UWORD8 u1_log2_max_mv_length_vertical;
+
+ /**
+ * Max number of frames that are not synchronized in display and decode order
+ */
+ UWORD8 u1_num_reorder_frames;
+
+ /**
+ * specifies required size of the HRD DPB in units of frame buffers.
+ */
+ UWORD8 u1_max_dec_frame_buffering;
+
+} vui_t;
+
+
+/**
+ * Structure to hold SPS info
+ */
+typedef struct
+{
+ /**
+ * profile_idc
+ */
+ UWORD8 u1_profile_idc;
+
+ /** constraint_set0_flag */
+ UWORD8 u1_constraint_set0_flag;
+
+ /** constraint_set1_flag */
+ UWORD8 u1_constraint_set1_flag;
+
+ /** constraint_set2_flag */
+ UWORD8 u1_constraint_set2_flag;
+
+ /** constraint_set3_flag */
+ UWORD8 u1_constraint_set3_flag;
+
+ /**
+ * level_idc
+ */
+ UWORD8 u1_level_idc;
+
+ /**
+ * seq_parameter_set_id
+ */
+ UWORD8 u1_sps_id;
+
+
+ /**
+ * chroma_format_idc
+ */
+ UWORD8 u1_chroma_format_idc;
+
+ /**
+ * residual_colour_transform_flag
+ */
+ WORD8 i1_residual_colour_transform_flag;
+
+ /**
+ * bit_depth_luma_minus8 (NOTE(review): field name drops "_minus8"; confirm which value is stored)
+ */
+ WORD8 i1_bit_depth_luma;
+
+ /**
+ * bit_depth_chroma_minus8 (NOTE(review): field name drops "_minus8"; confirm which value is stored)
+ */
+ WORD8 i1_bit_depth_chroma;
+
+ /**
+ * qpprime_y_zero_transform_bypass_flag
+ */
+ WORD8 i1_qpprime_y_zero_transform_bypass_flag;
+
+ /**
+ * seq_scaling_matrix_present_flag
+ */
+ WORD8 i1_seq_scaling_matrix_present_flag;
+
+ /**
+ * seq_scaling_list_present_flag, one entry per scaling list
+ */
+ WORD8 ai1_seq_scaling_list_present_flag[8];
+
+ /**
+ * log2_max_frame_num_minus4 (NOTE(review): field name drops "_minus4"; confirm which value is stored)
+ */
+ WORD8 i1_log2_max_frame_num;
+
+ /**
+ * MaxFrameNum in the standard
+ * 1 << i1_log2_max_frame_num
+ */
+ WORD32 i4_max_frame_num;
+
+ /**
+ * pic_order_cnt_type
+ */
+ WORD8 i1_pic_order_cnt_type;
+
+ /**
+ * log2_max_pic_order_cnt_lsb_minus4 (NOTE(review): field name drops "_minus4"; confirm which value is stored)
+ */
+ WORD8 i1_log2_max_pic_order_cnt_lsb;
+
+ /**
+ * MaxPicOrderCntLsb in the standard.
+ * NOTE(review): the standard defines it as 1 << (log2_max_pic_order_cnt_lsb_minus4 + 4) - confirm how it is derived here
+ */
+ WORD32 i4_max_pic_order_cnt_lsb;
+
+ /**
+ * delta_pic_order_always_zero_flag
+ */
+ WORD8 i1_delta_pic_order_always_zero_flag;
+
+ /**
+ * offset_for_non_ref_pic
+ */
+ WORD32 i4_offset_for_non_ref_pic;
+
+ /**
+ * offset_for_top_to_bottom_field
+ */
+ WORD32 i4_offset_for_top_to_bottom_field;
+
+ /**
+ * num_ref_frames_in_pic_order_cnt_cycle
+ */
+ UWORD8 u1_num_ref_frames_in_pic_order_cnt_cycle;
+
+ /**
+ * Offset_for_ref_frame, up to 256 entries
+ */
+ WORD32 ai4_offset_for_ref_frame[256];
+
+ /**
+ * max_num_ref_frames
+ */
+ UWORD8 u1_max_num_ref_frames;
+
+ /**
+ * gaps_in_frame_num_value_allowed_flag
+ */
+ WORD8 i1_gaps_in_frame_num_value_allowed_flag;
+
+ /**
+ * pic_width_in_mbs_minus1
+ */
+ WORD16 i2_pic_width_in_mbs_minus1;
+
+ /**
+ * pic_height_in_map_units_minus1
+ */
+ WORD16 i2_pic_height_in_map_units_minus1;
+
+ /**
+ * frame_mbs_only_flag
+ */
+ WORD8 i1_frame_mbs_only_flag;
+
+ /**
+ * mb_adaptive_frame_field_flag
+ */
+ WORD8 i1_mb_adaptive_frame_field_flag;
+
+ /**
+ * direct_8x8_inference_flag
+ */
+ WORD8 i1_direct_8x8_inference_flag;
+
+ /**
+ * frame_cropping_flag
+ */
+ WORD8 i1_frame_cropping_flag;
+
+ /**
+ * frame_crop_left_offset
+ */
+ WORD16 i2_frame_crop_left_offset;
+
+ /**
+ * frame_crop_right_offset
+ */
+ WORD16 i2_frame_crop_right_offset;
+
+ /**
+ * frame_crop_top_offset
+ */
+ WORD16 i2_frame_crop_top_offset;
+
+ /**
+ * frame_crop_bottom_offset
+ */
+ WORD16 i2_frame_crop_bottom_offset;
+
+ /**
+ * vui_parameters_present_flag
+ */
+ WORD8 i1_vui_parameters_present_flag;
+
+ /**
+ * vui_parameters_Structure_info
+ */
+ vui_t s_vui_parameters;
+
+ /**
+ * Flag to give status of SPS structure
+ */
+ WORD8 i1_sps_valid;
+
+ /**
+ * Coded Picture width (declared WORD32 despite the i2_ prefix)
+ */
+ WORD32 i2_pic_wd;
+
+ /**
+ * Coded Picture height (declared WORD32 despite the i2_ prefix)
+ */
+ WORD32 i2_pic_ht;
+
+ /**
+ * Picture width in MB units
+ */
+
+ WORD16 i2_pic_wd_in_mb;
+
+ /**
+ * Picture height in MB units
+ */
+
+ WORD16 i2_pic_ht_in_mb;
+
+ /**
+ * useDefaultScalingMatrixFlag, one entry per scaling list
+ */
+ WORD8 ai1_use_default_scaling_matrix_flag[8];
+
+ /**
+ * 4x4 Scaling lists after inverse zig zag scan
+ */
+ UWORD16 au2_4x4_weight_scale[6][16];
+
+ /**
+ * 8x8 Scaling lists after inverse zig zag scan
+ */
+ UWORD16 au2_8x8_weight_scale[2][64];
+
+} sps_t;
+
+
+/**
+ * Structure to hold PPS info
+ */
+typedef struct
+{
+ /**
+ * pic_parameter_set_id
+ */
+ UWORD8 u1_pps_id;
+
+ /**
+ * seq_parameter_set_id
+ */
+ UWORD8 u1_sps_id;
+
+ /**
+ * Entropy coding : 0-VLC; 1 - CABAC
+ */
+ UWORD8 u1_entropy_coding_mode_flag;
+
+ /*
+ * Pic order present flag
+ */
+ UWORD8 u1_pic_order_present_flag;
+
+ /*
+ * Number of slice groups
+ */
+ UWORD8 u1_num_slice_groups;
+
+ /*
+ * Slice group map type
+ */
+ UWORD8 u1_slice_group_map_type;
+
+ /*
+ * Maximum reference picture index in the reference list 0 : range [0 - 31] (NOTE(review): name says "num", comment says "maximum index" - confirm which is stored)
+ */
+ WORD8 i1_num_ref_idx_l0_default_active;
+
+ /*
+ * Maximum reference picture index in the reference list 1 : range [0 - 31] (NOTE(review): name says "num", comment says "maximum index" - confirm which is stored)
+ */
+ WORD8 i1_num_ref_idx_l1_default_active;
+
+ /**
+ * weighted_pred_flag
+ */
+ WORD8 i1_weighted_pred_flag;
+
+ /**
+ * weighted_bipred_idc
+ */
+ WORD8 i1_weighted_bipred_idc;
+
+ /**
+ * pic_init_qp_minus26 (NOTE(review): field name drops "_minus26"; confirm which value is stored)
+ */
+ WORD8 i1_pic_init_qp;
+
+ /**
+ * pic_init_qs_minus26 (NOTE(review): field name drops "_minus26"; confirm which value is stored)
+ */
+ WORD8 i1_pic_init_qs;
+
+ /*
+ * Chroma QP offset w.r.t QPY {-12,12}
+ */
+ WORD8 i1_chroma_qp_index_offset;
+
+ /**
+ * deblocking_filter_control_present_flag
+ */
+ WORD8 i1_deblocking_filter_control_present_flag;
+
+ /**
+ * constrained_intra_pred_flag
+ */
+ WORD8 i1_constrained_intra_pred_flag;
+
+ /**
+ * redundant_pic_cnt_present_flag
+ */
+ WORD8 i1_redundant_pic_cnt_present_flag;
+
+ /**
+ * transform_8x8_mode_flag
+ */
+ WORD8 i1_transform_8x8_mode_flag;
+
+ /**
+ * pic_scaling_matrix_present_flag
+ */
+ WORD8 i1_pic_scaling_matrix_present_flag;
+
+ /*
+ * Second chroma QP offset
+ */
+ WORD8 i1_second_chroma_qp_index_offset;
+
+
+ /**
+ * useDefaultScalingMatrixFlag, one entry per scaling list
+ */
+ WORD8 ai1_use_default_scaling_matrix_flag[8];
+
+ /**
+ * 4x4 Scaling lists after inverse zig zag scan
+ */
+ UWORD16 au2_4x4_weight_scale[6][16];
+
+ /**
+ * 8x8 Scaling lists after inverse zig zag scan
+ */
+ UWORD16 au2_8x8_weight_scale[2][64];
+
+
+ /**
+ * pic_scaling_list_present_flag, one entry per scaling list
+ */
+ WORD8 ai1_pic_scaling_list_present_flag[8];
+
+ /**
+ * Flag to give status of PPS structure
+ */
+ WORD8 i1_pps_valid;
+
+
+} pps_t;
+
+/**
+ * MMCO commands and params.
+ */
+typedef struct
+{
+ /* memory management control operation (mmco) command */
+ UWORD8 u1_memory_management_control_operation;
+
+ /*
+ * Difference of pic nums (minus 1) of a short-term pic/frame, used
+ * 1. To signal it as "unused for reference" if mmco = 1
+ * 2. To signal it as "used for long-term reference" if mmco = 3
+ */
+ UWORD32 u4_difference_of_pic_nums_minus1;
+
+ /* Long-term pic num to be set as "unused for reference" */
+ UWORD8 u1_long_term_pic_num;
+
+ /*
+ * Assign a long-term idx to a picture as follows
+ * 1. Assign to a short-term pic if mmco = 3
+ * 2. Assign to the current pic if mmco = 6
+ */
+ UWORD8 u1_long_term_frame_idx;
+
+ /*
+ * The max long-term idx (plus 1). The long-term pics having idx above
+ * the max are set as "unused for reference"
+ */
+ UWORD8 u1_max_long_term_frame_idx_plus1;
+
+}mmco_prms_t;
+
+/**
+ * Structure to hold Reference picture list modification info
+ */
+typedef struct
+{
+ /* ref_pic_list_modification_flag_l0 */
+ WORD8 i1_ref_pic_list_modification_flag_l0;
+
+ /* modification_of_pic_nums_idc values for list0, up to MAX_MODICATION_IDC entries */
+ WORD8 i1_modification_of_pic_nums_idc_l0[MAX_MODICATION_IDC];
+
+ /*
+ * The absolute difference (minus 1) between the picture number of
+ * the picture being moved to the current index in
+ * list0 and the picture number prediction value
+ */
+ UWORD32 u4_abs_diff_pic_num_minus1_l0[MAX_MODICATION_IDC];
+
+ /*
+ * The long-term picture number of the picture being moved
+ * to the current index in list0
+ */
+ UWORD8 u1_long_term_pic_num_l0[MAX_MODICATION_IDC];
+
+ /* ref_pic_list_modification_flag_l1 */
+ WORD8 i1_ref_pic_list_modification_flag_l1;
+
+ /* modification_of_pic_nums_idc values for list1, up to MAX_MODICATION_IDC entries */
+ WORD8 i1_modification_of_pic_nums_idc_l1[MAX_MODICATION_IDC];
+
+ /*
+ * The absolute difference (minus 1) between the picture number of
+ * the picture being moved to the current index in
+ * list1 and the picture number prediction value
+ */
+ UWORD32 u4_abs_diff_pic_num_minus1_l1[MAX_MODICATION_IDC];
+
+ /*
+ * The long-term picture number of the picture being moved
+ * to the current index in list1
+ */
+ UWORD8 u1_long_term_pic_num_l1[MAX_MODICATION_IDC];
+}rplm_t;
+
+/**
+ * Structure to hold Slice Header info
+ */
+typedef struct
+{
+
+ /*
+ * nal_unit_type
+ */
+ WORD8 i1_nal_unit_type;
+
+ /*
+ * nal_unit_idc
+ */
+ WORD8 i1_nal_unit_idc;
+
+ /*
+ * first_mb_in_slice
+ */
+ UWORD16 u2_first_mb_in_slice;
+
+ /*
+ * slice_type
+ */
+ UWORD8 u1_slice_type;
+
+ /*
+ * pic_parameter_set_id
+ */
+ UWORD8 u1_pps_id;
+
+ /*
+ * frame_num
+ */
+ WORD32 i4_frame_num;
+
+ /*
+ * field_pic_flag
+ */
+ WORD8 i1_field_pic_flag;
+
+ /*
+ * bottom_field_flag
+ */
+ WORD8 i1_bottom_field_flag;
+
+ /*
+ * second_field
+ */
+ WORD8 i1_second_field_flag;
+
+ /*
+ * idr_pic_id
+ */
+ UWORD16 u2_idr_pic_id ;
+
+ /*
+ * pic_order_cnt_lsb (NOTE(review): declared UWORD16 despite the i4_ prefix)
+ */
+ UWORD16 i4_pic_order_cnt_lsb;
+
+ /*
+ * delta_pic_order_cnt_bottom
+ */
+ WORD32 i4_delta_pic_order_cnt_bottom;
+
+ /*
+ * delta_pic_order_cnt
+ */
+ WORD32 ai4_delta_pic_order_cnt[2];
+
+ /*
+ * redundant_pic_cnt
+ */
+ UWORD8 u1_redundant_pic_cnt;
+
+ /*
+ * direct_spatial_mv_pred_flag
+ */
+ UWORD8 u1_direct_spatial_mv_pred_flag;
+
+ /*
+ * num_ref_idx_active_override_flag
+ */
+ UWORD8 u1_num_ref_idx_active_override_flag;
+
+ /*
+ * num_ref_idx_l0_active
+ */
+ WORD8 i1_num_ref_idx_l0_active;
+
+ /*
+ * num_ref_idx_l1_active (NOTE(review): comment used to say _minus1 - confirm)
+ */
+ WORD8 i1_num_ref_idx_l1_active;
+
+ /*
+ * ref_pic_list_reordering_flag_l0
+ */
+ UWORD8 u1_ref_idx_reordering_flag_l0;
+
+ /**
+ * Reference picture list modification info
+ */
+ rplm_t s_rplm;
+
+ /**
+ * L0 Reference pic lists
+ */
+ ref_list_t as_ref_pic_list0[MAX_DPB_SIZE];
+
+ /**
+ * L1 Reference pic lists
+ */
+ ref_list_t as_ref_pic_list1[MAX_DPB_SIZE];
+
+ /*
+ * weighted_bipred_idc (NOTE(review): declared WORD8 despite the u1_ prefix)
+ */
+ WORD8 u1_weighted_bipred_idc;
+
+ /*
+ * no_output_of_prior_pics_flag
+ */
+ UWORD8 u1_no_output_of_prior_pics_flag;
+
+ /*
+ * long_term_reference_flag
+ */
+ UWORD8 u1_long_term_reference_flag;
+
+ /*
+ * adaptive_ref_pic_marking_mode_flag
+ */
+ UWORD8 u1_adaptive_ref_pic_marking_mode_flag;
+
+ /*
+ * Array of structures to store mmco commands
+ * and parameters.
+ */
+ mmco_prms_t as_mmco_prms[MAX_MMCO_COMMANDS];
+
+ /*
+ * entropy_coding_mode_flag (NOTE(review): declared WORD8 despite the u1_ prefix)
+ */
+ WORD8 u1_entropy_coding_mode_flag;
+
+ /*
+ * cabac_init_idc
+ */
+ WORD8 i1_cabac_init_idc;
+
+ /*
+ * slice qp
+ */
+ WORD8 i1_slice_qp;
+
+ /*
+ * sp_for_switch_flag
+ */
+ UWORD8 u1_sp_for_switch_flag;
+
+ /*
+ * slice_qs_delta
+ */
+ UWORD8 u1_slice_qs;
+
+ /*
+ * disable_deblocking_filter_idc (NOTE(review): declared WORD8 despite the u1_ prefix)
+ */
+ WORD8 u1_disable_deblocking_filter_idc;
+
+ /*
+ * slice_alpha_c0_offset_div2
+ */
+ WORD8 i1_slice_alpha_c0_offset_div2;
+
+ /*
+ * slice_beta_offset_div2
+ */
+ WORD8 i1_slice_beta_offset_div2;
+
+ /*
+ * num_slice_groups_minus1 (NOTE(review): declared WORD8 despite the u1_ prefix)
+ */
+ WORD8 u1_num_slice_groups_minus1;
+
+ /*
+ * slice_group_change_cycle (NOTE(review): declared WORD8 despite the u1_ prefix)
+ */
+ WORD8 u1_slice_group_change_cycle;
+
+ /**
+ * Start MB X (NOTE(review): declared UWORD16 despite the i2_ prefix)
+ */
+ UWORD16 i2_mb_x;
+
+ /**
+ * Start MB Y (NOTE(review): declared UWORD16 despite the i2_ prefix)
+ */
+ UWORD16 i2_mb_y;
+
+ /**
+ * Absolute POC. Contains minimum of top and bottom POC.
+ */
+ WORD32 i4_abs_pic_order_cnt;
+
+ /**
+ * Absolute top POC. Contains top poc for frame or top
+ * field. Invalid for bottom field.
+ */
+ WORD32 i4_abs_top_pic_order_cnt;
+
+ /**
+ * Absolute bottom POC. Contains bottom poc for frame or bottom
+ * field. Invalid for top field.
+ */
+ WORD32 i4_abs_bottom_pic_order_cnt;
+
+ /** Flag signaling if the current slice is ref slice */
+ UWORD8 i1_nal_ref_idc;
+
+ /** Flag to indicate if the current slice is MBAFF Frame */
+ UWORD8 u1_mbaff_frame_flag;
+
+ /** luma_log2_weight_denom */
+ UWORD8 u1_luma_log2_weight_denom;
+
+ /** chroma_log2_weight_denom */
+ UWORD8 u1_chroma_log2_weight_denom;
+
+ /** luma_weight_l0_flag */
+ UWORD8 au1_luma_weight_l0_flag[MAX_DPB_SIZE];
+
+ /** luma_weight_l0 : (-128, 127) is the range of weights
+ * when weighted pred is enabled, 128 is default value */
+ WORD16 ai2_luma_weight_l0[MAX_DPB_SIZE];
+
+ /** luma_offset_l0 : (-128, 127) is the range of offset
+ * when weighted pred is enabled, 0 is default value */
+ WORD8 ai1_luma_offset_l0[MAX_DPB_SIZE];
+
+ /** chroma_weight_l0_flag */
+ UWORD8 au1_chroma_weight_l0_flag[MAX_DPB_SIZE];
+
+ /** chroma_weight_l0 : (-128, 127) is the range of weights
+ * when weighted pred is enabled, 128 is default value */
+ WORD16 ai2_chroma_weight_l0[MAX_DPB_SIZE][2];
+
+ /** chroma_offset_l0 : (-128, 127) is the range of offset
+ * when weighted pred is enabled, 0 is default value */
+ WORD8 ai1_chroma_offset_l0[MAX_DPB_SIZE][2];
+
+ /** luma_weight_l1_flag */
+ UWORD8 au1_luma_weight_l1_flag[MAX_DPB_SIZE];
+
+ /** luma_weight_l1 : (-128, 127) is the range of weights
+ * when weighted pred is enabled, 128 is default value */
+ WORD16 ai2_luma_weight_l1[MAX_DPB_SIZE];
+
+ /** luma_offset_l1 : (-128, 127) is the range of offset
+ * when weighted pred is enabled, 0 is default value */
+ WORD8 ai1_luma_offset_l1[MAX_DPB_SIZE];
+
+ /** chroma_weight_l1_flag */
+ UWORD8 au1_chroma_weight_l1_flag[MAX_DPB_SIZE];
+
+ /** chroma_weight_l1 : (-128, 127) is the range of weights
+ * when weighted pred is enabled, 128 is default value */
+ WORD16 ai2_chroma_weight_l1[MAX_DPB_SIZE][2];
+
+ /** chroma_offset_l1 : (-128, 127) is the range of offset
+ * when weighted pred is enabled, 0 is default value */
+ WORD8 ai1_chroma_offset_l1[MAX_DPB_SIZE][2];
+}slice_header_t;
+
+
+/*****************************************************************************/
+/* The following can be used to type cast coefficient data that is stored */
+/* per subblock. Note that though ai2_level is shown as an array that */
+/* holds 16 coefficients, only the first few entries will be valid. Next */
+/* subblocks data starts after the valid number of coefficients. Number */
+/* of non-zero coefficients will be derived using number of non-zero bits */
+/* in sig coeff map */
+/*****************************************************************************/
+
+/**
+ * Structure to hold coefficient info for a 2x2 chroma DC transform
+ */
+typedef struct
+{
+ /**
+ * significant coefficient map (8 bits wide)
+ */
+ UWORD8 u1_sig_coeff_map;
+
+ /**
+ * sub block position
+ */
+ UWORD8 u1_subblk_pos;
+
+ /**
+ * holds coefficients; room for 2 * 2 = 4 levels
+ */
+ WORD16 ai2_level[2 * 2];
+}tu_sblk2x2_coeff_data_t;
+
+/**
+ * Structure to hold coefficient info for a 4x4 transform
+ */
+typedef struct
+{
+ /**
+ * significant coefficient map (16 bits wide)
+ */
+ UWORD16 u2_sig_coeff_map;
+
+ /**
+ * sub block position
+ */
+ UWORD16 u2_subblk_pos;
+
+ /**
+ * holds coefficients; room for SUBBLK_COEFF_CNT levels
+ */
+ WORD16 ai2_level[SUBBLK_COEFF_CNT];
+}tu_sblk4x4_coeff_data_t;
+
+/**
+ * Structure to hold coefficient info for a 8x8 transform
+ */
+typedef struct
+{
+
+ /**
+ * significant coefficient map, split over two 32-bit words
+ */
+ UWORD32 au4_sig_coeff_map[2];
+
+ /**
+ * sub block position
+ */
+ UWORD16 u2_subblk_pos;
+
+ /**
+ * holds coefficients; room for TRANS_SIZE_8 * TRANS_SIZE_8 levels
+ */
+ WORD16 ai2_level[TRANS_SIZE_8 * TRANS_SIZE_8];
+}tu_blk8x8_coeff_data_t;
+
+
+/**
+ * Structure to hold coefficient info for a 16x16 IPCM MB
+ */
+typedef struct
+{
+ /**
+ * holds the IPCM data: MB_SIZE * MB_SIZE * 3 / 2 bytes
+ */
+ UWORD8 au1_level[MB_SIZE * MB_SIZE * 3 / 2];
+}tu_ipcm_coeff_data_t;
+
+
+typedef struct
+{
+ /**
+ * Transform sizes 0: 4x4, 1: 8x8,
+ */
+ UWORD32 b1_trans_size : 1;
+
+ /**
+ * Flag to signal if the current MB is IPCM
+ */
+ UWORD32 b1_ipcm : 1;
+
+ /**
+ * Intra pred sizes 0: 4x4, 1: 8x8, 2: 16x16
+ */
+ UWORD32 b2_intra_pred_size : 2;
+
+ /**
+ * Chroma intra mode
+ */
+ UWORD32 b2_intra_chroma_pred_mode: 2;
+
+ /**
+ * Number of coded subblocks in the current MB, for which
+ * tu data is sent. Maximum of 27 subblocks in the following
+ * order (unsigned: a signed 5-bit field could only hold up to 15).
+ * 1 4x4 luma DC(for intra16x16),
+ * 16 4x4 luma,
+ * 2 2x2 chroma DC,
+ * 8 4x4 chroma,
+ */
+ UWORD32 b5_num_coded_sblks: 5;
+
+ /**
+ * Flag to signal if 4x4 subblock for DC values (in INTRA 16x16 MB)
+ * is coded
+ */
+ UWORD32 b1_luma_dc_coded: 1;
+
+ /**
+ * Flag to signal if the chroma DC subblocks
+ * are coded
+ */
+ UWORD32 b1_chroma_dc_coded: 1;
+
+ /**
+ * Luma CSBP - 16 bits, 1 bit for each 4x4
+ * (for intra16x16 mb_type presumably only ac coefficients are flagged - confirm)
+ */
+ UWORD32 b16_luma_csbp: 16;
+
+ /**
+ * Chroma CSBP - 8 bits, 1 bit for each 4x4
+ * (the list above counts 8 4x4 chroma subblocks)
+ */
+ UWORD32 b8_chroma_csbp: 8;
+
+ /**
+ * Luma Intra pred modes,
+ * Based on intra pred size either 16, 4 or 1 entry will be
+ * populated below.
+ */
+ UWORD8 au1_luma_intra_modes[16];
+
+}intra_mb_t;
+
+
+typedef struct
+{
+ /**
+ * Transform sizes 0: 4x4, 1: 8x8,
+ */
+ UWORD8 b1_trans_size : 1;
+
+
+ /**
+ * Skip flag
+ */
+ UWORD8 b1_skip : 1;
+
+
+ /**
+ * Number of coded subblocks in the current MB, for which
+ * tu data is sent. Maximum of 26 subblocks in the following
+ * order (unsigned: a signed 5-bit field could only hold up to 15).
+ * 16 4x4 luma,
+ * 2 2x2 chroma DC,
+ * 8 4x4 chroma,
+ */
+ UWORD32 b5_num_coded_sblks: 5;
+
+ /**
+ * Luma CSBP - 16 bits, 1 bit for each 4x4
+ * luma subblock
+ */
+ UWORD32 b16_luma_csbp: 16;
+
+ /**
+ * Chroma CSBP - 8 bits wide (despite the b16_ prefix),
+ * 1 bit for each 4x4 chroma subblock
+ */
+ UWORD32 b16_chroma_csbp: 8;
+}inter_mb_t;
+
+#endif /* _IH264_STRUCTS_H_ */
diff --git a/common/ih264_trans_data.c b/common/ih264_trans_data.c
new file mode 100755
index 0000000..a1231e6
--- /dev/null
+++ b/common/ih264_trans_data.c
@@ -0,0 +1,312 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_trans_data.c
+ *
+ * @brief
+ * Contains definition of global variables for H264 encoder
+ *
+ * @author
+ * Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#include "ih264_typedefs.h"
+#include "ih264_trans_data.h"
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/*
+ * Since we don't have a division operation in neon
+ * we will multiply by LCM of 16,6,10 and scale accordingly
+ * so take care: to get the actual transform you need to divide by the LCM
+ * LCM = 240
+ */
+
+const UWORD16 g_scal_coff_h264_4x4[16] ={ /* values are 240/16 = 15, 240/6 = 40 and 240/10 = 24 */
+ 15,40,40,40,
+ 40,24,40,24,
+ 15,40,40,15, /* NOTE(review): rows 0 and 2 differ from each other, unlike rows 1 and 3 - confirm intended */
+ 40,24,40,24};
+
+
+
+const UWORD16 g_scal_coff_h264_8x8[16]=
+ { /* integer parts of the fractional 4x4 matrix quoted in the comment that follows */
+ 16, 15, 20, 15,
+ 15, 14, 19, 14,
+ 20, 19, 25, 19,
+ 15, 14, 19, 14
+ };
+/*
+ * The scaling is by an 8x8 matrix, but due to its 4x4 symmetry we can use
+ * a 4x4 matrix for scaling
+ * now since divide is to be avoided, we will compute 1/ values and scale it up
+ * to preserve information since our data is max 10 bit +1 sign bit we can shift a maximum of 21 bits up
+ * hence multiply the matrix as such
+{16.000 15.059 20.227 15.059
+15.059 14.173 19.051 14.173
+20.227 19.051 25.600 19.051
+15.059 14.173 19.051 14.173};
+{512, 544, 405, 544,
+544, 578, 430, 578,
+405, 430, 320, 430,
+544, 578, 430, 578};*/
+
+
+/**
+ ******************************************************************************
+ * @brief Scale Table for quantizing 4x4 subblock. To quantize a given 4x4 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in this table and right shift the result by (QP_BITS_h264_4x4 +
+ * floor(qp/6)), here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 16 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+const UWORD16 gu2_quant_scale_matrix_4x4[96] =
+{
+ 13107, 8066, 13107, 8066, /* qp%6 = 0 (entries 0-15) */
+ 8066, 5243, 8066, 5243,
+ 13107, 8066, 13107, 8066,
+ 8066, 5243, 8066, 5243,
+
+ 11916, 7490, 11916, 7490, /* qp%6 = 1 (entries 16-31) */
+ 7490, 4660, 7490, 4660,
+ 11916, 7490, 11916, 7490,
+ 7490, 4660, 7490, 4660,
+
+ 10082, 6554, 10082, 6554, /* qp%6 = 2 (entries 32-47) */
+ 6554, 4194, 6554, 4194,
+ 10082, 6554, 10082, 6554,
+ 6554, 4194, 6554, 4194,
+
+ 9362, 5825, 9362, 5825, /* qp%6 = 3 (entries 48-63) */
+ 5825, 3647, 5825, 3647,
+ 9362, 5825, 9362, 5825,
+ 5825, 3647, 5825, 3647,
+
+ 8192, 5243, 8192, 5243, /* qp%6 = 4 (entries 64-79) */
+ 5243, 3355, 5243, 3355,
+ 8192, 5243, 8192, 5243,
+ 5243, 3355, 5243, 3355,
+
+ 7282, 4559, 7282, 4559, /* qp%6 = 5 (entries 80-95) */
+ 4559, 2893, 4559, 2893,
+ 7282, 4559, 7282, 4559,
+ 4559, 2893, 4559, 2893,
+
+};
+
+/**
+ ******************************************************************************
+ * @brief Round Factor for quantizing subblock. While quantizing a given 4x4 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in the table gu2_quant_scale_matrix_4x4 and then right shift
+ * the result by (QP_BITS_h264_4x4 + floor(qp/6)).
+ * Before right shifting a round factor is added.
+ * The round factor can be any value [a * (1 << (QP_BITS_h264_4x4 + floor(qp/6)))]
+ * for 'a' lies in the range 0-0.5.
+ * Here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp/6
+ * output : round factor.
+ *
+ * @remarks The round factor is constructed by setting a = 1/3
+ *
+ * round factor constructed by setting a = 1/3
+ {
+ 10922, 21845, 43690, 87381,
+ 174762, 349525, 699050, 1398101,
+ 2796202,
+ }
+ *
+ * round factor constructed by setting a = 0.49
+ *{
+ 16056, 32112, 64225,
+ 128450, 256901, 513802,
+ 1027604, 2055208, 4110417,
+ };
+
+ * round factor constructed by setting a = 0.5
+ 16384, 32768, 65536,
+ 131072, 262144, 524288,
+ 1048576, 2097152, 4194304,
+
+ ******************************************************************************
+ */
+const UWORD32 gu4_forward_quant_round_factor_4x4[9] =
+{
+ 10922, 21845, 43690, 87381, /* qp/6 = 0..3; 10922 = (1 << 15) / 3 */
+ 174762, 349525, 699050, 1398101, /* qp/6 = 4..7 */
+ 2796202, /* qp/6 = 8 */
+};
+
+
+
+/**
+ ******************************************************************************
+ * @brief Threshold Table. Quantizing the given DCT coefficient is done only if
+ * it exceeds the threshold value presented in this table.
+ *
+ * input : qp/6, qp%6, index location (i,j)
+ * output : Threshold constant.
+ *
+ * @remarks 16 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive (16 * 6 = 96 entries; the table has no qp/6 dimension).
+ ******************************************************************************
+ */
+const UWORD16 gu2_forward_quant_threshold_4x4[96] =
+{
+ 426, 693, 426, 693, /* qp%6 = 0 (entries 0-15) */
+ 693, 1066, 693, 1066,
+ 426, 693, 426, 693,
+ 693, 1066, 693, 1066,
+
+ 469, 746, 469, 746, /* qp%6 = 1 (entries 16-31) */
+ 746, 1200, 746, 1200,
+ 469, 746, 469, 746,
+ 746, 1200, 746, 1200,
+
+ 554, 853, 554, 853, /* qp%6 = 2 (entries 32-47) */
+ 853, 1333, 853, 1333,
+ 554, 853, 554, 853,
+ 853, 1333, 853, 1333,
+
+ 597, 960, 597, 960, /* qp%6 = 3 (entries 48-63) */
+ 960, 1533, 960, 1533,
+ 597, 960, 597, 960,
+ 960, 1533, 960, 1533,
+
+ 682, 1066, 682, 1066, /* qp%6 = 4 (entries 64-79) */
+ 1066, 1666, 1066, 1666,
+ 682, 1066, 682, 1066,
+ 1066, 1666, 1066, 1666,
+
+ 767, 1226, 767, 1226, /* qp%6 = 5 (entries 80-95) */
+ 1226, 1933, 1226, 1933,
+ 767, 1226, 767, 1226,
+ 1226, 1933, 1226, 1933,
+};
+
+/**
+ ******************************************************************************
+ * @brief Scale Table for quantizing 8x8 subblock. To quantize a given 8x8 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in this table and right shift the result by (QP_BITS_h264_8x8 +
+ * floor(qp/6)), here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 64 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+const UWORD16 gu2_quant_scale_matrix_8x8 [384] =
+{
+ 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222, /* qp%6 = 0 (entries 0-63) */
+ 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428,
+ 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481,
+ 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428,
+ 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222,
+ 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428,
+ 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481,
+ 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428,
+
+ 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058, /* qp%6 = 1 (entries 64-127) */
+ 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826,
+ 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290,
+ 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826,
+ 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058,
+ 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826,
+ 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290,
+ 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826,
+
+ 10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675, /* qp%6 = 2 (entries 128-191) */
+ 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943,
+ 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985,
+ 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943,
+ 10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675,
+ 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943,
+ 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985,
+ 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943,
+
+ 9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931, /* qp%6 = 3 (entries 192-255) */
+ 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228,
+ 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259,
+ 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228,
+ 9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931,
+ 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228,
+ 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259,
+ 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228,
+
+ 8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740, /* qp%6 = 4 (entries 256-319) */
+ 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346,
+ 10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777,
+ 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346,
+ 8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740,
+ 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346,
+ 10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777,
+ 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346,
+
+ 7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830, /* qp%6 = 5 (entries 320-383) */
+ 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428,
+ 9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640,
+ 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428,
+ 7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830,
+ 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428,
+ 9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640,
+ 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428,
+
+};
+
+
+/**
+ ******************************************************************************
+ * @brief Specification of QPc as a function of qPi
+ *
+ * input : qp luma
+ * output : qp chroma.
+ *
+ * @remarks Refer Table 8-15 of h264 specification.
+ ******************************************************************************
+ */
+const UWORD8 gu1_qpc_fqpi[52] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, /* index 0-7 */
+ 8, 9, 10, 11, 12, 13, 14, 15, /* index 8-15 */
+ 16, 17, 18, 19, 20, 21, 22, 23, /* index 16-23 */
+ 24, 25, 26, 27, 28, 29, 29, 30, /* index 24-31: the mapping stops being identity at index 30 */
+ 31, 32, 32, 33, 34, 34, 35, 35, /* index 32-39 */
+ 36, 36, 37, 37, 37, 38, 38, 38, /* index 40-47 */
+ 39, 39, 39, 39, /* index 48-51 */
+};
diff --git a/common/ih264_trans_data.h b/common/ih264_trans_data.h
new file mode 100755
index 0000000..dc77ae7
--- /dev/null
+++ b/common/ih264_trans_data.h
@@ -0,0 +1,125 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_trans_data.h
+ *
+ * @brief
+ * Contains declaration of global variables for H264 transform, quant and inverse quant
+ *
+ * @author
+ * Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+#ifndef IH264_GLOBAL_DATA_H_
+#define IH264_GLOBAL_DATA_H_
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/* Scaling matrices for h264 quantization */
+extern const UWORD16 g_scal_coff_h264_4x4[16]; /* defined in ih264_trans_data.c */
+extern const UWORD16 g_scal_coff_h264_8x8[16]; /* defined in ih264_trans_data.c; 16 entries despite the 8x8 name */
+
+
+/**
+ ******************************************************************************
+ * @brief Scale Table for quantizing 4x4 subblock. To quantize a given 4x4 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in this table and right shift the result by (QP_BITS_h264_4x4 +
+ * floor(qp/6)), here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 16 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+extern const UWORD16 gu2_quant_scale_matrix_4x4[96]; /* 6 (qp%6) groups x 16 positions */
+
+/**
+ ******************************************************************************
+ * @brief Round Factor for quantizing subblock. While quantizing a given 4x4 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in the table gu2_quant_scale_matrix_4x4 and then right shift
+ * the result by (QP_BITS_h264_4x4 + floor(qp/6)).
+ * Before right shifting a round factor is added.
+ * The round factor can be any value [a * (1 << (QP_BITS_h264_4x4 + floor(qp/6)))]
+ * for 'a' lies in the range 0-0.5.
+ * Here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp/6
+ * output : round factor.
+ *
+ * @remarks The round factor is constructed by setting a = 1/3
+ ******************************************************************************
+ */
+extern const UWORD32 gu4_forward_quant_round_factor_4x4[9]; /* one entry per qp/6 value, 0..8 */
+
+/**
+ ******************************************************************************
+ * @brief Threshold Table. Quantizing the given DCT coefficient is done only if
+ * it exceeds the threshold value presented in this table.
+ *
+ * input : qp/6, qp%6, index location (i,j)
+ * output : Threshold constant.
+ *
+ * @remarks 16 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive (16 * 6 = 96 entries; the table has no qp/6 dimension).
+ ******************************************************************************
+ */
+extern const UWORD16 gu2_forward_quant_threshold_4x4[96]; /* 6 (qp%6) groups x 16 positions */
+
+/**
+ ******************************************************************************
+ * @brief Scale Table for quantizing 8x8 subblock. To quantize a given 8x8 DCT
+ * transformed block, the coefficient at index location (i,j) is scaled by one of
+ * the constants in this table and right shift the result by (QP_BITS_h264_8x8 +
+ * floor(qp/6)), here qp is the quantization parameter used to quantize the mb.
+ *
+ * input : qp%6, index location (i,j)
+ * output : scale constant.
+ *
+ * @remarks 64 constants for each index position of the subblock and 6 for each
+ * qp%6 in the range 0-5 inclusive.
+ ******************************************************************************
+ */
+extern const UWORD16 gu2_quant_scale_matrix_8x8 [384]; /* 6 (qp%6) groups x 64 positions */
+
+/**
+ ******************************************************************************
+ * @brief Specification of QPc as a function of qPi
+ *
+ * input : qp luma
+ * output : qp chroma.
+ *
+ * @remarks Refer Table 8-15 of h264 specification.
+ ******************************************************************************
+ */
+extern const UWORD8 gu1_qpc_fqpi[52]; /* 52 entries, indices 0..51 */
+
+
+#endif /* IH264_GLOBAL_DATA_H_ */
diff --git a/common/ih264_trans_macros.h b/common/ih264_trans_macros.h
new file mode 100755
index 0000000..f114d0e
--- /dev/null
+++ b/common/ih264_trans_macros.h
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_trans_macros.h
+*
+* @brief
+* The file contains definitions of macros that perform forward and inverse
+* quantization
+*
+* @author
+* Ittiam
+*
+* @remark
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264_TRANS_MACROS_H_
+#define IH264_TRANS_MACROS_H_
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Macro to perform forward quantization.
+ * @description The value to be quantized is first compared with a threshold.
+ * If the value is less than the threshold, the quantization value is returned
+ * as zero else the value is quantized traditionally as per the rules of
+ * h264 specification
+******************************************************************************
+ */
+#define FWD_QUANT(i4_value, u4_abs_value, i4_sign, threshold, scale, rndfactor, qbits, u4_nnz) \
+ { /* i4_value, u4_abs_value, i4_sign and u4_nnz are written, so they must be lvalues */\
+ if ((i4_value) < 0)\
+ {\
+ (u4_abs_value) = -(i4_value);\
+ (i4_sign) = -1;\
+ }\
+ else\
+ {\
+ (u4_abs_value) = (i4_value);\
+ (i4_sign) = 1;\
+ }\
+ if ((u4_abs_value) < (threshold)) /* below threshold: result is forced to zero */\
+ {\
+ (i4_value) = 0;\
+ }\
+ else\
+ {\
+ (u4_abs_value) *= (scale);\
+ (u4_abs_value) += (rndfactor);\
+ (u4_abs_value) >>= (qbits);\
+ (i4_value) = (u4_abs_value) * (i4_sign);\
+ if (i4_value) /* u4_nnz counts only results that remain non-zero */\
+ {\
+ (u4_nnz)++;\
+ }\
+ }\
+ }
+
+/**
+******************************************************************************
+ * @brief Macro to perform inverse quantization.
+ * @remarks The value can also be de-quantized as
+ * if (u4_qp_div_6 < 4)
+ * {
+ * i4_value = (quant_scale * weight_scale * i4_value + (1 << (3-u4_qp_div_6)))
+ * i4_value >>= (4 - u4_qp_div_6)
+ * }
+ * else
+ * {
+ * i4_value = (quant_scale * weight_scale * i4_value) << (u4_qp_div_6 -4)
+ * }
+******************************************************************************
+ */
+#define INV_QUANT(i4_value, quant_scale, weight_scale, u4_qp_div_6, rndfactor, qbits)\
+ { /* i4_value is updated in place and must be an lvalue */\
+ (i4_value) *= (quant_scale);\
+ (i4_value) *= (weight_scale);\
+ (i4_value) += (rndfactor);\
+ (i4_value) *= (1 << (u4_qp_div_6)); /* multiply, not '<<': left-shifting a negative value is undefined */\
+ (i4_value) >>= (qbits);\
+ }
+
+#define QUANT_H264(x,y,w,z,shft) ((shft) = ABS(x),\
+ (shft) *= (y),\
+ (shft) += (z),\
+ (shft) = (shft) >> (w),\
+ (shft) = SIGNXY(shft,x)) /* result is left in shft; args parenthesized so expression arguments expand safely */
+
+#define IQUANT_H264(x,y,wscal,w,shft) ((shft) = (x), \
+ (shft) *= (y), \
+ (shft) *= (wscal), \
+ (shft) = (shft) << (w)) /* shft = (x * y * wscal) << w; args parenthesized so expression arguments expand safely */
+
+#define IQUANT_lev_H264(x,y,wscal,add_f,w,shft) ((shft) = (x), \
+ (shft) *= (y), \
+ (shft) *= (wscal), \
+ (shft) += (add_f), \
+ (shft) = (shft) >> (w)) /* shft = (x * y * wscal + add_f) >> w; args parenthesized so expression arguments expand safely */
+
+#endif /* IH264_TRANS_MACROS_H_ */
diff --git a/common/ih264_trans_quant_itrans_iquant.h b/common/ih264_trans_quant_itrans_iquant.h
new file mode 100755
index 0000000..83551aa
--- /dev/null
+++ b/common/ih264_trans_quant_itrans_iquant.h
@@ -0,0 +1,232 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_trans_quant_itrans_iquant.h
+ *
+ * @brief
+ * Contains declarations for forward and inverse transform paths for H264
+ *
+ * @author
+ * Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264_TRANS_QUANT_H_
+#define IH264_TRANS_QUANT_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/* Function type; no function in this header is declared with it. */
+typedef void ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ const UWORD16 *pu2_scale_mat,
+ const UWORD16 *pu2_thresh_mat,
+ UWORD32 u4_qbit,
+ UWORD32 u4_round_fact,
+ UWORD8 *pu1_nnz);
+/* Function type; no function in this header is declared with it. NOTE(review): pi4_cntrl is a UWORD32 despite its pointer-style name. */
+typedef void ih264_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div,
+ UWORD32 pi4_cntrl,
+ WORD32 *pi4_tmp);
+
+
+/* Function prototype declarations. ih264_resi_trans_quant_ft is the type of the ih264_resi_trans_quant_* functions declared in this header. */
+typedef void ih264_resi_trans_quant_ft(UWORD8*pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ const UWORD16 *pu2_scale_mat,
+ const UWORD16 *pu2_thresh_mat,
+ UWORD32 u4_qbit,
+ UWORD32 u4_round_fact,
+ UWORD8 *pu1_nnz,
+ WORD16 *pi2_alt_dc_addr);
+/* Type of ih264_luma_16x16_resi_trans_dctrans_quant_a9, the only function declared with it in this header. */
+typedef void ih264_luma_16x16_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz,
+ UWORD32 u4_dc_flag);
+/* Type of ih264_chroma_8x8_resi_trans_dctrans_quant_a9, the only function declared with it in this header. */
+typedef void ih264_chroma_8x8_resi_trans_dctrans_quant_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz);
+/* Type of the ih264_iquant_itrans_recon_{4x4,8x8}[_dc]* functions declared in this header. */
+typedef void ih264_iquant_itrans_recon_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div,
+ WORD16 *pi2_tmp,
+ WORD32 iq_start_idx,
+ WORD16 *pi2_dc_ld_addr);
+
+/* Type of the ih264_iquant_itrans_recon_chroma_4x4* functions declared in this header. */
+typedef void ih264_iquant_itrans_recon_chroma_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dc_src);
+
+/* Type of ih264_luma_16x16_idctrans_iquant_itrans_recon_a9, the only function declared with it in this header. NOTE(review): pi4_cntrl is a UWORD32 despite its pointer-style name. */
+typedef void ih264_luma_16x16_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div,
+ UWORD32 pi4_cntrl,
+ UWORD32 u4_dc_trans_flag,
+ WORD32 *pi4_tmp);
+/* Type of ih264_chroma_8x8_idctrans_iquant_itrans_recon_a9, the only function declared with it in this header. NOTE(review): pi4_cntrl is a UWORD32 despite its pointer-style name. */
+typedef void ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div,
+ UWORD32 pi4_cntrl,
+ WORD32 *pi4_tmp);
+/* Type of the ih264_ihadamard_scaling_{4x4,2x2_uv}* functions declared in this header. */
+typedef void ih264_ihadamard_scaling_ft(WORD16* pi2_src,
+ WORD16* pi2_out,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD32* pi4_tmp);
+/* Type of the ih264_hadamard_quant_{4x4,2x2_uv}* functions declared in this header. */
+typedef void ih264_hadamard_quant_ft(WORD16 *pi2_src, WORD16 *pi2_dst,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,UWORD8 *pu1_nnz);
+/* Declarations with no architecture suffix */
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_8x8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv;
+ih264_hadamard_quant_ft ih264_hadamard_quant_4x4;
+ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv;
+
+/* A9 declarations (_a9 suffix) */
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_a9;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_a9;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_a9;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_a9;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_a9;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_a9;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_a9;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
+ih264_luma_16x16_resi_trans_dctrans_quant_ft ih264_luma_16x16_resi_trans_dctrans_quant_a9;
+ih264_chroma_8x8_resi_trans_dctrans_quant_ft ih264_chroma_8x8_resi_trans_dctrans_quant_a9;
+ih264_luma_16x16_idctrans_iquant_itrans_recon_ft ih264_luma_16x16_idctrans_iquant_itrans_recon_a9;
+ih264_chroma_8x8_idctrans_iquant_itrans_recon_ft ih264_chroma_8x8_idctrans_iquant_itrans_recon_a9;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_a9;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_a9;
+ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_a9;
+ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_a9;
+
+/* Av8 declarations (_av8 suffix) */
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_av8;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_av8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_av8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_av8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_av8;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_av8;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_av8;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_av8;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_av8;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_av8;
+ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_av8;
+ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_av8;
+
+/* SSSE3 declarations (_ssse3 suffix) */
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_ssse3;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_ssse3;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_ssse3;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_ssse3;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_ssse3;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_ssse3;
+/* SSE42 declarations (_sse42 suffix) */
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_sse42;
+ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_sse42;
+ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_sse42;
+ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_sse42;
+ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_sse42;
+ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_sse42;
+ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_sse42;
+
+#endif /* IH264_TRANS_QUANT_H_ */
diff --git a/common/ih264_typedefs.h b/common/ih264_typedefs.h
new file mode 100755
index 0000000..8e4685a
--- /dev/null
+++ b/common/ih264_typedefs.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_typedefs.h
+*
+* @brief
+* Type definitions used in the code
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IH264_TYPEDEFS_H_
+#define _IH264_TYPEDEFS_H_
+
+
+/*****************************************************************************/
+/* Unsigned data types */
+/*****************************************************************************/
+typedef unsigned char UWORD8; /* name implies 8 bits; C itself guarantees only >= 8 */
+typedef unsigned short UWORD16; /* name implies 16 bits; C itself guarantees only >= 16 */
+typedef unsigned int UWORD32; /* name implies 32 bits; C itself guarantees only >= 16 */
+typedef unsigned long long UWORD64; /* at least 64 bits; NOTE(review): no signed 64-bit counterpart is defined here */
+
+
+/*****************************************************************************/
+/* Signed data types */
+/*****************************************************************************/
+typedef signed char WORD8; /* explicitly signed, unlike plain char */
+typedef short WORD16; /* name implies 16 bits; C itself guarantees only >= 16 */
+typedef int WORD32; /* name implies 32 bits; C itself guarantees only >= 16 */
+
+
+/*****************************************************************************/
+/* Miscellaneous data types */
+/*****************************************************************************/
+typedef char CHAR; /* plain char: signedness is implementation-defined */
+typedef double DOUBLE;
+
+#endif /* _IH264_TYPEDEFS_H_ */
diff --git a/common/ih264_weighted_pred.c b/common/ih264_weighted_pred.c
new file mode 100755
index 0000000..d5d73f2
--- /dev/null
+++ b/common/ih264_weighted_pred.c
@@ -0,0 +1,495 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264_weighted_pred.c */
+/* */
+/* Description : Contains function definitions for weighted */
+/* prediction functions */
+/* */
+/* List of Functions : ih264_default_weighted_pred_luma() */
+/* ih264_default_weighted_pred_chroma() */
+/* ih264_weighted_pred_luma() */
+/* ih264_weighted_pred_chroma() */
+/* ih264_weighted_bi_pred_luma() */
+/* ih264_weighted_bi_pred_chroma() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 01 2015 Kaushik Initial version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_weighted_pred.h"
+
+/*****************************************************************************/
+/* Function definitions . */
+/*****************************************************************************/
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_default_weighted_pred_luma */
+/* */
+/* Description : This function performs the default weighted prediction */
+/* as described in sec 8.4.2.3.1 titled "Default weighted */
+/* sample prediction process" for luma. The function gets */
+/* two ht x wd blocks, calculates their rounded-average and */
+/* stores it in the destination block. (ht,wd) can be */
+/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : puc_src1 - Pointer to source 1 */
+/* puc_src2 - Pointer to source 2 */
+/* puc_dst - Pointer to destination */
+/* src_strd1 - stride for source 1 */
+/* src_strd2 - stride for source 2 */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 01 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_default_weighted_pred_luma(UWORD8 *pu1_src1,
+                                      UWORD8 *pu1_src2,
+                                      UWORD8 *pu1_dst,
+                                      WORD32 src_strd1,
+                                      WORD32 src_strd2,
+                                      WORD32 dst_strd,
+                                      WORD32 ht,
+                                      WORD32 wd)
+{
+    WORD32 row, col;
+
+    /* Visit the block one row at a time; a stride is the row-to-row distance. */
+    for(row = 0; row < ht; row++)
+    {
+        /* Rounded average of the two predictions. */
+        for(col = 0; col < wd; col++)
+        {
+            pu1_dst[col] = (pu1_src1[col] + pu1_src2[col] + 1) >> 1;
+        }
+
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_default_weighted_pred_chroma */
+/* */
+/* Description : This function performs the default weighted prediction */
+/* as described in sec 8.4.2.3.1 titled "Default weighted */
+/* sample prediction process" for chroma. The function gets */
+/* two ht x wd blocks, calculates their rounded-average and */
+/* stores it in the destination block. (ht,wd) can be */
+/* (2,2), (4,2) , (2,4), (4,4), (8,4), (4,8) or (8,8). */
+/* */
+/* Inputs : puc_src1 - Pointer to source 1 */
+/* puc_src2 - Pointer to source 2 */
+/* puc_dst - Pointer to destination */
+/* src_strd1 - stride for source 1 */
+/* src_strd2 - stride for source 2 */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 01 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_default_weighted_pred_chroma(UWORD8 *pu1_src1,
+                                        UWORD8 *pu1_src2,
+                                        UWORD8 *pu1_dst,
+                                        WORD32 src_strd1,
+                                        WORD32 src_strd2,
+                                        WORD32 dst_strd,
+                                        WORD32 ht,
+                                        WORD32 wd)
+{
+    WORD32 row, col;
+    /* Each row spans 2 * wd bytes (presumably interleaved U/V samples - confirm with caller). */
+    const WORD32 row_bytes = wd << 1;
+
+    for(row = 0; row < ht; row++)
+    {
+        /* Rounded average of the two predictions. */
+        for(col = 0; col < row_bytes; col++)
+        {
+            pu1_dst[col] = (pu1_src1[col] + pu1_src2[col] + 1) >> 1;
+        }
+
+        /* Step every pointer to the start of its next row. */
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_weighted_pred_luma */
+/* */
+/* Description : This function performs the weighted prediction as */
+/* described in sec 8.4.2.3.2 titled "Weighted sample */
+/* prediction process" for luma. The function gets one */
+/* ht x wd block, weights it, rounds it off, offsets it, */
+/* saturates it to unsigned 8-bit and stores it in the */
+/* destination block. (ht,wd) can be (4,4), (8,4), (4,8), */
+/* (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : puc_src - Pointer to source */
+/* puc_dst - Pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* log_wd - number of bits to be rounded off */
+/* wt - weight value */
+/* ofst - offset value */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 01 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_weighted_pred_luma(UWORD8 *pu1_src,
+                              UWORD8 *pu1_dst,
+                              WORD32 src_strd,
+                              WORD32 dst_strd,
+                              WORD32 log_wd,
+                              WORD32 wt,
+                              WORD32 ofst,
+                              WORD32 ht,
+                              WORD32 wd)
+{
+    WORD32 i, j;
+
+    wt = (WORD16)(wt & 0xffff);     /* weight: low 16 bits, reinterpreted as signed */
+    ofst = (WORD8)(ofst & 0xff);    /* offset: low 8 bits, reinterpreted as signed */
+
+    src_strd -= wd;                 /* strides become the skip from row end to next row start */
+    dst_strd -= wd;
+
+    if(log_wd >= 1)
+    {
+        /* Rounding term plus scaled offset; '*' instead of '<<' because left-shifting a negative ofst is UB. */
+        WORD32 i_ofst = (1 << (log_wd - 1)) + ofst * (1 << log_wd);
+        for(i = 0; i < ht; i++)
+        {
+            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
+                *pu1_dst = CLIP_U8((wt * (*pu1_src) + i_ofst) >> log_wd);
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+    else
+    {
+        /* log_wd < 1: no rounding shift, only weight and offset are applied. */
+        for(i = 0; i < ht; i++)
+        {
+            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
+                *pu1_dst = CLIP_U8(wt * (*pu1_src) + ofst);
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_weighted_pred_chroma */
+/* */
+/* Description : This function performs the weighted prediction as */
+/* described in sec 8.4.2.3.2 titled "Weighted sample */
+/* prediction process" for chroma. The function gets one */
+/* ht x wd block, weights it, rounds it off, offsets it, */
+/* saturates it to unsigned 8-bit and stores it in the */
+/* destination block. (ht,wd) can be (2,2), (4,2), (2,4), */
+/* (4,4), (8,4), (4,8) or (8,8). */
+/* */
+/* Inputs : puc_src - Pointer to source */
+/* puc_dst - Pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* log_wd - number of bits to be rounded off */
+/* wt - weight values for u and v */
+/* ofst - offset values for u and v */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 01 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_weighted_pred_chroma(UWORD8 *pu1_src,
+                                UWORD8 *pu1_dst,
+                                WORD32 src_strd,
+                                WORD32 dst_strd,
+                                WORD32 log_wd,
+                                WORD32 wt,
+                                WORD32 ofst,
+                                WORD32 ht,
+                                WORD32 wd)
+{
+    WORD32 i, j;
+    WORD32 wt_u, wt_v;
+    WORD32 ofst_u, ofst_v;
+
+    wt_u = (WORD16)(wt & 0xffff);   /* U weight: bits 0-15 of wt, as signed */
+    wt_v = (WORD16)(wt >> 16);      /* V weight: bits 16-31 of wt, as signed */
+
+    ofst_u = (WORD8)(ofst & 0xff);  /* U offset: bits 0-7 of ofst, as signed */
+    ofst_v = (WORD8)(ofst >> 8);    /* V offset: bits 8-15 of ofst, as signed */
+
+    src_strd -= wd << 1;            /* a row is wd (U, V) byte pairs; keep only the end-of-row skip */
+    dst_strd -= wd << 1;
+
+    if(log_wd >= 1)
+    {
+        /* Rounding term plus scaled offset; '*' instead of '<<' because left-shifting a negative offset is UB. */
+        ofst_u = (1 << (log_wd - 1)) + ofst_u * (1 << log_wd);
+        ofst_v = (1 << (log_wd - 1)) + ofst_v * (1 << log_wd);
+        for(i = 0; i < ht; i++)
+        {
+            /* Byte 0 of each pair uses the U parameters, byte 1 the V parameters. */
+            for(j = 0; j < wd; j++, pu1_src += 2, pu1_dst += 2)
+            {
+                pu1_dst[0] = CLIP_U8((wt_u * pu1_src[0] + ofst_u) >> log_wd);
+                pu1_dst[1] = CLIP_U8((wt_v * pu1_src[1] + ofst_v) >> log_wd);
+            }
+
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+    else
+    {
+        for(i = 0; i < ht; i++)
+        {
+            /* log_wd < 1: no rounding shift, only weight and offset are applied. */
+            for(j = 0; j < wd; j++, pu1_src += 2, pu1_dst += 2)
+            {
+                pu1_dst[0] = CLIP_U8(wt_u * pu1_src[0] + ofst_u);
+                pu1_dst[1] = CLIP_U8(wt_v * pu1_src[1] + ofst_v);
+            }
+
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_weighted_bi_pred_luma */
+/* */
+/* Description : This function performs the weighted biprediction as */
+/* described in sec 8.4.2.3.2 titled "Weighted sample */
+/* prediction process" for luma. The function gets two */
+/* ht x wd blocks, weights them, adds them, rounds off the */
+/* sum, offsets it, saturates it to unsigned 8-bit and */
+/* stores it in the destination block. (ht,wd) can be */
+/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : puc_src1 - Pointer to source 1 */
+/* puc_src2 - Pointer to source 2 */
+/* puc_dst - Pointer to destination */
+/* src_strd1 - stride for source 1 */
+/* src_strd2 - stride for source 2 */
+/* dst_strd - stride for destination */
+/* log_wd - number of bits to be rounded off */
+/* wt1 - weight value for source 1 */
+/* wt2 - weight value for source 2 */
+/* ofst1 - offset value for source 1 */
+/* ofst2 - offset value for source 2 */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 01 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_weighted_bi_pred_luma(UWORD8 *pu1_src1,
+                                 UWORD8 *pu1_src2,
+                                 UWORD8 *pu1_dst,
+                                 WORD32 src_strd1,
+                                 WORD32 src_strd2,
+                                 WORD32 dst_strd,
+                                 WORD32 log_wd,
+                                 WORD32 wt1,
+                                 WORD32 wt2,
+                                 WORD32 ofst1,
+                                 WORD32 ofst2,
+                                 WORD32 ht,
+                                 WORD32 wd)
+{
+    WORD32 i, j;
+    WORD32 shft, ofst;
+
+    ofst1 = (WORD8)(ofst1 & 0xff);      /* offsets: low 8 bits, reinterpreted as signed */
+    ofst2 = (WORD8)(ofst2 & 0xff);
+    wt1 = (WORD16)(wt1 & 0xffff);       /* weights: low 16 bits, reinterpreted as signed */
+    wt2 = (WORD16)(wt2 & 0xffff);
+    ofst = (ofst1 + ofst2 + 1) >> 1;    /* rounded mean of the two offsets */
+    shft = log_wd + 1;
+    /* Rounding term plus scaled offset; '*' instead of '<<' because left-shifting a negative ofst is UB. */
+    ofst = (1 << log_wd) + ofst * (1 << shft);
+
+    src_strd1 -= wd;                    /* strides become the skip from row end to next row start */
+    src_strd2 -= wd;
+    dst_strd -= wd;
+
+    for(i = 0; i < ht; i++)
+    {
+        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
+            *pu1_dst = CLIP_U8((wt1 * (*pu1_src1) + wt2 * (*pu1_src2) + ofst) >> shft);
+
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_weighted_bi_pred_chroma */
+/* */
+/* Description : This function performs the weighted biprediction as */
+/* described in sec 8.4.2.3.2 titled "Weighted sample */
+/* prediction process" for chroma. The function gets two */
+/* ht x wd blocks, weights them, adds them, rounds off the */
+/* sum, offsets it, saturates it to unsigned 8-bit and */
+/* stores it in the destination block. (ht,wd) can be */
+/* (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8). */
+/* */
+/* Inputs : puc_src1 - Pointer to source 1 */
+/* puc_src2 - Pointer to source 2 */
+/* puc_dst - Pointer to destination */
+/* src_strd1 - stride for source 1 */
+/* src_strd2 - stride for source 2 */
+/* dst_strd - stride for destination */
+/* log_wd - number of bits to be rounded off */
+/* wt1 - weight values for u and v in source 1 */
+/* wt2 - weight values for u and v in source 2 */
+/* ofst1 - offset value for u and v in source 1 */
+/* ofst2 - offset value for u and v in source 2 */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 01 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_weighted_bi_pred_chroma(UWORD8 *pu1_src1,
+                                   UWORD8 *pu1_src2,
+                                   UWORD8 *pu1_dst,
+                                   WORD32 src_strd1,
+                                   WORD32 src_strd2,
+                                   WORD32 dst_strd,
+                                   WORD32 log_wd,
+                                   WORD32 wt1,
+                                   WORD32 wt2,
+                                   WORD32 ofst1,
+                                   WORD32 ofst2,
+                                   WORD32 ht,
+                                   WORD32 wd)
+{
+    WORD32 i, j;
+    WORD32 wt1_u, wt1_v, wt2_u, wt2_v;
+    WORD32 ofst1_u, ofst1_v, ofst2_u, ofst2_v;
+    WORD32 ofst_u, ofst_v;
+    WORD32 shft;
+
+    ofst1_u = (WORD8)(ofst1 & 0xff);    /* U offsets: bits 0-7, as signed */
+    ofst1_v = (WORD8)(ofst1 >> 8);      /* V offsets: bits 8-15, as signed */
+    ofst2_u = (WORD8)(ofst2 & 0xff);
+    ofst2_v = (WORD8)(ofst2 >> 8);
+    wt1_u = (WORD16)(wt1 & 0xffff);     /* U weights: bits 0-15, as signed */
+    wt1_v = (WORD16)(wt1 >> 16);        /* V weights: bits 16-31, as signed */
+    wt2_u = (WORD16)(wt2 & 0xffff);
+    wt2_v = (WORD16)(wt2 >> 16);
+    ofst_u = (ofst1_u + ofst2_u + 1) >> 1;  /* rounded mean of the two offsets, per plane */
+    ofst_v = (ofst1_v + ofst2_v + 1) >> 1;
+
+    src_strd1 -= wd << 1;               /* a row is wd (U, V) byte pairs; keep only the end-of-row skip */
+    src_strd2 -= wd << 1;
+    dst_strd -= wd << 1;
+
+    shft = log_wd + 1;
+    /* Rounding term plus scaled offset; '*' instead of '<<' because left-shifting a negative offset is UB. */
+    ofst_u = (1 << log_wd) + ofst_u * (1 << shft);
+    ofst_v = (1 << log_wd) + ofst_v * (1 << shft);
+
+    for(i = 0; i < ht; i++)
+    {
+        /* Byte 0 of each pair uses the U parameters, byte 1 the V parameters. */
+        for(j = 0; j < wd; j++, pu1_src1 += 2, pu1_src2 += 2, pu1_dst += 2)
+        {
+            pu1_dst[0] = CLIP_U8((wt1_u * pu1_src1[0] + wt2_u * pu1_src2[0] + ofst_u) >> shft);
+            pu1_dst[1] = CLIP_U8((wt1_v * pu1_src1[1] + wt2_v * pu1_src2[1] + ofst_v) >> shft);
+        }
+
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}
diff --git a/common/ih264_weighted_pred.h b/common/ih264_weighted_pred.h
new file mode 100755
index 0000000..f9b93b0
--- /dev/null
+++ b/common/ih264_weighted_pred.h
@@ -0,0 +1,164 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264_weighted_pred.h
+*
+* @brief
+* Declarations of functions used for weighted prediction
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* -ih264_default_weighted_pred_luma
+* -ih264_default_weighted_pred_chroma
+* -ih264_weighted_pred_luma
+* -ih264_weighted_pred_chroma
+* -ih264_weighted_bi_pred_luma
+* -ih264_weighted_bi_pred_chroma
+* -ih264_default_weighted_pred_luma_a9q
+* -ih264_default_weighted_pred_chroma_a9q
+* -ih264_weighted_pred_luma_a9q
+* -ih264_weighted_pred_chroma_a9q
+* -ih264_weighted_bi_pred_luma_a9q
+* -ih264_weighted_bi_pred_chroma_a9q
+* -ih264_default_weighted_pred_luma_av8
+* -ih264_default_weighted_pred_chroma_av8
+* -ih264_weighted_pred_luma_av8
+* -ih264_weighted_pred_chroma_av8
+* -ih264_weighted_bi_pred_luma_av8
+* -ih264_weighted_bi_pred_chroma_av8
+* -ih264_default_weighted_pred_luma_sse42
+* -ih264_default_weighted_pred_chroma_sse42
+* -ih264_weighted_pred_luma_sse42
+* -ih264_weighted_pred_chroma_sse42
+* -ih264_weighted_bi_pred_luma_sse42
+* -ih264_weighted_bi_pred_chroma_sse42
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264_WEIGHTED_PRED_H_
+#define IH264_WEIGHTED_PRED_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+typedef void ih264_default_weighted_pred_ft(UWORD8 *puc_src1, /* pointer to source 1 */
+ UWORD8 *puc_src2, /* pointer to source 2 */
+ UWORD8 *puc_dst, /* pointer to destination */
+ WORD32 src_strd1, /* stride for source 1 */
+ WORD32 src_strd2, /* stride for source 2 */
+ WORD32 dst_strd, /* stride for destination */
+ WORD32 ht, /* height of the block */
+ WORD32 wd); /* width of the block; the chroma C version processes 2 * wd bytes per row */
+
+typedef void ih264_weighted_pred_ft(UWORD8 *puc_src, /* pointer to source */
+ UWORD8 *puc_dst, /* pointer to destination */
+ WORD32 src_strd, /* stride for source */
+ WORD32 dst_strd, /* stride for destination */
+ WORD32 log_wd, /* number of bits to be rounded off */
+ WORD32 wt, /* weight; the chroma C version reads U from bits 0-15 and V from bits 16-31 */
+ WORD32 ofst, /* offset; the chroma C version reads U from bits 0-7 and V from bits 8-15 */
+ WORD32 ht, /* height of the block */
+ WORD32 wd); /* width of the block */
+
+typedef void ih264_weighted_bi_pred_ft(UWORD8 *puc_src1, /* pointer to source 1 */
+ UWORD8 *puc_src2, /* pointer to source 2 */
+ UWORD8 *puc_dst, /* pointer to destination */
+ WORD32 src_strd1, /* stride for source 1 */
+ WORD32 src_strd2, /* stride for source 2 */
+ WORD32 dst_strd, /* stride for destination */
+ WORD32 log_wd, /* number of bits to be rounded off */
+ WORD32 wt1, /* weight for source 1; chroma packs U in bits 0-15 and V in bits 16-31 */
+ WORD32 wt2, /* weight for source 2; same packing as wt1 */
+ WORD32 ofst1, /* offset for source 1; chroma packs U in bits 0-7 and V in bits 8-15 */
+ WORD32 ofst2, /* offset for source 2; same packing as ofst1 */
+ WORD32 ht, /* height of the block */
+ WORD32 wd); /* width of the block */
+
+/* Generic C declarations (no NEON); these six functions are defined in ih264_weighted_pred.c */
+
+ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma;
+
+ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma;
+
+ih264_weighted_pred_ft ih264_weighted_pred_luma;
+
+ih264_weighted_pred_ft ih264_weighted_pred_chroma;
+
+ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma;
+
+ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma;
+
+/* A9 NEON Declarations (_a9q suffix) */
+
+ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_a9q;
+
+ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_a9q;
+
+ih264_weighted_pred_ft ih264_weighted_pred_luma_a9q;
+
+ih264_weighted_pred_ft ih264_weighted_pred_chroma_a9q;
+
+ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_a9q;
+
+ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_a9q;
+
+
+/* AV8 NEON Declarations (_av8 suffix) */
+
+ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_av8;
+
+ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_av8;
+
+ih264_weighted_pred_ft ih264_weighted_pred_luma_av8;
+
+ih264_weighted_pred_ft ih264_weighted_pred_chroma_av8;
+
+ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_av8;
+
+ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_av8;
+
+
+/* SSE42 Intrinsic Declarations (_sse42 suffix) */
+
+ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_sse42;
+
+ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_sse42;
+
+ih264_weighted_pred_ft ih264_weighted_pred_luma_sse42;
+
+ih264_weighted_pred_ft ih264_weighted_pred_chroma_sse42;
+
+ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_sse42;
+
+ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_sse42;
+
+#endif /* IH264_WEIGHTED_PRED_H_ */
+
+/** Nothing past this point */
diff --git a/common/ithread.c b/common/ithread.c
new file mode 100755
index 0000000..4ffb98a
--- /dev/null
+++ b/common/ithread.c
@@ -0,0 +1,604 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ithread.c */
+/* */
+/* Description : Contains abstraction for threads, mutex and semaphores*/
+/* */
+/* List of Functions : */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 Harish Initial Version */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+#include <string.h>
+#include "ih264_typedefs.h"
+
+/*
+ * If the end target is bare metal, then there shall be no OS.
+ * In this case, the functions ithread_* used inside the h264 encoder library to assist multicore
+ * will no longer be functional. To resolve link issues, the functions are re-defined with no body.
+ */
+#ifndef BAREMETAL
+
+
+#include "ithread.h"
+#include <sys/types.h>
+
+
+#define UNUSED(x) ((void)(x)) /* evaluates x and discards the value; used to mark a parameter as deliberately unused */
+
+#ifndef X86_MSVC
+//#define PTHREAD_AFFINITY
+//#define SYSCALL_AFFINITY
+
+#ifdef PTHREAD_AFFINITY
+#define _GNU_SOURCE
+#define __USE_GNU
+#endif
+
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <unistd.h>
+#ifdef PTHREAD_AFFINITY
+#include <sys/prctl.h>
+#endif
+
+#endif
+
+#ifdef X86_MSVC
+
+#include <windows.h>
+#define SEM_MAX_COUNT 100 /* maximum count given to CreateSemaphore() by ithread_sem_init() */
+#define SEM_INCREMENT_COUNT 1 /* release count given to ReleaseSemaphore() by ithread_sem_post() */
+
+UWORD32 ithread_get_handle_size(void) /* size in bytes of one thread HANDLE */
+{
+ return (sizeof(HANDLE));
+}
+
+UWORD32 ithread_get_mutex_lock_size(void) /* size in bytes of one mutex HANDLE */
+{
+ return (sizeof(HANDLE));
+}
+
+/* Starts a Win32 thread that runs strt(argument) and stores its HANDLE at
+ * *thread_handle; 'attribute' is ignored. Returns 0 on success and -1 when
+ * thread_handle is NULL or CreateThread fails. */
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
+{
+    HANDLE *ppv_thread_handle = (HANDLE *)thread_handle;
+    HANDLE thread_handle_value;
+
+    UNUSED(attribute);
+    if(NULL == ppv_thread_handle)
+        return -1;
+
+    thread_handle_value = CreateThread(NULL, /* Attributes */
+                                       1024 * 128, /* Stack size in bytes */
+                                       (LPTHREAD_START_ROUTINE)strt, /* Thread function */
+                                       argument, /* Parameters */
+                                       0, /* Creation flags */
+                                       NULL); /* Thread ID */
+    *ppv_thread_handle = thread_handle_value;
+    /* CreateThread returns NULL on failure; this used to be reported as success */
+    return (NULL == thread_handle_value) ? -1 : 0;
+}
+
+/* Blocks until the thread whose HANDLE is stored at *thread_handle finishes,
+ * then closes that handle. val_ptr is ignored. Returns 0 on success and -1
+ * when thread_handle is NULL or the wait does not complete normally. */
+WORD32 ithread_join(void *thread_handle, void ** val_ptr)
+{
+    HANDLE *ppv_thread_handle = (HANDLE *)thread_handle;
+
+    UNUSED(val_ptr);
+
+    if(NULL == ppv_thread_handle)
+        return -1;
+
+    /* A wait that does not end with WAIT_OBJECT_0 is now reported to the
+     * caller; it used to be returned as success. */
+    if(WAIT_OBJECT_0 != WaitForSingleObject(*ppv_thread_handle, INFINITE))
+        return -1;
+
+    CloseHandle(*ppv_thread_handle);
+    return 0;
+}
+
+/* Forcibly ends the thread whose HANDLE is stored at *thread_handle.
+ * A NULL thread_handle is ignored. */
+void ithread_exit(void *thread_handle)
+{
+    HANDLE h_thread;
+    DWORD exit_code;
+
+    if(NULL != thread_handle)
+    {
+        h_thread = *(HANDLE *)thread_handle;
+        /* GetExitCodeThread() returns non-zero when the query succeeds; only
+         * then is the thread terminated, re-using the code that was read
+         * back as the termination code. */
+        if(0 != GetExitCodeThread(h_thread, &exit_code))
+        {
+            TerminateThread(h_thread, exit_code);
+        }
+    }
+}
+
+WORD32 ithread_get_mutex_struct_size(void) /* size in bytes of one mutex HANDLE */
+{
+ return (sizeof(HANDLE));
+}
+
+/* Creates the lock as a semaphore with initial count 1 and maximum count 1
+ * and stores its HANDLE at *mutex. Returns 0 on success, -1 when mutex is
+ * NULL or CreateSemaphore fails (a NULL handle used to be stored silently). */
+WORD32 ithread_mutex_init(void *mutex)
+{
+    HANDLE *ppv_mutex_handle = (HANDLE *)mutex;
+
+    if(NULL == ppv_mutex_handle)
+        return -1;
+
+    *ppv_mutex_handle = CreateSemaphore(NULL, 1, 1, NULL);
+    return (NULL == *ppv_mutex_handle) ? -1 : 0;
+}
+
+/* Closes the semaphore HANDLE stored at *mutex. Returns 0 on success and -1
+ * when mutex is NULL or CloseHandle fails (that failure used to be ignored,
+ * unlike in ithread_sem_destroy). */
+WORD32 ithread_mutex_destroy(void *mutex)
+{
+    HANDLE *ppv_mutex_handle = (HANDLE *)mutex;
+
+    if(NULL == ppv_mutex_handle)
+        return -1;
+
+    /* Pass a failed close on to the caller */
+    return (FALSE == CloseHandle(*ppv_mutex_handle)) ? -1 : 0;
+}
+
+/* Acquires the lock by waiting, without timeout, on the semaphore HANDLE
+ * stored at *mutex.
+ *
+ * Returns -1 when mutex is NULL, 0 when the wait returns WAIT_OBJECT_0 and
+ * 1 for every other wait result. */
+WORD32 ithread_mutex_lock(void *mutex)
+{
+    HANDLE h_lock;
+
+    if(NULL == mutex)
+    {
+        return -1;
+    }
+
+    h_lock = *(HANDLE *)mutex;
+
+    /* Note the positive failure value; the neighbouring wrappers use -1 */
+    return (WAIT_OBJECT_0 == WaitForSingleObject(h_lock, INFINITE)) ? 0 : 1;
+}
+
+/* Releases the lock by returning one count to the semaphore HANDLE stored
+ * at *mutex.
+ *
+ * Returns 0 on success, -1 when mutex is NULL or ReleaseSemaphore fails. */
+WORD32 ithread_mutex_unlock(void *mutex)
+{
+    HANDLE h_lock;
+
+    if(NULL == mutex)
+    {
+        return -1;
+    }
+
+    h_lock = *(HANDLE *)mutex;
+
+    /* ReleaseSemaphore() returns zero when it fails */
+    return ReleaseSemaphore(h_lock, 1, NULL) ? 0 : -1;
+}
+
+void ithread_yield(void) { /* deliberately empty in the X86_MSVC build */ }
+
+void ithread_usleep(UWORD32 u4_time_us) /* sleeps for at least u4_time_us microseconds */
+{
+    /* Sleep() takes milliseconds: round up so that 1..999 us no longer turns into Sleep(0) */
+    Sleep((u4_time_us / 1000) + ((u4_time_us % 1000) ? 1 : 0));
+}
+
+void ithread_msleep(UWORD32 u4_time_ms) /* u4_time_ms is handed to Sleep() unchanged */
+{
+ Sleep(u4_time_ms);
+}
+
+void ithread_sleep(UWORD32 u4_time) /* u4_time is in seconds */
+{
+    /* Sleep() takes milliseconds */
+    Sleep(u4_time * 1000);
+}
+
+UWORD32 ithread_get_sem_struct_size(void) /* size in bytes of one semaphore HANDLE */
+{
+ return (sizeof(HANDLE));
+}
+
+/* Creates a semaphore with initial count 'value' and maximum count
+ * SEM_MAX_COUNT and stores its HANDLE at *sem. pshared is ignored.
+ * Returns 0 on success, -1 when sem is NULL or CreateSemaphore fails
+ * (a NULL handle used to be stored and reported as success). */
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value)
+{
+    HANDLE *sem_handle = (HANDLE *)sem;
+    UNUSED(pshared);
+
+    if(NULL == sem_handle)
+        return -1;
+
+    *sem_handle = CreateSemaphore(NULL, value, SEM_MAX_COUNT, NULL);
+    return (NULL == *sem_handle) ? -1 : 0;
+}
+
+/* Adds SEM_INCREMENT_COUNT to the semaphore whose HANDLE is stored at *sem.
+ * Returns 0 on success, -1 when sem is NULL or ReleaseSemaphore fails. */
+WORD32 ithread_sem_post(void *sem)
+{
+    HANDLE h_sem;
+
+    if(NULL == sem)
+    {
+        return -1;
+    }
+
+    h_sem = *(HANDLE *)sem;
+
+    /* ReleaseSemaphore() returns non-zero when the count was increased */
+    return ReleaseSemaphore(h_sem, SEM_INCREMENT_COUNT, NULL) ? 0 : -1;
+}
+
+/* Waits, without timeout, on the semaphore whose HANDLE is stored at *sem.
+ *
+ * Returns 0 once the semaphore has been acquired and -1 when sem is NULL or
+ * the wait ends in any other way. */
+WORD32 ithread_sem_wait(void *sem)
+{
+    HANDLE *sem_handle = (HANDLE *)sem;
+    DWORD result;
+
+    if(NULL == sem_handle)
+        return -1;
+
+    /* Wait on Semaphore object infinitely */
+    result = WaitForSingleObject(*sem_handle, INFINITE);
+
+    /* If lock on semaphore is acquired, return SUCCESS */
+    if(WAIT_OBJECT_0 == result)
+        return 0;
+
+    /* Any other result (e.g. WAIT_FAILED) means the semaphore was NOT acquired.
+     * Only WAIT_TIMEOUT used to be treated as an error, which cannot happen
+     * with an INFINITE timeout, so real failures were returned as success. */
+    return -1;
+}
+
+/* Closes the semaphore HANDLE stored at *sem. Returns 0 on success, -1 when
+ * sem is NULL or CloseHandle reports failure. */
+WORD32 ithread_sem_destroy(void *sem)
+{
+    HANDLE h_sem;
+
+    if(NULL == sem)
+    {
+        return -1;
+    }
+
+    h_sem = *(HANDLE *)sem;
+
+    /* CloseHandle() returns FALSE when it fails */
+    return (FALSE == CloseHandle(h_sem)) ? -1 : 0;
+}
+
+WORD32 ithread_set_affinity(WORD32 core_id) /* affinity is not set in this build; always returns 1 */
+{
+    UNUSED(core_id); /* marks the parameter as unused, as the pthread branch already does */
+    return 1;
+}
+#else
+
+UWORD32 ithread_get_handle_size(void) /* size in bytes of one pthread_t */
+{
+ return sizeof(pthread_t);
+}
+
+UWORD32 ithread_get_mutex_lock_size(void) /* size in bytes of one pthread_mutex_t */
+{
+ return sizeof(pthread_mutex_t);
+}
+
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
+{
+    void *(*pf_entry)(void *) = (void *(*)(void *))strt;
+    UNUSED(attribute); /* the new thread always gets default attributes (NULL below) */
+    return pthread_create((pthread_t *)thread_handle, NULL, pf_entry, argument);
+}
+
+WORD32 ithread_join(void *thread_handle, void ** val_ptr)
+{
+    /* The joined thread's return value is not collected, so val_ptr is unused */
+    UNUSED(val_ptr);
+    return pthread_join(*(pthread_t *)thread_handle, NULL);
+}
+
+void ithread_exit(void *val_ptr) /* ends the calling thread; val_ptr is handed to pthread_exit() */
+{
+    pthread_exit(val_ptr); /* no 'return <expr>;' here: ISO C forbids it in a void function */
+}
+
+WORD32 ithread_get_mutex_struct_size(void) /* size in bytes of one pthread_mutex_t */
+{
+ return(sizeof(pthread_mutex_t));
+}
+WORD32 ithread_mutex_init(void *mutex) /* NULL attributes; returns pthread_mutex_init()'s result */
+{
+ return pthread_mutex_init((pthread_mutex_t *) mutex, NULL);
+}
+
+WORD32 ithread_mutex_destroy(void *mutex) /* returns pthread_mutex_destroy()'s result */
+{
+ return pthread_mutex_destroy((pthread_mutex_t *) mutex);
+}
+
+WORD32 ithread_mutex_lock(void *mutex) /* returns pthread_mutex_lock()'s result */
+{
+ return pthread_mutex_lock((pthread_mutex_t *)mutex);
+}
+
+WORD32 ithread_mutex_unlock(void *mutex) /* returns pthread_mutex_unlock()'s result */
+{
+ return pthread_mutex_unlock((pthread_mutex_t *)mutex);
+}
+
+void ithread_yield(void) /* forwards to sched_yield(); its result is discarded */
+{
+ sched_yield();
+}
+
+void ithread_sleep(UWORD32 u4_time) /* u4_time is in seconds */
+{
+    sleep(u4_time); /* was usleep(u4_time * 1000 * 1000): wraps in 32 bits above 4294 s */
+}
+
+void ithread_msleep(UWORD32 u4_time_ms) /* u4_time_ms is in milliseconds */
+{
+    sleep(u4_time_ms / 1000); /* whole seconds: keeps the usleep() argument below 1,000,000 */
+    usleep((u4_time_ms % 1000) * 1000); /* remainder; this product can no longer wrap */
+}
+void ithread_usleep(UWORD32 u4_time_us) /* u4_time_us is handed to usleep() unchanged */
+{
+ usleep(u4_time_us);
+}
+
+UWORD32 ithread_get_sem_struct_size(void) /* size in bytes of one sem_t */
+{
+ return(sizeof(sem_t));
+}
+
+
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value) /* arguments go to sem_init() unchanged */
+{
+ return sem_init((sem_t *)sem,pshared,value);
+}
+
+WORD32 ithread_sem_post(void *sem) /* returns sem_post()'s result */
+{
+ return sem_post((sem_t *)sem);
+}
+
+
+WORD32 ithread_sem_wait(void *sem) /* returns sem_wait()'s result */
+{
+ return sem_wait((sem_t *)sem);
+}
+
+
+WORD32 ithread_sem_destroy(void *sem) /* returns sem_destroy()'s result */
+{
+ return sem_destroy((sem_t *)sem);
+}
+
+/* Thread naming is currently disabled, so this function has no effect.
+ * The parameter is merely marked as used, and only when none of WIN32, QNX
+ * and IOS is defined. */
+void ithread_set_name(CHAR *pc_thread_name)
+{
+#if !defined(WIN32) && !defined(QNX) && !defined(IOS)
+    UNUSED(pc_thread_name);
+
+    /* Disabled call, kept for reference (it names pu1_thread_name, which
+     * does not exist in this function):
+     * prctl(PR_SET_NAME, (unsigned long)pu1_thread_name, 0, 0, 0); */
+#endif
+}
+/* Pins the calling thread to CPU 'core_id' when built with PTHREAD_AFFINITY
+ * or SYSCALL_AFFINITY; otherwise the request is ignored and 1 is returned.
+ * Returns -1 for an out-of-range core_id or a failed affinity syscall; the
+ * PTHREAD_AFFINITY build returns pthread_setaffinity_np()'s result. */
+WORD32 ithread_set_affinity(WORD32 core_id)
+{
+#if defined(PTHREAD_AFFINITY)
+    cpu_set_t cpuset;
+    int num_cores = sysconf(_SC_NPROCESSORS_ONLN);
+    pthread_t cur_thread = pthread_self();
+    /* A negative index must never reach CPU_SET() */
+    if (core_id < 0 || core_id >= num_cores)
+        return -1;
+
+    CPU_ZERO(&cpuset);
+    CPU_SET(core_id, &cpuset);
+    return pthread_setaffinity_np(cur_thread, sizeof(cpu_set_t), &cpuset);
+#elif defined(SYSCALL_AFFINITY)
+    /* defined() also accepts an empty '#define SYSCALL_AFFINITY', for which a
+     * bare '#elif SYSCALL_AFFINITY' has no expression to evaluate.
+     * NOTE(review): this branch additionally needs <sys/syscall.h> for
+     * __NR_sched_setaffinity, and the one-bit mask is an assumption about
+     * the original intent (i4_mask was never declared) - confirm. */
+    unsigned long i4_mask;
+    pid_t pid = gettid();
+
+    if (core_id < 0 || core_id >= (WORD32)(8 * sizeof(i4_mask)))
+        return -1;
+    i4_mask = 1UL << core_id;
+    if (syscall(__NR_sched_setaffinity, pid, sizeof(i4_mask), &i4_mask))
+        return -1;
+    return 1;
+#else
+    UNUSED(core_id);
+    return 1;
+#endif
+}
+#endif
+
+#else
+
+/* Bare-metal build: there is no OS, so every ithread_* entry point below is a
+ * stub that does nothing and returns a fixed value. The names and signatures
+ * follow ithread.h so that code calling them still links. */
+
+UWORD32 ithread_get_handle_size(void)
+{
+    return sizeof(int);
+}
+
+UWORD32 ithread_get_mutex_lock_size(void)
+{
+    return sizeof(int);
+}
+
+/* Declared in ithread.h but previously missing from this branch */
+WORD32 ithread_get_mutex_struct_size(void)
+{
+    return sizeof(int);
+}
+
+UWORD32 ithread_get_cond_size(void)
+{
+    return sizeof(int);
+}
+
+/* No thread is started: strt is never called */
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
+{
+    return 0;
+}
+
+WORD32 ithread_join(void *thread_handle, void ** val_ptr)
+{
+    return 0;
+}
+
+void ithread_exit(void *val_ptr)
+{
+}
+
+WORD32 ithread_mutex_init(void *mutex)
+{
+    return 0;
+}
+
+WORD32 ithread_mutex_destroy(void *mutex)
+{
+    return 0;
+}
+
+WORD32 ithread_mutex_lock(void *mutex)
+{
+    return 0;
+}
+
+WORD32 ithread_mutex_unlock(void *mutex)
+{
+    return 0;
+}
+
+void ithread_yield(void) { }
+
+/* The sleep stubs return immediately; parameter names follow ithread.h */
+void ithread_sleep(UWORD32 u4_time) { }
+
+/* Declared in ithread.h but previously missing from this branch */
+void ithread_msleep(UWORD32 u4_time_ms) { }
+
+void ithread_usleep(UWORD32 u4_time_us) { }
+
+/* Name corrected from "ithread_get_sem_strcut_size" to match ithread.h */
+UWORD32 ithread_get_sem_struct_size(void)
+{
+    return sizeof(int);
+}
+
+/* Misspelled name kept so that any existing reference to it still links */
+UWORD32 ithread_get_sem_strcut_size(void)
+{
+    return ithread_get_sem_struct_size();
+}
+
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value)
+{
+    return 0;
+}
+
+WORD32 ithread_sem_post(void *sem)
+{
+    return 0;
+}
+
+WORD32 ithread_sem_wait(void *sem)
+{
+    return 0;
+}
+
+WORD32 ithread_sem_destroy(void *sem)
+{
+    return 0;
+}
+
+/* Parameter type now matches the CHAR * prototype in ithread.h */
+void ithread_set_name(CHAR *pc_thread_name) { }
+
+void ithread_condition_init(void *condition) { }
+
+void ithread_condition_signal(void * condition) { }
+
+void ithread_condition_wait(void *condition,void *mutex) { }
+
+/* No affinity can be set without an OS; 1 is also what the other branches
+ * of this file return when affinity support is not compiled in. */
+WORD32 ithread_set_affinity(WORD32 core_id)
+{
+    return 1;
+}
+#endif
diff --git a/common/ithread.h b/common/ithread.h
new file mode 100755
index 0000000..f926f83
--- /dev/null
+++ b/common/ithread.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ithread.h */
+/* */
+/* Description : This file contains all the necessary structure and */
+/* enumeration definitions needed for the Application */
+/* Program Interface(API) of the */
+/* Thread Abstraction Layer */
+/* */
+/* List of Functions : ithread_get_handle_size */
+/* ithread_get_mutex_lock_size */
+/* ithread_create */
+/* ithread_exit */
+/* ithread_join */
+/* ithread_get_mutex_struct_size */
+/* ithread_mutex_init */
+/* ithread_mutex_destroy */
+/* ithread_mutex_lock */
+/* ithread_mutex_unlock */
+/* ithread_yield */
+/* ithread_sleep */
+/* ithread_msleep */
+/* ithread_usleep */
+/* ithread_get_sem_struct_size */
+/* ithread_sem_init */
+/* ithread_sem_post */
+/* ithread_sem_wait */
+/* ithread_sem_destroy */
+/* ithread_set_affinity */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 06 09 2012 Harish Initial Version */
+/* */
+/*****************************************************************************/
+
+#ifndef _ITHREAD_H_
+#define _ITHREAD_H_
+
+UWORD32 ithread_get_handle_size(void); /* size in bytes of one thread handle */
+
+UWORD32 ithread_get_mutex_lock_size(void); /* size in bytes of one mutex object */
+
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument); /* runs strt(argument) on a new thread; 'attribute' is ignored by the implementations */
+
+void ithread_exit(void *val_ptr); /* NOTE(review): the pthread build forwards val_ptr to pthread_exit(), the X86_MSVC build reads it as a pointer to a thread HANDLE */
+
+WORD32 ithread_join(void *thread_id, void ** val_ptr); /* thread_id points at the handle filled in by ithread_create(); val_ptr is ignored */
+
+WORD32 ithread_get_mutex_struct_size(void); /* size in bytes of one mutex object */
+
+WORD32 ithread_mutex_init(void *mutex); /* mutex points at storage of ithread_get_mutex_struct_size() bytes - presumably caller-allocated, confirm */
+
+WORD32 ithread_mutex_destroy(void *mutex);
+
+WORD32 ithread_mutex_lock(void *mutex); /* NOTE(review): the X86_MSVC build returns 1, not -1, when the wait fails */
+
+WORD32 ithread_mutex_unlock(void *mutex);
+
+void ithread_yield(void); /* sched_yield() in the pthread build, empty in the X86_MSVC build */
+
+void ithread_sleep(UWORD32 u4_time); /* u4_time in seconds */
+
+void ithread_msleep(UWORD32 u4_time_ms); /* u4_time_ms in milliseconds */
+
+void ithread_usleep(UWORD32 u4_time_us); /* u4_time_us in microseconds */
+
+UWORD32 ithread_get_sem_struct_size(void); /* size in bytes of one semaphore object */
+
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value); /* value is the initial count; pshared is only used by the pthread build */
+
+WORD32 ithread_sem_post(void *sem);
+
+WORD32 ithread_sem_wait(void *sem);
+
+WORD32 ithread_sem_destroy(void *sem);
+
+WORD32 ithread_set_affinity(WORD32 core_id); /* returns 1 when affinity support is not compiled in */
+
+void ithread_set_name(CHAR *pc_thread_name); /* NOTE(review): ithread.c has no X86_MSVC definition of this function */
+
+#endif /* _ITHREAD_H_ */
diff --git a/common/mips/ih264_platform_macros.h b/common/mips/ih264_platform_macros.h
new file mode 100755
index 0000000..d098372
--- /dev/null
+++ b/common/mips/ih264_platform_macros.h
@@ -0,0 +1,102 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IH264_PLATFORM_MACROS_H_
+#define _IH264_PLATFORM_MACROS_H_
+
+#define CLIP_U8(x) CLIP3(0, 255, (x)) /* CLIP3 is defined outside this header; presumably clamps x to [0, 255] - confirm */
+#define CLIP_S8(x) CLIP3(-128, 127, (x))
+
+#define CLIP_U10(x) CLIP3(0, 1023, (x))
+#define CLIP_S10(x) CLIP3(-512, 511, (x))
+
+#define CLIP_U12(x) CLIP3(0, 4095, (x))
+#define CLIP_S12(x) CLIP3(-2048, 2047, (x))
+
+#define CLIP_U16(x) CLIP3(0, 65535, (x))
+#define CLIP_S16(x) CLIP3(-32768, 32767, (x))
+
+#define MEM_ALIGN16 __attribute__ ((aligned (16))) /* defined a second time, identically, near the end of this header */
+
+#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0)
+#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0)
+/* Shifts by a signed count; arguments are parenthesised so that expression arguments expand correctly */
+#define SHR_NEG(val,shift) (((shift) > 0) ? ((val) >> (shift)) : ((val) << (-(shift))))
+#define SHL_NEG(val,shift) (((shift) < 0) ? ((val) >> (-(shift))) : ((val) << (shift)))
+
+/* 32-bit byte swap. Fully parenthesised, unsigned, and without the old trailing ';', so it is usable inside expressions */
+#define ITT_BIG_ENDIAN(x) ((((UWORD32)(x)) << 24) | \
+ ((((UWORD32)(x)) & 0x0000ff00) << 8) | \
+ ((((UWORD32)(x)) & 0x00ff0000) >> 8) | \
+ (((UWORD32)(x)) >> 24))
+
+
+#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < (nop_cnt); nop_i++);} /* empty counting loop; the count argument is now parenthesised */
+
+#define PLD(a) /* expands to nothing in this header */
+
+/* Number of leading zero bits in u4_word; an input of 0 yields 32 */
+static __inline UWORD32 CLZ(UWORD32 u4_word)
+{
+    /* __builtin_clz() has an undefined result for 0, so that case is
+     * answered here without calling it */
+    return (0 == u4_word) ? 32 : (UWORD32)__builtin_clz(u4_word);
+}
+
+/* Number of trailing zero bits in u4_word.
+ *
+ * NOTE(review): an input of 0 yields 31 (not 32, as CLZ does for 0) -
+ * confirm that callers rely on this before changing it. */
+static __inline UWORD32 CTZ(UWORD32 u4_word)
+{
+    if(u4_word)
+        return (UWORD32)__builtin_ctz(u4_word);
+
+    return 31;
+}
+
+#define DATA_SYNC() /* expands to nothing in this header */
+
+#define INLINE /* expands to nothing in this header */
+
+#define PREFETCH(ptr, type) /* expands to nothing in this header */
+
+#define MEM_ALIGN8 __attribute__ ((aligned (8)))
+#define MEM_ALIGN16 __attribute__ ((aligned (16))) /* repeats, identically, the definition earlier in this header */
+#define MEM_ALIGN32 __attribute__ ((aligned (32)))
+
+#endif /* _IH264_PLATFORM_MACROS_H_ */
diff --git a/common/x86/ih264_chroma_intra_pred_filters_ssse3.c b/common/x86/ih264_chroma_intra_pred_filters_ssse3.c
new file mode 100755
index 0000000..45101a4
--- /dev/null
+++ b/common/x86/ih264_chroma_intra_pred_filters_ssse3.c
@@ -0,0 +1,433 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_chroma_intra_pred_filters_ssse3.c
+*
+* @brief
+* Contains function definitions for chroma intra prediction filters in x86
+* intrinsics
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* -ih264_intra_pred_chroma_8x8_mode_horz_ssse3
+* -ih264_intra_pred_chroma_8x8_mode_vert_ssse3
+* -ih264_intra_pred_chroma_8x8_mode_plane_ssse3
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+
+/* User include files */
+#include "ih264_defs.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+
+
+/*****************************************************************************/
+/* Chroma Intra prediction 8x8 filters */
+/*****************************************************************************/
+/**
+*******************************************************************************
+*
+* ih264_intra_pred_chroma_8x8_mode_horz_ssse3
+*
+* @brief
+* Perform Intra prediction for chroma_8x8 mode:Horizontal
+*
+* @par Description:
+* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source containing alternate U and V samples
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination with alternate U and V samples
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] ngbr_avail
+* availability of neighbouring pixels(Not used in this function)
+*
+* @returns
+*
+* @remarks
+* None
+*
+******************************************************************************
+*/
+void ih264_intra_pred_chroma_8x8_mode_horz_ssse3(UWORD8 *pu1_src,
+                                                 UWORD8 *pu1_dst,
+                                                 WORD32 src_strd,
+                                                 WORD32 dst_strd,
+                                                 WORD32 ngbr_avail)
+{
+    /* The byte pair at pu1_left fills row 0; every following row uses the
+     * pair stored two bytes lower in memory. */
+    UWORD8 *pu1_left = pu1_src + 2 * BLK8x8SIZE - 2;
+    WORD32 two_rows = dst_strd << 1;
+    /* Shuffle control that copies bytes 14 and 15 into every 16-bit lane */
+    __m128i bcast_top_16x8b = _mm_set1_epi16(0x0f0e);
+    __m128i even_16x8b, odd_16x8b;
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    /* Bytes 14..15 of even_16x8b are the pair for row 0. odd_16x8b is the
+     * same data moved up by two bytes, so its bytes 14..15 serve row 1. */
+    even_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 14));
+    odd_16x8b = _mm_slli_si128(even_16x8b, 2);
+
+    /* rows 0, 1 */
+    _mm_storeu_si128((__m128i *)pu1_dst,
+                     _mm_shuffle_epi8(even_16x8b, bcast_top_16x8b));
+    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd),
+                     _mm_shuffle_epi8(odd_16x8b, bcast_top_16x8b));
+
+    /* rows 2, 3: move the next two pairs up into bytes 14..15 */
+    even_16x8b = _mm_slli_si128(even_16x8b, 4);
+    odd_16x8b = _mm_slli_si128(odd_16x8b, 4);
+    pu1_dst += two_rows;
+    _mm_storeu_si128((__m128i *)pu1_dst,
+                     _mm_shuffle_epi8(even_16x8b, bcast_top_16x8b));
+    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd),
+                     _mm_shuffle_epi8(odd_16x8b, bcast_top_16x8b));
+
+    /* rows 4, 5 */
+    even_16x8b = _mm_slli_si128(even_16x8b, 4);
+    odd_16x8b = _mm_slli_si128(odd_16x8b, 4);
+    pu1_dst += two_rows;
+    _mm_storeu_si128((__m128i *)pu1_dst,
+                     _mm_shuffle_epi8(even_16x8b, bcast_top_16x8b));
+    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd),
+                     _mm_shuffle_epi8(odd_16x8b, bcast_top_16x8b));
+
+    /* rows 6, 7 */
+    even_16x8b = _mm_slli_si128(even_16x8b, 4);
+    odd_16x8b = _mm_slli_si128(odd_16x8b, 4);
+    pu1_dst += two_rows;
+    _mm_storeu_si128((__m128i *)pu1_dst,
+                     _mm_shuffle_epi8(even_16x8b, bcast_top_16x8b));
+    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd),
+                     _mm_shuffle_epi8(odd_16x8b, bcast_top_16x8b));
+}
+
+/**
+*******************************************************************************
+*
+* ih264_intra_pred_chroma_8x8_mode_vert_ssse3
+*
+* @brief
+* Perform Intra prediction for chroma_8x8 mode:vertical
+*
+* @par Description:
+* Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source containing alternate U and V samples
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination with alternate U and V samples
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] ngbr_avail
+* availability of neighbouring pixels(Not used in this function)
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void ih264_intra_pred_chroma_8x8_mode_vert_ssse3(UWORD8 *pu1_src,
+                                                 UWORD8 *pu1_dst,
+                                                 WORD32 src_strd,
+                                                 WORD32 dst_strd,
+                                                 WORD32 ngbr_avail)
+{
+    /*
+     * Vertical prediction: the 16 bytes that start at pu1_top (alternate
+     * U and V samples) are written, unchanged, to each of the eight rows
+     * of the destination block.
+     */
+    UWORD8 *pu1_top = pu1_src + 2 * BLK8x8SIZE + 2;
+    __m128i top_row_16x8b;
+    WORD32 i4_row;
+
+    /* Neither argument is needed by this mode */
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    /* Unaligned load: no alignment of pu1_top is assumed */
+    top_row_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
+
+    for(i4_row = 0; i4_row < 8; i4_row++)
+    {
+        /*
+         * The row address is recomputed from pu1_dst every time so that
+         * no pointer beyond the last row is ever formed.
+         */
+        UWORD8 *pu1_row = pu1_dst + i4_row * dst_strd;
+
+        /* Unaligned store: no alignment of the destination is assumed */
+        _mm_storeu_si128((__m128i *)pu1_row, top_row_16x8b);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* ih264_intra_pred_chroma_8x8_mode_plane_ssse3
+*
+* @brief
+* Perform Intra prediction for chroma_8x8 mode:PLANE
+*
+* @par Description:
+* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source containing alternate U and V samples
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination with alternate U and V samples
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] ngbr_avail
+* availability of neighbouring pixels(Not used in this function)
+*
+* @returns
+*
+* @remarks
+* None
+*
+******************************************************************************
+*/
+void ih264_intra_pred_chroma_8x8_mode_plane_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left, *pu1_top;
+ WORD32 a_u, a_v, b_u, b_v, c_u, c_v; /* plane parameters a, b, c, kept separately for U (_u) and V (_v) */
+
+ __m128i mul_8x16b, shuffle_8x16b;
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_top = pu1_src + MB_SIZE + 2;
+ pu1_left = pu1_src + MB_SIZE - 2;
+
+ mul_8x16b = _mm_setr_epi16(1, 2, 3, 4, 1, 2, 3, 4); /* weights 1..4, once for each half of the register */
+ shuffle_8x16b = _mm_setr_epi16(0xff00, 0xff02, 0xff04, 0xff06,
+ 0xff01, 0xff03, 0xff05, 0xff07); /* control: even bytes -> lanes 0..3, odd bytes -> lanes 4..7, upper byte of every lane zeroed */
+
+ //calculating a, b and c
+ {
+ WORD32 h_u, h_v, v_u, v_v;
+ WORD32 temp1, temp2;
+
+ __m128i h_val1_16x8b, h_val2_16x8b;
+ __m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b;
+ __m128i v_val1_16x8b, v_val2_16x8b;
+ __m128i v_val1_8x16b, v_val2_8x16b, v_val_4x32b;
+ __m128i hv_val_4x32b;
+
+ h_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top + 8));
+ h_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top - 2));
+ v_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 14));
+ v_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 4));
+
+ // reversing the order
+ h_val2_16x8b = _mm_shufflelo_epi16(h_val2_16x8b, 0x1b);
+ v_val1_16x8b = _mm_shufflelo_epi16(v_val1_16x8b, 0x1b);
+
+ // separating u and v and 8-bit to 16-bit conversion
+ h_val1_8x16b = _mm_shuffle_epi8(h_val1_16x8b, shuffle_8x16b);
+ h_val2_8x16b = _mm_shuffle_epi8(h_val2_16x8b, shuffle_8x16b);
+ v_val1_8x16b = _mm_shuffle_epi8(v_val1_16x8b, shuffle_8x16b);
+ v_val2_8x16b = _mm_shuffle_epi8(v_val2_16x8b, shuffle_8x16b);
+
+ h_val1_8x16b = _mm_sub_epi16(h_val1_8x16b, h_val2_8x16b);
+ v_val1_8x16b = _mm_sub_epi16(v_val1_8x16b, v_val2_8x16b);
+
+ h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b);
+ v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b);
+ /* 16-bit lane 3 of each source register: its low byte feeds a_u, its high byte a_v */
+ temp1 = _mm_extract_epi16(h_val1_16x8b, 3);
+ temp2 = _mm_extract_epi16(v_val1_16x8b, 3);
+
+ hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b);
+ /* a = 16 * (sum of the two samples picked above) */
+ a_u = ((temp1 & 0xff) + (temp2 & 0xff)) << 4;
+ a_v = ((temp1 >> 8) + (temp2 >> 8)) << 4;
+ /* after the horizontal add the 32-bit lanes hold h_u, h_v, v_u, v_v; only their low 16 bits are read */
+ h_u = _mm_extract_epi16(hv_val_4x32b, 0);
+ h_v = _mm_extract_epi16(hv_val_4x32b, 2);
+ v_u = _mm_extract_epi16(hv_val_4x32b, 4);
+ v_v = _mm_extract_epi16(hv_val_4x32b, 6);
+
+ h_u = (h_u << 16) >> 15; // sign-extension and multiplication by 2
+ h_v = (h_v << 16) >> 15;
+ v_u = (v_u << 16) >> 15;
+ v_v = (v_v << 16) >> 15;
+ /* b, c = (17 * value + 32) >> 6, the value having been doubled just above */
+ b_u = ((h_u << 4) + h_u + 32) >> 6;
+ b_v = ((h_v << 4) + h_v + 32) >> 6;
+ c_u = ((v_u << 4) + v_u + 32) >> 6;
+ c_v = ((v_v << 4) + v_v + 32) >> 6;
+ }
+ //using a, b and c to compute the fitted plane values
+ {
+ __m128i const_8x16b, c2_8x16b;
+ __m128i res1_l_8x16b, res1_h_8x16b;
+ __m128i res2_l_8x16b, res2_h_8x16b;
+ __m128i res1_sh_l_8x16b, res1_sh_h_8x16b, res1_16x8b;
+ __m128i res2_sh_l_8x16b, res2_sh_h_8x16b, res2_16x8b;
+
+ WORD32 b_u2, b_v2, b_u3, b_v3;
+ WORD32 const_u, const_v;
+ WORD32 dst_strd2;
+ /* const = a - 3 * c + 16 */
+ const_u = a_u - (c_u << 1) - c_u + 16;
+ const_v = a_v - (c_v << 1) - c_v + 16;
+
+ b_u2 = b_u << 1;
+ b_v2 = b_v << 1;
+ b_u3 = b_u + b_u2;
+ b_v3 = b_v + b_v2;
+
+ const_8x16b = _mm_setr_epi16(const_u, const_v, const_u, const_v, const_u, const_v, const_u, const_v);
+ res1_l_8x16b = _mm_setr_epi16(-b_u3, -b_v3, -b_u2, -b_v2, -b_u, -b_v, 0, 0);
+ //contains {-b*3, -b*2, -b*1, b*0}
+ res1_h_8x16b = _mm_setr_epi16(b_u, b_v, b_u2, b_v2, b_u3, b_v3, b_u << 2, b_v << 2);
+ //contains {b*1, b*2, b*3, b*4}
+ c2_8x16b = _mm_setr_epi16(c_u, c_v, c_u, c_v, c_u, c_v, c_u, c_v);
+
+ // rows 1, 2
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, const_8x16b);
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, const_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ /* c2 becomes 2 * c: every later step moves both row registers down by two rows */
+ dst_strd2 = dst_strd << 1;
+ c2_8x16b = _mm_slli_epi16(c2_8x16b, 1);
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 3, 4
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+
+ pu1_dst += dst_strd2;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 5, 6
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+
+ pu1_dst += dst_strd2;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 7, 8
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+
+ pu1_dst += dst_strd2;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ }
+}
diff --git a/common/x86/ih264_deblk_chroma_ssse3.c b/common/x86/ih264_deblk_chroma_ssse3.c
new file mode 100755
index 0000000..a36447a
--- /dev/null
+++ b/common/x86/ih264_deblk_chroma_ssse3.c
@@ -0,0 +1,1087 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264_deblk_chroma_ssse3.c */
+/* */
+/* Description : Contains function definitions for deblocking */
+/* */
+/* List of Functions : ih264_deblk_chroma_vert_bs4_ssse3() */
+/* ih264_deblk_chroma_horz_bs4_ssse3() */
+/* ih264_deblk_chroma_vert_bslt4_ssse3() */
+/* ih264_deblk_chroma_horz_bslt4_ssse3() */
+/* ih264_deblk_chroma_vert_bs4_mbaff_ssse3() */
+/* ih264_deblk_chroma_vert_bslt4_mbaff_ssse3() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Added chroma deblocking ssse3 */
+/* intrinsics */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bs4_ssse3() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when the boundary strength is set to 4 in */
+/* high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264 with alpha and beta values different in */
+/* U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bs4_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr)
+{
+ UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U */
+ WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; /* Cb threshold in the low 16 bits, Cr threshold in the high 16 bits */
+ WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; /* Same packing as alpha_cbcr */
+ __m128i linea, lineb, linec, lined, linee, linef, lineg, lineh;
+ __m128i temp1, temp2, temp3, temp4;
+
+ __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
+ __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
+ __m128i flag1, flag2;
+ __m128i diff, alpha_cbcr_16x8, beta_cbcr_16x8;
+ __m128i zero = _mm_setzero_si128();
+ __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
+
+ /* Load 8 bytes from each of 8 rows, starting 4 bytes before pu1_src, then transpose */
+ linea = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4));
+ lineb = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + src_strd));
+ linec = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd));
+ lined = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd));
+ linee = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd));
+ linef = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd));
+ lineg = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd));
+ lineh = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd));
+
+ temp1 = _mm_unpacklo_epi16(linea, lineb); /* interleave the 2-byte units of rows a and b */
+ temp2 = _mm_unpacklo_epi16(linec, lined); /* rows c and d */
+ temp3 = _mm_unpacklo_epi16(linee, linef); /* rows e and f */
+ temp4 = _mm_unpacklo_epi16(lineg, lineh); /* rows g and h */
+
+ p1_uv_8x16 = _mm_unpacklo_epi32(temp1, temp2); /* rows a-d: 2-byte units 0 and 1 */
+ p0_uv_8x16 = _mm_unpacklo_epi32(temp3, temp4); /* rows e-h: 2-byte units 0 and 1 */
+ q0_uv_8x16 = _mm_unpackhi_epi32(temp1, temp2); /* rows a-d: 2-byte units 2 and 3 */
+ q1_uv_8x16 = _mm_unpackhi_epi32(temp3, temp4); /* rows e-h: 2-byte units 2 and 3 */
+
+ p1_uv_16x8 = _mm_unpacklo_epi64(p1_uv_8x16, p0_uv_8x16); /* unit 0 of rows a-h */
+ p0_uv_16x8 = _mm_unpackhi_epi64(p1_uv_8x16, p0_uv_8x16); /* unit 1 of rows a-h */
+ q0_uv_16x8 = _mm_unpacklo_epi64(q0_uv_8x16, q1_uv_8x16); /* unit 2 of rows a-h (starts at pu1_src) */
+ q1_uv_16x8 = _mm_unpackhi_epi64(q0_uv_8x16, q1_uv_8x16); /* unit 3 of rows a-h */
+ /* End of transpose */
+
+ q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero); /* widen the low 8 bytes of each vector to 16-bit lanes */
+ q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condition 1: |p0 - q0| < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr); /* even 16-bit lanes hold alpha_cb, odd lanes alpha_cr */
+ flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condition 2: |q1 - q0| < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr); /* even 16-bit lanes hold beta_cb, odd lanes beta_cr */
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condition 3: |p1 - p0| < beta
+ diff = _mm_abs_epi16(diff);
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ p0_uv_8x16_1 = _mm_srai_epi16(temp1, 2); /* filtered p0 = (2 * p1 + p0 + q1 + 2) >> 2 */
+
+ temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ q0_uv_8x16_1 = _mm_srai_epi16(temp1, 2); /* filtered q0 = (2 * q1 + q0 + p1 + 2) >> 2 */
+
+ q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero); /* repeat the same steps on the high 8 bytes of each vector */
+ q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condition 1: |p0 - q0| < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
+ flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condition 2: |q1 - q0| < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
+ flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condition 3: |p1 - p0| < beta
+ diff = _mm_abs_epi16(diff);
+ flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ p0_uv_8x16_2 = _mm_srai_epi16(temp1, 2); /* filtered p0, high half */
+
+ temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ q0_uv_8x16_2 = _mm_srai_epi16(temp1, 2); /* filtered q0, high half */
+
+ p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2); /* narrow both halves back to 16 bytes (unsigned saturation) */
+ q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2);
+
+ flag1 = _mm_packs_epi16(flag1, flag2); /* narrow the 16-bit compare masks to one mask byte per sample */
+
+ p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); /* original p0 where the mask is clear */
+ p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1); /* filtered p0 where the mask is set */
+ p0_uv_16x8 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2); /* operands are non-zero in disjoint bytes, so the add merges them */
+
+ q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); /* original q0 where the mask is clear */
+ q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1); /* filtered q0 where the mask is set */
+ q0_uv_16x8 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
+
+ /* Inverse-transpose and store the 8 rows back to the addresses they were loaded from */
+ temp1 = _mm_unpacklo_epi16(p1_uv_16x8, p0_uv_16x8);
+ temp2 = _mm_unpackhi_epi16(p1_uv_16x8, p0_uv_16x8);
+ temp3 = _mm_unpacklo_epi16(q0_uv_16x8, q1_uv_16x8);
+ temp4 = _mm_unpackhi_epi16(q0_uv_16x8, q1_uv_16x8);
+
+ linea = _mm_unpacklo_epi32(temp1, temp3); /* low 8 bytes = row a, high 8 bytes = row b */
+ lineb = _mm_srli_si128(linea, 8);
+ linec = _mm_unpackhi_epi32(temp1, temp3); /* rows c and d */
+ lined = _mm_srli_si128(linec, 8);
+ linee = _mm_unpacklo_epi32(temp2, temp4); /* rows e and f */
+ linef = _mm_srli_si128(linee, 8);
+ lineg = _mm_unpackhi_epi32(temp2, temp4); /* rows g and h */
+ lineh = _mm_srli_si128(lineg, 8);
+
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4), linea);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + src_strd), lineb);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd), linec);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd), lined);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd), linee);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd), linef);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd), lineg);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd), lineh);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_horz_bs4_ssse3() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* horizontal edge when the boundary strength is set to 4 */
+/* in high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264 with alpha and beta values different in */
+/* U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_horz_bs4_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr)
+{
+ UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U */
+ WORD16 i16_posP1, i16_posP0, i16_posQ1; /* Row offsets in bytes. NOTE(review): presumably 16-bit, so a src_strd above 32767 would not fit - confirm */
+
+ UWORD8 *pu1_HorzPixelUV; /* Base pointer for the p1/p0 rows; set below to two rows above pu1_src */
+ WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; /* Cb threshold in the low 16 bits, Cr threshold in the high 16 bits */
+ WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; /* Same packing as alpha_cbcr */
+ __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
+ __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
+ __m128i flag1, flag2;
+ __m128i diff, alpha_cbcr_16x8, beta_cbcr_16x8;
+ __m128i zero = _mm_setzero_si128();
+ __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
+ __m128i temp1, temp2;
+
+ pu1_HorzPixelUV = pu1_src_uv - (src_strd << 1); /* two rows above pu1_src */
+
+ i16_posQ1 = src_strd; /* q1 row: one row below pu1_src */
+ i16_posP0 = src_strd; /* p0 row: one row below pu1_HorzPixelUV, i.e. one row above pu1_src */
+ i16_posP1 = 0; /* p1 row: pu1_HorzPixelUV itself */
+
+ q0_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_src_uv)); /* 16 bytes per row, unaligned loads */
+ q1_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_src_uv + i16_posQ1));
+ p1_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP1));
+ p0_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP0));
+
+ q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero); /* widen the low 8 bytes of each row to 16-bit lanes */
+ q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condition 1: |p0 - q0| < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr); /* even 16-bit lanes hold alpha_cb, odd lanes alpha_cr */
+ flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condition 2: |q1 - q0| < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr); /* even 16-bit lanes hold beta_cb, odd lanes beta_cr */
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condition 3: |p1 - p0| < beta
+ diff = _mm_abs_epi16(diff);
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ p0_uv_8x16_1 = _mm_srai_epi16(temp1, 2); /* filtered p0 = (2 * p1 + p0 + q1 + 2) >> 2 */
+
+ temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ q0_uv_8x16_1 = _mm_srai_epi16(temp1, 2); /* filtered q0 = (2 * q1 + q0 + p1 + 2) >> 2 */
+
+ q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero); /* repeat the same steps on the high 8 bytes of each row */
+ q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condition 1: |p0 - q0| < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
+ flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condition 2: |q1 - q0| < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
+ flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condition 3: |p1 - p0| < beta
+ diff = _mm_abs_epi16(diff);
+ flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ p0_uv_8x16_2 = _mm_srai_epi16(temp1, 2); /* filtered p0, high half */
+
+ temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ q0_uv_8x16_2 = _mm_srai_epi16(temp1, 2); /* filtered q0, high half */
+
+ p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2); /* narrow both halves back to 16 bytes (unsigned saturation) */
+ q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2);
+
+ flag1 = _mm_packs_epi16(flag1, flag2); /* narrow the 16-bit compare masks to one mask byte per sample */
+
+ p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); /* original p0 where the mask is clear */
+ p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1); /* filtered p0 where the mask is set */
+ p0_uv_8x16_1 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2); /* operands are non-zero in disjoint bytes, so the add merges them */
+ _mm_storeu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP0), p0_uv_8x16_1); /* overwrite the p0 row in place */
+
+ q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); /* original q0 where the mask is clear */
+ q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1); /* filtered q0 where the mask is set */
+ q0_uv_8x16_1 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
+ _mm_storeu_si128((__m128i *)(pu1_src_uv), q0_uv_8x16_1); /* overwrite the q0 row in place; p1 and q1 rows are not written */
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bslt4_ssse3() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when the boundary strength is less than 4 */
+/* in high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab_cb - tc0_table for U */
+/* pu1_cliptab_cr - tc0_table for V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264 with alpha and beta values different */
+/* in U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bslt4_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab_cb,
+ const UWORD8 *pu1_cliptab_cr)
+{
+ UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U */
+ UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3; /* The four bytes of u4_bs, unpacked below */
+ WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; /* Cb threshold in the low 16 bits, Cr threshold in the high 16 bits */
+ WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; /* Same packing as alpha_cbcr */
+ __m128i linea, lineb, linec, lined, linee, linef, lineg, lineh;
+ __m128i temp1, temp2, temp3, temp4;
+
+ __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
+ __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
+ __m128i flag_bs, flag1, flag2;
+ __m128i diff, diff1, alpha_cbcr_16x8, beta_cbcr_16x8, in_macro;
+ __m128i zero = _mm_setzero_si128();
+ __m128i C0_uv_8x16;
+ __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
+
+ u1_Bs0 = (u4_bs >> 24) & 0xff; /* most significant byte of u4_bs */
+ u1_Bs1 = (u4_bs >> 16) & 0xff;
+ u1_Bs2 = (u4_bs >> 8) & 0xff;
+ u1_Bs3 = (u4_bs >> 0) & 0xff; /* least significant byte of u4_bs */
+
+ flag_bs = _mm_set_epi8(u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs2, u1_Bs2,
+ u1_Bs2, u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs1, u1_Bs1,
+ u1_Bs0, u1_Bs0, u1_Bs0, u1_Bs0); /* last argument is byte 0: bytes 0-3 = Bs0, 4-7 = Bs1, 8-11 = Bs2, 12-15 = Bs3 */
+ flag_bs = _mm_cmpeq_epi8(flag_bs, zero); //0xFF in bytes whose Bs is 0, 0x00 elsewhere
+ flag_bs = _mm_xor_si128(flag_bs, _mm_set1_epi8(0xFF)); //Invert: 0xFF in bytes whose Bs is non-zero
+
+ /* Load 8 bytes from each of 8 rows, starting 4 bytes before pu1_src, then transpose */
+ linea = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4));
+ lineb = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + src_strd));
+ linec = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd));
+ lined = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd));
+ linee = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd));
+ linef = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd));
+ lineg = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd));
+ lineh = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd));
+
+ temp1 = _mm_unpacklo_epi16(linea, lineb); /* interleave the 2-byte units of rows a and b */
+ temp2 = _mm_unpacklo_epi16(linec, lined); /* rows c and d */
+ temp3 = _mm_unpacklo_epi16(linee, linef); /* rows e and f */
+ temp4 = _mm_unpacklo_epi16(lineg, lineh); /* rows g and h */
+
+ p1_uv_8x16 = _mm_unpacklo_epi32(temp1, temp2); /* rows a-d: 2-byte units 0 and 1 */
+ p0_uv_8x16 = _mm_unpacklo_epi32(temp3, temp4); /* rows e-h: 2-byte units 0 and 1 */
+ q0_uv_8x16 = _mm_unpackhi_epi32(temp1, temp2); /* rows a-d: 2-byte units 2 and 3 */
+ q1_uv_8x16 = _mm_unpackhi_epi32(temp3, temp4); /* rows e-h: 2-byte units 2 and 3 */
+
+ p1_uv_16x8 = _mm_unpacklo_epi64(p1_uv_8x16, p0_uv_8x16); /* unit 0 of rows a-h */
+ p0_uv_16x8 = _mm_unpackhi_epi64(p1_uv_8x16, p0_uv_8x16); /* unit 1 of rows a-h */
+ q0_uv_16x8 = _mm_unpacklo_epi64(q0_uv_8x16, q1_uv_8x16); /* unit 2 of rows a-h (starts at pu1_src) */
+ q1_uv_16x8 = _mm_unpackhi_epi64(q0_uv_8x16, q1_uv_8x16); /* unit 3 of rows a-h */
+ /* End of transpose */
+
+ q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero); /* widen the low 8 bytes of each vector to 16-bit lanes */
+ q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condition 1: |p0 - q0| < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr); /* even 16-bit lanes hold alpha_cb, odd lanes alpha_cr */
+ flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condition 2: |q1 - q0| < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr); /* even 16-bit lanes hold beta_cb, odd lanes beta_cr */
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condition 3: |p1 - p0| < beta
+ diff = _mm_abs_epi16(diff);
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16);
+ diff = _mm_slli_epi16(diff, 2);
+ diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
+ diff = _mm_add_epi16(diff, diff1);
+ diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
+ in_macro = _mm_srai_epi16(diff, 3); /* unclipped delta = (4 * (q0 - p0) + (p1 - q1) + 4) >> 3 */
+
+ C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
+ pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
+ pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0],
+ pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0]); /* last argument is lane 0: lanes 0-3 use Bs0, lanes 4-7 use Bs1; cb/cr entries alternate */
+
+ C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1)); /* clip limit = table entry + 1 */
+
+ in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3 upper bound: min(delta, limit)
+ C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16); /* negate the limit */
+ in_macro = _mm_max_epi16(C0_uv_8x16, in_macro); /* CLIP3 lower bound: max(delta, -limit) */
+
+ p0_uv_8x16_1 = _mm_add_epi16(p0_uv_8x16, in_macro); /* filtered p0 = p0 + delta (saturated to 0..255 at the pack below) */
+ q0_uv_8x16_1 = _mm_sub_epi16(q0_uv_8x16, in_macro); /* filtered q0 = q0 - delta */
+
+ q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero); /* repeat the same steps on the high 8 bytes of each vector */
+ q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condition 1: |p0 - q0| < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
+ flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condition 2: |q1 - q0| < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
+ flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condition 3: |p1 - p0| < beta
+ diff = _mm_abs_epi16(diff);
+ flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16);
+ diff = _mm_slli_epi16(diff, 2);
+ diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
+ diff = _mm_add_epi16(diff, diff1);
+ diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
+ in_macro = _mm_srai_epi16(diff, 3); /* unclipped delta, high half */
+
+ C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
+ pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
+ pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2],
+ pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2]); /* lanes 0-3 use Bs2, lanes 4-7 use Bs3 */
+
+ C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1)); /* clip limit = table entry + 1 */
+
+ in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3 upper bound: min(delta, limit)
+ C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16); /* negate the limit */
+ in_macro = _mm_max_epi16(C0_uv_8x16, in_macro); /* CLIP3 lower bound: max(delta, -limit) */
+
+ p0_uv_8x16_2 = _mm_add_epi16(p0_uv_8x16, in_macro); /* filtered p0, high half */
+ q0_uv_8x16_2 = _mm_sub_epi16(q0_uv_8x16, in_macro); /* filtered q0, high half */
+
+ p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2); /* narrow both halves back to 16 bytes (unsigned saturation) */
+ q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2);
+
+ flag1 = _mm_packs_epi16(flag1, flag2); /* narrow the 16-bit compare masks to one mask byte per sample */
+ flag1 = _mm_and_si128(flag1, flag_bs); //Final flag (BS condition + other 3 conditions)
+
+ p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); /* original p0 where the mask is clear */
+ p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1); /* filtered p0 where the mask is set */
+ p0_uv_16x8 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2); /* operands are non-zero in disjoint bytes, so the add merges them */
+
+ q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); /* original q0 where the mask is clear */
+ q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1); /* filtered q0 where the mask is set */
+ q0_uv_16x8 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
+
+ /* Inverse-transpose and store the 8 rows back to the addresses they were loaded from */
+ temp1 = _mm_unpacklo_epi16(p1_uv_16x8, p0_uv_16x8);
+ temp2 = _mm_unpackhi_epi16(p1_uv_16x8, p0_uv_16x8);
+ temp3 = _mm_unpacklo_epi16(q0_uv_16x8, q1_uv_16x8);
+ temp4 = _mm_unpackhi_epi16(q0_uv_16x8, q1_uv_16x8);
+
+ linea = _mm_unpacklo_epi32(temp1, temp3); /* low 8 bytes = row a, high 8 bytes = row b */
+ lineb = _mm_srli_si128(linea, 8);
+ linec = _mm_unpackhi_epi32(temp1, temp3); /* rows c and d */
+ lined = _mm_srli_si128(linec, 8);
+ linee = _mm_unpacklo_epi32(temp2, temp4); /* rows e and f */
+ linef = _mm_srli_si128(linee, 8);
+ lineg = _mm_unpackhi_epi32(temp2, temp4); /* rows g and h */
+ lineh = _mm_srli_si128(lineg, 8);
+
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4), linea);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + src_strd), lineb);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd), linec);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd), lined);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 4 * src_strd), linee);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 5 * src_strd), linef);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 6 * src_strd), lineg);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 7 * src_strd), lineh);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_horz_bslt4_ssse3() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* horizontal edge when the boundary strength is less than */
+/* 4 in high profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab_cb - tc0_table for U */
+/* pu1_cliptab_cr - tc0_table for V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264 with alpha and beta values different */
+/* in U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_horz_bslt4_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab_cb,
+ const UWORD8 *pu1_cliptab_cr)
+{
+ UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U */
+ WORD16 i16_posP1, i16_posP0, i16_posQ1; /* Row offsets in bytes. NOTE(review): presumably 16-bit, so a src_strd above 32767 would not fit - confirm */
+ UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3; /* The four bytes of u4_bs, unpacked below */
+
+ UWORD8 *pu1_HorzPixelUV; /* Base pointer for the p1/p0 rows; set below to two rows above pu1_src */
+ WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; /* Cb threshold in the low 16 bits, Cr threshold in the high 16 bits */
+ WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; /* Same packing as alpha_cbcr */
+ __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
+ __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
+ __m128i flag_bs, flag1, flag2;
+ __m128i diff, diff1, alpha_cbcr_16x8, beta_cbcr_16x8, in_macro;
+ __m128i zero = _mm_setzero_si128();
+ __m128i C0_uv_8x16;
+ __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
+
+ pu1_HorzPixelUV = pu1_src_uv - (src_strd << 1); /* two rows above pu1_src */
+
+ i16_posQ1 = src_strd; /* q1 row: one row below pu1_src */
+ i16_posP0 = src_strd; /* p0 row: one row below pu1_HorzPixelUV, i.e. one row above pu1_src */
+ i16_posP1 = 0; /* p1 row: pu1_HorzPixelUV itself */
+
+ u1_Bs0 = (u4_bs >> 24) & 0xff; /* most significant byte of u4_bs */
+ u1_Bs1 = (u4_bs >> 16) & 0xff;
+ u1_Bs2 = (u4_bs >> 8) & 0xff;
+ u1_Bs3 = (u4_bs >> 0) & 0xff; /* least significant byte of u4_bs */
+
+ flag_bs = _mm_set_epi8(u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs2, u1_Bs2,
+ u1_Bs2, u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs1, u1_Bs1,
+ u1_Bs0, u1_Bs0, u1_Bs0, u1_Bs0); /* last argument is byte 0: bytes 0-3 = Bs0, 4-7 = Bs1, 8-11 = Bs2, 12-15 = Bs3 */
+ flag_bs = _mm_cmpeq_epi8(flag_bs, zero); //0xFF in bytes whose Bs is 0, 0x00 elsewhere
+ flag_bs = _mm_xor_si128(flag_bs, _mm_set1_epi8(0xFF)); //Invert: 0xFF in bytes whose Bs is non-zero
+
+ q0_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_src_uv)); /* 16 bytes per row, unaligned loads */
+ q1_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_src_uv + i16_posQ1));
+ p1_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP1));
+ p0_uv_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP0));
+
+ q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero); /* widen the low 8 bytes of each row to 16-bit lanes */
+ q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condition 1: |p0 - q0| < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr); /* even 16-bit lanes hold alpha_cb, odd lanes alpha_cr */
+ flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condition 2: |q1 - q0| < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr); /* even 16-bit lanes hold beta_cb, odd lanes beta_cr */
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condition 3: |p1 - p0| < beta
+ diff = _mm_abs_epi16(diff);
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16);
+ diff = _mm_slli_epi16(diff, 2);
+ diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
+ diff = _mm_add_epi16(diff, diff1);
+ diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
+ in_macro = _mm_srai_epi16(diff, 3); /* unclipped delta = (4 * (q0 - p0) + (p1 - q1) + 4) >> 3 */
+
+ C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
+ pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
+ pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0],
+ pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0]); /* last argument is lane 0: lanes 0-3 use Bs0, lanes 4-7 use Bs1; cb/cr entries alternate */
+
+ C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1)); /* clip limit = table entry + 1 */
+
+ in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3 upper bound: min(delta, limit)
+ C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16); /* negate the limit */
+ in_macro = _mm_max_epi16(C0_uv_8x16, in_macro); /* CLIP3 lower bound: max(delta, -limit) */
+
+ p0_uv_8x16_1 = _mm_add_epi16(p0_uv_8x16, in_macro); /* filtered p0 = p0 + delta (saturated to 0..255 at the pack below) */
+ q0_uv_8x16_1 = _mm_sub_epi16(q0_uv_8x16, in_macro); /* filtered q0 = q0 - delta */
+
+ q0_uv_8x16 = _mm_unpackhi_epi8(q0_uv_16x8, zero); /* repeat the same steps on the high 8 bytes of each row */
+ q1_uv_8x16 = _mm_unpackhi_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpackhi_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpackhi_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condition 1: |p0 - q0| < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
+ flag2 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condition 2: |q1 - q0| < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
+ flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condition 3: |p1 - p0| < beta
+ diff = _mm_abs_epi16(diff);
+ flag2 = _mm_and_si128(flag2, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16);
+ diff = _mm_slli_epi16(diff, 2);
+ diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
+ diff = _mm_add_epi16(diff, diff1);
+ diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
+ in_macro = _mm_srai_epi16(diff, 3); /* unclipped delta, high half */
+
+ C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
+ pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
+ pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2],
+ pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2]); /* lanes 0-3 use Bs2, lanes 4-7 use Bs3 */
+
+ C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1)); /* clip limit = table entry + 1 */
+
+ in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3 upper bound: min(delta, limit)
+ C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16); /* negate the limit */
+ in_macro = _mm_max_epi16(C0_uv_8x16, in_macro); /* CLIP3 lower bound: max(delta, -limit) */
+
+ p0_uv_8x16_2 = _mm_add_epi16(p0_uv_8x16, in_macro); /* filtered p0, high half */
+ q0_uv_8x16_2 = _mm_sub_epi16(q0_uv_8x16, in_macro); /* filtered q0, high half */
+
+ p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_2); /* narrow both halves back to 16 bytes (unsigned saturation) */
+ q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_2);
+
+ flag1 = _mm_packs_epi16(flag1, flag2); /* narrow the 16-bit compare masks to one mask byte per sample */
+ flag1 = _mm_and_si128(flag1, flag_bs); //Final flag (BS condition + other 3 conditions)
+
+ p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); /* original p0 where the mask is clear */
+ p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1); /* filtered p0 where the mask is set */
+ p0_uv_8x16_1 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2); /* operands are non-zero in disjoint bytes, so the add merges them */
+ _mm_storeu_si128((__m128i *)(pu1_HorzPixelUV + i16_posP0), p0_uv_8x16_1); /* overwrite the p0 row in place */
+
+ q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); /* original q0 where the mask is clear */
+ q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1); /* filtered q0 where the mask is set */
+ q0_uv_8x16_1 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
+ _mm_storeu_si128((__m128i *)(pu1_src_uv), q0_uv_8x16_1); /* overwrite the q0 row in place; p1 and q1 rows are not written */
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bs4_mbaff_ssse3() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when boundary strength is set to 4 in high */
+/* profile. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* */
+/* */
+/* */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.4 under the title "Filtering */
+/* process for edges for bS equal to 4" in ITU T Rec H.264 */
+/* with alpha and beta values different in U and V. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr)
+{
+ UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U*/
+ WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb;
+ WORD32 beta_cbcr = (beta_cr << 16) + beta_cb;
+ __m128i linea, lineb, linec, lined;
+ __m128i temp1, temp2;
+
+ __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
+ __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
+ __m128i flag1;
+ __m128i diff, alpha_cbcr_16x8, beta_cbcr_16x8;
+ __m128i zero = _mm_setzero_si128();
+ __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
+
+ /* Load and transpose the pixel values */
+ linea = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4));
+ lineb = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + src_strd));
+ linec = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd));
+ lined = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd));
+
+ temp1 = _mm_unpacklo_epi16(linea, lineb);
+ temp2 = _mm_unpacklo_epi16(linec, lined);
+
+ p1_uv_16x8 = _mm_unpacklo_epi32(temp1, temp2);
+ p0_uv_16x8 = _mm_srli_si128(p1_uv_16x8, 8);
+ q0_uv_16x8 = _mm_unpackhi_epi32(temp1, temp2);
+ q1_uv_16x8 = _mm_srli_si128(q0_uv_16x8, 8);
+ /* End of transpose */
+
+ q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero);
+ q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condn 1
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr);
+ flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condtn 2
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr);
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condtn 3
+ diff = _mm_abs_epi16(diff);
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ temp1 = _mm_slli_epi16(p1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p0_uv_8x16, q1_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ p0_uv_8x16_1 = _mm_srai_epi16(temp1, 2);
+
+ temp1 = _mm_slli_epi16(q1_uv_8x16, 1);
+ temp2 = _mm_add_epi16(p1_uv_8x16, q0_uv_8x16);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(2));
+ temp1 = _mm_add_epi16(temp1, temp2);
+ q0_uv_8x16_1 = _mm_srai_epi16(temp1, 2);
+
+ p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_1);
+ q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_1);
+
+ flag1 = _mm_packs_epi16(flag1, flag1);
+
+ p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
+ p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1);
+ p0_uv_16x8 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2);
+
+ q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF)));
+ q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1);
+ q0_uv_16x8 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
+
+ /* Inverse-transpose and store back */
+ temp1 = _mm_unpacklo_epi16(p1_uv_16x8, p0_uv_16x8);
+ temp2 = _mm_unpacklo_epi16(q0_uv_16x8, q1_uv_16x8);
+
+ linea = _mm_unpacklo_epi32(temp1, temp2);
+ lineb = _mm_srli_si128(linea, 8);
+ linec = _mm_unpackhi_epi32(temp1, temp2);
+ lined = _mm_srli_si128(linec, 8);
+
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4), linea);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + src_strd), lineb);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd), linec);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd), lined);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_chroma_vert_bslt4_mbaff_ssse3() */
+/* */
+/* Description : This function performs filtering of a chroma block */
+/* vertical edge when boundary strength is less than 4 in */
+/* high profile. Each call processes 4 rows of samples. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 of U */
+/* src_strd - source stride */
+/* alpha_cb - alpha value for the boundary in U */
+/* beta_cb - beta value for the boundary in U */
+/* alpha_cr - alpha value for the boundary in V */
+/* beta_cr - beta value for the boundary in V */
+/* u4_bs - packed Boundary strength array (one byte per */
+/* row, row 0 in the most significant byte) */
+/* pu1_cliptab_cb - tc0_table for U */
+/* pu1_cliptab_cr - tc0_table for V */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.4 under the title "Filtering */
+/* process for edges for bS less than 4" in ITU T Rec H.264 */
+/* with alpha and beta values different in U and V. */
+/* */
+/* Outputs : p0 and q0 of the 4 rows are updated in place in pu1_src */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_chroma_vert_bslt4_mbaff_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha_cb,
+ WORD32 beta_cb,
+ WORD32 alpha_cr,
+ WORD32 beta_cr,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab_cb,
+ const UWORD8 *pu1_cliptab_cr)
+{
+ UWORD8 *pu1_src_uv = pu1_src; /* Pointer to the src sample q0 of plane U*/
+ UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3;
+ WORD32 alpha_cbcr = (alpha_cr << 16) + alpha_cb; /* cb in the low 16 bits, cr in the high 16 bits */
+ WORD32 beta_cbcr = (beta_cr << 16) + beta_cb; /* same packing as alpha_cbcr */
+ __m128i linea, lineb, linec, lined;
+ __m128i temp1, temp2;
+
+ __m128i q0_uv_16x8, p0_uv_16x8, q1_uv_16x8, p1_uv_16x8;
+ __m128i q0_uv_8x16, p0_uv_8x16, q1_uv_8x16, p1_uv_8x16;
+ __m128i flag_bs, flag1;
+ __m128i diff, diff1, alpha_cbcr_16x8, beta_cbcr_16x8, in_macro;
+ __m128i zero = _mm_setzero_si128();
+ __m128i C0_uv_8x16;
+ __m128i p0_uv_8x16_1, p0_uv_8x16_2, q0_uv_8x16_1, q0_uv_8x16_2;
+
+ u1_Bs0 = (u4_bs >> 24) & 0xff; // bS used for row 0
+ u1_Bs1 = (u4_bs >> 16) & 0xff; // bS used for row 1
+ u1_Bs2 = (u4_bs >> 8) & 0xff; // bS used for row 2
+ u1_Bs3 = (u4_bs >> 0) & 0xff; // bS used for row 3
+
+ flag_bs = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, u1_Bs3, u1_Bs3, u1_Bs2,
+ u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs0, u1_Bs0); // each row's bS duplicated for its two bytes
+ flag_bs = _mm_cmpeq_epi8(flag_bs, zero); //Set flag to 1s and 0s
+ flag_bs = _mm_xor_si128(flag_bs, _mm_set1_epi8(0xFF)); //Invert for required mask (0xFF where bS != 0)
+
+ /* Load and transpose the pixel values */
+ linea = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4)); // 8 bytes of row 0, starting 4 bytes left of pu1_src
+ lineb = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + src_strd)); // row 1
+ linec = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd)); // row 2
+ lined = _mm_loadl_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd)); // row 3
+
+ temp1 = _mm_unpacklo_epi16(linea, lineb); // interleave byte pairs of rows 0 and 1
+ temp2 = _mm_unpacklo_epi16(linec, lined); // interleave byte pairs of rows 2 and 3
+
+ p1_uv_16x8 = _mm_unpacklo_epi32(temp1, temp2); // low 8 bytes: first byte pair of each of the 4 rows (p1)
+ p0_uv_16x8 = _mm_srli_si128(p1_uv_16x8, 8); // second byte pair of each row (p0)
+ q0_uv_16x8 = _mm_unpackhi_epi32(temp1, temp2); // low 8 bytes: third byte pair of each row (q0)
+ q1_uv_16x8 = _mm_srli_si128(q0_uv_16x8, 8); // fourth byte pair of each row (q1)
+ /* End of transpose */
+
+ q0_uv_8x16 = _mm_unpacklo_epi8(q0_uv_16x8, zero); // widen the low 8 bytes to 16-bit lanes
+ q1_uv_8x16 = _mm_unpacklo_epi8(q1_uv_16x8, zero);
+ p1_uv_8x16 = _mm_unpacklo_epi8(p1_uv_16x8, zero);
+ p0_uv_8x16 = _mm_unpacklo_epi8(p0_uv_16x8, zero);
+
+ diff = _mm_subs_epi16(p0_uv_8x16, q0_uv_8x16); //Condn 1: ABS(p0 - q0) < alpha
+ diff = _mm_abs_epi16(diff);
+ alpha_cbcr_16x8 = _mm_set1_epi32(alpha_cbcr); // 16-bit lanes alternate alpha_cb, alpha_cr
+ flag1 = _mm_cmpgt_epi16(alpha_cbcr_16x8, diff);
+
+ diff = _mm_subs_epi16(q1_uv_8x16, q0_uv_8x16); //Condtn 2: ABS(q1 - q0) < beta
+ diff = _mm_abs_epi16(diff);
+ beta_cbcr_16x8 = _mm_set1_epi32(beta_cbcr); // 16-bit lanes alternate beta_cb, beta_cr
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(p1_uv_8x16, p0_uv_8x16); //Condtn 3: ABS(p1 - p0) < beta
+ diff = _mm_abs_epi16(diff);
+ flag1 = _mm_and_si128(flag1, _mm_cmpgt_epi16(beta_cbcr_16x8, diff));
+
+ diff = _mm_subs_epi16(q0_uv_8x16, p0_uv_8x16); // in_macro = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3
+ diff = _mm_slli_epi16(diff, 2);
+ diff1 = _mm_subs_epi16(p1_uv_8x16, q1_uv_8x16);
+ diff = _mm_add_epi16(diff, diff1);
+ diff = _mm_add_epi16(diff, _mm_set1_epi16(4));
+ in_macro = _mm_srai_epi16(diff, 3);
+
+ C0_uv_8x16 = _mm_set_epi16(pu1_cliptab_cr[u1_Bs3], pu1_cliptab_cb[u1_Bs3],
+ pu1_cliptab_cr[u1_Bs2], pu1_cliptab_cb[u1_Bs2],
+ pu1_cliptab_cr[u1_Bs1], pu1_cliptab_cb[u1_Bs1],
+ pu1_cliptab_cr[u1_Bs0], pu1_cliptab_cb[u1_Bs0]); // per-row clip values, cb in even lanes and cr in odd lanes
+
+ C0_uv_8x16 = _mm_add_epi16(C0_uv_8x16, _mm_set1_epi16(1)); // clip limit is the table value plus 1
+
+ in_macro = _mm_min_epi16(C0_uv_8x16, in_macro); //CLIP3: upper bound +C0
+ C0_uv_8x16 = _mm_subs_epi16(zero, C0_uv_8x16); // negate to obtain the lower bound
+ in_macro = _mm_max_epi16(C0_uv_8x16, in_macro); // lower bound -C0
+
+ p0_uv_8x16_1 = _mm_add_epi16(p0_uv_8x16, in_macro); // filtered p0 = p0 + delta
+ q0_uv_8x16_1 = _mm_sub_epi16(q0_uv_8x16, in_macro); // filtered q0 = q0 - delta
+
+ p0_uv_8x16_2 = _mm_packus_epi16(p0_uv_8x16_1, p0_uv_8x16_1); // back to bytes (unsigned saturation)
+ q0_uv_8x16_2 = _mm_packus_epi16(q0_uv_8x16_1, q0_uv_8x16_1);
+
+ flag1 = _mm_packs_epi16(flag1, flag1); // 16-bit 0/-1 masks become byte 0x00/0xFF masks
+ flag1 = _mm_and_si128(flag1, flag_bs); //Final flag (BS condition + other 3 conditions)
+
+ p0_uv_8x16_1 = _mm_and_si128(p0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); // original p0 where the mask is clear
+ p0_uv_8x16_2 = _mm_and_si128(p0_uv_8x16_2, flag1); // filtered p0 where the mask is set
+ p0_uv_16x8 = _mm_add_epi8(p0_uv_8x16_1, p0_uv_8x16_2); // the two operands have disjoint bits, so add merges them
+
+ q0_uv_8x16_1 = _mm_and_si128(q0_uv_16x8,
+ _mm_xor_si128(flag1, _mm_set1_epi8(0xFF))); // original q0 where the mask is clear
+ q0_uv_8x16_2 = _mm_and_si128(q0_uv_8x16_2, flag1); // filtered q0 where the mask is set
+ q0_uv_16x8 = _mm_add_epi8(q0_uv_8x16_1, q0_uv_8x16_2);
+
+ /* Inverse-transpose and store back */
+ temp1 = _mm_unpacklo_epi16(p1_uv_16x8, p0_uv_16x8); // p1 is written back unmodified
+ temp2 = _mm_unpacklo_epi16(q0_uv_16x8, q1_uv_16x8); // q1 is written back unmodified
+
+ linea = _mm_unpacklo_epi32(temp1, temp2);
+ lineb = _mm_srli_si128(linea, 8);
+ linec = _mm_unpackhi_epi32(temp1, temp2);
+ lined = _mm_srli_si128(linec, 8);
+
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4), linea); // same 8-byte windows that were loaded above
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + src_strd), lineb);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 2 * src_strd), linec);
+ _mm_storel_epi64((__m128i *)(pu1_src_uv - 4 + 3 * src_strd), lined);
+
+}
+
diff --git a/common/x86/ih264_deblk_luma_ssse3.c b/common/x86/ih264_deblk_luma_ssse3.c
new file mode 100755
index 0000000..440d5f0
--- /dev/null
+++ b/common/x86/ih264_deblk_luma_ssse3.c
@@ -0,0 +1,2012 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264_deblk_luma_ssse3.c */
+/* */
+/* Description : Contains function definitions for deblocking */
+/* */
+/* List of Functions : ih264_deblk_luma_vert_bs4_ssse3() */
+/* ih264_deblk_luma_horz_bs4_ssse3() */
+/* ih264_deblk_luma_vert_bslt4_ssse3() */
+/* ih264_deblk_luma_horz_bslt4_ssse3() */
+/* ih264_deblk_luma_vert_bs4_mbaff_ssse3() */
+/* ih264_deblk_luma_vert_bslt4_mbaff_ssse3() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Added luma deblocking ssse3 */
+/* intrinsics */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_vert_bs4_ssse3() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* vertical edge when the boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264. 16 rows are processed per call. */
+/* */
+/* Outputs : p2..q2 of the 16 rows are updated in place in pu1_src */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_vert_bs4_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta)
+{
+ __m128i zero = _mm_setzero_si128();
+ __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8;
+ __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8;
+ __m128i q0_8x16, q1_8x16, q2_8x16, q3_8x16;
+ __m128i p0_8x16, p1_8x16, p2_8x16, p3_8x16;
+ __m128i q0_16x8_1;
+ __m128i p0_16x8_1;
+ __m128i q0_16x8_2, q1_16x8_2, q2_16x8_2;
+ __m128i p0_16x8_2, p1_16x8_2, p2_16x8_2;
+ __m128i temp1, temp2, temp3, temp4, temp5, temp6;
+ __m128i Alpha_8x16, Beta_8x16;
+ __m128i flag1_16x8, flag2_16x8, flag3_16x8, flag4_16x8;
+ __m128i const_val2_16x8 = _mm_set1_epi16(2);
+ __m128i line1, line2, line3, line4, line5, line6, line7, line8;
+
+ Alpha_8x16 = _mm_set1_epi16(alpha);
+ Beta_8x16 = _mm_set1_epi16(beta);
+
+ line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd)); // rows 0-7: 8 bytes each (p3..q3), starting 4 bytes left of pu1_src
+ line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd));
+ line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd));
+ line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd));
+ line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd));
+ line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd));
+ line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd));
+ line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd));
+
+ temp1 = _mm_unpacklo_epi8(line1, line2); // 8x8 byte transpose, stage 1: interleave bytes of row pairs
+ temp2 = _mm_unpacklo_epi8(line3, line4);
+ temp3 = _mm_unpacklo_epi8(line5, line6);
+ temp4 = _mm_unpacklo_epi8(line7, line8);
+
+ line1 = _mm_unpacklo_epi16(temp1, temp2); // stage 2: columns 0-3 of rows 0-3
+ line2 = _mm_unpackhi_epi16(temp1, temp2); // columns 4-7 of rows 0-3
+ line3 = _mm_unpacklo_epi16(temp3, temp4); // columns 0-3 of rows 4-7
+ line4 = _mm_unpackhi_epi16(temp3, temp4); // columns 4-7 of rows 4-7
+
+ p1_8x16 = _mm_unpacklo_epi32(line1, line3); // scratch use of the name: holds columns p3 | p2 of rows 0-7
+ p0_8x16 = _mm_unpackhi_epi32(line1, line3); // scratch: columns p1 | p0 of rows 0-7
+ q0_8x16 = _mm_unpacklo_epi32(line2, line4); // scratch: columns q0 | q1 of rows 0-7
+ q1_8x16 = _mm_unpackhi_epi32(line2, line4); // scratch: columns q2 | q3 of rows 0-7
+
+ line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 8 * src_strd)); // rows 8-15, same layout as above
+ line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 9 * src_strd));
+ line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 10 * src_strd));
+ line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 11 * src_strd));
+ line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 12 * src_strd));
+ line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 13 * src_strd));
+ line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 14 * src_strd));
+ line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 15 * src_strd));
+
+ temp1 = _mm_unpacklo_epi8(line1, line2);
+ temp2 = _mm_unpacklo_epi8(line3, line4);
+ temp3 = _mm_unpacklo_epi8(line5, line6);
+ temp4 = _mm_unpacklo_epi8(line7, line8);
+
+ line1 = _mm_unpacklo_epi16(temp1, temp2);
+ line2 = _mm_unpackhi_epi16(temp1, temp2);
+ line3 = _mm_unpacklo_epi16(temp3, temp4);
+ line4 = _mm_unpackhi_epi16(temp3, temp4);
+
+ temp1 = _mm_unpacklo_epi32(line1, line3); // columns p3 | p2 of rows 8-15
+ temp2 = _mm_unpackhi_epi32(line1, line3); // columns p1 | p0 of rows 8-15
+ temp3 = _mm_unpacklo_epi32(line2, line4); // columns q0 | q1 of rows 8-15
+ temp4 = _mm_unpackhi_epi32(line2, line4); // columns q2 | q3 of rows 8-15
+
+ p3_16x8 = _mm_unpacklo_epi64(p1_8x16, temp1); // from here each register is one column for all 16 rows
+ p2_16x8 = _mm_unpackhi_epi64(p1_8x16, temp1);
+ q2_16x8 = _mm_unpacklo_epi64(q1_8x16, temp4);
+ q3_16x8 = _mm_unpackhi_epi64(q1_8x16, temp4);
+ p1_16x8 = _mm_unpacklo_epi64(p0_8x16, temp2);
+ p0_16x8 = _mm_unpackhi_epi64(p0_8x16, temp2);
+ q0_16x8 = _mm_unpacklo_epi64(q0_8x16, temp3);
+ q1_16x8 = _mm_unpackhi_epi64(q0_8x16, temp3);
+
+ //Cond1 (ABS(p0 - q0) < alpha)
+ temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
+ temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2); // one saturated difference is 0, so the sum is the absolute difference
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero); // widen to 16 bits for the signed compare
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
+
+ flag1_16x8 = _mm_packs_epi16(temp2, temp1); // 16-bit 0/-1 masks become byte 0x00/0xFF masks
+
+ //Cond2 (ABS(q1 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
+ temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ //Cond3 (ABS(p1 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
+ temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ // flag1 = (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta)
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ // (ABS(p0 - q0) < ((alpha >> 2) + 2))
+ temp1 = _mm_subs_epu8(p0_16x8, q0_16x8);
+ temp2 = _mm_subs_epu8(q0_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+ Alpha_8x16 = _mm_srai_epi16(Alpha_8x16, 2);
+ Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8); // Alpha_8x16 now holds (alpha >> 2) + 2
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); // flag2 = flag1 && the tighter alpha test
+
+ // (ABS(p2 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
+ temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag3_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag3_16x8 = _mm_and_si128(flag3_16x8, flag2_16x8); // flag3 selects the p-side values computed as *_2
+
+ // (ABS(q2 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
+ temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag4_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag4_16x8 = _mm_and_si128(flag4_16x8, flag2_16x8); // flag4 selects the q-side values computed as *_2
+
+ // First 8 pixels
+ p3_8x16 = _mm_unpacklo_epi8(p3_16x8, zero);
+ p2_8x16 = _mm_unpacklo_epi8(p2_16x8, zero);
+ p1_8x16 = _mm_unpacklo_epi8(p1_16x8, zero);
+ p0_8x16 = _mm_unpacklo_epi8(p0_16x8, zero);
+ q0_8x16 = _mm_unpacklo_epi8(q0_16x8, zero);
+ q1_8x16 = _mm_unpacklo_epi8(q1_16x8, zero);
+ q2_8x16 = _mm_unpacklo_epi8(q2_16x8, zero);
+ q3_8x16 = _mm_unpacklo_epi8(q3_16x8, zero);
+
+ // p0_1 and q0_1: p0_1 = (2*p1 + p0 + q1 + 2) >> 2, q0_1 = (2*q1 + q0 + p1 + 2) >> 2
+ temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
+ temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
+ temp5 = _mm_add_epi16(temp1, const_val2_16x8);
+ temp6 = _mm_add_epi16(temp2, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p1_8x16, 1);
+ temp4 = _mm_slli_epi16(q1_8x16, 1);
+ temp1 = _mm_add_epi16(temp5, temp3);
+ temp2 = _mm_add_epi16(temp6, temp4);
+ p0_16x8_1 = _mm_srai_epi16(temp1, 2);
+ q0_16x8_1 = _mm_srai_epi16(temp2, 2);
+
+ // p1_2 and q1_2: p1_2 = (p2 + p1 + p0 + q0 + 2) >> 2, q1_2 = (q2 + q1 + q0 + p0 + 2) >> 2
+ temp6 = _mm_add_epi16(temp6, p0_8x16);
+ temp5 = _mm_add_epi16(temp5, q0_8x16);
+ temp1 = _mm_add_epi16(temp6, p2_8x16);
+ temp2 = _mm_add_epi16(temp5, q2_8x16);
+ p1_16x8_2 = _mm_srai_epi16(temp1, 2);
+ q1_16x8_2 = _mm_srai_epi16(temp2, 2);
+
+ // p0_2 and q0_2: p0_2 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4) >> 3, q0_2 mirrors it
+ temp1 = _mm_add_epi16(temp3, p2_8x16);
+ temp2 = _mm_add_epi16(temp4, q2_8x16);
+ temp1 = _mm_add_epi16(temp1, q1_8x16);
+ temp2 = _mm_add_epi16(temp2, p1_8x16);
+ temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
+ temp3 = _mm_slli_epi16(temp3, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp3);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
+ temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
+ p0_16x8_2 = _mm_srai_epi16(temp1, 3);
+ q0_16x8_2 = _mm_srai_epi16(temp2, 3);
+
+ // p2_2 and q2_2: p2_2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4) >> 3, q2_2 mirrors it
+ temp1 = _mm_add_epi16(temp6, const_val2_16x8);
+ temp2 = _mm_add_epi16(temp5, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p2_8x16, 1);
+ temp4 = _mm_slli_epi16(q2_8x16, 1);
+ temp3 = _mm_add_epi16(p2_8x16, temp3);
+ temp4 = _mm_add_epi16(q2_8x16, temp4);
+ temp5 = _mm_slli_epi16(p3_8x16, 1);
+ temp6 = _mm_slli_epi16(q3_8x16, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp4);
+ temp1 = _mm_add_epi16(temp1, temp5);
+ temp2 = _mm_add_epi16(temp2, temp6);
+ p2_16x8_2 = _mm_srai_epi16(temp1, 3);
+ q2_16x8_2 = _mm_srai_epi16(temp2, 3);
+
+ // Second 8 pixels and packing with first 8 pixels
+ p3_8x16 = _mm_unpackhi_epi8(p3_16x8, zero);
+ p2_8x16 = _mm_unpackhi_epi8(p2_16x8, zero);
+ p1_8x16 = _mm_unpackhi_epi8(p1_16x8, zero);
+ p0_8x16 = _mm_unpackhi_epi8(p0_16x8, zero);
+ q0_8x16 = _mm_unpackhi_epi8(q0_16x8, zero);
+ q1_8x16 = _mm_unpackhi_epi8(q1_16x8, zero);
+ q2_8x16 = _mm_unpackhi_epi8(q2_16x8, zero);
+ q3_8x16 = _mm_unpackhi_epi8(q3_16x8, zero);
+
+ // p0_1 and q0_1 (same formulas as for the first 8 pixels)
+ temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
+ temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
+ temp5 = _mm_add_epi16(temp1, const_val2_16x8);
+ temp6 = _mm_add_epi16(temp2, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p1_8x16, 1);
+ temp4 = _mm_slli_epi16(q1_8x16, 1);
+ temp1 = _mm_add_epi16(temp5, temp3);
+ temp2 = _mm_add_epi16(temp6, temp4);
+ temp1 = _mm_srai_epi16(temp1, 2);
+ temp2 = _mm_srai_epi16(temp2, 2);
+ p0_16x8_1 = _mm_packus_epi16(p0_16x8_1, temp1); // low half: first 8 pixels, high half: second 8 pixels
+ q0_16x8_1 = _mm_packus_epi16(q0_16x8_1, temp2);
+
+ // p1_2 and q1_2
+ temp6 = _mm_add_epi16(temp6, p0_8x16);
+ temp5 = _mm_add_epi16(temp5, q0_8x16);
+ temp1 = _mm_add_epi16(temp6, p2_8x16);
+ temp2 = _mm_add_epi16(temp5, q2_8x16);
+ temp1 = _mm_srai_epi16(temp1, 2);
+ temp2 = _mm_srai_epi16(temp2, 2);
+ p1_16x8_2 = _mm_packus_epi16(p1_16x8_2, temp1);
+ q1_16x8_2 = _mm_packus_epi16(q1_16x8_2, temp2);
+
+ // p0_2 and q0_2
+ temp1 = _mm_add_epi16(temp3, p2_8x16);
+ temp2 = _mm_add_epi16(temp4, q2_8x16);
+ temp1 = _mm_add_epi16(temp1, q1_8x16);
+ temp2 = _mm_add_epi16(temp2, p1_8x16);
+ temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
+ temp3 = _mm_slli_epi16(temp3, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp3);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
+ temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
+ temp1 = _mm_srai_epi16(temp1, 3);
+ temp2 = _mm_srai_epi16(temp2, 3);
+ p0_16x8_2 = _mm_packus_epi16(p0_16x8_2, temp1);
+ q0_16x8_2 = _mm_packus_epi16(q0_16x8_2, temp2);
+
+ // p2_2 and q2_2
+ temp1 = _mm_add_epi16(temp6, const_val2_16x8);
+ temp2 = _mm_add_epi16(temp5, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p2_8x16, 1);
+ temp4 = _mm_slli_epi16(q2_8x16, 1);
+ temp3 = _mm_add_epi16(p2_8x16, temp3);
+ temp4 = _mm_add_epi16(q2_8x16, temp4);
+ temp5 = _mm_slli_epi16(p3_8x16, 1);
+ temp6 = _mm_slli_epi16(q3_8x16, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp4);
+ temp1 = _mm_add_epi16(temp1, temp5);
+ temp2 = _mm_add_epi16(temp2, temp6);
+ temp1 = _mm_srai_epi16(temp1, 3);
+ temp2 = _mm_srai_epi16(temp2, 3);
+ p2_16x8_2 = _mm_packus_epi16(p2_16x8_2, temp1);
+ q2_16x8_2 = _mm_packus_epi16(q2_16x8_2, temp2);
+
+ // p0 and q0: take p0_1 / q0_1 where flag1 is set, keep the original bytes elsewhere
+ p0_16x8 = _mm_and_si128(p0_16x8,
+ _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
+ p0_16x8_1 = _mm_and_si128(p0_16x8_1, flag1_16x8);
+ p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_1); // the two operands have disjoint bits, so add merges them
+ q0_16x8 = _mm_and_si128(q0_16x8,
+ _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
+ q0_16x8_1 = _mm_and_si128(q0_16x8_1, flag1_16x8);
+ q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_1);
+
+ // p0 and q0 again: override with p0_2 / q0_2 where flag3 / flag4 are set
+ p0_16x8 = _mm_and_si128(p0_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p0_16x8_2 = _mm_and_si128(p0_16x8_2, flag3_16x8);
+ p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_2);
+ q0_16x8 = _mm_and_si128(q0_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q0_16x8_2 = _mm_and_si128(q0_16x8_2, flag4_16x8);
+ q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_2);
+
+ // p1 and q1: replaced by p1_2 / q1_2 where flag3 / flag4 are set
+ p1_16x8 = _mm_and_si128(p1_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p1_16x8_2 = _mm_and_si128(p1_16x8_2, flag3_16x8);
+ p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_2);
+ q1_16x8 = _mm_and_si128(q1_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q1_16x8_2 = _mm_and_si128(q1_16x8_2, flag4_16x8);
+ q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_2);
+
+ // p2 and q2: replaced by p2_2 / q2_2 where flag3 / flag4 are set
+ p2_16x8 = _mm_and_si128(p2_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p2_16x8_2 = _mm_and_si128(p2_16x8_2, flag3_16x8);
+ p2_16x8 = _mm_add_epi8(p2_16x8, p2_16x8_2);
+ q2_16x8 = _mm_and_si128(q2_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q2_16x8_2 = _mm_and_si128(q2_16x8_2, flag4_16x8);
+ q2_16x8 = _mm_add_epi8(q2_16x8, q2_16x8_2);
+
+ temp1 = _mm_unpacklo_epi8(p3_16x8, p2_16x8); // inverse transpose for rows 0-7 (p3 and q3 go back unmodified)
+ temp2 = _mm_unpacklo_epi8(p1_16x8, p0_16x8);
+ temp3 = _mm_unpacklo_epi8(q0_16x8, q1_16x8);
+ temp4 = _mm_unpacklo_epi8(q2_16x8, q3_16x8);
+
+ p3_8x16 = _mm_unpacklo_epi16(temp1, temp2); // the *_8x16 names are reused as scratch registers here
+ p2_8x16 = _mm_unpackhi_epi16(temp1, temp2);
+ q2_8x16 = _mm_unpacklo_epi16(temp3, temp4);
+ q3_8x16 = _mm_unpackhi_epi16(temp3, temp4);
+
+ line1 = _mm_unpacklo_epi32(p3_8x16, q2_8x16);
+ line2 = _mm_srli_si128(line1, 8);
+ line3 = _mm_unpackhi_epi32(p3_8x16, q2_8x16);
+ line4 = _mm_srli_si128(line3, 8);
+ line5 = _mm_unpacklo_epi32(p2_8x16, q3_8x16);
+ line6 = _mm_srli_si128(line5, 8);
+ line7 = _mm_unpackhi_epi32(p2_8x16, q3_8x16);
+ line8 = _mm_srli_si128(line7, 8);
+
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd), line1); // same 8-byte windows that were loaded for rows 0-7
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd), line2);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd), line3);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd), line4);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd), line5);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd), line6);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd), line7);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd), line8);
+
+ temp1 = _mm_unpackhi_epi8(p3_16x8, p2_16x8); // inverse transpose for rows 8-15
+ temp2 = _mm_unpackhi_epi8(p1_16x8, p0_16x8);
+ temp3 = _mm_unpackhi_epi8(q0_16x8, q1_16x8);
+ temp4 = _mm_unpackhi_epi8(q2_16x8, q3_16x8);
+
+ p3_8x16 = _mm_unpacklo_epi16(temp1, temp2);
+ p2_8x16 = _mm_unpackhi_epi16(temp1, temp2);
+ q2_8x16 = _mm_unpacklo_epi16(temp3, temp4);
+ q3_8x16 = _mm_unpackhi_epi16(temp3, temp4);
+
+ line1 = _mm_unpacklo_epi32(p3_8x16, q2_8x16);
+ line2 = _mm_srli_si128(line1, 8);
+ line3 = _mm_unpackhi_epi32(p3_8x16, q2_8x16);
+ line4 = _mm_srli_si128(line3, 8);
+ line5 = _mm_unpacklo_epi32(p2_8x16, q3_8x16);
+ line6 = _mm_srli_si128(line5, 8);
+ line7 = _mm_unpackhi_epi32(p2_8x16, q3_8x16);
+ line8 = _mm_srli_si128(line7, 8);
+
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 8 * src_strd), line1); // rows 8-15
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 9 * src_strd), line2);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 10 * src_strd), line3);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 11 * src_strd), line4);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 12 * src_strd), line5);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 13 * src_strd), line6);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 14 * src_strd), line7);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 15 * src_strd), line8);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_horz_bs4_ssse3() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* horizontal edge when the boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.4 under the */
+/* title "Filtering process for edges for bS equal to 4" in */
+/* ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_horz_bs4_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta)
+{
+ WORD16 i16_posP3, i16_posP2, i16_posP1, i16_posP0;
+ WORD16 i16_posQ1, i16_posQ2, i16_posQ3;
+ UWORD8 *pu1_HorzPixel;
+ __m128i zero = _mm_setzero_si128();
+ __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8;
+ __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8;
+ __m128i q0_8x16, q1_8x16, q2_8x16, q3_8x16;
+ __m128i p0_8x16, p1_8x16, p2_8x16, p3_8x16;
+ __m128i q0_16x8_1;
+ __m128i p0_16x8_1;
+ __m128i q0_16x8_2, q1_16x8_2, q2_16x8_2;
+ __m128i p0_16x8_2, p1_16x8_2, p2_16x8_2;
+ __m128i temp1, temp2, temp3, temp4, temp5, temp6;
+ __m128i Alpha_8x16, Beta_8x16;
+ __m128i flag1_16x8, flag2_16x8, flag3_16x8, flag4_16x8;
+ __m128i const_val2_16x8 = _mm_set1_epi16(2);
+
+ pu1_HorzPixel = pu1_src - (src_strd << 2);
+
+ i16_posQ1 = src_strd;
+ i16_posQ2 = X2(src_strd);
+ i16_posQ3 = X3(src_strd);
+ i16_posP0 = X3(src_strd);
+ i16_posP1 = X2(src_strd);
+ i16_posP2 = src_strd;
+ i16_posP3 = 0;
+
+ Alpha_8x16 = _mm_set1_epi16(alpha);
+ Beta_8x16 = _mm_set1_epi16(beta);
+
+ p3_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP3));
+ p2_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP2));
+ p1_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP1));
+ p0_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP0));
+ q0_16x8 = _mm_loadu_si128((__m128i *)(pu1_src));
+ q1_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ1));
+ q2_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ2));
+ q3_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ3));
+
+ //Cond1 (ABS(p0 - q0) < alpha)
+ temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
+ temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
+
+ flag1_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ //Cond2 (ABS(q1 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
+ temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ //Cond3 (ABS(p1 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
+ temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ // !((ABS(p0 - q0) < alpha) || (ABS(q1 - q0) < beta) || (ABS(p1 - p0) < beta))
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ // (ABS(p0 - q0) < ((alpha >> 2) + 2))
+ temp1 = _mm_subs_epu8(p0_16x8, q0_16x8);
+ temp2 = _mm_subs_epu8(q0_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+ Alpha_8x16 = _mm_srai_epi16(Alpha_8x16, 2);
+ Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ // (ABS(p2 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
+ temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag3_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag3_16x8 = _mm_and_si128(flag3_16x8, flag2_16x8);
+
+ // (ABS(q2 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
+ temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag4_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag4_16x8 = _mm_and_si128(flag4_16x8, flag2_16x8);
+
+ // First 8 pixels
+ p3_8x16 = _mm_unpacklo_epi8(p3_16x8, zero);
+ p2_8x16 = _mm_unpacklo_epi8(p2_16x8, zero);
+ p1_8x16 = _mm_unpacklo_epi8(p1_16x8, zero);
+ p0_8x16 = _mm_unpacklo_epi8(p0_16x8, zero);
+ q0_8x16 = _mm_unpacklo_epi8(q0_16x8, zero);
+ q1_8x16 = _mm_unpacklo_epi8(q1_16x8, zero);
+ q2_8x16 = _mm_unpacklo_epi8(q2_16x8, zero);
+ q3_8x16 = _mm_unpacklo_epi8(q3_16x8, zero);
+
+ // p0_1 and q0_1
+ temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
+ temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
+ temp5 = _mm_add_epi16(temp1, const_val2_16x8);
+ temp6 = _mm_add_epi16(temp2, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p1_8x16, 1);
+ temp4 = _mm_slli_epi16(q1_8x16, 1);
+ temp1 = _mm_add_epi16(temp5, temp3);
+ temp2 = _mm_add_epi16(temp6, temp4);
+ p0_16x8_1 = _mm_srai_epi16(temp1, 2);
+ q0_16x8_1 = _mm_srai_epi16(temp2, 2);
+
+ // p1_2 and q1_2
+ temp6 = _mm_add_epi16(temp6, p0_8x16);
+ temp5 = _mm_add_epi16(temp5, q0_8x16);
+ temp1 = _mm_add_epi16(temp6, p2_8x16);
+ temp2 = _mm_add_epi16(temp5, q2_8x16);
+ p1_16x8_2 = _mm_srai_epi16(temp1, 2);
+ q1_16x8_2 = _mm_srai_epi16(temp2, 2);
+
+ // p0_2 and q0_2
+ temp1 = _mm_add_epi16(temp3, p2_8x16);
+ temp2 = _mm_add_epi16(temp4, q2_8x16);
+ temp1 = _mm_add_epi16(temp1, q1_8x16);
+ temp2 = _mm_add_epi16(temp2, p1_8x16);
+ temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
+ temp3 = _mm_slli_epi16(temp3, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp3);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
+ temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
+ p0_16x8_2 = _mm_srai_epi16(temp1, 3);
+ q0_16x8_2 = _mm_srai_epi16(temp2, 3);
+
+ // p2_2 and q2_2
+ temp1 = _mm_add_epi16(temp6, const_val2_16x8);
+ temp2 = _mm_add_epi16(temp5, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p2_8x16, 1);
+ temp4 = _mm_slli_epi16(q2_8x16, 1);
+ temp3 = _mm_add_epi16(p2_8x16, temp3);
+ temp4 = _mm_add_epi16(q2_8x16, temp4);
+ temp5 = _mm_slli_epi16(p3_8x16, 1);
+ temp6 = _mm_slli_epi16(q3_8x16, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp4);
+ temp1 = _mm_add_epi16(temp1, temp5);
+ temp2 = _mm_add_epi16(temp2, temp6);
+ p2_16x8_2 = _mm_srai_epi16(temp1, 3);
+ q2_16x8_2 = _mm_srai_epi16(temp2, 3);
+
+ // Second 8 pixels and packing with first 8 pixels
+ p3_8x16 = _mm_unpackhi_epi8(p3_16x8, zero);
+ p2_8x16 = _mm_unpackhi_epi8(p2_16x8, zero);
+ p1_8x16 = _mm_unpackhi_epi8(p1_16x8, zero);
+ p0_8x16 = _mm_unpackhi_epi8(p0_16x8, zero);
+ q0_8x16 = _mm_unpackhi_epi8(q0_16x8, zero);
+ q1_8x16 = _mm_unpackhi_epi8(q1_16x8, zero);
+ q2_8x16 = _mm_unpackhi_epi8(q2_16x8, zero);
+ q3_8x16 = _mm_unpackhi_epi8(q3_16x8, zero);
+
+ // p0_1 and q0_1
+ temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
+ temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
+ temp5 = _mm_add_epi16(temp1, const_val2_16x8);
+ temp6 = _mm_add_epi16(temp2, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p1_8x16, 1);
+ temp4 = _mm_slli_epi16(q1_8x16, 1);
+ temp1 = _mm_add_epi16(temp5, temp3);
+ temp2 = _mm_add_epi16(temp6, temp4);
+ temp1 = _mm_srai_epi16(temp1, 2);
+ temp2 = _mm_srai_epi16(temp2, 2);
+ p0_16x8_1 = _mm_packus_epi16(p0_16x8_1, temp1);
+ q0_16x8_1 = _mm_packus_epi16(q0_16x8_1, temp2);
+
+ // p1_2 and q1_2
+ temp6 = _mm_add_epi16(temp6, p0_8x16);
+ temp5 = _mm_add_epi16(temp5, q0_8x16);
+ temp1 = _mm_add_epi16(temp6, p2_8x16);
+ temp2 = _mm_add_epi16(temp5, q2_8x16);
+ temp1 = _mm_srai_epi16(temp1, 2);
+ temp2 = _mm_srai_epi16(temp2, 2);
+ p1_16x8_2 = _mm_packus_epi16(p1_16x8_2, temp1);
+ q1_16x8_2 = _mm_packus_epi16(q1_16x8_2, temp2);
+
+ // p0_2 and q0_2
+ temp1 = _mm_add_epi16(temp3, p2_8x16);
+ temp2 = _mm_add_epi16(temp4, q2_8x16);
+ temp1 = _mm_add_epi16(temp1, q1_8x16);
+ temp2 = _mm_add_epi16(temp2, p1_8x16);
+ temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
+ temp3 = _mm_slli_epi16(temp3, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp3);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
+ temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
+ temp1 = _mm_srai_epi16(temp1, 3);
+ temp2 = _mm_srai_epi16(temp2, 3);
+ p0_16x8_2 = _mm_packus_epi16(p0_16x8_2, temp1);
+ q0_16x8_2 = _mm_packus_epi16(q0_16x8_2, temp2);
+
+ // p2_2 and q2_2
+ temp1 = _mm_add_epi16(temp6, const_val2_16x8);
+ temp2 = _mm_add_epi16(temp5, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p2_8x16, 1);
+ temp4 = _mm_slli_epi16(q2_8x16, 1);
+ temp3 = _mm_add_epi16(p2_8x16, temp3);
+ temp4 = _mm_add_epi16(q2_8x16, temp4);
+ temp5 = _mm_slli_epi16(p3_8x16, 1);
+ temp6 = _mm_slli_epi16(q3_8x16, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp4);
+ temp1 = _mm_add_epi16(temp1, temp5);
+ temp2 = _mm_add_epi16(temp2, temp6);
+ temp1 = _mm_srai_epi16(temp1, 3);
+ temp2 = _mm_srai_epi16(temp2, 3);
+ p2_16x8_2 = _mm_packus_epi16(p2_16x8_2, temp1);
+ q2_16x8_2 = _mm_packus_epi16(q2_16x8_2, temp2);
+
+ // p0 and q0
+ p0_16x8 = _mm_and_si128(p0_16x8,
+ _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
+ p0_16x8_1 = _mm_and_si128(p0_16x8_1, flag1_16x8);
+ p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_1);
+ q0_16x8 = _mm_and_si128(q0_16x8,
+ _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
+ q0_16x8_1 = _mm_and_si128(q0_16x8_1, flag1_16x8);
+ q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_1);
+
+ // p0 and q0
+ p0_16x8 = _mm_and_si128(p0_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p0_16x8_2 = _mm_and_si128(p0_16x8_2, flag3_16x8);
+ p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_2);
+ q0_16x8 = _mm_and_si128(q0_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q0_16x8_2 = _mm_and_si128(q0_16x8_2, flag4_16x8);
+ q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_2);
+
+ // p1 and q1
+ p1_16x8 = _mm_and_si128(p1_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p1_16x8_2 = _mm_and_si128(p1_16x8_2, flag3_16x8);
+ p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_2);
+ q1_16x8 = _mm_and_si128(q1_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q1_16x8_2 = _mm_and_si128(q1_16x8_2, flag4_16x8);
+ q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_2);
+
+ // p2 and q2
+ p2_16x8 = _mm_and_si128(p2_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p2_16x8_2 = _mm_and_si128(p2_16x8_2, flag3_16x8);
+ p2_16x8 = _mm_add_epi8(p2_16x8, p2_16x8_2);
+ q2_16x8 = _mm_and_si128(q2_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q2_16x8_2 = _mm_and_si128(q2_16x8_2, flag4_16x8);
+ q2_16x8 = _mm_add_epi8(q2_16x8, q2_16x8_2);
+
+ _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP2), p2_16x8);
+ _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP1), p1_16x8);
+ _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP0), p0_16x8);
+
+ _mm_storeu_si128((__m128i *)(pu1_src), q0_16x8);
+ _mm_storeu_si128((__m128i *)(pu1_src + i16_posQ1), q1_16x8);
+ _mm_storeu_si128((__m128i *)(pu1_src + i16_posQ2), q2_16x8);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_vert_bslt4_ssse3() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* vertical edge when the boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264. */
+/* */
+/* Outputs : Filtered pixels are written back in place via pu1_src */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_vert_bslt4_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab)
+{
+ UWORD8 u1_Bs, u1_Bs1; // bS used for rows 0-3 / rows 4-7 of the current 8-row group
+ // Filters 16 rows as two groups of 8: each group is loaded, transposed, filtered column-wise and transposed back.
+ UWORD32 j = 0; // offset of the current 8-row group from pu1_src
+ // linea..lineh hold the 8 loaded rows at first, and the 8 pixel columns pu1_src[-3]..pu1_src[4] after the transpose.
+ __m128i linea, lineb, linec, lined, linee, linef, lineg, lineh;
+ __m128i int1, int2, int3, int4, high1, high2; // scratch registers for the transposes
+ __m128i flag, flag1, i_C, i_C0; // per-row filter masks and clipping bounds
+ __m128i i_Ap, i_Aq, diff, const1, const2, in_macro, in_macrotemp, temp,
+ temp1;
+ __m128i zero = _mm_setzero_si128();
+ // For a positive src_strd the loop body runs twice: j = 0 and j = 8 * src_strd.
+ for(j = 0; j <= 8 * src_strd; j += 8 * src_strd)
+ {
+ // Transpose: load 8 bytes from each of rows a..h (starting at pu1_src - 3), widen to 16 bit, interleave
+ linea = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + j));
+ lineb = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + src_strd + j));
+ linec = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 2 * src_strd + j));
+ lined = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 3 * src_strd + j));
+ // Zero-extend the 8 bytes of each row to 8 16-bit lanes
+ linea = _mm_unpacklo_epi8(linea, zero);
+ lineb = _mm_unpacklo_epi8(lineb, zero);
+ linec = _mm_unpacklo_epi8(linec, zero);
+ lined = _mm_unpacklo_epi8(lined, zero);
+
+ int1 = _mm_unpacklo_epi16(linea, lineb);
+ lineb = _mm_unpackhi_epi16(linea, lineb);
+
+ int2 = _mm_unpacklo_epi16(linec, lined);
+ lined = _mm_unpackhi_epi16(linec, lined);
+
+ linea = _mm_unpacklo_epi16(int1, int2);
+ int1 = _mm_unpackhi_epi16(int1, int2);
+
+ linec = _mm_unpacklo_epi16(lineb, lined);
+ high1 = _mm_unpackhi_epi16(lineb, lined);
+ // Same steps for rows e..h (rows 4-7 of the group)
+ linee = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 4 * src_strd + j));
+ linef = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 5 * src_strd + j));
+ lineg = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 6 * src_strd + j));
+ lineh = _mm_loadl_epi64((__m128i *)(pu1_src - 3 + 7 * src_strd + j));
+
+ linee = _mm_unpacklo_epi8(linee, zero);
+ linef = _mm_unpacklo_epi8(linef, zero);
+ lineg = _mm_unpacklo_epi8(lineg, zero);
+ lineh = _mm_unpacklo_epi8(lineh, zero);
+
+ int2 = _mm_unpacklo_epi16(linee, linef);
+ linef = _mm_unpackhi_epi16(linee, linef);
+
+ int3 = _mm_unpacklo_epi16(lineg, lineh);
+ lineh = _mm_unpackhi_epi16(lineg, lineh);
+
+ linee = _mm_unpacklo_epi16(int2, int3);
+ int2 = _mm_unpackhi_epi16(int2, int3);
+
+ lineg = _mm_unpacklo_epi16(linef, lineh);
+ high2 = _mm_unpackhi_epi16(linef, lineh);
+ // Merge the a..d and e..h halves; each result register is one pixel column
+ int4 = _mm_unpacklo_epi16(linea, linee);
+ lineb = _mm_unpackhi_epi16(linea, linee);
+
+ int3 = _mm_unpacklo_epi16(int1, int2);
+ lined = _mm_unpackhi_epi16(int1, int2);
+
+ int2 = _mm_unpacklo_epi16(linec, lineg);
+ linef = _mm_unpackhi_epi16(linec, lineg);
+
+ linea = int4;
+ linec = int3;
+ linee = int2;
+
+ lineg = _mm_unpacklo_epi16(high1, high2);
+ lineh = _mm_unpackhi_epi16(high1, high2);
+ // Now linea=p2, lineb=p1, linec=p0, lined=q0, linee=q1, linef=q2; lineg/lineh are columns +3/+4 (not filtered).
+ //end of transpose: lanes are in row order a,e,c,g,b,f,d,h (even lanes = rows 0-3, odd lanes = rows 4-7)
+
+ u1_Bs = (u4_bs >> 24) & 0xff;
+ u1_Bs1 = (u4_bs >> 16) & 0xff;
+ u4_bs <<= 16; // move the next two bS bytes into place for the second iteration
+ // Alternating Bs/Bs1 matches the lane order above: even lanes get u1_Bs, odd lanes get u1_Bs1
+ flag1 = _mm_set_epi16(u1_Bs1, u1_Bs, u1_Bs1, u1_Bs, u1_Bs1, u1_Bs,
+ u1_Bs1, u1_Bs);
+ flag1 = _mm_cmpeq_epi16(flag1, zero); //Set flag to 1s and 0s
+ flag1 = _mm_xor_si128(flag1, _mm_set1_epi16(0xFFFF)); //Invert for required mask
+ // Per-lane clipping bound C0, looked up in pu1_cliptab by bS
+ i_C0 = _mm_set_epi16(pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs],
+ pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs],
+ pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs],
+ pu1_cliptab[u1_Bs1], pu1_cliptab[u1_Bs]);
+
+ diff = _mm_subs_epi16(linec, lined); //Condn 1: ABS(p0 - q0) < alpha
+ diff = _mm_abs_epi16(diff);
+ const1 = _mm_set1_epi16(alpha);
+ flag = _mm_cmpgt_epi16(const1, diff);
+
+ diff = _mm_subs_epi16(linee, lined); //Condtn 2: ABS(q1 - q0) < beta
+ diff = _mm_abs_epi16(diff);
+ const1 = _mm_set1_epi16(beta);
+ flag = _mm_and_si128(flag, _mm_cmpgt_epi16(const1, diff));
+
+ diff = _mm_subs_epi16(lineb, linec); //Condtn 3: ABS(p1 - p0) < beta
+ diff = _mm_abs_epi16(diff);
+ flag = _mm_and_si128(flag, _mm_cmpgt_epi16(const1, diff)); //Const 1= Beta from now on
+
+ flag = _mm_and_si128(flag, flag1); //Final flag (ui_B condition + other 3 conditions)
+
+ //Adding Ap<Beta and Aq<Beta: i_C = C0 + (Ap < beta) + (Aq < beta), Ap = ABS(p2 - p0), Aq = ABS(q2 - q0)
+ i_Ap = _mm_subs_epi16(linea, linec);
+ i_Ap = _mm_abs_epi16(i_Ap);
+ const2 = _mm_cmpgt_epi16(const1, i_Ap);
+ const2 = _mm_subs_epi16(zero, const2); //Make FFFF=1 and 0000=0
+ i_C = _mm_add_epi16(i_C0, const2);
+
+ i_Aq = _mm_subs_epi16(linef, lined);
+ i_Aq = _mm_abs_epi16(i_Aq);
+ const2 = _mm_cmpgt_epi16(const1, i_Aq);
+ const2 = _mm_subs_epi16(zero, const2);
+ i_C = _mm_add_epi16(i_C, const2);
+
+ //Calculate in_macro = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3
+ diff = _mm_subs_epi16(lined, linec);
+ diff = _mm_slli_epi16(diff, 2);
+ const2 = _mm_subs_epi16(lineb, linee);
+ diff = _mm_add_epi16(diff, const2);
+ const2 = _mm_set1_epi16(4);
+ diff = _mm_add_epi16(diff, const2);
+ in_macro = _mm_srai_epi16(diff, 3);
+
+ in_macro = _mm_min_epi16(i_C, in_macro); //CLIP3 to [-i_C, i_C]: upper bound first
+ i_C = _mm_subs_epi16(zero, i_C); // i_C now holds the negated bound
+ in_macro = _mm_max_epi16(i_C, in_macro);
+
+ //Compute new p0 and q0; the results stay in temp/temp1 until the end of filtering
+ in_macrotemp = _mm_add_epi16(linec, in_macro);
+ in_macrotemp = _mm_and_si128(in_macrotemp, flag);
+ temp = _mm_and_si128(linec,
+ _mm_xor_si128(flag, _mm_set1_epi16(0xFFFF)));
+ temp = _mm_add_epi16(temp, in_macrotemp);
+ // temp = p0 + in_macro where flag is set, original p0 elsewhere (still 16 bit,
+ // saturation to 8 bit happens in the final _mm_packus_epi16)
+
+ in_macrotemp = _mm_subs_epi16(lined, in_macro);
+ in_macrotemp = _mm_and_si128(in_macrotemp, flag);
+ temp1 = _mm_and_si128(lined,
+ _mm_xor_si128(flag, _mm_set1_epi16(0xFFFF)));
+ temp1 = _mm_add_epi16(temp1, in_macrotemp);
+ // temp1 = q0 - in_macro where flag is set, original q0 elsewhere (still 16 bit,
+ // saturation to 8 bit happens in the final _mm_packus_epi16)
+
+ //If Ap<Beta: p1 += CLIP3(-C0, C0, (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1)
+ flag1 = _mm_cmpgt_epi16(const1, i_Ap);
+ flag1 = _mm_and_si128(flag, flag1);
+ in_macrotemp = _mm_add_epi16(linec, lined);
+ in_macrotemp = _mm_add_epi16(in_macrotemp, _mm_set1_epi16(1));
+ in_macrotemp = _mm_srai_epi16(in_macrotemp, 1); // (p0 + q0 + 1) >> 1, reused for q1 below
+ in_macro = _mm_add_epi16(in_macrotemp, linea);
+ in_macro = _mm_subs_epi16(in_macro, _mm_slli_epi16(lineb, 1));
+ in_macro = _mm_srai_epi16(in_macro, 1);
+
+ in_macro = _mm_min_epi16(i_C0, in_macro); //CLIP3
+ i_C0 = _mm_subs_epi16(zero, i_C0); // i_C0 is negative from here until the abs below
+ in_macro = _mm_max_epi16(i_C0, in_macro);
+
+ in_macro = _mm_and_si128(in_macro, flag1);
+ lineb = _mm_add_epi16(lineb, in_macro);
+ // lineb (p1) is updated in place; lanes with flag1 clear get +0
+ // If Aq<Beta: q1 += CLIP3(-C0, C0, (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1)
+
+ flag1 = _mm_cmpgt_epi16(const1, i_Aq);
+ flag1 = _mm_and_si128(flag, flag1);
+ in_macro = _mm_add_epi16(in_macrotemp, linef);
+ in_macro = _mm_subs_epi16(in_macro, _mm_slli_epi16(linee, 1));
+ in_macro = _mm_srai_epi16(in_macro, 1);
+
+ i_C0 = _mm_abs_epi16(i_C0); // restore the positive bound negated in the p1 step
+ in_macro = _mm_min_epi16(i_C0, in_macro); //CLIP3
+ i_C0 = _mm_subs_epi16(zero, i_C0);
+ in_macro = _mm_max_epi16(i_C0, in_macro);
+
+ in_macro = _mm_and_si128(in_macro, flag1);
+ linee = _mm_add_epi16(linee, in_macro);
+ // linee (q1) is updated in place; lanes with flag1 clear get +0
+ // Only now replace p0/q0, since the p1/q1 formulas above need the unfiltered values
+ linec = temp;
+ lined = temp1;
+ //End of filtering
+ // Inverse transpose: turn the 8 column registers back into the 8 rows a..h
+ int1 = _mm_unpacklo_epi16(linea, linee);
+ linee = _mm_unpackhi_epi16(linea, linee);
+
+ int2 = _mm_unpacklo_epi16(linec, lineg);
+ lineg = _mm_unpackhi_epi16(linec, lineg);
+
+ linea = _mm_unpacklo_epi16(int1, int2);
+ int3 = _mm_unpackhi_epi16(int1, int2);
+
+ linec = _mm_unpacklo_epi16(linee, lineg);
+ lineg = _mm_unpackhi_epi16(linee, lineg);
+
+ int1 = _mm_unpacklo_epi16(lineb, linef);
+ linef = _mm_unpackhi_epi16(lineb, linef);
+
+ int2 = _mm_unpacklo_epi16(lined, lineh);
+ lineh = _mm_unpackhi_epi16(lined, lineh);
+
+ lineb = _mm_unpacklo_epi16(int1, int2);
+ int4 = _mm_unpackhi_epi16(int1, int2);
+
+ lined = _mm_unpacklo_epi16(linef, lineh);
+ lineh = _mm_unpackhi_epi16(linef, lineh);
+
+ int1 = _mm_unpackhi_epi16(linea, lineb);
+ linea = _mm_unpacklo_epi16(linea, lineb);
+
+ int2 = _mm_unpacklo_epi16(int3, int4);
+ high1 = _mm_unpackhi_epi16(int3, int4);
+
+ lineb = _mm_unpacklo_epi16(linec, lined);
+ linef = _mm_unpackhi_epi16(linec, lined);
+
+ lined = _mm_unpacklo_epi16(lineg, lineh);
+ lineh = _mm_unpackhi_epi16(lineg, lineh);
+
+ linee = int1;
+ lineg = high1;
+ linec = int2;
+ //End of inverse transpose: linea..lineh are rows 0..7 of the group again
+
+ //Packs (unsigned saturation to 8 bit) and stores all 8 bytes of each row, including the unfiltered columns
+ linea = _mm_packus_epi16(linea, zero);
+ _mm_storel_epi64((__m128i *)(pu1_src - 3 + j), linea);
+
+ lineb = _mm_packus_epi16(lineb, zero);
+ _mm_storel_epi64((__m128i *)(pu1_src - 3 + src_strd + j), lineb);
+
+ linec = _mm_packus_epi16(linec, zero);
+ _mm_storel_epi64((__m128i *)(pu1_src - 3 + 2 * src_strd + j), linec);
+
+ lined = _mm_packus_epi16(lined, zero);
+ _mm_storel_epi64((__m128i *)(pu1_src - 3 + 3 * src_strd + j), lined);
+
+ linee = _mm_packus_epi16(linee, zero);
+ _mm_storel_epi64((__m128i *)(pu1_src - 3 + 4 * src_strd + j), linee);
+
+ linef = _mm_packus_epi16(linef, zero);
+ _mm_storel_epi64((__m128i *)(pu1_src - 3 + 5 * src_strd + j), linef);
+
+ lineg = _mm_packus_epi16(lineg, zero);
+ _mm_storel_epi64((__m128i *)(pu1_src - 3 + 6 * src_strd + j), lineg);
+
+ lineh = _mm_packus_epi16(lineh, zero);
+ _mm_storel_epi64((__m128i *)(pu1_src - 3 + 7 * src_strd + j), lineh);
+
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_horz_bslt4_ssse3() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* horizontal edge when boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This operation is described in Sec. 8.7.2.3 under the */
+/* title "Filtering process for edges for bS less than 4" */
+/* in ITU T Rec H.264. */
+/* */
+/* Outputs : Filtered pixels are written back in place via pu1_src */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_horz_bslt4_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab)
+{
+ WORD16 i16_posP2, i16_posP1, i16_posP0, i16_posQ1, i16_posQ2; // NOTE(review): 16-bit row offsets; would truncate for very large src_strd - confirm stride limits
+ UWORD8 *pu1_HorzPixel; // pu1_src - 4 * src_strd; the p rows are addressed relative to this
+ __m128i zero = _mm_setzero_si128();
+ __m128i bs_flag_16x8b, C0_16x8, C0_8x16, C0_hi_8x16, C_8x16, C_hi_8x16;
+ __m128i q0_16x8, q1_16x8, q2_16x8, p0_16x8, p1_16x8, p2_16x8;
+ __m128i temp1, temp2;
+ __m128i Alpha_8x16, Beta_8x16, flag1_16x8, flag2_16x8, flag3_16x8;
+ __m128i in_macro_16x8, in_macro_hi_16x8; // filter deltas for pixels 0-7 and pixels 8-15
+ __m128i const_val4_8x16;
+ UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3; // bS for pixels 0-3, 4-7, 8-11 and 12-15
+ UWORD8 clip0, clip1, clip2, clip3; // pu1_cliptab entries for the four bS values
+ // Filters 16 pixels across the edge in one pass; rows p2..p0 are above pu1_src, rows q0..q2 start at pu1_src.
+ pu1_HorzPixel = pu1_src - (src_strd << 2);
+ // X2/X3 are defined outside this chunk; presumably 2x and 3x the argument - TODO confirm
+ i16_posQ1 = src_strd;
+ i16_posQ2 = X2(src_strd);
+ i16_posP0 = X3(src_strd);
+ i16_posP1 = X2(src_strd);
+ i16_posP2 = src_strd;
+
+ q0_16x8 = _mm_loadu_si128((__m128i *)(pu1_src));
+ q1_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ1));
+ // u4_bs packs four bS bytes, most significant byte first
+ u1_Bs0 = (u4_bs >> 24) & 0xff;
+ u1_Bs1 = (u4_bs >> 16) & 0xff;
+ u1_Bs2 = (u4_bs >> 8) & 0xff;
+ u1_Bs3 = (u4_bs >> 0) & 0xff;
+ clip0 = pu1_cliptab[u1_Bs0];
+ clip1 = pu1_cliptab[u1_Bs1];
+ clip2 = pu1_cliptab[u1_Bs2];
+ clip3 = pu1_cliptab[u1_Bs3];
+
+ Alpha_8x16 = _mm_set1_epi16(alpha);
+ Beta_8x16 = _mm_set1_epi16(beta);
+ // _mm_set_epi8 takes the highest byte first, so u1_Bs0 lands in bytes 0-3 and u1_Bs3 in bytes 12-15
+ bs_flag_16x8b = _mm_set_epi8(u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs3, u1_Bs2, u1_Bs2,
+ u1_Bs2, u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs1, u1_Bs1,
+ u1_Bs0, u1_Bs0, u1_Bs0, u1_Bs0);
+ // Per-pixel clipping bound C0, same byte layout as the bS vector
+ C0_16x8 = _mm_set_epi8(clip3, clip3, clip3, clip3, clip2, clip2, clip2,
+ clip2, clip1, clip1, clip1, clip1, clip0, clip0,
+ clip0, clip0);
+
+ bs_flag_16x8b = _mm_cmpeq_epi8(bs_flag_16x8b, zero);
+ bs_flag_16x8b = _mm_xor_si128(bs_flag_16x8b, _mm_set1_epi8(0xFF)); //Invert for required mask (0xFF where bS != 0)
+ C0_8x16 = _mm_unpacklo_epi8(C0_16x8, zero);
+ C0_hi_8x16 = _mm_unpackhi_epi8(C0_16x8, zero);
+
+ p1_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP1));
+ p0_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP0));
+ p2_16x8 = _mm_loadu_si128((__m128i *)(pu1_HorzPixel + i16_posP2));
+ q2_16x8 = _mm_loadu_si128((__m128i *)(pu1_src + i16_posQ2));
+
+ //Cond1 (ABS(p0 - q0) < alpha); the two saturating subtractions added together give the absolute difference
+ temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
+ temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+ // Widen to 16 bit so the signed compare against alpha works on the full 0..255 range
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
+
+ flag1_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag1_16x8 = _mm_and_si128(flag1_16x8, bs_flag_16x8b);
+
+ //Cond2 (ABS(q1 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
+ temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ //Cond3 (ABS(p1 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
+ temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ // flag1 = (bS != 0) && (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta)
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ // Ap < beta, i.e. (ABS(p2 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
+ temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8); // flag2: pixels whose p1 gets filtered
+ // Turn the 0xFFFF compare results into 1 so they can be added to C0
+ temp2 = _mm_subs_epi16(zero, temp2);
+ temp1 = _mm_subs_epi16(zero, temp1);
+
+ C_8x16 = _mm_add_epi16(C0_8x16, temp2);
+ C_hi_8x16 = _mm_add_epi16(C0_hi_8x16, temp1);
+
+ // Aq < beta, i.e. (ABS(q2 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
+ temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag3_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag3_16x8 = _mm_and_si128(flag1_16x8, flag3_16x8); // flag3: pixels whose q1 gets filtered
+
+ temp2 = _mm_subs_epi16(zero, temp2);
+ temp1 = _mm_subs_epi16(zero, temp1);
+ // C = C0 + (Ap < beta) + (Aq < beta)
+ C_8x16 = _mm_add_epi16(C_8x16, temp2);
+ C_hi_8x16 = _mm_add_epi16(C_hi_8x16, temp1);
+ // in_macro = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, low 8 pixels first, then high 8
+ const_val4_8x16 = _mm_set1_epi16(4);
+ temp1 = _mm_subs_epi16(_mm_unpacklo_epi8(q0_16x8, zero),
+ _mm_unpacklo_epi8(p0_16x8, zero));
+ temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p1_16x8, zero),
+ _mm_unpacklo_epi8(q1_16x8, zero));
+ temp1 = _mm_slli_epi16(temp1, 2);
+ temp1 = _mm_add_epi16(temp1, temp2);
+ temp1 = _mm_add_epi16(temp1, const_val4_8x16);
+ in_macro_16x8 = _mm_srai_epi16(temp1, 3);
+
+ temp1 = _mm_subs_epi16(_mm_unpackhi_epi8(q0_16x8, zero),
+ _mm_unpackhi_epi8(p0_16x8, zero));
+ temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(p1_16x8, zero),
+ _mm_unpackhi_epi8(q1_16x8, zero));
+ temp1 = _mm_slli_epi16(temp1, 2);
+ temp1 = _mm_add_epi16(temp1, temp2);
+ temp1 = _mm_add_epi16(temp1, const_val4_8x16);
+ in_macro_hi_16x8 = _mm_srai_epi16(temp1, 3);
+ // Clip the delta to [-C, C]: upper bound, negate C, then lower bound
+ in_macro_16x8 = _mm_min_epi16(C_8x16, in_macro_16x8); //CLIP3
+ in_macro_hi_16x8 = _mm_min_epi16(C_hi_8x16, in_macro_hi_16x8); //CLIP3
+ C_8x16 = _mm_subs_epi16(zero, C_8x16);
+ C_hi_8x16 = _mm_subs_epi16(zero, C_hi_8x16);
+ in_macro_16x8 = _mm_max_epi16(C_8x16, in_macro_16x8); //CLIP3
+ in_macro_hi_16x8 = _mm_max_epi16(C_hi_8x16, in_macro_hi_16x8); //CLIP3
+ // New p0 = p0 + delta, saturated to 8 bit by _mm_packus_epi16
+ temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p0_16x8, zero), in_macro_16x8);
+ temp2 = _mm_add_epi16(_mm_unpackhi_epi8(p0_16x8, zero), in_macro_hi_16x8);
+
+ temp1 = _mm_packus_epi16(temp1, temp2);
+ // Select filtered p0 where flag1 is set, original p0 elsewhere
+ temp1 = _mm_and_si128(temp1, flag1_16x8);
+ temp2 = _mm_and_si128(p0_16x8,
+ _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF)));
+
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP0), temp1);
+ // New q0 = q0 - delta, selected with the same mask
+ temp1 = _mm_sub_epi16(_mm_unpacklo_epi8(q0_16x8, zero), in_macro_16x8);
+ temp2 = _mm_sub_epi16(_mm_unpackhi_epi8(q0_16x8, zero), in_macro_hi_16x8);
+
+ temp1 = _mm_packus_epi16(temp1, temp2);
+
+ temp1 = _mm_and_si128(temp1, flag1_16x8);
+ temp2 = _mm_and_si128(q0_16x8,
+ _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF)));
+
+ temp1 = _mm_add_epi8(temp1, temp2);
+ _mm_storeu_si128((__m128i *)(pu1_src), temp1);
+
+ //if(Ap < Beta): p1 += CLIP3(-C0, C0, (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1); uses the unfiltered p0/q0 registers
+ temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero),
+ _mm_unpacklo_epi8(p0_16x8, zero));
+ temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(p1_16x8, zero), 1);
+ // temp2 = p2 - (p1 << 1)
+ temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p2_16x8, zero), temp2);
+ temp2 = _mm_add_epi16(temp1, temp2);
+ in_macro_16x8 = _mm_srai_epi16(temp2, 1);
+
+ temp1 = _mm_avg_epu16(_mm_unpackhi_epi8(q0_16x8, zero),
+ _mm_unpackhi_epi8(p0_16x8, zero));
+ temp2 = _mm_slli_epi16(_mm_unpackhi_epi8(p1_16x8, zero), 1);
+ // temp2 = p2 - (p1 << 1), high 8 pixels
+ temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(p2_16x8, zero), temp2);
+ temp2 = _mm_add_epi16(temp1, temp2);
+ in_macro_hi_16x8 = _mm_srai_epi16(temp2, 1);
+
+ in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3
+ in_macro_hi_16x8 = _mm_min_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3
+ C0_8x16 = _mm_subs_epi16(zero, C0_8x16); // C0 registers hold -C0 from here on
+ C0_hi_8x16 = _mm_subs_epi16(zero, C0_hi_8x16);
+ in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3
+ in_macro_hi_16x8 = _mm_max_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3
+
+ temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p1_16x8, zero), in_macro_16x8);
+ temp2 = _mm_add_epi16(_mm_unpackhi_epi8(p1_16x8, zero), in_macro_hi_16x8);
+
+ temp1 = _mm_packus_epi16(temp1, temp2);
+ // Select filtered p1 where flag2 is set, original p1 elsewhere
+ temp1 = _mm_and_si128(temp1, flag2_16x8);
+ temp2 = _mm_and_si128(p1_16x8,
+ _mm_xor_si128(flag2_16x8, _mm_set1_epi16(0xFFFF)));
+ temp1 = _mm_add_epi8(temp1, temp2);
+ _mm_storeu_si128((__m128i *)(pu1_HorzPixel + i16_posP1), temp1);
+
+ //if(Aq < Beta): q1 += CLIP3(-C0, C0, (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1)
+ temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero),
+ _mm_unpacklo_epi8(p0_16x8, zero));
+ temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(q1_16x8, zero), 1);
+ // temp2 = q2 - (q1 << 1)
+ temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(q2_16x8, zero), temp2);
+ temp2 = _mm_add_epi16(temp1, temp2);
+ in_macro_16x8 = _mm_srai_epi16(temp2, 1);
+
+ temp1 = _mm_avg_epu16(_mm_unpackhi_epi8(q0_16x8, zero),
+ _mm_unpackhi_epi8(p0_16x8, zero));
+ temp2 = _mm_slli_epi16(_mm_unpackhi_epi8(q1_16x8, zero), 1);
+ // temp2 = q2 - (q1 << 1), high 8 pixels
+ temp2 = _mm_subs_epi16(_mm_unpackhi_epi8(q2_16x8, zero), temp2);
+ temp2 = _mm_add_epi16(temp1, temp2);
+ in_macro_hi_16x8 = _mm_srai_epi16(temp2, 1);
+ // C0 registers still hold -C0 from the p1 step, so the lower bound is applied first here
+ in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3
+ in_macro_hi_16x8 = _mm_max_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3
+ C0_8x16 = _mm_subs_epi16(zero, C0_8x16); // back to +C0 for the upper bound
+ C0_hi_8x16 = _mm_subs_epi16(zero, C0_hi_8x16);
+ in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3
+ in_macro_hi_16x8 = _mm_min_epi16(C0_hi_8x16, in_macro_hi_16x8); //CLIP3
+
+ temp1 = _mm_add_epi16(_mm_unpacklo_epi8(q1_16x8, zero), in_macro_16x8);
+ temp2 = _mm_add_epi16(_mm_unpackhi_epi8(q1_16x8, zero), in_macro_hi_16x8);
+
+ temp1 = _mm_packus_epi16(temp1, temp2);
+ // Select filtered q1 where flag3 is set, original q1 elsewhere
+ temp1 = _mm_and_si128(temp1, flag3_16x8);
+ temp2 = _mm_and_si128(q1_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi16(0xFFFF)));
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ _mm_storeu_si128((__m128i *)(pu1_src + i16_posQ1), temp1);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_vert_bs4_mbaff_ssse3() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* vertical edge when boundary strength is set to 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.3 under the title "Filtering */
+/* process for edges for bS equal to 4" in ITU T Rec H.264. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta)
+{
+ __m128i zero = _mm_setzero_si128();
+ __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8;
+ __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8;
+ __m128i q0_8x16, q1_8x16, q2_8x16, q3_8x16;
+ __m128i p0_8x16, p1_8x16, p2_8x16, p3_8x16;
+ __m128i q0_16x8_1;
+ __m128i p0_16x8_1;
+ __m128i q0_16x8_2, q1_16x8_2, q2_16x8_2;
+ __m128i p0_16x8_2, p1_16x8_2, p2_16x8_2;
+ __m128i temp1, temp2, temp3, temp4, temp5, temp6;
+ __m128i Alpha_8x16, Beta_8x16;
+ __m128i flag1_16x8, flag2_16x8, flag3_16x8, flag4_16x8;
+ __m128i const_val2_16x8 = _mm_set1_epi16(2);
+ __m128i line1, line2, line3, line4, line5, line6, line7, line8;
+ // Broadcast the thresholds to 16-bit lanes for the comparisons below.
+ Alpha_8x16 = _mm_set1_epi16(alpha);
+ Beta_8x16 = _mm_set1_epi16(beta);
+ // Load 8 rows of 8 bytes each, starting 4 bytes to the left of pu1_src.
+ line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd));
+ line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd));
+ line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd));
+ line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd));
+ line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd));
+ line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd));
+ line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd));
+ line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd));
+ // Transpose the 8x8 byte block: interleave bytes, then 16-bit, then 32-bit units.
+ temp1 = _mm_unpacklo_epi8(line1, line2);
+ temp2 = _mm_unpacklo_epi8(line3, line4);
+ temp3 = _mm_unpacklo_epi8(line5, line6);
+ temp4 = _mm_unpacklo_epi8(line7, line8);
+
+ line1 = _mm_unpacklo_epi16(temp1, temp2);
+ line2 = _mm_unpackhi_epi16(temp1, temp2);
+ line3 = _mm_unpacklo_epi16(temp3, temp4);
+ line4 = _mm_unpackhi_epi16(temp3, temp4);
+
+ p1_8x16 = _mm_unpacklo_epi32(line1, line3); // columns p3 | p2 (used as a temporary)
+ p0_8x16 = _mm_unpackhi_epi32(line1, line3); // columns p1 | p0 (used as a temporary)
+ q0_8x16 = _mm_unpacklo_epi32(line2, line4); // columns q0 | q1 (used as a temporary)
+ q1_8x16 = _mm_unpackhi_epi32(line2, line4); // columns q2 | q3 (used as a temporary)
+ // Split into one pixel column per register: low 8 bytes hold the column, high 8 bytes are zero.
+ p3_16x8 = _mm_unpacklo_epi64(p1_8x16, zero);
+ p2_16x8 = _mm_unpackhi_epi64(p1_8x16, zero);
+ q2_16x8 = _mm_unpacklo_epi64(q1_8x16, zero);
+ q3_16x8 = _mm_unpackhi_epi64(q1_8x16, zero);
+ p1_16x8 = _mm_unpacklo_epi64(p0_8x16, zero);
+ p0_16x8 = _mm_unpackhi_epi64(p0_8x16, zero);
+ q0_16x8 = _mm_unpacklo_epi64(q0_8x16, zero);
+ q1_16x8 = _mm_unpackhi_epi64(q0_8x16, zero);
+
+ //Cond1 (ABS(p0 - q0) < alpha); the absolute difference is the sum of the two saturating subtractions
+ temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
+ temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
+
+ flag1_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ //Cond2 (ABS(q1 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
+ temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ //Cond3 (ABS(p1 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
+ temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+
+ // flag1 = (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta)
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ // flag2 = flag1 && (ABS(p0 - q0) < ((alpha >> 2) + 2))
+ temp1 = _mm_subs_epu8(p0_16x8, q0_16x8);
+ temp2 = _mm_subs_epu8(q0_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+ Alpha_8x16 = _mm_srai_epi16(Alpha_8x16, 2);
+ Alpha_8x16 = _mm_add_epi16(Alpha_8x16, const_val2_16x8);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Alpha_8x16, temp1);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ // flag3 = flag2 && (ABS(p2 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
+ temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag3_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag3_16x8 = _mm_and_si128(flag3_16x8, flag2_16x8);
+
+ // flag4 = flag2 && (ABS(q2 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
+ temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp1 = _mm_unpackhi_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+ temp1 = _mm_cmpgt_epi16(Beta_8x16, temp1);
+
+ flag4_16x8 = _mm_packs_epi16(temp2, temp1);
+ flag4_16x8 = _mm_and_si128(flag4_16x8, flag2_16x8);
+
+ // Widen the 8 valid bytes of every column to 16-bit lanes for the filter arithmetic
+ p3_8x16 = _mm_unpacklo_epi8(p3_16x8, zero);
+ p2_8x16 = _mm_unpacklo_epi8(p2_16x8, zero);
+ p1_8x16 = _mm_unpacklo_epi8(p1_16x8, zero);
+ p0_8x16 = _mm_unpacklo_epi8(p0_16x8, zero);
+ q0_8x16 = _mm_unpacklo_epi8(q0_16x8, zero);
+ q1_8x16 = _mm_unpacklo_epi8(q1_16x8, zero);
+ q2_8x16 = _mm_unpacklo_epi8(q2_16x8, zero);
+ q3_8x16 = _mm_unpacklo_epi8(q3_16x8, zero);
+
+ // p0_1 = (2*p1 + p0 + q1 + 2) >> 2 and q0_1 = (2*q1 + q0 + p1 + 2) >> 2
+ temp1 = _mm_add_epi16(p0_8x16, q1_8x16);
+ temp2 = _mm_add_epi16(p1_8x16, q0_8x16);
+ temp5 = _mm_add_epi16(temp1, const_val2_16x8);
+ temp6 = _mm_add_epi16(temp2, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p1_8x16, 1);
+ temp4 = _mm_slli_epi16(q1_8x16, 1);
+ temp1 = _mm_add_epi16(temp5, temp3);
+ temp2 = _mm_add_epi16(temp6, temp4);
+ p0_16x8_1 = _mm_srai_epi16(temp1, 2);
+ q0_16x8_1 = _mm_srai_epi16(temp2, 2);
+
+ // p1_2 = (p2 + p1 + p0 + q0 + 2) >> 2 and q1_2 = (q2 + q1 + q0 + p0 + 2) >> 2
+ temp6 = _mm_add_epi16(temp6, p0_8x16);
+ temp5 = _mm_add_epi16(temp5, q0_8x16);
+ temp1 = _mm_add_epi16(temp6, p2_8x16);
+ temp2 = _mm_add_epi16(temp5, q2_8x16);
+ p1_16x8_2 = _mm_srai_epi16(temp1, 2);
+ q1_16x8_2 = _mm_srai_epi16(temp2, 2);
+
+ // p0_2 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4) >> 3 and q0_2 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4) >> 3
+ temp1 = _mm_add_epi16(temp3, p2_8x16);
+ temp2 = _mm_add_epi16(temp4, q2_8x16);
+ temp1 = _mm_add_epi16(temp1, q1_8x16);
+ temp2 = _mm_add_epi16(temp2, p1_8x16);
+ temp3 = _mm_add_epi16(p0_8x16, q0_8x16);
+ temp3 = _mm_slli_epi16(temp3, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp3);
+ temp1 = _mm_add_epi16(temp1, _mm_set1_epi16(4));
+ temp2 = _mm_add_epi16(temp2, _mm_set1_epi16(4));
+ p0_16x8_2 = _mm_srai_epi16(temp1, 3);
+ q0_16x8_2 = _mm_srai_epi16(temp2, 3);
+
+ // p2_2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4) >> 3 and q2_2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4) >> 3
+ temp1 = _mm_add_epi16(temp6, const_val2_16x8);
+ temp2 = _mm_add_epi16(temp5, const_val2_16x8);
+ temp3 = _mm_slli_epi16(p2_8x16, 1);
+ temp4 = _mm_slli_epi16(q2_8x16, 1);
+ temp3 = _mm_add_epi16(p2_8x16, temp3);
+ temp4 = _mm_add_epi16(q2_8x16, temp4);
+ temp5 = _mm_slli_epi16(p3_8x16, 1);
+ temp6 = _mm_slli_epi16(q3_8x16, 1);
+ temp1 = _mm_add_epi16(temp1, temp3);
+ temp2 = _mm_add_epi16(temp2, temp4);
+ temp1 = _mm_add_epi16(temp1, temp5);
+ temp2 = _mm_add_epi16(temp2, temp6);
+ p2_16x8_2 = _mm_srai_epi16(temp1, 3);
+ q2_16x8_2 = _mm_srai_epi16(temp2, 3);
+
+ // Pack p0_1 and q0_1 back to unsigned bytes (upper 8 bytes zero)
+ p0_16x8_1 = _mm_packus_epi16(p0_16x8_1, zero);
+ q0_16x8_1 = _mm_packus_epi16(q0_16x8_1, zero);
+
+ // Pack p1_2 and q1_2 back to unsigned bytes
+ p1_16x8_2 = _mm_packus_epi16(p1_16x8_2, zero);
+ q1_16x8_2 = _mm_packus_epi16(q1_16x8_2, zero);
+
+ // Pack p0_2 and q0_2 back to unsigned bytes
+ p0_16x8_2 = _mm_packus_epi16(p0_16x8_2, zero);
+ q0_16x8_2 = _mm_packus_epi16(q0_16x8_2, zero);
+
+ // Pack p2_2 and q2_2 back to unsigned bytes
+ p2_16x8_2 = _mm_packus_epi16(p2_16x8_2, zero);
+ q2_16x8_2 = _mm_packus_epi16(q2_16x8_2, zero);
+
+ // p0 and q0: take the _1 result where flag1 is set, keep the input elsewhere
+ p0_16x8 = _mm_and_si128(p0_16x8,
+ _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
+ p0_16x8_1 = _mm_and_si128(p0_16x8_1, flag1_16x8);
+ p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_1);
+ q0_16x8 = _mm_and_si128(q0_16x8,
+ _mm_xor_si128(flag1_16x8, _mm_set1_epi8(0xFF)));
+ q0_16x8_1 = _mm_and_si128(q0_16x8_1, flag1_16x8);
+ q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_1);
+
+ // p0 and q0: override with the _2 result where flag3 (p side) / flag4 (q side) is set
+ p0_16x8 = _mm_and_si128(p0_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p0_16x8_2 = _mm_and_si128(p0_16x8_2, flag3_16x8);
+ p0_16x8 = _mm_add_epi8(p0_16x8, p0_16x8_2);
+ q0_16x8 = _mm_and_si128(q0_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q0_16x8_2 = _mm_and_si128(q0_16x8_2, flag4_16x8);
+ q0_16x8 = _mm_add_epi8(q0_16x8, q0_16x8_2);
+
+ // p1 and q1: replaced by the _2 result where flag3 / flag4 is set
+ p1_16x8 = _mm_and_si128(p1_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p1_16x8_2 = _mm_and_si128(p1_16x8_2, flag3_16x8);
+ p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_2);
+ q1_16x8 = _mm_and_si128(q1_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q1_16x8_2 = _mm_and_si128(q1_16x8_2, flag4_16x8);
+ q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_2);
+
+ // p2 and q2: replaced by the _2 result where flag3 / flag4 is set
+ p2_16x8 = _mm_and_si128(p2_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi8(0xFF)));
+ p2_16x8_2 = _mm_and_si128(p2_16x8_2, flag3_16x8);
+ p2_16x8 = _mm_add_epi8(p2_16x8, p2_16x8_2);
+ q2_16x8 = _mm_and_si128(q2_16x8,
+ _mm_xor_si128(flag4_16x8, _mm_set1_epi8(0xFF)));
+ q2_16x8_2 = _mm_and_si128(q2_16x8_2, flag4_16x8);
+ q2_16x8 = _mm_add_epi8(q2_16x8, q2_16x8_2);
+ // Transpose the columns back into rows (p3 and q3 are written back unmodified).
+ temp1 = _mm_unpacklo_epi8(p3_16x8, p2_16x8);
+ temp2 = _mm_unpacklo_epi8(p1_16x8, p0_16x8);
+ temp3 = _mm_unpacklo_epi8(q0_16x8, q1_16x8);
+ temp4 = _mm_unpacklo_epi8(q2_16x8, q3_16x8);
+
+ p3_8x16 = _mm_unpacklo_epi16(temp1, temp2);
+ p2_8x16 = _mm_unpackhi_epi16(temp1, temp2);
+ q2_8x16 = _mm_unpacklo_epi16(temp3, temp4);
+ q3_8x16 = _mm_unpackhi_epi16(temp3, temp4);
+
+ line1 = _mm_unpacklo_epi32(p3_8x16, q2_8x16);
+ line2 = _mm_srli_si128(line1, 8);
+ line3 = _mm_unpackhi_epi32(p3_8x16, q2_8x16);
+ line4 = _mm_srli_si128(line3, 8);
+ line5 = _mm_unpacklo_epi32(p2_8x16, q3_8x16);
+ line6 = _mm_srli_si128(line5, 8);
+ line7 = _mm_unpackhi_epi32(p2_8x16, q3_8x16);
+ line8 = _mm_srli_si128(line7, 8);
+ // Store the 8 rows back to the same addresses they were loaded from.
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd), line1);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd), line2);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd), line3);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd), line4);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd), line5);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd), line6);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd), line7);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd), line8);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_deblk_luma_vert_bslt4_mbaff_ssse3() */
+/* */
+/* Description : This function performs filtering of a luma block */
+/* vertical edge when boundary strength is less than 4. */
+/* */
+/* Inputs : pu1_src - pointer to the src sample q0 */
+/* src_strd - source stride */
+/* alpha - alpha value for the boundary */
+/* beta - beta value for the boundary */
+/* u4_bs - packed Boundary strength array */
+/* pu1_cliptab - tc0_table */
+/* */
+/* Globals : None */
+/* */
+/* Processing : When the function is called twice, this operation is as */
+/* described in Sec. 8.7.2.3 under the title "Filtering */
+/* process for edges for bS less than 4" in ITU T Rec H.264.*/
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 12 02 2015 Naveen Kumar P Initial version */
+/* */
+/*****************************************************************************/
+void ih264_deblk_luma_vert_bslt4_mbaff_ssse3(UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD32 alpha,
+ WORD32 beta,
+ UWORD32 u4_bs,
+ const UWORD8 *pu1_cliptab)
+{
+ __m128i zero = _mm_setzero_si128();
+ __m128i bs_flag_16x8b, C0_16x8, C0_8x16, C_8x16;
+ __m128i q0_16x8, q1_16x8, q2_16x8, q3_16x8;
+ __m128i p0_16x8, p1_16x8, p2_16x8, p3_16x8;
+ __m128i temp1, temp2, temp3, temp4;
+ __m128i Alpha_8x16, Beta_8x16, flag1_16x8, flag2_16x8, flag3_16x8;
+ __m128i in_macro_16x8;
+ __m128i const_val4_8x16;
+ UWORD8 u1_Bs0, u1_Bs1, u1_Bs2, u1_Bs3;
+ UWORD8 clip0, clip1, clip2, clip3;
+ __m128i line1, line2, line3, line4, line5, line6, line7, line8;
+ __m128i q0_16x8_1, q1_16x8_1, q0_16x8_2;
+ __m128i p0_16x8_1, p1_16x8_1, p0_16x8_2;
+ // Load 8 rows of 8 bytes each, starting 4 bytes to the left of pu1_src.
+ line1 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd));
+ line2 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd));
+ line3 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd));
+ line4 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd));
+ line5 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd));
+ line6 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd));
+ line7 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd));
+ line8 = _mm_loadl_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd));
+ // Transpose the 8x8 byte block: interleave bytes, then 16-bit, then 32-bit units.
+ temp1 = _mm_unpacklo_epi8(line1, line2);
+ temp2 = _mm_unpacklo_epi8(line3, line4);
+ temp3 = _mm_unpacklo_epi8(line5, line6);
+ temp4 = _mm_unpacklo_epi8(line7, line8);
+
+ line1 = _mm_unpacklo_epi16(temp1, temp2);
+ line2 = _mm_unpackhi_epi16(temp1, temp2);
+ line3 = _mm_unpacklo_epi16(temp3, temp4);
+ line4 = _mm_unpackhi_epi16(temp3, temp4);
+
+ temp1 = _mm_unpacklo_epi32(line1, line3); // columns p3 | p2
+ temp2 = _mm_unpackhi_epi32(line1, line3); // columns p1 | p0
+ temp3 = _mm_unpacklo_epi32(line2, line4); // columns q0 | q1
+ temp4 = _mm_unpackhi_epi32(line2, line4); // columns q2 | q3
+ // Split into one pixel column per register: low 8 bytes hold the column, high 8 bytes are zero.
+ p3_16x8 = _mm_unpacklo_epi64(temp1, zero);
+ p2_16x8 = _mm_unpackhi_epi64(temp1, zero);
+ q2_16x8 = _mm_unpacklo_epi64(temp4, zero);
+ q3_16x8 = _mm_unpackhi_epi64(temp4, zero);
+ p1_16x8 = _mm_unpacklo_epi64(temp2, zero);
+ p0_16x8 = _mm_unpackhi_epi64(temp2, zero);
+ q0_16x8 = _mm_unpacklo_epi64(temp3, zero);
+ q1_16x8 = _mm_unpackhi_epi64(temp3, zero);
+ // Unpack the four 8-bit boundary strengths (most significant byte first) and look up their clip values.
+ u1_Bs0 = (u4_bs >> 24) & 0xff;
+ u1_Bs1 = (u4_bs >> 16) & 0xff;
+ u1_Bs2 = (u4_bs >> 8) & 0xff;
+ u1_Bs3 = (u4_bs >> 0) & 0xff;
+ clip0 = pu1_cliptab[u1_Bs0];
+ clip1 = pu1_cliptab[u1_Bs1];
+ clip2 = pu1_cliptab[u1_Bs2];
+ clip3 = pu1_cliptab[u1_Bs3];
+
+ Alpha_8x16 = _mm_set1_epi16(alpha);
+ Beta_8x16 = _mm_set1_epi16(beta);
+ // Each boundary strength / clip value is replicated over two consecutive rows (bytes 0-1 use Bs0, ..., bytes 6-7 use Bs3).
+ bs_flag_16x8b = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, u1_Bs3, u1_Bs3, u1_Bs2,
+ u1_Bs2, u1_Bs1, u1_Bs1, u1_Bs0, u1_Bs0);
+
+ C0_16x8 = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, clip3, clip3, clip2, clip2,
+ clip1, clip1, clip0, clip0);
+
+ bs_flag_16x8b = _mm_cmpeq_epi8(bs_flag_16x8b, zero);
+ bs_flag_16x8b = _mm_xor_si128(bs_flag_16x8b, _mm_set1_epi8(0xFF)); //Invert for required mask
+ C0_8x16 = _mm_unpacklo_epi8(C0_16x8, zero);
+
+ //Cond1 (ABS(p0 - q0) < alpha); the absolute difference is the sum of the two saturating subtractions
+ temp1 = _mm_subs_epu8(q0_16x8, p0_16x8);
+ temp2 = _mm_subs_epu8(p0_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Alpha_8x16, temp2);
+
+ flag1_16x8 = _mm_packs_epi16(temp2, zero);
+ flag1_16x8 = _mm_and_si128(flag1_16x8, bs_flag_16x8b);
+
+ //Cond2 (ABS(q1 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q1_16x8);
+ temp2 = _mm_subs_epu8(q1_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, zero);
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ //Cond3 (ABS(p1 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p1_16x8);
+ temp2 = _mm_subs_epu8(p1_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, zero);
+
+ // flag1 = (bs != 0) && (ABS(p0 - q0) < alpha) && (ABS(q1 - q0) < beta) && (ABS(p1 - p0) < beta)
+ flag1_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+
+ // flag2 = flag1 && (ABS(p2 - p0) < beta)
+ temp1 = _mm_subs_epu8(p0_16x8, p2_16x8);
+ temp2 = _mm_subs_epu8(p2_16x8, p0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+
+ flag2_16x8 = _mm_packs_epi16(temp2, zero);
+ flag2_16x8 = _mm_and_si128(flag1_16x8, flag2_16x8);
+ // The compare result is 0 or -1 per lane; negate it to 0 or 1 and add it to the clip value.
+ temp2 = _mm_subs_epi16(zero, temp2);
+
+ C_8x16 = _mm_add_epi16(C0_8x16, temp2);
+
+ // flag3 = flag1 && (ABS(q2 - q0) < beta)
+ temp1 = _mm_subs_epu8(q0_16x8, q2_16x8);
+ temp2 = _mm_subs_epu8(q2_16x8, q0_16x8);
+ temp1 = _mm_add_epi8(temp1, temp2);
+
+ temp2 = _mm_unpacklo_epi8(temp1, zero);
+ temp2 = _mm_cmpgt_epi16(Beta_8x16, temp2);
+
+ flag3_16x8 = _mm_packs_epi16(temp2, zero);
+ flag3_16x8 = _mm_and_si128(flag1_16x8, flag3_16x8);
+
+ temp2 = _mm_subs_epi16(zero, temp2);
+
+ C_8x16 = _mm_add_epi16(C_8x16, temp2);
+ // delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, then clipped to [-C, C]
+ const_val4_8x16 = _mm_set1_epi16(4);
+ temp1 = _mm_subs_epi16(_mm_unpacklo_epi8(q0_16x8, zero),
+ _mm_unpacklo_epi8(p0_16x8, zero));
+ temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p1_16x8, zero),
+ _mm_unpacklo_epi8(q1_16x8, zero));
+ temp1 = _mm_slli_epi16(temp1, 2);
+ temp1 = _mm_add_epi16(temp1, temp2);
+ temp1 = _mm_add_epi16(temp1, const_val4_8x16);
+ in_macro_16x8 = _mm_srai_epi16(temp1, 3);
+
+ in_macro_16x8 = _mm_min_epi16(C_8x16, in_macro_16x8); //CLIP3
+ C_8x16 = _mm_subs_epi16(zero, C_8x16);
+ in_macro_16x8 = _mm_max_epi16(C_8x16, in_macro_16x8); //CLIP3
+
+ // p0 = clip to 0..255 of (p0 + delta) where flag1 is set, unchanged elsewhere
+ temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p0_16x8, zero), in_macro_16x8);
+
+ temp1 = _mm_packus_epi16(temp1, zero);
+
+ p0_16x8_1 = _mm_and_si128(temp1, flag1_16x8);
+ p0_16x8_2 = _mm_and_si128(
+ p0_16x8, _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF)));
+
+ p0_16x8_1 = _mm_add_epi8(p0_16x8_1, p0_16x8_2);
+
+ // q0 = clip to 0..255 of (q0 - delta) where flag1 is set, unchanged elsewhere
+ temp1 = _mm_sub_epi16(_mm_unpacklo_epi8(q0_16x8, zero), in_macro_16x8);
+
+ temp1 = _mm_packus_epi16(temp1, zero);
+
+ q0_16x8_1 = _mm_and_si128(temp1, flag1_16x8);
+ q0_16x8_2 = _mm_and_si128(
+ q0_16x8, _mm_xor_si128(flag1_16x8, _mm_set1_epi16(0xFFFF)));
+
+ q0_16x8_1 = _mm_add_epi8(q0_16x8_1, q0_16x8_2);
+
+ //if(Ap < Beta): p1 += CLIP3(-C0, C0, (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1)
+ temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero),
+ _mm_unpacklo_epi8(p0_16x8, zero));
+ temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(p1_16x8, zero), 1);
+ // temp2 = p2 - 2 * p1
+ temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(p2_16x8, zero), temp2);
+ temp2 = _mm_add_epi16(temp1, temp2);
+ in_macro_16x8 = _mm_srai_epi16(temp2, 1);
+
+ in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3
+ C0_8x16 = _mm_subs_epi16(zero, C0_8x16);
+ in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3
+
+ // p1 (the original, unfiltered p0 and q0 are used in the average above)
+ temp1 = _mm_add_epi16(_mm_unpacklo_epi8(p1_16x8, zero), in_macro_16x8);
+
+ temp1 = _mm_packus_epi16(temp1, zero);
+
+ p1_16x8_1 = _mm_and_si128(temp1, flag2_16x8);
+ p1_16x8 = _mm_and_si128(p1_16x8,
+ _mm_xor_si128(flag2_16x8, _mm_set1_epi16(0xFFFF)));
+ p1_16x8 = _mm_add_epi8(p1_16x8, p1_16x8_1);
+
+ //if(Aq < Beta): q1 += CLIP3(-C0, C0, (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1)
+ temp1 = _mm_avg_epu16(_mm_unpacklo_epi8(q0_16x8, zero),
+ _mm_unpacklo_epi8(p0_16x8, zero));
+ temp2 = _mm_slli_epi16(_mm_unpacklo_epi8(q1_16x8, zero), 1);
+ // temp2 = q2 - 2 * q1
+ temp2 = _mm_subs_epi16(_mm_unpacklo_epi8(q2_16x8, zero), temp2);
+ temp2 = _mm_add_epi16(temp1, temp2);
+ in_macro_16x8 = _mm_srai_epi16(temp2, 1);
+ // C0_8x16 holds -C0 here (negated in the p1 step), so clamp from below first, then restore +C0.
+ in_macro_16x8 = _mm_max_epi16(C0_8x16, in_macro_16x8); //CLIP3
+ C0_8x16 = _mm_subs_epi16(zero, C0_8x16);
+ in_macro_16x8 = _mm_min_epi16(C0_8x16, in_macro_16x8); //CLIP3
+
+ temp1 = _mm_add_epi16(_mm_unpacklo_epi8(q1_16x8, zero), in_macro_16x8);
+
+ // q1
+ temp1 = _mm_packus_epi16(temp1, zero);
+
+ q1_16x8_1 = _mm_and_si128(temp1, flag3_16x8);
+ q1_16x8 = _mm_and_si128(q1_16x8,
+ _mm_xor_si128(flag3_16x8, _mm_set1_epi16(0xFFFF)));
+ q1_16x8 = _mm_add_epi8(q1_16x8, q1_16x8_1);
+ // Transpose the columns back into rows (p3, p2, q2 and q3 are written back unmodified).
+ temp1 = _mm_unpacklo_epi8(p3_16x8, p2_16x8);
+ temp2 = _mm_unpacklo_epi8(p1_16x8, p0_16x8_1);
+ temp3 = _mm_unpacklo_epi8(q0_16x8_1, q1_16x8);
+ temp4 = _mm_unpacklo_epi8(q2_16x8, q3_16x8);
+
+ line7 = _mm_unpacklo_epi16(temp1, temp2);
+ temp1 = _mm_unpackhi_epi16(temp1, temp2);
+ line8 = _mm_unpacklo_epi16(temp3, temp4);
+ temp2 = _mm_unpackhi_epi16(temp3, temp4);
+
+ line1 = _mm_unpacklo_epi32(line7, line8);
+ line2 = _mm_srli_si128(line1, 8);
+ line3 = _mm_unpackhi_epi32(line7, line8);
+ line4 = _mm_srli_si128(line3, 8);
+ line5 = _mm_unpacklo_epi32(temp1, temp2);
+ line6 = _mm_srli_si128(line5, 8);
+ line7 = _mm_unpackhi_epi32(temp1, temp2);
+ line8 = _mm_srli_si128(line7, 8);
+ // Store the 8 rows back to the same addresses they were loaded from.
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 0 * src_strd), line1);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 1 * src_strd), line2);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 2 * src_strd), line3);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 3 * src_strd), line4);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 4 * src_strd), line5);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 5 * src_strd), line6);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 6 * src_strd), line7);
+ _mm_storel_epi64((__m128i *)(pu1_src - 4 + 7 * src_strd), line8);
+}
+
diff --git a/common/x86/ih264_ihadamard_scaling_sse42.c b/common/x86/ih264_ihadamard_scaling_sse42.c
new file mode 100755
index 0000000..895291b
--- /dev/null
+++ b/common/x86/ih264_ihadamard_scaling_sse42.c
@@ -0,0 +1,238 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_ihadamard_scaling_sse42.c
+ *
+ * @brief
+ * Contains definition of functions for h264 inverse hadamard 4x4 transform and scaling
+ *
+ * @author
+ * Mohit
+ *
+ * @par List of Functions:
+ * - ih264_ihadamard_scaling_4x4_sse42()
+ * - ih264_ihadamard_scaling_2x2_uv_sse42()
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include <immintrin.h>
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
+ * of a 16x16 intra prediction macroblock, and then scales the
+ * transformed coefficients.
+ *
+ * @par Description:
+ * The DC coefficients pass through a 2-stage inverse hadamard transform.
+ * The inverse-transformed content is then scaled based on the Qp value.
+ *
+ * @param[in] pi2_src
+ * input 4x4 block of DC coefficients
+ *
+ * @param[out] pi2_out
+ * output 4x4 block
+ *
+ * @param[in] pu2_iscal_mat
+ * pointer to scaling list
+ *
+ * @param[in] pu2_weigh_mat
+ * pointer to weight matrix
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6)
+ *
+ * @param[in] pi4_tmp
+ * temporary buffer of size 1*16
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, WORD16* pi2_out,
+ const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6, WORD32* pi4_tmp) {
+ __m128i src_r0_r1, src_r2_r3;
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i temp0, temp1, temp2, temp3;
+ __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 6) ? (1 << (5 - u4_qp_div_6)) : 0); // rounding term for the right-shift path; guarded because (5 - u4_qp_div_6) is unsigned and would wrap to an out-of-range shift count when u4_qp_div_6 >= 6
+ __m128i mult_val = _mm_set1_epi32(pu2_iscal_mat[0] * pu2_weigh_mat[0]);
+ (void) pi4_tmp; // the scratch buffer is not needed by this implementation
+ src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
+ src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
+ // Sign-extend each row of four 16-bit coefficients to 32-bit lanes
+ src_r0 = _mm_cvtepi16_epi32(src_r0_r1);
+ src_r0_r1 = _mm_srli_si128(src_r0_r1, 8);
+ src_r1 = _mm_cvtepi16_epi32(src_r0_r1);
+
+ src_r2 = _mm_cvtepi16_epi32(src_r2_r3);
+ src_r2_r3 = _mm_srli_si128(src_r2_r3, 8);
+ src_r3 = _mm_cvtepi16_epi32(src_r2_r3);
+
+ /* Perform Inverse transform */
+ /*-------------------------------------------------------------*/
+ /* Inverse Hadamard [ Horizontal transformation ] */
+ /*-------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 a1 a2 a3
+ * b0 b1 b2 b3
+ * c0 c1 c2 c3
+ * d0 d1 d2 d3
+ */
+ temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 b0 a1 b1
+ temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //c0 d0 c1 d1
+ temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //a2 b2 a3 b3
+ temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 d2 c3 d3
+ src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 b0 c0 d0
+ src_r1 = _mm_unpackhi_epi64(temp0, temp2); //a1 b1 c1 d1
+ src_r2 = _mm_unpacklo_epi64(temp1, temp3); //a2 b2 c2 d2
+ src_r3 = _mm_unpackhi_epi64(temp1, temp3); //a3 b3 c3 d3
+
+ temp0 = _mm_add_epi32(src_r0, src_r3);
+ temp1 = _mm_add_epi32(src_r1, src_r2);
+ temp2 = _mm_sub_epi32(src_r1, src_r2);
+ temp3 = _mm_sub_epi32(src_r0, src_r3);
+
+ src_r0 = _mm_add_epi32(temp0, temp1);
+ src_r1 = _mm_add_epi32(temp2, temp3);
+ src_r2 = _mm_sub_epi32(temp0, temp1);
+ src_r3 = _mm_sub_epi32(temp3, temp2);
+
+ /*-------------------------------------------------------------*/
+ /* Inverse Hadamard [ Vertical transformation ] */
+ /*-------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 b0 c0 d0
+ * a1 b1 c1 d1
+ * a2 b2 c2 d2
+ * a3 b3 c3 d3
+ */
+ temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 a1 b0 b1
+ temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //a2 a3 b2 b3
+ temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //c0 c1 d0 d1
+ temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 c3 d2 d3
+ src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 a1 a2 a3
+ src_r1 = _mm_unpackhi_epi64(temp0, temp2); //b0 b1 b2 b3
+ src_r2 = _mm_unpacklo_epi64(temp1, temp3); //c0 c1 c2 c3
+ src_r3 = _mm_unpackhi_epi64(temp1, temp3); //d0 d1 d2 d3
+
+ temp0 = _mm_add_epi32(src_r0, src_r3);
+ temp1 = _mm_add_epi32(src_r1, src_r2);
+ temp2 = _mm_sub_epi32(src_r1, src_r2);
+ temp3 = _mm_sub_epi32(src_r0, src_r3);
+
+ src_r0 = _mm_add_epi32(temp0, temp1);
+ src_r1 = _mm_add_epi32(temp2, temp3);
+ src_r2 = _mm_sub_epi32(temp0, temp1);
+ src_r3 = _mm_sub_epi32(temp3, temp2);
+
+ src_r0 = _mm_mullo_epi32(src_r0, mult_val);
+ src_r1 = _mm_mullo_epi32(src_r1, mult_val);
+ src_r2 = _mm_mullo_epi32(src_r2, mult_val);
+ src_r3 = _mm_mullo_epi32(src_r3, mult_val);
+
+ // Scaling: left shift when u4_qp_div_6 >= 6, otherwise rounded arithmetic right shift
+ if (u4_qp_div_6 >= 6) {
+ src_r0 = _mm_slli_epi32(src_r0, u4_qp_div_6 - 6);
+ src_r1 = _mm_slli_epi32(src_r1, u4_qp_div_6 - 6);
+ src_r2 = _mm_slli_epi32(src_r2, u4_qp_div_6 - 6);
+ src_r3 = _mm_slli_epi32(src_r3, u4_qp_div_6 - 6);
+ } else {
+ temp0 = _mm_add_epi32(src_r0, add_rshift);
+ temp1 = _mm_add_epi32(src_r1, add_rshift);
+ temp2 = _mm_add_epi32(src_r2, add_rshift);
+ temp3 = _mm_add_epi32(src_r3, add_rshift);
+ src_r0 = _mm_srai_epi32(temp0, 6 - u4_qp_div_6);
+ src_r1 = _mm_srai_epi32(temp1, 6 - u4_qp_div_6);
+ src_r2 = _mm_srai_epi32(temp2, 6 - u4_qp_div_6);
+ src_r3 = _mm_srai_epi32(temp3, 6 - u4_qp_div_6);
+ }
+ src_r0_r1 = _mm_packs_epi32(src_r0, src_r1); // saturate back to 16 bits, two rows per register
+ src_r2_r3 = _mm_packs_epi32(src_r2, src_r3);
+
+ _mm_storeu_si128((__m128i *) (&pi2_out[0]), src_r0_r1);
+ _mm_storeu_si128((__m128i *) (&pi2_out[8]), src_r2_r3);
+}
+
+void ih264_ihadamard_scaling_2x2_uv_sse42(WORD16* pi2_src,
+ WORD16* pi2_out,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD32* pi4_tmp)
+{
+ UNUSED(pi4_tmp);
+ /* Eight 16-bit inputs are read in one load: a0..a3 followed by b0..b3 */
+ __m128i dc_8x16b = _mm_loadu_si128((__m128i *) pi2_src);
+ __m128i quant_4x32b = _mm_set1_epi32((WORD32)(pu2_iscal_mat[0] * pu2_weigh_mat[0]));
+ __m128i blk_a, blk_b, sums, diffs, mix_lo, mix_hi;
+
+ /* Sign-extend the inputs to 32-bit lanes */
+ blk_a = _mm_cvtepi16_epi32(dc_8x16b); //a0 a1 a2 a3
+ blk_b = _mm_cvtepi16_epi32(_mm_srli_si128(dc_8x16b, 8)); //b0 b1 b2 b3
+
+ /* Two butterfly passes built from horizontal add / subtract */
+ sums = _mm_hadd_epi32(blk_a, blk_b); //a0+a1 a2+a3 b0+b1 b2+b3
+ diffs = _mm_hsub_epi32(blk_a, blk_b); //a0-a1 a2-a3 b0-b1 b2-b3
+ mix_lo = _mm_hadd_epi32(sums, diffs); //a0+a1+a2+a3 b0+b1+b2+b3 a0-a1+a2-a3 b0-b1+b2-b3
+ mix_hi = _mm_hsub_epi32(sums, diffs); //a0+a1-a2-a3 b0+b1-b2-b3 a0-a1-a2+a3 b0-b1-b2+b3
+
+ /* Move the a-terms ahead of the b-terms inside each register */
+ mix_lo = _mm_shuffle_epi32(mix_lo, 0xd8); //a0+a1+a2+a3 a0-a1+a2-a3 b0+b1+b2+b3 b0-b1+b2-b3
+ mix_hi = _mm_shuffle_epi32(mix_hi, 0xd8); //a0+a1-a2-a3 a0-a1-a2+a3 b0+b1-b2-b3 b0-b1-b2+b3
+
+ /* Gather the four outputs of each input group into one register */
+ blk_a = _mm_unpacklo_epi64(mix_lo, mix_hi); //a0+a1+a2+a3 a0-a1+a2-a3 a0+a1-a2-a3 a0-a1-a2+a3
+ blk_b = _mm_unpackhi_epi64(mix_lo, mix_hi); //b0+b1+b2+b3 b0-b1+b2-b3 b0+b1-b2-b3 b0-b1-b2+b3
+
+ /* Multiply by pu2_iscal_mat[0] * pu2_weigh_mat[0] */
+ blk_a = _mm_mullo_epi32(quant_4x32b, blk_a);
+ blk_b = _mm_mullo_epi32(quant_4x32b, blk_b);
+
+ /* Shift left by u4_qp_div_6, then arithmetic shift right by 5 */
+ blk_a = _mm_srai_epi32(_mm_slli_epi32(blk_a, u4_qp_div_6), 5);
+ blk_b = _mm_srai_epi32(_mm_slli_epi32(blk_b, u4_qp_div_6), 5);
+
+ /* Saturate to 16 bits and write all eight results with one store */
+ _mm_storeu_si128((__m128i *) (&pi2_out[0]), _mm_packs_epi32(blk_a, blk_b));
+
+}
diff --git a/common/x86/ih264_ihadamard_scaling_ssse3.c b/common/x86/ih264_ihadamard_scaling_ssse3.c
new file mode 100755
index 0000000..232d9fa
--- /dev/null
+++ b/common/x86/ih264_ihadamard_scaling_ssse3.c
@@ -0,0 +1,200 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_ihadamard_scaling_ssse3.c
+ *
+ * @brief
+ * Contains definition of functions for h264 inverse hadamard 4x4 transform and scaling
+ *
+ * @author
+ * Mohit
+ *
+ * @par List of Functions:
+ * - ih264_ihadamard_scaling_4x4_ssse3()
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include <immintrin.h>
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
+ * of a 16x16 intra prediction macroblock, and then performs scaling.
+ *
+ * @par Description:
+ * The DC coefficients pass through a 2-stage (horizontal, then vertical)
+ * inverse hadamard transform. Every transformed value is multiplied by
+ * pu2_iscal_mat[0] * pu2_weigh_mat[0] and then scaled based on the Qp value:
+ * left shifted by (u4_qp_div_6 - 6) when u4_qp_div_6 >= 6, otherwise rounded
+ * and arithmetically right shifted by (6 - u4_qp_div_6). The results are
+ * saturated to 16 bits before being stored.
+ *
+ * @param[in] pi2_src
+ * input 4x4 block of DC coefficients (16 values, read with unaligned loads)
+ *
+ * @param[out] pi2_out
+ * output 4x4 block (16 values, written with unaligned stores)
+ *
+ * @param[in] pu2_iscal_mat
+ * pointer to scaling list (only element 0 is read)
+ *
+ * @param[in] pu2_weigh_mat
+ * pointer to weight matrix (only element 0 is read)
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6)
+ *
+ * @param[in] pi4_tmp
+ * temporary buffer of size 1*16; not used by this implementation
+ *
+ * @returns none
+ *
+ * @remarks
+ * The rounding constant for the right-shift path is computed only on that
+ * path, because (5 - u4_qp_div_6) wraps around for u4_qp_div_6 >= 6 and
+ * shifting by such a count is undefined behaviour.
+ *
+ *******************************************************************************
+ */
+void ih264_ihadamard_scaling_4x4_ssse3(WORD16* pi2_src, WORD16* pi2_out,
+                                       const UWORD16 *pu2_iscal_mat, const UWORD16 *pu2_weigh_mat,
+                                       UWORD32 u4_qp_div_6, WORD32* pi4_tmp) {
+    /* Keeps only the low 16 bits of every 32-bit lane */
+    const __m128i mask = _mm_set1_epi32(0xFFFF);
+    const __m128i zero_8x16b = _mm_setzero_si128();
+    __m128i src_r0_r1, src_r2_r3, sign_reg;
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    __m128i temp0, temp1, temp2, temp3;
+    __m128i mult_val = _mm_set1_epi32(pu2_iscal_mat[0] * pu2_weigh_mat[0]);
+
+    (void)pi4_tmp; /* not used by this implementation */
+
+    mult_val = _mm_and_si128(mult_val, mask);
+
+    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
+    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
+
+    /* Sign-extend the 16-bit coefficients to 32 bits: interleave each value */
+    /* with an all-ones (negative) or all-zeros (non-negative) upper half    */
+    sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r0_r1);
+    src_r0 = _mm_unpacklo_epi16(src_r0_r1, sign_reg);
+    src_r1 = _mm_unpackhi_epi16(src_r0_r1, sign_reg);
+    sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r2_r3);
+    src_r2 = _mm_unpacklo_epi16(src_r2_r3, sign_reg);
+    src_r3 = _mm_unpackhi_epi16(src_r2_r3, sign_reg);
+
+    /* Perform Inverse transform */
+    /*-------------------------------------------------------------*/
+    /* Inverse hadamard [ Horizontal transformation ]              */
+    /*-------------------------------------------------------------*/
+    // Matrix transpose
+    /*
+     *  a0 a1 a2 a3
+     *  b0 b1 b2 b3
+     *  c0 c1 c2 c3
+     *  d0 d1 d2 d3
+     */
+    temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 b0 a1 b1
+    temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //c0 d0 c1 d1
+    temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //a2 b2 a3 b3
+    temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 d2 c3 d3
+    src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 b0 c0 d0
+    src_r1 = _mm_unpackhi_epi64(temp0, temp2); //a1 b1 c1 d1
+    src_r2 = _mm_unpacklo_epi64(temp1, temp3); //a2 b2 c2 d2
+    src_r3 = _mm_unpackhi_epi64(temp1, temp3); //a3 b3 c3 d3
+
+    temp0 = _mm_add_epi32(src_r0, src_r3);
+    temp1 = _mm_add_epi32(src_r1, src_r2);
+    temp2 = _mm_sub_epi32(src_r1, src_r2);
+    temp3 = _mm_sub_epi32(src_r0, src_r3);
+
+    src_r0 = _mm_add_epi32(temp0, temp1);
+    src_r1 = _mm_add_epi32(temp2, temp3);
+    src_r2 = _mm_sub_epi32(temp0, temp1);
+    src_r3 = _mm_sub_epi32(temp3, temp2);
+
+    /*-------------------------------------------------------------*/
+    /* Inverse hadamard [ Vertical transformation ]                */
+    /*-------------------------------------------------------------*/
+    // Matrix transpose
+    /*
+     *  a0 b0 c0 d0
+     *  a1 b1 c1 d1
+     *  a2 b2 c2 d2
+     *  a3 b3 c3 d3
+     */
+    temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 a1 b0 b1
+    temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //a2 a3 b2 b3
+    temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //c0 c1 d0 d1
+    temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 c3 d2 d3
+    src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 a1 a2 a3
+    src_r1 = _mm_unpackhi_epi64(temp0, temp2); //b0 b1 b2 b3
+    src_r2 = _mm_unpacklo_epi64(temp1, temp3); //c0 c1 c2 c3
+    src_r3 = _mm_unpackhi_epi64(temp1, temp3); //d0 d1 d2 d3
+
+    temp0 = _mm_add_epi32(src_r0, src_r3);
+    temp1 = _mm_add_epi32(src_r1, src_r2);
+    temp2 = _mm_sub_epi32(src_r1, src_r2);
+    temp3 = _mm_sub_epi32(src_r0, src_r3);
+
+    src_r0 = _mm_add_epi32(temp0, temp1);
+    src_r1 = _mm_add_epi32(temp2, temp3);
+    src_r2 = _mm_sub_epi32(temp0, temp1);
+    src_r3 = _mm_sub_epi32(temp3, temp2);
+
+    /* Multiply by pu2_iscal_mat[0] * pu2_weigh_mat[0].                      */
+    /* _mm_madd_epi16 multiplies signed 16-bit lanes and adds adjacent       */
+    /* products; with the upper 16 bits of both operands cleared, each       */
+    /* 32-bit lane becomes (low 16 bits of value) * (low 16 bits of scale).  */
+    /* NOTE(review): assumes the transformed values fit in signed 16 bits    */
+    /* and the scale product fits in 15 bits - confirm against callers.      */
+    src_r0 = _mm_and_si128(src_r0, mask);
+    src_r1 = _mm_and_si128(src_r1, mask);
+    src_r2 = _mm_and_si128(src_r2, mask);
+    src_r3 = _mm_and_si128(src_r3, mask);
+
+    src_r0 = _mm_madd_epi16(src_r0, mult_val);
+    src_r1 = _mm_madd_epi16(src_r1, mult_val);
+    src_r2 = _mm_madd_epi16(src_r2, mult_val);
+    src_r3 = _mm_madd_epi16(src_r3, mult_val);
+
+    //Scaling
+    if (u4_qp_div_6 >= 6) {
+        const int lshift = (int)(u4_qp_div_6 - 6);
+
+        src_r0 = _mm_slli_epi32(src_r0, lshift);
+        src_r1 = _mm_slli_epi32(src_r1, lshift);
+        src_r2 = _mm_slli_epi32(src_r2, lshift);
+        src_r3 = _mm_slli_epi32(src_r3, lshift);
+    } else {
+        /* rshift is in [1, 6] here, so the rounding shift count is valid */
+        const int rshift = (int)(6 - u4_qp_div_6);
+        const __m128i add_rshift = _mm_set1_epi32(1 << (rshift - 1));
+
+        temp0 = _mm_add_epi32(src_r0, add_rshift);
+        temp1 = _mm_add_epi32(src_r1, add_rshift);
+        temp2 = _mm_add_epi32(src_r2, add_rshift);
+        temp3 = _mm_add_epi32(src_r3, add_rshift);
+        src_r0 = _mm_srai_epi32(temp0, rshift);
+        src_r1 = _mm_srai_epi32(temp1, rshift);
+        src_r2 = _mm_srai_epi32(temp2, rshift);
+        src_r3 = _mm_srai_epi32(temp3, rshift);
+    }
+
+    /* Saturate the 32-bit results to signed 16 bits, two rows per register */
+    src_r0_r1 = _mm_packs_epi32(src_r0, src_r1);
+    src_r2_r3 = _mm_packs_epi32(src_r2, src_r3);
+
+    _mm_storeu_si128((__m128i *) (&pi2_out[0]), src_r0_r1);
+    _mm_storeu_si128((__m128i *) (&pi2_out[8]), src_r2_r3);
+}
diff --git a/common/x86/ih264_inter_pred_filters_ssse3.c b/common/x86/ih264_inter_pred_filters_ssse3.c
new file mode 100755
index 0000000..64e364e
--- /dev/null
+++ b/common/x86/ih264_inter_pred_filters_ssse3.c
@@ -0,0 +1,4375 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264_inter_pred_filters_ssse3.c */
+/* */
+/* Description : Contains function definitions for inter */
+/* prediction functions in x86 ssse3 intrinsics */
+/* */
+/* List of Functions : ih264_inter_pred_luma_copy_ssse3() */
+/* ih264_inter_pred_luma_horz_ssse3() */
+/* ih264_inter_pred_luma_vert_ssse3() */
+/* ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3() */
+/* ih264_inter_pred_luma_horz_qpel_ssse3() */
+/* ih264_inter_pred_luma_vert_qpel_ssse3() */
+/* ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3() */
+/* ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3() */
+/* ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3() */
+/* ih264_inter_pred_chroma_ssse3() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+#include <immintrin.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_inter_pred_filters.h"
+
+/*****************************************************************************/
+/* Constant Data variables */
+/*****************************************************************************/
+
+/* coefficients for 6 tap filtering*/
+//const WORD32 ih264_g_six_tap[3] ={1,-5,20};
+/*****************************************************************************/
+/* Function definitions . */
+/*****************************************************************************/
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_inter_pred_luma_copy_ssse3                         */
+/*                                                                           */
+/*  Description   : Copies a ht x wd block from source to destination.       */
+/*                  (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8),       */
+/*                  (8,16) or (16,16). Rows are moved in batches of four     */
+/*                  (wd 4 and 8) or eight (any other wd, treated as 16);     */
+/*                  every batch is fully loaded before it is stored.         */
+/*                                                                           */
+/*  Inputs        : pu1_src  - pointer to source                             */
+/*                  pu1_dst  - pointer to destination                        */
+/*                  src_strd - stride for source                             */
+/*                  dst_strd - stride for destination                        */
+/*                  ht       - height of the block                           */
+/*                  wd       - width of the block                            */
+/*                  pu1_tmp  - unused                                        */
+/*                  dydx     - unused                                        */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         13 02 2015   Kaushik         Initial Version                      */
+/*                      Senthoor                                             */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_inter_pred_luma_copy_ssse3(UWORD8 *pu1_src,
+                                      UWORD8 *pu1_dst,
+                                      WORD32 src_strd,
+                                      WORD32 dst_strd,
+                                      WORD32 ht,
+                                      WORD32 wd,
+                                      UWORD8* pu1_tmp,
+                                      WORD32 dydx)
+{
+    __m128i rows_16x8b[8];
+    WORD32 row;
+
+    UNUSED(pu1_tmp);
+    UNUSED(dydx);
+
+    if(wd == 4)
+    {
+        // only the first four bytes of each row are written
+        const __m128i mask_low_32b = _mm_srli_si128(_mm_set1_epi8(0xff), 12);
+
+        do
+        {
+            for(row = 0; row < 4; row++)
+            {
+                rows_16x8b[row] = _mm_loadl_epi64((__m128i *)(pu1_src + row * src_strd));
+            }
+            for(row = 0; row < 4; row++)
+            {
+                _mm_maskmoveu_si128(rows_16x8b[row], mask_low_32b,
+                                    (char*)(pu1_dst + row * dst_strd));
+            }
+
+            pu1_src += 4 * src_strd;
+            pu1_dst += 4 * dst_strd;
+            ht -= 4;
+        }
+        while(ht > 0);
+    }
+    else if(wd == 8)
+    {
+        do
+        {
+            for(row = 0; row < 4; row++)
+            {
+                rows_16x8b[row] = _mm_loadl_epi64((__m128i *)(pu1_src + row * src_strd));
+            }
+            for(row = 0; row < 4; row++)
+            {
+                _mm_storel_epi64((__m128i *)(pu1_dst + row * dst_strd), rows_16x8b[row]);
+            }
+
+            pu1_src += 4 * src_strd;
+            pu1_dst += 4 * dst_strd;
+            ht -= 4;
+        }
+        while(ht > 0);
+    }
+    else // wd == 16
+    {
+        do
+        {
+            for(row = 0; row < 8; row++)
+            {
+                rows_16x8b[row] = _mm_loadu_si128((__m128i *)(pu1_src + row * src_strd));
+            }
+            for(row = 0; row < 8; row++)
+            {
+                _mm_storeu_si128((__m128i *)(pu1_dst + row * dst_strd), rows_16x8b[row]);
+            }
+
+            pu1_src += 8 * src_strd;
+            pu1_dst += 8 * dst_strd;
+            ht -= 8;
+        }
+        while(ht > 0);
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_inter_pred_luma_horz_ssse3 */
+/* */
+/* Description : This function applies a horizontal 6-tap filter on */
+/* ht x wd block as mentioned in sec. 8.4.2.2.1 titled */
+/* "Luma sample interpolation process". (ht,wd) can be */
+/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* Each output is (6-tap sum + 16) >> 5, saturated to 8 bits */
+/* */
+/* Inputs : pu1_src - pointer to source */
+/* pu1_dst - pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* pu1_tmp - unused */
+/* dydx - unused */
+/* */
+/* Issues : None */
+/* Revision History: */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/*****************************************************************************/
+void ih264_inter_pred_luma_horz_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx)
+{
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+ __m128i const_val16_8x16b;
+ // pu1_tmp and dydx are not referenced by this function
+ UNUSED(pu1_tmp);
+ UNUSED(dydx);
+
+ pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
+ // coefficient pairs are stored as signed bytes, lowest byte first
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ const_val16_8x16b = _mm_set1_epi16(16); //rounding offset added before the right shift by 5
+
+ if(wd == 4)
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0r1_16x8b;
+ __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
+
+ __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
+ __m128i res_r0r1_16x8b;
+
+ __m128i mask_full_16x8b, mask_low32b;
+
+ mask_full_16x8b = _mm_set1_epi8(0xff);
+ mask_low32b = _mm_srli_si128(mask_full_16x8b, 12); // mask for first four bytes
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+ // two rows are filtered per iteration, four outputs per row
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
+ res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0
+
+ src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
+ res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0
+
+ src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
+ res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
+ res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b);
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b); //a0*c0+a1*c1+a2*c2+a3*c3+a4*c4+a5*c5 + 16;
+ //a1*c0+a2*c1+a3*c2+a4*c3+a5*c4+a6*c5 + 16;
+ //a2*c0+a3*c1+a4*c2+a5*c3+a6*c4+a7*c5 + 16;
+ //a3*c0+a4*c1+a5*c2+a6*c3+a7*c4+a8*c5 + 16;
+ //b0*c0+b1*c1+b2*c2+b3*c3+b4*c4+b5*c5 + 16;
+ //b1*c0+b2*c1+b3*c2+b4*c3+b5*c4+b6*c5 + 16;
+ //b2*c0+b3*c1+b4*c2+b5*c3+b6*c4+b7*c5 + 16;
+ //b3*c0+b4*c1+b5*c2+b6*c3+b7*c4+b8*c5 + 16;
+
+ res_r0r1_t1_8x16b = _mm_srai_epi16(res_r0r1_t1_8x16b, 5); //shifting right by 5 bits.
+ // saturate to unsigned 8 bits: bytes 0-3 hold row0, bytes 4-7 hold row1
+ res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b, res_r0r1_t1_8x16b);
+
+ _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)pu1_dst);
+ res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4);
+ _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else if(wd == 8)
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+ // two rows are filtered per iteration, eight outputs per row
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b);
+ res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits.
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);
+ // saturate to unsigned 8 bits; the low 8 bytes of each register hold one output row
+ src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r0_t1_8x16b);
+ src_r1_16x8b = _mm_packus_epi16(res_r1_t1_8x16b, res_r1_t1_8x16b);
+
+ _mm_storel_epi64((__m128i *)pu1_dst, src_r0_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), src_r1_16x8b);
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else // wd == 16
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row0 (loaded again from offset 8) : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+ //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
+ // one row is filtered per iteration: 'a' gives outputs 0-7, 'b' gives outputs 8-15
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b);
+ res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits.
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);
+ // saturate to unsigned 8 bits and join both halves into one 16-pixel output row
+ src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b);
+ _mm_storeu_si128((__m128i *)pu1_dst, src_r0_16x8b);
+
+ ht--;
+ pu1_src += src_strd;
+ pu1_dst += dst_strd;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_inter_pred_luma_vert_ssse3 */
+/* */
+/* Description : This function applies a vertical 6-tap filter on */
+/* ht x wd block as mentioned in sec. 8.4.2.2.1 titled */
+/* "Luma sample interpolation process". (ht,wd) can be */
+/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : puc_src - pointer to source */
+/* puc_dst - pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx)
+{
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
+ __m128i src_r5_16x8b, src_r6_16x8b;
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+ __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+ __m128i const_val16_8x16b;
+
+ UNUSED(pu1_tmp);
+ UNUSED(dydx);
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ const_val16_8x16b = _mm_set1_epi16(16);
+
+ pu1_src -= src_strd << 1; // the filter input starts from x[-2] (till x[3])
+
+ if(wd == 4)
+ {
+ __m128i mask_low32b;
+
+ mask_low32b = _mm_set1_epi8(0xff);
+
+ //Epilogue: Load all the pred rows except sixth and seventh row
+ // for the first and second row processing.
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+
+ mask_low32b = _mm_srli_si128(mask_low32b, 12); // mask for first four bytes
+
+ src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b);
+ src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b);
+ src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b);
+ src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b);
+
+ do
+ {
+ src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+
+ src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b);
+ src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
+
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst);
+ res_16x8b = _mm_srli_si128(res_16x8b, 4);
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+
+ src_r0_16x8b = src_r2_16x8b;
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+
+ else if(wd == 8)
+ {
+ //Prologue: Load all the pred rows except the sixth and seventh rows
+ // needed for the first and second row processing.
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+
+ src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);
+ src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b);
+ src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b);
+ src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b);
+
+ do
+ {
+ src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+
+ src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b);
+ src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
+
+ _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b);
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
+
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+
+ src_r0_16x8b = src_r2_16x8b;
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else // wd == 16
+ {
+ __m128i res_t0_8x16b;
+
+ //Prologue: Load all the pred rows except the sixth and seventh rows
+ // needed for the first and second row processing.
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+
+ do
+ {
+ src_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+ res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+ res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(res_t3_8x16b, const_val16_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+
+ src_r0_16x8b = src_r2_16x8b;
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3 */
+/* */
+/* Description : This function implements a two stage cascaded six tap */
+/* filter, horizontally and then vertically on ht x wd */
+/* block as mentioned in sec. 8.4.2.2.1 titled "Luma sample */
+/* interpolation process". (ht,wd) can be (4,4), (8,4), */
+/* (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src - pointer to source */
+/* pu1_dst - pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* pu1_tmp - pointer to temporary buffer */
+/* dydx - not used by this function */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx)
+{
+ UNUSED(dydx);
+
+ if(wd == 4)
+ {
+ WORD16 *pi2_temp;
+
+ pu1_tmp += 4;
+ pu1_src -= src_strd << 1;
+ pi2_temp = (WORD16 *)pu1_tmp;
+ pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
+
+ // Horizontal 6-tap filtering
+ {
+ WORD32 ht_tmp = ht + 4;
+
+ __m128i src_r0_16x8b, src_r1_16x8b;
+ __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0r1_t1_16x8b;
+ __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
+ res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
+ res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
+ res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t3_8x16b, res_r0r1_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)pi2_temp, res_r0r1_t1_8x16b);
+
+ ht_tmp -= 2;
+ pu1_src += src_strd << 1;
+ pi2_temp += 8;
+ }
+ while(ht_tmp > 0);
+
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+
+ src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b,4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0
+ res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b,4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0
+ res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t3_8x16b, res_r0r1_t1_8x16b);
+
+ _mm_storel_epi64((__m128i *)pi2_temp, res_r0r1_t1_8x16b);
+ }
+
+ pi2_temp = (WORD16 *)pu1_tmp;
+
+ // Vertical 6-tap filtering
+ {
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b,
+ src_r4_8x16b;
+ __m128i src_r5_8x16b, src_r6_8x16b;
+ __m128i src_t1_8x16b, src_t2_8x16b;
+
+ __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b, mask_low32b;
+
+ mask_low32b = _mm_set1_epi8(0xff);
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ mask_low32b = _mm_srli_si128(mask_low32b, 12);
+ const_val512_4x32b = _mm_set1_epi32(512);
+
+ src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp));
+ src_r1_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 4));
+ src_r2_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 8));
+ src_r3_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 12));
+ src_r4_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 16));
+ pi2_temp += 20;
+
+ do
+ {
+ src_r5_8x16b = _mm_loadl_epi64((__m128i *)pi2_temp);
+ src_r6_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp + 4));
+
+ src_r0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_t1_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_t2_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_t1_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_t2_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_t1_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_t2_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_t1_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_t2_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst);
+ res_16x8b = _mm_srli_si128(res_16x8b, 4);
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+
+ src_r0_8x16b = src_r2_8x16b;
+ src_r1_8x16b = src_r3_8x16b;
+ src_r2_8x16b = src_r4_8x16b;
+ src_r3_8x16b = src_r5_8x16b;
+ src_r4_8x16b = src_r6_8x16b;
+
+ ht -= 2;
+ pi2_temp += 8;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ }
+ else if(wd == 8)
+ {
+ WORD16 *pi2_temp;
+
+ pu1_tmp += 4;
+ pu1_src -= src_strd << 1;
+ pi2_temp = (WORD16 *)pu1_tmp;
+ pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
+
+ // Horizontal 6-tap filtering
+ {
+ WORD32 ht_tmp = ht + 4;
+
+ __m128i src_r0_16x8b, src_r1_16x8b;
+ __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b12 b13 b14 b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b);
+ _mm_storeu_si128((__m128i *)(pi2_temp + 8), res_r1_t1_8x16b);
+
+ ht_tmp -= 2;
+ pu1_src += src_strd << 1;
+ pi2_temp += 16;
+ }
+ while(ht_tmp > 0);
+
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b,src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b,coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+
+ _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b);
+ }
+
+ pi2_temp = (WORD16 *)pu1_tmp;
+
+ // Vertical 6-tap filtering
+ {
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b,
+ src_r4_8x16b;
+ __m128i src_r5_8x16b, src_r6_8x16b;
+ __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
+
+ __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_c0_4x32b, res_c1_4x32b;
+ __m128i res_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b;
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ const_val512_4x32b = _mm_set1_epi32(512);
+
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp);
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 8));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 24));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 32));
+ pi2_temp += 40;
+
+ do
+ {
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp);
+ src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 8));
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b);
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+
+ src_r0_8x16b = src_r2_8x16b;
+ src_r1_8x16b = src_r3_8x16b;
+ src_r2_8x16b = src_r4_8x16b;
+ src_r3_8x16b = src_r5_8x16b;
+ src_r4_8x16b = src_r6_8x16b;
+
+ ht -= 2;
+ pi2_temp += 16;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ }
+ else // wd == 16
+ {
+ WORD16 *pi2_temp;
+ WORD32 ht_tmp;
+
+ pu1_tmp += 4;
+ pu1_src -= src_strd << 1;
+ pi2_temp = (WORD16 *)pu1_tmp;
+ pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
+
+ // Horizontal 6-tap filtering
+ {
+ ht_tmp = ht + 5;
+
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+ //b0 is the same as a8. Similarly, every other bn pixel is the same as the a(n+8) pixel.
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ _mm_storeu_si128((__m128i *)pi2_temp, res_r0_t1_8x16b);
+ _mm_storeu_si128((__m128i *)(pi2_temp + 8), res_r1_t1_8x16b);
+
+ ht_tmp--;
+ pu1_src += src_strd;
+ pi2_temp += 16;
+ }
+ while(ht_tmp > 0);
+ }
+
+ pi2_temp = (WORD16 *)pu1_tmp;
+
+ // Vertical 6-tap filtering
+ {
+ WORD16 *pi2_temp2;
+ UWORD8 *pu1_dst2;
+ WORD32 ht_tmp;
+
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b, src_r4_8x16b;
+ __m128i src_r5_8x16b, src_r6_8x16b;
+ __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
+
+ __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_c0_4x32b, res_c1_4x32b;
+ __m128i res_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b;
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ const_val512_4x32b = _mm_set1_epi32(512);
+
+ pi2_temp2 = pi2_temp + 8;
+ pu1_dst2 = pu1_dst + 8;
+ ht_tmp = ht;
+
+ /**********************************************************/
+ /* Do first height x 8 block */
+ /**********************************************************/
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp);
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 32));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 48));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 64));
+ pi2_temp += 80;
+
+ do
+ {
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp);
+ src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp + 16));
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b);
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+
+ src_r0_8x16b = src_r2_8x16b;
+ src_r1_8x16b = src_r3_8x16b;
+ src_r2_8x16b = src_r4_8x16b;
+ src_r3_8x16b = src_r5_8x16b;
+ src_r4_8x16b = src_r6_8x16b;
+
+ ht_tmp -= 2;
+ pi2_temp += 32;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht_tmp > 0);
+
+ /**********************************************************/
+ /* Do second ht x 8 block */
+ /**********************************************************/
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)pi2_temp2);
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 48));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 64));
+ pi2_temp2 += 80;
+
+ do
+ {
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)pi2_temp2);
+ src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ _mm_storel_epi64((__m128i *)pu1_dst2, res_16x8b);
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_c1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_c0_4x32b, res_c1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ _mm_storel_epi64((__m128i *)(pu1_dst2 + dst_strd), res_16x8b);
+
+ src_r0_8x16b = src_r2_8x16b;
+ src_r1_8x16b = src_r3_8x16b;
+ src_r2_8x16b = src_r4_8x16b;
+ src_r3_8x16b = src_r5_8x16b;
+ src_r4_8x16b = src_r6_8x16b;
+
+ ht -= 2;
+ pi2_temp2 += 32;
+ pu1_dst2 += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_inter_pred_luma_horz_qpel_ssse3 */
+/* */
+/* Description : This function implements a six-tap filter horizontally */
+/* on ht x wd block and averages the values with the source */
+/* pixels to calculate horizontal quarter-pel as mentioned */
+/* in sec. 8.4.2.2.1 titled "Luma sample interpolation */
+/* process". (ht,wd) can be (4,4), (8,4), (4,8), (8,8), */
+/* (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src - pointer to source */
+/* pu1_dst - pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* pu1_tmp - pointer to temporary buffer (unused) */
+/* dydx - x and y reference offset for q-pel */
+/* calculations; only (dydx & 3) is used here */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_inter_pred_luma_horz_qpel_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx)
+{
+ WORD32 x_offset;
+ UWORD8 *pu1_pred1;
+ // pu1_pred1 walks the unfiltered source pixels that get averaged with the 6-tap filter output.
+ __m128i src_r0_16x8b, src_r1_16x8b;
+ __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+ __m128i const_val16_8x16b;
+
+ UNUSED(pu1_tmp);
+ // Only the low two bits of dydx (the horizontal part) are used in this function.
+ x_offset = dydx & 3;
+ // Each coefficient register repeats a signed byte pair; 0xFB is -5 as a signed byte.
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ pu1_pred1 = pu1_src + (x_offset >> 1); // one column to the right when x_offset is 2 or 3, else the same column
+
+ const_val16_8x16b = _mm_set1_epi16(16); // rounding term added before the right shift by 5
+
+ pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
+
+ if(wd == 4)
+ {
+ __m128i src_r0r1_16x8b;
+
+ __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
+ __m128i res_r0r1_16x8b;
+
+ __m128i mask_full_16x8b, mask_low32b;
+
+ mask_full_16x8b = _mm_set1_epi8(0xff);
+ mask_low32b = _mm_srli_si128(mask_full_16x8b, 12); // mask for first four bytes
+ // Two rows are filtered per iteration: row0 sits in the low 64 bits and row1 in the high 64 bits.
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
+ res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0
+
+ src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
+ res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0
+
+ src_r0r1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
+ res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1); // unfiltered pixels of row0 for the averaging step
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd)); // unfiltered pixels of row1 for the averaging step
+
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
+ res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b);
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b); //a0*c0+a1*c1+a2*c2+a3*c3+a4*c4+a5*c5 + 16;
+ //a1*c0+a2*c1+a3*c2+a4*c3+a5*c4+a6*c5 + 16;
+ //a2*c0+a3*c1+a4*c2+a5*c3+a6*c4+a7*c5 + 16;
+ //a3*c0+a4*c1+a5*c2+a6*c3+a7*c4+a8*c5 + 16;
+ //b0*c0+b1*c1+b2*c2+b3*c3+b4*c4+b5*c5 + 16;
+ //b1*c0+b2*c1+b3*c2+b4*c3+b5*c4+b6*c5 + 16;
+ //b2*c0+b3*c1+b4*c2+b5*c3+b6*c4+b7*c5 + 16;
+ //b3*c0+b4*c1+b5*c2+b6*c3+b7*c4+b8*c5 + 16;
+ src_r0r1_16x8b = _mm_unpacklo_epi32(src_r0_16x8b,src_r1_16x8b); // row0 pixels in bytes 0-3, row1 pixels in bytes 4-7
+
+ res_r0r1_t1_8x16b = _mm_srai_epi16(res_r0r1_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b, res_r0r1_t1_8x16b); // saturate the 16-bit results to unsigned bytes
+ res_r0r1_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_r0r1_16x8b); //computing q-pel
+
+ _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)pu1_dst); // writes only the four bytes selected by the mask
+ res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4); // bring the row1 result down to bytes 0-3
+ _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_pred1 += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else if(wd == 8)
+ {
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+ __m128i res_r0_16x8b, res_r1_16x8b;
+ // Two rows of eight pixels are filtered per iteration, one set of registers per row.
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1); // unfiltered pixels of row0 for the averaging step
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd)); // unfiltered pixels of row1 for the averaging step
+
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b);
+ res_r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r1_t3_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r0_t1_8x16b);
+ res_r1_16x8b = _mm_packus_epi16(res_r1_t1_8x16b, res_r1_t1_8x16b);
+
+ res_r0_16x8b = _mm_avg_epu8(src_r0_16x8b, res_r0_16x8b);
+ res_r1_16x8b = _mm_avg_epu8(src_r1_16x8b, res_r1_16x8b); //computing q-pel
+
+ _mm_storel_epi64((__m128i *)pu1_dst, res_r0_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_r1_16x8b);
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_pred1 += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else // wd == 16
+ {
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+ __m128i res_16x8b;
+ // One row of sixteen pixels is filtered per iteration, as two 8-pixel halves (a and b) of the same row.
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row0 (from column 8) : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+ //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_pred1); // sixteen unfiltered pixels for the averaging step
+
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b);
+ res_r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r1_t3_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5);
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); //shifting right by 5 bits
+
+ res_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b); // left half in bytes 0-7, right half in bytes 8-15
+ res_16x8b = _mm_avg_epu8(src_r0_16x8b, res_16x8b); //computing q-pel
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
+
+ ht--;
+ pu1_src += src_strd;
+ pu1_pred1 += src_strd;
+ pu1_dst += dst_strd;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_inter_pred_luma_vert_qpel_ssse3 */
+/* */
+/* Description : This function implements a six-tap filter vertically on */
+/* ht x wd block and averages the values with the source */
+/* pixels to calculate vertical quarter-pel as mentioned in */
+/* sec. 8.4.2.2.1 titled "Luma sample interpolation */
+/* process". (ht,wd) can be (4,4), (8,4), (4,8), (8,8), */
+/* (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src - pointer to source */
+/* pu1_dst - pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* pu1_tmp - pointer to temporary buffer (unused) */
+/* dydx - x and y reference offset for q-pel */
+/* calculations; only bit 3 is used here */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx)
+{
+ WORD32 y_offset;
+ UWORD8 *pu1_pred1;
+ // pu1_pred1 walks the unfiltered source rows that get averaged with the 6-tap filter output.
+ UNUSED(pu1_tmp);
+ // NOTE(review): the statement above precedes the declarations below, which needs C99 or later.
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
+ __m128i src_r5_16x8b, src_r6_16x8b;
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+ __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+ __m128i const_val16_8x16b;
+
+ y_offset = dydx & 0xf;
+ // Each coefficient register repeats a signed byte pair; 0xFB is -5 as a signed byte.
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+
+ pu1_pred1 = pu1_src + (y_offset >> 3) * src_strd; // one row down when bit 3 of dydx is set, else the same row
+
+ const_val16_8x16b = _mm_set1_epi16(16); // rounding term added before the right shift by 5
+
+ pu1_src -= src_strd << 1; // the filter input starts two rows above the current row (rows -2 to 3)
+
+ if(wd == 4)
+ {
+ __m128i mask_low32b;
+
+ mask_low32b = _mm_set1_epi8(0xff);
+
+ //Prologue: Load all the pred rows except sixth and seventh row
+ // for the first and second row processing.
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+
+ mask_low32b = _mm_srli_si128(mask_low32b, 12); // mask for first four bytes
+ // Pair consecutive rows (row n in bytes 0-3, row n+1 in bytes 4-7) so one maddubs filters two output rows.
+ src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b);
+ src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b);
+ src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b);
+ src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b);
+
+ do
+ {
+ src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+
+ src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b);
+ src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b);
+ // Interleave bytes of vertically adjacent rows so each byte pair lines up with a coefficient pair.
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+ // src_r0/src_r1 are reused for the unfiltered rows; they are reloaded from src_r2/src_r3 at the end of the loop.
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1);
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd));
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi32(src_r0_16x8b,src_r1_16x8b); // first unfiltered row in bytes 0-3, second in bytes 4-7
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); // saturate the 16-bit results to unsigned bytes
+
+ res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
+
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst); // writes only the four bytes selected by the mask
+ res_16x8b = _mm_srli_si128(res_16x8b, 4); // bring the second output row down to bytes 0-3
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd));
+ // Slide the row window down by two rows for the next iteration.
+ src_r0_16x8b = src_r2_16x8b;
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_pred1 += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+
+ else if(wd == 8)
+ {
+ //Prologue: Load all the pred rows except sixth and seventh row
+ // for the first and second row processing.
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r2_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r3_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r4_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ // Pair consecutive rows: row n in the low 64 bits, row n+1 in the high 64 bits.
+ src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b);
+ src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b);
+ src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b);
+ src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b);
+
+ do
+ {
+ src_r5_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+
+ src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b);
+ src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b);
+ // Low halves of the paired registers produce the first output row.
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ src_r0r1_16x8b = _mm_loadl_epi64((__m128i *)pu1_pred1); // unfiltered pixels for the first output row
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
+ res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
+
+ _mm_storel_epi64((__m128i *)pu1_dst, res_16x8b);
+ // High halves of the paired registers produce the second output row.
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ src_r0r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred1 + src_strd)); // unfiltered pixels for the second output row
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
+ res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
+
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+ // Slide the row window down by two rows for the next iteration.
+ src_r0_16x8b = src_r2_16x8b;
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_pred1 += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else // wd == 16
+ {
+ __m128i res_t0_8x16b;
+
+ //Prologue: Load all the pred rows except sixth and seventh row
+ // for the first and second row processing.
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+
+ do
+ {
+ src_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
+ // First output row, left eight pixels: filter rows 0..5 on the low bytes.
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+ // First output row, right eight pixels: same filter on the high bytes.
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ src_r0r1_16x8b = _mm_loadu_si128((__m128i *)pu1_pred1); // unfiltered pixels for the first output row
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
+ res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
+ // Second output row, left eight pixels: filter rows 1..6 on the low bytes.
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+ // Second output row, right eight pixels: same filter on the high bytes.
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ src_r0r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred1 + src_strd)); // unfiltered pixels for the second output row
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
+ res_16x8b = _mm_avg_epu8(src_r0r1_16x8b, res_16x8b); //computing q-pel
+
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+ // Slide the row window down by two rows for the next iteration.
+ src_r0_16x8b = src_r2_16x8b;
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht -= 2;
+ pu1_src += src_strd << 1;
+ pu1_pred1 += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3 */
+/* */
+/* Description : This function implements a six-tap filter vertically and */
+/* horizontally on ht x wd block separately and averages */
+/* the two sets of values to calculate values at (1/4,1/4), */
+/* (1/4, 3/4), (3/4, 1/4) or (3/4, 3/4) as mentioned in */
+/* sec. 8.4.2.2.1 titled "Luma sample interpolation */
+/* process". (ht,wd) can be (4,4), (8,4), (4,8), (8,8), */
+/* (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src - pointer to source */
+/* pu1_dst - pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* pu1_tmp - pointer to temporary buffer */
+/* dydx - x and y reference offset for q-pel */
+/* calculations */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx)
+{
+ WORD32 ht_temp; //row counter for the vertical pass (ht itself counts the horizontal pass)
+ UWORD8 *pu1_pred_vert,*pu1_pred_horiz; //source cursors for the vertical and the horizontal filter pass
+ UWORD8 *pu1_tmp1, *pu1_tmp2; //write cursor (vertical pass) and read cursor (horizontal pass) into pu1_tmp
+ WORD32 x_offset, y_offset;
+ //Two passes per width: the vertical 6-tap pass stores rounded 8-bit rows in pu1_tmp, then the horizontal 6-tap pass averages its result with those rows into pu1_dst.
+ pu1_tmp1 = pu1_tmp;
+
+ dydx &= 0xf; //only the low 4 bits of dydx are used
+ ht_temp = ht;
+ x_offset = dydx & 0x3; //bits 0-1 of dydx
+ y_offset = dydx >> 2; //bits 2-3 of dydx
+ pu1_tmp2 = pu1_tmp1;
+
+ pu1_pred_vert = pu1_src + (x_offset >> 1) - 2*src_strd; //2 rows up; 1 pixel right when x_offset is 2 or 3
+ pu1_pred_horiz = pu1_src + (y_offset >> 1) * src_strd - 2; //2 pixels left; 1 row down when y_offset is 2 or 3
+ //the filter input starts from x[-2] (till x[3])
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+ __m128i const_val16_8x16b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ const_val16_8x16b = _mm_set1_epi16(16); //rounding term added before the >> 5
+
+ if(wd == 4)
+ {
+ //vertical q-pel filter
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
+ __m128i src_r5_16x8b, src_r6_16x8b;
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+ __m128i res_r0r1_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+
+ //prologue: Load the first five pred rows; the sixth and seventh rows are
+ //loaded inside the loop for the first and second row processing.
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r0_16x8b = _mm_unpacklo_epi32(src_r0_16x8b, src_r1_16x8b); //low 8 bytes: 4 pixels of row n, then 4 pixels of row n+1
+
+ src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r1_16x8b = _mm_unpacklo_epi32(src_r1_16x8b, src_r2_16x8b);
+
+ src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r2_16x8b = _mm_unpacklo_epi32(src_r2_16x8b, src_r3_16x8b);
+
+ src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r3_16x8b = _mm_unpacklo_epi32(src_r3_16x8b, src_r4_16x8b);
+
+ //Core Loop: Produce two output rows (4 pixels each) per iteration.
+ do
+ {
+ src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ src_r4_16x8b = _mm_unpacklo_epi32(src_r4_16x8b, src_r5_16x8b);
+
+ src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert + src_strd));
+ src_r5_16x8b = _mm_unpacklo_epi32(src_r5_16x8b, src_r6_16x8b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); //interleave vertically adjacent pixels for the c0,c1 taps
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b); //same for the c2,c3 taps
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b); //same for the c4,c5 taps
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+ res_r0r1_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b); //saturate to unsigned 8 bit
+
+ _mm_storel_epi64((__m128i *)pu1_tmp1, res_r0r1_16x8b); //8 bytes: 4 pixels of the first row, then 4 pixels of the second row
+
+ src_r0_16x8b = src_r2_16x8b; //slide the row window down by two rows
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht_temp -= 2;
+ pu1_pred_vert += src_strd << 1;
+ pu1_tmp1 += 8;
+ }
+ while(ht_temp > 0);
+ }
+
+ //horizontal q-pel filter
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b;
+ __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0r1_vpel_16x8b, src_r0r1_t1_16x8b;
+
+ __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
+ __m128i res_r0r1_16x8b;
+
+ __m128i mask_low32b;
+
+ mask_low32b = _mm_set1_epi8(0xff);
+ mask_low32b = _mm_srli_si128(mask_low32b, 12); //only the lowest 4 bytes stay set: store mask for 4 pixels
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_pred_horiz); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0r1_vpel_16x8b = _mm_loadl_epi64((__m128i *)pu1_tmp2); //vertical pass output for these two rows, read back from pu1_tmp
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
+ res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
+ res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
+ res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
+ res_r0r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0r1_t3_8x16b);
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b); //a0*c0+a1*c1+a2*c2+a3*c3+a4*c4+a5*c5 + 16;
+ //a1*c0+a2*c1+a3*c2+a4*c3+a5*c4+a6*c5 + 16;
+ //a2*c0+a3*c1+a4*c2+a5*c3+a6*c4+a7*c5 + 16;
+ //a3*c0+a4*c1+a5*c2+a6*c3+a7*c4+a8*c5 + 16;
+ //b0*c0+b1*c1+b2*c2+b3*c3+b4*c4+b5*c5 + 16;
+ //b1*c0+b2*c1+b3*c2+b4*c3+b5*c4+b6*c5 + 16;
+ //b2*c0+b3*c1+b4*c2+b5*c3+b6*c4+b7*c5 + 16;
+ //b3*c0+b4*c1+b5*c2+b6*c3+b7*c4+b8*c5 + 16;
+
+ res_r0r1_t1_8x16b = _mm_srai_epi16(res_r0r1_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_r0r1_16x8b = _mm_packus_epi16(res_r0r1_t1_8x16b,res_r0r1_t1_8x16b);
+
+ res_r0r1_16x8b = _mm_avg_epu8(res_r0r1_16x8b,src_r0r1_vpel_16x8b); //computing q-pel: rounded average of horizontal and vertical results
+
+ _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)pu1_dst); //store the 4 pixels of row0
+ res_r0r1_16x8b = _mm_srli_si128(res_r0r1_16x8b, 4); //bring the row1 pixels down to the low 4 bytes
+ _mm_maskmoveu_si128(res_r0r1_16x8b, mask_low32b, (char*)(pu1_dst + dst_strd)); //store the 4 pixels of row1
+
+ ht -= 2;
+ pu1_pred_horiz += src_strd << 1;
+ pu1_tmp2 += 8;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ }
+ else if(wd == 8)
+ {
+ //vertical q-pel filter
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
+ __m128i src_r4_16x8b, src_r5_16x8b, src_r6_16x8b;
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+ __m128i res_16x8b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+
+ //prologue: Load the first five pred rows; the sixth and seventh rows are
+ //loaded inside the loop for the first and second row processing.
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r0_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //low half: 8 pixels of row n, high half: 8 pixels of row n+1
+
+ src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r1_16x8b = _mm_unpacklo_epi64(src_r1_16x8b, src_r2_16x8b);
+
+ src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r2_16x8b = _mm_unpacklo_epi64(src_r2_16x8b, src_r3_16x8b);
+
+ src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r3_16x8b = _mm_unpacklo_epi64(src_r3_16x8b, src_r4_16x8b);
+
+ //Core Loop: Produce two output rows (8 pixels each) per iteration.
+ do
+ {
+ src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert));
+ src_r4_16x8b = _mm_unpacklo_epi64(src_r4_16x8b, src_r5_16x8b);
+
+ src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_pred_vert + src_strd));
+ src_r5_16x8b = _mm_unpacklo_epi64(src_r5_16x8b, src_r6_16x8b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); //low halves: filter input for the first output row
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
+
+ _mm_storel_epi64((__m128i *)(pu1_tmp1), res_16x8b); //first output row of this iteration
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b); //high halves: filter input for the second output row
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+ res_16x8b = _mm_packus_epi16(res_t1_8x16b, res_t1_8x16b);
+
+ _mm_storel_epi64((__m128i *)(pu1_tmp1 + 8), res_16x8b); //second output row of this iteration
+
+ src_r0_16x8b = src_r2_16x8b; //slide the row window down by two rows
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht_temp -= 2;
+ pu1_pred_vert += src_strd << 1;
+ pu1_tmp1 += 16;
+ }
+ while(ht_temp > 0);
+ }
+
+ //horizontal q-pel filter
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+ __m128i src_r0_vpel_16x8b, src_r1_vpel_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b, res_16x8b;
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz)); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_vpel_16x8b = _mm_loadl_epi64((__m128i *)(pu1_tmp2)); //row0 of the vertical pass output,
+ //8 pixels read back from pu1_tmp
+ src_r1_vpel_16x8b = _mm_loadl_epi64((__m128i *)(pu1_tmp2 + 8));
+ //row1 of the vertical pass output,
+ //8 pixels read back from pu1_tmp
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+ res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r0_t1_8x16b);
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_r0_vpel_16x8b); //computing q-pel for row0
+
+ _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);
+
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r1_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r1_t3_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_r1_t1_8x16b, res_r1_t1_8x16b);
+ res_16x8b = _mm_avg_epu8(res_16x8b,src_r1_vpel_16x8b); //computing q-pel for row1
+
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+
+ ht -= 2;
+ pu1_pred_horiz += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ pu1_tmp2 += 16;
+ }
+ while(ht > 0);
+ }
+ }
+ else // wd == 16
+ {
+ //vertical q-pel filter
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
+ __m128i src_r4_16x8b, src_r5_16x8b, src_r6_16x8b;
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+ __m128i res_t0_8x16b, res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+ __m128i res_16x8b;
+
+ //prologue: Load the first five pred rows; the sixth and seventh rows are
+ //loaded inside the loop for the first and second row processing.
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+ src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
+ pu1_pred_vert = pu1_pred_vert + src_strd;
+
+ //Core Loop: Produce two output rows (16 pixels each) per iteration.
+ do
+ {
+ src_r5_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert));
+ src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_vert + src_strd));
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b); //first output row, pixels 0-7
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+ res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b); //first output row, pixels 8-15
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pu1_tmp1), res_16x8b); //first output row of this iteration
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b); //second output row, pixels 0-7
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+ res_t0_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b); //second output row, pixels 8-15
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t3_8x16b);
+ res_t1_8x16b = _mm_srai_epi16(res_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_t0_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pu1_tmp1 + 16), res_16x8b); //second output row of this iteration
+
+ src_r0_16x8b = src_r2_16x8b; //slide the row window down by two rows
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht_temp -= 2;
+ pu1_pred_vert += src_strd << 1;
+ pu1_tmp1 += 32;
+ }
+ while(ht_temp > 0);
+ }
+ //horizontal q-pel filter
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+ __m128i src_vpel_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+ __m128i res_16x8b;
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row0 (loaded 8 pixels further right) : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+ //b0 is same as a8. Similarly other bn pixels are same as a(n+8) pixels.
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz)); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_pred_horiz + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+ src_vpel_16x8b = _mm_loadu_si128((__m128i *)(pu1_tmp2)); //vertical pass output for this row, read back from pu1_tmp
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r0_t3_8x16b = _mm_add_epi16(const_val16_8x16b, res_r0_t3_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+ res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, const_val16_8x16b);
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5); //shifting right by 5 bits.
+
+ res_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b); //pixels 0-7 from the first load, pixels 8-15 from the second
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_vpel_16x8b); //computing q-pel
+ _mm_storeu_si128((__m128i *)(pu1_dst), res_16x8b);
+
+ ht --; //this loop handles one row per iteration
+ pu1_pred_horiz += src_strd;
+ pu1_dst += dst_strd;
+ pu1_tmp2 += 16;
+ }
+ while(ht > 0);
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3 */
+/* */
+/* Description : This function implements a six-tap filter vertically and */
+/* horizontally on ht x wd block separately and averages */
+/* the two sets of values to calculate values at (1/4,1/2), */
+/* or (3/4, 1/2) as mentioned in sec. 8.4.2.2.1 titled */
+/* "Luma sample interpolation process". (ht,wd) can be */
+/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src - pointer to source */
+/* pu1_dst - pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* pu1_tmp - pointer to temporary buffer */
+/* dydx - x and y reference offset for q-pel */
+/* calculations */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx)
+{
+ WORD32 ht_temp;
+ WORD32 x_offset;
+ WORD32 off0,off1, off2, off3, off4, off5;
+ WORD16 *pi2_temp1,*pi2_temp2,*pi2_temp3;
+
+ ht_temp = ht;
+ x_offset = dydx & 0x3;
+ pi2_temp1 = (WORD16 *)pu1_tmp;
+ pi2_temp2 = pi2_temp1;
+ pi2_temp3 = pi2_temp1 + (x_offset >> 1);
+
+ pu1_src -= 2 * src_strd;
+ pu1_src -= 2;
+ pi2_temp3 += 2;
+ //the filter input starts from x[-2] (till x[3])
+
+ if(wd == 4)
+ {
+ //vertical half-pel
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
+ __m128i src_r5_16x8b, src_r6_16x8b;
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+ __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ off0 = -((src_strd << 2) + src_strd) + 8;
+ off1 = -(src_strd << 2) + 8;
+ off2 = -((src_strd << 1) + src_strd) + 8;
+ off3 = -(src_strd << 1) + 8;
+ off4 = -src_strd + 8;
+ off5 = 8;
+
+ //epilogue: Load all the pred rows except sixth and seventh row for the
+ //first and second row processing.
+ src_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ src_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ src_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ src_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ src_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ //Core Loop: Process all the rows.
+ do
+ {
+ src_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t2_8x16b, res_t1_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);
+
+ pi2_temp1[8] = pu1_src[off0] + pu1_src[off5]
+ - (pu1_src[off1] + pu1_src[off4])
+ + ((pu1_src[off2] + pu1_src[off3] - pu1_src[off1] - pu1_src[off4]) << 2)
+ + ((pu1_src[off2] + pu1_src[off3]) << 4);
+
+ pu1_src = pu1_src + src_strd;
+ pi2_temp1 = pi2_temp1 + 9;
+
+ src_r6_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t2_8x16b, res_t1_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);
+
+ pi2_temp1[8] = pu1_src[off0] + pu1_src[off5]
+ - (pu1_src[off1] + pu1_src[off4])
+ + ((pu1_src[off2] + pu1_src[off3] - pu1_src[off1] - pu1_src[off4]) << 2)
+ + ((pu1_src[off2] + pu1_src[off3]) << 4);
+
+ ht_temp -= 2;
+ pu1_src = pu1_src + src_strd;
+ pi2_temp1 = pi2_temp1 + 9;
+
+ src_r0_16x8b = src_r2_16x8b;
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+ }
+ while(ht_temp > 0);
+ }
+
+ //horizontal q-pel
+ {
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b;
+ __m128i src_r3_8x16b, src_r4_8x16b, src_r5_8x16b;
+ __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b;
+ __m128i src_hpel_16x8b, src_hpel_8x16b;
+
+ __m128i res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b, const_val16_8x16b;
+ __m128i mask_low32b;
+
+ mask_low32b = _mm_set1_epi8(0xff);
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ mask_low32b = _mm_srli_si128(mask_low32b, 12);
+
+ const_val512_4x32b = _mm_set1_epi32(512);
+ const_val16_8x16b = _mm_set1_epi16(16);
+
+ do
+ {
+ src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
+ src_r2_8x16b = _mm_srli_si128(src_r1_8x16b, 2);
+ src_r3_8x16b = _mm_srli_si128(src_r1_8x16b, 4);
+ src_r4_8x16b = _mm_srli_si128(src_r1_8x16b, 6);
+ src_r5_8x16b = _mm_srli_si128(src_r1_8x16b, 8);
+
+ src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t1_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp3));
+ src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char*)pu1_dst);
+
+ ht--;
+ pi2_temp2 = pi2_temp2 + 4 + 5;
+ pi2_temp3 = pi2_temp3 + 4 + 5;
+ pu1_dst = pu1_dst + dst_strd;
+ }
+ while(ht > 0);
+ }
+ }
+ else if(wd == 8)
+ {
+ // vertical half-pel
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b, src_r4_16x8b;
+ __m128i src_r5_16x8b, src_r6_16x8b;
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+ __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+
+ //epilogue: Load all the pred rows except sixth and seventh row for the
+ //first and second row processing.
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ pu1_src = pu1_src + src_strd;
+
+ //Core Loop: Process all the rows.
+ do
+ {
+ src_r5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ src_r6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_t1_8x16b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r2_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1 + 8 + 5), res_t1_8x16b);
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r1_16x8b, src_r2_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r3_16x8b, src_r4_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r5_16x8b, src_r6_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1 + 8 + 5 + 8), res_t1_8x16b);
+
+ src_r0_16x8b = src_r2_16x8b;
+ src_r1_16x8b = src_r3_16x8b;
+ src_r2_16x8b = src_r4_16x8b;
+ src_r3_16x8b = src_r5_16x8b;
+ src_r4_16x8b = src_r6_16x8b;
+
+ ht_temp -= 2;
+ pu1_src = pu1_src + (src_strd << 1);
+ pi2_temp1 = pi2_temp1 + (13 << 1);
+ }
+ while(ht_temp > 0);
+ }
+ // horizontal q-pel
+ {
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
+ __m128i src_r4_8x16b, src_r5_8x16b;
+ __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b;
+ __m128i src_r0r1_c1_8x16b, src_r2r3_c1_8x16b, src_r4r5_c1_8x16b;
+ __m128i src_hpel_8x16b, src_hpel_16x8b;
+
+ __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b, const_val16_8x16b;
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ const_val512_4x32b = _mm_set1_epi32(512);
+ const_val16_8x16b = _mm_set1_epi16(16);
+
+ do
+ {
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 2));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 3));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 4));
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 5));
+
+ src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ src_r0r1_c1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_c1_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_c1_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_c1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_c1_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_c1_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3));
+ src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+
+ _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);
+
+ ht--;
+ pi2_temp2 = pi2_temp2 + 8 + 5;
+ pi2_temp3 = pi2_temp3 + 8 + 5;
+ pu1_dst = pu1_dst + dst_strd;
+ }
+ while(ht > 0);
+ }
+ }
+ else // wd == 16
+ {
+ // vertical half-pel
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
+ __m128i src_r4_16x8b, src_r5_16x8b;
+ __m128i src_r0_c2_16x8b, src_r1_c2_16x8b, src_r2_c2_16x8b, src_r3_c2_16x8b;
+ __m128i src_r4_c2_16x8b, src_r5_c2_16x8b;
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+ __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+
+ __m128i coeff0_1_16x8b,coeff2_3_16x8b,coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ src_r0_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
+ pu1_src = pu1_src + src_strd;
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ src_r1_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
+ pu1_src = pu1_src + src_strd;
+ src_r2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ src_r2_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
+ pu1_src = pu1_src + src_strd;
+ src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ src_r3_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
+ pu1_src = pu1_src + src_strd;
+ src_r4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ src_r4_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
+ pu1_src = pu1_src + src_strd;
+
+ //Core Loop: Process all the rows.
+ do
+ {
+ src_r5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src));
+ src_r5_c2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 16));
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1), res_t1_8x16b);
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src_r0_16x8b, src_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src_r2_16x8b, src_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src_r4_16x8b, src_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_t1_8x16b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src_r0_c2_16x8b, src_r1_c2_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src_r2_c2_16x8b, src_r3_c2_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src_r4_c2_16x8b, src_r5_c2_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1 + 16), res_t1_8x16b);
+
+ src_r0_16x8b = src_r1_16x8b;
+ src_r1_16x8b = src_r2_16x8b;
+ src_r2_16x8b = src_r3_16x8b;
+ src_r3_16x8b = src_r4_16x8b;
+ src_r4_16x8b = src_r5_16x8b;
+
+ src_r0_c2_16x8b = src_r1_c2_16x8b;
+ src_r1_c2_16x8b = src_r2_c2_16x8b;
+ src_r2_c2_16x8b = src_r3_c2_16x8b;
+ src_r3_c2_16x8b = src_r4_c2_16x8b;
+ src_r4_c2_16x8b = src_r5_c2_16x8b;
+
+ ht_temp--;
+ pu1_src = pu1_src + src_strd;
+ pi2_temp1 = pi2_temp1 + 16 + 5;
+ }
+ while(ht_temp > 0);
+ }
+ // horizontal q-pel
+ {
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
+ __m128i src_r4_8x16b, src_r5_8x16b;
+ __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
+ __m128i src_hpel1_8x16b, src_hpel2_8x16b, src_hpel_16x8b;
+
+ __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_c0_8x16b, res_c1_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b, const_val16_8x16b;
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ const_val512_4x32b = _mm_set1_epi32(512);
+ const_val16_8x16b = _mm_set1_epi16(16);
+
+ do
+ {
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 1));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 2));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 3));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 4));
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 5));
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_c0_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 1));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 2));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 3));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 4));
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8 + 5));
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b ,10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(const_val512_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_c1_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_c0_8x16b, res_c1_8x16b);
+
+ src_hpel1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3));
+ src_hpel1_8x16b = _mm_add_epi16(src_hpel1_8x16b, const_val16_8x16b);
+ src_hpel1_8x16b = _mm_srai_epi16(src_hpel1_8x16b, 5); //shifting right by 5 bits.
+
+ src_hpel2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3 + 8));
+ src_hpel2_8x16b = _mm_add_epi16(src_hpel2_8x16b, const_val16_8x16b);
+ src_hpel2_8x16b = _mm_srai_epi16(src_hpel2_8x16b, 5); //shifting right by 5 bits.
+
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel1_8x16b, src_hpel2_8x16b);
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+
+ _mm_storeu_si128((__m128i *)(pu1_dst), res_16x8b);
+
+ ht--;
+ pi2_temp2 = pi2_temp2 + 16 + 5;
+ pi2_temp3 = pi2_temp3 + 16 + 5;
+ pu1_dst = pu1_dst + dst_strd;
+ }
+ while(ht > 0);
+ }
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3 */
+/* */
+/* Description : This function first applies a six-tap filter horizontally */
+/* to get intermediate half-pel values, then applies a */
+/* six-tap filter vertically on those intermediate values, */
+/* and averages the result with the horizontal half-pel row */
+/* selected by dydx to calculate values at (1/2,1/4) or */
+/* (1/2,3/4) as mentioned in sec. 8.4.2.2.1 titled */
+/* "Luma sample interpolation process". (ht,wd) can be */
+/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src - pointer to source */
+/* pu1_dst - pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* pu1_tmp - pointer to temporary buffer */
+/* dydx - x and y reference offset for q-pel */
+/* calculations */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 13 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd,
+ UWORD8* pu1_tmp,
+ WORD32 dydx)
+{
+ WORD32 ht_temp;
+ WORD32 y_offset;
+ WORD16 *pi2_temp1,*pi2_temp2,*pi2_temp3;
+
+ y_offset = (dydx & 0xf) >> 2;
+ pi2_temp1 = (WORD16 *)pu1_tmp;
+ pi2_temp2 = pi2_temp1;
+ pi2_temp3 = pi2_temp1 + (y_offset >> 1) * wd;
+
+ ht_temp = ht + 5;
+ pu1_src -= src_strd << 1;
+ pu1_src -= 2;
+ pi2_temp3 += wd << 1;
+ //the filter input starts from x[-2] (till x[3])
+
+ if(wd == 4)
+ {
+ // horizontal half-pel
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0r1_t1_16x8b;
+ __m128i src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i res_r0r1_t1_8x16b, res_r0r1_t2_8x16b, res_r0r1_t3_8x16b;
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 b0 b1 b1 b2 b2 b3 b3 b4
+ res_r0r1_t1_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 b2 b3 b3 b4 b4 b5 b5 b6
+ res_r0r1_t2_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 4); //a4 a5 a5 a6 a6 a7 a7 a8 0 0 0 0 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 4); //b4 b5 b5 b6 b6 b7 b7 b8 0 0 0 0 0 0 0 0
+
+ src_r0r1_t1_16x8b = _mm_unpacklo_epi64(src_r0_16x8b, src_r1_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 b4 b5 b5 b6 b6 b7 b7 b8
+ res_r0r1_t3_8x16b = _mm_maddubs_epi16(src_r0r1_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //b4*c4+b5*c5 b5*c4+b6*c5 b4*c6+b7*c5 b7*c4+b8*c5
+
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t2_8x16b);
+ res_r0r1_t1_8x16b = _mm_add_epi16(res_r0r1_t1_8x16b, res_r0r1_t3_8x16b);
+
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0r1_t1_8x16b);
+
+ ht_temp -= 2;
+ pu1_src = pu1_src + (src_strd << 1);
+ pi2_temp1 = pi2_temp1 + (4 << 1);
+ }
+ while(ht_temp > 0);
+ }
+ // vertical q-pel
+ {
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
+ __m128i src_r4_8x16b, src_r5_8x16b, src_r6_8x16b;
+ __m128i src_r0r1_c0_8x16b, src_r2r3_c0_8x16b, src_r4r5_c0_8x16b;
+ __m128i src_hpel_16x8b, src_hpel_8x16b;
+
+ __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b, const_val16_8x16b;
+ __m128i mask_low32b;
+
+ mask_low32b = _mm_set1_epi8(0xff);
+ const_val512_4x32b = _mm_set1_epi32(512);
+ const_val16_8x16b = _mm_set1_epi16(16);
+ mask_low32b = _mm_srli_si128(mask_low32b, 12);
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ src_r0_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2));
+ src_r1_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 4));
+ src_r2_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 8));
+ src_r3_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 12));
+ src_r4_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 16));
+ pi2_temp2 += 20;
+
+ do
+ {
+ src_r5_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2));
+ src_r6_8x16b = _mm_loadl_epi64((__m128i *)(pi2_temp2 + 4));
+
+ src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_c0_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_c0_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_c0_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_c0_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_c0_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_c0_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadu_si128((__m128i *)pi2_temp3);
+ src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)(pu1_dst));
+ res_16x8b = _mm_srli_si128(res_16x8b, 4);
+ _mm_maskmoveu_si128(res_16x8b, mask_low32b, (char *)(pu1_dst + dst_strd));
+
+ src_r0_8x16b = src_r2_8x16b;
+ src_r1_8x16b = src_r3_8x16b;
+ src_r2_8x16b = src_r4_8x16b;
+ src_r3_8x16b = src_r5_8x16b;
+ src_r4_8x16b = src_r6_8x16b;
+
+ ht -= 2;
+ pi2_temp2 = pi2_temp2 + (4 << 1);
+ pi2_temp3 = pi2_temp3 + (4 << 1);
+ pu1_dst = pu1_dst + (dst_strd << 1);
+ }
+ while(ht > 0);
+ }
+ }
+ else if(wd == 8)
+ {
+ // horizontal half-pel
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row1 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src)); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0_t1_8x16b);
+ _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_r1_t1_8x16b);
+
+ ht_temp -= 2;
+ pu1_src = pu1_src + (src_strd << 1);
+ pi2_temp1 = pi2_temp1 + (8 << 1);
+ }
+ while(ht_temp > 0);
+ }
+ // vertical q-pel
+ {
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
+ __m128i src_r4_8x16b, src_r5_8x16b, src_r6_8x16b;
+ __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
+ __m128i src_hpel_8x16b, src_hpel_16x8b;
+
+ __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b, const_val16_8x16b;
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ const_val512_4x32b = _mm_set1_epi32(512);
+ const_val16_8x16b = _mm_set1_epi16(16);
+
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 24));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32));
+ pi2_temp2 += 40;
+
+ do
+ {
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
+ src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 8));
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadu_si128((__m128i *)pi2_temp3);
+ src_hpel_8x16b = _mm_add_epi16(const_val16_8x16b, src_hpel_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+
+ _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3 + 8));
+ src_hpel_8x16b = _mm_add_epi16(const_val16_8x16b, src_hpel_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+
+ src_r0_8x16b = src_r2_8x16b;
+ src_r1_8x16b = src_r3_8x16b;
+ src_r2_8x16b = src_r4_8x16b;
+ src_r3_8x16b = src_r5_8x16b;
+ src_r4_8x16b = src_r6_8x16b;
+
+ ht -= 2;
+ pi2_temp2 = pi2_temp2 + (8 << 1);
+ pi2_temp3 = pi2_temp3 + (8 << 1);
+ pu1_dst = pu1_dst + (dst_strd << 1);
+ }
+ while(ht > 0);
+ }
+ }
+ else // wd == 16
+ {
+ UWORD8 *pu1_dst1;
+ WORD16 *pi2_temp4,*pi2_temp5;
+
+ pu1_dst1 = pu1_dst + 8;
+ pi2_temp4 = pi2_temp2 + 8;
+ pi2_temp5 = pi2_temp3 + 8;
+
+ // horizontal half-pel
+ {
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5 c4 c5 c5 c5
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9.....
+ //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9.....
+ //b0 is same a8. Similarly other bn pixels are same as a(n+8) pixels.
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src)); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 a8 a9 a9 a10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b2*c4+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_temp1), res_r0_t1_8x16b);
+ _mm_storeu_si128((__m128i *)(pi2_temp1 + 8), res_r1_t1_8x16b);
+
+ ht_temp--;
+ pu1_src = pu1_src + src_strd;
+ pi2_temp1 = pi2_temp1 + 16;
+ }
+ while(ht_temp > 0);
+ }
+ // vertical q-pel
+ {
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b, src_r4_8x16b;
+ __m128i src_r5_8x16b, src_r6_8x16b;
+ __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
+ __m128i src_hpel_8x16b, src_hpel_16x8b;
+
+ __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_8x16b, res_16x8b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b, const_val16_8x16b;
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001);
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014);
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB);
+
+ const_val512_4x32b = _mm_set1_epi32(512);
+ const_val16_8x16b = _mm_set1_epi16(16);
+
+ /**********************************************************/
+ /* Do first height x 8 block */
+ /**********************************************************/
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 32));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 48));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 64));
+ pi2_temp2 += 80;
+
+ ht_temp = ht;
+ do
+ {
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2));
+ src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp2 + 16));
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3));
+ src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst), res_16x8b);
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp3 + 16));
+ src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res_16x8b);
+
+ src_r0_8x16b = src_r2_8x16b;
+ src_r1_8x16b = src_r3_8x16b;
+ src_r2_8x16b = src_r4_8x16b;
+ src_r3_8x16b = src_r5_8x16b;
+ src_r4_8x16b = src_r6_8x16b;
+
+ ht_temp -= 2;
+ pi2_temp3 = pi2_temp3 + (16 << 1);
+ pi2_temp2 = pi2_temp2 + (16 << 1);
+ pu1_dst = pu1_dst + (dst_strd << 1);
+ }
+ while(ht_temp > 0);
+
+ /**********************************************************/
+ /* Do second height * 8 block */
+ /**********************************************************/
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 16));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 32));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 48));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 64));
+ pi2_temp4 += 80;
+
+ do
+ {
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4));
+ src_r6_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp4 + 16));
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp5));
+ src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst1), res_16x8b);
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r1_8x16b, src_r2_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r3_8x16b, src_r4_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r5_8x16b, src_r6_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+ res_16x8b = _mm_packus_epi16(res_8x16b, res_8x16b);
+
+ src_hpel_8x16b = _mm_loadu_si128((__m128i *)(pi2_temp5 + 16));
+ src_hpel_8x16b = _mm_add_epi16(src_hpel_8x16b, const_val16_8x16b);
+ src_hpel_8x16b = _mm_srai_epi16(src_hpel_8x16b, 5); //shifting right by 5 bits.
+ src_hpel_16x8b = _mm_packus_epi16(src_hpel_8x16b, src_hpel_8x16b);
+
+ res_16x8b = _mm_avg_epu8(res_16x8b, src_hpel_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst1 + dst_strd), res_16x8b);
+
+ src_r0_8x16b = src_r2_8x16b;
+ src_r1_8x16b = src_r3_8x16b;
+ src_r2_8x16b = src_r4_8x16b;
+ src_r3_8x16b = src_r5_8x16b;
+ src_r4_8x16b = src_r6_8x16b;
+
+ ht -= 2;
+ pi2_temp5 = pi2_temp5 + (16 << 1);
+ pi2_temp4 = pi2_temp4 + (16 << 1);
+ pu1_dst1 = pu1_dst1 + (dst_strd << 1);
+ }
+ while(ht > 0);
+ }
+ }
+}
+
+/*****************************************************************************/
+/*
+ *  Function Name : ih264_inter_pred_chroma_ssse3
+ *
+ *  Description   : Bilinear chroma sample interpolation as described in
+ *                  sec. 8.4.2.2.2, "Chroma sample interpolation process".
+ *                  Each output sample is
+ *                      (A * p00 + B * p01 + C * p10 + D * p11 + 32) >> 6
+ *                  with A = (8 - dx) * (8 - dy), B = dx * (8 - dy),
+ *                  C = (8 - dx) * dy and D = dx * dy, where p01 is the next
+ *                  sample of the same plane in the row and p10 / p11 are the
+ *                  corresponding samples one row below.
+ *                  The indexing treats U and V as byte-interleaved
+ *                  (u0 v0 u1 v1 ...), so a block of width wd covers 2 * wd
+ *                  bytes per row and the horizontal neighbour of a sample
+ *                  is two bytes away.
+ *                  (ht,wd) can be (2,2), (4,2), (2,4), (4,4), (8,4), (4,8)
+ *                  or (8,8).
+ *
+ *  Inputs        : pu1_src  - pointer to source (interleaved U/V)
+ *                  pu1_dst  - pointer to destination (interleaved U/V)
+ *                  src_strd - stride for source
+ *                  dst_strd - stride for destination
+ *                  dx       - x position of destination value
+ *                  dy       - y position of destination value
+ *                  ht       - height of the block
+ *                  wd       - width of the block; any value other than 2
+ *                             or 4 is handled as 8
+ *
+ *  Outputs       : ht rows (rounded up to the per-iteration row count, see
+ *                  Remarks) of interpolated samples written to pu1_dst.
+ *
+ *  Remarks       : - The loops are do/while loops that consume 2 rows per
+ *                    iteration for wd 2 and 4 and 4 rows per iteration for
+ *                    wd 8, so at least that many rows are always produced.
+ *                  - One source row below the block is read.
+ *                  - The SIMD paths use full 16-byte loads: 16 bytes per
+ *                    source row are read for wd 4 and 24 bytes for wd 8,
+ *                    although only 10 and 18 of them contribute to the
+ *                    result. The source buffer is assumed to tolerate this
+ *                    over-read - TODO confirm against the callers.
+ *
+ *  Issues        : None
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes
+ *         13 02 2015   Kaushik         Initial Version
+ *                      Senthoor
+ */
+/*****************************************************************************/
+void ih264_inter_pred_chroma_ssse3(UWORD8 *pu1_src,
+                                   UWORD8 *pu1_dst,
+                                   WORD32 src_strd,
+                                   WORD32 dst_strd,
+                                   WORD32 dx,
+                                   WORD32 dy,
+                                   WORD32 ht,
+                                   WORD32 wd)
+{
+    WORD32 i, j, A, B, C, D;
+
+    i = 8 - dx;
+    j = 8 - dy;
+
+    /* Bilinear weights; A + B + C + D is always 64 */
+    A = i * j;
+    B = dx * j;
+    C = i * dy;
+    D = dx * dy;
+
+    if(wd == 2)
+    {
+        /* Scalar path: 2 U + 2 V samples, i.e. 4 bytes, per row */
+        WORD32 row, col;
+        WORD32 ai4_tmp[4];
+
+        do
+        {
+            for(row = 0; row < 2; row++)
+            {
+                /* Even columns are U, odd columns are V. All four sums are */
+                /* formed before anything is written to the destination.    */
+                for(col = 0; col < 4; col++)
+                {
+                    ai4_tmp[col] = A * pu1_src[col]
+                                 + B * pu1_src[col + 2]
+                                 + C * pu1_src[src_strd + col]
+                                 + D * pu1_src[src_strd + col + 2];
+                    ai4_tmp[col] = (ai4_tmp[col] + 32) >> 6;
+                }
+
+                for(col = 0; col < 4; col++)
+                {
+                    pu1_dst[col] = CLIP_U8(ai4_tmp[col]);
+                }
+
+                pu1_src += src_strd;
+                pu1_dst += dst_strd;
+            }
+
+            ht -= 2;
+        }
+        while(ht > 0);
+    }
+    else
+    {
+        WORD32 AB, CD;
+
+        __m128i coeffAB_16x8b, coeffCD_16x8b, round_add32_8x16b;
+        __m128i const_shuff_16x8b;
+
+        /* Pack two 8-bit weights into each 16-bit lane (low byte first) so */
+        /* that one _mm_maddubs_epi16 yields A * p00 + B * p01 for the      */
+        /* current row and C * p10 + D * p11 for the row below.             */
+        AB = (B << 8) + A;
+        CD = (D << 8) + C;
+
+        coeffAB_16x8b = _mm_set1_epi16(AB);
+        coeffCD_16x8b = _mm_set1_epi16(CD);
+
+        round_add32_8x16b = _mm_set1_epi16(32);
+
+        /* Pairs every byte with the byte two positions later (the next     */
+        /* sample of the same plane): (0,2) (1,3) (2,4) ... (7,9).          */
+        const_shuff_16x8b = _mm_setr_epi32(0x03010200, 0x05030402, 0x07050604, 0x09070806);
+
+        if(wd == 4)
+        {
+            __m128i src_r1_16x8b, src_r2_16x8b, src_r3_16x8b;
+            __m128i res1_AB_8x16b, res1_CD_8x16b, res1_8x16b, res1_16x8b;
+            __m128i res2_AB_8x16b, res2_CD_8x16b, res2_8x16b, res2_16x8b;
+
+            /* Row above the first output row */
+            src_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+            src_r1_16x8b = _mm_shuffle_epi8(src_r1_16x8b, const_shuff_16x8b);
+            pu1_src += src_strd;
+
+            do
+            {
+                src_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+                src_r3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
+
+                src_r2_16x8b = _mm_shuffle_epi8(src_r2_16x8b, const_shuff_16x8b);
+                src_r3_16x8b = _mm_shuffle_epi8(src_r3_16x8b, const_shuff_16x8b);
+
+                /* Output row n uses source rows n and n + 1 */
+                res1_AB_8x16b = _mm_maddubs_epi16(src_r1_16x8b, coeffAB_16x8b);
+                res1_CD_8x16b = _mm_maddubs_epi16(src_r2_16x8b, coeffCD_16x8b);
+                res2_AB_8x16b = _mm_maddubs_epi16(src_r2_16x8b, coeffAB_16x8b);
+                res2_CD_8x16b = _mm_maddubs_epi16(src_r3_16x8b, coeffCD_16x8b);
+
+                res1_8x16b = _mm_add_epi16(res1_AB_8x16b, res1_CD_8x16b);
+                res2_8x16b = _mm_add_epi16(res2_AB_8x16b, res2_CD_8x16b);
+                res1_8x16b = _mm_add_epi16(res1_8x16b, round_add32_8x16b);
+                res2_8x16b = _mm_add_epi16(res2_8x16b, round_add32_8x16b);
+
+                res1_8x16b = _mm_srai_epi16(res1_8x16b, 6);
+                res2_8x16b = _mm_srai_epi16(res2_8x16b, 6);
+
+                res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b);
+                res2_16x8b = _mm_packus_epi16(res2_8x16b, res2_8x16b);
+
+                /* 8 bytes per row: 4 U + 4 V samples */
+                _mm_storel_epi64((__m128i *)pu1_dst, res1_16x8b);
+                _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+                /* The last (already shuffled) row is the top row next time */
+                src_r1_16x8b = src_r3_16x8b;
+
+                ht -= 2;
+                pu1_src += src_strd << 1;
+                pu1_dst += dst_strd << 1;
+            }
+            while(ht > 0);
+        }
+        else // wd == 8
+        {
+            WORD32 row;
+
+            __m128i src_r1l_16x8b, src_r2l_16x8b;
+            __m128i src_r1h_16x8b, src_r2h_16x8b;
+
+            __m128i res_l_AB_8x16b, res_l_CD_8x16b;
+            __m128i res_h_AB_8x16b, res_h_CD_8x16b;
+            __m128i res_l_8x16b, res_h_8x16b, res_16x8b;
+
+            /* Row above the first output row; the low vector produces      */
+            /* output bytes 0..7 and the high vector output bytes 8..15.    */
+            src_r1l_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+            src_r1h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));
+
+            src_r1l_16x8b = _mm_shuffle_epi8(src_r1l_16x8b, const_shuff_16x8b);
+            src_r1h_16x8b = _mm_shuffle_epi8(src_r1h_16x8b, const_shuff_16x8b);
+
+            pu1_src += src_strd;
+
+            do
+            {
+                /* Four output rows per iteration */
+                for(row = 0; row < 4; row++)
+                {
+                    /* Row below the current output row */
+                    src_r2l_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+                    src_r2h_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8));
+
+                    src_r2l_16x8b = _mm_shuffle_epi8(src_r2l_16x8b, const_shuff_16x8b);
+                    src_r2h_16x8b = _mm_shuffle_epi8(src_r2h_16x8b, const_shuff_16x8b);
+
+                    res_l_AB_8x16b = _mm_maddubs_epi16(src_r1l_16x8b, coeffAB_16x8b);
+                    res_h_AB_8x16b = _mm_maddubs_epi16(src_r1h_16x8b, coeffAB_16x8b);
+                    res_l_CD_8x16b = _mm_maddubs_epi16(src_r2l_16x8b, coeffCD_16x8b);
+                    res_h_CD_8x16b = _mm_maddubs_epi16(src_r2h_16x8b, coeffCD_16x8b);
+
+                    res_l_8x16b = _mm_add_epi16(res_l_AB_8x16b, round_add32_8x16b);
+                    res_h_8x16b = _mm_add_epi16(res_h_AB_8x16b, round_add32_8x16b);
+                    res_l_8x16b = _mm_add_epi16(res_l_8x16b, res_l_CD_8x16b);
+                    res_h_8x16b = _mm_add_epi16(res_h_8x16b, res_h_CD_8x16b);
+
+                    res_l_8x16b = _mm_srai_epi16(res_l_8x16b, 6);
+                    res_h_8x16b = _mm_srai_epi16(res_h_8x16b, 6);
+
+                    res_16x8b = _mm_packus_epi16(res_l_8x16b, res_h_8x16b);
+
+                    /* 16 bytes per row: 8 U + 8 V samples */
+                    _mm_storeu_si128((__m128i *)pu1_dst, res_16x8b);
+
+                    /* The bottom row becomes the top row of the next row */
+                    src_r1l_16x8b = src_r2l_16x8b;
+                    src_r1h_16x8b = src_r2h_16x8b;
+
+                    pu1_src += src_strd;
+                    pu1_dst += dst_strd;
+                }
+
+                ht -= 4;
+            }
+            while(ht > 0);
+        }
+    }
+}
diff --git a/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c b/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
new file mode 100755
index 0000000..d43c8e2
--- /dev/null
+++ b/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
@@ -0,0 +1,437 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_iquant_itrans_recon_dc_ssse3.c
+ *
+ * @brief
+ * Contains function definitions for inverse quantization, inverse
+ * transform and reconstruction
+ *
+ * @author
+ * Mohit [100664]
+ *
+ * @par List of Functions:
+ * - ih264_iquant_itrans_recon_4x4_dc_ssse3()
+ * - ih264_iquant_itrans_recon_8x8_dc_ssse3()
+ * - ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include <immintrin.h>
+
+/*
+ ********************************************************************************
+ *
+ * @brief Reconstructs a 4x4 sub block from quantized residue and the prediction
+ * buffer for the dc-only input pattern, i.e. only the (0,0) element of the
+ * input 4x4 block is non-zero. For the complete function, refer
+ * ih264_iquant_itrans_recon_ssse3.c
+ *
+ * @par Description:
+ * The dc coefficient is inverse quantized (or, when iq_start_idx is non-zero,
+ * replaced by pi2_dc_ld_addr[0]), rounded with (x + 32) >> 6, added to every
+ * prediction sample, and the sums are saturated to 8 bits before being stored
+ *
+ * @param[in] pi2_src
+ * quantized 4x4 block; only pi2_src[0] is read
+ *
+ * @param[in] pu1_pred
+ * prediction 4x4 block
+ *
+ * @param[out] pu1_out
+ * reconstructed 4x4 block
+ *
+ * @param[in] pred_strd
+ * prediction buffer stride
+ *
+ * @param[in] out_strd
+ * recon buffer stride
+ *
+ * @param[in] pu2_iscal_mat
+ * pointer to inverse scale matrix; only element 0 is read
+ *
+ * @param[in] pu2_weigh_mat
+ * pointer to weight matrix; only element 0 is read
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6)
+ *
+ * @param[in] pi2_tmp
+ * temporary buffer; not referenced by this function
+ *
+ * @param[in] iq_start_idx
+ * when non-zero, the dequantized dc value is replaced by pi2_dc_ld_addr[0]
+ *
+ * @param[in] pi2_dc_ld_addr
+ * location of the dc value used when iq_start_idx is non-zero
+ *
+ * @returns none
+ *
+ * @remarks Each prediction row is fetched with an 8 byte load although only
+ * its first 4 bytes contribute to the output
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src,
+                                            UWORD8 *pu1_pred,
+                                            UWORD8 *pu1_out,
+                                            WORD32 pred_strd,
+                                            WORD32 out_strd,
+                                            const UWORD16 *pu2_iscal_mat,
+                                            const UWORD16 *pu2_weigh_mat,
+                                            UWORD32 u4_qp_div_6,
+                                            WORD16 *pi2_tmp,
+                                            WORD32 iq_start_idx,
+                                            WORD16 *pi2_dc_ld_addr)
+{
+    const __m128i zero = _mm_setzero_si128();
+    __m128i row_a, row_b, rows01, rows23, positive, packed, dc_vec;
+    UWORD32 *pu4_row;
+    WORD32 q0 = pi2_src[0];
+    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
+    WORD16 dc_resi;
+    WORD32 row;
+
+    INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
+
+    /* Restoring dc value for intra case */
+    if (0 != iq_start_idx)
+    {
+        q0 = pi2_dc_ld_addr[0];
+    }
+
+    /* With a dc-only block every output residue equals (dc + 32) >> 6 */
+    dc_resi = (WORD16)((q0 + 32) >> 6);
+    dc_vec = _mm_set1_epi16(dc_resi);
+
+    /* Widen the prediction rows to 16 bits and pair them: rows 0|1 and rows 2|3 */
+    row_a = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(pu1_pred)), zero);
+    row_b = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(pu1_pred + pred_strd)), zero);
+    rows01 = _mm_unpacklo_epi64(row_a, row_b);
+
+    row_a = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(pu1_pred + 2 * pred_strd)), zero);
+    row_b = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(pu1_pred + 3 * pred_strd)), zero);
+    rows23 = _mm_unpacklo_epi64(row_a, row_b);
+
+    rows01 = _mm_add_epi16(dc_vec, rows01);
+    rows23 = _mm_add_epi16(dc_vec, rows23);
+
+    /* Zero the lanes that are not positive, then saturate to unsigned 8 bits */
+    positive = _mm_cmpgt_epi16(rows01, zero);
+    rows01 = _mm_and_si128(rows01, positive);
+    positive = _mm_cmpgt_epi16(rows23, zero);
+    rows23 = _mm_and_si128(rows23, positive);
+
+    packed = _mm_packus_epi16(rows01, rows23);
+
+    /* The low 32 bits hold the current row; shift the next row down after each store */
+    for (row = 0; row < 4; row++)
+    {
+        pu4_row = (UWORD32 *)(pu1_out + row * out_strd);
+        *pu4_row = _mm_cvtsi128_si32(packed);
+        packed = _mm_srli_si128(packed, 4);
+    }
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Reconstructs an 8x8 block from quantized residue and the prediction buffer
+ * for the dc-only input pattern, i.e. only the (0,0) element of the input 8x8
+ * block is non-zero. For the complete function, refer
+ * ih264_iquant_itrans_recon_ssse3.c
+ *
+ * @par Description:
+ * The dc coefficient is inverse quantized, rounded with (x + 32) >> 6, added
+ * to every prediction sample, and the sums are saturated to 8 bits before
+ * being stored
+ *
+ * @param[in] pi2_src
+ * Input 8x8 coefficients; only pi2_src[0] is read
+ *
+ * @param[in] pu1_pred
+ * Prediction 8x8 block
+ *
+ * @param[out] pu1_out
+ * Output 8x8 block
+ *
+ * @param[in] pred_strd
+ * Prediction stride
+ *
+ * @param[in] out_strd
+ * Output stride
+ *
+ * @param[in] pu2_iscale_mat
+ * Pointer to the inverse scale matrix; only element 0 is read
+ *
+ * @param[in] pu2_weigh_mat
+ * Pointer to the weight matrix; only element 0 is read
+ *
+ * @param[in] qp_div
+ * QP/6
+ *
+ * @param[in] pi2_tmp
+ * temporary buffer; not referenced by this function
+ *
+ * @param[in] iq_start_idx
+ * not referenced by this function
+ *
+ * @param[in] pi2_dc_ld_addr
+ * not referenced by this function
+ *
+ * @returns Void
+ *
+ * @remarks
+ * All eight prediction rows are read before the first output row is written
+ *
+ *******************************************************************************
+ */
+
+void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src,
+                                             UWORD8 *pu1_pred,
+                                             UWORD8 *pu1_out,
+                                             WORD32 pred_strd,
+                                             WORD32 out_strd,
+                                             const UWORD16 *pu2_iscale_mat,
+                                             const UWORD16 *pu2_weigh_mat,
+                                             UWORD32 qp_div,
+                                             WORD16 *pi2_tmp,
+                                             WORD32 iq_start_idx,
+                                             WORD16 *pi2_dc_ld_addr)
+{
+    const __m128i zero = _mm_setzero_si128();
+    __m128i rows[8];
+    __m128i dc_vec, positive;
+    WORD32 q0 = pi2_src[0];
+    WORD16 rnd_fact = (qp_div < 6) ? 1 << (5 - qp_div) : 0;
+    WORD16 dc_resi;
+    WORD32 row;
+
+    INV_QUANT(q0, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
+
+    /* With a dc-only block every output residue equals (dc + 32) >> 6 */
+    dc_resi = (WORD16)((q0 + 32) >> 6);
+    dc_vec = _mm_set1_epi16(dc_resi);
+
+    /* Load the 8 prediction rows (8 bytes each) and widen them to 16 bits */
+    for (row = 0; row < 8; row++)
+    {
+        rows[row] = _mm_unpacklo_epi8(
+                        _mm_loadl_epi64((__m128i *)(&pu1_pred[row * pred_strd])),
+                        zero);
+    }
+
+    /* Add the residue, zero the non-positive lanes and saturate to 8 bits */
+    for (row = 0; row < 8; row++)
+    {
+        rows[row] = _mm_add_epi16(dc_vec, rows[row]);
+        positive = _mm_cmpgt_epi16(rows[row], zero);
+        rows[row] = _mm_and_si128(rows[row], positive);
+        rows[row] = _mm_packus_epi16(rows[row], zero);
+    }
+
+    /* Write the low 8 bytes of each packed row to the output block */
+    for (row = 0; row < 8; row++)
+    {
+        _mm_storel_epi64((__m128i *)(&pu1_out[row * out_strd]), rows[row]);
+    }
+}
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 sub block from a chroma dc residue and
+ * the prediction buffer
+ *
+ * @par Description:
+ * The residue added to every sample is (pi2_dc_src[0] + 32) >> 6; no inverse
+ * quantization is done here. Only every second byte of each 8 byte prediction
+ * row is used, and only every second byte of each 8 byte output row is written;
+ * the bytes in between are left untouched in the output buffer
+ * (presumably they hold the other plane of an interleaved chroma buffer --
+ * confirm against the caller)
+ *
+ * @param[in] pi2_src
+ * quantized 4x4 block; not referenced by this function
+ *
+ * @param[in] pu1_pred
+ * prediction block; bytes 0, 2, 4 and 6 of each of the 4 rows are used
+ *
+ * @param[out] pu1_out
+ * reconstructed block; bytes 0, 2, 4 and 6 of each of the 4 rows are written
+ *
+ * @param[in] pred_strd,
+ * Prediction buffer stride
+ *
+ * @param[in] out_strd
+ * recon buffer Stride
+ *
+ * @param[in] pu2_iscal_mat
+ * not referenced by this function
+ *
+ * @param[in] pu2_weigh_mat
+ * not referenced by this function
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6); not referenced by this function
+ *
+ * @param[in] pi2_tmp
+ * temporary buffer; not referenced by this function
+ *
+ * @param[in] pi2_dc_src
+ * pointer to the dc value; only element 0 is read
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dc_src)
+ {
+ WORD16 q0 = pi2_dc_src[0]; // DC value won't be dequantized for chroma inverse transform
+ WORD16 i_macro = ((q0 + 32) >> 6); // residue that is added to every sample
+
+ __m128i pred_r0, pred_r1, pred_r2, pred_r3, sign_reg;
+ __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+ __m128i chroma_mask = _mm_set1_epi16 (0xFF); // 0x00FF in every 16-bit lane: selects the even bytes
+ __m128i value_add = _mm_set1_epi16(i_macro); // residue replicated to all 16-bit lanes
+
+ //Load pred buffer: 8 bytes per row, samples of interest at even byte positions
+ pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 x p01 x p02 x p03 x 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 x p11 x p12 x p13 x 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 x p21 x p22 x p23 x 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 x p31 x p32 x p33 x 0 0 0 0 0 0 0 0 -- all 8 bits
+
+ // Clearing the odd bytes leaves each wanted sample zero-extended in a 16-bit lane
+ pred_r0 = _mm_and_si128(pred_r0, chroma_mask); //p00 p01 p02 p03 0 0 0 0 -- all 16 bits
+ pred_r1 = _mm_and_si128(pred_r1, chroma_mask); //p10 p11 p12 p13 0 0 0 0 -- all 16 bits
+ pred_r2 = _mm_and_si128(pred_r2, chroma_mask); //p20 p21 p22 p23 0 0 0 0 -- all 16 bits
+ pred_r3 = _mm_and_si128(pred_r3, chroma_mask); //p30 p31 p32 p33 0 0 0 0 -- all 16 bits
+
+ pred_r0 = _mm_unpacklo_epi64(pred_r0, pred_r1); //p00 p01 p02 p03 p10 p11 p12 p13
+ pred_r2 = _mm_unpacklo_epi64(pred_r2, pred_r3); //p20 p21 p22 p23 p30 p31 p32 p33
+
+ // Add the residue to the prediction
+ pred_r0 = _mm_add_epi16(value_add, pred_r0);
+ pred_r2 = _mm_add_epi16(value_add, pred_r2);
+
+ /*------------------------------------------------------------------*/
+ //Clipping the results to 8 bits
+ sign_reg = _mm_cmpgt_epi16(pred_r0, zero_8x16b); // sign check: all ones where the lane is > 0
+ pred_r0 = _mm_and_si128(pred_r0, sign_reg); // lanes that are not positive become 0
+ sign_reg = _mm_cmpgt_epi16(pred_r2, zero_8x16b);
+ pred_r2 = _mm_and_si128(pred_r2, sign_reg);
+
+ // Saturating pack: 16 result bytes, 4 per row; the shifts bring rows 1..3 down to byte 0
+ pred_r0 = _mm_packus_epi16(pred_r0, pred_r2);
+ pred_r1 = _mm_srli_si128(pred_r0, 4);
+ pred_r2 = _mm_srli_si128(pred_r1, 4);
+ pred_r3 = _mm_srli_si128(pred_r2, 4);
+
+ // Spread the result bytes back to even byte positions (odd bytes become 0)
+ pred_r0 = _mm_unpacklo_epi8(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- all 16 bits
+ pred_r1 = _mm_unpacklo_epi8(pred_r1, zero_8x16b); //p10 p11 p12 p13 -- all 16 bits
+ pred_r2 = _mm_unpacklo_epi8(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- all 16 bits
+ pred_r3 = _mm_unpacklo_epi8(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- all 16 bits
+
+ // Restrict the store mask to the low 8 bytes: FF 00 FF 00 FF 00 FF 00 followed by 8 zero bytes
+ chroma_mask = _mm_unpacklo_epi64(chroma_mask, zero_8x16b); //1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 -- 8 bits
+
+ // Byte-masked stores: only bytes 0, 2, 4 and 6 of each output row are written,
+ // every other byte of the output buffer keeps its previous value
+ _mm_maskmoveu_si128(pred_r0, chroma_mask, (char *)(&pu1_out[0]));
+ _mm_maskmoveu_si128(pred_r1, chroma_mask, (char *)(&pu1_out[out_strd]));
+ _mm_maskmoveu_si128(pred_r2, chroma_mask, (char *)(&pu1_out[2*out_strd]));
+ _mm_maskmoveu_si128(pred_r3, chroma_mask, (char *)(&pu1_out[3*out_strd]));
+}
+
+
diff --git a/common/x86/ih264_iquant_itrans_recon_sse42.c b/common/x86/ih264_iquant_itrans_recon_sse42.c
new file mode 100755
index 0000000..2a4ea3f
--- /dev/null
+++ b/common/x86/ih264_iquant_itrans_recon_sse42.c
@@ -0,0 +1,554 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_iquant_itrans_recon_sse42.c
+ *
+ * @brief
+ * Contains function definitions for inverse quantization, inverse
+ * transform and reconstruction
+ *
+ * @author
+ * Mohit [100664]
+ *
+ * @par List of Functions:
+ * - ih264_iquant_itrans_recon_4x4_sse42()
+ * - ih264_iquant_itrans_recon_chroma_4x4_sse42()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include <immintrin.h>
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 sub block from quantized residue and
+ * prediction buffer
+ *
+ * @par Description:
+ * The quantized residue is first inverse quantized, then inverse transformed.
+ * This inverse transformed content is added to the prediction buffer to recon-
+ * struct the end output
+ *
+ * @param[in] pi2_src
+ * quantized 4x4 block (16 contiguous coefficients)
+ *
+ * @param[in] pu1_pred
+ * prediction 4x4 block
+ *
+ * @param[out] pu1_out
+ * reconstructed 4x4 block
+ *
+ * @param[in] pred_strd
+ * Prediction buffer stride
+ *
+ * @param[in] out_strd
+ * recon buffer Stride
+ *
+ * @param[in] pu2_iscal_mat
+ * pointer to inverse scale matrix (16 contiguous entries)
+ *
+ * @param[in] pu2_weigh_mat
+ * pointer to weight matrix (16 contiguous entries)
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6)
+ *
+ * @param[in] pi2_tmp
+ * temporary buffer; not referenced by this function
+ *
+ * @param[in] iq_start_idx
+ * when equal to 1, the dequantized (0,0) coefficient is replaced by
+ * pi2_dc_ld_addr[0]
+ *
+ * @param[in] pi2_dc_ld_addr
+ * location of the dc value used when iq_start_idx is 1
+ *
+ * @returns none
+ *
+ * @remarks Each prediction row is fetched with an 8 byte load although only
+ * its first 4 bytes are used
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src,
+                                         UWORD8 *pu1_pred,
+                                         UWORD8 *pu1_out,
+                                         WORD32 pred_strd,
+                                         WORD32 out_strd,
+                                         const UWORD16 *pu2_iscal_mat,
+                                         const UWORD16 *pu2_weigh_mat,
+                                         UWORD32 u4_qp_div_6,
+                                         WORD16 *pi2_tmp,
+                                         WORD32 iq_start_idx,
+                                         WORD16 *pi2_dc_ld_addr)
+{
+    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
+    __m128i src_r0_r1, src_r2_r3;
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    __m128i scalemat_r0_r1, scalemat_r2_r3;
+    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
+    __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
+    __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
+    /* Rounding term for the right-shift path only. It is built conditionally  */
+    /* because u4_qp_div_6 is unsigned: for u4_qp_div_6 >= 4 the expression    */
+    /* (3 - u4_qp_div_6) wraps around and the shift would be undefined.        */
+    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
+    __m128i value_32 = _mm_set1_epi32(32);
+
+    /*************************************************************/
+    /* Dequantization of coefficients.                           */
+    /*************************************************************/
+    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
+    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
+    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
+    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
+    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
+    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
+
+    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
+    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b20*q20 b21*q21 b22*q22 b23*q23 b30*q30 b31*q31 b32*q32 b33*q33 -- 16 bit result
+
+    // Interleave with zeros so that each 32-bit lane holds one (value, 0) pair of 16-bit elements
+    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
+    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
+    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit long
+    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit long
+
+    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
+    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
+    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
+
+    // madd multiplies the signed 16-bit pairs and sums each pair; the second product is 0*0,
+    // so every 32-bit lane ends up with the signed product a*b*q
+    temp4 = _mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a01*b01*q01 a02*b02*q02 a03*b03*q03 -- 32 bits long
+    temp5 = _mm_madd_epi16(src_r1, temp5); //row 1
+    temp6 = _mm_madd_epi16(src_r2, temp6); //row 2
+    temp7 = _mm_madd_epi16(src_r3, temp7); //row 3
+
+    if (u4_qp_div_6 >= 4) {
+        // scale up by 2^(qp/6 - 4)
+        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
+        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
+        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
+        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
+    } else {
+        // rounded arithmetic shift right by (4 - qp/6)
+        temp4 = _mm_add_epi32(temp4, add_rshift);
+        temp5 = _mm_add_epi32(temp5, add_rshift);
+        temp6 = _mm_add_epi32(temp6, add_rshift);
+        temp7 = _mm_add_epi32(temp7, add_rshift);
+        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
+        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
+        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
+        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
+    }
+
+    // Replace the dequantized (0,0) coefficient with the externally supplied dc value
+    if (iq_start_idx == 1)
+        resq_r0 = _mm_insert_epi32(resq_r0,(WORD32)pi2_dc_ld_addr[0],0);
+    /* Perform Inverse transform */
+    /*-------------------------------------------------------------*/
+    /* IDCT [ Horizontal transformation ] */
+    /*-------------------------------------------------------------*/
+    // Matrix transpose
+    /*
+     * a0 a1 a2 a3
+     * b0 b1 b2 b3
+     * c0 c1 c2 c3
+     * d0 d1 d2 d3
+     */
+    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 b0 a1 b1
+    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //c0 d0 c1 d1
+    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //a2 b2 a3 b3
+    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 d2 c3 d3
+    resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 b0 c0 d0
+    resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //a1 b1 c1 d1
+    resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //a2 b2 c2 d2
+    resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //a3 b3 c3 d3
+    //Transform starts -- horizontal transform
+    /*------------------------------------------------------------------*/
+    /* z0 = w0 + w2 */
+    temp0 = _mm_add_epi32(resq_r0, resq_r2);
+    /* z1 = w0 - w2 */
+    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+    /* z2 = (w1 >> 1) - w3 */
+    temp2 = _mm_srai_epi32(resq_r1, 1); //(w1>>1)
+    temp2 = _mm_sub_epi32(temp2, resq_r3); //(w1>>1) - w3
+    /* z3 = w1 + (w3 >> 1) */
+    temp3 = _mm_srai_epi32(resq_r3, 1); //(w3>>1)
+    temp3 = _mm_add_epi32(temp3, resq_r1); //(w3>>1) + w1
+    /*----------------------------------------------------------*/
+    /* x0 = z0 + z3 */
+    resq_r0 = _mm_add_epi32(temp0, temp3);
+    /* x1 = z1 + z2 */
+    resq_r1 = _mm_add_epi32(temp1, temp2);
+    /* x2 = z1 - z2 */
+    resq_r2 = _mm_sub_epi32(temp1, temp2);
+    /* x3 = z0 - z3 */
+    resq_r3 = _mm_sub_epi32(temp0, temp3);
+    // Matrix transpose
+    /*
+     * a0 b0 c0 d0
+     * a1 b1 c1 d1
+     * a2 b2 c2 d2
+     * a3 b3 c3 d3
+     */
+    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 a1 b0 b1
+    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //a2 a3 b2 b3
+    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //c0 c1 d0 d1
+    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 c3 d2 d3
+    resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 a1 a2 a3
+    resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //b0 b1 b2 b3
+    resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //c0 c1 c2 c3
+    resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //d0 d1 d2 d3
+    //Transform ends -- horizontal transform
+
+    //Load pred buffer (8 bytes per row; only the first 4 are widened below)
+    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 x x x x 0 0 0 0 0 0 0 0 -- all 8 bits
+    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 x x x x 0 0 0 0 0 0 0 0 -- all 8 bits
+    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 x x x x 0 0 0 0 0 0 0 0 -- all 8 bits
+    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 x x x x 0 0 0 0 0 0 0 0 -- all 8 bits
+
+    pred_r0 = _mm_cvtepu8_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits
+    pred_r1 = _mm_cvtepu8_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits
+    pred_r2 = _mm_cvtepu8_epi32(pred_r2); //p20 p21 p22 p23 -- all 32 bits
+    pred_r3 = _mm_cvtepu8_epi32(pred_r3); //p30 p31 p32 p33 -- all 32 bits
+
+    /*--------------------------------------------------------------*/
+    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
+    /* */
+    /* Add the prediction and store it back to same buffer */
+    /*--------------------------------------------------------------*/
+    /* z0j = y0j + y2j */
+    temp0 = _mm_add_epi32(resq_r0, resq_r2);
+    /* z1j = y0j - y2j */
+    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+    /* z2j = (y1j>>1) - y3j */
+    temp2 = _mm_srai_epi32(resq_r1, 1); //(y1j>>1)
+    temp2 = _mm_sub_epi32(temp2, resq_r3);
+    /* z3j = y1j + (y3j>>1) */
+    temp3 = _mm_srai_epi32(resq_r3, 1); //(y3j>>1)
+    temp3 = _mm_add_epi32(temp3, resq_r1);
+
+    /* x0j = z0j + z3j */
+    temp4 = _mm_add_epi32(temp0, temp3);
+    temp4 = _mm_add_epi32(temp4, value_32);
+    temp4 = _mm_srai_epi32(temp4, 6);
+    temp4 = _mm_add_epi32(temp4, pred_r0);
+    /* x1j = z1j + z2j */
+    temp5 = _mm_add_epi32(temp1, temp2);
+    temp5 = _mm_add_epi32(temp5, value_32);
+    temp5 = _mm_srai_epi32(temp5, 6);
+    temp5 = _mm_add_epi32(temp5, pred_r1);
+    /* x2j = z1j - z2j */
+    temp6 = _mm_sub_epi32(temp1, temp2);
+    temp6 = _mm_add_epi32(temp6, value_32);
+    temp6 = _mm_srai_epi32(temp6, 6);
+    temp6 = _mm_add_epi32(temp6, pred_r2);
+    /* x3j = z0j - z3j */
+    temp7 = _mm_sub_epi32(temp0, temp3);
+    temp7 = _mm_add_epi32(temp7, value_32);
+    temp7 = _mm_srai_epi32(temp7, 6);
+    temp7 = _mm_add_epi32(temp7, pred_r3);
+
+    // 32-bit to 16-bit conversion (signed saturation)
+    temp0 = _mm_packs_epi32(temp4, temp5);
+    temp1 = _mm_packs_epi32(temp6, temp7);
+    /*------------------------------------------------------------------*/
+    //Clipping the results to 8 bits
+    sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); // sign check: all ones where the lane is > 0
+    temp0 = _mm_and_si128(temp0, sign_reg); // lanes that are not positive become 0
+    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
+    temp1 = _mm_and_si128(temp1, sign_reg);
+
+    // Saturating pack: 16 result bytes, 4 per row; the shifts bring rows 1..3 down to the low 32 bits
+    resq_r0 = _mm_packus_epi16(temp0, temp1);
+    resq_r1 = _mm_srli_si128(resq_r0, 4);
+    resq_r2 = _mm_srli_si128(resq_r1, 4);
+    resq_r3 = _mm_srli_si128(resq_r2, 4);
+
+    // Store 4 bytes per output row
+    *pu4_out = _mm_cvtsi128_si32(resq_r0);
+    pu1_out += out_strd;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r1);
+    pu1_out += out_strd;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r2);
+    pu1_out += out_strd;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r3);
+}
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 sub block from quantized chroma residue and
+ * prediction buffer
+ *
+ * @par Description:
+ * The quantized residue is first inverse quantized, then inverse transformed.
+ * This inverse transformed content is added to the prediction buffer to recon-
+ * struct the end output
+ *
+ * @param[in] pi2_src
+ * quantized 4x4 block
+ *
+ * @param[in] pu1_pred
+ * prediction 4x4 block
+ *
+ * @param[out] pu1_out
+ * reconstructed 4x4 block
+ *
+ * @param[in] src_strd
+ * quantization buffer stride
+ *
+ * @param[in] pred_strd,
+ * Prediction buffer stride
+ *
+ * @param[in] out_strd
+ * recon buffer Stride
+ *
+ * @param[in] pu2_scaling_list
+ * pointer to scaling list
+ *
+ * @param[in] pu2_norm_adjust
+ * pointer to inverse scale matrix
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6)
+ *
+ * @param[in] pi4_tmp
+ * temporary buffer of size 1*16
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_chroma_4x4_sse42(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscal_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 u4_qp_div_6,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dc_ld_addr)
+ {
+ __m128i src_r0_r1, src_r2_r3;
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i scalemat_r0_r1, scalemat_r2_r3;
+ __m128i pred_r0, pred_r1, pred_r2, pred_r3;
+ __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
+ __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+ __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+ __m128i resq_r0, resq_r1, resq_r2, resq_r3;
+ __m128i add_rshift = _mm_set1_epi32((1 << (3 - u4_qp_div_6)));
+ __m128i value_32 = _mm_set1_epi32(32);
+ __m128i chroma_mask = _mm_set1_epi16 (0xFF);
+ /*************************************************************/
+ /* Dequantization of coefficients. Will be replaced by SIMD */
+ /* operations on platform */
+ /*************************************************************/
+ src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
+ src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
+ scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
+ scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
+ dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
+ dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
+
+ temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
+ temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- 16 bit result
+
+ temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
+ temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
+ temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
+ temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
+
+ src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
+ src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
+ src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
+
+ temp4 = _mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a10*b10*q10 a20*b20*q20 a30*b30 q30 -- 32 bits long
+ temp5 = _mm_madd_epi16(src_r1, temp5);
+ temp6 = _mm_madd_epi16(src_r2, temp6);
+ temp7 = _mm_madd_epi16(src_r3, temp7);
+
+ if (u4_qp_div_6 >= 4) {
+ resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
+ resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
+ resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
+ resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
+ } else {
+ temp4 = _mm_add_epi32(temp4, add_rshift);
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp6 = _mm_add_epi32(temp6, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
+ resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
+ resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
+ resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
+ }
+
+ resq_r0 = _mm_insert_epi32(resq_r0,(WORD32)pi2_dc_ld_addr[0],0);
+ /* Perform Inverse transform */
+ /*-------------------------------------------------------------*/
+ /* IDCT [ Horizontal transformation ] */
+ /*-------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 a1 a2 a3
+ * b0 b1 b2 b3
+ * c0 c1 c2 c3
+ * d0 d1 d2 d3
+ */
+ temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 b0 a1 b1
+ temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //c0 d0 c1 d1
+ temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //a2 b2 a3 b3
+ temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 d2 c3 d3
+ resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 b0 c0 d0
+ resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //a1 b1 c1 d1
+ resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //a2 b2 c2 d2
+ resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //a3 b3 c3 d3
+ //Transform starts -- horizontal transform
+ /*------------------------------------------------------------------*/
+ /* z0 = w0 + w2 */
+ temp0 = _mm_add_epi32(resq_r0, resq_r2);
+ /* z1 = w0 - w2 */
+ temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+ /* z2 = (w1 >> 1) - w3 */
+ temp2 = _mm_srai_epi32(resq_r1, 1); //(w1>>1)
+ temp2 = _mm_sub_epi32(temp2, resq_r3); //(w1>>1) - w3
+ /* z3 = w1 + (w3 >> 1) */
+ temp3 = _mm_srai_epi32(resq_r3, 1); //(w3>>1) + w1
+ temp3 = _mm_add_epi32(temp3, resq_r1);
+ /*----------------------------------------------------------*/
+ /* x0 = z0 + z3 */
+ resq_r0 = _mm_add_epi32(temp0, temp3);
+ /* x1 = z1 + z2 */
+ resq_r1 = _mm_add_epi32(temp1, temp2);
+ /* x2 = z1 - z2 */
+ resq_r2 = _mm_sub_epi32(temp1, temp2);
+ /* x3 = z0 - z3 */
+ resq_r3 = _mm_sub_epi32(temp0, temp3);
+ // Matrix transpose
+ /*
+ * a0 b0 c0 d0
+ * a1 b1 c1 d1
+ * a2 b2 c2 d2
+ * a3 b3 c3 d3
+ */
+ temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 a1 b0 b1
+ temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //a2 a3 b2 b3
+ temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //c0 c1 d0 d1
+ temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 c3 d2 d3
+ resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 a1 a2 a3
+ resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //b0 b1 b2 b3
+ resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //c0 c1 c2 c3
+ resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //d0 d1 d2 d3
+ //Transform ends -- horizontal transform
+
+ //Load pred buffer
+ pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
+
+ pred_r0 = _mm_and_si128(pred_r0, chroma_mask);
+ pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
+ pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
+ pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
+
+ pred_r0 = _mm_cvtepu16_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits
+ pred_r1 = _mm_cvtepu16_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits
+ pred_r2 = _mm_cvtepu16_epi32(pred_r2); //p20 p21 p22 p23 -- all 32 bits
+ pred_r3 = _mm_cvtepu16_epi32(pred_r3); //p30 p31 p32 p33 -- all 32 bits
+
+ /*--------------------------------------------------------------*/
+ /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
+ /* */
+ /* Add the prediction and store it back to same buffer */
+ /*--------------------------------------------------------------*/
+ /* z0j = y0j + y2j */
+ temp0 = _mm_add_epi32(resq_r0, resq_r2);
+ /* z1j = y0j - y2j */
+ temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+ /* z2j = (y1j>>1) - y3j */
+ temp2 = _mm_srai_epi32(resq_r1, 1); //(y1j>>1)
+ temp2 = _mm_sub_epi32(temp2, resq_r3);
+ /* z3j = y1j + (y3j>>1) */
+ temp3 = _mm_srai_epi32(resq_r3, 1); //(y3j>>1)
+ temp3 = _mm_add_epi32(temp3, resq_r1);
+
+ /* x0j = z0j + z3j */
+ temp4 = _mm_add_epi32(temp0, temp3);
+ temp4 = _mm_add_epi32(temp4, value_32);
+ temp4 = _mm_srai_epi32(temp4, 6);
+ temp4 = _mm_add_epi32(temp4, pred_r0);
+ /* x1j = z1j + z2j */
+ temp5 = _mm_add_epi32(temp1, temp2);
+ temp5 = _mm_add_epi32(temp5, value_32);
+ temp5 = _mm_srai_epi32(temp5, 6);
+ temp5 = _mm_add_epi32(temp5, pred_r1);
+ /* x2j = z1j - z2j */
+ temp6 = _mm_sub_epi32(temp1, temp2);
+ temp6 = _mm_add_epi32(temp6, value_32);
+ temp6 = _mm_srai_epi32(temp6, 6);
+ temp6 = _mm_add_epi32(temp6, pred_r2);
+ /* x3j = z0j - z3j */
+ temp7 = _mm_sub_epi32(temp0, temp3);
+ temp7 = _mm_add_epi32(temp7, value_32);
+ temp7 = _mm_srai_epi32(temp7, 6);
+ temp7 = _mm_add_epi32(temp7, pred_r3);
+
+ // 32-bit to 16-bit conversion
+ temp0 = _mm_packs_epi32(temp4, temp5);
+ temp1 = _mm_packs_epi32(temp6, temp7);
+ /*------------------------------------------------------------------*/
+ //Clipping the results to 8 bits
+ sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); // sign check
+ temp0 = _mm_and_si128(temp0, sign_reg);
+ sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
+ temp1 = _mm_and_si128(temp1, sign_reg);
+
+ resq_r0 = _mm_packus_epi16(temp0, temp1);
+ resq_r1 = _mm_srli_si128(resq_r0, 4);
+ resq_r2 = _mm_srli_si128(resq_r1, 4);
+ resq_r3 = _mm_srli_si128(resq_r2, 4);
+
+ resq_r0 = _mm_cvtepu8_epi16(resq_r0); //p00 p01 p02 p03 -- all 16 bits
+ resq_r1 = _mm_cvtepu8_epi16(resq_r1); //p10 p11 p12 p13 -- all 16 bits
+ resq_r2 = _mm_cvtepu8_epi16(resq_r2); //p20 p21 p22 p23 -- all 16 bits
+ resq_r3 = _mm_cvtepu8_epi16(resq_r3); //p30 p31 p32 p33 -- all 16 bits
+
+ chroma_mask = _mm_unpacklo_epi64(chroma_mask, zero_8x16b);
+
+ _mm_maskmoveu_si128(resq_r0, chroma_mask, (char *)(&pu1_out[0]));
+ _mm_maskmoveu_si128(resq_r1, chroma_mask, (char *)(&pu1_out[out_strd]));
+ _mm_maskmoveu_si128(resq_r2, chroma_mask, (char *)(&pu1_out[2*out_strd]));
+ _mm_maskmoveu_si128(resq_r3, chroma_mask, (char *)(&pu1_out[3*out_strd]));
+}
diff --git a/common/x86/ih264_iquant_itrans_recon_ssse3.c b/common/x86/ih264_iquant_itrans_recon_ssse3.c
new file mode 100755
index 0000000..ca1397e
--- /dev/null
+++ b/common/x86/ih264_iquant_itrans_recon_ssse3.c
@@ -0,0 +1,1035 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_iquant_itrans_recon_ssse3.c
+ *
+ * @brief
+ * Contains function definitions for inverse quantization, inverse
+ * transform and reconstruction
+ *
+ * @author
+ * Mohit [100664]
+ *
+ * @par List of Functions:
+ * - ih264_iquant_itrans_recon_4x4_ssse3()
+ * - ih264_iquant_itrans_recon_8x8_ssse3()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_trans_macros.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_size_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include <immintrin.h>
+
+/*
+ ********************************************************************************
+ *
+ * @brief This function reconstructs a 4x4 sub block from quantized residue and
+ * prediction buffer
+ *
+ * @par Description:
+ * The quantized residue is first inverse quantized, then inverse transformed.
+ * This inverse transformed content is added to the prediction buffer to recon-
+ * struct the end output
+ *
+ * @param[in] pi2_src
+ * quantized 4x4 block
+ *
+ * @param[in] pu1_pred
+ * prediction 4x4 block
+ *
+ * @param[out] pu1_out
+ * reconstructed 4x4 block
+ *
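+ * @param[in] pred_strd
+ * Prediction buffer stride
+ *
+ * @param[in] out_strd
+ * recon buffer stride
+ *
+ * @param[in] pu2_iscal_mat
+ * pointer to the 4x4 inverse scale matrix (16 entries)
+ *
+ * @param[in] pu2_weigh_mat
+ * pointer to the 4x4 weighting matrix (16 entries); multiplied element-wise
+ * with pu2_iscal_mat
+ *
+ * @param[in] u4_qp_div_6
+ * Floor (qp/6)
+ *
+ * @param[in] pi2_tmp
+ * temporary buffer of size 1*16 (not used by this implementation)
+ *
+ * @param[in] iq_start_idx
+ * when 1, the first coefficient is taken from pi2_src[0] as is, without
+ * dequantization
+ *
+ * @param[in] pi2_dc_ld_addr
+ * DC load address (not used by this implementation)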
+ *
+ * @returns none
+ *
+ * @remarks none
+ *
+ *******************************************************************************
+ */
+void ih264_iquant_itrans_recon_4x4_ssse3(WORD16 *pi2_src,
+                                   UWORD8 *pu1_pred,
+                                   UWORD8 *pu1_out,
+                                   WORD32 pred_strd,
+                                   WORD32 out_strd,
+                                   const UWORD16 *pu2_iscal_mat,
+                                   const UWORD16 *pu2_weigh_mat,
+                                   UWORD32 u4_qp_div_6,
+                                   WORD16 *pi2_tmp,
+                                   WORD32 iq_start_idx,
+                                   WORD16 *pi2_dc_ld_addr)
+{
+    /*
+     * Outline:
+     *  1. Dequantize the 16 coefficients of pi2_src with
+     *     pu2_iscal_mat[i] * pu2_weigh_mat[i] and the qp/6 dependent shift.
+     *  2. Run the 4x4 inverse transform (horizontal pass, then vertical pass).
+     *  3. Round with (x + 32) >> 6, add the prediction, clip to 8 bits and
+     *     store 4 rows of 4 pixels to pu1_out.
+     * pi2_tmp and pi2_dc_ld_addr are not used by this implementation.
+     */
+    UWORD32 *pu4_out = (UWORD32 *) pu1_out;
+    __m128i src_r0_r1, src_r2_r3;
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    __m128i scalemat_r0_r1, scalemat_r2_r3, predload_r;
+    __m128i pred_r0, pred_r1, pred_r2, pred_r3;
+    __m128i sign_reg, dequant_r0_r1, dequant_r2_r3;
+    __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+    __m128i temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    __m128i resq_r0, resq_r1, resq_r2, resq_r3;
+    /* Rounding term for the right-shift path only. It is guarded because     */
+    /* (3 - u4_qp_div_6) wraps for u4_qp_div_6 >= 4 and shifting by that      */
+    /* amount is undefined; the value is not used on the left-shift path.     */
+    __m128i add_rshift = _mm_set1_epi32((u4_qp_div_6 < 4) ? (1 << (3 - u4_qp_div_6)) : 0);
+    __m128i value_32 = _mm_set1_epi32(32);
+
+    (void) pi2_tmp;
+    (void) pi2_dc_ld_addr;
+
+    /*************************************************************/
+    /* Dequantization of coefficients                            */
+    /*************************************************************/
+    src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
+    src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
+    scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
+    scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_iscal_mat + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
+    dequant_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat)); //q00 q01 q02 q03 q10 q11 q12 q13 -- all 16 bits
+    dequant_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_weigh_mat + 8)); //q20 q21 q22 q23 q30 q31 q32 q33 -- all 16 bits
+
+    temp0 = _mm_mullo_epi16(scalemat_r0_r1, dequant_r0_r1); //b00*q00 b01*q01 b02*q02 b03*q03 b10*q10 b11*q11 b12*q12 b13*q13 -- low 16 bits of each product
+    temp1 = _mm_mullo_epi16(scalemat_r2_r3, dequant_r2_r3); //b20*q20 b21*q21 b22*q22 b23*q23 b30*q30 b31*q31 b32*q32 b33*q33 -- low 16 bits of each product
+
+    // Interleave with zero so each 32-bit lane holds (value, 0) as a 16-bit pair
+    temp4 = _mm_unpacklo_epi16(temp0, zero_8x16b); // b00*q00 0 b01*q01 0 b02*q02 0 b03*q03 0 -- 16 bit long
+    temp5 = _mm_unpackhi_epi16(temp0, zero_8x16b); // b10*q10 0 b11*q11 0 b12*q12 0 b13*q13 0 -- 16 bit long
+    temp6 = _mm_unpacklo_epi16(temp1, zero_8x16b); // b20*q20 0 b21*q21 0 b22*q22 0 b23*q23 0 -- 16 bit long
+    temp7 = _mm_unpackhi_epi16(temp1, zero_8x16b); // b30*q30 0 b31*q31 0 b32*q32 0 b33*q33 0 -- 16 bit long
+
+    src_r0 = _mm_unpacklo_epi16(src_r0_r1, zero_8x16b); // a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+    src_r1 = _mm_unpackhi_epi16(src_r0_r1, zero_8x16b); // a10 0 a11 0 a12 0 a13 0 -- 16 bit long
+    src_r2 = _mm_unpacklo_epi16(src_r2_r3, zero_8x16b); // a20 0 a21 0 a22 0 a23 0 -- 16 bit long
+    src_r3 = _mm_unpackhi_epi16(src_r2_r3, zero_8x16b); // a30 0 a31 0 a32 0 a33 0 -- 16 bit long
+
+    // madd multiplies signed 16-bit pairs and adds them; the second element of
+    // every pair is 0, so each 32-bit lane is simply coefficient * (b*q)
+    temp4 = _mm_madd_epi16(src_r0, temp4); //a00*b00*q00 a01*b01*q01 a02*b02*q02 a03*b03*q03 -- 32 bits long
+    temp5 = _mm_madd_epi16(src_r1, temp5); //row 1
+    temp6 = _mm_madd_epi16(src_r2, temp6); //row 2
+    temp7 = _mm_madd_epi16(src_r3, temp7); //row 3
+
+    if (u4_qp_div_6 >= 4) {
+        // scale up: x << (qp/6 - 4)
+        resq_r0 = _mm_slli_epi32(temp4, u4_qp_div_6 - 4);
+        resq_r1 = _mm_slli_epi32(temp5, u4_qp_div_6 - 4);
+        resq_r2 = _mm_slli_epi32(temp6, u4_qp_div_6 - 4);
+        resq_r3 = _mm_slli_epi32(temp7, u4_qp_div_6 - 4);
+    } else {
+        // scale down with rounding: (x + (1 << (3 - qp/6))) >> (4 - qp/6)
+        temp4 = _mm_add_epi32(temp4, add_rshift);
+        temp5 = _mm_add_epi32(temp5, add_rshift);
+        temp6 = _mm_add_epi32(temp6, add_rshift);
+        temp7 = _mm_add_epi32(temp7, add_rshift);
+        resq_r0 = _mm_srai_epi32(temp4, 4 - u4_qp_div_6);
+        resq_r1 = _mm_srai_epi32(temp5, 4 - u4_qp_div_6);
+        resq_r2 = _mm_srai_epi32(temp6, 4 - u4_qp_div_6);
+        resq_r3 = _mm_srai_epi32(temp7, 4 - u4_qp_div_6);
+    }
+
+    if (iq_start_idx == 1)
+    {
+        // Replace the first 32-bit lane with pi2_src[0] as is (not dequantized):
+        // low 16 bits are written first, then the high 16 bits are filled with
+        // the sign so the lane holds the sign-extended 16-bit value.
+        resq_r0 = _mm_insert_epi16(resq_r0,(WORD32)pi2_src[0],0);
+        if (pi2_src[0] >= 0)
+            resq_r0 = _mm_insert_epi16(resq_r0,0,1);
+        else
+            resq_r0 = _mm_insert_epi16(resq_r0,-1,1);
+    }
+    /* Perform Inverse transform */
+    /*-------------------------------------------------------------*/
+    /* IDCT [ Horizontal transformation ] */
+    /*-------------------------------------------------------------*/
+    // Matrix transpose
+    /*
+     * a0 a1 a2 a3
+     * b0 b1 b2 b3
+     * c0 c1 c2 c3
+     * d0 d1 d2 d3
+     */
+    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 b0 a1 b1
+    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //c0 d0 c1 d1
+    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //a2 b2 a3 b3
+    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 d2 c3 d3
+    resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 b0 c0 d0
+    resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //a1 b1 c1 d1
+    resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //a2 b2 c2 d2
+    resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //a3 b3 c3 d3
+    //Transform starts -- horizontal transform (all four rows in parallel)
+    /*------------------------------------------------------------------*/
+    /* z0 = w0 + w2 */
+    temp0 = _mm_add_epi32(resq_r0, resq_r2);
+    /* z1 = w0 - w2 */
+    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+    /* z2 = (w1 >> 1) - w3 */
+    temp2 = _mm_srai_epi32(resq_r1, 1); //(w1>>1)
+    temp2 = _mm_sub_epi32(temp2, resq_r3); //(w1>>1) - w3
+    /* z3 = w1 + (w3 >> 1) */
+    temp3 = _mm_srai_epi32(resq_r3, 1); //(w3>>1)
+    temp3 = _mm_add_epi32(temp3, resq_r1); //(w3>>1) + w1
+    /*----------------------------------------------------------*/
+    /* x0 = z0 + z3 */
+    resq_r0 = _mm_add_epi32(temp0, temp3);
+    /* x1 = z1 + z2 */
+    resq_r1 = _mm_add_epi32(temp1, temp2);
+    /* x2 = z1 - z2 */
+    resq_r2 = _mm_sub_epi32(temp1, temp2);
+    /* x3 = z0 - z3 */
+    resq_r3 = _mm_sub_epi32(temp0, temp3);
+    // Matrix transpose
+    /*
+     * a0 b0 c0 d0
+     * a1 b1 c1 d1
+     * a2 b2 c2 d2
+     * a3 b3 c3 d3
+     */
+    temp1 = _mm_unpacklo_epi32(resq_r0, resq_r1); //a0 a1 b0 b1
+    temp3 = _mm_unpacklo_epi32(resq_r2, resq_r3); //a2 a3 b2 b3
+    temp2 = _mm_unpackhi_epi32(resq_r0, resq_r1); //c0 c1 d0 d1
+    temp4 = _mm_unpackhi_epi32(resq_r2, resq_r3); //c2 c3 d2 d3
+    resq_r0 = _mm_unpacklo_epi64(temp1, temp3); //a0 a1 a2 a3
+    resq_r1 = _mm_unpackhi_epi64(temp1, temp3); //b0 b1 b2 b3
+    resq_r2 = _mm_unpacklo_epi64(temp2, temp4); //c0 c1 c2 c3
+    resq_r3 = _mm_unpackhi_epi64(temp2, temp4); //d0 d1 d2 d3
+    //Transform ends -- horizontal transform
+
+    //Load pred buffer. Each load reads 8 bytes; only the low 4 bytes are kept
+    //by the two unpacklo steps below.
+    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 + 4 more bytes -- all 8 bits
+    pred_r0 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p00 p01 p02 p03 ... -- all 16 bits
+
+    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 + 4 more bytes -- all 8 bits
+    pred_r1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p10 p11 p12 p13 ... -- all 16 bits
+
+    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 + 4 more bytes -- all 8 bits
+    pred_r2 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p20 p21 p22 p23 ... -- all 16 bits
+
+    predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 + 4 more bytes -- all 8 bits
+    pred_r3 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p30 p31 p32 p33 ... -- all 16 bits
+    pred_r0 = _mm_unpacklo_epi16(pred_r0, zero_8x16b); //p00 p01 p02 p03 -- 32 bits zero extended
+    pred_r1 = _mm_unpacklo_epi16(pred_r1, zero_8x16b); //p10 p11 p12 p13 -- 32 bits zero extended
+    pred_r2 = _mm_unpacklo_epi16(pred_r2, zero_8x16b); //p20 p21 p22 p23 -- 32 bits zero extended
+    pred_r3 = _mm_unpacklo_epi16(pred_r3, zero_8x16b); //p30 p31 p32 p33 -- 32 bits zero extended
+
+    /*--------------------------------------------------------------*/
+    /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
+    /* */
+    /* Add the prediction and store it back to same buffer */
+    /*--------------------------------------------------------------*/
+    /* z0j = y0j + y2j */
+    temp0 = _mm_add_epi32(resq_r0, resq_r2);
+    /* z1j = y0j - y2j */
+    temp1 = _mm_sub_epi32(resq_r0, resq_r2);
+    /* z2j = (y1j>>1) - y3j */
+    temp2 = _mm_srai_epi32(resq_r1, 1); //(y1j>>1)
+    temp2 = _mm_sub_epi32(temp2, resq_r3);
+    /* z3j = y1j + (y3j>>1) */
+    temp3 = _mm_srai_epi32(resq_r3, 1); //(y3j>>1)
+    temp3 = _mm_add_epi32(temp3, resq_r1);
+
+    /* x0j = z0j + z3j */
+    temp4 = _mm_add_epi32(temp0, temp3);
+    temp4 = _mm_add_epi32(temp4, value_32);
+    temp4 = _mm_srai_epi32(temp4, 6);
+    temp4 = _mm_add_epi32(temp4, pred_r0);
+    /* x1j = z1j + z2j */
+    temp5 = _mm_add_epi32(temp1, temp2);
+    temp5 = _mm_add_epi32(temp5, value_32);
+    temp5 = _mm_srai_epi32(temp5, 6);
+    temp5 = _mm_add_epi32(temp5, pred_r1);
+    /* x2j = z1j - z2j */
+    temp6 = _mm_sub_epi32(temp1, temp2);
+    temp6 = _mm_add_epi32(temp6, value_32);
+    temp6 = _mm_srai_epi32(temp6, 6);
+    temp6 = _mm_add_epi32(temp6, pred_r2);
+    /* x3j = z0j - z3j */
+    temp7 = _mm_sub_epi32(temp0, temp3);
+    temp7 = _mm_add_epi32(temp7, value_32);
+    temp7 = _mm_srai_epi32(temp7, 6);
+    temp7 = _mm_add_epi32(temp7, pred_r3);
+
+    // 32-bit to 16-bit conversion (signed saturation)
+    temp0 = _mm_packs_epi32(temp4, temp5); // row 0, row 1
+    temp1 = _mm_packs_epi32(temp6, temp7); // row 2, row 3
+    /*------------------------------------------------------------------*/
+    //Clipping the results to 8 bits: lanes that are not > 0 are zeroed here,
+    //the upper bound (255) is applied by the unsigned saturating pack below
+    sign_reg = _mm_cmpgt_epi16(temp0, zero_8x16b); // sign check
+    temp0 = _mm_and_si128(temp0, sign_reg);
+    sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b);
+    temp1 = _mm_and_si128(temp1, sign_reg);
+
+    resq_r0 = _mm_packus_epi16(temp0, temp1); // 16 bytes: row 0, row 1, row 2, row 3
+    resq_r1 = _mm_srli_si128(resq_r0, 4); // row 1 in the low 4 bytes
+    resq_r2 = _mm_srli_si128(resq_r1, 4); // row 2 in the low 4 bytes
+    resq_r3 = _mm_srli_si128(resq_r2, 4); // row 3 in the low 4 bytes
+
+    // Store the low 4 bytes of each register as one output row.
+    // NOTE(review): the rows are written through a UWORD32 pointer derived
+    // from pu1_out; this assumes unaligned 32-bit stores are acceptable on
+    // the target -- confirm.
+    *pu4_out = _mm_cvtsi128_si32(resq_r0);
+    pu1_out += out_strd;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r1);
+    pu1_out += out_strd;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r2);
+    pu1_out += out_strd;
+    pu4_out = (UWORD32 *) (pu1_out);
+    *(pu4_out) = _mm_cvtsi128_si32(resq_r3);
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs inverse quant and Inverse transform type Ci8 for 8x8 block
+ *
+ * @par Description:
+ * Performs inverse transform Ci8 and adds the residue to get the
+ * reconstructed block
+ *
+ * @param[in] pi2_src
+ * Input 8x8coefficients
+ *
+ * @param[in] pu1_pred
+ * Prediction 8x8 block
+ *
+ * @param[out] pu1_recon
+ * Output 8x8 block
+ *
+ * @param[in] q_div
+ * QP/6
+ *
+ * @param[in] q_rem
+ * QP%6
+ *
+ * @param[in] q_lev
+ * Quantizer level
+ *
+ * @param[in] u4_src_stride
+ * Input stride
+ *
+ * @param[in] u4_pred_stride,
+ * Prediction stride
+ *
+ * @param[in] u4_out_stride
+ * Output Stride
+ *
+ * @param[in] pi4_tmp
+ * temporary buffer of size 1*64
+ * the tmp for each block
+ *
+ * @param[in] pu4_iquant_mat
+ * Pointer to the inverse quantization matrix
+ *
+ * @returns Void
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+void ih264_iquant_itrans_recon_8x8_ssse3(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_out,
+ WORD32 pred_strd,
+ WORD32 out_strd,
+ const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div,
+ WORD16 *pi2_tmp,
+ WORD32 iq_start_idx,
+ WORD16 *pi2_dc_ld_addr)
+{
+ __m128i src_r0;
+ __m128i scalemat_r0;
+ __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+ // __m128i one_8x16b = _mm_set1_epi8(255); // all bits set to 1
+ // __m128i one_zero_mask = _mm_unpacklo_epi16(one_8x16b, zero_8x16b); // 1 0 1 0 1 0 1 0 --- 16 bits size
+ __m128i value_32 = _mm_set1_epi32(32);
+ __m128i add_rshift = _mm_set1_epi32((1 << (5 - qp_div)));
+ __m128i dequant_r0;
+ __m128i predload_r;
+ __m128i pred_r0_1, pred_r1_1, pred_r2_1, pred_r3_1, pred_r4_1, pred_r5_1,
+ pred_r6_1, pred_r7_1;
+ __m128i sign_reg;
+ __m128i src_r0_1, src_r0_2;
+ __m128i scalemat_r0_1, scalemat_r0_2;
+ __m128i temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+ __m128i temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17,
+ temp18, temp19, temp20;
+ // To store dequantization results
+ __m128i resq_r0_1, resq_r0_2, resq_r1_1, resq_r1_2, resq_r2_1, resq_r2_2,
+ resq_r3_1, resq_r3_2, resq_r4_1, resq_r4_2, resq_r5_1, resq_r5_2,
+ resq_r6_1, resq_r6_2, resq_r7_1, resq_r7_2;
+
+ /*************************************************************/
+ /* Dequantization of coefficients. Will be replaced by SIMD */
+ /* operations on platform. Note : DC coeff is not scaled */
+ /*************************************************************/
+
+ // Row 0 processing
+ src_r0 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a04 a05 a06 a07 -- the source matrix 0th row
+ scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat)); //b00 b01 b02 b03 b04 b05 b06 b07 -- the scaling matrix 0th row
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[0])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
+ src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
+ temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
+ scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
+ scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
+
+ temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
+ temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
+
+ if (qp_div >= 6) {
+ resq_r0_1 = _mm_slli_epi32(temp5, qp_div - 6);
+ resq_r0_2 = _mm_slli_epi32(temp7, qp_div - 6);
+ } else {
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r0_1 = _mm_srai_epi32(temp5, 6 - qp_div);
+ resq_r0_2 = _mm_srai_epi32(temp7, 6 - qp_div);
+ }
+ resq_r0_1 = _mm_packs_epi32(resq_r0_1, resq_r0_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
+ // Row 1 processing
+ src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 1st row
+ scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 8)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 1st row
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[8])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
+ src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
+ temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
+ scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
+ scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
+ temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
+ temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
+ if (qp_div >= 6) {
+ resq_r1_1 = _mm_slli_epi32(temp5, qp_div - 6);
+ resq_r1_2 = _mm_slli_epi32(temp7, qp_div - 6);
+ } else {
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r1_1 = _mm_srai_epi32(temp5, 6 - qp_div);
+ resq_r1_2 = _mm_srai_epi32(temp7, 6 - qp_div);
+ }
+ resq_r1_1 = _mm_packs_epi32(resq_r1_1, resq_r1_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
+ // Row 2 processing
+ src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 16)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 2nd row
+ scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 16)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 2nd row
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[16])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
+ src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
+ temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
+ scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
+ scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
+ temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
+ temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
+ if (qp_div >= 6) {
+ resq_r2_1 = _mm_slli_epi32(temp5, qp_div - 6);
+ resq_r2_2 = _mm_slli_epi32(temp7, qp_div - 6);
+ } else {
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r2_1 = _mm_srai_epi32(temp5, 6 - qp_div);
+ resq_r2_2 = _mm_srai_epi32(temp7, 6 - qp_div);
+ }
+ resq_r2_1 = _mm_packs_epi32(resq_r2_1, resq_r2_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
+ // Row 3 processing
+ src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 24)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 3rd row
+ scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 24)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 3rd row
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[24])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
+ src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
+ temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
+ scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
+ scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
+ temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 - 32 bits long
+ temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
+ if (qp_div >= 6) {
+ resq_r3_1 = _mm_slli_epi32(temp5, qp_div - 6);
+ resq_r3_2 = _mm_slli_epi32(temp7, qp_div - 6);
+ } else {
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r3_1 = _mm_srai_epi32(temp5, 6 - qp_div);
+ resq_r3_2 = _mm_srai_epi32(temp7, 6 - qp_div);
+ }
+ resq_r3_1 = _mm_packs_epi32(resq_r3_1, resq_r3_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
+ // Row 4 processing
+ src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 32)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 4th row
+ scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 32)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 4th row
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[32])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
+ src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
+ temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
+ scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
+ scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
+ temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
+ temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
+ if (qp_div >= 6) {
+ resq_r4_1 = _mm_slli_epi32(temp5, qp_div - 6);
+ resq_r4_2 = _mm_slli_epi32(temp7, qp_div - 6);
+
+ } else {
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r4_1 = _mm_srai_epi32(temp5, 6 - qp_div);
+ resq_r4_2 = _mm_srai_epi32(temp7, 6 - qp_div);
+ }
+ resq_r4_1 = _mm_packs_epi32(resq_r4_1, resq_r4_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
+ // Row 5 processing
+ src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 40)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 5th row
+ scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 40)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 5th row
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[40])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
+ src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
+ temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
+ scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
+ scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
+ temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
+ temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
+ if (qp_div >= 6) {
+ resq_r5_1 = _mm_slli_epi32(temp5, qp_div - 6);
+ resq_r5_2 = _mm_slli_epi32(temp7, qp_div - 6);
+ //resq_r5_1 = _mm_and_si128(resq_r5_1,one_zero_mask);
+ //resq_r5_2 = _mm_and_si128(resq_r5_2,one_zero_mask);
+ } else {
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r5_1 = _mm_srai_epi32(temp5, 6 - qp_div);
+ resq_r5_2 = _mm_srai_epi32(temp7, 6 - qp_div);
+ }
+ resq_r5_1 = _mm_packs_epi32(resq_r5_1, resq_r5_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
+ // Row 6 processing
+ src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 48)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 6th row
+ scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 48)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 6th row
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[48])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
+ src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
+ temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
+ scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
+ scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
+ temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
+ temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
+ if (qp_div >= 6) {
+ resq_r6_1 = _mm_slli_epi32(temp5, qp_div - 6);
+ resq_r6_2 = _mm_slli_epi32(temp7, qp_div - 6);
+ //resq_r6_1 = _mm_and_si128(resq_r6_1,one_zero_mask);
+ //resq_r6_2 = _mm_and_si128(resq_r6_2,one_zero_mask);
+ } else {
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r6_1 = _mm_srai_epi32(temp5, 6 - qp_div);
+ resq_r6_2 = _mm_srai_epi32(temp7, 6 - qp_div);
+ //resq_r6_1 = _mm_and_si128(resq_r6_1,one_zero_mask);
+ //resq_r6_2 = _mm_and_si128(resq_r6_2,one_zero_mask);
+ }
+ resq_r6_1 = _mm_packs_epi32(resq_r6_1, resq_r6_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
+ // Row 7 processing
+ src_r0 = _mm_loadu_si128((__m128i *) (pi2_src + 56)); //a00 a01 a02 a03 a04 a05 a06 a07 a08 -- the source matrix 7th row
+ scalemat_r0 = _mm_loadu_si128((__m128i *) (pu2_iscale_mat + 56)); //b00 b01 b02 b03 b04 b05 b06 b07 b08 -- the scaling matrix 7th row
+ dequant_r0 = _mm_loadu_si128((__m128i *) (&pu2_weigh_mat[56])); //q0 q1 q2 q3 q4 q5 q6 q7 -- all 16 bits
+ src_r0_1 = _mm_unpacklo_epi16(src_r0, zero_8x16b); //a00 0 a01 0 a02 0 a03 0 -- 16 bit long
+ src_r0_2 = _mm_unpackhi_epi16(src_r0, zero_8x16b); // a04 0 a05 0 a06 0 a07 0 -- 16 bit long
+ temp10 = _mm_mullo_epi16(scalemat_r0, dequant_r0); //b00*q0 b01*q1 b02*q2 b03*q3 b04*q4 b05*q5 b06*q6 b07*q7 -- 16 bit result
+ scalemat_r0_1 = _mm_unpacklo_epi16(temp10, zero_8x16b); // b00*q0 0 b01*q1 0 b02*q2 0 b03*q3 0 -- 16 bit long
+ scalemat_r0_2 = _mm_unpackhi_epi16(temp10, zero_8x16b); // b04*q4 0 b05*q5 0 b06*q6 0 b07*q7 0 -- 16 bit long
+ temp5 = _mm_madd_epi16(src_r0_1, scalemat_r0_1); // a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 -- 32 bits long
+ temp7 = _mm_madd_epi16(src_r0_2, scalemat_r0_2); // a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 32 bits long
+ if (qp_div >= 6) {
+ resq_r7_1 = _mm_slli_epi32(temp5, qp_div - 6);
+ resq_r7_2 = _mm_slli_epi32(temp7, qp_div - 6);
+ } else {
+ temp5 = _mm_add_epi32(temp5, add_rshift);
+ temp7 = _mm_add_epi32(temp7, add_rshift);
+ resq_r7_1 = _mm_srai_epi32(temp5, 6 - qp_div);
+ resq_r7_2 = _mm_srai_epi32(temp7, 6 - qp_div);
+ }
+ resq_r7_1 = _mm_packs_epi32(resq_r7_1, resq_r7_2); //a00*b00*q0 a01*b01*q1 a02*b02*q2 a03*b03*q3 a04*b04*q4 a05*b05*q5 a06*b06*q6 a07*b07*q7 -- 16 bit long
+ /* Perform Inverse transform */
+ /*--------------------------------------------------------------------*/
+ /* IDCT [ Horizontal transformation ] */
+ /*--------------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 a1 a2 a3 a4 a5 a6 a7
+ * b0 b1 b2 b3 b4 b5 b6 b7
+ * c0 c1 c2 c3 c4 c5 c6 c7
+ * d0 d1 d2 d3 d4 d5 d6 d7
+ */
+ temp1 = _mm_unpacklo_epi16(resq_r0_1, resq_r1_1); //a0 b0 a1 b1 a2 b2 a3 b3
+ temp3 = _mm_unpacklo_epi16(resq_r2_1, resq_r3_1); //c0 d0 c1 d1 c2 d2 c3 d3
+ temp2 = _mm_unpackhi_epi16(resq_r0_1, resq_r1_1); //a4 b4 a5 b5 a6 b6 a7 b7
+ temp4 = _mm_unpackhi_epi16(resq_r2_1, resq_r3_1); //c4 d4 c5 d5 c6 d6 c7 d7
+ resq_r0_1 = _mm_unpacklo_epi32(temp1, temp3); //a0 b0 c0 d0 a1 b1 c1 d1
+ resq_r1_1 = _mm_unpackhi_epi32(temp1, temp3); //a2 b2 c2 d2 a3 b3 c3 d3
+ resq_r2_1 = _mm_unpacklo_epi32(temp2, temp4); //a4 b4 c4 d4 a5 b5 c5 d5
+ resq_r3_1 = _mm_unpackhi_epi32(temp2, temp4); //a6 b6 c6 d6 a7 b7 c7 d7
+ /*
+ * e0 e1 e2 e3 e4 e5 e6 e7
+ * f0 f1 f2 f3 f4 f5 f6 f7
+ * g0 g1 g2 g3 g4 g5 g6 g7
+ * h0 h1 h2 h3 h4 h5 h6 h7
+ */
+ temp1 = _mm_unpacklo_epi16(resq_r4_1, resq_r5_1); //e0 f0 e1 f1 e2 f2 e2 f3
+ temp3 = _mm_unpacklo_epi16(resq_r6_1, resq_r7_1); //g0 h0 g1 h1 g2 h2 g3 h3
+ temp2 = _mm_unpackhi_epi16(resq_r4_1, resq_r5_1); //e4 f4 e5 f5 e6 f6 e7 f7
+ temp4 = _mm_unpackhi_epi16(resq_r6_1, resq_r7_1); //g4 h4 g5 h5 g6 h6 g7 h7
+ resq_r4_1 = _mm_unpacklo_epi32(temp1, temp3); //e0 f0 g0 h0 e1 f1 g1 h1
+ resq_r5_1 = _mm_unpackhi_epi32(temp1, temp3); //e2 f2 g2 h2 e3 f3 g3 h3
+ resq_r6_1 = _mm_unpacklo_epi32(temp2, temp4); //e4 f4 g4 h4 e5 f5 g5 h5
+ resq_r7_1 = _mm_unpackhi_epi32(temp2, temp4); //e6 f6 g6 h6 e7 f7 g7 h7
+ /*
+ * a0 b0 c0 d0 a1 b1 c1 d1
+ * a2 b2 c2 d2 a3 b3 c3 d3
+ * a4 b4 c4 d4 a5 b5 c5 d5
+ * a6 b6 c6 d6 a7 b7 c7 d7
+ * e0 f0 g0 h0 e1 f1 g1 h1
+ * e2 f2 g2 h2 e3 f3 g3 h3
+ * e4 f4 g4 h4 e5 f5 g5 h5
+ * e6 f6 g6 h6 e7 f7 g7 h7
+ */
+ resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1); //a0 b0 c0 d0 e0 f0 g0 h0
+ resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1); //a1 b1 c1 d1 e1 f1 g1 h1
+ resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1); //a2 b2 c2 d2 e2 f2 g2 h2
+ resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1); //a3 b3 c3 d3 e3 f3 g3 h3
+ resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1); //a4 b4 c4 d4 e4 f4 g4 h4
+ resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1); //a5 b5 c5 d5 e5 f5 g5 h5
+ resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1); //a6 b6 c6 d6 e6 f6 g6 h6
+ resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1); //a7 b7 c7 d7 e7 f7 g7 h7
+
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2);
+ resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg); //a1 b1 c1 d1 -- 32 bit
+ resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg); //e1 f1 g1 h1 -- 32 bit
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2);
+ resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg); //a3 b3 c3 d3 -- 32 bit
+ resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg); //e3 f3 g3 h3 -- 32 bit
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2);
+ resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg); //a5 b5 c5 d5 -- 32 bit
+ resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg); //e5 f5 g5 h5 -- 32 bit
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2);
+ resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg); //a7 b7 c7 d7 -- 32 bit
+ resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg); //e7 f7 g7 h7 -- 32 bit
+ //Transform starts -- horizontal transform
+ /*------------------------------------------------------------------*/
+ /* y0 = w0 + w4 */
+ temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2);
+ /* y2 = w0 - w4 */
+ temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2);
+ /* y1 = -w3 + w5 - w7 - (w7 >> 1) */
+ temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1); //-w3+w5
+ temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2);
+ temp4 = _mm_sub_epi32(temp2, resq_r7_1); //-w3+w5-w7
+ temp12 = _mm_sub_epi32(temp10, resq_r7_2);
+ temp5 = _mm_srai_epi32(resq_r7_1, 1); //w7>>1
+ temp13 = _mm_srai_epi32(resq_r7_2, 1);
+ temp2 = _mm_sub_epi32(temp4, temp5); //-w3+w5-w7 -(w7>>1)
+ temp10 = _mm_sub_epi32(temp12, temp13);
+ temp2 = _mm_packs_epi32(temp2, temp10);
+ /* y3 = w1 + w7 - w3 - (w3 >> 1) */
+ temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1); //w1+w7
+ temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2);
+ temp4 = _mm_sub_epi32(temp4, resq_r3_1); //w1+w7-w3
+ temp12 = _mm_sub_epi32(temp12, resq_r3_2);
+ temp5 = _mm_srai_epi32(resq_r3_1, 1); //w3>>1
+ temp13 = _mm_srai_epi32(resq_r3_2, 1);
+ temp4 = _mm_sub_epi32(temp4, temp5); //w1+w7-w3-(w3>>1)
+ temp12 = _mm_sub_epi32(temp12, temp13);
+ temp4 = _mm_packs_epi32(temp4, temp12);
+ /* y4 = (w2 >> 1) - w6 */
+ temp5 = _mm_srai_epi16(resq_r2_2, 1); //w2>>1
+ temp5 = _mm_sub_epi16(temp5, resq_r6_2); //(w2>>1)-w6
+ /* y5 = -w1 + w7 + w5 + (w5 >> 1) */
+ temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1); //w7-w1
+ temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2);
+ temp6 = _mm_add_epi32(temp6, resq_r5_1); //w7-w1+w5
+ temp14 = _mm_add_epi32(temp14, resq_r5_2);
+ temp7 = _mm_srai_epi32(resq_r5_1, 1); //w5>>1
+ temp15 = _mm_srai_epi32(resq_r5_2, 1);
+ temp6 = _mm_add_epi32(temp6, temp7); //w7-w1_w5+(w5>>1)
+ temp14 = _mm_add_epi32(temp14, temp15);
+ temp6 = _mm_packs_epi32(temp6, temp14);
+ /* y6 = w2 + (w6 >> 1) */
+ temp7 = _mm_srai_epi16(resq_r6_2, 1); //w6>>1
+ temp7 = _mm_add_epi16(temp7, resq_r2_2); //(w6>>1)+w2
+ /* y7 = w3 + w5 + w1 + (w1 >> 1) */
+ temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1); //w3+w5
+ temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2);
+ temp8 = _mm_add_epi32(temp8, resq_r1_1); //w3+w5+w1
+ temp16 = _mm_add_epi32(temp16, resq_r1_2);
+ temp17 = _mm_srai_epi32(resq_r1_1, 1); //w1>>1
+ temp18 = _mm_srai_epi32(resq_r1_2, 1);
+ temp8 = _mm_add_epi32(temp8, temp17); //w3+w5+w1+(w1>>1)
+ temp16 = _mm_add_epi32(temp16, temp18);
+ temp8 = _mm_packs_epi32(temp8, temp16);
+ /*------------------------------------------------------------------*/
+ /*------------------------------------------------------------------*/
+ /* z0 = y0 + y6 */
+ resq_r0_1 = _mm_add_epi16(temp1, temp7);
+ /* z1 = y1 + (y7 >> 2) */
+ resq_r1_1 = _mm_srai_epi16(temp8, 2);
+ resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2);
+ /* z2 = y2 + y4 */
+ resq_r2_1 = _mm_add_epi16(temp3, temp5);
+ /* z3 = y3 + (y5 >> 2) */
+ resq_r3_1 = _mm_srai_epi16(temp6, 2);
+ resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4);
+ /* z4 = y2 - y4 */
+ resq_r4_1 = _mm_sub_epi16(temp3, temp5);
+ /* z5 = (y3 >> 2) - y5 */
+ resq_r5_1 = _mm_srai_epi16(temp4, 2);
+ resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6);
+ /* z6 = y0 - y6 */
+ resq_r6_1 = _mm_sub_epi16(temp1, temp7);
+ /* z7 = y7 - (y1 >> 2) */
+ resq_r7_1 = _mm_srai_epi16(temp2, 2);
+ resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1);
+ /*------------------------------------------------------------------*/
+ /*------------------------------------------------------------------*/
+ /* x0 = z0 + z7 */
+ temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1);
+ /* x1 = z2 + z5 */
+ temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1);
+ /* x2 = z4 + z3 */
+ temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1);
+ /* x3 = z6 + z1 */
+ temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1);
+ /* x4 = z6 - z1 */
+ temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1);
+ /* x5 = z4 - z3 */
+ temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1);
+ /* x6 = z2 - z5 */
+ temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1);
+ /* x7 = z0 - z7 */
+ temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1);
+ /*------------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 b0 c0 d0 e0 f0 g0 h0
+ * a1 b1 c1 d1 e1 f1 g1 h1
+ * a2 b2 c2 d2 e2 f2 g2 h2
+ * a3 b3 c3 d3 e3 f3 g3 h3
+ */
+ temp17 = _mm_unpacklo_epi16(temp1, temp2); //a0 a1 b0 b1 c0 c1 d0 d1
+ temp19 = _mm_unpacklo_epi16(temp3, temp4); //a2 a3 b2 b3 c2 c3 d2 d3
+ temp18 = _mm_unpackhi_epi16(temp1, temp2); //e0 e1 f0 f1 g0 g1 h0 h1
+ temp20 = _mm_unpackhi_epi16(temp3, temp4); //e2 e3 f2 f3 g2 g3 h2 h3
+
+ resq_r0_1 = _mm_unpacklo_epi32(temp17, temp19); //a0 a1 a2 a3 b0 b1 b2 b3
+ resq_r1_1 = _mm_unpackhi_epi32(temp17, temp19); //c0 c1 c2 c3 d0 d1 d2 d3
+ resq_r2_1 = _mm_unpacklo_epi32(temp18, temp20); //e0 e1 e2 e3 f0 f1 f2 f3
+ resq_r3_1 = _mm_unpackhi_epi32(temp18, temp20); //g0 g2 g2 g3 h0 h1 h2 h3
+ /*
+ * a4 b4 c4 d4 e4 f4 g4 h4
+ * a5 b5 c5 d5 e5 f5 g5 h5
+ * a6 b6 c6 d6 e6 f6 g6 h6
+ * a7 b7 c7 d7 e7 f7 g7 h7
+ */
+ temp17 = _mm_unpacklo_epi16(temp5, temp6); //a4 a5 b4 b5 c4 c5 d4 d5
+ temp19 = _mm_unpacklo_epi16(temp7, temp8); //a6 a7 b6 b7 c6 c7 d6 d7
+ temp18 = _mm_unpackhi_epi16(temp5, temp6); //e4 e5 f4 f5 g4 g5 h4 h5
+ temp20 = _mm_unpackhi_epi16(temp7, temp8); //e6 e7 f6 f7 g6 g7 h6 h7
+
+ resq_r4_1 = _mm_unpacklo_epi32(temp17, temp19); //a4 a5 a6 a7 b4 b5 b6 b7
+ resq_r5_1 = _mm_unpackhi_epi32(temp17, temp19); //c4 c5 c6 c7 d4 d5 d6 d7
+ resq_r6_1 = _mm_unpacklo_epi32(temp18, temp20); //e4 e5 e6 e7 f4 f5 f6 f7
+ resq_r7_1 = _mm_unpackhi_epi32(temp18, temp20); //g4 g5 g6 g7 h4 h5 h6 h7
+ /* a0 a1 a2 a3 b0 b1 b2 b3
+ * c0 c1 c2 c3 d0 d1 d2 d3
+ * e0 e1 e2 e3 f0 f1 f2 f3
+ * g0 g1 g2 g3 h0 h1 h2 h3
+ * a4 a5 a6 a7 b4 b5 b6 b7
+ * c4 c5 c6 c7 d4 d5 d6 d7
+ * e4 e5 e6 e7 f4 f5 f6 f7
+ * g4 g5 g6 g7 h4 h5 h6 h7
+ */
+ resq_r0_2 = _mm_unpacklo_epi64(resq_r0_1, resq_r4_1); //a0 a1 a2 a3 a4 a5 a6 a7
+ resq_r1_2 = _mm_unpackhi_epi64(resq_r0_1, resq_r4_1); //b0 b1 b2 b3 b4 b5 b6 b7
+ resq_r2_2 = _mm_unpacklo_epi64(resq_r1_1, resq_r5_1); //c0 c1 c2 c3 c4 c5 c6 c7
+ resq_r3_2 = _mm_unpackhi_epi64(resq_r1_1, resq_r5_1); //d0 d1 d2 d3 d4 d5 d6 d7
+ resq_r4_2 = _mm_unpacklo_epi64(resq_r2_1, resq_r6_1); //e0 e1 e2 e3 e4 e5 e6 e7
+ resq_r5_2 = _mm_unpackhi_epi64(resq_r2_1, resq_r6_1); //f0 f1 f2 f3 f4 f5 f6 f7
+ resq_r6_2 = _mm_unpacklo_epi64(resq_r3_1, resq_r7_1); //g0 g1 g2 g3 g4 g5 g6 g7
+ resq_r7_2 = _mm_unpackhi_epi64(resq_r3_1, resq_r7_1); //h0 h1 h2 h3 h4 h5 h6 h7
+
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r1_2);
+ resq_r1_1 = _mm_unpacklo_epi16(resq_r1_2, sign_reg); //a1 b1 c1 d1 -- 32 bit
+ resq_r1_2 = _mm_unpackhi_epi16(resq_r1_2, sign_reg); //e1 f1 g1 h1 -- 32 bit
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r3_2);
+ resq_r3_1 = _mm_unpacklo_epi16(resq_r3_2, sign_reg); //a3 b3 c3 d3 -- 32 bit
+ resq_r3_2 = _mm_unpackhi_epi16(resq_r3_2, sign_reg); //e3 f3 g3 h3 -- 32 bit
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r5_2);
+ resq_r5_1 = _mm_unpacklo_epi16(resq_r5_2, sign_reg); //a5 b5 c5 d5 -- 32 bit
+ resq_r5_2 = _mm_unpackhi_epi16(resq_r5_2, sign_reg); //e5 f5 g5 h5 -- 32 bit
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, resq_r7_2);
+ resq_r7_1 = _mm_unpacklo_epi16(resq_r7_2, sign_reg); //a7 b7 c7 d7 -- 32 bit
+ resq_r7_2 = _mm_unpackhi_epi16(resq_r7_2, sign_reg); //e7 f7 g7 h7 -- 32 bit
+
+ zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+ //Load pred buffer row 0
+ predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r0_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
+ //Load pred buffer row 1
+ predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r1_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
+ //Load pred buffer row 2
+ predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r2_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
+ //Load pred buffer row 3
+ predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r3_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
+ //Load pred buffer row 4
+ predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[4 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r4_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
+ //Load pred buffer row 5
+ predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[5 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bit
+ pred_r5_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
+ //Load pred buffer row 6
+ predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[6 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r6_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
+ //Load pred buffer row 7
+ predload_r = _mm_loadl_epi64((__m128i *) (&pu1_pred[7 * pred_strd])); //p0 p1 p2 p3 p4 p5 p6 p7 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r7_1 = _mm_unpacklo_epi8(predload_r, zero_8x16b); //p0 p1 p2 p3 p4 p5 p6 p7 -- all 16 bits
+
+ /*--------------------------------------------------------------------*/
+ /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
+ /* */
+ /* Add the prediction and store it back to reconstructed frame buffer */
+ /* [Prediction buffer itself in this case] */
+ /*--------------------------------------------------------------------*/
+
+ /* y0j = w0j + w4j */
+ temp1 = _mm_add_epi16(resq_r0_2, resq_r4_2);
+ /* y2j = w0j - w4j */
+ temp3 = _mm_sub_epi16(resq_r0_2, resq_r4_2);
+ /* y1j = -w3j + w5j - w7j - (w7j >> 1) */
+ temp2 = _mm_sub_epi32(resq_r5_1, resq_r3_1); //-w3+w5
+ temp10 = _mm_sub_epi32(resq_r5_2, resq_r3_2);
+ temp4 = _mm_sub_epi32(temp2, resq_r7_1); //-w3+w5-w7
+ temp12 = _mm_sub_epi32(temp10, resq_r7_2);
+ temp5 = _mm_srai_epi32(resq_r7_1, 1); //w7>>1
+ temp13 = _mm_srai_epi32(resq_r7_2, 1);
+ temp2 = _mm_sub_epi32(temp4, temp5); //-w3+w5-w7 -(w7>>1)
+ temp10 = _mm_sub_epi32(temp12, temp13);
+ temp2 = _mm_packs_epi32(temp2, temp10);
+ /* y3j = w1j + w7j - w3j - (w3j >> 1) */
+ temp4 = _mm_add_epi32(resq_r1_1, resq_r7_1); //w1+w7
+ temp12 = _mm_add_epi32(resq_r1_2, resq_r7_2);
+ temp4 = _mm_sub_epi32(temp4, resq_r3_1); //w1+w7-w3
+ temp12 = _mm_sub_epi32(temp12, resq_r3_2);
+ temp5 = _mm_srai_epi32(resq_r3_1, 1); //w3>>1
+ temp13 = _mm_srai_epi32(resq_r3_2, 1);
+ temp4 = _mm_sub_epi32(temp4, temp5); //w1+w7-w3-(w3>>1)
+ temp12 = _mm_sub_epi32(temp12, temp13);
+ temp4 = _mm_packs_epi32(temp4, temp12);
+ /* y4j = (w2j >> 1) - w6j */
+ temp5 = _mm_srai_epi16(resq_r2_2, 1); //w2>>1
+ temp5 = _mm_sub_epi16(temp5, resq_r6_2); //(w2>>1)-w6
+ /* y5j = -w1j + w7j + w5j + (w5j >> 1) */
+ temp6 = _mm_sub_epi32(resq_r7_1, resq_r1_1); //w7-w1
+ temp14 = _mm_sub_epi32(resq_r7_2, resq_r1_2);
+ temp6 = _mm_add_epi32(temp6, resq_r5_1); //w7-w1+w5
+ temp14 = _mm_add_epi32(temp14, resq_r5_2);
+ temp7 = _mm_srai_epi32(resq_r5_1, 1); //w5>>1
+ temp15 = _mm_srai_epi32(resq_r5_2, 1);
+ temp6 = _mm_add_epi32(temp6, temp7); //w7-w1_w5+(w5>>1)
+ temp14 = _mm_add_epi32(temp14, temp15);
+ temp6 = _mm_packs_epi32(temp6, temp14);
+ /* y6j = w2j + (w6j >> 1) */
+ temp7 = _mm_srai_epi16(resq_r6_2, 1); //w6>>1
+ temp7 = _mm_add_epi16(temp7, resq_r2_2); //(w6>>1)+w2
+ /* y7j = w3j + w5j + w1j + (w1j >> 1) */
+ temp8 = _mm_add_epi32(resq_r3_1, resq_r5_1); //w3+w5
+ temp16 = _mm_add_epi32(resq_r3_2, resq_r5_2);
+ temp8 = _mm_add_epi32(temp8, resq_r1_1); //w3+w5+w1
+ temp16 = _mm_add_epi32(temp16, resq_r1_2);
+ temp17 = _mm_srai_epi32(resq_r1_1, 1); //w1>>1
+ temp18 = _mm_srai_epi32(resq_r1_2, 1);
+ temp8 = _mm_add_epi32(temp8, temp17); //w3+w5+w1+(w1>>1)
+ temp16 = _mm_add_epi32(temp16, temp18);
+ temp8 = _mm_packs_epi32(temp8, temp16);
+ /*------------------------------------------------------------------*/
+ /*------------------------------------------------------------------*/
+ /* z0j = y0j + y6j */
+ resq_r0_1 = _mm_add_epi16(temp1, temp7);
+ /* z1j = y1j + (y7j >> 2) */
+ resq_r1_1 = _mm_srai_epi16(temp8, 2);
+ resq_r1_1 = _mm_add_epi16(resq_r1_1, temp2);
+ /* z2j = y2j + y4j */
+ resq_r2_1 = _mm_add_epi16(temp3, temp5);
+ /* z3j = y3j + (y5j >> 2) */
+ resq_r3_1 = _mm_srai_epi16(temp6, 2);
+ resq_r3_1 = _mm_add_epi16(resq_r3_1, temp4);
+ /* z4j = y2j - y4j */
+ resq_r4_1 = _mm_sub_epi16(temp3, temp5);
+ /* z5j = (y3j >> 2) - y5j */
+ resq_r5_1 = _mm_srai_epi16(temp4, 2);
+ resq_r5_1 = _mm_sub_epi16(resq_r5_1, temp6);
+ /* z6j = y0j - y6j */
+ resq_r6_1 = _mm_sub_epi16(temp1, temp7);
+ /* z7j = y7j - (y1j >> 2) */
+ resq_r7_1 = _mm_srai_epi16(temp2, 2);
+ resq_r7_1 = _mm_sub_epi16(temp8, resq_r7_1);
+ /*------------------------------------------------------------------*/
+
+ /*------------------------------------------------------------------*/
+ /* x0j = z0j + z7j */
+ temp1 = _mm_add_epi16(resq_r0_1, resq_r7_1);
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp1);
+ temp10 = _mm_unpacklo_epi16(temp1, sign_reg);
+ temp11 = _mm_unpackhi_epi16(temp1, sign_reg);
+ temp10 = _mm_add_epi32(temp10, value_32);
+ temp11 = _mm_add_epi32(temp11, value_32);
+ temp10 = _mm_srai_epi32(temp10, 6);
+ temp11 = _mm_srai_epi32(temp11, 6);
+ temp10 = _mm_packs_epi32(temp10, temp11);
+ temp1 = _mm_add_epi16(temp10, pred_r0_1);
+ /* x1j = z2j + z5j */
+ temp2 = _mm_add_epi16(resq_r2_1, resq_r5_1);
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp2);
+ temp10 = _mm_unpacklo_epi16(temp2, sign_reg);
+ temp11 = _mm_unpackhi_epi16(temp2, sign_reg);
+ temp10 = _mm_add_epi32(temp10, value_32);
+ temp11 = _mm_add_epi32(temp11, value_32);
+ temp10 = _mm_srai_epi32(temp10, 6);
+ temp11 = _mm_srai_epi32(temp11, 6);
+ temp10 = _mm_packs_epi32(temp10, temp11);
+ temp2 = _mm_add_epi16(temp10, pred_r1_1);
+ /* x2j = z4j + z3j */
+ temp3 = _mm_add_epi16(resq_r4_1, resq_r3_1);
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp3);
+ temp10 = _mm_unpacklo_epi16(temp3, sign_reg);
+ temp11 = _mm_unpackhi_epi16(temp3, sign_reg);
+ temp10 = _mm_add_epi32(temp10, value_32);
+ temp11 = _mm_add_epi32(temp11, value_32);
+ temp10 = _mm_srai_epi32(temp10, 6);
+ temp11 = _mm_srai_epi32(temp11, 6);
+ temp10 = _mm_packs_epi32(temp10, temp11);
+ temp3 = _mm_add_epi16(temp10, pred_r2_1);
+ /* x3j = z6j + z1j */
+ temp4 = _mm_add_epi16(resq_r6_1, resq_r1_1);
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp4);
+ temp10 = _mm_unpacklo_epi16(temp4, sign_reg);
+ temp11 = _mm_unpackhi_epi16(temp4, sign_reg);
+ temp10 = _mm_add_epi32(temp10, value_32);
+ temp11 = _mm_add_epi32(temp11, value_32);
+ temp10 = _mm_srai_epi32(temp10, 6);
+ temp11 = _mm_srai_epi32(temp11, 6);
+ temp10 = _mm_packs_epi32(temp10, temp11);
+ temp4 = _mm_add_epi16(temp10, pred_r3_1);
+ /* x4j = z6j - z1j */
+ temp5 = _mm_sub_epi16(resq_r6_1, resq_r1_1);
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp5);
+ temp10 = _mm_unpacklo_epi16(temp5, sign_reg);
+ temp11 = _mm_unpackhi_epi16(temp5, sign_reg);
+ temp10 = _mm_add_epi32(temp10, value_32);
+ temp11 = _mm_add_epi32(temp11, value_32);
+ temp10 = _mm_srai_epi32(temp10, 6);
+ temp11 = _mm_srai_epi32(temp11, 6);
+ temp10 = _mm_packs_epi32(temp10, temp11);
+ temp5 = _mm_add_epi16(temp10, pred_r4_1);
+ /* x5j = z4j - z3j */
+ temp6 = _mm_sub_epi16(resq_r4_1, resq_r3_1);
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp6);
+ temp10 = _mm_unpacklo_epi16(temp6, sign_reg);
+ temp11 = _mm_unpackhi_epi16(temp6, sign_reg);
+ temp10 = _mm_add_epi32(temp10, value_32);
+ temp11 = _mm_add_epi32(temp11, value_32);
+ temp10 = _mm_srai_epi32(temp10, 6);
+ temp11 = _mm_srai_epi32(temp11, 6);
+ temp10 = _mm_packs_epi32(temp10, temp11);
+ temp6 = _mm_add_epi16(temp10, pred_r5_1);
+ /* x6j = z2j - z5j */
+ temp7 = _mm_sub_epi16(resq_r2_1, resq_r5_1);
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp7);
+ temp10 = _mm_unpacklo_epi16(temp7, sign_reg);
+ temp11 = _mm_unpackhi_epi16(temp7, sign_reg);
+ temp10 = _mm_add_epi32(temp10, value_32);
+ temp11 = _mm_add_epi32(temp11, value_32);
+ temp10 = _mm_srai_epi32(temp10, 6);
+ temp11 = _mm_srai_epi32(temp11, 6);
+ temp10 = _mm_packs_epi32(temp10, temp11);
+ temp7 = _mm_add_epi16(temp10, pred_r6_1);
+ /* x7j = z0j - z7j */
+ temp8 = _mm_sub_epi16(resq_r0_1, resq_r7_1);
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, temp8);
+ temp10 = _mm_unpacklo_epi16(temp8, sign_reg);
+ temp11 = _mm_unpackhi_epi16(temp8, sign_reg);
+ temp10 = _mm_add_epi32(temp10, value_32);
+ temp11 = _mm_add_epi32(temp11, value_32);
+ temp10 = _mm_srai_epi32(temp10, 6);
+ temp11 = _mm_srai_epi32(temp11, 6);
+ temp10 = _mm_packs_epi32(temp10, temp11);
+ temp8 = _mm_add_epi16(temp10, pred_r7_1);
+ /*------------------------------------------------------------------*/
+ //Clipping the results to 8 bits
+ sign_reg = _mm_cmpgt_epi16(temp1, zero_8x16b); // sign check
+ temp1 = _mm_and_si128(temp1, sign_reg);
+ sign_reg = _mm_cmpgt_epi16(temp2, zero_8x16b); // sign check
+ temp2 = _mm_and_si128(temp2, sign_reg);
+ sign_reg = _mm_cmpgt_epi16(temp3, zero_8x16b); // sign check
+ temp3 = _mm_and_si128(temp3, sign_reg);
+ sign_reg = _mm_cmpgt_epi16(temp4, zero_8x16b); // sign check
+ temp4 = _mm_and_si128(temp4, sign_reg);
+ sign_reg = _mm_cmpgt_epi16(temp5, zero_8x16b); // sign check
+ temp5 = _mm_and_si128(temp5, sign_reg);
+ sign_reg = _mm_cmpgt_epi16(temp6, zero_8x16b); // sign check
+ temp6 = _mm_and_si128(temp6, sign_reg);
+ sign_reg = _mm_cmpgt_epi16(temp7, zero_8x16b); // sign check
+ temp7 = _mm_and_si128(temp7, sign_reg);
+ sign_reg = _mm_cmpgt_epi16(temp8, zero_8x16b); // sign check
+ temp8 = _mm_and_si128(temp8, sign_reg);
+
+ resq_r0_2 = _mm_packus_epi16(temp1, zero_8x16b);
+ resq_r1_2 = _mm_packus_epi16(temp2, zero_8x16b);
+ resq_r2_2 = _mm_packus_epi16(temp3, zero_8x16b);
+ resq_r3_2 = _mm_packus_epi16(temp4, zero_8x16b);
+ resq_r4_2 = _mm_packus_epi16(temp5, zero_8x16b);
+ resq_r5_2 = _mm_packus_epi16(temp6, zero_8x16b);
+ resq_r6_2 = _mm_packus_epi16(temp7, zero_8x16b);
+ resq_r7_2 = _mm_packus_epi16(temp8, zero_8x16b);
+
+ _mm_storel_epi64((__m128i *) (&pu1_out[0]), resq_r0_2);
+ _mm_storel_epi64((__m128i *) (&pu1_out[out_strd]), resq_r1_2);
+ _mm_storel_epi64((__m128i *) (&pu1_out[2 * out_strd]), resq_r2_2);
+ _mm_storel_epi64((__m128i *) (&pu1_out[3 * out_strd]), resq_r3_2);
+ _mm_storel_epi64((__m128i *) (&pu1_out[4 * out_strd]), resq_r4_2);
+ _mm_storel_epi64((__m128i *) (&pu1_out[5 * out_strd]), resq_r5_2);
+ _mm_storel_epi64((__m128i *) (&pu1_out[6 * out_strd]), resq_r6_2);
+ _mm_storel_epi64((__m128i *) (&pu1_out[7 * out_strd]), resq_r7_2);
+}
+
diff --git a/common/x86/ih264_luma_intra_pred_filters_ssse3.c b/common/x86/ih264_luma_intra_pred_filters_ssse3.c
new file mode 100755
index 0000000..5a35372
--- /dev/null
+++ b/common/x86/ih264_luma_intra_pred_filters_ssse3.c
@@ -0,0 +1,2282 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_luma_intra_pred_filters_ssse3.c
+ *
+ * @brief
+ * Contains function definitions for luma intra prediction filters in x86
+ * intrinsics
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ih264_intra_pred_luma_4x4_mode_vert_ssse3
+ * - ih264_intra_pred_luma_4x4_mode_horz_ssse3
+ * - ih264_intra_pred_luma_4x4_mode_dc_ssse3
+ * - ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3
+ * - ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3
+ * - ih264_intra_pred_luma_4x4_mode_vert_r_ssse3
+ * - ih264_intra_pred_luma_4x4_mode_horz_d_ssse3
+ * - ih264_intra_pred_luma_4x4_mode_vert_l_ssse3
+ * - ih264_intra_pred_luma_4x4_mode_horz_u_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_vert_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_horz_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_dc_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_vert_r_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_horz_d_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_vert_l_ssse3
+ * - ih264_intra_pred_luma_8x8_mode_horz_u_ssse3
+ * - ih264_intra_pred_luma_16x16_mode_vert_ssse3
+ * - ih264_intra_pred_luma_16x16_mode_horz_ssse3
+ * - ih264_intra_pred_luma_16x16_mode_dc_ssse3
+ * - ih264_intra_pred_luma_16x16_mode_plane_ssse3
+ *
+ * @remarks
+ * None
+ *
+ ******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <immintrin.h>
+
+/* User include files */
+#include "ih264_defs.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+
+
+
+/******************* LUMA INTRAPREDICTION *******************/
+
+/******************* 4x4 Modes *******************/
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_vert_ssse3
+ *
+ * @brief
+ *  Intra prediction for a 4x4 luma block, mode: vertical
+ *
+ * @par Description:
+ *  Copies the top neighbours into each of the four output rows, as described
+ *  in sec 8.3.1.2.1
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour array; the top row is read starting at
+ *  pu1_src + BLK_SIZE + 1
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  Eight bytes are loaded from the top row but only four bytes are written per
+ *  destination row, through a byte-masked store.
+ *
+ *******************************************************************************
+ */
+void ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 ngbr_avail)
+{
+    WORD32 row;
+    __m128i top_row, store_mask;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    /* enable only the four lowest byte lanes for the masked stores */
+    store_mask = _mm_srli_si128(_mm_set1_epi8(0xff), 12);
+
+    top_row = _mm_loadl_epi64((__m128i *)(pu1_src + BLK_SIZE + 1));
+
+    for(row = 0; row < 4; row++)
+    {
+        _mm_maskmoveu_si128(top_row, store_mask,
+                            (char *)(pu1_dst + row * dst_strd));
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_horz_ssse3
+ *
+ * @brief
+ *  Intra prediction for a 4x4 luma block, mode: horizontal
+ *
+ * @par Description:
+ *  Fills every output row with the left neighbour of that row, as described
+ *  in sec 8.3.1.2.2
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour array; the left neighbour of row 0 is at
+ *  pu1_src[BLK_SIZE - 1] and the neighbours of rows 1..3 sit at the three
+ *  preceding addresses, in that order
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  Four bytes are written per destination row through a byte-masked store.
+ *
+ *******************************************************************************
+ */
+void ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_left = pu1_src + BLK_SIZE - 1;
+    WORD32 pair_rows_0_1, pair_rows_2_3;
+    __m128i left_vec, store_mask;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    /* enable only the four lowest byte lanes for the masked stores */
+    store_mask = _mm_srli_si128(_mm_set1_epi8(0xff), 12);
+
+    /* bytes 0..3 of the load hold the neighbours of rows 3, 2, 1, 0 */
+    left_vec = _mm_loadl_epi64((__m128i *)(pu1_left - 3));
+
+    /* each 16-bit lane packs two neighbours: low byte = lower row */
+    pair_rows_0_1 = _mm_extract_epi16(left_vec, 1);
+    pair_rows_2_3 = _mm_extract_epi16(left_vec, 0);
+
+    _mm_maskmoveu_si128(_mm_set1_epi8(pair_rows_0_1 >> 8), store_mask,
+                        (char *)pu1_dst);
+    _mm_maskmoveu_si128(_mm_set1_epi8(pair_rows_0_1 & 0xff), store_mask,
+                        (char *)(pu1_dst + dst_strd));
+    _mm_maskmoveu_si128(_mm_set1_epi8(pair_rows_2_3 >> 8), store_mask,
+                        (char *)(pu1_dst + 2 * dst_strd));
+    _mm_maskmoveu_si128(_mm_set1_epi8(pair_rows_2_3 & 0xff), store_mask,
+                        (char *)(pu1_dst + 3 * dst_strd));
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_dc_ssse3
+ *
+ * @brief
+ *  Perform Intra prediction for luma_4x4 mode:DC
+ *
+ * @par Description:
+ *  Perform Intra prediction for luma_4x4 mode:DC, described in sec 8.3.1.2.3.
+ *  The DC value is
+ *    (sum(left) + sum(top) + 4) >> 3   when both neighbours are available,
+ *    (sum(available side) + 2) >> 2    when exactly one is available,
+ *    128                               when neither is available.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour array; left neighbours end at
+ *  pu1_src[BLK_SIZE - 1], top neighbours start at pu1_src[BLK_SIZE + 1]
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels; tested against
+ *  LEFT_MB_AVAILABLE_MASK and TOP_MB_AVAILABLE_MASK
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  Four bytes are written per destination row through a byte-masked store.
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src,
+                                             UWORD8 *pu1_dst,
+                                             WORD32 src_strd,
+                                             WORD32 dst_strd,
+                                             WORD32 ngbr_avail)
+{
+    UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
+    UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
+    UWORD8 *pu1_left, *pu1_top;
+    WORD32 dc_val, flag;
+    WORD32 dst_strd2, dst_strd3;
+
+    __m128i mask_full_128b, mask_low_32b;
+    __m128i dcval_16x8b;
+
+    /* ngbr_avail is read below, so only src_strd is marked unused */
+    UNUSED(src_strd);
+
+    mask_full_128b = _mm_set1_epi8(0xff);
+
+    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
+    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
+
+    pu1_left = pu1_src + BLK_SIZE - 1;
+    pu1_top = pu1_src + BLK_SIZE + 1;
+
+    /* enable only the four lowest byte lanes for the masked stores */
+    mask_low_32b = _mm_srli_si128(mask_full_128b, 12);
+
+    /* number of available sides: 0, 1 or 2 */
+    flag = u1_useleft + u1_usetop;
+
+    if(flag)
+    {
+        WORD32 shft, ofst = 0;
+
+        __m128i left_16x8b, top_16x8b, val_16x8b, tmp_8x16b, zero_vector;
+
+        /* each available side contributes 2 to the rounding offset */
+        if(u1_useleft)
+        {
+            left_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3));
+            ofst += 2;
+        }
+        else
+            left_16x8b = _mm_setzero_si128();
+
+        zero_vector = _mm_setzero_si128();
+
+        if(u1_usetop)
+        {
+            top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
+            ofst += 2;
+        }
+        else
+            top_16x8b = _mm_setzero_si128();
+
+        shft = flag + 1;
+
+        /* low 8 bytes = first 4 left bytes followed by first 4 top bytes */
+        val_16x8b = _mm_unpacklo_epi32(left_16x8b, top_16x8b);
+        /* SAD against zero sums those 8 bytes into 16-bit lane 0 */
+        tmp_8x16b = _mm_sad_epu8(val_16x8b, zero_vector);
+
+        dc_val = _mm_extract_epi16(tmp_8x16b, 0);
+        dc_val = (dc_val + ofst) >> shft;
+    }
+    else
+        dc_val = 128;
+
+    dst_strd2 = dst_strd << 1;
+    dst_strd3 = dst_strd + dst_strd2;
+
+    dcval_16x8b = _mm_set1_epi8(dc_val);
+
+    _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)pu1_dst);
+    _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+    _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+    _mm_maskmoveu_si128(dcval_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
+ * The eight top neighbours t0..t7 are filtered as (a + 2*b + c + 2) >> 2 over
+ * consecutive triples; output row y is the four filtered bytes starting at
+ * index y. t7 is replicated once, so the last value used is
+ * (t6 + 3*t7 + 2) >> 2.
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source; the top row is read from pu1_src + BLK_SIZE + 1
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride (Not used in this function)
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * Four bytes are written per destination row through a byte-masked store
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top;
+ WORD32 dst_strd2, dst_strd3;
+
+ __m128i top_16x8b, top_8x16b, top_sh_8x16b;
+ __m128i res1_8x16b, res2_8x16b, res_16x8b;
+ __m128i zero_vector, const_2_8x16b;
+ __m128i mask_full_128b, mask_low_32b;
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_top = pu1_src + BLK_SIZE + 1;
+
+ top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
+ zero_vector = _mm_setzero_si128();
+ top_8x16b = _mm_unpacklo_epi8(top_16x8b, zero_vector); //t0 t1 t2 t3 t4 t5 t6 t7 (widened to 16 bit)
+
+ mask_full_128b = _mm_set1_epi8(0xff);
+ top_sh_8x16b = _mm_srli_si128(top_8x16b, 2); //t1 t2 t3 t4 t5 t6 t7 0
+ const_2_8x16b = _mm_set1_epi16(2);
+
+ top_sh_8x16b = _mm_shufflehi_epi16(top_sh_8x16b, 0xa4); //t1 t2 t3 t4 t5 t6 t7 t7 (t7 replicated into the last lane)
+ res1_8x16b = _mm_add_epi16(top_8x16b, top_sh_8x16b); //t0+t1 t1+t2 ... t6+t7 2*t7
+ mask_low_32b = _mm_srli_si128(mask_full_128b, 12); //store mask: low 4 bytes only
+ res2_8x16b = _mm_srli_si128(res1_8x16b, 2); //t1+t2 t2+t3 ... 2*t7 0
+
+ res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b);
+ res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); //t0+2*t1+t2+2 ... t6+3*t7+2 (last lane is not stored)
+ res1_8x16b = _mm_srai_epi16(res1_8x16b, 2);
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd3 = dst_strd + dst_strd2;
+
+ res_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b); //back to 8-bit lanes
+ _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)pu1_dst); //row 0: filtered bytes 0..3
+ res_16x8b = _mm_srli_si128(res_16x8b, 1); //each following row starts one byte later
+ _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+ res_16x8b = _mm_srli_si128(res_16x8b, 1);
+ _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+ res_16x8b = _mm_srli_si128(res_16x8b, 1);
+ _mm_maskmoveu_si128(res_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); //row 3: filtered bytes 3..6
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
+ * The neighbour bytes l3 l2 l1 l0 tl t0 t1 t2 t3 are filtered as
+ * (a + 2*b + c + 2) >> 2 over consecutive triples; output row y is the four
+ * filtered bytes starting at index (3 - y).
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source; read starting at pu1_src + BLK_SIZE - 4
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride (Not used in this function)
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * NOTE(review): a full 16 bytes are loaded although only the first 9 feed the
+ * stored output - confirm the caller's buffer has 16 readable bytes from
+ * pu1_left - 3.
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left;
+ WORD32 dst_strd2, dst_strd3;
+
+ __m128i top_left_16x8b, top_left_8x16b;
+ __m128i top_left_sh_16x8b, top_left_sh_8x16b;
+ __m128i res1_8x16b, res2_8x16b;
+ __m128i res1_16x8b, res2_16x8b;
+ __m128i zero_vector, const_2_8x16b;
+ __m128i mask_full_128b, mask_low_32b;
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_left = pu1_src + BLK_SIZE - 1;
+
+ top_left_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 3)); //l3 l2 l1 l0 tl t0 t1 t2...
+ zero_vector = _mm_setzero_si128();
+ top_left_sh_16x8b = _mm_srli_si128(top_left_16x8b, 1); //l2 l1 l0 tl t0 t1 t2 t3...
+
+ top_left_8x16b = _mm_unpacklo_epi8(top_left_16x8b, zero_vector); //low 8 bytes widened to 16 bit
+ top_left_sh_8x16b = _mm_unpacklo_epi8(top_left_sh_16x8b, zero_vector);
+
+ mask_full_128b = _mm_set1_epi8(0xff);
+ res1_8x16b = _mm_add_epi16(top_left_8x16b, top_left_sh_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3...
+ const_2_8x16b = _mm_set1_epi16(2);
+ res2_8x16b = _mm_srli_si128(res1_8x16b, 2); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3...
+
+ res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b);
+ mask_low_32b = _mm_srli_si128(mask_full_128b, 12); //store mask: low 4 bytes only
+ res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); //l3+2*l2+l1+2 l2+2*l1+l0+2...
+ res1_8x16b = _mm_srai_epi16(res1_8x16b, 2);
+ res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b); //back to 8-bit lanes
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd3 = dst_strd + dst_strd2;
+
+ res2_16x8b = _mm_srli_si128(res1_16x8b, 3); //row 0: filtered bytes 3..6
+ _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)pu1_dst);
+ res2_16x8b = _mm_srli_si128(res1_16x8b, 2); //row 1: filtered bytes 2..5
+ _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+ res2_16x8b = _mm_srli_si128(res1_16x8b, 1); //row 2: filtered bytes 1..4
+ _mm_maskmoveu_si128(res2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+ _mm_maskmoveu_si128(res1_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3)); //row 3: filtered bytes 0..3
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_vert_r_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_4x4 mode:Vertical_Right
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
+ * With f(a,b,c) = (a + 2*b + c + 2) >> 2 and avg(a,b) = (a + b + 1) >> 1, the
+ * rows produced are:
+ *   row 0: avg(tl,t0)  avg(t0,t1)  avg(t1,t2)  avg(t2,t3)
+ *   row 1: f(l0,tl,t0) f(tl,t0,t1) f(t0,t1,t2) f(t1,t2,t3)
+ *   row 2: f(l1,l0,tl) followed by the first three bytes of row 0
+ *   row 3: f(l2,l1,l0) followed by the first three bytes of row 1
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source; read starting at pu1_src + BLK_SIZE - 3
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride (Not used in this function)
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ * None
+ *
+ * @remarks
+ * Four bytes are written per destination row through a byte-masked store
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left;
+ WORD32 dst_strd2, dst_strd3;
+
+ __m128i val_16x8b, temp_16x8b;
+ __m128i w11_a1_16x8b, w11_a2_16x8b;
+ __m128i w121_a1_8x16b, w121_a2_8x16b, w121_sh_8x16b;
+ __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
+ __m128i zero_vector, const_2_8x16b;
+ __m128i mask_full_128b, mask_low_32b;
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ mask_full_128b = _mm_set1_epi8(0xff);
+ mask_low_32b = _mm_srli_si128(mask_full_128b, 12); //store mask: low 4 bytes only
+
+ pu1_left = pu1_src + BLK_SIZE - 1;
+
+ val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 2)); //l2 l1 l0 tl t0 t1 t2 t3 (bytes)
+ zero_vector = _mm_setzero_si128();
+
+ w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l2 l1 l0 tl t0 t1 t2 t3
+ w11_a1_16x8b = _mm_srli_si128(val_16x8b, 3); //tl t0 t1 t2 t3 0... (bytes)
+ w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1 l0 tl t0 t1 t2 t3 0
+ w11_a2_16x8b = _mm_srli_si128(val_16x8b, 4); //t0 t1 t2 t3 0... (bytes)
+
+ w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3
+ row1_16x8b = _mm_avg_epu8(w11_a1_16x8b, w11_a2_16x8b); //avg(tl,t0) avg(t0,t1) avg(t1,t2) avg(t2,t3)...
+ w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3 0
+
+ const_2_8x16b = _mm_set1_epi16(2);
+ w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l2+2*l1+l0 l1+2*l0+tl ...
+ w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
+ w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); //lanes w0..w5 hold f() of consecutive triples
+
+ w121_sh_8x16b = _mm_shufflelo_epi16(w121_a1_8x16b, 0xe1); //swap lanes 0 and 1: w1 w0 w2 w3 w4...
+ w121_sh_8x16b = _mm_srli_si128(w121_sh_8x16b, 2); //drop w1: w0 w2 w3 w4 w5...
+
+ row4_16x8b = _mm_packus_epi16(w121_sh_8x16b, w121_sh_8x16b); //bytes: w0 w2 w3 w4 w5...
+ temp_16x8b = _mm_slli_si128(w121_a1_8x16b, 13); //moves the low byte of w1 to byte 15
+ row2_16x8b = _mm_srli_si128(row4_16x8b, 1); //bytes: w2 w3 w4 w5...
+ row3_16x8b = _mm_alignr_epi8(row1_16x8b, temp_16x8b, 15); //bytes: w1 followed by row1 bytes 0..
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd3 = dst_strd + dst_strd2;
+
+ _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
+ _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+ _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+ _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_horz_d_ssse3
+ *
+ * @brief
+ *  Perform Intra prediction for luma_4x4 mode:Horizontal_Down
+ *
+ * @par Description:
+ *  Perform Intra prediction for luma_4x4 mode:Horizontal_Down, described in
+ *  sec 8.3.1.2.7. With a_i = avg of neighbour bytes i and i+1 and f_i =
+ *  (b_i + 2*b_(i+1) + b_(i+2) + 2) >> 2 over b = l3 l2 l1 l0 tl t0 t1 t2, one
+ *  vector a0 f0 a1 f1 a2 f2 a3 f3 f4 f5 is built; output row y is the four
+ *  bytes starting at offset 2 * (3 - y).
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the source; read starting at pu1_src + BLK_SIZE - 4
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  Four bytes are written per destination row through a byte-masked store.
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src,
+                                                 UWORD8 *pu1_dst,
+                                                 WORD32 src_strd,
+                                                 WORD32 dst_strd,
+                                                 WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_left;
+    WORD32 dst_strd2, dst_strd3;
+    WORD32 val_121_t0t1;
+
+    __m128i val_16x8b, val_sh_16x8b;
+    __m128i w11_16x8b;
+    __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b;
+    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
+
+    __m128i zero_vector, const_2_8x16b;
+    __m128i mask_full_128b, mask_low_32b;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    mask_full_128b = _mm_set1_epi8(0xff);
+    mask_low_32b = _mm_srli_si128(mask_full_128b, 12); //store mask: low 4 bytes only
+
+    pu1_left = pu1_src + BLK_SIZE - 1;
+
+    val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3)); //l3 l2 l1 l0 tl t0 t1 t2 (bytes)
+    zero_vector = _mm_setzero_si128();
+    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
+    w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); //avg(l3,l2) avg(l2,l1) avg(l1,l0) avg(l0,tl)...
+
+    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l3 l2 l1 l0 tl t0 t1 t2
+    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l2 l1 l0 tl t0 t1 t2 0
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2
+    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2 0
+
+    const_2_8x16b = _mm_set1_epi16(2);
+
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l3+2*l2+l1 l2+2*l1+l0 l1+2*l0+tl ...
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
+    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
+
+    w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b); //f0 f1 f2 f3 f4 f5... (bytes)
+
+    row4_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b); //a0 f0 a1 f1 a2 f2 a3 f3 a4 f4...
+    /* overwrite bytes 8,9 (a4 f4) with f4 f5, the two top-only filtered values */
+    val_121_t0t1 = _mm_extract_epi16(w121_16x8b, 2);
+    row4_16x8b = _mm_insert_epi16(row4_16x8b, val_121_t0t1, 4);
+
+    dst_strd2 = dst_strd << 1;
+    dst_strd3 = dst_strd + dst_strd2;
+
+    /* each row above starts two bytes further into the vector */
+    row1_16x8b = _mm_srli_si128(row4_16x8b, 6);
+    row2_16x8b = _mm_srli_si128(row4_16x8b, 4);
+    row3_16x8b = _mm_srli_si128(row4_16x8b, 2);
+
+    _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
+    _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+    _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+    _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_vert_l_ssse3
+ *
+ * @brief
+ *  Perform Intra prediction for luma_4x4 mode:Vertical_Left
+ *
+ * @par Description:
+ *  Perform Intra prediction for luma_4x4 mode:Vertical_Left, described in
+ *  sec 8.3.1.2.8. Rows 0 and 2 take the pairwise averages of the top
+ *  neighbours (row 2 offset by one byte); rows 1 and 3 take the
+ *  (a + 2*b + c + 2) >> 2 filtered values (row 3 offset by one byte).
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the source; the top row is read from
+ *  pu1_src + BLK_SIZE + 1
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  Four bytes are written per destination row through a byte-masked store.
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src,
+                                                 UWORD8 *pu1_dst,
+                                                 WORD32 src_strd,
+                                                 WORD32 dst_strd,
+                                                 WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_top;
+    WORD32 dst_strd2, dst_strd3;
+
+    __m128i val_16x8b, val_sh_16x8b;
+    __m128i w121_a1_8x16b, w121_a2_8x16b;
+    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
+
+    __m128i zero_vector, const_2_8x16b;
+    __m128i mask_full_128b, mask_low_32b;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    mask_full_128b = _mm_set1_epi8(0xff);
+    mask_low_32b = _mm_srli_si128(mask_full_128b, 12); //store mask: low 4 bytes only
+
+    pu1_top = pu1_src + BLK_SIZE + 1;
+
+    val_16x8b = _mm_loadl_epi64((__m128i *)pu1_top); //t0 t1 t2 t3 t4 t5 t6 t7 (bytes)
+    zero_vector = _mm_setzero_si128();
+    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
+    row1_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); //avg(t0,t1) avg(t1,t2) avg(t2,t3)...
+
+    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //t0 t1 t2 t3 t4 t5...
+    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //t1 t2 t3 t4 t5 t6...
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //t0+t1 t1+t2 t2+t3 t3+t4 t4+t5...
+    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //t1+t2 t2+t3 t3+t4 t4+t5 t5+t6...
+
+    const_2_8x16b = _mm_set1_epi16(2);
+
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //t0+2*t1+t2 t1+2*t2+t3 t2+2*t3+t4...
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
+    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
+
+    row2_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b);
+
+    dst_strd2 = dst_strd << 1;
+    dst_strd3 = dst_strd + dst_strd2;
+
+    /* rows 2 and 3 are rows 0 and 1 advanced by one byte */
+    row3_16x8b = _mm_srli_si128(row1_16x8b, 1);
+    row4_16x8b = _mm_srli_si128(row2_16x8b, 1);
+
+    _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
+    _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+    _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+    _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_4x4_mode_horz_u_ssse3
+ *
+ * @brief
+ *  Perform Intra prediction for luma_4x4 mode:Horizontal_Up
+ *
+ * @par Description:
+ *  Perform Intra prediction for luma_4x4 mode:Horizontal_Up, described in
+ *  sec 8.3.1.2.9. The left neighbours are reversed to l0 l1 l2 l3 with l3
+ *  replicated, pairwise averages and (a + 2*b + c + 2) >> 2 filtered values
+ *  are interleaved into one vector, and output row y is the four bytes
+ *  starting at offset 2 * y.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the source; read starting at pu1_src + BLK_SIZE - 4
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  Four bytes are written per destination row through a byte-masked store.
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src,
+                                                 UWORD8 *pu1_dst,
+                                                 WORD32 src_strd,
+                                                 WORD32 dst_strd,
+                                                 WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_left;
+    WORD32 dst_strd2, dst_strd3;
+
+    __m128i val_16x8b, val_sh_16x8b;
+    __m128i w11_16x8b;
+    __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b;
+    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
+
+    __m128i zero_vector, const_2_8x16b, rev_16x8b;
+    __m128i mask_full_128b, mask_low_32b;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    mask_full_128b = _mm_set1_epi8(0xff);
+    mask_low_32b = _mm_srli_si128(mask_full_128b, 12); //store mask: low 4 bytes only
+
+    pu1_left = pu1_src + BLK_SIZE - 1;
+
+    zero_vector = _mm_setzero_si128();
+    /* byte-shuffle control: reverse bytes 0..3, then replicate source byte 0 */
+    rev_16x8b = _mm_setr_epi8(3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
+    val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3)); //l3 l2 l1 l0 (bytes 4..7 are loaded but not selected)
+    val_16x8b = _mm_shuffle_epi8(val_16x8b, rev_16x8b); //l0 l1 l2 l3 l3 l3 l3...
+
+    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
+    w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); //avg(l0,l1) avg(l1,l2) avg(l2,l3) l3 l3...
+
+    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l0 l1 l2 l3 l3 l3...
+    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1 l2 l3 l3 l3 l3...
+
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l0+l1 l1+l2 l2+l3 2*l3 2*l3...
+    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1+l2 l2+l3 2*l3 2*l3 2*l3...
+
+    const_2_8x16b = _mm_set1_epi16(2);
+
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l0+2*l1+l2 l1+2*l2+l3 l2+3*l3 4*l3 4*l3...
+    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
+    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
+
+    w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b);
+
+    dst_strd2 = dst_strd << 1;
+    dst_strd3 = dst_strd + dst_strd2;
+
+    /* interleave averages and filtered values; each row starts 2 bytes later */
+    row1_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b);
+    row2_16x8b = _mm_srli_si128(row1_16x8b, 2);
+    row3_16x8b = _mm_srli_si128(row1_16x8b, 4);
+    row4_16x8b = _mm_srli_si128(row1_16x8b, 6);
+
+    _mm_maskmoveu_si128(row1_16x8b, mask_low_32b, (char*)pu1_dst);
+    _mm_maskmoveu_si128(row2_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+    _mm_maskmoveu_si128(row3_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+    _mm_maskmoveu_si128(row4_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+}
+
+/******************* 8x8 Modes *******************/
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_vert_ssse3
+ *
+ * @brief
+ *  Intra prediction for an 8x8 luma block, mode: vertical
+ *
+ * @par Description:
+ *  Copies the eight top neighbours into each of the eight output rows, as
+ *  described in sec 8.3.2.2.2
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour array; the top row is read starting at
+ *  pu1_src + BLK8x8SIZE + 1
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void ih264_intra_pred_luma_8x8_mode_vert_ssse3(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 ngbr_avail)
+{
+    WORD32 row;
+    __m128i top_row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    top_row = _mm_loadl_epi64((__m128i *)(pu1_src + BLK8x8SIZE + 1));
+
+    /* the same 8 bytes go to every row of the block */
+    for(row = 0; row < 8; row++)
+    {
+        _mm_storel_epi64((__m128i *)(pu1_dst + row * dst_strd), top_row);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_horz_ssse3
+ *
+ * @brief
+ *  Intra prediction for an 8x8 luma block, mode: horizontal
+ *
+ * @par Description:
+ *  Fills every output row with the left neighbour of that row, as described
+ *  in sec 8.3.2.2.3
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour array; the left neighbour of row r is read
+ *  from pu1_src[BLK8x8SIZE - 1 - r]
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels (not used in this function)
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+void ih264_intra_pred_luma_8x8_mode_horz_ssse3(UWORD8 *pu1_src,
+                                               UWORD8 *pu1_dst,
+                                               WORD32 src_strd,
+                                               WORD32 dst_strd,
+                                               WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1;
+    __m128i rows[8];
+    WORD32 row;
+
+    UNUSED(src_strd);
+    UNUSED(ngbr_avail);
+
+    /* read every left neighbour before any store is issued */
+    for(row = 0; row < 8; row++)
+    {
+        rows[row] = _mm_set1_epi8(pu1_left[-row]);
+    }
+
+    for(row = 0; row < 8; row++)
+    {
+        _mm_storel_epi64((__m128i *)(pu1_dst + row * dst_strd), rows[row]);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_dc_ssse3
+ *
+ * @brief
+ *  Intra prediction for an 8x8 luma block, mode: DC
+ *
+ * @par Description:
+ *  Fills the block with a single DC value, as described in sec 8.3.2.2.4:
+ *    (sum(left) + sum(top) + 8) >> 4   when both neighbours are available,
+ *    (sum(available side) + 4) >> 3    when exactly one is available,
+ *    128                               when neither is available.
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the neighbour array; left neighbours end at
+ *  pu1_src[BLK8x8SIZE - 1], top neighbours start at pu1_src[BLK8x8SIZE + 1]
+ *
+ * @param[out] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ *  integer source stride (not used in this function)
+ *
+ * @param[in] dst_strd
+ *  integer destination stride
+ *
+ * @param[in] ngbr_avail
+ *  availability of neighbouring pixels; tested against
+ *  LEFT_MB_AVAILABLE_MASK and TOP_MB_AVAILABLE_MASK
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_dc_ssse3(UWORD8 *pu1_src,
+                                             UWORD8 *pu1_dst,
+                                             WORD32 src_strd,
+                                             WORD32 dst_strd,
+                                             WORD32 ngbr_avail)
+{
+    UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1; /* left neighbour of row 0 */
+    UWORD8 *pu1_top = pu1_src + BLK8x8SIZE + 1; /* first top neighbour */
+    UWORD8 u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
+    UWORD8 u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
+    WORD32 dc_val;
+    WORD32 row;
+    __m128i dc_row;
+
+    UNUSED(src_strd);
+
+    if(!u1_useleft && !u1_usetop)
+    {
+        dc_val = 128;
+    }
+    else
+    {
+        WORD32 acc = 0;
+        WORD32 shift = 2;
+        __m128i zero = _mm_setzero_si128();
+        __m128i side, side_sum;
+
+        /* each available side adds its 8-byte sum, 4 of rounding, 1 of shift */
+        if(u1_useleft)
+        {
+            side = _mm_loadl_epi64((__m128i *)(pu1_left - 7));
+            side_sum = _mm_sad_epu8(zero, side);
+            acc += 4 + _mm_extract_epi16(side_sum, 0);
+            shift++;
+        }
+        if(u1_usetop)
+        {
+            side = _mm_loadl_epi64((__m128i *)pu1_top);
+            side_sum = _mm_sad_epu8(zero, side);
+            acc += 4 + _mm_extract_epi16(side_sum, 0);
+            shift++;
+        }
+        dc_val = acc >> shift;
+    }
+
+    dc_row = _mm_set1_epi8(dc_val);
+
+    for(row = 0; row < 8; row++)
+    {
+        _mm_storel_epi64((__m128i *)(pu1_dst + row * dst_strd), dc_row);
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.5
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ __m128i top_16x8; /* 16 bytes loaded starting at pu1_top */
+ __m128i out_15x16; /* filtered values; rows are sliding 8-byte windows of it */
+ __m128i a0_8x16, a1_8x16, a2_8x16; /* samples k, k+1, k+2 widened to 16 bits */
+ __m128i temp1, temp2;
+ __m128i res1_8x16, res2_8x16;
+ __m128i zero = _mm_setzero_si128();
+ __m128i const_val2_8x16 = _mm_set1_epi16(2); /* rounding term of the 3-tap filter */
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_top = pu1_src + BLK8x8SIZE + 1;
+
+ top_16x8 = _mm_loadu_si128((__m128i *)(pu1_top));
+
+ temp1 = _mm_srli_si128(top_16x8, 1); /* top[k + 1] aligned to lane k */
+ temp2 = _mm_srli_si128(top_16x8, 2); /* top[k + 2] aligned to lane k */
+ a0_8x16 = _mm_unpacklo_epi8(top_16x8, zero);
+ a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
+ a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); /* 2 * top[k + 1] */
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res1_8x16 = _mm_srai_epi16(a0_8x16, 2); /* (top[k] + 2*top[k+1] + top[k+2] + 2) >> 2, k = 0..7 */
+
+ temp2 = _mm_srli_si128(top_16x8, 2); /* same shifted copies as above, recomputed */
+ temp1 = _mm_srli_si128(top_16x8, 1);
+ a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
+ a0_8x16 = _mm_unpackhi_epi8(top_16x8, zero);
+ a2_8x16 = _mm_shufflehi_epi16(a2_8x16, 0x14); /* lane 6 (k = 14) gets top[15] again since top[16] was not loaded */
+ a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res2_8x16 = _mm_srai_epi16(a0_8x16, 2); /* same filter for k = 8..15; lane 7 is never stored */
+
+ out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16);
+
+ _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out_15x16); /* row r holds filtered values r .. r + 7 */
+ out_15x16 = _mm_srli_si128(out_15x16, 1); /* slide the window by one sample per row */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out_15x16);
+ out_15x16 = _mm_srli_si128(out_15x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out_15x16);
+ out_15x16 = _mm_srli_si128(out_15x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out_15x16);
+ out_15x16 = _mm_srli_si128(out_15x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out_15x16);
+ out_15x16 = _mm_srli_si128(out_15x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out_15x16);
+ out_15x16 = _mm_srli_si128(out_15x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out_15x16);
+ out_15x16 = _mm_srli_si128(out_15x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16);
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.6
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ __m128i top_8x8, left_16x8; /* two overlapping 16-byte loads of the neighbour array */
+ __m128i out_15x16; /* filtered values; rows are sliding 8-byte windows of it */
+ __m128i a0_8x16, a1_8x16, a2_8x16; /* samples k, k+1, k+2 widened to 16 bits */
+ __m128i temp1, temp2;
+ __m128i res1_8x16, res2_8x16;
+ __m128i zero = _mm_setzero_si128();
+ __m128i const_val2_8x16 = _mm_set1_epi16(2); /* rounding term of the 3-tap filter */
+ __m128i str_8x8; /* shifted copy of out_15x16 used for one row store */
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_left = pu1_src + BLK8x8SIZE - 1;
+ pu1_top = pu1_src + BLK8x8SIZE + 1; /* pu1_left + 2; the byte in between is presumably the top-left pixel - TODO confirm */
+
+ left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); /* 8 left pixels followed by the bytes after them */
+
+ temp1 = _mm_srli_si128(left_16x8, 1); /* sample k + 1 aligned to lane k */
+ temp2 = _mm_srli_si128(left_16x8, 2); /* sample k + 2 aligned to lane k */
+ a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero);
+ a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
+ a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res1_8x16 = _mm_srai_epi16(a0_8x16, 2); /* (s[k] + 2*s[k+1] + s[k+2] + 2) >> 2 for the first 8 positions */
+
+ top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1)); /* continues 8 bytes after the first load */
+
+ temp1 = _mm_srli_si128(top_8x8, 1);
+ temp2 = _mm_srli_si128(top_8x8, 2);
+ a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero);
+ a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
+ a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res2_8x16 = _mm_srai_epi16(a0_8x16, 2); /* same filter for the next 8 positions */
+
+ out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16); /* 16 consecutive filtered values */
+
+ str_8x8 = _mm_srli_si128(out_15x16, 7); /* row r stores filtered values (7 - r) .. (14 - r) */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out_15x16, 6);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out_15x16, 5);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out_15x16, 4);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out_15x16, 3);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out_15x16, 2);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out_15x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16); /* last row needs no shift */
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_vert_r_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:Vertical_Right
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.7
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_vert_r_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ __m128i top_8x8, left_16x8;
+ __m128i out1_16x16, out2_16x16; /* source vectors for odd rows / even rows (>= 2) respectively */
+ __m128i a0_8x16, a1_8x16, a2_8x16; /* samples k, k+1, k+2 widened to 16 bits */
+ __m128i temp1, temp2;
+ __m128i res1_8x16, res2_8x16, res3_8x16; /* left 3-tap, top 3-tap, top 2-tap average */
+ __m128i zero = _mm_setzero_si128();
+ __m128i const_val2_8x16 = _mm_set1_epi16(2); /* rounding term of the 3-tap filter */
+ __m128i str_8x8;
+ __m128i mask = _mm_set1_epi32(0xFFFF); /* keeps the even-indexed 16-bit lane of every 32-bit pair */
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_left = pu1_src + BLK8x8SIZE - 1;
+ pu1_top = pu1_src + BLK8x8SIZE + 1;
+
+ left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 6)); /* starts one byte later than the pu1_left - 7 used by the other left-based modes */
+
+ temp1 = _mm_srli_si128(left_16x8, 1);
+ temp2 = _mm_srli_si128(left_16x8, 2);
+ a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero);
+ a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
+ a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res1_8x16 = _mm_srai_epi16(a0_8x16, 2); /* 3-tap filtered values along the left column */
+
+ top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1)); /* begins one byte before the top row */
+
+ temp1 = _mm_srli_si128(top_8x8, 1);
+ temp2 = _mm_srli_si128(top_8x8, 2);
+ a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero);
+ a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
+ a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
+
+ res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); /* (s[k] + s[k+1] + 1) >> 1; must use a0/a1 before they are overwritten below */
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res2_8x16 = _mm_srai_epi16(a0_8x16, 2); /* 3-tap filtered values along the top row */
+
+ str_8x8 = _mm_packus_epi16(res3_8x16, zero);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); /* row 0 is the plain 2-tap average of the top row */
+
+ temp1 = _mm_and_si128(res1_8x16, mask); /* select res1 lanes 0, 2, 4, 6 */
+ temp1 = _mm_packs_epi32(temp1, temp1); /* compact them into four 16-bit lanes (duplicated in both halves) */
+ out1_16x16 = _mm_packus_epi16(temp1, res2_8x16); /* bytes 4..7: even left values, bytes 8..15: top 3-tap values */
+
+ res1_8x16 = _mm_slli_si128(res1_8x16, 2); /* move odd lanes into even positions */
+ temp1 = _mm_and_si128(res1_8x16, mask); /* select original res1 lanes 1, 3, 5 (lane 0 is zero) */
+ temp1 = _mm_packs_epi32(temp1, temp1);
+ out2_16x16 = _mm_packus_epi16(temp1, res3_8x16); /* bytes 4..7: odd left values, bytes 8..15: top averages */
+
+ str_8x8 = _mm_srli_si128(out1_16x16, 7); /* one left value followed by 7 top 3-tap values */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
+
+ str_8x8 = _mm_srli_si128(out2_16x16, 7); /* one left value followed by 7 top averages */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
+
+ str_8x8 = _mm_srli_si128(out1_16x16, 6); /* each further row pair pulls in one more left value */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
+
+ str_8x8 = _mm_srli_si128(out2_16x16, 6);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
+
+ str_8x8 = _mm_srli_si128(out1_16x16, 5);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
+
+ str_8x8 = _mm_srli_si128(out2_16x16, 5);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
+
+ str_8x8 = _mm_srli_si128(out1_16x16, 4);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8);
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_horz_d_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:Horizontal_Down
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.8
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_horz_d_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ __m128i pels_16x16; /* 16 neighbour bytes starting at pu1_left - 7 */
+ __m128i temp1, temp2, temp3, temp4;
+ __m128i a0_8x16, a1_8x16, a2_8x16; /* samples k, k+1, k+2 widened to 16 bits */
+ __m128i zero = _mm_setzero_si128();
+ __m128i const_val2_8x16 = _mm_set1_epi16(2); /* rounding term of the 3-tap filter */
+ __m128i res1_8x16, res2_8x16; /* 2-tap averages and 3-tap filtered values */
+ __m128i out1_16x16, out2_16x16; /* sources for rows 0..3 and rows 4..7 */
+ __m128i str_8x8;
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_left = pu1_src + BLK8x8SIZE - 1;
+
+ pels_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); /* 8 left pixels followed by the bytes after them */
+
+ temp1 = _mm_srli_si128(pels_16x16, 1); /* sample k + 1 aligned to lane k */
+ temp2 = _mm_srli_si128(pels_16x16, 2); /* sample k + 2 aligned to lane k */
+ a0_8x16 = _mm_unpacklo_epi8(pels_16x16, zero);
+ a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
+ a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
+
+ res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); /* (s[k] + s[k+1] + 1) >> 1 for k = 0..7 */
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res2_8x16 = _mm_srai_epi16(a0_8x16, 2); /* (s[k] + 2*s[k+1] + s[k+2] + 2) >> 2 for k = 0..7 */
+
+ temp3 = _mm_unpacklo_epi16(res1_8x16, res2_8x16); /* interleave average / filtered pairs, k = 0..3 */
+ temp4 = _mm_unpackhi_epi16(res1_8x16, res2_8x16); /* interleave average / filtered pairs, k = 4..7 */
+ out2_16x16 = _mm_packus_epi16(temp3, temp4); /* 16 bytes: avg0, flt0, avg1, flt1, ... avg7, flt7 */
+
+ a0_8x16 = _mm_unpackhi_epi8(pels_16x16, zero); /* upper 8 samples, temp1/temp2 still hold the shifted copies */
+ a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
+ a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res2_8x16 = _mm_srai_epi16(a0_8x16, 2); /* 3-tap filtered values for k = 8..15 */
+
+ out1_16x16 = _mm_packus_epi16(res2_8x16, zero);
+ temp1 = _mm_srli_si128(out2_16x16, 8); /* interleaved pairs for k = 4..7 */
+ out1_16x16 = _mm_unpacklo_epi64(temp1, out1_16x16); /* low half: pairs k = 4..7, high half: filtered k = 8..15 */
+
+ str_8x8 = _mm_srli_si128(out1_16x16, 6); /* rows step by 2 bytes, i.e. one average/filtered pair per row */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out1_16x16, 4);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out1_16x16, 2);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out1_16x16);
+
+ str_8x8 = _mm_srli_si128(out2_16x16, 6); /* rows 4..7 consist only of interleaved left pairs */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out2_16x16, 4);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out2_16x16, 2);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16);
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_vert_l_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:Vertical_Left
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.9
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+
+void ih264_intra_pred_luma_8x8_mode_vert_l_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
+ __m128i top_16x16; /* 16 bytes loaded starting at pu1_top */
+ __m128i temp1, temp2;
+ __m128i a0_8x16, a1_8x16, a2_8x16; /* samples k, k+1, k+2 widened to 16 bits */
+ __m128i zero = _mm_setzero_si128();
+ __m128i const_val2_8x16 = _mm_set1_epi16(2); /* rounding term of the 3-tap filter */
+ __m128i res1_8x16, res2_8x16, res3_8x16, res4_8x16; /* avg low, 3-tap low, avg high, 3-tap high */
+ __m128i out1_16x16, out2_16x16; /* source for even rows (averages) / odd rows (3-tap) */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+ pu1_top = pu1_src + BLK8x8SIZE + 1;
+
+ top_16x16 = _mm_loadu_si128((__m128i *)(pu1_top));
+ temp1 = _mm_srli_si128(top_16x16, 1); /* top[k + 1] aligned to lane k */
+ temp2 = _mm_srli_si128(top_16x16, 2); /* top[k + 2] aligned to lane k */
+ a0_8x16 = _mm_unpacklo_epi8(top_16x16, zero);
+ a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
+ a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
+
+ res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); /* (top[k] + top[k+1] + 1) >> 1, k = 0..7 */
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res2_8x16 = _mm_srai_epi16(a0_8x16, 2); /* (top[k] + 2*top[k+1] + top[k+2] + 2) >> 2, k = 0..7 */
+
+ a0_8x16 = _mm_unpackhi_epi8(top_16x16, zero); /* repeat both filters on the upper 8 samples */
+ a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
+ a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
+
+ res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res4_8x16 = _mm_srai_epi16(a0_8x16, 2);
+
+ out1_16x16 = _mm_packus_epi16(res1_8x16, res3_8x16); /* 16 averages; only bytes 0..10 are ever stored */
+ out2_16x16 = _mm_packus_epi16(res2_8x16, res4_8x16); /* 16 3-tap values; only bytes 0..10 are ever stored */
+
+ _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out1_16x16); /* even rows come from the averages */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out2_16x16); /* odd rows come from the 3-tap values */
+ out1_16x16 = _mm_srli_si128(out1_16x16, 1); /* advance one sample for every pair of rows */
+ out2_16x16 = _mm_srli_si128(out2_16x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out1_16x16);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out2_16x16);
+ out1_16x16 = _mm_srli_si128(out1_16x16, 1);
+ out2_16x16 = _mm_srli_si128(out2_16x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out1_16x16);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out2_16x16);
+ out1_16x16 = _mm_srli_si128(out1_16x16, 1);
+ out2_16x16 = _mm_srli_si128(out2_16x16, 1);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out1_16x16);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16);
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_8x8_mode_horz_u_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_8x8 mode:Horizontal_Up
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.10
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_8x8_mode_horz_u_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
+ __m128i left_16x16; /* 16 neighbour bytes starting at pu1_left - 7 */
+ __m128i temp1, temp2;
+ __m128i a0_8x16, a1_8x16, a2_8x16; /* samples k-1, k, k+1 widened to 16 bits */
+ __m128i zero = _mm_setzero_si128();
+ __m128i const_val2_8x16 = _mm_set1_epi16(2); /* rounding term of the 3-tap filter */
+ __m128i res1_8x16, res2_8x16; /* 2-tap averages and 3-tap filtered values */
+ __m128i out1_16x16;
+ __m128i str_8x8;
+ __m128i shuffle_16x16; /* byte-reversal control for _mm_shuffle_epi8 */
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_left = pu1_src + BLK8x8SIZE - 1;
+ shuffle_16x16 = _mm_set_epi8(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
+ 0x0F); /* result byte i takes source byte 15 - i */
+
+ left_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); /* lane 0 is pu1_left[-7], lane 7 is pu1_left[0] */
+ temp1 = _mm_srli_si128(left_16x16, 1); /* sample k + 1 aligned to lane k */
+ a0_8x16 = _mm_unpacklo_epi8(left_16x16, zero);
+ a0_8x16 = _mm_slli_si128(a0_8x16, 2); /* shift up one lane: lane k now holds sample k - 1 */
+ a1_8x16 = _mm_unpacklo_epi8(left_16x16, zero);
+ a0_8x16 = _mm_shufflelo_epi16(a0_8x16, 0xE5); /* fill the vacated lane 0 with sample 0 (duplicate of lane 1) */
+ a2_8x16 = _mm_unpacklo_epi8(temp1, zero);
+
+ res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); /* (s[k-1] + s[k] + 1) >> 1 */
+
+ a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
+ a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
+ a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
+ res2_8x16 = _mm_srai_epi16(a0_8x16, 2); /* (s[k-1] + 2*s[k] + s[k+1] + 2) >> 2 */
+
+ temp1 = _mm_unpacklo_epi16(res1_8x16, res2_8x16); /* interleave average / filtered pairs, k = 0..3 */
+ temp2 = _mm_unpackhi_epi16(res1_8x16, res2_8x16); /* interleave average / filtered pairs, k = 4..7 */
+ out1_16x16 = _mm_packus_epi16(temp1, temp2);
+ out1_16x16 = _mm_shuffle_epi8(out1_16x16, shuffle_16x16); /* reverse bytes so values run from pu1_left[0] towards pu1_left[-7] */
+
+ str_8x8 = _mm_srli_si128(out1_16x16, 1); /* rows step by 2 bytes, i.e. one pair per row */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out1_16x16, 3);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out1_16x16, 5);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(out1_16x16, 7);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
+ temp1 = _mm_set1_epi8(pu1_left[-7]); /* lowest-addressed left pixel, replicated for padding */
+ str_8x8 = _mm_unpacklo_epi64(str_8x8, temp1); /* low half: row 3 data, high half: padding pixel */
+ str_8x8 = _mm_srli_si128(str_8x8, 2); /* rows 4..7 shift the padding pixel in from the right */
+ _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(str_8x8, 2);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(str_8x8, 2);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
+ str_8x8 = _mm_srli_si128(str_8x8, 2);
+ _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8);
+
+}
+
+
+/******************* 16x16 Modes *******************/
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_16x16_mode_vert_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_16x16 mode:Vertical
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_16x16 mode:Vertical, described in sec 8.3.3.1
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels (Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_16x16_mode_vert_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_top; /* start of the 16 top predictors */
+ WORD32 dst_strd2, dst_strd3, dst_strd4; /* 2x, 3x and 4x the destination stride */
+
+ __m128i top_16x8b; /* the top row, copied unchanged into every output row */
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_top = pu1_src + MB_SIZE + 1;
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd4 = dst_strd << 2;
+
+ top_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
+
+ dst_strd3 = dst_strd + dst_strd2;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); /* rows 0..3 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
+ pu1_dst += dst_strd4; /* advance four rows */
+
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); /* rows 4..7 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
+ pu1_dst += dst_strd4;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); /* rows 8..11 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
+ pu1_dst += dst_strd4;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); /* rows 12..15 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_16x16_mode_horz_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_16x16 mode:Horizontal
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_16x16 mode:Horizontal, described in sec 8.3.3.2
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left; /* left predictors are read from pu1_left - 15 .. pu1_left */
+ WORD32 dst_strd2, dst_strd3, dst_strd4; /* 2x, 3x and 4x the destination stride */
+ WORD32 val1, val2; /* two left pixels each, extracted as one 16-bit lane */
+
+ __m128i val_16x8b; /* all 16 left pixels */
+ __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; /* one replicated pixel per output row */
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_left = pu1_src + MB_SIZE - 1;
+
+ dst_strd4 = dst_strd << 2;
+
+ val_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 15)); /* byte 15 of the vector is pu1_left[0] */
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd3 = dst_strd4 - dst_strd;
+
+ val1 = _mm_extract_epi16(val_16x8b, 7); /* bytes 14 and 15: pu1_left[-1], pu1_left[0] */
+ val2 = _mm_extract_epi16(val_16x8b, 6); /* bytes 12 and 13 */
+
+ row1_16x8b = _mm_set1_epi8(val1 >> 8); /* high byte = higher address, so output row r uses pu1_left[-r] */
+ row2_16x8b = _mm_set1_epi8(val1 & 0xff);
+ row3_16x8b = _mm_set1_epi8(val2 >> 8);
+ row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); /* rows 0..3 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
+
+ val1 = _mm_extract_epi16(val_16x8b, 5);
+ val2 = _mm_extract_epi16(val_16x8b, 4);
+
+ pu1_dst += dst_strd4; /* advance four rows */
+ row1_16x8b = _mm_set1_epi8(val1 >> 8);
+ row2_16x8b = _mm_set1_epi8(val1 & 0xff);
+ row3_16x8b = _mm_set1_epi8(val2 >> 8);
+ row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); /* rows 4..7 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
+
+ val1 = _mm_extract_epi16(val_16x8b, 3);
+ val2 = _mm_extract_epi16(val_16x8b, 2);
+
+ pu1_dst += dst_strd4;
+ row1_16x8b = _mm_set1_epi8(val1 >> 8);
+ row2_16x8b = _mm_set1_epi8(val1 & 0xff);
+ row3_16x8b = _mm_set1_epi8(val2 >> 8);
+ row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); /* rows 8..11 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
+
+ val1 = _mm_extract_epi16(val_16x8b, 1);
+ val2 = _mm_extract_epi16(val_16x8b, 0);
+
+ pu1_dst += dst_strd4;
+ row1_16x8b = _mm_set1_epi8(val1 >> 8);
+ row2_16x8b = _mm_set1_epi8(val1 & 0xff);
+ row3_16x8b = _mm_set1_epi8(val2 >> 8);
+ row4_16x8b = _mm_set1_epi8(val2 & 0xff);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); /* rows 12..15 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_16x16_mode_dc_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_16x16 mode:DC
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_16x16 mode:DC, described in sec 8.3.3.3
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_16x16_mode_dc_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ WORD8 u1_useleft, u1_usetop; /* availability flags; NOTE(review): declared WORD8 while the 8x8 DC variant uses UWORD8 */
+ WORD32 dc_val; /* running sum of neighbours plus rounding terms, then the final DC value */
+
+ WORD32 dst_strd2, dst_strd3, dst_strd4; /* 2x, 3x and 4x the destination stride */
+
+ __m128i dc_val_16x8b; /* DC value replicated into every byte lane */
+
+ UNUSED(src_strd);
+
+ u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
+ u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
+
+ if(u1_useleft || u1_usetop)
+ {
+ WORD32 shft; /* becomes 4 with one side available, 5 with both */
+ __m128i val_16x8b, zero_16x8b, sum_8x16b;
+
+ dc_val = 0;
+ shft = 3;
+
+ zero_16x8b = _mm_setzero_si128();
+
+ if(u1_useleft)
+ {
+ UWORD8 *pu1_left;
+
+ pu1_left = pu1_src + MB_SIZE - 1;
+
+ val_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 15)); /* 16 left neighbours */
+ sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b); /* SAD against zero: one byte-sum per 64-bit half */
+
+ shft++;
+ dc_val += 8; /* rounding contribution of this side */
+ dc_val += _mm_extract_epi16(sum_8x16b, 0); /* sum of bytes 0..7 */
+ dc_val += _mm_extract_epi16(sum_8x16b, 4); /* sum of bytes 8..15 */
+ }
+ if(u1_usetop)
+ {
+ UWORD8 *pu1_top;
+
+ pu1_top = pu1_src + MB_SIZE + 1;
+
+ val_16x8b = _mm_loadu_si128((__m128i *)pu1_top); /* 16 top neighbours */
+ sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b); /* SAD against zero: one byte-sum per 64-bit half */
+
+ shft++;
+ dc_val += 8; /* rounding contribution of this side */
+ dc_val += _mm_extract_epi16(sum_8x16b, 0);
+ dc_val += _mm_extract_epi16(sum_8x16b, 4);
+ }
+ dc_val = dc_val >> shft; /* (sum + 8) >> 4 for one side, (sum + 16) >> 5 for both */
+ }
+ else
+ dc_val = 128; /* no neighbour available: fixed mid-grey value */
+
+ dc_val_16x8b = _mm_set1_epi8(dc_val);
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd4 = dst_strd << 2;
+ dst_strd3 = dst_strd + dst_strd2;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); /* rows 0..3 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
+ pu1_dst += dst_strd4; /* advance four rows */
+
+ _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); /* rows 4..7 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
+ pu1_dst += dst_strd4;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); /* rows 8..11 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
+ pu1_dst += dst_strd4;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); /* rows 12..15 */
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
+}
+
+/**
+ *******************************************************************************
+ *
+ * ih264_intra_pred_luma_16x16_mode_plane_ssse3
+ *
+ * @brief
+ * Perform Intra prediction for luma_16x16 mode:PLANE
+ *
+ * @par Description:
+ * Perform Intra prediction for luma_16x16 mode:PLANE, described in sec 8.3.3.4
+ *
+ * @param[in] pu1_src
+ * UWORD8 pointer to the source
+ *
+ * @param[out] pu1_dst
+ * UWORD8 pointer to the destination
+ *
+ * @param[in] src_strd
+ * integer source stride
+ *
+ * @param[in] dst_strd
+ * integer destination stride
+ *
+ * @param[in] ngbr_avail
+ * availability of neighbouring pixels(Not used in this function)
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************/
+void ih264_intra_pred_luma_16x16_mode_plane_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 ngbr_avail)
+{
+ UWORD8 *pu1_left, *pu1_top;
+ WORD32 a, b, c;
+
+ __m128i rev_8x16b, mul_8x16b, zero_16x8b;
+
+ UNUSED(src_strd);
+ UNUSED(ngbr_avail);
+
+ pu1_top = pu1_src + MB_SIZE + 1;
+ pu1_left = pu1_src + MB_SIZE - 1;
+
+ rev_8x16b = _mm_setr_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100);
+ //used to reverse the order of 16-bit values in a vector
+
+ mul_8x16b = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ zero_16x8b = _mm_setzero_si128();
+
+ //calculating a, b and c
+ {
+ WORD32 h, v;
+
+ __m128i h_val1_16x8b, h_val2_16x8b;
+ __m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b;
+ __m128i v_val1_16x8b, v_val2_16x8b;
+ __m128i v_val1_8x16b, v_val2_8x16b, v_val_4x32b;
+ __m128i hv_val_4x32b;
+
+ a = (pu1_top[15] + pu1_left[-15]) << 4;
+
+ h_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top + 8));
+ h_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top - 1));
+ v_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 15));
+ v_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 6));
+
+ h_val1_8x16b = _mm_unpacklo_epi8(h_val1_16x8b, zero_16x8b);
+ h_val2_8x16b = _mm_unpacklo_epi8(h_val2_16x8b, zero_16x8b);
+ v_val1_8x16b = _mm_unpacklo_epi8(v_val1_16x8b, zero_16x8b);
+ v_val2_8x16b = _mm_unpacklo_epi8(v_val2_16x8b, zero_16x8b);
+
+ h_val2_8x16b = _mm_shuffle_epi8(h_val2_8x16b, rev_8x16b);
+ v_val1_8x16b = _mm_shuffle_epi8(v_val1_8x16b, rev_8x16b);
+
+ h_val1_8x16b = _mm_sub_epi16(h_val1_8x16b, h_val2_8x16b);
+ v_val1_8x16b = _mm_sub_epi16(v_val1_8x16b, v_val2_8x16b);
+
+ h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b);
+ v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b);
+
+ hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b);
+ hv_val_4x32b = _mm_hadd_epi32(hv_val_4x32b, hv_val_4x32b);
+
+ h = _mm_extract_epi16(hv_val_4x32b, 0);
+ v = _mm_extract_epi16(hv_val_4x32b, 2);
+ h = (h << 16) >> 16;
+ v = (v << 16) >> 16;
+
+ b = ((h << 2) + h + 32) >> 6;
+ c = ((v << 2) + v + 32) >> 6;
+ }
+
+ //using a, b and c to compute the fitted plane values
+ {
+ __m128i const_8x16b, b_8x16b, c_8x16b, c2_8x16b;
+ __m128i res1_l_8x16b, res1_h_8x16b;
+ __m128i res2_l_8x16b, res2_h_8x16b;
+ __m128i res1_sh_l_8x16b, res1_sh_h_8x16b, res1_16x8b;
+ __m128i res2_sh_l_8x16b, res2_sh_h_8x16b, res2_16x8b;
+
+ b_8x16b = _mm_set1_epi16(b);
+ c_8x16b = _mm_set1_epi16(c);
+ c2_8x16b = _mm_set1_epi16(c << 1);
+ const_8x16b = _mm_set1_epi16(a - c*7 + 16);
+
+ res1_h_8x16b = _mm_mullo_epi16(mul_8x16b, b_8x16b);
+ //contains {b*1, b*2, b*3,... b*8}
+
+ res1_l_8x16b = _mm_shuffle_epi8(res1_h_8x16b, rev_8x16b);
+ res1_l_8x16b = _mm_srli_si128(res1_l_8x16b, 2);
+ res1_l_8x16b = _mm_sub_epi16(zero_16x8b, res1_l_8x16b);
+ //contains {-b*7, -b*6,... -b*1, b*0}
+
+ // rows 1, 2
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, const_8x16b);
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, const_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res1_h_8x16b, c_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res1_l_8x16b, c_8x16b);
+
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 3, 4
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+
+ pu1_dst += dst_strd << 1;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 5, 6
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+
+ pu1_dst += dst_strd << 1;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 7, 8
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+
+ pu1_dst += dst_strd << 1;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 9, 10
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+
+ pu1_dst += dst_strd << 1;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 11, 12
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+
+ pu1_dst += dst_strd << 1;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 13, 14
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+
+ pu1_dst += dst_strd << 1;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+
+ // rows 15, 16
+ res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
+ res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
+ res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
+ res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
+
+ res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
+ res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
+ res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
+ res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
+
+ pu1_dst += dst_strd << 1;
+
+ res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
+ res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
+ }
+}
diff --git a/common/x86/ih264_mem_fns_ssse3.c b/common/x86/ih264_mem_fns_ssse3.c
new file mode 100755
index 0000000..8ca1f3e
--- /dev/null
+++ b/common/x86/ih264_mem_fns_ssse3.c
@@ -0,0 +1,169 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_mem_fns_ssse3.c
+ *
+ * @brief
+ * Functions used for memory operations
+ *
+ * @author
+ * Ittiam
+ *
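+ * @par List of Functions:
+ * - ih264_memcpy_mul_8_ssse3()
+ * - ih264_memset_mul_8_ssse3()
+ * - ih264_memset_16bit_mul_8_ssse3()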
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "ih264_typedefs.h"
+#include "ih264_mem_fns.h"
+
+#include <immintrin.h>
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  memcpy for sizes that are a multiple of 8 bytes
+ *
+ * @par Description:
+ *  Copies 8-bit data from source to destination, 8 bytes per iteration.
+ *  A trailing remainder of fewer than 8 bytes is not copied.
+ *
+ * @param[in] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] pu1_src
+ *  UWORD8 pointer to the source
+ *
+ * @param[in] num_bytes
+ *  number of bytes to copy
+ *
+ * @returns
+ *
+ * @remarks
+ *  The count is held in a signed int, as in the original implementation.
+ *
+ *******************************************************************************
+ */
+void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes)
+{
+    int bytes_left = num_bytes;
+
+    while(bytes_left >= 8)
+    {
+        /* move one 64-bit chunk through the low half of an XMM register */
+        __m128i chunk_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+
+        _mm_storel_epi64((__m128i *)pu1_dst, chunk_16x8b);
+
+        pu1_src += 8;
+        pu1_dst += 8;
+        bytes_left -= 8;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  memset for sizes that are a multiple of 8 bytes
+ *
+ * @par Description:
+ *  Fills the destination with an 8-bit value, 8 bytes per iteration.
+ *  A trailing remainder of fewer than 8 bytes is not written.
+ *
+ * @param[in] pu1_dst
+ *  UWORD8 pointer to the destination
+ *
+ * @param[in] value
+ *  UWORD8 value used for memset
+ *
+ * @param[in] num_bytes
+ *  number of bytes to set
+ *
+ * @returns
+ *
+ * @remarks
+ *  The count is held in a signed int, as in the original implementation.
+ *
+ *******************************************************************************
+ */
+void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes)
+{
+    /* the fill byte replicated into every lane */
+    __m128i fill_16x8b = _mm_set1_epi8(value);
+    int bytes_left = num_bytes;
+
+    while(bytes_left >= 8)
+    {
+        _mm_storel_epi64((__m128i *)pu1_dst, fill_16x8b);
+        pu1_dst += 8;
+        bytes_left -= 8;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  memset of 16bit data for counts that are a multiple of 8 words
+ *
+ * @par Description:
+ *  Fills the destination with a 16-bit value, 8 words (16 bytes) per
+ *  iteration. A trailing remainder of fewer than 8 words is not written.
+ *
+ * @param[in] pu2_dst
+ *  UWORD16 pointer to the destination
+ *
+ * @param[in] value
+ *  UWORD16 value used for memset
+ *
+ * @param[in] num_words
+ *  number of words to set
+ *
+ * @returns
+ *
+ * @remarks
+ *  The count is held in a signed int, as in the original implementation.
+ *
+ *******************************************************************************
+ */
+void ih264_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words)
+{
+    /* the fill word replicated into all eight 16-bit lanes */
+    __m128i fill_8x16b = _mm_set1_epi16(value);
+    int words_left = num_words;
+
+    while(words_left >= 8)
+    {
+        _mm_storeu_si128((__m128i *)pu2_dst, fill_8x16b);
+        pu2_dst += 8;
+        words_left -= 8;
+    }
+}
+
diff --git a/common/x86/ih264_padding_ssse3.c b/common/x86/ih264_padding_ssse3.c
new file mode 100755
index 0000000..6dadd39
--- /dev/null
+++ b/common/x86/ih264_padding_ssse3.c
@@ -0,0 +1,335 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_padding_ssse3.c
+*
+* @brief
+* Contains function definitions for Padding
+*
+* @author
+* Srinivas T
+*
+* @par List of Functions:
+* - ih264_pad_left_luma_ssse3()
+* - ih264_pad_left_chroma_ssse3()
+* - ih264_pad_right_luma_ssse3()
+* - ih264_pad_right_chroma_ssse3()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#include <string.h>
+#include <assert.h>
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_mem_fns.h"
+#include "ih264_debug.h"
+
+#include <immintrin.h>
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Padding (luma block) at the left of a 2d array
+*
+* @par Description:
+*  For every row, the first byte of the row (pu1_src[0]) is replicated into
+*  the pad_size bytes immediately to its left. The padding is written in
+*  groups of 8 bytes.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the first sample of the first row
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ht
+*  integer height of the array
+*
+* @param[in] pad_size
+*  integer -padding size of the array (asserted to be a multiple of 8)
+*
+* @returns
+*
+* @remarks
+*  16 bytes are loaded from the start of every row although only the first
+*  one is used.
+*
+*******************************************************************************
+*/
+
+void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src,
+                               WORD32 src_strd,
+                               WORD32 ht,
+                               WORD32 pad_size)
+{
+    /* all-zero shuffle control: every output byte selects input byte 0 */
+    const __m128i bcast_ctrl_16x8b = _mm_setzero_si128();
+    UWORD8 *pu1_row = pu1_src;
+    WORD32 rows_left;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        UWORD8 *pu1_pad = pu1_row - pad_size;
+        WORD32 offset = 0;
+        __m128i edge_16x8b;
+
+        edge_16x8b = _mm_loadu_si128((__m128i *)pu1_row);
+        edge_16x8b = _mm_shuffle_epi8(edge_16x8b, bcast_ctrl_16x8b);
+
+        while(offset < pad_size)
+        {
+            _mm_storel_epi64((__m128i *)(pu1_pad + offset), edge_16x8b);
+            offset += 8;
+        }
+
+        pu1_row += src_strd;
+    }
+}
+
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Padding (chroma block) at the left of a 2d array
+*
+* @par Description:
+*  For every row, the first two bytes of the row (pu1_src[0], pu1_src[1]) are
+*  replicated as a pair into the pad_size bytes immediately to the left of
+*  the row (presumably an interleaved chroma sample pair). The padding is
+*  written in groups of 8 bytes.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the first sample of the first row
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ht
+*  integer height of the array
+*
+* @param[in] pad_size
+*  integer -padding size of the array (asserted to be a multiple of 8)
+*
+* @returns
+*
+* @remarks
+*  16 bytes are loaded from the start of every row although only the first
+*  two are used.
+*
+*******************************************************************************
+*/
+
+void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src,
+                                 WORD32 src_strd,
+                                 WORD32 ht,
+                                 WORD32 pad_size)
+{
+    /* shuffle control bytes {0, 1, 0, 1, ...}: repeat the first byte pair */
+    const __m128i pair_ctrl_16x8b = _mm_set1_epi16(0x0100);
+    UWORD8 *pu1_row = pu1_src;
+    WORD32 rows_left;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        UWORD8 *pu1_pad = pu1_row - pad_size;
+        WORD32 offset = 0;
+        __m128i edge_16x8b;
+
+        edge_16x8b = _mm_loadu_si128((__m128i *)pu1_row);
+        edge_16x8b = _mm_shuffle_epi8(edge_16x8b, pair_ctrl_16x8b);
+
+        while(offset < pad_size)
+        {
+            _mm_storel_epi64((__m128i *)(pu1_pad + offset), edge_16x8b);
+            offset += 8;
+        }
+
+        pu1_row += src_strd;
+    }
+}
+
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Padding (luma block) at the right of a 2d array
+*
+* @par Description:
+*  For every row, the byte just before pu1_src (pu1_src[-1]) is replicated
+*  into the pad_size bytes starting at pu1_src. The padding is written in
+*  groups of 8 bytes.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the first byte to be padded in the first row, i.e. one
+*  past the sample that is replicated
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ht
+*  integer height of the array
+*
+* @param[in] pad_size
+*  integer -padding size of the array (asserted to be a multiple of 8)
+*
+* @returns
+*
+* @remarks
+*  16 bytes are loaded starting at pu1_src - 1 although only the first one
+*  is used.
+*
+*******************************************************************************
+*/
+
+void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src,
+                                WORD32 src_strd,
+                                WORD32 ht,
+                                WORD32 pad_size)
+{
+    /* all-zero shuffle control: every output byte selects input byte 0 */
+    const __m128i bcast_ctrl_16x8b = _mm_setzero_si128();
+    UWORD8 *pu1_pad = pu1_src;
+    WORD32 rows_left;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        WORD32 offset = 0;
+        __m128i edge_16x8b;
+
+        edge_16x8b = _mm_loadu_si128((__m128i *)(pu1_pad - 1));
+        edge_16x8b = _mm_shuffle_epi8(edge_16x8b, bcast_ctrl_16x8b);
+
+        while(offset < pad_size)
+        {
+            _mm_storel_epi64((__m128i *)(pu1_pad + offset), edge_16x8b);
+            offset += 8;
+        }
+
+        pu1_pad += src_strd;
+    }
+}
+
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Padding (chroma block) at the right of a 2d array
+*
+* @par Description:
+*  For every row, the two bytes just before pu1_src (pu1_src[-2],
+*  pu1_src[-1]) are replicated as a pair into the pad_size bytes starting at
+*  pu1_src (presumably an interleaved chroma sample pair). The padding is
+*  written in groups of 8 bytes.
+*
+* @param[in] pu1_src
+*  UWORD8 pointer to the first byte to be padded in the first row, i.e. one
+*  past the byte pair that is replicated
+*
+* @param[in] src_strd
+*  integer source stride
+*
+* @param[in] ht
+*  integer height of the array
+*
+* @param[in] pad_size
+*  integer -padding size of the array (asserted to be a multiple of 8)
+*
+* @returns
+*
+* @remarks
+*  16 bytes are loaded starting at pu1_src - 2 although only the first two
+*  are used.
+*
+*******************************************************************************
+*/
+
+void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src,
+                                  WORD32 src_strd,
+                                  WORD32 ht,
+                                  WORD32 pad_size)
+{
+    /* shuffle control bytes {0, 1, 0, 1, ...}: repeat the first byte pair */
+    const __m128i pair_ctrl_16x8b = _mm_set1_epi16(0x0100);
+    UWORD8 *pu1_pad = pu1_src;
+    WORD32 rows_left;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        WORD32 offset = 0;
+        __m128i edge_16x8b;
+
+        edge_16x8b = _mm_loadu_si128((__m128i *)(pu1_pad - 2));
+        edge_16x8b = _mm_shuffle_epi8(edge_16x8b, pair_ctrl_16x8b);
+
+        while(offset < pad_size)
+        {
+            _mm_storel_epi64((__m128i *)(pu1_pad + offset), edge_16x8b);
+            offset += 8;
+        }
+
+        pu1_pad += src_strd;
+    }
+}
+
diff --git a/common/x86/ih264_platform_macros.h b/common/x86/ih264_platform_macros.h
new file mode 100755
index 0000000..e4b9821
--- /dev/null
+++ b/common/x86/ih264_platform_macros.h
@@ -0,0 +1,114 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IH264_PLATFORM_MACROS_H_
+#define _IH264_PLATFORM_MACROS_H_
+
+#include <immintrin.h>
+
+
+/* Saturate x to the value range of the named unsigned/signed bit width */
+#define CLIP_U8(x) CLIP3(0, 255, (x))
+#define CLIP_S8(x) CLIP3(-128, 127, (x))
+
+#define CLIP_U10(x) CLIP3(0, 1023, (x))
+#define CLIP_S10(x) CLIP3(-512, 511, (x))
+
+#define CLIP_U12(x) CLIP3(0, 4095, (x))
+#define CLIP_S12(x) CLIP3(-2048, 2047, (x))
+
+#define CLIP_U16(x) CLIP3(0, 65535, (x))
+#define CLIP_S16(x) CLIP3(-32768, 32767, (x))
+
+/* MEM_ALIGN16 is defined together with MEM_ALIGN8 and MEM_ALIGN32 at the */
+/* end of this header                                                      */
+
+/* Shifts that evaluate to 0 instead of shifting by 32 or more */
+#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0)
+#define SHR(x,y) (((y) < 32) ? ((x) >> (y)) : 0)
+
+/* Shifts whose direction flips when the shift count is negative.         */
+/* Arguments are parenthesized so that expressions such as (a - b) can be */
+/* passed as the shift count.                                              */
+#define SHR_NEG(val,shift) (((shift) > 0) ? ((val) >> (shift)) : ((val) << (-(shift))))
+#define SHL_NEG(val,shift) (((shift) < 0) ? ((val) >> (-(shift))) : ((val) << (shift)))
+
+
+/* Byte-swaps a 32-bit word. The argument is converted to UWORD32 before   */
+/* shifting so that no signed value is shifted into the sign bit.          */
+/* NOTE: the expansion ends with ';' as it always did; existing call sites */
+/* may rely on that.                                                        */
+#define ITT_BIG_ENDIAN(x) (((UWORD32)(x) << 24) | \
+                           (((UWORD32)(x) & 0x0000ff00) << 8) | \
+                           (((UWORD32)(x) & 0x00ff0000) >> 8) | \
+                           ((UWORD32)(x) >> 24));
+
+
+/* Empty counting loop of nop_cnt iterations */
+#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < (nop_cnt); nop_i++);}
+
+/* Expands to nothing in this header */
+#define PLD(a)
+
+/* Number of leading zero bits in u4_word; 32 when u4_word is 0 */
+static __inline UWORD32 CLZ(UWORD32 u4_word)
+{
+    /* __builtin_clz is undefined for 0, so that input is answered directly */
+    if(0 == u4_word)
+    {
+        return 32;
+    }
+
+    return (UWORD32)__builtin_clz(u4_word);
+}
+
+/* Number of trailing zero bits in u4_word; 31 (not 32) when u4_word is 0 */
+static __inline UWORD32 CTZ(UWORD32 u4_word)
+{
+    /* __builtin_ctz is undefined for 0, so that input is answered directly */
+    if(u4_word)
+    {
+        return (UWORD32)__builtin_ctz(u4_word);
+    }
+
+    return 31;
+}
+
+#define DATA_SYNC() __sync_synchronize() /* compiler builtin: full memory barrier */
+
+
+
+//#define INLINE __inline   (alternative definition, currently disabled)
+#define INLINE /* expands to nothing */
+
+#define PREFETCH_ENABLE 1 /* set to 0 to compile PREFETCH() out */
+
+#if PREFETCH_ENABLE
+#define PREFETCH(ptr, type) _mm_prefetch(ptr, type); /* NOTE: the expansion already ends with ';' */
+#else
+#define PREFETCH(ptr, type) /* prefetching disabled: expands to nothing */
+#endif
+
+#define MEM_ALIGN8 __attribute__ ((aligned (8))) /* GCC-style alignment attributes */
+#define MEM_ALIGN16 __attribute__ ((aligned (16)))
+#define MEM_ALIGN32 __attribute__ ((aligned (32)))
+
+#endif /* _IH264_PLATFORM_MACROS_H_ */
diff --git a/common/x86/ih264_resi_trans_quant_sse42.c b/common/x86/ih264_resi_trans_quant_sse42.c
new file mode 100755
index 0000000..c267651
--- /dev/null
+++ b/common/x86/ih264_resi_trans_quant_sse42.c
@@ -0,0 +1,984 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264_resi_trans_quant_sse42.c
+ *
+ * @brief
+ * Contains function definitions for the single stage forward transform for
+ * H.264. It calculates the residue, does the cf and then does quantization
+ *
+ * @author
+ * Mohit [100664]
+ *
+ * @par List of Functions:
+ * - ih264_resi_trans_quant_4x4_sse42()
+ * - ih264_resi_trans_quant_chroma_4x4_sse42()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+/* System include files */
+#include <stddef.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_macros.h"
+#include "ih264_trans_macros.h"
+#include "ih264_trans_data.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include <immintrin.h>
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs forward transform and quantization on a 4*4 block
+ *
+ * @par Description:
+ * The function accepts a source buffer and an estimation buffer. From these,
+ * it computes the residue. This residue is then transformed and quantized.
+ * The transform and quantization are computed in place. They use the residue
+ * buffer for this.
+ *
+ * @param[in] pu1_src
+ * Pointer to source sub-block
+ *
+ * @param[in] pu1_pred
+ * Pointer to prediction sub-block
+ *
+ * @param[in] pi2_out
+ * Pointer to residual sub-block
+ *
+ * @param[in] src_strd
+ * Source stride
+ *
+ * @param[in] pred_strd
+ * Prediction stride
+ *
+ * @param[in] dst_strd
+ * Destination stride
+ *
+ * @param[in] u4_qbits
+ * QP_BITS_h264_4x4 + floor(QP/6)
+ *
+ * @param[in] pu2_threshold_matrix
+ * Pointer to Forward Quant Threshold Matrix
+ *
+ * @param[in] pu2_scale_matrix
+ * Pointer to Forward Quant Scale Matrix
+ *
+ * @param[in] u4_round_factor
+ * Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ * Total non-zero coefficients in the current sub-block
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred,
+ WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+ const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits, UWORD32 u4_round_factor, UWORD8 *pu1_nnz,
+ WORD16 *pi2_alt_dc_addr)
+{
+ WORD32 tmp_dc, u4_zero_coeff, u4_nonzero_coeff = 0;
+ WORD32 mask0, mask1;
+ __m128i sum0, sum1, sum2, cmp0, cmp1;
+ __m128i rnd_fact = _mm_set1_epi32(u4_round_factor);
+ __m128i temp_2 = _mm_set1_epi16(2);
+ __m128i temp_1 = _mm_set1_epi16(1);
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i pred_r0, pred_r1, pred_r2, pred_r3;
+ __m128i temp0, temp1, temp2, temp3;
+ __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+ __m128i sign_reg0, sign_reg2;
+ __m128i scalemat_r0_r1, scalemat_r2_r3;
+ scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
+ scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
+ src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0])); //a00 a01 a02 a03 0 0 0 0 0 0 0 0 -- all 8 bits
+ src_r1 = _mm_loadl_epi64((__m128i *) (&pu1_src[src_strd])); //a10 a11 a12 a13 0 0 0 0 0 0 0 0 -- all 8 bits
+ src_r2 = _mm_loadl_epi64((__m128i *) (&pu1_src[2 * src_strd])); //a20 a21 a22 a23 0 0 0 0 0 0 0 0 -- all 8 bits
+ src_r3 = _mm_loadl_epi64((__m128i *) (&pu1_src[3 * src_strd])); //a30 a31 a32 a33 0 0 0 0 0 0 0 0 -- all 8 bits
+
+ src_r0 = _mm_cvtepu8_epi16(src_r0);
+ src_r1 = _mm_cvtepu8_epi16(src_r1);
+ src_r2 = _mm_cvtepu8_epi16(src_r2);
+ src_r3 = _mm_cvtepu8_epi16(src_r3);
+
+ pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
+ pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
+
+ pred_r0 = _mm_cvtepu8_epi16(pred_r0); //p00 p01 p02 p03 -- all 16 bits
+ pred_r1 = _mm_cvtepu8_epi16(pred_r1); //p10 p11 p12 p13 -- all 16 bits
+ pred_r2 = _mm_cvtepu8_epi16(pred_r2); //p20 p21 p22 p23 -- all 16 bits
+ pred_r3 = _mm_cvtepu8_epi16(pred_r3); //p30 p31 p32 p33 -- all 16 bits
+
+ src_r0 = _mm_sub_epi16(src_r0, pred_r0);
+ src_r1 = _mm_sub_epi16(src_r1, pred_r1);
+ src_r2 = _mm_sub_epi16(src_r2, pred_r2);
+ src_r3 = _mm_sub_epi16(src_r3, pred_r3);
+
+ /* Perform Forward transform */
+ /*-------------------------------------------------------------*/
+ /* DCT [ Horizontal transformation ] */
+ /*-------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 a1 a2 a3
+ * b0 b1 b2 b3
+ * c0 c1 c2 c3
+ * d0 d1 d2 d3
+ */
+ temp0 = _mm_unpacklo_epi16(src_r0, src_r1); //a0 b0 a1 b1 a2 b2 a3 b3
+ temp2 = _mm_unpacklo_epi16(src_r2, src_r3); //c0 d0 c1 d1 c2 d2 c3 d3
+ temp1 = _mm_unpacklo_epi32(temp0, temp2); //a0 b0 c0 d0 a1 b1 c1 d1
+ temp3 = _mm_unpackhi_epi32(temp0, temp2); //a2 b2 c2 d2 a3 b3 c3 d3
+
+ src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); //a0 b0 c0 d0
+ src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); //a1 b1 c1 d1
+ src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); //a2 b2 c2 d2
+ src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); //a3 b3 c3 d3
+
+ /*----------------------------------------------------------*/
+ /* x0 = z0 + z3 */
+ temp0 = _mm_add_epi16(src_r0, src_r3);
+ /* x1 = z1 + z2 */
+ temp1 = _mm_add_epi16(src_r1, src_r2);
+ /* x2 = z1 - z2 */
+ temp2 = _mm_sub_epi16(src_r1, src_r2);
+ /* x3 = z0 - z3 */
+ temp3 = _mm_sub_epi16(src_r0, src_r3);
+
+ /* z0 = x0 + x1 */
+ src_r0 = _mm_add_epi16(temp0, temp1);
+ /* z1 = (x3 << 1) + x2 */
+ src_r1 = _mm_slli_epi16(temp3, 1); //(x3<<1)
+ src_r1 = _mm_add_epi16(src_r1, temp2);
+ /* z2 = x0 - x1 */
+ src_r2 = _mm_sub_epi16(temp0, temp1);
+ /* z3 = x3 - (x2 << 1) */
+ src_r3 = _mm_slli_epi16(temp2, 1); //(x2<<1)
+ src_r3 = _mm_sub_epi16(temp3, src_r3);
+
+ // Matrix transpose
+ /*
+ * a0 b0 c0 d0
+ * a1 b1 c1 d1
+ * a2 b2 c2 d2
+ * a3 b3 c3 d3
+ */
+ temp0 = _mm_unpacklo_epi16(src_r0, src_r1); //a0 a1 b0 b1 c0 c1 d0 d1
+ temp2 = _mm_unpacklo_epi16(src_r2, src_r3); //a2 a3 b2 b3 c2 c3 d2 d3
+ temp1 = _mm_unpacklo_epi32(temp0, temp2); //a0 a1 a2 a3 b0 b1 b2 b3
+ temp3 = _mm_unpackhi_epi32(temp0, temp2); //c0 c1 c2 c3 d0 d1 d2 d3
+
+ src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); //a0 a1 a2 a3
+ src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); //b0 b1 b2 b3
+ src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); //c0 c1 c2 c3
+ src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); //d0 d1 d2 d3
+
+ /*----------------------------------------------------------*/
+ /* x0 = z0 + z3 */
+ temp0 = _mm_add_epi16(src_r0, src_r3);
+ /* x1 = z1 + z2 */
+ temp1 = _mm_add_epi16(src_r1, src_r2);
+ /* x2 = z1 - z2 */
+ temp2 = _mm_sub_epi16(src_r1, src_r2);
+ /* x3 = z0 - z3 */
+ temp3 = _mm_sub_epi16(src_r0, src_r3);
+
+ /* z0 = x0 + x1 */
+ src_r0 = _mm_add_epi16(temp0, temp1);
+ /* z1 = (x3 << 1) + x2 */
+ src_r1 = _mm_slli_epi16(temp3, 1); //(x3<<1)
+ src_r1 = _mm_add_epi16(src_r1, temp2);
+ /* z2 = x0 - x1 */
+ src_r2 = _mm_sub_epi16(temp0, temp1);
+ /* z3 = x3 - (x2 << 1) */
+ src_r3 = _mm_slli_epi16(temp2, 1); //(x2<<1)
+ src_r3 = _mm_sub_epi16(temp3, src_r3);
+
+ tmp_dc = _mm_extract_epi16(src_r0,0); //a0
+ *pi2_alt_dc_addr = tmp_dc;
+
+ src_r0 = _mm_unpacklo_epi64(src_r0, src_r1); //a0 a1 a2 a3 b0 b1 b2 b3
+ src_r2 = _mm_unpacklo_epi64(src_r2, src_r3); //c0 c1 c2 c3 d0 d1 d2 d3
+ sign_reg0 = _mm_cmpgt_epi16(zero_8x16b,src_r0);
+ sign_reg2 = _mm_cmpgt_epi16(zero_8x16b,src_r2);
+
+ sign_reg0 = _mm_mullo_epi16(temp_2,sign_reg0);
+ sign_reg2 = _mm_mullo_epi16(temp_2,sign_reg2);
+
+ sign_reg0 = _mm_add_epi16(temp_1,sign_reg0);
+ sign_reg2 = _mm_add_epi16(temp_1,sign_reg2);
+
+ src_r0 = _mm_abs_epi16(src_r0);
+ src_r2 = _mm_abs_epi16(src_r2);
+
+ src_r1 = _mm_srli_si128(src_r0, 8);
+ src_r0 = _mm_cvtepu16_epi32(src_r0);
+ src_r1 = _mm_cvtepu16_epi32(src_r1);
+ src_r3 = _mm_srli_si128(src_r2, 8);
+ src_r2 = _mm_cvtepu16_epi32(src_r2);
+ src_r3 = _mm_cvtepu16_epi32(src_r3);
+
+ temp0 = _mm_cvtepu16_epi32(scalemat_r0_r1);
+ scalemat_r0_r1 = _mm_srli_si128(scalemat_r0_r1, 8);
+ temp2 = _mm_cvtepu16_epi32(scalemat_r2_r3);
+ scalemat_r2_r3 = _mm_srli_si128(scalemat_r2_r3, 8);
+ temp1 = _mm_cvtepu16_epi32(scalemat_r0_r1);
+ temp3 = _mm_cvtepu16_epi32(scalemat_r2_r3);
+
+ temp0 = _mm_mullo_epi32(temp0, src_r0);
+ temp1 = _mm_mullo_epi32(temp1, src_r1);
+ temp2 = _mm_mullo_epi32(temp2, src_r2);
+ temp3 = _mm_mullo_epi32(temp3, src_r3);
+
+ temp0 = _mm_add_epi32(temp0,rnd_fact);
+ temp1 = _mm_add_epi32(temp1,rnd_fact);
+ temp2 = _mm_add_epi32(temp2,rnd_fact);
+ temp3 = _mm_add_epi32(temp3,rnd_fact);
+
+ temp0 = _mm_srli_epi32(temp0,u4_qbits);
+ temp1 = _mm_srli_epi32(temp1,u4_qbits);
+ temp2 = _mm_srli_epi32(temp2,u4_qbits);
+ temp3 = _mm_srli_epi32(temp3,u4_qbits);
+
+ temp0 = _mm_packs_epi32 (temp0,temp1);
+ temp2 = _mm_packs_epi32 (temp2,temp3);
+
+ temp0 = _mm_sign_epi16(temp0, sign_reg0);
+ temp2 = _mm_sign_epi16(temp2, sign_reg2);
+
+ _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0);
+ _mm_storeu_si128((__m128i *) (&pi2_out[8]), temp2);
+
+ cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b);
+ cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);
+
+ mask0 = _mm_movemask_epi8(cmp0);
+ mask1 = _mm_movemask_epi8(cmp1);
+ u4_zero_coeff = 0;
+ if(mask0)
+ {
+ if(mask0 == 0xffff)
+ u4_zero_coeff+=8;
+ else
+ {
+ cmp0 = _mm_and_si128(temp_1, cmp0);
+ sum0 = _mm_hadd_epi16(cmp0, zero_8x16b);
+ sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
+ sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
+ u4_zero_coeff += _mm_cvtsi128_si32(sum2);
+ }
+ }
+ if(mask1)
+ {
+ if(mask1 == 0xffff)
+ u4_zero_coeff+=8;
+ else
+ {
+ cmp1 = _mm_and_si128(temp_1, cmp1);
+ sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
+ sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
+ sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
+ u4_zero_coeff += _mm_cvtsi128_si32(sum2);
+ }
+ }
+
+ /* Return total nonzero coefficients in the current sub block */
+ u4_nonzero_coeff = 16 - u4_zero_coeff;
+ *pu1_nnz = u4_nonzero_coeff;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs forward transform and quantization on a 4*4 chroma block
+ *
+ * @par Description:
+ * Loads four rows of source and prediction, keeps the even-indexed bytes of
+ * each row (one plane of what is presumably interleaved chroma), subtracts
+ * them, applies the 4x4 forward transform, saves the unquantized DC term and
+ * then quantizes all 16 coefficients and counts the non-zero ones.
+ *
+ * @param[in] pu1_src
+ * Pointer to source sub-block; 8 bytes are read from each of 4 rows
+ *
+ * @param[in] pu1_pred
+ * Pointer to prediction sub-block; 8 bytes are read from each of 4 rows
+ *
+ * @param[out] pi2_out
+ * Receives the 16 quantized coefficients, stored contiguously
+ *
+ * @param[in] src_strd
+ * Source stride
+ *
+ * @param[in] pred_strd
+ * Prediction stride
+ *
+ * @param[in] pu2_scale_matrix
+ * Pointer to Forward Quant Scale Matrix (16 entries are read)
+ *
+ * @param[in] pu2_threshold_matrix
+ * Pointer to Forward Quant Threshold Matrix (not referenced by this function)
+ *
+ * @param[in] u4_qbits
+ * QP_BITS_h264_4x4 + floor(QP/6); right shift applied after scaling
+ *
+ * @param[in] u4_round_factor
+ * Quantization Round factor
+ *
+ * @param[out] pu1_nnz
+ * Total non-zero coefficients in the current sub-block
+ *
+ * @param[out] pi2_alt_dc_addr
+ * Receives the transformed DC coefficient as it is before quantization
+ *
+ * @returns
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+void ih264_resi_trans_quant_chroma_4x4_sse42(UWORD8 *pu1_src,UWORD8 *pu1_pred,WORD16 *pi2_out,
+ WORD32 src_strd,WORD32 pred_strd,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz, WORD16 *pi2_alt_dc_addr)
+{
+ WORD32 tmp_dc, u4_zero_coeff, u4_nonzero_coeff = 0;
+ WORD32 mask0, mask1;
+ __m128i cmp0, cmp1, sum0, sum1, sum2;
+ __m128i rnd_fact = _mm_set1_epi32(u4_round_factor); // round factor in every 32-bit lane
+ __m128i temp_2 = _mm_set1_epi16(2); // constant 2, used to build the +1/-1 sign vector
+ __m128i temp_1 = _mm_set1_epi16(1); // constant 1, used for the sign vector and zero counting
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i pred_r0, pred_r1, pred_r2, pred_r3;
+ __m128i temp0, temp1, temp2, temp3;
+ __m128i zero_8x16b = _mm_setzero_si128(); // all bits reset to zero
+ __m128i sign_reg0, sign_reg2;
+ __m128i scalemat_r0_r1, scalemat_r2_r3;
+ __m128i chroma_mask = _mm_set1_epi16 (0xFF); // 0x00FF per 16-bit lane: keeps the low byte of each byte pair
+
+ scalemat_r0_r1 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix)); //b00 b01 b02 b03 b10 b11 b12 b13 -- the scaling matrix 0th,1st row
+ scalemat_r2_r3 = _mm_loadu_si128((__m128i *) (pu2_scale_matrix + 8)); //b20 b21 b22 b23 b30 b31 b32 b33 -- the scaling matrix 2nd,3rd row
+ src_r0 = _mm_loadl_epi64((__m128i *) (&pu1_src[0])); //row 0: 8 bytes loaded, upper 64 bits of the register are zero
+ src_r1 = _mm_loadl_epi64((__m128i *) (&pu1_src[src_strd])); //row 1: 8 bytes loaded, upper 64 bits of the register are zero
+ src_r2 = _mm_loadl_epi64((__m128i *) (&pu1_src[2 * src_strd])); //row 2: 8 bytes loaded, upper 64 bits of the register are zero
+ src_r3 = _mm_loadl_epi64((__m128i *) (&pu1_src[3 * src_strd])); //row 3: 8 bytes loaded, upper 64 bits of the register are zero
+
+ src_r0 = _mm_and_si128(src_r0, chroma_mask); //bytes 0,2,4,6 of the row as four 16-bit values
+ src_r1 = _mm_and_si128(src_r1, chroma_mask);
+ src_r2 = _mm_and_si128(src_r2, chroma_mask);
+ src_r3 = _mm_and_si128(src_r3, chroma_mask);
+// The 0x00FF mask above keeps only the even-indexed byte of every 16-bit lane,
+// so each row is already four samples zero-extended to 16 bits. A separate
+// _mm_cvtepu8_epi16 widening step is therefore not needed here (it would
+// widen all eight consecutive bytes instead of every other one).
+
+ pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //row 0: 8 bytes loaded, upper 64 bits of the register are zero
+ pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //row 1: 8 bytes loaded, upper 64 bits of the register are zero
+ pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //row 2: 8 bytes loaded, upper 64 bits of the register are zero
+ pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //row 3: 8 bytes loaded, upper 64 bits of the register are zero
+
+ pred_r0 = _mm_and_si128(pred_r0, chroma_mask); //bytes 0,2,4,6 of the row as four 16-bit values
+ pred_r1 = _mm_and_si128(pred_r1, chroma_mask);
+ pred_r2 = _mm_and_si128(pred_r2, chroma_mask);
+ pred_r3 = _mm_and_si128(pred_r3, chroma_mask);
+// Same selection as for the source rows: after masking, lanes 0..3 hold the
+// even-indexed prediction bytes as 16-bit values and lanes 4..7 are zero,
+// so the 16-bit subtraction below yields the residual of a single plane.
+// No _mm_cvtepu8_epi16 widening is required.
+
+ src_r0 = _mm_sub_epi16(src_r0, pred_r0); //residual = source - prediction, row 0
+ src_r1 = _mm_sub_epi16(src_r1, pred_r1);
+ src_r2 = _mm_sub_epi16(src_r2, pred_r2);
+ src_r3 = _mm_sub_epi16(src_r3, pred_r3);
+
+ /* Perform Forward transform */
+ /*-------------------------------------------------------------*/
+ /* DCT [ Horizontal transformation ] */
+ /*-------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 a1 a2 a3
+ * b0 b1 b2 b3
+ * c0 c1 c2 c3
+ * d0 d1 d2 d3
+ */
+ temp0 = _mm_unpacklo_epi16(src_r0, src_r1); //a0 b0 a1 b1 a2 b2 a3 b3
+ temp2 = _mm_unpacklo_epi16(src_r2, src_r3); //c0 d0 c1 d1 c2 d2 c3 d3
+ temp1 = _mm_unpacklo_epi32(temp0, temp2); //a0 b0 c0 d0 a1 b1 c1 d1
+ temp3 = _mm_unpackhi_epi32(temp0, temp2); //a2 b2 c2 d2 a3 b3 c3 d3
+
+ src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); //a0 b0 c0 d0
+ src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); //a1 b1 c1 d1
+ src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); //a2 b2 c2 d2
+ src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); //a3 b3 c3 d3
+
+ /* Butterfly: z0..z3 are src_r0..src_r3, x0..x3 are temp0..temp3 */
+ /* x0 = z0 + z3 */
+ temp0 = _mm_add_epi16(src_r0, src_r3);
+ /* x1 = z1 + z2 */
+ temp1 = _mm_add_epi16(src_r1, src_r2);
+ /* x2 = z1 - z2 */
+ temp2 = _mm_sub_epi16(src_r1, src_r2);
+ /* x3 = z0 - z3 */
+ temp3 = _mm_sub_epi16(src_r0, src_r3);
+
+ /* z0 = x0 + x1 */
+ src_r0 = _mm_add_epi16(temp0, temp1);
+ /* z1 = (x3 << 1) + x2 */
+ src_r1 = _mm_slli_epi16(temp3, 1); //(x3<<1)
+ src_r1 = _mm_add_epi16(src_r1, temp2);
+ /* z2 = x0 - x1 */
+ src_r2 = _mm_sub_epi16(temp0, temp1);
+ /* z3 = x3 - (x2 << 1) */
+ src_r3 = _mm_slli_epi16(temp2, 1); //(x2<<1)
+ src_r3 = _mm_sub_epi16(temp3, src_r3);
+
+ // Matrix transpose
+ /*
+ * a0 b0 c0 d0
+ * a1 b1 c1 d1
+ * a2 b2 c2 d2
+ * a3 b3 c3 d3
+ */
+ temp0 = _mm_unpacklo_epi16(src_r0, src_r1); //a0 a1 b0 b1 c0 c1 d0 d1
+ temp2 = _mm_unpacklo_epi16(src_r2, src_r3); //a2 a3 b2 b3 c2 c3 d2 d3
+ temp1 = _mm_unpacklo_epi32(temp0, temp2); //a0 a1 a2 a3 b0 b1 b2 b3
+ temp3 = _mm_unpackhi_epi32(temp0, temp2); //c0 c1 c2 c3 d0 d1 d2 d3
+
+ src_r0 = _mm_unpacklo_epi64(temp1, zero_8x16b); //a0 a1 a2 a3
+ src_r1 = _mm_unpackhi_epi64(temp1, zero_8x16b); //b0 b1 b2 b3
+ src_r2 = _mm_unpacklo_epi64(temp3, zero_8x16b); //c0 c1 c2 c3
+ src_r3 = _mm_unpackhi_epi64(temp3, zero_8x16b); //d0 d1 d2 d3
+
+ /* Second pass: the same butterfly applied along the other dimension */
+ /* x0 = z0 + z3 */
+ temp0 = _mm_add_epi16(src_r0, src_r3);
+ /* x1 = z1 + z2 */
+ temp1 = _mm_add_epi16(src_r1, src_r2);
+ /* x2 = z1 - z2 */
+ temp2 = _mm_sub_epi16(src_r1, src_r2);
+ /* x3 = z0 - z3 */
+ temp3 = _mm_sub_epi16(src_r0, src_r3);
+
+ /* z0 = x0 + x1 */
+ src_r0 = _mm_add_epi16(temp0, temp1);
+ /* z1 = (x3 << 1) + x2 */
+ src_r1 = _mm_slli_epi16(temp3, 1); //(x3<<1)
+ src_r1 = _mm_add_epi16(src_r1, temp2);
+ /* z2 = x0 - x1 */
+ src_r2 = _mm_sub_epi16(temp0, temp1);
+ /* z3 = x3 - (x2 << 1) */
+ src_r3 = _mm_slli_epi16(temp2, 1); //(x2<<1)
+ src_r3 = _mm_sub_epi16(temp3, src_r3);
+
+ tmp_dc = _mm_extract_epi16(src_r0,0); //lane 0 of the first row: the DC term, still unquantized
+ *pi2_alt_dc_addr = tmp_dc;
+
+ src_r0 = _mm_unpacklo_epi64(src_r0, src_r1); //a0 a1 a2 a3 b0 b1 b2 b3
+ src_r2 = _mm_unpacklo_epi64(src_r2, src_r3); //c0 c1 c2 c3 d0 d1 d2 d3
+ sign_reg0 = _mm_cmpgt_epi16(zero_8x16b,src_r0); //0xFFFF (-1) where the coefficient is negative, else 0
+ sign_reg2 = _mm_cmpgt_epi16(zero_8x16b,src_r2);
+
+ sign_reg0 = _mm_mullo_epi16(temp_2,sign_reg0); //-2 where negative, else 0
+ sign_reg2 = _mm_mullo_epi16(temp_2,sign_reg2);
+
+ sign_reg0 = _mm_add_epi16(temp_1,sign_reg0); //-1 where negative, else +1
+ sign_reg2 = _mm_add_epi16(temp_1,sign_reg2);
+
+ src_r0 = _mm_abs_epi16(src_r0); //quantize magnitudes; signs are restored afterwards
+ src_r2 = _mm_abs_epi16(src_r2);
+
+ src_r1 = _mm_srli_si128(src_r0, 8); //bring coefficients 4..7 down to the low half
+ src_r0 = _mm_cvtepu16_epi32(src_r0); //zero-extend four magnitudes to 32 bits
+ src_r1 = _mm_cvtepu16_epi32(src_r1);
+ src_r3 = _mm_srli_si128(src_r2, 8); //bring coefficients 12..15 down to the low half
+ src_r2 = _mm_cvtepu16_epi32(src_r2);
+ src_r3 = _mm_cvtepu16_epi32(src_r3);
+
+ temp0 = _mm_cvtepu16_epi32(scalemat_r0_r1); //scale entries 0..3 as 32-bit values
+ scalemat_r0_r1 = _mm_srli_si128(scalemat_r0_r1, 8);
+ temp2 = _mm_cvtepu16_epi32(scalemat_r2_r3); //scale entries 8..11 as 32-bit values
+ scalemat_r2_r3 = _mm_srli_si128(scalemat_r2_r3, 8);
+ temp1 = _mm_cvtepu16_epi32(scalemat_r0_r1); //scale entries 4..7
+ temp3 = _mm_cvtepu16_epi32(scalemat_r2_r3); //scale entries 12..15
+
+ temp0 = _mm_mullo_epi32(temp0, src_r0); //magnitude * scale (low 32 bits of each product)
+ temp1 = _mm_mullo_epi32(temp1, src_r1);
+ temp2 = _mm_mullo_epi32(temp2, src_r2);
+ temp3 = _mm_mullo_epi32(temp3, src_r3);
+
+ temp0 = _mm_add_epi32(temp0,rnd_fact); //add round factor
+ temp1 = _mm_add_epi32(temp1,rnd_fact);
+ temp2 = _mm_add_epi32(temp2,rnd_fact);
+ temp3 = _mm_add_epi32(temp3,rnd_fact);
+
+ temp0 = _mm_srli_epi32(temp0,u4_qbits); //logical right shift by qbits
+ temp1 = _mm_srli_epi32(temp1,u4_qbits);
+ temp2 = _mm_srli_epi32(temp2,u4_qbits);
+ temp3 = _mm_srli_epi32(temp3,u4_qbits);
+
+ temp0 = _mm_packs_epi32 (temp0,temp1); //back to 16 bits with signed saturation
+ temp2 = _mm_packs_epi32 (temp2,temp3);
+
+ temp0 = _mm_sign_epi16(temp0, sign_reg0); //negate the lanes whose input coefficient was negative
+ temp2 = _mm_sign_epi16(temp2, sign_reg2);
+
+ // The stored block keeps the quantized DC in lane 0; the raw DC is only written through pi2_alt_dc_addr above.
+
+ _mm_storeu_si128((__m128i *) (&pi2_out[0]), temp0);
+ _mm_storeu_si128((__m128i *) (&pi2_out[8]), temp2);
+
+ cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b); //0xFFFF in every lane whose quantized value is zero
+ cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);
+
+ mask0 = _mm_movemask_epi8(cmp0); //two mask bits per 16-bit lane; 0xffff means all 8 lanes are zero
+ mask1 = _mm_movemask_epi8(cmp1);
+ u4_zero_coeff = 0;
+ if(mask0) //at least one of coefficients 0..7 is zero
+ {
+ if(mask0 == 0xffff)
+ u4_zero_coeff+=8;
+ else
+ {
+ cmp0 = _mm_and_si128(temp_1, cmp0); //1 in each zero lane, 0 elsewhere
+ sum0 = _mm_hadd_epi16(cmp0, zero_8x16b); //three pairwise-add rounds leave the total in lane 0
+ sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
+ sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
+ u4_zero_coeff += _mm_cvtsi128_si32(sum2); //lane 1 is zero here, so the low 32 bits equal the count
+ }
+ }
+ if(mask1) //at least one of coefficients 8..15 is zero
+ {
+ if(mask1 == 0xffff)
+ u4_zero_coeff+=8;
+ else
+ {
+ cmp1 = _mm_and_si128(temp_1, cmp1); //1 in each zero lane, 0 elsewhere
+ sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
+ sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
+ sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
+ u4_zero_coeff += _mm_cvtsi128_si32(sum2);
+ }
+ }
+
+ /* Return total nonzero coefficients in the current sub block */
+ u4_nonzero_coeff = 16 - u4_zero_coeff;
+ *pu1_nnz = u4_nonzero_coeff;
+
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs forward hadamard transform and quantization on a 4*4 block
+ *
+ * @par Description:
+ * Reads 16 signed 16-bit coefficients and sign-extends them to 32 bits. A
+ * transpose followed by an add/subtract butterfly is applied twice, once per
+ * dimension, and every result is halved with an arithmetic right shift. The
+ * magnitudes are then quantized and the original signs are re-applied.
+ *
+ * @param[in] pi2_src
+ * Pointer to the 16 input coefficients, read as two unaligned 128-bit loads
+ * (pi2_src[0..7] and pi2_src[8..15])
+ *
+ * @param[out] pi2_dst
+ * Pointer to the 16 quantized output coefficients, written as two unaligned
+ * 128-bit stores (pi2_dst[0..7] and pi2_dst[8..15])
+ *
+ * @param[in] pu2_scale_matrix
+ * Pointer to Forward Quant Scale Matrix; only element 0 is read and it is
+ * applied to all 16 coefficients
+ *
+ * @param[in] pu2_threshold_matrix
+ * Pointer to Forward Quant Threshold Matrix; not referenced by this
+ * implementation
+ *
+ * @param[in] u4_qbits
+ * QP_BITS_h264_4x4 + floor(QP/6); the scaled and rounded magnitude is
+ * shifted right by this many bits
+ *
+ * @param[in] u4_round_factor
+ * Quantization Round factor, added to each scaled magnitude before the
+ * right shift
+ *
+ * @param[out] pu1_nnz
+ * pu1_nnz[0] receives the total number of non-zero coefficients in the
+ * quantized block
+ *
+ * @returns
+ *
+ * @remarks
+ * Quantization per coefficient c:
+ * q = (abs(c) * pu2_scale_matrix[0] + u4_round_factor) >> u4_qbits
+ * q is packed to 16 bits with signed saturation and then negated where c
+ * was negative. The non-zero count is obtained by counting the zero lanes
+ * of the packed result and subtracting from 16. The multiply keeps only
+ * the low 32 bits of each product.
+ *
+ */
+
+void ih264_hadamard_quant_4x4_sse42(WORD16 *pi2_src, WORD16 *pi2_dst,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,UWORD8 *pu1_nnz
+ )
+{
+ WORD32 u4_zero_coeff,u4_nonzero_coeff=0;
+ __m128i cmp0, cmp1, sum0, sum1, sum2;
+ WORD32 mask0, mask1;
+ __m128i src_r0_r1, src_r2_r3, sign_reg;
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i zero_8x16b = _mm_setzero_si128();
+ __m128i temp0, temp1, temp2, temp3;
+ __m128i sign_reg0, sign_reg1, sign_reg2, sign_reg3;
+ __m128i temp_1 = _mm_set1_epi16(1); //constant 1, used for the sign vector and zero counting
+ __m128i rnd_fact = _mm_set1_epi32(u4_round_factor); //round factor in every 32-bit lane
+ __m128i scale_val = _mm_set1_epi32(pu2_scale_matrix[0]); //single scale value shared by all coefficients
+
+ src_r0_r1 = _mm_loadu_si128((__m128i *) (pi2_src)); //a00 a01 a02 a03 a10 a11 a12 a13 -- the source matrix 0th,1st row
+ src_r2_r3 = _mm_loadu_si128((__m128i *) (pi2_src + 8)); //a20 a21 a22 a23 a30 a31 a32 a33 -- the source matrix 2nd,3rd row
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r0_r1); //0xFFFF where negative: the upper half of the sign-extended value
+ src_r0 = _mm_unpacklo_epi16(src_r0_r1, sign_reg); //a0 a1 a2 a3 -- sign-extended to 32 bits
+ src_r1 = _mm_unpackhi_epi16(src_r0_r1, sign_reg); //b0 b1 b2 b3 -- sign-extended to 32 bits
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, src_r2_r3);
+ src_r2 = _mm_unpacklo_epi16(src_r2_r3, sign_reg); //c0 c1 c2 c3 -- sign-extended to 32 bits
+ src_r3 = _mm_unpackhi_epi16(src_r2_r3, sign_reg); //d0 d1 d2 d3 -- sign-extended to 32 bits
+
+ /* Perform Forward transform */
+ /*-------------------------------------------------------------*/
+ /* Forward DC transform [ Horizontal transformation ] */
+ /*-------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 a1 a2 a3
+ * b0 b1 b2 b3
+ * c0 c1 c2 c3
+ * d0 d1 d2 d3
+ */
+ temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 b0 a1 b1
+ temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //c0 d0 c1 d1
+ temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //a2 b2 a3 b3
+ temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 d2 c3 d3
+ src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 b0 c0 d0
+ src_r1 = _mm_unpackhi_epi64(temp0, temp2); //a1 b1 c1 d1
+ src_r2 = _mm_unpacklo_epi64(temp1, temp3); //a2 b2 c2 d2
+ src_r3 = _mm_unpackhi_epi64(temp1, temp3); //a3 b3 c3 d3
+
+ temp0 = _mm_add_epi32(src_r0, src_r3); //x0 = z0 + z3
+ temp1 = _mm_add_epi32(src_r1, src_r2); //x1 = z1 + z2
+ temp2 = _mm_sub_epi32(src_r1, src_r2); //x2 = z1 - z2
+ temp3 = _mm_sub_epi32(src_r0, src_r3); //x3 = z0 - z3
+
+ src_r0 = _mm_add_epi32(temp0, temp1); //z0 = x0 + x1
+ src_r1 = _mm_add_epi32(temp2, temp3); //z1 = x2 + x3
+ src_r2 = _mm_sub_epi32(temp0, temp1); //z2 = x0 - x1
+ src_r3 = _mm_sub_epi32(temp3, temp2); //z3 = x3 - x2
+
+ /*-------------------------------------------------------------*/
+ /* Forward DC transform [ Vertical transformation ] */
+ /*-------------------------------------------------------------*/
+ // Matrix transpose
+ /*
+ * a0 b0 c0 d0
+ * a1 b1 c1 d1
+ * a2 b2 c2 d2
+ * a3 b3 c3 d3
+ */
+ temp0 = _mm_unpacklo_epi32(src_r0, src_r1); //a0 a1 b0 b1
+ temp2 = _mm_unpacklo_epi32(src_r2, src_r3); //a2 a3 b2 b3
+ temp1 = _mm_unpackhi_epi32(src_r0, src_r1); //c0 c1 d0 d1
+ temp3 = _mm_unpackhi_epi32(src_r2, src_r3); //c2 c3 d2 d3
+ src_r0 = _mm_unpacklo_epi64(temp0, temp2); //a0 a1 a2 a3
+ src_r1 = _mm_unpackhi_epi64(temp0, temp2); //b0 b1 b2 b3
+ src_r2 = _mm_unpacklo_epi64(temp1, temp3); //c0 c1 c2 c3
+ src_r3 = _mm_unpackhi_epi64(temp1, temp3); //d0 d1 d2 d3
+
+ temp0 = _mm_add_epi32(src_r0, src_r3); //x0 = z0 + z3
+ temp1 = _mm_add_epi32(src_r1, src_r2); //x1 = z1 + z2
+ temp2 = _mm_sub_epi32(src_r1, src_r2); //x2 = z1 - z2
+ temp3 = _mm_sub_epi32(src_r0, src_r3); //x3 = z0 - z3
+
+ src_r0 = _mm_add_epi32(temp0, temp1); //z0 = x0 + x1
+ src_r1 = _mm_add_epi32(temp2, temp3); //z1 = x2 + x3
+ src_r2 = _mm_sub_epi32(temp0, temp1); //z2 = x0 - x1
+ src_r3 = _mm_sub_epi32(temp3, temp2); //z3 = x3 - x2
+
+ src_r0 = _mm_srai_epi32(src_r0, 1); //halve every transformed value (arithmetic shift keeps the sign)
+ src_r1 = _mm_srai_epi32(src_r1, 1);
+ src_r2 = _mm_srai_epi32(src_r2, 1);
+ src_r3 = _mm_srai_epi32(src_r3, 1);
+
+ // Quantization
+ sign_reg0 = _mm_cmpgt_epi32(zero_8x16b, src_r0); //Find sign of each value for later restoration
+ sign_reg1 = _mm_cmpgt_epi32(zero_8x16b, src_r1);
+ sign_reg2 = _mm_cmpgt_epi32(zero_8x16b, src_r2);
+ sign_reg3 = _mm_cmpgt_epi32(zero_8x16b, src_r3);
+
+ sign_reg0 = _mm_packs_epi32(sign_reg0, sign_reg1); //16-bit lanes: -1 where the value is negative, 0 otherwise
+ sign_reg2 = _mm_packs_epi32(sign_reg2, sign_reg3);
+
+ sign_reg0 = _mm_slli_epi16(sign_reg0, 1); //-2 where negative, 0 otherwise
+ sign_reg2 = _mm_slli_epi16(sign_reg2, 1);
+
+ sign_reg0 = _mm_add_epi16(temp_1,sign_reg0); //-1 where negative, +1 otherwise
+ sign_reg2 = _mm_add_epi16(temp_1,sign_reg2);
+
+ src_r0 = _mm_abs_epi32(src_r0); //Absolute values
+ src_r1 = _mm_abs_epi32(src_r1);
+ src_r2 = _mm_abs_epi32(src_r2);
+ src_r3 = _mm_abs_epi32(src_r3);
+
+ temp0 = _mm_mullo_epi32(scale_val, src_r0); //multiply by pu2_scale_matrix[0]
+ temp1 = _mm_mullo_epi32(scale_val, src_r1);
+ temp2 = _mm_mullo_epi32(scale_val, src_r2);
+ temp3 = _mm_mullo_epi32(scale_val, src_r3);
+
+ temp0 = _mm_add_epi32(temp0,rnd_fact); //Add round factor
+ temp1 = _mm_add_epi32(temp1,rnd_fact);
+ temp2 = _mm_add_epi32(temp2,rnd_fact);
+ temp3 = _mm_add_epi32(temp3,rnd_fact);
+
+ temp0 = _mm_srli_epi32(temp0,u4_qbits); //Logical right shift by qbits; the operands are magnitudes, so no sign bits are needed
+ temp1 = _mm_srli_epi32(temp1,u4_qbits);
+ temp2 = _mm_srli_epi32(temp2,u4_qbits);
+ temp3 = _mm_srli_epi32(temp3,u4_qbits);
+
+ temp0 = _mm_packs_epi32 (temp0,temp1); //Final values are 16-bits only.
+ temp2 = _mm_packs_epi32 (temp2,temp3);
+
+ temp0 = _mm_sign_epi16(temp0, sign_reg0); //Sign restoration
+ temp2 = _mm_sign_epi16(temp2, sign_reg2);
+
+ _mm_storeu_si128((__m128i *) (&pi2_dst[0]), temp0);
+ _mm_storeu_si128((__m128i *) (&pi2_dst[8]), temp2);
+
+ cmp0 = _mm_cmpeq_epi16(temp0, zero_8x16b); //0xFFFF in every lane whose quantized value is zero
+ cmp1 = _mm_cmpeq_epi16(temp2, zero_8x16b);
+
+ mask0 = _mm_movemask_epi8(cmp0); //two mask bits per 16-bit lane; 0xffff means all 8 lanes are zero
+ mask1 = _mm_movemask_epi8(cmp1);
+ u4_zero_coeff = 0;
+ if(mask0) //at least one of coefficients 0..7 is zero
+ {
+ if(mask0 == 0xffff)
+ u4_zero_coeff+=8;
+ else
+ {
+ cmp0 = _mm_and_si128(temp_1, cmp0); //1 in each zero lane, 0 elsewhere
+ sum0 = _mm_hadd_epi16(cmp0, zero_8x16b); //three pairwise-add rounds leave the total in lane 0
+ sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
+ sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
+ u4_zero_coeff += _mm_cvtsi128_si32(sum2); //lane 1 is zero here, so the low 32 bits equal the count
+ }
+ }
+ if(mask1) //at least one of coefficients 8..15 is zero
+ {
+ if(mask1 == 0xffff)
+ u4_zero_coeff+=8;
+ else
+ {
+ cmp1 = _mm_and_si128(temp_1, cmp1); //1 in each zero lane, 0 elsewhere
+ sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
+ sum1 = _mm_hadd_epi16(sum0, zero_8x16b);
+ sum2 = _mm_hadd_epi16(sum1, zero_8x16b);
+ u4_zero_coeff += _mm_cvtsi128_si32(sum2);
+ }
+ }
+
+ /* Return total nonzero coefficients in the current sub block */
+ u4_nonzero_coeff = 16 - u4_zero_coeff;
+ pu1_nnz[0] = u4_nonzero_coeff;
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs forward hadamard transform and quantization on a 2*2 block
+ * for both U and V planes
+ *
+ * @par Description:
+ * Loads eight signed 16-bit coefficients: pi2_src[0..3] form one 2x2 block and
+ * pi2_src[4..7] the other. Each block is sign-extended to 32 bits, run through
+ * a 2x2 Hadamard transform built from horizontal add/subtract instructions, and
+ * quantized with the single scale value pu2_scale_matrix[0].
+ *
+ * @param[in] pi2_src
+ * Pointer to the 8 input coefficients (4 per plane), read with one
+ * unaligned 128-bit load
+ *
+ * @param[out] pi2_dst
+ * Pointer to the 8 quantized output coefficients (4 per plane), written
+ * with one unaligned 128-bit store
+ *
+ * @param[in] pu2_scale_matrix
+ * Pointer to Forward Quant Scale Matrix; only element 0 is read and it is
+ * applied to all 8 coefficients
+ *
+ * @param[in] pu2_threshold_matrix
+ * Pointer to Forward Quant Threshold Matrix; not referenced by this
+ * implementation
+ *
+ * @param[in] u4_qbits
+ * QP_BITS_h264_4x4 + floor(QP/6); the scaled and rounded magnitude is
+ * shifted right by this many bits
+ *
+ * @param[in] u4_round_factor
+ * Quantization Round factor, added to each scaled magnitude before the
+ * right shift
+ *
+ * @param[out] pu1_nnz
+ * pu1_nnz[0] and pu1_nnz[1] receive the non-zero coefficient counts of the
+ * first and second plane respectively
+ *
+ * @returns
+ *
+ * @remarks
+ * With inputs x0..x3 of one plane, the outputs are stored in the order
+ * x0+x1+x2+x3, x0-x1+x2-x3, x0+x1-x2-x3, x0-x1-x2+x3
+ * The locals nonzero_coeff_0 and nonzero_coeff_1 actually accumulate the
+ * number of ZERO coefficients per plane; the value written to pu1_nnz is
+ * 4 minus that count. Which plane is U and which is V is not visible here
+ * -- presumably U first; confirm against the caller.
+ *
+ */
+
+void ih264_hadamard_quant_2x2_uv_sse42(WORD16 *pi2_src, WORD16 *pi2_dst,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,UWORD8 *pu1_nnz)
+{
+ WORD32 val, nonzero_coeff_0, nonzero_coeff_1=0; //despite their names, the two counters hold ZERO-coefficient counts
+ nonzero_coeff_0 = 0;
+ __m128i cmp, cmp0, cmp1;
+ __m128i sum0, sum1;
+ WORD32 mask, mask0, mask1;
+ __m128i src, plane_0, plane_1, temp0, temp1, sign_reg;
+ __m128i zero_8x16b = _mm_setzero_si128();
+ __m128i scale_val = _mm_set1_epi32(pu2_scale_matrix[0]); //single scale value shared by all coefficients
+ __m128i sign_reg0, sign_reg1;
+ __m128i temp_1 = _mm_set1_epi16(1); //constant 1, used for the sign vector and zero counting
+ __m128i rnd_fact = _mm_set1_epi32(u4_round_factor); //round factor in every 32-bit lane
+
+ src = _mm_loadu_si128((__m128i *)pi2_src); //a0 a1 a2 a3 b0 b1 b2 b3
+ sign_reg = _mm_cmpgt_epi16(zero_8x16b, src); //0xFFFF where negative: the upper half of the sign-extended value
+ plane_0 = _mm_unpacklo_epi16(src, sign_reg); //a0 a1 a2 a3 -- 32 bits
+ plane_1 = _mm_unpackhi_epi16(src, sign_reg); //b0 b1 b2 b3 -- 32 bits
+
+ temp0 = _mm_hadd_epi32(plane_0, plane_1); //a0+a1 a2+a3 b0+b1 b2+b3
+ temp1 = _mm_hsub_epi32(plane_0, plane_1); //a0-a1 a2-a3 b0-b1 b2-b3
+
+ plane_0 = _mm_hadd_epi32(temp0, temp1); //a0+a1+a2+a3 b0+b1+b2+b3 a0-a1+a2-a3 b0-b1+b2-b3
+ plane_1 = _mm_hsub_epi32(temp0, temp1); //a0+a1-a2-a3 b0+b1-b2-b3 a0-a1-a2+a3 b0-b1-b2+b3
+
+ temp0 = _mm_unpacklo_epi32(plane_0, plane_1); //a0+a1+a2+a3 a0+a1-a2-a3 b0+b1+b2+b3 b0+b1-b2-b3
+ temp1 = _mm_unpackhi_epi32(plane_0, plane_1); //a0-a1+a2-a3 a0-a1-a2+a3 b0-b1+b2-b3 b0-b1-b2+b3
+
+ plane_0 = _mm_unpacklo_epi64(temp0, temp1); //a0+a1+a2+a3 a0+a1-a2-a3 a0-a1+a2-a3 a0-a1-a2+a3
+ plane_1 = _mm_unpackhi_epi64(temp0, temp1); //b0+b1+b2+b3 b0+b1-b2-b3 b0-b1+b2-b3 b0-b1-b2+b3
+
+ plane_0 = _mm_shuffle_epi32(plane_0, 0xd8); //a0+a1+a2+a3 a0-a1+a2-a3 a0+a1-a2-a3 a0-a1-a2+a3
+ plane_1 = _mm_shuffle_epi32(plane_1, 0xd8); //b0+b1+b2+b3 b0-b1+b2-b3 b0+b1-b2-b3 b0-b1-b2+b3
+ // Quantization
+ sign_reg0 = _mm_cmpgt_epi32(zero_8x16b, plane_0); //Find sign of each value for later restoration
+ sign_reg1 = _mm_cmpgt_epi32(zero_8x16b, plane_1);
+
+ sign_reg0 = _mm_packs_epi32(sign_reg0, sign_reg1); //16-bit lanes: -1 where the value is negative, 0 otherwise
+ sign_reg0 = _mm_slli_epi16(sign_reg0, 1); //-2 where negative, 0 otherwise
+ sign_reg0 = _mm_add_epi16(temp_1,sign_reg0); //-1 where negative, +1 otherwise
+
+ plane_0 = _mm_abs_epi32(plane_0); //Absolute values
+ plane_1 = _mm_abs_epi32(plane_1);
+
+ temp0 = _mm_mullo_epi32(scale_val, plane_0); //multiply by pu2_scale_matrix[0]
+ temp1 = _mm_mullo_epi32(scale_val, plane_1); //multiply by pu2_scale_matrix[0]
+
+ temp0 = _mm_add_epi32(temp0,rnd_fact); //Add round factor
+ temp1 = _mm_add_epi32(temp1,rnd_fact);
+
+ temp0 = _mm_srli_epi32(temp0,u4_qbits); //Logical right shift by qbits; the operands are magnitudes, so no sign bits are needed
+ temp1 = _mm_srli_epi32(temp1,u4_qbits);
+
+ temp0 = _mm_packs_epi32 (temp0,temp1); //Final values are 16-bits only.
+ temp0 = _mm_sign_epi16(temp0, sign_reg0); //Sign restoration
+
+ _mm_storeu_si128((__m128i *) (&pi2_dst[0]), temp0);
+
+ cmp = _mm_cmpeq_epi16(temp0, zero_8x16b); //0xFFFF in every lane whose quantized value is zero
+ mask = _mm_movemask_epi8(cmp); //two mask bits per 16-bit lane
+ mask0 = mask & 0xff; //lanes 0..3: first plane
+ mask1 = mask>>8; //lanes 4..7: second plane
+ if(mask0) //at least one zero coefficient in the first plane
+ {
+ if(mask0 == 0xff)
+ nonzero_coeff_0 += 4; //all four coefficients of the first plane are zero
+ else
+ {
+ cmp0 = _mm_and_si128(temp_1, cmp); //1 in each zero lane (both planes still present)
+ sum0 = _mm_hadd_epi16(cmp0, zero_8x16b);
+ sum1 = _mm_hadd_epi16(sum0, zero_8x16b); //lane 0 = first-plane count, lane 1 = second-plane count
+ val = _mm_cvtsi128_si32(sum1);
+ val = val & 0xffff; //keep lane 0 only
+ nonzero_coeff_0 += val;
+ }
+ }
+ if(mask1) //at least one zero coefficient in the second plane
+ {
+ if(mask1 == 0xff)
+ nonzero_coeff_1 += 4; //all four coefficients of the second plane are zero
+ else
+ {
+ cmp1 = _mm_srli_si128(cmp, 8); //move the second plane's lanes down; upper lanes become zero
+ cmp1 = _mm_and_si128(temp_1, cmp1); //1 in each zero lane
+ sum0 = _mm_hadd_epi16(cmp1, zero_8x16b);
+ sum1 = _mm_hadd_epi16(sum0, zero_8x16b); //lane 0 = second-plane count, lane 1 = 0
+ nonzero_coeff_1 += _mm_cvtsi128_si32(sum1);
+ }
+ }
+
+ pu1_nnz[0] = 4 - nonzero_coeff_0; //non-zero count = 4 - zero count, first plane
+ pu1_nnz[1] = 4 - nonzero_coeff_1; //non-zero count = 4 - zero count, second plane
+
+}
diff --git a/common/x86/ih264_weighted_pred_sse42.c b/common/x86/ih264_weighted_pred_sse42.c
new file mode 100755
index 0000000..b1684b7
--- /dev/null
+++ b/common/x86/ih264_weighted_pred_sse42.c
@@ -0,0 +1,1349 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264_weighted_pred_sse42.c */
+/* */
+/* Description : Contains function definitions for weighted */
+/* prediction functions in x86 sse4 intrinsics */
+/* */
+/* List of Functions : ih264_default_weighted_pred_luma_sse42() */
+/* ih264_default_weighted_pred_chroma_sse42() */
+/* ih264_weighted_pred_luma_sse42() */
+/* ih264_weighted_pred_chroma_sse42() */
+/* ih264_weighted_bi_pred_luma_sse42() */
+/* ih264_weighted_bi_pred_chroma_sse42() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 30 01 2015 Kaushik Initial version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+#include <immintrin.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_weighted_pred.h"
+
+/*****************************************************************************/
+/* Function definitions . */
+/*****************************************************************************/
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_default_weighted_pred_luma_sse42 */
+/* */
+/* Description : This function performs the default weighted prediction */
+/* as described in sec 8.4.2.3.1 titled "Default weighted */
+/* sample prediction process" for luma. The function gets */
+/* two ht x wd blocks, calculates their rounded-average and */
+/* stores it in the destination block. (ht,wd) can be */
+/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src1 - Pointer to source 1 */
+/* pu1_src2 - Pointer to source 2 */
+/* pu1_dst - Pointer to destination */
+/* src_strd1 - stride for source 1 */
+/* src_strd2 - stride for source 2 */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 04 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_default_weighted_pred_luma_sse42(UWORD8 *pu1_src1, // first input block
+ UWORD8 *pu1_src2, // second input block
+ UWORD8 *pu1_dst, // output block
+ WORD32 src_strd1, // step between consecutive rows of pu1_src1
+ WORD32 src_strd2, // step between consecutive rows of pu1_src2
+ WORD32 dst_strd, // step between consecutive rows of pu1_dst
+ WORD32 ht, // rows to process; each path's loop body runs at least once
+ WORD32 wd) // 4 or 8 pick dedicated paths, anything else takes the 16-wide path
+{
+ __m128i y0_0_16x8b, y0_1_16x8b, y0_2_16x8b, y0_3_16x8b;
+ __m128i y1_0_16x8b, y1_1_16x8b, y1_2_16x8b, y1_3_16x8b;
+
+ if(wd == 4) // 4 rows per pass; 8 bytes are loaded per row, only the low 4 are written
+ {
+ __m128i mask_full_16x8b, mask_ll4B_16x8b;
+
+ mask_full_16x8b = _mm_set1_epi8(0xff);
+ mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
+ // shifting right by 12 bytes leaves 0xff in only the lowest four bytes
+
+ do
+ {
+ y0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
+ y0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
+ y0_2_16x8b = _mm_loadl_epi64(
+ (__m128i *)(pu1_src1 + (src_strd1 << 1)));
+ y0_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3));
+
+ y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
+ y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
+ y1_2_16x8b = _mm_loadl_epi64(
+ (__m128i *)(pu1_src2 + (src_strd2 << 1)));
+ y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3));
+
+ y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b); // per-byte rounded average, (a + b + 1) >> 1
+ y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b);
+ y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b);
+ y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b);
+
+ _mm_maskmoveu_si128(y0_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); // writes only the bytes selected by the mask
+ _mm_maskmoveu_si128(y0_1_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + dst_strd));
+ _mm_maskmoveu_si128(y0_2_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + (dst_strd << 1)));
+ _mm_maskmoveu_si128(y0_3_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + dst_strd * 3));
+
+ ht -= 4; // four rows consumed
+ pu1_src1 += src_strd1 << 2; // advance all three pointers by four rows
+ pu1_src2 += src_strd2 << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+ else if(wd == 8) // 4 rows per pass; 8 bytes loaded and stored per row
+ {
+ do
+ {
+ y0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
+ y0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
+ y0_2_16x8b = _mm_loadl_epi64(
+ (__m128i *)(pu1_src1 + (src_strd1 << 1)));
+ y0_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3));
+
+ y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
+ y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
+ y1_2_16x8b = _mm_loadl_epi64(
+ (__m128i *)(pu1_src2 + (src_strd2 << 1)));
+ y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3));
+
+ y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b); // per-byte rounded average
+ y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b);
+ y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b);
+ y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b);
+
+ _mm_storel_epi64((__m128i *)pu1_dst, y0_0_16x8b); // stores the low 8 bytes of the register
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y0_1_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y0_2_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y0_3_16x8b);
+
+ ht -= 4; // four rows consumed
+ pu1_src1 += src_strd1 << 2;
+ pu1_src2 += src_strd2 << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+ else // wd == 16: 8 rows per pass, full 16-byte unaligned loads and stores
+ {
+ __m128i y0_4_16x8b, y0_5_16x8b, y0_6_16x8b, y0_7_16x8b;
+ __m128i y1_4_16x8b, y1_5_16x8b, y1_6_16x8b, y1_7_16x8b;
+
+ do
+ {
+ y0_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1);
+ y0_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1));
+ y0_2_16x8b = _mm_loadu_si128(
+ (__m128i *)(pu1_src1 + (src_strd1 << 1)));
+ y0_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 3));
+ y0_4_16x8b = _mm_loadu_si128(
+ (__m128i *)(pu1_src1 + (src_strd1 << 2)));
+ y0_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 5));
+ y0_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 6));
+ y0_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1 * 7));
+
+ y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2);
+ y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2));
+ y1_2_16x8b = _mm_loadu_si128(
+ (__m128i *)(pu1_src2 + (src_strd2 << 1)));
+ y1_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 3));
+ y1_4_16x8b = _mm_loadu_si128(
+ (__m128i *)(pu1_src2 + (src_strd2 << 2)));
+ y1_5_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 5));
+ y1_6_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 6));
+ y1_7_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2 * 7));
+
+ y0_0_16x8b = _mm_avg_epu8(y0_0_16x8b, y1_0_16x8b); // per-byte rounded average
+ y0_1_16x8b = _mm_avg_epu8(y0_1_16x8b, y1_1_16x8b);
+ y0_2_16x8b = _mm_avg_epu8(y0_2_16x8b, y1_2_16x8b);
+ y0_3_16x8b = _mm_avg_epu8(y0_3_16x8b, y1_3_16x8b);
+ y0_4_16x8b = _mm_avg_epu8(y0_4_16x8b, y1_4_16x8b);
+ y0_5_16x8b = _mm_avg_epu8(y0_5_16x8b, y1_5_16x8b);
+ y0_6_16x8b = _mm_avg_epu8(y0_6_16x8b, y1_6_16x8b);
+ y0_7_16x8b = _mm_avg_epu8(y0_7_16x8b, y1_7_16x8b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, y0_0_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y0_1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y0_2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y0_3_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 2)), y0_4_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 5), y0_5_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 6), y0_6_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 7), y0_7_16x8b);
+
+ ht -= 8; // eight rows consumed; assumes ht is a multiple of 8 on this path
+ pu1_src1 += src_strd1 << 3; // advance all three pointers by eight rows
+ pu1_src2 += src_strd2 << 3;
+ pu1_dst += dst_strd << 3;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_default_weighted_pred_chroma_sse42 */
+/* */
+/* Description : This function performs the default weighted prediction */
+/* as described in sec 8.4.2.3.1 titled "Default weighted */
+/* sample prediction process" for chroma. The function gets */
+/* two ht x wd blocks, calculates their rounded-average and */
+/* stores it in the destination block. (ht,wd) can be */
+/* (2,2), (4,2) , (2,4), (4,4), (8,4), (4,8) or (8,8). */
+/* */
+/* Inputs : pu1_src1 - Pointer to source 1 */
+/* pu1_src2 - Pointer to source 2 */
+/* pu1_dst - Pointer to destination */
+/* src_strd1 - stride for source 1 */
+/* src_strd2 - stride for source 2 */
+/* dst_strd - stride for destination */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 04 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_default_weighted_pred_chroma_sse42(UWORD8 *pu1_src1, // first input block
+ UWORD8 *pu1_src2, // second input block
+ UWORD8 *pu1_dst, // output block
+ WORD32 src_strd1, // step between consecutive rows of pu1_src1
+ WORD32 src_strd2, // step between consecutive rows of pu1_src2
+ WORD32 dst_strd, // step between consecutive rows of pu1_dst
+ WORD32 ht, // rows to process; each path's loop body runs at least once
+ WORD32 wd) // 2 or 4 pick dedicated paths, anything else takes the 8-wide path
+{
+ __m128i uv0_0_16x8b, uv0_1_16x8b;
+ __m128i uv1_0_16x8b, uv1_1_16x8b;
+
+ if(wd == 2) // 2 rows per pass; 4 bytes written per row, i.e. two bytes per unit of wd (presumably interleaved U/V)
+ {
+ __m128i mask_full_16x8b, mask_ll4B_16x8b;
+
+ mask_full_16x8b = _mm_set1_epi8(0xff);
+ mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
+ // shifting right by 12 bytes leaves 0xff in only the lowest four bytes
+
+ do
+ {
+ uv0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1); // loads 8 bytes although only 4 are written back
+ uv0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
+
+ uv1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
+ uv1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
+
+ uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b); // per-byte rounded average, (a + b + 1) >> 1
+ uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b);
+
+ _mm_maskmoveu_si128(uv0_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); // writes only the bytes selected by the mask
+ _mm_maskmoveu_si128(uv0_1_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + dst_strd));
+
+ ht -= 2; // two rows consumed
+ pu1_src1 += src_strd1 << 1; // advance all three pointers by two rows
+ pu1_src2 += src_strd2 << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else if(wd == 4) // 2 rows per pass; 8 bytes loaded and stored per row
+ {
+ do
+ {
+ uv0_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
+ uv0_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
+
+ uv1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
+ uv1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
+
+ uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b); // per-byte rounded average
+ uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b);
+
+ _mm_storel_epi64((__m128i *)pu1_dst, uv0_0_16x8b); // stores the low 8 bytes of the register
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), uv0_1_16x8b);
+
+ ht -= 2; // two rows consumed
+ pu1_src1 += src_strd1 << 1;
+ pu1_src2 += src_strd2 << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else // wd == 8: 4 rows per pass, full 16-byte unaligned loads and stores
+ {
+ __m128i uv0_2_16x8b, uv0_3_16x8b;
+ __m128i uv1_2_16x8b, uv1_3_16x8b;
+
+ do
+ {
+ uv0_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1);
+ uv0_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1));
+ uv0_2_16x8b = _mm_loadu_si128(
+ (__m128i *)(pu1_src1 + (src_strd1 << 1)));
+ uv0_3_16x8b = _mm_loadu_si128(
+ (__m128i *)(pu1_src1 + src_strd1 * 3));
+
+ uv1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2);
+ uv1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2));
+ uv1_2_16x8b = _mm_loadu_si128(
+ (__m128i *)(pu1_src2 + (src_strd2 << 1)));
+ uv1_3_16x8b = _mm_loadu_si128(
+ (__m128i *)(pu1_src2 + src_strd2 * 3));
+
+ uv0_0_16x8b = _mm_avg_epu8(uv0_0_16x8b, uv1_0_16x8b); // per-byte rounded average
+ uv0_1_16x8b = _mm_avg_epu8(uv0_1_16x8b, uv1_1_16x8b);
+ uv0_2_16x8b = _mm_avg_epu8(uv0_2_16x8b, uv1_2_16x8b);
+ uv0_3_16x8b = _mm_avg_epu8(uv0_3_16x8b, uv1_3_16x8b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, uv0_0_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), uv0_1_16x8b);
+ _mm_storeu_si128(
+ (__m128i *)(pu1_dst + (dst_strd << 1)), uv0_2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), uv0_3_16x8b);
+
+ ht -= 4; // four rows consumed; assumes ht is a multiple of 4 on this path
+ pu1_src1 += src_strd1 << 2; // advance all three pointers by four rows
+ pu1_src2 += src_strd2 << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_weighted_pred_luma_sse42 */
+/* */
+/* Description : This function performs the weighted prediction as */
+/* described in sec 8.4.2.3.2 titled "Weighted sample */
+/* prediction process" for luma. The function gets one */
+/* ht x wd block, weights it, rounds it off, offsets it, */
+/* saturates it to unsigned 8-bit and stores it in the */
+/* destination block. (ht,wd) can be (4,4), (8,4), (4,8), */
+/* (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src - Pointer to source */
+/* pu1_dst - Pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* log_wd - number of bits to be rounded off */
+/* wt - weight value */
+/* ofst - offset value */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 04 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_weighted_pred_luma_sse42(UWORD8 *pu1_src, // input block
+ UWORD8 *pu1_dst, // output block
+ WORD32 src_strd, // step between consecutive rows of pu1_src
+ WORD32 dst_strd, // step between consecutive rows of pu1_dst
+ WORD32 log_wd, // right-shift applied after weighting; 0 means no rounding and no shift
+ WORD32 wt, // weight; only the low 16 bits are used
+ WORD32 ofst, // offset added after the shift; only the low 8 bits are used
+ WORD32 ht, // rows to process; each path's loop body runs at least once
+ WORD32 wd) // 4 or 8 pick dedicated paths, anything else takes the 16-wide path
+{
+ __m128i y_0_16x8b, y_1_16x8b, y_2_16x8b, y_3_16x8b;
+
+ __m128i wt_8x16b, round_8x16b, ofst_8x16b;
+
+ WORD32 round_val;
+
+ wt = (WORD16)(wt & 0xffff); // reinterpret the low 16 bits as a signed weight
+ round_val = (log_wd > 0) ? (1 << (log_wd - 1)) : 0; // guard: 1 << -1 is undefined, and no rounding term is needed when log_wd is 0
+ ofst = (WORD8)(ofst & 0xff); // reinterpret the low 8 bits as a signed offset
+
+ wt_8x16b = _mm_set1_epi16(wt); // broadcast each constant to all eight 16-bit lanes
+ round_8x16b = _mm_set1_epi16(round_val);
+ ofst_8x16b = _mm_set1_epi16(ofst);
+
+ if(wd == 4) // 4 rows per pass, two 4-byte rows packed into each register
+ {
+ __m128i y_0_8x16b, y_2_8x16b;
+
+ __m128i mask_full_16x8b, mask_ll4B_16x8b;
+
+ mask_full_16x8b = _mm_set1_epi8(0xff);
+ mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
+ // shifting right by 12 bytes leaves 0xff in only the lowest four bytes
+
+ do
+ {
+ y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+ y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + (src_strd << 1)));
+ y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd * 3));
+
+ y_0_16x8b = _mm_unpacklo_epi32(y_0_16x8b, y_1_16x8b); // low 8 bytes = row 0 then row 1
+ y_2_16x8b = _mm_unpacklo_epi32(y_2_16x8b, y_3_16x8b); // low 8 bytes = row 2 then row 3
+
+ y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); // zero-extend the low 8 bytes to 16-bit lanes
+ y_2_8x16b = _mm_cvtepu8_epi16(y_2_16x8b);
+
+ y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b); // sample * weight, low 16 bits kept
+ y_2_8x16b = _mm_mullo_epi16(y_2_8x16b, wt_8x16b);
+
+ y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b); // saturating add of the rounding term
+ y_2_8x16b = _mm_adds_epi16(round_8x16b, y_2_8x16b);
+
+ y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd); // arithmetic shift keeps the sign
+ y_2_8x16b = _mm_srai_epi16(y_2_8x16b, log_wd);
+
+ y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b); // saturating add of the offset
+ y_2_8x16b = _mm_adds_epi16(ofst_8x16b, y_2_8x16b);
+
+ y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_2_8x16b); // saturate to unsigned 8-bit; rows 0..3 end up back to back
+ y_1_16x8b = _mm_srli_si128(y_0_16x8b, 4); // bring row 1 down to the low four bytes
+ y_2_16x8b = _mm_srli_si128(y_0_16x8b, 8); // row 2
+ y_3_16x8b = _mm_srli_si128(y_0_16x8b, 12); // row 3
+
+ _mm_maskmoveu_si128(y_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); // writes only the bytes selected by the mask
+ _mm_maskmoveu_si128(y_1_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + dst_strd));
+ _mm_maskmoveu_si128(y_2_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + (dst_strd << 1)));
+ _mm_maskmoveu_si128(y_3_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + dst_strd * 3));
+
+ ht -= 4; // four rows consumed
+ pu1_src += src_strd << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+ else if(wd == 8) // 4 rows per pass, one 8-byte row per 16-bit register
+ {
+ __m128i y_0_8x16b, y_1_8x16b, y_2_8x16b, y_3_8x16b;
+
+ do
+ {
+ y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+ y_2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + (src_strd << 1)));
+ y_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd * 3));
+
+ y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); // zero-extend bytes to 16-bit lanes
+ y_1_8x16b = _mm_cvtepu8_epi16(y_1_16x8b);
+ y_2_8x16b = _mm_cvtepu8_epi16(y_2_16x8b);
+ y_3_8x16b = _mm_cvtepu8_epi16(y_3_16x8b);
+
+ y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b); // sample * weight
+ y_1_8x16b = _mm_mullo_epi16(y_1_8x16b, wt_8x16b);
+ y_2_8x16b = _mm_mullo_epi16(y_2_8x16b, wt_8x16b);
+ y_3_8x16b = _mm_mullo_epi16(y_3_8x16b, wt_8x16b);
+
+ y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b); // + rounding term (saturating)
+ y_1_8x16b = _mm_adds_epi16(round_8x16b, y_1_8x16b);
+ y_2_8x16b = _mm_adds_epi16(round_8x16b, y_2_8x16b);
+ y_3_8x16b = _mm_adds_epi16(round_8x16b, y_3_8x16b);
+
+ y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd); // >> log_wd (arithmetic)
+ y_1_8x16b = _mm_srai_epi16(y_1_8x16b, log_wd);
+ y_2_8x16b = _mm_srai_epi16(y_2_8x16b, log_wd);
+ y_3_8x16b = _mm_srai_epi16(y_3_8x16b, log_wd);
+
+ y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b); // + offset (saturating)
+ y_1_8x16b = _mm_adds_epi16(ofst_8x16b, y_1_8x16b);
+ y_2_8x16b = _mm_adds_epi16(ofst_8x16b, y_2_8x16b);
+ y_3_8x16b = _mm_adds_epi16(ofst_8x16b, y_3_8x16b);
+
+ y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_1_8x16b); // rows 0 and 1, saturated to unsigned 8-bit
+ y_2_16x8b = _mm_packus_epi16(y_2_8x16b, y_3_8x16b); // rows 2 and 3
+ y_1_16x8b = _mm_srli_si128(y_0_16x8b, 8); // bring row 1 down to the low eight bytes
+ y_3_16x8b = _mm_srli_si128(y_2_16x8b, 8); // bring row 3 down to the low eight bytes
+
+ _mm_storel_epi64((__m128i *)pu1_dst, y_0_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b);
+
+ ht -= 4; // four rows consumed
+ pu1_src += src_strd << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+ else // wd == 16: 4 rows per pass, each row split into low (L) and high (H) 8-sample halves
+ {
+ __m128i y_0L_8x16b, y_1L_8x16b, y_2L_8x16b, y_3L_8x16b;
+ __m128i y_0H_8x16b, y_1H_8x16b, y_2H_8x16b, y_3H_8x16b;
+
+ __m128i zero_16x8b;
+ zero_16x8b = _mm_set1_epi8(0);
+
+ do
+ {
+ y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
+ y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + (src_strd << 1)));
+ y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd * 3));
+
+ y_0L_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); // low 8 bytes zero-extended
+ y_0H_8x16b = _mm_unpackhi_epi8(y_0_16x8b, zero_16x8b); // high 8 bytes zero-extended by interleaving with zero
+ y_1L_8x16b = _mm_cvtepu8_epi16(y_1_16x8b);
+ y_1H_8x16b = _mm_unpackhi_epi8(y_1_16x8b, zero_16x8b);
+ y_2L_8x16b = _mm_cvtepu8_epi16(y_2_16x8b);
+ y_2H_8x16b = _mm_unpackhi_epi8(y_2_16x8b, zero_16x8b);
+ y_3L_8x16b = _mm_cvtepu8_epi16(y_3_16x8b);
+ y_3H_8x16b = _mm_unpackhi_epi8(y_3_16x8b, zero_16x8b);
+
+ y_0L_8x16b = _mm_mullo_epi16(y_0L_8x16b, wt_8x16b); // sample * weight
+ y_0H_8x16b = _mm_mullo_epi16(y_0H_8x16b, wt_8x16b);
+ y_1L_8x16b = _mm_mullo_epi16(y_1L_8x16b, wt_8x16b);
+ y_1H_8x16b = _mm_mullo_epi16(y_1H_8x16b, wt_8x16b);
+ y_2L_8x16b = _mm_mullo_epi16(y_2L_8x16b, wt_8x16b);
+ y_2H_8x16b = _mm_mullo_epi16(y_2H_8x16b, wt_8x16b);
+ y_3L_8x16b = _mm_mullo_epi16(y_3L_8x16b, wt_8x16b);
+ y_3H_8x16b = _mm_mullo_epi16(y_3H_8x16b, wt_8x16b);
+
+ y_0L_8x16b = _mm_adds_epi16(round_8x16b, y_0L_8x16b); // + rounding term (saturating)
+ y_0H_8x16b = _mm_adds_epi16(round_8x16b, y_0H_8x16b);
+ y_1L_8x16b = _mm_adds_epi16(round_8x16b, y_1L_8x16b);
+ y_1H_8x16b = _mm_adds_epi16(round_8x16b, y_1H_8x16b);
+ y_2L_8x16b = _mm_adds_epi16(round_8x16b, y_2L_8x16b);
+ y_2H_8x16b = _mm_adds_epi16(round_8x16b, y_2H_8x16b);
+ y_3L_8x16b = _mm_adds_epi16(round_8x16b, y_3L_8x16b);
+ y_3H_8x16b = _mm_adds_epi16(round_8x16b, y_3H_8x16b);
+
+ y_0L_8x16b = _mm_srai_epi16(y_0L_8x16b, log_wd); // >> log_wd (arithmetic)
+ y_0H_8x16b = _mm_srai_epi16(y_0H_8x16b, log_wd);
+ y_1L_8x16b = _mm_srai_epi16(y_1L_8x16b, log_wd);
+ y_1H_8x16b = _mm_srai_epi16(y_1H_8x16b, log_wd);
+ y_2L_8x16b = _mm_srai_epi16(y_2L_8x16b, log_wd);
+ y_2H_8x16b = _mm_srai_epi16(y_2H_8x16b, log_wd);
+ y_3L_8x16b = _mm_srai_epi16(y_3L_8x16b, log_wd);
+ y_3H_8x16b = _mm_srai_epi16(y_3H_8x16b, log_wd);
+
+ y_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y_0L_8x16b); // + offset (saturating)
+ y_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y_0H_8x16b);
+ y_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y_1L_8x16b);
+ y_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y_1H_8x16b);
+ y_2L_8x16b = _mm_adds_epi16(ofst_8x16b, y_2L_8x16b);
+ y_2H_8x16b = _mm_adds_epi16(ofst_8x16b, y_2H_8x16b);
+ y_3L_8x16b = _mm_adds_epi16(ofst_8x16b, y_3L_8x16b);
+ y_3H_8x16b = _mm_adds_epi16(ofst_8x16b, y_3H_8x16b);
+
+ y_0_16x8b = _mm_packus_epi16(y_0L_8x16b, y_0H_8x16b); // rejoin halves, saturating to unsigned 8-bit
+ y_1_16x8b = _mm_packus_epi16(y_1L_8x16b, y_1H_8x16b);
+ y_2_16x8b = _mm_packus_epi16(y_2L_8x16b, y_2H_8x16b);
+ y_3_16x8b = _mm_packus_epi16(y_3L_8x16b, y_3H_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, y_0_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b);
+
+ ht -= 4; // four rows consumed
+ pu1_src += src_strd << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_weighted_pred_chroma_sse42 */
+/* */
+/* Description : This function performs the weighted prediction as */
+/* described in sec 8.4.2.3.2 titled "Weighted sample */
+/* prediction process" for chroma. The function gets one */
+/* ht x wd block, weights it, rounds it off, offsets it, */
+/* saturates it to unsigned 8-bit and stores it in the */
+/* destination block. (ht,wd) can be (2,2), (4,2), (2,4), */
+/* (4,4), (8,4), (4,8) or (8,8). */
+/* */
+/* Inputs : pu1_src - Pointer to source */
+/* pu1_dst - Pointer to destination */
+/* src_strd - stride for source */
+/* dst_strd - stride for destination */
+/* log_wd - number of bits to be rounded off */
+/* wt - weight values for u and v */
+/* ofst - offset values for u and v */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 04 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_weighted_pred_chroma_sse42(UWORD8 *pu1_src, // input block
+ UWORD8 *pu1_dst, // output block
+ WORD32 src_strd, // step between consecutive rows of pu1_src
+ WORD32 dst_strd, // step between consecutive rows of pu1_dst
+ WORD32 log_wd, // right-shift applied after weighting; 0 means no rounding and no shift
+ WORD32 wt, // two packed 16-bit weights, broadcast as one 32-bit pattern
+ WORD32 ofst, // two packed 8-bit offsets: u in bits 0-7, v in bits 8-15
+ WORD32 ht, // rows to process; each path's loop body runs at least once
+ WORD32 wd) // 2 or 4 pick dedicated paths, anything else takes the 8-wide path
+{
+ __m128i y_0_16x8b, y_1_16x8b;
+
+ __m128i wt_8x16b, round_8x16b, ofst_8x16b;
+
+ WORD32 ofst_u, ofst_v;
+ WORD32 round_val;
+
+ ofst_u = (WORD8)(ofst & 0xff); // low byte as a signed offset
+ ofst_v = (WORD8)(ofst >> 8); // next byte as a signed offset
+ round_val = (log_wd > 0) ? (1 << (log_wd - 1)) : 0; // guard: 1 << -1 is undefined, and no rounding term is needed when log_wd is 0
+ ofst = (ofst_u & 0xffff) | (ofst_v * (1 << 16)); // u in the low 16 bits, v in the high 16; multiply instead of left-shifting a possibly negative value
+
+ wt_8x16b = _mm_set1_epi32(wt); // 32-bit broadcast: 16-bit lanes alternate low half, high half
+ round_8x16b = _mm_set1_epi16(round_val);
+ ofst_8x16b = _mm_set1_epi32(ofst); // 16-bit lanes alternate ofst_u, ofst_v
+
+ if(wd == 2) // 2 rows per pass; 4 bytes per row, both rows share one register
+ {
+ __m128i y_0_8x16b;
+
+ __m128i mask_full_16x8b, mask_ll4B_16x8b;
+
+ mask_full_16x8b = _mm_set1_epi8(0xff);
+ mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
+ // shifting right by 12 bytes leaves 0xff in only the lowest four bytes
+
+ do
+ {
+ y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+
+ y_0_16x8b = _mm_unpacklo_epi32(y_0_16x8b, y_1_16x8b); // low 8 bytes = row 0 then row 1
+
+ y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); // zero-extend bytes to 16-bit lanes
+
+ y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b); // sample * weight, low 16 bits kept
+
+ y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b); // + rounding term (saturating)
+
+ y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd); // >> log_wd (arithmetic)
+
+ y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b); // + offset (saturating)
+
+ y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_0_8x16b); // saturate to unsigned 8-bit
+ y_1_16x8b = _mm_srli_si128(y_0_16x8b, 4); // bring row 1 down to the low four bytes
+
+ _mm_maskmoveu_si128(y_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst); // writes only the bytes selected by the mask
+ _mm_maskmoveu_si128(y_1_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + dst_strd));
+
+ ht -= 2; // two rows consumed
+ pu1_src += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else if(wd == 4) // 2 rows per pass; 8 bytes per row
+ {
+ __m128i y_0_8x16b, y_1_8x16b;
+
+ do
+ {
+ y_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ y_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+
+ y_0_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); // zero-extend bytes to 16-bit lanes
+ y_1_8x16b = _mm_cvtepu8_epi16(y_1_16x8b);
+
+ y_0_8x16b = _mm_mullo_epi16(y_0_8x16b, wt_8x16b); // sample * weight
+ y_1_8x16b = _mm_mullo_epi16(y_1_8x16b, wt_8x16b);
+
+ y_0_8x16b = _mm_adds_epi16(round_8x16b, y_0_8x16b); // + rounding term (saturating)
+ y_1_8x16b = _mm_adds_epi16(round_8x16b, y_1_8x16b);
+
+ y_0_8x16b = _mm_srai_epi16(y_0_8x16b, log_wd); // >> log_wd (arithmetic)
+ y_1_8x16b = _mm_srai_epi16(y_1_8x16b, log_wd);
+
+ y_0_8x16b = _mm_adds_epi16(ofst_8x16b, y_0_8x16b); // + offset (saturating)
+ y_1_8x16b = _mm_adds_epi16(ofst_8x16b, y_1_8x16b);
+
+ y_0_16x8b = _mm_packus_epi16(y_0_8x16b, y_1_8x16b); // rows 0 and 1, saturated to unsigned 8-bit
+ y_1_16x8b = _mm_srli_si128(y_0_16x8b, 8); // bring row 1 down to the low eight bytes
+
+ _mm_storel_epi64((__m128i *)pu1_dst, y_0_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
+
+ ht -= 2; // two rows consumed
+ pu1_src += src_strd << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+ else // wd == 8: 16 bytes per row, 4 rows per pass, split into low (L) and high (H) halves
+ {
+ __m128i y_2_16x8b, y_3_16x8b;
+ __m128i y_0L_8x16b, y_1L_8x16b, y_2L_8x16b, y_3L_8x16b;
+ __m128i y_0H_8x16b, y_1H_8x16b, y_2H_8x16b, y_3H_8x16b;
+
+ __m128i zero_16x8b;
+ zero_16x8b = _mm_set1_epi8(0);
+
+ do
+ {
+ y_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ y_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd));
+ y_2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + (src_strd << 1)));
+ y_3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + src_strd * 3));
+
+ y_0L_8x16b = _mm_cvtepu8_epi16(y_0_16x8b); // low 8 bytes zero-extended
+ y_0H_8x16b = _mm_unpackhi_epi8(y_0_16x8b, zero_16x8b); // high 8 bytes zero-extended by interleaving with zero
+ y_1L_8x16b = _mm_cvtepu8_epi16(y_1_16x8b);
+ y_1H_8x16b = _mm_unpackhi_epi8(y_1_16x8b, zero_16x8b);
+ y_2L_8x16b = _mm_cvtepu8_epi16(y_2_16x8b);
+ y_2H_8x16b = _mm_unpackhi_epi8(y_2_16x8b, zero_16x8b);
+ y_3L_8x16b = _mm_cvtepu8_epi16(y_3_16x8b);
+ y_3H_8x16b = _mm_unpackhi_epi8(y_3_16x8b, zero_16x8b);
+
+ y_0L_8x16b = _mm_mullo_epi16(y_0L_8x16b, wt_8x16b); // sample * weight
+ y_0H_8x16b = _mm_mullo_epi16(y_0H_8x16b, wt_8x16b);
+ y_1L_8x16b = _mm_mullo_epi16(y_1L_8x16b, wt_8x16b);
+ y_1H_8x16b = _mm_mullo_epi16(y_1H_8x16b, wt_8x16b);
+ y_2L_8x16b = _mm_mullo_epi16(y_2L_8x16b, wt_8x16b);
+ y_2H_8x16b = _mm_mullo_epi16(y_2H_8x16b, wt_8x16b);
+ y_3L_8x16b = _mm_mullo_epi16(y_3L_8x16b, wt_8x16b);
+ y_3H_8x16b = _mm_mullo_epi16(y_3H_8x16b, wt_8x16b);
+
+ y_0L_8x16b = _mm_adds_epi16(round_8x16b, y_0L_8x16b); // + rounding term (saturating)
+ y_0H_8x16b = _mm_adds_epi16(round_8x16b, y_0H_8x16b);
+ y_1L_8x16b = _mm_adds_epi16(round_8x16b, y_1L_8x16b);
+ y_1H_8x16b = _mm_adds_epi16(round_8x16b, y_1H_8x16b);
+ y_2L_8x16b = _mm_adds_epi16(round_8x16b, y_2L_8x16b);
+ y_2H_8x16b = _mm_adds_epi16(round_8x16b, y_2H_8x16b);
+ y_3L_8x16b = _mm_adds_epi16(round_8x16b, y_3L_8x16b);
+ y_3H_8x16b = _mm_adds_epi16(round_8x16b, y_3H_8x16b);
+
+ y_0L_8x16b = _mm_srai_epi16(y_0L_8x16b, log_wd); // >> log_wd (arithmetic)
+ y_0H_8x16b = _mm_srai_epi16(y_0H_8x16b, log_wd);
+ y_1L_8x16b = _mm_srai_epi16(y_1L_8x16b, log_wd);
+ y_1H_8x16b = _mm_srai_epi16(y_1H_8x16b, log_wd);
+ y_2L_8x16b = _mm_srai_epi16(y_2L_8x16b, log_wd);
+ y_2H_8x16b = _mm_srai_epi16(y_2H_8x16b, log_wd);
+ y_3L_8x16b = _mm_srai_epi16(y_3L_8x16b, log_wd);
+ y_3H_8x16b = _mm_srai_epi16(y_3H_8x16b, log_wd);
+
+ y_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y_0L_8x16b); // + offset (saturating)
+ y_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y_0H_8x16b);
+ y_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y_1L_8x16b);
+ y_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y_1H_8x16b);
+ y_2L_8x16b = _mm_adds_epi16(ofst_8x16b, y_2L_8x16b);
+ y_2H_8x16b = _mm_adds_epi16(ofst_8x16b, y_2H_8x16b);
+ y_3L_8x16b = _mm_adds_epi16(ofst_8x16b, y_3L_8x16b);
+ y_3H_8x16b = _mm_adds_epi16(ofst_8x16b, y_3H_8x16b);
+
+ y_0_16x8b = _mm_packus_epi16(y_0L_8x16b, y_0H_8x16b); // rejoin halves, saturating to unsigned 8-bit
+ y_1_16x8b = _mm_packus_epi16(y_1L_8x16b, y_1H_8x16b);
+ y_2_16x8b = _mm_packus_epi16(y_2L_8x16b, y_2H_8x16b);
+ y_3_16x8b = _mm_packus_epi16(y_3L_8x16b, y_3H_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, y_0_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y_1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + (dst_strd << 1)), y_2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd * 3), y_3_16x8b);
+
+ ht -= 4; // four rows consumed
+ pu1_src += src_strd << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_weighted_bi_pred_luma_sse42 */
+/* */
+/* Description : This function performs the weighted biprediction as */
+/* described in sec 8.4.2.3.2 titled "Weighted sample */
+/* prediction process" for luma. The function gets two */
+/* ht x wd blocks, weights them, adds them, rounds off the */
+/* sum, offsets it, saturates it to unsigned 8-bit and */
+/* stores it in the destination block. (ht,wd) can be */
+/* (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16). */
+/* */
+/* Inputs : pu1_src1 - Pointer to source 1 */
+/* pu1_src2 - Pointer to source 2 */
+/* pu1_dst - Pointer to destination */
+/* src_strd1 - stride for source 1 */
+/* src_strd2 - stride for source 2 */
+/* dst_strd - stride for destination */
+/* log_wd - number of bits to be rounded off */
+/* wt1 - weight value for source 1 */
+/* wt2 - weight value for source 2 */
+/* ofst1 - offset value for source 1 */
+/* ofst2 - offset value for source 2 */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 04 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_weighted_bi_pred_luma_sse42(UWORD8 *pu1_src1,
+ UWORD8 *pu1_src2,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd1,
+ WORD32 src_strd2,
+ WORD32 dst_strd,
+ WORD32 log_wd,
+ WORD32 wt1,
+ WORD32 wt2,
+ WORD32 ofst1,
+ WORD32 ofst2,
+ WORD32 ht,
+ WORD32 wd)
+{
+ __m128i y1_0_16x8b, y1_1_16x8b;
+ __m128i y2_0_16x8b, y2_1_16x8b;
+
+ __m128i wt1_8x16b, wt2_8x16b;
+ __m128i ofst_8x16b, round_8x16b;
+
+ WORD32 ofst;
+ WORD32 round_val, shft;
+
+ wt1 = (WORD16)(wt1 & 0xffff);
+ wt2 = (WORD16)(wt2 & 0xffff);
+ round_val = 1 << log_wd;
+ shft = log_wd + 1;
+ ofst1 = (WORD8)(ofst1 & 0xff);
+ ofst2 = (WORD8)(ofst2 & 0xff);
+ ofst = (ofst1 + ofst2 + 1) >> 1;
+
+ wt1_8x16b = _mm_set1_epi16(wt1);
+ wt2_8x16b = _mm_set1_epi16(wt2);
+ round_8x16b = _mm_set1_epi16(round_val);
+ ofst_8x16b = _mm_set1_epi16(ofst);
+
+ if(wd == 4)
+ {
+ __m128i y1_2_16x8b, y1_3_16x8b;
+ __m128i y2_2_16x8b, y2_3_16x8b;
+
+ __m128i y1_0_8x16b, y1_2_8x16b;
+ __m128i y2_0_8x16b, y2_2_8x16b;
+
+ __m128i mask_ll4B_16x8b;
+
+ mask_ll4B_16x8b = _mm_set1_epi8(0xff);
+ mask_ll4B_16x8b = _mm_srli_si128(mask_ll4B_16x8b, 12);
+ // mask for first four bytes
+
+ do
+ {
+ y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
+ y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
+ y1_2_16x8b = _mm_loadl_epi64(
+ (__m128i *)(pu1_src1 + (src_strd1 << 1)));
+ y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3));
+
+ y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
+ y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
+ y2_2_16x8b = _mm_loadl_epi64(
+ (__m128i *)(pu1_src2 + (src_strd2 << 1)));
+ y2_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3));
+
+ y1_0_16x8b = _mm_unpacklo_epi32(y1_0_16x8b, y1_1_16x8b);
+ y1_2_16x8b = _mm_unpacklo_epi32(y1_2_16x8b, y1_3_16x8b);
+ y2_0_16x8b = _mm_unpacklo_epi32(y2_0_16x8b, y2_1_16x8b);
+ y2_2_16x8b = _mm_unpacklo_epi32(y2_2_16x8b, y2_3_16x8b);
+
+ y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
+ y1_2_8x16b = _mm_cvtepu8_epi16(y1_2_16x8b);
+ y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
+ y2_2_8x16b = _mm_cvtepu8_epi16(y2_2_16x8b);
+
+ y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b);
+ y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b);
+ y1_2_8x16b = _mm_mullo_epi16(y1_2_8x16b, wt1_8x16b);
+ y2_2_8x16b = _mm_mullo_epi16(y2_2_8x16b, wt2_8x16b);
+
+ y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b);
+ y1_2_8x16b = _mm_adds_epi16(y1_2_8x16b, y2_2_8x16b);
+
+ y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b);
+ y1_2_8x16b = _mm_adds_epi16(round_8x16b, y1_2_8x16b);
+
+ y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft);
+ y1_2_8x16b = _mm_srai_epi16(y1_2_8x16b, shft);
+
+ y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b);
+ y1_2_8x16b = _mm_adds_epi16(ofst_8x16b, y1_2_8x16b);
+
+ y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_2_8x16b);
+ y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 4);
+ y1_2_16x8b = _mm_srli_si128(y1_0_16x8b, 8);
+ y1_3_16x8b = _mm_srli_si128(y1_0_16x8b, 12);
+
+ _mm_maskmoveu_si128(y1_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst);
+ _mm_maskmoveu_si128(y1_1_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + dst_strd));
+ _mm_maskmoveu_si128(y1_2_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + (dst_strd << 1)));
+ _mm_maskmoveu_si128(y1_3_16x8b, mask_ll4B_16x8b,
+ (char*)(pu1_dst + dst_strd * 3));
+
+ ht -= 4;
+ pu1_src1 += src_strd1 << 2;
+ pu1_src2 += src_strd2 << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+ else if(wd == 8)
+ {
+ __m128i y1_2_16x8b, y1_3_16x8b;
+ __m128i y2_2_16x8b, y2_3_16x8b;
+
+ __m128i y1_0_8x16b, y1_1_8x16b, y1_2_8x16b, y1_3_8x16b;
+ __m128i y2_0_8x16b, y2_1_8x16b, y2_2_8x16b, y2_3_8x16b;
+
+ do
+ {
+ y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
+ y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
+ y1_2_16x8b = _mm_loadl_epi64(
+ (__m128i *)(pu1_src1 + (src_strd1 << 1)));
+ y1_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1 * 3));
+
+ y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
+ y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
+ y2_2_16x8b = _mm_loadl_epi64(
+ (__m128i *)(pu1_src2 + (src_strd2 << 1)));
+ y2_3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2 * 3));
+
+ y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
+ y1_1_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b);
+ y1_2_8x16b = _mm_cvtepu8_epi16(y1_2_16x8b);
+ y1_3_8x16b = _mm_cvtepu8_epi16(y1_3_16x8b);
+
+ y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
+ y2_1_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b);
+ y2_2_8x16b = _mm_cvtepu8_epi16(y2_2_16x8b);
+ y2_3_8x16b = _mm_cvtepu8_epi16(y2_3_16x8b);
+
+ y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b);
+ y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b);
+ y1_1_8x16b = _mm_mullo_epi16(y1_1_8x16b, wt1_8x16b);
+ y2_1_8x16b = _mm_mullo_epi16(y2_1_8x16b, wt2_8x16b);
+
+ y1_2_8x16b = _mm_mullo_epi16(y1_2_8x16b, wt1_8x16b);
+ y2_2_8x16b = _mm_mullo_epi16(y2_2_8x16b, wt2_8x16b);
+ y1_3_8x16b = _mm_mullo_epi16(y1_3_8x16b, wt1_8x16b);
+ y2_3_8x16b = _mm_mullo_epi16(y2_3_8x16b, wt2_8x16b);
+
+ y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b);
+ y1_1_8x16b = _mm_adds_epi16(y1_1_8x16b, y2_1_8x16b);
+ y1_2_8x16b = _mm_adds_epi16(y1_2_8x16b, y2_2_8x16b);
+ y1_3_8x16b = _mm_adds_epi16(y1_3_8x16b, y2_3_8x16b);
+
+ y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b);
+ y1_1_8x16b = _mm_adds_epi16(round_8x16b, y1_1_8x16b);
+ y1_2_8x16b = _mm_adds_epi16(round_8x16b, y1_2_8x16b);
+ y1_3_8x16b = _mm_adds_epi16(round_8x16b, y1_3_8x16b);
+
+ y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft);
+ y1_1_8x16b = _mm_srai_epi16(y1_1_8x16b, shft);
+ y1_2_8x16b = _mm_srai_epi16(y1_2_8x16b, shft);
+ y1_3_8x16b = _mm_srai_epi16(y1_3_8x16b, shft);
+
+ y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b);
+ y1_1_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1_8x16b);
+ y1_2_8x16b = _mm_adds_epi16(ofst_8x16b, y1_2_8x16b);
+ y1_3_8x16b = _mm_adds_epi16(ofst_8x16b, y1_3_8x16b);
+
+ y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_1_8x16b);
+ y1_2_16x8b = _mm_packus_epi16(y1_2_8x16b, y1_3_8x16b);
+ y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 8);
+ y1_3_16x8b = _mm_srli_si128(y1_2_16x8b, 8);
+
+ _mm_storel_epi64((__m128i *)pu1_dst, y1_0_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd << 1)), y1_2_16x8b);
+ _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd * 3), y1_3_16x8b);
+
+ ht -= 4;
+ pu1_src1 += src_strd1 << 2;
+ pu1_src2 += src_strd2 << 2;
+ pu1_dst += dst_strd << 2;
+ }
+ while(ht > 0);
+ }
+ else // wd == 16
+ {
+ __m128i y1_0L_8x16b, y1_0H_8x16b, y1_1L_8x16b, y1_1H_8x16b;
+ __m128i y2_0L_8x16b, y2_0H_8x16b, y2_1L_8x16b, y2_1H_8x16b;
+
+ __m128i zero_16x8b;
+ zero_16x8b = _mm_set1_epi8(0);
+
+ do
+ {
+ y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1);
+ y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1));
+ y2_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2);
+ y2_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2));
+
+ y1_0L_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
+ y1_0H_8x16b = _mm_unpackhi_epi8(y1_0_16x8b, zero_16x8b);
+ y1_1L_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b);
+ y1_1H_8x16b = _mm_unpackhi_epi8(y1_1_16x8b, zero_16x8b);
+
+ y2_0L_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
+ y2_0H_8x16b = _mm_unpackhi_epi8(y2_0_16x8b, zero_16x8b);
+ y2_1L_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b);
+ y2_1H_8x16b = _mm_unpackhi_epi8(y2_1_16x8b, zero_16x8b);
+
+ y1_0L_8x16b = _mm_mullo_epi16(y1_0L_8x16b, wt1_8x16b);
+ y1_0H_8x16b = _mm_mullo_epi16(y1_0H_8x16b, wt1_8x16b);
+ y1_1L_8x16b = _mm_mullo_epi16(y1_1L_8x16b, wt1_8x16b);
+ y1_1H_8x16b = _mm_mullo_epi16(y1_1H_8x16b, wt1_8x16b);
+
+ y2_0L_8x16b = _mm_mullo_epi16(y2_0L_8x16b, wt2_8x16b);
+ y2_0H_8x16b = _mm_mullo_epi16(y2_0H_8x16b, wt2_8x16b);
+ y2_1L_8x16b = _mm_mullo_epi16(y2_1L_8x16b, wt2_8x16b);
+ y2_1H_8x16b = _mm_mullo_epi16(y2_1H_8x16b, wt2_8x16b);
+
+ y1_0L_8x16b = _mm_adds_epi16(y1_0L_8x16b, y2_0L_8x16b);
+ y1_0H_8x16b = _mm_adds_epi16(y1_0H_8x16b, y2_0H_8x16b);
+ y1_1L_8x16b = _mm_adds_epi16(y1_1L_8x16b, y2_1L_8x16b);
+ y1_1H_8x16b = _mm_adds_epi16(y1_1H_8x16b, y2_1H_8x16b);
+
+ y1_0L_8x16b = _mm_adds_epi16(round_8x16b, y1_0L_8x16b);
+ y1_0H_8x16b = _mm_adds_epi16(round_8x16b, y1_0H_8x16b);
+ y1_1L_8x16b = _mm_adds_epi16(round_8x16b, y1_1L_8x16b);
+ y1_1H_8x16b = _mm_adds_epi16(round_8x16b, y1_1H_8x16b);
+
+ y1_0L_8x16b = _mm_srai_epi16(y1_0L_8x16b, shft);
+ y1_0H_8x16b = _mm_srai_epi16(y1_0H_8x16b, shft);
+ y1_1L_8x16b = _mm_srai_epi16(y1_1L_8x16b, shft);
+ y1_1H_8x16b = _mm_srai_epi16(y1_1H_8x16b, shft);
+
+ y1_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0L_8x16b);
+ y1_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0H_8x16b);
+ y1_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1L_8x16b);
+ y1_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1H_8x16b);
+
+ y1_0_16x8b = _mm_packus_epi16(y1_0L_8x16b, y1_0H_8x16b);
+ y1_1_16x8b = _mm_packus_epi16(y1_1L_8x16b, y1_1H_8x16b);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, y1_0_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b);
+
+ ht -= 2;
+ pu1_src1 += src_strd1 << 1;
+ pu1_src2 += src_strd2 << 1;
+ pu1_dst += dst_strd << 1;
+ }
+ while(ht > 0);
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264_weighted_bi_pred_chroma_sse42 */
+/* */
+/* Description : This function performs the weighted biprediction as */
+/* described in sec 8.4.2.3.2 titled "Weighted sample */
+/* prediction process" for chroma. The function gets two */
+/* ht x wd blocks, weights them, adds them, rounds off the */
+/* sum, offsets it, saturates it to unsigned 8-bit and */
+/* stores it in the destination block. (ht,wd) can be */
+/* (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8). */
+/* */
+/* Inputs : pu1_src1 - Pointer to source 1 */
+/* pu1_src2 - Pointer to source 2 */
+/* pu1_dst - Pointer to destination */
+/* src_strd1 - stride for source 1 */
+/* src_strd2 - stride for source 2 */
+/* dst_strd - stride for destination */
+/* log_wd - number of bits to be rounded off */
+/* wt1 - weight values for u and v in source 1 */
+/* wt2 - weight values for u and v in source 2 */
+/* ofst1 - offset value for u and v in source 1 */
+/* ofst2 - offset value for u and v in source 2 */
+/* ht - height of the block */
+/* wd - width of the block */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 04 02 2015 Kaushik Initial Version */
+/* Senthoor */
+/* */
+/*****************************************************************************/
+void ih264_weighted_bi_pred_chroma_sse42(UWORD8 *pu1_src1,
+                                         UWORD8 *pu1_src2,
+                                         UWORD8 *pu1_dst,
+                                         WORD32 src_strd1,
+                                         WORD32 src_strd2,
+                                         WORD32 dst_strd,
+                                         WORD32 log_wd,
+                                         WORD32 wt1,
+                                         WORD32 wt2,
+                                         WORD32 ofst1,
+                                         WORD32 ofst2,
+                                         WORD32 ht,
+                                         WORD32 wd)
+{
+    __m128i y1_0_16x8b, y1_1_16x8b;
+    __m128i y2_0_16x8b, y2_1_16x8b;
+
+    __m128i wt1_8x16b, wt2_8x16b;
+    __m128i ofst_8x16b, round_8x16b;
+
+    WORD32 ofst1_u, ofst2_u, ofst_u;
+    WORD32 ofst1_v, ofst2_v, ofst_v;
+    WORD32 round_val, shft, ofst_val;
+
+    /* Per 16-bit lane the result is                                        */
+    /* ((src1 * wt1 + src2 * wt2 + round_val) >> shft) + offset             */
+    round_val = 1 << log_wd;
+    shft = log_wd + 1;
+
+    /* Low byte of each offset argument is taken as the u offset, the next  */
+    /* byte as the v offset (WORD8 is presumably a signed 8-bit typedef)    */
+    ofst1_u = (WORD8)(ofst1 & 0xff);
+    ofst1_v = (WORD8)(ofst1 >> 8);
+    ofst2_u = (WORD8)(ofst2 & 0xff);
+    ofst2_v = (WORD8)(ofst2 >> 8);
+
+    /* Broadcasting the 32-bit weight makes its low 16 bits multiply the    */
+    /* even 16-bit lanes and its high 16 bits the odd lanes                 */
+    wt1_8x16b = _mm_set1_epi32(wt1);
+    wt2_8x16b = _mm_set1_epi32(wt2);
+
+    ofst_u = (ofst1_u + ofst2_u + 1) >> 1;
+    ofst_v = (ofst1_v + ofst2_v + 1) >> 1;
+
+    /* u offset goes to the low and v offset to the high 16 bits. The v     */
+    /* half is positioned with a multiply: ofst_v may be negative and       */
+    /* left-shifting a negative signed value is undefined behaviour in C    */
+    ofst_val = (ofst_u & 0xffff) | (ofst_v * (1 << 16));
+
+    round_8x16b = _mm_set1_epi16(round_val);
+    ofst_8x16b = _mm_set1_epi32(ofst_val);
+
+    /* Each branch handles 2 * wd bytes per row and two rows per pass; the  */
+    /* do-while loops run at least once and step ht down by 2               */
+    if(wd == 2)
+    {
+        __m128i y1_0_8x16b, y2_0_8x16b;
+
+        __m128i mask_full_16x8b, mask_ll4B_16x8b;
+
+        /* Byte mask with only the lowest four bytes set (4-byte stores) */
+        mask_full_16x8b = _mm_set1_epi8(0xff);
+        mask_ll4B_16x8b = _mm_srli_si128(mask_full_16x8b, 12);
+
+        do
+        {
+            /* NOTE(review): 8 bytes are loaded per row although only the  */
+            /* low 4 are used; assumes the extra bytes are readable        */
+            y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
+            y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
+
+            y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
+            y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
+
+            /* Place the first 4 bytes of both rows side by side */
+            y1_0_16x8b = _mm_unpacklo_epi32(y1_0_16x8b, y1_1_16x8b);
+            y2_0_16x8b = _mm_unpacklo_epi32(y2_0_16x8b, y2_1_16x8b);
+
+            y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
+            y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
+
+            y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b);
+            y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b);
+
+            y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b);
+            y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b);
+
+            y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft);
+            y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b);
+
+            /* Saturate to unsigned 8-bit; bytes 0-3 are row 0, 4-7 row 1 */
+            y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_0_8x16b);
+            y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 4);
+
+            _mm_maskmoveu_si128(y1_0_16x8b, mask_ll4B_16x8b, (char*)pu1_dst);
+            _mm_maskmoveu_si128(y1_1_16x8b, mask_ll4B_16x8b,
+                                (char*)(pu1_dst + dst_strd));
+
+            ht -= 2;
+            pu1_src1 += src_strd1 << 1;
+            pu1_src2 += src_strd2 << 1;
+            pu1_dst += dst_strd << 1;
+        }
+        while(ht > 0);
+    }
+    else if(wd == 4)
+    {
+        __m128i y1_0_8x16b, y1_1_8x16b;
+        __m128i y2_0_8x16b, y2_1_8x16b;
+
+        do
+        {
+            /* 8 bytes per row, one register per row */
+            y1_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src1);
+            y1_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src1 + src_strd1));
+
+            y2_0_16x8b = _mm_loadl_epi64((__m128i *)pu1_src2);
+            y2_1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src2 + src_strd2));
+
+            y1_0_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
+            y1_1_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b);
+
+            y2_0_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
+            y2_1_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b);
+
+            y1_0_8x16b = _mm_mullo_epi16(y1_0_8x16b, wt1_8x16b);
+            y2_0_8x16b = _mm_mullo_epi16(y2_0_8x16b, wt2_8x16b);
+            y1_1_8x16b = _mm_mullo_epi16(y1_1_8x16b, wt1_8x16b);
+            y2_1_8x16b = _mm_mullo_epi16(y2_1_8x16b, wt2_8x16b);
+
+            y1_0_8x16b = _mm_adds_epi16(y1_0_8x16b, y2_0_8x16b);
+            y1_1_8x16b = _mm_adds_epi16(y1_1_8x16b, y2_1_8x16b);
+
+            y1_0_8x16b = _mm_adds_epi16(round_8x16b, y1_0_8x16b);
+            y1_1_8x16b = _mm_adds_epi16(round_8x16b, y1_1_8x16b);
+
+            y1_0_8x16b = _mm_srai_epi16(y1_0_8x16b, shft);
+            y1_1_8x16b = _mm_srai_epi16(y1_1_8x16b, shft);
+
+            y1_0_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0_8x16b);
+            y1_1_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1_8x16b);
+
+            /* Low 8 bytes hold row 0, high 8 bytes hold row 1 */
+            y1_0_16x8b = _mm_packus_epi16(y1_0_8x16b, y1_1_8x16b);
+            y1_1_16x8b = _mm_srli_si128(y1_0_16x8b, 8);
+
+            _mm_storel_epi64((__m128i *)pu1_dst, y1_0_16x8b);
+            _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b);
+
+            ht -= 2;
+            pu1_src1 += src_strd1 << 1;
+            pu1_src2 += src_strd2 << 1;
+            pu1_dst += dst_strd << 1;
+        }
+        while(ht > 0);
+    }
+    else // wd == 8
+    {
+        __m128i y1_0L_8x16b, y1_0H_8x16b, y1_1L_8x16b, y1_1H_8x16b;
+        __m128i y2_0L_8x16b, y2_0H_8x16b, y2_1L_8x16b, y2_1H_8x16b;
+
+        __m128i zero_16x8b;
+        zero_16x8b = _mm_set1_epi8(0);
+
+        do
+        {
+            /* 16 bytes per row; each row is widened into a low (L) and a */
+            /* high (H) half of eight 16-bit lanes                        */
+            y1_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src1);
+            y1_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src1 + src_strd1));
+            y2_0_16x8b = _mm_loadu_si128((__m128i *)pu1_src2);
+            y2_1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src2 + src_strd2));
+
+            y1_0L_8x16b = _mm_cvtepu8_epi16(y1_0_16x8b);
+            y1_0H_8x16b = _mm_unpackhi_epi8(y1_0_16x8b, zero_16x8b);
+            y1_1L_8x16b = _mm_cvtepu8_epi16(y1_1_16x8b);
+            y1_1H_8x16b = _mm_unpackhi_epi8(y1_1_16x8b, zero_16x8b);
+
+            y2_0L_8x16b = _mm_cvtepu8_epi16(y2_0_16x8b);
+            y2_0H_8x16b = _mm_unpackhi_epi8(y2_0_16x8b, zero_16x8b);
+            y2_1L_8x16b = _mm_cvtepu8_epi16(y2_1_16x8b);
+            y2_1H_8x16b = _mm_unpackhi_epi8(y2_1_16x8b, zero_16x8b);
+
+            y1_0L_8x16b = _mm_mullo_epi16(y1_0L_8x16b, wt1_8x16b);
+            y1_0H_8x16b = _mm_mullo_epi16(y1_0H_8x16b, wt1_8x16b);
+            y1_1L_8x16b = _mm_mullo_epi16(y1_1L_8x16b, wt1_8x16b);
+            y1_1H_8x16b = _mm_mullo_epi16(y1_1H_8x16b, wt1_8x16b);
+
+            y2_0L_8x16b = _mm_mullo_epi16(y2_0L_8x16b, wt2_8x16b);
+            y2_0H_8x16b = _mm_mullo_epi16(y2_0H_8x16b, wt2_8x16b);
+            y2_1L_8x16b = _mm_mullo_epi16(y2_1L_8x16b, wt2_8x16b);
+            y2_1H_8x16b = _mm_mullo_epi16(y2_1H_8x16b, wt2_8x16b);
+
+            y1_0L_8x16b = _mm_adds_epi16(y1_0L_8x16b, y2_0L_8x16b);
+            y1_0H_8x16b = _mm_adds_epi16(y1_0H_8x16b, y2_0H_8x16b);
+            y1_1L_8x16b = _mm_adds_epi16(y1_1L_8x16b, y2_1L_8x16b);
+            y1_1H_8x16b = _mm_adds_epi16(y1_1H_8x16b, y2_1H_8x16b);
+
+            y1_0L_8x16b = _mm_adds_epi16(round_8x16b, y1_0L_8x16b);
+            y1_0H_8x16b = _mm_adds_epi16(round_8x16b, y1_0H_8x16b);
+            y1_1L_8x16b = _mm_adds_epi16(round_8x16b, y1_1L_8x16b);
+            y1_1H_8x16b = _mm_adds_epi16(round_8x16b, y1_1H_8x16b);
+
+            y1_0L_8x16b = _mm_srai_epi16(y1_0L_8x16b, shft);
+            y1_0H_8x16b = _mm_srai_epi16(y1_0H_8x16b, shft);
+            y1_1L_8x16b = _mm_srai_epi16(y1_1L_8x16b, shft);
+            y1_1H_8x16b = _mm_srai_epi16(y1_1H_8x16b, shft);
+
+            y1_0L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0L_8x16b);
+            y1_0H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_0H_8x16b);
+            y1_1L_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1L_8x16b);
+            y1_1H_8x16b = _mm_adds_epi16(ofst_8x16b, y1_1H_8x16b);
+
+            /* Saturate back to unsigned 8-bit, one full row per register */
+            y1_0_16x8b = _mm_packus_epi16(y1_0L_8x16b, y1_0H_8x16b);
+            y1_1_16x8b = _mm_packus_epi16(y1_1L_8x16b, y1_1H_8x16b);
+
+            _mm_storeu_si128((__m128i *)pu1_dst, y1_0_16x8b);
+            _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), y1_1_16x8b);
+
+            ht -= 2;
+            pu1_src1 += src_strd1 << 1;
+            pu1_src2 += src_strd2 << 1;
+            pu1_dst += dst_strd << 1;
+        }
+        while(ht > 0);
+    }
+}
diff --git a/decoder.arm.mk b/decoder.arm.mk
new file mode 100755
index 0000000..80093e4
--- /dev/null
+++ b/decoder.arm.mk
@@ -0,0 +1,44 @@
+# 32-bit ARM settings for the decoder library: include paths, sources, flags.
+libavcd_inc_dir_arm += \
+    $(LOCAL_PATH)/decoder/arm \
+    $(LOCAL_PATH)/common/arm
+
+libavcd_srcs_c_arm += decoder/arm/ih264d_function_selector.c
+libavcd_cflags_arm += -DDISABLE_NEONINTR -DARM -DARMGCC
+
+#LOCAL_ARM_MODE := arm
+
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+# NEON build: add the a9q selector and the assembly sources.
+libavcd_srcs_c_arm += decoder/arm/ih264d_function_selector_a9q.c
+
+libavcd_srcs_asm_arm += \
+    common/arm/ih264_intra_pred_chroma_a9q.s \
+    common/arm/ih264_intra_pred_luma_16x16_a9q.s \
+    common/arm/ih264_intra_pred_luma_4x4_a9q.s \
+    common/arm/ih264_intra_pred_luma_8x8_a9q.s \
+    common/arm/ih264_inter_pred_chroma_a9q.s \
+    common/arm/ih264_inter_pred_filters_luma_horz_a9q.s \
+    common/arm/ih264_inter_pred_filters_luma_vert_a9q.s \
+    common/arm/ih264_inter_pred_luma_copy_a9q.s \
+    common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s \
+    common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s \
+    common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s \
+    common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s \
+    common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s \
+    common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s \
+    common/arm/ih264_default_weighted_pred_a9q.s \
+    common/arm/ih264_weighted_pred_a9q.s \
+    common/arm/ih264_weighted_bi_pred_a9q.s \
+    common/arm/ih264_deblk_chroma_a9.s \
+    common/arm/ih264_deblk_luma_a9.s \
+    common/arm/ih264_padding_neon.s \
+    common/arm/ih264_iquant_itrans_recon_a9.s \
+    common/arm/ih264_iquant_itrans_recon_dc_a9.s \
+    common/arm/ih264_ihadamard_scaling_a9.s \
+    common/arm/ih264_arm_memory_barrier.s
+
+libavcd_cflags_arm += -DDEFAULT_ARCH=D_ARCH_ARM_A9Q
+else
+# Non-NEON build: no assembly sources are added.
+libavcd_cflags_arm += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
+endif
+
+# Export the lists collected above to the arm-specific LOCAL_ variables.
+LOCAL_SRC_FILES_arm += $(libavcd_srcs_c_arm) $(libavcd_srcs_asm_arm)
+LOCAL_C_INCLUDES_arm += $(libavcd_inc_dir_arm)
+LOCAL_CFLAGS_arm += $(libavcd_cflags_arm)
diff --git a/decoder.arm64.mk b/decoder.arm64.mk
new file mode 100755
index 0000000..7a06163
--- /dev/null
+++ b/decoder.arm64.mk
@@ -0,0 +1,46 @@
+# 64-bit ARM settings for the decoder library: flags, include paths, sources.
+libavcd_cflags_arm64 += -DARMV8
+libavcd_cflags_arm64 += -DDISABLE_NEONINTR -DARM -DARMGCC
+
+libavcd_inc_dir_arm64 += $(LOCAL_PATH)/decoder/arm
+libavcd_inc_dir_arm64 += $(LOCAL_PATH)/common/armv8
+
+libavcd_srcs_c_arm64 += decoder/arm/ih264d_function_selector.c
+
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+# NEON build: add the av8 selector and the armv8 assembly sources.
+libavcd_srcs_c_arm64 += decoder/arm/ih264d_function_selector_av8.c
+
+libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_chroma_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_16x16_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_4x4_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_chroma_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_copy_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_default_weighted_pred_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_weighted_pred_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_weighted_bi_pred_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_deblk_chroma_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_deblk_luma_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_padding_neon_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_iquant_itrans_recon_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_iquant_itrans_recon_dc_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_ihadamard_scaling_av8.s
+libavcd_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_8x8_av8.s
+
+# Must be appended to the arm64 flag list: only libavcd_cflags_arm64 is
+# exported to LOCAL_CFLAGS_arm64 at the end of this file.
+libavcd_cflags_arm64 += -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC
+else
+# Non-NEON build: no assembly sources are added.
+libavcd_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
+endif
+
+# Export the lists collected above to the arm64-specific LOCAL_ variables.
+LOCAL_SRC_FILES_arm64 += $(libavcd_srcs_c_arm64) $(libavcd_srcs_asm_arm64)
+LOCAL_C_INCLUDES_arm64 += $(libavcd_inc_dir_arm64)
+LOCAL_CFLAGS_arm64 += $(libavcd_cflags_arm64)
diff --git a/decoder.mips.mk b/decoder.mips.mk
new file mode 100755
index 0000000..3d00395
--- /dev/null
+++ b/decoder.mips.mk
@@ -0,0 +1,6 @@
+# MIPS settings for the decoder library; only the function selector is added.
+libavcd_srcs_c_mips += decoder/mips/ih264d_function_selector.c
+libavcd_inc_dir_mips += $(LOCAL_PATH)/common/mips
+
+# Export the lists to the mips-specific LOCAL_ variables.
+LOCAL_SRC_FILES_mips += $(libavcd_srcs_c_mips)
+LOCAL_C_INCLUDES_mips += $(libavcd_inc_dir_mips)
diff --git a/decoder.mips64.mk b/decoder.mips64.mk
new file mode 100755
index 0000000..ffcb882
--- /dev/null
+++ b/decoder.mips64.mk
@@ -0,0 +1,6 @@
+# MIPS64 settings for the decoder library; only the function selector is added.
+libavcd_inc_dir_mips64 += $(LOCAL_PATH)/common/mips
+
+libavcd_srcs_c_mips64 += decoder/mips/ih264d_function_selector.c
+
+# Export the mips64 lists defined above, not the 32-bit mips ones.
+LOCAL_C_INCLUDES_mips64 += $(libavcd_inc_dir_mips64)
+LOCAL_SRC_FILES_mips64 += $(libavcd_srcs_c_mips64)
diff --git a/decoder.mk b/decoder.mk
new file mode 100755
index 0000000..d4fa0be
--- /dev/null
+++ b/decoder.mk
@@ -0,0 +1,76 @@
+# Build rules for the libavcdec static library.
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+libavcd_source_dir := $(LOCAL_PATH)
+
+## Arch-common settings
+LOCAL_MODULE := libavcdec
+#LOCAL_32_BIT_ONLY := true
+
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+
+LOCAL_CFLAGS += \
+    -D_LIB -DMULTICORE -fPIC -UAPPLY_CONCEALMENT -UINSERT_LOGO -DTHREAD_QUAD_CORE \
+    -O3 -DANDROID
+
+LOCAL_C_INCLUDES := \
+    $(LOCAL_PATH)/decoder \
+    $(LOCAL_PATH)/common
+
+# Sources under common/
+libavcd_srcs_c += \
+    common/ih264_buf_mgr.c \
+    common/ih264_disp_mgr.c \
+    common/ih264_inter_pred_filters.c \
+    common/ih264_luma_intra_pred_filters.c \
+    common/ih264_chroma_intra_pred_filters.c \
+    common/ih264_padding.c \
+    common/ih264_mem_fns.c \
+    common/ih264_deblk_edge_filters.c \
+    common/ih264_iquant_itrans_recon.c \
+    common/ih264_ihadamard_scaling.c \
+    common/ih264_weighted_pred.c \
+    common/ithread.c
+
+# Sources under decoder/
+libavcd_srcs_c += \
+    decoder/ih264d_cabac.c \
+    decoder/ih264d_parse_mb_header.c \
+    decoder/ih264d_parse_cabac.c \
+    decoder/ih264d_process_intra_mb.c \
+    decoder/ih264d_inter_pred.c \
+    decoder/ih264d_parse_bslice.c \
+    decoder/ih264d_parse_pslice.c \
+    decoder/ih264d_parse_islice.c \
+    decoder/ih264d_cabac_init_tables.c \
+    decoder/ih264d_debug.c \
+    decoder/ih264d_bitstrm.c \
+    decoder/ih264d_compute_bs.c \
+    decoder/ih264d_deblocking.c \
+    decoder/ih264d_parse_headers.c \
+    decoder/ih264d_mb_utils.c \
+    decoder/ih264d_mvpred.c \
+    decoder/ih264d_utils.c \
+    decoder/ih264d_process_bslice.c \
+    decoder/ih264d_process_pslice.c \
+    decoder/ih264d_parse_slice.c \
+    decoder/ih264d_quant_scaling.c \
+    decoder/ih264d_parse_cavlc.c \
+    decoder/ih264d_dpb_mgr.c \
+    decoder/ih264d_nal.c \
+    decoder/ih264d_sei.c \
+    decoder/ih264d_tables.c \
+    decoder/ih264d_vui.c \
+    decoder/ih264d_format_conv.c \
+    decoder/ih264d_thread_parse_decode.c \
+    decoder/ih264d_api.c \
+    decoder/ih264d_thread_compute_bs.c \
+    decoder/ih264d_function_selector_generic.c
+
+# libavcd_srcs_asm is not assigned anywhere in this file.
+LOCAL_SRC_FILES := $(libavcd_srcs_c) $(libavcd_srcs_asm)
+
+# Load the arch-specific settings
+include $(LOCAL_PATH)/decoder.arm.mk
+include $(LOCAL_PATH)/decoder.arm64.mk
+include $(LOCAL_PATH)/decoder.x86.mk
+include $(LOCAL_PATH)/decoder.x86_64.mk
+include $(LOCAL_PATH)/decoder.mips.mk
+include $(LOCAL_PATH)/decoder.mips64.mk
+
+include $(BUILD_STATIC_LIBRARY)
diff --git a/decoder.x86.mk b/decoder.x86.mk
new file mode 100755
index 0000000..309bc23
--- /dev/null
+++ b/decoder.x86.mk
@@ -0,0 +1,26 @@
+# 32-bit x86 settings for the decoder library: flags, include paths, sources.
+libavcd_cflags_x86 += -DX86 -DDISABLE_AVX2 -m32 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
+
+libavcd_inc_dir_x86 += \
+    $(LOCAL_PATH)/decoder/x86 \
+    $(LOCAL_PATH)/common/x86
+
+# Function selectors
+libavcd_srcs_c_x86 += \
+    decoder/x86/ih264d_function_selector.c \
+    decoder/x86/ih264d_function_selector_sse42.c \
+    decoder/x86/ih264d_function_selector_ssse3.c
+
+# SSSE3 / SSE4.2 sources under common/x86
+libavcd_srcs_c_x86 += \
+    common/x86/ih264_inter_pred_filters_ssse3.c \
+    common/x86/ih264_deblk_luma_ssse3.c \
+    common/x86/ih264_deblk_chroma_ssse3.c \
+    common/x86/ih264_padding_ssse3.c \
+    common/x86/ih264_mem_fns_ssse3.c \
+    common/x86/ih264_iquant_itrans_recon_dc_ssse3.c \
+    common/x86/ih264_iquant_itrans_recon_ssse3.c \
+    common/x86/ih264_luma_intra_pred_filters_ssse3.c \
+    common/x86/ih264_chroma_intra_pred_filters_ssse3.c \
+    common/x86/ih264_iquant_itrans_recon_sse42.c \
+    common/x86/ih264_weighted_pred_sse42.c \
+    common/x86/ih264_ihadamard_scaling_sse42.c
+
+# Export the lists to the x86-specific LOCAL_ variables.
+LOCAL_SRC_FILES_x86 += $(libavcd_srcs_c_x86) $(libavcd_srcs_asm_x86)
+LOCAL_C_INCLUDES_x86 += $(libavcd_inc_dir_x86)
+LOCAL_CFLAGS_x86 += $(libavcd_cflags_x86)
+
diff --git a/decoder.x86_64.mk b/decoder.x86_64.mk
new file mode 100755
index 0000000..1b018f7
--- /dev/null
+++ b/decoder.x86_64.mk
@@ -0,0 +1,30 @@
+# 64-bit x86 settings for the decoder library: flags, include paths, sources.
+libavcd_cflags_x86_64 += \
+    -DX86 -DDISABLE_AVX2 -m64 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42 \
+    -UAPPLY_CONCEALMENT -ULOGO_EN -DTHREAD_QUAD_CORE
+
+libavcd_inc_dir_x86_64 += \
+    $(LOCAL_PATH)/decoder/x86 \
+    $(LOCAL_PATH)/common/x86
+
+# Function selectors
+libavcd_srcs_c_x86_64 += \
+    decoder/x86/ih264d_function_selector.c \
+    decoder/x86/ih264d_function_selector_sse42.c \
+    decoder/x86/ih264d_function_selector_ssse3.c
+
+# SSSE3 / SSE4.2 sources under common/x86
+libavcd_srcs_c_x86_64 += \
+    common/x86/ih264_inter_pred_filters_ssse3.c \
+    common/x86/ih264_deblk_luma_ssse3.c \
+    common/x86/ih264_deblk_chroma_ssse3.c \
+    common/x86/ih264_padding_ssse3.c \
+    common/x86/ih264_mem_fns_ssse3.c \
+    common/x86/ih264_iquant_itrans_recon_dc_ssse3.c \
+    common/x86/ih264_iquant_itrans_recon_ssse3.c \
+    common/x86/ih264_luma_intra_pred_filters_ssse3.c \
+    common/x86/ih264_chroma_intra_pred_filters_ssse3.c \
+    common/x86/ih264_iquant_itrans_recon_sse42.c \
+    common/x86/ih264_weighted_pred_sse42.c \
+    common/x86/ih264_ihadamard_scaling_sse42.c
+
+# Export the lists to the x86_64-specific LOCAL_ variables.
+LOCAL_SRC_FILES_x86_64 += $(libavcd_srcs_c_x86_64) $(libavcd_srcs_asm_x86_64)
+LOCAL_C_INCLUDES_x86_64 += $(libavcd_inc_dir_x86_64)
+LOCAL_CFLAGS_x86_64 += $(libavcd_cflags_x86_64)
+
+
+
diff --git a/decoder/arm/ih264d_function_selector.c b/decoder/arm/ih264d_function_selector.c
new file mode 100755
index 0000000..1aa0c43
--- /dev/null
+++ b/decoder/arm/ih264d_function_selector.c
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264d_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in the h264 decoder
+*
+* @author
+* Naveen
+*
+* @par List of Functions:
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_error.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+
+#include "ih264d_structs.h"
+#include "ih264d_function_selector.h"
+
+/* Selects the function-pointer initialisers to run for the processor      */
+/* architecture stored in ps_codec->e_processor_arch.                      */
+void ih264d_init_function_ptr(dec_struct_t *ps_codec)
+{
+    /* Read the requested architecture before any initialiser runs */
+    IVD_ARCH_T e_proc_arch = ps_codec->e_processor_arch;
+
+    /* The generic initialiser is called first for every architecture */
+    ih264d_init_function_ptr_generic(ps_codec);
+
+    if(ARCH_ARM_NONEON == e_proc_arch)
+    {
+        /* NOTE(review): repeats the call made just above; looks redundant */
+        /* - confirm before removing                                       */
+        ih264d_init_function_ptr_generic(ps_codec);
+    }
+    else
+    {
+#ifndef ARMV8
+        /* ARCH_ARM_A5, A7, A9, A15, A9Q and every other value */
+        ih264d_init_function_ptr_a9q(ps_codec);
+#else /* ARMV8 */
+        /* ARCH_ARMV8_GENERIC and every other value */
+        ih264d_init_function_ptr_av8(ps_codec);
+#endif /* ARMV8 */
+    }
+}
+
+/* Sets ps_codec->e_processor_arch from the compile-time DEFAULT_ARCH       */
+/* macro; ARCH_ARM_A9Q is used when the macro is undefined or matches none */
+/* of the values tested below.                                             */
+void ih264d_init_arch(dec_struct_t *ps_codec)
+{
+#if !defined(DEFAULT_ARCH)
+    ps_codec->e_processor_arch = ARCH_ARM_A9Q;
+#elif DEFAULT_ARCH == D_ARCH_ARM_NONEON
+    ps_codec->e_processor_arch = ARCH_ARM_NONEON;
+#elif DEFAULT_ARCH == D_ARCH_ARMV8_GENERIC
+    ps_codec->e_processor_arch = ARCH_ARMV8_GENERIC;
+#elif DEFAULT_ARCH == D_ARCH_ARM_NEONINTR
+    ps_codec->e_processor_arch = ARCH_ARM_NEONINTR;
+#else
+    ps_codec->e_processor_arch = ARCH_ARM_A9Q;
+#endif
+}
diff --git a/decoder/arm/ih264d_function_selector_a9q.c b/decoder/arm/ih264d_function_selector_a9q.c
new file mode 100755
index 0000000..0cf8581
--- /dev/null
+++ b/decoder/arm/ih264d_function_selector_a9q.c
@@ -0,0 +1,200 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264d_function_selector_a9q.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264d_init_function_ptr_a9q
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_error.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+
+#include "ih264d_structs.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264d_init_function_ptr_a9q(dec_struct_t *ps_codec)
+{
+ /* Assigns every leaf-level function pointer of ps_codec; most targets are _a9q/_a9 routines, a few are unsuffixed routines (marked below) */
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 16x16 */
+ ps_codec->apf_intra_pred_luma_16x16[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
+ ps_codec->apf_intra_pred_luma_16x16[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
+ ps_codec->apf_intra_pred_luma_16x16[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
+ ps_codec->apf_intra_pred_luma_16x16[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 4x4 */
+ ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
+ ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
+ ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
+ ps_codec->apf_intra_pred_luma_4x4[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
+ ps_codec->apf_intra_pred_luma_4x4[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
+ ps_codec->apf_intra_pred_luma_4x4[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
+ ps_codec->apf_intra_pred_luma_4x4[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
+ ps_codec->apf_intra_pred_luma_4x4[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
+ ps_codec->apf_intra_pred_luma_4x4[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
+ ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz_a9q;
+ ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
+ ps_codec->apf_intra_pred_luma_8x8[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
+ ps_codec->apf_intra_pred_luma_8x8[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
+ ps_codec->apf_intra_pred_luma_8x8[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
+ ps_codec->apf_intra_pred_luma_8x8[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
+ ps_codec->apf_intra_pred_luma_8x8[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
+ ps_codec->apf_intra_pred_luma_8x8[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
+
+ /* ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q does not handle all availabilities, hence the unsuffixed routine is used */
+ ps_codec->pf_intra_pred_ref_filtering = ih264_intra_pred_luma_8x8_mode_ref_filtering;
+
+ /* Init function pointers for intra pred leaf level functions chroma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_chroma[0] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
+ ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
+ /* ih264_intra_pred_chroma_8x8_mode_dc_a9q does not support interlaced clips, hence using C */
+ ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc;
+ ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;
+
+ /* Default / explicit weighted prediction function pointers */
+ ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_a9q;
+ ps_codec->pf_default_weighted_pred_chroma = ih264_default_weighted_pred_chroma_a9q;
+ ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma_a9q;
+ ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma_a9q;
+ ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma_a9q;
+ ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma_a9q;
+
+ /* Padding Functions */
+ ps_codec->pf_pad_top = ih264_pad_top_a9q;
+ ps_codec->pf_pad_bottom = ih264_pad_bottom; /* unsuffixed routine, unlike the other padding pointers */
+
+ ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
+ ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
+ ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
+ ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;
+
+ ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_a9;
+ ps_codec->pf_iquant_itrans_recon_luma_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9;
+ ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8_a9;
+ ps_codec->pf_iquant_itrans_recon_luma_8x8_dc = ih264_iquant_itrans_recon_8x8_dc_a9;
+ ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
+
+
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
+
+ /* Init fn ptr luma deblocking */
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
+ ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
+ ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff_a9;
+ ps_codec->pf_deblk_luma_vert_bslt4_mbaff = ih264_deblk_luma_vert_bslt4_mbaff_a9;
+
+ ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
+ ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;
+
+ /* Init fn ptr chroma deblocking */
+ ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
+ ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
+ ps_codec->pf_deblk_chroma_vert_bs4_mbaff = ih264_deblk_chroma_vert_bs4_mbaff_a9;
+ ps_codec->pf_deblk_chroma_vert_bslt4_mbaff = ih264_deblk_chroma_vert_bslt4_mbaff_a9;
+
+ ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
+ ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;
+
+
+ /* Inter pred leaf level functions */
+ ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy_a9q;
+
+ ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel_a9q;
+ ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz_a9q;
+ ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel_a9q; /* same routine as index 1 */
+ ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel_a9q;
+
+ ps_codec->apf_inter_pred_luma[5] = ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q;
+
+ ps_codec->apf_inter_pred_luma[6] = ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q;
+
+ ps_codec->apf_inter_pred_luma[7] = ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; /* same routine as index 5 */
+
+ ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert_a9q;
+ ps_codec->apf_inter_pred_luma[9] = ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q;
+ ps_codec->apf_inter_pred_luma[10] = ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q;
+ ps_codec->apf_inter_pred_luma[11] = ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q; /* same routine as index 9 */
+ ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel_a9q; /* same routine as index 4 */
+ ps_codec->apf_inter_pred_luma[13] = ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; /* same routine as index 5 */
+ ps_codec->apf_inter_pred_luma[14] = ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q; /* same routine as index 6 */
+ ps_codec->apf_inter_pred_luma[15] = ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q; /* same routine as index 5 */
+
+ ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;
+
+
+ return;
+}
diff --git a/decoder/arm/ih264d_function_selector_av8.c b/decoder/arm/ih264d_function_selector_av8.c
new file mode 100755
index 0000000..5715ee0
--- /dev/null
+++ b/decoder/arm/ih264d_function_selector_av8.c
@@ -0,0 +1,191 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264d_function_selector_av8.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264d_init_function_ptr_av8
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_error.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+
+#include "ih264d_structs.h"
+#include "ih264d_function_selector.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264d_init_function_ptr_av8(dec_struct_t *ps_codec)
+{
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 16x16 */
+ ps_codec->apf_intra_pred_luma_16x16[0] = ih264_intra_pred_luma_16x16_mode_vert_av8;
+ ps_codec->apf_intra_pred_luma_16x16[1] = ih264_intra_pred_luma_16x16_mode_horz_av8;
+ ps_codec->apf_intra_pred_luma_16x16[2] = ih264_intra_pred_luma_16x16_mode_dc_av8;
+ ps_codec->apf_intra_pred_luma_16x16[3] = ih264_intra_pred_luma_16x16_mode_plane_av8;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 4x4 */
+ ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert_av8;
+ ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz_av8;
+ ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc_av8;
+ ps_codec->apf_intra_pred_luma_4x4[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
+ ps_codec->apf_intra_pred_luma_4x4[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
+ ps_codec->apf_intra_pred_luma_4x4[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8;
+ ps_codec->apf_intra_pred_luma_4x4[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8;
+ ps_codec->apf_intra_pred_luma_4x4[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8;
+ ps_codec->apf_intra_pred_luma_4x4[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert_av8;
+ ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz_av8;
+ ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc_av8;
+ ps_codec->apf_intra_pred_luma_8x8[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
+ ps_codec->apf_intra_pred_luma_8x8[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
+ ps_codec->apf_intra_pred_luma_8x8[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8;
+ ps_codec->apf_intra_pred_luma_8x8[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8;
+ ps_codec->apf_intra_pred_luma_8x8[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8;
+ ps_codec->apf_intra_pred_luma_8x8[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8;
+
+ ps_codec->pf_intra_pred_ref_filtering = ih264_intra_pred_luma_8x8_mode_ref_filtering; /* unsuffixed routine, not an _av8 one */
+
+ /* Init function pointers for intra pred leaf level functions chroma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_chroma[0] = ih264_intra_pred_chroma_8x8_mode_vert_av8;
+ ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8;
+ /* ih264_intra_pred_chroma_8x8_mode_dc_av8 does not support interlaced clips, hence using C */
+ ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc;
+ ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8;
+
+ ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_av8;
+ ps_codec->pf_default_weighted_pred_chroma = ih264_default_weighted_pred_chroma_av8;
+ ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma_av8;
+ ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma_av8;
+ ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma_av8;
+ ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma_av8;
+
+ /* Padding Functions */
+ ps_codec->pf_pad_top = ih264_pad_top_av8;
+ ps_codec->pf_pad_bottom = ih264_pad_bottom; /* unsuffixed routine, unlike the other padding pointers */
+ ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8;
+ ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8;
+ ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8;
+ ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8;
+
+
+ ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_av8;
+ ps_codec->pf_iquant_itrans_recon_luma_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_av8;
+ ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8_av8;
+ ps_codec->pf_iquant_itrans_recon_luma_8x8_dc = ih264_iquant_itrans_recon_8x8_dc_av8;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_av8;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_av8;
+ ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
+
+
+ /* Init fn ptr luma deblocking */
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8;
+ ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8;
+ ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff; /* unsuffixed routine: no _av8 variant is used for mbaff */
+ ps_codec->pf_deblk_luma_vert_bslt4_mbaff = ih264_deblk_luma_vert_bslt4_mbaff; /* unsuffixed routine: no _av8 variant is used for mbaff */
+
+ ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8;
+ ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8;
+
+ /* Init fn ptr chroma deblocking */
+ ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8;
+ ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8;
+ ps_codec->pf_deblk_chroma_vert_bs4_mbaff = ih264_deblk_chroma_vert_bs4_mbaff; /* unsuffixed routine: no _av8 variant is used for mbaff */
+ ps_codec->pf_deblk_chroma_vert_bslt4_mbaff = ih264_deblk_chroma_vert_bslt4_mbaff; /* unsuffixed routine: no _av8 variant is used for mbaff */
+
+ ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8;
+ ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;
+
+ /* Inter pred leaf level functions */
+ ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy_av8;
+ ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel_av8;
+ ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz_av8;
+ ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel_av8; /* same routine as index 1 */
+ ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel_av8;
+ ps_codec->apf_inter_pred_luma[5] = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8;
+ ps_codec->apf_inter_pred_luma[6] = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8;
+ ps_codec->apf_inter_pred_luma[7] = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; /* same routine as index 5 */
+ ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert_av8;
+ ps_codec->apf_inter_pred_luma[9] = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8;
+ ps_codec->apf_inter_pred_luma[10] = ih264_inter_pred_luma_horz_hpel_vert_hpel_av8;
+ ps_codec->apf_inter_pred_luma[11] = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8; /* same routine as index 9 */
+ ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel_av8; /* same routine as index 4 */
+ ps_codec->apf_inter_pred_luma[13] = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; /* same routine as index 5 */
+ ps_codec->apf_inter_pred_luma[14] = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8; /* same routine as index 6 */
+ ps_codec->apf_inter_pred_luma[15] = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; /* same routine as index 5 */
+
+ ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8;
+
+
+ return;
+}
diff --git a/decoder/ih264d.h b/decoder/ih264d.h
new file mode 100755
index 0000000..f89e576
--- /dev/null
+++ b/decoder/ih264d.h
@@ -0,0 +1,482 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264d.h */
+/* */
+/* Description : This file contains all the necessary structure and */
+/* enumeration definitions needed for the Application */
+/* Program Interface(API) of the Ittiam H264 ASP */
+/* Decoder on Cortex A8 - Neon platform */
+/* */
+/* List of Functions : ih264d_api_function */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 08 2010 100239(RCY) Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _IH264D_H_
+#define _IH264D_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "iv.h"
+#include "ivd.h"
+
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define IS_IVD_CONCEALMENT_APPLIED(x) ((x) & (1 << IVD_APPLIEDCONCEALMENT)) /* each IS_IVD_*(x) masks one IVD_* bit of x; x is parenthesised so compound arguments such as (a | b) are tested as a whole */
+#define IS_IVD_INSUFFICIENTDATA_ERROR(x) ((x) & (1 << IVD_INSUFFICIENTDATA))
+#define IS_IVD_CORRUPTEDDATA_ERROR(x) ((x) & (1 << IVD_CORRUPTEDDATA))
+#define IS_IVD_CORRUPTEDHEADER_ERROR(x) ((x) & (1 << IVD_CORRUPTEDHEADER))
+#define IS_IVD_UNSUPPORTEDINPUT_ERROR(x) ((x) & (1 << IVD_UNSUPPORTEDINPUT))
+#define IS_IVD_UNSUPPORTEDPARAM_ERROR(x) ((x) & (1 << IVD_UNSUPPORTEDPARAM))
+#define IS_IVD_FATAL_ERROR(x) ((x) & (1 << IVD_FATALERROR))
+#define IS_IVD_INVALID_BITSTREAM_ERROR(x) ((x) & (1 << IVD_INVALID_BITSTREAM))
+#define IS_IVD_INCOMPLETE_BITSTREAM_ERROR(x) ((x) & (1 << IVD_INCOMPLETE_BITSTREAM))
+
+
+/*****************************************************************************/
+/* API Function Prototype */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T ih264d_api_function(iv_obj_t *ps_handle, void *pv_api_ip,void *pv_api_op); /* the only function this header declares; pv_api_ip / pv_api_op are untyped input / output argument pointers */
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* Codec Error codes for H264 ASP Decoder */
+
+typedef enum {
+
+ IH264D_VID_HDR_DEC_NUM_FRM_BUF_NOT_SUFFICIENT = IVD_DUMMY_ELEMENT_FOR_CODEC_EXTENSIONS + 1, /* numbered one past IVD_DUMMY_ELEMENT_FOR_CODEC_EXTENSIONS */
+
+}IH264D_ERROR_CODES_T;
+
+/*****************************************************************************/
+/* Extended Structures */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Get Number of Memory Records */
+/*****************************************************************************/
+
+
+typedef struct {
+ iv_num_mem_rec_ip_t s_ivd_num_mem_rec_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_num_mem_rec_ip_t;
+
+
+typedef struct{
+ iv_num_mem_rec_op_t s_ivd_num_mem_rec_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Fill Memory Records */
+/*****************************************************************************/
+
+
+typedef struct {
+ iv_fill_mem_rec_ip_t s_ivd_fill_mem_rec_ip_t; /* common input struct; the fields below are H.264-specific additions */
+ WORD32 i4_level; /* NOTE(review): presumably the H.264 level to provision for - confirm against ih264d_api.c */
+ UWORD32 u4_num_reorder_frames;
+ UWORD32 u4_num_ref_frames;
+ UWORD32 u4_share_disp_buf; /* NOTE(review): looks like a flag for sharing display buffers with the application - confirm */
+
+ /* format in which codec has to give out frame data for display */
+ IV_COLOR_FORMAT_T e_output_format;
+
+ /* Number of extra display buffers that will be allocated to handle display pipeline depth */
+ UWORD32 u4_num_extra_disp_buf;
+
+}ih264d_fill_mem_rec_ip_t;
+
+
+typedef struct{
+ iv_fill_mem_rec_op_t s_ivd_fill_mem_rec_op_t; /* common output struct; no H.264-specific fields are added */
+
+}ih264d_fill_mem_rec_op_t;
+
+/*****************************************************************************/
+/* Retrieve Memory Records */
+/*****************************************************************************/
+
+
+typedef struct {
+ iv_retrieve_mem_rec_ip_t s_ivd_retrieve_mem_rec_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_retrieve_mem_rec_ip_t;
+
+
+typedef struct{
+ iv_retrieve_mem_rec_op_t s_ivd_retrieve_mem_rec_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_retrieve_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Initialize decoder */
+/*****************************************************************************/
+
+
+typedef struct {
+ ivd_init_ip_t s_ivd_init_ip_t; /* common input struct; the fields below are H.264-specific additions */
+ WORD32 i4_level; /* same field name and type as in ih264d_fill_mem_rec_ip_t */
+ UWORD32 u4_num_reorder_frames; /* same field name and type as in ih264d_fill_mem_rec_ip_t */
+ UWORD32 u4_num_ref_frames; /* same field name and type as in ih264d_fill_mem_rec_ip_t */
+ UWORD32 u4_share_disp_buf; /* same field name and type as in ih264d_fill_mem_rec_ip_t */
+ /* Number of extra display buffers that will be allocated to handle display pipeline depth */
+ UWORD32 u4_num_extra_disp_buf;
+
+}ih264d_init_ip_t;
+
+
+typedef struct{
+ ivd_init_op_t s_ivd_init_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_init_op_t;
+
+
+/*****************************************************************************/
+/* Video Decode */
+/*****************************************************************************/
+
+
+typedef struct {
+ ivd_video_decode_ip_t s_ivd_video_decode_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_video_decode_ip_t;
+
+
+typedef struct{
+ ivd_video_decode_op_t s_ivd_video_decode_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_video_decode_op_t;
+
+
+/*****************************************************************************/
+/* Get Display Frame */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ ivd_get_display_frame_ip_t s_ivd_get_display_frame_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_get_display_frame_ip_t;
+
+
+typedef struct
+{
+ ivd_get_display_frame_op_t s_ivd_get_display_frame_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_get_display_frame_op_t;
+
+/*****************************************************************************/
+/* Set Display Frame */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ ivd_set_display_frame_ip_t s_ivd_set_display_frame_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_set_display_frame_ip_t;
+
+
+typedef struct
+{
+ ivd_set_display_frame_op_t s_ivd_set_display_frame_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_set_display_frame_op_t;
+
+/*****************************************************************************/
+/* Release Display Buffers */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ ivd_rel_display_frame_ip_t s_ivd_rel_display_frame_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_rel_display_frame_ip_t;
+
+
+typedef struct
+{
+ ivd_rel_display_frame_op_t s_ivd_rel_display_frame_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_rel_display_frame_op_t;
+
+
+typedef enum { /* codec-specific control sub-commands, all numbered as offsets from IVD_CMD_CTL_CODEC_SUBCMD_START */
+ /** Set number of cores/threads to be used */
+ IH264D_CMD_CTL_SET_NUM_CORES = IVD_CMD_CTL_CODEC_SUBCMD_START,
+
+ /** Set processor details */
+ IH264D_CMD_CTL_SET_PROCESSOR = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x001,
+
+ /** Get display buffer dimensions */
+ IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x100,
+
+ /** Get VUI parameters */
+ IH264D_CMD_CTL_GET_VUI_PARAMS = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x101,
+
+ /** Enable/disable GPU, supported on select platforms */
+ IH264D_CMD_CTL_GPU_ENABLE_DISABLE = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x200,
+
+ /** Set degrade level */
+ IH264D_CMD_CTL_DEGRADE = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x300
+}IH264D_CMD_CTL_SUB_CMDS;
+/*****************************************************************************/
+/* Video control Flush */
+/*****************************************************************************/
+
+
+typedef struct{
+ ivd_ctl_flush_ip_t s_ivd_ctl_flush_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_ctl_flush_ip_t;
+
+
+typedef struct{
+ ivd_ctl_flush_op_t s_ivd_ctl_flush_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_ctl_flush_op_t;
+
+/*****************************************************************************/
+/* Video control reset */
+/*****************************************************************************/
+
+
+typedef struct{
+ ivd_ctl_reset_ip_t s_ivd_ctl_reset_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_ctl_reset_ip_t;
+
+
+typedef struct{
+ ivd_ctl_reset_op_t s_ivd_ctl_reset_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_ctl_reset_op_t;
+
+
+/*****************************************************************************/
+/* Video control Set Params */
+/*****************************************************************************/
+
+
+typedef struct {
+ ivd_ctl_set_config_ip_t s_ivd_ctl_set_config_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_ctl_set_config_ip_t;
+
+
+typedef struct{
+ ivd_ctl_set_config_op_t s_ivd_ctl_set_config_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_ctl_set_config_op_t;
+
+/*****************************************************************************/
+/* Video control:Get Buf Info */
+/*****************************************************************************/
+
+
+typedef struct{
+ ivd_ctl_getbufinfo_ip_t s_ivd_ctl_getbufinfo_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_ctl_getbufinfo_ip_t;
+
+
+
+typedef struct{
+ ivd_ctl_getbufinfo_op_t s_ivd_ctl_getbufinfo_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_ctl_getbufinfo_op_t;
+
+
+/*****************************************************************************/
+/* Video control:Getstatus Call */
+/*****************************************************************************/
+
+
+typedef struct{
+ ivd_ctl_getstatus_ip_t s_ivd_ctl_getstatus_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_ctl_getstatus_ip_t;
+
+
+
+typedef struct{
+ ivd_ctl_getstatus_op_t s_ivd_ctl_getstatus_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_ctl_getstatus_op_t;
+
+
+/*****************************************************************************/
+/* Video control:Get Version Info */
+/*****************************************************************************/
+
+
+typedef struct{
+ ivd_ctl_getversioninfo_ip_t s_ivd_ctl_getversioninfo_ip_t; /* common input struct; no H.264-specific fields are added */
+}ih264d_ctl_getversioninfo_ip_t;
+
+
+
+typedef struct{
+ ivd_ctl_getversioninfo_op_t s_ivd_ctl_getversioninfo_op_t; /* common output struct; no H.264-specific fields are added */
+}ih264d_ctl_getversioninfo_op_t;
+
+typedef struct{
+
+ /**
+ * u4_size
+ */
+ UWORD32 u4_size;
+
+ /**
+ * cmd
+ */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+
+ /**
+ * sub_cmd
+ */
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /**
+ * Pictures that are degraded
+ * 0 : No degrade
+ * 1 : Only on non-reference frames
+ * 2 : Use interval specified by i4_nondegrade_interval
+ * 3 : All non-key frames
+ * 4 : All frames
+ */
+ WORD32 i4_degrade_pics;
+
+ /**
+ * Interval for pictures which are completely decoded without any degradation
+ */
+ WORD32 i4_nondegrade_interval;
+
+ /**
+ * bit position (lsb is zero): Type of degradation
+ * 1 : Disable deblocking
+ * 2 : Faster inter prediction filters
+ * 3 : Fastest inter prediction filters
+ */
+ WORD32 i4_degrade_type;
+
+}ih264d_ctl_degrade_ip_t;
+
+typedef struct
+{
+ /**
+ * u4_size
+ */
+ UWORD32 u4_size;
+
+ /**
+ * u4_error_code
+ */
+ UWORD32 u4_error_code;
+}ih264d_ctl_degrade_op_t;
+
+typedef struct{
+ UWORD32 u4_size; /* same leading u4_size / e_cmd / e_sub_cmd fields as the other control input structs in this header */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+ UWORD32 u4_disable_deblk_level; /* NOTE(review): valid range of the level is not visible in this header - confirm */
+}ih264d_ctl_disable_deblock_ip_t;
+
+typedef struct{
+ UWORD32 u4_size; /* same u4_size / u4_error_code pair as the other control output structs in this header */
+ UWORD32 u4_error_code;
+}ih264d_ctl_disable_deblock_op_t;
+
+
+typedef struct{
+ UWORD32 u4_size; /* same leading u4_size / e_cmd / e_sub_cmd fields as the other control input structs in this header */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+ UWORD32 u4_num_cores; /* number of cores/threads to be used (see IH264D_CMD_CTL_SET_NUM_CORES) */
+}ih264d_ctl_set_num_cores_ip_t;
+
+typedef struct{
+ UWORD32 u4_size; /* same u4_size / u4_error_code pair as the other control output structs in this header */
+ UWORD32 u4_error_code;
+}ih264d_ctl_set_num_cores_op_t;
+
+typedef struct
+{
+ /**
+ * u4_size
+ */
+ UWORD32 u4_size;
+ /**
+ * cmd
+ */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ /**
+ * sub cmd
+ */
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+ /**
+ * Processor type
+ */
+ UWORD32 u4_arch;
+ /**
+ * SOC type
+ */
+ UWORD32 u4_soc;
+
+ /**
+ * num_cores
+ */
+ UWORD32 u4_num_cores;
+
+}ih264d_ctl_set_processor_ip_t;
+
+typedef struct
+{
+ /**
+ * u4_size
+ */
+ UWORD32 u4_size;
+ /**
+ * u4_error_code
+ */
+ UWORD32 u4_error_code;
+}ih264d_ctl_set_processor_op_t;
+
+typedef struct{
+ UWORD32 u4_size; /* only the leading u4_size / e_cmd / e_sub_cmd fields; this request has no extra parameters */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ih264d_ctl_get_frame_dimensions_ip_t;
+
+
+typedef struct{
+ UWORD32 u4_size;
+ UWORD32 u4_error_code;
+ UWORD32 u4_x_offset[3]; /* NOTE(review): every array below has three entries, presumably one per colour plane - confirm */
+ UWORD32 u4_y_offset[3];
+ UWORD32 u4_disp_wd[3];
+ UWORD32 u4_disp_ht[3];
+ UWORD32 u4_buffer_wd[3];
+ UWORD32 u4_buffer_ht[3];
+}ih264d_ctl_get_frame_dimensions_op_t;
+
+#ifdef __cplusplus
+} /* closing brace for extern "C" */
+#endif
+#endif /* _IH264D_H_ */
diff --git a/decoder/ih264d_api.c b/decoder/ih264d_api.c
new file mode 100755
index 0000000..67ef5bb
--- /dev/null
+++ b/decoder/ih264d_api.c
@@ -0,0 +1,4680 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : ih264d_api.c */
+/* */
+/* Description : Has all API related functions */
+/* */
+/* */
+/* List of Functions : api_check_struct_sanity */
+/* ih264d_set_processor */
+/* ih264d_get_num_rec */
+/* ih264d_init_decoder */
+/* ih264d_init_video_decoder */
+/* ih264d_fill_num_mem_rec */
+/* ih264d_clr */
+/* ih264d_init */
+/* ih264d_map_error */
+/* ih264d_video_decode */
+/* ih264d_get_version */
+/* ih264d_get_display_frame */
+/* ih264d_set_display_frame */
+/* ih264d_set_flush_mode */
+/* ih264d_get_status */
+/* ih264d_get_buf_info */
+/* ih264d_set_params */
+/* ih264d_set_default_params */
+/* ih264d_reset */
+/* ih264d_ctl */
+/* ih264d_rel_display_frame */
+/* ih264d_set_degrade */
+/* ih264d_get_frame_dimensions */
+/* ih264d_set_num_cores */
+/* ih264d_fill_output_struct_from_context */
+/* ih264d_api_function */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 14 10 2008 100356(SKV) Draft */
+/* */
+/*****************************************************************************/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_tables.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264d.h"
+#include "ih264d_defs.h"
+
+#include <string.h>
+#include <limits.h>
+#include <stddef.h>
+
+#include "ih264d_inter_pred.h"
+
+#include "ih264d_structs.h"
+#include "ih264d_nal.h"
+#include "ih264d_error_handler.h"
+
+#include "ih264d_defs.h"
+
+#include "ithread.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_function_selector.h"
+#include "ih264_error.h"
+#include "ih264_disp_mgr.h"
+#include "ih264_buf_mgr.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_parse_cabac.h"
+#include "ih264d_utils.h"
+#include "ih264d_format_conv.h"
+#include "ih264d_parse_headers.h"
+#include <assert.h>
+
+
+/* Codec versioning (TODO kept from the original: move these next to where they are used).      */
+/* VERSION() writes "@(#)Id:<name>_<type> Ver:<ver> Released by <vendor> Build: <date> @ <time>" */
+/* into version_string, which must hold at least MAXVERSION_STRLEN bytes. It expands to several  */
+/* statements, so never use it as the unbraced body of an if/else/loop.                          */
+#define CODEC_NAME "H264VDEC"
+#define CODEC_RELEASE_TYPE "production"
+#define CODEC_RELEASE_VER "04.00"
+#define CODEC_VENDOR "ITTIAM"
+#define MAXVERSION_STRLEN 511
+#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor) \
+ strncpy(version_string,"@(#)Id:", MAXVERSION_STRLEN); /* short literal: strncpy zero-pads, so the buffer is terminated */ \
+ strncat(version_string,codec_name, MAXVERSION_STRLEN - 1 - strlen(version_string)); /* strncat's count is the max chars to APPEND: pass only the room left */ \
+ strncat(version_string,"_", MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string,codec_release_type, MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string," Ver:", MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string,codec_release_ver, MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string," Released by ", MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string,codec_vendor, MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string," Build: ", MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string,__DATE__, MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string," @ ", MAXVERSION_STRLEN - 1 - strlen(version_string)); \
+ strncat(version_string,__TIME__, MAXVERSION_STRLEN - 1 - strlen(version_string));
+
+#define MAX_NAL_UNIT_SIZE MAX((H264_MAX_FRAME_HEIGHT * H264_MAX_FRAME_HEIGHT),MIN_NALUNIT_SIZE) /* larger of the two bounds. NOTE(review): this multiplies HEIGHT by HEIGHT, not WIDTH by HEIGHT - confirm that is intended */
+#define MIN_NALUNIT_SIZE 200000 /* floor for MAX_NAL_UNIT_SIZE; defined after its use, which works because macros expand at the point of use */
+#define FMT_CONV_NUM_ROWS 4 /* NOTE(review): presumably rows handled per format-conversion step - confirm at the use site */
+/* Minimum buffer counts. NOTE(review): presumably reported to the application per output colour format - confirm. */
+#define MIN_IN_BUFS 1
+#define MIN_OUT_BUFS_420 3
+#define MIN_OUT_BUFS_422ILE 1
+#define MIN_OUT_BUFS_RGB565 1
+#define MIN_OUT_BUFS_420SP 2
+#define MIN_IN_BUF_SIZE (2*1024*1024) /* 2 MiB. NOTE(review): the previous note here read "Currently, i4_size set to 500kb, CHECK LATER", which does not match this value */
+/* Compile-time switch: 1 caps NUM_FRAMES_LIMIT at 10000; 0 (the current setting) makes it 0x7FFFFFFF. */
+#define NUM_FRAMES_LIMIT_ENABLED 0
+
+#if NUM_FRAMES_LIMIT_ENABLED
+#define NUM_FRAMES_LIMIT 10000
+#else
+#define NUM_FRAMES_LIMIT 0x7FFFFFFF
+#endif
+
+
+UWORD32 ih264d_get_extra_mem_external(UWORD32 width, UWORD32 height); /* NOTE(review): not in this file's function list; presumably defined in another translation unit - confirm */
+WORD32 ih264d_get_frame_dimensions(iv_obj_t *dec_hdl, /* listed in this file's function list; takes the codec handle plus untyped input/output structures */
+ void *pv_api_ip,
+ void *pv_api_op);
+WORD32 ih264d_set_num_cores(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op); /* listed in this file's function list; same handle/input/output convention */
+/* The next four take the decoder context directly. NOTE(review): none is in this file's function list; presumably defined elsewhere - confirm. */
+WORD32 ih264d_deblock_display(dec_struct_t *ps_dec);
+
+void ih264d_signal_decode_thread(dec_struct_t *ps_dec);
+
+void ih264d_signal_bs_deblk_thread(dec_struct_t *ps_dec);
+void ih264d_decode_picture_thread(dec_struct_t *ps_dec);
+
+WORD32 ih264d_set_degrade(iv_obj_t *ps_codec_obj, /* listed in this file's function list; NOTE(review): presumably the handler for IH264D_CMD_CTL_DEGRADE - confirm */
+ void *pv_api_ip,
+ void *pv_api_op);
+
+void ih264d_fill_output_struct_from_context(dec_struct_t *ps_dec, /* listed in this file's function list; NOTE(review): presumably copies decoder state into ps_dec_op - confirm */
+ ivd_video_decode_op_t *ps_dec_op);
+
+static IV_API_CALL_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
+ void *pv_api_ip,
+ void *pv_api_op)
+{
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ UWORD32 *pu4_api_ip;
+ UWORD32 *pu4_api_op;
+ UWORD32 i, j;
+
+ if(NULL == pv_api_op)
+ return (IV_FAIL);
+
+ if(NULL == pv_api_ip)
+ return (IV_FAIL);
+
+ pu4_api_ip = (UWORD32 *)pv_api_ip;
+ pu4_api_op = (UWORD32 *)pv_api_op;
+ e_cmd = *(pu4_api_ip + 1);
+
+ /* error checks on handle */
+ switch((WORD32)e_cmd)
+ {
+ case IV_CMD_GET_NUM_MEM_REC:
+ case IV_CMD_FILL_NUM_MEM_REC:
+ break;
+ case IV_CMD_INIT:
+ if(ps_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVD_HANDLE_NULL;
+ return IV_FAIL;
+ }
+
+ if(ps_handle->u4_size != sizeof(iv_obj_t))
+ {
+ *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT;
+ H264_DEC_DEBUG_PRINT(
+ "Sizes do not match. Expected: %d, Got: %d",
+ sizeof(iv_obj_t), ps_handle->u4_size);
+ return IV_FAIL;
+ }
+ break;
+ case IVD_CMD_REL_DISPLAY_FRAME:
+ case IVD_CMD_SET_DISPLAY_FRAME:
+ case IVD_CMD_GET_DISPLAY_FRAME:
+ case IVD_CMD_VIDEO_DECODE:
+ case IV_CMD_RETRIEVE_MEMREC:
+ case IVD_CMD_VIDEO_CTL:
+ if(ps_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVD_HANDLE_NULL;
+ return IV_FAIL;
+ }
+
+ if(ps_handle->u4_size != sizeof(iv_obj_t))
+ {
+ *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if(ps_handle->pv_fxns != ih264d_api_function)
+ {
+ *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL;
+ return IV_FAIL;
+ }
+
+ if(ps_handle->pv_codec_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL;
+ return IV_FAIL;
+ }
+ break;
+ default:
+ *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVD_INVALID_API_CMD;
+ return IV_FAIL;
+ }
+
+ switch((WORD32)e_cmd)
+ {
+ case IV_CMD_GET_NUM_MEM_REC:
+ {
+ ih264d_num_mem_rec_ip_t *ps_ip =
+ (ih264d_num_mem_rec_ip_t *)pv_api_ip;
+ ih264d_num_mem_rec_op_t *ps_op =
+ (ih264d_num_mem_rec_op_t *)pv_api_op;
+ ps_op->s_ivd_num_mem_rec_op_t.u4_error_code = 0;
+
+ if(ps_ip->s_ivd_num_mem_rec_ip_t.u4_size
+ != sizeof(ih264d_num_mem_rec_ip_t))
+ {
+ ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if(ps_op->s_ivd_num_mem_rec_op_t.u4_size
+ != sizeof(ih264d_num_mem_rec_op_t))
+ {
+ ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+ }
+ break;
+ case IV_CMD_FILL_NUM_MEM_REC:
+ {
+ ih264d_fill_mem_rec_ip_t *ps_ip =
+ (ih264d_fill_mem_rec_ip_t *)pv_api_ip;
+ ih264d_fill_mem_rec_op_t *ps_op =
+ (ih264d_fill_mem_rec_op_t *)pv_api_op;
+ iv_mem_rec_t *ps_mem_rec;
+ WORD32 max_wd = ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd;
+ WORD32 max_ht = ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht;
+
+ max_wd = ((max_wd + 15) >> 4) << 4;
+ max_ht = ((max_ht + 15) >> 4) << 4;
+
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code = 0;
+
+ if((ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size
+ > sizeof(ih264d_fill_mem_rec_ip_t))
+ || (ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size
+ < sizeof(iv_fill_mem_rec_ip_t)))
+ {
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if((ps_op->s_ivd_fill_mem_rec_op_t.u4_size
+ != sizeof(ih264d_fill_mem_rec_op_t))
+ && (ps_op->s_ivd_fill_mem_rec_op_t.u4_size
+ != sizeof(iv_fill_mem_rec_op_t)))
+ {
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if(max_wd < H264_MIN_FRAME_WIDTH)
+ {
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ IVD_REQUESTED_WIDTH_NOT_SUPPPORTED;
+ return (IV_FAIL);
+ }
+
+ if(max_wd > H264_MAX_FRAME_WIDTH)
+ {
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ IVD_REQUESTED_WIDTH_NOT_SUPPPORTED;
+ return (IV_FAIL);
+ }
+
+ if(max_ht < H264_MIN_FRAME_HEIGHT)
+ {
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED;
+ return (IV_FAIL);
+ }
+
+ if((max_ht * max_wd)
+ > (H264_MAX_FRAME_HEIGHT * H264_MAX_FRAME_WIDTH))
+
+ {
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED;
+ return (IV_FAIL);
+ }
+
+ if(NULL == ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location)
+ {
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ IVD_NUM_REC_NOT_SUFFICIENT;
+ return (IV_FAIL);
+ }
+
+ /* check memrecords sizes are correct */
+ ps_mem_rec = ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location;
+ for(i = 0; i < MEM_REC_CNT; i++)
+ {
+ if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ IVD_MEM_REC_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ }
+ break;
+
+ case IV_CMD_INIT:
+ {
+ ih264d_init_ip_t *ps_ip = (ih264d_init_ip_t *)pv_api_ip;
+ ih264d_init_op_t *ps_op = (ih264d_init_op_t *)pv_api_op;
+ iv_mem_rec_t *ps_mem_rec;
+ WORD32 max_wd = ps_ip->s_ivd_init_ip_t.u4_frm_max_wd;
+ WORD32 max_ht = ps_ip->s_ivd_init_ip_t.u4_frm_max_ht;
+
+ max_wd = ((max_wd + 15) >> 4) << 4;
+ max_ht = ((max_ht + 15) >> 4) << 4;
+
+ ps_op->s_ivd_init_op_t.u4_error_code = 0;
+
+ if((ps_ip->s_ivd_init_ip_t.u4_size > sizeof(ih264d_init_ip_t))
+ || (ps_ip->s_ivd_init_ip_t.u4_size
+ < sizeof(ivd_init_ip_t)))
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ if((ps_op->s_ivd_init_op_t.u4_size != sizeof(ih264d_init_op_t))
+ && (ps_op->s_ivd_init_op_t.u4_size
+ != sizeof(ivd_init_op_t)))
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ if(ps_ip->s_ivd_init_ip_t.u4_num_mem_rec != MEM_REC_CNT)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_NOT_SUFFICIENT;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ if(max_wd < H264_MIN_FRAME_WIDTH)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ if(max_wd > H264_MAX_FRAME_WIDTH)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ if(max_ht < H264_MIN_FRAME_HEIGHT)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ if((max_ht * max_wd)
+ > (H264_MAX_FRAME_HEIGHT * H264_MAX_FRAME_WIDTH))
+
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ if(NULL == ps_ip->s_ivd_init_ip_t.pv_mem_rec_location)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_NUM_REC_NOT_SUFFICIENT;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ if((ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420P)
+ && (ps_ip->s_ivd_init_ip_t.e_output_format
+ != IV_YUV_422ILE)
+ && (ps_ip->s_ivd_init_ip_t.e_output_format
+ != IV_RGB_565)
+ && (ps_ip->s_ivd_init_ip_t.e_output_format
+ != IV_YUV_420SP_UV)
+ && (ps_ip->s_ivd_init_ip_t.e_output_format
+ != IV_YUV_420SP_VU))
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED;
+ H264_DEC_DEBUG_PRINT("\n");
+ return (IV_FAIL);
+ }
+
+ /* verify number of mem records */
+ if(ps_ip->s_ivd_init_ip_t.u4_num_mem_rec < MEM_REC_CNT)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_MEM_REC_NOT_SUFFICIENT;
+ H264_DEC_DEBUG_PRINT("\n");
+ return IV_FAIL;
+ }
+
+ ps_mem_rec = ps_ip->s_ivd_init_ip_t.pv_mem_rec_location;
+ /* check memrecords sizes are correct */
+ for(i = 0; i < ps_ip->s_ivd_init_ip_t.u4_num_mem_rec; i++)
+ {
+ if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_MEM_REC_STRUCT_SIZE_INCORRECT;
+ H264_DEC_DEBUG_PRINT("i: %d\n", i);
+ return IV_FAIL;
+ }
+ /* check memrecords pointers are not NULL */
+
+ if(ps_mem_rec[i].pv_base == NULL)
+ {
+
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_MEM_REC_BASE_NULL;
+ H264_DEC_DEBUG_PRINT("i: %d\n", i);
+ return IV_FAIL;
+
+ }
+
+ }
+
+ /* verify memtabs for overlapping regions */
+ {
+ void *start[MEM_REC_CNT];
+ void *end[MEM_REC_CNT];
+
+ start[0] = (void *)(ps_mem_rec[0].pv_base);
+ end[0] = (void *)((UWORD8 *)ps_mem_rec[0].pv_base
+ + ps_mem_rec[0].u4_mem_size - 1);
+ for(i = 1; i < MEM_REC_CNT; i++)
+ {
+ /* This array is populated to check memtab overlapp */
+ start[i] = (void *)(ps_mem_rec[i].pv_base);
+ end[i] = (void *)((UWORD8 *)ps_mem_rec[i].pv_base
+ + ps_mem_rec[i].u4_mem_size - 1);
+
+ for(j = 0; j < i; j++)
+ {
+ if((start[i] >= start[j]) && (start[i] <= end[j]))
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_MEM_REC_OVERLAP_ERR;
+ H264_DEC_DEBUG_PRINT("i: %d, j: %d\n", i, j);
+ return IV_FAIL;
+ }
+
+ if((end[i] >= start[j]) && (end[i] <= end[j]))
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_MEM_REC_OVERLAP_ERR;
+ H264_DEC_DEBUG_PRINT("i: %d, j: %d\n", i, j);
+ return IV_FAIL;
+ }
+
+ if((start[i] < start[j]) && (end[i] > end[j]))
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_MEM_REC_OVERLAP_ERR;
+ H264_DEC_DEBUG_PRINT("i: %d, j: %d\n", i, j);
+ return IV_FAIL;
+ }
+ }
+
+ }
+ }
+
+ {
+ iv_mem_rec_t mem_rec_ittiam_api[MEM_REC_CNT];
+ ih264d_fill_mem_rec_ip_t s_fill_mem_rec_ip;
+ ih264d_fill_mem_rec_op_t s_fill_mem_rec_op;
+ IV_API_CALL_STATUS_T e_status;
+
+ UWORD32 i;
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.e_cmd =
+ IV_CMD_FILL_NUM_MEM_REC;
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location =
+ mem_rec_ittiam_api;
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd =
+ max_wd;
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht =
+ max_ht;
+
+ if(ps_ip->s_ivd_init_ip_t.u4_size
+ > offsetof(ih264d_init_ip_t, i4_level))
+ {
+ s_fill_mem_rec_ip.i4_level = ps_ip->i4_level;
+ }
+ else
+ {
+ s_fill_mem_rec_ip.i4_level = H264_LEVEL_3_1;
+ }
+
+ if(ps_ip->s_ivd_init_ip_t.u4_size
+ > offsetof(ih264d_init_ip_t, u4_num_ref_frames))
+ {
+ s_fill_mem_rec_ip.u4_num_ref_frames =
+ ps_ip->u4_num_ref_frames;
+ }
+ else
+ {
+ s_fill_mem_rec_ip.u4_num_ref_frames =
+ (H264_MAX_REF_PICS + 1);
+ }
+
+ if(ps_ip->s_ivd_init_ip_t.u4_size
+ > offsetof(ih264d_init_ip_t,
+ u4_num_reorder_frames))
+ {
+ s_fill_mem_rec_ip.u4_num_reorder_frames =
+ ps_ip->u4_num_reorder_frames;
+ }
+ else
+ {
+ s_fill_mem_rec_ip.u4_num_reorder_frames = (H264_MAX_REF_PICS
+ + 1);
+ }
+
+ if(ps_ip->s_ivd_init_ip_t.u4_size
+ > offsetof(ih264d_init_ip_t,
+ u4_num_extra_disp_buf))
+ {
+ s_fill_mem_rec_ip.u4_num_extra_disp_buf =
+ ps_ip->u4_num_extra_disp_buf;
+ }
+ else
+ {
+ s_fill_mem_rec_ip.u4_num_extra_disp_buf = 0;
+ }
+
+ if(ps_ip->s_ivd_init_ip_t.u4_size
+ > offsetof(ih264d_init_ip_t, u4_share_disp_buf))
+ {
+#ifndef LOGO_EN
+ s_fill_mem_rec_ip.u4_share_disp_buf =
+ ps_ip->u4_share_disp_buf;
+#else
+ s_fill_mem_rec_ip.u4_share_disp_buf = 0;
+#endif
+ }
+ else
+ {
+ s_fill_mem_rec_ip.u4_share_disp_buf = 0;
+ }
+
+ s_fill_mem_rec_ip.e_output_format =
+ ps_ip->s_ivd_init_ip_t.e_output_format;
+
+ if((s_fill_mem_rec_ip.e_output_format != IV_YUV_420P)
+ && (s_fill_mem_rec_ip.e_output_format
+ != IV_YUV_420SP_UV)
+ && (s_fill_mem_rec_ip.e_output_format
+ != IV_YUV_420SP_VU))
+ {
+ s_fill_mem_rec_ip.u4_share_disp_buf = 0;
+ }
+
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_size =
+ sizeof(ih264d_fill_mem_rec_ip_t);
+ s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_size =
+ sizeof(ih264d_fill_mem_rec_op_t);
+
+ for(i = 0; i < MEM_REC_CNT; i++)
+ mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t);
+
+ e_status = ih264d_api_function(NULL,
+ (void *)&s_fill_mem_rec_ip,
+ (void *)&s_fill_mem_rec_op);
+ if(IV_FAIL == e_status)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code =
+ s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_error_code;
+ H264_DEC_DEBUG_PRINT("Fail\n");
+ return (IV_FAIL);
+ }
+
+ for(i = 0; i < MEM_REC_CNT; i++)
+ {
+ if(ps_mem_rec[i].u4_mem_size
+ < mem_rec_ittiam_api[i].u4_mem_size)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_MEM_REC_INSUFFICIENT_SIZE;
+ H264_DEC_DEBUG_PRINT("i: %d \n", i);
+ return IV_FAIL;
+ }
+ if(ps_mem_rec[i].u4_mem_alignment
+ != mem_rec_ittiam_api[i].u4_mem_alignment)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR;
+ H264_DEC_DEBUG_PRINT("i: %d \n", i);
+ return IV_FAIL;
+ }
+ if(ps_mem_rec[i].e_mem_type
+ != mem_rec_ittiam_api[i].e_mem_type)
+ {
+ UWORD32 check = IV_SUCCESS;
+ UWORD32 diff = mem_rec_ittiam_api[i].e_mem_type
+ - ps_mem_rec[i].e_mem_type;
+
+ if((ps_mem_rec[i].e_mem_type
+ <= IV_EXTERNAL_CACHEABLE_SCRATCH_MEM)
+ && (mem_rec_ittiam_api[i].e_mem_type
+ >= IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM))
+ {
+ check = IV_FAIL;
+ }
+ if(3 != MOD(mem_rec_ittiam_api[i].e_mem_type, 4))
+ {
+ /*
+ * It is not IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM or IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM
+ */
+ if((diff < 1) || (diff > 3))
+ {
+ // Difference between 1 and 3 is okay for all cases other than the two filtered
+ // with the MOD condition above
+ check = IV_FAIL;
+ }
+ }
+ else
+ {
+ if(diff == 1)
+ {
+ /*
+ * This particular case is when codec asked for External Persistent, but got
+ * Internal Scratch.
+ */
+ check = IV_FAIL;
+ }
+ if((diff != 2) && (diff != 3))
+ {
+ check = IV_FAIL;
+ }
+ }
+ if(check == IV_FAIL)
+ {
+ ps_op->s_ivd_init_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_init_op_t.u4_error_code |=
+ IVD_INIT_DEC_MEM_REC_INCORRECT_TYPE;
+ H264_DEC_DEBUG_PRINT("i: %d \n", i);
+ return IV_FAIL;
+ }
+ }
+ }
+ }
+
+ }
+ break;
+
+ case IVD_CMD_GET_DISPLAY_FRAME:
+ {
+ ih264d_get_display_frame_ip_t *ps_ip =
+ (ih264d_get_display_frame_ip_t *)pv_api_ip;
+ ih264d_get_display_frame_op_t *ps_op =
+ (ih264d_get_display_frame_op_t *)pv_api_op;
+
+ ps_op->s_ivd_get_display_frame_op_t.u4_error_code = 0;
+
+ if((ps_ip->s_ivd_get_display_frame_ip_t.u4_size
+ != sizeof(ih264d_get_display_frame_ip_t))
+ && (ps_ip->s_ivd_get_display_frame_ip_t.u4_size
+ != sizeof(ivd_get_display_frame_ip_t)))
+ {
+ ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_get_display_frame_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if((ps_op->s_ivd_get_display_frame_op_t.u4_size
+ != sizeof(ih264d_get_display_frame_op_t))
+ && (ps_op->s_ivd_get_display_frame_op_t.u4_size
+ != sizeof(ivd_get_display_frame_op_t)))
+ {
+ ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_get_display_frame_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+ }
+ break;
+
+ case IVD_CMD_REL_DISPLAY_FRAME:
+ {
+ ih264d_rel_display_frame_ip_t *ps_ip =
+ (ih264d_rel_display_frame_ip_t *)pv_api_ip;
+ ih264d_rel_display_frame_op_t *ps_op =
+ (ih264d_rel_display_frame_op_t *)pv_api_op;
+
+ ps_op->s_ivd_rel_display_frame_op_t.u4_error_code = 0;
+
+ if((ps_ip->s_ivd_rel_display_frame_ip_t.u4_size
+ != sizeof(ih264d_rel_display_frame_ip_t))
+ && (ps_ip->s_ivd_rel_display_frame_ip_t.u4_size
+ != sizeof(ivd_rel_display_frame_ip_t)))
+ {
+ ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if((ps_op->s_ivd_rel_display_frame_op_t.u4_size
+ != sizeof(ih264d_rel_display_frame_op_t))
+ && (ps_op->s_ivd_rel_display_frame_op_t.u4_size
+ != sizeof(ivd_rel_display_frame_op_t)))
+ {
+ ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ }
+ break;
+
+ case IVD_CMD_SET_DISPLAY_FRAME:
+ {
+ ih264d_set_display_frame_ip_t *ps_ip =
+ (ih264d_set_display_frame_ip_t *)pv_api_ip;
+ ih264d_set_display_frame_op_t *ps_op =
+ (ih264d_set_display_frame_op_t *)pv_api_op;
+ UWORD32 j;
+
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code = 0;
+
+ if((ps_ip->s_ivd_set_display_frame_ip_t.u4_size
+ != sizeof(ih264d_set_display_frame_ip_t))
+ && (ps_ip->s_ivd_set_display_frame_ip_t.u4_size
+ != sizeof(ivd_set_display_frame_ip_t)))
+ {
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if((ps_op->s_ivd_set_display_frame_op_t.u4_size
+ != sizeof(ih264d_set_display_frame_op_t))
+ && (ps_op->s_ivd_set_display_frame_op_t.u4_size
+ != sizeof(ivd_set_display_frame_op_t)))
+ {
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if(ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs == 0)
+ {
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
+ IVD_DISP_FRM_ZERO_OP_BUFS;
+ return IV_FAIL;
+ }
+
+ for(j = 0; j < ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs;
+ j++)
+ {
+ if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs
+ == 0)
+ {
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
+ IVD_DISP_FRM_ZERO_OP_BUFS;
+ return IV_FAIL;
+ }
+
+ for(i = 0;
+ i
+ < ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs;
+ i++)
+ {
+ if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].pu1_bufs[i]
+ == NULL)
+ {
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
+ IVD_DISP_FRM_OP_BUF_NULL;
+ return IV_FAIL;
+ }
+
+ if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_min_out_buf_size[i]
+ == 0)
+ {
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_set_display_frame_op_t.u4_error_code |=
+ IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
+ return IV_FAIL;
+ }
+ }
+ }
+ }
+ break;
+
+ case IVD_CMD_VIDEO_DECODE:
+ {
+ ih264d_video_decode_ip_t *ps_ip =
+ (ih264d_video_decode_ip_t *)pv_api_ip;
+ ih264d_video_decode_op_t *ps_op =
+ (ih264d_video_decode_op_t *)pv_api_op;
+
+ H264_DEC_DEBUG_PRINT("The input bytes is: %d",
+ ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes);
+ ps_op->s_ivd_video_decode_op_t.u4_error_code = 0;
+
+ if(ps_ip->s_ivd_video_decode_ip_t.u4_size
+ != sizeof(ih264d_video_decode_ip_t)&&
+ ps_ip->s_ivd_video_decode_ip_t.u4_size != offsetof(ivd_video_decode_ip_t, s_out_buffer))
+ {
+ ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_video_decode_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if(ps_op->s_ivd_video_decode_op_t.u4_size
+ != sizeof(ih264d_video_decode_op_t)&&
+ ps_op->s_ivd_video_decode_op_t.u4_size != offsetof(ivd_video_decode_op_t, u4_output_present))
+ {
+ ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_video_decode_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ }
+ break;
+
+ case IV_CMD_RETRIEVE_MEMREC:
+ {
+ ih264d_retrieve_mem_rec_ip_t *ps_ip =
+ (ih264d_retrieve_mem_rec_ip_t *)pv_api_ip;
+ ih264d_retrieve_mem_rec_op_t *ps_op =
+ (ih264d_retrieve_mem_rec_op_t *)pv_api_op;
+ iv_mem_rec_t *ps_mem_rec;
+
+ ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code = 0;
+
+ if(ps_ip->s_ivd_retrieve_mem_rec_ip_t.u4_size
+ != sizeof(ih264d_retrieve_mem_rec_ip_t))
+ {
+ ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if(ps_op->s_ivd_retrieve_mem_rec_op_t.u4_size
+ != sizeof(ih264d_retrieve_mem_rec_op_t))
+ {
+ ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ ps_mem_rec = ps_ip->s_ivd_retrieve_mem_rec_ip_t.pv_mem_rec_location;
+ /* check memrecords sizes are correct */
+ for(i = 0; i < MEM_REC_CNT; i++)
+ {
+ if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |=
+ IVD_MEM_REC_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ }
+ break;
+
+ case IVD_CMD_VIDEO_CTL:
+ {
+ UWORD32 *pu4_ptr_cmd;
+ UWORD32 sub_command;
+
+ pu4_ptr_cmd = (UWORD32 *)pv_api_ip;
+ pu4_ptr_cmd += 2;
+ sub_command = *pu4_ptr_cmd;
+
+ switch(sub_command)
+ {
+ case IVD_CMD_CTL_SETPARAMS:
+ {
+ ih264d_ctl_set_config_ip_t *ps_ip;
+ ih264d_ctl_set_config_op_t *ps_op;
+ ps_ip = (ih264d_ctl_set_config_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_set_config_op_t *)pv_api_op;
+
+ if(ps_ip->s_ivd_ctl_set_config_ip_t.u4_size
+ != sizeof(ih264d_ctl_set_config_ip_t))
+ {
+ ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ //no break; is needed here
+ case IVD_CMD_CTL_SETDEFAULT:
+ {
+ ih264d_ctl_set_config_op_t *ps_op;
+ ps_op = (ih264d_ctl_set_config_op_t *)pv_api_op;
+ if(ps_op->s_ivd_ctl_set_config_op_t.u4_size
+ != sizeof(ih264d_ctl_set_config_op_t))
+ {
+ ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+
+ case IVD_CMD_CTL_GETPARAMS:
+ {
+ ih264d_ctl_getstatus_ip_t *ps_ip;
+ ih264d_ctl_getstatus_op_t *ps_op;
+
+ ps_ip = (ih264d_ctl_getstatus_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_getstatus_op_t *)pv_api_op;
+ if(ps_ip->s_ivd_ctl_getstatus_ip_t.u4_size
+ != sizeof(ih264d_ctl_getstatus_ip_t))
+ {
+ ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ if(ps_op->s_ivd_ctl_getstatus_op_t.u4_size
+ != sizeof(ih264d_ctl_getstatus_op_t))
+ {
+ ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+
+ case IVD_CMD_CTL_GETBUFINFO:
+ {
+ ih264d_ctl_getbufinfo_ip_t *ps_ip;
+ ih264d_ctl_getbufinfo_op_t *ps_op;
+ ps_ip = (ih264d_ctl_getbufinfo_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_getbufinfo_op_t *)pv_api_op;
+
+ if(ps_ip->s_ivd_ctl_getbufinfo_ip_t.u4_size
+ != sizeof(ih264d_ctl_getbufinfo_ip_t))
+ {
+ ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ if(ps_op->s_ivd_ctl_getbufinfo_op_t.u4_size
+ != sizeof(ih264d_ctl_getbufinfo_op_t))
+ {
+ ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+
+ case IVD_CMD_CTL_GETVERSION:
+ {
+ ih264d_ctl_getversioninfo_ip_t *ps_ip;
+ ih264d_ctl_getversioninfo_op_t *ps_op;
+ ps_ip = (ih264d_ctl_getversioninfo_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_getversioninfo_op_t *)pv_api_op;
+ if(ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_size
+ != sizeof(ih264d_ctl_getversioninfo_ip_t))
+ {
+ ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ if(ps_op->s_ivd_ctl_getversioninfo_op_t.u4_size
+ != sizeof(ih264d_ctl_getversioninfo_op_t))
+ {
+ ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+
+ case IVD_CMD_CTL_FLUSH:
+ {
+ ih264d_ctl_flush_ip_t *ps_ip;
+ ih264d_ctl_flush_op_t *ps_op;
+ ps_ip = (ih264d_ctl_flush_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_flush_op_t *)pv_api_op;
+ if(ps_ip->s_ivd_ctl_flush_ip_t.u4_size
+ != sizeof(ih264d_ctl_flush_ip_t))
+ {
+ ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_flush_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ if(ps_op->s_ivd_ctl_flush_op_t.u4_size
+ != sizeof(ih264d_ctl_flush_op_t))
+ {
+ ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_flush_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+
+ case IVD_CMD_CTL_RESET:
+ {
+ ih264d_ctl_reset_ip_t *ps_ip;
+ ih264d_ctl_reset_op_t *ps_op;
+ ps_ip = (ih264d_ctl_reset_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_reset_op_t *)pv_api_op;
+ if(ps_ip->s_ivd_ctl_reset_ip_t.u4_size
+ != sizeof(ih264d_ctl_reset_ip_t))
+ {
+ ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_reset_op_t.u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ if(ps_op->s_ivd_ctl_reset_op_t.u4_size
+ != sizeof(ih264d_ctl_reset_op_t))
+ {
+ ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= 1
+ << IVD_UNSUPPORTEDPARAM;
+ ps_op->s_ivd_ctl_reset_op_t.u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+
+ case IH264D_CMD_CTL_DEGRADE:
+ {
+ ih264d_ctl_degrade_ip_t *ps_ip;
+ ih264d_ctl_degrade_op_t *ps_op;
+
+ ps_ip = (ih264d_ctl_degrade_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_degrade_op_t *)pv_api_op;
+
+ if(ps_ip->u4_size != sizeof(ih264d_ctl_degrade_ip_t))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_op->u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if(ps_op->u4_size != sizeof(ih264d_ctl_degrade_op_t))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_op->u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if((ps_ip->i4_degrade_pics < 0)
+ || (ps_ip->i4_degrade_pics > 4)
+ || (ps_ip->i4_nondegrade_interval < 0)
+ || (ps_ip->i4_degrade_type < 0)
+ || (ps_ip->i4_degrade_type > 15))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS:
+ {
+ ih264d_ctl_get_frame_dimensions_ip_t *ps_ip;
+ ih264d_ctl_get_frame_dimensions_op_t *ps_op;
+
+ ps_ip = (ih264d_ctl_get_frame_dimensions_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_get_frame_dimensions_op_t *)pv_api_op;
+
+ if(ps_ip->u4_size
+ != sizeof(ih264d_ctl_get_frame_dimensions_ip_t))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_op->u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if(ps_op->u4_size
+ != sizeof(ih264d_ctl_get_frame_dimensions_op_t))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_op->u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IH264D_CMD_CTL_SET_NUM_CORES:
+ {
+ ih264d_ctl_set_num_cores_ip_t *ps_ip;
+ ih264d_ctl_set_num_cores_op_t *ps_op;
+
+ ps_ip = (ih264d_ctl_set_num_cores_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_set_num_cores_op_t *)pv_api_op;
+
+ if(ps_ip->u4_size != sizeof(ih264d_ctl_set_num_cores_ip_t))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_op->u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if(ps_op->u4_size != sizeof(ih264d_ctl_set_num_cores_op_t))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_op->u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if((ps_ip->u4_num_cores != 1) && (ps_ip->u4_num_cores != 2)
+ && (ps_ip->u4_num_cores != 3)
+ && (ps_ip->u4_num_cores != 4))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ return IV_FAIL;
+ }
+ break;
+ }
+ case IH264D_CMD_CTL_SET_PROCESSOR:
+ {
+ ih264d_ctl_set_processor_ip_t *ps_ip;
+ ih264d_ctl_set_processor_op_t *ps_op;
+
+ ps_ip = (ih264d_ctl_set_processor_ip_t *)pv_api_ip;
+ ps_op = (ih264d_ctl_set_processor_op_t *)pv_api_op;
+
+ if(ps_ip->u4_size != sizeof(ih264d_ctl_set_processor_ip_t))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_op->u4_error_code |=
+ IVD_IP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if(ps_op->u4_size != sizeof(ih264d_ctl_set_processor_op_t))
+ {
+ ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_op->u4_error_code |=
+ IVD_OP_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+ default:
+ *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVD_UNSUPPORTED_API_CMD;
+ return IV_FAIL;
+ break;
+ }
+ }
+ break;
+ }
+
+ return IV_SUCCESS;
+}
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Sets the processor architecture and SoC used by the decoder
+ *
+ * @par Description:
+ *  Copies the architecture and SoC identifiers from the control input
+ *  structure into the codec state and then calls ih264d_init_function_ptr()
+ *  on that codec state
+ *
+ * @param[in] dec_hdl
+ *  Pointer to codec object at API level; its pv_codec_handle is used as the
+ *  decoder state
+ *
+ * @param[in] pv_api_ip
+ *  Pointer to input argument structure (ih264d_ctl_set_processor_ip_t)
+ *
+ * @param[out] pv_api_op
+ *  Pointer to output argument structure (ih264d_ctl_set_processor_op_t);
+ *  its u4_error_code is cleared
+ *
+ * @returns IV_SUCCESS (no failure path exists in this function)
+ *
+ * @remarks
+ *  u4_arch and u4_soc are not range checked here; they are only cast to
+ *  IVD_ARCH_T / IVD_SOC_T and stored
+ *
+ *******************************************************************************
+ */
+
+WORD32 ih264d_set_processor(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    ih264d_ctl_set_processor_ip_t *ps_ctl_ip =
+                    (ih264d_ctl_set_processor_ip_t *)pv_api_ip;
+    ih264d_ctl_set_processor_op_t *ps_ctl_op =
+                    (ih264d_ctl_set_processor_op_t *)pv_api_op;
+    dec_struct_t *ps_dec = (dec_struct_t *)dec_hdl->pv_codec_handle;
+
+    /* Remember what the application asked for */
+    ps_dec->e_processor_arch = (IVD_ARCH_T)ps_ctl_ip->u4_arch;
+    ps_dec->e_processor_soc = (IVD_SOC_T)ps_ctl_ip->u4_soc;
+
+    /* Function pointers are set up again now that arch/soc have changed */
+    ih264d_init_function_ptr(ps_dec);
+
+    ps_ctl_op->u4_error_code = 0;
+    return IV_SUCCESS;
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_get_num_rec                                       */
+/*                                                                           */
+/*  Description   : Reports the number of memory records required by the     */
+/*                  decoder (MEM_REC_CNT)                                    */
+/*                                                                           */
+/*  Inputs        : pv_api_ip  input api structure (not used)                */
+/*                : pv_api_op  output api structure (iv_num_mem_rec_op_t)    */
+/*  Outputs       : u4_num_mem_rec of the output structure is set            */
+/*  Returns       : IV_SUCCESS                                               */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_get_num_rec(void *pv_api_ip, void *pv_api_op)
+{
+    iv_num_mem_rec_ip_t *ps_num_rec_ip = (iv_num_mem_rec_ip_t *)pv_api_ip;
+    iv_num_mem_rec_op_t *ps_num_rec_op = (iv_num_mem_rec_op_t *)pv_api_op;
+
+    /* Nothing is read from the input structure */
+    UNUSED(ps_num_rec_ip);
+
+    /* The record count is a compile time constant */
+    ps_num_rec_op->u4_num_mem_rec = MEM_REC_CNT;
+
+    return IV_SUCCESS;
+}
+
+
+/**************************************************************************
+ * \if Function name : ih264d_init_decoder \endif
+ *
+ *
+ * \brief
+ *    Puts an already allocated decoder context into its default state
+ *
+ * \param ps_dec_params : Pointer to the decoder context (dec_struct_t).
+ *    The ps_pps, ps_sps, ps_sei, ps_dpb_mgr, ps_dec_err_status, ps_bitstrm
+ *    and ps_cur_slice members are dereferenced here, so they must already
+ *    point to valid memory.
+ *
+ * \return
+ *    None. init_done is cleared on entry and set to 1 on exit;
+ *    process_called is also set to 1 on exit.
+ *
+ **************************************************************************
+ */
+void ih264d_init_decoder(void * ps_dec_params)
+{
+    dec_struct_t *ps_dec = (dec_struct_t *)ps_dec_params;
+    dec_slice_params_t *ps_slice = ps_dec->ps_cur_slice;
+    pocstruct_t *ps_poc_prev = &ps_dec->s_prev_pic_poc;
+    pocstruct_t *ps_poc_cur = &ps_dec->s_cur_pic_poc;
+
+    /* Context is not usable until the end of this function */
+    ps_dec->init_done = 0;
+
+    /* Defaults for the application controllable settings */
+    ps_dec->u4_num_cores = 1;
+    ps_dec->u2_pic_wd = 0;
+    ps_dec->u2_pic_ht = 0;
+    ps_dec->u1_separate_parse = DEFAULT_SEPARATE_PARSE;
+    ps_dec->u4_app_disable_deblk_frm = 0;
+    ps_dec->i4_degrade_type = 0;
+    ps_dec->i4_degrade_pics = 0;
+    ps_dec->i4_app_skip_mode = IVD_SKIP_NONE;
+    ps_dec->i4_dec_skip_mode = IVD_SKIP_NONE;
+
+    /* Wipe the picture and sequence parameter set tables */
+    memset(ps_dec->ps_pps, 0, sizeof(dec_pic_params_t) * MAX_NUM_PIC_PARAMS);
+    memset(ps_dec->ps_sps, 0, sizeof(dec_seq_params_t) * MAX_NUM_SEQ_PARAMS);
+
+    /* Picture level deblocking: index 0 non-MBAFF, index 1 MBAFF */
+    ps_dec->p_DeblockPicture[0] = ih264d_deblock_picture_non_mbaff;
+    ps_dec->p_DeblockPicture[1] = ih264d_deblock_picture_mbaff;
+
+    ps_dec->s_cab_dec_env.pv_codec_handle = ps_dec;
+    ps_dec->u4_num_fld_in_frm = 0;
+    ps_dec->ps_dpb_mgr->pv_codec_handle = ps_dec;
+
+    /* SEI contents are not valid yet */
+    ps_dec->ps_sei->u1_is_valid = 0;
+
+    /* Decoding state */
+    ps_dec->ps_cur_pps = NULL;
+    ps_dec->ps_cur_sps = NULL;
+    ps_dec->u1_init_dec_flag = 0;
+    ps_dec->u1_first_nal_in_pic = 1;
+    ps_dec->u1_first_pb_nal_in_pic = 1;
+    ps_dec->u1_last_pic_not_decoded = 0;
+    ps_dec->u4_app_disp_width = 0;
+    ps_dec->i4_header_decoded = 0;
+    ps_dec->u4_total_frames_decoded = 0;
+    ps_dec->i4_error_code = 0;
+    ps_slice->u1_mbaff_frame_flag = 0;
+
+    /* Error tracking state */
+    ps_dec->ps_dec_err_status->u1_err_flag = ACCEPT_ALL_PICS;
+    ps_dec->ps_dec_err_status->u1_cur_pic_type = PIC_TYPE_UNKNOWN;
+    ps_dec->ps_dec_err_status->u4_frm_sei_sync = SYNC_FRM_DEFAULT;
+    ps_dec->ps_dec_err_status->u4_cur_frm = INIT_FRAME;
+    ps_dec->ps_dec_err_status->u1_pic_aud_i = PIC_TYPE_UNKNOWN;
+
+    /* Slice / macroblock position bookkeeping */
+    ps_dec->u1_pr_sl_type = 0xFF;
+    ps_dec->u2_mbx = 0xffff;
+    ps_dec->u2_mby = 0;
+    ps_dec->u2_total_mbs_coded = 0;
+    ps_slice->u1_end_of_frame_signal = 0;
+
+    /* Picture order count state of the current picture */
+    ps_poc_cur->i4_pic_order_cnt_lsb = 0;
+    ps_poc_cur->i4_pic_order_cnt_msb = 0;
+    ps_poc_cur->i4_delta_pic_order_cnt_bottom = 0;
+    ps_poc_cur->i4_delta_pic_order_cnt[0] = 0;
+    ps_poc_cur->i4_delta_pic_order_cnt[1] = 0;
+    ps_poc_cur->u1_mmco_equalto5 = 0;
+    ps_poc_cur->i4_top_field_order_count = 0;
+    ps_poc_cur->i4_bottom_field_order_count = 0;
+    ps_poc_cur->u1_bot_field = 0;
+    ps_poc_cur->i4_prev_frame_num_ofst = 0;
+
+    /* Picture order count state of the previous picture */
+    ps_poc_prev->i4_pic_order_cnt_lsb = 0;
+    ps_poc_prev->i4_pic_order_cnt_msb = 0;
+    ps_poc_prev->i4_delta_pic_order_cnt_bottom = 0;
+    ps_poc_prev->i4_delta_pic_order_cnt[0] = 0;
+    ps_poc_prev->i4_delta_pic_order_cnt[1] = 0;
+    ps_poc_prev->u1_mmco_equalto5 = 0;
+    ps_poc_prev->i4_top_field_order_count = 0;
+    ps_poc_prev->i4_bottom_field_order_count = 0;
+    ps_poc_prev->u1_bot_field = 0;
+    ps_poc_prev->i4_prev_frame_num_ofst = 0;
+
+    ps_slice->u1_mmco_equalto5 = 0;
+    ps_slice->u2_frame_num = 0;
+
+    ps_dec->i4_max_poc = 0;
+    ps_dec->i4_prev_max_display_seq = 0;
+    ps_dec->u1_recon_mb_grp = 4;
+
+    /* Field picture state */
+    ps_dec->u1_second_field = 0;
+    ps_dec->s_prev_seq_params.u1_eoseq_pending = 0;
+
+    /* No cropping offsets yet */
+    ps_dec->u2_crop_offset_y = 0;
+    ps_dec->u2_crop_offset_uv = 0;
+
+    /* -1 marks frame rate, picture type, frame type and content type */
+    /* as not yet known                                               */
+    ps_dec->i4_vui_frame_rate = -1;
+    ps_dec->i4_pic_type = -1;
+    ps_dec->i4_frametype = -1;
+    ps_dec->i4_content_type = -1;
+
+    ps_dec->u1_res_changed = 0;
+    ps_dec->u1_frame_decoded_flag = 0;
+
+    /* Default frame skip mask */
+    ps_dec->u4_skip_frm_mask = SKIP_NONE;
+
+    /* CAVLC residual decoding function pointers */
+    ps_dec->pf_cavlc_4x4res_block[0] = ih264d_cavlc_4x4res_block_totalcoeff_1;
+    ps_dec->pf_cavlc_4x4res_block[1] = ih264d_cavlc_4x4res_block_totalcoeff_2to10;
+    ps_dec->pf_cavlc_4x4res_block[2] = ih264d_cavlc_4x4res_block_totalcoeff_11to16;
+
+    ps_dec->pf_cavlc_parse4x4coeff[0] = ih264d_cavlc_parse4x4coeff_n0to7;
+    ps_dec->pf_cavlc_parse4x4coeff[1] = ih264d_cavlc_parse4x4coeff_n8;
+
+    ps_dec->pf_cavlc_parse_8x8block[0] = ih264d_cavlc_parse_8x8block_none_available;
+    ps_dec->pf_cavlc_parse_8x8block[1] = ih264d_cavlc_parse_8x8block_left_available;
+    ps_dec->pf_cavlc_parse_8x8block[2] = ih264d_cavlc_parse_8x8block_top_available;
+    ps_dec->pf_cavlc_parse_8x8block[3] = ih264d_cavlc_parse_8x8block_both_available;
+
+    /* Boundary strength function pointers: [P/B slice][16x16 / non 16x16] */
+    ps_dec->pf_fill_bs1[0][0] = ih264d_fill_bs1_16x16mb_pslice;
+    ps_dec->pf_fill_bs1[0][1] = ih264d_fill_bs1_non16x16mb_pslice;
+    ps_dec->pf_fill_bs1[1][0] = ih264d_fill_bs1_16x16mb_bslice;
+    ps_dec->pf_fill_bs1[1][1] = ih264d_fill_bs1_non16x16mb_bslice;
+
+    ps_dec->pf_fill_bs_xtra_left_edge[0] = ih264d_fill_bs_xtra_left_edge_cur_frm;
+    ps_dec->pf_fill_bs_xtra_left_edge[1] = ih264d_fill_bs_xtra_left_edge_cur_fld;
+
+    /* Reference picture buffers */
+    ih264d_init_ref_bufs(ps_dec->ps_dpb_mgr);
+
+#if VERT_SCALE_UP_AND_422
+    ps_dec->u1_vert_up_scale_flag = 1;
+#else
+    ps_dec->u1_vert_up_scale_flag = 0;
+#endif
+
+    ps_dec->u2_prv_frame_num = 0;
+    ps_dec->u1_top_bottom_decoded = 0;
+    ps_dec->u1_dangling_field = 0;
+
+    ps_dec->s_cab_dec_env.cabac_table = gau4_ih264d_cabac_table;
+
+    /* Left neighbour context pointers start at their first entries */
+    ps_dec->pu1_left_mv_ctxt_inc = ps_dec->u1_left_mv_ctxt_inc_arr[0];
+    ps_dec->pi1_left_ref_idx_ctxt_inc = &ps_dec->i1_left_ref_idx_ctx_inc_arr[0][0];
+    ps_dec->pu1_left_yuv_dc_csbp = &ps_dec->u1_yuv_dc_csbp_topmb;
+
+    /* Not flushing */
+    ps_dec->u1_flushfrm = 0;
+
+    /* Back pointers from the sub-structures to the decoder context. */
+    /* These are written again after ih264d_init_ref_bufs() on purpose */
+    /* so the order of the original code is retained.                  */
+    ps_dec->s_cab_dec_env.pv_codec_handle = (void*)ps_dec;
+    ps_dec->ps_bitstrm->pv_codec_handle = (void*)ps_dec;
+    ps_dec->ps_cur_slice->pv_codec_handle = (void*)ps_dec;
+    ps_dec->ps_dpb_mgr->pv_codec_handle = (void*)ps_dec;
+
+    /* Display buffer bookkeeping */
+    memset(ps_dec->disp_bufs, 0, (MAX_DISP_BUFS_NEW) * sizeof(disp_buf_t));
+    memset(ps_dec->u4_disp_buf_mapping, 0, (MAX_DISP_BUFS_NEW) * sizeof(UWORD32));
+    memset(ps_dec->u4_disp_buf_to_be_freed, 0, (MAX_DISP_BUFS_NEW) * sizeof(UWORD32));
+
+    /* Architecture set-up first, then the function pointer tables */
+    ih264d_init_arch(ps_dec);
+    ih264d_init_function_ptr(ps_dec);
+
+    ps_dec->init_done = 1;
+    ps_dec->process_called = 1;
+}
+
+/**************************************************************************
+ * \if Function name : ih264d_init_video_decoder \endif
+ *
+ * \brief
+ *    Binds the application supplied memory records to a decoder context,
+ *    records the init-time limits and then calls ih264d_init_decoder()
+ *
+ * \param dec_hdl    : API level codec object; pv_codec_handle is set to the
+ *                     base of the MEM_REC_CODEC record.
+ * \param ps_init_ip : Init input structure. Fields beyond the base structure
+ *                     are read only when u4_size says they are present;
+ *                     otherwise defaults are used.
+ * \param ps_init_op : Init output structure; u4_error_code receives
+ *                     ERROR_LEVEL_UNSUPPORTED or ERROR_NUM_REF bits.
+ *
+ * \return
+ *    IV_FAIL when the requested level is outside
+ *    [MIN_LEVEL_SUPPORTED, MAX_LEVEL_SUPPORTED], IV_SUCCESS otherwise.
+ *    Out of range reference / reorder / extra display buffer counts are
+ *    flagged with ERROR_NUM_REF and replaced, but do not fail the call.
+ *
+ **************************************************************************
+ */
+
+WORD32 ih264d_init_video_decoder(iv_obj_t *dec_hdl,
+                                 ih264d_init_ip_t *ps_init_ip,
+                                 ih264d_init_op_t *ps_init_op)
+{
+    iv_mem_rec_t *ps_mem_rec = ps_init_ip->s_ivd_init_ip_t.pv_mem_rec_location;
+    dec_struct_t *ps_dec;
+    UWORD8 *pu1_extra_base, *pu1_thread_base;
+
+    /* The decoder context lives in the MEM_REC_CODEC record; start clean */
+    dec_hdl->pv_codec_handle = ps_mem_rec[MEM_REC_CODEC].pv_base;
+    ps_dec = dec_hdl->pv_codec_handle;
+    memset(ps_dec, 0, sizeof(dec_struct_t));
+
+    /* Optional init fields: use the caller's value only when the declared */
+    /* structure size extends past the start of that field                 */
+    ps_dec->u4_level_at_init =
+        (ps_init_ip->s_ivd_init_ip_t.u4_size
+                        > offsetof(ih264d_init_ip_t, i4_level)) ?
+                        ps_init_ip->i4_level : H264_LEVEL_3_1;
+
+    ps_dec->u4_num_ref_frames_at_init =
+        (ps_init_ip->s_ivd_init_ip_t.u4_size
+                        > offsetof(ih264d_init_ip_t, u4_num_ref_frames)) ?
+                        ps_init_ip->u4_num_ref_frames : H264_MAX_REF_PICS;
+
+    ps_dec->u4_num_reorder_frames_at_init =
+        (ps_init_ip->s_ivd_init_ip_t.u4_size
+                        > offsetof(ih264d_init_ip_t, u4_num_reorder_frames)) ?
+                        ps_init_ip->u4_num_reorder_frames : H264_MAX_REF_PICS;
+
+    ps_dec->u4_num_extra_disp_bufs_at_init =
+        (ps_init_ip->s_ivd_init_ip_t.u4_size
+                        > offsetof(ih264d_init_ip_t, u4_num_extra_disp_buf)) ?
+                        ps_init_ip->u4_num_extra_disp_buf : 0;
+
+    /* Display buffer sharing is off unless requested (never with LOGO_EN) */
+    ps_dec->u4_share_disp_buf = 0;
+#ifndef LOGO_EN
+    if(ps_init_ip->s_ivd_init_ip_t.u4_size
+                    > offsetof(ih264d_init_ip_t, u4_share_disp_buf))
+    {
+        ps_dec->u4_share_disp_buf = ps_init_ip->u4_share_disp_buf;
+    }
+#endif
+
+    /* Sharing is only kept for the 4:2:0 planar / semi-planar formats */
+    if(!((ps_init_ip->s_ivd_init_ip_t.e_output_format == IV_YUV_420P)
+                    || (ps_init_ip->s_ivd_init_ip_t.e_output_format
+                                    == IV_YUV_420SP_UV)
+                    || (ps_init_ip->s_ivd_init_ip_t.e_output_format
+                                    == IV_YUV_420SP_VU)))
+    {
+        ps_dec->u4_share_disp_buf = 0;
+    }
+
+    /* An unsupported level is the only hard failure */
+    if((ps_dec->u4_level_at_init < MIN_LEVEL_SUPPORTED)
+                    || (ps_dec->u4_level_at_init > MAX_LEVEL_SUPPORTED))
+    {
+        ps_init_op->s_ivd_init_op_t.u4_error_code |= ERROR_LEVEL_UNSUPPORTED;
+        return (IV_FAIL);
+    }
+
+    /* Over-large counts are flagged and replaced, not rejected */
+    if(ps_dec->u4_num_ref_frames_at_init > H264_MAX_REF_PICS)
+    {
+        ps_init_op->s_ivd_init_op_t.u4_error_code |= ERROR_NUM_REF;
+        ps_dec->u4_num_ref_frames_at_init = H264_MAX_REF_PICS;
+    }
+
+    if(ps_dec->u4_num_reorder_frames_at_init > H264_MAX_REF_PICS)
+    {
+        ps_init_op->s_ivd_init_op_t.u4_error_code |= ERROR_NUM_REF;
+        ps_dec->u4_num_reorder_frames_at_init = H264_MAX_REF_PICS;
+    }
+
+    if(ps_dec->u4_num_extra_disp_bufs_at_init > H264_MAX_REF_PICS)
+    {
+        ps_init_op->s_ivd_init_op_t.u4_error_code |= ERROR_NUM_REF;
+        ps_dec->u4_num_extra_disp_bufs_at_init = 0;
+    }
+
+    /* Extra display buffers only make sense when buffers are shared */
+    if(!ps_dec->u4_share_disp_buf)
+    {
+        ps_dec->u4_num_extra_disp_bufs_at_init = 0;
+    }
+
+    ps_dec->u4_num_disp_bufs_requested = 1;
+
+    /* Maximum frame dimensions, rounded up with ALIGN16 */
+    ps_dec->u4_width_at_init = ps_init_ip->s_ivd_init_ip_t.u4_frm_max_wd;
+    ps_dec->u4_width_at_init = ALIGN16(ps_dec->u4_width_at_init);
+    ps_dec->u4_height_at_init = ps_init_ip->s_ivd_init_ip_t.u4_frm_max_ht;
+    ps_dec->u4_height_at_init = ALIGN16(ps_dec->u4_height_at_init);
+
+    /* Two thread handles are laid out back to back in one record */
+    pu1_thread_base = ps_mem_rec[MEM_REC_THREAD_HANDLE].pv_base;
+    ps_dec->pv_dec_thread_handle = ps_mem_rec[MEM_REC_THREAD_HANDLE].pv_base;
+    ps_dec->pv_bs_deblk_thread_handle = pu1_thread_base
+                    + ithread_get_handle_size();
+
+    /* Carve the small bookkeeping structures out of the extra-memory */
+    /* record; u4_extra_mem_used is the running byte offset           */
+    pu1_extra_base = ps_mem_rec[MEM_REC_EXTRA_MEM].pv_base;
+    ps_dec->u4_extra_mem_used = 0;
+
+    ps_dec->ps_dec_err_status = (dec_err_status_t *)(pu1_extra_base
+                    + ps_dec->u4_extra_mem_used);
+    /* This first chunk is rounded up to a multiple of 128 bytes */
+    ps_dec->u4_extra_mem_used += (((sizeof(dec_err_status_t) + 127) >> 7) << 7);
+
+    ps_dec->ps_sei = (sei *)(pu1_extra_base + ps_dec->u4_extra_mem_used);
+    ps_dec->u4_extra_mem_used += sizeof(sei);
+
+    ps_dec->ps_dpb_cmds = (dpb_commands_t *)(pu1_extra_base
+                    + ps_dec->u4_extra_mem_used);
+    ps_dec->u4_extra_mem_used += sizeof(dpb_commands_t);
+
+    ps_dec->ps_bitstrm = (dec_bit_stream_t *)(pu1_extra_base
+                    + ps_dec->u4_extra_mem_used);
+    ps_dec->u4_extra_mem_used += sizeof(dec_bit_stream_t);
+
+    ps_dec->ps_cur_slice = (dec_slice_params_t *)(pu1_extra_base
+                    + ps_dec->u4_extra_mem_used);
+    ps_dec->u4_extra_mem_used += sizeof(dec_slice_params_t);
+
+    /* Scratch area large enough for either an SPS or a PPS */
+    ps_dec->pv_scratch_sps_pps = (void *)(pu1_extra_base
+                    + ps_dec->u4_extra_mem_used);
+    ps_dec->u4_extra_mem_used += MAX(sizeof(dec_seq_params_t),
+                                     sizeof(dec_pic_params_t));
+
+    /* Keep a private copy of the memory record table */
+    ps_dec->ps_mem_tab = ps_mem_rec[MEM_REC_BACKUP].pv_base;
+    memcpy(ps_dec->ps_mem_tab, ps_mem_rec, sizeof(iv_mem_rec_t) * MEM_REC_CNT);
+
+    /* Structures that own a whole memory record */
+    ps_dec->ps_pps = ps_mem_rec[MEM_REC_PPS].pv_base;
+    ps_dec->ps_sps = ps_mem_rec[MEM_REC_SPS].pv_base;
+    ps_dec->ps_dpb_mgr = ps_mem_rec[MEM_REC_DPB_MGR].pv_base;
+    ps_dec->ps_pred_pkd = ps_mem_rec[MEM_REC_PRED_INFO_PKD].pv_base;
+
+    ps_dec->ps_dpb_mgr->pv_codec_handle = ps_dec;
+
+    ps_dec->pv_dec_out = (void *)ps_init_op;
+    ps_dec->pv_dec_in = (void *)ps_init_ip;
+
+    ps_dec->u1_chroma_format =
+                    (UWORD8)(ps_init_ip->s_ivd_init_ip_t.e_output_format);
+
+    /* Every pointer ih264d_init_decoder() dereferences is now in place */
+    ih264d_init_decoder(ps_dec);
+
+    return (IV_SUCCESS);
+
+}
+
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_fill_num_mem_rec */
+/* */
+/* Description : fills memory records */
+/* */
+/* Inputs : pv_api_ip input api structure */
+/* : pv_api_op output api structure */
+/* Outputs : */
+/* Returns : WORD32 status code; IV_FAIL if the level is unsupported */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
+{
+
+ ih264d_fill_mem_rec_ip_t *ps_mem_q_ip;
+ ih264d_fill_mem_rec_op_t *ps_mem_q_op;
+ WORD32 level;
+ UWORD32 num_reorder_frames;
+ UWORD32 num_ref_frames;
+ UWORD32 num_extra_disp_bufs;
+ UWORD32 u4_dpb_size_num_frames;
+ iv_mem_rec_t *memTab;
+
+ UWORD32 chroma_format, u4_share_disp_buf;
+ UWORD32 u4_total_num_mbs;
+ UWORD32 luma_width, luma_width_in_mbs;
+ UWORD32 luma_height, luma_height_in_mbs;
+ UWORD32 max_dpb_size;
+
+ ps_mem_q_ip = (ih264d_fill_mem_rec_ip_t *)pv_api_ip;
+ ps_mem_q_op = (ih264d_fill_mem_rec_op_t *)pv_api_op;
+
+ if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size
+ > offsetof(ih264d_fill_mem_rec_ip_t, i4_level))
+ {
+ level = ps_mem_q_ip->i4_level;
+ }
+ else
+ {
+ level = H264_LEVEL_3_1;
+ }
+
+ if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size
+ > offsetof(ih264d_fill_mem_rec_ip_t, u4_num_reorder_frames))
+ {
+ num_reorder_frames = ps_mem_q_ip->u4_num_reorder_frames;
+ }
+ else
+ {
+ num_reorder_frames = H264_MAX_REF_PICS;
+ }
+
+ if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size
+ > offsetof(ih264d_fill_mem_rec_ip_t, u4_num_ref_frames))
+ {
+ num_ref_frames = ps_mem_q_ip->u4_num_ref_frames;
+ }
+ else
+ {
+ num_ref_frames = H264_MAX_REF_PICS;
+ }
+
+ if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size
+ > offsetof(ih264d_fill_mem_rec_ip_t, u4_num_extra_disp_buf))
+ {
+ num_extra_disp_bufs = ps_mem_q_ip->u4_num_extra_disp_buf;
+ }
+ else
+ {
+ num_extra_disp_bufs = 0;
+ }
+
+ if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size
+ > offsetof(ih264d_fill_mem_rec_ip_t, u4_share_disp_buf))
+ {
+#ifndef LOGO_EN
+ u4_share_disp_buf = ps_mem_q_ip->u4_share_disp_buf;
+#else
+ u4_share_disp_buf = 0;
+#endif
+ }
+ else
+ {
+ u4_share_disp_buf = 0;
+ }
+
+ if(ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_size
+ > offsetof(ih264d_fill_mem_rec_ip_t, e_output_format))
+ {
+ chroma_format = ps_mem_q_ip->e_output_format;
+ }
+ else
+ {
+ chroma_format = -1;
+ }
+
+ if((chroma_format != IV_YUV_420P) && (chroma_format != IV_YUV_420SP_UV)
+ && (chroma_format != IV_YUV_420SP_VU))
+ {
+ u4_share_disp_buf = 0;
+ }
+ if(0 == u4_share_disp_buf)
+ num_extra_disp_bufs = 0;
+
+ {
+
+ luma_height = ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht;
+ luma_width = ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd;
+
+ luma_height = ((luma_height + 15) >> 4) << 4;
+ luma_width = ((luma_width + 15) >> 4) << 4;
+ luma_width_in_mbs = luma_width >> 4;
+ luma_height_in_mbs = luma_height >> 4;
+ u4_total_num_mbs = (luma_height * luma_width) >> 8;
+ }
+ /*
+ * If level is lesser than 31 and the resolution required is higher,
+ * then make the level at least 31.
+ */
+ if(u4_total_num_mbs > MAX_MBS_LEVEL_30 && level < H264_LEVEL_3_1)
+ {
+ level = H264_LEVEL_3_1;
+ }
+
+ if((level < MIN_LEVEL_SUPPORTED) || (level > MAX_LEVEL_SUPPORTED))
+ {
+ ps_mem_q_op->s_ivd_fill_mem_rec_op_t.u4_error_code |=
+ ERROR_LEVEL_UNSUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if(num_ref_frames > H264_MAX_REF_PICS)
+ {
+ ps_mem_q_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= ERROR_NUM_REF;
+ num_ref_frames = H264_MAX_REF_PICS;
+ }
+
+ if(num_reorder_frames > H264_MAX_REF_PICS)
+ {
+ ps_mem_q_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= ERROR_NUM_REF;
+ num_reorder_frames = H264_MAX_REF_PICS;
+ }
+ memTab = ps_mem_q_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location;
+
+ memTab[MEM_REC_IV_OBJ].u4_mem_size = sizeof(iv_obj_t);
+ memTab[MEM_REC_IV_OBJ].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_IV_OBJ].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ H264_DEC_DEBUG_PRINT("MEM_REC_IV_OBJ MEM Size = %d\n",
+ memTab[MEM_REC_IV_OBJ].u4_mem_size);
+
+ memTab[MEM_REC_CODEC].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_CODEC].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_CODEC].u4_mem_size = sizeof(dec_struct_t);
+
+ {
+ UWORD32 mvinfo_size, mv_info_size_pad;
+ UWORD32 MVbank, MVbank_pad;
+ UWORD32 Ysize;
+ UWORD32 UVsize;
+ UWORD32 one_frm_size;
+
+ UWORD32 extra_mem = 0;
+
+ UWORD32 pad_len_h, pad_len_v;
+
+ /*
+ * For low_delay, use num_buf as 2 -
+ * num_buf = (num_buf_ref) + 1;
+ * where num_buf_ref is 1.
+ */
+ UWORD32 num_buf;
+
+ {
+ UWORD32 num_bufs_app, num_bufs_level;
+
+ num_bufs_app = num_ref_frames + num_reorder_frames + 1;
+
+ if(num_bufs_app <= 1)
+ num_bufs_app = 2;
+
+ num_bufs_level = ih264d_get_dpb_size_new(level, (luma_width >> 4),
+ (luma_height >> 4));
+
+ max_dpb_size = num_bufs_level;
+
+ num_bufs_level = num_bufs_level * 2 + 1;
+
+ num_buf = MIN(num_bufs_level, num_bufs_app);
+
+ num_buf += num_extra_disp_bufs;
+
+ }
+
+ mvinfo_size = ((luma_width * (luma_height)) >> 4);
+
+ mv_info_size_pad = ((luma_width * (PAD_MV_BANK_ROW)) >> 4);
+
+ Ysize = ALIGN32((luma_width + (PAD_LEN_Y_H << 1)))
+ * (luma_height + (PAD_LEN_Y_V << 2));
+
+
+ UVsize = Ysize >> 2;
+ if(u4_share_disp_buf == 1)
+ {
+ /* In case of buffers getting shared between application and library
+ there is no need of reference memtabs. Instead of setting the i4_size
+ to zero, it is reduced to a small i4_size to ensure that changes
+ in the code are minimal */
+
+ if((chroma_format == IV_YUV_420P)
+ || (chroma_format == IV_YUV_420SP_UV)
+ || (chroma_format == IV_YUV_420SP_VU))
+ {
+ Ysize = 64;
+ }
+ if(chroma_format == IV_YUV_420SP_UV)
+ {
+ UVsize = 64;
+ }
+ }
+
+ one_frm_size = (((Ysize + 127) >> 7) << 7)
+ + ((((UVsize << 1) + 127) >> 7) << 7);
+
+ //Note that for ARM RVDS WS the sizeof(mv_pred_t) is 16
+
+ /*Add memory for colocated MB*/
+ MVbank = sizeof(mv_pred_t) * mvinfo_size;
+ MVbank_pad = sizeof(mv_pred_t) * mv_info_size_pad;
+
+ MVbank = (((MVbank + 127) >> 7) << 7);
+
+ MVbank_pad = (((MVbank_pad + 127) >> 7) << 7);
+
+ memTab[MEM_REC_MVBANK].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_MVBANK].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_MVBANK].u4_mem_size = (MVbank + MVbank_pad)
+ * (MIN(max_dpb_size, num_ref_frames) + 1);
+
+
+ memTab[MEM_REC_REF_PIC].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_REF_PIC].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_REF_PIC].u4_mem_size = one_frm_size * num_buf;
+
+ }
+
+ memTab[MEM_REC_DEBLK_MB_INFO].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_DEBLK_MB_INFO].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_DEBLK_MB_INFO].u4_mem_size = (((((u4_total_num_mbs
+ + MAX_MBS_IN_ROW) * sizeof(deblk_mb_t)) + 127) >> 7) << 7);
+
+ memTab[MEM_REC_NEIGHBOR_INFO].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_NEIGHBOR_INFO].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_NEIGHBOR_INFO].u4_mem_size = sizeof(mb_neigbour_params_t)
+ * ((luma_width + 16) >> 4) * 2 * 2;
+ {
+ WORD32 size;
+ WORD32 num_entries;
+
+ num_entries = MIN(MAX_FRAMES, num_ref_frames);
+ num_entries = 2 * ((2 * num_entries) + 1);
+
+ size = num_entries * sizeof(void *);
+ size += PAD_MAP_IDX_POC * sizeof(void *);
+ size *= u4_total_num_mbs;
+ size += sizeof(dec_slice_struct_t) * u4_total_num_mbs;
+ memTab[MEM_REC_SLICE_HDR].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_SLICE_HDR].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_SLICE_HDR].u4_mem_size = size;
+ }
+ {
+
+ UWORD32 u4_num_entries;
+
+ u4_num_entries = u4_total_num_mbs;
+
+ memTab[MEM_REC_MB_INFO].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_MB_INFO].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_MB_INFO].u4_mem_size = sizeof(dec_mb_info_t)
+ * u4_num_entries;
+
+ memTab[MEM_REC_PRED_INFO].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_PRED_INFO].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+
+ memTab[MEM_REC_PRED_INFO].u4_mem_size = sizeof(pred_info_t) * 2*32;
+
+ memTab[MEM_REC_COEFF_DATA].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_COEFF_DATA].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_COEFF_DATA].u4_mem_size = MB_LUM_SIZE * sizeof(WORD16);
+ memTab[MEM_REC_COEFF_DATA].u4_mem_size += u4_num_entries
+ * (MAX(16 * sizeof(tu_sblk4x4_coeff_data_t),4 * sizeof(tu_blk8x8_coeff_data_t))
+ + 8 * sizeof(tu_sblk4x4_coeff_data_t));
+ memTab[MEM_REC_COEFF_DATA].u4_mem_size += u4_num_entries * 32; //32 bytes for each mb to store u1_prev_intra4x4_pred_mode and u1_rem_intra4x4_pred_mode data
+
+ }
+
+ memTab[MEM_REC_SPS].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_SPS].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_SPS].u4_mem_size = ((sizeof(dec_seq_params_t))
+ * MAX_NUM_SEQ_PARAMS);
+
+ memTab[MEM_REC_PPS].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_PPS].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_PPS].u4_mem_size = (sizeof(dec_pic_params_t))
+ * MAX_NUM_PIC_PARAMS;
+
+ {
+ UWORD32 u4_mem_size;
+
+ u4_mem_size = 0;
+ u4_mem_size += (((sizeof(dec_err_status_t) + 127) >> 7) << 7);
+ u4_mem_size += sizeof(sei);
+ u4_mem_size += sizeof(dpb_commands_t);
+ u4_mem_size += sizeof(dec_bit_stream_t);
+ u4_mem_size += sizeof(dec_slice_params_t);
+ u4_mem_size += MAX(sizeof(dec_seq_params_t), sizeof(dec_pic_params_t));
+
+ memTab[MEM_REC_EXTRA_MEM].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_EXTRA_MEM].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_EXTRA_MEM].u4_mem_size = u4_mem_size;
+ }
+
+ {
+
+ UWORD32 u4_mem_size;
+
+ u4_mem_size = 0;
+ u4_mem_size += ((TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC) * sizeof(void *));
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += (sizeof(bin_ctxt_model_t) * NUM_CABAC_CTXTS);
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += sizeof(ctxt_inc_mb_info_t);
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += sizeof(UWORD32) * (MAX_REF_BUFS * MAX_REF_BUFS);
+ u4_mem_size = ALIGN64(u4_mem_size);
+
+ u4_mem_size += MAX_REF_BUF_SIZE;
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += ((sizeof(WORD16)) * PRED_BUFFER_WIDTH
+ * PRED_BUFFER_HEIGHT);
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += sizeof(UWORD8) * (MB_LUM_SIZE);
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += sizeof(parse_pmbarams_t) * luma_width_in_mbs; //Max recon mb group*/
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += (sizeof(parse_part_params_t) * luma_width_in_mbs) << 4; //Max recon mb group*/
+ u4_mem_size = ALIGN64(u4_mem_size);
+
+ u4_mem_size += 2 * MAX_REF_BUFS * sizeof(struct pic_buffer_t);
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += 2 * MAX_REF_BUFS * sizeof(struct pic_buffer_t);
+ u4_mem_size = ALIGN64(u4_mem_size);
+ u4_mem_size += (sizeof(UWORD32) * 3 * (MAX_REF_BUFS * MAX_REF_BUFS)) << 3;
+ u4_mem_size = ALIGN64(u4_mem_size);
+
+ u4_mem_size += sizeof(UWORD32) * 2 * 3 * (MAX_REF_BUFS * MAX_REF_BUFS);
+ u4_mem_size = ALIGN64(u4_mem_size);
+
+ memTab[MEM_REC_INTERNAL_SCRATCH].u4_mem_alignment =
+ (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_INTERNAL_SCRATCH].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_SCRATCH_MEM;
+ memTab[MEM_REC_INTERNAL_SCRATCH].u4_mem_size = u4_mem_size;
+ }
+
+ {
+
+ UWORD32 u4_mem_used;
+ UWORD32 u4_numRows = MB_SIZE << 1;
+ UWORD32 u4_blk_wd = ((luma_width_in_mbs << 4) >> 1) + 8;
+
+ u4_mem_used = 0;
+ u4_mem_used += ((luma_width_in_mbs * sizeof(deblkmb_neighbour_t)) << 1);
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += (sizeof(neighbouradd_t) << 2);
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += ((sizeof(ctxt_inc_mb_info_t))
+ * (((luma_width_in_mbs + 1) << 1) + 1));
+ u4_mem_used = ALIGN64(u4_mem_used);
+
+ u4_mem_used += (sizeof(mv_pred_t) * luma_width_in_mbs * 16);
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += (sizeof(mv_pred_t) * luma_width_in_mbs * 16);
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += (sizeof(mv_pred_t) * luma_width_in_mbs * 4
+ * MV_SCRATCH_BUFS);
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_numRows = BLK8x8SIZE << 1;
+
+ u4_blk_wd = ((luma_width_in_mbs << 3) >> 1) + 8;
+
+ u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_mem_used += 32;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(UWORD8) * (luma_width + 16) * 2;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(UWORD8) * ((luma_width >> 1) + 16) * 2
+ * YUV420SP_FACTOR;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(UWORD8) * ((luma_width >> 1) + 16) * 2;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += sizeof(mb_neigbour_params_t) * (luma_width_in_mbs + 1)
+ * luma_height_in_mbs;
+ u4_mem_used += luma_width;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += luma_width >> 1;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += luma_width >> 1;
+ u4_mem_used = ALIGN64(u4_mem_used);
+
+ u4_mem_used += ((MB_SIZE + 4) << 1) * PAD_LEN_Y_H;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += ((BLK8x8SIZE + 2) << 1) * PAD_LEN_UV_H;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ u4_mem_used += ((BLK8x8SIZE + 2) << 1) * PAD_LEN_UV_H;
+ u4_mem_used = ALIGN64(u4_mem_used);
+ memTab[MEM_REC_INTERNAL_PERSIST].u4_mem_alignment =
+ (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_INTERNAL_PERSIST].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_INTERNAL_PERSIST].u4_mem_size = u4_mem_used;
+ }
+
+ memTab[MEM_REC_BITSBUF].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_BITSBUF].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_BITSBUF].u4_mem_size = MAX(256000, (luma_width * luma_height));
+
+ {
+
+ UWORD32 u4_thread_struct_size = ithread_get_handle_size();
+
+ memTab[MEM_REC_THREAD_HANDLE].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_THREAD_HANDLE].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_THREAD_HANDLE].u4_mem_size = u4_thread_struct_size * 2;
+
+ }
+
+ memTab[MEM_REC_PARSE_MAP].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_PARSE_MAP].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_PARSE_MAP].u4_mem_size = u4_total_num_mbs;
+
+ memTab[MEM_REC_PROC_MAP].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_PROC_MAP].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_PROC_MAP].u4_mem_size = u4_total_num_mbs;
+
+ memTab[MEM_REC_SLICE_NUM_MAP].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_SLICE_NUM_MAP].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_SLICE_NUM_MAP].u4_mem_size = u4_total_num_mbs
+ * sizeof(UWORD16);
+
+ memTab[MEM_REC_DPB_MGR].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_DPB_MGR].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_DPB_MGR].u4_mem_size = sizeof(dpb_manager_t);
+
+ memTab[MEM_REC_BACKUP].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_BACKUP].e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_BACKUP].u4_mem_size = sizeof(iv_mem_rec_t) * MEM_REC_CNT;
+
+ {
+
+ UWORD32 u4_mem_size;
+
+ u4_mem_size = sizeof(disp_mgr_t);
+ u4_mem_size += sizeof(buf_mgr_t) + ithread_get_mutex_lock_size();
+ u4_mem_size += sizeof(struct pic_buffer_t) * (H264_MAX_REF_PICS * 2);
+
+ memTab[MEM_REC_PIC_BUF_MGR].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_PIC_BUF_MGR].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_PIC_BUF_MGR].u4_mem_size = u4_mem_size;
+ }
+
+ {
+ UWORD32 u4_mem_size;
+
+ u4_mem_size = sizeof(buf_mgr_t) + ithread_get_mutex_lock_size();
+ u4_mem_size += sizeof(col_mv_buf_t) * (H264_MAX_REF_PICS * 2);
+ u4_mem_size = ALIGN128(u4_mem_size);
+ u4_mem_size += ((luma_width * luma_height) >> 4)
+ * (MIN(max_dpb_size, num_ref_frames) + 1);
+ memTab[MEM_REC_MV_BUF_MGR].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_MV_BUF_MGR].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ memTab[MEM_REC_MV_BUF_MGR].u4_mem_size = u4_mem_size;
+ }
+
+ memTab[MEM_REC_PRED_INFO_PKD].u4_mem_alignment = (128 * 8) / CHAR_BIT;
+ memTab[MEM_REC_PRED_INFO_PKD].e_mem_type =
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+
+ {
+ UWORD32 u4_num_entries;
+ u4_num_entries = u4_total_num_mbs;
+
+ if(1 == num_ref_frames)
+ u4_num_entries *= 16;
+ else
+ u4_num_entries *= 16 * 2;
+
+ memTab[MEM_REC_PRED_INFO_PKD].u4_mem_size = sizeof(pred_info_pkd_t)
+ * u4_num_entries;
+ }
+
+ ps_mem_q_op->s_ivd_fill_mem_rec_op_t.u4_num_mem_rec_filled = MEM_REC_CNT;
+
+
+ return IV_SUCCESS;
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_clr                                               */
+/*                                                                           */
+/*  Description   : Frees the picture and MV buffer managers and copies the  */
+/*                  decoder's saved memory records back to the application   */
+/*                                                                           */
+/*  Inputs        : dec_hdl   - decoder handle                               */
+/*                  pv_api_ip - pointer to iv_retrieve_mem_rec_ip_t          */
+/*                  pv_api_op - pointer to iv_retrieve_mem_rec_op_t          */
+/*  Outputs       : pv_mem_rec_location of the input structure receives     */
+/*                  MEM_REC_CNT records; u4_num_mem_rec_filled is updated    */
+/*  Returns       : IV_SUCCESS, or IV_FAIL when init_done is not 1           */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_clr(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    dec_struct_t *ps_codec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+    iv_retrieve_mem_rec_ip_t *ps_retrieve_ip =
+                    (iv_retrieve_mem_rec_ip_t *)pv_api_ip;
+    iv_retrieve_mem_rec_op_t *ps_retrieve_op =
+                    (iv_retrieve_mem_rec_op_t *)pv_api_op;
+
+    if(1 == ps_codec->init_done)
+    {
+        /* Let go of both buffer managers before the records are returned */
+        ih264_buf_mgr_free((buf_mgr_t *)ps_codec->pv_pic_buf_mgr);
+        ih264_buf_mgr_free((buf_mgr_t *)ps_codec->pv_mv_buf_mgr);
+
+        /* Hand the complete record table back in one copy */
+        memcpy(ps_retrieve_ip->pv_mem_rec_location, ps_codec->ps_mem_tab,
+               sizeof(iv_mem_rec_t) * MEM_REC_CNT);
+        ps_retrieve_op->u4_num_mem_rec_filled = MEM_REC_CNT;
+
+        H264_DEC_DEBUG_PRINT("The clear non-conceal num mem recs: %d\n",
+                             ps_retrieve_op->u4_num_mem_rec_filled);
+
+        return IV_SUCCESS;
+    }
+
+    /* Decoder was never initialised: nothing to give back */
+    return IV_FAIL;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_init                                              */
+/*                                                                           */
+/*  Description   : API entry point for decoder initialisation; forwards to  */
+/*                  ih264d_init_video_decoder                                */
+/*                                                                           */
+/*  Inputs        : dec_hdl   - decoder handle                               */
+/*                  pv_api_ip - pointer to ih264d_init_ip_t                  */
+/*                  pv_api_op - pointer to ih264d_init_op_t                  */
+/*  Outputs       :                                                          */
+/*  Returns       : status reported by ih264d_init_video_decoder             */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_init(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    /* Success and failure statuses are both passed through unchanged */
+    return ih264d_init_video_decoder(dec_hdl,
+                                     (ih264d_init_ip_t *)pv_api_ip,
+                                     (ih264d_init_op_t *)pv_api_op);
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_map_error                                         */
+/*                                                                           */
+/*  Description   : Maps a decoder error code to a bit mask of IVD error     */
+/*                  groups                                                   */
+/*                                                                           */
+/*  Inputs        : i4_err_status - decoder error code                       */
+/*  Globals       : none                                                     */
+/*  Outputs       :                                                          */
+/*  Returns       : mask built from (1 << IVD_xxx) bits; 0 when the code is  */
+/*                  not listed in any group                                  */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+UWORD32 ih264d_map_error(UWORD32 i4_err_status)
+{
+    UWORD32 u4_group_mask = 0;
+
+    switch(i4_err_status)
+    {
+        /* Group: fatal error */
+        case ERROR_MEM_ALLOC_ISRAM_T:
+        case ERROR_MEM_ALLOC_SDRAM_T:
+        case ERROR_BUF_MGR:
+        case ERROR_MB_GROUP_ASSGN_T:
+        case ERROR_FRAME_LIMIT_OVER:
+        case ERROR_ACTUAL_RESOLUTION_GREATER_THAN_INIT:
+        case ERROR_PROFILE_NOT_SUPPORTED:
+        case ERROR_INIT_NOT_DONE:
+            u4_group_mask = 1 << IVD_FATALERROR;
+            H264_DEC_DEBUG_PRINT("\nFatal Error\n");
+            break;
+
+        /* Group: corrupted data */
+        case ERROR_DBP_MANAGER_T:
+        case ERROR_GAPS_IN_FRM_NUM:
+        case ERROR_UNKNOWN_NAL:
+        case ERROR_INV_MB_SLC_GRP_T:
+        case ERROR_MULTIPLE_SLC_GRP_T:
+        case ERROR_UNKNOWN_LEVEL:
+        case ERROR_UNAVAIL_PICBUF_T:
+        case ERROR_UNAVAIL_MVBUF_T:
+        case ERROR_UNAVAIL_DISPBUF_T:
+        case ERROR_NUM_REF:
+        case ERROR_REFIDX_ORDER_T:
+        case ERROR_PIC0_NOT_FOUND_T:
+        case ERROR_MB_TYPE:
+        case ERROR_SUB_MB_TYPE:
+        case ERROR_CBP:
+        case ERROR_REF_IDX:
+        case ERROR_NUM_MV:
+        case ERROR_CHROMA_PRED_MODE:
+        case ERROR_INTRAPRED:
+        case ERROR_NEXT_MB_ADDRESS_T:
+        case ERROR_MB_ADDRESS_T:
+        case ERROR_PIC1_NOT_FOUND_T:
+        case ERROR_CAVLC_NUM_COEFF_T:
+        case ERROR_CAVLC_SCAN_POS_T:
+        case ERROR_PRED_WEIGHT_TABLE_T:
+        case ERROR_CORRUPTED_SLICE:
+            u4_group_mask = 1 << IVD_CORRUPTEDDATA;
+            break;
+
+        /* Group: unsupported input */
+        case ERROR_NOT_SUPP_RESOLUTION:
+        case ERROR_FEATURE_UNAVAIL:
+        case ERROR_ACTUAL_LEVEL_GREATER_THAN_INIT:
+            u4_group_mask = 1 << IVD_UNSUPPORTEDINPUT;
+            break;
+
+        /* Group: corrupted header */
+        case ERROR_INVALID_PIC_PARAM:
+        case ERROR_INVALID_SEQ_PARAM:
+        case ERROR_EGC_EXCEED_32_1_T:
+        case ERROR_EGC_EXCEED_32_2_T:
+        case ERROR_INV_RANGE_TEV_T:
+        case ERROR_INV_SLC_TYPE_T:
+        case ERROR_INV_POC_TYPE_T:
+        case ERROR_INV_RANGE_QP_T:
+        case ERROR_INV_SPS_PPS_T:
+        case ERROR_INV_SLICE_HDR_T:
+            u4_group_mask = 1 << IVD_CORRUPTEDHEADER;
+            break;
+
+        /* Group: insufficient data */
+        case ERROR_EOB_FLUSHBITS_T:
+        case ERROR_EOB_GETBITS_T:
+        case ERROR_EOB_GETBIT_T:
+        case ERROR_EOB_BYPASS_T:
+        case ERROR_EOB_DECISION_T:
+        case ERROR_EOB_TERMINATE_T:
+        case ERROR_EOB_READCOEFF4X4CAB_T:
+            u4_group_mask = 1 << IVD_INSUFFICIENTDATA;
+            break;
+
+        /* Group: unsupported parameter, reported together with fatal */
+        case ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED:
+        case ERROR_DISP_WIDTH_RESET_TO_PIC_WIDTH:
+            u4_group_mask = (1 << IVD_UNSUPPORTEDPARAM)
+                            | (1 << IVD_FATALERROR);
+            break;
+
+        /* Group: concealment applied */
+        case ERROR_DANGLING_FIELD_IN_PIC:
+            u4_group_mask = 1 << IVD_APPLIEDCONCEALMENT;
+            break;
+
+        /* Any other code belongs to no group: the mask stays 0 */
+        default:
+            break;
+    }
+
+    return u4_group_mask;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_video_decode */
+/* */
+/* Description : handle video decode API command. Validates the call, */
+/* then either returns the next display field (flush mode) */
+/* or splits the input at start codes and parses NAL units */
+/* until the header/frame is done or the input is consumed */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure (ivd_video_decode_ip_t) */
+/* :pv_api_op pointer to output structure (ivd_video_decode_op_t) */
+/* Outputs : fields of the output structure, e.g. u4_error_code and */
+/* u4_num_bytes_consumed */
+/* Returns : IV_SUCCESS or IV_FAIL */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 ih264d_video_decode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+ /* ! */
+
+ dec_struct_t * ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+
+ /* NOTE(review): i4_err_status is never assigned after this initialisation; */
+ /* it is only read in the ERROR_DANGLING_FIELD_IN_PIC comparison further down */
+ WORD32 i4_err_status = 0;
+ UWORD8 *pu1_buf = NULL;
+ WORD32 buflen;
+ UWORD32 u4_max_ofst, u4_length_of_start_code = 0;
+
+ UWORD32 bytes_consumed = 0;
+ UWORD32 cur_slice_is_nonref = 0;
+ UWORD32 u4_next_is_aud;
+ UWORD32 u4_first_start_code_found = 0;
+ WORD32 ret;
+ WORD32 header_data_left = 0,frame_data_left = 0;
+ UWORD8 *pu1_bitstrm_buf;
+ ithread_set_name((void*)"Parse_thread");
+
+
+ ivd_video_decode_ip_t *ps_dec_ip;
+ ivd_video_decode_op_t *ps_dec_op;
+ ps_dec_ip = (ivd_video_decode_ip_t *)pv_api_ip;
+ ps_dec_op = (ivd_video_decode_op_t *)pv_api_op;
+ ps_dec->pv_dec_out = ps_dec_op;
+ ps_dec->process_called = 1;
+ ps_dec->u2_mb_skip_error = 0;
+ /* Refuse to decode unless the decoder reports init_done == 1 */
+ if(ps_dec->init_done != 1)
+ {
+ return IV_FAIL;
+ }
+
+ /*Data memory barrier instruction,so that bitstream write by the application is complete*/
+ DATA_SYNC();
+
+ /* Outside flush mode a stream buffer and a byte count are mandatory. */
+ /* NOTE(review): u4_error_code is only OR-ed into on these early exits; it is */
+ /* not reset to 0 until later in this function, so the reported value also */
+ /* contains whatever the field held on entry - confirm the caller clears it */
+ if(0 == ps_dec->u1_flushfrm)
+ {
+ if(ps_dec_ip->pv_stream_buffer == NULL)
+ {
+ ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_dec_op->u4_error_code |= IVD_DEC_FRM_BS_BUF_NULL;
+ return IV_FAIL;
+ }
+ if(ps_dec_ip->u4_num_Bytes <= 0)
+ {
+ ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_dec_op->u4_error_code |= IVD_DEC_NUMBYTES_INV;
+ return IV_FAIL;
+
+ }
+ }
+ ps_dec->u1_pic_decode_done = 0;
+
+ ps_dec_op->u4_num_bytes_consumed = 0;
+
+ ps_dec->ps_out_buffer = NULL;
+
+ /* Use the caller's output buffer descriptor only when the size reported in */
+ /* u4_size reaches the s_out_buffer member of the input structure */
+ if(ps_dec_ip->u4_size
+ >= offsetof(ivd_video_decode_ip_t, s_out_buffer))
+ ps_dec->ps_out_buffer = &ps_dec_ip->s_out_buffer;
+
+ /* Format conversion inside this call needs both an output buffer and an */
+ /* output structure large enough to reach u4_disp_buf_id */
+ if(ps_dec_op->u4_size
+ >= offsetof(ivd_video_decode_op_t, u4_disp_buf_id)
+ && ps_dec->ps_out_buffer != NULL)
+ ps_dec->u4_fmt_conv_in_process = 1;
+ else
+ ps_dec->u4_fmt_conv_in_process = 0;
+
+ ps_dec->u4_fmt_conv_cur_row = 0;
+
+ ps_dec->u4_output_present = 0;
+ /* Start non-zero; the display paths below treat 0 as "display field fetched" */
+ ps_dec->s_disp_op.u4_error_code = 1;
+ ps_dec->u4_fmt_conv_num_rows = FMT_CONV_NUM_ROWS;
+ ps_dec->u4_stop_threads = 0;
+ /* In non-shared display mode (and not header-only decode) every output */
+ /* buffer supplied by the application must be non-NULL with a non-zero size */
+ if(ps_dec->u4_fmt_conv_in_process && 0 == ps_dec->u4_share_disp_buf
+ && ps_dec->i4_decode_header == 0)
+ {
+ UWORD32 i;
+ if(ps_dec->ps_out_buffer->u4_num_bufs == 0)
+ {
+ ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_dec_op->u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS;
+ return IV_FAIL;
+ }
+
+ for(i = 0; i < ps_dec->ps_out_buffer->u4_num_bufs; i++)
+ {
+ if(ps_dec->ps_out_buffer->pu1_bufs[i] == NULL)
+ {
+ ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_dec_op->u4_error_code |= IVD_DISP_FRM_OP_BUF_NULL;
+ return IV_FAIL;
+ }
+
+ if(ps_dec->ps_out_buffer->u4_min_out_buf_size[i] == 0)
+ {
+ ps_dec_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+ ps_dec_op->u4_error_code |=
+ IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
+ return IV_FAIL;
+ }
+ }
+ }
+
+ /* Stop once the decoded-frame counter reaches NUM_FRAMES_LIMIT */
+ if(ps_dec->u4_total_frames_decoded >= NUM_FRAMES_LIMIT)
+ {
+ ps_dec_op->u4_error_code = ERROR_FRAME_LIMIT_OVER;
+ return IV_FAIL;
+ }
+
+ /* ! */
+ ps_dec->u4_ts = ps_dec_ip->u4_ts;
+
+ /* Reset the per-call output fields; from here on u4_error_code starts at 0 */
+ ps_dec_op->u4_error_code = 0;
+ ps_dec_op->e_pic_type = -1;
+ ps_dec_op->u4_output_present = 0;
+ ps_dec_op->u4_frame_decoded_flag = 0;
+
+ ps_dec->i4_frametype = -1;
+ ps_dec->i4_content_type = -1;
+ /*
+ * For field pictures, set the bottom and top picture decoded u4_flag correctly.
+ * (Both fields were seen in earlier calls: start a new field pair.)
+ */
+ {
+ if((TOP_FIELD_ONLY | BOT_FIELD_ONLY) == ps_dec->u1_top_bottom_decoded)
+ {
+ ps_dec->u1_top_bottom_decoded = 0;
+ }
+ }
+ ps_dec->u4_slice_start_code_found = 0;
+
+ /* In case the decoder is not in flush mode(in shared mode),
+ then decoder has to pick up a buffer to write current frame.
+ Check if a frame is available in such cases */
+
+ if(ps_dec->u1_init_dec_flag == 1 && ps_dec->u4_share_disp_buf == 1
+ && ps_dec->u1_flushfrm == 0)
+ {
+ UWORD32 i;
+
+ WORD32 disp_avail = 0, free_id;
+
+ /* Check if at least one buffer is available with the codec */
+ /* If not then return to application with error */
+ for(i = 0; i < ps_dec->u1_pic_bufs; i++)
+ {
+ if(0 == ps_dec->u4_disp_buf_mapping[i]
+ || 1 == ps_dec->u4_disp_buf_to_be_freed[i])
+ {
+ disp_avail = 1;
+ break;
+ }
+
+ }
+
+ if(0 == disp_avail)
+ {
+ /* If something is queued for display wait for that buffer to be returned */
+
+ ps_dec_op->u4_error_code = IVD_DEC_REF_BUF_NULL;
+ ps_dec_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
+ return (IV_FAIL);
+ }
+
+ /* Probe the picture buffer manager until a buffer is found that is not */
+ /* currently mapped to display. */
+ /* NOTE(review): when no free buffer is returned and nothing is queued for */
+ /* display, the loop body changes no state visible here before iterating */
+ /* again - confirm ih264_buf_mgr_get_next_free cannot keep returning NULL */
+ while(1)
+ {
+ pic_buffer_t *ps_pic_buf;
+ ps_pic_buf = (pic_buffer_t *)ih264_buf_mgr_get_next_free(
+ (buf_mgr_t *)ps_dec->pv_pic_buf_mgr, &free_id);
+
+ if(ps_pic_buf == NULL)
+ {
+ UWORD32 i, display_queued = 0;
+
+ /* check if any buffer was given for display which is not returned yet */
+ /* (this scan is bounded by MAX_DISP_BUFS_NEW, the one above by u1_pic_bufs) */
+ for(i = 0; i < (MAX_DISP_BUFS_NEW); i++)
+ {
+ if(0 != ps_dec->u4_disp_buf_mapping[i])
+ {
+ display_queued = 1;
+ break;
+ }
+ }
+ /* If some buffer is queued for display, then codec has to signal an error and wait
+ for that buffer to be returned.
+ If nothing is queued for display then codec has ownership of all display buffers
+ and it can reuse any of the existing buffers and continue decoding */
+
+ if(1 == display_queued)
+ {
+ /* If something is queued for display wait for that buffer to be returned */
+ ps_dec_op->u4_error_code = IVD_DEC_REF_BUF_NULL;
+ ps_dec_op->u4_error_code |= (1
+ << IVD_UNSUPPORTEDPARAM);
+ return (IV_FAIL);
+ }
+ }
+ else
+ {
+ /* If the buffer is with display, then mark it as in use and then look for a buffer again */
+ if(1 == ps_dec->u4_disp_buf_mapping[free_id])
+ {
+ ih264_buf_mgr_set_status(
+ (buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ free_id,
+ BUF_MGR_IO);
+ }
+ else
+ {
+ /**
+ * Found a free buffer for present call. Release it now.
+ * Will be again obtained later.
+ */
+ ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ free_id,
+ BUF_MGR_IO);
+ break;
+ }
+ }
+ }
+
+ }
+
+ /* Flush mode: no bitstream is parsed in this call. Fetch the next display */
+ /* field, format-convert it in one go when one was obtained, fill the output */
+ /* structure and return. */
+ if(ps_dec->u4_fmt_conv_in_process && ps_dec->u1_flushfrm &&
+ ps_dec->u1_init_dec_flag)
+ {
+
+ ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer,
+ &(ps_dec->s_disp_op));
+ if(0 == ps_dec->s_disp_op.u4_error_code)
+ {
+ ps_dec->u4_fmt_conv_cur_row = 0;
+ ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht;
+ ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
+ ps_dec->u4_fmt_conv_cur_row,
+ ps_dec->u4_fmt_conv_num_rows);
+ ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
+ ps_dec->u4_output_present = 1;
+
+ }
+ ih264d_release_display_field(ps_dec, &(ps_dec->s_disp_op));
+
+ ps_dec_op->u4_pic_wd = (UWORD32)ps_dec->u2_disp_width;
+ ps_dec_op->u4_pic_ht = (UWORD32)ps_dec->u2_disp_height;
+
+ ps_dec_op->u4_new_seq = 0;
+
+ ps_dec_op->u4_output_present = ps_dec->u4_output_present;
+ ps_dec_op->u4_progressive_frame_flag =
+ ps_dec->s_disp_op.u4_progressive_frame_flag;
+ ps_dec_op->e_output_format =
+ ps_dec->s_disp_op.e_output_format;
+ ps_dec_op->s_disp_frm_buf = ps_dec->s_disp_op.s_disp_frm_buf;
+ ps_dec_op->e4_fld_type = ps_dec->s_disp_op.e4_fld_type;
+ ps_dec_op->u4_ts = ps_dec->s_disp_op.u4_ts;
+ ps_dec_op->u4_disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id;
+
+ /*In the case of flush ,since no frame is decoded set pic type as invalid*/
+ ps_dec_op->u4_is_ref_flag = -1;
+ ps_dec_op->e_pic_type = IV_NA_FRAME;
+ ps_dec_op->u4_frame_decoded_flag = 0;
+
+ /* The flush call succeeds only when a display field was obtained */
+ if(0 == ps_dec->s_disp_op.u4_error_code)
+ {
+ return (IV_SUCCESS);
+ }
+ else
+ return (IV_FAIL);
+
+ }
+ if(ps_dec->u1_res_changed == 1)
+ {
+ /*if resolution has changed and all buffers have been flushed, reset decoder*/
+ ih264d_init_decoder(ps_dec);
+ }
+
+ /* Reset the per-call parsing and threading state */
+ ps_dec->u4_prev_nal_skipped = 0;
+
+ ps_dec->u4_start_frame_decode = 0;
+ ps_dec->u2_cur_mb_addr = 0;
+ ps_dec->cur_dec_mb_num = 0;
+ ps_dec->u4_first_slice_in_pic = 1;
+
+ ps_dec->u4_dec_thread_created = 0;
+ ps_dec->u4_bs_deblk_thread_created = 0;
+ ps_dec->u4_cur_bs_mb_num = 0;
+
+ ps_dec->as_fmt_conv_part[0].u4_flag = 1;
+ ps_dec->as_fmt_conv_part[1].u4_flag = 1;
+ ps_dec->as_fmt_conv_part[1].u4_start_y = 0;
+ ps_dec->as_fmt_conv_part[1].u4_num_rows_y = 0;
+
+ DEBUG_THREADS_PRINTF(" Starting process call\n");
+
+ ps_dec->u4_pic_buf_got = 0;
+ ps_dec->u2_skip_deblock = 0;
+
+ /* Main parse loop: each iteration locates one start-code delimited unit in */
+ /* the remaining input, copies it to the internal bitstream buffer and parses it. */
+ /* Note: every `continue` below jumps to the while() test at the bottom, which */
+ /* uses header_data_left/frame_data_left as left by the last iteration that ran */
+ /* to the end of the body (both are 0 before the first such iteration). */
+ do
+ {
+
+ pu1_buf = (UWORD8*)ps_dec_ip->pv_stream_buffer
+ + ps_dec_op->u4_num_bytes_consumed;
+
+ u4_max_ofst = ps_dec_ip->u4_num_Bytes
+ - ps_dec_op->u4_num_bytes_consumed;
+ pu1_bitstrm_buf = ps_dec->ps_mem_tab[MEM_REC_BITSBUF].pv_base;
+
+ u4_next_is_aud = 0;
+
+ buflen = ih264d_find_start_code(pu1_buf, 0, u4_max_ofst,
+ &u4_length_of_start_code,
+ &u4_next_is_aud);
+
+ /* A return of -1 is treated as an empty payload */
+ if(buflen == -1)
+ buflen = 0;
+
+ /* The unit counts as consumed up front; some exits below subtract */
+ /* bytes_consumed again so the same unit is presented on the next call */
+ bytes_consumed = buflen + u4_length_of_start_code;
+ ps_dec_op->u4_num_bytes_consumed += bytes_consumed;
+
+ if(buflen >= MAX_NAL_UNIT_SIZE)
+ {
+
+ ih264d_fill_output_struct_from_context(ps_dec, ps_dec_op);
+ H264_DEC_DEBUG_PRINT(
+ "\nNal Size exceeded %d, Processing Stopped..\n",
+ MAX_NAL_UNIT_SIZE);
+ ps_dec->i4_error_code = 1 << IVD_CORRUPTEDDATA;
+
+ ps_dec_op->e_pic_type = -1;
+ /*signal the decode thread*/
+ ps_dec->as_fmt_conv_part[1].u4_flag = 0;
+ ih264d_signal_decode_thread(ps_dec);
+ /*signal end of frame decode for current frame*/
+
+ if(ps_dec->u4_pic_buf_got == 0)
+ {
+ if(ps_dec->i4_header_decoded == 3)
+ {
+ ps_dec->u2_total_mbs_coded =
+ ps_dec->ps_cur_sps->u2_max_mb_addr + 1;
+ ps_dec->ps_cur_slice->u1_end_of_frame_signal = 1;
+ }
+
+ /* close deblock thread if it is not closed yet*/
+ if(ps_dec->u4_num_cores == 3)
+ {
+ ih264d_signal_bs_deblk_thread(ps_dec);
+ }
+ return IV_FAIL;
+ }
+ else
+ {
+ ps_dec->u1_pic_decode_done = 1;
+ continue;
+ }
+ }
+
+ {
+ UWORD8 u1_firstbyte, u1_nal_ref_idc;
+
+ /* Application asked to skip B pictures: units whose nal_ref_idc is 0 */
+ /* are dropped without being parsed */
+ if(ps_dec->i4_app_skip_mode == IVD_SKIP_B)
+ {
+ u1_firstbyte = *(pu1_buf + u4_length_of_start_code);
+ u1_nal_ref_idc = (UWORD8)(NAL_REF_IDC(u1_firstbyte));
+ if(u1_nal_ref_idc == 0)
+ {
+ /*skip non reference frames*/
+ cur_slice_is_nonref = 1;
+ continue;
+ }
+ else
+ {
+ if(1 == cur_slice_is_nonref)
+ {
+ /*We have encountered a referenced frame,return to app*/
+ ps_dec_op->u4_num_bytes_consumed -=
+ bytes_consumed;
+ ps_dec_op->e_pic_type = IV_B_FRAME;
+ ps_dec_op->u4_error_code =
+ IVD_DEC_FRM_SKIPPED;
+ ps_dec_op->u4_error_code |= (1
+ << IVD_UNSUPPORTEDPARAM);
+ ps_dec_op->u4_frame_decoded_flag = 0;
+ ps_dec_op->u4_size =
+ sizeof(ivd_video_decode_op_t);
+ /*signal the decode thread*/
+ ps_dec->as_fmt_conv_part[1].u4_flag = 0;
+ ih264d_signal_decode_thread(ps_dec);
+ /* close deblock thread if it is not closed yet*/
+ if(ps_dec->u4_num_cores == 3)
+ {
+ ih264d_signal_bs_deblk_thread(ps_dec);
+ }
+
+ return (IV_FAIL);
+ }
+ }
+
+ }
+
+ }
+
+
+ if(buflen)
+ {
+ /* Copy the payload (start code excluded) into the internal buffer */
+ memcpy(pu1_bitstrm_buf, pu1_buf + u4_length_of_start_code,
+ buflen);
+ u4_first_start_code_found = 1;
+
+ }
+ else
+ {
+ /*start code not found*/
+
+ if(u4_first_start_code_found == 0)
+ {
+ /*no start codes found in current process call*/
+
+ ps_dec->i4_error_code = ERROR_START_CODE_NOT_FOUND;
+ ps_dec_op->u4_error_code |= 1 << IVD_INSUFFICIENTDATA;
+
+ if(ps_dec->u4_pic_buf_got == 0)
+ {
+
+ ih264d_fill_output_struct_from_context(ps_dec,
+ ps_dec_op);
+
+ ps_dec_op->u4_error_code = ps_dec->i4_error_code;
+ ps_dec_op->u4_frame_decoded_flag = 0;
+
+ return (IV_FAIL);
+ }
+ else
+ {
+ ps_dec->u1_pic_decode_done = 1;
+ continue;
+ }
+ }
+ else
+ {
+ /* a start code has already been found earlier in the same process call*/
+ continue;
+ }
+
+ }
+
+ ps_dec->u4_return_to_app = 0;
+ ret = ih264d_parse_nal_unit(dec_hdl, ps_dec_op,
+ pu1_bitstrm_buf, buflen);
+ if(ret != OK)
+ {
+ /* Report the raw code combined with its IVD error-group bits */
+ UWORD32 error = ih264d_map_error(ret);
+ ps_dec_op->u4_error_code = error | ret;
+
+ if((ret == IVD_RES_CHANGED)||(ret == IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED))
+ {
+ /*dont consume the SPS*/
+ ps_dec_op->u4_num_bytes_consumed -= bytes_consumed;
+ }
+ return IV_FAIL;
+ }
+
+ if(ps_dec->u4_return_to_app)
+ {
+ /*We have encountered a referenced frame,return to app*/
+ ps_dec_op->u4_num_bytes_consumed -= bytes_consumed;
+ ps_dec_op->u4_error_code = IVD_DEC_FRM_SKIPPED;
+ ps_dec_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
+ ps_dec_op->u4_frame_decoded_flag = 0;
+ ps_dec_op->u4_size = sizeof(ivd_video_decode_op_t);
+ /*signal the decode thread*/
+ ps_dec->as_fmt_conv_part[1].u4_flag = 0;
+ ih264d_signal_decode_thread(ps_dec);
+ /* close deblock thread if it is not closed yet*/
+ if(ps_dec->u4_num_cores == 3)
+ {
+ ih264d_signal_bs_deblk_thread(ps_dec);
+ }
+ return (IV_FAIL);
+
+ }
+
+
+
+ /* Keep looping while input remains and either (header mode) the header */
+ /* is not fully decoded (i4_header_decoded != 3), or (frame mode) the */
+ /* picture is not done or the next unit was flagged as an AUD */
+ header_data_left = ((ps_dec->i4_decode_header == 1)
+ && (ps_dec->i4_header_decoded != 3)
+ && (ps_dec_op->u4_num_bytes_consumed
+ < ps_dec_ip->u4_num_Bytes));
+ frame_data_left = (((ps_dec->i4_decode_header == 0)
+ && ((ps_dec->u1_pic_decode_done == 0)
+ || (u4_next_is_aud == 1)))
+ && (ps_dec_op->u4_num_bytes_consumed
+ < ps_dec_ip->u4_num_Bytes));
+ }
+ while(( header_data_left == 1)||(frame_data_left == 1));
+
+ /* All input consumed but the coded MB count does not match the frame size: */
+ /* flag the current slice as erroneous and skip deblocking */
+ if((ps_dec->u2_total_mbs_coded
+ != (ps_dec->u2_frm_wd_in_mbs * ps_dec->u2_frm_ht_in_mbs))
+ && (ps_dec_op->u4_num_bytes_consumed
+ >= ps_dec_ip->u4_num_Bytes))
+ {
+ if(ps_dec->ps_parse_cur_slice != NULL)
+ {
+ ps_dec->ps_parse_cur_slice->u2_error_flag = 1;
+
+ ps_dec->u2_skip_deblock = 1;
+ }
+ }
+ if(ps_dec->u1_separate_parse)
+ {
+
+ /* If Format conversion is not complete,
+ complete it here */
+ if(ps_dec->u4_num_cores == 2)
+ {
+ ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht
+ - ps_dec->u4_fmt_conv_cur_row;
+ if(ps_dec->u4_output_present && ps_dec->u4_fmt_conv_in_process
+ && ps_dec->u4_fmt_conv_num_rows)
+ {
+ ps_dec->u4_fmt_conv_num_rows = MIN(
+ ps_dec->u4_fmt_conv_num_rows,
+ (ps_dec->s_disp_frame_info.u4_y_ht
+ - ps_dec->u4_fmt_conv_cur_row));
+ /* With more than 64 rows left, split the work: the first (even-sized) */
+ /* half is converted here, the rest is described in as_fmt_conv_part[1] */
+ if(ps_dec->u4_fmt_conv_num_rows > 64)
+ {
+ UWORD32 num_rows_first_part = (ps_dec->u4_fmt_conv_num_rows
+ / 2);
+
+ /* Align it to even number */
+ num_rows_first_part = (num_rows_first_part >> 1) << 1;
+
+ /* Schedule last half of the remaining rows to be processed in second thread */
+ ps_dec->as_fmt_conv_part[1].u4_start_y =
+ ps_dec->u4_fmt_conv_cur_row
+ + num_rows_first_part;
+ ps_dec->as_fmt_conv_part[1].u4_num_rows_y =
+ (ps_dec->u4_fmt_conv_num_rows
+ - num_rows_first_part);
+ ps_dec->u4_fmt_conv_num_rows = num_rows_first_part;
+ /* Barrier before the flag is written, so the part description above is */
+ /* written first */
+ DATA_SYNC();
+ ps_dec->as_fmt_conv_part[1].u4_flag = 2;
+
+ }
+ else
+ {
+ ps_dec->as_fmt_conv_part[1].u4_flag = 0;
+ }
+
+ ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
+ ps_dec->u4_fmt_conv_cur_row,
+ ps_dec->u4_fmt_conv_num_rows);
+ ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
+
+ }
+ else
+ {
+ ps_dec->as_fmt_conv_part[1].u4_flag = 0;
+ }
+ }
+ else
+ {
+ ps_dec->as_fmt_conv_part[1].u4_flag = 0;
+ }
+
+ /*signal the decode thread*/
+ ih264d_signal_decode_thread(ps_dec);
+ /* close deblock thread if it is not closed yet*/
+ if(ps_dec->u4_num_cores == 3)
+ {
+ ih264d_signal_bs_deblk_thread(ps_dec);
+ }
+ }
+ /* Decode thread would have completed format conversion for ps_dec->as_fmt_conv_part[1].u4_num_rows_y rows */
+
+ ps_dec->u4_fmt_conv_cur_row += ps_dec->as_fmt_conv_part[1].u4_num_rows_y;
+
+ if((ps_dec_op->u4_error_code & 0xff)
+ != ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED)
+ {
+ ps_dec_op->u4_pic_wd = (UWORD32)ps_dec->u2_disp_width;
+ ps_dec_op->u4_pic_ht = (UWORD32)ps_dec->u2_disp_height;
+ }
+
+//Report if header (sps and pps) has not been decoded yet
+ if(ps_dec->i4_header_decoded != 3)
+ {
+ ps_dec_op->u4_error_code |= (1 << IVD_INSUFFICIENTDATA);
+
+ }
+
+ /* Sets the same bit under a stricter condition than the check above, so it */
+ /* has no additional effect */
+ if(ps_dec->i4_decode_header == 1 && ps_dec->i4_header_decoded != 3)
+ {
+ ps_dec_op->u4_error_code |= (1 << IVD_INSUFFICIENTDATA);
+
+ }
+ if(ps_dec->u4_prev_nal_skipped)
+ {
+ /*We have encountered a referenced frame,return to app*/
+ ps_dec_op->u4_error_code = IVD_DEC_FRM_SKIPPED;
+ ps_dec_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
+ ps_dec_op->u4_frame_decoded_flag = 0;
+ ps_dec_op->u4_size = sizeof(ivd_video_decode_op_t);
+ /* close deblock thread if it is not closed yet*/
+ if(ps_dec->u4_num_cores == 3)
+ {
+ ih264d_signal_bs_deblk_thread(ps_dec);
+ }
+ return (IV_FAIL);
+
+ }
+
+ /* End-of-picture handling, entered only when a slice start code was seen. */
+ /* i4_err_status is still 0 here (never reassigned in this function). */
+ if((ps_dec->u4_slice_start_code_found == 1)
+ && (ERROR_DANGLING_FIELD_IN_PIC != i4_err_status))
+ {
+ /*
+ * For field pictures, set the bottom and top picture decoded u4_flag correctly.
+ */
+
+ /* No picture buffer was obtained during this call: report and fail */
+ if(ps_dec->u4_pic_buf_got == 0)
+ {
+ ih264d_fill_output_struct_from_context(ps_dec, ps_dec_op);
+
+ ps_dec_op->u4_error_code = ps_dec->i4_error_code;
+ ps_dec_op->u4_frame_decoded_flag = 0;
+ /* close deblock thread if it is not closed yet*/
+ if(ps_dec->u4_num_cores == 3)
+ {
+ ih264d_signal_bs_deblk_thread(ps_dec);
+ }
+ return (IV_FAIL);
+ }
+
+ if(ps_dec->ps_cur_slice->u1_field_pic_flag)
+ {
+ if(1 == ps_dec->ps_cur_slice->u1_bottom_field_flag)
+ {
+ ps_dec->u1_top_bottom_decoded |= BOT_FIELD_ONLY;
+ }
+ else
+ {
+ ps_dec->u1_top_bottom_decoded |= TOP_FIELD_ONLY;
+ }
+ }
+
+ /* Calling Function to deblock Picture and Display */
+ ret = ih264d_deblock_display(ps_dec);
+ if(ret != 0)
+ return IV_FAIL;
+
+ /*set to complete ,as we dont support partial frame decode*/
+ if(ps_dec->i4_header_decoded == 3)
+ {
+ ps_dec->u2_total_mbs_coded = ps_dec->ps_cur_sps->u2_max_mb_addr + 1;
+ }
+
+ /*Update the i4_frametype at the end of picture*/
+ if(ps_dec->ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL)
+ {
+ ps_dec->i4_frametype = IV_IDR_FRAME;
+ }
+ else if(ps_dec->i4_pic_type == B_SLICE)
+ {
+ ps_dec->i4_frametype = IV_B_FRAME;
+ }
+ else if(ps_dec->i4_pic_type == P_SLICE)
+ {
+ ps_dec->i4_frametype = IV_P_FRAME;
+ }
+ else if(ps_dec->i4_pic_type == I_SLICE)
+ {
+ ps_dec->i4_frametype = IV_I_FRAME;
+ }
+ else
+ {
+ H264_DEC_DEBUG_PRINT("Shouldn't come here\n");
+ }
+
+ //Update the content type
+ ps_dec->i4_content_type = ps_dec->ps_cur_slice->u1_field_pic_flag;
+
+ /* The counter advances by 2 for a frame and by 1 for a field picture */
+ ps_dec->u4_total_frames_decoded = ps_dec->u4_total_frames_decoded + 2;
+ ps_dec->u4_total_frames_decoded = ps_dec->u4_total_frames_decoded
+ - ps_dec->ps_cur_slice->u1_field_pic_flag;
+
+ }
+
+ /* close deblock thread if it is not closed yet*/
+ if(ps_dec->u4_num_cores == 3)
+ {
+ ih264d_signal_bs_deblk_thread(ps_dec);
+ }
+
+ if(ps_dec->u4_fmt_conv_in_process)
+ {
+ /* In case the decoder is configured to run in low delay mode,
+ * then get display buffer and then format convert.
+ * Note in this mode, format conversion does not run in parallel in a thread and adds to the codec cycles
+ */
+
+ if((0 == ps_dec->u4_num_reorder_frames_at_init)
+ && ps_dec->u1_init_dec_flag)
+ {
+
+ ih264d_get_next_display_field(ps_dec, ps_dec->ps_out_buffer,
+ &(ps_dec->s_disp_op));
+ if(0 == ps_dec->s_disp_op.u4_error_code)
+ {
+ ps_dec->u4_fmt_conv_cur_row = 0;
+ ps_dec->u4_output_present = 1;
+ }
+ }
+
+ ih264d_fill_output_struct_from_context(ps_dec, ps_dec_op);
+
+ /* If Format conversion is not complete,
+ complete it here */
+ ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht
+ - ps_dec->u4_fmt_conv_cur_row;
+ DEBUG_PERF_PRINTF("ps_dec->u4_fmt_conv_num_rows = %d\n",ps_dec->u4_fmt_conv_num_rows);
+ if(ps_dec->u4_output_present && ps_dec->u4_fmt_conv_num_rows)
+ {
+ ps_dec->u4_fmt_conv_num_rows = MIN(
+ ps_dec->u4_fmt_conv_num_rows,
+ (ps_dec->s_disp_frame_info.u4_y_ht
+ - ps_dec->u4_fmt_conv_cur_row));
+ ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
+ ps_dec->u4_fmt_conv_cur_row,
+ ps_dec->u4_fmt_conv_num_rows);
+ ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
+ }
+
+ ih264d_release_display_field(ps_dec, &(ps_dec->s_disp_op));
+ }
+
+ /* Header-only decode with bit 0 of i4_header_decoded set: report progressive */
+ /* unless a valid SPS has both frame_mbs_only_flag and mb_aff_flag equal to 0 */
+ if(ps_dec->i4_decode_header == 1 && (ps_dec->i4_header_decoded & 1) == 1)
+ {
+ ps_dec_op->u4_progressive_frame_flag = 1;
+ if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+ {
+ if((0 == ps_dec->ps_sps->u1_frame_mbs_only_flag)
+ && (0 == ps_dec->ps_sps->u1_mb_aff_flag))
+ ps_dec_op->u4_progressive_frame_flag = 0;
+
+ }
+ }
+
+ /*Data memory barrier instruction,so that yuv write by the library is complete*/
+ DATA_SYNC();
+
+ H264_DEC_DEBUG_PRINT("The num bytes consumed: %d\n",
+ ps_dec_op->u4_num_bytes_consumed);
+ return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_get_version                                       */
+/*                                                                           */
+/*  Description   : Builds the codec version string with the VERSION macro   */
+/*                  and copies it, NUL terminated, into the buffer supplied  */
+/*                  by the application                                       */
+/*                                                                           */
+/*  Inputs        : dec_hdl   - decoder handle (unused)                      */
+/*                  pv_api_ip - pointer to ivd_ctl_getversioninfo_ip_t       */
+/*                  pv_api_op - pointer to ivd_ctl_getversioninfo_op_t       */
+/*  Outputs       : pv_version_buffer of the input structure receives the    */
+/*                  string; u4_error_code of the output structure is set     */
+/*  Returns       : IV_SUCCESS, or IV_FAIL with u4_error_code set to         */
+/*                  IH264D_VERS_BUF_INSUFFICIENT when the buffer cannot hold */
+/*                  the string plus its terminator                           */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_get_version(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    char version_string[MAXVERSION_STRLEN + 1];
+    size_t version_len;
+
+    ivd_ctl_getversioninfo_ip_t *ps_ip;
+    ivd_ctl_getversioninfo_op_t *ps_op;
+
+    ps_ip = (ivd_ctl_getversioninfo_ip_t *)pv_api_ip;
+    ps_op = (ivd_ctl_getversioninfo_op_t *)pv_api_op;
+    UNUSED(dec_hdl);
+    ps_op->u4_error_code = IV_SUCCESS;
+
+    VERSION(version_string, CODEC_NAME, CODEC_RELEASE_TYPE, CODEC_RELEASE_VER,
+            CODEC_VENDOR);
+
+    /* Sizes that are zero or negative when viewed as signed are rejected */
+    if((WORD32)ps_ip->u4_version_buffer_size <= 0)
+    {
+        ps_op->u4_error_code = IH264D_VERS_BUF_INSUFFICIENT;
+        return (IV_FAIL);
+    }
+
+    /* Bounded length: never look past the local array */
+    version_len = strnlen(version_string, MAXVERSION_STRLEN);
+
+    if(ps_ip->u4_version_buffer_size < (version_len + 1))
+    {
+        ps_op->u4_error_code = IH264D_VERS_BUF_INSUFFICIENT;
+        return IV_FAIL;
+    }
+
+    /* Write exactly version_len + 1 bytes, the amount just verified to fit.  */
+    /* strncpy(dst, src, MAXVERSION_STRLEN) would zero-pad up to              */
+    /* MAXVERSION_STRLEN bytes and overrun a smaller (but sufficient) buffer, */
+    /* and would not guarantee termination.                                   */
+    memcpy(ps_ip->pv_version_buffer, version_string, version_len);
+    ((char *)ps_ip->pv_version_buffer)[version_len] = '\0';
+    ps_op->u4_error_code = IV_SUCCESS;
+
+    return (IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_get_display_frame                                 */
+/*                                                                           */
+/*  Description   : Fetches the next display field, format-converts it into  */
+/*                  the application's output buffers when one was obtained,  */
+/*                  and releases the display field                           */
+/*                                                                           */
+/*  Inputs        : dec_hdl   - decoder handle                               */
+/*                  pv_api_ip - pointer to ivd_get_display_frame_ip_t        */
+/*                  pv_api_op - pointer to ivd_get_display_frame_op_t        */
+/*  Outputs       : the output structure receives a copy of the decoder's    */
+/*                  s_disp_op                                                */
+/*  Returns       : IV_FAIL when format conversion is handled by the decode  */
+/*                  call or the output buffers are invalid; otherwise the    */
+/*                  value returned by ih264d_get_next_display_field          */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_get_display_frame(iv_obj_t *dec_hdl,
+                                void *pv_api_ip,
+                                void *pv_api_op)
+{
+    dec_struct_t *ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+    ivd_get_display_frame_ip_t *ps_disp_ip =
+                    (ivd_get_display_frame_ip_t *)pv_api_ip;
+    ivd_get_display_frame_op_t *ps_disp_op =
+                    (ivd_get_display_frame_op_t *)pv_api_op;
+    WORD32 i4_status;
+
+    /* Display is delivered through the decode call in this mode */
+    if(ps_dec->u4_fmt_conv_in_process)
+    {
+        return IV_FAIL;
+    }
+
+    if(1 != ps_dec->process_called)
+    {
+        //Return Proper Error Code
+    }
+
+    /* Output buffers are validated only when display buffers are not shared */
+    if(0 == ps_dec->u4_share_disp_buf)
+    {
+        UWORD32 u4_idx;
+
+        if(0 == ps_disp_ip->s_out_buffer.u4_num_bufs)
+        {
+            ps_disp_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM)
+                            | IVD_DISP_FRM_ZERO_OP_BUFS;
+            return IV_FAIL;
+        }
+
+        for(u4_idx = 0; u4_idx < ps_disp_ip->s_out_buffer.u4_num_bufs;
+                        u4_idx++)
+        {
+            if(NULL == ps_disp_ip->s_out_buffer.pu1_bufs[u4_idx])
+            {
+                ps_disp_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM)
+                                | IVD_DISP_FRM_OP_BUF_NULL;
+                return IV_FAIL;
+            }
+
+            if(0 == ps_disp_ip->s_out_buffer.u4_min_out_buf_size[u4_idx])
+            {
+                ps_disp_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM)
+                                | IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
+                return IV_FAIL;
+            }
+        }
+    }
+
+    i4_status = ih264d_get_next_display_field(ps_dec,
+                                              &(ps_disp_ip->s_out_buffer),
+                                              &(ps_dec->s_disp_op));
+
+    /* Publish the decoder's display result to the caller */
+    *ps_disp_op = (ps_dec->s_disp_op);
+
+    if(0 == ps_disp_op->u4_error_code)
+    {
+        /* Convert the whole picture height in a single pass */
+        ps_dec->u4_fmt_conv_cur_row = 0;
+        ps_dec->u4_fmt_conv_num_rows = ps_dec->s_disp_frame_info.u4_y_ht;
+        ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
+                              ps_dec->u4_fmt_conv_cur_row,
+                              ps_dec->u4_fmt_conv_num_rows);
+        ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
+    }
+
+    ih264d_release_display_field(ps_dec, ps_disp_op);
+
+    return i4_status;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_set_display_frame */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Registers application display buffers with the decoder.
+ * The number of buffers accepted is the smallest of the count supplied by
+ * the application, MAX_DISP_BUFS_NEW and the requirement computed here from
+ * the active SPS or, when no valid SPS exists, from the init-time parameters.
+ * Buffers are stored only when display buffers are shared.
+ * Always returns IV_SUCCESS.
+ */
+WORD32 ih264d_set_display_frame(iv_obj_t *dec_hdl,
+                                void *pv_api_ip,
+                                void *pv_api_op)
+{
+    dec_struct_t *ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+    ivd_set_display_frame_ip_t *ps_ip =
+                    (ivd_set_display_frame_ip_t *)pv_api_ip;
+    ivd_set_display_frame_op_t *ps_op =
+                    (ivd_set_display_frame_op_t *)pv_api_op;
+    UWORD32 u4_buf_idx, u4_plane, num_mvbank_req;
+
+    ps_op->u4_error_code = 0;
+
+    if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+    {
+        if((ps_dec->ps_sps->u1_vui_parameters_present_flag == 1)
+                        && (ps_dec->ps_sps->s_vui.u4_num_reorder_frames != 64))
+        {
+            num_mvbank_req = ps_dec->ps_sps->s_vui.u4_num_reorder_frames + 2;
+        }
+        else
+        {
+            /* If VUI is not present, assume the maximum possible reference
+             * frames for the level as max reorder frames */
+            UWORD32 u4_level = ps_dec->u4_level_at_init;
+            UWORD32 u4_wd_mbs = ps_dec->u2_frm_wd_in_mbs;
+            UWORD32 u4_ht_mbs = ps_dec->u2_frm_ht_in_mbs;
+
+            num_mvbank_req = ih264d_get_dpb_size_new(u4_level, u4_wd_mbs,
+                                                     u4_ht_mbs);
+        }
+
+        num_mvbank_req += ps_dec->ps_sps->u1_num_ref_frames + 1;
+    }
+    else
+    {
+        UWORD32 u4_from_app, u4_from_level;
+
+        /* Requirement implied by the init-time ref/reorder counts */
+        u4_from_app = ps_dec->u4_num_ref_frames_at_init
+                        + ps_dec->u4_num_reorder_frames_at_init + 1;
+        if(u4_from_app <= 1)
+            u4_from_app = 2;
+
+        /* Requirement implied by the init-time level and resolution */
+        u4_from_level = ih264d_get_dpb_size_new(
+                        ps_dec->u4_level_at_init,
+                        (ps_dec->u4_width_at_init >> 4),
+                        (ps_dec->u4_height_at_init >> 4));
+        u4_from_level = u4_from_level * 2 + 1;
+
+        num_mvbank_req = MIN(u4_from_level, u4_from_app);
+        num_mvbank_req += ps_dec->u4_num_extra_disp_bufs_at_init;
+    }
+
+    ps_dec->u4_num_disp_bufs = 0;
+
+    if(ps_dec->u4_share_disp_buf)
+    {
+        UWORD32 u4_accepted = ps_ip->num_disp_bufs;
+
+        u4_accepted = MIN(u4_accepted, MAX_DISP_BUFS_NEW);
+        u4_accepted = MIN(u4_accepted, num_mvbank_req);
+
+        ps_dec->u4_num_disp_bufs = u4_accepted;
+
+        for(u4_buf_idx = 0; u4_buf_idx < u4_accepted; u4_buf_idx++)
+        {
+            ps_dec->disp_bufs[u4_buf_idx].u4_num_bufs =
+                            ps_ip->s_disp_buffer[u4_buf_idx].u4_num_bufs;
+
+            /* Three plane pointers and sizes are recorded per buffer */
+            for(u4_plane = 0; u4_plane < 3; u4_plane++)
+            {
+                ps_dec->disp_bufs[u4_buf_idx].buf[u4_plane] =
+                                ps_ip->s_disp_buffer[u4_buf_idx].pu1_bufs[u4_plane];
+                ps_dec->disp_bufs[u4_buf_idx].u4_bufsize[u4_plane] =
+                                ps_ip->s_disp_buffer[u4_buf_idx].u4_min_out_buf_size[u4_plane];
+            }
+        }
+    }
+
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_set_flush_mode */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Globals : <Does it use any global variables?> */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Puts the decoder into flush mode: sets u1_flushfrm, releases the pictures
+ * held in the DPB and the display buffers, then reports the error code
+ * currently stored in the decoder's pv_dec_out structure.
+ * Always returns IV_SUCCESS.
+ */
+WORD32 ih264d_set_flush_mode(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    ivd_ctl_flush_op_t *ps_flush_op = (ivd_ctl_flush_op_t *)pv_api_op;
+    dec_struct_t *ps_dec;
+
+    ps_flush_op->u4_error_code = 0;
+    ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+    UNUSED(pv_api_ip);
+
+    /* Signal flush frame control call */
+    ps_dec->u1_flushfrm = 1;
+
+    ih264d_release_pics_in_dpb((void *)ps_dec, ps_dec->u1_pic_bufs);
+    ih264d_release_display_bufs(ps_dec);
+
+    /* Reported value comes from the decode output structure; verify the value */
+    ps_flush_op->u4_error_code =
+                    ((ivd_ctl_flush_op_t *)ps_dec->pv_dec_out)->u4_error_code;
+
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_status */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Globals : <Does it use any global variables?> */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+
+/*
+ * Fills an ivd_ctl_getstatus_op_t with the current picture dimensions, the
+ * number of display buffers and the minimum input/output buffer counts and
+ * sizes for the configured output chroma format.
+ *
+ * Dimensions come from the active SPS when one is valid, otherwise from the
+ * init-time width and height. The computed display buffer count is also
+ * stored in ps_dec->u4_num_disp_bufs_requested.
+ *
+ * Returns IV_SUCCESS, or IV_FAIL with ERROR_FEATURE_UNAVAIL when the chroma
+ * format is not one of the handled formats.
+ */
+WORD32 ih264d_get_status(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    UWORD32 i;
+    dec_struct_t *ps_dec;
+    UWORD32 pic_wd, pic_ht;
+    ivd_ctl_getstatus_op_t *ps_ctl_op = (ivd_ctl_getstatus_op_t *)pv_api_op;
+
+    UNUSED(pv_api_ip);
+    ps_ctl_op->u4_error_code = 0;
+
+    ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+
+    pic_wd = ps_dec->u4_width_at_init;
+    pic_ht = ps_dec->u4_height_at_init;
+
+    if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+    {
+        ps_ctl_op->u4_pic_ht = ps_dec->u2_disp_height;
+        ps_ctl_op->u4_pic_wd = ps_dec->u2_disp_width;
+
+        if(0 == ps_dec->u4_share_disp_buf)
+        {
+            pic_wd = ps_dec->u2_disp_width;
+            pic_ht = ps_dec->u2_disp_height;
+        }
+        else
+        {
+            pic_wd = ps_dec->u2_frm_wd_y;
+            pic_ht = ps_dec->u2_frm_ht_y;
+        }
+    }
+    else
+    {
+        /* No valid SPS yet: report the init-time dimensions. Width goes to
+         * u4_pic_wd and height to u4_pic_ht (previously these were swapped). */
+        ps_ctl_op->u4_pic_wd = pic_wd;
+        ps_ctl_op->u4_pic_ht = pic_ht;
+
+        if(1 == ps_dec->u4_share_disp_buf)
+        {
+            pic_wd += (PAD_LEN_Y_H << 1);
+            pic_ht += (PAD_LEN_Y_V << 2);
+        }
+    }
+
+    /* A larger application-requested display width widens the buffers */
+    if(ps_dec->u4_app_disp_width > pic_wd)
+        pic_wd = ps_dec->u4_app_disp_width;
+
+    if(0 == ps_dec->u4_share_disp_buf)
+    {
+        ps_ctl_op->u4_num_disp_bufs = 1;
+    }
+    else
+    {
+        if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+        {
+            UWORD32 level, width_mbs, height_mbs;
+
+            level = ps_dec->u4_level_at_init;
+            width_mbs = ps_dec->u2_frm_wd_in_mbs;
+            height_mbs = ps_dec->u2_frm_ht_in_mbs;
+
+            if((ps_dec->ps_sps->u1_vui_parameters_present_flag == 1)
+                            && (ps_dec->ps_sps->s_vui.u4_num_reorder_frames
+                                            != 64))
+            {
+                ps_ctl_op->u4_num_disp_bufs =
+                                ps_dec->ps_sps->s_vui.u4_num_reorder_frames + 2;
+            }
+            else
+            {
+                /* If VUI is not present, assume the maximum possible
+                 * reference frames for the level as max reorder frames */
+                ps_ctl_op->u4_num_disp_bufs = ih264d_get_dpb_size_new(
+                                level, width_mbs, height_mbs);
+            }
+
+            ps_ctl_op->u4_num_disp_bufs +=
+                            ps_dec->ps_sps->u1_num_ref_frames + 1;
+        }
+        else
+        {
+            ps_ctl_op->u4_num_disp_bufs = ih264d_get_dpb_size_new(
+                            ps_dec->u4_level_at_init,
+                            (ps_dec->u4_width_at_init >> 4),
+                            (ps_dec->u4_height_at_init >> 4));
+
+            /* Twice the DPB size, capped by the init-time ref + reorder count */
+            ps_ctl_op->u4_num_disp_bufs +=
+                            ps_ctl_op->u4_num_disp_bufs;
+
+            ps_ctl_op->u4_num_disp_bufs =
+                            MIN(ps_ctl_op->u4_num_disp_bufs,
+                                (ps_dec->u4_num_ref_frames_at_init
+                                                + ps_dec->u4_num_reorder_frames_at_init));
+        }
+
+        /* Shared mode: keep the count within [6, 32] */
+        ps_ctl_op->u4_num_disp_bufs = MAX(
+                        ps_ctl_op->u4_num_disp_bufs, 6);
+        ps_ctl_op->u4_num_disp_bufs = MIN(
+                        ps_ctl_op->u4_num_disp_bufs, 32);
+    }
+
+    ps_ctl_op->u4_error_code = ps_dec->i4_error_code;
+
+    ps_ctl_op->u4_frame_rate = 0; //make it proper
+    ps_ctl_op->u4_bit_rate = 0; //make it proper
+    ps_ctl_op->e_content_type = ps_dec->i4_content_type;
+    ps_ctl_op->e_output_chroma_format = ps_dec->u1_chroma_format;
+    ps_ctl_op->u4_min_num_in_bufs = MIN_IN_BUFS;
+
+    /* Output buffer count and per-plane sizes depend on the chroma format */
+    if(ps_dec->u1_chroma_format == IV_YUV_420P)
+    {
+        ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420;
+        ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht);
+        ps_ctl_op->u4_min_out_buf_size[1] = (pic_wd * pic_ht) >> 2;
+        ps_ctl_op->u4_min_out_buf_size[2] = (pic_wd * pic_ht) >> 2;
+    }
+    else if(ps_dec->u1_chroma_format == IV_YUV_422ILE)
+    {
+        ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE;
+        ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht) * 2;
+        ps_ctl_op->u4_min_out_buf_size[1] =
+                        ps_ctl_op->u4_min_out_buf_size[2] = 0;
+    }
+    else if(ps_dec->u1_chroma_format == IV_RGB_565)
+    {
+        ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565;
+        ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht) * 2;
+        ps_ctl_op->u4_min_out_buf_size[1] =
+                        ps_ctl_op->u4_min_out_buf_size[2] = 0;
+    }
+    else if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+                    || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU))
+    {
+        ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420SP;
+        ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht);
+        ps_ctl_op->u4_min_out_buf_size[1] = (pic_wd * pic_ht) >> 1;
+        ps_ctl_op->u4_min_out_buf_size[2] = 0;
+    }
+    else
+    {
+        //Invalid chroma format; Error code may be updated, verify in testing if needed
+        ps_ctl_op->u4_error_code = ERROR_FEATURE_UNAVAIL;
+        return IV_FAIL;
+    }
+
+    for(i = 0; i < ps_ctl_op->u4_min_num_in_bufs; i++)
+    {
+        ps_ctl_op->u4_min_in_buf_size[i] = MIN_IN_BUF_SIZE;
+    }
+
+    ps_dec->u4_num_disp_bufs_requested = ps_ctl_op->u4_num_disp_bufs;
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_buf_info */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Globals : <Does it use any global variables?> */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_get_buf_info(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{ /*
+ * Fills an ivd_ctl_getbufinfo_op_t with the minimum number and size of
+ * input and output buffers and the number of display buffers for the
+ * configured output chroma format. Returns IV_FAIL for an unhandled
+ * chroma format, IV_SUCCESS otherwise. The display buffer count is also
+ * stored in ps_dec->u4_num_disp_bufs_requested.
+ */
+
+ dec_struct_t * ps_dec;
+ UWORD8 i = 0; // Default for 420P format
+ UWORD16 pic_wd, pic_ht; /* NOTE(review): 16-bit here, while ih264d_get_status keeps the same quantities in UWORD32; larger init/app widths would be truncated - confirm intended */
+ ivd_ctl_getbufinfo_op_t *ps_ctl_op =
+ (ivd_ctl_getbufinfo_op_t*)pv_api_op;
+ UNUSED(pv_api_ip);
+ ps_ctl_op->u4_error_code = 0;
+
+ ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+
+ ps_ctl_op->u4_min_num_in_bufs = MIN_IN_BUFS;
+ if(ps_dec->u1_chroma_format == IV_YUV_420P) /* output buffer count depends on the chroma format */
+ ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420;
+ else if(ps_dec->u1_chroma_format == IV_YUV_422ILE)
+ ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE;
+ else if(ps_dec->u1_chroma_format == IV_RGB_565)
+ ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565;
+ else if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+ || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU))
+ ps_ctl_op->u4_min_num_out_bufs = MIN_OUT_BUFS_420SP;
+
+ else
+ {
+ //Invalid chroma format; Error code may be updated, verify in testing if needed
+ return IV_FAIL; /* NOTE(review): u4_error_code stays 0 on this path, unlike ih264d_get_status which sets ERROR_FEATURE_UNAVAIL */
+ }
+
+ ps_ctl_op->u4_num_disp_bufs = 1;
+
+ for(i = 0; i < ps_ctl_op->u4_min_num_in_bufs; i++)
+ {
+ ps_ctl_op->u4_min_in_buf_size[i] = MIN_IN_BUF_SIZE;
+ }
+
+ pic_wd = ps_dec->u4_width_at_init; /* start from init-time dimensions; replaced below when a valid SPS exists */
+ pic_ht = ps_dec->u4_height_at_init;
+
+ if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+ {
+
+ if(0 == ps_dec->u4_share_disp_buf)
+ {
+ pic_wd = ps_dec->u2_disp_width; /* non-shared mode: size by display dimensions */
+ pic_ht = ps_dec->u2_disp_height;
+
+ }
+ else
+ {
+ pic_wd = ps_dec->u2_frm_wd_y; /* shared mode: size by the decoder's luma frame dimensions */
+ pic_ht = ps_dec->u2_frm_ht_y;
+ }
+ }
+ else
+ {
+ if(1 == ps_dec->u4_share_disp_buf)
+ {
+ pic_wd += (PAD_LEN_Y_H << 1); /* no valid SPS, shared mode: add padding to the init-time dimensions */
+ pic_ht += (PAD_LEN_Y_V << 2);
+
+ }
+ }
+
+ if((WORD32)ps_dec->u4_app_disp_width > pic_wd) /* a larger application display width widens the buffer; compared as signed */
+ pic_wd = ps_dec->u4_app_disp_width;
+
+ if(0 == ps_dec->u4_share_disp_buf)
+ ps_ctl_op->u4_num_disp_bufs = 1;
+ else
+ {
+ if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+ {
+ UWORD32 level, width_mbs, height_mbs;
+
+ level = ps_dec->u4_level_at_init;
+ width_mbs = ps_dec->u2_frm_wd_in_mbs;
+ height_mbs = ps_dec->u2_frm_ht_in_mbs;
+
+ if((ps_dec->ps_sps->u1_vui_parameters_present_flag == 1)
+ && (ps_dec->ps_sps->s_vui.u4_num_reorder_frames
+ != 64))
+ {
+ ps_ctl_op->u4_num_disp_bufs =
+ ps_dec->ps_sps->s_vui.u4_num_reorder_frames + 2;
+ }
+ else
+ {
+ /*if VUI is not present assume maximum possible reference frames for the level,
+ * as max reorder frames*/
+ ps_ctl_op->u4_num_disp_bufs = ih264d_get_dpb_size_new(
+ level, width_mbs, height_mbs);
+ }
+
+ ps_ctl_op->u4_num_disp_bufs +=
+ ps_dec->ps_sps->u1_num_ref_frames + 1;
+
+ }
+ else
+ {
+ ps_ctl_op->u4_num_disp_bufs = ih264d_get_dpb_size_new(
+ ps_dec->u4_level_at_init,
+ (ps_dec->u4_width_at_init >> 4),
+ (ps_dec->u4_height_at_init >> 4));
+
+ ps_ctl_op->u4_num_disp_bufs +=
+ ps_ctl_op->u4_num_disp_bufs; /* doubles the DPB size before capping it below */
+
+ ps_ctl_op->u4_num_disp_bufs =
+ MIN(ps_ctl_op->u4_num_disp_bufs,
+ (ps_dec->u4_num_ref_frames_at_init
+ + ps_dec->u4_num_reorder_frames_at_init));
+
+ }
+
+ ps_ctl_op->u4_num_disp_bufs = MAX(
+ ps_ctl_op->u4_num_disp_bufs, 6); /* shared mode: keep the count within [6, 32] */
+ ps_ctl_op->u4_num_disp_bufs = MIN(
+ ps_ctl_op->u4_num_disp_bufs, 32);
+ }
+
+ /*! Per-plane minimum output buffer sizes for the chroma format */
+ if(ps_dec->u1_chroma_format == IV_YUV_420P)
+ {
+ ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht);
+ ps_ctl_op->u4_min_out_buf_size[1] = (pic_wd * pic_ht)
+ >> 2;
+ ps_ctl_op->u4_min_out_buf_size[2] = (pic_wd * pic_ht)
+ >> 2;
+ }
+ else if(ps_dec->u1_chroma_format == IV_YUV_422ILE)
+ {
+ ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht)
+ * 2;
+ ps_ctl_op->u4_min_out_buf_size[1] =
+ ps_ctl_op->u4_min_out_buf_size[2] = 0;
+ }
+ else if(ps_dec->u1_chroma_format == IV_RGB_565)
+ {
+ ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht)
+ * 2;
+ ps_ctl_op->u4_min_out_buf_size[1] =
+ ps_ctl_op->u4_min_out_buf_size[2] = 0;
+ }
+ else if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+ || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU))
+ {
+ ps_ctl_op->u4_min_out_buf_size[0] = (pic_wd * pic_ht);
+ ps_ctl_op->u4_min_out_buf_size[1] = (pic_wd * pic_ht)
+ >> 1;
+ ps_ctl_op->u4_min_out_buf_size[2] = 0;
+ }
+ ps_dec->u4_num_disp_bufs_requested = ps_ctl_op->u4_num_disp_bufs; /* remember the count reported to the application */
+
+ return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_set_params */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Applies run-time configuration from an ivd_ctl_set_config_ip_t: the frame
+ * skip mode, the application display width and the decode mode (header only
+ * or full frame).
+ *
+ * Every unsupported value sets bits in ps_ctl_op->u4_error_code and makes
+ * the call return IV_FAIL. Error bits accumulate, so a failure in a later
+ * check no longer discards the bits set by an earlier one. Returns
+ * IV_SUCCESS when all three settings were accepted.
+ */
+WORD32 ih264d_set_params(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    dec_struct_t *ps_dec;
+    WORD32 ret = IV_SUCCESS;
+
+    ivd_ctl_set_config_ip_t *ps_ctl_ip =
+                    (ivd_ctl_set_config_ip_t *)pv_api_ip;
+    ivd_ctl_set_config_op_t *ps_ctl_op =
+                    (ivd_ctl_set_config_op_t *)pv_api_op;
+
+    ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+
+    ps_dec->u4_skip_frm_mask = 0;
+
+    ps_ctl_op->u4_error_code = 0;
+
+    ps_dec->i4_app_skip_mode = ps_ctl_ip->e_frm_skip_mode;
+
+    /* Translate the requested skip mode into a slice-type mask */
+    if(ps_ctl_ip->e_frm_skip_mode != IVD_SKIP_NONE)
+    {
+        if(ps_ctl_ip->e_frm_skip_mode == IVD_SKIP_P)
+        {
+            ps_dec->u4_skip_frm_mask |= 1 << P_SLC_BIT;
+        }
+        else if(ps_ctl_ip->e_frm_skip_mode == IVD_SKIP_B)
+        {
+            ps_dec->u4_skip_frm_mask |= 1 << B_SLC_BIT;
+        }
+        else if(ps_ctl_ip->e_frm_skip_mode == IVD_SKIP_PB)
+        {
+            ps_dec->u4_skip_frm_mask |= 1 << B_SLC_BIT;
+            ps_dec->u4_skip_frm_mask |= 1 << P_SLC_BIT;
+        }
+        else if(ps_ctl_ip->e_frm_skip_mode == IVD_SKIP_I)
+        {
+            ps_dec->u4_skip_frm_mask |= 1 << I_SLC_BIT;
+        }
+        else
+        {
+            /* Dynamic parameter not supported */
+            ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
+            ret = IV_FAIL;
+        }
+    }
+
+    if((0 != ps_dec->u4_app_disp_width)
+                    && (ps_ctl_ip->u4_disp_wd != ps_dec->u4_app_disp_width))
+    {
+        /* A non-zero display width cannot be changed once set */
+        ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
+        ps_ctl_op->u4_error_code |= ERROR_DISP_WIDTH_INVALID;
+        ret = IV_FAIL;
+    }
+    else
+    {
+        if(ps_ctl_ip->u4_disp_wd >= ps_dec->u2_pic_wd)
+        {
+            ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd;
+        }
+        else if(0 == ps_dec->i4_header_decoded)
+        {
+            /* Header not decoded yet: accept the width as given */
+            ps_dec->u4_app_disp_width = ps_ctl_ip->u4_disp_wd;
+        }
+        else if(ps_ctl_ip->u4_disp_wd == 0)
+        {
+            ps_dec->u4_app_disp_width = 0;
+        }
+        else
+        {
+            /*
+             * Set the display width to zero. This will ensure that the wrong
+             * value we had stored (0xFFFFFFFF) does not propagate.
+             */
+            ps_dec->u4_app_disp_width = 0;
+            ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
+            ps_ctl_op->u4_error_code |= ERROR_DISP_WIDTH_INVALID;
+            ret = IV_FAIL;
+        }
+    }
+
+    if(ps_ctl_ip->e_vid_dec_mode == IVD_DECODE_FRAME)
+    {
+        ps_dec->i4_decode_header = 0;
+    }
+    else if(ps_ctl_ip->e_vid_dec_mode == IVD_DECODE_HEADER)
+    {
+        ps_dec->i4_decode_header = 1;
+    }
+    else
+    {
+        /* OR the bit in so ERROR_DISP_WIDTH_INVALID set above is kept */
+        ps_ctl_op->u4_error_code |= (1 << IVD_UNSUPPORTEDPARAM);
+        ps_dec->i4_decode_header = 1;
+        ret = IV_FAIL;
+    }
+
+    return ret;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_set_default_params */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 08 08 2011 100421 Copied from set_params */
+/* */
+/*****************************************************************************/
+/*
+ * Restores the default run-time configuration: no application display
+ * width, no frame skipping and header-decode mode. Clears the output error
+ * code and always returns IV_SUCCESS.
+ */
+WORD32 ih264d_set_default_params(iv_obj_t *dec_hdl,
+                                 void *pv_api_ip,
+                                 void *pv_api_op)
+{
+    dec_struct_t *ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+    ivd_ctl_set_config_op_t *ps_cfg_op = (ivd_ctl_set_config_op_t *)pv_api_op;
+
+    UNUSED(pv_api_ip);
+
+    ps_dec->u4_app_disp_width = 0;
+    ps_dec->u4_skip_frm_mask = 0;
+    ps_dec->i4_decode_header = 1;
+
+    ps_cfg_op->u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_reset */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Globals : <Does it use any global variables?> */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Re-initialises the decoder through ih264d_init_decoder(). When the codec
+ * handle is NULL, ERROR_INIT_NOT_DONE is reported in the output structure
+ * instead. Returns IV_SUCCESS in both cases.
+ */
+WORD32 ih264d_reset(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    ivd_ctl_reset_op_t *ps_reset_op = (ivd_ctl_reset_op_t *)pv_api_op;
+    dec_struct_t *ps_dec;
+
+    UNUSED(pv_api_ip);
+    ps_reset_op->u4_error_code = 0;
+
+    ps_dec = (dec_struct_t *)(dec_hdl->pv_codec_handle);
+
+    if(NULL == ps_dec)
+    {
+        H264_DEC_DEBUG_PRINT(
+                        "\nReset called without Initializing the decoder\n");
+        ps_reset_op->u4_error_code = ERROR_INIT_NOT_DONE;
+        return IV_SUCCESS;
+    }
+
+    /* The display buffer tables are not cleared by this call */
+    ih264d_init_decoder(ps_dec);
+
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_ctl */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Dispatches a video control call to the handler selected by e_sub_cmd.
+ * Returns IV_FAIL when the decoder's init_done flag is not 1; otherwise
+ * returns the handler's status. An unrecognised sub-command does nothing
+ * and returns IV_SUCCESS.
+ */
+WORD32 ih264d_ctl(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    dec_struct_t *ps_dec = dec_hdl->pv_codec_handle;
+    ivd_ctl_set_config_ip_t *ps_cfg_ip;
+    ivd_ctl_set_config_op_t *ps_cfg_op;
+    UWORD32 u4_sub_cmd;
+    WORD32 i4_status = IV_SUCCESS;
+
+    if(1 != ps_dec->init_done)
+    {
+        //Return proper Error Code
+        return IV_FAIL;
+    }
+
+    /* The set-config layout is used only to reach u4_error_code and e_sub_cmd */
+    ps_cfg_ip = (ivd_ctl_set_config_ip_t *)pv_api_ip;
+    ps_cfg_op = (ivd_ctl_set_config_op_t *)pv_api_op;
+    ps_cfg_op->u4_error_code = 0;
+    u4_sub_cmd = ps_cfg_ip->e_sub_cmd;
+
+    switch(u4_sub_cmd)
+    {
+        case IVD_CMD_CTL_GETPARAMS:
+            i4_status = ih264d_get_status(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IVD_CMD_CTL_SETPARAMS:
+            i4_status = ih264d_set_params(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IVD_CMD_CTL_RESET:
+            i4_status = ih264d_reset(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IVD_CMD_CTL_SETDEFAULT:
+            i4_status = ih264d_set_default_params(dec_hdl, pv_api_ip,
+                                                  pv_api_op);
+            break;
+
+        case IVD_CMD_CTL_FLUSH:
+            i4_status = ih264d_set_flush_mode(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IVD_CMD_CTL_GETBUFINFO:
+            i4_status = ih264d_get_buf_info(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IVD_CMD_CTL_GETVERSION:
+            i4_status = ih264d_get_version(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IH264D_CMD_CTL_DEGRADE:
+            i4_status = ih264d_set_degrade(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IH264D_CMD_CTL_SET_NUM_CORES:
+            i4_status = ih264d_set_num_cores(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS:
+            i4_status = ih264d_get_frame_dimensions(dec_hdl, pv_api_ip,
+                                                    pv_api_op);
+            break;
+
+        case IH264D_CMD_CTL_SET_PROCESSOR:
+            i4_status = ih264d_set_processor(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        default:
+            H264_DEC_DEBUG_PRINT("\ndo nothing\n");
+            break;
+    }
+
+    return i4_status;
+}
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_rel_display_frame */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Releases a display buffer previously handed to the application.
+ *
+ * pv_api_ip : ivd_rel_display_frame_ip_t; u4_disp_buf_id names the buffer.
+ * pv_api_op : ivd_rel_display_frame_op_t; u4_error_code is written.
+ *
+ * In non-shared mode the buffer's bookkeeping entries are simply cleared.
+ * In shared mode the buffer is released to the picture buffer manager if it
+ * is currently mapped; otherwise it is marked to be freed later.
+ *
+ * Returns IV_FAIL with the IVD_UNSUPPORTEDPARAM bit set when the buffer id
+ * is outside the bookkeeping tables, IV_SUCCESS otherwise.
+ */
+WORD32 ih264d_rel_display_frame(iv_obj_t *dec_hdl,
+                                void *pv_api_ip,
+                                void *pv_api_op)
+{
+    ivd_rel_display_frame_ip_t *ps_rel_ip =
+                    (ivd_rel_display_frame_ip_t *)pv_api_ip;
+    ivd_rel_display_frame_op_t *ps_rel_op =
+                    (ivd_rel_display_frame_op_t *)pv_api_op;
+    dec_struct_t *ps_dec = dec_hdl->pv_codec_handle;
+    UWORD32 buf_released = 0;
+    UWORD32 u4_buf_id;
+
+    ps_rel_op->u4_error_code = 0;
+    u4_buf_id = ps_rel_ip->u4_disp_buf_id;
+
+    /*
+     * The id comes straight from the application and indexes
+     * u4_disp_buf_mapping[] and u4_disp_buf_to_be_freed[]; reject ids beyond
+     * MAX_DISP_BUFS_NEW so the tables are never written out of bounds.
+     * NOTE(review): the table size is assumed from the (commented-out)
+     * memsets in ih264d_reset - confirm against the dec_struct_t definition.
+     */
+    if(u4_buf_id >= MAX_DISP_BUFS_NEW)
+    {
+        ps_rel_op->u4_error_code = (1 << IVD_UNSUPPORTEDPARAM);
+        return IV_FAIL;
+    }
+
+    if(0 == ps_dec->u4_share_disp_buf)
+    {
+        ps_dec->u4_disp_buf_mapping[u4_buf_id] = 0;
+        ps_dec->u4_disp_buf_to_be_freed[u4_buf_id] = 0;
+        return IV_SUCCESS;
+    }
+
+    if((ps_dec->pv_pic_buf_mgr != NULL)
+                    && (1 == ps_dec->u4_disp_buf_mapping[u4_buf_id]))
+    {
+        ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                              ps_rel_ip->u4_disp_buf_id,
+                              BUF_MGR_IO);
+        ps_dec->u4_disp_buf_mapping[u4_buf_id] = 0;
+        buf_released = 1;
+    }
+
+    /* Not mapped (or no buffer manager): remember the release for later */
+    if((1 == ps_dec->u4_share_disp_buf) && (0 == buf_released))
+        ps_dec->u4_disp_buf_to_be_freed[u4_buf_id] = 1;
+
+    return IV_SUCCESS;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Sets degrade params
+ *
+ * @par Description:
+ * Sets degrade params.
+ * Refer to ih264d_ctl_degrade_ip_t definition for details
+ *
+ * @param[in] ps_codec_obj
+ * Pointer to codec object at API level
+ *
+ * @param[in] pv_api_ip
+ * Pointer to input argument structure
+ *
+ * @param[out] pv_api_op
+ * Pointer to output argument structure
+ *
+ * @returns Status
+ *
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+
+/* Copies the degrade type, non-degrade interval and degrade picture count
+ * from the input structure into the codec context, resets the degrade
+ * picture counter and clears the output error code. */
+WORD32 ih264d_set_degrade(iv_obj_t *ps_codec_obj,
+                          void *pv_api_ip,
+                          void *pv_api_op)
+{
+    dec_struct_t *ps_codec = (dec_struct_t *)ps_codec_obj->pv_codec_handle;
+    ih264d_ctl_degrade_ip_t *ps_degrade_ip =
+                    (ih264d_ctl_degrade_ip_t *)pv_api_ip;
+    ih264d_ctl_degrade_op_t *ps_degrade_op =
+                    (ih264d_ctl_degrade_op_t *)pv_api_op;
+
+    ps_codec->i4_degrade_type = ps_degrade_ip->i4_degrade_type;
+    ps_codec->i4_nondegrade_interval = ps_degrade_ip->i4_nondegrade_interval;
+    ps_codec->i4_degrade_pics = ps_degrade_ip->i4_degrade_pics;
+
+    /* Restart the degrade picture count */
+    ps_codec->i4_degrade_pic_cnt = 0;
+    ps_degrade_op->u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/*
+ * Reports, for each of up to three planes, the display width/height, the
+ * buffer width/height and the x/y offset of the picture inside the buffer.
+ * Plane 0 is derived from the active SPS when one is valid, otherwise from
+ * the init-time dimensions; planes 1 and 2 are half of plane 0. For the
+ * 420SP formats plane 2 is zeroed and the plane 1 widths and x offset are
+ * doubled. Always returns IV_SUCCESS.
+ */
+WORD32 ih264d_get_frame_dimensions(iv_obj_t *dec_hdl,
+                                   void *pv_api_ip,
+                                   void *pv_api_op)
+{
+    dec_struct_t *ps_dec = dec_hdl->pv_codec_handle;
+    ih264d_ctl_get_frame_dimensions_op_t *ps_op =
+                    (ih264d_ctl_get_frame_dimensions_op_t *)pv_api_op;
+    UWORD32 disp_wd, disp_ht, buffer_wd, buffer_ht;
+    UWORD32 x_offset = 0, y_offset = 0;
+    UWORD32 u4_plane;
+    WORD32 i4_sps_valid = (NULL != ps_dec->ps_sps)
+                    && (1 == (ps_dec->ps_sps->u1_is_valid));
+    WORD32 i4_shared = (0 != ps_dec->u4_share_disp_buf);
+
+    UNUSED(pv_api_ip);
+
+    /* Display dimensions */
+    if(i4_sps_valid)
+    {
+        disp_wd = ps_dec->u2_disp_width;
+        disp_ht = ps_dec->u2_disp_height;
+    }
+    else
+    {
+        disp_wd = ps_dec->u4_width_at_init;
+        disp_ht = ps_dec->u4_height_at_init;
+    }
+
+    /* Buffer dimensions */
+    if(!i4_shared)
+    {
+        buffer_wd = disp_wd;
+        buffer_ht = disp_ht;
+    }
+    else if(i4_sps_valid)
+    {
+        buffer_wd = ps_dec->u2_frm_wd_y;
+        buffer_ht = ps_dec->u2_frm_ht_y;
+    }
+    else
+    {
+        buffer_wd = ALIGN16(disp_wd) + (PAD_LEN_Y_H << 1);
+        buffer_ht = ALIGN16(disp_ht) + (PAD_LEN_Y_V << 2);
+    }
+
+    if(ps_dec->u4_app_disp_width > buffer_wd)
+        buffer_wd = ps_dec->u4_app_disp_width;
+
+    /* Offsets are non-zero only when display buffers are shared */
+    if(i4_shared)
+    {
+        y_offset = (PAD_LEN_Y_V << 1);
+        x_offset = PAD_LEN_Y_H;
+
+        if(i4_sps_valid && (0 != ps_dec->u2_crop_offset_y))
+        {
+            /* Split the crop offset into rows and columns of the luma frame */
+            y_offset += ps_dec->u2_crop_offset_y / ps_dec->u2_frm_wd_y;
+            x_offset += ps_dec->u2_crop_offset_y % ps_dec->u2_frm_wd_y;
+        }
+    }
+
+    ps_op->u4_disp_wd[0] = disp_wd;
+    ps_op->u4_disp_ht[0] = disp_ht;
+    ps_op->u4_buffer_wd[0] = buffer_wd;
+    ps_op->u4_buffer_ht[0] = buffer_ht;
+    ps_op->u4_x_offset[0] = x_offset;
+    ps_op->u4_y_offset[0] = y_offset;
+
+    /* Planes 1 and 2: half of plane 0 (display sizes rounded up) */
+    for(u4_plane = 1; u4_plane <= 2; u4_plane++)
+    {
+        ps_op->u4_disp_wd[u4_plane] = ((ps_op->u4_disp_wd[0] + 1) >> 1);
+        ps_op->u4_disp_ht[u4_plane] = ((ps_op->u4_disp_ht[0] + 1) >> 1);
+        ps_op->u4_buffer_wd[u4_plane] = (ps_op->u4_buffer_wd[0] >> 1);
+        ps_op->u4_buffer_ht[u4_plane] = (ps_op->u4_buffer_ht[0] >> 1);
+        ps_op->u4_x_offset[u4_plane] = (ps_op->u4_x_offset[0] >> 1);
+        ps_op->u4_y_offset[u4_plane] = (ps_op->u4_y_offset[0] >> 1);
+    }
+
+    if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+                    || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU))
+    {
+        ps_op->u4_disp_wd[2] = 0;
+        ps_op->u4_disp_ht[2] = 0;
+        ps_op->u4_buffer_wd[2] = 0;
+        ps_op->u4_buffer_ht[2] = 0;
+        ps_op->u4_x_offset[2] = 0;
+        ps_op->u4_y_offset[2] = 0;
+
+        ps_op->u4_disp_wd[1] <<= 1;
+        ps_op->u4_buffer_wd[1] <<= 1;
+        ps_op->u4_x_offset[1] <<= 1;
+    }
+
+    return IV_SUCCESS;
+}
+
+/*
+ * Stores the requested core count in the decoder context. A count of
+ * exactly 1 disables separate parsing and points the decode context-save
+ * register at the parse one; any other count enables separate parsing.
+ * The stored count is then capped at 3. Always returns IV_SUCCESS.
+ */
+WORD32 ih264d_set_num_cores(iv_obj_t *dec_hdl, void *pv_api_ip, void *pv_api_op)
+{
+    dec_struct_t *ps_dec = dec_hdl->pv_codec_handle;
+    ih264d_ctl_set_num_cores_ip_t *ps_cores_ip =
+                    (ih264d_ctl_set_num_cores_ip_t *)pv_api_ip;
+    ih264d_ctl_set_num_cores_op_t *ps_cores_op =
+                    (ih264d_ctl_set_num_cores_op_t *)pv_api_op;
+
+    ps_cores_op->u4_error_code = 0;
+    ps_dec->u4_num_cores = ps_cores_ip->u4_num_cores;
+
+    if(1 != ps_dec->u4_num_cores)
+    {
+        ps_dec->u1_separate_parse = 1;
+    }
+    else
+    {
+        ps_dec->u1_separate_parse = 0;
+        ps_dec->pi4_ctxt_save_register_dec = ps_dec->pi4_ctxt_save_register;
+    }
+
+    /* Using only up to three threads currently */
+    if(ps_dec->u4_num_cores > 3)
+    {
+        ps_dec->u4_num_cores = 3;
+    }
+
+    return IV_SUCCESS;
+}
+
+/*
+ * Copies picture and display information from the decoder context into the
+ * video-decode output structure. The picture dimensions are left untouched
+ * when the low byte of the error code is
+ * ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED.
+ */
+void ih264d_fill_output_struct_from_context(dec_struct_t *ps_dec,
+                                            ivd_video_decode_op_t *ps_dec_op)
+{
+    UWORD32 u4_err_low_byte = ps_dec_op->u4_error_code & 0xff;
+
+    if(ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED != u4_err_low_byte)
+    {
+        ps_dec_op->u4_pic_wd = (UWORD32)ps_dec->u2_disp_width;
+        ps_dec_op->u4_pic_ht = (UWORD32)ps_dec->u2_disp_height;
+    }
+
+    ps_dec_op->e_pic_type = ps_dec->i4_frametype;
+    ps_dec_op->u4_new_seq = 0;
+    ps_dec_op->u4_output_present = ps_dec->u4_output_present;
+    ps_dec_op->u4_progressive_frame_flag =
+                    ps_dec->s_disp_op.u4_progressive_frame_flag;
+
+    /* Reference flag is cleared only for a decoded frame whose current
+     * slice has nal_ref_idc equal to 0 */
+    if(ps_dec_op->u4_frame_decoded_flag
+                    && (0 == ps_dec->ps_cur_slice->u1_nal_ref_idc))
+    {
+        ps_dec_op->u4_is_ref_flag = 0;
+    }
+    else
+    {
+        ps_dec_op->u4_is_ref_flag = 1;
+    }
+
+    /* Remaining fields mirror the stored display output */
+    ps_dec_op->e_output_format = ps_dec->s_disp_op.e_output_format;
+    ps_dec_op->s_disp_frm_buf = ps_dec->s_disp_op.s_disp_frm_buf;
+    ps_dec_op->e4_fld_type = ps_dec->s_disp_op.e4_fld_type;
+    ps_dec_op->u4_ts = ps_dec->s_disp_op.u4_ts;
+    ps_dec_op->u4_disp_buf_id = ps_dec->s_disp_op.u4_disp_buf_id;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_api_function */
+/* */
+/* Description : */
+/* */
+/* Inputs :iv_obj_t decoder handle */
+/* :pv_api_ip pointer to input structure */
+/* :pv_api_op pointer to output structure */
+/* Outputs : */
+/* Returns : void */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 10 2008 100356 Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Single entry point of the decoder API. The input and output structures
+ * are first checked by api_check_struct_sanity(); the command is then read
+ * from the second 32-bit word of the input structure and dispatched to its
+ * handler. Returns IV_FAIL when the sanity check fails or the command is
+ * unknown, otherwise the handler's status.
+ */
+IV_API_CALL_STATUS_T ih264d_api_function(iv_obj_t *dec_hdl,
+                                         void *pv_api_ip,
+                                         void *pv_api_op)
+{
+    UWORD32 u4_cmd;
+    UWORD32 u4_status;
+    IV_API_CALL_STATUS_T e_sanity;
+
+    e_sanity = api_check_struct_sanity(dec_hdl, pv_api_ip, pv_api_op);
+    if(IV_SUCCESS != e_sanity)
+    {
+        UWORD32 *ptr_err = (UWORD32 *)pv_api_op;
+
+        UNUSED(ptr_err);
+        H264_DEC_DEBUG_PRINT("error code = %d\n", *(ptr_err + 1));
+        return IV_FAIL;
+    }
+
+    /* Second word of every input structure holds the command */
+    u4_cmd = ((UWORD32 *)pv_api_ip)[1];
+
+    switch(u4_cmd)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+            u4_status = ih264d_get_num_rec(pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_FILL_NUM_MEM_REC:
+            u4_status = ih264d_fill_num_mem_rec(pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_INIT:
+            u4_status = ih264d_init(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IVD_CMD_VIDEO_DECODE:
+            u4_status = ih264d_video_decode(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IVD_CMD_GET_DISPLAY_FRAME:
+            u4_status = ih264d_get_display_frame(dec_hdl, pv_api_ip,
+                                                 pv_api_op);
+            break;
+
+        case IVD_CMD_SET_DISPLAY_FRAME:
+            u4_status = ih264d_set_display_frame(dec_hdl, pv_api_ip,
+                                                 pv_api_op);
+            break;
+
+        case IVD_CMD_REL_DISPLAY_FRAME:
+            u4_status = ih264d_rel_display_frame(dec_hdl, pv_api_ip,
+                                                 pv_api_op);
+            break;
+
+        case IV_CMD_RETRIEVE_MEMREC:
+            u4_status = ih264d_clr(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        case IVD_CMD_VIDEO_CTL:
+            u4_status = ih264d_ctl(dec_hdl, pv_api_ip, pv_api_op);
+            break;
+
+        default:
+            u4_status = IV_FAIL;
+            break;
+    }
+
+    return u4_status;
+}
diff --git a/decoder/ih264d_bitstrm.c b/decoder/ih264d_bitstrm.c
new file mode 100755
index 0000000..fd41bc6
--- /dev/null
+++ b/decoder/ih264d_bitstrm.c
@@ -0,0 +1,181 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ **************************************************************************
+ * \file ih264d_bitstrm.c
+ *
+ * \brief
+ * Bitstream parsing routines
+ *
+ * \date
+ * 20/11/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+
+#include <stdlib.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_error_handler.h"
+
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_structs.h"
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_bit_h264 \endif
+ *
+ * \brief
+ *    Fetches a single bit from the bitstream.
+ *
+ *    The bit addressed by ps_stream->u4_ofst is extracted with the GETBIT
+ *    macro, which also advances ps_stream->u4_ofst by one.
+ *
+ * \param ps_stream : Bitstream state (word buffer and current bit offset)
+ *
+ * \return
+ *    The extracted bit (0 or 1).
+ *
+ **************************************************************************
+ */
+UWORD8 ih264d_get_bit_h264(dec_bit_stream_t *ps_stream)
+{
+    UWORD32 u4_bit;
+
+    GETBIT(u4_bit, ps_stream->u4_ofst, ps_stream->pu4_buffer);
+
+    return (UWORD8)u4_bit;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_bits_h264 \endif
+ *
+ * \brief
+ *    Reads and consumes u4_num_bits bits from the bitstream.
+ *
+ *    The bits starting at ps_bitstrm->u4_ofst are extracted with the
+ *    GETBITS macro, which also advances u4_ofst by u4_num_bits. A request
+ *    for zero bits leaves the stream untouched.
+ *
+ * \param ps_bitstrm  : Bitstream state (word buffer and current bit offset)
+ * \param u4_num_bits : Number of bits to read (GETBITS allows at most 32)
+ *
+ * \return
+ *    The bits read, right-aligned in a 32 bit word in the order of their
+ *    occurrence in the bitstream; 0 when u4_num_bits is 0.
+ *
+ **************************************************************************
+ */
+
+UWORD32 ih264d_get_bits_h264(dec_bit_stream_t *ps_bitstrm, UWORD32 u4_num_bits)
+{
+    UWORD32 u4_value = 0;
+
+    if(0 != u4_num_bits)
+    {
+        GETBITS(u4_value, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer,
+                u4_num_bits);
+    }
+
+    return u4_value;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_next_bits_h264 \endif
+ *
+ * \brief
+ *    Peek specified number of bits from the bitstream.
+ *
+ *    This is a Bitstream processing function. It gets the
+ *    specified number of bits from the buffer without
+ *    altering the current pointers. It is equivalent to
+ *    next_bits() function in the standard.
+ *
+ * \param ps_bitstrm  : Bitstream state (word buffer and current bit offset)
+ * \param u4_num_bits : Number of bits to peek, 0..32
+ *
+ * \return
+ *    An unsigned 32 bit integer with its least significant bits
+ *    containing the bits in order of their occurrence in the bitstream.
+ *    Returns 0 when u4_num_bits is 0 (same convention as
+ *    ih264d_get_bits_h264).
+ **************************************************************************
+ */
+UWORD32 ih264d_next_bits_h264(dec_bit_stream_t *ps_bitstrm, UWORD32 u4_num_bits)
+{
+    UWORD32 u4_word_off = (ps_bitstrm->u4_ofst >> 5);
+    UWORD32 u4_bit_off = ps_bitstrm->u4_ofst & 0x1F;
+    UWORD32 *pu4_bitstream = ps_bitstrm->pu4_buffer;
+    UWORD32 u4_bits;
+
+    /* A zero-bit peek would need a right shift by INT_IN_BITS below, which  */
+    /* is undefined for a 32 bit operand; answer it without touching the     */
+    /* buffer.                                                               */
+    if(0 == u4_num_bits)
+        return 0;
+
+    u4_bits = pu4_bitstream[u4_word_off++] << u4_bit_off;
+
+    /*************************************************************************/
+    /* Test if number of bits to be read exceeds the number of bits in the   */
+    /* current word. If yes, read from the next word of the buffer, The bits */
+    /* from both the words are concatenated to get next 32 bits in 'u4_bits' */
+    /* (u4_bit_off is non-zero whenever this branch is taken, so the shift   */
+    /* count below stays under 32)                                           */
+    /*************************************************************************/
+    if(u4_bit_off > (INT_IN_BITS - u4_num_bits))
+        u4_bits |= (pu4_bitstream[u4_word_off] >> (INT_IN_BITS - u4_bit_off));
+
+    return ((u4_bits >> (INT_IN_BITS - u4_num_bits)));
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_flush_bits_h264 \endif
+ *
+ * \brief
+ *    Skips (marks as read) the given number of bits.
+ *
+ *    The bit offset of the stream is moved by u4_num_bits and then compared
+ *    against ps_bitstrm->u4_max_ofst. The offset is updated even when the
+ *    limit is exceeded.
+ *
+ * \param ps_bitstrm  : Bitstream state
+ * \param u4_num_bits : Number of bits to skip
+ *
+ * \return
+ *    OK when the new offset does not exceed u4_max_ofst,
+ *    ERROR_EOB_FLUSHBITS_T otherwise.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_flush_bits_h264(dec_bit_stream_t *ps_bitstrm, WORD32 u4_num_bits)
+{
+    UWORD32 u4_new_ofst = ps_bitstrm->u4_ofst + u4_num_bits;
+
+    ps_bitstrm->u4_ofst = u4_new_ofst;
+
+    return (u4_new_ofst > ps_bitstrm->u4_max_ofst) ? ERROR_EOB_FLUSHBITS_T : OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_check_byte_aligned \endif
+ *
+ * \brief
+ *    Tells whether the current bit offset of the stream lies on a byte
+ *    boundary, i.e. whether its three least significant bits are zero.
+ *
+ * \param ps_bitstrm : Pointer to bitstream
+ *
+ * \return
+ *    1 if ps_bitstrm->u4_ofst is byte aligned, 0 otherwise.
+ **************************************************************************
+ */
+
+UWORD8 ih264d_check_byte_aligned(dec_bit_stream_t * ps_bitstrm)
+{
+    return (UWORD8)(0 == (ps_bitstrm->u4_ofst & 0x07));
+}
diff --git a/decoder/ih264d_bitstrm.h b/decoder/ih264d_bitstrm.h
new file mode 100755
index 0000000..49cd5e7
--- /dev/null
+++ b/decoder/ih264d_bitstrm.h
@@ -0,0 +1,195 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _IH264D_BITSTRM_H_
+#define _IH264D_BITSTRM_H_
+/*!
+ *************************************************************************
+ * \file ih264d_bitstrm.h
+ *
+ * \brief
+ * Contains all the declarations of bitstream reading routines
+ *
+ * \date
+ * 20/11/2002
+ *
+ * \author AI
+ *************************************************************************
+ */
+
+/* Includes */
+#include <stdio.h>
+#include <stdlib.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+
+#define INT_IN_BYTES 4
+#define INT_IN_BITS 32
+
+/* Based on level 1.2 of baseline profile */
+/* 396[MAX_FS] * 128 * 1.5 [ChromaFormatParameter] / sizeof(UWORD32)
+ i.e 396 * 128 * 1.5 / 4 = 19008 */
+/* Based on level 3 of main profile */
+/* 1620[MAX_FS] * 128 * 1.5 [ChromaFormatParameter] / sizeof(UWORD32)
+ i.e 1620 * 128 * 1.5 / 4= 77760 */
+#define SIZE_OF_BUFFER 77760
+
+/* Bitstream reader state shared by the bitstream functions and macros */
+typedef struct
+{
+ UWORD32 u4_ofst; /* Offset of the current bit, in bits from the start of pu4_buffer */
+ UWORD32 *pu4_buffer; /* Bitstream Buffer, accessed as 32-bit words with bits taken MSB first */
+ UWORD32 u4_max_ofst; /* Upper limit for u4_ofst; ih264d_flush_bits_h264 reports an error once u4_ofst exceeds it */
+ void * pv_codec_handle; /* For Error Handling */
+} dec_bit_stream_t;
+
+/* To read the next bit; advances the bit offset by one */
+UWORD8 ih264d_get_bit_h264(dec_bit_stream_t *ps_stream);
+
+/* To read the next specified number of bits; advances the bit offset */
+UWORD32 ih264d_get_bits_h264(dec_bit_stream_t *ps_bitstrm, UWORD32 u4_num_bits);
+
+/* To see the next specified number of bits; the bit offset is not changed */
+UWORD32 ih264d_next_bits_h264(dec_bit_stream_t *ps_bitstrm, UWORD32 u4_num_bits);
+
+/* To flush a specified number of bits; returns OK or ERROR_EOB_FLUSHBITS_T */
+WORD32 ih264d_flush_bits_h264(dec_bit_stream_t *ps_bitstrm, WORD32 u4_num_bits);
+
+/*!
+ **************************************************************************
+ * \if Function name : MoreRbspData \endif
+ *
+ * \brief
+ *    MORE_RBSP_DATA determines whether there is more data in RBSP or not,
+ *    by checking that the current bit offset is still below u4_max_ofst.
+ *    EXCEED_OFFSET checks whether the current bit offset has gone past
+ *    u4_max_ofst.
+ *
+ * \param ps_bitstrm : Pointer to bitstream (evaluated twice by each macro)
+ *
+ * \return
+ *    MORE_RBSP_DATA : 1 if u4_ofst <  u4_max_ofst, otherwise 0.
+ *    EXCEED_OFFSET  : 1 if u4_ofst >  u4_max_ofst, otherwise 0.
+ **************************************************************************
+ */
+
+#define MORE_RBSP_DATA(ps_bitstrm) \
+    ((ps_bitstrm)->u4_ofst < (ps_bitstrm)->u4_max_ofst)
+#define EXCEED_OFFSET(ps_bitstrm) \
+    ((ps_bitstrm)->u4_ofst > (ps_bitstrm)->u4_max_ofst)
+
+void GoToByteBoundary(dec_bit_stream_t * ps_bitstrm); /* NOTE(review): no definition is visible in ih264d_bitstrm.c of this change - confirm where it is implemented */
+UWORD8 ih264d_check_byte_aligned(dec_bit_stream_t * ps_bitstrm); /* Returns 1 when u4_ofst is a multiple of 8, else 0 */
+
+/*****************************************************************************/
+/* Define a macro for inlining of GETBIT:
+ *   Loads the 32-bit word of 'pu4_bitstream' that contains bit 'u4_offset'
+ *   (word index = offset >> 5, bit index = offset & 0x1F, counted from the
+ *   MSB), stores that bit (0 or 1) in 'u4_code' and increments 'u4_offset'
+ *   by one.
+ *   'u4_code' and 'u4_offset' must be modifiable lvalues; 'u4_offset' is
+ *   evaluated more than once. The macro expands to a brace block. */
+/*****************************************************************************/
+#define GETBIT(u4_code, u4_offset, pu4_bitstream) \
+{ \
+ UWORD32 *pu4_buf = (pu4_bitstream); \
+ UWORD32 u4_word_off = ((u4_offset) >> 5); \
+ UWORD32 u4_bit_off = (u4_offset) & 0x1F; \
+ u4_code = pu4_buf[u4_word_off] << u4_bit_off; \
+ (u4_offset)++; \
+ u4_code = (u4_code >> 31); \
+}
+
+
+
+/*****************************************************************************/
+/* Define a macro for inlining of GETBITS:
+ *   Reads 'u4_no_bits' bits starting at bit 'u4_offset' of 'pu4_bitstream'
+ *   into the least significant bits of 'u4_code' and advances 'u4_offset'
+ *   by 'u4_no_bits'.
+ *   'u4_no_bits' shall not exceed 32 and shall not be 0 (a zero count would
+ *   require a shift by 32, which is undefined); callers such as
+ *   ih264d_get_bits_h264 filter out the zero case.
+ *   The word following the current one is read only when the offset is not
+ *   word aligned. Arguments are evaluated more than once. The macro expands
+ *   to a brace block (not do/while) because some call sites omit the
+ *   trailing semicolon. */
+/*****************************************************************************/
+#define GETBITS(u4_code, u4_offset, pu4_bitstream, u4_no_bits) \
+{ \
+    UWORD32 *pu4_buf = (pu4_bitstream); \
+    UWORD32 u4_word_off = ((u4_offset) >> 5); \
+    UWORD32 u4_bit_off = (u4_offset) & 0x1F; \
+    u4_code = pu4_buf[u4_word_off++] << u4_bit_off; \
+    if(u4_bit_off) \
+        u4_code |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off)); \
+    u4_code = u4_code >> (INT_IN_BITS - (u4_no_bits)); \
+    (u4_offset) += (u4_no_bits); \
+}
+
+/*****************************************************************************/
+/* Define a macro for inlining of NEXTBITS:
+ *   Peeks 'u4_no_bits' bits starting at bit 'u4_offset' of 'pu4_bitstream'
+ *   into the least significant bits of 'u4_word'. 'u4_offset' is NOT
+ *   modified.
+ *   'u4_no_bits' shall be in the range 1..32 (a zero count would require a
+ *   shift by 32, which is undefined). Arguments are evaluated more than
+ *   once. The macro expands to a brace block. */
+/*****************************************************************************/
+#define NEXTBITS(u4_word, u4_offset, pu4_bitstream, u4_no_bits) \
+{ \
+    UWORD32 *pu4_buf = (pu4_bitstream); \
+    UWORD32 u4_word_off = ((u4_offset) >> 5); \
+    UWORD32 u4_bit_off = (u4_offset) & 0x1F; \
+    u4_word = pu4_buf[u4_word_off++] << u4_bit_off; \
+    if(u4_bit_off) \
+        u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off)); \
+    u4_word = u4_word >> (INT_IN_BITS - (u4_no_bits)); \
+}
+/*****************************************************************************/
+/* Define a macro for inlining of NEXTBITS_32:
+ *   Peeks the next 32 bits starting at bit 'u4_offset' of 'pu4_bitstream'
+ *   into 'u4_word' without modifying 'u4_offset'. When the offset is not
+ *   word aligned the result is assembled from the current and the following
+ *   buffer word; otherwise only the current word is read. */
+/*****************************************************************************/
+#define NEXTBITS_32(u4_word, u4_offset, pu4_bitstream) \
+{ \
+ UWORD32 *pu4_buf = (pu4_bitstream); \
+ UWORD32 u4_word_off = ((u4_offset) >> 5); \
+ UWORD32 u4_bit_off = (u4_offset) & 0x1F; \
+ \
+ u4_word = pu4_buf[u4_word_off++] << u4_bit_off; \
+ if(u4_bit_off) \
+ u4_word |= (pu4_buf[u4_word_off] >> (INT_IN_BITS - u4_bit_off)); \
+}
+
+
+/*****************************************************************************/
+/* Define a macro for inlining of FIND_ONE_IN_STREAM_32:
+ *   Peeks the next 32 bits with NEXTBITS_32, stores CLZ() of that word in
+ *   'u4_ldz' and advances 'u4_offset' by u4_ldz + 1, i.e. past the counted
+ *   zero bits and one further bit.
+ *   NOTE(review): the result for an all-zero 32-bit window depends on how
+ *   CLZ is defined for 0 - confirm against ih264_platform_macros.h. */
+/*****************************************************************************/
+#define FIND_ONE_IN_STREAM_32(u4_ldz, u4_offset, pu4_bitstream) \
+{ \
+ UWORD32 u4_word; \
+ NEXTBITS_32(u4_word, u4_offset, pu4_bitstream); \
+ u4_ldz = CLZ(u4_word); \
+ (u4_offset) += (u4_ldz + 1); \
+}
+
+/*****************************************************************************/
+/* Define a macro for inlining of FIND_ONE_IN_STREAM_LEN:
+ *   Same as FIND_ONE_IN_STREAM_32 but the search is limited to 'u4_len'
+ *   bits. If CLZ() of the next 32 bits is less than 'u4_len', 'u4_ldz'
+ *   receives that count and 'u4_offset' advances by u4_ldz + 1. Otherwise
+ *   'u4_ldz' is set to 'u4_len' and 'u4_offset' advances by exactly
+ *   'u4_len' bits.
+ *   Arguments are evaluated more than once. The macro expands to a brace
+ *   block. */
+/*****************************************************************************/
+#define FIND_ONE_IN_STREAM_LEN(u4_ldz, u4_offset, pu4_bitstream, u4_len) \
+{ \
+    UWORD32 u4_word; \
+    NEXTBITS_32(u4_word, u4_offset, pu4_bitstream); \
+    u4_ldz = CLZ(u4_word); \
+    if(u4_ldz < (u4_len)) \
+        (u4_offset) += (u4_ldz + 1); \
+    else \
+    { \
+        u4_ldz = (u4_len); \
+        (u4_offset) += u4_ldz; \
+    } \
+}
+
+/*****************************************************************************/
+/* Define a macro for inlining of FLUSHBITS:
+ *   Advances 'u4_offset' by 'u4_no_bits'. No comparison against the end of
+ *   the buffer is made here; callers that need it check the offset
+ *   themselves. The macro expands to a brace block. */
+/*****************************************************************************/
+#define FLUSHBITS(u4_offset, u4_no_bits) \
+{ \
+ (u4_offset) += (u4_no_bits); \
+}
+
+#endif /* _IH264D_BITSTRM_H_ */
diff --git a/decoder/ih264d_cabac.c b/decoder/ih264d_cabac.c
new file mode 100755
index 0000000..38028ae
--- /dev/null
+++ b/decoder/ih264d_cabac.c
@@ -0,0 +1,779 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ ***************************************************************************
+ * \file ih264d_cabac.c
+ *
+ * \brief
+ * This file contains Binary decoding routines.
+ *
+ * \date
+ * 04/02/2003
+ *
+ * \author NS
+ ***************************************************************************
+ */
+#include <string.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+#include "ih264d_cabac.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_parse_cabac.h"
+#include "ih264d_tables.h"
+
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_init_cabac_dec_envirnoment \endif
+ *
+ * \brief
+ * This function initializes CABAC decoding environment. This function
+ * implements 9.3.3.2.3.1 of ISO/IEC14496-10.
+ *
+ * The range is set to (HALF - 2) << 23 and the offset is loaded with the
+ * next 32 bits of the stream, of which only 9 are consumed.
+ *
+ * \param ps_cab_env : CABAC decoding environment to initialize
+ * \param ps_bitstrm : Bitstream to read from; its bit offset advances by 9
+ *
+ * \return
+ * OK on success, ERROR_EOB_FLUSHBITS_T if the bit offset moves beyond
+ * ps_bitstrm->u4_max_ofst (in that case the offset value is not stored
+ * in ps_cab_env).
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_init_cabac_dec_envirnoment(decoding_envirnoment_t * ps_cab_env,
+ dec_bit_stream_t *ps_bitstrm)
+{
+ UWORD32 u4_code_int_val_ofst;
+
+ ps_cab_env->u4_code_int_range = (HALF - 2) << 23;
+ NEXTBITS(u4_code_int_val_ofst, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer,
+ 32);
+ FLUSHBITS(ps_bitstrm->u4_ofst, 9)
+
+ if(ps_bitstrm->u4_ofst > ps_bitstrm->u4_max_ofst)
+ return ERROR_EOB_FLUSHBITS_T;
+
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+ /* Brief description of the design adopted for CABAC */
+ /* According to the standard, u4_code_int_range needs to be initialized to
+ 0x1FE and 9 bits need to be read from the bit stream into
+ u4_code_int_val_ofst. As and when the range becomes too small we need to
+ renormalize and read from the bitstream.
+
+ In the implemented design
+ initially
+
+ range_new = range << 23
+ valOffset_new = (valOffset << 23) + 23 bits (read from the bit stream)
+
+ Thus we have read 23 more bits ahead of time.
+
+ It can be mathematically proved that even with the modified range and offset
+ the operations like comparison and subtraction needed for a bin decode are
+ still valid (both in the regular case and the bypass case).
+
+ As bins are decoded, we consume the bits that we have already read into
+ valOffset. The clz of the range gives us the number of bits we have consumed
+ of the 23 bits that we have read ahead of time.
+
+ When the number of bits we have consumed exceeds 23, we renormalize and
+ read from the bitstream again. */
+
+RESET_BIN_COUNTS(ps_cab_env)
+
+ return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_init_cabac_contexts                               */
+/*                                                                           */
+/*  Description   : Prepares the CABAC context state for a slice:            */
+/*                  - fills the default "MB not available" context entry     */
+/*                    located one element before p_ctxt_inc_mb_map           */
+/*                  - selects the per slice type context sub-tables          */
+/*                  - selects the frame/field significant coeff sub-tables   */
+/*                  - copies the initial context models chosen by            */
+/*                    cabac_init_idc (3 is used for I slices) and slice QP   */
+/*  Inputs        : u1_slice_type - slice type (I_SLICE/P_SLICE/B_SLICE)     */
+/*                  ps_dec        - decoder context                          */
+/*  Outputs       : ps_dec->ps_def_ctxt_mb_info, p_mb_type_t,                */
+/*                  p_mb_skip_flag_t, p_sub_mb_type_t,                       */
+/*                  p_significant_coeff_flag_t[0..5] and the contents of     */
+/*                  p_cabac_ctxt_table_t                                     */
+/*  Returns       : void                                                     */
+/*                                                                           */
+/*****************************************************************************/
+
+void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec)
+{
+    bin_ctxt_model_t *ps_ctxt_tbl = ps_dec->p_cabac_ctxt_table_t;
+    bin_ctxt_model_t **pps_sig_coeff = ps_dec->p_significant_coeff_flag_t;
+    bin_ctxt_model_t *ps_sig_coeff_base;
+    /* Entry just before the MB map holds the defaults used for an          */
+    /* unavailable neighbour MB                                             */
+    ctxt_inc_mb_info_t *ps_def_ctxt = ps_dec->p_ctxt_inc_mb_map - 1;
+    UWORD8 u1_slice_qp = ps_dec->ps_cur_slice->u1_slice_qp;
+    UWORD8 u1_init_idc;
+
+    /* I slices use the dedicated table index 3; others use the slice's idc */
+    u1_init_idc = (I_SLICE == u1_slice_type) ?
+                    3 : ps_dec->ps_cur_slice->u1_cabac_init_idc;
+
+    /* Default context values for "MB not available" */
+    ps_def_ctxt->u1_mb_type = CAB_SKIP;
+    ps_def_ctxt->u1_cbp = 0x0f;
+    ps_def_ctxt->u1_intra_chroma_pred_mode = 0;
+    ps_def_ctxt->u1_yuv_dc_csbp = 0x7;
+    ps_def_ctxt->u1_transform8x8_ctxt = 0;
+    memset((UWORD8 *)ps_def_ctxt->i1_ref_idx, 0, 4);
+    memset((UWORD8 *)ps_def_ctxt->u1_mv, 0, 16);
+    ps_dec->ps_def_ctxt_mb_info = ps_def_ctxt;
+
+    /* Slice type dependent sub-tables */
+    if(I_SLICE == u1_slice_type)
+    {
+        ps_dec->p_mb_type_t = ps_ctxt_tbl + MB_TYPE_I_SLICE;
+    }
+    else if(P_SLICE == u1_slice_type)
+    {
+        ps_dec->p_mb_type_t = ps_ctxt_tbl + MB_TYPE_P_SLICE;
+        ps_dec->p_mb_skip_flag_t = ps_ctxt_tbl + MB_SKIP_FLAG_P_SLICE;
+        ps_dec->p_sub_mb_type_t = ps_ctxt_tbl + SUB_MB_TYPE_P_SLICE;
+    }
+    else if(B_SLICE == u1_slice_type)
+    {
+        ps_dec->p_mb_type_t = ps_ctxt_tbl + MB_TYPE_B_SLICE;
+        ps_dec->p_mb_skip_flag_t = ps_ctxt_tbl + MB_SKIP_FLAG_B_SLICE;
+        ps_dec->p_sub_mb_type_t = ps_ctxt_tbl + SUB_MB_TYPE_B_SLICE;
+    }
+
+    /* Field and frame pictures use different significant coeff contexts */
+    ps_sig_coeff_base = ps_ctxt_tbl;
+    if(ps_dec->ps_cur_slice->u1_field_pic_flag)
+        ps_sig_coeff_base += SIGNIFICANT_COEFF_FLAG_FLD;
+    else
+        ps_sig_coeff_base += SIGNIFICANT_COEFF_FLAG_FRAME;
+
+    /* One sub-table per residual block category */
+    pps_sig_coeff[0] = ps_sig_coeff_base + SIG_COEFF_CTXT_CAT_0_OFFSET;
+    pps_sig_coeff[1] = ps_sig_coeff_base + SIG_COEFF_CTXT_CAT_1_OFFSET;
+    pps_sig_coeff[2] = ps_sig_coeff_base + SIG_COEFF_CTXT_CAT_2_OFFSET;
+    pps_sig_coeff[3] = ps_sig_coeff_base + SIG_COEFF_CTXT_CAT_3_OFFSET;
+    pps_sig_coeff[4] = ps_sig_coeff_base + SIG_COEFF_CTXT_CAT_4_OFFSET;
+    pps_sig_coeff[5] = ps_sig_coeff_base + SIG_COEFF_CTXT_CAT_5_OFFSET;
+
+    /* Load the initial context models for this (init_idc, QP) pair */
+    memcpy(ps_ctxt_tbl,
+           gau1_ih264d_cabac_ctxt_init_table[u1_init_idc][u1_slice_qp],
+           NUM_CABAC_CTXTS * sizeof(bin_ctxt_model_t));
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_decode_bin \endif
+ *
+ * \brief
+ * This function implements decoding process of a decision as defined
+ * in 9.3.3.2.2.
+ *
+ * \param u4_ctx_inc      : Index added to ps_src_bin_ctxt to select the
+ *                          context model used for this bin
+ * \param ps_src_bin_ctxt : Base of the context model array
+ * \param ps_bitstrm      : Bitstream; its bit offset is updated when the
+ *                          range is renormalized
+ * \param ps_cab_env      : CABAC environment; range and offset are read at
+ *                          entry and written back before returning
+ *
+ * \return
+ * Returns symbol decoded.
+ *
+ * \note
+ * It is specified in 9.3.3.2.3.2 that, one of the input to this function
+ * is CtxIdx. CtxIdx is used to identify state and MPS of that context
+ * (Refer Fig 9.11 - Flowchart for encoding a decision). To suffice that
+ * here we pass a pointer bin_ctxt_model_t which contains these values.
+ * The selected context model's u1_mps_state is updated by this call.
+ *
+ **************************************************************************
+ */
+
+UWORD32 ih264d_decode_bin(UWORD32 u4_ctx_inc,
+ bin_ctxt_model_t *ps_src_bin_ctxt,
+ dec_bit_stream_t *ps_bitstrm,
+ decoding_envirnoment_t *ps_cab_env)
+
+{
+
+ UWORD32 u4_qnt_int_range, u4_code_int_range, u4_code_int_val_ofst,
+ u4_int_range_lps;
+
+ UWORD32 u4_symbol, u4_mps_state;
+
+ bin_ctxt_model_t *ps_bin_ctxt;
+
+ UWORD32 table_lookup;
+ const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_clz;
+
+ ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_inc;
+
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+ u4_mps_state = (ps_bin_ctxt->u1_mps_state);
+ u4_clz = CLZ(u4_code_int_range);
+
+ u4_qnt_int_range = u4_code_int_range << u4_clz; /* left-justify the range */
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3; /* 2-bit quantized range used as table column */
+
+ table_lookup = pu4_table[(u4_mps_state << 2) + u4_qnt_int_range]; /* 4 entries per state value */
+ u4_int_range_lps = table_lookup & 0xff; /* low byte: LPS range */
+
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz); /* bring LPS range to the scale of the working range */
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+ u4_symbol = ((u4_mps_state >> 6) & 0x1); /* bit 6 of the stored state is taken as the symbol */
+
+ u4_mps_state = (table_lookup >> 8) & 0x7F; /* bits 8..14: next state */
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+ u4_int_range_lps, u4_mps_state, table_lookup) /* NOTE(review): macro defined elsewhere; presumably adjusts range/offset/symbol/state when the LPS path is taken - confirm */
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
+ {
+ UWORD32 *pu4_buffer, u4_offset;
+
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+ u4_offset = ps_bitstrm->u4_ofst;
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
+ pu4_buffer)
+
+ ps_bitstrm->u4_ofst = u4_offset; /* store back the offset handed to the renormalization macro */
+ }
+
+ INC_BIN_COUNT(ps_cab_env)
+
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_bin_ctxt->u1_mps_state = u4_mps_state;
+
+ return (u4_symbol);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_decode_terminate \endif
+ *
+ * \brief
+ * This function implements decoding process of a termination as defined
+ * 9.3.3.2.2.3 of ISO/IEC14496-10.
+ *
+ * \param ps_cab_env : CABAC environment; range and offset are read at
+ *                     entry and written back before returning
+ * \param ps_stream  : Bitstream; its bit offset is advanced by CLZ(range)
+ *                     when the decoded symbol is 1, or updated by the
+ *                     renormalization when the symbol is 0
+ *
+ * \return
+ * Returns symbol decoded.
+ *
+ * \note
+ * This routine is called while decoding "end_of_slice_flag" and of the
+ * bin indicating PCM mode in MBType.
+ *
+ **************************************************************************
+ */
+UWORD8 ih264d_decode_terminate(decoding_envirnoment_t * ps_cab_env,
+ dec_bit_stream_t * ps_stream)
+{
+ UWORD32 u4_symbol;
+ UWORD32 u4_code_int_val_ofst, u4_code_int_range;
+ UWORD32 u4_clz;
+
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+ u4_clz = CLZ(u4_code_int_range);
+ u4_code_int_range -= (2 << (23 - u4_clz)); /* reduce the range by 2, expressed in the scale of the working range */
+
+ if(u4_code_int_val_ofst >= u4_code_int_range)
+ {
+ /* S=1 */
+ u4_symbol = 1;
+
+ {
+
+ /*the u4_ofst needs to be updated before termination*/
+ ps_stream->u4_ofst += u4_clz;
+
+ }
+
+ }
+ else
+ {
+ /* S=0 */
+ u4_symbol = 0;
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
+ {
+ UWORD32 *pu4_buffer, u4_offset;
+
+ pu4_buffer = ps_stream->pu4_buffer;
+ u4_offset = ps_stream->u4_ofst;
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
+ pu4_buffer)
+ ps_stream->u4_ofst = u4_offset;
+ }
+ }
+
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+ INC_BIN_COUNT(ps_cab_env)
+
+ return (u4_symbol);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_decode_bins_tunary */
+/* */
+/* Description : This function decodes bins in the case of TUNARY */
+/* binarization technique.valid_length is assumed equal to 3 */
+/* and u1_max_bins <= 4 in this function. */
+/* Inputs : u1_max_bins - maximum number of bins to decode
+ *          u4_ctx_inc - packed context increments, 4 bits per bin, the
+ *                       least significant nibble being used for the first bin
+ *          ps_src_bin_ctxt - base of the context model array
+ *          ps_bitstrm - bitstream passed to DECODE_ONE_BIN_MACRO
+ *          ps_cab_env - CABAC environment (range/offset, lookup table) */
+/* Globals : None referenced directly in this function */
+/* Processing : Decodes one bin per iteration with DECODE_ONE_BIN_MACRO,
+ *              stopping after the first bin equal to 0 or once
+ *              u1_max_bins bins have been decoded. At least one bin is
+ *              always decoded. */
+/* Outputs : Updated range and offset in ps_cab_env */
+/* Returns : Number of bins equal to 1 that were decoded before the loop
+ *           ended (equals u1_max_bins when no 0 bin was seen) */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 20 11 2008 SH Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 ih264d_decode_bins_tunary(UWORD8 u1_max_bins,
+ UWORD32 u4_ctx_inc,
+ bin_ctxt_model_t *ps_src_bin_ctxt,
+ dec_bit_stream_t *ps_bitstrm,
+ decoding_envirnoment_t *ps_cab_env)
+
+{
+ UWORD32 u4_value;
+ UWORD32 u4_symbol;
+ UWORD8 u4_ctx_Inc;
+ bin_ctxt_model_t *ps_bin_ctxt;
+ UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+ const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+
+ u4_value = 0;
+
+ /*u1_max_bins has to be less than or equal to 4, u1_max_bins <= 4 for this function*/
+
+ /*here the valid length is assumed to be equal to 3, so the calling function is expected
+ to duplicate CtxInc if valid length is 2 and cmaxbin is greater than 2*/
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+ do
+ {
+ u4_ctx_Inc = u4_ctx_inc & 0xF; /* context increment for this bin */
+ u4_ctx_inc = u4_ctx_inc >> 4; /* expose the increment of the next bin */
+
+ ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc;
+
+ DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst,
+ pu4_table, ps_bitstrm, u4_symbol)
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+ u4_value++;
+ }
+ while((u4_value < u1_max_bins) & (u4_symbol)); /* bitwise AND of the comparison result with the decoded symbol */
+
+ u4_value = u4_value - 1 + u4_symbol; /* bins decoded, minus the terminating 0 bin if there was one */
+
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+ return (u4_value);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_decode_bins */
+/* */
+/* Description : This function decodes bins in the case of MSB_FIRST_FLC */
+/* binarization technique.valid_length is always equal max_bins */
+/* for MSB_FIRST_FLC. assumes u1_max_bins <= 4 */
+/* Inputs : u1_max_bins - number of bins to decode
+ *          u4_ctx_inc - packed context increments, 4 bits per bin, the
+ *                       least significant nibble being used for the first bin
+ *          ps_src_bin_ctxt - base of the context model array
+ *          ps_bitstrm - bitstream passed to DECODE_ONE_BIN_MACRO
+ *          ps_cab_env - CABAC environment (range/offset, lookup table) */
+/* Globals : None referenced directly in this function */
+/* Processing : Decodes bins with DECODE_ONE_BIN_MACRO and shifts each one
+ *              into the result, first decoded bin ending up most
+ *              significant. The loop is a do/while, so one bin is decoded
+ *              even if u1_max_bins is 0. */
+/* Outputs : Updated range and offset in ps_cab_env */
+/* Returns : The decoded bins packed into an integer, first bin as MSB */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 20 11 2008 SH Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 ih264d_decode_bins(UWORD8 u1_max_bins,
+ UWORD32 u4_ctx_inc,
+ bin_ctxt_model_t *ps_src_bin_ctxt,
+ dec_bit_stream_t *ps_bitstrm,
+ decoding_envirnoment_t *ps_cab_env)
+
+{
+ UWORD32 u4_value;
+ UWORD32 u4_symbol, i;
+ UWORD32 u4_ctxt_inc;
+ bin_ctxt_model_t *ps_bin_ctxt;
+ UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+ const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+
+ i = 0;
+
+ u4_value = 0;
+
+ /*u1_max_bins has to be less than or equal to 4, u1_max_bins <= 4 for this function*/
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+ do
+ {
+ u4_ctxt_inc = u4_ctx_inc & 0xf; /* context increment for this bin */
+ u4_ctx_inc = u4_ctx_inc >> 4; /* expose the increment of the next bin */
+
+ ps_bin_ctxt = ps_src_bin_ctxt + u4_ctxt_inc;
+
+ DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst,
+ pu4_table, ps_bitstrm, u4_symbol)
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+ u4_value = (u4_value << 1) | (u4_symbol); /* append the new bin as LSB */
+
+ i++;
+ }
+ while(i < u1_max_bins);
+
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+ return (u4_value);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_decode_bins_unary */
+/* */
+/* Description : This function decodes bins in the case of UNARY */
+/* binarization technique.here the valid length is taken to 5*/
+/* and cmax is always greater than 9 */
+/* Inputs : u1_max_bins - maximum number of bins to decode
+ *          u4_ctx_inc - packed context increments, 4 bits per bin; nibbles
+ *                       0..3 serve the first four bins, nibble 4 serves
+ *                       every later bin
+ *          ps_src_bin_ctxt - base of the context model array
+ *          ps_bitstrm - bitstream passed to DECODE_ONE_BIN_MACRO
+ *          ps_cab_env - CABAC environment (range/offset, lookup table) */
+/* Globals : None referenced directly in this function */
+/* Processing : Decodes up to four bins, each with its own context nibble,
+ *              stopping at the first 0 bin. If all four were 1 and
+ *              u1_max_bins allows more, keeps decoding with the fifth
+ *              nibble's context until a 0 bin is seen or u1_max_bins bins
+ *              have been decoded. */
+/* Outputs : Updated range and offset in ps_cab_env */
+/* Returns : Number of bins equal to 1 that were decoded before decoding
+ *           stopped */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 20 11 2008 SH Draft */
+/* */
+/*****************************************************************************/
+UWORD32 ih264d_decode_bins_unary(UWORD8 u1_max_bins,
+ UWORD32 u4_ctx_inc,
+ bin_ctxt_model_t *ps_src_bin_ctxt,
+ dec_bit_stream_t *ps_bitstrm,
+ decoding_envirnoment_t *ps_cab_env)
+{
+ UWORD32 u4_value;
+ UWORD32 u4_symbol;
+ bin_ctxt_model_t *ps_bin_ctxt;
+ UWORD32 u4_ctx_Inc;
+ UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+ const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+
+ /* in this function the valid length for u4_ctx_inc is always taken to be 5, so if
+ the valid length is less than 5 the caller needs to duplicate accordingly*/
+
+ /*u1_max_bins is always greater or equal to 9; the check against u1_max_bins is made only after the first loop*/
+ u4_value = 0;
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+ do
+ {
+ u4_ctx_Inc = u4_ctx_inc & 0xf; /* context increment for this bin */
+ u4_ctx_inc = u4_ctx_inc >> 4; /* expose the increment of the next bin */
+
+ ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc;
+
+ DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst,
+ pu4_table, ps_bitstrm, u4_symbol)
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+ u4_value++;
+
+ }
+ while(u4_symbol && u4_value < 4); /* u1_max_bins is not consulted for the first four bins */
+
+ if(u4_symbol && (u4_value < u1_max_bins))
+ {
+
+ u4_ctx_Inc = u4_ctx_inc & 0xf; /* fifth nibble: shared by all remaining bins */
+
+ ps_bin_ctxt = ps_src_bin_ctxt + u4_ctx_Inc;
+
+ do
+ {
+
+ DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range, u4_code_int_val_ofst,
+ pu4_table, ps_bitstrm, u4_symbol)
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+ u4_value++;
+
+ }
+ while(u4_symbol && (u4_value < u1_max_bins));
+
+ }
+
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+ u4_value = u4_value - 1 + u4_symbol; /* bins decoded, minus the terminating 0 bin if there was one */
+
+ return (u4_value);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_decode_bypass_bins_unary */
+/* */
+/* Description : This function is used in the case of UNARY coding */
+/* of bypass bins (no context model is used) */
+/* */
+/* Inputs : ps_cab_env - CABAC environment (range and offset)
+ *          ps_bitstrm - bitstream used when the range is renormalized */
+/* Globals : None referenced directly in this function */
+/* Processing : Renormalizes first if the range is below
+ *              ONE_RIGHT_SHIFTED_BY_9. Then, per bin: halves the range,
+ *              decodes 1 if the offset is >= the halved range (and
+ *              subtracts it from the offset) else 0, and renormalizes
+ *              again when needed. Stops at the first 0 bin or after 32
+ *              bins. */
+/* Outputs : Updated range and offset in ps_cab_env, updated bit offset
+ *           in ps_bitstrm */
+/* Returns : Number of bins equal to 1 decoded before decoding stopped
+ *           (at most 32) */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 10 2005 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 ih264d_decode_bypass_bins_unary(decoding_envirnoment_t *ps_cab_env,
+ dec_bit_stream_t *ps_bitstrm)
+{
+ UWORD32 u4_value;
+ UWORD32 u4_bin;
+ UWORD32 u4_code_int_val_ofst, u4_code_int_range;
+
+ UWORD32 u1_max_bins;
+
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
+ {
+ UWORD32 *pu4_buffer, u4_offset;
+
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+ u4_offset = ps_bitstrm->u4_ofst;
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
+ pu4_buffer)
+ ps_bitstrm->u4_ofst = u4_offset;
+ }
+
+ /*as it is called only from mvd*/
+ u1_max_bins = 32;
+ u4_value = 0;
+
+ do
+ {
+ u4_value++;
+
+ u4_code_int_range = u4_code_int_range >> 1; /* bypass decoding: halve the range instead of doubling the offset */
+ if(u4_code_int_val_ofst >= u4_code_int_range)
+ {
+ /* S=1 */
+ u4_bin = 1;
+ u4_code_int_val_ofst -= u4_code_int_range;
+ }
+ else
+ {
+ /* S=0 */
+ u4_bin = 0;
+ }
+
+ INC_BIN_COUNT(ps_cab_env);INC_BYPASS_BINS(ps_cab_env);
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
+ {
+ UWORD32 *pu4_buffer, u4_offset;
+
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+ u4_offset = ps_bitstrm->u4_ofst;
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
+ pu4_buffer)
+
+ ps_bitstrm->u4_ofst = u4_offset;
+ }
+
+ }
+ while(u4_bin && (u4_value < u1_max_bins));
+
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ u4_value = (u4_value - 1 + u4_bin); /* bins decoded, minus the terminating 0 bin if there was one */
+
+return (u4_value);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_decode_bypass_bins */
+/* */
+/* Description : This function is used in the case of FLC coding */
+/* of bypass bins (no context model is used) */
+/* */
+/* Inputs : ps_cab_env - CABAC environment (range and offset)
+ *          u1_max_bins - number of bins to decode; must be non-zero,
+ *                        because the loop decrements it before testing, so
+ *                        0 would wrap around to 255
+ *          ps_bitstrm - bitstream used when the range is renormalized */
+/* Globals : None referenced directly in this function */
+/* Processing : Renormalizes first if the range is below
+ *              ONE_RIGHT_SHIFTED_BY_9. Then, per bin: halves the range,
+ *              decodes 1 if the offset is >= the halved range (and
+ *              subtracts it from the offset) else 0, shifts the bin into
+ *              the result and renormalizes again when needed. */
+/* Outputs : Updated range and offset in ps_cab_env, updated bit offset
+ *           in ps_bitstrm */
+/* Returns : The decoded bins packed into an integer, first bin as MSB */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 10 2005 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 ih264d_decode_bypass_bins(decoding_envirnoment_t *ps_cab_env,
+ UWORD8 u1_max_bins,
+ dec_bit_stream_t *ps_bitstrm)
+{
+ UWORD32 u4_bins;
+ UWORD32 u4_bin;
+ UWORD32 u4_code_int_val_ofst, u4_code_int_range;
+
+ u4_bins = 0;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
+ {
+ UWORD32 *pu4_buffer, u4_offset;
+
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+ u4_offset = ps_bitstrm->u4_ofst;
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
+ pu4_buffer)
+ ps_bitstrm->u4_ofst = u4_offset;
+ }
+
+ do
+ {
+
+ u4_code_int_range = u4_code_int_range >> 1; /* bypass decoding: halve the range instead of doubling the offset */
+
+ if(u4_code_int_val_ofst >= u4_code_int_range)
+ {
+ /* S=1 */
+ u4_bin = 1;
+ u4_code_int_val_ofst -= u4_code_int_range;
+ }
+ else
+ {
+ /* S=0 */
+ u4_bin = 0;
+ }
+
+ INC_BIN_COUNT(ps_cab_env);INC_BYPASS_BINS(ps_cab_env);
+
+ u4_bins = ((u4_bins << 1) | u4_bin); /* append the new bin as LSB */
+ u1_max_bins--;
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
+ {
+ UWORD32 *pu4_buffer, u4_offset;
+
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+ u4_offset = ps_bitstrm->u4_ofst;
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
+ pu4_buffer)
+ ps_bitstrm->u4_ofst = u4_offset;
+ }
+
+ }
+ while(u1_max_bins);
+
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+
+ return (u4_bins);
+}
+
diff --git a/decoder/ih264d_cabac.h b/decoder/ih264d_cabac.h
new file mode 100755
index 0000000..6ee3d52
--- /dev/null
+++ b/decoder/ih264d_cabac.h
@@ -0,0 +1,267 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ ***************************************************************************
+ * \file ih264d_cabac.h
+ *
+ * \brief
+ * This file contains declarations of Binary decoding routines and tables.
+ *
+ * \date
+ * 04/02/2003
+ *
+ * \author NS
+ ***************************************************************************
+ */
+
+#ifndef _IH264D_CABAC_H_
+#define _IH264D_CABAC_H_
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+
+/* Bit width from which HALF and QUARTER below are derived */
+#define B_BITS 10
+
+/* With B_BITS = 10: HALF = 1 << 9 = 512, QUARTER = 1 << 8 = 256 */
+#define HALF (1 << (B_BITS-1))
+#define QUARTER (1 << (B_BITS-2))
+
+/* Two-value initializer for an unused context.                            */
+/* NOTE(review): bin_ctxt_model_t in this header has a single member;      */
+/* confirm which type this initializer is still meant for.                 */
+#define CTXT_UNUSED {0,64}
+/* Context counts; the names suggest one count per syntax element group -- */
+/* TODO confirm against the code that uses them.                           */
+#define NUM_MB_SKIP_CTXT 6
+#define NUM_MB_TYPE_CTXT 9
+#define NUM_SUBMB_TYPE_CTXT 7
+#define NUM_REF_IDX_CTXT 6
+#define NUM_MB_QP_DELTA 4
+#define NUM_PRED_MODE 6
+#define NUM_MB_FIELD 3
+#define NUM_CBP 12
+#define NUM_CTX_MVD 14
+
+/* Residual block cabac context parameters */
+/* NUM_CTX_CAT matches the six *_CTXCAT values (0 to 5) defined below */
+#define NUM_CTX_CAT 6
+#define NUM_LUMA_CTX_CAT 3
+#define NUM_CTX_CODED_BLOCK 4
+/* Luma CtxSigCoeff + CtxLastCoeff = 15 + 15 = 30 */
+#define NUM_LUMA_CTX_SIG_COEF 30
+/* Chroma DC CtxSigCoeff + CtxLastCoeff = 3 + 3 = 6 */
+#define NUM_CTX_CHROMA_DC_SIG_COEF 6
+/* Chroma AC CtxSigCoeff + CtxLastCoeff = 14 + 14 = 28 */
+#define NUM_CTX_CHROMA_AC_SIG_COEF 28
+#define NUM_CTX_ABS_LEVEL 10
+
+/* Residual block context categories, numbered 0 to 5 */
+#define LUMA_DC_CTXCAT 0
+#define LUMA_AC_CTXCAT 1
+#define LUMA_4X4_CTXCAT 2
+#define CHROMA_DC_CTXCAT 3
+#define CHROMA_AC_CTXCAT 4
+#define LUMA_8X8_CTXCAT 5
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/* The first three macros are the dimensions of                             */
+/* gau1_ih264d_cabac_ctxt_init_table, indexed as                            */
+/* [NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE][NUM_CABAC_CTXTS]:                  */
+/*   NUM_CABAC_CTXTS           - context entries per (init set, Qp) pair    */
+/*   QP_RANGE                  - Qp values 0 to 51                          */
+/*   NUM_CAB_INIT_IDC_PLUS_ONE - cabac_init_idc 0 to 2 plus the I slice set */
+#define NUM_CABAC_CTXTS 460
+#define QP_RANGE 52
+#define NUM_CAB_INIT_IDC_PLUS_ONE 4
+/* NOTE(review): by their names these are index distances between the      */
+/* "significant coefficient" and "last coefficient" contexts -- confirm.    */
+#define LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT 61
+#define LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT_8X8 15
+
+/*
+ * One context model, packed into a single byte:
+ *   bits 0 to 5 : state
+ *   bit 6       : mps
+ * DECODE_ONE_BIN_MACRO reads the symbol from bit 6 and uses the whole byte
+ * as the row index into its lookup table.
+ */
+typedef struct
+{
+ UWORD8 u1_mps_state; /* packed value: state in bits 0-5, mps in bit 6 */
+} bin_ctxt_model_t;
+
+/* Per-macroblock values kept for neighbours so that the context increment */
+/* (CtxtInc) can be derived during CABAC decoding.                          */
+typedef struct
+
+{
+ /* Neighbour availability Variables needed to get CtxtInc, for CABAC */
+ UWORD8 u1_mb_type; /** macroblock type: I/P/B/SI/SP */
+ UWORD8 u1_cbp; /** Coded Block Pattern */
+ UWORD8 u1_intra_chroma_pred_mode;
+
+ /*************************************************************************/
+ /* Arrangement of DC CSBP */
+ /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
+ /* CSBP: x x x x x Vdc Udc Ydc */
+ /*************************************************************************/
+ UWORD8 u1_yuv_dc_csbp;
+ /* NOTE(review): four reference indices and a 4x4 grid of mv bytes; the */
+ /* partition each entry maps to is not visible here -- confirm. */
+ WORD8 i1_ref_idx[4];
+ UWORD8 u1_mv[4][4];
+ UWORD8 u1_transform8x8_ctxt;
+} ctxt_inc_mb_info_t;
+
+/*
+ * Power-of-two thresholds; the _8 and _9 values are compared against the
+ * arithmetic decoder range to decide when to renormalize.
+ * Despite "RIGHT_SHIFTED" in the names, the values are left shifts (1 << n).
+ * The expansions are parenthesized so that each macro stays a single value
+ * inside any expression: (ONE_RIGHT_SHIFTED_BY_8 + 1) is 257, not 1 << 9.
+ */
+#define ONE_RIGHT_SHIFTED_BY_8 (1 << 8)
+#define ONE_RIGHT_SHIFTED_BY_9 (1 << 9)
+#define ONE_RIGHT_SHIFTED_BY_14 (1 << 14)
+/* State of the CABAC arithmetic decoder, shared by the bin decoding */
+/* routines declared below. */
+typedef struct
+{
+ UWORD32 u4_code_int_range; /* current range */
+ UWORD32 u4_code_int_val_ofst; /* current offset, compared against the range */
+ /* NOTE(review): presumably the combined lookup table that */
+ /* DECODE_ONE_BIN_MACRO receives as pu4_table_arg -- confirm at the callers */
+ const void *cabac_table;
+ void * pv_codec_handle; /* For Error Handling */
+} decoding_envirnoment_t;
+
+WORD32 ih264d_init_cabac_dec_envirnoment(decoding_envirnoment_t * ps_cab_env,
+ dec_bit_stream_t *ps_bitstrm);
+
+UWORD32 ih264d_decode_bin(UWORD32 u4_ctx_inc,
+ bin_ctxt_model_t *ps_bin_ctxt,
+ dec_bit_stream_t *ps_bitstrm,
+ decoding_envirnoment_t *ps_cab_env);
+UWORD8 ih264d_decode_terminate(decoding_envirnoment_t * ps_cab_env,
+ dec_bit_stream_t * ps_bitstrm);
+
+UWORD32 ih264d_decode_bins_tunary(UWORD8 u1_max_bins,
+ UWORD32 u4_ctx_inc,
+ bin_ctxt_model_t *ps_src_bin_ctxt,
+ dec_bit_stream_t *ps_bitstrm,
+ decoding_envirnoment_t *ps_cab_env);
+
+UWORD32 ih264d_decode_bins(UWORD8 u1_max_bins,
+ UWORD32 u4_ctx_inc,
+ bin_ctxt_model_t *ps_src_bin_ctxt,
+ dec_bit_stream_t *ps_bitstrm,
+ decoding_envirnoment_t *ps_cab_env);
+UWORD32 ih264d_decode_bins_unary(UWORD8 u1_max_bins,
+ UWORD32 u4_ctx_inc,
+ bin_ctxt_model_t *ps_src_bin_ctxt,
+ dec_bit_stream_t *ps_bitstrm,
+ decoding_envirnoment_t *ps_cab_env);
+
+UWORD32 ih264d_decode_bypass_bins_unary(decoding_envirnoment_t *ps_cab_env,
+ dec_bit_stream_t *ps_bitstrm);
+
+UWORD32 ih264d_decode_bypass_bins(decoding_envirnoment_t *ps_cab_env,
+ UWORD8 u1_max_bins,
+ dec_bit_stream_t *ps_bitstrm);
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Macro for renormalization of the range and the offset                     */
+/*****************************************************************************/
+
+/* We renormalize every time the number of bits (which are read ahead of
+ time) that we have consumed in u4_ofst exceeds 23.
+
+ Steps performed on the four arguments, all of which must be plain variables
+ because they are evaluated more than once and, except for the buffer,
+ assigned to:
+ 1. u4_clz_m = CLZ(range).
+ 2. NEXTBITS fetches u4_clz_m bits into read_bits_m, using (offset + 23) as
+ the bit position in the buffer.
+ 3. FLUSHBITS is applied to the offset with u4_clz_m (presumably advancing it
+ by that many bits -- confirm in ih264d_bitstrm.h).
+ 4. The range is shifted left by u4_clz_m; the code offset is shifted left by
+ the same amount and the fetched bits are OR-ed into its low end.
+
+ The macro expands to a complete brace block, which is why the call sites
+ do not put a semicolon after it. */
+
+#define RENORM_RANGE_OFFSET(u4_codeIntRange_m,u4_codeIntValOffset_m,u4_offset_m,pu4_buffer_m) \
+ { \
+ UWORD32 read_bits_m,u4_clz_m ; \
+ u4_clz_m = CLZ(u4_codeIntRange_m); \
+ NEXTBITS(read_bits_m,(u4_offset_m+23),pu4_buffer_m,u4_clz_m) \
+ FLUSHBITS(u4_offset_m,(u4_clz_m)) \
+ u4_codeIntRange_m = u4_codeIntRange_m << u4_clz_m; \
+ u4_codeIntValOffset_m = (u4_codeIntValOffset_m << u4_clz_m) | read_bits_m; \
+ }
+
+/*****************************************************************************/
+/* Macro for checking if the decoded symbol is the LPS                       */
+/*                                                                           */
+/* When the offset is greater than or equal to the range, the LPS path is   */
+/* taken: the symbol is inverted, the range is subtracted from the offset,  */
+/* the range becomes the LPS range and the next state is read from bits     */
+/* 15 to 21 of the table entry. Otherwise nothing is modified.              */
+/* All arguments except table_lookup_m are assigned to and must be plain    */
+/* variables.                                                               */
+/*****************************************************************************/
+
+#define CHECK_IF_LPS(u4_codeIntRange_m,u4_codeIntValOffset_m,u4_symbol_m, \
+ u4_codeIntRangeLPS_m,u1_mps_state_m,table_lookup_m) \
+{ \
+ if(u4_codeIntValOffset_m >= u4_codeIntRange_m) \
+ { \
+ u4_symbol_m = 1 - u4_symbol_m; \
+ u4_codeIntValOffset_m -= u4_codeIntRange_m; \
+ u4_codeIntRange_m = u4_codeIntRangeLPS_m; \
+ u1_mps_state_m = (table_lookup_m >> 15) & 0x7F; \
+ } \
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : DECODE_ONE_BIN_MACRO \endif
+ *
+ * \brief
+ * This macro implements the decoding process of a decision as defined
+ * in 9.3.3.2.2.
+ *
+ * \return
+ * The decoded symbol is stored in the u4_symbol argument.
+ *
+ * \note
+ * It is specified in 9.3.3.2.3.2 that, one of the input to this function
+ * is CtxIdx. CtxIdx is used to identify state and MPS of that context
+ * (Refer Fig 9.11 - Flowchart for encoding a decision). To suffice that
+ * here we pass a pointer bin_ctxt_model_t which contains these values.
+ *
+ * \note
+ * Arguments:
+ *   p_binCtxt_arg        - context model; its u1_mps_state is read and
+ *                          then updated with the next state.
+ *   u4_code_int_range    - range variable, updated in place.
+ *   u4_code_int_val_ofst - offset variable, updated in place.
+ *   pu4_table_arg        - lookup table read as UWORD32 entries, four per
+ *                          value of u1_mps_state.
+ *   p_DecBitStream_arg   - bitstream; pu4_buffer is read and u4_ofst is
+ *                          updated when a renormalization takes place.
+ *   u4_symbol            - variable that receives the decoded symbol.
+ * The range, offset and symbol arguments are assigned to and evaluated
+ * several times, so they must be plain variables.
+ *
+ * \note
+ * Steps:
+ *   1. The range is normalized with CLZ and the two bits just below its
+ *      most significant bit select one of the four table columns; the row
+ *      is u1_mps_state.
+ *   2. From the table entry, bits 0-7 give the LPS range, bits 8-14 the
+ *      next state for the MPS path and bits 15-21 the next state for the
+ *      LPS path.
+ *   3. The LPS range is scaled by (23 - clz) and subtracted from the range.
+ *      This expects the range to be at least 1 << 8 on entry, a condition
+ *      the renormalization at the end of the macro re-establishes.
+ *   4. The symbol starts as bit 6 of u1_mps_state (the MPS). If the offset
+ *      is not below the reduced range, the LPS path is taken: the symbol
+ *      is inverted, the offset is reduced by the range, the range becomes
+ *      the LPS range and the LPS next state is selected.
+ *   5. If the range dropped below 1 << 8, range and offset are
+ *      renormalized with the same sequence as RENORM_RANGE_OFFSET. The
+ *      inner block declares its own u4_clz_m, which hides the outer one.
+ *   6. The selected next state is written back to the context model.
+ *
+ **************************************************************************
+ */
+
+#define DECODE_ONE_BIN_MACRO(p_binCtxt_arg ,u4_code_int_range,u4_code_int_val_ofst, \
+ pu4_table_arg, \
+ p_DecBitStream_arg,u4_symbol) \
+{ \
+ bin_ctxt_model_t *p_binCtxt_m = (bin_ctxt_model_t *) p_binCtxt_arg; \
+ dec_bit_stream_t *p_DecBitStream_m = (dec_bit_stream_t *) p_DecBitStream_arg; \
+ const UWORD32 *pu4_table_m = (const UWORD32 *) pu4_table_arg; \
+ \
+ UWORD32 u4_quantCodeIntRange_m,u4_codeIntRangeLPS_m; \
+ UWORD32 u1_mps_state_m; \
+ UWORD32 table_lookup_m; \
+ UWORD32 u4_clz_m; \
+ \
+ u1_mps_state_m = (p_binCtxt_m->u1_mps_state); \
+ u4_clz_m = CLZ(u4_code_int_range); \
+ u4_quantCodeIntRange_m = u4_code_int_range << u4_clz_m; \
+ u4_quantCodeIntRange_m = (u4_quantCodeIntRange_m >> 29) & 0x3; \
+ table_lookup_m = pu4_table_m[(u1_mps_state_m << 2)+u4_quantCodeIntRange_m]; \
+ u4_codeIntRangeLPS_m = table_lookup_m & 0xff; \
+ \
+ u4_codeIntRangeLPS_m = u4_codeIntRangeLPS_m << (23 - u4_clz_m); \
+ u4_code_int_range = u4_code_int_range - u4_codeIntRangeLPS_m; \
+ u4_symbol = ((u1_mps_state_m>> 6) & 0x1); \
+ /* next state if the bin is the MPS; replaced below on the LPS path */ \
+ u1_mps_state_m = (table_lookup_m >> 8) & 0x7F; \
+ if(u4_code_int_val_ofst >= u4_code_int_range) \
+ { \
+ \
+ u4_symbol = 1 - u4_symbol; \
+ u4_code_int_val_ofst -= u4_code_int_range; \
+ u4_code_int_range = u4_codeIntRangeLPS_m; \
+ u1_mps_state_m = (table_lookup_m >> 15) & 0x7F; \
+ } \
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8) \
+ { \
+ UWORD32 *pu4_buffer,u4_offset; \
+ UWORD32 read_bits,u4_clz_m ; \
+ \
+ pu4_buffer = p_DecBitStream_m->pu4_buffer; \
+ u4_offset = p_DecBitStream_m->u4_ofst; \
+ u4_clz_m = CLZ(u4_code_int_range); \
+ NEXTBITS(read_bits,(u4_offset+23),pu4_buffer,u4_clz_m) \
+ FLUSHBITS(u4_offset,(u4_clz_m)) \
+ u4_code_int_range = u4_code_int_range << u4_clz_m; \
+ u4_code_int_val_ofst= (u4_code_int_val_ofst << u4_clz_m) | read_bits; \
+ \
+ \
+ p_DecBitStream_m->u4_ofst = u4_offset; \
+ } \
+ p_binCtxt_m->u1_mps_state = u1_mps_state_m; \
+}
+
+#endif /* _IH264D_CABAC_H_ */
diff --git a/decoder/ih264d_cabac_init_tables.c b/decoder/ih264d_cabac_init_tables.c
new file mode 100755
index 0000000..2c3a55e
--- /dev/null
+++ b/decoder/ih264d_cabac_init_tables.c
@@ -0,0 +1,9273 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _CABAC_INIT_TABLES_H_
+#define _CABAC_INIT_TABLES_H_
+
+/*****************************************************************************/
+/* */
+/* File Name : ih264d_cabac_init_tables.c */
+/* */
+/* Description : This file contains the initialized cabac context */
+/* structures for all possible values of Qp (0 - 51) */
+/* Cabac_init Idc (0 - 2) and I slice. The contexts */
+/* are initialized and stored as per tables 9-11 to */
+/* 9 -23 */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 10 01 2005 SH */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_cabac.h"
+
+/* Combined table: guc_RTAB, NextStateLPS and NextStateMPS packed together.
+ * Row index (combined_state):
+ *   bits 0-5   : state
+ *   bit  6     : mps
+ * Column index : one of the 4 range columns.
+ * Each entry (output):
+ *   bits 0-7   : rangeTabLPS
+ *   bits 8-14  : combined_next_state_if_mps
+ *   bits 15-21 : combined_next_state_if_lps
+ */
+
+const UWORD32 gau4_ih264d_cabac_table[128][4] =
+ {
+ { 2097536, 2097584, 2097616, 2097648 },
+
+ { 640, 679, 709, 739 },
+
+ { 33664, 33694, 33723, 33752 },
+
+ { 66683, 66710, 66738, 66765 },
+
+ { 66932, 66958, 66985, 67011 },
+
+ { 132719, 132743, 132768, 132793 },
+
+ { 132969, 132992, 133016, 133039 },
+
+ { 165988, 166010, 166032, 166054 },
+
+ { 199007, 199028, 199049, 199070 },
+
+ { 232026, 232046, 232066, 232086 },
+
+ { 265045, 265064, 265083, 265102 },
+
+ { 298065, 298083, 298101, 298119 },
+
+ { 298317, 298334, 298351, 298368 },
+
+ { 364105, 364121, 364137, 364154 },
+
+ { 364357, 364373, 364388, 364404 },
+
+ { 397378, 397392, 397407, 397422 },
+
+ { 430398, 430412, 430426, 430440 },
+
+ { 430651, 430664, 430678, 430691 },
+
+ { 496440, 496453, 496465, 496478 },
+
+ { 496693, 496705, 496717, 496729 },
+
+ { 529715, 529726, 529737, 529749 },
+
+ { 529968, 529979, 529989, 530000 },
+
+ { 595758, 595768, 595778, 595788 },
+
+ { 596011, 596021, 596031, 596040 },
+
+ { 629033, 629042, 629051, 629061 },
+
+ { 629287, 629296, 629304, 629313 },
+
+ { 695077, 695085, 695094, 695102 },
+
+ { 695331, 695339, 695347, 695355 },
+
+ { 728353, 728361, 728368, 728376 },
+
+ { 728608, 728615, 728622, 728629 },
+
+ { 761630, 761637, 761643, 761650 },
+
+ { 794653, 794659, 794665, 794672 },
+
+ { 794907, 794913, 794919, 794925 },
+
+ { 827930, 827935, 827941, 827947 },
+
+ { 860952, 860958, 860963, 860969 },
+
+ { 861207, 861212, 861217, 861223 },
+
+ { 894230, 894235, 894240, 894245 },
+
+ { 894485, 894490, 894494, 894499 },
+
+ { 927508, 927512, 927517, 927521 },
+
+ { 960531, 960535, 960539, 960543 },
+
+ { 960786, 960790, 960794, 960798 },
+
+ { 993809, 993813, 993817, 993820 },
+
+ { 994064, 994068, 994071, 994075 },
+
+ { 994319, 994323, 994326, 994329 },
+
+ { 1027342, 1027346, 1027349, 1027352 },
+
+ { 1060366, 1060369, 1060372, 1060375 },
+
+ { 1060621, 1060624, 1060627, 1060630 },
+
+ { 1093644, 1093647, 1093650, 1093653 },
+
+ { 1093900, 1093902, 1093905, 1093908 },
+
+ { 1094155, 1094158, 1094160, 1094163 },
+
+ { 1127179, 1127181, 1127183, 1127186 },
+
+ { 1127434, 1127436, 1127439, 1127441 },
+
+ { 1160458, 1160460, 1160462, 1160464 },
+
+ { 1160713, 1160715, 1160717, 1160719 },
+
+ { 1160969, 1160971, 1160972, 1160974 },
+
+ { 1193992, 1193994, 1193996, 1193998 },
+
+ { 1194248, 1194249, 1194251, 1194253 },
+
+ { 1194503, 1194505, 1194507, 1194508 },
+
+ { 1227527, 1227529, 1227530, 1227532 },
+
+ { 1227783, 1227784, 1227786, 1227787 },
+
+ { 1228038, 1228040, 1228041, 1228043 },
+
+ { 1261062, 1261063, 1261065, 1261066 },
+
+ { 1261062, 1261063, 1261064, 1261065 },
+
+ { 2080514, 2080514, 2080514, 2080514 },
+
+ { 16768, 16816, 16848, 16880 },
+
+ { 2114176, 2114215, 2114245, 2114275 },
+
+ { 2147200, 2147230, 2147259, 2147288 },
+
+ { 2180219, 2180246, 2180274, 2180301 },
+
+ { 2180468, 2180494, 2180521, 2180547 },
+
+ { 2246255, 2246279, 2246304, 2246329 },
+
+ { 2246505, 2246528, 2246552, 2246575 },
+
+ { 2279524, 2279546, 2279568, 2279590 },
+
+ { 2312543, 2312564, 2312585, 2312606 },
+
+ { 2345562, 2345582, 2345602, 2345622 },
+
+ { 2378581, 2378600, 2378619, 2378638 },
+
+ { 2411601, 2411619, 2411637, 2411655 },
+
+ { 2411853, 2411870, 2411887, 2411904 },
+
+ { 2477641, 2477657, 2477673, 2477690 },
+
+ { 2477893, 2477909, 2477924, 2477940 },
+
+ { 2510914, 2510928, 2510943, 2510958 },
+
+ { 2543934, 2543948, 2543962, 2543976 },
+
+ { 2544187, 2544200, 2544214, 2544227 },
+
+ { 2609976, 2609989, 2610001, 2610014 },
+
+ { 2610229, 2610241, 2610253, 2610265 },
+
+ { 2643251, 2643262, 2643273, 2643285 },
+
+ { 2643504, 2643515, 2643525, 2643536 },
+
+ { 2709294, 2709304, 2709314, 2709324 },
+
+ { 2709547, 2709557, 2709567, 2709576 },
+
+ { 2742569, 2742578, 2742587, 2742597 },
+
+ { 2742823, 2742832, 2742840, 2742849 },
+
+ { 2808613, 2808621, 2808630, 2808638 },
+
+ { 2808867, 2808875, 2808883, 2808891 },
+
+ { 2841889, 2841897, 2841904, 2841912 },
+
+ { 2842144, 2842151, 2842158, 2842165 },
+
+ { 2875166, 2875173, 2875179, 2875186 },
+
+ { 2908189, 2908195, 2908201, 2908208 },
+
+ { 2908443, 2908449, 2908455, 2908461 },
+
+ { 2941466, 2941471, 2941477, 2941483 },
+
+ { 2974488, 2974494, 2974499, 2974505 },
+
+ { 2974743, 2974748, 2974753, 2974759 },
+
+ { 3007766, 3007771, 3007776, 3007781 },
+
+ { 3008021, 3008026, 3008030, 3008035 },
+
+ { 3041044, 3041048, 3041053, 3041057 },
+
+ { 3074067, 3074071, 3074075, 3074079 },
+
+ { 3074322, 3074326, 3074330, 3074334 },
+
+ { 3107345, 3107349, 3107353, 3107356 },
+
+ { 3107600, 3107604, 3107607, 3107611 },
+
+ { 3107855, 3107859, 3107862, 3107865 },
+
+ { 3140878, 3140882, 3140885, 3140888 },
+
+ { 3173902, 3173905, 3173908, 3173911 },
+
+ { 3174157, 3174160, 3174163, 3174166 },
+
+ { 3207180, 3207183, 3207186, 3207189 },
+
+ { 3207436, 3207438, 3207441, 3207444 },
+
+ { 3207691, 3207694, 3207696, 3207699 },
+
+ { 3240715, 3240717, 3240719, 3240722 },
+
+ { 3240970, 3240972, 3240975, 3240977 },
+
+ { 3273994, 3273996, 3273998, 3274000 },
+
+ { 3274249, 3274251, 3274253, 3274255 },
+
+ { 3274505, 3274507, 3274508, 3274510 },
+
+ { 3307528, 3307530, 3307532, 3307534 },
+
+ { 3307784, 3307785, 3307787, 3307789 },
+
+ { 3308039, 3308041, 3308043, 3308044 },
+
+ { 3341063, 3341065, 3341066, 3341068 },
+
+ { 3341319, 3341320, 3341322, 3341323 },
+
+ { 3341574, 3341576, 3341577, 3341579 },
+
+ { 3374598, 3374599, 3374601, 3374602 },
+
+ { 3374598, 3374599, 3374600, 3374601 },
+
+ { 4194050, 4194050, 4194050, 4194050 },
+
+ };
+
+/*****************************************************************************/
+/* Global Variable Initialization */
+/*****************************************************************************/
+const UWORD8 gau1_ih264d_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE][NUM_CABAC_CTXTS] =
+
+ {
+
+ {
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 30, 61, 62,
+ 54, 14, 118, 6, 78, 65, 1, 14, 73, 13, 64, 20, 62,
+ 67, 90, 104, 126, 104, 67, 78, 65, 1, 86, 95, 2,
+ 18, 69, 81, 96, 8, 67, 86, 88, 5, 76, 94, 9, 69,
+ 81, 88, 67, 74, 74, 80, 72, 5, 22, 0, 0, 0, 83,
+ 86, 97, 72, 22, 1, 18, 78, 96, 126, 98, 101, 67,
+ 82, 94, 83, 110, 91, 102, 93, 126, 92, 89, 96,
+ 108, 17, 65, 6, 93, 74, 92, 87, 126, 9, 3, 4, 69,
+ 15, 68, 69, 88, 85, 78, 75, 77, 9, 13, 68, 13, 21,
+ 81, 0, 70, 67, 6, 76, 28, 64, 2, 28, 38, 39, 34,
+ 27, 93, 73, 73, 17, 14, 100, 10, 10, 10, 2, 7, 7,
+ 0, 3, 1, 6, 69, 6, 24, 12, 68, 64, 2, 0, 13, 24,
+ 19, 11, 15, 3, 4, 4, 30, 19, 20, 78, 3, 69, 35,
+ 23, 19, 14, 17, 19, 12, 16, 24, 1, 17, 9, 9, 5, 0,
+ 12, 6, 10, 11, 8, 18, 27, 10, 82, 8, 78, 17, 32,
+ 84, 56, 62, 60, 59, 62, 62, 57, 57, 54, 44, 36,
+ 33, 43, 29, 70, 67, 4, 67, 33, 31, 28, 34, 32, 25,
+ 20, 22, 0, 4, 64, 94, 89, 108, 76, 19, 18, 11, 64,
+ 4, 70, 75, 82, 102, 77, 39, 21, 15, 8, 4, 71, 83,
+ 87, 119, 5, 34, 27, 25, 20, 8, 5, 64, 74, 90, 70,
+ 34, 32, 21, 4, 5, 72, 81, 97, 5, 58, 49, 45, 36,
+ 23, 5, 70, 79, 85, 62, 106, 106, 87, 114, 110, 98,
+ 110, 106, 103, 107, 108, 112, 96, 95, 91, 93, 94,
+ 86, 67, 80, 85, 70, 3, 5, 2, 13, 13, 14, 9, 22,
+ 17, 12, 14, 11, 22, 16, 8, 22, 19, 13, 10, 14, 0,
+ 64, 69, 4, 70, 19, 32, 20, 10, 29, 25, 11, 23, 31,
+ 19, 25, 13, 6, 20, 52, 49, 52, 52, 54, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 34, 62, 62, 62, 62, 62,
+ 62, 54, 37, 36, 6, 82, 75, 97, 125, 62, 62, 62,
+ 57, 55, 53, 41, 44, 31, 32, 22, 19, 16, 65, 71, 3,
+ 0, 65, 39, 43, 40, 31, 40, 39, 23, 31, 34, 21, 6,
+ 10, 2, 86, 23, 12, 4, 79, 71, 69, 70, 66, 68, 73,
+ 69, 70, 67, 1, 70, 66, 65, 0, 62, 62, 62, 62, 62,
+ 60, 54, 36, 4, 66, 28, 21, 18, 15, 7, 3, 1, 66,
+ 76, 85, 81, 77, 81, 80, 73, 74, 83, 71, 67, 2, 66,
+ 66, 4, 4, 62, 62, 62, 62, 61, 57, 46, 29, 1 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 29, 60,
+ 62, 54, 14, 115, 6, 77, 64, 1, 14, 72, 12, 65,
+ 20, 62, 68, 91, 104, 124, 102, 67, 77, 64, 1,
+ 85, 93, 3, 18, 68, 80, 95, 8, 67, 85, 88, 5, 75,
+ 93, 9, 69, 80, 88, 66, 73, 73, 79, 71, 5, 22, 0,
+ 0, 0, 82, 86, 97, 71, 22, 1, 18, 77, 95, 124,
+ 96, 99, 65, 80, 92, 82, 108, 89, 100, 92, 125,
+ 91, 88, 95, 107, 18, 64, 7, 92, 73, 91, 86, 124,
+ 9, 3, 4, 69, 16, 68, 68, 87, 84, 77, 74, 76, 9,
+ 13, 67, 13, 21, 80, 0, 69, 67, 6, 75, 28, 64, 2,
+ 28, 37, 39, 34, 27, 92, 72, 72, 17, 14, 99, 10,
+ 10, 10, 3, 7, 7, 1, 4, 2, 6, 68, 6, 24, 12, 68,
+ 64, 2, 0, 13, 23, 19, 11, 15, 4, 5, 4, 29, 19,
+ 20, 77, 3, 69, 35, 23, 19, 14, 17, 19, 12, 16,
+ 24, 1, 17, 9, 9, 5, 0, 12, 6, 10, 11, 8, 18, 27,
+ 10, 81, 8, 77, 17, 31, 83, 55, 62, 59, 58, 61,
+ 62, 56, 56, 52, 43, 35, 32, 41, 28, 71, 67, 4,
+ 67, 32, 30, 27, 33, 31, 24, 19, 21, 0, 4, 64,
+ 93, 88, 107, 75, 20, 18, 11, 0, 5, 69, 74, 81,
+ 100, 76, 39, 21, 15, 8, 5, 70, 82, 86, 117, 5,
+ 35, 28, 25, 20, 9, 5, 64, 73, 89, 70, 35, 32,
+ 21, 4, 6, 71, 80, 96, 5, 58, 49, 45, 36, 23, 5,
+ 69, 78, 84, 62, 105, 105, 86, 112, 108, 97, 108,
+ 104, 101, 105, 106, 110, 95, 94, 90, 92, 92, 85,
+ 67, 79, 84, 69, 3, 5, 2, 13, 13, 13, 8, 22, 17,
+ 13, 14, 11, 22, 16, 8, 22, 19, 13, 10, 14, 0,
+ 64, 68, 5, 70, 19, 32, 20, 10, 29, 25, 12, 23,
+ 30, 19, 25, 13, 6, 19, 52, 49, 52, 51, 53, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 33, 62, 62, 62,
+ 62, 62, 62, 53, 36, 35, 6, 81, 74, 95, 122, 62,
+ 62, 62, 56, 53, 52, 40, 42, 30, 31, 21, 18, 15,
+ 66, 71, 3, 0, 66, 38, 42, 39, 30, 39, 38, 22,
+ 30, 33, 20, 5, 9, 1, 86, 23, 12, 4, 78, 70, 68,
+ 69, 65, 67, 71, 68, 69, 66, 3, 68, 65, 0, 2, 62,
+ 62, 62, 62, 62, 58, 51, 34, 2, 65, 29, 22, 19,
+ 16, 8, 4, 2, 65, 75, 84, 80, 76, 80, 78, 71, 73,
+ 82, 70, 66, 3, 65, 65, 4, 4, 62, 62, 62, 62, 58,
+ 54, 43, 26, 64 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 28, 59,
+ 61, 54, 14, 113, 6, 76, 0, 1, 13, 72, 11, 66,
+ 19, 60, 70, 92, 105, 121, 101, 67, 76, 0, 1, 85,
+ 92, 3, 17, 68, 80, 94, 8, 67, 85, 88, 5, 75, 92,
+ 9, 69, 80, 88, 66, 73, 73, 79, 71, 5, 22, 0, 0,
+ 0, 81, 86, 97, 71, 21, 1, 18, 77, 95, 122, 94,
+ 97, 64, 78, 91, 81, 107, 88, 99, 91, 123, 91,
+ 88, 95, 106, 18, 64, 7, 91, 73, 90, 86, 123, 9,
+ 3, 4, 69, 16, 68, 68, 87, 84, 77, 74, 76, 9, 13,
+ 67, 13, 21, 80, 0, 69, 67, 6, 75, 27, 64, 2, 27,
+ 36, 38, 33, 26, 91, 72, 72, 16, 13, 99, 9, 10,
+ 10, 3, 7, 7, 2, 4, 2, 6, 68, 6, 23, 12, 69, 64,
+ 2, 64, 13, 22, 19, 11, 14, 4, 5, 4, 28, 19, 19,
+ 77, 3, 70, 34, 23, 19, 14, 17, 19, 12, 16, 24,
+ 1, 17, 9, 9, 5, 0, 12, 6, 10, 11, 8, 17, 26, 9,
+ 81, 8, 77, 16, 30, 83, 53, 62, 57, 56, 59, 60,
+ 54, 54, 50, 41, 33, 30, 39, 26, 72, 67, 4, 68,
+ 31, 29, 26, 32, 29, 23, 18, 20, 64, 3, 65, 93,
+ 88, 106, 75, 20, 18, 11, 0, 5, 69, 74, 81, 99,
+ 75, 39, 21, 15, 8, 5, 70, 81, 85, 115, 5, 35,
+ 28, 25, 20, 9, 5, 64, 73, 88, 70, 35, 32, 21, 4,
+ 6, 71, 80, 95, 5, 57, 48, 44, 35, 23, 5, 69, 78,
+ 84, 62, 104, 104, 85, 111, 107, 96, 107, 103,
+ 100, 104, 105, 108, 94, 93, 90, 91, 91, 85, 68,
+ 79, 83, 69, 3, 4, 2, 12, 12, 12, 7, 21, 17, 13,
+ 14, 10, 21, 16, 8, 21, 18, 13, 10, 13, 0, 64,
+ 68, 5, 70, 18, 31, 19, 10, 28, 24, 12, 22, 29,
+ 19, 25, 12, 5, 17, 51, 48, 51, 50, 52, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 32, 62, 62, 62, 62,
+ 62, 62, 51, 35, 34, 6, 80, 74, 94, 120, 60, 60,
+ 62, 54, 51, 50, 38, 40, 29, 29, 20, 16, 14, 67,
+ 72, 2, 0, 67, 37, 41, 37, 28, 37, 36, 21, 28,
+ 31, 19, 4, 8, 0, 87, 22, 11, 3, 78, 70, 68, 68,
+ 65, 66, 70, 67, 68, 65, 4, 67, 64, 1, 3, 62, 62,
+ 62, 62, 60, 55, 48, 31, 0, 65, 29, 22, 19, 16,
+ 9, 4, 2, 65, 75, 84, 80, 75, 80, 77, 70, 73, 81,
+ 69, 65, 3, 65, 64, 4, 4, 62, 62, 62, 60, 55, 50,
+ 39, 23, 67 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 26, 57,
+ 60, 54, 14, 111, 6, 75, 1, 1, 12, 72, 10, 67,
+ 19, 58, 71, 93, 105, 118, 100, 67, 75, 1, 1, 84,
+ 91, 4, 17, 68, 79, 93, 7, 68, 85, 88, 5, 75, 92,
+ 9, 69, 80, 88, 65, 73, 73, 79, 70, 5, 22, 0, 0,
+ 0, 81, 86, 97, 70, 20, 1, 18, 77, 95, 120, 92,
+ 96, 1, 76, 90, 80, 105, 87, 98, 90, 121, 90, 88,
+ 94, 105, 18, 64, 7, 91, 73, 90, 85, 121, 9, 2,
+ 3, 70, 16, 68, 68, 86, 84, 76, 74, 75, 9, 13,
+ 67, 13, 20, 80, 0, 69, 67, 6, 75, 26, 64, 2, 26,
+ 35, 37, 32, 25, 91, 71, 72, 15, 13, 98, 9, 10,
+ 10, 3, 7, 7, 3, 4, 2, 6, 67, 6, 22, 12, 70, 64,
+ 2, 64, 12, 21, 19, 11, 13, 4, 5, 4, 26, 19, 18,
+ 77, 3, 70, 33, 23, 19, 14, 17, 19, 12, 16, 24,
+ 1, 16, 9, 9, 5, 0, 11, 5, 9, 10, 7, 16, 25, 9,
+ 81, 7, 77, 15, 28, 83, 52, 62, 55, 54, 57, 58,
+ 52, 52, 48, 39, 32, 29, 37, 24, 73, 67, 4, 68,
+ 30, 28, 25, 30, 28, 21, 17, 19, 65, 3, 65, 93,
+ 88, 106, 74, 20, 18, 11, 0, 5, 69, 74, 80, 98,
+ 75, 39, 21, 15, 8, 6, 69, 80, 84, 113, 5, 35,
+ 28, 25, 20, 10, 5, 64, 73, 88, 70, 35, 32, 20,
+ 4, 6, 71, 80, 94, 5, 57, 48, 43, 34, 23, 5, 69,
+ 77, 83, 62, 103, 103, 85, 110, 106, 95, 105,
+ 102, 99, 103, 103, 107, 94, 92, 90, 91, 89, 85,
+ 68, 79, 83, 69, 2, 4, 2, 11, 11, 11, 6, 21, 16,
+ 13, 13, 10, 21, 15, 8, 20, 18, 12, 10, 12, 0,
+ 65, 68, 5, 71, 18, 31, 18, 10, 27, 24, 12, 21,
+ 28, 18, 24, 11, 5, 16, 50, 47, 51, 49, 51, 61,
+ 62, 62, 62, 62, 62, 62, 62, 62, 31, 62, 62, 62,
+ 62, 62, 62, 49, 34, 33, 6, 79, 74, 93, 118, 58,
+ 58, 62, 52, 49, 48, 37, 38, 27, 28, 19, 15, 12,
+ 68, 73, 2, 64, 68, 36, 39, 36, 26, 35, 34, 19,
+ 27, 29, 17, 3, 6, 65, 88, 21, 10, 2, 78, 69, 68,
+ 68, 64, 66, 69, 66, 67, 64, 5, 66, 0, 3, 4, 62,
+ 62, 62, 62, 58, 52, 45, 28, 65, 64, 30, 23, 20,
+ 16, 10, 5, 2, 64, 74, 84, 79, 75, 79, 76, 69,
+ 73, 81, 69, 65, 3, 64, 0, 4, 4, 62, 62, 62, 57,
+ 52, 46, 35, 19, 69 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 25, 56,
+ 58, 54, 14, 108, 5, 74, 1, 1, 11, 72, 9, 68, 18,
+ 56, 73, 94, 106, 115, 99, 67, 74, 1, 1, 84, 90,
+ 4, 16, 68, 79, 93, 7, 68, 84, 88, 5, 75, 91, 8,
+ 70, 80, 88, 65, 72, 73, 78, 70, 5, 22, 0, 0, 0,
+ 80, 87, 97, 70, 19, 1, 18, 77, 95, 119, 91, 94,
+ 2, 75, 89, 79, 104, 85, 97, 89, 119, 90, 87, 94,
+ 104, 18, 64, 7, 90, 73, 89, 85, 120, 8, 2, 3,
+ 70, 16, 68, 68, 86, 84, 76, 74, 75, 9, 12, 67,
+ 13, 20, 80, 0, 69, 67, 6, 75, 26, 65, 2, 26, 34,
+ 36, 31, 24, 90, 71, 72, 14, 12, 98, 8, 10, 9, 3,
+ 7, 7, 4, 5, 2, 5, 67, 5, 21, 11, 71, 64, 2, 65,
+ 12, 20, 18, 10, 13, 5, 5, 4, 25, 18, 17, 77, 3,
+ 71, 33, 23, 19, 14, 17, 19, 12, 16, 23, 1, 16,
+ 9, 9, 5, 64, 11, 5, 9, 10, 7, 16, 24, 8, 81, 7,
+ 77, 14, 27, 83, 50, 62, 53, 52, 55, 56, 50, 50,
+ 46, 37, 30, 27, 34, 22, 74, 67, 3, 69, 29, 27,
+ 24, 29, 26, 20, 16, 17, 65, 2, 66, 93, 88, 105,
+ 74, 20, 18, 11, 0, 5, 69, 74, 80, 97, 74, 39,
+ 21, 15, 8, 6, 69, 80, 84, 111, 5, 35, 28, 25,
+ 20, 10, 5, 64, 73, 87, 70, 35, 31, 20, 4, 6, 71,
+ 80, 94, 5, 56, 47, 42, 33, 23, 5, 69, 77, 83,
+ 62, 102, 102, 84, 108, 105, 94, 104, 100, 98,
+ 101, 102, 105, 93, 92, 89, 90, 88, 84, 69, 79,
+ 82, 69, 2, 3, 1, 10, 10, 10, 5, 20, 16, 13, 13,
+ 9, 20, 15, 8, 19, 17, 12, 9, 11, 64, 65, 68, 5,
+ 71, 17, 30, 17, 10, 26, 23, 12, 20, 27, 18, 24,
+ 10, 4, 14, 49, 47, 50, 48, 49, 60, 62, 62, 62,
+ 62, 62, 62, 62, 62, 29, 62, 62, 62, 62, 62, 62,
+ 47, 33, 31, 6, 78, 73, 92, 116, 57, 56, 60, 51,
+ 47, 46, 35, 36, 26, 26, 17, 13, 11, 69, 74, 1,
+ 64, 69, 34, 38, 34, 25, 33, 32, 18, 25, 27, 16,
+ 2, 5, 66, 88, 20, 10, 1, 78, 69, 67, 67, 64, 65,
+ 68, 66, 66, 0, 6, 65, 1, 4, 5, 62, 62, 62, 61,
+ 55, 49, 42, 25, 68, 64, 30, 23, 20, 17, 10, 5,
+ 3, 64, 74, 83, 79, 74, 79, 75, 68, 73, 80, 68,
+ 64, 3, 64, 1, 4, 4, 62, 62, 61, 54, 49, 42, 31,
+ 16, 72 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 23, 54,
+ 57, 54, 14, 106, 5, 73, 2, 1, 11, 71, 8, 69, 18,
+ 54, 75, 95, 106, 112, 97, 67, 73, 2, 1, 84, 89,
+ 4, 16, 68, 79, 92, 7, 69, 84, 88, 5, 75, 90, 8,
+ 70, 80, 88, 64, 72, 72, 78, 69, 5, 22, 0, 0, 0,
+ 80, 87, 97, 69, 18, 1, 18, 76, 95, 117, 89, 93,
+ 4, 73, 87, 78, 103, 84, 96, 88, 117, 89, 87, 93,
+ 103, 18, 64, 7, 90, 73, 89, 84, 118, 8, 2, 3,
+ 70, 16, 68, 67, 85, 84, 76, 74, 74, 9, 12, 67,
+ 13, 20, 79, 0, 68, 67, 6, 75, 25, 65, 2, 25, 33,
+ 36, 30, 23, 89, 70, 72, 13, 12, 97, 8, 10, 9, 3,
+ 7, 7, 5, 5, 2, 5, 67, 5, 20, 11, 72, 64, 2, 65,
+ 11, 19, 18, 10, 12, 5, 5, 4, 24, 18, 16, 77, 3,
+ 71, 32, 23, 19, 14, 17, 19, 12, 16, 23, 1, 16,
+ 9, 9, 5, 64, 11, 5, 8, 10, 7, 15, 23, 8, 81, 6,
+ 77, 13, 26, 83, 49, 61, 52, 51, 53, 54, 48, 48,
+ 44, 35, 28, 25, 32, 21, 75, 67, 3, 69, 28, 26,
+ 23, 28, 25, 18, 15, 16, 66, 2, 66, 93, 88, 105,
+ 74, 20, 18, 11, 0, 5, 68, 73, 79, 96, 74, 39,
+ 21, 15, 8, 6, 68, 79, 83, 109, 5, 35, 28, 25,
+ 20, 10, 5, 64, 73, 86, 70, 36, 31, 19, 4, 6, 71,
+ 80, 93, 5, 56, 46, 41, 32, 23, 5, 69, 77, 82,
+ 62, 101, 101, 83, 107, 104, 93, 103, 99, 97,
+ 100, 100, 103, 92, 91, 89, 90, 87, 84, 69, 78,
+ 81, 69, 1, 3, 1, 10, 9, 9, 4, 19, 15, 13, 12, 9,
+ 20, 15, 8, 18, 16, 12, 9, 10, 64, 65, 68, 5, 71,
+ 16, 30, 17, 10, 25, 22, 12, 19, 26, 17, 23, 9,
+ 3, 12, 48, 46, 50, 47, 48, 58, 62, 62, 62, 62,
+ 62, 62, 62, 62, 28, 62, 62, 62, 62, 62, 61, 45,
+ 32, 30, 6, 77, 73, 91, 114, 55, 55, 58, 49, 45,
+ 44, 34, 34, 25, 24, 16, 11, 9, 70, 75, 1, 64,
+ 70, 33, 36, 32, 23, 32, 31, 16, 24, 26, 14, 1,
+ 4, 67, 89, 20, 9, 0, 77, 68, 67, 67, 0, 64, 67,
+ 65, 65, 1, 8, 64, 2, 5, 7, 62, 62, 62, 58, 53,
+ 46, 39, 22, 70, 64, 31, 24, 21, 17, 11, 5, 3, 0,
+ 73, 83, 79, 73, 78, 74, 67, 72, 79, 68, 64, 3,
+ 0, 2, 4, 4, 62, 62, 58, 51, 46, 39, 27, 12, 75 },
+
+ {
+
+ 62,
+ 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 22, 53,
+ 56, 54, 14, 104, 5, 73, 3, 1, 10, 71, 7, 70, 17,
+ 53, 76, 96, 107, 109, 96, 67, 73, 3, 1, 83, 88,
+ 5, 15, 67, 78, 91, 6, 69, 84, 88, 5, 74, 90, 8,
+ 70, 79, 88, 64, 72, 72, 78, 69, 5, 22, 0, 0, 0,
+ 79, 87, 97, 69, 18, 0, 18, 76, 94, 115, 87, 91,
+ 5, 71, 86, 77, 101, 83, 95, 88, 116, 89, 87, 93,
+ 103, 19, 64, 7, 89, 72, 88, 84, 117, 8, 1, 2,
+ 71, 16, 68, 67, 85, 84, 75, 74, 74, 9, 12, 66,
+ 13, 19, 79, 0, 68, 67, 6, 75, 24, 65, 2, 24, 32,
+ 35, 30, 23, 89, 70, 72, 13, 11, 97, 7, 10, 9, 3,
+ 7, 7, 5, 5, 2, 5, 66, 5, 19, 11, 72, 65, 2, 66,
+ 11, 18, 18, 10, 11, 5, 5, 4, 22, 18, 15, 77, 3,
+ 72, 31, 23, 18, 14, 17, 19, 12, 16, 23, 1, 15,
+ 9, 8, 5, 64, 10, 4, 8, 9, 6, 14, 22, 7, 81, 6,
+ 76, 12, 24, 83, 47, 59, 50, 49, 51, 52, 46, 46,
+ 42, 33, 27, 24, 30, 19, 76, 67, 3, 70, 27, 25,
+ 22, 26, 23, 17, 14, 15, 67, 1, 67, 93, 88, 104,
+ 73, 20, 18, 11, 1, 5, 68, 73, 79, 95, 73, 38,
+ 21, 15, 8, 7, 68, 78, 82, 107, 5, 36, 28, 25,
+ 20, 11, 5, 64, 72, 86, 70, 36, 31, 19, 4, 6, 70,
+ 79, 92, 5, 55, 46, 40, 32, 23, 5, 68, 76, 82,
+ 62, 101, 100, 83, 106, 103, 92, 101, 98, 96, 99,
+ 99, 102, 92, 90, 89, 89, 85, 84, 70, 78, 81, 69,
+ 1, 2, 1, 9, 8, 8, 3, 19, 15, 13, 12, 8, 19, 14,
+ 8, 18, 16, 11, 9, 10, 64, 66, 68, 5, 72, 16, 29,
+ 16, 9, 24, 22, 13, 19, 25, 17, 23, 9, 3, 11, 47,
+ 45, 49, 46, 47, 57, 62, 62, 62, 62, 62, 62, 62,
+ 61, 27, 62, 62, 62, 62, 62, 59, 43, 31, 29, 6,
+ 76, 73, 89, 111, 53, 53, 56, 47, 43, 42, 32, 32,
+ 23, 23, 15, 10, 8, 71, 76, 0, 65, 71, 32, 35,
+ 31, 21, 30, 29, 15, 22, 24, 13, 64, 2, 69, 90,
+ 19, 8, 64, 77, 68, 67, 66, 0, 64, 65, 64, 64, 2,
+ 9, 1, 3, 7, 8, 62, 62, 60, 56, 50, 44, 36, 20,
+ 72, 0, 31, 24, 21, 17, 12, 6, 3, 0, 73, 83, 78,
+ 73, 78, 73, 66, 72, 79, 67, 0, 3, 0, 3, 4, 4,
+ 62, 62, 56, 48, 42, 35, 24, 9, 77 },
+
+ {
+
+ 62,
+ 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 20, 51,
+ 54, 54, 14, 101, 4, 72, 3, 1, 9, 71, 6, 71, 17,
+ 51, 78, 97, 107, 106, 95, 67, 72, 3, 1, 83, 87,
+ 5, 15, 67, 78, 91, 6, 70, 83, 88, 5, 74, 89, 7,
+ 70, 79, 88, 0, 71, 72, 77, 68, 5, 22, 0, 0, 0,
+ 79, 87, 97, 68, 17, 0, 18, 76, 94, 114, 85, 90,
+ 7, 69, 85, 76, 100, 81, 94, 87, 114, 88, 86, 92,
+ 102, 19, 64, 7, 89, 72, 88, 83, 115, 7, 1, 2,
+ 71, 16, 68, 67, 84, 84, 75, 74, 73, 9, 11, 66,
+ 13, 19, 79, 0, 68, 67, 6, 75, 24, 65, 2, 24, 31,
+ 34, 29, 22, 88, 69, 72, 12, 11, 96, 7, 10, 8, 3,
+ 7, 7, 6, 6, 2, 5, 66, 5, 18, 11, 73, 65, 2, 66,
+ 10, 17, 17, 10, 11, 6, 5, 4, 21, 17, 14, 77, 3,
+ 72, 31, 23, 18, 14, 17, 19, 12, 16, 23, 1, 15,
+ 9, 8, 5, 64, 10, 4, 7, 9, 6, 14, 21, 7, 81, 5,
+ 76, 11, 23, 83, 46, 57, 48, 47, 49, 50, 44, 44,
+ 40, 31, 25, 22, 27, 17, 77, 67, 2, 70, 26, 24,
+ 21, 25, 22, 15, 13, 14, 67, 1, 67, 93, 88, 104,
+ 73, 20, 18, 11, 1, 5, 68, 73, 78, 94, 73, 38,
+ 21, 15, 8, 7, 67, 77, 82, 105, 5, 36, 28, 25,
+ 20, 11, 5, 64, 72, 85, 70, 36, 30, 18, 4, 6, 70,
+ 79, 92, 5, 55, 45, 39, 31, 23, 5, 68, 76, 81,
+ 62, 100, 99, 82, 104, 102, 91, 100, 96, 95, 97,
+ 97, 100, 91, 89, 88, 89, 84, 83, 70, 78, 80, 69,
+ 0, 2, 0, 8, 7, 7, 2, 18, 14, 13, 11, 8, 19, 14,
+ 8, 17, 15, 11, 8, 9, 64, 66, 68, 5, 72, 15, 29,
+ 15, 9, 23, 21, 13, 18, 24, 16, 22, 8, 2, 9, 46,
+ 45, 49, 45, 45, 55, 62, 62, 62, 62, 62, 62, 62,
+ 59, 25, 62, 62, 62, 62, 62, 56, 41, 30, 28, 6,
+ 75, 72, 88, 109, 52, 51, 54, 46, 41, 40, 31, 30,
+ 22, 21, 13, 8, 6, 72, 77, 0, 65, 72, 30, 33, 29,
+ 20, 28, 27, 13, 21, 22, 11, 65, 1, 70, 90, 18,
+ 8, 65, 77, 67, 66, 66, 1, 0, 64, 0, 0, 3, 10, 2,
+ 4, 8, 9, 62, 61, 58, 53, 48, 41, 33, 17, 74, 0,
+ 32, 25, 22, 18, 13, 6, 4, 1, 72, 82, 78, 72, 77,
+ 72, 65, 72, 78, 67, 0, 3, 1, 4, 4, 4, 62, 62,
+ 53, 45, 39, 31, 20, 5, 80 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 19, 50,
+ 53, 54, 14, 99, 4, 71, 4, 1, 8, 71, 5, 73, 16,
+ 49, 80, 98, 108, 104, 94, 67, 71, 4, 1, 83, 86,
+ 5, 14, 67, 78, 90, 5, 70, 83, 89, 5, 74, 89, 7,
+ 71, 79, 88, 0, 71, 72, 77, 68, 5, 22, 0, 0, 0,
+ 78, 88, 97, 68, 16, 0, 18, 76, 94, 112, 84, 88,
+ 8, 68, 84, 75, 99, 80, 93, 86, 112, 88, 86, 92,
+ 101, 19, 64, 7, 88, 72, 87, 83, 114, 7, 0, 1,
+ 72, 16, 68, 67, 84, 84, 75, 74, 73, 8, 11, 66,
+ 13, 18, 79, 0, 68, 67, 5, 75, 23, 66, 2, 23, 29,
+ 33, 28, 21, 88, 69, 72, 11, 10, 96, 6, 9, 8, 3,
+ 7, 7, 7, 6, 2, 4, 66, 4, 17, 10, 74, 65, 2, 67,
+ 10, 16, 17, 9, 10, 6, 5, 4, 19, 17, 13, 77, 3,
+ 73, 30, 22, 18, 14, 17, 18, 11, 16, 22, 0, 14,
+ 9, 8, 4, 65, 9, 3, 7, 8, 5, 13, 20, 6, 81, 5,
+ 76, 10, 21, 83, 44, 55, 46, 45, 47, 47, 42, 42,
+ 38, 29, 23, 20, 25, 15, 78, 67, 2, 71, 25, 22,
+ 19, 23, 20, 14, 11, 12, 68, 0, 68, 93, 88, 103,
+ 73, 20, 18, 11, 1, 5, 68, 73, 78, 93, 72, 38,
+ 21, 15, 8, 7, 67, 77, 81, 104, 5, 36, 28, 25,
+ 19, 11, 5, 64, 72, 85, 70, 36, 30, 18, 4, 6, 70,
+ 79, 91, 5, 54, 44, 38, 30, 22, 5, 68, 76, 81,
+ 62, 99, 98, 82, 103, 101, 91, 99, 95, 94, 96,
+ 96, 99, 91, 89, 88, 88, 83, 83, 71, 78, 80, 69,
+ 0, 1, 0, 7, 6, 5, 1, 17, 14, 13, 11, 7, 18, 13,
+ 7, 16, 14, 10, 8, 8, 65, 67, 68, 5, 73, 14, 28,
+ 14, 9, 22, 20, 13, 17, 23, 16, 22, 7, 1, 7, 45,
+ 44, 48, 43, 44, 54, 62, 62, 62, 62, 62, 62, 62,
+ 56, 24, 62, 62, 62, 62, 61, 54, 39, 28, 26, 6,
+ 75, 72, 87, 107, 50, 49, 52, 44, 38, 38, 29, 28,
+ 20, 19, 12, 6, 5, 73, 78, 64, 66, 73, 29, 32,
+ 27, 18, 26, 25, 12, 19, 20, 10, 66, 64, 72, 91,
+ 17, 7, 66, 77, 67, 66, 65, 1, 0, 0, 0, 1, 4, 11,
+ 3, 5, 9, 10, 61, 59, 56, 51, 45, 38, 30, 14, 77,
+ 0, 32, 25, 22, 18, 13, 6, 4, 1, 72, 82, 78, 72,
+ 77, 71, 64, 72, 78, 66, 1, 3, 1, 4, 4, 3, 62,
+ 61, 51, 42, 36, 27, 16, 2, 83 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 18, 49,
+ 52, 54, 14, 97, 4, 70, 5, 1, 8, 70, 4, 74, 15,
+ 47, 81, 99, 109, 101, 92, 67, 70, 5, 1, 82, 85,
+ 6, 13, 67, 77, 89, 5, 70, 83, 89, 5, 74, 88, 7,
+ 71, 79, 88, 0, 71, 71, 77, 68, 5, 22, 0, 0, 0,
+ 77, 88, 97, 68, 15, 0, 18, 75, 94, 110, 82, 86,
+ 9, 66, 82, 74, 97, 79, 91, 85, 110, 88, 86, 92,
+ 100, 19, 64, 7, 87, 72, 86, 82, 113, 7, 0, 1,
+ 72, 16, 68, 66, 83, 83, 74, 74, 73, 8, 11, 66,
+ 13, 18, 78, 0, 67, 67, 5, 74, 22, 66, 2, 22, 28,
+ 33, 27, 20, 87, 69, 71, 10, 9, 96, 5, 9, 8, 4,
+ 7, 7, 8, 6, 2, 4, 65, 4, 17, 10, 75, 65, 2, 68,
+ 10, 15, 17, 9, 9, 6, 5, 4, 18, 17, 13, 77, 3,
+ 74, 29, 22, 18, 14, 17, 18, 11, 16, 22, 0, 14,
+ 9, 8, 4, 65, 9, 3, 7, 8, 5, 12, 20, 6, 81, 5,
+ 76, 9, 20, 83, 42, 54, 45, 44, 45, 45, 41, 41,
+ 36, 27, 22, 19, 23, 14, 79, 67, 2, 72, 24, 21,
+ 18, 22, 19, 13, 10, 11, 69, 64, 69, 93, 87, 102,
+ 72, 21, 18, 11, 1, 6, 67, 72, 77, 92, 71, 38,
+ 21, 15, 8, 8, 67, 76, 80, 102, 5, 36, 28, 25,
+ 19, 12, 5, 64, 72, 84, 70, 37, 30, 18, 4, 7, 70,
+ 79, 90, 5, 54, 44, 38, 29, 22, 5, 68, 75, 80,
+ 62, 98, 97, 81, 102, 99, 90, 97, 94, 92, 95, 95,
+ 97, 90, 88, 88, 87, 81, 83, 72, 77, 79, 69, 0,
+ 0, 0, 7, 5, 4, 0, 17, 14, 13, 11, 7, 17, 13, 7,
+ 15, 14, 10, 8, 7, 65, 67, 67, 6, 73, 14, 27, 14,
+ 9, 22, 20, 13, 16, 22, 16, 22, 6, 1, 6, 45, 43,
+ 47, 42, 43, 53, 60, 60, 62, 62, 62, 62, 62, 54,
+ 23, 62, 62, 62, 62, 58, 52, 38, 27, 25, 6, 74,
+ 72, 86, 105, 48, 48, 50, 42, 36, 37, 28, 26, 19,
+ 18, 11, 5, 4, 74, 78, 64, 66, 74, 28, 31, 26,
+ 16, 25, 24, 11, 18, 19, 9, 67, 65, 73, 92, 17,
+ 6, 66, 76, 67, 66, 64, 2, 1, 1, 1, 2, 5, 13, 4,
+ 6, 11, 12, 60, 58, 54, 49, 42, 35, 27, 11, 79,
+ 1, 32, 25, 23, 18, 14, 7, 4, 2, 71, 82, 77, 71,
+ 77, 70, 1, 71, 77, 65, 2, 3, 2, 5, 4, 3, 62, 59,
+ 49, 40, 33, 24, 12, 64, 85 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 16, 47,
+ 50, 54, 14, 94, 3, 69, 5, 1, 7, 70, 3, 75, 15,
+ 45, 83, 100, 109, 98, 91, 67, 69, 5, 1, 82, 84,
+ 6, 13, 67, 77, 89, 5, 71, 82, 89, 5, 74, 87, 6,
+ 71, 79, 88, 1, 70, 71, 76, 67, 5, 22, 0, 0, 0,
+ 77, 88, 97, 67, 14, 0, 18, 75, 94, 109, 80, 85,
+ 11, 64, 81, 73, 96, 77, 90, 84, 108, 87, 85, 91,
+ 99, 19, 64, 7, 87, 72, 86, 82, 111, 6, 0, 1, 72,
+ 16, 68, 66, 83, 83, 74, 74, 72, 8, 10, 66, 13,
+ 18, 78, 0, 67, 67, 5, 74, 22, 66, 2, 22, 27, 32,
+ 26, 19, 86, 68, 71, 9, 9, 95, 5, 9, 7, 4, 7, 7,
+ 9, 7, 2, 4, 65, 4, 16, 10, 76, 65, 2, 68, 9, 14,
+ 16, 9, 9, 7, 5, 4, 17, 16, 12, 77, 3, 74, 29,
+ 22, 18, 14, 17, 18, 11, 16, 22, 0, 14, 9, 8, 4,
+ 65, 9, 3, 6, 8, 5, 12, 19, 5, 81, 4, 76, 8, 19,
+ 83, 41, 52, 43, 42, 43, 43, 39, 39, 34, 25, 20,
+ 17, 20, 12, 80, 67, 1, 72, 23, 20, 17, 21, 17,
+ 11, 9, 10, 69, 64, 69, 93, 87, 102, 72, 21, 18,
+ 11, 1, 6, 67, 72, 77, 91, 71, 38, 21, 15, 8, 8,
+ 66, 75, 80, 100, 5, 36, 28, 25, 19, 12, 5, 64,
+ 72, 83, 70, 37, 29, 17, 4, 7, 70, 79, 90, 5, 53,
+ 43, 37, 28, 22, 5, 68, 75, 80, 62, 97, 96, 80,
+ 100, 98, 89, 96, 92, 91, 93, 93, 95, 89, 87, 87,
+ 87, 80, 82, 72, 77, 78, 69, 64, 0, 64, 6, 4, 3,
+ 64, 16, 13, 13, 10, 6, 17, 13, 7, 14, 13, 10, 7,
+ 6, 65, 67, 67, 6, 73, 13, 27, 13, 9, 21, 19, 13,
+ 15, 21, 15, 21, 5, 0, 4, 44, 43, 47, 41, 41, 51,
+ 58, 58, 62, 62, 62, 62, 62, 52, 21, 59, 62, 59,
+ 62, 56, 49, 36, 26, 24, 6, 73, 71, 85, 103, 47,
+ 46, 48, 41, 34, 35, 26, 24, 18, 16, 9, 3, 2, 75,
+ 79, 65, 66, 75, 26, 29, 24, 15, 23, 22, 9, 16,
+ 17, 7, 68, 66, 74, 92, 16, 6, 67, 76, 66, 65,
+ 64, 2, 2, 2, 2, 3, 6, 14, 5, 7, 12, 13, 60, 56,
+ 52, 46, 40, 32, 24, 8, 81, 1, 33, 26, 23, 19,
+ 15, 7, 5, 2, 71, 81, 77, 70, 76, 69, 2, 71, 76,
+ 65, 2, 3, 2, 6, 4, 3, 62, 57, 46, 37, 30, 20, 8,
+ 68, 88 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 15, 46,
+ 49, 54, 14, 92, 3, 69, 6, 1, 6, 70, 2, 76, 14,
+ 44, 84, 101, 110, 95, 90, 67, 69, 6, 1, 81, 83,
+ 7, 12, 66, 76, 88, 4, 71, 82, 89, 5, 73, 87, 6,
+ 71, 78, 88, 1, 70, 71, 76, 67, 5, 22, 0, 0, 0,
+ 76, 88, 97, 67, 14, 64, 18, 75, 93, 107, 78, 83,
+ 12, 1, 80, 72, 94, 76, 89, 84, 107, 87, 85, 91,
+ 99, 20, 64, 7, 86, 71, 85, 81, 110, 6, 64, 0,
+ 73, 16, 68, 66, 82, 83, 73, 74, 72, 8, 10, 65,
+ 13, 17, 78, 0, 67, 67, 5, 74, 21, 66, 2, 21, 26,
+ 31, 26, 19, 86, 68, 71, 9, 8, 95, 4, 9, 7, 4, 7,
+ 7, 9, 7, 2, 4, 64, 4, 15, 10, 76, 66, 2, 69, 9,
+ 13, 16, 9, 8, 7, 5, 4, 15, 16, 11, 77, 3, 75,
+ 28, 22, 17, 14, 17, 18, 11, 16, 22, 0, 13, 9, 7,
+ 4, 65, 8, 2, 6, 7, 4, 11, 18, 5, 81, 4, 75, 7,
+ 17, 83, 39, 50, 41, 40, 41, 41, 37, 37, 32, 23,
+ 19, 16, 18, 10, 81, 67, 1, 73, 22, 19, 16, 19,
+ 16, 10, 8, 9, 70, 65, 70, 93, 87, 101, 71, 21,
+ 18, 11, 2, 6, 67, 72, 76, 90, 70, 37, 21, 15, 8,
+ 9, 66, 74, 79, 98, 5, 37, 28, 25, 19, 13, 5, 64,
+ 71, 83, 70, 37, 29, 17, 4, 7, 69, 78, 89, 5, 53,
+ 43, 36, 28, 22, 5, 67, 74, 79, 62, 97, 95, 80,
+ 99, 97, 88, 94, 91, 90, 92, 92, 94, 89, 86, 87,
+ 86, 78, 82, 73, 77, 78, 69, 64, 64, 64, 5, 3, 2,
+ 65, 16, 13, 13, 10, 6, 16, 12, 7, 14, 13, 9, 7,
+ 6, 65, 68, 67, 6, 74, 13, 26, 12, 8, 20, 19, 14,
+ 15, 20, 15, 21, 5, 0, 3, 43, 42, 46, 40, 40, 50,
+ 56, 56, 61, 60, 62, 62, 60, 49, 20, 57, 62, 56,
+ 62, 53, 47, 34, 25, 23, 6, 72, 71, 83, 100, 45,
+ 44, 46, 39, 32, 33, 25, 22, 16, 15, 8, 2, 1, 76,
+ 80, 65, 67, 76, 25, 28, 23, 13, 21, 20, 8, 15,
+ 15, 6, 70, 68, 76, 93, 15, 5, 68, 76, 66, 65, 0,
+ 3, 2, 4, 3, 4, 7, 15, 7, 8, 14, 14, 59, 55, 50,
+ 44, 37, 30, 21, 6, 83, 2, 33, 26, 24, 19, 16, 8,
+ 5, 3, 70, 81, 76, 70, 76, 68, 3, 71, 76, 64, 3,
+ 3, 3, 7, 4, 3, 62, 55, 44, 34, 26, 16, 5, 71, 90 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 13, 44,
+ 48, 54, 14, 90, 3, 68, 7, 1, 5, 70, 1, 77, 14,
+ 42, 86, 102, 110, 92, 89, 67, 68, 7, 1, 81, 82,
+ 7, 12, 66, 76, 87, 4, 72, 82, 89, 5, 73, 86, 6,
+ 72, 78, 88, 2, 70, 71, 76, 66, 5, 22, 0, 0, 0,
+ 76, 89, 97, 66, 13, 64, 18, 75, 93, 105, 77, 82,
+ 14, 2, 79, 71, 93, 75, 88, 83, 105, 86, 85, 90,
+ 98, 20, 64, 7, 86, 71, 85, 81, 108, 6, 64, 0,
+ 73, 16, 68, 66, 82, 83, 73, 74, 71, 8, 10, 65,
+ 13, 17, 78, 0, 67, 67, 5, 74, 20, 67, 2, 20, 25,
+ 30, 25, 18, 85, 67, 71, 8, 8, 94, 4, 9, 7, 4, 7,
+ 7, 10, 7, 2, 3, 64, 3, 14, 9, 77, 66, 2, 69, 8,
+ 12, 16, 8, 7, 7, 5, 4, 14, 16, 10, 77, 3, 75,
+ 27, 22, 17, 14, 17, 18, 11, 16, 21, 0, 13, 9, 7,
+ 4, 66, 8, 2, 5, 7, 4, 10, 17, 4, 81, 3, 75, 6,
+ 16, 83, 38, 48, 39, 38, 39, 39, 35, 35, 30, 21,
+ 17, 14, 16, 8, 82, 67, 1, 73, 21, 18, 15, 18,
+ 14, 8, 7, 7, 71, 65, 70, 93, 87, 101, 71, 21,
+ 18, 11, 2, 6, 67, 72, 76, 89, 70, 37, 21, 15, 8,
+ 9, 65, 74, 78, 96, 5, 37, 28, 25, 19, 13, 5, 64,
+ 71, 82, 70, 37, 29, 16, 4, 7, 69, 78, 88, 5, 52,
+ 42, 35, 27, 22, 5, 67, 74, 79, 62, 96, 94, 79,
+ 98, 96, 87, 93, 90, 89, 91, 90, 92, 88, 86, 87,
+ 86, 77, 82, 73, 77, 77, 69, 65, 64, 64, 4, 2, 1,
+ 66, 15, 12, 13, 9, 5, 16, 12, 7, 13, 12, 9, 7,
+ 5, 66, 68, 67, 6, 74, 12, 26, 11, 8, 19, 18, 14,
+ 14, 19, 14, 20, 4, 64, 1, 42, 41, 46, 39, 39,
+ 48, 54, 54, 59, 57, 62, 62, 57, 47, 19, 54, 62,
+ 53, 58, 50, 44, 32, 24, 21, 6, 71, 71, 82, 98,
+ 43, 42, 44, 37, 30, 31, 23, 20, 15, 13, 7, 0,
+ 64, 77, 81, 66, 67, 77, 24, 26, 21, 11, 19, 18,
+ 6, 13, 13, 4, 71, 69, 77, 94, 14, 4, 69, 76, 65,
+ 65, 0, 3, 3, 5, 3, 5, 8, 16, 8, 9, 15, 15, 59,
+ 53, 48, 41, 35, 27, 18, 3, 86, 2, 34, 27, 24,
+ 19, 16, 8, 5, 3, 70, 81, 76, 69, 75, 67, 4, 71,
+ 75, 64, 3, 3, 3, 8, 4, 3, 61, 53, 41, 31, 23,
+ 12, 1, 75, 93 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 12, 43,
+ 46, 54, 14, 87, 2, 67, 7, 1, 5, 69, 0, 78, 13,
+ 40, 88, 103, 111, 89, 87, 67, 67, 7, 1, 81, 81,
+ 7, 11, 66, 76, 87, 4, 72, 81, 89, 5, 73, 85, 5,
+ 72, 78, 88, 2, 69, 70, 75, 66, 5, 22, 0, 0, 0,
+ 75, 89, 97, 66, 12, 64, 18, 74, 93, 104, 75, 80,
+ 15, 4, 77, 70, 92, 73, 87, 82, 103, 86, 84, 90,
+ 97, 20, 64, 7, 85, 71, 84, 80, 107, 5, 64, 0,
+ 73, 16, 68, 65, 81, 83, 73, 74, 71, 8, 9, 65,
+ 13, 17, 77, 0, 66, 67, 5, 74, 20, 67, 2, 20, 24,
+ 30, 24, 17, 84, 67, 71, 7, 7, 94, 3, 9, 6, 4, 7,
+ 7, 11, 8, 2, 3, 64, 3, 13, 9, 78, 66, 2, 70, 8,
+ 11, 15, 8, 7, 8, 5, 4, 13, 15, 9, 77, 3, 76, 27,
+ 22, 17, 14, 17, 18, 11, 16, 21, 0, 13, 9, 7, 4,
+ 66, 8, 2, 5, 7, 4, 10, 16, 4, 81, 3, 75, 5, 15,
+ 83, 36, 46, 38, 37, 37, 37, 33, 33, 28, 19, 15,
+ 12, 13, 7, 83, 67, 0, 74, 20, 17, 14, 17, 13, 7,
+ 6, 6, 71, 66, 71, 93, 87, 100, 71, 21, 18, 11,
+ 2, 6, 66, 71, 75, 88, 69, 37, 21, 15, 8, 9, 65,
+ 73, 78, 94, 5, 37, 28, 25, 19, 13, 5, 64, 71,
+ 81, 70, 38, 28, 16, 4, 7, 69, 78, 88, 5, 52, 41,
+ 34, 26, 22, 5, 67, 74, 78, 62, 95, 93, 78, 96,
+ 95, 86, 92, 88, 88, 89, 89, 90, 87, 85, 86, 85,
+ 76, 81, 74, 76, 76, 69, 65, 65, 65, 4, 1, 0, 67,
+ 14, 12, 13, 9, 5, 15, 12, 7, 12, 11, 9, 6, 4,
+ 66, 68, 67, 6, 74, 11, 25, 11, 8, 18, 17, 14,
+ 13, 18, 14, 20, 3, 65, 64, 41, 41, 45, 38, 37,
+ 47, 52, 52, 57, 55, 62, 61, 54, 45, 17, 51, 62,
+ 50, 54, 48, 42, 30, 23, 20, 6, 70, 70, 81, 96,
+ 42, 41, 42, 36, 28, 29, 22, 18, 14, 11, 5, 65,
+ 65, 78, 82, 66, 67, 78, 22, 25, 19, 10, 18, 17,
+ 5, 12, 12, 3, 72, 70, 78, 94, 14, 4, 70, 75, 65,
+ 64, 1, 4, 4, 6, 4, 6, 9, 18, 9, 10, 16, 17, 58,
+ 51, 46, 39, 32, 24, 15, 0, 88, 2, 34, 27, 25,
+ 20, 17, 8, 6, 4, 69, 80, 76, 68, 75, 66, 5, 70,
+ 74, 0, 4, 3, 4, 9, 4, 3, 59, 51, 39, 28, 20, 9,
+ 66, 78, 96 },
+
+ {
+
+ 61,
+ 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 10, 41,
+ 45, 54, 14, 85, 2, 66, 8, 1, 4, 69, 64, 79, 13,
+ 38, 89, 104, 111, 86, 86, 67, 66, 8, 1, 80, 80,
+ 8, 11, 66, 75, 86, 3, 73, 81, 89, 5, 73, 85, 5,
+ 72, 78, 88, 3, 69, 70, 75, 65, 5, 22, 0, 0, 0,
+ 75, 89, 97, 65, 11, 64, 18, 74, 93, 102, 73, 79,
+ 17, 6, 76, 69, 90, 72, 86, 81, 101, 85, 84, 89,
+ 96, 20, 64, 7, 85, 71, 84, 80, 105, 5, 65, 64,
+ 74, 16, 68, 65, 81, 83, 72, 74, 70, 8, 9, 65,
+ 13, 16, 77, 0, 66, 67, 5, 74, 19, 67, 2, 19, 23,
+ 29, 23, 16, 84, 66, 71, 6, 7, 93, 3, 9, 6, 4, 7,
+ 7, 12, 8, 2, 3, 0, 3, 12, 9, 79, 66, 2, 70, 7,
+ 10, 15, 8, 6, 8, 5, 4, 11, 15, 8, 77, 3, 76, 26,
+ 22, 17, 14, 17, 18, 11, 16, 21, 0, 12, 9, 7, 4,
+ 66, 7, 1, 4, 6, 3, 9, 15, 3, 81, 2, 75, 4, 13,
+ 83, 35, 44, 36, 35, 35, 35, 31, 31, 26, 17, 14,
+ 11, 11, 5, 84, 67, 0, 74, 19, 16, 13, 15, 11, 5,
+ 5, 5, 72, 66, 71, 93, 87, 100, 70, 21, 18, 11,
+ 2, 6, 66, 71, 75, 87, 69, 37, 21, 15, 8, 10, 64,
+ 72, 77, 92, 5, 37, 28, 25, 19, 14, 5, 64, 71,
+ 81, 70, 38, 28, 15, 4, 7, 69, 78, 87, 5, 51, 41,
+ 33, 25, 22, 5, 67, 73, 78, 62, 94, 92, 78, 95,
+ 94, 85, 90, 87, 87, 88, 87, 89, 87, 84, 86, 85,
+ 74, 81, 74, 76, 76, 69, 66, 65, 65, 3, 0, 64,
+ 68, 14, 11, 13, 8, 4, 15, 11, 7, 11, 11, 8, 6,
+ 3, 66, 69, 67, 6, 75, 11, 25, 10, 8, 17, 17, 14,
+ 12, 17, 13, 19, 2, 65, 65, 40, 40, 45, 37, 36,
+ 45, 50, 50, 55, 52, 60, 59, 51, 42, 16, 48, 62,
+ 47, 50, 45, 39, 28, 22, 19, 6, 69, 70, 80, 94,
+ 40, 39, 40, 34, 26, 27, 20, 16, 12, 10, 4, 66,
+ 67, 79, 83, 67, 68, 79, 21, 23, 18, 8, 16, 15,
+ 3, 10, 10, 1, 73, 72, 80, 95, 13, 3, 71, 75, 64,
+ 64, 1, 4, 4, 7, 5, 7, 10, 19, 10, 11, 18, 18,
+ 58, 50, 44, 36, 30, 21, 12, 66, 90, 3, 35, 28,
+ 25, 20, 18, 9, 6, 4, 69, 80, 75, 68, 74, 65, 6,
+ 70, 74, 0, 4, 3, 4, 10, 4, 3, 58, 49, 36, 25,
+ 17, 5, 70, 82, 98 },
+
+ {
+
+ 60,
+ 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 9, 40, 44,
+ 54, 14, 83, 2, 65, 9, 1, 3, 69, 65, 80, 12, 36,
+ 91, 105, 112, 83, 85, 67, 65, 9, 1, 80, 79, 8,
+ 10, 66, 75, 85, 3, 73, 81, 89, 5, 73, 84, 5, 72,
+ 78, 88, 3, 69, 70, 75, 65, 5, 22, 0, 0, 0, 74,
+ 89, 97, 65, 10, 64, 18, 74, 93, 100, 71, 77, 18,
+ 8, 75, 68, 89, 71, 85, 80, 99, 85, 84, 89, 95,
+ 20, 64, 7, 84, 71, 83, 79, 104, 5, 65, 64, 74,
+ 16, 68, 65, 80, 83, 72, 74, 70, 8, 9, 65, 13,
+ 16, 77, 0, 66, 67, 5, 74, 18, 67, 2, 18, 22, 28,
+ 22, 15, 83, 66, 71, 5, 6, 93, 2, 9, 6, 4, 7, 7,
+ 13, 8, 2, 3, 0, 3, 11, 9, 80, 66, 2, 71, 7, 9,
+ 15, 8, 5, 8, 5, 4, 10, 15, 7, 77, 3, 77, 25, 22,
+ 17, 14, 17, 18, 11, 16, 21, 0, 12, 9, 7, 4, 66,
+ 7, 1, 4, 6, 3, 8, 14, 3, 81, 2, 75, 3, 12, 83,
+ 33, 42, 34, 33, 33, 33, 29, 29, 24, 15, 12, 9,
+ 9, 3, 85, 67, 0, 75, 18, 15, 12, 14, 10, 4, 4,
+ 4, 73, 67, 72, 93, 87, 99, 70, 21, 18, 11, 2, 6,
+ 66, 71, 74, 86, 68, 37, 21, 15, 8, 10, 64, 71,
+ 76, 90, 5, 37, 28, 25, 19, 14, 5, 64, 71, 80,
+ 70, 38, 28, 15, 4, 7, 69, 78, 86, 5, 51, 40, 32,
+ 24, 22, 5, 67, 73, 77, 62, 93, 91, 77, 94, 93,
+ 84, 89, 86, 86, 87, 86, 87, 86, 83, 86, 84, 73,
+ 81, 75, 76, 75, 69, 66, 66, 65, 2, 64, 65, 69,
+ 13, 11, 13, 8, 4, 14, 11, 7, 10, 10, 8, 6, 2,
+ 66, 69, 67, 6, 75, 10, 24, 9, 8, 16, 16, 14, 11,
+ 16, 13, 19, 1, 66, 67, 39, 39, 44, 36, 35, 44,
+ 48, 48, 53, 50, 57, 56, 48, 40, 15, 45, 59, 44,
+ 46, 42, 37, 26, 21, 18, 6, 68, 70, 79, 92, 38,
+ 37, 38, 32, 24, 25, 19, 14, 11, 8, 3, 68, 68,
+ 80, 84, 67, 68, 80, 20, 22, 16, 6, 14, 13, 2, 9,
+ 8, 0, 74, 73, 81, 96, 12, 2, 72, 75, 64, 64, 2,
+ 5, 5, 8, 6, 8, 11, 20, 11, 12, 19, 19, 57, 48,
+ 42, 34, 27, 18, 9, 69, 92, 3, 35, 28, 26, 20,
+ 19, 9, 6, 5, 68, 80, 75, 67, 74, 64, 7, 70, 73,
+ 1, 5, 3, 5, 11, 4, 3, 57, 47, 34, 22, 14, 1, 74,
+ 85, 101 },
+
+ {
+
+ 58,
+ 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 7, 38, 42,
+ 53, 14, 81, 1, 65, 9, 0, 2, 69, 67, 82, 11, 34,
+ 93, 106, 113, 81, 84, 68, 65, 9, 0, 80, 78, 8,
+ 9, 66, 75, 85, 2, 74, 81, 90, 5, 73, 84, 4, 73,
+ 78, 88, 3, 69, 70, 75, 65, 4, 22, 0, 0, 0, 74,
+ 90, 97, 65, 9, 65, 18, 74, 93, 99, 70, 76, 19,
+ 9, 74, 67, 88, 70, 84, 80, 98, 85, 84, 89, 95,
+ 20, 64, 7, 84, 71, 83, 79, 103, 4, 66, 65, 75,
+ 16, 68, 65, 80, 83, 72, 74, 70, 7, 8, 65, 12,
+ 15, 77, 64, 66, 67, 4, 74, 17, 68, 1, 17, 20,
+ 27, 21, 14, 83, 66, 71, 4, 5, 93, 1, 8, 5, 4, 7,
+ 7, 13, 8, 2, 2, 0, 2, 10, 8, 81, 67, 1, 72, 6,
+ 8, 14, 7, 4, 8, 5, 4, 8, 14, 6, 77, 3, 78, 24,
+ 21, 16, 14, 17, 17, 10, 16, 20, 64, 11, 9, 6, 3,
+ 67, 6, 0, 3, 5, 2, 7, 13, 2, 81, 1, 75, 2, 10,
+ 83, 31, 40, 32, 31, 31, 30, 27, 27, 22, 13, 10,
+ 7, 6, 1, 87, 68, 64, 76, 17, 13, 10, 12, 8, 2,
+ 2, 2, 74, 68, 73, 93, 87, 99, 70, 21, 18, 11, 2,
+ 6, 66, 71, 74, 85, 68, 36, 21, 15, 8, 10, 64,
+ 71, 76, 89, 4, 37, 28, 24, 18, 14, 5, 64, 71,
+ 80, 70, 38, 27, 14, 3, 7, 69, 78, 86, 5, 50, 39,
+ 31, 23, 21, 5, 67, 73, 77, 62, 93, 90, 77, 93,
+ 92, 84, 88, 85, 85, 86, 85, 86, 86, 83, 86, 84,
+ 72, 81, 76, 76, 75, 69, 67, 67, 66, 1, 65, 67,
+ 71, 12, 10, 13, 7, 3, 13, 10, 6, 9, 9, 7, 5, 1,
+ 67, 70, 67, 6, 76, 9, 23, 8, 7, 15, 15, 14, 10,
+ 14, 12, 18, 0, 67, 69, 38, 38, 43, 34, 33, 42,
+ 46, 46, 50, 47, 54, 53, 45, 37, 13, 42, 55, 41,
+ 41, 39, 34, 24, 19, 16, 6, 68, 70, 78, 90, 36,
+ 35, 36, 30, 21, 23, 17, 11, 9, 6, 1, 70, 70, 81,
+ 85, 68, 69, 82, 18, 20, 14, 4, 12, 11, 0, 7, 6,
+ 65, 76, 75, 83, 97, 11, 1, 73, 75, 64, 64, 2, 5,
+ 5, 9, 6, 9, 11, 21, 12, 13, 20, 20, 56, 46, 39,
+ 31, 24, 15, 5, 72, 95, 3, 35, 28, 26, 20, 19, 9,
+ 6, 5, 68, 80, 75, 67, 74, 0, 8, 70, 73, 1, 5, 3,
+ 5, 11, 4, 2, 55, 44, 31, 19, 10, 66, 78, 89, 104 },
+
+ {
+
+ 57,
+ 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 6, 37, 41,
+ 53, 14, 78, 1, 64, 10, 0, 2, 68, 68, 83, 11, 33,
+ 94, 107, 113, 78, 82, 68, 64, 10, 0, 79, 76, 9,
+ 9, 65, 74, 84, 2, 74, 80, 90, 5, 72, 83, 4, 73,
+ 77, 88, 4, 68, 69, 74, 64, 4, 22, 0, 0, 0, 73,
+ 90, 97, 64, 9, 65, 18, 73, 92, 97, 68, 74, 21,
+ 11, 72, 66, 86, 68, 82, 79, 96, 84, 83, 88, 94,
+ 21, 0, 8, 83, 70, 82, 78, 101, 4, 66, 65, 75,
+ 17, 68, 64, 79, 82, 71, 73, 69, 7, 8, 64, 12,
+ 15, 76, 64, 65, 67, 4, 73, 17, 68, 1, 17, 19,
+ 27, 21, 14, 82, 65, 70, 4, 5, 92, 1, 8, 5, 5, 7,
+ 7, 14, 9, 3, 2, 1, 2, 10, 8, 81, 67, 1, 72, 6,
+ 7, 14, 7, 4, 9, 6, 4, 7, 14, 6, 76, 3, 78, 24,
+ 21, 16, 14, 17, 17, 10, 16, 20, 64, 11, 9, 6, 3,
+ 67, 6, 0, 3, 5, 2, 7, 13, 2, 80, 1, 74, 2, 9,
+ 82, 30, 39, 31, 30, 29, 28, 26, 26, 20, 12, 9,
+ 6, 4, 0, 88, 68, 64, 76, 16, 12, 9, 11, 7, 1, 1,
+ 1, 74, 68, 73, 92, 86, 98, 69, 22, 18, 11, 3, 7,
+ 65, 70, 73, 83, 67, 36, 21, 15, 8, 11, 0, 70,
+ 75, 87, 4, 38, 29, 24, 18, 15, 5, 64, 70, 79,
+ 70, 39, 27, 14, 3, 8, 68, 77, 85, 5, 50, 39, 31,
+ 23, 21, 5, 66, 72, 76, 62, 92, 89, 76, 91, 90,
+ 83, 86, 83, 83, 84, 83, 84, 85, 82, 85, 83, 70,
+ 80, 76, 75, 74, 68, 67, 67, 66, 1, 65, 68, 72,
+ 12, 10, 14, 7, 3, 13, 10, 6, 9, 9, 7, 5, 1, 67,
+ 70, 66, 7, 76, 9, 23, 8, 7, 15, 15, 15, 10, 13,
+ 12, 18, 0, 67, 70, 38, 38, 43, 33, 32, 41, 44,
+ 44, 48, 45, 52, 51, 43, 35, 12, 40, 52, 38, 37,
+ 37, 32, 23, 18, 15, 6, 67, 69, 76, 87, 35, 34,
+ 35, 29, 19, 22, 16, 9, 8, 5, 0, 71, 71, 82, 85,
+ 68, 69, 83, 17, 19, 13, 3, 11, 10, 64, 6, 5, 66,
+ 77, 76, 84, 97, 11, 1, 73, 74, 0, 0, 3, 6, 6,
+ 11, 7, 10, 12, 23, 14, 14, 22, 22, 56, 45, 37,
+ 29, 22, 13, 2, 74, 97, 4, 36, 29, 27, 21, 20,
+ 10, 7, 6, 67, 79, 74, 66, 73, 2, 10, 69, 72, 2,
+ 6, 4, 6, 12, 4, 2, 54, 42, 29, 17, 7, 69, 81,
+ 92, 106 },
+
+ {
+
+ 56,
+ 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 5, 36, 40,
+ 53, 14, 76, 1, 0, 11, 0, 1, 68, 69, 84, 10, 31,
+ 96, 108, 114, 75, 81, 68, 0, 11, 0, 79, 75, 9,
+ 8, 65, 74, 83, 2, 74, 80, 90, 5, 72, 82, 4, 73,
+ 77, 88, 4, 68, 69, 74, 64, 4, 22, 0, 0, 0, 72,
+ 90, 97, 64, 8, 65, 18, 73, 92, 95, 66, 72, 22,
+ 13, 71, 65, 85, 67, 81, 78, 94, 84, 83, 88, 93,
+ 21, 0, 8, 82, 70, 81, 78, 100, 4, 66, 65, 75,
+ 17, 68, 64, 79, 82, 71, 73, 69, 7, 8, 64, 12,
+ 15, 76, 64, 65, 67, 4, 73, 16, 68, 1, 16, 18,
+ 26, 20, 13, 81, 65, 70, 3, 4, 92, 0, 8, 5, 5, 7,
+ 7, 15, 9, 3, 2, 1, 2, 9, 8, 82, 67, 1, 73, 6, 6,
+ 14, 7, 3, 9, 6, 4, 6, 14, 5, 76, 3, 79, 23, 21,
+ 16, 14, 17, 17, 10, 16, 20, 64, 11, 9, 6, 3, 67,
+ 6, 0, 3, 5, 2, 6, 12, 1, 80, 1, 74, 1, 8, 82,
+ 28, 37, 29, 28, 27, 26, 24, 24, 18, 10, 7, 4, 2,
+ 65, 89, 68, 64, 77, 15, 11, 8, 10, 5, 0, 0, 0,
+ 75, 69, 74, 92, 86, 97, 69, 22, 18, 11, 3, 7,
+ 65, 70, 73, 82, 66, 36, 21, 15, 8, 11, 0, 69,
+ 74, 85, 4, 38, 29, 24, 18, 15, 5, 64, 70, 78,
+ 70, 39, 27, 14, 3, 8, 68, 77, 84, 5, 49, 38, 30,
+ 22, 21, 5, 66, 72, 76, 62, 91, 88, 75, 90, 89,
+ 82, 85, 82, 82, 83, 82, 82, 84, 81, 85, 82, 69,
+ 80, 77, 75, 73, 68, 67, 68, 66, 0, 66, 69, 73,
+ 11, 10, 14, 7, 2, 12, 10, 6, 8, 8, 7, 5, 0, 67,
+ 70, 66, 7, 76, 8, 22, 7, 7, 14, 14, 15, 9, 12,
+ 12, 18, 64, 68, 72, 37, 37, 42, 32, 31, 40, 42,
+ 42, 46, 43, 49, 48, 40, 33, 11, 37, 49, 35, 33,
+ 34, 30, 21, 17, 14, 6, 66, 69, 75, 85, 33, 32,
+ 33, 27, 17, 20, 14, 7, 7, 3, 64, 73, 72, 83, 86,
+ 69, 69, 84, 16, 18, 11, 1, 9, 8, 65, 4, 3, 67,
+ 78, 77, 85, 98, 10, 0, 74, 74, 0, 0, 4, 6, 7,
+ 12, 8, 11, 13, 24, 15, 15, 23, 23, 55, 43, 35,
+ 27, 19, 10, 64, 77, 99, 4, 36, 29, 27, 21, 21,
+ 10, 7, 6, 67, 79, 74, 65, 73, 3, 11, 69, 71, 3,
+ 7, 4, 6, 13, 4, 2, 53, 40, 27, 14, 4, 73, 85,
+ 95, 109 },
+
+ {
+
+ 55,
+ 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 3, 34, 39,
+ 53, 14, 74, 1, 1, 12, 0, 0, 68, 70, 85, 10, 29,
+ 97, 109, 114, 72, 80, 68, 1, 12, 0, 78, 74, 10,
+ 8, 65, 73, 82, 1, 75, 80, 90, 5, 72, 82, 4, 73,
+ 77, 88, 5, 68, 69, 74, 0, 4, 22, 0, 0, 0, 72,
+ 90, 97, 0, 7, 65, 18, 73, 92, 93, 64, 71, 24,
+ 15, 70, 64, 83, 66, 80, 77, 92, 83, 83, 87, 92,
+ 21, 0, 8, 82, 70, 81, 77, 98, 4, 67, 66, 76, 17,
+ 68, 64, 78, 82, 70, 73, 68, 7, 8, 64, 12, 14,
+ 76, 64, 65, 67, 4, 73, 15, 68, 1, 15, 17, 25,
+ 19, 12, 81, 64, 70, 2, 4, 91, 0, 8, 5, 5, 7, 7,
+ 16, 9, 3, 2, 2, 2, 8, 8, 83, 67, 1, 73, 5, 5,
+ 14, 7, 2, 9, 6, 4, 4, 14, 4, 76, 3, 79, 22, 21,
+ 16, 14, 17, 17, 10, 16, 20, 64, 10, 9, 6, 3, 67,
+ 5, 64, 2, 4, 1, 5, 11, 1, 80, 0, 74, 0, 6, 82,
+ 27, 35, 27, 26, 25, 24, 22, 22, 16, 8, 6, 3, 0,
+ 67, 90, 68, 64, 77, 14, 10, 7, 8, 4, 65, 64, 64,
+ 76, 69, 74, 92, 86, 97, 68, 22, 18, 11, 3, 7,
+ 65, 70, 72, 81, 66, 36, 21, 15, 8, 12, 1, 68,
+ 73, 83, 4, 38, 29, 24, 18, 16, 5, 64, 70, 78,
+ 70, 39, 27, 13, 3, 8, 68, 77, 83, 5, 49, 38, 29,
+ 21, 21, 5, 66, 71, 75, 62, 90, 87, 75, 89, 88,
+ 81, 83, 81, 81, 82, 80, 81, 84, 80, 85, 82, 67,
+ 80, 77, 75, 73, 68, 68, 68, 66, 64, 67, 70, 74,
+ 11, 9, 14, 6, 2, 12, 9, 6, 7, 8, 6, 5, 64, 67,
+ 71, 66, 7, 77, 8, 22, 6, 7, 13, 14, 15, 8, 11,
+ 11, 17, 65, 68, 73, 36, 36, 42, 31, 30, 38, 40,
+ 40, 44, 40, 47, 46, 37, 30, 10, 34, 46, 32, 29,
+ 31, 27, 19, 16, 13, 6, 65, 69, 74, 83, 31, 30,
+ 31, 25, 15, 18, 13, 5, 5, 2, 65, 74, 74, 84, 87,
+ 69, 70, 85, 15, 16, 10, 64, 7, 6, 67, 3, 1, 69,
+ 79, 79, 87, 99, 9, 64, 75, 74, 1, 0, 4, 7, 7,
+ 13, 9, 12, 14, 25, 16, 16, 25, 24, 55, 42, 33,
+ 24, 17, 7, 67, 80, 101, 5, 37, 30, 28, 21, 22,
+ 11, 7, 7, 66, 79, 73, 65, 72, 4, 12, 69, 71, 3,
+ 7, 4, 7, 14, 4, 2, 52, 38, 24, 11, 1, 77, 89,
+ 99, 111 },
+
+ {
+
+ 53,
+ 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 2, 33, 37,
+ 53, 14, 71, 0, 2, 12, 0, 64, 68, 71, 86, 9, 27,
+ 99, 110, 115, 69, 79, 68, 2, 12, 0, 78, 73, 10,
+ 7, 65, 73, 82, 1, 75, 79, 90, 5, 72, 81, 3, 74,
+ 77, 88, 5, 67, 69, 73, 0, 4, 22, 0, 0, 0, 71,
+ 91, 97, 0, 6, 65, 18, 73, 92, 92, 0, 69, 25, 16,
+ 69, 0, 82, 64, 79, 76, 90, 83, 82, 87, 91, 21,
+ 0, 8, 81, 70, 80, 77, 97, 3, 67, 66, 76, 17, 68,
+ 64, 78, 82, 70, 73, 68, 7, 7, 64, 12, 14, 76,
+ 64, 65, 67, 4, 73, 15, 69, 1, 15, 16, 24, 18,
+ 11, 80, 64, 70, 1, 3, 91, 64, 8, 4, 5, 7, 7, 17,
+ 10, 3, 1, 2, 1, 7, 7, 84, 67, 1, 74, 5, 4, 13,
+ 6, 2, 10, 6, 4, 3, 13, 3, 76, 3, 80, 22, 21, 16,
+ 14, 17, 17, 10, 16, 19, 64, 10, 9, 6, 3, 68, 5,
+ 64, 2, 4, 1, 5, 10, 0, 80, 0, 74, 64, 5, 82, 25,
+ 33, 25, 24, 23, 22, 20, 20, 14, 6, 4, 1, 66, 69,
+ 91, 68, 65, 78, 13, 9, 6, 7, 2, 66, 65, 66, 76,
+ 70, 75, 92, 86, 96, 68, 22, 18, 11, 3, 7, 65,
+ 70, 72, 80, 65, 36, 21, 15, 8, 12, 1, 68, 73,
+ 81, 4, 38, 29, 24, 18, 16, 5, 64, 70, 77, 70,
+ 39, 26, 13, 3, 8, 68, 77, 83, 5, 48, 37, 28, 20,
+ 21, 5, 66, 71, 75, 62, 89, 86, 74, 87, 87, 80,
+ 82, 79, 80, 80, 79, 79, 83, 80, 84, 81, 66, 79,
+ 78, 75, 72, 68, 68, 69, 67, 65, 68, 71, 75, 10,
+ 9, 14, 6, 1, 11, 9, 6, 6, 7, 6, 4, 65, 68, 71,
+ 66, 7, 77, 7, 21, 5, 7, 12, 13, 15, 7, 10, 11,
+ 17, 66, 69, 75, 35, 36, 41, 30, 28, 37, 38, 38,
+ 42, 38, 44, 43, 34, 28, 8, 31, 42, 29, 25, 29,
+ 25, 17, 15, 11, 6, 64, 68, 73, 81, 30, 28, 29,
+ 24, 13, 16, 11, 3, 4, 0, 67, 76, 75, 85, 88, 70,
+ 70, 86, 13, 15, 8, 65, 5, 4, 68, 1, 64, 70, 80,
+ 80, 88, 99, 8, 64, 76, 74, 1, 1, 5, 7, 8, 14, 9,
+ 13, 15, 26, 17, 17, 26, 25, 54, 40, 31, 22, 14,
+ 4, 70, 83, 104, 5, 37, 30, 28, 22, 22, 11, 8, 7,
+ 66, 78, 73, 64, 72, 5, 13, 69, 70, 4, 8, 4, 7,
+ 15, 4, 2, 50, 36, 22, 8, 65, 81, 93, 102, 114 },
+
+ {
+
+ 52,
+ 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 0, 31, 36,
+ 53, 14, 69, 0, 3, 13, 0, 64, 67, 72, 87, 9, 25,
+ 101, 111, 115, 66, 77, 68, 3, 13, 0, 78, 72, 10,
+ 7, 65, 73, 81, 1, 76, 79, 90, 5, 72, 80, 3, 74,
+ 77, 88, 6, 67, 68, 73, 1, 4, 22, 0, 0, 0, 71,
+ 91, 97, 1, 5, 65, 18, 72, 92, 90, 2, 68, 27, 18,
+ 67, 1, 81, 0, 78, 75, 88, 82, 82, 86, 90, 21, 0,
+ 8, 81, 70, 80, 76, 95, 3, 67, 66, 76, 17, 68, 0,
+ 77, 82, 70, 73, 67, 7, 7, 64, 12, 14, 75, 64,
+ 64, 67, 4, 73, 14, 69, 1, 14, 15, 24, 17, 10,
+ 79, 0, 70, 0, 3, 90, 64, 8, 4, 5, 7, 7, 18, 10,
+ 3, 1, 2, 1, 6, 7, 85, 67, 1, 74, 4, 3, 13, 6, 1,
+ 10, 6, 4, 2, 13, 2, 76, 3, 80, 21, 21, 16, 14,
+ 17, 17, 10, 16, 19, 64, 10, 9, 6, 3, 68, 5, 64,
+ 1, 4, 1, 4, 9, 0, 80, 64, 74, 65, 4, 82, 24, 31,
+ 24, 23, 21, 20, 18, 18, 12, 4, 2, 64, 68, 70,
+ 92, 68, 65, 78, 12, 8, 5, 6, 1, 68, 66, 67, 77,
+ 70, 75, 92, 86, 96, 68, 22, 18, 11, 3, 7, 64,
+ 69, 71, 79, 65, 36, 21, 15, 8, 12, 2, 67, 72,
+ 79, 4, 38, 29, 24, 18, 16, 5, 64, 70, 76, 70,
+ 40, 26, 12, 3, 8, 68, 77, 82, 5, 48, 36, 27, 19,
+ 21, 5, 66, 71, 74, 62, 88, 85, 73, 86, 86, 79,
+ 81, 78, 79, 79, 77, 77, 82, 79, 84, 81, 65, 79,
+ 78, 74, 71, 68, 69, 69, 67, 65, 69, 72, 76, 9,
+ 8, 14, 5, 1, 11, 9, 6, 5, 6, 6, 4, 66, 68, 71,
+ 66, 7, 77, 6, 21, 5, 7, 11, 12, 15, 6, 9, 10,
+ 16, 67, 70, 77, 34, 35, 41, 29, 27, 35, 36, 36,
+ 40, 35, 41, 41, 31, 26, 7, 28, 39, 26, 21, 26,
+ 22, 15, 14, 10, 6, 0, 68, 72, 79, 28, 27, 27,
+ 22, 11, 14, 10, 1, 3, 65, 68, 78, 77, 86, 89,
+ 70, 70, 87, 12, 13, 6, 67, 4, 3, 70, 0, 65, 72,
+ 81, 81, 89, 100, 8, 65, 77, 73, 2, 1, 5, 8, 9,
+ 15, 10, 14, 16, 28, 18, 18, 27, 27, 54, 38, 29,
+ 19, 12, 1, 73, 86, 106, 5, 38, 31, 29, 22, 23,
+ 11, 8, 8, 65, 78, 73, 0, 71, 6, 14, 68, 69, 4,
+ 8, 4, 8, 16, 4, 2, 49, 34, 19, 5, 68, 84, 97,
+ 106, 117 },
+
+ {
+
+ 51,
+ 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 64, 30, 35,
+ 53, 14, 67, 0, 3, 14, 0, 65, 67, 73, 88, 8, 24,
+ 102, 112, 116, 0, 76, 68, 3, 14, 0, 77, 71, 11,
+ 6, 64, 72, 80, 0, 76, 79, 90, 5, 71, 80, 3, 74,
+ 76, 88, 6, 67, 68, 73, 1, 4, 22, 0, 0, 0, 70,
+ 91, 97, 1, 5, 66, 18, 72, 91, 88, 4, 66, 28, 20,
+ 66, 2, 79, 1, 77, 75, 87, 82, 82, 86, 90, 22, 0,
+ 8, 80, 69, 79, 76, 94, 3, 68, 67, 77, 17, 68, 0,
+ 77, 82, 69, 73, 67, 7, 7, 0, 12, 13, 75, 64, 64,
+ 67, 4, 73, 13, 69, 1, 13, 14, 23, 17, 10, 79, 0,
+ 70, 0, 2, 90, 65, 8, 4, 5, 7, 7, 18, 10, 3, 1,
+ 3, 1, 5, 7, 85, 68, 1, 75, 4, 2, 13, 6, 0, 10,
+ 6, 4, 0, 13, 1, 76, 3, 81, 20, 21, 15, 14, 17,
+ 17, 10, 16, 19, 64, 9, 9, 5, 3, 68, 4, 65, 1, 3,
+ 0, 3, 8, 64, 80, 64, 73, 66, 2, 82, 22, 29, 22,
+ 21, 19, 18, 16, 16, 10, 2, 1, 65, 70, 72, 93,
+ 68, 65, 79, 11, 7, 4, 4, 64, 69, 67, 68, 78, 71,
+ 76, 92, 86, 95, 67, 22, 18, 11, 4, 7, 64, 69,
+ 71, 78, 64, 35, 21, 15, 8, 13, 2, 66, 71, 77, 4,
+ 39, 29, 24, 18, 17, 5, 64, 69, 76, 70, 40, 26,
+ 12, 3, 8, 67, 76, 81, 5, 47, 36, 26, 19, 21, 5,
+ 65, 70, 74, 62, 88, 84, 73, 85, 85, 78, 79, 77,
+ 78, 78, 76, 76, 82, 78, 84, 80, 0, 79, 79, 74,
+ 71, 68, 69, 70, 67, 66, 70, 73, 77, 9, 8, 14, 5,
+ 0, 10, 8, 6, 5, 6, 5, 4, 66, 68, 72, 66, 7, 78,
+ 6, 20, 4, 6, 10, 12, 16, 6, 8, 10, 16, 67, 70,
+ 78, 33, 34, 40, 28, 26, 34, 34, 34, 38, 33, 39,
+ 38, 28, 23, 6, 26, 36, 23, 17, 23, 20, 13, 13,
+ 9, 6, 1, 68, 70, 76, 26, 25, 25, 20, 9, 12, 8,
+ 64, 1, 66, 69, 79, 78, 87, 90, 71, 71, 88, 11,
+ 12, 5, 69, 2, 1, 71, 65, 67, 73, 83, 83, 91,
+ 101, 7, 66, 78, 73, 2, 1, 6, 8, 9, 17, 11, 15,
+ 17, 29, 20, 19, 29, 28, 53, 37, 27, 17, 9, 64,
+ 76, 88, 108, 6, 38, 31, 29, 22, 24, 12, 8, 8,
+ 65, 78, 72, 0, 71, 7, 15, 68, 69, 5, 9, 4, 8,
+ 17, 4, 2, 48, 32, 17, 2, 72, 88, 100, 109, 119 },
+
+ {
+
+ 50,
+ 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 66, 28, 33,
+ 53, 14, 64, 64, 4, 14, 0, 66, 67, 74, 89, 8, 22,
+ 104, 113, 116, 3, 75, 68, 4, 14, 0, 77, 70, 11,
+ 6, 64, 72, 80, 0, 77, 78, 90, 5, 71, 79, 2, 74,
+ 76, 88, 7, 66, 68, 72, 2, 4, 22, 0, 0, 0, 70,
+ 91, 97, 2, 4, 66, 18, 72, 91, 87, 6, 65, 30, 22,
+ 65, 3, 78, 3, 76, 74, 85, 81, 81, 85, 89, 22, 0,
+ 8, 80, 69, 79, 75, 92, 2, 68, 67, 77, 17, 68, 0,
+ 76, 82, 69, 73, 66, 7, 6, 0, 12, 13, 75, 64, 64,
+ 67, 4, 73, 13, 69, 1, 13, 13, 22, 16, 9, 78, 1,
+ 70, 64, 2, 89, 65, 8, 3, 5, 7, 7, 19, 11, 3, 1,
+ 3, 1, 4, 7, 86, 68, 1, 75, 3, 1, 12, 6, 0, 11,
+ 6, 4, 64, 12, 0, 76, 3, 81, 20, 21, 15, 14, 17,
+ 17, 10, 16, 19, 64, 9, 9, 5, 3, 68, 4, 65, 0, 3,
+ 0, 3, 7, 64, 80, 65, 73, 67, 1, 82, 21, 27, 20,
+ 19, 17, 16, 14, 14, 8, 0, 64, 67, 73, 74, 94,
+ 68, 66, 79, 10, 6, 3, 3, 65, 71, 68, 69, 78, 71,
+ 76, 92, 86, 95, 67, 22, 18, 11, 4, 7, 64, 69,
+ 70, 77, 64, 35, 21, 15, 8, 13, 3, 65, 71, 75, 4,
+ 39, 29, 24, 18, 17, 5, 64, 69, 75, 70, 40, 25,
+ 11, 3, 8, 67, 76, 81, 5, 47, 35, 25, 18, 21, 5,
+ 65, 70, 73, 62, 87, 83, 72, 83, 84, 77, 78, 75,
+ 77, 76, 74, 74, 81, 77, 83, 80, 1, 78, 79, 74,
+ 70, 68, 70, 70, 68, 67, 71, 74, 78, 8, 7, 14, 4,
+ 0, 10, 8, 6, 4, 5, 5, 3, 67, 68, 72, 66, 7, 78,
+ 5, 20, 3, 6, 9, 11, 16, 5, 7, 9, 15, 68, 71, 80,
+ 32, 34, 40, 27, 24, 32, 32, 32, 36, 30, 36, 36,
+ 25, 21, 4, 23, 32, 20, 13, 21, 17, 11, 12, 8, 6,
+ 2, 67, 69, 74, 25, 23, 23, 19, 7, 10, 7, 66, 0,
+ 68, 71, 81, 80, 88, 91, 71, 71, 89, 9, 10, 3,
+ 70, 0, 64, 73, 66, 69, 75, 84, 84, 92, 101, 6,
+ 66, 79, 73, 3, 2, 6, 9, 10, 18, 12, 16, 18, 30,
+ 21, 20, 30, 29, 53, 35, 25, 14, 7, 67, 79, 91,
+ 110, 6, 39, 32, 30, 23, 25, 12, 9, 9, 64, 77,
+ 72, 1, 70, 8, 16, 68, 68, 5, 9, 4, 9, 18, 4, 2,
+ 46, 30, 14, 64, 75, 92, 104, 113, 122 },
+
+ {
+
+ 48,
+ 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 67, 27, 32,
+ 53, 14, 1, 64, 5, 15, 0, 67, 67, 75, 91, 7, 20,
+ 106, 114, 117, 5, 74, 68, 5, 15, 0, 77, 69, 11,
+ 5, 64, 72, 79, 64, 77, 78, 91, 5, 71, 79, 2, 75,
+ 76, 88, 7, 66, 68, 72, 2, 4, 22, 0, 0, 0, 69,
+ 92, 97, 2, 3, 66, 18, 72, 91, 85, 7, 0, 31, 23,
+ 64, 4, 77, 4, 75, 73, 83, 81, 81, 85, 88, 22, 0,
+ 8, 79, 69, 78, 75, 91, 2, 69, 68, 78, 17, 68, 0,
+ 76, 82, 69, 73, 66, 6, 6, 0, 12, 12, 75, 64, 64,
+ 67, 3, 73, 12, 70, 1, 12, 11, 21, 15, 8, 78, 1,
+ 70, 65, 1, 89, 66, 7, 3, 5, 7, 7, 20, 11, 3, 0,
+ 3, 0, 3, 6, 87, 68, 1, 76, 3, 0, 12, 5, 64, 11,
+ 6, 4, 66, 12, 64, 76, 3, 82, 19, 20, 15, 14, 17,
+ 16, 9, 16, 18, 65, 8, 9, 5, 2, 69, 3, 66, 0, 2,
+ 64, 2, 6, 65, 80, 65, 73, 68, 64, 82, 19, 25,
+ 18, 17, 15, 13, 12, 12, 6, 65, 66, 69, 75, 76,
+ 95, 68, 66, 80, 9, 4, 1, 1, 67, 72, 70, 71, 79,
+ 72, 77, 92, 86, 94, 67, 22, 18, 11, 4, 7, 64,
+ 69, 70, 76, 0, 35, 21, 15, 8, 13, 3, 65, 70, 74,
+ 4, 39, 29, 24, 17, 17, 5, 64, 69, 75, 70, 40,
+ 25, 11, 3, 8, 67, 76, 80, 5, 46, 34, 24, 17, 20,
+ 5, 65, 70, 73, 62, 86, 82, 72, 82, 83, 77, 77,
+ 74, 76, 75, 73, 73, 81, 77, 83, 79, 2, 78, 80,
+ 74, 70, 68, 70, 71, 68, 68, 72, 76, 79, 7, 7,
+ 14, 4, 64, 9, 7, 5, 3, 4, 4, 3, 68, 69, 73, 66,
+ 7, 79, 4, 19, 2, 6, 8, 10, 16, 4, 6, 9, 15, 69,
+ 72, 82, 31, 33, 39, 25, 23, 31, 30, 30, 33, 28,
+ 33, 33, 22, 18, 3, 20, 29, 17, 9, 18, 15, 9, 10,
+ 6, 6, 2, 67, 68, 72, 23, 21, 21, 17, 4, 8, 5,
+ 68, 65, 70, 72, 83, 81, 89, 92, 72, 72, 90, 8,
+ 9, 1, 72, 65, 66, 74, 68, 71, 76, 85, 86, 94,
+ 102, 5, 67, 80, 73, 3, 2, 7, 9, 10, 19, 12, 17,
+ 19, 31, 22, 21, 31, 30, 52, 33, 23, 12, 4, 70,
+ 82, 94, 113, 6, 39, 32, 30, 23, 25, 12, 9, 9,
+ 64, 77, 72, 1, 70, 9, 17, 68, 68, 6, 10, 4, 9,
+ 18, 4, 1, 45, 28, 12, 67, 78, 96, 108, 116, 125 },
+
+ {
+
+ 47,
+ 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 68, 26, 31,
+ 53, 14, 3, 64, 6, 16, 0, 67, 66, 76, 92, 6, 18,
+ 107, 115, 118, 8, 72, 68, 6, 16, 0, 76, 68, 12,
+ 4, 64, 71, 78, 64, 77, 78, 91, 5, 71, 78, 2, 75,
+ 76, 88, 7, 66, 67, 72, 2, 4, 22, 0, 0, 0, 68,
+ 92, 97, 2, 2, 66, 18, 71, 91, 83, 9, 2, 32, 25,
+ 1, 5, 75, 5, 73, 72, 81, 81, 81, 85, 87, 22, 0,
+ 8, 78, 69, 77, 74, 90, 2, 69, 68, 78, 17, 68, 1,
+ 75, 81, 68, 73, 66, 6, 6, 0, 12, 12, 74, 64, 0,
+ 67, 3, 72, 11, 70, 1, 11, 10, 21, 14, 7, 77, 1,
+ 69, 66, 0, 89, 67, 7, 3, 6, 7, 7, 21, 11, 3, 0,
+ 4, 0, 3, 6, 88, 68, 1, 77, 3, 64, 12, 5, 65, 11,
+ 6, 4, 67, 12, 64, 76, 3, 83, 18, 20, 15, 14, 17,
+ 16, 9, 16, 18, 65, 8, 9, 5, 2, 69, 3, 66, 0, 2,
+ 64, 1, 6, 65, 80, 65, 73, 69, 65, 82, 17, 24,
+ 17, 16, 13, 11, 11, 11, 4, 67, 67, 70, 77, 77,
+ 96, 68, 66, 81, 8, 3, 0, 0, 68, 73, 71, 72, 80,
+ 73, 78, 92, 85, 93, 66, 23, 18, 11, 4, 8, 0, 68,
+ 69, 75, 1, 35, 21, 15, 8, 14, 3, 64, 69, 72, 4,
+ 39, 29, 24, 17, 18, 5, 64, 69, 74, 70, 41, 25,
+ 11, 3, 9, 67, 76, 79, 5, 46, 34, 24, 16, 20, 5,
+ 65, 69, 72, 62, 85, 81, 71, 81, 81, 76, 75, 73,
+ 74, 74, 72, 71, 80, 76, 83, 78, 4, 78, 81, 73,
+ 69, 68, 70, 72, 68, 68, 73, 77, 80, 7, 7, 14, 4,
+ 64, 8, 7, 5, 2, 4, 4, 3, 69, 69, 73, 65, 8, 79,
+ 4, 18, 2, 6, 8, 10, 16, 3, 5, 9, 15, 70, 72, 83,
+ 31, 32, 38, 24, 22, 30, 28, 28, 31, 26, 31, 30,
+ 20, 16, 2, 17, 26, 14, 5, 15, 13, 8, 9, 5, 6, 3,
+ 67, 67, 70, 21, 20, 19, 15, 2, 7, 4, 70, 66, 71,
+ 73, 84, 82, 90, 92, 72, 72, 91, 7, 8, 0, 74, 66,
+ 67, 75, 69, 72, 77, 86, 87, 95, 103, 5, 68, 80,
+ 72, 3, 2, 8, 10, 11, 20, 13, 18, 20, 33, 23, 22,
+ 33, 32, 51, 32, 21, 10, 1, 73, 85, 97, 115, 7,
+ 39, 32, 31, 23, 26, 13, 9, 10, 0, 77, 71, 2, 70,
+ 10, 19, 67, 67, 7, 11, 4, 10, 19, 4, 1, 44, 26,
+ 10, 69, 81, 99, 112, 119, 126 },
+
+ {
+
+ 46,
+ 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 70, 24, 29,
+ 53, 14, 6, 65, 7, 16, 0, 68, 66, 77, 93, 6, 16,
+ 109, 116, 118, 11, 71, 68, 7, 16, 0, 76, 67, 12,
+ 4, 64, 71, 78, 64, 78, 77, 91, 5, 71, 77, 1, 75,
+ 76, 88, 8, 65, 67, 71, 3, 4, 22, 0, 0, 0, 68,
+ 92, 97, 3, 1, 66, 18, 71, 91, 82, 11, 3, 34, 27,
+ 2, 6, 74, 7, 72, 71, 79, 80, 80, 84, 86, 22, 0,
+ 8, 78, 69, 77, 74, 88, 1, 69, 68, 78, 17, 68, 1,
+ 75, 81, 68, 73, 65, 6, 5, 0, 12, 12, 74, 64, 0,
+ 67, 3, 72, 11, 70, 1, 11, 9, 20, 13, 6, 76, 2,
+ 69, 67, 0, 88, 67, 7, 2, 6, 7, 7, 22, 12, 3, 0,
+ 4, 0, 2, 6, 89, 68, 1, 77, 2, 65, 11, 5, 65, 12,
+ 6, 4, 68, 11, 65, 76, 3, 83, 18, 20, 15, 14, 17,
+ 16, 9, 16, 18, 65, 8, 9, 5, 2, 69, 3, 66, 64, 2,
+ 64, 1, 5, 66, 80, 66, 73, 70, 66, 82, 16, 22,
+ 15, 14, 11, 9, 9, 9, 2, 69, 69, 72, 80, 79, 97,
+ 68, 67, 81, 7, 2, 64, 64, 70, 75, 72, 73, 80,
+ 73, 78, 92, 85, 93, 66, 23, 18, 11, 4, 8, 0, 68,
+ 69, 74, 1, 35, 21, 15, 8, 14, 4, 0, 69, 70, 4,
+ 39, 29, 24, 17, 18, 5, 64, 69, 73, 70, 41, 24,
+ 10, 3, 9, 67, 76, 79, 5, 45, 33, 23, 15, 20, 5,
+ 65, 69, 72, 62, 84, 80, 70, 79, 80, 75, 74, 71,
+ 73, 72, 70, 69, 79, 75, 82, 78, 5, 77, 81, 73,
+ 68, 68, 71, 72, 69, 69, 74, 78, 81, 6, 6, 14, 3,
+ 65, 8, 7, 5, 1, 3, 4, 2, 70, 69, 73, 65, 8, 79,
+ 3, 18, 1, 6, 7, 9, 16, 2, 4, 8, 14, 71, 73, 85,
+ 30, 32, 38, 23, 20, 28, 26, 26, 29, 23, 28, 28,
+ 17, 14, 0, 14, 22, 11, 1, 13, 10, 6, 8, 4, 6, 4,
+ 66, 66, 68, 20, 18, 17, 14, 0, 5, 2, 72, 67, 73,
+ 75, 86, 84, 91, 93, 73, 72, 92, 5, 6, 65, 75,
+ 68, 69, 77, 71, 74, 79, 87, 88, 96, 103, 4, 68,
+ 81, 72, 4, 3, 8, 10, 12, 21, 14, 19, 21, 34, 24,
+ 23, 34, 33, 51, 30, 19, 7, 64, 76, 88, 100, 117,
+ 7, 40, 33, 31, 24, 27, 13, 10, 10, 0, 76, 71, 3,
+ 69, 11, 20, 67, 66, 7, 11, 4, 10, 20, 4, 1, 42,
+ 24, 7, 72, 84, 103, 116, 123, 126 },
+
+ {
+
+ 45,
+ 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 71, 23, 28,
+ 53, 14, 8, 65, 7, 17, 0, 69, 66, 78, 94, 5, 15,
+ 110, 117, 119, 14, 70, 68, 7, 17, 0, 75, 66, 13,
+ 3, 0, 70, 77, 65, 78, 77, 91, 5, 70, 77, 1, 75,
+ 75, 88, 8, 65, 67, 71, 3, 4, 22, 0, 0, 0, 67,
+ 92, 97, 3, 1, 67, 18, 71, 90, 80, 13, 5, 35, 29,
+ 3, 7, 72, 8, 71, 71, 78, 80, 80, 84, 86, 23, 0,
+ 8, 77, 68, 76, 73, 87, 1, 70, 69, 79, 17, 68, 1,
+ 74, 81, 67, 73, 65, 6, 5, 1, 12, 11, 74, 64, 0,
+ 67, 3, 72, 10, 70, 1, 10, 8, 19, 13, 6, 76, 2,
+ 69, 67, 64, 88, 68, 7, 2, 6, 7, 7, 22, 12, 3, 0,
+ 5, 0, 1, 6, 89, 69, 1, 78, 2, 66, 11, 5, 66, 12,
+ 6, 4, 70, 11, 66, 76, 3, 84, 17, 20, 14, 14, 17,
+ 16, 9, 16, 18, 65, 7, 9, 4, 2, 69, 2, 67, 64, 1,
+ 65, 0, 4, 66, 80, 66, 72, 71, 68, 82, 14, 20,
+ 13, 12, 9, 7, 7, 7, 0, 71, 70, 73, 82, 81, 98,
+ 68, 67, 82, 6, 1, 65, 66, 71, 76, 73, 74, 81,
+ 74, 79, 92, 85, 92, 65, 23, 18, 11, 5, 8, 0, 68,
+ 68, 73, 2, 34, 21, 15, 8, 15, 4, 1, 68, 68, 4,
+ 40, 29, 24, 17, 19, 5, 64, 68, 73, 70, 41, 24,
+ 10, 3, 9, 66, 75, 78, 5, 45, 33, 22, 15, 20, 5,
+ 64, 68, 71, 62, 84, 79, 70, 78, 79, 74, 72, 70,
+ 72, 71, 69, 68, 79, 74, 82, 77, 7, 77, 82, 73,
+ 68, 68, 71, 73, 69, 70, 75, 79, 82, 6, 6, 14, 3,
+ 65, 7, 6, 5, 1, 3, 3, 2, 70, 69, 74, 65, 8, 80,
+ 3, 17, 0, 5, 6, 9, 17, 2, 3, 8, 14, 71, 73, 86,
+ 29, 31, 37, 22, 19, 27, 24, 24, 27, 21, 26, 25,
+ 14, 11, 64, 12, 19, 8, 66, 10, 8, 4, 7, 3, 6, 5,
+ 66, 64, 65, 18, 16, 15, 12, 65, 3, 1, 74, 69,
+ 74, 76, 87, 85, 92, 94, 73, 73, 93, 4, 5, 66,
+ 77, 70, 71, 78, 72, 76, 80, 89, 90, 98, 104, 3,
+ 69, 82, 72, 4, 3, 9, 11, 12, 23, 15, 20, 22, 35,
+ 26, 24, 36, 34, 50, 29, 17, 5, 67, 78, 91, 102,
+ 119, 8, 40, 33, 32, 24, 28, 14, 10, 11, 1, 76,
+ 70, 3, 69, 12, 21, 67, 66, 8, 12, 4, 11, 21, 4,
+ 1, 41, 22, 5, 75, 88, 107, 119, 126, 126 },
+
+ {
+
+ 43,
+ 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 73, 21, 27,
+ 53, 14, 10, 65, 8, 18, 0, 70, 66, 79, 95, 5, 13,
+ 112, 118, 119, 17, 69, 68, 8, 18, 0, 75, 65, 13,
+ 3, 0, 70, 76, 65, 79, 77, 91, 5, 70, 76, 1, 76,
+ 75, 88, 9, 65, 67, 71, 4, 4, 22, 0, 0, 0, 67,
+ 93, 97, 4, 0, 67, 18, 71, 90, 78, 14, 6, 37, 30,
+ 4, 8, 71, 9, 70, 70, 76, 79, 80, 83, 85, 23, 0,
+ 8, 77, 68, 76, 73, 85, 1, 70, 69, 79, 17, 68, 1,
+ 74, 81, 67, 73, 64, 6, 5, 1, 12, 11, 74, 64, 0,
+ 67, 3, 72, 9, 71, 1, 9, 7, 18, 12, 5, 75, 3, 69,
+ 68, 64, 87, 68, 7, 2, 6, 7, 7, 23, 12, 3, 64, 5,
+ 64, 0, 5, 90, 69, 1, 78, 1, 67, 11, 4, 67, 12,
+ 6, 4, 71, 11, 67, 76, 3, 84, 16, 20, 14, 14, 17,
+ 16, 9, 16, 17, 65, 7, 9, 4, 2, 70, 2, 67, 65, 1,
+ 65, 64, 3, 67, 80, 67, 72, 72, 69, 82, 13, 18,
+ 11, 10, 7, 5, 5, 5, 65, 73, 72, 75, 84, 83, 99,
+ 68, 67, 82, 5, 0, 66, 67, 73, 78, 74, 76, 82,
+ 74, 79, 92, 85, 92, 65, 23, 18, 11, 5, 8, 0, 68,
+ 68, 72, 2, 34, 21, 15, 8, 15, 5, 1, 67, 66, 4,
+ 40, 29, 24, 17, 19, 5, 64, 68, 72, 70, 41, 24,
+ 9, 3, 9, 66, 75, 77, 5, 44, 32, 21, 14, 20, 5,
+ 64, 68, 71, 62, 83, 78, 69, 77, 78, 73, 71, 69,
+ 71, 70, 67, 66, 78, 74, 82, 77, 8, 77, 82, 73,
+ 67, 68, 72, 73, 69, 71, 76, 80, 83, 5, 5, 14, 2,
+ 66, 7, 6, 5, 0, 2, 3, 2, 71, 70, 74, 65, 8, 80,
+ 2, 17, 64, 5, 5, 8, 17, 1, 2, 7, 13, 72, 74, 88,
+ 28, 30, 37, 21, 18, 25, 22, 22, 25, 18, 23, 23,
+ 11, 9, 65, 9, 16, 5, 70, 7, 5, 2, 6, 1, 6, 6,
+ 66, 0, 0, 16, 14, 13, 10, 67, 1, 64, 76, 70, 76,
+ 77, 89, 87, 93, 95, 74, 73, 94, 3, 3, 68, 79,
+ 72, 73, 80, 74, 78, 82, 90, 91, 99, 105, 2, 70,
+ 83, 72, 5, 3, 9, 11, 13, 24, 15, 21, 23, 36, 27,
+ 25, 37, 35, 50, 27, 15, 2, 69, 81, 94, 105, 122,
+ 8, 41, 34, 32, 24, 28, 14, 10, 11, 1, 76, 70, 4,
+ 68, 13, 22, 67, 65, 8, 12, 4, 11, 22, 4, 1, 40,
+ 20, 2, 78, 91, 111, 123, 126, 126 },
+
+ {
+
+ 42,
+ 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 74, 20, 25,
+ 53, 14, 13, 66, 9, 18, 0, 70, 65, 80, 96, 4, 11,
+ 114, 119, 120, 20, 67, 68, 9, 18, 0, 75, 64, 13,
+ 2, 0, 70, 76, 65, 79, 76, 91, 5, 70, 75, 0, 76,
+ 75, 88, 9, 64, 66, 70, 4, 4, 22, 0, 0, 0, 66,
+ 93, 97, 4, 64, 67, 18, 70, 90, 77, 16, 8, 38,
+ 32, 6, 9, 70, 11, 69, 69, 74, 79, 79, 83, 84,
+ 23, 0, 8, 76, 68, 75, 72, 84, 0, 70, 69, 79, 17,
+ 68, 2, 73, 81, 67, 73, 64, 6, 4, 1, 12, 11, 73,
+ 64, 1, 67, 3, 72, 9, 71, 1, 9, 6, 18, 11, 4, 74,
+ 3, 69, 69, 65, 87, 69, 7, 1, 6, 7, 7, 24, 13, 3,
+ 64, 5, 64, 64, 5, 91, 69, 1, 79, 1, 68, 10, 4,
+ 67, 13, 6, 4, 72, 10, 68, 76, 3, 85, 16, 20, 14,
+ 14, 17, 16, 9, 16, 17, 65, 7, 9, 4, 2, 70, 2,
+ 67, 65, 1, 65, 64, 2, 67, 80, 67, 72, 73, 70,
+ 82, 11, 16, 10, 9, 5, 3, 3, 3, 67, 75, 74, 77,
+ 87, 84, 100, 68, 68, 83, 4, 64, 67, 68, 74, 79,
+ 75, 77, 82, 75, 80, 92, 85, 91, 65, 23, 18, 11,
+ 5, 8, 1, 67, 67, 71, 3, 34, 21, 15, 8, 15, 5, 2,
+ 67, 64, 4, 40, 29, 24, 17, 19, 5, 64, 68, 71,
+ 70, 42, 23, 9, 3, 9, 66, 75, 77, 5, 44, 31, 20,
+ 13, 20, 5, 64, 68, 70, 62, 82, 77, 68, 75, 77,
+ 72, 70, 67, 70, 68, 66, 64, 77, 73, 81, 76, 9,
+ 76, 83, 72, 66, 68, 72, 74, 70, 71, 77, 81, 84,
+ 4, 5, 14, 2, 66, 6, 6, 5, 64, 1, 3, 1, 72, 70,
+ 74, 65, 8, 80, 1, 16, 64, 5, 4, 7, 17, 0, 1, 7,
+ 13, 73, 75, 90, 27, 30, 36, 20, 16, 24, 20, 20,
+ 23, 16, 20, 20, 8, 7, 67, 6, 12, 2, 74, 5, 3, 0,
+ 5, 0, 6, 7, 65, 1, 2, 15, 13, 11, 9, 69, 64, 65,
+ 78, 71, 78, 79, 91, 88, 94, 96, 74, 73, 95, 1,
+ 2, 70, 80, 73, 74, 81, 75, 79, 83, 91, 92, 100,
+ 105, 2, 70, 84, 71, 5, 4, 10, 12, 14, 25, 16,
+ 22, 24, 38, 28, 26, 38, 37, 49, 25, 13, 0, 72,
+ 84, 97, 108, 124, 8, 41, 34, 33, 25, 29, 14, 11,
+ 12, 2, 75, 70, 5, 68, 14, 23, 66, 64, 9, 13, 4,
+ 12, 23, 4, 1, 38, 18, 0, 81, 94, 114, 126, 126,
+ 126 },
+
+ {
+
+ 41,
+ 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 76, 18, 24,
+ 53, 14, 15, 66, 10, 19, 0, 71, 65, 81, 97, 4, 9,
+ 115, 120, 120, 23, 66, 68, 10, 19, 0, 74, 0, 14,
+ 2, 0, 69, 75, 66, 80, 76, 91, 5, 70, 75, 0, 76,
+ 75, 88, 10, 64, 66, 70, 5, 4, 22, 0, 0, 0, 66,
+ 93, 97, 5, 65, 67, 18, 70, 90, 75, 18, 9, 40,
+ 34, 7, 10, 68, 12, 68, 68, 72, 78, 79, 82, 83,
+ 23, 0, 8, 76, 68, 75, 72, 82, 0, 71, 70, 80, 17,
+ 68, 2, 73, 81, 66, 73, 0, 6, 4, 1, 12, 10, 73,
+ 64, 1, 67, 3, 72, 8, 71, 1, 8, 5, 17, 10, 3, 74,
+ 4, 69, 70, 65, 86, 69, 7, 1, 6, 7, 7, 25, 13, 3,
+ 64, 6, 64, 65, 5, 92, 69, 1, 79, 0, 69, 10, 4,
+ 68, 13, 6, 4, 74, 10, 69, 76, 3, 85, 15, 20, 14,
+ 14, 17, 16, 9, 16, 17, 65, 6, 9, 4, 2, 70, 1,
+ 68, 66, 0, 66, 65, 1, 68, 80, 68, 72, 74, 72,
+ 82, 10, 14, 8, 7, 3, 1, 1, 1, 69, 77, 75, 78,
+ 89, 86, 101, 68, 68, 83, 3, 65, 68, 70, 76, 81,
+ 76, 78, 83, 75, 80, 92, 85, 91, 64, 23, 18, 11,
+ 5, 8, 1, 67, 67, 70, 3, 34, 21, 15, 8, 16, 6, 3,
+ 66, 1, 4, 40, 29, 24, 17, 20, 5, 64, 68, 71, 70,
+ 42, 23, 8, 3, 9, 66, 75, 76, 5, 43, 31, 19, 12,
+ 20, 5, 64, 67, 70, 62, 81, 76, 68, 74, 76, 71,
+ 68, 66, 69, 67, 64, 0, 77, 72, 81, 76, 11, 76,
+ 83, 72, 66, 68, 73, 74, 70, 72, 78, 82, 85, 4,
+ 4, 14, 1, 67, 6, 5, 5, 65, 1, 2, 1, 73, 70, 75,
+ 65, 8, 81, 1, 16, 65, 5, 3, 7, 17, 64, 0, 6, 12,
+ 74, 75, 91, 26, 29, 36, 19, 15, 22, 18, 18, 21,
+ 13, 18, 18, 5, 4, 68, 3, 9, 64, 78, 2, 0, 65, 4,
+ 64, 6, 8, 65, 2, 4, 13, 11, 9, 7, 71, 66, 67,
+ 80, 73, 79, 80, 92, 90, 95, 97, 75, 74, 96, 0,
+ 0, 71, 82, 75, 76, 83, 77, 81, 85, 92, 94, 102,
+ 106, 1, 71, 85, 71, 6, 4, 10, 12, 14, 26, 17,
+ 23, 25, 39, 29, 27, 40, 38, 49, 24, 11, 66, 74,
+ 87, 100, 111, 126, 9, 42, 35, 33, 25, 30, 15,
+ 11, 12, 2, 75, 69, 5, 67, 15, 24, 66, 64, 9, 13,
+ 4, 12, 24, 4, 1, 37, 16, 66, 84, 97, 118, 126,
+ 126, 126 },
+
+ {
+
+ 40,
+ 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 77, 17, 23,
+ 53, 14, 17, 66, 11, 20, 0, 72, 65, 82, 98, 3, 7,
+ 117, 121, 121, 26, 65, 68, 11, 20, 0, 74, 1, 14,
+ 1, 0, 69, 74, 66, 80, 76, 91, 5, 70, 74, 0, 76,
+ 75, 88, 10, 64, 66, 70, 5, 4, 22, 0, 0, 0, 65,
+ 93, 97, 5, 66, 67, 18, 70, 90, 73, 20, 11, 41,
+ 36, 8, 11, 67, 13, 67, 67, 70, 78, 79, 82, 82,
+ 23, 0, 8, 75, 68, 74, 71, 81, 0, 71, 70, 80, 17,
+ 68, 2, 72, 81, 66, 73, 0, 6, 4, 1, 12, 10, 73,
+ 64, 1, 67, 3, 72, 7, 71, 1, 7, 4, 16, 9, 2, 73,
+ 4, 69, 71, 66, 86, 70, 7, 1, 6, 7, 7, 26, 13, 3,
+ 64, 6, 64, 66, 5, 93, 69, 1, 80, 0, 70, 10, 4,
+ 69, 13, 6, 4, 75, 10, 70, 76, 3, 86, 14, 20, 14,
+ 14, 17, 16, 9, 16, 17, 65, 6, 9, 4, 2, 70, 1,
+ 68, 66, 0, 66, 66, 0, 68, 80, 68, 72, 75, 73,
+ 82, 8, 12, 6, 5, 1, 64, 64, 64, 71, 79, 77, 80,
+ 91, 88, 102, 68, 68, 84, 2, 66, 69, 71, 77, 82,
+ 77, 79, 84, 76, 81, 92, 85, 90, 64, 23, 18, 11,
+ 5, 8, 1, 67, 66, 69, 4, 34, 21, 15, 8, 16, 6, 4,
+ 65, 3, 4, 40, 29, 24, 17, 20, 5, 64, 68, 70, 70,
+ 42, 23, 8, 3, 9, 66, 75, 75, 5, 43, 30, 18, 11,
+ 20, 5, 64, 67, 69, 62, 80, 75, 67, 73, 75, 70,
+ 67, 65, 68, 66, 0, 2, 76, 71, 81, 75, 12, 76,
+ 84, 72, 65, 68, 73, 75, 70, 73, 79, 83, 86, 3,
+ 4, 14, 1, 67, 5, 5, 5, 66, 0, 2, 1, 74, 70, 75,
+ 65, 8, 81, 0, 15, 66, 5, 2, 6, 17, 65, 64, 6,
+ 12, 75, 76, 93, 25, 28, 35, 18, 14, 21, 16, 16,
+ 19, 11, 15, 15, 2, 2, 69, 0, 6, 67, 82, 64, 65,
+ 67, 3, 65, 6, 9, 65, 3, 6, 11, 9, 7, 5, 73, 68,
+ 68, 82, 74, 81, 81, 94, 91, 96, 98, 75, 74, 97,
+ 64, 64, 73, 84, 77, 78, 84, 78, 83, 86, 93, 95,
+ 103, 107, 0, 72, 86, 71, 6, 4, 11, 13, 15, 27,
+ 18, 24, 26, 40, 30, 28, 41, 39, 48, 22, 9, 68,
+ 77, 90, 103, 114, 126, 9, 42, 35, 34, 25, 31,
+ 15, 11, 13, 3, 75, 69, 6, 67, 16, 25, 66, 0, 10,
+ 14, 4, 13, 25, 4, 1, 36, 14, 68, 87, 100, 122,
+ 126, 126, 126 },
+
+ {
+
+ 38,
+ 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 79, 15, 21,
+ 52, 14, 19, 67, 11, 20, 64, 73, 65, 84, 100, 2,
+ 5, 119, 122, 122, 28, 64, 69, 11, 20, 64, 74, 2,
+ 14, 0, 0, 69, 74, 67, 81, 76, 92, 5, 70, 74, 64,
+ 77, 75, 88, 10, 64, 66, 70, 5, 3, 22, 0, 0, 0,
+ 65, 94, 97, 5, 67, 68, 18, 70, 90, 72, 21, 12,
+ 42, 37, 9, 12, 66, 14, 66, 67, 69, 78, 79, 82,
+ 82, 23, 0, 8, 75, 68, 74, 71, 80, 64, 72, 71,
+ 81, 17, 68, 2, 72, 81, 66, 73, 0, 5, 3, 1, 11,
+ 9, 73, 65, 1, 67, 2, 72, 6, 72, 0, 6, 2, 15, 8,
+ 1, 73, 4, 69, 72, 67, 86, 71, 6, 0, 6, 7, 7, 26,
+ 13, 3, 65, 6, 65, 67, 4, 94, 70, 0, 81, 64, 71,
+ 9, 3, 70, 13, 6, 4, 77, 9, 71, 76, 3, 87, 13,
+ 19, 13, 14, 17, 15, 8, 16, 16, 66, 5, 9, 3, 1,
+ 71, 0, 69, 67, 64, 67, 67, 64, 69, 80, 69, 72,
+ 76, 75, 82, 6, 10, 4, 3, 64, 67, 66, 66, 73, 81,
+ 79, 82, 94, 90, 104, 69, 69, 85, 1, 68, 71, 73,
+ 79, 84, 79, 81, 85, 77, 82, 92, 85, 90, 64, 23,
+ 18, 11, 5, 8, 1, 67, 66, 68, 4, 33, 21, 15, 8,
+ 16, 6, 4, 65, 4, 3, 40, 29, 23, 16, 20, 5, 64,
+ 68, 70, 70, 42, 22, 7, 2, 9, 66, 75, 75, 5, 42,
+ 29, 17, 10, 19, 5, 64, 67, 69, 62, 80, 74, 67,
+ 72, 74, 70, 66, 64, 67, 65, 1, 3, 76, 71, 81,
+ 75, 13, 76, 85, 72, 65, 68, 74, 76, 71, 74, 80,
+ 85, 88, 2, 3, 14, 0, 68, 4, 4, 4, 67, 64, 1, 0,
+ 75, 71, 76, 65, 8, 82, 64, 14, 67, 4, 1, 5, 17,
+ 66, 66, 5, 11, 76, 77, 95, 24, 27, 34, 16, 12,
+ 19, 14, 14, 16, 8, 12, 12, 64, 64, 71, 66, 2,
+ 70, 87, 67, 68, 69, 1, 67, 6, 9, 65, 4, 8, 9, 7,
+ 5, 3, 76, 70, 70, 85, 76, 83, 83, 96, 93, 97,
+ 99, 76, 75, 99, 66, 66, 75, 86, 79, 80, 86, 80,
+ 85, 88, 95, 97, 105, 108, 64, 73, 87, 71, 6, 4,
+ 11, 13, 15, 28, 18, 25, 26, 41, 31, 29, 42, 40,
+ 47, 20, 6, 71, 80, 93, 107, 117, 126, 9, 42, 35,
+ 34, 25, 31, 15, 11, 13, 3, 75, 69, 6, 67, 17,
+ 26, 66, 0, 10, 14, 4, 13, 25, 4, 0, 34, 11, 71,
+ 90, 104, 126, 126, 126, 126 },
+
+ {
+
+ 37,
+ 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 80, 14, 20,
+ 52, 14, 22, 67, 12, 21, 64, 73, 64, 85, 101, 2,
+ 4, 120, 123, 122, 31, 1, 69, 12, 21, 64, 73, 4,
+ 15, 0, 1, 68, 73, 67, 81, 75, 92, 5, 69, 73, 64,
+ 77, 74, 88, 11, 0, 65, 69, 6, 3, 22, 0, 0, 0,
+ 64, 94, 97, 6, 67, 68, 18, 69, 89, 70, 23, 14,
+ 44, 39, 11, 13, 64, 16, 64, 66, 67, 77, 78, 81,
+ 81, 24, 1, 9, 74, 67, 73, 70, 78, 64, 72, 71,
+ 81, 18, 68, 3, 71, 80, 65, 72, 1, 5, 3, 2, 11,
+ 9, 72, 65, 2, 67, 2, 71, 6, 72, 0, 6, 1, 15, 8,
+ 1, 72, 5, 68, 72, 67, 85, 71, 6, 0, 7, 7, 7, 27,
+ 14, 4, 65, 7, 65, 67, 4, 94, 70, 0, 81, 64, 72,
+ 9, 3, 70, 14, 7, 4, 78, 9, 71, 75, 3, 87, 13,
+ 19, 13, 14, 17, 15, 8, 16, 16, 66, 5, 9, 3, 1,
+ 71, 0, 69, 67, 64, 67, 67, 64, 69, 79, 69, 71,
+ 76, 76, 81, 5, 9, 3, 2, 66, 69, 67, 67, 75, 82,
+ 80, 83, 96, 91, 105, 69, 69, 85, 0, 69, 72, 74,
+ 80, 85, 80, 82, 85, 77, 82, 91, 84, 89, 0, 24,
+ 18, 11, 6, 9, 2, 66, 65, 66, 5, 33, 21, 15, 8,
+ 17, 7, 5, 64, 6, 3, 41, 30, 23, 16, 21, 5, 64,
+ 67, 69, 70, 43, 22, 7, 2, 10, 65, 74, 74, 5, 42,
+ 29, 17, 10, 19, 5, 0, 66, 68, 62, 79, 73, 66,
+ 70, 72, 69, 64, 1, 65, 0, 3, 5, 75, 70, 80, 74,
+ 15, 75, 85, 71, 64, 67, 74, 76, 71, 74, 80, 86,
+ 89, 2, 3, 15, 0, 68, 4, 4, 4, 67, 64, 1, 0, 75,
+ 71, 76, 64, 9, 82, 64, 14, 67, 4, 1, 5, 18, 66,
+ 67, 5, 11, 76, 77, 96, 24, 27, 34, 15, 11, 18,
+ 12, 12, 14, 6, 10, 10, 66, 66, 72, 68, 64, 73,
+ 91, 69, 70, 70, 0, 68, 6, 10, 64, 6, 11, 8, 6,
+ 4, 2, 78, 71, 71, 87, 77, 84, 84, 97, 94, 98,
+ 99, 76, 75, 100, 67, 67, 76, 87, 80, 81, 87, 81,
+ 86, 89, 96, 98, 106, 108, 64, 73, 87, 70, 7, 5,
+ 12, 14, 16, 30, 19, 26, 27, 43, 33, 30, 44, 42,
+ 47, 19, 4, 73, 82, 95, 110, 119, 126, 10, 43,
+ 36, 35, 26, 32, 16, 12, 14, 4, 74, 68, 7, 66,
+ 19, 28, 65, 1, 11, 15, 5, 14, 26, 4, 0, 33, 9,
+ 73, 92, 107, 126, 126, 126, 126 },
+
+ {
+
+ 36,
+ 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 81, 13, 19,
+ 52, 14, 24, 67, 13, 22, 64, 74, 64, 86, 102, 1,
+ 2, 122, 124, 123, 34, 2, 69, 13, 22, 64, 73, 5,
+ 15, 64, 1, 68, 72, 67, 81, 75, 92, 5, 69, 72,
+ 64, 77, 74, 88, 11, 0, 65, 69, 6, 3, 22, 0, 0,
+ 0, 0, 94, 97, 6, 68, 68, 18, 69, 89, 68, 25, 16,
+ 45, 41, 12, 14, 0, 17, 0, 65, 65, 77, 78, 81,
+ 80, 24, 1, 9, 73, 67, 72, 70, 77, 64, 72, 71,
+ 81, 18, 68, 3, 71, 80, 65, 72, 1, 5, 3, 2, 11,
+ 9, 72, 65, 2, 67, 2, 71, 5, 72, 0, 5, 0, 14, 7,
+ 0, 71, 5, 68, 73, 68, 85, 72, 6, 0, 7, 7, 7, 28,
+ 14, 4, 65, 7, 65, 68, 4, 95, 70, 0, 82, 64, 73,
+ 9, 3, 71, 14, 7, 4, 79, 9, 72, 75, 3, 88, 12,
+ 19, 13, 14, 17, 15, 8, 16, 16, 66, 5, 9, 3, 1,
+ 71, 0, 69, 67, 64, 67, 68, 65, 70, 79, 69, 71,
+ 77, 77, 81, 3, 7, 1, 0, 68, 71, 69, 69, 77, 84,
+ 82, 85, 98, 93, 106, 69, 69, 86, 64, 70, 73, 75,
+ 82, 86, 81, 83, 86, 78, 83, 91, 84, 88, 0, 24,
+ 18, 11, 6, 9, 2, 66, 65, 65, 6, 33, 21, 15, 8,
+ 17, 7, 6, 0, 8, 3, 41, 30, 23, 16, 21, 5, 64,
+ 67, 68, 70, 43, 22, 7, 2, 10, 65, 74, 73, 5, 41,
+ 28, 16, 9, 19, 5, 0, 66, 68, 62, 78, 72, 65, 69,
+ 71, 68, 0, 2, 64, 1, 4, 7, 74, 69, 80, 73, 16,
+ 75, 86, 71, 0, 67, 74, 77, 71, 75, 81, 87, 90,
+ 1, 3, 15, 0, 69, 3, 4, 4, 68, 65, 1, 0, 76, 71,
+ 76, 64, 9, 82, 65, 13, 68, 4, 0, 4, 18, 67, 68,
+ 5, 11, 77, 78, 98, 23, 26, 33, 14, 10, 17, 10,
+ 10, 12, 4, 7, 7, 69, 68, 73, 71, 67, 76, 95, 72,
+ 72, 72, 64, 69, 6, 11, 64, 7, 13, 6, 4, 2, 0,
+ 80, 73, 73, 89, 78, 86, 85, 99, 95, 99, 100, 77,
+ 75, 101, 68, 68, 78, 89, 82, 83, 88, 83, 88, 90,
+ 97, 99, 107, 109, 65, 74, 88, 70, 7, 5, 13, 14,
+ 17, 31, 20, 27, 28, 44, 34, 31, 45, 43, 46, 17,
+ 2, 75, 85, 98, 113, 122, 126, 10, 43, 36, 35,
+ 26, 33, 16, 12, 14, 4, 74, 68, 8, 66, 20, 29,
+ 65, 2, 12, 16, 5, 14, 27, 4, 0, 32, 7, 75, 95,
+ 110, 126, 126, 126, 126 },
+
+ {
+
+ 35,
+ 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 83, 11,
+ 18, 52, 14, 26, 67, 14, 23, 64, 75, 64, 87, 103,
+ 1, 0, 123, 125, 123, 37, 3, 69, 14, 23, 64, 72,
+ 6, 16, 64, 1, 67, 71, 68, 82, 75, 92, 5, 69, 72,
+ 64, 77, 74, 88, 12, 0, 65, 69, 7, 3, 22, 0, 0,
+ 0, 0, 94, 97, 7, 69, 68, 18, 69, 89, 66, 27, 17,
+ 47, 43, 13, 15, 2, 18, 1, 64, 0, 76, 78, 80, 79,
+ 24, 1, 9, 73, 67, 72, 69, 75, 64, 73, 72, 82,
+ 18, 68, 3, 70, 80, 64, 72, 2, 5, 3, 2, 11, 8,
+ 72, 65, 2, 67, 2, 71, 4, 72, 0, 4, 64, 13, 6,
+ 64, 71, 6, 68, 74, 68, 84, 72, 6, 0, 7, 7, 7,
+ 29, 14, 4, 65, 8, 65, 69, 4, 96, 70, 0, 82, 65,
+ 74, 9, 3, 72, 14, 7, 4, 81, 9, 73, 75, 3, 88,
+ 11, 19, 13, 14, 17, 15, 8, 16, 16, 66, 4, 9, 3,
+ 1, 71, 64, 70, 68, 65, 68, 69, 66, 70, 79, 70,
+ 71, 78, 79, 81, 2, 5, 64, 65, 70, 73, 71, 71,
+ 79, 86, 83, 86, 100, 95, 107, 69, 69, 86, 65,
+ 71, 74, 77, 83, 88, 82, 84, 87, 78, 83, 91, 84,
+ 88, 1, 24, 18, 11, 6, 9, 2, 66, 64, 64, 6, 33,
+ 21, 15, 8, 18, 8, 7, 1, 10, 3, 41, 30, 23, 16,
+ 22, 5, 64, 67, 68, 70, 43, 22, 6, 2, 10, 65, 74,
+ 72, 5, 41, 28, 15, 8, 19, 5, 0, 65, 67, 62, 77,
+ 71, 65, 68, 70, 67, 2, 3, 0, 2, 6, 8, 74, 68,
+ 80, 73, 18, 75, 86, 71, 0, 67, 75, 77, 71, 76,
+ 82, 88, 91, 1, 2, 15, 64, 69, 3, 3, 4, 69, 65,
+ 0, 0, 77, 71, 77, 64, 9, 83, 65, 13, 69, 4, 64,
+ 4, 18, 68, 69, 4, 10, 78, 78, 99, 22, 25, 33,
+ 13, 9, 15, 8, 8, 10, 1, 5, 5, 72, 71, 74, 74,
+ 70, 79, 99, 75, 75, 74, 65, 70, 6, 12, 64, 8,
+ 15, 4, 2, 0, 65, 82, 75, 74, 91, 80, 87, 86,
+ 100, 97, 100, 101, 77, 76, 102, 69, 70, 79, 91,
+ 84, 85, 90, 84, 90, 92, 98, 101, 109, 110, 66,
+ 75, 89, 70, 8, 5, 13, 15, 17, 32, 21, 28, 29,
+ 45, 35, 32, 47, 44, 46, 16, 0, 78, 87, 101, 116,
+ 125, 126, 11, 44, 37, 36, 26, 34, 17, 12, 15, 5,
+ 74, 67, 8, 65, 21, 30, 65, 2, 12, 16, 5, 15, 28,
+ 4, 0, 31, 5, 78, 98, 113, 126, 126, 126, 126 },
+
+ {
+
+ 33,
+ 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 84, 10,
+ 16, 52, 14, 29, 68, 15, 23, 64, 76, 64, 88, 104,
+ 0, 65, 125, 126, 124, 40, 4, 69, 15, 23, 64, 72,
+ 7, 16, 65, 1, 67, 71, 68, 82, 74, 92, 5, 69, 71,
+ 65, 78, 74, 88, 12, 1, 65, 68, 7, 3, 22, 0, 0,
+ 0, 1, 95, 97, 7, 70, 68, 18, 69, 89, 65, 28, 19,
+ 48, 44, 14, 16, 3, 20, 2, 0, 2, 76, 77, 80, 78,
+ 24, 1, 9, 72, 67, 71, 69, 74, 65, 73, 72, 82,
+ 18, 68, 3, 70, 80, 64, 72, 2, 5, 2, 2, 11, 8,
+ 72, 65, 2, 67, 2, 71, 4, 73, 0, 4, 65, 12, 5,
+ 65, 70, 6, 68, 75, 69, 84, 73, 6, 64, 7, 7, 7,
+ 30, 15, 4, 66, 8, 66, 70, 3, 97, 70, 0, 83, 65,
+ 75, 8, 2, 72, 15, 7, 4, 82, 8, 74, 75, 3, 89,
+ 11, 19, 13, 14, 17, 15, 8, 16, 15, 66, 4, 9, 3,
+ 1, 72, 64, 70, 68, 65, 68, 69, 67, 71, 79, 70,
+ 71, 79, 80, 81, 0, 3, 66, 67, 72, 75, 73, 73,
+ 81, 88, 85, 88, 103, 97, 108, 69, 70, 87, 66,
+ 72, 75, 78, 85, 89, 83, 86, 87, 79, 84, 91, 84,
+ 87, 1, 24, 18, 11, 6, 9, 2, 66, 64, 0, 7, 33,
+ 21, 15, 8, 18, 8, 7, 1, 12, 3, 41, 30, 23, 16,
+ 22, 5, 64, 67, 67, 70, 43, 21, 6, 2, 10, 65, 74,
+ 72, 5, 40, 27, 14, 7, 19, 5, 0, 65, 67, 62, 76,
+ 70, 64, 66, 69, 66, 3, 5, 1, 4, 7, 10, 73, 68,
+ 79, 72, 19, 74, 87, 71, 1, 67, 75, 78, 72, 77,
+ 83, 89, 92, 0, 2, 15, 64, 70, 2, 3, 4, 70, 66,
+ 0, 64, 78, 72, 77, 64, 9, 83, 66, 12, 70, 4, 65,
+ 3, 18, 69, 70, 4, 10, 79, 79, 101, 21, 25, 32,
+ 12, 7, 14, 6, 6, 8, 64, 2, 2, 75, 73, 76, 77,
+ 74, 82, 103, 77, 77, 76, 66, 72, 6, 13, 0, 9,
+ 17, 3, 0, 65, 66, 84, 77, 76, 93, 81, 89, 88,
+ 102, 98, 101, 102, 78, 76, 103, 71, 71, 81, 92,
+ 86, 87, 91, 86, 92, 93, 99, 102, 110, 110, 67,
+ 75, 90, 70, 8, 6, 14, 15, 18, 33, 21, 29, 30,
+ 46, 36, 33, 48, 45, 45, 14, 65, 80, 90, 104,
+ 119, 126, 126, 11, 44, 37, 36, 27, 34, 17, 13,
+ 15, 5, 73, 67, 9, 65, 22, 31, 65, 3, 13, 17, 5,
+ 15, 29, 4, 0, 29, 3, 80, 101, 116, 126, 126,
+ 126, 126 },
+
+ {
+
+ 32,
+ 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 86, 8, 15,
+ 52, 14, 31, 68, 16, 24, 64, 76, 0, 89, 105, 0,
+ 67, 126, 126, 124, 43, 6, 69, 16, 24, 64, 72, 8,
+ 16, 65, 1, 67, 70, 68, 83, 74, 92, 5, 69, 70,
+ 65, 78, 74, 88, 13, 1, 64, 68, 8, 3, 22, 0, 0,
+ 0, 1, 95, 97, 8, 71, 68, 18, 68, 89, 0, 30, 20,
+ 50, 46, 16, 17, 4, 21, 3, 1, 4, 75, 77, 79, 77,
+ 24, 1, 9, 72, 67, 71, 68, 72, 65, 73, 72, 82,
+ 18, 68, 4, 69, 80, 64, 72, 3, 5, 2, 2, 11, 8,
+ 71, 65, 3, 67, 2, 71, 3, 73, 0, 3, 66, 12, 4,
+ 66, 69, 7, 68, 76, 69, 83, 73, 6, 64, 7, 7, 7,
+ 31, 15, 4, 66, 8, 66, 71, 3, 98, 70, 0, 83, 66,
+ 76, 8, 2, 73, 15, 7, 4, 83, 8, 75, 75, 3, 89,
+ 10, 19, 13, 14, 17, 15, 8, 16, 15, 66, 4, 9, 3,
+ 1, 72, 64, 70, 69, 65, 68, 70, 68, 71, 79, 71,
+ 71, 80, 81, 81, 64, 1, 67, 68, 74, 77, 75, 75,
+ 83, 90, 87, 90, 105, 98, 109, 69, 70, 87, 67,
+ 73, 76, 79, 86, 91, 84, 87, 88, 79, 84, 91, 84,
+ 87, 1, 24, 18, 11, 6, 9, 3, 65, 0, 1, 7, 33, 21,
+ 15, 8, 18, 9, 8, 2, 14, 3, 41, 30, 23, 16, 22,
+ 5, 64, 67, 66, 70, 44, 21, 5, 2, 10, 65, 74, 71,
+ 5, 40, 26, 13, 6, 19, 5, 0, 65, 66, 62, 75, 69,
+ 0, 65, 68, 65, 4, 6, 2, 5, 9, 12, 72, 67, 79,
+ 72, 20, 74, 87, 70, 2, 67, 76, 78, 72, 77, 84,
+ 90, 93, 64, 1, 15, 65, 70, 2, 3, 4, 71, 67, 0,
+ 64, 79, 72, 77, 64, 9, 83, 67, 12, 70, 4, 66, 2,
+ 18, 70, 71, 3, 9, 80, 80, 103, 20, 24, 32, 11,
+ 6, 12, 4, 4, 6, 67, 64, 0, 78, 75, 77, 80, 77,
+ 85, 107, 80, 80, 78, 67, 73, 6, 14, 0, 10, 19,
+ 1, 64, 67, 68, 86, 79, 77, 95, 82, 91, 89, 104,
+ 100, 102, 103, 78, 76, 104, 72, 73, 83, 94, 87,
+ 88, 93, 87, 93, 95, 100, 103, 111, 111, 67, 76,
+ 91, 69, 9, 6, 14, 16, 19, 34, 22, 30, 31, 48,
+ 37, 34, 49, 47, 45, 12, 67, 83, 92, 107, 122,
+ 126, 126, 11, 45, 38, 37, 27, 35, 17, 13, 16, 6,
+ 73, 67, 10, 64, 23, 32, 64, 4, 13, 17, 5, 16,
+ 30, 4, 0, 28, 1, 83, 104, 119, 126, 126, 126,
+ 126 },
+
+ {
+
+ 31,
+ 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 87, 7, 14,
+ 52, 14, 33, 68, 16, 25, 64, 77, 0, 90, 106, 64,
+ 68, 126, 126, 125, 46, 7, 69, 16, 25, 64, 71, 9,
+ 17, 66, 2, 66, 69, 69, 83, 74, 92, 5, 68, 70,
+ 65, 78, 73, 88, 13, 1, 64, 68, 8, 3, 22, 0, 0,
+ 0, 2, 95, 97, 8, 71, 69, 18, 68, 88, 2, 32, 22,
+ 51, 48, 17, 18, 6, 22, 4, 1, 5, 75, 77, 79, 77,
+ 25, 1, 9, 71, 66, 70, 68, 71, 65, 74, 73, 83,
+ 18, 68, 4, 69, 80, 0, 72, 3, 5, 2, 3, 11, 7, 71,
+ 65, 3, 67, 2, 71, 2, 73, 0, 2, 67, 11, 4, 66,
+ 69, 7, 68, 76, 70, 83, 74, 6, 64, 7, 7, 7, 31,
+ 15, 4, 66, 9, 66, 72, 3, 98, 71, 0, 84, 66, 77,
+ 8, 2, 74, 15, 7, 4, 85, 8, 76, 75, 3, 90, 9, 19,
+ 12, 14, 17, 15, 8, 16, 15, 66, 3, 9, 2, 1, 72,
+ 65, 71, 69, 66, 69, 71, 69, 72, 79, 71, 70, 81,
+ 83, 81, 66, 64, 69, 70, 76, 79, 77, 77, 85, 92,
+ 88, 91, 107, 100, 110, 69, 70, 88, 68, 74, 77,
+ 81, 88, 92, 85, 88, 89, 80, 85, 91, 84, 86, 2,
+ 24, 18, 11, 7, 9, 3, 65, 0, 2, 8, 32, 21, 15, 8,
+ 19, 9, 9, 3, 16, 3, 42, 30, 23, 16, 23, 5, 64,
+ 66, 66, 70, 44, 21, 5, 2, 10, 64, 73, 70, 5, 39,
+ 26, 12, 6, 19, 5, 1, 64, 66, 62, 75, 68, 0, 64,
+ 67, 64, 6, 7, 3, 6, 10, 13, 72, 66, 79, 71, 22,
+ 74, 88, 70, 2, 67, 76, 79, 72, 78, 85, 91, 94,
+ 64, 1, 15, 65, 71, 1, 2, 4, 71, 67, 64, 64, 79,
+ 72, 78, 64, 9, 84, 67, 11, 71, 3, 67, 2, 19, 70,
+ 72, 3, 9, 80, 80, 104, 19, 23, 31, 10, 5, 11, 2,
+ 2, 4, 69, 66, 66, 81, 78, 78, 82, 80, 88, 111,
+ 83, 82, 80, 68, 74, 6, 15, 0, 12, 22, 64, 66,
+ 69, 70, 88, 81, 79, 97, 84, 92, 90, 105, 101,
+ 103, 104, 79, 77, 105, 73, 74, 84, 96, 89, 90,
+ 94, 89, 95, 96, 102, 105, 113, 112, 68, 77, 92,
+ 69, 9, 6, 15, 16, 19, 36, 23, 31, 32, 49, 39,
+ 35, 51, 48, 44, 11, 69, 85, 95, 109, 125, 126,
+ 126, 12, 45, 38, 37, 27, 36, 18, 13, 16, 6, 73,
+ 66, 10, 64, 24, 33, 64, 4, 14, 18, 5, 16, 31, 4,
+ 0, 27, 64, 85, 107, 123, 126, 126, 126, 126 },
+
+ {
+
+ 30,
+ 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 89, 5, 12,
+ 52, 14, 36, 69, 17, 25, 64, 78, 0, 91, 107, 64,
+ 70, 126, 126, 125, 49, 8, 69, 17, 25, 64, 71,
+ 10, 17, 66, 2, 66, 69, 69, 84, 73, 92, 5, 68,
+ 69, 66, 78, 73, 88, 14, 2, 64, 67, 9, 3, 22, 0,
+ 0, 0, 2, 95, 97, 9, 72, 69, 18, 68, 88, 3, 34,
+ 23, 53, 50, 18, 19, 7, 24, 5, 2, 7, 74, 76, 78,
+ 76, 25, 1, 9, 71, 66, 70, 67, 69, 66, 74, 73,
+ 83, 18, 68, 4, 68, 80, 0, 72, 4, 5, 1, 3, 11, 7,
+ 71, 65, 3, 67, 2, 71, 2, 73, 0, 2, 68, 10, 3,
+ 67, 68, 8, 68, 77, 70, 82, 74, 6, 65, 7, 7, 7,
+ 32, 16, 4, 66, 9, 66, 73, 3, 99, 71, 0, 84, 67,
+ 78, 7, 2, 74, 16, 7, 4, 86, 7, 77, 75, 3, 90, 9,
+ 19, 12, 14, 17, 15, 8, 16, 15, 66, 3, 9, 2, 1,
+ 72, 65, 71, 70, 66, 69, 71, 70, 72, 79, 72, 70,
+ 82, 84, 81, 67, 66, 71, 72, 78, 81, 79, 79, 87,
+ 94, 90, 93, 110, 102, 111, 69, 71, 88, 69, 75,
+ 78, 82, 89, 94, 86, 89, 89, 80, 85, 91, 84, 86,
+ 2, 24, 18, 11, 7, 9, 3, 65, 1, 3, 8, 32, 21, 15,
+ 8, 19, 10, 10, 3, 18, 3, 42, 30, 23, 16, 23, 5,
+ 64, 66, 65, 70, 44, 20, 4, 2, 10, 64, 73, 70, 5,
+ 39, 25, 11, 5, 19, 5, 1, 64, 65, 62, 74, 67, 1,
+ 1, 66, 0, 7, 9, 4, 8, 12, 15, 71, 65, 78, 71,
+ 23, 73, 88, 70, 3, 67, 77, 79, 73, 79, 86, 92,
+ 95, 65, 0, 15, 66, 71, 1, 2, 4, 72, 68, 64, 65,
+ 80, 72, 78, 64, 9, 84, 68, 11, 72, 3, 68, 1, 19,
+ 71, 73, 2, 8, 81, 81, 106, 18, 23, 31, 9, 3, 9,
+ 0, 0, 2, 72, 69, 68, 84, 80, 80, 85, 84, 91,
+ 115, 85, 85, 82, 69, 75, 6, 16, 1, 13, 24, 65,
+ 68, 71, 71, 90, 83, 80, 99, 85, 94, 92, 107,
+ 103, 104, 105, 79, 77, 106, 75, 76, 86, 97, 91,
+ 92, 96, 90, 97, 98, 103, 106, 114, 112, 69, 77,
+ 93, 69, 10, 7, 15, 17, 20, 37, 24, 32, 33, 50,
+ 40, 36, 52, 49, 44, 9, 71, 88, 97, 112, 126,
+ 126, 126, 12, 46, 39, 38, 28, 37, 18, 14, 17, 7,
+ 72, 66, 11, 0, 25, 34, 64, 5, 14, 18, 5, 17, 32,
+ 4, 0, 25, 66, 88, 110, 126, 126, 126, 126, 126 },
+
+ {
+
+ 28,
+ 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 90, 4, 11,
+ 52, 14, 38, 69, 18, 26, 64, 79, 0, 92, 109, 65,
+ 72, 126, 126, 126, 51, 9, 69, 18, 26, 64, 71,
+ 11, 17, 67, 2, 66, 68, 70, 84, 73, 93, 5, 68,
+ 69, 66, 79, 73, 88, 14, 2, 64, 67, 9, 3, 22, 0,
+ 0, 0, 3, 96, 97, 9, 73, 69, 18, 68, 88, 5, 35,
+ 25, 54, 51, 19, 20, 8, 25, 6, 3, 9, 74, 76, 78,
+ 75, 25, 1, 9, 70, 66, 69, 67, 68, 66, 75, 74,
+ 84, 18, 68, 4, 68, 80, 0, 72, 4, 4, 1, 3, 11, 6,
+ 71, 65, 3, 67, 1, 71, 1, 74, 0, 1, 70, 9, 2, 68,
+ 68, 8, 68, 78, 71, 82, 75, 5, 65, 7, 7, 7, 33,
+ 16, 4, 67, 9, 67, 74, 2, 100, 71, 0, 85, 67, 79,
+ 7, 1, 75, 16, 7, 4, 88, 7, 78, 75, 3, 91, 8, 18,
+ 12, 14, 17, 14, 7, 16, 14, 67, 2, 9, 2, 0, 73,
+ 66, 72, 70, 67, 70, 72, 71, 73, 79, 72, 70, 83,
+ 86, 81, 69, 68, 73, 74, 80, 84, 81, 81, 89, 96,
+ 92, 95, 112, 104, 112, 69, 71, 89, 70, 77, 80,
+ 84, 91, 95, 88, 91, 90, 81, 86, 91, 84, 85, 2,
+ 24, 18, 11, 7, 9, 3, 65, 1, 4, 9, 32, 21, 15, 8,
+ 19, 10, 10, 4, 19, 3, 42, 30, 23, 15, 23, 5, 64,
+ 66, 65, 70, 44, 20, 4, 2, 10, 64, 73, 69, 5, 38,
+ 24, 10, 4, 18, 5, 1, 64, 65, 62, 73, 66, 1, 2,
+ 65, 0, 8, 10, 5, 9, 13, 16, 71, 65, 78, 70, 24,
+ 73, 89, 70, 3, 67, 77, 80, 73, 80, 87, 94, 96,
+ 66, 0, 15, 66, 72, 0, 1, 3, 73, 69, 65, 65, 81,
+ 73, 79, 64, 9, 85, 69, 10, 73, 3, 69, 0, 19, 72,
+ 74, 2, 8, 82, 82, 108, 17, 22, 30, 7, 2, 8, 65,
+ 65, 64, 74, 72, 71, 87, 83, 81, 88, 87, 94, 119,
+ 88, 87, 84, 71, 77, 6, 16, 1, 14, 26, 67, 70,
+ 73, 73, 93, 85, 82, 101, 87, 96, 93, 109, 104,
+ 105, 106, 80, 78, 107, 76, 77, 88, 99, 93, 94,
+ 97, 92, 99, 99, 104, 108, 116, 113, 70, 78, 94,
+ 69, 10, 7, 16, 17, 20, 38, 24, 33, 34, 51, 41,
+ 37, 53, 50, 43, 7, 73, 90, 100, 115, 126, 126,
+ 126, 12, 46, 39, 38, 28, 37, 18, 14, 17, 7, 72,
+ 66, 11, 0, 26, 35, 64, 5, 15, 19, 5, 17, 32, 4,
+ 64, 24, 68, 90, 113, 126, 126, 126, 126, 126 },
+
+ {
+
+ 27,
+ 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 91, 3, 10,
+ 52, 14, 40, 69, 19, 27, 64, 79, 1, 93, 110, 66,
+ 74, 126, 126, 126, 54, 11, 69, 19, 27, 64, 70,
+ 12, 18, 68, 2, 65, 67, 70, 84, 73, 93, 5, 68,
+ 68, 66, 79, 73, 88, 14, 2, 0, 67, 9, 3, 22, 0,
+ 0, 0, 4, 96, 97, 9, 74, 69, 18, 67, 88, 7, 37,
+ 27, 55, 53, 21, 21, 10, 26, 8, 4, 11, 74, 76,
+ 78, 74, 25, 1, 9, 69, 66, 68, 66, 67, 66, 75,
+ 74, 84, 18, 68, 5, 67, 79, 1, 72, 4, 4, 1, 3,
+ 11, 6, 70, 65, 4, 67, 1, 70, 0, 74, 0, 0, 71, 9,
+ 1, 69, 67, 8, 67, 79, 72, 82, 76, 5, 65, 8, 7,
+ 7, 34, 16, 4, 67, 10, 67, 74, 2, 101, 71, 0, 86,
+ 67, 80, 7, 1, 76, 16, 7, 4, 89, 7, 78, 75, 3,
+ 92, 7, 18, 12, 14, 17, 14, 7, 16, 14, 67, 2, 9,
+ 2, 0, 73, 66, 72, 70, 67, 70, 73, 71, 73, 79,
+ 72, 70, 84, 87, 81, 71, 69, 74, 75, 82, 86, 82,
+ 82, 91, 98, 93, 96, 114, 105, 113, 69, 71, 90,
+ 71, 78, 81, 85, 92, 96, 89, 92, 91, 82, 87, 91,
+ 83, 84, 3, 25, 18, 11, 7, 10, 4, 64, 2, 5, 10,
+ 32, 21, 15, 8, 20, 10, 11, 5, 21, 3, 42, 30, 23,
+ 15, 24, 5, 64, 66, 64, 70, 45, 20, 4, 2, 11, 64,
+ 73, 68, 5, 38, 24, 10, 3, 18, 5, 1, 0, 64, 62,
+ 72, 65, 2, 3, 0, 1, 10, 11, 7, 10, 14, 18, 70,
+ 64, 78, 69, 26, 73, 90, 69, 4, 67, 77, 81, 73,
+ 80, 88, 95, 97, 66, 0, 15, 66, 72, 64, 1, 3, 74,
+ 69, 65, 65, 82, 73, 79, 0, 10, 85, 69, 9, 73, 3,
+ 69, 0, 19, 73, 75, 2, 8, 83, 82, 109, 17, 21,
+ 29, 6, 1, 7, 67, 67, 66, 76, 74, 74, 89, 85, 82,
+ 91, 90, 97, 123, 91, 89, 85, 72, 78, 6, 17, 1,
+ 15, 28, 69, 71, 75, 75, 95, 86, 83, 103, 88, 97,
+ 94, 110, 105, 106, 106, 80, 78, 108, 77, 78, 89,
+ 101, 94, 95, 98, 93, 100, 100, 105, 109, 117,
+ 114, 70, 79, 94, 68, 10, 7, 17, 18, 21, 39, 25,
+ 34, 35, 53, 42, 38, 55, 52, 42, 6, 75, 92, 103,
+ 118, 126, 126, 126, 13, 46, 39, 39, 28, 38, 19,
+ 14, 18, 8, 72, 65, 12, 0, 27, 37, 0, 6, 16, 20,
+ 5, 18, 33, 4, 64, 23, 70, 92, 115, 126, 126,
+ 126, 126, 126 },
+
+ {
+
+ 26,
+ 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 93, 1, 8,
+ 52, 14, 43, 70, 20, 27, 64, 80, 1, 94, 111, 66,
+ 76, 126, 126, 126, 57, 12, 69, 20, 27, 64, 70,
+ 13, 18, 68, 2, 65, 67, 70, 85, 72, 93, 5, 68,
+ 67, 67, 79, 73, 88, 15, 3, 0, 66, 10, 3, 22, 0,
+ 0, 0, 4, 96, 97, 10, 75, 69, 18, 67, 88, 8, 39,
+ 28, 57, 55, 22, 22, 11, 28, 9, 5, 13, 73, 75,
+ 77, 73, 25, 1, 9, 69, 66, 68, 66, 65, 67, 75,
+ 74, 84, 18, 68, 5, 67, 79, 1, 72, 5, 4, 0, 3,
+ 11, 6, 70, 65, 4, 67, 1, 70, 0, 74, 0, 0, 72, 8,
+ 0, 70, 66, 9, 67, 80, 72, 81, 76, 5, 66, 8, 7,
+ 7, 35, 17, 4, 67, 10, 67, 75, 2, 102, 71, 0, 86,
+ 68, 81, 6, 1, 76, 17, 7, 4, 90, 6, 79, 75, 3,
+ 92, 7, 18, 12, 14, 17, 14, 7, 16, 14, 67, 2, 9,
+ 2, 0, 73, 66, 72, 71, 67, 70, 73, 72, 74, 79,
+ 73, 70, 85, 88, 81, 72, 71, 76, 77, 84, 88, 84,
+ 84, 93, 100, 95, 98, 117, 107, 114, 69, 72, 90,
+ 72, 79, 82, 86, 94, 98, 90, 93, 91, 82, 87, 91,
+ 83, 84, 3, 25, 18, 11, 7, 10, 4, 64, 2, 6, 10,
+ 32, 21, 15, 8, 20, 11, 12, 5, 23, 3, 42, 30, 23,
+ 15, 24, 5, 64, 66, 0, 70, 45, 19, 3, 2, 11, 64,
+ 73, 68, 5, 37, 23, 9, 2, 18, 5, 1, 0, 64, 62,
+ 71, 64, 3, 5, 1, 2, 11, 13, 8, 12, 16, 20, 69,
+ 0, 77, 69, 27, 72, 90, 69, 5, 67, 78, 81, 74,
+ 81, 89, 96, 98, 67, 64, 15, 67, 73, 64, 1, 3,
+ 75, 70, 65, 66, 83, 73, 79, 0, 10, 85, 70, 9,
+ 74, 3, 70, 64, 19, 74, 76, 1, 7, 84, 83, 111,
+ 16, 21, 29, 5, 64, 5, 69, 69, 68, 79, 77, 76,
+ 92, 87, 84, 94, 94, 100, 126, 93, 92, 87, 73,
+ 79, 6, 18, 2, 16, 30, 70, 73, 77, 76, 97, 88,
+ 85, 105, 89, 99, 96, 112, 107, 107, 107, 81, 78,
+ 109, 79, 80, 91, 102, 96, 97, 100, 95, 102, 102,
+ 106, 110, 118, 114, 71, 79, 95, 68, 11, 8, 17,
+ 18, 22, 40, 26, 35, 36, 54, 43, 39, 56, 53, 42,
+ 4, 77, 95, 105, 121, 126, 126, 126, 13, 47, 40,
+ 39, 29, 39, 19, 15, 18, 8, 71, 65, 13, 1, 28,
+ 38, 0, 7, 16, 20, 5, 18, 34, 4, 64, 21, 72, 95,
+ 118, 126, 126, 126, 126, 126 },
+
+ {
+
+ 25,
+ 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 94, 0, 7,
+ 52, 14, 45, 70, 20, 28, 64, 81, 1, 95, 112, 67,
+ 77, 126, 126, 126, 60, 13, 69, 20, 28, 64, 69,
+ 14, 19, 69, 3, 64, 66, 71, 85, 72, 93, 5, 67,
+ 67, 67, 79, 72, 88, 15, 3, 0, 66, 10, 3, 22, 0,
+ 0, 0, 5, 96, 97, 10, 75, 70, 18, 67, 87, 10, 41,
+ 30, 58, 57, 23, 23, 13, 29, 10, 5, 14, 73, 75,
+ 77, 73, 26, 1, 9, 68, 65, 67, 65, 64, 67, 76,
+ 75, 85, 18, 68, 5, 66, 79, 2, 72, 5, 4, 0, 4,
+ 11, 5, 70, 65, 4, 67, 1, 70, 64, 74, 0, 64, 73,
+ 7, 0, 70, 66, 9, 67, 80, 73, 81, 77, 5, 66, 8,
+ 7, 7, 35, 17, 4, 67, 11, 67, 76, 2, 102, 72, 0,
+ 87, 68, 82, 6, 1, 77, 17, 7, 4, 92, 6, 80, 75,
+ 3, 93, 6, 18, 11, 14, 17, 14, 7, 16, 14, 67, 1,
+ 9, 1, 0, 73, 67, 73, 71, 68, 71, 74, 73, 74, 79,
+ 73, 69, 86, 90, 81, 74, 73, 78, 79, 86, 90, 86,
+ 86, 95, 102, 96, 99, 119, 109, 115, 69, 72, 91,
+ 73, 80, 83, 88, 95, 99, 91, 94, 92, 83, 88, 91,
+ 83, 83, 4, 25, 18, 11, 8, 10, 4, 64, 3, 7, 11,
+ 31, 21, 15, 8, 21, 11, 13, 6, 25, 3, 43, 30, 23,
+ 15, 25, 5, 64, 65, 0, 70, 45, 19, 3, 2, 11, 0,
+ 72, 67, 5, 37, 23, 8, 2, 18, 5, 2, 1, 0, 62, 71,
+ 0, 3, 6, 2, 3, 13, 14, 9, 13, 17, 21, 69, 1, 77,
+ 68, 29, 72, 91, 69, 5, 67, 78, 82, 74, 82, 90,
+ 97, 99, 67, 64, 15, 67, 73, 65, 0, 3, 75, 70,
+ 66, 66, 83, 73, 80, 0, 10, 86, 70, 8, 75, 2, 71,
+ 64, 20, 74, 77, 1, 7, 84, 83, 112, 15, 20, 28,
+ 4, 65, 4, 71, 71, 70, 81, 79, 79, 95, 90, 85,
+ 96, 97, 103, 126, 96, 94, 89, 74, 80, 6, 19, 2,
+ 18, 33, 72, 75, 79, 78, 99, 90, 86, 107, 91,
+ 100, 97, 113, 108, 108, 108, 81, 79, 110, 80,
+ 81, 92, 104, 98, 99, 101, 96, 104, 103, 108,
+ 112, 120, 115, 72, 80, 96, 68, 11, 8, 18, 19,
+ 22, 42, 27, 36, 37, 55, 45, 40, 58, 54, 41, 3,
+ 79, 97, 108, 123, 126, 126, 126, 14, 47, 40, 40,
+ 29, 40, 20, 15, 19, 9, 71, 64, 13, 1, 29, 39, 0,
+ 7, 17, 21, 5, 19, 35, 4, 64, 20, 74, 97, 121,
+ 126, 126, 126, 126, 126 },
+
+ {
+
+ 23,
+ 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 96, 65, 6,
+ 52, 14, 47, 70, 21, 29, 64, 82, 1, 96, 113, 67,
+ 79, 126, 126, 126, 62, 14, 69, 21, 29, 64, 69,
+ 15, 19, 69, 3, 64, 65, 71, 86, 72, 93, 5, 67,
+ 66, 67, 80, 72, 88, 16, 3, 0, 66, 11, 3, 22, 0,
+ 0, 0, 5, 97, 97, 11, 76, 70, 18, 67, 87, 12, 42,
+ 31, 60, 58, 24, 24, 14, 30, 11, 6, 16, 72, 75,
+ 76, 72, 26, 1, 9, 68, 65, 67, 65, 1, 67, 76, 75,
+ 85, 18, 68, 5, 66, 79, 2, 72, 6, 4, 0, 4, 11, 5,
+ 70, 65, 4, 67, 1, 70, 65, 75, 0, 65, 74, 6, 64,
+ 71, 65, 10, 67, 81, 73, 80, 77, 5, 66, 8, 7, 7,
+ 36, 17, 4, 68, 11, 68, 77, 1, 103, 72, 0, 87,
+ 69, 83, 6, 0, 78, 17, 7, 4, 93, 6, 81, 75, 3,
+ 93, 5, 18, 11, 14, 17, 14, 7, 16, 13, 67, 1, 9,
+ 1, 0, 74, 67, 73, 72, 68, 71, 75, 74, 75, 79,
+ 74, 69, 87, 91, 81, 75, 75, 80, 81, 88, 92, 88,
+ 88, 97, 104, 98, 101, 121, 111, 116, 69, 72, 91,
+ 74, 81, 84, 89, 97, 101, 92, 96, 93, 83, 88, 91,
+ 83, 83, 4, 25, 18, 11, 8, 10, 4, 64, 3, 8, 11,
+ 31, 21, 15, 8, 21, 12, 13, 7, 27, 3, 43, 30, 23,
+ 15, 25, 5, 64, 65, 1, 70, 45, 19, 2, 2, 11, 0,
+ 72, 66, 5, 36, 22, 7, 1, 18, 5, 2, 1, 0, 62, 70,
+ 1, 4, 7, 3, 4, 14, 15, 10, 14, 19, 23, 68, 1,
+ 77, 68, 30, 72, 91, 69, 6, 67, 79, 82, 74, 83,
+ 91, 98, 100, 68, 65, 15, 68, 74, 65, 0, 3, 76,
+ 71, 66, 66, 84, 74, 80, 0, 10, 86, 71, 8, 76, 2,
+ 72, 65, 20, 75, 78, 0, 6, 85, 84, 114, 14, 19,
+ 28, 3, 66, 2, 73, 73, 72, 84, 82, 81, 98, 92,
+ 86, 99, 100, 106, 126, 99, 97, 91, 75, 82, 6,
+ 20, 2, 19, 35, 74, 77, 81, 80, 101, 92, 88, 109,
+ 92, 102, 98, 115, 110, 109, 109, 82, 79, 111,
+ 81, 83, 94, 106, 100, 101, 103, 98, 106, 105,
+ 109, 113, 121, 116, 73, 81, 97, 68, 12, 8, 18,
+ 19, 23, 43, 27, 37, 38, 56, 46, 41, 59, 55, 41,
+ 1, 81, 100, 110, 126, 126, 126, 126, 14, 48, 41,
+ 40, 29, 40, 20, 15, 19, 9, 71, 64, 14, 2, 30,
+ 40, 0, 8, 17, 21, 5, 19, 36, 4, 64, 19, 76, 100,
+ 124, 126, 126, 126, 126, 126 },
+
+ {
+
+ 22,
+ 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 97, 66, 4,
+ 52, 14, 50, 71, 22, 29, 64, 82, 2, 97, 114, 68,
+ 81, 126, 126, 126, 62, 16, 69, 22, 29, 64, 69,
+ 16, 19, 70, 3, 64, 65, 71, 86, 71, 93, 5, 67,
+ 65, 68, 80, 72, 88, 16, 4, 1, 65, 11, 3, 22, 0,
+ 0, 0, 6, 97, 97, 11, 77, 70, 18, 66, 87, 13, 44,
+ 33, 61, 60, 26, 25, 15, 32, 12, 7, 18, 72, 74,
+ 76, 71, 26, 1, 9, 67, 65, 66, 64, 2, 68, 76, 75,
+ 85, 18, 68, 6, 65, 79, 2, 72, 6, 4, 64, 4, 11,
+ 5, 69, 65, 5, 67, 1, 70, 65, 75, 0, 65, 75, 6,
+ 65, 72, 64, 10, 67, 82, 74, 80, 78, 5, 67, 8, 7,
+ 7, 37, 18, 4, 68, 11, 68, 78, 1, 104, 72, 0, 88,
+ 69, 84, 5, 0, 78, 18, 7, 4, 94, 5, 82, 75, 3,
+ 94, 5, 18, 11, 14, 17, 14, 7, 16, 13, 67, 1, 9,
+ 1, 0, 74, 67, 73, 72, 68, 71, 75, 75, 75, 79,
+ 74, 69, 88, 92, 81, 77, 77, 81, 82, 90, 94, 90,
+ 90, 99, 106, 100, 103, 124, 112, 117, 69, 73,
+ 92, 75, 82, 85, 90, 98, 102, 93, 97, 93, 84, 89,
+ 91, 83, 82, 4, 25, 18, 11, 8, 10, 5, 0, 4, 9,
+ 12, 31, 21, 15, 8, 21, 12, 14, 7, 29, 3, 43, 30,
+ 23, 15, 25, 5, 64, 65, 2, 70, 46, 18, 2, 2, 11,
+ 0, 72, 66, 5, 36, 21, 6, 0, 18, 5, 2, 1, 1, 62,
+ 69, 2, 5, 9, 4, 5, 15, 17, 11, 16, 20, 25, 67,
+ 2, 76, 67, 31, 71, 92, 68, 7, 67, 79, 83, 75,
+ 83, 92, 99, 101, 69, 65, 15, 68, 74, 66, 0, 3,
+ 77, 72, 66, 67, 85, 74, 80, 0, 10, 86, 72, 7,
+ 76, 2, 73, 66, 20, 76, 79, 0, 6, 86, 85, 116,
+ 13, 19, 27, 2, 68, 1, 75, 75, 74, 86, 85, 84,
+ 101, 94, 88, 102, 104, 109, 126, 101, 99, 93,
+ 76, 83, 6, 21, 3, 20, 37, 75, 78, 83, 81, 103,
+ 94, 89, 111, 93, 104, 100, 117, 111, 110, 110,
+ 82, 79, 112, 83, 84, 96, 107, 101, 102, 104, 99,
+ 107, 106, 110, 114, 122, 116, 73, 81, 98, 67,
+ 12, 9, 19, 20, 24, 44, 28, 38, 39, 58, 47, 42,
+ 60, 57, 40, 64, 83, 102, 113, 126, 126, 126,
+ 126, 14, 48, 41, 41, 30, 41, 20, 16, 20, 10, 70,
+ 64, 15, 2, 31, 41, 1, 9, 18, 22, 5, 20, 37, 4,
+ 64, 17, 78, 102, 126, 126, 126, 126, 126, 126 },
+
+ {
+
+ 21,
+ 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 68, 3,
+ 52, 14, 52, 71, 23, 30, 64, 83, 2, 98, 115, 68,
+ 83, 126, 126, 126, 62, 17, 69, 23, 30, 64, 68,
+ 17, 20, 70, 3, 0, 64, 72, 87, 71, 93, 5, 67, 65,
+ 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, 22, 0, 0,
+ 0, 6, 97, 97, 12, 78, 70, 18, 66, 87, 15, 46,
+ 34, 62, 62, 27, 26, 17, 33, 13, 8, 20, 71, 74,
+ 75, 70, 26, 1, 9, 67, 65, 66, 64, 4, 68, 77, 76,
+ 86, 18, 68, 6, 65, 79, 3, 72, 7, 4, 64, 4, 11,
+ 4, 69, 65, 5, 67, 1, 70, 66, 75, 0, 66, 76, 5,
+ 66, 73, 64, 11, 67, 83, 74, 79, 78, 5, 67, 8, 7,
+ 7, 38, 18, 4, 68, 12, 68, 79, 1, 105, 72, 0, 88,
+ 70, 85, 5, 0, 79, 18, 7, 4, 96, 5, 83, 75, 3,
+ 94, 4, 18, 11, 14, 17, 14, 7, 16, 13, 67, 0, 9,
+ 1, 0, 74, 68, 74, 73, 69, 72, 76, 76, 76, 79,
+ 75, 69, 89, 94, 81, 78, 79, 83, 84, 92, 96, 92,
+ 92, 101, 108, 101, 104, 126, 114, 118, 69, 73,
+ 92, 76, 83, 86, 92, 100, 104, 94, 98, 94, 84,
+ 89, 91, 83, 82, 5, 25, 18, 11, 8, 10, 5, 0, 4,
+ 10, 12, 31, 21, 15, 8, 22, 13, 15, 8, 31, 3, 43,
+ 30, 23, 15, 26, 5, 64, 65, 2, 70, 46, 18, 1, 2,
+ 11, 0, 72, 65, 5, 35, 21, 5, 64, 18, 5, 2, 2, 1,
+ 62, 68, 3, 5, 10, 5, 6, 17, 18, 12, 17, 22, 26,
+ 67, 3, 76, 67, 33, 71, 92, 68, 7, 67, 80, 83,
+ 75, 84, 93, 100, 102, 69, 66, 15, 69, 75, 66,
+ 64, 3, 78, 72, 67, 67, 86, 74, 81, 0, 10, 87,
+ 72, 7, 77, 2, 74, 66, 20, 77, 80, 64, 5, 87, 85,
+ 117, 12, 18, 27, 1, 69, 64, 77, 77, 76, 89, 87,
+ 86, 104, 97, 89, 105, 107, 112, 126, 104, 102,
+ 95, 77, 84, 6, 22, 3, 21, 39, 77, 80, 85, 83,
+ 105, 96, 91, 113, 95, 105, 101, 118, 113, 111,
+ 111, 83, 80, 113, 84, 86, 97, 109, 103, 104,
+ 106, 101, 109, 108, 111, 116, 124, 117, 74, 82,
+ 99, 67, 13, 9, 19, 20, 24, 45, 29, 39, 40, 59,
+ 48, 43, 62, 58, 40, 65, 85, 105, 115, 126, 126,
+ 126, 126, 15, 49, 42, 41, 30, 42, 21, 16, 20,
+ 10, 70, 0, 15, 3, 32, 42, 1, 9, 18, 22, 5, 20,
+ 38, 4, 64, 16, 80, 105, 126, 126, 126, 126, 126,
+ 126 },
+
+ {
+
+ 20,
+ 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 100, 69,
+ 2, 52, 14, 54, 71, 24, 31, 64, 84, 2, 99, 116,
+ 69, 85, 126, 126, 126, 62, 18, 69, 24, 31, 64,
+ 68, 18, 20, 71, 3, 0, 0, 72, 87, 71, 93, 5, 67,
+ 64, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3, 22, 0,
+ 0, 0, 7, 97, 97, 12, 79, 70, 18, 66, 87, 17, 48,
+ 36, 62, 62, 28, 27, 18, 34, 14, 9, 22, 71, 74,
+ 75, 69, 26, 1, 9, 66, 65, 65, 0, 5, 68, 77, 76,
+ 86, 18, 68, 6, 64, 79, 3, 72, 7, 4, 64, 4, 11,
+ 4, 69, 65, 5, 67, 1, 70, 67, 75, 0, 67, 77, 4,
+ 67, 74, 0, 11, 67, 84, 75, 79, 79, 5, 67, 8, 7,
+ 7, 39, 18, 4, 68, 12, 68, 80, 1, 106, 72, 0, 89,
+ 70, 86, 5, 0, 80, 18, 7, 4, 97, 5, 84, 75, 3,
+ 95, 3, 18, 11, 14, 17, 14, 7, 16, 13, 67, 0, 9,
+ 1, 0, 74, 68, 74, 73, 69, 72, 77, 77, 76, 79,
+ 75, 69, 90, 95, 81, 80, 81, 85, 86, 94, 98, 94,
+ 94, 103, 110, 103, 106, 126, 116, 119, 69, 73,
+ 93, 77, 84, 87, 93, 101, 105, 95, 99, 95, 85,
+ 90, 91, 83, 81, 5, 25, 18, 11, 8, 10, 5, 0, 5,
+ 11, 13, 31, 21, 15, 8, 22, 13, 16, 9, 33, 3, 43,
+ 30, 23, 15, 26, 5, 64, 65, 3, 70, 46, 18, 1, 2,
+ 11, 0, 72, 64, 5, 35, 20, 4, 65, 18, 5, 2, 2, 2,
+ 62, 67, 4, 6, 11, 6, 7, 18, 19, 13, 18, 23, 28,
+ 66, 4, 76, 66, 34, 71, 93, 68, 8, 67, 80, 84,
+ 75, 85, 94, 101, 103, 70, 66, 15, 69, 75, 67,
+ 64, 3, 79, 73, 67, 67, 87, 74, 81, 0, 10, 87,
+ 73, 6, 78, 2, 75, 67, 20, 78, 81, 64, 5, 88, 86,
+ 119, 11, 17, 26, 0, 70, 65, 79, 79, 78, 91, 90,
+ 89, 107, 99, 90, 108, 110, 115, 126, 107, 104,
+ 97, 78, 85, 6, 23, 3, 22, 41, 79, 82, 87, 85,
+ 107, 98, 92, 115, 96, 107, 102, 120, 114, 112,
+ 112, 83, 80, 114, 85, 87, 99, 111, 105, 106,
+ 107, 102, 111, 109, 112, 117, 125, 118, 75, 83,
+ 100, 67, 13, 9, 20, 21, 25, 46, 30, 40, 41, 60,
+ 49, 44, 62, 59, 39, 67, 87, 107, 118, 126, 126,
+ 126, 126, 15, 49, 42, 42, 30, 43, 21, 16, 21,
+ 11, 70, 0, 16, 3, 33, 43, 1, 10, 19, 23, 5, 21,
+ 39, 4, 64, 15, 82, 107, 126, 126, 126, 126, 126,
+ 126 },
+
+ {
+
+ 18,
+ 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 102, 71,
+ 0, 51, 14, 56, 72, 24, 31, 65, 85, 2, 101, 118,
+ 70, 87, 126, 126, 126, 62, 19, 70, 24, 31, 65,
+ 68, 19, 20, 72, 3, 0, 0, 73, 88, 71, 94, 5, 67,
+ 64, 69, 81, 72, 88, 17, 4, 1, 65, 12, 2, 22, 0,
+ 0, 0, 7, 98, 97, 12, 80, 71, 18, 66, 87, 18, 49,
+ 37, 62, 62, 29, 28, 19, 35, 15, 9, 23, 71, 74,
+ 75, 69, 26, 1, 9, 66, 65, 65, 0, 6, 69, 78, 77,
+ 87, 18, 68, 6, 64, 79, 3, 72, 7, 3, 65, 4, 10,
+ 3, 69, 66, 5, 67, 0, 70, 68, 76, 64, 68, 79, 3,
+ 68, 75, 0, 11, 67, 85, 76, 79, 80, 4, 68, 8, 7,
+ 7, 39, 18, 4, 69, 12, 69, 81, 0, 107, 73, 64,
+ 90, 71, 87, 4, 64, 81, 18, 7, 4, 99, 4, 85, 75,
+ 3, 96, 2, 17, 10, 14, 17, 13, 6, 16, 12, 68, 64,
+ 9, 0, 64, 75, 69, 75, 74, 70, 73, 78, 78, 77,
+ 79, 76, 69, 91, 97, 81, 82, 83, 87, 88, 96, 101,
+ 96, 96, 105, 112, 105, 108, 126, 118, 121, 70,
+ 74, 94, 78, 86, 89, 95, 103, 107, 97, 101, 96,
+ 86, 91, 91, 83, 81, 5, 25, 18, 11, 8, 10, 5, 0,
+ 5, 12, 13, 30, 21, 15, 8, 22, 13, 16, 9, 34, 2,
+ 43, 30, 22, 14, 26, 5, 64, 65, 3, 70, 46, 17, 0,
+ 1, 11, 0, 72, 64, 5, 34, 19, 3, 66, 17, 5, 2, 2,
+ 2, 62, 67, 5, 6, 12, 7, 7, 19, 20, 14, 19, 24,
+ 29, 66, 4, 76, 66, 35, 71, 94, 68, 8, 67, 81,
+ 85, 76, 86, 95, 103, 105, 71, 67, 15, 70, 76,
+ 68, 65, 2, 80, 74, 68, 68, 88, 75, 82, 0, 10,
+ 88, 74, 5, 79, 1, 76, 68, 20, 79, 83, 65, 4, 89,
+ 87, 121, 10, 16, 25, 65, 72, 67, 81, 81, 81, 94,
+ 93, 92, 110, 102, 92, 111, 114, 118, 126, 110,
+ 107, 99, 80, 87, 6, 23, 3, 23, 43, 81, 84, 89,
+ 87, 110, 100, 94, 118, 98, 109, 104, 122, 116,
+ 113, 113, 84, 81, 116, 87, 89, 101, 113, 107,
+ 108, 109, 104, 113, 111, 114, 119, 126, 119, 76,
+ 84, 101, 67, 13, 9, 20, 21, 25, 47, 30, 41, 41,
+ 61, 50, 45, 62, 60, 38, 69, 90, 110, 121, 126,
+ 126, 126, 126, 15, 49, 42, 42, 30, 43, 21, 16,
+ 21, 11, 70, 0, 16, 3, 34, 44, 1, 10, 19, 23, 5,
+ 21, 39, 4, 65, 13, 85, 110, 126, 126, 126, 126,
+ 126, 126 },
+
+ {
+
+ 17,
+ 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 103, 72,
+ 64, 51, 14, 59, 72, 25, 32, 65, 85, 3, 102, 119,
+ 70, 88, 126, 126, 126, 62, 21, 70, 25, 32, 65,
+ 67, 21, 21, 72, 4, 1, 1, 73, 88, 70, 94, 5, 66,
+ 0, 69, 81, 71, 88, 18, 5, 2, 64, 13, 2, 22, 0,
+ 0, 0, 8, 98, 97, 13, 80, 71, 18, 65, 86, 20, 51,
+ 39, 62, 62, 31, 29, 21, 37, 17, 10, 25, 70, 73,
+ 74, 68, 27, 2, 10, 65, 64, 64, 1, 8, 69, 78, 77,
+ 87, 19, 68, 7, 0, 78, 4, 71, 8, 3, 65, 5, 10, 3,
+ 68, 66, 6, 67, 0, 69, 68, 76, 64, 68, 80, 3, 68,
+ 75, 1, 12, 66, 85, 76, 78, 80, 4, 68, 9, 7, 7,
+ 40, 19, 5, 69, 13, 69, 81, 0, 107, 73, 64, 90,
+ 71, 88, 4, 64, 81, 19, 8, 4, 100, 4, 85, 74, 3,
+ 96, 2, 17, 10, 14, 17, 13, 6, 16, 12, 68, 64, 9,
+ 0, 64, 75, 69, 75, 74, 70, 73, 78, 78, 77, 78,
+ 76, 68, 91, 98, 80, 83, 84, 88, 89, 98, 103, 97,
+ 97, 107, 113, 106, 109, 126, 119, 122, 70, 74,
+ 94, 79, 87, 90, 96, 104, 108, 98, 102, 96, 86,
+ 91, 90, 82, 80, 6, 26, 18, 11, 9, 11, 6, 1, 6,
+ 14, 14, 30, 21, 15, 8, 23, 14, 17, 10, 36, 2,
+ 44, 31, 22, 14, 27, 5, 64, 64, 4, 70, 47, 17, 0,
+ 1, 12, 1, 71, 0, 5, 34, 19, 3, 66, 17, 5, 3, 3,
+ 3, 62, 66, 6, 7, 14, 9, 8, 21, 22, 16, 21, 26,
+ 31, 65, 5, 75, 65, 37, 70, 94, 67, 9, 66, 81,
+ 85, 76, 86, 95, 104, 106, 71, 67, 16, 70, 76,
+ 68, 65, 2, 80, 74, 68, 68, 88, 75, 82, 1, 11,
+ 88, 74, 5, 79, 1, 76, 68, 21, 79, 84, 65, 4, 89,
+ 87, 122, 10, 16, 25, 66, 73, 68, 83, 83, 83, 96,
+ 95, 94, 112, 104, 93, 113, 117, 121, 126, 112,
+ 109, 100, 81, 88, 6, 24, 4, 25, 46, 82, 85, 90,
+ 88, 112, 101, 95, 120, 99, 110, 105, 123, 117,
+ 114, 113, 84, 81, 117, 88, 90, 102, 114, 108,
+ 109, 110, 105, 114, 112, 115, 120, 126, 119, 76,
+ 84, 101, 66, 14, 10, 21, 22, 26, 49, 31, 42, 42,
+ 62, 52, 46, 62, 62, 38, 70, 92, 112, 123, 126,
+ 126, 126, 126, 16, 50, 43, 43, 31, 44, 22, 17,
+ 22, 12, 69, 1, 17, 4, 36, 46, 2, 11, 20, 24, 6,
+ 22, 40, 4, 65, 12, 87, 112, 126, 126, 126, 126,
+ 126, 126 },
+
+ {
+
+ 16,
+ 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 104, 73,
+ 65, 51, 14, 61, 72, 26, 33, 65, 86, 3, 103, 120,
+ 71, 90, 126, 126, 126, 62, 22, 70, 26, 33, 65,
+ 67, 22, 21, 73, 4, 1, 2, 73, 88, 70, 94, 5, 66,
+ 1, 69, 81, 71, 88, 18, 5, 2, 64, 13, 2, 22, 0,
+ 0, 0, 9, 98, 97, 13, 81, 71, 18, 65, 86, 22, 53,
+ 41, 62, 62, 32, 30, 22, 38, 18, 11, 27, 70, 73,
+ 74, 67, 27, 2, 10, 64, 64, 0, 1, 9, 69, 78, 77,
+ 87, 19, 68, 7, 0, 78, 4, 71, 8, 3, 65, 5, 10, 3,
+ 68, 66, 6, 67, 0, 69, 69, 76, 64, 69, 81, 2, 69,
+ 76, 2, 12, 66, 86, 77, 78, 81, 4, 68, 9, 7, 7,
+ 41, 19, 5, 69, 13, 69, 82, 0, 108, 73, 64, 91,
+ 71, 89, 4, 64, 82, 19, 8, 4, 101, 4, 86, 74, 3,
+ 97, 1, 17, 10, 14, 17, 13, 6, 16, 12, 68, 64, 9,
+ 0, 64, 75, 69, 75, 74, 70, 73, 79, 79, 78, 78,
+ 76, 68, 92, 99, 80, 85, 86, 90, 91, 100, 105,
+ 99, 99, 109, 115, 108, 111, 126, 121, 123, 70,
+ 74, 95, 80, 88, 91, 97, 106, 109, 99, 103, 97,
+ 87, 92, 90, 82, 79, 6, 26, 18, 11, 9, 11, 6, 1,
+ 6, 15, 15, 30, 21, 15, 8, 23, 14, 18, 11, 38, 2,
+ 44, 31, 22, 14, 27, 5, 64, 64, 5, 70, 47, 17, 0,
+ 1, 12, 1, 71, 1, 5, 33, 18, 2, 67, 17, 5, 3, 3,
+ 3, 62, 65, 7, 8, 15, 10, 9, 22, 23, 17, 22, 27,
+ 33, 64, 6, 75, 64, 38, 70, 95, 67, 10, 66, 81,
+ 86, 76, 87, 96, 105, 107, 72, 67, 16, 70, 77,
+ 69, 65, 2, 81, 75, 68, 68, 89, 75, 82, 1, 11,
+ 88, 75, 4, 80, 1, 77, 69, 21, 80, 85, 65, 4, 90,
+ 88, 124, 9, 15, 24, 67, 74, 69, 85, 85, 85, 98,
+ 98, 97, 115, 106, 94, 116, 120, 124, 126, 115,
+ 111, 102, 82, 89, 6, 25, 4, 26, 48, 84, 87, 92,
+ 90, 114, 103, 97, 122, 100, 112, 106, 125, 118,
+ 115, 114, 85, 81, 118, 89, 91, 104, 116, 110,
+ 111, 111, 107, 116, 113, 116, 121, 126, 120, 77,
+ 85, 102, 66, 14, 10, 22, 22, 27, 50, 32, 43, 43,
+ 62, 53, 47, 62, 62, 37, 72, 94, 114, 126, 126,
+ 126, 126, 126, 16, 50, 43, 43, 31, 45, 22, 17,
+ 22, 12, 69, 1, 18, 4, 37, 47, 2, 12, 21, 25, 6,
+ 22, 41, 4, 65, 11, 89, 114, 126, 126, 126, 126,
+ 126, 126 },
+
+ {
+
+ 15,
+ 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 106, 75,
+ 66, 51, 14, 62, 72, 27, 34, 65, 87, 3, 104, 121,
+ 71, 92, 126, 126, 126, 62, 23, 70, 27, 34, 65,
+ 66, 23, 22, 73, 4, 2, 3, 74, 89, 70, 94, 5, 66,
+ 1, 69, 81, 71, 88, 19, 5, 2, 64, 14, 2, 22, 0,
+ 0, 0, 9, 98, 97, 14, 82, 71, 18, 65, 86, 24, 55,
+ 42, 62, 62, 33, 31, 24, 39, 19, 12, 29, 69, 73,
+ 73, 66, 27, 2, 10, 64, 64, 0, 2, 11, 69, 79, 78,
+ 88, 19, 68, 7, 1, 78, 5, 71, 9, 3, 65, 5, 10, 2,
+ 68, 66, 6, 67, 0, 69, 70, 76, 64, 70, 82, 1, 70,
+ 77, 2, 13, 66, 87, 77, 77, 81, 4, 68, 9, 7, 7,
+ 42, 19, 5, 69, 14, 69, 83, 0, 109, 73, 64, 91,
+ 72, 90, 4, 64, 83, 19, 8, 4, 103, 4, 87, 74, 3,
+ 97, 0, 17, 10, 14, 17, 13, 6, 16, 12, 68, 65, 9,
+ 0, 64, 75, 70, 76, 75, 71, 74, 80, 80, 78, 78,
+ 77, 68, 93, 101, 80, 86, 88, 92, 93, 102, 107,
+ 101, 101, 111, 117, 109, 112, 126, 123, 124, 70,
+ 74, 95, 81, 89, 92, 99, 107, 111, 100, 104, 98,
+ 87, 92, 90, 82, 79, 7, 26, 18, 11, 9, 11, 6, 1,
+ 7, 16, 15, 30, 21, 15, 8, 24, 15, 19, 12, 40, 2,
+ 44, 31, 22, 14, 28, 5, 64, 64, 5, 70, 47, 17,
+ 64, 1, 12, 1, 71, 2, 5, 33, 18, 1, 68, 17, 5, 3,
+ 4, 4, 62, 64, 8, 8, 16, 11, 10, 24, 24, 18, 23,
+ 29, 34, 64, 7, 75, 64, 40, 70, 95, 67, 10, 66,
+ 82, 86, 76, 88, 97, 106, 108, 72, 68, 16, 71,
+ 77, 69, 66, 2, 82, 75, 69, 68, 90, 75, 83, 1,
+ 11, 89, 75, 4, 81, 1, 78, 69, 21, 81, 86, 66, 3,
+ 91, 88, 125, 8, 14, 24, 68, 75, 71, 87, 87, 87,
+ 101, 100, 99, 118, 109, 95, 119, 123, 126, 126,
+ 118, 114, 104, 83, 90, 6, 26, 4, 27, 50, 86, 89,
+ 94, 92, 116, 105, 98, 124, 102, 113, 107, 126,
+ 120, 116, 115, 85, 82, 119, 90, 93, 105, 118,
+ 112, 113, 113, 108, 118, 115, 117, 123, 126,
+ 121, 78, 86, 103, 66, 15, 10, 22, 23, 27, 51,
+ 33, 44, 44, 62, 54, 48, 62, 62, 37, 73, 96, 117,
+ 126, 126, 126, 126, 126, 17, 51, 44, 44, 31, 46,
+ 23, 17, 23, 13, 69, 2, 18, 5, 38, 48, 2, 12, 21,
+ 25, 6, 23, 42, 4, 65, 10, 91, 117, 126, 126,
+ 126, 126, 126, 126 },
+
+ },
+
+ {
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 38, 62,
+ 62, 54, 22, 118, 65, 71, 79, 11, 13, 70, 9, 29,
+ 41, 62, 61, 27, 69, 126, 101, 76, 71, 79, 11,
+ 69, 90, 11, 20, 69, 82, 96, 4, 75, 87, 100, 7,
+ 74, 85, 4, 81, 86, 95, 66, 77, 70, 86, 72, 2,
+ 22, 0, 0, 0, 83, 86, 97, 72, 22, 1, 48, 12, 80,
+ 126, 91, 96, 81, 98, 102, 97, 119, 99, 110, 102,
+ 126, 80, 89, 94, 92, 24, 65, 84, 126, 73, 104,
+ 91, 126, 8, 7, 8, 2, 10, 68, 74, 88, 103, 91,
+ 89, 92, 76, 87, 110, 105, 78, 112, 99, 126, 126,
+ 126, 126, 66, 78, 71, 72, 4, 8, 70, 75, 89, 119,
+ 75, 43, 41, 126, 9, 2, 5, 3, 2, 67, 84, 74, 65,
+ 11, 6, 2, 69, 70, 8, 71, 5, 2, 22, 38, 31, 20,
+ 16, 19, 12, 17, 25, 66, 25, 21, 29, 89, 18, 35,
+ 32, 62, 62, 48, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 53, 62, 62, 62, 62, 62, 62, 62, 56, 62,
+ 62, 62, 27, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 53, 45, 38, 22, 75, 72, 77, 28, 32, 28,
+ 33, 18, 21, 18, 37, 9, 66, 7, 73, 67, 116, 112,
+ 71, 2, 10, 66, 77, 80, 84, 87, 126, 101, 24, 10,
+ 2, 75, 77, 91, 107, 111, 122, 76, 19, 11, 6, 5,
+ 72, 69, 69, 74, 86, 66, 29, 31, 32, 11, 8, 67,
+ 73, 89, 11, 59, 55, 55, 44, 26, 2, 73, 70, 78,
+ 62, 126, 124, 110, 126, 124, 105, 121, 117, 102,
+ 117, 116, 122, 95, 100, 95, 111, 114, 89, 80,
+ 82, 85, 81, 72, 64, 67, 7, 69, 69, 69, 69, 67,
+ 77, 64, 2, 67, 64, 6, 65, 66, 1, 12, 66, 71, 75,
+ 70, 72, 3, 26, 16, 28, 26, 22, 22, 15, 22, 22,
+ 4, 13, 23, 66, 13, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 54, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 49, 37, 26, 8, 65, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 43, 33,
+ 19, 15, 14, 18, 41, 41, 42, 43, 35, 39, 29, 21,
+ 24, 13, 70, 9, 71, 83, 31, 14, 9, 85, 81, 77,
+ 81, 80, 73, 74, 83, 71, 67, 2, 66, 66, 4, 4, 62,
+ 62, 62, 62, 62, 60, 53, 36, 6, 71, 39, 27, 21,
+ 11, 6, 0, 65, 67, 82, 81, 76, 72, 78, 72, 68,
+ 70, 76, 66, 1, 6, 2, 3, 9, 5, 62, 62, 62, 62,
+ 62, 60, 53, 36, 6 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 37,
+ 61, 62, 55, 22, 116, 65, 70, 78, 11, 13, 69,
+ 9, 28, 40, 61, 58, 25, 70, 124, 100, 75, 70,
+ 78, 11, 69, 89, 11, 20, 68, 81, 95, 4, 75, 86,
+ 99, 7, 73, 84, 4, 80, 85, 94, 65, 76, 70, 85,
+ 71, 2, 22, 0, 0, 0, 82, 86, 97, 71, 22, 1, 48,
+ 12, 80, 124, 89, 94, 79, 95, 100, 95, 117, 97,
+ 108, 100, 124, 80, 88, 93, 91, 24, 65, 83,
+ 124, 72, 103, 90, 125, 8, 7, 8, 2, 11, 68, 73,
+ 87, 102, 90, 88, 91, 75, 86, 108, 103, 77,
+ 110, 97, 122, 122, 123, 124, 65, 77, 70, 71,
+ 4, 9, 69, 74, 88, 116, 74, 41, 40, 124, 9, 3,
+ 5, 4, 3, 66, 82, 73, 64, 11, 6, 2, 68, 69, 7,
+ 70, 5, 2, 22, 37, 31, 20, 16, 19, 12, 17, 24,
+ 65, 25, 21, 29, 89, 18, 35, 32, 62, 62, 47,
+ 62, 62, 62, 61, 62, 62, 62, 62, 62, 62, 52,
+ 62, 62, 62, 62, 62, 62, 62, 54, 62, 60, 62,
+ 26, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 61, 52, 44, 37, 21, 75, 72, 77, 28, 31, 27,
+ 32, 17, 20, 17, 36, 8, 66, 6, 73, 67, 115,
+ 110, 70, 3, 10, 65, 76, 79, 83, 86, 124, 99,
+ 25, 11, 3, 74, 76, 89, 105, 109, 120, 75, 20,
+ 12, 7, 6, 71, 68, 68, 73, 85, 66, 30, 31, 32,
+ 11, 9, 66, 73, 88, 11, 59, 55, 54, 43, 26, 3,
+ 72, 69, 77, 62, 124, 122, 108, 124, 122, 103,
+ 119, 115, 100, 115, 114, 119, 94, 99, 94, 109,
+ 112, 88, 79, 81, 84, 80, 71, 64, 67, 7, 69,
+ 69, 69, 68, 66, 76, 0, 2, 66, 0, 6, 64, 65, 1,
+ 12, 65, 70, 74, 69, 71, 3, 25, 16, 27, 26, 22,
+ 22, 15, 22, 22, 4, 13, 22, 66, 12, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 52, 62, 62, 62, 62, 62, 62, 62, 61, 62, 48,
+ 36, 25, 8, 65, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 42, 32, 18, 15, 14, 17, 40,
+ 40, 41, 41, 34, 38, 28, 20, 23, 12, 70, 8, 71,
+ 83, 30, 13, 8, 84, 80, 76, 80, 78, 71, 73, 82,
+ 70, 66, 3, 65, 65, 4, 4, 62, 62, 62, 62, 60,
+ 56, 49, 32, 4, 70, 39, 28, 22, 12, 7, 1, 64,
+ 66, 81, 80, 75, 71, 77, 71, 67, 69, 75, 65, 2,
+ 6, 3, 4, 9, 5, 62, 62, 62, 62, 60, 56, 49, 32,
+ 4 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 36,
+ 59, 61, 55, 22, 114, 65, 70, 77, 11, 12, 69,
+ 8, 26, 39, 58, 54, 22, 72, 121, 99, 75, 70,
+ 77, 11, 69, 88, 11, 19, 68, 81, 94, 4, 75, 86,
+ 99, 7, 73, 84, 4, 80, 85, 94, 65, 76, 70, 85,
+ 71, 2, 22, 0, 0, 0, 81, 86, 97, 71, 21, 1, 47,
+ 12, 80, 122, 88, 93, 77, 93, 99, 94, 115, 96,
+ 107, 99, 122, 80, 88, 93, 91, 24, 65, 82, 122,
+ 72, 102, 89, 123, 8, 7, 8, 1, 11, 68, 73, 86,
+ 101, 89, 87, 90, 75, 85, 107, 102, 76, 109,
+ 96, 117, 118, 120, 121, 65, 77, 70, 71, 4, 9,
+ 69, 74, 88, 114, 74, 39, 38, 121, 9, 3, 5, 4,
+ 3, 66, 80, 72, 64, 11, 6, 2, 67, 68, 6, 70, 5,
+ 2, 21, 36, 30, 20, 15, 19, 12, 17, 23, 65, 24,
+ 20, 28, 89, 18, 34, 31, 62, 62, 46, 60, 62,
+ 62, 59, 62, 62, 62, 62, 62, 62, 50, 62, 62,
+ 62, 62, 62, 62, 62, 52, 62, 58, 62, 24, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, 50,
+ 42, 35, 19, 75, 72, 78, 27, 30, 26, 31, 16,
+ 19, 16, 34, 7, 66, 5, 74, 68, 114, 109, 69, 3,
+ 10, 65, 75, 78, 82, 85, 122, 98, 25, 11, 3,
+ 73, 75, 88, 103, 107, 118, 74, 21, 13, 8, 7,
+ 70, 68, 68, 73, 84, 66, 31, 31, 31, 11, 9, 66,
+ 73, 88, 11, 59, 54, 53, 42, 26, 3, 72, 69, 77,
+ 62, 123, 121, 107, 122, 120, 102, 117, 113,
+ 99, 113, 112, 117, 93, 98, 94, 108, 110, 88,
+ 79, 81, 83, 80, 71, 64, 67, 6, 69, 69, 69, 68,
+ 66, 75, 0, 2, 66, 0, 6, 64, 65, 1, 11, 65, 70,
+ 74, 69, 70, 2, 24, 16, 26, 25, 21, 21, 15, 21,
+ 21, 4, 13, 21, 66, 11, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 50, 62, 62,
+ 62, 62, 62, 62, 62, 59, 59, 46, 34, 24, 7, 66,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 40, 30, 16, 14, 13, 15, 39, 39, 39, 39,
+ 32, 36, 26, 19, 21, 11, 71, 7, 72, 84, 28, 12,
+ 7, 84, 80, 75, 80, 77, 70, 73, 81, 69, 65, 3,
+ 65, 64, 4, 4, 62, 62, 62, 62, 57, 52, 45, 28,
+ 1, 70, 39, 28, 22, 12, 8, 1, 64, 66, 81, 80,
+ 75, 71, 77, 70, 66, 69, 75, 65, 2, 6, 3, 5, 9,
+ 5, 62, 62, 62, 62, 57, 52, 45, 28, 1 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 34, 57,
+ 60, 55, 22, 112, 65, 69, 76, 11, 12, 69, 8,
+ 25, 38, 56, 51, 20, 73, 118, 98, 75, 69, 76,
+ 11, 70, 87, 11, 19, 68, 81, 94, 4, 75, 86, 99,
+ 7, 73, 83, 4, 80, 84, 94, 65, 76, 70, 85, 71,
+ 2, 22, 0, 0, 0, 81, 86, 97, 70, 20, 1, 46, 11,
+ 80, 119, 87, 92, 76, 91, 97, 92, 113, 94, 106,
+ 98, 120, 80, 88, 92, 91, 24, 65, 81, 120, 72,
+ 101, 89, 121, 8, 6, 7, 1, 11, 68, 72, 86, 100,
+ 88, 87, 89, 74, 84, 105, 100, 76, 108, 95,
+ 112, 113, 117, 118, 65, 77, 70, 70, 4, 9, 68,
+ 73, 87, 112, 74, 37, 36, 118, 9, 3, 5, 4, 3,
+ 65, 79, 71, 64, 11, 6, 2, 67, 67, 5, 70, 5, 1,
+ 21, 35, 30, 20, 15, 19, 12, 17, 22, 65, 23,
+ 19, 28, 89, 18, 34, 31, 62, 62, 45, 58, 62,
+ 62, 57, 62, 62, 62, 62, 62, 61, 48, 62, 62,
+ 62, 62, 62, 62, 60, 50, 62, 56, 62, 22, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 48,
+ 40, 34, 17, 75, 72, 78, 26, 29, 25, 30, 15,
+ 18, 15, 32, 6, 67, 4, 75, 68, 114, 107, 68, 4,
+ 10, 65, 74, 78, 82, 85, 120, 97, 25, 11, 4,
+ 72, 74, 87, 102, 106, 116, 73, 21, 13, 8, 7,
+ 69, 67, 68, 73, 84, 66, 31, 31, 30, 11, 9, 66,
+ 73, 87, 11, 58, 54, 52, 41, 26, 3, 72, 69, 77,
+ 62, 122, 119, 106, 121, 119, 101, 115, 111,
+ 98, 112, 110, 115, 93, 97, 93, 107, 108, 87,
+ 79, 81, 83, 79, 71, 64, 67, 6, 69, 69, 70, 67,
+ 65, 74, 0, 2, 65, 0, 6, 64, 65, 1, 11, 65, 70,
+ 74, 69, 70, 1, 23, 16, 25, 24, 20, 21, 15, 20,
+ 20, 4, 13, 20, 66, 10, 62, 62, 61, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 48, 62, 62,
+ 62, 62, 62, 62, 62, 57, 57, 44, 32, 22, 6, 67,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 59,
+ 60, 38, 28, 15, 13, 12, 14, 37, 37, 37, 37,
+ 31, 34, 24, 18, 20, 10, 72, 6, 73, 85, 27, 11,
+ 6, 84, 79, 75, 79, 76, 69, 73, 81, 69, 65, 3,
+ 64, 0, 4, 4, 62, 62, 62, 59, 54, 48, 41, 24,
+ 65, 70, 39, 28, 22, 12, 8, 2, 64, 66, 80, 80,
+ 75, 70, 76, 69, 65, 69, 74, 65, 2, 6, 3, 5, 9,
+ 5, 62, 62, 62, 59, 54, 48, 41, 24, 65 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 33, 55,
+ 59, 55, 21, 110, 65, 69, 75, 10, 11, 69, 7,
+ 23, 37, 53, 47, 17, 75, 115, 97, 75, 69, 75,
+ 10, 70, 86, 11, 18, 68, 80, 93, 4, 75, 86, 99,
+ 7, 73, 83, 4, 80, 84, 93, 65, 76, 70, 85, 70,
+ 2, 22, 0, 0, 0, 80, 87, 97, 70, 19, 1, 45, 11,
+ 80, 117, 86, 91, 74, 89, 96, 91, 112, 93, 104,
+ 97, 118, 80, 87, 92, 91, 24, 65, 80, 118, 72,
+ 101, 88, 119, 8, 6, 7, 0, 11, 68, 72, 85, 99,
+ 87, 86, 88, 74, 84, 104, 99, 75, 107, 94, 107,
+ 109, 114, 115, 65, 76, 70, 70, 4, 9, 68, 73,
+ 87, 110, 74, 35, 34, 116, 9, 4, 5, 4, 3, 65,
+ 77, 70, 0, 10, 6, 2, 66, 67, 4, 70, 5, 1, 20,
+ 34, 29, 19, 14, 19, 12, 17, 21, 65, 22, 18,
+ 27, 89, 17, 33, 30, 62, 62, 44, 56, 62, 62,
+ 55, 62, 62, 62, 62, 62, 59, 46, 59, 62, 62,
+ 62, 62, 62, 57, 48, 62, 54, 62, 21, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 60, 55, 46, 38,
+ 32, 15, 75, 72, 79, 25, 28, 24, 28, 14, 16,
+ 14, 31, 5, 67, 3, 75, 69, 113, 106, 67, 4, 10,
+ 64, 74, 77, 81, 84, 118, 95, 25, 12, 4, 72,
+ 73, 86, 100, 104, 115, 73, 22, 14, 9, 8, 68,
+ 67, 68, 72, 83, 66, 32, 31, 30, 10, 9, 66, 73,
+ 87, 11, 58, 53, 51, 40, 26, 3, 71, 69, 77, 62,
+ 120, 118, 105, 119, 117, 100, 114, 110, 97,
+ 110, 109, 113, 92, 96, 93, 106, 107, 87, 79,
+ 81, 82, 79, 71, 65, 67, 5, 69, 69, 70, 67, 65,
+ 73, 0, 2, 65, 0, 6, 64, 65, 1, 10, 65, 70, 74,
+ 69, 69, 0, 22, 16, 24, 24, 19, 20, 15, 19, 19,
+ 4, 13, 19, 66, 9, 62, 62, 60, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 46, 62, 62, 62,
+ 62, 62, 62, 62, 54, 54, 42, 30, 21, 5, 67, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 57,
+ 36, 26, 13, 12, 12, 12, 36, 36, 36, 35, 29,
+ 32, 23, 17, 18, 9, 73, 4, 74, 85, 25, 9, 4,
+ 83, 79, 74, 79, 75, 68, 73, 80, 68, 64, 3, 64,
+ 1, 4, 4, 62, 62, 62, 56, 50, 44, 36, 20, 68,
+ 69, 39, 28, 22, 12, 9, 2, 64, 66, 80, 80, 75,
+ 70, 76, 69, 64, 69, 74, 64, 3, 6, 3, 6, 9, 5,
+ 62, 62, 62, 56, 50, 44, 36, 20, 68 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 32, 53,
+ 58, 55, 21, 108, 65, 69, 74, 10, 11, 69, 6,
+ 21, 36, 51, 44, 15, 77, 112, 96, 74, 69, 74,
+ 10, 70, 85, 11, 18, 68, 80, 92, 4, 75, 86, 99,
+ 7, 73, 83, 4, 80, 83, 93, 65, 76, 70, 85, 70,
+ 2, 22, 0, 0, 0, 80, 87, 97, 69, 18, 1, 44, 10,
+ 80, 114, 85, 90, 72, 87, 94, 89, 110, 91, 103,
+ 96, 115, 80, 87, 91, 90, 24, 65, 79, 116, 72,
+ 100, 88, 117, 8, 5, 6, 0, 11, 68, 71, 85, 98,
+ 86, 86, 87, 73, 83, 102, 97, 74, 105, 93, 102,
+ 105, 111, 112, 64, 76, 69, 69, 4, 9, 67, 73,
+ 86, 108, 74, 33, 32, 113, 9, 4, 5, 4, 3, 64,
+ 76, 69, 0, 10, 6, 2, 66, 66, 3, 69, 5, 0, 20,
+ 33, 29, 19, 14, 19, 12, 17, 20, 64, 21, 18,
+ 27, 89, 17, 32, 29, 62, 62, 43, 55, 62, 62,
+ 53, 62, 62, 62, 62, 61, 57, 44, 57, 62, 60,
+ 62, 62, 62, 55, 46, 62, 52, 62, 19, 62, 62,
+ 62, 62, 62, 62, 62, 62, 61, 58, 53, 44, 37,
+ 30, 13, 75, 72, 79, 24, 27, 23, 27, 13, 15,
+ 13, 29, 4, 68, 2, 76, 70, 112, 104, 66, 5, 10,
+ 64, 73, 77, 81, 83, 116, 94, 25, 12, 5, 71,
+ 72, 85, 99, 103, 113, 72, 23, 15, 10, 8, 67,
+ 66, 67, 72, 83, 66, 32, 31, 29, 10, 9, 66, 73,
+ 86, 11, 57, 52, 50, 39, 26, 3, 71, 69, 76, 62,
+ 119, 116, 103, 117, 116, 99, 112, 108, 96,
+ 108, 107, 111, 91, 95, 92, 105, 105, 87, 79,
+ 80, 82, 78, 71, 65, 67, 5, 69, 69, 71, 66, 65,
+ 72, 0, 2, 65, 0, 6, 64, 65, 1, 10, 65, 70, 74,
+ 69, 69, 64, 21, 16, 23, 23, 19, 19, 15, 19,
+ 18, 4, 13, 18, 66, 8, 62, 62, 59, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 44, 62, 62,
+ 62, 62, 62, 62, 61, 52, 52, 40, 29, 19, 5, 68,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 55,
+ 54, 34, 24, 12, 12, 11, 10, 35, 34, 34, 33,
+ 27, 30, 21, 16, 17, 8, 73, 3, 75, 86, 24, 8,
+ 3, 83, 79, 73, 78, 74, 67, 72, 79, 68, 64, 3,
+ 0, 2, 4, 4, 62, 62, 59, 53, 47, 40, 32, 16,
+ 71, 69, 39, 28, 22, 12, 9, 2, 0, 65, 79, 80,
+ 75, 69, 76, 68, 0, 69, 74, 64, 3, 6, 4, 6, 9,
+ 5, 62, 62, 59, 53, 47, 40, 32, 16, 71 },
+
+ {
+
+ 62,
+ 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 30, 51,
+ 57, 55, 21, 107, 65, 68, 74, 10, 10, 68, 6,
+ 20, 34, 48, 40, 12, 78, 110, 95, 74, 68, 74,
+ 10, 71, 85, 11, 17, 68, 80, 92, 4, 75, 85, 98,
+ 7, 72, 82, 4, 79, 83, 93, 65, 76, 70, 85, 70,
+ 2, 22, 0, 0, 0, 79, 87, 97, 69, 18, 0, 44, 10,
+ 80, 112, 84, 89, 71, 84, 93, 88, 108, 90, 102,
+ 95, 113, 80, 87, 91, 90, 24, 65, 78, 113, 72,
+ 99, 87, 115, 7, 5, 6, 64, 12, 68, 71, 84, 98,
+ 86, 85, 86, 73, 82, 101, 96, 74, 104, 92, 97,
+ 100, 108, 109, 64, 76, 69, 69, 4, 9, 67, 72,
+ 86, 106, 73, 31, 30, 110, 9, 4, 5, 4, 4, 64,
+ 74, 68, 0, 10, 6, 2, 65, 65, 2, 69, 5, 0, 19,
+ 32, 28, 19, 13, 19, 12, 17, 18, 64, 20, 17,
+ 26, 89, 17, 32, 29, 62, 62, 42, 53, 62, 62,
+ 51, 62, 62, 62, 62, 57, 55, 43, 55, 62, 58,
+ 62, 62, 62, 52, 44, 62, 50, 62, 17, 62, 62,
+ 62, 62, 62, 62, 62, 62, 59, 56, 50, 42, 35,
+ 29, 12, 75, 72, 80, 23, 26, 22, 26, 12, 14,
+ 12, 27, 3, 68, 1, 77, 70, 112, 103, 65, 5, 10,
+ 64, 72, 76, 80, 83, 114, 93, 26, 12, 5, 70,
+ 71, 84, 97, 101, 111, 71, 23, 15, 10, 9, 66,
+ 66, 67, 72, 82, 66, 33, 31, 28, 10, 9, 66, 73,
+ 86, 10, 57, 52, 49, 38, 25, 3, 71, 69, 76, 62,
+ 118, 115, 102, 116, 114, 98, 110, 106, 95,
+ 107, 105, 109, 91, 94, 92, 104, 103, 86, 79,
+ 80, 81, 78, 71, 65, 67, 4, 69, 69, 71, 66, 64,
+ 71, 0, 2, 64, 1, 6, 0, 64, 1, 9, 65, 70, 74,
+ 69, 68, 65, 20, 16, 22, 22, 18, 19, 15, 18,
+ 18, 4, 12, 16, 67, 7, 62, 62, 58, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 42, 62, 62,
+ 62, 62, 62, 62, 58, 50, 49, 38, 27, 18, 4, 69,
+ 62, 62, 62, 62, 62, 62, 62, 62, 61, 58, 52,
+ 51, 32, 23, 10, 11, 10, 9, 33, 33, 32, 31, 26,
+ 28, 19, 15, 15, 7, 74, 2, 76, 87, 22, 7, 2,
+ 83, 78, 73, 78, 73, 66, 72, 79, 67, 0, 3, 0,
+ 3, 4, 4, 62, 62, 57, 50, 44, 36, 28, 12, 74,
+ 69, 39, 28, 22, 12, 10, 3, 0, 65, 79, 79, 74,
+ 69, 75, 67, 1, 68, 73, 64, 3, 6, 4, 7, 9, 5,
+ 62, 62, 57, 50, 44, 36, 28, 12, 74 },
+
+ {
+
+ 62,
+ 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 29, 49,
+ 56, 55, 21, 105, 65, 68, 73, 9, 10, 68, 5, 18,
+ 33, 46, 37, 10, 80, 107, 94, 74, 68, 73, 9,
+ 71, 84, 11, 17, 68, 79, 91, 4, 75, 85, 98, 7,
+ 72, 82, 4, 79, 82, 92, 65, 76, 70, 85, 69, 2,
+ 22, 0, 0, 0, 79, 87, 97, 68, 17, 0, 43, 9, 80,
+ 109, 83, 88, 69, 82, 91, 86, 107, 88, 100, 94,
+ 111, 80, 86, 90, 90, 24, 65, 77, 111, 72, 98,
+ 87, 113, 7, 4, 5, 64, 12, 68, 70, 84, 97, 85,
+ 85, 85, 72, 81, 99, 94, 73, 103, 91, 92, 96,
+ 105, 106, 64, 75, 69, 68, 4, 9, 66, 72, 85,
+ 104, 73, 29, 28, 107, 9, 5, 5, 4, 4, 0, 73,
+ 67, 1, 9, 6, 2, 65, 65, 1, 69, 5, 64, 19, 31,
+ 28, 18, 13, 19, 12, 17, 17, 64, 19, 16, 26,
+ 89, 17, 31, 28, 60, 62, 41, 51, 62, 62, 49,
+ 62, 61, 62, 62, 54, 53, 41, 52, 62, 55, 62,
+ 62, 62, 49, 42, 62, 48, 62, 16, 62, 62, 62,
+ 62, 62, 62, 62, 62, 57, 53, 48, 40, 33, 27,
+ 10, 75, 72, 80, 22, 25, 21, 24, 11, 13, 11,
+ 26, 2, 69, 0, 77, 71, 111, 101, 64, 6, 10, 0,
+ 72, 76, 80, 82, 112, 91, 26, 13, 6, 70, 70,
+ 83, 96, 100, 109, 71, 24, 16, 11, 9, 65, 65,
+ 67, 71, 82, 66, 33, 31, 28, 9, 9, 66, 73, 85,
+ 10, 56, 51, 48, 37, 25, 3, 70, 69, 76, 62,
+ 116, 113, 101, 114, 113, 97, 109, 105, 94,
+ 105, 104, 107, 90, 93, 91, 103, 101, 86, 79,
+ 80, 81, 77, 71, 66, 67, 4, 69, 69, 72, 65, 64,
+ 70, 0, 2, 64, 1, 6, 0, 64, 1, 9, 65, 70, 74,
+ 69, 68, 66, 19, 16, 21, 22, 17, 18, 15, 17,
+ 17, 4, 12, 15, 67, 6, 61, 62, 57, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 40, 62, 62,
+ 62, 62, 62, 62, 56, 48, 47, 36, 25, 16, 3, 69,
+ 62, 62, 62, 62, 62, 62, 62, 62, 59, 56, 50,
+ 48, 30, 21, 9, 10, 10, 7, 32, 31, 31, 29, 24,
+ 26, 18, 14, 14, 6, 75, 0, 77, 87, 21, 5, 0,
+ 82, 78, 72, 77, 72, 65, 72, 78, 67, 0, 3, 1,
+ 4, 4, 4, 62, 62, 54, 47, 40, 32, 24, 8, 77,
+ 68, 39, 28, 22, 12, 10, 3, 0, 65, 78, 79, 74,
+ 68, 75, 66, 2, 68, 73, 0, 4, 6, 4, 7, 9, 5,
+ 62, 62, 54, 47, 40, 32, 24, 8, 77 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 27, 46,
+ 55, 55, 20, 103, 66, 68, 72, 9, 9, 68, 4, 16,
+ 32, 43, 33, 7, 82, 104, 93, 74, 68, 72, 9, 72,
+ 83, 11, 16, 68, 79, 91, 3, 76, 85, 98, 7, 72,
+ 82, 4, 79, 82, 92, 65, 76, 70, 85, 69, 2, 22,
+ 0, 0, 0, 78, 88, 97, 68, 16, 0, 42, 9, 81,
+ 107, 82, 87, 68, 80, 90, 85, 105, 87, 99, 93,
+ 109, 80, 86, 90, 90, 24, 65, 76, 109, 72, 98,
+ 86, 111, 7, 4, 5, 65, 12, 68, 70, 83, 96, 84,
+ 84, 85, 72, 81, 98, 93, 73, 102, 90, 88, 92,
+ 102, 104, 64, 75, 69, 68, 3, 9, 66, 72, 85,
+ 102, 73, 27, 26, 105, 9, 5, 5, 4, 4, 0, 71,
+ 67, 1, 9, 5, 2, 64, 64, 64, 69, 5, 64, 18, 29,
+ 27, 18, 12, 19, 12, 16, 16, 64, 18, 15, 25,
+ 89, 16, 30, 27, 58, 62, 39, 49, 62, 62, 46,
+ 62, 59, 62, 62, 50, 51, 39, 50, 62, 53, 62,
+ 62, 62, 46, 40, 62, 46, 62, 14, 62, 62, 62,
+ 62, 62, 62, 62, 60, 55, 51, 46, 38, 31, 25, 8,
+ 75, 73, 81, 21, 23, 20, 23, 10, 11, 9, 24, 1,
+ 69, 64, 78, 72, 111, 100, 0, 6, 10, 0, 71, 75,
+ 79, 82, 110, 90, 26, 13, 6, 69, 69, 82, 94,
+ 98, 108, 70, 24, 16, 11, 10, 64, 65, 67, 71,
+ 81, 67, 34, 31, 27, 9, 9, 66, 73, 85, 10, 56,
+ 50, 47, 36, 25, 3, 70, 69, 76, 62, 115, 112,
+ 100, 113, 111, 96, 107, 103, 93, 104, 102,
+ 105, 90, 93, 91, 102, 100, 86, 79, 80, 80, 77,
+ 71, 66, 67, 3, 69, 69, 72, 65, 64, 69, 0, 1,
+ 64, 1, 5, 0, 64, 1, 8, 65, 70, 74, 69, 67, 67,
+ 18, 16, 19, 21, 16, 17, 14, 16, 16, 4, 12, 14,
+ 67, 4, 60, 60, 56, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 60, 38, 62, 62, 62, 62, 62, 62,
+ 53, 45, 44, 34, 23, 15, 2, 70, 62, 62, 62, 62,
+ 62, 62, 62, 62, 56, 53, 47, 45, 28, 19, 7, 9,
+ 9, 5, 30, 30, 29, 27, 22, 24, 16, 12, 12, 4,
+ 76, 64, 78, 88, 19, 4, 64, 82, 78, 72, 77, 71,
+ 64, 72, 78, 66, 1, 3, 1, 4, 4, 3, 62, 60, 51,
+ 44, 37, 28, 19, 3, 80, 68, 39, 28, 22, 12, 11,
+ 3, 0, 65, 78, 79, 74, 68, 75, 66, 2, 68, 73,
+ 0, 4, 6, 4, 8, 9, 4, 62, 60, 51, 44, 37, 28,
+ 19, 3, 80 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 26, 44,
+ 54, 56, 20, 101, 66, 67, 71, 9, 8, 68, 4, 15,
+ 31, 41, 29, 4, 83, 101, 92, 73, 67, 71, 9, 72,
+ 82, 11, 16, 67, 79, 90, 3, 76, 85, 98, 7, 72,
+ 81, 4, 79, 82, 92, 65, 76, 70, 84, 69, 2, 22,
+ 0, 0, 0, 77, 88, 97, 68, 15, 0, 41, 9, 81,
+ 105, 80, 86, 66, 78, 88, 84, 103, 85, 98, 91,
+ 106, 80, 86, 90, 89, 24, 65, 75, 107, 71, 97,
+ 85, 109, 7, 4, 5, 65, 12, 68, 70, 82, 95, 83,
+ 83, 84, 71, 80, 97, 91, 72, 100, 89, 83, 87,
+ 98, 101, 0, 75, 68, 67, 3, 9, 66, 71, 84, 99,
+ 73, 25, 25, 102, 9, 5, 5, 4, 4, 1, 69, 66, 1,
+ 9, 5, 2, 0, 0, 65, 68, 5, 64, 17, 28, 26, 18,
+ 11, 19, 12, 16, 15, 0, 17, 15, 24, 89, 16, 30,
+ 27, 56, 62, 38, 48, 62, 62, 44, 60, 57, 62,
+ 62, 47, 49, 37, 48, 62, 51, 62, 62, 62, 44,
+ 38, 62, 44, 62, 12, 62, 62, 62, 62, 62, 62,
+ 60, 58, 53, 49, 44, 37, 30, 24, 6, 75, 73, 81,
+ 21, 22, 19, 22, 9, 10, 8, 22, 0, 69, 65, 79,
+ 72, 110, 99, 1, 6, 10, 0, 70, 74, 78, 81, 107,
+ 89, 26, 13, 6, 68, 68, 81, 92, 96, 106, 69,
+ 25, 17, 12, 11, 0, 65, 66, 71, 80, 67, 35, 31,
+ 26, 9, 10, 65, 73, 84, 10, 56, 50, 46, 35, 25,
+ 3, 70, 69, 75, 62, 114, 111, 98, 111, 109, 95,
+ 105, 101, 92, 102, 100, 103, 89, 92, 90, 101,
+ 98, 85, 78, 79, 79, 76, 71, 66, 67, 2, 69, 69,
+ 72, 65, 0, 68, 1, 1, 0, 1, 5, 0, 64, 1, 7, 65,
+ 69, 73, 69, 66, 67, 17, 16, 18, 20, 16, 17,
+ 14, 16, 15, 4, 12, 13, 67, 3, 59, 59, 56, 61,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 36,
+ 62, 62, 62, 62, 62, 62, 50, 43, 42, 33, 22,
+ 14, 2, 71, 62, 62, 62, 62, 62, 62, 62, 62, 54,
+ 51, 45, 43, 26, 17, 5, 9, 8, 4, 29, 29, 27,
+ 25, 21, 23, 14, 11, 10, 3, 76, 65, 78, 89, 17,
+ 3, 65, 82, 77, 71, 77, 70, 1, 71, 77, 65, 2,
+ 3, 2, 5, 4, 3, 62, 58, 49, 41, 34, 24, 15, 64,
+ 83, 68, 39, 28, 23, 13, 12, 4, 1, 64, 78, 79,
+ 74, 68, 74, 65, 3, 68, 72, 0, 4, 6, 5, 9, 9,
+ 4, 62, 58, 49, 41, 34, 24, 15, 64, 83 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 25, 42,
+ 53, 56, 20, 99, 66, 67, 70, 8, 8, 68, 3, 13,
+ 30, 38, 26, 2, 85, 98, 91, 73, 67, 70, 8, 72,
+ 81, 11, 15, 67, 78, 89, 3, 76, 85, 98, 7, 72,
+ 81, 4, 79, 81, 91, 65, 76, 70, 84, 68, 2, 22,
+ 0, 0, 0, 77, 88, 97, 67, 14, 0, 40, 8, 81,
+ 102, 79, 85, 64, 76, 87, 82, 102, 84, 96, 90,
+ 104, 80, 85, 89, 89, 24, 65, 74, 105, 71, 96,
+ 85, 107, 7, 3, 4, 66, 12, 68, 69, 82, 94, 82,
+ 83, 83, 71, 79, 95, 90, 71, 99, 88, 78, 83,
+ 95, 98, 0, 74, 68, 67, 3, 9, 65, 71, 84, 97,
+ 73, 23, 23, 99, 9, 6, 5, 4, 4, 1, 68, 65, 2,
+ 8, 5, 2, 0, 0, 66, 68, 5, 65, 17, 27, 26, 17,
+ 11, 19, 12, 16, 14, 0, 16, 14, 24, 89, 16, 29,
+ 26, 54, 62, 37, 46, 62, 62, 42, 57, 55, 62,
+ 62, 43, 47, 35, 45, 61, 48, 62, 62, 62, 41,
+ 36, 58, 42, 62, 11, 62, 62, 62, 62, 62, 60,
+ 58, 56, 51, 46, 42, 35, 28, 22, 4, 75, 73, 82,
+ 20, 21, 18, 20, 8, 9, 7, 21, 64, 70, 66, 79,
+ 73, 109, 97, 2, 7, 10, 1, 70, 74, 78, 80, 105,
+ 87, 26, 14, 7, 68, 67, 80, 91, 95, 104, 69,
+ 26, 18, 13, 11, 1, 64, 66, 70, 80, 67, 35, 31,
+ 26, 8, 10, 65, 73, 84, 10, 55, 49, 45, 34, 25,
+ 3, 69, 69, 75, 62, 112, 109, 97, 109, 108, 94,
+ 104, 100, 91, 100, 99, 101, 88, 91, 90, 100,
+ 96, 85, 78, 79, 79, 76, 71, 67, 67, 2, 69, 69,
+ 73, 64, 0, 67, 1, 1, 0, 1, 5, 0, 64, 1, 7, 65,
+ 69, 73, 69, 66, 68, 16, 16, 17, 20, 15, 16,
+ 14, 15, 14, 4, 12, 12, 67, 2, 58, 58, 55, 59,
+ 60, 62, 62, 62, 62, 62, 62, 62, 62, 55, 34,
+ 62, 62, 62, 62, 62, 62, 48, 41, 39, 31, 20,
+ 12, 1, 71, 62, 62, 62, 62, 62, 62, 62, 62, 52,
+ 48, 43, 40, 24, 15, 4, 8, 8, 2, 28, 27, 26,
+ 23, 19, 21, 13, 10, 9, 2, 77, 67, 79, 89, 16,
+ 1, 67, 81, 77, 70, 76, 69, 2, 71, 76, 65, 2,
+ 3, 2, 6, 4, 3, 62, 56, 46, 38, 30, 20, 11, 68,
+ 86, 67, 39, 28, 23, 13, 12, 4, 1, 64, 77, 79,
+ 74, 67, 74, 64, 4, 68, 72, 1, 5, 6, 5, 9, 9,
+ 4, 62, 56, 46, 38, 30, 20, 11, 68, 86 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 23, 40,
+ 52, 56, 20, 98, 66, 66, 70, 8, 7, 67, 3, 12,
+ 28, 36, 22, 64, 86, 96, 90, 73, 66, 70, 8, 73,
+ 81, 11, 15, 67, 78, 89, 3, 76, 84, 97, 7, 71,
+ 80, 4, 78, 81, 91, 65, 76, 70, 84, 68, 2, 22,
+ 0, 0, 0, 76, 88, 97, 67, 14, 64, 40, 8, 81,
+ 100, 78, 84, 0, 73, 85, 81, 100, 82, 95, 89,
+ 102, 80, 85, 89, 89, 24, 65, 73, 102, 71, 95,
+ 84, 105, 6, 3, 4, 66, 13, 68, 69, 81, 94, 82,
+ 82, 82, 70, 78, 94, 88, 71, 98, 87, 73, 78,
+ 92, 95, 0, 74, 68, 66, 3, 9, 65, 70, 83, 95,
+ 72, 21, 21, 96, 9, 6, 5, 4, 5, 2, 66, 64, 2,
+ 8, 5, 2, 1, 1, 67, 68, 5, 65, 16, 26, 25, 17,
+ 10, 19, 12, 16, 12, 0, 15, 13, 23, 89, 16, 29,
+ 26, 52, 62, 36, 44, 61, 62, 40, 55, 53, 62,
+ 62, 40, 45, 34, 43, 57, 46, 62, 62, 62, 38,
+ 34, 55, 40, 62, 9, 62, 62, 62, 62, 62, 58, 55,
+ 54, 49, 44, 39, 33, 26, 21, 3, 75, 73, 82, 19,
+ 20, 17, 19, 7, 8, 6, 19, 65, 70, 67, 80, 73,
+ 109, 96, 3, 7, 10, 1, 69, 73, 77, 80, 103, 86,
+ 27, 14, 7, 67, 66, 79, 89, 93, 102, 68, 26,
+ 18, 13, 12, 2, 64, 66, 70, 79, 67, 36, 31, 25,
+ 8, 10, 65, 73, 83, 9, 55, 49, 44, 33, 24, 3,
+ 69, 69, 75, 62, 111, 108, 96, 108, 106, 93,
+ 102, 98, 90, 99, 97, 99, 88, 90, 89, 99, 94,
+ 84, 78, 79, 78, 75, 71, 67, 67, 1, 69, 69, 73,
+ 64, 1, 66, 1, 1, 1, 2, 5, 1, 0, 1, 6, 65, 69,
+ 73, 69, 65, 69, 15, 16, 16, 19, 14, 16, 14,
+ 14, 14, 4, 11, 10, 68, 1, 56, 57, 54, 58, 58,
+ 62, 62, 62, 62, 62, 62, 62, 62, 52, 32, 62,
+ 62, 62, 62, 62, 62, 45, 39, 37, 29, 18, 11, 0,
+ 72, 62, 62, 62, 62, 62, 62, 60, 59, 49, 46,
+ 40, 37, 22, 14, 2, 7, 7, 1, 26, 26, 24, 21,
+ 18, 19, 11, 9, 7, 1, 78, 68, 80, 90, 14, 0,
+ 68, 81, 76, 70, 76, 68, 3, 71, 76, 64, 3, 3,
+ 3, 7, 4, 3, 62, 54, 44, 35, 27, 16, 7, 72, 89,
+ 67, 39, 28, 23, 13, 13, 5, 1, 64, 77, 78, 73,
+ 67, 73, 0, 5, 67, 71, 1, 5, 6, 5, 10, 9, 4,
+ 62, 54, 44, 35, 27, 16, 7, 72, 89 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 22, 38,
+ 51, 56, 19, 96, 66, 66, 69, 8, 7, 67, 2, 10,
+ 27, 33, 19, 66, 88, 93, 89, 73, 66, 69, 8, 73,
+ 80, 11, 14, 67, 78, 88, 3, 76, 84, 97, 7, 71,
+ 80, 4, 78, 80, 91, 65, 76, 70, 84, 68, 2, 22,
+ 0, 0, 0, 76, 89, 97, 66, 13, 64, 39, 7, 81,
+ 97, 77, 83, 2, 71, 84, 79, 98, 81, 94, 88,
+ 100, 80, 85, 88, 89, 24, 65, 72, 100, 71, 95,
+ 84, 103, 6, 2, 3, 67, 13, 68, 68, 81, 93, 81,
+ 82, 81, 70, 78, 92, 87, 70, 97, 86, 68, 74,
+ 89, 92, 0, 74, 68, 66, 3, 9, 64, 70, 83, 93,
+ 72, 19, 19, 94, 9, 6, 5, 4, 5, 2, 65, 0, 2, 8,
+ 5, 2, 1, 2, 68, 68, 5, 66, 16, 25, 25, 17, 10,
+ 19, 12, 16, 11, 0, 14, 12, 23, 89, 15, 28, 25,
+ 50, 62, 35, 42, 59, 60, 38, 52, 51, 62, 62,
+ 36, 43, 32, 41, 54, 43, 58, 62, 62, 35, 32,
+ 51, 38, 62, 7, 62, 62, 62, 62, 62, 56, 53, 52,
+ 47, 42, 37, 31, 24, 19, 1, 75, 73, 83, 18, 19,
+ 16, 18, 6, 6, 5, 17, 66, 71, 68, 81, 74, 108,
+ 94, 4, 8, 10, 1, 68, 73, 77, 79, 101, 85, 27,
+ 14, 8, 66, 65, 78, 88, 92, 101, 67, 27, 19,
+ 14, 12, 3, 0, 66, 70, 79, 67, 36, 31, 24, 8,
+ 10, 65, 73, 83, 9, 54, 48, 43, 32, 24, 3, 69,
+ 69, 75, 62, 110, 106, 95, 106, 105, 92, 100,
+ 96, 89, 97, 95, 97, 87, 89, 89, 98, 93, 84,
+ 78, 79, 78, 75, 71, 67, 67, 1, 69, 69, 74, 0,
+ 1, 65, 1, 1, 1, 2, 5, 1, 0, 1, 6, 65, 69, 73,
+ 69, 65, 70, 14, 16, 15, 18, 13, 15, 14, 13,
+ 13, 4, 11, 9, 68, 0, 55, 56, 53, 56, 56, 62,
+ 61, 62, 62, 62, 62, 62, 61, 50, 30, 62, 62,
+ 62, 62, 62, 59, 43, 36, 34, 27, 16, 9, 64, 73,
+ 62, 62, 62, 62, 62, 62, 57, 56, 47, 43, 38,
+ 34, 20, 12, 1, 6, 6, 64, 25, 24, 22, 19, 16,
+ 17, 9, 8, 6, 0, 79, 69, 81, 91, 13, 64, 69,
+ 81, 76, 69, 75, 67, 4, 71, 75, 64, 3, 3, 3, 8,
+ 4, 3, 61, 52, 41, 32, 24, 12, 2, 76, 92, 67,
+ 39, 28, 23, 13, 13, 5, 1, 64, 76, 78, 73, 66,
+ 73, 0, 6, 67, 71, 1, 5, 6, 5, 10, 9, 4, 61,
+ 52, 41, 32, 24, 12, 2, 76, 92 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 21, 36,
+ 50, 56, 19, 94, 66, 66, 68, 7, 6, 67, 1, 8,
+ 26, 31, 15, 69, 90, 90, 88, 72, 66, 68, 7, 73,
+ 79, 11, 14, 67, 77, 87, 3, 76, 84, 97, 7, 71,
+ 80, 4, 78, 80, 90, 65, 76, 70, 84, 67, 2, 22,
+ 0, 0, 0, 75, 89, 97, 66, 12, 64, 38, 7, 81,
+ 95, 76, 82, 4, 69, 82, 78, 97, 79, 92, 87, 97,
+ 80, 84, 88, 88, 24, 65, 71, 98, 71, 94, 83,
+ 101, 6, 2, 3, 67, 13, 68, 68, 80, 92, 80, 81,
+ 80, 69, 77, 91, 85, 69, 95, 85, 0, 70, 86, 89,
+ 1, 73, 67, 65, 3, 9, 64, 70, 82, 91, 72, 17,
+ 17, 91, 9, 7, 5, 4, 5, 3, 0, 1, 3, 7, 5, 2, 2,
+ 2, 69, 67, 5, 66, 15, 24, 24, 16, 9, 19, 12,
+ 16, 10, 1, 13, 12, 22, 89, 15, 27, 24, 48, 62,
+ 34, 41, 57, 58, 36, 50, 49, 62, 62, 33, 41,
+ 30, 38, 51, 41, 55, 62, 62, 33, 30, 48, 36,
+ 62, 6, 62, 62, 62, 61, 60, 54, 51, 50, 45, 39,
+ 35, 29, 23, 17, 64, 75, 73, 83, 17, 18, 15,
+ 16, 5, 5, 4, 16, 67, 71, 69, 81, 75, 107, 93,
+ 5, 8, 10, 2, 68, 72, 76, 78, 99, 83, 27, 15,
+ 8, 66, 64, 77, 86, 90, 99, 67, 28, 20, 15, 13,
+ 4, 0, 65, 69, 78, 67, 37, 31, 24, 7, 10, 65,
+ 73, 82, 9, 54, 47, 42, 31, 24, 3, 68, 69, 74,
+ 62, 108, 105, 93, 104, 103, 91, 99, 95, 88,
+ 95, 94, 95, 86, 88, 88, 97, 91, 84, 78, 78,
+ 77, 74, 71, 68, 67, 0, 69, 69, 74, 0, 1, 64,
+ 1, 1, 1, 2, 5, 1, 0, 1, 5, 65, 69, 73, 69, 64,
+ 71, 13, 16, 14, 18, 13, 14, 14, 13, 12, 4, 11,
+ 8, 68, 64, 54, 55, 52, 54, 54, 62, 59, 61, 62,
+ 59, 62, 62, 58, 47, 28, 62, 62, 62, 62, 59,
+ 56, 40, 34, 32, 25, 15, 8, 64, 73, 62, 62, 62,
+ 62, 59, 59, 55, 53, 45, 41, 36, 31, 18, 10,
+ 64, 6, 6, 66, 24, 23, 21, 17, 14, 15, 8, 7, 4,
+ 64, 79, 71, 82, 91, 11, 66, 71, 80, 76, 68,
+ 75, 66, 5, 70, 74, 0, 4, 3, 4, 9, 4, 3, 60,
+ 50, 38, 29, 20, 8, 65, 80, 95, 66, 39, 28, 23,
+ 13, 14, 5, 2, 0, 76, 78, 73, 66, 73, 1, 7, 67,
+ 71, 2, 6, 6, 6, 11, 9, 4, 60, 50, 38, 29, 20,
+ 8, 65, 80, 95 },
+
+ {
+
+ 61,
+ 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 19, 34,
+ 49, 56, 19, 92, 66, 65, 67, 7, 6, 67, 1, 7,
+ 25, 28, 12, 71, 91, 87, 87, 72, 65, 67, 7, 74,
+ 78, 11, 13, 67, 77, 87, 3, 76, 84, 97, 7, 71,
+ 79, 4, 78, 79, 90, 65, 76, 70, 84, 67, 2, 22,
+ 0, 0, 0, 75, 89, 97, 65, 11, 64, 37, 6, 81,
+ 92, 75, 81, 5, 67, 81, 76, 95, 78, 91, 86, 95,
+ 80, 84, 87, 88, 24, 65, 70, 96, 71, 93, 83,
+ 99, 6, 1, 2, 68, 13, 68, 67, 80, 91, 79, 81,
+ 79, 69, 76, 89, 84, 69, 94, 84, 5, 65, 83, 86,
+ 1, 73, 67, 65, 3, 9, 0, 69, 82, 89, 72, 15,
+ 15, 88, 9, 7, 5, 4, 5, 3, 1, 2, 3, 7, 5, 2, 2,
+ 3, 70, 67, 5, 67, 15, 23, 24, 16, 9, 19, 12,
+ 16, 9, 1, 12, 11, 22, 89, 15, 27, 24, 46, 61,
+ 33, 39, 55, 55, 34, 47, 47, 62, 62, 29, 39,
+ 28, 36, 48, 38, 52, 61, 62, 30, 28, 44, 34,
+ 62, 4, 60, 62, 60, 58, 57, 52, 49, 48, 43, 37,
+ 33, 27, 21, 16, 66, 75, 73, 84, 16, 17, 14,
+ 15, 4, 4, 3, 14, 68, 72, 70, 82, 75, 107, 91,
+ 6, 9, 10, 2, 67, 72, 76, 78, 97, 82, 27, 15,
+ 9, 65, 0, 76, 85, 89, 97, 66, 28, 20, 15, 13,
+ 5, 1, 65, 69, 78, 67, 37, 31, 23, 7, 10, 65,
+ 73, 82, 9, 53, 47, 41, 30, 24, 3, 68, 69, 74,
+ 62, 107, 103, 92, 103, 102, 90, 97, 93, 87,
+ 94, 92, 93, 86, 87, 88, 96, 89, 83, 78, 78,
+ 77, 74, 71, 68, 67, 0, 69, 69, 75, 1, 2, 0, 1,
+ 1, 2, 2, 5, 1, 0, 1, 5, 65, 69, 73, 69, 64,
+ 72, 12, 16, 13, 17, 12, 14, 14, 12, 11, 4, 11,
+ 7, 68, 65, 53, 54, 51, 53, 52, 60, 57, 59, 59,
+ 57, 62, 60, 55, 45, 26, 62, 62, 62, 62, 55,
+ 53, 38, 32, 29, 23, 13, 6, 65, 74, 62, 62, 62,
+ 60, 56, 57, 52, 50, 42, 38, 33, 28, 16, 8, 65,
+ 5, 5, 67, 22, 21, 19, 15, 13, 13, 6, 6, 3, 65,
+ 80, 72, 83, 92, 10, 67, 72, 80, 75, 68, 74,
+ 65, 6, 70, 74, 0, 4, 3, 4, 10, 4, 3, 59, 48,
+ 36, 26, 17, 4, 69, 84, 98, 66, 39, 28, 23, 13,
+ 14, 6, 2, 0, 75, 78, 73, 65, 72, 2, 8, 67, 70,
+ 2, 6, 6, 6, 11, 9, 4, 59, 48, 36, 26, 17, 4,
+ 69, 84, 98 },
+
+ {
+
+ 60,
+ 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 18, 32,
+ 48, 56, 19, 90, 66, 65, 66, 7, 5, 67, 0, 5,
+ 24, 26, 8, 74, 93, 84, 86, 72, 65, 66, 7, 74,
+ 77, 11, 13, 67, 77, 86, 3, 76, 84, 97, 7, 71,
+ 79, 4, 78, 79, 90, 65, 76, 70, 84, 67, 2, 22,
+ 0, 0, 0, 74, 89, 97, 65, 10, 64, 36, 6, 81,
+ 90, 74, 80, 7, 65, 79, 75, 93, 76, 90, 85, 93,
+ 80, 84, 87, 88, 24, 65, 69, 94, 71, 92, 82,
+ 97, 6, 1, 2, 68, 13, 68, 67, 79, 90, 78, 80,
+ 78, 68, 75, 88, 82, 68, 93, 83, 10, 2, 80, 83,
+ 1, 73, 67, 64, 3, 9, 0, 69, 81, 87, 72, 13,
+ 13, 85, 9, 7, 5, 4, 5, 4, 3, 3, 3, 7, 5, 2, 3,
+ 4, 71, 67, 5, 67, 14, 22, 23, 16, 8, 19, 12,
+ 16, 8, 1, 11, 10, 21, 89, 15, 26, 23, 44, 58,
+ 32, 37, 53, 53, 32, 45, 45, 62, 62, 26, 37,
+ 26, 34, 45, 36, 49, 57, 62, 27, 26, 41, 32,
+ 62, 2, 58, 62, 58, 56, 55, 50, 47, 46, 41, 35,
+ 31, 25, 19, 14, 68, 75, 73, 84, 15, 16, 13,
+ 14, 3, 3, 2, 12, 69, 72, 71, 83, 76, 106, 90,
+ 7, 9, 10, 2, 66, 71, 75, 77, 95, 81, 27, 15,
+ 9, 64, 1, 75, 83, 87, 95, 65, 29, 21, 16, 14,
+ 6, 1, 65, 69, 77, 67, 38, 31, 22, 7, 10, 65,
+ 73, 81, 9, 53, 46, 40, 29, 24, 3, 68, 69, 74,
+ 62, 106, 102, 91, 101, 100, 89, 95, 91, 86,
+ 92, 90, 91, 85, 86, 87, 95, 87, 83, 78, 78,
+ 76, 73, 71, 68, 67, 64, 69, 69, 75, 1, 2, 1,
+ 1, 1, 2, 2, 5, 1, 0, 1, 4, 65, 69, 73, 69, 0,
+ 73, 11, 16, 12, 16, 11, 13, 14, 11, 10, 4, 11,
+ 6, 68, 66, 52, 53, 50, 51, 50, 58, 55, 57, 57,
+ 54, 61, 57, 52, 42, 24, 62, 62, 62, 62, 52,
+ 50, 35, 30, 27, 21, 11, 5, 66, 75, 62, 62, 62,
+ 58, 53, 54, 50, 47, 40, 36, 31, 25, 14, 6, 67,
+ 4, 4, 69, 21, 20, 17, 13, 11, 11, 4, 5, 1, 66,
+ 81, 73, 84, 93, 8, 68, 73, 80, 75, 67, 74, 64,
+ 7, 70, 73, 1, 5, 3, 5, 11, 4, 3, 58, 46, 33,
+ 23, 14, 0, 73, 88, 101, 66, 39, 28, 23, 13,
+ 15, 6, 2, 0, 75, 78, 73, 65, 72, 3, 9, 67, 70,
+ 2, 6, 6, 6, 12, 9, 4, 58, 46, 33, 23, 14, 0,
+ 73, 88, 101 },
+
+ {
+
+ 58,
+ 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 16, 29,
+ 47, 56, 18, 89, 67, 65, 66, 6, 4, 67, 64, 3,
+ 22, 23, 4, 77, 95, 82, 86, 72, 65, 66, 6, 75,
+ 77, 11, 12, 67, 77, 86, 2, 77, 84, 97, 6, 71,
+ 79, 4, 78, 79, 90, 65, 76, 71, 84, 67, 2, 22,
+ 0, 0, 0, 74, 90, 97, 65, 9, 65, 35, 5, 82, 88,
+ 73, 79, 8, 0, 78, 74, 92, 75, 89, 84, 91, 80,
+ 84, 87, 88, 24, 65, 69, 92, 71, 92, 82, 96, 5,
+ 0, 1, 69, 13, 68, 67, 79, 90, 78, 80, 78, 68,
+ 75, 87, 81, 68, 92, 82, 14, 6, 77, 81, 1, 73,
+ 67, 64, 2, 9, 0, 69, 81, 85, 72, 11, 11, 83,
+ 9, 7, 5, 4, 5, 4, 4, 3, 3, 6, 4, 2, 3, 4, 73,
+ 67, 5, 68, 13, 20, 22, 15, 7, 19, 12, 15, 6,
+ 1, 10, 9, 20, 89, 14, 25, 22, 41, 54, 30, 35,
+ 50, 50, 29, 42, 43, 55, 62, 22, 34, 24, 31,
+ 41, 33, 45, 52, 59, 24, 24, 37, 30, 62, 0, 55,
+ 59, 55, 53, 52, 47, 44, 43, 39, 32, 28, 23,
+ 17, 12, 70, 75, 74, 85, 14, 14, 11, 12, 1, 1,
+ 0, 10, 70, 73, 72, 84, 77, 106, 89, 7, 9, 10,
+ 2, 66, 71, 75, 77, 93, 80, 27, 15, 9, 64, 1,
+ 74, 82, 86, 94, 65, 29, 21, 16, 14, 7, 1, 65,
+ 69, 77, 68, 38, 30, 21, 6, 10, 65, 73, 81, 8,
+ 52, 45, 38, 28, 23, 3, 68, 69, 74, 62, 105,
+ 101, 90, 100, 99, 88, 94, 90, 85, 91, 89, 89,
+ 85, 86, 87, 94, 86, 83, 78, 78, 76, 73, 71,
+ 69, 68, 65, 69, 70, 76, 1, 2, 2, 1, 0, 2, 2,
+ 4, 1, 0, 1, 3, 65, 69, 73, 69, 0, 74, 10, 16,
+ 10, 15, 10, 12, 13, 10, 9, 4, 10, 4, 69, 68,
+ 50, 51, 49, 49, 48, 55, 52, 54, 54, 51, 58,
+ 54, 48, 39, 22, 62, 62, 61, 60, 48, 46, 32,
+ 27, 24, 19, 9, 3, 67, 76, 59, 60, 60, 55, 50,
+ 51, 47, 43, 37, 33, 28, 22, 12, 4, 69, 3, 3,
+ 71, 19, 18, 15, 10, 9, 9, 2, 3, 64, 68, 82,
+ 75, 85, 94, 6, 70, 75, 80, 75, 67, 74, 0, 8,
+ 70, 73, 1, 5, 3, 5, 11, 4, 2, 56, 44, 30, 19,
+ 10, 67, 78, 93, 104, 66, 39, 28, 23, 13, 15,
+ 6, 2, 0, 75, 78, 73, 65, 72, 3, 9, 67, 70, 2,
+ 6, 6, 6, 12, 8, 3, 56, 44, 30, 19, 10, 67, 78,
+ 93, 104 },
+
+ {
+
+ 57,
+ 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 15, 27,
+ 46, 57, 18, 87, 67, 64, 65, 6, 4, 66, 64, 2,
+ 21, 21, 1, 79, 96, 79, 85, 71, 64, 65, 6, 75,
+ 76, 11, 12, 66, 76, 85, 2, 77, 83, 96, 6, 70,
+ 78, 4, 77, 78, 89, 64, 75, 71, 83, 66, 2, 22,
+ 0, 0, 0, 73, 90, 97, 64, 9, 65, 35, 5, 82, 85,
+ 71, 77, 10, 3, 76, 72, 90, 73, 87, 82, 88, 80,
+ 83, 86, 87, 24, 65, 68, 89, 70, 91, 81, 94, 5,
+ 0, 1, 69, 14, 68, 66, 78, 89, 77, 79, 77, 67,
+ 74, 85, 79, 67, 90, 80, 19, 11, 73, 78, 2, 72,
+ 66, 0, 2, 10, 1, 68, 80, 82, 71, 9, 10, 80, 9,
+ 8, 5, 5, 6, 5, 6, 4, 4, 6, 4, 2, 4, 5, 74, 66,
+ 5, 68, 13, 19, 22, 15, 7, 19, 12, 15, 5, 2,
+ 10, 9, 20, 89, 14, 25, 22, 39, 51, 29, 34, 48,
+ 48, 27, 40, 41, 49, 62, 19, 32, 23, 29, 38,
+ 31, 42, 48, 55, 22, 22, 34, 28, 62, 64, 53,
+ 57, 53, 51, 50, 45, 42, 41, 37, 30, 26, 22,
+ 16, 11, 71, 75, 74, 85, 14, 13, 10, 11, 0, 0,
+ 64, 9, 71, 73, 73, 84, 77, 105, 87, 8, 10, 10,
+ 3, 65, 70, 74, 76, 90, 78, 28, 16, 10, 0, 2,
+ 72, 80, 84, 92, 64, 30, 22, 17, 15, 8, 2, 64,
+ 68, 76, 68, 39, 30, 21, 6, 11, 64, 73, 80, 8,
+ 52, 45, 37, 27, 23, 4, 67, 68, 73, 62, 103,
+ 99, 88, 98, 97, 86, 92, 88, 83, 89, 87, 86,
+ 84, 85, 86, 92, 84, 82, 77, 77, 75, 72, 70,
+ 69, 68, 65, 69, 70, 76, 2, 3, 3, 2, 0, 3, 3,
+ 4, 2, 1, 1, 3, 64, 68, 72, 68, 1, 74, 9, 16,
+ 9, 15, 10, 12, 13, 10, 9, 4, 10, 3, 69, 69,
+ 49, 50, 49, 48, 47, 53, 50, 52, 52, 49, 56,
+ 52, 45, 37, 20, 61, 60, 57, 56, 45, 43, 30,
+ 25, 22, 18, 8, 2, 67, 76, 57, 58, 58, 53, 48,
+ 49, 45, 40, 35, 31, 26, 20, 11, 3, 70, 3, 3,
+ 72, 18, 17, 14, 8, 8, 8, 1, 2, 65, 69, 82, 76,
+ 85, 94, 5, 71, 76, 79, 74, 66, 73, 2, 10, 69,
+ 72, 2, 6, 4, 6, 12, 4, 2, 55, 42, 28, 16, 7,
+ 71, 82, 97, 106, 65, 39, 29, 24, 14, 16, 7, 3,
+ 1, 74, 77, 72, 64, 71, 4, 10, 66, 69, 3, 7, 6,
+ 7, 13, 8, 3, 55, 42, 28, 16, 7, 71, 82, 97,
+ 106 },
+
+ {
+
+ 56,
+ 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 14, 25,
+ 45, 57, 18, 85, 67, 64, 64, 6, 3, 66, 65, 0,
+ 20, 18, 66, 82, 98, 76, 84, 71, 64, 64, 6, 75,
+ 75, 11, 11, 66, 76, 84, 2, 77, 83, 96, 6, 70,
+ 78, 4, 77, 78, 89, 64, 75, 71, 83, 66, 2, 22,
+ 0, 0, 0, 72, 90, 97, 64, 8, 65, 34, 5, 82, 83,
+ 70, 76, 12, 5, 75, 71, 88, 72, 86, 81, 86, 80,
+ 83, 86, 87, 24, 65, 67, 87, 70, 90, 80, 92, 5,
+ 0, 1, 70, 14, 68, 66, 77, 88, 76, 78, 76, 67,
+ 73, 84, 78, 66, 89, 79, 24, 15, 70, 75, 2, 72,
+ 66, 0, 2, 10, 1, 68, 80, 80, 71, 7, 8, 77, 9,
+ 8, 5, 5, 6, 5, 8, 5, 4, 6, 4, 2, 5, 6, 75, 66,
+ 5, 68, 12, 18, 21, 15, 6, 19, 12, 15, 4, 2, 9,
+ 8, 19, 89, 14, 24, 21, 37, 48, 28, 32, 46, 46,
+ 25, 38, 39, 43, 62, 15, 30, 21, 27, 35, 29,
+ 39, 44, 51, 19, 20, 31, 26, 62, 66, 51, 55,
+ 51, 49, 48, 43, 40, 39, 35, 28, 24, 20, 14, 9,
+ 73, 75, 74, 86, 13, 12, 9, 10, 64, 64, 65, 7,
+ 72, 73, 74, 85, 78, 104, 86, 9, 10, 10, 3, 64,
+ 69, 73, 75, 88, 77, 28, 16, 10, 1, 3, 71, 78,
+ 82, 90, 0, 31, 23, 18, 16, 9, 2, 64, 68, 75,
+ 68, 40, 30, 20, 6, 11, 64, 73, 80, 8, 52, 44,
+ 36, 26, 23, 4, 67, 68, 73, 62, 102, 98, 87,
+ 96, 95, 85, 90, 86, 82, 87, 85, 84, 83, 84,
+ 86, 91, 82, 82, 77, 77, 74, 72, 70, 69, 68,
+ 66, 69, 70, 76, 2, 3, 4, 2, 0, 3, 3, 4, 2, 1,
+ 1, 2, 64, 68, 72, 68, 2, 75, 8, 16, 8, 14, 9,
+ 11, 13, 9, 8, 4, 10, 2, 69, 70, 48, 49, 48,
+ 46, 45, 51, 48, 50, 50, 46, 53, 49, 42, 34,
+ 18, 57, 56, 53, 51, 42, 40, 27, 23, 19, 16, 6,
+ 1, 68, 77, 55, 56, 55, 51, 45, 46, 42, 37, 33,
+ 28, 24, 17, 9, 1, 72, 2, 2, 74, 17, 16, 12, 6,
+ 6, 6, 64, 1, 67, 70, 83, 77, 86, 95, 3, 72,
+ 77, 79, 74, 65, 73, 3, 11, 69, 71, 3, 7, 4, 6,
+ 13, 4, 2, 54, 40, 25, 13, 4, 75, 86, 101, 109,
+ 65, 39, 29, 24, 14, 17, 7, 3, 1, 74, 77, 72,
+ 64, 71, 5, 11, 66, 69, 3, 7, 6, 7, 14, 8, 3,
+ 54, 40, 25, 13, 4, 75, 86, 101, 109 },
+
+ {
+
+ 55,
+ 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 12, 23,
+ 44, 57, 18, 83, 67, 0, 0, 6, 3, 66, 65, 64,
+ 19, 16, 69, 84, 99, 73, 83, 71, 0, 0, 6, 76,
+ 74, 11, 11, 66, 76, 84, 2, 77, 83, 96, 6, 70,
+ 77, 4, 77, 77, 89, 64, 75, 71, 83, 66, 2, 22,
+ 0, 0, 0, 72, 90, 97, 0, 7, 65, 33, 4, 82, 80,
+ 69, 75, 13, 7, 73, 69, 86, 70, 85, 80, 84, 80,
+ 83, 85, 87, 24, 65, 66, 85, 70, 89, 80, 90, 5,
+ 64, 0, 70, 14, 68, 65, 77, 87, 75, 78, 75, 66,
+ 72, 82, 76, 66, 88, 78, 29, 20, 67, 72, 2, 72,
+ 66, 1, 2, 10, 2, 67, 79, 78, 71, 5, 6, 74, 9,
+ 8, 5, 5, 6, 6, 9, 6, 4, 6, 4, 2, 5, 7, 76, 66,
+ 5, 69, 12, 17, 21, 15, 6, 19, 12, 15, 3, 2, 8,
+ 7, 19, 89, 14, 24, 21, 35, 45, 27, 30, 44, 43,
+ 23, 35, 37, 36, 62, 12, 28, 19, 25, 32, 26,
+ 36, 40, 47, 16, 18, 27, 24, 62, 68, 49, 53,
+ 49, 46, 45, 41, 38, 37, 33, 26, 22, 18, 12, 8,
+ 75, 75, 74, 86, 12, 11, 8, 9, 65, 65, 66, 5,
+ 73, 74, 75, 86, 78, 104, 84, 10, 11, 10, 3, 0,
+ 69, 73, 75, 86, 76, 28, 16, 11, 2, 4, 70, 77,
+ 81, 88, 1, 31, 23, 18, 16, 10, 3, 64, 68, 75,
+ 68, 40, 30, 19, 6, 11, 64, 73, 79, 8, 51, 44,
+ 35, 25, 23, 4, 67, 68, 73, 62, 101, 96, 86,
+ 95, 94, 84, 88, 84, 81, 86, 83, 82, 83, 83,
+ 85, 90, 80, 81, 77, 77, 74, 71, 70, 69, 68,
+ 66, 69, 70, 77, 3, 4, 5, 2, 0, 4, 3, 4, 2, 1,
+ 1, 2, 64, 68, 72, 68, 2, 76, 7, 16, 7, 13, 8,
+ 11, 13, 8, 7, 4, 10, 1, 69, 71, 47, 48, 47,
+ 45, 43, 49, 46, 48, 47, 44, 50, 46, 39, 32,
+ 16, 53, 52, 49, 46, 38, 37, 25, 21, 17, 14, 4,
+ 64, 69, 78, 53, 53, 53, 48, 42, 44, 40, 34,
+ 30, 26, 21, 14, 7, 64, 73, 1, 1, 75, 15, 14,
+ 10, 4, 5, 4, 66, 0, 68, 71, 84, 78, 87, 96, 2,
+ 73, 78, 79, 73, 65, 72, 4, 12, 69, 71, 3, 7,
+ 4, 7, 14, 4, 2, 53, 38, 23, 10, 1, 79, 90,
+ 105, 112, 65, 39, 29, 24, 14, 17, 8, 3, 1, 73,
+ 77, 72, 0, 70, 6, 12, 66, 68, 3, 7, 6, 7, 14,
+ 8, 3, 53, 38, 23, 10, 1, 79, 90, 105, 112 },
+
+ {
+
+ 53,
+ 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 11, 21,
+ 43, 57, 17, 81, 67, 0, 1, 5, 2, 66, 66, 66,
+ 18, 13, 73, 87, 101, 70, 82, 71, 0, 1, 5, 76,
+ 73, 11, 10, 66, 75, 83, 2, 77, 83, 96, 6, 70,
+ 77, 4, 77, 77, 88, 64, 75, 71, 83, 65, 2, 22,
+ 0, 0, 0, 71, 91, 97, 0, 6, 65, 32, 4, 82, 78,
+ 68, 74, 15, 9, 72, 68, 85, 69, 83, 79, 82, 80,
+ 82, 85, 87, 24, 65, 65, 83, 70, 89, 79, 88, 5,
+ 64, 0, 71, 14, 68, 65, 76, 86, 74, 77, 74, 66,
+ 72, 81, 75, 65, 87, 77, 34, 24, 64, 69, 2, 71,
+ 66, 1, 2, 10, 2, 67, 79, 76, 71, 3, 4, 72, 9,
+ 9, 5, 5, 6, 6, 11, 7, 5, 5, 4, 2, 6, 7, 77,
+ 66, 5, 69, 11, 16, 20, 14, 5, 19, 12, 15, 2,
+ 2, 7, 6, 18, 89, 13, 23, 20, 33, 41, 26, 28,
+ 42, 41, 21, 33, 35, 30, 62, 8, 26, 17, 22, 29,
+ 24, 32, 35, 43, 13, 16, 24, 22, 62, 69, 47,
+ 51, 46, 44, 43, 39, 36, 35, 31, 23, 20, 16,
+ 10, 6, 77, 75, 74, 87, 11, 10, 7, 7, 66, 67,
+ 67, 4, 74, 74, 76, 86, 79, 103, 83, 11, 11,
+ 10, 4, 0, 68, 72, 74, 84, 74, 28, 17, 11, 2,
+ 5, 69, 75, 79, 87, 1, 32, 24, 19, 17, 11, 3,
+ 64, 67, 74, 68, 41, 30, 19, 5, 11, 64, 73, 79,
+ 8, 51, 43, 34, 24, 23, 4, 66, 68, 73, 62, 99,
+ 95, 85, 93, 92, 83, 87, 83, 80, 84, 82, 80,
+ 82, 82, 85, 89, 79, 81, 77, 77, 73, 71, 70,
+ 70, 68, 67, 69, 70, 77, 3, 4, 6, 2, 0, 4, 3,
+ 4, 2, 1, 1, 1, 64, 68, 72, 68, 3, 77, 6, 16,
+ 6, 13, 7, 10, 13, 7, 6, 4, 10, 0, 69, 72, 46,
+ 47, 46, 43, 41, 47, 44, 45, 45, 41, 47, 43,
+ 36, 29, 14, 48, 48, 45, 41, 35, 33, 22, 18,
+ 14, 12, 2, 65, 70, 78, 50, 51, 50, 46, 39, 41,
+ 37, 31, 28, 23, 19, 11, 5, 66, 75, 0, 1, 77,
+ 14, 13, 9, 2, 3, 2, 67, 64, 70, 72, 85, 80,
+ 88, 96, 0, 75, 80, 78, 73, 64, 72, 5, 13, 69,
+ 70, 4, 8, 4, 7, 15, 4, 2, 52, 36, 20, 7, 66,
+ 83, 95, 109, 115, 64, 39, 29, 24, 14, 18, 8,
+ 3, 1, 73, 77, 72, 0, 70, 6, 13, 66, 68, 4, 8,
+ 6, 7, 15, 8, 3, 52, 36, 20, 7, 66, 83, 95,
+ 109, 115 },
+
+ {
+
+ 52,
+ 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 10, 19,
+ 42, 57, 17, 79, 67, 0, 2, 5, 2, 66, 67, 68,
+ 17, 11, 76, 89, 103, 67, 81, 70, 0, 2, 5, 76,
+ 72, 11, 10, 66, 75, 82, 2, 77, 83, 96, 6, 70,
+ 77, 4, 77, 76, 88, 64, 75, 71, 83, 65, 2, 22,
+ 0, 0, 0, 71, 91, 97, 1, 5, 65, 31, 3, 82, 75,
+ 67, 73, 17, 11, 70, 66, 83, 67, 82, 78, 79,
+ 80, 82, 84, 86, 24, 65, 64, 81, 70, 88, 79,
+ 86, 5, 65, 64, 71, 14, 68, 64, 76, 85, 73, 77,
+ 73, 65, 71, 79, 73, 64, 85, 76, 39, 28, 2, 66,
+ 3, 71, 65, 2, 2, 10, 3, 67, 78, 74, 71, 1, 2,
+ 69, 9, 9, 5, 5, 6, 7, 12, 8, 5, 5, 4, 2, 6, 8,
+ 78, 65, 5, 70, 11, 15, 20, 14, 5, 19, 12, 15,
+ 1, 3, 6, 6, 18, 89, 13, 22, 19, 31, 38, 25,
+ 27, 40, 39, 19, 30, 33, 24, 62, 5, 24, 15, 20,
+ 26, 21, 29, 31, 39, 11, 14, 20, 20, 62, 71,
+ 45, 49, 44, 42, 41, 37, 34, 33, 29, 21, 18,
+ 14, 9, 4, 79, 75, 74, 87, 10, 9, 6, 6, 67, 68,
+ 68, 2, 75, 75, 77, 87, 80, 102, 81, 12, 12,
+ 10, 4, 1, 68, 72, 73, 82, 73, 28, 17, 12, 3,
+ 6, 68, 74, 78, 85, 2, 33, 25, 20, 17, 12, 4,
+ 0, 67, 74, 68, 41, 30, 18, 5, 11, 64, 73, 78,
+ 8, 50, 42, 33, 23, 23, 4, 66, 68, 72, 62, 98,
+ 93, 83, 91, 91, 82, 85, 81, 79, 82, 80, 78,
+ 81, 81, 84, 88, 77, 81, 77, 76, 73, 70, 70,
+ 70, 68, 67, 69, 70, 78, 4, 4, 7, 2, 0, 4, 3,
+ 4, 2, 1, 1, 1, 64, 68, 72, 68, 3, 78, 5, 16,
+ 5, 12, 7, 9, 13, 7, 5, 4, 10, 64, 69, 73, 45,
+ 46, 45, 41, 39, 45, 42, 43, 42, 38, 44, 40,
+ 33, 27, 12, 44, 44, 41, 36, 32, 30, 20, 16,
+ 12, 10, 1, 67, 70, 79, 48, 48, 48, 44, 36, 38,
+ 35, 28, 26, 21, 17, 8, 3, 68, 76, 0, 0, 79,
+ 13, 11, 7, 0, 1, 0, 69, 65, 71, 73, 85, 81,
+ 89, 97, 64, 76, 81, 78, 73, 0, 71, 6, 14, 68,
+ 69, 4, 8, 4, 8, 16, 4, 2, 51, 34, 17, 4, 69,
+ 87, 99, 113, 118, 64, 39, 29, 24, 14, 18, 8,
+ 4, 2, 72, 77, 72, 1, 70, 7, 14, 66, 68, 4, 8,
+ 6, 8, 15, 8, 3, 51, 34, 17, 4, 69, 87, 99,
+ 113, 118 },
+
+ {
+
+ 51,
+ 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 8, 17,
+ 41, 57, 17, 78, 67, 1, 2, 5, 1, 65, 67, 69,
+ 15, 8, 80, 92, 104, 65, 80, 70, 1, 2, 5, 77,
+ 72, 11, 9, 66, 75, 82, 2, 77, 82, 95, 6, 69,
+ 76, 4, 76, 76, 88, 64, 75, 71, 83, 65, 2, 22,
+ 0, 0, 0, 70, 91, 97, 1, 5, 66, 31, 3, 82, 73,
+ 66, 72, 18, 14, 69, 65, 81, 66, 81, 77, 77,
+ 80, 82, 84, 86, 24, 65, 0, 78, 70, 87, 78, 84,
+ 4, 65, 64, 72, 15, 68, 64, 75, 85, 73, 76, 72,
+ 65, 70, 78, 72, 64, 84, 75, 44, 33, 5, 0, 3,
+ 71, 65, 2, 2, 10, 3, 66, 78, 72, 70, 64, 0,
+ 66, 9, 9, 5, 5, 7, 7, 14, 9, 5, 5, 4, 2, 7, 9,
+ 79, 65, 5, 70, 10, 14, 19, 14, 4, 19, 12, 15,
+ 64, 3, 5, 5, 17, 89, 13, 22, 19, 29, 35, 24,
+ 25, 37, 36, 17, 28, 31, 17, 62, 1, 22, 14, 18,
+ 22, 19, 26, 27, 34, 8, 12, 17, 18, 62, 73, 43,
+ 47, 42, 39, 38, 35, 31, 31, 27, 19, 15, 12, 7,
+ 3, 80, 75, 74, 88, 9, 8, 5, 5, 68, 69, 69, 0,
+ 76, 75, 78, 88, 80, 102, 80, 13, 12, 10, 4, 2,
+ 67, 71, 73, 80, 72, 29, 17, 12, 4, 7, 67, 72,
+ 76, 83, 3, 33, 25, 20, 18, 13, 4, 0, 67, 73,
+ 68, 42, 30, 17, 5, 11, 64, 73, 78, 7, 50, 42,
+ 32, 22, 22, 4, 66, 68, 72, 62, 97, 92, 82, 90,
+ 89, 81, 83, 79, 78, 81, 78, 76, 81, 80, 84,
+ 87, 75, 80, 77, 76, 72, 70, 70, 70, 68, 68,
+ 69, 70, 78, 4, 5, 8, 2, 0, 5, 4, 4, 3, 2, 1,
+ 0, 64, 68, 72, 68, 4, 79, 4, 16, 4, 11, 6, 9,
+ 13, 6, 5, 4, 9, 66, 70, 74, 43, 45, 44, 40,
+ 37, 43, 40, 41, 40, 36, 41, 38, 30, 24, 10,
+ 40, 40, 37, 32, 28, 27, 17, 14, 9, 8, 64, 68,
+ 71, 80, 46, 46, 45, 41, 33, 36, 32, 25, 23,
+ 18, 14, 5, 1, 69, 78, 64, 64, 80, 11, 10, 5,
+ 65, 0, 65, 71, 66, 73, 74, 86, 82, 90, 98, 66,
+ 77, 82, 78, 72, 0, 71, 7, 15, 68, 69, 5, 9, 4,
+ 8, 17, 4, 2, 50, 32, 15, 1, 72, 91, 103, 117,
+ 121, 64, 39, 29, 24, 14, 19, 9, 4, 2, 72, 76,
+ 71, 1, 69, 8, 15, 65, 67, 4, 8, 6, 8, 16, 8,
+ 3, 50, 32, 15, 1, 72, 91, 103, 117, 121 },
+
+ {
+
+ 50,
+ 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 7, 15,
+ 40, 57, 17, 76, 67, 1, 3, 4, 1, 65, 68, 71,
+ 14, 6, 83, 94, 106, 1, 79, 70, 1, 3, 4, 77,
+ 71, 11, 9, 66, 74, 81, 2, 77, 82, 95, 6, 69,
+ 76, 4, 76, 75, 87, 64, 75, 71, 83, 64, 2, 22,
+ 0, 0, 0, 70, 91, 97, 2, 4, 66, 30, 2, 82, 70,
+ 65, 71, 20, 16, 67, 0, 80, 64, 79, 76, 75, 80,
+ 81, 83, 86, 24, 65, 1, 76, 70, 86, 78, 82, 4,
+ 66, 65, 72, 15, 68, 0, 75, 84, 72, 76, 71, 64,
+ 69, 76, 70, 0, 83, 74, 49, 37, 8, 3, 3, 70,
+ 65, 3, 2, 10, 4, 66, 77, 70, 70, 66, 65, 0, 9,
+ 10, 5, 5, 7, 8, 15, 10, 6, 4, 4, 2, 7, 9, 80,
+ 65, 5, 71, 10, 13, 19, 13, 4, 19, 12, 15, 65,
+ 3, 4, 4, 17, 89, 13, 21, 18, 27, 32, 23, 23,
+ 35, 34, 15, 25, 29, 11, 62, 65, 20, 12, 15,
+ 19, 16, 23, 22, 30, 5, 10, 13, 16, 62, 74, 41,
+ 45, 40, 37, 36, 33, 29, 29, 25, 16, 13, 10, 5,
+ 1, 82, 75, 74, 88, 8, 7, 4, 3, 69, 70, 70, 64,
+ 77, 76, 79, 88, 81, 101, 78, 14, 13, 10, 5, 2,
+ 67, 71, 72, 78, 70, 29, 18, 13, 4, 8, 66, 71,
+ 75, 81, 3, 34, 26, 21, 18, 14, 5, 0, 66, 73,
+ 68, 42, 30, 17, 4, 11, 64, 73, 77, 7, 49, 41,
+ 31, 21, 22, 4, 65, 68, 72, 62, 95, 90, 81, 88,
+ 88, 80, 82, 78, 77, 79, 77, 74, 80, 79, 83,
+ 86, 73, 80, 77, 76, 72, 69, 70, 71, 68, 68,
+ 69, 70, 79, 5, 5, 9, 2, 0, 5, 4, 4, 3, 2, 1,
+ 0, 64, 68, 72, 68, 4, 80, 3, 16, 3, 11, 5, 8,
+ 13, 5, 4, 4, 9, 67, 70, 75, 42, 44, 43, 38,
+ 35, 41, 38, 38, 37, 33, 38, 35, 27, 22, 8, 35,
+ 36, 33, 27, 25, 24, 15, 12, 7, 6, 66, 70, 72,
+ 80, 43, 43, 43, 39, 30, 33, 30, 22, 21, 16,
+ 12, 2, 64, 71, 79, 65, 64, 82, 10, 8, 4, 67,
+ 65, 67, 72, 67, 74, 75, 87, 84, 91, 98, 67,
+ 79, 84, 77, 72, 1, 70, 8, 16, 68, 68, 5, 9, 4,
+ 9, 18, 4, 2, 49, 30, 12, 65, 76, 95, 107, 121,
+ 124, 0, 39, 29, 24, 14, 19, 9, 4, 2, 71, 76,
+ 71, 2, 69, 9, 16, 65, 67, 5, 9, 6, 8, 16, 8,
+ 3, 49, 30, 12, 65, 76, 95, 107, 121, 124 },
+
+ {
+
+ 48,
+ 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 5, 12,
+ 39, 57, 16, 74, 68, 1, 4, 4, 0, 65, 69, 73,
+ 13, 3, 87, 97, 108, 4, 78, 70, 1, 4, 4, 78,
+ 70, 11, 8, 66, 74, 81, 1, 78, 82, 95, 6, 69,
+ 76, 4, 76, 75, 87, 64, 75, 71, 83, 64, 2, 22,
+ 0, 0, 0, 69, 92, 97, 2, 3, 66, 29, 2, 83, 68,
+ 64, 70, 21, 18, 66, 1, 78, 0, 78, 75, 73, 80,
+ 81, 83, 86, 24, 65, 2, 74, 70, 86, 77, 80, 4,
+ 66, 65, 73, 15, 68, 0, 74, 83, 71, 75, 71, 64,
+ 69, 75, 69, 0, 82, 73, 53, 41, 11, 5, 3, 70,
+ 65, 3, 1, 10, 4, 66, 77, 68, 70, 68, 67, 2, 9,
+ 10, 5, 5, 7, 8, 17, 10, 6, 4, 3, 2, 8, 10, 82,
+ 65, 5, 71, 9, 11, 18, 13, 3, 19, 12, 14, 66,
+ 3, 3, 3, 16, 89, 12, 20, 17, 25, 28, 21, 21,
+ 33, 31, 12, 23, 27, 4, 62, 69, 18, 10, 13, 16,
+ 14, 19, 18, 26, 2, 8, 10, 14, 62, 76, 39, 42,
+ 37, 34, 33, 30, 27, 26, 23, 14, 11, 8, 3, 64,
+ 84, 75, 75, 89, 7, 5, 3, 2, 70, 72, 72, 66,
+ 78, 76, 80, 89, 82, 101, 77, 15, 13, 10, 5, 3,
+ 66, 70, 72, 76, 69, 29, 18, 13, 5, 9, 65, 69,
+ 73, 80, 4, 34, 26, 21, 19, 15, 5, 0, 66, 72,
+ 69, 43, 30, 16, 4, 11, 64, 73, 77, 7, 49, 40,
+ 30, 20, 22, 4, 65, 68, 72, 62, 94, 89, 80, 87,
+ 86, 79, 80, 76, 76, 78, 75, 72, 80, 79, 83,
+ 85, 72, 80, 77, 76, 71, 69, 70, 71, 68, 69,
+ 69, 70, 79, 5, 5, 10, 2, 64, 5, 4, 3, 3, 2, 1,
+ 64, 64, 68, 72, 68, 5, 81, 2, 16, 1, 10, 4, 7,
+ 12, 4, 3, 4, 9, 68, 70, 77, 41, 42, 42, 36,
+ 33, 39, 36, 36, 35, 30, 35, 32, 24, 19, 6, 31,
+ 32, 28, 22, 21, 20, 12, 9, 4, 4, 68, 71, 73,
+ 81, 41, 41, 40, 36, 27, 30, 27, 19, 18, 13, 9,
+ 64, 66, 73, 81, 66, 65, 84, 8, 7, 2, 69, 67,
+ 69, 74, 69, 76, 77, 88, 85, 92, 99, 69, 80,
+ 85, 77, 72, 1, 70, 9, 17, 68, 68, 6, 10, 4, 9,
+ 18, 4, 1, 48, 28, 9, 68, 79, 99, 112, 126,
+ 126, 0, 39, 29, 24, 14, 20, 9, 4, 2, 71, 76,
+ 71, 2, 69, 9, 16, 65, 67, 5, 9, 6, 8, 17, 8,
+ 2, 48, 28, 9, 68, 79, 99, 112, 126, 126 },
+
+ {
+
+ 47,
+ 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 4, 10,
+ 38, 58, 16, 72, 68, 2, 5, 4, 64, 65, 69, 74,
+ 12, 1, 91, 100, 109, 7, 77, 69, 2, 5, 4, 78,
+ 69, 11, 8, 65, 74, 80, 1, 78, 82, 95, 6, 69,
+ 75, 4, 76, 75, 87, 64, 75, 71, 82, 64, 2, 22,
+ 0, 0, 0, 68, 92, 97, 2, 2, 66, 28, 2, 83, 66,
+ 1, 69, 23, 20, 64, 2, 76, 2, 77, 73, 70, 80,
+ 81, 83, 85, 24, 65, 3, 72, 69, 85, 76, 78, 4,
+ 66, 65, 73, 15, 68, 0, 73, 82, 70, 74, 70, 0,
+ 68, 74, 67, 1, 80, 72, 58, 46, 15, 8, 4, 70,
+ 64, 4, 1, 10, 4, 65, 76, 65, 70, 70, 68, 5, 9,
+ 10, 5, 5, 7, 9, 19, 11, 6, 4, 3, 2, 9, 11, 83,
+ 64, 5, 71, 8, 10, 17, 13, 2, 19, 12, 14, 67,
+ 4, 2, 3, 15, 89, 12, 20, 17, 23, 25, 20, 20,
+ 31, 29, 10, 21, 25, 65, 62, 72, 16, 8, 11, 13,
+ 12, 16, 14, 22, 0, 6, 7, 12, 62, 78, 37, 40,
+ 35, 32, 31, 28, 25, 24, 21, 12, 9, 7, 2, 65,
+ 86, 75, 75, 89, 7, 4, 2, 1, 71, 73, 73, 68,
+ 79, 76, 81, 90, 82, 100, 76, 16, 13, 10, 5, 4,
+ 65, 69, 71, 73, 68, 29, 18, 13, 6, 10, 64, 67,
+ 71, 78, 5, 35, 27, 22, 20, 16, 5, 1, 66, 71,
+ 69, 44, 30, 15, 4, 12, 0, 73, 76, 7, 49, 40,
+ 29, 19, 22, 4, 65, 68, 71, 62, 93, 88, 78, 85,
+ 84, 78, 78, 74, 75, 76, 73, 70, 79, 78, 82,
+ 84, 70, 79, 76, 75, 70, 68, 70, 71, 68, 70,
+ 69, 70, 79, 5, 6, 11, 3, 64, 6, 4, 3, 3, 2, 1,
+ 65, 64, 67, 71, 68, 6, 81, 1, 16, 0, 9, 4, 7,
+ 12, 4, 2, 4, 9, 69, 70, 78, 40, 41, 42, 35,
+ 31, 37, 34, 34, 33, 28, 32, 29, 21, 16, 4, 27,
+ 28, 24, 17, 18, 17, 9, 7, 2, 3, 69, 72, 73,
+ 82, 39, 39, 38, 34, 25, 28, 25, 16, 16, 11, 7,
+ 66, 68, 75, 83, 66, 66, 85, 7, 6, 0, 71, 68,
+ 70, 76, 70, 78, 78, 88, 86, 92, 100, 71, 81,
+ 86, 77, 71, 2, 70, 10, 19, 67, 67, 7, 11, 4,
+ 10, 19, 4, 1, 47, 26, 7, 71, 82, 103, 116,
+ 126, 126, 0, 39, 29, 25, 15, 21, 10, 5, 3, 71,
+ 76, 71, 2, 68, 10, 17, 65, 66, 5, 9, 6, 9, 18,
+ 8, 2, 47, 26, 7, 71, 82, 103, 116, 126, 126 },
+
+ {
+
+ 46,
+ 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 3, 8, 37,
+ 58, 16, 70, 68, 2, 6, 3, 64, 65, 70, 76, 11,
+ 65, 94, 102, 111, 10, 76, 69, 2, 6, 3, 78, 68,
+ 11, 7, 65, 73, 79, 1, 78, 82, 95, 6, 69, 75,
+ 4, 76, 74, 86, 64, 75, 71, 82, 0, 2, 22, 0, 0,
+ 0, 68, 92, 97, 3, 1, 66, 27, 1, 83, 0, 2, 68,
+ 25, 22, 0, 4, 75, 3, 75, 72, 68, 80, 80, 82,
+ 85, 24, 65, 4, 70, 69, 84, 76, 76, 4, 67, 66,
+ 74, 15, 68, 1, 73, 81, 69, 74, 69, 0, 67, 72,
+ 66, 2, 79, 71, 62, 50, 18, 11, 4, 69, 64, 4,
+ 1, 10, 5, 65, 76, 0, 70, 72, 70, 8, 9, 11, 5,
+ 5, 7, 9, 20, 12, 7, 3, 3, 2, 9, 11, 84, 64, 5,
+ 72, 8, 9, 17, 12, 2, 19, 12, 14, 68, 4, 1, 2,
+ 15, 89, 12, 19, 16, 21, 22, 19, 18, 29, 27, 8,
+ 18, 23, 71, 62, 76, 14, 6, 8, 10, 9, 13, 9,
+ 18, 66, 4, 3, 10, 62, 79, 35, 38, 33, 30, 29,
+ 26, 23, 22, 19, 9, 7, 5, 0, 67, 88, 75, 75,
+ 90, 6, 3, 1, 64, 72, 74, 74, 69, 80, 77, 82,
+ 90, 83, 99, 74, 17, 14, 10, 6, 4, 65, 69, 70,
+ 71, 66, 29, 19, 14, 6, 11, 0, 66, 70, 76, 5,
+ 36, 28, 23, 20, 17, 6, 1, 65, 71, 69, 44, 30,
+ 15, 3, 12, 0, 73, 76, 7, 48, 39, 28, 18, 22,
+ 4, 64, 68, 71, 62, 91, 86, 77, 83, 83, 77, 77,
+ 73, 74, 74, 72, 68, 78, 77, 82, 83, 68, 79,
+ 76, 75, 70, 68, 70, 72, 68, 70, 69, 70, 80, 6,
+ 6, 12, 3, 64, 6, 4, 3, 3, 2, 1, 65, 64, 67,
+ 71, 68, 6, 82, 0, 16, 64, 9, 3, 6, 12, 3, 1,
+ 4, 9, 70, 70, 79, 39, 40, 41, 33, 29, 35, 32,
+ 31, 30, 25, 29, 26, 18, 14, 2, 22, 24, 20, 12,
+ 15, 14, 7, 5, 64, 1, 71, 74, 74, 82, 36, 36,
+ 35, 32, 22, 25, 22, 13, 14, 8, 5, 69, 70, 77,
+ 84, 67, 66, 87, 6, 4, 64, 73, 70, 72, 77, 71,
+ 79, 79, 89, 88, 93, 100, 72, 83, 88, 76, 71,
+ 3, 69, 11, 20, 67, 66, 7, 11, 4, 10, 20, 4, 1,
+ 46, 24, 4, 74, 86, 107, 120, 126, 126, 1, 39,
+ 29, 25, 15, 21, 10, 5, 3, 70, 76, 71, 3, 68,
+ 11, 18, 65, 66, 6, 10, 6, 9, 18, 8, 2, 46, 24,
+ 4, 74, 86, 107, 120, 126, 126 },
+
+ {
+
+ 45,
+ 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 1, 6, 36,
+ 58, 16, 69, 68, 3, 6, 3, 65, 64, 70, 77, 9,
+ 67, 98, 105, 112, 12, 75, 69, 3, 6, 3, 79, 68,
+ 11, 7, 65, 73, 79, 1, 78, 81, 94, 6, 68, 74,
+ 4, 75, 74, 86, 64, 75, 71, 82, 0, 2, 22, 0, 0,
+ 0, 67, 92, 97, 3, 1, 67, 27, 1, 83, 2, 3, 67,
+ 26, 25, 2, 5, 73, 5, 74, 71, 66, 80, 80, 82,
+ 85, 24, 65, 5, 67, 69, 83, 75, 74, 3, 67, 66,
+ 74, 16, 68, 1, 72, 81, 69, 73, 68, 1, 66, 71,
+ 64, 2, 78, 70, 62, 55, 21, 14, 4, 69, 64, 5,
+ 1, 10, 5, 64, 75, 2, 69, 74, 72, 11, 9, 11, 5,
+ 5, 8, 10, 22, 13, 7, 3, 3, 2, 10, 12, 85, 64,
+ 5, 72, 7, 8, 16, 12, 1, 19, 12, 14, 70, 4, 0,
+ 1, 14, 89, 12, 19, 16, 19, 19, 18, 16, 26, 24,
+ 6, 16, 21, 78, 62, 79, 12, 5, 6, 6, 7, 10, 5,
+ 13, 69, 2, 0, 8, 62, 81, 33, 36, 31, 27, 26,
+ 24, 20, 20, 17, 7, 4, 3, 65, 68, 89, 75, 75,
+ 90, 5, 2, 0, 65, 73, 75, 75, 71, 81, 77, 83,
+ 91, 83, 99, 73, 18, 14, 10, 6, 5, 64, 68, 70,
+ 69, 65, 30, 19, 14, 7, 12, 1, 64, 68, 74, 6,
+ 36, 28, 23, 21, 18, 6, 1, 65, 70, 69, 45, 30,
+ 14, 3, 12, 0, 73, 75, 6, 48, 39, 27, 17, 21,
+ 4, 64, 68, 71, 62, 90, 85, 76, 82, 81, 76, 75,
+ 71, 73, 73, 70, 66, 78, 76, 81, 82, 66, 78,
+ 76, 75, 69, 67, 70, 72, 68, 71, 69, 70, 80, 6,
+ 7, 13, 3, 64, 7, 5, 3, 4, 3, 1, 66, 64, 67,
+ 71, 68, 7, 83, 64, 16, 65, 8, 2, 6, 12, 2, 1,
+ 4, 8, 72, 71, 80, 37, 39, 40, 32, 27, 33, 30,
+ 29, 28, 23, 26, 24, 15, 11, 0, 18, 20, 16, 8,
+ 11, 11, 4, 3, 66, 64, 73, 75, 75, 83, 34, 34,
+ 33, 29, 19, 23, 20, 10, 11, 6, 2, 72, 72, 78,
+ 86, 68, 67, 88, 4, 3, 66, 75, 71, 74, 79, 72,
+ 81, 80, 90, 89, 94, 101, 74, 84, 89, 76, 70,
+ 3, 69, 12, 21, 67, 66, 8, 12, 4, 11, 21, 4, 1,
+ 45, 22, 2, 77, 89, 111, 124, 126, 126, 1, 39,
+ 29, 25, 15, 22, 11, 5, 3, 70, 75, 70, 3, 67,
+ 12, 19, 64, 65, 6, 10, 6, 9, 19, 8, 2, 45, 22,
+ 2, 77, 89, 111, 124, 126, 126 },
+
+ {
+
+ 43,
+ 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 0, 4, 35,
+ 58, 15, 67, 68, 3, 7, 3, 65, 64, 71, 79, 8,
+ 70, 101, 107, 114, 15, 74, 69, 3, 7, 3, 79,
+ 67, 11, 6, 65, 73, 78, 1, 78, 81, 94, 6, 68,
+ 74, 4, 75, 73, 86, 64, 75, 71, 82, 0, 2, 22,
+ 0, 0, 0, 67, 93, 97, 4, 0, 67, 26, 0, 83, 5,
+ 4, 66, 28, 27, 3, 7, 71, 6, 73, 70, 64, 80,
+ 80, 81, 85, 24, 65, 6, 65, 69, 83, 75, 72, 3,
+ 68, 67, 75, 16, 68, 2, 72, 80, 68, 73, 67, 1,
+ 66, 69, 0, 3, 77, 69, 62, 59, 24, 17, 4, 69,
+ 64, 5, 1, 10, 6, 64, 75, 4, 69, 76, 74, 13, 9,
+ 11, 5, 5, 8, 10, 23, 14, 7, 3, 3, 2, 10, 13,
+ 86, 64, 5, 73, 7, 7, 16, 12, 1, 19, 12, 14,
+ 71, 4, 64, 0, 14, 89, 11, 18, 15, 17, 15, 17,
+ 14, 24, 22, 4, 13, 19, 84, 62, 83, 10, 3, 4,
+ 3, 4, 6, 1, 9, 72, 0, 67, 6, 62, 83, 31, 34,
+ 28, 25, 24, 22, 18, 18, 15, 5, 2, 1, 67, 70,
+ 91, 75, 75, 91, 4, 1, 64, 66, 74, 77, 76, 73,
+ 82, 78, 84, 92, 84, 98, 71, 19, 15, 10, 6, 6,
+ 64, 68, 69, 67, 64, 30, 19, 15, 8, 13, 2, 0,
+ 67, 73, 7, 37, 29, 24, 21, 19, 7, 1, 65, 70,
+ 69, 45, 30, 13, 3, 12, 0, 73, 75, 6, 47, 38,
+ 26, 16, 21, 4, 64, 68, 71, 62, 89, 83, 75, 80,
+ 80, 75, 73, 69, 72, 71, 68, 64, 77, 75, 81,
+ 81, 65, 78, 76, 75, 69, 67, 70, 72, 68, 71,
+ 69, 70, 81, 7, 7, 14, 3, 64, 7, 5, 3, 4, 3, 1,
+ 66, 64, 67, 71, 68, 7, 84, 65, 16, 66, 7, 1,
+ 5, 12, 1, 0, 4, 8, 73, 71, 81, 36, 38, 39, 30,
+ 25, 31, 28, 27, 25, 20, 23, 21, 12, 9, 65, 14,
+ 16, 12, 3, 8, 7, 2, 0, 69, 66, 75, 77, 76, 84,
+ 32, 31, 30, 27, 16, 20, 17, 7, 9, 3, 0, 75,
+ 74, 80, 87, 69, 68, 90, 3, 1, 68, 77, 73, 76,
+ 81, 73, 82, 81, 91, 90, 95, 102, 75, 85, 90,
+ 76, 70, 4, 68, 13, 22, 67, 65, 8, 12, 4, 11,
+ 22, 4, 1, 44, 20, 64, 80, 92, 115, 126, 126,
+ 126, 1, 39, 29, 25, 15, 22, 11, 5, 3, 69, 75,
+ 70, 4, 67, 12, 20, 64, 65, 6, 10, 6, 9, 19, 8,
+ 2, 44, 20, 64, 80, 92, 115, 126, 126, 126 },
+
+ {
+
+ 42,
+ 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 64, 2, 34,
+ 58, 15, 65, 68, 3, 8, 2, 66, 64, 72, 81, 7,
+ 72, 105, 110, 116, 18, 73, 68, 3, 8, 2, 79,
+ 66, 11, 6, 65, 72, 77, 1, 78, 81, 94, 6, 68,
+ 74, 4, 75, 73, 85, 64, 75, 71, 82, 1, 2, 22,
+ 0, 0, 0, 66, 93, 97, 4, 64, 67, 25, 0, 83, 7,
+ 5, 65, 30, 29, 5, 8, 70, 8, 71, 69, 2, 80, 79,
+ 81, 84, 24, 65, 7, 0, 69, 82, 74, 70, 3, 68,
+ 67, 75, 16, 68, 2, 71, 79, 67, 72, 66, 2, 65,
+ 68, 2, 4, 75, 68, 62, 62, 27, 20, 5, 68, 0, 6,
+ 1, 10, 6, 64, 74, 6, 69, 78, 76, 16, 9, 12, 5,
+ 5, 8, 11, 25, 15, 8, 2, 3, 2, 11, 13, 87, 0,
+ 5, 73, 6, 6, 15, 11, 0, 19, 12, 14, 72, 5, 65,
+ 0, 13, 89, 11, 17, 14, 15, 12, 16, 13, 22, 20,
+ 2, 11, 17, 90, 62, 86, 8, 1, 1, 0, 2, 3, 67,
+ 5, 74, 65, 70, 4, 62, 84, 29, 32, 26, 23, 22,
+ 20, 16, 16, 13, 2, 0, 64, 68, 72, 93, 75, 75,
+ 91, 3, 0, 65, 68, 75, 78, 77, 74, 83, 78, 85,
+ 92, 85, 97, 70, 20, 15, 10, 7, 6, 0, 67, 68,
+ 65, 1, 30, 20, 15, 8, 14, 3, 2, 65, 71, 7, 38,
+ 30, 25, 22, 20, 7, 2, 64, 69, 69, 46, 30, 13,
+ 2, 12, 0, 73, 74, 6, 47, 37, 25, 15, 21, 4, 0,
+ 68, 70, 62, 87, 82, 73, 78, 78, 74, 72, 68,
+ 71, 69, 67, 1, 76, 74, 80, 80, 0, 78, 76, 74,
+ 68, 66, 70, 73, 68, 72, 69, 70, 81, 7, 7, 15,
+ 3, 64, 7, 5, 3, 4, 3, 1, 67, 64, 67, 71, 68,
+ 8, 85, 66, 16, 67, 7, 1, 4, 12, 1, 64, 4, 8,
+ 74, 71, 82, 35, 37, 38, 28, 23, 29, 26, 24,
+ 23, 17, 20, 18, 9, 6, 67, 9, 12, 8, 65, 5, 4,
+ 64, 65, 71, 68, 76, 78, 76, 84, 29, 29, 28,
+ 25, 13, 17, 15, 4, 7, 1, 65, 78, 76, 82, 89,
+ 69, 68, 92, 2, 0, 69, 79, 75, 78, 82, 74, 84,
+ 82, 91, 92, 96, 102, 77, 87, 92, 75, 70, 5,
+ 68, 14, 23, 66, 64, 9, 13, 4, 12, 23, 4, 1,
+ 43, 18, 67, 83, 96, 119, 126, 126, 126, 2, 39,
+ 29, 25, 15, 23, 11, 6, 4, 69, 75, 70, 4, 67,
+ 13, 21, 64, 65, 7, 11, 6, 10, 20, 8, 2, 43,
+ 18, 67, 83, 96, 119, 126, 126, 126 },
+
+ {
+
+ 41,
+ 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 66, 0,
+ 33, 58, 15, 0, 68, 4, 9, 2, 66, 64, 72, 82, 6,
+ 75, 108, 112, 117, 21, 72, 68, 4, 9, 2, 80,
+ 65, 11, 5, 65, 72, 77, 1, 78, 81, 94, 6, 68,
+ 73, 4, 75, 72, 85, 64, 75, 71, 82, 1, 2, 22,
+ 0, 0, 0, 66, 93, 97, 5, 65, 67, 24, 64, 83,
+ 10, 6, 64, 31, 31, 6, 10, 68, 9, 70, 68, 4,
+ 80, 79, 80, 84, 24, 65, 8, 2, 69, 81, 74, 68,
+ 3, 69, 68, 76, 16, 68, 3, 71, 78, 66, 72, 65,
+ 2, 64, 66, 3, 4, 74, 67, 62, 62, 30, 23, 5,
+ 68, 0, 6, 1, 10, 7, 0, 74, 8, 69, 80, 78, 19,
+ 9, 12, 5, 5, 8, 11, 26, 16, 8, 2, 3, 2, 11,
+ 14, 88, 0, 5, 74, 6, 5, 15, 11, 0, 19, 12, 14,
+ 73, 5, 66, 64, 13, 89, 11, 17, 14, 13, 9, 15,
+ 11, 20, 17, 0, 8, 15, 97, 62, 90, 6, 64, 64,
+ 66, 64, 0, 71, 1, 77, 67, 74, 2, 62, 86, 27,
+ 30, 24, 20, 19, 18, 14, 14, 11, 0, 65, 66, 70,
+ 73, 95, 75, 75, 92, 2, 64, 66, 69, 76, 79, 78,
+ 76, 84, 79, 86, 93, 85, 97, 68, 21, 16, 10, 7,
+ 7, 0, 67, 68, 0, 2, 30, 20, 16, 9, 15, 4, 3,
+ 64, 69, 8, 38, 30, 25, 22, 21, 8, 2, 64, 69,
+ 69, 46, 30, 12, 2, 12, 0, 73, 74, 6, 46, 37,
+ 24, 14, 21, 4, 0, 68, 70, 62, 86, 80, 72, 77,
+ 77, 73, 70, 66, 70, 68, 65, 3, 76, 73, 80, 79,
+ 2, 77, 76, 74, 68, 66, 70, 73, 68, 72, 69, 70,
+ 82, 8, 8, 16, 3, 64, 8, 5, 3, 4, 3, 1, 67, 64,
+ 67, 71, 68, 8, 86, 67, 16, 68, 6, 0, 4, 12, 0,
+ 65, 4, 8, 75, 71, 83, 34, 36, 37, 27, 21, 27,
+ 24, 22, 20, 15, 17, 15, 6, 4, 69, 5, 8, 4, 70,
+ 1, 1, 66, 67, 74, 70, 78, 80, 77, 85, 27, 26,
+ 25, 22, 10, 15, 12, 1, 4, 65, 68, 81, 78, 84,
+ 90, 70, 69, 93, 0, 65, 71, 81, 76, 80, 84, 75,
+ 85, 83, 92, 93, 97, 103, 78, 88, 93, 75, 69,
+ 5, 67, 15, 24, 66, 64, 9, 13, 4, 12, 24, 4, 1,
+ 42, 16, 69, 86, 99, 123, 126, 126, 126, 2, 39,
+ 29, 25, 15, 23, 12, 6, 4, 68, 75, 70, 5, 66,
+ 14, 22, 64, 64, 7, 11, 6, 10, 20, 8, 2, 42,
+ 16, 69, 86, 99, 123, 126, 126, 126 },
+
+ {
+
+ 40,
+ 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 67, 65,
+ 32, 58, 15, 2, 68, 4, 10, 2, 67, 64, 73, 84,
+ 5, 77, 112, 115, 119, 24, 71, 68, 4, 10, 2,
+ 80, 64, 11, 5, 65, 72, 76, 1, 78, 81, 94, 6,
+ 68, 73, 4, 75, 72, 85, 64, 75, 71, 82, 1, 2,
+ 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 23, 64,
+ 83, 12, 7, 0, 33, 33, 8, 11, 66, 11, 69, 67,
+ 6, 80, 79, 80, 84, 24, 65, 9, 4, 69, 80, 73,
+ 66, 3, 69, 68, 76, 16, 68, 3, 70, 77, 65, 71,
+ 64, 3, 0, 65, 5, 5, 73, 66, 62, 62, 33, 26, 5,
+ 68, 0, 7, 1, 10, 7, 0, 73, 10, 69, 82, 80, 22,
+ 9, 12, 5, 5, 8, 12, 28, 17, 8, 2, 3, 2, 12,
+ 15, 89, 0, 5, 74, 5, 4, 14, 11, 64, 19, 12,
+ 14, 74, 5, 67, 65, 12, 89, 11, 16, 13, 11, 6,
+ 14, 9, 18, 15, 65, 6, 13, 103, 62, 93, 4, 66,
+ 66, 69, 66, 66, 75, 66, 80, 69, 77, 0, 62, 88,
+ 25, 28, 22, 18, 17, 16, 12, 12, 9, 65, 67, 68,
+ 72, 75, 97, 75, 75, 92, 1, 65, 67, 70, 77, 80,
+ 79, 78, 85, 79, 87, 94, 86, 96, 67, 22, 16,
+ 10, 7, 8, 1, 66, 67, 2, 3, 30, 20, 16, 10, 16,
+ 5, 5, 1, 67, 9, 39, 31, 26, 23, 22, 8, 2, 64,
+ 68, 69, 47, 30, 11, 2, 12, 0, 73, 73, 6, 46,
+ 36, 23, 13, 21, 4, 0, 68, 70, 62, 85, 79, 71,
+ 75, 75, 72, 68, 64, 69, 66, 0, 5, 75, 72, 79,
+ 78, 4, 77, 76, 74, 67, 65, 70, 73, 68, 73, 69,
+ 70, 82, 8, 8, 17, 3, 64, 8, 5, 3, 4, 3, 1, 68,
+ 64, 67, 71, 68, 9, 87, 68, 16, 69, 5, 64, 3,
+ 12, 64, 66, 4, 8, 76, 71, 84, 33, 35, 36, 25,
+ 19, 25, 22, 20, 18, 12, 14, 12, 3, 1, 71, 1,
+ 4, 0, 75, 65, 65, 69, 69, 76, 72, 80, 81, 78,
+ 86, 25, 24, 23, 20, 7, 12, 10, 65, 2, 67, 70,
+ 84, 80, 86, 92, 71, 70, 95, 64, 66, 73, 83,
+ 78, 82, 86, 76, 87, 84, 93, 94, 98, 104, 80,
+ 89, 94, 75, 69, 6, 67, 16, 25, 66, 0, 10, 14,
+ 4, 13, 25, 4, 1, 41, 14, 72, 89, 102, 126,
+ 126, 126, 126, 2, 39, 29, 25, 15, 24, 12, 6,
+ 4, 68, 75, 70, 5, 66, 15, 23, 64, 64, 7, 11,
+ 6, 10, 21, 8, 2, 41, 14, 72, 89, 102, 126,
+ 126, 126, 126 },
+
+ {
+
+ 38,
+ 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 69, 68,
+ 31, 58, 14, 3, 69, 4, 10, 1, 68, 64, 74, 86,
+ 3, 80, 116, 118, 121, 26, 71, 68, 4, 10, 1,
+ 81, 64, 11, 4, 65, 72, 76, 0, 79, 81, 94, 5,
+ 68, 73, 4, 75, 72, 85, 64, 75, 72, 82, 1, 2,
+ 22, 0, 0, 0, 65, 94, 97, 5, 67, 68, 22, 65,
+ 84, 14, 8, 1, 34, 35, 9, 12, 65, 12, 68, 66,
+ 8, 80, 79, 80, 84, 24, 65, 9, 6, 69, 80, 73,
+ 65, 2, 70, 69, 77, 16, 68, 3, 70, 77, 65, 71,
+ 64, 3, 0, 64, 6, 5, 72, 65, 62, 62, 36, 28, 5,
+ 68, 0, 7, 0, 10, 7, 0, 73, 12, 69, 84, 82, 24,
+ 9, 12, 5, 5, 8, 12, 29, 17, 8, 1, 2, 2, 12,
+ 15, 91, 0, 5, 75, 4, 2, 13, 10, 65, 19, 12,
+ 13, 76, 5, 68, 66, 11, 89, 10, 15, 12, 8, 2,
+ 12, 7, 15, 12, 68, 3, 11, 110, 62, 97, 1, 68,
+ 69, 73, 69, 70, 80, 71, 83, 71, 81, 65, 62,
+ 90, 22, 25, 19, 15, 14, 13, 9, 9, 7, 68, 70,
+ 70, 74, 77, 99, 75, 76, 93, 0, 67, 69, 72, 79,
+ 82, 81, 80, 86, 80, 88, 95, 87, 96, 66, 22,
+ 16, 10, 7, 8, 1, 66, 67, 4, 4, 30, 20, 16, 10,
+ 16, 6, 6, 2, 66, 9, 39, 31, 26, 23, 23, 8, 2,
+ 64, 68, 70, 47, 29, 10, 1, 12, 0, 73, 73, 5,
+ 45, 35, 21, 12, 20, 4, 0, 68, 70, 62, 84, 78,
+ 70, 74, 74, 71, 67, 0, 68, 65, 1, 7, 75, 72,
+ 79, 77, 5, 77, 76, 74, 67, 65, 70, 74, 69, 74,
+ 69, 71, 83, 8, 8, 18, 3, 65, 8, 5, 2, 4, 3, 1,
+ 69, 64, 67, 71, 68, 9, 88, 69, 16, 71, 4, 65,
+ 2, 11, 65, 67, 4, 7, 78, 72, 86, 31, 33, 35,
+ 23, 17, 22, 19, 17, 15, 9, 11, 9, 64, 65, 73,
+ 67, 0, 68, 80, 69, 69, 72, 72, 79, 74, 82, 83,
+ 79, 87, 22, 21, 20, 17, 4, 9, 7, 69, 64, 70,
+ 73, 87, 82, 88, 94, 72, 71, 97, 66, 68, 75,
+ 86, 80, 84, 88, 78, 89, 86, 94, 96, 99, 105,
+ 82, 91, 96, 75, 69, 6, 67, 17, 26, 66, 0, 10,
+ 14, 4, 13, 25, 4, 0, 39, 12, 75, 93, 106, 126,
+ 126, 126, 126, 2, 39, 29, 25, 15, 24, 12, 6,
+ 4, 68, 75, 70, 5, 66, 15, 23, 64, 64, 7, 11,
+ 6, 10, 21, 7, 1, 39, 12, 75, 93, 106, 126,
+ 126, 126, 126 },
+
+ {
+
+ 37,
+ 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 70, 70,
+ 30, 59, 14, 5, 69, 5, 11, 1, 68, 0, 74, 87, 2,
+ 82, 119, 120, 122, 29, 70, 67, 5, 11, 1, 81,
+ 0, 11, 4, 64, 71, 75, 0, 79, 80, 93, 5, 67,
+ 72, 4, 74, 71, 84, 0, 74, 72, 81, 2, 2, 22, 0,
+ 0, 0, 64, 94, 97, 6, 67, 68, 22, 65, 84, 17,
+ 10, 3, 36, 38, 11, 14, 0, 14, 66, 64, 11, 80,
+ 78, 79, 83, 24, 65, 10, 9, 68, 79, 72, 0, 2,
+ 70, 69, 77, 17, 68, 4, 69, 76, 64, 70, 0, 4,
+ 1, 1, 8, 6, 70, 0, 62, 62, 40, 31, 6, 67, 1,
+ 8, 0, 11, 8, 1, 72, 15, 68, 86, 83, 27, 9, 13,
+ 5, 6, 9, 13, 31, 18, 9, 1, 2, 2, 13, 16, 92,
+ 1, 5, 75, 4, 1, 13, 10, 65, 19, 12, 13, 77, 6,
+ 68, 66, 11, 89, 10, 15, 12, 6, 64, 11, 6, 13,
+ 10, 70, 1, 9, 116, 62, 100, 64, 69, 71, 76,
+ 71, 73, 84, 75, 85, 73, 84, 67, 62, 91, 20,
+ 23, 17, 13, 12, 11, 7, 7, 5, 70, 72, 71, 75,
+ 78, 100, 75, 76, 93, 0, 68, 70, 73, 80, 83,
+ 82, 81, 87, 80, 89, 95, 87, 95, 64, 23, 17,
+ 10, 8, 9, 2, 65, 66, 7, 6, 31, 21, 17, 11, 17,
+ 8, 8, 4, 64, 10, 40, 32, 27, 24, 24, 9, 3, 0,
+ 67, 70, 48, 29, 10, 1, 13, 1, 73, 72, 5, 45,
+ 35, 20, 11, 20, 5, 1, 67, 69, 62, 82, 76, 68,
+ 72, 72, 69, 65, 2, 66, 0, 3, 10, 74, 71, 78,
+ 75, 7, 76, 75, 73, 66, 64, 69, 74, 69, 74, 69,
+ 71, 83, 9, 9, 19, 4, 65, 9, 6, 2, 5, 4, 1, 69,
+ 0, 66, 70, 67, 10, 88, 70, 16, 72, 4, 65, 2,
+ 11, 65, 67, 4, 7, 79, 72, 87, 30, 32, 35, 22,
+ 16, 20, 17, 15, 13, 7, 9, 7, 67, 67, 75, 71,
+ 66, 72, 84, 72, 72, 74, 74, 81, 75, 83, 84,
+ 79, 87, 20, 19, 18, 15, 2, 7, 5, 72, 66, 72,
+ 75, 89, 83, 89, 95, 72, 71, 98, 67, 69, 76,
+ 88, 81, 85, 89, 79, 90, 87, 94, 97, 99, 105,
+ 83, 92, 97, 74, 68, 7, 66, 19, 28, 65, 1, 11,
+ 15, 5, 14, 26, 4, 0, 38, 10, 77, 96, 109, 126,
+ 126, 126, 126, 3, 39, 30, 26, 16, 25, 13, 7,
+ 5, 67, 74, 69, 6, 65, 16, 24, 0, 0, 8, 12, 6,
+ 11, 22, 7, 1, 38, 10, 77, 96, 109, 126, 126,
+ 126, 126 },
+
+ {
+
+ 36,
+ 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 71, 72,
+ 29, 59, 14, 7, 69, 5, 12, 1, 69, 0, 75, 89, 1,
+ 85, 123, 123, 124, 32, 69, 67, 5, 12, 1, 81,
+ 1, 11, 3, 64, 71, 74, 0, 79, 80, 93, 5, 67,
+ 72, 4, 74, 71, 84, 0, 74, 72, 81, 2, 2, 22, 0,
+ 0, 0, 0, 94, 97, 6, 68, 68, 21, 65, 84, 19,
+ 11, 4, 38, 40, 12, 15, 2, 15, 65, 0, 13, 80,
+ 78, 79, 83, 24, 65, 11, 11, 68, 78, 71, 2, 2,
+ 70, 69, 78, 17, 68, 4, 68, 75, 0, 69, 1, 4, 2,
+ 2, 9, 7, 69, 1, 62, 62, 43, 34, 6, 67, 1, 8,
+ 0, 11, 8, 1, 72, 17, 68, 88, 85, 30, 9, 13, 5,
+ 6, 9, 13, 33, 19, 9, 1, 2, 2, 14, 17, 93, 1,
+ 5, 75, 3, 0, 12, 10, 66, 19, 12, 13, 78, 6,
+ 69, 67, 10, 89, 10, 14, 11, 4, 67, 10, 4, 11,
+ 8, 72, 64, 7, 122, 62, 104, 66, 71, 73, 79,
+ 73, 76, 88, 79, 88, 75, 87, 69, 62, 93, 18,
+ 21, 15, 11, 10, 9, 5, 5, 3, 72, 74, 73, 77,
+ 80, 102, 75, 76, 94, 64, 69, 71, 74, 81, 84,
+ 83, 83, 88, 80, 90, 96, 88, 94, 0, 24, 17, 10,
+ 8, 10, 3, 64, 65, 9, 7, 31, 21, 17, 12, 18, 9,
+ 10, 6, 1, 11, 41, 33, 28, 25, 25, 9, 3, 0, 66,
+ 70, 49, 29, 9, 1, 13, 1, 73, 72, 5, 45, 34,
+ 19, 10, 20, 5, 1, 67, 69, 62, 81, 75, 67, 70,
+ 70, 68, 0, 4, 65, 2, 5, 12, 73, 70, 78, 74, 9,
+ 76, 75, 73, 65, 64, 69, 74, 69, 75, 69, 71,
+ 83, 9, 9, 20, 4, 65, 9, 6, 2, 5, 4, 1, 70, 0,
+ 66, 70, 67, 11, 89, 71, 16, 73, 3, 66, 1, 11,
+ 66, 68, 4, 7, 80, 72, 88, 29, 31, 34, 20, 14,
+ 18, 15, 13, 11, 4, 6, 4, 70, 70, 77, 75, 70,
+ 76, 89, 75, 75, 77, 76, 84, 77, 85, 85, 80,
+ 88, 18, 17, 15, 13, 64, 4, 2, 75, 68, 75, 77,
+ 92, 85, 91, 97, 73, 72, 100, 68, 70, 78, 90,
+ 83, 87, 91, 80, 92, 88, 95, 98, 100, 106, 85,
+ 93, 98, 74, 68, 8, 66, 20, 29, 65, 2, 12, 16,
+ 5, 14, 27, 4, 0, 37, 8, 80, 99, 112, 126, 126,
+ 126, 126, 3, 39, 30, 26, 16, 26, 13, 7, 5, 67,
+ 74, 69, 6, 65, 17, 25, 0, 0, 8, 12, 6, 11, 23,
+ 7, 1, 37, 8, 80, 99, 112, 126, 126, 126, 126 },
+
+ {
+
+ 35,
+ 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 73, 74,
+ 28, 59, 14, 9, 69, 6, 13, 1, 69, 0, 75, 90, 0,
+ 87, 126, 125, 125, 35, 68, 67, 6, 13, 1, 82,
+ 2, 11, 3, 64, 71, 74, 0, 79, 80, 93, 5, 67,
+ 71, 4, 74, 70, 84, 0, 74, 72, 81, 2, 2, 22, 0,
+ 0, 0, 0, 94, 97, 7, 69, 68, 20, 66, 84, 22,
+ 12, 5, 39, 42, 14, 17, 4, 17, 64, 1, 15, 80,
+ 78, 78, 83, 24, 65, 12, 13, 68, 77, 71, 4, 2,
+ 71, 70, 78, 17, 68, 5, 68, 74, 1, 69, 2, 5, 3,
+ 4, 11, 7, 68, 2, 62, 62, 46, 37, 6, 67, 1, 9,
+ 0, 11, 9, 2, 71, 19, 68, 90, 87, 33, 9, 13, 5,
+ 6, 9, 14, 34, 20, 9, 1, 2, 2, 14, 18, 94, 1,
+ 5, 76, 3, 64, 12, 10, 66, 19, 12, 13, 79, 6,
+ 70, 68, 10, 89, 10, 14, 11, 2, 70, 9, 2, 9, 5,
+ 74, 67, 5, 126, 62, 107, 68, 73, 75, 82, 76,
+ 79, 92, 83, 91, 77, 91, 71, 62, 95, 16, 19,
+ 13, 8, 7, 7, 3, 3, 1, 74, 76, 75, 79, 81, 104,
+ 75, 76, 94, 65, 70, 72, 75, 82, 85, 84, 85,
+ 89, 81, 91, 97, 88, 94, 2, 25, 18, 10, 8, 11,
+ 3, 64, 65, 11, 8, 31, 21, 18, 13, 19, 10, 11,
+ 7, 3, 12, 41, 33, 28, 25, 26, 10, 3, 0, 66,
+ 70, 49, 29, 8, 1, 13, 1, 73, 71, 5, 44, 34,
+ 18, 9, 20, 5, 1, 67, 69, 62, 80, 73, 66, 69,
+ 69, 67, 2, 6, 64, 3, 7, 14, 73, 69, 77, 73,
+ 11, 75, 75, 73, 65, 0, 69, 74, 69, 75, 69, 71,
+ 84, 10, 10, 21, 4, 65, 10, 6, 2, 5, 4, 1, 70,
+ 0, 66, 70, 67, 11, 90, 72, 16, 74, 2, 67, 1,
+ 11, 67, 69, 4, 7, 81, 72, 89, 28, 30, 33, 19,
+ 12, 16, 13, 11, 8, 2, 3, 1, 73, 72, 79, 79,
+ 74, 80, 94, 79, 78, 79, 78, 86, 79, 87, 87,
+ 81, 89, 16, 14, 13, 10, 67, 2, 0, 78, 71, 77,
+ 80, 95, 87, 93, 98, 74, 73, 101, 70, 72, 80,
+ 92, 84, 89, 93, 81, 93, 89, 96, 99, 101, 107,
+ 86, 94, 99, 74, 67, 8, 65, 21, 30, 65, 2, 12,
+ 16, 5, 15, 28, 4, 0, 36, 6, 82, 102, 115, 126,
+ 126, 126, 126, 3, 39, 30, 26, 16, 26, 14, 7,
+ 5, 66, 74, 69, 7, 64, 18, 26, 0, 1, 8, 12, 6,
+ 11, 23, 7, 1, 36, 6, 82, 102, 115, 126, 126,
+ 126, 126 },
+
+ {
+
+ 33,
+ 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 74, 76,
+ 27, 59, 13, 11, 69, 6, 14, 0, 70, 0, 76, 92,
+ 64, 90, 126, 126, 126, 38, 67, 67, 6, 14, 0,
+ 82, 3, 11, 2, 64, 70, 73, 0, 79, 80, 93, 5,
+ 67, 71, 4, 74, 70, 83, 0, 74, 72, 81, 3, 2,
+ 22, 0, 0, 0, 1, 95, 97, 7, 70, 68, 19, 66, 84,
+ 24, 13, 6, 41, 44, 15, 18, 5, 18, 1, 2, 17,
+ 80, 77, 78, 83, 24, 65, 13, 15, 68, 77, 70, 6,
+ 2, 71, 70, 79, 17, 68, 5, 67, 73, 2, 68, 3, 5,
+ 3, 5, 12, 8, 67, 3, 62, 62, 49, 40, 6, 66, 1,
+ 9, 0, 11, 9, 2, 71, 21, 68, 92, 89, 35, 9, 14,
+ 5, 6, 9, 14, 36, 21, 10, 0, 2, 2, 15, 18, 95,
+ 1, 5, 76, 2, 65, 11, 9, 67, 19, 12, 13, 80, 6,
+ 71, 69, 9, 89, 9, 13, 10, 0, 74, 8, 0, 7, 3,
+ 76, 69, 3, 126, 62, 111, 70, 75, 78, 85, 78,
+ 83, 97, 87, 94, 79, 94, 73, 62, 96, 14, 17,
+ 10, 6, 5, 5, 1, 1, 64, 77, 78, 77, 81, 83,
+ 106, 75, 76, 95, 66, 71, 73, 77, 83, 87, 85,
+ 86, 90, 81, 92, 97, 89, 93, 3, 26, 18, 10, 9,
+ 11, 4, 0, 64, 13, 10, 31, 22, 18, 13, 20, 11,
+ 13, 9, 4, 12, 42, 34, 29, 26, 27, 10, 3, 1,
+ 65, 70, 50, 29, 8, 0, 13, 1, 73, 71, 5, 44,
+ 33, 17, 8, 20, 5, 2, 67, 69, 62, 78, 72, 65,
+ 67, 67, 66, 3, 7, 0, 5, 8, 16, 72, 68, 77, 72,
+ 12, 75, 75, 73, 64, 0, 69, 75, 69, 76, 69, 71,
+ 84, 10, 10, 22, 4, 65, 10, 6, 2, 5, 4, 1, 71,
+ 0, 66, 70, 67, 12, 91, 73, 16, 75, 2, 68, 0,
+ 11, 68, 70, 4, 7, 82, 72, 90, 27, 29, 32, 17,
+ 10, 14, 11, 8, 6, 64, 0, 65, 76, 75, 81, 84,
+ 78, 84, 99, 82, 82, 82, 81, 89, 81, 89, 88,
+ 82, 89, 13, 12, 10, 8, 70, 64, 66, 81, 73, 80,
+ 82, 98, 89, 95, 100, 75, 73, 103, 71, 73, 81,
+ 94, 86, 91, 94, 82, 95, 90, 97, 101, 102, 107,
+ 88, 96, 101, 73, 67, 9, 65, 22, 31, 65, 3, 13,
+ 17, 5, 15, 29, 4, 0, 35, 4, 85, 105, 119, 126,
+ 126, 126, 126, 4, 39, 30, 26, 16, 27, 14, 7,
+ 5, 66, 74, 69, 7, 64, 18, 27, 0, 1, 9, 13, 6,
+ 11, 24, 7, 1, 35, 4, 85, 105, 119, 126, 126,
+ 126, 126 },
+
+ {
+
+ 32,
+ 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 75, 78,
+ 26, 59, 13, 13, 69, 6, 15, 0, 70, 0, 77, 94,
+ 65, 92, 126, 126, 126, 41, 66, 66, 6, 15, 0,
+ 82, 4, 11, 2, 64, 70, 72, 0, 79, 80, 93, 5,
+ 67, 71, 4, 74, 69, 83, 0, 74, 72, 81, 3, 2,
+ 22, 0, 0, 0, 1, 95, 97, 8, 71, 68, 18, 67, 84,
+ 27, 14, 7, 43, 46, 17, 20, 7, 20, 2, 3, 20,
+ 80, 77, 77, 82, 24, 65, 14, 17, 68, 76, 70, 8,
+ 2, 72, 71, 79, 17, 68, 6, 67, 72, 3, 68, 4, 6,
+ 4, 7, 14, 9, 65, 4, 62, 62, 52, 43, 7, 66, 2,
+ 10, 0, 11, 10, 2, 70, 23, 68, 94, 91, 38, 9,
+ 14, 5, 6, 9, 15, 37, 22, 10, 0, 2, 2, 15, 19,
+ 96, 2, 5, 77, 2, 66, 11, 9, 67, 19, 12, 13,
+ 81, 7, 72, 69, 9, 89, 9, 12, 9, 65, 77, 7, 64,
+ 5, 1, 78, 72, 1, 126, 62, 114, 72, 77, 80, 88,
+ 81, 86, 101, 91, 96, 81, 98, 75, 62, 98, 12,
+ 15, 8, 4, 3, 3, 64, 64, 66, 79, 80, 79, 82,
+ 85, 108, 75, 76, 95, 67, 72, 74, 78, 84, 88,
+ 86, 88, 91, 82, 93, 98, 90, 92, 5, 27, 19, 10,
+ 9, 12, 4, 0, 0, 15, 11, 31, 22, 19, 14, 21,
+ 12, 14, 10, 6, 13, 43, 35, 30, 26, 28, 11, 4,
+ 1, 65, 70, 50, 29, 7, 0, 13, 1, 73, 70, 5, 43,
+ 32, 16, 7, 20, 5, 2, 67, 68, 62, 77, 70, 0,
+ 65, 66, 65, 5, 9, 1, 7, 10, 18, 71, 67, 76,
+ 71, 14, 75, 75, 72, 64, 1, 69, 75, 69, 76, 69,
+ 71, 85, 11, 10, 23, 4, 65, 10, 6, 2, 5, 4, 1,
+ 71, 0, 66, 70, 67, 12, 92, 74, 16, 76, 1, 68,
+ 64, 11, 68, 71, 4, 7, 83, 72, 91, 26, 28, 31,
+ 15, 8, 12, 9, 6, 3, 67, 66, 68, 79, 77, 83,
+ 88, 82, 88, 104, 85, 85, 84, 83, 91, 83, 90,
+ 90, 82, 90, 11, 9, 8, 6, 73, 67, 68, 84, 75,
+ 82, 84, 101, 91, 97, 101, 75, 74, 105, 72, 75,
+ 83, 96, 88, 93, 96, 83, 96, 91, 97, 102, 103,
+ 108, 89, 97, 102, 73, 67, 10, 64, 23, 32, 64,
+ 4, 13, 17, 5, 16, 30, 4, 0, 34, 2, 88, 108,
+ 122, 126, 126, 126, 126, 4, 39, 30, 26, 16,
+ 27, 14, 8, 6, 65, 74, 69, 8, 64, 19, 28, 0, 1,
+ 9, 13, 6, 12, 24, 7, 1, 34, 2, 88, 108, 122,
+ 126, 126, 126, 126 },
+
+ {
+
+ 31,
+ 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 77, 80,
+ 25, 59, 13, 14, 69, 7, 15, 0, 71, 1, 77, 95,
+ 67, 95, 126, 126, 126, 43, 65, 66, 7, 15, 0,
+ 83, 4, 11, 1, 64, 70, 72, 0, 79, 79, 92, 5,
+ 66, 70, 4, 73, 69, 83, 0, 74, 72, 81, 3, 2,
+ 22, 0, 0, 0, 2, 95, 97, 8, 71, 69, 18, 67, 84,
+ 29, 15, 8, 44, 49, 18, 21, 9, 21, 3, 4, 22,
+ 80, 77, 77, 82, 24, 65, 15, 20, 68, 75, 69,
+ 10, 1, 72, 71, 80, 18, 68, 6, 66, 72, 3, 67,
+ 5, 6, 5, 8, 15, 9, 64, 5, 62, 62, 55, 46, 7,
+ 66, 2, 10, 0, 11, 10, 3, 70, 25, 67, 96, 93,
+ 41, 9, 14, 5, 6, 10, 15, 39, 23, 10, 0, 2, 2,
+ 16, 20, 97, 2, 5, 77, 1, 67, 10, 9, 68, 19,
+ 12, 13, 83, 7, 73, 70, 8, 89, 9, 12, 9, 67,
+ 80, 6, 66, 2, 65, 80, 74, 64, 126, 62, 118,
+ 74, 78, 82, 92, 83, 89, 105, 96, 99, 83, 101,
+ 77, 62, 100, 10, 13, 6, 1, 0, 1, 67, 66, 68,
+ 81, 83, 81, 84, 86, 109, 75, 76, 96, 68, 73,
+ 75, 79, 85, 89, 87, 90, 92, 82, 94, 99, 90,
+ 92, 6, 28, 19, 10, 9, 13, 5, 1, 0, 17, 12, 32,
+ 22, 19, 15, 22, 13, 16, 12, 8, 14, 43, 35, 30,
+ 27, 29, 11, 4, 1, 64, 70, 51, 29, 6, 0, 13, 1,
+ 73, 70, 4, 43, 32, 15, 6, 19, 5, 2, 67, 68,
+ 62, 76, 69, 1, 64, 64, 64, 7, 11, 2, 8, 12,
+ 20, 71, 66, 76, 70, 16, 74, 75, 72, 0, 1, 69,
+ 75, 69, 77, 69, 71, 85, 11, 11, 24, 4, 65, 11,
+ 7, 2, 6, 5, 1, 72, 0, 66, 70, 67, 13, 93, 75,
+ 16, 77, 0, 69, 64, 11, 69, 71, 4, 6, 85, 73,
+ 92, 24, 27, 30, 14, 6, 10, 7, 4, 1, 69, 69,
+ 70, 82, 80, 85, 92, 86, 92, 108, 89, 88, 87,
+ 85, 94, 85, 92, 91, 83, 91, 9, 7, 5, 3, 76,
+ 69, 71, 87, 78, 85, 87, 104, 93, 98, 103, 76,
+ 75, 106, 74, 76, 85, 98, 89, 95, 98, 84, 98,
+ 92, 98, 103, 104, 109, 91, 98, 103, 73, 66,
+ 10, 64, 24, 33, 64, 4, 14, 18, 5, 16, 31, 4,
+ 0, 33, 0, 90, 111, 125, 126, 126, 126, 126, 4,
+ 39, 30, 26, 16, 28, 15, 8, 6, 65, 73, 68, 8,
+ 0, 20, 29, 1, 2, 9, 13, 6, 12, 25, 7, 1, 33,
+ 0, 90, 111, 125, 126, 126, 126, 126 },
+
+ {
+
+ 30,
+ 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 78, 82,
+ 24, 59, 13, 16, 69, 7, 16, 64, 71, 1, 78, 97,
+ 68, 97, 126, 126, 126, 46, 64, 66, 7, 16, 64,
+ 83, 5, 11, 1, 64, 69, 71, 0, 79, 79, 92, 5,
+ 66, 70, 4, 73, 68, 82, 0, 74, 72, 81, 4, 2,
+ 22, 0, 0, 0, 2, 95, 97, 9, 72, 69, 17, 68, 84,
+ 32, 16, 9, 46, 51, 20, 23, 10, 23, 5, 5, 24,
+ 80, 76, 76, 82, 24, 65, 16, 22, 68, 74, 69,
+ 12, 1, 73, 72, 80, 18, 68, 7, 66, 71, 4, 67,
+ 6, 7, 6, 10, 17, 10, 0, 6, 62, 62, 58, 49, 7,
+ 65, 2, 11, 0, 11, 11, 3, 69, 27, 67, 98, 95,
+ 44, 9, 15, 5, 6, 10, 16, 40, 24, 11, 64, 2, 2,
+ 16, 20, 98, 2, 5, 78, 1, 68, 10, 8, 68, 19,
+ 12, 13, 84, 7, 74, 71, 8, 89, 9, 11, 8, 69,
+ 83, 5, 68, 0, 67, 82, 77, 66, 126, 62, 121,
+ 76, 80, 85, 95, 86, 92, 110, 100, 102, 85,
+ 105, 79, 62, 101, 8, 11, 4, 64, 65, 64, 69,
+ 68, 70, 84, 85, 83, 86, 88, 111, 75, 76, 96,
+ 69, 74, 76, 81, 86, 90, 88, 91, 93, 83, 95,
+ 99, 91, 91, 8, 29, 20, 10, 10, 13, 5, 1, 1,
+ 19, 14, 32, 23, 20, 15, 23, 14, 17, 13, 10,
+ 14, 44, 36, 31, 27, 30, 12, 4, 2, 64, 70, 51,
+ 29, 6, 64, 13, 1, 73, 69, 4, 42, 31, 14, 5,
+ 19, 5, 3, 67, 68, 62, 74, 67, 2, 1, 0, 0, 8,
+ 12, 3, 10, 13, 22, 70, 65, 75, 69, 18, 74, 75,
+ 72, 0, 2, 69, 76, 69, 77, 69, 71, 86, 12, 11,
+ 25, 4, 65, 11, 7, 2, 6, 5, 1, 72, 0, 66, 70,
+ 67, 13, 94, 76, 16, 78, 0, 70, 65, 11, 70, 72,
+ 4, 6, 86, 73, 93, 23, 26, 29, 12, 4, 8, 5, 1,
+ 65, 72, 72, 73, 85, 82, 87, 97, 90, 96, 113,
+ 92, 91, 89, 87, 96, 87, 94, 93, 84, 91, 6, 4,
+ 3, 1, 79, 72, 73, 90, 80, 87, 89, 107, 95,
+ 100, 104, 77, 75, 108, 75, 78, 86, 100, 91,
+ 97, 99, 85, 99, 93, 99, 105, 105, 109, 92,
+ 100, 105, 72, 66, 11, 0, 25, 34, 64, 5, 14,
+ 18, 5, 17, 32, 4, 0, 32, 65, 93, 114, 126,
+ 126, 126, 126, 126, 5, 39, 30, 26, 16, 28, 15,
+ 8, 6, 64, 73, 68, 9, 0, 21, 30, 1, 2, 10, 14,
+ 6, 12, 25, 7, 1, 32, 65, 93, 114, 126, 126,
+ 126, 126, 126 },
+
+ {
+
+ 28,
+ 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 80, 85,
+ 23, 59, 12, 18, 70, 7, 17, 64, 72, 1, 79, 99,
+ 69, 100, 126, 126, 126, 49, 0, 66, 7, 17, 64,
+ 84, 6, 11, 0, 64, 69, 71, 64, 80, 79, 92, 5,
+ 66, 70, 4, 73, 68, 82, 0, 74, 72, 81, 4, 2,
+ 22, 0, 0, 0, 3, 96, 97, 9, 73, 69, 16, 68, 85,
+ 34, 17, 10, 47, 53, 21, 24, 12, 24, 6, 6, 26,
+ 80, 76, 76, 82, 24, 65, 17, 24, 68, 74, 68,
+ 14, 1, 73, 72, 81, 18, 68, 7, 65, 70, 5, 66,
+ 6, 7, 6, 11, 18, 10, 1, 7, 62, 62, 61, 51, 7,
+ 65, 2, 11, 64, 11, 11, 3, 69, 29, 67, 100, 97,
+ 46, 9, 15, 5, 6, 10, 16, 42, 24, 11, 64, 1, 2,
+ 17, 21, 100, 2, 5, 78, 0, 70, 9, 8, 69, 19,
+ 12, 12, 85, 7, 75, 72, 7, 89, 8, 10, 7, 71,
+ 87, 3, 70, 65, 70, 85, 79, 68, 126, 62, 125,
+ 78, 82, 87, 98, 88, 96, 114, 104, 105, 87,
+ 108, 81, 62, 103, 6, 8, 1, 67, 68, 67, 71, 71,
+ 72, 86, 87, 85, 88, 90, 113, 75, 77, 97, 70,
+ 76, 77, 82, 87, 92, 90, 93, 94, 83, 96, 100,
+ 92, 91, 9, 30, 20, 10, 10, 14, 6, 2, 1, 21,
+ 15, 32, 23, 20, 16, 24, 15, 19, 15, 11, 15,
+ 44, 36, 31, 28, 31, 12, 4, 2, 0, 71, 52, 29,
+ 5, 64, 13, 1, 73, 69, 4, 42, 30, 13, 4, 19, 5,
+ 3, 67, 68, 62, 73, 66, 3, 2, 2, 1, 10, 14, 4,
+ 11, 15, 24, 70, 65, 75, 68, 19, 74, 75, 72, 1,
+ 2, 69, 76, 69, 78, 69, 71, 86, 12, 11, 26, 4,
+ 66, 11, 7, 1, 6, 5, 1, 73, 0, 66, 70, 67, 14,
+ 95, 77, 16, 80, 64, 71, 66, 10, 71, 73, 4, 6,
+ 87, 73, 95, 22, 24, 28, 10, 2, 6, 3, 64, 67,
+ 75, 75, 76, 88, 85, 89, 101, 94, 101, 118, 96,
+ 95, 92, 90, 99, 89, 96, 94, 85, 92, 4, 2, 0,
+ 65, 82, 75, 76, 93, 83, 90, 92, 110, 97, 102,
+ 106, 78, 76, 110, 77, 79, 88, 102, 93, 99,
+ 101, 87, 101, 95, 100, 106, 106, 110, 94, 101,
+ 106, 72, 66, 11, 0, 26, 35, 64, 5, 15, 19, 5,
+ 17, 32, 4, 64, 31, 67, 96, 117, 126, 126, 126,
+ 126, 126, 5, 39, 30, 26, 16, 29, 15, 8, 6, 64,
+ 73, 68, 9, 0, 21, 30, 1, 2, 10, 14, 6, 12, 26,
+ 7, 0, 31, 67, 96, 117, 126, 126, 126, 126, 126 },
+
+ {
+
+ 27,
+ 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 81, 87,
+ 22, 60, 12, 20, 70, 8, 18, 64, 73, 1, 79, 100,
+ 70, 102, 126, 126, 126, 52, 1, 65, 8, 18, 64,
+ 84, 7, 11, 0, 0, 69, 70, 64, 80, 79, 92, 5,
+ 66, 69, 4, 73, 68, 82, 0, 74, 72, 80, 4, 2,
+ 22, 0, 0, 0, 4, 96, 97, 9, 74, 69, 15, 68, 85,
+ 36, 19, 11, 49, 55, 23, 25, 14, 26, 7, 8, 29,
+ 80, 76, 76, 81, 24, 65, 18, 26, 67, 73, 67,
+ 16, 1, 73, 72, 81, 18, 68, 7, 64, 69, 6, 65,
+ 7, 8, 7, 12, 20, 11, 3, 8, 62, 62, 62, 54, 8,
+ 65, 3, 12, 64, 11, 11, 4, 68, 32, 67, 102, 98,
+ 49, 9, 15, 5, 6, 10, 17, 44, 25, 11, 64, 1, 2,
+ 18, 22, 101, 3, 5, 78, 64, 71, 8, 8, 70, 19,
+ 12, 12, 86, 8, 76, 72, 6, 89, 8, 10, 7, 73,
+ 90, 2, 71, 67, 72, 87, 81, 70, 126, 62, 126,
+ 80, 84, 89, 101, 90, 99, 118, 108, 107, 89,
+ 111, 83, 62, 105, 4, 6, 64, 69, 70, 69, 73,
+ 73, 74, 88, 89, 86, 89, 91, 115, 75, 77, 97,
+ 70, 77, 78, 83, 88, 93, 91, 95, 95, 83, 97,
+ 101, 92, 90, 10, 31, 20, 10, 10, 15, 7, 3, 2,
+ 24, 16, 32, 23, 20, 17, 25, 16, 21, 17, 13,
+ 16, 45, 37, 32, 29, 32, 12, 5, 2, 1, 71, 53,
+ 29, 4, 64, 14, 2, 73, 68, 4, 42, 30, 12, 3,
+ 19, 5, 3, 67, 67, 62, 72, 65, 5, 4, 4, 2, 12,
+ 16, 5, 13, 17, 26, 69, 64, 74, 67, 21, 73, 74,
+ 71, 2, 3, 69, 76, 69, 79, 69, 71, 86, 12, 12,
+ 27, 5, 66, 12, 7, 1, 6, 5, 1, 74, 0, 65, 69,
+ 67, 15, 95, 78, 16, 81, 65, 71, 66, 10, 71,
+ 74, 4, 6, 88, 73, 96, 21, 23, 28, 9, 0, 4, 1,
+ 66, 69, 77, 78, 79, 91, 88, 91, 105, 98, 105,
+ 123, 99, 98, 95, 92, 101, 90, 97, 95, 85, 93,
+ 2, 0, 65, 67, 84, 77, 78, 96, 85, 92, 94, 112,
+ 99, 104, 108, 78, 77, 111, 78, 80, 90, 104,
+ 94, 100, 103, 88, 103, 96, 100, 107, 106, 111,
+ 96, 102, 107, 72, 65, 12, 0, 27, 37, 0, 6, 16,
+ 20, 5, 18, 33, 4, 64, 30, 69, 98, 120, 126,
+ 126, 126, 126, 126, 5, 39, 30, 27, 17, 30, 16,
+ 9, 7, 64, 73, 68, 9, 1, 22, 31, 1, 3, 10, 14,
+ 6, 13, 27, 7, 0, 30, 69, 98, 120, 126, 126,
+ 126, 126, 126 },
+
+ {
+
+ 26,
+ 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 82, 89,
+ 21, 60, 12, 22, 70, 8, 19, 65, 73, 1, 80, 102,
+ 71, 105, 126, 126, 126, 55, 2, 65, 8, 19, 65,
+ 84, 8, 11, 64, 0, 68, 69, 64, 80, 79, 92, 5,
+ 66, 69, 4, 73, 67, 81, 0, 74, 72, 80, 5, 2,
+ 22, 0, 0, 0, 4, 96, 97, 10, 75, 69, 14, 69,
+ 85, 39, 20, 12, 51, 57, 24, 27, 15, 27, 9, 9,
+ 31, 80, 75, 75, 81, 24, 65, 19, 28, 67, 72,
+ 67, 18, 1, 74, 73, 82, 18, 68, 8, 64, 68, 7,
+ 65, 8, 8, 8, 14, 21, 12, 4, 9, 62, 62, 62, 57,
+ 8, 64, 3, 12, 64, 11, 12, 4, 68, 34, 67, 104,
+ 100, 52, 9, 16, 5, 6, 10, 17, 45, 26, 12, 65,
+ 1, 2, 18, 22, 102, 3, 5, 79, 64, 72, 8, 7, 70,
+ 19, 12, 12, 87, 8, 77, 73, 6, 89, 8, 9, 6, 75,
+ 93, 1, 73, 69, 74, 89, 84, 72, 126, 62, 126,
+ 82, 86, 92, 104, 93, 102, 123, 112, 110, 91,
+ 115, 85, 62, 106, 2, 4, 66, 71, 72, 71, 75,
+ 75, 76, 91, 91, 88, 91, 93, 117, 75, 77, 98,
+ 71, 78, 79, 85, 89, 94, 92, 96, 96, 84, 98,
+ 101, 93, 89, 12, 32, 21, 10, 11, 15, 7, 3, 3,
+ 26, 18, 32, 24, 21, 17, 26, 17, 22, 18, 15,
+ 16, 46, 38, 33, 29, 33, 13, 5, 3, 1, 71, 53,
+ 29, 4, 65, 14, 2, 73, 68, 4, 41, 29, 11, 2,
+ 19, 5, 4, 67, 67, 62, 70, 0, 6, 6, 5, 3, 13,
+ 17, 6, 15, 18, 28, 68, 0, 74, 66, 23, 73, 74,
+ 71, 2, 3, 69, 77, 69, 79, 69, 71, 87, 13, 12,
+ 28, 5, 66, 12, 7, 1, 6, 5, 1, 74, 0, 65, 69,
+ 67, 15, 96, 79, 16, 82, 65, 72, 67, 10, 72,
+ 75, 4, 6, 89, 73, 97, 20, 22, 27, 7, 65, 2,
+ 64, 69, 72, 80, 81, 82, 94, 90, 93, 110, 102,
+ 109, 126, 102, 101, 97, 94, 104, 92, 99, 97,
+ 86, 93, 64, 66, 68, 69, 87, 80, 81, 99, 87,
+ 95, 96, 115, 101, 106, 109, 79, 77, 113, 79,
+ 82, 91, 106, 96, 102, 104, 89, 104, 97, 101,
+ 109, 107, 111, 97, 104, 109, 71, 65, 13, 1,
+ 28, 38, 0, 7, 16, 20, 5, 18, 34, 4, 64, 29,
+ 71, 101, 123, 126, 126, 126, 126, 126, 6, 39,
+ 30, 27, 17, 30, 16, 9, 7, 0, 73, 68, 10, 1,
+ 23, 32, 1, 3, 11, 15, 6, 13, 27, 7, 0, 29, 71,
+ 101, 123, 126, 126, 126, 126, 126 },
+
+ {
+
+ 25,
+ 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 84, 91,
+ 20, 60, 12, 23, 70, 9, 19, 65, 74, 2, 80, 103,
+ 73, 107, 126, 126, 126, 57, 3, 65, 9, 19, 65,
+ 85, 8, 11, 64, 0, 68, 69, 64, 80, 78, 91, 5,
+ 65, 68, 4, 72, 67, 81, 0, 74, 72, 80, 5, 2,
+ 22, 0, 0, 0, 5, 96, 97, 10, 75, 70, 14, 69,
+ 85, 41, 21, 13, 52, 60, 26, 28, 17, 29, 10,
+ 10, 33, 80, 75, 75, 81, 24, 65, 20, 31, 67,
+ 71, 66, 20, 0, 74, 73, 82, 19, 68, 8, 0, 68,
+ 7, 64, 9, 9, 9, 15, 23, 12, 5, 10, 62, 62, 62,
+ 60, 8, 64, 3, 13, 64, 11, 12, 5, 67, 36, 66,
+ 106, 102, 55, 9, 16, 5, 6, 11, 18, 47, 27, 12,
+ 65, 1, 2, 19, 23, 103, 3, 5, 79, 65, 73, 7, 7,
+ 71, 19, 12, 12, 89, 8, 78, 74, 5, 89, 8, 9, 6,
+ 77, 96, 0, 75, 72, 77, 91, 86, 74, 126, 62,
+ 126, 84, 87, 94, 108, 95, 105, 126, 117, 113,
+ 93, 118, 87, 62, 108, 0, 2, 68, 74, 75, 73,
+ 78, 77, 78, 93, 94, 90, 93, 94, 118, 75, 77,
+ 98, 72, 79, 80, 86, 90, 95, 93, 98, 97, 84,
+ 99, 102, 93, 89, 13, 33, 21, 10, 11, 16, 8, 4,
+ 3, 28, 19, 33, 24, 21, 18, 27, 18, 24, 20, 17,
+ 17, 46, 38, 33, 30, 34, 13, 5, 3, 2, 71, 54,
+ 29, 3, 65, 14, 2, 73, 67, 3, 41, 29, 10, 1,
+ 18, 5, 4, 67, 67, 62, 69, 1, 7, 7, 7, 4, 15,
+ 19, 7, 16, 20, 30, 68, 1, 73, 65, 25, 72, 74,
+ 71, 3, 4, 69, 77, 69, 80, 69, 71, 87, 13, 13,
+ 29, 5, 66, 13, 8, 1, 7, 6, 1, 75, 0, 65, 69,
+ 67, 16, 97, 80, 16, 83, 66, 73, 67, 10, 73,
+ 75, 4, 5, 91, 74, 98, 18, 21, 26, 6, 67, 0,
+ 66, 71, 74, 82, 84, 84, 97, 93, 95, 114, 106,
+ 113, 126, 106, 104, 100, 96, 106, 94, 101, 98,
+ 87, 94, 66, 68, 70, 72, 90, 82, 83, 102, 90,
+ 97, 99, 118, 103, 107, 111, 80, 78, 114, 81,
+ 83, 93, 108, 97, 104, 106, 90, 106, 98, 102,
+ 110, 108, 112, 99, 105, 110, 71, 64, 13, 1,
+ 29, 39, 0, 7, 17, 21, 5, 19, 35, 4, 64, 28,
+ 73, 103, 126, 126, 126, 126, 126, 126, 6, 39,
+ 30, 27, 17, 31, 17, 9, 7, 0, 72, 67, 10, 2,
+ 24, 33, 2, 4, 11, 15, 6, 13, 28, 7, 0, 28, 73,
+ 103, 126, 126, 126, 126, 126, 126 },
+
+ {
+
+ 23,
+ 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 85, 93,
+ 19, 60, 11, 25, 70, 9, 20, 65, 74, 2, 81, 105,
+ 74, 110, 126, 126, 126, 60, 4, 65, 9, 20, 65,
+ 85, 9, 11, 65, 0, 68, 68, 64, 80, 78, 91, 5,
+ 65, 68, 4, 72, 66, 81, 0, 74, 72, 80, 5, 2,
+ 22, 0, 0, 0, 5, 97, 97, 11, 76, 70, 13, 70,
+ 85, 44, 22, 14, 54, 62, 27, 30, 19, 30, 11,
+ 11, 35, 80, 75, 74, 81, 24, 65, 21, 33, 67,
+ 71, 66, 22, 0, 75, 74, 83, 19, 68, 9, 0, 67,
+ 8, 64, 10, 9, 9, 17, 24, 13, 6, 11, 62, 62,
+ 62, 62, 8, 64, 3, 13, 64, 11, 13, 5, 67, 38,
+ 66, 108, 104, 57, 9, 16, 5, 6, 11, 18, 48, 28,
+ 12, 65, 1, 2, 19, 24, 104, 3, 5, 80, 65, 74,
+ 7, 7, 71, 19, 12, 12, 90, 8, 79, 75, 5, 89, 7,
+ 8, 5, 79, 100, 64, 77, 74, 79, 93, 89, 76,
+ 126, 62, 126, 86, 89, 96, 111, 98, 109, 126,
+ 121, 116, 95, 122, 89, 62, 110, 65, 0, 71, 76,
+ 77, 75, 80, 79, 80, 95, 96, 92, 95, 96, 120,
+ 75, 77, 99, 73, 80, 81, 87, 91, 97, 94, 100,
+ 98, 85, 100, 103, 94, 88, 15, 34, 22, 10, 11,
+ 17, 8, 4, 4, 30, 20, 33, 24, 22, 19, 28, 19,
+ 25, 21, 18, 18, 47, 39, 34, 30, 35, 14, 5, 3,
+ 2, 71, 54, 29, 2, 65, 14, 2, 73, 67, 3, 40,
+ 28, 9, 0, 18, 5, 4, 67, 67, 62, 68, 3, 8, 9,
+ 8, 5, 17, 21, 8, 18, 22, 32, 67, 2, 73, 64,
+ 26, 72, 74, 71, 3, 4, 69, 77, 69, 80, 69, 71,
+ 88, 14, 13, 30, 5, 66, 13, 8, 1, 7, 6, 1, 75,
+ 0, 65, 69, 67, 16, 98, 81, 16, 84, 67, 74, 68,
+ 10, 74, 76, 4, 5, 92, 74, 99, 17, 20, 25, 4,
+ 69, 65, 68, 73, 77, 85, 87, 87, 100, 95, 97,
+ 118, 110, 117, 126, 109, 108, 102, 99, 109,
+ 96, 103, 100, 88, 95, 68, 71, 73, 74, 93, 85,
+ 86, 105, 92, 100, 101, 121, 105, 109, 112, 81,
+ 79, 116, 82, 85, 95, 110, 99, 106, 108, 91,
+ 107, 99, 103, 111, 109, 113, 100, 106, 111,
+ 71, 64, 14, 2, 30, 40, 0, 8, 17, 21, 5, 19,
+ 36, 4, 64, 27, 75, 106, 126, 126, 126, 126,
+ 126, 126, 6, 39, 30, 27, 17, 31, 17, 9, 7, 1,
+ 72, 67, 11, 2, 24, 34, 2, 4, 11, 15, 6, 13,
+ 28, 7, 0, 27, 75, 106, 126, 126, 126, 126,
+ 126, 126 },
+
+ {
+
+ 22,
+ 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 86, 95,
+ 18, 60, 11, 27, 70, 9, 21, 66, 75, 2, 82, 107,
+ 75, 112, 126, 126, 126, 62, 5, 64, 9, 21, 66,
+ 85, 10, 11, 65, 0, 67, 67, 64, 80, 78, 91, 5,
+ 65, 68, 4, 72, 66, 80, 0, 74, 72, 80, 6, 2,
+ 22, 0, 0, 0, 6, 97, 97, 11, 77, 70, 12, 70,
+ 85, 46, 23, 15, 56, 62, 29, 31, 20, 32, 13,
+ 12, 38, 80, 74, 74, 80, 24, 65, 22, 35, 67,
+ 70, 65, 24, 0, 75, 74, 83, 19, 68, 9, 1, 66,
+ 9, 0, 11, 10, 10, 18, 26, 14, 8, 12, 62, 62,
+ 62, 62, 9, 0, 4, 14, 64, 11, 13, 5, 66, 40,
+ 66, 110, 106, 60, 9, 17, 5, 6, 11, 19, 50, 29,
+ 13, 66, 1, 2, 20, 24, 105, 4, 5, 80, 66, 75,
+ 6, 6, 72, 19, 12, 12, 91, 9, 80, 75, 4, 89, 7,
+ 7, 4, 81, 103, 65, 78, 76, 81, 95, 91, 78,
+ 126, 62, 126, 88, 91, 99, 114, 100, 112, 126,
+ 125, 118, 97, 125, 91, 62, 111, 67, 65, 73,
+ 78, 79, 77, 82, 81, 82, 98, 98, 94, 96, 98,
+ 122, 75, 77, 99, 74, 81, 82, 89, 92, 98, 95,
+ 101, 99, 85, 101, 103, 95, 87, 16, 35, 22, 10,
+ 12, 17, 9, 5, 5, 32, 22, 33, 25, 22, 19, 29,
+ 20, 27, 23, 20, 18, 48, 40, 35, 31, 36, 14, 6,
+ 4, 3, 71, 55, 29, 2, 66, 14, 2, 73, 66, 3, 40,
+ 27, 8, 64, 18, 5, 5, 67, 66, 62, 66, 4, 10,
+ 11, 10, 6, 18, 22, 9, 20, 23, 34, 66, 3, 72,
+ 0, 28, 72, 74, 70, 4, 5, 69, 78, 69, 81, 69,
+ 71, 88, 14, 13, 31, 5, 66, 13, 8, 1, 7, 6, 1,
+ 76, 0, 65, 69, 67, 17, 99, 82, 16, 85, 67, 74,
+ 69, 10, 74, 77, 4, 5, 93, 74, 100, 16, 19, 24,
+ 2, 71, 67, 70, 76, 79, 88, 90, 90, 103, 98,
+ 99, 123, 114, 121, 126, 112, 111, 105, 101,
+ 111, 98, 104, 101, 88, 95, 71, 73, 75, 76, 96,
+ 88, 88, 108, 94, 102, 103, 124, 107, 111, 114,
+ 81, 79, 118, 83, 86, 96, 112, 101, 108, 109,
+ 92, 109, 100, 103, 113, 110, 113, 102, 108,
+ 113, 70, 64, 15, 2, 31, 41, 1, 9, 18, 22, 5,
+ 20, 37, 4, 64, 26, 77, 109, 126, 126, 126,
+ 126, 126, 126, 7, 39, 30, 27, 17, 32, 17, 10,
+ 8, 1, 72, 67, 11, 2, 25, 35, 2, 4, 12, 16, 6,
+ 14, 29, 7, 0, 26, 77, 109, 126, 126, 126, 126,
+ 126, 126 },
+
+ {
+
+ 21,
+ 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 88, 97,
+ 17, 60, 11, 29, 70, 10, 22, 66, 75, 2, 82,
+ 108, 76, 115, 126, 126, 126, 62, 6, 64, 10,
+ 22, 66, 86, 11, 11, 66, 0, 67, 67, 64, 80, 78,
+ 91, 5, 65, 67, 4, 72, 65, 80, 0, 74, 72, 80,
+ 6, 2, 22, 0, 0, 0, 6, 97, 97, 12, 78, 70, 11,
+ 71, 85, 49, 24, 16, 57, 62, 30, 33, 22, 33,
+ 14, 13, 40, 80, 74, 73, 80, 24, 65, 23, 37,
+ 67, 69, 65, 26, 0, 76, 75, 84, 19, 68, 10, 1,
+ 65, 10, 0, 12, 10, 11, 20, 27, 14, 9, 13, 62,
+ 62, 62, 62, 9, 0, 4, 14, 64, 11, 14, 6, 66,
+ 42, 66, 112, 108, 62, 9, 17, 5, 6, 11, 19, 51,
+ 30, 13, 66, 1, 2, 20, 25, 106, 4, 5, 81, 66,
+ 76, 6, 6, 72, 19, 12, 12, 92, 9, 81, 76, 4,
+ 89, 7, 7, 4, 83, 106, 66, 80, 78, 84, 97, 94,
+ 80, 126, 62, 126, 90, 93, 101, 117, 103, 115,
+ 126, 126, 121, 99, 126, 93, 62, 113, 69, 67,
+ 75, 81, 82, 79, 84, 83, 84, 100, 100, 96, 98,
+ 99, 124, 75, 77, 100, 75, 82, 83, 90, 93, 99,
+ 96, 103, 100, 86, 102, 104, 95, 87, 18, 36,
+ 23, 10, 12, 18, 9, 5, 5, 34, 23, 33, 25, 23,
+ 20, 30, 21, 28, 24, 22, 19, 48, 40, 35, 31,
+ 37, 15, 6, 4, 3, 71, 55, 29, 1, 66, 14, 2, 73,
+ 66, 3, 39, 27, 7, 65, 18, 5, 5, 67, 66, 62,
+ 65, 6, 11, 12, 11, 7, 20, 24, 10, 21, 25, 36,
+ 66, 4, 72, 1, 30, 71, 74, 70, 4, 5, 69, 78,
+ 69, 81, 69, 71, 89, 15, 14, 32, 5, 66, 14, 8,
+ 1, 7, 6, 1, 76, 0, 65, 69, 67, 17, 100, 83,
+ 16, 86, 68, 75, 69, 10, 75, 78, 4, 5, 94, 74,
+ 101, 15, 18, 23, 1, 73, 69, 72, 78, 82, 90,
+ 93, 93, 106, 100, 101, 126, 118, 125, 126,
+ 116, 114, 107, 103, 114, 100, 106, 103, 89,
+ 96, 73, 76, 78, 79, 99, 90, 91, 111, 97, 105,
+ 106, 126, 109, 113, 115, 82, 80, 119, 85, 88,
+ 98, 114, 102, 110, 111, 93, 110, 101, 104,
+ 114, 111, 114, 103, 109, 114, 70, 0, 15, 3,
+ 32, 42, 1, 9, 18, 22, 5, 20, 38, 4, 64, 25,
+ 79, 111, 126, 126, 126, 126, 126, 126, 7, 39,
+ 30, 27, 17, 32, 18, 10, 8, 2, 72, 67, 12, 3,
+ 26, 36, 2, 5, 12, 16, 6, 14, 29, 7, 0, 25, 79,
+ 111, 126, 126, 126, 126, 126, 126 },
+
+ {
+
+ 20,
+ 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 89, 99,
+ 16, 60, 11, 31, 70, 10, 23, 66, 76, 2, 83,
+ 110, 77, 117, 126, 126, 126, 62, 7, 64, 10,
+ 23, 66, 86, 12, 11, 66, 0, 67, 66, 64, 80, 78,
+ 91, 5, 65, 67, 4, 72, 65, 80, 0, 74, 72, 80,
+ 6, 2, 22, 0, 0, 0, 7, 97, 97, 12, 79, 70, 10,
+ 71, 85, 51, 25, 17, 59, 62, 32, 34, 24, 35,
+ 15, 14, 42, 80, 74, 73, 80, 24, 65, 24, 39,
+ 67, 68, 64, 28, 0, 76, 75, 84, 19, 68, 10, 2,
+ 64, 11, 1, 13, 11, 12, 21, 29, 15, 10, 14, 62,
+ 62, 62, 62, 9, 0, 4, 15, 64, 11, 14, 6, 65,
+ 44, 66, 114, 110, 62, 9, 17, 5, 6, 11, 20, 53,
+ 31, 13, 66, 1, 2, 21, 26, 107, 4, 5, 81, 67,
+ 77, 5, 6, 73, 19, 12, 12, 93, 9, 82, 77, 3,
+ 89, 7, 6, 3, 85, 109, 67, 82, 80, 86, 99, 96,
+ 82, 126, 62, 126, 92, 95, 103, 120, 105, 118,
+ 126, 126, 124, 101, 126, 95, 62, 115, 71, 69,
+ 77, 83, 84, 81, 86, 85, 86, 102, 102, 98, 100,
+ 101, 126, 75, 77, 100, 76, 83, 84, 91, 94,
+ 100, 97, 105, 101, 86, 103, 105, 96, 86, 19,
+ 37, 23, 10, 12, 19, 10, 6, 6, 36, 24, 33, 25,
+ 23, 21, 31, 22, 30, 26, 24, 20, 49, 41, 36,
+ 32, 38, 15, 6, 4, 4, 71, 56, 29, 0, 66, 14, 2,
+ 73, 65, 3, 39, 26, 6, 66, 18, 5, 5, 67, 66,
+ 62, 64, 7, 12, 14, 13, 8, 22, 26, 11, 23, 27,
+ 38, 65, 5, 71, 2, 32, 71, 74, 70, 5, 6, 69,
+ 78, 69, 82, 69, 71, 89, 15, 14, 33, 5, 66, 14,
+ 8, 1, 7, 6, 1, 77, 0, 65, 69, 67, 18, 101, 84,
+ 16, 87, 69, 76, 70, 10, 76, 79, 4, 5, 95, 74,
+ 102, 14, 17, 22, 64, 75, 71, 74, 80, 84, 93,
+ 96, 96, 109, 103, 103, 126, 122, 126, 126,
+ 119, 117, 110, 105, 116, 102, 108, 104, 90,
+ 97, 75, 78, 80, 81, 102, 93, 93, 114, 99, 107,
+ 108, 126, 111, 115, 117, 83, 81, 121, 86, 89,
+ 100, 116, 104, 112, 113, 94, 112, 102, 105,
+ 115, 112, 115, 105, 110, 115, 70, 0, 16, 3,
+ 33, 43, 1, 10, 19, 23, 5, 21, 39, 4, 64, 24,
+ 81, 114, 126, 126, 126, 126, 126, 126, 7, 39,
+ 30, 27, 17, 33, 18, 10, 8, 2, 72, 67, 12, 3,
+ 27, 37, 2, 5, 12, 16, 6, 14, 30, 7, 0, 24, 81,
+ 114, 126, 126, 126, 126, 126, 126 },
+
+ {
+
+ 18,
+ 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 91, 102,
+ 15, 60, 10, 32, 71, 10, 23, 67, 77, 2, 84,
+ 112, 79, 120, 126, 126, 126, 62, 7, 64, 10,
+ 23, 67, 87, 12, 11, 67, 0, 67, 66, 65, 81, 78,
+ 91, 4, 65, 67, 4, 72, 65, 80, 0, 74, 73, 80,
+ 6, 2, 22, 0, 0, 0, 7, 98, 97, 12, 80, 71, 9,
+ 72, 86, 53, 26, 18, 60, 62, 33, 35, 25, 36,
+ 16, 15, 44, 80, 74, 73, 80, 24, 65, 24, 41,
+ 67, 68, 64, 29, 64, 77, 76, 85, 19, 68, 10, 2,
+ 64, 11, 1, 13, 11, 12, 22, 30, 15, 11, 15, 62,
+ 62, 62, 62, 9, 0, 4, 15, 65, 11, 14, 6, 65,
+ 46, 66, 116, 112, 62, 9, 17, 5, 6, 11, 20, 54,
+ 31, 13, 67, 0, 2, 21, 26, 109, 4, 5, 82, 68,
+ 79, 4, 5, 74, 19, 12, 11, 95, 9, 83, 78, 2,
+ 89, 6, 5, 2, 88, 113, 69, 84, 83, 89, 102, 99,
+ 84, 126, 62, 126, 95, 97, 106, 124, 108, 122,
+ 126, 126, 126, 103, 126, 97, 62, 117, 74, 72,
+ 80, 86, 87, 84, 89, 88, 88, 105, 105, 100,
+ 102, 103, 126, 75, 78, 101, 77, 85, 86, 93,
+ 96, 102, 99, 107, 102, 87, 104, 106, 97, 86,
+ 20, 37, 23, 10, 12, 19, 10, 6, 6, 38, 25, 33,
+ 25, 23, 21, 31, 23, 31, 27, 25, 20, 49, 41,
+ 36, 32, 39, 15, 6, 4, 4, 72, 56, 28, 64, 67,
+ 14, 2, 73, 65, 2, 38, 25, 4, 67, 17, 5, 5, 67,
+ 66, 62, 0, 8, 13, 15, 14, 9, 23, 27, 12, 24,
+ 28, 40, 65, 5, 71, 3, 33, 71, 74, 70, 5, 6,
+ 69, 79, 70, 83, 69, 72, 90, 15, 14, 34, 5, 67,
+ 14, 8, 0, 7, 6, 1, 78, 0, 65, 69, 67, 18, 102,
+ 85, 16, 89, 70, 77, 71, 9, 77, 80, 4, 4, 97,
+ 75, 104, 12, 15, 21, 66, 77, 74, 77, 83, 87,
+ 96, 99, 99, 113, 106, 105, 126, 126, 126, 126,
+ 123, 121, 113, 108, 119, 104, 110, 106, 91,
+ 98, 78, 81, 83, 84, 105, 96, 96, 118, 102,
+ 110, 111, 126, 113, 117, 119, 84, 82, 123, 88,
+ 91, 102, 119, 106, 114, 115, 96, 114, 104,
+ 106, 117, 113, 116, 107, 112, 117, 70, 0, 16,
+ 3, 34, 44, 1, 10, 19, 23, 5, 21, 39, 4, 65,
+ 22, 83, 117, 126, 126, 126, 126, 126, 126, 7,
+ 39, 30, 27, 17, 33, 18, 10, 8, 2, 72, 67, 12,
+ 3, 27, 37, 2, 5, 12, 16, 6, 14, 30, 6, 64, 22,
+ 83, 117, 126, 126, 126, 126, 126, 126 },
+
+ {
+
+ 17,
+ 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 92, 104,
+ 14, 61, 10, 34, 71, 11, 24, 67, 77, 3, 84,
+ 113, 80, 122, 126, 126, 126, 62, 8, 0, 11, 24,
+ 67, 87, 13, 11, 67, 1, 66, 65, 65, 81, 77, 90,
+ 4, 64, 66, 4, 71, 64, 79, 1, 73, 73, 79, 7, 2,
+ 22, 0, 0, 0, 8, 98, 97, 13, 80, 71, 9, 72, 86,
+ 56, 28, 20, 62, 62, 35, 37, 27, 38, 18, 17,
+ 47, 80, 73, 72, 79, 24, 65, 25, 44, 66, 67, 0,
+ 31, 64, 77, 76, 85, 20, 68, 11, 3, 0, 12, 2,
+ 14, 12, 13, 24, 32, 16, 13, 17, 62, 62, 62,
+ 62, 10, 1, 5, 16, 65, 12, 15, 7, 64, 49, 65,
+ 118, 113, 62, 9, 18, 5, 7, 12, 21, 56, 32, 14,
+ 67, 0, 2, 22, 27, 110, 5, 5, 82, 68, 80, 4, 5,
+ 74, 19, 12, 11, 96, 10, 83, 78, 2, 89, 6, 5,
+ 2, 90, 116, 70, 85, 85, 91, 104, 101, 86, 126,
+ 62, 126, 97, 98, 108, 126, 110, 125, 126, 126,
+ 126, 105, 126, 99, 62, 118, 76, 74, 82, 88,
+ 89, 86, 91, 90, 90, 107, 107, 101, 103, 104,
+ 126, 75, 78, 101, 77, 86, 87, 94, 97, 103,
+ 100, 108, 103, 87, 105, 106, 97, 85, 22, 38,
+ 24, 10, 13, 20, 11, 7, 7, 41, 27, 34, 26, 24,
+ 22, 32, 25, 33, 29, 27, 21, 50, 42, 37, 33,
+ 40, 16, 7, 5, 5, 72, 57, 28, 64, 67, 15, 3,
+ 73, 64, 2, 38, 25, 3, 68, 17, 6, 6, 66, 65,
+ 62, 2, 10, 15, 17, 16, 11, 25, 29, 14, 26, 30,
+ 43, 64, 6, 70, 5, 35, 70, 73, 69, 6, 7, 68,
+ 79, 70, 83, 69, 72, 90, 16, 15, 35, 6, 67, 15,
+ 9, 0, 8, 7, 1, 78, 1, 64, 68, 66, 19, 102, 86,
+ 16, 90, 70, 77, 71, 9, 77, 80, 4, 4, 98, 75,
+ 105, 11, 14, 21, 67, 78, 76, 79, 85, 89, 98,
+ 101, 101, 116, 108, 107, 126, 126, 126, 126,
+ 126, 124, 115, 110, 121, 105, 111, 107, 91,
+ 98, 80, 83, 85, 86, 107, 98, 98, 121, 104,
+ 112, 113, 126, 114, 118, 120, 84, 82, 124, 89,
+ 92, 103, 121, 107, 115, 116, 97, 115, 105,
+ 106, 118, 113, 116, 108, 113, 118, 69, 1, 17,
+ 4, 36, 46, 2, 11, 20, 24, 6, 22, 40, 4, 65,
+ 21, 85, 119, 126, 126, 126, 126, 126, 126, 8,
+ 39, 31, 28, 18, 34, 19, 11, 9, 3, 71, 66, 13,
+ 4, 28, 38, 3, 6, 13, 17, 6, 15, 31, 6, 64, 21,
+ 85, 119, 126, 126, 126, 126, 126, 126 },
+
+ {
+
+ 16,
+ 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 93, 106,
+ 13, 61, 10, 36, 71, 11, 25, 67, 78, 3, 85,
+ 115, 81, 125, 126, 126, 126, 62, 9, 0, 11, 25,
+ 67, 87, 14, 11, 68, 1, 66, 64, 65, 81, 77, 90,
+ 4, 64, 66, 4, 71, 64, 79, 1, 73, 73, 79, 7, 2,
+ 22, 0, 0, 0, 9, 98, 97, 13, 81, 71, 8, 72, 86,
+ 58, 29, 21, 62, 62, 36, 38, 29, 39, 19, 18,
+ 49, 80, 73, 72, 79, 24, 65, 26, 46, 66, 66, 1,
+ 33, 64, 77, 76, 86, 20, 68, 11, 4, 1, 13, 3,
+ 15, 12, 14, 25, 33, 17, 14, 18, 62, 62, 62,
+ 62, 10, 1, 5, 16, 65, 12, 15, 7, 64, 51, 65,
+ 120, 115, 62, 9, 18, 5, 7, 12, 21, 58, 33, 14,
+ 67, 0, 2, 23, 28, 111, 5, 5, 82, 69, 81, 3, 5,
+ 75, 19, 12, 11, 97, 10, 84, 79, 1, 89, 6, 4,
+ 1, 92, 119, 71, 87, 87, 93, 106, 103, 88, 126,
+ 62, 126, 99, 100, 110, 126, 112, 126, 126,
+ 126, 126, 107, 126, 101, 62, 120, 78, 76, 84,
+ 90, 91, 88, 93, 92, 92, 109, 109, 103, 105,
+ 106, 126, 75, 78, 102, 78, 87, 88, 95, 98,
+ 104, 101, 110, 104, 87, 106, 107, 98, 84, 23,
+ 39, 24, 10, 13, 21, 12, 8, 8, 43, 28, 34, 26,
+ 24, 23, 33, 26, 35, 31, 29, 22, 51, 43, 38,
+ 34, 41, 16, 7, 5, 6, 72, 58, 28, 65, 67, 15,
+ 3, 73, 64, 2, 38, 24, 2, 69, 17, 6, 6, 66, 65,
+ 62, 3, 11, 16, 19, 18, 12, 27, 31, 15, 28, 32,
+ 45, 0, 7, 70, 6, 37, 70, 73, 69, 7, 7, 68, 79,
+ 70, 84, 69, 72, 90, 16, 15, 36, 6, 67, 15, 9,
+ 0, 8, 7, 1, 79, 1, 64, 68, 66, 20, 103, 87,
+ 16, 91, 71, 78, 72, 9, 78, 81, 4, 4, 99, 75,
+ 106, 10, 13, 20, 69, 80, 78, 81, 87, 91, 101,
+ 104, 104, 119, 111, 109, 126, 126, 126, 126,
+ 126, 126, 118, 112, 124, 107, 113, 108, 92,
+ 99, 82, 85, 88, 88, 110, 101, 101, 124, 106,
+ 115, 115, 126, 116, 120, 122, 85, 83, 126, 90,
+ 93, 105, 123, 109, 117, 118, 98, 117, 106,
+ 107, 119, 114, 117, 110, 114, 119, 69, 1, 18,
+ 4, 37, 47, 2, 12, 21, 25, 6, 22, 41, 4, 65,
+ 20, 87, 122, 126, 126, 126, 126, 126, 126, 8,
+ 39, 31, 28, 18, 35, 19, 11, 9, 3, 71, 66, 13,
+ 4, 29, 39, 3, 6, 13, 17, 6, 15, 32, 6, 64, 20,
+ 87, 122, 126, 126, 126, 126, 126, 126 },
+
+ {
+
+ 15,
+ 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 95, 108,
+ 12, 61, 10, 38, 71, 12, 26, 67, 78, 3, 85,
+ 116, 82, 126, 126, 126, 126, 62, 10, 0, 12,
+ 26, 67, 88, 15, 11, 68, 1, 66, 64, 65, 81, 77,
+ 90, 4, 64, 65, 4, 71, 0, 79, 1, 73, 73, 79, 7,
+ 2, 22, 0, 0, 0, 9, 98, 97, 14, 82, 71, 7, 73,
+ 86, 61, 30, 22, 62, 62, 38, 40, 31, 41, 20,
+ 19, 51, 80, 73, 71, 79, 24, 65, 27, 48, 66,
+ 65, 1, 35, 64, 78, 77, 86, 20, 68, 12, 4, 2,
+ 14, 3, 16, 13, 15, 27, 35, 17, 15, 19, 62, 62,
+ 62, 62, 10, 1, 5, 17, 65, 12, 16, 8, 0, 53,
+ 65, 122, 117, 62, 9, 18, 5, 7, 12, 22, 59, 34,
+ 14, 67, 0, 2, 23, 29, 112, 5, 5, 83, 69, 82,
+ 3, 5, 75, 19, 12, 11, 98, 10, 85, 80, 1, 89,
+ 6, 4, 1, 94, 122, 72, 89, 89, 96, 108, 106,
+ 90, 126, 62, 126, 101, 102, 112, 126, 115,
+ 126, 126, 126, 126, 109, 126, 103, 62, 122,
+ 80, 78, 86, 93, 94, 90, 95, 94, 94, 111, 111,
+ 105, 107, 107, 126, 75, 78, 102, 79, 88, 89,
+ 96, 99, 105, 102, 112, 105, 88, 107, 108, 98,
+ 84, 25, 40, 25, 10, 13, 22, 12, 8, 8, 45, 29,
+ 34, 26, 25, 24, 34, 27, 36, 32, 31, 23, 51,
+ 43, 38, 34, 42, 17, 7, 5, 6, 72, 58, 28, 66,
+ 67, 15, 3, 73, 0, 2, 37, 24, 1, 70, 17, 6, 6,
+ 66, 65, 62, 4, 13, 17, 20, 19, 13, 29, 33, 16,
+ 29, 34, 47, 0, 8, 69, 7, 39, 69, 73, 69, 7, 8,
+ 68, 79, 70, 84, 69, 72, 91, 17, 16, 37, 6, 67,
+ 16, 9, 0, 8, 7, 1, 79, 1, 64, 68, 66, 20, 104,
+ 88, 16, 92, 72, 79, 72, 9, 79, 82, 4, 4, 100,
+ 75, 107, 9, 12, 19, 70, 82, 80, 83, 89, 94,
+ 103, 107, 107, 122, 113, 111, 126, 126, 126,
+ 126, 126, 126, 120, 114, 126, 109, 115, 110,
+ 93, 100, 84, 88, 90, 91, 113, 103, 103, 126,
+ 109, 117, 118, 126, 118, 122, 123, 86, 84,
+ 126, 92, 95, 107, 125, 110, 119, 120, 99, 118,
+ 107, 108, 120, 115, 118, 111, 115, 120, 69, 2,
+ 18, 5, 38, 48, 2, 12, 21, 25, 6, 23, 42, 4,
+ 65, 19, 89, 124, 126, 126, 126, 126, 126, 126,
+ 8, 39, 31, 28, 18, 35, 20, 11, 9, 4, 71, 66,
+ 14, 5, 30, 40, 3, 7, 13, 17, 6, 15, 32, 6, 64,
+ 19, 89, 124, 126, 126, 126, 126, 126, 126 },
+
+ },
+
+ {
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 47, 62,
+ 62, 12, 1, 99, 47, 85, 102, 6, 6, 73, 6, 23, 53,
+ 62, 62, 21, 97, 126, 117, 74, 85, 102, 6, 93,
+ 88, 19, 8, 89, 103, 116, 6, 5, 84, 96, 0, 85,
+ 106, 0, 75, 90, 101, 8, 79, 75, 97, 13, 3, 22,
+ 0, 0, 0, 83, 86, 97, 72, 22, 1, 29, 88, 126,
+ 126, 91, 95, 84, 86, 89, 91, 126, 76, 103, 90,
+ 126, 80, 76, 84, 78, 8, 2, 83, 126, 79, 104, 91,
+ 126, 65, 79, 72, 92, 7, 68, 71, 98, 86, 88, 82,
+ 72, 67, 72, 89, 69, 4, 66, 6, 71, 71, 5, 74, 19,
+ 69, 1, 12, 16, 21, 22, 10, 76, 78, 83, 11, 67,
+ 90, 67, 72, 75, 80, 83, 64, 32, 64, 94, 75, 0,
+ 74, 28, 36, 91, 65, 69, 77, 66, 1, 68, 81, 33,
+ 56, 40, 74, 66, 124, 26, 62, 62, 126, 24, 21,
+ 29, 34, 32, 26, 21, 23, 30, 20, 27, 16, 8, 5, 3,
+ 19, 19, 21, 15, 7, 11, 26, 14, 5, 15, 18, 69,
+ 30, 0, 62, 62, 62, 53, 62, 62, 62, 62, 46, 38,
+ 34, 30, 48, 43, 73, 29, 32, 19, 47, 27, 27, 35,
+ 42, 43, 51, 47, 21, 93, 7, 6, 25, 126, 115, 82,
+ 1, 10, 4, 85, 89, 94, 92, 126, 100, 6, 67, 71,
+ 77, 85, 88, 104, 98, 126, 82, 15, 2, 66, 70, 75,
+ 79, 83, 92, 108, 79, 69, 75, 5, 5, 78, 83, 81,
+ 99, 81, 25, 1, 5, 4, 73, 76, 86, 83, 87, 62,
+ 126, 126, 120, 126, 114, 117, 118, 117, 113,
+ 118, 120, 124, 94, 102, 99, 106, 126, 92, 6, 86,
+ 94, 91, 77, 71, 73, 64, 81, 64, 6, 67, 68, 67,
+ 68, 77, 64, 68, 78, 8, 4, 65, 9, 19, 3, 70, 76,
+ 86, 70, 64, 70, 8, 7, 69, 65, 74, 9, 9, 76, 82,
+ 77, 77, 21, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 52, 62, 62, 62, 62, 62, 62,
+ 48, 62, 62, 46, 25, 18, 9, 79, 62, 62, 62, 62,
+ 48, 48, 38, 41, 47, 45, 35, 22, 35, 16, 1, 32,
+ 37, 39, 40, 47, 33, 34, 22, 21, 3, 11, 3, 78,
+ 123, 10, 7, 2, 30, 13, 2, 78, 74, 72, 72, 75,
+ 71, 0, 70, 75, 72, 67, 10, 4, 11, 68, 62, 62,
+ 62, 62, 56, 51, 40, 25, 64, 71, 26, 19, 14, 7,
+ 4, 0, 67, 68, 79, 78, 74, 72, 72, 75, 71, 0, 70,
+ 75, 72, 67, 10, 4, 11, 68, 62, 62, 62, 62, 56,
+ 51, 40, 25, 64 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 46,
+ 62, 62, 13, 2, 97, 46, 84, 100, 6, 6, 71, 6,
+ 22, 52, 62, 60, 19, 97, 125, 115, 73, 84, 100,
+ 6, 92, 87, 20, 8, 88, 102, 114, 5, 4, 84, 96,
+ 0, 84, 105, 0, 75, 89, 100, 8, 78, 74, 96, 14,
+ 3, 22, 0, 0, 0, 82, 86, 97, 71, 22, 1, 29, 87,
+ 125, 124, 89, 94, 82, 84, 88, 89, 125, 75,
+ 101, 89, 124, 80, 76, 84, 78, 9, 2, 82, 124,
+ 78, 103, 90, 125, 65, 78, 72, 91, 8, 68, 70,
+ 97, 85, 87, 81, 71, 66, 71, 88, 68, 5, 66, 6,
+ 70, 70, 5, 73, 20, 68, 1, 13, 17, 22, 23, 11,
+ 76, 77, 82, 11, 67, 89, 67, 71, 74, 79, 81, 1,
+ 33, 1, 92, 75, 64, 73, 29, 37, 91, 65, 68, 77,
+ 65, 1, 67, 79, 33, 56, 41, 72, 67, 122, 25,
+ 62, 62, 125, 24, 21, 29, 34, 32, 26, 21, 23,
+ 30, 20, 27, 16, 8, 5, 3, 19, 19, 21, 15, 7,
+ 11, 26, 14, 4, 15, 18, 69, 29, 0, 62, 62, 62,
+ 52, 62, 62, 62, 62, 45, 37, 32, 29, 46, 42,
+ 74, 28, 31, 18, 46, 27, 27, 34, 41, 42, 50,
+ 46, 20, 93, 7, 6, 24, 125, 113, 80, 2, 10, 4,
+ 84, 88, 93, 91, 125, 98, 7, 66, 70, 76, 83,
+ 87, 102, 97, 124, 81, 16, 3, 65, 69, 74, 78,
+ 82, 91, 106, 78, 67, 74, 6, 5, 77, 82, 80, 98,
+ 80, 26, 2, 6, 5, 72, 75, 85, 82, 86, 62, 125,
+ 125, 118, 125, 112, 115, 116, 115, 111, 116,
+ 118, 121, 93, 101, 98, 105, 123, 91, 5, 85,
+ 93, 90, 76, 71, 72, 64, 80, 64, 6, 67, 68, 66,
+ 68, 77, 64, 68, 77, 8, 4, 65, 9, 19, 3, 70,
+ 75, 84, 70, 64, 69, 8, 7, 69, 65, 73, 9, 9,
+ 75, 81, 76, 76, 20, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 50, 62, 62,
+ 62, 62, 62, 62, 47, 60, 60, 45, 24, 17, 9, 79,
+ 62, 62, 62, 60, 46, 47, 37, 39, 46, 43, 34,
+ 20, 33, 15, 0, 31, 36, 37, 39, 46, 32, 33, 21,
+ 20, 2, 11, 3, 78, 122, 9, 6, 1, 29, 12, 1, 77,
+ 73, 71, 71, 73, 70, 1, 69, 73, 71, 66, 11, 5,
+ 12, 67, 62, 62, 62, 62, 54, 50, 38, 24, 65,
+ 70, 27, 20, 15, 8, 5, 1, 66, 67, 78, 77, 73,
+ 71, 71, 73, 70, 1, 69, 73, 71, 66, 11, 5, 12,
+ 67, 62, 62, 62, 62, 54, 50, 38, 24, 65 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 44,
+ 60, 62, 14, 2, 95, 44, 84, 99, 6, 6, 70, 5,
+ 21, 51, 60, 57, 17, 98, 123, 114, 73, 84, 99,
+ 6, 92, 86, 20, 8, 87, 101, 113, 4, 3, 84, 96,
+ 0, 84, 104, 0, 75, 89, 100, 8, 78, 74, 95, 14,
+ 3, 22, 0, 0, 0, 81, 86, 97, 71, 21, 1, 29, 86,
+ 124, 122, 88, 93, 80, 82, 87, 88, 123, 74,
+ 100, 88, 122, 81, 76, 84, 78, 9, 2, 81, 122,
+ 78, 102, 89, 123, 65, 78, 72, 91, 8, 68, 70,
+ 96, 85, 86, 81, 71, 66, 71, 87, 67, 5, 66, 6,
+ 70, 70, 5, 73, 20, 68, 1, 13, 17, 22, 23, 11,
+ 77, 76, 81, 10, 67, 89, 67, 70, 74, 79, 80, 2,
+ 34, 3, 90, 76, 65, 73, 29, 37, 92, 65, 68, 78,
+ 64, 1, 67, 78, 33, 56, 41, 71, 68, 121, 24,
+ 62, 62, 124, 24, 21, 29, 33, 31, 26, 21, 23,
+ 29, 19, 26, 16, 8, 5, 3, 18, 18, 20, 15, 7,
+ 11, 25, 13, 3, 14, 17, 69, 28, 64, 62, 62, 62,
+ 50, 60, 62, 62, 62, 44, 35, 30, 27, 44, 40,
+ 75, 27, 30, 16, 45, 26, 26, 33, 39, 40, 48,
+ 44, 18, 93, 6, 5, 22, 124, 112, 79, 3, 10, 4,
+ 83, 87, 92, 90, 123, 97, 8, 65, 69, 75, 82,
+ 86, 101, 96, 122, 80, 16, 3, 65, 69, 73, 77,
+ 81, 90, 105, 78, 66, 73, 6, 5, 76, 81, 80, 97,
+ 79, 26, 3, 6, 5, 71, 74, 84, 81, 85, 62, 124,
+ 123, 116, 123, 111, 114, 114, 113, 110, 114,
+ 116, 119, 92, 100, 97, 104, 120, 91, 4, 85,
+ 92, 89, 76, 71, 72, 64, 80, 64, 5, 67, 68, 65,
+ 68, 77, 64, 68, 77, 8, 4, 65, 8, 18, 3, 70,
+ 75, 83, 71, 64, 68, 7, 7, 69, 65, 73, 9, 9,
+ 75, 80, 76, 76, 18, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 48, 62, 62,
+ 62, 62, 62, 61, 45, 58, 58, 43, 23, 16, 8, 79,
+ 62, 62, 62, 58, 44, 45, 35, 37, 44, 41, 32,
+ 18, 31, 13, 64, 30, 35, 35, 37, 44, 30, 31,
+ 20, 19, 1, 10, 2, 78, 121, 8, 5, 64, 28, 11,
+ 0, 77, 73, 70, 70, 72, 69, 2, 69, 72, 70, 65,
+ 11, 6, 13, 66, 62, 62, 62, 60, 52, 48, 36, 22,
+ 66, 69, 27, 20, 16, 9, 6, 1, 65, 67, 77, 77,
+ 73, 70, 70, 72, 69, 2, 69, 72, 70, 65, 11, 6,
+ 13, 66, 62, 62, 62, 60, 52, 48, 36, 22, 66 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 42, 59,
+ 61, 14, 2, 93, 43, 84, 97, 6, 5, 69, 4, 20,
+ 50, 58, 53, 15, 99, 121, 112, 73, 84, 97, 6,
+ 91, 85, 21, 8, 86, 100, 112, 3, 2, 84, 97, 0,
+ 84, 103, 0, 76, 89, 100, 8, 78, 74, 94, 15, 3,
+ 22, 0, 0, 0, 81, 86, 97, 70, 20, 1, 28, 86,
+ 123, 120, 87, 92, 79, 81, 86, 87, 121, 73, 99,
+ 87, 120, 82, 76, 84, 78, 10, 2, 80, 120, 78,
+ 101, 88, 121, 65, 78, 72, 91, 9, 68, 69, 95,
+ 85, 85, 81, 71, 66, 70, 86, 67, 5, 66, 6, 70,
+ 70, 5, 73, 20, 68, 1, 14, 17, 23, 23, 12, 77,
+ 76, 80, 10, 67, 89, 67, 69, 74, 78, 79, 3, 35,
+ 4, 88, 76, 66, 72, 29, 37, 93, 65, 67, 78, 64,
+ 1, 67, 77, 33, 56, 41, 70, 69, 119, 23, 62,
+ 62, 122, 24, 21, 28, 32, 31, 25, 20, 23, 29,
+ 18, 25, 16, 8, 5, 2, 18, 17, 19, 14, 7, 11,
+ 24, 13, 2, 14, 16, 69, 27, 64, 62, 62, 61, 49,
+ 58, 62, 62, 62, 43, 33, 28, 26, 42, 38, 77,
+ 26, 29, 14, 44, 25, 25, 32, 38, 38, 46, 42,
+ 17, 93, 5, 4, 21, 122, 110, 77, 3, 10, 4, 82,
+ 86, 91, 89, 121, 96, 9, 64, 68, 75, 81, 85,
+ 99, 95, 120, 80, 17, 4, 64, 68, 72, 77, 81,
+ 89, 104, 78, 64, 72, 6, 5, 75, 81, 80, 96, 78,
+ 27, 4, 7, 5, 70, 74, 83, 81, 85, 62, 122, 122,
+ 115, 121, 110, 112, 113, 112, 108, 112, 114,
+ 117, 92, 99, 97, 103, 117, 91, 3, 85, 91, 88,
+ 76, 71, 72, 64, 79, 64, 4, 67, 68, 65, 68, 77,
+ 64, 68, 77, 7, 4, 65, 7, 17, 3, 70, 75, 82,
+ 72, 64, 67, 6, 7, 69, 65, 72, 9, 8, 74, 79,
+ 76, 76, 17, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 46, 62, 62, 62, 62,
+ 62, 59, 43, 56, 55, 41, 22, 15, 7, 79, 62, 62,
+ 62, 56, 42, 43, 34, 35, 42, 39, 30, 16, 29,
+ 11, 65, 29, 34, 33, 36, 42, 29, 29, 18, 17, 0,
+ 9, 1, 78, 120, 7, 3, 65, 27, 10, 64, 77, 72,
+ 70, 70, 71, 68, 3, 69, 71, 69, 64, 12, 7, 13,
+ 65, 62, 62, 62, 58, 50, 46, 34, 20, 67, 69,
+ 28, 21, 17, 9, 7, 2, 65, 66, 77, 77, 72, 70,
+ 70, 71, 68, 3, 69, 71, 69, 64, 12, 7, 13, 65,
+ 62, 62, 62, 58, 50, 46, 34, 20, 67 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 40, 57,
+ 60, 15, 2, 92, 41, 84, 96, 5, 5, 68, 3, 18,
+ 48, 56, 50, 12, 100, 119, 111, 73, 84, 96, 5,
+ 91, 84, 21, 7, 86, 99, 110, 2, 0, 85, 97, 0,
+ 83, 102, 64, 76, 89, 100, 8, 78, 74, 94, 15,
+ 3, 22, 0, 0, 0, 80, 87, 97, 70, 19, 1, 28, 85,
+ 122, 118, 86, 91, 77, 79, 86, 86, 119, 72, 98,
+ 86, 117, 82, 77, 84, 79, 10, 1, 79, 117, 77,
+ 101, 88, 119, 65, 78, 72, 91, 9, 68, 69, 94,
+ 85, 85, 80, 71, 66, 70, 85, 66, 5, 67, 5, 70,
+ 70, 5, 73, 20, 68, 1, 14, 17, 23, 23, 12, 78,
+ 75, 80, 9, 67, 88, 67, 68, 73, 78, 77, 5, 36,
+ 6, 86, 77, 67, 72, 30, 37, 94, 65, 67, 79, 0,
+ 1, 67, 76, 33, 56, 41, 68, 70, 118, 22, 62,
+ 62, 121, 23, 21, 28, 32, 30, 25, 20, 23, 28,
+ 17, 24, 15, 8, 5, 2, 17, 17, 18, 14, 6, 10,
+ 23, 12, 1, 13, 15, 69, 25, 65, 62, 62, 59, 47,
+ 57, 62, 62, 62, 42, 31, 25, 24, 40, 36, 78,
+ 24, 28, 13, 43, 24, 24, 30, 36, 36, 44, 41,
+ 15, 93, 4, 3, 19, 121, 109, 76, 4, 10, 4, 81,
+ 85, 90, 89, 119, 94, 10, 64, 68, 74, 79, 84,
+ 98, 94, 117, 79, 17, 4, 64, 68, 71, 76, 80,
+ 89, 103, 78, 0, 71, 6, 5, 74, 80, 80, 95, 77,
+ 27, 5, 7, 5, 69, 73, 82, 80, 84, 62, 121, 120,
+ 113, 120, 109, 111, 111, 110, 107, 111, 112,
+ 114, 91, 98, 96, 102, 114, 90, 2, 84, 90, 88,
+ 76, 71, 72, 65, 79, 65, 3, 67, 68, 64, 68, 77,
+ 64, 68, 76, 7, 3, 65, 6, 16, 2, 70, 75, 81,
+ 73, 65, 67, 6, 6, 69, 65, 72, 8, 8, 74, 79,
+ 76, 76, 15, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 44, 62, 62, 62, 62,
+ 62, 57, 41, 54, 53, 39, 20, 14, 6, 79, 62, 62,
+ 62, 54, 40, 41, 32, 33, 40, 37, 28, 14, 26,
+ 10, 67, 28, 33, 30, 34, 41, 27, 27, 17, 16,
+ 64, 8, 0, 78, 119, 5, 2, 67, 25, 9, 65, 77,
+ 72, 69, 69, 70, 68, 3, 68, 70, 68, 0, 12, 8,
+ 14, 65, 62, 62, 60, 56, 48, 44, 31, 18, 69,
+ 68, 28, 21, 17, 10, 7, 2, 64, 66, 76, 77, 72,
+ 69, 69, 70, 68, 3, 68, 70, 68, 0, 12, 8, 14,
+ 65, 62, 62, 60, 56, 48, 44, 31, 18, 69 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 38, 56,
+ 59, 16, 2, 90, 39, 83, 94, 5, 5, 67, 2, 17,
+ 47, 54, 47, 10, 100, 117, 110, 73, 83, 94, 5,
+ 91, 83, 21, 7, 85, 98, 109, 1, 64, 85, 97, 0,
+ 83, 101, 64, 76, 89, 100, 8, 77, 74, 93, 16,
+ 3, 22, 0, 0, 0, 80, 87, 97, 69, 18, 1, 27, 85,
+ 120, 115, 85, 90, 76, 78, 85, 85, 117, 71, 97,
+ 85, 115, 83, 77, 84, 79, 10, 1, 78, 115, 77,
+ 100, 87, 117, 65, 78, 72, 90, 9, 68, 68, 93,
+ 84, 84, 80, 71, 65, 69, 84, 66, 5, 67, 5, 69,
+ 70, 5, 73, 21, 68, 1, 15, 18, 23, 23, 12, 78,
+ 75, 79, 9, 67, 88, 67, 67, 73, 77, 76, 6, 37,
+ 7, 84, 77, 68, 71, 30, 37, 95, 65, 66, 79, 1,
+ 1, 67, 74, 33, 56, 41, 67, 71, 116, 21, 62,
+ 62, 120, 23, 21, 27, 31, 30, 25, 19, 23, 28,
+ 16, 23, 15, 8, 5, 2, 17, 16, 17, 13, 6, 10,
+ 22, 12, 0, 12, 15, 69, 24, 65, 62, 62, 58, 46,
+ 55, 62, 62, 62, 41, 29, 23, 23, 38, 34, 79,
+ 23, 27, 11, 42, 23, 23, 29, 35, 34, 42, 39,
+ 14, 93, 3, 2, 17, 119, 107, 75, 4, 10, 4, 80,
+ 84, 89, 88, 117, 93, 11, 0, 67, 73, 78, 83,
+ 96, 93, 115, 78, 18, 5, 0, 67, 70, 75, 80, 88,
+ 102, 77, 1, 70, 6, 5, 73, 80, 79, 94, 76, 27,
+ 6, 7, 5, 68, 72, 81, 80, 83, 62, 120, 119,
+ 112, 118, 108, 109, 110, 108, 105, 109, 110,
+ 112, 90, 97, 95, 101, 111, 90, 1, 84, 89, 87,
+ 76, 71, 72, 65, 78, 65, 2, 67, 68, 0, 68, 77,
+ 64, 68, 76, 6, 3, 65, 5, 15, 2, 70, 75, 80,
+ 73, 65, 66, 5, 6, 69, 65, 72, 8, 7, 74, 78,
+ 76, 76, 14, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 42, 62, 62, 62, 62,
+ 62, 55, 40, 52, 50, 37, 19, 13, 5, 79, 62, 62,
+ 62, 52, 38, 39, 31, 31, 38, 35, 26, 12, 24, 8,
+ 68, 27, 32, 28, 33, 39, 26, 25, 16, 15, 65, 7,
+ 64, 78, 118, 4, 1, 68, 24, 8, 66, 77, 71, 69,
+ 68, 69, 67, 4, 68, 69, 67, 1, 13, 9, 14, 64,
+ 62, 62, 58, 54, 46, 42, 29, 16, 70, 68, 29,
+ 22, 18, 11, 8, 3, 64, 66, 75, 77, 71, 69, 68,
+ 69, 67, 4, 68, 69, 67, 1, 13, 9, 14, 64, 62,
+ 62, 58, 54, 46, 42, 29, 16, 70 },
+
+ {
+
+ 62,
+ 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 37, 54,
+ 58, 16, 3, 88, 38, 83, 93, 5, 4, 66, 1, 16,
+ 46, 53, 43, 8, 101, 115, 108, 73, 83, 93, 5,
+ 90, 82, 22, 7, 84, 97, 108, 64, 65, 85, 98, 0,
+ 83, 101, 64, 77, 88, 100, 7, 77, 74, 92, 16,
+ 3, 22, 0, 0, 0, 79, 87, 97, 69, 18, 0, 27, 84,
+ 119, 113, 84, 89, 74, 76, 84, 84, 115, 70, 96,
+ 85, 113, 84, 77, 84, 79, 11, 1, 77, 113, 77,
+ 99, 86, 115, 65, 78, 72, 90, 10, 69, 68, 93,
+ 84, 83, 80, 70, 65, 69, 83, 65, 5, 67, 5, 69,
+ 70, 5, 73, 21, 68, 1, 15, 18, 24, 24, 13, 79,
+ 74, 78, 8, 67, 88, 67, 66, 73, 77, 75, 7, 37,
+ 9, 83, 78, 69, 71, 30, 37, 95, 66, 66, 80, 1,
+ 0, 66, 73, 33, 56, 42, 66, 72, 115, 20, 62,
+ 62, 118, 23, 21, 27, 30, 29, 24, 19, 22, 27,
+ 16, 23, 15, 7, 5, 1, 16, 15, 16, 13, 6, 10,
+ 22, 11, 65, 12, 14, 69, 23, 66, 62, 62, 56,
+ 44, 53, 62, 62, 62, 39, 27, 21, 21, 36, 32,
+ 81, 22, 25, 9, 40, 22, 22, 28, 33, 32, 40, 37,
+ 12, 93, 2, 1, 16, 118, 106, 73, 5, 10, 4, 79,
+ 84, 89, 87, 116, 92, 12, 1, 66, 73, 77, 82,
+ 95, 92, 113, 78, 18, 5, 0, 67, 69, 75, 79, 87,
+ 101, 77, 3, 69, 6, 5, 73, 79, 79, 94, 76, 28,
+ 6, 8, 5, 67, 72, 81, 79, 83, 62, 118, 117,
+ 110, 116, 106, 108, 108, 107, 104, 107, 108,
+ 110, 90, 96, 95, 101, 108, 90, 0, 84, 89, 86,
+ 76, 71, 72, 65, 78, 65, 1, 67, 68, 0, 68, 77,
+ 64, 68, 76, 6, 3, 65, 4, 14, 2, 70, 75, 79,
+ 74, 65, 65, 4, 6, 69, 65, 71, 8, 7, 73, 77,
+ 76, 76, 12, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 40, 62, 62, 62, 62,
+ 62, 52, 38, 50, 48, 35, 18, 12, 4, 79, 62, 62,
+ 62, 50, 36, 38, 29, 29, 36, 32, 24, 10, 22, 6,
+ 69, 26, 30, 26, 31, 37, 24, 23, 14, 13, 66, 6,
+ 65, 79, 117, 3, 64, 70, 23, 6, 67, 76, 71, 68,
+ 68, 68, 66, 5, 68, 68, 66, 2, 13, 10, 15, 0,
+ 62, 62, 56, 52, 44, 40, 27, 14, 71, 67, 29,
+ 22, 19, 11, 9, 3, 0, 65, 75, 76, 71, 68, 68,
+ 68, 66, 5, 68, 68, 66, 2, 13, 10, 15, 0, 62,
+ 62, 56, 52, 44, 40, 27, 14, 71 },
+
+ {
+
+ 62,
+ 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 35, 53,
+ 57, 17, 3, 87, 36, 83, 91, 4, 4, 65, 0, 15,
+ 45, 51, 40, 5, 102, 113, 107, 73, 83, 91, 4,
+ 90, 81, 22, 7, 84, 96, 106, 65, 66, 85, 98, 0,
+ 82, 100, 65, 77, 88, 100, 7, 77, 74, 91, 17,
+ 3, 22, 0, 0, 0, 79, 87, 97, 68, 17, 0, 26, 84,
+ 118, 111, 83, 88, 73, 75, 83, 83, 113, 69, 95,
+ 84, 110, 84, 78, 84, 80, 11, 1, 76, 110, 76,
+ 99, 86, 113, 65, 78, 72, 90, 10, 69, 67, 92,
+ 84, 82, 79, 70, 65, 68, 82, 65, 5, 68, 5, 69,
+ 70, 5, 73, 21, 68, 1, 16, 18, 24, 24, 13, 79,
+ 74, 78, 8, 67, 87, 67, 65, 72, 76, 73, 9, 38,
+ 10, 81, 78, 70, 70, 31, 37, 96, 66, 65, 80, 2,
+ 0, 66, 72, 33, 56, 42, 64, 73, 113, 19, 62,
+ 62, 117, 23, 21, 26, 30, 29, 24, 18, 22, 27,
+ 15, 22, 15, 7, 5, 1, 16, 15, 15, 12, 6, 10,
+ 21, 11, 66, 11, 13, 69, 22, 66, 62, 62, 54,
+ 43, 52, 62, 62, 62, 38, 25, 19, 20, 34, 30,
+ 82, 21, 24, 8, 39, 21, 21, 26, 32, 30, 38, 36,
+ 11, 93, 1, 0, 14, 116, 104, 72, 5, 10, 4, 78,
+ 83, 88, 87, 114, 90, 13, 2, 66, 72, 75, 81,
+ 93, 91, 110, 77, 19, 6, 1, 66, 68, 74, 79, 86,
+ 100, 77, 4, 68, 6, 5, 72, 79, 79, 93, 75, 28,
+ 7, 8, 5, 66, 71, 80, 79, 82, 62, 117, 116,
+ 109, 115, 105, 106, 107, 105, 102, 105, 106,
+ 107, 89, 95, 94, 100, 105, 89, 64, 83, 88, 85,
+ 76, 71, 72, 65, 77, 66, 0, 67, 68, 1, 68, 77,
+ 64, 68, 75, 5, 2, 65, 3, 13, 1, 70, 75, 78,
+ 75, 66, 64, 4, 5, 69, 65, 71, 7, 6, 73, 77,
+ 76, 76, 11, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 38, 62, 62, 62, 62,
+ 62, 50, 36, 48, 45, 33, 17, 11, 3, 79, 62, 61,
+ 62, 48, 34, 36, 28, 27, 34, 30, 22, 8, 20, 5,
+ 71, 25, 29, 24, 30, 36, 23, 21, 13, 12, 67, 5,
+ 66, 79, 116, 1, 65, 71, 21, 5, 68, 76, 70, 68,
+ 67, 67, 65, 5, 67, 67, 65, 3, 14, 11, 15, 0,
+ 62, 60, 54, 50, 42, 38, 24, 12, 72, 67, 30,
+ 23, 19, 12, 10, 4, 0, 65, 74, 76, 70, 68, 67,
+ 67, 65, 5, 67, 67, 65, 3, 14, 11, 15, 0, 62,
+ 60, 54, 50, 42, 38, 24, 12, 72 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 33, 51,
+ 56, 17, 3, 85, 34, 83, 90, 4, 3, 64, 64, 13,
+ 43, 49, 36, 3, 103, 111, 106, 73, 83, 90, 4,
+ 90, 81, 22, 6, 83, 95, 105, 66, 68, 86, 99, 0,
+ 82, 99, 65, 78, 88, 100, 7, 77, 74, 91, 17, 3,
+ 22, 0, 0, 0, 78, 88, 97, 68, 16, 0, 26, 83,
+ 117, 109, 82, 88, 71, 73, 83, 82, 111, 69, 94,
+ 83, 108, 85, 78, 85, 80, 11, 0, 76, 108, 76,
+ 98, 85, 112, 65, 78, 72, 90, 10, 69, 67, 91,
+ 84, 82, 79, 70, 65, 68, 81, 64, 5, 68, 4, 69,
+ 70, 4, 73, 21, 68, 1, 16, 18, 24, 24, 13, 80,
+ 73, 77, 7, 67, 87, 67, 64, 72, 76, 72, 10, 39,
+ 12, 79, 79, 71, 70, 31, 37, 97, 66, 65, 81, 2,
+ 0, 66, 71, 33, 56, 42, 0, 74, 112, 18, 59, 62,
+ 116, 22, 21, 26, 29, 28, 23, 18, 22, 26, 14,
+ 21, 14, 7, 4, 0, 15, 14, 14, 12, 5, 9, 20, 10,
+ 67, 10, 12, 69, 20, 67, 62, 62, 52, 41, 50,
+ 60, 62, 62, 37, 23, 16, 18, 31, 28, 84, 19,
+ 23, 6, 38, 20, 20, 25, 30, 28, 36, 34, 9, 93,
+ 0, 64, 12, 115, 103, 71, 6, 10, 4, 78, 82, 87,
+ 86, 112, 89, 13, 2, 65, 72, 74, 80, 92, 90,
+ 108, 77, 19, 6, 1, 66, 68, 74, 78, 86, 99, 77,
+ 5, 67, 6, 5, 71, 78, 79, 92, 74, 28, 8, 8, 5,
+ 65, 71, 79, 78, 82, 62, 116, 114, 107, 113,
+ 104, 105, 105, 104, 101, 104, 104, 105, 89,
+ 94, 94, 99, 102, 89, 65, 83, 87, 85, 76, 71,
+ 72, 66, 77, 66, 64, 67, 68, 1, 68, 77, 65, 68,
+ 75, 5, 2, 66, 2, 12, 1, 71, 75, 77, 76, 66,
+ 64, 3, 5, 69, 66, 71, 7, 6, 73, 76, 76, 76, 9,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 61, 36, 62, 62, 62, 62, 61, 48, 34,
+ 45, 43, 31, 15, 9, 2, 79, 61, 59, 62, 46, 31,
+ 34, 26, 24, 32, 28, 20, 6, 17, 3, 72, 23, 28,
+ 21, 28, 34, 21, 19, 11, 10, 68, 4, 67, 79,
+ 115, 0, 67, 73, 20, 4, 69, 76, 70, 67, 67, 66,
+ 65, 6, 67, 66, 65, 4, 14, 11, 16, 1, 61, 58,
+ 52, 48, 40, 36, 22, 10, 74, 66, 30, 23, 20,
+ 12, 10, 4, 1, 65, 74, 76, 70, 67, 67, 66, 65,
+ 6, 67, 66, 65, 4, 14, 11, 16, 1, 61, 58, 52,
+ 48, 40, 36, 22, 10, 74 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 31, 49,
+ 56, 18, 3, 83, 33, 82, 88, 4, 3, 0, 64, 12,
+ 42, 47, 33, 1, 103, 109, 104, 72, 82, 88, 4,
+ 89, 80, 23, 6, 82, 94, 104, 67, 69, 86, 99, 0,
+ 82, 98, 65, 78, 88, 100, 7, 76, 73, 90, 17, 3,
+ 22, 0, 0, 0, 77, 88, 97, 68, 15, 0, 26, 82,
+ 115, 106, 81, 87, 69, 71, 82, 81, 109, 68, 92,
+ 82, 106, 86, 78, 85, 80, 12, 0, 75, 106, 76,
+ 97, 84, 110, 65, 77, 72, 89, 11, 69, 66, 90,
+ 83, 81, 79, 70, 64, 67, 80, 0, 5, 68, 4, 68,
+ 69, 4, 73, 22, 68, 1, 16, 19, 25, 24, 14, 80,
+ 72, 76, 6, 67, 87, 67, 0, 72, 75, 71, 11, 40,
+ 14, 77, 80, 72, 69, 31, 38, 98, 66, 65, 81, 3,
+ 0, 66, 69, 33, 56, 42, 1, 75, 111, 17, 57, 62,
+ 114, 22, 21, 26, 28, 28, 23, 18, 22, 26, 13,
+ 20, 14, 7, 4, 0, 15, 13, 14, 12, 5, 9, 19, 9,
+ 68, 10, 12, 69, 19, 67, 62, 62, 51, 40, 48,
+ 58, 62, 62, 36, 21, 14, 17, 29, 27, 85, 18,
+ 22, 4, 37, 19, 19, 24, 28, 27, 34, 32, 8, 93,
+ 0, 65, 11, 113, 101, 69, 7, 10, 4, 77, 81, 86,
+ 85, 110, 88, 14, 3, 64, 71, 73, 79, 91, 89,
+ 106, 76, 20, 7, 2, 66, 67, 73, 77, 85, 97, 76,
+ 7, 66, 7, 5, 70, 77, 78, 91, 73, 29, 9, 9, 6,
+ 64, 70, 78, 77, 81, 62, 114, 112, 105, 111,
+ 103, 104, 103, 102, 99, 102, 102, 103, 88, 93,
+ 93, 98, 98, 89, 66, 83, 86, 84, 75, 71, 72,
+ 66, 77, 66, 65, 67, 68, 2, 68, 77, 65, 68, 75,
+ 5, 2, 66, 2, 11, 1, 71, 74, 75, 76, 66, 0, 2,
+ 5, 69, 66, 70, 7, 6, 72, 75, 75, 75, 7, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 58, 34, 62, 62, 62, 62, 58, 46, 33, 43,
+ 41, 30, 14, 8, 1, 79, 59, 57, 60, 44, 29, 32,
+ 25, 22, 30, 26, 18, 4, 15, 1, 73, 22, 27, 19,
+ 27, 32, 20, 17, 10, 9, 69, 3, 67, 79, 114, 64,
+ 68, 75, 19, 3, 70, 76, 69, 66, 66, 64, 64, 7,
+ 67, 65, 64, 5, 15, 12, 17, 2, 60, 57, 50, 46,
+ 38, 34, 20, 8, 75, 65, 30, 24, 21, 13, 11, 5,
+ 2, 64, 73, 76, 69, 66, 66, 64, 64, 7, 67, 65,
+ 64, 5, 15, 12, 17, 2, 60, 57, 50, 46, 38, 34,
+ 20, 8, 75 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 29, 48,
+ 55, 19, 3, 82, 31, 82, 87, 3, 3, 1, 65, 11,
+ 41, 45, 30, 65, 104, 107, 103, 72, 82, 87, 3,
+ 89, 79, 23, 6, 82, 93, 102, 68, 70, 86, 99, 0,
+ 81, 97, 66, 78, 88, 100, 7, 76, 73, 89, 18, 3,
+ 22, 0, 0, 0, 77, 88, 97, 67, 14, 0, 25, 82,
+ 114, 104, 80, 86, 68, 70, 81, 80, 107, 67, 91,
+ 81, 103, 86, 79, 85, 81, 12, 0, 74, 103, 75,
+ 97, 84, 108, 65, 77, 72, 89, 11, 69, 66, 89,
+ 83, 80, 78, 70, 64, 67, 79, 0, 5, 69, 4, 68,
+ 69, 4, 73, 22, 68, 1, 17, 19, 25, 24, 14, 81,
+ 72, 76, 6, 67, 86, 67, 1, 71, 75, 69, 13, 41,
+ 15, 75, 80, 73, 69, 32, 38, 99, 66, 64, 82, 4,
+ 0, 66, 68, 33, 56, 42, 3, 76, 109, 16, 54, 62,
+ 113, 22, 21, 25, 28, 27, 23, 17, 22, 25, 12,
+ 19, 14, 7, 4, 0, 14, 13, 13, 11, 5, 9, 18, 9,
+ 69, 9, 11, 69, 18, 68, 60, 62, 49, 38, 47, 56,
+ 62, 62, 35, 19, 12, 15, 27, 25, 86, 17, 21, 3,
+ 36, 18, 18, 22, 27, 25, 32, 31, 6, 93, 64, 66,
+ 9, 112, 100, 68, 7, 10, 4, 76, 80, 85, 85,
+ 108, 86, 15, 4, 64, 70, 71, 78, 89, 88, 103,
+ 75, 20, 7, 2, 65, 66, 72, 77, 84, 96, 76, 8,
+ 65, 7, 5, 69, 77, 78, 90, 72, 29, 10, 9, 6, 0,
+ 69, 77, 77, 80, 62, 113, 111, 104, 110, 102,
+ 102, 102, 100, 98, 100, 100, 100, 87, 92, 92,
+ 97, 95, 88, 67, 82, 85, 83, 75, 71, 72, 66,
+ 76, 67, 66, 67, 68, 3, 68, 77, 65, 68, 74, 4,
+ 1, 66, 1, 10, 0, 71, 74, 74, 77, 67, 1, 2, 4,
+ 69, 66, 70, 6, 5, 72, 75, 75, 75, 6, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 56, 32, 62, 62, 62, 62, 55, 44, 31, 41, 38,
+ 28, 13, 7, 0, 79, 57, 54, 57, 42, 27, 30, 23,
+ 20, 28, 24, 16, 2, 13, 0, 75, 21, 26, 17, 25,
+ 31, 18, 15, 9, 8, 70, 2, 68, 79, 113, 66, 69,
+ 76, 17, 2, 71, 76, 69, 66, 65, 0, 0, 7, 66,
+ 64, 0, 6, 15, 13, 17, 2, 60, 55, 48, 44, 36,
+ 32, 17, 6, 76, 65, 31, 24, 21, 14, 12, 5, 2,
+ 64, 72, 76, 69, 66, 65, 0, 0, 7, 66, 64, 0, 6,
+ 15, 13, 17, 2, 60, 55, 48, 44, 36, 32, 17, 6,
+ 76 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 28, 46,
+ 54, 19, 4, 80, 30, 82, 85, 3, 2, 2, 66, 10,
+ 40, 44, 26, 67, 105, 105, 101, 72, 82, 85, 3,
+ 88, 78, 24, 6, 81, 92, 101, 70, 71, 86, 100,
+ 0, 81, 97, 66, 79, 87, 100, 6, 76, 73, 88, 18,
+ 3, 22, 0, 0, 0, 76, 88, 97, 67, 14, 64, 25,
+ 81, 113, 102, 79, 85, 66, 68, 80, 79, 105, 66,
+ 90, 81, 101, 87, 79, 85, 81, 13, 0, 73, 101,
+ 75, 96, 83, 106, 65, 77, 72, 89, 12, 70, 65,
+ 89, 83, 79, 78, 69, 64, 66, 78, 1, 5, 69, 4,
+ 68, 69, 4, 73, 22, 68, 1, 17, 19, 26, 25, 15,
+ 81, 71, 75, 5, 67, 86, 67, 2, 71, 74, 68, 14,
+ 41, 17, 74, 81, 74, 68, 32, 38, 99, 67, 64,
+ 82, 4, 64, 65, 67, 33, 56, 43, 4, 77, 108, 15,
+ 51, 62, 111, 22, 21, 25, 27, 27, 22, 17, 21,
+ 25, 12, 19, 14, 6, 4, 64, 14, 12, 12, 11, 5,
+ 9, 18, 8, 71, 9, 10, 69, 17, 68, 57, 62, 47,
+ 37, 45, 54, 62, 61, 33, 17, 10, 14, 25, 23,
+ 88, 16, 19, 1, 34, 17, 17, 21, 25, 23, 30, 29,
+ 5, 93, 65, 67, 8, 110, 98, 66, 8, 10, 4, 75,
+ 80, 85, 84, 107, 85, 16, 5, 0, 70, 70, 77, 88,
+ 87, 101, 75, 21, 8, 3, 65, 65, 72, 76, 83, 95,
+ 76, 10, 64, 7, 5, 69, 76, 78, 90, 72, 30, 10,
+ 10, 6, 1, 69, 77, 76, 80, 62, 111, 109, 102,
+ 108, 100, 101, 100, 99, 96, 98, 98, 98, 87,
+ 91, 92, 97, 92, 88, 68, 82, 85, 82, 75, 71,
+ 72, 66, 76, 67, 67, 67, 68, 3, 68, 77, 65, 68,
+ 74, 4, 1, 66, 0, 9, 0, 71, 74, 73, 78, 67, 2,
+ 1, 4, 69, 66, 69, 6, 5, 71, 74, 75, 75, 4, 62,
+ 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 53, 30, 62, 62, 62, 62, 53, 41, 29, 39,
+ 36, 26, 12, 6, 64, 79, 55, 52, 55, 40, 25, 29,
+ 22, 18, 26, 21, 14, 0, 11, 65, 76, 20, 24, 15,
+ 24, 29, 17, 13, 7, 6, 71, 1, 69, 80, 112, 67,
+ 71, 78, 16, 0, 72, 75, 68, 65, 65, 1, 1, 8,
+ 66, 0, 1, 7, 16, 14, 18, 3, 59, 53, 46, 42,
+ 34, 30, 15, 4, 77, 64, 31, 25, 22, 14, 13, 6,
+ 3, 0, 72, 75, 68, 65, 65, 1, 1, 8, 66, 0, 1,
+ 7, 16, 14, 18, 3, 59, 53, 46, 42, 34, 30, 15,
+ 4, 77 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 26, 45,
+ 53, 20, 4, 78, 28, 82, 84, 3, 2, 3, 67, 8, 38,
+ 42, 23, 69, 106, 103, 100, 72, 82, 84, 3, 88,
+ 77, 24, 5, 80, 91, 100, 71, 73, 87, 100, 0,
+ 81, 96, 66, 79, 87, 100, 6, 76, 73, 88, 19, 3,
+ 22, 0, 0, 0, 76, 89, 97, 66, 13, 64, 24, 81,
+ 112, 100, 78, 84, 65, 67, 80, 78, 103, 65, 89,
+ 80, 99, 88, 79, 85, 81, 13, 64, 72, 99, 75,
+ 95, 82, 104, 65, 77, 72, 89, 12, 70, 65, 88,
+ 83, 79, 78, 69, 64, 66, 77, 1, 5, 69, 3, 68,
+ 69, 4, 73, 22, 68, 1, 18, 19, 26, 25, 15, 82,
+ 71, 74, 5, 67, 86, 67, 3, 71, 74, 67, 15, 42,
+ 18, 72, 81, 75, 68, 32, 38, 100, 67, 0, 83, 5,
+ 64, 65, 66, 33, 56, 43, 5, 78, 106, 14, 48,
+ 60, 110, 21, 21, 24, 26, 26, 22, 16, 21, 24,
+ 11, 18, 13, 6, 4, 64, 13, 11, 11, 10, 4, 8,
+ 17, 8, 72, 8, 9, 69, 15, 69, 55, 62, 45, 35,
+ 43, 52, 62, 58, 32, 15, 7, 12, 23, 21, 89, 14,
+ 18, 64, 33, 16, 16, 20, 24, 21, 28, 27, 3, 93,
+ 66, 68, 6, 109, 97, 65, 8, 10, 4, 74, 79, 84,
+ 83, 105, 84, 17, 5, 1, 69, 69, 76, 86, 86, 99,
+ 74, 21, 8, 3, 64, 64, 71, 76, 83, 94, 76, 11,
+ 0, 7, 5, 68, 76, 78, 89, 71, 30, 11, 10, 6, 2,
+ 68, 76, 76, 79, 62, 110, 108, 101, 106, 99,
+ 99, 99, 97, 95, 97, 96, 96, 86, 90, 91, 96,
+ 89, 88, 69, 82, 84, 82, 75, 71, 72, 67, 75,
+ 67, 68, 67, 68, 4, 68, 77, 65, 68, 74, 3, 1,
+ 66, 64, 8, 0, 71, 74, 72, 79, 67, 2, 0, 4, 69,
+ 66, 69, 6, 4, 71, 73, 75, 75, 3, 62, 60, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 50,
+ 28, 62, 62, 62, 62, 50, 39, 27, 37, 33, 24,
+ 10, 5, 65, 79, 52, 50, 53, 38, 23, 27, 20, 16,
+ 24, 19, 12, 65, 8, 67, 77, 19, 23, 12, 22, 27,
+ 15, 11, 6, 5, 72, 0, 70, 80, 111, 68, 72, 79,
+ 15, 64, 73, 75, 68, 65, 64, 2, 1, 9, 66, 1, 2,
+ 8, 16, 15, 18, 4, 59, 51, 44, 40, 32, 28, 13,
+ 2, 79, 64, 32, 25, 23, 15, 13, 6, 3, 0, 71,
+ 75, 68, 65, 64, 2, 1, 9, 66, 1, 2, 8, 16, 15,
+ 18, 4, 59, 51, 44, 40, 32, 28, 13, 2, 79 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 24, 43,
+ 52, 21, 4, 77, 26, 81, 82, 2, 2, 4, 68, 7, 37,
+ 40, 20, 72, 106, 101, 99, 72, 81, 82, 2, 88,
+ 76, 24, 5, 80, 90, 98, 72, 74, 87, 100, 0, 80,
+ 95, 67, 79, 87, 100, 6, 75, 73, 87, 19, 3, 22,
+ 0, 0, 0, 75, 89, 97, 66, 12, 64, 24, 80, 110,
+ 97, 77, 83, 0, 65, 79, 77, 101, 64, 88, 79,
+ 96, 88, 80, 85, 82, 13, 64, 71, 96, 74, 95,
+ 82, 102, 65, 77, 72, 88, 12, 70, 64, 87, 82,
+ 78, 77, 69, 0, 65, 76, 2, 5, 70, 3, 67, 69, 4,
+ 73, 23, 68, 1, 18, 20, 26, 25, 15, 82, 70, 74,
+ 4, 67, 85, 67, 4, 70, 73, 65, 17, 43, 20, 70,
+ 82, 76, 67, 33, 38, 101, 67, 0, 83, 6, 64, 65,
+ 64, 33, 56, 43, 7, 79, 105, 13, 46, 57, 109,
+ 21, 21, 24, 26, 26, 22, 16, 21, 24, 10, 17,
+ 13, 6, 4, 64, 13, 11, 10, 10, 4, 8, 16, 7, 73,
+ 7, 9, 69, 14, 69, 53, 62, 44, 34, 42, 50, 62,
+ 56, 31, 13, 5, 11, 21, 19, 90, 13, 17, 65, 32,
+ 15, 15, 18, 22, 19, 26, 26, 2, 93, 67, 69, 4,
+ 107, 95, 64, 9, 10, 4, 73, 78, 83, 83, 103,
+ 82, 18, 6, 1, 68, 67, 75, 85, 85, 96, 73, 22,
+ 9, 4, 64, 0, 70, 75, 82, 93, 75, 12, 1, 7, 5,
+ 67, 75, 77, 88, 70, 30, 12, 10, 6, 3, 67, 75,
+ 75, 78, 62, 109, 106, 99, 105, 98, 98, 97, 95,
+ 93, 95, 94, 93, 85, 89, 90, 95, 86, 87, 70,
+ 81, 83, 81, 75, 71, 72, 67, 75, 68, 69, 67,
+ 68, 5, 68, 77, 65, 68, 73, 3, 0, 66, 65, 7,
+ 64, 71, 74, 71, 79, 68, 3, 0, 3, 69, 66, 69,
+ 5, 4, 71, 73, 75, 75, 1, 62, 59, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 60, 48, 26, 62,
+ 62, 62, 62, 47, 37, 26, 35, 31, 22, 9, 4, 66,
+ 79, 50, 47, 50, 36, 21, 25, 19, 14, 22, 17,
+ 10, 67, 6, 68, 79, 18, 22, 10, 21, 26, 14, 9,
+ 5, 4, 73, 64, 71, 80, 110, 70, 73, 81, 13, 65,
+ 74, 75, 67, 64, 0, 3, 2, 9, 65, 2, 3, 9, 17,
+ 16, 19, 4, 58, 49, 42, 38, 30, 26, 10, 0, 80,
+ 0, 32, 26, 23, 16, 14, 7, 4, 0, 70, 75, 67,
+ 64, 0, 3, 2, 9, 65, 2, 3, 9, 17, 16, 19, 4,
+ 58, 49, 42, 38, 30, 26, 10, 0, 80 },
+
+ {
+
+ 61,
+ 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 22, 42,
+ 51, 21, 4, 75, 25, 81, 81, 2, 1, 5, 69, 6, 36,
+ 38, 16, 74, 107, 99, 97, 72, 81, 81, 2, 87,
+ 75, 25, 5, 79, 89, 97, 73, 75, 87, 101, 0, 80,
+ 94, 67, 80, 87, 100, 6, 75, 73, 86, 20, 3, 22,
+ 0, 0, 0, 75, 89, 97, 65, 11, 64, 23, 80, 109,
+ 95, 76, 82, 1, 64, 78, 76, 99, 0, 87, 78, 94,
+ 89, 80, 85, 82, 14, 64, 70, 94, 74, 94, 81,
+ 100, 65, 77, 72, 88, 13, 70, 64, 86, 82, 77,
+ 77, 69, 0, 65, 75, 2, 5, 70, 3, 67, 69, 4, 73,
+ 23, 68, 1, 19, 20, 27, 25, 16, 83, 70, 73, 4,
+ 67, 85, 67, 5, 70, 73, 64, 18, 44, 21, 68, 82,
+ 77, 67, 33, 38, 102, 67, 1, 84, 6, 64, 65, 0,
+ 33, 56, 43, 8, 80, 103, 12, 43, 54, 107, 21,
+ 21, 23, 25, 25, 21, 15, 21, 23, 9, 16, 13, 6,
+ 4, 65, 12, 10, 9, 9, 4, 8, 15, 7, 74, 7, 8,
+ 69, 13, 70, 51, 60, 42, 32, 40, 48, 62, 53,
+ 30, 11, 3, 9, 19, 17, 92, 12, 16, 67, 31, 14,
+ 14, 17, 21, 17, 24, 24, 0, 93, 68, 70, 3, 106,
+ 94, 1, 9, 10, 4, 72, 77, 82, 82, 101, 81, 19,
+ 7, 2, 68, 66, 74, 83, 84, 94, 73, 22, 9, 4, 0,
+ 1, 70, 75, 81, 92, 75, 14, 2, 7, 5, 66, 75,
+ 77, 87, 69, 31, 13, 11, 6, 4, 67, 74, 75, 78,
+ 62, 107, 105, 98, 103, 97, 96, 96, 94, 92, 93,
+ 92, 91, 85, 88, 90, 94, 83, 87, 71, 81, 82,
+ 80, 75, 71, 72, 67, 74, 68, 70, 67, 68, 5, 68,
+ 77, 65, 68, 73, 2, 0, 66, 66, 6, 64, 71, 74,
+ 70, 80, 68, 4, 64, 3, 69, 66, 68, 5, 3, 70,
+ 72, 75, 75, 0, 62, 58, 61, 61, 61, 62, 62, 62,
+ 61, 62, 62, 62, 57, 45, 24, 62, 60, 59, 60,
+ 44, 35, 24, 33, 28, 20, 8, 3, 67, 79, 48, 45,
+ 48, 34, 19, 23, 17, 12, 20, 15, 8, 69, 4, 70,
+ 80, 17, 21, 8, 19, 24, 12, 7, 3, 2, 74, 65,
+ 72, 80, 109, 71, 75, 82, 12, 66, 75, 75, 67,
+ 64, 0, 4, 3, 10, 65, 3, 4, 10, 17, 17, 19, 5,
+ 58, 47, 40, 36, 28, 24, 8, 65, 81, 0, 33, 26,
+ 24, 16, 15, 7, 4, 1, 70, 75, 67, 64, 0, 4, 3,
+ 10, 65, 3, 4, 10, 17, 17, 19, 5, 58, 47, 40,
+ 36, 28, 24, 8, 65, 81 },
+
+ {
+
+ 60,
+ 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 20, 40,
+ 50, 22, 4, 73, 23, 81, 79, 2, 1, 6, 70, 5, 35,
+ 36, 13, 76, 108, 97, 96, 72, 81, 79, 2, 87,
+ 74, 25, 5, 78, 88, 96, 74, 76, 87, 101, 0, 80,
+ 93, 67, 80, 87, 100, 6, 75, 73, 85, 20, 3, 22,
+ 0, 0, 0, 74, 89, 97, 65, 10, 64, 23, 79, 108,
+ 93, 75, 81, 3, 1, 77, 75, 97, 1, 86, 77, 92,
+ 90, 80, 85, 82, 14, 64, 69, 92, 74, 93, 80,
+ 98, 65, 77, 72, 88, 13, 70, 0, 85, 82, 76, 77,
+ 69, 0, 64, 74, 3, 5, 70, 3, 67, 69, 4, 73, 23,
+ 68, 1, 19, 20, 27, 25, 16, 83, 69, 72, 3, 67,
+ 85, 67, 6, 70, 72, 0, 19, 45, 23, 66, 83, 78,
+ 66, 33, 38, 103, 67, 1, 84, 7, 64, 65, 1, 33,
+ 56, 43, 9, 81, 102, 11, 40, 51, 106, 21, 21,
+ 23, 24, 25, 21, 15, 21, 23, 8, 15, 13, 6, 4,
+ 65, 12, 9, 8, 9, 4, 8, 14, 6, 75, 6, 7, 69,
+ 12, 70, 49, 58, 40, 31, 38, 46, 59, 51, 29, 9,
+ 1, 8, 17, 15, 93, 11, 15, 69, 30, 13, 13, 16,
+ 19, 15, 22, 22, 64, 93, 69, 71, 1, 104, 92, 2,
+ 10, 10, 4, 71, 76, 81, 81, 99, 80, 20, 8, 3,
+ 67, 65, 73, 82, 83, 92, 72, 23, 10, 5, 0, 2,
+ 69, 74, 80, 91, 75, 15, 3, 7, 5, 65, 74, 77,
+ 86, 68, 31, 14, 11, 6, 5, 66, 73, 74, 77, 62,
+ 106, 103, 96, 101, 96, 95, 94, 92, 90, 91, 90,
+ 89, 84, 87, 89, 93, 80, 87, 72, 81, 81, 79,
+ 75, 71, 72, 67, 74, 68, 71, 67, 68, 6, 68, 77,
+ 65, 68, 73, 2, 0, 66, 67, 5, 64, 71, 74, 69,
+ 81, 68, 5, 65, 3, 69, 66, 68, 5, 3, 70, 71,
+ 75, 75, 65, 61, 57, 60, 59, 59, 62, 62, 62,
+ 59, 60, 62, 61, 54, 42, 22, 61, 57, 55, 55,
+ 41, 33, 22, 31, 26, 18, 7, 2, 68, 79, 46, 43,
+ 46, 32, 17, 21, 16, 10, 18, 13, 6, 71, 2, 72,
+ 81, 16, 20, 6, 18, 22, 11, 5, 2, 1, 75, 66,
+ 73, 80, 108, 72, 76, 84, 11, 67, 76, 75, 66,
+ 0, 1, 5, 4, 11, 65, 4, 5, 11, 18, 18, 20, 6,
+ 57, 45, 38, 34, 26, 22, 6, 67, 82, 1, 33, 27,
+ 25, 17, 16, 8, 5, 1, 69, 75, 66, 0, 1, 5, 4,
+ 11, 65, 4, 5, 11, 18, 18, 20, 6, 57, 45, 38,
+ 34, 26, 22, 6, 67, 82 },
+
+ {
+
+ 58,
+ 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 18, 38,
+ 49, 22, 4, 72, 21, 81, 78, 1, 0, 7, 71, 3, 33,
+ 34, 9, 79, 109, 95, 95, 72, 81, 78, 1, 87, 74,
+ 25, 4, 78, 88, 95, 76, 78, 88, 102, 64, 80,
+ 93, 68, 81, 87, 100, 5, 75, 73, 85, 20, 2, 22,
+ 0, 0, 0, 74, 90, 97, 65, 9, 65, 22, 79, 107,
+ 91, 74, 81, 4, 2, 77, 74, 96, 1, 85, 77, 90,
+ 91, 81, 86, 83, 14, 65, 69, 90, 74, 93, 80,
+ 97, 65, 77, 72, 88, 13, 71, 0, 85, 82, 76, 77,
+ 69, 0, 64, 73, 3, 5, 71, 2, 67, 69, 3, 73, 23,
+ 68, 1, 19, 20, 27, 25, 16, 84, 69, 72, 2, 67,
+ 85, 68, 6, 70, 72, 1, 20, 45, 24, 65, 84, 80,
+ 66, 33, 38, 104, 68, 1, 85, 7, 65, 65, 2, 33,
+ 55, 43, 10, 82, 101, 9, 37, 47, 105, 20, 21,
+ 22, 23, 24, 20, 14, 20, 22, 7, 14, 12, 5, 3,
+ 66, 11, 8, 7, 8, 3, 7, 13, 5, 77, 5, 6, 69,
+ 10, 71, 46, 55, 38, 29, 36, 43, 55, 48, 27, 7,
+ 65, 6, 14, 13, 95, 9, 13, 71, 28, 12, 12, 14,
+ 17, 13, 20, 20, 66, 93, 70, 72, 64, 103, 91,
+ 3, 10, 10, 4, 71, 76, 81, 81, 98, 79, 20, 8,
+ 3, 67, 64, 72, 81, 83, 90, 72, 23, 10, 5, 0,
+ 2, 69, 74, 80, 90, 75, 16, 4, 7, 4, 65, 74,
+ 77, 86, 68, 31, 14, 11, 6, 6, 66, 73, 74, 77,
+ 62, 105, 102, 95, 100, 95, 94, 93, 91, 89, 90,
+ 89, 87, 84, 87, 89, 93, 77, 87, 74, 81, 81,
+ 79, 75, 71, 72, 68, 74, 69, 72, 68, 68, 6, 69,
+ 77, 66, 68, 73, 1, 64, 67, 68, 4, 65, 72, 74,
+ 68, 82, 69, 5, 66, 2, 69, 67, 68, 4, 2, 70,
+ 71, 75, 75, 67, 59, 56, 58, 57, 56, 62, 62,
+ 62, 56, 57, 62, 58, 50, 39, 20, 57, 53, 51,
+ 49, 38, 30, 20, 28, 23, 16, 5, 0, 69, 79, 43,
+ 40, 43, 30, 14, 19, 14, 7, 16, 10, 4, 74, 64,
+ 74, 83, 14, 18, 3, 16, 20, 9, 3, 0, 64, 76,
+ 67, 74, 81, 107, 74, 78, 86, 9, 69, 78, 75,
+ 66, 0, 1, 6, 4, 11, 65, 5, 5, 12, 18, 18, 20,
+ 6, 56, 43, 36, 31, 23, 20, 3, 69, 84, 1, 33,
+ 27, 25, 17, 16, 8, 5, 1, 69, 75, 66, 0, 1, 6,
+ 4, 11, 65, 5, 5, 12, 18, 18, 20, 6, 56, 43,
+ 36, 31, 23, 20, 3, 69, 84 },
+
+ {
+
+ 57,
+ 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 17, 37,
+ 49, 23, 5, 70, 20, 80, 76, 1, 0, 9, 71, 2, 32,
+ 33, 6, 81, 109, 93, 93, 71, 80, 76, 1, 86, 73,
+ 26, 4, 77, 87, 93, 77, 79, 88, 102, 64, 79,
+ 92, 68, 81, 86, 99, 5, 74, 72, 84, 21, 2, 22,
+ 0, 0, 0, 73, 90, 97, 64, 9, 65, 22, 78, 105,
+ 88, 72, 80, 6, 4, 76, 72, 94, 2, 83, 76, 87,
+ 91, 81, 86, 83, 15, 65, 68, 87, 73, 92, 79,
+ 95, 65, 76, 72, 87, 14, 71, 1, 84, 81, 75, 76,
+ 68, 1, 0, 72, 4, 6, 71, 2, 66, 68, 3, 72, 24,
+ 67, 1, 20, 21, 28, 26, 17, 84, 68, 71, 2, 67,
+ 84, 68, 7, 69, 71, 3, 22, 46, 26, 0, 84, 81,
+ 65, 34, 39, 104, 68, 2, 85, 8, 65, 64, 4, 33,
+ 55, 44, 12, 83, 99, 8, 35, 44, 103, 20, 21,
+ 22, 23, 24, 20, 14, 20, 22, 7, 14, 12, 5, 3,
+ 66, 11, 8, 7, 8, 3, 7, 13, 5, 78, 5, 6, 69, 9,
+ 71, 44, 53, 37, 28, 35, 41, 52, 46, 26, 6, 67,
+ 5, 12, 12, 96, 8, 12, 72, 27, 12, 12, 13, 16,
+ 12, 19, 19, 67, 93, 70, 72, 65, 101, 89, 5,
+ 11, 10, 4, 70, 75, 80, 80, 96, 77, 21, 9, 4,
+ 66, 1, 71, 79, 82, 87, 71, 24, 11, 6, 1, 3,
+ 68, 73, 79, 88, 74, 18, 5, 8, 4, 64, 73, 76,
+ 85, 67, 32, 15, 12, 7, 7, 65, 72, 73, 76, 62,
+ 103, 100, 93, 98, 93, 92, 91, 89, 87, 88, 87,
+ 84, 83, 86, 88, 92, 73, 86, 75, 80, 80, 78,
+ 74, 71, 71, 68, 73, 69, 72, 68, 68, 7, 69, 77,
+ 66, 68, 72, 1, 64, 67, 68, 4, 65, 72, 73, 66,
+ 82, 69, 6, 66, 2, 69, 67, 67, 4, 2, 69, 70,
+ 74, 74, 68, 58, 55, 57, 56, 54, 60, 60, 59,
+ 54, 55, 59, 56, 47, 37, 18, 54, 50, 48, 44,
+ 36, 28, 19, 26, 21, 15, 4, 64, 69, 79, 41, 38,
+ 41, 28, 12, 18, 13, 5, 15, 8, 3, 76, 66, 75,
+ 84, 13, 17, 1, 15, 19, 8, 2, 64, 65, 77, 67,
+ 74, 81, 106, 75, 79, 87, 8, 70, 79, 74, 65, 1,
+ 2, 8, 5, 12, 64, 7, 6, 13, 19, 19, 21, 7, 56,
+ 42, 35, 29, 21, 19, 1, 70, 85, 2, 34, 28, 26,
+ 18, 17, 9, 6, 2, 68, 74, 65, 1, 2, 8, 5, 12,
+ 64, 7, 6, 13, 19, 19, 21, 7, 56, 42, 35, 29,
+ 21, 19, 1, 70, 85 },
+
+ {
+
+ 56,
+ 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 15, 35,
+ 48, 24, 5, 68, 18, 80, 75, 1, 0, 10, 72, 1,
+ 31, 31, 3, 83, 110, 91, 92, 71, 80, 75, 1, 86,
+ 72, 26, 4, 76, 86, 92, 78, 80, 88, 102, 64,
+ 79, 91, 68, 81, 86, 99, 5, 74, 72, 83, 21, 2,
+ 22, 0, 0, 0, 72, 90, 97, 64, 8, 65, 22, 77,
+ 104, 86, 71, 79, 8, 6, 75, 71, 92, 3, 82, 75,
+ 85, 92, 81, 86, 83, 15, 65, 67, 85, 73, 91,
+ 78, 93, 65, 76, 72, 87, 14, 71, 1, 83, 81, 74,
+ 76, 68, 1, 0, 71, 5, 6, 71, 2, 66, 68, 3, 72,
+ 24, 67, 1, 20, 21, 28, 26, 17, 85, 67, 70, 1,
+ 67, 84, 68, 8, 69, 71, 4, 23, 47, 28, 2, 85,
+ 82, 65, 34, 39, 105, 68, 2, 86, 9, 65, 64, 5,
+ 33, 55, 44, 13, 84, 98, 7, 32, 41, 102, 20,
+ 21, 22, 22, 23, 20, 14, 20, 21, 6, 13, 12, 5,
+ 3, 66, 10, 7, 6, 8, 3, 7, 12, 4, 79, 4, 5, 69,
+ 8, 72, 42, 51, 35, 26, 33, 39, 49, 44, 25, 4,
+ 69, 3, 10, 10, 97, 7, 11, 74, 26, 11, 11, 12,
+ 14, 10, 17, 17, 69, 93, 71, 73, 67, 100, 88,
+ 6, 12, 10, 4, 69, 74, 79, 79, 94, 76, 22, 10,
+ 5, 65, 2, 70, 78, 81, 85, 70, 24, 11, 6, 1, 4,
+ 67, 72, 78, 87, 74, 19, 6, 8, 4, 0, 72, 76,
+ 84, 66, 32, 16, 12, 7, 8, 64, 71, 72, 75, 62,
+ 102, 98, 91, 96, 92, 91, 89, 87, 86, 86, 85,
+ 82, 82, 85, 87, 91, 70, 86, 76, 80, 79, 77,
+ 74, 71, 71, 68, 73, 69, 73, 68, 68, 8, 69, 77,
+ 66, 68, 72, 1, 64, 67, 69, 3, 65, 72, 73, 65,
+ 83, 69, 7, 67, 2, 69, 67, 67, 4, 2, 69, 69,
+ 74, 74, 70, 57, 54, 56, 54, 52, 57, 57, 56,
+ 52, 52, 56, 53, 44, 34, 16, 50, 46, 44, 39,
+ 33, 26, 17, 24, 19, 13, 3, 65, 70, 79, 39, 36,
+ 39, 26, 10, 16, 11, 3, 13, 6, 1, 78, 68, 77,
+ 85, 12, 16, 64, 13, 17, 6, 0, 65, 66, 78, 68,
+ 75, 81, 105, 76, 80, 89, 7, 71, 80, 74, 65, 2,
+ 3, 9, 6, 13, 64, 8, 7, 14, 19, 20, 22, 8, 55,
+ 40, 33, 27, 19, 17, 64, 72, 86, 3, 34, 28, 27,
+ 19, 18, 9, 7, 2, 67, 74, 65, 2, 3, 9, 6, 13,
+ 64, 8, 7, 14, 19, 20, 22, 8, 55, 40, 33, 27,
+ 19, 17, 64, 72, 86 },
+
+ {
+
+ 55,
+ 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 13, 34,
+ 47, 24, 5, 66, 17, 80, 73, 1, 64, 11, 73, 0,
+ 30, 29, 64, 85, 111, 89, 90, 71, 80, 73, 1,
+ 85, 71, 27, 4, 75, 85, 91, 79, 81, 88, 103,
+ 64, 79, 90, 68, 82, 86, 99, 5, 74, 72, 82, 22,
+ 2, 22, 0, 0, 0, 72, 90, 97, 0, 7, 65, 21, 77,
+ 103, 84, 70, 78, 9, 7, 74, 70, 90, 4, 81, 74,
+ 83, 93, 81, 86, 83, 16, 65, 66, 83, 73, 90,
+ 77, 91, 65, 76, 72, 87, 15, 71, 2, 82, 81, 73,
+ 76, 68, 1, 1, 70, 5, 6, 71, 2, 66, 68, 3, 72,
+ 24, 67, 1, 21, 21, 29, 26, 18, 85, 67, 69, 1,
+ 67, 84, 68, 9, 69, 70, 5, 24, 48, 29, 4, 85,
+ 83, 64, 34, 39, 106, 68, 3, 86, 9, 65, 64, 6,
+ 33, 55, 44, 14, 85, 96, 6, 29, 38, 100, 20,
+ 21, 21, 21, 23, 19, 13, 20, 21, 5, 12, 12, 5,
+ 3, 67, 10, 6, 5, 7, 3, 7, 11, 4, 80, 4, 4, 69,
+ 7, 72, 40, 49, 33, 25, 31, 37, 46, 41, 24, 2,
+ 71, 2, 8, 8, 99, 6, 10, 76, 25, 10, 10, 11,
+ 13, 8, 15, 15, 70, 93, 72, 74, 68, 98, 86, 8,
+ 12, 10, 4, 68, 73, 78, 78, 92, 75, 23, 11, 6,
+ 65, 3, 69, 76, 80, 83, 70, 25, 12, 7, 2, 5,
+ 67, 72, 77, 86, 74, 21, 7, 8, 4, 1, 72, 76,
+ 83, 65, 33, 17, 13, 7, 9, 64, 70, 72, 75, 62,
+ 100, 97, 90, 94, 91, 89, 88, 86, 84, 84, 83,
+ 80, 82, 84, 87, 90, 67, 86, 77, 80, 78, 76,
+ 74, 71, 71, 68, 72, 69, 74, 68, 68, 8, 69, 77,
+ 66, 68, 72, 0, 64, 67, 70, 2, 65, 72, 73, 64,
+ 84, 69, 8, 68, 2, 69, 67, 66, 4, 1, 68, 68,
+ 74, 74, 71, 56, 53, 55, 52, 50, 55, 55, 53,
+ 49, 49, 53, 50, 41, 31, 14, 46, 43, 40, 34,
+ 30, 24, 15, 22, 16, 11, 2, 66, 71, 79, 37, 34,
+ 37, 24, 8, 14, 10, 1, 11, 4, 64, 80, 70, 79,
+ 86, 11, 15, 66, 12, 15, 5, 65, 67, 68, 79, 69,
+ 76, 81, 104, 77, 82, 90, 6, 72, 81, 74, 64, 2,
+ 3, 10, 7, 14, 64, 9, 8, 15, 20, 21, 22, 9, 55,
+ 38, 31, 25, 17, 15, 66, 74, 87, 3, 35, 29, 28,
+ 19, 19, 10, 7, 3, 67, 74, 64, 2, 3, 10, 7, 14,
+ 64, 9, 8, 15, 20, 21, 22, 9, 55, 38, 31, 25,
+ 17, 15, 66, 74, 87 },
+
+ {
+
+ 53,
+ 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 11, 32,
+ 46, 25, 5, 65, 15, 80, 72, 0, 64, 12, 74, 65,
+ 28, 27, 67, 88, 112, 87, 89, 71, 80, 72, 0,
+ 85, 70, 27, 3, 75, 84, 89, 80, 83, 89, 103,
+ 64, 78, 89, 69, 82, 86, 99, 5, 74, 72, 82, 22,
+ 2, 22, 0, 0, 0, 71, 91, 97, 0, 6, 65, 21, 76,
+ 102, 82, 69, 77, 11, 9, 74, 69, 88, 5, 80, 73,
+ 80, 93, 82, 86, 84, 16, 66, 65, 80, 72, 90,
+ 77, 89, 65, 76, 72, 87, 15, 71, 2, 81, 81, 73,
+ 75, 68, 1, 1, 69, 6, 6, 72, 1, 66, 68, 3, 72,
+ 24, 67, 1, 21, 21, 29, 26, 18, 86, 66, 69, 0,
+ 67, 83, 68, 10, 68, 70, 7, 26, 49, 31, 6, 86,
+ 84, 64, 35, 39, 107, 68, 3, 87, 10, 65, 64, 7,
+ 33, 55, 44, 16, 86, 95, 5, 26, 35, 99, 19, 21,
+ 21, 21, 22, 19, 13, 20, 20, 4, 11, 11, 5, 3,
+ 67, 9, 6, 4, 7, 2, 6, 10, 3, 81, 3, 3, 69, 5,
+ 73, 38, 47, 31, 23, 30, 35, 42, 39, 23, 0, 74,
+ 0, 6, 6, 100, 4, 9, 77, 24, 9, 9, 9, 11, 6,
+ 13, 14, 72, 93, 73, 75, 70, 97, 85, 9, 13, 10,
+ 4, 67, 72, 77, 78, 90, 73, 24, 11, 6, 64, 5,
+ 68, 75, 79, 80, 69, 25, 12, 7, 2, 6, 66, 71,
+ 77, 85, 74, 22, 8, 8, 4, 2, 71, 76, 82, 64,
+ 33, 18, 13, 7, 10, 0, 69, 71, 74, 62, 99, 95,
+ 88, 93, 90, 88, 86, 84, 83, 83, 81, 77, 81,
+ 83, 86, 89, 64, 85, 78, 79, 77, 76, 74, 71,
+ 71, 69, 72, 70, 75, 68, 68, 9, 69, 77, 66, 68,
+ 71, 0, 65, 67, 71, 1, 66, 72, 73, 0, 85, 70,
+ 8, 68, 1, 69, 67, 66, 3, 1, 68, 68, 74, 74,
+ 73, 55, 52, 54, 51, 47, 52, 52, 50, 47, 46,
+ 49, 47, 37, 29, 12, 42, 39, 36, 29, 27, 22,
+ 13, 20, 14, 9, 0, 67, 72, 79, 34, 31, 34, 22,
+ 6, 12, 8, 64, 9, 2, 66, 82, 73, 80, 88, 10,
+ 14, 69, 10, 14, 3, 67, 68, 69, 80, 70, 77, 81,
+ 103, 79, 83, 92, 4, 73, 82, 74, 64, 3, 4, 11,
+ 7, 14, 0, 10, 9, 16, 20, 22, 23, 9, 54, 36,
+ 29, 23, 15, 13, 69, 76, 89, 4, 35, 29, 28, 20,
+ 19, 10, 8, 3, 66, 74, 64, 3, 4, 11, 7, 14, 0,
+ 10, 9, 16, 20, 22, 23, 9, 54, 36, 29, 23, 15,
+ 13, 69, 76, 89 },
+
+ {
+
+ 52,
+ 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 9, 31,
+ 45, 26, 5, 0, 13, 79, 70, 0, 64, 13, 75, 66,
+ 27, 25, 70, 90, 112, 85, 88, 71, 79, 70, 0,
+ 85, 69, 27, 3, 74, 83, 88, 81, 84, 89, 103,
+ 64, 78, 88, 69, 82, 86, 99, 5, 73, 72, 81, 23,
+ 2, 22, 0, 0, 0, 71, 91, 97, 1, 5, 65, 20, 76,
+ 100, 79, 68, 76, 12, 10, 73, 68, 86, 6, 79,
+ 72, 78, 94, 82, 86, 84, 16, 66, 64, 78, 72,
+ 89, 76, 87, 65, 76, 72, 86, 15, 71, 3, 80, 80,
+ 72, 75, 68, 2, 2, 68, 6, 6, 72, 1, 65, 68, 3,
+ 72, 25, 67, 1, 22, 22, 29, 26, 18, 86, 66, 68,
+ 0, 67, 83, 68, 11, 68, 69, 8, 27, 50, 32, 8,
+ 86, 85, 0, 35, 39, 108, 68, 4, 87, 11, 65, 64,
+ 9, 33, 55, 44, 17, 87, 93, 4, 24, 32, 98, 19,
+ 21, 20, 20, 22, 19, 12, 20, 20, 3, 10, 11, 5,
+ 3, 67, 9, 5, 3, 6, 2, 6, 9, 3, 82, 2, 3, 69,
+ 4, 73, 36, 45, 30, 22, 28, 33, 39, 36, 22, 65,
+ 76, 64, 4, 4, 101, 3, 8, 79, 23, 8, 8, 8, 10,
+ 4, 11, 12, 73, 93, 74, 76, 72, 95, 83, 10, 13,
+ 10, 4, 66, 71, 76, 77, 88, 72, 25, 12, 7, 0,
+ 6, 67, 73, 78, 78, 68, 26, 13, 8, 3, 7, 65,
+ 71, 76, 84, 73, 23, 9, 8, 4, 3, 71, 75, 81, 0,
+ 33, 19, 13, 7, 11, 1, 68, 71, 73, 62, 98, 94,
+ 87, 91, 89, 86, 85, 82, 81, 81, 79, 75, 80,
+ 82, 85, 88, 2, 85, 79, 79, 76, 75, 74, 71, 71,
+ 69, 71, 70, 76, 68, 68, 10, 69, 77, 66, 68,
+ 71, 64, 65, 67, 72, 0, 66, 72, 73, 1, 85, 70,
+ 9, 69, 1, 69, 67, 66, 3, 0, 68, 67, 74, 74,
+ 74, 54, 51, 53, 49, 45, 50, 49, 47, 44, 43,
+ 46, 44, 34, 26, 10, 38, 36, 32, 24, 24, 20,
+ 12, 18, 11, 7, 64, 68, 73, 79, 32, 29, 32, 20,
+ 4, 10, 7, 66, 7, 0, 68, 84, 75, 82, 89, 9, 13,
+ 71, 9, 12, 2, 69, 69, 70, 81, 71, 78, 81, 102,
+ 80, 84, 93, 3, 74, 83, 74, 0, 3, 5, 12, 8, 15,
+ 0, 11, 10, 17, 21, 23, 23, 10, 54, 34, 27, 21,
+ 13, 11, 71, 78, 90, 4, 36, 30, 29, 21, 20, 11,
+ 8, 3, 65, 74, 0, 3, 5, 12, 8, 15, 0, 11, 10,
+ 17, 21, 23, 23, 10, 54, 34, 27, 21, 13, 11,
+ 71, 78, 90 },
+
+ {
+
+ 51,
+ 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 8, 29,
+ 44, 26, 6, 2, 12, 79, 69, 0, 65, 14, 76, 67,
+ 26, 24, 74, 92, 113, 83, 86, 71, 79, 69, 0,
+ 84, 68, 28, 3, 73, 82, 87, 83, 85, 89, 104,
+ 64, 78, 88, 69, 83, 85, 99, 4, 73, 72, 80, 23,
+ 2, 22, 0, 0, 0, 70, 91, 97, 1, 5, 66, 20, 75,
+ 99, 77, 67, 75, 14, 12, 72, 67, 84, 7, 78, 72,
+ 76, 95, 82, 86, 84, 17, 66, 0, 76, 72, 88, 75,
+ 85, 65, 76, 72, 86, 16, 72, 3, 80, 80, 71, 75,
+ 67, 2, 2, 67, 7, 6, 72, 1, 65, 68, 3, 72, 25,
+ 67, 1, 22, 22, 30, 27, 19, 87, 65, 67, 64, 67,
+ 83, 68, 12, 68, 69, 9, 28, 50, 34, 9, 87, 86,
+ 0, 35, 39, 108, 69, 4, 88, 11, 66, 0, 10, 33,
+ 55, 45, 18, 88, 92, 3, 21, 29, 96, 19, 21, 20,
+ 19, 21, 18, 12, 19, 19, 3, 10, 11, 4, 3, 68,
+ 8, 4, 2, 6, 2, 6, 9, 2, 84, 2, 2, 69, 3, 74,
+ 33, 43, 28, 20, 26, 31, 36, 34, 20, 67, 78,
+ 66, 2, 2, 103, 2, 6, 81, 21, 7, 7, 7, 8, 2, 9,
+ 10, 75, 93, 75, 77, 73, 94, 82, 12, 14, 10, 4,
+ 65, 71, 76, 76, 87, 71, 26, 13, 8, 0, 7, 66,
+ 72, 77, 76, 68, 26, 13, 8, 3, 8, 65, 70, 75,
+ 83, 73, 25, 10, 8, 4, 3, 70, 75, 81, 0, 34,
+ 19, 14, 7, 12, 1, 68, 70, 73, 62, 96, 92, 85,
+ 89, 87, 85, 83, 81, 80, 79, 77, 73, 80, 81,
+ 85, 88, 5, 85, 80, 79, 76, 74, 74, 71, 71, 69,
+ 71, 70, 77, 68, 68, 10, 69, 77, 66, 68, 71,
+ 64, 65, 67, 73, 64, 66, 72, 73, 2, 86, 70, 10,
+ 70, 1, 69, 67, 65, 3, 0, 67, 66, 74, 74, 76,
+ 53, 50, 52, 47, 43, 47, 47, 44, 42, 40, 43,
+ 41, 31, 23, 8, 35, 32, 28, 19, 22, 17, 10, 16,
+ 9, 5, 65, 69, 74, 79, 30, 27, 30, 18, 2, 9, 5,
+ 68, 5, 66, 70, 86, 77, 84, 90, 8, 11, 73, 7,
+ 10, 0, 71, 71, 72, 82, 72, 79, 82, 101, 81,
+ 86, 95, 2, 76, 84, 73, 0, 4, 5, 13, 9, 16, 0,
+ 12, 11, 18, 21, 24, 24, 11, 53, 32, 25, 19,
+ 11, 9, 73, 80, 91, 5, 36, 30, 30, 21, 21, 11,
+ 9, 4, 65, 73, 0, 4, 5, 13, 9, 16, 0, 12, 11,
+ 18, 21, 24, 24, 11, 53, 32, 25, 19, 11, 9, 73,
+ 80, 91 },
+
+ {
+
+ 50,
+ 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 6, 28,
+ 43, 27, 6, 3, 10, 79, 67, 64, 65, 15, 77, 68,
+ 25, 22, 77, 95, 114, 81, 85, 71, 79, 67, 64,
+ 84, 67, 28, 3, 73, 81, 85, 84, 86, 89, 104,
+ 64, 77, 87, 70, 83, 85, 99, 4, 73, 72, 79, 24,
+ 2, 22, 0, 0, 0, 70, 91, 97, 2, 4, 66, 19, 75,
+ 98, 75, 66, 74, 15, 13, 71, 66, 82, 8, 77, 71,
+ 73, 95, 83, 86, 85, 17, 66, 1, 73, 71, 88, 75,
+ 83, 65, 76, 72, 86, 16, 72, 4, 79, 80, 70, 74,
+ 67, 2, 3, 66, 7, 6, 73, 1, 65, 68, 3, 72, 25,
+ 67, 1, 23, 22, 30, 27, 19, 87, 65, 67, 64, 67,
+ 82, 68, 13, 67, 68, 11, 30, 51, 35, 11, 87,
+ 87, 1, 36, 39, 109, 69, 5, 88, 12, 66, 0, 11,
+ 33, 55, 45, 20, 89, 90, 2, 18, 26, 95, 19, 21,
+ 19, 19, 21, 18, 11, 19, 19, 2, 9, 11, 4, 3,
+ 68, 8, 4, 1, 5, 2, 6, 8, 2, 85, 1, 1, 69, 2,
+ 74, 31, 41, 26, 19, 25, 29, 33, 31, 19, 69,
+ 80, 67, 0, 0, 104, 1, 5, 82, 20, 6, 6, 5, 7,
+ 0, 7, 9, 76, 93, 76, 78, 75, 92, 80, 13, 14,
+ 10, 4, 64, 70, 75, 76, 85, 69, 27, 14, 8, 1,
+ 9, 65, 70, 76, 73, 67, 27, 14, 9, 4, 9, 64,
+ 70, 74, 82, 73, 26, 11, 8, 4, 4, 70, 75, 80,
+ 1, 34, 20, 14, 7, 13, 2, 67, 70, 72, 62, 95,
+ 91, 84, 88, 86, 83, 82, 79, 78, 77, 75, 70,
+ 79, 80, 84, 87, 8, 84, 81, 78, 75, 73, 74, 71,
+ 71, 69, 70, 71, 78, 68, 68, 11, 69, 77, 66,
+ 68, 70, 65, 66, 67, 74, 65, 67, 72, 73, 3, 87,
+ 71, 11, 70, 0, 69, 67, 65, 2, 64, 67, 66, 74,
+ 74, 77, 52, 49, 51, 46, 40, 45, 44, 41, 39,
+ 37, 40, 38, 28, 21, 6, 31, 29, 24, 14, 19, 15,
+ 8, 14, 6, 3, 66, 70, 75, 79, 28, 24, 27, 16,
+ 0, 7, 4, 70, 3, 68, 72, 88, 79, 85, 92, 7, 10,
+ 75, 6, 9, 64, 73, 72, 73, 83, 73, 80, 82, 100,
+ 83, 87, 96, 0, 77, 85, 73, 1, 4, 6, 14, 10,
+ 16, 1, 13, 12, 19, 22, 25, 24, 11, 53, 30, 23,
+ 17, 9, 7, 76, 82, 92, 5, 37, 31, 30, 22, 22,
+ 12, 9, 4, 64, 73, 1, 4, 6, 14, 10, 16, 1, 13,
+ 12, 19, 22, 25, 24, 11, 53, 30, 23, 17, 9, 7,
+ 76, 82, 92 },
+
+ {
+
+ 48,
+ 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 4, 26,
+ 42, 27, 6, 5, 8, 79, 66, 64, 66, 16, 78, 70,
+ 23, 20, 81, 97, 115, 79, 84, 71, 79, 66, 64,
+ 84, 67, 28, 2, 72, 80, 84, 85, 88, 90, 105,
+ 64, 77, 86, 70, 84, 85, 99, 4, 73, 72, 79, 24,
+ 2, 22, 0, 0, 0, 69, 92, 97, 2, 3, 66, 19, 74,
+ 97, 73, 65, 74, 17, 15, 71, 65, 80, 8, 76, 70,
+ 71, 96, 83, 87, 85, 17, 67, 1, 71, 71, 87, 74,
+ 82, 65, 76, 72, 86, 16, 72, 4, 78, 80, 70, 74,
+ 67, 2, 3, 65, 8, 6, 73, 0, 65, 68, 2, 72, 25,
+ 67, 1, 23, 22, 30, 27, 19, 88, 64, 66, 65, 67,
+ 82, 68, 14, 67, 68, 12, 31, 52, 37, 13, 88,
+ 88, 1, 36, 39, 110, 69, 5, 89, 12, 66, 0, 12,
+ 33, 55, 45, 21, 90, 89, 1, 15, 22, 94, 18, 21,
+ 19, 18, 20, 17, 11, 19, 18, 1, 8, 10, 4, 2,
+ 69, 7, 3, 0, 5, 1, 5, 7, 1, 86, 0, 0, 69, 0,
+ 75, 29, 39, 24, 17, 23, 26, 29, 29, 18, 71,
+ 83, 69, 66, 65, 106, 64, 4, 84, 19, 5, 5, 4,
+ 5, 65, 5, 7, 78, 93, 77, 79, 77, 91, 79, 14,
+ 15, 10, 4, 64, 69, 74, 75, 83, 68, 27, 14, 9,
+ 1, 10, 64, 69, 75, 71, 67, 27, 14, 9, 4, 9,
+ 64, 69, 74, 81, 73, 27, 12, 8, 4, 5, 69, 75,
+ 79, 2, 34, 21, 14, 7, 14, 2, 66, 69, 72, 62,
+ 94, 89, 82, 86, 85, 82, 80, 78, 77, 76, 73,
+ 68, 79, 79, 84, 86, 11, 84, 82, 78, 74, 73,
+ 74, 71, 71, 70, 70, 71, 79, 68, 68, 11, 69,
+ 77, 67, 68, 70, 65, 66, 68, 75, 66, 67, 73,
+ 73, 4, 88, 71, 11, 71, 0, 69, 68, 65, 2, 64,
+ 67, 65, 74, 74, 79, 51, 48, 50, 44, 38, 42,
+ 41, 38, 37, 34, 36, 35, 24, 18, 4, 27, 25, 20,
+ 9, 16, 13, 6, 11, 4, 1, 68, 72, 76, 79, 25,
+ 22, 25, 14, 66, 5, 2, 73, 1, 70, 74, 90, 82,
+ 87, 93, 5, 9, 78, 4, 7, 66, 75, 74, 75, 84,
+ 74, 81, 82, 99, 84, 89, 98, 64, 78, 86, 73, 1,
+ 5, 6, 15, 10, 17, 1, 14, 12, 20, 22, 25, 25,
+ 12, 52, 28, 21, 15, 7, 5, 78, 84, 94, 6, 37,
+ 31, 31, 22, 22, 12, 10, 4, 64, 73, 1, 5, 6,
+ 15, 10, 17, 1, 14, 12, 20, 22, 25, 25, 12, 52,
+ 28, 21, 15, 7, 5, 78, 84, 94 },
+
+ {
+
+ 47,
+ 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 2, 24,
+ 42, 28, 6, 7, 7, 78, 64, 64, 66, 17, 78, 71,
+ 22, 18, 84, 99, 115, 77, 82, 70, 78, 64, 64,
+ 83, 66, 29, 2, 71, 79, 83, 86, 89, 90, 105,
+ 64, 77, 85, 70, 84, 85, 99, 4, 72, 71, 78, 24,
+ 2, 22, 0, 0, 0, 68, 92, 97, 2, 2, 66, 19, 73,
+ 95, 70, 64, 73, 19, 17, 70, 64, 78, 9, 74, 69,
+ 69, 97, 83, 87, 85, 18, 67, 2, 69, 71, 86, 73,
+ 80, 65, 75, 72, 85, 17, 72, 5, 77, 79, 69, 74,
+ 67, 3, 4, 64, 9, 6, 73, 0, 64, 67, 2, 72, 26,
+ 67, 1, 23, 23, 31, 27, 20, 88, 0, 65, 66, 67,
+ 82, 68, 15, 67, 67, 13, 32, 53, 39, 15, 89,
+ 89, 2, 36, 40, 111, 69, 5, 89, 13, 66, 0, 14,
+ 33, 55, 45, 22, 91, 88, 0, 13, 19, 92, 18, 21,
+ 19, 17, 20, 17, 11, 19, 18, 0, 7, 10, 4, 2,
+ 69, 7, 2, 0, 5, 1, 5, 6, 0, 87, 0, 0, 69, 64,
+ 75, 27, 37, 23, 16, 21, 24, 26, 27, 17, 73,
+ 85, 70, 68, 66, 107, 65, 3, 86, 18, 4, 4, 3,
+ 3, 66, 3, 5, 79, 93, 77, 80, 78, 89, 77, 16,
+ 16, 10, 4, 0, 68, 73, 74, 81, 67, 28, 15, 10,
+ 2, 11, 0, 68, 74, 69, 66, 28, 15, 10, 4, 10,
+ 0, 68, 73, 79, 72, 29, 13, 9, 4, 6, 68, 74,
+ 78, 3, 35, 22, 15, 8, 15, 3, 65, 68, 71, 62,
+ 92, 87, 80, 84, 84, 81, 78, 76, 75, 74, 71,
+ 66, 78, 78, 83, 85, 15, 84, 83, 78, 73, 72,
+ 73, 71, 71, 70, 70, 71, 80, 68, 68, 12, 69,
+ 77, 67, 68, 70, 65, 66, 68, 75, 67, 67, 73,
+ 72, 6, 88, 71, 12, 72, 0, 69, 68, 64, 2, 64,
+ 66, 64, 73, 73, 81, 50, 47, 49, 42, 36, 39,
+ 39, 35, 35, 32, 33, 33, 21, 15, 2, 23, 22, 17,
+ 4, 13, 11, 5, 9, 2, 0, 69, 73, 77, 79, 23, 20,
+ 23, 12, 68, 3, 1, 75, 64, 72, 76, 92, 84, 89,
+ 94, 4, 8, 80, 3, 5, 67, 77, 75, 76, 85, 75,
+ 81, 82, 98, 85, 90, 100, 65, 79, 87, 73, 2, 6,
+ 7, 17, 11, 18, 1, 15, 13, 21, 23, 26, 26, 13,
+ 51, 27, 19, 13, 5, 3, 80, 86, 95, 7, 37, 32,
+ 32, 23, 23, 13, 11, 5, 0, 73, 2, 6, 7, 17, 11,
+ 18, 1, 15, 13, 21, 23, 26, 26, 13, 51, 27, 19,
+ 13, 5, 3, 80, 86, 95 },
+
+ {
+
+ 46,
+ 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 0, 23,
+ 41, 29, 6, 8, 5, 78, 0, 65, 66, 18, 79, 72,
+ 21, 16, 87, 102, 116, 75, 81, 70, 78, 0, 65,
+ 83, 65, 29, 2, 71, 78, 81, 87, 90, 90, 105,
+ 64, 76, 84, 71, 84, 85, 99, 4, 72, 71, 77, 25,
+ 2, 22, 0, 0, 0, 68, 92, 97, 3, 1, 66, 18, 73,
+ 94, 68, 0, 72, 20, 18, 69, 0, 76, 10, 73, 68,
+ 66, 97, 84, 87, 86, 18, 67, 3, 66, 70, 86, 73,
+ 78, 65, 75, 72, 85, 17, 72, 5, 76, 79, 68, 73,
+ 67, 3, 4, 0, 9, 6, 74, 0, 64, 67, 2, 72, 26,
+ 67, 1, 24, 23, 31, 27, 20, 89, 0, 65, 66, 67,
+ 81, 68, 16, 66, 67, 15, 34, 54, 40, 17, 89,
+ 90, 2, 37, 40, 112, 69, 6, 90, 14, 66, 0, 15,
+ 33, 55, 45, 24, 92, 86, 64, 10, 16, 91, 18,
+ 21, 18, 17, 19, 17, 10, 19, 17, 64, 6, 10, 4,
+ 2, 69, 6, 2, 64, 4, 1, 5, 5, 0, 88, 64, 64,
+ 69, 65, 76, 25, 35, 21, 14, 20, 22, 23, 24,
+ 16, 75, 87, 72, 70, 68, 108, 66, 2, 87, 17, 3,
+ 3, 1, 2, 68, 1, 4, 81, 93, 78, 81, 80, 88, 76,
+ 17, 16, 10, 4, 1, 67, 72, 74, 79, 65, 29, 16,
+ 10, 3, 13, 1, 66, 73, 66, 65, 28, 15, 10, 5,
+ 11, 1, 68, 72, 78, 72, 30, 14, 9, 4, 7, 68,
+ 74, 77, 4, 35, 23, 15, 8, 16, 4, 64, 68, 70,
+ 62, 91, 86, 79, 83, 83, 79, 77, 74, 74, 72,
+ 69, 0, 77, 77, 82, 84, 18, 83, 84, 77, 72, 71,
+ 73, 71, 71, 70, 69, 72, 81, 68, 68, 13, 69,
+ 77, 67, 68, 69, 66, 67, 68, 76, 68, 68, 73,
+ 72, 7, 89, 72, 13, 72, 64, 69, 68, 64, 1, 65,
+ 66, 64, 73, 73, 82, 49, 46, 48, 41, 33, 37,
+ 36, 32, 32, 29, 30, 30, 18, 13, 0, 19, 18, 13,
+ 64, 10, 9, 3, 7, 64, 65, 70, 74, 78, 79, 21,
+ 17, 20, 10, 70, 1, 64, 77, 66, 74, 78, 94, 86,
+ 90, 96, 3, 7, 82, 1, 4, 69, 79, 76, 77, 86,
+ 76, 82, 82, 97, 87, 91, 101, 67, 80, 88, 73,
+ 2, 6, 8, 18, 12, 18, 2, 16, 14, 22, 23, 27,
+ 26, 13, 51, 25, 17, 11, 3, 1, 83, 88, 96, 7,
+ 38, 32, 32, 24, 24, 13, 11, 5, 1, 73, 2, 6, 8,
+ 18, 12, 18, 2, 16, 14, 22, 23, 27, 26, 13, 51,
+ 25, 17, 11, 3, 1, 83, 88, 96 },
+
+ {
+
+ 45,
+ 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 64, 21,
+ 40, 29, 7, 10, 4, 78, 2, 65, 67, 19, 80, 73,
+ 20, 15, 91, 104, 117, 73, 79, 70, 78, 2, 65,
+ 82, 64, 30, 2, 70, 77, 80, 89, 91, 90, 106,
+ 64, 76, 84, 71, 85, 84, 99, 3, 72, 71, 76, 25,
+ 2, 22, 0, 0, 0, 67, 92, 97, 3, 1, 67, 18, 72,
+ 93, 66, 1, 71, 22, 20, 68, 1, 74, 11, 72, 68,
+ 64, 98, 84, 87, 86, 19, 67, 4, 64, 70, 85, 72,
+ 76, 65, 75, 72, 85, 18, 73, 6, 76, 79, 67, 73,
+ 66, 3, 5, 1, 10, 6, 74, 0, 64, 67, 2, 72, 26,
+ 67, 1, 24, 23, 32, 28, 21, 89, 1, 64, 67, 67,
+ 81, 68, 17, 66, 66, 16, 35, 54, 42, 18, 90,
+ 91, 3, 37, 40, 112, 70, 6, 90, 14, 67, 1, 16,
+ 33, 55, 46, 25, 93, 85, 65, 7, 13, 89, 18, 21,
+ 18, 16, 19, 16, 10, 18, 17, 64, 6, 10, 3, 2,
+ 70, 6, 1, 65, 4, 1, 5, 5, 64, 90, 64, 65, 69,
+ 66, 76, 22, 33, 19, 13, 18, 20, 20, 22, 14,
+ 77, 89, 73, 72, 70, 110, 67, 0, 89, 15, 2, 2,
+ 0, 0, 70, 64, 2, 82, 93, 79, 82, 81, 86, 74,
+ 19, 17, 10, 4, 2, 67, 72, 73, 78, 64, 30, 17,
+ 11, 3, 14, 2, 65, 72, 64, 65, 29, 16, 11, 5,
+ 12, 1, 67, 71, 77, 72, 32, 15, 9, 4, 7, 67,
+ 74, 77, 4, 36, 23, 16, 8, 17, 4, 64, 67, 70,
+ 62, 89, 84, 77, 81, 81, 78, 75, 73, 72, 70,
+ 67, 2, 77, 76, 82, 84, 21, 83, 85, 77, 72, 70,
+ 73, 71, 71, 70, 69, 72, 82, 68, 68, 13, 69,
+ 77, 67, 68, 69, 66, 67, 68, 77, 69, 68, 73,
+ 72, 8, 90, 72, 14, 73, 64, 69, 68, 0, 1, 65,
+ 65, 0, 73, 73, 84, 48, 45, 47, 39, 31, 34, 34,
+ 29, 30, 26, 27, 27, 15, 10, 65, 16, 15, 9, 69,
+ 8, 6, 1, 5, 66, 67, 71, 75, 79, 79, 19, 15,
+ 18, 8, 72, 0, 65, 79, 68, 77, 80, 96, 88, 92,
+ 97, 2, 5, 84, 0, 2, 70, 81, 78, 79, 87, 77,
+ 83, 83, 96, 88, 93, 103, 68, 82, 89, 72, 3, 7,
+ 8, 19, 13, 19, 2, 17, 15, 23, 24, 28, 27, 14,
+ 50, 23, 15, 9, 1, 64, 85, 90, 97, 8, 38, 33,
+ 33, 24, 25, 14, 12, 6, 1, 72, 3, 7, 8, 19, 13,
+ 19, 2, 17, 15, 23, 24, 28, 27, 14, 50, 23, 15,
+ 9, 1, 64, 85, 90, 97 },
+
+ {
+
+ 43,
+ 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 66, 20,
+ 39, 30, 7, 12, 2, 78, 3, 65, 67, 20, 81, 75,
+ 18, 13, 94, 106, 118, 71, 78, 70, 78, 3, 65,
+ 82, 0, 30, 1, 69, 76, 79, 90, 93, 91, 106, 64,
+ 76, 83, 71, 85, 84, 99, 3, 72, 71, 76, 26, 2,
+ 22, 0, 0, 0, 67, 93, 97, 4, 0, 67, 17, 72, 92,
+ 64, 2, 70, 23, 21, 68, 2, 72, 12, 71, 67, 1,
+ 99, 84, 87, 86, 19, 68, 5, 1, 70, 84, 71, 74,
+ 65, 75, 72, 85, 18, 73, 6, 75, 79, 67, 73, 66,
+ 3, 5, 2, 10, 6, 74, 64, 64, 67, 2, 72, 26, 67,
+ 1, 25, 23, 32, 28, 21, 90, 1, 0, 67, 67, 81,
+ 68, 18, 66, 66, 17, 36, 55, 43, 20, 90, 92, 3,
+ 37, 40, 113, 70, 7, 91, 15, 67, 1, 17, 33, 55,
+ 46, 26, 94, 83, 66, 4, 10, 88, 17, 21, 17, 15,
+ 18, 16, 9, 18, 16, 65, 5, 9, 3, 2, 70, 5, 0,
+ 66, 3, 0, 4, 4, 64, 91, 65, 66, 69, 68, 77,
+ 20, 31, 17, 11, 16, 18, 16, 19, 13, 79, 92,
+ 75, 74, 72, 111, 69, 64, 91, 14, 1, 1, 64, 64,
+ 72, 66, 0, 84, 93, 80, 83, 83, 85, 73, 20, 17,
+ 10, 4, 3, 66, 71, 72, 76, 0, 31, 17, 12, 4,
+ 15, 3, 0, 71, 1, 64, 29, 16, 11, 6, 13, 2, 67,
+ 71, 76, 72, 33, 16, 9, 4, 8, 67, 74, 76, 5,
+ 36, 24, 16, 8, 18, 5, 0, 67, 69, 62, 88, 83,
+ 76, 79, 80, 76, 74, 71, 71, 69, 65, 4, 76, 75,
+ 81, 83, 24, 83, 86, 77, 71, 70, 73, 71, 71,
+ 71, 68, 72, 83, 68, 68, 14, 69, 77, 67, 68,
+ 69, 67, 67, 68, 78, 70, 68, 73, 72, 9, 91, 72,
+ 14, 74, 64, 69, 68, 0, 1, 66, 65, 1, 73, 73,
+ 85, 47, 44, 46, 37, 29, 32, 31, 26, 27, 23,
+ 23, 24, 11, 7, 67, 12, 11, 5, 74, 5, 4, 64, 3,
+ 69, 69, 73, 76, 80, 79, 16, 13, 16, 6, 74, 65,
+ 67, 81, 70, 79, 82, 98, 91, 94, 98, 1, 4, 87,
+ 65, 0, 72, 83, 79, 80, 88, 78, 84, 83, 95, 89,
+ 94, 104, 69, 83, 90, 72, 3, 7, 9, 20, 13, 20,
+ 2, 18, 16, 24, 24, 29, 27, 15, 50, 21, 13, 7,
+ 64, 66, 87, 92, 99, 8, 39, 33, 34, 25, 25, 14,
+ 12, 6, 2, 72, 3, 7, 9, 20, 13, 20, 2, 18, 16,
+ 24, 24, 29, 27, 15, 50, 21, 13, 7, 64, 66, 87,
+ 92, 99 },
+
+ {
+
+ 42,
+ 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 68, 18,
+ 38, 31, 7, 13, 0, 77, 5, 66, 67, 21, 82, 76,
+ 17, 11, 97, 109, 118, 69, 77, 70, 77, 5, 66,
+ 82, 1, 30, 1, 69, 75, 77, 91, 94, 91, 106, 64,
+ 75, 82, 72, 85, 84, 99, 3, 71, 71, 75, 26, 2,
+ 22, 0, 0, 0, 66, 93, 97, 4, 64, 67, 17, 71,
+ 90, 2, 3, 69, 25, 23, 67, 3, 70, 13, 70, 66,
+ 4, 99, 85, 87, 87, 19, 68, 6, 4, 69, 84, 71,
+ 72, 65, 75, 72, 84, 18, 73, 7, 74, 78, 66, 72,
+ 66, 4, 6, 3, 11, 6, 75, 64, 0, 67, 2, 72, 27,
+ 67, 1, 25, 24, 32, 28, 21, 90, 2, 0, 68, 67,
+ 80, 68, 19, 65, 65, 19, 38, 56, 45, 22, 91,
+ 93, 4, 38, 40, 114, 70, 7, 91, 16, 67, 1, 19,
+ 33, 55, 46, 28, 95, 82, 67, 2, 7, 87, 17, 21,
+ 17, 15, 18, 16, 9, 18, 16, 66, 4, 9, 3, 2, 70,
+ 5, 0, 67, 3, 0, 4, 3, 65, 92, 66, 66, 69, 69,
+ 77, 18, 29, 16, 10, 15, 16, 13, 17, 12, 81,
+ 94, 76, 76, 74, 112, 70, 65, 92, 13, 0, 0, 66,
+ 66, 74, 68, 64, 85, 93, 81, 84, 85, 83, 71,
+ 21, 18, 10, 4, 4, 65, 70, 72, 74, 2, 32, 18,
+ 12, 5, 17, 4, 1, 70, 4, 0, 30, 17, 12, 6, 14,
+ 3, 66, 70, 75, 71, 34, 17, 9, 4, 9, 66, 73,
+ 75, 6, 36, 25, 16, 8, 19, 6, 1, 66, 68, 62,
+ 87, 81, 74, 78, 79, 75, 72, 69, 69, 67, 0, 7,
+ 75, 74, 80, 82, 27, 82, 87, 76, 70, 69, 73,
+ 71, 71, 71, 68, 73, 84, 68, 68, 15, 69, 77,
+ 67, 68, 68, 67, 68, 68, 79, 71, 69, 73, 72,
+ 10, 91, 73, 15, 74, 65, 69, 68, 0, 0, 66, 65,
+ 1, 73, 73, 87, 46, 43, 45, 36, 26, 29, 28, 23,
+ 25, 20, 20, 21, 8, 5, 69, 8, 8, 1, 79, 2, 2,
+ 65, 1, 71, 71, 74, 77, 81, 79, 14, 10, 13, 4,
+ 76, 67, 68, 83, 72, 81, 84, 100, 93, 95, 100,
+ 0, 3, 89, 66, 64, 73, 85, 80, 81, 89, 79, 85,
+ 83, 94, 91, 95, 106, 71, 84, 91, 72, 4, 8, 10,
+ 21, 14, 20, 3, 19, 17, 25, 25, 30, 28, 15, 49,
+ 19, 11, 5, 66, 68, 90, 94, 100, 9, 39, 34, 34,
+ 26, 26, 15, 13, 6, 3, 72, 4, 8, 10, 21, 14,
+ 20, 3, 19, 17, 25, 25, 30, 28, 15, 49, 19, 11,
+ 5, 66, 68, 90, 94, 100 },
+
+ {
+
+ 41,
+ 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 70, 17,
+ 37, 31, 7, 15, 64, 77, 6, 66, 68, 22, 83, 77,
+ 16, 9, 101, 111, 119, 67, 75, 70, 77, 6, 66,
+ 81, 2, 31, 1, 68, 74, 76, 92, 95, 91, 107, 64,
+ 75, 81, 72, 86, 84, 99, 3, 71, 71, 74, 27, 2,
+ 22, 0, 0, 0, 66, 93, 97, 5, 65, 67, 16, 71,
+ 89, 4, 4, 68, 26, 24, 66, 4, 68, 14, 69, 65,
+ 6, 100, 85, 87, 87, 20, 68, 7, 6, 69, 83, 70,
+ 70, 65, 75, 72, 84, 19, 73, 7, 73, 78, 65, 72,
+ 66, 4, 6, 4, 11, 6, 75, 64, 0, 67, 2, 72, 27,
+ 67, 1, 26, 24, 33, 28, 22, 91, 2, 1, 68, 67,
+ 80, 68, 20, 65, 65, 20, 39, 57, 46, 24, 91,
+ 94, 4, 38, 40, 115, 70, 8, 92, 16, 67, 1, 20,
+ 33, 55, 46, 29, 96, 80, 68, 64, 4, 85, 17, 21,
+ 16, 14, 17, 15, 8, 18, 15, 67, 3, 9, 3, 2, 71,
+ 4, 64, 68, 2, 0, 4, 2, 65, 93, 66, 67, 69, 70,
+ 78, 16, 27, 14, 8, 13, 14, 10, 14, 11, 83, 96,
+ 78, 78, 76, 114, 71, 66, 94, 12, 64, 64, 67,
+ 67, 76, 70, 66, 87, 93, 82, 85, 86, 82, 70,
+ 23, 18, 10, 4, 5, 64, 69, 71, 72, 3, 33, 19,
+ 13, 5, 18, 5, 3, 69, 6, 0, 30, 17, 12, 7, 15,
+ 3, 66, 69, 74, 71, 36, 18, 9, 4, 10, 66, 73,
+ 74, 7, 37, 26, 17, 8, 20, 6, 2, 66, 68, 62,
+ 85, 80, 73, 76, 78, 73, 71, 68, 68, 65, 2, 9,
+ 75, 73, 80, 81, 30, 82, 88, 76, 69, 68, 73,
+ 71, 71, 71, 67, 73, 85, 68, 68, 15, 69, 77,
+ 67, 68, 68, 68, 68, 68, 80, 72, 69, 73, 72,
+ 11, 92, 73, 16, 75, 65, 69, 68, 1, 0, 67, 64,
+ 2, 73, 73, 88, 45, 42, 44, 34, 24, 27, 26, 20,
+ 22, 17, 17, 18, 5, 2, 71, 4, 4, 66, 84, 64, 0,
+ 67, 64, 74, 73, 75, 78, 82, 79, 12, 8, 11, 2,
+ 78, 69, 70, 85, 74, 83, 86, 102, 95, 97, 101,
+ 64, 2, 91, 68, 66, 75, 87, 82, 83, 90, 80, 86,
+ 83, 93, 92, 97, 107, 72, 85, 92, 72, 4, 8, 10,
+ 22, 15, 21, 3, 20, 18, 26, 25, 31, 28, 16, 49,
+ 17, 9, 3, 68, 70, 92, 96, 101, 9, 40, 34, 35,
+ 26, 27, 15, 13, 7, 3, 72, 4, 8, 10, 22, 15,
+ 21, 3, 20, 18, 26, 25, 31, 28, 16, 49, 17, 9,
+ 3, 68, 70, 92, 96, 101 },
+
+ {
+
+ 40,
+ 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 72, 15,
+ 36, 32, 7, 17, 66, 77, 8, 66, 68, 23, 84, 78,
+ 15, 7, 104, 113, 120, 65, 74, 70, 77, 8, 66,
+ 81, 3, 31, 1, 67, 73, 75, 93, 96, 91, 107, 64,
+ 75, 80, 72, 86, 84, 99, 3, 71, 71, 73, 27, 2,
+ 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 16, 70,
+ 88, 6, 5, 67, 28, 26, 65, 5, 66, 15, 68, 64,
+ 8, 101, 85, 87, 87, 20, 68, 8, 8, 69, 82, 69,
+ 68, 65, 75, 72, 84, 19, 73, 8, 72, 78, 64, 72,
+ 66, 4, 7, 5, 12, 6, 75, 64, 0, 67, 2, 72, 27,
+ 67, 1, 26, 24, 33, 28, 22, 91, 3, 2, 69, 67,
+ 80, 68, 21, 65, 64, 21, 40, 58, 48, 26, 92,
+ 95, 5, 38, 40, 116, 70, 8, 92, 17, 67, 1, 21,
+ 33, 55, 46, 30, 97, 79, 69, 67, 1, 84, 17, 21,
+ 16, 13, 17, 15, 8, 18, 15, 68, 2, 9, 3, 2, 71,
+ 4, 65, 69, 2, 0, 4, 1, 66, 94, 67, 68, 69, 71,
+ 78, 14, 25, 12, 7, 11, 12, 7, 12, 10, 85, 98,
+ 79, 80, 78, 115, 72, 67, 96, 11, 65, 65, 68,
+ 69, 78, 72, 68, 88, 93, 83, 86, 88, 80, 68,
+ 24, 19, 10, 4, 6, 0, 68, 70, 70, 4, 34, 20,
+ 14, 6, 19, 6, 4, 68, 8, 1, 31, 18, 13, 7, 16,
+ 4, 65, 68, 73, 71, 37, 19, 9, 4, 11, 65, 73,
+ 73, 8, 37, 27, 17, 8, 21, 7, 3, 65, 67, 62,
+ 84, 78, 71, 74, 77, 72, 69, 66, 66, 0, 4, 11,
+ 74, 72, 79, 80, 33, 82, 89, 76, 68, 67, 73,
+ 71, 71, 71, 67, 73, 86, 68, 68, 16, 69, 77,
+ 67, 68, 68, 68, 68, 68, 81, 73, 69, 73, 72,
+ 12, 93, 73, 17, 76, 65, 69, 68, 1, 0, 67, 64,
+ 3, 73, 73, 90, 44, 41, 43, 32, 22, 24, 23, 17,
+ 20, 14, 14, 15, 2, 64, 73, 0, 1, 70, 89, 67,
+ 65, 69, 66, 76, 75, 76, 79, 83, 79, 10, 6, 9,
+ 0, 80, 71, 71, 87, 76, 85, 88, 104, 97, 99,
+ 102, 65, 1, 93, 69, 68, 76, 89, 83, 84, 91,
+ 81, 87, 83, 92, 93, 98, 109, 73, 86, 93, 72,
+ 5, 9, 11, 23, 16, 22, 3, 21, 19, 27, 26, 32,
+ 29, 17, 48, 15, 7, 1, 70, 72, 94, 98, 102, 10,
+ 40, 35, 36, 27, 28, 16, 14, 7, 4, 72, 5, 9,
+ 11, 23, 16, 22, 3, 21, 19, 27, 26, 32, 29, 17,
+ 48, 15, 7, 1, 70, 72, 94, 98, 102 },
+
+ {
+
+ 38,
+ 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 74, 13,
+ 35, 32, 7, 18, 68, 77, 9, 67, 69, 24, 85, 80,
+ 13, 5, 108, 116, 121, 0, 73, 70, 77, 9, 67,
+ 81, 3, 31, 0, 67, 73, 74, 95, 98, 92, 108, 65,
+ 75, 80, 73, 87, 84, 99, 2, 71, 71, 73, 27, 1,
+ 22, 0, 0, 0, 65, 94, 97, 5, 67, 68, 15, 70,
+ 87, 8, 6, 67, 29, 27, 65, 6, 65, 15, 67, 64,
+ 10, 102, 86, 88, 88, 20, 69, 8, 10, 69, 82,
+ 69, 67, 65, 75, 72, 84, 19, 74, 8, 72, 78, 64,
+ 72, 66, 4, 7, 6, 12, 6, 76, 65, 0, 67, 1, 72,
+ 27, 67, 1, 26, 24, 33, 28, 22, 92, 3, 2, 70,
+ 67, 80, 69, 21, 65, 64, 22, 41, 58, 49, 27,
+ 93, 97, 5, 38, 40, 117, 71, 8, 93, 17, 68, 1,
+ 22, 33, 54, 46, 31, 98, 78, 71, 70, 66, 83,
+ 16, 21, 15, 12, 16, 14, 7, 17, 14, 69, 1, 8,
+ 2, 1, 72, 3, 66, 70, 1, 64, 3, 0, 67, 96, 68,
+ 69, 69, 73, 79, 11, 22, 10, 5, 9, 9, 3, 9, 8,
+ 87, 101, 81, 83, 80, 117, 74, 69, 98, 9, 66,
+ 66, 70, 71, 80, 74, 70, 90, 93, 84, 87, 90,
+ 79, 67, 25, 19, 10, 4, 6, 0, 68, 70, 69, 5,
+ 34, 20, 14, 6, 20, 7, 5, 68, 10, 1, 31, 18,
+ 13, 7, 16, 4, 65, 68, 72, 71, 38, 20, 9, 3,
+ 11, 65, 73, 73, 8, 37, 27, 17, 8, 22, 7, 3,
+ 65, 67, 62, 83, 77, 70, 73, 76, 71, 68, 65,
+ 65, 1, 5, 13, 74, 72, 79, 80, 36, 82, 91, 76,
+ 68, 67, 73, 71, 71, 72, 67, 74, 87, 69, 68,
+ 16, 70, 77, 68, 68, 68, 69, 69, 69, 82, 74,
+ 70, 74, 72, 13, 94, 74, 17, 77, 66, 69, 69, 1,
+ 64, 68, 64, 3, 73, 73, 92, 42, 40, 41, 30, 19,
+ 21, 20, 14, 17, 11, 10, 12, 65, 67, 75, 67,
+ 66, 74, 95, 70, 68, 71, 69, 79, 77, 78, 81,
+ 84, 79, 7, 3, 6, 65, 83, 73, 73, 90, 78, 88,
+ 90, 107, 100, 101, 104, 67, 64, 96, 71, 70,
+ 78, 91, 85, 86, 92, 82, 88, 84, 91, 95, 100,
+ 111, 75, 88, 95, 72, 5, 9, 11, 24, 16, 22, 3,
+ 22, 19, 28, 26, 32, 29, 17, 47, 13, 5, 65, 73,
+ 74, 97, 100, 104, 10, 40, 35, 36, 27, 28, 16,
+ 14, 7, 4, 72, 5, 9, 11, 24, 16, 22, 3, 22, 19,
+ 28, 26, 32, 29, 17, 47, 13, 5, 65, 73, 74, 97,
+ 100, 104 },
+
+ {
+
+ 37,
+ 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 75, 12,
+ 35, 33, 8, 20, 69, 76, 11, 67, 69, 26, 85, 81,
+ 12, 4, 111, 118, 121, 2, 71, 69, 76, 11, 67,
+ 80, 4, 32, 0, 66, 72, 72, 96, 99, 92, 108, 65,
+ 74, 79, 73, 87, 83, 98, 2, 70, 70, 72, 28, 1,
+ 22, 0, 0, 0, 64, 94, 97, 6, 67, 68, 15, 69,
+ 85, 11, 8, 66, 31, 29, 64, 8, 0, 16, 65, 0,
+ 13, 102, 86, 88, 88, 21, 69, 9, 13, 68, 81,
+ 68, 65, 65, 74, 72, 83, 20, 74, 9, 71, 77, 0,
+ 71, 65, 5, 8, 7, 13, 7, 76, 65, 1, 66, 1, 71,
+ 28, 66, 1, 27, 25, 34, 29, 23, 92, 4, 3, 70,
+ 67, 79, 69, 22, 64, 0, 24, 43, 59, 51, 29, 93,
+ 98, 6, 39, 41, 117, 71, 9, 93, 18, 68, 2, 24,
+ 33, 54, 47, 33, 99, 76, 72, 72, 69, 81, 16,
+ 21, 15, 12, 16, 14, 7, 17, 14, 69, 1, 8, 2, 1,
+ 72, 3, 66, 70, 1, 64, 3, 0, 67, 97, 68, 69,
+ 69, 74, 79, 9, 20, 9, 4, 8, 7, 0, 7, 7, 88,
+ 103, 82, 85, 81, 118, 75, 70, 99, 8, 66, 66,
+ 71, 72, 81, 75, 71, 91, 93, 84, 87, 91, 77,
+ 65, 27, 20, 10, 4, 7, 1, 67, 69, 67, 7, 35,
+ 21, 15, 7, 22, 8, 7, 67, 13, 2, 32, 19, 14, 8,
+ 17, 5, 64, 67, 70, 70, 40, 21, 10, 3, 12, 64,
+ 72, 72, 9, 38, 28, 18, 9, 23, 8, 4, 64, 66,
+ 62, 81, 75, 68, 71, 74, 69, 66, 0, 0, 3, 7,
+ 16, 73, 71, 78, 79, 40, 81, 92, 75, 67, 66,
+ 72, 71, 70, 72, 66, 74, 87, 69, 68, 17, 70,
+ 77, 68, 68, 67, 69, 69, 69, 82, 74, 70, 74,
+ 71, 15, 94, 74, 18, 77, 66, 69, 69, 2, 64, 68,
+ 0, 4, 72, 72, 93, 41, 39, 40, 29, 17, 19, 18,
+ 11, 15, 9, 7, 10, 68, 69, 77, 70, 69, 77, 100,
+ 72, 70, 72, 71, 81, 78, 79, 82, 84, 79, 5, 1,
+ 4, 67, 85, 74, 74, 92, 79, 90, 91, 109, 102,
+ 102, 105, 68, 65, 98, 72, 71, 79, 92, 86, 87,
+ 93, 82, 88, 84, 90, 96, 101, 112, 76, 89, 96,
+ 71, 6, 10, 12, 26, 17, 23, 4, 24, 20, 29, 27,
+ 33, 30, 18, 47, 12, 4, 67, 75, 75, 99, 101,
+ 105, 11, 41, 36, 37, 28, 29, 17, 15, 8, 5, 71,
+ 6, 10, 12, 26, 17, 23, 4, 24, 20, 29, 27, 33,
+ 30, 18, 47, 12, 4, 67, 75, 75, 99, 101, 105 },
+
+ {
+
+ 36,
+ 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 77, 10,
+ 34, 34, 8, 22, 71, 76, 12, 67, 69, 27, 86, 82,
+ 11, 2, 114, 120, 122, 4, 70, 69, 76, 12, 67,
+ 80, 5, 32, 0, 65, 71, 71, 97, 100, 92, 108,
+ 65, 74, 78, 73, 87, 83, 98, 2, 70, 70, 71, 28,
+ 1, 22, 0, 0, 0, 0, 94, 97, 6, 68, 68, 15, 68,
+ 84, 13, 9, 65, 33, 31, 0, 9, 2, 17, 64, 1, 15,
+ 103, 86, 88, 88, 21, 69, 10, 15, 68, 80, 67,
+ 0, 65, 74, 72, 83, 20, 74, 9, 70, 77, 1, 71,
+ 65, 5, 8, 8, 14, 7, 76, 65, 1, 66, 1, 71, 28,
+ 66, 1, 27, 25, 34, 29, 23, 93, 5, 4, 71, 67,
+ 79, 69, 23, 64, 0, 25, 44, 60, 53, 31, 94, 99,
+ 6, 39, 41, 118, 71, 9, 94, 19, 68, 2, 25, 33,
+ 54, 47, 34, 100, 75, 73, 75, 72, 80, 16, 21,
+ 15, 11, 15, 14, 7, 17, 13, 70, 0, 8, 2, 1, 72,
+ 2, 67, 71, 1, 64, 3, 64, 68, 98, 69, 70, 69,
+ 75, 80, 7, 18, 7, 2, 6, 5, 66, 5, 6, 90, 105,
+ 84, 87, 83, 119, 76, 71, 101, 7, 67, 67, 72,
+ 74, 83, 77, 73, 93, 93, 85, 88, 93, 76, 64,
+ 28, 21, 10, 4, 8, 2, 66, 68, 65, 8, 36, 22,
+ 16, 8, 23, 9, 8, 66, 15, 3, 32, 19, 14, 8, 18,
+ 6, 0, 66, 69, 70, 41, 22, 10, 3, 13, 0, 72,
+ 71, 10, 38, 29, 18, 9, 24, 9, 5, 0, 65, 62,
+ 80, 73, 66, 69, 73, 68, 64, 2, 1, 5, 9, 18,
+ 72, 70, 77, 78, 43, 81, 93, 75, 66, 65, 72,
+ 71, 70, 72, 66, 74, 88, 69, 68, 18, 70, 77,
+ 68, 68, 67, 69, 69, 69, 83, 75, 70, 74, 71,
+ 16, 95, 74, 19, 78, 66, 69, 69, 2, 64, 68, 0,
+ 5, 72, 72, 95, 40, 38, 39, 27, 15, 16, 15, 8,
+ 13, 6, 4, 7, 71, 72, 79, 74, 73, 81, 105, 75,
+ 72, 74, 73, 83, 80, 80, 83, 85, 79, 3, 64, 2,
+ 69, 87, 76, 76, 94, 81, 92, 93, 111, 104, 104,
+ 106, 69, 66, 100, 74, 73, 81, 94, 87, 88, 94,
+ 83, 89, 84, 89, 97, 102, 114, 77, 90, 97, 71,
+ 6, 11, 13, 27, 18, 24, 4, 25, 21, 30, 27, 34,
+ 31, 19, 46, 10, 2, 69, 77, 77, 101, 103, 106,
+ 12, 41, 36, 38, 29, 30, 17, 16, 8, 6, 71, 6,
+ 11, 13, 27, 18, 24, 4, 25, 21, 30, 27, 34, 31,
+ 19, 46, 10, 2, 69, 77, 77, 101, 103, 106 },
+
+ {
+
+ 35,
+ 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 79, 9,
+ 33, 34, 8, 24, 72, 76, 14, 67, 70, 28, 87, 83,
+ 10, 0, 118, 122, 123, 6, 68, 69, 76, 14, 67,
+ 79, 6, 33, 0, 64, 70, 70, 98, 101, 92, 109,
+ 65, 74, 77, 73, 88, 83, 98, 2, 70, 70, 70, 29,
+ 1, 22, 0, 0, 0, 0, 94, 97, 7, 69, 68, 14, 68,
+ 83, 15, 10, 64, 34, 32, 1, 10, 4, 18, 0, 2,
+ 17, 104, 86, 88, 88, 22, 69, 11, 17, 68, 79,
+ 66, 2, 65, 74, 72, 83, 21, 74, 10, 69, 77, 2,
+ 71, 65, 5, 9, 9, 14, 7, 76, 65, 1, 66, 1, 71,
+ 28, 66, 1, 28, 25, 35, 29, 24, 93, 5, 5, 71,
+ 67, 79, 69, 24, 64, 1, 26, 45, 61, 54, 33, 94,
+ 100, 7, 39, 41, 119, 71, 10, 94, 19, 68, 2,
+ 26, 33, 54, 47, 35, 101, 73, 74, 78, 75, 78,
+ 16, 21, 14, 10, 15, 13, 6, 17, 13, 71, 64, 8,
+ 2, 1, 73, 2, 68, 72, 0, 64, 3, 65, 68, 99, 69,
+ 71, 69, 76, 80, 5, 16, 5, 1, 4, 3, 69, 2, 5,
+ 92, 107, 85, 89, 85, 121, 77, 72, 103, 6, 68,
+ 68, 73, 75, 85, 79, 75, 94, 93, 86, 89, 94,
+ 74, 1, 30, 21, 10, 4, 9, 3, 65, 67, 0, 9, 37,
+ 23, 17, 8, 24, 10, 10, 65, 17, 3, 33, 20, 15,
+ 9, 19, 6, 0, 65, 68, 70, 43, 23, 10, 3, 14, 0,
+ 72, 70, 11, 39, 30, 19, 9, 25, 9, 6, 0, 65,
+ 62, 78, 72, 65, 67, 72, 66, 0, 3, 3, 7, 11,
+ 20, 72, 69, 77, 77, 46, 81, 94, 75, 65, 64,
+ 72, 71, 70, 72, 65, 74, 89, 69, 68, 18, 70,
+ 77, 68, 68, 67, 70, 69, 69, 84, 76, 70, 74,
+ 71, 17, 96, 74, 20, 79, 66, 69, 69, 3, 64, 69,
+ 1, 6, 72, 72, 96, 39, 37, 38, 25, 13, 14, 13,
+ 5, 10, 3, 1, 4, 74, 75, 81, 78, 76, 85, 110,
+ 78, 74, 76, 75, 86, 82, 81, 84, 86, 79, 1, 66,
+ 0, 71, 89, 78, 77, 96, 83, 94, 95, 113, 106,
+ 106, 107, 70, 67, 102, 75, 75, 82, 96, 89, 90,
+ 95, 84, 90, 84, 88, 98, 104, 115, 78, 91, 98,
+ 71, 7, 11, 13, 28, 19, 25, 4, 26, 22, 31, 28,
+ 35, 31, 20, 46, 8, 0, 71, 79, 79, 103, 105,
+ 107, 12, 42, 37, 39, 29, 31, 18, 16, 9, 6, 71,
+ 7, 11, 13, 28, 19, 25, 4, 26, 22, 31, 28, 35,
+ 31, 20, 46, 8, 0, 71, 79, 79, 103, 105, 107 },
+
+ {
+
+ 33,
+ 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 81, 7,
+ 32, 35, 8, 25, 74, 76, 15, 68, 70, 29, 88, 85,
+ 8, 65, 121, 125, 124, 8, 67, 69, 76, 15, 68,
+ 79, 7, 33, 64, 64, 69, 68, 99, 103, 93, 109,
+ 65, 73, 76, 74, 88, 83, 98, 2, 70, 70, 70, 29,
+ 1, 22, 0, 0, 0, 1, 95, 97, 7, 70, 68, 14, 67,
+ 82, 17, 11, 0, 36, 34, 1, 11, 6, 19, 1, 3, 20,
+ 104, 87, 88, 89, 22, 70, 12, 20, 67, 79, 66,
+ 4, 65, 74, 72, 83, 21, 74, 10, 68, 77, 2, 70,
+ 65, 5, 9, 10, 15, 7, 77, 66, 1, 66, 1, 71, 28,
+ 66, 1, 28, 25, 35, 29, 24, 94, 6, 5, 72, 67,
+ 78, 69, 25, 0, 1, 28, 47, 62, 56, 35, 95, 101,
+ 7, 40, 41, 120, 71, 10, 95, 20, 68, 2, 27, 33,
+ 54, 47, 37, 102, 72, 75, 81, 78, 77, 15, 21,
+ 14, 10, 14, 13, 6, 17, 12, 72, 65, 7, 2, 1,
+ 73, 1, 68, 73, 0, 65, 2, 66, 69, 100, 70, 72,
+ 69, 78, 81, 3, 14, 3, 64, 3, 1, 73, 0, 4, 94,
+ 110, 87, 91, 87, 122, 79, 73, 104, 5, 69, 69,
+ 75, 77, 87, 81, 76, 96, 93, 87, 90, 96, 73, 2,
+ 31, 22, 10, 4, 10, 4, 64, 67, 2, 11, 38, 23,
+ 17, 9, 26, 11, 11, 64, 20, 4, 33, 20, 15, 9,
+ 20, 7, 1, 65, 67, 70, 44, 24, 10, 3, 15, 1,
+ 72, 69, 12, 39, 31, 19, 9, 26, 10, 7, 1, 64,
+ 62, 77, 70, 0, 66, 71, 65, 2, 5, 4, 8, 13, 23,
+ 71, 68, 76, 76, 49, 80, 95, 74, 64, 64, 72,
+ 71, 70, 73, 65, 75, 90, 69, 68, 19, 70, 77,
+ 68, 68, 66, 70, 70, 69, 85, 77, 71, 74, 71,
+ 18, 97, 75, 20, 79, 67, 69, 69, 3, 65, 69, 1,
+ 6, 72, 72, 98, 38, 36, 37, 24, 10, 11, 10, 2,
+ 8, 0, 66, 1, 78, 77, 83, 82, 80, 89, 115, 81,
+ 76, 78, 77, 88, 84, 83, 85, 87, 79, 65, 69,
+ 66, 73, 91, 80, 79, 98, 85, 96, 97, 115, 109,
+ 107, 109, 71, 68, 105, 77, 76, 84, 98, 90, 91,
+ 96, 85, 91, 84, 87, 100, 105, 117, 80, 92, 99,
+ 71, 7, 12, 14, 29, 19, 25, 5, 27, 23, 32, 28,
+ 36, 32, 20, 45, 6, 65, 73, 81, 81, 106, 107,
+ 109, 13, 42, 37, 39, 30, 31, 18, 17, 9, 7, 71,
+ 7, 12, 14, 29, 19, 25, 5, 27, 23, 32, 28, 36,
+ 32, 20, 45, 6, 65, 73, 81, 81, 106, 107, 109 },
+
+ {
+
+ 32,
+ 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 83, 6,
+ 31, 36, 8, 27, 76, 75, 17, 68, 70, 30, 89, 86,
+ 7, 67, 124, 126, 124, 10, 66, 69, 75, 17, 68,
+ 79, 8, 33, 64, 0, 68, 67, 100, 104, 93, 109,
+ 65, 73, 75, 74, 88, 83, 98, 2, 69, 70, 69, 30,
+ 1, 22, 0, 0, 0, 1, 95, 97, 8, 71, 68, 13, 67,
+ 80, 20, 12, 1, 37, 35, 2, 12, 8, 20, 2, 4, 22,
+ 105, 87, 88, 89, 22, 70, 13, 22, 67, 78, 65,
+ 6, 65, 74, 72, 82, 21, 74, 11, 67, 76, 3, 70,
+ 65, 6, 10, 11, 15, 7, 77, 66, 2, 66, 1, 71,
+ 29, 66, 1, 29, 26, 35, 29, 24, 94, 6, 6, 72,
+ 67, 78, 69, 26, 0, 2, 29, 48, 62, 57, 37, 95,
+ 102, 8, 40, 41, 121, 71, 11, 95, 21, 68, 2,
+ 29, 33, 54, 47, 38, 103, 70, 76, 83, 81, 76,
+ 15, 21, 13, 9, 14, 13, 5, 17, 12, 73, 66, 7,
+ 2, 1, 73, 1, 69, 74, 64, 65, 2, 67, 69, 101,
+ 71, 72, 69, 79, 81, 1, 12, 2, 65, 1, 64, 76,
+ 66, 3, 96, 112, 88, 93, 89, 123, 80, 74, 106,
+ 4, 70, 70, 76, 78, 89, 83, 78, 97, 93, 88, 91,
+ 98, 71, 4, 32, 22, 10, 4, 11, 5, 0, 66, 4, 12,
+ 39, 24, 18, 10, 27, 12, 13, 0, 22, 5, 34, 21,
+ 16, 10, 21, 8, 1, 64, 66, 69, 45, 25, 10, 3,
+ 16, 1, 71, 68, 13, 39, 32, 19, 9, 27, 11, 8,
+ 1, 0, 62, 76, 69, 1, 64, 70, 0, 3, 7, 6, 10,
+ 15, 25, 70, 67, 75, 75, 52, 80, 96, 74, 0, 0,
+ 72, 71, 70, 73, 64, 75, 91, 69, 68, 20, 70,
+ 77, 68, 68, 66, 71, 70, 69, 86, 78, 71, 74,
+ 71, 19, 97, 75, 21, 80, 67, 69, 69, 3, 65, 70,
+ 1, 7, 72, 72, 99, 37, 35, 36, 22, 8, 9, 7, 64,
+ 5, 66, 69, 65, 81, 80, 85, 86, 83, 93, 120,
+ 84, 78, 79, 79, 91, 86, 84, 86, 88, 79, 67,
+ 71, 68, 75, 93, 82, 80, 100, 87, 98, 99, 117,
+ 111, 109, 110, 72, 69, 107, 78, 78, 85, 100,
+ 91, 92, 97, 86, 92, 84, 86, 101, 106, 118, 81,
+ 93, 100, 71, 8, 12, 15, 30, 20, 26, 5, 28, 24,
+ 33, 29, 37, 32, 21, 45, 4, 67, 75, 83, 83,
+ 108, 109, 110, 13, 43, 38, 40, 31, 32, 19, 17,
+ 9, 8, 71, 8, 12, 15, 30, 20, 26, 5, 28, 24,
+ 33, 29, 37, 32, 21, 45, 4, 67, 75, 83, 83,
+ 108, 109, 110 },
+
+ {
+
+ 31,
+ 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 84, 4,
+ 30, 36, 9, 29, 77, 75, 18, 68, 71, 31, 90, 87,
+ 6, 68, 126, 126, 125, 12, 64, 69, 75, 18, 68,
+ 78, 9, 34, 64, 1, 67, 66, 102, 105, 93, 110,
+ 65, 73, 75, 74, 89, 82, 98, 1, 69, 70, 68, 30,
+ 1, 22, 0, 0, 0, 2, 95, 97, 8, 71, 69, 13, 66,
+ 79, 22, 13, 2, 39, 37, 3, 13, 10, 21, 3, 4,
+ 24, 106, 87, 88, 89, 23, 70, 14, 24, 67, 77,
+ 64, 8, 65, 74, 72, 82, 22, 75, 11, 67, 76, 4,
+ 70, 64, 6, 10, 12, 16, 7, 77, 66, 2, 66, 1,
+ 71, 29, 66, 1, 29, 26, 36, 30, 25, 95, 7, 7,
+ 73, 67, 78, 69, 27, 0, 2, 30, 49, 62, 59, 38,
+ 96, 103, 8, 40, 41, 121, 72, 11, 96, 21, 69,
+ 3, 30, 33, 54, 48, 39, 104, 69, 77, 86, 84,
+ 74, 15, 21, 13, 8, 13, 12, 5, 16, 11, 73, 66,
+ 7, 1, 1, 74, 0, 70, 75, 64, 65, 2, 67, 70,
+ 103, 71, 73, 69, 80, 82, 65, 10, 0, 67, 64,
+ 66, 79, 68, 1, 98, 114, 90, 95, 91, 125, 81,
+ 76, 108, 2, 71, 71, 77, 80, 91, 85, 80, 99,
+ 93, 89, 92, 99, 70, 5, 34, 23, 10, 4, 12, 5,
+ 0, 65, 5, 13, 40, 25, 19, 10, 28, 13, 14, 1,
+ 24, 5, 34, 21, 16, 10, 22, 8, 2, 0, 65, 69,
+ 47, 26, 10, 3, 16, 2, 71, 68, 13, 40, 32, 20,
+ 9, 28, 11, 8, 2, 0, 62, 74, 67, 3, 1, 68, 1,
+ 5, 8, 7, 12, 17, 27, 70, 66, 75, 75, 55, 80,
+ 97, 74, 0, 1, 72, 71, 70, 73, 64, 75, 92, 69,
+ 68, 20, 70, 77, 68, 68, 66, 71, 70, 69, 87,
+ 79, 71, 74, 71, 20, 98, 75, 22, 81, 67, 69,
+ 69, 4, 65, 70, 2, 8, 72, 72, 101, 36, 34, 35,
+ 20, 6, 6, 5, 67, 3, 69, 72, 68, 84, 83, 87,
+ 89, 87, 97, 125, 86, 81, 81, 81, 93, 88, 85,
+ 87, 89, 79, 69, 73, 70, 77, 95, 83, 82, 102,
+ 89, 101, 101, 119, 113, 111, 111, 73, 71, 109,
+ 80, 80, 87, 102, 93, 94, 98, 87, 93, 85, 85,
+ 102, 108, 120, 82, 95, 101, 70, 8, 13, 15, 31,
+ 21, 27, 5, 29, 25, 34, 29, 38, 33, 22, 44, 2,
+ 69, 77, 85, 85, 110, 111, 111, 14, 43, 38, 41,
+ 31, 33, 19, 18, 10, 8, 70, 8, 13, 15, 31, 21,
+ 27, 5, 29, 25, 34, 29, 38, 33, 22, 44, 2, 69,
+ 77, 85, 85, 110, 111, 111 },
+
+ {
+
+ 30,
+ 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 86, 3,
+ 29, 37, 9, 30, 79, 75, 20, 69, 71, 32, 91, 88,
+ 5, 70, 126, 126, 126, 14, 0, 69, 75, 20, 69,
+ 78, 10, 34, 64, 1, 66, 64, 103, 106, 93, 110,
+ 65, 72, 74, 75, 89, 82, 98, 1, 69, 70, 67, 31,
+ 1, 22, 0, 0, 0, 2, 95, 97, 9, 72, 69, 12, 66,
+ 78, 24, 14, 3, 40, 38, 4, 14, 12, 22, 4, 5,
+ 27, 106, 88, 88, 90, 23, 70, 15, 27, 66, 77,
+ 64, 10, 65, 74, 72, 82, 22, 75, 12, 66, 76, 5,
+ 69, 64, 6, 11, 13, 16, 7, 78, 66, 2, 66, 1,
+ 71, 29, 66, 1, 30, 26, 36, 30, 25, 95, 7, 7,
+ 73, 67, 77, 69, 28, 1, 3, 32, 51, 62, 60, 40,
+ 96, 104, 9, 41, 41, 122, 72, 12, 96, 22, 69,
+ 3, 31, 33, 54, 48, 41, 105, 67, 78, 89, 87,
+ 73, 15, 21, 12, 8, 13, 12, 4, 16, 11, 74, 67,
+ 7, 1, 1, 74, 0, 70, 76, 65, 65, 2, 68, 70,
+ 104, 72, 74, 69, 81, 82, 67, 8, 65, 68, 65,
+ 68, 82, 71, 0, 100, 116, 91, 97, 93, 126, 82,
+ 77, 109, 1, 72, 72, 79, 81, 93, 87, 81, 100,
+ 93, 90, 93, 101, 68, 7, 35, 23, 10, 4, 13, 6,
+ 1, 65, 7, 15, 41, 26, 19, 11, 30, 14, 16, 2,
+ 27, 6, 35, 22, 17, 11, 23, 9, 2, 1, 64, 69,
+ 48, 27, 10, 3, 17, 2, 71, 67, 14, 40, 33, 20,
+ 9, 29, 12, 9, 2, 1, 62, 73, 66, 4, 2, 67, 3,
+ 6, 10, 9, 14, 19, 30, 69, 65, 74, 74, 58, 79,
+ 98, 73, 1, 2, 72, 71, 70, 73, 0, 76, 93, 69,
+ 68, 21, 70, 77, 68, 68, 65, 72, 71, 69, 88,
+ 80, 72, 74, 71, 21, 99, 76, 23, 81, 68, 69,
+ 69, 4, 66, 71, 2, 8, 72, 72, 102, 35, 33, 34,
+ 19, 3, 4, 2, 70, 0, 72, 75, 71, 87, 85, 89,
+ 93, 90, 101, 126, 89, 83, 83, 83, 96, 90, 86,
+ 88, 90, 79, 71, 76, 73, 79, 97, 85, 83, 104,
+ 91, 103, 103, 121, 115, 112, 113, 74, 72, 111,
+ 81, 81, 88, 104, 94, 95, 99, 88, 94, 85, 84,
+ 104, 109, 121, 84, 96, 102, 70, 9, 13, 16, 32,
+ 22, 27, 6, 30, 26, 35, 30, 39, 33, 22, 44, 0,
+ 71, 79, 87, 87, 113, 113, 112, 14, 44, 39, 41,
+ 32, 34, 20, 18, 10, 9, 70, 9, 13, 16, 32, 22,
+ 27, 6, 30, 26, 35, 30, 39, 33, 22, 44, 0, 71,
+ 79, 87, 87, 113, 113, 112 },
+
+ {
+
+ 28,
+ 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 88, 1,
+ 28, 37, 9, 32, 81, 75, 21, 69, 72, 33, 92, 90,
+ 3, 72, 126, 126, 126, 16, 1, 69, 75, 21, 69,
+ 78, 10, 34, 65, 2, 65, 0, 104, 108, 94, 111,
+ 65, 72, 73, 75, 90, 82, 98, 1, 69, 70, 67, 31,
+ 1, 22, 0, 0, 0, 3, 96, 97, 9, 73, 69, 12, 65,
+ 77, 26, 15, 3, 42, 40, 4, 15, 14, 22, 5, 6,
+ 29, 107, 88, 89, 90, 23, 71, 15, 29, 66, 76,
+ 0, 11, 65, 74, 72, 82, 22, 75, 12, 65, 76, 5,
+ 69, 64, 6, 11, 14, 17, 7, 78, 67, 2, 66, 0,
+ 71, 29, 66, 1, 30, 26, 36, 30, 25, 96, 8, 8,
+ 74, 67, 77, 69, 29, 1, 3, 33, 52, 62, 62, 42,
+ 97, 105, 9, 41, 41, 123, 72, 12, 97, 22, 69,
+ 3, 32, 33, 54, 48, 42, 106, 66, 79, 92, 91,
+ 72, 14, 21, 12, 7, 12, 11, 4, 16, 10, 75, 68,
+ 6, 1, 0, 75, 64, 71, 77, 65, 66, 1, 69, 71,
+ 105, 73, 75, 69, 83, 83, 69, 6, 67, 70, 67,
+ 71, 86, 73, 64, 102, 119, 93, 100, 95, 126,
+ 84, 78, 111, 0, 73, 73, 80, 83, 95, 89, 83,
+ 102, 93, 91, 94, 103, 67, 8, 36, 24, 10, 4,
+ 13, 7, 2, 64, 9, 16, 41, 26, 20, 11, 31, 15,
+ 17, 3, 29, 6, 35, 22, 17, 11, 23, 9, 3, 1, 0,
+ 69, 49, 28, 10, 3, 18, 3, 71, 66, 15, 40, 34,
+ 20, 9, 30, 12, 10, 3, 1, 62, 72, 64, 6, 4, 66,
+ 4, 8, 11, 10, 15, 21, 32, 69, 64, 74, 73, 61,
+ 79, 99, 73, 2, 2, 72, 71, 70, 74, 0, 76, 94,
+ 69, 68, 21, 70, 77, 69, 68, 65, 72, 71, 70,
+ 89, 81, 72, 75, 71, 22, 100, 76, 23, 82, 68,
+ 69, 70, 4, 66, 71, 2, 9, 72, 72, 104, 34, 32,
+ 33, 17, 1, 1, 64, 73, 65, 75, 79, 74, 91, 88,
+ 91, 97, 94, 105, 126, 92, 85, 85, 86, 98, 92,
+ 88, 90, 91, 79, 74, 78, 75, 81, 100, 87, 85,
+ 107, 93, 105, 105, 123, 118, 114, 114, 76, 73,
+ 114, 83, 83, 90, 106, 96, 97, 100, 89, 95, 85,
+ 83, 105, 111, 123, 85, 97, 103, 70, 9, 14, 16,
+ 33, 22, 28, 6, 31, 26, 36, 30, 39, 34, 23, 43,
+ 65, 73, 81, 89, 89, 115, 115, 114, 15, 44, 39,
+ 42, 32, 34, 20, 19, 10, 9, 70, 9, 14, 16, 33,
+ 22, 28, 6, 31, 26, 36, 30, 39, 34, 23, 43, 65,
+ 73, 81, 89, 89, 115, 115, 114 },
+
+ {
+
+ 27,
+ 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 90, 64,
+ 28, 38, 9, 34, 82, 74, 23, 69, 72, 34, 92, 91,
+ 2, 74, 126, 126, 126, 18, 3, 68, 74, 23, 69,
+ 77, 11, 35, 65, 3, 64, 1, 105, 109, 94, 111,
+ 65, 72, 72, 75, 90, 82, 98, 1, 68, 69, 66, 31,
+ 1, 22, 0, 0, 0, 4, 96, 97, 9, 74, 69, 12, 64,
+ 75, 29, 16, 4, 44, 42, 5, 16, 16, 23, 7, 7,
+ 31, 108, 88, 89, 90, 24, 71, 16, 31, 66, 75,
+ 1, 13, 65, 73, 72, 81, 23, 75, 13, 64, 75, 6,
+ 69, 64, 7, 12, 15, 18, 7, 78, 67, 3, 65, 0,
+ 71, 30, 66, 1, 30, 27, 37, 30, 26, 96, 9, 9,
+ 75, 67, 77, 69, 30, 1, 4, 34, 53, 62, 62, 44,
+ 98, 106, 10, 41, 42, 124, 72, 12, 97, 23, 69,
+ 3, 34, 33, 54, 48, 43, 107, 65, 80, 94, 94,
+ 70, 14, 21, 12, 6, 12, 11, 4, 16, 10, 76, 69,
+ 6, 1, 0, 75, 64, 72, 77, 65, 66, 1, 70, 72,
+ 106, 73, 75, 69, 84, 83, 71, 4, 68, 71, 69,
+ 73, 89, 75, 65, 104, 121, 94, 102, 96, 126,
+ 85, 79, 113, 64, 74, 74, 81, 85, 96, 91, 85,
+ 103, 93, 91, 95, 104, 65, 10, 38, 25, 10, 4,
+ 14, 8, 3, 0, 11, 17, 42, 27, 21, 12, 32, 16,
+ 18, 4, 31, 7, 36, 23, 18, 11, 24, 10, 4, 2, 2,
+ 68, 51, 29, 11, 3, 19, 4, 70, 65, 16, 41, 35,
+ 21, 10, 31, 13, 11, 4, 2, 62, 70, 1, 8, 6, 65,
+ 5, 10, 13, 12, 17, 23, 34, 68, 0, 73, 72, 62,
+ 79, 100, 73, 3, 3, 71, 71, 70, 74, 0, 76, 95,
+ 69, 68, 22, 70, 77, 69, 68, 65, 72, 71, 70,
+ 89, 82, 72, 75, 70, 24, 100, 76, 24, 83, 68,
+ 69, 70, 5, 66, 71, 3, 10, 71, 71, 106, 33, 31,
+ 32, 15, 64, 65, 66, 76, 67, 77, 82, 76, 94,
+ 91, 93, 101, 97, 108, 126, 95, 87, 86, 88,
+ 100, 93, 89, 91, 92, 79, 76, 80, 77, 83, 102,
+ 89, 86, 109, 95, 107, 107, 125, 120, 116, 115,
+ 77, 74, 116, 84, 85, 91, 108, 97, 98, 101, 90,
+ 95, 85, 82, 106, 112, 125, 86, 98, 104, 70,
+ 10, 15, 17, 35, 23, 29, 6, 32, 27, 37, 31, 40,
+ 35, 24, 42, 66, 75, 83, 91, 91, 117, 117, 115,
+ 16, 44, 40, 43, 33, 35, 21, 20, 11, 10, 70,
+ 10, 15, 17, 35, 23, 29, 6, 32, 27, 37, 31, 40,
+ 35, 24, 42, 66, 75, 83, 91, 91, 117, 117, 115 },
+
+ {
+
+ 26,
+ 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 92, 65,
+ 27, 39, 9, 35, 84, 74, 24, 70, 72, 35, 93, 92,
+ 1, 76, 126, 126, 126, 20, 4, 68, 74, 24, 70,
+ 77, 12, 35, 65, 3, 0, 3, 106, 110, 94, 111,
+ 65, 71, 71, 76, 90, 82, 98, 1, 68, 69, 65, 32,
+ 1, 22, 0, 0, 0, 4, 96, 97, 10, 75, 69, 11, 64,
+ 74, 31, 17, 5, 45, 43, 6, 17, 18, 24, 8, 8,
+ 34, 108, 89, 89, 91, 24, 71, 17, 34, 65, 75,
+ 1, 15, 65, 73, 72, 81, 23, 75, 13, 0, 75, 7,
+ 68, 64, 7, 12, 16, 18, 7, 79, 67, 3, 65, 0,
+ 71, 30, 66, 1, 31, 27, 37, 30, 26, 97, 9, 9,
+ 75, 67, 76, 69, 31, 2, 4, 36, 55, 62, 62, 46,
+ 98, 107, 10, 42, 42, 125, 72, 13, 98, 24, 69,
+ 3, 35, 33, 54, 48, 45, 108, 0, 81, 97, 97, 69,
+ 14, 21, 11, 6, 11, 11, 3, 16, 9, 77, 70, 6, 1,
+ 0, 75, 65, 72, 78, 66, 66, 1, 71, 72, 107, 74,
+ 76, 69, 85, 84, 73, 2, 70, 73, 70, 75, 92, 78,
+ 66, 106, 123, 96, 104, 98, 126, 86, 80, 114,
+ 65, 75, 75, 83, 86, 98, 93, 86, 105, 93, 92,
+ 96, 106, 64, 11, 39, 25, 10, 4, 15, 9, 4, 0,
+ 13, 19, 43, 28, 21, 13, 34, 17, 20, 5, 34, 8,
+ 36, 23, 18, 12, 25, 11, 4, 3, 3, 68, 52, 30,
+ 11, 3, 20, 4, 70, 64, 17, 41, 36, 21, 10, 32,
+ 14, 12, 4, 3, 62, 69, 2, 9, 7, 64, 7, 11, 15,
+ 13, 19, 25, 37, 67, 1, 72, 71, 62, 78, 101,
+ 72, 4, 4, 71, 71, 70, 74, 1, 77, 96, 69, 68,
+ 23, 70, 77, 69, 68, 64, 73, 72, 70, 90, 83,
+ 73, 75, 70, 25, 101, 77, 25, 83, 69, 69, 70,
+ 5, 67, 72, 3, 10, 71, 71, 107, 32, 30, 31, 14,
+ 67, 67, 69, 79, 70, 80, 85, 79, 97, 93, 95,
+ 105, 101, 112, 126, 98, 89, 88, 90, 103, 95,
+ 90, 92, 93, 79, 78, 83, 80, 85, 104, 91, 88,
+ 111, 97, 109, 109, 126, 122, 117, 117, 78, 75,
+ 118, 86, 86, 93, 110, 98, 99, 102, 91, 96, 85,
+ 81, 108, 113, 126, 88, 99, 105, 70, 10, 15,
+ 18, 36, 24, 29, 7, 33, 28, 38, 31, 41, 35, 24,
+ 42, 68, 77, 85, 93, 93, 120, 119, 116, 16, 45,
+ 40, 43, 34, 36, 21, 20, 11, 11, 70, 10, 15,
+ 18, 36, 24, 29, 7, 33, 28, 38, 31, 41, 35, 24,
+ 42, 68, 77, 85, 93, 93, 120, 119, 116 },
+
+ {
+
+ 25,
+ 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 93, 67,
+ 26, 39, 10, 37, 85, 74, 26, 70, 73, 36, 94,
+ 93, 0, 77, 126, 126, 126, 22, 6, 68, 74, 26,
+ 70, 76, 13, 36, 65, 4, 1, 4, 108, 111, 94,
+ 112, 65, 71, 71, 76, 91, 81, 98, 0, 68, 69,
+ 64, 32, 1, 22, 0, 0, 0, 5, 96, 97, 10, 75, 70,
+ 11, 0, 73, 33, 18, 6, 47, 45, 7, 18, 20, 25,
+ 9, 8, 36, 109, 89, 89, 91, 25, 71, 18, 36, 65,
+ 74, 2, 17, 65, 73, 72, 81, 24, 76, 14, 0, 75,
+ 8, 68, 0, 7, 13, 17, 19, 7, 79, 67, 3, 65, 0,
+ 71, 30, 66, 1, 31, 27, 38, 31, 27, 97, 10, 10,
+ 76, 67, 76, 69, 32, 2, 5, 37, 56, 62, 62, 47,
+ 99, 108, 11, 42, 42, 125, 73, 13, 98, 24, 70,
+ 4, 36, 33, 54, 49, 46, 109, 1, 82, 100, 100,
+ 67, 14, 21, 11, 5, 11, 10, 3, 15, 9, 77, 70,
+ 6, 0, 0, 76, 65, 73, 79, 66, 66, 1, 71, 73,
+ 109, 74, 77, 69, 86, 84, 76, 0, 72, 74, 72,
+ 77, 95, 80, 68, 108, 125, 97, 106, 100, 126,
+ 87, 82, 116, 67, 76, 76, 84, 88, 100, 95, 88,
+ 106, 93, 93, 97, 107, 1, 13, 41, 26, 10, 4,
+ 16, 9, 4, 1, 14, 20, 44, 29, 22, 13, 35, 18,
+ 21, 6, 36, 8, 37, 24, 19, 12, 26, 11, 5, 4, 4,
+ 68, 54, 31, 11, 3, 20, 5, 70, 64, 17, 42, 36,
+ 22, 10, 33, 14, 12, 5, 3, 62, 67, 4, 11, 9, 1,
+ 8, 13, 16, 15, 21, 27, 39, 67, 2, 72, 71, 62,
+ 78, 102, 72, 4, 5, 71, 71, 70, 74, 1, 77, 97,
+ 69, 68, 23, 70, 77, 69, 68, 64, 73, 72, 70,
+ 91, 84, 73, 75, 70, 26, 102, 77, 26, 84, 69,
+ 69, 70, 6, 67, 72, 4, 11, 71, 71, 109, 31, 29,
+ 30, 12, 69, 70, 71, 82, 72, 83, 88, 82, 100,
+ 96, 97, 108, 104, 116, 126, 100, 92, 90, 92,
+ 105, 97, 91, 93, 94, 79, 80, 85, 82, 87, 106,
+ 92, 89, 113, 99, 112, 111, 126, 124, 119, 118,
+ 79, 77, 120, 87, 88, 94, 112, 100, 101, 103,
+ 92, 97, 86, 80, 109, 115, 126, 89, 101, 106,
+ 69, 11, 16, 18, 37, 25, 30, 7, 34, 29, 39, 32,
+ 42, 36, 25, 41, 70, 79, 87, 95, 95, 122, 121,
+ 117, 17, 45, 41, 44, 34, 37, 22, 21, 12, 11,
+ 69, 11, 16, 18, 37, 25, 30, 7, 34, 29, 39, 32,
+ 42, 36, 25, 41, 70, 79, 87, 95, 95, 122, 121,
+ 117 },
+
+ {
+
+ 23,
+ 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 95, 68,
+ 25, 40, 10, 39, 87, 74, 27, 70, 73, 37, 95,
+ 95, 65, 79, 126, 126, 126, 24, 7, 68, 74, 27,
+ 70, 76, 14, 36, 66, 5, 2, 5, 109, 113, 95,
+ 112, 65, 71, 70, 76, 91, 81, 98, 0, 68, 69,
+ 64, 33, 1, 22, 0, 0, 0, 5, 97, 97, 11, 76, 70,
+ 10, 0, 72, 35, 19, 7, 48, 46, 7, 19, 22, 26,
+ 10, 9, 38, 110, 89, 89, 91, 25, 72, 19, 38,
+ 65, 73, 3, 19, 65, 73, 72, 81, 24, 76, 14, 1,
+ 75, 8, 68, 0, 7, 13, 18, 19, 7, 79, 68, 3, 65,
+ 0, 71, 30, 66, 1, 32, 27, 38, 31, 27, 98, 10,
+ 11, 76, 67, 76, 69, 33, 2, 5, 38, 57, 62, 62,
+ 49, 99, 109, 11, 42, 42, 126, 73, 14, 99, 25,
+ 70, 4, 37, 33, 54, 49, 47, 110, 3, 83, 103,
+ 103, 66, 13, 21, 10, 4, 10, 10, 2, 15, 8, 78,
+ 71, 5, 0, 0, 76, 66, 74, 80, 67, 67, 0, 72,
+ 73, 110, 75, 78, 69, 88, 85, 78, 65, 74, 76,
+ 74, 79, 99, 83, 69, 110, 126, 99, 108, 102,
+ 126, 89, 83, 118, 68, 77, 77, 85, 89, 102, 97,
+ 90, 108, 93, 94, 98, 109, 2, 14, 42, 26, 10,
+ 4, 17, 10, 5, 2, 16, 21, 45, 29, 23, 14, 36,
+ 19, 23, 7, 38, 9, 37, 24, 19, 13, 27, 12, 5,
+ 4, 5, 68, 55, 32, 11, 3, 21, 5, 70, 0, 18, 42,
+ 37, 22, 10, 34, 15, 13, 5, 4, 62, 66, 5, 12,
+ 11, 2, 10, 14, 18, 16, 22, 29, 41, 66, 3, 71,
+ 70, 62, 78, 103, 72, 5, 5, 71, 71, 70, 75, 2,
+ 77, 98, 69, 68, 24, 70, 77, 69, 68, 64, 74,
+ 72, 70, 92, 85, 73, 75, 70, 27, 103, 77, 26,
+ 85, 69, 69, 70, 6, 67, 73, 4, 12, 71, 71, 110,
+ 30, 28, 29, 10, 71, 72, 74, 85, 75, 86, 92,
+ 85, 104, 99, 99, 112, 108, 120, 126, 103, 94,
+ 92, 94, 108, 99, 93, 94, 95, 79, 83, 87, 84,
+ 89, 108, 94, 91, 115, 101, 114, 113, 126, 126,
+ 121, 119, 80, 78, 123, 89, 90, 96, 114, 101,
+ 102, 104, 93, 98, 86, 79, 110, 116, 126, 90,
+ 102, 107, 69, 11, 16, 19, 38, 25, 31, 7, 35,
+ 30, 40, 32, 43, 36, 26, 41, 72, 81, 89, 97,
+ 97, 124, 123, 119, 17, 46, 41, 45, 35, 37, 22,
+ 21, 12, 12, 69, 11, 16, 19, 38, 25, 31, 7, 35,
+ 30, 40, 32, 43, 36, 26, 41, 72, 81, 89, 97,
+ 97, 124, 123, 119 },
+
+ {
+
+ 22,
+ 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 97, 70,
+ 24, 41, 10, 40, 89, 73, 29, 71, 73, 38, 96,
+ 96, 66, 81, 126, 126, 126, 26, 8, 68, 73, 29,
+ 71, 76, 15, 36, 66, 5, 3, 7, 110, 114, 95,
+ 112, 65, 70, 69, 77, 91, 81, 98, 0, 67, 69, 0,
+ 33, 1, 22, 0, 0, 0, 6, 97, 97, 11, 77, 70, 10,
+ 1, 70, 38, 20, 8, 50, 48, 8, 20, 24, 27, 11,
+ 10, 41, 110, 90, 89, 92, 25, 72, 20, 41, 64,
+ 73, 3, 21, 65, 73, 72, 80, 24, 76, 15, 2, 74,
+ 9, 67, 0, 8, 14, 19, 20, 7, 80, 68, 4, 65, 0,
+ 71, 31, 66, 1, 32, 28, 38, 31, 27, 98, 11, 11,
+ 77, 67, 75, 69, 34, 3, 6, 40, 59, 62, 62, 51,
+ 100, 110, 12, 43, 42, 126, 73, 14, 99, 26, 70,
+ 4, 39, 33, 54, 49, 49, 111, 4, 84, 105, 106,
+ 65, 13, 21, 10, 4, 10, 10, 2, 15, 8, 79, 72,
+ 5, 0, 0, 76, 66, 74, 81, 67, 67, 0, 73, 74,
+ 111, 76, 78, 69, 89, 85, 80, 67, 75, 77, 75,
+ 81, 102, 85, 70, 112, 126, 100, 110, 104, 126,
+ 90, 84, 119, 69, 78, 78, 87, 91, 104, 99, 91,
+ 109, 93, 95, 99, 111, 4, 16, 43, 27, 10, 4,
+ 18, 11, 6, 2, 18, 23, 46, 30, 23, 15, 38, 20,
+ 24, 8, 41, 10, 38, 25, 20, 13, 28, 13, 6, 5,
+ 6, 67, 56, 33, 11, 3, 22, 6, 69, 1, 19, 42,
+ 38, 22, 10, 35, 16, 14, 6, 5, 62, 65, 7, 14,
+ 12, 3, 11, 16, 20, 18, 24, 31, 44, 65, 4, 70,
+ 69, 62, 77, 104, 71, 6, 6, 71, 71, 70, 75, 2,
+ 78, 99, 69, 68, 25, 70, 77, 69, 68, 0, 74, 73,
+ 70, 93, 86, 74, 75, 70, 28, 103, 78, 27, 85,
+ 70, 69, 70, 6, 68, 73, 4, 12, 71, 71, 112, 29,
+ 27, 28, 9, 74, 75, 77, 88, 77, 89, 95, 88,
+ 107, 101, 101, 116, 111, 124, 126, 106, 96,
+ 93, 96, 110, 101, 94, 95, 96, 79, 85, 90, 87,
+ 91, 110, 96, 92, 117, 103, 116, 115, 126, 126,
+ 122, 121, 81, 79, 125, 90, 91, 97, 116, 102,
+ 103, 105, 94, 99, 86, 78, 112, 117, 126, 92,
+ 103, 108, 69, 12, 17, 20, 39, 26, 31, 8, 36,
+ 31, 41, 33, 44, 37, 26, 40, 74, 83, 91, 99,
+ 99, 126, 125, 120, 18, 46, 42, 45, 36, 38, 23,
+ 22, 12, 13, 69, 12, 17, 20, 39, 26, 31, 8, 36,
+ 31, 41, 33, 44, 37, 26, 40, 74, 83, 91, 99,
+ 99, 126, 125, 120 },
+
+ {
+
+ 21,
+ 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 71,
+ 23, 41, 10, 42, 90, 73, 30, 71, 74, 39, 97,
+ 97, 67, 83, 126, 126, 126, 28, 10, 68, 73, 30,
+ 71, 75, 16, 37, 66, 6, 4, 8, 111, 115, 95,
+ 113, 65, 70, 68, 77, 92, 81, 98, 0, 67, 69, 1,
+ 34, 1, 22, 0, 0, 0, 6, 97, 97, 12, 78, 70, 9,
+ 1, 69, 40, 21, 9, 51, 49, 9, 21, 26, 28, 12,
+ 11, 43, 111, 90, 89, 92, 26, 72, 21, 43, 64,
+ 72, 4, 23, 65, 73, 72, 80, 25, 76, 15, 3, 74,
+ 10, 67, 0, 8, 14, 20, 20, 7, 80, 68, 4, 65, 0,
+ 71, 31, 66, 1, 33, 28, 39, 31, 28, 99, 11, 12,
+ 77, 67, 75, 69, 35, 3, 6, 41, 60, 62, 62, 53,
+ 100, 111, 12, 43, 42, 126, 73, 15, 100, 26,
+ 70, 4, 40, 33, 54, 49, 50, 112, 6, 85, 108,
+ 109, 0, 13, 21, 9, 3, 9, 9, 1, 15, 7, 80, 73,
+ 5, 0, 0, 77, 67, 75, 82, 68, 67, 0, 74, 74,
+ 112, 76, 79, 69, 90, 86, 82, 69, 77, 79, 77,
+ 83, 105, 88, 71, 114, 126, 102, 112, 106, 126,
+ 91, 85, 121, 70, 79, 79, 88, 92, 106, 101, 93,
+ 111, 93, 96, 100, 112, 5, 17, 45, 27, 10, 4,
+ 19, 12, 7, 3, 20, 24, 47, 31, 24, 15, 39, 21,
+ 26, 9, 43, 10, 38, 25, 20, 14, 29, 13, 6, 6,
+ 7, 67, 58, 34, 11, 3, 23, 6, 69, 2, 20, 43,
+ 39, 23, 10, 36, 16, 15, 6, 5, 62, 0, 8, 15,
+ 14, 4, 13, 17, 21, 19, 26, 33, 46, 65, 5, 70,
+ 68, 62, 77, 105, 71, 7, 7, 71, 71, 70, 75, 3,
+ 78, 100, 69, 68, 25, 70, 77, 69, 68, 0, 75,
+ 73, 70, 94, 87, 74, 75, 70, 29, 104, 78, 28,
+ 86, 70, 69, 70, 7, 68, 74, 5, 13, 71, 71, 113,
+ 28, 26, 27, 7, 76, 77, 79, 91, 80, 92, 98, 91,
+ 110, 104, 103, 120, 115, 126, 126, 109, 98,
+ 95, 98, 113, 103, 95, 96, 97, 79, 87, 92, 89,
+ 93, 112, 98, 94, 119, 105, 118, 117, 126, 126,
+ 124, 122, 82, 80, 126, 92, 93, 99, 118, 104,
+ 105, 106, 95, 100, 86, 77, 113, 119, 126, 93,
+ 104, 109, 69, 12, 17, 20, 40, 27, 32, 8, 37,
+ 32, 42, 33, 45, 37, 27, 40, 76, 85, 93, 101,
+ 101, 126, 126, 121, 18, 47, 42, 46, 36, 39,
+ 23, 22, 13, 13, 69, 12, 17, 20, 40, 27, 32, 8,
+ 37, 32, 42, 33, 45, 37, 27, 40, 76, 85, 93,
+ 101, 101, 126, 126, 121 },
+
+ {
+
+ 20,
+ 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 101, 73,
+ 22, 42, 10, 44, 92, 73, 32, 71, 74, 40, 98,
+ 98, 68, 85, 126, 126, 126, 30, 11, 68, 73, 32,
+ 71, 75, 17, 37, 66, 7, 5, 9, 112, 116, 95,
+ 113, 65, 70, 67, 77, 92, 81, 98, 0, 67, 69, 2,
+ 34, 1, 22, 0, 0, 0, 7, 97, 97, 12, 79, 70, 9,
+ 2, 68, 42, 22, 10, 53, 51, 10, 22, 28, 29, 13,
+ 12, 45, 112, 90, 89, 92, 26, 72, 22, 45, 64,
+ 71, 5, 25, 65, 73, 72, 80, 25, 76, 16, 4, 74,
+ 11, 67, 0, 8, 15, 21, 21, 7, 80, 68, 4, 65, 0,
+ 71, 31, 66, 1, 33, 28, 39, 31, 28, 99, 12, 13,
+ 78, 67, 75, 69, 36, 3, 7, 42, 61, 62, 62, 55,
+ 101, 112, 13, 43, 42, 126, 73, 15, 100, 27,
+ 70, 4, 41, 33, 54, 49, 51, 113, 7, 86, 111,
+ 112, 1, 13, 21, 9, 2, 9, 9, 1, 15, 7, 81, 74,
+ 5, 0, 0, 77, 67, 76, 83, 68, 67, 0, 75, 75,
+ 113, 77, 80, 69, 91, 86, 84, 71, 79, 80, 79,
+ 85, 108, 90, 72, 116, 126, 103, 114, 108, 126,
+ 92, 86, 123, 71, 80, 80, 89, 94, 108, 103, 95,
+ 112, 93, 97, 101, 114, 7, 19, 46, 28, 10, 4,
+ 20, 13, 8, 4, 22, 25, 48, 32, 25, 16, 40, 22,
+ 27, 10, 45, 11, 39, 26, 21, 14, 30, 14, 7, 7,
+ 8, 67, 59, 35, 11, 3, 24, 7, 69, 3, 21, 43,
+ 40, 23, 10, 37, 17, 16, 7, 6, 62, 1, 10, 17,
+ 16, 5, 14, 19, 23, 21, 28, 35, 48, 64, 6, 69,
+ 67, 62, 77, 106, 71, 8, 8, 71, 71, 70, 75, 3,
+ 78, 101, 69, 68, 26, 70, 77, 69, 68, 0, 75,
+ 73, 70, 95, 88, 74, 75, 70, 30, 105, 78, 29,
+ 87, 70, 69, 70, 7, 68, 74, 5, 14, 71, 71, 115,
+ 27, 25, 26, 5, 78, 80, 82, 94, 82, 95, 101,
+ 94, 113, 107, 105, 124, 118, 126, 126, 112,
+ 100, 97, 100, 115, 105, 96, 97, 98, 79, 89,
+ 94, 91, 95, 114, 100, 95, 121, 107, 120, 119,
+ 126, 126, 126, 123, 83, 81, 126, 93, 95, 100,
+ 120, 105, 106, 107, 96, 101, 86, 76, 114, 120,
+ 126, 94, 105, 110, 69, 13, 18, 21, 41, 28, 33,
+ 8, 38, 33, 43, 34, 46, 38, 28, 39, 78, 87, 95,
+ 103, 103, 126, 126, 122, 19, 47, 43, 47, 37,
+ 40, 24, 23, 13, 14, 69, 13, 18, 21, 41, 28,
+ 33, 8, 38, 33, 43, 34, 46, 38, 28, 39, 78, 87,
+ 95, 103, 103, 126, 126, 122 },
+
+ {
+
+ 18,
+ 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 103, 75,
+ 21, 42, 10, 45, 94, 73, 33, 72, 75, 41, 99,
+ 100, 70, 87, 126, 126, 126, 32, 12, 68, 73,
+ 33, 72, 75, 17, 37, 67, 7, 5, 10, 114, 118,
+ 96, 114, 66, 70, 67, 78, 93, 81, 98, 64, 67,
+ 69, 2, 34, 0, 22, 0, 0, 0, 7, 98, 97, 12, 80,
+ 71, 8, 2, 67, 44, 23, 10, 54, 52, 10, 23, 29,
+ 29, 14, 12, 47, 113, 91, 90, 93, 26, 73, 22,
+ 47, 64, 71, 5, 26, 65, 73, 72, 80, 25, 77, 16,
+ 4, 74, 11, 67, 0, 8, 15, 22, 21, 7, 81, 69, 4,
+ 65, 64, 71, 31, 66, 1, 33, 28, 39, 31, 28,
+ 100, 12, 13, 79, 67, 75, 70, 36, 3, 7, 43, 62,
+ 62, 62, 56, 102, 114, 13, 43, 42, 126, 74, 15,
+ 101, 27, 71, 4, 42, 33, 53, 49, 52, 114, 8,
+ 88, 114, 116, 2, 12, 21, 8, 1, 8, 8, 0, 14, 6,
+ 82, 75, 4, 64, 64, 78, 68, 77, 84, 69, 68, 64,
+ 76, 76, 115, 78, 81, 69, 93, 87, 87, 74, 81,
+ 82, 81, 88, 112, 93, 74, 118, 126, 105, 117,
+ 110, 126, 94, 88, 125, 73, 81, 81, 91, 96,
+ 110, 105, 97, 114, 93, 98, 102, 116, 8, 20,
+ 47, 28, 10, 4, 20, 13, 8, 4, 23, 26, 48, 32,
+ 25, 16, 41, 23, 28, 10, 47, 11, 39, 26, 21,
+ 14, 30, 14, 7, 7, 9, 67, 60, 36, 11, 2, 24, 7,
+ 69, 3, 21, 43, 40, 23, 10, 38, 17, 16, 7, 6,
+ 62, 2, 11, 18, 17, 6, 15, 20, 24, 22, 29, 36,
+ 50, 64, 6, 69, 67, 62, 77, 108, 71, 8, 8, 71,
+ 71, 70, 76, 3, 79, 102, 70, 68, 26, 71, 77,
+ 70, 68, 0, 76, 74, 71, 96, 89, 75, 76, 70, 31,
+ 106, 79, 29, 88, 71, 69, 71, 7, 69, 75, 5, 14,
+ 71, 71, 117, 25, 24, 24, 3, 81, 83, 85, 97,
+ 85, 98, 105, 97, 117, 110, 107, 126, 122, 126,
+ 126, 115, 103, 99, 103, 118, 107, 98, 99, 99,
+ 79, 92, 97, 94, 97, 117, 102, 97, 124, 109,
+ 123, 121, 126, 126, 126, 125, 85, 83, 126, 95,
+ 97, 102, 122, 107, 108, 108, 97, 102, 87, 75,
+ 116, 122, 126, 96, 107, 112, 69, 13, 18, 21,
+ 42, 28, 33, 8, 39, 33, 44, 34, 46, 38, 28, 38,
+ 80, 89, 98, 106, 105, 126, 126, 124, 19, 47,
+ 43, 47, 37, 40, 24, 23, 13, 14, 69, 13, 18,
+ 21, 42, 28, 33, 8, 39, 33, 44, 34, 46, 38, 28,
+ 38, 80, 89, 98, 106, 105, 126, 126, 124 },
+
+ {
+
+ 17,
+ 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 104, 76,
+ 21, 43, 11, 47, 95, 72, 35, 72, 75, 43, 99,
+ 101, 71, 88, 126, 126, 126, 34, 14, 67, 72,
+ 35, 72, 74, 18, 38, 67, 8, 6, 12, 115, 119,
+ 96, 114, 66, 69, 66, 78, 93, 80, 97, 64, 66,
+ 68, 3, 35, 0, 22, 0, 0, 0, 8, 98, 97, 13, 80,
+ 71, 8, 3, 65, 47, 25, 11, 56, 54, 11, 25, 31,
+ 30, 16, 13, 50, 113, 91, 90, 93, 27, 73, 23,
+ 50, 0, 70, 6, 28, 65, 72, 72, 79, 26, 77, 17,
+ 5, 73, 12, 66, 1, 9, 16, 23, 22, 8, 81, 69, 5,
+ 64, 64, 70, 32, 65, 1, 34, 29, 40, 32, 29,
+ 100, 13, 14, 79, 67, 74, 70, 37, 4, 8, 45, 62,
+ 62, 62, 58, 102, 115, 14, 44, 43, 126, 74, 16,
+ 101, 28, 71, 5, 44, 33, 53, 50, 54, 115, 10,
+ 89, 116, 119, 4, 12, 21, 8, 1, 8, 8, 0, 14, 6,
+ 82, 75, 4, 64, 64, 78, 68, 77, 84, 69, 68, 64,
+ 76, 76, 116, 78, 81, 69, 94, 87, 89, 76, 82,
+ 83, 82, 90, 115, 95, 75, 119, 126, 106, 119,
+ 111, 126, 95, 89, 126, 74, 81, 81, 92, 97,
+ 111, 106, 98, 115, 93, 98, 102, 117, 10, 22,
+ 49, 29, 10, 4, 21, 14, 9, 5, 25, 28, 49, 33,
+ 26, 17, 43, 24, 30, 11, 50, 12, 40, 27, 22,
+ 15, 31, 15, 8, 8, 11, 66, 62, 37, 12, 2, 25,
+ 8, 68, 4, 22, 44, 41, 24, 11, 39, 18, 17, 8,
+ 7, 62, 4, 13, 20, 19, 8, 17, 22, 26, 24, 31,
+ 38, 53, 0, 7, 68, 66, 62, 76, 109, 70, 9, 9,
+ 70, 71, 69, 76, 4, 79, 102, 70, 68, 27, 71,
+ 77, 70, 68, 1, 76, 74, 71, 96, 89, 75, 76, 69,
+ 33, 106, 79, 30, 88, 71, 69, 71, 8, 69, 75, 6,
+ 15, 70, 70, 118, 24, 23, 23, 2, 83, 85, 87,
+ 100, 87, 100, 108, 99, 120, 112, 109, 126,
+ 125, 126, 126, 117, 105, 100, 105, 120, 108,
+ 99, 100, 99, 79, 94, 99, 96, 99, 119, 103, 98,
+ 126, 110, 125, 122, 126, 126, 126, 126, 86,
+ 84, 126, 96, 98, 103, 123, 108, 109, 109, 97,
+ 102, 87, 74, 117, 123, 126, 97, 108, 113, 68,
+ 14, 19, 22, 44, 29, 34, 9, 41, 34, 45, 35, 47,
+ 39, 29, 38, 81, 90, 100, 108, 106, 126, 126,
+ 125, 20, 48, 44, 48, 38, 41, 25, 24, 14, 15,
+ 68, 14, 19, 22, 44, 29, 34, 9, 41, 34, 45, 35,
+ 47, 39, 29, 38, 81, 90, 100, 108, 106, 126,
+ 126, 125 },
+
+ {
+
+ 16,
+ 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 106, 78,
+ 20, 44, 11, 49, 97, 72, 36, 72, 75, 44, 100,
+ 102, 72, 90, 126, 126, 126, 36, 15, 67, 72,
+ 36, 72, 74, 19, 38, 67, 9, 7, 13, 116, 120,
+ 96, 114, 66, 69, 65, 78, 93, 80, 97, 64, 66,
+ 68, 4, 35, 0, 22, 0, 0, 0, 9, 98, 97, 13, 81,
+ 71, 8, 4, 64, 49, 26, 12, 58, 56, 12, 26, 33,
+ 31, 17, 14, 52, 114, 91, 90, 93, 27, 73, 24,
+ 52, 0, 69, 7, 30, 65, 72, 72, 79, 26, 77, 17,
+ 6, 73, 13, 66, 1, 9, 16, 24, 23, 8, 81, 69, 5,
+ 64, 64, 70, 32, 65, 1, 34, 29, 40, 32, 29,
+ 101, 14, 15, 80, 67, 74, 70, 38, 4, 8, 46, 62,
+ 62, 62, 60, 103, 116, 14, 44, 43, 126, 74, 16,
+ 102, 29, 71, 5, 45, 33, 53, 50, 55, 116, 11,
+ 90, 119, 122, 5, 12, 21, 8, 0, 7, 8, 0, 14, 5,
+ 83, 76, 4, 64, 64, 78, 69, 78, 85, 69, 68, 64,
+ 77, 77, 117, 79, 82, 69, 95, 88, 91, 78, 84,
+ 85, 84, 92, 118, 97, 76, 121, 126, 108, 121,
+ 113, 126, 96, 90, 126, 75, 82, 82, 93, 99,
+ 113, 108, 100, 117, 93, 99, 103, 119, 11, 23,
+ 50, 30, 10, 4, 22, 15, 10, 6, 27, 29, 50, 34,
+ 27, 18, 44, 25, 31, 12, 52, 13, 40, 27, 22,
+ 15, 32, 16, 9, 9, 12, 66, 62, 38, 12, 2, 26,
+ 9, 68, 5, 23, 44, 42, 24, 11, 40, 19, 18, 9,
+ 8, 62, 5, 15, 22, 21, 9, 18, 24, 28, 25, 33,
+ 40, 55, 1, 8, 67, 65, 62, 76, 110, 70, 10, 10,
+ 70, 71, 69, 76, 4, 79, 103, 70, 68, 28, 71,
+ 77, 70, 68, 1, 76, 74, 71, 97, 90, 75, 76, 69,
+ 34, 107, 79, 31, 89, 71, 69, 71, 8, 69, 75, 6,
+ 16, 70, 70, 120, 23, 22, 22, 0, 85, 88, 90,
+ 103, 89, 103, 111, 102, 123, 115, 111, 126,
+ 126, 126, 126, 120, 107, 102, 107, 122, 110,
+ 100, 101, 100, 79, 96, 101, 98, 101, 121, 105,
+ 100, 126, 112, 126, 124, 126, 126, 126, 126,
+ 87, 85, 126, 98, 100, 105, 125, 109, 110, 110,
+ 98, 103, 87, 73, 118, 124, 126, 98, 109, 114,
+ 68, 14, 20, 23, 45, 30, 35, 9, 42, 35, 46, 35,
+ 48, 40, 30, 37, 83, 92, 102, 110, 108, 126,
+ 126, 126, 21, 48, 44, 49, 39, 42, 25, 25, 14,
+ 16, 68, 14, 20, 23, 45, 30, 35, 9, 42, 35, 46,
+ 35, 48, 40, 30, 37, 83, 92, 102, 110, 108,
+ 126, 126, 126 },
+
+ {
+
+ 15,
+ 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 108, 79,
+ 19, 44, 11, 51, 98, 72, 38, 72, 76, 45, 101,
+ 103, 73, 92, 126, 126, 126, 38, 17, 67, 72,
+ 38, 72, 73, 20, 39, 67, 10, 8, 14, 117, 121,
+ 96, 115, 66, 69, 64, 78, 94, 80, 97, 64, 66,
+ 68, 5, 36, 0, 22, 0, 0, 0, 9, 98, 97, 14, 82,
+ 71, 7, 4, 0, 51, 27, 13, 59, 57, 13, 27, 35,
+ 32, 18, 15, 54, 115, 91, 90, 93, 28, 73, 25,
+ 54, 0, 68, 8, 32, 65, 72, 72, 79, 27, 77, 18,
+ 7, 73, 14, 66, 1, 9, 17, 25, 23, 8, 81, 69, 5,
+ 64, 64, 70, 32, 65, 1, 35, 29, 41, 32, 30,
+ 101, 14, 16, 80, 67, 74, 70, 39, 4, 9, 47, 62,
+ 62, 62, 62, 103, 117, 15, 44, 43, 126, 74, 17,
+ 102, 29, 71, 5, 46, 33, 53, 50, 56, 117, 13,
+ 91, 122, 125, 7, 12, 21, 7, 64, 7, 7, 64, 14,
+ 5, 84, 77, 4, 64, 64, 79, 69, 79, 86, 70, 68,
+ 64, 78, 77, 118, 79, 83, 69, 96, 88, 93, 80,
+ 86, 86, 86, 94, 121, 100, 77, 123, 126, 109,
+ 123, 115, 126, 97, 91, 126, 76, 83, 83, 94,
+ 100, 115, 110, 102, 118, 93, 100, 104, 120,
+ 13, 25, 52, 30, 10, 4, 23, 16, 11, 7, 29, 30,
+ 51, 35, 28, 18, 45, 26, 33, 13, 54, 13, 41,
+ 28, 23, 16, 33, 16, 9, 10, 13, 66, 62, 39, 12,
+ 2, 27, 9, 68, 6, 24, 45, 43, 25, 11, 41, 19,
+ 19, 9, 8, 62, 7, 16, 23, 23, 10, 20, 25, 29,
+ 27, 35, 42, 57, 1, 9, 67, 64, 62, 76, 111, 70,
+ 11, 11, 70, 71, 69, 76, 5, 79, 104, 70, 68,
+ 28, 71, 77, 70, 68, 1, 77, 74, 71, 98, 91, 75,
+ 76, 69, 35, 108, 79, 32, 90, 71, 69, 71, 9,
+ 69, 76, 7, 17, 70, 70, 121, 22, 21, 21, 65,
+ 87, 90, 92, 106, 92, 106, 114, 105, 126, 118,
+ 113, 126, 126, 126, 126, 123, 109, 104, 109,
+ 125, 112, 101, 102, 101, 79, 98, 103, 100,
+ 103, 123, 107, 101, 126, 114, 126, 126, 126,
+ 126, 126, 126, 88, 86, 126, 99, 102, 106, 126,
+ 111, 112, 111, 99, 104, 87, 72, 119, 126, 126,
+ 99, 110, 115, 68, 15, 20, 23, 46, 31, 36, 9,
+ 43, 36, 47, 36, 49, 40, 31, 37, 85, 94, 104,
+ 112, 110, 126, 126, 126, 21, 49, 45, 50, 39,
+ 43, 26, 25, 15, 16, 68, 15, 20, 23, 46, 31,
+ 36, 9, 43, 36, 47, 36, 49, 40, 31, 37, 85, 94,
+ 104, 112, 110, 126, 126, 126 },
+
+ },
+
+ {
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 126, 104, 10, 9, 12, 30, 61,
+ 62, 54, 14, 118, 6, 78, 65, 1, 14, 73, 13, 64,
+ 20, 62, 67, 90, 104, 126, 104, 67, 78, 65, 1,
+ 86, 95, 2, 18, 69, 81, 96, 8, 67, 86, 88, 5, 76,
+ 94, 9, 69, 81, 88, 67, 74, 74, 80, 72, 5, 22, 0,
+ 0, 0, 83, 86, 97, 72, 22, 1, 52, 8, 69, 126,
+ 102, 82, 74, 107, 126, 126, 126, 95, 126, 114,
+ 126, 123, 115, 122, 115, 0, 68, 84, 104, 70, 93,
+ 90, 126, 74, 97, 91, 126, 7, 82, 76, 125, 93,
+ 87, 77, 71, 0, 68, 84, 1, 65, 2, 7, 66, 64, 2,
+ 78, 13, 11, 28, 19, 25, 18, 17, 19, 46, 12, 13,
+ 44, 30, 1, 108, 100, 101, 91, 94, 88, 84, 86,
+ 83, 87, 94, 70, 72, 74, 4, 102, 100, 95, 75, 72,
+ 75, 71, 17, 69, 1, 65, 26, 72, 6, 9, 1, 72, 62,
+ 54, 38, 45, 54, 44, 26, 45, 34, 30, 33, 18, 5,
+ 1, 2, 25, 18, 24, 21, 19, 18, 22, 14, 29, 21, 8,
+ 12, 17, 89, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 46, 62, 60, 41, 62, 62, 62,
+ 62, 60, 58, 62, 47, 41, 15, 26, 3, 68, 97, 71,
+ 21, 13, 9, 1, 5, 0, 72, 74, 91, 67, 36, 24, 19,
+ 17, 64, 68, 78, 77, 86, 92, 8, 3, 1, 65, 73, 76,
+ 80, 88, 110, 97, 84, 79, 73, 74, 86, 96, 97,
+ 117, 78, 30, 15, 10, 1, 71, 79, 86, 90, 97, 62,
+ 93, 84, 79, 66, 71, 1, 3, 4, 75, 1, 5, 66, 79,
+ 71, 68, 19, 1, 27, 23, 36, 34, 19, 27, 31, 21,
+ 15, 1, 17, 64, 104, 97, 96, 88, 85, 85, 85, 88,
+ 66, 77, 76, 76, 5, 76, 83, 99, 95, 95, 76, 74,
+ 70, 75, 68, 65, 73, 1, 1, 68, 75, 8, 64, 70, 57,
+ 44, 47, 49, 50, 52, 48, 47, 40, 40, 43, 37, 19,
+ 23, 16, 46, 42, 41, 36, 34, 28, 13, 6, 0, 77,
+ 82, 94, 69, 109, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 61, 50, 28, 5, 62, 62, 33, 62, 62,
+ 62, 60, 62, 58, 52, 58, 51, 52, 34, 37, 24, 66,
+ 42, 32, 13, 120, 112, 114, 85, 92, 89, 71, 81,
+ 80, 68, 70, 7, 68, 13, 74, 62, 62, 62, 62, 60,
+ 57, 29, 9, 82, 75, 40, 29, 20, 9, 8, 2, 64, 68,
+ 92, 106, 97, 90, 90, 88, 73, 79, 86, 73, 70, 69,
+ 66, 64, 5, 4, 62, 62, 62, 62, 60, 54, 43, 27, 67 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 125, 102, 11, 10, 12, 29,
+ 60, 62, 54, 14, 115, 6, 77, 64, 1, 14, 72, 12,
+ 65, 20, 62, 68, 91, 104, 124, 102, 67, 77, 64,
+ 1, 85, 93, 3, 18, 68, 80, 95, 8, 67, 85, 88,
+ 5, 75, 93, 9, 69, 80, 88, 66, 73, 73, 79, 71,
+ 5, 22, 0, 0, 0, 82, 86, 97, 71, 22, 1, 52, 8,
+ 69, 125, 101, 82, 73, 105, 125, 125, 125, 93,
+ 125, 112, 125, 121, 114, 121, 114, 1, 67, 83,
+ 103, 69, 92, 89, 125, 73, 96, 90, 125, 8, 81,
+ 75, 123, 92, 86, 76, 70, 1, 67, 83, 2, 64, 2,
+ 7, 65, 64, 2, 77, 13, 11, 28, 19, 25, 18, 17,
+ 19, 45, 12, 13, 43, 29, 1, 107, 99, 100, 90,
+ 93, 87, 83, 85, 82, 86, 92, 70, 72, 73, 3,
+ 101, 99, 95, 74, 72, 74, 70, 17, 68, 1, 65,
+ 25, 71, 6, 8, 1, 72, 62, 54, 38, 45, 54, 44,
+ 26, 45, 34, 29, 33, 18, 5, 1, 2, 25, 18, 24,
+ 21, 19, 17, 22, 14, 28, 20, 8, 11, 16, 89, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 60, 44, 62, 59, 40, 62, 62, 62, 62, 58,
+ 56, 61, 45, 39, 15, 25, 2, 68, 97, 70, 22, 14,
+ 10, 2, 5, 0, 71, 73, 90, 66, 37, 25, 20, 17,
+ 0, 67, 77, 76, 85, 91, 9, 4, 2, 64, 72, 75,
+ 79, 87, 108, 96, 82, 78, 72, 73, 85, 95, 96,
+ 115, 77, 31, 16, 11, 2, 70, 78, 85, 89, 96,
+ 62, 92, 83, 78, 66, 70, 1, 4, 5, 74, 2, 6, 65,
+ 78, 71, 68, 19, 2, 27, 23, 35, 34, 19, 26, 30,
+ 21, 15, 1, 16, 64, 103, 96, 95, 87, 84, 84,
+ 84, 87, 66, 76, 75, 75, 5, 75, 82, 98, 94, 95,
+ 76, 73, 70, 74, 68, 65, 72, 1, 1, 67, 74, 8,
+ 64, 70, 57, 44, 47, 49, 49, 52, 48, 47, 40,
+ 40, 43, 37, 19, 22, 15, 45, 41, 40, 35, 33,
+ 27, 13, 6, 0, 76, 81, 93, 69, 108, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 61, 59, 48, 27, 5,
+ 62, 62, 32, 62, 62, 62, 58, 62, 56, 50, 56,
+ 49, 50, 33, 35, 23, 67, 41, 31, 12, 118, 110,
+ 112, 84, 91, 88, 69, 80, 79, 68, 69, 9, 66,
+ 15, 73, 62, 62, 62, 62, 58, 55, 27, 7, 83, 74,
+ 41, 29, 20, 9, 9, 2, 64, 68, 91, 105, 96, 89,
+ 89, 86, 72, 78, 85, 72, 69, 68, 65, 0, 6, 4,
+ 62, 62, 62, 62, 59, 53, 41, 26, 67 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 123, 101, 11, 10, 12, 28,
+ 59, 61, 54, 14, 113, 6, 76, 0, 1, 13, 72, 11,
+ 66, 19, 60, 70, 92, 105, 121, 101, 67, 76, 0,
+ 1, 85, 92, 3, 17, 68, 80, 94, 8, 67, 85, 88,
+ 5, 75, 92, 9, 69, 80, 88, 66, 73, 73, 79, 71,
+ 5, 22, 0, 0, 0, 81, 86, 97, 71, 21, 1, 52, 8,
+ 69, 124, 100, 82, 73, 104, 123, 123, 124, 92,
+ 123, 111, 123, 120, 113, 120, 113, 2, 67, 82,
+ 102, 69, 92, 88, 123, 73, 96, 90, 124, 8, 81,
+ 75, 122, 92, 85, 76, 70, 1, 67, 82, 2, 64, 1,
+ 7, 65, 64, 2, 77, 13, 11, 27, 19, 24, 18, 17,
+ 19, 43, 12, 13, 41, 28, 0, 106, 98, 99, 89,
+ 92, 86, 82, 84, 82, 85, 91, 70, 72, 73, 2,
+ 101, 98, 95, 74, 72, 73, 70, 16, 67, 1, 65,
+ 24, 70, 5, 7, 1, 73, 60, 53, 37, 44, 53, 43,
+ 25, 44, 34, 28, 32, 18, 5, 1, 2, 24, 17, 23,
+ 20, 18, 16, 21, 13, 26, 19, 7, 10, 15, 89, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 58, 41, 62, 57, 38, 62, 62, 62, 62, 56,
+ 54, 58, 43, 37, 14, 23, 1, 69, 97, 70, 22, 14,
+ 10, 2, 5, 0, 71, 73, 89, 66, 37, 25, 20, 17,
+ 1, 67, 76, 76, 84, 90, 10, 5, 2, 64, 71, 75,
+ 79, 86, 107, 95, 81, 77, 72, 73, 84, 94, 95,
+ 114, 77, 31, 16, 11, 2, 69, 77, 84, 88, 95,
+ 62, 92, 83, 78, 66, 70, 1, 4, 5, 74, 2, 6, 64,
+ 78, 71, 68, 18, 2, 26, 22, 34, 33, 19, 25, 29,
+ 21, 15, 0, 15, 65, 102, 95, 94, 87, 84, 84,
+ 83, 86, 66, 76, 75, 75, 4, 75, 82, 98, 93, 95,
+ 76, 73, 70, 73, 68, 65, 71, 1, 1, 67, 73, 7,
+ 64, 71, 56, 44, 47, 48, 48, 51, 47, 46, 39,
+ 39, 42, 36, 18, 21, 14, 43, 40, 38, 33, 32,
+ 26, 12, 5, 0, 76, 81, 93, 70, 107, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 59, 57, 46, 26, 4,
+ 62, 60, 31, 62, 62, 62, 56, 60, 54, 48, 54,
+ 47, 48, 31, 33, 21, 68, 39, 29, 10, 117, 109,
+ 111, 83, 90, 87, 67, 79, 78, 68, 68, 10, 65,
+ 16, 72, 62, 62, 62, 62, 55, 52, 24, 5, 84, 74,
+ 41, 29, 20, 9, 9, 2, 64, 68, 90, 104, 95, 88,
+ 88, 85, 71, 77, 84, 71, 68, 67, 65, 1, 6, 4,
+ 62, 62, 62, 61, 57, 51, 39, 24, 68 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 121, 99, 12, 10, 11, 26, 57,
+ 60, 54, 14, 111, 6, 75, 1, 1, 12, 72, 10, 67,
+ 19, 58, 71, 93, 105, 118, 100, 67, 75, 1, 1,
+ 84, 91, 4, 17, 68, 79, 93, 7, 68, 85, 88, 5,
+ 75, 92, 9, 69, 80, 88, 65, 73, 73, 79, 70, 5,
+ 22, 0, 0, 0, 81, 86, 97, 70, 20, 1, 52, 8, 69,
+ 123, 99, 82, 72, 103, 121, 121, 122, 91, 121,
+ 110, 121, 119, 112, 119, 112, 3, 67, 81, 101,
+ 69, 91, 88, 121, 73, 95, 89, 123, 8, 81, 74,
+ 120, 91, 84, 76, 70, 1, 67, 81, 3, 0, 1, 7,
+ 65, 64, 2, 77, 13, 10, 27, 19, 23, 18, 17, 19,
+ 41, 12, 12, 39, 27, 64, 105, 97, 98, 88, 91,
+ 86, 81, 84, 81, 84, 90, 70, 72, 73, 1, 100,
+ 97, 95, 74, 72, 72, 70, 15, 66, 1, 65, 23, 69,
+ 5, 6, 1, 74, 59, 52, 37, 43, 52, 42, 25, 43,
+ 33, 27, 31, 18, 5, 1, 1, 23, 16, 22, 19, 17,
+ 15, 20, 13, 24, 18, 7, 9, 14, 89, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 55,
+ 39, 62, 55, 37, 62, 61, 62, 59, 54, 51, 56,
+ 41, 34, 13, 21, 0, 70, 97, 70, 23, 14, 10, 2,
+ 5, 0, 71, 73, 89, 66, 37, 25, 20, 17, 2, 66,
+ 76, 75, 84, 89, 11, 5, 3, 64, 70, 74, 78, 86,
+ 106, 94, 80, 76, 71, 73, 83, 93, 94, 113, 76,
+ 31, 16, 11, 2, 68, 77, 83, 87, 94, 62, 91, 82,
+ 77, 66, 70, 1, 4, 5, 74, 2, 6, 64, 78, 71, 68,
+ 18, 3, 25, 21, 33, 32, 19, 24, 28, 21, 15, 0,
+ 14, 65, 101, 94, 93, 86, 83, 83, 83, 85, 66,
+ 76, 75, 74, 4, 75, 82, 97, 92, 95, 76, 73, 70,
+ 72, 68, 65, 70, 1, 1, 67, 72, 6, 64, 72, 55,
+ 43, 46, 47, 47, 50, 46, 45, 38, 38, 41, 35,
+ 17, 20, 13, 42, 39, 37, 31, 30, 25, 11, 5, 64,
+ 76, 81, 93, 70, 106, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 57, 54, 44, 24, 3, 61, 59, 29,
+ 62, 62, 60, 54, 58, 52, 46, 52, 45, 45, 29,
+ 31, 19, 69, 37, 27, 9, 116, 108, 110, 82, 89,
+ 86, 66, 78, 77, 68, 67, 12, 0, 18, 71, 62, 62,
+ 62, 62, 52, 49, 21, 3, 85, 74, 41, 29, 20, 9,
+ 9, 2, 64, 68, 90, 103, 94, 87, 87, 84, 71, 77,
+ 83, 71, 68, 67, 65, 1, 6, 4, 62, 62, 62, 59,
+ 55, 49, 37, 22, 69 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 120, 98, 12, 10, 11, 25, 56,
+ 58, 54, 14, 108, 5, 74, 1, 1, 11, 72, 9, 68,
+ 18, 56, 73, 94, 106, 115, 99, 67, 74, 1, 1,
+ 84, 90, 4, 16, 68, 79, 93, 7, 68, 84, 88, 5,
+ 75, 91, 8, 70, 80, 88, 65, 72, 73, 78, 70, 5,
+ 22, 0, 0, 0, 80, 87, 97, 70, 19, 1, 52, 8, 69,
+ 122, 98, 82, 72, 101, 120, 119, 121, 90, 120,
+ 108, 119, 118, 112, 118, 112, 3, 67, 80, 100,
+ 69, 91, 87, 119, 73, 95, 89, 122, 8, 80, 74,
+ 119, 91, 84, 76, 69, 1, 67, 81, 3, 0, 0, 6,
+ 65, 64, 2, 77, 13, 10, 26, 19, 23, 18, 17, 18,
+ 39, 12, 12, 37, 26, 65, 104, 96, 97, 87, 91,
+ 85, 80, 83, 81, 83, 89, 70, 72, 72, 0, 100,
+ 96, 95, 74, 72, 72, 70, 14, 65, 1, 65, 21, 68,
+ 4, 5, 1, 75, 57, 51, 36, 42, 51, 41, 24, 42,
+ 33, 25, 30, 17, 5, 1, 1, 22, 16, 21, 19, 16,
+ 14, 19, 12, 22, 17, 6, 8, 13, 89, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 59, 53,
+ 36, 62, 54, 35, 62, 59, 62, 57, 51, 49, 53,
+ 39, 32, 12, 20, 65, 71, 97, 70, 23, 15, 10, 2,
+ 5, 0, 71, 73, 88, 65, 38, 25, 20, 17, 3, 66,
+ 75, 75, 83, 89, 12, 6, 3, 64, 70, 74, 78, 85,
+ 105, 94, 79, 76, 71, 73, 82, 92, 94, 112, 76,
+ 32, 16, 11, 2, 67, 76, 83, 86, 93, 62, 91, 82,
+ 77, 66, 70, 1, 4, 5, 73, 2, 6, 0, 78, 71, 68,
+ 17, 3, 24, 20, 32, 31, 19, 22, 27, 20, 15, 64,
+ 13, 66, 101, 94, 92, 86, 83, 83, 82, 84, 67,
+ 76, 75, 74, 3, 75, 82, 97, 91, 95, 76, 72, 70,
+ 72, 68, 65, 69, 1, 0, 67, 71, 6, 65, 73, 54,
+ 43, 46, 46, 46, 49, 45, 44, 37, 37, 40, 34,
+ 16, 19, 12, 40, 37, 35, 29, 29, 24, 10, 4, 64,
+ 76, 81, 93, 71, 106, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 60, 55, 52, 42, 23, 2, 59, 57, 28,
+ 62, 62, 58, 52, 55, 50, 44, 50, 43, 43, 27,
+ 29, 17, 70, 35, 25, 7, 115, 107, 109, 82, 88,
+ 85, 64, 77, 76, 68, 66, 13, 1, 19, 71, 62, 62,
+ 62, 62, 49, 46, 18, 1, 86, 74, 41, 29, 20, 9,
+ 9, 2, 64, 68, 89, 102, 93, 86, 87, 83, 70, 76,
+ 82, 70, 67, 66, 64, 2, 7, 4, 62, 62, 62, 57,
+ 53, 47, 35, 20, 70 },
+
+ {
+
+ 62,
+ 9, 74, 62, 9, 74, 118, 96, 12, 10, 10, 23, 54,
+ 57, 54, 14, 106, 5, 73, 2, 1, 11, 71, 8, 69,
+ 18, 54, 75, 95, 106, 112, 97, 67, 73, 2, 1,
+ 84, 89, 4, 16, 68, 79, 92, 7, 69, 84, 88, 5,
+ 75, 90, 8, 70, 80, 88, 64, 72, 72, 78, 69, 5,
+ 22, 0, 0, 0, 80, 87, 97, 69, 18, 1, 52, 8, 69,
+ 121, 97, 82, 71, 100, 118, 117, 119, 89, 118,
+ 107, 117, 117, 111, 117, 111, 4, 67, 79, 99,
+ 69, 90, 86, 117, 73, 95, 88, 120, 9, 80, 73,
+ 118, 90, 83, 76, 69, 2, 66, 80, 4, 1, 0, 6,
+ 65, 64, 2, 77, 13, 9, 25, 19, 22, 18, 17, 18,
+ 37, 12, 11, 36, 25, 66, 103, 95, 96, 86, 90,
+ 84, 79, 82, 80, 82, 88, 70, 72, 72, 64, 99,
+ 95, 95, 73, 72, 71, 70, 13, 64, 1, 65, 20, 67,
+ 4, 4, 1, 75, 56, 50, 36, 41, 50, 40, 23, 42,
+ 33, 24, 29, 17, 5, 1, 0, 22, 15, 20, 18, 15,
+ 13, 19, 11, 20, 16, 5, 7, 12, 89, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 57, 51,
+ 34, 60, 52, 33, 62, 57, 60, 55, 49, 47, 50,
+ 37, 29, 11, 18, 66, 71, 97, 70, 23, 15, 10, 2,
+ 5, 0, 71, 73, 88, 65, 38, 25, 20, 17, 4, 65,
+ 74, 75, 82, 88, 13, 7, 3, 0, 69, 73, 77, 85,
+ 104, 93, 77, 75, 71, 72, 81, 91, 93, 111, 75,
+ 32, 17, 11, 2, 66, 75, 82, 85, 92, 62, 91, 82,
+ 76, 66, 70, 1, 4, 5, 73, 2, 7, 0, 78, 71, 68,
+ 16, 4, 23, 19, 31, 31, 19, 21, 26, 20, 15, 65,
+ 12, 66, 100, 93, 91, 85, 82, 82, 82, 83, 67,
+ 76, 75, 74, 2, 75, 82, 96, 90, 95, 76, 72, 70,
+ 71, 68, 65, 68, 1, 0, 67, 70, 5, 65, 73, 53,
+ 43, 45, 46, 45, 48, 44, 43, 37, 36, 39, 33,
+ 15, 18, 11, 39, 36, 34, 27, 28, 23, 9, 3, 65,
+ 76, 80, 93, 71, 105, 62, 62, 62, 62, 62, 62,
+ 62, 62, 60, 58, 53, 50, 40, 21, 1, 57, 55, 27,
+ 61, 62, 56, 50, 53, 48, 42, 48, 41, 40, 25,
+ 27, 15, 71, 33, 23, 6, 114, 105, 108, 81, 87,
+ 84, 1, 76, 75, 68, 65, 15, 3, 21, 70, 62, 62,
+ 62, 62, 47, 43, 16, 64, 87, 74, 41, 29, 20, 9,
+ 9, 2, 64, 68, 89, 101, 92, 85, 86, 82, 69, 76,
+ 81, 69, 66, 65, 64, 2, 7, 4, 62, 62, 62, 56,
+ 51, 45, 33, 18, 71 },
+
+ {
+
+ 62,
+ 9, 75, 62, 9, 75, 116, 95, 13, 10, 10, 22, 53,
+ 56, 54, 14, 104, 5, 73, 3, 1, 10, 71, 7, 70,
+ 17, 53, 76, 96, 107, 109, 96, 67, 73, 3, 1,
+ 83, 88, 5, 15, 67, 78, 91, 6, 69, 84, 88, 5,
+ 74, 90, 8, 70, 79, 88, 64, 72, 72, 78, 69, 5,
+ 22, 0, 0, 0, 79, 87, 97, 69, 18, 0, 52, 8, 69,
+ 120, 97, 82, 71, 99, 116, 115, 118, 88, 116,
+ 106, 115, 116, 110, 116, 110, 5, 67, 78, 99,
+ 68, 90, 86, 115, 73, 94, 88, 119, 9, 80, 73,
+ 116, 90, 82, 75, 69, 2, 66, 79, 4, 1, 64, 6,
+ 65, 64, 2, 77, 13, 9, 25, 19, 21, 18, 17, 18,
+ 35, 12, 11, 34, 24, 67, 103, 94, 96, 86, 89,
+ 84, 78, 82, 80, 82, 86, 70, 72, 72, 65, 99,
+ 94, 95, 73, 72, 70, 69, 12, 64, 1, 65, 19, 66,
+ 3, 3, 1, 76, 54, 49, 35, 41, 49, 40, 23, 41,
+ 32, 23, 28, 17, 5, 1, 0, 21, 14, 19, 17, 15,
+ 12, 18, 11, 18, 15, 5, 6, 11, 89, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 54, 48,
+ 31, 58, 50, 32, 62, 54, 57, 52, 47, 44, 48,
+ 34, 27, 10, 16, 67, 72, 97, 69, 24, 15, 11, 2,
+ 5, 0, 71, 73, 87, 65, 38, 26, 20, 17, 5, 65,
+ 74, 74, 82, 87, 14, 7, 4, 0, 68, 73, 77, 84,
+ 103, 92, 76, 74, 70, 72, 81, 91, 92, 109, 75,
+ 32, 17, 11, 3, 66, 75, 81, 85, 91, 62, 90, 81,
+ 76, 66, 70, 1, 4, 5, 73, 3, 7, 1, 78, 71, 69,
+ 16, 4, 22, 18, 30, 30, 19, 20, 25, 20, 15, 65,
+ 11, 67, 99, 92, 90, 85, 82, 82, 81, 83, 67,
+ 75, 74, 73, 2, 75, 82, 96, 89, 95, 76, 72, 70,
+ 70, 68, 65, 67, 0, 0, 67, 70, 4, 65, 74, 52,
+ 42, 45, 45, 44, 48, 44, 42, 36, 36, 38, 32,
+ 14, 17, 10, 37, 35, 32, 25, 26, 21, 8, 3, 65,
+ 76, 80, 92, 72, 104, 62, 62, 62, 62, 62, 62,
+ 62, 62, 58, 55, 51, 47, 38, 20, 1, 56, 54, 25,
+ 59, 62, 54, 48, 51, 46, 40, 45, 39, 38, 23,
+ 25, 14, 73, 31, 21, 4, 113, 104, 107, 80, 86,
+ 83, 2, 75, 74, 68, 64, 16, 4, 22, 69, 62, 62,
+ 62, 59, 44, 41, 13, 66, 89, 73, 41, 29, 20, 9,
+ 9, 2, 64, 68, 88, 100, 92, 84, 85, 81, 69, 75,
+ 80, 69, 66, 65, 64, 3, 7, 4, 62, 62, 61, 54,
+ 50, 44, 30, 17, 72 },
+
+ {
+
+ 62,
+ 9, 75, 62, 9, 75, 114, 93, 13, 10, 9, 20, 51,
+ 54, 54, 14, 101, 4, 72, 3, 1, 9, 71, 6, 71,
+ 17, 51, 78, 97, 107, 106, 95, 67, 72, 3, 1,
+ 83, 87, 5, 15, 67, 78, 91, 6, 70, 83, 88, 5,
+ 74, 89, 7, 70, 79, 88, 0, 71, 72, 77, 68, 5,
+ 22, 0, 0, 0, 79, 87, 97, 68, 17, 0, 52, 8, 69,
+ 119, 96, 82, 70, 97, 115, 113, 116, 87, 115,
+ 104, 113, 115, 109, 115, 110, 6, 67, 77, 98,
+ 68, 89, 85, 113, 73, 94, 87, 118, 9, 79, 72,
+ 115, 89, 82, 75, 68, 2, 66, 78, 5, 2, 64, 5,
+ 65, 64, 2, 77, 13, 8, 24, 19, 21, 18, 17, 17,
+ 33, 12, 10, 32, 23, 68, 102, 93, 95, 85, 88,
+ 83, 77, 81, 79, 81, 85, 70, 72, 71, 66, 98,
+ 93, 95, 73, 72, 70, 69, 11, 0, 1, 65, 17, 65,
+ 3, 2, 1, 77, 53, 48, 35, 40, 48, 39, 22, 40,
+ 32, 22, 27, 17, 5, 1, 64, 20, 14, 18, 17, 14,
+ 11, 17, 10, 16, 14, 4, 5, 10, 89, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 60, 61, 52, 46,
+ 29, 56, 49, 30, 62, 52, 55, 50, 44, 42, 45,
+ 32, 24, 9, 15, 69, 73, 97, 69, 24, 16, 11, 2,
+ 5, 0, 71, 73, 87, 64, 39, 26, 20, 17, 6, 64,
+ 73, 74, 81, 86, 15, 8, 4, 0, 67, 72, 76, 84,
+ 102, 92, 75, 74, 70, 72, 80, 90, 92, 108, 74,
+ 33, 17, 11, 3, 65, 74, 80, 84, 90, 62, 90, 81,
+ 75, 66, 70, 1, 4, 5, 72, 3, 7, 1, 78, 71, 69,
+ 15, 5, 21, 17, 29, 29, 19, 19, 24, 19, 15, 66,
+ 10, 67, 98, 92, 89, 84, 81, 81, 81, 82, 67,
+ 75, 74, 73, 1, 75, 82, 95, 88, 95, 76, 71, 70,
+ 70, 68, 65, 66, 0, 0, 67, 69, 4, 66, 75, 51,
+ 42, 44, 44, 43, 47, 43, 41, 35, 35, 37, 31,
+ 13, 16, 9, 36, 33, 31, 23, 25, 20, 7, 2, 66,
+ 76, 80, 92, 72, 103, 62, 62, 62, 62, 62, 62,
+ 62, 61, 56, 53, 49, 45, 36, 18, 0, 54, 52, 24,
+ 57, 62, 52, 46, 49, 44, 38, 43, 37, 35, 21,
+ 23, 12, 74, 29, 19, 3, 112, 103, 106, 80, 85,
+ 82, 4, 74, 73, 68, 0, 18, 6, 24, 69, 62, 62,
+ 61, 56, 41, 38, 10, 68, 90, 73, 41, 29, 20, 9,
+ 9, 2, 64, 68, 88, 99, 91, 83, 84, 80, 68, 75,
+ 79, 68, 65, 64, 0, 3, 8, 4, 62, 62, 59, 52,
+ 48, 42, 28, 15, 73 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 113, 92, 13, 10, 9, 19, 50,
+ 53, 54, 14, 99, 4, 71, 4, 1, 8, 71, 5, 73, 16,
+ 49, 80, 98, 108, 104, 94, 67, 71, 4, 1, 83,
+ 86, 5, 14, 67, 78, 90, 5, 70, 83, 89, 5, 74,
+ 89, 7, 71, 79, 88, 0, 71, 72, 77, 68, 5, 22,
+ 0, 0, 0, 78, 88, 97, 68, 16, 0, 52, 8, 69,
+ 118, 95, 82, 70, 96, 113, 111, 115, 86, 113,
+ 103, 112, 114, 109, 114, 109, 6, 67, 76, 97,
+ 68, 89, 85, 112, 73, 94, 87, 117, 9, 79, 72,
+ 114, 89, 81, 75, 68, 2, 66, 78, 5, 2, 65, 5,
+ 65, 64, 2, 77, 13, 8, 23, 19, 20, 18, 17, 17,
+ 31, 12, 10, 30, 22, 69, 101, 92, 94, 84, 88,
+ 83, 76, 81, 79, 80, 84, 70, 72, 71, 68, 98,
+ 92, 95, 73, 73, 69, 69, 10, 1, 1, 65, 16, 64,
+ 2, 1, 1, 78, 51, 47, 34, 39, 47, 38, 21, 39,
+ 31, 20, 26, 16, 5, 1, 64, 19, 13, 17, 16, 13,
+ 10, 16, 9, 14, 12, 3, 4, 9, 89, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 61, 58, 58, 49, 43,
+ 26, 54, 47, 28, 61, 50, 52, 47, 42, 39, 42,
+ 30, 22, 8, 13, 70, 74, 98, 69, 24, 16, 11, 2,
+ 5, 0, 71, 73, 86, 64, 39, 26, 20, 17, 7, 64,
+ 73, 74, 81, 86, 16, 8, 4, 0, 67, 72, 76, 83,
+ 101, 91, 74, 73, 70, 72, 79, 89, 91, 107, 74,
+ 33, 17, 11, 3, 64, 74, 80, 83, 90, 62, 90, 81,
+ 75, 66, 70, 1, 4, 5, 72, 3, 7, 2, 78, 71, 69,
+ 14, 5, 20, 16, 28, 28, 19, 17, 22, 19, 15, 67,
+ 9, 68, 98, 91, 88, 84, 81, 81, 80, 81, 68, 75,
+ 74, 73, 0, 75, 82, 95, 88, 96, 76, 71, 70, 69,
+ 68, 65, 66, 0, 64, 67, 68, 3, 66, 76, 50, 41,
+ 44, 43, 41, 46, 42, 40, 34, 34, 36, 30, 12,
+ 15, 8, 34, 32, 29, 21, 23, 19, 6, 1, 66, 76,
+ 80, 92, 73, 103, 62, 62, 62, 62, 62, 62, 61,
+ 58, 54, 51, 47, 42, 34, 17, 64, 52, 50, 22,
+ 55, 61, 49, 43, 46, 41, 36, 41, 34, 33, 19,
+ 20, 10, 75, 27, 17, 1, 111, 102, 105, 79, 84,
+ 82, 5, 73, 73, 68, 0, 19, 7, 25, 68, 62, 62,
+ 58, 53, 38, 35, 7, 70, 91, 73, 41, 29, 20, 9,
+ 9, 2, 64, 68, 87, 99, 90, 82, 84, 79, 68, 74,
+ 79, 68, 65, 64, 0, 4, 8, 3, 62, 62, 57, 50,
+ 46, 40, 26, 13, 74 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 111, 91, 14, 10, 9, 18, 49,
+ 52, 54, 14, 97, 4, 70, 5, 1, 8, 70, 4, 74, 15,
+ 47, 81, 99, 109, 101, 92, 67, 70, 5, 1, 82,
+ 85, 6, 13, 67, 77, 89, 5, 70, 83, 89, 5, 74,
+ 88, 7, 71, 79, 88, 0, 71, 71, 77, 68, 5, 22,
+ 0, 0, 0, 77, 88, 97, 68, 15, 0, 52, 8, 69,
+ 117, 94, 82, 70, 95, 111, 109, 113, 84, 111,
+ 102, 110, 113, 108, 113, 108, 7, 66, 75, 96,
+ 68, 88, 84, 110, 73, 93, 87, 115, 10, 79, 72,
+ 112, 89, 80, 75, 68, 3, 65, 77, 5, 2, 65, 5,
+ 64, 64, 2, 76, 13, 8, 23, 19, 19, 18, 17, 17,
+ 29, 12, 10, 29, 21, 69, 100, 91, 93, 83, 87,
+ 82, 75, 80, 79, 79, 83, 70, 72, 71, 69, 97,
+ 91, 95, 72, 73, 68, 69, 9, 2, 1, 65, 15, 0, 1,
+ 0, 1, 78, 50, 46, 34, 38, 46, 37, 21, 39, 31,
+ 19, 25, 16, 5, 1, 64, 19, 12, 16, 15, 12, 9,
+ 16, 9, 13, 11, 3, 3, 8, 89, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 59, 56, 56, 46, 41, 23,
+ 53, 45, 27, 59, 48, 50, 45, 40, 37, 40, 28,
+ 20, 8, 11, 71, 74, 98, 69, 25, 16, 11, 3, 5,
+ 0, 70, 73, 85, 64, 39, 26, 21, 17, 8, 0, 72,
+ 73, 80, 85, 17, 9, 5, 1, 66, 71, 76, 82, 100,
+ 90, 72, 72, 69, 71, 78, 88, 90, 106, 73, 33,
+ 18, 12, 3, 0, 73, 79, 82, 89, 62, 89, 80, 74,
+ 66, 70, 1, 5, 6, 72, 3, 8, 3, 78, 71, 69, 14,
+ 5, 19, 16, 27, 28, 19, 16, 21, 19, 15, 67, 8,
+ 69, 97, 90, 87, 84, 80, 81, 79, 80, 68, 75,
+ 74, 72, 0, 75, 82, 95, 87, 96, 76, 71, 70, 68,
+ 68, 65, 65, 0, 64, 67, 67, 2, 66, 76, 49, 41,
+ 44, 43, 40, 45, 41, 39, 34, 33, 35, 30, 12,
+ 14, 7, 33, 31, 27, 19, 22, 18, 6, 1, 66, 75,
+ 79, 92, 74, 102, 62, 62, 62, 62, 62, 62, 59,
+ 56, 52, 49, 45, 40, 32, 16, 65, 50, 49, 21,
+ 53, 59, 47, 41, 44, 39, 34, 39, 32, 31, 18,
+ 18, 8, 76, 25, 15, 64, 110, 100, 103, 78, 83,
+ 81, 7, 72, 72, 68, 1, 21, 8, 27, 67, 62, 62,
+ 56, 50, 36, 32, 5, 72, 92, 73, 41, 29, 20, 9,
+ 10, 2, 64, 68, 86, 98, 89, 81, 83, 77, 67, 73,
+ 78, 67, 64, 0, 0, 5, 8, 3, 62, 61, 56, 49, 44,
+ 38, 24, 11, 74 },
+
+ {
+
+ 62,
+ 8, 75, 62, 8, 75, 109, 89, 14, 10, 8, 16, 47,
+ 50, 54, 14, 94, 3, 69, 5, 1, 7, 70, 3, 75, 15,
+ 45, 83, 100, 109, 98, 91, 67, 69, 5, 1, 82,
+ 84, 6, 13, 67, 77, 89, 5, 71, 82, 89, 5, 74,
+ 87, 6, 71, 79, 88, 1, 70, 71, 76, 67, 5, 22,
+ 0, 0, 0, 77, 88, 97, 67, 14, 0, 52, 8, 69,
+ 116, 93, 82, 69, 93, 110, 107, 112, 83, 110,
+ 100, 108, 112, 107, 112, 108, 8, 66, 74, 95,
+ 68, 88, 83, 108, 73, 93, 86, 114, 10, 78, 71,
+ 111, 88, 80, 75, 67, 3, 65, 76, 6, 3, 66, 4,
+ 64, 64, 2, 76, 13, 7, 22, 19, 19, 18, 17, 16,
+ 27, 12, 9, 27, 20, 70, 99, 90, 92, 82, 86, 81,
+ 74, 79, 78, 78, 82, 70, 72, 70, 70, 97, 90,
+ 95, 72, 73, 68, 69, 8, 3, 1, 65, 13, 1, 1, 64,
+ 1, 79, 48, 45, 33, 37, 45, 36, 20, 38, 31, 18,
+ 24, 16, 5, 1, 65, 18, 12, 15, 15, 11, 8, 15,
+ 8, 11, 10, 2, 2, 7, 89, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 57, 54, 53, 44, 39, 21, 51,
+ 44, 25, 56, 46, 48, 43, 37, 35, 37, 26, 17, 7,
+ 10, 73, 75, 98, 69, 25, 17, 11, 3, 5, 0, 70,
+ 73, 85, 0, 40, 26, 21, 17, 9, 0, 71, 73, 79,
+ 84, 18, 10, 5, 1, 65, 71, 75, 82, 99, 90, 71,
+ 72, 69, 71, 77, 87, 90, 105, 73, 34, 18, 12,
+ 3, 1, 72, 78, 81, 88, 62, 89, 80, 74, 66, 70,
+ 1, 5, 6, 71, 3, 8, 3, 78, 71, 69, 13, 6, 18,
+ 15, 26, 27, 19, 15, 20, 18, 15, 68, 7, 69, 96,
+ 90, 86, 83, 80, 80, 79, 79, 68, 75, 74, 72,
+ 64, 75, 82, 94, 86, 96, 76, 70, 70, 68, 68,
+ 65, 64, 0, 64, 67, 66, 2, 67, 77, 48, 41, 43,
+ 42, 39, 44, 40, 38, 33, 32, 34, 29, 11, 13, 6,
+ 31, 29, 26, 17, 21, 17, 5, 0, 67, 75, 79, 92,
+ 74, 101, 62, 62, 62, 62, 62, 60, 57, 53, 50,
+ 47, 43, 38, 30, 14, 66, 48, 47, 20, 51, 57,
+ 45, 39, 42, 37, 32, 37, 30, 28, 16, 16, 6, 77,
+ 23, 13, 65, 109, 99, 102, 78, 82, 80, 9, 71,
+ 71, 68, 2, 22, 10, 28, 67, 62, 60, 53, 47, 33,
+ 29, 2, 74, 93, 73, 41, 29, 20, 9, 10, 2, 64,
+ 68, 86, 97, 88, 80, 82, 76, 66, 73, 77, 66, 0,
+ 1, 1, 5, 9, 3, 60, 59, 54, 47, 42, 36, 22, 9,
+ 75 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 107, 88, 15, 10, 8, 15, 46,
+ 49, 54, 14, 92, 3, 69, 6, 1, 6, 70, 2, 76, 14,
+ 44, 84, 101, 110, 95, 90, 67, 69, 6, 1, 81,
+ 83, 7, 12, 66, 76, 88, 4, 71, 82, 89, 5, 73,
+ 87, 6, 71, 78, 88, 1, 70, 71, 76, 67, 5, 22,
+ 0, 0, 0, 76, 88, 97, 67, 14, 64, 52, 8, 69,
+ 115, 93, 82, 69, 92, 108, 105, 110, 82, 108,
+ 99, 106, 111, 106, 111, 107, 9, 66, 73, 95,
+ 67, 87, 83, 106, 73, 92, 86, 113, 10, 78, 71,
+ 109, 88, 79, 74, 67, 3, 65, 75, 6, 3, 66, 4,
+ 64, 64, 2, 76, 13, 7, 22, 19, 18, 18, 17, 16,
+ 25, 12, 9, 25, 19, 71, 99, 89, 92, 82, 85, 81,
+ 73, 79, 78, 78, 80, 70, 72, 70, 71, 96, 89,
+ 95, 72, 73, 67, 68, 7, 3, 1, 65, 12, 2, 0, 65,
+ 1, 80, 47, 44, 33, 37, 44, 36, 20, 37, 30, 17,
+ 23, 16, 5, 1, 65, 17, 11, 14, 14, 11, 7, 14,
+ 8, 9, 9, 2, 1, 6, 89, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 54, 52, 51, 41, 36, 18, 49, 42,
+ 24, 54, 43, 45, 40, 35, 32, 35, 23, 15, 6, 8,
+ 74, 76, 98, 68, 26, 17, 12, 3, 5, 0, 70, 73,
+ 84, 0, 40, 27, 21, 17, 10, 1, 71, 72, 79, 83,
+ 19, 10, 6, 1, 64, 70, 75, 81, 98, 89, 70, 71,
+ 68, 71, 77, 87, 89, 103, 72, 34, 18, 12, 4, 1,
+ 72, 77, 81, 87, 62, 88, 79, 73, 66, 70, 1, 5,
+ 6, 71, 4, 8, 4, 78, 71, 70, 13, 6, 17, 14, 25,
+ 26, 19, 14, 19, 18, 15, 68, 6, 70, 95, 89, 85,
+ 83, 79, 80, 78, 79, 68, 74, 73, 71, 64, 75,
+ 82, 94, 85, 96, 76, 70, 70, 67, 68, 65, 0, 64,
+ 64, 67, 66, 1, 67, 78, 47, 40, 43, 41, 38, 44,
+ 40, 37, 32, 32, 33, 28, 10, 12, 5, 30, 28, 24,
+ 15, 19, 15, 4, 0, 67, 75, 79, 91, 75, 100, 62,
+ 62, 62, 62, 62, 58, 55, 51, 48, 44, 41, 35,
+ 28, 13, 66, 47, 46, 18, 49, 54, 43, 37, 40,
+ 35, 30, 34, 28, 26, 14, 14, 5, 79, 21, 11, 67,
+ 108, 98, 101, 77, 81, 79, 10, 70, 70, 68, 3,
+ 24, 11, 30, 66, 61, 59, 51, 44, 30, 27, 64,
+ 76, 95, 72, 41, 29, 20, 9, 10, 2, 64, 68, 85,
+ 96, 88, 79, 81, 75, 66, 72, 76, 66, 0, 1, 1,
+ 6, 9, 3, 59, 58, 52, 45, 41, 35, 19, 8, 76 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 106, 86, 15, 10, 7, 13, 44,
+ 48, 54, 14, 90, 3, 68, 7, 1, 5, 70, 1, 77, 14,
+ 42, 86, 102, 110, 92, 89, 67, 68, 7, 1, 81,
+ 82, 7, 12, 66, 76, 87, 4, 72, 82, 89, 5, 73,
+ 86, 6, 72, 78, 88, 2, 70, 71, 76, 66, 5, 22,
+ 0, 0, 0, 76, 89, 97, 66, 13, 64, 52, 8, 69,
+ 114, 92, 82, 68, 91, 106, 103, 109, 81, 106,
+ 98, 104, 110, 106, 110, 106, 9, 66, 72, 94,
+ 67, 87, 82, 104, 73, 92, 85, 112, 10, 78, 70,
+ 108, 87, 78, 74, 67, 3, 65, 75, 7, 4, 67, 4,
+ 64, 64, 2, 76, 13, 6, 21, 19, 17, 18, 17, 16,
+ 23, 12, 8, 23, 18, 72, 98, 88, 91, 81, 85, 80,
+ 72, 78, 77, 77, 79, 70, 72, 70, 72, 96, 88,
+ 95, 72, 73, 66, 68, 6, 4, 1, 65, 11, 3, 0, 66,
+ 1, 81, 45, 43, 32, 36, 43, 35, 19, 36, 30, 15,
+ 22, 15, 5, 1, 66, 16, 10, 13, 13, 10, 6, 13,
+ 7, 7, 8, 1, 0, 5, 89, 62, 62, 61, 62, 62, 62,
+ 62, 62, 61, 52, 50, 48, 39, 34, 16, 47, 40,
+ 22, 52, 41, 43, 38, 33, 30, 32, 21, 12, 5, 6,
+ 75, 77, 98, 68, 26, 17, 12, 3, 5, 0, 70, 73,
+ 84, 0, 40, 27, 21, 17, 11, 1, 70, 72, 78, 83,
+ 20, 11, 6, 1, 64, 70, 74, 81, 97, 88, 69, 70,
+ 68, 71, 76, 86, 88, 102, 72, 34, 18, 12, 4, 2,
+ 71, 77, 80, 86, 62, 88, 79, 73, 66, 70, 1, 5,
+ 6, 71, 4, 8, 4, 78, 71, 70, 12, 7, 16, 13, 24,
+ 25, 19, 12, 18, 18, 15, 69, 5, 70, 95, 88, 84,
+ 82, 79, 79, 78, 78, 69, 74, 73, 71, 65, 75,
+ 82, 93, 84, 96, 76, 70, 70, 66, 68, 65, 1, 64,
+ 65, 67, 65, 0, 67, 79, 46, 40, 42, 40, 37, 43,
+ 39, 36, 31, 31, 32, 27, 9, 11, 4, 28, 27, 23,
+ 13, 18, 14, 3, 64, 68, 75, 79, 91, 75, 100,
+ 62, 62, 62, 62, 62, 56, 53, 48, 46, 42, 39,
+ 33, 26, 11, 67, 45, 44, 17, 47, 52, 41, 35,
+ 37, 33, 28, 32, 26, 23, 12, 12, 3, 80, 19, 9,
+ 68, 107, 97, 100, 76, 80, 78, 12, 69, 69, 68,
+ 4, 25, 13, 31, 65, 59, 57, 48, 41, 27, 24, 67,
+ 78, 96, 72, 41, 29, 20, 9, 10, 2, 64, 68, 85,
+ 95, 87, 78, 81, 74, 65, 72, 75, 65, 1, 2, 1,
+ 6, 9, 3, 58, 56, 50, 43, 39, 33, 17, 6, 77 },
+
+ {
+
+ 62,
+ 8, 76, 62, 8, 76, 104, 85, 15, 10, 7, 12, 43,
+ 46, 54, 14, 87, 2, 67, 7, 1, 5, 69, 0, 78, 13,
+ 40, 88, 103, 111, 89, 87, 67, 67, 7, 1, 81,
+ 81, 7, 11, 66, 76, 87, 4, 72, 81, 89, 5, 73,
+ 85, 5, 72, 78, 88, 2, 69, 70, 75, 66, 5, 22,
+ 0, 0, 0, 75, 89, 97, 66, 12, 64, 52, 8, 69,
+ 113, 91, 82, 68, 89, 105, 101, 107, 80, 105,
+ 96, 102, 109, 105, 109, 106, 10, 66, 71, 93,
+ 67, 86, 81, 102, 73, 92, 85, 110, 11, 77, 70,
+ 107, 87, 78, 74, 66, 4, 64, 74, 7, 4, 67, 3,
+ 64, 64, 2, 76, 13, 6, 20, 19, 17, 18, 17, 15,
+ 21, 12, 8, 22, 17, 73, 97, 87, 90, 80, 84, 79,
+ 71, 77, 77, 76, 78, 70, 72, 69, 73, 95, 87,
+ 95, 71, 73, 66, 68, 5, 5, 1, 65, 9, 4, 64, 67,
+ 1, 81, 44, 42, 32, 35, 42, 34, 18, 36, 30, 14,
+ 21, 15, 5, 1, 66, 16, 10, 12, 13, 9, 5, 13, 6,
+ 5, 7, 0, 64, 4, 89, 61, 62, 59, 62, 61, 60,
+ 60, 60, 59, 50, 48, 46, 36, 32, 13, 45, 39,
+ 20, 49, 39, 41, 36, 30, 28, 29, 19, 10, 4, 5,
+ 77, 77, 98, 68, 26, 18, 12, 3, 5, 0, 70, 73,
+ 83, 1, 41, 27, 21, 17, 12, 2, 69, 72, 77, 82,
+ 21, 12, 6, 2, 0, 69, 74, 80, 96, 88, 67, 70,
+ 68, 70, 75, 85, 88, 101, 71, 35, 19, 12, 4, 3,
+ 70, 76, 79, 85, 62, 88, 79, 72, 66, 70, 1, 5,
+ 6, 70, 4, 9, 5, 78, 71, 70, 11, 7, 15, 12, 23,
+ 25, 19, 11, 17, 17, 15, 70, 4, 71, 94, 88, 83,
+ 82, 78, 79, 77, 77, 69, 74, 73, 71, 66, 75,
+ 82, 93, 83, 96, 76, 69, 70, 66, 68, 65, 2, 64,
+ 65, 67, 64, 0, 68, 79, 45, 40, 42, 40, 36, 42,
+ 38, 35, 31, 30, 31, 26, 8, 10, 3, 27, 25, 21,
+ 11, 17, 13, 2, 65, 68, 75, 78, 91, 76, 99, 62,
+ 62, 62, 62, 60, 54, 51, 46, 44, 40, 37, 31,
+ 24, 10, 68, 43, 42, 16, 45, 50, 39, 33, 35,
+ 31, 26, 30, 24, 21, 10, 10, 1, 81, 17, 7, 70,
+ 106, 95, 99, 76, 79, 77, 14, 68, 68, 68, 5,
+ 27, 14, 33, 65, 58, 55, 46, 38, 25, 21, 69,
+ 80, 97, 72, 41, 29, 20, 9, 10, 2, 64, 68, 84,
+ 94, 86, 77, 80, 73, 64, 71, 74, 64, 2, 3, 2,
+ 7, 10, 3, 56, 55, 49, 42, 37, 31, 15, 4, 78 },
+
+ {
+
+ 61,
+ 8, 76, 61, 8, 76, 102, 83, 16, 10, 6, 10, 41,
+ 45, 54, 14, 85, 2, 66, 8, 1, 4, 69, 64, 79,
+ 13, 38, 89, 104, 111, 86, 86, 67, 66, 8, 1,
+ 80, 80, 8, 11, 66, 75, 86, 3, 73, 81, 89, 5,
+ 73, 85, 5, 72, 78, 88, 3, 69, 70, 75, 65, 5,
+ 22, 0, 0, 0, 75, 89, 97, 65, 11, 64, 52, 8,
+ 69, 112, 90, 82, 67, 88, 103, 99, 106, 79,
+ 103, 95, 100, 108, 104, 108, 105, 11, 66, 70,
+ 92, 67, 86, 81, 100, 73, 91, 84, 109, 11, 77,
+ 69, 105, 86, 77, 74, 66, 4, 64, 73, 8, 5, 68,
+ 3, 64, 64, 2, 76, 13, 5, 20, 19, 16, 18, 17,
+ 15, 19, 12, 7, 20, 16, 74, 96, 86, 89, 79, 83,
+ 79, 70, 77, 76, 75, 77, 70, 72, 69, 74, 95,
+ 86, 95, 71, 73, 65, 68, 4, 6, 1, 65, 8, 5, 64,
+ 68, 1, 82, 42, 41, 31, 34, 41, 33, 18, 35, 29,
+ 13, 20, 15, 5, 1, 67, 15, 9, 11, 12, 8, 4, 12,
+ 6, 3, 6, 0, 65, 3, 89, 60, 61, 58, 62, 59, 58,
+ 58, 58, 56, 47, 46, 43, 34, 29, 11, 43, 37,
+ 19, 47, 37, 38, 33, 28, 25, 27, 17, 7, 3, 3,
+ 78, 78, 98, 68, 27, 18, 12, 3, 5, 0, 70, 73,
+ 83, 1, 41, 27, 21, 17, 13, 2, 69, 71, 77, 81,
+ 22, 12, 7, 2, 1, 69, 73, 80, 95, 87, 66, 69,
+ 67, 70, 74, 84, 87, 100, 71, 35, 19, 12, 4, 4,
+ 70, 75, 78, 84, 62, 87, 78, 72, 66, 70, 1, 5,
+ 6, 70, 4, 9, 5, 78, 71, 70, 11, 8, 14, 11, 22,
+ 24, 19, 10, 16, 17, 15, 70, 3, 71, 93, 87, 82,
+ 81, 78, 78, 77, 76, 69, 74, 73, 70, 66, 75,
+ 82, 92, 82, 96, 76, 69, 70, 65, 68, 65, 3, 64,
+ 65, 67, 0, 64, 68, 80, 44, 39, 41, 39, 35, 41,
+ 37, 34, 30, 29, 30, 25, 7, 9, 2, 25, 24, 20,
+ 9, 15, 12, 1, 65, 69, 75, 78, 91, 76, 98, 62,
+ 62, 61, 61, 57, 52, 49, 43, 42, 38, 35, 28,
+ 22, 8, 69, 41, 41, 14, 43, 48, 37, 31, 33, 29,
+ 24, 28, 22, 18, 8, 8, 64, 82, 15, 5, 71, 105,
+ 94, 98, 75, 78, 76, 15, 67, 67, 68, 6, 28, 16,
+ 34, 64, 56, 54, 43, 35, 22, 18, 72, 82, 98,
+ 72, 41, 29, 20, 9, 10, 2, 64, 68, 84, 93, 85,
+ 76, 79, 72, 64, 71, 73, 64, 2, 3, 2, 7, 10, 3,
+ 55, 53, 47, 40, 35, 29, 13, 2, 79 },
+
+ {
+
+ 60,
+ 8, 76, 60, 8, 76, 100, 82, 16, 10, 6, 9, 40,
+ 44, 54, 14, 83, 2, 65, 9, 1, 3, 69, 65, 80,
+ 12, 36, 91, 105, 112, 83, 85, 67, 65, 9, 1,
+ 80, 79, 8, 10, 66, 75, 85, 3, 73, 81, 89, 5,
+ 73, 84, 5, 72, 78, 88, 3, 69, 70, 75, 65, 5,
+ 22, 0, 0, 0, 74, 89, 97, 65, 10, 64, 52, 8,
+ 69, 111, 89, 82, 67, 87, 101, 97, 104, 78,
+ 101, 94, 98, 107, 103, 107, 104, 12, 66, 69,
+ 91, 67, 85, 80, 98, 73, 91, 84, 108, 11, 77,
+ 69, 104, 86, 76, 74, 66, 4, 64, 72, 8, 5, 68,
+ 3, 64, 64, 2, 76, 13, 5, 19, 19, 15, 18, 17,
+ 15, 17, 12, 7, 18, 15, 75, 95, 85, 88, 78, 82,
+ 78, 69, 76, 76, 74, 76, 70, 72, 69, 75, 94,
+ 85, 95, 71, 73, 64, 68, 3, 7, 1, 65, 7, 6, 65,
+ 69, 1, 83, 41, 40, 31, 33, 40, 32, 17, 34, 29,
+ 12, 19, 15, 5, 1, 67, 14, 8, 10, 11, 7, 3, 11,
+ 5, 1, 5, 64, 66, 2, 89, 58, 60, 56, 60, 57,
+ 56, 56, 56, 54, 45, 44, 41, 31, 27, 8, 41, 35,
+ 17, 45, 35, 36, 31, 26, 23, 24, 15, 5, 2, 1,
+ 79, 79, 98, 68, 27, 18, 12, 3, 5, 0, 70, 73,
+ 82, 1, 41, 27, 21, 17, 14, 3, 68, 71, 76, 80,
+ 23, 13, 7, 2, 2, 68, 73, 79, 94, 86, 65, 68,
+ 67, 70, 73, 83, 86, 99, 70, 35, 19, 12, 4, 5,
+ 69, 74, 77, 83, 62, 87, 78, 71, 66, 70, 1, 5,
+ 6, 70, 4, 9, 6, 78, 71, 70, 10, 8, 13, 10, 21,
+ 23, 19, 9, 15, 17, 15, 71, 2, 72, 92, 86, 81,
+ 81, 77, 78, 76, 75, 69, 74, 73, 70, 67, 75,
+ 82, 92, 81, 96, 76, 69, 70, 64, 68, 65, 4, 64,
+ 65, 67, 1, 65, 68, 81, 43, 39, 41, 38, 34, 40,
+ 36, 33, 29, 28, 29, 24, 6, 8, 1, 24, 23, 18,
+ 7, 14, 11, 0, 66, 69, 75, 78, 91, 77, 97, 62,
+ 62, 59, 59, 54, 50, 47, 41, 40, 36, 33, 26,
+ 20, 7, 70, 39, 39, 13, 41, 46, 35, 29, 31, 27,
+ 22, 26, 20, 16, 6, 6, 66, 83, 13, 3, 73, 104,
+ 93, 97, 74, 77, 75, 17, 66, 66, 68, 7, 30, 17,
+ 36, 0, 55, 52, 41, 32, 19, 15, 75, 84, 99, 72,
+ 41, 29, 20, 9, 10, 2, 64, 68, 83, 92, 84, 75,
+ 78, 71, 0, 70, 72, 0, 3, 4, 2, 8, 10, 3, 54,
+ 52, 45, 38, 33, 27, 11, 0, 80 },
+
+ {
+
+ 58,
+ 7, 77, 58, 7, 77, 99, 81, 16, 10, 5, 7, 38,
+ 42, 53, 14, 81, 1, 65, 9, 0, 2, 69, 67, 82,
+ 11, 34, 93, 106, 113, 81, 84, 68, 65, 9, 0,
+ 80, 78, 8, 9, 66, 75, 85, 2, 74, 81, 90, 5,
+ 73, 84, 4, 73, 78, 88, 3, 69, 70, 75, 65, 4,
+ 22, 0, 0, 0, 74, 90, 97, 65, 9, 65, 52, 7, 69,
+ 110, 89, 82, 67, 86, 100, 96, 103, 77, 100,
+ 93, 97, 106, 103, 106, 104, 12, 66, 69, 91,
+ 67, 85, 80, 97, 73, 91, 84, 107, 11, 77, 69,
+ 103, 86, 76, 74, 66, 4, 64, 72, 8, 5, 69, 2,
+ 64, 65, 2, 76, 12, 4, 18, 19, 14, 17, 17, 14,
+ 15, 11, 6, 16, 14, 76, 95, 85, 88, 78, 82, 78,
+ 68, 76, 76, 74, 75, 71, 72, 69, 77, 94, 85,
+ 95, 71, 74, 64, 68, 2, 7, 1, 65, 5, 6, 66, 70,
+ 1, 84, 39, 39, 30, 32, 39, 31, 16, 33, 28, 10,
+ 18, 14, 4, 1, 68, 13, 7, 9, 10, 6, 2, 10, 4,
+ 64, 3, 65, 68, 0, 89, 56, 58, 54, 58, 55, 53,
+ 53, 53, 51, 42, 41, 38, 28, 24, 5, 39, 33, 15,
+ 42, 32, 33, 28, 23, 20, 21, 12, 2, 1, 64, 81,
+ 80, 99, 68, 27, 18, 12, 3, 5, 64, 70, 73, 82,
+ 1, 41, 27, 21, 17, 15, 3, 68, 71, 76, 80, 23,
+ 13, 7, 2, 2, 68, 73, 79, 93, 86, 64, 68, 67,
+ 70, 73, 83, 86, 98, 70, 35, 19, 12, 4, 5, 69,
+ 74, 77, 83, 62, 87, 78, 71, 66, 70, 1, 5, 6,
+ 70, 4, 9, 6, 78, 71, 71, 9, 8, 12, 9, 20, 22,
+ 18, 7, 13, 16, 14, 72, 0, 73, 92, 86, 80, 81,
+ 77, 78, 76, 75, 70, 74, 73, 70, 68, 75, 82,
+ 92, 81, 97, 76, 69, 70, 64, 69, 65, 4, 65, 66,
+ 67, 1, 66, 69, 82, 42, 38, 40, 37, 32, 39, 35,
+ 32, 28, 27, 28, 23, 5, 6, 64, 22, 21, 16, 5,
+ 12, 9, 64, 67, 70, 75, 78, 91, 78, 97, 62, 61,
+ 57, 56, 51, 47, 44, 38, 37, 33, 30, 23, 17, 5,
+ 71, 37, 37, 11, 39, 43, 32, 26, 28, 24, 20,
+ 23, 17, 13, 4, 3, 68, 85, 11, 1, 75, 103, 92,
+ 96, 74, 77, 75, 18, 66, 66, 68, 7, 31, 18, 37,
+ 0, 53, 50, 38, 28, 16, 12, 78, 87, 101, 72,
+ 41, 28, 19, 9, 10, 2, 65, 68, 83, 92, 84, 75,
+ 78, 70, 0, 70, 72, 0, 3, 4, 2, 8, 10, 2, 52,
+ 50, 43, 36, 31, 25, 8, 65, 81 },
+
+ {
+
+ 57,
+ 7, 77, 57, 7, 77, 97, 79, 17, 11, 5, 6, 37,
+ 41, 53, 14, 78, 1, 64, 10, 0, 2, 68, 68, 83,
+ 11, 33, 94, 107, 113, 78, 82, 68, 64, 10, 0,
+ 79, 76, 9, 9, 65, 74, 84, 2, 74, 80, 90, 5,
+ 72, 83, 4, 73, 77, 88, 4, 68, 69, 74, 64, 4,
+ 22, 0, 0, 0, 73, 90, 97, 64, 9, 65, 52, 7, 69,
+ 108, 88, 82, 66, 84, 98, 94, 101, 75, 98, 91,
+ 95, 104, 102, 105, 103, 13, 65, 68, 90, 66,
+ 84, 79, 95, 72, 90, 83, 105, 12, 76, 68, 101,
+ 85, 75, 73, 65, 5, 0, 71, 9, 6, 69, 2, 0, 65,
+ 2, 75, 12, 4, 18, 19, 14, 17, 17, 14, 14, 11,
+ 6, 15, 13, 76, 94, 84, 87, 77, 81, 77, 67, 75,
+ 75, 73, 73, 71, 72, 68, 78, 93, 84, 95, 70,
+ 74, 0, 67, 2, 8, 1, 65, 4, 7, 66, 71, 1, 84,
+ 38, 39, 30, 32, 39, 31, 16, 33, 28, 9, 18, 14,
+ 4, 1, 68, 13, 7, 9, 10, 6, 1, 10, 4, 65, 2,
+ 65, 69, 64, 89, 55, 57, 53, 57, 54, 51, 51,
+ 51, 49, 40, 39, 36, 26, 22, 3, 38, 32, 14, 40,
+ 30, 31, 26, 21, 18, 19, 10, 0, 1, 65, 82, 80,
+ 99, 67, 28, 19, 13, 4, 5, 64, 69, 72, 81, 2,
+ 42, 28, 22, 17, 16, 4, 67, 70, 75, 79, 24, 14,
+ 8, 3, 3, 67, 72, 78, 91, 85, 1, 67, 66, 69,
+ 72, 82, 85, 96, 69, 36, 20, 13, 5, 6, 68, 73,
+ 76, 82, 62, 86, 77, 70, 66, 69, 1, 6, 7, 69,
+ 5, 10, 7, 77, 71, 71, 9, 9, 12, 9, 19, 22, 18,
+ 6, 12, 16, 14, 72, 64, 73, 91, 85, 79, 80, 76,
+ 77, 75, 74, 70, 73, 72, 69, 68, 74, 81, 91,
+ 80, 97, 76, 68, 70, 0, 69, 65, 5, 65, 66, 66,
+ 2, 66, 69, 82, 42, 38, 40, 37, 31, 39, 35, 32,
+ 28, 27, 28, 23, 5, 5, 65, 21, 20, 15, 4, 11,
+ 8, 64, 67, 70, 74, 77, 90, 78, 96, 60, 59, 55,
+ 54, 49, 45, 42, 36, 35, 31, 28, 21, 15, 4, 71,
+ 36, 36, 10, 38, 41, 30, 24, 26, 22, 18, 21,
+ 15, 11, 3, 1, 69, 86, 10, 0, 76, 101, 90, 94,
+ 73, 76, 74, 20, 65, 65, 68, 8, 33, 20, 39, 1,
+ 52, 49, 36, 25, 14, 10, 80, 89, 102, 71, 42,
+ 28, 19, 9, 11, 2, 65, 68, 82, 91, 83, 74, 77,
+ 68, 1, 69, 71, 1, 4, 5, 3, 9, 11, 2, 51, 49,
+ 42, 35, 30, 24, 6, 66, 81 },
+
+ {
+
+ 56,
+ 7, 77, 56, 7, 77, 95, 78, 17, 11, 5, 5, 36,
+ 40, 53, 14, 76, 1, 0, 11, 0, 1, 68, 69, 84,
+ 10, 31, 96, 108, 114, 75, 81, 68, 0, 11, 0,
+ 79, 75, 9, 8, 65, 74, 83, 2, 74, 80, 90, 5,
+ 72, 82, 4, 73, 77, 88, 4, 68, 69, 74, 64, 4,
+ 22, 0, 0, 0, 72, 90, 97, 64, 8, 65, 52, 7, 69,
+ 107, 87, 82, 66, 83, 96, 92, 100, 74, 96, 90,
+ 93, 103, 101, 104, 102, 14, 65, 67, 89, 66,
+ 84, 78, 93, 72, 90, 83, 104, 12, 76, 68, 100,
+ 85, 74, 73, 65, 5, 0, 70, 9, 6, 70, 2, 0, 65,
+ 2, 75, 12, 4, 17, 19, 13, 17, 17, 14, 12, 11,
+ 6, 13, 12, 77, 93, 83, 86, 76, 80, 76, 66, 74,
+ 75, 72, 72, 71, 72, 68, 79, 93, 83, 95, 70,
+ 74, 1, 67, 1, 9, 1, 65, 3, 8, 67, 72, 1, 85,
+ 36, 38, 29, 31, 38, 30, 15, 32, 28, 8, 17, 14,
+ 4, 1, 68, 12, 6, 8, 9, 5, 0, 9, 3, 67, 1, 66,
+ 70, 65, 89, 53, 56, 51, 55, 52, 49, 49, 49,
+ 46, 38, 37, 33, 23, 20, 0, 36, 30, 12, 38, 28,
+ 29, 24, 19, 16, 16, 8, 65, 0, 67, 83, 81, 99,
+ 67, 28, 19, 13, 4, 5, 64, 69, 72, 80, 2, 42,
+ 28, 22, 17, 17, 4, 66, 70, 74, 78, 25, 15, 8,
+ 3, 4, 67, 72, 77, 90, 84, 2, 66, 66, 69, 71,
+ 81, 84, 95, 69, 36, 20, 13, 5, 7, 67, 72, 75,
+ 81, 62, 86, 77, 70, 66, 69, 1, 6, 7, 69, 5,
+ 10, 8, 77, 71, 71, 8, 9, 11, 8, 18, 21, 18, 5,
+ 11, 16, 14, 73, 65, 74, 90, 84, 78, 80, 76,
+ 77, 74, 73, 70, 73, 72, 69, 69, 74, 81, 91,
+ 79, 97, 76, 68, 70, 1, 69, 65, 6, 65, 66, 66,
+ 3, 67, 69, 83, 41, 38, 40, 36, 30, 38, 34, 31,
+ 27, 26, 27, 22, 4, 4, 66, 19, 19, 13, 2, 10,
+ 7, 65, 68, 70, 74, 77, 90, 79, 95, 58, 57, 53,
+ 52, 46, 43, 40, 33, 33, 29, 26, 19, 13, 3, 72,
+ 34, 34, 9, 36, 39, 28, 22, 24, 20, 16, 19, 13,
+ 9, 1, 64, 71, 87, 8, 65, 78, 100, 89, 93, 72,
+ 75, 73, 22, 64, 64, 68, 9, 34, 21, 40, 2, 51,
+ 47, 33, 22, 11, 7, 83, 91, 103, 71, 42, 28,
+ 19, 9, 11, 2, 65, 68, 81, 90, 82, 73, 76, 67,
+ 2, 68, 70, 2, 5, 6, 3, 10, 11, 2, 50, 47, 40,
+ 33, 28, 22, 4, 68, 82 },
+
+ {
+
+ 55,
+ 7, 77, 55, 7, 77, 93, 76, 18, 11, 4, 3, 34,
+ 39, 53, 14, 74, 1, 1, 12, 0, 0, 68, 70, 85,
+ 10, 29, 97, 109, 114, 72, 80, 68, 1, 12, 0,
+ 78, 74, 10, 8, 65, 73, 82, 1, 75, 80, 90, 5,
+ 72, 82, 4, 73, 77, 88, 5, 68, 69, 74, 0, 4,
+ 22, 0, 0, 0, 72, 90, 97, 0, 7, 65, 52, 7, 69,
+ 106, 86, 82, 65, 82, 94, 90, 98, 73, 94, 89,
+ 91, 102, 100, 103, 101, 15, 65, 66, 88, 66,
+ 83, 78, 91, 72, 89, 82, 103, 12, 76, 67, 98,
+ 84, 73, 73, 65, 5, 0, 69, 10, 7, 70, 2, 0, 65,
+ 2, 75, 12, 3, 17, 19, 12, 17, 17, 14, 10, 11,
+ 5, 11, 11, 78, 92, 82, 85, 75, 79, 76, 65, 74,
+ 74, 71, 71, 71, 72, 68, 80, 92, 82, 95, 70,
+ 74, 2, 67, 0, 10, 1, 65, 2, 9, 67, 73, 1, 86,
+ 35, 37, 29, 30, 37, 29, 15, 31, 27, 7, 16, 14,
+ 4, 1, 69, 11, 5, 7, 8, 4, 64, 8, 3, 69, 0, 66,
+ 71, 66, 89, 52, 54, 50, 53, 50, 47, 47, 47,
+ 44, 35, 35, 31, 21, 17, 65, 34, 28, 11, 36,
+ 26, 26, 21, 17, 13, 14, 6, 68, 64, 69, 84, 82,
+ 99, 67, 29, 19, 13, 4, 5, 64, 69, 72, 80, 2,
+ 42, 28, 22, 17, 18, 5, 66, 69, 74, 77, 26, 15,
+ 9, 3, 5, 66, 71, 77, 89, 83, 3, 65, 65, 69,
+ 70, 80, 83, 94, 68, 36, 20, 13, 5, 8, 67, 71,
+ 74, 80, 62, 85, 76, 69, 66, 69, 1, 6, 7, 69,
+ 5, 10, 8, 77, 71, 71, 8, 10, 10, 7, 17, 20,
+ 18, 4, 10, 16, 14, 73, 66, 74, 89, 83, 77, 79,
+ 75, 76, 74, 72, 70, 73, 72, 68, 69, 74, 81,
+ 90, 78, 97, 76, 68, 70, 2, 69, 65, 7, 65, 66,
+ 66, 4, 68, 69, 84, 40, 37, 39, 35, 29, 37, 33,
+ 30, 26, 25, 26, 21, 3, 3, 67, 18, 18, 12, 0,
+ 8, 6, 66, 68, 71, 74, 77, 90, 79, 94, 56, 55,
+ 51, 50, 43, 41, 38, 31, 31, 27, 24, 16, 11, 1,
+ 73, 32, 33, 7, 34, 37, 26, 20, 22, 18, 14, 17,
+ 11, 6, 64, 66, 73, 88, 6, 67, 79, 99, 88, 92,
+ 71, 74, 72, 23, 0, 0, 68, 10, 36, 23, 42, 3,
+ 49, 46, 31, 19, 8, 4, 86, 93, 104, 71, 42, 28,
+ 19, 9, 11, 2, 65, 68, 81, 89, 81, 72, 75, 66,
+ 2, 68, 69, 2, 5, 6, 3, 10, 11, 2, 49, 46, 38,
+ 31, 26, 20, 2, 70, 83 },
+
+ {
+
+ 53,
+ 7, 77, 53, 7, 77, 92, 75, 18, 11, 4, 2, 33,
+ 37, 53, 14, 71, 0, 2, 12, 0, 64, 68, 71, 86,
+ 9, 27, 99, 110, 115, 69, 79, 68, 2, 12, 0, 78,
+ 73, 10, 7, 65, 73, 82, 1, 75, 79, 90, 5, 72,
+ 81, 3, 74, 77, 88, 5, 67, 69, 73, 0, 4, 22, 0,
+ 0, 0, 71, 91, 97, 0, 6, 65, 52, 7, 69, 105,
+ 85, 82, 65, 80, 93, 88, 97, 72, 93, 87, 89,
+ 101, 100, 102, 101, 15, 65, 65, 87, 66, 83,
+ 77, 89, 72, 89, 82, 102, 12, 75, 67, 97, 84,
+ 73, 73, 64, 5, 0, 69, 10, 7, 71, 1, 0, 65, 2,
+ 75, 12, 3, 16, 19, 12, 17, 17, 13, 8, 11, 5,
+ 9, 10, 79, 91, 81, 84, 74, 79, 75, 64, 73, 74,
+ 70, 70, 71, 72, 67, 81, 92, 81, 95, 70, 74, 2,
+ 67, 64, 11, 1, 65, 0, 10, 68, 74, 1, 87, 33,
+ 36, 28, 29, 36, 28, 14, 30, 27, 5, 15, 13, 4,
+ 1, 69, 10, 5, 6, 8, 3, 65, 7, 2, 71, 64, 67,
+ 72, 67, 89, 50, 53, 48, 51, 48, 45, 44, 45,
+ 41, 33, 33, 28, 18, 15, 68, 32, 27, 9, 33, 24,
+ 24, 19, 14, 11, 11, 4, 70, 65, 70, 86, 83, 99,
+ 67, 29, 20, 13, 4, 5, 64, 69, 72, 79, 3, 43,
+ 28, 22, 17, 19, 5, 65, 69, 73, 77, 27, 16, 9,
+ 3, 5, 66, 71, 76, 88, 83, 4, 65, 65, 69, 69,
+ 79, 83, 93, 68, 37, 20, 13, 5, 9, 66, 71, 73,
+ 79, 62, 85, 76, 69, 66, 69, 1, 6, 7, 68, 5,
+ 10, 9, 77, 71, 71, 7, 10, 9, 6, 16, 19, 18, 2,
+ 9, 15, 14, 74, 67, 75, 89, 83, 76, 79, 75, 76,
+ 73, 71, 71, 73, 72, 68, 70, 74, 81, 90, 77,
+ 97, 76, 67, 70, 2, 69, 65, 8, 65, 67, 66, 5,
+ 68, 70, 85, 39, 37, 39, 34, 28, 36, 32, 29,
+ 25, 24, 25, 20, 2, 2, 68, 16, 16, 10, 65, 7,
+ 5, 67, 69, 71, 74, 77, 90, 80, 94, 53, 52, 49,
+ 47, 40, 39, 36, 28, 29, 25, 22, 14, 9, 0, 74,
+ 30, 31, 6, 32, 35, 24, 18, 19, 16, 12, 15, 9,
+ 4, 66, 68, 75, 89, 4, 69, 81, 98, 87, 91, 71,
+ 73, 71, 25, 1, 1, 68, 11, 37, 24, 43, 3, 48,
+ 44, 28, 16, 5, 1, 89, 95, 105, 71, 42, 28, 19,
+ 9, 11, 2, 65, 68, 80, 88, 80, 71, 75, 65, 3,
+ 67, 68, 3, 6, 7, 4, 11, 12, 2, 47, 44, 36, 29,
+ 24, 18, 0, 72, 84 },
+
+ {
+
+ 52,
+ 7, 77, 52, 7, 77, 90, 73, 18, 11, 3, 0, 31,
+ 36, 53, 14, 69, 0, 3, 13, 0, 64, 67, 72, 87,
+ 9, 25, 101, 111, 115, 66, 77, 68, 3, 13, 0,
+ 78, 72, 10, 7, 65, 73, 81, 1, 76, 79, 90, 5,
+ 72, 80, 3, 74, 77, 88, 6, 67, 68, 73, 1, 4,
+ 22, 0, 0, 0, 71, 91, 97, 1, 5, 65, 52, 7, 69,
+ 104, 84, 82, 64, 79, 91, 86, 95, 71, 91, 86,
+ 87, 100, 99, 101, 100, 16, 65, 64, 86, 66, 82,
+ 76, 87, 72, 89, 81, 100, 13, 75, 66, 96, 83,
+ 72, 73, 64, 6, 1, 68, 11, 8, 71, 1, 0, 65, 2,
+ 75, 12, 2, 15, 19, 11, 17, 17, 13, 6, 11, 4,
+ 8, 9, 80, 90, 80, 83, 73, 78, 74, 0, 72, 73,
+ 69, 69, 71, 72, 67, 82, 91, 80, 95, 69, 74, 3,
+ 67, 65, 12, 1, 65, 64, 11, 68, 75, 1, 87, 32,
+ 35, 28, 28, 35, 27, 13, 30, 27, 4, 14, 13, 4,
+ 1, 70, 10, 4, 5, 7, 2, 66, 7, 1, 73, 65, 68,
+ 73, 68, 89, 48, 52, 46, 49, 47, 43, 42, 43,
+ 39, 31, 31, 26, 16, 13, 70, 30, 25, 7, 31, 22,
+ 22, 17, 12, 9, 8, 2, 73, 66, 72, 87, 83, 99,
+ 67, 29, 20, 13, 4, 5, 64, 69, 72, 79, 3, 43,
+ 28, 22, 17, 20, 6, 64, 69, 72, 76, 28, 17, 9,
+ 4, 6, 65, 70, 76, 87, 82, 6, 64, 65, 68, 68,
+ 78, 82, 92, 67, 37, 21, 13, 5, 10, 65, 70, 72,
+ 78, 62, 85, 76, 68, 66, 69, 1, 6, 7, 68, 5,
+ 11, 9, 77, 71, 71, 6, 11, 8, 5, 15, 19, 18, 1,
+ 8, 15, 14, 75, 68, 75, 88, 82, 75, 78, 74, 75,
+ 73, 70, 71, 73, 72, 68, 71, 74, 81, 89, 76,
+ 97, 76, 67, 70, 3, 69, 65, 9, 65, 67, 66, 6,
+ 69, 70, 85, 38, 37, 38, 34, 27, 35, 31, 28,
+ 25, 23, 24, 19, 1, 1, 69, 15, 15, 9, 67, 6, 4,
+ 68, 70, 72, 74, 76, 90, 80, 93, 51, 50, 47,
+ 45, 38, 37, 34, 26, 27, 23, 20, 12, 7, 65, 75,
+ 28, 29, 5, 30, 33, 22, 16, 17, 14, 10, 13, 7,
+ 1, 68, 70, 77, 90, 2, 71, 82, 97, 85, 90, 70,
+ 72, 70, 27, 2, 2, 68, 12, 39, 26, 45, 4, 46,
+ 42, 26, 13, 3, 65, 91, 97, 106, 71, 42, 28,
+ 19, 9, 11, 2, 65, 68, 80, 87, 79, 70, 74, 64,
+ 4, 67, 67, 4, 7, 8, 4, 11, 12, 2, 46, 43, 35,
+ 28, 22, 16, 65, 74, 85 },
+
+ {
+
+ 51,
+ 7, 78, 51, 7, 78, 88, 72, 19, 11, 3, 64, 30,
+ 35, 53, 14, 67, 0, 3, 14, 0, 65, 67, 73, 88,
+ 8, 24, 102, 112, 116, 0, 76, 68, 3, 14, 0, 77,
+ 71, 11, 6, 64, 72, 80, 0, 76, 79, 90, 5, 71,
+ 80, 3, 74, 76, 88, 6, 67, 68, 73, 1, 4, 22, 0,
+ 0, 0, 70, 91, 97, 1, 5, 66, 52, 7, 69, 103,
+ 84, 82, 64, 78, 89, 84, 94, 70, 89, 85, 85,
+ 99, 98, 100, 99, 17, 65, 0, 86, 65, 82, 76,
+ 85, 72, 88, 81, 99, 13, 75, 66, 94, 83, 71,
+ 72, 64, 6, 1, 67, 11, 8, 72, 1, 0, 65, 2, 75,
+ 12, 2, 15, 19, 10, 17, 17, 13, 4, 11, 4, 6, 8,
+ 81, 90, 79, 83, 73, 77, 74, 1, 72, 73, 69, 67,
+ 71, 72, 67, 83, 91, 79, 95, 69, 74, 4, 66, 66,
+ 12, 1, 65, 65, 12, 69, 76, 1, 88, 30, 34, 27,
+ 28, 34, 27, 13, 29, 26, 3, 13, 13, 4, 1, 70,
+ 9, 3, 4, 6, 2, 67, 6, 1, 75, 66, 68, 74, 69,
+ 89, 47, 50, 45, 47, 45, 41, 40, 41, 36, 28,
+ 29, 23, 13, 10, 73, 28, 23, 6, 29, 19, 19, 14,
+ 10, 6, 6, 64, 75, 67, 74, 88, 84, 99, 66, 30,
+ 20, 14, 4, 5, 64, 69, 72, 78, 3, 43, 29, 22,
+ 17, 21, 6, 64, 68, 72, 75, 29, 17, 10, 4, 7,
+ 65, 70, 75, 86, 81, 7, 0, 64, 68, 68, 78, 81,
+ 90, 67, 37, 21, 13, 6, 10, 65, 69, 72, 77, 62,
+ 84, 75, 68, 66, 69, 1, 6, 7, 68, 6, 11, 10,
+ 77, 71, 72, 6, 11, 7, 4, 14, 18, 18, 0, 7, 15,
+ 14, 75, 69, 76, 87, 81, 74, 78, 74, 75, 72,
+ 70, 71, 72, 71, 67, 71, 74, 81, 89, 75, 97,
+ 76, 67, 70, 4, 69, 65, 10, 66, 67, 66, 6, 70,
+ 70, 86, 37, 36, 38, 33, 26, 35, 31, 27, 24,
+ 23, 23, 18, 0, 0, 70, 13, 14, 7, 69, 4, 2, 69,
+ 70, 72, 74, 76, 89, 81, 92, 49, 48, 45, 43,
+ 35, 35, 32, 23, 25, 20, 18, 9, 5, 66, 75, 27,
+ 28, 3, 28, 30, 20, 14, 15, 12, 8, 10, 5, 64,
+ 70, 72, 78, 92, 0, 73, 84, 96, 84, 89, 69, 71,
+ 69, 28, 3, 3, 68, 13, 40, 27, 46, 5, 45, 41,
+ 23, 10, 0, 67, 94, 99, 108, 70, 42, 28, 19, 9,
+ 11, 2, 65, 68, 79, 86, 79, 69, 73, 0, 4, 66,
+ 66, 4, 7, 8, 4, 12, 12, 2, 45, 41, 33, 26, 21,
+ 15, 68, 75, 86 },
+
+ {
+
+ 50,
+ 7, 78, 50, 7, 78, 86, 70, 19, 11, 2, 66, 28,
+ 33, 53, 14, 64, 64, 4, 14, 0, 66, 67, 74, 89,
+ 8, 22, 104, 113, 116, 3, 75, 68, 4, 14, 0, 77,
+ 70, 11, 6, 64, 72, 80, 0, 77, 78, 90, 5, 71,
+ 79, 2, 74, 76, 88, 7, 66, 68, 72, 2, 4, 22, 0,
+ 0, 0, 70, 91, 97, 2, 4, 66, 52, 7, 69, 102,
+ 83, 82, 0, 76, 88, 82, 92, 69, 88, 83, 83, 98,
+ 97, 99, 99, 18, 65, 1, 85, 65, 81, 75, 83, 72,
+ 88, 80, 98, 13, 74, 65, 93, 82, 71, 72, 0, 6,
+ 1, 66, 12, 9, 72, 0, 0, 65, 2, 75, 12, 1, 14,
+ 19, 10, 17, 17, 12, 2, 11, 3, 4, 7, 82, 89,
+ 78, 82, 72, 76, 73, 2, 71, 72, 68, 66, 71, 72,
+ 66, 84, 90, 78, 95, 69, 74, 4, 66, 67, 13, 1,
+ 65, 67, 13, 69, 77, 1, 89, 29, 33, 27, 27, 33,
+ 26, 12, 28, 26, 2, 12, 13, 4, 1, 71, 8, 3, 3,
+ 6, 1, 68, 5, 0, 77, 67, 69, 75, 70, 89, 45,
+ 49, 43, 45, 43, 39, 37, 39, 34, 26, 27, 21,
+ 11, 8, 75, 26, 22, 4, 26, 17, 17, 12, 7, 4, 3,
+ 66, 78, 68, 75, 90, 85, 99, 66, 30, 21, 14, 4,
+ 5, 64, 69, 72, 78, 4, 44, 29, 22, 17, 22, 7,
+ 0, 68, 71, 74, 30, 18, 10, 4, 8, 64, 69, 75,
+ 85, 81, 8, 0, 64, 68, 67, 77, 81, 89, 66, 38,
+ 21, 13, 6, 11, 64, 68, 71, 76, 62, 84, 75, 67,
+ 66, 69, 1, 6, 7, 67, 6, 11, 10, 77, 71, 72, 5,
+ 12, 6, 3, 13, 17, 18, 64, 6, 14, 14, 76, 70,
+ 76, 86, 81, 73, 77, 73, 74, 72, 69, 71, 72,
+ 71, 67, 72, 74, 81, 88, 74, 97, 76, 66, 70, 4,
+ 69, 65, 11, 66, 67, 66, 7, 70, 71, 87, 36, 36,
+ 37, 32, 25, 34, 30, 26, 23, 22, 22, 17, 64,
+ 64, 71, 12, 12, 6, 71, 3, 1, 70, 71, 73, 74,
+ 76, 89, 81, 91, 47, 46, 43, 40, 32, 33, 30,
+ 21, 23, 18, 16, 7, 3, 68, 76, 25, 26, 2, 26,
+ 28, 18, 12, 13, 10, 6, 8, 3, 67, 72, 74, 80,
+ 93, 65, 75, 85, 95, 83, 88, 69, 70, 68, 30, 4,
+ 4, 68, 14, 42, 29, 48, 5, 43, 39, 21, 7, 66,
+ 70, 97, 101, 109, 70, 42, 28, 19, 9, 11, 2,
+ 65, 68, 79, 85, 78, 68, 72, 1, 5, 66, 65, 5,
+ 8, 9, 5, 12, 13, 2, 43, 40, 31, 24, 19, 13,
+ 70, 77, 87 },
+
+ {
+
+ 48,
+ 6, 78, 48, 6, 78, 85, 69, 19, 11, 2, 67, 27,
+ 32, 53, 14, 1, 64, 5, 15, 0, 67, 67, 75, 91,
+ 7, 20, 106, 114, 117, 5, 74, 68, 5, 15, 0, 77,
+ 69, 11, 5, 64, 72, 79, 64, 77, 78, 91, 5, 71,
+ 79, 2, 75, 76, 88, 7, 66, 68, 72, 2, 4, 22, 0,
+ 0, 0, 69, 92, 97, 2, 3, 66, 52, 7, 69, 101,
+ 82, 82, 0, 75, 86, 80, 91, 68, 86, 82, 82, 97,
+ 97, 98, 98, 18, 65, 2, 84, 65, 81, 75, 82, 72,
+ 88, 80, 97, 13, 74, 65, 92, 82, 70, 72, 0, 6,
+ 1, 66, 12, 9, 73, 0, 0, 65, 2, 75, 12, 1, 13,
+ 19, 9, 17, 17, 12, 0, 11, 3, 2, 6, 83, 88, 77,
+ 81, 71, 76, 73, 3, 71, 72, 67, 65, 71, 72, 66,
+ 86, 90, 77, 95, 69, 75, 5, 66, 68, 14, 1, 65,
+ 68, 14, 70, 78, 1, 90, 27, 32, 26, 26, 32, 25,
+ 11, 27, 25, 0, 11, 12, 4, 1, 71, 7, 2, 2, 5,
+ 0, 69, 4, 64, 79, 69, 70, 76, 71, 89, 43, 47,
+ 41, 43, 41, 37, 35, 37, 31, 23, 25, 18, 8, 5,
+ 78, 24, 20, 2, 24, 15, 14, 9, 5, 1, 0, 68, 80,
+ 69, 77, 91, 86, 100, 66, 30, 21, 14, 4, 5, 64,
+ 69, 72, 77, 4, 44, 29, 22, 17, 23, 7, 0, 68,
+ 71, 74, 31, 18, 10, 4, 8, 64, 69, 74, 84, 80,
+ 9, 1, 64, 68, 66, 76, 80, 88, 66, 38, 21, 13,
+ 6, 12, 64, 68, 70, 76, 62, 84, 75, 67, 66, 69,
+ 1, 6, 7, 67, 6, 11, 11, 77, 71, 72, 4, 12, 5,
+ 2, 12, 16, 18, 66, 4, 14, 14, 77, 71, 77, 86,
+ 80, 72, 77, 73, 74, 71, 68, 72, 72, 71, 67,
+ 73, 74, 81, 88, 74, 98, 76, 66, 70, 5, 69, 65,
+ 11, 66, 68, 66, 8, 71, 71, 88, 35, 35, 37, 31,
+ 23, 33, 29, 25, 22, 21, 21, 16, 65, 65, 72,
+ 10, 11, 4, 73, 1, 0, 71, 72, 73, 74, 76, 89,
+ 82, 91, 44, 43, 41, 38, 29, 30, 27, 18, 21,
+ 16, 14, 4, 1, 69, 77, 23, 24, 0, 24, 26, 15,
+ 9, 10, 7, 4, 6, 0, 69, 74, 77, 82, 94, 67, 77,
+ 87, 94, 82, 87, 68, 69, 68, 31, 5, 4, 68, 14,
+ 43, 30, 49, 6, 42, 37, 18, 4, 69, 73, 100,
+ 103, 110, 70, 42, 28, 19, 9, 11, 2, 65, 68,
+ 78, 85, 77, 67, 72, 2, 5, 65, 65, 5, 8, 9, 5,
+ 13, 13, 1, 42, 38, 29, 22, 17, 11, 72, 79, 88 },
+
+ {
+
+ 47,
+ 6, 78, 47, 6, 78, 83, 68, 20, 11, 2, 68, 26,
+ 31, 53, 14, 3, 64, 6, 16, 0, 67, 66, 76, 92,
+ 6, 18, 107, 115, 118, 8, 72, 68, 6, 16, 0, 76,
+ 68, 12, 4, 64, 71, 78, 64, 77, 78, 91, 5, 71,
+ 78, 2, 75, 76, 88, 7, 66, 67, 72, 2, 4, 22, 0,
+ 0, 0, 68, 92, 97, 2, 2, 66, 52, 7, 69, 100,
+ 81, 82, 0, 74, 84, 78, 89, 66, 84, 81, 80, 96,
+ 96, 97, 97, 19, 64, 3, 83, 65, 80, 74, 80, 72,
+ 87, 80, 95, 14, 74, 65, 90, 82, 69, 72, 0, 7,
+ 2, 65, 12, 9, 73, 0, 1, 65, 2, 74, 12, 1, 13,
+ 19, 8, 17, 17, 12, 65, 11, 3, 1, 5, 83, 87,
+ 76, 80, 70, 75, 72, 4, 70, 72, 66, 64, 71, 72,
+ 66, 87, 89, 76, 95, 68, 75, 6, 66, 69, 15, 1,
+ 65, 69, 15, 71, 79, 1, 90, 26, 31, 26, 25, 31,
+ 24, 11, 27, 25, 64, 10, 12, 4, 1, 71, 7, 1, 1,
+ 4, 64, 70, 4, 64, 80, 70, 70, 77, 72, 89, 42,
+ 46, 40, 42, 40, 35, 33, 35, 29, 21, 23, 16, 5,
+ 3, 81, 23, 18, 1, 22, 13, 12, 7, 3, 64, 65,
+ 70, 82, 69, 79, 92, 86, 100, 66, 31, 21, 14,
+ 5, 5, 64, 68, 72, 76, 4, 44, 29, 23, 17, 24,
+ 8, 1, 67, 70, 73, 32, 19, 11, 5, 9, 0, 69, 73,
+ 83, 79, 11, 2, 0, 67, 65, 75, 79, 87, 65, 38,
+ 22, 14, 6, 13, 0, 67, 69, 75, 62, 83, 74, 66,
+ 66, 69, 1, 7, 8, 67, 6, 12, 12, 77, 71, 72, 4,
+ 12, 4, 2, 11, 16, 18, 67, 3, 14, 14, 77, 72,
+ 78, 85, 79, 71, 77, 72, 74, 70, 67, 72, 72,
+ 71, 66, 73, 74, 81, 88, 73, 98, 76, 66, 70, 6,
+ 69, 65, 12, 66, 68, 66, 9, 72, 71, 88, 34, 35,
+ 37, 31, 22, 32, 28, 24, 22, 20, 20, 16, 65,
+ 66, 73, 9, 10, 2, 75, 0, 64, 71, 72, 73, 73,
+ 75, 89, 83, 90, 42, 41, 39, 36, 27, 28, 25,
+ 16, 19, 14, 12, 2, 64, 70, 78, 21, 23, 64, 22,
+ 24, 13, 7, 8, 5, 2, 4, 65, 71, 75, 79, 84, 95,
+ 69, 79, 89, 93, 80, 85, 67, 68, 67, 33, 6, 5,
+ 68, 15, 45, 31, 51, 7, 41, 36, 16, 1, 71, 76,
+ 102, 105, 111, 70, 42, 28, 19, 9, 12, 2, 65,
+ 68, 77, 84, 76, 66, 71, 4, 6, 64, 64, 6, 9,
+ 10, 5, 14, 13, 1, 41, 37, 28, 21, 15, 9, 74,
+ 81, 88 },
+
+ {
+
+ 46,
+ 6, 78, 46, 6, 78, 81, 66, 20, 11, 1, 70, 24,
+ 29, 53, 14, 6, 65, 7, 16, 0, 68, 66, 77, 93,
+ 6, 16, 109, 116, 118, 11, 71, 68, 7, 16, 0,
+ 76, 67, 12, 4, 64, 71, 78, 64, 78, 77, 91, 5,
+ 71, 77, 1, 75, 76, 88, 8, 65, 67, 71, 3, 4,
+ 22, 0, 0, 0, 68, 92, 97, 3, 1, 66, 52, 7, 69,
+ 99, 80, 82, 1, 72, 83, 76, 88, 65, 83, 79, 78,
+ 95, 95, 96, 97, 20, 64, 4, 82, 65, 80, 73, 78,
+ 72, 87, 79, 94, 14, 73, 64, 89, 81, 69, 72, 1,
+ 7, 2, 64, 13, 10, 74, 64, 1, 65, 2, 74, 12, 0,
+ 12, 19, 8, 17, 17, 11, 67, 11, 2, 64, 4, 84,
+ 86, 75, 79, 69, 74, 71, 5, 69, 71, 65, 0, 71,
+ 72, 65, 88, 89, 75, 95, 68, 75, 6, 66, 70, 16,
+ 1, 65, 71, 16, 71, 80, 1, 91, 24, 30, 25, 24,
+ 30, 23, 10, 26, 25, 65, 9, 12, 4, 1, 72, 6, 1,
+ 0, 4, 65, 71, 3, 65, 82, 71, 71, 78, 73, 89,
+ 40, 45, 38, 40, 38, 33, 30, 33, 26, 19, 21,
+ 13, 3, 1, 83, 21, 17, 64, 19, 11, 10, 5, 0,
+ 66, 68, 72, 85, 70, 80, 94, 87, 100, 66, 31,
+ 22, 14, 5, 5, 64, 68, 72, 76, 5, 45, 29, 23,
+ 17, 25, 8, 2, 67, 69, 72, 33, 20, 11, 5, 10,
+ 0, 68, 73, 82, 79, 12, 2, 0, 67, 64, 74, 79,
+ 86, 65, 39, 22, 14, 6, 14, 1, 66, 68, 74, 62,
+ 83, 74, 66, 66, 69, 1, 7, 8, 66, 6, 12, 12,
+ 77, 71, 72, 3, 13, 3, 1, 10, 15, 18, 68, 2,
+ 13, 14, 78, 73, 78, 84, 79, 70, 76, 72, 73,
+ 70, 66, 72, 72, 71, 66, 74, 74, 81, 87, 72,
+ 98, 76, 65, 70, 6, 69, 65, 13, 66, 68, 66, 10,
+ 72, 72, 89, 33, 35, 36, 30, 21, 31, 27, 23,
+ 21, 19, 19, 15, 66, 67, 74, 7, 8, 1, 77, 64,
+ 65, 72, 73, 74, 73, 75, 89, 83, 89, 40, 39,
+ 37, 33, 24, 26, 23, 13, 17, 12, 10, 0, 66, 72,
+ 79, 19, 21, 65, 20, 22, 11, 5, 6, 3, 0, 2, 67,
+ 74, 77, 81, 86, 96, 71, 81, 90, 92, 79, 84,
+ 67, 67, 66, 35, 7, 6, 68, 16, 46, 33, 52, 7,
+ 39, 34, 13, 65, 74, 79, 105, 107, 112, 70, 42,
+ 28, 19, 9, 12, 2, 65, 68, 77, 83, 75, 65, 70,
+ 5, 7, 64, 0, 7, 10, 11, 6, 14, 14, 1, 39, 35,
+ 26, 19, 13, 7, 76, 83, 89 },
+
+ {
+
+ 45,
+ 6, 79, 45, 6, 79, 79, 65, 21, 11, 1, 71, 23,
+ 28, 53, 14, 8, 65, 7, 17, 0, 69, 66, 78, 94,
+ 5, 15, 110, 117, 119, 14, 70, 68, 7, 17, 0,
+ 75, 66, 13, 3, 0, 70, 77, 65, 78, 77, 91, 5,
+ 70, 77, 1, 75, 75, 88, 8, 65, 67, 71, 3, 4,
+ 22, 0, 0, 0, 67, 92, 97, 3, 1, 67, 52, 7, 69,
+ 98, 80, 82, 1, 71, 81, 74, 86, 64, 81, 78, 76,
+ 94, 94, 95, 96, 21, 64, 5, 82, 64, 79, 73, 76,
+ 72, 86, 79, 93, 14, 73, 64, 87, 81, 68, 71, 1,
+ 7, 2, 0, 13, 10, 74, 64, 1, 65, 2, 74, 12, 0,
+ 12, 19, 7, 17, 17, 11, 69, 11, 2, 66, 3, 85,
+ 86, 74, 79, 69, 73, 71, 6, 69, 71, 65, 2, 71,
+ 72, 65, 89, 88, 74, 95, 68, 75, 7, 65, 71, 16,
+ 1, 65, 72, 17, 72, 81, 1, 92, 23, 29, 25, 24,
+ 29, 23, 10, 25, 24, 66, 8, 12, 4, 1, 72, 5, 0,
+ 64, 3, 65, 72, 2, 65, 84, 72, 71, 79, 74, 89,
+ 39, 43, 37, 38, 36, 31, 28, 31, 24, 16, 19,
+ 11, 0, 65, 86, 19, 15, 65, 17, 8, 7, 2, 65,
+ 69, 70, 75, 87, 71, 82, 95, 88, 100, 65, 32,
+ 22, 15, 5, 5, 64, 68, 72, 75, 5, 45, 30, 23,
+ 17, 26, 9, 2, 66, 69, 71, 34, 20, 12, 5, 11,
+ 1, 68, 72, 81, 78, 13, 3, 1, 67, 64, 74, 78,
+ 84, 64, 39, 22, 14, 7, 14, 1, 65, 68, 73, 62,
+ 82, 73, 65, 66, 69, 1, 7, 8, 66, 7, 12, 13,
+ 77, 71, 73, 3, 13, 2, 0, 9, 14, 18, 69, 1, 13,
+ 14, 78, 74, 79, 83, 78, 69, 76, 71, 73, 69,
+ 66, 72, 71, 70, 65, 74, 74, 81, 87, 71, 98,
+ 76, 65, 70, 7, 69, 65, 14, 67, 68, 66, 10, 73,
+ 72, 90, 32, 34, 36, 29, 20, 31, 27, 22, 20,
+ 19, 18, 14, 67, 68, 75, 6, 7, 64, 79, 66, 67,
+ 73, 73, 74, 73, 75, 88, 84, 88, 38, 37, 35,
+ 31, 21, 24, 21, 11, 15, 9, 8, 66, 68, 73, 79,
+ 18, 20, 67, 18, 19, 9, 3, 4, 1, 65, 64, 69,
+ 76, 79, 83, 87, 98, 73, 83, 92, 91, 78, 83,
+ 66, 66, 65, 36, 8, 7, 68, 17, 48, 34, 54, 8,
+ 38, 33, 11, 68, 77, 81, 108, 109, 114, 69, 42,
+ 28, 19, 9, 12, 2, 65, 68, 76, 82, 75, 64, 69,
+ 6, 7, 0, 1, 7, 10, 11, 6, 15, 14, 1, 38, 34,
+ 24, 17, 12, 6, 79, 84, 90 },
+
+ {
+
+ 43,
+ 6, 79, 43, 6, 79, 78, 0, 21, 11, 0, 73, 21,
+ 27, 53, 14, 10, 65, 8, 18, 0, 70, 66, 79, 95,
+ 5, 13, 112, 118, 119, 17, 69, 68, 8, 18, 0,
+ 75, 65, 13, 3, 0, 70, 76, 65, 79, 77, 91, 5,
+ 70, 76, 1, 76, 75, 88, 9, 65, 67, 71, 4, 4,
+ 22, 0, 0, 0, 67, 93, 97, 4, 0, 67, 52, 7, 69,
+ 97, 79, 82, 2, 70, 79, 72, 85, 0, 79, 77, 74,
+ 93, 94, 94, 95, 21, 64, 6, 81, 64, 79, 72, 74,
+ 72, 86, 78, 92, 14, 73, 0, 86, 80, 67, 71, 1,
+ 7, 2, 0, 14, 11, 75, 64, 1, 65, 2, 74, 12, 64,
+ 11, 19, 6, 17, 17, 11, 71, 11, 1, 68, 2, 86,
+ 85, 73, 78, 68, 73, 70, 7, 68, 70, 64, 3, 71,
+ 72, 65, 90, 88, 73, 95, 68, 75, 8, 65, 72, 17,
+ 1, 65, 73, 18, 72, 82, 1, 93, 21, 28, 24, 23,
+ 28, 22, 9, 24, 24, 68, 7, 11, 4, 1, 73, 4, 64,
+ 65, 2, 66, 73, 1, 66, 86, 73, 72, 80, 75, 89,
+ 37, 42, 35, 36, 34, 29, 26, 29, 21, 14, 17, 8,
+ 65, 67, 88, 17, 13, 67, 15, 6, 5, 0, 67, 71,
+ 73, 77, 90, 72, 84, 96, 89, 100, 65, 32, 22,
+ 15, 5, 5, 64, 68, 72, 75, 5, 45, 30, 23, 17,
+ 27, 9, 3, 66, 68, 71, 35, 21, 12, 5, 11, 1,
+ 67, 72, 80, 77, 14, 4, 1, 67, 0, 73, 77, 83,
+ 64, 39, 22, 14, 7, 15, 2, 65, 67, 72, 62, 82,
+ 73, 65, 66, 69, 1, 7, 8, 66, 7, 12, 13, 77,
+ 71, 73, 2, 14, 1, 64, 8, 13, 18, 71, 0, 13,
+ 14, 79, 75, 79, 83, 77, 68, 75, 71, 72, 69,
+ 65, 73, 71, 70, 65, 75, 74, 81, 86, 70, 98,
+ 76, 65, 70, 8, 69, 65, 15, 67, 69, 66, 11, 74,
+ 72, 91, 31, 34, 35, 28, 19, 30, 26, 21, 19,
+ 18, 17, 13, 68, 69, 76, 4, 6, 65, 81, 67, 68,
+ 74, 74, 75, 73, 75, 88, 84, 88, 35, 34, 33,
+ 29, 18, 22, 19, 8, 13, 7, 6, 68, 70, 75, 80,
+ 16, 18, 68, 16, 17, 7, 1, 1, 64, 67, 66, 71,
+ 79, 81, 85, 89, 99, 75, 85, 93, 90, 77, 82,
+ 65, 65, 64, 38, 9, 8, 68, 18, 49, 36, 55, 9,
+ 36, 31, 8, 71, 80, 84, 111, 111, 115, 69, 42,
+ 28, 19, 9, 12, 2, 65, 68, 76, 81, 74, 0, 69,
+ 7, 8, 0, 2, 8, 11, 12, 6, 15, 14, 1, 37, 32,
+ 22, 15, 10, 4, 81, 86, 91 },
+
+ {
+
+ 42,
+ 6, 79, 42, 6, 79, 76, 1, 21, 11, 0, 74, 20,
+ 25, 53, 14, 13, 66, 9, 18, 0, 70, 65, 80, 96,
+ 4, 11, 114, 119, 120, 20, 67, 68, 9, 18, 0,
+ 75, 64, 13, 2, 0, 70, 76, 65, 79, 76, 91, 5,
+ 70, 75, 0, 76, 75, 88, 9, 64, 66, 70, 4, 4,
+ 22, 0, 0, 0, 66, 93, 97, 4, 64, 67, 52, 7, 69,
+ 96, 78, 82, 2, 68, 78, 70, 83, 1, 78, 75, 72,
+ 92, 93, 93, 95, 22, 64, 7, 80, 64, 78, 71, 72,
+ 72, 86, 78, 90, 15, 72, 0, 85, 80, 67, 71, 2,
+ 8, 3, 1, 14, 11, 75, 65, 1, 65, 2, 74, 12, 64,
+ 10, 19, 6, 17, 17, 10, 73, 11, 1, 69, 1, 87,
+ 84, 72, 77, 67, 72, 69, 8, 67, 70, 0, 4, 71,
+ 72, 64, 91, 87, 72, 95, 67, 75, 8, 65, 73, 18,
+ 1, 65, 75, 19, 73, 83, 1, 93, 20, 27, 24, 22,
+ 27, 21, 8, 24, 24, 69, 6, 11, 4, 1, 73, 4, 64,
+ 66, 2, 67, 74, 1, 67, 88, 74, 73, 81, 76, 89,
+ 35, 41, 33, 34, 33, 27, 23, 27, 19, 12, 15, 6,
+ 68, 69, 91, 15, 12, 69, 12, 4, 3, 65, 70, 73,
+ 76, 79, 92, 73, 85, 98, 89, 100, 65, 32, 23,
+ 15, 5, 5, 64, 68, 72, 74, 6, 46, 30, 23, 17,
+ 28, 10, 4, 66, 67, 70, 36, 22, 12, 6, 12, 2,
+ 67, 71, 79, 77, 16, 4, 1, 66, 1, 72, 77, 82,
+ 0, 40, 23, 14, 7, 16, 3, 64, 66, 71, 62, 82,
+ 73, 64, 66, 69, 1, 7, 8, 65, 7, 13, 14, 77,
+ 71, 73, 1, 14, 0, 65, 7, 13, 18, 72, 64, 12,
+ 14, 80, 76, 80, 82, 77, 67, 75, 70, 72, 68,
+ 64, 73, 71, 70, 65, 76, 74, 81, 86, 69, 98,
+ 76, 64, 70, 8, 69, 65, 16, 67, 69, 66, 12, 74,
+ 73, 91, 30, 34, 35, 28, 18, 29, 25, 20, 19,
+ 17, 16, 12, 69, 70, 77, 3, 4, 67, 83, 68, 69,
+ 75, 75, 75, 73, 74, 88, 85, 87, 33, 32, 31,
+ 26, 16, 20, 17, 6, 11, 5, 4, 70, 72, 76, 81,
+ 14, 16, 69, 14, 15, 5, 64, 64, 66, 69, 68, 73,
+ 81, 83, 87, 91, 100, 77, 87, 95, 89, 75, 81,
+ 65, 64, 0, 40, 10, 9, 68, 19, 51, 37, 57, 9,
+ 35, 29, 6, 74, 82, 87, 113, 113, 116, 69, 42,
+ 28, 19, 9, 12, 2, 65, 68, 75, 80, 73, 1, 68,
+ 8, 9, 1, 3, 9, 12, 13, 7, 16, 15, 1, 35, 31,
+ 21, 14, 8, 2, 83, 88, 92 },
+
+ {
+
+ 41,
+ 6, 79, 41, 6, 79, 74, 3, 22, 11, 64, 76, 18,
+ 24, 53, 14, 15, 66, 10, 19, 0, 71, 65, 81, 97,
+ 4, 9, 115, 120, 120, 23, 66, 68, 10, 19, 0,
+ 74, 0, 14, 2, 0, 69, 75, 66, 80, 76, 91, 5,
+ 70, 75, 0, 76, 75, 88, 10, 64, 66, 70, 5, 4,
+ 22, 0, 0, 0, 66, 93, 97, 5, 65, 67, 52, 7, 69,
+ 95, 77, 82, 3, 67, 76, 68, 82, 2, 76, 74, 70,
+ 91, 92, 92, 94, 23, 64, 8, 79, 64, 78, 71, 70,
+ 72, 85, 77, 89, 15, 72, 1, 83, 79, 66, 71, 2,
+ 8, 3, 2, 15, 12, 76, 65, 1, 65, 2, 74, 12, 65,
+ 10, 19, 5, 17, 17, 10, 75, 11, 0, 71, 0, 88,
+ 83, 71, 76, 66, 71, 69, 9, 67, 69, 1, 5, 71,
+ 72, 64, 92, 87, 71, 95, 67, 75, 9, 65, 74, 19,
+ 1, 65, 76, 20, 73, 84, 1, 94, 18, 26, 23, 21,
+ 26, 20, 8, 23, 23, 70, 5, 11, 4, 1, 74, 3, 65,
+ 67, 1, 68, 75, 0, 67, 90, 75, 73, 82, 77, 89,
+ 34, 39, 32, 32, 31, 25, 21, 25, 16, 9, 13, 3,
+ 70, 72, 93, 13, 10, 70, 10, 2, 0, 68, 72, 76,
+ 78, 81, 95, 74, 87, 99, 90, 100, 65, 33, 23,
+ 15, 5, 5, 64, 68, 72, 74, 6, 46, 30, 23, 17,
+ 29, 10, 4, 65, 67, 69, 37, 22, 13, 6, 13, 2,
+ 66, 71, 78, 76, 17, 5, 2, 66, 2, 71, 76, 81,
+ 0, 40, 23, 14, 7, 17, 3, 0, 65, 70, 62, 81,
+ 72, 64, 66, 69, 1, 7, 8, 65, 7, 13, 14, 77,
+ 71, 73, 1, 15, 64, 66, 6, 12, 18, 73, 65, 12,
+ 14, 80, 77, 80, 81, 76, 66, 74, 70, 71, 68, 0,
+ 73, 71, 70, 64, 76, 74, 81, 85, 68, 98, 76,
+ 64, 70, 9, 69, 65, 17, 67, 69, 66, 13, 75, 73,
+ 92, 29, 33, 34, 27, 17, 28, 24, 19, 18, 16,
+ 15, 11, 70, 71, 78, 1, 3, 68, 85, 70, 70, 76,
+ 75, 76, 73, 74, 88, 85, 86, 31, 30, 29, 24,
+ 13, 18, 15, 3, 9, 3, 2, 73, 74, 78, 82, 12,
+ 15, 71, 12, 13, 3, 66, 66, 68, 71, 70, 75, 84,
+ 85, 89, 93, 101, 79, 89, 96, 88, 74, 80, 64,
+ 0, 1, 41, 11, 10, 68, 20, 52, 39, 58, 10, 33,
+ 28, 3, 77, 85, 90, 116, 115, 117, 69, 42, 28,
+ 19, 9, 12, 2, 65, 68, 75, 79, 72, 2, 67, 9, 9,
+ 1, 4, 9, 12, 13, 7, 16, 15, 1, 34, 29, 19, 12,
+ 6, 0, 85, 90, 93 },
+
+ {
+
+ 40,
+ 6, 79, 40, 6, 79, 72, 4, 22, 11, 64, 77, 17,
+ 23, 53, 14, 17, 66, 11, 20, 0, 72, 65, 82, 98,
+ 3, 7, 117, 121, 121, 26, 65, 68, 11, 20, 0,
+ 74, 1, 14, 1, 0, 69, 74, 66, 80, 76, 91, 5,
+ 70, 74, 0, 76, 75, 88, 10, 64, 66, 70, 5, 4,
+ 22, 0, 0, 0, 65, 93, 97, 5, 66, 67, 52, 7, 69,
+ 94, 76, 82, 3, 66, 74, 66, 80, 3, 74, 73, 68,
+ 90, 91, 91, 93, 24, 64, 9, 78, 64, 77, 70, 68,
+ 72, 85, 77, 88, 15, 72, 1, 82, 79, 65, 71, 2,
+ 8, 3, 3, 15, 12, 76, 65, 1, 65, 2, 74, 12, 65,
+ 9, 19, 4, 17, 17, 10, 77, 11, 0, 73, 64, 89,
+ 82, 70, 75, 65, 70, 68, 10, 66, 69, 2, 6, 71,
+ 72, 64, 93, 86, 70, 95, 67, 75, 10, 65, 75,
+ 20, 1, 65, 77, 21, 74, 85, 1, 95, 17, 25, 23,
+ 20, 25, 19, 7, 22, 23, 71, 4, 11, 4, 1, 74, 2,
+ 66, 68, 0, 69, 76, 64, 68, 92, 76, 74, 83, 78,
+ 89, 32, 38, 30, 30, 29, 23, 19, 23, 14, 7, 11,
+ 1, 73, 74, 96, 11, 8, 72, 8, 0, 65, 70, 74,
+ 78, 81, 83, 97, 75, 89, 100, 91, 100, 65, 33,
+ 23, 15, 5, 5, 64, 68, 72, 73, 6, 46, 30, 23,
+ 17, 30, 11, 5, 65, 66, 68, 38, 23, 13, 6, 14,
+ 3, 66, 70, 77, 75, 18, 6, 2, 66, 3, 70, 75,
+ 80, 1, 40, 23, 14, 7, 18, 4, 1, 64, 69, 62,
+ 81, 72, 0, 66, 69, 1, 7, 8, 65, 7, 13, 15, 77,
+ 71, 73, 0, 15, 65, 67, 5, 11, 18, 74, 66, 12,
+ 14, 81, 78, 81, 80, 75, 65, 74, 69, 71, 67, 1,
+ 73, 71, 70, 64, 77, 74, 81, 85, 67, 98, 76,
+ 64, 70, 10, 69, 65, 18, 67, 69, 66, 14, 76,
+ 73, 93, 28, 33, 34, 26, 16, 27, 23, 18, 17,
+ 15, 14, 10, 71, 72, 79, 0, 2, 70, 87, 71, 71,
+ 77, 76, 76, 73, 74, 88, 86, 85, 29, 28, 27,
+ 22, 10, 16, 13, 1, 7, 1, 0, 75, 76, 79, 83,
+ 10, 13, 72, 10, 11, 1, 68, 68, 70, 73, 72, 77,
+ 86, 87, 91, 95, 102, 81, 91, 98, 87, 73, 79,
+ 0, 1, 2, 43, 12, 11, 68, 21, 54, 40, 60, 11,
+ 32, 26, 1, 80, 88, 93, 119, 117, 118, 69, 42,
+ 28, 19, 9, 12, 2, 65, 68, 74, 78, 71, 3, 66,
+ 10, 10, 2, 5, 10, 13, 14, 7, 17, 15, 1, 33,
+ 28, 17, 10, 4, 65, 87, 92, 94 },
+
+ {
+
+ 38,
+ 5, 80, 38, 5, 80, 71, 5, 22, 11, 65, 79, 15,
+ 21, 52, 14, 19, 67, 11, 20, 64, 73, 65, 84,
+ 100, 2, 5, 119, 122, 122, 28, 64, 69, 11, 20,
+ 64, 74, 2, 14, 0, 0, 69, 74, 67, 81, 76, 92,
+ 5, 70, 74, 64, 77, 75, 88, 10, 64, 66, 70, 5,
+ 3, 22, 0, 0, 0, 65, 94, 97, 5, 67, 68, 52, 6,
+ 69, 93, 76, 82, 3, 65, 73, 65, 79, 4, 73, 72,
+ 67, 89, 91, 90, 93, 24, 64, 9, 78, 64, 77, 70,
+ 67, 72, 85, 77, 87, 15, 72, 1, 81, 79, 65, 71,
+ 2, 8, 3, 3, 15, 12, 77, 66, 1, 66, 2, 74, 11,
+ 66, 8, 19, 3, 16, 17, 9, 79, 10, 64, 75, 65,
+ 90, 82, 70, 75, 65, 70, 68, 11, 66, 69, 2, 7,
+ 72, 72, 64, 95, 86, 70, 95, 67, 76, 10, 65,
+ 76, 20, 1, 65, 79, 21, 75, 86, 1, 96, 15, 24,
+ 22, 19, 24, 18, 6, 21, 22, 73, 3, 10, 3, 1,
+ 75, 1, 67, 69, 64, 70, 77, 65, 69, 94, 78, 75,
+ 85, 80, 89, 30, 36, 28, 28, 27, 20, 16, 20,
+ 11, 4, 8, 65, 76, 77, 99, 9, 6, 74, 5, 66, 68,
+ 73, 77, 81, 84, 86, 100, 76, 91, 102, 92, 101,
+ 65, 33, 23, 15, 5, 5, 65, 68, 72, 73, 6, 46,
+ 30, 23, 17, 31, 11, 5, 65, 66, 68, 38, 23, 13,
+ 6, 14, 3, 66, 70, 76, 75, 19, 6, 2, 66, 3, 70,
+ 75, 79, 1, 40, 23, 14, 7, 18, 4, 1, 64, 69,
+ 62, 81, 72, 0, 66, 69, 1, 7, 8, 65, 7, 13, 15,
+ 77, 71, 74, 64, 15, 66, 68, 4, 10, 17, 76, 68,
+ 11, 13, 82, 80, 82, 80, 75, 64, 74, 69, 71,
+ 67, 1, 74, 71, 70, 64, 78, 74, 81, 85, 67, 99,
+ 76, 64, 70, 10, 70, 65, 18, 68, 70, 66, 14,
+ 77, 74, 94, 27, 32, 33, 25, 14, 26, 22, 17,
+ 16, 14, 13, 9, 72, 74, 81, 65, 0, 72, 89, 73,
+ 73, 78, 77, 77, 73, 74, 88, 87, 85, 26, 25,
+ 25, 19, 7, 13, 10, 65, 4, 65, 66, 78, 79, 81,
+ 84, 8, 11, 74, 8, 8, 65, 71, 71, 73, 75, 75,
+ 80, 89, 89, 94, 97, 104, 83, 93, 100, 86, 72,
+ 78, 0, 1, 2, 44, 12, 11, 68, 21, 55, 41, 61,
+ 11, 30, 24, 65, 84, 91, 96, 122, 120, 120, 69,
+ 42, 27, 18, 9, 12, 2, 66, 68, 74, 78, 71, 3,
+ 66, 11, 10, 2, 5, 10, 13, 14, 7, 17, 15, 0,
+ 31, 26, 15, 8, 2, 67, 90, 94, 95 },
+
+ {
+
+ 37,
+ 5, 80, 37, 5, 80, 69, 7, 23, 12, 65, 80, 14,
+ 20, 52, 14, 22, 67, 12, 21, 64, 73, 64, 85,
+ 101, 2, 4, 120, 123, 122, 31, 1, 69, 12, 21,
+ 64, 73, 4, 15, 0, 1, 68, 73, 67, 81, 75, 92,
+ 5, 69, 73, 64, 77, 74, 88, 11, 0, 65, 69, 6,
+ 3, 22, 0, 0, 0, 64, 94, 97, 6, 67, 68, 52, 6,
+ 69, 91, 75, 82, 4, 0, 71, 0, 77, 6, 71, 70,
+ 65, 87, 90, 89, 92, 25, 0, 10, 77, 0, 76, 69,
+ 65, 71, 84, 76, 85, 16, 71, 2, 79, 78, 64, 70,
+ 3, 9, 4, 4, 16, 13, 77, 66, 2, 66, 2, 73, 11,
+ 66, 8, 19, 3, 16, 17, 9, 80, 10, 64, 76, 66,
+ 90, 81, 69, 74, 64, 69, 67, 12, 65, 68, 3, 9,
+ 72, 72, 0, 96, 85, 69, 95, 66, 76, 11, 64, 76,
+ 21, 1, 65, 80, 22, 75, 87, 1, 96, 14, 24, 22,
+ 19, 24, 18, 6, 21, 22, 74, 3, 10, 3, 1, 75, 1,
+ 67, 69, 64, 70, 78, 65, 69, 95, 79, 75, 86,
+ 81, 89, 29, 35, 27, 27, 26, 18, 14, 18, 9, 2,
+ 6, 67, 78, 79, 101, 8, 5, 75, 3, 68, 70, 75,
+ 79, 83, 86, 88, 102, 76, 92, 103, 92, 101, 64,
+ 34, 24, 16, 6, 5, 65, 67, 71, 72, 7, 47, 31,
+ 24, 17, 32, 12, 6, 64, 65, 67, 39, 24, 14, 7,
+ 15, 4, 65, 69, 74, 74, 21, 7, 3, 65, 4, 69,
+ 74, 77, 2, 41, 24, 15, 8, 19, 5, 2, 0, 68, 62,
+ 80, 71, 1, 66, 68, 1, 8, 9, 64, 8, 14, 16, 76,
+ 71, 74, 64, 16, 66, 68, 3, 10, 17, 77, 69, 11,
+ 13, 82, 81, 82, 79, 74, 0, 73, 68, 70, 66, 2,
+ 74, 70, 69, 0, 78, 73, 80, 84, 66, 99, 76, 0,
+ 70, 11, 70, 65, 19, 68, 70, 65, 15, 77, 74,
+ 94, 27, 32, 33, 25, 13, 26, 22, 17, 16, 14,
+ 13, 9, 72, 75, 82, 66, 64, 73, 90, 74, 74, 78,
+ 77, 77, 72, 73, 87, 87, 84, 24, 23, 23, 17, 5,
+ 11, 8, 67, 2, 67, 68, 80, 81, 82, 84, 7, 10,
+ 75, 7, 6, 67, 73, 73, 75, 77, 77, 82, 91, 90,
+ 96, 98, 105, 84, 94, 101, 84, 70, 76, 1, 2, 3,
+ 46, 13, 12, 68, 22, 57, 43, 62, 12, 29, 23,
+ 67, 87, 93, 98, 124, 122, 121, 68, 43, 27, 18,
+ 9, 13, 2, 66, 68, 73, 77, 70, 4, 65, 13, 11,
+ 3, 6, 11, 14, 15, 8, 18, 16, 0, 30, 25, 14, 7,
+ 1, 68, 92, 95, 95 },
+
+ {
+
+ 36,
+ 5, 80, 36, 5, 80, 67, 8, 23, 12, 65, 81, 13,
+ 19, 52, 14, 24, 67, 13, 22, 64, 74, 64, 86,
+ 102, 1, 2, 122, 124, 123, 34, 2, 69, 13, 22,
+ 64, 73, 5, 15, 64, 1, 68, 72, 67, 81, 75, 92,
+ 5, 69, 72, 64, 77, 74, 88, 11, 0, 65, 69, 6,
+ 3, 22, 0, 0, 0, 0, 94, 97, 6, 68, 68, 52, 6,
+ 69, 90, 74, 82, 4, 1, 69, 2, 76, 7, 69, 69, 0,
+ 86, 89, 88, 91, 26, 0, 11, 76, 0, 76, 68, 0,
+ 71, 84, 76, 84, 16, 71, 2, 78, 78, 0, 70, 3,
+ 9, 4, 5, 16, 13, 78, 66, 2, 66, 2, 73, 11, 66,
+ 7, 19, 2, 16, 17, 9, 82, 10, 64, 78, 67, 91,
+ 80, 68, 73, 0, 68, 66, 13, 64, 68, 4, 10, 72,
+ 72, 0, 97, 85, 68, 95, 66, 76, 12, 64, 77, 22,
+ 1, 65, 81, 23, 76, 88, 1, 97, 12, 23, 21, 18,
+ 23, 17, 5, 20, 22, 75, 2, 10, 3, 1, 75, 0, 68,
+ 70, 65, 71, 79, 66, 70, 97, 80, 76, 87, 82,
+ 89, 27, 34, 25, 25, 24, 16, 12, 16, 6, 0, 4,
+ 70, 81, 81, 104, 6, 3, 77, 1, 70, 72, 77, 81,
+ 85, 89, 90, 104, 77, 94, 104, 93, 101, 64, 34,
+ 24, 16, 6, 5, 65, 67, 71, 71, 7, 47, 31, 24,
+ 17, 33, 12, 7, 64, 64, 66, 40, 25, 14, 7, 16,
+ 4, 65, 68, 73, 73, 22, 8, 3, 65, 5, 68, 73,
+ 76, 2, 41, 24, 15, 8, 20, 6, 3, 1, 67, 62, 80,
+ 71, 1, 66, 68, 1, 8, 9, 64, 8, 14, 17, 76, 71,
+ 74, 65, 16, 67, 69, 2, 9, 17, 78, 70, 11, 13,
+ 83, 82, 83, 78, 73, 1, 73, 68, 70, 65, 3, 74,
+ 70, 69, 0, 79, 73, 80, 84, 65, 99, 76, 0, 70,
+ 12, 70, 65, 20, 68, 70, 65, 16, 78, 74, 95,
+ 26, 32, 33, 24, 12, 25, 21, 16, 15, 13, 12, 8,
+ 73, 76, 83, 68, 65, 75, 92, 75, 75, 79, 78,
+ 77, 72, 73, 87, 88, 83, 22, 21, 21, 15, 2, 9,
+ 6, 70, 0, 69, 70, 82, 83, 83, 85, 5, 8, 76, 5,
+ 4, 69, 75, 75, 77, 79, 79, 84, 93, 92, 98,
+ 100, 106, 86, 96, 103, 83, 69, 75, 2, 3, 4,
+ 48, 14, 13, 68, 23, 58, 44, 62, 13, 28, 21,
+ 70, 90, 96, 101, 126, 124, 122, 68, 43, 27,
+ 18, 9, 13, 2, 66, 68, 72, 76, 69, 5, 64, 14,
+ 12, 4, 7, 12, 15, 16, 8, 19, 16, 0, 29, 23,
+ 12, 5, 64, 70, 94, 97, 96 },
+
+ {
+
+ 35,
+ 5, 80, 35, 5, 80, 65, 10, 24, 12, 66, 83, 11,
+ 18, 52, 14, 26, 67, 14, 23, 64, 75, 64, 87,
+ 103, 1, 0, 123, 125, 123, 37, 3, 69, 14, 23,
+ 64, 72, 6, 16, 64, 1, 67, 71, 68, 82, 75, 92,
+ 5, 69, 72, 64, 77, 74, 88, 12, 0, 65, 69, 7,
+ 3, 22, 0, 0, 0, 0, 94, 97, 7, 69, 68, 52, 6,
+ 69, 89, 73, 82, 5, 2, 67, 4, 74, 8, 67, 68, 2,
+ 85, 88, 87, 90, 27, 0, 12, 75, 0, 75, 68, 2,
+ 71, 83, 75, 83, 16, 71, 3, 76, 77, 1, 70, 3,
+ 9, 4, 6, 17, 14, 78, 66, 2, 66, 2, 73, 11, 67,
+ 7, 19, 1, 16, 17, 9, 84, 10, 65, 80, 68, 92,
+ 79, 67, 72, 1, 67, 66, 14, 64, 67, 5, 11, 72,
+ 72, 0, 98, 84, 67, 95, 66, 76, 13, 64, 78, 23,
+ 1, 65, 82, 24, 76, 89, 1, 98, 11, 22, 21, 17,
+ 22, 16, 5, 19, 21, 76, 1, 10, 3, 1, 76, 64,
+ 69, 71, 66, 72, 80, 67, 70, 99, 81, 76, 88,
+ 83, 89, 26, 32, 24, 23, 22, 14, 10, 14, 4, 66,
+ 2, 72, 83, 84, 106, 4, 1, 78, 64, 72, 75, 80,
+ 83, 88, 91, 92, 107, 78, 96, 105, 94, 101, 64,
+ 35, 24, 16, 6, 5, 65, 67, 71, 71, 7, 47, 31,
+ 24, 17, 34, 13, 7, 0, 64, 65, 41, 25, 15, 7,
+ 17, 5, 64, 68, 72, 72, 23, 9, 4, 65, 6, 67,
+ 72, 75, 3, 41, 24, 15, 8, 21, 6, 4, 2, 66, 62,
+ 79, 70, 2, 66, 68, 1, 8, 9, 64, 8, 14, 17, 76,
+ 71, 74, 65, 17, 68, 70, 1, 8, 17, 79, 71, 11,
+ 13, 83, 83, 83, 77, 72, 2, 72, 67, 69, 65, 4,
+ 74, 70, 69, 1, 79, 73, 80, 83, 64, 99, 76, 0,
+ 70, 13, 70, 65, 21, 68, 70, 65, 17, 79, 74,
+ 96, 25, 31, 32, 23, 11, 24, 20, 15, 14, 12,
+ 11, 7, 74, 77, 84, 69, 66, 76, 94, 77, 76, 80,
+ 78, 78, 72, 73, 87, 88, 82, 20, 19, 19, 13,
+ 64, 7, 4, 72, 65, 71, 72, 85, 85, 85, 86, 3,
+ 7, 78, 3, 2, 71, 77, 77, 79, 81, 81, 86, 96,
+ 94, 100, 102, 107, 88, 98, 104, 82, 68, 74, 3,
+ 4, 5, 49, 15, 14, 68, 24, 60, 46, 62, 14, 26,
+ 20, 72, 93, 99, 104, 126, 126, 123, 68, 43,
+ 27, 18, 9, 13, 2, 66, 68, 72, 75, 68, 6, 0,
+ 15, 12, 4, 8, 12, 15, 16, 8, 19, 16, 0, 28,
+ 22, 10, 3, 66, 72, 96, 99, 97 },
+
+ {
+
+ 33,
+ 5, 80, 33, 5, 80, 64, 11, 24, 12, 66, 84, 10,
+ 16, 52, 14, 29, 68, 15, 23, 64, 76, 64, 88,
+ 104, 0, 65, 125, 126, 124, 40, 4, 69, 15, 23,
+ 64, 72, 7, 16, 65, 1, 67, 71, 68, 82, 74, 92,
+ 5, 69, 71, 65, 78, 74, 88, 12, 1, 65, 68, 7,
+ 3, 22, 0, 0, 0, 1, 95, 97, 7, 70, 68, 52, 6,
+ 69, 88, 72, 82, 5, 4, 66, 6, 73, 9, 66, 66, 4,
+ 84, 88, 86, 90, 27, 0, 13, 74, 0, 75, 67, 4,
+ 71, 83, 75, 82, 16, 70, 3, 75, 77, 1, 70, 4,
+ 9, 4, 6, 17, 14, 79, 67, 2, 66, 2, 73, 11, 67,
+ 6, 19, 1, 16, 17, 8, 86, 10, 65, 82, 69, 93,
+ 78, 66, 71, 2, 67, 65, 15, 0, 67, 6, 12, 72,
+ 72, 1, 99, 84, 66, 95, 66, 76, 13, 64, 79, 24,
+ 1, 65, 84, 25, 77, 90, 1, 99, 9, 21, 20, 16,
+ 21, 15, 4, 18, 21, 78, 0, 9, 3, 1, 76, 65, 69,
+ 72, 66, 73, 81, 68, 71, 101, 82, 77, 89, 84,
+ 89, 24, 31, 22, 21, 20, 12, 7, 12, 1, 68, 0,
+ 75, 86, 86, 109, 2, 0, 80, 67, 74, 77, 82, 86,
+ 90, 94, 94, 109, 79, 97, 107, 95, 101, 64, 35,
+ 25, 16, 6, 5, 65, 67, 71, 70, 8, 48, 31, 24,
+ 17, 35, 13, 8, 0, 0, 65, 42, 26, 15, 7, 17, 5,
+ 64, 67, 71, 72, 24, 9, 4, 65, 7, 66, 72, 74,
+ 3, 42, 24, 15, 8, 22, 7, 4, 3, 65, 62, 79, 70,
+ 2, 66, 68, 1, 8, 9, 0, 8, 14, 18, 76, 71, 74,
+ 66, 17, 69, 71, 0, 7, 17, 81, 72, 10, 13, 84,
+ 84, 84, 77, 72, 3, 72, 67, 69, 64, 5, 75, 70,
+ 69, 1, 80, 73, 80, 83, 0, 99, 76, 1, 70, 13,
+ 70, 65, 22, 68, 71, 65, 18, 79, 75, 97, 24,
+ 31, 32, 22, 10, 23, 19, 14, 13, 11, 10, 6, 75,
+ 78, 85, 71, 68, 78, 96, 78, 77, 81, 79, 78,
+ 72, 73, 87, 89, 82, 17, 16, 17, 10, 67, 5, 2,
+ 75, 67, 73, 74, 87, 87, 86, 87, 1, 5, 79, 1,
+ 0, 73, 79, 80, 81, 83, 83, 88, 98, 96, 102,
+ 104, 108, 90, 100, 106, 81, 67, 73, 3, 5, 6,
+ 51, 16, 15, 68, 25, 61, 47, 62, 14, 25, 18,
+ 75, 96, 102, 107, 126, 126, 124, 68, 43, 27,
+ 18, 9, 13, 2, 66, 68, 71, 74, 67, 7, 0, 16,
+ 13, 5, 9, 13, 16, 17, 9, 20, 17, 0, 26, 20, 8,
+ 1, 68, 74, 98, 101, 98 },
+
+ {
+
+ 32,
+ 5, 80, 32, 5, 80, 1, 13, 24, 12, 67, 86, 8,
+ 15, 52, 14, 31, 68, 16, 24, 64, 76, 0, 89,
+ 105, 0, 67, 126, 126, 124, 43, 6, 69, 16, 24,
+ 64, 72, 8, 16, 65, 1, 67, 70, 68, 83, 74, 92,
+ 5, 69, 70, 65, 78, 74, 88, 13, 1, 64, 68, 8,
+ 3, 22, 0, 0, 0, 1, 95, 97, 8, 71, 68, 52, 6,
+ 69, 87, 71, 82, 6, 5, 64, 8, 71, 10, 64, 65,
+ 6, 83, 87, 85, 89, 28, 0, 14, 73, 0, 74, 66,
+ 6, 71, 83, 74, 80, 17, 70, 4, 74, 76, 2, 70,
+ 4, 10, 5, 7, 18, 15, 79, 67, 2, 66, 2, 73, 11,
+ 68, 5, 19, 0, 16, 17, 8, 88, 10, 66, 83, 70,
+ 94, 77, 65, 70, 3, 66, 64, 16, 1, 66, 7, 13,
+ 72, 72, 1, 100, 83, 65, 95, 65, 76, 14, 64,
+ 80, 25, 1, 65, 85, 26, 77, 91, 1, 99, 8, 20,
+ 20, 15, 20, 14, 3, 18, 21, 79, 64, 9, 3, 1,
+ 77, 65, 70, 73, 67, 74, 82, 68, 72, 103, 83,
+ 78, 90, 85, 89, 22, 30, 20, 19, 19, 10, 5, 10,
+ 64, 70, 65, 77, 88, 88, 111, 0, 65, 82, 69,
+ 76, 79, 84, 88, 92, 97, 96, 112, 80, 99, 108,
+ 95, 101, 64, 35, 25, 16, 6, 5, 65, 67, 71, 70,
+ 8, 48, 31, 24, 17, 36, 14, 9, 0, 1, 64, 43,
+ 27, 15, 8, 18, 6, 0, 67, 70, 71, 26, 10, 4,
+ 64, 8, 65, 71, 73, 4, 42, 25, 15, 8, 23, 8, 5,
+ 4, 64, 62, 79, 70, 3, 66, 68, 1, 8, 9, 0, 8,
+ 15, 18, 76, 71, 74, 67, 18, 70, 72, 64, 7, 17,
+ 82, 73, 10, 13, 85, 85, 84, 76, 71, 4, 71, 66,
+ 68, 64, 6, 75, 70, 69, 1, 81, 73, 80, 82, 1,
+ 99, 76, 1, 70, 14, 70, 65, 23, 68, 71, 65, 19,
+ 80, 75, 97, 23, 31, 31, 22, 9, 22, 18, 13, 13,
+ 10, 9, 5, 76, 79, 86, 72, 69, 79, 98, 79, 78,
+ 82, 80, 79, 72, 72, 87, 89, 81, 15, 14, 15, 8,
+ 69, 3, 0, 77, 69, 75, 76, 89, 89, 88, 88, 64,
+ 3, 80, 64, 65, 75, 81, 82, 83, 85, 85, 90,
+ 101, 98, 104, 106, 109, 92, 102, 107, 80, 65,
+ 72, 4, 6, 7, 53, 17, 16, 68, 26, 62, 49, 62,
+ 15, 23, 16, 77, 99, 104, 110, 126, 126, 125,
+ 68, 43, 27, 18, 9, 13, 2, 66, 68, 71, 73, 66,
+ 8, 1, 17, 14, 5, 10, 14, 17, 18, 9, 20, 17, 0,
+ 25, 19, 7, 0, 70, 76, 100, 103, 99 },
+
+ {
+
+ 31,
+ 5, 81, 31, 5, 81, 3, 14, 25, 12, 67, 87, 7,
+ 14, 52, 14, 33, 68, 16, 25, 64, 77, 0, 90,
+ 106, 64, 68, 126, 126, 125, 46, 7, 69, 16, 25,
+ 64, 71, 9, 17, 66, 2, 66, 69, 69, 83, 74, 92,
+ 5, 68, 70, 65, 78, 73, 88, 13, 1, 64, 68, 8,
+ 3, 22, 0, 0, 0, 2, 95, 97, 8, 71, 69, 52, 6,
+ 69, 86, 71, 82, 6, 6, 1, 10, 70, 11, 1, 64, 8,
+ 82, 86, 84, 88, 29, 0, 15, 73, 1, 74, 66, 8,
+ 71, 82, 74, 79, 17, 70, 4, 72, 76, 3, 69, 4,
+ 10, 5, 8, 18, 15, 80, 67, 2, 66, 2, 73, 11,
+ 68, 5, 19, 64, 16, 17, 8, 90, 10, 66, 85, 71,
+ 95, 77, 64, 70, 3, 65, 64, 17, 1, 66, 7, 15,
+ 72, 72, 1, 101, 83, 64, 95, 65, 76, 15, 0, 81,
+ 25, 1, 65, 86, 27, 78, 92, 1, 100, 6, 19, 19,
+ 15, 19, 14, 3, 17, 20, 80, 65, 9, 3, 1, 77,
+ 66, 71, 74, 68, 74, 83, 69, 72, 105, 84, 78,
+ 91, 86, 89, 21, 28, 19, 17, 17, 8, 3, 8, 67,
+ 73, 67, 80, 91, 91, 114, 65, 67, 83, 71, 79,
+ 82, 87, 90, 95, 99, 99, 114, 81, 101, 109, 96,
+ 101, 0, 36, 25, 17, 6, 5, 65, 67, 71, 69, 8,
+ 48, 32, 24, 17, 37, 14, 9, 1, 1, 0, 44, 27,
+ 16, 8, 19, 6, 0, 66, 69, 70, 27, 11, 5, 64, 8,
+ 65, 70, 71, 4, 42, 25, 15, 9, 23, 8, 6, 4, 0,
+ 62, 78, 69, 3, 66, 68, 1, 8, 9, 0, 9, 15, 19,
+ 76, 71, 75, 67, 18, 71, 73, 65, 6, 17, 83, 74,
+ 10, 13, 85, 86, 85, 75, 70, 5, 71, 66, 68, 0,
+ 6, 75, 69, 68, 2, 81, 73, 80, 82, 2, 99, 76,
+ 1, 70, 15, 70, 65, 24, 69, 71, 65, 19, 81, 75,
+ 98, 22, 30, 31, 21, 8, 22, 18, 12, 12, 10, 8,
+ 4, 77, 80, 87, 74, 70, 81, 100, 81, 80, 83,
+ 80, 79, 72, 72, 86, 90, 80, 13, 12, 13, 6, 72,
+ 1, 65, 80, 71, 78, 78, 92, 91, 89, 88, 65, 2,
+ 82, 66, 68, 77, 83, 84, 85, 87, 88, 92, 103,
+ 100, 106, 107, 111, 94, 104, 109, 79, 64, 71,
+ 5, 7, 8, 54, 18, 17, 68, 27, 62, 50, 62, 16,
+ 22, 15, 80, 102, 107, 112, 126, 126, 126, 67,
+ 43, 27, 18, 9, 13, 2, 66, 68, 70, 72, 66, 9,
+ 2, 18, 14, 6, 11, 14, 17, 18, 9, 21, 17, 0,
+ 24, 17, 5, 65, 71, 77, 103, 104, 100 },
+
+ {
+
+ 30,
+ 5, 81, 30, 5, 81, 5, 16, 25, 12, 68, 89, 5,
+ 12, 52, 14, 36, 69, 17, 25, 64, 78, 0, 91,
+ 107, 64, 70, 126, 126, 125, 49, 8, 69, 17, 25,
+ 64, 71, 10, 17, 66, 2, 66, 69, 69, 84, 73, 92,
+ 5, 68, 69, 66, 78, 73, 88, 14, 2, 64, 67, 9,
+ 3, 22, 0, 0, 0, 2, 95, 97, 9, 72, 69, 52, 6,
+ 69, 85, 70, 82, 7, 8, 2, 12, 68, 12, 2, 1, 10,
+ 81, 85, 83, 88, 30, 0, 16, 72, 1, 73, 65, 10,
+ 71, 82, 73, 78, 17, 69, 5, 71, 75, 3, 69, 5,
+ 10, 5, 9, 19, 16, 80, 68, 2, 66, 2, 73, 11,
+ 69, 4, 19, 64, 16, 17, 7, 92, 10, 67, 87, 72,
+ 96, 76, 0, 69, 4, 64, 0, 18, 2, 65, 8, 16, 72,
+ 72, 2, 102, 82, 0, 95, 65, 76, 15, 0, 82, 26,
+ 1, 65, 88, 28, 78, 93, 1, 101, 5, 18, 19, 14,
+ 18, 13, 2, 16, 20, 81, 66, 9, 3, 1, 78, 67,
+ 71, 75, 68, 75, 84, 70, 73, 107, 85, 79, 92,
+ 87, 89, 19, 27, 17, 15, 15, 6, 0, 6, 69, 75,
+ 69, 82, 93, 93, 116, 67, 68, 85, 74, 81, 84,
+ 89, 93, 97, 102, 101, 117, 82, 102, 111, 97,
+ 101, 0, 36, 26, 17, 6, 5, 65, 67, 71, 69, 9,
+ 49, 32, 24, 17, 38, 15, 10, 1, 2, 1, 45, 28,
+ 16, 8, 20, 7, 1, 66, 68, 70, 28, 11, 5, 64, 9,
+ 64, 70, 70, 5, 43, 25, 15, 9, 24, 9, 7, 5, 1,
+ 62, 78, 69, 4, 66, 68, 1, 8, 9, 1, 9, 15, 19,
+ 76, 71, 75, 68, 19, 72, 74, 66, 5, 17, 84, 75,
+ 9, 13, 86, 87, 85, 74, 70, 6, 70, 65, 67, 0,
+ 7, 75, 69, 68, 2, 82, 73, 80, 81, 3, 99, 76,
+ 2, 70, 15, 70, 65, 25, 69, 71, 65, 20, 81, 76,
+ 99, 21, 30, 30, 20, 7, 21, 17, 11, 11, 9, 7,
+ 3, 78, 81, 88, 75, 72, 82, 102, 82, 81, 84,
+ 81, 80, 72, 72, 86, 90, 79, 11, 10, 11, 3, 75,
+ 64, 67, 82, 73, 80, 80, 94, 93, 91, 89, 67, 0,
+ 83, 68, 70, 79, 85, 86, 87, 89, 90, 94, 106,
+ 102, 108, 109, 112, 96, 106, 110, 78, 0, 70,
+ 5, 8, 9, 56, 19, 18, 68, 28, 62, 52, 62, 16,
+ 20, 13, 82, 105, 110, 115, 126, 126, 126, 67,
+ 43, 27, 18, 9, 13, 2, 66, 68, 70, 71, 65, 10,
+ 3, 19, 15, 6, 12, 15, 18, 19, 10, 21, 18, 0,
+ 22, 16, 3, 67, 73, 79, 105, 106, 101 },
+
+ {
+
+ 28,
+ 4, 81, 28, 4, 81, 6, 17, 25, 12, 68, 90, 4,
+ 11, 52, 14, 38, 69, 18, 26, 64, 79, 0, 92,
+ 109, 65, 72, 126, 126, 126, 51, 9, 69, 18, 26,
+ 64, 71, 11, 17, 67, 2, 66, 68, 70, 84, 73, 93,
+ 5, 68, 69, 66, 79, 73, 88, 14, 2, 64, 67, 9,
+ 3, 22, 0, 0, 0, 3, 96, 97, 9, 73, 69, 52, 6,
+ 69, 84, 69, 82, 7, 9, 4, 14, 67, 13, 4, 2, 11,
+ 80, 85, 82, 87, 30, 0, 17, 71, 1, 73, 65, 11,
+ 71, 82, 73, 77, 17, 69, 5, 70, 75, 4, 69, 5,
+ 10, 5, 9, 19, 16, 81, 68, 2, 66, 2, 73, 11,
+ 69, 3, 19, 65, 16, 17, 7, 94, 10, 67, 89, 73,
+ 97, 75, 1, 68, 5, 64, 0, 19, 2, 65, 9, 17, 72,
+ 72, 2, 104, 82, 1, 95, 65, 77, 16, 0, 83, 27,
+ 1, 65, 89, 29, 79, 94, 1, 102, 3, 17, 18, 13,
+ 17, 12, 1, 15, 19, 83, 67, 8, 3, 1, 78, 68,
+ 72, 76, 69, 76, 85, 71, 74, 109, 87, 80, 93,
+ 88, 89, 17, 25, 15, 13, 13, 4, 65, 4, 72, 78,
+ 71, 85, 96, 96, 119, 69, 70, 87, 76, 83, 87,
+ 92, 95, 100, 105, 103, 119, 83, 104, 112, 98,
+ 102, 0, 36, 26, 17, 6, 5, 65, 67, 71, 68, 9,
+ 49, 32, 24, 17, 39, 15, 10, 1, 2, 1, 46, 28,
+ 16, 8, 20, 7, 1, 65, 67, 69, 29, 12, 5, 64,
+ 10, 0, 69, 69, 5, 43, 25, 15, 9, 25, 9, 7, 6,
+ 1, 62, 78, 69, 4, 66, 68, 1, 8, 9, 1, 9, 15,
+ 20, 76, 71, 75, 69, 19, 73, 75, 67, 4, 17, 86,
+ 77, 9, 13, 87, 88, 86, 74, 69, 7, 70, 65, 67,
+ 1, 8, 76, 69, 68, 2, 83, 73, 80, 81, 3, 100,
+ 76, 2, 70, 16, 70, 65, 25, 69, 72, 65, 21, 82,
+ 76, 100, 20, 29, 30, 19, 5, 20, 16, 10, 10, 8,
+ 6, 2, 79, 82, 89, 77, 73, 84, 104, 84, 82, 85,
+ 82, 80, 72, 72, 86, 91, 79, 8, 7, 9, 1, 78,
+ 67, 70, 85, 75, 82, 82, 97, 95, 92, 90, 69,
+ 65, 85, 70, 72, 82, 88, 89, 90, 91, 92, 97,
+ 108, 104, 111, 111, 113, 98, 108, 112, 77, 1,
+ 69, 6, 9, 9, 57, 20, 18, 68, 28, 62, 53, 62,
+ 17, 19, 11, 85, 108, 113, 118, 126, 126, 126,
+ 67, 43, 27, 18, 9, 13, 2, 66, 68, 69, 71, 64,
+ 11, 3, 20, 15, 7, 12, 15, 18, 19, 10, 22, 18,
+ 64, 21, 14, 1, 69, 75, 81, 107, 108, 102 },
+
+ {
+
+ 27,
+ 4, 81, 27, 4, 81, 8, 18, 26, 12, 68, 91, 3,
+ 10, 52, 14, 40, 69, 19, 27, 64, 79, 1, 93,
+ 110, 66, 74, 126, 126, 126, 54, 11, 69, 19,
+ 27, 64, 70, 12, 18, 68, 2, 65, 67, 70, 84, 73,
+ 93, 5, 68, 68, 66, 79, 73, 88, 14, 2, 0, 67,
+ 9, 3, 22, 0, 0, 0, 4, 96, 97, 9, 74, 69, 52,
+ 6, 69, 83, 68, 82, 7, 10, 6, 16, 65, 15, 6, 3,
+ 13, 79, 84, 81, 86, 31, 1, 18, 70, 1, 72, 64,
+ 13, 71, 81, 73, 75, 18, 69, 5, 68, 75, 5, 69,
+ 5, 11, 6, 10, 19, 16, 81, 68, 3, 66, 2, 72,
+ 11, 69, 3, 19, 66, 16, 17, 7, 96, 10, 67, 90,
+ 74, 97, 74, 2, 67, 6, 0, 1, 20, 3, 65, 10, 18,
+ 72, 72, 2, 105, 81, 2, 95, 64, 77, 17, 0, 84,
+ 28, 1, 65, 90, 30, 80, 95, 1, 102, 2, 16, 18,
+ 12, 16, 11, 1, 15, 19, 84, 68, 8, 3, 1, 78,
+ 68, 73, 77, 70, 77, 86, 71, 74, 110, 88, 80,
+ 94, 89, 89, 16, 24, 14, 12, 12, 2, 67, 2, 74,
+ 80, 73, 87, 99, 98, 122, 70, 72, 88, 78, 85,
+ 89, 94, 97, 102, 107, 105, 121, 83, 106, 113,
+ 98, 102, 0, 37, 26, 17, 7, 5, 65, 66, 71, 67,
+ 9, 49, 32, 25, 17, 40, 16, 11, 2, 3, 2, 47,
+ 29, 17, 9, 21, 8, 1, 64, 66, 68, 31, 13, 6, 0,
+ 11, 1, 68, 68, 6, 43, 26, 16, 9, 26, 10, 8, 7,
+ 2, 62, 77, 68, 5, 66, 68, 1, 9, 10, 1, 9, 16,
+ 21, 76, 71, 75, 69, 19, 74, 75, 68, 4, 17, 87,
+ 78, 9, 13, 87, 89, 87, 73, 68, 8, 70, 64, 67,
+ 2, 9, 76, 69, 68, 3, 83, 73, 80, 81, 4, 100,
+ 76, 2, 70, 17, 70, 65, 26, 69, 72, 65, 22, 83,
+ 76, 100, 19, 29, 30, 19, 4, 19, 15, 9, 10, 7,
+ 5, 2, 79, 83, 90, 78, 74, 86, 106, 85, 83, 85,
+ 82, 80, 71, 71, 86, 92, 78, 6, 5, 7, 64, 80,
+ 69, 72, 87, 77, 84, 84, 99, 97, 93, 91, 71,
+ 66, 86, 72, 74, 84, 90, 91, 92, 93, 94, 99,
+ 110, 105, 113, 113, 114, 100, 110, 114, 76, 3,
+ 67, 7, 10, 10, 59, 21, 19, 68, 29, 62, 54, 62,
+ 18, 18, 10, 87, 111, 115, 121, 126, 126, 126,
+ 67, 43, 27, 18, 9, 14, 2, 66, 68, 68, 70, 0,
+ 12, 4, 22, 16, 8, 13, 16, 19, 20, 10, 23, 18,
+ 64, 20, 13, 0, 70, 77, 83, 109, 110, 102 },
+
+ {
+
+ 26,
+ 4, 81, 26, 4, 81, 10, 20, 26, 12, 69, 93, 1,
+ 8, 52, 14, 43, 70, 20, 27, 64, 80, 1, 94, 111,
+ 66, 76, 126, 126, 126, 57, 12, 69, 20, 27, 64,
+ 70, 13, 18, 68, 2, 65, 67, 70, 85, 72, 93, 5,
+ 68, 67, 67, 79, 73, 88, 15, 3, 0, 66, 10, 3,
+ 22, 0, 0, 0, 4, 96, 97, 10, 75, 69, 52, 6, 69,
+ 82, 67, 82, 8, 12, 7, 18, 64, 16, 7, 5, 15,
+ 78, 83, 80, 86, 32, 1, 19, 69, 1, 72, 0, 15,
+ 71, 81, 72, 74, 18, 68, 6, 67, 74, 5, 69, 6,
+ 11, 6, 11, 20, 17, 82, 69, 3, 66, 2, 72, 11,
+ 70, 2, 19, 66, 16, 17, 6, 98, 10, 68, 92, 75,
+ 98, 73, 3, 66, 7, 1, 2, 21, 4, 64, 11, 19, 72,
+ 72, 3, 106, 81, 3, 95, 64, 77, 17, 0, 85, 29,
+ 1, 65, 92, 31, 80, 96, 1, 103, 0, 15, 17, 11,
+ 15, 10, 0, 14, 19, 85, 69, 8, 3, 1, 79, 69,
+ 73, 78, 70, 78, 87, 72, 75, 112, 89, 81, 95,
+ 90, 89, 14, 23, 12, 10, 10, 0, 70, 0, 77, 82,
+ 75, 90, 101, 100, 124, 72, 73, 90, 81, 87, 91,
+ 96, 100, 104, 110, 107, 124, 84, 107, 115, 99,
+ 102, 0, 37, 27, 17, 7, 5, 65, 66, 71, 67, 10,
+ 50, 32, 25, 17, 41, 16, 12, 2, 4, 3, 48, 30,
+ 17, 9, 22, 8, 2, 64, 65, 68, 32, 13, 6, 0, 12,
+ 2, 68, 67, 6, 44, 26, 16, 9, 27, 11, 9, 8, 3,
+ 62, 77, 68, 5, 66, 68, 1, 9, 10, 2, 9, 16, 21,
+ 76, 71, 75, 70, 20, 75, 76, 69, 3, 17, 88, 79,
+ 8, 13, 88, 90, 87, 72, 68, 9, 69, 64, 66, 2,
+ 10, 76, 69, 68, 3, 84, 73, 80, 80, 5, 100, 76,
+ 3, 70, 17, 70, 65, 27, 69, 72, 65, 23, 83, 77,
+ 101, 18, 29, 29, 18, 3, 18, 14, 8, 9, 6, 4, 1,
+ 80, 84, 91, 80, 76, 87, 108, 86, 84, 86, 83,
+ 81, 71, 71, 86, 92, 77, 4, 3, 5, 67, 83, 71,
+ 74, 90, 79, 86, 86, 101, 99, 95, 92, 73, 68,
+ 87, 74, 76, 86, 92, 93, 94, 95, 96, 101, 113,
+ 107, 115, 115, 115, 102, 112, 115, 75, 4, 66,
+ 7, 11, 11, 61, 22, 20, 68, 30, 62, 56, 62, 18,
+ 16, 8, 90, 114, 118, 124, 126, 126, 126, 67,
+ 43, 27, 18, 9, 14, 2, 66, 68, 68, 69, 1, 13,
+ 5, 23, 17, 8, 14, 17, 20, 21, 11, 23, 19, 64,
+ 18, 11, 65, 72, 79, 85, 111, 112, 103 },
+
+ {
+
+ 25,
+ 4, 82, 25, 4, 82, 12, 21, 27, 12, 69, 94, 0,
+ 7, 52, 14, 45, 70, 20, 28, 64, 81, 1, 95, 112,
+ 67, 77, 126, 126, 126, 60, 13, 69, 20, 28, 64,
+ 69, 14, 19, 69, 3, 64, 66, 71, 85, 72, 93, 5,
+ 67, 67, 67, 79, 72, 88, 15, 3, 0, 66, 10, 3,
+ 22, 0, 0, 0, 5, 96, 97, 10, 75, 70, 52, 6, 69,
+ 81, 67, 82, 8, 13, 9, 20, 1, 17, 9, 6, 17, 77,
+ 82, 79, 85, 33, 1, 20, 69, 2, 71, 0, 17, 71,
+ 80, 72, 73, 18, 68, 6, 65, 74, 6, 68, 6, 11,
+ 6, 12, 20, 17, 82, 69, 3, 66, 2, 72, 11, 70,
+ 2, 19, 67, 16, 17, 6, 100, 10, 68, 94, 76, 99,
+ 73, 4, 66, 7, 2, 2, 22, 4, 64, 11, 21, 72, 72,
+ 3, 107, 80, 4, 95, 64, 77, 18, 1, 86, 29, 1,
+ 65, 93, 32, 81, 97, 1, 104, 64, 14, 17, 11,
+ 14, 10, 0, 13, 18, 86, 70, 8, 3, 1, 79, 70,
+ 74, 79, 71, 78, 88, 73, 75, 114, 90, 81, 96,
+ 91, 89, 13, 21, 11, 8, 8, 65, 72, 65, 79, 85,
+ 77, 92, 104, 103, 126, 74, 75, 91, 83, 90, 94,
+ 99, 102, 107, 112, 110, 126, 85, 109, 116,
+ 100, 102, 1, 38, 27, 18, 7, 5, 65, 66, 71, 66,
+ 10, 50, 33, 25, 17, 42, 17, 12, 3, 4, 4, 49,
+ 30, 18, 9, 23, 9, 2, 0, 64, 67, 33, 14, 7, 0,
+ 12, 2, 67, 65, 7, 44, 26, 16, 10, 27, 11, 10,
+ 8, 4, 62, 76, 67, 6, 66, 68, 1, 9, 10, 2, 10,
+ 16, 22, 76, 71, 76, 70, 20, 76, 77, 70, 2, 17,
+ 89, 80, 8, 13, 88, 91, 88, 71, 67, 10, 69, 0,
+ 66, 3, 10, 76, 68, 67, 4, 84, 73, 80, 80, 6,
+ 100, 76, 3, 70, 18, 70, 65, 28, 70, 72, 65,
+ 23, 84, 77, 102, 17, 28, 29, 17, 2, 18, 14, 7,
+ 8, 6, 3, 0, 81, 85, 92, 81, 77, 89, 110, 88,
+ 86, 87, 83, 81, 71, 71, 85, 93, 76, 2, 1, 3,
+ 69, 86, 73, 76, 92, 81, 89, 88, 104, 101, 96,
+ 92, 74, 69, 89, 76, 79, 88, 94, 95, 96, 97,
+ 99, 103, 115, 109, 117, 116, 117, 104, 114,
+ 117, 74, 5, 65, 8, 12, 12, 62, 23, 21, 68, 31,
+ 62, 57, 62, 19, 15, 7, 92, 117, 121, 126, 126,
+ 126, 126, 66, 43, 27, 18, 9, 14, 2, 66, 68,
+ 67, 68, 1, 14, 6, 24, 17, 9, 15, 17, 20, 21,
+ 11, 24, 19, 64, 17, 10, 67, 74, 80, 86, 114,
+ 113, 104 },
+
+ {
+
+ 23,
+ 4, 82, 23, 4, 82, 13, 23, 27, 12, 70, 96, 65,
+ 6, 52, 14, 47, 70, 21, 29, 64, 82, 1, 96, 113,
+ 67, 79, 126, 126, 126, 62, 14, 69, 21, 29, 64,
+ 69, 15, 19, 69, 3, 64, 65, 71, 86, 72, 93, 5,
+ 67, 66, 67, 80, 72, 88, 16, 3, 0, 66, 11, 3,
+ 22, 0, 0, 0, 5, 97, 97, 11, 76, 70, 52, 6, 69,
+ 80, 66, 82, 9, 14, 11, 22, 2, 18, 11, 7, 19,
+ 76, 82, 78, 84, 33, 1, 21, 68, 2, 71, 1, 19,
+ 71, 80, 71, 72, 18, 68, 7, 64, 73, 7, 68, 6,
+ 11, 6, 12, 21, 18, 83, 69, 3, 66, 2, 72, 11,
+ 71, 1, 19, 68, 16, 17, 6, 102, 10, 69, 96, 77,
+ 100, 72, 5, 65, 8, 2, 3, 23, 5, 0, 12, 22, 72,
+ 72, 3, 108, 80, 5, 95, 64, 77, 19, 1, 87, 30,
+ 1, 65, 94, 33, 81, 98, 1, 105, 66, 13, 16, 10,
+ 13, 9, 64, 12, 18, 88, 71, 7, 3, 1, 80, 71,
+ 75, 80, 72, 79, 89, 74, 76, 116, 91, 82, 97,
+ 92, 89, 11, 20, 9, 6, 6, 67, 74, 67, 82, 87,
+ 79, 95, 106, 105, 126, 76, 77, 93, 85, 92, 96,
+ 101, 104, 109, 115, 112, 126, 86, 111, 117,
+ 101, 102, 1, 38, 27, 18, 7, 5, 65, 66, 71, 66,
+ 10, 50, 33, 25, 17, 43, 17, 13, 3, 5, 4, 50,
+ 31, 18, 9, 23, 9, 3, 0, 0, 66, 34, 15, 7, 0,
+ 13, 3, 66, 64, 7, 44, 26, 16, 10, 28, 12, 10,
+ 9, 5, 62, 76, 67, 6, 66, 68, 1, 9, 10, 2, 10,
+ 16, 22, 76, 71, 76, 71, 21, 77, 78, 71, 1, 17,
+ 91, 81, 8, 13, 89, 92, 88, 71, 66, 11, 68, 0,
+ 65, 3, 11, 77, 68, 67, 4, 85, 73, 80, 79, 7,
+ 100, 76, 3, 70, 19, 70, 65, 29, 70, 73, 65,
+ 24, 85, 77, 103, 16, 28, 28, 16, 1, 17, 13, 6,
+ 7, 5, 2, 64, 82, 86, 93, 83, 78, 90, 112, 89,
+ 87, 88, 84, 82, 71, 71, 85, 93, 76, 64, 65, 1,
+ 71, 89, 75, 78, 95, 83, 91, 90, 106, 103, 98,
+ 93, 76, 71, 90, 78, 81, 90, 96, 98, 98, 99,
+ 101, 105, 118, 111, 119, 118, 118, 106, 116,
+ 118, 73, 6, 64, 9, 13, 13, 62, 24, 22, 68, 32,
+ 62, 59, 62, 20, 13, 5, 95, 120, 124, 126, 126,
+ 126, 126, 66, 43, 27, 18, 9, 14, 2, 66, 68,
+ 67, 67, 2, 15, 6, 25, 18, 9, 16, 18, 21, 22,
+ 11, 24, 19, 64, 16, 8, 69, 76, 82, 88, 116,
+ 115, 105 },
+
+ {
+
+ 22,
+ 4, 82, 22, 4, 82, 15, 24, 27, 12, 70, 97, 66,
+ 4, 52, 14, 50, 71, 22, 29, 64, 82, 2, 97, 114,
+ 68, 81, 126, 126, 126, 62, 16, 69, 22, 29, 64,
+ 69, 16, 19, 70, 3, 64, 65, 71, 86, 71, 93, 5,
+ 67, 65, 68, 80, 72, 88, 16, 4, 1, 65, 11, 3,
+ 22, 0, 0, 0, 6, 97, 97, 11, 77, 70, 52, 6, 69,
+ 79, 65, 82, 9, 16, 12, 24, 4, 19, 12, 9, 21,
+ 75, 81, 77, 84, 34, 1, 22, 67, 2, 70, 2, 21,
+ 71, 80, 71, 70, 19, 67, 7, 0, 73, 7, 68, 7,
+ 12, 7, 13, 21, 18, 83, 70, 3, 66, 2, 72, 11,
+ 71, 0, 19, 68, 16, 17, 5, 104, 10, 69, 97, 78,
+ 101, 71, 6, 64, 9, 3, 4, 24, 6, 0, 13, 23, 72,
+ 72, 4, 109, 79, 6, 95, 0, 77, 19, 1, 88, 31,
+ 1, 65, 96, 34, 82, 99, 1, 105, 67, 12, 16, 9,
+ 12, 8, 65, 12, 18, 89, 72, 7, 3, 1, 80, 71,
+ 75, 81, 72, 80, 90, 74, 77, 118, 92, 83, 98,
+ 93, 89, 9, 19, 7, 4, 5, 69, 77, 69, 84, 89,
+ 81, 97, 109, 107, 126, 78, 78, 95, 88, 94, 98,
+ 103, 107, 111, 118, 114, 126, 87, 112, 119,
+ 101, 102, 1, 38, 28, 18, 7, 5, 65, 66, 71, 65,
+ 11, 51, 33, 25, 17, 44, 18, 14, 3, 6, 5, 51,
+ 32, 18, 10, 24, 10, 3, 1, 1, 66, 36, 15, 7, 1,
+ 14, 4, 66, 0, 8, 45, 27, 16, 10, 29, 13, 11,
+ 10, 6, 62, 76, 67, 7, 66, 68, 1, 9, 10, 3, 10,
+ 17, 23, 76, 71, 76, 72, 21, 78, 79, 72, 1, 17,
+ 92, 82, 7, 13, 90, 93, 89, 70, 66, 12, 68, 1,
+ 65, 4, 12, 77, 68, 67, 4, 86, 73, 80, 79, 8,
+ 100, 76, 4, 70, 19, 70, 65, 30, 70, 73, 65,
+ 25, 85, 78, 103, 15, 28, 28, 16, 0, 16, 12, 5,
+ 7, 4, 1, 65, 83, 87, 94, 84, 80, 92, 114, 90,
+ 88, 89, 85, 82, 71, 70, 85, 94, 75, 66, 67,
+ 64, 74, 91, 77, 80, 97, 85, 93, 92, 108, 105,
+ 99, 94, 78, 73, 91, 80, 83, 92, 98, 100, 100,
+ 101, 103, 107, 120, 113, 121, 120, 119, 108,
+ 118, 120, 72, 8, 0, 9, 14, 14, 62, 25, 23, 68,
+ 33, 62, 60, 62, 20, 12, 3, 97, 123, 126, 126,
+ 126, 126, 126, 66, 43, 27, 18, 9, 14, 2, 66,
+ 68, 66, 66, 3, 16, 7, 26, 19, 10, 17, 19, 22,
+ 23, 12, 25, 20, 64, 14, 7, 70, 77, 84, 90,
+ 118, 117, 106 },
+
+ {
+
+ 21,
+ 4, 82, 21, 4, 82, 17, 26, 28, 12, 71, 99, 68,
+ 3, 52, 14, 52, 71, 23, 30, 64, 83, 2, 98, 115,
+ 68, 83, 126, 126, 126, 62, 17, 69, 23, 30, 64,
+ 68, 17, 20, 70, 3, 0, 64, 72, 87, 71, 93, 5,
+ 67, 65, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3,
+ 22, 0, 0, 0, 6, 97, 97, 12, 78, 70, 52, 6, 69,
+ 78, 64, 82, 10, 17, 14, 26, 5, 20, 14, 10, 23,
+ 74, 80, 76, 83, 35, 1, 23, 66, 2, 70, 2, 23,
+ 71, 79, 70, 69, 19, 67, 8, 2, 72, 8, 68, 7,
+ 12, 7, 14, 22, 19, 84, 70, 3, 66, 2, 72, 11,
+ 72, 0, 19, 69, 16, 17, 5, 106, 10, 70, 99, 79,
+ 102, 70, 7, 0, 10, 4, 4, 25, 6, 1, 14, 24, 72,
+ 72, 4, 110, 79, 7, 95, 0, 77, 20, 1, 89, 32,
+ 1, 65, 97, 35, 82, 100, 1, 106, 69, 11, 15, 8,
+ 11, 7, 65, 11, 17, 90, 73, 7, 3, 1, 81, 72,
+ 76, 82, 73, 81, 91, 75, 77, 120, 93, 83, 99,
+ 94, 89, 8, 17, 6, 2, 3, 71, 79, 71, 87, 92,
+ 83, 100, 111, 110, 126, 80, 80, 96, 90, 96,
+ 101, 106, 109, 114, 120, 116, 126, 88, 114,
+ 120, 102, 102, 1, 39, 28, 18, 7, 5, 65, 66,
+ 71, 65, 11, 51, 33, 25, 17, 45, 18, 14, 4, 6,
+ 6, 52, 32, 19, 10, 25, 10, 4, 1, 2, 65, 37,
+ 16, 8, 1, 15, 5, 65, 1, 8, 45, 27, 16, 10, 30,
+ 13, 12, 11, 7, 62, 75, 66, 7, 66, 68, 1, 9,
+ 10, 3, 10, 17, 23, 76, 71, 76, 72, 22, 79, 80,
+ 73, 0, 17, 93, 83, 7, 13, 90, 94, 89, 69, 65,
+ 13, 67, 1, 64, 4, 13, 77, 68, 67, 5, 86, 73,
+ 80, 78, 9, 100, 76, 4, 70, 20, 70, 65, 31, 70,
+ 73, 65, 26, 86, 78, 104, 14, 27, 27, 15, 64,
+ 15, 11, 4, 6, 3, 0, 66, 84, 88, 95, 86, 81,
+ 93, 116, 92, 89, 90, 85, 83, 71, 70, 85, 94,
+ 74, 68, 69, 66, 76, 94, 79, 82, 100, 87, 95,
+ 94, 111, 107, 101, 95, 80, 74, 93, 82, 85, 94,
+ 100, 102, 102, 103, 105, 109, 123, 115, 123,
+ 122, 120, 110, 120, 121, 71, 9, 1, 10, 15, 15,
+ 62, 26, 24, 68, 34, 62, 62, 62, 21, 10, 2,
+ 100, 126, 126, 126, 126, 126, 126, 66, 43, 27,
+ 18, 9, 14, 2, 66, 68, 66, 65, 4, 17, 8, 27,
+ 19, 10, 18, 19, 22, 23, 12, 25, 20, 64, 13, 5,
+ 72, 79, 86, 92, 120, 119, 107 },
+
+ {
+
+ 20,
+ 4, 82, 20, 4, 82, 19, 27, 28, 12, 71, 100, 69,
+ 2, 52, 14, 54, 71, 24, 31, 64, 84, 2, 99, 116,
+ 69, 85, 126, 126, 126, 62, 18, 69, 24, 31, 64,
+ 68, 18, 20, 71, 3, 0, 0, 72, 87, 71, 93, 5,
+ 67, 64, 68, 80, 72, 88, 17, 4, 1, 65, 12, 3,
+ 22, 0, 0, 0, 7, 97, 97, 12, 79, 70, 52, 6, 69,
+ 77, 0, 82, 10, 18, 16, 28, 7, 21, 16, 11, 25,
+ 73, 79, 75, 82, 36, 1, 24, 65, 2, 69, 3, 25,
+ 71, 79, 70, 68, 19, 67, 8, 3, 72, 9, 68, 7,
+ 12, 7, 15, 22, 19, 84, 70, 3, 66, 2, 72, 11,
+ 72, 64, 19, 70, 16, 17, 5, 108, 10, 70, 101,
+ 80, 103, 69, 8, 1, 11, 5, 5, 26, 7, 1, 15, 25,
+ 72, 72, 4, 111, 78, 8, 95, 0, 77, 21, 1, 90,
+ 33, 1, 65, 98, 36, 83, 101, 1, 107, 70, 10,
+ 15, 7, 10, 6, 66, 10, 17, 91, 74, 7, 3, 1, 81,
+ 73, 77, 83, 74, 82, 92, 76, 78, 122, 94, 84,
+ 100, 95, 89, 6, 16, 4, 0, 1, 73, 81, 73, 89,
+ 94, 85, 102, 114, 112, 126, 82, 82, 98, 92,
+ 98, 103, 108, 111, 116, 123, 118, 126, 89,
+ 116, 121, 103, 102, 1, 39, 28, 18, 7, 5, 65,
+ 66, 71, 64, 11, 51, 33, 25, 17, 46, 19, 15, 4,
+ 7, 7, 53, 33, 19, 10, 26, 11, 4, 2, 3, 64, 38,
+ 17, 8, 1, 16, 6, 64, 2, 9, 45, 27, 16, 10, 31,
+ 14, 13, 12, 8, 62, 75, 66, 8, 66, 68, 1, 9,
+ 10, 3, 10, 17, 24, 76, 71, 76, 73, 22, 80, 81,
+ 74, 64, 17, 94, 84, 7, 13, 91, 95, 90, 68, 64,
+ 14, 67, 2, 64, 5, 14, 77, 68, 67, 5, 87, 73,
+ 80, 78, 10, 100, 76, 4, 70, 21, 70, 65, 32,
+ 70, 73, 65, 27, 87, 78, 105, 13, 27, 27, 14,
+ 65, 14, 10, 3, 5, 2, 64, 67, 85, 89, 96, 87,
+ 82, 95, 118, 93, 90, 91, 86, 83, 71, 70, 85,
+ 95, 73, 70, 71, 68, 78, 97, 81, 84, 102, 89,
+ 97, 96, 113, 109, 102, 96, 82, 76, 94, 84, 87,
+ 96, 102, 104, 104, 105, 107, 111, 125, 117,
+ 125, 124, 121, 112, 122, 123, 70, 10, 2, 11,
+ 16, 16, 62, 27, 25, 68, 35, 62, 62, 62, 22, 9,
+ 0, 102, 126, 126, 126, 126, 126, 126, 66, 43,
+ 27, 18, 9, 14, 2, 66, 68, 65, 64, 5, 18, 9,
+ 28, 20, 11, 19, 20, 23, 24, 12, 26, 20, 64,
+ 12, 4, 74, 81, 88, 94, 122, 121, 108 },
+
+ {
+
+ 18,
+ 3, 83, 18, 3, 83, 20, 28, 28, 12, 72, 102, 71,
+ 0, 51, 14, 56, 72, 24, 31, 65, 85, 2, 101,
+ 118, 70, 87, 126, 126, 126, 62, 19, 70, 24,
+ 31, 65, 68, 19, 20, 72, 3, 0, 0, 73, 88, 71,
+ 94, 5, 67, 64, 69, 81, 72, 88, 17, 4, 1, 65,
+ 12, 2, 22, 0, 0, 0, 7, 98, 97, 12, 80, 71, 52,
+ 5, 69, 76, 0, 82, 10, 19, 17, 29, 8, 22, 17,
+ 12, 26, 72, 79, 74, 82, 36, 1, 24, 65, 2, 69,
+ 3, 26, 71, 79, 70, 67, 19, 67, 8, 4, 72, 9,
+ 68, 7, 12, 7, 15, 22, 19, 85, 71, 3, 67, 2,
+ 72, 10, 73, 65, 19, 71, 15, 17, 4, 110, 9, 71,
+ 103, 81, 104, 69, 8, 1, 11, 5, 5, 27, 7, 1,
+ 15, 26, 73, 72, 4, 113, 78, 8, 95, 0, 78, 21,
+ 1, 91, 33, 1, 65, 100, 36, 84, 102, 1, 108,
+ 72, 9, 14, 6, 9, 5, 67, 9, 16, 93, 75, 6, 2,
+ 1, 82, 74, 78, 84, 75, 83, 93, 77, 79, 124,
+ 96, 85, 102, 97, 89, 4, 14, 2, 65, 64, 76, 84,
+ 76, 92, 97, 88, 105, 117, 115, 126, 84, 84,
+ 100, 95, 101, 106, 111, 114, 119, 126, 121,
+ 126, 90, 118, 123, 104, 103, 1, 39, 28, 18, 7,
+ 5, 66, 66, 71, 64, 11, 51, 33, 25, 17, 47, 19,
+ 15, 4, 7, 7, 53, 33, 19, 10, 26, 11, 4, 2, 4,
+ 64, 39, 17, 8, 1, 16, 6, 64, 3, 9, 45, 27, 16,
+ 10, 31, 14, 13, 12, 8, 62, 75, 66, 8, 66, 68,
+ 1, 9, 10, 3, 10, 17, 24, 76, 71, 77, 74, 22,
+ 81, 82, 75, 65, 16, 96, 86, 6, 12, 92, 97, 91,
+ 68, 64, 15, 67, 2, 64, 5, 14, 78, 68, 67, 5,
+ 88, 73, 80, 78, 10, 101, 76, 4, 70, 21, 71,
+ 65, 32, 71, 74, 65, 27, 88, 79, 106, 12, 26,
+ 26, 13, 67, 13, 9, 2, 4, 1, 65, 68, 86, 91,
+ 98, 89, 84, 97, 120, 95, 92, 92, 87, 84, 71,
+ 70, 85, 96, 73, 73, 74, 70, 81, 100, 84, 87,
+ 105, 92, 100, 99, 116, 112, 104, 97, 84, 78,
+ 96, 86, 90, 99, 105, 107, 107, 107, 110, 114,
+ 126, 119, 126, 126, 123, 114, 124, 125, 69,
+ 11, 3, 11, 16, 16, 62, 27, 25, 68, 35, 62, 62,
+ 62, 22, 7, 65, 105, 126, 126, 126, 126, 126,
+ 126, 66, 43, 26, 17, 9, 14, 2, 67, 68, 65, 64,
+ 5, 18, 9, 29, 20, 11, 19, 20, 23, 24, 12, 26,
+ 20, 65, 10, 2, 76, 83, 90, 96, 125, 123, 109 },
+
+ {
+
+ 17,
+ 3, 83, 17, 3, 83, 22, 30, 29, 13, 72, 103, 72,
+ 64, 51, 14, 59, 72, 25, 32, 65, 85, 3, 102,
+ 119, 70, 88, 126, 126, 126, 62, 21, 70, 25,
+ 32, 65, 67, 21, 21, 72, 4, 1, 1, 73, 88, 70,
+ 94, 5, 66, 0, 69, 81, 71, 88, 18, 5, 2, 64,
+ 13, 2, 22, 0, 0, 0, 8, 98, 97, 13, 80, 71, 52,
+ 5, 69, 74, 1, 82, 11, 21, 19, 31, 10, 24, 19,
+ 14, 28, 70, 78, 73, 81, 37, 2, 25, 64, 3, 68,
+ 4, 28, 70, 78, 69, 65, 20, 66, 9, 6, 71, 10,
+ 67, 8, 13, 8, 16, 23, 20, 85, 71, 4, 67, 2,
+ 71, 10, 73, 65, 19, 71, 15, 17, 4, 111, 9, 71,
+ 104, 82, 104, 68, 9, 2, 12, 6, 6, 28, 8, 2,
+ 16, 28, 73, 72, 5, 114, 77, 9, 95, 1, 78, 22,
+ 2, 91, 34, 1, 65, 101, 37, 84, 103, 1, 108,
+ 73, 9, 14, 6, 9, 5, 67, 9, 16, 94, 75, 6, 2,
+ 1, 82, 74, 78, 84, 75, 83, 94, 77, 79, 125,
+ 97, 85, 103, 98, 89, 3, 13, 1, 66, 65, 78, 86,
+ 78, 94, 99, 90, 107, 119, 117, 126, 85, 85,
+ 101, 97, 103, 108, 113, 116, 121, 126, 123,
+ 126, 90, 119, 124, 104, 103, 2, 40, 29, 19, 8,
+ 5, 66, 65, 70, 0, 12, 52, 34, 26, 17, 48, 20,
+ 16, 5, 8, 8, 54, 34, 20, 11, 27, 12, 5, 3, 6,
+ 0, 41, 18, 9, 2, 17, 7, 0, 5, 10, 46, 28, 17,
+ 11, 32, 15, 14, 13, 9, 62, 74, 65, 9, 66, 67,
+ 1, 10, 11, 4, 11, 18, 25, 75, 71, 77, 74, 23,
+ 81, 82, 76, 65, 16, 97, 87, 6, 12, 92, 98, 91,
+ 67, 0, 16, 66, 3, 0, 6, 15, 78, 67, 66, 6, 88,
+ 72, 79, 77, 11, 101, 76, 5, 70, 22, 71, 65,
+ 33, 71, 74, 64, 28, 88, 79, 106, 12, 26, 26,
+ 13, 68, 13, 9, 2, 4, 1, 65, 68, 86, 92, 99,
+ 90, 85, 98, 121, 96, 93, 92, 87, 84, 70, 69,
+ 84, 96, 72, 75, 76, 72, 83, 102, 86, 89, 107,
+ 94, 102, 101, 118, 114, 105, 97, 85, 79, 97,
+ 87, 92, 101, 107, 109, 109, 109, 112, 116,
+ 126, 120, 126, 126, 124, 115, 125, 126, 67,
+ 13, 5, 12, 17, 17, 62, 28, 26, 68, 36, 62, 62,
+ 62, 23, 6, 66, 107, 126, 126, 126, 126, 126,
+ 126, 65, 44, 26, 17, 9, 15, 2, 67, 68, 64, 0,
+ 6, 19, 10, 31, 21, 12, 20, 21, 24, 25, 13, 27,
+ 21, 65, 9, 1, 77, 84, 91, 97, 126, 124, 109 },
+
+ {
+
+ 16,
+ 3, 83, 16, 3, 83, 24, 31, 29, 13, 72, 104, 73,
+ 65, 51, 14, 61, 72, 26, 33, 65, 86, 3, 103,
+ 120, 71, 90, 126, 126, 126, 62, 22, 70, 26,
+ 33, 65, 67, 22, 21, 73, 4, 1, 2, 73, 88, 70,
+ 94, 5, 66, 1, 69, 81, 71, 88, 18, 5, 2, 64,
+ 13, 2, 22, 0, 0, 0, 9, 98, 97, 13, 81, 71, 52,
+ 5, 69, 73, 2, 82, 11, 22, 21, 33, 11, 25, 21,
+ 15, 30, 69, 77, 72, 80, 38, 2, 26, 0, 3, 68,
+ 5, 30, 70, 78, 69, 64, 20, 66, 9, 7, 71, 11,
+ 67, 8, 13, 8, 17, 23, 20, 86, 71, 4, 67, 2,
+ 71, 10, 73, 66, 19, 72, 15, 17, 4, 113, 9, 71,
+ 106, 83, 105, 67, 10, 3, 13, 7, 7, 29, 9, 2,
+ 17, 29, 73, 72, 5, 115, 77, 10, 95, 1, 78, 23,
+ 2, 92, 35, 1, 65, 102, 38, 85, 104, 1, 109,
+ 75, 8, 13, 5, 8, 4, 68, 8, 16, 95, 76, 6, 2,
+ 1, 82, 75, 79, 85, 76, 84, 95, 78, 80, 126,
+ 98, 86, 104, 99, 89, 1, 12, 64, 68, 67, 80,
+ 88, 80, 97, 101, 92, 110, 122, 119, 126, 87,
+ 87, 103, 99, 105, 110, 115, 118, 123, 126,
+ 125, 126, 91, 121, 125, 105, 103, 2, 40, 29,
+ 19, 8, 5, 66, 65, 70, 1, 12, 52, 34, 26, 17,
+ 49, 20, 17, 5, 9, 9, 55, 35, 20, 11, 28, 12,
+ 5, 4, 7, 1, 42, 19, 9, 2, 18, 8, 1, 6, 10, 46,
+ 28, 17, 11, 33, 16, 15, 14, 10, 62, 74, 65, 9,
+ 66, 67, 1, 10, 11, 4, 11, 18, 26, 75, 71, 77,
+ 75, 23, 82, 83, 77, 66, 16, 98, 88, 6, 12, 93,
+ 99, 92, 66, 1, 17, 66, 3, 0, 7, 16, 78, 67,
+ 66, 6, 89, 72, 79, 77, 12, 101, 76, 5, 70, 23,
+ 71, 65, 34, 71, 74, 64, 29, 89, 79, 107, 11,
+ 26, 26, 12, 69, 12, 8, 1, 3, 0, 66, 69, 87,
+ 93, 100, 92, 86, 100, 123, 97, 94, 93, 88, 84,
+ 70, 69, 84, 97, 71, 77, 78, 74, 85, 105, 88,
+ 91, 110, 96, 104, 103, 120, 116, 106, 98, 87,
+ 81, 98, 89, 94, 103, 109, 111, 111, 111, 114,
+ 118, 126, 122, 126, 126, 125, 117, 126, 126,
+ 66, 14, 6, 13, 18, 18, 62, 29, 27, 68, 37, 62,
+ 62, 62, 24, 5, 68, 110, 126, 126, 126, 126,
+ 126, 126, 65, 44, 26, 17, 9, 15, 2, 67, 68, 0,
+ 1, 7, 20, 11, 32, 22, 13, 21, 22, 25, 26, 13,
+ 28, 21, 65, 8, 64, 79, 86, 93, 99, 126, 126,
+ 110 },
+
+ {
+
+ 15,
+ 3, 83, 15, 3, 83, 26, 33, 30, 13, 73, 106, 75,
+ 66, 51, 14, 62, 72, 27, 34, 65, 87, 3, 104,
+ 121, 71, 92, 126, 126, 126, 62, 23, 70, 27,
+ 34, 65, 66, 23, 22, 73, 4, 2, 3, 74, 89, 70,
+ 94, 5, 66, 1, 69, 81, 71, 88, 19, 5, 2, 64,
+ 14, 2, 22, 0, 0, 0, 9, 98, 97, 14, 82, 71, 52,
+ 5, 69, 72, 3, 82, 12, 23, 23, 35, 13, 26, 23,
+ 16, 32, 68, 76, 71, 79, 39, 2, 27, 1, 3, 67,
+ 5, 32, 70, 77, 68, 0, 20, 66, 10, 9, 70, 12,
+ 67, 8, 13, 8, 18, 24, 21, 86, 71, 4, 67, 2,
+ 71, 10, 74, 66, 19, 73, 15, 17, 4, 115, 9, 72,
+ 108, 84, 106, 66, 11, 4, 14, 8, 7, 30, 9, 3,
+ 18, 30, 73, 72, 5, 116, 76, 11, 95, 1, 78, 24,
+ 2, 93, 36, 1, 65, 103, 39, 85, 105, 1, 110,
+ 76, 7, 13, 4, 7, 3, 68, 7, 15, 96, 77, 6, 2,
+ 1, 83, 76, 80, 86, 77, 85, 96, 79, 80, 126,
+ 99, 86, 105, 100, 89, 0, 10, 65, 70, 69, 82,
+ 90, 82, 99, 104, 94, 112, 124, 122, 126, 89,
+ 89, 104, 101, 107, 113, 118, 120, 126, 126,
+ 126, 126, 92, 123, 126, 106, 103, 2, 41, 29,
+ 19, 8, 5, 66, 65, 70, 1, 12, 52, 34, 26, 17,
+ 50, 21, 17, 6, 9, 10, 56, 35, 21, 11, 29, 13,
+ 6, 4, 8, 2, 43, 20, 10, 2, 19, 9, 2, 7, 11,
+ 46, 28, 17, 11, 34, 16, 16, 15, 11, 62, 73,
+ 64, 10, 66, 67, 1, 10, 11, 4, 11, 18, 26, 75,
+ 71, 77, 75, 24, 83, 84, 78, 67, 16, 99, 89, 6,
+ 12, 93, 100, 92, 65, 2, 18, 65, 4, 1, 7, 17,
+ 78, 67, 66, 7, 89, 72, 79, 76, 13, 101, 76, 5,
+ 70, 24, 71, 65, 35, 71, 74, 64, 30, 90, 79,
+ 108, 10, 25, 25, 11, 70, 11, 7, 0, 2, 64, 67,
+ 70, 88, 94, 101, 93, 87, 101, 125, 99, 95, 94,
+ 88, 85, 70, 69, 84, 97, 70, 79, 80, 76, 87,
+ 108, 90, 93, 112, 98, 106, 105, 123, 118, 108,
+ 99, 89, 82, 100, 91, 96, 105, 111, 113, 113,
+ 113, 116, 120, 126, 124, 126, 126, 126, 119,
+ 126, 126, 65, 15, 7, 14, 19, 19, 62, 30, 28,
+ 68, 38, 62, 62, 62, 25, 3, 69, 112, 126, 126,
+ 126, 126, 126, 126, 65, 44, 26, 17, 9, 15, 2,
+ 67, 68, 0, 2, 8, 21, 12, 33, 22, 13, 22, 22,
+ 25, 26, 13, 28, 21, 65, 7, 65, 81, 88, 95,
+ 101, 126, 126, 111 },
+
+ },
+
+ };
+
+#endif
diff --git a/decoder/ih264d_compute_bs.c b/decoder/ih264d_compute_bs.c
new file mode 100755
index 0000000..4a6750a
--- /dev/null
+++ b/decoder/ih264d_compute_bs.c
@@ -0,0 +1,2394 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_deblocking.h"
+#include "string.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+
+/*****************************************************************************
+ *  Function Name : ih264d_update_csbp_8x8
+ *
+ *  Description   : Widens a 16-bit luma coded sub block pattern. The pattern
+ *                  is treated as four disjoint bit groups (0x0033, 0x00CC,
+ *                  0x3300 and 0xCC00). If at least one bit of a group is set
+ *                  in the input, every bit of that group is set in the
+ *                  result; groups with no bit set are left cleared.
+ *                  NOTE(review): going by the function name, each group is
+ *                  presumably the four 4x4 blocks that make up one 8x8
+ *                  block - confirm against the callers.
+ *
+ *  Inputs        : u2_luma_csbp - coded sub block pattern to widen
+ *
+ *  Returns       : the widened pattern; the argument itself is not modified
+ *****************************************************************************/
+UWORD16 ih264d_update_csbp_8x8(UWORD16 u2_luma_csbp)
+{
+    /* One mask per bit group. The groups do not overlap, so testing the */
+    /* unmodified input gives the same answer as testing the running     */
+    /* result.                                                           */
+    static const UWORD16 au2_group_mask[4] =
+    { 0x0033, 0x00CC, 0x3300, 0xCC00 };
+
+    UWORD16 u2_widened = u2_luma_csbp;
+    UWORD32 u4_group;
+
+    for(u4_group = 0; u4_group < 4; u4_group++)
+    {
+        if(u2_luma_csbp & au2_group_mask[u4_group])
+        {
+            u2_widened |= au2_group_mask[u4_group];
+        }
+    }
+
+    return u2_widened;
+}
+
+/*****************************************************************************
+ *  Function Name : ih264d_fill_bs2_horz_vert
+ *
+ *  Description   : Fills boundary strength (=2) for all horizontal and
+ *                  vertical edges of the current mb, based on the coded sub
+ *                  block patterns (csbp) of the current, top and left mbs.
+ *
+ *  Inputs        :
+ *    pu4_bs              : base pointer of the BS table; entries 0..3 (horz
+ *                          edges) and 4..7 (vert edges) are overwritten
+ *    u4_left_mb_csbp     : left mb's coded sub block pattern
+ *    u4_top_mb_csbp      : top mb's coded sub block pattern
+ *    u4_cur_mb_csbp      : current mb's coded sub block pattern
+ *    pu4_packed_bs2      : look up table indexed by a 4-bit "coded" pattern;
+ *                          each entry is stored as one packed BS word
+ *    pu2_4x4_v2h_reorder : look up table indexed by a 4-bit pattern, used to
+ *                          build the 4x4 transpose of the vertical pattern
+ *
+ *  Processing    :
+ *    csbp for each 4x4 block in a mb is bit packed in reverse raster scan
+ *    order for each mb as shown below:
+ *        15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0
+ *
+ *    BS=2 for a 4x4 edge if any of the adjacent blocks forming the edge is
+ *    coded. So bs=2 for all horz and vert edges can be derived with a look
+ *    up per edge after ORing the csbp values as follows
+ *    (C means current mb, T means top mb and L means left mb):
+ *
+ *    All horz edges:
+ *               15C|14C|13C|12C|11C|10C| 9C| 8C| 7C| 6C| 5C| 4C| 3C| 2C| 1C| 0C
+ *    (or with)  11C|10C| 9C| 8C| 7C| 6C| 5C| 4C| 3C| 2C| 1C| 0C|15T|14T|13T|12T
+ *               -----BS[3]-----|-----BS[2]-----|-----BS[1]-----|-----BS[0]-----
+ *
+ *    All vert edges:
+ *               15C|14C|13C|12C|11C|10C| 9C| 8C| 7C| 6C| 5C| 4C| 3C| 2C| 1C| 0C
+ *    (or with)  14C|13C|12C|15L|10C| 9C| 8C|11L| 6C| 5C| 4C| 7L| 2C| 1C| 0C| 3L
+ *    Do a 4x4 transpose of the resulting pattern to get vert BS[4]-BS[7]
+ *
+ *  Outputs       : pu4_bs[0..7]
+ *  Returns       : none
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         16 10 2008   Jay             Draft
+ *****************************************************************************/
+#define CSBP_LEFT_BLOCK_MASK 0x1111
+#define CSBP_RIGHT_BLOCK_MASK 0x8888
+
+void ih264d_fill_bs2_horz_vert(UWORD32 *pu4_bs, /* Base pointer of BS table */
+                               WORD32 u4_left_mb_csbp, /* csbp of left mb */
+                               WORD32 u4_top_mb_csbp, /* csbp of top mb */
+                               WORD32 u4_cur_mb_csbp, /* csbp of current mb */
+                               const UWORD32 *pu4_packed_bs2,
+                               const UWORD16 *pu2_4x4_v2h_reorder)
+{
+    /* Block above each 4x4 block:                                       */
+    /* 11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T              */
+    UWORD32 u4_above_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12);
+    UWORD32 u4_horz_coded = u4_cur_mb_csbp | u4_above_csbp;
+
+    /* Bits 15L, 11L, 7L and 3L of the left mb, all other bits cleared    */
+    UWORD32 u4_left_col = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK;
+
+    /* 14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x                    */
+    UWORD32 u4_cur_shifted = (u4_cur_mb_csbp << 1) & (~CSBP_LEFT_BLOCK_MASK);
+
+    /* Block to the left of each 4x4 block:                               */
+    /* 14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L               */
+    UWORD32 u4_leftward_csbp = u4_cur_shifted | (u4_left_col >> 3);
+    UWORD32 u4_vert_coded = u4_cur_mb_csbp | u4_leftward_csbp;
+
+    UWORD32 u4_vert_transposed;
+    UWORD32 u4_edge;
+
+    /* Project macro defined outside this file; it is kept ahead of every */
+    /* write to pu4_bs, exactly where the original code placed it.        */
+    PROFILE_DISABLE_BOUNDARY_STRENGTH()
+
+    /* Horz edges 0..3: one look up per nibble, lowest nibble first       */
+    for(u4_edge = 0; u4_edge < 4; u4_edge++)
+    {
+        pu4_bs[u4_edge] =
+                        pu4_packed_bs2[(u4_horz_coded >> (u4_edge << 2)) & 0xF];
+    }
+
+    /* 4x4 transpose of the vertical pattern: the reorder table entry for */
+    /* nibble k is shifted left by k and merged into the result           */
+    u4_vert_transposed = 0;
+    for(u4_edge = 0; u4_edge < 4; u4_edge++)
+    {
+        UWORD32 u4_reordered =
+                        pu2_4x4_v2h_reorder[(u4_vert_coded >> (u4_edge << 2)) & 0xF];
+
+        u4_vert_transposed |= (u4_reordered << u4_edge);
+    }
+
+    /* Vert edges 4..7: one look up per nibble of the transposed pattern  */
+    for(u4_edge = 0; u4_edge < 4; u4_edge++)
+    {
+        pu4_bs[4 + u4_edge] =
+                        pu4_packed_bs2[(u4_vert_transposed >> (u4_edge << 2)) & 0xF];
+    }
+}
+
+/*****************************************************************************
+ *  Function Name : ih264d_fill_bs1_16x16mb_pslice
+ *
+ *  Description   : Fills boundary strength (=1) for those horz and vert mb
+ *                  edges of a 16x16 mb which were left at 0 by
+ *                  ih264d_fill_bs2_horz_vert. Used for p slices.
+ *
+ *  Inputs        :
+ *    ps_cur_mv_pred         : mv info of the current mb; i2_mv[0], i2_mv[1]
+ *                             and i1_ref_frame[0] are read
+ *    ps_top_mv_pred         : mv info of the blocks above; four consecutive
+ *                             entries are read, one per top edge segment
+ *    ppv_map_ref_idx_to_poc : table indexed by the current mb's
+ *                             i1_ref_frame[0] to get its reference picture
+ *                             address
+ *    pu4_bs_table           : BS table; entry 0 (top edge) and entry 4
+ *                             (left edge) are read and written back
+ *    ps_leftmost_mv_pred    : mv info of the blocks to the left; entries
+ *                             0, 4, 8 and 12 are read, one per left edge
+ *                             segment
+ *    ps_left_addr           : u4_add[] holds the reference picture addresses
+ *                             of the left neighbour; [0],[1] are used for
+ *                             segments 0-1 and [2],[3] for segments 2-3
+ *    u4_pic_addrress        : reference picture addresses of the top
+ *                             neighbour, indexed like ps_left_addr->u4_add
+ *    i4_ver_mvlimit         : threshold applied to the i2_mv[1] difference
+ *
+ *  Processing    : Each edge word holds four segments, one per byte, with
+ *                  segment i at bit offset 24 - 8*i. For every segment
+ *                  whose low nibble is still 0, Bs is set to 1 if
+ *                    - the i2_mv[0] components differ by 4 or more, or
+ *                    - the i2_mv[1] components differ by i4_ver_mvlimit or
+ *                      more, or
+ *                    - the reference picture addresses differ (the second
+ *                      address of the current mb is taken as 0).
+ *
+ *  Outputs       : pu4_bs_table[0] and pu4_bs_table[4]
+ *  Returns       : none
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         16 10 2008   Jay             Draft
+ *****************************************************************************/
+void ih264d_fill_bs1_16x16mb_pslice(mv_pred_t *ps_cur_mv_pred,
+                                    mv_pred_t *ps_top_mv_pred,
+                                    void **ppv_map_ref_idx_to_poc,
+                                    UWORD32 *pu4_bs_table, /* pointer to the BsTable array */
+                                    mv_pred_t *ps_leftmost_mv_pred,
+                                    neighbouradd_t *ps_left_addr,
+                                    void **u4_pic_addrress, /* picture address for BS calc */
+                                    WORD32 i4_ver_mvlimit)
+{
+    WORD16 i2_cur_mv0, i2_cur_mv1;
+    WORD16 i2_nbr_mv0, i2_nbr_mv1;
+    void *pv_cur_ref0, *pv_cur_ref1;
+    void *pv_nbr_ref0, *pv_nbr_ref1;
+    UWORD32 u4_seg;
+    UWORD32 u4_top_edge_bs = pu4_bs_table[0];
+    UWORD32 u4_left_edge_bs = pu4_bs_table[4];
+
+    /* Project macro defined outside this file; kept ahead of all the     */
+    /* pointer dereferences and table updates below, as in the original.  */
+    PROFILE_DISABLE_BOUNDARY_STRENGTH()
+
+    /* The whole 16x16 mb shares one mv and one reference picture         */
+    i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0];
+    i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1];
+    pv_cur_ref0 = ppv_map_ref_idx_to_poc[ps_cur_mv_pred->i1_ref_frame[0]];
+    pv_cur_ref1 = 0;
+
+    /*********************************/
+    /* Computing Bs for the top edge */
+    /*********************************/
+    for(u4_seg = 0; u4_seg < 4; u4_seg++, ps_top_mv_pred++)
+    {
+        UWORD32 u4_shift = 24 - (u4_seg << 3);
+        UWORD32 u4_mv_differs;
+        UWORD32 u4_bs;
+
+        /* Segment already has a non zero Bs: leave it alone              */
+        if((u4_top_edge_bs >> u4_shift) & 0xf)
+        {
+            continue;
+        }
+
+        /* Compare the top neighbour's mv and reference picture addresses */
+        /* with those of the current mb                                   */
+        i2_nbr_mv0 = ps_top_mv_pred->i2_mv[0];
+        i2_nbr_mv1 = ps_top_mv_pred->i2_mv[1];
+        pv_nbr_ref0 = u4_pic_addrress[u4_seg & 2];
+        pv_nbr_ref1 = u4_pic_addrress[1 + (u4_seg & 2)];
+
+        u4_mv_differs = ((ABS((i2_nbr_mv0 - i2_cur_mv0)) >= 4)
+                        || (ABS((i2_nbr_mv1 - i2_cur_mv1)) >= i4_ver_mvlimit));
+
+        u4_bs = ((pv_cur_ref0 != pv_nbr_ref0)
+                        || (pv_cur_ref1 != pv_nbr_ref1)
+                        || u4_mv_differs);
+
+        u4_top_edge_bs |= (u4_bs << u4_shift);
+    }
+    pu4_bs_table[0] = u4_top_edge_bs;
+
+    /***********************************/
+    /* Computing Bs for the left edge  */
+    /***********************************/
+    for(u4_seg = 0; u4_seg < 4; u4_seg++, ps_leftmost_mv_pred += 4)
+    {
+        UWORD32 u4_shift = 24 - (u4_seg << 3);
+        UWORD32 u4_mv_differs;
+        UWORD32 u4_bs;
+
+        /* Segment already has a non zero Bs: leave it alone              */
+        if((u4_left_edge_bs >> u4_shift) & 0xf)
+        {
+            continue;
+        }
+
+        /* Compare the left neighbour's mv and reference picture          */
+        /* addresses with those of the current mb                         */
+        i2_nbr_mv0 = ps_leftmost_mv_pred->i2_mv[0];
+        i2_nbr_mv1 = ps_leftmost_mv_pred->i2_mv[1];
+        pv_nbr_ref0 = ps_left_addr->u4_add[u4_seg & 2];
+        pv_nbr_ref1 = ps_left_addr->u4_add[1 + (u4_seg & 2)];
+
+        /* Both operands are 0 or 1 and have no side effects, so the      */
+        /* logical OR gives the same value as a bitwise OR would          */
+        u4_mv_differs = ((ABS((i2_nbr_mv0 - i2_cur_mv0)) >= 4)
+                        || (ABS((i2_nbr_mv1 - i2_cur_mv1)) >= i4_ver_mvlimit));
+
+        u4_bs = ((pv_cur_ref0 != pv_nbr_ref0)
+                        || (pv_cur_ref1 != pv_nbr_ref1)
+                        || u4_mv_differs);
+
+        u4_left_edge_bs |= (u4_bs << u4_shift);
+    }
+    pu4_bs_table[4] = u4_left_edge_bs;
+}
+
+/*****************************************************************************
+ *
+ *  Function Name : ih264d_fill_bs1_non16x16mb_pslice
+ *
+ *  Description   : Fills boundary strength (=1) for those horizontal and
+ *                  vertical 4x4 block edges of a non-16x16 macroblock whose
+ *                  Bs field is still 0 on entry (i.e. was not set by
+ *                  ih264d_fill_bs2_horz_vert). Used for P slices, so only
+ *                  i2_mv[0..1] and i1_ref_frame[0] of each block are read.
+ *
+ *  Inputs        : ps_cur_mv_pred      - first of the 16 per-4x4 entries of
+ *                                        the current MB; walked in raster
+ *                                        order, four entries per row
+ *                  ps_top_mv_pred      - four entries above row 0; for the
+ *                                        following rows the previous row of
+ *                                        the current MB is used instead
+ *                  ppv_map_ref_idx_to_poc
+ *                                      - table indexed by i1_ref_frame[0],
+ *                                        giving the picture address that is
+ *                                        compared between neighbours
+ *                  pu4_bs_table        - words [0..3]: horizontal edges, one
+ *                                        word per row; words [4..7]: vertical
+ *                                        edges, one word per column. Each word
+ *                                        holds four 8-bit fields, index 0 in
+ *                                        the most significant byte
+ *                  ps_leftmost_mv_pred - entry to the left of row 0; entries
+ *                                        for the next rows are 4 apart
+ *                  ps_left_addr        - picture addresses of the left MB:
+ *                                        u4_add[0..1] for rows 0-1,
+ *                                        u4_add[2..3] for rows 2-3
+ *                  u4_pic_addrress     - picture addresses of the top MB:
+ *                                        [0..1] for columns 0-1,
+ *                                        [2..3] for columns 2-3
+ *                  i4_ver_mvlimit      - threshold applied to the difference
+ *                                        of the vertical mv components
+ *
+ *  Processing    : An edge gets Bs = 1 when the two blocks reference
+ *                  different pictures, or when the horizontal mv components
+ *                  differ by 4 or more, or the vertical components differ by
+ *                  i4_ver_mvlimit or more.
+ *
+ *  Outputs       : pu4_bs_table[0..7] updated in place; fields whose low
+ *                  nibble is already non-zero are left untouched
+ *  Returns       : None
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         16 10 2008   Jay             Draft
+ *
+ *****************************************************************************/
+void ih264d_fill_bs1_non16x16mb_pslice(mv_pred_t *ps_cur_mv_pred,
+                                       mv_pred_t *ps_top_mv_pred,
+                                       void **ppv_map_ref_idx_to_poc,
+                                       UWORD32 *pu4_bs_table, /* pointer to the BsTable array */
+                                       mv_pred_t *ps_leftmost_mv_pred,
+                                       neighbouradd_t *ps_left_addr,
+                                       void **u4_pic_addrress,
+                                       WORD32 i4_ver_mvlimit)
+{
+    /* Only one reference list is consulted here, so the map is used as is */
+    void **ppv_l0_pics = ppv_map_ref_idx_to_poc;
+    UWORD32 u4_row;
+
+    PROFILE_DISABLE_BOUNDARY_STRENGTH()
+
+    u4_row = 0;
+    while(u4_row < 4)
+    {
+        /* Position of this row's field inside every vertical-edge word */
+        const UWORD32 u4_row_shift = 24 - (u4_row << 3);
+        /* Horizontal-edge word of this row, written back after the row */
+        UWORD32 u4_horz_bs = pu4_bs_table[u4_row];
+        /* Block to the left of column 0 lives in the left neighbour */
+        mv_pred_t *ps_left_blk = ps_leftmost_mv_pred + (u4_row << 2);
+        WORD32 i4_col;
+
+        for(i4_col = 0; i4_col < 4; i4_col++)
+        {
+            /* Position of this column's field inside the horizontal word */
+            const UWORD32 u4_col_shift = 24 - (i4_col << 3);
+
+            /*-------------------------------------------------------------*/
+            /* Vertical edge between this block and the block to its left  */
+            /*-------------------------------------------------------------*/
+            if(0 == ((pu4_bs_table[i4_col + 4] >> u4_row_shift) & 0xf))
+            {
+                const WORD16 i2_left_x = ps_left_blk->i2_mv[0];
+                const WORD16 i2_left_y = ps_left_blk->i2_mv[1];
+                const WORD16 i2_cur_x = ps_cur_mv_pred->i2_mv[0];
+                const WORD16 i2_cur_y = ps_cur_mv_pred->i2_mv[1];
+                const WORD8 i1_cur_ref = ps_cur_mv_pred->i1_ref_frame[0];
+                void *pv_cur_pic = ppv_l0_pics[i1_cur_ref];
+                void *pv_left_pic0, *pv_left_pic1;
+                UWORD32 u4_mv_far, u4_vert_bs;
+
+                if(0 == i4_col)
+                {
+                    /* Neighbour belongs to the left MB: use its saved */
+                    /* picture addresses                               */
+                    pv_left_pic0 = ps_left_addr->u4_add[u4_row & 2];
+                    pv_left_pic1 = ps_left_addr->u4_add[1 + (u4_row & 2)];
+                }
+                else
+                {
+                    const WORD8 i1_left_ref = ps_left_blk->i1_ref_frame[0];
+
+                    pv_left_pic0 = ppv_l0_pics[i1_left_ref];
+                    pv_left_pic1 = 0;
+                }
+
+                u4_mv_far = (ABS((i2_left_x - i2_cur_x)) >= 4)
+                            || (ABS((i2_left_y - i2_cur_y)) >= i4_ver_mvlimit);
+
+                /* The current block has no second picture address, so   */
+                /* the neighbour's second address is compared against 0  */
+                u4_vert_bs = (pv_left_pic0 != pv_cur_pic)
+                             || (pv_left_pic1 != 0)
+                             || u4_mv_far;
+
+                pu4_bs_table[i4_col + 4] |= (u4_vert_bs << u4_row_shift);
+            }
+
+            /*-------------------------------------------------------------*/
+            /* Horizontal edge between this block and the block above it   */
+            /*-------------------------------------------------------------*/
+            if(0 == ((u4_horz_bs >> u4_col_shift) & 0xf))
+            {
+                const WORD16 i2_cur_x = ps_cur_mv_pred->i2_mv[0];
+                const WORD16 i2_cur_y = ps_cur_mv_pred->i2_mv[1];
+                const WORD8 i1_cur_ref = ps_cur_mv_pred->i1_ref_frame[0];
+                const WORD16 i2_top_x = ps_top_mv_pred->i2_mv[0];
+                const WORD16 i2_top_y = ps_top_mv_pred->i2_mv[1];
+                void *pv_cur_pic = ppv_l0_pics[i1_cur_ref];
+                void *pv_top_pic0, *pv_top_pic1;
+                UWORD32 u4_mv_far, u4_bs;
+
+                if(0 == u4_row)
+                {
+                    /* Neighbour belongs to the top MB: use its saved */
+                    /* picture addresses                              */
+                    pv_top_pic0 = u4_pic_addrress[i4_col & 2];
+                    pv_top_pic1 = u4_pic_addrress[1 + (i4_col & 2)];
+                }
+                else
+                {
+                    const WORD8 i1_top_ref = ps_top_mv_pred->i1_ref_frame[0];
+
+                    pv_top_pic0 = ppv_l0_pics[i1_top_ref];
+                    pv_top_pic1 = 0;
+                }
+
+                u4_mv_far = (ABS((i2_top_x - i2_cur_x)) >= 4)
+                            || (ABS((i2_top_y - i2_cur_y)) >= i4_ver_mvlimit);
+
+                u4_bs = (pv_top_pic0 != pv_cur_pic)
+                        || (pv_top_pic1 != 0)
+                        || u4_mv_far;
+
+                u4_horz_bs |= (u4_bs << u4_col_shift);
+            }
+
+            /* The block just processed is the left neighbour of the next */
+            ps_left_blk = ps_cur_mv_pred;
+            ps_top_mv_pred++;
+            ps_cur_mv_pred++;
+        }
+
+        pu4_bs_table[u4_row] = u4_horz_bs;
+
+        /* ps_cur_mv_pred now points at the next row, so the row that was */
+        /* just finished (4 entries back) becomes the "top" row           */
+        u4_row++;
+        ps_top_mv_pred = ps_cur_mv_pred - 4;
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Function Name : ih264d_fill_bs1_16x16mb_bslice
+ *
+ *  Description   : Fills boundary strength (=1) for those fields of the top
+ *                  and left MB edges of a 16x16 macroblock whose Bs is still
+ *                  0 on entry (i.e. was not set by
+ *                  ih264d_fill_bs2_horz_vert). Used for B slices, so both
+ *                  motion vector pairs and both reference indices are read.
+ *
+ *  Inputs        : ps_cur_mv_pred      - entry of the current MB; only the
+ *                                        first entry is read
+ *                  ps_top_mv_pred      - four consecutive entries along the
+ *                                        top edge
+ *                  ppv_map_ref_idx_to_poc
+ *                                      - table indexed by i1_ref_frame[0];
+ *                                        the table for i1_ref_frame[1]
+ *                                        starts POC_LIST_L0_TO_L1_DIFF
+ *                                        entries further on
+ *                  pu4_bs_table        - word [0]: top edge, word [4]: left
+ *                                        edge; four 8-bit fields per word,
+ *                                        index 0 in the most significant byte
+ *                  ps_leftmost_mv_pred - entries along the left edge, 4 apart
+ *                  ps_left_addr        - picture addresses of the left MB:
+ *                                        u4_add[0..1] for fields 0-1,
+ *                                        u4_add[2..3] for fields 2-3
+ *                  u4_pic_addrress     - picture addresses of the top MB,
+ *                                        same [0..1] / [2..3] split
+ *                  i4_ver_mvlimit      - threshold applied to the difference
+ *                                        of the vertical mv components
+ *
+ *  Processing    : Two pairings are evaluated: neighbour (mv0,mv1)/(mv2,mv3)
+ *                  against current (mv0,mv1)/(mv2,mv3), and the crossed
+ *                  pairing. A pairing "differs" when either picture address
+ *                  differs or any mv component reaches its threshold. Bs is
+ *                  set to 1 only when BOTH pairings differ.
+ *
+ *  Outputs       : pu4_bs_table[0] and pu4_bs_table[4] updated in place
+ *  Returns       : None
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         16 10 2008   Jay             Draft
+ *
+ *****************************************************************************/
+void ih264d_fill_bs1_16x16mb_bslice(mv_pred_t *ps_cur_mv_pred,
+                                    mv_pred_t *ps_top_mv_pred,
+                                    void **ppv_map_ref_idx_to_poc,
+                                    UWORD32 *pu4_bs_table, /* pointer to the BsTable array */
+                                    mv_pred_t *ps_leftmost_mv_pred,
+                                    neighbouradd_t *ps_left_addr,
+                                    void **u4_pic_addrress,
+                                    WORD32 i4_ver_mvlimit)
+{
+    /* Current MB: mv pair 0 = i2_mv[0..1], mv pair 1 = i2_mv[2..3] */
+    WORD16 i2_cur_x0, i2_cur_y0, i2_cur_x1, i2_cur_y1;
+    void *pv_cur_pic0, *pv_cur_pic1;
+    void **ppv_l0_pics, **ppv_l1_pics;
+    UWORD32 u4_blk;
+    UWORD32 u4_top_bs = pu4_bs_table[0];
+    UWORD32 u4_left_bs = pu4_bs_table[4];
+
+    PROFILE_DISABLE_BOUNDARY_STRENGTH()
+
+    ppv_l0_pics = ppv_map_ref_idx_to_poc;
+    ppv_l1_pics = ppv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
+
+    i2_cur_x0 = ps_cur_mv_pred->i2_mv[0];
+    i2_cur_y0 = ps_cur_mv_pred->i2_mv[1];
+    i2_cur_x1 = ps_cur_mv_pred->i2_mv[2];
+    i2_cur_y1 = ps_cur_mv_pred->i2_mv[3];
+    pv_cur_pic0 = ppv_l0_pics[ps_cur_mv_pred->i1_ref_frame[0]];
+    pv_cur_pic1 = ppv_l1_pics[ps_cur_mv_pred->i1_ref_frame[1]];
+
+    /*---------------------------------------------------------------------*/
+    /* Top MB edge                                                         */
+    /*---------------------------------------------------------------------*/
+    for(u4_blk = 0; u4_blk < 4; u4_blk++, ps_top_mv_pred++)
+    {
+        const UWORD32 u4_shift = 24 - (u4_blk << 3);
+        WORD16 i2_nbr_x0, i2_nbr_y0, i2_nbr_x1, i2_nbr_y1;
+        void *pv_nbr_pic0, *pv_nbr_pic1;
+        UWORD32 u4_same_differs, u4_cross_differs, u4_bs;
+
+        /* Nothing to do when this field already carries a Bs value */
+        if((u4_top_bs >> u4_shift) & 0xf)
+        {
+            continue;
+        }
+
+        i2_nbr_x0 = ps_top_mv_pred->i2_mv[0];
+        i2_nbr_y0 = ps_top_mv_pred->i2_mv[1];
+        i2_nbr_x1 = ps_top_mv_pred->i2_mv[2];
+        i2_nbr_y1 = ps_top_mv_pred->i2_mv[3];
+        pv_nbr_pic0 = u4_pic_addrress[u4_blk & 2];
+        pv_nbr_pic1 = u4_pic_addrress[1 + (u4_blk & 2)];
+
+        /* Pairing 0-0 / 1-1 */
+        u4_same_differs = (pv_cur_pic0 != pv_nbr_pic0)
+                          || (pv_cur_pic1 != pv_nbr_pic1)
+                          || (ABS((i2_nbr_x0 - i2_cur_x0)) >= 4)
+                          || (ABS((i2_nbr_y0 - i2_cur_y0)) >= i4_ver_mvlimit)
+                          || (ABS((i2_nbr_x1 - i2_cur_x1)) >= 4)
+                          || (ABS((i2_nbr_y1 - i2_cur_y1)) >= i4_ver_mvlimit);
+
+        /* Pairing 0-1 / 1-0 */
+        u4_cross_differs = (pv_cur_pic0 != pv_nbr_pic1)
+                           || (pv_cur_pic1 != pv_nbr_pic0)
+                           || (ABS((i2_nbr_x0 - i2_cur_x1)) >= 4)
+                           || (ABS((i2_nbr_y0 - i2_cur_y1)) >= i4_ver_mvlimit)
+                           || (ABS((i2_nbr_x1 - i2_cur_x0)) >= 4)
+                           || (ABS((i2_nbr_y1 - i2_cur_y0)) >= i4_ver_mvlimit);
+
+        /* Bs = 1 only if neither pairing matches */
+        u4_bs = (u4_same_differs && u4_cross_differs);
+        u4_top_bs |= (u4_bs << u4_shift);
+    }
+    pu4_bs_table[0] = u4_top_bs;
+
+    /*---------------------------------------------------------------------*/
+    /* Left MB edge                                                        */
+    /*---------------------------------------------------------------------*/
+    for(u4_blk = 0; u4_blk < 4; u4_blk++, ps_leftmost_mv_pred += 4)
+    {
+        const UWORD32 u4_shift = 24 - (u4_blk << 3);
+        WORD16 i2_nbr_x0, i2_nbr_y0, i2_nbr_x1, i2_nbr_y1;
+        void *pv_nbr_pic0, *pv_nbr_pic1;
+        UWORD32 u4_same_differs, u4_cross_differs, u4_bs;
+
+        /* Nothing to do when this field already carries a Bs value */
+        if((u4_left_bs >> u4_shift) & 0xf)
+        {
+            continue;
+        }
+
+        i2_nbr_x0 = ps_leftmost_mv_pred->i2_mv[0];
+        i2_nbr_y0 = ps_leftmost_mv_pred->i2_mv[1];
+        i2_nbr_x1 = ps_leftmost_mv_pred->i2_mv[2];
+        i2_nbr_y1 = ps_leftmost_mv_pred->i2_mv[3];
+        pv_nbr_pic0 = ps_left_addr->u4_add[u4_blk & 2];
+        pv_nbr_pic1 = ps_left_addr->u4_add[1 + (u4_blk & 2)];
+
+        /* Pairing 0-0 / 1-1 */
+        u4_same_differs = (pv_cur_pic0 != pv_nbr_pic0)
+                          || (pv_cur_pic1 != pv_nbr_pic1)
+                          || (ABS((i2_nbr_x0 - i2_cur_x0)) >= 4)
+                          || (ABS((i2_nbr_y0 - i2_cur_y0)) >= i4_ver_mvlimit)
+                          || (ABS((i2_nbr_x1 - i2_cur_x1)) >= 4)
+                          || (ABS((i2_nbr_y1 - i2_cur_y1)) >= i4_ver_mvlimit);
+
+        /* Pairing 0-1 / 1-0 */
+        u4_cross_differs = (pv_cur_pic0 != pv_nbr_pic1)
+                           || (pv_cur_pic1 != pv_nbr_pic0)
+                           || (ABS((i2_nbr_x0 - i2_cur_x1)) >= 4)
+                           || (ABS((i2_nbr_y0 - i2_cur_y1)) >= i4_ver_mvlimit)
+                           || (ABS((i2_nbr_x1 - i2_cur_x0)) >= 4)
+                           || (ABS((i2_nbr_y1 - i2_cur_y0)) >= i4_ver_mvlimit);
+
+        /* Bs = 1 only if neither pairing matches */
+        u4_bs = (u4_same_differs && u4_cross_differs);
+        u4_left_bs |= (u4_bs << u4_shift);
+    }
+    pu4_bs_table[4] = u4_left_bs;
+
+    return;
+}
+
+/*****************************************************************************
+ *
+ *  Function Name : ih264d_fill_bs1_non16x16mb_bslice
+ *
+ *  Description   : Fills boundary strength (=1) for those horizontal and
+ *                  vertical 4x4 block edges of a non-16x16 macroblock whose
+ *                  Bs field is still 0 on entry (i.e. was not set by
+ *                  ih264d_fill_bs2_horz_vert). Used for B slices, so both
+ *                  motion vector pairs and both reference indices of every
+ *                  block are read.
+ *
+ *  Inputs        : ps_cur_mv_pred      - first of the 16 per-4x4 entries of
+ *                                        the current MB; walked in raster
+ *                                        order, four entries per row
+ *                  ps_top_mv_pred      - four entries above row 0; for the
+ *                                        following rows the previous row of
+ *                                        the current MB is used instead
+ *                  ppv_map_ref_idx_to_poc
+ *                                      - table indexed by i1_ref_frame[0];
+ *                                        the table for i1_ref_frame[1]
+ *                                        starts POC_LIST_L0_TO_L1_DIFF
+ *                                        entries further on
+ *                  pu4_bs_table        - words [0..3]: horizontal edges, one
+ *                                        word per row; words [4..7]: vertical
+ *                                        edges, one word per column. Each word
+ *                                        holds four 8-bit fields, index 0 in
+ *                                        the most significant byte
+ *                  ps_leftmost_mv_pred - entry to the left of row 0; entries
+ *                                        for the next rows are 4 apart
+ *                  ps_left_addr        - picture addresses of the left MB:
+ *                                        u4_add[0..1] for rows 0-1,
+ *                                        u4_add[2..3] for rows 2-3
+ *                  u4_pic_addrress     - picture addresses of the top MB:
+ *                                        [0..1] for columns 0-1,
+ *                                        [2..3] for columns 2-3
+ *                  i4_ver_mvlimit      - threshold applied to the difference
+ *                                        of the vertical mv components
+ *
+ *  Processing    : Two pairings are evaluated per edge: neighbour
+ *                  (mv0,mv1)/(mv2,mv3) against current (mv0,mv1)/(mv2,mv3),
+ *                  and the crossed pairing. A pairing "differs" when either
+ *                  picture address differs or any mv component reaches its
+ *                  threshold. Bs is set to 1 only when BOTH pairings differ.
+ *
+ *  Outputs       : pu4_bs_table[0..7] updated in place; fields whose low
+ *                  nibble is already non-zero are left untouched
+ *  Returns       : None
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         16 10 2008   Jay             Draft
+ *
+ *****************************************************************************/
+void ih264d_fill_bs1_non16x16mb_bslice(mv_pred_t *ps_cur_mv_pred,
+                                       mv_pred_t *ps_top_mv_pred,
+                                       void **ppv_map_ref_idx_to_poc,
+                                       UWORD32 *pu4_bs_table, /* pointer to the BsTable array */
+                                       mv_pred_t *ps_leftmost_mv_pred,
+                                       neighbouradd_t *ps_left_addr,
+                                       void **u4_pic_addrress,
+                                       WORD32 i4_ver_mvlimit)
+{
+    void **ppv_l0_pics = ppv_map_ref_idx_to_poc;
+    void **ppv_l1_pics = ppv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
+    UWORD32 u4_row;
+
+    PROFILE_DISABLE_BOUNDARY_STRENGTH()
+
+    u4_row = 0;
+    while(u4_row < 4)
+    {
+        /* Position of this row's field inside every vertical-edge word */
+        const UWORD32 u4_row_shift = 24 - (u4_row << 3);
+        /* Horizontal-edge word of this row, written back after the row */
+        UWORD32 u4_horz_bs = pu4_bs_table[u4_row];
+        /* Block to the left of column 0 lives in the left neighbour */
+        mv_pred_t *ps_left_blk = ps_leftmost_mv_pred + (u4_row << 2);
+        WORD32 i4_col;
+
+        for(i4_col = 0; i4_col < 4; i4_col++)
+        {
+            /* Position of this column's field inside the horizontal word */
+            const UWORD32 u4_col_shift = 24 - (i4_col << 3);
+
+            /*-------------------------------------------------------------*/
+            /* Vertical edge between this block and the block to its left  */
+            /*-------------------------------------------------------------*/
+            if(0 == ((pu4_bs_table[i4_col + 4] >> u4_row_shift) & 0xf))
+            {
+                const WORD16 i2_left_x0 = ps_left_blk->i2_mv[0];
+                const WORD16 i2_left_y0 = ps_left_blk->i2_mv[1];
+                const WORD16 i2_left_x1 = ps_left_blk->i2_mv[2];
+                const WORD16 i2_left_y1 = ps_left_blk->i2_mv[3];
+                const WORD16 i2_cur_x0 = ps_cur_mv_pred->i2_mv[0];
+                const WORD16 i2_cur_y0 = ps_cur_mv_pred->i2_mv[1];
+                const WORD16 i2_cur_x1 = ps_cur_mv_pred->i2_mv[2];
+                const WORD16 i2_cur_y1 = ps_cur_mv_pred->i2_mv[3];
+                const WORD8 i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0];
+                const WORD8 i1_cur_ref1 = ps_cur_mv_pred->i1_ref_frame[1];
+                void *pv_cur_pic0 = ppv_l0_pics[i1_cur_ref0];
+                void *pv_cur_pic1 = ppv_l1_pics[i1_cur_ref1];
+                void *pv_left_pic0, *pv_left_pic1;
+                UWORD32 u4_same_differs, u4_cross_differs, u4_vert_bs;
+
+                if(0 == i4_col)
+                {
+                    /* Neighbour belongs to the left MB: use its saved */
+                    /* picture addresses                               */
+                    pv_left_pic0 = ps_left_addr->u4_add[u4_row & 2];
+                    pv_left_pic1 = ps_left_addr->u4_add[1 + (u4_row & 2)];
+                }
+                else
+                {
+                    const WORD8 i1_left_ref0 = ps_left_blk->i1_ref_frame[0];
+                    const WORD8 i1_left_ref1 = ps_left_blk->i1_ref_frame[1];
+
+                    pv_left_pic0 = ppv_l0_pics[i1_left_ref0];
+                    pv_left_pic1 = ppv_l1_pics[i1_left_ref1];
+                }
+
+                /* Pairing 0-0 / 1-1 */
+                u4_same_differs =
+                                (pv_left_pic0 != pv_cur_pic0)
+                                || (pv_left_pic1 != pv_cur_pic1)
+                                || (ABS((i2_left_x0 - i2_cur_x0)) >= 4)
+                                || (ABS((i2_left_y0 - i2_cur_y0)) >= i4_ver_mvlimit)
+                                || (ABS((i2_left_x1 - i2_cur_x1)) >= 4)
+                                || (ABS((i2_left_y1 - i2_cur_y1)) >= i4_ver_mvlimit);
+
+                /* Pairing 0-1 / 1-0 */
+                u4_cross_differs =
+                                (pv_left_pic0 != pv_cur_pic1)
+                                || (pv_left_pic1 != pv_cur_pic0)
+                                || (ABS((i2_left_x0 - i2_cur_x1)) >= 4)
+                                || (ABS((i2_left_y0 - i2_cur_y1)) >= i4_ver_mvlimit)
+                                || (ABS((i2_left_x1 - i2_cur_x0)) >= 4)
+                                || (ABS((i2_left_y1 - i2_cur_y0)) >= i4_ver_mvlimit);
+
+                /* Bs = 1 only if neither pairing matches */
+                u4_vert_bs = (u4_same_differs && u4_cross_differs);
+
+                pu4_bs_table[i4_col + 4] |= (u4_vert_bs << u4_row_shift);
+            }
+
+            /*-------------------------------------------------------------*/
+            /* Horizontal edge between this block and the block above it   */
+            /*-------------------------------------------------------------*/
+            if(0 == ((u4_horz_bs >> u4_col_shift) & 0xf))
+            {
+                const WORD16 i2_cur_x0 = ps_cur_mv_pred->i2_mv[0];
+                const WORD16 i2_cur_y0 = ps_cur_mv_pred->i2_mv[1];
+                const WORD16 i2_cur_x1 = ps_cur_mv_pred->i2_mv[2];
+                const WORD16 i2_cur_y1 = ps_cur_mv_pred->i2_mv[3];
+                const WORD8 i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0];
+                const WORD8 i1_cur_ref1 = ps_cur_mv_pred->i1_ref_frame[1];
+                const WORD16 i2_top_x0 = ps_top_mv_pred->i2_mv[0];
+                const WORD16 i2_top_y0 = ps_top_mv_pred->i2_mv[1];
+                const WORD16 i2_top_x1 = ps_top_mv_pred->i2_mv[2];
+                const WORD16 i2_top_y1 = ps_top_mv_pred->i2_mv[3];
+                void *pv_cur_pic0 = ppv_l0_pics[i1_cur_ref0];
+                void *pv_cur_pic1 = ppv_l1_pics[i1_cur_ref1];
+                void *pv_top_pic0, *pv_top_pic1;
+                UWORD32 u4_same_differs, u4_cross_differs, u4_bs;
+
+                if(0 == u4_row)
+                {
+                    /* Neighbour belongs to the top MB: use its saved */
+                    /* picture addresses                              */
+                    pv_top_pic0 = u4_pic_addrress[i4_col & 2];
+                    pv_top_pic1 = u4_pic_addrress[1 + (i4_col & 2)];
+                }
+                else
+                {
+                    const WORD8 i1_top_ref0 = ps_top_mv_pred->i1_ref_frame[0];
+                    const WORD8 i1_top_ref1 = ps_top_mv_pred->i1_ref_frame[1];
+
+                    pv_top_pic0 = ppv_l0_pics[i1_top_ref0];
+                    pv_top_pic1 = ppv_l1_pics[i1_top_ref1];
+                }
+
+                /* Pairing 0-0 / 1-1 */
+                u4_same_differs =
+                                (pv_top_pic0 != pv_cur_pic0)
+                                || (pv_top_pic1 != pv_cur_pic1)
+                                || (ABS((i2_top_x0 - i2_cur_x0)) >= 4)
+                                || (ABS((i2_top_y0 - i2_cur_y0)) >= i4_ver_mvlimit)
+                                || (ABS((i2_top_x1 - i2_cur_x1)) >= 4)
+                                || (ABS((i2_top_y1 - i2_cur_y1)) >= i4_ver_mvlimit);
+
+                /* Pairing 0-1 / 1-0 */
+                u4_cross_differs =
+                                (pv_top_pic0 != pv_cur_pic1)
+                                || (pv_top_pic1 != pv_cur_pic0)
+                                || (ABS((i2_top_x0 - i2_cur_x1)) >= 4)
+                                || (ABS((i2_top_y0 - i2_cur_y1)) >= i4_ver_mvlimit)
+                                || (ABS((i2_top_x1 - i2_cur_x0)) >= 4)
+                                || (ABS((i2_top_y1 - i2_cur_y0)) >= i4_ver_mvlimit);
+
+                /* Bs = 1 only if neither pairing matches */
+                u4_bs = (u4_same_differs && u4_cross_differs);
+
+                u4_horz_bs |= (u4_bs << u4_col_shift);
+            }
+
+            /* The block just processed is the left neighbour of the next */
+            ps_left_blk = ps_cur_mv_pred;
+            ps_top_mv_pred++;
+            ps_cur_mv_pred++;
+        }
+
+        pu4_bs_table[u4_row] = u4_horz_bs;
+
+        /* ps_cur_mv_pred now points at the next row, so the row that was */
+        /* just finished (4 entries back) becomes the "top" row           */
+        u4_row++;
+        ps_top_mv_pred = ps_cur_mv_pred - 4;
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Function Name : ih264d_fill_bs_xtra_left_edge_cur_fld
+ *
+ *  Description   : Fills boundary strength (= 2 or 1) for the extra left MB
+ *                  edge when the current MB is field and the left MB is
+ *                  frame.
+ *
+ *  Inputs        : pu4_bs            - base pointer of the BS table
+ *                  u4_left_mb_t_csbp - csbp of the left MB pair's top MB;
+ *                                      bits 3, 7, 11 and 15 are used
+ *                  u4_left_mb_b_csbp - csbp of the left MB pair's bottom MB;
+ *                                      bits 3, 7, 11 and 15 are used
+ *                  u4_cur_mb_csbp    - csbp of the current MB; bits 0, 4, 8
+ *                                      and 12 are used
+ *                  u4_cur_mb_top     - not used
+ *
+ *  Globals       : gau4_ih264d_packed_bs2 (read only, entries 16 onwards)
+ *
+ *  Processing    : Builds an 8-bit mask from the left csbp bits and an 8-bit
+ *                  mask in which every selected current csbp bit appears
+ *                  twice, ORs the two, and looks up one packed Bs word per
+ *                  nibble of the result.
+ *
+ *  Outputs       : pu4_bs[4] (from the low nibble) and pu4_bs[9] (from the
+ *                  high nibble)
+ *  Returns       : None
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         16 10 2008   Jay             Draft
+ *
+ *****************************************************************************/
+void ih264d_fill_bs_xtra_left_edge_cur_fld(UWORD32 *pu4_bs, /* Base pointer of BS table */
+                                           WORD32 u4_left_mb_t_csbp, /* left mbpair's top csbp */
+                                           WORD32 u4_left_mb_b_csbp, /* left mbpair's bottom csbp*/
+                                           WORD32 u4_cur_mb_csbp, /* csbp of current mb */
+                                           UWORD32 u4_cur_mb_top /* is top or bottom mb */
+
+                                           )
+{
+    /* Lookup starts 16 entries into the packed Bs table */
+    const UWORD32 *pu4_lut = (const UWORD32 *)gau4_ih264d_packed_bs2 + 16;
+    /* Unsigned copies so that shifting before masking is well defined */
+    const UWORD32 u4_top = (UWORD32)u4_left_mb_t_csbp;
+    const UWORD32 u4_bot = (UWORD32)u4_left_mb_b_csbp;
+    const UWORD32 u4_csbp = (UWORD32)u4_cur_mb_csbp;
+    UWORD32 u4_left_mask, u4_cur_mask, u4_any;
+    UNUSED(u4_cur_mb_top);
+
+    PROFILE_DISABLE_BOUNDARY_STRENGTH()
+
+    /*********************************************************************/
+    /* u4_left_mask = 15Lb|11Lb|7Lb|3Lb|15Lt|11Lt|7Lt|3Lt  (bit 7 .. 0)  */
+    /*********************************************************************/
+    u4_left_mask = ((u4_top >> 3) & 0x01)
+                    | ((u4_top >> 6) & 0x02)
+                    | ((u4_top >> 9) & 0x04)
+                    | ((u4_top >> 12) & 0x08)
+                    | ((u4_bot << 1) & 0x10)
+                    | ((u4_bot >> 2) & 0x20)
+                    | ((u4_bot >> 5) & 0x40)
+                    | ((u4_bot >> 8) & 0x80);
+
+    /*********************************************************************/
+    /* u4_cur_mask = 12C|12C|8C|8C|4C|4C|0C|0C             (bit 7 .. 0)  */
+    /* Place each selected bit on an even position, then copy it to the  */
+    /* odd position next to it.                                          */
+    /*********************************************************************/
+    u4_cur_mask = (u4_csbp & 0x01)
+                    | ((u4_csbp >> 2) & 0x04)
+                    | ((u4_csbp >> 4) & 0x10)
+                    | ((u4_csbp >> 6) & 0x40);
+    u4_cur_mask |= (u4_cur_mask << 1);
+
+    u4_any = (u4_cur_mask | u4_left_mask);
+
+    /*********************************************************************/
+    /* Fill BS table entries 4 and 9 using the look up table             */
+    /*********************************************************************/
+    pu4_bs[4] = pu4_lut[u4_any & 0xF];
+    pu4_bs[9] = pu4_lut[(u4_any >> 4)];
+}
+
+/*****************************************************************************
+ *
+ *  Function Name : ih264d_fill_bs_xtra_left_edge_cur_frm
+ *
+ *  Description   : Fills boundary strength (= 2 or 1) for the extra left MB
+ *                  edge when the current MB is frame and the left MB is
+ *                  field.
+ *
+ *  Inputs        : pu4_bs            - base pointer of the BS table
+ *                  u4_left_mb_t_csbp - csbp of the left MB pair's top MB
+ *                  u4_left_mb_b_csbp - csbp of the left MB pair's bottom MB
+ *                  u4_cur_mb_csbp    - csbp of the current MB; bits 0, 4, 8
+ *                                      and 12 are used
+ *                  u4_cur_mb_bot     - both left csbp values are shifted
+ *                                      right by (u4_cur_mb_bot << 3) before
+ *                                      their bits 3 and 7 are read
+ *
+ *  Globals       : gau4_ih264d_packed_bs2 (read only, entries 16 onwards)
+ *
+ *  Processing    : Every selected left csbp bit is expanded to two adjacent
+ *                  bits (top MB in the low nibble, bottom MB in the high
+ *                  nibble); the four selected current csbp bits are packed
+ *                  into a nibble that is repeated in both nibbles. The two
+ *                  masks are ORed and one packed Bs word is looked up per
+ *                  nibble.
+ *
+ *  Outputs       : pu4_bs[4] (from the low nibble) and pu4_bs[9] (from the
+ *                  high nibble)
+ *  Returns       : None
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         16 10 2008   Jay             Draft
+ *
+ *****************************************************************************/
+void ih264d_fill_bs_xtra_left_edge_cur_frm(UWORD32 *pu4_bs, /* Base pointer of BS table */
+                                           WORD32 u4_left_mb_t_csbp, /* left mbpair's top csbp */
+                                           WORD32 u4_left_mb_b_csbp, /* left mbpair's bottom csbp*/
+                                           WORD32 u4_cur_mb_csbp, /* csbp of current mb */
+                                           UWORD32 u4_cur_mb_bot /* is top or bottom mb */
+
+                                           )
+{
+    /* Lookup starts 16 entries into the packed Bs table */
+    const UWORD32 *pu4_lut = (const UWORD32 *)gau4_ih264d_packed_bs2 + 16;
+    const UWORD32 u4_half_shift = (u4_cur_mb_bot << 3);
+    const UWORD32 u4_csbp = (UWORD32)u4_cur_mb_csbp;
+    UWORD32 u4_top_b3, u4_top_b7, u4_bot_b3, u4_bot_b7;
+    UWORD32 u4_left_mask, u4_cur_mask, u4_any;
+
+    PROFILE_DISABLE_BOUNDARY_STRENGTH()
+
+    /* Select the part of the left csbp words that applies to this MB */
+    u4_left_mb_t_csbp >>= u4_half_shift;
+    u4_left_mb_b_csbp >>= u4_half_shift;
+
+    /* Isolate bits 3 and 7 of each left csbp as 0/1 flags */
+    u4_top_b3 = (u4_left_mb_t_csbp & 0x08) >> 3;
+    u4_top_b7 = (u4_left_mb_t_csbp & 0x80) >> 7;
+    u4_bot_b3 = (u4_left_mb_b_csbp & 0x08) >> 3;
+    u4_bot_b7 = (u4_left_mb_b_csbp & 0x80) >> 7;
+
+    /*********************************************************************/
+    /* u4_left_mask = 7Lb|7Lb|3Lb|3Lb|7Lt|7Lt|3Lt|3Lt      (bit 7 .. 0)  */
+    /* Each flag is spread over two neighbouring bit positions.          */
+    /*********************************************************************/
+    u4_left_mask = (u4_top_b3 * 0x03)
+                    | (u4_top_b7 * 0x0C)
+                    | (u4_bot_b3 * 0x30)
+                    | (u4_bot_b7 * 0xC0);
+
+    /*********************************************************************/
+    /* u4_cur_mask = 12C|8C|4C|0C|12C|8C|4C|0C             (bit 7 .. 0)  */
+    /*********************************************************************/
+    u4_cur_mask = (u4_csbp & 0x01)
+                    | ((u4_csbp >> 3) & 0x02)
+                    | ((u4_csbp >> 6) & 0x04)
+                    | ((u4_csbp >> 9) & 0x08);
+    u4_cur_mask |= (u4_cur_mask << 4);
+
+    u4_any = (u4_cur_mask | u4_left_mask);
+
+    /*********************************************************************/
+    /* Fill BS table entries 4 and 9 using the look up table             */
+    /*********************************************************************/
+    pu4_bs[4] = pu4_lut[u4_any & 0xF];
+    pu4_bs[9] = pu4_lut[(u4_any >> 4)];
+}
+
+/*****************************************************************************
+ *
+ *  Function Name : ih264d_fill_bs_xtra_top_edge
+ *
+ *  Description   : Fills boundary strength (= 2 or 1) for the extra top MB
+ *                  edge when the current MB is the top MB of a frame MB pair
+ *                  and the top MB pair is field coded.
+ *
+ *  Inputs        : pu4_bs          - base pointer of the BS table
+ *                  u4_topmb_t_csbp - csbp of the top MB pair's top MB;
+ *                                    bits 12..15 are used
+ *                  u4_topmb_b_csbp - csbp of the top MB pair's bottom MB;
+ *                                    bits 12..15 are used
+ *                  u4_cur_mb_csbp  - csbp of the current MB; bits 0..3 are
+ *                                    used
+ *
+ *  Globals       : gau4_ih264d_packed_bs2 (read only, entries 16 onwards)
+ *
+ *  Processing    : ORs the four selected bits of the current MB with the
+ *                  four selected bits of each top MB and looks up one packed
+ *                  Bs word per combination. The lookup index is always
+ *                  masked to 4 bits so that unexpected high (or sign) bits
+ *                  in a csbp argument can never index outside the 16 table
+ *                  entries addressed here.
+ *
+ *  Outputs       : pu4_bs[8] (current MB vs. top MB pair's top MB) and
+ *                  pu4_bs[0] (current MB vs. top MB pair's bottom MB)
+ *  Returns       : None
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         16 10 2008   Jay             Draft
+ *
+ *****************************************************************************/
+void ih264d_fill_bs_xtra_top_edge(UWORD32 *pu4_bs, /* Base pointer of BS table */
+                                  WORD32 u4_topmb_t_csbp, /* top mbpair's top csbp */
+                                  WORD32 u4_topmb_b_csbp, /* top mbpair's bottom csbp*/
+                                  WORD32 u4_cur_mb_csbp /* csbp of current mb */
+
+                                  )
+{
+    /* Lookup starts 16 entries into the packed Bs table */
+    const UWORD32 *pu4_lut = (const UWORD32 *)gau4_ih264d_packed_bs2 + 16;
+    UWORD32 u4_cur_bits, u4_top_t_bits, u4_top_b_bits;
+
+    /* Shift on unsigned copies and mask to 4 bits: identical to the plain */
+    /* ">> 12" for any 16-bit csbp, but keeps the index within 0..15 for   */
+    /* every possible argument value                                      */
+    u4_cur_bits = ((UWORD32)u4_cur_mb_csbp) & 0xf;
+    u4_top_t_bits = (((UWORD32)u4_topmb_t_csbp) >> 12) & 0xf;
+    u4_top_b_bits = (((UWORD32)u4_topmb_b_csbp) >> 12) & 0xf;
+
+    /*********************************************************************/
+    /* Fill the top edge entries (0 and 8) of the BS table using the     */
+    /* look up table                                                     */
+    /*********************************************************************/
+    pu4_bs[8] = pu4_lut[u4_cur_bits | u4_top_t_bits];
+    pu4_bs[0] = pu4_lut[u4_cur_bits | u4_top_b_bits];
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_compute_bs_non_mbaff */
+/* */
+/* Description : This function computes the pointers of left,top & current*/
+/* : Nnz, MvPred & deblk_mb_t and supplies to FillBs function for*/
+/* : Boundary Strength Calculation */
+/* Inputs : <What inputs does the function take?> */
+/* Processing : This functions calls deblock MB in the MB increment order*/
+/* */
+/* Outputs : Produces the Boundary Strength for Current Mb */
+/* Returns : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* ITTIAM */
+/*****************************************************************************/
+
+void ih264d_compute_bs_non_mbaff(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ const UWORD16 u2_mbxn_mb)
+{
+ /* Mvpred and Nnz for top and Courrent */
+ mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred;
+ /* deblk_mb_t Params */
+ deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
+ deblkmb_neighbour_t *ps_deblk_top_mb;
+
+ /* Reference Index to POC mapping*/
+ void ** apv_map_ref_idx_to_poc;
+ UWORD32 u4_leftmbtype;
+
+ UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp;
+
+ /* Set of flags */
+ UWORD32 u4_cur_mb_intra, u1_top_mb_typ, u4_cur_mb_fld;
+ UWORD32 u1_cur_mb_type;
+ UWORD32 * pu4_bs_table;
+
+ /* Neighbour availability */
+ /* Initialization */
+ const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
+ const UWORD32 u2_mby = ps_cur_mb_info->u2_mby;
+ const UWORD32 u1_pingpong = u2_mbx & 0x01;
+
+ PROFILE_DISABLE_BOUNDARY_STRENGTH()
+
+ ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx;
+
+
+ /* Pointer assignment for Current DeblkMB, Current Mv Pred */
+ ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb;
+ ps_cur_mv_pred = ps_dec->ps_mv_cur + (u2_mbxn_mb << 4);
+
+ apv_map_ref_idx_to_poc = ps_dec->ppv_map_ref_idx_to_poc + 1;
+ u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
+ u1_top_mb_typ = ps_deblk_top_mb->u1_mb_type;
+ ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type;
+
+ {
+ UWORD8 mb_qp_temp;
+
+ ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp;
+ ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
+
+ ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp;
+ ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
+
+ }
+
+ /* if no deblocking required for current Mb then continue */
+ /* Check next Mbs in Mb group */
+ if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
+ {
+ void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc +
+ POC_LIST_L0_TO_L1_DIFF;
+ {
+ /* Store Parameter for Top MvPred refernce frame Address */
+
+ void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
+ WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
+ WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
+
+ /* Store Left addresses for Next Mb */
+ void ** ppv_left_mv_pred_addr =
+ ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
+ WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
+
+
+ ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
+ ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
+
+ ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+ ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+
+ ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
+ ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
+ //}
+ /* Storing the leftMbtype for next Mb */
+ ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
+ }
+
+ return;
+ }
+
+ /* Flag for extra left Edge */
+ ps_cur_mb_params->u1_single_call = 1;
+
+ /* Update the Left deblk_mb_t and Left MvPred Parameters */
+ if(!u2_mbx)
+ {
+ u4_leftmbtype = 0;
+
+ /* Initialize the ps_left_mv_pred with Junk but Valid Location */
+ /* to avoid invalid memory access */
+ /* this is read only pointer */
+ ps_left_mv_pred = ps_dec->ps_mv_cur + 3;
+ }
+ else
+ {
+ u4_leftmbtype = ps_dec->deblk_left_mb[1].u1_mb_type;
+
+ /* Come to Left Most Edge of the MB */
+ ps_left_mv_pred = (u2_mbxn_mb) ?
+ ps_dec->ps_mv_cur + ((u2_mbxn_mb - 1) << 4) + 3 :
+ ps_dec->ps_mv_left + 3;
+ }
+
+ if(!u2_mby)
+ u1_top_mb_typ = 0;
+
+ /* MvPred Pointer Calculation */
+ /* CHANGED CODE */
+ ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 4) + 12;
+
+ u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB;
+ u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB);
+ /* Compute BS function */
+ pu4_bs_table = ps_cur_mb_params->u4_bs_table;
+
+ u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
+ u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
+ u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp;
+ /* Compute BS function */
+ if(ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC)
+ {
+ if(ps_cur_mb_info->u1_tran_form8x8 == 1)
+ {
+ u2_cur_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_curmb->u2_luma_csbp);
+ }
+
+ if(ps_cur_mb_info->ps_left_mb->u1_tran_form8x8 == 1)
+ {
+ u2_left_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_left_mb->u2_luma_csbp);
+ }
+
+ if(ps_cur_mb_info->ps_top_mb->u1_tran_form8x8 == 1)
+ {
+ u2_top_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_top_mb->u2_luma_csbp);
+ }
+ }
+ if(u4_cur_mb_intra)
+ {
+
+ pu4_bs_table[4] = 0x04040404;
+ pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
+ pu4_bs_table[1] = 0x03030303;
+ pu4_bs_table[2] = 0x03030303;
+ pu4_bs_table[3] = 0x03030303;
+ pu4_bs_table[5] = 0x03030303;
+ pu4_bs_table[6] = 0x03030303;
+ pu4_bs_table[7] = 0x03030303;
+ }
+ else
+ {
+ UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16);
+ UWORD32 u4_is_b = ps_dec->u1_B;
+
+ ih264d_fill_bs2_horz_vert(
+ pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp,
+ (const UWORD32 *)(gau4_ih264d_packed_bs2),
+ (const UWORD16 *)(gau2_ih264d_4x4_v2h_reorder));
+
+ if(u4_leftmbtype & D_INTRA_MB)
+ pu4_bs_table[4] = 0x04040404;
+
+ if(u1_top_mb_typ & D_INTRA_MB)
+ pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
+
+ ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16](
+ ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc,
+ pu4_bs_table, ps_left_mv_pred,
+ &(ps_dec->ps_left_mvpred_addr[u1_pingpong][1]),
+ ps_cur_mb_info->ps_top_mb->u4_pic_addrress,
+ (4 >> u4_cur_mb_fld));
+ }
+
+ {
+ void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc +
+ POC_LIST_L0_TO_L1_DIFF;
+ {
+ /* Store Parameter for Top MvPred refernce frame Address */
+
+ void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
+ WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
+ WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
+
+ /* Store Left addresses for Next Mb */
+ void ** ppv_left_mv_pred_addr =
+ ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
+ WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
+
+ ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
+ ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
+
+ ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+ ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+
+ ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
+ ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
+
+ /* Storing the leftMbtype for next Mb */
+ ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
+
+ }
+ }
+
+ /* For transform 8x8 disable deblocking of the intrernal edges of a 8x8 block */
+ if(ps_cur_mb_info->u1_tran_form8x8)
+ {
+ pu4_bs_table[1] = 0;
+ pu4_bs_table[3] = 0;
+ pu4_bs_table[5] = 0;
+ pu4_bs_table[7] = 0;
+ }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_compute_bs_mbaff */
+/* */
+/* Description : Sets up the left, top and current MvPred / deblk_mb_t */
+/* : pointers for one MB of an MB pair and fills the boundary */
+/* : strength table (u4_bs_table) of that MB */
+/* Inputs : ps_dec - decoder context */
+/* : ps_cur_mb_info - current MB info; u1_topmb selects top/bottom */
+/* : u2_mbxn_mb - MB pair index into ps_deblk_mbn and ps_mv_cur */
+/* Processing : Intra MBs get fixed strengths; other MBs are filled by */
+/* : ih264d_fill_bs2_horz_vert and ps_dec->pf_fill_bs1 */
+/* Outputs : Boundary Strength for Current Mb, updated left/top state */
+/* Returns : None */
+/* Revision History: */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* ITTIAM */
+/*****************************************************************************/
+
+void ih264d_compute_bs_mbaff(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ const UWORD16 u2_mbxn_mb)
+{
+ /* MvPred pointers for the current MB and its top and left neighbours */
+ mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred;
+ /* deblk_mb_t Params */
+ deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
+ neighbouradd_t * ps_left_ngbr;
+ deblkmb_neighbour_t *ps_deblk_top_mb;
+ /* Reference Index to POC mapping*/
+ void ** apv_map_ref_idx_to_poc;
+ /* Type of the left neighbour MB, read from ps_dec->deblk_left_mb */
+ UWORD32 u4_leftmbtype;
+
+ /* Luma coded sub-block patterns of the left, top and current MB */
+ UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp;
+
+ /* Set of flags; u4_bot_mb is 1 when u1_topmb is 0 */
+ UWORD32 u4_cur_mb_intra, u4_cur_mb_fld, u4_top_mb_fld, u1_top_mb_typ, u4_left_mb_fld;
+ UWORD32 u1_cur_mb_type;
+ UWORD32 * pu4_bs_table;
+ const UWORD32 u4_bot_mb = (1 - ps_cur_mb_info->u1_topmb);
+ /* Initialization */
+ const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
+ const UWORD32 u2_mby = ps_cur_mb_info->u2_mby;
+ /* Load From u1_pingpong and Store in !u1_pingpong */
+ const UWORD32 u1_pingpong = u2_mbx & 0x01;
+
+ PROFILE_DISABLE_BOUNDARY_STRENGTH()
+ /* Top neighbour state is indexed with two entries per MB column */
+ ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + (u2_mbx << 1);
+
+
+ /************************************************/
+ /* Initialize the left Mb type */
+ /* Left MvPred */
+ /************************************************/
+
+ if(!u2_mbx)
+ {
+ /************************************************************/
+ /* Initialize the ps_left_mv_pred with Junk but Valid Location */
+ /* to avoid invalid memory access */
+ /* this is read only pointer */
+ /************************************************************/
+ ps_left_mv_pred = ps_dec->ps_mv_cur + 16;
+ }
+ else
+ {
+ /* Entry 3 of the MB on the left: previous pair in ps_mv_cur, else ps_mv_left */
+ ps_left_mv_pred = (u2_mbxn_mb) ?
+ ps_dec->ps_mv_cur + ((u2_mbxn_mb - 1) << 5) + 3 :
+ ps_dec->ps_mv_left + 3;
+ /* The bottom MB uses the second set of 16 entries of the pair */
+ ps_left_mv_pred += (u4_bot_mb << 4);
+ }
+
+ u4_leftmbtype = ps_dec->deblk_left_mb[u4_bot_mb].u1_mb_type;
+
+ ps_left_ngbr = &(ps_dec->ps_left_mvpred_addr[u1_pingpong][u4_bot_mb]);
+
+ /************************************************/
+ /* Pointer Assignment for Current Mb Parameters */
+ /* Pointer Assignment for Current MvPred */
+ /************************************************/
+ ps_cur_mb_params = ps_dec->ps_deblk_mbn + (u2_mbxn_mb << 1) + u4_bot_mb;
+ u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
+
+ ps_cur_mv_pred = ps_dec->ps_mv_cur + (u2_mbxn_mb << 5);
+ ps_cur_mv_pred += (u4_bot_mb << 4);
+
+ /********************************************/
+ /* Pointer Assignment for Top Mb Parameters */
+ /* Pointer Assignment for Top MvPred and */
+ /* Pointer Assignment for Top Nnz */
+ /********************************************/
+
+ /* Step back u2_frm_wd_in_mbs * 32 entries (presumably one row of MB pairs), then to entry 12 */
+ ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 5) + 12;
+
+ u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB);
+ u4_left_mb_fld = !!(ps_dec->deblk_left_mb[0].u1_mb_type & D_FLD_MB);
+
+ if(u4_left_mb_fld != u4_cur_mb_fld)
+ {
+ /* Flag for extra left Edge */
+ ps_cur_mb_params->u1_single_call = 0;
+ /* Bottom MB: step the left neighbour pointers back by one MB (16 MvPred entries) */
+ if(u4_bot_mb)
+ {
+ ps_left_ngbr--;
+ ps_left_mv_pred -= 16;
+ }
+ }
+ else
+ ps_cur_mb_params->u1_single_call = 1;
+
+ apv_map_ref_idx_to_poc = ps_dec->ppv_map_ref_idx_to_poc + 1;
+ if(u4_cur_mb_fld)
+ {
+ if(u4_bot_mb)
+ {
+ apv_map_ref_idx_to_poc += BOT_LIST_FLD_L0;
+ }
+ else
+ {
+ apv_map_ref_idx_to_poc += TOP_LIST_FLD_L0;
+ }
+ }
+
+ /**********************************************************/
+ /* If no deblocking is required for current Mb, only save */
+ /* the neighbour state kept for later MBs and return */
+ if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
+ {
+ void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc +
+ POC_LIST_L0_TO_L1_DIFF;
+
+ {
+ /* Store Parameter for Top MvPred reference frame Address */
+
+ void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
+ void ** ppv_left_mv_pred_addr =
+ ps_dec->ps_left_mvpred_addr[!u1_pingpong][u4_bot_mb].u4_add;
+ WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
+ WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
+ WORD8 * p1_refLeft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
+ ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
+ ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
+ ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+ ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+ ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refLeft0[0]];
+ ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refLeft0[1]];
+ }
+ if(u4_bot_mb)
+ {
+ /* store The Left Mb Type of both MBs of the pair */
+ ps_dec->deblk_left_mb[0].u1_mb_type =
+ (ps_cur_mb_params - 1)->u1_mb_type;
+ ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
+
+ }
+ ps_deblk_top_mb[u4_bot_mb].u1_mb_type = u1_cur_mb_type;
+ return;
+ }
+ /* Select the top neighbour type, its field flag and the top MvPred row */
+ if(u2_mby)
+ {
+ u1_top_mb_typ = ps_deblk_top_mb[1].u1_mb_type;
+ u4_top_mb_fld = !!(u1_top_mb_typ & D_FLD_MB);
+
+ if(!u4_bot_mb)
+ {
+ if(u4_top_mb_fld & u4_cur_mb_fld)
+ u1_top_mb_typ = ps_deblk_top_mb[0].u1_mb_type;
+ else
+ {
+ ps_top_mv_pred += 16;
+ }
+ }
+ }
+ else
+ {
+ u4_top_mb_fld = u4_cur_mb_fld;
+ u1_top_mb_typ = 0;
+ }
+ /* Bottom frame MB: top neighbour is entry [0] and the 4 MvPred entries just before the current MB */
+ if(u4_bot_mb & !u4_cur_mb_fld)
+ {
+ u1_top_mb_typ = ps_deblk_top_mb[0].u1_mb_type;
+ u4_top_mb_fld = u4_cur_mb_fld;
+ ps_top_mv_pred = ps_cur_mv_pred - 4;
+ }
+
+ pu4_bs_table = ps_cur_mb_params->u4_bs_table;
+ u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB;
+
+ u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
+ u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
+ u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp;
+ /* For High profile, the csbp of MBs with u1_tran_form8x8 == 1 is remapped by ih264d_update_csbp_8x8 */
+ if(ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC)
+ {
+
+ if(ps_cur_mb_info->u1_tran_form8x8 == 1)
+ {
+ u2_cur_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_curmb->u2_luma_csbp);
+ }
+
+ if(ps_cur_mb_info->ps_left_mb->u1_tran_form8x8 == 1)
+ {
+ u2_left_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_left_mb->u2_luma_csbp);
+ }
+
+ if(ps_cur_mb_info->ps_top_mb->u1_tran_form8x8 == 1)
+ {
+ u2_top_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_top_mb->u2_luma_csbp);
+ }
+ }
+ if(u4_cur_mb_intra)
+ {
+ /* Intra MB: fixed strengths; entry [4] is the left MB edge, entry [0] the top MB edge */
+ pu4_bs_table[4] = 0x04040404;
+ if((0 == u4_cur_mb_fld) && (0 == u4_top_mb_fld))
+ {
+ pu4_bs_table[0] = 0x04040404;
+ }
+ else
+ {
+ pu4_bs_table[0] = 0x03030303;
+ }
+
+ pu4_bs_table[1] = 0x03030303;
+ pu4_bs_table[2] = 0x03030303;
+ pu4_bs_table[3] = 0x03030303;
+ pu4_bs_table[5] = 0x03030303;
+ pu4_bs_table[6] = 0x03030303;
+ pu4_bs_table[7] = 0x03030303;
+
+ /*********************************************************************/
+ /* Fill Bs of extra top and left edge unconditionally to avoid checks */
+ /*********************************************************************/
+ pu4_bs_table[8] = 0x03030303;
+ pu4_bs_table[9] = 0x04040404;
+ }
+ else
+ {
+ UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16);
+ UWORD32 u4_is_b = ps_dec->u1_B;
+
+ ih264d_fill_bs2_horz_vert(
+ pu4_bs_table, u2_left_csbp, u2_top_csbp, u2_cur_csbp,
+ (const UWORD32 *)(gau4_ih264d_packed_bs2),
+ (const UWORD16 *)(gau2_ih264d_4x4_v2h_reorder));
+
+ if(u4_leftmbtype & D_INTRA_MB)
+ pu4_bs_table[4] = 0x04040404;
+
+ if(u1_top_mb_typ & D_INTRA_MB)
+ pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
+ else if(u4_cur_mb_fld != u4_top_mb_fld)
+ {
+ /****************************************************/
+ /* Setting BS for mixed mode edge=1 when (Bs!=2) */
+ /****************************************************/
+ pu4_bs_table[0] = (pu4_bs_table[0] >> 1) + 0x01010101;
+ }
+
+ {
+ /* Call to Compute Boundary Strength for Extra Left Edge */
+ if(u2_mbx
+ && !(ps_cur_mb_params->u1_deblocking_mode
+ & MB_DISABLE_LEFT_EDGE))
+ {
+ if(u4_cur_mb_fld != u4_left_mb_fld)
+ {
+ UWORD32 u4_left_mb_t_csbp =
+ ps_cur_mb_info->ps_left_mb[0].u2_luma_csbp;
+ UWORD32 u4_left_mb_b_csbp =
+ ps_cur_mb_info->ps_left_mb[1].u2_luma_csbp;
+ if(1 == ps_cur_mb_info->ps_left_mb[0].u1_tran_form8x8)
+ {
+ u4_left_mb_t_csbp = (UWORD32)ih264d_update_csbp_8x8(
+ (UWORD16)u4_left_mb_t_csbp);
+ }
+
+ if(1 == ps_cur_mb_info->ps_left_mb[1].u1_tran_form8x8)
+ {
+ u4_left_mb_b_csbp = (UWORD32)ih264d_update_csbp_8x8(
+ (UWORD16)u4_left_mb_b_csbp);
+ }
+ ps_dec->pf_fill_bs_xtra_left_edge[u4_cur_mb_fld](
+ pu4_bs_table, u4_left_mb_t_csbp,
+ u4_left_mb_b_csbp, u2_cur_csbp, u4_bot_mb);
+
+ if(ps_dec->deblk_left_mb[0].u1_mb_type & D_INTRA_MB)
+ pu4_bs_table[4] = 0x04040404;
+
+ if(ps_dec->deblk_left_mb[1].u1_mb_type & D_INTRA_MB)
+ pu4_bs_table[9] = 0x04040404;
+
+ }
+ }
+ /* Call to Compute Boundary Strength for Extra Top Edge */
+ if(u2_mby
+ && !(ps_cur_mb_params->u1_deblocking_mode
+ & MB_DISABLE_TOP_EDGE))
+ {
+ if((((!u4_bot_mb) & (!u4_cur_mb_fld)) && u4_top_mb_fld))
+ {
+ UWORD32 u4_topmb_t_csbp =
+ ps_cur_mb_info->ps_top_mb[-1].u2_luma_csbp;
+ UWORD32 u4_topmb_b_csbp =
+ ps_cur_mb_info->ps_top_mb[0].u2_luma_csbp;
+ if(1 == ps_cur_mb_info->ps_top_mb[-1].u1_tran_form8x8)
+ {
+ u4_topmb_t_csbp = (UWORD32)ih264d_update_csbp_8x8(
+ (UWORD16)u4_topmb_t_csbp);
+ }
+
+ if(1 == ps_cur_mb_info->ps_top_mb[0].u1_tran_form8x8)
+ {
+ u4_topmb_b_csbp = (UWORD32)ih264d_update_csbp_8x8(
+ (UWORD16)u4_topmb_b_csbp);
+ }
+ ih264d_fill_bs_xtra_top_edge(pu4_bs_table, u4_topmb_t_csbp,
+ u4_topmb_b_csbp, u2_cur_csbp);
+
+ if(ps_deblk_top_mb[0].u1_mb_type & D_INTRA_MB)
+ pu4_bs_table[8] = 0x03030303;
+
+ if(ps_deblk_top_mb[1].u1_mb_type & D_INTRA_MB)
+ pu4_bs_table[0] = 0x03030303;
+ }
+ }
+ }
+
+ ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16](
+ ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc,
+ pu4_bs_table, ps_left_mv_pred, ps_left_ngbr,
+ ps_cur_mb_info->ps_top_mb->u4_pic_addrress,
+ (4 >> u4_cur_mb_fld));
+ }
+ /* Save the reference picture addresses and MB types kept for later MBs */
+ {
+ void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc +
+ POC_LIST_L0_TO_L1_DIFF;
+
+ {
+ /* Store Parameter for Top MvPred reference frame Address */
+ void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
+ void ** ppv_left_mv_pred_addr =
+ ps_dec->ps_left_mvpred_addr[!u1_pingpong][u4_bot_mb].u4_add;
+ WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
+ WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
+ WORD8 * p1_refLeft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
+ ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
+ ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
+ ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+ ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+ ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refLeft0[0]];
+ ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refLeft0[1]];
+ }
+ if(u4_bot_mb)
+ {
+ /* store The Left Mb Type of both MBs of the pair */
+ ps_dec->deblk_left_mb[0].u1_mb_type =
+ (ps_cur_mb_params - 1)->u1_mb_type;
+ ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
+
+ }
+ ps_deblk_top_mb[u4_bot_mb].u1_mb_type = u1_cur_mb_type;
+ }
+ /* For transform 8x8 disable deblocking of the internal edges of a 8x8 block */
+ if(ps_cur_mb_info->u1_tran_form8x8)
+ {
+ pu4_bs_table[1] = 0;
+ pu4_bs_table[3] = 0;
+ pu4_bs_table[5] = 0;
+ pu4_bs_table[7] = 0;
+ }
+
+}
+
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_fill_bs_for_mb \endif
+ *
+ * \brief
+ * Determines the boundary strength (Bs), for the complete MB. Bs is
+ * determined for each block boundary between two neighbouring 4x4
+ * luma blocks, then packed in a UWORD32, first Bs placed in MSB and
+ * so on. Such packed Bs values for all 8 edges are written to
+ * ui_bs_table: [0..3] horizontal edges, [4..7] vertical edges.
+ *
+ * \return
+ * None. The packed Bs words are MSB -> LSB Bs0|Bs1|Bs2|Bs3
+ **************************************************************************
+ */
+
+void ih264d_fill_bs_for_mb(deblk_mb_t * ps_cur_mb_params,
+ deblk_mb_t * ps_top_mb_params,
+ deblk_mb_t * ps_left_mb_params,
+ mv_pred_t *ps_cur_mv_pred,
+ mv_pred_t *ps_top_mv_pred,
+ UWORD8 *puc_cur_nnz,
+ UWORD8 *puc_top_nnz,
+ void **ppv_map_ref_idx_to_poc,
+ UWORD32 ui_mbAff,
+ UWORD32 ui_bs_table[], /* pointer to the BsTable array */
+ mv_pred_t *ps_leftmost_mv_pred,
+ neighbouradd_t *ps_left_addr,
+ neighbouradd_t *ps_top_add)
+{
+ UWORD32 u4_bs_horz = 0;
+ UWORD8 edge, u1_top_intra = 0, u1_left_intra = 0;
+ mv_pred_t *ps_left_mv_pred;
+ WORD16 i2_cur_mv0, i2_cur_mv1, i16_curMv2, i16_curMv3;
+ WORD16 i2_left_mv0, i2_left_mv1, i2_left_mv2, i2_left_mv3;
+ WORD16 i2_top_mv0, i2_top_mv1, i16_topMv2, i16_topMv3;
+ WORD8 i1_cur_ref0, i1_cur_ref1, i1_left_ref0, i1_left_ref1, i1_top_ref0, i1_top_ref1;
+ UWORD8 uc_cur_nnz, uc_left_nnz, uc_top_nnz, u1_mb_type, uc_Bslice;
+ void **ppv_map_ref_idx_to_poc_l0, **ppv_map_ref_idx_to_poc_l1;
+ UWORD8 uc_temp;
+ UWORD8 uc_cur_mb_fld, uc_top_mb_fld;
+ UWORD32 c_mv_limit;
+
+ u1_mb_type = ps_cur_mb_params->u1_mb_type;
+ uc_Bslice = u1_mb_type & D_B_SLICE;
+ ppv_map_ref_idx_to_poc_l0 = ppv_map_ref_idx_to_poc;
+ ppv_map_ref_idx_to_poc_l1 = ppv_map_ref_idx_to_poc + POC_LIST_L0_TO_L1_DIFF;
+ /* A NULL top MB pointer falls back to the current MB's own parameters */
+ ps_top_mb_params = ps_top_mb_params ? ps_top_mb_params : ps_cur_mb_params;
+ u1_top_intra = ps_top_mb_params->u1_mb_type & D_INTRA_MB;
+ u1_left_intra = ps_left_mb_params->u1_mb_type & D_INTRA_MB;
+
+ ui_bs_table[4] = 0x04040404; //Default for INTRA MB Boundary edges.
+ uc_cur_mb_fld = (ps_cur_mb_params->u1_mb_type & D_FLD_MB) >> 7;
+ uc_top_mb_fld = (ps_top_mb_params->u1_mb_type & D_FLD_MB) >> 7;
+ /* Threshold used for the i2_mv[1] / i2_mv[3] differences: 4 >> uc_cur_mb_fld */
+ c_mv_limit = 4 >> uc_cur_mb_fld;
+ if((0 == uc_cur_mb_fld) && (0 == uc_top_mb_fld))
+ {
+ ui_bs_table[0] = 0x04040404;
+ }
+ else
+ {
+ ui_bs_table[0] = 0x03030303;
+ }
+ /* edge = row of 4x4 blocks; after each row the top pointers are moved onto that row */
+ for(edge = 0; edge < 4;
+ edge++, ps_top_mv_pred = ps_cur_mv_pred - 4, puc_top_nnz =
+ puc_cur_nnz - 4)
+ {
+ //Each iteration of this loop fills the four BS values of one HORIZ edge and
+ //one BS value for each of the four VERT edges.
+ WORD8 i = 0;
+ UWORD8 uc_bs_horiz, uc_bs_vert;
+ UWORD32 ui_cnd;
+ void *ui_ref_pic_addr[4];
+ UWORD8 uc_mixed_mode_edge;
+
+ uc_mixed_mode_edge = 0;
+ /* Seed with the nnz (ui_mbAff << 4) + 13 bytes back; it becomes the left nnz for i = 0 */
+ uc_temp = (ui_mbAff << 4) + 13;
+
+ uc_cur_nnz = *(puc_cur_nnz - uc_temp);
+ ps_left_mv_pred = ps_leftmost_mv_pred + (edge << 2);
+
+ for(i = 0; i < 4; i++, ps_top_mv_pred++, ps_cur_mv_pred++)
+ {
+ //Each iteration of this inner loop computes a HORIZ
+ //and a VERT BS value for a 4x4 block
+
+ uc_left_nnz = uc_cur_nnz;
+ uc_cur_nnz = *puc_cur_nnz++;
+ uc_top_nnz = *puc_top_nnz++;
+
+ //VERT edge is assigned BS values first
+ ui_cnd = !(uc_left_nnz || uc_cur_nnz);
+ uc_bs_vert = 2;
+
+ if(ui_cnd)
+ {
+ i2_left_mv0 = ps_left_mv_pred->i2_mv[0];
+ i2_left_mv1 = ps_left_mv_pred->i2_mv[1];
+ i2_left_mv2 = ps_left_mv_pred->i2_mv[2];
+ i2_left_mv3 = ps_left_mv_pred->i2_mv[3];
+
+ i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0];
+ i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1];
+ i16_curMv2 = ps_cur_mv_pred->i2_mv[2];
+ i16_curMv3 = ps_cur_mv_pred->i2_mv[3];
+ i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0];
+ i1_cur_ref1 = ps_cur_mv_pred->i1_ref_frame[1];
+ ui_ref_pic_addr[2] = ppv_map_ref_idx_to_poc_l0[i1_cur_ref0];
+ ui_ref_pic_addr[3] = ppv_map_ref_idx_to_poc_l1[i1_cur_ref1];
+
+ if(i)
+ {
+ i1_left_ref0 = ps_left_mv_pred->i1_ref_frame[0];
+ i1_left_ref1 = ps_left_mv_pred->i1_ref_frame[1];
+ ui_ref_pic_addr[0] = ppv_map_ref_idx_to_poc_l0[i1_left_ref0];
+ ui_ref_pic_addr[1] = ppv_map_ref_idx_to_poc_l1[i1_left_ref1];
+ }
+ else
+ {
+ ui_ref_pic_addr[0] = ps_left_addr->u4_add[edge & 2];
+ ui_ref_pic_addr[1] = ps_left_addr->u4_add[1 + (edge & 2)];
+ }
+ if(!uc_Bslice)
+ {
+ uc_bs_vert =
+ (ui_ref_pic_addr[0] != ui_ref_pic_addr[2])
+ | (ABS((i2_left_mv0
+ - i2_cur_mv0))
+ >= 4)
+ | (ABS((i2_left_mv1
+ - i2_cur_mv1))
+ >= (UWORD8)c_mv_limit);
+ }
+ else
+ {
+ UWORD8 uc_bs_temp1, uc_bs_temp2;
+
+ uc_bs_vert = 1;
+
+ uc_bs_temp1 =
+ ((ABS((i2_left_mv0 - i2_cur_mv0))
+ >= 4)
+ | (ABS((i2_left_mv1
+ - i2_cur_mv1))
+ >= (UWORD8)c_mv_limit)
+ | (ABS((i2_left_mv2
+ - i16_curMv2))
+ >= 4)
+ | (ABS((i2_left_mv3
+ - i16_curMv3))
+ >= (UWORD8)c_mv_limit));
+
+ uc_bs_temp2 =
+ ((ABS((i2_left_mv0 - i16_curMv2))
+ >= 4)
+ | (ABS((i2_left_mv1
+ - i16_curMv3))
+ >= (UWORD8)c_mv_limit)
+ | (ABS((i2_left_mv2
+ - i2_cur_mv0))
+ >= 4)
+ | (ABS((i2_left_mv3
+ - i2_cur_mv1))
+ >= (UWORD8)c_mv_limit));
+
+ uc_bs_vert =
+ (((ui_ref_pic_addr[0] != ui_ref_pic_addr[2])
+ || (ui_ref_pic_addr[1]
+ != ui_ref_pic_addr[3]))
+ || (uc_bs_temp1))
+ && (((ui_ref_pic_addr[0]
+ != ui_ref_pic_addr[3])
+ || (ui_ref_pic_addr[1]
+ != ui_ref_pic_addr[2]))
+ || (uc_bs_temp2));
+
+ }
+ }
+ //The left MB edge (i == 0) with an intra left MB always gets BS 4;
+ //the value is then shifted into the packed word of VERT edge i
+ uc_bs_vert = (!i && u1_left_intra) ? 4 : uc_bs_vert;
+ ui_bs_table[i + 4] = (ui_bs_table[i + 4] << 8) | uc_bs_vert;
+
+ //HORIZ edge is assigned BS values next
+ ui_cnd = !(uc_top_nnz || uc_cur_nnz);
+ uc_bs_horiz = 2;
+
+ if(ui_cnd)
+ {
+ uc_mixed_mode_edge =
+ (0 == edge) ? (uc_top_mb_fld != uc_cur_mb_fld) : 0;
+ ui_cnd = 1 - uc_mixed_mode_edge;
+ uc_bs_horiz = uc_mixed_mode_edge;
+ }
+
+ if(ui_cnd)
+ {
+ i2_cur_mv0 = ps_cur_mv_pred->i2_mv[0];
+ i2_cur_mv1 = ps_cur_mv_pred->i2_mv[1];
+ i16_curMv2 = ps_cur_mv_pred->i2_mv[2];
+ i16_curMv3 = ps_cur_mv_pred->i2_mv[3];
+ i1_cur_ref0 = ps_cur_mv_pred->i1_ref_frame[0];
+ i1_cur_ref1 = ps_cur_mv_pred->i1_ref_frame[1];
+
+ i2_top_mv0 = ps_top_mv_pred->i2_mv[0];
+ i2_top_mv1 = ps_top_mv_pred->i2_mv[1];
+ i16_topMv2 = ps_top_mv_pred->i2_mv[2];
+ i16_topMv3 = ps_top_mv_pred->i2_mv[3];
+ ui_ref_pic_addr[2] = ppv_map_ref_idx_to_poc_l0[i1_cur_ref0];
+ ui_ref_pic_addr[3] = ppv_map_ref_idx_to_poc_l1[i1_cur_ref1];
+ if(edge)
+ {
+ i1_top_ref0 = ps_top_mv_pred->i1_ref_frame[0];
+ i1_top_ref1 = ps_top_mv_pred->i1_ref_frame[1];
+ ui_ref_pic_addr[0] = ppv_map_ref_idx_to_poc_l0[i1_top_ref0];
+ ui_ref_pic_addr[1] = ppv_map_ref_idx_to_poc_l1[i1_top_ref1];
+ }
+ else
+ {
+ ui_ref_pic_addr[0] = ps_top_add->u4_add[i & 2];
+ ui_ref_pic_addr[1] = ps_top_add->u4_add[1 + (i & 2)];
+ }
+ if(!uc_Bslice)
+ {
+ uc_bs_horiz =
+ (ui_ref_pic_addr[0] != ui_ref_pic_addr[2])
+ | (ABS((i2_top_mv0
+ - i2_cur_mv0))
+ >= 4)
+ | (ABS((i2_top_mv1
+ - i2_cur_mv1))
+ >= (UWORD8)c_mv_limit);
+ }
+ else
+ {
+ UWORD8 uc_bs_temp1, uc_bs_temp2;
+
+ uc_bs_horiz = 1;
+
+ uc_bs_temp1 =
+ ((ABS((i2_top_mv0 - i2_cur_mv0))
+ >= 4)
+ | (ABS((i2_top_mv1
+ - i2_cur_mv1))
+ >= (UWORD8)c_mv_limit)
+ | (ABS((i16_topMv2
+ - i16_curMv2))
+ >= 4)
+ | (ABS((i16_topMv3
+ - i16_curMv3))
+ >= (UWORD8)c_mv_limit));
+
+ uc_bs_temp2 =
+ ((ABS((i2_top_mv0 - i16_curMv2))
+ >= 4)
+ | (ABS((i2_top_mv1
+ - i16_curMv3))
+ >= (UWORD8)c_mv_limit)
+ | (ABS((i16_topMv2
+ - i2_cur_mv0))
+ >= 4)
+ | (ABS((i16_topMv3
+ - i2_cur_mv1))
+ >= (UWORD8)c_mv_limit));
+
+ uc_bs_horiz =
+ (((ui_ref_pic_addr[0] != ui_ref_pic_addr[2])
+ || (ui_ref_pic_addr[1]
+ != ui_ref_pic_addr[3]))
+ || (uc_bs_temp1))
+ && (((ui_ref_pic_addr[0]
+ != ui_ref_pic_addr[3])
+ || (ui_ref_pic_addr[1]
+ != ui_ref_pic_addr[2]))
+ || (uc_bs_temp2));
+
+ }
+ }
+ ps_left_mv_pred = ps_cur_mv_pred;
+ u4_bs_horz = (u4_bs_horz << 8) + uc_bs_horiz;
+ }
+ //Store the packed HORIZ BS of this row; for the top MB edge (edge == 0)
+ //the default set before the loop is kept when the top MB is intra
+ if(edge || (!edge && !u1_top_intra))
+ ui_bs_table[edge] = u4_bs_horz;
+ }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_fill_bs_for_extra_left_edge \endif
+ *
+ * \brief
+ * Fills the boundary strength (Bs) of the two left edges of an MB that
+ * needs the extra left edge: u4_bs_table[4] is derived from
+ * ps_leftDeblkMb[0] and u4_bs_table[9] from ps_leftDeblkMb[1]. Bs is 4
+ * where the current or the respective left MB is intra, else 2 where the
+ * current or left nnz is set, else 1. u1_single_call is cleared.
+ *
+ * \return
+ * None. Each table entry is packed MSB -> LSB as Bs0|Bs1|Bs2|Bs3
+ **************************************************************************
+ */
+void ih264d_fill_bs_for_extra_left_edge(deblk_mb_t *ps_cur_deblk_mb,
+                                        deblk_mb_t *ps_leftDeblkMb,
+                                        UWORD8* puc_cur_nnz,
+                                        UWORD8 uc_botMb)
+{
+    UWORD32 au4_edge_bs[2];
+    UWORD32 u4_is_fld;
+    UWORD32 u4_packed;
+    UWORD8 *pu1_left_nnz;
+    WORD32 i4_blk;
+
+    /* The current MB uses the mixed mode (extra) left edge */
+    ps_cur_deblk_mb->u1_single_call = 0;
+
+    /* Current MB intra, or both MBs of the left pair intra: Bs 4 all over */
+    if((ps_cur_deblk_mb->u1_mb_type & D_INTRA_MB)
+       || ((ps_leftDeblkMb->u1_mb_type & D_INTRA_MB)
+           && ((ps_leftDeblkMb + 1)->u1_mb_type & D_INTRA_MB)))
+    {
+        ps_cur_deblk_mb->u4_bs_table[4] = 0x04040404;
+        ps_cur_deblk_mb->u4_bs_table[9] = 0x04040404;
+        return;
+    }
+
+    u4_is_fld = (ps_cur_deblk_mb->u1_mb_type & D_FLD_MB) >> 7;
+
+    /* Left nnz is addressed relative to the nnz of the current MB */
+    pu1_left_nnz = puc_cur_nnz - 29;
+    if(uc_botMb)
+    {
+        pu1_left_nnz -= u4_is_fld ? 16 : 8;
+    }
+
+    if(u4_is_fld)
+    {
+        /* Left nnz advances on every step, current nnz on every second  */
+        /* step; the second edge continues where the first one stopped   */
+        if(ps_leftDeblkMb->u1_mb_type & D_INTRA_MB)
+        {
+            au4_edge_bs[0] = 0x04040404;
+            /* Skip the entries the loop below would have walked over */
+            pu1_left_nnz += 16;
+            puc_cur_nnz += 8;
+        }
+        else
+        {
+            u4_packed = 0;
+            for(i4_blk = 0; i4_blk < 4; i4_blk++)
+            {
+                u4_packed <<= 8;
+                u4_packed |= (*puc_cur_nnz || *pu1_left_nnz) ? 2 : 1;
+                pu1_left_nnz += 4;
+                if(i4_blk & 0x01)
+                    puc_cur_nnz += 4;
+            }
+            au4_edge_bs[0] = u4_packed;
+        }
+
+        if((ps_leftDeblkMb + 1)->u1_mb_type & D_INTRA_MB)
+        {
+            au4_edge_bs[1] = 0x04040404;
+        }
+        else
+        {
+            u4_packed = 0;
+            for(i4_blk = 0; i4_blk < 4; i4_blk++)
+            {
+                u4_packed <<= 8;
+                u4_packed |= (*puc_cur_nnz || *pu1_left_nnz) ? 2 : 1;
+                pu1_left_nnz += 4;
+                if(i4_blk & 0x01)
+                    puc_cur_nnz += 4;
+            }
+            au4_edge_bs[1] = u4_packed;
+        }
+    }
+    else
+    {
+        /* Current nnz advances on every step, left nnz on every second  */
+        /* step; the second edge restarts with left nnz 16 bytes later   */
+        UWORD8 *pu1_cur_second = puc_cur_nnz;
+        UWORD8 *pu1_left_second = pu1_left_nnz + 16;
+
+        if(ps_leftDeblkMb->u1_mb_type & D_INTRA_MB)
+        {
+            au4_edge_bs[0] = 0x04040404;
+        }
+        else
+        {
+            u4_packed = 0;
+            for(i4_blk = 0; i4_blk < 4; i4_blk++)
+            {
+                u4_packed <<= 8;
+                u4_packed |= (*puc_cur_nnz || *pu1_left_nnz) ? 2 : 1;
+                if(i4_blk & 0x01)
+                    pu1_left_nnz += 4;
+                puc_cur_nnz += 4;
+            }
+            au4_edge_bs[0] = u4_packed;
+        }
+
+        if((ps_leftDeblkMb + 1)->u1_mb_type & D_INTRA_MB)
+        {
+            au4_edge_bs[1] = 0x04040404;
+        }
+        else
+        {
+            u4_packed = 0;
+            for(i4_blk = 0; i4_blk < 4; i4_blk++)
+            {
+                u4_packed <<= 8;
+                u4_packed |= (*pu1_cur_second || *pu1_left_second) ? 2 : 1;
+                if(i4_blk & 0x01)
+                    pu1_left_second += 4;
+                pu1_cur_second += 4;
+            }
+            au4_edge_bs[1] = u4_packed;
+        }
+    }
+
+    /* Copy both edges into the Bs table of the current MB */
+    ps_cur_deblk_mb->u4_bs_table[4] = au4_edge_bs[0];
+    ps_cur_deblk_mb->u4_bs_table[9] = au4_edge_bs[1];
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_fill_bs_for_extra_top_edge \endif
+ *
+ * \brief
+ * Fills the boundary strength (Bs) of the two top edges of an MB:
+ * u4_bs_table[8] uses u1_Edge0_mb_typ and pu1_topNnz[0..3],
+ * u4_bs_table[0] uses u1_Edge1_mb_typ and pu1_topNnz[4..7]. Bs is 3 if
+ * either MB is intra, else 2 where either nnz is nonzero, else 1.
+ *
+ * \return
+ * None. Each table entry is packed MSB -> LSB as Bs0|Bs1|Bs2|Bs3
+ **************************************************************************
+ */
+void ih264d_fill_bs_for_extra_top_edge(deblk_mb_t *ps_cur_mb_params,
+                                       UWORD8 u1_Edge0_mb_typ,
+                                       UWORD8 u1_Edge1_mb_typ,
+                                       UWORD8 *pu1_curNnz,
+                                       UWORD8 *pu1_topNnz)
+{
+    /* Per top edge: type of the MB above and destination Bs table entry */
+    UWORD8 au1_nbr_type[2];
+    const UWORD8 au1_bs_slot[2] = { 8, 0 };
+    UWORD32 u4_edge;
+
+    au1_nbr_type[0] = u1_Edge0_mb_typ;
+    au1_nbr_type[1] = u1_Edge1_mb_typ;
+
+    for(u4_edge = 0; u4_edge < 2; u4_edge++)
+    {
+        UWORD32 u4_packed;
+        if((ps_cur_mb_params->u1_mb_type & D_INTRA_MB)
+           || (au1_nbr_type[u4_edge] & D_INTRA_MB))
+        {
+            u4_packed = 0x03030303;
+        }
+        else
+        {
+            /* Edge 1 reads the top nnz four bytes after those of edge 0 */
+            const UWORD8 *pu1_top = pu1_topNnz + (u4_edge << 2);
+            UWORD32 u4_blk;
+            u4_packed = 0;
+            for(u4_blk = 0; u4_blk < 4; u4_blk++)
+            {
+                u4_packed <<= 8;
+                u4_packed |= (pu1_curNnz[u4_blk] || pu1_top[u4_blk]) ? 2 : 1;
+            }
+        }
+        ps_cur_mb_params->u4_bs_table[au1_bs_slot[u4_edge]] = u4_packed;
+    }
+}
+
+
+/*****************************************************************************/
+/* Function Name : ih264d_fill_bs_mbedge_4 */
+/* Description : Updates the top and left neighbour QP / MB type state and, */
+/* : unless filtering is disabled for the MB, sets Bs 4 on the */
+/* : top and left MB edges and Bs 0 on all internal edges */
+/* Inputs : ps_dec - decoder context */
+/* : ps_cur_mb_info - current MB info (only u2_mbx is read) */
+/* : u2_mbxn_mb - index of the MB in ps_dec->ps_deblk_mbn */
+/* Outputs : u4_bs_table[0..7] of the current MB */
+/* Returns : None */
+/*****************************************************************************/
+void ih264d_fill_bs_mbedge_4(dec_struct_t * ps_dec,
+                             dec_mb_info_t * ps_cur_mb_info,
+                             const UWORD16 u2_mbxn_mb)
+{
+    /* deblk_mb_t Params */
+    deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
+    deblkmb_neighbour_t *ps_deblk_top_mb;
+    UWORD32 * pu4_bs_table;
+    UWORD8 u1_cur_mb_type;
+
+    /* Initialization */
+    const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
+
+    ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx;
+    /* Pointer assignment for Current DeblkMB */
+    ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb;
+
+    /* Record the current MB type in the top neighbour entry of this column */
+    u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
+    ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type;
+
+    /* Read the neighbours' QP, then store the current QP in their place */
+    ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp;
+    ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
+
+    ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp;
+    ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
+
+    ps_cur_mb_params->u1_single_call = 1;
+
+    /* Storing the leftMbtype for next Mb */
+    ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
+
+    /* if no deblocking required for current Mb then nothing more to do */
+    if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
+    {
+        return;
+    }
+
+    /* Bs 4 on the top ([0]) and left ([4]) MB edges, 0 on internal edges */
+    pu4_bs_table = ps_cur_mb_params->u4_bs_table;
+    pu4_bs_table[4] = 0x04040404;
+    pu4_bs_table[0] = 0x04040404;
+    pu4_bs_table[1] = 0;
+    pu4_bs_table[2] = 0;
+    pu4_bs_table[3] = 0;
+    pu4_bs_table[5] = 0;
+    pu4_bs_table[6] = 0;
+    pu4_bs_table[7] = 0;
+}
+
+/*****************************************************************************/
+/* Function Name : ih264d_fill_bs_mbedge_2 */
+/* Description : Updates the top and left neighbour QP / MB type state and, */
+/* : unless filtering is disabled for the MB, sets Bs 2 on the */
+/* : top / left MB edge when the luma csbp bits tested for that */
+/* : edge are set, else 0; internal edges always get Bs 0 */
+/* Inputs : ps_dec - decoder context */
+/* : ps_cur_mb_info - current MB info (u2_mbx and the luma csbp of */
+/* : the current, top and left MBs are read) */
+/* : u2_mbxn_mb - index of the MB in ps_dec->ps_deblk_mbn */
+/* Outputs : u4_bs_table[0..7] of the current MB */
+/* Returns : None */
+/*****************************************************************************/
+void ih264d_fill_bs_mbedge_2(dec_struct_t * ps_dec,
+                             dec_mb_info_t * ps_cur_mb_info,
+                             const UWORD16 u2_mbxn_mb)
+{
+    /* deblk_mb_t Params */
+    deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
+    deblkmb_neighbour_t *ps_deblk_top_mb;
+    UWORD32 * pu4_bs_table;
+    UWORD8 u1_cur_mb_type;
+
+    /* Initialization */
+    const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
+
+    ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx;
+
+    /* Pointer assignment for Current DeblkMB */
+    ps_cur_mb_params = ps_dec->ps_deblk_mbn + u2_mbxn_mb;
+
+    /* Record the current MB type in the top neighbour entry of this column */
+    u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
+    ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type;
+
+    /* Read the neighbours' QP, then store the current QP in their place */
+    ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp;
+    ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
+
+    ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp;
+    ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
+
+    ps_cur_mb_params->u1_single_call = 1;
+
+    /* Storing the leftMbtype for next Mb */
+    ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
+
+    /* if no deblocking required for current Mb then nothing more to do */
+    if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
+    {
+        return;
+    }
+
+    /* Compute BS function */
+    pu4_bs_table = ps_cur_mb_params->u4_bs_table;
+
+    {
+        UWORD32 top_mb_csbp, left_mb_csbp, cur_mb_csbp;
+        UWORD32 top_edge, left_edge;
+
+        top_mb_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp;
+        left_mb_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
+        cur_mb_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
+
+        /* Top edge: top MB csbp >> 12 combined with bits 0..3 of the current */
+        top_mb_csbp = top_mb_csbp >> 12;
+        top_edge = top_mb_csbp | (cur_mb_csbp & 0xf);
+
+        pu4_bs_table[0] = top_edge ? 0x02020202 : 0;
+
+        /* Left edge: masked csbp bits of the current and of the left MB */
+        cur_mb_csbp = cur_mb_csbp & CSBP_LEFT_BLOCK_MASK;
+        left_mb_csbp = left_mb_csbp & CSBP_RIGHT_BLOCK_MASK;
+
+        left_edge = cur_mb_csbp | left_mb_csbp;
+
+        pu4_bs_table[4] = left_edge ? 0x02020202 : 0;
+
+        pu4_bs_table[1] = 0;
+        pu4_bs_table[2] = 0;
+        pu4_bs_table[3] = 0;
+        pu4_bs_table[5] = 0;
+        pu4_bs_table[6] = 0;
+        pu4_bs_table[7] = 0;
+    }
+}
diff --git a/decoder/ih264d_deblocking.c b/decoder/ih264d_deblocking.c
new file mode 100755
index 0000000..ad4ce08
--- /dev/null
+++ b/decoder/ih264d_deblocking.c
@@ -0,0 +1,2134 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#include <string.h>
+
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_debug.h"
+#include "ih264d_defs.h"
+#include "ih264d_defs.h"
+#include "ih264d_structs.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_utils.h"
+
+
+#include "ih264d_defs.h"
+#include "ih264d_format_conv.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_tables.h"
+//extern UWORD8 *g_dest_y, *g_dest_uv;
+
+/*!
+ *************************************************************************
+ * \file ih264d_deblocking.c
+ *
+ * \brief
+ * Decoder specific deblocking routines
+ *
+ * \author AI
+ *************************************************************************
+ */
+
+/*!
+ **************************************************************************
+ * \if Function name : HorizonPad \endif
+ *
+ * \brief
+ * Does the Horizontal padding on a whole pic.
+ *
+ * \return
+ * None
+ **************************************************************************
+ */
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_filter_boundary_left_nonmbaff \endif
+ *
+ * \brief
+ *    Filters the left boundary edge (luma and interleaved chroma) of a
+ *    macroblock.
+ *
+ *    Alpha/beta thresholds are looked up from the rounded average of the
+ *    left and current MB QP plus the slice alpha/beta offsets. When
+ *    ps_cur_mb->u1_single_call is non-zero the edge is filtered once using
+ *    pu4_bs_tab[4]. Otherwise it is filtered in two passes through the
+ *    *_mbaff filter functions: the first pass uses pu4_bs_tab[4] and
+ *    ps_cur_mb->u1_left_mb_qp, the second uses pu4_bs_tab[9] and the QP of
+ *    (ps_left_mb + 1).
+ *
+ * \param ps_dec            Decoder context; supplies the filter function
+ *                          pointers.
+ * \param ps_tfr_cxt        Supplies pu1_mb_y / pu1_mb_u, the pixel pointers
+ *                          of the current MB.
+ * \param i1_cb_qp_idx_ofst Cb QP index offset added to the luma QP.
+ * \param i1_cr_qp_idx_ofst Cr QP index offset added to the luma QP.
+ * \param ps_cur_mb         Deblock parameters of the current MB.
+ * \param i4_strd_y         Luma stride.
+ * \param i4_strd_uv        Chroma stride.
+ * \param ps_left_mb        Left MB; only dereferenced (as ps_left_mb + 1)
+ *                          in the two-pass path.
+ * \param pu4_bs_tab        Boundary strength table of the current MB.
+ * \param u1_cur_fld        Non-zero when the current MB is a field MB; only
+ *                          used in the two-pass path.
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_filter_boundary_left_nonmbaff(dec_struct_t *ps_dec,
+                                          tfr_ctxt_t * ps_tfr_cxt,
+                                          WORD8 i1_cb_qp_idx_ofst,
+                                          WORD8 i1_cr_qp_idx_ofst,
+                                          deblk_mb_t * ps_cur_mb,
+                                          UWORD16 i4_strd_y,
+                                          UWORD16 i4_strd_uv,
+                                          deblk_mb_t * ps_left_mb,
+                                          UWORD32 pu4_bs_tab[],
+                                          UWORD8 u1_cur_fld)
+{
+    UWORD8 *pu1_y, *pu1_u;
+    WORD32 qp_avg;
+    /* Signed on purpose: QP + chroma offset can be negative. Holding it in */
+    /* an 8-bit unsigned would wrap to 244..255 and index far past the      */
+    /* chroma QP table.                                                     */
+    /* NOTE(review): assumes the "12 +" bias of gau1_ih264d_qp_scale_cr     */
+    /* covers indices down to -12 - confirm against the table definition.   */
+    WORD32 mb_qp1, mb_qp2;
+    WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0;
+    WORD32 alpha_y = 0, beta_y = 0;
+    WORD32 idx_a_u, idx_a_v, idx_a_y;
+
+    UWORD32 u4_bs_val;
+
+    UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y;
+
+    UWORD8 u1_double_cl = !ps_cur_mb->u1_single_call;
+    WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
+    WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset;
+
+    PROFILE_DISABLE_DEBLK()
+
+    pu1_y = ps_tfr_cxt->pu1_mb_y;
+    pu1_u = ps_tfr_cxt->pu1_mb_u;
+
+    /* LUMA values: rounded average of left and current MB QP */
+    qp_avg = (UWORD8)((ps_cur_mb->u1_left_mb_qp + ps_cur_mb->u1_mb_qp + 1)
+                    >> 1);
+    idx_a_y = qp_avg + ofst_a;
+    alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
+    beta_y = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    /* Chroma cb values */
+    mb_qp1 = ps_cur_mb->u1_left_mb_qp + i1_cb_qp_idx_ofst;
+    mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst;
+    qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
+                    + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
+    idx_a_u = qp_avg + ofst_a;
+    alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
+    beta_u = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    /* Chroma cr values */
+    mb_qp1 = ps_cur_mb->u1_left_mb_qp + i1_cr_qp_idx_ofst;
+    mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst;
+    qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
+                    + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
+    idx_a_v = qp_avg + ofst_a;
+    alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
+    beta_v = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    if(u1_double_cl == 0)
+    {
+        /* Single pass over the whole left edge */
+        u4_bs_val = pu4_bs_tab[4];
+
+        if(0x04040404 == u4_bs_val)
+        {
+            ps_dec->pf_deblk_luma_vert_bs4(pu1_y, i4_strd_y, alpha_y, beta_y);
+            ps_dec->pf_deblk_chroma_vert_bs4(pu1_u, i4_strd_uv, alpha_u,
+                                             beta_u, alpha_v, beta_v);
+        }
+        else if(u4_bs_val)
+        {
+            pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+            pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+            pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+            ps_dec->pf_deblk_luma_vert_bslt4(pu1_y, i4_strd_y, alpha_y,
+                                             beta_y, u4_bs_val,
+                                             pu1_cliptab_y);
+            ps_dec->pf_deblk_chroma_vert_bslt4(pu1_u, i4_strd_uv, alpha_u,
+                                               beta_u, alpha_v, beta_v,
+                                               u4_bs_val, pu1_cliptab_u,
+                                               pu1_cliptab_v);
+        }
+    }
+    else
+    {
+        /* Two passes. Strides are doubled when the current MB is a frame MB */
+        i4_strd_y <<= (!u1_cur_fld);
+        u4_bs_val = pu4_bs_tab[4];
+        i4_strd_uv <<= (!u1_cur_fld);
+
+        /* First pass: pu4_bs_tab[4] with the parameters computed above */
+        if(0x04040404 == u4_bs_val)
+        {
+            ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, i4_strd_y, alpha_y,
+                                                 beta_y);
+            ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, i4_strd_uv, alpha_u,
+                                                   beta_u, alpha_v, beta_v);
+        }
+        else if(u4_bs_val)
+        {
+            pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+            pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+            pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+            ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, i4_strd_y,
+                                                   alpha_y, beta_y,
+                                                   u4_bs_val,
+                                                   pu1_cliptab_y);
+            ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, i4_strd_uv,
+                                                     alpha_u, beta_u,
+                                                     alpha_v, beta_v,
+                                                     u4_bs_val,
+                                                     pu1_cliptab_u,
+                                                     pu1_cliptab_v);
+        }
+
+        /* Advance to the rows handled by the second pass. The offset is */
+        /* kept in 32 bits: (stride >> 1) << 4 does not fit in 16 bits   */
+        /* once the stride reaches 8192.                                 */
+        {
+            UWORD32 u4_shift = (UWORD32)(i4_strd_y >> 1)
+                            << (u1_cur_fld ? 4 : 0);
+            pu1_y += u4_shift;
+            u4_shift = (UWORD32)(i4_strd_uv >> 1) << (u1_cur_fld ? 3 : 0);
+            pu1_u += u4_shift;
+        }
+
+        /* Second pass: QP of (ps_left_mb + 1) and pu4_bs_tab[9] */
+        qp_avg = (((ps_left_mb + 1)->u1_mb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1);
+        idx_a_y = qp_avg + ofst_a;
+        alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
+        beta_y = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+        u4_bs_val = pu4_bs_tab[9];
+
+        mb_qp1 = (ps_left_mb + 1)->u1_mb_qp + i1_cb_qp_idx_ofst;
+        mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst;
+        qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
+                        + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
+        idx_a_u = qp_avg + ofst_a;
+        alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
+        beta_u = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+        mb_qp1 = (ps_left_mb + 1)->u1_mb_qp + i1_cr_qp_idx_ofst;
+        mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst;
+        qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
+                        + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
+        idx_a_v = qp_avg + ofst_a;
+        alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
+        beta_v = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+        if(0x04040404 == u4_bs_val)
+        {
+            ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, i4_strd_y, alpha_y,
+                                                 beta_y);
+            ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, i4_strd_uv, alpha_u,
+                                                   beta_u, alpha_v, beta_v);
+        }
+        else if(u4_bs_val)
+        {
+            pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+            pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+            pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+            ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, i4_strd_y,
+                                                   alpha_y, beta_y,
+                                                   u4_bs_val,
+                                                   pu1_cliptab_y);
+            ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, i4_strd_uv,
+                                                     alpha_u, beta_u,
+                                                     alpha_v, beta_v,
+                                                     u4_bs_val,
+                                                     pu1_cliptab_u,
+                                                     pu1_cliptab_v);
+        }
+    }
+
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_filter_boundary_top_nonmbaff \endif
+ *
+ * \brief
+ *    Filters the top boundary edge (luma and interleaved chroma) of a
+ *    macroblock.
+ *
+ *    Alpha/beta thresholds are looked up from the rounded average of
+ *    ps_cur_mb->u1_topmb_qp and ps_cur_mb->u1_mb_qp plus the slice
+ *    alpha/beta offsets. A boundary strength word of 0x04040404 selects the
+ *    bs4 filters, any other non-zero value the bslt4 filters, and zero
+ *    leaves the edge untouched.
+ *
+ * \param ps_dec            Decoder context; supplies the filter function
+ *                          pointers.
+ * \param ps_tfr_cxt        Supplies pu1_mb_y / pu1_mb_u, the pixel pointers
+ *                          of the current MB.
+ * \param i1_cb_qp_idx_ofst Cb QP index offset added to the luma QP.
+ * \param i1_cr_qp_idx_ofst Cr QP index offset added to the luma QP.
+ * \param ps_cur_mb         Deblock parameters of the current MB (also
+ *                          carries the top MB QP).
+ * \param i4_strd_y         Luma stride.
+ * \param i4_strd_uv        Chroma stride.
+ * \param ps_top_mb         Unused.
+ * \param u4_bs             Packed boundary strengths of the top edge.
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+
+void ih264d_filter_boundary_top_nonmbaff(dec_struct_t *ps_dec,
+                                         tfr_ctxt_t * ps_tfr_cxt,
+                                         WORD8 i1_cb_qp_idx_ofst,
+                                         WORD8 i1_cr_qp_idx_ofst,
+                                         deblk_mb_t * ps_cur_mb,
+                                         UWORD16 i4_strd_y,
+                                         UWORD16 i4_strd_uv,
+                                         deblk_mb_t * ps_top_mb,
+                                         UWORD32 u4_bs)
+{
+    UWORD8 *pu1_y, *pu1_u;
+    WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0;
+    WORD32 alpha_y = 0, beta_y = 0;
+    WORD32 qp_avg;
+    /* Signed on purpose: QP + chroma offset can be negative. Holding it in */
+    /* an 8-bit unsigned would wrap to 244..255 and index far past the      */
+    /* chroma QP table.                                                     */
+    /* NOTE(review): assumes the "12 +" bias of gau1_ih264d_qp_scale_cr     */
+    /* covers indices down to -12 - confirm against the table definition.   */
+    WORD32 mb_qp1, mb_qp2;
+    WORD32 idx_a_u, idx_a_v, idx_a_y;
+
+    UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y;
+    WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
+    WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset;
+
+    UNUSED(ps_top_mb);
+
+    pu1_y = ps_tfr_cxt->pu1_mb_y;
+    pu1_u = ps_tfr_cxt->pu1_mb_u;
+
+    /* LUMA values: rounded average of top and current MB QP */
+    qp_avg = (UWORD8)((ps_cur_mb->u1_topmb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1);
+    idx_a_y = qp_avg + ofst_a;
+    alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
+    beta_y = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    /* CHROMA cb values */
+    mb_qp1 = ps_cur_mb->u1_topmb_qp + i1_cb_qp_idx_ofst;
+    mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst;
+    qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
+                    + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
+    idx_a_u = qp_avg + ofst_a;
+    alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
+    beta_u = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    /* CHROMA cr values */
+    mb_qp1 = ps_cur_mb->u1_topmb_qp + i1_cr_qp_idx_ofst;
+    mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst;
+    qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + mb_qp1]
+                    + gau1_ih264d_qp_scale_cr[12 + mb_qp2] + 1) >> 1);
+    idx_a_v = qp_avg + ofst_a;
+    alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
+    beta_v = gau1_ih264d_beta_table[12 + qp_avg + ofst_b];
+
+    if(u4_bs == 0x04040404)
+    {
+        /* Strength 4 on all four sub-edges */
+        ps_dec->pf_deblk_luma_horz_bs4(pu1_y, i4_strd_y, alpha_y, beta_y);
+        ps_dec->pf_deblk_chroma_horz_bs4(pu1_u, i4_strd_uv, alpha_u, beta_u,
+                                         alpha_v, beta_v);
+    }
+    else if(u4_bs)
+    {
+        pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+        pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+        pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+        ps_dec->pf_deblk_luma_horz_bslt4(pu1_y, i4_strd_y, alpha_y, beta_y,
+                                         u4_bs, pu1_cliptab_y);
+        ps_dec->pf_deblk_chroma_horz_bslt4(pu1_u, i4_strd_uv, alpha_u,
+                                           beta_u, alpha_v, beta_v,
+                                           u4_bs, pu1_cliptab_u,
+                                           pu1_cliptab_v);
+    }
+
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_deblock_mb_nonmbaff \endif
+ *
+ * \brief
+ *    Deblocks all edges of one macroblock.
+ *
+ *    Order of filtering: left MB boundary (when ps_left_mb is non-NULL),
+ *    the three internal vertical edges, top MB boundary (when ps_top_mb is
+ *    non-NULL), then the three internal horizontal edges. Chroma is
+ *    filtered on the middle internal edge only. Boundary strengths come
+ *    from ps_cur_mb->u4_bs_table: entries 0..3 for horizontal edges and
+ *    4..7 for vertical edges.
+ *
+ * \param ps_dec            Decoder context; supplies the filter function
+ *                          pointers.
+ * \param ps_tfr_cxt        Supplies pu1_mb_y / pu1_mb_u, the pixel pointers
+ *                          of the current MB.
+ * \param i1_cb_qp_idx_ofst Cb QP index offset added to the luma QP.
+ * \param i1_cr_qp_idx_ofst Cr QP index offset added to the luma QP.
+ * \param ps_cur_mb         Deblock parameters of the current MB.
+ * \param i4_strd_y         Luma stride.
+ * \param i4_strd_uv        Chroma stride.
+ * \param ps_top_mb         Top MB, or NULL to skip the top boundary.
+ * \param ps_left_mb        Left MB, or NULL to skip the left boundary.
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_deblock_mb_nonmbaff(dec_struct_t *ps_dec,
+                                tfr_ctxt_t * ps_tfr_cxt,
+                                WORD8 i1_cb_qp_idx_ofst,
+                                WORD8 i1_cr_qp_idx_ofst,
+                                deblk_mb_t * ps_cur_mb,
+                                WORD32 i4_strd_y,
+                                WORD32 i4_strd_uv,
+                                deblk_mb_t * ps_top_mb,
+                                deblk_mb_t * ps_left_mb)
+{
+    UWORD8 *pu1_luma, *pu1_chroma;
+    UWORD8 *pu1_clip_y, *pu1_clip_cb, *pu1_clip_cr;
+    UWORD32 *pu4_strength = ps_cur_mb->u4_bs_table;
+    UWORD32 u4_edge_bs;
+    WORD32 i4_alpha_y, i4_beta_y;
+    WORD32 i4_alpha_cb, i4_beta_cb, i4_alpha_cr, i4_beta_cr;
+    WORD32 i4_idx_y, i4_idx_cb, i4_idx_cr;
+    WORD32 i4_edge;
+
+    PROFILE_DISABLE_DEBLK()
+
+    /* Left MB boundary; a NULL left MB means the edge is not filtered */
+    if(ps_left_mb != NULL)
+    {
+        ih264d_filter_boundary_left_nonmbaff(ps_dec, ps_tfr_cxt,
+                                             i1_cb_qp_idx_ofst,
+                                             i1_cr_qp_idx_ofst, ps_cur_mb,
+                                             i4_strd_y, i4_strd_uv,
+                                             ps_left_mb, pu4_strength, 0);
+    }
+
+    /* Thresholds shared by all internal edges: both sides belong to the */
+    /* current MB, so its QP is used directly                            */
+    {
+        const WORD32 i4_ofst_alpha = ps_cur_mb->i1_slice_alpha_c0_offset;
+        const WORD32 i4_ofst_beta = ps_cur_mb->i1_slice_beta_offset;
+        const WORD32 i4_qp_y = ps_cur_mb->u1_mb_qp;
+        const WORD32 i4_qp_cb =
+                        gau1_ih264d_qp_scale_cr[12 + (i4_qp_y + i1_cb_qp_idx_ofst)];
+        const WORD32 i4_qp_cr =
+                        gau1_ih264d_qp_scale_cr[12 + (i4_qp_y + i1_cr_qp_idx_ofst)];
+
+        i4_idx_y = i4_qp_y + i4_ofst_alpha;
+        i4_idx_cb = i4_qp_cb + i4_ofst_alpha;
+        i4_idx_cr = i4_qp_cr + i4_ofst_alpha;
+
+        i4_alpha_y = gau1_ih264d_alpha_table[12 + i4_idx_y];
+        i4_beta_y = gau1_ih264d_beta_table[12 + (i4_qp_y + i4_ofst_beta)];
+        i4_alpha_cb = gau1_ih264d_alpha_table[12 + i4_idx_cb];
+        i4_beta_cb = gau1_ih264d_beta_table[12 + (i4_qp_cb + i4_ofst_beta)];
+        i4_alpha_cr = gau1_ih264d_alpha_table[12 + i4_idx_cr];
+        i4_beta_cr = gau1_ih264d_beta_table[12 + (i4_qp_cr + i4_ofst_beta)];
+    }
+
+    pu1_clip_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + i4_idx_y];
+    pu1_clip_cb = (UWORD8 *)&gau1_ih264d_clip_table[12 + i4_idx_cb];
+    pu1_clip_cr = (UWORD8 *)&gau1_ih264d_clip_table[12 + i4_idx_cr];
+
+    pu1_luma = ps_tfr_cxt->pu1_mb_y;
+    pu1_chroma = ps_tfr_cxt->pu1_mb_u;
+
+    /* Internal vertical edges 1..3: strengths in table entries 5..7 */
+    for(i4_edge = 1; i4_edge < 4; i4_edge++)
+    {
+        u4_edge_bs = pu4_strength[4 + i4_edge];
+        if(0 == u4_edge_bs)
+            continue;
+
+        ps_dec->pf_deblk_luma_vert_bslt4(pu1_luma + 4 * i4_edge, i4_strd_y,
+                                         i4_alpha_y, i4_beta_y, u4_edge_bs,
+                                         pu1_clip_y);
+
+        /* Only the middle luma edge has a matching chroma edge */
+        if(2 == i4_edge)
+        {
+            ps_dec->pf_deblk_chroma_vert_bslt4(
+                            pu1_chroma + 4 * YUV420SP_FACTOR, i4_strd_uv,
+                            i4_alpha_cb, i4_beta_cb, i4_alpha_cr, i4_beta_cr,
+                            u4_edge_bs, pu1_clip_cb, pu1_clip_cr);
+        }
+    }
+
+    /* Top MB boundary; a NULL top MB means the edge is not filtered */
+    if(ps_top_mb != NULL)
+    {
+        ih264d_filter_boundary_top_nonmbaff(ps_dec, ps_tfr_cxt,
+                                            i1_cb_qp_idx_ofst,
+                                            i1_cr_qp_idx_ofst, ps_cur_mb,
+                                            i4_strd_y, i4_strd_uv, ps_top_mb,
+                                            pu4_strength[0]);
+    }
+
+    /* Internal horizontal edges 1..3: strengths in table entries 1..3 */
+    for(i4_edge = 1; i4_edge < 4; i4_edge++)
+    {
+        u4_edge_bs = pu4_strength[i4_edge];
+        if(0 == u4_edge_bs)
+            continue;
+
+        /* Edge n starts 4 * n luma rows below the top of the MB */
+        ps_dec->pf_deblk_luma_horz_bslt4(
+                        pu1_luma + i4_edge * 4 * i4_strd_y, i4_strd_y,
+                        i4_alpha_y, i4_beta_y, u4_edge_bs, pu1_clip_y);
+
+        if(2 == i4_edge)
+        {
+            ps_dec->pf_deblk_chroma_horz_bslt4(
+                            pu1_chroma + 4 * i4_strd_uv, i4_strd_uv,
+                            i4_alpha_cb, i4_beta_cb, i4_alpha_cr, i4_beta_cr,
+                            u4_edge_bs, pu1_clip_cb, pu1_clip_cr);
+        }
+    }
+
+}
+
+/**************************************************************************
+ *
+ *  Function Name : ih264d_init_deblk_tfr_ctxt
+ *
+ *  Description   : Prepares the transfer context and padding flags used
+ *                  while deblocking one picture.
+ *                  - Source and destination plane pointers are both set to
+ *                    the current picture buffers minus 4 bytes.
+ *                  - u4_y_inc / u4_uv_inc are set to the pointer advance
+ *                    applied after a row of MBs: 16 luma (8 chroma) lines
+ *                    of stride, doubled for MBAFF, minus 16 bytes per MB
+ *                    in the row.
+ *                  - Padding flags are enabled only when nal_ref_idc of
+ *                    the current slice is non-zero. Top padding is skipped
+ *                    for a bottom field; bottom padding is done for frames
+ *                    and bottom fields.
+ *
+ *  Inputs        : ps_dec         - decoder context
+ *                  ps_pad_mgr     - padding flags to fill
+ *                  ps_tfr_cxt     - transfer context to fill
+ *                  u2_image_wd_mb - unused
+ *                  u1_mbaff       - 1 for MBAFF pictures, else 0
+ *
+ *  Returns       : None
+ **************************************************************************/
+void ih264d_init_deblk_tfr_ctxt(dec_struct_t * ps_dec,
+                                pad_mgr_t *ps_pad_mgr,
+                                tfr_ctxt_t *ps_tfr_cxt,
+                                UWORD16 u2_image_wd_mb,
+                                UWORD8 u1_mbaff)
+{
+    const UWORD8 u1_is_field = ps_dec->ps_cur_slice->u1_field_pic_flag;
+    /* Strides are doubled for field pictures */
+    const UWORD32 u4_strd_luma = ps_dec->u2_frm_wd_y << u1_is_field;
+    const UWORD32 u4_strd_chroma = ps_dec->u2_frm_wd_uv << u1_is_field;
+
+    UNUSED(u2_image_wd_mb);
+
+    /* Source planes; the destination aliases the source */
+    ps_tfr_cxt->pu1_src_y = ps_dec->s_cur_pic.pu1_buf1 - 4;
+    ps_tfr_cxt->pu1_dest_y = ps_tfr_cxt->pu1_src_y;
+    ps_tfr_cxt->pu1_src_u = ps_dec->s_cur_pic.pu1_buf2 - 4;
+    ps_tfr_cxt->pu1_dest_u = ps_tfr_cxt->pu1_src_u;
+    ps_tfr_cxt->pu1_src_v = ps_dec->s_cur_pic.pu1_buf3 - 4;
+    ps_tfr_cxt->pu1_dest_v = ps_tfr_cxt->pu1_src_v;
+
+    /* Pointer advance applied once a full row of MBs has been walked */
+    ps_tfr_cxt->u4_y_inc = (u4_strd_luma << u1_mbaff) * 16
+                    - (ps_dec->u2_frm_wd_in_mbs << 4);
+    ps_tfr_cxt->u4_uv_inc = (u4_strd_chroma << u1_mbaff) * 8
+                    - (ps_dec->u2_frm_wd_in_mbs << 4);
+
+    /* padding related initialisations */
+    if(0 == ps_dec->ps_cur_slice->u1_nal_ref_idc)
+    {
+        ps_pad_mgr->u1_horz_pad = 0;
+        ps_pad_mgr->u1_vert_pad_top = 0;
+        ps_pad_mgr->u1_vert_pad_bot = 0;
+        return;
+    }
+
+    {
+        const UWORD8 u1_is_bottom = ps_dec->ps_cur_slice->u1_bottom_field_flag;
+
+        ps_pad_mgr->u1_vert_pad_top = (!u1_is_field) || (!u1_is_bottom);
+        ps_pad_mgr->u1_vert_pad_bot = (!u1_is_field) || u1_is_bottom;
+        ps_pad_mgr->u1_horz_pad = 1;
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_deblock_picture_mbaff */
+/* */
+/* Description : This function carries out deblocking on a whole picture */
+/* with MBAFF, then pads the picture borders */
+/* */
+/* Inputs : ps_dec - decoder context (per-MB deblock data, picture buffers) */
+/* Processing : Calls ih264d_deblock_mb_mbaff for both MBs of every MB pair, one row of pairs at a time, then pads as flagged in s_pad_mgr */
+/* */
+/* Outputs : Deblocked and padded picture in the current picture buffers */
+/* Returns : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 17 02 2005 NS Creation */
+/* 14 06 2005 SWRN clean-up */
+/*****************************************************************************/
+
+void ih264d_deblock_picture_mbaff(dec_struct_t * ps_dec)
+{
+ WORD16 i2_mb_x, i2_mb_y;
+ deblk_mb_t *ps_cur_mb;
+ deblk_mb_t *ps_top_mb;
+ deblk_mb_t *ps_left_mb;
+
+ UWORD8 u1_vert_pad_top = 1; /* local is not referenced below */
+ UWORD8 u1_cur_fld, u1_top_fld, u1_left_fld;
+ UWORD8 u1_first_row;
+
+ UWORD8 * pu1_deb_y, *pu1_deb_u, *pu1_deb_v;
+ UWORD8 u1_deb_mode, u1_extra_top_edge;
+ WORD32 i4_wd_y, i4_wd_uv;
+
+ UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; /*< Field picture flag */
+ UWORD8 u1_bottom_field_flag = ps_dec->ps_cur_slice->u1_bottom_field_flag; /*< Bottom field flag; not referenced below */
+
+ /**************************************************/
+ /* one time loads from ps_dec which will be used */
+ /* frequently throughout the deblocking procedure */
+ /**************************************************/
+ pad_mgr_t * ps_pad_mgr = &ps_dec->s_pad_mgr;
+ tfr_ctxt_t s_tfr_ctxt;
+ tfr_ctxt_t * ps_tfr_cxt = &s_tfr_ctxt;
+
+ UWORD16 u2_image_wd_mb = ps_dec->u2_frm_wd_in_mbs;
+ UWORD16 u2_image_ht_mb = ps_dec->u2_frm_ht_in_mbs;
+ UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ WORD8 i1_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
+ WORD8 i1_cr_qp_idx_ofst =
+ ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
+
+ /* Set up source pointers, row increments and padding flags */
+ ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt, u2_image_wd_mb,
+ u1_mbaff);
+
+ /* Pic level Initialisations */
+ i2_mb_y = u2_image_ht_mb; /* counts remaining MB rows */
+ i2_mb_x = 0;
+ u1_extra_top_edge = 0;
+
+ u1_first_row = 1;
+
+ i4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag;
+ i4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
+ /* Start at the first MB pair of the picture */
+
+ pu1_deb_y = ps_tfr_cxt->pu1_src_y + 4;
+ pu1_deb_u = ps_tfr_cxt->pu1_src_u + 4;
+ pu1_deb_v = ps_tfr_cxt->pu1_src_v + 4;
+ ps_cur_mb = ps_dec->ps_deblk_pic;
+
+ if(ps_dec->u4_app_disable_deblk_frm == 0)
+ {
+ if(ps_dec->u4_mb_level_deblk == 0 || ps_dec->u4_num_cores >= 3) /* NOTE(review): ih264d_deblock_picture_non_mbaff tests "&& u4_num_cores != 3" here - confirm the difference is intended */
+ {
+
+ while(i2_mb_y > 0)
+ {
+ do
+ {
+ /* ---- first MB entry of the pair ---- */
+
+ u1_deb_mode = ps_cur_mb->u1_deblocking_mode;
+ if(!(u1_deb_mode & MB_DISABLE_FILTERING))
+ {
+ ps_tfr_cxt->pu1_mb_y = pu1_deb_y;
+ ps_tfr_cxt->pu1_mb_u = pu1_deb_u;
+ ps_tfr_cxt->pu1_mb_v = pu1_deb_v;
+
+ u1_cur_fld = (ps_cur_mb->u1_mb_type & D_FLD_MB) >> 7;
+ u1_cur_fld &= 1;
+ if(i2_mb_x)
+ {
+ ps_left_mb = ps_cur_mb - 2; /* first entry of the pair to the left (two entries per pair) */
+ }
+ else
+ {
+ ps_left_mb = NULL;
+ }
+ if(!u1_first_row)
+ {
+ ps_top_mb = ps_cur_mb - (u2_image_wd_mb << 1) + 1; /* second entry of the pair directly above */
+ u1_top_fld = (ps_top_mb->u1_mb_type & D_FLD_MB)
+ >> 7;
+ }
+ else
+ {
+ ps_top_mb = NULL;
+ u1_top_fld = 0;
+ }
+
+ if((!u1_first_row) & u1_top_fld & u1_cur_fld)
+ ps_top_mb--; /* both flagged as field: use the first entry of the pair above */
+
+ /********************************************************/
+ /* if top MB and MB AFF and cur MB is frame and top is */
+ /* field, then one extra top edge needs to be deblocked */
+ /********************************************************/
+ u1_extra_top_edge = (!u1_cur_fld) & u1_top_fld;
+
+ if(u1_deb_mode & MB_DISABLE_LEFT_EDGE)
+ ps_left_mb = NULL;
+ if(u1_deb_mode & MB_DISABLE_TOP_EDGE)
+ ps_top_mb = NULL;
+
+ ih264d_deblock_mb_mbaff(ps_dec, ps_tfr_cxt,
+ i1_cb_qp_idx_ofst,
+ i1_cr_qp_idx_ofst, ps_cur_mb,
+ i4_wd_y, i4_wd_uv, ps_top_mb,
+ ps_left_mb, u1_cur_fld,
+ u1_extra_top_edge);
+ }
+
+ ps_cur_mb++; /* ---- second MB entry of the pair ---- */
+
+ u1_deb_mode = ps_cur_mb->u1_deblocking_mode;
+ if(!(u1_deb_mode & MB_DISABLE_FILTERING))
+ {
+ ps_tfr_cxt->pu1_mb_y = pu1_deb_y;
+ ps_tfr_cxt->pu1_mb_u = pu1_deb_u;
+ ps_tfr_cxt->pu1_mb_v = pu1_deb_v;
+
+ u1_cur_fld = (ps_cur_mb->u1_mb_type & D_FLD_MB) >> 7;
+ u1_cur_fld &= 1;
+ if(i2_mb_x)
+ {
+ ps_left_mb = ps_cur_mb - 2; /* second entry of the pair to the left */
+ u1_left_fld = (ps_left_mb->u1_mb_type & D_FLD_MB)
+ >> 7;
+ }
+ else
+ {
+ ps_left_mb = NULL;
+ u1_left_fld = u1_cur_fld;
+ }
+ if(!u1_first_row)
+ {
+ ps_top_mb = ps_cur_mb - (u2_image_wd_mb << 1); /* second entry of the pair directly above */
+ }
+ else
+ {
+ ps_top_mb = NULL;
+ }
+
+ {
+ UWORD8 u1_row_shift_y = 0, u1_row_shift_uv = 0;
+ if(!u1_cur_fld)
+ {
+ ps_top_mb = ps_cur_mb - 1; /* not a field MB: top neighbour is the first entry of this pair (also on the first row) */
+ u1_top_fld = (ps_top_mb->u1_mb_type & D_FLD_MB)
+ >> 7;
+ u1_row_shift_y = 4; /* advance 16 luma lines */
+ u1_row_shift_uv = 3; /* advance 8 chroma lines */
+ }
+ ps_tfr_cxt->pu1_mb_y += i4_wd_y << u1_row_shift_y; /* shift stays 0 for a field MB: advance one line only */
+ ps_tfr_cxt->pu1_mb_u +=
+ (i4_wd_uv << u1_row_shift_uv);
+ ps_tfr_cxt->pu1_mb_v += i4_wd_uv << u1_row_shift_uv;
+ }
+
+ /* point to A if top else A+1 */
+ if(u1_left_fld ^ u1_cur_fld)
+ ps_left_mb--; /* field flags differ: use the first entry of the left pair; NOTE(review): when i2_mb_x is 0 the flags are equal, so NULL is never decremented */
+
+ /********************************************************/
+ /* The extra top edge is only requested for the first */
+ /* entry of a pair; it is never requested for this one */
+ /********************************************************/
+ u1_extra_top_edge = 0;
+
+ if(u1_deb_mode & MB_DISABLE_LEFT_EDGE)
+ ps_left_mb = NULL;
+ if(u1_deb_mode & MB_DISABLE_TOP_EDGE)
+ ps_top_mb = NULL;
+
+ ih264d_deblock_mb_mbaff(ps_dec, ps_tfr_cxt,
+ i1_cb_qp_idx_ofst,
+ i1_cr_qp_idx_ofst, ps_cur_mb,
+ i4_wd_y, i4_wd_uv, ps_top_mb,
+ ps_left_mb, u1_cur_fld,
+ u1_extra_top_edge);
+ }
+
+ ps_cur_mb++;
+ i2_mb_x++;
+
+ pu1_deb_y += 16; /* next MB pair to the right */
+ pu1_deb_u += 8 * YUV420SP_FACTOR;
+ pu1_deb_v += 8;
+
+ }
+ while(u2_image_wd_mb > i2_mb_x);
+
+ pu1_deb_y += ps_tfr_cxt->u4_y_inc; /* move to the start of the next row of MB pairs */
+ pu1_deb_u += ps_tfr_cxt->u4_uv_inc;
+ pu1_deb_v += ps_tfr_cxt->u4_uv_inc;
+
+ i2_mb_x = 0;
+ i2_mb_y -= 2; /* a row of MB pairs covers two MB rows */
+
+ u1_first_row = 0;
+
+ }
+ }
+
+ }
+ //Pad the Picture
+ //Horizontal Pad
+
+ if(ps_pad_mgr->u1_horz_pad)
+ {
+ UWORD32 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; /* shadows the outer variable of the same name */
+ ps_dec->pf_pad_left_luma(ps_tfr_cxt->pu1_src_y + 4,
+ ps_dec->u2_frm_wd_y << u1_field_pic_flag,
+ ps_dec->u2_pic_ht >> u1_field_pic_flag,
+ PAD_LEN_Y_H);
+ ps_dec->pf_pad_right_luma(
+ ps_tfr_cxt->pu1_src_y + 4
+ + (ps_dec->u2_frm_wd_in_mbs << 4),
+ ps_dec->u2_frm_wd_y << u1_field_pic_flag,
+ ps_dec->u2_pic_ht >> u1_field_pic_flag, PAD_LEN_Y_H);
+
+ ps_dec->pf_pad_left_chroma(ps_tfr_cxt->pu1_src_u + 4,
+ ps_dec->u2_frm_wd_uv << u1_field_pic_flag,
+ (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag,
+ PAD_LEN_UV_H * YUV420SP_FACTOR);
+ ps_dec->pf_pad_right_chroma(
+ ps_tfr_cxt->pu1_src_u + 4
+ + (ps_dec->u2_frm_wd_in_mbs << 4),
+ ps_dec->u2_frm_wd_uv << u1_field_pic_flag,
+ (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag,
+ PAD_LEN_UV_H * YUV420SP_FACTOR);
+
+ }
+
+//Vertical Pad Top
+ if(ps_pad_mgr->u1_vert_pad_top)
+ {
+ ps_dec->pf_pad_top(ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H,
+ ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y,
+ ps_pad_mgr->u1_pad_len_y_v);
+ ps_dec->pf_pad_top(
+ ps_dec->ps_cur_pic->pu1_buf2
+ - PAD_LEN_UV_H * YUV420SP_FACTOR,
+ ps_dec->u2_frm_wd_uv, ps_dec->u2_frm_wd_uv,
+ ps_pad_mgr->u1_pad_len_cr_v);
+ ps_pad_mgr->u1_vert_pad_top = 0; /* flag cleared once the top padding has been done */
+ }
+
+//Vertical Pad Bottom
+ if(ps_pad_mgr->u1_vert_pad_bot)
+ {
+
+ UWORD8 *pu1_buf;
+ pu1_buf = ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H;
+ pu1_buf += ps_dec->u2_pic_ht * ps_dec->u2_frm_wd_y; /* u2_pic_ht luma lines past the start of the buffer */
+ ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y,
+ ps_pad_mgr->u1_pad_len_y_v);
+ pu1_buf = ps_dec->ps_cur_pic->pu1_buf2 - PAD_LEN_UV_H * YUV420SP_FACTOR;
+ pu1_buf += (ps_dec->u2_pic_ht >> 1) * ps_dec->u2_frm_wd_uv; /* chroma plane has half as many lines */
+
+ ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_uv,
+ ps_dec->u2_frm_wd_uv,
+ ps_pad_mgr->u1_pad_len_cr_v);
+
+ }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_deblock_picture_non_mbaff                         */
+/*                                                                           */
+/*  Description   : Deblocks a whole non-MBAFF picture and then pads its     */
+/*                  borders.                                                 */
+/*                                                                           */
+/*  Inputs        : ps_dec - decoder context (per-MB deblock data, picture   */
+/*                  buffers, filter and padding function pointers)           */
+/*  Processing    : Unless deblocking is disabled by the application, is     */
+/*                  done at MB level, or u4_num_cores is 3, every MB is      */
+/*                  passed to ih264d_deblock_mb_nonmbaff in raster order.    */
+/*                  Left/top neighbours are NULL at the picture border or    */
+/*                  when the MB's deblocking mode disables that edge.        */
+/*                  Horizontal and vertical padding then follow the flags    */
+/*                  in ps_dec->s_pad_mgr.                                    */
+/*                                                                           */
+/*  Outputs       : Deblocked and padded picture in the current picture      */
+/*                  buffers                                                  */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*****************************************************************************/
+
+void ih264d_deblock_picture_non_mbaff(dec_struct_t * ps_dec)
+{
+    pad_mgr_t * const ps_pad = &ps_dec->s_pad_mgr;
+    tfr_ctxt_t s_ctxt;
+    tfr_ctxt_t * const ps_ctxt = &s_ctxt;
+
+    const UWORD8 u1_fld_pic = ps_dec->ps_cur_slice->u1_field_pic_flag;
+    const UWORD16 u2_wd_mbs = ps_dec->u2_frm_wd_in_mbs;
+    const UWORD16 u2_ht_mbs = ps_dec->u2_frm_ht_in_mbs;
+    const WORD8 i1_cb_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
+    const WORD8 i1_cr_ofst =
+                    ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
+
+    deblk_mb_t *ps_mb;
+    deblk_mb_t *ps_above;
+    deblk_mb_t *ps_left;
+    WORD32 i4_strd_y, i4_strd_uv;
+    WORD16 i2_col, i2_rows_left;
+    UWORD8 u1_top_row = 1;
+    UWORD8 u1_mode;
+
+    /* Source pointers, row increments and padding flags */
+    ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad, ps_ctxt, u2_wd_mbs, 0);
+
+    /* Strides are doubled for field pictures */
+    i4_strd_y = ps_dec->u2_frm_wd_y << u1_fld_pic;
+    i4_strd_uv = ps_dec->u2_frm_wd_uv << u1_fld_pic;
+
+    /* Start at the first MB of the picture */
+    ps_ctxt->pu1_mb_y = ps_ctxt->pu1_src_y + 4;
+    ps_ctxt->pu1_mb_u = ps_ctxt->pu1_src_u + 4;
+    ps_ctxt->pu1_mb_v = ps_ctxt->pu1_src_v + 4;
+    ps_mb = ps_dec->ps_deblk_pic;
+
+    if((0 == ps_dec->u4_app_disable_deblk_frm)
+                    && (0 == ps_dec->u4_mb_level_deblk)
+                    && (3 != ps_dec->u4_num_cores))
+    {
+        for(i2_rows_left = u2_ht_mbs; i2_rows_left > 0; i2_rows_left--)
+        {
+            i2_col = 0;
+
+            do
+            {
+                u1_mode = ps_mb->u1_deblocking_mode;
+
+                if(0 == (u1_mode & MB_DISABLE_FILTERING))
+                {
+                    /* Neighbours are dropped at the picture border or  */
+                    /* when the MB's mode disables the edge             */
+                    ps_left = ((0 != i2_col)
+                                    && !(u1_mode & MB_DISABLE_LEFT_EDGE)) ?
+                                    (ps_mb - 1) : NULL;
+                    ps_above = ((!u1_top_row)
+                                    && !(u1_mode & MB_DISABLE_TOP_EDGE)) ?
+                                    (ps_mb - u2_wd_mbs) : NULL;
+
+                    ih264d_deblock_mb_nonmbaff(ps_dec, ps_ctxt, i1_cb_ofst,
+                                               i1_cr_ofst, ps_mb, i4_strd_y,
+                                               i4_strd_uv, ps_above,
+                                               ps_left);
+                }
+
+                /* Next MB to the right */
+                ps_mb++;
+                i2_col++;
+                ps_ctxt->pu1_mb_y += 16;
+                ps_ctxt->pu1_mb_u += 8 * YUV420SP_FACTOR;
+                ps_ctxt->pu1_mb_v += 8;
+            }
+            while(i2_col < u2_wd_mbs);
+
+            /* Start of the next MB row */
+            ps_ctxt->pu1_mb_y += ps_ctxt->u4_y_inc;
+            ps_ctxt->pu1_mb_u += ps_ctxt->u4_uv_inc;
+            ps_ctxt->pu1_mb_v += ps_ctxt->u4_uv_inc;
+
+            u1_top_row = 0;
+        }
+    }
+
+    /* Horizontal padding: left and right borders, luma then chroma */
+    if(ps_pad->u1_horz_pad)
+    {
+        const UWORD32 u4_fld = ps_dec->ps_cur_slice->u1_field_pic_flag;
+        UWORD8 * const pu1_luma = ps_ctxt->pu1_src_y + 4;
+        UWORD8 * const pu1_chroma = ps_ctxt->pu1_src_u + 4;
+
+        ps_dec->pf_pad_left_luma(pu1_luma, ps_dec->u2_frm_wd_y << u4_fld,
+                                 ps_dec->u2_pic_ht >> u4_fld, PAD_LEN_Y_H);
+        ps_dec->pf_pad_right_luma(pu1_luma + (ps_dec->u2_frm_wd_in_mbs << 4),
+                                  ps_dec->u2_frm_wd_y << u4_fld,
+                                  ps_dec->u2_pic_ht >> u4_fld, PAD_LEN_Y_H);
+
+        ps_dec->pf_pad_left_chroma(pu1_chroma,
+                                   ps_dec->u2_frm_wd_uv << u4_fld,
+                                   (ps_dec->u2_pic_ht / 2) >> u4_fld,
+                                   PAD_LEN_UV_H * YUV420SP_FACTOR);
+        ps_dec->pf_pad_right_chroma(
+                        pu1_chroma + (ps_dec->u2_frm_wd_in_mbs << 4),
+                        ps_dec->u2_frm_wd_uv << u4_fld,
+                        (ps_dec->u2_pic_ht / 2) >> u4_fld,
+                        PAD_LEN_UV_H * YUV420SP_FACTOR);
+    }
+
+    /* Vertical padding above the picture; the flag is cleared afterwards */
+    if(ps_pad->u1_vert_pad_top)
+    {
+        ps_dec->pf_pad_top(ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H,
+                           ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y,
+                           ps_pad->u1_pad_len_y_v);
+        ps_dec->pf_pad_top(
+                        ps_dec->ps_cur_pic->pu1_buf2
+                                        - PAD_LEN_UV_H * YUV420SP_FACTOR,
+                        ps_dec->u2_frm_wd_uv, ps_dec->u2_frm_wd_uv,
+                        ps_pad->u1_pad_len_cr_v);
+        ps_pad->u1_vert_pad_top = 0;
+    }
+
+    /* Vertical padding below the picture */
+    if(ps_pad->u1_vert_pad_bot)
+    {
+        UWORD8 *pu1_last;
+
+        /* u2_pic_ht luma lines past the start of the buffer */
+        pu1_last = ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H;
+        pu1_last += ps_dec->u2_pic_ht * ps_dec->u2_frm_wd_y;
+        ps_dec->pf_pad_bottom(pu1_last, ps_dec->u2_frm_wd_y,
+                              ps_dec->u2_frm_wd_y, ps_pad->u1_pad_len_y_v);
+
+        /* The chroma plane has half as many lines */
+        pu1_last = ps_dec->ps_cur_pic->pu1_buf2
+                        - PAD_LEN_UV_H * YUV420SP_FACTOR;
+        pu1_last += (ps_dec->u2_pic_ht >> 1) * ps_dec->u2_frm_wd_uv;
+        ps_dec->pf_pad_bottom(pu1_last, ps_dec->u2_frm_wd_uv,
+                              ps_dec->u2_frm_wd_uv, ps_pad->u1_pad_len_cr_v);
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_deblock_picture_progressive \endif
+ *
+ * \brief
+ *    Picture level deblocking using the non-MBAFF macroblock filter,
+ *    followed by border padding of the current picture.
+ *
+ * \param ps_dec
+ *    Decoder context. Supplies the per-MB deblocking data (ps_deblk_pic),
+ *    the frame dimensions, the PPS chroma QP offsets, the pad manager and
+ *    the padding function pointers.
+ *
+ * \return
+ *    None
+ *
+ * \note
+ *    The macroblock loop runs only when u4_app_disable_deblk_frm == 0,
+ *    u4_mb_level_deblk == 0 and u4_num_cores != 3. The padding steps are
+ *    executed in every case, each one gated by its pad manager flag.
+ **************************************************************************
+ */
+void ih264d_deblock_picture_progressive(dec_struct_t * ps_dec)
+{
+    WORD16 i2_mb_x, i2_mb_y;
+
+    deblk_mb_t *ps_cur_mb;
+    deblk_mb_t *ps_top_mb;
+    deblk_mb_t *ps_left_mb;
+
+    UWORD8 u1_first_row;
+    UWORD8 u1_deb_mode;
+    /* MBs left in the current row. Kept in 32 bits: an 8 bit counter     */
+    /* would wrap to 0 whenever the remainder is a multiple of 256 and    */
+    /* terminate the row too early.                                       */
+    WORD32 i4_mbs_next;
+    WORD32 i4_wd_y, i4_wd_uv;
+
+    /**************************************************/
+    /* one time loads from ps_dec which will be used  */
+    /* frequently throughout the deblocking procedure */
+    /**************************************************/
+    pad_mgr_t * ps_pad_mgr = &ps_dec->s_pad_mgr;
+
+    tfr_ctxt_t s_tfr_ctxt;
+    tfr_ctxt_t * ps_tfr_cxt = &s_tfr_ctxt;
+    UWORD16 u2_image_wd_mb = ps_dec->u2_frm_wd_in_mbs;
+    UWORD16 u2_image_ht_mb = ps_dec->u2_frm_ht_in_mbs;
+
+    WORD8 i1_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
+    WORD8 i1_cr_qp_idx_ofst =
+                    ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
+
+    /* Set up the local transfer context used for deblocking */
+    ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt, u2_image_wd_mb,
+                               0);
+
+    /* Pic level initialisations: i2_mb_y counts the rows still to do */
+    i2_mb_y = u2_image_ht_mb;
+    i2_mb_x = 0;
+
+    u1_first_row = 1;
+
+    i4_wd_y = ps_dec->u2_frm_wd_y;
+    i4_wd_uv = ps_dec->u2_frm_wd_uv;
+
+    /* The first MB sits 4 bytes after the context's source pointers; the */
+    /* same +4 is applied to the horizontal padding calls further down.   */
+    ps_tfr_cxt->pu1_mb_y = ps_tfr_cxt->pu1_src_y + 4;
+    ps_tfr_cxt->pu1_mb_u = ps_tfr_cxt->pu1_src_u + 4;
+    ps_tfr_cxt->pu1_mb_v = ps_tfr_cxt->pu1_src_v + 4;
+    ps_cur_mb = ps_dec->ps_deblk_pic;
+
+    if(ps_dec->u4_app_disable_deblk_frm == 0)
+    {
+        if((ps_dec->u4_mb_level_deblk == 0) && (ps_dec->u4_num_cores != 3))
+        {
+            while(i2_mb_y > 0)
+            {
+                u1_deb_mode = ps_cur_mb->u1_deblocking_mode;
+                if(!(u1_deb_mode & MB_DISABLE_FILTERING))
+                {
+                    /* Neighbours exist unless the MB lies on the left /  */
+                    /* top picture border                                 */
+                    if(i2_mb_x)
+                    {
+                        ps_left_mb = ps_cur_mb - 1;
+                    }
+                    else
+                    {
+                        ps_left_mb = NULL;
+                    }
+                    if(!u1_first_row)
+                    {
+                        ps_top_mb = ps_cur_mb - (u2_image_wd_mb);
+                    }
+                    else
+                    {
+                        ps_top_mb = NULL;
+                    }
+
+                    /* Per-MB flags can additionally switch off an edge */
+                    if(u1_deb_mode & MB_DISABLE_LEFT_EDGE)
+                        ps_left_mb = NULL;
+                    if(u1_deb_mode & MB_DISABLE_TOP_EDGE)
+                        ps_top_mb = NULL;
+
+                    ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt,
+                                               i1_cb_qp_idx_ofst,
+                                               i1_cr_qp_idx_ofst, ps_cur_mb,
+                                               i4_wd_y, i4_wd_uv, ps_top_mb,
+                                               ps_left_mb);
+                }
+
+                /* Step to the next MB of the row */
+                ps_cur_mb++;
+                i2_mb_x++;
+                i4_mbs_next = u2_image_wd_mb - i2_mb_x;
+
+                ps_tfr_cxt->pu1_mb_y += 16;
+                ps_tfr_cxt->pu1_mb_u += 8 * YUV420SP_FACTOR;
+                ps_tfr_cxt->pu1_mb_v += 8;
+
+                if(!i4_mbs_next)
+                {
+                    /* End of row: apply the per-row increments of the    */
+                    /* transfer context and restart at column 0           */
+                    ps_tfr_cxt->pu1_mb_y += ps_tfr_cxt->u4_y_inc;
+                    ps_tfr_cxt->pu1_mb_u += ps_tfr_cxt->u4_uv_inc;
+                    ps_tfr_cxt->pu1_mb_v += ps_tfr_cxt->u4_uv_inc;
+
+                    i2_mb_x = 0;
+                    i2_mb_y--;
+                    u1_first_row = 0;
+                }
+            }
+        }
+    }
+
+    /* Pad the picture */
+    /* Horizontal pad */
+    if(ps_pad_mgr->u1_horz_pad)
+    {
+        UWORD32 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
+        ps_dec->pf_pad_left_luma(ps_tfr_cxt->pu1_src_y + 4,
+                                 ps_dec->u2_frm_wd_y << u1_field_pic_flag,
+                                 ps_dec->u2_pic_ht >> u1_field_pic_flag,
+                                 PAD_LEN_Y_H);
+        ps_dec->pf_pad_right_luma(
+                        ps_tfr_cxt->pu1_src_y + 4
+                                        + (ps_dec->u2_frm_wd_in_mbs << 4),
+                        ps_dec->u2_frm_wd_y << u1_field_pic_flag,
+                        ps_dec->u2_pic_ht >> u1_field_pic_flag, PAD_LEN_Y_H);
+
+        ps_dec->pf_pad_left_chroma(ps_tfr_cxt->pu1_src_u + 4,
+                                   ps_dec->u2_frm_wd_uv << u1_field_pic_flag,
+                                   (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag,
+                                   PAD_LEN_UV_H * YUV420SP_FACTOR);
+        ps_dec->pf_pad_right_chroma(
+                        ps_tfr_cxt->pu1_src_u + 4
+                                        + (ps_dec->u2_frm_wd_in_mbs << 4),
+                        ps_dec->u2_frm_wd_uv << u1_field_pic_flag,
+                        (ps_dec->u2_pic_ht / 2) >> u1_field_pic_flag,
+                        PAD_LEN_UV_H * YUV420SP_FACTOR);
+    }
+
+    /* Vertical pad, top */
+    if(ps_pad_mgr->u1_vert_pad_top)
+    {
+        ps_dec->pf_pad_top(ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H,
+                           ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y,
+                           ps_pad_mgr->u1_pad_len_y_v);
+        ps_dec->pf_pad_top(
+                        ps_dec->ps_cur_pic->pu1_buf2
+                                        - PAD_LEN_UV_H * YUV420SP_FACTOR,
+                        ps_dec->u2_frm_wd_uv, ps_dec->u2_frm_wd_uv,
+                        ps_pad_mgr->u1_pad_len_cr_v);
+    }
+
+    /* Vertical pad, bottom */
+    if(ps_pad_mgr->u1_vert_pad_bot)
+    {
+        UWORD8 *pu1_buf;
+
+        /* Luma: start of the row just below the last picture row */
+        pu1_buf = ps_dec->ps_cur_pic->pu1_buf1 - PAD_LEN_Y_H;
+        pu1_buf += ps_dec->u2_pic_ht * ps_dec->u2_frm_wd_y;
+        ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_y, ps_dec->u2_frm_wd_y,
+                              ps_pad_mgr->u1_pad_len_y_v);
+
+        /* Chroma: same, using half the picture height */
+        pu1_buf = ps_dec->ps_cur_pic->pu1_buf2 - PAD_LEN_UV_H * YUV420SP_FACTOR;
+        pu1_buf += (ps_dec->u2_pic_ht >> 1) * ps_dec->u2_frm_wd_uv;
+        ps_dec->pf_pad_bottom(pu1_buf, ps_dec->u2_frm_wd_uv,
+                              ps_dec->u2_frm_wd_uv,
+                              ps_pad_mgr->u1_pad_len_cr_v);
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_set_deblocking_parameters \endif
+ *
+ * \brief
+ *    Fills the per-macroblock deblocking record from the slice header
+ *    values and the neighbour availability of the macroblock.
+ *
+ * \param ps_cur_mb
+ *    Deblocking record of the current MB (written).
+ * \param ps_slice
+ *    Slice parameters; alpha/beta offsets and disable_dblk_filter_idc
+ *    are read from here.
+ * \param u1_mb_ngbr_availablity
+ *    Neighbour availability bit mask, tested against
+ *    LEFT_MB_AVAILABLE_MASK and TOP_MB_AVAILABLE_MASK.
+ * \param u1_mb_field_decoding_flag
+ *    Field decoding flag of the MB; stored in bit 7 of u1_mb_type.
+ *
+ * \return
+ *    Always 0
+ *
+ * \note
+ *    For an idc other than DBLK_ENABLED, DBLK_DISABLED or
+ *    SLICE_BOUNDARY_DBLK_DISABLED the deblocking mode is left untouched.
+ **************************************************************************
+ */
+
+WORD8 ih264d_set_deblocking_parameters(deblk_mb_t * ps_cur_mb,
+                                       dec_slice_params_t * ps_slice,
+                                       UWORD8 u1_mb_ngbr_availablity,
+                                       UWORD8 u1_mb_field_decoding_flag)
+{
+    /* Slice level filter offsets and MB type are copied unconditionally */
+    ps_cur_mb->u1_mb_type = (u1_mb_field_decoding_flag << 7);
+    ps_cur_mb->i1_slice_beta_offset = ps_slice->i1_slice_beta_offset;
+    ps_cur_mb->i1_slice_alpha_c0_offset = ps_slice->i1_slice_alpha_c0_offset;
+
+    if(DBLK_ENABLED == ps_slice->u1_disable_dblk_filter_idc)
+    {
+        ps_cur_mb->u1_deblocking_mode = MB_ENABLE_FILTERING;
+    }
+    else if(DBLK_DISABLED == ps_slice->u1_disable_dblk_filter_idc)
+    {
+        ps_cur_mb->u1_deblocking_mode = MB_DISABLE_FILTERING;
+    }
+    else if(SLICE_BOUNDARY_DBLK_DISABLED
+                    == ps_slice->u1_disable_dblk_filter_idc)
+    {
+        /* Filtering stays on, but an edge whose neighbour is flagged as */
+        /* unavailable is excluded                                       */
+        ps_cur_mb->u1_deblocking_mode = MB_ENABLE_FILTERING;
+
+        if(0 == (u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK))
+        {
+            ps_cur_mb->u1_deblocking_mode |= MB_DISABLE_LEFT_EDGE;
+        }
+        if(0 == (u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK))
+        {
+            ps_cur_mb->u1_deblocking_mode |= MB_DISABLE_TOP_EDGE;
+        }
+    }
+
+    return (0);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_copy_intra_pred_line \endif
+ *
+ * \brief
+ *    Saves the last reconstructed row of one macroblock (16 bytes of luma
+ *    and 16 bytes from the U destination) into the "current" intra
+ *    prediction line buffers and advances the line pointers. After the
+ *    last MB of a row the current and previous line buffers are swapped.
+ *
+ * \param ps_dec          Decoder context
+ * \param ps_cur_mb_info  Info of the MB just reconstructed (u2_mbx and the
+ *                        field decoding flag are read)
+ * \param nmb_index       Index of the MB relative to
+ *                        s_tran_addrecon.pu1_dest_y / pu1_dest_u
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_copy_intra_pred_line(dec_struct_t *ps_dec,
+                                 dec_mb_info_t *ps_cur_mb_info,
+                                 UWORD32 nmb_index)
+{
+    UWORD8 u1_fld_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+    /* Row strides; doubled when the MB is field decoded */
+    UWORD32 u4_strd_luma = ps_dec->u2_frm_wd_y << u1_fld_flag;
+    UWORD32 u4_strd_chroma = ps_dec->u2_frm_wd_uv << u1_fld_flag;
+    UWORD8 *pu1_last_row;
+
+    /* Luma: bottom row of the MB */
+    pu1_last_row = ps_dec->s_tran_addrecon.pu1_dest_y
+                    + (u4_strd_luma * (MB_SIZE - 1));
+    pu1_last_row += MB_SIZE * nmb_index;
+    MEMCPY_16BYTES(ps_dec->pu1_cur_y_intra_pred_line, pu1_last_row);
+
+    /* Chroma: bottom row of the 8x8 block, read through pu1_dest_u only */
+    pu1_last_row = ps_dec->s_tran_addrecon.pu1_dest_u
+                    + (u4_strd_chroma * (BLK8x8SIZE - 1));
+    pu1_last_row += BLK8x8SIZE * nmb_index * YUV420SP_FACTOR;
+    MEMCPY_16BYTES(ps_dec->pu1_cur_u_intra_pred_line, pu1_last_row);
+
+    if(ps_cur_mb_info->u2_mbx != (ps_dec->u2_frm_wd_in_mbs - 1))
+    {
+        /* More MBs follow in this row: point at the slot of MB (x + 1) */
+        ps_dec->pu1_cur_y_intra_pred_line =
+                        ps_dec->pu1_cur_y_intra_pred_line_base
+                                        + (MB_SIZE
+                                                        * (ps_cur_mb_info->u2_mbx
+                                                                        + 1));
+        ps_dec->pu1_cur_u_intra_pred_line =
+                        ps_dec->pu1_cur_u_intra_pred_line_base
+                                        + (BLK8x8SIZE
+                                                        * (ps_cur_mb_info->u2_mbx
+                                                                        + 1))
+                                                        * YUV420SP_FACTOR;
+        ps_dec->pu1_cur_v_intra_pred_line =
+                        ps_dec->pu1_cur_v_intra_pred_line_base
+                                        + (BLK8x8SIZE
+                                                        * (ps_cur_mb_info->u2_mbx
+                                                                        + 1));
+    }
+    else
+    {
+        /* Row complete: the line just filled (at its base) becomes the  */
+        /* previous line, and the old previous line becomes both the new */
+        /* current line and the new current base.                        */
+        UWORD8 *pu1_old_prev;
+
+        pu1_old_prev = ps_dec->pu1_prev_y_intra_pred_line;
+        ps_dec->pu1_prev_y_intra_pred_line =
+                        ps_dec->pu1_cur_y_intra_pred_line_base;
+        ps_dec->pu1_cur_y_intra_pred_line = pu1_old_prev;
+        ps_dec->pu1_cur_y_intra_pred_line_base = pu1_old_prev;
+
+        pu1_old_prev = ps_dec->pu1_prev_u_intra_pred_line;
+        ps_dec->pu1_prev_u_intra_pred_line =
+                        ps_dec->pu1_cur_u_intra_pred_line_base;
+        ps_dec->pu1_cur_u_intra_pred_line = pu1_old_prev;
+        ps_dec->pu1_cur_u_intra_pred_line_base = pu1_old_prev;
+
+        pu1_old_prev = ps_dec->pu1_prev_v_intra_pred_line;
+        ps_dec->pu1_prev_v_intra_pred_line =
+                        ps_dec->pu1_cur_v_intra_pred_line_base;
+        ps_dec->pu1_cur_v_intra_pred_line = pu1_old_prev;
+        ps_dec->pu1_cur_v_intra_pred_line_base = pu1_old_prev;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \brief
+ *    Helper of ih264d_deblock_mb_level: picks the left/top neighbours of
+ *    one macroblock and runs the non-MBAFF macroblock filter on it.
+ *
+ * \note
+ *    Nothing is filtered when the application disabled deblocking
+ *    (u4_app_disable_deblk_frm == 1) or the MB carries
+ *    MB_DISABLE_FILTERING. i4_mb_x / i4_mb_y are only tested against 0 to
+ *    detect the left / top picture border.
+ **************************************************************************
+ */
+static void ih264d_deblock_mb_level_filter_mb(dec_struct_t *ps_dec,
+                                              tfr_ctxt_t *ps_tfr_cxt,
+                                              WORD8 i1_cb_qp_idx_ofst,
+                                              WORD8 i1_cr_qp_idx_ofst,
+                                              deblk_mb_t *ps_cur_mb,
+                                              WORD32 i4_mb_x,
+                                              WORD32 i4_mb_y,
+                                              WORD32 i4_wd_y,
+                                              WORD32 i4_wd_uv)
+{
+    deblk_mb_t *ps_left_mb, *ps_top_mb;
+    UWORD8 u1_deb_mode = ps_cur_mb->u1_deblocking_mode;
+
+    if(ps_dec->u4_app_disable_deblk_frm == 1)
+        u1_deb_mode = MB_DISABLE_FILTERING;
+
+    if(u1_deb_mode & MB_DISABLE_FILTERING)
+        return;
+
+    /* No neighbour on the left / top picture border */
+    ps_left_mb = i4_mb_x ? (ps_cur_mb - 1) : NULL;
+    ps_top_mb = i4_mb_y ? (ps_cur_mb - ps_dec->u2_frm_wd_in_mbs) : NULL;
+
+    /* Per-MB flags can additionally switch off an edge */
+    if(u1_deb_mode & MB_DISABLE_LEFT_EDGE)
+        ps_left_mb = NULL;
+    if(u1_deb_mode & MB_DISABLE_TOP_EDGE)
+        ps_top_mb = NULL;
+
+    ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, i1_cb_qp_idx_ofst,
+                               i1_cr_qp_idx_ofst, ps_cur_mb, i4_wd_y,
+                               i4_wd_uv, ps_top_mb, ps_left_mb);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_deblock_mb_level \endif
+ *
+ * \brief
+ *    MB level deblocking. Saves the last row of the MB just reconstructed
+ *    for intra prediction of the next MB row, then deblocks the MB to the
+ *    left of it. For the last MB of a row that MB itself is deblocked as
+ *    well and the current/previous intra prediction lines are swapped.
+ *
+ * \param ps_dec          Decoder context
+ * \param ps_cur_mb_info  Info of the MB just reconstructed
+ * \param nmb_index       Index of that MB inside the current group of MBs
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_deblock_mb_level(dec_struct_t *ps_dec,
+                             dec_mb_info_t *ps_cur_mb_info,
+                             UWORD32 nmb_index)
+{
+    deblk_mb_t *ps_cur_mb;
+    WORD8 i1_cb_qp_idx_ofst = ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
+    WORD8 i1_cr_qp_idx_ofst =
+                    ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
+    WORD32 i4_wd_y, i4_wd_uv;
+    tfr_ctxt_t * ps_tfr_cxt = &ps_dec->s_tran_addrecon;
+    WORD16 i2_mb_y, i2_mb_x;
+    UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+
+    /* Copy the last row of the MB, to be used for intra prediction of */
+    /* the next MB row, and advance the current-line pointers          */
+    {
+        UWORD8 *pu1_mb_last_row;
+        UWORD32 u4_recWidth, u4_recwidth_cr;
+
+        u4_recWidth = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
+        u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
+
+        pu1_mb_last_row = ps_dec->s_tran_addrecon.pu1_dest_y
+                        + (u4_recWidth * (MB_SIZE - 1));
+        pu1_mb_last_row += MB_SIZE * nmb_index;
+        MEMCPY_16BYTES(ps_dec->pu1_cur_y_intra_pred_line, pu1_mb_last_row);
+
+        pu1_mb_last_row = ps_dec->s_tran_addrecon.pu1_dest_u
+                        + (u4_recwidth_cr * (BLK8x8SIZE - 1));
+        pu1_mb_last_row += BLK8x8SIZE * nmb_index * YUV420SP_FACTOR;
+
+        MEMCPY_16BYTES(ps_dec->pu1_cur_u_intra_pred_line, pu1_mb_last_row);
+
+        ps_dec->pu1_cur_y_intra_pred_line =
+                        ps_dec->pu1_cur_y_intra_pred_line_base
+                                        + (MB_SIZE
+                                                        * (ps_cur_mb_info->u2_mbx
+                                                                        + 1));
+        ps_dec->pu1_cur_u_intra_pred_line =
+                        ps_dec->pu1_cur_u_intra_pred_line_base
+                                        + (BLK8x8SIZE
+                                                        * (ps_cur_mb_info->u2_mbx
+                                                                        + 1))
+                                                        * YUV420SP_FACTOR;
+        ps_dec->pu1_cur_v_intra_pred_line =
+                        ps_dec->pu1_cur_v_intra_pred_line_base
+                                        + (BLK8x8SIZE
+                                                        * (ps_cur_mb_info->u2_mbx
+                                                                        + 1));
+    }
+
+    i2_mb_y = ps_cur_mb_info->u2_mby;
+    i4_wd_y = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
+    i4_wd_uv = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
+
+    if(ps_cur_mb_info->u2_mbx != 0)
+    {
+        /* Deblock the previous MB (the one to the left) */
+        if(ps_dec->u1_separate_parse == 1)
+        {
+            ps_cur_mb = ps_dec->ps_deblk_mbn_dec_thrd + nmb_index - 1;
+        }
+        else if(nmb_index == 0)
+        {
+            /* first MB of the group: the previous MB's record is the */
+            /* last one of the previous group                         */
+            ps_cur_mb = ps_dec->ps_deblk_mbn_prev
+                            + ps_dec->u4_num_mbs_prev_nmb - 1;
+        }
+        else
+        {
+            ps_cur_mb = ps_dec->ps_deblk_mbn + nmb_index - 1;
+        }
+
+        i2_mb_x = ps_cur_mb_info->u2_mbx - 1;
+
+        ih264d_deblock_mb_level_filter_mb(ps_dec, ps_tfr_cxt,
+                                          i1_cb_qp_idx_ofst,
+                                          i1_cr_qp_idx_ofst, ps_cur_mb,
+                                          i2_mb_x, i2_mb_y, i4_wd_y,
+                                          i4_wd_uv);
+
+        /* Move the transfer context on to the current MB */
+        ps_tfr_cxt->pu1_mb_y += MB_SIZE;
+        ps_tfr_cxt->pu1_mb_u += (MB_SIZE >> 1) * YUV420SP_FACTOR;
+        ps_tfr_cxt->pu1_mb_v += (MB_SIZE >> 1);
+    }
+
+    if(ps_cur_mb_info->u2_mbx == (ps_dec->u2_frm_wd_in_mbs - 1))
+    {
+        /* Last MB of the row: no later call will deblock it, so do it now */
+        UWORD8 *pu1_temp;
+
+        if(ps_dec->u1_separate_parse == 1)
+            ps_cur_mb = ps_dec->ps_deblk_mbn_dec_thrd + nmb_index;
+        else
+            ps_cur_mb = ps_dec->ps_deblk_mbn + nmb_index;
+
+        i2_mb_x = ps_cur_mb_info->u2_mbx;
+
+        ih264d_deblock_mb_level_filter_mb(ps_dec, ps_tfr_cxt,
+                                          i1_cb_qp_idx_ofst,
+                                          i1_cr_qp_idx_ofst, ps_cur_mb,
+                                          i2_mb_x, i2_mb_y, i4_wd_y,
+                                          i4_wd_uv);
+
+        /* Rewind the current-line pointers to their bases ... */
+        ps_dec->pu1_cur_y_intra_pred_line =
+                        ps_dec->pu1_cur_y_intra_pred_line_base;
+        ps_dec->pu1_cur_u_intra_pred_line =
+                        ps_dec->pu1_cur_u_intra_pred_line_base;
+        ps_dec->pu1_cur_v_intra_pred_line =
+                        ps_dec->pu1_cur_v_intra_pred_line_base;
+
+        /* ... swap current and previous rows ... */
+        pu1_temp = ps_dec->pu1_cur_y_intra_pred_line;
+        ps_dec->pu1_cur_y_intra_pred_line = ps_dec->pu1_prev_y_intra_pred_line;
+        ps_dec->pu1_prev_y_intra_pred_line = pu1_temp;
+
+        pu1_temp = ps_dec->pu1_cur_u_intra_pred_line;
+        ps_dec->pu1_cur_u_intra_pred_line = ps_dec->pu1_prev_u_intra_pred_line;
+        ps_dec->pu1_prev_u_intra_pred_line = pu1_temp;
+
+        pu1_temp = ps_dec->pu1_cur_v_intra_pred_line;
+        ps_dec->pu1_cur_v_intra_pred_line = ps_dec->pu1_prev_v_intra_pred_line;
+        ps_dec->pu1_prev_v_intra_pred_line = pu1_temp;
+
+        /* ... and make the new current rows the bases for the next row */
+        ps_dec->pu1_cur_y_intra_pred_line_base =
+                        ps_dec->pu1_cur_y_intra_pred_line;
+        ps_dec->pu1_cur_u_intra_pred_line_base =
+                        ps_dec->pu1_cur_u_intra_pred_line;
+        ps_dec->pu1_cur_v_intra_pred_line_base =
+                        ps_dec->pu1_cur_v_intra_pred_line;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_filter_boundary_left_mbaff \endif
+ *
+ * \brief
+ *    Filters the left macroblock edge (luma and chroma) of an MB in an
+ *    MBAFF picture.
+ *
+ * \param ps_dec             Decoder context (filter function pointers)
+ * \param ps_tfr_cxt         Transfer context; pu1_mb_y / pu1_mb_u point at
+ *                           the MB to be filtered
+ * \param i1_cb_qp_idx_ofst  Chroma QP index offset for Cb
+ * \param i1_cr_qp_idx_ofst  Chroma QP index offset for Cr
+ * \param ps_cur_mb          Deblocking record of the current MB
+ * \param i4_strd_y          Luma stride
+ * \param i4_strd_uv         Chroma stride
+ * \param ps_left_mb         Deblocking record of the left neighbour; must
+ *                           not be NULL. When the MB is not "single call"
+ *                           the record at ps_left_mb + 1 is used as well.
+ * \param pu4_bs_tab         Boundary strength table; entry 4 is used for
+ *                           the first pass, entry 9 for the second one
+ * \param u1_cur_fld         Non-zero when the current MB is a field MB
+ *
+ * \return
+ *    None
+ *
+ * \note
+ *    The chroma QP index (MB QP + chroma offset) can be negative. It is
+ *    therefore kept in a WORD32; storing it in an 8 bit unsigned value
+ *    would wrap it to 244..255 and index gau1_ih264d_qp_scale_cr far out
+ *    of range. The "12 +" bias of the table lookups presumably exists to
+ *    absorb such negative indices.
+ **************************************************************************
+ */
+void ih264d_filter_boundary_left_mbaff(dec_struct_t *ps_dec,
+                                       tfr_ctxt_t * ps_tfr_cxt,
+                                       WORD8 i1_cb_qp_idx_ofst,
+                                       WORD8 i1_cr_qp_idx_ofst,
+                                       deblk_mb_t * ps_cur_mb,
+                                       UWORD16 i4_strd_y,
+                                       UWORD16 i4_strd_uv,
+                                       deblk_mb_t * ps_left_mb, /* Neighbouring MB parameters */
+                                       UWORD32 pu4_bs_tab[], /* pointer to the BsTable array */
+                                       UWORD8 u1_cur_fld)
+{
+    UWORD8 *pu1_y, *pu1_u;
+    UWORD8 qp_avg, uc_QPav_Y;
+    WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0;
+    WORD32 alpha_y = 0, beta_y = 0;
+
+    WORD32 idx_b_u, idx_a_u, idx_b_v, idx_a_v;
+    WORD32 idx_b_y, idx_a_y;
+    /* QP + chroma offset of the two MBs; signed, may be negative */
+    WORD32 i4_mb_qp1, i4_mb_qp2;
+
+    UWORD32 u4_bs_val;
+
+    UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y;
+
+    UWORD8 u1_double_cl = !ps_cur_mb->u1_single_call;
+    WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
+    WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset;
+
+    PROFILE_DISABLE_DEBLK()
+
+    pu1_y = ps_tfr_cxt->pu1_mb_y;
+    pu1_u = ps_tfr_cxt->pu1_mb_u;
+
+    /* LUMA values */
+    /* Deblock rounding change */
+    uc_QPav_Y = (UWORD8)((ps_left_mb->u1_mb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1);
+    idx_a_y = uc_QPav_Y + ofst_a;
+    alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
+    idx_b_y = uc_QPav_Y + ofst_b;
+    beta_y = gau1_ih264d_beta_table[12 + idx_b_y];
+
+    /* Chroma cb values */
+    i4_mb_qp1 = ps_left_mb->u1_mb_qp + i1_cb_qp_idx_ofst;
+    i4_mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst;
+    qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + i4_mb_qp1]
+                    + gau1_ih264d_qp_scale_cr[12 + i4_mb_qp2] + 1) >> 1);
+    idx_a_u = qp_avg + ofst_a;
+    alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
+    idx_b_u = qp_avg + ofst_b;
+    beta_u = gau1_ih264d_beta_table[12 + idx_b_u];
+
+    /* Chroma cr values */
+    i4_mb_qp1 = ps_left_mb->u1_mb_qp + i1_cr_qp_idx_ofst;
+    i4_mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst;
+    qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + i4_mb_qp1]
+                    + gau1_ih264d_qp_scale_cr[12 + i4_mb_qp2] + 1) >> 1);
+    idx_a_v = qp_avg + ofst_a;
+    alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
+    idx_b_v = qp_avg + ofst_b;
+    beta_v = gau1_ih264d_beta_table[12 + idx_b_v];
+
+    if(u1_double_cl == 0)
+    {
+        /* Single call: one pass over the whole edge with the regular */
+        /* vertical edge filters                                      */
+        u4_bs_val = pu4_bs_tab[4];
+
+        if(0x04040404 == u4_bs_val)
+        {
+            ps_dec->pf_deblk_luma_vert_bs4(pu1_y, i4_strd_y, alpha_y, beta_y);
+            ps_dec->pf_deblk_chroma_vert_bs4(pu1_u, i4_strd_uv, alpha_u,
+                                             beta_u, alpha_v, beta_v);
+        }
+        else if(u4_bs_val)
+        {
+            pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+            pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+            pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+            ps_dec->pf_deblk_luma_vert_bslt4(pu1_y, i4_strd_y, alpha_y,
+                                             beta_y, u4_bs_val,
+                                             pu1_cliptab_y);
+            ps_dec->pf_deblk_chroma_vert_bslt4(pu1_u, i4_strd_uv, alpha_u,
+                                               beta_u, alpha_v, beta_v,
+                                               u4_bs_val, pu1_cliptab_u,
+                                               pu1_cliptab_v);
+        }
+    }
+    else
+    {
+        /* Double call: two passes with the MBAFF filters. The first one */
+        /* uses ps_left_mb and BS entry 4, the second one ps_left_mb + 1 */
+        /* and BS entry 9.                                               */
+        UWORD32 u4_shift;
+
+        i4_strd_y <<= (!u1_cur_fld);
+        u4_bs_val = pu4_bs_tab[4];
+        i4_strd_uv <<= (!u1_cur_fld);
+
+        if(0x04040404 == u4_bs_val)
+        {
+            ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, i4_strd_y, alpha_y,
+                                                 beta_y);
+            ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, i4_strd_uv, alpha_u,
+                                                   beta_u, alpha_v, beta_v);
+        }
+        else if(u4_bs_val)
+        {
+            pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+            pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+            pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+            ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, i4_strd_y,
+                                                   alpha_y, beta_y,
+                                                   u4_bs_val,
+                                                   pu1_cliptab_y);
+            ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, i4_strd_uv,
+                                                     alpha_u, beta_u,
+                                                     alpha_v, beta_v,
+                                                     u4_bs_val,
+                                                     pu1_cliptab_u,
+                                                     pu1_cliptab_v);
+        }
+
+        /* Move to the start of the second pass. The offset is kept in  */
+        /* 32 bits so that (stride / 2) << 4 cannot be truncated.        */
+        u4_shift = (UWORD32)(i4_strd_y >> 1) << (u1_cur_fld ? 4 : 0);
+        pu1_y += u4_shift;
+        u4_shift = (UWORD32)(i4_strd_uv >> 1) << (u1_cur_fld ? 3 : 0);
+        pu1_u += u4_shift;
+
+        /* LUMA values for the second pass */
+        uc_QPav_Y = (UWORD8)(((ps_left_mb + 1)->u1_mb_qp
+                        + ps_cur_mb->u1_mb_qp + 1) >> 1);
+        idx_a_y = uc_QPav_Y + ofst_a;
+        alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
+        idx_b_y = uc_QPav_Y + ofst_b;
+        beta_y = gau1_ih264d_beta_table[12 + idx_b_y];
+        u4_bs_val = pu4_bs_tab[9];
+
+        /* Chroma cb values for the second pass */
+        i4_mb_qp1 = (ps_left_mb + 1)->u1_mb_qp + i1_cb_qp_idx_ofst;
+        i4_mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst;
+        qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + i4_mb_qp1]
+                        + gau1_ih264d_qp_scale_cr[12 + i4_mb_qp2] + 1) >> 1);
+        idx_a_u = qp_avg + ofst_a;
+        alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
+        idx_b_u = qp_avg + ofst_b;
+        beta_u = gau1_ih264d_beta_table[12 + idx_b_u];
+
+        /* Chroma cr values for the second pass */
+        i4_mb_qp1 = (ps_left_mb + 1)->u1_mb_qp + i1_cr_qp_idx_ofst;
+        i4_mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst;
+        qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + i4_mb_qp1]
+                        + gau1_ih264d_qp_scale_cr[12 + i4_mb_qp2] + 1) >> 1);
+        idx_a_v = qp_avg + ofst_a;
+        alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
+        idx_b_v = qp_avg + ofst_b;
+        beta_v = gau1_ih264d_beta_table[12 + idx_b_v];
+
+        if(0x04040404 == u4_bs_val)
+        {
+            ps_dec->pf_deblk_luma_vert_bs4_mbaff(pu1_y, i4_strd_y, alpha_y,
+                                                 beta_y);
+            ps_dec->pf_deblk_chroma_vert_bs4_mbaff(pu1_u, i4_strd_uv, alpha_u,
+                                                   beta_u, alpha_v, beta_v);
+        }
+        else if(u4_bs_val)
+        {
+            pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+            pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+            pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+            ps_dec->pf_deblk_luma_vert_bslt4_mbaff(pu1_y, i4_strd_y,
+                                                   alpha_y, beta_y,
+                                                   u4_bs_val,
+                                                   pu1_cliptab_y);
+            ps_dec->pf_deblk_chroma_vert_bslt4_mbaff(pu1_u, i4_strd_uv,
+                                                     alpha_u, beta_u,
+                                                     alpha_v, beta_v,
+                                                     u4_bs_val,
+                                                     pu1_cliptab_u,
+                                                     pu1_cliptab_v);
+        }
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_filter_boundary_topmbaff \endif
+ *
+ * \brief
+ *    Filters the top macroblock edge (luma and chroma) of an MB in an
+ *    MBAFF picture, using the MB position held in ps_tfr_cxt.
+ *
+ * \param ps_dec             Decoder context (filter function pointers)
+ * \param ps_tfr_cxt         Transfer context; pu1_mb_y / pu1_mb_u point at
+ *                           the first row to be filtered
+ * \param i1_cb_qp_idx_ofst  Chroma QP index offset for Cb
+ * \param i1_cr_qp_idx_ofst  Chroma QP index offset for Cr
+ * \param ps_cur_mb          Deblocking record of the current MB
+ * \param i4_strd_y          Luma stride
+ * \param i4_strd_uv         Chroma stride
+ * \param ps_top_mb          Deblocking record of the top neighbour; must
+ *                           not be NULL
+ * \param u4_bs              Packed boundary strengths of the edge;
+ *                           0x04040404 selects the BS4 filters, 0 means
+ *                           nothing is filtered
+ *
+ * \return
+ *    None
+ *
+ * \note
+ *    The chroma QP index (MB QP + chroma offset) can be negative. It is
+ *    therefore kept in a WORD32; storing it in an 8 bit unsigned value
+ *    would wrap it to 244..255 and index gau1_ih264d_qp_scale_cr far out
+ *    of range. The "12 +" bias of the table lookups presumably exists to
+ *    absorb such negative indices.
+ **************************************************************************
+ */
+void ih264d_filter_boundary_topmbaff(dec_struct_t *ps_dec,
+                                     tfr_ctxt_t * ps_tfr_cxt,
+                                     WORD8 i1_cb_qp_idx_ofst,
+                                     WORD8 i1_cr_qp_idx_ofst,
+                                     deblk_mb_t * ps_cur_mb,
+                                     UWORD16 i4_strd_y,
+                                     UWORD16 i4_strd_uv,
+                                     deblk_mb_t * ps_top_mb,
+                                     UWORD32 u4_bs)
+{
+    UWORD8 *pu1_y, *pu1_u;
+    WORD32 alpha_u = 0, beta_u = 0, alpha_v = 0, beta_v = 0;
+    WORD32 alpha_y = 0, beta_y = 0;
+    WORD32 qp_avg;
+    WORD32 uc_QPav_Y;
+    WORD32 idx_b_u, idx_a_u, idx_b_v, idx_a_v;
+    WORD32 idx_b_y, idx_a_y;
+    /* QP + chroma offset of the two MBs; signed, may be negative */
+    WORD32 i4_mb_qp1, i4_mb_qp2;
+
+    UWORD8 *pu1_cliptab_u, *pu1_cliptab_v, *pu1_cliptab_y;
+    WORD32 ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
+    WORD32 ofst_b = ps_cur_mb->i1_slice_beta_offset;
+
+    /* LUMA values */
+    /* Deblock rounding change */
+    uc_QPav_Y = (UWORD8)((ps_top_mb->u1_mb_qp + ps_cur_mb->u1_mb_qp + 1) >> 1);
+    idx_a_y = uc_QPav_Y + ofst_a;
+    alpha_y = gau1_ih264d_alpha_table[12 + idx_a_y];
+    idx_b_y = uc_QPav_Y + ofst_b;
+    beta_y = gau1_ih264d_beta_table[12 + idx_b_y];
+    pu1_y = ps_tfr_cxt->pu1_mb_y;
+
+    /* CHROMA cb values */
+    i4_mb_qp1 = ps_top_mb->u1_mb_qp + i1_cb_qp_idx_ofst;
+    i4_mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cb_qp_idx_ofst;
+    qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + i4_mb_qp1]
+                    + gau1_ih264d_qp_scale_cr[12 + i4_mb_qp2] + 1) >> 1);
+
+    idx_a_u = qp_avg + ofst_a;
+    alpha_u = gau1_ih264d_alpha_table[12 + idx_a_u];
+    idx_b_u = qp_avg + ofst_b;
+    beta_u = gau1_ih264d_beta_table[12 + idx_b_u];
+
+    /* CHROMA cr values */
+    i4_mb_qp1 = ps_top_mb->u1_mb_qp + i1_cr_qp_idx_ofst;
+    i4_mb_qp2 = ps_cur_mb->u1_mb_qp + i1_cr_qp_idx_ofst;
+    qp_avg = (UWORD8)((gau1_ih264d_qp_scale_cr[12 + i4_mb_qp1]
+                    + gau1_ih264d_qp_scale_cr[12 + i4_mb_qp2] + 1) >> 1);
+
+    idx_a_v = qp_avg + ofst_a;
+    alpha_v = gau1_ih264d_alpha_table[12 + idx_a_v];
+    idx_b_v = qp_avg + ofst_b;
+    beta_v = gau1_ih264d_beta_table[12 + idx_b_v];
+    pu1_u = ps_tfr_cxt->pu1_mb_u;
+
+    if(u4_bs == 0x04040404)
+    {
+        /* All four strengths are 4: dedicated BS4 filters */
+        ps_dec->pf_deblk_luma_horz_bs4(pu1_y, i4_strd_y, alpha_y, beta_y);
+        ps_dec->pf_deblk_chroma_horz_bs4(pu1_u, i4_strd_uv, alpha_u, beta_u,
+                                         alpha_v, beta_v);
+    }
+    else if(u4_bs)
+    {
+        /* At least one non-zero strength: filters with clipping tables */
+        pu1_cliptab_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_y];
+        pu1_cliptab_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_u];
+        pu1_cliptab_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + idx_a_v];
+
+        ps_dec->pf_deblk_luma_horz_bslt4(pu1_y, i4_strd_y, alpha_y, beta_y,
+                                         u4_bs, pu1_cliptab_y);
+        ps_dec->pf_deblk_chroma_horz_bslt4(pu1_u, i4_strd_uv, alpha_u,
+                                           beta_u, alpha_v, beta_v,
+                                           u4_bs, pu1_cliptab_u,
+                                           pu1_cliptab_v);
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_deblock_mb_mbaff \endif
+ *
+ * \brief
+ *    Deblocks one macroblock of an MBAFF picture: left MB edge, the three
+ *    inner vertical edges, top MB edge(s) and the three inner horizontal
+ *    edges, in that order.
+ *
+ * \param ps_dec             Decoder context (filter function pointers)
+ * \param ps_tfr_cxt         Transfer context; pu1_mb_y / pu1_mb_u point at
+ *                           the MB
+ * \param i1_cb_qp_idx_ofst  Chroma QP index offset for Cb
+ * \param i1_cr_qp_idx_ofst  Chroma QP index offset for Cr
+ * \param ps_cur_mb          Deblocking record of the MB (QP, offsets, BS)
+ * \param i4_strd_y          Luma stride (doubled here for field MBs)
+ * \param i4_strd_uv         Chroma stride (doubled here for field MBs)
+ * \param ps_top_mb          Top neighbour, NULL to skip the top edge
+ * \param ps_left_mb         Left neighbour, NULL to skip the left edge
+ * \param u1_cur_fld         Non-zero when the current MB is a field MB
+ * \param u1_extra_top_edge  Non-zero when the top edge is filtered twice
+ *                           (BS entries 8 and 0)
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_deblock_mb_mbaff(dec_struct_t *ps_dec,
+                             tfr_ctxt_t * ps_tfr_cxt,
+                             WORD8 i1_cb_qp_idx_ofst,
+                             WORD8 i1_cr_qp_idx_ofst,
+                             deblk_mb_t * ps_cur_mb,
+                             WORD32 i4_strd_y,
+                             WORD32 i4_strd_uv,
+                             deblk_mb_t * ps_top_mb,
+                             deblk_mb_t * ps_left_mb,
+                             UWORD8 u1_cur_fld,
+                             UWORD8 u1_extra_top_edge)
+{
+    UWORD32 * pu4_bs_tab = ps_cur_mb->u4_bs_table;
+    UWORD8 *pu1_luma, *pu1_chroma;
+    UWORD8 *pu1_clip_y, *pu1_clip_u, *pu1_clip_v;
+    WORD32 i4_alpha_y, i4_beta_y;
+    WORD32 i4_alpha_u, i4_beta_u;
+    WORD32 i4_alpha_v, i4_beta_v;
+    WORD32 i4_idx_a_y, i4_idx_a_u, i4_idx_a_v;
+    WORD32 i4_ofst_a, i4_ofst_b;
+    WORD32 i4_qp_y, i4_qp_u, i4_qp_v;
+    WORD32 i4_edge;
+    UWORD32 u4_edge_bs;
+
+    /* Return from here to switch off deblocking */
+    PROFILE_DISABLE_DEBLK()
+
+    i4_strd_y <<= u1_cur_fld;
+    i4_strd_uv <<= u1_cur_fld;
+
+    /*--------------------------------------------------------------------*/
+    /* Left MB edge: the caller passes NULL when it must not be filtered  */
+    /* (picture boundary, or slice boundary with boundary filtering off)  */
+    /*--------------------------------------------------------------------*/
+    if(ps_left_mb)
+    {
+        ih264d_filter_boundary_left_mbaff(ps_dec, ps_tfr_cxt,
+                                          i1_cb_qp_idx_ofst,
+                                          i1_cr_qp_idx_ofst, ps_cur_mb,
+                                          i4_strd_y, i4_strd_uv, ps_left_mb,
+                                          pu4_bs_tab, u1_cur_fld);
+    }
+
+    /*--------------------------------------------------------------------*/
+    /* Filter parameters for all inner edges: only the QP of the current  */
+    /* MB is involved                                                     */
+    /*--------------------------------------------------------------------*/
+    i4_ofst_a = ps_cur_mb->i1_slice_alpha_c0_offset;
+    i4_ofst_b = ps_cur_mb->i1_slice_beta_offset;
+
+    /* Luma */
+    i4_qp_y = ps_cur_mb->u1_mb_qp;
+    i4_idx_a_y = i4_qp_y + i4_ofst_a;
+    i4_alpha_y = gau1_ih264d_alpha_table[12 + i4_idx_a_y];
+    i4_beta_y = gau1_ih264d_beta_table[12 + (i4_qp_y + i4_ofst_b)];
+
+    /* Chroma Cb */
+    i4_qp_u = gau1_ih264d_qp_scale_cr[12 + (i4_qp_y + i1_cb_qp_idx_ofst)];
+    i4_idx_a_u = i4_qp_u + i4_ofst_a;
+    i4_alpha_u = gau1_ih264d_alpha_table[12 + i4_idx_a_u];
+    i4_beta_u = gau1_ih264d_beta_table[12 + (i4_qp_u + i4_ofst_b)];
+
+    /* Chroma Cr */
+    i4_qp_v = gau1_ih264d_qp_scale_cr[12 + (i4_qp_y + i1_cr_qp_idx_ofst)];
+    i4_idx_a_v = i4_qp_v + i4_ofst_a;
+    i4_alpha_v = gau1_ih264d_alpha_table[12 + i4_idx_a_v];
+    i4_beta_v = gau1_ih264d_beta_table[12 + (i4_qp_v + i4_ofst_b)];
+
+    pu1_clip_y = (UWORD8 *)&gau1_ih264d_clip_table[12 + i4_idx_a_y];
+    pu1_clip_u = (UWORD8 *)&gau1_ih264d_clip_table[12 + i4_idx_a_u];
+    pu1_clip_v = (UWORD8 *)&gau1_ih264d_clip_table[12 + i4_idx_a_v];
+
+    pu1_luma = ps_tfr_cxt->pu1_mb_y;
+    pu1_chroma = ps_tfr_cxt->pu1_mb_u;
+
+    /*--------------------------------------------------------------------*/
+    /* Inner vertical edges 1..3 (BS entries 5..7). Luma is filtered on   */
+    /* every edge, chroma only on edge 2.                                 */
+    /*--------------------------------------------------------------------*/
+    for(i4_edge = 1; i4_edge <= 3; i4_edge++)
+    {
+        u4_edge_bs = pu4_bs_tab[4 + i4_edge];
+        if(!u4_edge_bs)
+            continue;
+
+        ps_dec->pf_deblk_luma_vert_bslt4(pu1_luma + 4 * i4_edge, i4_strd_y,
+                                         i4_alpha_y, i4_beta_y, u4_edge_bs,
+                                         pu1_clip_y);
+        if(2 == i4_edge)
+        {
+            ps_dec->pf_deblk_chroma_vert_bslt4(
+                            pu1_chroma + 4 * YUV420SP_FACTOR, i4_strd_uv,
+                            i4_alpha_u, i4_beta_u, i4_alpha_v, i4_beta_v,
+                            u4_edge_bs, pu1_clip_u, pu1_clip_v);
+        }
+    }
+
+    /*--------------------------------------------------------------------*/
+    /* Top MB edge: the caller passes NULL when it must not be filtered   */
+    /* (picture boundary, or slice boundary with boundary filtering off)  */
+    /*--------------------------------------------------------------------*/
+    if(ps_top_mb)
+    {
+        if(!u1_extra_top_edge)
+        {
+            ih264d_filter_boundary_topmbaff(ps_dec, ps_tfr_cxt,
+                                            i1_cb_qp_idx_ofst,
+                                            i1_cr_qp_idx_ofst, ps_cur_mb,
+                                            i4_strd_y, i4_strd_uv, ps_top_mb,
+                                            pu4_bs_tab[0]);
+        }
+        else
+        {
+            /* Two passes at doubled stride: first against ps_top_mb - 1 */
+            /* with BS entry 8 starting at the first row, then against   */
+            /* ps_top_mb with BS entry 0 starting one row lower. The     */
+            /* context pointers are moved down for the second pass and   */
+            /* restored afterwards.                                      */
+            ih264d_filter_boundary_topmbaff(ps_dec, ps_tfr_cxt,
+                                            i1_cb_qp_idx_ofst,
+                                            i1_cr_qp_idx_ofst, ps_cur_mb,
+                                            (UWORD16)(i4_strd_y << 1),
+                                            (UWORD16)(i4_strd_uv << 1),
+                                            ps_top_mb - 1, pu4_bs_tab[8]);
+
+            ps_tfr_cxt->pu1_mb_y += i4_strd_y;
+            ps_tfr_cxt->pu1_mb_u += i4_strd_uv;
+            ps_tfr_cxt->pu1_mb_v += i4_strd_uv;
+
+            ih264d_filter_boundary_topmbaff(ps_dec, ps_tfr_cxt,
+                                            i1_cb_qp_idx_ofst,
+                                            i1_cr_qp_idx_ofst, ps_cur_mb,
+                                            (UWORD16)(i4_strd_y << 1),
+                                            (UWORD16)(i4_strd_uv << 1),
+                                            ps_top_mb, pu4_bs_tab[0]);
+
+            ps_tfr_cxt->pu1_mb_y -= i4_strd_y;
+            ps_tfr_cxt->pu1_mb_u -= i4_strd_uv;
+            ps_tfr_cxt->pu1_mb_v -= i4_strd_uv;
+        }
+    }
+
+    /*--------------------------------------------------------------------*/
+    /* Inner horizontal edges 1..3 (BS entries 1..3), 4 luma rows apart.  */
+    /* Luma is filtered on every edge, chroma only on edge 2.             */
+    /*--------------------------------------------------------------------*/
+    for(i4_edge = 1; i4_edge <= 3; i4_edge++)
+    {
+        u4_edge_bs = pu4_bs_tab[i4_edge];
+        if(!u4_edge_bs)
+            continue;
+
+        ps_dec->pf_deblk_luma_horz_bslt4(
+                        pu1_luma + i4_edge * (i4_strd_y << 2), i4_strd_y,
+                        i4_alpha_y, i4_beta_y, u4_edge_bs, pu1_clip_y);
+        if(2 == i4_edge)
+        {
+            ps_dec->pf_deblk_chroma_horz_bslt4(
+                            pu1_chroma + (i4_strd_uv << 2), i4_strd_uv,
+                            i4_alpha_u, i4_beta_u, i4_alpha_v, i4_beta_v,
+                            u4_edge_bs, pu1_clip_u, pu1_clip_v);
+        }
+    }
+}
+
diff --git a/decoder/ih264d_deblocking.h b/decoder/ih264d_deblocking.h
new file mode 100755
index 0000000..21601aa
--- /dev/null
+++ b/decoder/ih264d_deblocking.h
@@ -0,0 +1,173 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_DEBLOCKING_H_
+#define _IH264D_DEBLOCKING_H_
+/*!
+ **************************************************************************
+ * \file ih264d_deblocking.h
+ *
+ * \brief
+ * Declarations of deblocking functions
+ *
+ * \date
+ * 23/11/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+
+WORD8 ih264d_set_deblocking_parameters(deblk_mb_t * ps_cur_deblk_mb,
+ dec_slice_params_t * ps_slice,
+ UWORD8 u1_mb_ngbr_availablity,
+ UWORD8 u1_mb_field_decoding_flag);
+
+void FilterBoundaryLeft(tfr_ctxt_t * const ps_tfr_cxt,
+ const WORD8 i1_cb_qp_idx_ofst,
+ const WORD8 i1_cr_qp_idx_ofst,
+ deblk_mb_t * const ps_cur_mb,
+ UWORD16 u2_strd_y,
+ UWORD16 u2_strd_uv,
+ deblk_mb_t * const ps_left_mb,
+ const UWORD32 pu4_bs_tab[],
+ const UWORD8 u1_cur_fld);
+void FilterBoundaryTop(tfr_ctxt_t * const ps_tfr_cxt,
+ const WORD8 i1_cb_qp_idx_ofst,
+ const WORD8 i1_cr_qp_idx_ofst,
+ deblk_mb_t * const ps_cur_mb,
+ const UWORD16 u2_strd_y,
+ const UWORD16 u2_strd_uv,
+ deblk_mb_t * const ps_top_mb,
+ const UWORD32 u4_bs);
+void deblock_mb(tfr_ctxt_t * const ps_tfr_cxt,
+ const WORD8 i1_cb_qp_idx_ofst,
+ const WORD8 i1_cr_qp_idx_ofst,
+ deblk_mb_t * const ps_cur_mb,
+ WORD32 i4_strd_y,
+ WORD32 i4_strd_uv,
+ deblk_mb_t * const ps_top_mb,
+ deblk_mb_t * const ps_left_mb,
+ const UWORD8 u1_cur_fld,
+ const UWORD8 u1_extra_top_edge);
+void ih264d_deblock_mb_mbaff(dec_struct_t *ps_dec,
+ tfr_ctxt_t * const ps_tfr_cxt,
+ const WORD8 i1_cb_qp_idx_ofst,
+ const WORD8 i1_cr_qp_idx_ofst,
+ deblk_mb_t * const ps_cur_mb,
+ WORD32 i4_strd_y,
+ WORD32 i4_strd_uv,
+ deblk_mb_t * const ps_top_mb,
+ deblk_mb_t * const ps_left_mb,
+ const UWORD8 u1_cur_fld,
+ const UWORD8 u1_extra_top_edge);
+
+void ih264d_deblock_picture_mbaff(dec_struct_t * const ps_dec);
+
+void ih264d_deblock_picture_non_mbaff(dec_struct_t * const ps_dec);
+
+void ih264d_deblock_picture_progressive(dec_struct_t * const ps_dec);
+
+void ih264d_compute_bs_mbaff(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ const UWORD16 u2_mbxn_mb);
+void ih264d_compute_bs_non_mbaff(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ const UWORD16 u2_mbxn_mb);
+
+void ih264d_fill_bs_mbedge_2(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ const UWORD16 u2_mbxn_mb);
+
+void ih264d_fill_bs_mbedge_4(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ const UWORD16 u2_mbxn_mb);
+
+void ih264d_fill_bs1_16x16mb_pslice(mv_pred_t *ps_cur_mv_pred,
+ mv_pred_t *ps_top_mv_pred,
+ void **ppv_map_ref_idx_to_poc,
+ UWORD32 *pu4_bs_table,
+ mv_pred_t *ps_leftmost_mv_pred,
+ neighbouradd_t *ps_left_addr,
+ void **u4_pic_addrress,
+ WORD32 i4_ver_mvlimit);
+
+void ih264d_fill_bs1_non16x16mb_pslice(mv_pred_t *ps_cur_mv_pred,
+ mv_pred_t *ps_top_mv_pred,
+ void **ppv_map_ref_idx_to_poc,
+ UWORD32 *pu4_bs_table,
+ mv_pred_t *ps_leftmost_mv_pred,
+ neighbouradd_t *ps_left_addr,
+ void **u4_pic_addrress,
+ WORD32 i4_ver_mvlimit);
+
+void ih264d_fill_bs1_16x16mb_bslice(mv_pred_t *ps_cur_mv_pred,
+ mv_pred_t *ps_top_mv_pred,
+ void **ppv_map_ref_idx_to_poc,
+ UWORD32 *pu4_bs_table,
+ mv_pred_t *ps_leftmost_mv_pred,
+ neighbouradd_t *ps_left_addr,
+ void **u4_pic_addrress,
+ WORD32 i4_ver_mvlimit);
+
+void ih264d_fill_bs1_non16x16mb_bslice(mv_pred_t *ps_cur_mv_pred,
+ mv_pred_t *ps_top_mv_pred,
+ void **ppv_map_ref_idx_to_poc,
+ UWORD32 *pu4_bs_table,
+ mv_pred_t *ps_leftmost_mv_pred,
+ neighbouradd_t *ps_left_addr,
+ void **u4_pic_addrress,
+ WORD32 i4_ver_mvlimit);
+
+void ih264d_fill_bs_xtra_left_edge_cur_fld(UWORD32 *pu4_bs,
+ WORD32 u4_left_mb_t_csbp,
+ WORD32 u4_left_mb_b_csbp,
+ WORD32 u4_cur_mb_csbp,
+ UWORD32 u4_cur_mb_top);
+
+void ih264d_fill_bs_xtra_left_edge_cur_frm(UWORD32 *pu4_bs,
+ WORD32 u4_left_mb_t_csbp,
+ WORD32 u4_left_mb_b_csbp,
+ WORD32 u4_cur_mb_csbp,
+ UWORD32 u4_cur_mb_top);
+
+void ih264d_deblock_mb_nonmbaff(dec_struct_t *ps_dec,
+ tfr_ctxt_t * const ps_tfr_cxt,
+ const WORD8 i1_cb_qp_idx_ofst,
+ const WORD8 i1_cr_qp_idx_ofst,
+ deblk_mb_t * const ps_cur_mb,
+ WORD32 i4_strd_y,
+ WORD32 i4_strd_uv,
+ deblk_mb_t * const ps_top_mb,
+ deblk_mb_t * const ps_left_mb);
+
+void ih264d_init_deblk_tfr_ctxt(dec_struct_t * ps_dec,
+ pad_mgr_t *ps_pad_mgr,
+ tfr_ctxt_t *ps_tfr_cxt,
+ UWORD16 u2_image_wd_mb,
+ UWORD8 u1_mbaff);
+
+void ih264d_deblock_mb_level(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD32 nmb_index);
+
+#endif /* _IH264D_DEBLOCKING_H_ */
diff --git a/decoder/ih264d_debug.c b/decoder/ih264d_debug.c
new file mode 100755
index 0000000..5650e20
--- /dev/null
+++ b/decoder/ih264d_debug.c
@@ -0,0 +1,40 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ **************************************************************************
+ * \file ih264d_debug.c
+ *
+ * \brief
+ * Contains routines that can be used in debugging
+ *
+ * \date
+ * 20/11/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_debug.h"
+#include "ih264d_defs.h"
+
diff --git a/decoder/ih264d_debug.h b/decoder/ih264d_debug.h
new file mode 100755
index 0000000..787b697
--- /dev/null
+++ b/decoder/ih264d_debug.h
@@ -0,0 +1,135 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_DEBUG_H_
+#define _IH264D_DEBUG_H_
+
+/*!
+ **************************************************************************
+ * \file ih264d_debug.h
+ *
+ * \brief
+ * Contains declarations used for debugging
+ *
+ * \date
+ * 2/12/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#ifdef DEBUG_DEC
+#define H264_DEC_DEBUG_PRINT(...) do { printf("\n[H264_DEBUG] %s/%d:: ", __FUNCTION__, __LINE__); printf(__VA_ARGS__); } while (0) /* single statement: safe in an unbraced if/else */
+#else //DEBUG_DEC
+#define H264_DEC_DEBUG_PRINT(...) do { } while (0) /* no-op that still consumes the trailing ';' */
+#endif //DEBUG_DEC
+#define STRENGTH_DEBLOCKING 0 //sanjeev
+#define DEBUG_RECONSTRUCT_LUMA 0
+#define DEBUG_RECONSTRUCT_CHROMA 0
+
+#define DEBUG_IDCT 0
+#define DEBUG_LUMA_IDCT 0
+#define DEBUG_REF_IDCT 0
+
+#define BIN_BIT_RATIO 0
+#define MB_PART_HIST 0
+
+#define MB_INTRA_PREDICTION 1
+
+#ifdef WIN32
+#define CHK_PURIFY 0
+#else
+#define CHK_PURIFY 0
+#endif
+
+#if MB_INTRA_PREDICTION
+#define MB_INTRA_CHROMA_PREDICTION_ON 1
+#define MB_INTRA_4x4_PREDICTION_ON 1
+#define MB_INTRA_16x16_PREDICTION_ON 1
+#endif
+
+#define TRACE 0
+#define DEBUG_CABAC 0
+#define DEBUG_ABS_MVD 0
+#define DEBUG_INTRA_PRED_MODES 0
+#define DEBUG_DEBLOCKING 0
+
+#define COPYTHECONTEXT(s,val)
+#define PRINT_TRACE
+#define PRINT_TRACE_CAB
+#define SWITCHOFFTRACE
+#define SWITCHONTRACE
+#define SWITCHOFFTRACECABAC
+#define SWITCHONTRACECABAC
+
+#define INC_BIN_COUNT(ps_cab_env)
+#define INC_DECISION_BINS(ps_cab_env)
+#define INC_BYPASS_BINS(ps_cab_env)
+#define INC_SYM_COUNT(ps_cab_env)
+#define PRINT_BIN_BIT_RATIO(ps_dec)
+#define RESET_BIN_COUNTS(ps_cab_env)
+
+
+#ifdef PROFILE_DIS_DEBLK
+#define PROFILE_DISABLE_DEBLK() return;
+#else
+#define PROFILE_DISABLE_DEBLK() ;
+#endif
+
+#ifdef PROFILE_DIS_IQ_IT_RECON
+#define PROFILE_DISABLE_IQ_IT_RECON() if (0)
+#define PROFILE_DISABLE_IQ_IT_RECON_RETURN() return;
+#else
+#define PROFILE_DISABLE_IQ_IT_RECON() ;
+#define PROFILE_DISABLE_IQ_IT_RECON_RETURN() ;
+#endif
+
+#ifdef PROFILE_DIS_INTRA_PRED
+#define PROFILE_DISABLE_INTRA_PRED() if (0)
+#else
+#define PROFILE_DISABLE_INTRA_PRED() ;
+#endif
+
+#ifdef PROFILE_DIS_UNPACK
+#define PROFILE_DISABLE_UNPACK_LUMA() return 0;
+#define PROFILE_DISABLE_UNPACK_CHROMA() return ;
+#else
+#define PROFILE_DISABLE_UNPACK_LUMA() ;
+#define PROFILE_DISABLE_UNPACK_CHROMA() ;
+#endif
+
+#ifdef PROFILE_DIS_INTER_PRED
+#define PROFILE_DISABLE_INTER_PRED() return;
+#else
+#define PROFILE_DISABLE_INTER_PRED() ;
+#endif
+
+#ifdef PROFILE_DIS_BOUNDARY_STRENGTH
+#define PROFILE_DISABLE_BOUNDARY_STRENGTH() return;
+#else
+#define PROFILE_DISABLE_BOUNDARY_STRENGTH() ;
+#endif
+
+#ifdef PROFILE_DIS_MB_PART_INFO
+#define PROFILE_DISABLE_MB_PART_INFO() return 0;
+#else
+#define PROFILE_DISABLE_MB_PART_INFO() ;
+#endif
+
+#endif /* _IH264D_DEBUG_H_ */
+
diff --git a/decoder/ih264d_defs.h b/decoder/ih264d_defs.h
new file mode 100755
index 0000000..3f8bc58
--- /dev/null
+++ b/decoder/ih264d_defs.h
@@ -0,0 +1,671 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_DEFS_H_
+#define _IH264D_DEFS_H_
+
+/**
+ ************************************************************************
+ * \file ih264d_defs.h
+ *
+ * \brief
+ * Type definitions used in the code
+ *
+ * \date
+ * 19/11/2002
+ *
+ * \author Sriram Sethuraman
+ *
+ ************************************************************************
+ */
+#define H264_MAX_FRAME_WIDTH 3840
+#define H264_MAX_FRAME_HEIGHT 2160
+
+#define H264_MIN_FRAME_WIDTH 16
+#define H264_MIN_FRAME_HEIGHT 16
+
+#define IH264DEC_MAX_NAL_UNIT_SIZE 311040
+#define IH264DEC_NUM_ZEROS_IN_START_CODE 2
+#define H264DEC_MEM_ALLOC_SUCCESS 1
+#define H264DEC_MEM_ALLOC_FAILURE 0
+#define H264DEC_CREATE_FAILED (NULL)
+
+#define H264_NO_BUF_TO_DISPLAY -1
+#define H264_DISPLAY_BUF_FOUND 0
+#define IH264DEC_YUV420 0
+#define IH264DEC_YUV422 1
+#define IH264DEC_YUV422INTERLACED 2
+#define IH264DEC_RGB 4 // Original Size
+/* Round variables up to the next multiple of 2^y */
+#define FILL_POWEROF2(x,y) (size_t)(((x) & ((1<<(y))-1))?((1<<(y)) - ((x) & ((1<<(y))-1))): 0)
+#define ALIGN_POWEROF2(x,y) (x) = (x)+FILL_POWEROF2((size_t)(x),y)
+
+/** Bit manipulation macros (bit index is parenthesized so expression arguments bind correctly) */
+#define CHECKBIT(a,i) ((a) & (1 << (i)))
+#define CLEARBIT(a,i) ((a) &= ~(1 << (i)))
+
+/** Macro to convert a integer to a boolean value */
+#define BOOLEAN(x) (!!(x))
+
+/** Arithmetic operations */
+#define MOD(x,y) ((x)%(y))
+#define DIV(x,y) ((x)/(y))
+#define MUL(x,y) ((x)*(y))
+#define SIGN_POW2_DIV(x, y) (((x) < 0) ? (-((-(x)) >> (y))) : ((x) >> (y)))
+
+#define MB_ENABLE_FILTERING 0x00
+#define MB_DISABLE_FILTERING 0x01
+#define MB_DISABLE_TOP_EDGE 0x02
+#define MB_DISABLE_LEFT_EDGE 0x04
+
+/** Maximum number of reference pics */
+#define MAX_REF_BUFS 32
+#define MAX_DISP_BUFS_NEW 64
+#define MAX_FRAMES 16
+#define MAX_MBS_IN_ROW (720/16)
+#define INVALID_FRAME_NUM 0x0fffffff
+#define GAP_FRAME_NUM 0x1fffffff
+#define MAX_PIC_SIZE 622080 // 720 * 576 * 1.5
+/** macros for reference picture lists, refIdx to POC mapping */
+// 1 extra entry into reference picture lists for refIdx = -1.
+// this entry is always 0. this saves conditional checks in
+// FillBs modules.
+#define POC_LIST_L0_TO_L1_DIFF (( 2*MAX_FRAMES) + 1)
+#define POC_LIST_L0_TO_L1_DIFF_1 ((MAX_FRAMES) + 1)
+// Each value below is k * POC_LIST_L0_TO_L1_DIFF, fully parenthesized so it stays intact inside larger expressions.
+#define FRM_LIST_L0 0
+#define FRM_LIST_L1 (1 * POC_LIST_L0_TO_L1_DIFF)
+#define TOP_LIST_FLD_L0 (2 * POC_LIST_L0_TO_L1_DIFF)
+#define TOP_LIST_FLD_L1 (3 * POC_LIST_L0_TO_L1_DIFF)
+#define BOT_LIST_FLD_L0 (4 * POC_LIST_L0_TO_L1_DIFF)
+#define BOT_LIST_FLD_L1 (5 * POC_LIST_L0_TO_L1_DIFF)
+#define TOTAL_LIST_ENTRIES (6 * POC_LIST_L0_TO_L1_DIFF)
+#define PAD_MV_BANK_ROW 64
+#define OFFSET_MV_BANK_ROW ((PAD_MV_BANK_ROW)>>1)
+#define PAD_PUC_CURNNZ 32
+#define OFFSET_PUC_CURNNZ (PAD_PUC_CURNNZ)
+#define PAD_MAP_IDX_POC (1)
+#define OFFSET_MAP_IDX_POC (1)
+
+#define OFFSET_MAP_IDX_POC (1)
+
+#define NAL_REF_IDC(nal_first_byte) (((nal_first_byte) >> 5) & 0x3) /* bits 6..5 of the first NAL byte */
+#define NAL_FORBIDDEN_BIT(nal_first_byte) ((nal_first_byte) >> 7) /* everything above bit 6 */
+#define NAL_UNIT_TYPE(nal_first_byte) ((nal_first_byte) & 0x1F) /* low five bits */
+
+#define INT_PIC_TYPE_I (0x00)
+
+#define YIELD_CNT_THRESHOLD 8
+#define ENABLE_420P_UV_SHARING 1
+
+#define OK 0
+#define END 1
+#define NOT_OK -1
+
+/* For 420SP */
+#define YUV420SP_FACTOR 2
+
+
+/**
+ ***************************************************************************
+ * Enum to hold various mem records being requested
+ ****************************************************************************
+ */
+enum
+{
+ /**
+ * Codec Object at API level
+ */
+ MEM_REC_IV_OBJ,
+
+ /**
+ * Codec context
+ */
+ MEM_REC_CODEC,
+
+ /**
+ * Bitstream buffer which holds emulation prevention removed bytes
+ */
+ MEM_REC_BITSBUF,
+
+ /**
+ * Buffer to hold coeff data
+ */
+ MEM_REC_COEFF_DATA,
+
+ /**
+ * Motion vector bank
+ */
+ MEM_REC_MVBANK,
+
+ /**
+ * Holds mem records passed to the codec.
+ */
+ MEM_REC_BACKUP,
+
+ /**
+ * Holds SPS
+ */
+ MEM_REC_SPS,
+
+ /**
+ * Holds PPS
+ */
+ MEM_REC_PPS,
+
+ /**
+ * Holds Slice Headers
+ */
+ MEM_REC_SLICE_HDR,
+
+ /**
+ * Holds thread handles
+ */
+ MEM_REC_THREAD_HANDLE,
+
+ /**
+ * Contains i4_status map indicating parse i4_status per MB basis
+ */
+ MEM_REC_PARSE_MAP,
+
+ /**
+ * Contains i4_status map indicating processing i4_status per MB basis
+ */
+ MEM_REC_PROC_MAP,
+
+ /**
+ * Contains slice number info for each MB
+ */
+
+ MEM_REC_SLICE_NUM_MAP,
+
+ /**
+ * Holds dpb manager context
+ */
+ MEM_REC_DPB_MGR,
+
+ /**
+ * Holds neighbors' info
+ */
+ MEM_REC_NEIGHBOR_INFO,
+
+ /**
+ * Holds neighbors' info (NOTE(review): same text as MEM_REC_NEIGHBOR_INFO, possibly copy-pasted - confirm)
+ */
+ MEM_REC_PRED_INFO,
+
+
+ /**
+ * Holds inter pred information in packed format
+ */
+ MEM_REC_PRED_INFO_PKD,
+ /**
+ * Holds neighbors' info (NOTE(review): same text as MEM_REC_NEIGHBOR_INFO, possibly copy-pasted - confirm)
+ */
+ MEM_REC_MB_INFO,
+
+ /**
+ * Holds deblock Mb info structure (frame level)
+ */
+ MEM_REC_DEBLK_MB_INFO,
+
+ /**
+ * Holds reference picture buffers in non-shared mode
+ */
+ MEM_REC_REF_PIC,
+
+ /**
+ * Holds some misc intermediate_buffers
+ */
+ MEM_REC_EXTRA_MEM,
+
+ /**
+ * Holds some misc intermediate_buffers
+ */
+ MEM_REC_INTERNAL_SCRATCH,
+
+ /**
+ * Holds some misc intermediate_buffers
+ */
+ MEM_REC_INTERNAL_PERSIST,
+
+ /* holds structures related to picture buffer manager*/
+ MEM_REC_PIC_BUF_MGR,
+
+ /*holds structure related to MV buffer manager*/
+ MEM_REC_MV_BUF_MGR,
+
+ /**
+ * Place holder to compute number of memory records.
+ */
+ MEM_REC_CNT
+/* Do not add anything below */
+};
+
+#ifdef DEBLOCK_THREAD
+#define H264_MUTEX_LOCK(lock) ithread_mutex_lock(lock)
+#define H264_MUTEX_UNLOCK(lock) ithread_mutex_unlock(lock)
+#else //DEBLOCK_THREAD - NOTE(review): the matching #endif comes only after MAX_PRED_INFO_LIMIT, so every define from here to there is skipped when DEBLOCK_THREAD is defined; confirm this is intended
+#define H264_MUTEX_LOCK(lock)
+#define H264_MUTEX_UNLOCK(lock)
+
+#define DEBUG_THREADS_PRINTF(...)
+#define DEBUG_PERF_PRINTF(...)
+
+/** Profile Types*/
+#define BASE_PROFILE_IDC 66
+#define MAIN_PROFILE_IDC 77
+#define HIGH_PROFILE_IDC 100
+#define MAIN_PROFILE 1
+
+#define MB_SIZE 16
+#define BLK8x8SIZE 8
+#define BLK_SIZE 4
+#define NUM_BLKS_PER_MB 24
+#define NUM_LUM_BLKS_PER_MB 16
+#define LUM_BLK 0
+#define CHROM_BLK 1
+#define NUM_PELS_IN_MB 64
+
+/* Level Types */
+#define H264_LEVEL_1_0 10
+#define H264_LEVEL_1_1 11
+#define H264_LEVEL_1_2 12
+#define H264_LEVEL_1_3 13
+#define H264_LEVEL_2_0 20
+#define H264_LEVEL_2_1 21
+#define H264_LEVEL_2_2 22
+#define H264_LEVEL_3_0 30
+#define H264_LEVEL_3_1 31
+#define H264_LEVEL_3_2 32
+#define H264_LEVEL_4_0 40
+#define H264_LEVEL_4_1 41
+#define H264_LEVEL_4_2 42
+#define H264_LEVEL_5_0 50
+#define H264_LEVEL_5_1 51
+
+#define MAX_MBS_LEVEL_51 36864
+#define MAX_MBS_LEVEL_50 22080
+#define MAX_MBS_LEVEL_42 8704
+#define MAX_MBS_LEVEL_41 8192
+#define MAX_MBS_LEVEL_40 8192
+#define MAX_MBS_LEVEL_32 5120
+#define MAX_MBS_LEVEL_31 3600
+#define MAX_MBS_LEVEL_30 1620
+#define MAX_MBS_LEVEL_22 1620
+#define MAX_MBS_LEVEL_21 792
+#define MAX_MBS_LEVEL_20 396
+#define MAX_MBS_LEVEL_13 396
+#define MAX_MBS_LEVEL_12 396
+#define MAX_MBS_LEVEL_11 396
+#define MAX_MBS_LEVEL_10 99
+
+
+/*
+ | Legend:
+ | LVL Level*10
+ | MPR Macroblk processing rate
+ | MMF Max Mbs/Frm
+ | MDK Max DbpSize (in kB)
+ | MDB max DbpSize (in bytes)
+ | MFS FrmSizeYUV (in bytes)
+ | MDP Max DBPics
+ | MDC Ceiling DBPics
+ | FPS Frame/Second
+ |
+ | LVL MPR MMF MDK MDB MFS MDP MDC FPS
+ | 10 1485 99 148.5 152064 38016 4.00 4.00 15.00
+ | 11 3000 396 337.5 345600 152064 2.27 3.00 7.58
+ | 12 6000 396 891 912384 152064 6.00 6.00 15.15
+ | 13 11880 396 891 912384 152064 6.00 6.00 30.00
+ | 20 11880 396 891 912384 152064 6.00 6.00 30.00
+ | 21 19800 792 1782 1824768 304128 6.00 6.00 25.00
+ | 22 20250 1620 3037.5 3110400 622080 5.00 5.00 12.50
+ | 30 40500 1620 3037.5 3110400 622080 5.00 5.00 25.00
+ */
+#define MAX_REF_LEVEL_1_0 4
+#define MAX_REF_LEVEL_1_1 3
+#define MAX_REF_LEVEL_1_2 6
+#define MAX_REF_LEVEL_1_3 6
+#define MAX_REF_LEVEL_2_0 6
+#define MAX_REF_LEVEL_2_1 6
+#define MAX_REF_LEVEL_2_2 5
+#define MAX_REF_LEVEL_3_0 5
+#define H264_MAX_REF_PICS 16
+
+#define MIN_LEVEL_SUPPORTED 10
+#define MAX_LEVEL_SUPPORTED 64
+
+/** NAL Types */
+#define SLICE_NAL 1
+#define SLICE_DATA_PARTITION_A_NAL 2
+#define SLICE_DATA_PARTITION_B_NAL 3
+#define SLICE_DATA_PARTITION_C_NAL 4
+#define IDR_SLICE_NAL 5
+#define SEI_NAL 6
+#define SEQ_PARAM_NAL 7
+#define PIC_PARAM_NAL 8
+#define ACCESS_UNIT_DELIMITER_RBSP 9
+#define END_OF_SEQ_RBSP 10
+#define END_OF_STREAM_RBSP 11
+#define FILLER_DATA_NAL 12
+
+/** Entropy coding modes */
+#define CAVLC 0
+#define CABAC 1
+
+/** Picture Types */
+#define I_PIC 0
+#define IP_PIC 1
+#define IPB_PIC 2
+#define SI_PIC 3
+#define SIP_PIC 4
+#define ISI_PIC 5
+#define ISI_PSP_PIC 6
+#define ALL_PIC 7
+
+/* Frame or field picture type */
+#define FRM_PIC 0x00
+#define TOP_FLD 0x01
+#define BOT_FLD 0x02
+#define COMP_FLD_PAIR 0x03 /* TOP_FLD | BOT_FLD */
+#define AFRM_PIC 0x04
+#define TOP_REF 0x08
+#define BOT_REF 0x10
+#define PIC_MASK 0x03
+#define NON_EXISTING 0xff
+
+/* field picture type for display */
+#define DISP_TOP_FLD 0x00
+#define DISP_BOT_FLD 0x01
+
+/** Slice Types */
+#define P_SLICE 0
+#define B_SLICE 1
+#define I_SLICE 2
+#define SP_SLICE 3
+#define SI_SLICE 4
+
+/* Definition for picture skip */
+#define SKIP_NONE (0x0)
+#define I_SLC_BIT (0x1)
+#define P_SLC_BIT (0x2)
+#define B_SLC_BIT (0x4)
+
+/** Macros used for Deblocking */
+#define D_INTER_MB 0
+#define D_INTRA_MB 1
+#define D_PRED_NON_16x16 2
+#define D_B_SLICE 4
+#define D_B_SUBMB 6 //D_B_SLICE | D_PRED_NON_16x16 | D_INTER_MB
+#define D_FLD_MB 0x80
+
+/** Macros for Cabac checks */
+/** MbType */
+/** |x|x|I_PCM|SKIP|
+ |S|Inter/Intra|P/B|NON-BD16x16/BD16x16,I16x16/I4x4| */
+#define CAB_INTRA 0x00 /* 0000 00xx */
+#define CAB_INTER 0x04 /* 0000 01xx */
+#define CAB_I4x4 0x00 /* 0000 00x0 */
+#define CAB_I16x16 0x01 /* 0000 00x1 */
+#define CAB_BD16x16 0x04 /* 0000 0100 */
+#define CAB_NON_BD16x16 0x05 /* 0000 0101 */
+#define CAB_P 0x07 /* 0000 0111 */
+#define CAB_SI4x4 0x08 /* 0000 10x0 */
+#define CAB_SI16x16 0x09 /* 0000 10x1 */
+#define CAB_SKIP_MASK 0x10 /* 0001 0000 */
+#define CAB_SKIP 0x10 /* 0001 0000 */
+#define CAB_P_SKIP 0x16 /* 0001 x11x */
+#define CAB_B_SKIP 0x14 /* 0001 x100 */
+#define CAB_BD16x16_MASK 0x07 /* 0000 0111 */
+#define CAB_INTRA_MASK 0x04 /* 0000 0100 */
+#define CAB_I_PCM 0x20 /* 001x xxxx */
+
+/**< Binarization types for CABAC */
+/* |x|x|x|x|MSB_FIRST_FLC|FLC|TUNARY|UNARY| */
+#define UNARY 1
+#define TUNARY 2
+#define FLC 4
+#define MSB_FIRST_FLC 12
+
+/** Macroblock Types */
+#define I_4x4_MB 0
+#define I_16x16_MB 1
+#define P_MB 2
+#define B_MB 3
+#define SI_MB 4
+#define SP_MB 5
+#define I_PCM_MB 6
+
+#define SI4x4_MB 0xFF
+
+/** Intra luma 16x16 and chroma 8x8 prediction modes */
+#define NUM_INTRA_PRED_MODES 4
+#define VERT 0
+#define HORIZ 1
+#define DC 2
+#define PLANE 3
+#define NOT_VALID -1
+#define DC_DC_DC_DC 0x02020202 /*packed 4 bytes used in Decode Intra Mb*/
+
+/** Intra luma 4x4 prediction modes */
+#define NUM_INTRA4x4_PRED_MODES 9
+
+/** VERT, HORIZ, DC are applicable to 4x4 as well */
+/** D - Down; U - Up; L - Left; R - Right */
+#define DIAG_DL 3
+#define DIAG_DR 4
+#define VERT_R 5
+#define HORIZ_D 6
+#define VERT_L 7
+#define HORIZ_U 8
+
+/** P_MB prediction modes */
+#define NUM_INTER_MB_PRED_MODES 5
+#define PRED_16x16 0
+#define PRED_16x8 1
+#define PRED_8x16 2
+#define PRED_8x8 3
+#define PRED_8x8R0 4
+#define MAGIC_16x16 5
+#define MB_SKIP 255
+
+/* P_MB submb modes */
+#define P_L0_8x8 0
+#define P_L0_8x4 1
+#define P_L0_4x8 2
+#define P_L0_4x4 3
+
+/* B_MB submb modes */
+#define B_DIRECT_8x8 0
+#define B_L0_8x8 1
+#define B_L1_8x8 2
+#define B_BI_8x8 3
+#define B_L0_8x4 4
+#define B_L0_4x8 5
+#define B_L1_8x4 6
+#define B_L1_4x8 7
+#define B_BI_8x4 8
+#define B_BI_4x8 9
+#define B_L0_4x4 10
+#define B_L1_4x4 11
+#define B_BI_4x4 12
+
+/** B_MB prediction modes */
+#define B_8x8 22
+#define PRED_INVALID -1
+#define B_DIRECT 0
+#define PRED_L0 1
+#define PRED_L1 2
+#define BI_PRED 3
+#define B_DIRECT_BI_PRED 23
+#define B_DIRECT_PRED_L0 24
+#define B_DIRECT_PRED_L1 25
+#define B_DIRECT_SPATIAL 26
+
+#define B_DIRECT8x8_BI_PRED 13
+#define B_DIRECT8x8_PRED_L0 14
+#define B_DIRECT8x8_PRED_L1 15
+
+#define ONE_TO_ONE 0
+#define FRM_TO_FLD 1
+#define FLD_TO_FRM 2
+
+/** Inter Sub MB Pred modes */
+#define NUM_INTER_SUBMB_PRED_MODES 4
+#define SUBMB_8x8 0
+#define SUBMB_8x4 1
+#define SUBMB_4x8 2
+#define SUBMB_4x4 3
+
+/** Coded Block Pattern - Chroma */
+#define CBPC_ALLZERO 0
+#define CBPC_ACZERO 1
+#define CBPC_NONZERO 2
+
+/** Index for accessing the left MB in the MV predictor array */
+#define LEFT 0
+/** Index for accessing the top MB in the MV predictor array */
+#define TOP 1
+/** Index for accessing the top right MB in the MV predictor array */
+#define TOP_R 2
+/** Index for accessing the top Left MB in the MV predictor array */
+#define TOP_L 3
+
+/** Maximum number of Sequence Parameter sets */
+#define MAX_NUM_SEQ_PARAMS 32
+
+/** Maximum number of Picture Parameter sets */
+#define MAX_NUM_PIC_PARAMS 256
+
+#define MASK_ERR_SEQ_SET_ID (0xFFFFFFE0)
+#define MASK_ERR_PIC_SET_ID (0xFFFFFF00)
+
+#define MAX_PIC_ORDER_CNT_TYPE 2
+
+#define MAX_BITS_IN_FRAME_NUM 16
+#define MAX_BITS_IN_POC_LSB 16
+
+#define H264_MAX_REF_PICS 16
+#define H264_MAX_REF_IDX 32
+#define MAX_WEIGHT_BIPRED_IDC 2
+#define MAX_CABAC_INIT_IDC 2
+
+#define H264_DEFAULT_NUM_CORES 1
+#define DEFAULT_SEPARATE_PARSE (((H264_DEFAULT_NUM_CORES) == 2) ? 1 : 0) /* 1 only when the default core count is 2; parenthesized so it composes in expressions */
+
+/** Maximum number of Slice groups */
+#define MAX_NUM_SLICE_GROUPS 8
+#define MAX_NUM_REF_FRAMES_OFFSET 255
+
+/** Deblocking modes for a slice */
+#define SLICE_BOUNDARY_DBLK_DISABLED 2
+#define DBLK_DISABLED 1
+#define DBLK_ENABLED 0
+#define MIN_DBLK_FIL_OFF -12
+#define MAX_DBLK_FIL_OFF 12
+
+/** Width of the predictor buffers used for MC */
+#define MB_SIZE 16
+#define BLK8x8SIZE 8
+#define BLK_SIZE 4
+#define NUM_BLKS_PER_MB 24
+#define NUM_LUM_BLKS_PER_MB 16
+
+#define SUB_BLK_WIDTH 4
+#define SUB_SUB_BLK_SIZE 4 /* 2x2 pixel i4_size */
+#define SUB_BLK_SIZE ((SUB_BLK_WIDTH) * (SUB_BLK_WIDTH))
+#define MB_LUM_SIZE 256
+#define MB_CHROM_SIZE 64
+
+/**< Width to pad the luminance frame buff */
+/**< Height to pad the luminance frame buff */
+/**< Width to pad the chrominance frame buff */
+/**< Height to pad the chrominance frame buff */
+
+#define PAD_LEN_Y_H 32
+#define PAD_LEN_Y_V 20
+#define PAD_LEN_UV_H 16
+#define PAD_LEN_UV_V 8
+
+#define PAD_MV_BANK_ROW 64
+
+/**< Maximum offset by which the MVs could point outside the frame buffers
+ horizontally in the left and vertically in the top direction */
+#define MAX_OFFSET_OUTSIDE_X_FRM -20
+#define MAX_OFFSET_OUTSIDE_Y_FRM -20
+#define MAX_OFFSET_OUTSIDE_UV_FRM -8
+
+/** UVLC parsing macros */
+#define UEV 1
+#define SEV 2
+#define TEV 3
+
+/** Defines for Boolean values */
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+#define UNUSED_FOR_REF 0
+#define IS_SHORT_TERM 1
+#define IS_LONG_TERM 2
+
+/** Defines for which field gets displayed first */
+#define MAX_FRAMES 16
+#define INVALID_FRAME_NUM 0x0fffffff
+#define DO_NOT_DISP 254
+#define DISP_FLD_FIRST_UNDEF 0
+#define DISP_TOP_FLD_FIRST 1
+#define DISP_BOT_FLD_FIRST 2
+
+/** Misc error resilience requirements*/
+#define MASK_LOG2_WEIGHT_DENOM 0xFFFFFFF8
+#define MASK_PRED_WEIGHT_OFFSET 0xFFFFFF00
+#define MAX_REDUNDANT_PIC_CNT 127
+
+#define DPB_HACK 0
+#define DPB_HACK_NEW 0
+
+
+
+#define PD_MB_BUF_SIZE (H264_MAX_FRAME_WIDTH * H264_MAX_FRAME_WIDTH / 256)
+#define PD_MB_BUF_SIZE_MOD 0xffffffff
+#define MAX_PRED_INFO_LIMIT (PD_MB_BUF_SIZE * 32 * 2)
+
+#endif //DEBLOCK_THREAD
+
+
+#define NO_DC_SB 0
+#define SUB_BLK_MASK 0xFFFFFF00
+#define NUM_COEFFS_IN_4x4BLK 16
+
+
+#define MEMSET_16BYTES(pu4_start,value) \
+ { \
+ memset(pu4_start,value,16); \
+ }
+
+#define MEMCPY_16BYTES(dst,src) \
+{ \
+ memcpy(dst,src,16); \
+}
+
+
+#endif /*_IH264D_DEFS_H_*/
diff --git a/decoder/ih264d_dpb_manager.h b/decoder/ih264d_dpb_manager.h
new file mode 100755
index 0000000..a9539c8
--- /dev/null
+++ b/decoder/ih264d_dpb_manager.h
@@ -0,0 +1,173 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_DPB_MANAGER_H_
+#define _IH264D_DPB_MANAGER_H_
+/*!
+***************************************************************************
+* \file ih264d_dpb_manager.h
+*
+* \brief
+* Decoded Picture Buffer Manager Include File
+*
+* Detailed_description
+*
+* \date
+* 19-12-2002
+*
+* \author Sriram Sethuraman
+***************************************************************************
+*/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+
+#define END_OF_MMCO 0
+#define MARK_ST_PICNUM_AS_NONREF 1
+#define MARK_LT_INDEX_AS_NONREF 2
+#define MARK_ST_PICNUM_AS_LT_INDEX 3
+#define SET_MAX_LT_INDEX 4
+#define RESET_REF_PICTURES 5
+#define SET_LT_INDEX 6
+#define RESET_NONREF_PICTURES 7
+#define RESET_ALL_PICTURES 8
+
+struct field_t
+{
+ /* picNum of the reference field */
+ WORD32 i4_pic_num;
+
+ /* assigned when used for long term reference */
+ /* else MAX_REF_BUFS+1 */
+ UWORD8 u1_long_term_frame_idx;
+
+ /* 0 : unused for reference */
+ /* 1 : used for short term reference */
+ /* 2 : used for long term reference */
+ UWORD8 u1_reference_info;
+};
+
+
+struct dpb_info_t
+{
+ struct pic_buffer_t *ps_pic_buf; /** Pointer to picture buffer structure */
+ WORD32 i4_frame_num; /** frame number of picture - unique for each ref*/
+ struct dpb_info_t *ps_prev_short;/** Link to the DPB with previous picNum */
+ struct dpb_info_t *ps_prev_long; /** Link to the DPB with previous long term frame*/
+ struct field_t s_top_field; /** Contains information of the top_field
+ reference info, pic num and long term frame idx */
+ struct field_t s_bot_field; /** Contains information of the bot_field
+ reference info, pic num and long term frame idx */
+ UWORD8 u1_buf_id; /** bufID from bufAPI */
+ UWORD8 u1_used_as_ref; /** whether buffer is used as ref for frame or
+ complementary reference field pair */
+ UWORD8 u1_lt_idx; /** If buf is assigned long-term index; else MAX_REF_BUFS+1 */
+
+};
+
+typedef struct
+{
+ struct pic_buffer_t *ps_def_dpb[MAX_REF_BUFS];/** DPB in default index order */
+ struct pic_buffer_t *ps_mod_dpb[2][2 * MAX_REF_BUFS];/** DPB in reordered index order, 0-fwd,1-bwd */
+ struct pic_buffer_t *ps_init_dpb[2][2 * MAX_REF_BUFS];/** DPB in reordered index order, 0-fwd,1-bwd */
+ struct dpb_info_t *ps_dpb_st_head; /** Pointer to the most recent picNum */
+ struct dpb_info_t *ps_dpb_ht_head; /** Pointer to the smallest LT index */
+ struct dpb_info_t as_dpb_info[MAX_REF_BUFS]; /** Physical storage for dpbInfo for ref bufs */
+ UWORD8 u1_num_st_ref_bufs; /** Number of short term ref. buffers */
+ UWORD8 u1_num_lt_ref_bufs; /** Number of long term ref. buffer */
+ UWORD8 u1_max_lt_pic_idx_plus1; /** Maximum long term pictures - 0 to max_long_term_pic_idx */
+ UWORD8 u1_num_gaps; /** Total number of outstanding gaps */
+ void * pv_codec_handle; /* For Error Handling */
+ WORD32 i4_max_frm_num; /** Max frame number */
+ WORD32 ai4_gaps_start_frm_num[MAX_FRAMES];/** start frame number for a gap seqn */
+ WORD32 ai4_gaps_end_frm_num[MAX_FRAMES]; /** presumably end frame number for a gap seqn - TODO confirm */
+ WORD8 ai1_gaps_per_seq[MAX_FRAMES]; /** number of gaps with each gap seqn */
+ WORD32 ai4_poc_buf_id_map[MAX_FRAMES][3];
+ WORD8 i1_poc_buf_id_entries;
+ WORD8 i1_gaps_deleted;
+ UWORD16 u2_pic_wd;
+ UWORD16 u2_pic_ht;
+}dpb_manager_t;
+
+/** Structure to store the MMC Commands */
+struct MMCParams
+{
+ UWORD32 u4_mmco; /** memory management control operation */
+ UWORD32 u4_diff_pic_num; /** diff Of Pic Nums Minus1 */
+ UWORD32 u4_lt_idx; /** Long Term Pic Idx */
+ UWORD32 u4_max_lt_idx_plus1; /** MaxLongTermPicIdxPlus1 */
+};
+
+typedef struct
+{
+ UWORD8 u1_dpb_commands_read; /** Flag to indicate that DPB commands are read */
+ UWORD8 u1_buf_mode; /** decoder Pic buffering mode*/
+ UWORD8 u1_num_of_commands; /** Number of MMC commands */
+ /* These variables are used in case of IDR pictures only */
+ UWORD8 u1_idr_pic; /** = 1 ,IDR pic */
+ UWORD8 u1_no_output_of_prior_pics_flag;
+ UWORD8 u1_long_term_reference_flag;
+ struct MMCParams as_mmc_params[MAX_REF_BUFS]; /* < Buffer to store MMC commands */
+ UWORD8 u1_dpb_commands_read_slc;
+}dpb_commands_t;
+
+void ih264d_init_ref_bufs(dpb_manager_t *ps_dpb_mgr);
+
+WORD32 ih264d_insert_st_node(dpb_manager_t *ps_dpb_mgr,
+ struct pic_buffer_t *ps_pic_buf,
+ UWORD8 u1_buf_id,
+ UWORD32 u2_cur_pic_num);
+WORD32 ih264d_update_default_index_list(dpb_manager_t *ps_dpb_mgr);
+WORD32 ih264d_do_mmco_buffer(dpb_commands_t *ps_dpb_cmds,
+ dpb_manager_t *ps_dpb_mgr,
+ UWORD8 u1_numRef_frames_for_seq,
+ UWORD32 u4_cur_pic_num,
+ UWORD32 u2_u4_max_pic_num_minus1,
+ UWORD8 u1_nal_unit_type,
+ struct pic_buffer_t *ps_pic_buf,
+ UWORD8 u1_buf_id,
+ UWORD8 u1_fld_pic_flag,
+ UWORD8 u1_curr_pic_in_err);
+void ih264d_release_pics_in_dpb(void *pv_dec,
+ UWORD8 u1_disp_bufs);
+void ih264d_reset_ref_bufs(dpb_manager_t *ps_dpb_mgr);
+WORD32 ih264d_delete_st_node_or_make_lt(dpb_manager_t *ps_dpb_mgr,
+ WORD32 u4_pic_num,
+ UWORD32 u4_lt_idx,
+ UWORD8 u1_fld_pic_flag);
+
+WORD32 ih264d_delete_gap_frm_mmco(dpb_manager_t *ps_dpb_mgr,
+ WORD32 i4_frame_num,
+ UWORD8 *pu1_del_node);
+
+WORD32 ih264d_delete_gap_frm_sliding(dpb_manager_t *ps_dpb_mgr,
+ WORD32 i4_frame_num,
+ UWORD8 *pu1_del_node);
+
+WORD32 ih264d_do_mmco_for_gaps(dpb_manager_t *ps_dpb_mgr,
+ UWORD8 u1_num_ref_frames);
+
+WORD32 ih264d_insert_pic_in_display_list(dpb_manager_t *ps_dpb_mgr,
+ UWORD8 u1_buf_id,
+ WORD32 i4_display_poc,
+ UWORD32 u4_frame_num);
+void ih264d_delete_nonref_nondisplay_pics(dpb_manager_t *ps_dpb_mgr);
+#endif /* _IH264D_DPB_MANAGER_H_ */
diff --git a/decoder/ih264d_dpb_mgr.c b/decoder/ih264d_dpb_mgr.c
new file mode 100755
index 0000000..205bc9b
--- /dev/null
+++ b/decoder/ih264d_dpb_mgr.c
@@ -0,0 +1,1987 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "iv.h"
+#include "ih264d_dpb_manager.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_defs.h"
+#include "ih264d_structs.h"
+#include "ih264d_process_bslice.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_error_handler.h"
+#include "string.h"
+#include "ih264d_defs.h"
+#include "ih264_error.h"
+#include "ih264_buf_mgr.h"
+#include "assert.h"
+
+/*!
+ ***************************************************************************
+ * \file ih264d_dpb_mgr.c
+ *
+ * \brief
+ * Functions for managing the decoded picture buffer
+ *
+ * Detailed_description
+ *
+ * \date
+ * 19-12-2002
+ *
+ * \author Sriram Sethuraman
+ ***************************************************************************
+ */
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_init_ref_bufs \endif
+ *
+ * \brief
+ *    Brings the DPB manager into its empty start-up state: no short term or
+ *    long term references, no frame gaps and an empty POC/buffer-id map.
+ *
+ * \param ps_dpb_mgr  DPB manager to initialise
+ *
+ * \return
+ *    none
+ **************************************************************************
+ */
+void ih264d_init_ref_bufs(dpb_manager_t *ps_dpb_mgr)
+{
+    UWORD32 u4_idx;
+
+    /* Both reference lists start out empty */
+    ps_dpb_mgr->ps_dpb_st_head = NULL;
+    ps_dpb_mgr->ps_dpb_ht_head = NULL;
+    ps_dpb_mgr->u1_num_lt_ref_bufs = 0;
+    ps_dpb_mgr->u1_num_st_ref_bufs = 0;
+
+    /* No gaps, no display map entries */
+    ps_dpb_mgr->u1_num_gaps = 0;
+    ps_dpb_mgr->i1_gaps_deleted = 0;
+    ps_dpb_mgr->i1_poc_buf_id_entries = 0;
+
+    /* Every DPB node: unlinked, unused, and with an out-of-range LT index */
+    for(u4_idx = 0; u4_idx < MAX_REF_BUFS; u4_idx++)
+    {
+        struct dpb_info_t *ps_node = &ps_dpb_mgr->as_dpb_info[u4_idx];
+
+        ps_node->ps_pic_buf = NULL;
+        ps_node->ps_prev_short = NULL;
+        ps_node->ps_prev_long = NULL;
+
+        ps_node->u1_used_as_ref = UNUSED_FOR_REF;
+        ps_node->s_top_field.u1_reference_info = UNUSED_FOR_REF;
+        ps_node->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+
+        ps_node->u1_lt_idx = MAX_REF_BUFS + 1;
+        ps_node->s_top_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
+        ps_node->s_bot_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
+    }
+
+    /* Per-frame tables: gap ranges and the POC/buffer-id map */
+    for(u4_idx = 0; u4_idx < MAX_FRAMES; u4_idx++)
+    {
+        ps_dpb_mgr->ai1_gaps_per_seq[u4_idx] = 0;
+        ps_dpb_mgr->ai4_gaps_end_frm_num[u4_idx] = 0;
+        ps_dpb_mgr->ai4_gaps_start_frm_num[u4_idx] = INVALID_FRAME_NUM;
+
+        ps_dpb_mgr->ai4_poc_buf_id_map[u4_idx][0] = -1;
+        ps_dpb_mgr->ai4_poc_buf_id_map[u4_idx][1] = 0x7fffffff;
+        ps_dpb_mgr->ai4_poc_buf_id_map[u4_idx][2] = 0;
+    }
+}
+
+/*
+ * Drops the BUF_MGR_REF hold on picture buffer pic_buf_id and on the MV
+ * buffer mapped to it through au1_pic_buf_id_mv_buf_id_map.
+ *
+ * Nothing is released when pic_buf_id is the decoder's current picture
+ * buffer, the current slice is a field slice and u1_top_bottom_decoded is
+ * still 0 (presumably the second field of the picture is yet to be
+ * decoded - confirm against the slice decoding flow).
+ */
+void ih264d_free_ref_pic_mv_bufs(void* pv_dec, UWORD8 pic_buf_id)
+{
+    dec_struct_t *ps_codec = (dec_struct_t *)pv_dec;
+    WORD32 i4_still_in_use = 0;
+
+    if(pic_buf_id == ps_codec->u1_pic_buf_id)
+    {
+        if(ps_codec->ps_cur_slice->u1_field_pic_flag)
+        {
+            i4_still_in_use = (0 == ps_codec->u1_top_bottom_decoded);
+        }
+    }
+
+    if(!i4_still_in_use)
+    {
+        ih264_buf_mgr_release((buf_mgr_t *)ps_codec->pv_pic_buf_mgr,
+                              pic_buf_id,
+                              BUF_MGR_REF);
+        ih264_buf_mgr_release((buf_mgr_t *)ps_codec->pv_mv_buf_mgr,
+                              ps_codec->au1_pic_buf_id_mv_buf_id_map[pic_buf_id],
+                              BUF_MGR_REF);
+    }
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_delete_lt_node \endif
+ *
+ * \brief
+ *    Unmarks the long term reference that carries index u4_lt_idx and, once
+ *    the node is not referenced at all any more, unlinks it from the LT
+ *    list and releases its buffers.
+ *
+ * \param ps_dpb_mgr            DPB manager owning the LT list
+ * \param u4_lt_idx             long term index to look for
+ * \param u1_fld_pic_flag       non-zero: only the field(s) that carry
+ *                              u4_lt_idx are unmarked
+ * \param ps_lt_node_to_insert  field mode only: when the matching node is
+ *                              this very node its field markings are kept
+ * \param pi4_status            out: 1 when the LT list is not empty but no
+ *                              node carries u4_lt_idx, 0 otherwise
+ *
+ * \return
+ *    OK, or ERROR_DBP_MANAGER_T when in field mode neither field of the
+ *    matching node carries u4_lt_idx
+ **************************************************************************
+ */
+WORD32 ih264d_delete_lt_node(dpb_manager_t *ps_dpb_mgr,
+                             UWORD32 u4_lt_idx,
+                             UWORD8 u1_fld_pic_flag,
+                             struct dpb_info_t *ps_lt_node_to_insert,
+                             WORD32 *pi4_status)
+{
+    /* ps_link is the node whose ps_prev_long leads to the victim; it is */
+    /* the victim itself when the victim is the list head.               */
+    struct dpb_info_t *ps_link;
+    struct dpb_info_t *ps_victim;
+    WORD32 i4_pos;
+
+    *pi4_status = 0;
+
+    /* Empty LT list: nothing to do */
+    if(0 == ps_dpb_mgr->u1_num_lt_ref_bufs)
+        return OK;
+
+    /* Locate the node with the requested long term index */
+    ps_link = ps_dpb_mgr->ps_dpb_ht_head;
+    if(ps_link->u1_lt_idx == u4_lt_idx)
+    {
+        ps_victim = ps_link;
+    }
+    else
+    {
+        i4_pos = 1;
+        while((i4_pos < ps_dpb_mgr->u1_num_lt_ref_bufs)
+                        && (ps_link->ps_prev_long->u1_lt_idx != u4_lt_idx))
+        {
+            ps_link = ps_link->ps_prev_long;
+            i4_pos++;
+        }
+
+        if(i4_pos == ps_dpb_mgr->u1_num_lt_ref_bufs)
+        {
+            /* Index not present in the list */
+            *pi4_status = 1;
+            return OK;
+        }
+        ps_victim = ps_link->ps_prev_long;
+    }
+
+    if(!u1_fld_pic_flag)
+    {
+        ps_victim->u1_used_as_ref = UNUSED_FOR_REF;
+    }
+    else
+    {
+        if(ps_victim != ps_lt_node_to_insert)
+        {
+            WORD32 i4_fields_cleared = 0;
+
+            /* Unmark whichever field(s) hold the index */
+            if(ps_victim->s_top_field.u1_long_term_frame_idx == u4_lt_idx)
+            {
+                ps_victim->s_top_field.u1_reference_info = UNUSED_FOR_REF;
+                ps_victim->s_top_field.u1_long_term_frame_idx =
+                                MAX_REF_BUFS + 1;
+                i4_fields_cleared++;
+            }
+            if(ps_victim->s_bot_field.u1_long_term_frame_idx == u4_lt_idx)
+            {
+                ps_victim->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+                ps_victim->s_bot_field.u1_long_term_frame_idx =
+                                MAX_REF_BUFS + 1;
+                i4_fields_cleared++;
+            }
+
+            if(0 == i4_fields_cleared)
+                return ERROR_DBP_MANAGER_T;
+        }
+
+        /* The frame stays a reference while either field is one */
+        ps_victim->u1_used_as_ref = ps_victim->s_top_field.u1_reference_info
+                        | ps_victim->s_bot_field.u1_reference_info;
+    }
+
+    /* Still referenced through the other field: keep it in the list */
+    if(UNUSED_FOR_REF != ps_victim->u1_used_as_ref)
+        return OK;
+
+    if(ps_dpb_mgr->ps_dpb_ht_head == ps_victim)
+        ps_dpb_mgr->ps_dpb_ht_head = ps_link->ps_prev_long;
+
+    ps_victim->u1_lt_idx = MAX_REF_BUFS + 1;
+    ps_victim->s_top_field.u1_reference_info = UNUSED_FOR_REF;
+    ps_victim->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+
+    /* Give the picture and MV buffers back */
+    ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                ps_victim->u1_buf_id);
+
+    /* Bridge over the removed node, then detach it */
+    ps_link->ps_prev_long = ps_victim->ps_prev_long;
+    ps_victim->ps_prev_long = NULL;
+    ps_dpb_mgr->u1_num_lt_ref_bufs--;
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_insert_lt_node \endif
+ *
+ * \brief
+ *    Marks ps_mov_node as a long term reference with index u4_lt_idx and
+ *    links it into the LT list, which is kept in ascending index order.
+ *
+ * \param ps_dpb_mgr       DPB manager owning the LT list
+ * \param ps_mov_node      node to be made long term
+ * \param u4_lt_idx        long term index to assign
+ * \param u1_fld_pic_flag  non-zero when the marking concerns a field; the
+ *                         field markings of the node must then already be
+ *                         IS_LONG_TERM
+ *
+ * \return
+ *    OK, or ERROR_DBP_MANAGER_T when in field mode the top field is not
+ *    long term, or both fields are long term but the node holds a
+ *    different long term index
+ **************************************************************************
+ */
+WORD32 ih264d_insert_lt_node(dpb_manager_t *ps_dpb_mgr,
+                             struct dpb_info_t *ps_mov_node,
+                             UWORD32 u4_lt_idx,
+                             UWORD8 u1_fld_pic_flag)
+{
+    /* Set when the node already entered the LT list through its first   */
+    /* field, in which case the LT count must not be incremented again.  */
+    UWORD8 u1_second_field = 0;
+
+    if(!u1_fld_pic_flag)
+    {
+        /* Frame: both fields become long term with the same index */
+        ps_mov_node->s_top_field.u1_long_term_frame_idx = u4_lt_idx;
+        ps_mov_node->s_bot_field.u1_long_term_frame_idx = u4_lt_idx;
+        ps_mov_node->s_top_field.u1_reference_info = IS_LONG_TERM;
+        ps_mov_node->s_bot_field.u1_reference_info = IS_LONG_TERM;
+    }
+    else
+    {
+        /* Without a long term top field there is nothing to insert */
+        if(ps_mov_node->s_top_field.u1_reference_info != IS_LONG_TERM)
+            return ERROR_DBP_MANAGER_T;
+
+        if(ps_mov_node->s_bot_field.u1_reference_info == IS_LONG_TERM)
+        {
+            /* Both fields long term: they have to share the index */
+            if(ps_mov_node->u1_lt_idx != u4_lt_idx)
+                return ERROR_DBP_MANAGER_T;
+
+            u1_second_field = 1;
+        }
+    }
+
+    ps_mov_node->u1_used_as_ref = IS_LONG_TERM;
+    ps_mov_node->u1_lt_idx = u4_lt_idx;
+    ps_mov_node->ps_pic_buf->u1_long_term_frm_idx = u4_lt_idx;
+
+    if(0 == ps_dpb_mgr->u1_num_lt_ref_bufs)
+    {
+        /* First long term picture becomes the whole list */
+        ps_dpb_mgr->ps_dpb_ht_head = ps_mov_node;
+        ps_mov_node->ps_prev_long = NULL;
+    }
+    else
+    {
+        struct dpb_info_t *ps_walk = ps_dpb_mgr->ps_dpb_ht_head;
+
+        if(u4_lt_idx < ps_walk->u1_lt_idx)
+        {
+            /* Smallest index so far: new head */
+            ps_mov_node->ps_prev_long = ps_walk;
+            ps_dpb_mgr->ps_dpb_ht_head = ps_mov_node;
+        }
+        else
+        {
+            /* Find the first node with a larger index and insert ahead */
+            /* of it (or at the tail when there is none)                */
+            struct dpb_info_t *ps_before = ps_walk;
+            WORD32 i4_pos = 1;
+
+            ps_walk = ps_walk->ps_prev_long;
+            while((i4_pos < ps_dpb_mgr->u1_num_lt_ref_bufs)
+                            && !(ps_walk->u1_lt_idx > u4_lt_idx))
+            {
+                ps_before = ps_walk;
+                ps_walk = ps_walk->ps_prev_long;
+                i4_pos++;
+            }
+
+            ps_before->ps_prev_long = ps_mov_node;
+            ps_mov_node->ps_prev_long = ps_walk;
+        }
+    }
+
+    /* The picture buffer no longer counts as a short term picture */
+    ps_mov_node->ps_pic_buf->u1_is_short = 0;
+
+    if(!u1_second_field)
+        ps_dpb_mgr->u1_num_lt_ref_bufs++;
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_insert_st_node \endif
+ *
+ * \brief
+ *    Registers a picture as a short term reference and makes it the head of
+ *    the ST linked list.
+ *
+ * \param ps_dpb_mgr      DPB manager owning the ST list
+ * \param ps_pic_buf      picture buffer being added
+ * \param u1_buf_id       buffer id stored with the node
+ * \param u4_cur_pic_num  frame number stored in the node and in ps_pic_buf
+ *
+ * \return
+ *    OK (0) on success, also when ps_pic_buf is already held by a node in
+ *    use, in which case only its bottom field is marked short term;
+ *    ERROR_DBP_MANAGER_T when no free DPB node is left
+ *
+ * \note
+ *    Called only for a new coded picture with nal_ref_idc!=0
+ **************************************************************************
+ */
+WORD32 ih264d_insert_st_node(dpb_manager_t *ps_dpb_mgr,
+                             struct pic_buffer_t *ps_pic_buf,
+                             UWORD8 u1_buf_id,
+                             UWORD32 u4_cur_pic_num)
+{
+    /* Lower two bits of the picture type select frame / top / bottom */
+    const UWORD8 u1_structure = ps_pic_buf->u1_picturetype & 0x03;
+    struct dpb_info_t *ps_slot = NULL;
+    WORD32 i4_idx;
+
+    for(i4_idx = 0; i4_idx < MAX_REF_BUFS; i4_idx++)
+    {
+        struct dpb_info_t *ps_cand = &ps_dpb_mgr->as_dpb_info[i4_idx];
+
+        if(ps_cand->u1_used_as_ref && (ps_cand->ps_pic_buf == ps_pic_buf))
+        {
+            /* Same buffer already referenced: this is the bottom field */
+            /* joining its top field                                    */
+            ps_cand->s_bot_field.u1_reference_info = IS_SHORT_TERM;
+            return 0;
+        }
+
+        if((UNUSED_FOR_REF == ps_cand->u1_used_as_ref)
+                        && (UNUSED_FOR_REF
+                                        == ps_cand->s_top_field.u1_reference_info)
+                        && (UNUSED_FOR_REF
+                                        == ps_cand->s_bot_field.u1_reference_info))
+        {
+            ps_slot = ps_cand;
+            break;
+        }
+    }
+
+    if(NULL == ps_slot)
+        return ERROR_DBP_MANAGER_T;
+
+    /* Fill in the node and push it at the head of the ST list */
+    ps_slot->ps_pic_buf = ps_pic_buf;
+    ps_slot->u1_buf_id = u1_buf_id;
+    ps_slot->u1_lt_idx = MAX_REF_BUFS + 1;
+    ps_slot->u1_used_as_ref = TRUE;
+    ps_slot->i4_frame_num = u4_cur_pic_num;
+    ps_slot->ps_pic_buf->i4_frame_num = u4_cur_pic_num;
+    ps_slot->ps_prev_short = ps_dpb_mgr->ps_dpb_st_head;
+
+    ps_dpb_mgr->ps_dpb_st_head = ps_slot;
+    ps_dpb_mgr->u1_num_st_ref_bufs++;
+
+    ps_pic_buf->u1_is_short = IS_SHORT_TERM;
+
+    /* Mark the field(s) that this picture actually provides */
+    if(FRM_PIC == u1_structure)
+    {
+        ps_slot->u1_used_as_ref = IS_SHORT_TERM;
+        ps_slot->s_top_field.u1_reference_info = IS_SHORT_TERM;
+        ps_slot->s_bot_field.u1_reference_info = IS_SHORT_TERM;
+    }
+    if(TOP_FLD == u1_structure)
+    {
+        ps_slot->s_top_field.u1_reference_info = IS_SHORT_TERM;
+    }
+    if(BOT_FLD == u1_structure)
+    {
+        ps_slot->s_bot_field.u1_reference_info = IS_SHORT_TERM;
+    }
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_delete_st_node_or_make_lt \endif
+ *
+ * \brief
+ * Delete short term ref with a given picNum from the ST linked list or
+ * make it an LT node
+ *
+ * \param ps_dpb_mgr
+ * DPB manager owning the ST and LT lists. The ST list head is dereferenced
+ * without a NULL check, so the ST list must not be empty.
+ * \param i4_pic_num
+ * picNum of the picture to unmark. In field mode it is halved to obtain
+ * the frame number used for the list search, and compared as is with the
+ * per-field i4_pic_num to pick the field.
+ * \param u4_lt_idx
+ * MAX_REF_BUFS + 1 to simply remove the picture; any other value is the
+ * long term index the picture is converted to.
+ * \param u1_fld_pic_flag
+ * non-zero when a single field is addressed
+ *
+ * \return
+ * OK (or 0) if successful; ERROR_DBP_MANAGER_T when the picture is not in
+ * the ST list and cannot be handled as a frame gap; otherwise the error
+ * returned by ih264d_delete_gap_frm_mmco, ih264d_delete_lt_node or
+ * ih264d_insert_lt_node
+ *
+ * \note
+ * Common parts to MMCO==1 and MMCO==3 have been combined here
+ * The co-located zero flag buffer of the unmarked frame/field is cleared.
+ **************************************************************************
+ */
+WORD32 ih264d_delete_st_node_or_make_lt(dpb_manager_t *ps_dpb_mgr,
+ WORD32 i4_pic_num,
+ UWORD32 u4_lt_idx,
+ UWORD8 u1_fld_pic_flag)
+{
+ WORD32 i;
+ struct dpb_info_t *ps_next_dpb;
+ WORD32 i4_frame_num = i4_pic_num;
+ struct dpb_info_t *ps_unmark_node = NULL;
+ UWORD8 u1_del_node = 0, u1_del_st = 0;
+ UWORD8 u1_reference_type = UNUSED_FOR_REF;
+ WORD32 ret;
+
+ if(u1_fld_pic_flag)
+ {
+ i4_frame_num = i4_frame_num >> 1; /* field picNum -> frame number for the list search */
+
+ if(u4_lt_idx == (MAX_REF_BUFS + 1))
+ u1_reference_type = UNUSED_FOR_REF;
+ else
+ u1_reference_type = IS_LONG_TERM;
+ }
+
+ //Find the node with matching frame number; ps_next_dpb ends up as the node linking to it
+ ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+ if((WORD32)ps_next_dpb->i4_frame_num == i4_frame_num)
+ {
+ ps_unmark_node = ps_next_dpb;
+ }
+ else
+ {
+ for(i = 1; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+ {
+ if((WORD32)ps_next_dpb->ps_prev_short->i4_frame_num == i4_frame_num)
+ break;
+ ps_next_dpb = ps_next_dpb->ps_prev_short;
+ }
+
+ if(i == ps_dpb_mgr->u1_num_st_ref_bufs)
+ {
+ /* Not in the ST list: only acceptable when frame gaps are recorded */
+ if(ps_dpb_mgr->u1_num_gaps)
+ {
+ ret = ih264d_delete_gap_frm_mmco(ps_dpb_mgr, i4_frame_num, &u1_del_st);
+ if(ret != OK)
+ return ret;
+ }
+ else
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+
+ return i4_error_code;
+ }
+
+ if(u1_del_st)
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ else
+ ps_unmark_node = ps_next_dpb->ps_prev_short;
+ }
+
+ if(u1_fld_pic_flag)
+ {
+ /* Mark the corresponding field ( top or bot) as */
+ /* UNUSED_FOR_REF or IS_LONG_TERM depending on */
+ /* u1_reference_type. */
+ if(ps_unmark_node->s_top_field.i4_pic_num == i4_pic_num)
+ {
+ ps_unmark_node->s_top_field.u1_reference_info = u1_reference_type;
+ ps_unmark_node->s_top_field.u1_long_term_frame_idx = u4_lt_idx;
+ {
+ UWORD8 *pu1_src = ps_unmark_node->ps_pic_buf->pu1_col_zero_flag;
+ WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
+ * ps_dpb_mgr->u2_pic_ht) >> 5);
+ /* clear the first (wd * ht) >> 5 bytes of the colocated zero flag buffer */
+ memset(pu1_src, 0, i4_size);
+ }
+ }
+
+ else if(ps_unmark_node->s_bot_field.i4_pic_num == i4_pic_num)
+ {
+
+ ps_unmark_node->s_bot_field.u1_reference_info = u1_reference_type;
+ ps_unmark_node->s_bot_field.u1_long_term_frame_idx = u4_lt_idx;
+ {
+ UWORD8 *pu1_src =
+ ps_unmark_node->ps_pic_buf->pu1_col_zero_flag
+ + ((ps_dpb_mgr->u2_pic_wd
+ * ps_dpb_mgr->u2_pic_ht)
+ >> 5);
+ WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
+ * ps_dpb_mgr->u2_pic_ht) >> 5);
+ /* clear the second (wd * ht) >> 5 bytes of the colocated zero flag buffer */
+ memset(pu1_src, 0, i4_size);
+ }
+ }
+ /* The frame remains a reference while either field is still marked */
+ ps_unmark_node->u1_used_as_ref =
+ ps_unmark_node->s_top_field.u1_reference_info
+ | ps_unmark_node->s_bot_field.u1_reference_info;
+ }
+ else
+ {
+ ps_unmark_node->u1_used_as_ref = UNUSED_FOR_REF;
+ ps_unmark_node->s_top_field.u1_reference_info = UNUSED_FOR_REF;
+ ps_unmark_node->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+
+ {
+ UWORD8 *pu1_src = ps_unmark_node->ps_pic_buf->pu1_col_zero_flag;
+
+ WORD32 i4_size = ((ps_dpb_mgr->u2_pic_wd
+ * ps_dpb_mgr->u2_pic_ht) >> 4);
+ /* clear (wd * ht) >> 4 bytes of the colocated zero flag buffer (both field halves) */
+ memset(pu1_src, 0, i4_size);
+ }
+ }
+
+ if(!(ps_unmark_node->u1_used_as_ref & IS_SHORT_TERM)) /* nothing short term left: unlink from the ST list */
+ {
+ if(ps_unmark_node == ps_dpb_mgr->ps_dpb_st_head)
+ ps_dpb_mgr->ps_dpb_st_head = ps_next_dpb->ps_prev_short;
+ else
+ ps_next_dpb->ps_prev_short = ps_unmark_node->ps_prev_short; //update link
+ ps_dpb_mgr->u1_num_st_ref_bufs--; //decrement ST buf count
+ u1_del_node = 1;
+ }
+
+ if(u4_lt_idx == MAX_REF_BUFS + 1) /* plain removal, no long term index requested */
+ {
+ if(u1_del_node)
+ {
+ // Release the physical buffer
+ ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+ ps_unmark_node->u1_buf_id);
+ ps_unmark_node->ps_prev_short = NULL;
+ }
+ }
+ else
+ {
+ WORD32 i4_status; /* "index not found" flag from ih264d_delete_lt_node; not examined here */
+ //If another node has the same LT index, delete that node
+ ret = ih264d_delete_lt_node(ps_dpb_mgr, u4_lt_idx,
+ u1_fld_pic_flag, ps_unmark_node, &i4_status);
+ if(ret != OK)
+ return ret;
+ // Now insert the short term node as a long term node
+ ret = ih264d_insert_lt_node(ps_dpb_mgr, ps_unmark_node, u4_lt_idx,
+ u1_fld_pic_flag);
+ if(ret != OK)
+ return ret;
+ }
+ return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_reset_ref_bufs \endif
+ *
+ * \brief
+ *    Unmarks every picture that is currently used for reference, releases
+ *    its buffers, empties both reference lists and forgets all frame gaps.
+ *    Called if MMCO==5/7 or on the first slice of an IDR picture.
+ *
+ * \param ps_dpb_mgr  DPB manager to reset
+ *
+ * \return
+ *    none
+ **************************************************************************
+ */
+void ih264d_reset_ref_bufs(dpb_manager_t *ps_dpb_mgr)
+{
+    WORD32 i4_idx;
+
+    for(i4_idx = 0; i4_idx < MAX_REF_BUFS; i4_idx++)
+    {
+        struct dpb_info_t *ps_node = &ps_dpb_mgr->as_dpb_info[i4_idx];
+
+        /* Nodes that hold no reference are left untouched */
+        if(!ps_node->u1_used_as_ref)
+            continue;
+
+        ps_node->ps_pic_buf = NULL;
+        ps_node->ps_prev_short = NULL;
+        ps_node->ps_prev_long = NULL;
+
+        ps_node->u1_used_as_ref = UNUSED_FOR_REF;
+        ps_node->s_top_field.u1_reference_info = UNUSED_FOR_REF;
+        ps_node->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+
+        ps_node->u1_lt_idx = MAX_REF_BUFS + 1;
+        ps_node->s_top_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
+        ps_node->s_bot_field.u1_long_term_frame_idx = MAX_REF_BUFS + 1;
+
+        /* Hand the picture and MV buffers back */
+        ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                    ps_node->u1_buf_id);
+    }
+
+    /* Both lists are empty now */
+    ps_dpb_mgr->ps_dpb_st_head = NULL;
+    ps_dpb_mgr->ps_dpb_ht_head = NULL;
+    ps_dpb_mgr->u1_num_lt_ref_bufs = 0;
+    ps_dpb_mgr->u1_num_st_ref_bufs = 0;
+
+    /* Drop all recorded frame gaps */
+    ps_dpb_mgr->u1_num_gaps = 0;
+    for(i4_idx = 0; i4_idx < MAX_FRAMES; i4_idx++)
+    {
+        ps_dpb_mgr->ai1_gaps_per_seq[i4_idx] = 0;
+        ps_dpb_mgr->ai4_gaps_end_frm_num[i4_idx] = 0;
+        ps_dpb_mgr->ai4_gaps_start_frm_num[i4_idx] = INVALID_FRAME_NUM;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_update_default_index_list \endif
+ *
+ * \brief
+ *    Rebuilds the default index list ps_def_dpb: first the short term
+ *    pictures in ST list order (head first), then the long term pictures
+ *    in LT list order.
+ *
+ * \param ps_dpb_mgr  DPB manager whose default list is rebuilt
+ *
+ * \return
+ *    always 0
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_update_default_index_list(dpb_manager_t *ps_dpb_mgr)
+{
+    struct dpb_info_t *ps_node;
+    WORD32 i4_out = 0;
+    WORD32 i4_left;
+
+    /* Short term pictures, newest (list head) first */
+    ps_node = ps_dpb_mgr->ps_dpb_st_head;
+    for(i4_left = ps_dpb_mgr->u1_num_st_ref_bufs; i4_left > 0; i4_left--)
+    {
+        ps_dpb_mgr->ps_def_dpb[i4_out++] = ps_node->ps_pic_buf;
+        ps_node = ps_node->ps_prev_short;
+    }
+
+    /* Long term pictures follow, in LT list order */
+    ps_node = ps_dpb_mgr->ps_dpb_ht_head;
+    for(i4_left = ps_dpb_mgr->u1_num_lt_ref_bufs; i4_left > 0; i4_left--)
+    {
+        ps_dpb_mgr->ps_def_dpb[i4_out++] = ps_node->ps_pic_buf;
+        ps_node = ps_node->ps_prev_long;
+    }
+
+    return 0;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ref_idx_reordering \endif
+ *
+ * \brief
+ *    Parse the bitstream and reorder indices for the current slice
+ *
+ * \param ps_dec  decoder context (bitstream, current slice, DPB manager)
+ * \param uc_lx   reference list being modified (index into ps_init_dpb /
+ *                ps_mod_dpb)
+ *
+ * \return
+ *    OK - if no_error; ERROR_DBP_MANAGER_T when a command addresses a
+ *    picture that is not in the initial list, or when the stream carries
+ *    more reordering commands than there are active reference indices
+ *
+ * \note
+ *    Called only if ref_idx_reordering_flag_l0 is decoded as 1
+ * \para
+ *    This section implements 7.3.3.1 and 8.2.6.4
+ *    Uses the default index list as the starting point and
+ *    remaps the picNums sent to the next higher index in the
+ *    modified list. The unmodified ones are copied from the
+ *    default to modified list retaining their order in the default list.
+ *    At most u1_num_ref_idx_lx_active entries are written to the modified
+ *    list, so malformed streams cannot write past the entries that a
+ *    slice without reordering would also fill.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_ref_idx_reordering(dec_struct_t *ps_dec, UWORD8 uc_lx)
+{
+    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+    /* NOTE(review): picNum and MaxPicNum are kept in 16-bit variables as */
+    /* in the original code; confirm the value range for field decoding.  */
+    UWORD16 u4_cur_pic_num = ps_dec->ps_cur_slice->u2_frame_num;
+    /*< Maximum Picture Number Minus 1 */
+    UWORD16 ui_max_frame_num =
+                    ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1 + 1;
+
+    WORD32 i;
+    UWORD32 ui_remapIdc, ui_nextUev;
+    WORD16 u2_pred_frame_num;
+    WORD32 i_temp;
+    /* One bit per initial-list index that has already been remapped. */
+    /* 32 bits wide so that indices 16..31 are tracked as well.       */
+    UWORD32 u4_def_mod_flag = 0;
+    UWORD8 modCount = 0;
+    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
+    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
+    UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
+    WORD32 i4_num_active = ps_cur_slice->u1_num_ref_idx_lx_active[uc_lx];
+    WORD32 i4_init_size = ps_cur_slice->u1_initial_list_size[uc_lx];
+
+    if(u1_field_pic_flag)
+    {
+        u4_cur_pic_num = u4_cur_pic_num * 2 + 1;
+        ui_max_frame_num = ui_max_frame_num * 2;
+    }
+
+    u2_pred_frame_num = u4_cur_pic_num;
+
+    ui_remapIdc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+    while(ui_remapIdc != 3)
+    {
+        /* The number of reordering commands must not exceed the number */
+        /* of active reference indices; anything beyond that would be   */
+        /* written past the end of the modified list.                   */
+        if(modCount >= i4_num_active)
+        {
+            return ERROR_DBP_MANAGER_T;
+        }
+
+        ui_nextUev = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        if(ui_remapIdc != 2)
+        {
+            ui_nextUev = ui_nextUev + 1;
+            if(ui_remapIdc == 0)
+            {
+                // diffPicNum is -ve
+                i_temp = u2_pred_frame_num - ui_nextUev;
+                if(i_temp < 0)
+                    i_temp += ui_max_frame_num;
+            }
+            else
+            {
+                // diffPicNum is +ve
+                i_temp = u2_pred_frame_num + ui_nextUev;
+                if(i_temp >= ui_max_frame_num)
+                    i_temp -= ui_max_frame_num;
+            }
+            /* Find the dpb with the matching picNum (picNum==frameNum for framePic) */
+
+            if(i_temp > u4_cur_pic_num)
+                i_temp = i_temp - ui_max_frame_num;
+
+            for(i = 0; i < i4_init_size; i++)
+            {
+                if(ps_dpb_mgr->ps_init_dpb[uc_lx][i]->i4_pic_num == i_temp)
+                    break;
+            }
+            if(i == i4_init_size)
+            {
+                return ERROR_DBP_MANAGER_T;
+            }
+
+            u2_pred_frame_num = i_temp; //update predictor to be the picNum just obtained
+        }
+        else //2
+        {
+            UWORD8 u1_lt_idx = (UWORD8)ui_nextUev;
+
+            for(i = 0; i < i4_init_size; i++)
+            {
+                if(!ps_dpb_mgr->ps_init_dpb[uc_lx][i]->u1_is_short)
+                {
+                    if(ps_dpb_mgr->ps_init_dpb[uc_lx][i]->u1_long_term_pic_num
+                                    == u1_lt_idx)
+                        break;
+                }
+            }
+            if(i == i4_init_size)
+            {
+                return ERROR_DBP_MANAGER_T;
+            }
+        }
+
+        /* Remember the remapped index and append the picture */
+        if(i < 32)
+            u4_def_mod_flag |= ((UWORD32)1 << i);
+        ps_dpb_mgr->ps_mod_dpb[uc_lx][modCount++] =
+                        ps_dpb_mgr->ps_init_dpb[uc_lx][i];
+
+        ui_remapIdc = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        /* Get the remapping_idc - 0/1/2/3 */
+    }
+
+    //Handle the ref indices that were not remapped; stop once the list
+    //holds u1_num_ref_idx_lx_active entries
+    for(i = 0; (i < i4_num_active) && (modCount < i4_num_active); i++)
+    {
+        if((i >= 32) || !(u4_def_mod_flag & ((UWORD32)1 << i)))
+            ps_dpb_mgr->ps_mod_dpb[uc_lx][modCount++] =
+                            ps_dpb_mgr->ps_init_dpb[uc_lx][i];
+    }
+    return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_read_mmco_commands \endif
+ *
+ * \brief
+ *    Parses the decoded reference picture marking syntax of a slice header
+ *    and stores the MMCO commands in ps_dec->ps_dpb_cmds for later use.
+ *
+ * \param ps_dec  decoder context (bitstream, current slice, DPB commands)
+ *
+ * \return
+ *    Number of bits consumed from the bitstream; -1 when the stream holds
+ *    more MMCO commands than as_mmc_params can store (MAX_REF_BUFS). In
+ *    that case u1_num_of_commands is reset to 0, the "commands read" flags
+ *    are left untouched, and the caller must treat the slice as erroneous.
+ *
+ * \note
+ *    For an IDR slice only no_output_of_prior_pics_flag and
+ *    long_term_reference_flag are read. Otherwise the buffering mode flag
+ *    is read and, in adaptive mode, the commands up to END_OF_MMCO.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_read_mmco_commands(struct _DecStruct * ps_dec)
+{
+    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+    dpb_commands_t *ps_dpb_cmds = ps_dec->ps_dpb_cmds;
+    dec_slice_params_t * ps_slice = ps_dec->ps_cur_slice;
+    WORD32 j;
+    UWORD8 u1_buf_mode;
+    struct MMCParams *ps_mmc_params;
+    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+    /* Bit position on entry; turned into "bits consumed" at the end */
+    UWORD32 u4_bit_ofst = ps_dec->ps_bitstrm->u4_ofst;
+
+    ps_slice->u1_mmco_equalto5 = 0;
+
+    if(ps_dec->u1_nal_unit_type == IDR_SLICE_NAL)
+    {
+        ps_slice->u1_no_output_of_prior_pics_flag =
+                        ih264d_get_bit_h264(ps_bitstrm);
+        COPYTHECONTEXT("SH: no_output_of_prior_pics_flag",
+                        ps_slice->u1_no_output_of_prior_pics_flag);
+        ps_slice->u1_long_term_reference_flag = ih264d_get_bit_h264(
+                        ps_bitstrm);
+        COPYTHECONTEXT("SH: long_term_reference_flag",
+                        ps_slice->u1_long_term_reference_flag);
+        ps_dpb_cmds->u1_idr_pic = 1;
+        ps_dpb_cmds->u1_no_output_of_prior_pics_flag =
+                        ps_slice->u1_no_output_of_prior_pics_flag;
+        ps_dpb_cmds->u1_long_term_reference_flag =
+                        ps_slice->u1_long_term_reference_flag;
+    }
+    else
+    {
+        u1_buf_mode = ih264d_get_bit_h264(ps_bitstrm); //0 - sliding window; 1 - arbitrary
+        COPYTHECONTEXT("SH: adaptive_ref_pic_buffering_flag", u1_buf_mode);
+        ps_dpb_cmds->u1_buf_mode = u1_buf_mode;
+        j = 0;
+
+        if(u1_buf_mode == 1)
+        {
+            UWORD32 u4_mmco;
+
+            u4_mmco = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+            while(u4_mmco != END_OF_MMCO)
+            {
+                /* as_mmc_params has MAX_REF_BUFS entries; refuse to */
+                /* store a command beyond the end of the array.      */
+                if(j >= MAX_REF_BUFS)
+                {
+                    ps_dpb_cmds->u1_num_of_commands = 0;
+                    return -1;
+                }
+
+                ps_mmc_params = &ps_dpb_cmds->as_mmc_params[j];
+                ps_mmc_params->u4_mmco = u4_mmco;
+                switch(u4_mmco)
+                {
+                    case MARK_ST_PICNUM_AS_NONREF:
+                        //Get absDiffPicnumMinus1
+                        ps_mmc_params->u4_diff_pic_num = ih264d_uev(
+                                        pu4_bitstrm_ofst, pu4_bitstrm_buf);
+                        break;
+
+                    case MARK_LT_INDEX_AS_NONREF:
+                        ps_mmc_params->u4_lt_idx = ih264d_uev(
+                                        pu4_bitstrm_ofst, pu4_bitstrm_buf);
+                        break;
+
+                    case MARK_ST_PICNUM_AS_LT_INDEX:
+                        /* picNum difference first, long term index second */
+                        ps_mmc_params->u4_diff_pic_num = ih264d_uev(
+                                        pu4_bitstrm_ofst, pu4_bitstrm_buf);
+                        ps_mmc_params->u4_lt_idx = ih264d_uev(
+                                        pu4_bitstrm_ofst, pu4_bitstrm_buf);
+                        break;
+
+                    case SET_MAX_LT_INDEX:
+                        ps_mmc_params->u4_max_lt_idx_plus1 = ih264d_uev(
+                                        pu4_bitstrm_ofst, pu4_bitstrm_buf);
+                        break;
+
+                    case RESET_REF_PICTURES:
+                        ps_slice->u1_mmco_equalto5 = 1;
+                        break;
+
+                    case SET_LT_INDEX:
+                        ps_mmc_params->u4_lt_idx = ih264d_uev(
+                                        pu4_bitstrm_ofst, pu4_bitstrm_buf);
+                        break;
+
+                    default:
+                        break;
+                }
+                u4_mmco = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+                j++;
+            }
+            ps_dpb_cmds->u1_num_of_commands = j;
+        }
+    }
+
+    ps_dpb_cmds->u1_dpb_commands_read = 1;
+    ps_dpb_cmds->u1_dpb_commands_read_slc = 1;
+
+    u4_bit_ofst = ps_dec->ps_bitstrm->u4_ofst - u4_bit_ofst;
+    return u4_bit_ofst;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_do_mmco_buffer \endif
+ *
+ * \brief
+ * Perform decoded picture buffer memory management control operations
+ *
+ * \return
+ * 0 - No error; -1 - Error
+ *
+ * \note
+ * Bitstream is also parsed here to get the MMCOs
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_do_mmco_buffer(dpb_commands_t *ps_dpb_cmds,
+ dpb_manager_t *ps_dpb_mgr,
+ UWORD8 u1_numRef_frames_for_seq, /*!< num_ref_frames from active SeqParSet*/
+ UWORD32 u4_cur_pic_num,
+ UWORD32 u2_u4_max_pic_num_minus1,
+ UWORD8 u1_nal_unit_type,
+ struct pic_buffer_t *ps_pic_buf,
+ UWORD8 u1_buf_id,
+ UWORD8 u1_fld_pic_flag,
+ UWORD8 u1_curr_pic_in_err)
+{
+ WORD32 i;
+ UWORD8 u1_buf_mode, u1_marked_lt;
+ struct dpb_info_t *ps_next_dpb;
+ UWORD8 u1_num_gaps;
+ UWORD8 u1_del_node = 1;
+ UWORD8 u1_insert_st_pic = 1;
+ WORD32 ret;
+ UNUSED(u1_nal_unit_type);
+ UNUSED(u2_u4_max_pic_num_minus1);
+ u1_buf_mode = ps_dpb_cmds->u1_buf_mode; //0 - sliding window; 1 - Adaptive
+ u1_marked_lt = 0;
+ u1_num_gaps = ps_dpb_mgr->u1_num_gaps;
+
+ if(!u1_buf_mode)
+ {
+ //Sliding window - implements 8.2.5.3
+ if((ps_dpb_mgr->u1_num_st_ref_bufs
+ + ps_dpb_mgr->u1_num_lt_ref_bufs + u1_num_gaps)
+ == u1_numRef_frames_for_seq)
+ {
+ UWORD8 u1_new_node_flag = 1;
+ if((0 == ps_dpb_mgr->u1_num_st_ref_bufs) && (0 == u1_num_gaps))
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+
+ // Chase the links to reach the last but one picNum, if available
+ ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+
+ if(ps_dpb_mgr->u1_num_st_ref_bufs > 1)
+ {
+ if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
+ {
+ /* Incase of filed pictures top_field has been allocated */
+ /* picture buffer and complementary bottom field pair comes */
+ /* then the sliding window mechanism should not allocate a */
+ /* new node */
+ u1_new_node_flag = 0;
+ }
+
+ for(i = 1; i < (ps_dpb_mgr->u1_num_st_ref_bufs - 1); i++)
+ {
+ if(ps_next_dpb == NULL)
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+ if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
+ {
+ /* Incase of field pictures top_field has been allocated */
+ /* picture buffer and complementary bottom field pair comes */
+ /* then the sliding window mechanism should not allocate a */
+ /* new node */
+ u1_new_node_flag = 0;
+ }
+ ps_next_dpb = ps_next_dpb->ps_prev_short;
+ }
+
+ if(ps_next_dpb->ps_prev_short->ps_prev_short != NULL)
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+
+ if(u1_new_node_flag)
+ {
+ if(u1_num_gaps)
+ {
+ ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+ ps_next_dpb->ps_prev_short->i4_frame_num,
+ &u1_del_node);
+ if(ret != OK)
+ return ret;
+ }
+
+ if(u1_del_node)
+ {
+ ps_dpb_mgr->u1_num_st_ref_bufs--;
+ ps_next_dpb->ps_prev_short->u1_used_as_ref =
+ UNUSED_FOR_REF;
+ ps_next_dpb->ps_prev_short->s_top_field.u1_reference_info =
+ UNUSED_FOR_REF;
+ ps_next_dpb->ps_prev_short->s_bot_field.u1_reference_info =
+ UNUSED_FOR_REF;
+ ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+ ps_next_dpb->ps_prev_short->u1_buf_id);
+ ps_next_dpb->ps_prev_short->ps_pic_buf = NULL;
+ ps_next_dpb->ps_prev_short = NULL;
+ }
+ }
+ }
+ else
+ {
+ if(ps_dpb_mgr->u1_num_st_ref_bufs)
+ {
+ ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+ ps_next_dpb->i4_frame_num,
+ &u1_del_node);
+ if(ret != OK)
+ return ret;
+ if((ps_next_dpb->i4_frame_num != (WORD32)u4_cur_pic_num)
+ && u1_del_node)
+ {
+ ps_dpb_mgr->u1_num_st_ref_bufs--;
+ ps_next_dpb->u1_used_as_ref = FALSE;
+ ps_next_dpb->s_top_field.u1_reference_info =
+ UNUSED_FOR_REF;
+ ps_next_dpb->s_bot_field.u1_reference_info =
+ UNUSED_FOR_REF;
+ ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+ ps_next_dpb->u1_buf_id);
+ ps_next_dpb->ps_pic_buf = NULL;
+ ps_next_dpb->ps_prev_short = NULL;
+ ps_dpb_mgr->ps_dpb_st_head = NULL;
+ ps_next_dpb = NULL;
+ }
+ else if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
+ {
+ if(u1_curr_pic_in_err)
+ {
+ u1_insert_st_pic = 0;
+ }
+ else if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
+ {
+ ps_dpb_mgr->u1_num_st_ref_bufs--;
+ ps_next_dpb->u1_used_as_ref = FALSE;
+ ps_next_dpb->s_top_field.u1_reference_info =
+ UNUSED_FOR_REF;
+ ps_next_dpb->s_bot_field.u1_reference_info =
+ UNUSED_FOR_REF;
+ ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+ ps_next_dpb->u1_buf_id);
+ ps_next_dpb->ps_pic_buf = NULL;
+ ps_next_dpb = NULL;
+ }
+ }
+ }
+ else
+ {
+ ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+ INVALID_FRAME_NUM,
+ &u1_del_node);
+ if(ret != OK)
+ return ret;
+ if(u1_del_node)
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ //Adaptive memory control - implements 8.2.5.4
+ UWORD32 u4_mmco;
+ UWORD32 u4_diff_pic_num;
+ WORD32 i4_pic_num;
+ UWORD32 u4_lt_idx;
+ WORD32 j;
+ struct MMCParams *ps_mmc_params;
+
+ for(j = 0; j < ps_dpb_cmds->u1_num_of_commands; j++)
+ {
+ ps_mmc_params = &ps_dpb_cmds->as_mmc_params[j];
+ u4_mmco = ps_mmc_params->u4_mmco; //Get MMCO
+
+ switch(u4_mmco)
+ {
+ case MARK_ST_PICNUM_AS_NONREF:
+ {
+
+ {
+ UWORD32 i4_cur_pic_num = u4_cur_pic_num;
+ u4_diff_pic_num = ps_mmc_params->u4_diff_pic_num; //Get absDiffPicnumMinus1
+ if(u1_fld_pic_flag)
+ i4_cur_pic_num = i4_cur_pic_num * 2 + 1;
+ i4_pic_num = i4_cur_pic_num - (u4_diff_pic_num + 1);
+ }
+
+ if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
+ {
+ ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
+ i4_pic_num,
+ MAX_REF_BUFS + 1,
+ u1_fld_pic_flag);
+ if(ret != OK)
+ return ret;
+ }
+ else
+ {
+ UWORD8 u1_dummy;
+ ret = ih264d_delete_gap_frm_mmco(ps_dpb_mgr, i4_pic_num, &u1_dummy);
+ if(ret != OK)
+ return ret;
+ }
+ break;
+ }
+ case MARK_LT_INDEX_AS_NONREF:
+ {
+ WORD32 i4_status;
+ u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
+ ret = ih264d_delete_lt_node(ps_dpb_mgr,
+ u4_lt_idx,
+ u1_fld_pic_flag,
+ 0, &i4_status);
+ if(ret != OK)
+ return ret;
+ if(i4_status)
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+ break;
+ }
+
+ case MARK_ST_PICNUM_AS_LT_INDEX:
+ {
+ {
+ UWORD32 i4_cur_pic_num = u4_cur_pic_num;
+ u4_diff_pic_num = ps_mmc_params->u4_diff_pic_num; //Get absDiffPicnumMinus1
+ if(u1_fld_pic_flag)
+ i4_cur_pic_num = i4_cur_pic_num * 2 + 1;
+
+ i4_pic_num = i4_cur_pic_num - (u4_diff_pic_num + 1);
+ }
+
+ u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
+ if(ps_dpb_mgr->u1_num_st_ref_bufs > 0)
+ {
+ ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
+ i4_pic_num, u4_lt_idx,
+ u1_fld_pic_flag);
+ if(ret != OK)
+ return ret;
+ }
+ break;
+ }
+ case SET_MAX_LT_INDEX:
+ {
+ UWORD8 uc_numLT = ps_dpb_mgr->u1_num_lt_ref_bufs;
+ u4_lt_idx = ps_mmc_params->u4_max_lt_idx_plus1; //Get Max_long_term_index_plus1
+ if(u4_lt_idx < ps_dpb_mgr->u1_max_lt_pic_idx_plus1
+ && uc_numLT > 0)
+ {
+ struct dpb_info_t *ps_nxtDPB;
+ //Set all LT buffers with index >= u4_lt_idx to nonreference
+ ps_nxtDPB = ps_dpb_mgr->ps_dpb_ht_head;
+ ps_next_dpb = ps_nxtDPB->ps_prev_long;
+ if(ps_nxtDPB->u1_lt_idx >= u4_lt_idx)
+ {
+ i = 0;
+ ps_dpb_mgr->ps_dpb_ht_head = NULL;
+ }
+ else
+ {
+ for(i = 1; i < uc_numLT; i++)
+ {
+ if(ps_next_dpb->u1_lt_idx >= u4_lt_idx)
+ break;
+ ps_nxtDPB = ps_next_dpb;
+ ps_next_dpb = ps_next_dpb->ps_prev_long;
+ }
+ ps_nxtDPB->ps_prev_long = NULL; //Terminate the link of the closest LTIndex that is <=Max
+ }
+ ps_dpb_mgr->u1_num_lt_ref_bufs = i;
+ if(i == 0)
+ ps_next_dpb = ps_nxtDPB;
+
+ for(; i < uc_numLT; i++)
+ {
+ ps_nxtDPB = ps_next_dpb;
+ ps_nxtDPB->u1_lt_idx = MAX_REF_BUFS + 1;
+ ps_nxtDPB->u1_used_as_ref = UNUSED_FOR_REF;
+ ps_nxtDPB->s_top_field.u1_reference_info =
+ UNUSED_FOR_REF;
+ ps_nxtDPB->s_bot_field.u1_reference_info =
+ UNUSED_FOR_REF;
+
+ ps_nxtDPB->ps_pic_buf = NULL;
+ //Release buffer
+ ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+ ps_nxtDPB->u1_buf_id);
+ ps_next_dpb = ps_nxtDPB->ps_prev_long;
+ ps_nxtDPB->ps_prev_long = NULL;
+ }
+ }
+ ps_dpb_mgr->u1_max_lt_pic_idx_plus1 = u4_lt_idx;
+
+ break;
+ }
+ case SET_LT_INDEX:
+ {
+ u4_lt_idx = ps_mmc_params->u4_lt_idx; //Get long term index
+ ret = ih264d_insert_st_node(ps_dpb_mgr, ps_pic_buf, u1_buf_id,
+ u4_cur_pic_num);
+ if(ret != OK)
+ return ret;
+ ret = ih264d_delete_st_node_or_make_lt(ps_dpb_mgr,
+ u4_cur_pic_num, u4_lt_idx,
+ u1_fld_pic_flag);
+ if(ret != OK)
+ return ret;
+ u1_marked_lt = 1;
+ break;
+ }
+
+ default:
+ break;
+ }
+ if(u4_mmco == RESET_REF_PICTURES || u4_mmco == RESET_ALL_PICTURES)
+ {
+ ih264d_reset_ref_bufs(ps_dpb_mgr);
+ u4_cur_pic_num = 0;
+ }
+ }
+ }
+ if(!u1_marked_lt && u1_insert_st_pic)
+ {
+ ret = ih264d_insert_st_node(ps_dpb_mgr, ps_pic_buf, u1_buf_id,
+ u4_cur_pic_num);
+ if(ret != OK)
+ return ret;
+ }
+ return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_release_pics_in_dpb                               */
+/*                                                                           */
+/*  Description   : Calls ih264_buf_mgr_release() with BUF_MGR_REF for       */
+/*                  picture buffer ids 0 .. u1_disp_bufs - 1 and for the MV  */
+/*                  buffer id mapped to each of them through                 */
+/*                  au1_pic_buf_id_mv_buf_id_map.                            */
+/*                                                                           */
+/*  Inputs        : pv_dec       : decoder context, used as dec_struct_t *   */
+/*                  u1_disp_bufs : number of picture buffer ids to release   */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*  Outputs       : None                                                     */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 06 2005   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void ih264d_release_pics_in_dpb(void *pv_dec,
+                                UWORD8 u1_disp_bufs)
+{
+    /* The index is 32 bits wide: an 8-bit signed counter cannot reach a    */
+    /* UWORD8 bound above 127 and would wrap to a negative array index.     */
+    WORD32 i;
+    dec_struct_t *ps_dec = (dec_struct_t *)pv_dec;
+
+    for(i = 0; i < u1_disp_bufs; i++)
+    {
+        /* Picture buffer with id i */
+        ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                              i,
+                              BUF_MGR_REF);
+        /* MV buffer paired with picture buffer i */
+        ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+                              ps_dec->au1_pic_buf_id_mv_buf_id_map[i],
+                              BUF_MGR_REF);
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_delete_gap_frm_sliding */
+/* */
+/* Description : This function deletes a picture from the list of gaps, */
+/* if the frame number of gap frame is lesser than the one */
+/* to be deleted by sliding window */
+/* Inputs : ps_dpb_mgr: pointer to dpb manager */
+/* i4_frame_num: frame number of picture that's going to */
+/* be deleted by sliding window */
+/* pu1_del_node: holds 0 if a gap is deleted else 1 */
+/* Globals : None */
+/* Processing : Function searches for frame number lesser than */
+/* i4_frame_num in the gaps list */
+/* Outputs : None */
+/* Returns : OK, or ERROR_DBP_MANAGER_T when no valid gap start is found */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 06 2005 NS Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_delete_gap_frm_sliding(dpb_manager_t *ps_dpb_mgr,
+ WORD32 i4_frame_num,
+ UWORD8 *pu1_del_node)
+{
+ /* Picks one gap sequence, then removes the lowest-numbered gap frame of */
+ /* that sequence from ai4_poc_buf_id_map. *pu1_del_node is cleared to 0 */
+ /* only when a gap entry was actually removed; otherwise it stays 1. */
+ WORD8 i1_gap_idx, i, j, j_min;
+ WORD32 *pi4_gaps_start_frm_num, *pi4_gaps_end_frm_num, i4_gap_frame_num;
+ WORD32 i4_start_frm_num, i4_end_frm_num;
+ WORD32 i4_max_frm_num;
+ WORD32 i4_frm_num, i4_gap_frm_num_min;
+
+ /* find the least frame num from gaps and current DPB node */
+ /* Delete the least one */
+ *pu1_del_node = 1;
+ /* Nothing to do when no gaps are recorded; caller keeps *pu1_del_node == 1 */
+ if(0 == ps_dpb_mgr->u1_num_gaps)
+ return OK;
+ pi4_gaps_start_frm_num = ps_dpb_mgr->ai4_gaps_start_frm_num;
+ pi4_gaps_end_frm_num = ps_dpb_mgr->ai4_gaps_end_frm_num;
+ i4_gap_frame_num = INVALID_FRAME_NUM;
+ i4_max_frm_num = ps_dpb_mgr->i4_max_frm_num;
+
+ /* -1 means "no gap sequence selected" */
+ i1_gap_idx = -1;
+ if(INVALID_FRAME_NUM != i4_frame_num)
+ {
+ /* Start from the caller's frame number and keep replacing it with the */
+ /* start of any gap sequence that satisfies the comparisons below */
+ i4_gap_frame_num = i4_frame_num;
+ for(i = 0; i < MAX_FRAMES; i++)
+ {
+ i4_start_frm_num = pi4_gaps_start_frm_num[i];
+ if(INVALID_FRAME_NUM != i4_start_frm_num)
+ {
+ i4_end_frm_num = pi4_gaps_end_frm_num[i];
+ if(i4_end_frm_num < i4_max_frm_num)
+ {
+ if(i4_start_frm_num <= i4_gap_frame_num)
+ {
+ i4_gap_frame_num = i4_start_frm_num;
+ i1_gap_idx = i;
+ }
+ }
+ else
+ {
+ /* Sequence end is >= i4_max_frm_num; NOTE(review): presumably */
+ /* the frame_num wrap-around case - confirm against the caller */
+ if(((i4_start_frm_num <= i4_gap_frame_num)
+ && (i4_gap_frame_num <= i4_max_frm_num))
+ || ((i4_start_frm_num >= i4_gap_frame_num)
+ && ((i4_gap_frame_num
+ + i4_max_frm_num)
+ >= i4_end_frm_num)))
+ {
+ i4_gap_frame_num = i4_start_frm_num;
+ i1_gap_idx = i;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ /* no valid short term buffers, delete one gap from the least start */
+ /* of gap sequence */
+ i4_gap_frame_num = pi4_gaps_start_frm_num[0];
+ i1_gap_idx = 0;
+ for(i = 1; i < MAX_FRAMES; i++)
+ {
+ if(INVALID_FRAME_NUM != pi4_gaps_start_frm_num[i])
+ {
+ if(pi4_gaps_start_frm_num[i] < i4_gap_frame_num)
+ {
+ i4_gap_frame_num = pi4_gaps_start_frm_num[i];
+ i1_gap_idx = i;
+ }
+ }
+ }
+ /* The selected start is still the invalid marker: report an error */
+ if(INVALID_FRAME_NUM == i4_gap_frame_num)
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+ }
+
+ if(-1 != i1_gap_idx)
+ {
+ /* find least frame_num in the poc_map, which is in this range */
+ /* Negative start/end values are shifted up by i4_max_frm_num first */
+ i4_start_frm_num = pi4_gaps_start_frm_num[i1_gap_idx];
+ if(i4_start_frm_num < 0)
+ i4_start_frm_num += i4_max_frm_num;
+ i4_end_frm_num = pi4_gaps_end_frm_num[i1_gap_idx];
+ if(i4_end_frm_num < 0)
+ i4_end_frm_num += i4_max_frm_num;
+
+ i4_gap_frm_num_min = 0xfffffff;
+ /* j_min == MAX_FRAMES means "no map entry found in the range" */
+ j_min = MAX_FRAMES;
+ for(j = 0; j < MAX_FRAMES; j++)
+ {
+ /* Column 2 of the map holds the frame number of the entry */
+ i4_frm_num = ps_dpb_mgr->ai4_poc_buf_id_map[j][2];
+ if((i4_start_frm_num <= i4_frm_num)
+ && (i4_end_frm_num >= i4_frm_num))
+ {
+ if(i4_frm_num < i4_gap_frm_num_min)
+ {
+ j_min = j;
+ i4_gap_frm_num_min = i4_frm_num;
+ }
+ }
+ }
+
+ if(j_min != MAX_FRAMES)
+ {
+
+ /* Mark the map entry as a deleted gap and update the counters */
+ ps_dpb_mgr->ai4_poc_buf_id_map[j_min][0] = -1;
+ ps_dpb_mgr->ai4_poc_buf_id_map[j_min][1] = 0x7fffffff;
+ ps_dpb_mgr->ai4_poc_buf_id_map[j_min][2] = GAP_FRAME_NUM;
+ ps_dpb_mgr->i1_gaps_deleted++;
+
+ ps_dpb_mgr->ai1_gaps_per_seq[i1_gap_idx]--;
+ ps_dpb_mgr->u1_num_gaps--;
+ *pu1_del_node = 0;
+ /* Last gap of this sequence removed: invalidate the sequence slot */
+ if(0 == ps_dpb_mgr->ai1_gaps_per_seq[i1_gap_idx])
+ {
+ ps_dpb_mgr->ai4_gaps_start_frm_num[i1_gap_idx] =
+ INVALID_FRAME_NUM;
+ ps_dpb_mgr->ai4_gaps_end_frm_num[i1_gap_idx] = 0;
+ }
+ }
+ }
+
+ return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_delete_gap_frm_mmco */
+/* */
+/* Description : This function deletes a picture from the list of gaps, */
+/* if the frame number (specified by mmco commands) to be */
+/* deleted is in the range by gap sequence. */
+/* */
+/* Inputs : ps_dpb_mgr: pointer to dpb manager */
+/* i4_frame_num: frame number of picture that's going to */
+/* be deleted by mmco */
+/* pu1_del_node: holds 0 if a gap is deleted else 1 */
+/* Globals : None */
+/* Processing : Function searches for frame number lesser in the range */
+/* specified by gap sequence */
+/* Outputs : None */
+/* Returns : OK when a gap is deleted or no gaps exist, else ERROR_DBP_MANAGER_T */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 06 2005 NS Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_delete_gap_frm_mmco(dpb_manager_t *ps_dpb_mgr,
+ WORD32 i4_frame_num,
+ UWORD8 *pu1_del_node)
+{
+ /* Removes the gap frame whose frame number equals i4_frame_num, if that */
+ /* number lies inside one of the recorded gap sequences. *pu1_del_node */
+ /* is cleared to 0 only when a gap entry was removed. */
+ WORD8 i, j;
+ WORD32 *pi4_start, *pi4_end;
+ WORD32 i4_start_frm_num, i4_end_frm_num, i4_max_frm_num;
+
+ /* find the least frame num from gaps and current DPB node */
+ /* Delete the gaps */
+ *pu1_del_node = 1;
+ pi4_start = ps_dpb_mgr->ai4_gaps_start_frm_num;
+ pi4_end = ps_dpb_mgr->ai4_gaps_end_frm_num;
+ i4_max_frm_num = ps_dpb_mgr->i4_max_frm_num;
+
+ /* No gaps recorded: nothing to delete, not an error */
+ if(0 == ps_dpb_mgr->u1_num_gaps)
+ return OK;
+
+ /* Negative frame numbers are shifted up by i4_max_frm_num before comparing */
+ if(i4_frame_num < 0)
+ i4_frame_num += i4_max_frm_num;
+ /* Search for the gap sequence containing i4_frame_num; the loop ends */
+ /* with i == MAX_FRAMES when no sequence matches */
+ for(i = 0; i < MAX_FRAMES; i++)
+ {
+ i4_start_frm_num = pi4_start[i];
+ if(i4_start_frm_num < 0)
+ i4_start_frm_num += i4_max_frm_num;
+ if(INVALID_FRAME_NUM != i4_start_frm_num)
+ {
+ i4_end_frm_num = pi4_end[i];
+ if(i4_end_frm_num < 0)
+ i4_end_frm_num += i4_max_frm_num;
+
+ if((i4_frame_num >= i4_start_frm_num)
+ && (i4_frame_num <= i4_end_frm_num))
+ {
+ break;
+ }
+ else
+ {
+ /* The frame number matches the range only after adding */
+ /* i4_max_frm_num: treated as an error */
+ if(((i4_frame_num + i4_max_frm_num) >= i4_start_frm_num)
+ && ((i4_frame_num + i4_max_frm_num)
+ <= i4_end_frm_num))
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+ }
+ }
+ }
+
+ /* find frame_num index, in the poc_map which needs to be deleted */
+ for(j = 0; j < MAX_FRAMES; j++)
+ {
+ if(i4_frame_num == ps_dpb_mgr->ai4_poc_buf_id_map[j][2])
+ break;
+ }
+
+ if(MAX_FRAMES != i)
+ {
+ /* A gap sequence matched but the map has no entry for the frame */
+ if(j == MAX_FRAMES)
+ {
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+
+ /* Mark the map entry as a deleted gap and update the counters */
+ ps_dpb_mgr->ai4_poc_buf_id_map[j][0] = -1;
+ ps_dpb_mgr->ai4_poc_buf_id_map[j][1] = 0x7fffffff;
+ ps_dpb_mgr->ai4_poc_buf_id_map[j][2] = GAP_FRAME_NUM;
+ ps_dpb_mgr->i1_gaps_deleted++;
+
+ ps_dpb_mgr->ai1_gaps_per_seq[i]--;
+ ps_dpb_mgr->u1_num_gaps--;
+ *pu1_del_node = 0;
+ /* Last gap of this sequence removed: invalidate the sequence slot */
+ if(0 == ps_dpb_mgr->ai1_gaps_per_seq[i])
+ {
+ ps_dpb_mgr->ai4_gaps_start_frm_num[i] = INVALID_FRAME_NUM;
+ ps_dpb_mgr->ai4_gaps_end_frm_num[i] = 0;
+ }
+ }
+ else
+ {
+ /* No gap sequence contains the requested frame number */
+ UWORD32 i4_error_code;
+ i4_error_code = ERROR_DBP_MANAGER_T;
+ return i4_error_code;
+ }
+
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_do_mmco_for_gaps \endif
+ *
+ * \brief
+ *    Makes room in the DPB for one more gap frame using the sliding window
+ *    rule. While (short-term refs + long-term refs + gaps + 1) exceeds
+ *    u1_num_ref_frames, one entry is dropped per iteration: either a gap
+ *    frame (through ih264d_delete_gap_frm_sliding) or the short-term node
+ *    at the tail of the ps_prev_short chain. Once there is room,
+ *    u1_num_gaps is incremented.
+ *
+ * \param ps_dpb_mgr        DPB manager whose lists and counters are updated
+ * \param u1_num_ref_frames num_ref_frames from the active SPS
+ *
+ * \return
+ *    OK on success. ERROR_DBP_MANAGER_T when the short-term list is shorter
+ *    or longer than u1_num_st_ref_bufs says, or when no entry can be freed.
+ *    Errors from ih264d_delete_gap_frm_sliding are passed through.
+ *
+ * \note
+ *    Only DPB manager state is used here; no bitstream is read.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_do_mmco_for_gaps(dpb_manager_t *ps_dpb_mgr,
+                               UWORD8 u1_num_ref_frames /*!< num_ref_frames from active SeqParSet*/
+                               )
+{
+    struct dpb_info_t *ps_next_dpb;
+    UWORD8 u1_num_gaps;
+    UWORD8 u1_st_ref_bufs, u1_lt_ref_bufs, u1_del_node;
+    /* 32-bit counter: it is compared against a value derived from a UWORD8 */
+    WORD32 i;
+    WORD32 i4_frame_gaps = 1;
+    WORD32 ret;
+
+    //Sliding window - implements 8.2.5.3, flush out buffers
+    u1_st_ref_bufs = ps_dpb_mgr->u1_num_st_ref_bufs;
+    u1_lt_ref_bufs = ps_dpb_mgr->u1_num_lt_ref_bufs;
+
+    while(1)
+    {
+        /* Re-read every iteration: the callee decrements it on success */
+        u1_num_gaps = ps_dpb_mgr->u1_num_gaps;
+        if((u1_st_ref_bufs + u1_lt_ref_bufs + u1_num_gaps + i4_frame_gaps)
+                        > u1_num_ref_frames)
+        {
+            if(0 == (u1_st_ref_bufs + u1_num_gaps))
+            {
+                /* Only long-term refs are left: nothing can be evicted, so */
+                /* fill the remaining slots with gaps and add no new one    */
+                i4_frame_gaps = 0;
+                ps_dpb_mgr->u1_num_gaps = (u1_num_ref_frames
+                                - u1_lt_ref_bufs);
+            }
+            else
+            {
+                u1_del_node = 1;
+                ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+
+                if(u1_st_ref_bufs > 1)
+                {
+                    /* Walk to the last-but-one node of the short-term list */
+                    for(i = 1; i < (u1_st_ref_bufs - 1); i++)
+                    {
+                        if(ps_next_dpb == NULL)
+                        {
+                            UWORD32 i4_error_code;
+                            i4_error_code = ERROR_DBP_MANAGER_T;
+                            return i4_error_code;
+                        }
+                        ps_next_dpb = ps_next_dpb->ps_prev_short;
+                    }
+
+                    /* The list may be shorter than the counter claims;     */
+                    /* reject it instead of dereferencing a NULL link       */
+                    if((ps_next_dpb == NULL)
+                                    || (ps_next_dpb->ps_prev_short == NULL))
+                    {
+                        return ERROR_DBP_MANAGER_T;
+                    }
+
+                    /* The node after the last-but-one must be the tail */
+                    if(ps_next_dpb->ps_prev_short->ps_prev_short != NULL)
+                    {
+                        return ERROR_DBP_MANAGER_T;
+                    }
+
+                    if(u1_num_gaps)
+                    {
+                        ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+                                                            ps_next_dpb->ps_prev_short->i4_frame_num,
+                                                            &u1_del_node);
+                        if(ret != OK)
+                            return ret;
+                    }
+
+                    /* No gap was removed: evict the tail short-term node */
+                    if(u1_del_node)
+                    {
+                        u1_st_ref_bufs--;
+                        ps_next_dpb->ps_prev_short->u1_used_as_ref =
+                                        UNUSED_FOR_REF;
+                        ps_next_dpb->ps_prev_short->s_top_field.u1_reference_info =
+                                        UNUSED_FOR_REF;
+                        ps_next_dpb->ps_prev_short->s_bot_field.u1_reference_info =
+                                        UNUSED_FOR_REF;
+                        ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                                    ps_next_dpb->ps_prev_short->u1_buf_id);
+                        ps_next_dpb->ps_prev_short->ps_pic_buf = NULL;
+                        ps_next_dpb->ps_prev_short = NULL;
+                    }
+                }
+                else
+                {
+                    if(u1_st_ref_bufs)
+                    {
+                        /* Exactly one short-term node is expected: the head */
+                        if(ps_next_dpb == NULL)
+                        {
+                            return ERROR_DBP_MANAGER_T;
+                        }
+
+                        if(u1_num_gaps)
+                        {
+                            ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+                                                                ps_next_dpb->i4_frame_num,
+                                                                &u1_del_node);
+                            if(ret != OK)
+                                return ret;
+                        }
+
+                        /* No gap was removed: evict the only short-term node */
+                        if(u1_del_node)
+                        {
+                            u1_st_ref_bufs--;
+                            ps_next_dpb->u1_used_as_ref = FALSE;
+                            ps_next_dpb->s_top_field.u1_reference_info =
+                                            UNUSED_FOR_REF;
+                            ps_next_dpb->s_bot_field.u1_reference_info =
+                                            UNUSED_FOR_REF;
+                            ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                                        ps_next_dpb->u1_buf_id);
+                            ps_next_dpb->ps_pic_buf = NULL;
+                            ps_next_dpb = NULL;
+                            ps_dpb_mgr->ps_dpb_st_head = NULL;
+                            ps_dpb_mgr->u1_num_st_ref_bufs = u1_st_ref_bufs;
+                        }
+                    }
+                    else
+                    {
+                        /* No short-term refs: a gap must be removable */
+                        ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+                                                            INVALID_FRAME_NUM,
+                                                            &u1_del_node);
+                        if(ret != OK)
+                            return ret;
+                        if(u1_del_node)
+                        {
+                            return ERROR_DBP_MANAGER_T;
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            /* Room is available: account for the new gap (or none) and stop */
+            ps_dpb_mgr->u1_num_gaps += i4_frame_gaps;
+            break;
+        }
+    }
+
+    ps_dpb_mgr->u1_num_st_ref_bufs = u1_st_ref_bufs;
+
+    return OK;
+}
+/****************************************************************************/
+/*                                                                          */
+/*  Function Name : ih264d_free_node_from_dpb                               */
+/*                                                                          */
+/*  Description   : When the DPB is exactly full (short-term + long-term +  */
+/*                  gaps == u1_numRef_frames_for_seq) frees one entry by    */
+/*                  the sliding window rule: a gap frame if one can be      */
+/*                  removed, otherwise the short-term node at the tail of   */
+/*                  the ps_prev_short chain. With more than one short-term  */
+/*                  node, nothing is freed if a visited node already        */
+/*                  carries u4_cur_pic_num. With a single node, that node   */
+/*                  is kept if it carries u4_cur_pic_num.                   */
+/*                                                                          */
+/*  Inputs        : ps_dpb_mgr               : DPB manager                  */
+/*                  u4_cur_pic_num           : frame number being decoded   */
+/*                  u1_numRef_frames_for_seq : reference frame budget       */
+/*                                                                          */
+/*  Globals       : None                                                    */
+/*                                                                          */
+/*  Outputs       : DPB manager lists and counters are updated              */
+/*                                                                          */
+/*  Returns       : OK, ERROR_DBP_MANAGER_T when the short-term list does   */
+/*                  not match its counter or nothing can be freed, or the   */
+/*                  error returned by ih264d_delete_gap_frm_sliding         */
+/*                                                                          */
+/*  Known Issues  :                                                         */
+/*                                                                          */
+/*  Revision History                                                        */
+/*                                                                          */
+/*      DD MM YY            Author        Changes                           */
+/*                          Sarat                                           */
+/****************************************************************************/
+/**** Function Added for Error Resilience *****/
+WORD32 ih264d_free_node_from_dpb(dpb_manager_t *ps_dpb_mgr,
+                                 UWORD32 u4_cur_pic_num,
+                                 UWORD8 u1_numRef_frames_for_seq)
+{
+    WORD32 i;
+    UWORD8 u1_num_gaps = ps_dpb_mgr->u1_num_gaps;
+    struct dpb_info_t *ps_next_dpb;
+    UWORD8 u1_del_node = 1;
+    WORD32 ret;
+
+    //Sliding window - implements 8.2.5.3
+    if((ps_dpb_mgr->u1_num_st_ref_bufs + ps_dpb_mgr->u1_num_lt_ref_bufs
+                    + u1_num_gaps) == u1_numRef_frames_for_seq)
+    {
+        UWORD8 u1_new_node_flag = 1;
+        /* Only long-term refs present: the sliding window cannot free any */
+        if((0 == ps_dpb_mgr->u1_num_st_ref_bufs) && (0 == u1_num_gaps))
+        {
+            return ERROR_DBP_MANAGER_T;
+        }
+
+        // Chase the links to reach the last but one picNum, if available
+        ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+
+        if(ps_dpb_mgr->u1_num_st_ref_bufs > 1)
+        {
+            /* The counter says nodes exist; an empty list is inconsistent */
+            if(ps_next_dpb == NULL)
+                return ERROR_DBP_MANAGER_T;
+
+            if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
+            {
+                /* Incase of filed pictures top_field has been allocated   */
+                /* picture buffer and complementary bottom field pair comes */
+                /* then the sliding window mechanism should not allocate a */
+                /* new node                                                */
+                u1_new_node_flag = 0;
+            }
+
+            for(i = 1; i < (ps_dpb_mgr->u1_num_st_ref_bufs - 1); i++)
+            {
+                if(ps_next_dpb == NULL)
+                    return ERROR_DBP_MANAGER_T;
+
+                if(ps_next_dpb->i4_frame_num == (WORD32)u4_cur_pic_num)
+                {
+                    /* Incase of field pictures top_field has been allocated   */
+                    /* picture buffer and complementary bottom field pair comes */
+                    /* then the sliding window mechanism should not allocate a */
+                    /* new node                                                */
+                    u1_new_node_flag = 0;
+                }
+                ps_next_dpb = ps_next_dpb->ps_prev_short;
+            }
+
+            /* The list may be shorter than the counter claims; reject it  */
+            /* instead of dereferencing a NULL link                        */
+            if((ps_next_dpb == NULL) || (ps_next_dpb->ps_prev_short == NULL))
+                return ERROR_DBP_MANAGER_T;
+
+            /* The node after the last-but-one must be the tail */
+            if(ps_next_dpb->ps_prev_short->ps_prev_short != NULL)
+                return ERROR_DBP_MANAGER_T;
+
+            if(u1_new_node_flag)
+            {
+                if(u1_num_gaps)
+                {
+                    ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+                                                        ps_next_dpb->ps_prev_short->i4_frame_num,
+                                                        &u1_del_node);
+                    if(ret != OK)
+                        return ret;
+                }
+
+                /* No gap was removed: evict the tail short-term node */
+                if(u1_del_node)
+                {
+                    ps_dpb_mgr->u1_num_st_ref_bufs--;
+                    ps_next_dpb->ps_prev_short->u1_used_as_ref = UNUSED_FOR_REF;
+                    ps_next_dpb->ps_prev_short->s_top_field.u1_reference_info =
+                                    UNUSED_FOR_REF;
+                    ps_next_dpb->ps_prev_short->s_bot_field.u1_reference_info =
+                                    UNUSED_FOR_REF;
+                    ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                                ps_next_dpb->ps_prev_short->u1_buf_id);
+                    ps_next_dpb->ps_prev_short->ps_pic_buf = NULL;
+                    ps_next_dpb->ps_prev_short = NULL;
+                }
+            }
+        }
+        else
+        {
+            if(ps_dpb_mgr->u1_num_st_ref_bufs)
+            {
+                /* Exactly one short-term node is expected: the head */
+                if(ps_next_dpb == NULL)
+                    return ERROR_DBP_MANAGER_T;
+
+                ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr,
+                                                    ps_next_dpb->i4_frame_num,
+                                                    &u1_del_node);
+                if(ret != OK)
+                    return ret;
+                /* Keep the node when it belongs to the current picture */
+                if((ps_next_dpb->i4_frame_num != (WORD32)u4_cur_pic_num)
+                                && u1_del_node)
+                {
+                    ps_dpb_mgr->u1_num_st_ref_bufs--;
+                    ps_next_dpb->u1_used_as_ref = FALSE;
+                    ps_next_dpb->s_top_field.u1_reference_info = UNUSED_FOR_REF;
+                    ps_next_dpb->s_bot_field.u1_reference_info = UNUSED_FOR_REF;
+                    ih264d_free_ref_pic_mv_bufs(ps_dpb_mgr->pv_codec_handle,
+                                                ps_next_dpb->u1_buf_id);
+                    ps_next_dpb->ps_pic_buf = NULL;
+                    ps_next_dpb = NULL;
+                }
+            }
+            else
+            {
+                /* No short-term refs: a gap must be removable */
+                ret = ih264d_delete_gap_frm_sliding(ps_dpb_mgr, INVALID_FRAME_NUM, &u1_del_node);
+                if(ret != OK)
+                    return ret;
+                if(u1_del_node)
+                    return ERROR_DBP_MANAGER_T;
+            }
+        }
+    }
+    return OK;
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_delete_nonref_nondisplay_pics                     */
+/*                                                                           */
+/*  Description   : Clears the entries of ai4_poc_buf_id_map whose frame     */
+/*                  number field equals GAP_FRAME_NUM, stopping as soon as   */
+/*                  i1_gaps_deleted reaches zero.                            */
+/*                                                                           */
+/*  Inputs        : ps_dpb_mgr : DPB manager holding the map and counters    */
+/*  Globals       : None                                                     */
+/*  Processing    : Each cleared entry decrements both i1_gaps_deleted and   */
+/*                  i1_poc_buf_id_entries.                                   */
+/*                                                                           */
+/*  Outputs       : Map entries reset to { -1, 0x7fffffff, 0 }               */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         05 06 2007   Varun           Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+void ih264d_delete_nonref_nondisplay_pics(dpb_manager_t *ps_dpb_mgr)
+{
+    WORD32 (*pai4_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
+    WORD8 i1_slot;
+
+    for(i1_slot = 0; i1_slot < MAX_FRAMES; i1_slot++)
+    {
+        /* Stop early once every deleted gap has been accounted for */
+        if(0 == ps_dpb_mgr->i1_gaps_deleted)
+            break;
+
+        /* Only entries flagged as deleted gaps are touched */
+        if(GAP_FRAME_NUM != pai4_map[i1_slot][2])
+            continue;
+
+        ps_dpb_mgr->i1_gaps_deleted--;
+        ps_dpb_mgr->i1_poc_buf_id_entries--;
+
+        /* Return the slot to the empty state */
+        pai4_map[i1_slot][0] = -1;
+        pai4_map[i1_slot][1] = 0x7fffffff;
+        pai4_map[i1_slot][2] = 0;
+    }
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_insert_pic_in_display_list                        */
+/*                                                                           */
+/*  Description   : Stores { buffer id, display POC, frame number } in the   */
+/*                  first entry of ai4_poc_buf_id_map whose buffer id field  */
+/*                  is -1.                                                   */
+/*                                                                           */
+/*  Inputs        : ps_dpb_mgr     : DPB manager holding the map             */
+/*                  u1_buf_id      : picture buffer id to record             */
+/*                  i4_display_poc : display POC of the picture              */
+/*                  u4_frame_num   : frame number of the picture             */
+/*  Globals       : None                                                     */
+/*  Processing    : Reusing a slot left by a deleted gap decrements          */
+/*                  i1_gaps_deleted; any other empty slot increments         */
+/*                  i1_poc_buf_id_entries.                                   */
+/*                                                                           */
+/*  Outputs       : One map entry is filled                                  */
+/*  Returns       : OK, or ERROR_GAPS_IN_FRM_NUM when no entry is empty      */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         05 06 2007   Varun           Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_insert_pic_in_display_list(dpb_manager_t *ps_dpb_mgr,
+                                         UWORD8 u1_buf_id,
+                                         WORD32 i4_display_poc,
+                                         UWORD32 u4_frame_num)
+{
+    WORD32 (*pai4_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
+    WORD8 i1_slot = 0;
+
+    while(i1_slot < MAX_FRAMES)
+    {
+        if(-1 == pai4_map[i1_slot][0])
+        {
+            /* Empty slot found: fix up the matching counter first */
+            if(GAP_FRAME_NUM != pai4_map[i1_slot][2])
+                ps_dpb_mgr->i1_poc_buf_id_entries++;
+            else
+                ps_dpb_mgr->i1_gaps_deleted--;
+
+            pai4_map[i1_slot][0] = u1_buf_id;
+            pai4_map[i1_slot][1] = i4_display_poc;
+            pai4_map[i1_slot][2] = u4_frame_num;
+
+            return OK;
+        }
+        i1_slot++;
+    }
+
+    /* Every entry is occupied */
+    return ERROR_GAPS_IN_FRM_NUM;
+}
+
diff --git a/decoder/ih264d_error_handler.h b/decoder/ih264d_error_handler.h
new file mode 100755
index 0000000..20c0f89
--- /dev/null
+++ b/decoder/ih264d_error_handler.h
@@ -0,0 +1,115 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _IH264D_ERROR_HANDLER_H_
+#define _IH264D_ERROR_HANDLER_H_
+
+/*!
+ *************************************************************************
+ * \file ih264d_error_handler.h
+ *
+ * \brief
+ * Contains the h264_decoder_error_code_t enumeration of decoder error codes
+ *
+ * \date
+ * 21/11/2002
+ *
+ * \author AI
+ *************************************************************************
+ */
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+
+/* Error codes of the H.264 decoder. The values are explicit and run */
+/* consecutively from 0x50 (ERROR_MEM_ALLOC_ISRAM_T) to 0x92 */
+/* (ERROR_PIC_NUM_IS_REPEATED). */
+typedef enum
+{
+
+ ERROR_MEM_ALLOC_ISRAM_T = 0x50,
+ ERROR_MEM_ALLOC_SDRAM_T = 0x51,
+ ERROR_BUF_MGR = 0x52,
+ ERROR_DBP_MANAGER_T = 0x53,
+ ERROR_GAPS_IN_FRM_NUM = 0x54,
+ ERROR_UNKNOWN_NAL = 0x55,
+ ERROR_INV_MB_SLC_GRP_T = 0x56,
+ ERROR_MULTIPLE_SLC_GRP_T = 0x57,
+ ERROR_UNKNOWN_LEVEL = 0x58,
+ ERROR_FEATURE_UNAVAIL = 0x59,
+ ERROR_NOT_SUPP_RESOLUTION = 0x5A,
+ ERROR_INVALID_PIC_PARAM = 0x5B,
+ ERROR_INVALID_SEQ_PARAM = 0x5C,
+ ERROR_EGC_EXCEED_32_1_T = 0x5D,
+ ERROR_EGC_EXCEED_32_2_T = 0x5E,
+ ERROR_INV_RANGE_TEV_T = 0x5F,
+ ERROR_INV_SLC_TYPE_T = 0x60,
+ ERROR_UNAVAIL_PICBUF_T = 0x61,
+ ERROR_UNAVAIL_MVBUF_T = 0x62,
+ ERROR_UNAVAIL_DISPBUF_T = 0x63,
+ ERROR_INV_POC_TYPE_T = 0x64,
+ ERROR_PIC1_NOT_FOUND_T = 0x65,
+ ERROR_PIC0_NOT_FOUND_T = 0x66,
+ ERROR_NUM_REF = 0x67,
+ ERROR_REFIDX_ORDER_T = 0x68,
+ ERROR_EOB_FLUSHBITS_T = 0x69,
+ ERROR_EOB_GETBITS_T = 0x6A,
+ ERROR_EOB_GETBIT_T = 0x6B,
+ ERROR_EOB_BYPASS_T = 0x6C,
+ ERROR_EOB_DECISION_T = 0x6D,
+ ERROR_EOB_TERMINATE_T = 0x6E,
+ ERROR_EOB_READCOEFF4X4CAB_T = 0x6F,
+ ERROR_INV_RANGE_QP_T = 0x70,
+ ERROR_END_OF_FRAME_EXPECTED_T = 0x71,
+ ERROR_MB_TYPE = 0x72,
+ ERROR_SUB_MB_TYPE = 0x73,
+ ERROR_CBP = 0x74,
+ ERROR_REF_IDX = 0x75,
+ ERROR_NUM_MV = 0x76,
+ ERROR_CHROMA_PRED_MODE = 0x77,
+ ERROR_INTRAPRED = 0x78,
+ ERROR_NEXT_MB_ADDRESS_T = 0x79,
+ ERROR_MB_ADDRESS_T = 0x7A,
+ ERROR_MB_GROUP_ASSGN_T = 0x7B,
+ ERROR_CAVLC_NUM_COEFF_T = 0x7C,
+ ERROR_CAVLC_SCAN_POS_T = 0x7D,
+ ERROR_CABAC_RENORM_T = 0x7E,
+ ERROR_CABAC_SIG_COEFF1_T = 0x7F,
+ ERROR_CABAC_SIG_COEFF2_T = 0x80,
+ ERROR_CABAC_ENCODE_COEFF_T = 0x81,
+ ERROR_INV_SPS_PPS_T = 0x82,
+ ERROR_INV_SLICE_HDR_T = 0x83,
+ ERROR_PRED_WEIGHT_TABLE_T = 0x84,
+ IH264D_VERS_BUF_INSUFFICIENT = 0x85,
+ ERROR_ACTUAL_LEVEL_GREATER_THAN_INIT = 0x86,
+ ERROR_CORRUPTED_SLICE = 0x87,
+ ERROR_FRAME_LIMIT_OVER = 0x88,
+ ERROR_ACTUAL_RESOLUTION_GREATER_THAN_INIT = 0x89,
+ ERROR_PROFILE_NOT_SUPPORTED = 0x8A,
+ ERROR_DISP_WIDTH_RESET_TO_PIC_WIDTH = 0x8B,
+ ERROR_DISP_WIDTH_INVALID = 0x8C,
+ ERROR_DANGLING_FIELD_IN_PIC = 0x8D,
+ ERROR_DYNAMIC_RESOLUTION_NOT_SUPPORTED = 0x8E,
+ ERROR_INIT_NOT_DONE = 0x8F,
+ ERROR_LEVEL_UNSUPPORTED = 0x90,
+ ERROR_START_CODE_NOT_FOUND = 0x91,
+ ERROR_PIC_NUM_IS_REPEATED = 0x92,
+
+} h264_decoder_error_code_t;
+
+#endif /* _IH264D_ERROR_HANDLER_H_ */
diff --git a/decoder/ih264d_format_conv.c b/decoder/ih264d_format_conv.c
new file mode 100755
index 0000000..9a8494e
--- /dev/null
+++ b/decoder/ih264d_format_conv.c
@@ -0,0 +1,838 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264d_format_conv.c */
+/* */
+/* Description : Contains functions needed to convert the images in */
+/* different color spaces to yuv 422i color space */
+/* */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 08 2007 Naveen Kumar T Draft */
+/* */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <string.h>
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+#include "ih264d_format_conv.h"
+#include "ih264d_defs.h"
+
+
+
+/* INSERT_LOGO: with LOGO_EN defined it forwards all of its arguments to */
+/* ih264d_insert_logo(); otherwise it expands to nothing. */
+#ifdef LOGO_EN
+#include "ih264d_ittiam_logo.h"
+#define INSERT_LOGO(pu1_buf_y,pu1_buf_u,pu1_buf_v, u4_stride, u4_x_pos, u4_y_pos, u4_yuv_fmt, u4_disp_wd, u4_disp_ht) \
+ ih264d_insert_logo(pu1_buf_y,pu1_buf_u,pu1_buf_v, u4_stride,\
+ u4_x_pos, u4_y_pos, u4_yuv_fmt, u4_disp_wd, u4_disp_ht)
+#else
+/* Logo support compiled out: the macro call disappears */
+#define INSERT_LOGO(pu1_buf_y,pu1_buf_u,pu1_buf_v, u4_stride, u4_x_pos, u4_y_pos, u4_yuv_fmt, u4_disp_wd, u4_disp_ht)
+#endif
+
+/**
+ *******************************************************************************
+ *
+ * @brief Converts a 420SP (semi-planar) picture to RGB565
+ *
+ * @par Description
+ *  Works on 2x2 pixel blocks: one U/V pair supplies the chroma offsets for
+ *  two pixels of the current row and two pixels of the row below. Each
+ *  channel is clipped with CLIP_U8 and packed as R[15:11] G[10:5] B[4:0].
+ *
+ * @param[in] pu1_y_src
+ *  Input Y pointer
+ *
+ * @param[in] pu1_uv_src
+ *  Input UV pointer (UV is interleaved either in UV or VU format)
+ *
+ * @param[out] pu2_rgb_dst
+ *  Output RGB565 pointer
+ *
+ * @param[in] wd
+ *  Width in pixels; (wd >> 1) pixel pairs are converted per row
+ *
+ * @param[in] ht
+ *  Height in pixels; (ht >> 1) row pairs are converted
+ *
+ * @param[in] src_y_strd
+ *  Input Y stride
+ *
+ * @param[in] src_uv_strd
+ *  Input UV stride
+ *
+ * @param[in] dst_strd
+ *  Output stride, counted in 16-bit pixels
+ *
+ * @param[in] is_u_first
+ *  Non-zero when U is the first byte of every chroma pair, zero when V is
+ *
+ * @returns None
+ *
+ * @remarks A partial frame can be converted by passing appropriate source
+ *  and destination pointers together with matching values for wd and ht
+ *
+ *******************************************************************************
+ */
+/* Adds the three chroma offsets to one luma sample, clips every sum with */
+/* CLIP_U8 and packs the result into a single RGB565 word                 */
+static UWORD16 ih264d_pack_rgb565(UWORD8 u1_luma,
+                                  WORD16 i2_r_off,
+                                  WORD16 i2_g_off,
+                                  WORD16 i2_b_off)
+{
+    UWORD32 u4_blue = CLIP_U8(u1_luma + i2_b_off);
+    UWORD32 u4_green = CLIP_U8(u1_luma + i2_g_off);
+    UWORD32 u4_red = CLIP_U8(u1_luma + i2_r_off);
+
+    /* Keep 5 bits of blue and red, 6 bits of green */
+    u4_blue >>= 3;
+    u4_green >>= 2;
+    u4_red >>= 3;
+
+    return (UWORD16)((u4_red << 11) | (u4_green << 5) | u4_blue);
+}
+
+void ih264d_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
+                                     UWORD8 *pu1_uv_src,
+                                     UWORD16 *pu2_rgb_dst,
+                                     WORD32 wd,
+                                     WORD32 ht,
+                                     WORD32 src_y_strd,
+                                     WORD32 src_uv_strd,
+                                     WORD32 dst_strd,
+                                     WORD32 is_u_first)
+{
+    WORD16 i2_r, i2_g, i2_b;
+    WORD16 i2_row, i2_col;
+
+    /* Second luma row and second output row of the current row pair */
+    UWORD8 *pu1_y_row1 = pu1_y_src + src_y_strd;
+    UWORD16 *pu2_dst_row1 = pu2_rgb_dst + dst_strd;
+
+    /* Pick the byte lane of each chroma component inside the UV pair */
+    UWORD8 *pu1_u = is_u_first ? pu1_uv_src : (pu1_uv_src + 1);
+    UWORD8 *pu1_v = is_u_first ? (pu1_uv_src + 1) : pu1_uv_src;
+
+    for(i2_row = 0; i2_row < (ht >> 1); i2_row++)
+    {
+        i2_col = (wd >> 1);
+        while(i2_col > 0)
+        {
+            /* Chroma contributions shared by the whole 2x2 block */
+            i2_b = ((*pu1_u - 128) * COEFF4 >> 13);
+            i2_g = ((*pu1_u - 128) * COEFF2 + (*pu1_v - 128) * COEFF3)
+                            >> 13;
+            i2_r = ((*pu1_v - 128) * COEFF1) >> 13;
+
+            pu1_u += 2;
+            pu1_v += 2;
+
+            /* pixels 0 and 1: current row */
+            pu2_rgb_dst[0] = ih264d_pack_rgb565(pu1_y_src[0], i2_r, i2_g, i2_b);
+            pu2_rgb_dst[1] = ih264d_pack_rgb565(pu1_y_src[1], i2_r, i2_g, i2_b);
+
+            /* pixels 2 and 3: row below */
+            pu2_dst_row1[0] = ih264d_pack_rgb565(pu1_y_row1[0], i2_r, i2_g, i2_b);
+            pu2_dst_row1[1] = ih264d_pack_rgb565(pu1_y_row1[1], i2_r, i2_g, i2_b);
+
+            pu1_y_src += 2;
+            pu1_y_row1 += 2;
+            pu2_rgb_dst += 2;
+            pu2_dst_row1 += 2;
+
+            i2_col--;
+        }
+
+        /* Chroma advances one row, luma and output advance two rows */
+        pu1_u += src_uv_strd - wd;
+        pu1_v += src_uv_strd - wd;
+
+        pu1_y_src += (src_y_strd << 1) - wd;
+        pu1_y_row1 += (src_y_strd << 1) - wd;
+
+        pu2_rgb_dst = pu2_dst_row1 - wd + dst_strd;
+        pu2_dst_row1 += (dst_strd << 1) - wd;
+    }
+
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief Converts a 420SP (semi-planar) picture to 32-bit RGB words
+ *
+ * @par Description
+ *  Works on 2x2 pixel blocks: one U/V pair supplies the chroma offsets for
+ *  two pixels of the current row and two pixels of the row below. Each
+ *  channel is clipped with CLIP_U8 and packed as (R << 16) | (G << 8) | B;
+ *  no value is written for the alpha byte.
+ *
+ * @param[in] pu1_y_src      Input Y pointer
+ * @param[in] pu1_uv_src     Input UV pointer (interleaved as UV or VU)
+ * @param[out] pu4_rgba_dst  Output pointer, one 32-bit word per pixel
+ * @param[in] wd             Width in pixels; (wd >> 1) pairs per row
+ * @param[in] ht             Height in pixels; (ht >> 1) row pairs
+ * @param[in] src_y_strd     Input Y stride
+ * @param[in] src_uv_strd    Input UV stride
+ * @param[in] dst_strd       Output stride, counted in 32-bit pixels
+ * @param[in] is_u_first     Non-zero when U is the first byte of a UV pair
+ *
+ * @returns None
+ *
+ *******************************************************************************
+ */
+/* Adds the three chroma offsets to one luma sample, clips every sum with */
+/* CLIP_U8 and packs the result as (R << 16) | (G << 8) | B               */
+static UWORD32 ih264d_pack_rgba8888(UWORD8 u1_luma,
+                                    WORD16 i2_r_off,
+                                    WORD16 i2_g_off,
+                                    WORD16 i2_b_off)
+{
+    UWORD32 u4_blue = CLIP_U8(u1_luma + i2_b_off);
+    UWORD32 u4_green = CLIP_U8(u1_luma + i2_g_off);
+    UWORD32 u4_red = CLIP_U8(u1_luma + i2_r_off);
+
+    return ((u4_red << 16) | (u4_green << 8) | (u4_blue << 0));
+}
+
+void ih264d_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
+                                       UWORD8 *pu1_uv_src,
+                                       UWORD32 *pu4_rgba_dst,
+                                       WORD32 wd,
+                                       WORD32 ht,
+                                       WORD32 src_y_strd,
+                                       WORD32 src_uv_strd,
+                                       WORD32 dst_strd,
+                                       WORD32 is_u_first)
+{
+    WORD16 i2_r, i2_g, i2_b;
+    WORD16 i2_row, i2_col;
+
+    /* Second luma row and second output row of the current row pair */
+    UWORD8 *pu1_y_row1 = pu1_y_src + src_y_strd;
+    UWORD32 *pu4_dst_row1 = pu4_rgba_dst + dst_strd;
+
+    /* Pick the byte lane of each chroma component inside the UV pair */
+    UWORD8 *pu1_u = is_u_first ? pu1_uv_src : (pu1_uv_src + 1);
+    UWORD8 *pu1_v = is_u_first ? (pu1_uv_src + 1) : pu1_uv_src;
+
+    for(i2_row = 0; i2_row < (ht >> 1); i2_row++)
+    {
+        i2_col = (wd >> 1);
+        while(i2_col > 0)
+        {
+            /* Chroma contributions shared by the whole 2x2 block */
+            i2_b = ((*pu1_u - 128) * COEFF4 >> 13);
+            i2_g = ((*pu1_u - 128) * COEFF2 + (*pu1_v - 128) * COEFF3)
+                            >> 13;
+            i2_r = ((*pu1_v - 128) * COEFF1) >> 13;
+
+            pu1_u += 2;
+            pu1_v += 2;
+
+            /* pixels 0 and 1: current row */
+            pu4_rgba_dst[0] = ih264d_pack_rgba8888(pu1_y_src[0], i2_r, i2_g, i2_b);
+            pu4_rgba_dst[1] = ih264d_pack_rgba8888(pu1_y_src[1], i2_r, i2_g, i2_b);
+
+            /* pixels 2 and 3: row below */
+            pu4_dst_row1[0] = ih264d_pack_rgba8888(pu1_y_row1[0], i2_r, i2_g, i2_b);
+            pu4_dst_row1[1] = ih264d_pack_rgba8888(pu1_y_row1[1], i2_r, i2_g, i2_b);
+
+            pu1_y_src += 2;
+            pu1_y_row1 += 2;
+            pu4_rgba_dst += 2;
+            pu4_dst_row1 += 2;
+
+            i2_col--;
+        }
+
+        /* Chroma advances one row, luma and output advance two rows */
+        pu1_u += src_uv_strd - wd;
+        pu1_v += src_uv_strd - wd;
+
+        pu1_y_src += (src_y_strd << 1) - wd;
+        pu1_y_row1 += (src_y_strd << 1) - wd;
+
+        pu4_rgba_dst = pu4_dst_row1 - wd + dst_strd;
+        pu4_dst_row1 += (dst_strd << 1) - wd;
+    }
+
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief Function used for copying a 420SP buffer
+ *
+ * @par Description
+ * Copies ht rows of wd bytes of luma, then (ht >> 1) rows of wd bytes of
+ * interleaved chroma, row by row with memcpy
+ *
+ * @param[in] pu1_y_src
+ * Input Y pointer
+ *
+ * @param[in] pu1_uv_src
+ * Input UV pointer (UV is interleaved either in UV or VU format)
+ *
+ * @param[in] pu1_y_dst
+ * Output Y pointer
+ *
+ * @param[in] pu1_uv_dst
+ * Output UV pointer (UV is interleaved in the same format as that of input)
+ *
+ * @param[in] wd
+ * Width
+ *
+ * @param[in] ht
+ * Height
+ *
+ * @param[in] src_y_strd
+ * Input Y Stride
+ *
+ * @param[in] src_uv_strd
+ * Input UV stride
+ *
+ * @param[in] dst_y_strd
+ * Output Y stride
+ *
+ * @param[in] dst_uv_strd
+ * Output UV stride
+ *
+ * @returns None
+ *
+ * @remarks A partial frame copy can be done by passing appropriate source
+ * and destination pointers and appropriate values for wd and ht
+ *
+ *******************************************************************************
+ */
+
+void ih264d_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
+                                    UWORD8 *pu1_uv_src,
+                                    UWORD8 *pu1_y_dst,
+                                    UWORD8 *pu1_uv_dst,
+                                    WORD32 wd,
+                                    WORD32 ht,
+                                    WORD32 src_y_strd,
+                                    WORD32 src_uv_strd,
+                                    WORD32 dst_y_strd,
+                                    WORD32 dst_uv_strd)
+{
+    UWORD8 *pu1_in, *pu1_out;
+    WORD32 rows_left;
+
+    /* luma plane */
+    pu1_in = (UWORD8 *)pu1_y_src;
+    pu1_out = (UWORD8 *)pu1_y_dst;
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_out, pu1_in, wd);
+        pu1_out += dst_y_strd;
+        pu1_in += src_y_strd;
+    }
+
+    /* interleaved chroma plane: half the rows, same number of bytes per row */
+    pu1_in = (UWORD8 *)pu1_uv_src;
+    pu1_out = (UWORD8 *)pu1_uv_dst;
+    for(rows_left = ht >> 1; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_out, pu1_in, wd);
+        pu1_out += dst_uv_strd;
+        pu1_in += src_uv_strd;
+    }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief Function used for copying a 420SP buffer while swapping the order
+ * of the interleaved chroma bytes
+ *
+ * @par Description
+ * Luma is copied row by row with memcpy. For chroma, every pair of bytes
+ * in a row is written to the destination in the opposite order
+ *
+ * @param[in] pu1_y_src
+ * Input Y pointer
+ *
+ * @param[in] pu1_uv_src
+ * Input UV pointer (UV is interleaved either in UV or VU format)
+ *
+ * @param[in] pu1_y_dst
+ * Output Y pointer
+ *
+ * @param[in] pu1_uv_dst
+ * Output UV pointer (each chroma byte pair is swapped relative to the input)
+ *
+ * @param[in] wd
+ * Width
+ *
+ * @param[in] ht
+ * Height
+ *
+ * @param[in] src_y_strd
+ * Input Y Stride
+ *
+ * @param[in] src_uv_strd
+ * Input UV stride
+ *
+ * @param[in] dst_y_strd
+ * Output Y stride
+ *
+ * @param[in] dst_uv_strd
+ * Output UV stride
+ *
+ * @returns None
+ *
+ * @remarks A partial frame copy can be done by passing appropriate source
+ * and destination pointers and appropriate values for wd and ht
+ *
+ *******************************************************************************
+ */
+void ih264d_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
+                                            UWORD8 *pu1_uv_src,
+                                            UWORD8 *pu1_y_dst,
+                                            UWORD8 *pu1_uv_dst,
+                                            WORD32 wd,
+                                            WORD32 ht,
+                                            WORD32 src_y_strd,
+                                            WORD32 src_uv_strd,
+                                            WORD32 dst_y_strd,
+                                            WORD32 dst_uv_strd)
+{
+    UWORD8 *pu1_in, *pu1_out;
+    WORD32 rows_left;
+
+    /* luma plane is copied unchanged */
+    pu1_in = (UWORD8 *)pu1_y_src;
+    pu1_out = (UWORD8 *)pu1_y_dst;
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_out, pu1_in, wd);
+        pu1_out += dst_y_strd;
+        pu1_in += src_y_strd;
+    }
+
+    /* chroma plane: exchange the two bytes of every pair */
+    pu1_in = (UWORD8 *)pu1_uv_src;
+    pu1_out = (UWORD8 *)pu1_uv_dst;
+    for(rows_left = ht >> 1; rows_left > 0; rows_left--)
+    {
+        WORD32 col;
+        for(col = 0; col < wd; col += 2)
+        {
+            pu1_out[col + 0] = pu1_in[col + 1];
+            pu1_out[col + 1] = pu1_in[col + 0];
+        }
+        pu1_out += dst_uv_strd;
+        pu1_in += src_uv_strd;
+    }
+}
+/**
+ *******************************************************************************
+ *
+ * @brief Function used for converting a 420SP buffer to a 420P buffer
+ *
+ * @par Description
+ * Optionally copies luma row by row, then de-interleaves the chroma plane
+ * into separate U and V planes
+ *
+ * @param[in] pu1_y_src
+ * Input Y pointer
+ *
+ * @param[in] pu1_uv_src
+ * Input UV pointer (UV is interleaved either in UV or VU format)
+ *
+ * @param[in] pu1_y_dst
+ * Output Y pointer
+ *
+ * @param[in] pu1_u_dst
+ * Output U pointer
+ *
+ * @param[in] pu1_v_dst
+ * Output V pointer
+ *
+ * @param[in] wd
+ * Width
+ *
+ * @param[in] ht
+ * Height
+ *
+ * @param[in] src_y_strd
+ * Input Y Stride
+ *
+ * @param[in] src_uv_strd
+ * Input UV stride
+ *
+ * @param[in] dst_y_strd
+ * Output Y stride
+ *
+ * @param[in] dst_uv_strd
+ * Output UV stride (used for both the U and the V plane)
+ *
+ * @param[in] is_u_first
+ * Flag to indicate if U is the first byte in input chroma part
+ *
+ * @param[in] disable_luma_copy
+ * When non-zero the luma plane is not copied; only chroma is converted
+ *
+ * @returns none
+ *
+ * @remarks A partial frame copy can be done by passing appropriate source
+ * and destination pointers and appropriate values for wd and ht
+ *
+ *******************************************************************************
+ */
+
+void ih264d_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
+                                   UWORD8 *pu1_uv_src,
+                                   UWORD8 *pu1_y_dst,
+                                   UWORD8 *pu1_u_dst,
+                                   UWORD8 *pu1_v_dst,
+                                   WORD32 wd,
+                                   WORD32 ht,
+                                   WORD32 src_y_strd,
+                                   WORD32 src_uv_strd,
+                                   WORD32 dst_y_strd,
+                                   WORD32 dst_uv_strd,
+                                   WORD32 is_u_first,
+                                   WORD32 disable_luma_copy)
+{
+    UWORD8 *pu1_u_src, *pu1_v_src;
+    WORD32 row, col;
+    WORD32 chroma_rows = ht >> 1;
+    WORD32 chroma_cols = wd >> 1;
+
+    if(0 == disable_luma_copy)
+    {
+        /* copy luma */
+        UWORD8 *pu1_in = (UWORD8 *)pu1_y_src;
+        UWORD8 *pu1_out = (UWORD8 *)pu1_y_dst;
+
+        for(row = 0; row < ht; row++)
+        {
+            memcpy(pu1_out, pu1_in, wd);
+            pu1_out += dst_y_strd;
+            pu1_in += src_y_strd;
+        }
+    }
+
+    /* locate the first U and the first V byte of the interleaved plane */
+    pu1_u_src = (UWORD8 *)pu1_uv_src + (is_u_first ? 0 : 1);
+    pu1_v_src = (UWORD8 *)pu1_uv_src + (is_u_first ? 1 : 0);
+
+    /* de-interleave U and V and copy to destination */
+    for(row = 0; row < chroma_rows; row++)
+    {
+        for(col = 0; col < chroma_cols; col++)
+        {
+            pu1_u_dst[col] = pu1_u_src[col * 2];
+            pu1_v_dst[col] = pu1_v_src[col * 2];
+        }
+
+        pu1_u_dst += dst_uv_strd;
+        pu1_v_dst += dst_uv_strd;
+        pu1_u_src += src_uv_strd;
+        pu1_v_src += src_uv_strd;
+    }
+}
+
+/*****************************************************************************
+ * Function Name : ih264d_format_convert
+ *
+ * Description   : Implements format conversion/frame copy for a band of rows
+ * Inputs        : ps_dec        - Decoder parameters; the source frame is
+ *                                 ps_dec->s_disp_frame_info
+ *                 pv_disp_op    - Display output descriptor; supplies the
+ *                                 output format and the destination buffers
+ *                                 (s_disp_frm_buf)
+ *                 u4_start_y    - First luma row to convert (must be even)
+ *                 u4_num_rows_y - Number of luma rows to convert
+ * Globals       : None
+ * Processing    : Returns without converting when u4_error_code is 1, when
+ *                 u4_start_y is odd, or when the display buffer is shared
+ *                 and the output format is IV_YUV_420SP_UV. Otherwise
+ *                 dispatches on the output format (IV_YUV_420P,
+ *                 IV_YUV_420SP_UV, IV_YUV_420SP_VU, IV_RGB_565). INSERT_LOGO
+ *                 is invoked once the band reaches the last frame row.
+ * Outputs       : Converted rows in pv_disp_op->s_disp_frm_buf
+ * Returns       : None
+ *
+ * Issues        : Only u4_start_y is checked for evenness; u4_num_rows_y is
+ *                 not validated here
+ *
+ * Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         27 04 2005   NS              Draft
+ *
+ *****************************************************************************/
+void ih264d_format_convert(dec_struct_t *ps_dec,
+                           ivd_get_display_frame_op_t *pv_disp_op,
+                           UWORD32 u4_start_y,
+                           UWORD32 u4_num_rows_y)
+{
+    UWORD32 convert_uv_only = 0;
+    iv_yuv_buf_t *ps_op_frm;
+
+    if(1 == pv_disp_op->u4_error_code)
+        return;
+
+    ps_op_frm = &(ps_dec->s_disp_frame_info);
+
+    /* Chroma rows are addressed as u4_start_y >> 1, so the band must start
+     * on an even luma row */
+    if(u4_start_y & 1)
+    {
+        H264_DEC_DEBUG_PRINT(
+                        "Requires even number of rows and even u4_start_y for format conversion\n");
+        return;
+    }
+
+    if((1 == ps_dec->u4_share_disp_buf)
+                    && ((pv_disp_op->e_output_format == IV_YUV_420SP_UV)))
+    {
+        return;
+    }
+
+    if(pv_disp_op->e_output_format == IV_YUV_420P)
+    {
+        UWORD32 start_uv = u4_start_y >> 1;
+        UWORD8 *pu1_y_src, *pu1_u_src;
+        UWORD8 *pu1_y_dst, *pu1_u_dst, *pu1_v_dst;
+        UWORD32 width, height;
+        UWORD32 src_luma_stride, src_chroma_stride;
+        UWORD32 dst_luma_stride, dst_chroma_stride;
+
+        /* With a shared display buffer the luma copy is skipped and only
+         * chroma is de-interleaved */
+        convert_uv_only = (0 == ps_dec->u4_share_disp_buf) ? 0 : 1;
+
+        pu1_y_src = (UWORD8 *)ps_op_frm->pv_y_buf;
+        pu1_y_src += u4_start_y * ps_op_frm->u4_y_strd;
+
+        pu1_y_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf;
+        pu1_y_dst += u4_start_y * pv_disp_op->s_disp_frm_buf.u4_y_strd;
+
+        /* pv_u_buf of the source is passed on as the interleaved UV plane */
+        pu1_u_src = (UWORD8 *)ps_op_frm->pv_u_buf;
+        pu1_u_src += start_uv * ps_op_frm->u4_u_strd;
+
+        pu1_u_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_u_buf;
+        pu1_u_dst += start_uv * pv_disp_op->s_disp_frm_buf.u4_u_strd;
+
+        pu1_v_dst = (UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_v_buf;
+        pu1_v_dst += start_uv * pv_disp_op->s_disp_frm_buf.u4_v_strd;
+
+        src_luma_stride = ps_op_frm->u4_y_strd;
+        src_chroma_stride = ps_op_frm->u4_u_strd;
+
+        dst_luma_stride = pv_disp_op->s_disp_frm_buf.u4_y_strd;
+        dst_chroma_stride = pv_disp_op->s_disp_frm_buf.u4_u_strd;
+
+        width = ps_op_frm->u4_y_wd;
+        height = u4_num_rows_y;
+
+        ih264d_fmt_conv_420sp_to_420p(pu1_y_src, pu1_u_src, pu1_y_dst,
+                                      pu1_u_dst, pu1_v_dst, width, height,
+                                      src_luma_stride, src_chroma_stride,
+                                      dst_luma_stride, dst_chroma_stride, 1,
+                                      convert_uv_only);
+    }
+    else if((pv_disp_op->e_output_format == IV_YUV_420SP_UV)
+                    || (pv_disp_op->e_output_format == IV_YUV_420SP_VU))
+    {
+        UWORD32 start_uv = u4_start_y >> 1;
+
+        /* Both semi-planar variants take the same arguments */
+        UWORD8 *pu1_y_src = (UWORD8 *)ps_op_frm->pv_y_buf
+                        + u4_start_y * ps_op_frm->u4_y_strd;
+        UWORD8 *pu1_uv_src = ((UWORD8 *)ps_op_frm->pv_u_buf
+                        + start_uv * ps_op_frm->u4_u_strd);
+        UWORD8 *pu1_y_dst = ((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf
+                        + u4_start_y * pv_disp_op->s_disp_frm_buf.u4_y_strd);
+        UWORD8 *pu1_uv_dst = ((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_u_buf
+                        + start_uv * pv_disp_op->s_disp_frm_buf.u4_u_strd);
+
+        if(pv_disp_op->e_output_format == IV_YUV_420SP_UV)
+        {
+            ih264d_fmt_conv_420sp_to_420sp(
+                            pu1_y_src, pu1_uv_src, pu1_y_dst, pu1_uv_dst,
+                            ps_op_frm->u4_y_wd, u4_num_rows_y,
+                            ps_op_frm->u4_y_strd, ps_op_frm->u4_u_strd,
+                            pv_disp_op->s_disp_frm_buf.u4_y_strd,
+                            pv_disp_op->s_disp_frm_buf.u4_u_strd);
+        }
+        else
+        {
+            ih264d_fmt_conv_420sp_to_420sp_swap_uv(
+                            pu1_y_src, pu1_uv_src, pu1_y_dst, pu1_uv_dst,
+                            ps_op_frm->u4_y_wd, u4_num_rows_y,
+                            ps_op_frm->u4_y_strd, ps_op_frm->u4_u_strd,
+                            pv_disp_op->s_disp_frm_buf.u4_y_strd,
+                            pv_disp_op->s_disp_frm_buf.u4_u_strd);
+        }
+    }
+    else if(pv_disp_op->e_output_format == IV_RGB_565)
+    {
+        UWORD32 start_uv = u4_start_y >> 1;
+
+        /* The destination offset is applied after the UWORD16 cast, so the
+         * output stride is counted in 16-bit pixels */
+        ih264d_fmt_conv_420sp_to_rgb565(
+                        (UWORD8 *)ps_op_frm->pv_y_buf
+                                        + u4_start_y * ps_op_frm->u4_y_strd,
+                        ((UWORD8 *)ps_op_frm->pv_u_buf
+                                        + start_uv * ps_op_frm->u4_u_strd),
+                        ((UWORD16 *)pv_disp_op->s_disp_frm_buf.pv_y_buf
+                                        + u4_start_y
+                                                        * pv_disp_op->s_disp_frm_buf.u4_y_strd),
+                        ps_op_frm->u4_y_wd, u4_num_rows_y, ps_op_frm->u4_y_strd,
+                        ps_op_frm->u4_u_strd,
+                        pv_disp_op->s_disp_frm_buf.u4_y_strd, 1);
+    }
+
+    /* Last band of the frame */
+    if((u4_start_y + u4_num_rows_y) >= ps_dec->s_disp_frame_info.u4_y_ht)
+    {
+
+        INSERT_LOGO(pv_disp_op->s_disp_frm_buf.pv_y_buf,
+                        pv_disp_op->s_disp_frm_buf.pv_u_buf,
+                        pv_disp_op->s_disp_frm_buf.pv_v_buf, pv_disp_op->s_disp_frm_buf.u4_y_strd,
+                        ps_dec->u2_disp_width,
+                        ps_dec->u2_disp_height,
+                        pv_disp_op->e_output_format,
+                        ps_op_frm->u4_y_wd,
+                        ps_op_frm->u4_y_ht);
+    }
+
+    return;
+}
diff --git a/decoder/ih264d_format_conv.h b/decoder/ih264d_format_conv.h
new file mode 100755
index 0000000..81a8a0f
--- /dev/null
+++ b/decoder/ih264d_format_conv.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264d_format_conv.h */
+/* */
+/* Description : Contains coefficients and constants required for */
+/* converting from rgb and gray color spaces to yuv422i */
+/* color space */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 27 08 2007 Naveen Kumar T Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _IH264D_FORMAT_CONV_H_
+#define _IH264D_FORMAT_CONV_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/* Integer colour-conversion coefficients, grouped by output component
+ * (Y, U, V).
+ * NOTE(review): per the file description these are RGB -> YUV weights;
+ * their users are not visible in this file - confirm before relying on it. */
+#define COEFF_0_Y 66
+#define COEFF_1_Y 129
+#define COEFF_2_Y 25
+#define COEFF_0_U -38
+#define COEFF_1_U -75
+#define COEFF_2_U 112
+#define COEFF_0_V 112
+#define COEFF_1_V -94
+#define COEFF_2_V -18
+#define CONST_RGB_YUV1 4096
+#define CONST_RGB_YUV2 32768
+#define CONST_GRAY_YUV 128
+/* Two 16-bit coefficients packed in one 32-bit word:
+ * high half 0xFFEE = -18 (COEFF_2_V), low half 0x0070 = 112 (COEFF_2_U) */
+#define COEF_2_V2_U 0xFFEE0070
+
+/* Packed 16-bit coefficient pairs, high half first:
+ * COF_2Y_0Y = { 25, 66 }, COF_1U_0U = { -75, -38 }, COF_1V_0V = { -94, 112 } */
+#define COF_2Y_0Y 0X00190042
+#define COF_1U_0U 0XFFB5FFDA
+#define COF_1V_0V 0XFFA20070
+
+void ih264d_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_u_dst,
+ UWORD8 *pu1_v_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_y_strd,
+ WORD32 dst_uv_strd,
+ WORD32 is_u_first,
+ WORD32 disable_luma_copy);
+
+void ih264d_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_uv_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_y_strd,
+ WORD32 dst_uv_strd);
+
+void ih264d_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_uv_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_y_strd,
+ WORD32 dst_uv_strd);
+
+void ih264d_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD16 *pu2_rgb_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_strd,
+ WORD32 is_u_first);
+/* YUV -> RGB chroma coefficients; every product is shifted right by 13 at
+ * the point of use, i.e. the values are scaled by 8192:
+ * COEFF1 : (V - 128) contribution to R
+ * COEFF2 : (U - 128) contribution to G
+ * COEFF3 : (V - 128) contribution to G
+ * COEFF4 : (U - 128) contribution to B */
+#define COEFF1 13073
+#define COEFF2 -3207
+#define COEFF3 -6664
+#define COEFF4 16530
+
+void ih264d_format_convert(dec_struct_t *ps_dec,
+ ivd_get_display_frame_op_t *pv_disp_op,
+ UWORD32 u4_start_y,
+ UWORD32 u4_num_rows_y);
+
+
+#endif /* _IH264D_FORMAT_CONV_H_ */
diff --git a/decoder/ih264d_function_selector.h b/decoder/ih264d_function_selector.h
new file mode 100755
index 0000000..92ad959
--- /dev/null
+++ b/decoder/ih264d_function_selector.h
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264d_function_selector.h
+ *
+ * @brief
+ * Architecture identifiers and prototypes of the function pointer
+ * initialisation routines used in the decoder
+ *
+ * @author
+ * Harish
+ *
+ * @par List of Functions:
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#ifndef _IH264D_FUNCTION_SELECTOR_H_
+#define _IH264D_FUNCTION_SELECTOR_H_
+
+/* Architecture identifiers, one distinct integer per target.
+ * NOTE(review): presumably consumed by ih264d_init_arch() /
+ * ih264d_init_function_ptr() to select an implementation set - confirm
+ * against their definitions, which are not in this header. */
+#define D_ARCH_NA 1
+#define D_ARCH_ARM_NONEON 2
+#define D_ARCH_ARM_A9Q 3
+#define D_ARCH_ARM_A9A 4
+#define D_ARCH_ARM_A9 5
+#define D_ARCH_ARM_A7 6
+#define D_ARCH_ARM_A5 7
+#define D_ARCH_ARM_A15 8
+#define D_ARCH_ARM_NEONINTR 9
+#define D_ARCH_ARMV8_GENERIC 10
+#define D_ARCH_X86_GENERIC 11
+#define D_ARCH_X86_SSSE3 12
+#define D_ARCH_X86_SSE42 13
+#define D_ARCH_X86_AVX2 14
+#define D_ARCH_MIPS_GENERIC 15
+#define D_ARCH_MIPS_32 16
+
+void ih264d_init_arch(dec_struct_t *ps_codec);
+
+void ih264d_init_function_ptr(dec_struct_t *ps_codec);
+
+void ih264d_init_function_ptr_generic(dec_struct_t *ps_codec);
+void ih264d_init_function_ptr_ssse3(dec_struct_t *ps_codec);
+void ih264d_init_function_ptr_sse42(dec_struct_t *ps_codec);
+
+#ifndef DISABLE_AVX2
+void ih264d_init_function_ptr_avx2(dec_struct_t *ps_codec);
+#endif
+
+void ih264d_init_function_ptr_a9q(dec_struct_t *ps_codec);
+void ih264d_init_function_ptr_av8(dec_struct_t *ps_codec);
+
+#endif /* _IH264D_FUNCTION_SELECTOR_H_ */
diff --git a/decoder/ih264d_function_selector_generic.c b/decoder/ih264d_function_selector_generic.c
new file mode 100755
index 0000000..48956ef
--- /dev/null
+++ b/decoder/ih264d_function_selector_generic.c
@@ -0,0 +1,222 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264d_function_selector_generic.c
+ *
+ * @brief
+ * Contains functions to initialize function pointers of codec context
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ih264d_init_function_ptr_generic
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_error.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+
+#include "ih264d_structs.h"
+#include "ih264d_function_selector.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief Initialize the intra/inter/transform/deblk function pointers of
+ * codec context
+ *
+ * @par Description: fills every leaf-level function pointer of the codec
+ * context (intra prediction, weighted prediction, padding, inverse
+ * quantisation/transform, deblocking and inter prediction) with the
+ * un-suffixed ih264_* routines
+ *
+ * @param[in] ps_codec
+ * Codec context pointer; only its function pointer members are written
+ *
+ * @returns none
+ *
+ * @remarks several apf_inter_pred_luma entries intentionally share one
+ * routine (for example indices 1 and 3).
+ * NOTE(review): presumably the table index encodes the fractional sample
+ * position and the shared routine distinguishes the cases itself - confirm
+ * against the inter prediction caller
+ *
+ *******************************************************************************
+ */
+void ih264d_init_function_ptr_generic(dec_struct_t *ps_codec)
+{
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 16x16 */
+    ps_codec->apf_intra_pred_luma_16x16[0] =
+                    ih264_intra_pred_luma_16x16_mode_vert;
+    ps_codec->apf_intra_pred_luma_16x16[1] =
+                    ih264_intra_pred_luma_16x16_mode_horz;
+    ps_codec->apf_intra_pred_luma_16x16[2] =
+                    ih264_intra_pred_luma_16x16_mode_dc;
+    ps_codec->apf_intra_pred_luma_16x16[3] =
+                    ih264_intra_pred_luma_16x16_mode_plane;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 4x4 */
+    ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert;
+    ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz;
+    ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc;
+    ps_codec->apf_intra_pred_luma_4x4[3] =
+                    ih264_intra_pred_luma_4x4_mode_diag_dl;
+    ps_codec->apf_intra_pred_luma_4x4[4] =
+                    ih264_intra_pred_luma_4x4_mode_diag_dr;
+    ps_codec->apf_intra_pred_luma_4x4[5] =
+                    ih264_intra_pred_luma_4x4_mode_vert_r;
+    ps_codec->apf_intra_pred_luma_4x4[6] =
+                    ih264_intra_pred_luma_4x4_mode_horz_d;
+    ps_codec->apf_intra_pred_luma_4x4[7] =
+                    ih264_intra_pred_luma_4x4_mode_vert_l;
+    ps_codec->apf_intra_pred_luma_4x4[8] =
+                    ih264_intra_pred_luma_4x4_mode_horz_u;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 8x8 */
+    ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_luma_8x8[3] =
+                    ih264_intra_pred_luma_8x8_mode_diag_dl;
+    ps_codec->apf_intra_pred_luma_8x8[4] =
+                    ih264_intra_pred_luma_8x8_mode_diag_dr;
+    ps_codec->apf_intra_pred_luma_8x8[5] =
+                    ih264_intra_pred_luma_8x8_mode_vert_r;
+    ps_codec->apf_intra_pred_luma_8x8[6] =
+                    ih264_intra_pred_luma_8x8_mode_horz_d;
+    ps_codec->apf_intra_pred_luma_8x8[7] =
+                    ih264_intra_pred_luma_8x8_mode_vert_l;
+    ps_codec->apf_intra_pred_luma_8x8[8] =
+                    ih264_intra_pred_luma_8x8_mode_horz_u;
+
+    ps_codec->pf_intra_pred_ref_filtering =
+                    ih264_intra_pred_luma_8x8_mode_ref_filtering;
+
+    /* Init function pointers for intra pred leaf level functions chroma
+     * Intra 8x8 */
+    ps_codec->apf_intra_pred_chroma[0] = ih264_intra_pred_chroma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane;
+
+    /* Weighted prediction */
+    ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma;
+    ps_codec->pf_default_weighted_pred_chroma =
+                    ih264_default_weighted_pred_chroma;
+    ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma;
+    ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma;
+    ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma;
+    ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma;
+
+    /* Padding Functions */
+    ps_codec->pf_pad_top = ih264_pad_top;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma;
+
+    /* Inverse quantisation / inverse transform / reconstruction */
+    ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4;
+    ps_codec->pf_iquant_itrans_recon_luma_4x4_dc =
+                    ih264_iquant_itrans_recon_4x4_dc;
+    ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8;
+    ps_codec->pf_iquant_itrans_recon_luma_8x8_dc =
+                    ih264_iquant_itrans_recon_8x8_dc;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 =
+                    ih264_iquant_itrans_recon_chroma_4x4;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc =
+                    ih264_iquant_itrans_recon_chroma_4x4_dc;
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4;
+
+    /* Init fn ptr luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4;
+    ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff;
+    ps_codec->pf_deblk_luma_vert_bslt4_mbaff =
+                    ih264_deblk_luma_vert_bslt4_mbaff;
+
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4;
+
+    /* Init fn ptr chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4;
+    ps_codec->pf_deblk_chroma_vert_bs4_mbaff =
+                    ih264_deblk_chroma_vert_bs4_mbaff;
+    ps_codec->pf_deblk_chroma_vert_bslt4_mbaff =
+                    ih264_deblk_chroma_vert_bslt4_mbaff;
+
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;
+
+    /* Inter pred leaf level functions */
+    ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy;
+    ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel;
+    ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz;
+    ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel;
+    ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel;
+    ps_codec->apf_inter_pred_luma[5] =
+                    ih264_inter_pred_luma_horz_qpel_vert_qpel;
+    ps_codec->apf_inter_pred_luma[6] =
+                    ih264_inter_pred_luma_horz_hpel_vert_qpel;
+    ps_codec->apf_inter_pred_luma[7] =
+                    ih264_inter_pred_luma_horz_qpel_vert_qpel;
+    ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert;
+    ps_codec->apf_inter_pred_luma[9] =
+                    ih264_inter_pred_luma_horz_qpel_vert_hpel;
+    ps_codec->apf_inter_pred_luma[10] =
+                    ih264_inter_pred_luma_horz_hpel_vert_hpel;
+    ps_codec->apf_inter_pred_luma[11] =
+                    ih264_inter_pred_luma_horz_qpel_vert_hpel;
+    ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel;
+    ps_codec->apf_inter_pred_luma[13] =
+                    ih264_inter_pred_luma_horz_qpel_vert_qpel;
+    ps_codec->apf_inter_pred_luma[14] =
+                    ih264_inter_pred_luma_horz_hpel_vert_qpel;
+    ps_codec->apf_inter_pred_luma[15] =
+                    ih264_inter_pred_luma_horz_qpel_vert_qpel;
+
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma;
+
+    return;
+}
diff --git a/decoder/ih264d_inter_pred.c b/decoder/ih264d_inter_pred.c
new file mode 100755
index 0000000..fa818b5
--- /dev/null
+++ b/decoder/ih264d_inter_pred.c
@@ -0,0 +1,1614 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_inter_pred.c
+ *
+ * \brief
+ * This file contains routines to perform MotionCompensation tasks
+ *
+ * Detailed_description
+ *
+ * \date
+ * 20/11/2002
+ *
+ * \author Arvind Raman
+ **************************************************************************
+ */
+
+#include <string.h>
+#include "ih264d_defs.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_inter_pred.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_mb_utils.h"
+
+
+void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk); /* prototype only; the body is defined elsewhere */
+
+/* Copies uc_h rows of uc_w bytes from puc_Source to puc_To (memcpy per row, so a row
+ * must not overlap its destination); ui16_sourceWidth / ui16_toWidth are the row steps. */
+void ih264d_copy_multiplex_data(UWORD8 *puc_Source,
+                                UWORD8 *puc_To,
+                                UWORD32 uc_w,
+                                UWORD32 uc_h,
+                                UWORD32 ui16_sourceWidth,
+                                UWORD32 ui16_toWidth)
+{
+    UWORD32 uc_i; /* as wide as uc_h: an 8-bit counter wraps at 256 and would never reach a larger bound */
+
+    for(uc_i = 0; uc_i < uc_h; uc_i++)
+    {
+        memcpy(puc_To, puc_Source, uc_w);
+        puc_To += ui16_toWidth;
+        puc_Source += ui16_sourceWidth;
+    }
+}
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_copy_2d1d \endif
+ *
+ * \brief
+ *    Packs ui16_heightToFill rows of ui16_widthToFill bytes into puc_dest back
+ *    to back; puc_src advances by ui16_srcWidth bytes after every row.
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_copy_2d1d(UWORD8 *puc_src,
+                      UWORD8 *puc_dest,
+                      UWORD16 ui16_srcWidth,
+                      UWORD16 ui16_widthToFill,
+                      UWORD16 ui16_heightToFill)
+{
+    UWORD32 u4_rows_left = ui16_heightToFill;
+    while(u4_rows_left-- != 0)
+    {
+        memcpy(puc_dest, puc_src, ui16_widthToFill);
+        puc_src += ui16_srcWidth;
+        puc_dest += ui16_widthToFill;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_fill_pred_info \endif
+ *
+ * \brief
+ *    Packs the motion vector, partition geometry, prediction direction and
+ *    reference details of one partition into *ps_pred_pkd.
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_fill_pred_info(WORD16 *pi2_mv,WORD32 part_width,WORD32 part_height, WORD32 sub_mb_num,
+                           WORD32 pred_dir,pred_info_pkd_t *ps_pred_pkd,WORD8 i1_buf_id,
+                           WORD8 i1_ref_idx,UWORD32 *pu4_wt_offset,UWORD8 u1_pic_type)
+{
+    /* i1_size_pos_info layout: sub_mb_num & 3 from bit 0, sub_mb_num >> 2 */
+    /* from bit 2, part_width >> 1 from bit 4, part_height >> 1 from bit 6 */
+    const WORD32 i4_sub_x = sub_mb_num & 3;
+    const WORD32 i4_sub_y = sub_mb_num >> 2;
+    const WORD32 i4_half_wd = part_width >> 1;
+    const WORD32 i4_half_ht = part_height >> 1;
+
+    /* motion vector: [0] and [1] copied unchanged */
+    ps_pred_pkd->i2_mv[0] = pi2_mv[0];
+    ps_pred_pkd->i2_mv[1] = pi2_mv[1];
+
+    ps_pred_pkd->i1_size_pos_info = i4_sub_x | (i4_sub_y << 2)
+                    | (i4_half_wd << 4) | (i4_half_ht << 6);
+
+    /* i1_ref_idx_info: reference index with pred_dir OR-ed in from bit 6 */
+    ps_pred_pkd->i1_ref_idx_info = i1_ref_idx | (pred_dir << 6);
+
+    /* buffer id, weight/offset pointer and picture type are stored as given */
+    ps_pred_pkd->i1_buf_id = i1_buf_id;
+    ps_pred_pkd->pu4_wt_offst = pu4_wt_offset;
+    ps_pred_pkd->u1_pic_type = u1_pic_type;
+}
+
+
+
+
+
+
+
+/*****************************************************************************/
+/* \if Function name : ih264d_form_mb_part_info_bp \endif */
+/* */
+/* \brief */
+/* Expands one packed partition (ps_pred_pkd) into two pred_info_t entries */
+/* at ps_dec->ps_pred[ps_dec->u4_pred_info_idx]: luma first, then chroma. */
+/* */
+/* \return */
+/* OK (no other value is returned by this routine) */
+/* \note */
+/* u2_mb_x / u2_mb_y : MB position, scaled by 16 (luma) or 8 (chroma) */
+/* mb_index : MB slot added to the destination x offset */
+/* ps_cur_mb_info : not used by this routine */
+/* Per plane: split the MV into integer and fractional parts, add the MB */
+/* and sub-block offsets, CLIP3 the result against picture-size limits and */
+/* store reference pointer, destination pointer, strides and sizes. */
+/* A fractional luma MV component starts the fetch 2 samples earlier and */
+/* adds 5 to its size; a fractional chroma component adds 1 to the size. */
+/* ps_dec->u4_pred_info_idx is advanced by 2 before returning. */
+/* */
+/* Legacy note kept from the original header (c_bufx and the PredBuffer */
+/* names are not referenced in this routine): forward-only or backward- */
+/* only prediction used ..PredBuffer[0 to X1]; bi-directional prediction */
+/* used [0 to X1] for forward and [X2 to X3] for backward and averaged */
+/* them. X1/X2/X3 = 255/256/511 for luma and 63/64/127 for chroma. */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 11 05 2005 SWRN Modified to handle pod */
+/*****************************************************************************/
+
+WORD32 ih264d_form_mb_part_info_bp(pred_info_pkd_t *ps_pred_pkd,
+ dec_struct_t * ps_dec,
+ UWORD16 u2_mb_x,
+ UWORD16 u2_mb_y,
+ WORD32 mb_index,
+ dec_mb_info_t *ps_cur_mb_info)
+{
+ /* Reference position, integer MV and destination offset, in samples */
+ WORD32 i2_frm_x, i2_frm_y;
+ WORD32 i2_tmp_mv_x, i2_tmp_mv_y;
+ WORD32 i2_rec_x, i2_rec_y;
+
+ WORD32 u2_pic_ht;
+ WORD32 u2_frm_wd;
+ WORD32 u2_rec_wd;
+ UWORD8 u1_sub_x = 0,u1_sub_y=0 ;
+ UWORD8 u1_part_wd = 0,u1_part_ht = 0;
+ WORD16 i2_mv_x,i2_mv_y;
+
+
+
+ /********************************************/
+ /* i1_mc_wd width reqd for mcomp */
+ /* u1_dma_ht height reqd for mcomp */
+ /* u1_dma_wd i1_mc_wd rounded up to x4 */
+ /* u1_dx fractional part of the x MV */
+ /* u1_dy fractional part of the y MV */
+ /********************************************/
+ WORD32 u1_ofst_in_word;
+ UWORD32 i1_mc_wd;
+
+ WORD32 u1_dma_ht;
+
+ UWORD32 u1_dma_wd;
+ UWORD32 u1_dx;
+ UWORD32 u1_dy;
+ pred_info_t * ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx; /* first of the two entries filled here */
+ dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice; /* not referenced again in this routine */
+ tfr_ctxt_t *ps_frame_buf;
+ struct pic_buffer_t *ps_ref_frm;
+ UWORD8 u1_scale_ref,u1_mbaff,u1_field; /* not referenced again in this routine */
+ pic_buffer_t **pps_ref_frame; /* not referenced again in this routine */
+ WORD8 i1_size_pos_info,i1_buf_id;
+
+ PROFILE_DISABLE_MB_PART_INFO()
+
+ UNUSED(ps_cur_mb_info);
+ /* NOTE(review): the GET_*_PRED macros presumably undo the packing done in */
+ /* ih264d_fill_pred_info - confirm against the macro definitions */
+ i1_size_pos_info = ps_pred_pkd->i1_size_pos_info;
+ GET_XPOS_PRED(u1_sub_x,i1_size_pos_info);
+ GET_YPOS_PRED(u1_sub_y,i1_size_pos_info);
+ GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info);
+ GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info);
+ i2_mv_x = ps_pred_pkd->i2_mv[0];
+ i2_mv_y = ps_pred_pkd->i2_mv[1];
+ i1_buf_id = ps_pred_pkd->i1_buf_id;
+
+
+ ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id]; /* i1_buf_id is used as the index without a range check here */
+
+
+ {
+ ps_frame_buf = &ps_dec->s_tran_addrecon; /* supplies the destination plane pointers */
+ }
+
+
+ /* Transfer Setup Y */
+ {
+ UWORD8 *pu1_pred, *pu1_rec;
+
+ /* calculating rounded motion vectors and fractional components */
+ i2_tmp_mv_x = i2_mv_x;
+ i2_tmp_mv_y = i2_mv_y;
+ u1_dx = i2_tmp_mv_x & 0x3; /* low 2 bits: fractional part */
+ u1_dy = i2_tmp_mv_y & 0x3;
+ i2_tmp_mv_x >>= 2; /* remaining bits: integer part */
+ i2_tmp_mv_y >>= 2;
+ i1_mc_wd = u1_part_wd << 2;
+ u1_dma_ht = u1_part_ht << 2;
+ if(u1_dx)
+ {
+ i2_tmp_mv_x -= 2; /* start 2 columns earlier ... */
+ i1_mc_wd += 5; /* ... and fetch 5 extra columns */
+ }
+ if(u1_dy)
+ {
+ i2_tmp_mv_y -= 2; /* start 2 rows earlier ... */
+ u1_dma_ht += 5; /* ... and fetch 5 extra rows */
+ }
+
+ /********************************************************************/
+ /* Calculating the horizontal and the vertical u4_ofst from top left */
+ /* edge of the reference frame, and subsequent clipping */
+ /********************************************************************/
+ u2_pic_ht = ps_dec->u2_pic_ht;
+ u2_frm_wd = ps_dec->u2_frm_wd_y;
+ i2_rec_x = u1_sub_x << 2;
+ i2_rec_y = u1_sub_y << 2;
+
+ i2_frm_x = (u2_mb_x << 4) + i2_rec_x + i2_tmp_mv_x;
+ i2_frm_y = (u2_mb_y << 4) + i2_rec_y + i2_tmp_mv_y;
+
+ i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1),
+ i2_frm_x);
+ i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y);
+
+ pu1_pred = ps_ref_frm->pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x;
+
+
+ u1_ofst_in_word = 0;
+ u1_dma_wd = (i1_mc_wd + u1_ofst_in_word + 3) & 0xFC; /* round up to a multiple of 4 */
+
+ /********************************************************************/
+ /* Calculating the horizontal and the vertical u4_ofst from top left */
+ /* edge of the recon buffer */
+ /********************************************************************/
+ /* MB_SIZE is only a placeholder; it is replaced by the frame stride just below */
+ u2_rec_wd = MB_SIZE;
+ {
+ u2_rec_wd = ps_dec->u2_frm_wd_y;
+ i2_rec_x += (mb_index << 4);
+ pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd
+ + i2_rec_x;
+ }
+
+ /* destination pointer and stride are now final */
+
+ /* filling the pred and dma structures for Y */
+ u2_frm_wd = ps_dec->u2_frm_wd_y;
+
+ ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
+ ps_pred->i1_dma_ht = u1_dma_ht;
+ ps_pred->i1_mc_wd = i1_mc_wd;
+ ps_pred->u2_frm_wd = u2_frm_wd;
+ ps_pred->pu1_rec_y_u = pu1_rec;
+ ps_pred->u2_dst_stride = u2_rec_wd;
+
+ ps_pred->i1_mb_partwidth = u1_part_wd << 2;
+ ps_pred->i1_mb_partheight = u1_part_ht << 2;
+ ps_pred->u1_mc_addr_ofst = u1_ofst_in_word;
+ ps_pred->u1_dydx = (u1_dy << 2) + u1_dx; /* luma packing: 4 * dy + dx */
+
+ ps_pred->pu1_y_ref = pu1_pred;
+
+ }
+
+ /* Move on to the chroma entry */
+ ps_pred++;
+
+ /* Transfer Setup U & V */
+ {
+ WORD32 i4_ref_offset, i4_rec_offset;
+ UWORD8 *pu1_pred_u, *pu1_pred_v;
+
+
+ /* calculating rounded motion vectors and fractional components */
+ i2_tmp_mv_x = i2_mv_x;
+ i2_tmp_mv_y = i2_mv_y;
+
+ /************************************************************************/
+ /* Table 8-9: Derivation of the vertical component of the chroma vector */
+ /* in field coding mode (no such adjustment is made in this routine) */
+ /************************************************************************/
+
+ /* Eighth sample of the chroma MV */
+ u1_dx = i2_tmp_mv_x & 0x7;
+ u1_dy = i2_tmp_mv_y & 0x7;
+
+ /********************************************************************/
+ /* Calculating the full pel MV for chroma which is 1/2 of the Luma */
+ /* MV in full pel units */
+ /********************************************************************/
+ i2_mv_x = i2_tmp_mv_x;
+ i2_mv_y = i2_tmp_mv_y;
+ i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3);
+ i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3);
+ i1_mc_wd = u1_part_wd << 1;
+ u1_dma_ht = u1_part_ht << 1;
+ if(u1_dx)
+ {
+ i2_tmp_mv_x -= (i2_mv_x < 0); /* one further step back for a negative fractional MV */
+ i1_mc_wd++;
+ }
+ if(u1_dy != 0)
+ {
+ i2_tmp_mv_y -= (i2_mv_y < 0); /* one further step back for a negative fractional MV */
+ u1_dma_ht++;
+ }
+
+ /********************************************************************/
+ /* Calculating the horizontal and the vertical u4_ofst from top left */
+ /* edge of the reference frame, and subsequent clipping */
+ /********************************************************************/
+ u2_pic_ht >>= 1; /* chroma plane has half the luma height */
+ u2_frm_wd = ps_dec->u2_frm_wd_uv;
+ i2_rec_x = u1_sub_x << 1;
+ i2_rec_y = u1_sub_y << 1;
+
+ i2_frm_x = (u2_mb_x << 3) + i2_rec_x + i2_tmp_mv_x;
+ i2_frm_y = (u2_mb_y << 3) + i2_rec_y + i2_tmp_mv_y;
+
+ i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM,
+ ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x);
+ i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y);
+
+ i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR;
+ u1_ofst_in_word = 0;
+ u1_dma_wd = (i1_mc_wd + u1_ofst_in_word + 3) & 0xFC; /* round up to a multiple of 4 */
+ i4_ref_offset -= u1_ofst_in_word;
+
+ /********************************************************************/
+ /* Calculating the horizontal and the vertical u4_ofst from top left */
+ /* edge of the recon buffer */
+ /********************************************************************/
+ /* these two values are placeholders; both are recomputed in the block below */
+ u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
+ i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
+
+ {
+ u2_rec_wd = ps_dec->u2_frm_wd_uv;
+ i2_rec_x += (mb_index << 3);
+ i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
+ ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset;
+ ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v
+ + i4_rec_offset;
+ }
+
+ /* destination pointers and stride are now final */
+
+ /* filling the common pred structures for U */
+ u2_frm_wd = ps_dec->u2_frm_wd_uv;
+
+ ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
+ ps_pred->i1_dma_ht = u1_dma_ht;
+ ps_pred->i1_mc_wd = i1_mc_wd;
+
+ ps_pred->u2_frm_wd = u2_frm_wd;
+ ps_pred->u2_dst_stride = u2_rec_wd;
+
+ ps_pred->i1_mb_partwidth = u1_part_wd << 1;
+ ps_pred->i1_mb_partheight = u1_part_ht << 1;
+ ps_pred->u1_mc_addr_ofst = u1_ofst_in_word;
+ ps_pred->u1_dydx = (u1_dy << 3) + u1_dx; /* chroma packing: 8 * dy + dx */
+
+ pu1_pred_u = ps_ref_frm->pu1_buf2 + i4_ref_offset;
+ pu1_pred_v = ps_ref_frm->pu1_buf3 + i4_ref_offset;
+
+ /* Record the U reference pointer */
+ ps_pred->pu1_u_ref = pu1_pred_u;
+
+ /* Record the V reference pointer */
+ ps_pred->pu1_v_ref = pu1_pred_v;
+ }
+
+ /* Two entries (luma + chroma) were consumed */
+ ps_dec->u4_pred_info_idx += 2;
+
+ return OK;
+
+}
+
+
+/*****************************************************************************/
+/* \if Function name : ih264d_form_mb_part_info_mp \endif */
+/* */
+/* \brief */
+/* Expands one packed partition (ps_pred_pkd) into two pred_info_t entries */
+/* at ps_dec->ps_pred[ps_dec->u4_pred_info_idx]: luma first, then chroma. */
+/* Unlike the _bp variant it also handles field pictures, field MBs, */
+/* bottom MBs, weighted-prediction type and pad-on-demand (pod) heights. */
+/* */
+/* \return */
+/* OK, or ERROR_NUM_MV when ps_dec->u4_dma_buf_idx ends up larger than */
+/* ps_dec->u4_ref_buf_size */
+/* \note */
+/* u2_mb_x / u2_mb_y : MB position, scaled by 16 (luma) or 8 (chroma) */
+/* mb_index : MB slot added to the destination x offset */
+/* ps_cur_mb_info : supplies u1_mb_field_decodingflag, u1_topmb, u1_cbp */
+/* i1_pod_ht is stored negative for the top-region case and positive for */
+/* the bottom-region case; 0 means no pad-on-demand for the entry. */
+/* ps_dec->u4_pred_info_idx is advanced by 2 and ps_dec->u4_dma_buf_idx by */
+/* the scratch bytes accounted for this partition. */
+/* */
+/* Legacy note kept from the original header (c_bufx and the PredBuffer */
+/* names are not referenced in this routine): bi-directional prediction */
+/* used ..PredBuffer[0 to X1] forward and [X2 to X3] backward and averaged */
+/* them. X1/X2/X3 = 255/256/511 for luma and 63/64/127 for chroma. */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 11 05 2005 SWRN Modified to handle pod */
+/*****************************************************************************/
+WORD32 ih264d_form_mb_part_info_mp(pred_info_pkd_t *ps_pred_pkd,
+ dec_struct_t * ps_dec,
+ UWORD16 u2_mb_x,
+ UWORD16 u2_mb_y,
+ WORD32 mb_index,
+ dec_mb_info_t *ps_cur_mb_info)
+{
+ /* Write position inside the scratch reference buffer */
+ UWORD8 *pu1_ref_buf;
+ WORD16 i2_frm_x, i2_frm_y, i2_tmp_mv_x, i2_tmp_mv_y, i2_pod_ht;
+ WORD16 i2_rec_x, i2_rec_y;
+ UWORD16 u2_pic_ht, u2_frm_wd, u2_rec_wd;
+ UWORD8 u1_wght_pred_type, u1_wted_bipred_idc;
+ UWORD16 u2_tot_ref_scratch_size;
+ UWORD8 u1_sub_x = 0;
+ UWORD8 u1_sub_y = 0;
+ UWORD8 u1_is_bi_dir = 0;
+
+ /********************************************/
+ /* i1_mc_wd width reqd for mcomp */
+ /* u1_dma_ht height reqd for mcomp */
+ /* u1_dma_wd i1_mc_wd rounded up to x4 */
+ /* u1_dx fractional part of the x MV */
+ /* u1_dy fractional part of the y MV */
+ /********************************************/
+ UWORD8 u1_ofst_in_word, i1_mc_wd, u1_dma_ht, u1_dma_wd, u1_dx, u1_dy;
+ pred_info_t * ps_pred ;
+ dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
+ const UWORD8 u1_slice_type = ps_cur_slice->u1_slice_type;
+ UWORD8 u1_pod_bot, u1_pod_top;
+
+ /* picture type of the reference: drives the pod flags & chroma MV adjustment */
+ UWORD8 u1_ref_pic_type ;
+
+ /* set default value to flags specifying field nature of picture & mb */
+ UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld;
+ UWORD32 u1_mb_bot = 0, u1_pic_bot = 0, u1_mb_or_pic_bot;
+ tfr_ctxt_t *ps_frame_buf;
+ /* calculate flags specifying field nature of picture & mb */
+ const UWORD32 u1_pic_fld = ps_cur_slice->u1_field_pic_flag;
+ WORD8 i1_pred;
+ WORD8 i1_size_pos_info,i1_buf_id,i1_ref_idx;
+ UWORD8 u1_part_wd,u1_part_ht;
+ WORD16 i2_mv_x,i2_mv_y;
+ struct pic_buffer_t *ps_ref_frm;
+ UWORD32 *pu4_wt_offset;
+ UWORD8 *pu1_buf1,*pu1_buf2,*pu1_buf3;
+
+
+ PROFILE_DISABLE_MB_PART_INFO()
+
+ ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx; /* first of the two entries filled here */
+
+
+ i1_size_pos_info = ps_pred_pkd->i1_size_pos_info;
+ GET_XPOS_PRED(u1_sub_x,i1_size_pos_info);
+ GET_YPOS_PRED(u1_sub_y,i1_size_pos_info);
+ GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info);
+ GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info);
+ i2_mv_x = ps_pred_pkd->i2_mv[0];
+ i2_mv_y = ps_pred_pkd->i2_mv[1];
+ i1_ref_idx = ps_pred_pkd->i1_ref_idx_info & 0x3f; /* low 6 bits: reference index */
+ i1_buf_id = ps_pred_pkd->i1_buf_id;
+ ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id]; /* i1_buf_id is used as the index without a range check here */
+
+ i1_pred = (ps_pred_pkd->i1_ref_idx_info & 0xC0) >> 6; /* top 2 bits: prediction direction */
+ u1_is_bi_dir = (i1_pred == BI_PRED);
+
+
+ u1_ref_pic_type = ps_pred_pkd->u1_pic_type & PIC_MASK;
+
+ pu1_buf1 = ps_ref_frm->pu1_buf1;
+ pu1_buf2 = ps_ref_frm->pu1_buf2;
+ pu1_buf3 = ps_ref_frm->pu1_buf3;
+
+ if(u1_ref_pic_type == BOT_FLD)
+ {
+ /* bottom-field reference: start one row stride into each plane */
+ pu1_buf1 += ps_ref_frm->u2_frm_wd_y;
+ pu1_buf2 += ps_ref_frm->u2_frm_wd_uv;
+ pu1_buf3 += ps_ref_frm->u2_frm_wd_uv;
+
+ }
+
+
+
+ if(ps_dec->ps_cur_pps->u1_wted_pred_flag)
+ {
+ pu4_wt_offset = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
+ * X3(i1_ref_idx)]; /* overwritten unconditionally just below */
+ }
+
+
+ pu4_wt_offset = ps_pred_pkd->pu4_wt_offst; /* the pointer carried in the packed entry is the one used */
+
+
+ /* Pointer to the frame buffer */
+ {
+ ps_frame_buf = &ps_dec->s_tran_addrecon;
+ /* supplies the destination plane pointers */
+ }
+
+ if(!u1_pic_fld)
+ {
+ u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
+ u1_mb_bot = 1 - ps_cur_mb_info->u1_topmb;
+ }
+ else
+ u1_pic_bot = ps_cur_slice->u1_bottom_field_flag;
+
+ /****************************************************************/
+ /* calculating the flags that tell whether to use frame-padding */
+ /* or use software pad-on-demand */
+ /****************************************************************/
+ u1_mb_or_pic_bot = u1_mb_bot | u1_pic_bot;
+ u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld;
+ u1_pod_bot = u1_mb_or_pic_fld && (u1_ref_pic_type == TOP_FLD);
+ u1_pod_top = u1_mb_or_pic_fld && (u1_ref_pic_type == BOT_FLD);
+
+ /* Weighted Pred additions */
+ u1_wted_bipred_idc = ps_dec->ps_cur_pps->u1_wted_bipred_idc;
+
+ if((u1_slice_type == P_SLICE) || (u1_slice_type == SP_SLICE))
+ {
+ /* P Slice only */
+ u1_wght_pred_type = ps_dec->ps_cur_pps->u1_wted_pred_flag;
+
+ }
+ else
+ {
+ /* B Slice only: 1 for single-direction, 2 for bi-direction, unless disabled below */
+ u1_wght_pred_type = 1 + u1_is_bi_dir;
+ if(u1_wted_bipred_idc == 0)
+ u1_wght_pred_type = 0;
+ if((u1_wted_bipred_idc == 2) && (!u1_is_bi_dir))
+ u1_wght_pred_type = 0;
+ }
+ /* load the scratch reference buffer index */
+ pu1_ref_buf = ps_dec->pu1_ref_buff + ps_dec->u4_dma_buf_idx;
+ u2_tot_ref_scratch_size = 0;
+
+
+ /* Transfer Setup Y */
+ {
+ UWORD8 *pu1_pred, *pu1_rec;
+ /* calculating rounded motion vectors and fractional components */
+ i2_tmp_mv_x = i2_mv_x;
+ i2_tmp_mv_y = i2_mv_y;
+
+ u1_dx = i2_tmp_mv_x & 0x3; /* low 2 bits: fractional part */
+ u1_dy = i2_tmp_mv_y & 0x3;
+ i2_tmp_mv_x >>= 2; /* remaining bits: integer part */
+ i2_tmp_mv_y >>= 2;
+ i1_mc_wd = u1_part_wd << 2;
+ u1_dma_ht = u1_part_ht << 2;
+ if(u1_dx)
+ {
+ i2_tmp_mv_x -= 2; /* start 2 columns earlier ... */
+ i1_mc_wd += 5; /* ... and fetch 5 extra columns */
+ }
+ if(u1_dy)
+ {
+ i2_tmp_mv_y -= 2; /* start 2 rows earlier ... */
+ u1_dma_ht += 5; /* ... and fetch 5 extra rows */
+ }
+
+ /********************************************************************/
+ /* Calculating the horizontal and the vertical u4_ofst from top left */
+ /* edge of the reference frame, and subsequent clipping */
+ /********************************************************************/
+ u2_pic_ht = ps_dec->u2_pic_ht >> u1_pic_fld; /* field picture: half the rows ... */
+ u2_frm_wd = ps_dec->u2_frm_wd_y << u1_pic_fld; /* ... at twice the stride */
+ i2_frm_x = (u2_mb_x << 4) + (u1_sub_x << 2) + i2_tmp_mv_x;
+ i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 4)
+ + (((u1_sub_y << 2) + i2_tmp_mv_y) << u1_mb_fld);
+
+ i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1),
+ i2_frm_x);
+ i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld),
+ (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y);
+
+ pu1_pred = pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x;
+ u1_ofst_in_word = 0;
+
+ u1_dma_wd = (i1_mc_wd + u1_ofst_in_word + 3) & 0xFC; /* round up to a multiple of 4 */
+
+ /********************************************************************/
+ /* Calculating the horizontal and the vertical u4_ofst from top left */
+ /* edge of the recon buffer */
+ /********************************************************************/
+ /* MB_SIZE is only a placeholder; it is replaced by the frame stride just below */
+ u2_rec_wd = MB_SIZE;
+ i2_rec_x = u1_sub_x << 2;
+ i2_rec_y = u1_sub_y << 2;
+ {
+ u2_rec_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
+ i2_rec_x += (mb_index << 4);
+ pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd
+ + i2_rec_x;
+ if(u1_mb_bot)
+ pu1_rec += ps_dec->u2_frm_wd_y << ((u1_mb_fld) ? 0 : 4); /* bottom MB: 1 row down (field MB) or 16 rows down (frame MB) */
+ }
+
+ /* destination pointer and stride are now final */
+
+ /* filling the pred and dma structures for Y */
+ u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
+
+ ps_pred->pu1_dma_dest_addr = pu1_ref_buf;
+ ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
+ ps_pred->u2_frm_wd = u2_frm_wd;
+ ps_pred->i1_dma_ht = u1_dma_ht;
+ ps_pred->i1_mc_wd = i1_mc_wd;
+ ps_pred->pu1_rec_y_u = pu1_rec;
+ ps_pred->u2_dst_stride = u2_rec_wd;
+
+ ps_pred->i1_mb_partwidth = u1_part_wd << 2;
+ ps_pred->i1_mb_partheight = u1_part_ht << 2;
+ ps_pred->u1_mc_addr_ofst = u1_ofst_in_word;
+ ps_pred->u1_dydx = (u1_dy << 2) + u1_dx; /* luma packing: 4 * dy + dx */
+ ps_pred->u1_is_bi_direct = u1_is_bi_dir;
+ ps_pred->u1_pi1_wt_ofst_rec_v = (UWORD8 *)pu4_wt_offset; /* luma entry: this field carries the weight/offset pointer */
+ ps_pred->u1_wght_pred_type = u1_wght_pred_type;
+ ps_pred->i1_pod_ht = 0;
+
+ /* Increment the Reference buffer Indices */
+ pu1_ref_buf += u1_dma_wd * u1_dma_ht;
+ u2_tot_ref_scratch_size += u1_dma_wd * u1_dma_ht;
+
+ /* unrestricted field motion comp for top region outside frame */
+ i2_pod_ht = (-i2_frm_y) >> u1_mb_fld;
+ if((i2_pod_ht > 0) && u1_pod_top)
+ {
+ ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht); /* stored negative for the top case */
+ u1_dma_ht -= i2_pod_ht;
+ pu1_pred += i2_pod_ht * u2_frm_wd;
+ }
+ /* unrestricted field motion comp for bottom region outside frame */
+ else if(u1_pod_bot)
+ {
+ i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld);
+ if(i2_pod_ht > 0)
+ {
+ u1_dma_ht -= i2_pod_ht;
+ ps_pred->i1_pod_ht = (WORD8)i2_pod_ht; /* stored positive for the bottom case */
+ }
+ }
+
+ /* Copy Y partition */
+
+ /*
+ * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
+ * is copied into the scratch buffer so that the padding_on_demand does not corrupt the frame data
+ */
+ if(ps_pred->i1_pod_ht)
+ {
+ ps_pred->pu1_pred = pu1_pred;
+ ps_pred->u1_dma_ht_y = u1_dma_ht;
+ ps_pred->u1_dma_wd_y = u1_dma_wd;
+ }
+ ps_pred->pu1_y_ref = pu1_pred;
+ }
+
+
+
+ /* Move on to the chroma entry */
+ ps_pred++;
+
+ /* Transfer Setup U & V */
+ {
+ WORD32 i4_ref_offset, i4_rec_offset;
+ UWORD8 *pu1_pred_u, *pu1_pred_v, u1_tmp_dma_ht;
+ /* u1_chroma_cbp is computed here but not referenced again in this routine */
+ UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4);
+ /* (upper bits of u1_cbp) */
+
+ /* calculating rounded motion vectors and fractional components */
+ i2_tmp_mv_x = i2_mv_x;
+ i2_tmp_mv_y = i2_mv_y;
+
+ /************************************************************************/
+ /* Table 8-9: Derivation of the vertical component of the chroma vector */
+ /* in field coding mode */
+ /************************************************************************/
+ if(u1_pod_bot && u1_mb_or_pic_bot)
+ i2_tmp_mv_y += 2;
+ if(u1_pod_top && !u1_mb_or_pic_bot)
+ i2_tmp_mv_y -= 2;
+
+ /* Eighth sample of the chroma MV */
+ u1_dx = i2_tmp_mv_x & 0x7;
+ u1_dy = i2_tmp_mv_y & 0x7;
+
+ /********************************************************************/
+ /* Calculating the full pel MV for chroma which is 1/2 of the Luma */
+ /* MV in full pel units */
+ /********************************************************************/
+ i2_mv_x = i2_tmp_mv_x;
+ i2_mv_y = i2_tmp_mv_y;
+ i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3);
+ i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3);
+ i1_mc_wd = u1_part_wd << 1;
+ u1_dma_ht = u1_part_ht << 1;
+ if(u1_dx)
+ {
+ if(i2_mv_x < 0)
+ i2_tmp_mv_x -= 1; /* one further step back for a negative fractional MV */
+ i1_mc_wd++;
+ }
+ if(u1_dy != 0)
+ {
+ if(i2_mv_y < 0)
+ i2_tmp_mv_y -= 1; /* one further step back for a negative fractional MV */
+ u1_dma_ht++;
+ }
+
+ /********************************************************************/
+ /* Calculating the horizontal and the vertical u4_ofst from top left */
+ /* edge of the reference frame, and subsequent clipping */
+ /********************************************************************/
+ u2_pic_ht >>= 1; /* chroma plane has half the luma height */
+ u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_pic_fld;
+ i2_frm_x = (u2_mb_x << 3) + (u1_sub_x << 1) + i2_tmp_mv_x;
+ i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 3)
+ + (((u1_sub_y << 1) + i2_tmp_mv_y) << u1_mb_fld);
+
+ i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM,
+ ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x);
+ i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld),
+ (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y);
+
+ i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR;
+ u1_ofst_in_word = 0;
+ u1_dma_wd = (i1_mc_wd + u1_ofst_in_word + 3) & 0xFC; /* round up to a multiple of 4 */
+ i4_ref_offset -= u1_ofst_in_word;
+
+ /********************************************************************/
+ /* Calculating the horizontal and the vertical u4_ofst from top left */
+ /* edge of the recon buffer */
+ /********************************************************************/
+ /* u2_rec_wd and i4_rec_offset set here are placeholders; both are recomputed below */
+ u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
+ i2_rec_x = u1_sub_x << 1;
+ i2_rec_y = u1_sub_y << 1;
+ i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
+ {
+ u2_rec_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
+
+ i2_rec_x += (mb_index << 3);
+ i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
+ if(u1_mb_bot)
+ i4_rec_offset += ps_dec->u2_frm_wd_uv << ((u1_mb_fld) ? 0 : 3); /* bottom MB: 1 row down (field MB) or 8 rows down (frame MB) */
+ ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset;
+ ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v
+ + i4_rec_offset; /* chroma entry: this field carries the V destination */
+
+ }
+
+ /* destination pointers and stride are now final */
+
+ /* filling the common pred structures for U */
+ u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
+ u1_tmp_dma_ht = u1_dma_ht; /* height before any pod reduction; used for the scratch size */
+ ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
+ ps_pred->u2_frm_wd = u2_frm_wd;
+ ps_pred->i1_dma_ht = u1_dma_ht;
+ ps_pred->i1_mc_wd = i1_mc_wd;
+ ps_pred->u2_dst_stride = u2_rec_wd;
+
+ ps_pred->i1_mb_partwidth = u1_part_wd << 1;
+ ps_pred->i1_mb_partheight = u1_part_ht << 1;
+ ps_pred->u1_mc_addr_ofst = u1_ofst_in_word;
+ ps_pred->u1_dydx = (u1_dy << 3) + u1_dx; /* chroma packing: 8 * dy + dx */
+ ps_pred->u1_is_bi_direct = u1_is_bi_dir;
+ ps_pred->u1_wght_pred_type = u1_wght_pred_type;
+ ps_pred->i1_pod_ht = 0;
+
+ ps_pred->pu1_dma_dest_addr = pu1_ref_buf;
+
+ /* unrestricted field motion comp for top region outside frame */
+ i2_pod_ht = (-i2_frm_y) >> u1_mb_fld;
+ if((i2_pod_ht > 0) && u1_pod_top)
+ {
+ i4_ref_offset += i2_pod_ht * u2_frm_wd;
+ u1_dma_ht -= i2_pod_ht;
+ ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht); /* stored negative for the top case */
+ }
+ /* unrestricted field motion comp for bottom region outside frame */
+ else if(u1_pod_bot)
+ {
+ i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld);
+ if(i2_pod_ht > 0)
+ {
+ u1_dma_ht -= i2_pod_ht;
+ ps_pred->i1_pod_ht = (WORD8)i2_pod_ht; /* stored positive for the bottom case */
+ }
+ }
+
+ pu1_pred_u = pu1_buf2 + i4_ref_offset;
+ pu1_pred_v = pu1_buf3 + i4_ref_offset;
+
+ /* Record the U source and, when pod applies, the reduced fetch size */
+ if(ps_pred->i1_pod_ht)
+ {
+ ps_pred->pu1_pred_u = pu1_pred_u;
+ ps_pred->u1_dma_ht_uv = u1_dma_ht;
+ ps_pred->u1_dma_wd_uv = u1_dma_wd;
+
+ }
+ ps_pred->pu1_u_ref = pu1_pred_u;
+
+ /* Scratch accounting: the chroma area is counted twice (NOTE(review): presumably once each for U and V - confirm) */
+ u2_tot_ref_scratch_size += (u1_dma_wd * u1_tmp_dma_ht) << 1;
+
+ if(ps_pred->i1_pod_ht)
+ {
+ ps_pred->pu1_pred_v = pu1_pred_v;
+ ps_pred->u1_dma_ht_uv = u1_dma_ht; /* same values as written in the U branch above */
+ ps_pred->u1_dma_wd_uv = u1_dma_wd;
+ }
+
+ ps_pred->pu1_v_ref = pu1_pred_v;
+ }
+
+ /* Two entries (luma + chroma) were consumed */
+ ps_dec->u4_pred_info_idx += 2;
+
+
+ /* Increment the reference buffer Index */
+ ps_dec->u4_dma_buf_idx += u2_tot_ref_scratch_size;
+
+ if(ps_dec->u4_dma_buf_idx > ps_dec->u4_ref_buf_size) /* checked only after the entries above were written */
+ return ERROR_NUM_MV;
+
+ return OK;
+
+
+
+}
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_motion_compensate_bp \endif
+ *
+ * \brief
+ *    Forms the inter predictors of one macroblock from the pred_info_t
+ *    entries starting at ps_dec->ps_pred. Entries are consumed in pairs:
+ *    a luma entry followed by the matching chroma entry. The original
+ *    description states that this routine serves BASELINE profile only.
+ *
+ * \param ps_dec: decoder context; provides the pred_info_t list, the
+ *    interpolation function pointers and the pi2_pred1 scratch buffer.
+ * \param ps_cur_mb_info: not used by this routine.
+ *
+ * \return
+ *    None
+ * \note
+ *    The loop stops once the processed luma partition areas reach 256.
+ **************************************************************************
+ */
+
+void ih264d_motion_compensate_bp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info)
+{
+    pred_info_t *ps_entry;
+    UWORD8 *pu1_scratch = (UWORD8 *)(ps_dec->pi2_pred1);
+    UWORD32 u4_luma_area = 0;
+
+    PROFILE_DISABLE_INTER_PRED()
+    UNUSED(ps_cur_mb_info);
+    ps_entry = ps_dec->ps_pred;
+
+    while(u4_luma_area < 256)
+    {
+        UWORD8 *pu1_src;
+        UWORD8 *pu1_dst;
+        UWORD32 u4_src_strd;
+        UWORD32 u4_dst_strd;
+        UWORD32 u4_luma_wd;
+        UWORD32 u4_luma_ht;
+        UWORD32 u4_chroma_wd;
+        UWORD32 u4_chroma_ht;
+        UWORD32 u4_frac_x;
+        UWORD32 u4_frac_y;
+
+        /*------------------------------------------------------------*/
+        /* Luma entry                                                 */
+        /*------------------------------------------------------------*/
+        u4_src_strd = ps_entry->u2_frm_wd;
+        pu1_src = ps_entry->pu1_y_ref;
+        /* u1_dydx holds 4 * dy + dx for luma. A non-zero dx or dy moves */
+        /* the source pointer forward by 2 columns or 2 rows.            */
+        if(ps_entry->u1_dydx & 0x3)
+        {
+            pu1_src += 2;
+        }
+        if(ps_entry->u1_dydx >> 2)
+        {
+            pu1_src += 2 * u4_src_strd;
+        }
+
+        u4_luma_wd = ps_entry->i1_mb_partwidth;
+        u4_luma_ht = ps_entry->i1_mb_partheight;
+        pu1_dst = ps_entry->pu1_rec_y_u;
+        u4_dst_strd = ps_entry->u2_dst_stride;
+
+        /* u1_dydx also selects the interpolation routine */
+        ps_dec->apf_inter_pred_luma[ps_entry->u1_dydx](pu1_src, pu1_dst,
+                                                       u4_src_strd,
+                                                       u4_dst_strd,
+                                                       u4_luma_ht,
+                                                       u4_luma_wd,
+                                                       pu1_scratch,
+                                                       ps_entry->u1_dydx);
+        ps_entry++;
+
+        /*------------------------------------------------------------*/
+        /* Chroma entry                                               */
+        /*------------------------------------------------------------*/
+        u4_src_strd = ps_entry->u2_frm_wd;
+        pu1_src = ps_entry->pu1_u_ref + ps_entry->u1_mc_addr_ofst;
+        u4_chroma_wd = ps_entry->i1_mb_partwidth;
+        u4_chroma_ht = ps_entry->i1_mb_partheight;
+
+        /* u1_dydx holds 8 * dy + dx for chroma */
+        u4_frac_x = ps_entry->u1_dydx;
+        u4_frac_y = u4_frac_x >> 3;
+        u4_frac_x &= 0x7;
+
+        pu1_dst = ps_entry->pu1_rec_y_u;
+        u4_dst_strd = ps_entry->u2_dst_stride;
+        ps_entry++;
+
+        ps_dec->pf_inter_pred_chroma(pu1_src, pu1_dst, u4_src_strd,
+                                     u4_dst_strd, u4_frac_x, u4_frac_y,
+                                     u4_chroma_ht, u4_chroma_wd);
+
+        /* only the low 8 bits of the luma width and height are counted */
+        u4_luma_area += (UWORD8)u4_luma_wd * (UWORD8)u4_luma_ht;
+    }
+}
+
+
+/*
+ **************************************************************************
+ * \if Function name : MotionCompensateB \endif
+ *
+ * \brief
+ * The routine forms predictor blocks for the entire MB and stores it in
+ * predictor buffers.
+ *
+ * \param ps_dec: Pointer to the structure decStruct. This is used to get
+ * pointers to the current and the reference frame and to the MbParams
+ * structure.
+ *
+ * \return
+ * None
+ *
+ * \note
+ * The routine forms predictors for all the luma and the chroma MB
+ * partitions.
+ **************************************************************************
+ */
+
+void ih264d_motion_compensate_mp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info)
+{ /* Consumes ps_dec->ps_pred entries (luma entry, then chroma entry, per direction) until 256 luma pels are covered */
+ pred_info_t *ps_pred ;
+ pred_info_t *ps_pred_y_forw, *ps_pred_y_back, *ps_pred_cr_forw;
+ UWORD8 *puc_ref, *pu1_dest_y, *puc_pred0, *puc_pred1;
+ UWORD8 *pu1_dest_u, *pu1_dest_v;
+ WORD16 *pi16_intm;
+ UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd;
+ UWORD32 u2_dest_wd_y, u2_dest_wd_uv;
+ UWORD32 u2_row_buf_wd_y = ps_dec->u2_mb_group_cols_y1;
+ UWORD32 u2_row_buf_wd_uv = ps_dec->u2_mb_group_cols_cr1;
+ UWORD32 u2_log2Y_crwd = ps_dec->ps_cur_slice->u2_log2Y_crwd; /* low byte goes to luma weighting, upper bits to chroma weighting (see below) */
+ UWORD32 u4_wd_y, u4_ht_y, u1_dir, u4_wd_uv;
+ UWORD32 u4_ht_uv;
+ UWORD8 *pu1_temp_mc_buffer = ps_dec->pu1_temp_mc_buffer; /* destination of the second-direction luma prediction */
+ WORD32 i2_pod_ht;
+ UWORD32 u2_pic_ht, u2_frm_wd, u2_rec_wd;
+ UWORD32 u1_pod_bot, u1_pod_top;
+ UWORD8 *pu1_pred, *pu1_dma_dst;
+ UWORD32 u1_dma_wd, u1_dma_ht;
+
+ dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
+
+ /* set default value to flags specifying field nature of picture & mb */
+ UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld;
+ UWORD32 u1_mb_or_pic_bot;
+ /* calculate flags specifying field nature of picture & mb */
+ const UWORD8 u1_pic_fld = ps_cur_slice->u1_field_pic_flag;
+
+ PROFILE_DISABLE_INTER_PRED()
+ ps_pred = ps_dec->ps_pred ;
+ /* Initialize both ps_pred_y_forw and ps_pred_y_back to avoid static analysis warnings */
+ ps_pred_y_forw = ps_pred;
+ ps_pred_y_back = ps_pred;
+
+ if(ps_dec->u1_separate_parse)
+ u2_log2Y_crwd = ps_dec->ps_decode_cur_slice->u2_log2Y_crwd; /* with separate parsing the value comes from the slice being decoded */
+
+ if(!u1_pic_fld)
+ {
+ u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; /* the per-MB field flag is only consulted for frame pictures */
+ }
+
+ u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld;
+
+ pi16_intm = ps_dec->pi2_pred1;
+ puc_pred0 = (UWORD8 *)pi16_intm; /* the 16-bit scratch buffer is addressed as bytes */
+ puc_pred1 = puc_pred0 + MB_SIZE * MB_SIZE;
+
+ for(u2_num_pels = 0; u2_num_pels < 256;) /* one iteration per MB partition; u2_num_pels grows by the partition area */
+ {
+ UWORD8 uc_dx, uc_dy;
+ const UWORD8 u1_is_bi_direct = ps_pred->u1_is_bi_direct;
+ for(u1_dir = 0; u1_dir <= u1_is_bi_direct; u1_dir++) /* pass 0 fills the "forw" pointers, pass 1 (bi-directional only) the "back" ones */
+ {
+ /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in
+ the MB partition are zero then it would be better to copy the
+ predictor values directly to the current frame buffer */
+ /*
+ * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
+ * is copied in the Scratch buffer so that the padding_on_demand does not corrupt the frame data
+ */
+
+ if(ps_pred->i1_pod_ht)
+ {
+ u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
+ puc_ref = ps_pred->pu1_dma_dest_addr;
+ }
+ else
+ {
+ u2_ref_wd_y = ps_pred->u2_frm_wd;
+ puc_ref = ps_pred->pu1_y_ref;
+
+ }
+
+ if(ps_pred->u1_dydx & 0x3) /* non-zero horizontal fraction: start 2 pels to the right (NOTE(review): presumably skips the filter margin - confirm) */
+ puc_ref += 2;
+ if(ps_pred->u1_dydx >> 2) /* non-zero vertical fraction: start 2 rows further down */
+ puc_ref += 2 * u2_ref_wd_y;
+ u4_wd_y = ps_pred->i1_mb_partwidth;
+ u4_ht_y = ps_pred->i1_mb_partheight;
+
+ if(ps_pred->i1_pod_ht)
+ {
+ pu1_pred = ps_pred->pu1_pred;
+ pu1_dma_dst = ps_pred->pu1_dma_dest_addr;
+ u1_dma_wd = ps_pred->u1_dma_wd_y;
+ u1_dma_ht = ps_pred->u1_dma_ht_y;
+ u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld; /* stride is doubled for field MBs / field pictures */
+ }
+
+ uc_dx = ps_pred->u1_dydx; /* luma: dy in the bits above bit 1, dx in the low 2 bits */
+ uc_dy = uc_dx >> 2;
+ uc_dx &= 0x3; /* NOTE(review): uc_dx / uc_dy are not read again before the chroma section overwrites them */
+ if(u1_dir == 0)
+ {
+ pu1_dest_y = ps_pred->pu1_rec_y_u;
+ u2_row_buf_wd_y = ps_pred->u2_dst_stride;
+ u2_dst_wd = ps_pred->u2_dst_stride;
+ u2_dest_wd_y = u2_dst_wd;
+ ps_pred_y_forw = ps_pred;
+ }
+ else
+ {
+ pu1_dest_y = pu1_temp_mc_buffer;
+ u2_dst_wd = MB_SIZE;
+ u2_dest_wd_y = u2_dst_wd;
+ ps_pred_y_back = ps_pred;
+ ps_pred_y_back->pu1_rec_y_u = pu1_dest_y; /* redirect the entry so the weighting stage reads the second prediction from the temp buffer */
+ }
+
+ /* padding on demand (POD) for y done here */
+
+ if(ps_pred->i1_pod_ht)
+ {
+ if(ps_pred->i1_pod_ht < 0)
+ {
+ pu1_dma_dst =
+ pu1_dma_dst
+ - (ps_pred->i1_pod_ht
+ * ps_pred->u2_u1_ref_buf_wd); /* i1_pod_ht is negative here, so this moves the copy destination down by |i1_pod_ht| rows */
+ }
+ ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, u1_dma_wd,
+ u1_dma_ht);
+ ih264d_pad_on_demand(ps_pred, LUM_BLK);
+ }
+ ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y, /* interpolation routine is selected by the packed dy/dx value */
+ u2_ref_wd_y,
+ u2_dst_wd,
+ u4_ht_y,
+ u4_wd_y,
+ puc_pred0,
+ ps_pred->u1_dydx);
+ ps_pred++; /* step to the chroma entry that follows the luma entry */
+
+ /* Interpolate samples for the chroma components */
+ {
+ UWORD8 *pu1_ref_u;
+ UWORD32 u1_dma_ht; /* NOTE(review): shadows the function-level u1_dma_ht */
+
+ /* padding on demand (POD) for U and V done here */
+ u1_dma_ht = ps_pred->i1_dma_ht;
+
+ if(ps_pred->i1_pod_ht)
+ {
+ pu1_pred = ps_pred->pu1_pred_u;
+ pu1_dma_dst = ps_pred->pu1_dma_dest_addr;
+ u1_dma_ht = ps_pred->u1_dma_ht_uv;
+ u1_dma_wd = ps_pred->u1_dma_wd_uv * YUV420SP_FACTOR;
+ u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
+ if(ps_pred->i1_pod_ht < 0)
+ {
+ /*Top POD*/
+ pu1_dma_dst -= (ps_pred->i1_pod_ht
+ * ps_pred->u2_u1_ref_buf_wd
+ * YUV420SP_FACTOR);
+ }
+
+ ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd,
+ u1_dma_wd, u1_dma_ht);
+
+ pu1_dma_dst += (ps_pred->i1_dma_ht
+ * ps_pred->u2_u1_ref_buf_wd); /* NOTE(review): pu1_dma_dst and pu1_pred set here look unused before their next assignment */
+ pu1_pred = ps_pred->pu1_pred_v;
+
+ ih264d_pad_on_demand(ps_pred, CHROM_BLK);
+ }
+
+ if(ps_pred->i1_pod_ht)
+ {
+ pu1_ref_u = ps_pred->pu1_dma_dest_addr;
+
+ u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd
+ * YUV420SP_FACTOR;
+ }
+ else
+ {
+ u2_ref_wd_uv = ps_pred->u2_frm_wd;
+ pu1_ref_u = ps_pred->pu1_u_ref;
+
+ }
+
+ u4_wd_uv = ps_pred->i1_mb_partwidth;
+ u4_ht_uv = ps_pred->i1_mb_partheight;
+ uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */
+ uc_dy = uc_dx >> 3;
+ uc_dx &= 0x7;
+ if(u1_dir == 0)
+ {
+ pu1_dest_u = ps_pred->pu1_rec_y_u;
+
+ pu1_dest_v = ps_pred->u1_pi1_wt_ofst_rec_v;
+ u2_row_buf_wd_uv = ps_pred->u2_dst_stride;
+ u2_dst_wd = ps_pred->u2_dst_stride;
+ u2_dest_wd_uv = u2_dst_wd;
+ ps_pred_cr_forw = ps_pred;
+ }
+ else
+ {
+ pu1_dest_u = puc_pred0; /* second-direction chroma goes to the scratch buffer */
+
+ pu1_dest_v = puc_pred1;
+ u2_dest_wd_uv = BUFFER_WIDTH;
+ u2_dst_wd = BUFFER_WIDTH;
+ ps_pred->pu1_rec_y_u = pu1_dest_u;
+ ps_pred->u1_pi1_wt_ofst_rec_v = pu1_dest_v;
+ }
+
+ ps_pred++; /* step past the chroma entry; the call below uses the values latched above */
+ ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u,
+ u2_ref_wd_uv, u2_dst_wd,
+ uc_dx, uc_dy, u4_ht_uv,
+ u4_wd_uv);
+
+ if(ps_cur_mb_info->u1_Mux == 1)
+ {
+ /******************************************************************/
+ /* padding on demand (POD) for U and V done here */
+ /* ps_pred now points to the Y entry of the 0,0 component */
+ /* Y need not be checked for POD because Y lies within */
+ /* the picture ((0,0) mv for Y does not get changed). But (0,0) for */
+ /* U and V can need POD because of cross-field mv adjustments */
+ /* (Table 8-9 of standard) */
+ /******************************************************************/
+ if((ps_pred + 1)->i1_pod_ht)
+
+ {
+
+ pu1_pred = (ps_pred + 1)->pu1_pred_u;
+ pu1_dma_dst = (ps_pred + 1)->pu1_dma_dest_addr;
+ u1_dma_ht = (ps_pred + 1)->u1_dma_ht_uv;
+ u1_dma_wd = (ps_pred + 1)->u1_dma_wd_uv
+ * YUV420SP_FACTOR;
+ u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
+ if((ps_pred + 1)->i1_pod_ht < 0)
+ {
+ /*Top POD*/
+ pu1_dma_dst -= ((ps_pred + 1)->i1_pod_ht
+ * (ps_pred + 1)->u2_u1_ref_buf_wd
+ * YUV420SP_FACTOR);
+ }
+ ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd,
+ u1_dma_wd, u1_dma_ht);
+ pu1_dma_dst += ((ps_pred + 1)->i1_dma_ht
+ * (ps_pred + 1)->u2_u1_ref_buf_wd); //(u1_dma_ht * u1_dma_wd);//
+ pu1_pred = (ps_pred + 1)->pu1_pred_v;
+ ih264d_pad_on_demand(ps_pred + 1, CHROM_BLK);
+
+ }
+
+ ih264d_multiplex_ref_data(ps_dec, ps_pred, pu1_dest_y,
+ pu1_dest_u, pu1_dest_v, ps_cur_mb_info,
+ u2_dest_wd_y, u2_dest_wd_uv,
+ u1_dir);
+ ps_pred += 2; /* skip the luma + chroma entries of the (0,0) component just consumed */
+ }
+ }
+ }
+ if(u1_dir != 0) /* u1_dir equals u1_is_bi_direct + 1 after the loop, so this condition always holds */
+ u2_ref_wd_y = MB_SIZE;
+
+ u2_num_pels += u4_wd_y * u4_ht_y;
+ /* If bi-directional, or if weighted prediction is enabled, combine / weight the */
+ /* prediction(s) in place in the forward destination buffer */
+ if((u1_is_bi_direct != 0) || (ps_pred_y_forw->u1_wght_pred_type != 0))
+ {
+
+ switch(ps_pred_y_forw->u1_wght_pred_type)
+ {
+ case 0: /* default weighted prediction of the two directions */
+ ps_dec->pf_default_weighted_pred_luma(
+ ps_pred_y_forw->pu1_rec_y_u, pu1_dest_y,
+ ps_pred_y_forw->pu1_rec_y_u,
+ u2_row_buf_wd_y, u2_ref_wd_y,
+ u2_row_buf_wd_y, u4_ht_uv * 2, /* luma size is passed as twice the chroma partition size */
+ u4_wd_uv * 2);
+
+ ps_dec->pf_default_weighted_pred_chroma(
+ ps_pred_cr_forw->pu1_rec_y_u, pu1_dest_u,
+ ps_pred_cr_forw->pu1_rec_y_u,
+ u2_row_buf_wd_uv, u2_dst_wd,
+ u2_row_buf_wd_uv, u4_ht_uv,
+ u4_wd_uv);
+
+ break;
+ case 1: /* weighted prediction with a single weight/offset set */
+ {
+ UWORD32 *pu4_weight_ofst =
+ (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v; /* on the luma entry this field holds the packed weight/offset table */
+ UWORD32 u4_wt_ofst_u, u4_wt_ofst_v;
+ UWORD32 u4_wt_ofst_y =
+ (UWORD32)(pu4_weight_ofst[0]);
+ WORD32 weight = (WORD16)(u4_wt_ofst_y & 0xffff); /* low 16 bits, sign-extended */
+ WORD32 ofst = (WORD8)(u4_wt_ofst_y >> 16); /* bits 16..23, sign-extended */
+
+ ps_dec->pf_weighted_pred_luma(ps_pred_y_forw->pu1_rec_y_u,
+ ps_pred_y_forw->pu1_rec_y_u,
+ u2_row_buf_wd_y,
+ u2_row_buf_wd_y,
+ (u2_log2Y_crwd & 0x0ff),
+ weight, ofst, u4_ht_y,
+ u4_wd_y);
+
+ u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]);
+ u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]);
+ weight = ((u4_wt_ofst_v & 0xffff) << 16) /* V weight in the upper half, U weight in the lower half */
+ | (u4_wt_ofst_u & 0xffff);
+ ofst = ((u4_wt_ofst_v >> 16) << 8) /* V offset above bit 7, U offset in the low byte */
+ | ((u4_wt_ofst_u >> 16) & 0xFF);
+
+ ps_dec->pf_weighted_pred_chroma(
+ ps_pred_cr_forw->pu1_rec_y_u,
+ ps_pred_cr_forw->pu1_rec_y_u,
+ u2_row_buf_wd_uv, u2_row_buf_wd_uv,
+ (u2_log2Y_crwd >> 8), weight, ofst,
+ u4_ht_y >> 1, u4_wd_y >> 1);
+ }
+
+ break;
+ case 2: /* weighted bi-prediction: table entries 0/2/4 for the first direction, 1/3/5 for the second */
+ {
+ UWORD32 *pu4_weight_ofst =
+ (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v;
+ UWORD32 u4_wt_ofst_u, u4_wt_ofst_v;
+ UWORD32 u4_wt_ofst_y;
+ WORD32 weight1, weight2;
+ WORD32 ofst1, ofst2;
+
+ u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[0]);
+
+ weight1 = (WORD16)(u4_wt_ofst_y & 0xffff);
+ ofst1 = (WORD8)(u4_wt_ofst_y >> 16);
+
+ u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[1]);
+ weight2 = (WORD16)(u4_wt_ofst_y & 0xffff);
+ ofst2 = (WORD8)(u4_wt_ofst_y >> 16);
+
+ ps_dec->pf_weighted_bi_pred_luma(ps_pred_y_forw->pu1_rec_y_u,
+ ps_pred_y_back->pu1_rec_y_u,
+ ps_pred_y_forw->pu1_rec_y_u,
+ u2_row_buf_wd_y,
+ u2_ref_wd_y,
+ u2_row_buf_wd_y,
+ (u2_log2Y_crwd & 0x0ff),
+ weight1, weight2, ofst1,
+ ofst2, u4_ht_y,
+ u4_wd_y);
+
+ u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]);
+ u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]);
+ weight1 = ((u4_wt_ofst_v & 0xffff) << 16)
+ | (u4_wt_ofst_u & 0xffff);
+ ofst1 = ((u4_wt_ofst_v >> 16) << 8)
+ | ((u4_wt_ofst_u >> 16) & 0xFF);
+
+ u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[3]);
+ u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[5]);
+ weight2 = ((u4_wt_ofst_v & 0xffff) << 16)
+ | (u4_wt_ofst_u & 0xffff);
+ ofst2 = ((u4_wt_ofst_v >> 16) << 8)
+ | ((u4_wt_ofst_u >> 16) & 0xFF);
+
+ ps_dec->pf_weighted_bi_pred_chroma(
+ (ps_pred_y_forw + 1)->pu1_rec_y_u, /* the chroma entry directly follows its luma entry */
+ (ps_pred_y_back + 1)->pu1_rec_y_u,
+ (ps_pred_y_forw + 1)->pu1_rec_y_u,
+ u2_row_buf_wd_uv, u2_dst_wd,
+ u2_row_buf_wd_uv, (u2_log2Y_crwd >> 8),
+ weight1, weight2, ofst1, ofst2,
+ u4_ht_y >> 1, u4_wd_y >> 1);
+ }
+
+ break;
+ }
+
+ }
+ }
+}
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_multiplex_ref_data \endif
+ *
+ * \brief
+ * Copies reference samples into the prediction output for every 4x4 luma
+ * sub-block (and the matching 2 rows x 4 bytes of chroma) whose bit is set
+ * in ps_cur_mb_info->u2_mask[u1_dir]; other sub-blocks are left untouched.
+ * \return
+ * None
+ **************************************************************************
+ */
+
+void ih264d_multiplex_ref_data(dec_struct_t * ps_dec,
+ pred_info_t *ps_pred,
+ UWORD8* pu1_dest_y,
+ UWORD8* pu1_dest_u,
+ UWORD8* pu1_dest_v,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD16 u2_dest_wd_y,
+ UWORD16 u2_dest_wd_uv,
+ UWORD8 u1_dir)
+{
+ UWORD16 u2_mask = ps_cur_mb_info->u2_mask[u1_dir]; /* one bit per 4x4 sub-block, consumed LSB first in raster order */
+ UWORD8 *pu1_ref_y, *pu1_ref_u, *pu1_ref_v;
+ UWORD8 uc_cond, i, j, u1_dydx;
+ UWORD16 u2_ref_wd_y, u2_ref_wd_uv;
+
+ PROFILE_DISABLE_INTER_PRED()
+
+ if(ps_pred->i1_pod_ht) /* ps_pred is the luma entry: read from the padded scratch copy when POD was applied */
+ {
+ pu1_ref_y = ps_pred->pu1_dma_dest_addr + ps_pred->u1_mc_addr_ofst;
+
+ u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
+ }
+ else
+ {
+ pu1_ref_y = ps_pred->pu1_y_ref + ps_pred->u1_mc_addr_ofst;
+ u2_ref_wd_y = ps_pred->u2_frm_wd;
+ }
+
+ ps_pred++; /* chroma entry follows the luma entry */
+ if(ps_pred->i1_pod_ht)
+ {
+ pu1_ref_u = ps_pred->pu1_dma_dest_addr + ps_pred->u1_mc_addr_ofst;
+ pu1_ref_v = pu1_ref_u + ps_pred->u2_u1_ref_buf_wd * ps_pred->i1_dma_ht;
+ u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd * YUV420SP_FACTOR;
+
+ }
+ else
+ {
+ pu1_ref_u = ps_pred->pu1_u_ref + ps_pred->u1_mc_addr_ofst;
+ pu1_ref_v = ps_pred->pu1_v_ref + ps_pred->u1_mc_addr_ofst;
+ u2_ref_wd_uv = ps_pred->u2_frm_wd;
+
+ }
+
+ u1_dydx = ps_pred->u1_dydx;
+
+ {
+ UWORD8 uc_dx, uc_dy;
+ UWORD8 *pu1_scratch_v, *pu1_scratch_u;
+
+ uc_dx = u1_dydx & 0x7; /* chroma u1_dydx is 8*dy + dx, so dx is the low 3 bits (was masked with 0x3) */
+ uc_dy = u1_dydx >> 3;
+ if(u1_dydx != 0) /* fractional chroma vector: interpolate into scratch first, then copy from there */
+ {
+ pred_info_t * ps_prv_pred = ps_pred - 2; /* two entries back from the current chroma entry; its scratch area is reused */
+ pu1_scratch_u = ps_prv_pred->pu1_dma_dest_addr
+ + ps_prv_pred->u1_mc_addr_ofst;
+ pu1_scratch_v = pu1_scratch_u
+ + ps_prv_pred->u2_u1_ref_buf_wd
+ * ps_prv_pred->i1_dma_ht;
+ ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_scratch_u,
+ u2_ref_wd_uv, 16, uc_dx, uc_dy, 8,
+ 8);
+
+ /* Modify ref pointer and refWidth to point to scratch */
+ /* buffer to be used below in ih264d_copy_multiplex_data functions */
+ /* CHANGED CODE */
+ pu1_ref_u = pu1_scratch_u;
+ pu1_ref_v = pu1_scratch_v;
+ u2_ref_wd_uv = 8 * YUV420SP_FACTOR;
+ }
+ }
+ {
+ for(i = 0; i < 4; i++) /* 4 rows of sub-blocks */
+ {
+ for(j = 0; j < 4; j++) /* 4 sub-blocks per row */
+ {
+
+ uc_cond = u2_mask & 1;
+ u2_mask >>= 1;
+ if(uc_cond)
+ {
+ *(UWORD32 *)(pu1_dest_y + u2_dest_wd_y) = /* luma rows 1..3 of the 4x4 block, 4 bytes each */
+ *(UWORD32 *)(pu1_ref_y + u2_ref_wd_y);
+ *(UWORD32 *)(pu1_dest_y + 2 * u2_dest_wd_y) =
+ *(UWORD32 *)(pu1_ref_y + 2 * u2_ref_wd_y);
+ *(UWORD32 *)(pu1_dest_y + 3 * u2_dest_wd_y) =
+ *(UWORD32 *)(pu1_ref_y + 3 * u2_ref_wd_y);
+ { /* luma row 0, then advance both pointers by 4 bytes */
+ UWORD32 *dst, *src;
+ dst = (UWORD32 *)pu1_dest_y;
+ src = (UWORD32 *)pu1_ref_y;
+ *dst = *src;
+ dst++;
+ src++;
+ pu1_dest_y = (UWORD8 *)dst;
+ pu1_ref_y = (UWORD8 *)src;
+ }
+ *(UWORD32 *)(pu1_dest_u + u2_dest_wd_uv) = /* chroma row 1 */
+ *(UWORD32 *)(pu1_ref_u + u2_ref_wd_uv);
+ { /* chroma row 0, then advance both pointers by 4 bytes */
+ UWORD32 *dst, *src;
+ dst = (UWORD32 *)pu1_dest_u;
+ src = (UWORD32 *)pu1_ref_u;
+ *dst = *src;
+ dst++;
+ src++;
+ pu1_dest_u = (UWORD8 *)dst;
+ pu1_ref_u = (UWORD8 *)src;
+ }
+
+ }
+ else
+ { /* bit clear: skip this sub-block, only move the cursors */
+ pu1_dest_y += 4;
+ pu1_ref_y += 4;
+ pu1_dest_u += 2 * YUV420SP_FACTOR;
+ pu1_ref_u += 2 * YUV420SP_FACTOR;
+ pu1_dest_v += 2; /* the V cursors are maintained but never dereferenced in this function */
+ pu1_ref_v += 2;
+ }
+ }
+ pu1_ref_y += 4 * (u2_ref_wd_y - 4); /* 16 bytes already consumed: net effect is 4 luma rows down */
+ pu1_ref_u += 2 * (u2_ref_wd_uv - 4 * YUV420SP_FACTOR);
+ pu1_ref_v += 2 * (u2_ref_wd_uv - 4);
+ pu1_dest_y += 4 * (u2_dest_wd_y - 4);
+ pu1_dest_u += 2 * (u2_dest_wd_uv - 4 * YUV420SP_FACTOR);
+ pu1_dest_v += 2 * (u2_dest_wd_uv - 4);
+ }
+ }
+}
+
+void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk)
+{
+ /*
+ * Pad-on-demand for the scratch copy at ps_pred->pu1_dma_dest_addr.
+ *
+ * ps_pred->i1_pod_ht gives the number of rows to synthesise:
+ * < 0 : top padding, rows are written from the start of the buffer
+ * >= 0 : bottom padding, rows are written from row (i1_dma_ht - i1_pod_ht)
+ * Every padded row is a copy of one and the same source row (the source
+ * pointer is never advanced), read and written one 32-bit word at a time.
+ *
+ * lum_chrom_blk only affects the row stride: CHROM_BLK rows are
+ * YUV420SP_FACTOR times as long as luma rows. Both block kinds otherwise
+ * share the same arithmetic, so a single code path handles them.
+ */
+ UWORD32 *pu4_edge_row;
+ UWORD32 *pu4_fill;
+ WORD32 i4_row_words;
+ WORD32 i4_buf_rows;
+ WORD32 i4_pad_rows;
+ WORD32 i4_row;
+ WORD32 i4_word;
+
+ /* Row length expressed in 32-bit words */
+ i4_row_words = ps_pred->u2_u1_ref_buf_wd >> 2;
+ if(CHROM_BLK == lum_chrom_blk)
+ {
+ i4_row_words = i4_row_words * YUV420SP_FACTOR;
+ }
+
+ i4_buf_rows = ps_pred->i1_dma_ht;
+ i4_pad_rows = ps_pred->i1_pod_ht;
+
+ /* Both cursors start at the base of the scratch buffer */
+ pu4_fill = (UWORD32 *)ps_pred->pu1_dma_dest_addr;
+ pu4_edge_row = pu4_fill;
+
+ /* Common step: move the source by -i1_pod_ht rows */
+ pu4_edge_row -= i4_row_words * i4_pad_rows;
+
+ if(i4_pad_rows >= 0)
+ {
+ /*
+ * Bottom POD: destination starts at row (i1_dma_ht - i1_pod_ht)
+ * and the source ends up on the row just above the destination.
+ */
+ pu4_edge_row += (i4_buf_rows - 1) * i4_row_words;
+ pu4_fill += (i4_buf_rows - i4_pad_rows) * i4_row_words;
+ }
+ else
+ {
+ /* Top POD: source is the first row below the padded area */
+ i4_pad_rows = -i4_pad_rows;
+ }
+
+ /* Replicate the edge row into each padded row */
+ for(i4_row = 0; i4_row < i4_pad_rows; i4_row++)
+ {
+ for(i4_word = 0; i4_word < i4_row_words; i4_word++)
+ {
+ *pu4_fill = pu4_edge_row[i4_word];
+ pu4_fill++;
+ }
+ }
+}
+
diff --git a/decoder/ih264d_inter_pred.h b/decoder/ih264d_inter_pred.h
new file mode 100755
index 0000000..52d648a
--- /dev/null
+++ b/decoder/ih264d_inter_pred.h
@@ -0,0 +1,93 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _IH264D_INTER_PRED_H_
+#define _IH264D_INTER_PRED_H_
+
+/*!
+ **************************************************************************
+ * \file ih264d_inter_pred.h
+ *
+ * \brief
+ * Declarations for the inter prediction (motion compensation) routines
+ *
+ * Prototypes and buffer-size constants shared by the inter prediction code
+ *
+ * \date
+ * creation_date
+ *
+ * \author Arvind Raman
+ **************************************************************************
+ */
+
+#include "ih264d_structs.h"
+
+#define BUFFER_WIDTH 16
+/*!
+ **************************************************************************
+ * \brief PRED_BUFFER_WIDTH / HEIGHT
+ *
+ * Width and height of the 16-bit intermediate prediction buffer (also
+ * reused as two 8-bit buffers). The minimum required dimensions are 21x21;
+ * the width was first raised to 24 so that every row starts on a WORD
+ * aligned boundary.
+ **************************************************************************
+ */
+/* Both dimensions are currently 24*2. The replacement lists are parenthesised */
+/* so the macros expand safely inside expressions such as x / PRED_BUFFER_WIDTH. */
+#define PRED_BUFFER_WIDTH (24*2)
+#define PRED_BUFFER_HEIGHT (24*2)
+
+void ih264d_fill_pred_info(WORD16 *pi2_mv,WORD32 part_width,WORD32 part_height, WORD32 sub_mb_num,
+ WORD32 pred_dir,pred_info_pkd_t *ps_pred_pkd,WORD8 i1_buf_id,
+ WORD8 i1_ref_idx,UWORD32 *pu4_wt_offset,UWORD8 u1_pic_type);
+
+WORD32 ih264d_form_mb_part_info_bp(pred_info_pkd_t *ps_pred_pkd,
+ dec_struct_t * ps_dec,
+ UWORD16 u2_mb_x,
+ UWORD16 u2_mb_y,
+ WORD32 mb_index,
+ dec_mb_info_t *ps_cur_mb_info);
+
+WORD32 ih264d_form_mb_part_info_mp(pred_info_pkd_t *ps_pred_pkd,
+ dec_struct_t * ps_dec,
+ UWORD16 u2_mb_x,
+ UWORD16 u2_mb_y,
+ WORD32 mb_index,
+ dec_mb_info_t *ps_cur_mb_info);
+
+
+void ih264d_motion_compensate_bp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info);
+void ih264d_motion_compensate_mp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info); /* forms luma and chroma predictors for all partitions of one MB */
+
+
+void TransferRefBuffs(dec_struct_t *ps_dec);
+
+void ih264d_multiplex_ref_data(dec_struct_t * ps_dec, /* copies reference data for the sub-blocks selected by ps_cur_mb_info->u2_mask[u1_dir] */
+ pred_info_t *ps_pred,
+ UWORD8* pu1_dest_y,
+ UWORD8* pu1_dest_u,
+ UWORD8* pu1_dest_v,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD16 u2_dest_wd_y,
+ UWORD16 u2_dest_wd_uv,
+ UWORD8 u1_dir);
+#endif /* _IH264D_INTER_PRED_H_ */
+
diff --git a/decoder/ih264d_mb_utils.c b/decoder/ih264d_mb_utils.c
new file mode 100755
index 0000000..4cbfca5
--- /dev/null
+++ b/decoder/ih264d_mb_utils.c
@@ -0,0 +1,1496 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_mb_utils.c
+ *
+ * \brief
+ * Contains utility functions needed for Macroblock decoding
+ *
+ * \date
+ * 18/12/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_parse_mb_header.h"
+#include "ih264d_cabac.h"
+#include "ih264d_defs.h"
+#include "ih264d_tables.h"
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_mb_info_cavlc_nonmbaff */
+/* */
+/* Description : This function sets the following information of cur MB */
+/* (a) mb_x and mb_y (advanced to the next MB in raster order) */
+/* (b) Neighbour availability */
+/* (c) Neighbour MB pointers in the current and top MB rows */
+/* (e) Field decoding flag of the MB, copied from */
+/* ps_dec->u1_cur_mb_fld_dec_flag (nothing is read */
+/* from the bitstream in this non-MBAFF variant) */
+/* */
+/* Inputs : pointer to decstruct */
+/* currentMbaddress */
+/* pointer to current mb info, u4_mbskip_run (unused) */
+/* */
+/* Processing : leftMb and TopMb params are used by DecMbskip and */
+/* DecCtxMbfield modules so that these modules do not */
+/* check for neighbour availability and then find the */
+/* neighbours for context increments */
+/* */
+/* Returns : OK */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Jay Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 ih264d_get_mb_info_cavlc_nonmbaff(dec_struct_t *ps_dec,
+ const UWORD16 u2_cur_mb_address,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip_run)
+{
+ UWORD16 u2_mb_x;
+ UWORD16 u2_mb_y;
+ UWORD8 u1_mb_ngbr_avail = 0;
+ UWORD16 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs;
+ WORD16 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx; /* local copy: it is overwritten with -1 below without touching ps_dec */
+ UWORD16 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE;
+ UWORD16 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE;
+ UNUSED(u4_mbskip_run);
+ /*--------------------------------------------------------------------*/
+ /* Calculate values of mb_x and mb_y */
+ /*--------------------------------------------------------------------*/
+ u2_mb_x = ps_dec->u2_mbx;
+ u2_mb_y = ps_dec->u2_mby;
+
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_dec->u2_cur_mb_addr = u2_cur_mb_address;
+ }
+ u2_mb_x++; /* step to the next MB; wrap to the start of the next row at the frame width */
+
+ if(u2_mb_x == u2_frm_width_in_mb)
+ {
+ u2_mb_x = 0;
+ u2_mb_y++;
+ }
+ if(u2_mb_y > ps_dec->i2_prev_slice_mby) /* current MB lies below the row recorded in i2_prev_slice_mby */
+ {
+ /* if not in the immediate row of prev slice end then top
+ will be available */
+ if(u2_mb_y > (ps_dec->i2_prev_slice_mby + 1))
+ i2_prev_slice_mbx = -1;
+
+ if(u2_mb_x > i2_prev_slice_mbx) /* top MB */
+ {
+ u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE;
+ u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE;
+ }
+
+ if((u2_mb_x > (i2_prev_slice_mbx - 1)) /* top-right MB; never available in the last column */
+ && (u2_mb_x != (u2_frm_width_in_mb - 1)))
+ {
+ u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE;
+ }
+
+ if(u2_mb_x > (i2_prev_slice_mbx + 1)) /* top-left MB */
+ {
+ u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK;
+ u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE;
+ }
+
+ /* Next row Left will be available*/
+ i2_prev_slice_mbx = -1;
+ }
+
+ /* Same row */
+ if(u2_mb_x > (i2_prev_slice_mbx + 1)) /* with i2_prev_slice_mbx == -1 this is true for every column except 0 */
+ {
+ u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK;
+ u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
+ }
+
+ {
+ mb_neigbour_params_t *ps_cur_mb_row = ps_dec->ps_cur_mb_row;
+ mb_neigbour_params_t *ps_top_mb_row = ps_dec->ps_top_mb_row;
+
+ /* copy the parameters of topleft Mb */
+ ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype;
+ /* Neighbour pointer assignments*/
+ ps_cur_mb_info->ps_curmb = ps_cur_mb_row + u2_mb_x;
+ ps_cur_mb_info->ps_left_mb = ps_cur_mb_row + u2_mb_x - 1; /* pointers are set regardless of the availability flags computed above */
+ ps_cur_mb_info->ps_top_mb = ps_top_mb_row + u2_mb_x;
+ ps_cur_mb_info->ps_top_right_mb = ps_top_mb_row + u2_mb_x + 1;
+
+ /* Update the parameters of topleftmb*/
+ ps_dec->u1_topleft_mbtype = ps_cur_mb_info->ps_top_mb->u1_mb_type; /* the current top MB is stored as the top-left type for the next call */
+ }
+
+ ps_dec->u2_mby = u2_mb_y;
+ ps_dec->u2_mbx = u2_mb_x;
+ ps_cur_mb_info->u2_mbx = u2_mb_x;
+ ps_cur_mb_info->u2_mby = u2_mb_y;
+ ps_cur_mb_info->u1_topmb = 1; /* always flagged as a top MB in this variant */
+ ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
+ ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
+ ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
+ ps_cur_mb_info->ps_curmb->u1_mb_fld = ps_dec->u1_cur_mb_fld_dec_flag;
+ ps_cur_mb_info->u1_mb_field_decodingflag = ps_dec->u1_cur_mb_fld_dec_flag;
+ ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask;
+ ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask;
+ return (OK);
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_mb_info_cavlc_mbaff */
+/* */
+/* Description : This function sets the following information of cur MB */
+/* (a) mb_x and mb_y */
+/* (b) Neighbour availability */
+/* (c) Macroblock location in the frame buffer */
+/* (e) For mbaff predicts field/frame u4_flag for topMb */
+/* and sets the field/frame for botMb. This is */
+/* written in ps_dec->u1_cur_mb_fld_dec_flag */
+/* */
+/* Inputs : pointer to decstruct */
+/* pointer to current mb info */
+/* currentMbaddress */
+/* */
+/* Processing : leftMb and TopMb params are used by DecMbskip and */
+/* DecCtxMbfield modules so that these modules do not */
+/* check for neighbour availability and then find the */
+/* neighbours for context increments */
+/* */
+/* Returns : OK */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Jay Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 ih264d_get_mb_info_cavlc_mbaff(dec_struct_t *ps_dec,
+ const UWORD16 u2_cur_mb_address,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip_run)
+{
+ UWORD16 u2_mb_x;
+ UWORD16 u2_mb_y;
+ UWORD8 u1_mb_ngbr_avail = 0;
+ UWORD16 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs;
+
+ UWORD8 u1_top_mb = 1 - (u2_cur_mb_address & 0x01); /* even MB addresses are treated as the top MB of a pair */
+ WORD16 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx;
+ UWORD8 u1_cur_mb_field = 0;
+ UWORD16 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE;
+ UWORD16 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE;
+
+ /*--------------------------------------------------------------------*/
+ /* Calculate values of mb_x and mb_y */
+ /*--------------------------------------------------------------------*/
+ u2_mb_x = ps_dec->u2_mbx;
+ u2_mb_y = ps_dec->u2_mby;
+
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_dec->u2_cur_mb_addr = u2_cur_mb_address;
+ }
+
+
+ if(u1_top_mb) /* position and availability are only recomputed for the top MB */
+ {
+ u2_mb_x++;
+ if(u2_mb_x == u2_frm_width_in_mb)
+ {
+ u2_mb_x = 0;
+ u2_mb_y += 2; /* rows advance two at a time */
+ }
+ if(u2_mb_y > ps_dec->i2_prev_slice_mby)
+ {
+ /* if not in the immediate row of prev slice end then top
+ will be available */
+ if(u2_mb_y > (ps_dec->i2_prev_slice_mby + 2))
+ i2_prev_slice_mbx = -1;
+ if(u2_mb_x > i2_prev_slice_mbx)
+ {
+ u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK;
+ u1_cur_mb_field = ps_dec->ps_top_mb_row[u2_mb_x << 1].u1_mb_fld; /* initial guess of the field flag from the top neighbour (row arrays hold 2 entries per column) */
+ u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE;
+ u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE;
+ }
+ if((u2_mb_x > (i2_prev_slice_mbx - 1))
+ && (u2_mb_x != (u2_frm_width_in_mb - 1)))
+ {
+ u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE;
+ }
+
+ if(u2_mb_x > (i2_prev_slice_mbx + 1))
+ {
+ u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK;
+ u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE;
+ }
+
+ i2_prev_slice_mbx = -1;
+ }
+ /* Same row */
+ if(u2_mb_x > (i2_prev_slice_mbx + 1))
+ {
+ u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK;
+ u1_cur_mb_field =
+ ps_dec->ps_cur_mb_row[(u2_mb_x << 1) - 1].u1_mb_fld; /* when available, the left neighbour's flag overrides the top-based guess */
+ u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
+ }
+ /* Read u1_cur_mb_field from the bitstream if u4_mbskip_run <= 1*/
+ if(u4_mbskip_run <= 1)
+ u1_cur_mb_field = (UWORD8)ih264d_get_bit_h264(ps_dec->ps_bitstrm);
+
+ ps_dec->u1_cur_mb_fld_dec_flag = u1_cur_mb_field; /* saved so the bottom MB call can reuse these values */
+ ps_dec->u2_top_left_mask = u2_top_left_mask;
+ ps_dec->u2_top_right_mask = u2_top_right_mask;
+ }
+ else
+ { /* bottom MB: start from the values stored in ps_dec by the top MB call */
+ u1_mb_ngbr_avail = ps_dec->u1_mb_ngbr_availablity;
+ u1_cur_mb_field = ps_dec->u1_cur_mb_fld_dec_flag;
+ u2_top_left_mask = ps_dec->u2_top_left_mask;
+ u2_top_right_mask = ps_dec->u2_top_right_mask;
+
+ if(!u1_cur_mb_field) /* adjustments below apply only when the field flag is 0 */
+ {
+ /* Top is available */
+ u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE;
+ u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE;
+ /* Top Right not available */
+ u1_mb_ngbr_avail &= TOP_RT_SUBBLOCK_MASK_MOD;
+ u2_top_right_mask &= (~TOP_RIGHT_TOPR_AVAILABLE);
+
+ if(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK) /* top-left is marked available exactly when left is available */
+ {
+ u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK;
+ u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
+ u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE;
+ }
+ }
+ }
+
+ ps_dec->u2_mby = u2_mb_y;
+ ps_dec->u2_mbx = u2_mb_x;
+ ps_cur_mb_info->u2_mbx = u2_mb_x;
+ ps_cur_mb_info->u2_mby = u2_mb_y;
+ ps_cur_mb_info->u1_topmb = u1_top_mb;
+ ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
+ ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
+ ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
+ ps_cur_mb_info->u1_mb_field_decodingflag = u1_cur_mb_field;
+ ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask;
+ ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask;
+ ih264d_get_mbaff_neighbours(ps_dec, ps_cur_mb_info, u1_cur_mb_field); /* neighbour pointers are resolved by this helper rather than inline */
+ return (OK);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_mb_info_cabac_nonmbaff */
+/* */
+/* Description : This function sets the following information of cur MB */
+/* (a) mb_x and mb_y */
+/* (b) Neighbour availablity */
+/* (c) Macroblock location in the frame buffer */
+/* (e) leftMb params and TopMb params of curMB */
+/* (f) For Mbaff case leftMb params and TopMb params of */
+/* bottomMb are also set if curMB is top */
+/* (g) For mbaff predicts field/frame u4_flag for topMb */
+/* and sets the field/frame for botMb. This is */
+/* written in ps_dec->u1_cur_mb_fld_dec_flag */
+/* */
+/* Inputs : pointer to decstruct */
+/* pointer to current mb info */
+/* currentMbaddress */
+/* */
+/* Processing : leftMb and TopMb params are used by DecMbskip and */
+/* DecCtxMbfield modules so that these modules do not */
+/* check for neighbour availability and then find the */
+/* neighbours for context increments */
+/* */
+/* Returns : OK */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Jay Draft */
+/* */
+/*****************************************************************************/
+UWORD32 ih264d_get_mb_info_cabac_nonmbaff(dec_struct_t *ps_dec,
+ const UWORD16 u2_cur_mb_address,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip)
+{
+ WORD32 u2_mb_x;
+ WORD32 u2_mb_y;
+ UWORD32 u1_mb_ngbr_avail = 0;
+ UWORD32 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs;
+ UWORD32 u1_top_mb = 1;
+ WORD32 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx;
+ UWORD32 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE;
+ UWORD32 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE;
+ ctxt_inc_mb_info_t * const p_ctx_inc_mb_map = ps_dec->p_ctxt_inc_mb_map;
+
+ /*--------------------------------------------------------------------*/
+ /* Calculate values of mb_x and mb_y */
+ /*--------------------------------------------------------------------*/
+ u2_mb_x = (WORD16)ps_dec->u2_mbx;
+ u2_mb_y = ps_dec->u2_mby;
+
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_dec->u2_cur_mb_addr = u2_cur_mb_address;
+ }
+
+ u2_mb_x++;
+ if((UWORD32)u2_mb_x == u2_frm_width_in_mb)
+ {
+ u2_mb_x = 0;
+ u2_mb_y++;
+ }
+ /*********************************************************************/
+ /* Cabac Context Initialisations */
+ /*********************************************************************/
+ ps_dec->ps_curr_ctxt_mb_info = p_ctx_inc_mb_map + u2_mb_x;
+ ps_dec->p_left_ctxt_mb_info = p_ctx_inc_mb_map - 1;
+ ps_dec->p_top_ctxt_mb_info = p_ctx_inc_mb_map - 1;
+
+ /********************************************************************/
+ /* neighbour availablility */
+ /********************************************************************/
+ if(u2_mb_y > ps_dec->i2_prev_slice_mby)
+ {
+ /* if not in the immemdiate row of prev slice end then top
+ will be available */
+ if(u2_mb_y > (ps_dec->i2_prev_slice_mby + 1))
+ i2_prev_slice_mbx = -1;
+
+ if(u2_mb_x > i2_prev_slice_mbx)
+ {
+ u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE;
+ u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE;
+ ps_dec->p_top_ctxt_mb_info = ps_dec->ps_curr_ctxt_mb_info;
+ }
+ if((u2_mb_x > (i2_prev_slice_mbx - 1))
+ && ((UWORD32)u2_mb_x != (u2_frm_width_in_mb - 1)))
+ {
+ u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE;
+ }
+
+ if(u2_mb_x > (i2_prev_slice_mbx + 1))
+ {
+ u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK;
+ u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE;
+ }
+ /* Next row */
+ i2_prev_slice_mbx = -1;
+ }
+ /* Same row */
+ if(u2_mb_x > (i2_prev_slice_mbx + 1))
+ {
+ u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK;
+ u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
+ ps_dec->p_left_ctxt_mb_info = ps_dec->ps_curr_ctxt_mb_info - 1;
+ }
+ {
+ mb_neigbour_params_t *ps_cur_mb_row = ps_dec->ps_cur_mb_row;
+ mb_neigbour_params_t *ps_top_mb_row = ps_dec->ps_top_mb_row;
+ /* copy the parameters of topleft Mb */
+ ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype;
+ /* Neighbour pointer assignments*/
+ ps_cur_mb_info->ps_curmb = ps_cur_mb_row + u2_mb_x;
+ ps_cur_mb_info->ps_left_mb = ps_cur_mb_row + u2_mb_x - 1;
+ ps_cur_mb_info->ps_top_mb = ps_top_mb_row + u2_mb_x;
+ ps_cur_mb_info->ps_top_right_mb = ps_top_mb_row + u2_mb_x + 1;
+
+ /* Update the parameters of topleftmb*/
+ ps_dec->u1_topleft_mbtype = ps_cur_mb_info->ps_top_mb->u1_mb_type;
+ }
+
+ ps_dec->u2_mby = u2_mb_y;
+ ps_dec->u2_mbx = u2_mb_x;
+ ps_cur_mb_info->u2_mbx = u2_mb_x;
+ ps_cur_mb_info->u2_mby = u2_mb_y;
+ ps_cur_mb_info->u1_topmb = u1_top_mb;
+ ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
+ ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
+ ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
+ ps_cur_mb_info->ps_curmb->u1_mb_fld = ps_dec->u1_cur_mb_fld_dec_flag;
+ ps_cur_mb_info->u1_mb_field_decodingflag = ps_dec->u1_cur_mb_fld_dec_flag;
+ ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask;
+ ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask;
+
+ /*********************************************************************/
+ /* Assign the neigbours */
+ /*********************************************************************/
+ if(u4_mbskip)
+ {
+ UWORD32 u4_ctx_inc =
+ 2
+ - ((!!(ps_dec->p_top_ctxt_mb_info->u1_mb_type
+ & CAB_SKIP_MASK))
+ + (!!(ps_dec->p_left_ctxt_mb_info->u1_mb_type
+ & CAB_SKIP_MASK)));
+
+ u4_mbskip = ih264d_decode_bin(u4_ctx_inc, ps_dec->p_mb_skip_flag_t,
+ ps_dec->ps_bitstrm, &ps_dec->s_cab_dec_env);
+
+ if(!u4_mbskip)
+ {
+ if(!(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK))
+ {
+ UWORD32 *pu4_buf;
+ UWORD8 *pu1_buf;
+
+ pu1_buf = ps_dec->pu1_left_nnz_y;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0;
+ pu1_buf = ps_dec->pu1_left_nnz_uv;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0;
+
+
+ *(ps_dec->pu1_left_yuv_dc_csbp) = 0;
+ MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
+ *(UWORD32 *)ps_dec->pi1_left_ref_idx_ctxt_inc = 0;
+ }
+ if(!(u1_mb_ngbr_avail & TOP_MB_AVAILABLE_MASK))
+ {
+ MEMSET_16BYTES(ps_dec->ps_curr_ctxt_mb_info->u1_mv, 0);
+ memset(ps_dec->ps_curr_ctxt_mb_info->i1_ref_idx, 0, 4);
+ }
+ }
+ }
+ return (u4_mbskip);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_mb_info_cabac_mbaff */
+/* */
+/* Description : This function sets the following information of cur MB */
+/* (a) mb_x and mb_y */
+/* (b) Neighbour availablity */
+/* (c) Macroblock location in the frame buffer */
+/* (e) leftMb params and TopMb params of curMB */
+/* (f) For Mbaff case leftMb params and TopMb params of */
+/* bottomMb are also set if curMB is top */
+/* (g) For mbaff predicts field/frame flag for topMb */
+/* and sets the field/frame for botMb. This is */
+/* written in ps_dec->u1_cur_mb_fld_dec_flag */
+/* */
+/* Inputs : pointer to decstruct */
+/* pointer to current mb info */
+/* currentMbaddress */
+/* */
+/* Processing : leftMb and TopMb params are used by DecMbskip and */
+/* DecCtxMbfield modules so that these modules do not */
+/* check for neigbour availability and then find the */
+/* neigbours for context increments */
+/* */
+/* Returns : u4_mbskip (0 if passed in as 0, else the mb skip */
+/* flag decoded for this MB) */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Jay Draft */
+/* */
+/*****************************************************************************/
+
+/*
+ * Per-MB setup for CABAC decoding of an MBAFF frame.
+ *
+ * For the top MB of a pair (even u2_cur_mb_address) this advances the MB
+ * position, derives neighbour availability, decodes the mb_skip_flag of the
+ * top MB (and, if the top MB is skipped, of the bottom MB as well) and the
+ * mb_field_decoding_flag of the pair. For the bottom MB (odd address) it
+ * reuses the values saved in ps_dec by the top-MB call.
+ * In both cases the results are written to ps_cur_mb_info / ps_dec, then
+ * ih264d_get_mbaff_neighbours() and ih264d_get_cabac_context_mbaff() are
+ * called and the significant_coeff_flag context pointers are selected for
+ * field or frame decoding.
+ *
+ * ps_dec            : decoder state (read and updated)
+ * u2_cur_mb_address : only bit 0 is used here to tell top from bottom MB; the
+ *                     full value is stored in ps_dec->u2_cur_mb_addr when
+ *                     ps_dec->u1_separate_parse is set
+ * ps_cur_mb_info    : per-MB info filled in by this function
+ * u4_mbskip         : non-zero requests decoding of the mb skip flag
+ *
+ * Returns u4_mbskip: 0 if it was passed in as 0, otherwise the skip flag
+ * obtained for this MB.
+ */
+UWORD32 ih264d_get_mb_info_cabac_mbaff(dec_struct_t *ps_dec,
+ const UWORD16 u2_cur_mb_address,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip)
+{
+ UWORD16 u2_mb_x;
+ UWORD16 u2_mb_y;
+ UWORD8 u1_mb_ngbr_avail = 0;
+ UWORD16 u2_frm_width_in_mb = ps_dec->u2_frm_wd_in_mbs;
+ ctxt_inc_mb_info_t * const p_ctx_inc_mb_map = ps_dec->p_ctxt_inc_mb_map;
+ ctxt_inc_mb_info_t *ps_curr_ctxt, *ps_top_ctxt, *ps_left_ctxt;
+ mb_neigbour_params_t *ps_cur_mb_row = ps_dec->ps_cur_mb_row;
+ mb_neigbour_params_t *ps_top_mb_row = ps_dec->ps_top_mb_row;
+ UWORD32 u4_left_mb_pair_fld = 0;
+ UWORD32 u4_top_mb_pair_fld = 0;
+ UWORD8 u1_cur_mb_field = 0;
+ /* 1 for even MB addresses (top MB of a pair), 0 for odd ones (bottom MB) */
+ UWORD8 u1_top_mb = 1 - (u2_cur_mb_address & 0x01);
+ WORD16 i2_prev_slice_mbx = ps_dec->i2_prev_slice_mbx;
+ UWORD16 u2_top_right_mask = TOP_RIGHT_DEFAULT_AVAILABLE;
+ UWORD16 u2_top_left_mask = TOP_LEFT_DEFAULT_AVAILABLE;
+
+ /*--------------------------------------------------------------------*/
+ /* Calculate values of mb_x and mb_y */
+ /*--------------------------------------------------------------------*/
+ u2_mb_x = ps_dec->u2_mbx;
+ u2_mb_y = ps_dec->u2_mby;
+
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_dec->u2_cur_mb_addr = u2_cur_mb_address;
+ }
+
+ /* Default for unavailable neighbours: the entry just before the start of */
+ /* the context map (presumably a dummy entry - confirm against allocation) */
+ ps_top_ctxt = ps_left_ctxt = p_ctx_inc_mb_map - 1;
+
+ if(u1_top_mb)
+ {
+ ctxt_inc_mb_info_t *ps_left_mb_of_bot = ps_left_ctxt;
+ ctxt_inc_mb_info_t *ps_top_mb_of_bot = ps_top_ctxt;
+
+ /* Only the top MB of a pair advances the position; on wrapping to the */
+ /* next pair row the MB row index moves down by two */
+ u2_mb_x++;
+
+ if(u2_mb_x == u2_frm_width_in_mb)
+ {
+ u2_mb_x = 0;
+ u2_mb_y += 2;
+ }
+
+ /* Two context entries per pair column: 2*x for the top MB and 2*x + 1 */
+ /* for the bottom MB (see the bottom-MB branch below) */
+ ps_curr_ctxt = p_ctx_inc_mb_map + (u2_mb_x << 1);
+ if(u2_mb_y > ps_dec->i2_prev_slice_mby)
+ {
+ UWORD8 u1_cur_mb_fld_flag_known = 0;
+ /* Next row */
+ if(u2_mb_x > 0)
+ {
+ /***********************************************************************/
+ /* Left Mb is avialable */
+ /***********************************************************************/
+ u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK;
+ ps_left_ctxt = ps_curr_ctxt - 2;
+ ps_left_mb_of_bot = ps_curr_ctxt - 1;
+ /* Inferred field flag defaults to that of the left pair; index */
+ /* 2*x - 1 is the bottom MB of the left pair */
+ u1_cur_mb_field = u4_left_mb_pair_fld = ps_cur_mb_row[(u2_mb_x
+ << 1) - 1].u1_mb_fld;
+ u1_cur_mb_fld_flag_known = 1;
+ u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
+ }
+ /* if not in the immemdiate row of prev slice end then top
+ will be available */
+ if(u2_mb_y > (ps_dec->i2_prev_slice_mby + 2))
+ i2_prev_slice_mbx = -1;
+ if(u2_mb_x > i2_prev_slice_mbx)
+ {
+ /*********************************************************************/
+ /* Top Mb is avialable */
+ /*********************************************************************/
+ u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE;
+ u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE;
+
+ /* point to MbAddrB + 1 */
+ ps_top_ctxt = ps_curr_ctxt + 1;
+ u4_top_mb_pair_fld = ps_top_mb_row[(u2_mb_x << 1)].u1_mb_fld;
+
+ /* If the left pair did not supply the inferred field flag, take */
+ /* it from the top pair */
+ u1_cur_mb_field =
+ u1_cur_mb_fld_flag_known ?
+ u1_cur_mb_field :
+ u4_top_mb_pair_fld;
+ ps_top_mb_of_bot = u1_cur_mb_field ? ps_top_ctxt : ps_curr_ctxt;
+
+ /* MbAddrB */
+ /* Steps back from B + 1 to B only when both the current (inferred) */
+ /* flag and the top pair flag are field */
+ ps_top_ctxt -= (u1_cur_mb_field && u4_top_mb_pair_fld);
+ }
+
+ if((u2_mb_x > (i2_prev_slice_mbx - 1))
+ && (u2_mb_x != (u2_frm_width_in_mb - 1)))
+ {
+ u1_mb_ngbr_avail |= TOP_RIGHT_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOPR_AVAILABLE;
+ }
+
+ if(u2_mb_x > (i2_prev_slice_mbx + 1))
+ {
+ u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK;
+ u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE;
+ }
+ }
+ else
+ {
+ /* Same row */
+ if(u2_mb_x > (i2_prev_slice_mbx + 1))
+ {
+ /***************************************************************/
+ /* Left Mb is avialable */
+ /***************************************************************/
+ u1_mb_ngbr_avail |= LEFT_MB_AVAILABLE_MASK;
+
+ u1_cur_mb_field = u4_left_mb_pair_fld = ps_cur_mb_row[(u2_mb_x
+ << 1) - 1].u1_mb_fld;
+ ps_left_ctxt = ps_curr_ctxt - 2;
+ ps_left_mb_of_bot = ps_curr_ctxt - 1;
+ u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
+ }
+ }
+ /*********************************************************/
+ /* Check whether the call is from I slice or Inter slice */
+ /*********************************************************/
+ if(u4_mbskip)
+ {
+ /* Context increment: 2 minus the number of neighbours (top, left) */
+ /* whose u1_mb_type has a CAB_SKIP_MASK bit set */
+ UWORD32 u4_ctx_inc = 2
+ - ((!!(ps_top_ctxt->u1_mb_type & CAB_SKIP_MASK))
+ + (!!(ps_left_ctxt->u1_mb_type
+ & CAB_SKIP_MASK)));
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ decoding_envirnoment_t *ps_cab_dec_env = &ps_dec->s_cab_dec_env;
+ bin_ctxt_model_t *p_mb_skip_flag_t = ps_dec->p_mb_skip_flag_t;
+
+ ps_dec->u4_next_mb_skip = 0;
+ u4_mbskip = ih264d_decode_bin(u4_ctx_inc, p_mb_skip_flag_t,
+ ps_bitstrm, ps_cab_dec_env);
+
+ if(u4_mbskip)
+ {
+ /* Top MB is skipped: mark its context entry and decode the bottom */
+ /* MB's skip flag right away; the result is kept in */
+ /* ps_dec->u4_next_mb_skip for the bottom-MB call */
+ UWORD32 u4_next_mbskip;
+ ps_curr_ctxt->u1_mb_type = CAB_SKIP;
+
+ u4_ctx_inc =
+ 2
+ - ((!!(ps_top_mb_of_bot->u1_mb_type
+ & CAB_SKIP_MASK))
+ + (!!(ps_left_mb_of_bot->u1_mb_type
+ & CAB_SKIP_MASK)));
+
+ /* Decode the skip u4_flag of bottom Mb */
+ u4_next_mbskip = ih264d_decode_bin(u4_ctx_inc, p_mb_skip_flag_t,
+ ps_bitstrm,
+ ps_cab_dec_env);
+
+ ps_dec->u4_next_mb_skip = u4_next_mbskip;
+
+ if(!u4_next_mbskip)
+ {
+ /* Bottom MB is not skipped, so the field flag of the pair is */
+ /* decoded here; its context increment is the number of */
+ /* neighbouring pairs (top, left) that are field pairs */
+ u4_ctx_inc = u4_top_mb_pair_fld + u4_left_mb_pair_fld;
+
+ u1_cur_mb_field = ih264d_decode_bin(
+ u4_ctx_inc, ps_dec->p_mb_field_dec_flag_t,
+ ps_bitstrm, ps_cab_dec_env);
+ }
+ }
+ }
+
+ /* Top MB not skipped, or skip decoding was not requested: decode the */
+ /* field flag of the pair */
+ if(!u4_mbskip)
+ {
+ UWORD32 u4_ctx_inc = u4_top_mb_pair_fld + u4_left_mb_pair_fld;
+ u1_cur_mb_field = ih264d_decode_bin(u4_ctx_inc,
+ ps_dec->p_mb_field_dec_flag_t,
+ ps_dec->ps_bitstrm,
+ &ps_dec->s_cab_dec_env);
+ }
+
+ /* Saved for the bottom-MB call of the same pair */
+ ps_dec->u1_cur_mb_fld_dec_flag = u1_cur_mb_field;
+ ps_dec->u2_top_left_mask = u2_top_left_mask;
+ ps_dec->u2_top_right_mask = u2_top_right_mask;
+ ps_dec->u2_mby = u2_mb_y;
+ ps_dec->u2_mbx = u2_mb_x;
+ }
+ else
+ {
+ /* Bottom MB: position is unchanged; field flag, availability and masks */
+ /* are the ones stored in ps_dec (masks and field flag by the top-MB call) */
+ u1_cur_mb_field = ps_dec->u1_cur_mb_fld_dec_flag;
+ u1_mb_ngbr_avail = ps_dec->u1_mb_ngbr_availablity;
+ u2_top_left_mask = ps_dec->u2_top_left_mask;
+ u2_top_right_mask = ps_dec->u2_top_right_mask;
+ ps_curr_ctxt = p_ctx_inc_mb_map + (u2_mb_x << 1) + 1;
+
+ if(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK)
+ {
+ u4_left_mb_pair_fld = ps_cur_mb_row[(u2_mb_x << 1) - 1].u1_mb_fld;
+
+ /* point to A if top else A+1 */
+ /* ps_curr_ctxt - 2 is A + 1; one more step back reaches A when the */
+ /* left pair's field flag differs from the current one */
+ ps_left_ctxt = ps_curr_ctxt - 2
+ - (u4_left_mb_pair_fld != u1_cur_mb_field);
+ }
+
+ if(u1_cur_mb_field)
+ {
+ if(u1_mb_ngbr_avail & TOP_MB_AVAILABLE_MASK)
+ {
+ /* point to MbAddrB + 1 */
+ ps_top_ctxt = ps_curr_ctxt;
+ }
+ }
+ else
+ {
+ /* Frame bottom MB: its top neighbour is the top MB of the same pair */
+ /* (ps_curr_ctxt - 1), so top is forced available and top-right is */
+ /* forced unavailable */
+ /* Top is available */
+ u1_mb_ngbr_avail |= TOP_MB_AVAILABLE_MASK;
+ u2_top_right_mask |= TOP_RIGHT_TOP_AVAILABLE;
+ u2_top_left_mask |= TOP_LEFT_TOP_AVAILABLE;
+ /* Top Right not available */
+ u1_mb_ngbr_avail &= TOP_RT_SUBBLOCK_MASK_MOD;
+ u2_top_right_mask &= (~TOP_RIGHT_TOPR_AVAILABLE);
+
+ if(u1_mb_ngbr_avail & LEFT_MB_AVAILABLE_MASK)
+ {
+ u1_mb_ngbr_avail |= TOP_LEFT_MB_AVAILABLE_MASK;
+ u2_top_left_mask |= TOP_LEFT_LEFT_AVAILABLE;
+ u2_top_left_mask |= TOP_LEFT_TOPL_AVAILABLE;
+ }
+
+ /* CurMbAddr - 1 */
+ ps_top_ctxt = ps_curr_ctxt - 1;
+ }
+
+ if(u4_mbskip)
+ {
+ /* ps_curr_ctxt[-1] is the context entry of the top MB of this pair */
+ if(ps_curr_ctxt[-1].u1_mb_type & CAB_SKIP_MASK)
+ {
+ /* If previous mb is skipped, return value of next mb skip */
+ u4_mbskip = ps_dec->u4_next_mb_skip;
+
+ }
+ else
+ {
+ /* If previous mb is not skipped then call DecMbSkip */
+ UWORD32 u4_ctx_inc =
+ 2
+ - ((!!(ps_top_ctxt->u1_mb_type
+ & CAB_SKIP_MASK))
+ + (!!(ps_left_ctxt->u1_mb_type
+ & CAB_SKIP_MASK)));
+
+ u4_mbskip = ih264d_decode_bin(u4_ctx_inc,
+ ps_dec->p_mb_skip_flag_t,
+ ps_dec->ps_bitstrm,
+ &ps_dec->s_cab_dec_env);
+ }
+ }
+ }
+
+ /* Publish position, availability and field flag for this MB */
+ ps_cur_mb_info->u2_mbx = u2_mb_x;
+ ps_cur_mb_info->u2_mby = u2_mb_y;
+ ps_cur_mb_info->u1_topmb = u1_top_mb;
+ ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
+ ps_dec->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
+ ps_cur_mb_info->u1_mb_ngbr_availablity = u1_mb_ngbr_avail;
+ ps_cur_mb_info->u1_mb_field_decodingflag = u1_cur_mb_field;
+ ps_cur_mb_info->u2_top_left_avail_mask = u2_top_left_mask;
+ ps_cur_mb_info->u2_top_right_avail_mask = u2_top_right_mask;
+
+ /* Neighbour pointers must be set first: the context setup below reads */
+ /* ps_cur_mb_info->ps_left_mb, ps_top_mb and i1_offset */
+ ih264d_get_mbaff_neighbours(ps_dec, ps_cur_mb_info, u1_cur_mb_field);
+ {
+ ih264d_get_cabac_context_mbaff(ps_dec, ps_cur_mb_info, u4_mbskip);
+ }
+
+ {
+ /* Select the field or frame set of significant_coeff_flag contexts and */
+ /* record the base pointer of each of the six categories */
+ bin_ctxt_model_t *p_cabac_ctxt_table_t = ps_dec->p_cabac_ctxt_table_t;
+
+ if(u1_cur_mb_field)
+ {
+ p_cabac_ctxt_table_t += SIGNIFICANT_COEFF_FLAG_FLD;
+ }
+ else
+ {
+ p_cabac_ctxt_table_t += SIGNIFICANT_COEFF_FLAG_FRAME;
+ }
+ {
+ bin_ctxt_model_t * * p_significant_coeff_flag_t =
+ ps_dec->p_significant_coeff_flag_t;
+ p_significant_coeff_flag_t[0] = p_cabac_ctxt_table_t
+ + SIG_COEFF_CTXT_CAT_0_OFFSET;
+ p_significant_coeff_flag_t[1] = p_cabac_ctxt_table_t
+ + SIG_COEFF_CTXT_CAT_1_OFFSET;
+ p_significant_coeff_flag_t[2] = p_cabac_ctxt_table_t
+ + SIG_COEFF_CTXT_CAT_2_OFFSET;
+ p_significant_coeff_flag_t[3] = p_cabac_ctxt_table_t
+ + SIG_COEFF_CTXT_CAT_3_OFFSET;
+ p_significant_coeff_flag_t[4] = p_cabac_ctxt_table_t
+ + SIG_COEFF_CTXT_CAT_4_OFFSET;
+ p_significant_coeff_flag_t[5] = p_cabac_ctxt_table_t
+ + SIG_COEFF_CTXT_CAT_5_OFFSET;
+
+ }
+ }
+ return (u4_mbskip);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_cabac_context_mbaff */
+/* */
+/* Description : Gets the current macroblock Cabac Context and sets the */
+/* top and left cabac context ptrs in CtxIncMbMap */
+/* 1. For cross-field left neighbours (field/frame mode */
+/* differs from the current MB) it alters the coded */
+/* block flag, motion vectors, reference indices and */
+/* cbp of the left neighbours, which increases the */
+/* code size */
+/* 2. For cross-field top neighbours it alters the */
+/* motion vectors and reference indices of the top */
+/* neighbours, which further increases the code size */
+/* */
+/* Inputs : 1. dec_struct_t */
+/* 2. CurMbAddr used for Mbaff (only to see if curMB */
+/* is top or bottom) */
+/* 3. uc_curMbFldDecFlag only for Mbaff */
+/* */
+/* Returns : 0 */
+/* */
+/* Issues : Code size can be reduced if the ui_CodedBlockFlag */
+/* storage structure in the context is changed. This */
+/* change however would break the parseResidual4x4Cabac */
+/* asm routine. */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 18 06 2005 Jay */
+/* */
+/*****************************************************************************/
+/*
+ * Selects the CABAC context entries (current, left, top) and the left
+ * MV / ref-idx / DC-csbp context storage for the current MB of an MBAFF pair.
+ *
+ * When the left MB pair's field flag differs from the current MB's, a
+ * temporary left context is built in ps_dec->ps_left_mb_ctxt_info and the
+ * left MV and ref-idx context increments are rearranged in place. Unless the
+ * MB is skipped, the top MV / ref-idx context increments are copied into the
+ * current context entry and rescaled on a field/frame mismatch, and the
+ * contexts of unavailable neighbours are zeroed.
+ *
+ * ps_dec         : decoder state; u2_mbx and u1_mb_ngbr_availablity are read
+ * ps_cur_mb_info : u1_topmb, u1_mb_field_decodingflag, i1_offset, ps_left_mb
+ *                  and ps_top_mb are read (ps_top_mb is dereferenced even
+ *                  when the top neighbour is flagged unavailable)
+ * u4_mbskip      : non-zero suppresses the zeroing and the top-context copy
+ *
+ * Always returns OK.
+ */
+UWORD32 ih264d_get_cabac_context_mbaff(dec_struct_t * ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD32 u4_mbskip)
+{
+ const UWORD8 u1_mb_ngbr_availablity = ps_dec->u1_mb_ngbr_availablity;
+ ctxt_inc_mb_info_t * const p_ctx_inc_mb_map = ps_dec->p_ctxt_inc_mb_map;
+
+ UWORD8 (*pu1_left_mv_ctxt_inc_2d)[4] = &ps_dec->pu1_left_mv_ctxt_inc[0];
+ WORD8 (*pi1_left_ref_idx_ctxt_inc) = ps_dec->pi1_left_ref_idx_ctxt_inc;
+ const UWORD8 u1_cur_mb_fld_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+ const UWORD8 u1_topmb = ps_cur_mb_info->u1_topmb;
+ const UWORD8 uc_botMb = 1 - ps_cur_mb_info->u1_topmb;
+
+ ctxt_inc_mb_info_t * ps_leftMB;
+
+ /* First context entry of the current pair; the bottom MB uses the next */
+ /* entry (see the else branch below). The top context pointer starts at the */
+ /* same entry and is adjusted or replaced in the top-context section */
+ ps_dec->ps_curr_ctxt_mb_info = p_ctx_inc_mb_map + (ps_dec->u2_mbx << 1);
+ ps_dec->p_top_ctxt_mb_info = ps_dec->ps_curr_ctxt_mb_info;
+
+ /* Left context storage is kept separately for top ([0]) and bottom ([1]) MB */
+ if(u1_topmb)
+ {
+ pu1_left_mv_ctxt_inc_2d = ps_dec->u1_left_mv_ctxt_inc_arr[0];
+ pi1_left_ref_idx_ctxt_inc = &ps_dec->i1_left_ref_idx_ctx_inc_arr[0][0];
+ ps_dec->pu1_left_yuv_dc_csbp = &ps_dec->u1_yuv_dc_csbp_topmb;
+ }
+ else
+ {
+ /* uc_botMb */
+ pu1_left_mv_ctxt_inc_2d = ps_dec->u1_left_mv_ctxt_inc_arr[1];
+ pi1_left_ref_idx_ctxt_inc = &ps_dec->i1_left_ref_idx_ctx_inc_arr[1][0];
+ ps_dec->pu1_left_yuv_dc_csbp = &ps_dec->u1_yuv_dc_csbp_bot_mb;
+ ps_dec->ps_curr_ctxt_mb_info += 1;
+ }
+
+ ps_dec->pu1_left_mv_ctxt_inc = pu1_left_mv_ctxt_inc_2d;
+ ps_dec->pi1_left_ref_idx_ctxt_inc = pi1_left_ref_idx_ctxt_inc;
+
+ if(u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK)
+ {
+ const UWORD8 u1_left_mb_fld_flag = ps_cur_mb_info->ps_left_mb->u1_mb_fld;
+
+ /* Same-parity MB of the left pair (two entries back) */
+ ps_leftMB = ps_dec->ps_curr_ctxt_mb_info - 2;
+ if(u1_left_mb_fld_flag != u1_cur_mb_fld_flag)
+ {
+ ctxt_inc_mb_info_t *ps_tempLeft;
+ UWORD8 u1_cbp_t, u1_cbp_b;
+ UWORD8 u1_cr_cpb;
+
+ /* On a field/frame mismatch always start from the top MB (A) of the */
+ /* left pair and build a synthetic left context in the scratch entry */
+ ps_leftMB -= uc_botMb;
+ ps_tempLeft = ps_dec->ps_left_mb_ctxt_info;
+ ps_tempLeft->u1_mb_type = ps_leftMB->u1_mb_type;
+ ps_tempLeft->u1_intra_chroma_pred_mode =
+ ps_leftMB->u1_intra_chroma_pred_mode;
+
+ ps_tempLeft->u1_transform8x8_ctxt = ps_leftMB->u1_transform8x8_ctxt;
+
+ /* Kept so that bits 4 and above of A's cbp can be re-attached below */
+ u1_cr_cpb = ps_leftMB->u1_cbp;
+ /*****************************************************************/
+ /* reform RefIdx, CBP, MV and CBF ctxInc taking care of A and A+1*/
+ /*****************************************************************/
+ if(u1_cur_mb_fld_flag)
+ {
+ /* current MB is a FLD and left a FRM */
+ UWORD8 (* const pu1_left_mv_ctxt_inc_2d_arr_top)[4] =
+ ps_dec->u1_left_mv_ctxt_inc_arr[0];
+ UWORD8 (* const pu1_left_mv_ctxt_inc_2d_arr_bot)[4] =
+ ps_dec->u1_left_mv_ctxt_inc_arr[1];
+ WORD8 (* const i1_left_ref_idx_ctxt_inc_arr_top) =
+ &ps_dec->i1_left_ref_idx_ctx_inc_arr[0][0];
+ WORD8 (* const i1_left_ref_idx_ctxt_inc_arr_bot) =
+ &ps_dec->i1_left_ref_idx_ctx_inc_arr[1][0];
+
+ /* Low nibble of the synthetic cbp: bit 1 of A's cbp stays in bit 1, */
+ /* bit 1 of (A + 1)'s cbp moves to bit 3 */
+ u1_cbp_t = ps_leftMB->u1_cbp;
+ u1_cbp_b = (ps_leftMB + 1)->u1_cbp;
+ ps_tempLeft->u1_cbp = (u1_cbp_t & 0x02)
+ | ((u1_cbp_b & 0x02) << 2);
+
+ // set motionvectors as
+ // 0T = 0T 0B = 0T
+ // 1T = 2T 1B = 2T
+ // 2T = 0B 2B = 0B
+ // 3T = 2B 3B = 2B
+ if(u1_topmb)
+ {
+ /********************************************/
+ /* Bottoms DC CBF = Top DC CBF */
+ /********************************************/
+ ps_dec->u1_yuv_dc_csbp_bot_mb =
+ ps_dec->u1_yuv_dc_csbp_topmb;
+
+ /* In this branch pu1_left_mv_ctxt_inc_2d is the top array, so the */
+ /* shuffle is done in place; the statement order ensures row 2 of */
+ /* the top array is read before it is overwritten */
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[3] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d_arr_bot[2];
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[1] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d_arr_top[2];
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[2] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d_arr_bot[0];
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[0] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d_arr_top[0];
+
+ i1_left_ref_idx_ctxt_inc_arr_top[1] =
+ i1_left_ref_idx_ctxt_inc_arr_bot[0];
+ i1_left_ref_idx_ctxt_inc_arr_top[3] =
+ i1_left_ref_idx_ctxt_inc_arr_bot[2];
+
+ /* The bottom MB gets a copy of the rearranged top ref-idx and MV */
+ /* context increments */
+ *(UWORD32 *)(i1_left_ref_idx_ctxt_inc_arr_bot) =
+ *(UWORD32 *)(i1_left_ref_idx_ctxt_inc_arr_top);
+
+ memcpy(pu1_left_mv_ctxt_inc_2d_arr_bot,
+ pu1_left_mv_ctxt_inc_2d_arr_top, 16);
+ }
+
+ {
+ /* Halve entries 1 and 3 of every row (presumably the vertical MV */
+ /* difference components, frame to field scaling - confirm) */
+ UWORD8 i;
+ for(i = 0; i < 4; i++)
+ {
+ pu1_left_mv_ctxt_inc_2d[i][1] >>= 1;
+ pu1_left_mv_ctxt_inc_2d[i][3] >>= 1;
+ }
+ }
+ }
+ else
+ {
+ /* current MB is a FRM and left FLD */
+ if(u1_topmb)
+ {
+ /* Bit 1 of A's cbp is replicated into bits 1 and 3 */
+ u1_cbp_t = ps_leftMB->u1_cbp;
+ u1_cbp_t = (u1_cbp_t & 0x02);
+ ps_tempLeft->u1_cbp = (u1_cbp_t | (u1_cbp_t << 2));
+
+ /********************************************/
+ /* Bottoms DC CBF = Top DC CBF */
+ /********************************************/
+ ps_dec->u1_yuv_dc_csbp_bot_mb =
+ ps_dec->u1_yuv_dc_csbp_topmb;
+
+ // set motionvectors as
+ // 3B = 2B = 3T
+ // 1B = 0B = 2T
+ // 3T = 2T = 1T
+ // 1T = 0T = 0T
+
+ /* NOTE(review): rows 4..7 (and ref-idx entries 4..7 below) lie */
+ /* past the four rows of the top array; this presumably relies on */
+ /* the bottom MB's array following the top one contiguously - */
+ /* confirm against the struct layout. Rows are written from the */
+ /* highest index down so that sources are read before overwrite */
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[7] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[3];
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[6] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[3];
+
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[5] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[2];
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[4] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[2];
+
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[3] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[1];
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[2] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[1];
+
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[1] =
+ *(UWORD32 *)pu1_left_mv_ctxt_inc_2d[0];
+
+ /* Ref-idx context increments are spread the same way and each */
+ /* is decremented by one */
+ pi1_left_ref_idx_ctxt_inc[7] = (pi1_left_ref_idx_ctxt_inc[3]
+ - 1);
+ pi1_left_ref_idx_ctxt_inc[6] = (pi1_left_ref_idx_ctxt_inc[3]
+ - 1);
+
+ pi1_left_ref_idx_ctxt_inc[5] = (pi1_left_ref_idx_ctxt_inc[1]
+ - 1);
+ pi1_left_ref_idx_ctxt_inc[4] = (pi1_left_ref_idx_ctxt_inc[1]
+ - 1);
+
+ pi1_left_ref_idx_ctxt_inc[3] = (pi1_left_ref_idx_ctxt_inc[2]
+ - 1);
+ pi1_left_ref_idx_ctxt_inc[2] = (pi1_left_ref_idx_ctxt_inc[2]
+ - 1);
+
+ pi1_left_ref_idx_ctxt_inc[1] = (pi1_left_ref_idx_ctxt_inc[0]
+ - 1);
+ pi1_left_ref_idx_ctxt_inc[0] = (pi1_left_ref_idx_ctxt_inc[0]
+ - 1);
+ }
+ else
+ {
+ /* Bit 3 of A's cbp is replicated into bits 3 and 1 */
+ u1_cbp_t = ps_leftMB->u1_cbp;
+ u1_cbp_t = (u1_cbp_t & 0x08);
+ ps_tempLeft->u1_cbp = (u1_cbp_t | (u1_cbp_t >> 2));
+ }
+
+ {
+ /* Double entries 1 and 3 of every row (presumably the vertical MV */
+ /* difference components, field to frame scaling - confirm) */
+ UWORD8 i;
+ for(i = 0; i < 4; i++)
+ {
+ pu1_left_mv_ctxt_inc_2d[i][1] <<= 1;
+ pu1_left_mv_ctxt_inc_2d[i][3] <<= 1;
+ }
+ }
+
+ }
+
+ /* Re-attach bits 4 and above of A's original cbp to the synthetic one */
+ ps_tempLeft->u1_cbp = ps_tempLeft->u1_cbp + ((u1_cr_cpb >> 4) << 4);
+ ps_leftMB = ps_tempLeft;
+ }
+
+ ps_dec->p_left_ctxt_mb_info = ps_leftMB;
+ }
+ else
+ {
+ /* Left unavailable: use the entry just before the start of the context */
+ /* map and, for a non-skipped MB, clear the left context storage */
+ ps_dec->p_left_ctxt_mb_info = p_ctx_inc_mb_map - 1;
+ if(!u4_mbskip)
+ {
+ *(ps_dec->pu1_left_yuv_dc_csbp) = 0;
+
+ MEMSET_16BYTES(&pu1_left_mv_ctxt_inc_2d[0][0], 0);
+ *(UWORD32 *)pi1_left_ref_idx_ctxt_inc = 0;
+ }
+ }
+
+ /*************************************************************************/
+ /* Now get the top context mb info */
+ /*************************************************************************/
+ {
+ UWORD8 (*u1_top_mv_ctxt_inc_arr_2d)[4] =
+ ps_dec->ps_curr_ctxt_mb_info->u1_mv;
+ WORD8 (*pi1_top_ref_idx_ctxt_inc) =
+ ps_dec->ps_curr_ctxt_mb_info->i1_ref_idx;
+ UWORD8 uc_topMbFldDecFlag = ps_cur_mb_info->ps_top_mb->u1_mb_fld;
+
+ if(u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK)
+ {
+ /* A non-zero i1_offset (set in ih264d_get_mbaff_neighbours) advances */
+ /* the top context pointer by one entry */
+ if(ps_cur_mb_info->i1_offset)
+ ps_dec->p_top_ctxt_mb_info += 1;
+
+ if(!u4_mbskip)
+ {
+ /* Seed the current entry's MV / ref-idx context with the top */
+ /* neighbour's values, then rescale on a field/frame mismatch */
+ memcpy(u1_top_mv_ctxt_inc_arr_2d,
+ &ps_dec->p_top_ctxt_mb_info->u1_mv, 16);
+ memcpy(pi1_top_ref_idx_ctxt_inc,
+ &ps_dec->p_top_ctxt_mb_info->i1_ref_idx, 4);
+ if(uc_topMbFldDecFlag ^ u1_cur_mb_fld_flag)
+ {
+ UWORD8 i;
+ if(u1_cur_mb_fld_flag)
+ {
+ for(i = 0; i < 4; i++)
+ {
+ u1_top_mv_ctxt_inc_arr_2d[i][1] >>= 1;
+ u1_top_mv_ctxt_inc_arr_2d[i][3] >>= 1;
+ }
+ }
+ else
+ {
+ for(i = 0; i < 4; i++)
+ {
+ u1_top_mv_ctxt_inc_arr_2d[i][1] <<= 1;
+ u1_top_mv_ctxt_inc_arr_2d[i][3] <<= 1;
+ pi1_top_ref_idx_ctxt_inc[i] -= 1;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ /* Top unavailable: use the entry just before the start of the context */
+ /* map and, for a non-skipped MB, clear the current entry's MV / ref-idx */
+ ps_dec->p_top_ctxt_mb_info = p_ctx_inc_mb_map - 1;
+ if(!u4_mbskip)
+ {
+
+ MEMSET_16BYTES(&u1_top_mv_ctxt_inc_arr_2d[0][0], 0);
+ memset(pi1_top_ref_idx_ctxt_inc, 0, 4);
+ }
+ }
+ }
+
+ return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_update_mbaff_left_nnz */
+/* */
+/* Description : This function updates the left luma and chroma nnz for */
+/* mbaff cases. */
+/* */
+/* Inputs : <What inputs does the function take?> */
+/* Globals : <Does it use any global variables?> */
+/* Processing : <Describe how the function operates - include algorithm */
+/* description> */
+/* Outputs : <What does the function produce?> */
+/* Returns : <What does the function return?> */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Ittiam Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Saves the 4-byte left luma and chroma nnz words of the current MB for use
+ * as the left neighbour data of the next MB pair.
+ *
+ * For the top MB of a pair the words are parked in u4_n_left_temp_y /
+ * u4_n_left_temp_uv. For the bottom MB the parked words become entry [0] and
+ * the bottom MB's own words become entry [1] of u4_n_leftY / u4_n_left_cr.
+ *
+ * The byte buffers are read with memcpy instead of through a UWORD32 pointer
+ * cast, so the access does not depend on the alignment or effective type of
+ * the UWORD8 storage. The order of reads and writes is unchanged.
+ *
+ * ps_dec         : decoder state holding the nnz buffers (read and updated)
+ * ps_cur_mb_info : only u1_topmb is read
+ */
+void ih264d_update_mbaff_left_nnz(dec_struct_t * ps_dec,
+                                  dec_mb_info_t * ps_cur_mb_info)
+{
+    UWORD32 u4_nnz;
+
+    if(ps_cur_mb_info->u1_topmb)
+    {
+        /* Top MB: park its words until the bottom MB has been decoded */
+        memcpy(&u4_nnz, ps_dec->pu1_left_nnz_y, sizeof(u4_nnz));
+        ps_dec->u4_n_left_temp_y = u4_nnz;
+
+        memcpy(&u4_nnz, ps_dec->pu1_left_nnz_uv, sizeof(u4_nnz));
+        ps_dec->u4_n_left_temp_uv = u4_nnz;
+    }
+    else
+    {
+        /* Bottom MB: publish the pair, [0] = top MB, [1] = bottom MB */
+        ps_dec->u4_n_leftY[0] = ps_dec->u4_n_left_temp_y;
+        memcpy(&u4_nnz, ps_dec->pu1_left_nnz_y, sizeof(u4_nnz));
+        ps_dec->u4_n_leftY[1] = u4_nnz;
+
+        ps_dec->u4_n_left_cr[0] = ps_dec->u4_n_left_temp_uv;
+        memcpy(&u4_nnz, ps_dec->pu1_left_nnz_uv, sizeof(u4_nnz));
+        ps_dec->u4_n_left_cr[1] = u4_nnz;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_mbaff_neighbours \endif
+ *
+ * \brief
+ * Gets the neighbors for the current MB if it is of type MB-AFF
+ * frame.
+ *
+ * \return
+ * None
+ *
+ **************************************************************************
+ */
+/*
+ * Selects the left, top and top-right neighbour entries and the top-left
+ * field flag / mb type for the current MB of an MBAFF pair, stores them in
+ * ps_cur_mb_info, records the current MB's field flag in its own entry and
+ * forms the left nnz bytes for the current MB from the left pair's nnz words.
+ * For the top MB it also selects the inverse scan table and the 4x4 dequant
+ * scale tables.
+ *
+ * ps_dec             : decoder state (read and updated)
+ * ps_cur_mb_info     : u1_topmb and u2_mbx are read; neighbour fields are set
+ * uc_curMbFldDecFlag : field decoding flag of the current MB
+ */
+void ih264d_get_mbaff_neighbours(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 uc_curMbFldDecFlag)
+{
+
+ mb_neigbour_params_t *ps_left_mb;
+ mb_neigbour_params_t *ps_top_mb;
+ mb_neigbour_params_t *ps_top_right_mb = NULL;
+ mb_neigbour_params_t *ps_curmb;
+ const UWORD8 u1_topmb = ps_cur_mb_info->u1_topmb;
+ const UWORD8 uc_botMb = 1 - u1_topmb;
+ const UWORD32 u4_mb_x = ps_cur_mb_info->u2_mbx;
+
+ /* Current MbParams location in top row buffer */
+ /* Two entries per pair column: 2*x is the top MB, 2*x + 1 the bottom MB */
+ ps_curmb = ps_dec->ps_cur_mb_row + (u4_mb_x << 1) + uc_botMb;
+ /* Same-parity MB of the left pair. */
+ /* NOTE(review): for u4_mb_x == 0 this points before ps_cur_mb_row and is */
+ /* still dereferenced below regardless of left availability; presumably */
+ /* the row buffer has leading padding - confirm against the allocation */
+ ps_left_mb = ps_curmb - 2;
+ /* point to A if top else A+1 */
+ if(uc_botMb && (ps_left_mb->u1_mb_fld != uc_curMbFldDecFlag))
+ {
+ /* move from A + 1 to A */
+ ps_left_mb--;
+ }
+ ps_cur_mb_info->i1_offset = 0;
+ if((uc_curMbFldDecFlag == 0) && uc_botMb)
+ {
+ /* Frame bottom MB: the top neighbour is the top MB of the same pair and */
+ /* ps_top_right_mb stays NULL */
+ mb_neigbour_params_t *ps_topleft_mb;
+ /* CurMbAddr - 1 */
+ ps_top_mb = ps_curmb - 1;
+
+ /* Mark Top right Not available */
+ /* point to A */
+ ps_topleft_mb = ps_curmb - 3;
+
+ if(ps_topleft_mb->u1_mb_fld)
+ {
+ /* point to A + 1 */
+ ps_topleft_mb++;
+ }
+ ps_cur_mb_info->u1_topleft_mb_fld = ps_topleft_mb->u1_mb_fld;
+ ps_cur_mb_info->u1_topleft_mbtype = ps_topleft_mb->u1_mb_type;
+ }
+ else
+ {
+ /* Top MB of a pair, or field bottom MB: neighbours come from the top row */
+ /* Top = B + 1 */
+ ps_top_mb = ps_dec->ps_top_mb_row + (u4_mb_x << 1) + 1;
+ ps_top_right_mb = ps_top_mb + 2;
+ ps_cur_mb_info->i1_offset = 4;
+ /* TopRight = C + 1 */
+
+ /* TopLeft = D+1 */
+ ps_cur_mb_info->u1_topleft_mb_fld = ps_dec->u1_topleft_mb_fld_bot;
+ ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype_bot;
+
+ if(uc_curMbFldDecFlag && u1_topmb)
+ {
+ /* Field top MB: step from the bottom to the top MB of each neighbouring */
+ /* pair that is itself a field pair */
+ if(ps_top_mb->u1_mb_fld)
+ {
+ /* MbAddrB */
+ ps_top_mb--;
+ ps_cur_mb_info->i1_offset = 0;
+ }
+ /* If topright is field then point to C */
+ ps_top_right_mb -= ps_top_right_mb->u1_mb_fld ? 1 : 0;
+ if(ps_cur_mb_info->u1_topleft_mb_fld)
+ {
+ /* TopLeft = D */
+ ps_cur_mb_info->u1_topleft_mb_fld = ps_dec->u1_topleft_mb_fld;
+ ps_cur_mb_info->u1_topleft_mbtype = ps_dec->u1_topleft_mbtype;
+ }
+ }
+ }
+ if(u1_topmb)
+ {
+ /* Update the parameters of topleftmb*/
+ /* Saved after the current MB's top-left values have been read above */
+ ps_dec->u1_topleft_mb_fld = ps_top_mb->u1_mb_fld;
+ ps_dec->u1_topleft_mbtype = ps_top_mb->u1_mb_type;
+ /* Set invscan and dequantMatrixScan*/
+ if(uc_curMbFldDecFlag)
+ {
+ ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_fld;
+ }
+ else
+ {
+ ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan;
+ }
+ ps_dec->pu2_quant_scale_y =
+ gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_y_rem6];
+ ps_dec->pu2_quant_scale_u =
+ gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_u_rem6];
+ ps_dec->pu2_quant_scale_v =
+ gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_v_rem6];
+
+ }
+ else
+ {
+ /* Update the parameters of topleftmb*/
+ /* Bottom MB always saves the bottom MB (B + 1) of the top-row pair */
+ mb_neigbour_params_t *ps_top_mb_temp = ps_dec->ps_top_mb_row
+ + (u4_mb_x << 1) + 1;
+ ps_dec->u1_topleft_mb_fld_bot = ps_top_mb_temp->u1_mb_fld;
+ ps_dec->u1_topleft_mbtype_bot = ps_top_mb_temp->u1_mb_type;
+ }
+
+ ps_cur_mb_info->ps_left_mb = ps_left_mb;
+ ps_cur_mb_info->ps_top_mb = ps_top_mb;
+ ps_cur_mb_info->ps_top_right_mb = ps_top_right_mb;
+ ps_cur_mb_info->ps_curmb = ps_curmb;
+ ps_curmb->u1_mb_fld = uc_curMbFldDecFlag;
+
+ {
+ /* Form Left NNZ */
+ /* Source: the left pair's nnz words, 4 bytes for the top MB followed by */
+ /* 4 bytes for the bottom MB. Destination: the 4 left nnz bytes used for */
+ /* the current MB */
+ UWORD8 u1_is_left_mb_fld = ps_left_mb->u1_mb_fld;
+ UWORD8 *pu1_left_mb_pair_nnz_y = (UWORD8 *)&ps_dec->u4_n_leftY[0];
+ UWORD8 *pu1_left_mb_pair_nnz_uv = (UWORD8 *)&ps_dec->u4_n_left_cr[0];
+ UWORD8 *pu1_left_nnz_y = ps_dec->pu1_left_nnz_y;
+ UWORD8 *pu1_left_nnz_uv = ps_dec->pu1_left_nnz_uv;
+
+ if(uc_curMbFldDecFlag == u1_is_left_mb_fld)
+ {
+ /* Same field/frame mode: take the word of the same-parity left MB */
+ *(UWORD32 *)pu1_left_nnz_y = *(UWORD32 *)(pu1_left_mb_pair_nnz_y
+ + (uc_botMb << 2));
+ *(UWORD32 *)pu1_left_nnz_uv = *(UWORD32 *)(pu1_left_mb_pair_nnz_uv
+ + (uc_botMb << 2));
+ }
+ else if((uc_curMbFldDecFlag == 0) && u1_topmb && u1_is_left_mb_fld)
+ {
+ /* 0 0 1 1 of u4_n_leftY[0], 0 0 2 2 of u4_n_left_cr[0] */
+ pu1_left_nnz_y[0] = pu1_left_nnz_y[1] = pu1_left_mb_pair_nnz_y[0];
+ pu1_left_nnz_y[2] = pu1_left_nnz_y[3] = pu1_left_mb_pair_nnz_y[1];
+ pu1_left_nnz_uv[0] = pu1_left_nnz_uv[1] =
+ pu1_left_mb_pair_nnz_uv[0];
+ pu1_left_nnz_uv[2] = pu1_left_nnz_uv[3] =
+ pu1_left_mb_pair_nnz_uv[2];
+ }
+ else if((uc_curMbFldDecFlag == 0) && uc_botMb && u1_is_left_mb_fld)
+ {
+ /* 2 2 3 3 of u4_n_leftY[0] , 1 1 3 3 of u4_n_left_cr[0] */
+ pu1_left_nnz_y[0] = pu1_left_nnz_y[1] = pu1_left_mb_pair_nnz_y[2];
+ pu1_left_nnz_y[2] = pu1_left_nnz_y[3] = pu1_left_mb_pair_nnz_y[3];
+ pu1_left_nnz_uv[0] = pu1_left_nnz_uv[1] =
+ pu1_left_mb_pair_nnz_uv[1];
+ pu1_left_nnz_uv[2] = pu1_left_nnz_uv[3] =
+ pu1_left_mb_pair_nnz_uv[3];
+ }
+ else
+ {
+ /* Remaining case: current MB is field and the left MB's field flag */
+ /* differs from it */
+ /* 0 2 0 2 of u4_n_leftY[0], u4_n_leftY[1] */
+ pu1_left_nnz_y[0] = pu1_left_mb_pair_nnz_y[0];
+ pu1_left_nnz_y[1] = pu1_left_mb_pair_nnz_y[2];
+ pu1_left_nnz_y[2] = pu1_left_mb_pair_nnz_y[4 + 0];
+ pu1_left_nnz_y[3] = pu1_left_mb_pair_nnz_y[4 + 2];
+
+ /* 0 of u4_n_left_cr[0] and 0 u4_n_left_cr[1]
+ 2 of u4_n_left_cr[0] and 2 u4_n_left_cr[1] */
+ pu1_left_nnz_uv[0] = pu1_left_mb_pair_nnz_uv[0];
+ pu1_left_nnz_uv[1] = pu1_left_mb_pair_nnz_uv[4 + 0];
+ pu1_left_nnz_uv[2] = pu1_left_mb_pair_nnz_uv[2];
+ pu1_left_nnz_uv[3] = pu1_left_mb_pair_nnz_uv[4 + 2];
+ }
+ }
+}
+
+/*
+ **************************************************************************
+ * \if Function name : ih264d_transfer_mb_group_data \endif
+ *
+ * \brief
+ * Transfer the Following things
+ * N-Mb DeblkParams Data ( To Ext DeblkParams Buffer )
+ * N-Mb Recon Data ( To Ext Frame Buffer )
+ * N-Mb Intrapredline Data ( Updated Internally)
+ * N-Mb MV Data ( To Ext MV Buffer )
+ * N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers)
+ *
+ * \return
+ * None
+ *
+ **************************************************************************
+ */
+/*
+ * Advances the decoder's output and bookkeeping pointers after a group of
+ * c_numMbs macroblocks has been processed.
+ *
+ * ps_dec             : decoder state (read and updated)
+ * c_numMbs           : number of MBs in the group just processed
+ * u1_end_of_row      : non-zero when the group ended an MB row; also used as
+ *                      the index into the u4_inc_y / u4_inc_uv tables
+ * u1_end_of_row_next : unused
+ */
+void ih264d_transfer_mb_group_data(dec_struct_t * ps_dec,
+                                   const WORD8 c_numMbs,
+                                   const UWORD8 u1_end_of_row, /* Cur n-Mb End of Row Flag */
+                                   const UWORD8 u1_end_of_row_next /* Next n-Mb End of Row Flag */
+                                   )
+{
+    tfr_ctxt_t *ps_tfr = &ps_dec->s_tran_addrecon;
+    const UWORD8 u1_is_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+    UNUSED(u1_end_of_row_next);
+
+    /* Step the destination pointers by the increment selected by the flag */
+    ps_tfr->pu1_dest_y += ps_tfr->u4_inc_y[u1_end_of_row];
+    ps_tfr->pu1_dest_u += ps_tfr->u4_inc_uv[u1_end_of_row];
+    ps_tfr->pu1_dest_v += ps_tfr->u4_inc_uv[u1_end_of_row];
+
+    if(u1_end_of_row)
+    {
+        UWORD16 u2_row;
+        UWORD32 u4_stride;
+        UWORD32 u4_row_ofst;
+
+        if(!ps_dec->u1_separate_parse)
+        {
+            /* The row just finished becomes the top row of the next one */
+            mb_neigbour_params_t *ps_finished_row = ps_dec->ps_cur_mb_row;
+
+            ps_dec->ps_cur_mb_row = ps_dec->ps_top_mb_row;
+            ps_dec->ps_top_mb_row = ps_finished_row;
+
+            u2_row = ps_dec->u2_mby + (1 + u1_is_mbaff);
+        }
+        else
+        {
+            u2_row = ps_dec->i2_dec_thread_mb_y;
+        }
+
+        /* Luma: 16 lines per MB row; stride shifted by the field picture flag */
+        u4_stride = ps_dec->u2_frm_wd_y
+                        << ps_dec->ps_cur_slice->u1_field_pic_flag;
+        u4_row_ofst = (u2_row * u4_stride) << 4;
+        ps_tfr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + u4_row_ofst;
+
+        /* Chroma: 8 lines per MB row, same offset for both planes */
+        u4_stride = ps_dec->u2_frm_wd_uv
+                        << ps_dec->ps_cur_slice->u1_field_pic_flag;
+        u4_row_ofst = (u2_row * u4_stride) << 3;
+        ps_tfr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + u4_row_ofst;
+        ps_tfr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + u4_row_ofst;
+
+        ps_tfr->pu1_mb_y = ps_tfr->pu1_dest_y;
+        ps_tfr->pu1_mb_u = ps_tfr->pu1_dest_u;
+        ps_tfr->pu1_mb_v = ps_tfr->pu1_dest_v;
+    }
+
+    /*
+     * A slice boundary also triggers a transfer, so the group may hold fewer
+     * MBs than the nominal N. The left increments are therefore recomputed
+     * from the actual count on every call.
+     */
+    ps_tfr->u2_mv_left_inc = ((c_numMbs >> u1_is_mbaff) - 1)
+                    << (4 + u1_is_mbaff);
+    ps_tfr->u2_mv_top_left_inc = (c_numMbs << 2) - 1 - (u1_is_mbaff << 2);
+
+    if(0 == ps_dec->u1_separate_parse)
+    {
+        /* Left MV pointer is taken relative to the current one before the */
+        /* current one moves past this group (c_numMbs << 4 entries) */
+        ps_dec->ps_mv_left = ps_dec->ps_mv_cur + ps_tfr->u2_mv_left_inc;
+        ps_dec->ps_mv_cur += (c_numMbs << 4);
+    }
+
+    /* Advance (or swap) the deblock parameter pointers */
+    if(1 == ps_dec->u1_separate_parse)
+    {
+        ps_dec->ps_deblk_mbn_dec_thrd += c_numMbs;
+    }
+    else if(0 == ps_dec->u4_mb_level_deblk)
+    {
+        ps_dec->ps_deblk_mbn += c_numMbs;
+    }
+    else
+    {
+        /* MB-level deblocking: swap the previous and current buffers and */
+        /* continue in what was the previous one */
+        deblk_mb_t *ps_old_curr = ps_dec->ps_deblk_mbn_curr;
+
+        ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_mbn_prev;
+        ps_dec->ps_deblk_mbn_curr = ps_dec->ps_deblk_mbn_prev;
+        ps_dec->ps_deblk_mbn_prev = ps_old_curr;
+    }
+}
+
+
diff --git a/decoder/ih264d_mb_utils.h b/decoder/ih264d_mb_utils.h
new file mode 100755
index 0000000..6e359f5
--- /dev/null
+++ b/decoder/ih264d_mb_utils.h
@@ -0,0 +1,293 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_MB_UTILS_H_
+#define _IH264D_MB_UTILS_H_
+/*!
+ **************************************************************************
+ * \file ih264d_mb_utils.h
+ *
+ * \brief
+ * Contains declarations of the utility functions needed to decode MB
+ *
+ * \date
+ * 18/12/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+
+/*--------------------------------------------------------------------*/
+/* Macros to get raster scan position of a block[8x8] / sub block[4x4]*/
+/* The argument is the block / sub block index. It is parenthesised   */
+/* in every expansion so that an expression argument (e.g. a + b)     */
+/* is combined with the intended operator precedence.                 */
+/*--------------------------------------------------------------------*/
+
+#define GET_BLK_RASTER_POS_X(x) (((x) & 0x01) << 1)
+#define GET_BLK_RASTER_POS_Y(y) (((y) >> 1) << 1)
+#define GET_SUB_BLK_RASTER_POS_X(x) ((x) & 0x01)
+#define GET_SUB_BLK_RASTER_POS_Y(y) ((y) >> 1)
+
+/*--------------------------------------------------------------------*/
+/* Masks used in decoding of Macroblock (neighbour MB availability bits) */
+/*--------------------------------------------------------------------*/
+
+#define LEFT_MB_AVAILABLE_MASK 0x01 /* bit 0; the MV prediction code tests it against u1_mb_ngbr_availablity */
+#define TOP_LEFT_MB_AVAILABLE_MASK 0x02 /* bit 1; presumably applies to the same availability byte - TODO confirm */
+#define TOP_MB_AVAILABLE_MASK 0x04 /* bit 2; the MV prediction code tests it against u1_mb_ngbr_availablity */
+#define TOP_RIGHT_MB_AVAILABLE_MASK 0x08 /* bit 3; presumably applies to the same availability byte - TODO confirm */
+
+#define TOP_RT_SUBBLOCK_MASK_MOD 0xFFF7
+
+#define TOP_RIGHT_DEFAULT_AVAILABLE 0x5750
+#define TOP_RIGHT_TOPR_AVAILABLE 0x0008
+#define TOP_RIGHT_TOP_AVAILABLE 0x0007
+
+#define TOP_LEFT_DEFAULT_AVAILABLE 0xEEE0
+#define TOP_LEFT_TOPL_AVAILABLE 0x0001
+#define TOP_LEFT_TOP_AVAILABLE 0x000E
+#define TOP_LEFT_LEFT_AVAILABLE 0x1110
+
+/*
+ * Reads the flag of MB number u4_mb_num from the bit-packed map mb_map
+ * (eight MBs per byte: byte index = num >> 3, bit index = num & 7) and
+ * stores the CHECKBIT() result in u4_cond. The map byte is read through
+ * a volatile pointer.
+ *
+ * All parameters are parenthesised so expression arguments expand with
+ * the intended precedence. u4_mb_num is evaluated twice and must not
+ * have side effects.
+ */
+#define CHECK_MB_MAP(u4_mb_num, mb_map, u4_cond) \
+{ \
+    UWORD32 u4_bit_number; \
+    volatile UWORD8 *pu1_mb_flag; \
+    \
+    u4_bit_number = (u4_mb_num) & 0x07; \
+    pu1_mb_flag = (UWORD8 *)(mb_map) + ((u4_mb_num) >> 3); \
+    \
+    (u4_cond) = CHECKBIT((*pu1_mb_flag), u4_bit_number); \
+}
+
+/*
+ * Reads the flag of MB number u4_mb_num from the byte-per-MB map mb_map
+ * into u4_cond. The byte is read through a volatile pointer.
+ *
+ * All parameters are parenthesised so expression arguments expand with
+ * the intended precedence.
+ */
+#define CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cond) \
+{ \
+    volatile UWORD8 *pu1_mb_flag; \
+    \
+    pu1_mb_flag = (UWORD8 *)(mb_map) + (u4_mb_num); \
+    \
+    (u4_cond) = (*pu1_mb_flag); \
+}
+
+/*
+ * Sets the bit of the macroblock at (u2_mbx, u2_mby) in the bit-packed
+ * map mb_map and increments mb_count.
+ *
+ * The MB number is u2_frm_wd_in_mbs * (u2_mby >> u1_mbaff) + u2_mbx, so
+ * with u1_mbaff == 1 both rows of an MB pair address the same bit. The
+ * bit is set unconditionally on every expansion.
+ *
+ * NOTE: u1_mbaff is NOT a macro parameter; a variable of that name must
+ * be visible where the macro is expanded.
+ *
+ * All parameters are parenthesised so expression arguments expand with
+ * the intended precedence. The former empty "if (1 == u1_mbaff)" branch,
+ * which only wrapped commented-out code, had no effect and is removed.
+ */
+#define UPDATE_MB_MAP(u2_frm_wd_in_mbs, u2_mbx, u2_mby, mb_map, mb_count) \
+{ \
+    UWORD32 u4_bit_number; \
+    UWORD32 u4_mb_number; \
+    \
+    u4_mb_number = (u2_frm_wd_in_mbs) * ((u2_mby) >> u1_mbaff) + (u2_mbx); \
+    \
+    u4_bit_number = u4_mb_number & 0x07; \
+    SET_BIT((mb_map)[u4_mb_number >> 3], u4_bit_number); \
+    \
+    /*H264_DEC_DEBUG_PRINT("SETBIT: %d\n", u4_mb_number);*/ \
+    (mb_count)++; \
+}
+
+/*
+ * Sets the bit of MB number u4_mb_number in the bit-packed map mb_map
+ * (byte index = num >> 3, bit index = num & 7). The byte is accessed
+ * through a volatile pointer. The bit is set unconditionally; no MBAFF
+ * specific check is performed here.
+ *
+ * All parameters are parenthesised so expression arguments expand with
+ * the intended precedence. u4_mb_number is evaluated twice and must not
+ * have side effects.
+ */
+#define UPDATE_MB_MAP_MBNUM(mb_map, u4_mb_number) \
+{ \
+    UWORD32 u4_bit_number; \
+    volatile UWORD8 *pu1_mb_flag; \
+    \
+    u4_bit_number = (u4_mb_number) & 0x07; \
+    pu1_mb_flag = (UWORD8 *)(mb_map) + ((u4_mb_number) >> 3); \
+    SET_BIT((*pu1_mb_flag), u4_bit_number); \
+}
+
+/*
+ * Writes 1 to the entry of MB number u4_mb_number in the byte-per-MB map
+ * mb_map. The byte is written through a volatile pointer. The write is
+ * unconditional; no MBAFF specific check is performed here.
+ *
+ * All parameters are parenthesised so expression arguments expand with
+ * the intended precedence.
+ */
+#define UPDATE_MB_MAP_MBNUM_BYTE(mb_map, u4_mb_number) \
+{ \
+    volatile UWORD8 *pu1_mb_flag; \
+    \
+    pu1_mb_flag = (UWORD8 *)(mb_map) + (u4_mb_number); \
+    (*pu1_mb_flag) = 1; \
+}
+
+/*
+ * Stores u2_slice_num in the UWORD16 entry of MB number u4_mb_number in
+ * slice_map. The entry is written through a volatile pointer.
+ *
+ * All parameters are parenthesised so expression arguments expand with
+ * the intended precedence.
+ */
+#define UPDATE_SLICE_NUM_MAP(slice_map, u4_mb_number,u2_slice_num) \
+{ \
+    volatile UWORD16 *pu2_slice_map; \
+    \
+    pu2_slice_map = (UWORD16 *)(slice_map) + (u4_mb_number); \
+    (*pu2_slice_map) = (u2_slice_num); \
+}
+
+/*
+ * Loads the UWORD16 entry of MB number mb_number from slice_map into
+ * u2_slice_num. The entry is read through a volatile pointer.
+ *
+ * All parameters are parenthesised so expression arguments expand with
+ * the intended precedence.
+ */
+#define GET_SLICE_NUM_MAP(slice_map, mb_number,u2_slice_num) \
+{ \
+    volatile UWORD16 *pu2_slice_map; \
+    \
+    pu2_slice_map = (UWORD16 *)(slice_map) + (mb_number); \
+    (u2_slice_num) = (*pu2_slice_map); \
+}
+
+
+/*
+ * Extracts bits 1:0 of pkd_info into u1_out.
+ * pkd_info is parenthesised so that an expression argument (e.g. a | b)
+ * is masked as a whole.
+ */
+#define GET_XPOS_PRED(u1_out,pkd_info) \
+{ \
+    WORD32 bit_field; \
+    bit_field = (pkd_info) & 0x3; \
+    (u1_out) = bit_field; \
+}
+
+
+/*
+ * Extracts bits 3:2 of pkd_info into u1_out.
+ * pkd_info is parenthesised so that an expression argument (e.g. a | b)
+ * is shifted as a whole.
+ */
+#define GET_YPOS_PRED(u1_out,pkd_info) \
+{ \
+    WORD32 bit_field; \
+    bit_field = (pkd_info) >> 2; \
+    (u1_out) = bit_field & 0x3; \
+}
+
+
+
+/*
+ * Decodes bits 5:4 of pkd_info into u1_out: the 2 bit field is doubled,
+ * and a field of 0 yields 1, so the result is 1, 2, 4 or 6.
+ * pkd_info is parenthesised so that an expression argument is shifted
+ * as a whole.
+ */
+#define GET_WIDTH_PRED(u1_out,pkd_info) \
+{ \
+    WORD32 bit_field; \
+    bit_field = (pkd_info) >> 4; \
+    bit_field = (bit_field & 0x3) << 1 ; \
+    (u1_out) = (bit_field == 0)?1:bit_field; \
+}
+
+/*
+ * Decodes bits 7:6 of pkd_info into u1_out: the 2 bit field is doubled,
+ * and a field of 0 yields 1, so the result is 1, 2, 4 or 6.
+ * pkd_info is parenthesised so that an expression argument is shifted
+ * as a whole.
+ */
+#define GET_HEIGHT_PRED(u1_out,pkd_info) \
+{ \
+    WORD32 bit_field; \
+    bit_field = (pkd_info) >> 6; \
+    bit_field = (bit_field & 0x3) << 1 ; \
+    (u1_out) = (bit_field == 0)?1:bit_field; \
+}
+
+/*!
+ **************************************************************************
+ * \brief Masks for elements present in the first column but not on the
+ * first row.
+ **************************************************************************
+ */
+#define FIRST_COL_NOT_FIRST_ROW 0xFAFB
+#define FIRST_ROW_MASK 0xFFCC
+/*!
+ **************************************************************************
+ * \brief Mask for elements present in the first row but not in the
+ * last column.
+ **************************************************************************
+ */
+#define FIRST_ROW_NOT_LAST_COL 0xFFEC
+/*!
+ **************************************************************************
+ * \brief Mask for elements present in the first row but not in the
+ * first column.
+ **************************************************************************
+ */
+#define FIRST_ROW_NOT_FIRST_COL 0xFFCD
+/*!
+ **************************************************************************
+ * \brief Masks for the top right subMB of a 4x4 block
+ **************************************************************************
+ */
+#define TOP_RT_SUBBLOCK_MASK 0xFFDF
+/*!
+ **************************************************************************
+ * \brief Masks for the top left subMB of a 4x4 block
+ **************************************************************************
+ */
+#define TOP_LT_SUBBLOCK_MASK 0xFFFE
+/*!
+ **************************************************************************
+ * \brief Indicates if a subMB has a top right subMB available
+ **************************************************************************
+ */
+#define TOP_RT_SUBBLOCK_MB_MASK 0x5F4C
+
+#define FIRST_COL_MASK 0xFAFA
+
+/*--------------------------------------------------------------------*/
+/* Macros to calculate the current position of a MB wrt picture */
+/*--------------------------------------------------------------------*/
+#define MB_LUMA_PIC_OFFSET(mb_x,mb_y,frmWidthY) (((mb_y)*(frmWidthY) + (mb_x))<<4)
+#define MB_CHROMA_PIC_OFFSET(mb_x,mb_y,frmWidthUV) (((mb_y)*(frmWidthUV) + (mb_x))<<3)
+
+/*--------------------------------------------------------------------*/
+/* Macros to calculate the current position of a MB wrt N[ Num coeff] Array */
+/* Value: (mb_x << u1_mbaff) + (1 - u1_topmb) + (mb_y * frmWidthInMbs).      */
+/* Every parameter is parenthesised so expression arguments expand with     */
+/* the intended precedence.                                                  */
+/*--------------------------------------------------------------------*/
+#define MB_PARAM_OFFSET(mb_x,mb_y,frmWidthInMbs,u1_mbaff,u1_topmb) \
+    (((mb_x) << (u1_mbaff)) + (1 - (u1_topmb)) + ((mb_y) * (frmWidthInMbs)))
+
+UWORD32 ih264d_get_mb_info_cavlc_mbaff(dec_struct_t * ps_dec,
+ const UWORD16 ui16_curMbAddress,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip_run);
+UWORD32 ih264d_get_mb_info_cavlc_nonmbaff(dec_struct_t * ps_dec,
+ const UWORD16 ui16_curMbAddress,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip_run);
+
+UWORD32 ih264d_get_mb_info_cabac_mbaff(dec_struct_t * ps_dec,
+ const UWORD16 ui16_curMbAddress,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip_run);
+
+UWORD32 ih264d_get_mb_info_cabac_nonmbaff(dec_struct_t * ps_dec,
+ const UWORD16 ui16_curMbAddress,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip_run);
+
+UWORD8 get_cabac_context_non_mbaff(dec_struct_t * ps_dec, UWORD16 u2_mbskip);
+
+UWORD32 ih264d_get_cabac_context_mbaff(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip);
+
+WORD32 PutMbToFrame(dec_struct_t * ps_dec);
+void ih264d_get_mbaff_neighbours(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 uc_curMbFldDecFlag);
+
+void ih264d_update_mbaff_left_nnz(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info);
+void ih264d_transfer_mb_group_data(dec_struct_t * ps_dec,
+ const WORD8 c_numMbs,
+ const UWORD8 u1_end_of_row, /* Cur n-Mb End of Row Flag */
+ const UWORD8 u1_end_of_row_next /* Next n-Mb End of Row Flag */
+ );
+
+//void FillRandomData(UWORD8 *pu1_buf, WORD32 u4_bufSize);
+
+#endif /* _IH264D_MB_UTILS_H_ */
diff --git a/decoder/ih264d_mem_request.h b/decoder/ih264d_mem_request.h
new file mode 100755
index 0000000..3c60c72
--- /dev/null
+++ b/decoder/ih264d_mem_request.h
@@ -0,0 +1,82 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _IH264D_MEM_REQUEST_H_
+#define _IH264D_MEM_REQUEST_H_
+/*!
+ ***************************************************************************
+ * \file ih264d_mem_request.h
+ *
+ * \brief
+ * This file contains declarations and data structures of the APIs which
+ * are required to interact with the Picture Buffer.
+ *
+ *
+ * \date
+ * 11/12/2002
+ *
+ * \author NS
+ ***************************************************************************/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_defs.h"
+#include "ih264d_structs.h"
+
+/* Capacity of MemReq::s_memBlock[]. Parenthesised so that the macro can be
+ * used safely inside larger expressions (e.g. MAX_MEM_BLOCKS * n). */
+#define MAX_MEM_BLOCKS (64 + 8)
+
+struct MemBlock /* One memory block request: where to store the block address, and the block size */
+{
+ void ** v_memLocation; /** memory location where address of allocated memory should be stored*/
+ UWORD32 u4_mem_size; /** Size of the memory block (unit not shown here - presumably bytes, TODO confirm) */
+};
+
+struct MemReq /* A counted list of memory block requests */
+{
+ UWORD32 u4_num_memBlocks; /** Number of memory blocks */
+ struct MemBlock s_memBlock[MAX_MEM_BLOCKS]; /** Array of memory block descriptors (MAX_MEM_BLOCKS entries; an embedded array, not a pointer) */
+};
+
+struct PicMemBlock /* Three buffer pointers kept together as one picture memory block */
+{
+ void * buf1; /** memory location for buf1 */
+ void * buf2; /** memory location for buf2 */
+ void * buf3; /** memory location for buf3 */
+};
+
+struct PicMemReq /* Picture memory request: one size per buffer kind plus the per-block buffer pointers */
+{
+ WORD32 i4_num_pic_memBlocks; /** Number of memory blocks */
+ UWORD32 u4_size1; /** Size of the buf1 in PicMemBlock */
+ UWORD32 u4_size2; /** Size of the buf2 in PicMemBlock */
+ UWORD32 u4_size3; /** Size of the buf3 in PicMemBlock */
+ struct PicMemBlock s_PicMemBlock[MAX_DISP_BUFS_NEW]; /** Picture memory blocks (MAX_DISP_BUFS_NEW entries) */
+};
+
+WORD32 ih264d_create_pic_buffers(UWORD8 u1_num_of_buf,
+ dec_struct_t *ps_dec);
+
+WORD32 ih264d_create_mv_bank(void * pv_codec_handle,
+ UWORD32 u4_wd,
+ UWORD32 u4_ht);
+WORD16 ih264d_get_memory_dec_params(dec_struct_t * ps_dec);
+
+
+#endif /* _IH264D_MEM_REQUEST_H_ */
diff --git a/decoder/ih264d_mvpred.c b/decoder/ih264d_mvpred.c
new file mode 100755
index 0000000..fb4932f
--- /dev/null
+++ b/decoder/ih264d_mvpred.c
@@ -0,0 +1,1193 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_mvpred.c
+ *
+ * \brief
+ * This file contains function specific to decoding Motion vector.
+ *
+ * Detailed_description
+ *
+ * \date
+ * 10-12-2002
+ *
+ * \author Arvind Raman
+ **************************************************************************
+ */
+#include <string.h>
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_process_bslice.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_inter_pred.h"
+#include "ih264d_tables.h"
+
+/*!
+ **************************************************************************
+ * \if ih264d_get_motion_vector_predictor name : Name \endif
+ *
+ * \brief
+ * Derives the motion vector predictor for one reference list from the
+ * three neighbour candidates and stores it in ps_result.
+ *
+ * \param ps_result: Receives the predicted MV in i2_mv[2 * u1_B] (x) and
+ * i2_mv[2 * u1_B + 1] (y)
+ * \param ps_mv_pred: The three candidate predictors, indexed by LEFT, TOP
+ * and TOP_R
+ * \param u1_ref_idx: Reference index of the current block
+ * \param u1_B: Selects the i1_ref_frame[] entry and the i2_mv[] pair used
+ * \param pu1_mv_pred_condition: Table indexed by a 3 bit match mask
+ * (bit 0 LEFT, bit 1 TOP, bit 2 TOP_R). An entry is either the index of
+ * the candidate to copy, or a value that reads as -1 once converted to
+ * WORD8, which requests the median of the three candidates.
+ *
+ * \return
+ * None. The result is written to ps_result.
+ *
+ * \note
+ * The original header cites sec 8.4.1.2.1 for this logic.
+ *
+ **************************************************************************
+ */
+
+void ih264d_get_motion_vector_predictor(mv_pred_t * ps_result,
+                                        mv_pred_t **ps_mv_pred,
+                                        UWORD8 u1_ref_idx,
+                                        UWORD8 u1_B,
+                                        const UWORD8 *pu1_mv_pred_condition)
+{
+    UWORD8 u1_mv_base = (u1_B << 1);
+    UWORD8 u1_comp;
+    WORD8 i1_choice;
+
+    /* Build the match mask: one bit per candidate whose reference frame
+     equals that of the current block */
+    i1_choice = (ps_mv_pred[LEFT]->i1_ref_frame[u1_B] == u1_ref_idx);
+    if(ps_mv_pred[TOP]->i1_ref_frame[u1_B] == u1_ref_idx)
+        i1_choice |= 2;
+    if(ps_mv_pred[TOP_R]->i1_ref_frame[u1_B] == u1_ref_idx)
+        i1_choice |= 4;
+
+    /* Translate the mask into "copy candidate n" or -1 */
+    i1_choice = pu1_mv_pred_condition[i1_choice];
+
+    if(i1_choice == -1)
+    {
+        /* No single candidate is selected: take the component-wise median
+         of the three candidates, x first and then y */
+        for(u1_comp = 0; u1_comp < 2; u1_comp++)
+        {
+            WORD32 i4_lo, i4_hi;
+
+            i4_lo = MIN(ps_mv_pred[0]->i2_mv[u1_mv_base + u1_comp],
+                        ps_mv_pred[1]->i2_mv[u1_mv_base + u1_comp]);
+            i4_hi = MAX(ps_mv_pred[0]->i2_mv[u1_mv_base + u1_comp],
+                        ps_mv_pred[1]->i2_mv[u1_mv_base + u1_comp]);
+            i4_hi = MIN(i4_hi, ps_mv_pred[2]->i2_mv[u1_mv_base + u1_comp]);
+            ps_result->i2_mv[u1_mv_base + u1_comp] =
+                            (WORD16)(MAX(i4_lo, i4_hi));
+        }
+        return;
+    }
+
+    /* Exactly one candidate was selected by the table: copy its MV */
+    ps_result->i2_mv[u1_mv_base + 0] =
+                    ps_mv_pred[i1_choice]->i2_mv[u1_mv_base + 0];
+    ps_result->i2_mv[u1_mv_base + 1] =
+                    ps_mv_pred[i1_choice]->i2_mv[u1_mv_base + 1];
+}
+
+/*!
+ **************************************************************************
+ * \if ih264d_mbaff_mv_pred name : Name \endif
+ *
+ * \brief
+ * Selects the three candidate MV predictors (LEFT, TOP, TOP_R) for a
+ * sub-MB in the MBAFF case and records, per candidate, the difference
+ * between the field flag of the current MB and that of the neighbour.
+ * Only pointers and scale entries are chosen here; no motion vector is
+ * computed.
+ *
+ * \param ps_mv_pred: Output array; entries LEFT, TOP and TOP_R are set to
+ * the chosen candidates (each defaults to ps_dec->s_default_mv_pred)
+ * \param u1_sub_mb_num: Sub-MB number; x = (num & 3), y = (num >> 2)
+ * \param ps_mv_nmb: Base pointer from which the neighbours inside the
+ * current MB are addressed
+ * \param ps_mv_ntop: Base pointer from which the top, top right and top
+ * left neighbours outside the current MB are addressed
+ * \param ps_dec: Decoder context; supplies s_default_mv_pred,
+ * u4_num_pmbair and ps_mv_left
+ * \param uc_mb_part_width: Partition width, in the same units as the
+ * sub-MB x position
+ * \param ps_cur_mb_info: Current MB info (field / top flags, neighbour MB
+ * pointers and availability masks). ps_left_mb and ps_top_mb are
+ * dereferenced unconditionally.
+ * \param pu0_scale: Output array indexed by LEFT, TOP and TOP_R. An entry
+ * is written only on the paths that pick a neighbour from outside the
+ * current MB (or that copy another entry); the value is
+ * (current field flag - neighbour field flag) stored as UWORD8.
+ *
+ * \return
+ * None. Results are written to ps_mv_pred[] and pu0_scale[].
+ *
+ * \note
+ * The original header cites sec 8.4.1.2.1 for this logic. The fixed
+ * pointer offsets depend on the MV bank layout, which is not visible in
+ * this file - TODO confirm before changing any of them.
+ *
+ **************************************************************************
+ */
+
+void ih264d_mbaff_mv_pred(mv_pred_t **ps_mv_pred,
+                          UWORD8 u1_sub_mb_num,
+                          mv_pred_t *ps_mv_nmb,
+                          mv_pred_t *ps_mv_ntop,
+                          dec_struct_t *ps_dec,
+                          UWORD8 uc_mb_part_width,
+                          dec_mb_info_t *ps_cur_mb_info,
+                          UWORD8* pu0_scale)
+{
+    UWORD16 u2_a_in = 0, u2_b_in = 0, u2_c_in = 0, u2_d_in = 0;
+    mv_pred_t *ps_mvpred_l, *ps_mvpred_tmp;
+    UWORD8 u1_sub_mb_x = (u1_sub_mb_num & 3), uc_sub_mb_y = (u1_sub_mb_num >> 2);
+    UWORD8 u1_is_cur_mb_fld, u1_is_left_mb_fld, u1_is_top_mb_fld;
+    UWORD8 u1_is_cur_mb_top;
+
+    u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
+    u1_is_cur_mb_top = ps_cur_mb_info->u1_topmb;
+
+    u1_is_left_mb_fld = ps_cur_mb_info->ps_left_mb->u1_mb_fld;
+    u1_is_top_mb_fld = ps_cur_mb_info->ps_top_mb->u1_mb_fld;
+
+    /* Checking in the subMB exists, calculating their motion vectors to be
+     used as predictors and the reference frames of those subMBs.
+     Every candidate starts out as the default predictor. */
+    ps_mv_pred[LEFT] = &ps_dec->s_default_mv_pred;
+    ps_mv_pred[TOP] = &(ps_dec->s_default_mv_pred);
+    ps_mv_pred[TOP_R] = &(ps_dec->s_default_mv_pred);
+
+    /* Check if the left subMb is available */
+    if(u1_sub_mb_x)
+    {
+        /* Left neighbour lies inside the current MB */
+        u2_a_in = 1;
+        ps_mv_pred[LEFT] = (ps_mv_nmb - 1);
+    }
+    else
+    {
+        UWORD8 uc_temp;
+        u2_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK);
+        if(u2_a_in)
+        {
+            ps_mvpred_l = (ps_dec->u4_num_pmbair) ?
+                            ps_mv_nmb :
+                            (ps_dec->ps_mv_left + (uc_sub_mb_y << 2) + 48
+                                            - (u1_is_cur_mb_top << 4));
+            uc_temp = 29;
+            /* The back-offset is adjusted only when the current MB and the
+             left MB differ in frame / field mode */
+            if(u1_is_cur_mb_fld ^ u1_is_left_mb_fld)
+            {
+                if(u1_is_left_mb_fld)
+                {
+                    uc_temp +=
+                                    (((uc_sub_mb_y & 1) << 2)
+                                                    + ((uc_sub_mb_y & 2) << 1));
+                    uc_temp += ((u1_is_cur_mb_top) ? 0 : 8);
+                }
+                else
+                {
+                    uc_temp = uc_temp - (uc_sub_mb_y << 2);
+                    uc_temp += ((u1_is_cur_mb_top) ? 0 : 16);
+                }
+            }
+            ps_mv_pred[LEFT] = (ps_mvpred_l - uc_temp);
+            pu0_scale[LEFT] = u1_is_cur_mb_fld - u1_is_left_mb_fld;
+        }
+    }
+
+    /* Check if the top subMB is available */
+    if((uc_sub_mb_y > 0) || ((u1_is_cur_mb_top | u1_is_cur_mb_fld) == 0))
+    {
+        u2_b_in = 1;
+        ps_mv_pred[TOP] = ps_mv_nmb - 4;
+    }
+    else
+    {
+        u2_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK);
+        if(u2_b_in)
+        {
+            /* CHANGED CODE */
+
+            /* Start from ps_mv_ntop; step 16 entries forward for a top MB
+             unless both the top MB and the current MB are field MBs */
+            ps_mvpred_tmp = ps_mv_ntop;
+            if(u1_is_cur_mb_top && !(u1_is_top_mb_fld && u1_is_cur_mb_fld))
+                ps_mvpred_tmp += 16;
+
+            ps_mv_pred[TOP] = ps_mvpred_tmp;
+            pu0_scale[TOP] = u1_is_cur_mb_fld - u1_is_top_mb_fld;
+        }
+    }
+
+    /* Check if the top right subMb is available. The top right subMb is
+     defined as the top right subMb at the top right corner of the MB
+     partition. The top right subMb index starting from the top left
+     corner of the MB partition is given by
+     TopRightSubMbIndx = TopLeftSubMbIndx + (WidthOfMbPartition - 6) / 2
+     */
+    u2_c_in = CHECKBIT(ps_cur_mb_info->u2_top_right_avail_mask,
+                        (u1_sub_mb_num + uc_mb_part_width - 1));
+    if(u2_c_in)
+    {
+        ps_mv_pred[TOP_R] = ps_mv_pred[TOP] + uc_mb_part_width;
+        pu0_scale[TOP_R] = pu0_scale[TOP];
+        if((uc_sub_mb_y == 0) && ((u1_sub_mb_x + uc_mb_part_width) > 3))
+        {
+            /* Top right neighbour lies in the top right MB */
+            UWORD8 uc_isTopRtMbFld;
+            uc_isTopRtMbFld = ps_cur_mb_info->ps_top_right_mb->u1_mb_fld;
+            /* CHANGED CODE */
+            ps_mvpred_tmp = ps_mv_ntop + uc_mb_part_width + 12;
+            ps_mvpred_tmp += (u1_is_cur_mb_top) ? 16 : 0;
+            ps_mvpred_tmp += (u1_is_cur_mb_fld && u1_is_cur_mb_top && uc_isTopRtMbFld) ?
+                            0 : 16;
+            ps_mv_pred[TOP_R] = ps_mvpred_tmp;
+            pu0_scale[TOP_R] = u1_is_cur_mb_fld - uc_isTopRtMbFld;
+        }
+    }
+    else
+    {
+        u2_d_in = CHECKBIT(ps_cur_mb_info->u2_top_left_avail_mask, u1_sub_mb_num);
+
+        /* Check if the the top left subMB is available */
+        if(u2_d_in)
+        {
+            UWORD8 uc_isTopLtMbFld;
+
+            ps_mv_pred[TOP_R] = ps_mv_pred[TOP] - 1;
+            pu0_scale[TOP_R] = pu0_scale[TOP];
+
+            if(u1_sub_mb_x == 0)
+            {
+                if((uc_sub_mb_y > 0) || ((u1_is_cur_mb_top | u1_is_cur_mb_fld) == 0))
+                {
+                    /* Top left neighbour is addressed relative to the
+                     LEFT candidate chosen above */
+                    uc_isTopLtMbFld = u1_is_left_mb_fld;
+                    ps_mvpred_tmp = ps_mv_pred[LEFT] - 4;
+
+                    if((u1_is_cur_mb_fld == 0) && uc_isTopLtMbFld)
+                    {
+                        ps_mvpred_tmp = ps_mv_pred[LEFT] + 16;
+                        ps_mvpred_tmp -= (uc_sub_mb_y & 1) ? 0 : 4;
+                    }
+                }
+                else
+                {
+                    uc_isTopLtMbFld = ps_cur_mb_info->u1_topleft_mb_fld;
+
+                    /* CHANGED CODE */
+                    ps_mvpred_tmp = ps_mv_ntop - 29;
+                    ps_mvpred_tmp += (u1_is_cur_mb_top) ? 16 : 0;
+                    if(u1_is_cur_mb_fld && u1_is_cur_mb_top)
+                        ps_mvpred_tmp -= (uc_isTopLtMbFld) ? 16 : 0;
+                }
+                ps_mv_pred[TOP_R] = ps_mvpred_tmp;
+                pu0_scale[TOP_R] = u1_is_cur_mb_fld - uc_isTopLtMbFld;
+            }
+        }
+        else if(u2_b_in == 0)
+        {
+            /* If all the subMBs B, C, D are all out of the frame then their MV
+             and their reference picture is equal to that of A */
+            ps_mv_pred[TOP] = ps_mv_pred[LEFT];
+            ps_mv_pred[TOP_R] = ps_mv_pred[LEFT];
+            pu0_scale[TOP] = pu0_scale[LEFT];
+            pu0_scale[TOP_R] = pu0_scale[LEFT];
+        }
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if ih264d_non_mbaff_mv_pred name : Name \endif
+ *
+ * \brief
+ * Selects the three candidate MV predictors (LEFT, TOP, TOP_R) for a
+ * sub-MB in the non-MBAFF case. Only pointers are chosen here; no motion
+ * vector is computed.
+ *
+ * \param ps_mv_pred: Output array; entries LEFT, TOP and TOP_R are set to
+ * the chosen candidates (each defaults to ps_dec->s_default_mv_pred)
+ * \param u1_sub_mb_num: Sub-MB number; x = (num & 3), y = (num >> 2)
+ * \param ps_mv_nmb: Base pointer from which the in-MB and left neighbours
+ * are addressed (offsets -1, -4, -13, -17)
+ * \param ps_mv_ntop: Pointer used for the top neighbour when the sub-MB
+ * is in row 0; for other rows it is replaced locally by ps_mv_nmb - 4
+ * \param ps_dec: Decoder context; supplies s_default_mv_pred
+ * \param uc_mb_part_width: Partition width, in the same units as the
+ * sub-MB x position
+ * \param ps_cur_mb_info: Supplies u1_mb_ngbr_availablity and the
+ * u2_top_right_avail_mask / u2_top_left_avail_mask bit masks
+ *
+ * \return
+ * None. The result is written to ps_mv_pred[].
+ *
+ * \note
+ * The original header cites sec 8.4.1.2.1 for this logic. The fixed
+ * offsets presumably assume 16 mv_pred_t entries per MB stored as a 4x4
+ * raster with the left MB immediately before the current one - TODO
+ * confirm against the MV bank layout.
+ *
+ **************************************************************************
+ */
+#if(!MVPRED_NONMBAFF)
+void ih264d_non_mbaff_mv_pred(mv_pred_t **ps_mv_pred,
+ UWORD8 u1_sub_mb_num,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ dec_struct_t *ps_dec,
+ UWORD8 uc_mb_part_width,
+ dec_mb_info_t *ps_cur_mb_info)
+{
+ UWORD16 u2_b_in = 0, u2_c_in = 0, u2_d_in = 0;
+ UWORD8 u1_sub_mb_x = (u1_sub_mb_num & 3), uc_sub_mb_y = (u1_sub_mb_num >> 2);
+
+ /* Checking in the subMB exists, calculating their motion vectors to be
+ used as predictors and the reference frames of those subMBs.
+ Every candidate starts out as the default predictor. */
+
+ ps_mv_pred[LEFT] = &ps_dec->s_default_mv_pred;
+ ps_mv_pred[TOP] = &(ps_dec->s_default_mv_pred);
+ ps_mv_pred[TOP_R] = &(ps_dec->s_default_mv_pred);
+ /* Check if the left subMb is available */
+
+ if(u1_sub_mb_x)
+ {
+ ps_mv_pred[LEFT] = (ps_mv_nmb - 1); /* left neighbour lies inside the current MB */
+ }
+ else
+ {
+ if(ps_cur_mb_info->u1_mb_ngbr_availablity & LEFT_MB_AVAILABLE_MASK)
+ {
+ ps_mv_pred[LEFT] = (ps_mv_nmb - 13); /* presumably -16 + 3: same row, last column of the left MB - TODO confirm */
+ }
+ }
+
+ /* Check if the top subMB is available */
+ if(uc_sub_mb_y)
+ {
+ u2_b_in = 1;
+ ps_mv_ntop = ps_mv_nmb - 4; /* local override: the TOP_R / top-left code below relies on it */
+ ps_mv_pred[TOP] = ps_mv_ntop;
+
+ }
+ else
+ {
+ u2_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity & TOP_MB_AVAILABLE_MASK);
+ if(u2_b_in)
+ {
+ ps_mv_pred[TOP] = ps_mv_ntop;
+ }
+ }
+
+ /* Check if the top right subMb is available. The top right subMb is
+ defined as the top right subMb at the top right corner of the MB
+ partition. The top right subMb index starting from the top left
+ corner of the MB partition is given by
+ TopRightSubMbIndx = TopLeftSubMbIndx + (WidthOfMbPartition - 6) / 2
+ */
+ u2_c_in = CHECKBIT(ps_cur_mb_info->u2_top_right_avail_mask,
+ (u1_sub_mb_num + uc_mb_part_width - 1));
+ if(u2_c_in)
+ {
+ ps_mv_pred[TOP_R] = (ps_mv_ntop + uc_mb_part_width);
+
+ if(uc_sub_mb_y == 0)
+ {
+ /* CHANGED CODE */
+ /* The partition reaches past column 3, so the candidate is moved a
+ further 12 entries forward */
+ if((u1_sub_mb_x + uc_mb_part_width) > 3)
+ ps_mv_pred[TOP_R] += 12;
+ }
+ }
+ else
+ {
+ /* Top right is not available: fall back to the top left subMB */
+ u2_d_in = CHECKBIT(ps_cur_mb_info->u2_top_left_avail_mask, u1_sub_mb_num);
+ /* Check if the the top left subMB is available */
+ if(u2_d_in)
+ {
+ /* CHANGED CODE */
+ ps_mv_pred[TOP_R] = (ps_mv_ntop - 1);
+ if(u1_sub_mb_x == 0)
+ {
+ if(uc_sub_mb_y)
+ {
+ ps_mv_pred[TOP_R] = (ps_mv_nmb - 17); /* presumably -16 - 4 + 3: previous row, last column of the left MB - TODO confirm */
+ }
+ else
+ {
+ /* CHANGED CODE */
+ ps_mv_pred[TOP_R] -= 12;
+ }
+ }
+ }
+ else if(u2_b_in == 0)
+ {
+ /* If all the subMBs B, C, D are all out of the frame then their MV
+ and their reference picture is equal to that of A */
+ ps_mv_pred[TOP] = ps_mv_pred[LEFT];
+ ps_mv_pred[TOP_R] = ps_mv_pred[LEFT];
+ }
+ }
+}
+#endif
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_mvpred_nonmbaffB */
+/* */
+/* Description : This function calculates the motion vector predictor, */
+/* for B-Slices */
+/* Inputs : ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop, u1_sub_mb_num, uc_mb_part_width (passed on to ih264d_non_mbaff_mv_pred); u1_lx_start / u1_lxend (range of lists processed); u1_mb_mc_mode (prediction mode) */
+/* Globals : gau1_ih264d_mv_pred_condition (passed as a const table) */
+/* Processing : The neighbours A(Left),B(Top),C(TopRight) are calculated */
+/* and based on the type of Mb the prediction is */
+/* appropriately done */
+/* Outputs : populates ps_mv_final_pred structure */
+/* Returns : u1_direct_zero_pred_flag which is used only in */
+/* decodeSpatialdirect() */
+/* The flag is set only in the B_DIRECT_SPATIAL case */
+/* Issues : MB_SKIP deliberately falls through into default */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 03 05 2005 TA First Draft */
+/* */
+/*****************************************************************************/
+#if(!MVPRED_NONMBAFF)
+UWORD8 ih264d_mvpred_nonmbaffB(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ mv_pred_t *ps_mv_final_pred,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 uc_mb_part_width,
+ UWORD8 u1_lx_start,
+ UWORD8 u1_lxend,
+ UWORD8 u1_mb_mc_mode)
+{
+ UWORD8 u1_a_in, u1_b_in, uc_temp1, uc_temp2, uc_temp3;
+ mv_pred_t *ps_mv_pred[3]; /* LEFT, TOP and TOP_R neighbour candidates */
+ UWORD8 uc_B2, uc_lx, u1_ref_idx;
+ UWORD8 u1_direct_zero_pred_flag = 0;
+
+ ih264d_non_mbaff_mv_pred(ps_mv_pred, u1_sub_mb_num, ps_mv_nmb, ps_mv_ntop,
+ ps_dec, uc_mb_part_width, ps_cur_mb_info);
+
+ for(uc_lx = u1_lx_start; uc_lx < u1_lxend; uc_lx++)
+ {
+ u1_ref_idx = ps_mv_final_pred->i1_ref_frame[uc_lx];
+ uc_B2 = (uc_lx << 1); /* index of the x component of list uc_lx in i2_mv[] */
+ switch(u1_mb_mc_mode)
+ {
+ case PRED_16x8:
+ /* Directional prediction for a 16x8 MB partition */
+ if(u1_sub_mb_num == 0)
+ {
+ /* Calculating the MV pred for the top 16x8 block */
+ if(ps_mv_pred[TOP]->i1_ref_frame[uc_lx] == u1_ref_idx)
+ {
+ /* If the reference frame used by the top subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the top
+ subMB */
+ ps_mv_final_pred->i2_mv[uc_B2 + 0] =
+ ps_mv_pred[TOP]->i2_mv[uc_B2 + 0];
+ ps_mv_final_pred->i2_mv[uc_B2 + 1] =
+ ps_mv_pred[TOP]->i2_mv[uc_B2 + 1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ else
+ {
+ /* Any other sub-MB number: the LEFT candidate is preferred */
+ if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx)
+ {
+ /* If the reference frame used by the left subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the left
+ subMB */
+ ps_mv_final_pred->i2_mv[uc_B2 + 0] =
+ ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0];
+ ps_mv_final_pred->i2_mv[uc_B2 + 1] =
+ ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ break;
+ case PRED_8x16:
+ /* Directional prediction for a 8x16 MB partition */
+ if(u1_sub_mb_num == 0)
+ {
+ if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx)
+ {
+ /* If the reference frame used by the left subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the left
+ subMB */
+ ps_mv_final_pred->i2_mv[uc_B2 + 0] =
+ ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0];
+ ps_mv_final_pred->i2_mv[uc_B2 + 1] =
+ ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ else
+ {
+ if(ps_mv_pred[TOP_R]->i1_ref_frame[uc_lx] == u1_ref_idx)
+ {
+ /* If the reference frame used by the top right subMB is same as
+ the reference frame used by the current block then MV
+ predictor to be used for the current block is same as the MV
+ of the top right subMB */
+ ps_mv_final_pred->i2_mv[uc_B2 + 0] =
+ ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 0];
+ ps_mv_final_pred->i2_mv[uc_B2 + 1] =
+ ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ break;
+ case B_DIRECT_SPATIAL:
+ /* Spatial direct prediction.
+ For each list the reference index is the minimum of the three
+ neighbours' reference indices. The values are compared as
+ UWORD8, so a negative index compares greater than any
+ non-negative one and is chosen only if all three are negative.
+ If the resulting index is negative for both lists, both are
+ forced to 0 and u1_direct_zero_pred_flag is set.
+ The MV predictor is then computed for list 0 and list 1,
+ independent of uc_lx */
+ uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[0];
+ uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[0];
+ uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[0];
+
+ ps_mv_final_pred->i1_ref_frame[0] = MIN(uc_temp1,
+ MIN(uc_temp2, uc_temp3));
+
+ uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[1];
+ uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[1];
+ uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[1];
+
+ ps_mv_final_pred->i1_ref_frame[1] = MIN(uc_temp1,
+ MIN(uc_temp2, uc_temp3));
+
+ if((ps_mv_final_pred->i1_ref_frame[0] < 0)
+ && (ps_mv_final_pred->i1_ref_frame[1] < 0))
+ {
+ u1_direct_zero_pred_flag = 1;
+ ps_mv_final_pred->i1_ref_frame[0] = 0;
+ ps_mv_final_pred->i1_ref_frame[1] = 0;
+ }
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred,
+ ps_mv_final_pred->i1_ref_frame[0], 0,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred,
+ ps_mv_final_pred->i1_ref_frame[1], 1,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+
+ break;
+ case MB_SKIP:
+ /* Case when the MB has been skipped */
+ /* If either of left or the top subMB is not present
+ OR
+ If both the MV components of either the left or the top subMB are
+ zero and their reference frame pointer pointing to 0
+ then MV for the skipped MB is zero
+ else the Median of the mv_pred_t is used.
+ Only list 0 (i2_mv[0], i2_mv[1], i1_ref_frame[0]) is examined
+ and written here */
+ u1_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
+ LEFT_MB_AVAILABLE_MASK);
+ u1_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
+ TOP_MB_AVAILABLE_MASK);
+ if(((u1_a_in * u1_b_in) == 0)
+ || ((ps_mv_pred[LEFT]->i2_mv[0]
+ | ps_mv_pred[LEFT]->i2_mv[1]
+ | ps_mv_pred[LEFT]->i1_ref_frame[0])
+ == 0)
+ || ((ps_mv_pred[TOP]->i2_mv[0]
+ | ps_mv_pred[TOP]->i2_mv[1]
+ | ps_mv_pred[TOP]->i1_ref_frame[0])
+ == 0))
+ {
+ ps_mv_final_pred->i2_mv[0] = 0;
+ ps_mv_final_pred->i2_mv[1] = 0;
+ break;
+ }
+ /* If the condition above is not true calculate the MV predictor
+ according to the process defined in sec 8.4.1.2.1.
+ Intentional fall through into default */
+ default:
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred, u1_ref_idx, uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ break;
+ }
+ }
+ return (u1_direct_zero_pred_flag);
+}
+#endif
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_mvpred_nonmbaff */
+/* */
+/* Description : This function calculates the motion vector predictor, */
+/* for all the slice types other than B_SLICE */
+/* Inputs : decoder state, current MB info, neighbour MVs, partition info */
+/* Globals : gau1_ih264d_mv_pred_condition */
+/* Processing : The neighbours A(Left),B(Top),C(TopRight) are calculated */
+/* and based on the type of Mb the prediction is */
+/* appropriately done */
+/* Outputs : populates ps_mv_final_pred structure */
+/* Returns : u1_direct_zero_pred_flag, set only by B_DIRECT_SPATIAL; */
+/* used only in decodeSpatialdirect() */
+/* */
+/* Issues : u1_lx_start and u1_lxend are accepted but not used */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 03 05 2005 TA First Draft */
+/* */
+/*****************************************************************************/
+#if(!MVPRED_NONMBAFF)
+UWORD8 ih264d_mvpred_nonmbaff(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ mv_pred_t *ps_mv_final_pred,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 uc_mb_part_width,
+ UWORD8 u1_lx_start,
+ UWORD8 u1_lxend,
+ UWORD8 u1_mb_mc_mode)
+{
+ UWORD8 u1_a_in, u1_b_in, uc_temp1, uc_temp2, uc_temp3;
+ mv_pred_t *ps_mv_pred[3];
+ UWORD8 u1_ref_idx;
+ UWORD8 u1_direct_zero_pred_flag = 0;
+ UNUSED(u1_lx_start); /* the list range is ignored: list 0 is predicted, */
+ UNUSED(u1_lxend); /* except in B_DIRECT_SPATIAL which handles both lists */
+ ih264d_non_mbaff_mv_pred(ps_mv_pred, u1_sub_mb_num, ps_mv_nmb, ps_mv_ntop,
+ ps_dec, uc_mb_part_width, ps_cur_mb_info); /* expected to fill ps_mv_pred[LEFT/TOP/TOP_R] */
+
+ u1_ref_idx = ps_mv_final_pred->i1_ref_frame[0]; /* list 0 ref index already stored in the output struct */
+
+ switch(u1_mb_mc_mode)
+ {
+ case PRED_16x8:
+ /* Directional prediction for a 16x8 MB partition */
+ if(u1_sub_mb_num == 0)
+ {
+ /* Calculating the MV pred for the top 16x8 block */
+ if(ps_mv_pred[TOP]->i1_ref_frame[0] == u1_ref_idx)
+ {
+ /* If the reference frame used by the top subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the top
+ subMB */
+
+ ps_mv_final_pred->i2_mv[0] = ps_mv_pred[TOP]->i2_mv[0];
+ ps_mv_final_pred->i2_mv[1] = ps_mv_pred[TOP]->i2_mv[1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ 0,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ else
+ {
+ if(ps_mv_pred[LEFT]->i1_ref_frame[0] == u1_ref_idx)
+ {
+ /* If the reference frame used by the left subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the left
+ subMB */
+
+ ps_mv_final_pred->i2_mv[0] = ps_mv_pred[LEFT]->i2_mv[0];
+ ps_mv_final_pred->i2_mv[1] = ps_mv_pred[LEFT]->i2_mv[1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ 0,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ break;
+ case PRED_8x16:
+ /* Directional prediction for a 8x16 MB partition */
+ if(u1_sub_mb_num == 0)
+ {
+ if(ps_mv_pred[LEFT]->i1_ref_frame[0] == u1_ref_idx)
+ {
+ /* If the reference frame used by the left subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the left
+ subMB */
+
+ ps_mv_final_pred->i2_mv[0] = ps_mv_pred[LEFT]->i2_mv[0];
+ ps_mv_final_pred->i2_mv[1] = ps_mv_pred[LEFT]->i2_mv[1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ 0,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ else
+ {
+ if(ps_mv_pred[TOP_R]->i1_ref_frame[0] == u1_ref_idx)
+ {
+ /* If the reference frame used by the top right subMB is same as
+ the reference frame used by the current block then MV
+ predictor to be used for the current block is same as the MV
+ of the top right subMB */
+
+ ps_mv_final_pred->i2_mv[0] = ps_mv_pred[TOP_R]->i2_mv[0];
+ ps_mv_final_pred->i2_mv[1] = ps_mv_pred[TOP_R]->i2_mv[1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ 0,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ break;
+ case B_DIRECT_SPATIAL:
+ /* Spatial direct prediction */
+ /* For each list the ref index becomes the minimum of the LEFT, TOP
+ and TOP_R ref indices. The comparison is done on unsigned bytes,
+ so a negative index compares larger than any non-negative one.
+ If both minima are still negative, both ref indices are forced
+ to 0 and u1_direct_zero_pred_flag is set.
+ The MV predictor is then computed for list 0 and for list 1 */
+ uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[0];
+ uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[0];
+ uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[0];
+
+ ps_mv_final_pred->i1_ref_frame[0] = MIN(uc_temp1,
+ MIN(uc_temp2, uc_temp3));
+
+ uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[1];
+ uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[1];
+ uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[1];
+
+ ps_mv_final_pred->i1_ref_frame[1] = MIN(uc_temp1,
+ MIN(uc_temp2, uc_temp3));
+
+ if((ps_mv_final_pred->i1_ref_frame[0] < 0)
+ && (ps_mv_final_pred->i1_ref_frame[1] < 0))
+ {
+ u1_direct_zero_pred_flag = 1;
+ ps_mv_final_pred->i1_ref_frame[0] = 0;
+ ps_mv_final_pred->i1_ref_frame[1] = 0;
+ }
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred,
+ ps_mv_final_pred->i1_ref_frame[0], 0,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred,
+ ps_mv_final_pred->i1_ref_frame[1], 1,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+
+ break;
+ case MB_SKIP:
+ /* Case when the MB has been skipped */
+ /* If either of left or the top subMB is not present
+ OR
+ If both the MV components of either the left or the top subMB are
+ zero and their reference frame pointer pointing to 0
+ then MV for the skipped MB is zero
+ else the Median of the mv_pred_t is used */
+ u1_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
+ LEFT_MB_AVAILABLE_MASK);
+ u1_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
+ TOP_MB_AVAILABLE_MASK);
+ if(((u1_a_in * u1_b_in) == 0)
+ || ((ps_mv_pred[LEFT]->i2_mv[0]
+ | ps_mv_pred[LEFT]->i2_mv[1]
+ | ps_mv_pred[LEFT]->i1_ref_frame[0])
+ == 0)
+ || ((ps_mv_pred[TOP]->i2_mv[0]
+ | ps_mv_pred[TOP]->i2_mv[1]
+ | ps_mv_pred[TOP]->i1_ref_frame[0])
+ == 0))
+ {
+
+ ps_mv_final_pred->i2_mv[0] = 0;
+ ps_mv_final_pred->i2_mv[1] = 0;
+ break;
+ }
+ /* Otherwise fall through to the default case and calculate the MV
+ predictor according to the process defined in sec 8.4.1.2.1 */
+ default:
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred, u1_ref_idx, 0,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ break;
+ }
+
+ return (u1_direct_zero_pred_flag);
+}
+#endif
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_mvpred_mbaff */
+/* */
+/* Description : Calculates the MV predictor for lists u1_lx_start..u1_lxend-1 */
+/* Inputs : decoder state, current MB info, neighbour MVs, partition info */
+/* Globals : gau1_ih264d_mv_pred_condition */
+/* Processing : The neighbours A(Left),B(Top),C(TopRight) are calculated */
+/* and based on the type of Mb the prediction is */
+/* appropriately done */
+/* Outputs : populates ps_mv_final_pred structure */
+/* Returns : u1_direct_zero_pred_flag, set only by B_DIRECT_SPATIAL; */
+/* used only in decodeSpatialdirect() */
+/* */
+/* Issues : Neighbours flagged for scaling are scaled on local copies */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 03 05 2005 TA First Draft */
+/* */
+/*****************************************************************************/
+
+UWORD8 ih264d_mvpred_mbaff(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ mv_pred_t *ps_mv_final_pred,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 uc_mb_part_width,
+ UWORD8 u1_lx_start,
+ UWORD8 u1_lxend,
+ UWORD8 u1_mb_mc_mode)
+{
+ UWORD8 u1_a_in, u1_b_in, uc_temp1, uc_temp2, uc_temp3;
+ mv_pred_t *ps_mv_pred[3], s_mvPred[3];
+ UWORD8 uc_B2, pu0_scale[3], i, uc_lx, u1_ref_idx;
+ UWORD8 u1_direct_zero_pred_flag = 0;
+
+ pu0_scale[0] = pu0_scale[1] = pu0_scale[2] = 0; /* 0: neighbour is used unscaled */
+ ih264d_mbaff_mv_pred(ps_mv_pred, u1_sub_mb_num, ps_mv_nmb, ps_mv_ntop, ps_dec,
+ uc_mb_part_width, ps_cur_mb_info, pu0_scale); /* expected to fill ps_mv_pred[] and pu0_scale[] */
+ for(i = 0; i < 3; i++)
+ {
+ if(pu0_scale[i] != 0)
+ {
+ memcpy(&s_mvPred[i], ps_mv_pred[i], sizeof(mv_pred_t)); /* scale a private copy, not the source entry */
+ if(pu0_scale[i] == 1) /* double the ref indices, halve i2_mv[1] and i2_mv[3] */
+ {
+ s_mvPred[i].i1_ref_frame[0] = s_mvPred[i].i1_ref_frame[0] * 2; /* "* 2" rather than "<< 1": */
+ s_mvPred[i].i1_ref_frame[1] = s_mvPred[i].i1_ref_frame[1] * 2; /* left-shifting a negative value is undefined */
+ s_mvPred[i].i2_mv[1] = SIGN_POW2_DIV(s_mvPred[i].i2_mv[1], 1);
+ s_mvPred[i].i2_mv[3] = SIGN_POW2_DIV(s_mvPred[i].i2_mv[3], 1);
+ }
+ else /* any other non-zero scale: halve the ref indices, double i2_mv[1] and i2_mv[3] */
+ {
+ s_mvPred[i].i1_ref_frame[0] = s_mvPred[i].i1_ref_frame[0] >> 1;
+ s_mvPred[i].i1_ref_frame[1] = s_mvPred[i].i1_ref_frame[1] >> 1;
+ s_mvPred[i].i2_mv[1] = s_mvPred[i].i2_mv[1] * 2; /* "* 2" rather than "<< 1", see above */
+ s_mvPred[i].i2_mv[3] = s_mvPred[i].i2_mv[3] * 2;
+ }
+ ps_mv_pred[i] = &s_mvPred[i];
+ }
+ }
+
+ for(uc_lx = u1_lx_start; uc_lx < u1_lxend; uc_lx++)
+ {
+ u1_ref_idx = ps_mv_final_pred->i1_ref_frame[uc_lx];
+ uc_B2 = (uc_lx << 1); /* index of this list's first MV component in i2_mv[] */
+ switch(u1_mb_mc_mode)
+ {
+ case PRED_16x8:
+ /* Directional prediction for a 16x8 MB partition */
+ if(u1_sub_mb_num == 0)
+ {
+ /* Calculating the MV pred for the top 16x8 block */
+ if(ps_mv_pred[TOP]->i1_ref_frame[uc_lx] == u1_ref_idx)
+ {
+ /* If the reference frame used by the top subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the top
+ subMB */
+ ps_mv_final_pred->i2_mv[uc_B2 + 0] =
+ ps_mv_pred[TOP]->i2_mv[uc_B2 + 0];
+ ps_mv_final_pred->i2_mv[uc_B2 + 1] =
+ ps_mv_pred[TOP]->i2_mv[uc_B2 + 1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ else
+ {
+ if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx)
+ {
+ /* If the reference frame used by the left subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the left
+ subMB */
+ ps_mv_final_pred->i2_mv[uc_B2 + 0] =
+ ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0];
+ ps_mv_final_pred->i2_mv[uc_B2 + 1] =
+ ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ break;
+ case PRED_8x16:
+ /* Directional prediction for a 8x16 MB partition */
+ if(u1_sub_mb_num == 0)
+ {
+ if(ps_mv_pred[LEFT]->i1_ref_frame[uc_lx] == u1_ref_idx)
+ {
+ /* If the reference frame used by the left subMB is same as the
+ reference frame used by the current block then MV predictor to
+ be used for the current block is same as the MV of the left
+ subMB */
+ ps_mv_final_pred->i2_mv[uc_B2 + 0] =
+ ps_mv_pred[LEFT]->i2_mv[uc_B2 + 0];
+ ps_mv_final_pred->i2_mv[uc_B2 + 1] =
+ ps_mv_pred[LEFT]->i2_mv[uc_B2 + 1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ else
+ {
+ if(ps_mv_pred[TOP_R]->i1_ref_frame[uc_lx] == u1_ref_idx)
+ {
+ /* If the reference frame used by the top right subMB is same as
+ the reference frame used by the current block then MV
+ predictor to be used for the current block is same as the MV
+ of the top right subMB */
+ ps_mv_final_pred->i2_mv[uc_B2 + 0] =
+ ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 0];
+ ps_mv_final_pred->i2_mv[uc_B2 + 1] =
+ ps_mv_pred[TOP_R]->i2_mv[uc_B2 + 1];
+ }
+ else
+ {
+ /* The MV predictor is calculated according to the process
+ defined in 8.4.1.2.1 */
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred,
+ ps_mv_pred,
+ u1_ref_idx,
+ uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ }
+ }
+ break;
+ case B_DIRECT_SPATIAL:
+ /* Spatial direct prediction */
+ /* For each list the ref index becomes the minimum of the LEFT, TOP
+ and TOP_R ref indices. The comparison is done on unsigned bytes,
+ so a negative index compares larger than any non-negative one.
+ If both minima are still negative, both ref indices are forced
+ to 0 and u1_direct_zero_pred_flag is set.
+ The MV predictor is then computed for list 0 and for list 1 */
+ uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[0];
+ uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[0];
+ uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[0];
+
+ ps_mv_final_pred->i1_ref_frame[0] = MIN(uc_temp1,
+ MIN(uc_temp2, uc_temp3));
+
+ uc_temp1 = (UWORD8)ps_mv_pred[LEFT]->i1_ref_frame[1];
+ uc_temp2 = (UWORD8)ps_mv_pred[TOP]->i1_ref_frame[1];
+ uc_temp3 = (UWORD8)ps_mv_pred[TOP_R]->i1_ref_frame[1];
+
+ ps_mv_final_pred->i1_ref_frame[1] = MIN(uc_temp1,
+ MIN(uc_temp2, uc_temp3));
+
+ /* If the reference indices are negative clip the scaled reference indices to -1 */
+ /* i.e invalid reference index */
+
+ /*if(ps_mv_final_pred->i1_ref_frame[0] < 0)
+ ps_mv_final_pred->i1_ref_frame[0] = -1;
+
+ if(ps_mv_final_pred->i1_ref_frame[1] < 0)
+ ps_mv_final_pred->i1_ref_frame[1] = -1; */
+
+ if((ps_mv_final_pred->i1_ref_frame[0] < 0)
+ && (ps_mv_final_pred->i1_ref_frame[1] < 0))
+ {
+ u1_direct_zero_pred_flag = 1;
+ ps_mv_final_pred->i1_ref_frame[0] = 0;
+ ps_mv_final_pred->i1_ref_frame[1] = 0;
+ }
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred,
+ ps_mv_final_pred->i1_ref_frame[0], 0,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred,
+ ps_mv_final_pred->i1_ref_frame[1], 1,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+
+ break;
+ case MB_SKIP:
+ /* Case when the MB has been skipped */
+ /* If either of left or the top subMB is not present
+ OR
+ If both the MV components of either the left or the top subMB are
+ zero and their reference frame pointer pointing to 0
+ then MV for the skipped MB is zero
+ else the Median of the mv_pred_t is used */
+ u1_a_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
+ LEFT_MB_AVAILABLE_MASK);
+ u1_b_in = (ps_cur_mb_info->u1_mb_ngbr_availablity &
+ TOP_MB_AVAILABLE_MASK);
+ if(((u1_a_in * u1_b_in) == 0)
+ || ((ps_mv_pred[LEFT]->i2_mv[0]
+ | ps_mv_pred[LEFT]->i2_mv[1]
+ | ps_mv_pred[LEFT]->i1_ref_frame[0])
+ == 0)
+ || ((ps_mv_pred[TOP]->i2_mv[0]
+ | ps_mv_pred[TOP]->i2_mv[1]
+ | ps_mv_pred[TOP]->i1_ref_frame[0])
+ == 0))
+ {
+ ps_mv_final_pred->i2_mv[0] = 0;
+ ps_mv_final_pred->i2_mv[1] = 0;
+ break;
+ }
+ /* Otherwise fall through to the default case and calculate the MV
+ predictor according to the process defined in sec 8.4.1.2.1 */
+ default:
+ ih264d_get_motion_vector_predictor(
+ ps_mv_final_pred, ps_mv_pred, u1_ref_idx, uc_lx,
+ (const UWORD8 *)gau1_ih264d_mv_pred_condition);
+ break;
+ }
+ }
+ return (u1_direct_zero_pred_flag);
+}
+
+
+
+
+void ih264d_rep_mv_colz(dec_struct_t *ps_dec,
+ mv_pred_t *ps_mv_pred_src,
+ mv_pred_t *ps_mv_pred_dst,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 u1_colz,
+ UWORD8 u1_ht,
+ UWORD8 u1_wd)
+{
+ /* Replicates *ps_mv_pred_src and u1_colz over a block of u1_wd x u1_ht
+ entries; consecutive rows lie SUB_BLK_WIDTH entries apart */
+ UWORD8 u1_row, u1_col;
+ UWORD8 *pu1_colz_row = ps_dec->pu1_col_zero_flag + ps_dec->i4_submb_ofst
+ + u1_sub_mb_num;
+ mv_pred_t *ps_dst_row = ps_mv_pred_dst;
+ for(u1_row = 0; u1_row < u1_ht; u1_row++)
+ {
+ for(u1_col = 0; u1_col < u1_wd; u1_col++)
+ {
+ ps_dst_row[u1_col] = *ps_mv_pred_src;
+ pu1_colz_row[u1_col] = u1_colz;
+ }
+ pu1_colz_row += SUB_BLK_WIDTH;
+ ps_dst_row += SUB_BLK_WIDTH;
+ }
+}
+
diff --git a/decoder/ih264d_mvpred.h b/decoder/ih264d_mvpred.h
new file mode 100755
index 0000000..66366ca
--- /dev/null
+++ b/decoder/ih264d_mvpred.h
@@ -0,0 +1,153 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _IH264D_MVPRED_H_
+#define _IH264D_MVPRED_H_
+
+/**
+**************************************************************************
+* \file ih264d_mvpred.h
+*
+* \brief
+* This file contains declarations of functions specific to decoding
+* Motion vector.
+*
+* Detailed_description
+*
+* \date
+* 10-12-2002
+*
+* \author Arvind Raman
+**************************************************************************
+*/
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+//#include "structs.h"
+
+/** Reference number that is not valid */
+#define OUT_OF_RANGE_REF (-1)
+
+#define ONE_TO_ONE 0 /* NOTE(review): these three look like scaling modes */
+#define FRM_TO_FLD 1 /* between frame and field data - confirm against */
+#define FLD_TO_FRM 2 /* the code that sets u1_vert_mv_scale */
+
+/**
+**************************************************************************
+* \brief POSITION_IN_MVBANK: evaluates to ((a) + (c) * (d) + (b))
+*
+* a: Pointer to the top left subMb of the MB in the MV bank array
+* b: Horiz posn in terms of subMbs
+* c: Vert posn in terms of subMbs
+* d: Multiplier applied to c (originally described as "subMb number")
+**************************************************************************
+*/
+#define POSITION_IN_MVBANK(a, b, c, d) ((a) + (c) * (d) + (b))
+
+
+
+/**
+**************************************************************************
+* \brief col4x4_t
+*
+* Container to return the information related to the co-located 4x4
+* sub-macroblock.
+**************************************************************************
+*/
+typedef struct
+{
+ mv_pred_t *ps_mv; /** Ptr to the Mv bank */
+ UWORD16 u2_mb_addr_col; /** Addr of the co-located MB */
+ WORD16 i2_mv[2]; /** Mv (two components) of the colocated MB */
+ WORD8 i1_ref_idx_col; /** Ref idx of the co-located picture */
+ UWORD8 u1_col_pic; /** Idx of the colocated pic */
+ UWORD8 u1_yM; /** "y" coord of the colocated MB addr */
+ UWORD8 u1_vert_mv_scale; /** Vertical Mv scale, as defined in sec 8.4.1.2.1 */
+} col4x4_t;
+
+
+
+
+
+void ih264d_update_nnz_for_skipmb(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_entrpy);
+
+void ih264d_get_motion_vector_predictor(mv_pred_t * ps_result,
+ mv_pred_t **ps_mv_pred,
+ UWORD8 u1_ref_idx,
+ UWORD8 u1_B,
+ const UWORD8 *pu1_mv_pred_condition);
+void ih264d_mbaff_mv_pred(mv_pred_t **ps_mv_pred,
+ UWORD8 u1_sub_mb_num,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ dec_struct_t *ps_dec,
+ UWORD8 uc_mb_part_width,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD8* pu0_scale);
+void ih264d_non_mbaff_mv_pred(mv_pred_t **ps_mv_pred,
+ UWORD8 u1_sub_mb_num,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ dec_struct_t *ps_dec,
+ UWORD8 uc_mb_part_width,
+ dec_mb_info_t *ps_cur_mb_info);
+UWORD8 ih264d_mvpred_nonmbaff(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ mv_pred_t *ps_mv_final_pred,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 uc_mb_part_width,
+ UWORD8 u1_lx_start,
+ UWORD8 u1_lxend,
+ UWORD8 u1_mb_mc_mode);
+
+UWORD8 ih264d_mvpred_nonmbaffB(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ mv_pred_t *ps_mv_final_pred,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 uc_mb_part_width,
+ UWORD8 u1_lx_start,
+ UWORD8 u1_lxend,
+ UWORD8 u1_mb_mc_mode);
+
+UWORD8 ih264d_mvpred_mbaff(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ mv_pred_t *ps_mv_final_pred,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 uc_mb_part_width,
+ UWORD8 u1_lx_start,
+ UWORD8 u1_lxend,
+ UWORD8 u1_mb_mc_mode);
+
+void ih264d_rep_mv_colz(dec_struct_t *ps_dec,
+ mv_pred_t *ps_mv_pred_src,
+ mv_pred_t *ps_mv_pred_dst,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 u1_colz,
+ UWORD8 u1_ht,
+ UWORD8 u1_wd);
+
+#endif /* _IH264D_MVPRED_H_ */
diff --git a/decoder/ih264d_nal.c b/decoder/ih264d_nal.c
new file mode 100755
index 0000000..48450c8
--- /dev/null
+++ b/decoder/ih264d_nal.c
@@ -0,0 +1,393 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_nal.c
+ *
+ * \brief NAL parsing routines
+ *
+ * Detailed_description
+ *
+ * \author
+ * - AI 19 11 2002 Creation
+ **************************************************************************
+ */
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_defs.h"
+#define NUM_OF_ZERO_BYTES_BEFORE_START_CODE 2
+#define EMULATION_PREVENTION_BYTE 0x03
+
+#define NAL_FIRST_BYTE_SIZE 1
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_find_start_code \endif
+ *
+ * \brief
+ * Skips the first Start Code Prefix and measures the bytes up to the next one.
+ *
+ * \param pu1_buf : Pointer to char buffer which contains bitstream.
+ * \param u4_cur_pos : Position in the buffer at which the search starts.
+ * \param u4_max_ofst : Number of bytes in Buffer.
+ * \param pu4_length_of_start_code : Out: offset just past the first Start Code.
+ * \param pu4_next_is_aud : Out: set to 1 if an AUD NAL follows; never cleared.
+ * \return
+ * Bytes from that offset to the zero run of the next Start Code or buffer end.
+ *
+ **************************************************************************
+ */
+#define START_CODE_NOT_FOUND -1
+#define END_OF_STREAM_BUFFER -2
+#define END_OF_STREAM -1
+
+void ih264d_check_if_aud(UWORD8 *pu1_buf,
+ UWORD32 u4_cur_pos,
+ UWORD32 u4_max_ofst,
+ UWORD32 *pu4_next_is_aud)
+{
+ /* Raises *pu4_next_is_aud when the byte following u4_cur_pos has the
+ access unit delimiter NAL type; otherwise the flag is left untouched */
+ UWORD32 u4_hdr_pos = u4_cur_pos + 1;
+ UWORD8 u1_nal_hdr, u1_type;
+
+ if(u4_hdr_pos >= u4_max_ofst)
+ return; /* no byte follows u4_cur_pos inside the buffer */
+
+ u1_nal_hdr = pu1_buf[u4_hdr_pos];
+ u1_type = NAL_UNIT_TYPE(u1_nal_hdr);
+ if(u1_type == ACCESS_UNIT_DELIMITER_RBSP)
+ *pu4_next_is_aud = 1;
+}
+WORD32 ih264d_find_start_code(UWORD8 *pu1_buf,
+ UWORD32 u4_cur_pos,
+ UWORD32 u4_max_ofst,
+ UWORD32 *pu4_length_of_start_code,
+ UWORD32 *pu4_next_is_aud)
+{
+ /* Two passes over pu1_buf: the first steps over the leading start code,
+ the second measures the distance to the next start code (or buffer end) */
+ WORD32 i4_zero_run = 0;
+ UWORD32 u4_payload_start;
+ UWORD8 u1_byte;
+
+ *pu4_length_of_start_code = 0;
+ /* Pass 1: stop just after the 0x01 that closes a run of two or more zeros */
+ for(; u4_cur_pos < u4_max_ofst; u4_cur_pos++)
+ {
+ u1_byte = pu1_buf[u4_cur_pos];
+ if(0 == u1_byte)
+ i4_zero_run++;
+ else if((0x01 == u1_byte)
+ && (i4_zero_run >= NUM_OF_ZERO_BYTES_BEFORE_START_CODE))
+ {
+ /* Found the start code: step past its last byte */
+ u4_cur_pos++;
+ break;
+ }
+ else
+ {
+ i4_zero_run = 0;
+ }
+ }
+ *pu4_length_of_start_code = u4_cur_pos;
+ u4_payload_start = u4_cur_pos;
+
+ /* Pass 2: stop on the 0x01 of the next start code, keeping its zero run */
+ for(i4_zero_run = 0; u4_cur_pos < u4_max_ofst; u4_cur_pos++)
+ {
+ u1_byte = pu1_buf[u4_cur_pos];
+ if(0 == u1_byte)
+ i4_zero_run++;
+ else if((0x01 == u1_byte)
+ && (i4_zero_run >= NUM_OF_ZERO_BYTES_BEFORE_START_CODE))
+ {
+ ih264d_check_if_aud(pu1_buf, u4_cur_pos, u4_max_ofst,
+ pu4_next_is_aud);
+ break;
+ }
+ else
+ {
+ i4_zero_run = 0;
+ }
+ }
+ return (u4_cur_pos - i4_zero_run - u4_payload_start); /* zero run excluded */
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_next_nal_unit \endif
+ *
+ * \brief
+ * This function runs the start code search for one NAL unit.
+ * \param pu1_buf : Pointer to char buffer which contains bitstream.
+ * \param u4_cur_pos : Position in the buffer at which the search starts.
+ * \param u4_max_ofst : Number of bytes in Buffer.
+ * \param pu4_length_of_start_code : Filled in by ih264d_find_start_code().
+ * \return
+ * Always 0. NOTE(review): the length found by the search is dropped - confirm.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_get_next_nal_unit(UWORD8 *pu1_buf,
+ UWORD32 u4_cur_pos,
+ UWORD32 u4_max_ofst,
+ UWORD32 *pu4_length_of_start_code)
+{
+
+ WORD32 i_length_of_nal_unit = 0;
+ UWORD32 u4_next_is_aud = 0; /* the callee only ever sets this flag, so clear it first */
+
+ /* NAL Thread starts */
+
+ ih264d_find_start_code(pu1_buf, u4_cur_pos, u4_max_ofst,
+ pu4_length_of_start_code, &u4_next_is_aud);
+
+ return (i_length_of_nal_unit);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_process_nal_unit \endif
+ *
+ * \brief
+ * This function removes emulation byte "0x03" from bitstream (EBSP to RBSP).
+ * It also packs the bytes, first byte most significant, into 32 bit words.
+ *
+ * \param ps_bitstrm : Pointer to dec_bit_stream_t structure (buffer and offsets are set).
+ * \param pu1_nal_unit : Pointer to char buffer of NalUnit; overwritten in place.
+ * \param u4_numbytes_in_nal_unit : Number of bytes in NalUnit buffer.
+ *
+ * \return
+ * Returns number of bytes in RBSP, not counting the first byte of the NAL unit.
+ *
+ * \note
+ * This function is same as nal_unit() of 7.3.1. Apart from nal_unit()
+ * implementation it converts char buffer into 32 bit Buffer. This
+ * facilitates efficient access of bitstream. This has been done taking
+ * into account present processor architectures.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_process_nal_unit(dec_bit_stream_t *ps_bitstrm,
+ UWORD8 *pu1_nal_unit,
+ UWORD32 u4_numbytes_in_nal_unit)
+{
+ UWORD32 u4_num_bytes_in_rbsp;
+ UWORD8 u1_cur_byte;
+ WORD32 i = 0;
+ WORD8 c_count;
+ UWORD32 ui_word;
+ UWORD32 *puc_bitstream_buffer = (UWORD32*)pu1_nal_unit; /* output words overwrite the input bytes */
+ ps_bitstrm->pu4_buffer = puc_bitstream_buffer;
+
+ /*--------------------------------------------------------------------*/
+ /* First Byte of the NAL Unit */
+ /*--------------------------------------------------------------------*/
+
+ ui_word = *pu1_nal_unit++;
+
+ /*--------------------------------------------------------------------*/
+ /* Conversion of the EBSP to RBSP */
+ /* ie Remove the emulation_prevention_byte [equal to 0x03] */
+ /*--------------------------------------------------------------------*/
+ u4_num_bytes_in_rbsp = 0;
+ c_count = 0;
+
+//second byte of the NAL unit: read unconditionally, no emulation check
+
+ u1_cur_byte = *pu1_nal_unit++;
+
+ ui_word = ((ui_word << 8) | u1_cur_byte);
+
+ c_count++; /* c_count tracks the current run of 0x00 bytes */
+ if(u1_cur_byte != 0x00)
+ c_count = 0;
+
+//third byte of the NAL unit: read unconditionally, no emulation check
+
+ u1_cur_byte = *pu1_nal_unit++;
+
+ ui_word = ((ui_word << 8) | u1_cur_byte);
+ u4_num_bytes_in_rbsp = 2; /* the two bytes read above */
+
+ c_count++;
+ if(u1_cur_byte != 0x00)
+ c_count = 0;
+
+ if(u4_numbytes_in_nal_unit > 2)
+ {
+ i = ((u4_numbytes_in_nal_unit - 3)); /* input bytes not yet read */
+ }
+
+ for(; i > 8; i -= 4) /* 4 output bytes per pass; at most 8 input bytes are consumed */
+ {
+
+// loop 0: this byte completes the pending 32 bit word, which is stored
+ u1_cur_byte = *pu1_nal_unit++;
+
+ if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE
+ && u1_cur_byte == EMULATION_PREVENTION_BYTE)
+ {
+ c_count = 0;
+ u1_cur_byte = *pu1_nal_unit++; /* drop the 0x03, take the byte after it */
+ i--;
+ }
+
+ ui_word = ((ui_word << 8) | u1_cur_byte);
+ *puc_bitstream_buffer = ui_word;
+ puc_bitstream_buffer++;
+ c_count++;
+ if(u1_cur_byte != 0x00)
+ c_count = 0;
+
+// loop 1: first byte of the next word
+ u1_cur_byte = *pu1_nal_unit++;
+
+ if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE
+ && u1_cur_byte == EMULATION_PREVENTION_BYTE)
+ {
+ c_count = 0;
+ u1_cur_byte = *pu1_nal_unit++;
+ i--;
+ }
+ ui_word = ((ui_word << 8) | u1_cur_byte);
+
+ c_count++;
+ if(u1_cur_byte != 0x00)
+ c_count = 0;
+
+// loop 2: second byte of the next word
+ u1_cur_byte = *pu1_nal_unit++;
+
+ if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE
+ && u1_cur_byte == EMULATION_PREVENTION_BYTE)
+ {
+ c_count = 0;
+ u1_cur_byte = *pu1_nal_unit++;
+ i--;
+ }
+
+ ui_word = ((ui_word << 8) | u1_cur_byte);
+
+ c_count++;
+ if(u1_cur_byte != 0x00)
+ c_count = 0;
+
+// loop 3: third byte of the next word
+ u1_cur_byte = *pu1_nal_unit++;
+
+ if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE
+ && u1_cur_byte == EMULATION_PREVENTION_BYTE)
+ {
+ c_count = 0;
+ u1_cur_byte = *pu1_nal_unit++;
+ i--;
+ }
+
+ ui_word = ((ui_word << 8) | u1_cur_byte);
+
+ c_count++;
+ if(u1_cur_byte != 0x00)
+ c_count = 0;
+
+ u4_num_bytes_in_rbsp += 4;
+
+ }
+
+ for(; i > 0; i--) /* remaining input, one output byte per pass */
+ {
+ u1_cur_byte = *pu1_nal_unit++;
+
+ if(c_count == NUM_OF_ZERO_BYTES_BEFORE_START_CODE
+ && u1_cur_byte == EMULATION_PREVENTION_BYTE)
+ {
+ c_count = 0;
+ i--;
+ u1_cur_byte = *pu1_nal_unit++; /* NOTE(review): if the 0x03 was the last counted byte this reads one byte beyond it - confirm */
+ }
+
+ ui_word = ((ui_word << 8) | u1_cur_byte);
+ u4_num_bytes_in_rbsp++;
+
+ if((u4_num_bytes_in_rbsp & 0x03) == 0x03) /* first byte + RBSP bytes now fill a whole word */
+ {
+ *puc_bitstream_buffer = ui_word;
+ puc_bitstream_buffer++;
+ }
+ c_count++;
+ if(u1_cur_byte != 0x00)
+ c_count = 0;
+
+ }
+
+ *puc_bitstream_buffer = (ui_word
+ << ((3 - (((u4_num_bytes_in_rbsp << 30) >> 30))) << 3)); /* final word: pending bytes moved to the top */
+ ps_bitstrm->u4_ofst = 0;
+ ps_bitstrm->u4_max_ofst = ((u4_num_bytes_in_rbsp + NAL_FIRST_BYTE_SIZE) << 3); /* size in bits, first byte included */
+
+ return (u4_num_bytes_in_rbsp);
+}
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_rbsp_to_sodb \endif
+ *
+ * \brief
+ * Converts RBSP to SODB by trimming the stop bit and the zero bits below it.
+ *
+ * \param ps_bitstrm : Pointer to dec_bit_stream_t; its u4_max_ofst is reduced.
+ *
+ * \return
+ * None.
+ *
+ **************************************************************************
+ */
+void ih264d_rbsp_to_sodb(dec_bit_stream_t *ps_bitstrm)
+{
+ const UWORD32 u4_num_bits = ps_bitstrm->u4_max_ofst;
+ const UWORD32 u4_word_idx = u4_num_bits >> 5;
+ const UWORD32 u4_tail_bytes = (u4_num_bits >> 3) & 0x03;
+ UWORD8 u1_last_byte;
+ WORD8 i1_bit = 0;
+
+ if(0 == u4_tail_bytes)
+ {
+ /* Whole words only: take the low byte of the previous word */
+ u1_last_byte = (UWORD8)(ps_bitstrm->pu4_buffer[u4_word_idx - 1] & 0xFF);
+ }
+ else
+ {
+ /* Partial word: take byte number u4_tail_bytes counted from the top */
+ u1_last_byte = (UWORD8)(ps_bitstrm->pu4_buffer[u4_word_idx]
+ >> ((4 - u4_tail_bytes) << 3));
+ }
+
+ /* Find out the rbsp_stop_bit position in the last byte of rbsp */
+ while((i1_bit < 8) && !CHECKBIT(u1_last_byte, i1_bit))
+ {
+ i1_bit++;
+ }
+ ps_bitstrm->u4_max_ofst = u4_num_bits - (i1_bit + 1);
+}
diff --git a/decoder/ih264d_nal.h b/decoder/ih264d_nal.h
new file mode 100755
index 0000000..3778881
--- /dev/null
+++ b/decoder/ih264d_nal.h
@@ -0,0 +1,56 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _IH264D_NAL_H_
+#define _IH264D_NAL_H_
+
+/*!
+*************************************************************************
+* \file ih264d_nal.h
+*
+* \brief
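+* Prototypes of the NAL unit handling routines of the decoder.
+*
+* Declares ih264d_process_nal_unit(), ih264d_rbsp_to_sodb() and ih264d_find_start_code().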
+*
+* \date
+* 21/11/2002
+*
+* \author AI
+*************************************************************************
+*/
+#include <stdio.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+
+WORD32 ih264d_process_nal_unit(dec_bit_stream_t *ps_bitstrm,
+ UWORD8 *pu1_nal_unit,
+ UWORD32 u4_numbytes_in_nal_unit); /* NOTE(review): presumably returns the RBSP byte count - confirm against the definition */
+void ih264d_rbsp_to_sodb(dec_bit_stream_t *ps_bitstrm); /* updates ps_bitstrm->u4_max_ofst in place */
+WORD32 ih264d_find_start_code(UWORD8 *pu1_buf,
+ UWORD32 u4_cur_pos,
+ UWORD32 u4_max_ofst,
+ UWORD32 *pu4_length_of_start_code, /* output pointer, judging by the name - TODO confirm */
+ UWORD32 *pu4_next_is_aud); /* output pointer, judging by the name - TODO confirm */
+
+
+#endif /* _IH264D_NAL_H_ */
diff --git a/decoder/ih264d_parse_bslice.c b/decoder/ih264d_parse_bslice.c
new file mode 100755
index 0000000..89cf5ed
--- /dev/null
+++ b/decoder/ih264d_parse_bslice.c
@@ -0,0 +1,1696 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_parse_bslice.c
+ *
+ * \brief
+ * Contains routines that parse the macroblocks of a B slice type
+ *
+ * Detailed_description
+ *
+ * \date
+ * 07/07/2003
+ *
+ * \author NS
+ **************************************************************************
+ */
+
+#include <string.h>
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_process_intra_mb.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_parse_islice.h"
+#include "ih264d_inter_pred.h"
+#include "ih264d_process_pslice.h"
+#include "ih264d_process_bslice.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_cabac.h"
+#include "ih264d_parse_mb_header.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_cabac.h"
+#include "ih264d_utils.h"
+
+void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec);
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_bmb_non_direct_cavlc \endif
+ *
+ * \brief
+ * Implements sub_mb_pred() of 7.3.5.2. & mb_pred() of 7.3.5.1 for a B MB
+ * that is not B_DIRECT: sub-MB types, reference indices and MV residuals.
+ * \return
+ * OK on success, ERROR_SUB_MB_TYPE or the ref index parser's error code.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_parse_bmb_non_direct_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2)
+{
+ dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+ UWORD8 * pu1_sub_mb_pred_modes = (UWORD8 *)(gau1_ih264d_submb_pred_modes) + 4;
+ const UWORD8 (*pu1_mb_pred_modes)[32] =
+ (const UWORD8 (*)[32])gau1_ih264d_mb_pred_modes;
+ const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
+ const UWORD8 * pu1_sub_mb_mc_mode = (const UWORD8 *)(gau1_ih264d_submb_mc_mode)
+ + 4;
+
+ parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
+ + u1_num_mbsNby2;
+ UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
+ WORD8 (*pi1_ref_idx)[MAX_REFIDX_INFO_PER_MB] = ps_parse_mb_data->i1_ref_idx;
+ UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+ UWORD8 u1_mb_mc_mode, u1_num_mb_part, u1_sub_mb = !(u1_mb_type ^ B_8x8); /* u1_sub_mb is 1 only when u1_mb_type equals B_8x8 */
+ UWORD32 u4_mb_mc_mode = 0, u4_mb_pred_mode = 0; /* packed one byte per partition, first partition in the top byte */
+ WORD32 ret;
+
+ if(u1_sub_mb)
+ {
+ UWORD8 uc_i;
+ u1_mb_mc_mode = 0;
+ u1_num_mb_part = 4;
+ /* Reading the subMB type */
+ for(uc_i = 0; uc_i < 4; uc_i++)
+ {
+
+ UWORD32 ui_sub_mb_mode;
+
+//Inlined ih264d_uev
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ ui_sub_mb_mode = ((1 << u4_ldz) + u4_word - 1); /* NOTE(review): shift count is the CLZ result - confirm it can never reach 32 */
+//Inlined ih264d_uev
+
+ if(ui_sub_mb_mode > 12) /* values above 12 are rejected before any table lookup */
+ return ERROR_SUB_MB_TYPE;
+ else
+ {
+ UWORD8 u1_subMbPredMode = pu1_sub_mb_pred_modes[ui_sub_mb_mode];
+ u4_mb_mc_mode = (u4_mb_mc_mode << 8)
+ | pu1_sub_mb_mc_mode[ui_sub_mb_mode];
+ u4_mb_pred_mode = (u4_mb_pred_mode << 8) | u1_subMbPredMode;
+ pi1_ref_idx[0][uc_i] = ((u1_subMbPredMode & PRED_L0) - 1) >> 1; /* evaluates (0 - 1) >> 1 when the PRED_L0 bit is clear */
+ pi1_ref_idx[1][uc_i] = ((u1_subMbPredMode & PRED_L1) - 1) >> 1; /* same for the PRED_L1 bit */
+ COPYTHECONTEXT("sub_mb_type", u1_subMbPredMode);
+ }
+ /* Storing collocated Mb and SubMb mode information */
+ *pu1_col_info++ = ((PRED_8x8) << 6)
+ | ((pu1_sub_mb_mc_mode[ui_sub_mb_mode] << 4));
+ if(ui_sub_mb_mode != B_DIRECT_8x8)
+ {
+ if(ui_sub_mb_mode > B_BI_8x8)
+ {
+ ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+ else if(!ps_dec->s_high_profile.u1_direct_8x8_inference_flag)
+ {
+ ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+ }
+ else
+ {
+ UWORD8 u1_mb_pred_mode_idx = 5 + u1_mb_type;
+ UWORD8 u1_mb_pred_mode_part0 = pu1_mb_pred_modes[0][u1_mb_pred_mode_idx];
+ UWORD8 u1_mb_pred_mode_part1 = pu1_mb_pred_modes[1][u1_mb_pred_mode_idx];
+ u1_mb_mc_mode = ps_cur_mb_info->u1_mb_mc_mode;
+ u1_num_mb_part = pu1_num_mb_part[u1_mb_mc_mode];
+
+ pi1_ref_idx[0][0] = ((u1_mb_pred_mode_part0 & PRED_L0) - 1) >> 1;
+ pi1_ref_idx[1][0] = ((u1_mb_pred_mode_part0 & PRED_L1) - 1) >> 1;
+ pi1_ref_idx[0][1] = ((u1_mb_pred_mode_part1 & PRED_L0) - 1) >> 1;
+ pi1_ref_idx[1][1] = ((u1_mb_pred_mode_part1 & PRED_L1) - 1) >> 1;
+
+ u4_mb_pred_mode = (u1_mb_pred_mode_part0 << 8) | u1_mb_pred_mode_part1;
+ u4_mb_mc_mode = u1_mb_mc_mode | (u1_mb_mc_mode << 8);
+ u4_mb_mc_mode <<= 16; /* move the two partition bytes to the top half, matching the sub-MB packing */
+ u4_mb_pred_mode <<= 16;
+
+ /* Storing collocated Mb and SubMb mode information */
+ *pu1_col_info++ = (u1_mb_mc_mode << 6);
+ if(u1_mb_mc_mode)
+ *pu1_col_info++ = (u1_mb_mc_mode << 6);
+ }
+
+ {
+ UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+ UWORD8 *pu1_num_ref_idx_lx_active =
+ ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active;
+ const UWORD8 u1_mbaff_field = (u1_mbaff & uc_field);
+ UWORD8 u4_num_ref_idx_lx_active; /* despite the u4_ prefix this is an 8-bit value */
+
+ u4_num_ref_idx_lx_active = (pu1_num_ref_idx_lx_active[0]
+ << u1_mbaff_field) - 1; /* list 0 count (doubled when u1_mbaff_field is 1) minus one */
+
+ if(u4_num_ref_idx_lx_active) /* list 0 ref index parsing is skipped when the value is 0 */
+ {
+ if(1 == u4_num_ref_idx_lx_active)
+ ih264d_parse_bmb_ref_index_cavlc_range1(
+ u1_num_mb_part, ps_bitstrm, pi1_ref_idx[0],
+ u4_num_ref_idx_lx_active);
+ else
+ {
+ ret = ih264d_parse_bmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm,
+ pi1_ref_idx[0],
+ u4_num_ref_idx_lx_active);
+ if(ret != OK)
+ return ret;
+ }
+ }
+
+ u4_num_ref_idx_lx_active = (pu1_num_ref_idx_lx_active[1]
+ << u1_mbaff_field) - 1; /* same computation for list 1 */
+
+ if(u4_num_ref_idx_lx_active)
+ {
+ if(1 == u4_num_ref_idx_lx_active)
+ ih264d_parse_bmb_ref_index_cavlc_range1(
+ u1_num_mb_part, ps_bitstrm, pi1_ref_idx[1],
+ u4_num_ref_idx_lx_active);
+ else
+ {
+ ret = ih264d_parse_bmb_ref_index_cavlc(u1_num_mb_part, ps_bitstrm,
+ pi1_ref_idx[1],
+ u4_num_ref_idx_lx_active);
+ if(ret != OK)
+ return ret;
+ }
+ }
+ }
+
+ /* Read MotionVectors */
+ {
+ const UWORD8 * pu1_top_left_sub_mb_indx;
+
+ const UWORD8 * pu1_sub_mb_indx_mod =
+ (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
+ + (u1_sub_mb * 6);
+ const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
+ const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
+ const UWORD8 * pu1_num_sub_mb_part =
+ (const UWORD8 *)gau1_ih264d_num_submb_part;
+ const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
+ const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
+ UWORD8 u1_p_idx = 0, u1_num_submb_part, uc_lx;
+ parse_part_params_t * ps_part;
+ mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u1_mb_num << 4); /* 16 mv_pred_t entries per MB */
+ UWORD8 u1_mb_part_wd, u1_mb_part_ht;
+
+ /* Initialisations */
+ ps_part = ps_dec->ps_part;
+ /* Default Initialization for Non subMb Case Mode */
+ u1_mb_part_wd = pu1_mb_partw[u1_mb_mc_mode];
+ u1_mb_part_ht = pu1_mb_parth[u1_mb_mc_mode];
+ u1_num_submb_part = 1;
+
+ /* Decoding the MV for the subMB */
+ for(uc_lx = 0; uc_lx < 2; uc_lx++) /* pass 0 uses PRED_L0, pass 1 uses PRED_L1 */
+ {
+ UWORD8 u1_sub_mb_num = 0, u1_pred_mode, uc_i;
+ UWORD32 u4_mb_mc_mode_tmp = u4_mb_mc_mode;
+ UWORD32 u4_mb_pred_mode_tmp = u4_mb_pred_mode;
+ UWORD16 u2_sub_mb_num = 0x028A; // for sub mb case: nibbles 0, 2, 8, 10 are consumed top-first, one per partition
+ UWORD8 u1_b2 = uc_lx << 1; /* index of the x component in i2_mv[] for this list */
+ u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
+ pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);
+
+ for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
+ {
+ UWORD8 u1_mb_mc_mode, uc_j; /* this u1_mb_mc_mode shadows the function-level variable */
+ UWORD8 i1_pred = u4_mb_pred_mode_tmp >> 24; /* take this partition's byte from the top */
+ u1_mb_mc_mode = u4_mb_mc_mode_tmp >> 24;
+ u4_mb_pred_mode_tmp <<= 8;
+ u4_mb_mc_mode_tmp <<= 8;
+ /* subMb prediction mode */
+ if(u1_sub_mb)
+ {
+
+ u1_mb_part_wd = pu1_sub_mb_partw[u1_mb_mc_mode];
+ u1_mb_part_ht = pu1_sub_mb_parth[u1_mb_mc_mode];
+ u1_sub_mb_num = u2_sub_mb_num >> 12;
+ u1_num_submb_part = pu1_num_sub_mb_part[u1_mb_mc_mode];
+ pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod
+ + (u1_mb_mc_mode << 1);
+ u2_sub_mb_num <<= 4;
+ }
+ for(uc_j = 0; uc_j < u1_num_submb_part;
+ uc_j++, pu1_top_left_sub_mb_indx++)
+ {
+ mv_pred_t * ps_mv;
+ u1_sub_mb_num = u1_sub_mb_num + *pu1_top_left_sub_mb_indx;
+ ps_mv = ps_mv_start + u1_sub_mb_num;
+
+ /* Storing Info for partitions, writing only once */
+ if(uc_lx) /* partition info is written on the second pass only */
+ {
+ ps_part->u1_is_direct = (!i1_pred);
+ ps_part->u1_pred_mode = i1_pred;
+ ps_part->u1_sub_mb_num = u1_sub_mb_num;
+ ps_part->u1_partheight = u1_mb_part_ht;
+ ps_part->u1_partwidth = u1_mb_part_wd;
+ /* Increment partition Index */
+ u1_p_idx++;
+ ps_part++;
+ }
+
+ if(i1_pred & u1_pred_mode) /* MV residuals are read only if this list is used by the partition */
+ {
+ WORD16 i2_mvx, i2_mvy;
+
+//inlining ih264d_sev
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset,
+ pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset,
+ pu4_bitstrm_buf, u4_ldz);
+
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; /* magnitude; the suffix LSB selects the sign below */
+
+ if(u4_word & 0x1)
+ i2_mvx = (-(WORD32)u4_abs_val);
+ else
+ i2_mvx = (u4_abs_val);
+ }
+//inlinined ih264d_sev
+
+//inlining ih264d_sev
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset,
+ pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset,
+ pu4_bitstrm_buf, u4_ldz);
+
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
+
+ if(u4_word & 0x1)
+ i2_mvy = (-(WORD32)u4_abs_val);
+ else
+ i2_mvy = (u4_abs_val);
+ }
+//inlinined ih264d_sev
+
+ /* Storing Mv residuals */
+ ps_mv->i2_mv[u1_b2] = i2_mvx;
+ ps_mv->i2_mv[u1_b2 + 1] = i2_mvy;
+ }
+ }
+ }
+ }
+ /* write back to the scratch partition info */
+ ps_dec->ps_part = ps_part;
+ ps_parse_mb_data->u1_num_part = u1_sub_mb ? u1_p_idx : u1_num_mb_part;
+
+ }
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_bmb_non_direct_cabac \endif
+ *
+ * \brief
+ * Implements sub_mb_pred() of 7.3.5.2. & mb_pred() of 7.3.5.1 for a B MB
+ * that is not B_DIRECT, reading sub-MB types, ref indices and MVDs via CABAC.
+ * \return
+ * OK on success, ERROR_SUB_MB_TYPE or the ref index parser's error code.
+ *
+ **************************************************************************
+ */
+
+WORD32 ih264d_parse_bmb_non_direct_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2)
+{
+ /* Loads from ps_dec */
+ decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+ dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+ ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+ parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
+ + u1_num_mbsNby2;
+
+ /* table pointer loads */
+ const UWORD8 * pu1_sub_mb_pred_modes = (UWORD8 *)(gau1_ih264d_submb_pred_modes)
+ + 4;
+ const UWORD8 (*pu1_mb_pred_modes)[32] =
+ (const UWORD8 (*)[32])gau1_ih264d_mb_pred_modes;
+ const UWORD8 *pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
+ const UWORD8 *pu1_sub_mb_mc_mode = (UWORD8 *)(gau1_ih264d_submb_mc_mode) + 4;
+
+ const UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+ UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
+ WORD8 *pi1_ref_idx_l0 = &ps_parse_mb_data->i1_ref_idx[0][0];
+ WORD8 *pi1_ref_idx_l1 = &ps_parse_mb_data->i1_ref_idx[1][0];
+ UWORD8 u1_dec_ref_l0, u1_dec_ref_l1;
+
+ UWORD8 u1_num_mb_part, u1_mb_mc_mode, u1_sub_mb, u1_mbpred_mode = 5
+ + u1_mb_type;
+ UWORD32 u4_mb_mc_mode = 0, u4_mb_pred_mode = 0; /* packed one byte per partition, first partition in the top byte */
+ WORD32 ret;
+
+ p_curr_ctxt->u1_mb_type = CAB_NON_BD16x16;
+ u1_sub_mb = !(u1_mb_type ^ B_8x8); /* 1 only when u1_mb_type equals B_8x8 */
+
+ {
+ UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD8 *pu1_num_ref_idx_lx_active =
+ ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active;
+ UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+ UWORD8 u1_mbaff_field = (u1_mbaff & uc_field);
+ u1_dec_ref_l0 = (pu1_num_ref_idx_lx_active[0] << u1_mbaff_field) - 1; /* list 0 count (doubled when u1_mbaff_field is 1) minus one */
+ u1_dec_ref_l1 = (pu1_num_ref_idx_lx_active[1] << u1_mbaff_field) - 1; /* same for list 1 */
+ }
+
+ if(u1_sub_mb)
+ {
+ const UWORD8 u1_colz = ((PRED_8x8) << 6);
+ UWORD8 uc_i;
+ u1_mb_mc_mode = 0;
+ u1_num_mb_part = 4;
+ /* Reading the subMB type */
+ for(uc_i = 0; uc_i < 4; uc_i++)
+ {
+ UWORD8 u1_sub_mb_mode, u1_subMbPredModes;
+ u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+ 1, ps_cab_env, ps_bitstrm,
+ ps_dec->p_sub_mb_type_t);
+
+ if(u1_sub_mb_mode > 12) /* values above 12 are rejected before any table lookup */
+ return ERROR_SUB_MB_TYPE;
+
+ u1_subMbPredModes = pu1_sub_mb_pred_modes[u1_sub_mb_mode];
+ u4_mb_mc_mode = (u4_mb_mc_mode << 8) | pu1_sub_mb_mc_mode[u1_sub_mb_mode];
+ u4_mb_pred_mode = (u4_mb_pred_mode << 8) | u1_subMbPredModes;
+ *pi1_ref_idx_l0++ =
+ (u1_subMbPredModes & PRED_L0) ? u1_dec_ref_l0 : -1; /* -1 when the PRED_L0 bit is clear */
+ *pi1_ref_idx_l1++ =
+ (u1_subMbPredModes & PRED_L1) ? u1_dec_ref_l1 : -1; /* -1 when the PRED_L1 bit is clear */
+ COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+ /* Storing collocated Mb and SubMb mode information */
+ *pu1_col_info++ =
+ (u1_colz | (pu1_sub_mb_mc_mode[u1_sub_mb_mode] << 4));
+ if(u1_sub_mb_mode != B_DIRECT_8x8)
+ {
+ if(u1_sub_mb_mode > B_BI_8x8)
+ {
+ ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+ else if(!ps_dec->s_high_profile.u1_direct_8x8_inference_flag)
+ {
+ ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+ pi1_ref_idx_l0 -= 4; /* rewind to the first of the four entries written in the loop */
+ pi1_ref_idx_l1 -= 4;
+ }
+ else
+ {
+ UWORD8 u1_mb_pred_mode_part0 = pu1_mb_pred_modes[0][u1_mbpred_mode];
+ UWORD8 u1_mb_pred_mode_part1 = pu1_mb_pred_modes[1][u1_mbpred_mode];
+ u1_mb_mc_mode = ps_cur_mb_info->u1_mb_mc_mode;
+ u1_num_mb_part = pu1_num_mb_part[u1_mb_mc_mode];
+ /* Storing collocated Mb and SubMb mode information */
+ *pu1_col_info++ = (u1_mb_mc_mode << 6);
+ if(u1_mb_mc_mode)
+ *pu1_col_info++ = (u1_mb_mc_mode << 6);
+ u4_mb_mc_mode = u1_mb_mc_mode | (u1_mb_mc_mode << 8);
+ u4_mb_mc_mode <<= 16; /* move the two partition bytes to the top half, matching the sub-MB packing */
+ u4_mb_pred_mode = ((u1_mb_pred_mode_part0 << 8) | u1_mb_pred_mode_part1) << 16;
+
+ *pi1_ref_idx_l0++ = (u1_mb_pred_mode_part0 & PRED_L0) ? u1_dec_ref_l0 : -1;
+ *pi1_ref_idx_l0-- = (u1_mb_pred_mode_part1 & PRED_L0) ? u1_dec_ref_l0 : -1; /* post-decrement returns the pointer to entry 0 */
+ *pi1_ref_idx_l1++ = (u1_mb_pred_mode_part0 & PRED_L1) ? u1_dec_ref_l1 : -1;
+ *pi1_ref_idx_l1-- = (u1_mb_pred_mode_part1 & PRED_L1) ? u1_dec_ref_l1 : -1; /* post-decrement returns the pointer to entry 0 */
+ }
+ {
+ WORD8 *pi1_lft_cxt = ps_dec->pi1_left_ref_idx_ctxt_inc;
+ WORD8 *pi1_top_cxt = p_curr_ctxt->i1_ref_idx;
+
+ ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 0, u1_dec_ref_l0,
+ u1_mb_mc_mode, pi1_ref_idx_l0, pi1_lft_cxt,
+ pi1_top_cxt, ps_cab_env, ps_bitstrm,
+ ps_dec->p_ref_idx_t);
+ if(ret != OK)
+ return ret;
+
+ ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 2, u1_dec_ref_l1,
+ u1_mb_mc_mode, pi1_ref_idx_l1, pi1_lft_cxt,
+ pi1_top_cxt, ps_cab_env, ps_bitstrm,
+ ps_dec->p_ref_idx_t);
+ if(ret != OK)
+ return ret;
+ }
+ /* Read MotionVectors */
+ {
+ const UWORD8 *pu1_top_left_sub_mb_indx;
+ UWORD8 uc_j, uc_lx;
+ UWORD8 u1_mb_part_wd, u1_mb_part_ht;
+
+ const UWORD8 *pu1_sub_mb_indx_mod =
+ (const UWORD8 *)gau1_ih264d_submb_indx_mod
+ + (u1_sub_mb * 6);
+ const UWORD8 *pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
+ const UWORD8 *pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
+ const UWORD8 *pu1_num_sub_mb_part =
+ (const UWORD8 *)gau1_ih264d_num_submb_part;
+ const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
+ const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
+
+ UWORD8 u1_p_idx = 0;
+ UWORD8 u1_num_submb_part;
+ parse_part_params_t *ps_part;
+ /* Initialisations */
+ mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u1_mb_num << 4); /* 16 mv_pred_t entries per MB */
+ ps_part = ps_dec->ps_part;
+
+ /* Default initialization for non subMb case */
+ u1_mb_part_wd = pu1_mb_partw[u1_mb_mc_mode];
+ u1_mb_part_ht = pu1_mb_parth[u1_mb_mc_mode];
+ u1_num_submb_part = 1;
+
+ /* Decoding the MV for the subMB */
+ for(uc_lx = 0; uc_lx < 2; uc_lx++) /* pass 0 uses PRED_L0, pass 1 uses PRED_L1 */
+ {
+ UWORD8 u1_sub_mb_num = 0;
+ UWORD32 u4_mb_pred_mode_tmp = u4_mb_pred_mode;
+ UWORD32 u4_mb_mc_mode_tmp = u4_mb_mc_mode;
+ UWORD8 u1_mb_mc_mode_1, u1_pred_mode, uc_i;
+ UWORD16 u2_sub_mb_num = 0x028A; /* nibbles 0, 2, 8, 10 are consumed top-first, one per partition, in the sub-MB case */
+ UWORD8 u1_b2 = uc_lx << 1;
+ u1_pred_mode = (uc_lx) ? PRED_L1 : PRED_L0;
+ /* Default for Cabac */
+ pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode << 1);
+ for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
+ {
+
+ WORD8 i1_pred = (UWORD8)(u4_mb_pred_mode_tmp >> 24); /* take this partition's byte from the top */
+ u1_mb_mc_mode_1 = (UWORD8)(u4_mb_mc_mode_tmp >> 24);
+ u4_mb_pred_mode_tmp <<= 8;
+ u4_mb_mc_mode_tmp <<= 8;
+
+ /* subMb prediction mode */
+ if(u1_sub_mb)
+ {
+ u1_mb_part_wd = pu1_sub_mb_partw[u1_mb_mc_mode_1];
+ u1_mb_part_ht = pu1_sub_mb_parth[u1_mb_mc_mode_1];
+ u1_sub_mb_num = u2_sub_mb_num >> 12;
+ pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_mc_mode_1 << 1);
+ u1_num_submb_part = pu1_num_sub_mb_part[u1_mb_mc_mode_1];
+ u2_sub_mb_num = u2_sub_mb_num << 4;
+ }
+
+ for(uc_j = 0; uc_j < u1_num_submb_part;
+ uc_j++, pu1_top_left_sub_mb_indx++)
+ {
+ mv_pred_t *ps_mv;
+ u1_sub_mb_num = u1_sub_mb_num + *pu1_top_left_sub_mb_indx;
+ ps_mv = ps_mv_start + u1_sub_mb_num;
+
+ /* Storing Info for partitions, writing only once */
+ if(uc_lx) /* partition info is written on the second pass only */
+ {
+ ps_part->u1_is_direct = (!i1_pred);
+ ps_part->u1_pred_mode = i1_pred;
+ ps_part->u1_sub_mb_num = u1_sub_mb_num;
+ ps_part->u1_partheight = u1_mb_part_ht;
+ ps_part->u1_partwidth = u1_mb_part_wd;
+
+ /* Increment partition Index */
+ u1_p_idx++;
+ ps_part++;
+ }
+
+ ih264d_get_mvd_cabac(u1_sub_mb_num, u1_b2, u1_mb_part_wd,
+ u1_mb_part_ht,
+ (UWORD8)(i1_pred & u1_pred_mode), ps_dec,
+ ps_mv); /* called for every partition; the flag argument is non-zero only if this list is used */
+ }
+ }
+ }
+ /* write back to the scratch partition info */
+
+ ps_dec->ps_part = ps_part;
+ ps_parse_mb_data->u1_num_part = u1_sub_mb ? u1_p_idx : u1_num_mb_part;
+
+ }
+
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_bmb_cabac \endif
+ *
+ * \brief
+ * This function parses CABAC syntax of a B MB: prediction info (or direct
+ * partition setup), coded block pattern, transform flag, QP delta, residual.
+ * \return
+ * 0 on Success and Error code otherwise
+ **************************************************************************
+ */
+WORD32 ih264d_parse_bmb_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2)
+{
+ UWORD8 u1_cbp;
+ deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_mb_num;
+ const UWORD8 *puc_mb_mc_mode = (const UWORD8 *)gau1_ih264d_mb_mc_mode;
+ UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+ ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+
+ WORD32 ret;
+ UWORD8 u1_Bdirect_tranform_read = 1;
+ ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 1; /* may be cleared by the non-direct parser called below */
+
+ ps_cur_mb_info->u1_mb_mc_mode = puc_mb_mc_mode[5 + u1_mb_type];
+
+ ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+ ps_cur_deblk_mb->u1_mb_type |= D_B_SLICE;
+ if(u1_mb_type != B_DIRECT)
+ {
+ ret = ih264d_parse_bmb_non_direct_cabac(ps_dec, ps_cur_mb_info, u1_mb_num,
+ u1_num_mbsNby2);
+ if(ret != OK)
+ return ret;
+ }
+ else
+ {
+
+ /************ STORING PARTITION INFO ***********/
+ parse_part_params_t * ps_part_info;
+ ps_part_info = ps_dec->ps_part;
+ ps_part_info->u1_is_direct = PART_DIRECT_16x16;
+ ps_part_info->u1_sub_mb_num = 0;
+ ps_dec->ps_part++;
+ p_curr_ctxt->u1_mb_type = CAB_BD16x16;
+
+ MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0); /* clear left/current MV and ref index context for a direct MB */
+ memset(ps_dec->pi1_left_ref_idx_ctxt_inc, 0, 4);
+ MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
+ memset(p_curr_ctxt->i1_ref_idx, 0, 4);
+
+ /* check whether transform8x8 u4_flag to be read or not */
+ u1_Bdirect_tranform_read =
+ ps_dec->s_high_profile.u1_direct_8x8_inference_flag;
+ }
+
+ /* Read the Coded block pattern */
+ u1_cbp = (WORD8)ih264d_parse_ctx_cbp_cabac(ps_dec); /* NOTE(review): cast to WORD8 but stored in a UWORD8 - confirm intent */
+ p_curr_ctxt->u1_cbp = u1_cbp;
+ ps_cur_mb_info->u1_cbp = u1_cbp;
+
+ if(u1_cbp > 47) /* checked after the value has already been stored in both structures above */
+ return ERROR_CBP;
+
+ COPYTHECONTEXT("coded_block_pattern", u1_cbp);
+
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+ if((ps_dec->s_high_profile.u1_transform8x8_present) && (u1_cbp & (0xf))
+ && (ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag)
+ && (u1_Bdirect_tranform_read)) /* the flag is parsed only when all four conditions hold */
+ {
+ ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac(
+ ps_dec, ps_cur_mb_info);
+ COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+ p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
+ }
+ else
+ {
+ p_curr_ctxt->u1_transform8x8_ctxt = 0;
+ }
+
+ p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
+ p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE; /* clears bit 0 */
+ ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6; /* keeps only bits 1 and 2 */
+
+ /* Read mb_qp_delta */
+ if(u1_cbp)
+ {
+ WORD8 c_temp;
+ ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &c_temp);
+ if(ret != OK)
+ return ret;
+ COPYTHECONTEXT("mb_qp_delta", c_temp);
+ if(c_temp)
+ {
+ ret = ih264d_update_qp(ps_dec, c_temp);
+ if(ret != OK)
+ return ret;
+ }
+ }
+ else
+ ps_dec->i1_prev_mb_qp_delta = 0; /* mb_qp_delta is not parsed when the cbp is 0 */
+
+ ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, 0); /* called whether or not u1_cbp is zero; any result is not checked here */
+ if(EXCEED_OFFSET(ps_dec->ps_bitstrm))
+ return ERROR_EOB_TERMINATE_T;
+ return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_bmb_cavlc \endif
+ *
+ * \brief
+ * This function parses CAVLC syntax of a B MB: prediction info (or direct
+ * partition setup), coded block pattern, transform flag, QP delta, residual.
+ * \return
+ * 0 on Success and Error code otherwise
+ **************************************************************************
+ */
+WORD32 ih264d_parse_bmb_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2)
+{
+ UWORD32 u4_cbp;
+ deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_mb_num;
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 * pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+ const UWORD8 *puc_mb_mc_mode = (const UWORD8 *)gau1_ih264d_mb_mc_mode;
+ UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+
+ WORD32 ret;
+ UWORD8 u1_Bdirect_tranform_read = 1;
+ ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag = 1; /* may be cleared by the non-direct parser called below */
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+ ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+ ps_cur_mb_info->u1_mb_mc_mode = puc_mb_mc_mode[5 + u1_mb_type];
+
+ ps_cur_deblk_mb->u1_mb_type |= D_B_SLICE;
+ if(u1_mb_type != B_DIRECT)
+ {
+ ret = ih264d_parse_bmb_non_direct_cavlc(ps_dec, ps_cur_mb_info, u1_mb_num,
+ u1_num_mbsNby2);
+ if(ret != OK)
+ return ret;
+ }
+ else
+ {
+ /************ STORING PARTITION INFO ***********/
+ parse_part_params_t * ps_part_info;
+ ps_part_info = ps_dec->ps_part;
+ ps_part_info->u1_is_direct = PART_DIRECT_16x16;
+ ps_part_info->u1_sub_mb_num = 0;
+ ps_dec->ps_part++;
+ /* check whether transform8x8 u4_flag to be read or not */
+ u1_Bdirect_tranform_read =
+ ps_dec->s_high_profile.u1_direct_8x8_inference_flag;
+ }
+
+ /* Read the Coded block pattern */
+ {
+ const UWORD8 * puc_CbpInter = gau1_ih264d_cbp_inter;
+//Inlined ih264d_uev
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_cbp = ((1 << u4_ldz) + u4_word - 1); /* NOTE(review): shift count is the CLZ result - confirm it can never reach 32 */
+//Inlined ih264d_uev
+ if(u4_cbp > 47) /* reject code numbers above 47 before the table lookup */
+ return ERROR_CBP;
+ u4_cbp = puc_CbpInter[u4_cbp]; /* replace the code number with the entry from gau1_ih264d_cbp_inter */
+
+ if((ps_dec->s_high_profile.u1_transform8x8_present) && (u4_cbp & (0xf))
+ && (ps_dec->s_high_profile.u1_no_submb_part_size_lt8x8_flag)
+ && (u1_Bdirect_tranform_read)) /* the flag bit is read only when all four conditions hold */
+ {
+ ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
+ COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+ }
+
+ }
+
+ COPYTHECONTEXT("coded_block_pattern", u4_cbp);
+ ps_cur_mb_info->u1_cbp = u4_cbp;
+
+ /* Read mb_qp_delta */
+ if(u4_cbp)
+ {
+ WORD32 i_temp;
+//inlining ih264d_sev
+
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; /* magnitude; the suffix LSB selects the sign below */
+
+ if(u4_word & 0x1)
+ i_temp = (-(WORD32)u4_abs_val);
+ else
+ i_temp = (u4_abs_val);
+
+ if(i_temp < -26 || i_temp > 25) /* mb_qp_delta outside [-26, 25] is rejected */
+ return ERROR_INV_RANGE_QP_T;
+//inlinined ih264d_sev
+ COPYTHECONTEXT("mb_qp_delta", i_temp);
+ if(i_temp)
+ {
+ ret = ih264d_update_qp(ps_dec, (WORD8)i_temp);
+ if(ret != OK)
+ return ret;
+ }
+
+ ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, 0);
+ if(ret != OK)
+ return ret;
+ if(EXCEED_OFFSET(ps_bitstrm))
+ return ERROR_EOB_TERMINATE_T;
+ }
+ else
+ {
+ ps_dec->i1_prev_mb_qp_delta = 0; /* neither mb_qp_delta nor residual is parsed when the cbp is 0 */
+ ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
+ }
+
+ return OK;
+}
+
+WORD32 ih264d_mv_pred_ref_tfr_nby2_bmb(dec_struct_t * ps_dec,
+ UWORD8 u1_mb_idx,
+ UWORD8 u1_num_mbs)
+{
+ parse_pmbarams_t * ps_mb_part_info;
+ parse_part_params_t * ps_part;
+ mv_pred_t *ps_mv_nmb, *ps_mv_nmb_start, *ps_mv_ntop, *ps_mv_ntop_start;
+ pic_buffer_t * ps_ref_frame;
+ UWORD8 u1_direct_mode_width;
+ UWORD8 i, j;
+ dec_mb_info_t * ps_cur_mb_info;
+ const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD8 u1_field;
+ WORD32 ret = 0;
+
+ ps_dec->i4_submb_ofst -= (u1_num_mbs - u1_mb_idx) << 4;
+ ps_mb_part_info = ps_dec->ps_parse_mb_data;
+ ps_part = ps_dec->ps_parse_part_params;
+
+ /* N/2 Mb MvPred and Transfer Setup Loop */
+ for(i = u1_mb_idx; i < u1_num_mbs; i++, ps_mb_part_info++)
+ {
+ UWORD8 u1_colz = 0;
+ ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
+ /* Restore the slice scratch MbX and MbY context */
+ ps_cur_mb_info = ps_dec->ps_nmb_info + i;
+ ps_dec->u2_wait_id = i;
+
+ u1_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+
+ ps_mv_nmb_start = ps_dec->ps_mv_cur + (i << 4);
+ ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx;
+ ps_dec->u2_mby = ps_cur_mb_info->u2_mby;
+ ps_dec->u1_currB_type = 0;
+ ps_dec->u2_mv_2mb[i & 0x1] = 0;
+
+ /* Look for MV Prediction and Reference Transfer in Non-I Mbs */
+ if(!ps_mb_part_info->u1_isI_mb)
+ {
+ UWORD8 u1_blk_no;
+ WORD16 i1_ref_idx, i1_ref_idx1;
+ UWORD8 u1_pred_mode;
+ UWORD8 u1_sub_mb_x, u1_sub_mb_y, u1_sub_mb_num;
+ UWORD8 u1_lx, u1_lx_start, u1_lxend, u1_tmp_lx;
+ UWORD8 u1_num_part, u1_num_ref, u1_wd, u1_ht;
+ UWORD32 *pu4_wt_offst;
+ UWORD8 u1_scale_ref, u4_bot_mb;
+ deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + i;
+ WORD8 (*pi1_ref_idx)[MAX_REFIDX_INFO_PER_MB] =
+ ps_mb_part_info->i1_ref_idx;
+ WORD8 *pi1_ref_idx0 = pi1_ref_idx[0],
+ *pi1_ref_idx1 = pi1_ref_idx[1];
+ UWORD32 **ppu4_wt_ofst = ps_mb_part_info->pu4_wt_offst;
+
+ /* MB Level initialisations */
+ ps_dec->u4_num_pmbair = i >> u1_mbaff;
+ ps_dec->u1_mb_idx_mv = i;
+
+ /* CHANGED CODE */
+ ps_mv_ntop_start = ps_mv_nmb_start
+ - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
+
+ u1_num_part = ps_mb_part_info->u1_num_part;
+ ps_cur_deblk_mb->u1_mb_type |= (u1_num_part > 1) << 1;
+ u1_direct_mode_width = (1 == ps_mb_part_info->u1_num_part) ? 16 : 8;
+
+
+ ps_cur_mb_info->u4_pred_info_pkd_idx = ps_dec->u4_pred_info_pkd_idx;
+ ps_cur_mb_info->u1_num_pred_parts = 0;
+
+ /****************************************************/
+ /* weighted u4_ofst pointer calculations, this loop */
+ /* runs maximum 4 times, even in direct cases */
+ /****************************************************/
+ u1_scale_ref = u1_mbaff & ps_cur_mb_info->u1_mb_field_decodingflag;
+ u4_bot_mb = 1 - ps_cur_mb_info->u1_topmb;
+ if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
+ {
+ u1_num_ref = MIN(u1_num_part, 4);
+ if(PART_DIRECT_16x16 != ps_part->u1_is_direct)
+ {
+ for(u1_blk_no = 0; u1_blk_no < u1_num_ref; u1_blk_no++)
+ {
+ i1_ref_idx = MAX(pi1_ref_idx0[u1_blk_no], 0);
+ if(u1_scale_ref)
+ i1_ref_idx >>= 1;
+ i1_ref_idx *=
+ ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
+ if(u1_scale_ref)
+ i1_ref_idx +=
+ (MAX(pi1_ref_idx1[u1_blk_no], 0)
+ >> 1);
+ else
+ i1_ref_idx += MAX(pi1_ref_idx1[u1_blk_no], 0);
+ pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
+ * X3(i1_ref_idx)];
+
+ if(pi1_ref_idx0[u1_blk_no] < 0)
+ pu4_wt_offst += 1;
+
+ ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
+ if(u1_scale_ref
+ && (ps_dec->ps_cur_pps->u1_wted_bipred_idc
+ == 2))
+ {
+ i1_ref_idx = MAX(pi1_ref_idx0[u1_blk_no], 0);
+ i1_ref_idx *=
+ (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
+ << 1);
+ i1_ref_idx += MAX(pi1_ref_idx1[u1_blk_no], 0);
+ if(u4_bot_mb)
+ {
+ i1_ref_idx +=
+ (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
+ << 1)
+ * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
+ << 1);
+ }
+ pu4_wt_offst = (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2
+ * X3(i1_ref_idx)];
+ ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
+ }
+ }
+ }
+ }
+
+ /**************************************************/
+ /* Loop on Partitions */
+ /* direct mode is reflected as a single partition */
+ /**************************************************/
+ ps_dec->u4_dma_buf_idx = 0;
+ for(j = 0; j < u1_num_part; j++, ps_part++)
+ {
+ u1_sub_mb_num = ps_part->u1_sub_mb_num;
+ ps_dec->u1_sub_mb_num = u1_sub_mb_num;
+
+ if(PART_NOT_DIRECT != ps_part->u1_is_direct)
+ {
+ /**************************************************/
+ /* Direct Mode, Call DecodeSpatial/TemporalDirect */
+ /* only (those will in turn call FormMbPartInfo) */
+ /**************************************************/
+ ret = ps_dec->ps_cur_slice->pf_decodeDirect(ps_dec,
+ u1_direct_mode_width,
+ ps_cur_mb_info, i);
+ if(ret != OK)
+ return ret;
+ ps_cur_deblk_mb->u1_mb_type |= (ps_dec->u1_currB_type << 1);
+
+ }
+ else
+ {
+ mv_pred_t s_mvPred;
+ /**************************************************/
+ /* Non Direct Mode, Call Motion Vector Predictor */
+ /* and FormMbpartInfo */
+ /**************************************************/
+ u1_sub_mb_x = u1_sub_mb_num & 0x03;
+ u1_sub_mb_y = u1_sub_mb_num >> 2;
+ u1_blk_no =
+ (u1_num_part < 4) ?
+ j :
+ (((u1_sub_mb_y >> 1) << 1)
+ + (u1_sub_mb_x
+ >> 1));
+
+ ps_mv_ntop = ps_mv_ntop_start + u1_sub_mb_x;
+ ps_mv_nmb = ps_mv_nmb_start + u1_sub_mb_num;
+
+ u1_pred_mode = ps_part->u1_pred_mode;
+ u1_wd = ps_part->u1_partwidth;
+ u1_ht = ps_part->u1_partheight;
+
+ u1_lx_start = 0;
+ u1_lxend = 2;
+ if( PRED_L0 == u1_pred_mode)
+ {
+ s_mvPred.i2_mv[2] = 0;
+ s_mvPred.i2_mv[3] = 0;
+ u1_lxend = 1;
+ }
+ if( PRED_L1 == u1_pred_mode)
+ {
+ s_mvPred.i2_mv[0] = 0;
+ s_mvPred.i2_mv[1] = 0;
+ u1_lx_start = 1;
+ }
+
+ /* Populate the colpic info and reference frames */
+ s_mvPred.i1_ref_frame[0] = pi1_ref_idx0[u1_blk_no];
+ s_mvPred.i1_ref_frame[1] = pi1_ref_idx1[u1_blk_no];
+
+ ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop,
+ &s_mvPred, u1_sub_mb_num, u1_wd,
+ u1_lx_start, u1_lxend,
+ ps_cur_mb_info->u1_mb_mc_mode);
+
+ /**********************************************************/
+ /* Loop on number of predictors, 1 Each for Forw Backw */
+ /* Loop 2 times for BiDirect mode */
+ /**********************************************************/
+ for(u1_lx = u1_lx_start; u1_lx < u1_lxend; u1_lx++)
+ {
+ WORD16 i2_mv_x, i2_mv_y;
+
+ /********************************************************/
+ /* Predict Mv */
+ /* Add Mv Residuals and store back */
+ /********************************************************/
+ i1_ref_idx = s_mvPred.i1_ref_frame[u1_lx];
+ u1_tmp_lx = (u1_lx << 1);
+
+ i2_mv_x = ps_mv_nmb->i2_mv[u1_tmp_lx];
+ i2_mv_y = ps_mv_nmb->i2_mv[u1_tmp_lx + 1];
+
+ i2_mv_x += s_mvPred.i2_mv[u1_tmp_lx];
+ i2_mv_y += s_mvPred.i2_mv[u1_tmp_lx + 1];
+ s_mvPred.i2_mv[u1_tmp_lx] = i2_mv_x;
+ s_mvPred.i2_mv[u1_tmp_lx + 1] = i2_mv_y;
+
+ /********************************************************/
+ /* Transfer setup call */
+ /* convert RefIdx if it is MbAff */
+ /* Pass Weight Offset and refFrame */
+ /********************************************************/
+ i1_ref_idx1 = i1_ref_idx >> u1_scale_ref;
+ if(u1_scale_ref && ((i1_ref_idx & 0x01) != u4_bot_mb))
+ i1_ref_idx1 += MAX_REF_BUFS;
+ ps_ref_frame =
+ ps_dec->ps_ref_pic_buf_lx[u1_lx][i1_ref_idx1];
+
+ /* Storing Colocated-Zero u4_flag */
+ if(u1_lx == u1_lx_start)
+ {
+ /* Fill colocated info in MvPred structure */
+ s_mvPred.u1_col_ref_pic_idx =
+ ps_ref_frame->u1_mv_buf_id;
+ s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
+
+ /* Calculating colocated zero information */
+ u1_colz =
+ (u1_field << 1)
+ | ((i1_ref_idx == 0)
+ && (ABS(i2_mv_x)
+ <= 1)
+ && (ABS(i2_mv_y)
+ <= 1));
+ u1_colz |= ps_mb_part_info->u1_col_info[u1_blk_no];
+ }
+
+ pu4_wt_offst = ppu4_wt_ofst[u1_blk_no];
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD16 i2_mv[2];
+
+ i2_mv[0] = i2_mv_x;
+ i2_mv[1] = i2_mv_y;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(i2_mv,u1_wd,u1_ht,u1_sub_mb_num,u1_pred_mode,
+ ps_pred_pkd,ps_ref_frame->u1_pic_buf_id,i1_ref_idx,pu4_wt_offst,
+ ps_ref_frame->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+
+ }
+ ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb,
+ u1_sub_mb_num, u1_colz, u1_ht,
+ u1_wd);
+ }
+ }
+
+ }
+ else
+ {
+ /* Set zero values in case of Intra Mbs */
+ mv_pred_t s_mvPred =
+ {
+ { 0, 0, 0, 0 },
+ { -1, -1 }, 0, 0};
+ /* Storing colocated zero information */
+ ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
+ (UWORD8)(u1_field << 1), 4, 4);
+ }
+
+ /*if num _cores is set to 3 ,compute bs will be done in another thread*/
+ if(ps_dec->u4_num_cores < 3)
+ {
+ if(ps_dec->u4_app_disable_deblk_frm == 0)
+ ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info,
+ (UWORD16)(i >> u1_mbaff));
+ }
+ }
+ return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_implicit_wt_pair \endif
+ *
+ * \brief
+ *    Computes one pair of implicit bi-prediction weights from the POC
+ *    distances of a (list 0, list 1) reference pair.
+ *
+ * \param i4_tb         : current POC minus list 0 reference POC, unclipped
+ * \param i4_td         : list 1 reference POC minus list 0 reference POC,
+ *                        unclipped
+ * \param u4_both_short : non-zero when both references have u1_is_short set
+ * \param pu4_wt0       : receives the first weight of the pair
+ * \param pu4_wt1       : receives the second weight of the pair
+ *
+ * \return
+ *    None. Both weights are set to 32 when i4_td is zero, when
+ *    u4_both_short is zero, or when the scaled distance (after >> 2) lies
+ *    outside [-64, 128].
+ *
+ **************************************************************************
+ */
+static void ih264d_implicit_wt_pair(WORD32 i4_tb,
+                                    WORD32 i4_td,
+                                    UWORD32 u4_both_short,
+                                    UWORD32 *pu4_wt0,
+                                    UWORD32 *pu4_wt1)
+{
+    WORD16 i2_dist_scale_factor;
+    WORD16 i16_tb, i16_td, i16_tx;
+
+    /* Default: same for forward and backward, wt = 32 and offset = 0 */
+    *pu4_wt0 = 0x00000020;
+    *pu4_wt1 = 0x00000020;
+
+    /* Equal POCs: keep the default weights (also avoids dividing by 0) */
+    if(0 == i4_td)
+        return;
+
+    /* The differences are clipped while still 32 bits wide. Narrowing   */
+    /* them to 16 bits before clipping could turn a non-zero difference  */
+    /* that is a multiple of 65536 into 0 and divide by zero below.      */
+    i16_tb = CLIP3(-128, 127, i4_tb);
+    i16_td = CLIP3(-128, 127, i4_td);
+    i16_tx = (16384 + ABS(SIGN_POW2_DIV(i16_td, 1))) / i16_td;
+    i2_dist_scale_factor = CLIP3(-1024, 1023,
+                                 (((i16_tb * i16_tx) + 32) >> 6));
+
+    if(u4_both_short
+                    && ((i2_dist_scale_factor >> 2) >= -64)
+                    && ((i2_dist_scale_factor >> 2) <= 128))
+    {
+        *pu4_wt0 = 64 - (i2_dist_scale_factor >> 2);
+        *pu4_wt1 = (i2_dist_scale_factor >> 2);
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_implicit_weights \endif
+ *
+ * \brief
+ *    Calculates Implicit Weights.
+ *
+ *    For every (list 0, list 1) reference pair of the current slice, six
+ *    words are written to ps_dec->pu4_wts_ofsts_mat: the pair of weights
+ *    repeated three times. When u1_mbaff_frame_flag is set, a second
+ *    matrix, ps_dec->pu4_mbaff_wt_mat, is filled the same way for field
+ *    references: first using the top field order count of the current
+ *    picture, then the bottom field order count.
+ *
+ * \param ps_dec : decoder context; the current slice, current picture and
+ *                 reference lists are read from it
+ *
+ * \return
+ *    None
+ *
+ **************************************************************************
+ */
+void ih264d_get_implicit_weights(dec_struct_t *ps_dec)
+{
+    UWORD32 *pu4_iwt_ofst;
+    UWORD8 i, j;
+    struct pic_buffer_t *ps_pic_buff0, *ps_pic_buff1;
+    UWORD32 u4_poc0, u4_poc1;
+    UWORD32 ui_temp0, ui_temp1;
+    UWORD8 uc_num_ref_idx_l0_active, uc_num_ref_idx_l1_active;
+
+    pu4_iwt_ofst = ps_dec->pu4_wts_ofsts_mat;
+    uc_num_ref_idx_l0_active =
+                    ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
+    uc_num_ref_idx_l1_active =
+                    ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
+
+    /* Frame weights: one entry per (list 0, list 1) pair, using avg POC */
+    for(i = 0; i < uc_num_ref_idx_l0_active; i++)
+    {
+        ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][i];
+        u4_poc0 = ps_pic_buff0->i4_avg_poc;
+        for(j = 0; j < uc_num_ref_idx_l1_active; j++)
+        {
+            ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][j];
+            u4_poc1 = ps_pic_buff1->i4_avg_poc;
+
+            ih264d_implicit_wt_pair(
+                            (WORD32)(ps_dec->ps_cur_pic->i4_poc - u4_poc0),
+                            (WORD32)(u4_poc1 - u4_poc0),
+                            (ps_pic_buff1->u1_is_short
+                                            && ps_pic_buff0->u1_is_short),
+                            &ui_temp0, &ui_temp1);
+
+            pu4_iwt_ofst[0] = pu4_iwt_ofst[2] = pu4_iwt_ofst[4] = ui_temp0;
+            pu4_iwt_ofst[1] = pu4_iwt_ofst[3] = pu4_iwt_ofst[5] = ui_temp1;
+            pu4_iwt_ofst += 6;
+        }
+    }
+
+    if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+    {
+        UWORD8 k;
+        WORD32 i4_cur_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
+        UWORD32* pu4_wt_mat = ps_dec->pu4_mbaff_wt_mat;
+
+        /* Form the Implicit Weighted prediction matrix for field MBs also */
+        /* k == 0 uses the top field order count, k == 1 the bottom one    */
+        for(k = 0; k < 2; k++)
+        {
+            for(i = 0; i < (uc_num_ref_idx_l0_active << 1); i++)
+            {
+                UWORD16 u2_l0_idx;
+
+                /* Field index i maps to frame entry i >> 1; entries whose */
+                /* low bit differs from k are taken MAX_REF_BUFS further   */
+                /* into the list                                           */
+                u2_l0_idx = i >> 1;
+                if((i & 0x01) != k)
+                {
+                    u2_l0_idx += MAX_REF_BUFS;
+                }
+                ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u2_l0_idx];
+                u4_poc0 = ps_pic_buff0->i4_poc;
+                for(j = 0; j < (uc_num_ref_idx_l1_active << 1); j++)
+                {
+                    UWORD16 u2_l1_idx;
+
+                    u2_l1_idx = j >> 1;
+                    if((j & 0x01) != k)
+                    {
+                        u2_l1_idx += MAX_REF_BUFS;
+                    }
+                    ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][u2_l1_idx];
+                    u4_poc1 = ps_pic_buff1->i4_poc;
+
+                    ih264d_implicit_wt_pair(
+                                    (WORD32)(i4_cur_poc - u4_poc0),
+                                    (WORD32)(u4_poc1 - u4_poc0),
+                                    (ps_pic_buff1->u1_is_short
+                                                    && ps_pic_buff0->u1_is_short),
+                                    &ui_temp0, &ui_temp1);
+
+                    /* Store in the weight matrix */
+                    *pu4_wt_mat++ = ui_temp0;
+                    *pu4_wt_mat++ = ui_temp1;
+                    *pu4_wt_mat++ = ui_temp0;
+                    *pu4_wt_mat++ = ui_temp1;
+                    *pu4_wt_mat++ = ui_temp0;
+                    *pu4_wt_mat++ = ui_temp1;
+
+                }
+            }
+            i4_cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
+        }
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_bslice \endif
+ *
+ * \brief
+ *    Parses the remaining B-slice header fields and then the slice data.
+ *
+ *    In bitstream order this reads: the num_ref_idx override flag and
+ *    counts, the list 0 / list 1 reordering flags (and reordering
+ *    commands), the prediction weight table when u1_wted_bipred_idc is 1,
+ *    the MMCO commands when nal_ref_idc is non-zero, cabac_init_idc when
+ *    CABAC is used, slice_qp_delta and the deblocking filter parameters.
+ *    It then selects the CABAC or CAVLC parsing functions and calls the
+ *    inter-slice parser.
+ *
+ * \param ps_dec               : decoder context
+ * \param u2_first_mb_in_slice : passed through to pf_parse_inter_slice
+ *
+ * \return
+ *    0 on Success and Error code otherwise
+ **************************************************************************
+ */
+WORD32 ih264d_parse_bslice(dec_struct_t * ps_dec, UWORD16 u2_first_mb_in_slice)
+{
+    dec_pic_params_t * ps_pps = ps_dec->ps_cur_pps;
+    dec_slice_params_t * ps_slice = ps_dec->ps_cur_slice;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    UWORD8 u1_ref_idx_re_flag_lx;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+
+    UWORD32 u4_temp, ui_temp1;
+    WORD32 i_temp;
+    WORD32 ret;
+
+    /*--------------------------------------------------------------------*/
+    /* Read remaining contents of the slice header                        */
+    /*--------------------------------------------------------------------*/
+    {
+        /* Reset the default MV predictor: zero MVs, both reference       */
+        /* indices set through one 16-bit store of OUT_OF_RANGE_REF       */
+        WORD8 *pi1_buf;
+        WORD16 *pi2_mv = ps_dec->s_default_mv_pred.i2_mv;
+        WORD32 *pi4_mv = (WORD32*)pi2_mv;
+        WORD16 *pi16_refFrame;
+        pi1_buf = ps_dec->s_default_mv_pred.i1_ref_frame;
+        pi16_refFrame = (WORD16*)pi1_buf;
+        *pi4_mv = 0;
+        *(pi4_mv + 1) = 0;
+        *pi16_refFrame = OUT_OF_RANGE_REF;
+        ps_dec->s_default_mv_pred.u1_col_ref_pic_idx = (UWORD8)-1;
+        ps_dec->s_default_mv_pred.u1_pic_type = (UWORD8)-1;
+    }
+
+    ps_slice->u1_num_ref_idx_active_override_flag = ih264d_get_bit_h264(
+                    ps_bitstrm);
+    COPYTHECONTEXT("SH: num_ref_idx_override_flag",
+                    ps_slice->u1_num_ref_idx_active_override_flag);
+
+    /* Start from the PPS counts; the slice may override both of them */
+    u4_temp = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[0];
+    ui_temp1 = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[1];
+    if(ps_slice->u1_num_ref_idx_active_override_flag)
+    {
+        u4_temp = 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        COPYTHECONTEXT("SH: num_ref_idx_l0_active_minus1",
+                        u4_temp - 1);
+        ui_temp1 = 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        COPYTHECONTEXT("SH: num_ref_idx_l1_active_minus1",
+                        ui_temp1 - 1);
+    }
+
+    {
+        UWORD8 u1_max_ref_idx = MAX_FRAMES;
+        if(ps_slice->u1_field_pic_flag)
+        {
+            u1_max_ref_idx = MAX_FRAMES << 1;
+        }
+        /* The lower bound rejects a count of 0, which the unsigned       */
+        /* "1 + uev" above produces when the decoded value wraps around;  */
+        /* a B slice needs at least one reference in each list.           */
+        if((u4_temp < 1) || (ui_temp1 < 1)
+                        || (u4_temp > u1_max_ref_idx)
+                        || (ui_temp1 > u1_max_ref_idx))
+        {
+            return ERROR_NUM_REF;
+        }
+        ps_slice->u1_num_ref_idx_lx_active[0] = u4_temp;
+        ps_slice->u1_num_ref_idx_lx_active[1] = ui_temp1;
+    }
+    /* Initialize the Reference list once in Picture if the slice type    */
+    /* of first slice is between 5 to 9 defined in table 7.3 of standard  */
+    /* If picture contains both P & B slices then Initialize the Reference*/
+    /* List only when it switches from P to B and B to P                  */
+
+    {
+        UWORD8 init_idx_flg = (ps_dec->u1_pr_sl_type
+                        != ps_dec->ps_cur_slice->u1_slice_type);
+        if(ps_dec->u1_first_pb_nal_in_pic
+                        || (init_idx_flg & !ps_dec->u1_sl_typ_5_9)
+                        || ps_dec->u1_num_ref_idx_lx_active_prev
+                                        != ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0])
+            ih264d_init_ref_idx_lx_b(ps_dec);
+        if(ps_dec->u1_first_pb_nal_in_pic & ps_dec->u1_sl_typ_5_9)
+            ps_dec->u1_first_pb_nal_in_pic = 0;
+    }
+    /* Store the value for future slices in the same picture */
+    ps_dec->u1_num_ref_idx_lx_active_prev =
+                    ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
+
+    u1_ref_idx_re_flag_lx = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l0",u1_ref_idx_re_flag_lx);
+
+    /* Modified temporarily */
+    /* List 0: use the modified DPB list when reordering is signalled,    */
+    /* otherwise the initial list                                         */
+    if(u1_ref_idx_re_flag_lx)
+    {
+        WORD8 i1_reorder_ret;
+        ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
+        i1_reorder_ret = ih264d_ref_idx_reordering(ps_dec, 0);
+        if(i1_reorder_ret == -1)
+            return ERROR_REFIDX_ORDER_T;
+    }
+    else
+        ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_init_dpb[0];
+
+    u1_ref_idx_re_flag_lx = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l1",u1_ref_idx_re_flag_lx);
+
+    /* Modified temporarily */
+    /* List 1: same selection as for list 0 */
+    if(u1_ref_idx_re_flag_lx)
+    {
+        WORD8 i1_reorder_ret;
+        ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_mod_dpb[1];
+        i1_reorder_ret = ih264d_ref_idx_reordering(ps_dec, 1);
+        if(i1_reorder_ret == -1)
+            return ERROR_REFIDX_ORDER_T;
+    }
+    else
+        ps_dec->ps_ref_pic_buf_lx[1] = ps_dec->ps_dpb_mgr->ps_init_dpb[1];
+
+    /* Create refIdx to POC mapping */
+    {
+        void **ppv_map_ref_idx_to_poc_lx;
+        WORD8 idx;
+        struct pic_buffer_t *ps_pic;
+
+        /* Frame list 0: slot 0 is cleared, entries follow from slot 1 */
+        ppv_map_ref_idx_to_poc_lx = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L0;
+        ppv_map_ref_idx_to_poc_lx[0] = 0;
+        ppv_map_ref_idx_to_poc_lx++;
+        for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
+                        idx++)
+        {
+            ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
+            ppv_map_ref_idx_to_poc_lx[idx] = (ps_pic->pu1_buf1);
+        }
+
+        /* Frame list 1, same layout */
+        ppv_map_ref_idx_to_poc_lx = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L1;
+
+        ppv_map_ref_idx_to_poc_lx[0] = 0;
+        ppv_map_ref_idx_to_poc_lx++;
+        for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
+                        idx++)
+        {
+            ps_pic = ps_dec->ps_ref_pic_buf_lx[1][idx];
+            ppv_map_ref_idx_to_poc_lx[idx] = (ps_pic->pu1_buf1);
+        }
+
+        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+        {
+            void **ppv_map_ref_idx_to_poc_lx_t, **ppv_map_ref_idx_to_poc_lx_b;
+
+            /* MBAFF: every frame reference yields two entries in the top */
+            /* and bottom field maps, pu1_buf1 and pu1_buf1 + 1, stored   */
+            /* in opposite order in the two maps                          */
+            ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc
+                            + TOP_LIST_FLD_L0;
+            ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc
+                            + BOT_LIST_FLD_L0;
+
+            ppv_map_ref_idx_to_poc_lx_t[0] = 0;
+            ppv_map_ref_idx_to_poc_lx_t++;
+            ppv_map_ref_idx_to_poc_lx_b[0] = 0;
+            ppv_map_ref_idx_to_poc_lx_b++;
+            for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
+                            idx++)
+            {
+                ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
+                ppv_map_ref_idx_to_poc_lx_t[0] = (ps_pic->pu1_buf1);
+                ppv_map_ref_idx_to_poc_lx_b[1] = (ps_pic->pu1_buf1);
+
+                ppv_map_ref_idx_to_poc_lx_b[0] = (ps_pic->pu1_buf1) + 1;
+                ppv_map_ref_idx_to_poc_lx_t[1] = (ps_pic->pu1_buf1) + 1;
+
+                ppv_map_ref_idx_to_poc_lx_t += 2;
+                ppv_map_ref_idx_to_poc_lx_b += 2;
+            }
+
+            ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc
+                            + TOP_LIST_FLD_L1;
+            ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc
+                            + BOT_LIST_FLD_L1;
+
+            ppv_map_ref_idx_to_poc_lx_t[0] = 0;
+            ppv_map_ref_idx_to_poc_lx_t++;
+            ppv_map_ref_idx_to_poc_lx_b[0] = 0;
+            ppv_map_ref_idx_to_poc_lx_b++;
+            for(idx = 0; idx < ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
+                            idx++)
+            {
+                /* Same fill as list 0, indexed instead of pointer-stepped */
+                UWORD8 u1_tmp_idx = idx << 1;
+                ps_pic = ps_dec->ps_ref_pic_buf_lx[1][idx];
+                ppv_map_ref_idx_to_poc_lx_t[u1_tmp_idx] = (ps_pic->pu1_buf1);
+                ppv_map_ref_idx_to_poc_lx_b[u1_tmp_idx + 1] = (ps_pic->pu1_buf1);
+
+                ppv_map_ref_idx_to_poc_lx_b[u1_tmp_idx] = (ps_pic->pu1_buf1) + 1;
+                ppv_map_ref_idx_to_poc_lx_t[u1_tmp_idx + 1] = (ps_pic->pu1_buf1) + 1;
+
+            }
+        }
+
+        /* With 3 or more cores, the map is copied into the slice being   */
+        /* parsed                                                         */
+        if(ps_dec->u4_num_cores >= 3)
+        {
+            WORD32 num_entries;
+            WORD32 size;
+
+            num_entries = MIN(MAX_FRAMES, ps_dec->u4_num_ref_frames_at_init);
+            num_entries = 2 * ((2 * num_entries) + 1);
+
+            size = num_entries * sizeof(void *);
+            size += PAD_MAP_IDX_POC * sizeof(void *);
+
+            memcpy((void *)ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc,
+                   ps_dec->ppv_map_ref_idx_to_poc,
+                   size);
+        }
+
+    }
+
+    if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag
+                    && (ps_dec->ps_cur_slice->u1_field_pic_flag == 0))
+    {
+        ih264d_convert_frm_mbaff_list(ps_dec);
+    }
+
+    /* u1_wted_bipred_idc: 1 reads a weight table from the bitstream,     */
+    /* 2 uses implicit weights, any other value disables weighting        */
+    if(ps_pps->u1_wted_bipred_idc == 1)
+    {
+        ret = ih264d_parse_pred_weight_table(ps_slice, ps_bitstrm);
+        if(ret != OK)
+            return ret;
+        ih264d_form_pred_weight_matrix(ps_dec);
+        ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
+    }
+    else if(ps_pps->u1_wted_bipred_idc == 2)
+    {
+        /* Implicit Weighted prediction */
+        ps_slice->u2_log2Y_crwd = 0x0505;
+        ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
+        ih264d_get_implicit_weights(ps_dec);
+    }
+    else
+        ps_dec->ps_cur_slice->u2_log2Y_crwd = 0;
+
+    ps_dec->ps_parse_cur_slice->u2_log2Y_crwd =
+                    ps_dec->ps_cur_slice->u2_log2Y_crwd;
+
+    /* G050 */
+    /* MMCO commands are read once; on later passes the stored bit count  */
+    /* is skipped instead                                                 */
+    if(ps_slice->u1_nal_ref_idc != 0)
+    {
+        if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
+            ps_dec->u4_bitoffset = ih264d_read_mmco_commands(ps_dec);
+        else
+            ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
+    }
+    /* G050 */
+
+    if(ps_pps->u1_entropy_coding_mode == CABAC)
+    {
+        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        if(u4_temp > MAX_CABAC_INIT_IDC)
+        {
+            return ERROR_INV_SLICE_HDR_T;
+        }
+        ps_slice->u1_cabac_init_idc = u4_temp;
+        COPYTHECONTEXT("SH: cabac_init_idc",ps_slice->u1_cabac_init_idc);
+    }
+
+    /* Read slice_qp_delta */
+    i_temp = ps_pps->u1_pic_init_qp
+                    + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if((i_temp < 0) || (i_temp > 51))
+    {
+        return ERROR_INV_RANGE_QP_T;
+    }
+    ps_slice->u1_slice_qp = i_temp;
+    COPYTHECONTEXT("SH: slice_qp_delta",
+                    (WORD8)(ps_slice->u1_slice_qp - ps_pps->u1_pic_init_qp));
+
+    if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
+    {
+        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
+        {
+            return ERROR_INV_SLICE_HDR_T;
+        }
+        COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp);
+        ps_slice->u1_disable_dblk_filter_idc = u4_temp;
+        if(u4_temp != 1)
+        {
+            /* The offsets are coded divided by 2; store them doubled */
+            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf)
+                            << 1;
+            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+            {
+                return ERROR_INV_SLICE_HDR_T;
+            }
+            ps_slice->i1_slice_alpha_c0_offset = i_temp;
+            COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2",
+                            ps_slice->i1_slice_alpha_c0_offset >> 1);
+
+            i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf)
+                            << 1;
+            if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+            {
+                return ERROR_INV_SLICE_HDR_T;
+            }
+            ps_slice->i1_slice_beta_offset = i_temp;
+            COPYTHECONTEXT("SH: slice_beta_offset_div2",
+                            ps_slice->i1_slice_beta_offset >> 1);
+
+        }
+        else
+        {
+            ps_slice->i1_slice_alpha_c0_offset = 0;
+            ps_slice->i1_slice_beta_offset = 0;
+        }
+    }
+    else
+    {
+        ps_slice->u1_disable_dblk_filter_idc = 0;
+        ps_slice->i1_slice_alpha_c0_offset = 0;
+        ps_slice->i1_slice_beta_offset = 0;
+    }
+
+
+    /* Set slice_header_done to 2 to indicate a correct header */
+    DATA_SYNC();
+
+    ps_dec->ps_parse_cur_slice->slice_header_done = 2;
+
+    /* Select the entropy-specific slice, MB and MB-info parsers */
+    if(ps_pps->u1_entropy_coding_mode)
+    {
+        SWITCHOFFTRACE; SWITCHONTRACECABAC;
+        ps_dec->pf_parse_inter_slice = ih264d_parse_inter_slice_data_cabac;
+        ps_dec->pf_parse_inter_mb = ih264d_parse_bmb_cabac;
+        ih264d_init_cabac_contexts(B_SLICE, ps_dec);
+
+        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
+        else
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_nonmbaff;
+    }
+    else
+    {
+        SWITCHONTRACE; SWITCHOFFTRACECABAC;
+        ps_dec->pf_parse_inter_slice = ih264d_parse_inter_slice_data_cavlc;
+        ps_dec->pf_parse_inter_mb = ih264d_parse_bmb_cavlc;
+        if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
+        else
+            ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_nonmbaff;
+    }
+
+    ret = ih264d_cal_col_pic(ps_dec);
+    if(ret != OK)
+        return ret;
+    ps_dec->u1_B = 1;
+    ps_dec->pf_mvpred_ref_tfr_nby2mb = ih264d_mv_pred_ref_tfr_nby2_bmb;
+    ret = ps_dec->pf_parse_inter_slice(ps_dec, ps_slice, u2_first_mb_in_slice);
+    if(ret != OK)
+        return ret;
+    return OK;
+}
+
diff --git a/decoder/ih264d_parse_cabac.c b/decoder/ih264d_parse_cabac.c
new file mode 100755
index 0000000..9d58f33
--- /dev/null
+++ b/decoder/ih264d_parse_cabac.c
@@ -0,0 +1,1607 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ ***************************************************************************
+ * \file ih264d_parse_cabac.c
+ *
+ * \brief
+ * This file contains cabac Residual decoding routines.
+ *
+ * \date
+ * 20/03/2003
+ *
+ * \author NS
+ ***************************************************************************
+ */
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_defs.h"
+#include "ih264d_structs.h"
+
+#include "ih264d_cabac.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_parse_mb_header.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_parse_cabac.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_tables.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_utils.h"
+
+/*!
+ ********************************************************************************
+ * \if Function name : ih264d_read_coeff4x4_cabac \endif
+ *
+ * \brief This function decodes residual_block_cabac as defined in 7.3.5.3.2.
+ *
+ * \return
+ * Returns the index of last significant coeff.
+ *
+ ********************************************************************************
+ */
+
+UWORD8 ih264d_read_coeff4x4_cabac(dec_bit_stream_t *ps_bitstrm,
+ UWORD32 u4_ctxcat,
+ bin_ctxt_model_t *ps_ctxt_sig_coeff,
+ dec_struct_t *ps_dec, /*!< pointer to access global variables*/
+ bin_ctxt_model_t *ps_ctxt_coded)
+{
+
+ decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
+ UWORD32 u4_coded_flag;
+ UWORD32 u4_offset, *pu4_buffer;
+ UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+ tu_sblk4x4_coeff_data_t *ps_tu_4x4;
+ WORD16 *pi2_coeff_data;
+ WORD32 num_sig_coeffs = 0;
+
+ /*loading from strcuctures*/
+
+ ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+ ps_tu_4x4->u2_sig_coeff_map = 0;
+ pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
+
+ u4_offset = ps_bitstrm->u4_ofst;
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+ {
+
+ /*inilined DecodeDecision_onebin begins*/
+
+ {
+
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+
+ UWORD32 table_lookup;
+ const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_ctxt_coded->u1_mps_state);
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+ table_lookup =
+ pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+ u4_int_range_lps, u1_mps_state, table_lookup)
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
+ {
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst,
+ u4_offset, pu4_buffer)
+ }
+
+ ps_ctxt_coded->u1_mps_state = u1_mps_state;
+ u4_coded_flag = u4_symbol;
+
+ /*inilined DecodeDecision_onebin ends*/
+
+ }
+
+ }
+
+ if(u4_coded_flag)
+ {
+
+ {
+ bin_ctxt_model_t *p_binCtxt_last, *p_binCtxt_last_org;
+ UWORD32 uc_last_coeff_idx;
+ UWORD32 uc_bin;
+ UWORD32 i;
+ WORD32 first_coeff_offset = 0;
+
+ if((u4_ctxcat == CHROMA_AC_CTXCAT) || (u4_ctxcat == LUMA_AC_CTXCAT))
+ {
+ first_coeff_offset = 1;
+ }
+
+ i = 0;
+ if(u4_ctxcat == CHROMA_DC_CTXCAT)
+ {
+ uc_last_coeff_idx = 3;
+ }
+ else
+ {
+ UWORD32 u4_start;
+ u4_start = (u4_ctxcat & 1) + (u4_ctxcat >> 2);
+ uc_last_coeff_idx = 15 - u4_start;
+ }
+ p_binCtxt_last_org = ps_ctxt_sig_coeff
+ + LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT;
+
+ do
+ {
+
+ /*inilined DecodeDecision_onebin begins*/
+ {
+
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+ UWORD32 table_lookup;
+ const UWORD32 *pu4_table =
+ (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_ctxt_sig_coeff->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2)
+ + u4_qnt_int_range];
+
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
+ u4_symbol, u4_int_range_lps, u1_mps_state,
+ table_lookup)
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_14)
+ {
+
+ UWORD32 read_bits, u4_clz;
+ u4_clz = CLZ(u4_code_int_range);
+ NEXTBITS(read_bits, (u4_offset + 23), pu4_buffer,
+ u4_clz)
+ FLUSHBITS(u4_offset, (u4_clz))
+ u4_code_int_range = u4_code_int_range << u4_clz;
+ u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz)
+ | read_bits;
+ }
+
+ INC_BIN_COUNT(
+ ps_cab_env)
+
+ ps_ctxt_sig_coeff->u1_mps_state = u1_mps_state;
+ uc_bin = u4_symbol;
+
+ }
+ /*incrementing pointer to point to the context of the next bin*/
+ ps_ctxt_sig_coeff++;
+
+ /*inilined DecodeDecision_onebin ends*/
+
+ if(uc_bin)
+ {
+ num_sig_coeffs++;
+ SET_BIT(ps_tu_4x4->u2_sig_coeff_map, (i + first_coeff_offset));
+
+ p_binCtxt_last = p_binCtxt_last_org + i;
+
+ /*inilined DecodeDecision_onebin begins*/
+
+ {
+
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+ UWORD32 table_lookup;
+ const UWORD32 *pu4_table =
+ (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (p_binCtxt_last->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29)
+ & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2)
+ + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps
+ << (23 - u4_clz);
+
+ u4_code_int_range = u4_code_int_range
+ - u4_int_range_lps;
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
+ u4_symbol, u4_int_range_lps,
+ u1_mps_state, table_lookup)
+
+ INC_BIN_COUNT(ps_cab_env)
+
+ p_binCtxt_last->u1_mps_state = u1_mps_state;
+ uc_bin = u4_symbol;
+
+ }
+
+ /*inilined DecodeDecision_onebin ends*/
+ if(uc_bin == 1)
+ goto label_read_levels;
+
+ }
+
+ i = i + 1;
+
+ }
+ while(i < uc_last_coeff_idx);
+
+ num_sig_coeffs++;
+ SET_BIT(ps_tu_4x4->u2_sig_coeff_map, (i + first_coeff_offset));
+
+ label_read_levels: ;
+
+ }
+
+ /// VALUE of No of Coeff in BLOCK = i + 1 for second case else i;
+
+ /* Decode coeff_abs_level_minus1 and coeff_sign_flag */
+ {
+
+ WORD32 i2_abs_lvl;
+ UWORD32 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0;
+
+ UWORD32 u4_ctx_inc;
+ UWORD32 ui_prefix;
+ bin_ctxt_model_t *p_ctxt_abs_level;
+
+
+ p_ctxt_abs_level = ps_dec->p_coeff_abs_level_minus1_t[u4_ctxcat];
+ u4_ctx_inc = ((0x51));
+
+ /*****************************************************/
+ /* Main Loop runs for no. of Significant coefficient */
+ /*****************************************************/
+
+
+ do
+ {
+
+ {
+ INC_SYM_COUNT(&(ps_dec.s_cab_dec_env));
+
+ /*****************************************************/
+ /* inilining a modified ih264d_decode_bins_unary */
+ /*****************************************************/
+
+ {
+ UWORD32 u4_value;
+ UWORD32 u4_symbol;
+ bin_ctxt_model_t *ps_bin_ctxt;
+ UWORD32 u4_ctx_Inc;
+
+ u4_value = 0;
+
+ u4_ctx_Inc = u4_ctx_inc & 0xf;
+ ps_bin_ctxt = p_ctxt_abs_level + u4_ctx_Inc;
+
+ do
+ {
+
+ {
+
+ UWORD32 u4_qnt_int_range,
+ u4_int_range_lps;
+ UWORD32 u1_mps_state;
+ UWORD32 table_lookup;
+ const UWORD32 *pu4_table =
+ (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range
+ << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range
+ >> 29) & 0x3;
+ table_lookup = pu4_table[(u1_mps_state << 2)
+ + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps
+ << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range
+ - u4_int_range_lps;
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range,
+ u4_code_int_val_ofst, u4_symbol,
+ u4_int_range_lps, u1_mps_state,
+ table_lookup)
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
+ {
+
+ RENORM_RANGE_OFFSET(u4_code_int_range,
+ u4_code_int_val_ofst,
+ u4_offset, pu4_buffer)
+ }
+
+ INC_BIN_COUNT(ps_cab_env);
+
+ ps_bin_ctxt->u1_mps_state = u1_mps_state;
+ }
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+ u4_value++;
+ ps_bin_ctxt = p_ctxt_abs_level + (u4_ctx_inc >> 4);
+
+ }
+ while(u4_symbol && (u4_value < UCOFF_LEVEL));
+
+ ui_prefix = u4_value - 1 + u4_symbol;
+
+ }
+
+ if(ui_prefix == UCOFF_LEVEL)
+ {
+ UWORD32 ui16_sufS = 0;
+ UWORD32 u1_max_bins;
+ UWORD32 u4_value;
+
+ i2_abs_lvl = UCOFF_LEVEL;
+ /*inlining ih264d_decode_bypass_bins_unary begins*/
+
+ {
+ UWORD32 uc_bin;
+ UWORD32 bits_to_flush;
+ UWORD32 max_bits = 32;
+
+ bits_to_flush = 0;
+ /*renormalize to ensure there 23 bits more in the u4_code_int_val_ofst*/
+ {
+ UWORD32 u4_clz, read_bits;
+
+ u4_clz = CLZ(u4_code_int_range);
+ FLUSHBITS(u4_offset, u4_clz)
+ NEXTBITS(read_bits, u4_offset, pu4_buffer, 23)
+ u4_code_int_range = u4_code_int_range << u4_clz;
+ u4_code_int_val_ofst = (u4_code_int_val_ofst
+ << u4_clz) | read_bits;
+
+ }
+
+ do
+ {
+ bits_to_flush++;
+
+ u4_code_int_range = u4_code_int_range >> 1;
+
+ if(u4_code_int_val_ofst >= u4_code_int_range)
+ {
+ /* S=1 */
+ uc_bin = 1;
+ u4_code_int_val_ofst -= u4_code_int_range;
+ }
+ else
+ {
+ /* S=0 */
+ uc_bin = 0;
+ }
+
+ INC_BIN_COUNT(
+ ps_cab_env);INC_BYPASS_BINS(ps_cab_env);
+
+ }
+ while(uc_bin && (bits_to_flush < max_bits));
+
+ u4_value = (bits_to_flush - 1);
+
+ }
+ /*inlining ih264d_decode_bypass_bins_unary ends*/
+
+ ui16_sufS = (1 << u4_value);
+ u1_max_bins = u4_value;
+
+ if(u4_value > 0)
+ {
+
+ /*inline bypassbins_flc begins*/
+
+ if(u4_value > 10)
+ {
+ UWORD32 u4_clz, read_bits;
+
+ u4_clz = CLZ(u4_code_int_range);
+ FLUSHBITS(u4_offset, u4_clz)
+ NEXTBITS(read_bits, u4_offset, pu4_buffer, 23)
+ u4_code_int_range = u4_code_int_range << u4_clz;
+ u4_code_int_val_ofst = (u4_code_int_val_ofst
+ << u4_clz) | read_bits;
+ }
+
+ {
+ UWORD32 ui_bins;
+ UWORD32 uc_bin;
+ UWORD32 bits_to_flush;
+
+ ui_bins = 0;
+ bits_to_flush = 0;
+
+ do
+ {
+ bits_to_flush++;
+
+ u4_code_int_range = u4_code_int_range >> 1;
+
+ if(u4_code_int_val_ofst
+ >= u4_code_int_range)
+ {
+ /* S=1 */
+ uc_bin = 1;
+ u4_code_int_val_ofst -=
+ u4_code_int_range;
+ }
+ else
+ {
+ /* S=0 */
+ uc_bin = 0;
+ }
+
+ INC_BIN_COUNT(
+ ps_cab_env);INC_BYPASS_BINS(ps_cab_env);
+
+ ui_bins = ((ui_bins << 1) | uc_bin);
+
+ }
+ while(bits_to_flush < u1_max_bins);
+
+ u4_value = ui_bins;
+ }
+
+ /*inline bypassbins_flc ends*/
+
+ }
+
+ //Value of K
+ ui16_sufS += u4_value;
+ i2_abs_lvl += ui16_sufS;
+
+ }
+ else
+ i2_abs_lvl = 1 + ui_prefix;
+
+ if(i2_abs_lvl > 1)
+ {
+ u1_abs_level_gt1++;
+ }
+ if(!u1_abs_level_gt1)
+ {
+ u1_abs_level_equal1++;
+ u4_ctx_inc = (5 << 4) + MIN(u1_abs_level_equal1, 4);
+ }
+ else
+ u4_ctx_inc = (5 + MIN(u1_abs_level_gt1, 4)) << 4;
+
+ /*u4_ctx_inc = g_table_temp[u1_abs_level_gt1][u1_abs_level_equal1];*/
+
+ /* encode coeff_sign_flag[i] */
+
+ {
+ u4_code_int_range = u4_code_int_range >> 1;
+
+ if(u4_code_int_val_ofst >= (u4_code_int_range))
+ {
+ /* S=1 */
+ u4_code_int_val_ofst -= u4_code_int_range;
+ i2_abs_lvl = (-i2_abs_lvl);
+ }
+
+ }
+ num_sig_coeffs--;
+ *pi2_coeff_data++ = i2_abs_lvl;
+ }
+ }
+ while(num_sig_coeffs > 0);
+ }
+ }
+
+ if(u4_coded_flag)
+ {
+ WORD32 offset;
+ offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
+ offset = ALIGN4(offset);
+ ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
+ }
+
+
+ /*updating structures*/
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_bitstrm->u4_ofst = u4_offset;
+ return (u4_coded_flag);
+}
+/*!
+ ********************************************************************************
+ * \if Function name : ih264d_read_coeff8x8_cabac \endif
+ *
+ * \brief This function decodes residual_block_cabac as defined in 7.3.5.3.2
+ * when transform_8x8_flag = 1.
+ *
+ * The routine works in two passes over the CABAC stream:
+ * 1. significance pass: for positions 0..62 a significant-coefficient bin is
+ * decoded; for every significant position a last-coefficient bin follows.
+ * Significant positions are recorded as bits in au4_sig_coeff_map[0..1]
+ * of the TU structure. If no "last" bin is set, position 63 is marked
+ * significant without decoding a bin.
+ * 2. level pass: one level is decoded per significant coefficient (context
+ * coded unary prefix, optional bypass coded suffix, bypass coded sign) and
+ * appended to ai2_level[] in decode order.
+ *
+ * \param ps_bitstrm Bitstream; pu4_buffer and u4_ofst are read and
+ * u4_ofst is written back on exit.
+ * \param ps_dec Decoder context; supplies the CABAC environment,
+ * the context models and pv_parse_tu_coeff_data. The
+ * latter is advanced by the ALIGN4-rounded number of
+ * bytes written.
+ * \param ps_cur_mb_info Current MB info; only ps_curmb->u1_mb_fld is read,
+ * to choose between frame and field context tables.
+ *
+ * \return
+ * None. Results are returned through the TU buffer and the updated CABAC
+ * environment / bitstream offset.
+ *
+ ********************************************************************************
+ */
+
+void ih264d_read_coeff8x8_cabac(dec_bit_stream_t *ps_bitstrm,
+ dec_struct_t *ps_dec, /*!< pointer to access global variables*/
+ dec_mb_info_t *ps_cur_mb_info)
+{
+ decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
+ UWORD32 u4_offset, *pu4_buffer;
+ UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+
+ /* High profile related declarations */
+ UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
+ const UWORD8 *pu1_lastcoeff_context_inc =
+ (UWORD8 *)gau1_ih264d_lastcoeff_context_inc;
+ const UWORD8 *pu1_sigcoeff_context_inc;
+ bin_ctxt_model_t *ps_ctxt_sig_coeff;
+ WORD32 num_sig_coeffs = 0;
+ tu_blk8x8_coeff_data_t *ps_tu_8x8;
+ WORD16 *pi2_coeff_data;
+
+ /* the TU record is built in place at the current parse position; */
+ /* the significance map starts out empty */
+
+ ps_tu_8x8 = (tu_blk8x8_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+ ps_tu_8x8->au4_sig_coeff_map[0] = 0;
+ ps_tu_8x8->au4_sig_coeff_map[1] = 0;
+ pi2_coeff_data = &ps_tu_8x8->ai2_level[0];
+
+
+ if(!u1_field_coding_flag)
+ {
+ pu1_sigcoeff_context_inc =
+ (UWORD8 *)gau1_ih264d_sigcoeff_context_inc_frame;
+
+ /*******************************************************************/
+ /* last coefficient context is derived from significant coeff u4_flag */
+ /* only significant coefficient matrix need to be initialized */
+ /*******************************************************************/
+ ps_ctxt_sig_coeff = ps_dec->s_high_profile.ps_sigcoeff_8x8_frame;
+ }
+ else
+ {
+ pu1_sigcoeff_context_inc =
+ (UWORD8 *)gau1_ih264d_sigcoeff_context_inc_field;
+
+ /*******************************************************************/
+ /* last coefficient context is derived from significant coeff u4_flag */
+ /* only significant coefficient matrix need to be initialized */
+ /*******************************************************************/
+ ps_ctxt_sig_coeff = ps_dec->s_high_profile.ps_sigcoeff_8x8_field;
+ }
+
+ /* loading from structures: the arithmetic decoder state is kept in */
+ /* locals for the whole function and stored back at the end */
+
+ u4_offset = ps_bitstrm->u4_ofst;
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+ {
+ {
+ bin_ctxt_model_t *p_binCtxt_last, *p_binCtxt_last_org,
+ *p_ctxt_sig_coeff_org;
+ UWORD32 uc_last_coeff_idx;
+ UWORD32 uc_bin;
+ UWORD32 i;
+
+ i = 0;
+
+ /* bins are decoded for positions 0..62 only; position 63 is handled */
+ /* after the loop */
+ uc_last_coeff_idx = 63;
+
+ p_binCtxt_last_org = ps_ctxt_sig_coeff
+ + LAST_COEFF_CTXT_MINUS_SIG_COEFF_CTXT_8X8;
+
+ p_ctxt_sig_coeff_org = ps_ctxt_sig_coeff;
+
+ do
+ {
+ /* inlined DecodeDecision_onebin begins (significant coeff bin) */
+ {
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+ UWORD32 table_lookup;
+ const UWORD32 *pu4_table =
+ (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_ctxt_sig_coeff->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+
+ /* two bits below the leading one of the range select the table column */
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2)
+ + u4_qnt_int_range];
+
+ /* low byte: LPS range, bits 8..14: next state for the MPS path */
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
+ u4_symbol, u4_int_range_lps, u1_mps_state,
+ table_lookup)
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_14)
+ {
+ UWORD32 read_bits;
+ /* the outer u4_clz is reused for the updated range (it is not */
+ /* needed again in this bin); this avoids a shadowing local */
+ u4_clz = CLZ(u4_code_int_range);
+ NEXTBITS(read_bits, (u4_offset + 23), pu4_buffer,
+ u4_clz)
+ FLUSHBITS(u4_offset, (u4_clz))
+ u4_code_int_range = u4_code_int_range << u4_clz;
+ u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz)
+ | read_bits;
+ }
+
+ ps_ctxt_sig_coeff->u1_mps_state = u1_mps_state;
+ uc_bin = u4_symbol;
+ }
+ /*incrementing pointer to point to the context of the next bin*/
+ ps_ctxt_sig_coeff = p_ctxt_sig_coeff_org
+ + pu1_sigcoeff_context_inc[i + 1];
+
+ /* inlined DecodeDecision_onebin ends */
+ if(uc_bin)
+ {
+ num_sig_coeffs++;
+ /* word 0 holds positions 0..31, word 1 holds positions 32..63 */
+ SET_BIT(ps_tu_8x8->au4_sig_coeff_map[i>31], (i > 31 ? i - 32:i));
+
+ p_binCtxt_last = p_binCtxt_last_org
+ + pu1_lastcoeff_context_inc[i];
+
+ /* inlined DecodeDecision_onebin begins (last coeff bin) */
+
+ {
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+ UWORD32 table_lookup;
+ const UWORD32 *pu4_table =
+ (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (p_binCtxt_last->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29)
+ & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2)
+ + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps
+ << (23 - u4_clz);
+
+ u4_code_int_range = u4_code_int_range
+ - u4_int_range_lps;
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
+ u4_symbol, u4_int_range_lps,
+ u1_mps_state, table_lookup)
+
+ p_binCtxt_last->u1_mps_state = u1_mps_state;
+ uc_bin = u4_symbol;
+ }
+
+ /* inlined DecodeDecision_onebin ends */
+ if(uc_bin == 1)
+ goto label_read_levels;
+
+ }
+
+ i = i + 1;
+
+ }
+ while(i < uc_last_coeff_idx);
+
+ /* no "last" bin was set: position 63 is significant by inference */
+ num_sig_coeffs++;
+ SET_BIT(ps_tu_8x8->au4_sig_coeff_map[i>31], (i > 31 ? i - 32:i));
+
+ label_read_levels: ;
+ }
+
+ /// VALUE of No of Coeff in BLOCK = i + 1 for second case else i;
+
+ /* Decode coeff_abs_level_minus1 and coeff_sign_flag */
+ {
+ WORD32 i2_abs_lvl;
+ UWORD32 u1_abs_level_equal1 = 1, u1_abs_level_gt1 = 0;
+
+ UWORD32 u4_ctx_inc;
+ UWORD32 ui_prefix;
+ bin_ctxt_model_t *p_ctxt_abs_level;
+
+ p_ctxt_abs_level =
+ ps_dec->p_coeff_abs_level_minus1_t[LUMA_8X8_CTXCAT];
+ /* low nibble: context increment of the first prefix bin, */
+ /* high nibble: context increment of all following prefix bins */
+ u4_ctx_inc = ((0x51));
+
+ /*****************************************************/
+ /* Main Loop runs for no. of Significant coefficient */
+ /*****************************************************/
+ do
+ {
+ {
+
+ /*****************************************************/
+ /* inlining a modified ih264d_decode_bins_unary */
+ /*****************************************************/
+
+ {
+ UWORD32 u4_value;
+ UWORD32 u4_symbol;
+ bin_ctxt_model_t *ps_bin_ctxt;
+ UWORD32 u4_ctx_Inc;
+ u4_value = 0;
+
+ u4_ctx_Inc = u4_ctx_inc & 0xf;
+ ps_bin_ctxt = p_ctxt_abs_level + u4_ctx_Inc;
+
+ do
+ {
+ {
+ UWORD32 u4_qnt_int_range,
+ u4_int_range_lps;
+ UWORD32 u1_mps_state;
+ UWORD32 table_lookup;
+ const UWORD32 *pu4_table =
+ (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range
+ << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range
+ >> 29) & 0x3;
+ table_lookup = pu4_table[(u1_mps_state << 2)
+ + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps
+ << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range
+ - u4_int_range_lps;
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range,
+ u4_code_int_val_ofst, u4_symbol,
+ u4_int_range_lps, u1_mps_state,
+ table_lookup)
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
+ {
+
+ RENORM_RANGE_OFFSET(u4_code_int_range,
+ u4_code_int_val_ofst,
+ u4_offset, pu4_buffer)
+ }
+
+ ps_bin_ctxt->u1_mps_state = u1_mps_state;
+ }
+
+ u4_value++;
+ ps_bin_ctxt = p_ctxt_abs_level + (u4_ctx_inc >> 4);
+
+ }
+ while(u4_symbol && (u4_value < UCOFF_LEVEL));
+
+ /* number of leading 1 bins; equals UCOFF_LEVEL only when */
+ /* every prefix bin was 1 */
+ ui_prefix = u4_value - 1 + u4_symbol;
+ }
+
+ if(ui_prefix == UCOFF_LEVEL)
+ {
+ UWORD32 ui16_sufS = 0;
+ UWORD32 u1_max_bins;
+ UWORD32 u4_value;
+
+ i2_abs_lvl = UCOFF_LEVEL;
+ /*inlining ih264d_decode_bypass_bins_unary begins*/
+
+ {
+ UWORD32 uc_bin;
+ UWORD32 bits_to_flush;
+ UWORD32 max_bits = 32;
+
+ bits_to_flush = 0;
+ /* renormalize so that 23 more bits are available in u4_code_int_val_ofst */
+ {
+ UWORD32 u4_clz, read_bits;
+
+ u4_clz = CLZ(u4_code_int_range);
+ FLUSHBITS(u4_offset, u4_clz)
+ NEXTBITS(read_bits, u4_offset, pu4_buffer, 23)
+ u4_code_int_range = u4_code_int_range << u4_clz;
+ u4_code_int_val_ofst = (u4_code_int_val_ofst
+ << u4_clz) | read_bits;
+ }
+
+ do
+ {
+ bits_to_flush++;
+
+ u4_code_int_range = u4_code_int_range >> 1;
+
+ if(u4_code_int_val_ofst >= u4_code_int_range)
+ {
+ /* S=1 */
+ uc_bin = 1;
+ u4_code_int_val_ofst -= u4_code_int_range;
+ }
+ else
+ {
+ /* S=0 */
+ uc_bin = 0;
+ }
+
+ }
+ while(uc_bin && (bits_to_flush < max_bits));
+
+ u4_value = (bits_to_flush - 1);
+ }
+ /*inlining ih264d_decode_bypass_bins_unary ends*/
+
+ /* u4_value can reach 31 (max_bits is 32), so the shift must be */
+ /* done on an unsigned operand; 1 << 31 on int is undefined */
+ ui16_sufS = ((UWORD32)1 << u4_value);
+ u1_max_bins = u4_value;
+
+ if(u4_value > 0)
+ {
+ /*inline bypassbins_flc begins*/
+
+ if(u4_value > 10)
+ {
+ UWORD32 u4_clz, read_bits;
+
+ u4_clz = CLZ(u4_code_int_range);
+ FLUSHBITS(u4_offset, u4_clz)
+ NEXTBITS(read_bits, u4_offset, pu4_buffer, 23)
+ u4_code_int_range = u4_code_int_range << u4_clz;
+ u4_code_int_val_ofst = (u4_code_int_val_ofst
+ << u4_clz) | read_bits;
+ }
+
+ {
+ UWORD32 ui_bins;
+ UWORD32 uc_bin;
+ UWORD32 bits_to_flush;
+
+ ui_bins = 0;
+ bits_to_flush = 0;
+
+ /* read u1_max_bins bypass bins, most significant bin first */
+ do
+ {
+ bits_to_flush++;
+
+ u4_code_int_range = u4_code_int_range >> 1;
+
+ if(u4_code_int_val_ofst
+ >= u4_code_int_range)
+ {
+ /* S=1 */
+ uc_bin = 1;
+ u4_code_int_val_ofst -=
+ u4_code_int_range;
+ }
+ else
+ {
+ /* S=0 */
+ uc_bin = 0;
+ }
+
+ ui_bins = ((ui_bins << 1) | uc_bin);
+
+ }
+ while(bits_to_flush < u1_max_bins);
+
+ u4_value = ui_bins;
+ }
+ /*inline bypassbins_flc ends*/
+ }
+
+ //Value of K
+ ui16_sufS += u4_value;
+ i2_abs_lvl += ui16_sufS;
+ }
+ else
+ {
+ i2_abs_lvl = 1 + ui_prefix;
+ }
+
+ /* context selection for the next level depends on how many */
+ /* levels so far were equal to 1 / greater than 1 */
+ if(i2_abs_lvl > 1)
+ {
+ u1_abs_level_gt1++;
+ }
+ if(!u1_abs_level_gt1)
+ {
+ u1_abs_level_equal1++;
+ u4_ctx_inc = (5 << 4) + MIN(u1_abs_level_equal1, 4);
+ }
+ else
+ {
+ u4_ctx_inc = (5 + MIN(u1_abs_level_gt1, 4)) << 4;
+ }
+
+ /*u4_ctx_inc = g_table_temp[u1_abs_level_gt1][u1_abs_level_equal1];*/
+
+ /* decode coeff_sign_flag[i] (bypass bin) */
+
+ {
+ u4_code_int_range = u4_code_int_range >> 1;
+
+ if(u4_code_int_val_ofst >= (u4_code_int_range))
+ {
+ /* S=1 */
+ u4_code_int_val_ofst -= u4_code_int_range;
+ i2_abs_lvl = (-i2_abs_lvl);
+ }
+ }
+
+ *pi2_coeff_data++ = i2_abs_lvl;
+ num_sig_coeffs--;
+ }
+ }
+ while(num_sig_coeffs > 0);
+ }
+ }
+
+ /* advance the parse pointer past the map and the levels just written, */
+ /* rounded by ALIGN4 */
+ {
+ WORD32 offset;
+ offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_8x8;
+ offset = ALIGN4(offset);
+ ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
+ }
+
+ /*updating structures*/
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_bitstrm->u4_ofst = u4_offset;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_cabac_parse_8x8block */
+/* */
+/* Description : This function does the residual parsing of 4 subblocks */
+/* in a 8x8 block. */
+/* */
+/* Inputs : pi2_coeff_block : not used by this function; kept so that */
+/* existing callers need no change. The coefficients are */
+/* stored by ih264d_read_coeff4x4_cabac. */
+/* */
+/* u4_sub_block_strd : indicates the number of subblocks */
+/* in a row. It is 4 for luma and 2 for chroma. */
+/* */
+/* u4_ctx_cat : indicates context category for residual */
+/* decoding. */
+/* */
+/* ps_dec : pointer to Decstruct (decoder context) */
+/* */
+/* pu1_top_nnz : top nnz pointer (2 entries read and updated) */
+/* */
+/* pu1_left_nnz : left nnz pointer (2 entries read and updated)*/
+/* */
+/* Globals : No */
+/* Processing : Parsing of the four subblocks is unrolled; top and left */
+/* nnz are updated on the fly. csbp is set from the coded */
+/* flag returned for each subblock, at the subblock index */
+/* in raster order (0, 1, strd, strd + 1). */
+/* */
+/* Outputs : The updated nnzs and csbp of the current block */
+/* */
+/* Returns : Returns the coded sub block pattern csbp for the block */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 09 10 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+UWORD32 ih264d_cabac_parse_8x8block(WORD16 *pi2_coeff_block,
+ UWORD32 u4_sub_block_strd,
+ UWORD32 u4_ctx_cat,
+ dec_struct_t * ps_dec,
+ UWORD8 *pu1_top_nnz,
+ UWORD8 *pu1_left_nnz)
+{
+ UWORD32 u4_ctxinc, u4_subblock_coded;
+ UWORD32 u4_top0, u4_top1;
+ UWORD32 u4_csbp = 0;
+ UWORD32 u4_idx = 0;
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ bin_ctxt_model_t * const ps_cbf = ps_dec->p_cbf_t[u4_ctx_cat];
+ bin_ctxt_model_t *ps_src_bin_ctxt;
+ bin_ctxt_model_t * const ps_sig_coeff_flag =
+ ps_dec->p_significant_coeff_flag_t[u4_ctx_cat];
+
+ /* The pointer is never dereferenced here and may be NULL, so no offset */
+ /* arithmetic is performed on it. */
+ (void)pi2_coeff_block;
+
+ /*------------------------------------------------------*/
+ /* Residual 4x4 decoding: SubBlock 0 */
+ /*------------------------------------------------------*/
+ /* coded-flag context increment = (top coded << 1) + left coded */
+ u4_ctxinc = ((!!pu1_top_nnz[0]) << 1) + (!!pu1_left_nnz[0]);
+
+ ps_src_bin_ctxt = ps_cbf + u4_ctxinc;
+
+ u4_top0 = ih264d_read_coeff4x4_cabac( ps_bitstrm,
+ u4_ctx_cat, ps_sig_coeff_flag, ps_dec,
+ ps_src_bin_ctxt);
+
+ INSERT_BIT(u4_csbp, u4_idx, u4_top0);
+
+ /*------------------------------------------------------*/
+ /* Residual 4x4 decoding: SubBlock 1 */
+ /*------------------------------------------------------*/
+ u4_idx++;
+ /* left neighbour of subblock 1 is subblock 0 */
+ u4_ctxinc = ((!!pu1_top_nnz[1]) << 1) + u4_top0;
+
+ ps_src_bin_ctxt = ps_cbf + u4_ctxinc;
+
+ u4_top1 = ih264d_read_coeff4x4_cabac(ps_bitstrm,
+ u4_ctx_cat, ps_sig_coeff_flag, ps_dec,
+ ps_src_bin_ctxt);
+
+ INSERT_BIT(u4_csbp, u4_idx, u4_top1);
+ pu1_left_nnz[0] = u4_top1;
+
+ /*------------------------------------------------------*/
+ /* Residual 4x4 decoding: SubBlock 2 */
+ /*------------------------------------------------------*/
+ /* move to the first subblock of the next row */
+ u4_idx += (u4_sub_block_strd - 1);
+ /* top neighbour of subblock 2 is subblock 0 */
+ u4_ctxinc = (u4_top0 << 1) + (!!pu1_left_nnz[1]);
+
+ ps_src_bin_ctxt = ps_cbf + u4_ctxinc;
+
+ u4_subblock_coded = ih264d_read_coeff4x4_cabac(ps_bitstrm, u4_ctx_cat,
+ ps_sig_coeff_flag, ps_dec,
+ ps_src_bin_ctxt);
+
+ INSERT_BIT(u4_csbp, u4_idx, u4_subblock_coded);
+ pu1_top_nnz[0] = u4_subblock_coded;
+
+ /*------------------------------------------------------*/
+ /* Residual 4x4 decoding: SubBlock 3 */
+ /*------------------------------------------------------*/
+ u4_idx++;
+ /* top neighbour is subblock 1, left neighbour is subblock 2 */
+ u4_ctxinc = (u4_top1 << 1) + u4_subblock_coded;
+
+ ps_src_bin_ctxt = ps_cbf + u4_ctxinc;
+
+ u4_subblock_coded = ih264d_read_coeff4x4_cabac(ps_bitstrm, u4_ctx_cat,
+ ps_sig_coeff_flag, ps_dec,
+ ps_src_bin_ctxt);
+
+ INSERT_BIT(u4_csbp, u4_idx, u4_subblock_coded);
+ pu1_top_nnz[1] = pu1_left_nnz[1] = u4_subblock_coded;
+
+ return (u4_csbp);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_residual4x4_cabac \endif
+ *
+ * \brief
+ * This function parses CABAC syntax of a Luma and Chroma AC Residuals.
+ *
+ * Luma is parsed per 8x8 block according to the low four bits of the MB's
+ * cbp, either as four 4x4 subblocks or, when u1_tran_form8x8 is set, as one
+ * 8x8 block. Chroma DC and AC are parsed according to the high bits of cbp.
+ * The top/left nnz arrays, the DC csbp bytes and the luma/chroma csbp fields
+ * of the MB are updated on the way.
+ *
+ * \param ps_dec Decoder context.
+ * \param ps_cur_mb_info Current MB info (cbp, transform size, nnz storage).
+ * \param u1_offset Non-zero selects LUMA_AC_CTXCAT, zero selects
+ * LUMA_4X4_CTXCAT for the luma 4x4 subblocks.
+ *
+ * \return
+ * Always 0 in the current implementation (no error path exists here).
+ **************************************************************************
+ */
+
+WORD32 ih264d_parse_residual4x4_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD8 u1_offset)
+{
+ UWORD8 u1_cbp = ps_cur_mb_info->u1_cbp;
+ UWORD16 ui16_csbp = 0;
+ UWORD8 uc_ctx_cat;
+ UWORD8 *pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
+ UWORD8 *pu1_left_nnz = ps_dec->pu1_left_nnz_y;
+ UWORD8 *pu1_top_nnz_uv = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
+ ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+ ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 u4_nbr_avail = ps_dec->u1_mb_ngbr_availablity;
+ /* Placeholder handed to ih264d_cabac_parse_8x8block. It stays NULL: */
+ /* this function never reads through it, and applying offsets to a */
+ /* null pointer is undefined behaviour. */
+ WORD16 *pi2_coeff_block = NULL;
+ bin_ctxt_model_t *ps_src_bin_ctxt;
+
+ /* bit 0 of the stored DC csbp is dropped here; the remaining bits are */
+ /* used below as the chroma DC coded flags (bit i for plane i) */
+ UWORD8 u1_top_dc_csbp = (ps_top_ctxt->u1_yuv_dc_csbp) >> 1;
+ UWORD8 u1_left_dc_csbp = (ps_dec->pu1_left_yuv_dc_csbp[0]) >> 1;
+
+
+ /* top neighbour: use defaults when unavailable (0 for intra MBs, */
+ /* 1 otherwise), else copy the top MB's nnz */
+ if(!(u4_nbr_avail & TOP_MB_AVAILABLE_MASK))
+ {
+ if(p_curr_ctxt->u1_mb_type & CAB_INTRA_MASK)
+ {
+ *(UWORD32 *)pu1_top_nnz = 0;
+ u1_top_dc_csbp = 0;
+ *(UWORD32 *)pu1_top_nnz_uv = 0;
+ }
+ else
+ {
+ *(UWORD32 *)pu1_top_nnz = 0x01010101;
+ u1_top_dc_csbp = 0x3;
+ *(UWORD32 *)pu1_top_nnz_uv = 0x01010101;
+ }
+ }
+ else
+ {
+ UWORD32 *pu4_buf;
+ UWORD8 *pu1_buf;
+ pu1_buf = ps_cur_mb_info->ps_top_mb->pu1_nnz_y;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *(UWORD32 *)(pu1_top_nnz) = *pu4_buf;
+
+ pu1_buf = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *(UWORD32 *)(pu1_top_nnz_uv) = *pu4_buf;
+
+ }
+
+ /* left neighbour: same defaults when unavailable; when available the */
+ /* stored left values are used as they are */
+ if(!(u4_nbr_avail & LEFT_MB_AVAILABLE_MASK))
+ {
+ if(p_curr_ctxt->u1_mb_type & CAB_INTRA_MASK)
+ {
+ UWORD32 *pu4_buf;
+ UWORD8 *pu1_buf;
+ *(UWORD32 *)pu1_left_nnz = 0;
+ u1_left_dc_csbp = 0;
+ pu1_buf = ps_dec->pu1_left_nnz_uv;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0;
+ }
+ else
+ {
+ UWORD32 *pu4_buf;
+ UWORD8 *pu1_buf;
+ *(UWORD32 *)pu1_left_nnz = 0x01010101;
+ u1_left_dc_csbp = 0x3;
+ pu1_buf = ps_dec->pu1_left_nnz_uv;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x01010101;
+ }
+ }
+
+ uc_ctx_cat = u1_offset ? LUMA_AC_CTXCAT : LUMA_4X4_CTXCAT;
+
+ ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6;
+ ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6;
+ ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6;
+ ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6;
+ // CHECK_THIS
+ ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6;
+ ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6;
+
+ /* low four cbp bits: one bit per luma 8x8 block */
+ if(u1_cbp & 0x0f)
+ {
+ if(ps_cur_mb_info->u1_tran_form8x8 == 0)
+ {
+ /*******************************************************************/
+ /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */
+ /*******************************************************************/
+ if(!(u1_cbp & 0x1))
+ {
+ *(UWORD16 *)(pu1_top_nnz) = 0;
+ *(UWORD16 *)(pu1_left_nnz) = 0;
+ }
+ else
+ {
+ ui16_csbp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 4,
+ uc_ctx_cat, ps_dec,
+ pu1_top_nnz,
+ pu1_left_nnz);
+ }
+
+ /*******************************************************************/
+ /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */
+ /*******************************************************************/
+ if(!(u1_cbp & 0x2))
+ {
+ *(UWORD16 *)(pu1_top_nnz + 2) = 0;
+ *(UWORD16 *)(pu1_left_nnz) = 0;
+ }
+ else
+ {
+ UWORD32 u4_temp = ih264d_cabac_parse_8x8block(pi2_coeff_block,
+ 4, uc_ctx_cat,
+ ps_dec,
+ (pu1_top_nnz + 2),
+ pu1_left_nnz);
+ ui16_csbp |= (u4_temp << 2);
+ }
+
+ /*******************************************************************/
+ /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */
+ /*******************************************************************/
+ if(!(u1_cbp & 0x4))
+ {
+ *(UWORD16 *)(pu1_top_nnz) = 0;
+ *(UWORD16 *)(pu1_left_nnz + 2) = 0;
+ }
+ else
+ {
+ UWORD32 u4_temp = ih264d_cabac_parse_8x8block(
+ pi2_coeff_block, 4, uc_ctx_cat, ps_dec,
+ pu1_top_nnz, (pu1_left_nnz + 2));
+ ui16_csbp |= (u4_temp << 8);
+ }
+
+ /*******************************************************************/
+ /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/
+ /*******************************************************************/
+ if(!(u1_cbp & 0x8))
+ {
+ *(UWORD16 *)(pu1_top_nnz + 2) = 0;
+ *(UWORD16 *)(pu1_left_nnz + 2) = 0;
+ }
+ else
+ {
+ UWORD32 u4_temp = ih264d_cabac_parse_8x8block(
+ pi2_coeff_block, 4, uc_ctx_cat, ps_dec,
+ (pu1_top_nnz + 2), (pu1_left_nnz + 2));
+ ui16_csbp |= (u4_temp << 10);
+ }
+
+ }
+ else
+ {
+ /* 8x8 transform: a coded 8x8 block marks all four of its 4x4 */
+ /* positions in csbp and sets the matching nnz entries to 1 */
+ ui16_csbp = 0;
+
+ /*******************************************************************/
+ /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */
+ /*******************************************************************/
+ if(!(u1_cbp & 0x1))
+ {
+ *(UWORD16 *)(pu1_top_nnz) = 0;
+ *(UWORD16 *)(pu1_left_nnz) = 0;
+ }
+ else
+ {
+
+ ih264d_read_coeff8x8_cabac( ps_bitstrm,
+ ps_dec, ps_cur_mb_info);
+
+ pu1_left_nnz[0] = 1;
+ pu1_left_nnz[1] = 1;
+
+ pu1_top_nnz[0] = 1;
+ pu1_top_nnz[1] = 1;
+
+ /* added to be used by BS computation module */
+ ui16_csbp |= 0x0033;
+ }
+
+ /*******************************************************************/
+ /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */
+ /*******************************************************************/
+
+ if(!(u1_cbp & 0x2))
+ {
+ *(UWORD16 *)(pu1_top_nnz + 2) = 0;
+ *(UWORD16 *)(pu1_left_nnz) = 0;
+ }
+ else
+ {
+
+
+ ih264d_read_coeff8x8_cabac(ps_bitstrm,
+ ps_dec, ps_cur_mb_info);
+
+ pu1_left_nnz[0] = 1;
+ pu1_left_nnz[1] = 1;
+
+ pu1_top_nnz[2] = 1;
+ pu1_top_nnz[3] = 1;
+
+ /* added to be used by BS computation module */
+ ui16_csbp |= 0x00CC;
+
+ }
+
+ /*******************************************************************/
+ /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */
+ /*******************************************************************/
+ if(!(u1_cbp & 0x4))
+ {
+ *(UWORD16 *)(pu1_top_nnz) = 0;
+ *(UWORD16 *)(pu1_left_nnz + 2) = 0;
+ }
+ else
+ {
+
+ ih264d_read_coeff8x8_cabac(ps_bitstrm,
+ ps_dec, ps_cur_mb_info);
+
+ pu1_left_nnz[2] = 1;
+ pu1_left_nnz[3] = 1;
+
+ pu1_top_nnz[0] = 1;
+ pu1_top_nnz[1] = 1;
+
+ /* added to be used by BS computation module */
+ ui16_csbp |= 0x3300;
+ }
+
+ /*******************************************************************/
+ /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/
+ /*******************************************************************/
+
+ if(!(u1_cbp & 0x8))
+ {
+ *(UWORD16 *)(pu1_top_nnz + 2) = 0;
+ *(UWORD16 *)(pu1_left_nnz + 2) = 0;
+ }
+ else
+ {
+
+ ih264d_read_coeff8x8_cabac(ps_bitstrm,
+ ps_dec, ps_cur_mb_info);
+
+ pu1_left_nnz[2] = 1;
+ pu1_left_nnz[3] = 1;
+
+ pu1_top_nnz[2] = 1;
+ pu1_top_nnz[3] = 1;
+
+ /* added to be used by BS computation module */
+ ui16_csbp |= 0xCC00;
+ }
+ }
+ }
+ else
+ {
+ *(UWORD32 *)(pu1_top_nnz) = 0;
+ *(UWORD32 *)(pu1_left_nnz) = 0;
+ }
+ /*--------------------------------------------------------------------*/
+ /* Store the last row of N values to top row */
+ /*--------------------------------------------------------------------*/
+ ps_cur_mb_info->u2_luma_csbp = ui16_csbp;
+ ps_cur_mb_info->ps_curmb->u2_luma_csbp = ui16_csbp;
+ {
+ WORD8 i;
+ UWORD16 u2_chroma_csbp = 0;
+ ps_cur_mb_info->u2_chroma_csbp = 0;
+
+ /* from here on u1_cbp holds only the chroma part of cbp and the */
+ /* nnz pointers refer to the chroma arrays */
+ u1_cbp >>= 4;
+ pu1_top_nnz = pu1_top_nnz_uv;
+ pu1_left_nnz = ps_dec->pu1_left_nnz_uv;
+ /*--------------------------------------------------------------------*/
+ /* if Chroma Component not present OR no ac values present */
+ /* Set the values of N to zero */
+ /*--------------------------------------------------------------------*/
+ if(u1_cbp == CBPC_ALLZERO)
+ {
+ ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x1;
+ *(UWORD32 *)(pu1_top_nnz) = 0;
+ *(UWORD32 *)(pu1_left_nnz) = 0;
+ p_curr_ctxt->u1_yuv_dc_csbp &= 0x1;
+ return (0);
+ }
+
+ /*--------------------------------------------------------------------*/
+ /* Decode Chroma DC values */
+ /*--------------------------------------------------------------------*/
+ /* i = 0 uses the u quant scale / qp, i = 1 the v quant scale / qp */
+ for(i = 0; i < 2; i++)
+ {
+ UWORD8 uc_a = 1, uc_b = 1;
+ UWORD32 u4_ctx_inc;
+ UWORD8 uc_codedBlockFlag;
+ UWORD8 pu1_inv_scan[4] =
+ { 0, 1, 2, 3 };
+ WORD32 u4_scale;
+ WORD32 i4_mb_inter_inc;
+ /* both pointers are taken before the read call below, i.e. they */
+ /* address the start of the TU record that call may write */
+ tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
+ (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+ WORD16 *pi2_coeff_data =
+ (WORD16 *)ps_dec->pv_parse_tu_coeff_data;
+ WORD16 ai2_dc_coef[4];
+
+ INC_SYM_COUNT(&(ps_dec->s_cab_dec_env));
+ u4_scale = (i) ?
+ (ps_dec->pu2_quant_scale_v[0]
+ << ps_dec->u1_qp_v_div6) :
+ (ps_dec->pu2_quant_scale_u[0]
+ << ps_dec->u1_qp_u_div6);
+
+ /*--------------------------------------------------------------------*/
+ /* Decode Bitstream to get the DC coeff */
+ /*--------------------------------------------------------------------*/
+ uc_a = (u1_left_dc_csbp >> i) & 0x01;
+ uc_b = (u1_top_dc_csbp >> i) & 0x01;
+ u4_ctx_inc = (uc_a + (uc_b << 1));
+
+ ps_src_bin_ctxt = (ps_dec->p_cbf_t[CHROMA_DC_CTXCAT]) + u4_ctx_inc;
+
+ uc_codedBlockFlag =
+ ih264d_read_coeff4x4_cabac(ps_bitstrm,
+ CHROMA_DC_CTXCAT,
+ ps_dec->p_significant_coeff_flag_t[CHROMA_DC_CTXCAT],
+ ps_dec, ps_src_bin_ctxt);
+
+ /* 0 for I_4x4 / I_16x16 MBs, 3 for all other MB types */
+ i4_mb_inter_inc = (!((ps_cur_mb_info->ps_curmb->u1_mb_type == I_4x4_MB)
+ || (ps_cur_mb_info->ps_curmb->u1_mb_type == I_16x16_MB)))
+ * 3;
+
+ if(ps_dec->s_high_profile.u1_scaling_present)
+ {
+ u4_scale *=
+ ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc
+ + 1 + i][0];
+
+ }
+ else
+ {
+ u4_scale <<= 4;
+ }
+
+ if(uc_codedBlockFlag)
+ {
+ WORD32 i_z0, i_z1, i_z2, i_z3;
+
+ SET_BIT(u1_top_dc_csbp, i);
+ SET_BIT(u1_left_dc_csbp, i);
+
+ ai2_dc_coef[0] = 0;
+ ai2_dc_coef[1] = 0;
+ ai2_dc_coef[2] = 0;
+ ai2_dc_coef[3] = 0;
+
+ ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
+ ai2_dc_coef,
+ pu1_inv_scan);
+ /* 2x2 butterfly over the four DC coefficients */
+ i_z0 = (ai2_dc_coef[0] + ai2_dc_coef[2]);
+ i_z1 = (ai2_dc_coef[0] - ai2_dc_coef[2]);
+ i_z2 = (ai2_dc_coef[1] - ai2_dc_coef[3]);
+ i_z3 = (ai2_dc_coef[1] + ai2_dc_coef[3]);
+
+ /*-----------------------------------------------------------*/
+ /* Scaling and storing the values back */
+ /*-----------------------------------------------------------*/
+ /* the four results overwrite the TU record from its start; */
+ /* the parse pointer is then set just past them */
+ *pi2_coeff_data++ = ((i_z0 + i_z3) * u4_scale) >> 5;
+ *pi2_coeff_data++ = ((i_z0 - i_z3) * u4_scale) >> 5;
+ *pi2_coeff_data++ = ((i_z1 + i_z2) * u4_scale) >> 5;
+ *pi2_coeff_data++ = ((i_z1 - i_z2) * u4_scale) >> 5;
+
+ ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_data;
+
+ SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,(i+1));
+ }
+ else
+ {
+ CLEARBIT(u1_top_dc_csbp, i);
+ CLEARBIT(u1_left_dc_csbp, i);
+ }
+ }
+
+ /*********************************************************************/
+ /* Update the DC csbp */
+ /*********************************************************************/
+ /* bit 0 is preserved; the chroma flags are stored from bit 1 upwards */
+ ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x1;
+ p_curr_ctxt->u1_yuv_dc_csbp &= 0x1;
+ ps_dec->pu1_left_yuv_dc_csbp[0] |= (u1_left_dc_csbp << 1);
+ p_curr_ctxt->u1_yuv_dc_csbp |= (u1_top_dc_csbp << 1);
+ if(u1_cbp == CBPC_ACZERO)
+ {
+ *(UWORD32 *)(pu1_top_nnz) = 0;
+ *(UWORD32 *)(pu1_left_nnz) = 0;
+ return (0);
+ }
+ /*--------------------------------------------------------------------*/
+ /* Decode Chroma AC values */
+ /*--------------------------------------------------------------------*/
+ {
+ UWORD32 u4_temp;
+ /*****************************************************************/
+ /* U Block residual decoding, check cbp and proceed (subblock=0)*/
+ /*****************************************************************/
+ u2_chroma_csbp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 2,
+ CHROMA_AC_CTXCAT,
+ ps_dec, pu1_top_nnz,
+ pu1_left_nnz);
+
+ /*****************************************************************/
+ /* V Block residual decoding, check cbp and proceed (subblock=1)*/
+ /*****************************************************************/
+ u4_temp = ih264d_cabac_parse_8x8block(pi2_coeff_block, 2,
+ CHROMA_AC_CTXCAT,
+ ps_dec, (pu1_top_nnz + 2),
+ (pu1_left_nnz + 2));
+ u2_chroma_csbp |= (u4_temp << 4);
+ }
+ /*********************************************************************/
+ /* Update the AC csbp */
+ /*********************************************************************/
+ ps_cur_mb_info->u2_chroma_csbp = u2_chroma_csbp;
+ }
+
+ return (0);
+}
+
diff --git a/decoder/ih264d_parse_cabac.h b/decoder/ih264d_parse_cabac.h
new file mode 100755
index 0000000..eb66e8c
--- /dev/null
+++ b/decoder/ih264d_parse_cabac.h
@@ -0,0 +1,60 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ ***************************************************************************
+ * \file ih264d_parse_cabac.h
+ *
+ * \brief
+ * This file contains cabac Residual decoding routines.
+ *
+ * \date
+ * 20/03/2003
+ *
+ * \author NS
+ ***************************************************************************
+ */
+#ifndef _IH264D_PARSE_CABAC_H_
+#define _IH264D_PARSE_CABAC_H_
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+
+#define UCOFF_LEVEL 14 /* maximum number of context-coded prefix bins of a coefficient level; a prefix of exactly this length is followed by a bypass-coded suffix */
+
+
+UWORD8 ih264d_read_coeff4x4_cabac(dec_bit_stream_t *ps_bitstrm,
+ UWORD32 u4_ctxcat,
+ bin_ctxt_model_t *ps_ctxt_sig_coeff,
+ dec_struct_t *ps_dec,
+ bin_ctxt_model_t *ps_ctxt_coded); /* returns the coded flag of the block; callers use it as a 0/1 value */
+
+void ih264d_read_coeff8x8_cabac(dec_bit_stream_t *ps_bitstrm,
+ dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info); /* no return value; output goes to ps_dec->pv_parse_tu_coeff_data */
+
+UWORD32 cabac_parse_8x8block_transform8x8_set(WORD16 *pi2_coeff_block,
+ dec_struct_t * ps_dec,
+ UWORD8 *pu1_top_nnz,
+ UWORD8 *pu1_left_nnz,
+ dec_mb_info_t *ps_cur_mb_info); /* NOTE(review): name lacks the ih264d_ prefix used by the other routines - TODO confirm a matching definition exists */
+
+#endif /* _IH264D_PARSE_CABAC_H_ */
diff --git a/decoder/ih264d_parse_cavlc.c b/decoder/ih264d_parse_cavlc.c
new file mode 100755
index 0000000..a3f345c
--- /dev/null
+++ b/decoder/ih264d_parse_cavlc.c
@@ -0,0 +1,2694 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ ***************************************************************************
+ * \file ih264d_parse_cavlc.c
+ *
+ * \brief
+ * This file contains UVLC related functions.
+ *
+ * \date
+ * 20/11/2002
+ *
+ * \author NS
+ ***************************************************************************
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include "ih264d_bitstrm.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_cabac.h"
+#include "ih264d_structs.h"
+#include "ih264d_tables.h"
+#include "ih264d_tables.h"
+#include "ih264d_mb_utils.h"
+
+/* Forward declaration only: the function is declared here so it can be     */
+/* referenced from this file. Its definition is not in this part of the     */
+/* file.                                                                    */
+/* NOTE(review): by its parameters it reads a packed 4x4 TU record and      */
+/* writes coefficients to pi2_out_coeff_data using an inverse-scan table -  */
+/* confirm against the definition.                                          */
+void ih264d_unpack_coeff4x4_dc_4x4blk(tu_sblk4x4_coeff_data_t *ps_tu_4x4,
+ WORD16 *pi2_out_coeff_data,
+ UWORD8 *pu1_inv_scan);
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_uev */
+/* */
+/* Description : Reads the unsigned Exp Golomb codec syntax from the */
+/* ps_bitstrm as specified in section 9.1 of H264 standard */
+/* It also increases bitstream u4_ofst by the number of bits */
+/* parsed for UEV decode operation */
+/* */
+/* Inputs : bitstream base pointer and bitstream u4_ofst in bits */
+/* Globals : None */
+/* Processing : */
+/* Outputs : UEV decoded syntax element and incremented ps_bitstrm u4_ofst */
+/* Returns : UEV decoded syntax element */
+/* */
+/* Issues : Does not check if ps_bitstrm u4_ofst exceeds max ps_bitstrm i4_size */
+/* for performance. Caller might have to do error resilience */
+/* check for bitstream overflow */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 19 09 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+UWORD32 ih264d_uev(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf)
+{
+    UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+    UWORD32 u4_word, u4_ldz;
+
+    /***************************************************************/
+    /* Find leading zeros in next 32 bits: this is the length of   */
+    /* the Exp-Golomb prefix                                       */
+    /***************************************************************/
+    NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+    u4_ldz = CLZ(u4_word);
+
+    /* Step over the zero prefix and the bit that terminates it */
+    u4_bitstream_offset += (u4_ldz + 1);
+
+    /* The suffix is as long as the prefix; nothing is read when the */
+    /* prefix is empty and the suffix then counts as zero            */
+    u4_word = 0;
+    if(u4_ldz)
+    {
+        GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+    }
+
+    /* Hand the advanced bit offset back to the caller */
+    *pu4_bitstrm_ofst = u4_bitstream_offset;
+
+    /* codeNum = 2^prefix_len + suffix - 1.                              */
+    /* The power of two is formed in 64 bits and truncated to 32 bits:   */
+    /* shifting the int constant 1 by 31 or more is undefined behaviour, */
+    /* which a long zero prefix in a corrupt stream could trigger        */
+    /* (assumes CLZ never reports more than 32 for a 32-bit word).       */
+    /* The result is now reduced modulo 2^32 in that case.               */
+    return ((UWORD32)(1ULL << u4_ldz) + u4_word - 1);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_sev */
+/* */
+/* Description : Reads the signed Exp Golomb codec syntax from the ps_bitstrm */
+/* as specified in section 9.1 of H264 standard. */
+/* It also increases bitstream u4_ofst by the number of bits */
+/* parsed for SEV decode operation */
+/* */
+/* Inputs : bitstream base pointer and bitstream u4_ofst in bits */
+/* Globals : None */
+/* Processing : */
+/* Outputs : SEV decoded syntax element and incremented ps_bitstrm u4_ofst */
+/* Returns : SEV decoded syntax element */
+/* */
+/* Issues : Does not check if ps_bitstrm u4_ofst exceeds max ps_bitstrm i4_size */
+/* for performance. Caller might have to do error resilience */
+/* check for bitstream overflow */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 19 09 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_sev(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf)
+{
+    UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+    UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+    /***************************************************************/
+    /* Find leading zeros in next 32 bits: this is the length of   */
+    /* the Exp-Golomb prefix                                       */
+    /***************************************************************/
+    NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+    u4_ldz = CLZ(u4_word);
+
+    /* Step over the zero prefix and the bit that terminates it */
+    u4_bitstream_offset += (u4_ldz + 1);
+
+    /* The suffix is as long as the prefix; nothing is read when the */
+    /* prefix is empty and the suffix then counts as zero            */
+    u4_word = 0;
+    if(u4_ldz)
+    {
+        GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+    }
+
+    /* Hand the advanced bit offset back to the caller */
+    *pu4_bitstrm_ofst = u4_bitstream_offset;
+
+    /* With codeNum = 2^prefix_len + suffix - 1, the magnitude is        */
+    /* (codeNum + 1) >> 1. The power of two is formed in 64 bits and     */
+    /* truncated to 32 bits because shifting the int constant 1 by 31 or */
+    /* more is undefined behaviour (assumes CLZ never reports more than  */
+    /* 32 for a 32-bit word).                                            */
+    u4_abs_val = ((UWORD32)(1ULL << u4_ldz) + u4_word) >> 1;
+
+    /* Whenever a suffix exists 2^prefix_len is even, so an odd suffix   */
+    /* means an even codeNum, and even codeNums map to negative values   */
+    if(u4_word & 0x1)
+        return (-(WORD32)u4_abs_val);
+    else
+        return ((WORD32)u4_abs_val);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_tev_range1 */
+/* */
+/* Description : Reads the TEV Exp Golomb codec syntax from the ps_bitstrm */
+/* as specified in section 9.1 of H264 standard. This will be */
+/* called only when the input range is 1 for TEV decode. */
+/* If range is more than 1, then UEV decode is done */
+/* */
+/* Inputs : bitstream base pointer and bitstream u4_ofst in bits */
+/* Globals : None */
+/* Processing : */
+/* Outputs : TEV decoded syntax element and incremented ps_bitstrm u4_ofst */
+/* Returns : TEV decoded syntax element */
+/* */
+/* Issues : Does not check if ps_bitstrm u4_ofst exceeds max ps_bitstrm i4_size */
+/* for performance. Caller might have to do error resilience */
+/* check for bitstream overflow */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 19 09 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+UWORD32 ih264d_tev_range1(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf)
+{
+    UWORD32 u4_code;
+
+    /* A truncated Exp-Golomb code of range 1 is a single bit */
+    GETBIT(u4_code, *pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+    /* The decoded value is the logical inverse of that bit */
+    if(u4_code)
+    {
+        return 0;
+    }
+    return 1;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_uvlc \endif
+ *
+ * \brief
+ *
+ * Reads the unsigned/signed/truncated integer Exp-Golomb-coded syntax element
+ * with the left bit first. The parsing process for this descriptor is specified
+ * in subclause 9.1.
+ *
+ * \param ps_bitstrm : Pointer to Bitstream Structure .
+ * \param u4_range : Range value in case of Truncated Exp-Golomb-code
+ * \param pi_bitstrm_ofst : Pointer to the local copy of Bitstream u4_ofst
+ * \param u1_flag : Flag indicating the case of UEV,SEV or TEV
+ * \param u4_bitstrm_ofst : Local copy of Bitstream u4_ofst
+ * \param pu4_bitstrm_buf : Pointer to the Bitstream buffer
+ *
+ * \return
+ * Returns Code Value.
+ *
+ **************************************************************************
+ */
+
+WORD32 ih264d_uvlc(dec_bit_stream_t *ps_bitstrm,
+                   UWORD32 u4_range,
+                   UWORD32 *pi_bitstrm_ofst,
+                   UWORD8 u1_flag,
+                   UWORD32 u4_bitstrm_ofst,
+                   UWORD32 *pu4_bitstrm_buf)
+{
+    UWORD32 word, word2, cur_bit, cur_word, code_val, code_num, clz;
+
+    SWITCHOFFTRACE;
+
+    /* Assemble the 32-bit window that starts at bit u4_bitstrm_ofst from */
+    /* the two buffer words it can straddle                               */
+    cur_bit = u4_bitstrm_ofst & 0x1F;
+    cur_word = u4_bitstrm_ofst >> 5;
+    word = pu4_bitstrm_buf[cur_word];
+    word2 = pu4_bitstrm_buf[cur_word + 1];
+
+    /* When the window is word aligned the first word is already the */
+    /* window; skipping the merge also avoids a shift by 32          */
+    if(cur_bit != 0)
+    {
+        word <<= cur_bit;
+        word2 >>= (32 - cur_bit);
+        word |= word2;
+    }
+
+    /* Truncated code with range 1: a single bit, inverted */
+    if(u1_flag == TEV && u4_range == 1)
+    {
+        word >>= 31;
+        word = 1 - word;
+        (*pi_bitstrm_ofst)++;
+        ps_bitstrm->u4_ofst = *pi_bitstrm_ofst;
+        return (WORD32)word;
+    }
+
+    /* Count the leading zeros of the window (32 when it is all zero) */
+    {
+        UWORD32 ui32_mask = 0x80000000;
+
+        clz = 0;
+        while((0 != ui32_mask) && !(word & ui32_mask))
+        {
+            clz++;
+            ui32_mask >>= 1;
+        }
+    }
+
+    /* No prefix: the code is the single bit '1', whose value is 0 */
+    if(clz == 0)
+    {
+        *pi_bitstrm_ofst = *pi_bitstrm_ofst + 1;
+        ps_bitstrm->u4_ofst = *pi_bitstrm_ofst;
+        return 0;
+    }
+
+    /* Isolate the clz suffix bits that follow the terminating '1'.       */
+    /* Only the 32-bit window assembled above is examined, so suffix bits */
+    /* that lie beyond it read as zero. The left shift is carried out in  */
+    /* 64 bits and truncated, because clz + 1 can be 32 or 33 here, which */
+    /* is an undefined shift count for a 32-bit operand.                  */
+    word = (UWORD32)((unsigned long long)word << (clz + 1));
+    word >>= (32 - clz);
+
+    /* codeNum = 2^clz + suffix - 1, reduced modulo 2^32 (1 << clz on an */
+    /* int is undefined for clz of 31 or 32)                             */
+    code_num = (UWORD32)(1ULL << clz) + word - 1;
+
+    /* Prefix, terminating bit and suffix have all been consumed */
+    *pi_bitstrm_ofst = *pi_bitstrm_ofst + (2 * clz) + 1;
+    ps_bitstrm->u4_ofst = *pi_bitstrm_ofst;
+
+    /* Unsigned and truncated codes return codeNum as is */
+    if(u1_flag == TEV || u1_flag == UEV)
+        return (WORD32)code_num;
+
+    /* Signed mapping: magnitude (codeNum + 1) / 2, negative for even */
+    /* codeNum                                                        */
+    code_val = (code_num + 1) >> 1;
+    if(!(code_num & 0x01))
+        return -((WORD32)code_val);
+    return (WORD32)code_val;
+
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_cavlc_4x4res_block_totalcoeff_1 */
+/* */
+/* Description : This function does cavlc decoding of 4x4 block residual */
+/* coefficient when total coeff is equal to 1. The parsing */
+/* is done as defined in section 9.2.2 and 9.2.3 of the */
+/* H264 standard. */
+/* */
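+/* Inputs : u4_isdc - offset added to the decoded total zeros to get the */
+/* scan position of the coefficient */
+/* u4_total_coeff_trail_one - low 16 bits hold the number of */
+/* trailing ones */
+/* ps_bitstrm - bitstream context (buffer, bit offset and */
+/* codec handle) */
+/* Globals : None referenced directly */
+/* Processing : Decodes the single level (trailing one sign bit, or level */
+/* prefix and suffix), then total zeros, and records the */
+/* coefficient in the TU coefficient data */
+/* Outputs : Significance map and level written at */
+/* ps_dec->pv_parse_tu_coeff_data, which is then advanced; */
+/* ps_bitstrm->u4_ofst is updated on success */
+/* Returns : 0 on success, -1 if the bitstream is found to be invalid */
+/* */
+/* Issues : None recorded */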
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 25 09 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_1(UWORD32 u4_isdc,
+                                              UWORD32 u4_total_coeff_trail_one,
+                                              dec_bit_stream_t *ps_bitstrm)
+{
+    dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
+    tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
+                    (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+    WORD16 *pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
+    UWORD32 u4_tot_zero = 0;
+    UWORD32 u4_ldz, u4_scan_pos;
+    WORD32 i4_level;
+    WORD32 i4_bytes_used;
+
+    /* Start this block's record with an empty significance map */
+    ps_tu_4x4->u2_sig_coeff_map = 0;
+
+    if(0 == (u4_total_coeff_trail_one & 0xFFFF))
+    {
+        /************************************************************/
+        /* No trailing one: the coefficient is a full level made of */
+        /* a prefix and an optional suffix (section 9.2.2)          */
+        /************************************************************/
+        UWORD32 u4_lev_prefix, u4_lev_suffix;
+        UWORD32 u4_lev_suffix_size = 0;
+        WORD32 i4_lev_code, i4_abs_level;
+
+        /* The prefix is the run of zeros before the next set bit */
+        FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
+                              pu4_bitstrm_buf);
+
+        i4_lev_code = (2 + MIN(u4_lev_prefix, 15));
+
+        /* Only prefixes of 14 and above are followed by a suffix */
+        if(15 <= u4_lev_prefix)
+        {
+            i4_lev_code += 15;
+            u4_lev_suffix_size = u4_lev_prefix - 3;
+        }
+        else if(14 == u4_lev_prefix)
+        {
+            u4_lev_suffix_size = 4;
+        }
+
+        //HP_LEVEL_PREFIX
+        if(16 <= u4_lev_prefix)
+        {
+            i4_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
+        }
+
+        if(0 != u4_lev_suffix_size)
+        {
+            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
+                    u4_lev_suffix_size);
+            i4_lev_code += u4_lev_suffix;
+        }
+
+        /* An odd level code is a negative level, an even one positive */
+        i4_abs_level = (i4_lev_code + 2) >> 1;
+        i4_level = i4_abs_level;
+        if(i4_lev_code & 1)
+        {
+            i4_level = -i4_abs_level;
+        }
+    }
+    else
+    {
+        /************************************************************/
+        /* Trailing one: magnitude is 1, one bit carries the sign   */
+        /************************************************************/
+        UWORD32 u4_sign;
+
+        GETBIT(u4_sign, u4_bitstream_offset, pu4_bitstrm_buf);
+        i4_level = 1;
+        if(u4_sign)
+        {
+            i4_level = -1;
+        }
+    }
+
+    /****************************************************************/
+    /* Total zeros (section 9.2.3, table 9.7): a zero run of at     */
+    /* most 8 bits, followed by one more bit unless the run is empty */
+    /****************************************************************/
+    FIND_ONE_IN_STREAM_LEN(u4_ldz, u4_bitstream_offset, pu4_bitstrm_buf, 8);
+    if(u4_ldz)
+    {
+        GETBIT(u4_tot_zero, u4_bitstream_offset, pu4_bitstrm_buf);
+        u4_tot_zero = (u4_ldz << 1) - u4_tot_zero;
+    }
+
+    /* The coefficient sits after the zeros, shifted by u4_isdc; a 4x4 */
+    /* block has no position beyond 15                                 */
+    u4_scan_pos = u4_tot_zero + u4_isdc;
+    if(u4_scan_pos > 15)
+    {
+        return -1;
+    }
+
+    /* Record the position in the map and store the level */
+    SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
+    pi2_coeff_data[0] = i4_level;
+
+    /* Advance the parse pointer past this record (through the one level */
+    /* just stored), rounded with ALIGN4                                 */
+    i4_bytes_used = (UWORD8 *)(pi2_coeff_data + 1) - (UWORD8 *)ps_tu_4x4;
+    i4_bytes_used = ALIGN4(i4_bytes_used);
+    ps_dec->pv_parse_tu_coeff_data =
+                    (void *)((UWORD8 *)ps_tu_4x4 + i4_bytes_used);
+
+    /* Commit the consumed bits only on success */
+    ps_bitstrm->u4_ofst = u4_bitstream_offset;
+    return 0;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_cavlc_4x4res_block_totalcoeff_2to10 */
+/* */
+/* Description : This function does cavlc decoding of 4x4 block residual */
+/* coefficient when total coeffs are between two and ten */
+/* inclusive. Parsing is done as defined in section 9.2.2 */
+/* and 9.2.3 the H264 standard. */
+/* */
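+/* Inputs : u4_isdc - offset added when computing scan positions */
+/* u4_total_coeff_trail_one - total coefficients in the upper */
+/* 16 bits, trailing ones in the lower 16 bits */
+/* ps_bitstrm - bitstream context (buffer, bit offset and */
+/* codec handle) */
+/* Globals : gai2_ih264d_trailing_one_level, */
+/* gau1_ih264d_table_total_zero_2to10, */
+/* gau1_ih264d_table_run_before (read only) */
+/* Processing : Decodes trailing one signs, the remaining levels, total */
+/* zeros and the runs before each coefficient, then stores */
+/* the levels from the highest scan position downwards */
+/* Outputs : Significance map and levels written at */
+/* ps_dec->pv_parse_tu_coeff_data, which is then advanced; */
+/* ps_bitstrm->u4_ofst is updated on success */
+/* Returns : 0 on success, -1 if the bitstream is found to be invalid */
+/* */
+/* Issues : None recorded */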
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 25 09 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_2to10(UWORD32 u4_isdc,
+                                                  UWORD32 u4_total_coeff_trail_one, /*!<TotalCoefficients<<16+trailingones*/
+                                                  dec_bit_stream_t *ps_bitstrm)
+{
+    UWORD32 u4_total_zeroes;
+    WORD32 i;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
+    UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF;
+    UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16;
+    /* Levels are filled from index u4_total_coeff - 1 downwards, in the */
+    /* order they are parsed                                             */
+    WORD16 i2_level_arr[16];
+
+    tu_sblk4x4_coeff_data_t *ps_tu_4x4;
+    WORD16 *pi2_coeff_data;
+    dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
+
+    /* The record for this block is written at the current parse pointer */
+    ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+    ps_tu_4x4->u2_sig_coeff_map = 0;
+    pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
+
+    i = u4_total_coeff - 1;
+
+    if(u4_trailing_ones)
+    {
+        /*********************************************************************/
+        /* Decode Trailing Ones                                              */
+        /* read the sign of T1's and put them in level array                 */
+        /*********************************************************************/
+        UWORD32 u4_signs, u4_cnt = u4_trailing_ones;
+        WORD16 (*ppi2_trlone_lkup)[3] =
+                        (WORD16 (*)[3])gai2_ih264d_trailing_one_level;
+        WORD16 *pi2_trlone_lkup;
+
+        /* One sign bit per trailing one */
+        GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt);
+
+        /* The table row is selected by the count and the sign pattern */
+        pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs];
+
+        while(u4_cnt--)
+            i2_level_arr[i--] = *pi2_trlone_lkup++;
+    }
+
+    /****************************************************************/
+    /* Decoding Levels Begins                                       */
+    /****************************************************************/
+    if(i >= 0)
+    {
+        /****************************************************************/
+        /* First level is decoded outside the loop as it has lot of     */
+        /* special cases.                                               */
+        /****************************************************************/
+        UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size;
+        WORD32 u2_lev_code, u2_abs_value;
+        UWORD32 u4_lev_prefix;
+
+        /***************************************************************/
+        /* u4_suffix_len = 0,  Find leading zeros in next 32 bits      */
+        /***************************************************************/
+        FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
+                              pu4_bitstrm_buf);
+
+        /*********************************************************/
+        /* Special decoding case when trailing ones are 3        */
+        /*********************************************************/
+        u2_lev_code = MIN(15, u4_lev_prefix);
+
+        u2_lev_code += (3 == u4_trailing_ones) ? 0 : 2;
+
+        /* With a suffix length of 0 only prefixes of 14 and above are */
+        /* followed by suffix bits                                     */
+        if(14 == u4_lev_prefix)
+            u4_lev_suffix_size = 4;
+        else if(15 <= u4_lev_prefix)
+        {
+            u2_lev_code += 15;
+            u4_lev_suffix_size = u4_lev_prefix - 3;
+        }
+        else
+            u4_lev_suffix_size = 0;
+
+        //HP_LEVEL_PREFIX
+        if(16 <= u4_lev_prefix)
+        {
+            u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
+        }
+        if(u4_lev_suffix_size)
+        {
+            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
+                    u4_lev_suffix_size);
+            u2_lev_code += u4_lev_suffix;
+        }
+
+        u2_abs_value = (u2_lev_code + 2) >> 1;
+        /*********************************************************/
+        /* If Level code is odd, level is negative else positive */
+        /*********************************************************/
+        i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
+
+        /* Suffix length for the second level depends on the first one */
+        u4_suffix_len = (u2_abs_value > 3) ? 2 : 1;
+
+        /*********************************************************/
+        /* Now loop over the remaining levels                    */
+        /*********************************************************/
+        while(i >= 0)
+        {
+
+            /***************************************************************/
+            /* Find leading zeros in next 32 bits                          */
+            /***************************************************************/
+            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
+                                  pu4_bitstrm_buf);
+
+            u4_lev_suffix_size =
+                            (15 <= u4_lev_prefix) ?
+                                            (u4_lev_prefix - 3) : u4_suffix_len;
+
+            /*********************************************************/
+            /* Compute level code using prefix and suffix            */
+            /*********************************************************/
+            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
+                    u4_lev_suffix_size);
+            u2_lev_code = (MIN(15,u4_lev_prefix) << u4_suffix_len)
+                            + u4_lev_suffix;
+
+            //HP_LEVEL_PREFIX
+            if(16 <= u4_lev_prefix)
+            {
+                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
+            }
+            u2_abs_value = (u2_lev_code + 2) >> 1;
+
+            /*********************************************************/
+            /* If Level code is odd, level is negative else positive */
+            /*********************************************************/
+            i2_level_arr[i--] =
+                            (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
+
+            /*********************************************************/
+            /* Increment suffix length if required (it stops at 6)   */
+            /*********************************************************/
+            if((u4_suffix_len < 6)
+                            && (u2_abs_value > (3 << (u4_suffix_len - 1))))
+            {
+                u4_suffix_len++;
+            }
+        }
+
+        /****************************************************************/
+        /* Decoding Levels Ends                                         */
+        /****************************************************************/
+    }
+
+    /****************************************************************/
+    /* Decoding total zeros as in section 9.2.3, table 9.7          */
+    /****************************************************************/
+    {
+        UWORD32 u4_index;
+        const UWORD8 (*ppu1_total_zero_lkup)[64] =
+                        (const UWORD8 (*)[64])gau1_ih264d_table_total_zero_2to10;
+
+        /* Look up on the next 6 bits; each table entry carries the code */
+        /* length in its upper nibble and the value in its lower nibble  */
+        NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 6);
+        u4_total_zeroes = ppu1_total_zero_lkup[u4_total_coeff - 2][u4_index];
+
+        FLUSHBITS(u4_bitstream_offset, (u4_total_zeroes >> 4));
+        u4_total_zeroes &= 0xf;
+    }
+
+    /**************************************************************/
+    /* Decode the runs and form the coefficient buffer            */
+    /**************************************************************/
+    {
+        const UWORD8 *pu1_table_runbefore;
+        UWORD32 u4_run;
+        WORD32 k;
+        UWORD32 u4_scan_pos = u4_total_coeff + u4_total_zeroes - 1 + u4_isdc;
+        WORD32 u4_zeroes_left = u4_total_zeroes;
+        k = u4_total_coeff - 1;
+
+        /**************************************************************/
+        /* The first coefficient stored goes to the highest scan      */
+        /* position. A 4x4 block has no position beyond 15, so reject */
+        /* the block before anything is put in the significance map.  */
+        /* ih264d_cavlc_4x4res_block_totalcoeff_1 applies the same    */
+        /* bound.                                                     */
+        /**************************************************************/
+        if(u4_scan_pos > 15)
+            return -1;
+
+        /**************************************************************/
+        /* Decoding Runs Begin for zeros left > 6                     */
+        /**************************************************************/
+        while((u4_zeroes_left > 6) && k)
+        {
+            UWORD32 u4_code;
+
+            NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3);
+
+            if(u4_code != 0)
+            {
+                /* Non-zero 3 bit codes give runs of 0 to 6 directly */
+                FLUSHBITS(u4_bitstream_offset, 3);
+                u4_run = (7 - u4_code);
+            }
+            else
+            {
+                /* Longer runs are coded as a run of zeros ended by a one */
+                FIND_ONE_IN_STREAM_LEN(u4_code, u4_bitstream_offset,
+                                       pu4_bitstrm_buf, 11);
+                u4_run = (4 + u4_code);
+            }
+
+            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
+            *pi2_coeff_data++ = i2_level_arr[k--];
+            u4_zeroes_left -= u4_run;
+            u4_scan_pos -= (u4_run + 1);
+        }
+
+        /**************************************************************/
+        /* Decoding Runs for 0 < zeros left <=6                       */
+        /**************************************************************/
+        pu1_table_runbefore = (UWORD8 *)gau1_ih264d_table_run_before;
+        while((u4_zeroes_left > 0) && k)
+        {
+            UWORD32 u4_code;
+            NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3);
+
+            /* Table entry: run in the upper bits, code length in the */
+            /* lowest two bits                                        */
+            u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)];
+            u4_run = u4_code >> 2;
+
+            FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03));
+
+            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
+            *pi2_coeff_data++ = i2_level_arr[k--];
+            u4_zeroes_left -= u4_run;
+            u4_scan_pos -= (u4_run + 1);
+        }
+        /**************************************************************/
+        /* Decoding Runs End                                          */
+        /**************************************************************/
+
+        /**************************************************************/
+        /* Copy the remaining coefficients                            */
+        /**************************************************************/
+        /* A run longer than the zeros that were left is invalid */
+        if(u4_zeroes_left < 0)
+            return -1;
+        while(k >= 0)
+        {
+
+            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
+            *pi2_coeff_data++ = i2_level_arr[k--];
+            u4_scan_pos--;
+        }
+    }
+
+    {
+        /* Advance the parse pointer past the levels just stored, rounded */
+        /* with ALIGN4                                                    */
+        WORD32 offset;
+        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
+        offset = ALIGN4(offset);
+        ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
+    }
+
+    /* Commit the consumed bits only on success */
+    ps_bitstrm->u4_ofst = u4_bitstream_offset;
+    return 0;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_cavlc_4x4res_block_totalcoeff_11to16 */
+/* */
+/* Description : This function does cavlc decoding of 4x4 block residual */
+/* coefficient when total coeffs are greater than ten. */
+/* Parsing is done as defined in section 9.2.2 and 9.2.3 of */
+/* the H264 standard. */
+/* */
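+/* Inputs : u4_isdc - offset added when computing scan positions */
+/* u4_total_coeff_trail_one - total coefficients in the upper */
+/* 16 bits, trailing ones in the lower 16 bits */
+/* ps_bitstrm - bitstream context (buffer, bit offset and */
+/* codec handle) */
+/* Globals : gai2_ih264d_trailing_one_level, */
+/* gau1_ih264d_table_total_zero_11to15, */
+/* gau1_ih264d_table_run_before (read only) */
+/* Processing : Decodes trailing one signs, the remaining levels, total */
+/* zeros (when the block is not full) and the runs before */
+/* each coefficient, then stores the levels from the highest */
+/* scan position downwards */
+/* Outputs : Significance map and levels written at */
+/* ps_dec->pv_parse_tu_coeff_data, which is then advanced; */
+/* ps_bitstrm->u4_ofst is updated on success */
+/* Returns : 0 on success, -1 if the bitstream is found to be invalid */
+/* */
+/* Issues : None recorded */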
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 25 09 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_11to16(UWORD32 u4_isdc,
+                                                   UWORD32 u4_total_coeff_trail_one, /*!<TotalCoefficients<<16+trailingones*/
+                                                   dec_bit_stream_t *ps_bitstrm )
+{
+    UWORD32 u4_total_zeroes;
+    WORD32 i;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
+    UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF;
+    UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16;
+    /* Levels are filled from index u4_total_coeff - 1 downwards, in the */
+    /* order they are parsed                                             */
+    WORD16 i2_level_arr[16];
+
+    tu_sblk4x4_coeff_data_t *ps_tu_4x4;
+    WORD16 *pi2_coeff_data;
+    dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
+
+    /* The record for this block is written at the current parse pointer */
+    ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+    ps_tu_4x4->u2_sig_coeff_map = 0;
+    pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
+
+    i = u4_total_coeff - 1;
+    if(u4_trailing_ones)
+    {
+        /*********************************************************************/
+        /* Decode Trailing Ones                                              */
+        /* read the sign of T1's and put them in level array                 */
+        /*********************************************************************/
+        UWORD32 u4_signs, u4_cnt = u4_trailing_ones;
+        WORD16 (*ppi2_trlone_lkup)[3] =
+                        (WORD16 (*)[3])gai2_ih264d_trailing_one_level;
+        WORD16 *pi2_trlone_lkup;
+
+        /* One sign bit per trailing one */
+        GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt);
+
+        /* The table row is selected by the count and the sign pattern */
+        pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs];
+
+        while(u4_cnt--)
+            i2_level_arr[i--] = *pi2_trlone_lkup++;
+    }
+
+    /****************************************************************/
+    /* Decoding Levels Begins                                       */
+    /****************************************************************/
+    if(i >= 0)
+    {
+        /****************************************************************/
+        /* First level is decoded outside the loop as it has lot of     */
+        /* special cases.                                               */
+        /****************************************************************/
+        UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size;
+        UWORD16 u2_lev_code, u2_abs_value;
+        UWORD32 u4_lev_prefix;
+
+        if(u4_trailing_ones < 3)
+        {
+            /*********************************************************/
+            /* u4_suffix_len = 1                                     */
+            /*********************************************************/
+            /***************************************************************/
+            /* Find leading zeros in next 32 bits                          */
+            /***************************************************************/
+            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
+                                  pu4_bitstrm_buf);
+
+            u4_lev_suffix_size =
+                            (15 <= u4_lev_prefix) ? (u4_lev_prefix - 3) : 1;
+
+            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
+                    u4_lev_suffix_size);
+            /* The leading 2 is the adjustment for the first level when */
+            /* fewer than three trailing ones were signalled            */
+            u2_lev_code = 2 + (MIN(u4_lev_prefix,15) << 1) + u4_lev_suffix;
+
+            //HP_LEVEL_PREFIX
+            if(16 <= u4_lev_prefix)
+            {
+                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
+            }
+        }
+        else
+        {
+            /*********************************************************/
+            /*u4_suffix_len = 0                                      */
+            /*********************************************************/
+            /***************************************************************/
+            /* Find leading zeros in next 32 bits                          */
+            /***************************************************************/
+            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
+                                  pu4_bitstrm_buf);
+
+            /*********************************************************/
+            /* Special decoding case when trailing ones are 3        */
+            /*********************************************************/
+            u2_lev_code = MIN(15, u4_lev_prefix);
+
+            u2_lev_code += (3 == u4_trailing_ones) ? 0 : (2);
+
+            /* With a suffix length of 0 only prefixes of 14 and above */
+            /* are followed by suffix bits                             */
+            if(14 == u4_lev_prefix)
+                u4_lev_suffix_size = 4;
+            else if(15 <= u4_lev_prefix)
+            {
+                u2_lev_code += 15;
+                u4_lev_suffix_size = (u4_lev_prefix - 3);
+            }
+            else
+                u4_lev_suffix_size = 0;
+
+            //HP_LEVEL_PREFIX
+            if(16 <= u4_lev_prefix)
+            {
+                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
+            }
+            if(u4_lev_suffix_size)
+            {
+                GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
+                        u4_lev_suffix_size);
+                u2_lev_code += u4_lev_suffix;
+            }
+        }
+
+        u2_abs_value = (u2_lev_code + 2) >> 1;
+        /*********************************************************/
+        /* If Level code is odd, level is negative else positive */
+        /*********************************************************/
+        i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
+
+        /* Suffix length for the second level depends on the first one */
+        u4_suffix_len = (u2_abs_value > 3) ? 2 : 1;
+
+        /*********************************************************/
+        /* Now loop over the remaining levels                    */
+        /*********************************************************/
+        while(i >= 0)
+        {
+
+            /***************************************************************/
+            /* Find leading zeros in next 32 bits                          */
+            /***************************************************************/
+            FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
+                                  pu4_bitstrm_buf);
+
+            u4_lev_suffix_size =
+                            (15 <= u4_lev_prefix) ?
+                                            (u4_lev_prefix - 3) : u4_suffix_len;
+
+            /*********************************************************/
+            /* Compute level code using prefix and suffix            */
+            /*********************************************************/
+            GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
+                    u4_lev_suffix_size);
+            u2_lev_code = (MIN(15,u4_lev_prefix) << u4_suffix_len)
+                            + u4_lev_suffix;
+
+            //HP_LEVEL_PREFIX
+            if(16 <= u4_lev_prefix)
+            {
+                u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
+            }
+            u2_abs_value = (u2_lev_code + 2) >> 1;
+
+            /*********************************************************/
+            /* If Level code is odd, level is negative else positive */
+            /*********************************************************/
+            i2_level_arr[i--] =
+                            (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
+
+            /*********************************************************/
+            /* Increment suffix length if required (it stops at 6)   */
+            /*********************************************************/
+            if((u4_suffix_len < 6)
+                            && (u2_abs_value > (3 << (u4_suffix_len - 1))))
+            {
+                u4_suffix_len++;
+            }
+        }
+
+        /****************************************************************/
+        /* Decoding Levels Ends                                         */
+        /****************************************************************/
+    }
+
+    /* total zeros is only read when the coefficients do not already */
+    /* fill every position available to this block                   */
+    if(u4_total_coeff < (16 - u4_isdc))
+    {
+        UWORD32 u4_index;
+        const UWORD8 (*ppu1_total_zero_lkup)[16] =
+                        (const UWORD8 (*)[16])gau1_ih264d_table_total_zero_11to15;
+
+        /* Look up on the next 4 bits; each table entry carries the code */
+        /* length in its upper nibble and the value in its lower nibble  */
+        NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 4);
+        u4_total_zeroes = ppu1_total_zero_lkup[u4_total_coeff - 11][u4_index];
+
+        FLUSHBITS(u4_bitstream_offset, (u4_total_zeroes >> 4));
+        u4_total_zeroes &= 0xf;
+    }
+    else
+        u4_total_zeroes = 0;
+
+    /**************************************************************/
+    /* Decode the runs and form the coefficient buffer            */
+    /**************************************************************/
+    {
+        const UWORD8 *pu1_table_runbefore;
+        UWORD32 u4_run;
+        WORD32 k;
+        UWORD32 u4_scan_pos = u4_total_coeff + u4_total_zeroes - 1 + u4_isdc;
+        WORD32 u4_zeroes_left = u4_total_zeroes;
+        k = u4_total_coeff - 1;
+
+        /**************************************************************/
+        /* The first coefficient stored goes to the highest scan      */
+        /* position. A 4x4 block has no position beyond 15 (16        */
+        /* coefficients together with a non-zero u4_isdc would land   */
+        /* on 16), so reject the block before anything is put in the  */
+        /* significance map.                                          */
+        /* ih264d_cavlc_4x4res_block_totalcoeff_1 applies the same    */
+        /* bound.                                                     */
+        /**************************************************************/
+        if(u4_scan_pos > 15)
+            return -1;
+
+        /**************************************************************/
+        /* Decoding Runs for 0 < zeros left <=6                       */
+        /**************************************************************/
+        pu1_table_runbefore = (UWORD8 *)gau1_ih264d_table_run_before;
+        while((u4_zeroes_left > 0) && k)
+        {
+            UWORD32 u4_code;
+            NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3);
+
+            /* Table entry: run in the upper bits, code length in the */
+            /* lowest two bits                                        */
+            u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)];
+            u4_run = u4_code >> 2;
+
+            FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03));
+            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
+            *pi2_coeff_data++ = i2_level_arr[k--];
+            u4_zeroes_left -= u4_run;
+            u4_scan_pos -= (u4_run + 1);
+        }
+        /**************************************************************/
+        /* Decoding Runs End                                          */
+        /**************************************************************/
+
+        /**************************************************************/
+        /* Copy the remaining coefficients                            */
+        /**************************************************************/
+        /* A run longer than the zeros that were left is invalid */
+        if(u4_zeroes_left < 0)
+            return -1;
+        while(k >= 0)
+        {
+            SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
+            *pi2_coeff_data++ = i2_level_arr[k--];
+            u4_scan_pos--;
+        }
+    }
+
+    {
+        /* Advance the parse pointer past the levels just stored, rounded */
+        /* with ALIGN4                                                    */
+        WORD32 offset;
+        offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4;
+        offset = ALIGN4(offset);
+        ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
+    }
+
+    /* Commit the consumed bits only on success */
+    ps_bitstrm->u4_ofst = u4_bitstream_offset;
+    return 0;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_rest_of_residual_cav_chroma_dc_block */
+/* */
+/* Description : This function does the Cavlc parsing of the bitstream */
+/* for chroma dc coefficients (levels, total zeros and runs) */
+/* Inputs : u4_total_coeff_trail_one - total coefficient count in the upper 16 bits and trailing ones count in the lower 16 bits; ps_bitstrm - bitstream context, whose pv_codec_handle is used as the decoder context */
+/* Globals : Reads gai2_ih264d_trailing_one_level and gau1_ih264d_table_run_before */
+/* Processing : Reads the trailing one signs, decodes the remaining levels, reads total zeros when fewer than 4 coefficients are coded, */
+/* then decodes the runs and stores levels plus a significant coefficient bitmap in the tu_sblk4x4_coeff_data_t at ps_dec->pv_parse_tu_coeff_data */
+/* Outputs : ps_bitstrm->u4_ofst is updated; ps_dec->pv_parse_tu_coeff_data is advanced by the 4 byte aligned size of the data written */
+/* Returns : None */
+/* */
+/* Issues : The function returns void, so it cannot report malformed input to its caller */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 15 09 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+void ih264d_rest_of_residual_cav_chroma_dc_block(UWORD32 u4_total_coeff_trail_one,
+ dec_bit_stream_t *ps_bitstrm)
+{
+ UWORD32 u4_total_zeroes;
+ WORD16 i;
+ UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst; /* local copy, written back on exit */
+ UWORD32 u4_trailing_ones = u4_total_coeff_trail_one & 0xFFFF; /* lower 16 bits */
+ UWORD32 u4_total_coeff = u4_total_coeff_trail_one >> 16; /* upper 16 bits */
+ WORD16 i2_level_arr[4]; /* decoded levels, filled from index u4_total_coeff - 1 downwards */
+
+ tu_sblk4x4_coeff_data_t *ps_tu_4x4;
+ WORD16 *pi2_coeff_data;
+ dec_struct_t *ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
+
+ ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+ ps_tu_4x4->u2_sig_coeff_map = 0; /* bitmap is rebuilt below, one bit per stored coefficient */
+ pi2_coeff_data = &ps_tu_4x4->ai2_level[0];
+
+ i = u4_total_coeff - 1;
+ if(u4_trailing_ones)
+ {
+ /*********************************************************************/
+ /* Decode Trailing Ones */
+ /* read the sign of T1's and put them in level array */
+ /*********************************************************************/
+ UWORD32 u4_signs, u4_cnt = u4_trailing_ones;
+ WORD16 (*ppi2_trlone_lkup)[3] =
+ (WORD16 (*)[3])gai2_ih264d_trailing_one_level;
+ WORD16 *pi2_trlone_lkup;
+
+ GETBITS(u4_signs, u4_bitstream_offset, pu4_bitstrm_buf, u4_cnt);
+
+ pi2_trlone_lkup = ppi2_trlone_lkup[(1 << u4_cnt) - 2 + u4_signs]; /* row index = (2^count - 2) + sign bits */
+
+ while(u4_cnt--)
+ i2_level_arr[i--] = *pi2_trlone_lkup++;
+ }
+
+ /****************************************************************/
+ /* Decoding Levels Begins */
+ /****************************************************************/
+ if(i >= 0)
+ {
+ /****************************************************************/
+ /* First level is decoded outside the loop as it has lot of */
+ /* special cases. */
+ /****************************************************************/
+ UWORD32 u4_lev_suffix, u4_suffix_len, u4_lev_suffix_size;
+ UWORD16 u2_lev_code, u2_abs_value;
+ UWORD32 u4_lev_prefix;
+
+ /***************************************************************/
+ /* u4_suffix_len = 0, Find leading zeros in next 32 bits */
+ /***************************************************************/
+ FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
+ pu4_bitstrm_buf);
+
+ /*********************************************************/
+ /* Special decoding case when trailing ones are 3 */
+ /*********************************************************/
+ u2_lev_code = MIN(15, u4_lev_prefix);
+
+ u2_lev_code += (3 == u4_trailing_ones) ? 0 : (2); /* offset of 2 unless all 3 trailing ones are present */
+
+ if(14 == u4_lev_prefix)
+ u4_lev_suffix_size = 4;
+ else if(15 <= u4_lev_prefix)
+ {
+ u2_lev_code += 15;
+ u4_lev_suffix_size = u4_lev_prefix - 3;
+ }
+ else
+ u4_lev_suffix_size = 0;
+
+ //HP_LEVEL_PREFIX: prefixes of 16 or more add (1 << (prefix - 3)) - 4096 to the level code
+ if(16 <= u4_lev_prefix)
+ {
+ u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
+ }
+ if(u4_lev_suffix_size)
+ {
+ GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_lev_suffix_size);
+ u2_lev_code += u4_lev_suffix;
+ }
+
+ u2_abs_value = (u2_lev_code + 2) >> 1;
+ /*********************************************************/
+ /* If Level code is odd, level is negative else positive */
+ /*********************************************************/
+ i2_level_arr[i--] = (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
+
+ u4_suffix_len = (u2_abs_value > 3) ? 2 : 1; /* suffix length used for the next level */
+
+ /*********************************************************/
+ /* Now loop over the remaining levels */
+ /*********************************************************/
+ while(i >= 0)
+ {
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ FIND_ONE_IN_STREAM_32(u4_lev_prefix, u4_bitstream_offset,
+ pu4_bitstrm_buf);
+
+ u4_lev_suffix_size =
+ (15 <= u4_lev_prefix) ?
+ (u4_lev_prefix - 3) : u4_suffix_len;
+
+ /*********************************************************/
+ /* Compute level code using prefix and suffix */
+ /*********************************************************/
+ GETBITS(u4_lev_suffix, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_lev_suffix_size);
+ u2_lev_code = (MIN(u4_lev_prefix,15) << u4_suffix_len)
+ + u4_lev_suffix;
+
+ //HP_LEVEL_PREFIX: same escape adjustment as for the first level
+ if(16 <= u4_lev_prefix)
+ {
+ u2_lev_code += ((1 << (u4_lev_prefix - 3)) - 4096);
+ }
+ u2_abs_value = (u2_lev_code + 2) >> 1;
+
+ /*********************************************************/
+ /* If Level code is odd, level is negative else positive */
+ /*********************************************************/
+ i2_level_arr[i--] =
+ (u2_lev_code & 1) ? -u2_abs_value : u2_abs_value;
+
+ /*********************************************************/
+ /* Increment suffix length if the level exceeds the */
+ /* threshold 3 << (suffix length - 1) */
+ /*********************************************************/
+ u4_suffix_len += (u2_abs_value > (3 << (u4_suffix_len - 1)));
+ }
+
+ /****************************************************************/
+ /* Decoding Levels Ends */
+ /****************************************************************/
+ }
+
+ if(u4_total_coeff < 4)
+ {
+ UWORD32 u4_max_ldz = (4 - u4_total_coeff); /* passed as the length limit to FIND_ONE_IN_STREAM_LEN */
+ FIND_ONE_IN_STREAM_LEN(u4_total_zeroes, u4_bitstream_offset,
+ pu4_bitstrm_buf, u4_max_ldz);
+ }
+ else
+ u4_total_zeroes = 0; /* all 4 positions are coded, nothing is read */
+
+ /**************************************************************/
+ /* Decode the runs and form the coefficient buffer */
+ /**************************************************************/
+ {
+ const UWORD8 *pu1_table_runbefore;
+ UWORD32 u4_run;
+ UWORD32 u4_scan_pos = (u4_total_coeff + u4_total_zeroes - 1); /* position of the last coefficient */
+ UWORD32 u4_zeroes_left = u4_total_zeroes;
+ i = u4_total_coeff - 1;
+
+ /**************************************************************/
+ /* Decoding Runs for 0 < zeros left <=6 */
+ /**************************************************************/
+ pu1_table_runbefore = (UWORD8 *)gau1_ih264d_table_run_before;
+ while(u4_zeroes_left && i) /* ends when no zeros remain or only index 0 is left */
+ {
+ UWORD32 u4_code;
+ NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 3);
+
+ u4_code = pu1_table_runbefore[u4_code + (u4_zeroes_left << 3)]; /* 8 table entries per zeros-left value */
+ u4_run = u4_code >> 2; /* upper bits of the entry: run */
+
+ FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03)); /* lower 2 bits of the entry: bits to flush */
+ SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
+ *pi2_coeff_data++ = i2_level_arr[i--];
+ u4_zeroes_left -= u4_run;
+ u4_scan_pos -= (u4_run + 1);
+ }
+ /**************************************************************/
+ /* Decoding Runs End */
+ /**************************************************************/
+
+ /**************************************************************/
+ /* Copy the remaining coefficients to consecutive positions */
+ /**************************************************************/
+ while(i >= 0)
+ {
+ SET_BIT(ps_tu_4x4->u2_sig_coeff_map, u4_scan_pos);
+ *pi2_coeff_data++ = i2_level_arr[i--];
+ u4_scan_pos--;
+ }
+ }
+
+ {
+ WORD32 offset;
+ offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_4x4; /* bytes written for this block */
+ offset = ALIGN4(offset);
+ ps_dec->pv_parse_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_parse_tu_coeff_data + offset);
+ }
+
+ ps_bitstrm->u4_ofst = u4_bitstream_offset;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_cavlc_parse4x4coeff_n0to7 \endif
+ *
+ * \brief
+ *    Parses the CAVLC coefficient token of a block using the table section
+ *    selected by u4_n (through gau2_ih264d_offset_num_vlc_tab) and, when the
+ *    token signals at least one coefficient, invokes the residual parser
+ *    chosen from ps_dec->pf_cavlc_4x4res_block.
+ *
+ * \param pi2_coeff_block   Not used by this function.
+ * \param u4_isdc           Forwarded unchanged to the residual parser.
+ * \param u4_n              Index into gau2_ih264d_offset_num_vlc_tab.
+ * \param ps_dec            Decoder context (bitstream and parser table).
+ * \param pu4_total_coeff   Receives the coefficient count from the token.
+ *
+ * \return
+ *    OK on success, ERROR_CAVLC_NUM_COEFF_T when the residual parser
+ *    returns a non-zero value.
+ *
+ **************************************************************************
+ */
+
+WORD32 ih264d_cavlc_parse4x4coeff_n0to7(WORD16 *pi2_coeff_block,
+                                        UWORD32 u4_isdc, /* is it a DC block */
+                                        WORD32 u4_n,
+                                        dec_struct_t *ps_dec,
+                                        UWORD32 *pu4_total_coeff)
+{
+    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
+    const UWORD16 *pu2_code = (const UWORD16*)gau2_ih264d_code_gx;
+    const UWORD16 *pu2_offset_num_vlc =
+                    (const UWORD16 *)gau2_ih264d_offset_num_vlc_tab;
+    const UWORD8 *pu1_offset =
+                    (UWORD8 *)gau1_ih264d_total_coeff_fn_ptr_offset;
+    UWORD32 u4_offset_num_vlc = pu2_offset_num_vlc[u4_n];
+    UWORD32 u4_code, u4_index, u4_ldz;
+    UWORD32 u4_trailing_ones, u4_offset, u4_total_coeff_tone;
+
+    UNUSED(pi2_coeff_block);
+    *pu4_total_coeff = 0;
+
+    /* Table index = 3 peeked bits + 8 * leading zeros + section base */
+    FIND_ONE_IN_STREAM_32(u4_ldz, u4_bitstream_offset, pu4_bitstrm_buf);
+    NEXTBITS(u4_index, u4_bitstream_offset, pu4_bitstrm_buf, 3);
+    u4_index = u4_index + (u4_ldz << 3) + u4_offset_num_vlc;
+
+    /* Clamp the index to 303 before the table lookup */
+    if(u4_index > 303)
+    {
+        u4_index = 303;
+    }
+    u4_code = pu2_code[u4_index];
+
+    /* Low 2 bits of the entry: number of peeked bits to consume */
+    FLUSHBITS(u4_bitstream_offset, (u4_code & 0x03));
+    ps_bitstrm->u4_ofst = u4_bitstream_offset;
+    *pu4_total_coeff = (u4_code >> 4);
+
+    /* Nothing more to parse for an empty block */
+    if(0 == *pu4_total_coeff)
+    {
+        return OK;
+    }
+
+    /* Bits 2..3 of the entry: trailing ones count */
+    u4_trailing_ones = ((u4_code >> 2) & 0x03);
+    u4_offset = pu1_offset[*pu4_total_coeff - 1];
+    u4_total_coeff_tone = (*pu4_total_coeff << 16) | u4_trailing_ones;
+
+    if(0 != ps_dec->pf_cavlc_4x4res_block[u4_offset](u4_isdc,
+                                                     u4_total_coeff_tone,
+                                                     ps_bitstrm))
+    {
+        return ERROR_CAVLC_NUM_COEFF_T;
+    }
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_cavlc_parse4x4coeff_n8 \endif
+ *
+ * \brief
+ *    Parses the coefficient token of a block from a fixed 6 bit code and,
+ *    unless that code equals 3 (no coefficients), invokes the residual
+ *    parser chosen from ps_dec->pf_cavlc_4x4res_block.
+ *
+ * \param pi2_coeff_block   Not used by this function.
+ * \param u4_isdc           Forwarded unchanged to the residual parser.
+ * \param u4_n              Not used by this function.
+ * \param ps_dec            Decoder context (bitstream and parser table).
+ * \param pu4_total_coeff   Receives the coefficient count from the token.
+ *
+ * \return
+ *    OK on success, ERROR_CAVLC_NUM_COEFF_T when the residual parser
+ *    returns a non-zero value (same contract as
+ *    ih264d_cavlc_parse4x4coeff_n0to7).
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_cavlc_parse4x4coeff_n8(WORD16 *pi2_coeff_block,
+                                     UWORD32 u4_isdc, /* is it a DC block */
+                                     WORD32 u4_n,
+                                     dec_struct_t *ps_dec,
+                                     UWORD32 *pu4_total_coeff)
+{
+
+    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
+    UWORD32 u4_code;
+    UNUSED(u4_n);
+    UNUSED(pi2_coeff_block);
+    GETBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 6);
+    ps_bitstrm->u4_ofst = u4_bitstream_offset;
+    *pu4_total_coeff = 0;
+
+    if(u4_code != 3)
+    {
+        const UWORD8 *pu1_offset =
+                        (UWORD8 *)gau1_ih264d_total_coeff_fn_ptr_offset;
+        UWORD32 u4_trailing_ones, u4_offset, u4_total_coeff_tone;
+        WORD32 ret;
+
+        /* Upper 4 bits: coefficient count minus one; lower 2 bits: */
+        /* trailing ones count                                      */
+        *pu4_total_coeff = (u4_code >> 2) + 1;
+        u4_trailing_ones = u4_code & 0x03;
+        u4_offset = pu1_offset[*pu4_total_coeff - 1];
+        u4_total_coeff_tone = (*pu4_total_coeff << 16) | u4_trailing_ones;
+
+        /* Propagate residual parsing failures instead of dropping them */
+        ret = ps_dec->pf_cavlc_4x4res_block[u4_offset](u4_isdc,
+                                                       u4_total_coeff_tone,
+                                                       ps_bitstrm);
+        if(ret != 0)
+            return ERROR_CAVLC_NUM_COEFF_T;
+    }
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_cavlc_parse_chroma_dc_plane \endif
+ *
+ * \brief
+ *    Parses the chroma DC block of one chroma component: reads the
+ *    coefficient token through gau1_ih264d_cav_chromdc_vld and, when
+ *    coefficients are present, calls
+ *    ih264d_rest_of_residual_cav_chroma_dc_block, applies the inverse 2x2
+ *    transform and stores the four scaled values at the position the
+ *    parsed data started at in ps_dec->pv_parse_tu_coeff_data.
+ *
+ * \param ps_bitstrm   Bitstream context; pv_codec_handle is the decoder.
+ * \param u4_scale     Scale applied to the transformed coefficients.
+ *
+ * \return
+ *    1 when the block has coefficients (four values were stored),
+ *    0 when the token signals an empty block.
+ *
+ **************************************************************************
+ */
+static UWORD32 ih264d_cavlc_parse_chroma_dc_plane(dec_bit_stream_t *ps_bitstrm,
+                                                  UWORD32 u4_scale)
+{
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 u4_bitstream_offset = ps_bitstrm->u4_ofst;
+    const UWORD8 *pu1_cav_chromdc = (const UWORD8*)gau1_ih264d_cav_chromdc_vld;
+    UWORD32 u4_total_coeff, u4_trailing_ones, u4_total_coeff_tone, u4_code;
+    WORD32 i_z0, i_z1, i_z2, i_z3;
+    tu_sblk4x4_coeff_data_t *ps_tu_4x4;
+    dec_struct_t *ps_dec;
+    WORD16 ai2_dc_coef[4];
+    UWORD8 pu1_inv_scan[4] =
+        { 0, 1, 2, 3 };
+    WORD16 *pi2_coeff_data;
+
+    /* Table entry: bits 0..2 = code length - 1, bits 3..4 = trailing */
+    /* ones, bits 5..7 = total coefficients                           */
+    NEXTBITS(u4_code, u4_bitstream_offset, pu4_bitstrm_buf, 8);
+    u4_code = pu1_cav_chromdc[u4_code];
+    FLUSHBITS(u4_bitstream_offset, ((u4_code & 0x7) + 1));
+    ps_bitstrm->u4_ofst = u4_bitstream_offset;
+
+    u4_total_coeff = (u4_code >> 5);
+    if(0 == u4_total_coeff)
+    {
+        return 0;
+    }
+
+    /* Capture the write position BEFORE the residual parser advances */
+    /* pv_parse_tu_coeff_data: the scaled output overwrites the       */
+    /* intermediate data and the pointer is then set just past it.    */
+    ps_dec = (dec_struct_t *)ps_bitstrm->pv_codec_handle;
+    pi2_coeff_data = (WORD16 *)ps_dec->pv_parse_tu_coeff_data;
+    ps_tu_4x4 = (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+
+    u4_trailing_ones = ((u4_code >> 3) & 0x3);
+    u4_total_coeff_tone = (u4_total_coeff << 16) | u4_trailing_ones;
+    ih264d_rest_of_residual_cav_chroma_dc_block(u4_total_coeff_tone,
+                                                ps_bitstrm);
+
+    ai2_dc_coef[0] = 0;
+    ai2_dc_coef[1] = 0;
+    ai2_dc_coef[2] = 0;
+    ai2_dc_coef[3] = 0;
+
+    ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
+                                     ai2_dc_coef,
+                                     pu1_inv_scan);
+
+    /*-------------------------------------------------------------------*/
+    /* Inverse 2x2 transform and scaling of chroma DC                    */
+    /*-------------------------------------------------------------------*/
+    i_z0 = (ai2_dc_coef[0] + ai2_dc_coef[2]);
+    i_z1 = (ai2_dc_coef[0] - ai2_dc_coef[2]);
+    i_z2 = (ai2_dc_coef[1] - ai2_dc_coef[3]);
+    i_z3 = (ai2_dc_coef[1] + ai2_dc_coef[3]);
+
+    /*-----------------------------------------------------------*/
+    /* Scaling and storing the values back. The operand types    */
+    /* (WORD32 sum, UWORD32 scale) are kept as they were so the  */
+    /* arithmetic and the truncation to WORD16 are unchanged.    */
+    /*-----------------------------------------------------------*/
+    *pi2_coeff_data++ = ((i_z0 + i_z3) * u4_scale) >> 5;
+    *pi2_coeff_data++ = ((i_z0 - i_z3) * u4_scale) >> 5;
+    *pi2_coeff_data++ = ((i_z1 + i_z2) * u4_scale) >> 5;
+    *pi2_coeff_data++ = ((i_z1 - i_z2) * u4_scale) >> 5;
+
+    ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_data;
+
+    return 1;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_cavlc_parse_chroma_dc \endif
+ *
+ * \brief
+ *    Parses the CAVLC chroma DC blocks of the U and then the V component.
+ *    For each component that carries coefficients the corresponding bit
+ *    (1 for U, 2 for V) is set in ps_cur_mb_info->u1_yuv_dc_block_flag.
+ *
+ * \param ps_cur_mb_info    MB info whose u1_yuv_dc_block_flag is updated.
+ * \param pi2_coeff_block   Not used; output goes to pv_parse_tu_coeff_data.
+ * \param ps_bitstrm        Bitstream context.
+ * \param u4_scale_u        Scale for the U component.
+ * \param u4_scale_v        Scale for the V component.
+ * \param i4_mb_inter_inc   Not used.
+ *
+ * \return
+ *    None.
+ *
+ **************************************************************************
+ */
+
+void ih264d_cavlc_parse_chroma_dc(dec_mb_info_t *ps_cur_mb_info,
+                                  WORD16 *pi2_coeff_block,
+                                  dec_bit_stream_t *ps_bitstrm,
+                                  UWORD32 u4_scale_u,
+                                  UWORD32 u4_scale_v,
+                                  WORD32 i4_mb_inter_inc)
+{
+    UNUSED(pi2_coeff_block);
+    UNUSED(i4_mb_inter_inc);
+
+    /******************************************************************/
+    /* Chroma DC Block for U component                                */
+    /******************************************************************/
+    if(ih264d_cavlc_parse_chroma_dc_plane(ps_bitstrm, u4_scale_u))
+    {
+        SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,1);
+    }
+
+    /******************************************************************/
+    /* Chroma DC Block for V component                                */
+    /******************************************************************/
+    if(ih264d_cavlc_parse_chroma_dc_plane(ps_bitstrm, u4_scale_v))
+    {
+        SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,2);
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_pmb_ref_index_cavlc_range1                  */
+/*                                                                           */
+/*  Description   : This function does the Cavlc TEV range = 1 parsing of   */
+/*                  reference index for a P MB. Range is 1 when             */
+/*                  num_ref_idx_active_minus1 is 0                          */
+/*                                                                           */
+/*  Inputs        : u4_num_part - number of partitions to parse              */
+/*                  ps_bitstrm - bitstream context                           */
+/*                  pi1_ref_idx - array receiving one index per partition    */
+/*                  u4_num_ref_idx_active_minus1 - not used                  */
+/*  Globals       : None                                                     */
+/*  Processing    : Calls ih264d_tev_range1 once per partition and stores    */
+/*                  the result truncated to WORD8                            */
+/*  Outputs       : pi1_ref_idx[0 .. u4_num_part - 1]; the bitstream offset  */
+/*                  is passed by pointer to ih264d_tev_range1                */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         19 09 2008   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void ih264d_parse_pmb_ref_index_cavlc_range1(UWORD32 u4_num_part, /* Number of partitions in MB */
+                                             dec_bit_stream_t *ps_bitstrm, /* Pointer to bitstream Structure. */
+                                             WORD8 *pi1_ref_idx, /* pointer to reference index array */
+                                             UWORD32 u4_num_ref_idx_active_minus1 /* Not used for range 1 */
+                                             )
+{
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
+    UWORD32 u4_part = 0;
+
+    UNUSED(u4_num_ref_idx_active_minus1);
+
+    while(u4_part < u4_num_part)
+    {
+        /* One reference index per partition, in partition order */
+        UWORD32 u4_parsed_idx = ih264d_tev_range1(pu4_bitstream_off,
+                                                  pu4_bitstrm_buf);
+
+        pi1_ref_idx[u4_part] = (WORD8)u4_parsed_idx;
+        u4_part++;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_pmb_ref_index_cavlc                         */
+/*                                                                           */
+/*  Description   : This function does the Cavlc TEV range > 1 parsing of   */
+/*                  reference index for a P MB.                             */
+/*                  Range > 1 when num_ref_idx_active_minus1 > 0            */
+/*                                                                           */
+/*  Inputs        : u4_num_part - number of partitions to parse              */
+/*                  ps_bitstrm - bitstream context                           */
+/*                  pi1_ref_idx - array receiving one index per partition    */
+/*                  u4_num_ref_idx_active_minus1 - largest accepted index    */
+/*  Globals       : None                                                     */
+/*  Processing    : For every partition an Exp-Golomb code is read inline    */
+/*                  (leading zeros, stop bit, suffix of the same length)     */
+/*                  and checked against u4_num_ref_idx_active_minus1         */
+/*  Outputs       : pi1_ref_idx[] entries parsed so far and                  */
+/*                  ps_bitstrm->u4_ofst                                      */
+/*  Returns       : OK, or ERROR_REF_IDX when a parsed index is out of       */
+/*                  range or its prefix has more than 31 leading zeros       */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         19 09 2008   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_pmb_ref_index_cavlc(UWORD32 u4_num_part, /* Number of partitions in MB */
+                                        dec_bit_stream_t *ps_bitstrm, /* Pointer to bitstream Structure. */
+                                        WORD8 *pi1_ref_idx, /* pointer to reference index array */
+                                        UWORD32 u4_num_ref_idx_active_minus1 /* Number of active references - 1 */
+                                        )
+{
+    UWORD32 u4_i;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
+
+    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
+    {
+        UWORD32 u4_ref_idx;
+//Inlined ih264d_uev
+        UWORD32 u4_bitstream_offset = *pu4_bitstream_off;
+        UWORD32 u4_word, u4_ldz;
+
+        /***************************************************************/
+        /* Find leading zeros in next 32 bits                          */
+        /***************************************************************/
+        NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+        u4_ldz = CLZ(u4_word);
+        /* Flush the ps_bitstrm */
+        u4_bitstream_offset += (u4_ldz + 1);
+
+        /* A prefix this long cannot be shifted in 32 bits without      */
+        /* undefined behaviour; reject it as an invalid reference index */
+        if(u4_ldz > 31)
+        {
+            *pu4_bitstream_off = u4_bitstream_offset;
+            return ERROR_REF_IDX;
+        }
+
+        /* Read the suffix from the ps_bitstrm */
+        u4_word = 0;
+        if(u4_ldz)
+        {
+            GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+        }
+        *pu4_bitstream_off = u4_bitstream_offset;
+        /* Unsigned shift: 1 << 31 would overflow a signed int */
+        u4_ref_idx = ((1U << u4_ldz) + u4_word - 1);
+//Inlined ih264d_uev
+
+        if(u4_ref_idx > u4_num_ref_idx_active_minus1)
+            return ERROR_REF_IDX;
+
+        /* Storing Reference Idx Information */
+        pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx;
+    }
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_bmb_ref_index_cavlc_range1                  */
+/*                                                                           */
+/*  Description   : This function does the Cavlc TEV range = 1 parsing of   */
+/*                  reference index for a B MB. Range is 1 when             */
+/*                  num_ref_idx_active_minus1 is 0                          */
+/*                                                                           */
+/*  Inputs        : u4_num_part - number of partitions to visit              */
+/*                  ps_bitstrm - bitstream context                           */
+/*                  pi1_ref_idx - in/out array; only entries holding a       */
+/*                  value greater than -1 on entry are parsed                */
+/*                  u4_num_ref_idx_active_minus1 - not used                  */
+/*  Globals       : None                                                     */
+/*  Processing    : For each entry greater than -1, calls ih264d_tev_range1  */
+/*                  and overwrites the entry with the result truncated to    */
+/*                  WORD8; other entries are left untouched                  */
+/*  Outputs       : Updated pi1_ref_idx[] entries; the bitstream offset is   */
+/*                  passed by pointer to ih264d_tev_range1                   */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         19 09 2008   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void ih264d_parse_bmb_ref_index_cavlc_range1(UWORD32 u4_num_part, /* Number of partitions in MB */
+                                             dec_bit_stream_t *ps_bitstrm, /* Pointer to bitstream Structure. */
+                                             WORD8 *pi1_ref_idx, /* pointer to reference index array */
+                                             UWORD32 u4_num_ref_idx_active_minus1 /* Not used for range 1 */
+                                             )
+{
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
+    UWORD32 u4_part;
+
+    UNUSED(u4_num_ref_idx_active_minus1);
+
+    for(u4_part = 0; u4_part < u4_num_part; u4_part++)
+    {
+        UWORD32 u4_parsed_idx;
+
+        /* Negative entries are skipped and keep their value */
+        if(pi1_ref_idx[u4_part] < 0)
+        {
+            continue;
+        }
+
+        u4_parsed_idx = ih264d_tev_range1(pu4_bitstream_off, pu4_bitstrm_buf);
+        pi1_ref_idx[u4_part] = (WORD8)u4_parsed_idx;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_bmb_ref_index_cavlc                         */
+/*                                                                           */
+/*  Description   : This function does the Cavlc TEV range > 1 parsing of   */
+/*                  reference index for a B MB.                             */
+/*                  Range > 1 when num_ref_idx_active_minus1 > 0            */
+/*                                                                           */
+/*  Inputs        : u4_num_part - number of partitions to visit              */
+/*                  ps_bitstrm - bitstream context                           */
+/*                  pi1_ref_idx - in/out array; only entries holding a       */
+/*                  value greater than -1 on entry are parsed                */
+/*                  u4_num_ref_idx_active_minus1 - largest accepted index    */
+/*  Globals       : None                                                     */
+/*  Processing    : For every entry greater than -1 an Exp-Golomb code is    */
+/*                  read inline (leading zeros, stop bit, suffix of the      */
+/*                  same length) and checked against                         */
+/*                  u4_num_ref_idx_active_minus1                             */
+/*  Outputs       : pi1_ref_idx[] entries parsed so far and                  */
+/*                  ps_bitstrm->u4_ofst                                      */
+/*  Returns       : OK, or ERROR_REF_IDX when a parsed index is out of       */
+/*                  range or its prefix has more than 31 leading zeros       */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         19 09 2008   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_parse_bmb_ref_index_cavlc(UWORD32 u4_num_part, /* Number of partitions in MB */
+                                        dec_bit_stream_t *ps_bitstrm, /* Pointer to bitstream Structure. */
+                                        WORD8 *pi1_ref_idx, /* pointer to reference index array */
+                                        UWORD32 u4_num_ref_idx_active_minus1 /* Number of active references - 1 */
+                                        )
+{
+    UWORD32 u4_i;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstream_off = &ps_bitstrm->u4_ofst;
+
+    for(u4_i = 0; u4_i < u4_num_part; u4_i++)
+    {
+        if(pi1_ref_idx[u4_i] > -1)
+        {
+            UWORD32 u4_ref_idx;
+//inlining ih264d_uev
+            UWORD32 u4_bitstream_offset = *pu4_bitstream_off;
+            UWORD32 u4_word, u4_ldz;
+
+            /***************************************************************/
+            /* Find leading zeros in next 32 bits                          */
+            /***************************************************************/
+            NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+            u4_ldz = CLZ(u4_word);
+            /* Flush the ps_bitstrm */
+            u4_bitstream_offset += (u4_ldz + 1);
+
+            /* A prefix this long cannot be shifted in 32 bits without  */
+            /* undefined behaviour; reject it as an invalid index       */
+            if(u4_ldz > 31)
+            {
+                *pu4_bitstream_off = u4_bitstream_offset;
+                return ERROR_REF_IDX;
+            }
+
+            /* Read the suffix from the ps_bitstrm */
+            u4_word = 0;
+            if(u4_ldz)
+            {
+                GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+            }
+            *pu4_bitstream_off = u4_bitstream_offset;
+            /* Unsigned shift: 1 << 31 would overflow a signed int */
+            u4_ref_idx = ((1U << u4_ldz) + u4_word - 1);
+//inlining ih264d_uev
+            if(u4_ref_idx > u4_num_ref_idx_active_minus1)
+                return ERROR_REF_IDX;
+
+            /* Storing Reference Idx Information */
+            pi1_ref_idx[u4_i] = (WORD8)u4_ref_idx;
+        }
+    }
+    return OK;
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_cavlc_parse_8x8block_both_available               */
+/*                                                                           */
+/*  Description   : This function does the residual parsing of 4 subblocks  */
+/*                  in a 8x8 block when both top and left are available     */
+/*                                                                           */
+/*  Inputs        : pi2_coeff_block : pointer to residual block, forwarded   */
+/*                  to the 4x4 coefficient parsers                           */
+/*                                                                           */
+/*                  u4_sub_block_strd : indicates the number of sublocks     */
+/*                  in a row. It is 4 for luma and 2 for chroma.             */
+/*                                                                           */
+/*                  u4_isdc : required to indicate 4x4 parse modules if the  */
+/*                  current Mb is I_16x16/chroma DC coded.                   */
+/*                                                                           */
+/*                  ps_dec : pointer to Decstruct (decoder context)          */
+/*                                                                           */
+/*                  pu1_top_nnz : top nnz pointer (2 entries, in/out)        */
+/*                                                                           */
+/*                  pu1_left_nnz : left nnz pointer (2 entries, in/out)      */
+/*                                                                           */
+/*                  u1_tran_form8x8 : when non-zero, an 8x8 CAVLC inverse    */
+/*                  scan table is selected per subblock and the coefficient  */
+/*                  pointer is not advanced between subblocks                */
+/*                                                                           */
+/*                  u1_mb_field_decodingflag : selects the                   */
+/*                  gau1_ih264d_inv_scan_int8x8_cavlc tables when non-zero,  */
+/*                  gau1_ih264d_inv_scan_prog8x8_cavlc otherwise             */
+/*                                                                           */
+/*                  pu4_csbp : receives the coded sub block pattern          */
+/*                                                                           */
+/*  Globals       : gau1_ih264d_inv_scan_prog8x8_cavlc,                      */
+/*                  gau1_ih264d_inv_scan_int8x8_cavlc                        */
+/*  Processing    : Parsing for four subblocks in unrolled, top and left nnz */
+/*                  are updated on the fly. csbp is set in accordance to     */
+/*                  decoded numcoeff for the subblock index in raster order  */
+/*                                                                           */
+/*  Outputs       : The updated residue buffer, nnzs and *pu4_csbp.          */
+/*                  ps_dec->pu1_inv_scan is restored to its entry value on   */
+/*                  every exit path, including errors                        */
+/*                                                                           */
+/*  Returns       : OK on success, otherwise the error code returned by the  */
+/*                  4x4 coefficient parser that failed                       */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         09 10 2008   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_cavlc_parse_8x8block_both_available(WORD16 *pi2_coeff_block,
+                                                  UWORD32 u4_sub_block_strd,
+                                                  UWORD32 u4_isdc,
+                                                  dec_struct_t * ps_dec,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz,
+                                                  UWORD8 u1_tran_form8x8,
+                                                  UWORD8 u1_mb_field_decodingflag,
+                                                  UWORD32 *pu4_csbp)
+{
+    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
+    UWORD32 u4_top0, u4_top1;
+    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
+                                      UWORD32 u4_isdc,
+                                      WORD32 u4_n,
+                                      struct _DecStruct *ps_dec,
+                                      UWORD32 *pu4_dummy) =
+                                      ps_dec->pf_cavlc_parse4x4coeff;
+    UWORD32 u4_idx = 0;
+    UWORD8 *puc_temp;
+    WORD32 ret;
+
+    *pu4_csbp = 0;
+    /* Save the inverse scan pointer; it is swapped per subblock for 8x8 */
+    /* transform blocks and restored at the single exit point below      */
+    puc_temp = ps_dec->pu1_inv_scan;
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 0                    */
+    /*------------------------------------------------------*/
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
+    }
+    /* Context = rounded average of the top and left neighbour counts; */
+    /* parser index 1 is used when the context exceeds 7               */
+    u4_n = (pu1_top_nnz[0] + pu1_left_nnz[0] + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top0 = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 1                    */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    /* Left neighbour is subblock 0, just decoded */
+    u4_n = (pu1_top_nnz[1] + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 2                    */
+    /*------------------------------------------------------*/
+    u4_idx += (u4_sub_block_strd - 1);
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
+    }
+    else
+    {
+        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
+    }
+    /* Top neighbour is subblock 0 */
+    u4_n = (u4_top0 + pu1_left_nnz[1] + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 3                    */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    /* Top neighbour is subblock 1, left neighbour is subblock 2 */
+    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+restore_inv_scan:
+    /* Reached on success (ret == OK) and on every error path */
+    ps_dec->pu1_inv_scan = puc_temp;
+
+    return ret;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_cavlc_parse_8x8block_left_available */
+/* */
+/* Description : This function does the residual parsing of 4 subblocks */
+/* in a 8x8 block when only left is available for block */
+/* */
+/* Inputs : pi2_coeff_block : pointer to residual block where */
+/* decoded and inverse scan coefficients are updated */
+/* */
+/* u4_sub_block_strd : indicates the number of sublocks */
+/* in a row. It is 4 for luma and 2 for chroma. */
+/* */
+/* u4_isdc : required to indicate 4x4 parse modules if the */
+/* current Mb is I_16x16/chroma DC coded. */
+/* */
+/* ps_dec : pointer to Decstruct (decoder context) */
+/* */
+/* pu1_top_nnz : top nnz pointer */
+/* */
+/* pu1_left_nnz : left nnz pointer */
+/* */
+/* Globals : No */
+/* Processing : Parsing for four subblocks in unrolled, top and left nnz */
+/* are updated on the fly. csbp is set in accordance to */
+/* decoded numcoeff for the subblock index in raster order */
+/* */
+/* Outputs : The updated residue buffer, nnzs and csbp current block */
+/* */
+/* Returns : Returns the coded sub block pattern csbp for the block */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 09 10 2008 Jay Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_cavlc_parse_8x8block_left_available(WORD16 *pi2_coeff_block,
+                                                  UWORD32 u4_sub_block_strd,
+                                                  UWORD32 u4_isdc,
+                                                  dec_struct_t * ps_dec,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz,
+                                                  UWORD8 u1_tran_form8x8,
+                                                  UWORD8 u1_mb_field_decodingflag,
+                                                  UWORD32 *pu4_csbp)
+{
+    /*
+     * Parses the four 4x4 sub-blocks of one 8x8 block when only the left
+     * neighbour supplies context. For every sub-block the context value
+     * u4_n selects the parser (entry 0 for u4_n <= 7, entry 1 otherwise),
+     * the decoded coefficient count is folded into the nnz arrays and the
+     * matching bit of *pu4_csbp is set when the count is non-zero.
+     *
+     * Returns OK, or the first non-OK status reported by a 4x4 parser.
+     * ps_dec->pu1_inv_scan is put back to its entry value on every exit
+     * path, including the error exits.
+     */
+    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
+    UWORD32 u4_top0, u4_top1;
+    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
+                                      UWORD32 u4_isdc,
+                                      WORD32 u4_n,
+                                      struct _DecStruct *ps_dec,
+                                      UWORD32 *pu4_dummy) =
+                                      ps_dec->pf_cavlc_parse4x4coeff;
+    UWORD32 u4_idx = 0;
+    /* Scan table in force on entry; reinstated before returning */
+    UWORD8 *pu1_saved_inv_scan = ps_dec->pu1_inv_scan;
+    WORD32 ret;
+
+    *pu4_csbp = 0;
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 0                    */
+    /* Context: left neighbour only                         */
+    /*------------------------------------------------------*/
+    if(u1_tran_form8x8)
+    {
+        /* 8x8 transform: each sub-block uses its own scan table */
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
+    }
+    u4_n = pu1_left_nnz[0];
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top0 = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 1                    */
+    /* Context: SubBlock 0 (its left neighbour) only        */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    u4_n = u4_num_coeff;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 2                    */
+    /* Context: rounded mean of SubBlock 0 and left nnz[1]  */
+    /*------------------------------------------------------*/
+    u4_idx += (u4_sub_block_strd - 1);
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
+    }
+    else
+    {
+        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
+    }
+    u4_n = (u4_top0 + pu1_left_nnz[1] + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 3                    */
+    /* Context: rounded mean of SubBlock 1 and SubBlock 2   */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+restore_inv_scan:
+    /* Reached with ret == OK after the last sub-block, or with the */
+    /* parser's error code from any of the early exits above        */
+    ps_dec->pu1_inv_scan = pu1_saved_inv_scan;
+    return ret;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_cavlc_parse_8x8block_top_available                */
+/*                                                                           */
+/*  Description   : This function does the residual parsing of 4 subblocks   */
+/*                  in a 8x8 block when only top is available for block      */
+/*                                                                           */
+/*  Inputs        : pi2_coeff_block : pointer to residual block where        */
+/*                  decoded and inverse scan coefficients are updated        */
+/*                                                                           */
+/*                  u4_sub_block_strd : indicates the number of subblocks    */
+/*                  in a row. It is 4 for luma and 2 for chroma.             */
+/*                                                                           */
+/*                  u4_isdc : required to indicate 4x4 parse modules if the  */
+/*                  current Mb is I_16x16/chroma DC coded.                   */
+/*                                                                           */
+/*                  ps_dec : pointer to Decstruct (decoder context)          */
+/*                                                                           */
+/*                  pu1_top_nnz : top nnz pointer                            */
+/*                                                                           */
+/*                  pu1_left_nnz : left nnz pointer                          */
+/*                                                                           */
+/*                  u1_tran_form8x8 : when non-zero, a per-subblock 8x8      */
+/*                  inverse scan table is installed in ps_dec and the        */
+/*                  coefficient pointer is not advanced between subblocks    */
+/*                                                                           */
+/*                  u1_mb_field_decodingflag : selects the int8x8 scan       */
+/*                  tables when non-zero, the prog8x8 tables otherwise       */
+/*                                                                           */
+/*                  pu4_csbp : receives the coded sub block pattern          */
+/*                                                                           */
+/*  Globals       : gau1_ih264d_inv_scan_prog8x8_cavlc,                      */
+/*                  gau1_ih264d_inv_scan_int8x8_cavlc (read only)            */
+/*  Processing    : Parsing for four subblocks is unrolled, top and left nnz */
+/*                  are updated on the fly. csbp is set in accordance to     */
+/*                  decoded numcoeff for the subblock index in raster order  */
+/*                                                                           */
+/*  Outputs       : The updated residue buffer, nnzs and csbp current block  */
+/*                                                                           */
+/*  Returns       : OK on success, otherwise the error code of the 4x4       */
+/*                  parser. ps_dec->pu1_inv_scan is restored to its entry    */
+/*                  value in both cases.                                     */
+/*                                                                           */
+/*  Issues        : <List any issues or problems with this function>         */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         09 10 2008   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_cavlc_parse_8x8block_top_available(WORD16 *pi2_coeff_block,
+                                                 UWORD32 u4_sub_block_strd,
+                                                 UWORD32 u4_isdc,
+                                                 dec_struct_t * ps_dec,
+                                                 UWORD8 *pu1_top_nnz,
+                                                 UWORD8 *pu1_left_nnz,
+                                                 UWORD8 u1_tran_form8x8,
+                                                 UWORD8 u1_mb_field_decodingflag,
+                                                 UWORD32 *pu4_csbp)
+{
+    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
+    UWORD32 u4_top0, u4_top1;
+    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
+                                      UWORD32 u4_isdc,
+                                      WORD32 u4_n,
+                                      struct _DecStruct *ps_dec,
+                                      UWORD32 *pu4_dummy) =
+                                      ps_dec->pf_cavlc_parse4x4coeff;
+    UWORD32 u4_idx = 0;
+    /* Scan table in force on entry; reinstated before returning */
+    UWORD8 *pu1_saved_inv_scan = ps_dec->pu1_inv_scan;
+    WORD32 ret;
+
+    *pu4_csbp = 0;
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 0                    */
+    /* Context: top neighbour only                          */
+    /*------------------------------------------------------*/
+    if(u1_tran_form8x8)
+    {
+        /* 8x8 transform: each sub-block uses its own scan table */
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
+    }
+    u4_n = pu1_top_nnz[0];
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top0 = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 1                    */
+    /* Context: rounded mean of top nnz[1] and SubBlock 0   */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    u4_n = (pu1_top_nnz[1] + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 2                    */
+    /* Context: SubBlock 0 (directly above) only            */
+    /*------------------------------------------------------*/
+    u4_idx += (u4_sub_block_strd - 1);
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
+    }
+    else
+    {
+        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
+    }
+    u4_n = u4_top0;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 3                    */
+    /* Context: rounded mean of SubBlock 1 and SubBlock 2   */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+restore_inv_scan:
+    /* Reached with ret == OK after the last sub-block, or with the */
+    /* parser's error code from any of the early exits above        */
+    ps_dec->pu1_inv_scan = pu1_saved_inv_scan;
+    return ret;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_cavlc_parse_8x8block_none_available               */
+/*                                                                           */
+/*  Description   : This function does the residual parsing of 4 subblocks   */
+/*                  in a 8x8 block when none of the neighbours are available */
+/*                                                                           */
+/*  Inputs        : pi2_coeff_block : pointer to residual block where        */
+/*                  decoded and inverse scan coefficients are updated        */
+/*                                                                           */
+/*                  u4_sub_block_strd : indicates the number of subblocks    */
+/*                  in a row. It is 4 for luma and 2 for chroma.             */
+/*                                                                           */
+/*                  u4_isdc : required to indicate 4x4 parse modules if the  */
+/*                  current Mb is I_16x16/chroma DC coded.                   */
+/*                                                                           */
+/*                  ps_dec : pointer to Decstruct (decoder context)          */
+/*                                                                           */
+/*                  pu1_top_nnz : top nnz pointer (written, never read)      */
+/*                                                                           */
+/*                  pu1_left_nnz : left nnz pointer (written, never read)    */
+/*                                                                           */
+/*                  u1_tran_form8x8 : when non-zero, a per-subblock 8x8      */
+/*                  inverse scan table is installed in ps_dec and the        */
+/*                  coefficient pointer is not advanced between subblocks    */
+/*                                                                           */
+/*                  u1_mb_field_decodingflag : selects the int8x8 scan       */
+/*                  tables when non-zero, the prog8x8 tables otherwise       */
+/*                                                                           */
+/*                  pu4_csbp : receives the coded sub block pattern          */
+/*                                                                           */
+/*  Globals       : gau1_ih264d_inv_scan_prog8x8_cavlc,                      */
+/*                  gau1_ih264d_inv_scan_int8x8_cavlc (read only)            */
+/*  Processing    : Parsing for four subblocks is unrolled, top and left nnz */
+/*                  are updated on the fly. csbp is set in accordance to     */
+/*                  decoded numcoeff for the subblock index in raster order  */
+/*                                                                           */
+/*  Outputs       : The updated residue buffer, nnzs and csbp current block  */
+/*                                                                           */
+/*  Returns       : OK on success, otherwise the error code of the 4x4       */
+/*                  parser. ps_dec->pu1_inv_scan is restored to its entry    */
+/*                  value in both cases.                                     */
+/*                                                                           */
+/*  Issues        : <List any issues or problems with this function>         */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         09 10 2008   Jay             Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_cavlc_parse_8x8block_none_available(WORD16 *pi2_coeff_block,
+                                                  UWORD32 u4_sub_block_strd,
+                                                  UWORD32 u4_isdc,
+                                                  dec_struct_t * ps_dec,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz,
+                                                  UWORD8 u1_tran_form8x8,
+                                                  UWORD8 u1_mb_field_decodingflag,
+                                                  UWORD32 *pu4_csbp)
+{
+    UWORD32 u4_num_coeff, u4_n, u4_subblock_coded;
+    UWORD32 u4_top0, u4_top1;
+    WORD32 (**pf_cavlc_parse4x4coeff)(WORD16 *pi2_coeff_block,
+                                      UWORD32 u4_isdc,
+                                      WORD32 u4_n,
+                                      struct _DecStruct *ps_dec,
+                                      UWORD32 *pu4_dummy) =
+                                      ps_dec->pf_cavlc_parse4x4coeff;
+    UWORD32 u4_idx = 0;
+    /* Scan table in force on entry; reinstated before returning */
+    UWORD8 *pu1_saved_inv_scan = ps_dec->pu1_inv_scan;
+    WORD32 ret;
+
+    *pu4_csbp = 0;
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 0                    */
+    /* No neighbour: context fixed at 0, parser entry 0     */
+    /*------------------------------------------------------*/
+    if(u1_tran_form8x8)
+    {
+        /* 8x8 transform: each sub-block uses its own scan table */
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[0] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[0];
+    }
+    ret = pf_cavlc_parse4x4coeff[0](pi2_coeff_block, u4_isdc, 0,
+                                    ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top0 = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 1                    */
+    /* Context: SubBlock 0 (its left neighbour) only        */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[1] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[1];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    u4_n = u4_num_coeff;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    u4_top1 = pu1_left_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 2                    */
+    /* Context: SubBlock 0 (directly above) only            */
+    /*------------------------------------------------------*/
+    u4_idx += (u4_sub_block_strd - 1);
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[2] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[2];
+    }
+    else
+    {
+        pi2_coeff_block += ((u4_sub_block_strd - 1) * NUM_COEFFS_IN_4x4BLK);
+    }
+    u4_n = u4_top0;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[0] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+    /*------------------------------------------------------*/
+    /* Residual 4x4 decoding: SubBlock 3                    */
+    /* Context: rounded mean of SubBlock 1 and SubBlock 2   */
+    /*------------------------------------------------------*/
+    u4_idx++;
+    if(u1_tran_form8x8)
+    {
+        ps_dec->pu1_inv_scan = u1_mb_field_decodingflag ?
+                        (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[3] :
+                        (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[3];
+    }
+    else
+    {
+        pi2_coeff_block += NUM_COEFFS_IN_4x4BLK;
+    }
+    u4_n = (u4_top1 + u4_num_coeff + 1) >> 1;
+    ret = pf_cavlc_parse4x4coeff[(u4_n > 7)](pi2_coeff_block, u4_isdc,
+                                             u4_n, ps_dec, &u4_num_coeff);
+    if(ret != OK)
+        goto restore_inv_scan;
+
+    pu1_top_nnz[1] = pu1_left_nnz[1] = u4_num_coeff;
+    u4_subblock_coded = (u4_num_coeff != 0);
+    INSERT_BIT(*pu4_csbp, u4_idx, u4_subblock_coded);
+
+restore_inv_scan:
+    /* Reached with ret == OK after the last sub-block, or with the */
+    /* parser's error code from any of the early exits above        */
+    ps_dec->pu1_inv_scan = pu1_saved_inv_scan;
+    return ret;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_residual4x4_cavlc \endif
+ *
+ * \brief
+ *    This function parses CAVLC syntax of a Luma and Chroma AC Residuals.
+ *
+ *    The low nibble of the MB's cbp gates the four luma 8x8 blocks, which
+ *    are parsed in the order 0, 1, 2, 3; the bits above it gate chroma DC
+ *    and chroma AC. The 8x8 parser for each block is picked from
+ *    ps_dec->pf_cavlc_parse_8x8block with the index
+ *    (top_available << 1) | left_available. Blocks that have another block
+ *    of the same MB to their left or above force the matching bit on.
+ *    The nnz bytes of every uncoded block are cleared.
+ *
+ * \param ps_dec         : decoder context
+ * \param ps_cur_mb_info : current MB info; receives qp values and csbps
+ * \param u1_offset      : forwarded to the luma 8x8 parsers as u4_isdc
+ *
+ * \return
+ *    0 on Success and Error code otherwise
+ **************************************************************************
+ */
+
+WORD32 ih264d_parse_residual4x4_cavlc(dec_struct_t * ps_dec,
+                                      dec_mb_info_t *ps_cur_mb_info,
+                                      UWORD8 u1_offset)
+{
+    UWORD8 u1_cbp = ps_cur_mb_info->u1_cbp;
+    UWORD16 ui16_csbp = 0;
+    UWORD32 u4_nbr_avl;
+
+    UWORD8 u1_is_top_mb_avail;
+    UWORD8 u1_is_left_mb_avail;
+
+    UWORD8 *pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
+    UWORD8 *pu1_left_nnz = ps_dec->pu1_left_nnz_y;
+    /* NOTE(review): starts at NULL and is only offset, never assigned a   */
+    /* buffer, in this function; presumably the parsers ignore it. Confirm */
+    /* before relying on it                                                */
+    WORD16 *pi2_coeff_block = NULL;
+    WORD32 ret;
+
+    WORD32 (**pf_cavlc_parse_8x8block)(WORD16 *pi2_coeff_block,
+                                       UWORD32 u4_sub_block_strd,
+                                       UWORD32 u4_isdc,
+                                       struct _DecStruct *ps_dec,
+                                       UWORD8 *pu1_top_nnz,
+                                       UWORD8 *pu1_left_nnz,
+                                       UWORD8 u1_tran_form8x8,
+                                       UWORD8 u1_mb_field_decodingflag,
+                                       UWORD32 *pu4_dummy) = ps_dec->pf_cavlc_parse_8x8block;
+
+
+    {
+        /* bit 1 : top MB available, bit 0 : left MB available */
+        UWORD8 uc_temp = ps_dec->u1_mb_ngbr_availablity;
+        u1_is_top_mb_avail = BOOLEAN(uc_temp & TOP_MB_AVAILABLE_MASK);
+        u1_is_left_mb_avail = BOOLEAN(uc_temp & LEFT_MB_AVAILABLE_MASK);
+        u4_nbr_avl = (u1_is_top_mb_avail << 1) | u1_is_left_mb_avail;
+    }
+
+    ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6;
+    ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6;
+    ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6;
+    ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6;
+    ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6;
+    ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6;
+
+    if(u1_cbp & 0xf)
+    {
+        /* Seed the current MB's top nnz row from the MB above */
+        pu1_top_nnz[0] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0];
+        pu1_top_nnz[1] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[1];
+        pu1_top_nnz[2] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[2];
+        pu1_top_nnz[3] = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[3];
+
+        /*******************************************************************/
+        /* Block 0 residual decoding, check cbp and proceed (subblock = 0) */
+        /*******************************************************************/
+        if(!(u1_cbp & 0x1))
+        {
+            /* Byte stores: the nnz arrays are UWORD8, so no wider access */
+            pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+            pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+        }
+        else
+        {
+            UWORD32 u4_temp;
+            ret = pf_cavlc_parse_8x8block[u4_nbr_avl](
+                            pi2_coeff_block, 4, u1_offset, ps_dec, pu1_top_nnz,
+                            pu1_left_nnz, ps_cur_mb_info->u1_tran_form8x8,
+                            ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp);
+            if(ret != OK)
+                return ret;
+            ui16_csbp = u4_temp;
+        }
+
+        /*******************************************************************/
+        /* Block 1 residual decoding, check cbp and proceed (subblock = 2) */
+        /*******************************************************************/
+        if(ps_cur_mb_info->u1_tran_form8x8)
+        {
+            pi2_coeff_block += 64;
+        }
+        else
+        {
+            pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK);
+        }
+
+        if(!(u1_cbp & 0x2))
+        {
+            pu1_top_nnz[2] = pu1_top_nnz[3] = 0;
+            pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+        }
+        else
+        {
+            /* Block 0 lies to the left, so left is always available */
+            UWORD32 u4_temp = (u4_nbr_avl | 0x1);
+            ret = pf_cavlc_parse_8x8block[u4_temp](
+                            pi2_coeff_block, 4, u1_offset, ps_dec,
+                            (pu1_top_nnz + 2), pu1_left_nnz,
+                            ps_cur_mb_info->u1_tran_form8x8,
+                            ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp);
+            if(ret != OK)
+                return ret;
+            ui16_csbp |= (u4_temp << 2);
+        }
+
+        /*******************************************************************/
+        /* Block 2 residual decoding, check cbp and proceed (subblock = 8) */
+        /*******************************************************************/
+        if(ps_cur_mb_info->u1_tran_form8x8)
+        {
+            pi2_coeff_block += 64;
+        }
+        else
+        {
+            pi2_coeff_block += (6 * NUM_COEFFS_IN_4x4BLK);
+        }
+
+        if(!(u1_cbp & 0x4))
+        {
+            pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+            pu1_left_nnz[2] = pu1_left_nnz[3] = 0;
+        }
+        else
+        {
+            /* Block 0 lies above, so top is always available */
+            UWORD32 u4_temp = (u4_nbr_avl | 0x2);
+            ret = pf_cavlc_parse_8x8block[u4_temp](
+                            pi2_coeff_block, 4, u1_offset, ps_dec, pu1_top_nnz,
+                            (pu1_left_nnz + 2), ps_cur_mb_info->u1_tran_form8x8,
+                            ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp);
+            if(ret != OK)
+                return ret;
+            ui16_csbp |= (u4_temp << 8);
+        }
+
+        /*******************************************************************/
+        /* Block 3 residual decoding, check cbp and proceed (subblock = 10)*/
+        /*******************************************************************/
+        if(ps_cur_mb_info->u1_tran_form8x8)
+        {
+            pi2_coeff_block += 64;
+        }
+        else
+        {
+            pi2_coeff_block += (2 * NUM_COEFFS_IN_4x4BLK);
+        }
+
+        if(!(u1_cbp & 0x8))
+        {
+            pu1_top_nnz[2] = pu1_top_nnz[3] = 0;
+            pu1_left_nnz[2] = pu1_left_nnz[3] = 0;
+        }
+        else
+        {
+            /* Blocks 1 and 2 are its top and left neighbours */
+            UWORD32 u4_temp;
+            ret = pf_cavlc_parse_8x8block[0x3](
+                            pi2_coeff_block, 4, u1_offset, ps_dec,
+                            (pu1_top_nnz + 2), (pu1_left_nnz + 2),
+                            ps_cur_mb_info->u1_tran_form8x8,
+                            ps_cur_mb_info->u1_mb_field_decodingflag, &u4_temp);
+            if(ret != OK)
+                return ret;
+            ui16_csbp |= (u4_temp << 10);
+        }
+    }
+    else
+    {
+        /* No luma residual at all: clear all four top and left nnz bytes */
+        pu1_top_nnz[0] = pu1_top_nnz[1] = pu1_top_nnz[2] = pu1_top_nnz[3] = 0;
+        pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+        pu1_left_nnz[2] = pu1_left_nnz[3] = 0;
+    }
+
+    ps_cur_mb_info->u2_luma_csbp = ui16_csbp;
+    ps_cur_mb_info->ps_curmb->u2_luma_csbp = ui16_csbp;
+
+    {
+        UWORD16 u2_chroma_csbp = 0;
+        ps_cur_mb_info->u2_chroma_csbp = 0;
+        pu1_top_nnz = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
+        pu1_left_nnz = ps_dec->pu1_left_nnz_uv;
+
+        u1_cbp >>= 4;
+        /*--------------------------------------------------------------------*/
+        /* if Chroma Component not present OR no ac values present            */
+        /* Set the values of N to zero                                        */
+        /*--------------------------------------------------------------------*/
+        if(u1_cbp == CBPC_ALLZERO || u1_cbp == CBPC_ACZERO)
+        {
+            pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+            pu1_top_nnz[2] = pu1_top_nnz[3] = 0;
+            pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+            pu1_left_nnz[2] = pu1_left_nnz[3] = 0;
+        }
+
+        if(u1_cbp == CBPC_ALLZERO)
+        {
+            return (0);
+        }
+        /*--------------------------------------------------------------------*/
+        /* Decode Chroma DC values                                            */
+        /*--------------------------------------------------------------------*/
+        {
+            WORD32 u4_scale_u;
+            WORD32 u4_scale_v;
+            WORD32 i4_mb_inter_inc;
+            u4_scale_u = ps_dec->pu2_quant_scale_u[0] << ps_dec->u1_qp_u_div6;
+            u4_scale_v = ps_dec->pu2_quant_scale_v[0] << ps_dec->u1_qp_v_div6;
+            /* 0 for I_4x4 / I_16x16 MBs, 3 for every other MB type */
+            i4_mb_inter_inc = (!((ps_cur_mb_info->ps_curmb->u1_mb_type == I_4x4_MB)
+                            || (ps_cur_mb_info->ps_curmb->u1_mb_type == I_16x16_MB)))
+                            * 3;
+
+            if(ps_dec->s_high_profile.u1_scaling_present)
+            {
+                u4_scale_u *=
+                                ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc
+                                                + 1][0];
+                u4_scale_v *=
+                                ps_dec->s_high_profile.i2_scalinglist4x4[i4_mb_inter_inc
+                                                + 2][0];
+
+            }
+            else
+            {
+                u4_scale_u <<= 4;
+                u4_scale_v <<= 4;
+            }
+
+            ih264d_cavlc_parse_chroma_dc(ps_cur_mb_info,pi2_coeff_block, ps_dec->ps_bitstrm,
+                                         u4_scale_u, u4_scale_v,
+                                         i4_mb_inter_inc);
+        }
+
+        if(u1_cbp == CBPC_ACZERO)
+            return (0);
+
+        /* Seed the chroma top nnz row from the MB above */
+        pu1_top_nnz[0] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[0];
+        pu1_top_nnz[1] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[1];
+        pu1_top_nnz[2] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[2];
+        pu1_top_nnz[3] = ps_cur_mb_info->ps_top_mb->pu1_nnz_uv[3];
+        /*--------------------------------------------------------------------*/
+        /* Decode Chroma AC values                                            */
+        /*--------------------------------------------------------------------*/
+        {
+            UWORD32 u4_temp;
+            /*****************************************************************/
+            /* U Block  residual decoding, check cbp and proceed (subblock=0)*/
+            /*****************************************************************/
+            ret = pf_cavlc_parse_8x8block[u4_nbr_avl](
+                            pi2_coeff_block, 2, 1, ps_dec, pu1_top_nnz,
+                            pu1_left_nnz, 0, 0, &u4_temp);
+            if(ret != OK)
+                return ret;
+            u2_chroma_csbp = u4_temp;
+
+            pi2_coeff_block += MB_CHROM_SIZE;
+            /*****************************************************************/
+            /* V Block  residual decoding, check cbp and proceed (subblock=1)*/
+            /*****************************************************************/
+            ret = pf_cavlc_parse_8x8block[u4_nbr_avl](pi2_coeff_block, 2, 1,
+                                                      ps_dec,
+                                                      (pu1_top_nnz + 2),
+                                                      (pu1_left_nnz + 2), 0,
+                                                      0, &u4_temp);
+            if(ret != OK)
+                return ret;
+            u2_chroma_csbp |= (u4_temp << 4);
+        }
+
+        ps_cur_mb_info->u2_chroma_csbp = u2_chroma_csbp;
+    }
+    return OK;
+}
diff --git a/decoder/ih264d_parse_cavlc.h b/decoder/ih264d_parse_cavlc.h
new file mode 100755
index 0000000..06105a3
--- /dev/null
+++ b/decoder/ih264d_parse_cavlc.h
@@ -0,0 +1,165 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_PARSE_CAVLC_H_
+#define _IH264D_PARSE_CAVLC_H_
+/*!
+ **************************************************************************
+ * \file ih264d_parse_cavlc.h
+ *
+ * \brief
+ * Declaration of UVLC and CAVLC functions
+ *
+ * \date
+ * 18/12/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_structs.h"
+#include "ih264d_cabac.h"
+
+/* Identifiers for the CAVLC lookup tables.                               */
+/* NOTE(review): the names suggest indices into a table of tables; the    */
+/* users of this enum are not visible from this header - confirm.         */
+enum cavlcTableNum
+{
+ tableTotalZeroOffset,
+ tableTotalZero,
+ tableRunBefore,
+ codeGx,
+ chromTab,
+ offsetNumVlcTab
+};
+
+/* Variable length code readers. Definitions are outside this header.     */
+/* NOTE(review): presumably the ue(v)/se(v)/te(v) Exp-Golomb decoders of   */
+/* the H.264 syntax, judging by the names - confirm against the .c file.   */
+WORD32 ih264d_uvlc(dec_bit_stream_t *ps_bitstrm,
+ UWORD32 u4_range,
+ UWORD32 *pi_bitstrm_ofst,
+ UWORD8 u1_flag,
+ UWORD32 u4_bitstrm_ofst,
+ UWORD32 *pi_bitstrm_buf);
+
+UWORD32 ih264d_uev(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf);
+
+WORD32 ih264d_sev(UWORD32 *pu4_bitstrm_ofst, UWORD32 *pu4_bitstrm_buf);
+
+UWORD32 ih264d_tev_range1(UWORD32 *pu4_bitstrm_ofst,
+ UWORD32 *pu4_bitstrm_buf);
+
+/* Residual block helpers; semantics are not visible from this header. */
+UWORD8 RestOfResidualBlockCavlc(WORD16 *pi2_coeff_block,
+ UWORD32 u1_ofst_is_dc_max_coef_scale_fact,
+ UWORD32 u4_total_coeff_trail_one,
+ dec_bit_stream_t *ps_bitstrm,
+ UWORD8 *pu1_invscan);
+
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_1( UWORD32 u4_isdc,
+ UWORD32 u4_total_coeff_trail_one,
+ dec_bit_stream_t *ps_bitstrm);
+
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_2to10(UWORD32 u4_isdc,
+ UWORD32 u4_total_coeff_trail_one,
+ dec_bit_stream_t *ps_bitstrm);
+
+WORD32 ih264d_cavlc_4x4res_block_totalcoeff_11to16(UWORD32 u4_isdc,
+ UWORD32 u4_total_coeff_trail_one,
+ dec_bit_stream_t *ps_bitstrm);
+
+/* 4x4 coefficient parsers. Their signature matches the entries of         */
+/* ps_dec->pf_cavlc_parse4x4coeff, which the 8x8 block parsers index with  */
+/* (u4_n > 7) and whose last argument receives the coefficient count.      */
+/* NOTE(review): presumably n0to7 is entry 0 and n8 is entry 1 - confirm   */
+/* where the table is initialised.                                         */
+WORD32 ih264d_cavlc_parse4x4coeff_n0to7(WORD16 *pi2_coeff_block,
+ UWORD32 u4_isdc,
+ WORD32 u4_n,
+ dec_struct_t *ps_dec,
+ UWORD32 *pu4_total_coeff);
+
+WORD32 ih264d_cavlc_parse4x4coeff_n8(WORD16 *pi2_coeff_block,
+ UWORD32 u4_isdc,
+ WORD32 u4_n,
+ dec_struct_t *ps_dec,
+ UWORD32 *pu4_total_coeff);
+
+/* Chroma DC parser. ih264d_parse_residual4x4_cavlc calls it with the U    */
+/* and V scale factors and with i4_mb_inter_inc set to 0 for I_4x4 and     */
+/* I_16x16 MBs and to 3 for every other MB type.                           */
+void ih264d_cavlc_parse_chroma_dc(dec_mb_info_t *ps_cur_mb_info,
+ WORD16 *pi2_coeff_block,
+ dec_bit_stream_t *ps_bitstrm,
+ UWORD32 u4_scale_u,
+ UWORD32 u4_scale_v,
+ WORD32 i4_mb_inter_inc);
+
+/* 8x8 block residual parsers, one per neighbour availability case. Each   */
+/* parses the four 4x4 sub-blocks of an 8x8 block, updates the top/left    */
+/* nnz arrays, writes the coded sub block pattern to *pu4_csbp and returns */
+/* OK or the error code of the 4x4 parser.                                 */
+/* ih264d_parse_residual4x4_cavlc selects one through                      */
+/* ps_dec->pf_cavlc_parse_8x8block[(top_available << 1) | left_available]. */
+/* NOTE(review): the table order none/left/top/both is inferred from that  */
+/* index - confirm where the table is initialised.                         */
+WORD32 ih264d_cavlc_parse_8x8block_none_available(WORD16 *pi2_coeff_block,
+ UWORD32 u4_sub_block_strd,
+ UWORD32 u4_isdc,
+ dec_struct_t * ps_dec,
+ UWORD8 *pu1_top_nnz,
+ UWORD8 *pu1_left_nnz,
+ UWORD8 u1_tran_form8x8,
+ UWORD8 u1_mb_field_decodingflag,
+ UWORD32 *pu4_csbp);
+
+WORD32 ih264d_cavlc_parse_8x8block_left_available(WORD16 *pi2_coeff_block,
+ UWORD32 u4_sub_block_strd,
+ UWORD32 u4_isdc,
+ dec_struct_t * ps_dec,
+ UWORD8 *pu1_top_nnz,
+ UWORD8 *pu1_left_nnz,
+ UWORD8 u1_tran_form8x8,
+ UWORD8 u1_mb_field_decodingflag,
+ UWORD32 *pu4_csbp);
+
+WORD32 ih264d_cavlc_parse_8x8block_top_available(WORD16 *pi2_coeff_block,
+ UWORD32 u4_sub_block_strd,
+ UWORD32 u4_isdc,
+ dec_struct_t * ps_dec,
+ UWORD8 *pu1_top_nnz,
+ UWORD8 *pu1_left_nnz,
+ UWORD8 u1_tran_form8x8,
+ UWORD8 u1_mb_field_decodingflag,
+ UWORD32 *pu4_csbp);
+
+WORD32 ih264d_cavlc_parse_8x8block_both_available(WORD16 *pi2_coeff_block,
+ UWORD32 u4_sub_block_strd,
+ UWORD32 u4_isdc,
+ dec_struct_t * ps_dec,
+ UWORD8 *pu1_top_nnz,
+ UWORD8 *pu1_left_nnz,
+ UWORD8 u1_tran_form8x8,
+ UWORD8 u1_mb_field_decodingflag,
+ UWORD32 *pu4_csbp);
+
+/* Chroma DC residual block reader; definition not visible from here. */
+WORD8 ResidualBlockChromaDC(WORD16 *pi2_level, dec_bit_stream_t *ps_bitstrm);
+
+/* Reference index parsers for P and B macroblocks.                        */
+/* NOTE(review): presumably the _range1 variants cover the case where      */
+/* u4_num_ref_idx_active_minus1 is 1, and only the general variants can    */
+/* fail (they return WORD32) - confirm against the definitions.            */
+void ih264d_parse_pmb_ref_index_cavlc_range1(UWORD32 u4_num_part,
+ dec_bit_stream_t *ps_bitstrm,
+ WORD8 *pi1_ref_idx,
+ UWORD32 u4_num_ref_idx_active_minus1);
+
+WORD32 ih264d_parse_pmb_ref_index_cavlc(UWORD32 u4_num_part,
+ dec_bit_stream_t *ps_bitstrm,
+ WORD8 *pi1_ref_idx,
+ UWORD32 u4_num_ref_idx_active_minus1);
+
+void ih264d_parse_bmb_ref_index_cavlc_range1(UWORD32 u4_num_part,
+ dec_bit_stream_t *ps_bitstrm,
+ WORD8 *pi1_ref_idx,
+ UWORD32 u4_num_ref_idx_active_minus1);
+
+WORD32 ih264d_parse_bmb_ref_index_cavlc(UWORD32 u4_num_part,
+ dec_bit_stream_t *ps_bitstrm,
+ WORD8 *pi1_ref_idx,
+ UWORD32 u4_num_ref_idx_active_minus1);
+
+#endif /* _IH264D_PARSE_CAVLC_H_ */
diff --git a/decoder/ih264d_parse_headers.c b/decoder/ih264d_parse_headers.c
new file mode 100755
index 0000000..9458d6b
--- /dev/null
+++ b/decoder/ih264d_parse_headers.c
@@ -0,0 +1,1204 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_parse_headers.c
+ *
+ * \brief
+ * Contains High level syntax[above slice] parsing routines
+ *
+ * \date
+ * 19/12/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_structs.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_defs.h"
+#include "ih264d_defs.h"
+#include "ih264d_defs.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_tables.h"
+#include "ih264d_utils.h"
+#include "ih264d_nal.h"
+#include "ih264d_deblocking.h"
+
+#include "ih264d_mem_request.h"
+#include "ih264d_debug.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_sei.h"
+#include "ih264d_vui.h"
+#include "ih264d_thread_parse_decode.h"
+#include "ih264d_thread_compute_bs.h"
+#include "ih264d_quant_scaling.h"
+#include "ih264d_defs.h"
+#include "ivd.h"
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_slice_partition                             */
+/*                                                                           */
+/*  Description   : Placeholder for slice data partition NAL units.          */
+/*                  Partition parsing is not implemented; the function only  */
+/*                  invokes H264_DEC_DEBUG_PRINT and reads nothing from the  */
+/*                  bitstream.                                               */
+/*  Inputs        : ps_dec      Decoder parameters (unused)                  */
+/*                  ps_bitstrm  Bitstream (unused)                           */
+/*  Globals       : None                                                     */
+/*  Outputs       : None                                                     */
+/*  Returns       : 0 always                                                 */
+/*                                                                           */
+/*  Issues        : Not implemented                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_slice_partition(dec_struct_t * ps_dec,
+                                    dec_bit_stream_t * ps_bitstrm)
+{
+    /* Neither argument is consulted by this stub */
+    UNUSED(ps_bitstrm);
+    UNUSED(ps_dec);
+
+    H264_DEC_DEBUG_PRINT("\nSlice partition not supported");
+
+    return 0;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_sei                                         */
+/*                                                                           */
+/*  Description   : Placeholder for SEI NAL units. Nothing is parsed and     */
+/*                  nothing is read from the bitstream by this function.     */
+/*  Inputs        : ps_dec      Decoder parameters (unused)                  */
+/*                  ps_bitstrm  Bitstream (unused)                           */
+/*  Globals       : None                                                     */
+/*  Outputs       : None                                                     */
+/*  Returns       : 0 always                                                 */
+/*                                                                           */
+/*  Issues        : Not implemented                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_parse_sei(dec_struct_t * ps_dec, dec_bit_stream_t * ps_bitstrm)
+{
+    /* Neither argument is consulted by this stub */
+    UNUSED(ps_bitstrm);
+    UNUSED(ps_dec);
+
+    return 0;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_filler_data                                 */
+/*                                                                           */
+/*  Description   : Placeholder for filler data NAL units. Nothing is parsed */
+/*                  and nothing is read from the bitstream by this function. */
+/*  Inputs        : ps_dec      Decoder parameters (unused)                  */
+/*                  ps_bitstrm  Bitstream (unused)                           */
+/*  Globals       : None                                                     */
+/*  Outputs       : None                                                     */
+/*  Returns       : 0 always                                                 */
+/*                                                                           */
+/*  Issues        : Not implemented                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_parse_filler_data(dec_struct_t * ps_dec,
+                                dec_bit_stream_t * ps_bitstrm)
+{
+    /* Neither argument is consulted by this stub */
+    UNUSED(ps_bitstrm);
+    UNUSED(ps_dec);
+
+    return 0;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_end_of_stream                               */
+/*                                                                           */
+/*  Description   : Placeholder for the end-of-stream NAL unit. The function */
+/*                  performs no work.                                        */
+/*  Inputs        : ps_dec      Decoder parameters (unused)                  */
+/*  Globals       : None                                                     */
+/*  Outputs       : None                                                     */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : Not implemented                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void ih264d_parse_end_of_stream(dec_struct_t * ps_dec)
+{
+    /* Intentionally empty: the decoder context is not touched */
+    UNUSED(ps_dec);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_pps \endif
+ *
+ * \brief
+ *    Decodes a Picture Parameter Set.
+ *
+ *    The PPS slot selected by pic_parameter_set_id is first copied into the
+ *    scratch buffer ps_dec->pv_scratch_sps_pps, all syntax elements are
+ *    parsed into that copy, and the copy is written back to ps_dec->ps_pps[]
+ *    only after every check has passed. An error return therefore leaves the
+ *    stored PPS untouched.
+ *
+ * \param ps_dec     : Decoder context (source of the PPS/SPS tables, the
+ *                     scratch buffer and the ue(v)/se(v) bitstream state)
+ * \param ps_bitstrm : Bitstream used for the fixed-length reads
+ *
+ * \return
+ *    OK on success. ERROR_INV_SPS_PPS_T, ERROR_FEATURE_UNAVAIL (more than
+ *    one slice group), ERROR_REF_IDX or ERROR_INV_RANGE_QP_T otherwise.
+ **************************************************************************
+ */
+WORD32 ih264d_parse_pps(dec_struct_t * ps_dec, dec_bit_stream_t * ps_bitstrm)
+{
+    UWORD8 uc_temp;
+    dec_seq_params_t * ps_sps = NULL;
+    dec_pic_params_t * ps_pps = NULL;
+    UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
+
+    /* Variables used for error resilience checks */
+    UWORD32 u4_temp;
+    WORD32 i_temp;
+
+    /* For High profile related syntax elements */
+    UWORD8 u1_more_data_flag;
+    WORD32 i4_i;
+
+    /*--------------------------------------------------------------------*/
+    /* Decode pic_parameter_set_id and find corresponding pic params      */
+    /*--------------------------------------------------------------------*/
+    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if(u4_temp & MASK_ERR_PIC_SET_ID)
+        return ERROR_INV_SPS_PPS_T;
+    /* Work on a scratch copy so a failed parse cannot corrupt the table */
+    ps_pps = ps_dec->pv_scratch_sps_pps;
+    *ps_pps = ps_dec->ps_pps[u4_temp];
+    ps_pps->u1_pic_parameter_set_id = (WORD8)u4_temp;
+    COPYTHECONTEXT("PPS: pic_parameter_set_id",ps_pps->u1_pic_parameter_set_id);
+
+    /************************************************/
+    /* initialization of High profile syntax element */
+    /************************************************/
+    ps_pps->i4_transform_8x8_mode_flag = 0;
+    ps_pps->i4_pic_scaling_matrix_present_flag = 0;
+
+    /*--------------------------------------------------------------------*/
+    /* Decode seq_parameter_set_id and map it to a seq_parameter_set      */
+    /*--------------------------------------------------------------------*/
+    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if(u4_temp & MASK_ERR_SEQ_SET_ID)
+        return ERROR_INV_SPS_PPS_T;
+    COPYTHECONTEXT("PPS: seq_parameter_set_id",u4_temp);
+    /* NOTE(review): the referenced SPS is not checked for u1_is_valid here; */
+    /* confirm whether a PPS referring to a not-yet-received SPS should be   */
+    /* rejected.                                                             */
+    ps_sps = &ps_dec->ps_sps[u4_temp];
+    ps_pps->ps_sps = ps_sps;
+
+    /*--------------------------------------------------------------------*/
+    /* Decode entropy_coding_mode                                         */
+    /*--------------------------------------------------------------------*/
+    ps_pps->u1_entropy_coding_mode = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("PPS: entropy_coding_mode_flag",ps_pps->u1_entropy_coding_mode);
+
+    ps_pps->u1_pic_order_present_flag = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("PPS: pic_order_present_flag",ps_pps->u1_pic_order_present_flag);
+
+    /*--------------------------------------------------------------------*/
+    /* Decode num_slice_groups_minus1                                     */
+    /*--------------------------------------------------------------------*/
+    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + 1;
+    if(u4_temp != 1)
+    {
+        /* Only a single slice group is supported */
+        return ERROR_FEATURE_UNAVAIL;
+    }
+    ps_pps->u1_num_slice_groups = u4_temp;
+    COPYTHECONTEXT("PPS: num_slice_groups_minus1",ps_pps->u1_num_slice_groups -1);
+
+    /*--------------------------------------------------------------------*/
+    /* Other parameter set values                                         */
+    /*--------------------------------------------------------------------*/
+    u4_temp = 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if(u4_temp > H264_MAX_REF_IDX)
+        return ERROR_REF_IDX;
+    ps_pps->u1_num_ref_idx_lx_active[0] = u4_temp;
+    COPYTHECONTEXT("PPS: num_ref_idx_l0_active_minus1",
+                    ps_pps->u1_num_ref_idx_lx_active[0] - 1);
+
+    u4_temp = 1 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if(u4_temp > H264_MAX_REF_IDX)
+        return ERROR_REF_IDX;
+    ps_pps->u1_num_ref_idx_lx_active[1] = u4_temp;
+    COPYTHECONTEXT("PPS: num_ref_idx_l1_active_minus1",
+                    ps_pps->u1_num_ref_idx_lx_active[1] - 1);
+
+    ps_pps->u1_wted_pred_flag = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("PPS: weighted prediction u4_flag",ps_pps->u1_wted_pred_flag);
+    uc_temp = ih264d_get_bits_h264(ps_bitstrm, 2);
+    COPYTHECONTEXT("PPS: weighted_bipred_idc",uc_temp);
+    ps_pps->u1_wted_bipred_idc = uc_temp;
+
+    if(ps_pps->u1_wted_bipred_idc > MAX_WEIGHT_BIPRED_IDC)
+        return ERROR_INV_SPS_PPS_T;
+
+    /* pic_init_qp_minus26: the resulting QP must lie in [0, 51] */
+    i_temp = 26 + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+    if((i_temp < 0) || (i_temp > 51))
+        return ERROR_INV_RANGE_QP_T;
+
+    ps_pps->u1_pic_init_qp = i_temp;
+    COPYTHECONTEXT("PPS: pic_init_qp_minus26",ps_pps->u1_pic_init_qp - 26);
+
+    /* pic_init_qs_minus26: same range as pic_init_qp */
+    i_temp = 26 + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+    if((i_temp < 0) || (i_temp > 51))
+        return ERROR_INV_RANGE_QP_T;
+
+    ps_pps->u1_pic_init_qs = i_temp;
+    COPYTHECONTEXT("PPS: pic_init_qs_minus26",ps_pps->u1_pic_init_qs - 26);
+
+    i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if((i_temp < -12) || (i_temp > 12))
+        return ERROR_INV_RANGE_QP_T;
+    ps_pps->i1_chroma_qp_index_offset = i_temp;
+    COPYTHECONTEXT("PPS: chroma_qp_index_offset",ps_pps->i1_chroma_qp_index_offset);
+
+    /***************************************************************************/
+    /* initialize second_chroma_qp_index_offset to i1_chroma_qp_index_offset if */
+    /* second_chroma_qp_index_offset is not present in the bitstream            */
+    /***************************************************************************/
+    ps_pps->i1_second_chroma_qp_index_offset =
+                    ps_pps->i1_chroma_qp_index_offset;
+
+    ps_pps->u1_deblocking_filter_parameters_present_flag = ih264d_get_bit_h264(
+                    ps_bitstrm);
+    COPYTHECONTEXT("PPS: deblocking_filter_control_present_flag",
+                    ps_pps->u1_deblocking_filter_parameters_present_flag);
+    ps_pps->u1_constrained_intra_pred_flag = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("PPS: constrained_intra_pred_flag",
+                    ps_pps->u1_constrained_intra_pred_flag);
+    ps_pps->u1_redundant_pic_cnt_present_flag = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("PPS: redundant_pic_cnt_present_flag",
+                    ps_pps->u1_redundant_pic_cnt_present_flag);
+
+    /* High profile related syntax elements */
+    u1_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
+    if(u1_more_data_flag && (ps_pps->ps_sps->u1_profile_idc == HIGH_PROFILE_IDC))
+    {
+        /* read transform_8x8_mode_flag  */
+        ps_pps->i4_transform_8x8_mode_flag = (WORD32)ih264d_get_bit_h264(
+                        ps_bitstrm);
+
+        /* read pic_scaling_matrix_present_flag */
+        ps_pps->i4_pic_scaling_matrix_present_flag =
+                        (WORD32)ih264d_get_bit_h264(ps_bitstrm);
+
+        if(ps_pps->i4_pic_scaling_matrix_present_flag)
+        {
+            /* Six 4x4 lists, plus two 8x8 lists when 8x8 transform is on */
+            WORD32 i4_num_lists = 6
+                            + (ps_pps->i4_transform_8x8_mode_flag << 1);
+
+            /* read the scaling matrices */
+            for(i4_i = 0; i4_i < i4_num_lists; i4_i++)
+            {
+                ps_pps->u1_pic_scaling_list_present_flag[i4_i] =
+                                ih264d_get_bit_h264(ps_bitstrm);
+
+                /* Clear the flag inherited from the previously stored PPS */
+                /* before calling scaling list, as the SPS parser does     */
+                ps_pps->u1_pic_use_default_scaling_matrix_flag[i4_i] = 0;
+
+                if(ps_pps->u1_pic_scaling_list_present_flag[i4_i])
+                {
+                    if(i4_i < 6)
+                    {
+                        ih264d_scaling_list(
+                                        ps_pps->i2_pic_scalinglist4x4[i4_i],
+                                        16,
+                                        &ps_pps->u1_pic_use_default_scaling_matrix_flag[i4_i],
+                                        ps_bitstrm);
+                    }
+                    else
+                    {
+                        ih264d_scaling_list(
+                                        ps_pps->i2_pic_scalinglist8x8[i4_i - 6],
+                                        64,
+                                        &ps_pps->u1_pic_use_default_scaling_matrix_flag[i4_i],
+                                        ps_bitstrm);
+                    }
+                }
+            }
+        }
+
+        /* read second_chroma_qp_index_offset syntax element.                */
+        /* The full decoded value is range checked (both bounds) before it   */
+        /* is narrowed into the 8-bit field, so that negative or wrapped     */
+        /* out-of-range values are rejected like chroma_qp_index_offset.     */
+        i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+        if((i_temp < -12) || (i_temp > 12))
+            return ERROR_INV_RANGE_QP_T;
+
+        ps_pps->i1_second_chroma_qp_index_offset = i_temp;
+    }
+
+    /* Everything parsed cleanly: commit the scratch copy to the PPS table */
+    ps_pps->u1_is_valid = TRUE;
+    ps_dec->ps_pps[ps_pps->u1_pic_parameter_set_id] = *ps_pps;
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_correct_level_idc \endif
+ *
+ * \brief
+ *    Returns a level_idc that can accommodate the given number of
+ *    macroblocks. If u4_total_mbs fits within the MB limit associated with
+ *    u4_level_idc (unknown levels are treated as level 5.1), the level is
+ *    returned unchanged; otherwise it is replaced by a level chosen from the
+ *    MAX_MBS_LEVEL_xx thresholds.
+ *
+ * \param u4_level_idc : level_idc signalled in the stream
+ * \param u4_total_mbs : number of macroblocks in a frame
+ *
+ * \return
+ *    The original or the corrected level_idc
+ **************************************************************************
+ */
+UWORD32 ih264d_correct_level_idc(UWORD32 u4_level_idc, UWORD32 u4_total_mbs)
+{
+    UWORD32 u4_mb_limit;
+
+    /* MB budget of the signalled level */
+    switch(u4_level_idc)
+    {
+        case H264_LEVEL_1_0: u4_mb_limit = MAX_MBS_LEVEL_10; break;
+        case H264_LEVEL_1_1: u4_mb_limit = MAX_MBS_LEVEL_11; break;
+        case H264_LEVEL_1_2: u4_mb_limit = MAX_MBS_LEVEL_12; break;
+        case H264_LEVEL_1_3: u4_mb_limit = MAX_MBS_LEVEL_13; break;
+        case H264_LEVEL_2_0: u4_mb_limit = MAX_MBS_LEVEL_20; break;
+        case H264_LEVEL_2_1: u4_mb_limit = MAX_MBS_LEVEL_21; break;
+        case H264_LEVEL_2_2: u4_mb_limit = MAX_MBS_LEVEL_22; break;
+        case H264_LEVEL_3_0: u4_mb_limit = MAX_MBS_LEVEL_30; break;
+        case H264_LEVEL_3_1: u4_mb_limit = MAX_MBS_LEVEL_31; break;
+        case H264_LEVEL_3_2: u4_mb_limit = MAX_MBS_LEVEL_32; break;
+        case H264_LEVEL_4_0: u4_mb_limit = MAX_MBS_LEVEL_40; break;
+        case H264_LEVEL_4_1: u4_mb_limit = MAX_MBS_LEVEL_41; break;
+        case H264_LEVEL_4_2: u4_mb_limit = MAX_MBS_LEVEL_42; break;
+        case H264_LEVEL_5_0: u4_mb_limit = MAX_MBS_LEVEL_50; break;
+        case H264_LEVEL_5_1:
+        default:
+            u4_mb_limit = MAX_MBS_LEVEL_51;
+            break;
+    }
+
+    /* Signalled level is large enough: nothing to correct */
+    if(u4_total_mbs <= u4_mb_limit)
+        return u4_level_idc;
+
+    /* Walk the thresholds from the largest down and pick the first level */
+    /* whose predecessor is too small for this picture                    */
+    if(u4_total_mbs > MAX_MBS_LEVEL_50)
+        return H264_LEVEL_5_1;
+    if(u4_total_mbs > MAX_MBS_LEVEL_42)
+        return H264_LEVEL_5_0;
+    if(u4_total_mbs > MAX_MBS_LEVEL_41)
+        return H264_LEVEL_4_2;
+    if(u4_total_mbs > MAX_MBS_LEVEL_40)
+        return H264_LEVEL_4_1;
+    if(u4_total_mbs > MAX_MBS_LEVEL_32)
+        return H264_LEVEL_4_0;
+    if(u4_total_mbs > MAX_MBS_LEVEL_31)
+        return H264_LEVEL_3_2;
+    if(u4_total_mbs > MAX_MBS_LEVEL_30)
+        return H264_LEVEL_3_1;
+    if(u4_total_mbs > MAX_MBS_LEVEL_21)
+        return H264_LEVEL_3_0;
+    if(u4_total_mbs > MAX_MBS_LEVEL_20)
+        return H264_LEVEL_2_1;
+    if(u4_total_mbs > MAX_MBS_LEVEL_10)
+        return H264_LEVEL_2_0;
+
+    /* No threshold exceeded: keep the signalled level */
+    return u4_level_idc;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_sps \endif
+ *
+ * \brief
+ *    Decodes a Sequence Parameter Set from the bitstream.
+ *
+ *    The SPS slot selected by seq_parameter_set_id is copied into the
+ *    scratch buffer ps_dec->pv_scratch_sps_pps, the syntax elements are
+ *    parsed into that copy, and the copy is stored back into
+ *    ps_dec->ps_sps[] as the last step. Picture, frame-buffer, padding and
+ *    crop dimensions derived from the SPS are written to ps_dec as well.
+ *
+ * \param ps_dec     : Decoder context
+ * \param ps_bitstrm : Bitstream positioned at the start of the SPS payload
+ *
+ * \return
+ *    OK on success. Otherwise one of ERROR_FEATURE_UNAVAIL,
+ *    ERROR_INV_SPS_PPS_T, ERROR_INV_POC_TYPE_T, ERROR_NUM_REF,
+ *    IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED, IVD_RES_CHANGED,
+ *    ERROR_DISP_WIDTH_RESET_TO_PIC_WIDTH, or the error returned by
+ *    ih264d_parse_vui_parametres().
+ **************************************************************************
+ */
+WORD32 ih264d_parse_sps(dec_struct_t *ps_dec, dec_bit_stream_t *ps_bitstrm)
+{
+    UWORD8 i;
+    dec_seq_params_t *ps_seq = NULL;
+    UWORD8 u1_profile_idc, u1_level_idc, u1_seq_parameter_set_id;
+    UWORD16 i2_max_frm_num;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+    UWORD8 u1_frm, uc_constraint_set0_flag, uc_constraint_set1_flag;
+
+    UWORD32 u4_temp;
+    WORD32 pic_height_in_map_units_minus1 = 0;
+    UWORD32 u2_pic_wd = 0;
+    UWORD32 u2_pic_ht = 0;
+    UWORD32 u2_frm_wd_y = 0;
+    UWORD32 u2_frm_ht_y = 0;
+    UWORD32 u2_frm_wd_uv = 0;
+    UWORD32 u2_frm_ht_uv = 0;
+    UWORD32 u2_crop_offset_y = 0;
+    UWORD32 u2_crop_offset_uv = 0;
+    WORD32 ret;
+
+    /* High profile related syntax element */
+    WORD32 i4_i;
+
+    /* Frame cropping syntax elements; only meaningful when the flag is set. */
+    /* NOTE(review): the ue(v) crop offsets are narrowed to 8 bits, so       */
+    /* offsets above 255 wrap silently - confirm whether this is intended.   */
+    UWORD8 u1_frame_cropping_flag;
+    UWORD8 u1_frame_cropping_rect_left_ofst = 0;
+    UWORD8 u1_frame_cropping_rect_right_ofst = 0;
+    UWORD8 u1_frame_cropping_rect_top_ofst = 0;
+    UWORD8 u1_frame_cropping_rect_bottom_ofst = 0;
+
+    /*--------------------------------------------------------------------*/
+    /* Decode seq_parameter_set_id and profile and level values           */
+    /*--------------------------------------------------------------------*/
+    SWITCHONTRACE;
+    u1_profile_idc = ih264d_get_bits_h264(ps_bitstrm, 8);
+    COPYTHECONTEXT("SPS: profile_idc",u1_profile_idc);
+
+    uc_constraint_set0_flag = ih264d_get_bit_h264(ps_bitstrm);
+    uc_constraint_set1_flag = ih264d_get_bit_h264(ps_bitstrm);
+    /* Third constraint flag is read and discarded */
+    ih264d_get_bit_h264(ps_bitstrm);
+
+    /*****************************************************/
+    /* Read 5 bits for uc_constraint_set3_flag (1 bit)   */
+    /* and reserved_zero_4bits (4 bits)                  */
+    /*****************************************************/
+    ih264d_get_bits_h264(ps_bitstrm, 5);
+
+    /* Check whether particular profile is supported or not: anything other */
+    /* than Baseline/Main/High is accepted only if constraint_set0 or       */
+    /* constraint_set1 is signalled.                                        */
+    if((u1_profile_idc != MAIN_PROFILE_IDC)
+                    && (u1_profile_idc != BASE_PROFILE_IDC)
+                    && (u1_profile_idc != HIGH_PROFILE_IDC))
+    {
+        if((uc_constraint_set1_flag != 1) && (uc_constraint_set0_flag != 1))
+        {
+            return ERROR_FEATURE_UNAVAIL;
+        }
+    }
+
+    /* level_idc is not validated against the level given at init time */
+    u1_level_idc = ih264d_get_bits_h264(ps_bitstrm, 8);
+    COPYTHECONTEXT("SPS: u4_level_idc",u1_level_idc);
+
+    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if(u4_temp & MASK_ERR_SEQ_SET_ID)
+        return ERROR_INV_SPS_PPS_T;
+    u1_seq_parameter_set_id = u4_temp;
+    COPYTHECONTEXT("SPS: seq_parameter_set_id",
+                    u1_seq_parameter_set_id);
+
+    /*--------------------------------------------------------------------*/
+    /* Find an seq param entry in seqparam array of decStruct             */
+    /* (parsing is done on a scratch copy of that entry)                  */
+    /*--------------------------------------------------------------------*/
+    ps_seq = ps_dec->pv_scratch_sps_pps;
+    *ps_seq = ps_dec->ps_sps[u1_seq_parameter_set_id];
+    ps_seq->u1_profile_idc = u1_profile_idc;
+    ps_seq->u1_level_idc = u1_level_idc;
+    ps_seq->u1_seq_parameter_set_id = u1_seq_parameter_set_id;
+
+    /*******************************************************************/
+    /* Initializations for high profile                                */
+    /*******************************************************************/
+    ps_seq->i4_chroma_format_idc = 1;
+    ps_seq->i4_bit_depth_luma_minus8 = 0;
+    ps_seq->i4_bit_depth_chroma_minus8 = 0;
+    ps_seq->i4_qpprime_y_zero_transform_bypass_flag = 0;
+    ps_seq->i4_seq_scaling_matrix_present_flag = 0;
+    if(u1_profile_idc == HIGH_PROFILE_IDC)
+    {
+        /* reading chroma_format_idc; only the value 1 is accepted */
+        ps_seq->i4_chroma_format_idc = ih264d_uev(pu4_bitstrm_ofst,
+                                                  pu4_bitstrm_buf);
+        if(ps_seq->i4_chroma_format_idc != 1)
+        {
+            return ERROR_INV_SPS_PPS_T;
+        }
+
+        /* reading bit_depth_luma_minus8; only 8-bit luma is accepted */
+        ps_seq->i4_bit_depth_luma_minus8 = ih264d_uev(pu4_bitstrm_ofst,
+                                                      pu4_bitstrm_buf);
+        if(ps_seq->i4_bit_depth_luma_minus8 != 0)
+        {
+            return ERROR_INV_SPS_PPS_T;
+        }
+
+        /* reading bit_depth_chroma_minus8; only 8-bit chroma is accepted */
+        ps_seq->i4_bit_depth_chroma_minus8 = ih264d_uev(pu4_bitstrm_ofst,
+                                                        pu4_bitstrm_buf);
+        if(ps_seq->i4_bit_depth_chroma_minus8 != 0)
+        {
+            return ERROR_INV_SPS_PPS_T;
+        }
+
+        /* reading qpprime_y_zero_transform_bypass_flag; must be 0 */
+        ps_seq->i4_qpprime_y_zero_transform_bypass_flag =
+                        (WORD32)ih264d_get_bit_h264(ps_bitstrm);
+        if(ps_seq->i4_qpprime_y_zero_transform_bypass_flag != 0)
+        {
+            return ERROR_INV_SPS_PPS_T;
+        }
+
+        /* reading seq_scaling_matrix_present_flag */
+        ps_seq->i4_seq_scaling_matrix_present_flag =
+                        (WORD32)ih264d_get_bit_h264(ps_bitstrm);
+
+        if(ps_seq->i4_seq_scaling_matrix_present_flag)
+        {
+            /* Six 4x4 lists followed by two 8x8 lists */
+            for(i4_i = 0; i4_i < 8; i4_i++)
+            {
+                ps_seq->u1_seq_scaling_list_present_flag[i4_i] =
+                                ih264d_get_bit_h264(ps_bitstrm);
+
+                /* initialize u1_use_default_scaling_matrix_flag[i4_i] to */
+                /* zero before calling scaling list                       */
+                ps_seq->u1_use_default_scaling_matrix_flag[i4_i] = 0;
+
+                if(ps_seq->u1_seq_scaling_list_present_flag[i4_i])
+                {
+                    if(i4_i < 6)
+                    {
+                        ih264d_scaling_list(
+                                        ps_seq->i2_scalinglist4x4[i4_i],
+                                        16,
+                                        &ps_seq->u1_use_default_scaling_matrix_flag[i4_i],
+                                        ps_bitstrm);
+                    }
+                    else
+                    {
+                        ih264d_scaling_list(
+                                        ps_seq->i2_scalinglist8x8[i4_i - 6],
+                                        64,
+                                        &ps_seq->u1_use_default_scaling_matrix_flag[i4_i],
+                                        ps_bitstrm);
+                    }
+                }
+            }
+        }
+    }
+
+    /*--------------------------------------------------------------------*/
+    /* Decode MaxFrameNum                                                 */
+    /*--------------------------------------------------------------------*/
+    u4_temp = 4 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    if(u4_temp > MAX_BITS_IN_FRAME_NUM)
+    {
+        return ERROR_INV_SPS_PPS_T;
+    }
+    ps_seq->u1_bits_in_frm_num = u4_temp;
+    COPYTHECONTEXT("SPS: log2_max_frame_num_minus4",
+                    (ps_seq->u1_bits_in_frm_num - 4));
+
+    i2_max_frm_num = (1 << (ps_seq->u1_bits_in_frm_num));
+    ps_seq->u2_u4_max_pic_num_minus1 = i2_max_frm_num - 1;
+
+    /*--------------------------------------------------------------------*/
+    /* Decode picture order count and related values                      */
+    /*--------------------------------------------------------------------*/
+    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+    if(u4_temp > MAX_PIC_ORDER_CNT_TYPE)
+    {
+        return ERROR_INV_POC_TYPE_T;
+    }
+    ps_seq->u1_pic_order_cnt_type = u4_temp;
+    COPYTHECONTEXT("SPS: pic_order_cnt_type",ps_seq->u1_pic_order_cnt_type);
+
+    ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle = 1;
+    if(ps_seq->u1_pic_order_cnt_type == 0)
+    {
+        u4_temp = 4 + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        if(u4_temp > MAX_BITS_IN_POC_LSB)
+        {
+            return ERROR_INV_SPS_PPS_T;
+        }
+        ps_seq->u1_log2_max_pic_order_cnt_lsb_minus = u4_temp;
+        ps_seq->i4_max_pic_order_cntLsb = (1 << u4_temp);
+        COPYTHECONTEXT("SPS: log2_max_pic_order_cnt_lsb_minus4",(u4_temp - 4));
+    }
+    else if(ps_seq->u1_pic_order_cnt_type == 1)
+    {
+        ps_seq->u1_delta_pic_order_always_zero_flag = ih264d_get_bit_h264(
+                        ps_bitstrm);
+        COPYTHECONTEXT("SPS: delta_pic_order_always_zero_flag",
+                        ps_seq->u1_delta_pic_order_always_zero_flag);
+
+        ps_seq->i4_ofst_for_non_ref_pic = ih264d_sev(pu4_bitstrm_ofst,
+                                                     pu4_bitstrm_buf);
+        COPYTHECONTEXT("SPS: offset_for_non_ref_pic",
+                        ps_seq->i4_ofst_for_non_ref_pic);
+
+        ps_seq->i4_ofst_for_top_to_bottom_field = ih264d_sev(
+                        pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        COPYTHECONTEXT("SPS: offset_for_top_to_bottom_field",
+                        ps_seq->i4_ofst_for_top_to_bottom_field);
+
+        u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+        if(u4_temp > 255)
+            return ERROR_INV_SPS_PPS_T;
+        ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle = u4_temp;
+        COPYTHECONTEXT("SPS: num_ref_frames_in_pic_order_cnt_cycle",
+                        ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle);
+
+        for(i = 0; i < ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle; i++)
+        {
+            ps_seq->i4_ofst_for_ref_frame[i] = ih264d_sev(
+                            pu4_bitstrm_ofst, pu4_bitstrm_buf);
+            COPYTHECONTEXT("SPS: offset_for_ref_frame",
+                            ps_seq->i4_ofst_for_ref_frame[i]);
+        }
+    }
+
+    u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+    if(u4_temp > H264_MAX_REF_PICS)
+    {
+        return ERROR_NUM_REF;
+    }
+    ps_seq->u1_num_ref_frames = u4_temp;
+    COPYTHECONTEXT("SPS: num_ref_frames",ps_seq->u1_num_ref_frames);
+
+    ps_seq->u1_gaps_in_frame_num_value_allowed_flag = ih264d_get_bit_h264(
+                    ps_bitstrm);
+    COPYTHECONTEXT("SPS: gaps_in_frame_num_value_allowed_flag",
+                    ps_seq->u1_gaps_in_frame_num_value_allowed_flag);
+
+    /*--------------------------------------------------------------------*/
+    /* Decode FrameWidth and FrameHeight and related values               */
+    /* (dimensions in pixels are the MB counts times 16)                  */
+    /*--------------------------------------------------------------------*/
+    ps_seq->u2_frm_wd_in_mbs = 1
+                    + ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+    COPYTHECONTEXT("SPS: pic_width_in_mbs_minus1",
+                    ps_seq->u2_frm_wd_in_mbs - 1);
+    u2_pic_wd = (ps_seq->u2_frm_wd_in_mbs << 4);
+
+    pic_height_in_map_units_minus1 = ih264d_uev(pu4_bitstrm_ofst,
+                                                pu4_bitstrm_buf);
+    ps_seq->u2_frm_ht_in_mbs = 1 + pic_height_in_map_units_minus1;
+
+    u2_pic_ht = (ps_seq->u2_frm_ht_in_mbs << 4);
+
+    /*--------------------------------------------------------------------*/
+    /* Get the value of MaxMbAddress and Number of bits needed for it     */
+    /*--------------------------------------------------------------------*/
+    ps_seq->u2_max_mb_addr = (ps_seq->u2_frm_wd_in_mbs
+                    * ps_seq->u2_frm_ht_in_mbs) - 1;
+
+    ps_seq->u2_total_num_of_mbs = ps_seq->u2_max_mb_addr + 1;
+
+    /* Raise the level if the signalled one cannot hold this many MBs */
+    ps_seq->u1_level_idc = ih264d_correct_level_idc(
+                    u1_level_idc, ps_seq->u2_total_num_of_mbs);
+
+    u1_frm = ih264d_get_bit_h264(ps_bitstrm);
+    ps_seq->u1_frame_mbs_only_flag = u1_frm;
+
+    COPYTHECONTEXT("SPS: frame_mbs_only_flag", u1_frm);
+
+    if(!u1_frm)
+    {
+        /* Height so far counted map units of one field: double it */
+        u2_pic_ht <<= 1;
+        ps_seq->u1_mb_aff_flag = ih264d_get_bit_h264(ps_bitstrm);
+        COPYTHECONTEXT("SPS: mb_adaptive_frame_field_flag",
+                        ps_seq->u1_mb_aff_flag);
+    }
+    else
+    {
+        ps_seq->u1_mb_aff_flag = 0;
+    }
+
+    /* Reject widths outside [H264_MIN_FRAME_WIDTH, width given at init] */
+    if((u2_pic_wd < H264_MIN_FRAME_WIDTH)
+                    || (u2_pic_wd > ps_dec->u4_width_at_init))
+    {
+        ivd_video_decode_op_t *ps_out;
+        /*set width and height in decode output structure*/
+        ps_out = (ivd_video_decode_op_t *)ps_dec->pv_dec_out;
+        ps_out->u4_pic_wd = u2_pic_wd;
+        ps_out->u4_pic_ht = u2_pic_ht;
+
+        return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
+    }
+
+    /* Reject heights below the minimum, or pictures whose area exceeds  */
+    /* the area given at init (heights are 32-aligned first when the     */
+    /* stream is not frame_mbs_only)                                     */
+    if((u2_pic_ht < H264_MIN_FRAME_HEIGHT)
+                    || (((0 != ps_seq->u1_frame_mbs_only_flag)
+                                    && (u2_pic_ht * u2_pic_wd
+                                                    > ps_dec->u4_height_at_init
+                                                                    * ps_dec->u4_width_at_init))
+                                    || ((0 == ps_seq->u1_frame_mbs_only_flag)
+                                                    && (ALIGN32(u2_pic_ht)
+                                                                    * u2_pic_wd
+                                                                    > ALIGN32(ps_dec->u4_height_at_init)
+                                                                                    * ps_dec->u4_width_at_init))))
+    {
+        ivd_video_decode_op_t *ps_out;
+        /*set width and height in decode output structure*/
+        ps_out = (ivd_video_decode_op_t *)ps_dec->pv_dec_out;
+        ps_out->u4_pic_wd = u2_pic_wd;
+        ps_out->u4_pic_ht = u2_pic_ht;
+
+        return IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
+    }
+
+    ps_seq->u1_direct_8x8_inference_flag = ih264d_get_bit_h264(ps_bitstrm);
+
+    COPYTHECONTEXT("SPS: direct_8x8_inference_flag",
+                    ps_seq->u1_direct_8x8_inference_flag);
+
+    u1_frame_cropping_flag = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("SPS: frame_cropping_flag",u1_frame_cropping_flag);
+
+    if(u1_frame_cropping_flag)
+    {
+        u1_frame_cropping_rect_left_ofst = ih264d_uev(pu4_bitstrm_ofst,
+                                                      pu4_bitstrm_buf);
+        COPYTHECONTEXT("SPS: frame_cropping_rect_left_offset",
+                        u1_frame_cropping_rect_left_ofst);
+        u1_frame_cropping_rect_right_ofst = ih264d_uev(pu4_bitstrm_ofst,
+                                                       pu4_bitstrm_buf);
+        COPYTHECONTEXT("SPS: frame_cropping_rect_right_offset",
+                        u1_frame_cropping_rect_right_ofst);
+        u1_frame_cropping_rect_top_ofst = ih264d_uev(pu4_bitstrm_ofst,
+                                                     pu4_bitstrm_buf);
+        COPYTHECONTEXT("SPS: frame_cropping_rect_top_offset",
+                        u1_frame_cropping_rect_top_ofst);
+        u1_frame_cropping_rect_bottom_ofst = ih264d_uev(pu4_bitstrm_ofst,
+                                                        pu4_bitstrm_buf);
+        COPYTHECONTEXT("SPS: frame_cropping_rect_bottom_offset",
+                        u1_frame_cropping_rect_bottom_ofst);
+    }
+
+    ps_seq->u1_vui_parameters_present_flag = ih264d_get_bit_h264(ps_bitstrm);
+    COPYTHECONTEXT("SPS: vui_parameters_present_flag",
+                    ps_seq->u1_vui_parameters_present_flag);
+
+    /* Frame buffer dimensions: picture dimensions plus padding */
+    u2_frm_wd_y = u2_pic_wd + (UWORD8)(PAD_LEN_Y_H << 1);
+    if(1 == ps_dec->u4_share_disp_buf)
+    {
+        /* With shared display buffers the stride is at least the */
+        /* display width requested by the application             */
+        if(ps_dec->u4_app_disp_width > u2_frm_wd_y)
+            u2_frm_wd_y = ps_dec->u4_app_disp_width;
+    }
+
+    u2_frm_ht_y = u2_pic_ht + (UWORD8)(PAD_LEN_Y_V << 2);
+    u2_frm_wd_uv = u2_pic_wd + (UWORD8)(PAD_LEN_UV_H << 2);
+    u2_frm_wd_uv = MAX(u2_frm_wd_uv, u2_frm_wd_y);
+
+    u2_frm_ht_uv = (u2_pic_ht >> 1) + (UWORD8)(PAD_LEN_UV_V << 2);
+    u2_frm_ht_uv = MAX(u2_frm_ht_uv, (u2_frm_ht_y >> 1));
+
+    /* Calculate display picture width, height and start offset from YUV420 */
+    /* picture buffers as per cropping information parsed above             */
+    {
+        UWORD16 u2_rgt_ofst = 0;
+        UWORD16 u2_lft_ofst = 0;
+        UWORD16 u2_top_ofst = 0;
+        UWORD16 u2_btm_ofst = 0;
+        UWORD8 u1_frm_mbs_flag;
+        UWORD8 u1_vert_mult_factor;
+        WORD32 i4_cropped_ht, i4_cropped_wd;
+
+        if(u1_frame_cropping_flag)
+        {
+            /* Calculate right and left offset for cropped picture */
+            u2_rgt_ofst = u1_frame_cropping_rect_right_ofst << 1;
+            u2_lft_ofst = u1_frame_cropping_rect_left_ofst << 1;
+
+            /* Know frame MBs only flag */
+            u1_frm_mbs_flag = (1 == ps_seq->u1_frame_mbs_only_flag);
+
+            /* Vertical offsets are shifted by 1 for frame_mbs_only */
+            /* streams and by 2 otherwise                           */
+            u1_vert_mult_factor = (2 - u1_frm_mbs_flag);
+
+            /* Calculate bottom and top offset for cropped picture */
+            u2_btm_ofst = (u1_frame_cropping_rect_bottom_ofst
+                            << u1_vert_mult_factor);
+            u2_top_ofst = (u1_frame_cropping_rect_top_ofst
+                            << u1_vert_mult_factor);
+        }
+
+        /* Calculate offset from start of YUV 420 picture buffer to start of */
+        /* cropped picture buffer                                            */
+        u2_crop_offset_y = (u2_frm_wd_y * u2_top_ofst) + (u2_lft_ofst);
+        u2_crop_offset_uv = (u2_frm_wd_uv * (u2_top_ofst >> 1))
+                        + (u2_lft_ofst >> 1) * YUV420SP_FACTOR;
+        /* Calculate the display picture width and height based on crop */
+        /* information                                                  */
+        i4_cropped_ht = u2_pic_ht - (u2_btm_ofst + u2_top_ofst);
+        i4_cropped_wd = u2_pic_wd - (u2_rgt_ofst + u2_lft_ofst);
+
+        /* At least one macroblock must survive the cropping */
+        if((i4_cropped_ht < MB_SIZE) || (i4_cropped_wd < MB_SIZE))
+        {
+            return ERROR_INV_SPS_PPS_T;
+        }
+
+        /* A change of picture size after headers were decoded is reported */
+        /* to the caller instead of being applied                          */
+        if((3 == ps_dec->i4_header_decoded) && (ps_dec->u2_pic_wd != u2_pic_wd))
+        {
+            ps_dec->u1_res_changed = 1;
+            return IVD_RES_CHANGED;
+        }
+        if((3 == ps_dec->i4_header_decoded) && (ps_dec->u2_pic_ht != u2_pic_ht))
+        {
+            ps_dec->u1_res_changed = 1;
+            return IVD_RES_CHANGED;
+        }
+
+        ps_dec->u2_disp_height = i4_cropped_ht;
+
+        ps_dec->u2_disp_width = i4_cropped_wd;
+    }
+
+    ps_seq->u1_is_valid = TRUE;
+
+    if(1 == ps_seq->u1_vui_parameters_present_flag)
+    {
+        ret = ih264d_parse_vui_parametres(&ps_seq->s_vui, ps_bitstrm);
+        if(ret != OK)
+            return ret;
+    }
+
+    /*
+     * Check for display width.
+     * This has to be at the end of the SPS parsing, so everything gets
+     * parsed and the error will not affect decoding.
+     * NOTE(review): the comparison uses ps_dec->u2_pic_wd, which still holds
+     * the value from before this SPS (it is updated just below) - confirm
+     * whether the freshly parsed width was intended.
+     */
+    if((0 != ps_dec->u4_app_disp_width)
+                    && (ps_dec->u4_app_disp_width < ps_dec->u2_pic_wd))
+    {
+        ps_dec->u4_app_disp_width = ps_dec->u2_pic_wd;
+        return ERROR_DISP_WIDTH_RESET_TO_PIC_WIDTH;
+    }
+
+    /* Publish the derived dimensions to the decoder context */
+    ps_dec->u2_pic_wd = u2_pic_wd;
+    ps_dec->u2_pic_ht = u2_pic_ht;
+
+    /* Determining the Width and Height of Frame from that of Picture */
+    ps_dec->u2_frm_wd_y = u2_frm_wd_y;
+    ps_dec->u2_frm_ht_y = u2_frm_ht_y;
+
+    ps_dec->u2_frm_wd_uv = u2_frm_wd_uv;
+    ps_dec->u2_frm_ht_uv = u2_frm_ht_uv;
+    ps_dec->s_pad_mgr.u1_pad_len_y_v = (UWORD8)(PAD_LEN_Y_V << (1 - u1_frm));
+    ps_dec->s_pad_mgr.u1_pad_len_cr_v = (UWORD8)(PAD_LEN_UV_V << (1 - u1_frm));
+
+    ps_dec->u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs;
+    ps_dec->u2_frm_ht_in_mbs = ps_seq->u2_frm_ht_in_mbs;
+
+    ps_dec->u2_crop_offset_y = u2_crop_offset_y;
+    ps_dec->u2_crop_offset_uv = u2_crop_offset_uv;
+
+    /* Commit the scratch copy to the SPS table */
+    ps_dec->ps_sps[u1_seq_parameter_set_id] = *ps_seq;
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_end_of_sequence \endif
+ *
+ * \brief
+ *    Handles an End of Sequence NAL by delegating to
+ *    ih264d_end_of_pic_processing().
+ *
+ * \param ps_dec : Pointer to the decoder context
+ *
+ * \return
+ *    The status returned by ih264d_end_of_pic_processing()
+ **************************************************************************
+ */
+WORD32 ih264d_parse_end_of_sequence(dec_struct_t * ps_dec)
+{
+    /* Nothing is parsed here; the callee's status is passed straight through */
+    return ih264d_end_of_pic_processing(ps_dec);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_access_unit_delimiter_rbsp \endif
+ *
+ * \brief
+ *    Parses an access unit delimiter RBSP. Reads the 3-bit primary pic type
+ *    from the decoder bitstream and records in ps_dec_err_status whether it
+ *    is one of I_PIC, SI_PIC or ISI_PIC (PIC_TYPE_I) or anything else
+ *    (PIC_TYPE_UNKNOWN).
+ *
+ * \param ps_dec : Pointer to the decoder context; 3 bits of its bitstream
+ *                 are consumed
+ *
+ * \return
+ *    Always 0
+ **************************************************************************
+ */
+
+WORD32 ih264d_access_unit_delimiter_rbsp(dec_struct_t * ps_dec)
+{
+    UWORD8 u1_pic_type = ih264d_get_bits_h264(ps_dec->ps_bitstrm, 3);
+
+    if((I_PIC == u1_pic_type) || (SI_PIC == u1_pic_type)
+                    || (ISI_PIC == u1_pic_type))
+    {
+        /* Delimiter announces an intra-type picture */
+        ps_dec->ps_dec_err_status->u1_pic_aud_i = PIC_TYPE_I;
+    }
+    else
+    {
+        /* Every other primary pic type is recorded as unknown */
+        ps_dec->ps_dec_err_status->u1_pic_aud_i = PIC_TYPE_UNKNOWN;
+    }
+
+    return 0;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_nal_unit \endif
+ *
+ * \brief
+ * Decodes one NAL unit: hands the buffer to ih264d_process_nal_unit(),
+ * reads the one-byte NAL header (nal_ref_idc and nal_unit_type) and
+ * dispatches on nal_unit_type. When pu1_buf is NULL or u4_length is 0
+ * nothing is parsed and OK is returned.
+ *
+ * SPS and PPS NALs are always parsed; on success they set bits 0x1 and
+ * 0x2 of ps_dec->i4_header_decoded. All other handled NAL types are
+ * parsed only when ps_dec->i4_decode_header is 0, and slice NALs
+ * additionally require i4_header_decoded == 3.
+ *
+ * \param dec_hdl : Decoder handle; pv_codec_handle is used as dec_struct_t
+ * \param ps_dec_op : Decode output structure; u4_frame_decoded_flag is
+ * cleared before the NAL is parsed
+ * \param pu1_buf : Pointer to the NAL unit data
+ * \param u4_length : Length passed along with pu1_buf to
+ * ih264d_process_nal_unit() (presumably in bytes)
+ *
+ * \return
+ * OK on Success, otherwise the status of the failing SPS, PPS, SEI
+ * message or slice parser
+ **************************************************************************
+ */
+
+WORD32 ih264d_parse_nal_unit(iv_obj_t *dec_hdl,
+ ivd_video_decode_op_t *ps_dec_op,
+ UWORD8 *pu1_buf,
+ UWORD32 u4_length)
+{
+
+ dec_bit_stream_t *ps_bitstrm;
+
+
+ dec_struct_t *ps_dec = (dec_struct_t *)dec_hdl->pv_codec_handle;
+ ivd_video_decode_ip_t *ps_dec_in =
+ (ivd_video_decode_ip_t *)ps_dec->pv_dec_in; /* NOTE(review): not referenced again in this function */
+ dec_slice_params_t * ps_cur_slice = ps_dec->ps_cur_slice; /* NOTE(review): not referenced again in this function */
+ UWORD8 u1_first_byte, u1_nal_ref_idc;
+ UWORD8 u1_nal_unit_type;
+ WORD32 i_status = OK; /* returned unchanged when nothing is parsed */
+ ps_bitstrm = ps_dec->ps_bitstrm;
+
+ if(pu1_buf)
+ {
+ if(u4_length)
+ {
+ ps_dec_op->u4_frame_decoded_flag = 0;
+ ih264d_process_nal_unit(ps_dec->ps_bitstrm, pu1_buf,
+ u4_length);
+
+ SWITCHOFFTRACE;
+ u1_first_byte = ih264d_get_bits_h264(ps_bitstrm, 8); /* NAL header byte */
+
+ if(NAL_FORBIDDEN_BIT(u1_first_byte)) /* only reported; parsing continues */
+ {
+ H264_DEC_DEBUG_PRINT("\nForbidden bit set in Nal Unit, Let's try\n");
+ }
+ u1_nal_unit_type = NAL_UNIT_TYPE(u1_first_byte);
+ ps_dec->u1_nal_unit_type = u1_nal_unit_type;
+ u1_nal_ref_idc = (UWORD8)(NAL_REF_IDC(u1_first_byte));
+ //Skip all NALUs if SPS and PPS are not decoded
+ switch(u1_nal_unit_type)
+ {
+ case SLICE_DATA_PARTITION_A_NAL:
+ case SLICE_DATA_PARTITION_B_NAL:
+ case SLICE_DATA_PARTITION_C_NAL:
+ if(!ps_dec->i4_decode_header)
+ ih264d_parse_slice_partition(ps_dec, ps_bitstrm); /* return value, if any, is not checked */
+
+ break;
+
+ case IDR_SLICE_NAL:
+ case SLICE_NAL:
+
+ /* ! */
+ DEBUG_THREADS_PRINTF("Decoding a slice NAL\n");
+ if(!ps_dec->i4_decode_header)
+ {
+ if(ps_dec->i4_header_decoded == 3) /* both the SPS (0x1) and PPS (0x2) bits are set */
+ {
+ /* ! */
+ ps_dec->u4_slice_start_code_found = 1;
+
+ ih264d_rbsp_to_sodb(ps_dec->ps_bitstrm);
+
+ i_status = ih264d_parse_decode_slice(
+ (UWORD8)(u1_nal_unit_type
+ == IDR_SLICE_NAL),
+ u1_nal_ref_idc, ps_dec);
+
+ if(i_status != OK)
+ return i_status;
+ }
+ else
+ {
+ H264_DEC_DEBUG_PRINT(
+ "\nSlice NAL Supplied but no header has been supplied\n");
+ }
+ }
+ break;
+
+ case SEI_NAL:
+ if(!ps_dec->i4_decode_header)
+ {
+ ih264d_rbsp_to_sodb(ps_dec->ps_bitstrm);
+ i_status = ih264d_parse_sei_message(ps_dec, ps_bitstrm);
+ if(i_status != OK)
+ return i_status;
+ ih264d_parse_sei(ps_dec, ps_bitstrm); /* return value, if any, is not checked */
+ }
+ break;
+ case SEQ_PARAM_NAL:
+ /* ! */
+ ih264d_rbsp_to_sodb(ps_dec->ps_bitstrm);
+ i_status = ih264d_parse_sps(ps_dec, ps_bitstrm);
+ if(i_status == ERROR_INV_SPS_PPS_T) /* this error is returned immediately */
+ return i_status;
+ if(!i_status) /* any other error skips the flag and is returned at the end */
+ ps_dec->i4_header_decoded |= 0x1;
+ break;
+
+ case PIC_PARAM_NAL:
+ /* ! */
+ ih264d_rbsp_to_sodb(ps_dec->ps_bitstrm);
+ i_status = ih264d_parse_pps(ps_dec, ps_bitstrm);
+ if(i_status == ERROR_INV_SPS_PPS_T) /* this error is returned immediately */
+ return i_status;
+ if(!i_status) /* any other error skips the flag and is returned at the end */
+ ps_dec->i4_header_decoded |= 0x2;
+ break;
+ case ACCESS_UNIT_DELIMITER_RBSP:
+ if(!ps_dec->i4_decode_header)
+ {
+ ih264d_access_unit_delimiter_rbsp(ps_dec);
+ }
+ break;
+ //Let us ignore the END_OF_SEQ_RBSP NAL and decode even after this NAL
+ case END_OF_STREAM_RBSP:
+ if(!ps_dec->i4_decode_header)
+ {
+ ih264d_parse_end_of_stream(ps_dec);
+ }
+ break;
+ case FILLER_DATA_NAL:
+ if(!ps_dec->i4_decode_header)
+ {
+ ih264d_parse_filler_data(ps_dec, ps_bitstrm);
+ }
+ break;
+ default: /* unhandled NAL types are reported and otherwise ignored */
+ H264_DEC_DEBUG_PRINT("\nUnknown NAL type %d\n", u1_nal_unit_type);
+ break;
+ }
+
+ }
+
+ }
+
+ return i_status;
+
+}
+
diff --git a/decoder/ih264d_parse_headers.h b/decoder/ih264d_parse_headers.h
new file mode 100755
index 0000000..3c829e7
--- /dev/null
+++ b/decoder/ih264d_parse_headers.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_PARSE_HEADERS_H_
+#define _IH264D_PARSE_HEADERS_H_
+/*!
+**************************************************************************
+* \file ih264d_parse_headers.h
+*
+* \brief
+* Contains declarations of the high-level syntax (above slice)
+* parsing routines
+*
+* \date
+* 19/12/2002
+*
+* \author AI
+**************************************************************************
+*/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_structs.h"
+WORD32 ih264d_parse_nal_unit(iv_obj_t *dec_hdl, /* decoder handle (pv_codec_handle holds the decoder context) */
+ ivd_video_decode_op_t *ps_dec_op, /* decode output; u4_frame_decoded_flag is cleared per NAL */
+ UWORD8 *pu1_buf, /* NAL unit data; NULL makes the call a no-op returning OK */
+ UWORD32 u4_length); /* length passed with pu1_buf; 0 makes the call a no-op returning OK */
+
+#endif /* _IH264D_PARSE_HEADERS_H_ */
diff --git a/decoder/ih264d_parse_islice.c b/decoder/ih264d_parse_islice.c
new file mode 100755
index 0000000..7851a0b
--- /dev/null
+++ b/decoder/ih264d_parse_islice.c
@@ -0,0 +1,1479 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ **************************************************************************
+ * \file ih264d_parse_islice.c
+ *
+ * \brief
+ * Contains routines that decode an I slice
+ *
+ * Detailed_description
+ *
+ * \date
+ * 07/07/2003
+ *
+ * \author NS
+ **************************************************************************
+ */
+#include "ih264d_error_handler.h"
+#include "ih264d_debug.h"
+#include <string.h>
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_cabac.h"
+#include "ih264d_parse_cabac.h"
+#include "ih264d_parse_mb_header.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_process_pslice.h"
+#include "ih264d_process_intra_mb.h"
+#include "ih264d_parse_islice.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_defs.h"
+#include "ih264d_thread_parse_decode.h"
+#include "ithread.h"
+#include "ih264d_parse_mb_header.h"
+#include "assert.h"
+#include "ih264d_utils.h"
+#include "ih264d_format_conv.h"
+
+void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec);
+
+void ih264d_itrans_recon_luma_dc(dec_struct_t *ps_dec,
+ WORD16* pi2_src,
+ WORD16* pi2_coeff_block,
+ const UWORD16 *pu2_weigh_mat);
+
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_imb_cavlc \endif
+ *
+ * \brief
+ * This function parses the CAVLC syntax of an I MB.
+ * For an I_4x4_MB it reads the optional transform_size_8x8_flag, the luma
+ * intra prediction modes, intra_chroma_pred_mode, coded_block_pattern and
+ * (only when the CBP is non-zero) mb_qp_delta.
+ * Any other mb_type is handled as I_16x16_MB: intra_chroma_pred_mode is
+ * read, the CBP is looked up from mb_type, mb_qp_delta is read and the
+ * luma DC block is parsed. The 16x16 luma DC transform/scaling
+ * (pf_ihadamard_scaling_4x4) is also done here and its output is written
+ * to the coefficient buffer.
+ * Finally the residual is parsed when the CBP is non-zero; otherwise the
+ * current QP state is copied into the MB info and the NNZ state is
+ * updated through ih264d_update_nnz_for_skipmb().
+ *
+ * \param ps_dec : Decoder context
+ * \param ps_cur_mb_info : Info of the MB being parsed; updated here
+ * \param u1_mb_num : Unused
+ * \param u1_mb_type : I_4x4_MB selects the 4x4/8x8 path, any other value
+ * is treated as I_16x16_MB
+ *
+ * \return
+ * OK on Success, otherwise ERROR_CHROMA_PRED_MODE, ERROR_CBP,
+ * ERROR_INV_RANGE_QP_T, ERROR_EOB_TERMINATE_T or the error code of a
+ * called routine
+ **************************************************************************
+ */
+WORD32 ih264d_parse_imb_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_mb_type)
+{
+ WORD32 i4_delta_qp;
+ UWORD32 u4_temp;
+ UWORD32 ui_is_top_mb_available;
+ UWORD32 ui_is_left_mb_available;
+ UWORD32 u4_cbp;
+ UWORD32 u4_offset; /* 0 for I_4x4, 1 for I_16x16; passed to the residual parser */
+ UWORD32 *pu4_bitstrm_buf;
+ WORD32 ret;
+
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+ UNUSED(u1_mb_num);
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+ ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+ u4_temp = ps_dec->u1_mb_ngbr_availablity;
+ ui_is_top_mb_available = BOOLEAN(u4_temp & TOP_MB_AVAILABLE_MASK);
+ ui_is_left_mb_available = BOOLEAN(u4_temp & LEFT_MB_AVAILABLE_MASK);
+
+ pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+
+ if(u1_mb_type == I_4x4_MB)
+ {
+ ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
+ u4_offset = 0;
+
+ /*--------------------------------------------------------------------*/
+ /* Read transform_size_8x8_flag if present */
+ /*--------------------------------------------------------------------*/
+ if(ps_dec->s_high_profile.u1_transform8x8_present)
+ {
+ ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
+ COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+ }
+
+ /*--------------------------------------------------------------------*/
+ /* Read the IntraPrediction modes for LUMA */
+ /*--------------------------------------------------------------------*/
+ if (!ps_cur_mb_info->u1_tran_form8x8)
+ {
+ ih264d_read_intra_pred_modes(ps_dec,
+ ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
+ ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+16),
+ ps_cur_mb_info->u1_tran_form8x8);
+ UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
+ pu1_temp += 32; /* step over the 32 bytes handed to the pred-mode reader (offsets 0 and 16) */
+ ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
+ }
+ else
+ {
+ ih264d_read_intra_pred_modes(ps_dec,
+ ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
+ ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+4),
+ ps_cur_mb_info->u1_tran_form8x8);
+ UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
+ pu1_temp += 8; /* step over the 8 bytes handed to the pred-mode reader (offsets 0 and 4) */
+ ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
+ }
+ /*--------------------------------------------------------------------*/
+ /* Read the IntraPrediction mode for CHROMA */
+ /*--------------------------------------------------------------------*/
+//Inlined ih264d_uev
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_temp; /* this u4_temp shadows the function-scope u4_temp */
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ {
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ }
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_temp = ((1 << u4_ldz) + u4_word - 1); /* value = 2^ldz - 1 + suffix. NOTE(review): 1 << u4_ldz is undefined for int if CLZ can yield 31 or more here -- confirm */
+ if(u4_temp > 3)
+ {
+ return ERROR_CHROMA_PRED_MODE;
+ }
+ ps_cur_mb_info->u1_chroma_pred_mode = u4_temp;
+ COPYTHECONTEXT("intra_chroma_pred_mode", ps_cur_mb_info->u1_chroma_pred_mode);
+ }
+ /*--------------------------------------------------------------------*/
+ /* Read the Coded block pattern */
+ /*--------------------------------------------------------------------*/
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ {
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ }
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_cbp = ((1 << u4_ldz) + u4_word - 1); /* same unsigned Exp-Golomb decode as above */
+ }
+ if(u4_cbp > 47) /* the decoded code number indexes gau1_ih264d_cbp_table below */
+ {
+ return ERROR_CBP;
+ }
+
+ u4_cbp = gau1_ih264d_cbp_table[u4_cbp][0]; /* map the code number to the CBP value (column 0) */
+ COPYTHECONTEXT("coded_block_pattern", u1_cbp); /* NOTE(review): no u1_cbp is declared in this function; compiles only if the macro drops its arguments -- confirm */
+ ps_cur_mb_info->u1_cbp = u4_cbp;
+
+ /*--------------------------------------------------------------------*/
+ /* Read mb_qp_delta */
+ /*--------------------------------------------------------------------*/
+ if(ps_cur_mb_info->u1_cbp) /* mb_qp_delta is read only when the CBP is non-zero */
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ {
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ }
+
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; /* magnitude of the signed value */
+
+ if(u4_word & 0x1) /* an odd suffix selects the negative value */
+ {
+ i4_delta_qp = (-(WORD32)u4_abs_val);
+ }
+ else
+ {
+ i4_delta_qp = (u4_abs_val);
+ }
+
+ if((i4_delta_qp < -26) || (i4_delta_qp > 25)) /* only [-26, 25] is accepted */
+ {
+ return ERROR_INV_RANGE_QP_T;
+ }
+
+ COPYTHECONTEXT("mb_qp_delta", i1_delta_qp); /* NOTE(review): no i1_delta_qp is declared in this function (the local is i4_delta_qp) -- confirm */
+ if(i4_delta_qp != 0)
+ {
+ ret = ih264d_update_qp(ps_dec, (WORD8)i4_delta_qp);
+ if(ret != OK)
+ return ret;
+ }
+ }
+
+ }
+ else
+ {
+ u4_offset = 1;
+ ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
+ /*-------------------------------------------------------------------*/
+ /* Read the IntraPrediction mode for CHROMA */
+ /*-------------------------------------------------------------------*/
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ {
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ }
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_temp = ((1 << u4_ldz) + u4_word - 1); /* writes the function-scope u4_temp (no inner declaration in this branch) */
+
+//Inlined ih264d_uev
+
+ if(u4_temp > 3)
+ {
+ return ERROR_CHROMA_PRED_MODE;
+ }
+ ps_cur_mb_info->u1_chroma_pred_mode = u4_temp;
+ COPYTHECONTEXT("intra_chroma_pred_mode", ps_cur_mb_info->u1_chroma_pred_mode);
+ }
+ /*-------------------------------------------------------------------*/
+ /* Derive the Coded block pattern from mb_type (table lookup; */
+ /* nothing is read from the bitstream for it in this branch) */
+ /*-------------------------------------------------------------------*/
+ u4_cbp = gau1_ih264d_cbp_tab[(u1_mb_type - 1) >> 2];
+ ps_cur_mb_info->u1_cbp = u4_cbp;
+
+ /*-------------------------------------------------------------------*/
+ /* Read mb_qp_delta (unconditionally in this branch) */
+ /*-------------------------------------------------------------------*/
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; /* magnitude of the signed value */
+
+ if(u4_word & 0x1) /* an odd suffix selects the negative value */
+ i4_delta_qp = (-(WORD32)u4_abs_val);
+ else
+ i4_delta_qp = (u4_abs_val);
+
+ if((i4_delta_qp < -26) || (i4_delta_qp > 25)) /* only [-26, 25] is accepted */
+ return ERROR_INV_RANGE_QP_T;
+
+ }
+//inlinined ih264d_sev
+ COPYTHECONTEXT("Delta quant", i1_delta_qp); /* NOTE(review): no i1_delta_qp is declared in this function (the local is i4_delta_qp) -- confirm */
+
+ if(i4_delta_qp != 0)
+ {
+ ret = ih264d_update_qp(ps_dec, (WORD8)i4_delta_qp);
+ if(ret != OK)
+ return ret;
+ }
+
+ {
+ WORD16 i_scaleFactor;
+ UWORD32 ui_N = 0; /* stays 0 when neither the left nor the top MB is available */
+ WORD16 *pi2_scale_matrix_ptr;
+ /*******************************************************************/
+ /* for luma DC coefficients the scaling is done during the parsing */
+ /* to preserve the precision */
+ /*******************************************************************/
+ if(ps_dec->s_high_profile.u1_scaling_present)
+ {
+ pi2_scale_matrix_ptr =
+ ps_dec->s_high_profile.i2_scalinglist4x4[0];
+ }
+ else
+ {
+ i_scaleFactor = 16;
+ pi2_scale_matrix_ptr = &i_scaleFactor; /* NOTE(review): points at a single WORD16; assumes the DC scaling routine reads only element 0 -- confirm */
+ }
+
+ /*---------------------------------------------------------------*/
+ /* Decode DC coefficients */
+ /*---------------------------------------------------------------*/
+ /*---------------------------------------------------------------*/
+ /* Calculation of N */
+ /*---------------------------------------------------------------*/
+ if(ui_is_left_mb_available)
+ {
+
+ if(ui_is_top_mb_available)
+ {
+ ui_N = ((ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0]
+ + ps_dec->pu1_left_nnz_y[0] + 1) >> 1); /* rounded-up average of the top and left values */
+ }
+ else
+ {
+ ui_N = ps_dec->pu1_left_nnz_y[0];
+ }
+ }
+ else if(ui_is_top_mb_available)
+ {
+ ui_N = ps_cur_mb_info->ps_top_mb->pu1_nnz_y[0];
+ }
+
+ {
+ WORD16 pi2_dc_coef[16];
+ WORD32 pi4_tmp[16];
+ tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
+ (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+ WORD16 *pi2_coeff_block =
+ (WORD16 *)ps_dec->pv_parse_tu_coeff_data; /* same address as ps_tu_4x4 */
+ ps_tu_4x4->u2_sig_coeff_map = 0;
+ UWORD32 u4_num_coeff; /* written by the parser call below; not read afterwards in this function */
+
+ ret = ps_dec->pf_cavlc_parse4x4coeff[(ui_N > 7)](pi2_dc_coef, 0, ui_N,
+ ps_dec, &u4_num_coeff); /* entry 1 of the parser table is used when N > 7, entry 0 otherwise */
+ if(ret != OK)
+ return ret;
+
+ if(EXCEED_OFFSET(ps_bitstrm))
+ return ERROR_EOB_TERMINATE_T;
+ if(ps_tu_4x4->u2_sig_coeff_map) /* cleared above; presumably set by the parser when coefficients are present */
+ {
+ memset(pi2_dc_coef,0,sizeof(pi2_dc_coef));
+ ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
+ pi2_dc_coef,
+ ps_dec->pu1_inv_scan);
+
+ PROFILE_DISABLE_IQ_IT_RECON()
+ ps_dec->pf_ihadamard_scaling_4x4(pi2_dc_coef,
+ pi2_coeff_block,
+ ps_dec->pu2_quant_scale_y,
+ (UWORD16 *)pi2_scale_matrix_ptr,
+ ps_dec->u1_qp_y_div6,
+ pi4_tmp);
+ pi2_coeff_block += 16; /* advance the coefficient buffer by 16 WORD16 entries */
+ ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_block;
+ SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0);
+ }
+
+ }
+ }
+ }
+
+
+ if(u4_cbp)
+ {
+
+ ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info,
+ (UWORD8)u4_offset);
+ if(ret != OK)
+ return ret;
+ if(EXCEED_OFFSET(ps_bitstrm))
+ return ERROR_EOB_TERMINATE_T;
+
+ /* Store Left Mb NNZ and TOP chroma NNZ */
+ }
+ else /* CBP is zero: no residual is parsed; copy the current QP div6/rem6 state into the MB info */
+ {
+ ps_cur_mb_info->u1_qp_div6 = ps_dec->u1_qp_y_div6;
+ ps_cur_mb_info->u1_qpc_div6 = ps_dec->u1_qp_u_div6;
+ ps_cur_mb_info->u1_qpcr_div6 = ps_dec->u1_qp_v_div6;
+ ps_cur_mb_info->u1_qp_rem6 = ps_dec->u1_qp_y_rem6;
+ ps_cur_mb_info->u1_qpc_rem6 = ps_dec->u1_qp_u_rem6;
+ ps_cur_mb_info->u1_qpcr_rem6 = ps_dec->u1_qp_v_rem6;
+ ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
+ }
+
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_imb_cabac \endif
+ *
+ * \brief
+ * This function parses the CABAC syntax of an I MB and keeps the CABAC
+ * context info of the current MB (p_curr_ctxt) up to date.
+ * For an I_4x4_MB it reads the optional transform_size_8x8_flag, the luma
+ * intra prediction modes, intra_chroma_pred_mode, coded_block_pattern and
+ * (only when the CBP is non-zero) mb_qp_delta.
+ * Any other mb_type is handled as I_16x16_MB: intra_chroma_pred_mode is
+ * read, the CBP is looked up from mb_type, mb_qp_delta is read and the
+ * luma DC block is parsed. The 16x16 luma DC transform/scaling
+ * (pf_ihadamard_scaling_4x4) is also done here and its output is written
+ * to the coefficient buffer.
+ * The residual parser is called in both cases.
+ *
+ * \param ps_dec : Decoder context
+ * \param ps_cur_mb_info : Info of the MB being parsed; updated here
+ * \param u1_mb_type : I_4x4_MB selects the 4x4/8x8 path, any other value
+ * is treated as I_16x16_MB
+ *
+ * \return
+ * OK on Success, otherwise ERROR_CHROMA_PRED_MODE, ERROR_EOB_TERMINATE_T
+ * or the error code of a called routine
+ **************************************************************************
+ */
+WORD32 ih264d_parse_imb_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_type)
+{
+ WORD8 i1_delta_qp;
+ UWORD8 u1_cbp;
+ UWORD8 u1_offset; /* 0 for I_4x4, 1 for I_16x16; passed to the residual parser */
+ /* Variables for handling Cabac contexts */
+ ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+ ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ bin_ctxt_model_t *p_bin_ctxt;
+
+ UWORD8 u1_intra_chrom_pred_mode;
+ UWORD8 u1_dc_block_flag = 0; /* stays 0 on the I_4x4 path */
+ WORD32 ret;
+
+ ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+ if(ps_left_ctxt == ps_dec->ps_def_ctxt_mb_info) /* the left context is the default context */
+ {
+ ps_dec->pu1_left_yuv_dc_csbp[0] = 0xf;
+ }
+
+ if(ps_dec->ps_cur_slice->u1_slice_type != I_SLICE) /* not an I slice: zero the mv and ref_idx context state */
+ {
+ WORD32 *pi4_buf;
+ WORD8 *pi1_buf;
+ MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
+ *((UWORD32 *)ps_dec->pi1_left_ref_idx_ctxt_inc) = 0; /* clears four bytes with one 32-bit store */
+ MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
+ pi1_buf = p_curr_ctxt->i1_ref_idx;
+ pi4_buf = (WORD32 *)pi1_buf;
+ *pi4_buf = 0; /* clears four bytes of i1_ref_idx with one 32-bit store */
+ }
+
+ if(u1_mb_type == I_4x4_MB)
+ {
+ ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
+ p_curr_ctxt->u1_mb_type = CAB_I4x4;
+ u1_offset = 0;
+
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+ /*--------------------------------------------------------------------*/
+ /* Read transform_size_8x8_flag if present */
+ /*--------------------------------------------------------------------*/
+ if(ps_dec->s_high_profile.u1_transform8x8_present)
+ {
+ ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac(
+ ps_dec, ps_cur_mb_info);
+ COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+ p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+ }
+ else
+ {
+ p_curr_ctxt->u1_transform8x8_ctxt = 0;
+ }
+
+ /*--------------------------------------------------------------------*/
+ /* Read the IntraPrediction modes for LUMA */
+ /*--------------------------------------------------------------------*/
+ if (!ps_cur_mb_info->u1_tran_form8x8)
+ {
+ ih264d_read_intra_pred_modes_cabac(
+ ps_dec,
+ ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
+ ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+16),
+ ps_cur_mb_info->u1_tran_form8x8);
+ UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
+ pu1_temp += 32; /* step over the 32 bytes handed to the pred-mode reader (offsets 0 and 16) */
+ ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
+ }
+ else
+ {
+ ih264d_read_intra_pred_modes_cabac(
+ ps_dec,
+ ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data),
+ ((UWORD8 *)ps_dec->pv_parse_tu_coeff_data+4),
+ ps_cur_mb_info->u1_tran_form8x8);
+ UWORD8 *pu1_temp = (UWORD8 *)ps_dec->pv_parse_tu_coeff_data;
+ pu1_temp += 8; /* step over the 8 bytes handed to the pred-mode reader (offsets 0 and 4) */
+ ps_dec->pv_parse_tu_coeff_data = (void *)pu1_temp;
+ }
+ /*--------------------------------------------------------------------*/
+ /* Read the IntraPrediction mode for CHROMA */
+ /*--------------------------------------------------------------------*/
+ u1_intra_chrom_pred_mode = ih264d_parse_chroma_pred_mode_cabac(ps_dec); /* NOTE(review): unlike the I_16x16 branch below, the value is not checked against 3 here -- confirm the parser cannot exceed it */
+ COPYTHECONTEXT("intra_chroma_pred_mode", u1_intra_chrom_pred_mode);
+ p_curr_ctxt->u1_intra_chroma_pred_mode = ps_cur_mb_info->u1_chroma_pred_mode =
+ u1_intra_chrom_pred_mode;
+
+ /*--------------------------------------------------------------------*/
+ /* Read the Coded block pattern */
+ /*--------------------------------------------------------------------*/
+ u1_cbp = ih264d_parse_ctx_cbp_cabac(ps_dec);
+ COPYTHECONTEXT("coded_block_pattern", u1_cbp);
+ ps_cur_mb_info->u1_cbp = u1_cbp;
+ p_curr_ctxt->u1_cbp = u1_cbp;
+
+ /*--------------------------------------------------------------------*/
+ /* Read mb_qp_delta */
+ /*--------------------------------------------------------------------*/
+ if(ps_cur_mb_info->u1_cbp) /* mb_qp_delta is read only when the CBP is non-zero */
+ {
+ ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp);
+ if(ret != OK)
+ return ret;
+ COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
+ if(i1_delta_qp != 0)
+ {
+ ret = ih264d_update_qp(ps_dec, i1_delta_qp);
+ if(ret != OK)
+ return ret;
+ }
+ }
+ else
+ ps_dec->i1_prev_mb_qp_delta = 0; /* nothing was read for this MB, so the stored previous delta is reset */
+ p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE; /* clear bit 0, which the I_16x16 branch uses for the luma DC coded_block_flag */
+ }
+ else
+ {
+ u1_offset = 1;
+ ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
+ p_curr_ctxt->u1_mb_type = CAB_I16x16;
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ p_curr_ctxt->u1_transform8x8_ctxt = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+ /*--------------------------------------------------------------------*/
+ /* Read the IntraPrediction mode for CHROMA */
+ /*--------------------------------------------------------------------*/
+ u1_intra_chrom_pred_mode = ih264d_parse_chroma_pred_mode_cabac(ps_dec);
+ if(u1_intra_chrom_pred_mode > 3)
+ return ERROR_CHROMA_PRED_MODE;
+
+ COPYTHECONTEXT("Chroma intra_chroma_pred_mode pred mode", u1_intra_chrom_pred_mode);
+ p_curr_ctxt->u1_intra_chroma_pred_mode = ps_cur_mb_info->u1_chroma_pred_mode =
+ u1_intra_chrom_pred_mode;
+
+ /*--------------------------------------------------------------------*/
+ /* Derive the Coded block pattern from mb_type (table lookup; */
+ /* nothing is read from the bitstream for it in this branch) */
+ /*--------------------------------------------------------------------*/
+ u1_cbp = gau1_ih264d_cbp_tab[(u1_mb_type - 1) >> 2];
+ ps_cur_mb_info->u1_cbp = u1_cbp;
+ p_curr_ctxt->u1_cbp = u1_cbp;
+
+ /*--------------------------------------------------------------------*/
+ /* Read mb_qp_delta (unconditionally in this branch) */
+ /*--------------------------------------------------------------------*/
+ ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &i1_delta_qp);
+ if(ret != OK)
+ return ret;
+ COPYTHECONTEXT("mb_qp_delta", i1_delta_qp);
+ if(i1_delta_qp != 0)
+ {
+ ret = ih264d_update_qp(ps_dec, i1_delta_qp);
+ if(ret != OK)
+ return ret;
+ }
+
+ {
+ WORD16 i_scaleFactor;
+ WORD16* pi2_scale_matrix_ptr;
+ /*******************************************************************/
+ /* for luma DC coefficients the scaling is done during the parsing */
+ /* to preserve the precision */
+ /*******************************************************************/
+ if(ps_dec->s_high_profile.u1_scaling_present)
+ {
+ pi2_scale_matrix_ptr =
+ ps_dec->s_high_profile.i2_scalinglist4x4[0];
+
+ }
+ else
+ {
+ i_scaleFactor = 16;
+ pi2_scale_matrix_ptr = &i_scaleFactor; /* NOTE(review): points at a single WORD16; assumes the DC scaling routine reads only element 0 -- confirm */
+ }
+ {
+ ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+ UWORD8 uc_a, uc_b;
+ UWORD32 u4_ctx_inc;
+
+ INC_SYM_COUNT(&(ps_dec->s_cab_dec_env));
+
+ /* if MbAddrN not available then CondTermN = 1 */
+ uc_b = ((ps_top_ctxt->u1_yuv_dc_csbp) & 0x01); /* bit 0 of the top context */
+
+ /* if MbAddrN not available then CondTermN = 1 */
+ uc_a = ((ps_dec->pu1_left_yuv_dc_csbp[0]) & 0x01); /* bit 0 of the left state */
+
+ u4_ctx_inc = (uc_a + (uc_b << 1)); /* left term + 2 * top term */
+
+ {
+ WORD16 pi2_dc_coef[16];
+ tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
+ (tu_sblk4x4_coeff_data_t *)ps_dec->pv_parse_tu_coeff_data;
+ WORD16 *pi2_coeff_block =
+ (WORD16 *)ps_dec->pv_parse_tu_coeff_data; /* same address as ps_tu_4x4 */
+
+ p_bin_ctxt = (ps_dec->p_cbf_t[LUMA_DC_CTXCAT]) + u4_ctx_inc;
+
+ u1_dc_block_flag =
+ ih264d_read_coeff4x4_cabac(ps_bitstrm,
+ LUMA_DC_CTXCAT,
+ ps_dec->p_significant_coeff_flag_t[LUMA_DC_CTXCAT],
+ ps_dec, p_bin_ctxt);
+
+ /* Store coded_block_flag */
+ p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
+ p_curr_ctxt->u1_yuv_dc_csbp |= u1_dc_block_flag;
+ if(u1_dc_block_flag)
+ {
+ WORD32 pi4_tmp[16];
+ memset(pi2_dc_coef,0,sizeof(pi2_dc_coef));
+ ih264d_unpack_coeff4x4_dc_4x4blk(ps_tu_4x4,
+ pi2_dc_coef,
+ ps_dec->pu1_inv_scan);
+
+ PROFILE_DISABLE_IQ_IT_RECON()
+ ps_dec->pf_ihadamard_scaling_4x4(pi2_dc_coef,
+ pi2_coeff_block,
+ ps_dec->pu2_quant_scale_y,
+ (UWORD16 *)pi2_scale_matrix_ptr,
+ ps_dec->u1_qp_y_div6,
+ pi4_tmp);
+ pi2_coeff_block += 16; /* advance the coefficient buffer by 16 WORD16 entries */
+ ps_dec->pv_parse_tu_coeff_data = (void *)pi2_coeff_block;
+ SET_BIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0);
+ }
+
+ }
+
+ }
+ }
+ }
+
+ ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6; /* keep bits 1-2 only */
+ ps_dec->pu1_left_yuv_dc_csbp[0] |= u1_dc_block_flag; /* bit 0 = this MB's luma DC flag (0 on the I_4x4 path) */
+
+ ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, u1_offset); /* NOTE(review): called regardless of the CBP and any return value is ignored, unlike the CAVLC path -- confirm the callee cannot fail */
+ if(EXCEED_OFFSET(ps_bitstrm))
+ return ERROR_EOB_TERMINATE_T;
+ return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_parse_islice_data_cavlc */
+/* */
+/* Description : This function parses CAVLC syntax of an I slice on */
+/* N MB basis. */
+/* */
+/* Inputs : ps_dec */
+/* sliceparams */
+/* firstMbInSlice */
+/* */
+/* Processing : 1. After parsing syntax for N MBs those N MBs are */
+/* decoded till the end of slice. */
+/* */
+/* Returns : 0 */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 24 06 2005 ARNY Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_parse_islice_data_cavlc(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice)
+{ /* Parses the CAVLC macroblocks of an I slice starting at u2_first_mb_in_slice; returns OK or the first error code hit */
+ UWORD8 uc_more_data_flag;
+ UWORD8 u1_num_mbs, u1_mb_idx;
+ dec_mb_info_t *ps_cur_mb_info;
+ deblk_mb_t *ps_cur_deblk_mb;
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+ UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
+ WORD16 i2_cur_mb_addr;
+ UWORD8 u1_mbaff;
+ UWORD8 u1_num_mbs_next, u1_end_of_row, u1_tfr_n_mb;
+ WORD32 ret;
+ /* Seed the decoder QP from the slice header; a failing ih264d_update_qp() aborts the slice */
+ ps_dec->u1_qp = ps_slice->u1_slice_qp;
+ ret = ih264d_update_qp(ps_dec, 0);
+ if(ret != OK)
+ return ret;
+ u1_mbaff = ps_slice->u1_mbaff_frame_flag;
+
+ /* Resume from the MB index already stored in the decoder context */
+ u1_mb_idx = ps_dec->u1_mb_idx;
+ u1_num_mbs = u1_mb_idx;
+
+ uc_more_data_flag = 1;
+ i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff; /* first MB address, shifted left by the MBAFF flag */
+
+ do
+ {
+ UWORD8 u1_mb_type;
+ /* Leave the loop, without an error, once the address passes the SPS maximum */
+ if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u2_max_mb_addr)
+ {
+ break;
+ }
+
+ ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
+ ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
+
+ ps_cur_mb_info->u1_end_of_slice = 0;
+
+ /***************************************************************/
+ /* Get the required information for decoding of MB */
+ /* mb_x, mb_y , neighbour availability, */
+ /***************************************************************/
+ ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 0);
+
+ /***************************************************************/
+ /* Set the deblocking parameters for this MB */
+ /***************************************************************/
+ ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
+ /* Deblocking parameters are filled in only when the application has not disabled deblocking */
+ if(ps_dec->u4_app_disable_deblk_frm == 0)
+ ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
+ ps_dec->u1_mb_ngbr_availablity,
+ ps_dec->u1_cur_mb_fld_dec_flag);
+ /* Every MB handled by this function is flagged as intra in its deblocking record */
+ ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type | D_INTRA_MB;
+
+ /**************************************************************/
+ /* Macroblock Layer Begins, Decode the u1_mb_type */
+ /**************************************************************/
+//Inlined ih264d_uev: unsigned Exp-Golomb read of mb_type, done on a local copy of the bit offset
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_temp;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_temp = ((1 << u4_ldz) + u4_word - 1); /* code number = 2^ldz - 1 + suffix; NOTE(review): shift assumes u4_ldz < 32 - confirm CLZ(0) */
+ if(u4_temp > 25) /* only mb_type values 0..25 are accepted here */
+ return ERROR_MB_TYPE;
+ u1_mb_type = u4_temp;
+
+ }
+//Inlined ih264d_uev
+ ps_cur_mb_info->u1_mb_type = u1_mb_type;
+ COPYTHECONTEXT("u1_mb_type", u1_mb_type);
+
+ /**************************************************************/
+ /* Parse Macroblock data */
+ /**************************************************************/
+ if(25 == u1_mb_type)
+ {
+ /* I_PCM_MB */
+ ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
+ ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
+ if(ret != OK)
+ return ret;
+ ps_cur_deblk_mb->u1_mb_qp = 0; /* I_PCM MBs are recorded with QP 0 for deblocking */
+ }
+ else
+ {
+ ret = ih264d_parse_imb_cavlc(ps_dec, ps_cur_mb_info, u1_num_mbs, u1_mb_type);
+ if(ret != OK)
+ return ret;
+ ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp; /* QP as left in the decoder context by the MB parse */
+ }
+
+ if(u1_mbaff)
+ {
+ ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
+ }
+ /**************************************************************/
+ /* Get next Macroblock address */
+ /**************************************************************/
+
+ i2_cur_mb_addr++;
+ uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm); /* a zero result ends the do/while loop */
+
+ /* Store the colocated information */
+ {
+ mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u1_num_mbs << 4); /* 16 mv_pred_t entries per MB */
+ /* Presumably zero MVs with reference indices of -1 for an intra MB - confirm against mv_pred_t */
+ mv_pred_t s_mvPred =
+ {
+ { 0, 0, 0, 0 },
+ { -1, -1 }, 0, 0};
+ ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
+ (UWORD8)(ps_dec->u1_cur_mb_fld_dec_flag << 1), 4,
+ 4);
+ }
+
+ /* Boundary strength is computed here only for fewer than 3 cores; per the original note another thread does it otherwise */
+ if(ps_dec->u4_num_cores < 3)
+ {
+ if(ps_dec->u4_app_disable_deblk_frm == 0)
+ ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info,
+ (UWORD16)(u1_num_mbs >> u1_mbaff));
+ }
+ u1_num_mbs++;
+ ps_dec->u2_total_mbs_coded++;
+
+ /****************************************************************/
+ /* Check for End Of Row */
+ /****************************************************************/
+ u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1; /* MBs remaining in the row after the current column */
+ u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01))); /* with MBAFF the row ends only on an even MB count */
+ u1_tfr_n_mb = (u1_num_mbs == ps_dec->u1_recon_mb_grp) || u1_end_of_row
+ || (!uc_more_data_flag); /* flush when the group is full, the row ends or the slice data ends */
+ ps_cur_mb_info->u1_end_of_slice = (!uc_more_data_flag);
+
+ /*H264_DEC_DEBUG_PRINT("Pic: %d Mb_X=%d Mb_Y=%d",
+ ps_slice->i4_poc >> ps_slice->u1_field_pic_flag,
+ ps_dec->u2_mbx,ps_dec->u2_mby + (1 - ps_cur_mb_info->u1_topmb));
+ H264_DEC_DEBUG_PRINT("u1_tfr_n_mb || (!uc_more_data_flag): %d", u1_tfr_n_mb || (!uc_more_data_flag));*/
+ if(u1_tfr_n_mb || (!uc_more_data_flag))
+ {
+ /* Hand MBs [u1_mb_idx, u1_num_mbs) to the routine selected by u1_separate_parse */
+ if(ps_dec->u1_separate_parse)
+ {
+ ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+ u1_num_mbs_next, u1_tfr_n_mb, u1_end_of_row);
+ ps_dec->ps_nmb_info += u1_num_mbs; /* advance the MB-info base past the MBs just handed over */
+ }
+ else
+ {
+ ret = ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+ u1_num_mbs_next, u1_tfr_n_mb,
+ u1_end_of_row);
+ if(ret != OK)
+ return ret;
+ }
+ /* Restart the group count after a transfer and mirror it into the decoder context */
+ if(u1_tfr_n_mb)
+ u1_num_mbs = 0;
+ u1_mb_idx = u1_num_mbs;
+ ps_dec->u1_mb_idx = u1_num_mbs;
+
+ }
+ }
+ while(uc_more_data_flag);
+ /* In separate-parse mode, publish end-of-slice and the number of MB addresses consumed */
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_dec->ps_parse_cur_slice->end_of_slice = 1;
+ ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr
+ - (u2_first_mb_in_slice << u1_mbaff);
+ }
+ return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_parse_islice_data_cabac */
+/* */
+/* Description : This function parses the CABAC syntax of an I slice on */
+/* an N MB basis. */
+/* */
+/* Inputs : ps_dec - decoder context */
+/* ps_slice - parameters of the current slice */
+/* u2_first_mb_in_slice - address of the first MB */
+/* */
+/* Processing : 1. After parsing syntax for N MBs those N MBs are */
+/* handed on for transfer/decoding till the end of slice. */
+/* */
+/* Returns : OK on success, otherwise the first error code hit */
+/* */
+/* Issues : MB address is range-checked before the MB is parsed */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 24 06 2005 ARNY Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_parse_islice_data_cabac(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice)
+{
+ UWORD8 uc_more_data_flag;
+ UWORD8 u1_num_mbs, u1_mb_idx;
+ dec_mb_info_t *ps_cur_mb_info;
+ deblk_mb_t *ps_cur_deblk_mb;
+
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
+ WORD16 i2_cur_mb_addr;
+ UWORD8 u1_mbaff;
+ UWORD8 u1_num_mbs_next, u1_end_of_row, u1_tfr_n_mb;
+ WORD32 ret;
+ /* Seed the decoder QP from the slice header; a failing ih264d_update_qp() aborts the slice */
+ ps_dec->u1_qp = ps_slice->u1_slice_qp;
+ ret = ih264d_update_qp(ps_dec, 0);
+ if(ret != 0)
+ return ret;
+ u1_mbaff = ps_slice->u1_mbaff_frame_flag;
+ /* Round the bit offset up to the next byte boundary before starting the CABAC engine */
+ if(ps_bitstrm->u4_ofst & 0x07)
+ {
+ ps_bitstrm->u4_ofst += 8;
+ ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
+ }
+ ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
+ if(ret != OK)
+ return ret;
+ ih264d_init_cabac_contexts(I_SLICE, ps_dec);
+
+ ps_dec->i1_prev_mb_qp_delta = 0;
+
+ /* Resume from the MB index already stored in the decoder context */
+ u1_mb_idx = ps_dec->u1_mb_idx;
+ u1_num_mbs = u1_mb_idx;
+
+ uc_more_data_flag = 1;
+ i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff; /* first MB address, shifted left by the MBAFF flag */
+ do
+ {
+ UWORD16 u2_mbx;
+ {
+ UWORD8 u1_mb_type;
+ /* Reject an out-of-range address before pf_get_mb_info() or any MB parser uses it */
+ if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u2_max_mb_addr)
+ return ERROR_MB_ADDRESS_T;
+ ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
+ ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
+
+ ps_cur_mb_info->u1_end_of_slice = 0;
+
+ /***************************************************************/
+ /* Get the required information for decoding of MB */
+ /* mb_x, mb_y , neighbour availability, */
+ /***************************************************************/
+ ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 0);
+ u2_mbx = ps_dec->u2_mbx; /* column saved for the end-of-row test after this inner block */
+
+ /*********************************************************************/
+ /* initialize u1_tran_form8x8 to zero to avoid uninitialized accesses */
+ /*********************************************************************/
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+ /***************************************************************/
+ /* Set the deblocking parameters for this MB */
+ /***************************************************************/
+ ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
+ if(ps_dec->u4_app_disable_deblk_frm == 0)
+ ih264d_set_deblocking_parameters(
+ ps_cur_deblk_mb, ps_slice,
+ ps_dec->u1_mb_ngbr_availablity,
+ ps_dec->u1_cur_mb_fld_dec_flag);
+ /* Every MB handled by this function is flagged as intra in its deblocking record */
+ ps_cur_deblk_mb->u1_mb_type = ps_cur_deblk_mb->u1_mb_type
+ | D_INTRA_MB;
+
+ /* Macroblock Layer Begins */
+ /* Decode the u1_mb_type */
+ u1_mb_type = ih264d_parse_mb_type_intra_cabac(0, ps_dec);
+ if(u1_mb_type > 25)
+ return ERROR_MB_TYPE;
+ ps_cur_mb_info->u1_mb_type = u1_mb_type;
+ COPYTHECONTEXT("u1_mb_type", u1_mb_type);
+
+ /* Parse Macroblock Data */
+ if(25 == u1_mb_type)
+ {
+ /* I_PCM_MB */
+ ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
+ ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
+ if(ret != OK)
+ return ret;
+ ps_cur_deblk_mb->u1_mb_qp = 0; /* I_PCM MBs are recorded with QP 0 for deblocking */
+ }
+ else
+ {
+ ret = ih264d_parse_imb_cabac(ps_dec, ps_cur_mb_info, u1_mb_type);
+ if(ret != OK)
+ return ret;
+ ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+ }
+
+ if(u1_mbaff)
+ {
+ ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
+ }
+ /* Next macroblock information */
+ i2_cur_mb_addr++;
+ /* The terminate bin is decoded only after a bottom MB (or any MB without MBAFF) */
+ if(ps_cur_mb_info->u1_topmb && u1_mbaff)
+ uc_more_data_flag = 1;
+ else
+ {
+ uc_more_data_flag = ih264d_decode_terminate(&ps_dec->s_cab_dec_env,
+ ps_bitstrm);
+ uc_more_data_flag = !uc_more_data_flag; /* terminate bin set means no more data */
+ COPYTHECONTEXT("Decode Sliceterm",!uc_more_data_flag);
+ }
+ /* Store the colocated information */
+ {
+ /* Presumably zero MVs with reference indices of -1 for an intra MB - confirm against mv_pred_t */
+ mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u1_num_mbs << 4);
+ mv_pred_t s_mvPred =
+ {
+ { 0, 0, 0, 0 },
+ { -1, -1 }, 0, 0};
+ ih264d_rep_mv_colz(
+ ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
+ (UWORD8)(ps_dec->u1_cur_mb_fld_dec_flag << 1),
+ 4, 4);
+ }
+ /* Boundary strength is computed here only for fewer than 3 cores; per the original note another thread does it otherwise */
+ if(ps_dec->u4_num_cores < 3)
+ {
+ if(ps_dec->u4_app_disable_deblk_frm == 0)
+ ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info,
+ (UWORD16)(u1_num_mbs >> u1_mbaff));
+ }
+ u1_num_mbs++;
+ ps_dec->u2_total_mbs_coded++;
+
+ }
+
+ /****************************************************************/
+ /* Check for End Of Row */
+ /****************************************************************/
+ u1_num_mbs_next = i2_pic_wdin_mbs - u2_mbx - 1; /* MBs remaining in the row after the current column */
+ u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01))); /* with MBAFF the row ends only on an even MB count */
+ u1_tfr_n_mb = (u1_num_mbs == ps_dec->u1_recon_mb_grp) || u1_end_of_row
+ || (!uc_more_data_flag); /* flush when the group is full, the row ends or the slice data ends */
+ ps_cur_mb_info->u1_end_of_slice = (!uc_more_data_flag);
+
+ if(u1_tfr_n_mb || (!uc_more_data_flag))
+ {
+
+ /* Hand MBs [u1_mb_idx, u1_num_mbs) to the routine selected by u1_separate_parse */
+ if(ps_dec->u1_separate_parse)
+ {
+ ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+ u1_num_mbs_next, u1_tfr_n_mb, u1_end_of_row);
+ ps_dec->ps_nmb_info += u1_num_mbs; /* advance the MB-info base past the MBs just handed over */
+ }
+ else
+ {
+ ret = ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+ u1_num_mbs_next, u1_tfr_n_mb,
+ u1_end_of_row);
+ if(ret != OK)
+ return ret;
+ }
+ /* Restart the group count after a transfer and mirror it into the decoder context */
+ if(u1_tfr_n_mb)
+ u1_num_mbs = 0;
+ u1_mb_idx = u1_num_mbs;
+ ps_dec->u1_mb_idx = u1_num_mbs;
+
+ }
+ }
+ while(uc_more_data_flag);
+ /* In separate-parse mode, publish end-of-slice and the number of MB addresses consumed */
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_dec->ps_parse_cur_slice->end_of_slice = 1;
+ ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr
+ - (u2_first_mb_in_slice << u1_mbaff);
+ }
+ return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_parse_ipcm_mb */
+/* */
+/* Description : This function decodes the pixel values of I_PCM Mb. */
+/* */
+/* Inputs : ps_dec, ps_cur_mb_info and mb number */
+/* */
+/* Processing : This function reads the luma and chroma pixels directly */
+/* from the bitstream when the mbtype is I_PCM and stores */
+/* them in recon buffer. If the entropy coding mode is */
+/* cabac, decoding engine is re-initialized. The nnzs and */
+/* cabac contexts are appropriately modified. */
+/* Returns : OK, or the error code of the CABAC re-initialization */
+/* */
+/* Revision History: */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Jay */
+/* */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_ipcm_mb(dec_struct_t * ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD8 u1_mbNum)
+{
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD8 *pu1_y, *pu1_u, *pu1_v;
+ WORD32 ret;
+
+ UWORD32 u4_rec_width_y, u4_rec_width_uv;
+ UWORD32 u1_num_mb_pair;
+ UWORD8 u1_x, u1_y;
+ /* CHANGED CODE */
+ tfr_ctxt_t *ps_frame_buf;
+ UWORD8 u1_mb_field_decoding_flag;
+ UWORD32 *pu4_buf;
+ UWORD8 *pu1_buf;
+ /* CHANGED CODE */
+ /* Destination pointers come from the parse-side context in separate-parse mode */
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_frame_buf = &ps_dec->s_tran_addrecon_parse;
+ }
+ else
+ {
+ ps_frame_buf = &ps_dec->s_tran_addrecon;
+ }
+ /* align bitstream to byte boundary. */
+ /* pcm_alignment_zero_bit discarded */
+ /* For XX GotoByteBoundary */
+ if(ps_bitstrm->u4_ofst & 0x07)
+ {
+ ps_bitstrm->u4_ofst += 8;
+ ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
+ }
+
+ /* Store left Nnz as 16 for each 4x4 blk */
+ /* NOTE(review): the four 32-bit stores below go through byte pointers and assume 4-byte alignment - confirm */
+ pu1_buf = ps_dec->pu1_left_nnz_y;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x10101010;
+ pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x10101010;
+ pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x10101010;
+ pu1_buf = ps_dec->pu1_left_nnz_uv;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x10101010;
+ ps_cur_mb_info->u1_cbp = 0xff;
+
+ ps_dec->i1_prev_mb_qp_delta = 0;
+ /* Index of this MB (of its MB pair when MBAFF is on), derived from u1_mbNum */
+ u1_num_mb_pair = (u1_mbNum >> u1_mbaff);
+
+ /*****************************************************************************/
+ /* calculate the RECON buffer YUV pointers for the PCM data */
+ /*****************************************************************************/
+ /* CHANGED CODE */
+ u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+ pu1_y = ps_frame_buf->pu1_dest_y + (u1_num_mb_pair << 4); /* 16 bytes per MB index */
+ pu1_u = ps_frame_buf->pu1_dest_u + (u1_num_mb_pair << 4); /* also 16 bytes per MB index; presumably interleaved U/V - confirm */
+ pu1_v = pu1_u + 1; /* V is addressed one byte after U */
+
+ u4_rec_width_y = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag; /* row stride shifted left by the field-decoding flag */
+ u4_rec_width_uv = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
+ /* CHANGED CODE */
+
+ if(u1_mbaff)
+ {
+ UWORD8 u1_top_mb;
+
+ u1_top_mb = ps_cur_mb_info->u1_topmb;
+ /* Bottom MB of a pair: move the destination down relative to the top MB */
+ if(u1_top_mb == 0)
+ {
+ pu1_y += (u1_mb_field_decoding_flag ?
+ (u4_rec_width_y >> 1) : (u4_rec_width_y << 4));
+ pu1_u += (u1_mb_field_decoding_flag ?
+ (u4_rec_width_uv) : (u4_rec_width_uv << 4)); /* NOTE(review): form differs from luma (no >> 1; << 4 although only 8 chroma rows are written) - confirm */
+ pu1_v = pu1_u + 1;
+ }
+ }
+
+ /* Read Luma samples: 16 rows of 16 samples, 8 bits each */
+ for(u1_y = 0; u1_y < 16; u1_y++)
+ {
+ for(u1_x = 0; u1_x < 16; u1_x++)
+ pu1_y[u1_x] = ih264d_get_bits_h264(ps_bitstrm, 8);
+
+ pu1_y += u4_rec_width_y;
+ }
+
+ /* Read Chroma samples: 8x8 for U, then 8x8 for V, each stored YUV420SP_FACTOR bytes apart */
+ for(u1_y = 0; u1_y < 8; u1_y++)
+ {
+ for(u1_x = 0; u1_x < 8; u1_x++)
+ pu1_u[u1_x * YUV420SP_FACTOR] = ih264d_get_bits_h264(ps_bitstrm, 8);
+
+ pu1_u += u4_rec_width_uv;
+ }
+
+ for(u1_y = 0; u1_y < 8; u1_y++)
+ {
+ for(u1_x = 0; u1_x < 8; u1_x++)
+ pu1_v[u1_x * YUV420SP_FACTOR] = ih264d_get_bits_h264(ps_bitstrm, 8);
+
+ pu1_v += u4_rec_width_uv;
+ }
+ /* CABAC only: restart the arithmetic decoder and rewrite the context/nnz state for this MB */
+ if(CABAC == ps_dec->ps_cur_pps->u1_entropy_coding_mode)
+ {
+ UWORD32 *pu4_buf; /* shadows the function-scope pu4_buf */
+ UWORD8 *pu1_buf; /* shadows the function-scope pu1_buf */
+ ctxt_inc_mb_info_t *p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+ /* Re-initialize the cabac decoding engine. */
+ ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
+ if(ret != OK)
+ return ret;
+ /* update the cabac contexts */
+ p_curr_ctxt->u1_mb_type = CAB_I_PCM;
+ p_curr_ctxt->u1_cbp = 47;
+ p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
+ p_curr_ctxt->u1_transform8x8_ctxt = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+ /* In CABAC mode the nnz bytes written as 0x10 above are overwritten with 0x01 */
+ pu1_buf = ps_dec->pu1_left_nnz_y;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x01010101;
+
+ pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_y;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x01010101;
+
+ pu1_buf = ps_cur_mb_info->ps_curmb->pu1_nnz_uv;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x01010101;
+
+ pu1_buf = ps_dec->pu1_left_nnz_uv;
+ pu4_buf = (UWORD32 *)pu1_buf;
+ *pu4_buf = 0x01010101;
+
+ p_curr_ctxt->u1_yuv_dc_csbp = 0x7;
+ ps_dec->pu1_left_yuv_dc_csbp[0] = 0x7;
+ if(ps_dec->ps_cur_slice->u1_slice_type != I_SLICE)
+ {
+ /* Outside I slices, also clear the MV and reference-index context increments (left and current) */
+ MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
+ memset(ps_dec->pi1_left_ref_idx_ctxt_inc, 0, 4);
+ MEMSET_16BYTES(p_curr_ctxt->u1_mv, 0);
+ memset(p_curr_ctxt->i1_ref_idx, 0, 4);
+
+ }
+ }
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_islice \endif
+ *
+ * \brief
+ * Reads the rest of the slice header of an I slice, then parses its MB data
+ * with the CABAC or CAVLC routine selected by the PPS entropy coding mode.
+ *
+ * \return
+ * 0 on Success and Error code otherwise
+ **************************************************************************
+ */
+WORD32 ih264d_parse_islice(dec_struct_t *ps_dec,
+ UWORD16 u2_first_mb_in_slice)
+{
+ dec_pic_params_t * ps_pps = ps_dec->ps_cur_pps;
+ dec_slice_params_t * ps_slice = ps_dec->ps_cur_slice;
+ UWORD32 *pu4_bitstrm_buf = ps_dec->ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_dec->ps_bitstrm->u4_ofst;
+ UWORD32 u4_temp;
+ WORD32 i_temp;
+ WORD32 ret;
+
+ /*--------------------------------------------------------------------*/
+ /* Read remaining contents of the slice header */
+ /*--------------------------------------------------------------------*/
+ /* dec_ref_pic_marking function */
+ /* G050 */
+ if(ps_slice->u1_nal_ref_idc != 0)
+ {
+ if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read) /* first read: parse the MMCO commands and remember the returned bit count */
+ ps_dec->u4_bitoffset = ih264d_read_mmco_commands(
+ ps_dec);
+ else /* already read: just skip the remembered number of bits */
+ ps_dec->ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
+ }
+ /* G050 */
+
+ /* Read slice_qp_delta; the resulting slice QP must lie in 0..51 */
+ i_temp = ps_pps->u1_pic_init_qp
+ + ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ if((i_temp < 0) || (i_temp > 51))
+ return ERROR_INV_RANGE_QP_T;
+ ps_slice->u1_slice_qp = i_temp;
+ COPYTHECONTEXT("SH: slice_qp_delta",
+ ps_slice->u1_slice_qp - ps_pps->u1_pic_init_qp);
+ /* Deblocking controls are read only when the PPS signals them; otherwise defaults of 0 are used */
+ if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
+ {
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp);
+
+ if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+ ps_slice->u1_disable_dblk_filter_idc = u4_temp;
+ if(u4_temp != 1) /* offsets are present in the header unless the idc is 1 */
+ {
+ i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf)
+ << 1; /* coded value is the offset divided by 2 */
+ if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+ ps_slice->i1_slice_alpha_c0_offset = i_temp;
+ COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2",
+ ps_slice->i1_slice_alpha_c0_offset >> 1);
+
+ i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf)
+ << 1; /* coded value is the offset divided by 2 */
+ if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+ ps_slice->i1_slice_beta_offset = i_temp;
+ COPYTHECONTEXT("SH: slice_beta_offset_div2",
+ ps_slice->i1_slice_beta_offset >> 1);
+
+ }
+ else
+ {
+ ps_slice->i1_slice_alpha_c0_offset = 0;
+ ps_slice->i1_slice_beta_offset = 0;
+ }
+ }
+ else
+ {
+ ps_slice->u1_disable_dblk_filter_idc = 0;
+ ps_slice->i1_slice_alpha_c0_offset = 0;
+ ps_slice->i1_slice_beta_offset = 0;
+ }
+
+ /* Initialization to check if number of motion vector per 2 Mbs */
+ /* are exceeding the range or not */
+ ps_dec->u2_mv_2mb[0] = 0;
+ ps_dec->u2_mv_2mb[1] = 0;
+
+
+ /* set slice_header_done to 2, to indicate a correct header */
+ DATA_SYNC();
+ ps_dec->ps_parse_cur_slice->slice_header_done = 2;
+
+ if(ps_pps->u1_entropy_coding_mode)
+ { /* non-zero entropy coding mode: CABAC path */
+ SWITCHOFFTRACE; SWITCHONTRACECABAC;
+ if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+ {
+ ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
+ }
+ else
+ ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_nonmbaff;
+
+ ret = ih264d_parse_islice_data_cabac(ps_dec, ps_slice,
+ u2_first_mb_in_slice);
+ if(ret != OK)
+ return ret;
+ SWITCHONTRACE; SWITCHOFFTRACECABAC;
+ if(ps_dec->ps_parse_cur_slice->u2_error_flag == 1) /* returns 0 here even though the error flag is set */
+ return 0;
+
+ }
+ else
+ { /* zero entropy coding mode: CAVLC path */
+ if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+ {
+ ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
+ }
+ else
+ ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_nonmbaff;
+ ret = ih264d_parse_islice_data_cavlc(ps_dec, ps_slice,
+ u2_first_mb_in_slice);
+ if(ret != OK)
+ return ret;
+ }
+
+ return OK;
+}
diff --git a/decoder/ih264d_parse_islice.h b/decoder/ih264d_parse_islice.h
new file mode 100755
index 0000000..6a43d7b
--- /dev/null
+++ b/decoder/ih264d_parse_islice.h
@@ -0,0 +1,113 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ **************************************************************************
+ * \file ih264d_parse_islice.h
+ *
+ * \brief
+ * Contains routines that decode an I slice type
+ *
+ * Detailed_description
+ *
+ * \date
+ * 07/07/2003
+ *
+ * \author NS
+ **************************************************************************
+ */
+
+#ifndef _IH264D_PARSE_ISLICE_H_
+#define _IH264D_PARSE_ISLICE_H_
+/* NOTE(review): the prototypes below use dec_struct_t, dec_mb_info_t and dec_slice_params_t - confirm one of these headers declares them */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_tables.h"
+
+WORD32 ih264d_parse_residual4x4_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD8 u1_offset);
+WORD32 ih264d_parse_residual4x4_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD8 u1_offset);
+WORD32 ih264d_parse_imb_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_mb_type);
+WORD32 ih264d_parse_imb_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_type);
+/* I-slice MB-data loops, one per entropy coding mode; both return OK or an error code */
+WORD32 ih264d_parse_islice_data_cavlc(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice);
+WORD32 ih264d_parse_islice_data_cabac(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice);
+WORD32 ih264d_parse_pmb_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2);
+WORD32 ih264d_parse_pmb_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2);
+
+WORD32 ih264d_parse_bmb_non_direct_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_mbNumModNBy2);
+
+WORD32 ih264d_parse_bmb_non_direct_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_mbNumModNBy2);
+
+WORD32 ih264d_parse_bmb_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2);
+
+WORD32 ih264d_parse_bmb_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2);
+
+WORD32 ih264d_parse_inter_slice_data_cavlc(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice);
+
+WORD32 ih264d_parse_inter_slice_data_cabac(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice);
+
+WORD32 ParseBMb(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2);
+/* I_PCM sample reader (u1_mbNum is the MB index passed by the slice-data loops) and the I-slice entry point */
+WORD32 ih264d_parse_ipcm_mb(dec_struct_t * ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD8 u1_mbNum);
+WORD32 ih264d_parse_islice(dec_struct_t *ps_dec,
+ UWORD16 u2_first_mb_in_slice);
+
+#endif /* _IH264D_PARSE_ISLICE_H_ */
diff --git a/decoder/ih264d_parse_mb_header.c b/decoder/ih264d_parse_mb_header.c
new file mode 100755
index 0000000..f30ad67
--- /dev/null
+++ b/decoder/ih264d_parse_mb_header.c
@@ -0,0 +1,1397 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ ***************************************************************************
+ * \file ih264d_parse_mb_header.c
+ *
+ * \brief
+ * This file contains routines that decode macroblock header syntax elements (e.g. MB type, sub-MB type, reference index) in CABAC mode.
+ *
+ * \date
+ * 04/02/2003
+ *
+ * \author NS
+ ***************************************************************************
+ */
+#include <string.h>
+#include "ih264d_structs.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_cabac.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_defs.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_tables.h"
+#include "ih264d_debug.h"
+#include "ih264d_defs.h"
+#include "ih264d_defs.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_parse_mb_header.h"
+#include "ih264d_defs.h"
+
+/*! < CtxtInc index 0 - CtxMbTypeI, CtxMbTypeSISuffix
+ index 1 - CtxMbTypePSuffix, CtxMbTypeBSuffix
+ */
+
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_mb_type_intra_cabac \endif
+ *
+ * \brief
+ * This function decodes an intra MB type using CABAC entropy coding mode.
+ * u1_inter selects the context set: 0 for the I-slice contexts, non-zero
+ * for the intra suffix used inside other slice types.
+ * \return
+ * MBType: 0 when the first bin is 0, 25 for I_PCM, else 1 plus decoded bins.
+ *
+ **************************************************************************
+ */
+UWORD8 ih264d_parse_mb_type_intra_cabac(UWORD8 u1_inter,
+ struct _DecStruct * ps_dec)
+{
+ decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+ dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+ ctxt_inc_mb_info_t * ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+ ctxt_inc_mb_info_t * ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+ bin_ctxt_model_t *ps_mb_bin_ctxt = ps_dec->p_mb_type_t;
+ WORD8 u1_mb_type, u1_bin;
+ UWORD32 u4_cxt_inc;
+
+ u4_cxt_inc = 0;
+ if(!u1_inter)
+ { /* context increment for b0: one per neighbour (left, top) that is not the default context and is not CAB_I4x4 */
+ if(ps_left_ctxt != ps_dec->ps_def_ctxt_mb_info)
+ u4_cxt_inc += ((ps_left_ctxt->u1_mb_type != CAB_I4x4) ? 1 : 0);
+ if(ps_top_ctxt != ps_dec->ps_def_ctxt_mb_info)
+ u4_cxt_inc += ((ps_top_ctxt->u1_mb_type != CAB_I4x4) ? 1 : 0);
+ }
+ else
+ { /* suffix case: use a different context base (offset 3, or 5 when ps_dec->u1_B is 1) with increment 0 */
+ ps_mb_bin_ctxt = ps_mb_bin_ctxt + 3 + (ps_dec->u1_B << 1);
+ }
+
+ /* b0 */
+ u1_mb_type = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
+ ps_cab_env);
+ if(u1_mb_type)
+ {
+ /* I16x16 or I_PCM mode */
+ /* b1 */
+ u1_bin = ih264d_decode_terminate(ps_cab_env, ps_bitstrm);
+ if(u1_bin == 0)
+ {
+ /* I16x16 mode */
+ /* Read b2 and b3 */
+ u4_cxt_inc = (u1_inter) ? 0x021 : 0x043; /* presumably one context increment per hex digit, one digit per bin - confirm in ih264d_decode_bins */
+
+ u1_bin = ih264d_decode_bins(2, u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
+ ps_cab_env);
+ /* Bit 0 of the two-bin result adds 4 and bit 1 adds 12 to the base type of 1 */
+ if(u1_bin & 0x01)
+ u1_mb_type += 4;
+
+ if(u1_bin & 0x02)
+ u1_mb_type += 12;
+
+ if(u1_bin & 0x01)
+ {
+ /* since b3=1, Read three bins */
+ u4_cxt_inc = (u1_inter) ? 0x0332 : 0x0765;
+ u1_bin = (UWORD8)ih264d_decode_bins(3, u4_cxt_inc, ps_mb_bin_ctxt,
+ ps_bitstrm, ps_cab_env);
+
+ }
+ else
+ {
+ /* Read two bins */
+ u4_cxt_inc = (u1_inter) ? 0x033 : 0x076;
+ u1_bin = (UWORD8)ih264d_decode_bins(2, u4_cxt_inc, ps_mb_bin_ctxt,
+ ps_bitstrm, ps_cab_env);
+ }
+ u1_mb_type += u1_bin; /* the remaining bins are added as a plain binary value */
+ }
+ else
+ {
+ /* I_PCM mode */
+ /* b1=1 */
+ u1_mb_type = 25;
+ }
+ }
+ return (u1_mb_type);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_mb_type_cabac \endif
+ *
+ * \brief
+ * This function decodes MB type using CABAC entropy coding mode for
+ * SI, P and B slices; other slice types fall through and yield 0.
+ * \return
+ * MBType, with intra types offset by 1 (SI), 5 (P) or 23 (B).
+ *
+ **************************************************************************
+ */
+UWORD32 ih264d_parse_mb_type_cabac(struct _DecStruct * ps_dec)
+{
+ const UWORD8 uc_slice_type = ps_dec->ps_cur_slice->u1_slice_type;
+ decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
+ dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+ ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+ ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+ WORD8 c_ctxt_inc;
+ bin_ctxt_model_t *ps_mb_bin_ctxt = ps_dec->p_mb_type_t;
+ WORD8 u1_mb_type = 0, u1_bin;
+ UWORD32 u4_cxt_inc;
+
+ INC_SYM_COUNT(ps_cab_env);
+
+ c_ctxt_inc = 0;
+
+ if(uc_slice_type == SI_SLICE)
+ {
+ /* b0: context increment counts neighbours that are not the default context and not CAB_SI4x4 */
+ if(ps_left_ctxt != ps_dec->ps_def_ctxt_mb_info)
+ c_ctxt_inc += ((ps_left_ctxt->u1_mb_type != CAB_SI4x4) ? 1 : 0);
+ if(ps_top_ctxt != ps_dec->ps_def_ctxt_mb_info)
+ c_ctxt_inc += ((ps_top_ctxt->u1_mb_type != CAB_SI4x4) ? 1 : 0);
+
+ u4_cxt_inc = c_ctxt_inc;
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
+ ps_cab_env);
+ if(u1_bin == 0)
+ {
+ /* SI MB */
+ u1_mb_type = 0;
+ }
+ else
+ {
+ u1_mb_type = 1 + ih264d_parse_mb_type_intra_cabac(0, ps_dec); /* intra types follow the single SI type */
+ }
+ }
+ else if(uc_slice_type == P_SLICE)
+ {
+ /* P Slice */
+ /* b0 */
+ u4_cxt_inc = 0;
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
+ ps_cab_env);
+ if(!u1_bin)
+ {
+ /* Inter MB types */
+ /* b1 */
+ u4_cxt_inc = 0x01;
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+ ps_bitstrm, ps_cab_env);
+ /* b2 */
+ u4_cxt_inc = u1_bin + 2; /* context of b2 depends on the value of b1 */
+ u1_mb_type = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+ ps_bitstrm, ps_cab_env);
+ u1_mb_type = (u1_bin << 1) + u1_mb_type; /* two-bit value b1b2 */
+ if(u1_mb_type)
+ u1_mb_type = 4 - u1_mb_type; /* non-zero values 1,2,3 map to types 3,2,1 */
+ }
+ else
+ {
+ /* Intra Prefix 1 found */
+ /* Intra MB type */
+ u1_mb_type = 5 + ih264d_parse_mb_type_intra_cabac(1, ps_dec);
+ }
+ }
+ else if(uc_slice_type == B_SLICE)
+ {
+ WORD8 a, b;
+ /* B Slice */
+ /* b0 */
+ /* a = b = 0, if B slice and MB is a SKIP or B_DIRECT16x16 */
+ a = 0;
+ b = 0;
+ u1_mb_type = 0;
+ if(ps_left_ctxt != ps_dec->ps_def_ctxt_mb_info)
+ a = ((ps_left_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16);
+ if(ps_top_ctxt != ps_dec->ps_def_ctxt_mb_info)
+ b = ((ps_top_ctxt->u1_mb_type & CAB_BD16x16_MASK) != CAB_BD16x16);
+
+ u4_cxt_inc = a + b;
+
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt, ps_bitstrm,
+ ps_cab_env);
+ /* b0 == 0 leaves u1_mb_type at 0 */
+ if(u1_bin)
+ {
+
+ /* b1 */
+ u4_cxt_inc = 0x03;
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+ ps_bitstrm, ps_cab_env);
+
+ if(!u1_bin)
+ {
+ /* b2 */
+ u4_cxt_inc = 0x05;
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+ ps_bitstrm, ps_cab_env);
+
+ u1_mb_type = u1_bin + 1; /* type 1 or 2 */
+ }
+ else
+ {
+ u1_mb_type = 3;
+ /* b2 */
+ u4_cxt_inc = 0x04;
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+ ps_bitstrm, ps_cab_env);
+
+ if(u1_bin)
+ {
+ u1_mb_type += 8; /* now 11 */
+ /* b3 */
+ u4_cxt_inc = 0x05;
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc, ps_mb_bin_ctxt,
+ ps_bitstrm, ps_cab_env);
+
+ if(!u1_bin)
+ {
+ u1_mb_type++; /* now 12 */
+ /* b4, b5, b6 */
+ u4_cxt_inc = 0x0555;
+ u1_bin = (UWORD8)ih264d_decode_bins(3, u4_cxt_inc,
+ ps_mb_bin_ctxt,
+ ps_bitstrm,
+ ps_cab_env);
+
+
+
+ u1_mb_type += u1_bin; /* 12 plus the three-bin value */
+ }
+ else
+ {
+ /* b4 */
+ u4_cxt_inc = 0x05;
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
+ ps_mb_bin_ctxt,
+ ps_bitstrm,
+ ps_cab_env);
+
+ if(u1_bin)
+ {
+ /* b5 */
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
+ ps_mb_bin_ctxt,
+ ps_bitstrm,
+ ps_cab_env);
+
+ u1_mb_type += (u1_bin ? 11 : 0); /* yields 11 or 22 */
+ }
+ else
+ {
+ u1_mb_type = 20;
+ /* b5 */
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
+ ps_mb_bin_ctxt,
+ ps_bitstrm,
+ ps_cab_env);
+
+ if(!u1_bin)
+ {
+ /* b6 */
+ u1_bin = (UWORD8)ih264d_decode_bin(u4_cxt_inc,
+ ps_mb_bin_ctxt,
+ ps_bitstrm,
+ ps_cab_env);
+
+ u1_mb_type += u1_bin; /* yields 20 or 21 */
+ }
+ else
+ {
+ /* Intra Prefix 111101 found */
+ /* Intra MB type */
+ u1_mb_type =
+ 23
+ + ih264d_parse_mb_type_intra_cabac(
+ 1,
+ ps_dec);
+ }
+ }
+ }
+ }
+ else
+ {
+ /* b3, b4, b5 */
+ u4_cxt_inc = 0x0555;
+ u1_bin = (UWORD8)ih264d_decode_bins(3, u4_cxt_inc,
+ ps_mb_bin_ctxt, ps_bitstrm,
+ ps_cab_env);
+
+
+
+
+ u1_mb_type += u1_bin; /* 3 plus the three-bin value */
+ }
+ }
+ }
+ }
+ return ((UWORD32)u1_mb_type);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_submb_type_cabac \endif
+ *
+ * \brief
+ * This function decodes a sub-MB type using CABAC entropy coding mode.
+ * u1_slc_type_b selects the B-slice bin tree (non-zero) or the P-slice one.
+ * \return
+ * Sub-MB type.
+ *
+ **************************************************************************
+ */
+UWORD32 ih264d_parse_submb_type_cabac(const UWORD8 u1_slc_type_b,
+ decoding_envirnoment_t * ps_cab_env,
+ dec_bit_stream_t * ps_bitstrm,
+ bin_ctxt_model_t * ps_sub_mb_cxt)
+{
+ WORD8 u1_sub_mb_type, u1_bin;
+
+ INC_SYM_COUNT(ps_cab_env);
+
+ u1_sub_mb_type = 0;
+ u1_bin = (UWORD8)ih264d_decode_bin(0, ps_sub_mb_cxt, ps_bitstrm,
+ ps_cab_env);
+ /* Type 0 is signalled by a first bin of 1 in P slices and of 0 in B slices (assuming the flag is 0 or 1) */
+ if(u1_slc_type_b ^ u1_bin)
+ return 0;
+
+ if(!u1_slc_type_b)
+ {
+ /* P Slice */
+ u1_sub_mb_type = 1;
+ u1_bin = (UWORD8)ih264d_decode_bin(1, ps_sub_mb_cxt, ps_bitstrm,
+ ps_cab_env);
+ if(u1_bin == 1)
+ {
+ u1_bin = (UWORD8)ih264d_decode_bin(2, ps_sub_mb_cxt, ps_bitstrm,
+ ps_cab_env);
+ u1_sub_mb_type = (2 + (!u1_bin)); /* third bin 1 gives type 2, 0 gives type 3 */
+ }
+
+ return u1_sub_mb_type;
+ }
+ else
+ {
+ /* B Slice */
+
+ /* b1 */
+ u1_bin = (UWORD8)ih264d_decode_bin(1, ps_sub_mb_cxt, ps_bitstrm,
+ ps_cab_env);
+ if(u1_bin)
+ {
+ /* b2 */
+ u1_bin = (UWORD8)ih264d_decode_bin(2, ps_sub_mb_cxt, ps_bitstrm,
+ ps_cab_env);
+ if(u1_bin)
+ {
+ /* b3 */
+ u1_sub_mb_type = 7;
+ u1_bin = (UWORD8)ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
+ ps_cab_env);
+ u1_sub_mb_type += u1_bin << 2; /* base becomes 11 when b3 is 1, stays 7 otherwise */
+ u1_bin = !u1_bin;
+ /* b4: a single bin when b3 was 1, two bins when b3 was 0 */
+ if(u1_bin == 0)
+ {
+ u1_bin = ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
+ ps_cab_env);
+ }
+ else
+ {
+ u1_bin = (UWORD8)ih264d_decode_bins(2, 0x33, ps_sub_mb_cxt,
+ ps_bitstrm, ps_cab_env);
+ }
+
+ return (u1_sub_mb_type + u1_bin);
+ }
+ else
+ {
+ /* b3 and b4, read together as a two-bin value */
+ u1_bin = (UWORD8)ih264d_decode_bins(2, 0x33, ps_sub_mb_cxt,
+ ps_bitstrm, ps_cab_env);
+ return (3 + u1_bin);
+ }
+ }
+ else
+ {
+ /* b2 */
+ u1_bin = (UWORD8)ih264d_decode_bin(3, ps_sub_mb_cxt, ps_bitstrm,
+ ps_cab_env);
+ return (1 + u1_bin);
+ }
+ }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_ref_idx_cabac \endif
+ *
+ * \brief
+ *    Decodes the reference indices of the partitions of a MB using CABAC
+ *    entropy coding mode.
+ *
+ *    For every partition whose entry in pi1_ref_idx is greater than zero a
+ *    unary coded value is decoded and written back to pi1_ref_idx. The
+ *    left / top context arrays are updated for every partition, whether or
+ *    not a value was decoded.
+ *
+ * \return
+ *    OK on success, ERROR_REF_IDX when a decoded index is negative or
+ *    larger than u1_max_ref_minus1.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_parse_ref_idx_cabac(const UWORD8 u1_num_part,
+                                  const UWORD8 u1_b2,
+                                  const UWORD8 u1_max_ref_minus1,
+                                  const UWORD8 u1_mb_mode,
+                                  WORD8 * pi1_ref_idx,
+                                  WORD8 * const pi1_lft_cxt,
+                                  WORD8 * const pi1_top_cxt,
+                                  decoding_envirnoment_t * const ps_cab_env,
+                                  dec_bit_stream_t * const ps_bitstrm,
+                                  bin_ctxt_model_t * const ps_ref_cxt)
+{
+    /* Block number advances by 1 or 2 depending on bit 0 of the MB mode */
+    const UWORD8 u1_blk_step = 1 + (u1_mb_mode & 0x01);
+    UWORD8 u1_blk = 0;
+    UWORD8 u1_part;
+
+    for(u1_part = 0; u1_part < u1_num_part; u1_part++)
+    {
+        const UWORD8 u1_lft = ((u1_blk & 0x02) >> 1) + u1_b2;
+        const UWORD8 u1_top = (u1_blk & 0x01) + u1_b2;
+        WORD8 i1_ref = pi1_ref_idx[u1_part];
+
+        if(i1_ref > 0)
+        {
+            /* First bin context : (left > 0) + 2 * (top > 0) */
+            UWORD32 u4_ctx = (pi1_lft_cxt[u1_lft] > 0)
+                            + ((pi1_top_cxt[u1_top] > 0) << 1);
+
+            u4_ctx |= 0x55540;
+
+            i1_ref = (WORD8)ih264d_decode_bins_unary(32, u4_ctx, ps_ref_cxt,
+                                                     ps_bitstrm, ps_cab_env);
+
+            if((i1_ref < 0) || (i1_ref > u1_max_ref_minus1))
+            {
+                return ERROR_REF_IDX;
+            }
+
+            pi1_ref_idx[u1_part] = i1_ref;
+
+            INC_SYM_COUNT(ps_cab_env);
+        }
+
+        /* Storing Reference Idx Information */
+        pi1_lft_cxt[u1_lft] = i1_ref;
+        pi1_top_cxt[u1_top] = i1_ref;
+        u1_blk = u1_blk + u1_blk_step;
+    }
+
+    /* Not a sub MB : replicate the first entry into the neighbouring slot */
+    if(u1_num_part != 4)
+    {
+        pi1_lft_cxt[(!(u1_mb_mode & 0x1)) + u1_b2] = pi1_lft_cxt[u1_b2];
+        pi1_top_cxt[(!(u1_mb_mode & 0x2)) + u1_b2] = pi1_top_cxt[u1_b2];
+    }
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_mb_qp_delta_cabac \endif
+ *
+ * \brief
+ *    Decodes MB Qp delta using CABAC entropy coding mode.
+ *
+ *    The unary code number is read in up to two steps (32 bins, then 21
+ *    more when the first step returns 32) and mapped to a signed value:
+ *    magnitude (code_num + 1) >> 1, negative when code_num is even.
+ *
+ * \param ps_dec           decoder context; i1_prev_mb_qp_delta selects the
+ *                         first bin context and is updated on success
+ * \param pi1_mb_qp_delta  receives the decoded delta (written even when the
+ *                         range check fails)
+ *
+ * \return
+ *    OK on success, ERROR_INV_RANGE_QP_T when the delta is outside
+ *    -26 to +25.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_parse_mb_qp_delta_cabac(struct _DecStruct * ps_dec,
+                                      WORD8 *pi1_mb_qp_delta)
+{
+    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+    bin_ctxt_model_t *ps_qp_ctxt = ps_dec->p_mb_qp_delta_t;
+    UWORD32 u4_ctx;
+    UWORD8 u1_code;
+    WORD8 i1_delta;
+
+    INC_SYM_COUNT(ps_cab_env);
+
+    /* First bin context depends on whether the previous delta was non-zero */
+    u4_ctx = (ps_dec->i1_prev_mb_qp_delta != 0) ? 1 : 0;
+    u4_ctx |= 0x33320;
+
+    /* max number of bins = 53,
+     since Range for MbQpDelta= -26 to +25 inclusive, UNARY code */
+    u1_code = ih264d_decode_bins_unary(32, u4_ctx, ps_qp_ctxt, ps_bitstrm,
+                                       ps_cab_env);
+    if(u1_code == 32)
+    {
+        /* Read remaining 21 bins */
+        UWORD8 u1_extra = ih264d_decode_bins_unary(21, 0x33333, ps_qp_ctxt,
+                                                   ps_bitstrm, ps_cab_env);
+
+        u1_code = u1_code + u1_extra;
+    }
+
+    /* Table 9.3: If code_num is even Syntax Element has -ve value */
+    i1_delta = (u1_code + 1) >> 1;
+    if((u1_code & 0x01) == 0)
+    {
+        i1_delta = -i1_delta;
+    }
+    *pi1_mb_qp_delta = i1_delta;
+
+    /* Range of MbQpDelta= -26 to +25 inclusive */
+    if((i1_delta < -26) || (i1_delta > 25))
+    {
+        return ERROR_INV_RANGE_QP_T;
+    }
+
+    ps_dec->i1_prev_mb_qp_delta = i1_delta;
+    return OK;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_chroma_pred_mode_cabac \endif
+ *
+ * \brief
+ *    Decodes the intra chroma prediction mode using CABAC entropy coding
+ *    mode. Binarization is truncated unary with Cmax = 3.
+ *
+ *    The first bin context is the number of neighbours (left, top) whose
+ *    u1_intra_chroma_pred_mode is non-zero; the later bins use context 3.
+ *
+ * \return
+ *    Decoded chroma prediction mode.
+ *
+ **************************************************************************
+ */
+WORD8 ih264d_parse_chroma_pred_mode_cabac(struct _DecStruct * ps_dec)
+{
+    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+    ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+    ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+    WORD8 i1_left_set, i1_top_set;
+    UWORD32 u4_cxt_inc;
+
+    INC_SYM_COUNT(ps_cab_env);
+
+    i1_left_set = (ps_left_ctxt->u1_intra_chroma_pred_mode != 0);
+    i1_top_set = (ps_top_ctxt->u1_intra_chroma_pred_mode != 0);
+
+    u4_cxt_inc = i1_left_set + i1_top_set;
+    u4_cxt_inc |= 0x330;
+
+    return ih264d_decode_bins_tunary(3, u4_cxt_inc,
+                                     ps_dec->p_intra_chroma_pred_mode_t,
+                                     ps_bitstrm, ps_cab_env);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_transform8x8flag_cabac                      */
+/*                                                                           */
+/*  Description   : Decodes the transform 8x8 flag of a MB with one CABAC    */
+/*                  bin. The context increment is the sum of the left and    */
+/*                  top u1_transform8x8_ctxt values; a neighbour that is     */
+/*                  not flagged available contributes 0.                     */
+/*  Inputs        : ps_dec         - decoder context                         */
+/*                  ps_cur_mb_info - current MB info (neighbour              */
+/*                                   availability)                           */
+/*  Returns       : decoded flag                                             */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*                      Rajasekhar      Creation                             */
+/*                                                                           */
+/*****************************************************************************/
+UWORD8 ih264d_parse_transform8x8flag_cabac(struct _DecStruct * ps_dec,
+                                           dec_mb_info_t * ps_cur_mb_info)
+{
+    decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
+    dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+    ctxt_inc_mb_info_t *ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+    ctxt_inc_mb_info_t *ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+    const UWORD8 u1_avail = ps_cur_mb_info->u1_mb_ngbr_availablity;
+
+    /* Binarization is FLC; neighbours that are not available count as 0 */
+    WORD8 i1_left = (u1_avail & LEFT_MB_AVAILABLE_MASK) ?
+                    ps_left_ctxt->u1_transform8x8_ctxt : 0;
+    WORD8 i1_top = (u1_avail & TOP_MB_AVAILABLE_MASK) ?
+                    ps_top_ctxt->u1_transform8x8_ctxt : 0;
+    UWORD32 u4_cxt_inc = i1_left + i1_top;
+
+    return ih264d_decode_bin(u4_cxt_inc,
+                             ps_dec->s_high_profile.ps_transform8x8_flag,
+                             ps_bitstrm, ps_cab_env);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_read_intra_pred_modes_cabac \endif
+ *
+ * \brief
+ * Reads the intra pred mode related values of I4x4 MB from bitstream.
+ *
+ * This function reads the prev intra pred mode flags and stores them in
+ * pu1_prev_intra4x4_pred_mode_flag. When a flag is zero, the
+ * rem_intra4x4_pred_mode is read as 3 bins (first bin is the least
+ * significant bit) and stored in the pu1_rem_intra4x4_pred_mode array;
+ * otherwise -1 is stored in that array entry.
+ *
+ * 16 flag/mode pairs are processed when u1_tran_form8x8 is 0, else 4.
+ * The arithmetic decoder range and offset are kept in locals for the
+ * whole loop and written back to the CABAC environment at the end.
+ *
+ * \return
+ * 0 (no error path exists in this function)
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_read_intra_pred_modes_cabac(dec_struct_t * ps_dec,
+ UWORD8 * pu1_prev_intra4x4_pred_mode_flag,
+ UWORD8 * pu1_rem_intra4x4_pred_mode,
+ UWORD8 u1_tran_form8x8)
+{
+ WORD32 i4x4_luma_blk_idx = 0;
+ dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+ decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+ bin_ctxt_model_t *ps_ctxt_ipred_luma_mpm, *ps_ctx_ipred_luma_rm;
+ WORD32 i4_rem_intra4x4_pred_mode;
+ UWORD32 u4_prev_intra4x4_pred_mode_flag;
+ UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+ const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+
+ ps_ctxt_ipred_luma_mpm = ps_dec->p_prev_intra4x4_pred_mode_flag_t;
+ ps_ctx_ipred_luma_rm = ps_dec->p_rem_intra4x4_pred_mode_t;
+ SWITCHOFFTRACE;
+
+ i4x4_luma_blk_idx = (0 == u1_tran_form8x8) ? 16 : 4; /* number of blocks still to parse */
+
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+
+ do
+ {
+ /* prev_intra pred mode flag of the current block */
+ DECODE_ONE_BIN_MACRO(ps_ctxt_ipred_luma_mpm, u4_code_int_range,
+ u4_code_int_val_ofst, pu4_table, ps_bitstrm,
+ u4_prev_intra4x4_pred_mode_flag)
+ *pu1_prev_intra4x4_pred_mode_flag = u4_prev_intra4x4_pred_mode_flag;
+
+ i4_rem_intra4x4_pred_mode = -1; /* value stored when the flag is set */
+ if(!u4_prev_intra4x4_pred_mode_flag)
+ {
+
+ /*inlining DecodeDecisionBins_FLC*/
+
+ {
+
+ UWORD8 u1_max_bins = 3;
+ UWORD32 u4_value;
+ UWORD32 u4_symbol, i;
+
+ i = 0;
+ u4_value = 0;
+
+ do
+ {
+
+ DECODE_ONE_BIN_MACRO(ps_ctx_ipred_luma_rm, u4_code_int_range,
+ u4_code_int_val_ofst, pu4_table,
+ ps_bitstrm, u4_symbol)
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+ u4_value = u4_value | (u4_symbol << i); /* bin i becomes bit i */
+
+ i++;
+ }
+ while(i < u1_max_bins);
+
+ i4_rem_intra4x4_pred_mode = (u4_value);
+
+ }
+
+ }
+
+ (*pu1_rem_intra4x4_pred_mode) = i4_rem_intra4x4_pred_mode; /* -1 is converted to UWORD8 here */
+
+ COPYTHECONTEXT("intra4x4_pred_mode", i4_rem_intra4x4_pred_mode);
+
+ pu1_prev_intra4x4_pred_mode_flag++;
+ pu1_rem_intra4x4_pred_mode++;
+
+ i4x4_luma_blk_idx--;
+ }
+ while(i4x4_luma_blk_idx);
+ /* write the locally tracked decoder state back */
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+ return (0);
+
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_ctx_cbp_cabac \endif
+ *
+ * \brief
+ * This function decodes CtxCbpLuma and CtxCbpChroma (CBP of a Macroblock).
+ * using CABAC entropy coding mode.
+ *
+ * Luma: four bins are decoded, one per bit 0..3 of the result. Each bin
+ * uses context (2 * !B) + !A, where A and B are single bits taken from
+ * the left / top neighbour u1_cbp or from luma bits already decoded.
+ * The four bins are decoded after one up-front renormalization and are
+ * followed by a conditional renormalization.
+ *
+ * Chroma: a truncated unary value with at most 2 bins is decoded and
+ * placed at bit 4 upwards of the result. Its contexts are derived from
+ * whether the top / left u1_cbp exceed 15 (first bin) or 31 (second bin).
+ *
+ * The bitstream offset and the decoder range / offset are tracked in
+ * locals and written back before returning.
+ *
+ * \return
+ * CBP of a MB.
+ *
+ **************************************************************************
+ */
+UWORD32 ih264d_parse_ctx_cbp_cabac(struct _DecStruct * ps_dec)
+{
+
+ UWORD32 u4_cxt_inc;
+ decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+ dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+ ctxt_inc_mb_info_t * ps_left_ctxt = ps_dec->p_left_ctxt_mb_info;
+ ctxt_inc_mb_info_t * ps_top_ctxt = ps_dec->p_top_ctxt_mb_info;
+ bin_ctxt_model_t *ps_ctxt_cbp_luma = ps_dec->p_cbp_luma_t, *ps_bin_ctxt;
+ WORD8 c_Cbp; /* CBP being assembled: luma in bits 0..3, chroma from bit 4 */
+ UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+ UWORD32 u4_offset, *pu4_buffer;
+ const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+
+ INC_SYM_COUNT(ps_cab_env);
+
+
+
+ /* CBP Luma, FL, Cmax = 15, L = 4 */
+ u4_cxt_inc = (!((ps_top_ctxt->u1_cbp >> 2) & 0x01)) << 1;
+ u4_cxt_inc += !((ps_left_ctxt->u1_cbp >> 1) & 0x01);
+
+ u4_offset = ps_bitstrm->u4_ofst;
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+ /* renormalize so that 23 more bits are available in u4_code_int_val_ofst */
+ {
+ UWORD32 u4_clz, read_bits;
+
+ u4_clz = CLZ(u4_code_int_range);
+ FLUSHBITS(u4_offset, u4_clz)
+ NEXTBITS(read_bits, u4_offset, pu4_buffer, 23)
+ u4_code_int_range = u4_code_int_range << u4_clz;
+ u4_code_int_val_ofst = (u4_code_int_val_ofst << u4_clz) | read_bits;
+ }
+
+ ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
+
+ /*inlining DecodeDecision_onebin without renorm*/
+ {
+
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+ UWORD32 table_lookup;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+ /*if mps*/
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+ u4_int_range_lps, u1_mps_state, table_lookup)
+
+ INC_BIN_COUNT(ps_cab_env);
+
+ ps_bin_ctxt->u1_mps_state = u1_mps_state;
+
+ c_Cbp = u4_symbol; /* luma bit 0 */
+
+ }
+ /* luma bit 1: context from top u1_cbp bit 3 and decoded bit 0 */
+ u4_cxt_inc = (!((ps_top_ctxt->u1_cbp >> 3) & 0x01)) << 1;
+ u4_cxt_inc += !(c_Cbp & 0x01);
+ ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
+ /*inlining DecodeDecision_onebin without renorm*/
+
+ {
+
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+ UWORD32 table_lookup;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+ /*if mps*/
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+ u4_int_range_lps, u1_mps_state, table_lookup)
+
+ INC_BIN_COUNT(ps_cab_env);
+
+ ps_bin_ctxt->u1_mps_state = u1_mps_state;
+
+ c_Cbp |= u4_symbol << 1; /* luma bit 1 */
+
+ }
+ /* luma bit 2: context from decoded bit 0 and left u1_cbp bit 3 */
+ u4_cxt_inc = (!(c_Cbp & 0x01)) << 1;
+ u4_cxt_inc += !((ps_left_ctxt->u1_cbp >> 3) & 0x01);
+ ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
+ /*inlining DecodeDecision_onebin without renorm*/
+
+ {
+
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+ UWORD32 table_lookup;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+ /*if mps*/
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+ u4_int_range_lps, u1_mps_state, table_lookup)
+
+ INC_BIN_COUNT(ps_cab_env);
+
+ ps_bin_ctxt->u1_mps_state = u1_mps_state;
+
+ c_Cbp |= u4_symbol << 2; /* luma bit 2 */
+
+ }
+ /* luma bit 3: context from decoded bits 1 and 2 */
+ u4_cxt_inc = (!((c_Cbp >> 1) & 0x01)) << 1;
+ u4_cxt_inc += !((c_Cbp >> 2) & 0x01);
+ ps_bin_ctxt = ps_ctxt_cbp_luma + u4_cxt_inc;
+ /*inlining DecodeDecision_onebin without renorm*/
+
+ {
+
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+ UWORD32 u4_symbol, u1_mps_state;
+ UWORD32 table_lookup;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2) + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+ /*if mps*/
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst, u4_symbol,
+ u4_int_range_lps, u1_mps_state, table_lookup)
+
+ INC_BIN_COUNT(ps_cab_env);
+
+ ps_bin_ctxt->u1_mps_state = u1_mps_state;
+
+ c_Cbp |= u4_symbol << 3; /* luma bit 3 */
+
+ }
+ /* renormalize once after the four luma bins, only if the range is small */
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
+ {
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst, u4_offset,
+ pu4_buffer)
+
+ }
+
+ {
+ UWORD32 u4_cxt_inc;
+ WORD8 a, b, c, d;
+ bin_ctxt_model_t *p_CtxtCbpChroma = ps_dec->p_cbp_chroma_t;
+
+ /* CBP Chroma, TU, Cmax = 2 */
+ a = 0;
+ b = 0;
+ c = 0;
+ d = 0;
+
+ {
+ a = (ps_top_ctxt->u1_cbp > 15) ? 2 : 0;
+ c = (ps_top_ctxt->u1_cbp > 31) ? 2 : 0;
+ }
+
+ {
+ b = (ps_left_ctxt->u1_cbp > 15) ? 1 : 0;
+ d = (ps_left_ctxt->u1_cbp > 31) ? 1 : 0;
+ }
+ u4_cxt_inc = a + b; /* low nibble: context of the first chroma bin */
+ u4_cxt_inc = (u4_cxt_inc | ((4 + c + d) << 4)); /* next nibble: context of the second bin */
+
+ /*inlining ih264d_decode_bins_tunary */
+
+ {
+
+ UWORD8 u1_max_bins = 2;
+ UWORD32 u4_ctx_inc = u4_cxt_inc;
+
+ UWORD32 u4_value;
+ UWORD32 u4_symbol;
+ UWORD8 u4_ctx_Inc;
+ bin_ctxt_model_t *ps_bin_ctxt;
+ u4_value = 0;
+
+ do
+ {
+ u4_ctx_Inc = u4_ctx_inc & 0xF; /* one nibble of packed contexts per bin */
+ u4_ctx_inc = u4_ctx_inc >> 4;
+
+ ps_bin_ctxt = p_CtxtCbpChroma + u4_ctx_Inc;
+ /*inlining DecodeDecision_onebin*/
+ {
+
+ UWORD32 u4_qnt_int_range, u4_int_range_lps;
+
+ UWORD32 u1_mps_state;
+ UWORD32 table_lookup;
+ UWORD32 u4_clz;
+
+ u1_mps_state = (ps_bin_ctxt->u1_mps_state);
+
+ u4_clz = CLZ(u4_code_int_range);
+ u4_qnt_int_range = u4_code_int_range << u4_clz;
+ u4_qnt_int_range = (u4_qnt_int_range >> 29) & 0x3;
+
+ table_lookup = pu4_table[(u1_mps_state << 2)
+ + u4_qnt_int_range];
+ u4_int_range_lps = table_lookup & 0xff;
+
+ u4_int_range_lps = u4_int_range_lps << (23 - u4_clz);
+ u4_code_int_range = u4_code_int_range - u4_int_range_lps;
+
+ u4_symbol = ((u1_mps_state >> 6) & 0x1);
+
+ /*if mps*/
+ u1_mps_state = (table_lookup >> 8) & 0x7F;
+
+ CHECK_IF_LPS(u4_code_int_range, u4_code_int_val_ofst,
+ u4_symbol, u4_int_range_lps, u1_mps_state,
+ table_lookup)
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_8)
+ {
+ RENORM_RANGE_OFFSET(u4_code_int_range,
+ u4_code_int_val_ofst, u4_offset,
+ pu4_buffer)
+ }
+ ps_bin_ctxt->u1_mps_state = u1_mps_state;
+ }
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(
+ ps_cab_env);
+
+ u4_value++;
+ }
+ while((u4_value < u1_max_bins) & (u4_symbol)); /* bitwise '&'; NOTE(review): relies on u4_symbol being 0 or 1 - confirm against CHECK_IF_LPS */
+
+ u4_value = u4_value - 1 + u4_symbol; /* bins decoded minus one, plus the last bin's value */
+
+ a = (u4_value);
+
+ }
+
+c_Cbp = (c_Cbp | (a << 4));
+}
+
+ps_bitstrm->u4_ofst = u4_offset;
+
+ps_cab_env->u4_code_int_range = u4_code_int_range;
+ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+
+return (c_Cbp);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_mvd_cabac \endif
+ *
+ * \brief
+ *    Decodes the horizontal and vertical mvd of one partition using CABAC
+ *    entropy coding mode as defined in 9.3.2.3, and updates the top / left
+ *    mvd context arrays.
+ *
+ *    When u1_dec_mvd is zero nothing is decoded, ps_mv is left untouched
+ *    and zeros are written to the context arrays. Otherwise the x and then
+ *    the y component are decoded, stored in ps_mv->i2_mv[u1_b2] and
+ *    [u1_b2 + 1], and their absolute values clipped to 0..127 are written
+ *    to the context arrays.
+ *
+ * \return
+ *    None
+ *
+ **************************************************************************
+ */
+void ih264d_get_mvd_cabac(UWORD8 u1_sub_mb,
+                          UWORD8 u1_b2,
+                          UWORD8 u1_part_wd,
+                          UWORD8 u1_part_ht,
+                          UWORD8 u1_dec_mvd,
+                          dec_struct_t *ps_dec,
+                          mv_pred_t *ps_mv)
+{
+    const UWORD8 u1_col = (UWORD8)(u1_sub_mb & 0x03);
+    const UWORD8 u1_row = (UWORD8)(u1_sub_mb >> 2);
+    UWORD8 *pu1_top = &ps_dec->ps_curr_ctxt_mb_info->u1_mv[u1_col][u1_b2];
+    UWORD8 *pu1_left = &ps_dec->pu1_left_mv_ctxt_inc[u1_row][u1_b2];
+    UWORD8 u1_abs_x = 0;
+    UWORD8 u1_abs_y = 0;
+
+    if(u1_dec_mvd)
+    {
+        decoding_envirnoment_t *ps_cab_env = &ps_dec->s_cab_dec_env;
+        dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+        WORD16 *pi2_mvd = &ps_mv->i2_mv[u1_b2];
+        /* Sum of the left and top absolute mvd, per component */
+        const WORD32 i4_sum_x = (UWORD16)pu1_left[0] + (UWORD16)pu1_top[0];
+        const WORD32 i4_sum_y = (UWORD16)pu1_left[1] + (UWORD16)pu1_top[1];
+        WORD16 i2_mv_x, i2_mv_y;
+
+        /* x must be decoded before y */
+        i2_mv_x = ih264d_parse_mvd_cabac(ps_bitstrm, ps_cab_env,
+                                         ps_dec->p_mvd_x_t, i4_sum_x);
+        i2_mv_y = ih264d_parse_mvd_cabac(ps_bitstrm, ps_cab_env,
+                                         ps_dec->p_mvd_y_t, i4_sum_y);
+
+        /* Absolute values, clipped, are what the contexts keep */
+        u1_abs_x = CLIP3(0, 127, ABS(i2_mv_x));
+        u1_abs_y = CLIP3(0, 127, ABS(i2_mv_y));
+
+        COPYTHECONTEXT("MVD", i2_mv_x);COPYTHECONTEXT("MVD", i2_mv_y);
+
+        /* Storing Mv residuals */
+        pi2_mvd[0] = i2_mv_x;
+        pi2_mvd[1] = i2_mv_y;
+    }
+
+    /***************************************************************/
+    /* Store abs_mvd_values cabac contexts                         */
+    /***************************************************************/
+#ifndef ARM
+    {
+        UWORD8 u1_n;
+
+        /* One (x, y) pair every 4 bytes, u1_part_wd pairs on top */
+        for(u1_n = 0; u1_n < u1_part_wd; u1_n++)
+        {
+            pu1_top[(u1_n << 2)] = u1_abs_x;
+            pu1_top[(u1_n << 2) + 1] = u1_abs_y;
+        }
+
+        /* ... and u1_part_ht pairs on the left */
+        for(u1_n = 0; u1_n < u1_part_ht; u1_n++)
+        {
+            pu1_left[(u1_n << 2)] = u1_abs_x;
+            pu1_left[(u1_n << 2) + 1] = u1_abs_y;
+        }
+    }
+#else
+    /* Optimising the loop, with Little-Endian Assumption */
+    {
+        UWORD16 *pu2_top = (UWORD16 *)pu1_top;
+        UWORD16 *pu2_left = (UWORD16 *)pu1_left;
+        const UWORD16 u2_pair = (UWORD16)((u1_abs_y << 8) | u1_abs_x);
+
+        /* 1 pair for width 1, 2 pairs for width 2, 4 pairs otherwise */
+        pu2_top[0] = u2_pair;
+        if(u1_part_wd != 1)
+        {
+            pu2_top[2] = u2_pair;
+            if(u1_part_wd != 2)
+            {
+                pu2_top[4] = u2_pair;
+                pu2_top[6] = u2_pair;
+            }
+        }
+
+        /* Same pattern for the left column, driven by the height */
+        pu2_left[0] = u2_pair;
+        if(u1_part_ht != 1)
+        {
+            pu2_left[2] = u2_pair;
+            if(u1_part_ht != 2)
+            {
+                pu2_left[4] = u2_pair;
+                pu2_left[6] = u2_pair;
+            }
+        }
+    }
+#endif
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_parse_mvd_cabac */
+/* */
+/* Description : This cabac function decodes the mvd in a given */
+/* direction ( x or y ) as defined in 9.3.2.3. */
+/* */
+/* Inputs : 1. pointer to Bitstream */
+/* 2. pointer to cabac decoding environment */
+/* 3. pointer to Mvd context */
+/* 4. i4_temp : value compared with 3 and 32 to pick the */
+/* context of the first bin (its low 16 bits are used) */
+/* */
+/* Processing : see section 9.3.2.3 of the standard. */
+/* A prefix of at most 9 bins is decoded with context */
+/* models; when the prefix equals 9 a bypass-coded suffix */
+/* is added; a bypass sign bin follows for non-zero mvd. */
+/* */
+/* Outputs : i2_mvd */
+/* Returns : i2_mvd */
+/* */
+/* Issues : none */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 16 06 2005 Jay Draft */
+/* */
+/*****************************************************************************/
+WORD16 ih264d_parse_mvd_cabac(dec_bit_stream_t * ps_bitstrm,
+ decoding_envirnoment_t * ps_cab_env,
+ bin_ctxt_model_t * p_ctxt_mvd,
+ UWORD32 i4_temp)
+
+{
+ WORD8 k;
+ WORD16 i2_suf;
+ WORD16 i2_mvd;
+ UWORD16 u2_abs_mvd;
+ UWORD32 u4_ctx_inc;
+ UWORD32 u4_prefix;
+ const UWORD32 *pu4_table = (const UWORD32 *)ps_cab_env->cabac_table;
+ UWORD32 u4_code_int_range, u4_code_int_val_ofst;
+
+ /* if mvd < 9 */
+ /* mvd = Prefix */
+ /* else */
+ /* mvd = Prefix + Suffix */
+ /* decode sign bit */
+ /* Prefix TU decoding Cmax =Ucoff and Suffix 3rd order Exp-Golomb */
+
+ u2_abs_mvd = (UWORD16)i4_temp;
+ u4_ctx_inc = 1;
+ /* first-bin context: 0 below 3, 2 above 32, otherwise 1 */
+ if(u2_abs_mvd < 3)
+ u4_ctx_inc = 0;
+ else if(u2_abs_mvd > 32)
+ u4_ctx_inc = 2;
+
+ u4_ctx_inc = (u4_ctx_inc | 0x65430); /* one context nibble per bin, consumed lowest nibble first */
+
+ /*inlining modified version of ih264d_decode_bins_unary*/
+
+ {
+ UWORD8 u1_max_bins = 9;
+ UWORD32 u4_value;
+ UWORD32 u4_symbol;
+ bin_ctxt_model_t *ps_bin_ctxt;
+ UWORD32 u4_ctx_Inc;
+
+ u4_value = 0;
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+ /* up to 5 bins, each with its own context nibble */
+ do
+ {
+ u4_ctx_Inc = u4_ctx_inc & 0xf;
+ u4_ctx_inc = u4_ctx_inc >> 4;
+
+ ps_bin_ctxt = p_ctxt_mvd + u4_ctx_Inc;
+
+ DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range,
+ u4_code_int_val_ofst, pu4_table, ps_bitstrm,
+ u4_symbol)
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+
+ u4_value++;
+
+ }
+ while(u4_symbol && u4_value < 5);
+
+ ps_bin_ctxt = p_ctxt_mvd + 6; /* the remaining prefix bins all use context 6 */
+
+ if(u4_symbol && (u4_value < u1_max_bins))
+ {
+
+ do
+ {
+
+ DECODE_ONE_BIN_MACRO(ps_bin_ctxt, u4_code_int_range,
+ u4_code_int_val_ofst, pu4_table,
+ ps_bitstrm, u4_symbol)
+
+ INC_BIN_COUNT(ps_cab_env);INC_DECISION_BINS(ps_cab_env);
+ u4_value++;
+ }
+ while(u4_symbol && (u4_value < u1_max_bins));
+
+ }
+
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+ u4_value = u4_value - 1 + u4_symbol; /* bins decoded minus one, plus the last bin's value */
+ u4_prefix = (u4_value);
+ }
+
+ i2_mvd = u4_prefix;
+
+ if(i2_mvd == 9)
+ {
+ /* Read Suffix */
+ k = ih264d_decode_bypass_bins_unary(ps_cab_env, ps_bitstrm);
+ i2_suf = (1 << k) - 1;
+ k = k + 3;
+ i2_suf = (i2_suf << 3); /* ((1 << k) - 1) * 8 */
+ i2_mvd += i2_suf;
+ i2_suf = ih264d_decode_bypass_bins(ps_cab_env, k, ps_bitstrm); /* k + 3 bypass bins */
+ i2_mvd += i2_suf;
+ }
+ /* Read Sign bit */
+ if(!i2_mvd)
+ return (i2_mvd);
+
+ else
+ {
+ UWORD32 u4_code_int_val_ofst, u4_code_int_range; /* shadow the outer locals; reloaded from ps_cab_env below */
+
+ u4_code_int_val_ofst = ps_cab_env->u4_code_int_val_ofst;
+ u4_code_int_range = ps_cab_env->u4_code_int_range;
+
+ if(u4_code_int_range < ONE_RIGHT_SHIFTED_BY_9)
+ {
+ UWORD32 *pu4_buffer, u4_offset;
+
+ pu4_buffer = ps_bitstrm->pu4_buffer;
+ u4_offset = ps_bitstrm->u4_ofst;
+
+ RENORM_RANGE_OFFSET(u4_code_int_range, u4_code_int_val_ofst,
+ u4_offset, pu4_buffer)
+ ps_bitstrm->u4_ofst = u4_offset;
+ }
+
+ u4_code_int_range = u4_code_int_range >> 1; /* bypass bin: halve the range and compare */
+
+ if(u4_code_int_val_ofst >= u4_code_int_range)
+ {
+ /* S=1 */
+ u4_code_int_val_ofst -= u4_code_int_range;
+ i2_mvd = (-i2_mvd);
+ }
+
+ ps_cab_env->u4_code_int_val_ofst = u4_code_int_val_ofst;
+ ps_cab_env->u4_code_int_range = u4_code_int_range;
+
+ return (i2_mvd);
+
+ }
+}
diff --git a/decoder/ih264d_parse_mb_header.h b/decoder/ih264d_parse_mb_header.h
new file mode 100755
index 0000000..63067b9
--- /dev/null
+++ b/decoder/ih264d_parse_mb_header.h
@@ -0,0 +1,88 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ ***************************************************************************
+ * \file ih264d_parse_mb_header.h
+ *
+ * \brief
+ * This file contains context identifier decoding routines.
+ *
+ * \date
+ * 04/02/2003
+ *
+ * \author NS
+ ***************************************************************************
+ */
+#ifndef _IH264D_PARSE_MB_HEADER_H_
+#define _IH264D_PARSE_MB_HEADER_H_
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+#include "ih264d_cabac.h"
+
+/* Reads the prev_intra pred mode flags and rem_intra pred modes of a MB */
+WORD32 ih264d_read_intra_pred_modes_cabac(dec_struct_t * ps_dec,
+ UWORD8 * pu1_prev_intra4x4_pred_mode_flag,
+ UWORD8 * pu1_rem_intra4x4_pred_mode,
+ UWORD8 u1_tran_form8x8);
+
+/* MB type decoding */
+UWORD32 ih264d_parse_mb_type_cabac(struct _DecStruct * ps_dec);
+UWORD8 ih264d_parse_mb_type_intra_cabac(UWORD8 u1_inter,
+ struct _DecStruct * ps_dec);
+
+/* Sub MB type decoding; u1_slc_type_b is non-zero for the B-slice tree */
+UWORD32 ih264d_parse_submb_type_cabac(const UWORD8 u1_slc_type_b,
+ decoding_envirnoment_t * ps_cab_env,
+ dec_bit_stream_t * ps_bitstrm,
+ bin_ctxt_model_t * ps_sub_mb_cxt);
+/* Reference index decoding; returns OK or ERROR_REF_IDX */
+WORD32 ih264d_parse_ref_idx_cabac(const UWORD8 u1_num_part,
+ const UWORD8 u1_b2,
+ const UWORD8 u1_max_ref_minus1,
+ const UWORD8 u1_mb_mode,
+ WORD8 * pi1_ref_idx,
+ WORD8 * const pi1_lft_cxt,
+ WORD8 * const pi1_top_cxt,
+ decoding_envirnoment_t * const ps_cab_env,
+ dec_bit_stream_t * const ps_bitstrm,
+ bin_ctxt_model_t * const ps_ref_cxt);
+
+/* MB Qp delta decoding; returns OK or ERROR_INV_RANGE_QP_T */
+WORD32 ih264d_parse_mb_qp_delta_cabac(struct _DecStruct * ps_dec,
+ WORD8 *pi1_mb_qp_delta);
+/* Intra chroma prediction mode decoding */
+WORD8 ih264d_parse_chroma_pred_mode_cabac(struct _DecStruct * ps_dec);
+
+/* Coded block pattern decoding (luma in bits 0..3, chroma from bit 4) */
+UWORD32 ih264d_parse_ctx_cbp_cabac(struct _DecStruct * ps_dec);
+
+/* Transform 8x8 flag decoding */
+UWORD8 ih264d_parse_transform8x8flag_cabac(struct _DecStruct * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info);
+
+/* Decodes the mvd of one partition and updates the mvd contexts */
+void ih264d_get_mvd_cabac(UWORD8 u1_sub_mb,
+ UWORD8 u1_b2,
+ UWORD8 u1_part_wd,
+ UWORD8 u1_part_ht,
+ UWORD8 u1_dec_mvd,
+ dec_struct_t *ps_dec,
+ mv_pred_t *ps_mv);
+
+/* Decodes one mvd component; i4_temp selects the first bin context */
+WORD16 ih264d_parse_mvd_cabac(dec_bit_stream_t * ps_bitstrm,
+ decoding_envirnoment_t * ps_cab_env,
+ bin_ctxt_model_t * p_ctxt_mvd,
+ UWORD32 i4_temp);
+
+#endif /* _IH264D_PARSE_MB_HEADER_H_ */
diff --git a/decoder/ih264d_parse_pslice.c b/decoder/ih264d_parse_pslice.c
new file mode 100755
index 0000000..67d1405
--- /dev/null
+++ b/decoder/ih264d_parse_pslice.c
@@ -0,0 +1,1760 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ **************************************************************************
+ * \file ih264d_parse_pslice.c
+ *
+ * \brief
+ * Contains routines that parse P-slice macroblocks and inter (P/B) slice data
+ *
+ * Detailed_description
+ *
+ * \date
+ * 07/07/2003
+ *
+ * \author NS
+ **************************************************************************
+ */
+
+#include <string.h>
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_parse_islice.h"
+#include "ih264d_process_intra_mb.h"
+#include "ih264d_inter_pred.h"
+#include "ih264d_process_pslice.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_cabac.h"
+#include "ih264d_parse_mb_header.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_defs.h"
+#include "ih264d_format_conv.h"
+#include "ih264d_quant_scaling.h"
+#include "ih264d_thread_parse_decode.h"
+#include "ih264d_process_bslice.h"
+#include "ithread.h"
+#include "ih264d_utils.h"
+#include "ih264d_format_conv.h"
+
+void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec);
+void ih264d_deblock_mb_level(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD32 nmb_index);
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_pmb_cavlc \endif
+ *
+ * \brief
+ * This function parses CAVLC syntax of a P MB.
+ *
+ * Syntax elements are consumed in this order:
+ * 1. four sub_mb_type codes, only when the MB type is PRED_8x8 or
+ * PRED_8x8R0 (each code must be <= 3);
+ * 2. list-0 reference indices, only when more than one reference is
+ * active and the MB type is not PRED_8x8R0 - otherwise they are set
+ * to 0;
+ * 3. one motion vector difference pair per (sub-)partition, stored as
+ * residuals in the mv_pred_t entries starting at
+ * ps_dec->ps_mv_cur + (u1_mb_num << 4);
+ * 4. coded_block_pattern (code number <= 47, mapped through
+ * gau1_ih264d_cbp_inter);
+ * 5. transform_size_8x8_flag, only when the 8x8 transform is enabled,
+ * the low four cbp bits are not all zero and no sub-MB uses a mode
+ * other than P_L0_8x8;
+ * 6. mb_qp_delta and the 4x4 residual, only when cbp is non-zero; with
+ * a zero cbp ih264d_update_nnz_for_skipmb() is called instead.
+ *
+ * \param ps_dec decoder context (bitstream, slice and partition state)
+ * \param ps_cur_mb_info info of the MB being parsed; u1_mb_type is read,
+ * cbp and transform flags are written
+ * \param u1_mb_num MB index used to locate the MB's mv_pred_t entries
+ * \param u1_num_mbsNby2 index into ps_dec->ps_parse_mb_data
+ *
+ * \return
+ * OK on success; otherwise ERROR_SUB_MB_TYPE, ERROR_CBP,
+ * ERROR_INV_RANGE_QP_T, ERROR_EOB_TERMINATE_T or the status returned by
+ * a called parsing helper
+ **************************************************************************
+ */
+WORD32 ih264d_parse_pmb_cavlc(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2)
+{
+ UWORD32 u1_num_mb_part;
+ UWORD32 uc_sub_mb;
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 * const pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+
+ parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
+ + u1_num_mbsNby2;
+ WORD8 * pi1_ref_idx = ps_parse_mb_data->i1_ref_idx[0];
+ const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
+ UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
+
+ UWORD32 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+ UWORD32 u4_sum_mb_mode_pack = 0; /* four sub-MB modes, one byte each, first mode ends up in the top byte */
+ WORD32 ret;
+
+ UWORD8 u1_no_submb_part_size_lt8x8_flag = 1; /* cleared as soon as a sub-MB mode other than P_L0_8x8 is read */
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+ ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+ ps_cur_mb_info->u1_mb_mc_mode = u1_mb_type;
+ uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0)); /* 1 for the two 8x8 MB types, which carry sub_mb_type codes */
+
+ /* Reading the subMB type */
+ if(uc_sub_mb)
+ {
+ WORD32 i;
+ UWORD8 u1_colz = (PRED_8x8 << 6);
+
+ for(i = 0; i < 4; i++)
+ {
+ UWORD32 ui_sub_mb_mode;
+
+ //Inlined ih264d_uev
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ ui_sub_mb_mode = ((1 << u4_ldz) + u4_word - 1); /* code number = 2^ldz - 1 + suffix. NOTE(review): the shift is undefined if CLZ can return 31 or more (e.g. for an all-zero word) - confirm CLZ's range */
+ //Inlined ih264d_uev
+
+ if(ui_sub_mb_mode > 3)
+ {
+ return ERROR_SUB_MB_TYPE;
+ }
+ else
+ {
+ u4_sum_mb_mode_pack = (u4_sum_mb_mode_pack << 8) | ui_sub_mb_mode;
+ /* Storing collocated information */
+ *pu1_col_info++ = u1_colz | (UWORD8)(ui_sub_mb_mode << 4);
+
+ COPYTHECONTEXT("sub_mb_type", ui_sub_mb_mode);
+ }
+
+ /* check if Motion compensation is done below 8x8 */
+ if(ui_sub_mb_mode != P_L0_8x8)
+ {
+ u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+
+ // An 8x8-type MB is always handled as four partitions
+ u1_num_mb_part = 4;
+ }
+ else
+ {
+ *pu1_col_info++ = (u1_mb_type << 6);
+ if(u1_mb_type)
+ *pu1_col_info++ = (u1_mb_type << 6); /* a second col-info entry is written only for non-zero MB types */
+ u1_num_mb_part = pu1_num_mb_part[u1_mb_type];
+
+ }
+
+ /* Decoding reference index 0: For simple profile the following */
+ /* conditions are always true (mb_field_decoding_flag == 0); */
+ /* (MbPartPredMode != PredL1) */
+
+ {
+
+ UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+ UWORD8 uc_num_ref_idx_l0_active_minus1 =
+ (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
+ << (u1_mbaff & uc_field)) - 1; /* active count is shifted left by (u1_mbaff & uc_field) before subtracting one */
+
+ if((uc_num_ref_idx_l0_active_minus1 > 0) & (u1_mb_type != PRED_8x8R0))
+ {
+ if(1 == uc_num_ref_idx_l0_active_minus1)
+ ih264d_parse_pmb_ref_index_cavlc_range1(
+ u1_num_mb_part, ps_bitstrm, pi1_ref_idx,
+ uc_num_ref_idx_l0_active_minus1);
+ else
+ {
+ ret = ih264d_parse_pmb_ref_index_cavlc(
+ u1_num_mb_part, ps_bitstrm, pi1_ref_idx,
+ uc_num_ref_idx_l0_active_minus1);
+ if(ret != OK)
+ return ret;
+ }
+ }
+ else
+ {
+ /* When there exists only a single frame to predict from */
+ UWORD8 uc_i;
+ for(uc_i = 0; uc_i < u1_num_mb_part; uc_i++)
+ /* Storing Reference Idx Information */
+ pi1_ref_idx[uc_i] = 0;
+ }
+ }
+
+ {
+ UWORD8 u1_p_idx, uc_i;
+ parse_part_params_t * ps_part = ps_dec->ps_part;
+ UWORD8 u1_sub_mb_mode, u1_num_subpart, u1_mb_part_width, u1_mb_part_height;
+ UWORD8 u1_sub_mb_num;
+ const UWORD8 * pu1_top_left_sub_mb_indx;
+ mv_pred_t * ps_mv, *ps_mv_start = ps_dec->ps_mv_cur + (u1_mb_num << 4);
+ /* Loading the table pointers */
+ const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
+ const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
+ const UWORD8 * pu1_sub_mb_indx_mod =
+ (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
+ + (uc_sub_mb * 6);
+ const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
+ const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
+ const UWORD8 * pu1_num_sub_mb_part =
+ (const UWORD8 *)gau1_ih264d_num_submb_part;
+
+ UWORD16 u2_sub_mb_num = 0x028A; /* nibbles 0x0, 0x2, 0x8, 0xA: starting u1_sub_mb_num of the four 8x8 partitions, consumed top nibble first */
+
+ /*********************************************************/
+ /* default initialisations for condition (uc_sub_mb == 0) */
+ /* i.e. all are subpartitions of 8x8 */
+ /*********************************************************/
+ u1_sub_mb_mode = 0;
+ u1_num_subpart = 1;
+ u1_mb_part_width = pu1_mb_partw[u1_mb_type];
+ u1_mb_part_height = pu1_mb_parth[u1_mb_type];
+ pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_type << 1);
+ u1_sub_mb_num = 0;
+
+ /* Loop on number of partitions */
+ for(uc_i = 0, u1_p_idx = 0; uc_i < u1_num_mb_part; uc_i++)
+ {
+ UWORD8 uc_j;
+ if(uc_sub_mb)
+ {
+ u1_sub_mb_mode = u4_sum_mb_mode_pack >> 24; /* mode of the current 8x8 partition sits in the top byte */
+ u1_num_subpart = pu1_num_sub_mb_part[u1_sub_mb_mode];
+ u1_mb_part_width = pu1_sub_mb_partw[u1_sub_mb_mode];
+ u1_mb_part_height = pu1_sub_mb_parth[u1_sub_mb_mode];
+ pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_sub_mb_mode << 1);
+ u1_sub_mb_num = u2_sub_mb_num >> 12;
+ u4_sum_mb_mode_pack <<= 8; /* advance both packed words to the next partition */
+ u2_sub_mb_num <<= 4;
+ }
+
+ /* Loop on Number of sub-partitions */
+ for(uc_j = 0; uc_j < u1_num_subpart; uc_j++, pu1_top_left_sub_mb_indx++)
+ {
+ WORD16 i2_mvx, i2_mvy;
+ u1_sub_mb_num += *pu1_top_left_sub_mb_indx;
+ ps_mv = ps_mv_start + u1_sub_mb_num;
+
+ /* Reading the differential Mv from the bitstream */
+ //i2_mvx = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ //inlining ih264d_sev
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset,
+ pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset,
+ pu4_bitstrm_buf, u4_ldz);
+
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1; /* magnitude = (2^ldz + suffix) / 2; the suffix's low bit selects the sign below */
+
+ if(u4_word & 0x1)
+ i2_mvx = (-(WORD32)u4_abs_val);
+ else
+ i2_mvx = (u4_abs_val);
+ }
+ //inlined ih264d_sev
+ COPYTHECONTEXT("MVD", i2_mvx);
+ i2_mvy = ih264d_sev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ COPYTHECONTEXT("MVD", i2_mvy);
+
+ /* Storing Info for partitions */
+ ps_part->u1_is_direct = PART_NOT_DIRECT;
+ ps_part->u1_sub_mb_num = u1_sub_mb_num;
+ ps_part->u1_partheight = u1_mb_part_height;
+ ps_part->u1_partwidth = u1_mb_part_width;
+
+ /* Storing Mv residuals */
+ ps_mv->i2_mv[0] = i2_mvx;
+ ps_mv->i2_mv[1] = i2_mvy;
+
+ /* Increment partition Index */
+ u1_p_idx++;
+ ps_part++;
+ }
+ }
+ ps_parse_mb_data->u1_num_part = u1_p_idx;
+ ps_dec->ps_part = ps_part;
+ }
+
+ {
+ UWORD32 u4_cbp;
+
+ /* Read the Coded block pattern */
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf, u4_ldz);
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_cbp = ((1 << u4_ldz) + u4_word - 1);
+
+ if(u4_cbp > 47)
+ return ERROR_CBP;
+
+ u4_cbp = *((UWORD8*)gau1_ih264d_cbp_inter + u4_cbp); /* translate the code number through the inter CBP table */
+ COPYTHECONTEXT("coded_block_pattern", u4_cbp);
+ ps_cur_mb_info->u1_cbp = u4_cbp;
+
+ /* Read the transform8x8 u4_flag if present */
+ if((ps_dec->s_high_profile.u1_transform8x8_present) && (u4_cbp & 0xf)
+ && u1_no_submb_part_size_lt8x8_flag)
+ {
+ ps_cur_mb_info->u1_tran_form8x8 = ih264d_get_bit_h264(ps_bitstrm);
+ COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+ }
+
+ /* Read mb_qp_delta */
+ if(u4_cbp)
+ {
+ WORD32 i_temp;
+
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_abs_val;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_abs_val = ((1 << u4_ldz) + u4_word) >> 1;
+
+ if(u4_word & 0x1)
+ i_temp = (-(WORD32)u4_abs_val);
+ else
+ i_temp = (u4_abs_val);
+
+ if((i_temp < -26) || (i_temp > 25))
+ return ERROR_INV_RANGE_QP_T;
+ //inlined ih264d_sev
+
+ COPYTHECONTEXT("mb_qp_delta", i_temp);
+ if(i_temp)
+ {
+ ret = ih264d_update_qp(ps_dec, (WORD8)i_temp);
+ if(ret != OK)
+ return ret;
+ }
+
+ ret = ih264d_parse_residual4x4_cavlc(ps_dec, ps_cur_mb_info, 0);
+ if(ret != OK)
+ return ret;
+ if(EXCEED_OFFSET(ps_bitstrm))
+ return ERROR_EOB_TERMINATE_T;
+ }
+ else
+ {
+ ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
+ }
+
+
+
+ }
+
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_pmb_cabac \endif
+ *
+ * \brief
+ * This function parses CABAC syntax of a P MB.
+ *
+ * Steps, in order: four sub_mb_type symbols (PRED_8x8 / PRED_8x8R0 only,
+ * each must be <= 3), list-0 reference indices (decoded only when more
+ * than one reference is active and the MB type is not PRED_8x8R0,
+ * otherwise zeroed together with the left/top ref-idx contexts), MVDs per
+ * (sub-)partition via ih264d_get_mvd_cabac(), coded_block_pattern,
+ * optional transform_size_8x8_flag, mb_qp_delta (non-zero cbp only) and
+ * finally the 4x4 residual, which is parsed regardless of cbp.
+ *
+ * \param ps_dec decoder context (CABAC environment, contexts, slice and
+ * partition state)
+ * \param ps_cur_mb_info info of the MB being parsed; u1_mb_type is read,
+ * cbp and transform flags are written
+ * \param u1_mb_num MB index used to locate the MB's mv_pred_t entries
+ * \param u1_num_mbsNby2 index into ps_dec->ps_parse_mb_data
+ *
+ * \return
+ * OK on success; otherwise ERROR_SUB_MB_TYPE, ERROR_CBP,
+ * ERROR_EOB_TERMINATE_T or the status returned by a called parsing helper
+ **************************************************************************
+ */
+WORD32 ih264d_parse_pmb_cabac(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2)
+{
+ UWORD32 u1_num_mb_part;
+ UWORD32 uc_sub_mb;
+ parse_pmbarams_t * ps_parse_mb_data = ps_dec->ps_parse_mb_data
+ + u1_num_mbsNby2;
+ WORD8 * pi1_ref_idx = ps_parse_mb_data->i1_ref_idx[0];
+ const UWORD8 * pu1_num_mb_part = (const UWORD8 *)gau1_ih264d_num_mb_part;
+ const UWORD32 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+ UWORD8 * pu1_col_info = ps_parse_mb_data->u1_col_info;
+ UWORD32 u1_mb_mc_mode = u1_mb_type;
+ ctxt_inc_mb_info_t * p_curr_ctxt = ps_dec->ps_curr_ctxt_mb_info;
+ decoding_envirnoment_t * ps_cab_env = &ps_dec->s_cab_dec_env;
+ dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 u4_sub_mb_pack = 0; /* four sub-MB modes, one byte each, first mode ends up in the top byte */
+ WORD32 ret;
+
+ UWORD8 u1_no_submb_part_size_lt8x8_flag = 1; /* cleared as soon as a sub-MB mode other than P_L0_8x8 is read */
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+ ps_cur_mb_info->u1_yuv_dc_block_flag = 0;
+
+ p_curr_ctxt->u1_mb_type = CAB_P;
+ ps_cur_mb_info->u1_mb_mc_mode = u1_mb_type;
+ uc_sub_mb = ((u1_mb_type == PRED_8x8) | (u1_mb_type == PRED_8x8R0)); /* 1 for the two 8x8 MB types, which carry sub_mb_type symbols */
+
+ /* Reading the subMB type */
+ if(uc_sub_mb)
+ {
+
+ UWORD8 u1_colz = (PRED_8x8 << 6);
+ u1_mb_mc_mode = 0; /* value later handed to ih264d_parse_ref_idx_cabac as its u1_mb_mode argument */
+
+ { /* first sub-MB; the same sequence is repeated verbatim for the other three */
+ UWORD8 u1_sub_mb_mode;
+ u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+ 0, ps_cab_env, ps_bitstrm,
+ ps_dec->p_sub_mb_type_t);
+ if(u1_sub_mb_mode > 3)
+ return ERROR_SUB_MB_TYPE;
+
+ u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
+ /* Storing collocated information */
+ *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
+ COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+ /* check if Motion compensation is done below 8x8 */
+ if(u1_sub_mb_mode != P_L0_8x8)
+ {
+ u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+ {
+ UWORD8 u1_sub_mb_mode;
+ u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+ 0, ps_cab_env, ps_bitstrm,
+ ps_dec->p_sub_mb_type_t);
+ if(u1_sub_mb_mode > 3)
+ return ERROR_SUB_MB_TYPE;
+
+ u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
+ /* Storing collocated information */
+ *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
+ COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+ /* check if Motion compensation is done below 8x8 */
+ if(u1_sub_mb_mode != P_L0_8x8)
+ {
+ u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+ {
+ UWORD8 u1_sub_mb_mode;
+ u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+ 0, ps_cab_env, ps_bitstrm,
+ ps_dec->p_sub_mb_type_t);
+ if(u1_sub_mb_mode > 3)
+ return ERROR_SUB_MB_TYPE;
+
+ u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
+ /* Storing collocated information */
+ *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
+ COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+ /* check if Motion compensation is done below 8x8 */
+ if(u1_sub_mb_mode != P_L0_8x8)
+ {
+ u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+ {
+ UWORD8 u1_sub_mb_mode;
+ u1_sub_mb_mode = ih264d_parse_submb_type_cabac(
+ 0, ps_cab_env, ps_bitstrm,
+ ps_dec->p_sub_mb_type_t);
+ if(u1_sub_mb_mode > 3)
+ return ERROR_SUB_MB_TYPE;
+
+ u4_sub_mb_pack = (u4_sub_mb_pack << 8) | u1_sub_mb_mode;
+ /* Storing collocated information */
+ *pu1_col_info++ = u1_colz | ((UWORD8)(u1_sub_mb_mode << 4));
+ COPYTHECONTEXT("sub_mb_type", u1_sub_mb_mode);
+ /* check if Motion compensation is done below 8x8 */
+ if(u1_sub_mb_mode != P_L0_8x8)
+ {
+ u1_no_submb_part_size_lt8x8_flag = 0;
+ }
+ }
+ u1_num_mb_part = 4;
+ }
+ else
+ {
+ u1_num_mb_part = pu1_num_mb_part[u1_mb_type];
+ /* Storing collocated Mb and SubMb mode information */
+ *pu1_col_info++ = (u1_mb_type << 6);
+ if(u1_mb_type)
+ *pu1_col_info++ = (u1_mb_type << 6); /* a second col-info entry is written only for non-zero MB types */
+ }
+ /* Decoding reference index 0: For simple profile the following */
+ /* conditions are always true (mb_field_decoding_flag == 0); */
+ /* (MbPartPredMode != PredL1) */
+ {
+ WORD8 * pi1_top_ref_idx_ctx_inc_arr = p_curr_ctxt->i1_ref_idx;
+ WORD8 * pi1_left_ref_idx_ctxt_inc = ps_dec->pi1_left_ref_idx_ctxt_inc;
+ UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD8 uc_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+ UWORD8 uc_num_ref_idx_l0_active_minus1 =
+ (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
+ << (u1_mbaff & uc_field)) - 1; /* active count is shifted left by (u1_mbaff & uc_field) before subtracting one */
+
+ if((uc_num_ref_idx_l0_active_minus1 > 0) & (u1_mb_type != PRED_8x8R0))
+ {
+ /* force the routine to decode ref idx for each partition */
+ *((UWORD32 *)pi1_ref_idx) = 0x01010101; /* NOTE(review): sets four WORD8 entries through a UWORD32 alias; assumes i1_ref_idx[0] is suitably aligned - confirm */
+ ret = ih264d_parse_ref_idx_cabac(u1_num_mb_part, 0,
+ uc_num_ref_idx_l0_active_minus1,
+ u1_mb_mc_mode, pi1_ref_idx,
+ pi1_left_ref_idx_ctxt_inc,
+ pi1_top_ref_idx_ctx_inc_arr, ps_cab_env,
+ ps_bitstrm, ps_dec->p_ref_idx_t);
+ if(ret != OK)
+ return ret;
+ }
+ else
+ {
+ /* When there exists only a single frame to predict from */
+ pi1_left_ref_idx_ctxt_inc[0] = 0;
+ pi1_left_ref_idx_ctxt_inc[1] = 0;
+ pi1_top_ref_idx_ctx_inc_arr[0] = 0;
+ pi1_top_ref_idx_ctx_inc_arr[1] = 0;
+ *((UWORD32 *)pi1_ref_idx) = 0;
+ }
+ }
+
+ {
+ UWORD8 u1_p_idx, uc_i;
+ parse_part_params_t * ps_part = ps_dec->ps_part;
+ UWORD8 u1_sub_mb_mode, u1_num_subpart, u1_mb_part_width, u1_mb_part_height;
+ UWORD8 u1_sub_mb_num;
+ const UWORD8 * pu1_top_left_sub_mb_indx;
+ mv_pred_t *ps_mv_start = ps_dec->ps_mv_cur + (u1_mb_num << 4);
+ UWORD16 u2_sub_mb_num_pack = 0x028A; /* nibbles 0x0, 0x2, 0x8, 0xA: starting u1_sub_mb_num of the four 8x8 partitions, consumed top nibble first */
+
+ /* Loading the table pointers */
+ const UWORD8 * pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
+ const UWORD8 * pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
+ const UWORD8 * pu1_sub_mb_indx_mod =
+ (const UWORD8 *)(gau1_ih264d_submb_indx_mod)
+ + (uc_sub_mb * 6);
+ const UWORD8 * pu1_sub_mb_partw = (const UWORD8 *)gau1_ih264d_submb_partw;
+ const UWORD8 * pu1_sub_mb_parth = (const UWORD8 *)gau1_ih264d_submb_parth;
+ const UWORD8 * pu1_num_sub_mb_part =
+ (const UWORD8 *)gau1_ih264d_num_submb_part;
+
+ /*********************************************************/
+ /* default initialisations for condition (uc_sub_mb == 0) */
+ /* i.e. all are subpartitions of 8x8 */
+ /*********************************************************/
+ u1_sub_mb_mode = 0;
+ u1_num_subpart = 1;
+ u1_mb_part_width = pu1_mb_partw[u1_mb_type];
+ u1_mb_part_height = pu1_mb_parth[u1_mb_type];
+ pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_mb_type << 1);
+ u1_sub_mb_num = 0;
+
+ /* Loop on number of partitions */
+ for(uc_i = 0, u1_p_idx = 0; uc_i < u1_num_mb_part; uc_i++)
+ {
+ UWORD8 uc_j;
+ if(uc_sub_mb)
+ {
+ u1_sub_mb_mode = u4_sub_mb_pack >> 24; /* mode of the current 8x8 partition sits in the top byte */
+ u1_num_subpart = pu1_num_sub_mb_part[u1_sub_mb_mode];
+ u1_mb_part_width = pu1_sub_mb_partw[u1_sub_mb_mode];
+ u1_mb_part_height = pu1_sub_mb_parth[u1_sub_mb_mode];
+ pu1_top_left_sub_mb_indx = pu1_sub_mb_indx_mod + (u1_sub_mb_mode << 1);
+ u1_sub_mb_num = u2_sub_mb_num_pack >> 12;
+ u4_sub_mb_pack <<= 8; /* advance both packed words to the next partition */
+ u2_sub_mb_num_pack <<= 4;
+ }
+ /* Loop on Number of sub-partitions */
+ for(uc_j = 0; uc_j < u1_num_subpart; uc_j++, pu1_top_left_sub_mb_indx++)
+ {
+ mv_pred_t * ps_mv;
+
+ u1_sub_mb_num += *pu1_top_left_sub_mb_indx;
+ ps_mv = ps_mv_start + u1_sub_mb_num;
+
+ /* Storing Info for partitions */
+ ps_part->u1_is_direct = PART_NOT_DIRECT;
+ ps_part->u1_sub_mb_num = u1_sub_mb_num;
+ ps_part->u1_partheight = u1_mb_part_height;
+ ps_part->u1_partwidth = u1_mb_part_width;
+
+ /* Increment partition Index */
+ u1_p_idx++;
+ ps_part++;
+
+ ih264d_get_mvd_cabac(u1_sub_mb_num, 0, u1_mb_part_width,
+ u1_mb_part_height, 1, ps_dec, ps_mv);
+ }
+ }
+ ps_parse_mb_data->u1_num_part = u1_p_idx;
+ ps_dec->ps_part = ps_part;
+ }
+ {
+ UWORD8 u1_cbp;
+
+ /* Read the Coded block pattern */
+ u1_cbp = (WORD8)ih264d_parse_ctx_cbp_cabac(ps_dec);
+ COPYTHECONTEXT("coded_block_pattern", u1_cbp);
+ ps_cur_mb_info->u1_cbp = u1_cbp;
+ p_curr_ctxt->u1_cbp = u1_cbp;
+ p_curr_ctxt->u1_intra_chroma_pred_mode = 0;
+ p_curr_ctxt->u1_yuv_dc_csbp &= 0xFE;
+ ps_dec->pu1_left_yuv_dc_csbp[0] &= 0x6;
+
+ if(u1_cbp > 47) /* range check happens after the value was already stored in ps_cur_mb_info and p_curr_ctxt above */
+ return ERROR_CBP;
+
+ ps_cur_mb_info->u1_tran_form8x8 = 0;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+ /* Read the transform8x8 u4_flag if present */
+ if((ps_dec->s_high_profile.u1_transform8x8_present) && (u1_cbp & 0xf)
+ && u1_no_submb_part_size_lt8x8_flag)
+ {
+ ps_cur_mb_info->u1_tran_form8x8 = ih264d_parse_transform8x8flag_cabac(
+ ps_dec, ps_cur_mb_info);
+ COPYTHECONTEXT("transform_size_8x8_flag", ps_cur_mb_info->u1_tran_form8x8);
+ p_curr_ctxt->u1_transform8x8_ctxt = ps_cur_mb_info->u1_tran_form8x8;
+ ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = ps_cur_mb_info->u1_tran_form8x8;
+
+ }
+ else
+ {
+ p_curr_ctxt->u1_transform8x8_ctxt = 0;
+ }
+
+ /* Read mb_qp_delta */
+ if(u1_cbp)
+ {
+ WORD8 c_temp;
+ ret = ih264d_parse_mb_qp_delta_cabac(ps_dec, &c_temp);
+ if(ret != OK)
+ return ret;
+ COPYTHECONTEXT("mb_qp_delta", c_temp);
+ if(c_temp != 0)
+ {
+ ret = ih264d_update_qp(ps_dec, c_temp);
+ if(ret != OK)
+ return ret;
+ }
+ }
+ else
+ ps_dec->i1_prev_mb_qp_delta = 0; /* no mb_qp_delta is coded when cbp is zero, so the remembered delta is reset */
+
+
+
+ ih264d_parse_residual4x4_cabac(ps_dec, ps_cur_mb_info, 0); /* called whether or not u1_cbp is zero; no return value is inspected here */
+ if(EXCEED_OFFSET(ps_dec->ps_bitstrm))
+ return ERROR_EOB_TERMINATE_T;
+ }
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : parsePSliceData \endif
+ *
+ * \brief
+ * This function parses CAVLC syntax of N MB's of a P slice.
+ * 1. After parsing syntax of N MB's, for those N MB's (less than N, in case
+ * of end of slice or end of row), MB is decoded. This process is carried
+ * for one complete MB row or till end of slice.
+ * 2. Bottom one row of current MB is copied to IntraPredLine buffers.
+ * IntraPredLine buffers are used for Intra prediction of next row.
+ * 3. Current MB row along with previous 4 rows of Luma (and 2 of Chroma) are
+ * deblocked.
+ * 4. 4 rows (2 for Chroma) previous row and 12 rows (6 for Chroma) are
+ * DMA'ed to picture buffers.
+ *
+ * \return
+ * 0 on Success and Error code otherwise
+ **************************************************************************
+ */
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_update_nnz_for_skipmb \endif
+ *
+ * \brief
+ * Clears the non-zero-coefficient bookkeeping for an MB without residual:
+ * the first sizeof(UWORD32) bytes of the left luma/chroma nnz buffers and
+ * of the current MB's luma/chroma nnz buffers are set to zero, and the
+ * luma/chroma coded-sub-block patterns are reset.
+ *
+ * \param ps_dec decoder context owning the left nnz buffers
+ * \param ps_cur_mb_info info of the current MB; its csbp fields and the nnz
+ * buffers of ps_curmb are cleared
+ * \param u1_entrpy entropy mode passed by callers (CAVLC / CABAC); unused
+ *
+ * \return
+ * None
+ *
+ **************************************************************************
+ */
+void ih264d_update_nnz_for_skipmb(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_entrpy)
+{
+    UNUSED(u1_entrpy);
+
+    /* The buffers are byte arrays. Clearing them with memset() avoids */
+    /* storing through a UWORD32 pointer cast from UWORD8 *, which     */
+    /* breaks strict aliasing and requires 4-byte alignment. The       */
+    /* number of bytes cleared is unchanged.                           */
+    memset(ps_dec->pu1_left_nnz_y, 0, sizeof(UWORD32));
+    memset(ps_dec->pu1_left_nnz_uv, 0, sizeof(UWORD32));
+    memset(ps_cur_mb_info->ps_curmb->pu1_nnz_y, 0, sizeof(UWORD32));
+    memset(ps_cur_mb_info->ps_curmb->pu1_nnz_uv, 0, sizeof(UWORD32));
+
+    ps_cur_mb_info->ps_curmb->u2_luma_csbp = 0;
+    ps_cur_mb_info->u2_luma_csbp = 0;
+    ps_cur_mb_info->u2_chroma_csbp = 0;
+}
+
+
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_parse_inter_slice_data_cabac */
+/* */
+/* Description : This function parses cabac syntax of a inter slice on */
+/* N MB basis. */
+/* */
+/* Inputs : ps_dec - decoder context */
+/* ps_slice - parameters of the slice being parsed; */
+/* u1_slice_type selects P or B handling */
+/* u2_first_mb_in_slice - address of the first MB (shifted */
+/* left by the MBAFF flag to get the MB address) */
+/* */
+/* Processing : 1. After parsing syntax for N MBs those N MBs are */
+/* decoded till the end of slice. */
+/* 2. MV prediction and DMA happens on a N/2 MB basis. */
+/* 3. The MB address is validated against */
+/* ps_cur_sps->u2_max_mb_addr before each MB is */
+/* parsed. */
+/* */
+/* Returns : OK on success; ERROR_MB_ADDRESS_T, ERROR_MB_TYPE or the */
+/* status propagated from a called helper otherwise */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Jay Draft */
+/* */
+/*****************************************************************************/
+WORD32 ih264d_parse_inter_slice_data_cabac(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice)
+{
+    UWORD32 uc_more_data_flag;
+    WORD32 i2_cur_mb_addr;
+    UWORD32 u1_num_mbs, u1_num_mbsNby2, u1_mb_idx;
+    UWORD32 u1_mbaff;
+    UWORD32 u1_num_mbs_next, u1_end_of_row;
+    const UWORD16 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
+    UWORD32 u1_slice_end = 0;
+    UWORD32 u1_tfr_n_mb = 0;
+    UWORD32 u1_decode_nmb = 0;
+
+
+    deblk_mb_t *ps_cur_deblk_mb;
+    dec_mb_info_t *ps_cur_mb_info;
+
+    parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data;
+    UWORD32 u1_inter_mb_skip_type;
+    UWORD32 u1_inter_mb_type;
+    UWORD32 u1_deblk_mb_type;
+    UWORD32 u1_mb_threshold;
+    dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+    WORD32 ret;
+
+    /******************************************************/
+    /* Initialisations specific to B or P slice           */
+    /******************************************************/
+    /* u1_mb_threshold is the first mb_type value that denotes an intra */
+    /* MB: smaller values are inter types, threshold itself is I_4x4,   */
+    /* threshold + 25 is I_PCM and the values in between are I_16x16.   */
+    if(ps_slice->u1_slice_type == P_SLICE)
+    {
+        u1_inter_mb_skip_type = CAB_P_SKIP;
+        u1_inter_mb_type = P_MB;
+        u1_deblk_mb_type = D_INTER_MB;
+        u1_mb_threshold = 5;
+    }
+    else // B_SLICE
+    {
+        u1_inter_mb_skip_type = CAB_B_SKIP;
+        u1_inter_mb_type = B_MB;
+        u1_deblk_mb_type = D_B_SLICE;
+        u1_mb_threshold = 23;
+    }
+
+    /******************************************************/
+    /* Slice Level Initialisations                        */
+    /******************************************************/
+    ps_dec->u1_qp = ps_slice->u1_slice_qp;
+    ret = ih264d_update_qp(ps_dec, 0);
+    if(ret != OK)
+        return ret;
+    u1_mb_idx = ps_dec->u1_mb_idx;
+    u1_num_mbs = u1_mb_idx;
+    u1_num_mbsNby2 = 0;
+    u1_mbaff = ps_slice->u1_mbaff_frame_flag;
+    i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
+    uc_more_data_flag = 1;
+
+    /* Initialisations specific to cabac */
+    /* Round the bit offset up to the next byte boundary */
+    if(ps_bitstrm->u4_ofst & 0x07)
+    {
+        ps_bitstrm->u4_ofst += 8;
+        ps_bitstrm->u4_ofst &= 0xFFFFFFF8;
+    }
+
+    ret = ih264d_init_cabac_dec_envirnoment(&(ps_dec->s_cab_dec_env), ps_bitstrm);
+    if(ret != OK)
+        return ret;
+
+    ps_dec->i1_prev_mb_qp_delta = 0;
+
+    while(!u1_slice_end)
+    {
+        UWORD8 u1_mb_type;
+        UWORD32 u4_mb_skip;
+
+        /* Validate the MB address BEFORE anything is parsed or written  */
+        /* for it. Checking only after the MB has been parsed would let  */
+        /* one MB beyond the end of the picture be processed before the  */
+        /* error is reported.                                            */
+        if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u2_max_mb_addr)
+            return ERROR_MB_ADDRESS_T;
+
+        ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
+
+        ps_cur_mb_info->u1_Mux = 0;
+        ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
+        ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
+
+        ps_cur_mb_info->u1_end_of_slice = 0;
+
+        /* Storing Default partition info */
+        ps_parse_mb_data->u1_num_part = 1;
+        ps_parse_mb_data->u1_isI_mb = 0;
+
+        /***************************************************************/
+        /* Get the required information for decoding of MB             */
+        /* mb_x, mb_y , neighbour availability,                        */
+        /***************************************************************/
+        u4_mb_skip = ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, 1);
+
+        /*********************************************************************/
+        /* initialize u1_tran_form8x8 to zero to avoid uninitialized accesses */
+        /*********************************************************************/
+        ps_cur_mb_info->u1_tran_form8x8 = 0;
+        ps_cur_mb_info->ps_curmb->u1_tran_form8x8 = 0;
+
+        /***************************************************************/
+        /* Set the deblocking parameters for this MB                   */
+        /***************************************************************/
+        if(ps_dec->u4_app_disable_deblk_frm == 0)
+            ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
+                                             ps_dec->u1_mb_ngbr_availablity,
+                                             ps_dec->u1_cur_mb_fld_dec_flag);
+
+        if(u4_mb_skip)
+        {
+
+            /* Set appropriate flags in ps_cur_mb_info and ps_dec */
+            memset(ps_dec->ps_curr_ctxt_mb_info, 0, sizeof(ctxt_inc_mb_info_t));
+            ps_dec->ps_curr_ctxt_mb_info->u1_mb_type = u1_inter_mb_skip_type;
+
+            MEMSET_16BYTES(&ps_dec->pu1_left_mv_ctxt_inc[0][0], 0);
+
+            *((UWORD32 *)ps_dec->pi1_left_ref_idx_ctxt_inc) = 0;
+            *(ps_dec->pu1_left_yuv_dc_csbp) = 0;
+
+            ps_dec->i1_prev_mb_qp_delta = 0;
+            ps_cur_mb_info->u1_mb_type = MB_SKIP;
+            ps_cur_mb_info->u1_cbp = 0;
+
+            {
+                /* Storing Skip partition info */
+                parse_part_params_t *ps_part_info = ps_dec->ps_part;
+                ps_part_info->u1_is_direct = PART_DIRECT_16x16;
+                ps_part_info->u1_sub_mb_num = 0;
+                ps_dec->ps_part++;
+            }
+
+            /* Update Nnzs */
+            ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CABAC);
+
+            ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
+            ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
+            ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+
+        }
+        else
+        {
+
+            /* Macroblock Layer Begins */
+            /* Decode the u1_mb_type */
+            u1_mb_type = ih264d_parse_mb_type_cabac(ps_dec);
+            ps_cur_mb_info->u1_mb_type = u1_mb_type;
+            if(u1_mb_type > (25 + u1_mb_threshold))
+                return ERROR_MB_TYPE;
+
+            /* Parse Macroblock Data */
+            if(u1_mb_type < u1_mb_threshold)
+            {
+                ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
+                *(ps_dec->pu1_left_yuv_dc_csbp) &= 0x6;
+
+                ret = ps_dec->pf_parse_inter_mb(ps_dec, ps_cur_mb_info, u1_num_mbs,
+                                                u1_num_mbsNby2);
+                if(ret != OK)
+                    return ret;
+                ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+                ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
+            }
+            else
+            {
+                /* Storing Intra partition info */
+                ps_parse_mb_data->u1_num_part = 0;
+                ps_parse_mb_data->u1_isI_mb = 1;
+
+                if((25 + u1_mb_threshold) == u1_mb_type)
+                {
+                    /* I_PCM_MB */
+                    ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
+                    ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
+                    if(ret != OK)
+                        return ret;
+                    ps_cur_deblk_mb->u1_mb_qp = 0;
+                }
+                else
+                {
+                    if(u1_mb_type == u1_mb_threshold)
+                        ps_cur_mb_info->ps_curmb->u1_mb_type = I_4x4_MB;
+                    else
+                        ps_cur_mb_info->ps_curmb->u1_mb_type = I_16x16_MB;
+
+                    ret = ih264d_parse_imb_cabac(
+                                    ps_dec, ps_cur_mb_info,
+                                    (UWORD8)(u1_mb_type - u1_mb_threshold));
+                    if(ret != OK)
+                        return ret;
+                    ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+                }
+                ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
+
+            }
+
+        }
+
+        if(u1_mbaff)
+        {
+            ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
+        }
+        /* Next macroblock information */
+        i2_cur_mb_addr++;
+
+        /* The slice-terminate bin is not decoded after the top MB of */
+        /* an MBAFF pair.                                             */
+        if(ps_cur_mb_info->u1_topmb && u1_mbaff)
+            uc_more_data_flag = 1;
+        else
+        {
+            uc_more_data_flag = ih264d_decode_terminate(&ps_dec->s_cab_dec_env,
+                                                      ps_bitstrm);
+            uc_more_data_flag = !uc_more_data_flag;
+            COPYTHECONTEXT("Decode Sliceterm",!uc_more_data_flag);
+        }
+
+        u1_num_mbs++;
+        ps_dec->u2_total_mbs_coded++;
+        u1_num_mbsNby2++;
+        ps_parse_mb_data++;
+
+        /****************************************************************/
+        /* Check for End Of Row and other flags that determine when to  */
+        /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for   */
+        /* N-Mb                                                         */
+        /****************************************************************/
+        u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
+        u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
+        u1_slice_end = !uc_more_data_flag;
+        u1_tfr_n_mb = (u1_num_mbs == ps_dec->u1_recon_mb_grp) || u1_end_of_row
+                        || u1_slice_end;
+        u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
+        ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
+        /*u1_dma_nby2mb = u1_decode_nmb ||
+         (u1_num_mbsNby2 == ps_dec->u1_recon_mb_grp_pair);*/
+
+//if(u1_dma_nby2mb)
+        if(u1_decode_nmb)
+        {
+
+            ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
+            if(ret != OK)
+                return ret;
+            u1_num_mbsNby2 = 0;
+
+            {
+                /* Rewind the per-group parse buffers for the next group */
+                ps_parse_mb_data = ps_dec->ps_parse_mb_data;
+                ps_dec->ps_part = ps_dec->ps_parse_part_params;
+            }
+        }
+
+        /*H264_DEC_DEBUG_PRINT("Pic: %d Mb_X=%d Mb_Y=%d",
+         ps_slice->i4_poc >> ps_slice->u1_field_pic_flag,
+         ps_dec->u2_mbx,ps_dec->u2_mby + (1 - ps_cur_mb_info->u1_topmb));
+         H264_DEC_DEBUG_PRINT("u1_decode_nmb: %d, u1_num_mbs: %d", u1_decode_nmb, u1_num_mbs);*/
+        if(u1_decode_nmb)
+        {
+
+            if(ps_dec->u1_separate_parse)
+            {
+                ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+                                     u1_num_mbs_next, u1_tfr_n_mb, u1_end_of_row);
+                ps_dec->ps_nmb_info += u1_num_mbs;
+            }
+            else
+            {
+                ret = ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+                                                  u1_num_mbs_next, u1_tfr_n_mb,
+                                                  u1_end_of_row);
+                if(ret != OK)
+                    return ret;
+            }
+
+            if(u1_tfr_n_mb)
+                u1_num_mbs = 0;
+            u1_mb_idx = u1_num_mbs;
+            ps_dec->u1_mb_idx = u1_num_mbs;
+
+        }
+    }
+
+    if(ps_dec->u1_separate_parse)
+    {
+        ps_dec->ps_parse_cur_slice->end_of_slice = 1;
+        ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr
+                        - (u2_first_mb_in_slice << u1_mbaff);
+    }
+    return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_parse_inter_slice_data_cavlc */
+/* */
+/* Description : This function parses the cavlc syntax of an inter (P or B) */
+/* slice on an N MB basis. */
+/* */
+/* Inputs : ps_dec - decoder context */
+/* ps_slice - parameters of the slice being parsed */
+/* u2_first_mb_in_slice - address of the first MB of the */
+/* slice (shifted left by one when */
+/* mbaff_frame_flag is set) */
+/* */
+/* Processing : 1. MBs are parsed one at a time; skipped MBs are */
+/* generated from mb_skip_run, coded MBs are */
+/* dispatched to the inter / intra / I_PCM parsers. */
+/* 2. When a group of MBs is complete (group size */
+/* reached, end of row or end of slice) MV */
+/* prediction and decode/transfer are run for the */
+/* whole group. */
+/* */
+/* Returns : OK when the slice has been parsed, ERROR_MB_TYPE for an */
+/* out of range mb_type, or the error code returned by any */
+/* of the called parse / decode routines. */
+/* */
+/* Issues : Parsing stops silently (and OK is returned) when the */
+/* current MB address goes past u2_max_mb_addr. */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Jay Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_inter_slice_data_cavlc(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice)
+{
+ UWORD32 uc_more_data_flag;
+ WORD32 i2_cur_mb_addr;
+ UWORD32 u1_num_mbs, u1_num_mbsNby2, u1_mb_idx;
+ UWORD32 i2_mb_skip_run;
+ UWORD32 u1_read_mb_type;
+
+ UWORD32 u1_mbaff;
+ UWORD32 u1_num_mbs_next, u1_end_of_row;
+ const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
+ UWORD32 u1_slice_end = 0;
+ UWORD32 u1_tfr_n_mb = 0;
+ UWORD32 u1_decode_nmb = 0;
+
+ dec_bit_stream_t * const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+ deblk_mb_t *ps_cur_deblk_mb;
+ dec_mb_info_t *ps_cur_mb_info;
+ parse_pmbarams_t *ps_parse_mb_data = ps_dec->ps_parse_mb_data;
+ UWORD32 u1_inter_mb_type;
+ UWORD32 u1_deblk_mb_type;
+ UWORD32 u1_mb_threshold;
+ WORD32 ret;
+
+ /******************************************************/
+ /* Initialisations specific to B or P slice */
+ /******************************************************/
+ /* u1_mb_threshold is the first mb_type value that is parsed as an */
+ /* intra MB; every mb_type below it is handed to the inter MB parser. */
+
+ if(ps_slice->u1_slice_type == P_SLICE)
+ {
+ u1_inter_mb_type = P_MB;
+ u1_deblk_mb_type = D_INTER_MB;
+ u1_mb_threshold = 5;
+ }
+ else // B_SLICE
+ {
+ u1_inter_mb_type = B_MB;
+ u1_deblk_mb_type = D_B_SLICE;
+ u1_mb_threshold = 23;
+ }
+ /******************************************************/
+ /* Slice Level Initialisations */
+ /******************************************************/
+ /* NOTE: this value is overwritten below once u1_mbaff is known */
+ i2_cur_mb_addr = u2_first_mb_in_slice;
+ ps_dec->u1_qp = ps_slice->u1_slice_qp;
+ ret = ih264d_update_qp(ps_dec, 0);
+ if(ret != OK)
+ return ret;
+ /* Continue filling the current MB group from where the decoder left off */
+ u1_mb_idx = ps_dec->u1_mb_idx;
+ u1_num_mbs = u1_mb_idx;
+
+ u1_num_mbsNby2 = 0;
+ u1_mbaff = ps_slice->u1_mbaff_frame_flag;
+ i2_cur_mb_addr = u2_first_mb_in_slice << u1_mbaff;
+ i2_mb_skip_run = 0;
+ uc_more_data_flag = 1;
+ u1_read_mb_type = 0;
+
+ while(!u1_slice_end)
+ {
+ UWORD8 u1_mb_type;
+
+ /* Leave the loop once the address passes the last MB allowed by the */
+ /* SPS; the function then falls through to the normal OK return. */
+ if(i2_cur_mb_addr > ps_dec->ps_cur_sps->u2_max_mb_addr)
+ {
+
+ break;
+ }
+
+
+ ps_cur_mb_info = ps_dec->ps_nmb_info + u1_num_mbs;
+
+ ps_cur_mb_info->u1_Mux = 0;
+ ps_dec->u4_num_pmbair = (u1_num_mbs >> u1_mbaff);
+ ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + u1_num_mbs;
+
+ ps_cur_mb_info->u1_end_of_slice = 0;
+
+ /* Storing Default partition info */
+ ps_parse_mb_data->u1_num_part = 1;
+ ps_parse_mb_data->u1_isI_mb = 0;
+
+ /* A new mb_skip_run is read only when the previous run is exhausted */
+ /* and no coded MB is pending after that run. */
+ if((!i2_mb_skip_run) && (!u1_read_mb_type))
+ {
+
+ //Inlined ih264d_uev
+ /* Exp-Golomb decode: value = (1 << leading_zeros) + suffix - 1 */
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz;
+
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+
+ u4_ldz = CLZ(u4_word);
+
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ {
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ }
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ i2_mb_skip_run = ((1 << u4_ldz) + u4_word - 1);
+ //Inlined ih264d_uev
+ COPYTHECONTEXT("mb_skip_run", i2_mb_skip_run);
+ /* If data follows the run, an mb_type has to be read once the */
+ /* skipped MBs have been generated. */
+ uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
+ u1_read_mb_type = uc_more_data_flag;
+ }
+
+ /***************************************************************/
+ /* Get the required information for decoding of MB */
+ /* mb_x, mb_y , neighbour availablity, */
+ /***************************************************************/
+ ps_dec->pf_get_mb_info(ps_dec, i2_cur_mb_addr, ps_cur_mb_info, i2_mb_skip_run);
+
+ /***************************************************************/
+ /* Set the deblocking parameters for this MB */
+ /***************************************************************/
+ if(ps_dec->u4_app_disable_deblk_frm == 0)
+ ih264d_set_deblocking_parameters(ps_cur_deblk_mb, ps_slice,
+ ps_dec->u1_mb_ngbr_availablity,
+ ps_dec->u1_cur_mb_fld_dec_flag);
+
+ if(i2_mb_skip_run)
+ {
+ /* Skipped MB: nothing is read from the bitstream for it */
+ /* Set appropriate flags in ps_cur_mb_info and ps_dec */
+ ps_dec->i1_prev_mb_qp_delta = 0;
+ ps_dec->u1_sub_mb_num = 0;
+ ps_cur_mb_info->u1_mb_type = MB_SKIP;
+ ps_cur_mb_info->u1_mb_mc_mode = PRED_16x16;
+ ps_cur_mb_info->u1_cbp = 0;
+
+ {
+ /* Storing Skip partition info */
+ parse_part_params_t *ps_part_info = ps_dec->ps_part;
+ ps_part_info->u1_is_direct = PART_DIRECT_16x16;
+ ps_part_info->u1_sub_mb_num = 0;
+ ps_dec->ps_part++;
+ }
+
+ /* Update Nnzs */
+ ih264d_update_nnz_for_skipmb(ps_dec, ps_cur_mb_info, CAVLC);
+
+ ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
+ ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
+
+ i2_mb_skip_run--;
+ }
+ else
+ {
+ /* Coded MB: the pending mb_type is consumed here */
+ u1_read_mb_type = 0;
+ /**************************************************************/
+ /* Macroblock Layer Begins, Decode the u1_mb_type */
+ /**************************************************************/
+ {
+ UWORD32 u4_bitstream_offset = *pu4_bitstrm_ofst;
+ UWORD32 u4_word, u4_ldz, u4_temp;
+
+
+ //Inlined ih264d_uev
+ /***************************************************************/
+ /* Find leading zeros in next 32 bits */
+ /***************************************************************/
+ NEXTBITS_32(u4_word, u4_bitstream_offset, pu4_bitstrm_buf);
+ u4_ldz = CLZ(u4_word);
+ /* Flush the ps_bitstrm */
+ u4_bitstream_offset += (u4_ldz + 1);
+ /* Read the suffix from the ps_bitstrm */
+ u4_word = 0;
+ if(u4_ldz)
+ GETBITS(u4_word, u4_bitstream_offset, pu4_bitstrm_buf,
+ u4_ldz);
+ *pu4_bitstrm_ofst = u4_bitstream_offset;
+ u4_temp = ((1 << u4_ldz) + u4_word - 1);
+ //Inlined ih264d_uev
+ /* The largest accepted mb_type is 25 + threshold, which is */
+ /* the value handled as I_PCM below. */
+ if(u4_temp > (UWORD32)(25 + u1_mb_threshold))
+ return ERROR_MB_TYPE;
+ u1_mb_type = u4_temp;
+ COPYTHECONTEXT("u1_mb_type", u1_mb_type);
+ }
+ ps_cur_mb_info->u1_mb_type = u1_mb_type;
+
+ /**************************************************************/
+ /* Parse Macroblock data */
+ /**************************************************************/
+ if(u1_mb_type < u1_mb_threshold)
+ {
+ /* Inter MB: parsed by the P/B specific parser installed in */
+ /* ps_dec->pf_parse_inter_mb */
+ ps_cur_mb_info->ps_curmb->u1_mb_type = u1_inter_mb_type;
+
+ ret = ps_dec->pf_parse_inter_mb(ps_dec, ps_cur_mb_info, u1_num_mbs,
+ u1_num_mbsNby2);
+ if(ret != OK)
+ return ret;
+ ps_cur_deblk_mb->u1_mb_type |= u1_deblk_mb_type;
+ }
+ else
+ {
+ /* Storing Intra partition info */
+ ps_parse_mb_data->u1_num_part = 0;
+ ps_parse_mb_data->u1_isI_mb = 1;
+
+ if((25 + u1_mb_threshold) == u1_mb_type)
+ {
+ /* I_PCM_MB */
+ ps_cur_mb_info->ps_curmb->u1_mb_type = I_PCM_MB;
+ ret = ih264d_parse_ipcm_mb(ps_dec, ps_cur_mb_info, u1_num_mbs);
+ if(ret != OK)
+ return ret;
+ /* QP is forced to 0 after an I_PCM MB */
+ ps_dec->u1_qp = 0;
+ }
+ else
+ {
+ /* Intra MB: mb_type is rebased so that the intra parser */
+ /* sees the intra mb_type numbering starting at 0 */
+ ret = ih264d_parse_imb_cavlc(
+ ps_dec, ps_cur_mb_info, u1_num_mbs,
+ (UWORD8)(u1_mb_type - u1_mb_threshold));
+ if(ret != OK)
+ return ret;
+ }
+
+ ps_cur_deblk_mb->u1_mb_type |= D_INTRA_MB;
+ }
+ uc_more_data_flag = MORE_RBSP_DATA(ps_bitstrm);
+ }
+ ps_cur_deblk_mb->u1_mb_qp = ps_dec->u1_qp;
+
+ if(u1_mbaff)
+ {
+ ih264d_update_mbaff_left_nnz(ps_dec, ps_cur_mb_info);
+ }
+ /**************************************************************/
+ /* Get next Macroblock address */
+ /**************************************************************/
+ i2_cur_mb_addr++;
+
+ u1_num_mbs++;
+ ps_dec->u2_total_mbs_coded++;
+ u1_num_mbsNby2++;
+ ps_parse_mb_data++;
+
+ /****************************************************************/
+ /* Check for End Of Row and other flags that determine when to */
+ /* do DMA setup for N/2-Mb, Decode for N-Mb, and Transfer for */
+ /* N-Mb */
+ /****************************************************************/
+ /* presumably u2_mbx is the column of the MB just parsed, making */
+ /* u1_num_mbs_next the number of MBs left in the row - TODO confirm */
+ u1_num_mbs_next = i2_pic_wdin_mbs - ps_dec->u2_mbx - 1;
+ /* In MBAFF the row is only finished after an even number of MBs */
+ u1_end_of_row = (!u1_num_mbs_next) && (!(u1_mbaff && (u1_num_mbs & 0x01)));
+ /* The slice ends when no RBSP data and no skipped MBs remain */
+ u1_slice_end = (!(uc_more_data_flag || i2_mb_skip_run));
+ u1_tfr_n_mb = (u1_num_mbs == ps_dec->u1_recon_mb_grp) || u1_end_of_row
+ || u1_slice_end;
+ /* u1_tfr_n_mb already includes u1_slice_end, so both flags are equal */
+ u1_decode_nmb = u1_tfr_n_mb || u1_slice_end;
+ ps_cur_mb_info->u1_end_of_slice = u1_slice_end;
+
+ /*u1_dma_nby2mb = u1_decode_nmb ||
+ (u1_num_mbsNby2 == ps_dec->u1_recon_mb_grp_pair);*/
+
+//if(u1_dma_nby2mb)
+ if(u1_decode_nmb)
+ {
+
+ /* MV prediction / reference transfer for MBs u1_mb_idx..u1_num_mbs */
+ ret = ps_dec->pf_mvpred_ref_tfr_nby2mb(ps_dec, u1_mb_idx, u1_num_mbs);
+ if(ret != OK)
+ return ret;
+ u1_num_mbsNby2 = 0;
+
+ {
+ /* Rewind the per-group parse buffers for the next group */
+ ps_parse_mb_data = ps_dec->ps_parse_mb_data;
+ ps_dec->ps_part = ps_dec->ps_parse_part_params;
+ }
+ }
+
+ /*H264_DEC_DEBUG_PRINT("Pic: %d Mb_X=%d Mb_Y=%d",
+ ps_slice->i4_poc >> ps_slice->u1_field_pic_flag,
+ ps_dec->u2_mbx,ps_dec->u2_mby + (1 - ps_cur_mb_info->u1_topmb));
+ H264_DEC_DEBUG_PRINT("u1_decode_nmb: %d", u1_decode_nmb);*/
+ if(u1_decode_nmb)
+ {
+
+
+
+ /* Separate-parse mode only hands the group over and advances the */
+ /* MB info pointer; otherwise the group is decoded right here. */
+ if(ps_dec->u1_separate_parse)
+ {
+ ih264d_parse_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+ u1_num_mbs_next, u1_tfr_n_mb, u1_end_of_row);
+ ps_dec->ps_nmb_info += u1_num_mbs;
+ }
+ else
+ {
+ ret = ih264d_decode_recon_tfr_nmb(ps_dec, u1_mb_idx, u1_num_mbs,
+ u1_num_mbs_next, u1_tfr_n_mb,
+ u1_end_of_row);
+ if(ret != OK)
+ return ret;
+ }
+
+ /* After a transfer the group restarts at index 0 */
+ if(u1_tfr_n_mb)
+ u1_num_mbs = 0;
+ u1_mb_idx = u1_num_mbs;
+ ps_dec->u1_mb_idx = u1_num_mbs;
+
+ }
+//ps_dec->ps_pred++;
+ }
+
+ /* In separate-parse mode flag the end of the slice and record how many */
+ /* MBs it contained. */
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_dec->ps_parse_cur_slice->end_of_slice = 1;
+ ps_dec->ps_cur_slice->u4_mbs_in_slice = i2_cur_mb_addr
+ - (u2_first_mb_in_slice << u1_mbaff);
+ }
+
+
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_pslice \endif
+ *
+ * \brief
+ * Parses the remaining part of a P slice header (reference list size and
+ * reordering, prediction weights, MMCO commands, cabac_init_idc, slice QP
+ * and deblocking filter controls), installs the CAVLC or CABAC parsing
+ * function pointers in ps_dec and then parses the slice data.
+ *
+ * \param ps_dec
+ * Decoder context; the current slice, PPS and bitstream are taken from it
+ *
+ * \param u2_first_mb_in_slice
+ * Address of the first macroblock of the slice, forwarded to the slice
+ * data parser
+ *
+ * \return
+ * 0 on Success and Error code otherwise
+ **************************************************************************
+ */
+WORD32 ih264d_parse_pslice(dec_struct_t *ps_dec, UWORD16 u2_first_mb_in_slice)
+{
+ dec_pic_params_t * ps_pps = ps_dec->ps_cur_pps;
+ dec_slice_params_t * ps_cur_slice = ps_dec->ps_cur_slice;
+ dec_bit_stream_t *ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+ UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; //ps_dec->ps_cur_sps->u1_mb_aff_flag;
+ UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
+
+ UWORD32 u4_temp;
+ WORD32 i_temp;
+ WORD32 ret;
+
+ /*--------------------------------------------------------------------*/
+ /* Read remaining contents of the slice header */
+ /*--------------------------------------------------------------------*/
+ {
+ /* Reset the default MV predictor: zero MVs, out-of-range ref index */
+ WORD8 *pi1_buf;
+ WORD16 *pi2_mv = ps_dec->s_default_mv_pred.i2_mv;
+ WORD32 *pi4_mv = (WORD32*)pi2_mv;
+ WORD16 *pi16_refFrame;
+
+ pi1_buf = ps_dec->s_default_mv_pred.i1_ref_frame;
+ pi16_refFrame = (WORD16*)pi1_buf;
+ *pi4_mv = 0;
+ *(pi4_mv + 1) = 0;
+ *pi16_refFrame = OUT_OF_RANGE_REF;
+ ps_dec->s_default_mv_pred.u1_col_ref_pic_idx = (UWORD8)-1;
+ ps_dec->s_default_mv_pred.u1_pic_type = (UWORD8)-1;
+ }
+
+ ps_cur_slice->u1_num_ref_idx_active_override_flag = ih264d_get_bit_h264(
+ ps_bitstrm);
+
+ COPYTHECONTEXT("SH: num_ref_idx_override_flag",
+ ps_cur_slice->u1_num_ref_idx_active_override_flag);
+
+ /* Use the PPS value unless the slice header overrides it */
+ u4_temp = ps_dec->ps_cur_pps->u1_num_ref_idx_lx_active[0];
+ if(ps_cur_slice->u1_num_ref_idx_active_override_flag)
+ {
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf) + 1;
+ }
+
+ {
+ /* Field pictures may reference twice as many entries as frames */
+ UWORD8 u1_max_ref_idx = MAX_FRAMES << u1_field_pic_flag;
+ if(u4_temp > u1_max_ref_idx)
+ {
+ return ERROR_NUM_REF;
+ }
+ ps_cur_slice->u1_num_ref_idx_lx_active[0] = u4_temp;
+ COPYTHECONTEXT("SH: num_ref_idx_l0_active_minus1",
+ ps_cur_slice->u1_num_ref_idx_lx_active[0] - 1);
+
+ }
+
+ {
+ UWORD8 uc_refIdxReFlagL0 = ih264d_get_bit_h264(ps_bitstrm);
+ COPYTHECONTEXT("SH: ref_pic_list_reordering_flag_l0",uc_refIdxReFlagL0);
+
+ /* Initialize the Reference list once in Picture if the slice type */
+ /* of first slice is between 5 to 9 defined in table 7.3 of standard */
+ /* If picture contains both P & B slices then Initialize the Reference*/
+ /* List only when it switches from P to B and B to P */
+ {
+ UWORD8 init_idx_flg = (ps_dec->u1_pr_sl_type
+ != ps_dec->ps_cur_slice->u1_slice_type);
+ if(ps_dec->u1_first_pb_nal_in_pic
+ || (init_idx_flg & !ps_dec->u1_sl_typ_5_9)
+ || ps_dec->u1_num_ref_idx_lx_active_prev
+ != ps_cur_slice->u1_num_ref_idx_lx_active[0])
+ {
+ ih264d_init_ref_idx_lx_p(ps_dec);
+ }
+ if(ps_dec->u1_first_pb_nal_in_pic & ps_dec->u1_sl_typ_5_9)
+ ps_dec->u1_first_pb_nal_in_pic = 0;
+ }
+ /* Store the value for future slices in the same picture */
+ ps_dec->u1_num_ref_idx_lx_active_prev =
+ ps_cur_slice->u1_num_ref_idx_lx_active[0];
+
+ /* Modified temporarily */
+ if(uc_refIdxReFlagL0)
+ {
+ /* Named distinctly so that it does not shadow the function */
+ /* level 'ret' */
+ WORD8 i1_reorder_ret;
+ ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
+ i1_reorder_ret = ih264d_ref_idx_reordering(ps_dec, 0);
+ if(i1_reorder_ret == -1)
+ return ERROR_REFIDX_ORDER_T;
+ ps_dec->ps_ref_pic_buf_lx[0] = ps_dec->ps_dpb_mgr->ps_mod_dpb[0];
+ }
+ else
+ ps_dec->ps_ref_pic_buf_lx[0] =
+ ps_dec->ps_dpb_mgr->ps_init_dpb[0];
+ }
+ /* Create refIdx to POC mapping */
+ {
+ void **pui_map_ref_idx_to_poc_lx0, **pui_map_ref_idx_to_poc_lx1;
+ WORD8 idx;
+ struct pic_buffer_t *ps_pic;
+
+ /* Entry 0 of every list is reserved for ref_idx = -1; the real */
+ /* entries therefore start one slot later. */
+ pui_map_ref_idx_to_poc_lx0 = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L0;
+ pui_map_ref_idx_to_poc_lx0[0] = 0; //For ref_idx = -1
+ pui_map_ref_idx_to_poc_lx0++;
+ for(idx = 0; idx < ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
+ {
+ ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
+ pui_map_ref_idx_to_poc_lx0[idx] = (ps_pic->pu1_buf1);
+ }
+
+ /* Bug Fix Deblocking */
+ pui_map_ref_idx_to_poc_lx1 = ps_dec->ppv_map_ref_idx_to_poc + FRM_LIST_L1;
+ pui_map_ref_idx_to_poc_lx1[0] = 0;
+
+ if(u1_mbaff)
+ {
+ void **ppv_map_ref_idx_to_poc_lx_t, **ppv_map_ref_idx_to_poc_lx_b;
+ void **ppv_map_ref_idx_to_poc_lx_t1, **ppv_map_ref_idx_to_poc_lx_b1;
+ ppv_map_ref_idx_to_poc_lx_t = ps_dec->ppv_map_ref_idx_to_poc
+ + TOP_LIST_FLD_L0;
+ ppv_map_ref_idx_to_poc_lx_b = ps_dec->ppv_map_ref_idx_to_poc
+ + BOT_LIST_FLD_L0;
+
+ ppv_map_ref_idx_to_poc_lx_t[0] = 0; // For ref_idx = -1
+ ppv_map_ref_idx_to_poc_lx_t++;
+ ppv_map_ref_idx_to_poc_lx_b[0] = 0; // For ref_idx = -1
+ ppv_map_ref_idx_to_poc_lx_b++;
+
+ /* Each frame reference yields two field entries; the top and */
+ /* bottom lists store them in opposite order. */
+ for(idx = 0; idx < ps_cur_slice->u1_num_ref_idx_lx_active[0]; idx++)
+ {
+ ps_pic = ps_dec->ps_ref_pic_buf_lx[0][idx];
+ ppv_map_ref_idx_to_poc_lx_t[0] = (ps_pic->pu1_buf1);
+ ppv_map_ref_idx_to_poc_lx_b[1] = (ps_pic->pu1_buf1);
+
+ ppv_map_ref_idx_to_poc_lx_b[0] = (ps_pic->pu1_buf1) + 1;
+ ppv_map_ref_idx_to_poc_lx_t[1] = (ps_pic->pu1_buf1) + 1;
+
+ ppv_map_ref_idx_to_poc_lx_t += 2;
+ ppv_map_ref_idx_to_poc_lx_b += 2;
+ }
+ ppv_map_ref_idx_to_poc_lx_t1 = ps_dec->ppv_map_ref_idx_to_poc
+ + TOP_LIST_FLD_L1;
+ ppv_map_ref_idx_to_poc_lx_t1[0] = 0;
+ ppv_map_ref_idx_to_poc_lx_b1 = ps_dec->ppv_map_ref_idx_to_poc
+ + BOT_LIST_FLD_L1;
+ ppv_map_ref_idx_to_poc_lx_b1[0] = 0;
+
+ }
+
+ if(ps_dec->u4_num_cores >= 3)
+ {
+ /* With three or more cores the map is also copied into the */
+ /* slice being parsed */
+ WORD32 num_entries;
+ WORD32 size;
+
+ num_entries = MIN(MAX_FRAMES, ps_dec->u4_num_ref_frames_at_init);
+ num_entries = 2 * ((2 * num_entries) + 1);
+
+ size = num_entries * sizeof(void *);
+ size += PAD_MAP_IDX_POC * sizeof(void *);
+
+ memcpy((void *)ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc,
+ ps_dec->ppv_map_ref_idx_to_poc,
+ size);
+ }
+
+
+ }
+ if(ps_pps->u1_wted_pred_flag)
+ {
+ ret = ih264d_parse_pred_weight_table(ps_cur_slice, ps_bitstrm);
+ if(ret != OK)
+ return ret;
+ ih264d_form_pred_weight_matrix(ps_dec);
+ ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
+ }
+ else
+ {
+ ps_dec->ps_cur_slice->u2_log2Y_crwd = 0;
+ ps_dec->pu4_wt_ofsts = ps_dec->pu4_wts_ofsts_mat;
+ }
+
+ ps_dec->ps_parse_cur_slice->u2_log2Y_crwd =
+ ps_dec->ps_cur_slice->u2_log2Y_crwd;
+
+ if(u1_mbaff && (u1_field_pic_flag == 0))
+ {
+ ih264d_convert_frm_mbaff_list(ps_dec);
+ }
+
+ /* G050 */
+ if(ps_cur_slice->u1_nal_ref_idc != 0)
+ {
+ /* MMCO commands are parsed once; later slices only skip the same */
+ /* number of bits */
+ if(!ps_dec->ps_dpb_cmds->u1_dpb_commands_read)
+ ps_dec->u4_bitoffset = ih264d_read_mmco_commands(ps_dec);
+ else
+ ps_bitstrm->u4_ofst += ps_dec->u4_bitoffset;
+
+ }
+ /* G050 */
+
+ if(ps_pps->u1_entropy_coding_mode == CABAC)
+ {
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+ if(u4_temp > MAX_CABAC_INIT_IDC)
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+ ps_cur_slice->u1_cabac_init_idc = u4_temp;
+ COPYTHECONTEXT("SH: cabac_init_idc",ps_cur_slice->u1_cabac_init_idc);
+ }
+
+ /* Read slice_qp_delta */
+ {
+ WORD32 i4_init_qp = ps_pps->u1_pic_init_qp;
+ WORD32 i4_qp_delta = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+ /* Same acceptance range as 0 <= init_qp + delta <= 51, but the */
+ /* delta is validated before the addition so that an extreme value */
+ /* from the bitstream cannot overflow the signed sum. */
+ if((i4_qp_delta < -i4_init_qp) || (i4_qp_delta > (51 - i4_init_qp)))
+ {
+ return ERROR_INV_RANGE_QP_T;
+ }
+ i_temp = i4_init_qp + i4_qp_delta;
+ }
+ ps_cur_slice->u1_slice_qp = i_temp;
+ COPYTHECONTEXT("SH: slice_qp_delta",
+ (WORD8)(ps_cur_slice->u1_slice_qp - ps_pps->u1_pic_init_qp));
+
+ if(ps_pps->u1_deblocking_filter_parameters_present_flag == 1)
+ {
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ if(u4_temp > SLICE_BOUNDARY_DBLK_DISABLED)
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+
+ COPYTHECONTEXT("SH: disable_deblocking_filter_idc", u4_temp);
+ ps_cur_slice->u1_disable_dblk_filter_idc = u4_temp;
+ if(u4_temp != 1)
+ {
+ /* The bitstream carries offset / 2. The raw value is range */
+ /* checked before it is doubled (by multiplication, not by a */
+ /* left shift) so that negative or huge values can neither */
+ /* trigger undefined behaviour nor wrap into the valid range. */
+ /* Assumes MIN_DBLK_FIL_OFF <= 0 <= MAX_DBLK_FIL_OFF, in which */
+ /* case every value rejected here would also fail the check on */
+ /* the doubled value. */
+ i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+ i_temp *= 2;
+ if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+ ps_cur_slice->i1_slice_alpha_c0_offset = i_temp;
+ COPYTHECONTEXT("SH: slice_alpha_c0_offset_div2",
+ ps_cur_slice->i1_slice_alpha_c0_offset >> 1);
+
+ i_temp = ih264d_sev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+ i_temp *= 2;
+ if((MIN_DBLK_FIL_OFF > i_temp) || (i_temp > MAX_DBLK_FIL_OFF))
+ {
+ return ERROR_INV_SLICE_HDR_T;
+ }
+ ps_cur_slice->i1_slice_beta_offset = i_temp;
+ COPYTHECONTEXT("SH: slice_beta_offset_div2",
+ ps_cur_slice->i1_slice_beta_offset >> 1);
+ }
+ else
+ {
+ ps_cur_slice->i1_slice_alpha_c0_offset = 0;
+ ps_cur_slice->i1_slice_beta_offset = 0;
+ }
+ }
+ else
+ {
+ ps_cur_slice->u1_disable_dblk_filter_idc = 0;
+ ps_cur_slice->i1_slice_alpha_c0_offset = 0;
+ ps_cur_slice->i1_slice_beta_offset = 0;
+ }
+
+ /* Publish the header: slice_header_done is set only after DATA_SYNC() */
+ DATA_SYNC();
+ ps_dec->ps_parse_cur_slice->slice_header_done = 2;
+
+ if(ps_pps->u1_entropy_coding_mode)
+ {
+ SWITCHOFFTRACE; SWITCHONTRACECABAC;
+ ps_dec->pf_parse_inter_slice = ih264d_parse_inter_slice_data_cabac;
+ /* NOTE(review): this returns before the CABAC MB parser and */
+ /* contexts are set up; presumably the caller deals with a slice */
+ /* already flagged as erroneous - confirm. */
+ if(ps_dec->ps_parse_cur_slice->u2_error_flag == 1)
+ return 0;
+ ps_dec->pf_parse_inter_mb = ih264d_parse_pmb_cabac;
+ ih264d_init_cabac_contexts(P_SLICE, ps_dec);
+
+ if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+ ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_mbaff;
+ else
+ ps_dec->pf_get_mb_info = ih264d_get_mb_info_cabac_nonmbaff;
+ }
+ else
+ {
+ SWITCHONTRACE; SWITCHOFFTRACECABAC;
+ ps_dec->pf_parse_inter_slice = ih264d_parse_inter_slice_data_cavlc;
+ ps_dec->pf_parse_inter_mb = ih264d_parse_pmb_cavlc;
+ if(ps_dec->ps_cur_slice->u1_mbaff_frame_flag)
+ {
+ ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_mbaff;
+ }
+ else
+ ps_dec->pf_get_mb_info = ih264d_get_mb_info_cavlc_nonmbaff;
+ }
+
+ ps_dec->u1_B = 0;
+ ps_dec->pf_mvpred_ref_tfr_nby2mb = ih264d_mv_pred_ref_tfr_nby2_pmb;
+ ret = ps_dec->pf_parse_inter_slice(ps_dec, ps_cur_slice, u2_first_mb_in_slice);
+ if(ret != OK)
+ return ret;
+// ps_dec->curr_slice_in_error = 0 ;
+ return OK;
+}
diff --git a/decoder/ih264d_parse_slice.c b/decoder/ih264d_parse_slice.c
new file mode 100755
index 0000000..323df43
--- /dev/null
+++ b/decoder/ih264d_parse_slice.c
@@ -0,0 +1,1887 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_parse_slice.c
+ *
+ * \brief
+ * Contains routines that decodes a slice NAL unit
+ *
+ * \date
+ * 19/12/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include <string.h>
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ithread.h"
+#include "ih264d_structs.h"
+#include "ih264d_debug.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_parse_mb_header.h"
+#include "ih264d_process_bslice.h"
+#include "ih264d_process_pslice.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_utils.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_defs.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_tables.h"
+#include "ih264d_defs.h"
+#include "ih264d_mem_request.h"
+#include "ih264d_parse_islice.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_mb_utils.h"
+
+#include "ih264d_defs.h"
+#include "ih264d_quant_scaling.h"
+
+#include "ih264d_inter_pred.h"
+
+#include "ih264d_sei.h"
+#include "ih264d.h"
+#include "ih264_error.h"
+#include "ih264_disp_mgr.h"
+#include "ih264_buf_mgr.h"
+
+#include "ih264d_thread_parse_decode.h"
+#include "ih264d_thread_compute_bs.h"
+#include "ih264d_dpb_manager.h"
+#include <assert.h>
+#include "ih264d_parse_islice.h"
+#define RET_LAST_SKIP 0x80000000
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_form_pred_weight_matrix \endif
+ *
+ * \brief
+ * Forms pred weight matrix.
+ *
+ * Copies the per-reference entries of the current slice's u4_wt_ofst_lx
+ * table into ps_dec->pu4_wts_ofsts_mat, six words per record:
+ * - B slice: one record per (L0 ref i, L1 ref j) pair, with the three L0
+ * words in the even slots and the three L1 words in the odd slots.
+ * - otherwise: one record per L0 ref, with the three L0 words in the even
+ * slots; the odd slots are not written.
+ *
+ * \param ps_dec
+ * Decoder context providing the current slice and the output matrix
+ *
+ * \return
+ * None
+ *
+ **************************************************************************
+ */
+
+void ih264d_form_pred_weight_matrix(dec_struct_t *ps_dec)
+{
+ dec_slice_params_t *ps_cur_slice;
+ UWORD8 uc_num_ref_idx_l0_active, uc_num_ref_idx_l1_active;
+ UWORD8 i, j;
+ UWORD32 *pu4_mat_iwt_ofst;
+ UWORD32 *pui32_weight_offset_l0, *pui32_weight_offset_l1;
+
+ ps_cur_slice = ps_dec->ps_cur_slice;
+ uc_num_ref_idx_l0_active = ps_cur_slice->u1_num_ref_idx_lx_active[0];
+ uc_num_ref_idx_l1_active = ps_cur_slice->u1_num_ref_idx_lx_active[1];
+
+ pu4_mat_iwt_ofst = ps_dec->pu4_wts_ofsts_mat;
+
+ if(ps_cur_slice->u1_slice_type == B_SLICE)
+ {
+ /* Records are written back to back in (i, j) order */
+ for(i = 0; i < uc_num_ref_idx_l0_active; i++)
+ {
+ pui32_weight_offset_l0 = ps_cur_slice->u4_wt_ofst_lx[0][i];
+ for(j = 0; j < uc_num_ref_idx_l1_active; j++)
+ {
+ pui32_weight_offset_l1 = ps_cur_slice->u4_wt_ofst_lx[1][j];
+ pu4_mat_iwt_ofst[0] = pui32_weight_offset_l0[0];
+ pu4_mat_iwt_ofst[1] = pui32_weight_offset_l1[0];
+ pu4_mat_iwt_ofst[2] = pui32_weight_offset_l0[1];
+ pu4_mat_iwt_ofst[3] = pui32_weight_offset_l1[1];
+ pu4_mat_iwt_ofst[4] = pui32_weight_offset_l0[2];
+ pu4_mat_iwt_ofst[5] = pui32_weight_offset_l1[2];
+ pu4_mat_iwt_ofst += 6;
+ }
+ }
+ }
+ else
+ {
+ /* Same six-word record stride as the B slice layout, but only the */
+ /* even (L0) slots are filled */
+ for(i = 0; i < uc_num_ref_idx_l0_active; i++)
+ {
+ pui32_weight_offset_l0 = ps_cur_slice->u4_wt_ofst_lx[0][i];
+ pu4_mat_iwt_ofst[0] = (UWORD32)pui32_weight_offset_l0[0];
+ pu4_mat_iwt_ofst[2] = (UWORD32)pui32_weight_offset_l0[1];
+ pu4_mat_iwt_ofst[4] = (UWORD32)pui32_weight_offset_l0[2];
+ pu4_mat_iwt_ofst += 6;
+ }
+ }
+}
+
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_start_of_pic \endif
+ *
+ * \brief
+ * Initializes the parameters required by all the slices of a picture
+ *
+ * \return
+ * WORD32 status; an error code (for example ERROR_UNAVAIL_PICBUF_T) when
+ * the picture cannot be set up
+ *
+ **************************************************************************
+ */
+
+WORD32 ih264d_start_of_pic(dec_struct_t *ps_dec,
+ WORD32 i4_poc,
+ pocstruct_t *ps_temp_poc,
+ UWORD16 u2_frame_num,
+ dec_pic_params_t *ps_pps)
+{
+ pocstruct_t *ps_prev_poc = &ps_dec->s_cur_pic_poc;
+ pocstruct_t *ps_cur_poc = ps_temp_poc;
+
+ pic_buffer_t *pic_buf;
+
+ ivd_video_decode_op_t * ps_dec_output =
+ (ivd_video_decode_op_t *)ps_dec->pv_dec_out;
+ dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
+ dec_seq_params_t *ps_seq = ps_pps->ps_sps;
+ UWORD8 u1_bottom_field_flag = ps_cur_slice->u1_bottom_field_flag;
+ UWORD8 u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
+ /* high profile related declarations */
+ high_profile_tools_t s_high_profile;
+ WORD32 ret;
+
+ H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
+
+ ps_prev_poc->i4_pic_order_cnt_lsb = ps_cur_poc->i4_pic_order_cnt_lsb;
+ ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb;
+ ps_prev_poc->i4_delta_pic_order_cnt_bottom =
+ ps_cur_poc->i4_delta_pic_order_cnt_bottom;
+ ps_prev_poc->i4_delta_pic_order_cnt[0] =
+ ps_cur_poc->i4_delta_pic_order_cnt[0];
+ ps_prev_poc->i4_delta_pic_order_cnt[1] =
+ ps_cur_poc->i4_delta_pic_order_cnt[1];
+ ps_prev_poc->u1_bot_field = ps_dec->ps_cur_slice->u1_bottom_field_flag;
+ ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst;
+ ps_prev_poc->u2_frame_num = u2_frame_num;
+ ps_dec->i1_prev_mb_qp_delta = 0;
+ ps_dec->i1_next_ctxt_idx = 0;
+
+ ps_dec->u4_mb_level_deblk = 0;
+
+ /* Disable MB_LEVEL_DEBLK if deblock thread is enabled */
+ if(ps_dec->u4_num_cores >= 3)
+ {
+ ps_dec->u4_mb_level_deblk = 0;
+ }
+
+
+ if(ps_seq->u1_mb_aff_flag == 1)
+ {
+ ps_dec->u4_mb_level_deblk = 0;
+ if(ps_dec->u4_num_cores > 2)
+ ps_dec->u4_num_cores = 2;
+ }
+ if(ps_dec->u4_mb_level_deblk == 1)
+ ps_dec->u4_use_intrapred_line_copy = 1;
+ else
+ ps_dec->u4_use_intrapred_line_copy = 0;
+
+ if((ps_dec->u4_num_cores >= 3) && (ps_seq->u1_mb_aff_flag == 0))
+ {
+ ps_dec->u4_use_intrapred_line_copy = 1;
+ }
+
+ ps_dec->u4_app_disable_deblk_frm = 0;
+ /* If degrade is enabled, set the degrade flags appropriately */
+ if(ps_dec->i4_degrade_type && ps_dec->i4_degrade_pics)
+ {
+ WORD32 degrade_pic;
+ ps_dec->i4_degrade_pic_cnt++;
+ degrade_pic = 0;
+
+ /* If degrade is to be done in all frames, then do not check further */
+ switch(ps_dec->i4_degrade_pics)
+ {
+ case 4:
+ {
+ degrade_pic = 1;
+ break;
+ }
+ case 3:
+ {
+ if(ps_cur_slice->u1_slice_type != I_SLICE)
+ degrade_pic = 1;
+
+ break;
+ }
+ case 2:
+ {
+
+ /* If pic count hits non-degrade interval or it is an islice, then do not degrade */
+ if((ps_cur_slice->u1_slice_type != I_SLICE)
+ && (ps_dec->i4_degrade_pic_cnt
+ != ps_dec->i4_nondegrade_interval))
+ degrade_pic = 1;
+
+ break;
+ }
+ case 1:
+ {
+ /* Check if the current picture is non-ref */
+ if(0 == ps_cur_slice->u1_nal_ref_idc)
+ {
+ degrade_pic = 1;
+ }
+ break;
+ }
+
+ }
+ if(degrade_pic)
+ {
+ if(ps_dec->i4_degrade_type & 0x2)
+ ps_dec->u4_app_disable_deblk_frm = 1;
+
+ /* MC degrading is done only for non-ref pictures */
+ if(0 == ps_cur_slice->u1_nal_ref_idc)
+ {
+ if(ps_dec->i4_degrade_type & 0x4)
+ ps_dec->i4_mv_frac_mask = 0;
+
+ if(ps_dec->i4_degrade_type & 0x8)
+ ps_dec->i4_mv_frac_mask = 0;
+ }
+ }
+ else
+ ps_dec->i4_degrade_pic_cnt = 0;
+ }
+
+ {
+ dec_err_status_t * ps_err = ps_dec->ps_dec_err_status;
+ if(ps_dec->u1_sl_typ_5_9
+ && ((ps_cur_slice->u1_slice_type == I_SLICE)
+ || (ps_cur_slice->u1_slice_type
+ == SI_SLICE)))
+ ps_err->u1_cur_pic_type = PIC_TYPE_I;
+ else
+ ps_err->u1_cur_pic_type = PIC_TYPE_UNKNOWN;
+
+ if(ps_err->u1_pic_aud_i == PIC_TYPE_I)
+ {
+ ps_err->u1_cur_pic_type = PIC_TYPE_I;
+ ps_err->u1_pic_aud_i = PIC_TYPE_UNKNOWN;
+ }
+
+ if(ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL)
+ {
+ if(ps_err->u1_err_flag)
+ ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr);
+ ps_err->u1_err_flag = ACCEPT_ALL_PICS;
+ }
+ }
+
+ ps_dec->u1_first_nal_in_pic = 0;
+ if(ps_dec->u1_init_dec_flag && ps_dec->s_prev_seq_params.u1_eoseq_pending)
+ {
+ /* Reset the decoder picture buffers */
+ WORD32 j;
+ for(j = 0; j < MAX_DISP_BUFS_NEW; j++)
+ {
+
+ ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ j,
+ BUF_MGR_REF);
+ ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+ ps_dec->au1_pic_buf_id_mv_buf_id_map[j],
+ BUF_MGR_REF);
+ ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ j,
+ BUF_MGR_IO);
+ }
+
+ /* reset the decoder structure parameters related to buffer handling */
+ ps_dec->u1_second_field = 0;
+ ps_dec->i4_cur_display_seq = 0;
+
+ /********************************************************************/
+ /* indicate in the decoder output i4_status that some frames are being */
+ /* dropped, so that it resets timestamp and wait for a new sequence */
+ /********************************************************************/
+
+ ps_dec->s_prev_seq_params.u1_eoseq_pending = 0;
+ }
+ ret = ih264d_init_pic(ps_dec, u2_frame_num, i4_poc, ps_pps);
+ if(ret != OK)
+ return ret;
+
+ ps_dec->pv_parse_tu_coeff_data = ps_dec->pv_pic_tu_coeff_data;
+ ps_dec->pv_proc_tu_coeff_data = ps_dec->pv_pic_tu_coeff_data;
+ ps_dec->ps_nmb_info = ps_dec->ps_frm_mb_info;
+ if(ps_dec->u1_separate_parse)
+ {
+ UWORD16 pic_wd = ps_dec->u4_width_at_init;
+ UWORD16 pic_ht = ps_dec->u4_height_at_init;
+ UWORD32 num_mbs;
+
+ if((NULL != ps_dec->ps_sps) && (1 == (ps_dec->ps_sps->u1_is_valid)))
+ {
+ pic_wd = ps_dec->u2_pic_wd;
+ pic_ht = ps_dec->u2_pic_ht;
+ }
+ num_mbs = (pic_wd * pic_ht) >> 8;
+
+ ps_dec->u4_start_frame_decode = 0;
+ if(ps_dec->pu1_dec_mb_map)
+ {
+ memset((void *)ps_dec->pu1_dec_mb_map, 0, num_mbs);
+ }
+
+ if(ps_dec->pu1_recon_mb_map)
+ {
+
+ memset((void *)ps_dec->pu1_recon_mb_map, 0, num_mbs);
+ }
+
+ if(ps_dec->pu2_slice_num_map)
+ {
+ memset((void *)ps_dec->pu2_slice_num_map, 0,
+ (num_mbs * sizeof(UWORD16)));
+ }
+
+ }
+ if(ps_dec->u4_first_slice_in_pic == 1)
+ {
+ ps_dec->ps_parse_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
+ ps_dec->ps_decode_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
+ ps_dec->ps_computebs_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
+ }
+ ps_dec->ps_parse_cur_slice->slice_header_done = 0;
+ ps_dec->ps_parse_cur_slice->last_slice_in_frame = 0;
+ ps_dec->ps_parse_cur_slice->u4_num_mbs_done_in_slice = 0;
+
+ ps_dec->ps_parse_cur_slice->u2_error_flag = 0;
+
+ /* Initialize all the HP toolsets to zero */
+ ps_dec->s_high_profile.u1_scaling_present = 0;
+ ps_dec->s_high_profile.u1_transform8x8_present = 0;
+
+ /* Get Next Free Picture */
+ if(1 == ps_dec->u4_share_disp_buf)
+ {
+ UWORD32 i;
+ /* Free any buffer that is in the queue to be freed */
+ for(i = 0; i < MAX_DISP_BUFS_NEW; i++)
+ {
+ if(0 == ps_dec->u4_disp_buf_to_be_freed[i])
+ continue;
+ ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, i,
+ BUF_MGR_IO);
+ ps_dec->u4_disp_buf_to_be_freed[i] = 0;
+ ps_dec->u4_disp_buf_mapping[i] = 0;
+
+ }
+ }
+ if(!(u1_field_pic_flag && 0 != ps_dec->u1_top_bottom_decoded)) //ps_dec->u1_second_field))
+ {
+ pic_buffer_t *ps_cur_pic;
+ WORD32 cur_pic_buf_id, cur_mv_buf_id;
+ col_mv_buf_t *ps_col_mv;
+ while(1)
+ {
+ ps_cur_pic = (pic_buffer_t *)ih264_buf_mgr_get_next_free(
+ (buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ &cur_pic_buf_id);
+ if(ps_cur_pic == NULL)
+ {
+ ps_dec->i4_error_code = ERROR_UNAVAIL_PICBUF_T;
+ return ERROR_UNAVAIL_PICBUF_T;
+ }
+ if(0 == ps_dec->u4_disp_buf_mapping[cur_pic_buf_id])
+ {
+ break;
+ }
+
+ }
+ ps_col_mv = (col_mv_buf_t *)ih264_buf_mgr_get_next_free((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+ &cur_mv_buf_id);
+ if(ps_col_mv == NULL)
+ {
+ ps_dec->i4_error_code = ERROR_UNAVAIL_MVBUF_T;
+ return ERROR_UNAVAIL_MVBUF_T;
+ }
+
+ ps_dec->ps_cur_pic = ps_cur_pic;
+ ps_dec->u1_pic_buf_id = cur_pic_buf_id;
+ ps_cur_pic->u4_ts = ps_dec->u4_ts;
+
+
+ ps_cur_pic->u1_mv_buf_id = cur_mv_buf_id;
+ ps_dec->au1_pic_buf_id_mv_buf_id_map[cur_pic_buf_id] = cur_mv_buf_id;
+
+ ps_cur_pic->pu1_col_zero_flag = (UWORD8 *)ps_col_mv->pv_col_zero_flag;
+ ps_cur_pic->ps_mv = (mv_pred_t *)ps_col_mv->pv_mv;
+ ps_dec->au1_pic_buf_ref_flag[cur_pic_buf_id] = 0;
+
+ if(!ps_dec->ps_cur_pic)
+ {
+ H264_DEC_DEBUG_PRINT("------- Display Buffers Reset --------\n");
+ WORD32 j;
+ for(j = 0; j < MAX_DISP_BUFS_NEW; j++)
+ {
+
+ ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ j,
+ BUF_MGR_REF);
+ ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+ ps_dec->au1_pic_buf_id_mv_buf_id_map[j],
+ BUF_MGR_REF);
+ ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ j,
+ BUF_MGR_IO);
+ }
+
+ ps_dec->i4_cur_display_seq = 0;
+ ps_dec->i4_prev_max_display_seq = 0;
+ ps_dec->i4_max_poc = 0;
+
+ ps_cur_pic = (pic_buffer_t *)ih264_buf_mgr_get_next_free(
+ (buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ &cur_pic_buf_id);
+ if(ps_cur_pic == NULL)
+ {
+ ps_dec->i4_error_code = ERROR_UNAVAIL_PICBUF_T;
+ return ERROR_UNAVAIL_PICBUF_T;
+ }
+
+ ps_col_mv = (col_mv_buf_t *)ih264_buf_mgr_get_next_free((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+ &cur_mv_buf_id);
+ if(ps_col_mv == NULL)
+ {
+ ps_dec->i4_error_code = ERROR_UNAVAIL_MVBUF_T;
+ return ERROR_UNAVAIL_MVBUF_T;
+ }
+
+ ps_dec->ps_cur_pic = ps_cur_pic;
+ ps_dec->u1_pic_buf_id = cur_pic_buf_id;
+ ps_cur_pic->u4_ts = ps_dec->u4_ts;
+ ps_dec->apv_buf_id_pic_buf_map[cur_pic_buf_id] = (void *)ps_cur_pic;
+
+ ps_cur_pic->u1_mv_buf_id = cur_mv_buf_id;
+ ps_dec->au1_pic_buf_id_mv_buf_id_map[cur_pic_buf_id] = cur_mv_buf_id;
+
+ ps_cur_pic->pu1_col_zero_flag = (UWORD8 *)ps_col_mv->pv_col_zero_flag;
+ ps_cur_pic->ps_mv = (mv_pred_t *)ps_col_mv->pv_mv;
+ ps_dec->au1_pic_buf_ref_flag[cur_pic_buf_id] = 0;
+
+ }
+
+ ps_dec->ps_cur_pic->u1_picturetype = u1_field_pic_flag;
+ ps_dec->ps_cur_pic->u4_pack_slc_typ = SKIP_NONE;
+ H264_DEC_DEBUG_PRINT("got a buffer\n");
+ }
+ else
+ {
+ H264_DEC_DEBUG_PRINT("did not get a buffer\n");
+ }
+
+ ps_dec->u4_pic_buf_got = 1;
+
+ ps_dec->ps_cur_pic->i4_poc = i4_poc;
+ ps_dec->ps_cur_pic->i4_frame_num = u2_frame_num;
+ ps_dec->ps_cur_pic->i4_pic_num = u2_frame_num;
+ ps_dec->ps_cur_pic->i4_top_field_order_cnt = ps_pps->i4_top_field_order_cnt;
+ ps_dec->ps_cur_pic->i4_bottom_field_order_cnt =
+ ps_pps->i4_bottom_field_order_cnt;
+ ps_dec->ps_cur_pic->i4_avg_poc = ps_pps->i4_avg_poc;
+ ps_dec->ps_cur_pic->u4_time_stamp = ps_dec->u4_pts;
+
+ ps_dec->s_cur_pic = *(ps_dec->ps_cur_pic);
+ if(u1_field_pic_flag && u1_bottom_field_flag)
+ {
+ WORD32 i4_temp_poc;
+ WORD32 i4_top_field_order_poc, i4_bot_field_order_poc;
+ /* Point to odd lines, since it's bottom field */
+ ps_dec->s_cur_pic.pu1_buf1 += ps_dec->s_cur_pic.u2_frm_wd_y;
+ ps_dec->s_cur_pic.pu1_buf2 += ps_dec->s_cur_pic.u2_frm_wd_uv;
+ ps_dec->s_cur_pic.pu1_buf3 += ps_dec->s_cur_pic.u2_frm_wd_uv;
+ ps_dec->s_cur_pic.ps_mv +=
+ ((ps_dec->u2_pic_ht * ps_dec->u2_pic_wd) >> 5);
+ ps_dec->s_cur_pic.pu1_col_zero_flag += ((ps_dec->u2_pic_ht
+ * ps_dec->u2_pic_wd) >> 5);
+ ps_dec->ps_cur_pic->u1_picturetype |= BOT_FLD;
+ i4_top_field_order_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
+ i4_bot_field_order_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
+ i4_temp_poc = MIN(i4_top_field_order_poc,
+ i4_bot_field_order_poc);
+ ps_dec->ps_cur_pic->i4_avg_poc = i4_temp_poc;
+ }
+
+ ps_cur_slice->u1_mbaff_frame_flag = ps_seq->u1_mb_aff_flag
+ && (!u1_field_pic_flag);
+
+ ps_dec->ps_cur_pic->u1_picturetype |= (ps_cur_slice->u1_mbaff_frame_flag
+ << 2);
+ if(ps_cur_slice->u1_mbaff_frame_flag)
+ {
+ ps_dec->u2_mb_group_cols_y = ((ps_dec->u1_recon_mb_grp >> 1) << 4) + 8;
+ ps_dec->u2_mb_group_cols_cr = ((ps_dec->u1_recon_mb_grp >> 1) << 3) + 8;
+ }
+ else
+ {
+ ps_dec->u2_mb_group_cols_y = (ps_dec->u1_recon_mb_grp << 4) + 8;
+ ps_dec->u2_mb_group_cols_cr = (ps_dec->u1_recon_mb_grp << 3) + 8;
+ }
+
+
+
+
+
+ ps_dec->ps_cur_mb_row = ps_dec->ps_nbr_mb_row; //[0];
+ ps_dec->ps_cur_mb_row++; //Increment by 1 ,so that left mb will always be valid
+ ps_dec->ps_top_mb_row =
+ ps_dec->ps_nbr_mb_row
+ + ((ps_dec->u2_frm_wd_in_mbs + 1)
+ << (1
+ - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag));
+ ps_dec->ps_top_mb_row++; //Increment by 1 ,so that left mb will always be valid
+
+ ps_dec->u2_mb_group_cols_y1 = ps_dec->u2_mb_group_cols_y;
+ ps_dec->u2_mb_group_cols_cr1 = ps_dec->u2_mb_group_cols_cr;
+ ps_dec->pu1_y = ps_dec->pu1_y_scratch[0];
+ ps_dec->pu1_u = ps_dec->pu1_u_scratch[0];
+ ps_dec->pu1_v = ps_dec->pu1_v_scratch[0];
+ ps_dec->u1_yuv_scratch_idx = 0;
+ /* CHANGED CODE */
+ ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv;
+ ps_dec->ps_mv_top = ps_dec->ps_mv_top_p[0];
+ /* CHANGED CODE */
+ ps_dec->u1_mv_top_p = 0;
+ ps_dec->u1_mb_idx = 0;
+ /* CHANGED CODE */
+ ps_dec->ps_mv_left = ps_dec->s_cur_pic.ps_mv;
+ ps_dec->pu1_yleft = 0;
+ ps_dec->pu1_uleft = 0;
+ ps_dec->pu1_vleft = 0;
+ ps_dec->u1_not_wait_rec = 2;
+ ps_dec->u2_total_mbs_coded = 0;
+ ps_dec->i4_submb_ofst = -(SUB_BLK_SIZE);
+ ps_dec->u4_pred_info_idx = 0;
+ ps_dec->u4_pred_info_pkd_idx = 0;
+ ps_dec->u4_dma_buf_idx = 0;
+ ps_dec->ps_mv = ps_dec->s_cur_pic.ps_mv;
+ ps_dec->ps_mv_bank_cur = ps_dec->s_cur_pic.ps_mv;
+ ps_dec->pu1_col_zero_flag = ps_dec->s_cur_pic.pu1_col_zero_flag;
+ ps_dec->ps_part = ps_dec->ps_parse_part_params;
+ ps_dec->i2_prev_slice_mbx = -1;
+ ps_dec->i2_prev_slice_mby = 0;
+ ps_dec->u2_mv_2mb[0] = 0;
+ ps_dec->u2_mv_2mb[1] = 0;
+ ps_dec->u1_last_pic_not_decoded = 0;
+
+ ps_dec->u2_cur_slice_num = 0;
+ ps_dec->u2_cur_slice_num_dec_thread = 0;
+ ps_dec->u2_cur_slice_num_bs = 0;
+ ps_dec->u4_intra_pred_line_ofst = 0;
+ ps_dec->pu1_cur_y_intra_pred_line = ps_dec->pu1_y_intra_pred_line;
+ ps_dec->pu1_cur_u_intra_pred_line = ps_dec->pu1_u_intra_pred_line;
+ ps_dec->pu1_cur_v_intra_pred_line = ps_dec->pu1_v_intra_pred_line;
+
+ ps_dec->pu1_cur_y_intra_pred_line_base = ps_dec->pu1_y_intra_pred_line;
+ ps_dec->pu1_cur_u_intra_pred_line_base = ps_dec->pu1_u_intra_pred_line;
+ ps_dec->pu1_cur_v_intra_pred_line_base = ps_dec->pu1_v_intra_pred_line;
+
+
+
+
+
+ ps_dec->pu1_prev_y_intra_pred_line = ps_dec->pu1_y_intra_pred_line
+ + (ps_dec->u2_frm_wd_in_mbs * MB_SIZE);
+
+ ps_dec->pu1_prev_u_intra_pred_line = ps_dec->pu1_u_intra_pred_line
+ + ps_dec->u2_frm_wd_in_mbs * BLK8x8SIZE * YUV420SP_FACTOR;
+ ps_dec->pu1_prev_v_intra_pred_line = ps_dec->pu1_v_intra_pred_line
+ + ps_dec->u2_frm_wd_in_mbs * BLK8x8SIZE;
+
+ ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic;
+ ps_dec->ps_deblk_mbn_curr = ps_dec->ps_deblk_mbn;
+ ps_dec->ps_deblk_mbn_prev = ps_dec->ps_deblk_mbn + ps_dec->u1_recon_mb_grp;
+ /* Initialize The Function Pointer Depending Upon the Entropy and MbAff Flag */
+ {
+ if(ps_cur_slice->u1_mbaff_frame_flag)
+ {
+ ps_dec->pf_compute_bs = ih264d_compute_bs_mbaff;
+ ps_dec->pf_mvpred = ih264d_mvpred_mbaff;
+ }
+ else
+ {
+ ps_dec->pf_compute_bs = ih264d_compute_bs_non_mbaff;
+ ps_dec->u1_cur_mb_fld_dec_flag = ps_cur_slice->u1_field_pic_flag;
+ }
+ }
+ /* Set up the Parameter for DMA transfer */
+ {
+ UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
+
+ UWORD8 u1_mbaff = ps_cur_slice->u1_mbaff_frame_flag;
+
+ UWORD8 uc_lastmbs = (((ps_dec->u2_pic_wd) >> 4)
+ % (ps_dec->u1_recon_mb_grp >> u1_mbaff));
+ UWORD16 ui16_lastmbs_widthY =
+ (uc_lastmbs ? (uc_lastmbs << 4) : ((ps_dec->u1_recon_mb_grp
+ >> u1_mbaff) << 4));
+ UWORD16 ui16_lastmbs_widthUV =
+ uc_lastmbs ? (uc_lastmbs << 3) : ((ps_dec->u1_recon_mb_grp
+ >> u1_mbaff) << 3);
+
+ ps_dec->s_tran_addrecon.pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1;
+ ps_dec->s_tran_addrecon.pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2;
+ ps_dec->s_tran_addrecon.pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3;
+
+ ps_dec->s_tran_addrecon.u2_frm_wd_y = ps_dec->u2_frm_wd_y
+ << u1_field_pic_flag;
+ ps_dec->s_tran_addrecon.u2_frm_wd_uv = ps_dec->u2_frm_wd_uv
+ << u1_field_pic_flag;
+
+ if(u1_field_pic_flag)
+ {
+ ui16_lastmbs_widthY += ps_dec->u2_frm_wd_y;
+ ui16_lastmbs_widthUV += ps_dec->u2_frm_wd_uv;
+ }
+
+ /* Normal Increment of Pointer */
+ ps_dec->s_tran_addrecon.u4_inc_y[0] = ((ps_dec->u1_recon_mb_grp << 4)
+ >> u1_mbaff);
+ ps_dec->s_tran_addrecon.u4_inc_uv[0] = ((ps_dec->u1_recon_mb_grp << 4)
+ >> u1_mbaff);
+
+ /* End of Row Increment */
+ ps_dec->s_tran_addrecon.u4_inc_y[1] = (ui16_lastmbs_widthY
+ + (PAD_LEN_Y_H << 1)
+ + ps_dec->s_tran_addrecon.u2_frm_wd_y
+ * ((15 << u1_mbaff) + u1_mbaff));
+ ps_dec->s_tran_addrecon.u4_inc_uv[1] = (ui16_lastmbs_widthUV
+ + (PAD_LEN_UV_H << 2)
+ + ps_dec->s_tran_addrecon.u2_frm_wd_uv
+ * ((15 << u1_mbaff) + u1_mbaff));
+
+ /* Assign picture numbers to each frame/field */
+ /* only once per picture. */
+ ih264d_assign_pic_num(ps_dec);
+ ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (ps_dec->u1_recon_mb_grp
+ << 2) - 1 - (u1_mbaff << 2);
+ ps_dec->s_tran_addrecon.u2_mv_left_inc = ((ps_dec->u1_recon_mb_grp
+ >> u1_mbaff) - 1) << (4 + u1_mbaff);
+ }
+ /**********************************************************************/
+ /* High profile related initialization at picture level */
+ /**********************************************************************/
+ if(ps_seq->u1_profile_idc == HIGH_PROFILE_IDC)
+ {
+ if((ps_seq->i4_seq_scaling_matrix_present_flag)
+ || (ps_pps->i4_pic_scaling_matrix_present_flag))
+ {
+ ih264d_form_scaling_matrix_picture(ps_seq, ps_pps, ps_dec);
+ ps_dec->s_high_profile.u1_scaling_present = 1;
+ }
+ else
+ {
+ ih264d_form_default_scaling_matrix(ps_dec);
+ }
+
+ if(ps_pps->i4_transform_8x8_mode_flag)
+ {
+ ps_dec->s_high_profile.u1_transform8x8_present = 1;
+ }
+ }
+ else
+ {
+ ih264d_form_default_scaling_matrix(ps_dec);
+ }
+
+ /* required while reading the transform_size_8x8 u4_flag */
+ ps_dec->s_high_profile.u1_direct_8x8_inference_flag =
+ ps_seq->u1_direct_8x8_inference_flag;
+ ps_dec->s_high_profile.s_cavlc_ctxt = ps_dec->s_cavlc_ctxt;
+
+ if(ps_dec->u1_separate_parse)
+ {
+ memcpy(&ps_dec->s_tran_addrecon_parse, &ps_dec->s_tran_addrecon,
+ sizeof(tfr_ctxt_t));
+ }
+
+ H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_end_of_pic_dispbuf_mgr \endif
+ *
+ * \brief    : End-of-picture buffer bookkeeping. Marks the current picture
+ *             buffer (and its MV buffer) as held for reference and/or
+ *             display, inserts the picture into the display list and
+ *             assigns display sequence numbers.
+ *
+ * \param ps_dec : Decoder context
+ *
+ * \return   : OK on success, otherwise the error code returned by
+ *             ih264d_assign_display_seq() or
+ *             ih264d_insert_pic_in_display_list()
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_end_of_pic_dispbuf_mgr(dec_struct_t * ps_dec)
+{
+    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
+    WORD32 ret;
+
+    /* process_disp_mutex is released on every exit path below */
+    H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
+
+    ih264d_delete_nonref_nondisplay_pics(ps_dec->ps_dpb_mgr);
+    if(ps_cur_slice->u1_mmco_equalto5
+                    || (ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL))
+    {
+        ps_dec->ps_cur_pic->i4_poc = 0;
+        /* Reference buffers are reset only when every MB of the picture */
+        /* has been coded                                                */
+        if(ps_dec->u2_total_mbs_coded
+                        == (ps_dec->ps_cur_sps->u2_max_mb_addr + 1))
+            ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr);
+        ih264d_release_display_bufs(ps_dec);
+    }
+    if(ps_dec->u4_num_reorder_frames_at_init != 0)
+    {
+        ret = ih264d_assign_display_seq(ps_dec);
+        if(ret != OK)
+        {
+            H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+            return ret;
+        }
+    }
+
+    if(ps_cur_slice->u1_nal_ref_idc)
+    {
+        /* Mark pic buf as needed for reference */
+        ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                                 ps_dec->u1_pic_buf_id,
+                                 BUF_MGR_REF);
+        /* Mark mv buf as needed for reference */
+        ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+                                 ps_dec->au1_pic_buf_id_mv_buf_id_map[ps_dec->u1_pic_buf_id],
+                                 BUF_MGR_REF);
+        ps_dec->au1_pic_buf_ref_flag[ps_dec->u1_pic_buf_id] = 1;
+    }
+
+    /* 420 consumer */
+    /* Mark the picture for display unless it was not decoded or its   */
+    /* slice type is masked out by the skip-frame mask; IDR pictures   */
+    /* are always marked for display                                   */
+    if(((0 == ps_dec->u1_last_pic_not_decoded)
+                    && (0
+                                    == (ps_dec->ps_cur_pic->u4_pack_slc_typ
+                                                    & ps_dec->u4_skip_frm_mask)))
+                    || (ps_cur_slice->u1_nal_unit_type == IDR_SLICE_NAL))
+    {
+        /* Mark pic buf as needed for display */
+        ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                                 ps_dec->u1_pic_buf_id,
+                                 BUF_MGR_IO);
+    }
+
+    /* Frame picture, or a field picture for which both fields are not */
+    /* yet flagged as decoded: insert into the display list            */
+    if(!ps_cur_slice->u1_field_pic_flag
+                    || ((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
+                                    != ps_dec->u1_top_bottom_decoded))
+    {
+        pic_buffer_t *ps_cur_pic = ps_dec->ps_cur_pic;
+        ivd_video_decode_op_t *ps_dec_output =
+                        (ivd_video_decode_op_t *)ps_dec->pv_dec_out;
+
+        ps_cur_pic->u2_disp_width = ps_dec->u2_disp_width;
+        /* NOTE(review): display height is stored halved here - confirm */
+        /* that the consumer of u2_disp_height expects this             */
+        ps_cur_pic->u2_disp_height = ps_dec->u2_disp_height >> 1;
+
+        ps_cur_pic->u2_crop_offset_y = ps_dec->u2_crop_offset_y;
+        ps_cur_pic->u2_crop_offset_uv = ps_dec->u2_crop_offset_uv;
+        ps_cur_pic->u1_pic_type = 0;
+
+        ret = ih264d_insert_pic_in_display_list(
+                        ps_dec->ps_dpb_mgr,
+                        ps_dec->u1_pic_buf_id,
+                        ps_dec->i4_prev_max_display_seq
+                                        + ps_dec->ps_cur_pic->i4_poc,
+                        ps_dec->ps_cur_pic->i4_frame_num);
+        if(ret != OK)
+        {
+            H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+            return ret;
+        }
+
+        ps_dec_output->u4_frame_decoded_flag = 1;
+
+        /* A picture not held for reference no longer needs its MV buffer */
+        if(ps_dec->au1_pic_buf_ref_flag[ps_dec->u1_pic_buf_id] == 0)
+        {
+            ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+                                  ps_dec->au1_pic_buf_id_mv_buf_id_map[ps_dec->u1_pic_buf_id],
+                                  BUF_MGR_REF);
+        }
+    }
+    else
+    {
+        H264_DEC_DEBUG_PRINT("pic not inserted display %d %d\n",
+                             ps_cur_slice->u1_field_pic_flag,
+                             ps_dec->u1_second_field);
+    }
+
+    if(!ps_cur_slice->u1_end_of_frame_signal)
+    {
+        ps_cur_slice->u1_end_of_frame_signal = 1;
+    }
+
+    /* With no reordering configured at init, the display sequence is   */
+    /* assigned here, once a frame or both fields are flagged as decoded */
+    if(!ps_cur_slice->u1_field_pic_flag
+                    || ((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
+                                    == ps_dec->u1_top_bottom_decoded))
+    {
+        if(ps_dec->u4_num_reorder_frames_at_init == 0)
+        {
+            ret = ih264d_assign_display_seq(ps_dec);
+            if(ret != OK)
+            {
+                H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+                return ret;
+            }
+        }
+    }
+
+    H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_err_pic_dispbuf_mgr \endif
+ *
+ * \brief    : Releases the buffers held by the current picture: the
+ *             picture buffer is released for both reference (BUF_MGR_REF)
+ *             and display (BUF_MGR_IO), and the MV buffer mapped to it is
+ *             released for reference.
+ *
+ * \param ps_dec : Decoder context
+ *
+ * \return   : Nothing
+ *
+ **************************************************************************
+ */
+void ih264d_err_pic_dispbuf_mgr(dec_struct_t *ps_dec)
+{
+    ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                          ps_dec->u1_pic_buf_id,
+                          BUF_MGR_REF);
+    ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_mv_buf_mgr,
+                          ps_dec->au1_pic_buf_id_mv_buf_id_map[ps_dec->u1_pic_buf_id],
+                          BUF_MGR_REF);
+    ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                          ps_dec->u1_pic_buf_id,
+                          BUF_MGR_IO);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_deblock_picture \endif
+ *
+ * \brief    : Deblocks the current picture once u1_pic_decode_done is set.
+ *             MBAFF and field pictures go through the p_DeblockPicture
+ *             table (indexed by the MBAFF flag); everything else uses the
+ *             progressive deblocking routine.
+ *
+ * \param ptr : Decoder context (dec_struct_t *) passed as void *
+ *
+ * \return   : Nothing
+ *
+ **************************************************************************
+ */
+void ih264d_deblock_picture(void *ptr)
+{
+    dec_struct_t *ps_dec = (dec_struct_t *)ptr;
+    dec_slice_params_t *ps_slice;
+
+    /* Deblock picture only if all the mb's in the frame have been decoded */
+    if(ps_dec->u1_pic_decode_done != 1)
+        return;
+
+    ps_slice = ps_dec->ps_cur_slice;
+
+    if(!ps_slice->u1_mbaff_frame_flag && !ps_slice->u1_field_pic_flag)
+    {
+        ih264d_deblock_picture_progressive(ps_dec);
+        return;
+    }
+
+    ps_dec->p_DeblockPicture[ps_slice->u1_mbaff_frame_flag](ps_dec);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_deblock_display \endif
+ *
+ * \brief    : Calls the deblocking routine for the current picture and
+ *             then runs the end-of-picture display buffer management.
+ *
+ * \param ps_dec : Decoder context
+ *
+ * \return   : OK on success, otherwise the error code returned by
+ *             ih264d_end_of_pic_dispbuf_mgr()
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_deblock_display(dec_struct_t *ps_dec)
+{
+    /* Call deblocking */
+    ih264d_deblock_picture(ps_dec);
+
+    /* The buffer manager's status is the status of the whole operation */
+    return ih264d_end_of_pic_dispbuf_mgr(ps_dec);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_end_of_pic \endif
+ *
+ * \brief
+ *    End of picture processing. Re-arms the first-NAL-in-picture state,
+ *    runs ih264d_end_of_pic_processing(), updates the previous reference
+ *    frame number, handles gaps in frame_num when the SPS allows them and
+ *    saves the current POC state as the previous POC state.
+ *
+ * \param ps_dec          : Decoder context
+ * \param u1_is_idr_slice : Non-zero when the slice being parsed is an IDR slice
+ * \param u2_frame_num    : frame_num passed on to
+ *                          ih264d_decode_gaps_in_frame_num()
+ *
+ * \return
+ *    OK on success. When the current picture is flagged REJECT_CUR_PIC its
+ *    buffers are released and OK is returned immediately. Otherwise the
+ *    error code from ih264d_end_of_pic_processing() or
+ *    ih264d_decode_gaps_in_frame_num(), or ERROR_END_OF_FRAME_EXPECTED_T
+ *    when the end-of-frame signal has not been set on the current slice.
+ **************************************************************************
+ */
+
+WORD32 ih264d_end_of_pic(dec_struct_t *ps_dec,
+                         UWORD8 u1_is_idr_slice,
+                         UWORD16 u2_frame_num)
+{
+    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
+    WORD32 ret;
+
+    /* The next slice parsed is treated as the first NAL of a new picture */
+    ps_dec->u1_first_nal_in_pic = 1;
+    ps_dec->u1_first_pb_nal_in_pic = 1;
+    ps_dec->u2_mbx = 0xffff;
+    ps_dec->u2_mby = 0;
+    {
+        dec_err_status_t * ps_err = ps_dec->ps_dec_err_status;
+        if(ps_err->u1_err_flag & REJECT_CUR_PIC)
+        {
+            /* Picture rejected: clear the flag and give its buffers back */
+            ps_err->u1_err_flag ^= REJECT_CUR_PIC;
+            ih264d_err_pic_dispbuf_mgr(ps_dec);
+            return OK;
+        }
+    }
+
+    /* process_disp_mutex is released on every exit path below */
+    H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
+    ret = ih264d_end_of_pic_processing(ps_dec);
+    if(ret != OK)
+    {
+        H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+        return ret;
+    }
+    ps_dec->u2_total_mbs_coded = 0;
+    /*--------------------------------------------------------------------*/
+    /* ih264d_decode_pic_order_cnt - calculate the Pic Order Cnt          */
+    /* Needed to detect end of picture                                    */
+    /*--------------------------------------------------------------------*/
+    {
+        pocstruct_t *ps_prev_poc = &ps_dec->s_prev_pic_poc;
+        pocstruct_t *ps_cur_poc = &ps_dec->s_cur_pic_poc;
+        if((0 == u1_is_idr_slice) && ps_cur_slice->u1_nal_ref_idc)
+            ps_dec->u2_prev_ref_frame_num = ps_cur_slice->u2_frame_num;
+
+        if(u1_is_idr_slice || ps_cur_slice->u1_mmco_equalto5)
+            ps_dec->u2_prev_ref_frame_num = 0;
+
+        if(ps_dec->ps_cur_sps->u1_gaps_in_frame_num_value_allowed_flag)
+        {
+            ret = ih264d_decode_gaps_in_frame_num(ps_dec, u2_frame_num);
+            if(ret != OK)
+            {
+                H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+                return ret;
+            }
+        }
+
+        ps_prev_poc->i4_prev_frame_num_ofst = ps_cur_poc->i4_prev_frame_num_ofst;
+        ps_prev_poc->u2_frame_num = ps_cur_poc->u2_frame_num;
+        ps_prev_poc->u1_mmco_equalto5 = ps_cur_slice->u1_mmco_equalto5;
+        /* The remaining POC fields are carried over only for reference */
+        /* pictures                                                     */
+        if(ps_cur_slice->u1_nal_ref_idc)
+        {
+            ps_prev_poc->i4_pic_order_cnt_lsb = ps_cur_poc->i4_pic_order_cnt_lsb;
+            ps_prev_poc->i4_pic_order_cnt_msb = ps_cur_poc->i4_pic_order_cnt_msb;
+            ps_prev_poc->i4_delta_pic_order_cnt_bottom =
+                            ps_cur_poc->i4_delta_pic_order_cnt_bottom;
+            ps_prev_poc->i4_delta_pic_order_cnt[0] =
+                            ps_cur_poc->i4_delta_pic_order_cnt[0];
+            ps_prev_poc->i4_delta_pic_order_cnt[1] =
+                            ps_cur_poc->i4_delta_pic_order_cnt[1];
+            ps_prev_poc->u1_bot_field = ps_cur_poc->u1_bot_field;
+        }
+    }
+    if(!ps_cur_slice->u1_end_of_frame_signal)
+    {
+        H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+        return ERROR_END_OF_FRAME_EXPECTED_T;
+    }
+
+    H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : DecodeSlice \endif
+ *
+ * \brief
+ * Parses a slice
+ *
+ * \return
+ * 0 on Success and Error code otherwise
+ **************************************************************************
+ */
+
+WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice,
+ UWORD8 u1_nal_ref_idc,
+ dec_struct_t *ps_dec /* Decoder parameters */
+ )
+{
+ dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+ dec_pic_params_t *ps_pps;
+ dec_seq_params_t *ps_seq;
+ dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
+ pocstruct_t s_tmp_poc;
+ WORD32 i_delta_poc[2];
+ WORD32 i4_poc = 0;
+ UWORD16 u2_first_mb_in_slice, u2_frame_num;
+ UWORD8 u1_field_pic_flag, u1_redundant_pic_cnt = 0, u1_slice_type;
+ UWORD32 u4_idr_pic_id = 0;
+ UWORD8 u1_bottom_field_flag, u1_pic_order_cnt_type;
+
+ UWORD8 u1_nal_unit_type;
+ UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+ WORD8 i1_is_end_of_poc;
+
+ WORD32 ret;
+ UWORD32 u4_temp;
+ WORD32 i_temp;
+ UWORD32 u4_call_end_of_pic = 0;
+
+ /*--------------------------------------------------------------------*/
+ /* Decode Portion of the Slice header */
+ /* This is done to detect end of picture */
+ /*--------------------------------------------------------------------*/
+
+ if(ps_dec->u4_first_slice_in_pic == 0)
+ {
+ volatile dec_slice_struct_t *ps_next_slice;
+
+ ps_next_slice = ps_dec->ps_parse_cur_slice + 1;
+
+ /*Reset the ready u4_flag and then increment*/
+ ps_next_slice->slice_header_done = 0;
+ DATA_SYNC();
+ ps_dec->ps_parse_cur_slice++;
+ }
+
+ /* read FirstMbInSlice and slice type*/
+ ps_dec->ps_dpb_cmds->u1_dpb_commands_read_slc = 0;
+ u2_first_mb_in_slice = ih264d_uev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ if(u2_first_mb_in_slice
+ > (ps_dec->u2_frm_ht_in_mbs * ps_dec->u2_frm_wd_in_mbs))
+ {
+
+ return ERROR_CORRUPTED_SLICE;
+ }
+
+ /* We currently do not support ASO */
+ if(((u2_first_mb_in_slice << ps_cur_slice->u1_mbaff_frame_flag)
+ <= ps_dec->u2_cur_mb_addr) && (ps_dec->u2_cur_mb_addr != 0)
+ && (ps_dec->u4_first_slice_in_pic != 0))
+ {
+ return ERROR_CORRUPTED_SLICE;
+ }
+
+ COPYTHECONTEXT("SH: first_mb_in_slice",u2_first_mb_in_slice);
+
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+
+ if(u4_temp > 9)
+ return ERROR_INV_SLC_TYPE_T;
+
+ u1_slice_type = u4_temp;
+ COPYTHECONTEXT("SH: slice_type",(u1_slice_type));
+ ps_dec->u1_sl_typ_5_9 = 0;
+ /* Find Out the Slice Type is 5 to 9 or not then Set the Flag */
+ /* u1_sl_typ_5_9 = 1 .Which tells that all the slices in the Pic*/
+ /* will be of same type of current */
+ if(u1_slice_type > 4)
+ {
+ u1_slice_type -= 5;
+ ps_dec->u1_sl_typ_5_9 = 1;
+ }
+
+ {
+ UWORD32 skip;
+
+ if((ps_dec->i4_app_skip_mode == IVD_SKIP_PB)
+ || (ps_dec->i4_dec_skip_mode == IVD_SKIP_PB))
+ {
+ UWORD32 u4_bit_stream_offset = 0;
+
+ if(ps_dec->u1_nal_unit_type == IDR_SLICE_NAL)
+ {
+ skip = 0;
+
+ ps_dec->i4_dec_skip_mode = IVD_SKIP_NONE;
+ }
+ else if((I_SLICE == u1_slice_type)
+ && (1 >= ps_dec->ps_sps->u1_num_ref_frames))
+ {
+ skip = 0;
+
+ ps_dec->i4_dec_skip_mode = IVD_SKIP_NONE;
+ }
+ else
+ {
+ skip = 1;
+ }
+
+ /* If one frame worth of data is already skipped, do not skip the next one */
+ if((0 == u2_first_mb_in_slice) && (1 == ps_dec->u4_prev_nal_skipped))
+ {
+ skip = 0;
+ }
+
+ if(skip)
+ {
+ ps_dec->u4_prev_nal_skipped = 1;
+ ps_dec->i4_dec_skip_mode = IVD_SKIP_PB;
+ return 0;
+ }
+ else
+ {
+ /* If the previous NAL was skipped, then
+ do not process that buffer in this call.
+ Return to app and process it in the next call.
+ This is necessary to handle cases where I/IDR is not complete in
+ the current buffer and application intends to fill the remaining part of the bitstream
+ later. This ensures we process only frame worth of data in every call */
+ if(1 == ps_dec->u4_prev_nal_skipped)
+ {
+ ps_dec->u4_return_to_app = 1;
+ return 0;
+ }
+ }
+ }
+
+ }
+
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ if(u4_temp & MASK_ERR_PIC_SET_ID)
+ return ERROR_INV_SPS_PPS_T;
+ /* discard slice if pic param is invalid */
+ COPYTHECONTEXT("SH: pic_parameter_set_id", u4_temp);
+ ps_pps = &ps_dec->ps_pps[u4_temp];
+ if(FALSE == ps_pps->u1_is_valid)
+ {
+ return ERROR_INV_SPS_PPS_T;
+ }
+ ps_seq = ps_pps->ps_sps;
+ if(!ps_seq)
+ return ERROR_INV_SPS_PPS_T;
+ if(FALSE == ps_seq->u1_is_valid)
+ return ERROR_INV_SPS_PPS_T;
+
+ /* Get the frame num */
+ u2_frame_num = ih264d_get_bits_h264(ps_bitstrm,
+ ps_seq->u1_bits_in_frm_num);
+// H264_DEC_DEBUG_PRINT("FRAME %d First MB in slice: %d\n", u2_frame_num, u2_first_mb_in_slice);
+
+ COPYTHECONTEXT("SH: frame_num", u2_frame_num);
+// H264_DEC_DEBUG_PRINT("Second field: %d frame num: %d prv_frame_num: %d \n", ps_dec->u1_second_field, u2_frame_num, ps_dec->u2_prv_frame_num);
+
+ /* Get the field related flags */
+ if(!ps_seq->u1_frame_mbs_only_flag)
+ {
+
+ u1_field_pic_flag = ih264d_get_bit_h264(ps_bitstrm);
+ COPYTHECONTEXT("SH: field_pic_flag", u1_field_pic_flag);
+ u1_bottom_field_flag = 0;
+
+ if(u1_field_pic_flag)
+ {
+ ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_fld;
+ u1_bottom_field_flag = ih264d_get_bit_h264(ps_bitstrm);
+ COPYTHECONTEXT("SH: bottom_field_flag", u1_bottom_field_flag);
+
+ }
+ else
+ {
+ ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan;
+ }
+ }
+ else
+ {
+ u1_field_pic_flag = 0;
+ u1_bottom_field_flag = 0;
+
+ ps_dec->pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan;
+ }
+
+ u1_nal_unit_type = SLICE_NAL;
+ if(u1_is_idr_slice)
+ {
+ if(0 == u1_field_pic_flag)
+ {
+ ps_dec->u1_top_bottom_decoded = TOP_FIELD_ONLY | BOT_FIELD_ONLY;
+ }
+ u1_nal_unit_type = IDR_SLICE_NAL;
+ u4_idr_pic_id = ih264d_uev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ if(u4_idr_pic_id > 65535)
+ return ERROR_INV_SPS_PPS_T;
+ COPYTHECONTEXT("SH: ", u4_idr_pic_id);
+ }
+
+ /* read delta pic order count information*/
+ i_delta_poc[0] = i_delta_poc[1] = 0;
+ s_tmp_poc.i4_pic_order_cnt_lsb = 0;
+ s_tmp_poc.i4_delta_pic_order_cnt_bottom = 0;
+ u1_pic_order_cnt_type = ps_seq->u1_pic_order_cnt_type;
+ if(u1_pic_order_cnt_type == 0)
+ {
+ i_temp = ih264d_get_bits_h264(
+ ps_bitstrm,
+ ps_seq->u1_log2_max_pic_order_cnt_lsb_minus);
+ if(i_temp < 0 || i_temp >= ps_seq->i4_max_pic_order_cntLsb)
+ return ERROR_INV_SPS_PPS_T;
+ s_tmp_poc.i4_pic_order_cnt_lsb = i_temp;
+ COPYTHECONTEXT("SH: pic_order_cnt_lsb", s_tmp_poc.i4_pic_order_cnt_lsb);
+
+ if((ps_pps->u1_pic_order_present_flag == 1) && (!u1_field_pic_flag))
+ {
+ s_tmp_poc.i4_delta_pic_order_cnt_bottom = ih264d_sev(
+ pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ //if(s_tmp_poc.i4_delta_pic_order_cnt_bottom > ps_seq->i4_max_pic_order_cntLsb)
+ COPYTHECONTEXT("SH: delta_pic_order_cnt_bottom",
+ s_tmp_poc.i4_delta_pic_order_cnt_bottom);
+ }
+ }
+
+ s_tmp_poc.i4_delta_pic_order_cnt[0] = 0;
+ s_tmp_poc.i4_delta_pic_order_cnt[1] = 0;
+ if(u1_pic_order_cnt_type == 1
+ && (!ps_seq->u1_delta_pic_order_always_zero_flag))
+ {
+ s_tmp_poc.i4_delta_pic_order_cnt[0] = ih264d_sev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ COPYTHECONTEXT("SH: delta_pic_order_cnt[0]",
+ s_tmp_poc.i4_delta_pic_order_cnt[0]);
+
+ if(ps_pps->u1_pic_order_present_flag && !u1_field_pic_flag)
+ {
+ s_tmp_poc.i4_delta_pic_order_cnt[1] = ih264d_sev(
+ pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ COPYTHECONTEXT("SH: delta_pic_order_cnt[1]",
+ s_tmp_poc.i4_delta_pic_order_cnt[1]);
+ }
+ }
+
+ if(ps_pps->u1_redundant_pic_cnt_present_flag)
+ {
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ if(u4_temp > MAX_REDUNDANT_PIC_CNT)
+ return ERROR_INV_SPS_PPS_T;
+ u1_redundant_pic_cnt = u4_temp;
+ COPYTHECONTEXT("SH: redundant_pic_cnt", u1_redundant_pic_cnt);
+ }
+ /*--------------------------------------------------------------------*/
+ /* Check if the slice is part of new picture if so do End of Pic */
+ /* processing. */
+ /*--------------------------------------------------------------------*/
+ i1_is_end_of_poc = 0;
+ if(!ps_dec->u1_first_nal_in_pic)
+ {
+ UWORD8 uc_mbs_exceed = 0;
+ i1_is_end_of_poc = ih264d_is_end_of_pic(u2_frame_num, u1_nal_ref_idc,
+ &s_tmp_poc, &ps_dec->s_cur_pic_poc,
+ ps_cur_slice, u1_pic_order_cnt_type,
+ u1_nal_unit_type, u4_idr_pic_id,
+ u1_field_pic_flag,
+ u1_bottom_field_flag);
+
+ /*since we support only Full frame decode, every new process should
+ * process a new pic
+ */
+ if(ps_dec->u4_first_slice_in_pic == 1)
+ {
+ i1_is_end_of_poc = 1;
+ }
+
+ if(ps_dec->u2_total_mbs_coded
+ == (ps_dec->ps_cur_sps->u2_max_mb_addr + 1))
+ {
+ /*u2_total_mbs_coded is forced to u2_max_mb_addr+ 1 at the end of decode ,so
+ ,if it is first slice in pic dont consider u2_total_mbs_coded to detect new picture */
+ if(ps_dec->u4_first_slice_in_pic == 0)
+ uc_mbs_exceed = 1;
+ }
+
+ if(i1_is_end_of_poc || uc_mbs_exceed)
+ {
+
+ if(1 == ps_dec->u1_last_pic_not_decoded)
+ {
+ ret = ih264d_end_of_pic_dispbuf_mgr(ps_dec);
+
+ if(ret != OK)
+ return ret;
+
+ ret = ih264d_end_of_pic(ps_dec, u1_is_idr_slice, u2_frame_num);
+ if(ret != OK)
+ return ret;
+#if WIN32
+ H264_DEC_DEBUG_PRINT(" ------ PIC SKIPPED ------\n");
+#endif
+ return RET_LAST_SKIP;
+ }
+ else
+ {
+ if((ps_dec->u2_total_mbs_coded
+ < (ps_dec->ps_cur_sps->u2_max_mb_addr + 1)))
+ {
+ H264_DEC_DEBUG_PRINT("Hello\n");
+ ps_dec->u2_total_mbs_coded =
+ ps_dec->ps_cur_sps->u2_max_mb_addr + 1;
+ ps_dec->u1_first_nal_in_pic = 1;
+ ps_dec->u1_first_pb_nal_in_pic = 1;
+ return ERROR_END_OF_FRAME_EXPECTED_T;
+ /*if (ps_cur_slice->u1_field_pic_flag &&
+ ((TOP_FIELD_ONLY | BOT_FIELD_ONLY) == ps_dec->u1_top_bottom_decoded))
+ {
+ ps_cur_slice->u1_end_of_frame_signal = 0;
+ }*/
+ }
+ ret = ih264d_end_of_pic(ps_dec, u1_is_idr_slice, u2_frame_num);
+ if(ret != OK)
+ return ret;
+ }
+
+ }
+ else
+ {
+
+ if(ps_dec->u4_first_slice_in_pic == 1)
+ {
+ /*If the first slice in decode api is not from a new picture,
+ * we will return error code ,as we don't support partial
+ frame decode*/
+ return ERROR_PIC_NUM_IS_REPEATED;
+ }
+ }
+ }
+ ps_cur_slice->u1_end_of_frame_signal = 0;
+ if(u1_field_pic_flag)
+ {
+ /*
+ * Check if the frame number has changed.
+ */
+ H264_DEC_DEBUG_PRINT(
+ "u2_frame_num: %d ps_dec->u2_prv_frame_num: %d ps_dec->u1_top_bottom_decoded: %d\n",
+ u2_frame_num, ps_dec->u2_prv_frame_num,
+ ps_dec->u1_top_bottom_decoded);
+ if((u2_frame_num != ps_dec->u2_prv_frame_num)
+ && (0 != ps_dec->u1_top_bottom_decoded))
+ {
+ if((TOP_FIELD_ONLY | BOT_FIELD_ONLY)
+ != ps_dec->u1_top_bottom_decoded)
+ {
+ H264_DEC_DEBUG_PRINT("Dangling Field, toggling second field\n");
+ ps_dec->u1_second_field = 1 - ps_dec->u1_second_field;
+ ps_dec->u1_dangling_field = 1;
+ /*
+ * Updating the u1_bottom_field_flag since its used in the concealment function.
+ */
+ ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag;
+ ps_dec->u2_prv_frame_num = u2_frame_num;
+
+ ret = ih264d_deblock_display(ps_dec);
+ if(ret != OK)
+ return ret;
+
+ /*
+ * The bytes consumed will be handled by the
+ * video_decode function after the error is handled.
+ */
+ return ERROR_DANGLING_FIELD_IN_PIC;
+
+ }
+
+ }
+
+ ps_dec->u2_prv_frame_num = u2_frame_num;
+ }
+
+ if(ps_cur_slice->u1_mmco_equalto5)
+ {
+ WORD32 i4_temp_poc;
+ WORD32 i4_top_field_order_poc, i4_bot_field_order_poc;
+
+ if(!ps_cur_slice->u1_field_pic_flag) // or a complementary field pair
+ {
+ i4_top_field_order_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
+ i4_bot_field_order_poc =
+ ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
+ i4_temp_poc = MIN(i4_top_field_order_poc,
+ i4_bot_field_order_poc);
+ }
+ else if(!ps_cur_slice->u1_bottom_field_flag)
+ i4_temp_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
+ else
+ i4_temp_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
+
+ ps_dec->ps_cur_pic->i4_top_field_order_cnt = i4_temp_poc
+ - ps_dec->ps_cur_pic->i4_top_field_order_cnt;
+ ps_dec->ps_cur_pic->i4_bottom_field_order_cnt = i4_temp_poc
+ - ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
+ ps_dec->ps_cur_pic->i4_poc = i4_temp_poc;
+ ps_dec->ps_cur_pic->i4_avg_poc = i4_temp_poc;
+ }
+ if(ps_dec->u1_first_nal_in_pic)
+ {
+ ret = ih264d_decode_pic_order_cnt(u1_is_idr_slice, u2_frame_num,
+ &ps_dec->s_prev_pic_poc,
+ &s_tmp_poc, ps_cur_slice, ps_pps,
+ u1_nal_ref_idc,
+ u1_bottom_field_flag,
+ u1_field_pic_flag, &i4_poc);
+ if(ret != OK)
+ return ret;
+ /* Display seq no calculations */
+ if(i4_poc >= ps_dec->i4_max_poc)
+ ps_dec->i4_max_poc = i4_poc;
+ /* IDR Picture or POC wrap around */
+ if(i4_poc == 0)
+ {
+ ps_dec->i4_prev_max_display_seq = ps_dec->i4_prev_max_display_seq
+ + ps_dec->i4_max_poc
+ + ps_dec->u1_max_dec_frame_buffering + 1;
+ ps_dec->i4_max_poc = 0;
+ }
+ }
+
+ /*--------------------------------------------------------------------*/
+ /* Copy the values read from the bitstream to the slice header and then*/
+ /* If the slice is first slice in picture, then do Start of Picture */
+ /* processing. */
+ /*--------------------------------------------------------------------*/
+ ps_cur_slice->i4_delta_pic_order_cnt[0] = i_delta_poc[0];
+ ps_cur_slice->i4_delta_pic_order_cnt[1] = i_delta_poc[1];
+ ps_cur_slice->u4_idr_pic_id = u4_idr_pic_id;
+ ps_cur_slice->u2_first_mb_in_slice = u2_first_mb_in_slice;
+ ps_cur_slice->u1_field_pic_flag = u1_field_pic_flag;
+ ps_cur_slice->u1_bottom_field_flag = u1_bottom_field_flag;
+ ps_cur_slice->u1_slice_type = u1_slice_type;
+ ps_cur_slice->i4_pic_order_cnt_lsb = s_tmp_poc.i4_pic_order_cnt_lsb;
+
+ ps_cur_slice->u1_nal_unit_type = u1_nal_unit_type;
+ ps_cur_slice->u1_redundant_pic_cnt = u1_redundant_pic_cnt;
+ ps_cur_slice->u1_nal_ref_idc = u1_nal_ref_idc;
+ ps_cur_slice->u1_pic_order_cnt_type = u1_pic_order_cnt_type;
+
+ if(ps_seq->u1_frame_mbs_only_flag)
+ ps_cur_slice->u1_direct_8x8_inference_flag =
+ ps_seq->u1_direct_8x8_inference_flag;
+ else
+ ps_cur_slice->u1_direct_8x8_inference_flag = 1;
+
+ if(u1_slice_type == B_SLICE)
+ {
+ ps_cur_slice->u1_direct_spatial_mv_pred_flag = ih264d_get_bit_h264(
+ ps_bitstrm);
+ COPYTHECONTEXT("SH: direct_spatial_mv_pred_flag",
+ ps_cur_slice->u1_direct_spatial_mv_pred_flag);
+
+ if(ps_cur_slice->u1_direct_spatial_mv_pred_flag)
+ ps_cur_slice->pf_decodeDirect = ih264d_decode_spatial_direct;
+ else
+ ps_cur_slice->pf_decodeDirect = ih264d_decode_temporal_direct;
+ if(!((ps_pps->ps_sps->u1_mb_aff_flag) && (!u1_field_pic_flag)))
+ ps_dec->pf_mvpred = ih264d_mvpred_nonmbaffB;
+ }
+ else
+ {
+ if(!((ps_pps->ps_sps->u1_mb_aff_flag) && (!u1_field_pic_flag)))
+ ps_dec->pf_mvpred = ih264d_mvpred_nonmbaff;
+ }
+
+ if(ps_dec->u1_first_nal_in_pic)
+ {
+ ret = ih264d_start_of_pic(ps_dec, i4_poc, &s_tmp_poc, u2_frame_num, ps_pps);
+ if(ret != OK)
+ return ret;
+
+ ps_dec->u4_output_present = 0;
+
+ if(1 == ps_dec->u4_fmt_conv_in_process)
+ {
+ ih264d_get_next_display_field(ps_dec,
+ ps_dec->ps_out_buffer,
+ &(ps_dec->s_disp_op));
+ /* If error code is non-zero then there is no buffer available for display,
+ hence avoid format conversion */
+
+ if(0 != ps_dec->s_disp_op.u4_error_code)
+ {
+ ps_dec->u4_fmt_conv_cur_row = ps_dec->s_disp_frame_info.u4_y_ht;
+ ps_dec->as_fmt_conv_part[0].u4_flag = 0;
+ ps_dec->as_fmt_conv_part[1].u4_flag = 0;
+ }
+ else
+ ps_dec->u4_output_present = 1;
+ }
+ if(ps_dec->u1_separate_parse == 1)
+ {
+ if(ps_dec->u4_dec_thread_created == 0)
+ {
+ ithread_create(ps_dec->pv_dec_thread_handle, NULL,
+ (void *)ih264d_decode_picture_thread,
+ (void *)ps_dec);
+
+ ps_dec->u4_dec_thread_created = 1;
+ }
+
+ if((ps_dec->u4_num_cores == 3) && (ps_dec->u4_app_disable_deblk_frm == 0)
+ && (ps_dec->u4_bs_deblk_thread_created == 0))
+ {
+ ps_dec->u4_start_bs_deblk = 0;
+ ithread_create(ps_dec->pv_bs_deblk_thread_handle, NULL,
+ (void *)ih264d_computebs_deblk_thread,
+ (void *)ps_dec);
+ ps_dec->u4_bs_deblk_thread_created = 1;
+ }
+ }
+
+ }
+
+ /* INITIALIZATION of fn ptrs for MC and formMbPartInfo functions */
+ {
+ UWORD8 uc_nofield_nombaff = 1; // = ((ps_dec->ps_sps->u1_profile_idc == 0x42) || (u1_slice_type == I_SLICE));
+
+
+
+ uc_nofield_nombaff = ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0)
+ && (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0)
+ && (u1_slice_type != B_SLICE)
+ && (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0));
+
+ /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */
+
+ if(uc_nofield_nombaff)
+ {
+ ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_bp;
+ ps_dec->p_motion_compensate = ih264d_motion_compensate_bp;
+ }
+ else
+ {
+ ps_dec->p_form_mb_part_info = ih264d_form_mb_part_info_mp;
+ ps_dec->p_motion_compensate = ih264d_motion_compensate_mp;
+ }
+
+
+ }
+
+ /*
+ * Decide whether to decode the current picture or not
+ */
+ {
+ dec_err_status_t * ps_err = ps_dec->ps_dec_err_status;
+ if(ps_err->u4_frm_sei_sync == u2_frame_num)
+ {
+ ps_err->u1_err_flag = ACCEPT_ALL_PICS;
+ ps_err->u4_frm_sei_sync = SYNC_FRM_DEFAULT;
+ }
+ ps_err->u4_cur_frm = u2_frame_num;
+ }
+
+ /* Decision for decoding if the picture is to be skipped */
+ {
+ WORD32 i4_skip_b_pic, i4_skip_p_pic;
+
+ i4_skip_b_pic = (ps_dec->u4_skip_frm_mask & B_SLC_BIT)
+ && (B_SLICE == u1_slice_type) && (0 == u1_nal_ref_idc);
+
+ i4_skip_p_pic = (ps_dec->u4_skip_frm_mask & P_SLC_BIT)
+ && (P_SLICE == u1_slice_type) && (0 == u1_nal_ref_idc);
+
+ /**************************************************************/
+ /* Skip the B picture if skip mask is set for B picture and */
+ /* Current B picture is a non reference B picture or there is */
+ /* no user for reference B picture */
+ /**************************************************************/
+ if(i4_skip_b_pic)
+ {
+ ps_dec->ps_cur_pic->u4_pack_slc_typ |= B_SLC_BIT;
+ /* Don't decode the picture in SKIP-B mode if that picture is B */
+ /* and also it is not to be used as a reference picture */
+ ps_dec->u1_last_pic_not_decoded = 1;
+
+ return OK;
+ }
+ /**************************************************************/
+ /* Skip the P picture if skip mask is set for P picture and */
+ /* Current P picture is a non reference P picture or there is */
+ /* no user for reference P picture */
+ /**************************************************************/
+ if(i4_skip_p_pic)
+ {
+ ps_dec->ps_cur_pic->u4_pack_slc_typ |= P_SLC_BIT;
+ /* Don't decode the picture in SKIP-P mode if that picture is P */
+ /* and also it is not to be used as a reference picture */
+ ps_dec->u1_last_pic_not_decoded = 1;
+
+ return OK;
+ }
+ }
+
+ {
+ UWORD16 u2_mb_x, u2_mb_y;
+
+ ps_dec->i4_submb_ofst = ((u2_first_mb_in_slice
+ << ps_cur_slice->u1_mbaff_frame_flag) * SUB_BLK_SIZE)
+ - SUB_BLK_SIZE;
+ if(u2_first_mb_in_slice)
+ {
+ UWORD8 u1_mb_aff;
+ UWORD8 u1_field_pic;
+ UWORD16 u2_frm_wd_in_mbs;
+ u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs;
+ u1_mb_aff = ps_cur_slice->u1_mbaff_frame_flag;
+ u1_field_pic = ps_cur_slice->u1_field_pic_flag;
+
+ {
+ UWORD32 x_offset;
+ UWORD32 y_offset;
+ UWORD32 u4_frame_stride;
+ tfr_ctxt_t *ps_trns_addr; // = &ps_dec->s_tran_addrecon_parse;
+
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_trns_addr = &ps_dec->s_tran_addrecon_parse;
+ }
+ else
+ {
+ ps_trns_addr = &ps_dec->s_tran_addrecon;
+ }
+ u2_mb_x = MOD(u2_first_mb_in_slice, u2_frm_wd_in_mbs);
+ u2_mb_y = DIV(u2_first_mb_in_slice, u2_frm_wd_in_mbs);
+
+ u2_mb_y <<= u1_mb_aff;
+
+ if((u2_mb_x > u2_frm_wd_in_mbs - 1)
+ || (u2_mb_y > ps_dec->u2_frm_ht_in_mbs - 1))
+ {
+ return ERROR_CORRUPTED_SLICE;
+ }
+
+ u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic;
+ x_offset = u2_mb_x << 4;
+ y_offset = (u2_mb_y * u4_frame_stride) << 4;
+
+ ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset
+ + y_offset;
+
+ u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic;
+ x_offset >>= 1;
+ y_offset = (u2_mb_y * u4_frame_stride) << 3;
+
+ x_offset *= YUV420SP_FACTOR;
+
+ ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset
+ + y_offset;
+ ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset
+ + y_offset;
+
+ ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
+ ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
+ ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
+
+ if(ps_dec->u4_mb_level_deblk == 1)
+ {
+ /*If it is not the first mb in row,the previous MB which needs to be deblocked
+ * as there is delay of 1 MB*/
+ if(u2_mb_x != 0)
+ {
+ ps_trns_addr->pu1_mb_y -= MB_SIZE;
+ ps_trns_addr->pu1_mb_u -= BLK8x8SIZE * YUV420SP_FACTOR;
+ ps_trns_addr->pu1_mb_v -= BLK8x8SIZE;
+ }
+ }
+
+ // assign the deblock structure pointers to start of slice
+ if(ps_dec->u1_separate_parse == 1)
+ {
+ ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic
+ + (u2_first_mb_in_slice << u1_mb_aff);
+ }
+ else
+ {
+ if(ps_dec->u4_mb_level_deblk == 0)
+ ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic
+ + (u2_first_mb_in_slice << u1_mb_aff);
+ }
+
+ ps_dec->u2_cur_mb_addr = (u2_first_mb_in_slice << u1_mb_aff);
+
+ ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv
+ + ((u2_first_mb_in_slice << u1_mb_aff) << 4);
+ }
+ }
+ else
+ {
+ tfr_ctxt_t *ps_trns_addr;
+
+ if(ps_dec->u1_separate_parse)
+ {
+ ps_trns_addr = &ps_dec->s_tran_addrecon_parse;
+ }
+ else
+ {
+ ps_trns_addr = &ps_dec->s_tran_addrecon;
+ }
+
+ u2_mb_x = 0xffff;
+ u2_mb_y = 0;
+ // assign the deblock structure pointers to start of slice
+ ps_dec->u2_cur_mb_addr = 0;
+ ps_dec->ps_deblk_mbn = ps_dec->ps_deblk_pic;
+ if(ps_dec->u4_mb_level_deblk == 1)
+ {
+ ps_dec->ps_deblk_mbn_curr = ps_dec->ps_deblk_mbn;
+ ps_dec->ps_deblk_mbn_prev = ps_dec->ps_deblk_mbn
+ + ps_dec->u1_recon_mb_grp;
+ }
+ ps_dec->ps_mv_cur = ps_dec->s_cur_pic.ps_mv;
+ ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1;
+ ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2;
+ ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3;
+
+ ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
+ ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
+ ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
+
+ }
+
+ ps_dec->ps_part = ps_dec->ps_parse_part_params;
+
+ ps_dec->u2_mbx =
+ (MOD(u2_first_mb_in_slice - 1, ps_seq->u2_frm_wd_in_mbs));
+ ps_dec->u2_mby =
+ (DIV(u2_first_mb_in_slice - 1, ps_seq->u2_frm_wd_in_mbs));
+ ps_dec->u2_mby <<= ps_cur_slice->u1_mbaff_frame_flag;
+ ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx;
+ ps_dec->i2_prev_slice_mby = ps_dec->u2_mby;
+ }
+
+ /* RBSP stop bit is used for CABAC decoding*/
+ ps_bitstrm->u4_max_ofst += ps_dec->ps_cur_pps->u1_entropy_coding_mode;
+
+ ps_dec->u1_B = (u1_slice_type == B_SLICE);
+ ps_dec->u4_next_mb_skip = 0;
+
+ ps_dec->ps_parse_cur_slice->u4_num_mbs_done_in_slice = 0;
+ ps_dec->ps_parse_cur_slice->u4_first_mb_in_slice =
+ ps_dec->ps_cur_slice->u2_first_mb_in_slice;
+ ps_dec->ps_parse_cur_slice->slice_type =
+ ps_dec->ps_cur_slice->u1_slice_type;
+ ps_dec->ps_parse_cur_slice->end_of_slice = 0;
+ ps_dec->ps_parse_cur_slice->last_slice_in_frame = 0;
+
+
+ /*set to zero to indicate a valid slice has been decoded*/
+ ps_dec->u4_first_slice_in_pic = 0;
+
+ ps_dec->u4_start_frame_decode = 1;
+
+
+ ps_dec->u4_start_bs_deblk = 1;
+
+ ps_dec->ps_parse_cur_slice->u2_error_flag = 0;
+ {
+ WORD32 num_entries;
+ WORD32 size;
+ UWORD8 *pu1_buf;
+
+ num_entries = MIN(MAX_FRAMES, ps_dec->u4_num_ref_frames_at_init);
+ num_entries = 2 * ((2 * num_entries) + 1);
+
+ size = num_entries * sizeof(void *);
+ size += PAD_MAP_IDX_POC * sizeof(void *);
+
+ pu1_buf = (UWORD8 *)ps_dec->pv_map_ref_idx_to_poc_buf;
+ pu1_buf += size * ps_dec->u2_cur_slice_num;
+ ps_dec->ps_parse_cur_slice->ppv_map_ref_idx_to_poc = (volatile void **)pu1_buf;
+ }
+
+ if(u1_slice_type == I_SLICE)
+ {
+ ps_dec->ps_cur_pic->u4_pack_slc_typ |= I_SLC_BIT;
+
+ ret = ih264d_parse_islice(ps_dec, u2_first_mb_in_slice);
+ if(ret != OK)
+ return ret;
+
+ if(ps_dec->i4_pic_type != B_SLICE && ps_dec->i4_pic_type != P_SLICE)
+ ps_dec->i4_pic_type = I_SLICE;
+
+ }
+ else if(u1_slice_type == P_SLICE)
+ {
+ ps_dec->ps_cur_pic->u4_pack_slc_typ |= P_SLC_BIT;
+ ret = ih264d_parse_pslice(ps_dec, u2_first_mb_in_slice);
+ if(ret != OK)
+ return ret;
+ ps_dec->u1_pr_sl_type = u1_slice_type;
+ if(ps_dec->i4_pic_type != B_SLICE)
+ ps_dec->i4_pic_type = P_SLICE;
+ }
+ else if(u1_slice_type == B_SLICE)
+ {
+ ps_dec->ps_cur_pic->u4_pack_slc_typ |= B_SLC_BIT;
+ ret = ih264d_parse_bslice(ps_dec, u2_first_mb_in_slice);
+ if(ret != OK)
+ return ret;
+ ps_dec->u1_pr_sl_type = u1_slice_type;
+ ps_dec->i4_pic_type = B_SLICE;
+ }
+ else
+ return ERROR_INV_SLC_TYPE_T;
+
+ ps_dec->ps_parse_cur_slice->end_of_slice = 1;
+
+ ps_dec->u2_cur_slice_num++;
+ /* storing last Mb X and MbY of the slice */
+ ps_dec->i2_prev_slice_mbx = ps_dec->u2_mbx;
+ ps_dec->i2_prev_slice_mby = ps_dec->u2_mby;
+ /* End of Picture detection */
+
+ if(ps_dec->u2_total_mbs_coded >= (ps_seq->u2_max_mb_addr + 1))
+ {
+ ps_dec->u1_pic_decode_done = 1;
+
+ }
+
+ {
+ dec_err_status_t * ps_err = ps_dec->ps_dec_err_status;
+ if((ps_err->u1_err_flag & REJECT_PB_PICS)
+ && (ps_err->u1_cur_pic_type == PIC_TYPE_I))
+ {
+ ps_err->u1_err_flag = ACCEPT_ALL_PICS;
+ }
+ }
+
+ PRINT_BIN_BIT_RATIO(ps_dec)
+
+ return OK;
+}
+
diff --git a/decoder/ih264d_parse_slice.h b/decoder/ih264d_parse_slice.h
new file mode 100755
index 0000000..cf5f9ce
--- /dev/null
+++ b/decoder/ih264d_parse_slice.h
@@ -0,0 +1,47 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_PARSE_SLICE_H_
+#define _IH264D_PARSE_SLICE_H_
+/*!
+ **************************************************************************
+ * \file ih264d_parse_slice.h
+ *
+ * \brief
+ * Contains routines that decode a slice NAL unit
+ *
+ * \date
+ * 19/12/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+#include "ih264d_error_handler.h"
+WORD32 ih264d_parse_decode_slice(UWORD8 u1_is_idr_slice, /* presumably non-zero for an IDR slice - TODO confirm */
+ UWORD8 u1_nal_ref_idc, /* presumably nal_ref_idc of the slice NAL - TODO confirm */
+ dec_struct_t * ps_dec );
+
+WORD32 ih264d_ref_idx_reordering(dec_struct_t * ps_dec, UWORD8 u1_isB); /* u1_isB: presumably selects B-slice handling - TODO confirm */
+WORD32 ih264d_read_mmco_commands(dec_struct_t * ps_dec); /* NOTE(review): meaning of the WORD32 result is not visible here */
+void ih264d_form_pred_weight_matrix(dec_struct_t *ps_dec); /* returns nothing */
+#endif /* _IH264D_PARSE_SLICE_H_ */
diff --git a/decoder/ih264d_process_bslice.c b/decoder/ih264d_process_bslice.c
new file mode 100755
index 0000000..69199cf
--- /dev/null
+++ b/decoder/ih264d_process_bslice.c
@@ -0,0 +1,2345 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_process_bslice.c
+ *
+ * \brief
+ * Contains routines that decode B slice type
+ *
+ * Detailed_description
+ *
+ * \date
+ * 21/12/2002
+ *
+ * \author NS
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+
+#include <string.h>
+#include "ih264d_structs.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_inter_pred.h"
+#include "ih264d_process_pslice.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_tables.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_process_pslice.h"
+#include "ih264d_process_bslice.h"
+#include "ih264d_tables.h"
+#include "ih264d_parse_islice.h"
+#include "ih264d_mvpred.h"
+
+void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec);
+//UWORD32 g_hits = 0;
+//UWORD32 g_miss = 0;
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_decode_spatial_direct \endif
+ *
+ * \brief
+ * Decodes spatial direct mode: picks the spatially predicted or the zero
+ * MV per list, queues pred info in ps_pred_pkd and replicates the MVs.
+ * \return
+ * OK (both exits of this function return OK).
+ * Arunoday T
+ **************************************************************************
+ */
+WORD32 ih264d_decode_spatial_direct(dec_struct_t * ps_dec, /* decoder state */
+ UWORD8 u1_wd_x, /* compared with MB_SIZE; (u1_wd_x >> 2) is passed on as the partition size */
+ dec_mb_info_t * ps_cur_mb_info, /* current MB; u1_Mux, u2_mask[] and u1_num_pred_parts are updated */
+ UWORD8 u1_mb_num) /* (u1_mb_num << 4) is the offset applied to ps_dec->ps_mv_cur */
+{
+ mv_pred_t s_mv_pred, *ps_mv;
+ UWORD8 u1_col_zero_flag, u1_sub_mb_num, u1_direct_zero_pred_flag = 0;
+ UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ mv_pred_t *ps_mv_ntop_start;
+ mv_pred_t *ps_mv_nmb_start = ps_dec->ps_mv_cur + (u1_mb_num << 4);
+ UWORD8 partition_size, sub_partition, u1_mb_partw, u1_mb_parth;
+ UWORD8 i;
+ WORD8 i1_pred, i1_ref_frame0, i1_ref_frame1;
+ struct pic_buffer_t *ps_ref_frame = NULL, *ps_col_pic, *ps_pic_buff0 = NULL,
+ *ps_pic_buff1 = NULL;
+
+ UWORD8 u1_zero_pred_cond_f, u1_zero_pred_cond_b;
+ WORD16 i2_def_mv[2], i2_spat_pred_mv[4], *pi2_final_mv0, *pi2_final_mv1;
+ UWORD16 ui2_mask_fwd = 0, ui2_mask_bwd = 0, u2_mask = 0;
+ UWORD32 *pui32_weight_ofsts = NULL;
+ directmv_t s_mvdirect;
+ UWORD8 u1_colz;
+ UWORD8 u1_final_ref_idx = 0;
+ const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
+ const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
+ const UWORD16 sub_mask_table[] =
+ { 0x33, 0x3, 0x11, 0x1 }; /* used instead of mask_table when sub_partition != 0 */
+ const UWORD16 mask_table[] =
+ { 0xffff, /*16x16 NA */
+ 0xff, /* 16x8*/
+ 0x3333, /* 8x16*/
+ 0x33 };/* 8x8*/
+ mv_pred_t s_temp_mv_pred;
+ WORD32 ret = 0; /* NOTE(review): never referenced again in this function */
+
+ /* Step back (u2_frm_wd_in_mbs << 4) MV entries (doubled for MBAFF), then forward to entry 12 */
+ ps_mv_ntop_start = ps_dec->ps_mv_cur + (u1_mb_num << 4)
+ - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
+
+ /* assign default values for MotionVector as zero */
+ i2_def_mv[0] = 0;
+ i2_def_mv[1] = 0;
+
+ u1_direct_zero_pred_flag = ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb_start,
+ ps_mv_ntop_start, &s_mv_pred, 0, 4,
+ 0, 1, B_DIRECT_SPATIAL); /* result is kept as the zero-prediction flag; MVs and ref indices come back in s_mv_pred */
+
+ i2_spat_pred_mv[0] = s_mv_pred.i2_mv[0];
+ i2_spat_pred_mv[1] = s_mv_pred.i2_mv[1];
+ i2_spat_pred_mv[2] = s_mv_pred.i2_mv[2];
+ i2_spat_pred_mv[3] = s_mv_pred.i2_mv[3];
+
+ i1_ref_frame0 = s_mv_pred.i1_ref_frame[0];
+ i1_ref_frame1 = s_mv_pred.i1_ref_frame[1];
+
+ i1_ref_frame0 = (i1_ref_frame0 < 0) ? -1 : i1_ref_frame0; /* any negative index becomes -1 */
+ i1_ref_frame1 = (i1_ref_frame1 < 0) ? -1 : i1_ref_frame1;
+
+ i1_pred = 0;
+
+ {
+ WORD8 u1_ref_idx, u1_ref_idx1;
+ UWORD32 uc_Idx, uc_Idx1;
+ UWORD8 u1_scale_ref = (ps_dec->ps_cur_slice->u1_mbaff_frame_flag
+ && ps_cur_mb_info->u1_mb_field_decodingflag); /* set only for a field-decoded MB in an MBAFF frame */
+ u1_final_ref_idx = i1_ref_frame0;
+ if(i1_ref_frame0 >= 0)
+ {
+ /* convert RefIdx if it is MbAff */
+ u1_ref_idx = i1_ref_frame0;
+ u1_ref_idx1 = i1_ref_frame0;
+ if(u1_scale_ref)
+ {
+ u1_ref_idx1 = u1_ref_idx >> 1;
+ if((u1_ref_idx & 0x01) != (1 - ps_cur_mb_info->u1_topmb))
+ u1_ref_idx1 += MAX_REF_BUFS;
+ }
+ /* If i1_ref_frame0 < 0 then refIdxCol is obtained from ps_pic_buff1 */
+ ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u1_ref_idx1];
+ ps_ref_frame = ps_pic_buff0;
+ i1_pred = PRED_L0;
+ }
+
+ if(i1_ref_frame1 >= 0)
+ {
+ /* convert RefIdx if it is MbAff */
+ u1_ref_idx = i1_ref_frame1;
+ u1_ref_idx1 = i1_ref_frame1;
+ if(u1_scale_ref)
+ {
+ u1_ref_idx1 = u1_ref_idx >> 1;
+ if((u1_ref_idx & 0x01) != (1 - ps_cur_mb_info->u1_topmb))
+ u1_ref_idx1 += MAX_REF_BUFS;
+ }
+ ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][u1_ref_idx1];
+ i1_pred = i1_pred | PRED_L1;
+ }
+ if(i1_ref_frame0 < 0)
+ {
+ ps_ref_frame = ps_pic_buff1; /* still NULL when i1_ref_frame1 is negative as well */
+ u1_final_ref_idx = i1_ref_frame1;
+ }
+
+ u1_zero_pred_cond_f = (u1_direct_zero_pred_flag) || (i1_ref_frame0 < 0);
+ u1_zero_pred_cond_b = (u1_direct_zero_pred_flag) || (i1_ref_frame1 < 0);
+
+ if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
+ {
+ uc_Idx = ((i1_ref_frame0 < 1) ? 0 : i1_ref_frame0)
+ * ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1];
+ if(u1_scale_ref)
+ uc_Idx >>= 1;
+ uc_Idx1 = (i1_ref_frame1 < 0) ? 0 : i1_ref_frame1;
+ uc_Idx += (u1_scale_ref) ? (uc_Idx1 >> 1) : uc_Idx1;
+ pui32_weight_ofsts =
+ (UWORD32*)&ps_dec->pu4_wt_ofsts[2 * X3(uc_Idx)];
+
+ if(i1_ref_frame0 < 0)
+ pui32_weight_ofsts += 1;
+
+ if(u1_scale_ref && (ps_dec->ps_cur_pps->u1_wted_bipred_idc == 2))
+ {
+ WORD16 i2_ref_idx;
+ i2_ref_idx = MAX(i1_ref_frame0, 0);
+ i2_ref_idx *= (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
+ << 1);
+ i2_ref_idx += MAX(i1_ref_frame1, 0);
+ if(!ps_cur_mb_info->u1_topmb)
+ i2_ref_idx +=
+ (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
+ << 1)
+ * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
+ << 1);
+ pui32_weight_ofsts = (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2
+ * X3(i2_ref_idx)];
+ }
+ }
+ }
+
+ s_temp_mv_pred.i1_ref_frame[0] = i1_ref_frame0;
+ s_temp_mv_pred.i1_ref_frame[1] = i1_ref_frame1;
+ s_temp_mv_pred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id; /* NOTE(review): ps_ref_frame is NULL if both ref indices are negative - confirm pf_mvpred rules that out */
+ s_temp_mv_pred.u1_pic_type = ps_ref_frame->u1_pic_type;
+
+ /**********************************************************************/
+ /* Call the function which gets the number of partitions and */
+ /* partition info of colocated Mb */
+ /**********************************************************************/
+
+ ps_dec->pf_parse_mvdirect(ps_dec, ps_dec->ps_col_pic, &s_mvdirect, u1_wd_x,
+ ps_dec->i4_submb_ofst, ps_cur_mb_info);
+ ps_col_pic = ps_dec->ps_col_pic;
+ if((s_mvdirect.u1_col_zeroflag_change == 0) || u1_direct_zero_pred_flag) /* whole block handled as one partition; this branch returns OK */
+ {
+ WORD16 i2_mv_x, i2_mv_y, i2_mvX1, i2_mvY1;
+ /* Most probable case */
+ u1_col_zero_flag = *(ps_col_pic->pu1_col_zero_flag
+ + s_mvdirect.i4_mv_indices[0]);
+ u1_col_zero_flag = u1_col_zero_flag & 0x01;
+
+ if(u1_zero_pred_cond_f || ((i1_ref_frame0 == 0) && (u1_col_zero_flag == 1)))
+ {
+ i2_mv_x = 0;
+ i2_mv_y = 0;
+ }
+ else
+ {
+ i2_mv_x = i2_spat_pred_mv[0];
+ i2_mv_y = i2_spat_pred_mv[1];
+
+ }
+
+ if(u1_zero_pred_cond_b || ((i1_ref_frame1 == 0) && (u1_col_zero_flag == 1)))
+ {
+ i2_mvX1 = 0;
+ i2_mvY1 = 0;
+ }
+ else
+ {
+ i2_mvX1 = i2_spat_pred_mv[2];
+ i2_mvY1 = i2_spat_pred_mv[3];
+ }
+
+ u1_sub_mb_num = ps_dec->u1_sub_mb_num;
+ u1_mb_partw = (u1_wd_x >> 2);
+
+
+ if(i1_ref_frame0 >= 0)
+ {
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD16 i2_mv[2];
+ WORD8 i1_ref_idx= 0;
+
+ i2_mv[0] = i2_mv_x;
+ i2_mv[1] = i2_mv_y;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_partw,u1_sub_mb_num,i1_pred,
+ ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff0->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+
+ }
+
+ if(i1_ref_frame1 >= 0)
+ {
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD16 i2_mv[2];
+ WORD8 i1_ref_idx= 0;
+
+ i2_mv[0] = i2_mvX1;
+ i2_mv[1] = i2_mvY1;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_partw,u1_sub_mb_num,i1_pred,
+ ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff1->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+ }
+
+
+ /* Replication optimisation */
+ s_temp_mv_pred.i2_mv[0] = i2_mv_x;
+ s_temp_mv_pred.i2_mv[1] = i2_mv_y;
+ s_temp_mv_pred.i2_mv[2] = i2_mvX1;
+ s_temp_mv_pred.i2_mv[3] = i2_mvY1;
+
+ /* Calculating colocated zero information */
+ {
+ /*************************************/
+ /* If(bit2 and bit3 set) */
+ /* then */
+ /* (bit0 and bit1) => submmbmode */
+ /* (bit2 and bit3) => mbmode */
+ /* else */
+ /* (bit0 and bit1) => mbmode */
+ /*************************************/
+ /*UWORD8 u1_packed_mb_sub_mb_mode = sub_partition ?
+ (s_mvdirect.i1_partitionsize[0]) : ((s_mvdirect.i1_partitionsize[0]) << 2);*/
+ UWORD8 u1_packed_mb_sub_mb_mode = (u1_mb_partw == 2) ? 0x03 : 0;
+
+ if(i1_ref_frame0 < 0)
+ {
+ i2_mv_x = i2_mvX1;
+ i2_mv_y = i2_mvY1;
+ }
+
+ /* The packed mode (0x03 or 0) is shifted by 6 into the top two bits of u1_colz */
+ u1_colz = (ps_cur_mb_info->u1_mb_field_decodingflag << 1)
+ | ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1)
+ && (ABS(i2_mv_y) <= 1));
+ u1_colz |= (u1_packed_mb_sub_mb_mode << 6);
+ }
+ ps_mv = ps_mv_nmb_start + u1_sub_mb_num;
+ ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u1_sub_mb_num, u1_colz,
+ u1_mb_partw, u1_mb_partw);
+ if(u1_wd_x == MB_SIZE)
+ ps_dec->u1_currB_type = 0;
+
+
+
+ return OK;
+ }
+ /***************************************************************************/
+ /* If present MB is 16x16 and the partition of colocated Mb is >= PRED_8x8 */
+ /* i.e 8x8 or less than 8x8 partitions then set up DMA for (0,0) and */
+ /* spatially predicted motion vector and do the multiplexing after */
+ /* motion compensation */
+ /***************************************************************************/
+
+
+ if((u1_wd_x == MB_SIZE) && (s_mvdirect.i1_num_partitions > 2))
+ {
+ ps_cur_mb_info->u1_Mux = 1; /* makes the partition loop below skip its own pred-info entries */
+ if(i1_ref_frame0 >= 0)
+ {
+
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD8 i1_ref_idx= 0;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(&(i2_spat_pred_mv[0]),4,4,0,i1_pred,
+ ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff0->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+
+ /****** (0,0) Motion vectors DMA *****/
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD16 i2_mv[2];
+ WORD8 i1_ref_idx= 0;
+
+ i2_mv[0] = 0;
+ i2_mv[1] = 0;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(i2_mv,4,4,0,i1_pred,
+ ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff0->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+ }
+ if(i1_ref_frame1 >= 0)
+ {
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD16 i2_mv[2]; /* NOTE(review): not used in this scope; the call below passes i2_spat_pred_mv */
+ WORD8 i1_ref_idx= 0;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(&(i2_spat_pred_mv[2]),4,4,0,i1_pred,
+ ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff1->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+
+ /****** (0,0) Motion vectors DMA *****/
+
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD16 i2_mv[2];
+ WORD8 i1_ref_idx= 0;
+
+ i2_mv[0] = 0;
+ i2_mv[1] = 0;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(i2_mv,4,4,0,i1_pred,
+ ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff1->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+ }
+ }
+
+ /*u1_col = *(ps_col_pic->pu1_col_zero_flag + s_mvdirect.i4_mv_indices[0]);
+ u1_col &= 1;
+ u1_init = 0;*/
+
+ for(i = 0; i < s_mvdirect.i1_num_partitions; i++)
+ {
+ partition_size = s_mvdirect.i1_partitionsize[i];
+ u1_sub_mb_num = s_mvdirect.i1_submb_num[i];
+
+ sub_partition = partition_size >> 2;
+ partition_size &= 0x3;
+ u1_mb_partw = pu1_mb_partw[partition_size];
+ u1_mb_parth = pu1_mb_parth[partition_size];
+ u2_mask = mask_table[partition_size];
+ if(sub_partition != 0)
+ {
+ u1_mb_partw >>= 1;
+ u1_mb_parth >>= 1;
+ u2_mask = sub_mask_table[partition_size];
+ }
+
+ u1_col_zero_flag = *(ps_col_pic->pu1_col_zero_flag
+ + s_mvdirect.i4_mv_indices[i]);
+ u1_col_zero_flag = u1_col_zero_flag & 0x01;
+
+ /*if(u1_col != u1_col_zero_flag)
+ u1_init = 1;*/
+
+ if(u1_zero_pred_cond_f || ((i1_ref_frame0 == 0) && (u1_col_zero_flag == 1)))
+ {
+ pi2_final_mv0 = &i2_def_mv[0];
+ ui2_mask_fwd |= (u2_mask << u1_sub_mb_num); /* record the sub-blocks that take the zero MV for list 0 */
+ }
+ else
+ pi2_final_mv0 = &i2_spat_pred_mv[0];
+
+ if(u1_zero_pred_cond_b || ((i1_ref_frame1 == 0) && (u1_col_zero_flag == 1)))
+ {
+ pi2_final_mv1 = &i2_def_mv[0];
+ ui2_mask_bwd |= (u2_mask << u1_sub_mb_num); /* record the sub-blocks that take the zero MV for list 1 */
+ }
+ else
+ pi2_final_mv1 = &i2_spat_pred_mv[2];
+
+ if(ps_cur_mb_info->u1_Mux != 1)
+ {
+ /*u1_sub_mb_x = u1_sub_mb_num & 0x03;
+ uc_sub_mb_y = (u1_sub_mb_num >> 2);*/
+ if(i1_ref_frame0 >= 0)
+ {
+
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD8 i1_ref_idx= 0;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(pi2_final_mv0,u1_mb_partw,u1_mb_parth,u1_sub_mb_num,i1_pred,
+ ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff0->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+
+ }
+
+ if(i1_ref_frame1 >= 0)
+ {
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD8 i1_ref_idx= 0;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(pi2_final_mv1,u1_mb_partw,u1_mb_parth,u1_sub_mb_num,i1_pred,
+ ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff1->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+ }
+ }
+
+ /* Replication optimisation */
+ s_temp_mv_pred.i2_mv[0] = pi2_final_mv0[0];
+ s_temp_mv_pred.i2_mv[1] = pi2_final_mv0[1];
+ s_temp_mv_pred.i2_mv[2] = pi2_final_mv1[0];
+ s_temp_mv_pred.i2_mv[3] = pi2_final_mv1[1];
+
+ /* Calculating colocated zero information */
+ {
+ WORD16 i2_mv_x = 0, i2_mv_y = 0;
+ /*************************************/
+ /* If(bit2 and bit3 set) */
+ /* then */
+ /* (bit0 and bit1) => submmbmode */
+ /* (bit2 and bit3) => mbmode */
+ /* else */
+ /* (bit0 and bit1) => mbmode */
+ /*************************************/
+ UWORD8 u1_packed_mb_sub_mb_mode =
+ sub_partition ? (s_mvdirect.i1_partitionsize[i]) : ((s_mvdirect.i1_partitionsize[i])
+ << 2);
+
+ if(i1_ref_frame0 >= 0)
+ {
+ i2_mv_x = pi2_final_mv0[0];
+ i2_mv_y = pi2_final_mv0[1];
+ }
+ else
+ {
+ i2_mv_x = pi2_final_mv1[0];
+ i2_mv_y = pi2_final_mv1[1];
+ }
+
+ u1_colz = (ps_cur_mb_info->u1_mb_field_decodingflag << 1)
+ | ((u1_final_ref_idx == 0) && (ABS(i2_mv_x) <= 1)
+ && (ABS(i2_mv_y) <= 1));
+ u1_colz |= (u1_packed_mb_sub_mb_mode << 4);
+ }
+ ps_mv = ps_mv_nmb_start + u1_sub_mb_num;
+ ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u1_sub_mb_num, u1_colz,
+ u1_mb_parth, u1_mb_partw); /* NOTE(review): height precedes width here, unlike the ih264d_fill_pred_info calls - confirm against ih264d_rep_mv_colz's parameter order */
+ }
+ i = 0; /* i is reused as the index into u2_mask[] */
+ if(i1_ref_frame0 >= 0)
+ ps_cur_mb_info->u2_mask[i++] = ui2_mask_fwd;
+ if(i1_ref_frame1 >= 0)
+ ps_cur_mb_info->u2_mask[i] = ui2_mask_bwd;
+
+ /*if(u1_init)
+ H264_DEC_DEBUG_PRINT("hit\n");
+ else
+ H264_DEC_DEBUG_PRINT("miss\n");*/
+
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_decode_temporal_direct \endif
+ *
+ * \brief
+ * Decodes temporal direct mode.
+ *
+ * \return
+ * None.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_decode_temporal_direct(dec_struct_t * ps_dec,
+ UWORD8 u1_wd_x,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num)
+{ /* For every partition that pf_parse_mvdirect() reports for the co-located
+ * MB: choose the list-0 reference, scale the co-located MV by POC distance
+ * into an L0/L1 MV pair, queue one pred-info entry per MV through
+ * ih264d_fill_pred_info() and replicate the result into the current MV
+ * buffer with ih264d_rep_mv_colz(). The function always returns OK. */
+ struct pic_buffer_t *ps_pic_buff0, *ps_pic_buff1, *ps_col_pic;
+ mv_pred_t *ps_mv, s_temp_mv_pred;
+ UWORD8 u1_sub_mb_num;
+ UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
+ UWORD8 u1_mb_partw, u1_mb_parth;
+ UWORD8 i, partition_size, sub_partition;
+ UWORD32 *pui32_weight_ofsts = NULL; /* stays NULL unless u1_wted_bipred_idc is non-zero */
+ directmv_t s_mvdirect;
+ const UWORD8 *pu1_mb_parth = (const UWORD8 *)gau1_ih264d_mb_parth;
+ const UWORD8 *pu1_mb_partw = (const UWORD8 *)gau1_ih264d_mb_partw;
+ WORD8 c_refFrm0, c_refFrm1;
+ UWORD8 u1_ref_idx0, u1_is_cur_mb_fld;
+ UWORD32 pic0_poc, pic1_poc, cur_poc;
+ WORD32 ret = 0; /* NOTE(review): never read or returned in this function */
+
+ u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
+ ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][0]; /* list-1 entry 0 */
+
+ /**********************************************************************/
+ /* Call the function which gets the number of partitions and */
+ /* partition info of colocated Mb */
+ /**********************************************************************/
+ ps_dec->pf_parse_mvdirect(ps_dec, ps_dec->ps_col_pic, &s_mvdirect, u1_wd_x,
+ ps_dec->i4_submb_ofst, ps_cur_mb_info);
+ ps_col_pic = ps_dec->ps_col_pic;
+
+ for(i = 0; i < s_mvdirect.i1_num_partitions; i++)
+ {
+ UWORD8 u1_colz;
+ partition_size = s_mvdirect.i1_partitionsize[i];
+ u1_sub_mb_num = s_mvdirect.i1_submb_num[i];
+ ps_mv = ps_col_pic->ps_mv + s_mvdirect.i4_mv_indices[i]; /* co-located MV record */
+
+ /* Defensive default; the original note says this should be removed to catch uninitialized memory reads */
+ u1_ref_idx0 = 0;
+
+ sub_partition = partition_size >> 2; /* non-zero => width/height are halved below */
+ partition_size &= 0x3; /* low two bits index the width/height tables */
+ u1_mb_partw = pu1_mb_partw[partition_size];
+ u1_mb_parth = pu1_mb_parth[partition_size];
+ if(sub_partition != 0)
+ {
+ u1_mb_partw >>= 1;
+ u1_mb_parth >>= 1;
+ }
+ c_refFrm0 = ps_mv->i1_ref_frame[0];
+ c_refFrm1 = ps_mv->i1_ref_frame[1];
+
+ if((c_refFrm0 == -1) && (c_refFrm1 == -1)) /* co-located block references neither list */
+ {
+ u1_ref_idx0 = 0;
+ ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][0];
+ if(u1_mbaff && u1_is_cur_mb_fld)
+ {
+ if(ps_cur_mb_info->u1_topmb)
+ {
+ pic0_poc = ps_pic_buff0->i4_top_field_order_cnt;
+ pic1_poc = ps_pic_buff1->i4_top_field_order_cnt;
+ cur_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
+ }
+ else
+ {
+ /* POCs are read from the current buffers first; the buffers are then switched to the entries stored MAX_REF_BUFS further on */
+ pic1_poc = ps_pic_buff1->i4_bottom_field_order_cnt;
+ cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
+ ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS]; /* NOTE(review): ps_pic_buff1 is otherwise only set before the loop and in the else-branch below, so this value carries into later partitions taking this branch - confirm intended */
+ pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt;
+ ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][MAX_REF_BUFS];
+ }
+ }
+ else
+ {
+ pic0_poc = ps_pic_buff0->i4_avg_poc;
+ pic1_poc = ps_pic_buff1->i4_avg_poc;
+ cur_poc = ps_dec->ps_cur_pic->i4_poc;
+ }
+ }
+ else
+ {
+ UWORD8 uc_i, u1_num_frw_ref_pics;
+ UWORD8 buf_id, u1_pic_type;
+ buf_id = ps_mv->u1_col_ref_pic_idx;
+ u1_pic_type = ps_mv->u1_pic_type;
+ if(ps_dec->ps_cur_slice->u1_field_pic_flag)
+ {
+ if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD)
+ {
+ /* look for the field whose parity matches the current slice */
+ u1_pic_type = TOP_FLD;
+ if(ps_dec->ps_cur_slice->u1_bottom_field_flag)
+ u1_pic_type = BOT_FLD;
+ }
+ }
+ u1_num_frw_ref_pics =
+ ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0];
+
+ /* Search list 0 for the entry with the same MV buffer id (and, for field pictures, the same picture type); u1_ref_idx0 keeps its default 0 when nothing matches */
+ for(uc_i = 0; uc_i < u1_num_frw_ref_pics; uc_i++)
+ {
+ if(ps_dec->ps_cur_slice->u1_field_pic_flag)
+ {
+ if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_mv_buf_id == buf_id)
+ {
+ if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_pic_type
+ == u1_pic_type)
+ {
+ u1_ref_idx0 = uc_i;
+ break;
+ }
+ }
+ }
+ else
+ {
+ if(ps_dec->ps_ref_pic_buf_lx[0][uc_i]->u1_mv_buf_id == buf_id)
+ {
+ u1_ref_idx0 = uc_i;
+ break;
+ }
+ }
+ }
+
+ ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][u1_ref_idx0];
+ ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][0];
+
+ if(u1_mbaff && u1_is_cur_mb_fld)
+ {
+ pic0_poc = ps_pic_buff0->i4_top_field_order_cnt;
+ u1_ref_idx0 <<= 1; /* index is doubled here; the low bit is set below where applicable */
+ if(s_mvdirect.u1_vert_mv_scale == ONE_TO_ONE)
+ {
+ if(u1_pic_type == BOT_FLD)
+ {
+ pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt;
+ ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][(u1_ref_idx0
+ >> 1) + MAX_REF_BUFS];
+ if(ps_cur_mb_info->u1_topmb)
+ u1_ref_idx0++;
+ }
+ else
+ {
+ if(1 - ps_cur_mb_info->u1_topmb)
+ u1_ref_idx0++;
+ }
+ }
+ if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD)
+ {
+ if(1 - ps_cur_mb_info->u1_topmb)
+ {
+ pic0_poc = ps_pic_buff0->i4_bottom_field_order_cnt;
+ ps_pic_buff0 = ps_dec->ps_ref_pic_buf_lx[0][(u1_ref_idx0
+ >> 1) + MAX_REF_BUFS];
+ }
+ }
+ if(ps_cur_mb_info->u1_topmb)
+ {
+ pic1_poc = ps_pic_buff1->i4_top_field_order_cnt;
+ cur_poc = ps_dec->ps_cur_pic->i4_top_field_order_cnt;
+ }
+ else
+ {
+ pic1_poc = ps_pic_buff1->i4_bottom_field_order_cnt;
+ cur_poc = ps_dec->ps_cur_pic->i4_bottom_field_order_cnt;
+ ps_pic_buff1 = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS];
+ }
+ }
+ else
+ {
+ pic0_poc = ps_pic_buff0->i4_avg_poc;
+ pic1_poc = ps_pic_buff1->i4_avg_poc;
+ cur_poc = ps_dec->ps_cur_pic->i4_poc;
+ }
+ }
+ {
+ WORD16 i16_td;
+
+ /* Take the co-located MV from list 0 when it has a list-0 reference, else from list 1, else zero */
+ if(c_refFrm0 >= 0)
+ {
+ i2_mv_x0 = ps_mv->i2_mv[0];
+ i2_mv_y0 = ps_mv->i2_mv[1];
+ }
+ else if(c_refFrm1 >= 0)
+ {
+ i2_mv_x0 = ps_mv->i2_mv[2];
+ i2_mv_y0 = ps_mv->i2_mv[3];
+ }
+ else
+ {
+ i2_mv_x0 = 0;
+ i2_mv_y0 = 0;
+ }
+ /* If FRM_TO_FLD or FLD_TO_FRM scale the "y" component of the colocated Mv*/
+ if(s_mvdirect.u1_vert_mv_scale == FRM_TO_FLD)
+ {
+ i2_mv_y0 /= 2;
+ }
+ else if(s_mvdirect.u1_vert_mv_scale == FLD_TO_FRM)
+ {
+ i2_mv_y0 *= 2;
+ }
+
+ i16_td = pic1_poc - pic0_poc; /* 32-bit difference narrowed to WORD16 before any clipping */
+ if((ps_pic_buff0->u1_is_short == 0) || (i16_td == 0))
+ {
+ /* No scaling: the L0 MV stays the co-located MV, the L1 MV is zero */
+ i2_mv_x1 = 0;
+ i2_mv_y1 = 0;
+ }
+ else
+ {
+ WORD16 i16_tb, i16_tx, i2_dist_scale_factor, i16_temp;
+
+ i16_td = CLIP3(-128, 127, i16_td);
+ i16_tb = cur_poc - pic0_poc;
+ i16_tb = CLIP3(-128, 127, i16_tb);
+
+ i16_tx = (16384 + ABS(SIGN_POW2_DIV(i16_td, 1))) / i16_td; /* i16_td is non-zero on this path */
+ i2_dist_scale_factor = CLIP3(-1024, 1023,
+ (((i16_tb * i16_tx) + 32) >> 6));
+ /* L0 MV = scaled co-located MV; L1 MV = L0 MV minus the co-located MV */
+ i16_temp = (i2_mv_x0 * i2_dist_scale_factor + 128) >> 8;
+ i2_mv_x1 = i16_temp - i2_mv_x0;
+ i2_mv_x0 = i16_temp;
+
+ i16_temp = (i2_mv_y0 * i2_dist_scale_factor + 128) >> 8;
+ i2_mv_y1 = i16_temp - i2_mv_y0;
+ i2_mv_y0 = i16_temp;
+ }
+ {
+ mv_pred_t *ps_mv; /* shadows the outer ps_mv; assigned below to the current MB's MV slot */
+
+ /*u1_sub_mb_x = u1_sub_mb_num & 0x03;
+ uc_sub_mb_y = u1_sub_mb_num >> 2;*/
+ if(ps_dec->ps_cur_pps->u1_wted_bipred_idc)
+ {
+ UWORD8 u1_idx =
+ u1_ref_idx0
+ * ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]; /* NOTE(review): product is truncated to 8 bits by the UWORD8 type */
+ UWORD8 u1_scale_ref = u1_mbaff && u1_is_cur_mb_fld;
+ if(u1_scale_ref)
+ u1_idx >>= 1;
+ pui32_weight_ofsts = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
+ * X3(u1_idx)];
+ if(u1_scale_ref
+ && (ps_dec->ps_cur_pps->u1_wted_bipred_idc
+ == 2))
+ {
+ /* Field MB in MBAFF with bipred idc 2: weights come from pu4_mbaff_wt_mat instead; bottom MBs add a further offset */
+ WORD16 i2_ref_idx;
+ i2_ref_idx = u1_ref_idx0;
+ i2_ref_idx *=
+ (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
+ << 1);
+ if(!ps_cur_mb_info->u1_topmb)
+ i2_ref_idx +=
+ (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0]
+ << 1)
+ * (ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1]
+ << 1);
+ pui32_weight_ofsts =
+ (UWORD32*)&ps_dec->pu4_mbaff_wt_mat[2
+ * X3(i2_ref_idx)];
+ }
+ }
+ { /* First pred-info entry: (mv_x0, mv_y0) against ps_pic_buff0 */
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD16 i2_mv[2];
+ WORD8 i1_ref_idx= 0;
+
+ i2_mv[0] = i2_mv_x0;
+ i2_mv[1] = i2_mv_y0;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_parth,u1_sub_mb_num,PRED_L0 | PRED_L1,
+ ps_pred_pkd,ps_pic_buff0->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff0->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+ { /* Second pred-info entry: (mv_x1, mv_y1) against ps_pic_buff1 */
+ pred_info_pkd_t *ps_pred_pkd;
+ WORD16 i2_mv[2];
+ WORD8 i1_ref_idx= 0;
+
+ i2_mv[0] = i2_mv_x1;
+ i2_mv[1] = i2_mv_y1;
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info(i2_mv,u1_mb_partw,u1_mb_parth,u1_sub_mb_num,PRED_L0 | PRED_L1,
+ ps_pred_pkd,ps_pic_buff1->u1_pic_buf_id,i1_ref_idx,pui32_weight_ofsts,
+ ps_pic_buff1->u1_pic_type);
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+
+
+ }
+
+ /* Replication optimisation: build one MV record and let ih264d_rep_mv_colz() copy it over the partition */
+ s_temp_mv_pred.i2_mv[0] = i2_mv_x0;
+ s_temp_mv_pred.i2_mv[1] = i2_mv_y0;
+ s_temp_mv_pred.i2_mv[2] = i2_mv_x1;
+ s_temp_mv_pred.i2_mv[3] = i2_mv_y1;
+ s_temp_mv_pred.i1_ref_frame[0] = u1_ref_idx0;
+ s_temp_mv_pred.i1_ref_frame[1] = 0;
+ s_temp_mv_pred.u1_col_ref_pic_idx = ps_pic_buff0->u1_mv_buf_id;
+ s_temp_mv_pred.u1_pic_type = ps_pic_buff0->u1_pic_type;
+ ps_mv = ps_dec->ps_mv_cur + (u1_mb_num << 4) + u1_sub_mb_num; /* 16 MV records per MB */
+
+ {
+ WORD16 i2_mv_x = 0, i2_mv_y = 0;
+ UWORD8 u1_packed_mb_sub_mb_mode =
+ sub_partition ? (s_mvdirect.i1_partitionsize[i]) : ((s_mvdirect.i1_partitionsize[i])
+ << 2);
+
+ if(c_refFrm0 >= 0)
+ {
+ i2_mv_x = i2_mv_x0;
+ i2_mv_y = i2_mv_y0;
+ }
+ else
+ {
+ i2_mv_x = i2_mv_x1;
+ i2_mv_y = i2_mv_y1;
+ }
+
+ /* u1_colz layout: bit 0 = (ref idx 0 and both MV components within +/-1), bit 1 = field decoding flag, bits 4 and up = packed MB/sub-MB mode */
+ u1_colz =
+ (ps_cur_mb_info->u1_mb_field_decodingflag << 1)
+ | ((u1_ref_idx0 == 0)
+ && (ABS(i2_mv_x)
+ <= 1)
+ && (ABS(i2_mv_y)
+ <= 1));
+ u1_colz |= (u1_packed_mb_sub_mb_mode << 4);
+ }
+ ih264d_rep_mv_colz(ps_dec, &s_temp_mv_pred, ps_mv, u1_sub_mb_num,
+ u1_colz, u1_mb_parth, u1_mb_partw);
+ }
+ }
+ }
+ /* A status value is returned (declared WORD32; the original note calls */
+ /* it UWORD8) to stay homogeneous with decodespatialdirect */
+ return OK;
+}
+
+void ih264d_convert_frm_to_fld_list(struct pic_buffer_t *ps_ref_pic_buf_lx,
+ UWORD8 *pu1_L0,
+ dec_struct_t *ps_dec,
+ UWORD8 u1_num_short_term_bufs)
+{ /* Rebuilds a frame reference list as a field reference list.
+ * Input : *pu1_L0 frame entries starting at ps_ref_pic_buf_lx, of which
+ * the first u1_num_short_term_bufs form the first pass.
+ * Output: field entries written starting MAX_REF_BUFS entries BEFORE
+ * ps_ref_pic_buf_lx, a same-parity field followed (when one is
+ * found) by an opposite-parity field; *pu1_L0 is overwritten
+ * with the number of field entries written. */
+ UWORD8 uc_count = *pu1_L0, i, uc_l1, uc_lx, j;
+ struct pic_buffer_t *ps_ref_lx[2], *ps_ref_pic_lx;
+ UWORD8 u1_bottom_field_flag;
+ dec_slice_params_t *ps_cur_slice;
+ UWORD8 u1_ref[2], u1_fld[2], u1_same_fld, u1_op_fld;
+ UWORD32 ui_half_num_of_sub_mbs;
+
+ uc_l1 = 0; /* input entries consumed by the opposite-parity walk */
+ uc_lx = 0; /* field entries written so far */
+ ps_cur_slice = ps_dec->ps_cur_slice;
+ ps_ref_pic_lx = ps_ref_pic_buf_lx - MAX_REF_BUFS; /* write cursor */
+ ps_ref_lx[0] = ps_ref_pic_buf_lx; /* read cursor, same parity */
+ ps_ref_lx[1] = ps_ref_pic_buf_lx; /* read cursor, opposite parity */
+ u1_bottom_field_flag = ps_cur_slice->u1_bottom_field_flag;
+ ui_half_num_of_sub_mbs = ((ps_dec->u2_pic_ht * ps_dec->u2_pic_wd) >> 5); /* offset added to col-zero-flag and MV pointers of bottom fields */
+ if(u1_bottom_field_flag)
+ {
+ u1_ref[0] = BOT_REF;
+ u1_ref[1] = TOP_REF;
+ u1_fld[0] = BOT_FLD;
+ u1_fld[1] = TOP_FLD;
+ u1_same_fld = BOT_FLD;
+ u1_op_fld = TOP_FLD;
+ }
+ else
+ {
+ u1_ref[0] = TOP_REF;
+ u1_ref[1] = BOT_REF;
+ u1_fld[0] = TOP_FLD;
+ u1_fld[1] = BOT_FLD;
+ u1_same_fld = TOP_FLD;
+ u1_op_fld = BOT_FLD;
+ }
+
+ /* Create the field list starting with all the short term */
+ /* frames followed by all the long term frames. No long term */
+ /* reference field should have a list idx less than a short */
+ /* term reference field during initialization. */
+
+ for(j = 0; j < 2; j++)
+ {
+ /* pass 0 covers [0, u1_num_short_term_bufs), pass 1 covers [u1_num_short_term_bufs, *pu1_L0) */
+ i = ((j == 0) ? 0 : u1_num_short_term_bufs);
+ uc_count = ((j == 0) ? u1_num_short_term_bufs : *pu1_L0);
+ for(; i < uc_count; i++, ps_ref_lx[0]++)
+ {
+ /* Search field of same parity in Frame list */
+ if((ps_ref_lx[0]->u1_pic_type & u1_ref[0])) // || ((ps_ref_lx[0]->u1_picturetype & 0x3) == 0))
+ {
+ /* Insert PIC of same parity in RefPicList */
+ ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_lx, ps_ref_lx[0]);
+ ps_ref_pic_lx->i4_pic_num = (ps_ref_pic_lx->i4_pic_num * 2 + 1);
+ ps_ref_pic_lx->u1_long_term_pic_num =
+ (ps_ref_pic_lx->u1_long_term_frm_idx * 2 + 1);
+ ps_ref_pic_lx->u1_pic_type = u1_same_fld; /* overwritten again in both branches below */
+ if(u1_fld[0] & BOT_FLD)
+ {
+ /* Bottom field: plane pointers advance by one frame-width row */
+ ps_ref_pic_lx->u1_pic_type = BOT_FLD;
+ ps_ref_pic_lx->pu1_buf1 += ps_ref_pic_lx->u2_frm_wd_y;
+ ps_ref_pic_lx->pu1_buf2 += ps_ref_pic_lx->u2_frm_wd_uv;
+ ps_ref_pic_lx->pu1_buf3 += ps_ref_pic_lx->u2_frm_wd_uv;
+ if(ps_ref_pic_lx->u1_picturetype & 0x3)
+ {
+ ps_ref_pic_lx->pu1_col_zero_flag += ui_half_num_of_sub_mbs;
+ ps_ref_pic_lx->ps_mv += ui_half_num_of_sub_mbs;
+ }
+ ps_ref_pic_lx->i4_poc =
+ ps_ref_pic_lx->i4_bottom_field_order_cnt;
+ ps_ref_pic_lx->i4_avg_poc =
+ ps_ref_pic_lx->i4_bottom_field_order_cnt;
+ }
+ else
+ {
+ ps_ref_pic_lx->u1_pic_type = TOP_FLD;
+ ps_ref_pic_lx->i4_poc = ps_ref_pic_lx->i4_top_field_order_cnt;
+ ps_ref_pic_lx->i4_avg_poc =
+ ps_ref_pic_lx->i4_top_field_order_cnt;
+ }
+
+ ps_ref_pic_lx++;
+ uc_lx++;
+ /* Find field of opposite parity */
+ if(uc_l1 < uc_count && ps_ref_lx[1])
+ {
+ /* Skip entries lacking the opposite-parity reference bit; the cursor becomes null once the pass limit is reached */
+ while(!(ps_ref_lx[1]->u1_pic_type & u1_ref[1]))
+ {
+ ps_ref_lx[1]++;
+ uc_l1++;
+ if(uc_l1 >= uc_count)
+ ps_ref_lx[1] = 0;
+ if(!ps_ref_lx[1])
+ break;
+ }
+
+ if(ps_ref_lx[1])
+ {
+ uc_l1++;
+ ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_lx,
+ ps_ref_lx[1]);
+ ps_ref_pic_lx->u1_pic_type = u1_op_fld;
+ ps_ref_pic_lx->i4_pic_num = (ps_ref_pic_lx->i4_pic_num * 2);
+ ps_ref_pic_lx->u1_long_term_pic_num =
+ (ps_ref_pic_lx->u1_long_term_frm_idx * 2);
+ if(u1_fld[1] & BOT_FLD)
+ {
+ ps_ref_pic_lx->u1_pic_type = BOT_FLD;
+ ps_ref_pic_lx->pu1_buf1 += ps_ref_pic_lx->u2_frm_wd_y;
+ ps_ref_pic_lx->pu1_buf2 += ps_ref_pic_lx->u2_frm_wd_uv;
+ ps_ref_pic_lx->pu1_buf3 += ps_ref_pic_lx->u2_frm_wd_uv;
+ if(ps_ref_pic_lx->u1_picturetype & 0x3)
+ {
+ ps_ref_pic_lx->pu1_col_zero_flag +=
+ ui_half_num_of_sub_mbs;
+ ps_ref_pic_lx->ps_mv += ui_half_num_of_sub_mbs;
+ }
+ ps_ref_pic_lx->i4_poc =
+ ps_ref_pic_lx->i4_bottom_field_order_cnt;
+ ps_ref_pic_lx->i4_avg_poc =
+ ps_ref_pic_lx->i4_bottom_field_order_cnt;
+ }
+ else
+ {
+ ps_ref_pic_lx->u1_pic_type = TOP_FLD;
+ ps_ref_pic_lx->i4_poc =
+ ps_ref_pic_lx->i4_top_field_order_cnt;
+ ps_ref_pic_lx->i4_avg_poc =
+ ps_ref_pic_lx->i4_top_field_order_cnt;
+ }
+ ps_ref_pic_lx++;
+ uc_lx++;
+ ps_ref_lx[1]++;
+ }
+ }
+ }
+ }
+
+ /* Same parity fields are over, now insert left over opposite parity fields */
+ /** Added if(ps_ref_lx[1]) for error checks */
+ if(ps_ref_lx[1])
+ {
+ /* NOTE(review): ps_ref_lx[1] only advances inside the match branch, so a non-matching entry is re-tested on every remaining iteration - confirm intended */
+ for(; uc_l1 < uc_count; uc_l1++)
+ {
+ if(ps_ref_lx[1]->u1_pic_type & u1_ref[1])
+ {
+ /* Insert PIC of opposite parity in RefPicList */
+ ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_lx,
+ ps_ref_lx[1]);
+ ps_ref_pic_lx->u1_pic_type = u1_op_fld;
+ ps_ref_pic_lx->i4_pic_num = (ps_ref_pic_lx->i4_pic_num * 2);
+ ps_ref_pic_lx->u1_long_term_pic_num =
+ (ps_ref_pic_lx->u1_long_term_frm_idx * 2);
+ if(u1_op_fld == BOT_FLD)
+ {
+ ps_ref_pic_lx->u1_pic_type = BOT_FLD;
+ ps_ref_pic_lx->pu1_buf1 += ps_ref_pic_lx->u2_frm_wd_y;
+ ps_ref_pic_lx->pu1_buf2 += ps_ref_pic_lx->u2_frm_wd_uv;
+ ps_ref_pic_lx->pu1_buf3 += ps_ref_pic_lx->u2_frm_wd_uv;
+ if(ps_ref_pic_lx->u1_picturetype & 0x3)
+ {
+ ps_ref_pic_lx->pu1_col_zero_flag +=
+ ui_half_num_of_sub_mbs;
+ ps_ref_pic_lx->ps_mv += ui_half_num_of_sub_mbs;
+ }
+ ps_ref_pic_lx->i4_poc =
+ ps_ref_pic_lx->i4_bottom_field_order_cnt;
+ ps_ref_pic_lx->i4_avg_poc =
+ ps_ref_pic_lx->i4_bottom_field_order_cnt;
+ }
+ else
+ {
+ ps_ref_pic_lx->i4_poc =
+ ps_ref_pic_lx->i4_top_field_order_cnt;
+ ps_ref_pic_lx->i4_avg_poc =
+ ps_ref_pic_lx->i4_top_field_order_cnt;
+ }
+ ps_ref_pic_lx++;
+ uc_lx++;
+ ps_ref_lx[1]++;
+ }
+ }
+ }
+ }
+ *pu1_L0 = uc_lx; /* report the field-list length back to the caller */
+}
+
+/* Turns the first u1_num_refs frame entries of one reference list into
+ * top-field entries (in place) and creates the matching bottom-field
+ * entries in the slots MAX_REF_BUFS positions further on. */
+static void ih264d_mbaff_split_ref_list(struct pic_buffer_t **pps_list,
+                                        UWORD8 u1_num_refs,
+                                        UWORD16 u2_stride_y,
+                                        UWORD16 u2_stride_uv,
+                                        UWORD32 u4_bot_fld_ofst)
+{
+    UWORD8 u1_i;
+
+    /* Pass 1: every frame entry becomes the top field */
+    for(u1_i = 0; u1_i < u1_num_refs; u1_i++)
+    {
+        struct pic_buffer_t *ps_top = pps_list[u1_i];
+
+        ps_top->u1_pic_type = TOP_FLD;
+        ps_top->i4_poc = ps_top->i4_top_field_order_cnt;
+    }
+
+    /* Pass 2: clone each entry as the bottom field, one row further down */
+    for(u1_i = 0; u1_i < u1_num_refs; u1_i++)
+    {
+        struct pic_buffer_t *ps_top = pps_list[u1_i];
+        struct pic_buffer_t *ps_bot = pps_list[u1_i + MAX_REF_BUFS];
+
+        *ps_bot = *ps_top;
+        ps_bot->pu1_buf1 = ps_top->pu1_buf1 + u2_stride_y;
+        ps_bot->pu1_buf2 = ps_top->pu1_buf2 + u2_stride_uv;
+        ps_bot->pu1_buf3 = ps_top->pu1_buf3 + u2_stride_uv;
+
+        ps_bot->u1_pic_type = BOT_FLD;
+        ps_bot->i4_poc = ps_top->i4_bottom_field_order_cnt;
+
+        /* Col-zero flags and MVs are only offset when the low two bits */
+        /* of u1_picturetype are set                                    */
+        if(ps_top->u1_picturetype & 0x3)
+        {
+            ps_bot->pu1_col_zero_flag = ps_top->pu1_col_zero_flag
+                            + u4_bot_fld_ofst;
+            ps_bot->ps_mv = ps_top->ps_mv + u4_bot_fld_ofst;
+        }
+    }
+}
+
+/* Prepares the field views of the reference lists for MBAFF decoding:  */
+/* list 0 is always processed, list 1 only when ps_dec->u1_B is set.    */
+void ih264d_convert_frm_mbaff_list(dec_struct_t *ps_dec)
+{
+    UWORD32 u4_half_num_of_sub_mbs =
+                    ((ps_dec->u2_pic_ht * ps_dec->u2_pic_wd) >> 5);
+    UWORD16 u2_stride_y = ps_dec->u2_frm_wd_y;
+    UWORD16 u2_stride_uv = ps_dec->u2_frm_wd_uv;
+
+    ih264d_mbaff_split_ref_list(
+                    ps_dec->ps_ref_pic_buf_lx[0],
+                    ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[0],
+                    u2_stride_y, u2_stride_uv, u4_half_num_of_sub_mbs);
+
+    if(!ps_dec->u1_B)
+    {
+        return;
+    }
+
+    ih264d_mbaff_split_ref_list(
+                    ps_dec->ps_ref_pic_buf_lx[1],
+                    ps_dec->ps_cur_slice->u1_num_ref_idx_lx_active[1],
+                    u2_stride_y, u2_stride_uv, u4_half_num_of_sub_mbs);
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_init_ref_idx_lx_b \endif
+ *
+ * \brief
+ *    Builds the initial list-0 and list-1 reference picture lists for
+ *    B slice decoding in ps_dpb_mgr->ps_init_dpb[0][0] / [1][0].
+ *
+ *    List 0: short-term pictures with POC below the current POC in
+ *    descending POC order, then those above it in ascending order, then
+ *    long-term pictures in ascending long-term index order.
+ *    List 1: same, but with the two short-term groups swapped.
+ *    For field pictures the frame lists are built MAX_REF_BUFS entries
+ *    into the array and converted with ih264d_convert_frm_to_fld_list().
+ *    Unused entries up to the active reference count are filled with a
+ *    fallback picture, and the first two list-1 entries are swapped when
+ *    both lists have more than one entry and are identical.
+ *
+ * \return
+ *    None. The list sizes are stored in
+ *    ps_dec->ps_cur_slice->u1_initial_list_size[0..1].
+ **************************************************************************
+ */
+void ih264d_init_ref_idx_lx_b(dec_struct_t *ps_dec)
+{
+    struct pic_buffer_t *ps_ref_pic_buf_lx;
+    dpb_manager_t *ps_dpb_mgr;
+    struct dpb_info_t *ps_next_dpb;
+    WORD32 i_cur_poc, i_max_st_poc, i_min_st_poc, i_ref_poc, i_temp_poc;
+    WORD8 i;
+    UWORD8 u1_max_lt_index, u1_min_lt_index, u1_lt_index;
+    UWORD8 u1_field_pic_flag;
+    dec_slice_params_t *ps_cur_slice;
+    UWORD8 u1_L0, u1_L1;
+    UWORD8 u1_num_short_term_bufs;
+    UWORD8 u1_max_ref_idx_l0, u1_max_ref_idx_l1;
+
+    ps_cur_slice = ps_dec->ps_cur_slice;
+    u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
+    /* Field pictures need twice as many list entries as frames */
+    u1_max_ref_idx_l0 = ps_cur_slice->u1_num_ref_idx_lx_active[0]
+                    << u1_field_pic_flag;
+    u1_max_ref_idx_l1 = ps_cur_slice->u1_num_ref_idx_lx_active[1]
+                    << u1_field_pic_flag;
+
+    ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+    /* Get the current POC */
+    i_cur_poc = ps_dec->ps_cur_pic->i4_poc;
+
+    /* Get MaxStPOC,MinStPOC,MaxLt,MinLt */
+    i_max_st_poc = i_cur_poc;
+    i_min_st_poc = i_cur_poc;
+    u1_max_lt_index = MAX_REF_BUFS + 1;
+    u1_min_lt_index = MAX_REF_BUFS + 1;
+    /* Start from ST head */
+    ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+    for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+    {
+        i_ref_poc = ps_next_dpb->ps_pic_buf->i4_poc;
+        if(i_ref_poc < i_cur_poc)
+        {
+            /* RefPic Buf POC is before Current POC in display order */
+            i_min_st_poc = MIN(i_min_st_poc, i_ref_poc);
+        }
+        else
+        {
+            /* RefPic Buf POC is after Current POC in display order */
+            i_max_st_poc = MAX(i_max_st_poc, i_ref_poc);
+        }
+
+        /* Chase the next link */
+        ps_next_dpb = ps_next_dpb->ps_prev_short;
+    }
+
+    /* Start from LT head */
+    ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
+    if(ps_next_dpb)
+    {
+        u1_max_lt_index = ps_next_dpb->u1_lt_idx;
+        u1_min_lt_index = ps_next_dpb->u1_lt_idx;
+    }
+    for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
+    {
+        u1_lt_index = ps_next_dpb->u1_lt_idx;
+        u1_max_lt_index = (UWORD8)(MAX(u1_max_lt_index, u1_lt_index));
+        u1_min_lt_index = (UWORD8)(MIN(u1_min_lt_index, u1_lt_index));
+
+        /* Chase the next link */
+        ps_next_dpb = ps_next_dpb->ps_prev_long;
+    }
+
+    /* 1. Initialize refIdxL0 */
+    u1_L0 = 0;
+    if(u1_field_pic_flag)
+    {
+        /* Field pictures: build the frame list MAX_REF_BUFS entries in,  */
+        /* and include a reference whose POC equals the current POC       */
+        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0];
+        ps_ref_pic_buf_lx += MAX_REF_BUFS;
+        i_temp_poc = i_cur_poc;
+    }
+    else
+    {
+        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0];
+        i_temp_poc = i_cur_poc - 1;
+    }
+    /* Arrange all short term buffers in output order as given by POC */
+    /* 1.1 Arrange POC's less than CurrPOC in the descending POC order starting
+     from (CurrPOC - 1)*/
+    for(; i_temp_poc >= i_min_st_poc; i_temp_poc--)
+    {
+        /* Start from ST head */
+        ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+        for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+        {
+            if((WORD32)ps_next_dpb->ps_pic_buf->i4_poc == i_temp_poc)
+            {
+                /* Copy info in pic buffer */
+                ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
+                                                   ps_next_dpb->ps_pic_buf);
+                ps_ref_pic_buf_lx++;
+                u1_L0++;
+                break;
+            }
+            ps_next_dpb = ps_next_dpb->ps_prev_short;
+        }
+    }
+
+    {
+        /* 1.2. Arrange POC's more than CurrPOC in the ascending POC order starting
+         from (CurrPOC + 1)*/
+        for(i_temp_poc = i_cur_poc + 1; i_temp_poc <= i_max_st_poc; i_temp_poc++)
+        {
+            /* Start from ST head */
+            ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+            for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+            {
+                if((WORD32)ps_next_dpb->ps_pic_buf->i4_poc == i_temp_poc)
+                {
+                    ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
+                                                       ps_next_dpb->ps_pic_buf);
+                    ps_ref_pic_buf_lx++;
+                    u1_L0++;
+                    break;
+                }
+                ps_next_dpb = ps_next_dpb->ps_prev_short;
+            }
+        }
+    }
+
+    /* 1.3 Arrange all Long term buffers in ascending order, in LongtermIndex */
+
+    u1_num_short_term_bufs = u1_L0;
+    for(u1_lt_index = u1_min_lt_index; u1_lt_index <= u1_max_lt_index; u1_lt_index++)
+    {
+        /* Start from LT head for every index */
+        ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
+        for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
+        {
+            if(ps_next_dpb->u1_lt_idx == u1_lt_index)
+            {
+                ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
+                                                   ps_next_dpb->ps_pic_buf);
+                ps_ref_pic_buf_lx->u1_long_term_pic_num =
+                                ps_ref_pic_buf_lx->u1_long_term_frm_idx;
+
+                ps_ref_pic_buf_lx++;
+                u1_L0++;
+                break;
+            }
+            ps_next_dpb = ps_next_dpb->ps_prev_long;
+        }
+    }
+
+    if(u1_field_pic_flag)
+    {
+        /* Initialize the rest of the entries in the */
+        /* reference list to handle of errors */
+        {
+            UWORD8 u1_i;
+            pic_buffer_t *ps_ref_pic;
+
+            ps_ref_pic = ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS;
+
+            /* Fall back to the current picture when the first entry has no buffer */
+            if(NULL == ps_ref_pic->pu1_buf1)
+            {
+                ps_ref_pic = ps_dec->ps_cur_pic;
+            }
+            for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++)
+            {
+                *ps_ref_pic_buf_lx = *ps_ref_pic;
+                ps_ref_pic_buf_lx++;
+            }
+        }
+        /* Converts the frame list into a field list; u1_L0 is updated */
+        /* to the number of field entries                              */
+        ih264d_convert_frm_to_fld_list(
+                        ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS, &u1_L0,
+                        ps_dec, u1_num_short_term_bufs);
+
+        ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0] + u1_L0;
+    }
+
+    ps_dec->ps_cur_slice->u1_initial_list_size[0] = u1_L0;
+
+    /* Initialize the rest of the entries in the */
+    /* reference list to handle of errors */
+    {
+        UWORD8 u1_i;
+        pic_buffer_t *ps_ref_pic;
+
+        ps_ref_pic = ps_dpb_mgr->ps_init_dpb[0][0];
+
+        if(NULL == ps_ref_pic->pu1_buf1)
+        {
+            ps_ref_pic = ps_dec->ps_cur_pic;
+        }
+        for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++)
+        {
+            *ps_ref_pic_buf_lx = *ps_ref_pic;
+            ps_ref_pic_buf_lx++;
+        }
+    }
+    {
+        /* 2. Initialize refIdxL1 */
+        u1_L1 = 0;
+        if(u1_field_pic_flag)
+        {
+            ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0] + MAX_REF_BUFS;
+        }
+        else
+        {
+            ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0];
+        }
+
+        /* 2.1. Arrange POC's more than CurrPOC in the ascending POC order starting
+         from (CurrPOC + 1)*/
+        for(i_temp_poc = i_cur_poc + 1; i_temp_poc <= i_max_st_poc; i_temp_poc++)
+        {
+            /* Start from ST head */
+            ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+            for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+            {
+                if((WORD32)ps_next_dpb->ps_pic_buf->i4_poc == i_temp_poc)
+                {
+                    ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
+                                                       ps_next_dpb->ps_pic_buf);
+                    ps_ref_pic_buf_lx++;
+                    u1_L1++;
+                    break;
+                }
+                ps_next_dpb = ps_next_dpb->ps_prev_short;
+            }
+        }
+
+        if(u1_field_pic_flag)
+        {
+            i_temp_poc = i_cur_poc;
+        }
+        else
+        {
+            i_temp_poc = i_cur_poc - 1;
+        }
+
+        /* Arrange all short term buffers in output order as given by POC */
+        /* 2.2 Arrange POC's less than CurrPOC in the descending POC order starting
+         from (CurrPOC - 1)*/
+        for(; i_temp_poc >= i_min_st_poc; i_temp_poc--)
+        {
+            /* Start from ST head */
+            ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+            for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+            {
+                if((WORD32)ps_next_dpb->ps_pic_buf->i4_poc == i_temp_poc)
+                {
+                    ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
+                                                       ps_next_dpb->ps_pic_buf);
+                    ps_ref_pic_buf_lx++;
+                    u1_L1++;
+                    break;
+                }
+                ps_next_dpb = ps_next_dpb->ps_prev_short;
+            }
+        }
+
+        /* 2.3 Arrange all Long term buffers in ascending order, in LongtermIndex */
+        u1_num_short_term_bufs = u1_L1;
+        for(u1_lt_index = u1_min_lt_index; u1_lt_index <= u1_max_lt_index;
+                        u1_lt_index++)
+        {
+            /* Restart from the LT head for every index, exactly as step 1.3 */
+            /* does. Resuming from the previous position would let the walk  */
+            /* run past the end of the long-term chain and skip entries      */
+            /* whose index is reached later.                                 */
+            ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
+            for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
+            {
+                if(ps_next_dpb->u1_lt_idx == u1_lt_index)
+                {
+                    ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
+                                                       ps_next_dpb->ps_pic_buf);
+                    ps_ref_pic_buf_lx->u1_long_term_pic_num =
+                                    ps_ref_pic_buf_lx->u1_long_term_frm_idx;
+                    ps_ref_pic_buf_lx++;
+                    u1_L1++;
+                    break;
+                }
+                ps_next_dpb = ps_next_dpb->ps_prev_long;
+            }
+        }
+
+        if(u1_field_pic_flag)
+        {
+            /* Initialize the rest of the entries in the */
+            /* reference list to handle of errors */
+            {
+                UWORD8 u1_i;
+                pic_buffer_t *ps_ref_pic;
+
+                ps_ref_pic = ps_dpb_mgr->ps_init_dpb[1][0] + MAX_REF_BUFS;
+
+                if(NULL == ps_ref_pic->pu1_buf1)
+                {
+                    ps_ref_pic = ps_dec->ps_cur_pic;
+                }
+                for(u1_i = u1_L1; u1_i < u1_max_ref_idx_l1; u1_i++)
+                {
+                    *ps_ref_pic_buf_lx = *ps_ref_pic;
+                    ps_ref_pic_buf_lx++;
+                }
+            }
+
+            ih264d_convert_frm_to_fld_list(
+                            ps_dpb_mgr->ps_init_dpb[1][0] + MAX_REF_BUFS,
+                            &u1_L1, ps_dec, u1_num_short_term_bufs);
+            ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[1][0] + u1_L1;
+        }
+
+        ps_dec->ps_cur_slice->u1_initial_list_size[1] = u1_L1;
+
+        /* Initialize the rest of the entries in the */
+        /* reference list to handle of errors */
+        {
+            UWORD8 u1_i;
+            pic_buffer_t *ps_ref_pic;
+
+            ps_ref_pic = ps_dpb_mgr->ps_init_dpb[1][0];
+
+            if(NULL == ps_ref_pic->pu1_buf1)
+            {
+                ps_ref_pic = ps_dec->ps_cur_pic;
+            }
+            for(u1_i = u1_L1; u1_i < u1_max_ref_idx_l1; u1_i++)
+            {
+                *ps_ref_pic_buf_lx = *ps_ref_pic;
+                ps_ref_pic_buf_lx++;
+            }
+        }
+
+        /* If list 0 and list 1 entries are the same then swap the 0th and 1st */
+        /* entry of list 1 (entries are compared by their pu1_buf1 pointer)   */
+        {
+            struct pic_buffer_t *ps_ref_pic1_buf_l0, *ps_ref_pic1_buf_l1;
+            struct pic_buffer_t s_ref_pic1_buf_temp;
+
+            ps_ref_pic1_buf_l0 = ps_dpb_mgr->ps_init_dpb[0][0];
+            ps_ref_pic1_buf_l1 = ps_dpb_mgr->ps_init_dpb[1][0];
+
+            if((u1_L0 == u1_L1) && (u1_L0 > 1))
+            {
+                WORD32 i_index, i_swap;
+
+                i_swap = 1;
+
+                for(i_index = 0; i_index < u1_L0; i_index++)
+                {
+                    if((ps_ref_pic1_buf_l0[i_index]).pu1_buf1
+                                    != (ps_ref_pic1_buf_l1[i_index]).pu1_buf1)
+                    {
+                        i_swap = 0;
+                        break;
+                    }
+                }
+                if(1 == i_swap)
+                {
+                    memcpy(&s_ref_pic1_buf_temp, &ps_ref_pic1_buf_l1[1],
+                           sizeof(struct pic_buffer_t));
+                    memcpy(&ps_ref_pic1_buf_l1[1], &ps_ref_pic1_buf_l1[0],
+                           sizeof(struct pic_buffer_t));
+                    memcpy(&ps_ref_pic1_buf_l1[0], &s_ref_pic1_buf_temp,
+                           sizeof(struct pic_buffer_t));
+                }
+            }
+        }
+    }
+}
+
+
+
+void ih264d_get_implicit_weights(dec_struct_t *ps_dec);
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_one_to_one \endif
+ *
+ * \brief
+ *    Fills ps_direct with the partition layout of the co-located MB
+ *    (MV indices, sub-MB numbers and partition sizes) for direct
+ *    prediction with the vertical MV scale set to ONE_TO_ONE. Also sets
+ *    ps_direct->u1_col_zeroflag_change when bit 0 of the col-zero flag of
+ *    any visited block differs from that of the starting block.
+ *
+ * \return
+ *    None.
+ **************************************************************************
+ */
+void ih264d_one_to_one(dec_struct_t *ps_dec,
+                       struct pic_buffer_t *ps_col_pic,
+                       directmv_t *ps_direct,
+                       UWORD8 u1_wd_x,
+                       WORD32 u2_sub_mb_ofst,
+                       dec_mb_info_t * ps_cur_mb_info)
+{
+    UWORD8 *pu1_colz_base = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst;
+    UWORD8 u1_start_flags = pu1_colz_base[ps_dec->u1_sub_mb_num];
+    /* bit 0: col-zero flag of the starting block; bits 6-7: MB mode */
+    UWORD8 u1_ref_colzero = u1_start_flags & 1;
+    UWORD8 u1_col_mb_mode = u1_start_flags >> 6;
+    UWORD8 u1_blk_total = 1;
+
+    const UWORD8 *pu1_sub_part_cnt =
+                    (const UWORD8 *)gau1_ih264d_num_submb_part;
+    const UWORD8 *pu1_next_blk_start;
+    const UWORD8 *pu1_step_tbl;
+    UWORD8 u1_infer_8x8, u1_cur_sub_mb, u1_col_sub_mb = 0;
+    UWORD8 u1_blk, u1_part, u1_part_cnt, u1_sub_mode, u1_psize;
+    UWORD8 u1_out = 0;
+    WORD32 i4_mv_idx;
+
+    UNUSED(ps_cur_mb_info);
+
+    ps_direct->u1_vert_mv_scale = ONE_TO_ONE;
+    ps_direct->u1_col_zeroflag_change = 0;
+
+    if(MB_SIZE == u1_wd_x)
+    {
+        ps_dec->u1_currB_type = (u1_col_mb_mode != 0);
+
+        if(PRED_16x16 == u1_col_mb_mode)
+        {
+            /* A single partition covering the whole MB */
+            ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
+            ps_direct->i1_submb_num[0] = 0;
+            ps_direct->i1_partitionsize[0] = PRED_16x16;
+            ps_direct->i1_num_partitions = 1;
+            return;
+        }
+
+        if(u1_col_mb_mode < PRED_8x8)
+        {
+            /* Two partitions; the second starts at sub-MB 8 for 16x8 */
+            /* and at sub-MB 2 otherwise                              */
+            UWORD8 u1_second = (PRED_16x8 == u1_col_mb_mode) ? 8 : 2;
+
+            ps_direct->i1_num_partitions = 2;
+
+            ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
+            ps_direct->i1_submb_num[0] = 0;
+            ps_direct->i1_partitionsize[0] = u1_col_mb_mode;
+
+            ps_direct->i1_submb_num[1] = u1_second;
+            ps_direct->i4_mv_indices[1] = u2_sub_mb_ofst + u1_second;
+            ps_direct->i1_partitionsize[1] = u1_col_mb_mode;
+
+            if((pu1_colz_base[u1_second] & 1) != u1_ref_colzero)
+            {
+                ps_direct->u1_col_zeroflag_change = 1;
+            }
+            return;
+        }
+
+        /* Remaining mode: walk all four 8x8 blocks */
+        u1_blk_total = 4;
+    }
+
+    u1_cur_sub_mb = ps_dec->u1_sub_mb_num;
+    u1_infer_8x8 = ps_dec->ps_cur_slice->u1_direct_8x8_inference_flag;
+    pu1_next_blk_start = gau1_ih264d_top_left_mb_part_indx_mod
+                    + (PRED_8x8 << 1) + 1;
+
+    for(u1_blk = 0; u1_blk < u1_blk_total; u1_blk++)
+    {
+        pu1_step_tbl = gau1_ih264d_submb_indx_mod_sp_drct;
+
+        if(1 == u1_infer_8x8)
+        {
+            /* One partition per 8x8 block, taken from a derived */
+            /* co-located sub-MB position                        */
+            u1_psize = PRED_8x8;
+            u1_col_sub_mb = u1_cur_sub_mb | (u1_cur_sub_mb >> 1);
+            i4_mv_idx = u2_sub_mb_ofst + u1_col_sub_mb;
+            u1_part_cnt = 1;
+        }
+        else
+        {
+            /* colMbPart is either 8x8, 8x4, 4x8, 4x4: bits 4-5 of the */
+            /* block's flag byte carry the sub-MB mode                 */
+            u1_sub_mode = pu1_colz_base[u1_cur_sub_mb];
+            u1_sub_mode = (u1_sub_mode & 0x30) >> 4;
+            u1_psize = (UWORD8)((u1_sub_mode) | (PRED_8x8 << 2));
+            i4_mv_idx = u2_sub_mb_ofst + u1_cur_sub_mb;
+            pu1_step_tbl += (u1_sub_mode << 1);
+            u1_part_cnt = pu1_sub_part_cnt[u1_sub_mode];
+        }
+
+        for(u1_part = 0; u1_part < u1_part_cnt; u1_part++)
+        {
+            UWORD8 u1_step = pu1_step_tbl[u1_part];
+
+            u1_cur_sub_mb += u1_step;
+            i4_mv_idx += u1_step;
+
+            ps_direct->i4_mv_indices[u1_out] = i4_mv_idx;
+            ps_direct->i1_submb_num[u1_out] = u1_cur_sub_mb;
+            ps_direct->i1_partitionsize[u1_out] = u1_psize;
+            u1_out++;
+
+            if(!u1_infer_8x8)
+            {
+                u1_col_sub_mb = u1_cur_sub_mb;
+            }
+            if((pu1_colz_base[u1_col_sub_mb] & 1) != u1_ref_colzero)
+            {
+                ps_direct->u1_col_zeroflag_change = 1;
+            }
+        }
+
+        /* Starting sub-MB of the next 8x8 block comes from the table */
+        u1_cur_sub_mb = pu1_next_blk_start[u1_blk];
+    }
+
+    ps_direct->i1_num_partitions = u1_out;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_mbaff_cross_pmbair \endif
+ *
+ * \brief
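+ * Fills ps_direct with the co-located partition layout (MV indices,
+ * sub-MB numbers, partition sizes) for direct prediction when the vertical
+ * MV scale is FLD_TO_FRM (current MB is a frame MB) or FRM_TO_FLD
+ * (current MB is a field MB).
+ *
+ *
+ * \return
+ * None (the function is declared void).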
+ **************************************************************************
+ */
+void ih264d_mbaff_cross_pmbair(dec_struct_t *ps_dec,
+ struct pic_buffer_t *ps_col_pic,
+ directmv_t *ps_direct,
+ UWORD8 u1_wd_x,
+ WORD32 u2_sub_mb_ofst,
+ dec_mb_info_t * ps_cur_mb_info)
+{
+ UWORD8 *pu1_col_zero_flag_start, *pu1_col_zero_flag, u1_sub_mb_num,
+ uc_sub_mb_num_col;
+ UWORD8 *pu1_col_zero_flag_right_half;
+ WORD32 i4_force_8X8;
+ UWORD8 u1_num_blks, u1_col_mb_pred_mode, uc_blk, u1_col_sub_mb_pred_mode,
+ u1_col_sub_mb_pred_mode_rt;
+ UWORD8 i1_num_partitions = 0, partition_size;
+
+ WORD32 mv_index;
+
+ UWORD8 u1_num_sub_blks;
+ UWORD8 u1_is_cur_mb_fld, i;
+ UWORD8 u1_init_colzero_flag;
+
+ u1_is_cur_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
+ u1_sub_mb_num = ps_dec->u1_sub_mb_num;
+ ps_direct->u1_col_zeroflag_change = 0;
+ /*pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst;
+ u1_col_mb_pred_mode = pu1_col_zero_flag_start[u1_sub_mb_num];
+ u1_init_colzero_flag = u1_col_mb_pred_mode & 1;
+ u1_col_mb_pred_mode >>= 6; */
+ if(0 == u1_is_cur_mb_fld)
+ {
+ ps_direct->u1_vert_mv_scale = FLD_TO_FRM;
+ if(u1_wd_x == MB_SIZE)
+ {
+ pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag
+ + u2_sub_mb_ofst;
+ u1_col_mb_pred_mode = pu1_col_zero_flag_start[0];
+ u1_init_colzero_flag = u1_col_mb_pred_mode & 1;
+ u1_col_mb_pred_mode >>= 6;
+
+
+ if(u1_col_mb_pred_mode & 0x2)
+ {
+ ps_dec->u1_currB_type = 1;
+ if(u1_col_mb_pred_mode == PRED_8x16)
+ {
+ ps_direct->i1_num_partitions = 2;
+ ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
+ ps_direct->i1_submb_num[0] = 0;
+ ps_direct->i1_partitionsize[0] = PRED_8x16;
+ ps_direct->i4_mv_indices[1] = u2_sub_mb_ofst + 2;
+ ps_direct->i1_submb_num[1] = 2;
+ ps_direct->i1_partitionsize[1] = PRED_8x16;
+ if((pu1_col_zero_flag_start[2] & 1) != u1_init_colzero_flag)
+ ps_direct->u1_col_zeroflag_change = 1;
+ }
+ else
+ {
+ pu1_col_zero_flag = pu1_col_zero_flag_start + u1_sub_mb_num;
+ u1_col_sub_mb_pred_mode = (*pu1_col_zero_flag & 0x10);/* 8x4 or 4x4 mode */
+
+ pu1_col_zero_flag_right_half = pu1_col_zero_flag_start
+ + u1_sub_mb_num + 2;
+ u1_col_sub_mb_pred_mode_rt =
+ (*pu1_col_zero_flag_right_half & 0x10);/* 8x4 or 4x4 mode */
+
+ i4_force_8X8 = (u1_col_sub_mb_pred_mode)
+ || (u1_col_sub_mb_pred_mode_rt);
+ if(i4_force_8X8)
+ {
+ u1_num_sub_blks = 2;
+ partition_size = PRED_8x8;
+ }
+ else
+ {
+ partition_size = PRED_8x16;
+ u1_num_sub_blks = 1;
+ }
+
+ for(i = 0; i < 2; i++)
+ {
+ for(uc_blk = 0; uc_blk < u1_num_sub_blks; uc_blk++)
+ {
+ uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1);
+ uc_sub_mb_num_col &= 0x7;
+ mv_index = u2_sub_mb_ofst + uc_sub_mb_num_col;
+
+ ps_direct->i4_mv_indices[i1_num_partitions] =
+ mv_index;
+ ps_direct->i1_submb_num[i1_num_partitions] =
+ u1_sub_mb_num;
+ ps_direct->i1_partitionsize[i1_num_partitions] =
+ partition_size;
+ i1_num_partitions++;
+ if((pu1_col_zero_flag_start[uc_sub_mb_num_col] & 1)
+ != u1_init_colzero_flag)
+ ps_direct->u1_col_zeroflag_change = 1;
+ u1_sub_mb_num += 8;
+ }
+ u1_sub_mb_num = 2; /* move to second half of Cur MB */
+ }
+ ps_direct->i1_num_partitions = i1_num_partitions;
+ return;
+ }
+ }
+ else
+ {
+ ps_direct->i1_num_partitions = 1;
+ ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
+ ps_direct->i1_submb_num[0] = 0;
+ ps_direct->i1_partitionsize[0] = PRED_16x16;
+ ps_dec->u1_currB_type = 0;
+ return;
+ }
+ }
+ else
+ {
+ uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1);
+ uc_sub_mb_num_col &= 0x7;
+
+ ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst + uc_sub_mb_num_col;
+ ps_direct->i1_submb_num[0] = u1_sub_mb_num;
+ ps_direct->i1_partitionsize[0] = PRED_8x8;
+ ps_direct->i1_num_partitions = 1;
+ }
+ }
+ else
+ {
+ ps_direct->u1_vert_mv_scale = FRM_TO_FLD;
+ pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst;
+ u1_init_colzero_flag = pu1_col_zero_flag_start[0] & 1;
+
+ if(u1_wd_x == MB_SIZE)
+ {
+ UWORD8 u1_submb_col;
+ UWORD8 *puc_colZeroFlagStart_bot_mb, uc_colMbPredMode_bot_mb;
+
+ pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag
+ + u2_sub_mb_ofst;
+ u1_col_mb_pred_mode = pu1_col_zero_flag_start[u1_sub_mb_num] >> 6;
+
+ puc_colZeroFlagStart_bot_mb = ps_col_pic->pu1_col_zero_flag
+ + u2_sub_mb_ofst + 16;
+ uc_colMbPredMode_bot_mb = puc_colZeroFlagStart_bot_mb[8] >> 6;
+
+ i4_force_8X8 = (u1_col_mb_pred_mode & 0x2)
+ || (uc_colMbPredMode_bot_mb & 0x2);
+ if(i4_force_8X8)
+ {
+ u1_num_blks = 2;
+ partition_size = PRED_8x8;
+ }
+ else
+ {
+ u1_num_blks = 1;
+ partition_size = PRED_16x8;
+ }
+
+ ps_dec->u1_currB_type = 1;
+ /*As this mb is derived from 2 Mbs min no of partitions = 2*/
+ for(i = 0; i < 2; i++)
+ {
+
+ pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag
+ + u2_sub_mb_ofst;
+ u1_col_mb_pred_mode = pu1_col_zero_flag_start[u1_sub_mb_num] >> 6;
+
+ for(uc_blk = 0; uc_blk < u1_num_blks; uc_blk++)
+ {
+ u1_submb_col = (u1_sub_mb_num & 0x7) ? 1 : 0;
+ u1_submb_col += u1_sub_mb_num;
+ mv_index = u2_sub_mb_ofst + u1_submb_col;
+
+
+ ps_direct->i4_mv_indices[i1_num_partitions] = mv_index;
+ ps_direct->i1_submb_num[i1_num_partitions] = u1_sub_mb_num;
+ ps_direct->i1_partitionsize[i1_num_partitions] =
+ partition_size;
+ i1_num_partitions++;
+ if((pu1_col_zero_flag_start[u1_submb_col] & 1)
+ != u1_init_colzero_flag)
+ ps_direct->u1_col_zeroflag_change = 1;
+ u1_sub_mb_num += 2;
+ }
+ u1_sub_mb_num = 8; /* move to second half of Cur MB */
+ u2_sub_mb_ofst += 16;/* move to next Colocated MB */
+ }
+ ps_direct->i1_num_partitions = i1_num_partitions;
+ return;
+ }
+ else
+ {
+ uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1);
+ uc_sub_mb_num_col &= 0xb;
+ u2_sub_mb_ofst += (u1_sub_mb_num >> 3) ? 16 : 0;
+
+ ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst + uc_sub_mb_num_col;
+ ps_direct->i1_submb_num[0] = u1_sub_mb_num;
+ ps_direct->i1_partitionsize[0] = PRED_8x8;
+ ps_direct->i1_num_partitions = 1;
+ return;
+ }
+ }
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_cal_col_pic \endif
+ *
+ * \brief
+ *    Finds the colocated picture and selects the routine that maps the
+ *    current MB onto colocated partitions for direct prediction.
+ *
+ *    The colocated candidate is ps_dec->ps_ref_pic_buf_lx[1][0]. The low
+ *    three bits of u1_picturetype of the current and of the colocated
+ *    picture select what is stored in ps_dec->pf_parse_mvdirect:
+ *
+ *      current     colocated        pf_parse_mvdirect
+ *      FRM_PIC     FRM_PIC          ih264d_one_to_one
+ *      FRM_PIC     COMP_FLD_PAIR    ih264d_fld_to_frm
+ *      FRM_PIC     anything else    error, nothing is stored
+ *      AFRM_PIC    any              ih264d_fld_to_mbaff
+ *      otherwise   FRM_PIC          ih264d_frm_to_fld
+ *      otherwise   AFRM_PIC         ih264d_mbaff_to_fld
+ *      otherwise   anything else    ih264d_one_to_one
+ *
+ *    For a frame that references a complementary field pair whose top
+ *    field order count is not smaller than its bottom field order count,
+ *    the descriptor is cloned into ps_ref_pic_buf_lx[1][MAX_REF_BUFS] with
+ *    its plane, col-zero-flag and mv pointers advanced, and that clone
+ *    becomes the colocated picture.
+ *
+ * \param ps_dec
+ *    Decoder context; pf_parse_mvdirect and ps_col_pic are written.
+ *
+ * \return
+ *    OK on success. ERROR_DBP_MANAGER_T when the current picture is a
+ *    frame and the colocated picture is neither FRM_PIC nor COMP_FLD_PAIR;
+ *    in that case ps_dec is left unmodified.
+ **************************************************************************
+ */
+WORD32 ih264d_cal_col_pic(dec_struct_t *ps_dec)
+{
+    struct pic_buffer_t *ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][0];
+    UWORD8 uc_curpictype = (ps_dec->ps_cur_pic->u1_picturetype & 0x7);
+    UWORD8 uc_colpictype = (ps_col_pic->u1_picturetype & 0x7);
+
+    if(uc_curpictype == FRM_PIC)
+    {
+        if(uc_colpictype == FRM_PIC)
+        {
+            ps_dec->pf_parse_mvdirect = ih264d_one_to_one;
+        }
+        else if(uc_colpictype == COMP_FLD_PAIR)
+        {
+            ps_dec->pf_parse_mvdirect = ih264d_fld_to_frm;
+            if(ps_col_pic->i4_top_field_order_cnt
+                            >= ps_col_pic->i4_bottom_field_order_cnt)
+            {
+                struct pic_buffer_t *ps_tempPic = ps_col_pic;
+                /* Widen before multiplying so the product of the two      */
+                /* picture dimensions is never evaluated in signed int.    */
+                /* NOTE(review): assumes both fields are 16-bit unsigned,  */
+                /* as the u2_ prefix suggests - confirm against the struct */
+                UWORD32 ui_half_num_of_sub_mbs =
+                                (((UWORD32)ps_dec->u2_pic_ht
+                                                * ps_dec->u2_pic_wd) >> 5);
+
+                ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS];
+                /* Start from a full copy of the descriptor ...            */
+                *ps_col_pic = *ps_tempPic;
+                /* ... then advance each plane pointer by one row stride   */
+                ps_col_pic->pu1_buf1 = ps_tempPic->pu1_buf1
+                                + ps_tempPic->u2_frm_wd_y;
+                ps_col_pic->pu1_buf2 = ps_tempPic->pu1_buf2
+                                + ps_tempPic->u2_frm_wd_uv;
+                ps_col_pic->pu1_buf3 = ps_tempPic->pu1_buf3
+                                + ps_tempPic->u2_frm_wd_uv;
+                /* ... and the per-sub-MB side info by half a picture      */
+                ps_col_pic->pu1_col_zero_flag = ps_tempPic->pu1_col_zero_flag
+                                + ui_half_num_of_sub_mbs;
+                ps_col_pic->ps_mv = ps_tempPic->ps_mv + ui_half_num_of_sub_mbs;
+
+                /* complementary reference field pair - referred as frame */
+                ps_col_pic->u1_pic_type = 0;
+            }
+        }
+        else
+        {
+            return ERROR_DBP_MANAGER_T;
+        }
+    }
+    else if(uc_curpictype == AFRM_PIC)
+    {
+        ps_dec->pf_parse_mvdirect = ih264d_fld_to_mbaff;
+    }
+    else /* must be a field */
+    {
+        if(uc_colpictype == FRM_PIC)
+        {
+            ps_dec->pf_parse_mvdirect = ih264d_frm_to_fld;
+        }
+        else if(uc_colpictype == AFRM_PIC)
+        {
+            ps_dec->pf_parse_mvdirect = ih264d_mbaff_to_fld;
+        }
+        else
+        {
+            ps_dec->pf_parse_mvdirect = ih264d_one_to_one;
+        }
+    }
+
+    ps_dec->ps_col_pic = ps_col_pic;
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_frm_to_fld \endif
+ *
+ * \brief
+ * Fills ps_direct with the colocated partitions (mv index, sub-MB number
+ * and partition size per partition) for direct prediction and sets
+ * u1_vert_mv_scale to FRM_TO_FLD. A full-MB request (u1_wd_x == MB_SIZE)
+ * is assembled from two colocated MBs whose entries lie "increment"
+ * apart; any other width yields a single PRED_8x8 partition.
+ *
+ * \param ps_dec : decoder context; u1_sub_mb_num, u2_mbx, u2_mby and
+ * u2_frm_wd_in_mbs are read, u1_currB_type is written
+ * \param ps_col_pic : colocated picture; u1_picturetype and the bytes of
+ * pu1_col_zero_flag are read
+ * \param ps_direct : output partition description
+ * \param u1_wd_x : width being predicted, compared against MB_SIZE
+ * \param u2_sub_mb_ofst : offset into the colocated pu1_col_zero_flag
+ * array; recomputed here when the colocated picture is FRM_PIC
+ * \param ps_cur_mb_info : unused
+ *
+ * \return
+ * None
+ **************************************************************************
+ */
+void ih264d_frm_to_fld(dec_struct_t *ps_dec,
+ struct pic_buffer_t *ps_col_pic,
+ directmv_t *ps_direct,
+ UWORD8 u1_wd_x,
+ WORD32 u2_sub_mb_ofst,
+ dec_mb_info_t * ps_cur_mb_info)
+{
+ UWORD8 *pu1_col_zero_flag_start, u1_sub_mb_num;
+ UWORD8 u1_num_blks, u1_col_mb_pred_mode, uc_blk;
+ UWORD8 i1_num_partitions = 0, partition_size, i;
+ WORD32 mv_index;
+ UWORD32 increment;
+ WORD32 i4_force_8X8;
+ UNUSED(ps_cur_mb_info);
+ ps_direct->u1_col_zeroflag_change = 1; /* set unconditionally in this mapper */
+ ps_direct->u1_vert_mv_scale = FRM_TO_FLD;
+ u1_sub_mb_num = ps_dec->u1_sub_mb_num;
+
+ /* new calculation specific to this function */
+ if((ps_col_pic->u1_picturetype & 0x7) == FRM_PIC)
+ {
+ UWORD16 u2_frm_wd_in_mbs = ps_dec->u2_frm_wd_in_mbs;
+ increment = (u2_frm_wd_in_mbs << 4); /* 16 entries per MB: one full MB row */
+ /*mbAddrCol = mbAddrCol1 */
+ u2_sub_mb_ofst = (ps_dec->u2_mbx
+ + (2 * ps_dec->u2_mby * u2_frm_wd_in_mbs)) << 4; /* MB at (mbx, 2 * mby) */
+ }
+ else
+ increment = 16; /* second colocated MB directly follows the first */
+
+ if(u1_wd_x == MB_SIZE)
+ {
+ ps_dec->u1_currB_type = 1;
+
+ {
+ UWORD8 *puc_colZeroFlagStart_bot_mb, uc_colMbPredMode_bot_mb;
+
+ pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag
+ + u2_sub_mb_ofst;
+ u1_col_mb_pred_mode = (*pu1_col_zero_flag_start >> 6); /* top two bits of the first flag byte */
+
+ puc_colZeroFlagStart_bot_mb = ps_col_pic->pu1_col_zero_flag
+ + u2_sub_mb_ofst + increment;
+ uc_colMbPredMode_bot_mb = (*puc_colZeroFlagStart_bot_mb >> 6); /* same field of the second colocated MB */
+
+ i4_force_8X8 = (u1_col_mb_pred_mode & 0x2)
+ || (uc_colMbPredMode_bot_mb & 0x2); /* bit 0x2 in either mode selects 8x8 partitions */
+
+ if(i4_force_8X8)
+ {
+ u1_num_blks = 2;
+ partition_size = PRED_8x8;
+ }
+ else
+ {
+ partition_size = PRED_16x8;
+ u1_num_blks = 1;
+ }
+ }
+
+ /*As this mb is derived from 2 Mbs, min no of partitions = 2*/
+ for(i = 0; i < 2; i++)
+ {
+ for(uc_blk = 0; uc_blk < u1_num_blks; uc_blk++)
+ {
+ mv_index = u2_sub_mb_ofst + u1_sub_mb_num;
+ mv_index += (u1_sub_mb_num & 0x7) ? 1 : 0; /* one extra entry when any of the low three bits is set */
+
+ ps_direct->i4_mv_indices[i1_num_partitions] = mv_index;
+ ps_direct->i1_submb_num[i1_num_partitions] = u1_sub_mb_num;
+ ps_direct->i1_partitionsize[i1_num_partitions] = partition_size;
+ i1_num_partitions++;
+
+ u1_sub_mb_num += 2;
+ }
+ u1_sub_mb_num = 8; /* move to second half of Cur MB */
+ u2_sub_mb_ofst += increment;/* move to next Colocated MB */
+ }
+ ps_direct->i1_num_partitions = i1_num_partitions; /* 2 or 4, depending on i4_force_8X8 */
+ return;
+ }
+ else
+ {
+ UWORD8 u1_sub_mb_num_col;
+ u1_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1);
+ u1_sub_mb_num_col &= 0xb; /* keep bits 0, 1 and 3 only */
+ u2_sub_mb_ofst += (u1_sub_mb_num >> 3) ? increment : 0; /* sub-MB numbers >= 8 use the second colocated MB */
+
+ ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst + u1_sub_mb_num_col;
+ ps_direct->i1_submb_num[0] = u1_sub_mb_num;
+ ps_direct->i1_partitionsize[0] = PRED_8x8;
+ ps_direct->i1_num_partitions = 1;
+ return;
+ }
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_fld_to_frm \endif
+ *
+ * \brief
+ * Fills ps_direct with the colocated partitions (mv index, sub-MB number
+ * and partition size per partition) for direct prediction and sets
+ * u1_vert_mv_scale to FLD_TO_FRM. The incoming u2_sub_mb_ofst is ignored
+ * and recomputed from the current MB position.
+ *
+ * \param ps_dec : decoder context; u2_mbx, u2_mby, u2_frm_wd_in_mbs and
+ * u1_sub_mb_num are read, u1_currB_type is written for full-MB requests
+ * \param ps_col_pic : colocated picture; the bytes of pu1_col_zero_flag
+ * are read
+ * \param ps_direct : output partition description
+ * \param u1_wd_x : width being predicted, compared against MB_SIZE
+ * \param u2_sub_mb_ofst : overwritten before use
+ * \param ps_cur_mb_info : unused
+ *
+ * \return
+ * None
+ **************************************************************************
+ */
+void ih264d_fld_to_frm(dec_struct_t *ps_dec,
+ struct pic_buffer_t *ps_col_pic,
+ directmv_t *ps_direct,
+ UWORD8 u1_wd_x,
+ WORD32 u2_sub_mb_ofst,
+ dec_mb_info_t * ps_cur_mb_info)
+{
+ UWORD8 *pu1_col_zero_flag_start, *pu1_col_zero_flag,
+ *pu1_col_zero_flag_right_half, u1_sub_mb_num, uc_sub_mb_num_col;
+ UWORD8 u1_col_mb_pred_mode, uc_blk;
+ WORD32 i4_force_8X8;
+
+ UNUSED(ps_cur_mb_info);
+ ps_direct->u1_vert_mv_scale = FLD_TO_FRM;
+ ps_direct->u1_col_zeroflag_change = 1; /* set unconditionally in this mapper */
+ /* new calculation specific to this function for u2_sub_mb_ofst*/
+ u2_sub_mb_ofst = (ps_dec->u2_mbx
+ + ((ps_dec->u2_mby >> 1) * ps_dec->u2_frm_wd_in_mbs)) << 4; /* MB at (mbx, mby / 2), 16 entries per MB */
+ u2_sub_mb_ofst += ((ps_dec->u2_mby & 1) << 3); /* odd MB rows start 8 entries into that MB */
+
+ if(u1_wd_x == MB_SIZE)
+ {
+ pu1_col_zero_flag_start = ps_col_pic->pu1_col_zero_flag + u2_sub_mb_ofst;
+ u1_col_mb_pred_mode = (*pu1_col_zero_flag_start >> 6); /* top two bits of the first flag byte */
+ ps_dec->u1_currB_type = (!!u1_col_mb_pred_mode); /* 0 only when the colocated mode is 0 */
+
+ if(u1_col_mb_pred_mode & 0x2)
+ {
+ if(u1_col_mb_pred_mode == PRED_8x16)
+ {
+ ps_direct->i1_num_partitions = 2;
+ ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
+ ps_direct->i1_submb_num[0] = 0;
+ ps_direct->i1_partitionsize[0] = PRED_8x16;
+ ps_direct->i4_mv_indices[1] = u2_sub_mb_ofst + 2;
+ ps_direct->i1_submb_num[1] = 2;
+ ps_direct->i1_partitionsize[1] = PRED_8x16;
+ }
+ else
+ {
+ UWORD8 i1_num_partitions = 0, partition_size;
+ UWORD32 mv_index;
+ UWORD8 u1_num_sub_blks, i, u1_col_sub_mb_pred_mode,
+ u1_col_sub_mb_pred_mode_rt;
+
+ u1_sub_mb_num = ps_dec->u1_sub_mb_num;
+
+ pu1_col_zero_flag = pu1_col_zero_flag_start + u1_sub_mb_num;
+ u1_col_sub_mb_pred_mode = (*pu1_col_zero_flag & 0x10);/* 8x4 or 4x4 mode */
+
+ pu1_col_zero_flag_right_half = pu1_col_zero_flag_start + u1_sub_mb_num
+ + 2;
+ u1_col_sub_mb_pred_mode_rt = (*pu1_col_zero_flag_right_half
+ & 0x10);/* 8x4 or 4x4 mode */
+
+ i4_force_8X8 = (u1_col_sub_mb_pred_mode)
+ || (u1_col_sub_mb_pred_mode_rt); /* either half flagged: use 8x8 partitions */
+ if(i4_force_8X8)
+ {
+ u1_num_sub_blks = 2;
+ partition_size = PRED_8x8;
+ }
+ else
+ {
+ partition_size = PRED_8x16;
+ u1_num_sub_blks = 1;
+ }
+
+ for(i = 0; i < 2; i++) /* first pass: starting sub-MB, second pass: sub-MB 2 */
+ {
+ for(uc_blk = 0; uc_blk < u1_num_sub_blks; uc_blk++)
+ {
+ uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1);
+ uc_sub_mb_num_col &= 0x7; /* colocated index stays within 0..7 */
+ mv_index = u2_sub_mb_ofst + uc_sub_mb_num_col;
+
+ ps_direct->i4_mv_indices[i1_num_partitions] = mv_index;
+ ps_direct->i1_submb_num[i1_num_partitions] =
+ u1_sub_mb_num;
+ ps_direct->i1_partitionsize[i1_num_partitions] =
+ partition_size;
+ i1_num_partitions++;
+ u1_sub_mb_num += 8;
+ }
+
+ u1_sub_mb_num = 2; /* move to second half of Cur MB */
+
+ }
+ ps_direct->i1_num_partitions = i1_num_partitions; /* 2 or 4, depending on i4_force_8X8 */
+ return;
+ }
+ }
+ else
+ {
+ ps_direct->i1_num_partitions = 1;
+ ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst;
+ ps_direct->i1_submb_num[0] = 0;
+ ps_direct->i1_partitionsize[0] = PRED_16x16;
+ return;
+ }
+ }
+ else
+ {
+ u1_sub_mb_num = ps_dec->u1_sub_mb_num;
+ uc_sub_mb_num_col = u1_sub_mb_num | (u1_sub_mb_num >> 1);
+ uc_sub_mb_num_col &= 0x7; /* colocated index stays within 0..7 */
+
+ ps_direct->i4_mv_indices[0] = u2_sub_mb_ofst + uc_sub_mb_num_col;
+ ps_direct->i1_submb_num[0] = u1_sub_mb_num;
+ ps_direct->i1_partitionsize[0] = PRED_8x8;
+ ps_direct->i1_num_partitions = 1;
+ }
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_mbaff_to_fld \endif
+ *
+ * \brief
+ *    Direct-mode colocated mapper that forwards to ih264d_one_to_one or
+ *    ih264d_frm_to_fld depending on bit 0x2 of the colocated MB's first
+ *    col-zero-flag byte. The incoming offset is doubled before the lookup.
+ *
+ * \param ps_dec         : decoder context
+ * \param ps_col_pic     : colocated picture
+ * \param ps_direct      : output partition description (filled by callee)
+ * \param u1_wd_x        : width being predicted (passed through)
+ * \param u2_sub_mb_ofst : offset of the colocated MB before doubling
+ * \param ps_cur_mb_info : current MB info (passed through)
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_mbaff_to_fld(dec_struct_t *ps_dec,
+                         struct pic_buffer_t *ps_col_pic,
+                         directmv_t *ps_direct,
+                         UWORD8 u1_wd_x,
+                         WORD32 u2_sub_mb_ofst,
+                         dec_mb_info_t * ps_cur_mb_info)
+{
+    const WORD32 i4_col_ofst = u2_sub_mb_ofst << 1;
+    const UWORD8 u1_col_flags = ps_col_pic->pu1_col_zero_flag[i4_col_ofst];
+
+    if(0 == (u1_col_flags & 0x2))
+    {
+        /* Bit 0x2 clear: hand over to the frame-to-field mapper */
+        ih264d_frm_to_fld(ps_dec, ps_col_pic, ps_direct, u1_wd_x,
+                          i4_col_ofst, ps_cur_mb_info);
+        return;
+    }
+
+    /* Bit 0x2 set: map one to one; a bottom-field slice uses the entries */
+    /* 16 further on                                                      */
+    ih264d_one_to_one(ps_dec, ps_col_pic, ps_direct, u1_wd_x,
+                      i4_col_ofst
+                      + (ps_dec->ps_cur_slice->u1_bottom_field_flag << 4),
+                      ps_cur_mb_info);
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_fld_to_mbaff \endif
+ *
+ * \brief
+ *    Direct-mode colocated mapper installed by ih264d_cal_col_pic when the
+ *    current picture type is AFRM_PIC. Depending on the colocated picture
+ *    type and on the field/frame decoding flags of the current and the
+ *    colocated MB, it adjusts the colocated picture and offset and then
+ *    forwards to ih264d_one_to_one or ih264d_mbaff_cross_pmbair.
+ *
+ * \param ps_dec         : decoder context; ps_col_pic is written when the
+ *                         colocated picture is a COMP_FLD_PAIR
+ * \param ps_col_pic     : colocated picture
+ * \param ps_direct      : output partition description (filled by callee)
+ * \param u1_wd_x        : width being predicted (passed through)
+ * \param u2_sub_mb_ofst : offset of the colocated MB
+ * \param ps_cur_mb_info : current MB info; u1_mb_field_decodingflag and
+ *                         u1_topmb are read
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_fld_to_mbaff(dec_struct_t *ps_dec,
+                         struct pic_buffer_t *ps_col_pic,
+                         directmv_t *ps_direct,
+                         UWORD8 u1_wd_x,
+                         WORD32 u2_sub_mb_ofst,
+                         dec_mb_info_t * ps_cur_mb_info)
+{
+    const UWORD8 u1_cur_mb_is_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
+
+    if((ps_col_pic->u1_picturetype & 0x7) != COMP_FLD_PAIR)
+    {
+        const UWORD8 u1_col_mb_is_fld =
+                        (ps_col_pic->pu1_col_zero_flag[u2_sub_mb_ofst] & 0x2)
+                                        >> 1;
+
+        if(u1_col_mb_is_fld == u1_cur_mb_is_fld)
+        {
+            /* Same field/frame coding on both sides */
+            ih264d_one_to_one(ps_dec, ps_col_pic, ps_direct, u1_wd_x,
+                              u2_sub_mb_ofst, ps_cur_mb_info);
+            return;
+        }
+
+        u2_sub_mb_ofst &= 0xffef;
+        if(0 == u1_cur_mb_is_fld)
+        {
+            /* Bottom field order count at least as close to the current */
+            /* POC as the top one: use the entries 0x10 further on       */
+            if(ABS(ps_col_pic->i4_top_field_order_cnt
+                            - ps_dec->ps_cur_pic->i4_poc)
+                            >= ABS(ps_dec->ps_cur_pic->i4_poc
+                                            - ps_col_pic->i4_bottom_field_order_cnt))
+            {
+                u2_sub_mb_ofst += 0x10;
+            }
+            if(0 == ps_cur_mb_info->u1_topmb)
+            {
+                u2_sub_mb_ofst += 8;
+            }
+        }
+        ih264d_mbaff_cross_pmbair(ps_dec, ps_col_pic, ps_direct, u1_wd_x,
+                                  u2_sub_mb_ofst, ps_cur_mb_info);
+        return;
+    }
+
+    /* Colocated picture is a complementary field pair: the colocated     */
+    /* picture varies with the MB. mbaddrCol5 = curmbaddr/2               */
+    u2_sub_mb_ofst = (u2_sub_mb_ofst & 0xffe0) >> 1;
+    ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][0];
+
+    if(u1_cur_mb_is_fld)
+    {
+        if(1 != ps_cur_mb_info->u1_topmb)
+        {
+            ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS];
+        }
+        ih264d_one_to_one(ps_dec, ps_col_pic, ps_direct, u1_wd_x,
+                          u2_sub_mb_ofst, ps_cur_mb_info);
+    }
+    else
+    {
+        if(ABS(ps_col_pic->i4_top_field_order_cnt
+                        - ps_dec->ps_cur_pic->i4_poc)
+                        >= ABS(ps_dec->ps_cur_pic->i4_poc
+                                        - ps_col_pic->i4_bottom_field_order_cnt))
+        {
+            ps_col_pic = ps_dec->ps_ref_pic_buf_lx[1][MAX_REF_BUFS];
+        }
+        if(0 == ps_cur_mb_info->u1_topmb)
+        {
+            u2_sub_mb_ofst += 8;
+        }
+        ih264d_mbaff_cross_pmbair(ps_dec, ps_col_pic, ps_direct, u1_wd_x,
+                                  u2_sub_mb_ofst, ps_cur_mb_info);
+    }
+    ps_dec->ps_col_pic = ps_col_pic;
+}
+
diff --git a/decoder/ih264d_process_bslice.h b/decoder/ih264d_process_bslice.h
new file mode 100755
index 0000000..5aa76e3
--- /dev/null
+++ b/decoder/ih264d_process_bslice.h
@@ -0,0 +1,108 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_PARSE_BSLICE_H_
+#define _IH264D_PARSE_BSLICE_H_
+/*!
+**************************************************************************
+* \file ih264d_process_bslice.h
+*
+* \brief
+* Contains declarations of routines that decode a B slice type
+*
+* Detailed_description
+*
+* \date
+* 21/12/2002
+*
+* \author NS
+**************************************************************************
+*/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+WORD32 ih264d_parse_bslice(dec_struct_t * ps_dec,
+                           UWORD16 u2_first_mb_in_slice);
+WORD32 ih264d_decode_spatial_direct(dec_struct_t * ps_dec,
+                                    UWORD8 u1_wd_x,
+                                    dec_mb_info_t * ps_cur_mb_info,
+                                    UWORD8 u1_mb_num);
+WORD32 ih264d_decode_temporal_direct(dec_struct_t * ps_dec,
+                                     UWORD8 u1_wd_x,
+                                     dec_mb_info_t * ps_cur_mb_info,
+                                     UWORD8 u1_mb_num);
+WORD32 parseBSliceData(dec_struct_t * ps_dec,
+                       dec_slice_params_t * ps_slice,
+                       UWORD16 u2_first_mb_in_slice);
+
+void ih264d_init_ref_idx_lx_b(dec_struct_t *ps_dec);
+
+void ih264d_convert_frm_to_fld_list(struct pic_buffer_t *ps_ref_pic_buf_lx,
+                                    UWORD8 *pu1_L0,
+                                    dec_struct_t *ps_dec,
+                                    UWORD8 u1_num_short_term_bufs);
+
+void ih264d_convert_frm_mbaff_list(dec_struct_t *ps_dec);
+
+/*
+ * Direct-mode colocated partition mappers. All share one signature.
+ * ih264d_cal_col_pic() stores one of ih264d_one_to_one, ih264d_frm_to_fld,
+ * ih264d_fld_to_frm, ih264d_mbaff_to_fld or ih264d_fld_to_mbaff in
+ * ps_dec->pf_parse_mvdirect; ih264d_mbaff_cross_pmbair is reached through
+ * ih264d_fld_to_mbaff.
+ */
+void ih264d_one_to_one(dec_struct_t *ps_dec,
+                       struct pic_buffer_t *ps_col_pic,
+                       directmv_t *ps_direct,
+                       UWORD8 u1_wd_x,
+                       WORD32 u2_sub_mb_ofst,
+                       dec_mb_info_t * ps_cur_mb_info);
+void ih264d_mbaff_cross_pmbair(dec_struct_t *ps_dec,
+                               struct pic_buffer_t *ps_col_pic,
+                               directmv_t *ps_direct,
+                               UWORD8 u1_wd_x,
+                               WORD32 u2_sub_mb_ofst,
+                               dec_mb_info_t * ps_cur_mb_info);
+void ih264d_frm_to_fld(dec_struct_t *ps_dec,
+                       struct pic_buffer_t *ps_col_pic,
+                       directmv_t *ps_direct,
+                       UWORD8 u1_wd_x,
+                       WORD32 u2_sub_mb_ofst,
+                       dec_mb_info_t * ps_cur_mb_info);
+void ih264d_fld_to_frm(dec_struct_t *ps_dec,
+                       struct pic_buffer_t *ps_col_pic,
+                       directmv_t *ps_direct,
+                       UWORD8 u1_wd_x,
+                       WORD32 u2_sub_mb_ofst,
+                       dec_mb_info_t * ps_cur_mb_info);
+void ih264d_mbaff_to_fld(dec_struct_t *ps_dec,
+                         struct pic_buffer_t *ps_col_pic,
+                         directmv_t *ps_direct,
+                         UWORD8 u1_wd_x,
+                         WORD32 u2_sub_mb_ofst,
+                         dec_mb_info_t * ps_cur_mb_info);
+void ih264d_fld_to_mbaff(dec_struct_t *ps_dec,
+                         struct pic_buffer_t *ps_col_pic,
+                         directmv_t *ps_direct,
+                         UWORD8 u1_wd_x,
+                         WORD32 u2_sub_mb_ofst,
+                         dec_mb_info_t * ps_cur_mb_info);
+
+/* Picks the colocated picture and one of the mappers above; returns OK */
+/* or an error code.                                                    */
+WORD32 ih264d_cal_col_pic(dec_struct_t *ps_dec);
+
+WORD32 ih264d_mv_pred_ref_tfr_nby2_bmb(dec_struct_t * ps_dec,
+                                       UWORD8 u1_num_mbs,
+                                       UWORD8 u1_num_mbsNby2);
+
+#endif /* _IH264D_PARSE_BSLICE_H_ */
diff --git a/decoder/ih264d_process_intra_mb.c b/decoder/ih264d_process_intra_mb.c
new file mode 100755
index 0000000..96006ce
--- /dev/null
+++ b/decoder/ih264d_process_intra_mb.c
@@ -0,0 +1,2006 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_process_intra_mb.c
+ *
+ * \brief
+ * Contains routines that decode a I slice type
+ *
+ * Detailed_description
+ *
+ * \date
+ * 07/07/2003
+ *
+ * \author NS
+ **************************************************************************
+ */
+
+#include <string.h>
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_process_intra_mb.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_quant_scaling.h"
+#include "ih264d_tables.h"
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_itrans_recon_luma_dc \endif
+ *
+ * \brief
+ *    Passes the luma DC coefficients through
+ *    ps_dec->pf_ihadamard_scaling_4x4 and scatters the 16 results so that
+ *    result k lands in the first entry of the k-th 16-coefficient block
+ *    of pi2_coeff_block.
+ *
+ * \param ps_dec          : decoder context (transform routine, luma quant
+ *                          scale and qp/6 are read)
+ * \param pi2_src         : the luma DC coefficients to transform
+ * \param pi2_coeff_block : destination; entries 0, 16, 32, ... 240 are
+ *                          written
+ * \param pu2_weigh_mat   : weighting matrix handed to the transform routine
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_itrans_recon_luma_dc(dec_struct_t *ps_dec,
+                                 WORD16* pi2_src,
+                                 WORD16* pi2_coeff_block,
+                                 const UWORD16 *pu2_weigh_mat)
+{
+    WORD16 ai2_dc[16];
+    WORD32 ai4_scratch[16];
+    WORD32 i4_blk;
+
+    PROFILE_DISABLE_IQ_IT_RECON_RETURN()
+    ps_dec->pf_ihadamard_scaling_4x4(pi2_src, ai2_dc,
+                                     ps_dec->pu2_quant_scale_y, pu2_weigh_mat,
+                                     ps_dec->u1_qp_y_div6, ai4_scratch);
+
+    /* Blocks are 16 coefficients apart; the DC term is entry 0 of each */
+    for(i4_blk = 0; i4_blk < 16; i4_blk++)
+    {
+        pi2_coeff_block[i4_blk * 16] = ai2_dc[i4_blk];
+    }
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_read_intra_pred_modes \endif
+ *
+ * \brief
+ *    Reads the intra prediction mode syntax of an intra MB from the
+ *    bitstream.
+ *
+ *    For every luma block (16 blocks, or 4 when u4_trans_form8x8 is set)
+ *    one flag bit is read into pu1_prev_intra4x4_pred_mode_flag. When the
+ *    flag is 0, three more bits are read into the matching entry of
+ *    pu1_rem_intra4x4_pred_mode; otherwise that entry is left untouched.
+ *    With the 8x8 transform the same arrays carry the 8x8 prediction
+ *    mode values.
+ *
+ * \param ps_dec                           : decoder context (ps_bitstrm)
+ * \param pu1_prev_intra4x4_pred_mode_flag : output, one flag per block
+ * \param pu1_rem_intra4x4_pred_mode       : output, one mode per block
+ *                                           whose flag is 0
+ * \param u4_trans_form8x8                 : non-zero for the 8x8 transform
+ *
+ * \return
+ *    Always 0
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_read_intra_pred_modes(dec_struct_t * ps_dec,
+                                    UWORD8 * pu1_prev_intra4x4_pred_mode_flag,
+                                    UWORD8 * pu1_rem_intra4x4_pred_mode,
+                                    UWORD32 u4_trans_form8x8)
+{
+    const WORD32 i4_num_luma_blks = u4_trans_form8x8 ? 4 : 16;
+    dec_bit_stream_t * ps_bitstrm = ps_dec->ps_bitstrm;
+    WORD32 i4_blk;
+
+    for(i4_blk = 0; i4_blk < i4_num_luma_blks; i4_blk++)
+    {
+        UWORD32 u4_temp;
+
+        if(!u4_trans_form8x8)
+        {
+            SWITCHOFFTRACE;
+        }
+
+        GETBIT(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer);
+        pu1_prev_intra4x4_pred_mode_flag[i4_blk] = (UWORD8)u4_temp;
+
+        if(0 == pu1_prev_intra4x4_pred_mode_flag[i4_blk])
+        {
+            GETBITS(u4_temp, ps_bitstrm->u4_ofst, ps_bitstrm->pu4_buffer, 3);
+            pu1_rem_intra4x4_pred_mode[i4_blk] = (UWORD8)u4_temp;
+        }
+    }
+    return (0);
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_unpack_coeff4x4_4x4blk \endif
+ *
+ * \brief
+ *    Unpacks one packed 4x4 coefficient record found at
+ *    ps_dec->pv_proc_tu_coeff_data into pi2_out_coeff_data.
+ *
+ *    Set bits of the record's u2_sig_coeff_map are consumed from the
+ *    highest to the lowest; each one takes the next value of ai2_level
+ *    and stores it at the position given by pu1_inv_scan. Positions
+ *    without a set bit are not written. Afterwards
+ *    ps_dec->pv_proc_tu_coeff_data is advanced by the ALIGN4-rounded
+ *    number of bytes consumed.
+ *
+ * \param ps_dec             : decoder context
+ * \param pi2_out_coeff_data : destination coefficient block
+ * \param pu1_inv_scan       : inverse scan table indexed by map bit number
+ *
+ * \return
+ *    1 when exactly one coefficient was unpacked and it landed at output
+ *    position 0, otherwise 0
+ **************************************************************************
+ */
+WORD32 ih264d_unpack_coeff4x4_4x4blk(dec_struct_t * ps_dec,
+                                     WORD16 *pi2_out_coeff_data,
+                                     UWORD8 *pu1_inv_scan)
+{
+    tu_sblk4x4_coeff_data_t *ps_tu_4x4 =
+                    (tu_sblk4x4_coeff_data_t *)ps_dec->pv_proc_tu_coeff_data;
+    UWORD16 u2_sig_coeff_map = ps_tu_4x4->u2_sig_coeff_map;
+    WORD16 *pi2_level = &ps_tu_4x4->ai2_level[0];
+    WORD32 i4_pos = 0;
+    WORD32 i4_num_coeff = 0;
+    WORD32 i4_bytes_used;
+
+    PROFILE_DISABLE_UNPACK_LUMA()
+    for(; u2_sig_coeff_map != 0; i4_num_coeff++)
+    {
+        /* Bit number of the highest set bit still left in the map */
+        i4_pos = CLZ(u2_sig_coeff_map);
+        i4_pos = 31 - i4_pos;
+        RESET_BIT(u2_sig_coeff_map,i4_pos);
+
+        /* From here on i4_pos is the position in the output block */
+        i4_pos = pu1_inv_scan[i4_pos];
+        pi2_out_coeff_data[i4_pos] = *pi2_level++;
+    }
+
+    /* Step over the record: header plus the levels read, 4-byte aligned */
+    i4_bytes_used = (UWORD8 *)pi2_level - (UWORD8 *)ps_tu_4x4;
+    i4_bytes_used = ALIGN4(i4_bytes_used);
+    ps_dec->pv_proc_tu_coeff_data =
+                    (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data
+                                    + i4_bytes_used);
+
+    /* i4_pos still holds the output position of the last coefficient */
+    if((1 == i4_num_coeff) && (0 == i4_pos))
+    {
+        return 1;
+    }
+    return 0;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_unpack_coeff4x4_8x8blk \endif
+ *
+ * \brief
+ *    Unpacks the four 4x4 sub-blocks of one 8x8 luma block. Sub-block k is
+ *    processed when bit {0, 1, 4, 5}[k] of ui2_luma_csbp is set; its 16
+ *    coefficients are zeroed and then filled at offset {0, 16, 64, 80}[k]
+ *    of pi2_out_coeff_data. Sub-blocks whose bit is clear are not touched.
+ *
+ * \param ps_dec             : decoder context
+ * \param ps_cur_mb_info     : current MB info; selects the field or the
+ *                             frame inverse scan table
+ * \param ui2_luma_csbp      : coded sub-block pattern, aligned so that the
+ *                             first sub-block is bit 0
+ * \param pi2_out_coeff_data : destination of the first sub-block
+ *
+ * \return
+ *    Bit mask using the same bit positions as ui2_luma_csbp; a bit holds
+ *    the value returned by ih264d_unpack_coeff4x4_4x4blk for that
+ *    sub-block
+ **************************************************************************
+ */
+UWORD32 ih264d_unpack_coeff4x4_8x8blk(dec_struct_t * ps_dec,
+                                      dec_mb_info_t * ps_cur_mb_info,
+                                      UWORD16 ui2_luma_csbp,
+                                      WORD16 *pi2_out_coeff_data)
+{
+    /* Per sub-block: csbp bit number and offset in the coefficient buffer */
+    static const UWORD8 au1_csbp_bit[4] = { 0, 1, 4, 5 };
+    static const UWORD8 au1_coeff_ofst[4] = { 0, 16, 64, 80 };
+
+    const UWORD8 u1_mb_field_decoding_flag =
+                    ps_cur_mb_info->u1_mb_field_decodingflag;
+    const UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
+    UWORD8 *pu1_inv_scan;
+    UWORD32 u4_luma_dc_only_csbp = 0;
+    WORD32 i4_sub;
+
+    PROFILE_DISABLE_UNPACK_LUMA()
+    pu1_inv_scan = (u1_field_coding_flag || u1_mb_field_decoding_flag) ?
+                    (UWORD8 *)gau1_ih264d_inv_scan_fld :
+                    (UWORD8 *)gau1_ih264d_inv_scan;
+
+    for(i4_sub = 0; i4_sub < 4; i4_sub++)
+    {
+        const WORD32 i4_bit = au1_csbp_bit[i4_sub];
+
+        if(ui2_luma_csbp & (1 << i4_bit))
+        {
+            WORD16 *pi2_sub_blk = pi2_out_coeff_data + au1_coeff_ofst[i4_sub];
+            WORD32 dc_only_flag;
+
+            memset(pi2_sub_blk,0,16*sizeof(WORD16));
+            dc_only_flag = ih264d_unpack_coeff4x4_4x4blk(ps_dec,
+                                                         pi2_sub_blk,
+                                                         pu1_inv_scan);
+            INSERT_BIT(u4_luma_dc_only_csbp, i4_bit, dc_only_flag);
+        }
+    }
+    return u4_luma_dc_only_csbp;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_unpack_coeff8x8_8x8blk_cavlc \endif
+ *
+ * \brief
+ *    Unpacks one 8x8 luma block that is stored as four packed 4x4 records.
+ *    Record k is consumed when bit {0, 1, 4, 5}[k] of ui2_luma_csbp is set
+ *    and is written into the same 64-coefficient output block through the
+ *    k-th 8x8 CAVLC inverse scan table (interlaced tables for field MBs,
+ *    progressive ones otherwise). The output block is zeroed first when
+ *    any of those four bits is set.
+ *
+ * \param ps_dec             : decoder context
+ * \param ps_cur_mb_info     : current MB info (u1_mb_field_decodingflag
+ *                             selects the scan tables)
+ * \param ui2_luma_csbp      : coded sub-block pattern, aligned so that the
+ *                             first record is bit 0
+ * \param pi2_out_coeff_data : destination 8x8 coefficient block
+ *
+ * \return
+ *    The value ih264d_unpack_coeff4x4_4x4blk returned for record 0 when
+ *    record 0 is the only one present, otherwise 0
+ **************************************************************************
+ */
+WORD32 ih264d_unpack_coeff8x8_8x8blk_cavlc(dec_struct_t * ps_dec,
+                                           dec_mb_info_t * ps_cur_mb_info,
+                                           UWORD16 ui2_luma_csbp,
+                                           WORD16 *pi2_out_coeff_data)
+{
+    static const UWORD16 au2_record_mask[4] = { 0x1, 0x2, 0x10, 0x20 };
+
+    UWORD8 u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+    /* Read as in the other unpack routines, but not consulted here */
+    UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
+    WORD32 dc_only_flag = 0;
+    WORD32 i4_rec;
+
+    PROFILE_DISABLE_UNPACK_LUMA()
+    if(ui2_luma_csbp & 0x33)
+    {
+        memset(pi2_out_coeff_data,0,64*sizeof(WORD16));
+    }
+
+    for(i4_rec = 0; i4_rec < 4; i4_rec++)
+    {
+        UWORD8 *pu1_inv_scan;
+        WORD32 i4_rec_dc_only;
+
+        if(0 == (ui2_luma_csbp & au2_record_mask[i4_rec]))
+        {
+            continue;
+        }
+
+        if(u1_mb_field_decoding_flag)
+        {
+            pu1_inv_scan =
+                            (UWORD8*)gau1_ih264d_inv_scan_int8x8_cavlc[i4_rec];
+        }
+        else
+        {
+            pu1_inv_scan =
+                            (UWORD8*)gau1_ih264d_inv_scan_prog8x8_cavlc[i4_rec];
+        }
+
+        i4_rec_dc_only = ih264d_unpack_coeff4x4_4x4blk(ps_dec,
+                                                       pi2_out_coeff_data,
+                                                       pu1_inv_scan);
+
+        /* Any record after the first one clears the DC-only indication */
+        dc_only_flag = (0 == i4_rec) ? i4_rec_dc_only : 0;
+    }
+    return dc_only_flag;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_unpack_coeff4x4_8x8blk_chroma \endif
+ *
+ * \brief
+ *    Unpacks up to four consecutive 4x4 chroma blocks. Block k is
+ *    processed when bit k of ui2_chroma_csbp is set; its 16 coefficients
+ *    at offset 16 * k of pi2_out_coeff_data are zeroed and then filled.
+ *    Blocks whose bit is clear are not touched.
+ *
+ * \param ps_dec             : decoder context
+ * \param ps_cur_mb_info     : current MB info; selects the field or the
+ *                             frame inverse scan table
+ * \param ui2_chroma_csbp    : coded sub-block pattern, bits 0..3 are used
+ * \param pi2_out_coeff_data : destination of the first block
+ *
+ * \return
+ *    None
+ **************************************************************************
+ */
+void ih264d_unpack_coeff4x4_8x8blk_chroma(dec_struct_t * ps_dec,
+                                          dec_mb_info_t * ps_cur_mb_info,
+                                          UWORD16 ui2_chroma_csbp,
+                                          WORD16 *pi2_out_coeff_data)
+{
+    const UWORD8 u1_mb_field_decoding_flag =
+                    ps_cur_mb_info->u1_mb_field_decodingflag;
+    const UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
+    UWORD8 *pu1_inv_scan;
+    WORD32 i4_blk;
+
+    PROFILE_DISABLE_UNPACK_CHROMA()
+    pu1_inv_scan = (u1_field_coding_flag || u1_mb_field_decoding_flag) ?
+                    (UWORD8 *)gau1_ih264d_inv_scan_fld :
+                    (UWORD8 *)gau1_ih264d_inv_scan;
+
+    for(i4_blk = 0; i4_blk < 4; i4_blk++)
+    {
+        if(ui2_chroma_csbp & (1 << i4_blk))
+        {
+            WORD16 *pi2_blk = pi2_out_coeff_data + (16 * i4_blk);
+
+            memset(pi2_blk,0,16*sizeof(WORD16));
+            ih264d_unpack_coeff4x4_4x4blk(ps_dec,
+                                          pi2_blk,
+                                          pu1_inv_scan);
+        }
+    }
+}
+UWORD32 ih264d_unpack_luma_coeff4x4_mb(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 intra_flag)
+{ /*
+ * Unpacks all luma coefficients of one MB from the packed data at
+ * ps_dec->pv_proc_tu_coeff_data into ps_dec->pi2_coeff_data.
+ *
+ * Without the 8x8 transform the four 8x8 quadrants are unpacked as 4x4
+ * blocks and the result is a mask, laid out like u2_luma_csbp, of the
+ * blocks reported as DC only. With the 8x8 transform each quadrant is
+ * unpacked as one 8x8 block and bits 0..3 of the result carry the
+ * per-quadrant DC-only flag.
+ *
+ * intra_flag: when non-zero and the MB type is not I_4x4_MB, a separate
+ * record of 16 DC values may precede the AC data (see below).
+ */
+ UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+ UWORD16 ui2_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
+ UWORD8 *pu1_inv_scan = ps_dec->pu1_inv_scan; /* not referenced below */
+ WORD16 *pi2_coeff_data = ps_dec->pi2_coeff_data;
+
+ PROFILE_DISABLE_UNPACK_LUMA()
+ if(!ps_cur_mb_info->u1_tran_form8x8)
+ {
+ UWORD32 u4_luma_dc_only_csbp = 0;
+ UWORD32 u4_temp = 0;
+ WORD16* pi2_dc_val = NULL;
+ /*
+ * Reserve the pointer to dc vals. The dc vals will be copied
+ * after unpacking of ac vals since memset to 0 inside.
+ */
+ if(intra_flag && (u1_mb_type != I_4x4_MB))
+ {
+ if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,0))
+ {
+ pi2_dc_val = (WORD16 *)ps_dec->pv_proc_tu_coeff_data;
+
+ ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val + 16); /* skip the 16 DC values */
+ }
+ }
+
+ if(ui2_luma_csbp)
+ {
+ pi2_coeff_data = ps_dec->pi2_coeff_data;
+ u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec,
+ ps_cur_mb_info,
+ ui2_luma_csbp,
+ pi2_coeff_data); /* first quadrant: csbp shift 0, offset 0 */
+ u4_luma_dc_only_csbp = u4_temp;
+
+ pi2_coeff_data += 32;
+
+ ui2_luma_csbp = ui2_luma_csbp >> 2;
+ u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec,
+ ps_cur_mb_info,
+ ui2_luma_csbp,
+ pi2_coeff_data); /* second quadrant: csbp shift 2, offset 32 */
+
+ u4_luma_dc_only_csbp |= (u4_temp << 2);
+
+ pi2_coeff_data += 32 + 64;
+
+ ui2_luma_csbp = ui2_luma_csbp >> 6;
+ u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec,
+ ps_cur_mb_info,
+ ui2_luma_csbp,
+ pi2_coeff_data); /* third quadrant: csbp shift 8, offset 128 */
+
+ u4_luma_dc_only_csbp |= (u4_temp << 8);
+
+ pi2_coeff_data += 32;
+
+ ui2_luma_csbp = ui2_luma_csbp >> 2;
+ u4_temp = ih264d_unpack_coeff4x4_8x8blk(ps_dec,
+ ps_cur_mb_info,
+ ui2_luma_csbp,
+ pi2_coeff_data); /* fourth quadrant: csbp shift 10, offset 160 */
+ u4_luma_dc_only_csbp |= (u4_temp << 10);
+ }
+
+ if(pi2_dc_val != NULL)
+ {
+ WORD32 i;
+ pi2_coeff_data = ps_dec->pi2_coeff_data;
+ for(i = 0; i < 4; i++) /* DC value k goes to entry 0 of the k-th 16-coefficient block */
+ {
+ pi2_coeff_data[0] = pi2_dc_val[0];
+ pi2_coeff_data[4 * 16] = pi2_dc_val[4];
+ pi2_coeff_data[8 * 16] = pi2_dc_val[8];
+ pi2_coeff_data[12 * 16] = pi2_dc_val[12];
+
+ pi2_dc_val++; /* Point to next column */
+ pi2_coeff_data += 16;
+ }
+ u4_luma_dc_only_csbp = ps_cur_mb_info->u2_luma_csbp ^ 0xFFFF; /* blocks absent from the csbp count as DC only */
+ }
+ return u4_luma_dc_only_csbp;
+ }
+ else
+ {
+ UWORD32 u4_luma_dc_only_cbp = 0;
+ WORD32 dc_only_flag;
+ if(ui2_luma_csbp)
+ {
+ pi2_coeff_data = ps_dec->pi2_coeff_data;
+ dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec,
+ ps_cur_mb_info,
+ ui2_luma_csbp,
+ pi2_coeff_data); /* first 8x8 block: csbp shift 0, offset 0 */
+ INSERT_BIT(u4_luma_dc_only_cbp, 0, dc_only_flag);
+
+ pi2_coeff_data += 64;
+
+ ui2_luma_csbp = ui2_luma_csbp >> 2;
+ dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec,
+ ps_cur_mb_info,
+ ui2_luma_csbp,
+ pi2_coeff_data); /* second 8x8 block: csbp shift 2, offset 64 */
+
+ INSERT_BIT(u4_luma_dc_only_cbp, 1, dc_only_flag);
+
+ pi2_coeff_data += 64;
+
+ ui2_luma_csbp = ui2_luma_csbp >> 6;
+ dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec,
+ ps_cur_mb_info,
+ ui2_luma_csbp,
+ pi2_coeff_data); /* third 8x8 block: csbp shift 8, offset 128 */
+
+ INSERT_BIT(u4_luma_dc_only_cbp, 2, dc_only_flag);
+
+ pi2_coeff_data += 64;
+ ui2_luma_csbp = ui2_luma_csbp >> 2;
+ dc_only_flag = ih264d_unpack_coeff8x8_8x8blk_cavlc(ps_dec,
+ ps_cur_mb_info,
+ ui2_luma_csbp,
+ pi2_coeff_data); /* fourth 8x8 block: csbp shift 10, offset 192 */
+ INSERT_BIT(u4_luma_dc_only_cbp, 3, dc_only_flag);
+ }
+ return u4_luma_dc_only_cbp;
+ }
+
+}
+
+void ih264d_unpack_chroma_coeff4x4_mb(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info)
+{ /*
+ * Unpacks the chroma coefficients of one macroblock into
+ * ps_dec->pi2_coeff_data: first 64 WORD16 for U, next 64 WORD16 for V
+ * (the U/V naming follows the pi2_dc_val_u / pi2_dc_val_v locals).
+ *
+ * ps_dec         : decoder context. pi2_coeff_data is the destination;
+ *                  pv_proc_tu_coeff_data is advanced by 4 WORD16 for
+ *                  each plane whose DC values are present.
+ * ps_cur_mb_info : supplies u1_cbp (upper nibble selects the chroma
+ *                  case), u2_chroma_csbp and u1_yuv_dc_block_flag.
+ *
+ * Returns nothing. When (u1_cbp >> 4) == CBPC_ALLZERO the function
+ * returns without touching the destination or the stream pointer.
+ */
+ UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+ UWORD16 ui2_chroma_csbp = ps_cur_mb_info->u2_chroma_csbp;
+ UWORD8 *pu1_inv_scan = ps_dec->pu1_inv_scan;
+ WORD16 *pi2_coeff_data = ps_dec->pi2_coeff_data;
+ WORD32 i;
+ WORD16 *pi2_dc_val_u = NULL;
+ WORD16 *pi2_dc_val_v = NULL;
+
+ PROFILE_DISABLE_UNPACK_CHROMA() /* macro defined elsewhere; NOTE(review): no ';' follows it, so it may gate the if below - confirm */
+ if((ps_cur_mb_info->u1_cbp >> 4) == CBPC_ALLZERO)
+ return;
+
+ /*
+ * Remember where the DC values start in the TU coefficient stream and
+ * step the stream pointer past them (4 WORD16 per plane; bit 1 of
+ * u1_yuv_dc_block_flag selects U, bit 2 selects V). The DC values are
+ * copied into place only after the AC values have been unpacked,
+ * since the unpacking memsets the destination to 0 inside.
+ */
+ if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,1))
+ {
+ pi2_dc_val_u = (WORD16 *)ps_dec->pv_proc_tu_coeff_data;
+
+ ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val_u + 4);
+ }
+ if(CHECKBIT(ps_cur_mb_info->u1_yuv_dc_block_flag,2))
+ {
+ pi2_dc_val_v = (WORD16 *)ps_dec->pv_proc_tu_coeff_data;
+
+ ps_dec->pv_proc_tu_coeff_data = (void *)(pi2_dc_val_v + 4);
+ }
+
+ if((ps_cur_mb_info->u1_cbp >> 4) == CBPC_NONZERO) /* only this case runs the per-block unpack helper */
+ {
+ pi2_coeff_data = ps_dec->pi2_coeff_data;
+ ih264d_unpack_coeff4x4_8x8blk_chroma(ps_dec,
+ ps_cur_mb_info,
+ ui2_chroma_csbp,
+ pi2_coeff_data);
+
+ pi2_coeff_data += 64; /* second plane starts 64 WORD16 after the first */
+ ui2_chroma_csbp = ui2_chroma_csbp >> 4; /* second call sees the mask shifted down by 4 bits */
+ ih264d_unpack_coeff4x4_8x8blk_chroma(ps_dec,
+ ps_cur_mb_info,
+ ui2_chroma_csbp,
+ pi2_coeff_data);
+
+ }
+
+ pi2_coeff_data = ps_dec->pi2_coeff_data; /* rewind to the first plane for the DC pass */
+ if(pi2_dc_val_u != NULL)
+ {
+ pi2_coeff_data[0] = *pi2_dc_val_u++; /* DC value n goes to coefficient offset n * 16 */
+ pi2_coeff_data[1 * 16] = *pi2_dc_val_u++;
+ pi2_coeff_data[2 * 16] = *pi2_dc_val_u++;
+ pi2_coeff_data[3 * 16] = *pi2_dc_val_u++;
+ }
+ else
+ {
+ pi2_coeff_data[0] = 0; /* no DC values for this plane: the four DC slots are cleared */
+ pi2_coeff_data[1 * 16] = 0;
+ pi2_coeff_data[2 * 16] = 0;
+ pi2_coeff_data[3 * 16] = 0;
+ }
+ pi2_coeff_data += 64; /* move on to the second plane */
+ if(pi2_dc_val_v != NULL)
+ {
+ pi2_coeff_data[0] = *pi2_dc_val_v++; /* same layout as the first plane */
+ pi2_coeff_data[1 * 16] = *pi2_dc_val_v++;
+ pi2_coeff_data[2 * 16] = *pi2_dc_val_v++;
+ pi2_coeff_data[3 * 16] = *pi2_dc_val_v++;
+ }
+ else
+ {
+ pi2_coeff_data[0] = 0; /* no DC values for this plane: the four DC slots are cleared */
+ pi2_coeff_data[1 * 16] = 0;
+ pi2_coeff_data[2 * 16] = 0;
+ pi2_coeff_data[3 * 16] = 0;
+ }
+}
+UWORD32 ih264d_unpack_luma_coeff8x8_mb(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info)
+{ /*
+ * Unpacks the luma coefficients of one macroblock as four 8x8 blocks.
+ * For every block whose bit is set in u1_cbp, one
+ * tu_blk8x8_coeff_data_t is read from ps_dec->pv_proc_tu_coeff_data and
+ * its levels are scattered, through an inverse-scan table, into 64
+ * WORD16 of ps_dec->pi2_coeff_data.
+ *
+ * ps_dec         : decoder context. pi2_coeff_data is the destination;
+ *                  pv_proc_tu_coeff_data is advanced past each block
+ *                  that is consumed.
+ * ps_cur_mb_info : supplies u1_cbp and ps_curmb->u1_mb_fld (selects the
+ *                  inverse-scan table).
+ *
+ * Returns a mask in which bit n is set when 8x8 block n produced
+ * exactly one coefficient and the last destination index written was 0.
+ */
+ WORD32 blk_8x8_cnt;
+ WORD16 *pi2_out_coeff_data = ps_dec->pi2_coeff_data;
+ UWORD8 u1_field_coding_flag = ps_cur_mb_info->ps_curmb->u1_mb_fld;
+ UWORD8 *pu1_inv_scan;
+ UWORD32 u4_luma_dc_only_cbp = 0;
+
+ PROFILE_DISABLE_UNPACK_LUMA() /* macro defined elsewhere; NOTE(review): no ';' follows it, so it may gate the if/else below - confirm */
+ if(!u1_field_coding_flag)
+ {
+ /*******************************************************************/
+ /* u1_mb_fld == 0: use the "prog" 8x8 inverse scan table */
+ /*******************************************************************/
+ pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_prog8x8_cabac;
+ }
+ else
+ {
+ /*******************************************************************/
+ /* u1_mb_fld != 0: use the "int" 8x8 inverse scan table */
+ /*******************************************************************/
+ pu1_inv_scan = (UWORD8 *)gau1_ih264d_inv_scan_int8x8_cabac;
+ }
+
+ for(blk_8x8_cnt = 0; blk_8x8_cnt < 4; blk_8x8_cnt++)
+ {
+ if(CHECKBIT(ps_cur_mb_info->u1_cbp, blk_8x8_cnt))
+ {
+ tu_blk8x8_coeff_data_t *ps_tu_8x8 = (tu_blk8x8_coeff_data_t *)ps_dec->pv_proc_tu_coeff_data;
+ UWORD32 u4_sig_coeff_map;
+ WORD32 idx = 0;
+ WORD16 *pi2_coeff_data = &ps_tu_8x8->ai2_level[0]; /* levels are consumed sequentially from here */
+ WORD32 num_coeff = 0;
+
+ /* memset 64 coefficient to zero */
+ memset(pi2_out_coeff_data,0,64*sizeof(WORD16));
+
+ u4_sig_coeff_map = ps_tu_8x8->au4_sig_coeff_map[1]; /* map word 1 is handled first; it indexes scan positions 32..63 */
+
+ while(u4_sig_coeff_map)
+ {
+ idx = CLZ(u4_sig_coeff_map); /* presumably count-leading-zeros (macro defined elsewhere) */
+
+ idx = 31 - idx; /* convert to the bit position of the highest set bit */
+ RESET_BIT(u4_sig_coeff_map,idx);
+
+ idx = pu1_inv_scan[idx + 32];
+ pi2_out_coeff_data[idx] = *pi2_coeff_data++;
+ num_coeff++;
+ }
+
+ u4_sig_coeff_map = ps_tu_8x8->au4_sig_coeff_map[0]; /* map word 0 indexes scan positions 0..31 */
+ while(u4_sig_coeff_map)
+ {
+ idx = CLZ(u4_sig_coeff_map);
+
+ idx = 31 - idx;
+ RESET_BIT(u4_sig_coeff_map,idx);
+
+ idx = pu1_inv_scan[idx];
+ pi2_out_coeff_data[idx] = *pi2_coeff_data++;
+ num_coeff++;
+ }
+
+ if((num_coeff == 1) && (idx == 0)) /* idx holds the destination index of the last coefficient written */
+ {
+ SET_BIT(u4_luma_dc_only_cbp,blk_8x8_cnt);
+ }
+
+
+ {
+ WORD32 offset;
+ offset = (UWORD8 *)pi2_coeff_data - (UWORD8 *)ps_tu_8x8; /* bytes consumed from this TU block, header included */
+ offset = ALIGN4(offset); /* presumably rounds up to a multiple of 4 (macro defined elsewhere) */
+ ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + offset);
+ }
+ }
+ pi2_out_coeff_data += 64; /* output advances for every block, coded or not */
+ }
+
+ return u4_luma_dc_only_cbp;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_process_intra_mb \endif
+ *
+ * \brief
+ * This function decodes an I MB. Intra prediction is carried out, followed
+ * by the inverse transform. Both intra prediction and reconstruction are
+ * carried out in the row buffer itself.
+ *
+ *
+ * \return
+ * 0 on Success and Error code otherwise
+ **************************************************************************
+ */
+WORD32 ih264d_process_intra_mb(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num)
+{
+ UWORD8 u1_mb_type = ps_cur_mb_info->u1_mb_type;
+ UWORD8 uc_temp = ps_cur_mb_info->u1_mb_ngbr_availablity;
+ UWORD8 u1_top_available = BOOLEAN(uc_temp & TOP_MB_AVAILABLE_MASK);
+ UWORD8 u1_left_available = BOOLEAN(uc_temp & LEFT_MB_AVAILABLE_MASK);
+ UWORD8 u1_use_top_right_mb = BOOLEAN(uc_temp & TOP_RIGHT_MB_AVAILABLE_MASK);
+ UWORD8 u1_use_top_left_mb = BOOLEAN(uc_temp & TOP_LEFT_MB_AVAILABLE_MASK);
+ UWORD8 uc_useTopMB = u1_top_available;
+ UWORD16 u2_use_left_mb = u1_left_available;
+ UWORD16 u2_use_left_mb_pack;
+ UWORD8 *pu1_luma_pred_buffer;
+ /* CHANGED CODE */
+ UWORD8 *pu1_luma_rei1_buffer;
+ UWORD8 *puc_top;
+
+ mb_neigbour_params_t *ps_left_mb;
+ mb_neigbour_params_t *ps_top_mb;
+ mb_neigbour_params_t *ps_top_right_mb;
+ mb_neigbour_params_t *ps_curmb;
+
+ UWORD16 u2_mbx = ps_cur_mb_info->u2_mbx;
+ UWORD32 ui_pred_width, ui_rec_width;
+ WORD16 *pi2_y_coeff;
+ UWORD8 u1_mbaff, u1_topmb, u1_mb_field_decoding_flag;
+ UWORD32 u4_num_pmbair;
+ UWORD16 ui2_luma_csbp = ps_cur_mb_info->u2_luma_csbp;
+ UWORD8 *pu1_yleft, *pu1_ytop_left;
+ /* Chroma variables*/
+ UWORD8 *pu1_top_u;
+ UWORD8 *pu1_uleft;
+ UWORD8 *pu1_u_top_left;
+ /* CHANGED CODE */
+ UWORD8 *pu1_mb_cb_rei1_buffer, *pu1_mb_cr_rei1_buffer;
+ UWORD32 u4_recwidth_cr;
+ /* CHANGED CODE */
+ tfr_ctxt_t *ps_frame_buf = &ps_dec->s_tran_addrecon;
+ UWORD32 u4_luma_dc_only_csbp = 0;
+ UWORD32 u4_luma_dc_only_cbp = 0;
+
+ UWORD8 *pu1_prev_intra4x4_pred_mode_data = (UWORD8 *)ps_dec->pv_proc_tu_coeff_data; //Pointer to keep track of intra4x4_pred_mode data in pv_proc_tu_coeff_data buffer
+ u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ u1_topmb = ps_cur_mb_info->u1_topmb;
+ u4_num_pmbair = (u1_mb_num >> u1_mbaff);
+
+
+ /*--------------------------------------------------------------------*/
+ /* Find the current MB's mb params */
+ /*--------------------------------------------------------------------*/
+ u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+
+ ps_curmb = ps_cur_mb_info->ps_curmb;
+ ps_top_mb = ps_cur_mb_info->ps_top_mb;
+ ps_left_mb = ps_cur_mb_info->ps_left_mb;
+ ps_top_right_mb = ps_cur_mb_info->ps_top_right_mb;
+
+ /*--------------------------------------------------------------------*/
+ /* Check whether neighbouring MB is Inter MB and */
+ /* constrained intra pred is 1. */
+ /*--------------------------------------------------------------------*/
+ u2_use_left_mb_pack = (u2_use_left_mb << 8) + u2_use_left_mb;
+
+ if(ps_dec->ps_cur_pps->u1_constrained_intra_pred_flag)
+ {
+ UWORD8 u1_left = (UWORD8)u2_use_left_mb;
+
+ uc_useTopMB = uc_useTopMB
+ && ((ps_top_mb->u1_mb_type != P_MB)
+ && (ps_top_mb->u1_mb_type != B_MB));
+ u2_use_left_mb = u2_use_left_mb
+ && ((ps_left_mb->u1_mb_type != P_MB)
+ && (ps_left_mb->u1_mb_type != B_MB));
+
+ u2_use_left_mb_pack = (u2_use_left_mb << 8) + u2_use_left_mb;
+ if(u1_mbaff)
+ {
+ if(u1_mb_field_decoding_flag ^ ps_left_mb->u1_mb_fld)
+ {
+ u1_left = u1_left
+ && (((ps_left_mb + 1)->u1_mb_type != P_MB)
+ && ((ps_left_mb + 1)->u1_mb_type
+ != B_MB));
+ u2_use_left_mb = u2_use_left_mb && u1_left;
+ if(u1_mb_field_decoding_flag)
+ u2_use_left_mb_pack = (u1_left << 8)
+ + (u2_use_left_mb_pack & 0xff);
+ else
+ u2_use_left_mb_pack = (u2_use_left_mb << 8)
+ + (u2_use_left_mb);
+ }
+ }
+ u1_use_top_right_mb =
+ u1_use_top_right_mb
+ && ((ps_top_right_mb->u1_mb_type != P_MB)
+ && (ps_top_right_mb->u1_mb_type
+ != B_MB));
+
+ u1_use_top_left_mb =
+ u1_use_top_left_mb
+ && ((ps_cur_mb_info->u1_topleft_mbtype != P_MB)
+ && (ps_cur_mb_info->u1_topleft_mbtype
+ != B_MB));
+ }
+
+ /*********************Common pointer calculations *************************/
+ /* CHANGED CODE */
+ pu1_luma_pred_buffer = ps_dec->pu1_y;
+ pu1_luma_rei1_buffer = ps_frame_buf->pu1_dest_y + (u4_num_pmbair << 4);
+ pu1_mb_cb_rei1_buffer = ps_frame_buf->pu1_dest_u
+ + (u4_num_pmbair << 3) * YUV420SP_FACTOR;
+ pu1_mb_cr_rei1_buffer = ps_frame_buf->pu1_dest_v + (u4_num_pmbair << 3);
+ ui_pred_width = MB_SIZE;
+ ui_rec_width = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag;
+ u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
+ /************* Current and top luma pointer *****************/
+
+ if(u1_mbaff)
+ {
+ if(u1_topmb == 0)
+ {
+ pu1_luma_rei1_buffer += (
+ u1_mb_field_decoding_flag ?
+ (ui_rec_width >> 1) :
+ (ui_rec_width << 4));
+ pu1_mb_cb_rei1_buffer += (
+ u1_mb_field_decoding_flag ?
+ (u4_recwidth_cr >> 1) :
+ (u4_recwidth_cr << 3));
+ pu1_mb_cr_rei1_buffer += (
+ u1_mb_field_decoding_flag ?
+ (u4_recwidth_cr >> 1) :
+ (u4_recwidth_cr << 3));
+ }
+ }
+
+ /* CHANGED CODE */
+ if(ps_dec->u4_use_intrapred_line_copy == 1)
+ {
+ puc_top = ps_dec->pu1_prev_y_intra_pred_line + (ps_cur_mb_info->u2_mbx << 4);
+ pu1_top_u = ps_dec->pu1_prev_u_intra_pred_line
+ + (ps_cur_mb_info->u2_mbx << 3) * YUV420SP_FACTOR;
+ }
+ else
+ {
+ puc_top = pu1_luma_rei1_buffer - ui_rec_width;
+ pu1_top_u = pu1_mb_cb_rei1_buffer - u4_recwidth_cr;
+ }
+ /* CHANGED CODE */
+
+ /************* Left pointer *****************/
+ pu1_yleft = pu1_luma_rei1_buffer - 1;
+ pu1_uleft = pu1_mb_cb_rei1_buffer - 1 * YUV420SP_FACTOR;
+
+ /**************Top Left pointer calculation**********/
+ pu1_ytop_left = puc_top - 1;
+ pu1_u_top_left = pu1_top_u - 1 * YUV420SP_FACTOR;
+
+ /* CHANGED CODE */
+ PROFILE_DISABLE_INTRA_PRED()
+ {
+ pu1_prev_intra4x4_pred_mode_data = (UWORD8 *)ps_dec->pv_proc_tu_coeff_data;
+ if(u1_mb_type == I_4x4_MB && ps_cur_mb_info->u1_tran_form8x8 == 0)
+ {
+ ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + 32);
+
+ }
+ else if (u1_mb_type == I_4x4_MB && ps_cur_mb_info->u1_tran_form8x8 == 1)
+ {
+ ps_dec->pv_proc_tu_coeff_data = (void *)((UWORD8 *)ps_dec->pv_proc_tu_coeff_data + 8);
+ }
+ }
+ if(!ps_cur_mb_info->u1_tran_form8x8)
+ {
+ u4_luma_dc_only_csbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec,
+ ps_cur_mb_info,
+ 1);
+ }
+ else
+ {
+ if(!ps_dec->ps_cur_pps->u1_entropy_coding_mode)
+ {
+ u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec,
+ ps_cur_mb_info,
+ 1);
+ }
+ else
+ {
+ u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff8x8_mb(ps_dec,
+ ps_cur_mb_info);
+ }
+ }
+
+ pi2_y_coeff = ps_dec->pi2_coeff_data;
+
+ if(u1_mb_type != I_4x4_MB)
+ {
+ UWORD8 u1_intrapred_mode = MB_TYPE_TO_INTRA_16x16_MODE(u1_mb_type);
+ /*--------------------------------------------------------------------*/
+ /* 16x16 IntraPrediction */
+ /*--------------------------------------------------------------------*/
+ {
+ UWORD8 u1_packed_modes = (u1_top_available << 1)
+ + u1_left_available;
+ UWORD8 u1_err_code =
+ (u1_intrapred_mode & 1) ?
+ u1_intrapred_mode :
+ (u1_intrapred_mode ^ 2);
+
+ if((u1_err_code & u1_packed_modes) ^ u1_err_code)
+ {
+ return ERROR_INTRAPRED;
+ }
+ }
+ {
+ UWORD8 au1_ngbr_pels[33];
+ /* Get neighbour pixels */
+ /* left pels */
+ if(u2_use_left_mb)
+ {
+ WORD32 i;
+ for(i = 0; i < 16; i++)
+ au1_ngbr_pels[16 - 1 - i] = pu1_yleft[i * ui_rec_width];
+ }
+ else
+ {
+ memset(au1_ngbr_pels, 0, 16);
+ }
+
+ /* top left pels */
+ au1_ngbr_pels[16] = *pu1_ytop_left;
+
+ /* top pels */
+ if(uc_useTopMB)
+ {
+ memcpy(au1_ngbr_pels + 16 + 1, puc_top, 16);
+ }
+ else
+ {
+ memset(au1_ngbr_pels + 16 + 1, 0, 16);
+ }
+ PROFILE_DISABLE_INTRA_PRED()
+ ps_dec->apf_intra_pred_luma_16x16[u1_intrapred_mode](
+ au1_ngbr_pels, pu1_luma_rei1_buffer, 1, ui_rec_width,
+ ((uc_useTopMB << 2) | u2_use_left_mb));
+ }
+ {
+ UWORD32 i;
+ WORD16 ai2_tmp[16];
+ for(i = 0; i < 16; i++)
+ {
+ WORD16 *pi2_level = pi2_y_coeff + (i << 4);
+ UWORD8 *pu1_pred_sblk = pu1_luma_rei1_buffer
+ + ((i & 0x3) * BLK_SIZE)
+ + (i >> 2) * (ui_rec_width << 2);
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(ps_cur_mb_info->u2_luma_csbp, i))
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_4x4(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ ui_rec_width,
+ ui_rec_width,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0],
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 1,
+ pi2_level);
+ }
+ else if((CHECKBIT(u4_luma_dc_only_csbp, i)) && pi2_level[0] != 0)
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_4x4_dc(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ ui_rec_width,
+ ui_rec_width,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0],
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 1,
+ pi2_level);
+ }
+ }
+ }
+ }
+ }
+ else if(!ps_cur_mb_info->u1_tran_form8x8)
+ {
+ UWORD8 u1_is_left_sub_block, u1_is_top_sub_block = uc_useTopMB;
+ UWORD8 u1_sub_blk_x, u1_sub_blk_y, u1_sub_mb_num;
+ WORD8 i1_top_pred_mode;
+ WORD8 i1_left_pred_mode;
+ UWORD8 *pu1_top, *pu1_left, *pu1_top_left, *pu1_top_right;
+ WORD8 *pi1_cur_pred_mode, *pi1_left_pred_mode, *pc_topPredMode;
+ UWORD16 ui2_left_pred_buf_width = 0xffff;
+ WORD8 i1_intra_pred;
+ UWORD8 *pu1_prev_intra4x4_pred_mode_flag = pu1_prev_intra4x4_pred_mode_data;
+ UWORD8 *pu1_rem_intra4x4_pred_mode = pu1_prev_intra4x4_pred_mode_data + 16;
+ WORD16 *pi2_y_coeff1;
+ UWORD8 u1_cur_sub_block;
+ UWORD16 ui2_top_rt_mask;
+
+ /*--------------------------------------------------------------------*/
+ /* 4x4 IntraPrediction */
+ /*--------------------------------------------------------------------*/
+ /* Calculation of Top Right subblock mask */
+ /* */
+ /* (a) Set it to default mask */
+ /* [It has 0 for sublocks which will never have top-right sub block] */
+ /* */
+ /* (b) If top MB is not available */
+ /* Clear the bits of the first row sub blocks */
+ /* */
+ /* (c) Set/Clear bit for top-right sublock of MB */
+ /* [5 sub-block in decoding order] based on TOP RIGHT MB availablity */
+ /*--------------------------------------------------------------------*/
+
+ pu1_top = puc_top;
+
+ ui2_top_rt_mask = (u1_use_top_right_mb << 3) | (0x5750);
+ if(uc_useTopMB)
+ ui2_top_rt_mask |= 0x7;
+
+ /*Top Related initialisations*/
+
+
+ pi1_cur_pred_mode = ps_cur_mb_info->ps_curmb->pi1_intrapredmodes;
+ pc_topPredMode = ps_cur_mb_info->ps_top_mb->pi1_intrapredmodes;
+ /*--------------------------------------
+ if(u1_mbaff)
+ {
+
+ pi1_cur_pred_mode += (u2_mbx << 2);
+ pc_topPredMode = pi1_cur_pred_mode + ps_cur_mb_info->i1_offset;
+ pi1_cur_pred_mode += (u1_topmb) ? 0: 4;
+ }*/
+
+ if(u1_top_available)
+ {
+ if(ps_top_mb->u1_mb_type == I_4x4_MB)
+ *(WORD32*)pi1_cur_pred_mode = *(WORD32*)pc_topPredMode;
+ else
+ *(WORD32*)pi1_cur_pred_mode =
+ (uc_useTopMB) ? DC_DC_DC_DC : NOT_VALID;
+ }
+ else
+ *(WORD32*)pi1_cur_pred_mode = NOT_VALID;
+ /* CHANGED CODE */
+
+ /* CHANGED CODE */
+
+ /*Left Related initialisations*/
+ pi1_left_pred_mode = ps_dec->pi1_left_pred_mode;
+ if(!u1_mbaff)
+ {
+
+ if(u1_left_available)
+ {
+
+ if(ps_left_mb->u1_mb_type != I_4x4_MB)
+ *(WORD32*)pi1_left_pred_mode =
+ (u2_use_left_mb_pack) ?
+ DC_DC_DC_DC :
+ NOT_VALID;
+
+ }
+ else
+ {
+
+ *(WORD32*)pi1_left_pred_mode = NOT_VALID;
+ }
+
+ }
+ else
+ {
+ UWORD8 u1_curMbfld = ps_cur_mb_info->u1_mb_field_decodingflag;
+ UWORD8 u1_leftMbfld = ps_left_mb->u1_mb_fld;
+
+ if(u1_curMbfld ^ u1_leftMbfld)
+ {
+
+ if(u1_topmb
+ | ((u1_topmb == 0)
+ && ((ps_curmb - 1)->u1_mb_type
+ != I_4x4_MB)))
+ {
+ if(u1_left_available)
+ {
+ if(ps_left_mb->u1_mb_type != I_4x4_MB)
+ {
+ if(CHECKBIT(u2_use_left_mb_pack,0) == 0)
+ *(WORD32*)pi1_left_pred_mode = NOT_VALID;
+ else
+ *(WORD32*)pi1_left_pred_mode = DC_DC_DC_DC;
+ }
+ }
+ else
+ *(WORD32*)pi1_left_pred_mode = NOT_VALID;
+
+ if(u1_curMbfld)
+ {
+ if(u1_left_available)
+ {
+ if((ps_left_mb + 1)->u1_mb_type != I_4x4_MB)
+ {
+ if(u2_use_left_mb_pack >> 8)
+ *(WORD32*)(pi1_left_pred_mode + 4) =
+ DC_DC_DC_DC;
+ else
+ *(WORD32*)(pi1_left_pred_mode + 4) =
+ NOT_VALID;
+ }
+ }
+ else
+ *(WORD32*)(pi1_left_pred_mode + 4) = NOT_VALID;
+ pi1_left_pred_mode[1] = pi1_left_pred_mode[2];
+ pi1_left_pred_mode[2] = pi1_left_pred_mode[4];
+ pi1_left_pred_mode[3] = pi1_left_pred_mode[6];
+ *(WORD32*)(pi1_left_pred_mode + 4) =
+ *(WORD32*)pi1_left_pred_mode;
+ }
+ else
+ {
+
+ pi1_left_pred_mode[7] = pi1_left_pred_mode[3];
+ pi1_left_pred_mode[6] = pi1_left_pred_mode[3];
+ pi1_left_pred_mode[5] = pi1_left_pred_mode[2];
+ pi1_left_pred_mode[4] = pi1_left_pred_mode[2];
+ pi1_left_pred_mode[3] = pi1_left_pred_mode[1];
+ pi1_left_pred_mode[2] = pi1_left_pred_mode[1];
+ pi1_left_pred_mode[1] = pi1_left_pred_mode[0];
+ }
+ }
+ pi1_left_pred_mode += (u1_topmb) ? 0 : 4;
+ }
+ else
+ {
+
+ pi1_left_pred_mode += (u1_topmb) ? 0 : 4;
+ if(u1_left_available)
+ {
+
+ if(ps_left_mb->u1_mb_type != I_4x4_MB)
+ *(WORD32*)pi1_left_pred_mode =
+ (u2_use_left_mb_pack) ?
+ DC_DC_DC_DC :
+ NOT_VALID;
+ }
+ else
+ *(WORD32*)pi1_left_pred_mode = NOT_VALID;
+ }
+ }
+ /* One time pointer initialisations*/
+ pi2_y_coeff1 = pi2_y_coeff;
+ pu1_top_left = pu1_ytop_left;
+
+ /* Scan the sub-blocks in Raster Scan Order */
+ for(u1_sub_mb_num = 0; u1_sub_mb_num < 16; u1_sub_mb_num++)
+ {
+ UWORD8 au1_ngbr_pels[13];
+
+ u1_sub_blk_x = u1_sub_mb_num & 0x3;
+ u1_sub_blk_y = u1_sub_mb_num >> 2;
+ i1_top_pred_mode = pi1_cur_pred_mode[u1_sub_blk_x];
+ i1_left_pred_mode = pi1_left_pred_mode[u1_sub_blk_y];
+ u1_use_top_right_mb = (!!CHECKBIT(ui2_top_rt_mask, u1_sub_mb_num));
+
+ /*********** left subblock availability**********/
+ if(u1_sub_blk_x)
+ u1_is_left_sub_block = 1;
+ else
+ u1_is_left_sub_block =
+ (u1_sub_blk_y < 2) ?
+ (CHECKBIT(u2_use_left_mb_pack,
+ 0)) :
+ (u2_use_left_mb_pack >> 8);
+
+ /* CHANGED CODE */
+ if(u1_sub_blk_y)
+ u1_is_top_sub_block = 1;
+
+ /* CHANGED CODE */
+ /***************** Top *********************/
+ if(ps_dec->u4_use_intrapred_line_copy == 1)
+ {
+
+ if(u1_sub_blk_y)
+ pu1_top = pu1_luma_rei1_buffer - ui_rec_width;
+ else
+ pu1_top = puc_top + (u1_sub_blk_x << 2);
+ }
+ else
+ {
+ pu1_top = pu1_luma_rei1_buffer - ui_rec_width;
+ }
+ /***************** Top Right *********************/
+ pu1_top_right = pu1_top + 4;
+ /***************** Top Left *********************/
+ pu1_top_left = pu1_top - 1;
+ /***************** Left *********************/
+ pu1_left = pu1_luma_rei1_buffer - 1;
+ /* CHANGED CODE */
+
+ /*---------------------------------------------------------------*/
+ /* Calculation of Intra prediction mode */
+ /*---------------------------------------------------------------*/
+ i1_intra_pred = ((i1_left_pred_mode < 0) | (i1_top_pred_mode < 0)) ?
+ DC : MIN(i1_left_pred_mode, i1_top_pred_mode);
+ {
+ UWORD8 u1_packed_modes = (u1_is_top_sub_block << 1)
+ + u1_is_left_sub_block;
+ UWORD8 *pu1_intra_err_codes =
+ (UWORD8 *)gau1_ih264d_intra_pred_err_code;
+ UWORD8 uc_b2b0 = ((u1_sub_mb_num & 4) >> 1) | (u1_sub_mb_num & 1);
+ UWORD8 uc_b3b1 = ((u1_sub_mb_num & 8) >> 2)
+ | ((u1_sub_mb_num & 2) >> 1);
+
+ u1_cur_sub_block = (uc_b3b1 << 2) + uc_b2b0;
+ PROFILE_DISABLE_INTRA_PRED()
+ if(!pu1_prev_intra4x4_pred_mode_flag[u1_cur_sub_block])
+ {
+ i1_intra_pred =
+ pu1_rem_intra4x4_pred_mode[u1_cur_sub_block]
+ + (pu1_rem_intra4x4_pred_mode[u1_cur_sub_block]
+ >= i1_intra_pred);
+ }
+ {
+ UWORD8 u1_err_code = pu1_intra_err_codes[i1_intra_pred];
+
+ /*if((u1_err_code & u1_packed_modes) ^ u1_err_code)
+ {
+ }*/
+
+ }
+ }
+ {
+ /* Get neighbour pixels */
+ /* left pels */
+ if(u1_is_left_sub_block)
+ {
+ WORD32 i;
+ for(i = 0; i < 4; i++)
+ au1_ngbr_pels[4 - 1 - i] = pu1_left[i * ui_rec_width];
+ }
+ else
+ {
+ memset(au1_ngbr_pels, 0, 4);
+ }
+
+ /* top left pels */
+ au1_ngbr_pels[4] = *pu1_top_left;
+
+ /* top pels */
+ if(u1_is_top_sub_block)
+ {
+ memcpy(au1_ngbr_pels + 4 + 1, pu1_top, 4);
+ }
+ else
+ {
+ memset(au1_ngbr_pels + 4 + 1, 0, 4);
+ }
+
+ /* top right pels */
+ if(u1_use_top_right_mb)
+ {
+ memcpy(au1_ngbr_pels + 4 * 2 + 1, pu1_top_right, 4);
+ }
+ else if(u1_is_top_sub_block)
+ {
+ memset(au1_ngbr_pels + 4 * 2 + 1, au1_ngbr_pels[4 * 2], 4);
+ }
+ }
+ PROFILE_DISABLE_INTRA_PRED()
+ ps_dec->apf_intra_pred_luma_4x4[i1_intra_pred](
+ au1_ngbr_pels, pu1_luma_rei1_buffer, 1,
+ ui_rec_width,
+ ((u1_is_top_sub_block << 2) | u1_is_left_sub_block));
+
+ /* CHANGED CODE */
+ if(CHECKBIT(ui2_luma_csbp, u1_sub_mb_num))
+ {
+ WORD16 ai2_tmp[16];
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(u4_luma_dc_only_csbp, u1_sub_mb_num))
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_4x4_dc(
+ pi2_y_coeff1,
+ pu1_luma_rei1_buffer,
+ pu1_luma_rei1_buffer,
+ ui_rec_width,
+ ui_rec_width,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0],
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
+ NULL);
+ }
+ else
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_4x4(
+ pi2_y_coeff1,
+ pu1_luma_rei1_buffer,
+ pu1_luma_rei1_buffer,
+ ui_rec_width,
+ ui_rec_width,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[0],
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
+ NULL);
+ }
+ }
+
+ }
+
+ /*---------------------------------------------------------------*/
+ /* Update sub block number */
+ /*---------------------------------------------------------------*/
+ pi2_y_coeff1 += 16;
+ pu1_luma_rei1_buffer +=
+ (u1_sub_blk_x == 3) ? (ui_rec_width << 2) - 12 : 4;
+ pu1_luma_pred_buffer +=
+ (u1_sub_blk_x == 3) ? (ui_pred_width << 2) - 12 : 4;
+ /* CHANGED CODE */
+ pi1_cur_pred_mode[u1_sub_blk_x] = i1_intra_pred;
+ pi1_left_pred_mode[u1_sub_blk_y] = i1_intra_pred;
+ }
+ }
+ else if((u1_mb_type == I_4x4_MB) && (ps_cur_mb_info->u1_tran_form8x8 == 1))
+ {
+ UWORD8 u1_is_left_sub_block, u1_is_top_sub_block = uc_useTopMB;
+ UWORD8 u1_sub_blk_x, u1_sub_blk_y, u1_sub_mb_num;
+ WORD8 i1_top_pred_mode;
+ WORD8 i1_left_pred_mode;
+ UWORD8 *pu1_top, *pu1_left, *pu1_top_left;
+ WORD8 *pi1_cur_pred_mode, *pi1_left_pred_mode, *pc_topPredMode;
+ UWORD16 ui2_left_pred_buf_width = 0xffff;
+ WORD8 i1_intra_pred;
+ UWORD8 *pu1_prev_intra4x4_pred_mode_flag = pu1_prev_intra4x4_pred_mode_data;
+ UWORD8 *pu1_rem_intra4x4_pred_mode = pu1_prev_intra4x4_pred_mode_data + 4;
+ WORD16 *pi2_y_coeff1;
+ UWORD16 ui2_top_rt_mask;
+ UWORD32 u4_4x4_left_offset = 0;
+
+ /*--------------------------------------------------------------------*/
+ /* 8x8 IntraPrediction */
+ /*--------------------------------------------------------------------*/
+ /* Calculation of Top Right subblock mask */
+ /* */
+ /* (a) Set it to default mask */
+ /* [It has 0 for sublocks which will never have top-right sub block] */
+ /* */
+ /* (b) If top MB is not available */
+ /* Clear the bits of the first row sub blocks */
+ /* */
+ /* (c) Set/Clear bit for top-right sublock of MB */
+ /* [5 sub-block in decoding order] based on TOP RIGHT MB availablity */
+ /* */
+ /* ui2_top_rt_mask: marks availibility of top right(neighbour) */
+ /* in the 8x8 Block ordering */
+ /* */
+ /* tr0 tr1 */
+ /* 0 1 tr3 */
+ /* 2 3 */
+ /* */
+ /* Top rights for 0 is in top MB */
+ /* top right of 1 will be in top right MB */
+ /* top right of 3 in right MB and hence not available */
+ /* This corresponds to ui2_top_rt_mask having default value 0x4 */
+ /*--------------------------------------------------------------------*/
+
+ ui2_top_rt_mask = (u1_use_top_right_mb << 1) | (0x4);
+
+ if(uc_useTopMB)
+ {
+ ui2_top_rt_mask |= 0x1;
+ }
+
+ /* Top Related initialisations */
+ pi1_cur_pred_mode = ps_cur_mb_info->ps_curmb->pi1_intrapredmodes;
+ pc_topPredMode = ps_cur_mb_info->ps_top_mb->pi1_intrapredmodes;
+ /*
+ if(u1_mbaff)
+ {
+ pi1_cur_pred_mode += (u2_mbx << 2);
+ pc_topPredMode = pi1_cur_pred_mode + ps_cur_mb_info->i1_offset;
+ pi1_cur_pred_mode += (u1_topmb) ? 0: 4;
+ }
+ */
+ if(u1_top_available)
+ {
+ if(ps_top_mb->u1_mb_type == I_4x4_MB)
+ {
+ *(WORD32*)pi1_cur_pred_mode = *(WORD32*)pc_topPredMode;
+ }
+ else
+ {
+ *(WORD32*)pi1_cur_pred_mode =
+ (uc_useTopMB) ? DC_DC_DC_DC : NOT_VALID;
+ }
+ }
+ else
+ {
+ *(WORD32*)pi1_cur_pred_mode = NOT_VALID;
+ }
+
+ pu1_top = puc_top - 8;
+
+ /*Left Related initialisations*/
+ pi1_left_pred_mode = ps_dec->pi1_left_pred_mode;
+
+ if(!u1_mbaff)
+ {
+ if(u1_left_available)
+ {
+ if(ps_left_mb->u1_mb_type != I_4x4_MB)
+ {
+ *(WORD32*)pi1_left_pred_mode =
+ (u2_use_left_mb_pack) ?
+ DC_DC_DC_DC :
+ NOT_VALID;
+ }
+ }
+ else
+ {
+ *(WORD32*)pi1_left_pred_mode = NOT_VALID;
+ }
+ }
+ else
+ {
+ UWORD8 u1_curMbfld = ps_cur_mb_info->u1_mb_field_decodingflag;
+
+ UWORD8 u1_leftMbfld = ps_left_mb->u1_mb_fld;
+
+ if((!u1_curMbfld) && (u1_leftMbfld))
+ {
+ u4_4x4_left_offset = 1;
+ }
+
+ if(u1_curMbfld ^ u1_leftMbfld)
+ {
+
+ if(u1_topmb
+ | ((u1_topmb == 0)
+ && ((ps_curmb - 1)->u1_mb_type
+ != I_4x4_MB)))
+
+ {
+ if(u1_left_available)
+ {
+ if(ps_left_mb->u1_mb_type != I_4x4_MB)
+ {
+ if(CHECKBIT(u2_use_left_mb_pack,0) == 0)
+ {
+ *(WORD32*)pi1_left_pred_mode = NOT_VALID;
+ }
+ else
+ {
+ *(WORD32*)pi1_left_pred_mode = DC_DC_DC_DC;
+ }
+ }
+ }
+ else
+ {
+ *(WORD32*)pi1_left_pred_mode = NOT_VALID;
+ }
+
+ if(u1_curMbfld)
+ {
+ if(u1_left_available)
+ {
+ if((ps_left_mb + 1)->u1_mb_type != I_4x4_MB)
+ {
+ if(u2_use_left_mb_pack >> 8)
+ {
+ *(WORD32*)(pi1_left_pred_mode + 4) =
+ DC_DC_DC_DC;
+ }
+ else
+ {
+ *(WORD32*)(pi1_left_pred_mode + 4) =
+ NOT_VALID;
+ }
+ }
+ }
+ else
+ {
+ *(WORD32*)(pi1_left_pred_mode + 4) = NOT_VALID;
+ }
+
+ pi1_left_pred_mode[1] = pi1_left_pred_mode[2];
+ pi1_left_pred_mode[2] = pi1_left_pred_mode[4];
+ pi1_left_pred_mode[3] = pi1_left_pred_mode[6];
+ *(WORD32*)(pi1_left_pred_mode + 4) =
+ *(WORD32*)pi1_left_pred_mode;
+ }
+ else
+ {
+ pi1_left_pred_mode[7] = pi1_left_pred_mode[3];
+ pi1_left_pred_mode[6] = pi1_left_pred_mode[3];
+ pi1_left_pred_mode[5] = pi1_left_pred_mode[2];
+ pi1_left_pred_mode[4] = pi1_left_pred_mode[2];
+ pi1_left_pred_mode[3] = pi1_left_pred_mode[1];
+ pi1_left_pred_mode[2] = pi1_left_pred_mode[1];
+ pi1_left_pred_mode[1] = pi1_left_pred_mode[0];
+ }
+ }
+ pi1_left_pred_mode += (u1_topmb) ? 0 : 4;
+ }
+ else
+ {
+ pi1_left_pred_mode += (u1_topmb) ? 0 : 4;
+
+ if(u1_left_available)
+ {
+ if(ps_left_mb->u1_mb_type != I_4x4_MB)
+ {
+ *(WORD32*)pi1_left_pred_mode =
+ (u2_use_left_mb_pack) ?
+ DC_DC_DC_DC :
+ NOT_VALID;
+ }
+ }
+ else
+ {
+ *(WORD32*)pi1_left_pred_mode = NOT_VALID;
+ }
+ }
+ }
+
+ /* One time pointer initialisations*/
+ pi2_y_coeff1 = pi2_y_coeff;
+
+ if(u1_use_top_left_mb)
+ {
+ pu1_top_left = pu1_ytop_left;
+ }
+ else
+ {
+ pu1_top_left = NULL;
+ }
+
+ /* Scan the sub-blocks in Raster Scan Order */
+ for(u1_sub_mb_num = 0; u1_sub_mb_num < 4; u1_sub_mb_num++)
+ {
+ u1_sub_blk_x = (u1_sub_mb_num & 0x1);
+ u1_sub_blk_y = (u1_sub_mb_num >> 1);
+ i1_top_pred_mode = pi1_cur_pred_mode[u1_sub_blk_x << 1];
+ i1_left_pred_mode = pi1_left_pred_mode[u1_sub_blk_y << 1];
+
+ if(2 == u1_sub_mb_num)
+ {
+ i1_left_pred_mode = pi1_left_pred_mode[(u1_sub_blk_y << 1)
+ + u4_4x4_left_offset];
+ }
+
+ u1_use_top_right_mb = (!!CHECKBIT(ui2_top_rt_mask, u1_sub_mb_num));
+
+ /*********** left subblock availability**********/
+ if(u1_sub_blk_x)
+ {
+ u1_is_left_sub_block = 1;
+ }
+ else
+ {
+ u1_is_left_sub_block =
+ (u1_sub_blk_y < 1) ?
+ (CHECKBIT(u2_use_left_mb_pack,
+ 0)) :
+ (u2_use_left_mb_pack >> 8);
+ }
+
+ /***************** Top *********************/
+ if(u1_sub_blk_y)
+ {
+ u1_is_top_sub_block = 1;
+ // sushant
+ pu1_top = /*pu1_luma_pred_buffer*/pu1_luma_rei1_buffer - ui_rec_width;
+ }
+ else
+ {
+ pu1_top += 8;
+ }
+
+ /***************** Left *********************/
+ if((u1_sub_blk_x) | (u4_num_pmbair != 0))
+ {
+ // sushant
+ pu1_left = /*pu1_luma_pred_buffer*/pu1_luma_rei1_buffer - 1;
+ ui2_left_pred_buf_width = ui_rec_width;
+ }
+ else
+ {
+ pu1_left = pu1_yleft;
+ pu1_yleft += (ui_rec_width << 3);
+ ui2_left_pred_buf_width = ui_rec_width;
+ }
+
+ /***************** Top Left *********************/
+ if(u1_sub_mb_num)
+ {
+ pu1_top_left = (u1_sub_blk_x) ?
+ pu1_top - 1 : pu1_left - ui_rec_width;
+
+ if((u1_sub_blk_x && (!u1_is_top_sub_block))
+ || ((!u1_sub_blk_x) && (!u1_is_left_sub_block)))
+ {
+ pu1_top_left = NULL;
+ }
+ }
+
+ /*---------------------------------------------------------------*/
+ /* Calculation of Intra prediction mode */
+ /*---------------------------------------------------------------*/
+ i1_intra_pred = ((i1_left_pred_mode < 0) | (i1_top_pred_mode < 0)) ?
+ DC : MIN(i1_left_pred_mode, i1_top_pred_mode);
+ {
+ UWORD8 u1_packed_modes = (u1_is_top_sub_block << 1)
+ + u1_is_left_sub_block;
+ UWORD8 *pu1_intra_err_codes =
+ (UWORD8 *)gau1_ih264d_intra_pred_err_code;
+
+ /********************************************************************/
+ /* Same intra4x4_pred_mode array is filled with intra4x4_pred_mode */
+ /* for a MB with 8x8 intrapredicition */
+ /********************************************************************/
+ PROFILE_DISABLE_INTRA_PRED()
+ if(!pu1_prev_intra4x4_pred_mode_flag[u1_sub_mb_num])
+ {
+ i1_intra_pred = pu1_rem_intra4x4_pred_mode[u1_sub_mb_num]
+ + (pu1_rem_intra4x4_pred_mode[u1_sub_mb_num]
+ >= i1_intra_pred);
+ }
+ {
+ UWORD8 u1_err_code = pu1_intra_err_codes[i1_intra_pred];
+
+ if((u1_err_code & u1_packed_modes) ^ u1_err_code)
+ {
+ return ERROR_INTRAPRED;
+ }
+ }
+ }
+
+ {
+ UWORD8 au1_ngbr_pels[25];
+ WORD32 ngbr_avail;
+ ngbr_avail = u1_is_left_sub_block << 0;
+ ngbr_avail |= u1_is_top_sub_block << 2;
+
+ if(pu1_top_left)
+ ngbr_avail |= 1 << 1;
+
+ ngbr_avail |= u1_use_top_right_mb << 3;
+ PROFILE_DISABLE_INTRA_PRED()
+ {
+ ps_dec->pf_intra_pred_ref_filtering(pu1_left, pu1_top_left,
+ pu1_top, au1_ngbr_pels,
+ ui2_left_pred_buf_width,
+ ngbr_avail);
+
+ ps_dec->apf_intra_pred_luma_8x8[i1_intra_pred](
+ au1_ngbr_pels, pu1_luma_rei1_buffer, 1,
+ ui_rec_width,
+ ((u1_is_top_sub_block << 2) | u1_is_left_sub_block));
+ }
+ }
+
+ /* Inverse Transform and Reconstruction */
+ if(CHECKBIT(ps_cur_mb_info->u1_cbp, u1_sub_mb_num))
+ {
+ WORD16 *pi2_scale_matrix_ptr;
+ WORD16 ai2_tmp[64];
+
+ pi2_scale_matrix_ptr =
+ ps_dec->s_high_profile.i2_scalinglist8x8[0];
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(u4_luma_dc_only_cbp, u1_sub_mb_num))
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_8x8_dc(
+ pi2_y_coeff1,
+ pu1_luma_rei1_buffer,
+ pu1_luma_rei1_buffer,
+ ui_rec_width,
+ ui_rec_width,
+ gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)pi2_scale_matrix_ptr,
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
+ NULL);
+ }
+ else
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_8x8(
+ pi2_y_coeff1,
+ pu1_luma_rei1_buffer,
+ pu1_luma_rei1_buffer,
+ ui_rec_width,
+ ui_rec_width,
+ gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)pi2_scale_matrix_ptr,
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
+ NULL);
+ }
+ }
+
+ }
+
+ /*---------------------------------------------------------------*/
+ /* Update sub block number */
+ /*---------------------------------------------------------------*/
+ pi2_y_coeff1 += 64;
+
+ pu1_luma_rei1_buffer +=
+ (u1_sub_blk_x == 1) ?
+ (ui_rec_width << 3) - (8 * 1) : 8;
+
+ /*---------------------------------------------------------------*/
+ /* Pred mode filled in terms of 4x4 block so replicated in 2 */
+ /* locations. */
+ /*---------------------------------------------------------------*/
+ pi1_cur_pred_mode[u1_sub_blk_x << 1] = i1_intra_pred;
+ pi1_cur_pred_mode[(u1_sub_blk_x << 1) + 1] = i1_intra_pred;
+ pi1_left_pred_mode[u1_sub_blk_y << 1] = i1_intra_pred;
+ pi1_left_pred_mode[(u1_sub_blk_y << 1) + 1] = i1_intra_pred;
+ }
+ }
+ /* Decode Chroma Block */
+ ih264d_unpack_chroma_coeff4x4_mb(ps_dec,
+ ps_cur_mb_info);
+ /*--------------------------------------------------------------------*/
+ /* Chroma Blocks decoding */
+ /*--------------------------------------------------------------------*/
+ {
+ UWORD8 u1_intra_chrom_pred_mode;
+ UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4);
+
+ /*--------------------------------------------------------------------*/
+ /* Perform Chroma intra prediction */
+ /*--------------------------------------------------------------------*/
+
+ u1_intra_chrom_pred_mode = CHROMA_TO_LUMA_INTRA_MODE(
+ ps_cur_mb_info->u1_chroma_pred_mode);
+
+ {
+ UWORD8 u1_packed_modes = (u1_top_available << 1)
+ + u1_left_available;
+ UWORD8 u1_err_code =
+ (u1_intra_chrom_pred_mode & 1) ?
+ u1_intra_chrom_pred_mode :
+ (u1_intra_chrom_pred_mode ^ 2);
+ if((u1_err_code & u1_packed_modes) ^ u1_err_code)
+ return ERROR_INTRAPRED;
+ }
+
+ /* CHANGED CODE */
+ if(u1_chroma_cbp != CBPC_ALLZERO)
+ {
+ UWORD16 u2_chroma_csbp =
+ (u1_chroma_cbp == CBPC_ACZERO) ?
+ 0 : ps_cur_mb_info->u2_chroma_csbp;
+ UWORD32 u4_scale_u;
+ UWORD32 u4_scale_v;
+
+ {
+ UWORD16 au2_ngbr_pels[33];
+ UWORD8 *pu1_ngbr_pels = (UWORD8 *)au2_ngbr_pels;
+ UWORD16 *pu2_left_uv;
+ UWORD16 *pu2_topleft_uv;
+ WORD32 use_left1 = (u2_use_left_mb_pack & 0x0ff);
+ WORD32 use_left2 = (u2_use_left_mb_pack & 0xff00) >> 8;
+
+ pu2_left_uv = (UWORD16 *)pu1_uleft;
+ pu2_topleft_uv = (UWORD16 *)pu1_u_top_left;
+ /* Get neighbour pixels */
+ /* left pels */
+ if(u2_use_left_mb_pack)
+ {
+ WORD32 i;
+ if(use_left1)
+ {
+ for(i = 0; i < 4; i++)
+ au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i
+ * u4_recwidth_cr / YUV420SP_FACTOR];
+ }
+ else
+ {
+ memset(au2_ngbr_pels + 4, 0, 4 * sizeof(UWORD16));
+ }
+
+ if(use_left2)
+ {
+ for(i = 4; i < 8; i++)
+ au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i
+ * u4_recwidth_cr / YUV420SP_FACTOR];
+ }
+ else
+ {
+ memset(au2_ngbr_pels, 0, 4 * sizeof(UWORD16));
+ }
+ }
+ else
+ {
+ memset(au2_ngbr_pels, 0, 8 * sizeof(UWORD16));
+ }
+
+ /* top left pels */
+ au2_ngbr_pels[8] = *pu2_topleft_uv;
+
+ /* top pels */
+ if(uc_useTopMB)
+ {
+ memcpy(au2_ngbr_pels + 8 + 1, pu1_top_u,
+ 8 * sizeof(UWORD16));
+ }
+ else
+ {
+ memset(au2_ngbr_pels + 8 + 1, 0, 8 * sizeof(UWORD16));
+ }
+
+ PROFILE_DISABLE_INTRA_PRED()
+ ps_dec->apf_intra_pred_chroma[u1_intra_chrom_pred_mode](
+ pu1_ngbr_pels,
+ pu1_mb_cb_rei1_buffer,
+ 1,
+ u4_recwidth_cr,
+ ((uc_useTopMB << 2) | (use_left2 << 4)
+ | use_left1));
+ }
+ u4_scale_u = ps_cur_mb_info->u1_qpc_div6;
+ u4_scale_v = ps_cur_mb_info->u1_qpcr_div6;
+ pi2_y_coeff = ps_dec->pi2_coeff_data;
+
+ {
+ UWORD32 i;
+ WORD16 ai2_tmp[16];
+ for(i = 0; i < 4; i++)
+ {
+ WORD16 *pi2_level = pi2_y_coeff + (i << 4);
+ UWORD8 *pu1_pred_sblk = pu1_mb_cb_rei1_buffer
+ + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
+ + (i >> 1) * (u4_recwidth_cr << 2);
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(u2_chroma_csbp, i))
+ {
+ ps_dec->pf_iquant_itrans_recon_chroma_4x4(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ u4_recwidth_cr,
+ u4_recwidth_cr,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[1],
+ u4_scale_u, ai2_tmp, pi2_level);
+ }
+ else if(pi2_level[0] != 0)
+ {
+ ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ u4_recwidth_cr,
+ u4_recwidth_cr,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[1],
+ u4_scale_u, ai2_tmp, pi2_level);
+ }
+ }
+
+ }
+ }
+
+ pi2_y_coeff += MB_CHROM_SIZE;
+ u2_chroma_csbp = u2_chroma_csbp >> 4;
+ {
+ UWORD32 i;
+ WORD16 ai2_tmp[16];
+ for(i = 0; i < 4; i++)
+ {
+ WORD16 *pi2_level = pi2_y_coeff + (i << 4);
+ UWORD8 *pu1_pred_sblk = pu1_mb_cb_rei1_buffer + 1
+ + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
+ + (i >> 1) * (u4_recwidth_cr << 2);
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(u2_chroma_csbp, i))
+ {
+ ps_dec->pf_iquant_itrans_recon_chroma_4x4(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ u4_recwidth_cr,
+ u4_recwidth_cr,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[2],
+ u4_scale_v, ai2_tmp, pi2_level);
+ }
+ else if(pi2_level[0] != 0)
+ {
+ ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ u4_recwidth_cr,
+ u4_recwidth_cr,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[2],
+ u4_scale_v, ai2_tmp, pi2_level);
+ }
+ }
+ }
+ }
+
+ }
+ else
+ {
+ /* If no inverse transform is needed, pass recon buffer pointer */
+ /* to Intraprediction module instead of pred buffer pointer */
+ {
+ UWORD16 au2_ngbr_pels[33];
+ UWORD8 *pu1_ngbr_pels = (UWORD8 *)au2_ngbr_pels;
+ UWORD16 *pu2_left_uv;
+ UWORD16 *pu2_topleft_uv;
+ pu2_topleft_uv = (UWORD16 *)pu1_u_top_left;
+ pu2_left_uv = (UWORD16 *)pu1_uleft;
+ WORD32 use_left1 = (u2_use_left_mb_pack & 0x0ff);
+ WORD32 use_left2 = (u2_use_left_mb_pack & 0xff00) >> 8;
+
+ /* Get neighbour pixels */
+ /* left pels */
+ if(u2_use_left_mb_pack)
+ {
+ WORD32 i;
+ if(use_left1)
+ {
+ for(i = 0; i < 4; i++)
+ au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i
+ * u4_recwidth_cr / YUV420SP_FACTOR];
+ }
+ else
+ {
+ memset(au2_ngbr_pels + 4, 0, 4 * sizeof(UWORD16));
+ }
+
+ if(use_left2)
+ {
+ for(i = 4; i < 8; i++)
+ au2_ngbr_pels[8 - 1 - i] = pu2_left_uv[i
+ * u4_recwidth_cr / YUV420SP_FACTOR];
+ }
+ else
+ {
+ memset(au2_ngbr_pels, 0, 4 * sizeof(UWORD16));
+ }
+
+ }
+ else
+ {
+ memset(au2_ngbr_pels, 0, 8 * sizeof(UWORD16));
+ }
+
+ /* top left pels */
+ au2_ngbr_pels[8] = *pu2_topleft_uv;
+
+ /* top pels */
+ if(uc_useTopMB)
+ {
+ memcpy(au2_ngbr_pels + 8 + 1, pu1_top_u,
+ 8 * sizeof(UWORD16));
+ }
+ else
+ {
+ memset(au2_ngbr_pels + 8 + 1, 0, 8 * sizeof(UWORD16));
+ }
+
+ PROFILE_DISABLE_INTRA_PRED()
+ ps_dec->apf_intra_pred_chroma[u1_intra_chrom_pred_mode](
+ pu1_ngbr_pels,
+ pu1_mb_cb_rei1_buffer,
+ 1,
+ u4_recwidth_cr,
+ ((uc_useTopMB << 2) | (use_left2 << 4)
+ | use_left1));
+ }
+
+ }
+
+ }
+ return OK;
+}
diff --git a/decoder/ih264d_process_intra_mb.h b/decoder/ih264d_process_intra_mb.h
new file mode 100755
index 0000000..30d7819
--- /dev/null
+++ b/decoder/ih264d_process_intra_mb.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ **************************************************************************
+ * \file ih264d_process_intra_mb.h
+ *
+ * \brief
+ * Declares the routines used to unpack coefficients and process intra macroblocks
+ *
+ * Detailed_description
+ *
+ * \date
+ * 07/07/2003
+ *
+ * \author NS
+ **************************************************************************
+ */
+#ifndef _IH264D_PROCESS_INTRA_MB_H_
+#define _IH264D_PROCESS_INTRA_MB_H_
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+
+#define CHROMA_TO_LUMA_INTRA_MODE(x) ((x) ^ ((!((x) & 0x01)) << 1)) /* toggles bit 1 of x when bit 0 is clear (0 <-> 2), odd values are unchanged; x is evaluated twice, so pass no side effects */
+#define MB_TYPE_TO_INTRA_16x16_MODE(x) (((x) - 1) & 0x03) /* low two bits of (x - 1); argument parenthesized so expression arguments keep their value */
+
+UWORD32 ih264d_unpack_luma_coeff4x4_mb(dec_struct_t * ps_dec, /* return value is a bit mask; inter MB processing stores it as its "luma DC only" block mask */
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 intra_flag); /* inter MB processing passes 0 */
+void ih264d_unpack_chroma_coeff4x4_mb(dec_struct_t * ps_dec, /* called once per MB before the chroma blocks are reconstructed */
+ dec_mb_info_t * ps_cur_mb_info);
+UWORD32 ih264d_unpack_luma_coeff8x8_mb(dec_struct_t * ps_dec, /* return value is used as the per-8x8 "luma DC only" mask by inter MB processing */
+ dec_mb_info_t * ps_cur_mb_info);
+
+WORD32 ih264d_read_intra_pred_modes(dec_struct_t *ps_dec,
+ UWORD8 *pu1_prev_intra4x4_pred_mode_flag,
+ UWORD8 *pu1_rem_intra4x4_pred_mode,
+ UWORD32 u4_trans_form8x8);
+
+WORD32 ih264d_process_intra_mb(dec_struct_t * ps_dec, /* returns OK, or an error code such as ERROR_INTRAPRED */
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num);
+
+#endif /* _IH264D_PROCESS_INTRA_MB_H_ */
+
diff --git a/decoder/ih264d_process_pslice.c b/decoder/ih264d_process_pslice.c
new file mode 100755
index 0000000..b1230f6
--- /dev/null
+++ b/decoder/ih264d_process_pslice.c
@@ -0,0 +1,1139 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_process_pslice.c
+ *
+ * \brief
+ * Contains routines used to decode P slices (MV prediction, inter MB reconstruction, weight table parsing)
+ *
+ * Detailed_description
+ *
+ * \date
+ * 21/12/2002
+ *
+ * \author NS
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+
+#include <string.h>
+#include "ih264d_bitstrm.h"
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_dpb_manager.h"
+#include "ih264d_mvpred.h"
+#include "ih264d_inter_pred.h"
+#include "ih264d_process_pslice.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_cabac.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_utils.h"
+#include "ih264d_parse_islice.h"
+#include "ih264d_process_bslice.h"
+#include "ih264d_process_intra_mb.h"
+
+void ih264d_init_cabac_contexts(UWORD8 u1_slice_type, dec_struct_t * ps_dec);
+
+void ih264d_insert_pic_in_ref_pic_listx(struct pic_buffer_t *ps_ref_pic_buf_lx, /* destination list entry, fully overwritten */
+ struct pic_buffer_t *ps_pic) /* source picture buffer descriptor, copied by value */
+{
+ ps_ref_pic_buf_lx[0] = ps_pic[0]; /* whole-structure copy into the reference list entry */
+}
+
+WORD32 ih264d_mv_pred_ref_tfr_nby2_pmb(dec_struct_t * ps_dec, /* decoder context; parse data is read from it and prediction state is written back into it */
+ UWORD8 u1_mb_idx, /* index of the first MB of the group to process */
+ UWORD8 u1_num_mbs) /* loop bound: MBs u1_mb_idx .. u1_num_mbs - 1 are processed */
+{ /* Per MB: runs MV prediction, appends packed prediction info, stores MV/col-zero data and optionally calls pf_compute_bs. Always returns OK. */
+ parse_pmbarams_t * ps_mb_part_info;
+ parse_part_params_t * ps_part;
+ mv_pred_t *ps_mv_nmb, *ps_mv_nmb_start, *ps_mv_ntop, *ps_mv_ntop_start;
+ UWORD32 i, j;
+ const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ dec_mb_info_t * ps_cur_mb_info;
+ WORD32 i2_mv_x, i2_mv_y;
+ WORD32 ret; /* NOTE(review): declared but never used in this function */
+
+ ps_dec->i4_submb_ofst -= (u1_num_mbs - u1_mb_idx) << 4; /* rewind by 16 per MB to be processed; the loop adds SUB_BLK_SIZE back once per MB */
+ ps_mb_part_info = ps_dec->ps_parse_mb_data; /* starts at entry 0, not offset by u1_mb_idx */
+ ps_part = ps_dec->ps_parse_part_params; /* starts at entry 0, not offset by u1_mb_idx */
+
+ /* N/2 Mb MvPred and Transfer Setup Loop */
+ for(i = u1_mb_idx; i < u1_num_mbs; i++, ps_mb_part_info++)
+ {
+ UWORD32 u1_colz;
+ UWORD32 u1_field;
+ mv_pred_t s_mvPred;
+ mv_pred_t *ps_mv_pred = &s_mvPred;
+
+
+ /* Every MB starts from the decoder's default MV prediction */
+ *ps_mv_pred = ps_dec->s_default_mv_pred;
+
+ ps_dec->i4_submb_ofst += SUB_BLK_SIZE;
+ ps_dec->u2_wait_id = i;
+
+ /* Restore the slice scratch MbX and MbY context */
+ ps_cur_mb_info = ps_dec->ps_nmb_info + i;
+ u1_field = ps_cur_mb_info->u1_mb_field_decodingflag;
+
+
+ /* ps_mv_cur holds 16 mv_pred_t entries per MB */
+ ps_mv_nmb_start = ps_dec->ps_mv_cur + (i << 4);
+ ps_dec->u2_mbx = ps_cur_mb_info->u2_mbx;
+ ps_dec->u2_mby = ps_cur_mb_info->u2_mby;
+ ps_dec->u2_mv_2mb[i & 0x1] = 0;
+
+ /* Look for MV Prediction and Reference Transfer in Non-I Mbs */
+ if(!ps_mb_part_info->u1_isI_mb)
+ {
+ UWORD32 u1_blk_no;
+ WORD32 i1_ref_idx, i1_ref_idx1;
+ UWORD32 u1_sub_mb_x, u1_sub_mb_y, u1_sub_mb_num;
+ UWORD32 u1_num_part, u1_num_ref, u1_wd, u1_ht;
+ UWORD32 *pu4_wt_offst, **ppu4_wt_ofst;
+ UWORD32 u1_scale_ref, u4_bot_mb;
+ WORD8 *pi1_ref_idx = ps_mb_part_info->i1_ref_idx[0];
+ pic_buffer_t *ps_ref_frame, **pps_ref_frame;
+ deblk_mb_t * ps_cur_deblk_mb = ps_dec->ps_deblk_mbn + i;
+
+ /* MB Level initialisations */
+ ps_dec->u4_num_pmbair = i >> u1_mbaff;
+ ps_dec->u1_mb_idx_mv = i;
+ ppu4_wt_ofst = ps_mb_part_info->pu4_wt_offst;
+ pps_ref_frame = ps_dec->ps_ref_pic_buf_lx[0];
+ /* Entry 12 of the MB one frame-width of MBs (two when MBAFF) earlier in the MV buffer; presumably the bottom sub-block row of the MB above - confirm */
+ ps_mv_ntop_start = ps_mv_nmb_start
+ - (ps_dec->u2_frm_wd_in_mbs << (4 + u1_mbaff)) + 12;
+
+ u1_num_part = ps_mb_part_info->u1_num_part;
+ ps_cur_deblk_mb->u1_mb_type |= (u1_num_part > 1) << 1; /* sets bit 1 when the MB has more than one partition */
+ ps_cur_mb_info->u4_pred_info_pkd_idx = ps_dec->u4_pred_info_pkd_idx; /* remember where this MB's packed prediction entries start */
+ ps_cur_mb_info->u1_num_pred_parts = 0;
+
+
+ /****************************************************/
+ /* weighted u4_ofst pointer calculations, this loop */
+ /* runs maximum 4 times, even in direct cases */
+ /****************************************************/
+ u1_scale_ref = u1_mbaff & u1_field; /* non-zero only for field MBs when MBAFF is on */
+
+ u4_bot_mb = 1 - ps_cur_mb_info->u1_topmb;
+ if(ps_dec->ps_cur_pps->u1_wted_pred_flag)
+ {
+ u1_num_ref = MIN(u1_num_part, 4);
+ for(u1_blk_no = 0; u1_blk_no < u1_num_ref; u1_blk_no++)
+ {
+ i1_ref_idx = pi1_ref_idx[u1_blk_no];
+ if(u1_scale_ref)
+ i1_ref_idx >>= 1;
+ pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
+ * X3(i1_ref_idx)];
+ ppu4_wt_ofst[u1_blk_no] = pu4_wt_offst;
+ }
+ }
+ else
+ {
+ ppu4_wt_ofst[0] = NULL;
+ ppu4_wt_ofst[1] = NULL;
+ ppu4_wt_ofst[2] = NULL;
+ ppu4_wt_ofst[3] = NULL;
+ }
+
+ /**************************************************/
+ /* Loop on Partitions */
+ /**************************************************/
+ ps_dec->u4_dma_buf_idx = 0;
+
+ for(j = 0; j < u1_num_part; j++, ps_part++)
+ {
+
+ u1_sub_mb_num = ps_part->u1_sub_mb_num;
+ ps_dec->u1_sub_mb_num = u1_sub_mb_num;
+
+ if(PART_NOT_DIRECT != ps_part->u1_is_direct)
+ {
+ /* Mb Skip Mode */
+ /* Setting the default and other members of MvPred Structure */
+ s_mvPred.i2_mv[2] = -1;
+ s_mvPred.i2_mv[3] = -1;
+ s_mvPred.i1_ref_frame[0] = 0;
+ i1_ref_idx = (u1_scale_ref && u4_bot_mb) ? MAX_REF_BUFS : 0; /* bottom field MB under MBAFF uses the list entry offset by MAX_REF_BUFS */
+ ps_ref_frame = pps_ref_frame[i1_ref_idx];
+ s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
+ s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
+ pu4_wt_offst = (UWORD32*)&ps_dec->pu4_wt_ofsts[0];
+
+ ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb_start,
+ ps_mv_ntop_start, &s_mvPred, 0, 4, 0, 1,
+ MB_SKIP);
+
+
+
+
+
+ /* Append one PRED_L0 entry (width 4, height 4, sub-MB 0) to the packed prediction-info array */
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info (s_mvPred.i2_mv,4,4,0,PRED_L0,ps_pred_pkd,ps_ref_frame->u1_pic_buf_id,
+ (i1_ref_idx >> u1_scale_ref),pu4_wt_offst,
+ ps_ref_frame->u1_pic_type);
+
+
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+ }
+
+
+
+ /* Storing colocated zero information */
+ u1_colz = ((ABS(s_mvPred.i2_mv[0]) <= 1)
+ && (ABS(s_mvPred.i2_mv[1]) <= 1))
+ + (u1_field << 1);
+
+ ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
+ u1_colz, 4, 4);
+ }
+ else
+ {
+ u1_sub_mb_x = u1_sub_mb_num & 0x03;
+ u1_sub_mb_y = u1_sub_mb_num >> 2;
+ u1_blk_no = /* fewer than 4 partitions: the partition index; otherwise the index of the 2x2 sub-block quadrant holding this sub-MB */
+ (u1_num_part < 4) ?
+ j :
+ (((u1_sub_mb_y >> 1) << 1)
+ + (u1_sub_mb_x
+ >> 1));
+
+ ps_mv_ntop = ps_mv_ntop_start + u1_sub_mb_x;
+ ps_mv_nmb = ps_mv_nmb_start + u1_sub_mb_num;
+
+ u1_wd = ps_part->u1_partwidth;
+ u1_ht = ps_part->u1_partheight;
+
+ /* Populate the colpic info and reference frames */
+ i1_ref_idx = pi1_ref_idx[u1_blk_no];
+ s_mvPred.i1_ref_frame[0] = i1_ref_idx;
+
+ /********************************************************/
+ /* Predict Mv */
+ /* Add Mv Residuals and store back */
+ /********************************************************/
+ ps_dec->pf_mvpred(ps_dec, ps_cur_mb_info, ps_mv_nmb, ps_mv_ntop,
+ &s_mvPred, u1_sub_mb_num, u1_wd, 0, 1,
+ ps_cur_mb_info->u1_mb_mc_mode);
+ i2_mv_x = ps_mv_nmb->i2_mv[0]; /* value already stored for this sub-MB, added to the prediction below */
+ i2_mv_y = ps_mv_nmb->i2_mv[1];
+ i2_mv_x += s_mvPred.i2_mv[0];
+ i2_mv_y += s_mvPred.i2_mv[1];
+ s_mvPred.i2_mv[0] = i2_mv_x;
+ s_mvPred.i2_mv[1] = i2_mv_y;
+
+ /********************************************************/
+ /* Transfer setup call */
+ /* convert RefIdx if it is MbAff */
+ /* Pass Weight Offset and refFrame */
+ /********************************************************/
+ i1_ref_idx1 = i1_ref_idx >> u1_scale_ref; /* halved for field MBs under MBAFF */
+ if(u1_scale_ref && ((i1_ref_idx & 0x01) != u4_bot_mb)) /* parity of the index differs from the MB's top/bottom position */
+ i1_ref_idx1 += MAX_REF_BUFS;
+ ps_ref_frame = pps_ref_frame[i1_ref_idx1];
+ pu4_wt_offst = ppu4_wt_ofst[u1_blk_no];
+
+
+
+
+
+ /* Append one PRED_L0 entry for this partition to the packed prediction-info array */
+ {
+ pred_info_pkd_t *ps_pred_pkd;
+ ps_pred_pkd = ps_dec->ps_pred_pkd + ps_dec->u4_pred_info_pkd_idx;
+ ih264d_fill_pred_info (s_mvPred.i2_mv,u1_wd,u1_ht,u1_sub_mb_num,PRED_L0,ps_pred_pkd,
+ ps_ref_frame->u1_pic_buf_id,(i1_ref_idx >> u1_scale_ref),pu4_wt_offst,
+ ps_ref_frame->u1_pic_type);
+
+ ps_dec->u4_pred_info_pkd_idx++;
+ ps_cur_mb_info->u1_num_pred_parts++;
+ }
+
+
+
+ /* Fill colocated info in MvPred structure */
+ s_mvPred.u1_col_ref_pic_idx = ps_ref_frame->u1_mv_buf_id;
+ s_mvPred.u1_pic_type = ps_ref_frame->u1_pic_type;
+
+ /* Calculating colocated zero information */
+ u1_colz =
+ (u1_field << 1)
+ | ((i1_ref_idx == 0)
+ && (ABS(i2_mv_x)
+ <= 1)
+ && (ABS(i2_mv_y)
+ <= 1));
+ u1_colz |= ps_mb_part_info->u1_col_info[u1_blk_no];
+
+ /* Replicate the motion vectors and colzero u4_flag */
+ /* for all sub-partitions */
+
+ ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb,
+ u1_sub_mb_num, u1_colz, u1_ht,
+ u1_wd);
+ }
+ }
+
+ }
+ else
+ {
+ /* Intra MB: replicate the default MV prediction over the MB, with only the field bit set in the col-zero value */
+ ih264d_rep_mv_colz(ps_dec, &s_mvPred, ps_mv_nmb_start, 0,
+ (UWORD8)(u1_field << 1), 4, 4);
+
+ }
+ /* If num_cores is set to 3, compute bs is done in another thread */
+ if(ps_dec->u4_num_cores < 3)
+ {
+
+ if(ps_dec->u4_app_disable_deblk_frm == 0)
+ ps_dec->pf_compute_bs(ps_dec, ps_cur_mb_info,
+ (UWORD16)(i >> u1_mbaff));
+ }
+ }
+
+
+
+ return OK;
+}
+
+#if THREAD_PARSE
+
+#else
+WORD32 ih264d_decode_recon_tfr_nmb(dec_struct_t * ps_dec, /* decoder context */
+ UWORD8 u1_mb_idx, /* index of the first MB of the group to process */
+ UWORD8 u1_num_mbs, /* loop bound: MBs u1_mb_idx .. u1_num_mbs - 1 are processed */
+ UWORD8 u1_num_mbs_next, /* MB count of the next group; only used to detect that the next group ends the row */
+ UWORD8 u1_tfr_n_mb, /* non-zero: transfer the group's data once reconstruction is done */
+ UWORD8 u1_end_of_row) /* forwarded to ih264d_transfer_mb_group_data; also resets the group column indices */
+{ /* Motion-compensates, then reconstructs each MB of the group. Returns OK, or the error code from ih264d_process_intra_mb. */
+ WORD32 i,j;
+ UWORD32 u1_end_of_row_next;
+ dec_mb_info_t * ps_cur_mb_info;
+ UWORD32 u4_update_mbaff = 0;
+ WORD32 ret;
+ const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ const UWORD32 u1_slice_type = ps_dec->ps_cur_slice->u1_slice_type;
+ const WORD32 u1_skip_th = ( /* MB types <= this value take the inter path; -1 for I slices so none match */
+ (u1_slice_type != I_SLICE) ?
+ (ps_dec->u1_B ? B_8x8 : PRED_8x8R0) : -1);
+ const UWORD32 u1_ipcm_th = ( /* u1_ipcm_th + 25 is the MB type that bypasses ih264d_process_intra_mb below (presumably I_PCM - confirm) */
+ (u1_slice_type != I_SLICE) ? (ps_dec->u1_B ? 23 : 5) : 0);
+
+
+
+
+
+ /* N Mb MC Loop */
+ for(i = u1_mb_idx; i < u1_num_mbs; i++)
+ {
+ ps_cur_mb_info = ps_dec->ps_nmb_info + i;
+ ps_dec->u4_dma_buf_idx = 0;
+ ps_dec->u4_pred_info_idx = 0;
+
+ if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
+ {
+ {
+ WORD32 pred_cnt = 0;
+ pred_info_pkd_t *ps_pred_pkd;
+ UWORD32 u4_pred_info_pkd_idx;
+ WORD8 i1_pred; /* NOTE(review): unused */
+
+ u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
+
+ while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) /* walk this MB's packed prediction entries */
+ {
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
+
+ ps_dec->p_form_mb_part_info(ps_pred_pkd,ps_dec,
+ ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff),
+ ps_cur_mb_info);
+ u4_pred_info_pkd_idx++;
+ pred_cnt++;
+ }
+ }
+
+ ps_dec->p_motion_compensate(ps_dec, ps_cur_mb_info);
+
+ }
+ else if(ps_cur_mb_info->u1_mb_type == MB_SKIP) /* NOTE(review): this branch repeats the statements of the branch above */
+ {
+ {
+ WORD32 pred_cnt = 0;
+ pred_info_pkd_t *ps_pred_pkd;
+ UWORD32 u4_pred_info_pkd_idx;
+ WORD8 i1_pred; /* NOTE(review): unused */
+
+ u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
+
+ while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
+ {
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
+
+ ps_dec->p_form_mb_part_info(ps_pred_pkd,ps_dec,
+ ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff),
+ ps_cur_mb_info);
+
+ u4_pred_info_pkd_idx++;
+ pred_cnt++;
+ }
+ }
+ /* Decode MB skip */
+ ps_dec->p_motion_compensate(ps_dec, ps_cur_mb_info);
+
+ }
+
+ }
+
+
+ /* N Mb IQ IT RECON Loop */
+ for(j = u1_mb_idx; j < i; j++) /* i is the value left by the MC loop above */
+ {
+ ps_cur_mb_info = ps_dec->ps_nmb_info + j;
+
+ if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
+ {
+ ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j); /* return value not checked */
+
+ }
+ else if(ps_cur_mb_info->u1_mb_type != MB_SKIP)
+ {
+ if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type)
+ {
+ ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1); /* rebase the type so it is relative to the first type above the inter range */
+ ret = ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j);
+ if(ret != OK)
+ return ret;
+ }
+ }
+
+ if(ps_dec->u4_mb_level_deblk == 1)
+ {
+ ih264d_deblock_mb_level(ps_dec, ps_cur_mb_info, j);
+
+ }
+
+ if(u1_mbaff)
+ {
+ if(u4_update_mbaff) /* toggles every MB, so the map is updated on every second MB, using u2_mby >> 1 */
+ {
+ UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx
+ + ps_dec->u2_frm_wd_in_mbs
+ * (ps_cur_mb_info->u2_mby >> 1);
+ UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num);
+ u4_update_mbaff = 0;
+ }
+ else
+ {
+ u4_update_mbaff = 1;
+ }
+ }
+ else
+ {
+ UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx
+ + ps_dec->u2_frm_wd_in_mbs * ps_cur_mb_info->u2_mby;
+ UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num);
+ }
+ }
+
+
+ if(u1_tfr_n_mb)
+ {
+ /****************************************************************/
+ /* Check for End Of Row in Next iteration */
+ /****************************************************************/
+ u1_end_of_row_next = /* next group is non-empty and no larger than u1_recon_mb_grp (halved under MBAFF) */
+ u1_num_mbs_next
+ && (u1_num_mbs_next
+ <= (ps_dec->u1_recon_mb_grp
+ >> u1_mbaff));
+
+ /****************************************************************/
+ /* Transfer the Following things */
+ /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */
+ /* N-Mb Recon Data ( To Ext Frame Buffer ) */
+ /* N-Mb Intrapredline Data ( Updated Internally) */
+ /* N-Mb MV Data ( To Ext MV Buffer ) */
+ /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */
+ /****************************************************************/
+ ih264d_transfer_mb_group_data(ps_dec, u1_num_mbs, u1_end_of_row,
+ u1_end_of_row_next);
+ ps_dec->u4_num_mbs_prev_nmb = u1_num_mbs;
+
+ if(u1_end_of_row)
+ {
+ /* Reset the N-Mb Recon Buf Index to default Values */
+ ps_dec->u2_mb_group_cols_y1 = ps_dec->u2_mb_group_cols_y;
+ ps_dec->u2_mb_group_cols_cr1 = ps_dec->u2_mb_group_cols_cr;
+ }
+ /* If next N-Mb Group is the EndOfRow, set the N-Mb Recon Buf Index */
+ else if(u1_end_of_row_next)
+ {
+ ps_dec->u2_mb_group_cols_y1 = (u1_num_mbs_next << 4) + 8;
+ ps_dec->u2_mb_group_cols_cr1 = (u1_num_mbs_next << 3) + 8;
+ }
+ ps_dec->u4_pred_info_idx = 0;
+ ps_dec->u4_dma_buf_idx = 0;
+
+
+ }
+ return OK;
+}
+#endif
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_process_inter_mb \endif
+ *
+ * \brief
+ * This function decodes an Inter MB.
+ *
+ *
+ * \return
+ * 0 on Success and Error code otherwise
+ **************************************************************************
+ */
+WORD32 ih264d_process_inter_mb(dec_struct_t * ps_dec, /* decoder context; supplies destination pointers, strides, scaling lists and transform function pointers */
+ dec_mb_info_t * ps_cur_mb_info, /* per-MB info: cbp, csbp, QP div/rem values, field and transform-size flags */
+ UWORD8 u1_mb_num) /* MB index within the group; shifted right by the MBAFF flag to get the column offset */
+{ /* Unpacks the MB's coefficients and adds the inverse-transformed residual in place onto the destination buffers. Always returns 0. */
+ /* Destination pointers for this MB */
+ UWORD8 *pu1_rec_y, *pu1_rec_u, *pu1_rec_v;
+
+ /* Luma and chroma strides */
+ UWORD32 ui_rec_width, u4_recwidth_cr;
+ WORD16 *pi2_y_coeff;
+ UWORD32 u1_mb_field_decoding_flag;
+ const UWORD8 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD32 uc_botMb;
+ UWORD32 u4_num_pmbair;
+ /* CHANGED CODE */
+ tfr_ctxt_t *ps_frame_buf = &ps_dec->s_tran_addrecon;
+ UWORD32 u4_luma_dc_only_csbp = 0; /* per-4x4 mask selecting the _dc transform variant */
+ UWORD32 u4_luma_dc_only_cbp = 0; /* per-8x8 mask selecting the _dc transform variant */
+ /* CHANGED CODE */
+
+ uc_botMb = 1 - ps_cur_mb_info->u1_topmb;
+ u4_num_pmbair = (u1_mb_num >> u1_mbaff);
+ u1_mb_field_decoding_flag = ps_cur_mb_info->u1_mb_field_decodingflag;
+
+
+ /* 16 luma bytes and 8 * YUV420SP_FACTOR chroma bytes per MB column */
+ pu1_rec_y = ps_frame_buf->pu1_dest_y + (u4_num_pmbair << 4);
+ pu1_rec_u =
+ ps_frame_buf->pu1_dest_u
+ + (u4_num_pmbair << 3) * YUV420SP_FACTOR;
+ pu1_rec_v = ps_frame_buf->pu1_dest_v + (u4_num_pmbair << 3); /* NOTE(review): pu1_rec_v is computed but never read below; V blocks are addressed as pu1_rec_u + 1 */
+ ui_rec_width = ps_dec->u2_frm_wd_y << u1_mb_field_decoding_flag; /* stride doubles for field MBs */
+ u4_recwidth_cr = ps_dec->u2_frm_wd_uv << u1_mb_field_decoding_flag;
+
+ /* Bottom MB of an MBAFF pair: half a (doubled) stride down for field MBs, a full MB height down for frame MBs */
+
+ if(u1_mbaff)
+ {
+ if(uc_botMb)
+ {
+ pu1_rec_y += (u1_mb_field_decoding_flag ?
+ (ui_rec_width >> 1) : (ui_rec_width << 4));
+ pu1_rec_u += (u1_mb_field_decoding_flag ?
+ (u4_recwidth_cr >> 1) : (u4_recwidth_cr << 3));
+ pu1_rec_v += (u1_mb_field_decoding_flag ?
+ (u4_recwidth_cr >> 1) : (u4_recwidth_cr << 3));
+ }
+ }
+
+ if(!ps_cur_mb_info->u1_tran_form8x8)
+ {
+ u4_luma_dc_only_csbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec,
+ ps_cur_mb_info,
+ 0);
+ }
+ else
+ {
+ if(!ps_dec->ps_cur_pps->u1_entropy_coding_mode) /* entropy_coding_mode == 0: the 4x4 unpack routine is used even for 8x8-transform MBs */
+ {
+ u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff4x4_mb(ps_dec,
+ ps_cur_mb_info,
+ 0);
+ }
+ else
+ {
+ u4_luma_dc_only_cbp = ih264d_unpack_luma_coeff8x8_mb(ps_dec,
+ ps_cur_mb_info);
+ }
+ }
+
+ pi2_y_coeff = ps_dec->pi2_coeff_data;
+ /* Inverse Transform and Reconstruction */
+ if(ps_cur_mb_info->u1_cbp & 0x0f) /* low four cbp bits are tested per 8x8 luma block below */
+ {
+ /* 4x4 transform path: 16 blocks of 16 coefficients each */
+ if(!ps_cur_mb_info->u1_tran_form8x8)
+ {
+ UWORD32 i;
+ WORD16 ai2_tmp[16];
+ for(i = 0; i < 16; i++)
+ {
+ if(CHECKBIT(ps_cur_mb_info->u2_luma_csbp, i))
+ {
+ WORD16 *pi2_level = pi2_y_coeff + (i << 4);
+ UWORD8 *pu1_pred_sblk = pu1_rec_y + ((i & 0x3) * BLK_SIZE) /* block i sits at column i & 3, row i >> 2 of the 4x4 grid */
+ + (i >> 2) * (ui_rec_width << 2);
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(u4_luma_dc_only_csbp, i))
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_4x4_dc(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ ui_rec_width,
+ ui_rec_width,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[3],
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
+ NULL);
+ }
+ else
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_4x4(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ ui_rec_width,
+ ui_rec_width,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[3],
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
+ NULL);
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ WORD16 *pi2_scale_matrix_ptr;
+ WORD32 i;
+
+ pi2_scale_matrix_ptr =
+ ps_dec->s_high_profile.i2_scalinglist8x8[1];
+
+ for(i = 0; i < 4; i++)
+ {
+ WORD16 ai2_tmp[64];
+ WORD16 *pi16_levelBlock = pi2_y_coeff + (i << 6); /* move to the next 8x8 adding 64 */
+
+ UWORD8 *pu1_pred_sblk = pu1_rec_y + ((i & 0x1) * BLK8x8SIZE) /* block i sits at column i & 1, row i >> 1 of the 2x2 grid */
+ + (i >> 1) * (ui_rec_width << 3);
+ if(CHECKBIT(ps_cur_mb_info->u1_cbp, i))
+ {
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(u4_luma_dc_only_cbp, i))
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_8x8_dc(
+ pi16_levelBlock,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ ui_rec_width,
+ ui_rec_width,
+ gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)pi2_scale_matrix_ptr,
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
+ NULL);
+ }
+ else
+ {
+ ps_dec->pf_iquant_itrans_recon_luma_8x8(
+ pi16_levelBlock,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ ui_rec_width,
+ ui_rec_width,
+ gau1_ih264d_dequant8x8_cavlc[ps_cur_mb_info->u1_qp_rem6],
+ (UWORD16 *)pi2_scale_matrix_ptr,
+ ps_cur_mb_info->u1_qp_div6, ai2_tmp, 0,
+ NULL);
+ }
+ }
+ }
+ }
+
+ }
+ }
+
+ /* Decode Chroma Block */
+ ih264d_unpack_chroma_coeff4x4_mb(ps_dec,
+ ps_cur_mb_info);
+ /*--------------------------------------------------------------------*/
+ /* Chroma Blocks decoding */
+ /*--------------------------------------------------------------------*/
+ {
+ UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4); /* chroma part of cbp is in the bits above the low four */
+
+ if(u1_chroma_cbp != CBPC_ALLZERO)
+ {
+ UWORD32 u4_scale_u = ps_cur_mb_info->u1_qpc_div6;
+ UWORD32 u4_scale_v = ps_cur_mb_info->u1_qpcr_div6;
+ UWORD16 u2_chroma_csbp = ps_cur_mb_info->u2_chroma_csbp;
+
+ pi2_y_coeff = ps_dec->pi2_coeff_data;
+ /* First chroma plane: four 4x4 blocks written starting at pu1_rec_u */
+ {
+ UWORD32 i;
+ WORD16 ai2_tmp[16];
+ for(i = 0; i < 4; i++)
+ {
+ WORD16 *pi2_level = pi2_y_coeff + (i << 4);
+ UWORD8 *pu1_pred_sblk = pu1_rec_u
+ + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
+ + (i >> 1) * (u4_recwidth_cr << 2);
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(u2_chroma_csbp, i))
+ {
+ ps_dec->pf_iquant_itrans_recon_chroma_4x4(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ u4_recwidth_cr,
+ u4_recwidth_cr,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[4],
+ u4_scale_u, ai2_tmp, pi2_level);
+ }
+ else if(pi2_level[0] != 0) /* csbp bit clear but first coefficient non-zero: use the _dc variant */
+ {
+ ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ u4_recwidth_cr,
+ u4_recwidth_cr,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpc_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[4],
+ u4_scale_u, ai2_tmp, pi2_level);
+ }
+ }
+ }
+ }
+
+ pi2_y_coeff += MB_CHROM_SIZE; /* advance to the second plane's coefficients */
+ u2_chroma_csbp >>= 4; /* next four csbp bits belong to the second plane */
+ /* Second chroma plane: same layout, one byte further (pu1_rec_u + 1) */
+ {
+ UWORD32 i;
+ WORD16 ai2_tmp[16];
+ for(i = 0; i < 4; i++)
+ {
+ WORD16 *pi2_level = pi2_y_coeff + (i << 4);
+ UWORD8 *pu1_pred_sblk = pu1_rec_u + 1
+ + ((i & 0x1) * BLK_SIZE * YUV420SP_FACTOR)
+ + (i >> 1) * (u4_recwidth_cr << 2);
+ PROFILE_DISABLE_IQ_IT_RECON()
+ {
+ if(CHECKBIT(u2_chroma_csbp, i))
+ {
+ ps_dec->pf_iquant_itrans_recon_chroma_4x4(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ u4_recwidth_cr,
+ u4_recwidth_cr,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[5],
+ u4_scale_v, ai2_tmp, pi2_level);
+ }
+ else if(pi2_level[0] != 0)
+ {
+ ps_dec->pf_iquant_itrans_recon_chroma_4x4_dc(
+ pi2_level,
+ pu1_pred_sblk,
+ pu1_pred_sblk,
+ u4_recwidth_cr,
+ u4_recwidth_cr,
+ gau2_ih264_iquant_scale_4x4[ps_cur_mb_info->u1_qpcr_rem6],
+ (UWORD16 *)ps_dec->s_high_profile.i2_scalinglist4x4[5],
+ u4_scale_v, ai2_tmp, pi2_level);
+ }
+ }
+ }
+ }
+ }
+ }
+ return (0); /* no failure path in this function */
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_parse_pred_weight_table \endif
+ *
+ * \brief
+ * Implements pred_weight_table() of 7.3.3.2.
+ *
+ * \return
+ * ERROR_PRED_WEIGHT_TABLE_T if a parsed value fails its range check
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_parse_pred_weight_table(dec_slice_params_t * ps_cur_slice,
+ dec_bit_stream_t * ps_bitstrm)
+{
+ UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+ WORD8 i, cont, lx;
+ UWORD8 uc_weight_flag;
+ UWORD32 *pui32_weight_offset_lx;
+ WORD16 c_weight, c_offset;
+ UWORD32 ui32_y_def_weight_ofst, ui32_cr_def_weight_ofst;
+ UWORD32 ui32_temp;
+ UWORD8 uc_luma_log2_weight_denom;
+ UWORD8 uc_chroma_log2_weight_denom;
+
+ /* Variables for error resilience checks */
+ UWORD32 u4_temp;
+ WORD32 i_temp;
+
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ if(u4_temp & MASK_LOG2_WEIGHT_DENOM)
+ {
+ return ERROR_PRED_WEIGHT_TABLE_T;
+ }
+ uc_luma_log2_weight_denom = u4_temp;
+ COPYTHECONTEXT("SH: luma_log2_weight_denom",uc_luma_log2_weight_denom);
+ ui32_y_def_weight_ofst = (1 << uc_luma_log2_weight_denom);
+
+ u4_temp = ih264d_uev(pu4_bitstrm_ofst, pu4_bitstrm_buf);
+ if(u4_temp & MASK_LOG2_WEIGHT_DENOM)
+ {
+ return ERROR_PRED_WEIGHT_TABLE_T;
+ }
+ uc_chroma_log2_weight_denom = u4_temp;
+ COPYTHECONTEXT("SH: chroma_log2_weight_denom",uc_chroma_log2_weight_denom);
+ ui32_cr_def_weight_ofst = (1 << uc_chroma_log2_weight_denom);
+
+ ps_cur_slice->u2_log2Y_crwd = uc_luma_log2_weight_denom
+ | (uc_chroma_log2_weight_denom << 8);
+
+ cont = (ps_cur_slice->u1_slice_type == B_SLICE);
+ lx = 0;
+ do
+ {
+ for(i = 0; i < ps_cur_slice->u1_num_ref_idx_lx_active[lx]; i++)
+ {
+ pui32_weight_offset_lx = ps_cur_slice->u4_wt_ofst_lx[lx][i];
+
+ uc_weight_flag = ih264d_get_bit_h264(ps_bitstrm);
+ pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ COPYTHECONTEXT("SH: luma_weight_l0_flag",uc_weight_flag);
+ if(uc_weight_flag)
+ {
+ i_temp = ih264d_sev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET)
+ return ERROR_PRED_WEIGHT_TABLE_T;
+ c_weight = i_temp;
+ COPYTHECONTEXT("SH: luma_weight_l0",c_weight);
+
+ i_temp = ih264d_sev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET)
+ return ERROR_PRED_WEIGHT_TABLE_T;
+ c_offset = i_temp;
+ COPYTHECONTEXT("SH: luma_offset_l0",c_offset);
+
+ ui32_temp = (c_offset << 16) | (c_weight & 0xFFFF);
+ pui32_weight_offset_lx[0] = ui32_temp;
+ }
+ else
+ {
+
+ pui32_weight_offset_lx[0] = ui32_y_def_weight_ofst;
+ }
+
+ {
+ WORD8 c_weightCb, c_weightCr, c_offsetCb, c_offsetCr;
+ uc_weight_flag = ih264d_get_bit_h264(ps_bitstrm);
+ pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ COPYTHECONTEXT("SH: chroma_weight_l0_flag",uc_weight_flag);
+ if(uc_weight_flag)
+ {
+ i_temp = ih264d_sev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET)
+ return ERROR_PRED_WEIGHT_TABLE_T;
+ c_weightCb = i_temp;
+ COPYTHECONTEXT("SH: chroma_weight_l0",c_weightCb);
+
+ i_temp = ih264d_sev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET)
+ return ERROR_PRED_WEIGHT_TABLE_T;
+ c_offsetCb = i_temp;
+ COPYTHECONTEXT("SH: chroma_weight_l0",c_offsetCb);
+
+ ui32_temp = (c_offsetCb << 16) | (c_weightCb & 0xFFFF);
+ pui32_weight_offset_lx[1] = ui32_temp;
+
+ i_temp = ih264d_sev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET)
+ return ERROR_PRED_WEIGHT_TABLE_T;
+ c_weightCr = i_temp;
+ COPYTHECONTEXT("SH: chroma_weight_l0",c_weightCr);
+
+ i_temp = ih264d_sev(pu4_bitstrm_ofst,
+ pu4_bitstrm_buf);
+ if((i_temp + 128) & MASK_PRED_WEIGHT_OFFSET)
+ return ERROR_PRED_WEIGHT_TABLE_T;
+ c_offsetCr = i_temp;
+ COPYTHECONTEXT("SH: chroma_weight_l0",c_offsetCr);
+
+ ui32_temp = (c_offsetCr << 16) | (c_weightCr & 0xFFFF);
+ pui32_weight_offset_lx[2] = ui32_temp;
+ }
+ else
+ {
+ pui32_weight_offset_lx[1] = ui32_cr_def_weight_ofst;
+ pui32_weight_offset_lx[2] = ui32_cr_def_weight_ofst;
+ }
+ }
+ }
+ lx++;
+ }
+ while(cont--);
+
+ return OK;
+}
+
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_init_ref_idx_lx_p */
+/* */
+/* Description : This function initializes the reference picture L0 list */
+/* for P slices as per section 8.2.4.2.1 and 8.2.4.2.2. */
+/* */
+/* Inputs : pointer to ps_dec structure */
+/* Globals : NO */
+/* Processing : arranges all the short term pictures according to */
+/* pic_num in descending order starting from curr pic_num. */
+/* and inserts it in L0 list followed by all Long term */
+/* pictures in ascending order. Unused entries up to the */
+/* active reference count are padded, and the list size */
+/* is stored in u1_initial_list_size[0]. */
+/* */
+/* Returns : void */
+/* */
+/* Issues : <List any issues or problems with this function> */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Jay Draft */
+/* */
+/*****************************************************************************/
+void ih264d_init_ref_idx_lx_p(dec_struct_t *ps_dec)
+{
+ struct pic_buffer_t *ps_ref_pic_buf_lx;
+ dpb_manager_t *ps_dpb_mgr;
+ struct dpb_info_t *ps_next_dpb;
+ WORD8 i;
+ UWORD8 u1_max_lt_index, u1_min_lt_index, u1_lt_index;
+ UWORD8 u1_field_pic_flag;
+ dec_slice_params_t *ps_cur_slice;
+ UWORD8 u1_L0;
+ WORD32 i4_cur_pic_num, i4_min_st_pic_num;
+ WORD32 i4_temp_pic_num, i4_ref_pic_num;
+ UWORD8 u1_num_short_term_bufs;
+ UWORD8 u1_max_ref_idx_l0;
+
+ ps_cur_slice = ps_dec->ps_cur_slice;
+ u1_field_pic_flag = ps_cur_slice->u1_field_pic_flag;
+ /* Number of entries to fill: doubled when field_pic_flag is 1 */
+ u1_max_ref_idx_l0 = ps_cur_slice->u1_num_ref_idx_lx_active[0]
+ << u1_field_pic_flag;
+
+ ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+ /* Get the current frame number */
+ i4_cur_pic_num = ps_dec->ps_cur_pic->i4_pic_num;
+
+ /* Get Min pic_num,MinLt */
+ i4_min_st_pic_num = i4_cur_pic_num;
+ u1_max_lt_index = MAX_REF_BUFS + 1;
+ u1_min_lt_index = MAX_REF_BUFS + 1;
+
+ /* Start from ST head */
+ /* Pass 1: find the smallest short-term pic_num below the current one */
+ ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+ for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+ {
+ i4_ref_pic_num = ps_next_dpb->ps_pic_buf->i4_pic_num;
+ if(i4_ref_pic_num < i4_cur_pic_num)
+ {
+ /* RefPic Buf pic_num is before Current pic_num in decode order */
+ i4_min_st_pic_num = MIN(i4_min_st_pic_num, i4_ref_pic_num);
+ }
+
+ /* Chase the next link */
+ ps_next_dpb = ps_next_dpb->ps_prev_short;
+ }
+
+ /* Start from LT head */
+ /* Pass 2: find the range [min, max] of long-term indices in use */
+ ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
+ if(ps_next_dpb)
+ {
+ u1_max_lt_index = ps_next_dpb->u1_lt_idx;
+ u1_min_lt_index = ps_next_dpb->u1_lt_idx;
+
+ for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
+ {
+ u1_lt_index = ps_next_dpb->u1_lt_idx;
+ u1_max_lt_index = (UWORD8)(MAX(u1_max_lt_index, u1_lt_index));
+ u1_min_lt_index = (UWORD8)(MIN(u1_min_lt_index, u1_lt_index));
+
+ /* Chase the next link */
+ ps_next_dpb = ps_next_dpb->ps_prev_long;
+ }
+ }
+ /* 1. Initialize refIdxL0 */
+ u1_L0 = 0;
+ if(u1_field_pic_flag)
+ {
+ /* Field picture: build the frame list MAX_REF_BUFS entries past the */
+ /* start of the init list; it is converted to a field list below */
+ ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0];
+ ps_ref_pic_buf_lx += MAX_REF_BUFS;
+ i4_temp_pic_num = i4_cur_pic_num;
+ }
+ else
+ {
+ ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0];
+ i4_temp_pic_num = i4_cur_pic_num;
+ }
+
+ /* Arrange all short term buffers in output order as given by pic_num */
+ /* Arrange pic_num's less than Curr pic_num in the descending pic_num */
+ /* order starting from (Curr pic_num - 1) */
+ /* NOTE(review): the loop actually starts at Curr pic_num itself, so a */
+ /* short-term entry with the current pic_num would also be inserted */
+ for(; i4_temp_pic_num >= i4_min_st_pic_num; i4_temp_pic_num--)
+ {
+ /* Start from ST head */
+ ps_next_dpb = ps_dpb_mgr->ps_dpb_st_head;
+ for(i = 0; i < ps_dpb_mgr->u1_num_st_ref_bufs; i++)
+ {
+ if((WORD32)ps_next_dpb->ps_pic_buf->i4_pic_num == i4_temp_pic_num)
+ {
+ /* Copy info in pic buffer */
+ ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
+ ps_next_dpb->ps_pic_buf);
+ ps_ref_pic_buf_lx++;
+ u1_L0++;
+ break;
+ }
+ ps_next_dpb = ps_next_dpb->ps_prev_short;
+ }
+ }
+
+ /* Arrange all Long term buffers in ascending order, in LongtermIndex */
+ /* Start from LT head */
+ /* u1_L0 at this point is the number of short-term entries inserted */
+ u1_num_short_term_bufs = u1_L0;
+ for(u1_lt_index = u1_min_lt_index; u1_lt_index <= u1_max_lt_index;
+ u1_lt_index++)
+ {
+ ps_next_dpb = ps_dpb_mgr->ps_dpb_ht_head;
+ for(i = 0; i < ps_dpb_mgr->u1_num_lt_ref_bufs; i++)
+ {
+ if(ps_next_dpb->u1_lt_idx == u1_lt_index)
+ {
+ ih264d_insert_pic_in_ref_pic_listx(ps_ref_pic_buf_lx,
+ ps_next_dpb->ps_pic_buf);
+
+ ps_ref_pic_buf_lx->u1_long_term_pic_num =
+ ps_ref_pic_buf_lx->u1_long_term_frm_idx;
+ ps_ref_pic_buf_lx++;
+ u1_L0++;
+ break;
+ }
+ ps_next_dpb = ps_next_dpb->ps_prev_long;
+ }
+ }
+
+ if(u1_field_pic_flag)
+ {
+ /* Initialize the rest of the entries in the */
+ /* reference list to handle errors */
+ {
+ UWORD8 u1_i;
+ pic_buffer_t *ps_ref_pic;
+
+ ps_ref_pic = ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS;
+
+ /* Pad with the first list entry, or with the current picture */
+ /* when that entry has no buffer attached */
+ if(NULL == ps_ref_pic->pu1_buf1)
+ {
+ ps_ref_pic = ps_dec->ps_cur_pic;
+ }
+ for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++)
+ {
+ *ps_ref_pic_buf_lx = *ps_ref_pic;
+ ps_ref_pic_buf_lx++;
+ }
+ }
+
+ /* Converts the frame list built above; u1_L0 is passed by address */
+ /* and is used afterwards as the size of the converted list */
+ ih264d_convert_frm_to_fld_list(
+ ps_dpb_mgr->ps_init_dpb[0][0] + MAX_REF_BUFS, &u1_L0,
+ ps_dec, u1_num_short_term_bufs);
+
+ ps_ref_pic_buf_lx = ps_dpb_mgr->ps_init_dpb[0][0] + u1_L0;
+ }
+
+ /* Initialize the rest of the entries in the */
+ /* reference list to handle errors */
+ {
+ UWORD8 u1_i;
+ pic_buffer_t *ps_ref_pic;
+
+ ps_ref_pic = ps_dpb_mgr->ps_init_dpb[0][0];
+
+ /* Same padding rule as above, applied to the final list */
+ if(NULL == ps_ref_pic->pu1_buf1)
+ {
+ ps_ref_pic = ps_dec->ps_cur_pic;
+ }
+ for(u1_i = u1_L0; u1_i < u1_max_ref_idx_l0; u1_i++)
+ {
+ *ps_ref_pic_buf_lx = *ps_ref_pic;
+ ps_ref_pic_buf_lx++;
+ }
+ }
+ /* Size before padding (after field conversion for field pictures) */
+ ps_dec->ps_cur_slice->u1_initial_list_size[0] = u1_L0;
+}
+
diff --git a/decoder/ih264d_process_pslice.h b/decoder/ih264d_process_pslice.h
new file mode 100755
index 0000000..8740eb4
--- /dev/null
+++ b/decoder/ih264d_process_pslice.h
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_PROCESS_PSLICE_H_
+#define _IH264D_PROCESS_PSLICE_H_
+/*!
+**************************************************************************
+* \file ih264d_process_pslice.h
+*
+* \brief
+* Contains declarations of routines that decode a P slice type
+*
+* Detailed_description
+*
+* \date
+* 21/12/2002
+*
+* \author NS
+**************************************************************************
+*/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+WORD32 ih264d_parse_pslice(dec_struct_t *ps_dec,
+ UWORD16 u2_first_mb_in_slice);
+/* Parses pred_weight_table() (7.3.3.2) into ps_cur_slice; returns OK or */
+/* ERROR_PRED_WEIGHT_TABLE_T */
+WORD32 ih264d_parse_pred_weight_table(dec_slice_params_t * ps_cur_slice,
+ dec_bit_stream_t * ps_bitstrm);
+
+WORD32 parsePSliceData(dec_struct_t * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice);
+
+WORD32 ih264d_process_inter_mb(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num);
+
+/* Builds the initial reference picture list L0 for a P slice */
+void ih264d_init_ref_idx_lx_p(dec_struct_t *ps_dec);
+
+WORD32 ih264d_mv_pred_ref_tfr_nby2_pmb(dec_struct_t * ps_dec,
+ UWORD8 u1_num_mbs,
+ UWORD8 u1_num_mbsNby2);
+
+WORD32 ih264d_decode_recon_tfr_nmb(dec_struct_t * ps_dec,
+ UWORD8 u1_mb_idx,
+ UWORD8 u1_num_mbs,
+ UWORD8 u1_num_mbs_next,
+ UWORD8 u1_tfr_n_mb,
+ UWORD8 u1_end_of_row);
+
+/* Used by ih264d_init_ref_idx_lx_p to place ps_pic into the list slot */
+/* ps_ref_pic_buf_lx (presumably a copy of the picture info - confirm) */
+void ih264d_insert_pic_in_ref_pic_listx(struct pic_buffer_t *ps_ref_pic_buf_lx,
+ struct pic_buffer_t *ps_pic);
+#endif /* _IH264D_PROCESS_PSLICE_H_ */
diff --git a/decoder/ih264d_quant_scaling.c b/decoder/ih264d_quant_scaling.c
new file mode 100755
index 0000000..fa9aeb5
--- /dev/null
+++ b/decoder/ih264d_quant_scaling.c
@@ -0,0 +1,274 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_structs.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_defs.h"
+#include "ih264d_defs.h"
+#include "ih264d_defs.h"
+
+#include "ih264d_parse_slice.h"
+#include "ih264d_tables.h"
+#include "ih264d_utils.h"
+#include "ih264d_nal.h"
+#include "ih264d_deblocking.h"
+
+#include "ih264d_mem_request.h"
+#include "ih264d_debug.h"
+
+#include "ih264d_error_handler.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_sei.h"
+#include "ih264d_vui.h"
+#include "ih264d_tables.h"
+
+#define IDCT_BLOCK_WIDTH8X8 8
+
+/*
+ * Decodes one delta-coded scaling list from the bitstream.
+ *
+ * pi2_scaling_list                     receives i4_size_of_scalinglist entries
+ * pu1_use_default_scaling_matrix_flag  set to 1 when the very first decoded
+ *                                      scale is 0, otherwise 0
+ * ps_bitstrm                           bitstream to read se(v) deltas from
+ *
+ * Once a decoded scale of 0 is seen, no further deltas are read and the
+ * previous scale is repeated for the remaining entries.
+ */
+void ih264d_scaling_list(WORD16 *pi2_scaling_list,
+                         WORD32 i4_size_of_scalinglist,
+                         UWORD8 *pu1_use_default_scaling_matrix_flag,
+                         dec_bit_stream_t *ps_bitstrm)
+{
+    UWORD32 *pu4_buf = ps_bitstrm->pu4_buffer;
+    UWORD32 *pu4_ofst = &ps_bitstrm->u4_ofst;
+    WORD32 i4_prev = 8;
+    WORD32 i4_next = 8;
+    WORD32 i4_idx = 0;
+
+    *pu1_use_default_scaling_matrix_flag = 0;
+
+    while(i4_idx < i4_size_of_scalinglist)
+    {
+        if(0 != i4_next)
+        {
+            WORD32 i4_delta = ih264d_sev(pu4_ofst, pu4_buf);
+
+            /* modulo-256 update of the running scale */
+            i4_next = (i4_prev + i4_delta + 256) & 0xff;
+
+            if((0 == i4_idx) && (0 == i4_next))
+            {
+                *pu1_use_default_scaling_matrix_flag = 1;
+            }
+            else
+            {
+                *pu1_use_default_scaling_matrix_flag = 0;
+            }
+        }
+
+        if(0 != i4_next)
+        {
+            pi2_scaling_list[i4_idx] = i4_next;
+        }
+        else
+        {
+            pi2_scaling_list[i4_idx] = i4_prev;
+        }
+
+        i4_prev = pi2_scaling_list[i4_idx];
+        i4_idx++;
+    }
+}
+
+/*
+ * Fills the decoder's six 4x4 and two 8x8 scaling lists with the flat
+ * value 16. Entries are addressed through the inverse-scan tables, the
+ * same way ih264d_form_scaling_matrix_picture stores them.
+ */
+void ih264d_form_default_scaling_matrix(dec_struct_t *ps_dec)
+{
+    UWORD8 *pu1_scan_4x4 = (UWORD8 *)gau1_ih264d_inv_scan;
+    WORD32 i4_list, i4_pos;
+
+    /* six 4x4 lists of 16 entries each */
+    for(i4_list = 0; i4_list < 6; i4_list++)
+    {
+        for(i4_pos = 0; i4_pos < 16; i4_pos++)
+        {
+            ps_dec->s_high_profile.i2_scalinglist4x4[i4_list][pu1_scan_4x4[i4_pos]] = 16;
+        }
+    }
+
+    /* two 8x8 lists of 64 entries each */
+    for(i4_list = 0; i4_list < 2; i4_list++)
+    {
+        for(i4_pos = 0; i4_pos < 64; i4_pos++)
+        {
+            ps_dec->s_high_profile.i2_scalinglist8x8[i4_list][gau1_ih264d_inv_scan_prog8x8_cabac[i4_pos]] = 16;
+        }
+    }
+}
+
+/* Selects, for each of the 8 scaling lists (indices 0-5 are 4x4, 6-7 are */
+/* 8x8), a source list in s_high_profile.pi2_scale_mat[] - first from the */
+/* SPS flags, then overridden by the PPS flags - and finally copies the */
+/* selected lists through the inverse-scan tables into i2_scalinglist4x4 */
+/* and i2_scalinglist8x8 of the decoder. */
+/* NOTE(review): when neither the SPS nor the PPS matrix-present flag is */
+/* set, pi2_scale_mat[] is not written here before the final copy reads */
+/* it - presumably callers only invoke this when a flag is set; confirm. */
+void ih264d_form_scaling_matrix_picture(dec_seq_params_t *ps_seq,
+ dec_pic_params_t *ps_pic,
+ dec_struct_t *ps_dec)
+{
+ /* default scaling matrices */
+ WORD32 i4_i;
+
+ /* check the SPS first */
+ if(ps_seq->i4_seq_scaling_matrix_present_flag)
+ {
+ for(i4_i = 0; i4_i < 8; i4_i++)
+ {
+ if(i4_i < 6)
+ {
+ /* fall-back rule A */
+ /* list not sent: 0 and 3 take the default intra/inter table, */
+ /* the others reuse the previous list (i4_i - 1) */
+ if(!ps_seq->u1_seq_scaling_list_present_flag[i4_i])
+ {
+ if((i4_i == 0) || (i4_i == 3))
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ (i4_i == 0) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4);
+ }
+ else
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i
+ - 1];
+ }
+ }
+ else
+ {
+ /* list sent: either flagged "use default" or taken from SPS */
+ if(ps_seq->u1_use_default_scaling_matrix_flag[i4_i])
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ (i4_i < 3) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4);
+ }
+ else
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ ps_seq->i2_scalinglist4x4[i4_i];
+ }
+ }
+
+ }
+ else
+ {
+ /* fall-back rule A */
+ /* 8x8 lists: index 6 uses the intra default, 7 the inter default */
+ if((!ps_seq->u1_seq_scaling_list_present_flag[i4_i])
+ || (ps_seq->u1_use_default_scaling_matrix_flag[i4_i]))
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ (i4_i == 6) ? ((WORD16*)gai2_ih264d_default_intra8x8) : ((WORD16*)gai2_ih264d_default_inter8x8);
+ }
+ else
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ ps_seq->i2_scalinglist8x8[i4_i - 6];
+ }
+ }
+ }
+ }
+
+ /* checking for the PPS */
+
+ if(ps_pic->i4_pic_scaling_matrix_present_flag)
+ {
+ for(i4_i = 0; i4_i < 8; i4_i++)
+ {
+ if(i4_i < 6)
+ {
+ /* fall back rule B */
+ if(!ps_pic->u1_pic_scaling_list_present_flag[i4_i])
+ {
+ if((i4_i == 0) || (i4_i == 3))
+ {
+ /* keep the SPS-level choice made above when the SPS */
+ /* carried a matrix; otherwise use the default table */
+ if(!ps_seq->i4_seq_scaling_matrix_present_flag)
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ (i4_i == 0) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4);
+ }
+ }
+ else
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i
+ - 1];
+ }
+ }
+ else
+ {
+ if(ps_pic->u1_pic_use_default_scaling_matrix_flag[i4_i])
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ (i4_i < 3) ? (WORD16 *)(gai2_ih264d_default_intra4x4) : (WORD16 *)(gai2_ih264d_default_inter4x4);
+ }
+ else
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ ps_pic->i2_pic_scalinglist4x4[i4_i];
+ }
+ }
+ }
+ else
+ {
+ if(!ps_pic->u1_pic_scaling_list_present_flag[i4_i])
+ {
+ /* NOTE(review): the 4x4 branch above tests the SPS-wide */
+ /* i4_seq_scaling_matrix_present_flag, this one tests the */
+ /* per-list SPS flag - confirm the difference is intended */
+ if(!ps_seq->u1_seq_scaling_list_present_flag[i4_i])
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ (i4_i == 6) ? ((WORD16*)gai2_ih264d_default_intra8x8) : ((WORD16*)gai2_ih264d_default_inter8x8);
+ }
+ }
+ else
+ {
+ if(ps_pic->u1_pic_use_default_scaling_matrix_flag[i4_i])
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ (i4_i == 6) ? (WORD16 *)(gai2_ih264d_default_intra8x8) : (WORD16 *)(gai2_ih264d_default_inter8x8);
+ }
+ else
+ {
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i] =
+ ps_pic->i2_pic_scalinglist8x8[i4_i - 6];
+ }
+ }
+ }
+ }
+ }
+
+ /*************************************************************************/
+ /* perform the inverse scanning for the frame and field scaling matrices */
+ /*************************************************************************/
+ {
+ UWORD8 *pu1_inv_scan_4x4;
+ WORD32 i4_i, i4_j;
+
+ pu1_inv_scan_4x4 = (UWORD8 *)gau1_ih264d_inv_scan;
+
+ /* for all 4x4 matrices */
+ for(i4_i = 0; i4_i < 6; i4_i++)
+ {
+ for(i4_j = 0; i4_j < 16; i4_j++)
+ {
+ ps_dec->s_high_profile.i2_scalinglist4x4[i4_i][pu1_inv_scan_4x4[i4_j]] =
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i][i4_j];
+
+ }
+ }
+
+ /* for all 8x8 matrices */
+ /* the 8x8 sources live at pi2_scale_mat[6] and pi2_scale_mat[7] */
+ for(i4_i = 0; i4_i < 2; i4_i++)
+ {
+ for(i4_j = 0; i4_j < 64; i4_j++)
+ {
+ ps_dec->s_high_profile.i2_scalinglist8x8[i4_i][gau1_ih264d_inv_scan_prog8x8_cabac[i4_j]] =
+ ps_dec->s_high_profile.pi2_scale_mat[i4_i + 6][i4_j];
+
+ }
+ }
+ }
+}
+
diff --git a/decoder/ih264d_quant_scaling.h b/decoder/ih264d_quant_scaling.h
new file mode 100755
index 0000000..d9bd377
--- /dev/null
+++ b/decoder/ih264d_quant_scaling.h
@@ -0,0 +1,37 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_QUANT_SCALING_H_
+#define _IH264D_QUANT_SCALING_H_
+/* Decodes one delta-coded scaling list of i4_size_of_scalinglist entries */
+/* and reports whether the stream asked for the default matrix instead */
+void ih264d_scaling_list(WORD16 *pi2_scaling_list,
+ WORD32 i4_size_of_scalinglist,
+ UWORD8 *pu1_use_default_scaling_matrix_flag,
+ dec_bit_stream_t *ps_bitstrm);
+
+
+/* Resolves the 4x4 and 8x8 scaling lists from SPS and PPS flags and stores */
+/* them, inverse-scanned, in ps_dec->s_high_profile */
+void ih264d_form_scaling_matrix_picture(dec_seq_params_t *ps_seq,
+ dec_pic_params_t *ps_pic,
+ dec_struct_t *ps_dec);
+
+/* Sets every 4x4 and 8x8 scaling list entry in ps_dec to the flat value 16 */
+void ih264d_form_default_scaling_matrix(dec_struct_t *ps_dec);
+
+
+
+
+#endif
diff --git a/decoder/ih264d_sei.c b/decoder/ih264d_sei.c
new file mode 100755
index 0000000..14ffcd6
--- /dev/null
+++ b/decoder/ih264d_sei.c
@@ -0,0 +1,386 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : ih264d_sei.c */
+/* */
+/* Description : This file contains routines to parse SEI NAL's */
+/* */
+/* List of Functions : <List the functions defined in this file> */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 25 05 2005 NS Draft */
+/* */
+/*****************************************************************************/
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_sei.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_structs.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_vui.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_defs.h"
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_buffering_period                            */
+/*                                                                           */
+/*  Description   : This function parses SEI message buffering_period        */
+/*  Inputs        : ps_buf_prd    pointer to struct buf_period_t (unused)    */
+/*                  ps_bitstrm    Bitstream                                  */
+/*                  ps_dec        Decoder context; ps_cur_sps and            */
+/*                                ps_sei->u1_seq_param_set_id are updated    */
+/*  Globals       : None                                                     */
+/*  Processing    : Reads seq_parameter_set_id, selects that SPS and skips   */
+/*                  the initial CPB removal delay fields of the NAL and VCL  */
+/*                  HRD; the values themselves are not stored.               */
+/*  Outputs       : None                                                     */
+/*  Returns       : OK on success                                            */
+/*                  ERROR_INVALID_SEQ_PARAM when the SPS id is out of range  */
+/*                  -1 when the referenced SPS is not valid                  */
+/*                                                                           */
+/*  Issues        : Not implemented fully                                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_buffering_period(buf_period_t *ps_buf_prd,
+                                     dec_bit_stream_t *ps_bitstrm,
+                                     dec_struct_t *ps_dec)
+{
+    UWORD32 u4_seq_parameter_set_id;
+    dec_seq_params_t *ps_seq;
+    UWORD8 u1_nal_hrd_present, u1_vcl_hrd_present;
+    UWORD32 i;
+    UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;
+    UWORD32 *pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+    UNUSED(ps_buf_prd);
+
+    /* Keep the full ue(v) width for the range check: narrowing to 8 bits */
+    /* first would let values such as 256 alias to a valid id             */
+    u4_seq_parameter_set_id = ih264d_uev(pu4_bitstrm_ofst,
+                                         pu4_bitstrm_buf);
+    /* '>=' so that the id can never index one past the SPS array         */
+    /* (assumes ps_sps holds MAX_NUM_SEQ_PARAMS entries - TODO confirm)   */
+    if(u4_seq_parameter_set_id >= MAX_NUM_SEQ_PARAMS)
+        return ERROR_INVALID_SEQ_PARAM;
+    ps_seq = &ps_dec->ps_sps[u4_seq_parameter_set_id];
+    /* -1 is treated by ih264d_parse_sei_message as "stop parsing SEI" */
+    if(TRUE != ps_seq->u1_is_valid)
+        return (-1);
+
+    ps_dec->ps_sei->u1_seq_param_set_id = (UWORD8)u4_seq_parameter_set_id;
+    ps_dec->ps_cur_sps = ps_seq;
+    if(1 == ps_seq->u1_vui_parameters_present_flag)
+    {
+        u1_nal_hrd_present = ps_seq->s_vui.u1_nal_hrd_params_present;
+        if(u1_nal_hrd_present)
+        {
+            for(i = 0; i < ps_seq->s_vui.s_nal_hrd.u4_cpb_cnt; i++)
+            {
+                /* Two fields of the same bit length are skipped per CPB  */
+                /* (presumably initial_cpb_removal_delay and its offset)  */
+                ih264d_get_bits_h264(
+                                ps_bitstrm,
+                                ps_seq->s_vui.s_nal_hrd.u1_initial_cpb_removal_delay);
+                ih264d_get_bits_h264(
+                                ps_bitstrm,
+                                ps_seq->s_vui.s_nal_hrd.u1_initial_cpb_removal_delay);
+            }
+        }
+
+        u1_vcl_hrd_present = ps_seq->s_vui.u1_vcl_hrd_params_present;
+        if(u1_vcl_hrd_present)
+        {
+            for(i = 0; i < ps_seq->s_vui.s_vcl_hrd.u4_cpb_cnt; i++)
+            {
+                ih264d_get_bits_h264(
+                                ps_bitstrm,
+                                ps_seq->s_vui.s_vcl_hrd.u1_initial_cpb_removal_delay);
+                ih264d_get_bits_h264(
+                                ps_bitstrm,
+                                ps_seq->s_vui.s_vcl_hrd.u1_initial_cpb_removal_delay);
+            }
+        }
+    }
+    return OK;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_pic_timing                                  */
+/*                                                                           */
+/*  Description   : This function parses SEI message pic_timing              */
+/*  Inputs        : ps_bitstrm        Bitstream                              */
+/*                  ps_dec            Pointer to decoder context; the VUI of */
+/*                                    ps_cur_sps is read                     */
+/*                  ui4_payload_size  payload size in bytes                  */
+/*  Globals       : None                                                     */
+/*  Processing    : Skips the CPB removal / DPB output delays when HRD       */
+/*                  parameters are present, stores pic_struct when it is     */
+/*                  signalled, then flushes the rest of the payload.         */
+/*  Outputs       : ps_sei->u1_pic_struct, ps_sei->u1_is_valid and           */
+/*                  ps_dec->u1_pic_struct_copy (only with pic_struct)        */
+/*  Returns       : 0 always                                                 */
+/*                                                                           */
+/*  Issues        : Not implemented fully                                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_parse_pic_timing(dec_bit_stream_t *ps_bitstrm,
+                               dec_struct_t *ps_dec,
+                               UWORD32 ui4_payload_size)
+{
+    sei *ps_sei_info = (sei *)ps_dec->ps_sei;
+    vui_t *ps_vui = &ps_dec->ps_cur_sps->s_vui;
+    UWORD8 u1_hrd_present;
+    UWORD8 u1_has_pic_struct;
+    /* field lengths default to 24 bits when no HRD supplies them */
+    UWORD8 u1_cpb_delay_len = 24;
+    UWORD8 u1_dpb_delay_len = 24;
+    UWORD32 u4_ofst_at_entry;
+    UWORD32 u4_bits_used;
+
+    u1_hrd_present = ps_vui->u1_vcl_hrd_params_present
+                    + ps_vui->u1_nal_hrd_params_present;
+
+    /* the VCL HRD takes precedence over the NAL HRD */
+    if(ps_vui->u1_vcl_hrd_params_present)
+    {
+        u1_cpb_delay_len = ps_vui->s_vcl_hrd.u1_cpb_removal_delay_length;
+        u1_dpb_delay_len = ps_vui->s_vcl_hrd.u1_dpb_output_delay_length;
+    }
+    else if(ps_vui->u1_nal_hrd_params_present)
+    {
+        u1_cpb_delay_len = ps_vui->s_nal_hrd.u1_cpb_removal_delay_length;
+        u1_dpb_delay_len = ps_vui->s_nal_hrd.u1_dpb_output_delay_length;
+    }
+
+    u4_ofst_at_entry = ps_bitstrm->u4_ofst;
+
+    if(u1_hrd_present)
+    {
+        /* values are read and discarded */
+        ih264d_get_bits_h264(ps_bitstrm, u1_cpb_delay_len);
+        ih264d_get_bits_h264(ps_bitstrm, u1_dpb_delay_len);
+    }
+
+    u1_has_pic_struct = ps_vui->u1_pic_struct_present_flag;
+    if(u1_has_pic_struct)
+    {
+        ps_sei_info->u1_pic_struct = ih264d_get_bits_h264(ps_bitstrm, 4);
+        ps_dec->u1_pic_struct_copy = ps_sei_info->u1_pic_struct;
+        ps_sei_info->u1_is_valid = 1;
+    }
+
+    /* skip whatever is left of the payload */
+    u4_bits_used = ps_bitstrm->u4_ofst - u4_ofst_at_entry;
+    ih264d_flush_bits_h264(ps_bitstrm,
+                           (ui4_payload_size << 3) - u4_bits_used);
+
+    return (0);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_recovery_point                              */
+/*                                                                           */
+/*  Description   : This function parses SEI message recovery point          */
+/*  Inputs        : ps_bitstrm        Bitstream                              */
+/*                  ps_dec            Pointer to decoder context             */
+/*                  ui4_payload_size  payload size (unused)                  */
+/*  Globals       : None                                                     */
+/*  Processing    : Parses SEI payload recovery point and derives the frame  */
+/*                  count at which the SEI sync point is reached             */
+/*  Outputs       : Recovery point fields of ps_dec->ps_sei and              */
+/*                  u4_frm_sei_sync of ps_dec->ps_dec_err_status             */
+/*  Returns       : 0 always                                                 */
+/*                                                                           */
+/*  Issues        : Not implemented fully                                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+WORD32 ih264d_parse_recovery_point(dec_bit_stream_t *ps_bitstrm,
+                                   dec_struct_t *ps_dec,
+                                   UWORD32 ui4_payload_size)
+{
+    sei *ps_sei_info = ps_dec->ps_sei;
+    dec_err_status_t *ps_err_status = ps_dec->ps_dec_err_status;
+
+    UNUSED(ui4_payload_size);
+
+    /* recovery_frame_cnt, ue(v) */
+    ps_sei_info->u2_recovery_frame_cnt = ih264d_uev(&ps_bitstrm->u4_ofst,
+                                                    ps_bitstrm->pu4_buffer);
+
+    /* sync point = current frame count + stored recovery frame count */
+    ps_err_status->u4_frm_sei_sync = ps_err_status->u4_cur_frm
+                    + ps_sei_info->u2_recovery_frame_cnt;
+
+    /* two 1-bit flags followed by a 2-bit field, in bitstream order */
+    ps_sei_info->u1_exact_match_flag = ih264d_get_bit_h264(ps_bitstrm);
+    ps_sei_info->u1_broken_link_flag = ih264d_get_bit_h264(ps_bitstrm);
+    ps_sei_info->u1_changing_slice_grp_idc = ih264d_get_bits_h264(ps_bitstrm,
+                                                                  2);
+
+    return (0);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_sei_payload                                 */
+/*                                                                           */
+/*  Description   : This function parses SEI payloads. Currently it's        */
+/*                  implemented partially.                                   */
+/*  Inputs        : ps_bitstrm        Bitstream                              */
+/*                  ui4_payload_type  SEI payload type                       */
+/*                  ui4_payload_size  SEI payload size in bytes              */
+/*                  ps_dec            Decoder context                        */
+/*  Globals       : None                                                     */
+/*  Processing    : Dispatches buffering period, picture timing and recovery */
+/*                  point payloads to their parsers; every other payload     */
+/*                  type is skipped.                                         */
+/*  Outputs       : None                                                     */
+/*  Returns       : Status of ih264d_parse_buffering_period for that payload */
+/*                  type, 0 for all others                                   */
+/*                                                                           */
+/*  Issues        : Not implemented fully                                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_sei_payload(dec_bit_stream_t *ps_bitstrm,
+                                UWORD32 ui4_payload_type,
+                                UWORD32 ui4_payload_size,
+                                dec_struct_t *ps_dec)
+{
+    sei *ps_sei_info = (sei *)ps_dec->ps_sei;
+    WORD32 i4_ret = 0;
+
+    if(SEI_BUF_PERIOD == ui4_payload_type)
+    {
+        /* only this parser's status is handed back to the caller */
+        i4_ret = ih264d_parse_buffering_period(&ps_sei_info->s_buf_period,
+                                               ps_bitstrm, ps_dec);
+    }
+    else if(SEI_PIC_TIMING == ui4_payload_type)
+    {
+        /* picture timing needs the VUI of the current SPS */
+        if(NULL != ps_dec->ps_cur_sps)
+        {
+            ih264d_parse_pic_timing(ps_bitstrm, ps_dec, ui4_payload_size);
+        }
+        else
+        {
+            ih264d_flush_bits_h264(ps_bitstrm, (ui4_payload_size << 3));
+        }
+    }
+    else if(SEI_RECOVERY_PT == ui4_payload_type)
+    {
+        ih264d_parse_recovery_point(ps_bitstrm, ps_dec, ui4_payload_size);
+    }
+    else
+    {
+        /* unsupported payload: skip it entirely */
+        ih264d_flush_bits_h264(ps_bitstrm, (ui4_payload_size << 3));
+    }
+
+    return (i4_ret);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264d_parse_sei_message                                 */
+/*                                                                           */
+/*  Description   : This function parses and decodes SEI. Currently it's     */
+/*                  not implemented fully.                                   */
+/*  Inputs        : ps_dec      Decoder parameters                           */
+/*                  ps_bitstrm  Bitstream                                    */
+/*  Globals       : None                                                     */
+/*  Processing    : Reads payload type and size (each coded as a run of 0xff */
+/*                  bytes plus a final byte), parses the payload, consumes   */
+/*                  the alignment bits and repeats until the bitstream       */
+/*                  offset reaches u4_max_ofst.                              */
+/*  Outputs       : None                                                     */
+/*  Returns       : 0 when parsing ends normally or a payload parser         */
+/*                  returned -1, otherwise the failing payload status        */
+/*                                                                           */
+/*  Issues        : Not implemented fully                                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         06 05 2002   NS              Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_sei_message(dec_struct_t *ps_dec,
+                                dec_bit_stream_t *ps_bitstrm)
+{
+    UWORD32 u4_type, u4_size;
+    UWORD32 u4_val;
+    WORD32 i4_ret = 0;
+
+    for(;;)
+    {
+        /* payload type: 255 per leading 0xff byte, plus the last byte */
+        u4_type = 0;
+        for(u4_val = ih264d_get_bits_h264(ps_bitstrm, 8); 0xff == u4_val;
+                        u4_val = ih264d_get_bits_h264(ps_bitstrm, 8))
+        {
+            u4_type += 255;
+        }
+        u4_type += u4_val;
+
+        /* payload size uses the same coding */
+        u4_size = 0;
+        for(u4_val = ih264d_get_bits_h264(ps_bitstrm, 8); 0xff == u4_val;
+                        u4_val = ih264d_get_bits_h264(ps_bitstrm, 8))
+        {
+            u4_size += 255;
+        }
+        u4_size += u4_val;
+
+        i4_ret = ih264d_parse_sei_payload(ps_bitstrm, u4_type, u4_size,
+                                          ps_dec);
+        if(-1 == i4_ret)
+        {
+            /* -1 ends SEI parsing without reporting an error */
+            i4_ret = 0;
+            break;
+        }
+
+        if(OK != i4_ret)
+            return i4_ret;
+
+        if(0 == ih264d_check_byte_aligned(ps_bitstrm))
+        {
+            /* expect a single 1 bit followed by 0 bits up to alignment */
+            u4_val = ih264d_get_bit_h264(ps_bitstrm);
+            if(0 == u4_val)
+            {
+                H264_DEC_DEBUG_PRINT("\nError in parsing SEI message");
+            }
+            while(0 == ih264d_check_byte_aligned(ps_bitstrm))
+            {
+                u4_val = ih264d_get_bit_h264(ps_bitstrm);
+                if(u4_val)
+                {
+                    H264_DEC_DEBUG_PRINT("\nError in parsing SEI message");
+                }
+            }
+        }
+
+        if(!(ps_bitstrm->u4_ofst < ps_bitstrm->u4_max_ofst))
+            break;
+    }
+    return (i4_ret);
+}
+
diff --git a/decoder/ih264d_sei.h b/decoder/ih264d_sei.h
new file mode 100755
index 0000000..5033740
--- /dev/null
+++ b/decoder/ih264d_sei.h
@@ -0,0 +1,91 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : ih264d_sei.h */
+/* */
+/* Description : This file contains routines to parse SEI NAL's */
+/* */
+/* List of Functions : ih264d_parse_sei_message (declaration) */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 25 05 2005 NS Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _IH264D_SEI_H_
+#define _IH264D_SEI_H_
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_structs.h"
+
+#define SEI_BUF_PERIOD 0 /* SEI payload type codes 0..18; numbering appears to follow H.264 Annex D payloadType - confirm */
+#define SEI_PIC_TIMING 1
+#define SEI_PAN_SCAN_RECT 2
+#define SEI_FILLER 3
+#define SEI_UD_REG_T35 4 /* presumably user data registered by ITU-T T.35 */
+#define SEI_UD_UN_REG 5 /* presumably user data unregistered */
+#define SEI_RECOVERY_PT 6
+#define SEI_DEC_REF_MARK 7
+#define SEI_SPARE_PIC 8
+#define SEI_SCENE_INFO 9
+#define SEI_SUB_SEQN_INFO 10
+#define SEI_SUB_SEQN_LAY_CHAR 11
+#define SEI_SUB_SEQN_CHAR 12
+#define SEI_FULL_FRAME_FREEZE 13
+#define SEI_FULL_FRAME_FREEZE_REL 14
+#define SEI_FULL_FRAME_SNAP_SHOT 15
+#define SEI_PROG_REF_SEGMENT_START 16
+#define SEI_PROG_REF_SEGMENT_END 17
+#define SEI_MOT_CON_SLICE_GRP_SET 18 /* presumably motion-constrained slice group set */
+/* Forward declaration of struct _DecStruct (dec_struct_t) to avoid a CCS compilation error */
+struct _DecStruct;
+WORD32 ih264d_parse_sei_message(struct _DecStruct *ps_dec, /* decoder context */
+ dec_bit_stream_t *ps_bitstrm); /* bitstream to read the SEI from; returns a WORD32 status */
+typedef struct
+{ /* Values carried by a buffering-period SEI (name-based; confirm against the payload parser) */
+ UWORD8 u1_seq_parameter_set_id;
+ UWORD32 u4_initial_cpb_removal_delay;
+ UWORD32 u4_nitial_cpb_removal_delay_offset; /* NOTE(review): name looks like a typo for "initial"; renaming needs every user updated */
+
+} buf_period_t;
+
+struct _sei
+{ /* Decoded SEI state; type is also exposed as typedef 'sei' */
+ UWORD8 u1_seq_param_set_id;
+ buf_period_t s_buf_period; /* buffering-period fields, see buf_period_t */
+ UWORD8 u1_pic_struct;
+ UWORD16 u2_recovery_frame_cnt;
+ UWORD8 u1_exact_match_flag;
+ UWORD8 u1_broken_link_flag;
+ UWORD8 u1_changing_slice_grp_idc;
+ UWORD8 u1_is_valid; /* presumably set once SEI data has been parsed - confirm */
+};
+typedef struct _sei sei;
+#endif /* _IH264D_SEI_H_ */
+
diff --git a/decoder/ih264d_structs.h b/decoder/ih264d_structs.h
new file mode 100755
index 0000000..110f71d
--- /dev/null
+++ b/decoder/ih264d_structs.h
@@ -0,0 +1,1582 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_STRUCTS_H_
+#define _IH264D_STRUCTS_H_
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "iv.h"
+#include "ivd.h"
+
+#include "ih264d_transfer_address.h"
+#include "ih264d_defs.h"
+#include "ih264d_defs.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_debug.h"
+#include "ih264d_dpb_manager.h"
+/* includes for CABAC */
+#include "ih264d_cabac.h"
+#include "ih264d_dpb_manager.h"
+
+#include "ih264d_vui.h"
+#include "ih264d_sei.h"
+#include "iv.h"
+#include "ivd.h"
+
+#include "ih264_weighted_pred.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+
+/** Number of MBs whose syntax will be read */
+/************************************************************/
+/* MB_GROUP should be a multiple of 2 */
+/************************************************************/
+#define PARSE_MB_GROUP_4 4
+
+/* MV_SCRATCH_BUFS is assumed to be a power of 2 */
+#define MV_SCRATCH_BUFS 4
+
+#define LEFT_MB_PIXELS 4
+#define LEFT_MB_PIXELS_Y_FRM_BOT 64 /* 4 * 16 */
+#define LEFT_MB_PIXELS_CR_FRM_BOT 32 /* 4 * 8 */
+
+#define TOP_FIELD_ONLY 0x02 /* distinct single-bit values, so the two can be OR-ed */
+#define BOT_FIELD_ONLY 0x01
+
+#define MAX_REF_BUF_SIZE (3776*2*2)
+
+struct _DecStruct; /* forward declarations; both structs are defined later in this header */
+struct _DecMbInfo;
+
+
+#define NUM_INT_G_TABLE ((UWORD32) (sigcoeff_ctxtinc_field8x8 + 1)) /* one past the named index, which is defined outside this header */
+#define NUM_EXT_G_TABLE ((UWORD32) (ITTIAM_LOGO_V_BUF_T + 1)) /* one past the named index, which is defined outside this header */
+
+typedef enum
+{ /* Per-syntax-element CABAC offsets; values appear to match the ctxIdxOffset column of the H.264 CABAC tables - confirm */
+ MB_TYPE_SI_SLICE = 0,
+ MB_TYPE_I_SLICE = 3,
+ MB_SKIP_FLAG_P_SLICE = 11,
+ MB_TYPE_P_SLICE = 14,
+ SUB_MB_TYPE_P_SLICE = 21,
+ MB_SKIP_FLAG_B_SLICE = 24,
+ MB_TYPE_B_SLICE = 27,
+ SUB_MB_TYPE_B_SLICE = 36,
+ MVD_X = 40,
+ MVD_Y = 47,
+ REF_IDX = 54,
+ MB_QP_DELTA = 60,
+ INTRA_CHROMA_PRED_MODE = 64,
+ PREV_INTRA4X4_PRED_MODE_FLAG = 68,
+ REM_INTRA4X4_PRED_MODE = 69,
+ MB_FIELD_DECODING_FLAG = 70,
+ CBP_LUMA = 73,
+ CBP_CHROMA = 77,
+ CBF = 85,
+ SIGNIFICANT_COEFF_FLAG_FRAME = 105,
+ SIGNIFICANT_COEFF_FLAG_FLD = 277, /* note: entries from here on are not listed in ascending value order */
+ LAST_SIGNIFICANT_COEFF_FLAG_FRAME = 166,
+ LAST_SIGNIFICANT_COEFF_FLAG_FLD = 338,
+ COEFF_ABS_LEVEL_MINUS1 = 227,
+
+ /* High profile related syntax element CABAC offsets */
+ TRANSFORM_SIZE_8X8_FLAG = 399,
+ SIGNIFICANT_COEFF_FLAG_8X8_FRAME = 402,
+ LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME = 417,
+ COEFF_ABS_LEVEL_MINUS1_8X8 = 426,
+ SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 436,
+ LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD = 451
+
+} cabac_table_num_t;
+
+typedef enum
+{ /* Per-block-category offsets (CAT_0..CAT_5); presumably added to the base offsets in cabac_table_num_t - confirm */
+ SIG_COEFF_CTXT_CAT_0_OFFSET = 0,
+ SIG_COEFF_CTXT_CAT_1_OFFSET = 15,
+ SIG_COEFF_CTXT_CAT_2_OFFSET = 29,
+ SIG_COEFF_CTXT_CAT_3_OFFSET = 44,
+ SIG_COEFF_CTXT_CAT_4_OFFSET = 47,
+ SIG_COEFF_CTXT_CAT_5_OFFSET = 0, /* category 5 restarts at 0 */
+ COEFF_ABS_LEVEL_CAT_0_OFFSET = 0,
+ COEFF_ABS_LEVEL_CAT_1_OFFSET = 10,
+ COEFF_ABS_LEVEL_CAT_2_OFFSET = 20,
+ COEFF_ABS_LEVEL_CAT_3_OFFSET = 30,
+ COEFF_ABS_LEVEL_CAT_4_OFFSET = 39,
+ COEFF_ABS_LEVEL_CAT_5_OFFSET = 0 /* category 5 restarts at 0 */
+} cabac_blk_cat_offset_t;
+
+typedef enum
+{ /* Indices into a code-overlay table; names come in load-address (LD_ADRS), load-size (LD_SZ) and run-address (RUN_ADRS) groups */
+ CABAC_IPBMB_LD_ADRS_T,
+ CABAC_IPBMB_LD_SZ_T,
+ CAVLC_IPBMB_LD_ADRS_T,
+ CAVLC_IPBMB_LD_SZ_T,
+ PARSE_IPBMB_RUN_ADRS_T,
+
+ MVP_MBAFF_LD_ADRS_T,
+ MVP_MBAFF_LD_SZ_T,
+ MVP_NON_MBAFF_LD_ADRS_T,
+ MVP_NON_MBAFF_LD_SZ_T,
+ MVPRED_RUN_ADRS_T,
+
+ B_REF_DMA_LD_ADRS_T,
+ B_REF_DMA_LD_SZ_T,
+ P_REF_DMA_LD_ADRS_T,
+ P_REF_DMA_LD_SZ_T,
+ REF_DMA_RUN_ADRS_T,
+
+ SP_DRCT_LD_ADRS_T,
+ SP_DRCT_LD_SZ_T,
+ TMP_DRCT_LD_ADRS_T,
+ TMP_DRCT_LD_SZ_T,
+ B_SKIP_RUN_ADRS_T,
+
+ DEC_DEBLK_RUN_ADRS_T,
+ H264_DBLK_LD_ADRS_T,
+ H264_DBLK_LD_SZ_T,
+ H264_DEC_LD_ADRS_T,
+
+ /*
+ * (H264_DEC_LD_SZ_T + 1) is treated as the end of this table;
+ * new members must be added before this one
+ */
+ H264_DEC_LD_SZ_T
+} code_overlay_tab_t;
+
+/** Structure for one entry of the MV bank */
+typedef struct _mv_pred_t
+{
+ WORD16 i2_mv[4]; /** 0- mvFwdX, 1- mvFwdY, 2- mvBwdX, 3- mvBwdY */
+ WORD8 i1_ref_frame[2]; /** presumably one reference index per direction (fwd, bwd) - confirm */
+
+ UWORD8 u1_col_ref_pic_idx; /** Idx into the pic buff array */
+ UWORD8 u1_pic_type; /** NOTE(review): original comment duplicated the one above; presumably the picture type - confirm */
+
+} mv_pred_t;
+
+typedef struct
+{ /* Per-MB bookkeeping with up to 16 entries per array; presumably for direct-mode MV derivation - confirm */
+ WORD32 i4_mv_indices[16];
+ WORD8 i1_submb_num[16];
+ WORD8 i1_partitionsize[16];
+ WORD8 i1_num_partitions;
+ WORD8 u1_vert_mv_scale; /* NOTE(review): 'u1_' prefix but the type is signed WORD8 */
+ UWORD8 u1_col_zeroflag_change;
+} directmv_t;
+
+typedef struct pic_buffer_t
+{
+ /** Different components of the picture */
+ UWORD8 *pu1_buf1;
+ UWORD8 *pu1_buf2;
+ UWORD8 *pu1_buf3;
+ UWORD16 u2_disp_width; /** Width of the display luma frame in pixels */
+ UWORD16 u2_disp_height; /** Height of the display luma frame in pixels */
+ UWORD32 u4_time_stamp; /** Time at which frame has to be displayed */
+ UWORD16 u2_frm_wd_y; /** Width of the luma frame in pixels */
+ UWORD16 u2_frm_wd_uv; /** Width of the chroma frame */
+ UWORD16 u2_frm_ht_y; /** Height of the luma frame in pixels */
+ UWORD16 u2_frm_ht_uv; /** Height of the chroma frame */
+ /* Up to this point the layout resembles the structure IH264DEC_DispUnit */
+
+ /* If any member is to be added, add below this */
+
+ /* Offset from start of picture buffer to display position for Y buffer */
+ UWORD16 u2_crop_offset_y;
+
+ /* Offset from start of picture buffer to display position for UV buffer */
+ UWORD16 u2_crop_offset_uv;
+
+ UWORD8 u1_is_short; /** (1: short 0: long) term ref pic */
+ UWORD8 u1_pic_type; /** frame / field / complementary field pair */
+ UWORD8 u1_pic_buf_id; /** Idx into the picBufAPI array */
+ UWORD8 u1_mv_buf_id;
+ WORD32 i4_seq;
+ UWORD8 *pu1_col_zero_flag;
+ mv_pred_t *ps_mv; /** Pointer to the MV bank array */
+ WORD32 i4_poc; /** POC */
+ WORD32 i4_pic_num;
+ WORD32 i4_frame_num;
+ WORD32 i4_top_field_order_cnt; /** TopPOC */
+ WORD32 i4_bottom_field_order_cnt; /** BottomPOC */
+ WORD32 i4_avg_poc; /** minPOC */
+ UWORD8 u1_picturetype; /* Same as u1_pic_type, but u1_pic_type gets overwritten whereas
+ this does not; stores the pic type of
+ frame / complementary field pair / mbaff */
+ UWORD8 u1_long_term_frm_idx;
+ UWORD8 u1_long_term_pic_num;
+ UWORD32 u4_pack_slc_typ; /* It will contain information about types of slices */
+
+ /* ! */
+ UWORD32 u4_ts;
+ UWORD8 u1_pic_struct;/* Refer to SEI table D-1 */
+
+} pic_buffer_t;
+
+typedef struct
+{ /* Four untyped addresses; the decoder context keeps a ping-pong pair of these for left MV prediction */
+ void *u4_add[4]; /* NOTE(review): 'u4_' prefix but the elements are pointers */
+} neighbouradd_t;
+
+typedef struct
+{ /* CAVLC parsing context: an inverse-scan table pointer plus six untyped table pointers */
+ const UWORD8 *pu1_inv_scan;
+ void *pv_table[6];
+} cavlc_cntxt_t;
+
+/**
+ ************************************************************************
+ * \file ih264d_structs.h
+ *
+ * \brief
+ * Structures used in the H.264 decoder
+ *
+ * \date
+ * 18/11/2002
+ *
+ * \author Sriram Sethuraman
+ *
+ ************************************************************************
+ */
+
+/**
+ * Structure to represent a MV bank buffer and its col-zero flag buffer
+ */
+typedef struct
+{
+ /**
+ * Pointer to buffer that holds the col flag.
+ */
+ void *pv_col_zero_flag;
+
+ /**
+ * Pointer to buffer that holds mv_pred entries
+ */
+ void *pv_mv;
+
+ }col_mv_buf_t;
+
+
+/* Note the size of this structure is hardcoded in arm_default_weighted_Pred.s as 0x3C.
+ * ADD r0,r0,#0x3C and so on..
+ * If there is a change in size, update the above file accordingly.
+ */
+typedef struct
+{ /* Do not add, remove or reorder members without updating the assembly noted above */
+ UWORD8 u1_mc_addr_ofst; /** Offset in bytes relative to pu1_dma_dest_addr */
+ UWORD8 u1_dydx; /** 4*dy + dx for Y comp / 8*dy + dx for UV comp */
+ UWORD8 u1_is_bi_direct; /** 1: is bi-direct 0: forward / backward only */
+ UWORD8 u1_wght_pred_type; /** 0-default 1-singleWeighted 2-BiWeighted */
+ WORD8 i1_mb_partwidth; /** Width of MB partition */
+ WORD8 i1_mb_partheight; /** Height of MB partition */
+ WORD8 i1_mc_wd; /** Number of bytes in a DMA stride */
+ WORD8 i1_dma_ht; /** Number of strides */
+
+ WORD8 i1_pod_ht; /** Flag specifying height of pad on demand */
+ /** 0 (No pod) -ve(Top pod) +ve(Bottom pod) */
+ UWORD16 u2_dst_stride; /** Stride value of the destination */
+ UWORD16 u2_u1_ref_buf_wd; /** Width of the ref buffer */
+ UWORD16 u2_frm_wd;
+ UWORD16 u2_dummy; /** presumably padding for alignment - confirm */
+
+ UWORD8 *u1_pi1_wt_ofst_rec_v; /** Pointer to packed weight and offset */
+ UWORD8 *pu1_rec_y_u; /** MB partition address in row buffer */
+ UWORD8 *pu1_dma_dest_addr; /** Destination address for DMA transfer */
+ UWORD8 *pu1_y_ref;
+ UWORD8 *pu1_u_ref;
+ UWORD8 *pu1_v_ref;
+
+ UWORD8 *pu1_pred;
+ UWORD8 *pu1_pred_u;
+ UWORD8 *pu1_pred_v;
+ UWORD8 u1_dma_wd_y;
+ UWORD8 u1_dma_ht_y;
+ UWORD8 u1_dma_wd_uv;
+ UWORD8 u1_dma_ht_uv;
+} pred_info_t;
+
+typedef struct
+{ /* Packed form of per-partition prediction info */
+ UWORD32 *pu4_wt_offst;
+ WORD16 i2_mv[2];
+
+ /***************************************************/
+ /* packing information for i1_size_pos_info */
+ /* bit 1:0 -> X position in terms of (4x4) units */
+ /* bit 3:2 -> Y position in terms of (4x4) units */
+ /* bit 5:4 -> PU width 0:4,1:8,2:16 */
+ /* bit 7:6 -> PU height 0:4,1:8,2:16 */
+ /***************************************************/
+ WORD8 i1_size_pos_info;
+
+ /***************************************************/
+ /* packing information for i1_ref_idx_info */
+ /* bit 5:0 -> ref_idx */
+ /* bit 7:6 -> 0:l0,1:l1,2:bipred */
+ /***************************************************/
+ WORD8 i1_ref_idx_info;
+
+ WORD8 i1_buf_id;
+
+
+ UWORD8 u1_pic_type; /** frame /top field/bottom field/mbaff / complementary field pair */
+
+}pred_info_pkd_t;
+/*! Sequence level parameters */
+
+typedef struct
+{ /* Sequence parameter set (SPS) fields plus values derived from them */
+ UWORD8 u1_seq_parameter_set_id; /** id for the seq par set 0-31 */
+ UWORD8 u1_is_valid; /** is Seq Param set valid */
+
+ UWORD16 u2_frm_wd_in_mbs; /** Frame width expressed in MB units */
+ UWORD16 u2_frm_ht_in_mbs; /** Frame height expressed in MB units */
+
+ /* Following are derived from the above two */
+ UWORD16 u2_fld_ht_in_mbs; /** Field height expressed in MB units */
+ UWORD16 u2_max_mb_addr; /** NOTE(review): original comment duplicated the next field's; presumably the highest MB address - confirm */
+ UWORD16 u2_total_num_of_mbs; /** Total number of macroblocks in a coded picture */
+ UWORD32 u4_fld_ht; /** field height */
+ UWORD32 u4_cwidth; /** chroma width */
+ UWORD32 u4_chr_frm_ht; /** chroma height */
+ UWORD32 u4_chr_fld_ht; /** chroma field height */
+ UWORD8 u1_mb_aff_flag; /** 0 - no mb_aff; 1 - uses mb_aff */
+
+ UWORD8 u1_profile_idc; /** profile value */
+ UWORD8 u1_level_idc; /** level value */
+
+ /* high profile related syntax elements */
+ WORD32 i4_chroma_format_idc;
+ WORD32 i4_bit_depth_luma_minus8;
+ WORD32 i4_bit_depth_chroma_minus8;
+ WORD32 i4_qpprime_y_zero_transform_bypass_flag;
+ WORD32 i4_seq_scaling_matrix_present_flag;
+ UWORD8 u1_seq_scaling_list_present_flag[8];
+ UWORD8 u1_use_default_scaling_matrix_flag[8];
+ WORD16 i2_scalinglist4x4[6][16]; /** six 4x4 lists of 16 entries */
+ WORD16 i2_scalinglist8x8[2][64]; /** two 8x8 lists of 64 entries */
+ UWORD8 u1_more_than_one_slice_group_allowed_flag;
+ UWORD8 u1_arbitrary_slice_order_allowed_flag;
+ UWORD8 u1_redundant_slices_allowed_flag;
+ UWORD8 u1_bits_in_frm_num; /** Number of bits in frame num */
+ UWORD16 u2_u4_max_pic_num_minus1; /** Maximum frame num minus 1 */
+ UWORD8 u1_pic_order_cnt_type; /** 0 - 2 indicates the method to code picture order count */
+ UWORD8 u1_log2_max_pic_order_cnt_lsb_minus;
+ WORD32 i4_max_pic_order_cntLsb;
+ UWORD8 u1_num_ref_frames_in_pic_order_cnt_cycle;
+ UWORD8 u1_delta_pic_order_always_zero_flag;
+ WORD32 i4_ofst_for_non_ref_pic;
+ WORD32 i4_ofst_for_top_to_bottom_field;
+ WORD32 i4_ofst_for_ref_frame[MAX_NUM_REF_FRAMES_OFFSET];
+ UWORD8 u1_num_ref_frames;
+ UWORD8 u1_gaps_in_frame_num_value_allowed_flag;
+ UWORD8 u1_frame_mbs_only_flag; /** 1 - frame only; 0 - field/frame pic */
+ UWORD8 u1_direct_8x8_inference_flag;
+ UWORD8 u1_vui_parameters_present_flag;
+ vui_t s_vui; /** VUI parameters; vui_t is declared in ih264d_vui.h */
+} dec_seq_params_t;
+
+typedef struct
+{ /* Subset of dec_seq_params_t fields plus an end-of-sequence flag; presumably a snapshot of the previous SPS - confirm */
+ UWORD16 u2_frm_wd_in_mbs; /** Frame width expressed in MB units */
+ UWORD16 u2_frm_ht_in_mbs; /** Frame height expressed in MB units */
+ UWORD8 u1_frame_mbs_only_flag; /** 1 - frame only; 0 - field/frame pic */
+ UWORD8 u1_profile_idc; /** profile value */
+ UWORD8 u1_level_idc; /** level value */
+ UWORD8 u1_direct_8x8_inference_flag;
+ UWORD8 u1_eoseq_pending;
+} prev_seq_params_t;
+
+/** Picture level parameters (PPS) */
+typedef struct
+{
+ dec_seq_params_t *ps_sps; /** applicable seq. parameter set */
+
+ /* High profile related syntax elements */
+ WORD32 i4_transform_8x8_mode_flag;
+ WORD32 i4_pic_scaling_matrix_present_flag;
+ UWORD8 u1_pic_scaling_list_present_flag[8];
+ UWORD8 u1_pic_use_default_scaling_matrix_flag[8];
+ WORD16 i2_pic_scalinglist4x4[6][16];
+ WORD16 i2_pic_scalinglist8x8[2][64];
+ WORD8 i1_second_chroma_qp_index_offset;
+
+ UWORD32 u4_slice_group_change_rate;
+ UWORD8 *pu1_slice_groupmb_map; /** MB map with slice membership labels */
+ UWORD8 u1_pic_parameter_set_id; /** id for the picture par set 0-255*/
+ UWORD8 u1_entropy_coding_mode; /** Entropy coding : 0-VLC; 1 - CABAC */
+ UWORD8 u1_num_slice_groups; /** Number of slice groups */
+ UWORD8 u1_pic_init_qp; /** Initial QPY for the picture {-26,25}*/
+ WORD8 i1_chroma_qp_index_offset; /** Chroma QP offset w.r.t QPY {-12,12} */
+ UWORD8 u1_dblk_filter_parms_flag; /** Slice layer has deblocking filter parameters */
+ UWORD8 u1_constrained_intra_pred_flag; /** Constrained intra prediction flag */
+ UWORD8 u1_redundant_pic_cnt_present_flag; /** Redundant_pic_cnt is in slices using this PPS */
+ UWORD8 u1_pic_order_present_flag; /** Pic order present flag */
+ UWORD8 u1_num_ref_idx_lx_active[2]; /** Maximum reference picture index in the reference list 0 : range [1 - 15] */
+ UWORD8 u1_wted_pred_flag;
+ UWORD8 u1_wted_bipred_idc;
+ UWORD8 u1_pic_init_qs;
+ UWORD8 u1_deblocking_filter_parameters_present_flag;
+ UWORD8 u1_vui_pic_parameters_flag;
+ UWORD8 u1_mb_slice_group_map_type;
+ UWORD8 u1_slice_group_change_direction_flag;
+ UWORD8 u1_frame_cropping_flag;
+ UWORD8 u1_frame_cropping_rect_left_ofst;
+ UWORD8 u1_frame_cropping_rect_right_ofst;
+ UWORD8 u1_frame_cropping_rect_top_ofst;
+ UWORD8 u1_frame_cropping_rect_bottom_ofst;
+ void * pv_codec_handle; /* For Error Handling */
+ WORD32 i4_top_field_order_cnt;
+ WORD32 i4_bottom_field_order_cnt;
+ WORD32 i4_avg_poc;
+ UWORD8 u1_is_valid; /** is Pic Param set valid */
+} dec_pic_params_t;
+
+/** Picture Order Count Parameters */
+typedef struct
+{
+ WORD32 i4_pic_order_cnt_lsb;
+ WORD32 i4_pic_order_cnt_msb;
+ WORD32 i4_delta_pic_order_cnt_bottom;
+ WORD32 i4_delta_pic_order_cnt[2];
+ WORD32 i4_prev_frame_num_ofst;
+ UWORD8 u1_mmco_equalto5;
+ UWORD8 u1_bot_field;
+ UWORD16 u2_frame_num;
+ WORD32 i4_top_field_order_count;
+ WORD32 i4_bottom_field_order_count;
+} pocstruct_t;
+
+/*****************************************************************************/
+/* parse_mb_pers_info contains necessary mb info data required persistently */
+/* in the form of top and left neighbours. */
+/*****************************************************************************/
+typedef struct
+{ /* Persistent per-MB info; dec_mb_info_t points to it for the left, top, top-right and current MB */
+ void *u4_pic_addrress[4]; /* picture address for BS calc */
+ WORD8 pi1_intrapredmodes[4]; /* calc Intra pred modes */
+ UWORD8 pu1_nnz_y[4];
+ UWORD8 pu1_nnz_uv[4];
+ UWORD8 u1_mb_fld;
+ UWORD8 u1_mb_type;
+ UWORD16 u2_luma_csbp; /* Luma csbp used for BS calc */
+ UWORD8 u1_tran_form8x8;
+} mb_neigbour_params_t;
+
+/* This info is required for decoding purposes except deblocking */
+typedef struct _DecMbInfo
+{
+ UWORD8 u1_mb_type; /** macroblock type: I/P/B/SI/SP */
+ UWORD8 u1_chroma_pred_mode;
+ UWORD8 u1_cbp;
+ UWORD8 u1_mb_mc_mode; /** 16x16, 2 16x8, 2 8x16, 4 8x8 */
+ UWORD8 u1_topmb; /** top Mb flag */
+ UWORD8 u1_mb_ngbr_availablity;
+ UWORD8 u1_end_of_slice;
+ UWORD8 u1_mb_field_decodingflag;
+ UWORD8 u1_topleft_mb_fld;
+ UWORD8 u1_topleft_mbtype;
+ WORD8 i1_offset;
+ UWORD8 u1_Mux;
+ UWORD8 u1_qp_div6;
+ UWORD8 u1_qp_rem6;
+ UWORD8 u1_qpc_div6;
+ UWORD8 u1_qpcr_div6;
+ UWORD8 u1_qpc_rem6;
+ UWORD8 u1_qpcr_rem6;
+ UWORD8 u1_tran_form8x8;
+ UWORD8 u1_num_pred_parts;
+ UWORD8 u1_yuv_dc_block_flag;
+ UWORD16 u2_top_right_avail_mask;
+ UWORD16 u2_top_left_avail_mask;
+ UWORD16 u2_luma_csbp; /** Coded 4x4 Sub Block Pattern (luma) */
+ UWORD16 u2_chroma_csbp; /** Coded 4x4 Sub Block Pattern (chroma) */
+ UWORD16 u2_mbx;
+ UWORD16 u2_mby;
+ UWORD16 u2_mask[2];
+
+ UWORD32 u4_pred_info_pkd_idx;
+
+ mb_neigbour_params_t *ps_left_mb; /** neighbour info: left MB */
+ mb_neigbour_params_t *ps_top_mb; /** neighbour info: top MB */
+ mb_neigbour_params_t *ps_top_right_mb; /** neighbour info: top-right MB */
+ mb_neigbour_params_t *ps_curmb; /** neighbour-info entry of the current MB */
+} dec_mb_info_t;
+
+
+/** Slice level parameters */
+typedef struct
+{
+ dec_pic_params_t *ps_pps; /** PPS used */
+ WORD32 i4_delta_pic_order_cnt[2];
+ WORD32 i4_poc; /** Pic order cnt of picture to which slice belongs*/
+ UWORD32 u4_idr_pic_id; /** IDR pic ID */
+ UWORD16 u2_first_mb_in_slice; /** Address of first MB in slice*/
+ UWORD16 u2_frame_num; /** Frame number from prev IDR pic */
+
+ UWORD8 u1_mbaff_frame_flag; /** Mb adaptive frame field flag */
+ UWORD8 u1_field_pic_flag; /** Field picture or not */
+ UWORD8 u1_bottom_field_flag; /** If slice belongs to bot field pic */
+ UWORD8 u1_slice_type; /** I/P/B/SI/SP */
+ WORD32 i4_pic_order_cnt_lsb; /** Picture Order Count */
+ UWORD8 u1_slice_qp; /** Add slice_qp_delta to pic_init_QP */
+ UWORD8 u1_disable_dblk_filter_idc; /** 0-dblk all edges; 1 - suppress; 2 - suppress only edges */
+ WORD8 i1_slice_alpha_c0_offset; /** dblk: alpha and C0 table offset {-12,12}*/
+ WORD8 i1_slice_beta_offset; /** dblk: beta table offset {-12, 12}*/
+ UWORD8 u1_sp_for_switch_flag;
+ UWORD8 u1_no_output_of_prior_pics_flag;
+ UWORD8 u1_long_term_reference_flag;
+ UWORD8 u1_num_ref_idx_lx_active[2];
+ UWORD8 u1_cabac_init_idc; /** cabac_init_idc */
+ UWORD8 u1_num_ref_idx_active_override_flag;
+ UWORD8 u1_direct_spatial_mv_pred_flag;
+ WORD32 (*pf_decodeDirect)(struct _DecStruct *ps_dec, /** function pointer; presumably selects the direct-mode decoder for this slice - confirm */
+ UWORD8 u1_wd_x,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD8 u1_mb_num);
+ UWORD8 u1_redundant_pic_cnt;
+ WORD8 i1_slice_qs_delta;
+ UWORD8 u1_nal_ref_idc; /** NAL ref idc of the Slice NAL unit */
+ UWORD8 u1_nal_unit_type; /** NAL unit type of the Slice NAL */
+ UWORD8 u1_direct_8x8_inference_flag;
+ UWORD8 u1_mmco_equalto5; /** any of the MMCO command equal to 5 */
+ UWORD8 u1_pic_order_cnt_type;
+ pocstruct_t s_POC;
+ /* DataStructures required for weighted prediction */
+ UWORD16 u2_log2Y_crwd; /** Packed luma and chroma log2_weight_denom */
+ /* [list0/list1]:[ref pics index]:[0-Y 1-Cb 2-Cr] [weight/offset],
+ weights and offsets are signed numbers, since they are packed, it is defined
+ unsigned. LSB byte : weight and MSB byte: offset */
+ UWORD32 u4_wt_ofst_lx[2][MAX_REF_BUFS][3];
+ void * pv_codec_handle; /* For Error Handling */
+ UWORD8 u1_end_of_frame_signal;
+
+ /* This is used when reordering is done in Forward or */
+ /* backward lists. This is because reordering can point */
+ /* to any valid entry in initial list irrespective of */
+ /* num_ref_idx_active which could be overwritten using */
+ /* ref_idx_reorder_flag */
+ UWORD8 u1_initial_list_size[2];
+ UWORD32 u4_mbs_in_slice;
+} dec_slice_params_t;
+
+
+typedef struct
+{ /* Per-MB information consumed by the deblocking filter */
+ UWORD8 u1_mb_type; /* Bit representations, X- reserved */
+ /** |Field/Frame|X|X|X|X|Bslice flag|PRED_NON_16x16 flag |Intra Mbflag| */
+ UWORD8 u1_mb_qp;
+ UWORD8 u1_deblocking_mode; /** dblk: Mode [ NO / NO TOP / NO LEFT] filter */
+ WORD8 i1_slice_alpha_c0_offset; /** dblk: alpha and C0 table offset {-12,12}*/
+ WORD8 i1_slice_beta_offset; /** dblk: beta table offset {-12, 12}*/
+ UWORD8 u1_single_call;
+ UWORD8 u1_topmb_qp;
+ UWORD8 u1_left_mb_qp;
+ UWORD32 u4_bs_table[10]; /* Boundary strength */
+
+} deblk_mb_t;
+
+typedef struct
+{ /* MB type and QP of a neighbouring MB; the decoder context keeps these for the left and top MBs */
+ UWORD8 u1_mb_type;
+ UWORD8 u1_mb_qp;
+} deblkmb_neighbour_t;
+
+#define MAX_MV_RESIDUAL_INFO_PER_MB 32
+#define MAX_REFIDX_INFO_PER_MB 4 /* sizes the per-MB arrays in parse_pmbarams_t */
+#define PART_NOT_DIRECT 0 /* PART_* : presumably the values stored in u1_is_direct below - confirm */
+#define PART_DIRECT_8x8 1
+#define PART_DIRECT_16x16 2
+typedef struct
+{ /* Parameters of one MB partition gathered during parsing */
+ UWORD8 u1_is_direct;
+ UWORD8 u1_pred_mode;
+ UWORD8 u1_sub_mb_num;
+ UWORD8 u1_partheight;
+ UWORD8 u1_partwidth;
+} parse_part_params_t;
+
+typedef struct
+{ /* Per-MB parse results; the arrays hold MAX_REFIDX_INFO_PER_MB entries */
+ UWORD8 u1_isI_mb;
+ UWORD8 u1_num_part;
+ UWORD32 *pu4_wt_offst[MAX_REFIDX_INFO_PER_MB];
+ WORD8 i1_ref_idx[2][MAX_REFIDX_INFO_PER_MB]; /* first dimension presumably selects list0 / list1 - confirm */
+ UWORD8 u1_col_info[MAX_REFIDX_INFO_PER_MB];
+} parse_pmbarams_t;
+
+typedef struct
+{ /* Padding manager: pad buffer pointers plus flags and vertical pad lengths */
+ UWORD8 *pu1_mb_y; /* pointer to N-Mb pad buffer Y (Horz) */
+ UWORD8 *pu1_mb_u; /* pointer to N-Mb pad buffer U (Horz) */
+ UWORD8 *pu1_mb_v; /* pointer to N-Mb pad buffer V (Horz) */
+ UWORD8 *pu1_row_y; /* pointer to row pad buffer Y (Vert) */
+ UWORD8 *pu1_row_u; /* pointer to row pad buffer U (Vert) */
+ UWORD8 *pu1_row_v; /* pointer to row pad buffer V (Vert) */
+ UWORD8 u1_vert_pad_top; /* flip-flop flag remembering pad area (Vert) */
+ UWORD8 u1_vert_pad_bot; /* flip-flop flag remembering pad area (Vert) */
+ UWORD8 u1_horz_pad; /* flip-flop flag remembering pad area (original comment says Vert; presumably Horz given the name - confirm) */
+ UWORD8 u1_pad_len_y_v; /* vertical pad amount for luma */
+ UWORD8 u1_pad_len_cr_v; /* vertical pad amount for chroma */
+} pad_mgr_t;
+
+typedef struct code_overlay_ctxt
+{ /* Four slice-level properties; presumably used to select code overlays (see code_overlay_tab_t) - confirm */
+ UWORD8 u1_pb_slice_type;
+ UWORD8 u1_entropy_coding_type;
+ UWORD8 u1_mbaff_frame_flag;
+ UWORD8 u1_b_direct_flag;
+} code_overlay_ctxt_t;
+
+#define ACCEPT_ALL_PICS (0x00) /* ACCEPT_ / REJECT_ : presumably values for u1_err_flag below - confirm */
+#define REJECT_CUR_PIC (0x01)
+#define REJECT_PB_PICS (0x02)
+
+#define PIC_TYPE_UNKNOWN (0xFF) /* PIC_TYPE_* : presumably values for u1_cur_pic_type below - confirm */
+#define PIC_TYPE_I (0x00)
+#define SYNC_FRM_DEFAULT (0xFFFFFFFF) /* presumably the initial value of u4_frm_sei_sync - confirm */
+#define INIT_FRAME (0xFFFFFF) /* note: 24 bits set, unlike SYNC_FRM_DEFAULT (32 bits) */
+
+typedef struct dec_err_status_t
+{ /* Error-handling state of the decoder */
+ UWORD8 u1_cur_pic_type;
+ UWORD8 u1_pic_aud_i;
+ UWORD8 u1_err_flag;
+ UWORD32 u4_frm_sei_sync;
+ UWORD32 u4_cur_frm;
+} dec_err_status_t;
+
+/**************************************************************************/
+/* Structure holds information about all high profile toolsets */
+/**************************************************************************/
+typedef struct
+{
+ /*****************************************/
+ /* variables required for scaling */
+ /*****************************************/
+ UWORD8 u1_scaling_present;
+ WORD16 *pi2_scale_mat[8]; /* eight pointers; count matches the 6 + 2 lists below (presumably they point at them - confirm) */
+
+ /*************************************************/
+ /* scaling matrices for frame macroblocks after */
+ /* inverse scanning */
+ /*************************************************/
+ WORD16 i2_scalinglist4x4[6][16];
+ WORD16 i2_scalinglist8x8[2][64];
+
+
+ /*****************************************/
+ /* variables required for transform8x8 */
+ /*****************************************/
+ UWORD8 u1_transform8x8_present;
+ UWORD8 u1_direct_8x8_inference_flag;
+ /* temporary variable to get noSubMbPartSizeLessThan8x8Flag from ih264d_parse_bmb_non_direct_cavlc */
+ UWORD8 u1_no_submb_part_size_lt8x8_flag;
+
+ /* needed for inverse scanning */
+ cavlc_cntxt_t s_cavlc_ctxt;
+
+ /* contexts for the CABAC related parsing */
+ bin_ctxt_model_t *ps_transform8x8_flag;
+ bin_ctxt_model_t *ps_sigcoeff_8x8_frame;
+ bin_ctxt_model_t *ps_last_sigcoeff_8x8_frame;
+ bin_ctxt_model_t *ps_coeff_abs_levelminus1;
+ bin_ctxt_model_t *ps_sigcoeff_8x8_field;
+ bin_ctxt_model_t *ps_last_sigcoeff_8x8_field;
+
+/* variables required for intra8x8 (none declared) */
+
+/* variables required for handling different Qp for Cb and Cr (none declared) */
+
+} high_profile_tools_t;
+
+typedef struct
+{ /* Describes one display frame as up to three component buffers */
+ UWORD32 u4_num_bufs; /* Number of buffers in each display frame. 2 for 420SP and 3 for 420P and so on */
+ void *buf[3]; /* Pointers to each of the components */
+ UWORD32 u4_bufsize[3]; /* one entry per component buffer */
+ UWORD32 u4_ofst[3]; /* one entry per component buffer */
+} disp_buf_t;
+typedef struct _dec_slice_struct
+{ /* Per-slice status; every member is volatile (presumably shared between decoder threads - confirm) */
+ volatile UWORD32 u4_first_mb_in_slice;
+ volatile UWORD32 u4_num_mbs_done_in_slice;
+ volatile UWORD32 slice_type;
+ volatile UWORD32 end_of_slice;
+ volatile UWORD32 slice_header_done;
+ volatile UWORD32 last_slice_in_frame;
+ volatile UWORD16 u2_log2Y_crwd; /* same name as the packed log2 weight denominators in dec_slice_params_t */
+ volatile UWORD16 u2_error_flag;
+ volatile void **ppv_map_ref_idx_to_poc;
+} dec_slice_struct_t;
+
+typedef struct
+{ /* A start row and a row count in Y, plus a flag; presumably one unit of format-conversion work - confirm */
+ UWORD32 u4_flag;
+ UWORD32 u4_start_y;
+ UWORD32 u4_num_rows_y;
+} fmt_conv_part_t;
+
+/**
+ * Structure to hold coefficient info for a 4x4 transform
+ */
+typedef struct
+{
+ /**
+ * significant coefficient map (16 bits for the 16 coefficients)
+ */
+ UWORD16 u2_sig_coeff_map;
+
+ /**
+ * holds coefficients (up to 16)
+ */
+ WORD16 ai2_level[16];
+}tu_sblk4x4_coeff_data_t;
+
+/**
+ * Structure to hold coefficient info for an 8x8 transform
+ */
+typedef struct
+{
+
+ /**
+ * significant coefficient map (2 x 32 bits for the 64 coefficients)
+ */
+ UWORD32 au4_sig_coeff_map[2];
+
+ /**
+ * holds coefficients (up to 64)
+ */
+ WORD16 ai2_level[64];
+}tu_blk8x8_coeff_data_t;
+
+/** Aggregating structure that is globally available */
+typedef struct _DecStruct
+{
+
+ /* Add below all other static memory allocations and pointers to items
+ that are dynamically allocated once per session */
+ dec_bit_stream_t *ps_bitstrm;
+ dec_seq_params_t *ps_cur_sps;
+ dec_pic_params_t *ps_cur_pps;
+ dec_slice_params_t *ps_cur_slice;
+
+ dec_pic_params_t *ps_pps;
+ dec_seq_params_t *ps_sps;
+ const UWORD16 *pu2_quant_scale_y;
+ const UWORD16 *pu2_quant_scale_u;
+ const UWORD16 *pu2_quant_scale_v;
+ UWORD16 u2_mbx;
+ UWORD16 u2_mby;
+
+ UWORD16 u2_frm_wd_y; /** Width for luma buff */
+ UWORD16 u2_frm_ht_y; /** Height for luma buff */
+ UWORD16 u2_frm_wd_uv; /** Width for chroma buff */
+ UWORD16 u2_frm_ht_uv; /** Height for chroma buff */
+ UWORD16 u2_frm_wd_in_mbs; /** Frame width expressed in MB units */
+ UWORD16 u2_frm_ht_in_mbs; /** Frame height expressed in MB units */
+ WORD32 i4_submb_ofst; /** Offset in subMbs from the top left edge */
+ /* Pointer to colocated Zero frame Image, will be used in B_DIRECT mode */
+ /* colZeroFlag | // 0th bit
+ field_flag | // 1st bit
+ XX | // 2:3 bit don't cares
+ subMbMode | // 4:5 bit
+ MbMode | // 6:7 bit */
+
+ UWORD8 *pu1_col_zero_flag;
+
+ UWORD16 u2_pic_wd; /** Width of the picture being decoded */
+ UWORD16 u2_pic_ht; /** Height of the picture being decoded */
+
+ UWORD8 u1_first_nal_in_pic;
+ UWORD8 u1_mb_ngbr_availablity;
+ UWORD8 u1_ref_idxl0_active_minus1;
+ UWORD8 u1_qp;
+ UWORD8 u1_qp_y_div6;
+ UWORD8 u1_qp_u_div6;
+ UWORD8 u1_qp_y_rem6;
+ UWORD8 u1_qp_u_rem6;
+
+ /*********************************/
+ /* configurable mb-group numbers */
+ /* very critical to the decoder */
+ /*********************************/
+ /************************************************************/
+ /* MB_GROUP should be a multiple of 2 */
+ /************************************************************/
+ UWORD8 u1_recon_mb_grp;
+ UWORD8 u1_recon_mb_grp_pair;
+ /* Variables to handle Cabac */
+ decoding_envirnoment_t s_cab_dec_env; /* < Structure for decoding_envirnoment_t */
+ /* These things need to be updated at each MbLevel */
+ WORD8 i1_next_ctxt_idx; /* < next Ctxt Index */
+ UWORD8 u1_currB_type;
+ WORD8 i1_prev_mb_qp_delta; /* Prev MbQpDelta */
+ UWORD8 u1_nal_unit_type;
+
+ ctxt_inc_mb_info_t *p_ctxt_inc_mb_map; /* Pointer to ctxt_inc_mb_info_t map */
+ ctxt_inc_mb_info_t *p_left_ctxt_mb_info; /* Pointer to left ctxt_inc_mb_info_t */
+ ctxt_inc_mb_info_t *p_top_ctxt_mb_info; /* Pointer to top ctxt_inc_mb_info_t */
+ ctxt_inc_mb_info_t *ps_curr_ctxt_mb_info; /* Pointer to current ctxt_inc_mb_info_t */
+ ctxt_inc_mb_info_t *ps_def_ctxt_mb_info; /* Pointer to default ctxt_inc_mb_info_t */
+
+ /* mv contexts for mv decoding using cabac */
+ //UWORD8 u1_top_mv_ctxt_inc[4][4];
+ /* Dimensions for u1_left_mv_ctxt_inc_arr is [2][4][4] for Mbaff case */
+ UWORD8 u1_left_mv_ctxt_inc_arr[2][4][4];
+ UWORD8 (*pu1_left_mv_ctxt_inc)[4];
+
+ UWORD8 u1_sub_mb_num;
+ UWORD8 u1_B; /** if B slice u1_B = 1 else 0 */
+ WORD16 i2_only_backwarddma_info_idx;
+ mv_pred_t *ps_mv; /** Pointer to the MV bank array */
+ mv_pred_t *ps_mv_bank_cur; /** Pointer to the MV bank array */
+ mv_pred_t s_default_mv_pred; /** Structure containing the default values
+ for MV predictor */
+
+ pred_info_t *ps_pred; /** Stores info to cfg MC */
+ pred_info_t *ps_pred_start;
+
+ UWORD32 u4_pred_info_idx;
+ pred_info_pkd_t *ps_pred_pkd;
+ pred_info_pkd_t *ps_pred_pkd_start;
+ UWORD32 u4_pred_info_pkd_idx;
+ UWORD8 *pu1_ref_buff; /** Destination buffer for DMAs */
+ UWORD32 u4_dma_buf_idx;
+
+ UWORD8 *pu1_y;
+ UWORD8 *pu1_u;
+ UWORD8 *pu1_v;
+
+ WORD16 *pi2_y_coeff;
+ UWORD8 *pu1_inv_scan;
+
+ /**
+ * Pointer frame level TU subblock coeff data
+ */
+ void *pv_pic_tu_coeff_data;
+
+ /**
+ * Pointer to TU subblock coeff data and number of subblocks and scan idx
+ * Incremented each time a coded subblock is processed
+ *
+ */
+ void *pv_parse_tu_coeff_data;
+
+ void *pv_proc_tu_coeff_data;
+
+ WORD16 *pi2_coeff_data;
+
+ cavlc_cntxt_t s_cavlc_ctxt;
+
+ UWORD32 u4_n_leftY[2];
+ UWORD32 u4_n_left_cr[2];
+ UWORD32 u4_n_left_temp_y;
+
+ UWORD8 pu1_left_nnz_y[4];
+ UWORD8 pu1_left_nnz_uv[4];
+ UWORD32 u4_n_left_temp_uv;
+ /***************************************************************************/
+ /* Base pointer to all the cabac contexts */
+ /***************************************************************************/
+ bin_ctxt_model_t *p_cabac_ctxt_table_t;
+
+ /***************************************************************************/
+ /* cabac context pointers for every SE mapped into in p_cabac_ctxt_table_t */
+ /***************************************************************************/
+ bin_ctxt_model_t *p_mb_type_t;
+ bin_ctxt_model_t *p_mb_skip_flag_t;
+ bin_ctxt_model_t *p_sub_mb_type_t;
+ bin_ctxt_model_t *p_mvd_x_t;
+ bin_ctxt_model_t *p_mvd_y_t;
+ bin_ctxt_model_t *p_ref_idx_t;
+ bin_ctxt_model_t *p_mb_qp_delta_t;
+ bin_ctxt_model_t *p_intra_chroma_pred_mode_t;
+ bin_ctxt_model_t *p_prev_intra4x4_pred_mode_flag_t;
+ bin_ctxt_model_t *p_rem_intra4x4_pred_mode_t;
+ bin_ctxt_model_t *p_mb_field_dec_flag_t;
+ bin_ctxt_model_t *p_cbp_luma_t;
+ bin_ctxt_model_t *p_cbp_chroma_t;
+ bin_ctxt_model_t *p_cbf_t[NUM_CTX_CAT];
+ bin_ctxt_model_t *p_significant_coeff_flag_t[NUM_CTX_CAT];
+ bin_ctxt_model_t *p_coeff_abs_level_minus1_t[NUM_CTX_CAT];
+
+ UWORD32 u4_num_pmbair; /** MB pair number */
+ mv_pred_t *ps_mv_left; /** Pointer to left motion vector bank */
+ mv_pred_t *ps_mv_top_left; /** Pointer to top left motion vector bank */
+ mv_pred_t *ps_mv_top_right; /** Pointer to top right motion vector bank */
+
+ UWORD8 *pu1_left_yuv_dc_csbp;
+
+ /* c64x_map.inc takes care of only this part
+ If you change/add any members above this,
+ modify c64x_map.inc accordingly */
+
+ void **pp_ext_g_table_ptr;
+
+ deblkmb_neighbour_t deblk_left_mb[2];
+ deblkmb_neighbour_t *ps_deblk_top_mb;
+ neighbouradd_t (*ps_left_mvpred_addr)[2]; /* Left MvPred Address Ping Pong*/
+// neighbouradd_t *ps_topMvPredAdd;
+
+ /***************************************************************************/
+ /* Ref_idx contexts are stored in the following way */
+ /* Array Idx 0,1 for reference indices in Forward direction */
+ /* Array Idx 2,3 for reference indices in backward direction */
+ /***************************************************************************/
+
+ /* Dimensions for u1_left_ref_ctxt_inc_arr is [2][4] for Mbaff:Top and Bot */
+ WORD8 i1_left_ref_idx_ctx_inc_arr[2][4];
+ WORD8 *pi1_left_ref_idx_ctxt_inc;
+
+ /*************************************************************************/
+ /* Arrangement of DC CSBP */
+ /* bits: b7 b6 b5 b4 b3 b2 b1 b0 */
+ /* CSBP: x x x x x Vdc Udc Ydc */
+ /*************************************************************************/
+ /*************************************************************************/
+ /* Points either to u1_yuv_dc_csbp_topmb or u1_yuv_dc_csbp_bot_mb */
+ /*************************************************************************/
+ UWORD8 u1_yuv_dc_csbp_topmb;
+ UWORD8 u1_yuv_dc_csbp_bot_mb;
+
+ /* DMA SETUP */
+ tfr_ctxt_t s_tran_addrecon_parse;
+ tfr_ctxt_t s_tran_addrecon;
+
+ /* slice Header Simplification */
+ UWORD8 u1_pr_sl_type;
+ UWORD8 u1_sl_typ_5_9;
+ WORD32 i4_frametype;
+ UWORD32 u4_app_disp_width;
+ WORD32 i4_error_code;
+ UWORD8 u1_first_pb_nal_in_pic;
+ UWORD32 u4_bitoffset;
+
+ /* Variables added to handle field pics */
+
+ UWORD8 u1_second_field;
+ WORD32 i4_pic_type;
+ WORD32 i4_content_type;
+ WORD32 i4_decode_header;
+ WORD32 i4_header_decoded;
+ UWORD32 u4_total_frames_decoded;
+
+ ctxt_inc_mb_info_t *ps_left_mb_ctxt_info; /* structure containing the left MB's
+ context info, incase of Mbaff */
+ pocstruct_t s_prev_pic_poc;
+ pocstruct_t s_cur_pic_poc;
+ WORD32 i4_cur_display_seq;
+ WORD32 i4_prev_max_display_seq;
+ WORD32 i4_max_poc;
+ deblk_mb_t *ps_cur_deblk_mb;
+
+ /* Pointers to local scratch buffers */
+ deblk_mb_t *ps_deblk_pic;
+
+ /* Pointers to Picture Buffers (Given by BufAPI Lib) */
+ struct pic_buffer_t *ps_cur_pic; /** Pointer to Current picture buffer */
+
+ /* Scratch Picture Buffers (Given by BufAPI Lib) */
+ struct pic_buffer_t s_cur_pic;
+
+ /* Current Slice related information */
+ volatile UWORD16 u2_cur_slice_num;
+ volatile UWORD16 u2_cur_slice_num_dec_thread;
+
+ /* Variables needed for Buffer API handling */
+ UWORD8 u1_nal_buf_id;
+ UWORD8 u1_pic_buf_id;
+ UWORD8 u1_pic_bufs;
+
+ WORD16 *pi2_pred1; //[441]; /** Temp predictor buffer for MC */
+ /* Pointer to reference Pic buffers list, 0:fwd, 1:bwd */
+ pic_buffer_t **ps_ref_pic_buf_lx[2];
+ /* refIdx to POC mapping */
+ void **ppv_map_ref_idx_to_poc;
+ UWORD32 *pu4_defI_wts_ofsts;
+ UWORD32 *pu4_wts_ofsts_mat;
+ UWORD32 *pu4_wt_ofsts;
+ UWORD32 *pu4_mbaff_wt_mat;
+ /* Function pointers to read Params common to CAVLC and CABAC */
+ WORD32 (*pf_parse_inter_mb)(struct _DecStruct * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD8 u1_mb_num,
+ UWORD8 u1_num_mbsNby2);
+ WORD32 (*pf_mvpred_ref_tfr_nby2mb)(struct _DecStruct * ps_dec,
+ UWORD8 u1_num_mbs,
+ UWORD8 u1_num_mbsNby2);
+
+ WORD32 (*pf_parse_inter_slice)(struct _DecStruct * ps_dec,
+ dec_slice_params_t * ps_slice,
+ UWORD16 u2_first_mb_in_slice);
+
+ UWORD32 (*pf_get_mb_info)(struct _DecStruct * ps_dec,
+ const UWORD16 u2_cur_mb_address,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mbskip_run);
+
+ /* Variables for Decode Buffer Management */
+ dpb_manager_t *ps_dpb_mgr;
+ dpb_commands_t *ps_dpb_cmds;
+
+ /* Variables Required for N MB design */
+ dec_mb_info_t *ps_nmb_info;
+
+ UWORD8 *pu1_y_intra_pred_line;
+ UWORD8 *pu1_u_intra_pred_line;
+ UWORD8 *pu1_v_intra_pred_line;
+
+ UWORD8 *pu1_cur_y_intra_pred_line;
+ UWORD8 *pu1_cur_u_intra_pred_line;
+ UWORD8 *pu1_cur_v_intra_pred_line;
+
+ UWORD8 *pu1_cur_y_intra_pred_line_base;
+ UWORD8 *pu1_cur_u_intra_pred_line_base;
+ UWORD8 *pu1_cur_v_intra_pred_line_base;
+
+ UWORD8 *pu1_prev_y_intra_pred_line;
+ UWORD8 *pu1_prev_u_intra_pred_line;
+ UWORD8 *pu1_prev_v_intra_pred_line;
+
+ UWORD32 u4_intra_pred_line_ofst;
+
+ /* Scratch ping reconstruction pointers for Y U V */
+ UWORD8 *pu1_y_scratch[2];
+ UWORD8 *pu1_u_scratch[2];
+ UWORD8 *pu1_v_scratch[2];
+ UWORD8 u1_yuv_scratch_idx;
+ UWORD8 u1_not_wait_rec;
+ UWORD8 u1_res_changed;
+
+ UWORD8 *pu1_yleft; /** Left Y pointer, used for intra-pred */
+ UWORD8 *pu1_uleft; /** Left U pointer, used for intra-pred */
+ UWORD8 *pu1_vleft; /** Left V pointer, used for intra-pred */
+ UWORD8 u1_y_topleft[2]; /** Left Y pointer, used for intra-pred */
+ UWORD8 u1_u_topleft[2]; /** Left U pointer, used for intra-pred */
+ UWORD8 u1_v_topleft[2]; /** Left V pointer, used for intra-pred */
+ UWORD16 u2_mb_group_cols_y; /** Number of Y pixels in the N MB group */
+ UWORD16 u2_mb_group_cols_cr; /** Number of U/V pixels in the N MB group */
+ UWORD16 u2_mb_group_cols_y1; /** Number of Y pixels in the N MB group */
+ UWORD16 u2_mb_group_cols_cr1; /** Number of U/V pixels in the N MB group */
+
+ mv_pred_t *ps_mv_cur; /** pointer to current motion vector bank */
+ mv_pred_t *ps_mv_top; /** pointer to top motion vector bank */
+ mv_pred_t *ps_mv_top_right2;/** Pointer to top right motion vector bank */
+ mv_pred_t *ps_mv_p[2]; /** Scratch ping motion vector bank */
+ mv_pred_t *ps_mv_top_p[MV_SCRATCH_BUFS]; /** Scratch top pong motion vector bank */
+ UWORD8 u1_mv_top_p;
+
+ deblk_mb_t *ps_deblk_mbn;
+ deblk_mb_t *ps_deblk_mbn_dec_thrd;/*pointer used by parsing when spearaet_parse is 1*/
+ deblk_mb_t *ps_deblk_mbn_curr;
+ deblk_mb_t *ps_deblk_mbn_prev;
+
+ UWORD8 *pu1_temp_mc_buffer;
+
+ struct _sei *ps_sei;
+ UWORD8 u1_pic_struct_copy;
+ /* Variables required for cropping */
+ UWORD16 u2_disp_width;
+ UWORD16 u2_disp_height;
+ UWORD16 u2_crop_offset_y;
+ UWORD16 u2_crop_offset_uv;
+
+ /* Variable required to get presentation time stamp through application */
+ UWORD32 u4_pts;
+
+ /* Variables used for gaps in frame number */
+ UWORD16 u2_prev_ref_frame_num;
+ UWORD8 u1_vert_up_scale_flag;
+ iv_mem_rec_t *ps_mem_tab;
+
+ UWORD16 u2_wait_id;
+
+ void *pi4_ctxt_save_register;
+ void *pi4_ctxt_save_register_dec;
+
+ UWORD8 u1_mb_idx;
+ struct pic_buffer_t *ps_col_pic;
+ void (*pf_parse_mvdirect)(struct _DecStruct*,
+ struct pic_buffer_t*,
+ directmv_t*,
+ UWORD8,
+ WORD32,
+ dec_mb_info_t *);
+ void *pv_dec_out;
+ void *pv_dec_in;
+ void *pv_scratch_sps_pps; /*used temeporarily store sps/ spps while parsing*/
+
+ /* state pointers to mb and partition information */
+ parse_pmbarams_t *ps_parse_mb_data;
+ parse_part_params_t *ps_parse_part_params;
+
+ /* scratch pointers to mb and partition information */
+ parse_part_params_t *ps_part;
+
+ UWORD8 u1_max_dec_frame_buffering;
+ pad_mgr_t s_pad_mgr;
+ UWORD8 (*pf_mvpred)(struct _DecStruct *ps_dec,
+ struct _DecMbInfo *ps_cur_mb_info,
+ mv_pred_t *ps_mv_pred,
+ mv_pred_t *ps_mv_nmb,
+ mv_pred_t *ps_mv_ntop,
+ UWORD8 u1_sub_mb_num,
+ UWORD8 uc_mb_part_width,
+ UWORD8 uc_lxstart,
+ UWORD8 uc_lxend,
+ UWORD8 u1_mb_mc_mode);
+ void (*pf_compute_bs)(struct _DecStruct * ps_dec,
+ struct _DecMbInfo * ps_cur_mb_info,
+ const UWORD16 u2_mbxn_mb);
+ UWORD8 u1_init_dec_flag;
+ prev_seq_params_t s_prev_seq_params;
+ UWORD8 u1_cur_mb_fld_dec_flag; /* current Mb fld or Frm */
+
+ code_overlay_ctxt_t s_code_overlay_ctxt;
+ UWORD8 u1_code_overlay;
+
+// WORD8 *pi1_cur_predmodes;
+ WORD8 pi1_left_pred_mode[8];
+ UWORD8 u1_topleft_mb_fld;
+ UWORD8 u1_topleft_mbtype;
+ UWORD8 u1_topleft_mb_fld_bot;
+ UWORD8 u1_topleft_mbtype_bot;
+ UWORD8 u1_deblk_mb_grp;
+ WORD16 i2_prev_slice_mbx;
+ WORD16 i2_prev_slice_mby;
+ UWORD16 u2_top_left_mask;
+ UWORD16 u2_top_right_mask;
+ dec_err_status_t * ps_dec_err_status;
+
+ UWORD32 *pu4_sos_signal;
+ UWORD8 u1_mb_idx_mv;
+ UWORD16 u2_mv_2mb[2];
+ UWORD32 u4_ref_buf_size;
+ UWORD32 u4_packet_cnt;
+ /* to remember the i4_status & input parameters from the sample app */
+ void *pv_dec_status; // itt_dec_status_t void pointer */
+ void *pv_dec_params; // itt_dec_prms_t void pointer
+ void *pv_app_ctxt;
+ UWORD32 u4_skip_frm_mask;
+ void *pv_fmt_con_ctxt;
+ /* for the parallel format conversion */
+ UWORD8 *pu1_frmt_conv_y[3];
+ UWORD8 *pu1_frmt_conv_u[3];
+ UWORD8 *pu1_frmt_conv_v[3];
+ UWORD8 *pu1_deblk_scr;
+ UWORD32 u4_deblk_scr_sz;
+
+ /* variable for finding the no.of mbs decoded in the current picture */
+ UWORD16 u2_total_mbs_coded;
+ /* member added for supporting fragmented annex - B */
+// frg_annex_read_t s_frag_annex_read;
+ /* added for vui_t, sei support*/
+ WORD32 i4_vui_frame_rate;
+ /* To Store the value of ref_idx_active for previous slice */
+ /* useful in error handling */
+ UWORD8 u1_num_ref_idx_lx_active_prev;
+ /* Flag added to come out of process call in annex-b if and only if frame is decoded */
+ /* presence of access unit delimiters and pps and sps */
+ UWORD8 u1_frame_decoded_flag;
+
+ /* To keep track of whether the last picture was decoded or not */
+ /* in case of skip mode set by the application */
+ UWORD8 u1_last_pic_not_decoded;
+ UWORD32 *pu4_return_remaining_bufs;
+
+ /* Used for disabling deblocking of non-reference pictures */
+ WORD32 i4_set_low_complexity_mode;
+ WORD32 i4_disable_deblock;
+
+ WORD32 e_dec_status;
+ UWORD32 u4_num_fld_in_frm;
+
+ /* Function pointer for 4x4 residual cavlc parsing based on total coeff */
+ WORD32 (*pf_cavlc_4x4res_block[3])(UWORD32 u4_isdc,
+ UWORD32 u4_total_coeff_trail_one, /**TotalCoefficients<<16+trailingones*/
+ dec_bit_stream_t *ps_bitstrm);
+
+ /* Function pointer array for interpolate functions called from the motion compensation module */
+ void (*p_mc_interpolate_x_y[16][3])(UWORD8*,
+ UWORD8*,
+ UWORD8*,
+ UWORD8,
+ UWORD16,
+ UWORD16,
+ UWORD8);
+
+ /**************************************************************************/
+ /* Function pointer for 4x4 totalcoeff, trlone and residual cavlc parsing */
+ /* based on u4_n (neighbouring nnz average) */
+ /* These point to two functions depending on (u4_n > 7) and (u4_n <= 7) */
+ /**************************************************************************/
+ WORD32 (*pf_cavlc_parse4x4coeff[2])(WORD16 *pi2_coeff_block,
+ UWORD32 u4_isdc, /* is it a DC block */
+ WORD32 u4_n,
+ struct _DecStruct *ps_dec, /** Decoder Parameters */
+ UWORD32 *pu4_total_coeff);
+
+ /**************************************************************************/
+ /* Function pointer for luma 8x8block cavlc parsing based on top and left */
+ /* neighbour availability. */
+ /**************************************************************************/
+ WORD32 (*pf_cavlc_parse_8x8block[4])(WORD16 *pi2_coeff_block,
+ UWORD32 u4_sub_block_strd,
+ UWORD32 u4_isdc,
+ struct _DecStruct *ps_dec,
+ UWORD8 *pu1_top_nnz,
+ UWORD8 *pu1_left_nnz,
+ UWORD8 u1_tran_form8x8,
+ UWORD8 u1_mb_field_decodingflag,
+ UWORD32 *pu4_csbp);
+
+ /**************************************************************************/
+ /* Ping pong top and current rows of mb neigbour_params */
+ /**************************************************************************/
+ mb_neigbour_params_t *ps_nbr_mb_row;
+ mb_neigbour_params_t *ps_cur_mb_row;
+ mb_neigbour_params_t *ps_top_mb_row;
+
+ /**************************************************************************/
+ /* Function pointer for 16x16 and non16x16 Bs1 calculations depending on */
+ /* P and B slice. */
+ /***************************************************************************/
+ void (*pf_fill_bs1[2][2])(mv_pred_t *ps_cur_mv_pred,
+ mv_pred_t *ps_top_mv_pred,
+ void **ppv_map_ref_idx_to_poc,
+ UWORD32 *pu4_bs_table, /* pointer to the BsTable array */
+ mv_pred_t *ps_leftmost_mv_pred,
+ neighbouradd_t *ps_left_addr,
+ void **u4_pic_addrress,
+ WORD32 i4_ver_mvlimit);
+
+ void (*pf_fill_bs_xtra_left_edge[2])(UWORD32 *pu4_bs, /* Base pointer of BS table */
+ WORD32 u4_left_mb_t_csbp, /* left mbpair's top csbp */
+ WORD32 u4_left_mb_b_csbp, /* left mbpair's bottom csbp*/
+ WORD32 u4_cur_mb_csbp, /* csbp of current mb */
+ UWORD32 u4_cur_mb_bot /* is top or bottom mb */
+
+ );
+ /* Function pointer array for BP and MP functions for MC*/
+ void (*p_motion_compensate)(struct _DecStruct * ps_dec,
+ dec_mb_info_t *ps_cur_mb_info);
+
+
+ void (*p_mc_dec_thread)(struct _DecStruct * ps_dec, dec_mb_info_t *ps_cur_mb_info);
+
+ /* Function pointer array for BP and MP functions for formMbPartInfo*/
+
+ WORD32 (*p_form_mb_part_info)(pred_info_pkd_t *ps_pred_pkd,
+ struct _DecStruct * ps_dec,
+ UWORD16 u2_mb_x,
+ UWORD16 u2_mb_y,
+ WORD32 mb_index,
+ dec_mb_info_t *ps_cur_mb_info);
+
+ WORD32 (*p_form_mb_part_info_thread)(pred_info_pkd_t *ps_pred_pkd,
+ struct _DecStruct * ps_dec,
+ UWORD16 u2_mb_x,
+ UWORD16 u2_mb_y,
+ WORD32 mb_index,
+ dec_mb_info_t *ps_cur_mb_info);
+
+
+ /* Required for cabac mbaff bottom mb */
+ UWORD32 u4_next_mb_skip;
+
+ void (*p_DeblockPicture[2])(struct _DecStruct *);
+
+ /* ! */
+ UWORD32 u4_ts;
+ UWORD8 u1_flushfrm;
+
+ /* Output format sent by the application */
+ UWORD8 u1_chroma_format;
+ UWORD8 u1_pic_decode_done;
+ UWORD32 u4_level_at_init;
+ UWORD32 u4_width_at_init;
+ UWORD32 u4_height_at_init;
+ WORD32 init_done;
+ WORD32 process_called;
+
+ /******************************************/
+ /* For the high profile related variables */
+ /******************************************/
+ high_profile_tools_t s_high_profile;
+ /* CBCR */
+ UWORD8 u1_qp_v_div6;
+ UWORD8 u1_qp_v_rem6;
+ /*
+ * TO help solve the dangling field case.
+ * Check for the previous frame number and the current frame number.
+ */
+ UWORD16 u2_prv_frame_num;
+ UWORD8 u1_top_bottom_decoded;
+ UWORD8 u1_dangling_field;
+
+ /*
+ * For Low Memory case
+ */
+ UWORD32 u4_num_ref_frames_at_init;
+ UWORD32 u4_num_reorder_frames_at_init;
+ UWORD32 u4_num_extra_disp_bufs_at_init;
+ UWORD32 u4_num_disp_bufs_requested;
+ WORD32 i4_display_delay;
+ UWORD32 u4_slice_start_code_found;
+
+ UWORD32 u4_mb_level_deblk;
+ UWORD32 u4_use_intrapred_line_copy;
+ UWORD32 u4_num_mbs_prev_nmb;
+ UWORD32 u4_app_deblk_disable_level;
+ UWORD32 u4_app_disable_deblk_frm;
+ WORD32 i4_app_skip_mode;
+ WORD32 i4_mv_frac_mask;
+
+ disp_buf_t disp_bufs[MAX_DISP_BUFS_NEW];
+ UWORD32 u4_disp_buf_mapping[MAX_DISP_BUFS_NEW];
+ UWORD32 u4_disp_buf_to_be_freed[MAX_DISP_BUFS_NEW];
+ UWORD32 u4_share_disp_buf;
+ UWORD32 u4_num_disp_bufs;
+ UWORD32 u4_prev_nal_skipped;
+ UWORD32 u4_return_to_app;
+ WORD32 i4_dec_skip_mode;
+
+ UWORD32 u4_bs_deblk_thread_created;
+ volatile UWORD32 u4_start_bs_deblk;
+ void *pv_bs_deblk_thread_handle;
+
+ UWORD32 u4_cur_bs_mb_num;
+ UWORD32 u4_bs_cur_slice_num_mbs;
+ UWORD32 u4_cur_slice_bs_done;
+ UWORD32 u4_cur_deblk_mb_num;
+ volatile UWORD16 u2_cur_slice_num_bs;
+
+ UWORD32 u4_deblk_mb_x;
+ UWORD32 u4_deblk_mb_y;
+ deblk_mb_t *ps_cur_deblk_thrd_mb;
+
+
+ iv_yuv_buf_t s_disp_frame_info;
+ UWORD32 u4_fmt_conv_num_rows;
+ UWORD32 u4_fmt_conv_cur_row;
+ ivd_out_bufdesc_t *ps_out_buffer;
+ ivd_get_display_frame_op_t s_disp_op;
+ UWORD32 u4_stop_threads;
+ UWORD32 u4_output_present;
+
+ volatile UWORD16 cur_dec_mb_num;
+ volatile UWORD16 u2_cur_mb_addr;
+ WORD16 i2_dec_thread_mb_y;
+
+ UWORD8 u1_separate_parse;
+// 0: slice parse not started, 1: slice decode can start, 2: slice in error
+ volatile UWORD32 u4_start_frame_decode;
+ UWORD32 u4_dec_thread_created;
+ void *pv_dec_thread_handle;
+ volatile UWORD8 *pu1_dec_mb_map;
+ volatile UWORD8 *pu1_recon_mb_map;
+ volatile UWORD16 *pu2_slice_num_map;
+ dec_slice_struct_t *ps_dec_slice_buf;
+ void *pv_map_ref_idx_to_poc_buf;
+ dec_mb_info_t *ps_frm_mb_info;
+ volatile dec_slice_struct_t * volatile ps_parse_cur_slice;
+ volatile dec_slice_struct_t * volatile ps_decode_cur_slice;
+ volatile dec_slice_struct_t * volatile ps_computebs_cur_slice;
+ UWORD32 u4_cur_slice_decode_done;
+ UWORD32 u4_extra_mem_used;
+
+ UWORD32 u4_first_slice_in_pic;
+ UWORD32 u4_num_cores;
+ IVD_ARCH_T e_processor_arch;
+ IVD_SOC_T e_processor_soc;
+
+ /**
+ * Pictures that are degraded
+ * 0 : No degrade
+ * 1 : Only on non-reference frames
+ * 2 : Use interval specified by u4_nondegrade_interval
+ * 3 : All non-key frames
+ * 4 : All frames
+ */
+ WORD32 i4_degrade_pics;
+
+ /**
+ * Interval for pictures which are completely decoded without any degradation
+ */
+ WORD32 i4_nondegrade_interval;
+
+ /**
+ * bit position (lsb is zero): Type of degradation
+ * 1 : Disable deblocking
+ * 2 : Faster inter prediction filters
+ * 3 : Fastest inter prediction filters
+ */
+ WORD32 i4_degrade_type;
+
+ /** Degrade pic count, Used to maintain the interval between non-degraded pics
+ *
+ */
+ WORD32 i4_degrade_pic_cnt;
+
+ fmt_conv_part_t as_fmt_conv_part[2];
+ UWORD32 u4_fmt_conv_in_process;
+ UWORD32 u4_pic_buf_got;
+ UWORD16 u2_mb_skip_error;
+ volatile UWORD16 u2_skip_deblock;
+
+ /**
+ * Col flag and mv pred buffer manager
+ */
+ void *pv_mv_buf_mgr;
+
+ /**
+ * Picture buffer manager
+ */
+ void *pv_pic_buf_mgr;
+
+ /**
+ * Display buffer manager
+ */
+ void *pv_disp_buf_mgr;
+
+ void *apv_buf_id_pic_buf_map[MAX_DISP_BUFS_NEW];
+
+ UWORD8 au1_pic_buf_id_mv_buf_id_map[MAX_DISP_BUFS_NEW];
+
+ UWORD8 au1_pic_buf_ref_flag[MAX_DISP_BUFS_NEW];
+
+ ih264_default_weighted_pred_ft *pf_default_weighted_pred_luma;
+
+ ih264_default_weighted_pred_ft *pf_default_weighted_pred_chroma;
+
+ ih264_weighted_pred_ft *pf_weighted_pred_luma;
+
+ ih264_weighted_pred_ft *pf_weighted_pred_chroma;
+
+ ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_luma;
+
+ ih264_weighted_bi_pred_ft *pf_weighted_bi_pred_chroma;
+
+ ih264_pad *pf_pad_top;
+ ih264_pad *pf_pad_bottom;
+ ih264_pad *pf_pad_left_luma;
+ ih264_pad *pf_pad_left_chroma;
+ ih264_pad *pf_pad_right_luma;
+ ih264_pad *pf_pad_right_chroma;
+
+ ih264_inter_pred_chroma_ft *pf_inter_pred_chroma;
+
+ ih264_inter_pred_luma_ft *apf_inter_pred_luma[16];
+
+ ih264_intra_pred_luma_ft *apf_intra_pred_luma_16x16[4];
+
+ ih264_intra_pred_luma_ft *apf_intra_pred_luma_8x8[9];
+
+ ih264_intra_pred_luma_ft *apf_intra_pred_luma_4x4[9];
+
+ ih264_intra_pred_ref_filtering_ft *pf_intra_pred_ref_filtering;
+
+ ih264_intra_pred_chroma_ft *apf_intra_pred_chroma[4];
+
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4;
+
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_4x4_dc;
+
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8;
+
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_luma_8x8_dc;
+
+ ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4;
+
+ ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc;
+
+ ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4;
+
+ /**
+ * deblock vertical luma edge with blocking strength 4
+ */
+ ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4;
+
+ /**
+ * deblock vertical luma edge with blocking strength less than 4
+ */
+ ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4;
+
+ /**
+ * deblock vertical luma edge with blocking strength 4 for mbaff
+ */
+ ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4_mbaff;
+
+ /**
+ * deblock vertical luma edge with blocking strength less than 4 for mbaff
+ */
+ ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4_mbaff;
+
+ /**
+ * deblock vertical chroma edge with blocking strength 4
+ */
+ ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4;
+
+ /**
+ * deblock vertical chroma edge with blocking strength less than 4
+ */
+ ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4;
+
+ /**
+ * deblock vertical chroma edge with blocking strength 4 for mbaff
+ */
+ ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4_mbaff;
+
+ /**
+ * deblock vertical chroma edge with blocking strength less than 4 for mbaff
+ */
+ ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4_mbaff;
+
+ /**
+ * deblock horizontal luma edge with blocking strength 4
+ */
+ ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4;
+
+ /**
+ * deblock horizontal luma edge with blocking strength less than 4
+ */
+ ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4;
+
+ /**
+ * deblock horizontal chroma edge with blocking strength 4
+ */
+ ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4;
+
+ /**
+ * deblock horizontal chroma edge with blocking strength less than 4
+ */
+ ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4;
+
+
+} dec_struct_t;
+
+#endif /* _H264_DEC_STRUCTS_H */
diff --git a/decoder/ih264d_tables.c b/decoder/ih264d_tables.c
new file mode 100755
index 0000000..ddca2fb
--- /dev/null
+++ b/decoder/ih264d_tables.c
@@ -0,0 +1,872 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ **************************************************************************
+ * \file ih264d_tables.c
+ *
+ * \brief
+ * Definition of all tables used by the H.264 decoder
+ *
+ * \date
+ * 17/09/2004
+ *
+ * \author MA
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_defs.h"
+
+const UWORD8 gau1_ih264d_qp_scale_cr[] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 29, 30, 31, 32, 32, 33, 34, 34, 35, 35, 36, 36, 37, 37, 37, 38, 38, 38,
+ 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39 };
+const UWORD8 gau1_ih264d_alpha_table[] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 25, 28, 32, 36,
+ 40, 45, 50, 56, 63, 71, 80, 90, 101, 113, 127, 144, 162, 182, 203, 226,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 };
+const UWORD8 gau1_ih264d_beta_table[] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11,
+ 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 18, 18, 18,
+ 18, 18, 18, 18, 18, 18, 18, 18, 18 };
+
+const UWORD8 gau1_ih264d_clip_table[][4] =
+ {
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 1 },
+ { 0, 0, 0, 1 },
+ { 0, 0, 0, 1 },
+ { 0, 0, 0, 1 },
+ { 0, 0, 1, 1 },
+ { 0, 0, 1, 1 },
+ { 0, 1, 1, 1 },
+ { 0, 1, 1, 1 },
+ { 0, 1, 1, 1 },
+ { 0, 1, 1, 1 },
+ { 0, 1, 1, 2 },
+ { 0, 1, 1, 2 },
+ { 0, 1, 1, 2 },
+ { 0, 1, 1, 2 },
+ { 0, 1, 2, 3 },
+ { 0, 1, 2, 3 },
+ { 0, 2, 2, 3 },
+ { 0, 2, 2, 4 },
+ { 0, 2, 3, 4 },
+ { 0, 2, 3, 4 },
+ { 0, 3, 3, 5 },
+ { 0, 3, 4, 6 },
+ { 0, 3, 4, 6 },
+ { 0, 4, 5, 7 },
+ { 0, 4, 5, 8 },
+ { 0, 4, 6, 9 },
+ { 0, 5, 7, 10 },
+ { 0, 6, 8, 11 },
+ { 0, 6, 8, 13 },
+ { 0, 7, 10, 14 },
+ { 0, 8, 11, 16 },
+ { 0, 9, 12, 18 },
+ { 0, 10, 13, 20 },
+ { 0, 11, 15, 23 },
+ { 0, 13, 17, 25 },
+
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 },
+ { 0, 13, 17, 25 }
+
+ };
+const UWORD8 gau1_ih264d_clip_table_deblock[] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51 };
+
+/****************DEBLOCKING TABLES ENDS*******************/
+
+/*************************************************************/
+/* BS CALCULATION TABLES */
+/*************************************************************/
+UWORD32 const gau4_ih264d_packed_bs2[32] =
+ {
+ /*************************************************************/
+ /* BS TABLES FOR NORMAL EDGES */
+ /*************************************************************/
+ 0x00000000,
+ 0x02000000, 0x00020000, 0x02020000, 0x00000200, 0x02000200, 0x00020200,
+ 0x02020200, 0x00000002, 0x02000002, 0x00020002, 0x02020002, 0x00000202,
+ 0x02000202, 0x00020202, 0x02020202,
+
+ /*************************************************************/
+ /* BS TABLES FOR XTRA LEFT MB EDGES IN MBAFF CASE */
+ /*************************************************************/
+ 0x01010101,
+ 0x02010101, 0x01020101, 0x02020101, 0x01010201, 0x02010201, 0x01020201,
+ 0x02020201, 0x01010102, 0x02010102, 0x01020102, 0x02020102, 0x01010202,
+ 0x02010202, 0x01020202, 0x02020202, };
+
+UWORD16 const gau2_ih264d_4x4_v2h_reorder[16] =
+ { 0x0000, 0x0001, 0x0010, 0x0011, 0x0100, 0x0101, 0x0110, 0x0111, 0x1000,
+ 0x1001, 0x1010, 0x1011, 0x1100, 0x1101, 0x1110, 0x1111 };
+
+/****************SCALING TABLES STARTS *****************/
+const WORD16 gai2_ih264d_default_intra4x4[16] =
+ { 6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, 32, 32, 37, 37, 42 };
+
+const WORD16 gai2_ih264d_default_inter4x4[16] =
+ { 10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, 27, 27, 30, 30, 34 };
+
+const WORD16 gai2_ih264d_default_intra8x8[64] =
+ { 6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, 18, 18, 18, 18, 23, 23, 23, 23,
+ 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, 27, 27, 27, 29,
+ 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31, 31, 33, 33, 33, 33, 33, 36,
+ 36, 36, 36, 38, 38, 38, 40, 40, 42 };
+
+const WORD16 gai2_ih264d_default_inter8x8[64] =
+ { 9, 13, 13, 15, 13, 15, 17, 17, 17, 17, 19, 19, 19, 19, 19, 21, 21, 21, 21,
+ 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, 24, 24, 24, 24, 24, 24, 25,
+ 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 30,
+ 30, 30, 30, 32, 32, 32, 33, 33, 35 };
+
+const WORD16 gai2_ih264d_flat_4x4[16] =
+ { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 };
+
+const WORD16 gai2_ih264d_flat_8x8[64] =
+ { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 };
+
+/****************SCALING TABLES ENDS *****************/
+
+/*Inverse scan tables for individual 4x4 blocks of 8x8 transform coeffs of CAVLC */
+
+/* progressive */
+
+const UWORD8 gau1_ih264d_inv_scan_prog8x8_cavlc[4][16] =
+ {
+ { 0, 9, 17, 18, 12, 40, 27, 7, 35, 57, 29, 30, 58, 38, 53, 47 }, /* for First subblock */
+ { 1, 2, 24, 11, 19, 48, 20, 14, 42, 50, 22, 37, 59, 31, 60, 55 }, /* for second subblock */
+ { 8, 3, 32, 4, 26, 41, 13, 21, 49, 43, 15, 44, 52, 39, 61, 62 }, /* for third subblock */
+ { 16, 10, 25, 5, 33, 34, 6, 28, 56, 36, 23, 51, 45, 46, 54, 63 } /* for fourth subblock */
+ };
+
+const UWORD8 gau1_ih264d_inv_scan_int8x8_cavlc[4][16] =
+ {
+ { 0, 9, 2, 56, 18, 26, 34, 27, 35, 28, 36, 29, 45, 7, 54, 39 }, /* for First subblock */
+ { 8, 24, 25, 33, 41, 11, 42, 12, 43, 13, 44, 14, 53, 15, 62, 47 }, /* for second subblock */
+ { 16, 32, 40, 10, 49, 4, 50, 5, 51, 6, 52, 22, 61, 38, 23, 55 }, /* for third subblock */
+ { 1, 17, 48, 3, 57, 19, 58, 20, 59, 21, 60, 37, 30, 46, 31, 63 } /* for fourth subblock */
+ };
+
+/*Inverse scan tables for individual 8x8 blocks of 8x8 transform coeffs of CABAC */
+/* progressive */
+
+const UWORD8 gau1_ih264d_inv_scan_prog8x8_cabac[64] =
+ { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33,
+ 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43,
+ 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53,
+ 60, 61, 54, 47, 55, 62, 63 };
+
+/* interlace */
+
+const UWORD8 gau1_ih264d_inv_scan_int8x8_cabac[64] =
+ { 0, 8, 16, 1, 9, 24, 32, 17, 2, 25, 40, 48, 56, 33, 10, 3, 18, 41, 49, 57,
+ 26, 11, 4, 19, 34, 42, 50, 58, 27, 12, 5, 20, 35, 43, 51, 59, 28, 13, 6,
+ 21, 36, 44, 52, 60, 29, 14, 22, 37, 45, 53, 61, 30, 7, 15, 38, 46, 54, 62,
+ 23, 31, 39, 47, 55, 63 };
+
+/****************PARSING TABLES *******************/
+UWORD8 const gau1_ih264d_subblk_offset[16] =
+ { 8, 9, 12, 13, 10, 11, 14, 15, 16, 17, 20, 21, 18, 19, 22, 23 };
+
+const UWORD8 gau1_ih264d_cbp_tab[6] =
+ { 0, 16, 32, 15, 31, 47 };
+
+/** Gives the CBP value for each codeword number (row index): column 0 is for intra, column 1 for inter */
+
+const UWORD8 gau1_ih264d_cbp_table[48][2] =
+ {
+ { 47, 0 },
+ { 31, 16 },
+ { 15, 1 },
+ { 0, 2 },
+ { 23, 4 },
+ { 27, 8 },
+ { 29, 32 },
+ { 30, 3 },
+ { 7, 5 },
+ { 11, 10 },
+ { 13, 12 },
+ { 14, 15 },
+ { 39, 47 },
+ { 43, 7 },
+ { 45, 11 },
+ { 46, 13 },
+ { 16, 14 },
+ { 3, 6 },
+ { 5, 9 },
+ { 10, 31 },
+ { 12, 35 },
+ { 19, 37 },
+ { 21, 42 },
+ { 26, 44 },
+ { 28, 33 },
+ { 35, 34 },
+ { 37, 36 },
+ { 42, 40 },
+ { 44, 39 },
+ { 1, 43 },
+ { 2, 45 },
+ { 4, 46 },
+ { 8, 17 },
+ { 17, 18 },
+ { 18, 20 },
+ { 20, 24 },
+ { 24, 19 },
+ { 6, 21 },
+ { 9, 26 },
+ { 22, 28 },
+ { 25, 23 },
+ { 32, 27 },
+ { 33, 29 },
+ { 34, 30 },
+ { 36, 22 },
+ { 40, 25 },
+ { 38, 38 },
+ { 41, 41 }, };
+/****************PARSING TABLES ENDS *******************/
+
+/****************DECODE SLICE TABLES STARTS *******************/
+/*Definition of Tables needed by functions of this file */
+const UWORD8 gau1_ih264d_inv_scan[16] =
+ { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };
+
+const UWORD8 gau1_ih264d_inv_scan_fld[16] =
+ { 0, 4, 1, 8, 12, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+const UWORD8 gau1_ih264d_dequant_matrix[6][16] =
+{ /* 6 rows x 16 entries; same values as gau2_ih264_iquant_scale_4x4, stored as UWORD8 */
+ { 10, 13, 10, 13, 13, 16, 13, 16, 10, 13, 10, 13, 13, 16, 13, 16 },
+ { 11, 14, 11, 14, 14, 18, 14, 18, 11, 14, 11, 14, 14, 18, 14, 18 },
+ { 13, 16, 13, 16, 16, 20, 16, 20, 13, 16, 13, 16, 16, 20, 16, 20 },
+ { 14, 18, 14, 18, 18, 23, 18, 23, 14, 18, 14, 18, 18, 23, 18, 23 },
+ { 16, 20, 16, 20, 20, 25, 20, 25, 16, 20, 16, 20, 20, 25, 20, 25 },
+ { 18, 23, 18, 23, 23, 29, 23, 29, 18, 23, 18, 23, 23, 29, 23, 29 }
+};
+
+const UWORD16 gau2_ih264_iquant_scale_4x4[6][16] =
+ {
+ { 10, 13, 10, 13, 13, 16, 13, 16, 10, 13, 10, 13, 13, 16, 13, 16 },
+ { 11, 14, 11, 14, 14, 18, 14, 18, 11, 14, 11, 14, 14, 18, 14, 18 },
+ { 13, 16, 13, 16, 16, 20, 16, 20, 13, 16, 13, 16, 16, 20, 16, 20 },
+ { 14, 18, 14, 18, 18, 23, 18, 23, 14, 18, 14, 18, 18, 23, 18, 23 },
+ { 16, 20, 16, 20, 20, 25, 20, 25, 16, 20, 16, 20, 20, 25, 20, 25 },
+ { 18, 23, 18, 23, 23, 29, 23, 29, 18, 23, 18, 23, 23, 29, 23, 29 } };
+
+const UWORD8 gau1_ih264d_dequant8x8_zigzag_cavlc[4][6][16] =
+ {
+ {
+ { 20, 18, 24, 32, 19, 19, 18, 19, 19, 18, 18, 24,
+ 24, 25, 24, 18 }, /* for First subblock */
+ { 22, 19, 26, 35, 21, 21, 19, 21, 21, 19, 19, 26,
+ 26, 28, 26, 19 },
+ { 26, 23, 31, 42, 24, 24, 23, 24, 24, 23, 23, 31,
+ 31, 33, 31, 23 },
+ { 28, 25, 33, 45, 26, 26, 25, 26, 26, 25, 25, 33,
+ 33, 35, 33, 25 },
+ { 32, 28, 38, 51, 30, 30, 28, 30, 30, 28, 28, 38,
+ 38, 40, 38, 28 },
+ { 36, 32, 43, 58, 34, 34, 32, 34, 34, 32, 32, 43,
+ 43, 46, 43, 32 } },
+ {
+ { 19, 25, 19, 18, 24, 25, 25, 24, 24, 32, 32, 19,
+ 18, 18, 19, 24 }, /* for second subblock */
+ { 21, 28, 21, 19, 26, 28, 28, 26, 26, 35, 35,
+ 21, 19, 19, 21, 26 },
+ { 24, 33, 24, 23, 31, 33, 33, 31, 31, 42, 42,
+ 24, 23, 23, 24, 31 },
+ { 26, 35, 26, 25, 33, 35, 35, 33, 33, 45, 45,
+ 26, 25, 25, 26, 33 },
+ { 30, 40, 30, 28, 38, 40, 40, 38, 38, 51, 51,
+ 30, 28, 28, 30, 38 },
+ { 34, 46, 34, 32, 43, 46, 46, 43, 43, 58, 58,
+ 34, 32, 32, 34, 43 } },
+ {
+ { 19, 19, 20, 20, 24, 18, 18, 24, 24, 18, 18, 19,
+ 25, 19, 18, 24 }, /* for third subblock */
+ { 21, 21, 22, 22, 26, 19, 19, 26, 26, 19, 19,
+ 21, 28, 21, 19, 26 },
+ { 24, 24, 26, 26, 31, 23, 23, 31, 31, 23, 23,
+ 24, 33, 24, 23, 31 },
+ { 26, 26, 28, 28, 33, 25, 25, 33, 33, 25, 25,
+ 26, 35, 26, 25, 33 },
+ { 30, 30, 32, 32, 38, 28, 28, 38, 38, 28, 28,
+ 30, 40, 30, 28, 38 },
+ { 34, 34, 36, 36, 43, 32, 32, 43, 43, 32, 32,
+ 34, 46, 34, 32, 43 } },
+ {
+ { 25, 24, 18, 19, 19, 25, 25, 19, 19, 20, 24, 24,
+ 18, 24, 32, 18 }, /* for fourth subblock */
+ { 28, 26, 19, 21, 21, 28, 28, 21, 21, 22, 26,
+ 26, 19, 26, 35, 19 },
+ { 33, 31, 23, 24, 24, 33, 33, 24, 24, 26, 31,
+ 31, 23, 31, 42, 23 },
+ { 35, 33, 25, 26, 26, 35, 35, 26, 26, 28, 33,
+ 33, 25, 33, 45, 25 },
+ { 40, 38, 28, 30, 30, 40, 40, 30, 30, 32, 38,
+ 38, 28, 38, 51, 28 },
+ { 46, 43, 32, 34, 34, 46, 46, 34, 34, 36, 43,
+ 43, 32, 43, 58, 32 } }
+
+ };
+
+const UWORD16 gau1_ih264d_dequant8x8_cavlc[6][64] =
+ {
+ { 20, 19, 25, 19, 20, 19, 25, 19, 19, 18, 24, 18, 19,
+ 18, 24, 18, 25, 24, 32, 24, 25, 24, 32, 24, 19, 18,
+ 24, 18, 19, 18, 24, 18, 20, 19, 25, 19, 20, 19, 25,
+ 19, 19, 18, 24, 18, 19, 18, 24, 18, 25, 24, 32, 24,
+ 25, 24, 32, 24, 19, 18, 24, 18, 19, 18, 24, 18 },
+ { 22, 21, 28, 21, 22, 21, 28, 21, 21, 19, 26, 19, 21,
+ 19, 26, 19, 28, 26, 35, 26, 28, 26, 35, 26, 21, 19,
+ 26, 19, 21, 19, 26, 19, 22, 21, 28, 21, 22, 21, 28,
+ 21, 21, 19, 26, 19, 21, 19, 26, 19, 28, 26, 35, 26,
+ 28, 26, 35, 26, 21, 19, 26, 19, 21, 19, 26, 19 },
+ { 26, 24, 33, 24, 26, 24, 33, 24, 24, 23, 31, 23, 24,
+ 23, 31, 23, 33, 31, 42, 31, 33, 31, 42, 31, 24, 23,
+ 31, 23, 24, 23, 31, 23, 26, 24, 33, 24, 26, 24, 33,
+ 24, 24, 23, 31, 23, 24, 23, 31, 23, 33, 31, 42, 31,
+ 33, 31, 42, 31, 24, 23, 31, 23, 24, 23, 31, 23 },
+ { 28, 26, 35, 26, 28, 26, 35, 26, 26, 25, 33, 25, 26,
+ 25, 33, 25, 35, 33, 45, 33, 35, 33, 45, 33, 26, 25,
+ 33, 25, 26, 25, 33, 25, 28, 26, 35, 26, 28, 26, 35,
+ 26, 26, 25, 33, 25, 26, 25, 33, 25, 35, 33, 45, 33,
+ 35, 33, 45, 33, 26, 25, 33, 25, 26, 25, 33, 25 },
+ { 32, 30, 40, 30, 32, 30, 40, 30, 30, 28, 38, 28, 30,
+ 28, 38, 28, 40, 38, 51, 38, 40, 38, 51, 38, 30, 28,
+ 38, 28, 30, 28, 38, 28, 32, 30, 40, 30, 32, 30, 40,
+ 30, 30, 28, 38, 28, 30, 28, 38, 28, 40, 38, 51, 38,
+ 40, 38, 51, 38, 30, 28, 38, 28, 30, 28, 38, 28 },
+ { 36, 34, 46, 34, 36, 34, 46, 34, 34, 32, 43, 32, 34,
+ 32, 43, 32, 46, 43, 58, 43, 46, 43, 58, 43, 34, 32,
+ 43, 32, 34, 32, 43, 32, 36, 34, 46, 34, 36, 34, 46,
+ 34, 34, 32, 43, 32, 34, 32, 43, 32, 46, 43, 58, 43,
+ 46, 43, 58, 43, 34, 32, 43, 32, 34, 32, 43, 32 }, };
+
+/****************DECODE SLICE TABLES ENDS *******************/
+
+/****************MOTION VECTOR DECODING TABLES STARTS *******************/
+
+/**
+ **************************************************************************
+ * \brief This array is used to evaluate the condition when only one of
+ * predictor subMbs has a reference frame equal to that of E subMb.
+ **************************************************************************
+ */
+
+const WORD8 gau1_ih264d_mv_pred_condition[] =
+ { -1, 0, 1, -1, 2, -1, -1, -1 };
+
+/** Number of subMbs for the 8x8 prediction mode */
+const UWORD8 gau1_ih264d_num_submb_part[] =
+ { 1, 2, 2, 4 };
+
+/** Width of the 8x8 prediction mode in terms of subMbs */
+const UWORD8 gau1_ih264d_submb_partw[] =
+ { 2, 2, 1, 1 };
+
+/** Height of the 8x8 prediction mode in terms of subMbs */
+const UWORD8 gau1_ih264d_submb_parth[] =
+ { 2, 1, 2, 1 };
+
+/** Number of MB partitions for the MB prediction mode */
+const UWORD8 gau1_ih264d_num_mb_part[] =
+ { 1, 2, 2, 4 };
+
+/** Width of the MB partition in terms of subMbs */
+const UWORD8 gau1_ih264d_mb_partw[] =
+ { 4, 4, 2, 2, 2 };
+
+/** Height of the MB partition in terms of subMbs */
+const UWORD8 gau1_ih264d_mb_parth[] =
+ { 4, 2, 4, 2, 2 };
+
+/** MB partition information is packed into a UWORD32 {0,number,width,height} */
+const UWORD32 gau4_ih264d_submb_part[] =
+ { 0x00010202, 0x00020201, 0x00020102, 0x00040101 };
+
+const UWORD8 gau1_ih264d_submb_indx_mod[] =
+ { 0, 0, /* 16x16 */
+ 0, 8, /* 16x8 */
+ 0, 2, /* 8x16 */
+ 0, 0, /* 8x8 */
+ 0, 4, /* 8x4 */
+ 0, 1, /* 4x8 */
+ 0, 1, 3, 1 /* 4x4 */
+ };
+
+/** This table is used to assign CBPs to Inter MBs. */
+const UWORD8 gau1_ih264d_cbp_inter[] =
+ { 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, 14, 6, 9, 31, 35,
+ 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, 17, 18, 20, 24, 19, 21, 26,
+ 28, 23, 27, 29, 30, 22, 25, 38, 41 };
+
+/** Motion comp modes for P followed by B,
+ 0 to 4 : P Mbs
+ 5 to 27 : B Mbs
+ 28 to 30 : DIRECT */
+const UWORD8 gau1_ih264d_mb_mc_mode[] =
+ {
+ PRED_16x16,
+ PRED_16x8, PRED_8x16, PRED_8x8, PRED_8x8R0,
+ PRED_16x16,
+ PRED_16x16, PRED_16x16, PRED_16x16, PRED_16x8, PRED_8x16,
+ PRED_16x8,
+ PRED_8x16, PRED_16x8, PRED_8x16, PRED_16x8, PRED_8x16,
+ PRED_16x8,
+ PRED_8x16, PRED_16x8, PRED_8x16, PRED_16x8, PRED_8x16,
+ PRED_16x8,
+ PRED_8x16, PRED_16x8, PRED_8x16, PRED_8x8,
+ /* Self defined modes for B_SKIP and DIRECT16x16 */
+ PRED_8x8,
+ PRED_8x8, PRED_8x8 };
+
+const UWORD8 gau1_ih264d_submb_mc_mode[] =
+ { SUBMB_8x8, SUBMB_8x4, SUBMB_4x8, SUBMB_4x4,
+ SUBMB_8x8,
+ SUBMB_8x8, SUBMB_8x8, SUBMB_8x8, SUBMB_8x4, SUBMB_4x8,
+ SUBMB_8x4,
+ SUBMB_4x8, SUBMB_8x4, SUBMB_4x8, SUBMB_4x4, SUBMB_4x4, SUBMB_4x4,
+ /* Self defined modes B DIRECT8x8 */
+ SUBMB_4x4,
+ SUBMB_4x4, SUBMB_4x4 };
+
+/** Sub MB pred modes for B slice */
+const UWORD8 gau1_ih264d_submb_pred_modes[] =
+ {
+ PRED_L0,
+ PRED_L0, PRED_L0, PRED_L0,
+ B_DIRECT,
+ PRED_L0, PRED_L1, BI_PRED, PRED_L0, PRED_L0, PRED_L1,
+ PRED_L1,
+ BI_PRED, BI_PRED, PRED_L0, PRED_L1, BI_PRED,
+ /* Self defined modes for B DIRECT8x8 */
+ BI_PRED,
+ PRED_L0, PRED_L1, };
+
+/** MB pred modes for P and B slice. Each row lists 31 modes; element [31] has no initializer and is therefore zero. */
+const WORD8 gau1_ih264d_mb_pred_modes[2][32] =
+ {
+ { PRED_L0, PRED_L0, PRED_L0, PRED_INVALID, PRED_INVALID,
+ B_DIRECT,
+ PRED_L0, PRED_L1, BI_PRED, PRED_L0, PRED_L0, PRED_L1, PRED_L1,
+ PRED_L0,
+ PRED_L0, PRED_L1, PRED_L1, PRED_L0, PRED_L0, PRED_L1, PRED_L1,
+ BI_PRED,
+ BI_PRED, BI_PRED, BI_PRED, BI_PRED, BI_PRED, PRED_INVALID,
+ /* Self defined modes for B_SKIP and DIRECT16x16 */
+ BI_PRED,
+ PRED_L0, PRED_L1, },
+ { PRED_INVALID, PRED_L0, PRED_L0, PRED_INVALID, PRED_INVALID,
+ PRED_INVALID,
+ PRED_INVALID, PRED_INVALID, PRED_INVALID, PRED_L0, PRED_L0,
+ PRED_L1,
+ PRED_L1, PRED_L1, PRED_L1, PRED_L0, PRED_L0, BI_PRED, BI_PRED,
+ BI_PRED,
+ BI_PRED, PRED_L0, PRED_L0, PRED_L1, PRED_L1, BI_PRED, BI_PRED,
+ PRED_INVALID,
+ /* Self defined modes for B_SKIP and DIRECT16x16 */
+ PRED_INVALID,
+ PRED_INVALID, PRED_INVALID } };
+
+/****************MOTION VECTOR DECODING TABLES ENDS *******************/
+
+/****************CAVLC DECODING TABLES STARTS *******************/
+
+/*****************************************************************************/
+/* 6-bit lookup table for total zeros (total coeff = 2 to 10) as in Table 9.7 */
+/* of H264 standard. In each table entry, the lower 4 bits hold the total zeros */
+/* decoded while the upper 4 bits hold the number of bits to flush from ps_bitstrm */
+/*****************************************************************************/
+const UWORD8 gau1_ih264d_table_total_zero_2to10[9][64] =
+ {
+ /* For total coeff = 2 */
+ { 0x6E, 0x6D, 0x6C, 0x6B, 0x5A, 0x5A, 0x59, 0x59, 0x48, 0x48, 0x48,
+ 0x48, 0x47, 0x47, 0x47, 0x47, 0x46, 0x46, 0x46, 0x46, 0x45, 0x45,
+ 0x45, 0x45, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x33,
+ 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x32, 0x32, 0x32, 0x32,
+ 0x32, 0x32, 0x32, 0x32, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31,
+ 0x31, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, },
+
+ /* For total coeff = 3 */
+ { 0x6D, 0x6B, 0x5C, 0x5C, 0x5A, 0x5A, 0x59, 0x59, 0x48, 0x48, 0x48,
+ 0x48, 0x45, 0x45, 0x45, 0x45, 0x44, 0x44, 0x44, 0x44, 0x40, 0x40,
+ 0x40, 0x40, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32,
+ 0x32, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, },
+
+ /* For total coeff = 4 */
+ { 0x5C, 0x5C, 0x5B, 0x5B, 0x5A, 0x5A, 0x50, 0x50, 0x49, 0x49, 0x49,
+ 0x49, 0x47, 0x47, 0x47, 0x47, 0x43, 0x43, 0x43, 0x43, 0x42, 0x42,
+ 0x42, 0x42, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x38, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x35, 0x35, 0x35, 0x35,
+ 0x35, 0x35, 0x35, 0x35, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34,
+ 0x34, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, },
+
+ /* For total coeff = 5 */
+ { 0x5B, 0x5B, 0x59, 0x59, 0x4A, 0x4A, 0x4A, 0x4A, 0x48, 0x48, 0x48,
+ 0x48, 0x42, 0x42, 0x42, 0x42, 0x41, 0x41, 0x41, 0x41, 0x40, 0x40,
+ 0x40, 0x40, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37, 0x36,
+ 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x35, 0x35, 0x35, 0x35,
+ 0x35, 0x35, 0x35, 0x35, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34,
+ 0x34, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, },
+
+ /* For total coeff = 6 */
+ { 0x6A, 0x60, 0x51, 0x51, 0x48, 0x48, 0x48, 0x48, 0x39, 0x39, 0x39,
+ 0x39, 0x39, 0x39, 0x39, 0x39, 0x37, 0x37, 0x37, 0x37, 0x37, 0x37,
+ 0x37, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x35,
+ 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x35, 0x34, 0x34, 0x34, 0x34,
+ 0x34, 0x34, 0x34, 0x34, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, },
+
+ /* For total coeff = 7 */
+ { 0x69, 0x60, 0x51, 0x51, 0x47, 0x47, 0x47, 0x47, 0x38, 0x38, 0x38,
+ 0x38, 0x38, 0x38, 0x38, 0x38, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x33,
+ 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x32, 0x32, 0x32, 0x32,
+ 0x32, 0x32, 0x32, 0x32, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25,
+ 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, },
+
+ /* For total coeff = 8 */
+ { 0x68, 0x60, 0x52, 0x52, 0x41, 0x41, 0x41, 0x41, 0x37, 0x37, 0x37,
+ 0x37, 0x37, 0x37, 0x37, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+ 0x36, 0x36, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x25,
+ 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25,
+ 0x25, 0x25, 0x25, 0x25, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24,
+ 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, },
+
+ /* For total coeff = 9 */
+ { 0x61, 0x60, 0x57, 0x57, 0x42, 0x42, 0x42, 0x42, 0x35, 0x35, 0x35,
+ 0x35, 0x35, 0x35, 0x35, 0x35, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26,
+ 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x24,
+ 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24,
+ 0x24, 0x24, 0x24, 0x24, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23,
+ 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, },
+
+ /* For total coeff = 10 */
+ { 0x51, 0x51, 0x50, 0x50, 0x46, 0x46, 0x46, 0x46, 0x32, 0x32, 0x32,
+ 0x32, 0x32, 0x32, 0x32, 0x32, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25,
+ 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x25, 0x24,
+ 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24, 0x24,
+ 0x24, 0x24, 0x24, 0x24, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23,
+ 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, }
+
+ };
+
+/*****************************************************************************/
+/* 4-bit lookup table for total zeros (total coeff = 11 to 15) as in Table 9.7 */
+/* of H264 standard. In each table entry, the lower 4 bits hold the total zeros */
+/* decoded while the upper 4 bits hold the number of bits to flush from ps_bitstrm */
+/*****************************************************************************/
+const UWORD8 gau1_ih264d_table_total_zero_11to15[5][16] =
+ {
+ /* For total coeff = 11 */
+ { 0x40, 0x41, 0x32, 0x32, 0x33, 0x33, 0x35, 0x35, 0x14, 0x14, 0x14,
+ 0x14, 0x14, 0x14, 0x14, 0x14, },
+
+ /* For total coeff = 12 */
+ { 0x40, 0x41, 0x34, 0x34, 0x22, 0x22, 0x22, 0x22, 0x13, 0x13, 0x13,
+ 0x13, 0x13, 0x13, 0x13, 0x13, },
+
+ /* For total coeff = 13 */
+ { 0x30, 0x30, 0x31, 0x31, 0x23, 0x23, 0x23, 0x23, 0x12, 0x12, 0x12,
+ 0x12, 0x12, 0x12, 0x12, 0x12, },
+
+ /* For total coeff = 14 */
+ { 0x20, 0x20, 0x20, 0x20, 0x21, 0x21, 0x21, 0x21, 0x12, 0x12, 0x12,
+ 0x12, 0x12, 0x12, 0x12, 0x12, },
+
+ /* For total coeff = 15 */
+ { 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11,
+ 0x11, 0x11, 0x11, 0x11, 0x11, }, };
+
+/** Tables used to read "Run Before". The tables below are packed to reduce lookups: */
+/** (Base address of Gx << 2) + (Max code length for that Gx) */
+const UWORD8 gau1_ih264d_table_run_before[64] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 1, 1, 1, 1, 10, 10, 6, 6, 1, 1, 1, 1,
+ 14, 14, 10, 10, 6, 6, 2, 2, 19, 15, 10, 10, 6, 6, 2, 2, 23, 19, 15, 11, 6,
+ 6, 2, 2, 7, 11, 19, 15, 27, 23, 2, 2, 27, 27, 23, 19, 15, 11, 7, 3 };
+
+/*****************************************************************************/
+/* Lookup table for CAVLC 4x4 total_coeff,trailing_ones as per Table 9-5 */
+/* in the standard. Starting from lsb, first 2 bits=flushbits, next 2 bits= */
+/* trailing ones, next 5 bits=total_coeff. Total bits used = 9 out of 16 */
+/*****************************************************************************/
+const UWORD16 gau2_ih264d_code_gx[304] =
+ {
+ /* Lookup for 0 <= nC < 2 */
+ 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0014, 0x0014,
+ 0x0014, 0x0014, 0x0014, 0x0014, 0x0014, 0x0014, 0x0028, 0x0028, 0x0028,
+ 0x0028, 0x0028, 0x0028, 0x0028, 0x0028, 0x0026, 0x0026, 0x0012, 0x0012,
+ 0x003D, 0x003D, 0x003D, 0x003D, 0x005E, 0x005E, 0x003A, 0x003A, 0x004D,
+ 0x004D, 0x004D, 0x004D, 0x006E, 0x006E, 0x004A, 0x004A, 0x0036, 0x0036,
+ 0x0022, 0x0022, 0x007E, 0x007E, 0x005A, 0x005A, 0x0046, 0x0046, 0x0032,
+ 0x0032, 0x008E, 0x008E, 0x006A, 0x006A, 0x0056, 0x0056, 0x0042, 0x0042,
+ 0x009E, 0x009E, 0x007A, 0x007A, 0x0066, 0x0066, 0x0052, 0x0052, 0x0083,
+ 0x009B, 0x0087, 0x0073, 0x00AF, 0x008B, 0x0077, 0x0063, 0x00CF, 0x00BB,
+ 0x00A7, 0x00A3, 0x00BF, 0x00AB, 0x0097, 0x0093, 0x00EF, 0x00DB, 0x00C7,
+ 0x00C3, 0x00DF, 0x00CB, 0x00B7, 0x00B3, 0x010F, 0x00FB, 0x00F7, 0x00E3,
+ 0x00FF, 0x00EB, 0x00E7, 0x00D3, 0x0102, 0x0102, 0x010A, 0x010A, 0x0106,
+ 0x0106, 0x00F2, 0x00F2, 0x00D4, 0x00D4, 0x00D4, 0x00D4, 0x00D4, 0x00D4,
+ 0x00D4, 0x00D4,
+
+ /* Lookup for 2 <= nC < 4 */
+ 0x0015,
+ 0x0015, 0x0015, 0x0015, 0x0001, 0x0001, 0x0001, 0x0001, 0x004E, 0x004E,
+ 0x003E, 0x003E, 0x0029, 0x0029, 0x0029, 0x0029, 0x006F, 0x003B, 0x0037,
+ 0x0013, 0x005E, 0x005E, 0x0026, 0x0026, 0x007E, 0x007E, 0x004A, 0x004A,
+ 0x0046, 0x0046, 0x0022, 0x0022, 0x008E, 0x008E, 0x005A, 0x005A, 0x0056,
+ 0x0056, 0x0032, 0x0032, 0x0052, 0x0052, 0x006A, 0x006A, 0x0066, 0x0066,
+ 0x0042, 0x0042, 0x009E, 0x009E, 0x007A, 0x007A, 0x0076, 0x0076, 0x0062,
+ 0x0062, 0x00BF, 0x009B, 0x0097, 0x0083, 0x00AF, 0x008B, 0x0087, 0x0073,
+ 0x00B3, 0x00BB, 0x00B7, 0x00A3, 0x00CF, 0x00AB, 0x00A7, 0x0093, 0x00EF,
+ 0x00DB, 0x00D7, 0x00D3, 0x00DF, 0x00CB, 0x00C7, 0x00C3, 0x00F7, 0x00F3,
+ 0x00FB, 0x00E7, 0x00EA, 0x00EA, 0x00E2, 0x00E2, 0x010E, 0x010E, 0x010A,
+ 0x010A, 0x0106, 0x0106, 0x0102, 0x0102, 0x00FC, 0x00FC, 0x00FC, 0x00FC,
+ 0x00FC, 0x00FC, 0x00FC, 0x00FC,
+
+ /* Lookup for 4 <= nC < 8 */
+ 0x007F,
+ 0x006F, 0x005F, 0x004F, 0x003F, 0x002B, 0x0017, 0x0003, 0x0057, 0x005B,
+ 0x0047, 0x004B, 0x0037, 0x008F, 0x003B, 0x0027, 0x0033, 0x007B, 0x0077,
+ 0x0023, 0x009F, 0x006B, 0x0067, 0x0013, 0x0073, 0x0063, 0x009B, 0x0053,
+ 0x00AF, 0x008B, 0x0087, 0x0043, 0x00CF, 0x00BB, 0x00A7, 0x0093, 0x00BF,
+ 0x00AB, 0x0097, 0x0083, 0x00C3, 0x00DB, 0x00C7, 0x00B3, 0x00DF, 0x00CB,
+ 0x00B7, 0x00A3, 0x00F7, 0x00E3, 0x00EF, 0x00EB, 0x00E7, 0x00D3, 0x00D6,
+ 0x00D6, 0x0106, 0x0106, 0x00F2, 0x00F2, 0x00FE, 0x00FE, 0x00FA, 0x00FA,
+ 0x010D, 0x010D, 0x010D, 0x010D, 0x0109, 0x0109, 0x0109, 0x0109, 0x0100,
+ 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100 };
+
+/*****************************************************************************/
+/* Lookup table for CAVLC ChromaDC total_coeff,trailing_ones parsing as per */
+/* Table 9-5 in the standard. Starting from msb, First 4bits=total_coeff, */
+/* next 2bits=trailing_ones and last 2bits=flushbits-1 */
+/*****************************************************************************/
+const UWORD8 gau1_ih264d_cav_chromdc_vld[256] =
+ { 0x9E, 0x9E, 0x97, 0x8F, 0x76, 0x76, 0x6E, 0x6E, 0x85, 0x85, 0x85, 0x85,
+ 0x65, 0x65, 0x65, 0x65, 0x45, 0x45, 0x45, 0x45, 0x7D, 0x7D, 0x7D, 0x7D,
+ 0x4D, 0x4D, 0x4D, 0x4D, 0x25, 0x25, 0x25, 0x25,
+
+ 0x52,
+ 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52,
+ 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52,
+ 0x52, 0x52, 0x52, 0x52, 0x52, 0x52, 0x52,
+
+ 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01,
+
+ 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, };
+
+const UWORD16 gau2_ih264d_offset_num_vlc_tab[9] =
+ { 0, 0, 120, 120, 224, 224, 224, 224, 224 };
+
+/*****************************************************************************/
+/* Function pointer offset table lookup for parsing 4x4 residual blocks in */
+/* CAVLC. The offset depends on the total coeffs coded */
+/*****************************************************************************/
+const UWORD8 gau1_ih264d_total_coeff_fn_ptr_offset[16] =
+ { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2 };
+
+/****************************************************************************/
+/* gai2_ih264d_trailing_one_level lookup tables based on trailing one bits */
+/* All zeroes in the table are dummy entries, kept so every row has 3 elements */
+/****************************************************************************/
+const WORD16 gai2_ih264d_trailing_one_level[14][3] =
+ {
+ /* All zeroes are dummy (padding) values; rows are grouped as 2 + 4 + 8 = 14 */
+ /**********************************************************************/
+ /* Levels for trailing ones = 1, bits read can be 0 or 1 */
+ /**********************************************************************/
+ { 1, 0, 0 }, /* 0 */
+ { -1, 0, 0 }, /* 1 */
+
+ /**********************************************************************/
+ /* Levels for trailing ones = 2, bits read can be 00, 01, 10 ,11 */
+ /**********************************************************************/
+ { 1, 1, 0 }, /* 00 */
+ { 1, -1, 0 }, /* 01 */
+ { -1, 1, 0 }, /* 10 */
+ { -1, -1, 0 }, /* 11 */
+
+ /**********************************************************************/
+ /* Levels for trailing ones = 3, bits read can be 000 - 111 */
+ /**********************************************************************/
+ { 1, 1, 1 }, /* 000 */
+ { 1, 1, -1 }, /* 001 */
+ { 1, -1, 1 }, /* 010 */
+ { 1, -1, -1 }, /* 011 */
+ { -1, 1, 1 }, /* 100 */
+ { -1, 1, -1 }, /* 101 */
+ { -1, -1, 1 }, /* 110 */
+ { -1, -1, -1 }, /* 111 */
+ };
+/****************CAVLC DECODING TABLES ENDS *******************/
+
+/****************************************************************************/
+/* These are the codes used for error detection in intra pred4x4 modes */
+/****************************************************************************/
+const UWORD8 gau1_ih264d_intra_pred_err_code[9] =
+ { 2, 1, 0, 2, 3, 3, 3, 2, 1 };
+
+/* Number of users for top field, bottom field, and which field needs to be */
+/* displayed first */
+const UWORD8 gau1_ih264d_sei_fld_usage[9][3] =
+ {
+ { 1, 1, DISP_FLD_FIRST_UNDEF },
+ { 1, 0, DISP_TOP_FLD_FIRST },
+ { 0, 1, DISP_BOT_FLD_FIRST },
+ { 1, 1, DISP_TOP_FLD_FIRST },
+ { 1, 1, DISP_BOT_FLD_FIRST },
+ { 2, 1, DISP_TOP_FLD_FIRST },
+ { 1, 2, DISP_BOT_FLD_FIRST },
+ { 2, 2, DISP_FLD_FIRST_UNDEF },
+ { 3, 3, DISP_FLD_FIRST_UNDEF } };
+
+/*****************************************************************/
+/* Context increment for significant coefficient(CABAC) */
+/* Requires only 63 elements. But the last element with value -1 */
+/* is kept to make it 64. NOTE(review): the array is UWORD8, so -1 is converted on init (presumably to 255 - confirm UWORD8 is unsigned 8-bit) */
+/*****************************************************************/
+const UWORD8 gau1_ih264d_sigcoeff_context_inc_frame[64] =
+ { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, 4, 4, 4, 4, 3, 3, 6, 7, 7,
+ 7, 8, 9, 10, 9, 8, 7, 7, 6, 11, 12, 13, 11, 6, 7, 8, 9, 14, 10, 9, 8, 6,
+ 11, 12, 13, 11, 6, 9, 14, 10, 9, 11, 12, 13, 11, 14, 10, 12, -1 };
+
+const UWORD8 gau1_ih264d_sigcoeff_context_inc_field[64] = /* NOTE(review): trailing -1 is stored in a UWORD8; presumably a filler for the 64th slot - confirm */
+ { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, 6, 9, 10, 10, 8, 11, 12,
+ 11, 9, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10, 10, 8, 11, 12, 11, 9, 9, 10,
+ 10, 8, 13, 13, 9, 9, 10, 10, 8, 13, 13, 9, 9, 10, 10, 14, 14, 14, 14, 14,
+ -1 };
+
+const UWORD8 gau1_ih264d_lastcoeff_context_inc[64] = /* NOTE(review): trailing -1 is stored in a UWORD8; presumably a filler for the 64th slot - confirm */
+ { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5,
+ 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, -1 };
+
+/*!
+ **************************************************************************
+ * \brief gau1_ih264d_top_left_mb_part_indx_mod
+ *
+ * SubBlk number of the top left subBlk in each of the MB partition
+ * (16x16, 16x8, 8x16, 8x8)
+ **************************************************************************
+ */
+const UWORD8 gau1_ih264d_top_left_mb_part_indx_mod[] =
+ { 0, 0 /* Junk */, /* 16x16 */
+ 0, 8, /* 16x8 */
+ 0, 2, /* 8x16 */
+ 0, 2, 8, 10 /* 8x8 */
+ };
+
+/*!
+ **************************************************************************
+ * \brief gau1_ih264d_submb_indx_mod_sp_drct
+ *
+ * Contains increments to the subBlk num in a given subMb partition.
+ **************************************************************************
+ */
+const UWORD8 gau1_ih264d_submb_indx_mod_sp_drct[] =
+ { 0, 0 /* Junk */, /* 8x8 */
+ 0, 4, /* 8x4 */
+ 0, 1, /* 4x8 */
+ 0, 1, 3, 1 /* 4x4 */
+ };
diff --git a/decoder/ih264d_tables.h b/decoder/ih264d_tables.h
new file mode 100755
index 0000000..04dfbd0
--- /dev/null
+++ b/decoder/ih264d_tables.h
@@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _IH264D_TABLES_H_
+#define _IH264D_TABLES_H_
+
+/**
+ **************************************************************************
+ * \file ih264d_tables.h
+ *
+ * \brief
+ * Declaration of all tables used by h264 decoder
+ *
+ * \date
+ * 17/09/2004
+ *
+ * \author MA
+ **************************************************************************
+ */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_cabac.h"
+
+/*Deblocking Table declaration*/
+extern const UWORD8 gau1_ih264d_qp_scale_cr[];
+extern const UWORD8 gau1_ih264d_alpha_table[];
+extern const UWORD8 gau1_ih264d_clip_table_deblock[];
+extern const UWORD8 gau1_ih264d_beta_table[];
+extern const UWORD8 gau1_ih264d_clip_table[][4];
+
+/*Parsing Table declaration*/
+extern const UWORD8 gau1_ih264d_cbp_tab[6];
+extern const UWORD32 gau4_ih264d_packed_bs2[16];
+extern const UWORD16 gau2_ih264d_4x4_v2h_reorder[16];
+extern const UWORD8 gau1_ih264d_subblk_offset[16];
+extern const UWORD8 gau1_ih264d_cbp_table[48][2];
+
+/*Decode Slice Table declaration*/
+extern const UWORD8 gau1_ih264d_inv_scan[16];
+extern const UWORD8 gau1_ih264d_inv_scan_fld[16];
+extern const UWORD8 gau1_ih264d_dequant_matrix[6][16];
+extern const UWORD16 gau2_ih264_iquant_scale_4x4[6][16];
+extern const UWORD8 gau1_ih264d_dequant8x8_zigzag_cavlc[4][6][16];
+extern const UWORD16 gau1_ih264d_dequant8x8_cavlc[6][64];
+
+extern const UWORD8 gau1_ih264d_inv_scan_prog8x8_cavlc[4][16];
+extern const UWORD8 gau1_ih264d_inv_scan_int8x8_cavlc[4][16];
+extern const UWORD8 gau1_ih264d_inv_scan_prog8x8_cabac[64];
+extern const UWORD8 gau1_ih264d_inv_scan_int8x8_cabac[64];
+
+extern const UWORD8 gau1_ih264d_lastcoeff_context_inc[64];
+extern const UWORD8 gau1_ih264d_sigcoeff_context_inc_frame[64];
+extern const UWORD8 gau1_ih264d_sigcoeff_context_inc_field[64];
+
+/* scaling related table declaration */
+extern const WORD16 gai2_ih264d_default_intra4x4[16];
+extern const WORD16 gai2_ih264d_default_inter4x4[16];
+extern const WORD16 gai2_ih264d_default_intra8x8[64];
+extern const WORD16 gai2_ih264d_default_inter8x8[64];
+extern const WORD16 gai2_ih264d_flat_4x4[16];
+extern const WORD16 gai2_ih264d_flat_8x8[64];
+
+/*Decode MV Table declaration*/
+extern const WORD8 gau1_ih264d_mv_pred_condition[];
+
+/** Number of subMbs for the 8x8 prediction mode */
+extern const UWORD8 gau1_ih264d_num_submb_part[];
+
+/** Width of the 8x8 prediction mode in terms of subMbs */
+extern const UWORD8 gau1_ih264d_submb_partw[];
+
+/** Height of the 8x8 prediction mode in terms of subMbs */
+extern const UWORD8 gau1_ih264d_submb_parth[];
+
+/** Number of MB partitions for the MB prediction mode */
+extern const UWORD8 gau1_ih264d_num_mb_part[];
+
+/** Width of the MB partition in terms of subMbs */
+extern const UWORD8 gau1_ih264d_mb_partw[];
+
+/** Height of the MB partition in terms of subMbs */
+extern const UWORD8 gau1_ih264d_mb_parth[];
+
+/** MB partition information is packed into a UWORD32 {0,number,width,height} */
+extern const UWORD32 gau4_ih264d_submb_part[];
+
+extern const UWORD8 gau1_ih264d_submb_indx_mod[];
+
+/** This table is used to assign CBPs to Inter MBs. */
+extern const UWORD8 gau1_ih264d_cbp_inter[];
+
+/** Motion comp modes for P followed by B,
+ 0 to 4 : P Mbs
+ 5 to 27 : B Mbs
+ 28 to 30 : DIRECT */
+extern const UWORD8 gau1_ih264d_mb_mc_mode[];
+
+extern const UWORD8 gau1_ih264d_submb_mc_mode[];
+
+/** Sub MB pred modes for B slice */
+extern const UWORD8 gau1_ih264d_submb_pred_modes[];
+
+/** MB pred modes for P and B slice */
+extern const WORD8 gau1_ih264d_mb_pred_modes[2][32];
+
+/*Decode CAVLC Table declaration*/
+extern const UWORD8 gau1_ih264d_table_total_zero_2to10[9][64];
+extern const UWORD8 gau1_ih264d_table_total_zero_11to15[5][16];
+extern const UWORD8 gau1_ih264d_table_run_before[64];
+extern const UWORD16 gau2_ih264d_code_gx[304];
+extern const UWORD8 gau1_ih264d_cav_chromdc_vld[256];
+extern const UWORD16 gau2_ih264d_offset_num_vlc_tab[9];
+extern const UWORD8 gau1_ih264d_total_coeff_fn_ptr_offset[16];
+extern const WORD16 gai2_ih264d_trailing_one_level[14][3];
+
+/*Decode CABAC Table declaration*/
+extern const UWORD32 gau4_ih264d_cabac_table[];
+
+/****************************************************************************/
+/* For error detection in intra pred4x4 modes */
+/****************************************************************************/
+extern const UWORD8 gau1_ih264d_intra_pred_err_code[9];
+
+/*****************************************************************************/
+/* Cabac tables for context initialization depending upon type of Slice, */
+/* cabac init Idc value and Qp. */
+/*****************************************************************************/
+extern const UWORD8 gau1_ih264d_cabac_ctxt_init_table[NUM_CAB_INIT_IDC_PLUS_ONE][QP_RANGE][NUM_CABAC_CTXTS];
+
+/*****************************************************************************/
+/* SEI tables for field usage and which field first */
+/*****************************************************************************/
+extern const UWORD8 gau1_ih264d_sei_fld_usage[9][3];
+
+
+extern const UWORD8 gau1_ih264d_top_left_mb_part_indx_mod[];
+extern const UWORD8 gau1_ih264d_submb_indx_mod_sp_drct[];
+
+#endif /* _IH264D_TABLES_H_ */
diff --git a/decoder/ih264d_thread_compute_bs.c b/decoder/ih264d_thread_compute_bs.c
new file mode 100755
index 0000000..6812d57
--- /dev/null
+++ b/decoder/ih264d_thread_compute_bs.c
@@ -0,0 +1,802 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ **************************************************************************
+ * \file ih264d_thread_compute_bs.c
+ *
+ * \brief
+ * Contains boundary strength (BS) computation and deblocking routines for the multi-threaded decoder
+ *
+ * Detailed_description
+ *
+ * \date
+ * 20/02/2012
+ *
+ * \author ZR
+ **************************************************************************
+ */
+#include "ih264d_error_handler.h"
+#include "ih264d_debug.h"
+#include <string.h>
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_mb_utils.h"
+
+#include "ih264d_thread_compute_bs.h"
+#include "ithread.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_tables.h"
+#include "ih264d_format_conv.h"
+#include "ih264d_defs.h"
+UWORD16 ih264d_update_csbp_8x8(UWORD16 u2_luma_csbp); /* defined elsewhere; applied below to the luma csbp of MBs with u1_tran_form8x8 set */
+void ih264d_fill_bs2_horz_vert(UWORD32 *pu4_bs, /* Base pointer of BS table */
+ WORD32 u4_left_mb_csbp, /* csbp of left mb */
+ WORD32 u4_top_mb_csbp, /* csbp of top mb */
+ WORD32 u4_cur_mb_csbp, /* csbp of current mb */
+ const UWORD32 *pu4_packed_bs2, const UWORD16 *pu2_4x4_v2h_reorder); /* lookup tables supplied by the caller */
+/* Group sizes used by the compute-BS / deblock thread in this file */
+#define BS_MB_GROUP 4 /* look-ahead distance and max MBs per BS pass in ih264d_computebs_deblk_slice */
+#define DEBLK_MB_GROUP 1 /* max MBs deblocked per call from ih264d_computebs_deblk_slice */
+#define FORMAT_CONV_MB_GROUP 4 /* not referenced elsewhere in this file */
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_compute_bs_non_mbaff_thread */
+/* */
+/* Description : Computes the pointers to the left, top and current */
+/* : MvPred and deblk_mb_t data and hands them to the fill-BS */
+/* : functions for boundary strength calculation. Used when */
+/* : BS is calculated in a separate thread */
+/* Inputs : pointer to decoder context, cur_mb_info, u4_mb_num */
+/* Processing : Also saves current MB type, QP and reference picture */
+/* : addresses as top/left neighbour data for later MBs */
+/* Outputs : Fills u4_bs_table of the current MB's deblk_mb_t */
+/* Returns : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* ITTIAM */
+/*****************************************************************************/
+
+void ih264d_compute_bs_non_mbaff_thread(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mb_num)
+{
+ /* Mvpred pointers for top, left and current MB */
+ mv_pred_t *ps_cur_mv_pred, *ps_top_mv_pred = NULL, *ps_left_mv_pred;
+ /* deblk_mb_t Params */
+ deblk_mb_t *ps_cur_mb_params; /*< Parameters of current MacroBlock */
+ deblkmb_neighbour_t *ps_deblk_top_mb; /* saved type/QP of the MB above, indexed by MB column */
+
+ /* Reference Index to POC mapping*/
+ void ** apv_map_ref_idx_to_poc;
+ UWORD32 u4_leftmbtype;
+
+ UWORD16 u2_left_csbp, u2_top_csbp, u2_cur_csbp;
+
+ /* Set of flags */
+ UWORD32 u4_cur_mb_intra, u1_top_mb_typ, u4_cur_mb_fld;
+ UWORD32 u1_cur_mb_type;
+ UWORD32 * pu4_bs_table;
+
+ /* Neighbour availability */
+ /* Initialization */
+ const UWORD32 u2_mbx = ps_cur_mb_info->u2_mbx;
+ const UWORD32 u2_mby = ps_cur_mb_info->u2_mby;
+ const UWORD32 u1_pingpong = u2_mbx & 0x01; /* alternates per MB column: [u1_pingpong] is read now, [!u1_pingpong] is written for the next MB */
+ ps_deblk_top_mb = ps_dec->ps_deblk_top_mb + u2_mbx;
+
+ /* Pointer assignment for Current DeblkMB, Current Mv Pred */
+ ps_cur_mb_params = ps_dec->ps_deblk_pic + u4_mb_num;
+ ps_cur_mv_pred = ps_dec->s_cur_pic.ps_mv + (u4_mb_num << 4); /* 16 mv_pred_t entries per MB */
+
+ apv_map_ref_idx_to_poc =
+ (void **)ps_dec->ps_computebs_cur_slice->ppv_map_ref_idx_to_poc
+ + 1; /* base shifted by one entry; presumably so that a ref index of -1 stays in range - TODO confirm */
+ u1_cur_mb_type = ps_cur_mb_params->u1_mb_type;
+ u1_top_mb_typ = ps_deblk_top_mb->u1_mb_type; /* read the top MB's type before the slot is overwritten */
+ ps_deblk_top_mb->u1_mb_type = u1_cur_mb_type; /* current MB becomes the "top" entry for this column */
+
+ {
+ ps_cur_mb_params->u1_topmb_qp = ps_deblk_top_mb->u1_mb_qp;
+ ps_deblk_top_mb->u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
+ /* Same read-then-overwrite hand-over for the left neighbour QP */
+ ps_cur_mb_params->u1_left_mb_qp = ps_dec->deblk_left_mb[1].u1_mb_qp;
+ ps_dec->deblk_left_mb[1].u1_mb_qp = ps_cur_mb_params->u1_mb_qp;
+
+ }
+
+ /* If filtering is disabled for this MB no BS is computed; only the */
+ /* neighbour data needed by later MBs is stored before returning */
+ if(ps_cur_mb_params->u1_deblocking_mode & MB_DISABLE_FILTERING)
+ {
+ void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc +
+ POC_LIST_L0_TO_L1_DIFF;
+ {
+ /* Store Parameter for Top MvPred reference frame Address */
+
+ void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
+ WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
+ WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
+
+ /* Store Left addresses for Next Mb */
+ void ** ppv_left_mv_pred_addr =
+ ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
+ WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
+
+ /* [0]/[2] take list-0 entries, [1]/[3] take entries from the L1 part of the map */
+ ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
+ ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
+
+ ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+ ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+
+ ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
+ ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
+ /* (the same store sequence is repeated at the end of the non-skipped path) */
+ /* Storing the leftMbtype for next Mb */
+ ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
+ }
+
+ return;
+ }
+
+ /* Flag for extra left Edge */
+ ps_cur_mb_params->u1_single_call = 1;
+
+ /* Update the Left deblk_mb_t and Left MvPred Parameters */
+ if(!u2_mbx)
+ {
+ u4_leftmbtype = 0; /* first column: no left MB, so the left-intra test below is false */
+
+ /* Initialize the ps_left_mv_pred with Junk but Valid Location */
+ /* to avoid invalid memory access */
+ /* this is read only pointer */
+ ps_left_mv_pred = ps_cur_mv_pred + 3;
+ }
+ else
+ {
+ u4_leftmbtype = ps_dec->deblk_left_mb[1].u1_mb_type;
+
+ /* Entry 3 of the previous MB's 16 mv_pred_t entries */
+ ps_left_mv_pred = ps_cur_mv_pred - (1 << 4) + 3;
+ }
+
+ if(!u2_mby)
+ u1_top_mb_typ = 0; /* first row: no top MB, so the top-intra test below is false */
+
+ /* MvPred Pointer Calculation */
+ /* Entry 12 of the MB one MB-row above. NOTE(review): also computed when u2_mby == 0 - confirm the buffer tolerates this */
+ ps_top_mv_pred = ps_cur_mv_pred - (ps_dec->u2_frm_wd_in_mbs << 4) + 12;
+
+ u4_cur_mb_intra = u1_cur_mb_type & D_INTRA_MB;
+ u4_cur_mb_fld = !!(u1_cur_mb_type & D_FLD_MB);
+ /* BS table of the current MB: entries 0..7, filled below */
+ pu4_bs_table = ps_cur_mb_params->u4_bs_table;
+
+ u2_cur_csbp = ps_cur_mb_info->ps_curmb->u2_luma_csbp;
+ u2_left_csbp = ps_cur_mb_info->ps_left_mb->u2_luma_csbp;
+ u2_top_csbp = ps_cur_mb_info->ps_top_mb->u2_luma_csbp;
+
+ /* High profile: csbp of MBs using the 8x8 transform is remapped first */
+ if(ps_dec->ps_cur_sps->u1_profile_idc == HIGH_PROFILE_IDC)
+ {
+ if(ps_cur_mb_info->u1_tran_form8x8 == 1)
+ {
+ u2_cur_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_curmb->u2_luma_csbp);
+ }
+
+ if(ps_cur_mb_info->ps_left_mb->u1_tran_form8x8 == 1)
+ {
+ u2_left_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_left_mb->u2_luma_csbp);
+ }
+
+ if(ps_cur_mb_info->ps_top_mb->u1_tran_form8x8 == 1)
+ {
+ u2_top_csbp = ih264d_update_csbp_8x8(
+ ps_cur_mb_info->ps_top_mb->u2_luma_csbp);
+ }
+ }
+ if(u4_cur_mb_intra)
+ {
+ /* Intra MB: fixed strengths - 4 on entry 4, 4 (3 for a field MB) on entry 0, 3 on all other entries */
+ pu4_bs_table[4] = 0x04040404;
+ pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
+ pu4_bs_table[1] = 0x03030303;
+ pu4_bs_table[2] = 0x03030303;
+ pu4_bs_table[3] = 0x03030303;
+ pu4_bs_table[5] = 0x03030303;
+ pu4_bs_table[6] = 0x03030303;
+ pu4_bs_table[7] = 0x03030303;
+ }
+ else
+ {
+ UWORD32 u4_is_non16x16 = !!(u1_cur_mb_type & D_PRED_NON_16x16);
+ UWORD32 u4_is_b =
+ (ps_dec->ps_computebs_cur_slice->slice_type == B_SLICE);
+
+ /* Inter MB, three steps: */
+ /* 1. fill the table from the left/top/current csbp values */
+ /* 2. override entry 4 / entry 0 when the left / top MB is intra */
+ /* 3. call the fill_bs1 variant selected by B-slice and non-16x16 flags */
+ ih264d_fill_bs2_horz_vert(pu4_bs_table, u2_left_csbp, u2_top_csbp,
+ u2_cur_csbp, gau4_ih264d_packed_bs2,
+ gau2_ih264d_4x4_v2h_reorder);
+
+ if(u4_leftmbtype & D_INTRA_MB)
+ pu4_bs_table[4] = 0x04040404;
+
+ if(u1_top_mb_typ & D_INTRA_MB)
+ pu4_bs_table[0] = u4_cur_mb_fld ? 0x03030303 : 0x04040404;
+
+ ps_dec->pf_fill_bs1[u4_is_b][u4_is_non16x16](
+ ps_cur_mv_pred, ps_top_mv_pred, apv_map_ref_idx_to_poc,
+ pu4_bs_table, ps_left_mv_pred,
+ &(ps_dec->ps_left_mvpred_addr[u1_pingpong][1]),
+ ps_cur_mb_info->ps_top_mb->u4_pic_addrress,
+ (4 >> u4_cur_mb_fld)); /* 4 for a frame MB, 2 for a field MB */
+ }
+
+ {
+ void ** pu4_map_ref_idx_to_poc_l1 = apv_map_ref_idx_to_poc +
+ POC_LIST_L0_TO_L1_DIFF;
+ {
+ /* Store Parameter for Top MvPred reference frame Address */
+
+ void ** ppv_top_mv_pred_addr = ps_cur_mb_info->ps_curmb->u4_pic_addrress;
+ WORD8 * p1_refTop0 = (ps_cur_mv_pred + 12)->i1_ref_frame;
+ WORD8 * p1_refTop1 = (ps_cur_mv_pred + 14)->i1_ref_frame;
+
+ /* Store Left addresses for Next Mb */
+ void ** ppv_left_mv_pred_addr =
+ ps_dec->ps_left_mvpred_addr[!u1_pingpong][1].u4_add;
+ WORD8 * p1_refleft0 = (ps_cur_mv_pred + 3)->i1_ref_frame;
+
+ ppv_top_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refTop0[0]];
+ ppv_top_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refTop0[1]];
+
+ ppv_left_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_top_mv_pred_addr[2] = apv_map_ref_idx_to_poc[p1_refTop1[0]];
+ ppv_left_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+ ppv_top_mv_pred_addr[3] = pu4_map_ref_idx_to_poc_l1[p1_refTop1[1]];
+
+ ppv_left_mv_pred_addr[0] = apv_map_ref_idx_to_poc[p1_refleft0[0]];
+ ppv_left_mv_pred_addr[1] = pu4_map_ref_idx_to_poc_l1[p1_refleft0[1]];
+
+ /* Storing the leftMbtype for next Mb */
+ ps_dec->deblk_left_mb[1].u1_mb_type = ps_cur_mb_params->u1_mb_type;
+
+ }
+ }
+
+ /* For transform 8x8 disable deblocking of the internal edges of an 8x8 block */
+ if(ps_cur_mb_info->u1_tran_form8x8)
+ {
+ pu4_bs_table[1] = 0;
+ pu4_bs_table[3] = 0;
+ pu4_bs_table[5] = 0;
+ pu4_bs_table[7] = 0;
+ }
+}
+/* Deblocks up to deblk_mb_grp MBs that are already reconstructed and have BS computed; stops at the first MB that is not ready (never waits) */
+void ih264d_check_mb_map_deblk(dec_struct_t *ps_dec,
+ UWORD32 deblk_mb_grp,
+ tfr_ctxt_t *ps_tfr_cxt)
+{
+ UWORD32 i = 0;
+ UWORD32 u4_mb_num;
+ UWORD32 u4_cur_mb, u4_right_mb;
+ volatile UWORD8 *mb_map = ps_dec->pu1_recon_mb_map; /* map updated by the recon thread after each MB */
+ UWORD32 u4_mb_x, u4_mb_y, u4_image_wd_mb;
+ deblk_mb_t *ps_cur_mb = ps_dec->ps_cur_deblk_thrd_mb;
+ deblk_mb_t *ps_top_mb;
+ deblk_mb_t *ps_left_mb;
+ const WORD32 i4_cb_qp_idx_ofst =
+ ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
+ const WORD32 i4_cr_qp_idx_ofst =
+ ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
+
+ UWORD32 u4_wd_y, u4_wd_uv;
+ UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
+ /* Resume from the deblock position saved in ps_dec by the previous call */
+ u4_mb_num = ps_dec->u4_cur_deblk_mb_num;
+ u4_mb_x = ps_dec->u4_deblk_mb_x;
+ u4_mb_y = ps_dec->u4_deblk_mb_y;
+ u4_image_wd_mb = ps_dec->u2_frm_wd_in_mbs;
+ u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag; /* width doubled for field pictures */
+ u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
+ ps_cur_mb = ps_dec->ps_cur_deblk_thrd_mb;
+
+ for(i = 0; i < deblk_mb_grp; i++)
+ {
+
+ /* Readiness test, evaluated once (the waiting variant loops on it) */
+ //{
+ CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cur_mb);
+
+ if(ps_dec->u4_cur_bs_mb_num <= u4_mb_num)
+ u4_cur_mb = 0; /* BS not yet computed for this MB: treat as not ready */
+
+ if(u4_mb_x < (u4_image_wd_mb - 1))
+ {
+ CHECK_MB_MAP_BYTE((u4_mb_num + 1), mb_map, u4_right_mb);
+ }
+ else
+ u4_right_mb = 1; /* last column: there is no right neighbour to check */
+
+ if((u4_cur_mb && u4_right_mb) == 0)
+ {
+ break; /* current or right MB not ready: give up without waiting */
+ }
+ else
+ {
+
+ }
+ //}
+
+ u4_mb_num++;
+ {
+ UWORD32 u4_deb_mode, u4_mbs_next;
+ u4_deb_mode = ps_cur_mb->u1_deblocking_mode;
+ if(!(u4_deb_mode & MB_DISABLE_FILTERING))
+ {
+ /* Neighbour pointers are NULL at picture borders or when the edge is disabled */
+ if(u4_mb_x)
+ {
+ ps_left_mb = ps_cur_mb - 1;
+
+ }
+ else
+ {
+ ps_left_mb = NULL;
+
+ }
+ if(u4_mb_y != 0)
+ {
+ ps_top_mb = ps_cur_mb - (u4_image_wd_mb);
+ }
+ else
+ {
+ ps_top_mb = NULL;
+ }
+
+ if(u4_deb_mode & MB_DISABLE_LEFT_EDGE)
+ ps_left_mb = NULL;
+ if(u4_deb_mode & MB_DISABLE_TOP_EDGE)
+ ps_top_mb = NULL;
+
+ ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt,
+ i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst,
+ ps_cur_mb, u4_wd_y, u4_wd_uv,
+ ps_top_mb, ps_left_mb);
+
+ }
+ /* Advance to the next MB even when filtering was disabled for this one */
+ ps_cur_mb++;
+ u4_mb_x++;
+ u4_mbs_next = u4_image_wd_mb - u4_mb_x; /* MBs left in this row */
+
+ ps_tfr_cxt->pu1_mb_y += 16;
+ ps_tfr_cxt->pu1_mb_u += 8 * YUV420SP_FACTOR;
+ ps_tfr_cxt->pu1_mb_v += 8;
+
+ if(!u4_mbs_next)
+ {
+ /* End of row: apply the row increments and restart at column 0 */
+ ps_tfr_cxt->pu1_mb_y += ps_tfr_cxt->u4_y_inc;
+ ps_tfr_cxt->pu1_mb_u += ps_tfr_cxt->u4_uv_inc;
+ ps_tfr_cxt->pu1_mb_v += ps_tfr_cxt->u4_uv_inc;
+ u4_mb_y++;
+ u4_mb_x = 0;
+ }
+ }
+
+ }
+ /* Save the deblock position for the next call */
+ ps_dec->u4_cur_deblk_mb_num = u4_mb_num;
+ ps_dec->u4_deblk_mb_x = u4_mb_x;
+ ps_dec->u4_deblk_mb_y = u4_mb_y;
+ ps_dec->ps_cur_deblk_thrd_mb = ps_cur_mb;
+
+}
+/* Deblocks deblk_mb_grp MBs, spinning until each one is reconstructed and has BS computed; pending format conversion is done while waiting */
+void ih264d_check_mb_map_deblk_wait(dec_struct_t *ps_dec,
+ UWORD32 deblk_mb_grp,
+ tfr_ctxt_t *ps_tfr_cxt)
+{
+ UWORD32 i = 0;
+ UWORD32 u4_mb_num;
+ UWORD32 u4_cur_mb, u4_right_mb;
+ volatile UWORD8 *mb_map = ps_dec->pu1_recon_mb_map; /* map updated by the recon thread after each MB */
+ UWORD32 u4_mb_x, u4_mb_y, u4_image_wd_mb;
+ deblk_mb_t *ps_cur_mb = ps_dec->ps_cur_deblk_thrd_mb;
+ deblk_mb_t *ps_top_mb;
+ deblk_mb_t *ps_left_mb;
+ const WORD32 i4_cb_qp_idx_ofst =
+ ps_dec->ps_cur_pps->i1_chroma_qp_index_offset;
+ const WORD32 i4_cr_qp_idx_ofst =
+ ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset;
+
+ UWORD32 u4_wd_y, u4_wd_uv;
+ UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
+ /* Resume from the deblock position saved in ps_dec by the previous call */
+ u4_mb_num = ps_dec->u4_cur_deblk_mb_num;
+ u4_mb_x = ps_dec->u4_deblk_mb_x;
+ u4_mb_y = ps_dec->u4_deblk_mb_y;
+ u4_image_wd_mb = ps_dec->u2_frm_wd_in_mbs;
+ u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag; /* width doubled for field pictures */
+ u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag;
+ ps_cur_mb = ps_dec->ps_cur_deblk_thrd_mb;
+
+ for(i = 0; i < deblk_mb_grp; i++)
+ {
+ /* Spin until the current MB (and its right neighbour) is ready, or a skip flag is seen */
+ while(1)
+ {
+ CHECK_MB_MAP_BYTE(u4_mb_num, mb_map, u4_cur_mb);
+
+ if(ps_dec->u4_cur_bs_mb_num <= u4_mb_num)
+ u4_cur_mb = 0; /* BS not yet computed for this MB: treat as not ready */
+
+ if(u4_mb_x < (u4_image_wd_mb - 1))
+ {
+ CHECK_MB_MAP_BYTE((u4_mb_num + 1), mb_map, u4_right_mb);
+ }
+ else
+ u4_right_mb = 1; /* last column: there is no right neighbour to check */
+
+ if(ps_dec->u2_mb_skip_error)
+ {
+ ps_dec->u2_skip_deblock = 1;
+ break;
+ }
+
+ /* NOTE(review): after either skip break the code below still deblocks this MB - confirm intended */
+ if(ps_dec->u2_skip_deblock == 1)
+ {
+ break;
+ }
+ if((u4_cur_mb && u4_right_mb) == 0)
+ {
+ /* Not ready: convert pending display rows if any remain, otherwise idle briefly */
+ if(ps_dec->u4_output_present
+ && ps_dec->u4_fmt_conv_cur_row
+ < ps_dec->s_disp_frame_info.u4_y_ht)
+ {
+ ps_dec->u4_fmt_conv_num_rows =
+ MIN(ps_dec->u4_fmt_conv_num_rows,
+ (ps_dec->s_disp_frame_info.u4_y_ht
+ - ps_dec->u4_fmt_conv_cur_row));
+ ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
+ ps_dec->u4_fmt_conv_cur_row,
+ ps_dec->u4_fmt_conv_num_rows);
+ ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
+ }
+ else
+ NOP(32);
+ }
+ else
+ {
+ break; /* both MBs ready */
+ }
+ }
+
+ u4_mb_num++;
+ {
+ UWORD32 u4_deb_mode, u4_mbs_next;
+ u4_deb_mode = ps_cur_mb->u1_deblocking_mode;
+ if(!(u4_deb_mode & MB_DISABLE_FILTERING))
+ {
+ /* Neighbour pointers are NULL at picture borders or when the edge is disabled */
+ if(u4_mb_x)
+ {
+ ps_left_mb = ps_cur_mb - 1;
+
+ }
+ else
+ {
+ ps_left_mb = NULL;
+
+ }
+ if(u4_mb_y != 0)
+ {
+ ps_top_mb = ps_cur_mb - (u4_image_wd_mb);
+ }
+ else
+ {
+ ps_top_mb = NULL;
+ }
+
+ if(u4_deb_mode & MB_DISABLE_LEFT_EDGE)
+ ps_left_mb = NULL;
+ if(u4_deb_mode & MB_DISABLE_TOP_EDGE)
+ ps_top_mb = NULL;
+
+ ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt,
+ i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst,
+ ps_cur_mb, u4_wd_y, u4_wd_uv,
+ ps_top_mb, ps_left_mb);
+ }
+ /* Advance to the next MB even when filtering was disabled for this one */
+ ps_cur_mb++;
+ u4_mb_x++;
+ u4_mbs_next = u4_image_wd_mb - u4_mb_x; /* MBs left in this row */
+
+ ps_tfr_cxt->pu1_mb_y += 16;
+ ps_tfr_cxt->pu1_mb_u += 8 * YUV420SP_FACTOR;
+ ps_tfr_cxt->pu1_mb_v += 8;
+
+ if(!u4_mbs_next)
+ {
+ /* End of row: apply the row increments and restart at column 0 */
+ ps_tfr_cxt->pu1_mb_y += ps_tfr_cxt->u4_y_inc;
+ ps_tfr_cxt->pu1_mb_u += ps_tfr_cxt->u4_uv_inc;
+ ps_tfr_cxt->pu1_mb_v += ps_tfr_cxt->u4_uv_inc;
+ u4_mb_y++;
+ u4_mb_x = 0;
+ }
+ }
+
+ }
+ /* Save the deblock position for the next call */
+ ps_dec->u4_cur_deblk_mb_num = u4_mb_num;
+ ps_dec->u4_deblk_mb_x = u4_mb_x;
+ ps_dec->u4_deblk_mb_y = u4_mb_y;
+ ps_dec->ps_cur_deblk_thrd_mb = ps_cur_mb;
+
+}
+void ih264d_computebs_deblk_slice(dec_struct_t *ps_dec, tfr_ctxt_t *ps_tfr_cxt)
+{ /* Computes BS for every MB of the slice at ps_computebs_cur_slice, interleaved with deblocking of MBs that are ready */
+ dec_mb_info_t *p_cur_mb;
+ UWORD32 u4_max_addr = ps_dec->ps_cur_sps->u2_max_mb_addr;
+ UWORD32 i;
+ UWORD32 u1_mb_aff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD16 u2_slice_num;
+ UWORD32 u4_mb_num;
+ /* Reset the per-slice BS state and start at the slice's first MB */
+ ps_dec->u4_cur_slice_bs_done = 0;
+ ps_dec->u4_bs_cur_slice_num_mbs = 0;
+ ps_dec->u4_cur_bs_mb_num =
+ (ps_dec->ps_computebs_cur_slice->u4_first_mb_in_slice)
+ << u1_mb_aff;
+
+ while(ps_dec->u4_cur_slice_bs_done != 1)
+ {
+ UWORD32 bs_mb_grp = BS_MB_GROUP;
+ while(1)
+ {
+ /* Wait until the look-ahead MB is marked in pu1_dec_mb_map; deblock one group per spin meanwhile */
+ UWORD32 u4_cond = 0;
+
+ u4_mb_num = ps_dec->u4_cur_bs_mb_num;
+
+ /* look BS_MB_GROUP MBs ahead of the BS position, clamped to the last MB address */
+ if((u4_mb_num + BS_MB_GROUP) <= u4_max_addr)
+ u4_mb_num = u4_mb_num + BS_MB_GROUP;
+ else
+ {
+ bs_mb_grp = u4_max_addr - u4_mb_num + 1; /* MBs left up to and including u4_max_addr */
+ u4_mb_num = u4_max_addr;
+
+ }
+
+ CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond);
+ if(u4_cond)
+ {
+ break;
+ }
+
+ if(ps_dec->u2_skip_deblock == 0)
+ {
+ ih264d_check_mb_map_deblk(ps_dec, DEBLK_MB_GROUP, ps_tfr_cxt);
+ }
+ }
+
+ GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, ps_dec->u4_cur_bs_mb_num,
+ u2_slice_num);
+ /* The MB at the BS position belongs to another slice: this slice is finished */
+ if(u2_slice_num != ps_dec->u2_cur_slice_num_bs)
+ {
+ ps_dec->u4_cur_slice_bs_done = 1;
+ }
+
+ /* Compute BS for NMB group*/
+ for(i = 0; i < bs_mb_grp; i++)
+ {
+ GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map,
+ ps_dec->u4_cur_bs_mb_num, u2_slice_num);
+
+ if(u2_slice_num != ps_dec->u2_cur_slice_num_bs)
+ {
+ ps_dec->u4_cur_slice_bs_done = 1;
+ }
+
+ if(ps_dec->u4_cur_slice_bs_done == 1)
+ break;
+
+ p_cur_mb = &ps_dec->ps_frm_mb_info[ps_dec->u4_cur_bs_mb_num
+ & PD_MB_BUF_SIZE_MOD]; /* ps_frm_mb_info is indexed with the MB number masked by PD_MB_BUF_SIZE_MOD */
+
+ DEBUG_THREADS_PRINTF("ps_dec->u4_cur_bs_mb_num = %d\n",ps_dec->u4_cur_bs_mb_num);
+ ih264d_compute_bs_non_mbaff_thread(ps_dec, p_cur_mb,
+ ps_dec->u4_cur_bs_mb_num);
+ ps_dec->u4_cur_bs_mb_num++;
+ ps_dec->u4_bs_cur_slice_num_mbs++;
+
+ }
+
+ if(ps_dec->u4_cur_bs_mb_num > u4_max_addr)
+ {
+ ps_dec->u4_cur_slice_bs_done = 1; /* BS position is past the last MB of the picture */
+ }
+
+ /*deblock MB group*/
+ {
+ UWORD32 u4_num_mbs;
+
+ if(ps_dec->u4_cur_bs_mb_num > ps_dec->u4_cur_deblk_mb_num)
+ /* number of MBs with BS computed but not yet deblocked */
+ u4_num_mbs = ps_dec->u4_cur_bs_mb_num
+ - ps_dec->u4_cur_deblk_mb_num;
+ else
+ u4_num_mbs = 0;
+
+ if(u4_num_mbs >= DEBLK_MB_GROUP)
+ u4_num_mbs = DEBLK_MB_GROUP; /* cap at DEBLK_MB_GROUP MBs per pass */
+ if(ps_dec->u2_skip_deblock == 0)
+ {
+ ih264d_check_mb_map_deblk_wait(ps_dec, u4_num_mbs, ps_tfr_cxt);
+ }
+ }
+
+ }
+}
+/* Thread entry: waits for u4_start_bs_deblk, then computes BS and deblocks slice by slice, finishes the remaining MBs and exits the thread */
+void ih264d_computebs_deblk_thread(dec_struct_t *ps_dec)
+{
+ tfr_ctxt_t s_tfr_ctxt;
+ tfr_ctxt_t *ps_tfr_cxt = &s_tfr_ctxt; // thread-local context; ps_dec->s_tran_addrecon is not used here
+ pad_mgr_t *ps_pad_mgr = &ps_dec->s_pad_mgr;
+
+ UWORD32 yield_cnt = 0; /* not used in this function */
+
+ ithread_set_name("ih264d_computebs_deblk_thread");
+
+
+ // spin until u4_start_bs_deblk becomes non-zero
+
+ // u4_start_bs_deblk (presumably): 0: un-identified state, 1 - bs needed, 2 - bs not needed
+ while(1)
+ {
+ if(ps_dec->u4_start_bs_deblk == 0)
+ {
+ NOP(128);
+ NOP(128);
+ NOP(128);
+ NOP(128);
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if(ps_dec->u4_start_bs_deblk == 1)
+ {
+ ps_dec->u4_cur_deblk_mb_num = 0; /* deblocking starts at MB 0, column 0, row 0 */
+ ps_dec->u4_deblk_mb_x = 0;
+ ps_dec->u4_deblk_mb_y = 0;
+
+ ih264d_init_deblk_tfr_ctxt(ps_dec, ps_pad_mgr, ps_tfr_cxt,
+ ps_dec->u2_frm_wd_in_mbs, 0);
+ /* MB pointers start 4 bytes past each source plane pointer */
+ ps_tfr_cxt->pu1_mb_y = ps_tfr_cxt->pu1_src_y + 4;
+ ps_tfr_cxt->pu1_mb_u = ps_tfr_cxt->pu1_src_u + 4;
+ ps_tfr_cxt->pu1_mb_v = ps_tfr_cxt->pu1_src_v + 4;
+
+ ps_dec->ps_cur_deblk_thrd_mb = ps_dec->ps_deblk_pic;
+ /* One iteration per slice */
+ while(1)
+ {
+ /*Complete all writes before processing next slice*/
+ DATA_SYNC();
+ /*wait until all the slice params have been populated*/
+ while(ps_dec->ps_computebs_cur_slice->slice_header_done == 0)
+ {
+ NOP(32); DEBUG_THREADS_PRINTF(" waiting for slice header at compute bs\n");
+ }
+
+ DEBUG_THREADS_PRINTF(" Entering compute bs slice\n");
+ ih264d_computebs_deblk_slice(ps_dec, ps_tfr_cxt);
+
+ DEBUG_THREADS_PRINTF(" Exit compute bs slice \n");
+
+ /*Complete all writes before processing next slice*/
+ DATA_SYNC();
+ /* Wait until this was the last slice of the frame or the parser has moved to another slice */
+ while(1)
+ {
+ volatile void * parse_addr, *computebs_addr;
+ volatile UWORD32 last_slice;
+
+ parse_addr = (volatile void *)ps_dec->ps_parse_cur_slice;
+ computebs_addr =
+ (volatile void *)ps_dec->ps_computebs_cur_slice;
+ last_slice =
+ ps_dec->ps_computebs_cur_slice->last_slice_in_frame;
+
+ if(last_slice == 1)
+ break;
+
+ if(parse_addr != computebs_addr)
+ break;
+
+ DEBUG_THREADS_PRINTF("Waiting at compute bs for next slice or end of frame\n");
+
+ NOP(32);
+
+ }
+
+ DEBUG_THREADS_PRINTF("CBS thread:Got next slice/end of frame signal \n ");
+ /* Parser pointer is ahead of ours: advance to the next slice; otherwise the frame is finished */
+ if((void *)ps_dec->ps_parse_cur_slice
+ > (void *)ps_dec->ps_computebs_cur_slice)
+ {
+ ps_dec->ps_computebs_cur_slice++;
+ ps_dec->u2_cur_slice_num_bs++;
+ }
+ else
+ {
+ /*Last slice in frame*/
+ break;
+ }
+
+ }
+
+ /*deblock remaining MBs*/
+ {
+ UWORD32 u4_num_mbs;
+ /* MBs from the current deblock position up to and including u2_max_mb_addr */
+ u4_num_mbs = ps_dec->ps_cur_sps->u2_max_mb_addr
+ - ps_dec->u4_cur_deblk_mb_num + 1;
+
+ DEBUG_PERF_PRINTF("mbs left for deblocking= %d \n",u4_num_mbs);
+
+ if(u4_num_mbs != 0)
+ if(ps_dec->u2_skip_deblock == 0)
+ ih264d_check_mb_map_deblk_wait(ps_dec, u4_num_mbs,
+ ps_tfr_cxt);
+ }
+ }
+ /* Reset the start flag before the thread exits */
+ ps_dec->u4_start_bs_deblk = 0;
+ ithread_exit(0);
+}
+
+
diff --git a/decoder/ih264d_thread_compute_bs.h b/decoder/ih264d_thread_compute_bs.h
new file mode 100755
index 0000000..1bef07f
--- /dev/null
+++ b/decoder/ih264d_thread_compute_bs.h
@@ -0,0 +1,34 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*
+ * ih264d_thread_compute_bs.h
+ *
+ * Created on: Feb 21, 2012
+ * Author: 100492
+ */
+
+#ifndef _IH264D_THREAD_COMPUTE_BS_H_
+#define _IH264D_THREAD_COMPUTE_BS_H_
+void ih264d_compute_bs_non_mbaff_thread(dec_struct_t * ps_dec,
+ dec_mb_info_t * ps_cur_mb_info,
+ UWORD32 u4_mb_num); /* computes the BS table of MB number u4_mb_num; dec_struct_t and dec_mb_info_t must be declared before this header is included */
+/* Entry point of the thread that computes BS and deblocks */
+void ih264d_computebs_deblk_thread(dec_struct_t *ps_dec);
+#endif /* _IH264D_THREAD_COMPUTE_BS_H_ */
diff --git a/decoder/ih264d_thread_parse_decode.c b/decoder/ih264d_thread_parse_decode.c
new file mode 100755
index 0000000..be3cb01
--- /dev/null
+++ b/decoder/ih264d_thread_parse_decode.c
@@ -0,0 +1,732 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_thread_parse_decode.c
+ *
+ * \brief
+ * Contains parse and decode thread routines for the multi-threaded decoder
+ *
+ * Detailed_description
+ *
+ * \date
+ * 20/02/2012
+ *
+ * \author ZR
+ **************************************************************************
+ */
+
+#include "ih264d_error_handler.h"
+#include "ih264d_debug.h"
+#include "ithread.h"
+#include <string.h>
+#include "ih264d_defs.h"
+#include "ih264d_debug.h"
+#include "ih264d_tables.h"
+#include "ih264d_structs.h"
+#include "ih264d_defs.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_thread_parse_decode.h"
+#include "ih264d_inter_pred.h"
+
+#include "ih264d_process_pslice.h"
+#include "ih264d_process_intra_mb.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_format_conv.h"
+/* Defined elsewhere; called per MB from ih264d_decode_recon_tfr_nmb_thread when u4_mb_level_deblk == 1 */
+void ih264d_deblock_mb_level(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD32 nmb_index);
+/* Defined elsewhere; called from the same loop when u4_num_cores >= 3 and MBAFF is off */
+void ih264d_copy_intra_pred_line(dec_struct_t *ps_dec,
+ dec_mb_info_t *ps_cur_mb_info,
+ UWORD32 nmb_index);
+/* Parse-side hand-over of a group of u1_num_mbs MBs: marks them in the slice-number and decoded-MB maps and advances the parse destination/MV pointers; does nothing when u1_tfr_n_mb is 0 */
+void ih264d_parse_tfr_nmb(dec_struct_t * ps_dec,
+ UWORD8 u1_mb_idx,
+ UWORD8 u1_num_mbs,
+ UWORD8 u1_num_mbs_next,
+ UWORD8 u1_tfr_n_mb,
+ UWORD8 u1_end_of_row)
+{
+ WORD32 i, u4_mb_num;
+
+ const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD32 u4_n_mb_start;
+
+ UNUSED(u1_mb_idx);
+ UNUSED(u1_num_mbs_next);
+ if(u1_tfr_n_mb)
+ {
+
+ /* First MB of the group: the last u1_num_mbs MBs ending at u2_cur_mb_addr */
+ u4_n_mb_start = (ps_dec->u2_cur_mb_addr + 1) - u1_num_mbs;
+
+ // copy into s_frmMbInfo
+
+ u4_mb_num = u4_n_mb_start;
+ ps_dec->ps_parse_cur_slice->u4_num_mbs_done_in_slice += u1_num_mbs;
+ u4_mb_num = (ps_dec->u2_cur_mb_addr + 1) - u1_num_mbs; /* same value as u4_n_mb_start (redundant recomputation) */
+
+ for(i = 0; i < u1_num_mbs; i++)
+ {
+ DATA_SYNC();
+ UPDATE_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u4_mb_num,
+ ps_dec->u2_cur_slice_num);
+ UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_dec_mb_map, u4_mb_num); /* the decode and BS threads poll this map */
+
+ u4_mb_num++;
+ }
+
+ DATA_SYNC();
+ /****************************************************************/
+ /* Check for End Of Row in Next iteration */
+ /****************************************************************/
+
+ /****************************************************************/
+ /* Transfer the Following things */
+ /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */
+ /* N-Mb Recon Data ( To Ext Frame Buffer ) */
+ /* N-Mb Intrapredline Data ( Updated Internally) */
+ /* N-Mb MV Data ( To Ext MV Buffer ) */
+ /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */
+ /****************************************************************/
+
+ /* Advance the parse destination pointers; the increment is selected by u1_end_of_row */
+
+ ps_dec->s_tran_addrecon_parse.pu1_dest_y +=
+ ps_dec->s_tran_addrecon_parse.u4_inc_y[u1_end_of_row];
+ ps_dec->s_tran_addrecon_parse.pu1_dest_u +=
+ ps_dec->s_tran_addrecon_parse.u4_inc_uv[u1_end_of_row];
+ ps_dec->s_tran_addrecon_parse.pu1_dest_v +=
+ ps_dec->s_tran_addrecon_parse.u4_inc_uv[u1_end_of_row];
+
+ if(u1_end_of_row)
+ {
+ UWORD16 u2_mb_y;
+ UWORD32 u4_frame_stride, y_offset;
+ /* Current MB row becomes the top row; the three pointers incremented above are overwritten below */
+ ps_dec->ps_top_mb_row = ps_dec->ps_cur_mb_row;
+ ps_dec->ps_cur_mb_row += ((ps_dec->u2_frm_wd_in_mbs) << u1_mbaff);
+
+ u2_mb_y = ps_dec->u2_mby + (1 + u1_mbaff); /* next MB row (two rows further with MBAFF) */
+ u4_frame_stride = ps_dec->u2_frm_wd_y
+ << ps_dec->ps_cur_slice->u1_field_pic_flag;
+ y_offset = (u2_mb_y * u4_frame_stride) << 4; /* 16 luma lines per MB row */
+ ps_dec->s_tran_addrecon_parse.pu1_dest_y =
+ ps_dec->s_cur_pic.pu1_buf1 + y_offset;
+
+ u4_frame_stride = ps_dec->u2_frm_wd_uv
+ << ps_dec->ps_cur_slice->u1_field_pic_flag;
+ y_offset = (u2_mb_y * u4_frame_stride) << 3; /* 8 chroma lines per MB row */
+ ps_dec->s_tran_addrecon_parse.pu1_dest_u =
+ ps_dec->s_cur_pic.pu1_buf2 + y_offset;
+ ps_dec->s_tran_addrecon_parse.pu1_dest_v =
+ ps_dec->s_cur_pic.pu1_buf3 + y_offset;
+
+ }
+
+ ps_dec->ps_deblk_mbn += u1_num_mbs;
+
+ /*
+ * The Slice boundary is also a valid condition to transfer. So recalculate
+ * the Left increment, in case the number of MBs is lesser than the
+ * N MB value. c_numMbs will be equal to N of N MB if the entire N Mb is
+ * decoded.
+ */
+ ps_dec->s_tran_addrecon.u2_mv_left_inc = ((u1_num_mbs >> u1_mbaff) - 1)
+ << (4 + u1_mbaff);
+ ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (u1_num_mbs << 2) - 1
+ - (u1_mbaff << 2);
+
+ /* reassign left MV and cur MV pointers */
+ ps_dec->ps_mv_left = ps_dec->ps_mv_cur
+ + ps_dec->s_tran_addrecon.u2_mv_left_inc;
+
+
+
+
+ /* 16 MV entries per MB */
+ ps_dec->ps_mv_cur += (u1_num_mbs << 4);
+ ps_dec->u4_num_mbs_prev_nmb = u1_num_mbs;
+
+
+ ps_dec->u4_dma_buf_idx = 0;
+
+ }
+}
+/* Decode-thread hand-over of an MB group: calls ih264d_transfer_mb_group_data and updates the group column sizes for the next group */
+void ih264d_decode_tfr_nmb(dec_struct_t * ps_dec,
+ UWORD8 u1_num_mbs,
+ UWORD8 u1_num_mbs_next,
+ UWORD8 u1_end_of_row)
+{
+
+ UWORD32 u1_end_of_row_next;
+
+ const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+
+ /****************************************************************/
+ /* Check for End Of Row in Next iteration */
+ /****************************************************************/
+ u1_end_of_row_next =
+ u1_num_mbs_next
+ && ((u1_num_mbs_next)
+ <= (ps_dec->u1_recon_mb_grp
+ >> u1_mbaff)); /* true when the MBs left in the row fit in one recon group */
+
+ /****************************************************************/
+ /* Transfer the Following things */
+ /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */
+ /* N-Mb Recon Data ( To Ext Frame Buffer ) */
+ /* N-Mb Intrapredline Data ( Updated Internally) */
+ /* N-Mb MV Data ( To Ext MV Buffer ) */
+ /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */
+ /****************************************************************/
+ if(u1_end_of_row)
+ {
+ ps_dec->i2_dec_thread_mb_y += (1 << u1_mbaff); /* advance the decode thread's MB row (by two with MBAFF) */
+ }
+ ih264d_transfer_mb_group_data(ps_dec, u1_num_mbs, u1_end_of_row,
+ u1_end_of_row_next);
+
+ if(u1_end_of_row)
+ {
+ /* Reset the N-Mb Recon Buf Index to default Values */
+ ps_dec->u2_mb_group_cols_y1 = ps_dec->u2_mb_group_cols_y;
+ ps_dec->u2_mb_group_cols_cr1 = ps_dec->u2_mb_group_cols_cr;
+ }
+ /* If next N-Mb Group is the EndOfRow, set the N-Mb Recon Buf Index */
+ else if(u1_end_of_row_next)
+ {
+ ps_dec->u2_mb_group_cols_y1 = (u1_num_mbs_next << 4) + 8; /* 16 luma columns per remaining MB, plus 8 */
+ ps_dec->u2_mb_group_cols_cr1 = (u1_num_mbs_next << 3) + 8; /* 8 chroma columns per remaining MB, plus 8 */
+ }
+}
+
+WORD32 ih264d_decode_recon_tfr_nmb_thread(dec_struct_t * ps_dec, UWORD8 u1_num_mbs, // number of MBs loop should run
+ UWORD8 u1_num_mbs_next,
+ UWORD8 u1_end_of_row)
+{
+ WORD32 i,j;
+ dec_mb_info_t * ps_cur_mb_info;
+ UWORD32 u4_update_mbaff = 0;
+ const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+ UWORD32 u1_slice_type, u1_B;
+ WORD32 u1_skip_th;
+ UWORD32 u1_ipcm_th;
+ UWORD32 u4_cond;
+ UWORD16 u2_slice_num,u2_cur_dec_mb_num;
+ WORD32 ret;
+
+ u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type;
+
+ u1_B = (u1_slice_type == B_SLICE);
+
+ u1_skip_th =
+ ((u1_slice_type != I_SLICE) ?
+ (u1_B ? B_8x8 : PRED_8x8R0) : -1);
+
+ u1_ipcm_th = ((u1_slice_type != I_SLICE) ? (u1_B ? 23 : 5) : 0);
+
+ u2_cur_dec_mb_num = ps_dec->cur_dec_mb_num;
+
+ /* N Mb MC Loop */
+ for(i = 0; i < u1_num_mbs; i++)
+ {
+ DATA_SYNC();
+
+ // check dec_mb_map
+ UWORD32 yield_cnt = 0, u4_max_addr;
+
+ u4_max_addr = ps_dec->ps_cur_sps->u2_max_mb_addr;
+ while(1)
+ {
+ UWORD32 u4_mb_num = u2_cur_dec_mb_num;
+
+ /*introducing 1 MB delay*/
+ if(u4_mb_num < u4_max_addr)
+ u4_mb_num = u4_mb_num + 1;
+
+ CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond);
+ if(u4_cond)
+ {
+ break;
+ }
+ else
+ {
+
+ {
+ NOP(128);
+
+ }
+
+ DEBUG_THREADS_PRINTF("waiting for mb mapcur_dec_mb_num = %d,ps_dec->u2_cur_mb_addr = %d\n",u2_cur_dec_mb_num,
+ ps_dec->u2_cur_mb_addr);
+
+ }
+ }
+
+ GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u2_cur_dec_mb_num,
+ u2_slice_num);
+
+ if(u2_slice_num != ps_dec->u2_cur_slice_num_dec_thread)
+ {
+ ps_dec->u4_cur_slice_decode_done = 1;
+ break;
+ }
+
+ ps_cur_mb_info = &ps_dec->ps_frm_mb_info[u2_cur_dec_mb_num
+ & PD_MB_BUF_SIZE_MOD];
+
+ ps_dec->u4_dma_buf_idx = 0;
+ ps_dec->u4_pred_info_idx = 0;
+
+ if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
+ {
+
+ {
+ WORD32 pred_cnt = 0;
+ pred_info_pkd_t *ps_pred_pkd;
+ UWORD32 u4_pred_info_pkd_idx;
+ WORD8 i1_pred;
+
+ u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
+
+ while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
+ {
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
+
+
+ ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec,
+ ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff),
+ ps_cur_mb_info);
+
+ u4_pred_info_pkd_idx++;
+ pred_cnt++;
+
+ }
+ }
+ ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info);
+ }
+ else if(ps_cur_mb_info->u1_mb_type == MB_SKIP)
+ {
+ {
+ WORD32 pred_cnt = 0;
+ pred_info_pkd_t *ps_pred_pkd;
+ UWORD32 u4_pred_info_pkd_idx;
+ WORD8 i1_pred;
+
+ u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx;
+
+
+
+ while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts)
+ {
+
+ ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx;
+
+
+ ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec,
+ ps_cur_mb_info->u2_mbx,ps_cur_mb_info->u2_mby,(i >> u1_mbaff),
+ ps_cur_mb_info);
+
+
+ u4_pred_info_pkd_idx++;
+ pred_cnt++;
+ }
+ }
+ /* Decode MB skip */
+ ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info);
+ }
+
+ u2_cur_dec_mb_num++;
+ }
+
+ /* N Mb IQ IT RECON Loop */
+ for(j = 0; j < i; j++)
+ {
+ DATA_SYNC();
+
+
+ ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->cur_dec_mb_num
+ & PD_MB_BUF_SIZE_MOD];
+
+
+ if(ps_cur_mb_info->u1_mb_type <= u1_skip_th)
+ {
+ ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j);
+ }
+ else if(ps_cur_mb_info->u1_mb_type != MB_SKIP)
+ {
+ if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type)
+ {
+ ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1);
+ ret = ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j);
+ if(ret != OK)
+ return ret;
+ }
+ }
+
+ if(ps_dec->u4_mb_level_deblk == 1)
+ {
+
+ ih264d_deblock_mb_level(ps_dec, ps_cur_mb_info, j);
+ }
+
+ if((ps_dec->u4_num_cores >= 3) && (u1_mbaff == 0))
+ ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j);
+ if(u1_mbaff)
+ {
+ if(u4_update_mbaff)
+ {
+ UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx
+ + ps_dec->u2_frm_wd_in_mbs
+ * (ps_cur_mb_info->u2_mby >> 1);
+ UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num);
+ u4_update_mbaff = 0;
+ }
+ else
+ {
+ u4_update_mbaff = 1;
+ }
+ }
+ else
+ {
+ UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx
+ + ps_dec->u2_frm_wd_in_mbs * ps_cur_mb_info->u2_mby;
+ UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num);
+ }
+ ps_dec->cur_dec_mb_num++;
+ }
+
+
+ /*handle the last mb in picture case*/
+ if(ps_dec->cur_dec_mb_num > ps_dec->ps_cur_sps->u2_max_mb_addr)
+ ps_dec->u4_cur_slice_decode_done = 1;
+
+ if(i != u1_num_mbs)
+ {
+ u1_end_of_row = 0;
+ /*Number of MB's left in row*/
+ u1_num_mbs_next = u1_num_mbs_next + ((u1_num_mbs - i) >> u1_mbaff);
+ }
+
+ ih264d_decode_tfr_nmb(ps_dec, (i), u1_num_mbs_next, u1_end_of_row);
+
+ return OK;
+}
+
+/*
+ * Decode-thread handler for one slice.
+ *
+ * Positions the reconstruction pointers in ps_dec->s_tran_addrecon at the
+ * first macroblock of the slice that ps_dec->ps_decode_cur_slice points to,
+ * selects the motion-compensation / partition-info function pointers, and
+ * then calls ih264d_decode_recon_tfr_nmb_thread() on groups of at most
+ * ps_dec->u1_recon_mb_grp macroblocks until ps_dec->u4_cur_slice_decode_done
+ * becomes 1.
+ *
+ * Returns ERROR_INV_SLICE_HDR_T when slice_header_done of the slice is not 2,
+ * the first non-OK value returned by ih264d_decode_recon_tfr_nmb_thread(),
+ * and OK otherwise.
+ */
+WORD32 ih264d_decode_slice_thread(dec_struct_t *ps_dec /* Decoder parameters */
+)
+{
+ UWORD8 u1_num_mbs_next, u1_num_mbsleft, u1_end_of_row = 0; //, u1_slice_end, u1_tfr_n_mb, u1_decode_nmb;
+ const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs;
+ UWORD8 u1_mbaff, u1_num_mbs; //,uc_more_data_flag,u1_mb_idx;
+
+ UWORD16 u2_first_mb_in_slice;
+
+ /*dec_bit_stream_t *const ps_bitstrm = ps_dec->ps_bitstrm;
+ UWORD32 * pu4_bitstrm_buf = ps_bitstrm->pu4_buffer;
+ UWORD32 *pu4_bitstrm_ofst = &ps_bitstrm->u4_ofst;*/
+
+ UWORD16 i16_mb_x, i16_mb_y;
+ UWORD8 u1_field_pic;
+ UWORD32 u4_frame_stride, x_offset, y_offset;
+ WORD32 ret;
+
+ tfr_ctxt_t *ps_trns_addr;
+
+ /* Only slices whose header state is exactly 2 are decoded */
+ if(ps_dec->ps_decode_cur_slice->slice_header_done != 2)
+ return ERROR_INV_SLICE_HDR_T;
+
+
+
+ u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag;
+
+ u2_first_mb_in_slice = ps_dec->ps_decode_cur_slice->u4_first_mb_in_slice;
+
+ /* MB column / row of the first MB; the row is doubled for MBAFF frames */
+ i16_mb_x = MOD(u2_first_mb_in_slice, i2_pic_wdin_mbs);
+ i16_mb_y = DIV(u2_first_mb_in_slice, i2_pic_wdin_mbs);
+ i16_mb_y <<= u1_mbaff;
+ ps_dec->i2_dec_thread_mb_y = i16_mb_y;
+
+ /*if((i16_mb_x > (i2_pic_wdin_mbs - 1))
+ || (i16_mb_y > ps_dec->u2_frm_ht_in_mbs - 1))
+ {
+ }*/
+ /* Flag a mismatch between the MB number the decode thread had reached and
+ * the first MB of this slice, then resynchronise cur_dec_mb_num to it */
+ if(ps_dec->cur_dec_mb_num == u2_first_mb_in_slice << u1_mbaff)
+ {
+ ps_dec->u2_mb_skip_error = 0;
+ }
+ else
+ {
+ ps_dec->u2_mb_skip_error = 1;
+ }
+ ps_dec->cur_dec_mb_num = u2_first_mb_in_slice << u1_mbaff;
+
+ // recalculate recon pointers
+ /* Luma: the stride is doubled for field pictures, an MB is 16 pixels
+ * (<< 4) wide and high */
+ u1_field_pic = ps_dec->ps_cur_slice->u1_field_pic_flag;
+ u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic;
+ x_offset = i16_mb_x << 4;
+ y_offset = (i16_mb_y * u4_frame_stride) << 4;
+
+ ps_trns_addr = &(ps_dec->s_tran_addrecon);
+
+ ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + y_offset;
+
+ /* Chroma: half the luma x offset scaled by YUV420SP_FACTOR, 8 rows
+ * (<< 3) per MB */
+ u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic;
+ x_offset >>= 1;
+ y_offset = (i16_mb_y * u4_frame_stride) << 3;
+
+ x_offset *= YUV420SP_FACTOR;
+
+ ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + y_offset;
+ ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + y_offset;
+
+ ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y;
+ ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u;
+ ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v;
+
+ if(ps_dec->u4_mb_level_deblk == 1)
+ {
+ /* If this is not the first MB in the row, step the MB pointers back by
+ * one MB: the previous MB is the one to be deblocked because
+ * deblocking runs with a delay of 1 MB */
+ /* NOTE(review): the V pointer steps back by BLK8x8SIZE without the
+ * YUV420SP_FACTOR applied to the U pointer - confirm this is intended */
+ if(i16_mb_x != 0)
+ {
+ ps_trns_addr->pu1_mb_y -= MB_SIZE;
+ ps_trns_addr->pu1_mb_u -= BLK8x8SIZE * YUV420SP_FACTOR;
+ ps_trns_addr->pu1_mb_v -= BLK8x8SIZE;
+ }
+ }
+
+ /**********Number of Mbs in Slice**********/
+
+ /* Deblocking MB info entry matching the first MB of the slice */
+ ps_dec->ps_deblk_mbn_dec_thrd = ps_dec->ps_deblk_pic
+ + (u2_first_mb_in_slice << u1_mbaff);
+
+ /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */
+
+ {
+ ps_dec->p_mc_dec_thread = ih264d_motion_compensate_bp;
+ ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_bp;
+ }
+ {
+ /* The _bp routines are kept only for frame pictures without MBAFF, in
+ * non-B slices and with weighted prediction off; every other
+ * combination switches to the _mp routines */
+ UWORD8 uc_nofield_nombaff;
+ uc_nofield_nombaff = ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0)
+ && (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0)
+ && (ps_dec->ps_decode_cur_slice->slice_type != B_SLICE)
+ && (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0));
+
+ if(uc_nofield_nombaff == 0)
+ {
+ ps_dec->p_mc_dec_thread = ih264d_motion_compensate_mp;
+ ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_mp;
+ }
+
+ }
+
+ ps_dec->u4_cur_slice_decode_done = 0;
+
+
+ /* u4_cur_slice_decode_done is set to 1 inside
+ * ih264d_decode_recon_tfr_nmb_thread() */
+ while(ps_dec->u4_cur_slice_decode_done != 1)
+ {
+
+ /* MBs left in the current row (doubled for MBAFF) */
+ /* NOTE(review): the result is stored in a UWORD8 - confirm supported
+ * picture widths cannot make it exceed 255 */
+ u1_num_mbsleft = ((i2_pic_wdin_mbs - i16_mb_x) << u1_mbaff);
+
+ if(u1_num_mbsleft <= ps_dec->u1_recon_mb_grp)
+ {
+ /* The rest of the row fits in one group: finish the row */
+ u1_num_mbs = u1_num_mbsleft;
+
+ /*Indicate number of mb's left in a row*/
+ u1_num_mbs_next = 0;
+ u1_end_of_row = 1;
+ i16_mb_x = 0;
+ }
+ else
+ {
+ /* Process a full group and stay in the same row */
+ u1_num_mbs = ps_dec->u1_recon_mb_grp;
+
+ /*Indicate number of mb's left in a row*/
+ u1_num_mbs_next = i2_pic_wdin_mbs - i16_mb_x
+ - (ps_dec->u1_recon_mb_grp >> u1_mbaff);
+ i16_mb_x += (u1_num_mbs >> u1_mbaff);
+ u1_end_of_row = 0;
+
+ }
+ ret = ih264d_decode_recon_tfr_nmb_thread(ps_dec, u1_num_mbs, u1_num_mbs_next,
+ u1_end_of_row);
+ if(ret != OK)
+ return ret;
+ }
+ return OK;
+}
+
+/*
+ * Body of the decode worker thread for one picture.
+ *
+ * Spins until ps_dec->u4_start_frame_decode becomes non-zero. When it is 1,
+ * slices are decoded one after another with ih264d_decode_slice_thread():
+ * before each slice the thread waits for slice_header_done to become
+ * non-zero, and after each slice it waits until either the parse thread has
+ * moved to another slice or the current slice is flagged
+ * last_slice_in_frame. Any other start value (ih264d_signal_decode_thread()
+ * writes 2 for a frame in error) skips slice decoding altogether.
+ *
+ * When ps_dec->u4_output_present is set, the thread then services the
+ * format-conversion request in as_fmt_conv_part[1]. It always finishes by
+ * calling ithread_exit(0).
+ */
+void ih264d_decode_picture_thread(dec_struct_t *ps_dec )
+{
+    /*
+     * Status of the most recently decoded slice. It is initialised and
+     * refreshed after every slice so that the error recovery in the wait
+     * loop below never reads an indeterminate value.
+     */
+    volatile WORD32 i4_err_status = OK;
+
+    ithread_set_name("ih264d_decode_picture_thread");
+
+    /* Wait for the start-of-frame signal */
+    while(1)
+    {
+        if(ps_dec->u4_start_frame_decode)
+        {
+            break;
+        }
+        else
+        {
+            NOP(32);
+        }
+    }
+
+    DEBUG_THREADS_PRINTF("Got start of frame u4_flag\n");
+
+    if(ps_dec->u4_start_frame_decode == 1)
+    {
+        /* Run the loop till all slices are decoded */
+        while(1)
+        {
+            /* Complete all writes before processing next slice */
+            DATA_SYNC();
+
+            /* Wait until all the slice params have been populated */
+            while(ps_dec->ps_decode_cur_slice->slice_header_done == 0)
+            {
+                NOP(32); DEBUG_THREADS_PRINTF(" waiting for slice header \n");
+            }
+
+            DEBUG_THREADS_PRINTF(" Entering decode slice\n");
+
+            /* Keep the result: it drives the error recovery below */
+            i4_err_status = ih264d_decode_slice_thread(ps_dec);
+            DEBUG_THREADS_PRINTF(" Exit ih264d_decode_slice_thread \n");
+
+            /* Complete all writes before processing next slice */
+            DATA_SYNC();
+
+            /* Wait for the next slice or for the end of the frame */
+            while(1)
+            {
+                volatile void * parse_addr, *dec_addr;
+                volatile UWORD32 last_slice;
+
+                parse_addr = (volatile void *)ps_dec->ps_parse_cur_slice;
+                dec_addr = (volatile void *)ps_dec->ps_decode_cur_slice;
+                last_slice = ps_dec->ps_decode_cur_slice->last_slice_in_frame;
+
+                if(last_slice == 1)
+                    break;
+
+                if(parse_addr != dec_addr)
+                    break;
+
+                DEBUG_THREADS_PRINTF("Waiting for next slice or end of frame\n");
+
+                NOP(32);
+                if(i4_err_status != OK)
+                {
+                    /* In the case of error, set the decode MB number so
+                       that the parse thread does not wait because of the
+                       MB difference being greater than 32 */
+                    ps_dec->cur_dec_mb_num = ps_dec->u2_cur_mb_addr - 1;
+                }
+            }
+
+            DEBUG_THREADS_PRINTF("Got next slice/end of frame signal \n ");
+
+            if((void *)ps_dec->ps_parse_cur_slice
+                            > (void *)ps_dec->ps_decode_cur_slice)
+            {
+                /* The parser is ahead: move on to the next slice */
+                ps_dec->ps_decode_cur_slice++;
+                ps_dec->u2_cur_slice_num_dec_thread++;
+            }
+            else
+            {
+                /* Last slice in frame */
+                break;
+            }
+        }
+    }
+
+    if(ps_dec->u4_output_present)
+    {
+        while(1)
+        {
+            volatile UWORD32 *u4_flag = &(ps_dec->as_fmt_conv_part[1].u4_flag);
+
+            DEBUG_THREADS_PRINTF(" Format conversion loop in decode *u4_flag = %d\n",*u4_flag);
+            if(2 == *u4_flag)
+            {
+                /* Flag value 2: convert the requested rows (if any), done */
+                if(ps_dec->as_fmt_conv_part[1].u4_num_rows_y)
+                    ih264d_format_convert(
+                                    ps_dec, &(ps_dec->s_disp_op),
+                                    ps_dec->as_fmt_conv_part[1].u4_start_y,
+                                    ps_dec->as_fmt_conv_part[1].u4_num_rows_y);
+
+                break;
+            }
+            else if(1 == *u4_flag)
+            {
+                /* Flag value 1: keep polling */
+                NOP(32);
+            }
+            else
+                break;
+        }
+    }
+
+    ithread_exit(0);
+
+}
+
+/*
+ * Signals the decode thread that the frame is over and joins it.
+ *
+ * Nothing is done unless ps_dec->u4_dec_thread_created is 1. If frame decode
+ * was started (u4_start_frame_decode == 1) the slice currently being parsed
+ * is marked as the last slice of the frame; otherwise u4_start_frame_decode
+ * is set to 2 to indicate a frame in error. The thread is then joined and
+ * the "created" flag cleared.
+ */
+void ih264d_signal_decode_thread(dec_struct_t *ps_dec)
+{
+    if(ps_dec->u4_dec_thread_created != 1)
+        return;
+
+    if(ps_dec->u4_start_frame_decode != 1)
+    {
+        /* to indicate frame in error */
+        ps_dec->u4_start_frame_decode = 2;
+    }
+    else
+    {
+        ps_dec->ps_parse_cur_slice->last_slice_in_frame = 1;
+    }
+
+    ithread_join(ps_dec->pv_dec_thread_handle, NULL);
+    ps_dec->u4_dec_thread_created = 0;
+}
+/*
+ * Joins the BS / deblocking thread if one was created.
+ *
+ * When the thread was never started (u4_start_bs_deblk == 0) the start flag
+ * is set to 2 to signal an error before joining. The "created" flag is
+ * cleared afterwards.
+ */
+void ih264d_signal_bs_deblk_thread(dec_struct_t *ps_dec)
+{
+    if(!ps_dec->u4_bs_deblk_thread_created)
+        return;
+
+    /* signal error */
+    if(0 == ps_dec->u4_start_bs_deblk)
+    {
+        ps_dec->u4_start_bs_deblk = 2;
+    }
+
+    ithread_join(ps_dec->pv_bs_deblk_thread_handle, NULL);
+    ps_dec->u4_bs_deblk_thread_created = 0;
+}
diff --git a/decoder/ih264d_thread_parse_decode.h b/decoder/ih264d_thread_parse_decode.h
new file mode 100755
index 0000000..013b14f
--- /dev/null
+++ b/decoder/ih264d_thread_parse_decode.h
@@ -0,0 +1,48 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*
+ * ih264d_thread_parse_decode.h
+ *
+ * Created on: Feb 21, 2012
+ * Author: 100492
+ */
+
+#ifndef _IH264D_THREAD_PARSE_DECPDE_H_
+#define _IH264D_THREAD_PARSE_DECPDE_H_
+/* Transfer step for a group of macroblocks; presumably the parse-side
+ * counterpart of ih264d_decode_tfr_nmb() (defined elsewhere - TODO confirm). */
+void ih264d_parse_tfr_nmb(dec_struct_t *ps_dec,
+ UWORD8 u1_mb_idx,
+ UWORD8 u1_num_mbs,
+ UWORD8 u1_num_mbs_next,
+ UWORD8 u1_tfr_n_mb,
+ UWORD8 u1_end_of_row);
+/* Called by ih264d_decode_recon_tfr_nmb_thread() once a group of
+ * macroblocks has been reconstructed. */
+void ih264d_decode_tfr_nmb(dec_struct_t *ps_dec,
+ UWORD8 u1_num_mbs,
+ UWORD8 u1_num_mbs_next,
+ UWORD8 u1_end_of_row);
+/* Runs the MC loop and then the IQ/IT/recon loop for up to u1_num_mbs
+ * macroblocks of the current slice, then calls ih264d_decode_tfr_nmb().
+ * Returns OK, or the error reported by intra MB processing. */
+WORD32 ih264d_decode_recon_tfr_nmb_thread(dec_struct_t *ps_dec,
+ UWORD8 u1_num_mbs,
+ UWORD8 u1_num_mbs_next,
+ UWORD8 u1_end_of_row);
+/* Body of the decode thread: decodes the slices of one picture and exits
+ * through ithread_exit(). */
+void ih264d_decode_picture_thread(dec_struct_t *ps_dec);
+/* Decodes the slice pointed to by ps_dec->ps_decode_cur_slice; returns OK
+ * or an error code. */
+WORD32 ih264d_decode_slice_thread(dec_struct_t *ps_dec);
+
+
+
+#endif /* _IH264D_THREAD_PARSE_DECPDE_H_ */
diff --git a/decoder/ih264d_transfer_address.h b/decoder/ih264d_transfer_address.h
new file mode 100755
index 0000000..aa64b85
--- /dev/null
+++ b/decoder/ih264d_transfer_address.h
@@ -0,0 +1,45 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_TRANSFER_ADDRESS_H_
+#define _IH264D_TRANSFER_ADDRESS_H_
+
+/*
+ * Transfer / reconstruction address context.
+ *
+ * ih264d_decode_slice_thread() fills the pu1_dest_* and pu1_mb_* members of
+ * the decoder's s_tran_addrecon instance; the remaining members are set and
+ * used elsewhere.
+ */
+typedef struct
+{
+ /* Source plane pointers - presumably the counterparts of pu1_dest_*
+ * (TODO confirm against the code that fills them) */
+ UWORD8 *pu1_src_y;
+ UWORD8 *pu1_src_u;
+ UWORD8 *pu1_src_v;
+ /* Destination pointers; the decode thread sets them to the current
+ * picture buffers plus the offset of the first MB of the slice */
+ UWORD8 *pu1_dest_y;
+ UWORD8 *pu1_dest_u;
+ UWORD8 *pu1_dest_v;
+ /* Pointer increments for luma and chroma - meaning of the two entries is
+ * not visible here (TODO confirm) */
+ UWORD32 u4_inc_y[2];
+ UWORD32 u4_inc_uv[2];
+ /* Frame widths for luma and chroma - units not visible here */
+ UWORD16 u2_frm_wd_y;
+ UWORD16 u2_frm_wd_uv;
+ /* Per-MB pointers; the decode thread initialises them from pu1_dest_*
+ * and steps them back by one MB when MB-level deblocking is enabled and
+ * the slice does not start at the beginning of a row */
+ UWORD8 *pu1_mb_y;
+ UWORD8 *pu1_mb_u;
+ UWORD8 *pu1_mb_v;
+ /* Increments for left / top-left motion-vector data - presumably
+ * (TODO confirm) */
+ UWORD16 u2_mv_left_inc;
+ UWORD16 u2_mv_top_left_inc;
+ /* Additional luma / chroma increments - usage not visible here */
+ UWORD32 u4_y_inc;
+ UWORD32 u4_uv_inc;
+
+} tfr_ctxt_t;
+
+#endif
diff --git a/decoder/ih264d_utils.c b/decoder/ih264d_utils.c
new file mode 100755
index 0000000..f60d99c
--- /dev/null
+++ b/decoder/ih264d_utils.c
@@ -0,0 +1,2625 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ **************************************************************************
+ * \file ih264d_utils.c
+ *
+ * \brief
+ * Contains routines that handle start and end of picture processing
+ *
+ * \date
+ * 19/12/2002
+ *
+ * \author AI
+ **************************************************************************
+ */
+
+#include <string.h>
+#include "ih264_typedefs.h"
+#include "ithread.h"
+#include "ih264d_deblocking.h"
+#include "ih264d_parse_slice.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_dpb_manager.h"
+#include "ih264d_defs.h"
+#include "ih264d_structs.h"
+#include "ih264d_mem_request.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_tables.h"
+#include "ih264d_debug.h"
+#include "ih264d_mb_utils.h"
+#include "ih264d_error_handler.h"
+#include "ih264d_dpb_manager.h"
+#include "ih264d_utils.h"
+#include "ih264d_defs.h"
+#include "ih264d_tables.h"
+#include "ih264d_inter_pred.h"
+#include "ih264d_dpb_manager.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264d_format_conv.h"
+#include "ih264_error.h"
+#include "ih264_disp_mgr.h"
+#include "ih264_buf_mgr.h"
+#include "ih264d_utils.h"
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_is_end_of_pic \endif
+ *
+ * \brief
+ * Determines whether current slice is first slice of a new picture as
+ * defined in 7.4.1.2.4 of 14496-10.
+ *
+ * \return
+ * Returns a non-zero value (the number of new-picture conditions that
+ * hold) if the current slice is the first slice of a new picture.
+ * Otherwise it returns 0
+ **************************************************************************
+ */
+/*
+ * Compares the header fields of the current slice with those of the
+ * previous slice and counts how many of the "first slice of a new picture"
+ * conditions hold. The count is returned; 0 means no condition was met.
+ */
+UWORD8 ih264d_is_end_of_pic(UWORD16 u2_frame_num,
+                            UWORD8 u1_nal_ref_idc,
+                            pocstruct_t *ps_cur_poc,
+                            pocstruct_t *ps_prev_poc,
+                            dec_slice_params_t * ps_prev_slice, /*!< Previous slice parameters*/
+                            UWORD8 u1_pic_order_cnt_type,
+                            UWORD8 u1_nal_unit_type,
+                            UWORD32 u4_idr_pic_id,
+                            UWORD8 u1_field_pic_flag,
+                            UWORD8 u1_bottom_field_flag)
+{
+    /* Number of conditions met so far */
+    WORD8 i1_num_changes = 0;
+    const WORD8 i1_frame_num_changed =
+                    (ps_prev_slice->u2_frame_num != u2_frame_num);
+
+    /* frame_num differs */
+    i1_num_changes += i1_frame_num_changed;
+
+    /* field_pic_flag differs */
+    i1_num_changes += (ps_prev_slice->u1_field_pic_flag != u1_field_pic_flag);
+
+    /* both are fields but of different parity */
+    if(u1_field_pic_flag && ps_prev_slice->u1_field_pic_flag)
+    {
+        i1_num_changes += (u1_bottom_field_flag
+                        != ps_prev_slice->u1_bottom_field_flag);
+    }
+
+    /* exactly one of the two nal_ref_idc values is zero */
+    i1_num_changes += ((u1_nal_ref_idc == 0)
+                    != (ps_prev_slice->u1_nal_ref_idc == 0));
+
+    /* POC syntax elements are compared only when frame_num is unchanged */
+    if(!i1_frame_num_changed)
+    {
+        if((u1_pic_order_cnt_type == 0)
+                        && (ps_prev_slice->u1_pic_order_cnt_type == 0))
+        {
+            if((ps_cur_poc->i4_pic_order_cnt_lsb
+                            != ps_prev_poc->i4_pic_order_cnt_lsb)
+                            || (ps_cur_poc->i4_delta_pic_order_cnt_bottom
+                                            != ps_prev_poc->i4_delta_pic_order_cnt_bottom))
+            {
+                i1_num_changes += 1;
+            }
+        }
+
+        if((u1_pic_order_cnt_type == 1)
+                        && (ps_prev_slice->u1_pic_order_cnt_type == 1))
+        {
+            if((ps_cur_poc->i4_delta_pic_order_cnt[0]
+                            != ps_prev_poc->i4_delta_pic_order_cnt[0])
+                            || (ps_cur_poc->i4_delta_pic_order_cnt[1]
+                                            != ps_prev_poc->i4_delta_pic_order_cnt[1]))
+            {
+                i1_num_changes += 1;
+            }
+        }
+    }
+
+    if(u1_nal_unit_type == IDR_SLICE_NAL)
+    {
+        if(ps_prev_slice->u1_nal_unit_type == IDR_SLICE_NAL)
+        {
+            /* both IDR: a different idr_pic_id starts a new picture */
+            i1_num_changes += (u4_idr_pic_id != ps_prev_slice->u4_idr_pic_id);
+        }
+        else
+        {
+            /* IDR slice following a non-IDR slice */
+            i1_num_changes += 1;
+        }
+    }
+
+    return (i1_num_changes);
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_decode_pic_order_cnt \endif
+ *
+ * \brief
+ * Calculates picture order count of picture.
+ *
+ * \return
+ * Returns the pic order count of the picture to which current
+ * Slice belongs.
+ *
+ **************************************************************************
+ */
+/*
+ * Derives TopFieldOrderCnt / BottomFieldOrderCnt for the current slice
+ * according to ps_pps->ps_sps->u1_pic_order_cnt_type (0, 1 or 2).
+ *
+ * Outputs:
+ *   *pi4_poc            - picture order count: the smaller of the two field
+ *                         counts for a frame, or the count of the decoded
+ *                         field for a field picture
+ *   ps_pps              - i4_top_field_order_cnt / i4_bottom_field_order_cnt
+ *                         (whichever were derived) and i4_avg_poc
+ *   ps_cur_poc          - i4_pic_order_cnt_msb (type 0) or
+ *                         i4_prev_frame_num_ofst (types 1 and 2)
+ *   ps_prev_poc         - reset on IDR / MMCO5 (type 0);
+ *                         i4_prev_frame_num_ofst (type 2)
+ *
+ * Returns OK, or ERROR_INV_POC_TYPE_T for an unknown POC type.
+ */
+WORD32 ih264d_decode_pic_order_cnt(UWORD8 u1_is_idr_slice,
+                                   UWORD32 u2_frame_num,
+                                   pocstruct_t *ps_prev_poc,
+                                   pocstruct_t *ps_cur_poc,
+                                   dec_slice_params_t *ps_cur_slice, /*!< Pointer to current slice Params*/
+                                   dec_pic_params_t * ps_pps,
+                                   UWORD8 u1_nal_ref_idc,
+                                   UWORD8 u1_bottom_field_flag,
+                                   UWORD8 u1_field_pic_flag,
+                                   WORD32 *pi4_poc)
+{
+    /* PicOrderCntMsb needs the full 32 bits: MaxPicOrderCntLsb alone can be
+     * 65536 and the MSB accumulates across wrap-arounds, so a 16-bit
+     * variable would truncate it */
+    WORD32 i4_pic_msb;
+    WORD32 i4_top_field_order_cnt = 0, i4_bottom_field_order_cnt = 0;
+    dec_seq_params_t *ps_seq = ps_pps->ps_sps;
+    WORD32 i4_prev_frame_num_ofst;
+
+    switch(ps_seq->u1_pic_order_cnt_type)
+    {
+        case 0:
+            /* POC TYPE 0 */
+            if(u1_is_idr_slice)
+            {
+                ps_prev_poc->i4_pic_order_cnt_msb = 0;
+                ps_prev_poc->i4_pic_order_cnt_lsb = 0;
+            }
+            if(ps_prev_poc->u1_mmco_equalto5)
+            {
+                if(ps_prev_poc->u1_bot_field != 1)
+                {
+                    ps_prev_poc->i4_pic_order_cnt_msb = 0;
+                    ps_prev_poc->i4_pic_order_cnt_lsb =
+                                    ps_prev_poc->i4_top_field_order_count;
+                }
+                else
+                {
+                    ps_prev_poc->i4_pic_order_cnt_msb = 0;
+                    ps_prev_poc->i4_pic_order_cnt_lsb = 0;
+                }
+            }
+
+            /* Detect wrap-around of pic_order_cnt_lsb in either direction */
+            if((ps_cur_poc->i4_pic_order_cnt_lsb
+                            < ps_prev_poc->i4_pic_order_cnt_lsb)
+                            && ((ps_prev_poc->i4_pic_order_cnt_lsb
+                                            - ps_cur_poc->i4_pic_order_cnt_lsb)
+                                            >= (ps_seq->i4_max_pic_order_cntLsb
+                                                            >> 1)))
+            {
+                i4_pic_msb = ps_prev_poc->i4_pic_order_cnt_msb
+                                + ps_seq->i4_max_pic_order_cntLsb;
+            }
+            else if((ps_cur_poc->i4_pic_order_cnt_lsb
+                            > ps_prev_poc->i4_pic_order_cnt_lsb)
+                            && ((ps_cur_poc->i4_pic_order_cnt_lsb
+                                            - ps_prev_poc->i4_pic_order_cnt_lsb)
+                                            >= (ps_seq->i4_max_pic_order_cntLsb
+                                                            >> 1)))
+            {
+                i4_pic_msb = ps_prev_poc->i4_pic_order_cnt_msb
+                                - ps_seq->i4_max_pic_order_cntLsb;
+            }
+            else
+            {
+                i4_pic_msb = ps_prev_poc->i4_pic_order_cnt_msb;
+            }
+
+            if(!u1_field_pic_flag || !u1_bottom_field_flag)
+                i4_top_field_order_cnt = i4_pic_msb
+                                + ps_cur_poc->i4_pic_order_cnt_lsb;
+
+            if(!u1_field_pic_flag)
+            {
+                i4_bottom_field_order_cnt = i4_top_field_order_cnt
+                                + ps_cur_poc->i4_delta_pic_order_cnt_bottom;
+            }
+            else if(u1_bottom_field_flag)
+            {
+                i4_bottom_field_order_cnt = i4_pic_msb
+                                + ps_cur_poc->i4_pic_order_cnt_lsb;
+            }
+            ps_cur_poc->i4_pic_order_cnt_msb = i4_pic_msb;
+            break;
+
+        case 1:
+        {
+            /* POC TYPE 1 */
+            UWORD8 i;
+            WORD32 prev_frame_num;
+            WORD32 frame_num_ofst;
+            WORD32 abs_frm_num;
+            WORD32 poc_cycle_cnt, frame_num_in_poc_cycle;
+            WORD32 expected_delta_poc_cycle;
+            WORD32 expected_poc;
+
+            prev_frame_num = (WORD32)ps_cur_slice->u2_frame_num;
+            if(!u1_is_idr_slice)
+            {
+                if(ps_cur_slice->u1_mmco_equalto5)
+                {
+                    prev_frame_num = 0;
+                    i4_prev_frame_num_ofst = 0;
+                }
+                else
+                {
+                    i4_prev_frame_num_ofst = ps_prev_poc->i4_prev_frame_num_ofst;
+                }
+            }
+            else
+                i4_prev_frame_num_ofst = 0;
+
+            /* 1. Derivation for FrameNumOffset */
+            if(u1_is_idr_slice)
+            {
+                frame_num_ofst = 0;
+                ps_cur_poc->i4_delta_pic_order_cnt[0] = 0;
+                ps_cur_poc->i4_delta_pic_order_cnt[1] = 0;
+            }
+            else if(prev_frame_num > ((WORD32)u2_frame_num))
+            {
+                frame_num_ofst = i4_prev_frame_num_ofst
+                                + ps_seq->u2_u4_max_pic_num_minus1 + 1;
+            }
+            else
+                frame_num_ofst = i4_prev_frame_num_ofst;
+
+            /* 2. Derivation for absFrameNum */
+            if(0 != ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle)
+                abs_frm_num = frame_num_ofst + u2_frame_num;
+            else
+                abs_frm_num = 0;
+            if((u1_nal_ref_idc == 0) && (abs_frm_num > 0))
+                abs_frm_num = abs_frm_num - 1;
+
+            /* 4. expectedDeltaPerPicOrderCntCycle is derived as */
+            expected_delta_poc_cycle = 0;
+            for(i = 0; i < ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle;
+                            i++)
+            {
+                expected_delta_poc_cycle +=
+                                ps_seq->i4_ofst_for_ref_frame[i];
+            }
+
+            /* 3. When absFrameNum > 0, picOrderCntCycleCnt and
+             frame_num_in_poc_cycle are derived as : */
+            /* 5. expectedPicOrderCnt is derived as : */
+            /* abs_frm_num > 0 implies a non-zero cycle length (see step 2),
+             * so the division below is safe */
+            if(abs_frm_num > 0)
+            {
+                poc_cycle_cnt =
+                                DIV((abs_frm_num - 1),
+                                    ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle);
+                frame_num_in_poc_cycle =
+                                MOD((abs_frm_num - 1),
+                                    ps_seq->u1_num_ref_frames_in_pic_order_cnt_cycle);
+
+                expected_poc = poc_cycle_cnt
+                                * expected_delta_poc_cycle;
+                for(i = 0; i <= frame_num_in_poc_cycle; i++)
+                {
+                    expected_poc = expected_poc
+                                    + ps_seq->i4_ofst_for_ref_frame[i];
+                }
+            }
+            else
+                expected_poc = 0;
+
+            if(u1_nal_ref_idc == 0)
+            {
+                expected_poc = expected_poc
+                                + ps_seq->i4_ofst_for_non_ref_pic;
+            }
+
+            /* 6. TopFieldOrderCnt or BottomFieldOrderCnt are derived as */
+            if(!u1_field_pic_flag)
+            {
+                i4_top_field_order_cnt = expected_poc
+                                + ps_cur_poc->i4_delta_pic_order_cnt[0];
+                i4_bottom_field_order_cnt = i4_top_field_order_cnt
+                                + ps_seq->i4_ofst_for_top_to_bottom_field
+                                + ps_cur_poc->i4_delta_pic_order_cnt[1];
+            }
+            else if(!u1_bottom_field_flag)
+            {
+                i4_top_field_order_cnt = expected_poc
+                                + ps_cur_poc->i4_delta_pic_order_cnt[0];
+            }
+            else
+            {
+                i4_bottom_field_order_cnt = expected_poc
+                                + ps_seq->i4_ofst_for_top_to_bottom_field
+                                + ps_cur_poc->i4_delta_pic_order_cnt[0];
+            }
+            /* Record the frame number offset of the current picture */
+            ps_cur_poc->i4_prev_frame_num_ofst = frame_num_ofst;
+        }
+
+            break;
+        case 2:
+        {
+            /* POC TYPE 2 */
+            WORD32 prev_frame_num;
+            WORD32 frame_num_ofst;
+            WORD32 tmp_poc;
+
+            prev_frame_num = (WORD32)ps_cur_slice->u2_frame_num;
+            if(!u1_is_idr_slice)
+            {
+                if(ps_cur_slice->u1_mmco_equalto5)
+                {
+                    prev_frame_num = 0;
+                    i4_prev_frame_num_ofst = 0;
+                }
+                else
+                    i4_prev_frame_num_ofst = ps_prev_poc->i4_prev_frame_num_ofst;
+            }
+            else
+                i4_prev_frame_num_ofst = 0;
+
+            /* 1. Derivation for FrameNumOffset */
+            if(u1_is_idr_slice)
+            {
+                frame_num_ofst = 0;
+                ps_cur_poc->i4_delta_pic_order_cnt[0] = 0;
+                ps_cur_poc->i4_delta_pic_order_cnt[1] = 0;
+            }
+            else if(prev_frame_num > ((WORD32)u2_frame_num))
+            {
+                frame_num_ofst = i4_prev_frame_num_ofst
+                                + ps_seq->u2_u4_max_pic_num_minus1 + 1;
+            }
+            else
+                frame_num_ofst = i4_prev_frame_num_ofst;
+
+            /* 2. Derivation for tempPicOrderCnt */
+            if(u1_is_idr_slice)
+                tmp_poc = 0;
+            else if(u1_nal_ref_idc == 0)
+                tmp_poc = ((frame_num_ofst + u2_frame_num) << 1)
+                                - 1;
+            else
+                tmp_poc = ((frame_num_ofst + u2_frame_num) << 1);
+
+            /* 6. TopFieldOrderCnt or BottomFieldOrderCnt are derived as */
+            if(!u1_field_pic_flag)
+            {
+                i4_top_field_order_cnt = tmp_poc;
+                i4_bottom_field_order_cnt = tmp_poc;
+            }
+            else if(!u1_bottom_field_flag)
+                i4_top_field_order_cnt = tmp_poc;
+            else
+                i4_bottom_field_order_cnt = tmp_poc;
+
+            /* Copy the current POC info into Previous POC structure */
+            ps_prev_poc->i4_prev_frame_num_ofst = frame_num_ofst;
+            ps_cur_poc->i4_prev_frame_num_ofst = frame_num_ofst;
+        }
+            break;
+        default:
+            return ERROR_INV_POC_TYPE_T;
+            break;
+    }
+
+    if(!u1_field_pic_flag) // or a complementary field pair
+    {
+        *pi4_poc = MIN(i4_top_field_order_cnt, i4_bottom_field_order_cnt);
+        ps_pps->i4_top_field_order_cnt = i4_top_field_order_cnt;
+        ps_pps->i4_bottom_field_order_cnt = i4_bottom_field_order_cnt;
+    }
+    else if(!u1_bottom_field_flag)
+    {
+        *pi4_poc = i4_top_field_order_cnt;
+        ps_pps->i4_top_field_order_cnt = i4_top_field_order_cnt;
+    }
+    else
+    {
+        *pi4_poc = i4_bottom_field_order_cnt;
+        ps_pps->i4_bottom_field_order_cnt = i4_bottom_field_order_cnt;
+    }
+
+    ps_pps->i4_avg_poc = *pi4_poc;
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_end_of_pic_processing \endif
+ *
+ * \brief
+ * Performs the end of picture processing.
+ *
+ * It performs deblocking on the current picture and sets the i4_status of
+ * current picture as decoded.
+ *
+ * \return
+ * 0 on Success and Error code otherwise.
+ **************************************************************************
+ */
+/*
+ * End-of-picture bookkeeping.
+ *
+ * For a reference picture (nal_ref_idc != 0) the DPB is updated: an IDR
+ * picture is inserted as a short-term node (after a reference-buffer reset
+ * when u1_long_term_reference_flag is 0, or converted to long-term index 0
+ * otherwise); any other picture goes through ih264d_do_mmco_buffer(). The
+ * default index list is refreshed afterwards.
+ *
+ * The field / reference flags of the picture are then OR-ed into
+ * ps_cur_pic->u1_pic_type, and u1_second_field is toggled for field
+ * pictures.
+ *
+ * Returns OK, or the first error reported by a DPB routine.
+ */
+WORD32 ih264d_end_of_pic_processing(dec_struct_t *ps_dec)
+{
+    dec_slice_params_t *ps_cur_slice = ps_dec->ps_cur_slice;
+    UWORD8 u1_pic_type = 0;
+    UWORD8 u1_nal_ref_idc = ps_cur_slice->u1_nal_ref_idc;
+    WORD32 ret;
+
+    /* nal_ref_idc is the same for every slice of a picture; a non-zero
+       value means the decoded picture is stored and marked as a reference
+       picture in the decoded picture buffer. */
+    if(u1_nal_ref_idc)
+    {
+        if(ps_cur_slice->u1_nal_unit_type != IDR_SLICE_NAL)
+        {
+            /* Non-IDR reference picture: run the MMCO commands */
+            UWORD16 u2_pic_num = ps_cur_slice->u2_frame_num;
+
+            ret = ih264d_do_mmco_buffer(
+                            ps_dec->ps_dpb_cmds, ps_dec->ps_dpb_mgr,
+                            ps_dec->ps_cur_sps->u1_num_ref_frames,
+                            u2_pic_num,
+                            (ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1),
+                            ps_dec->u1_nal_unit_type, ps_dec->ps_cur_pic,
+                            ps_dec->u1_pic_buf_id,
+                            ps_cur_slice->u1_field_pic_flag,
+                            ps_dec->e_dec_status);
+            if(ret != OK)
+                return ret;
+        }
+        else if(ps_dec->ps_dpb_cmds->u1_long_term_reference_flag == 0)
+        {
+            /* IDR kept as short-term reference: start from empty buffers */
+            ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr);
+
+            ret = ih264d_insert_st_node(ps_dec->ps_dpb_mgr,
+                                        ps_dec->ps_cur_pic,
+                                        ps_dec->u1_pic_buf_id,
+                                        ps_cur_slice->u2_frame_num);
+            if(ret != OK)
+                return ret;
+        }
+        else
+        {
+            /* Equivalent of inserting a pic directly as longterm Pic */
+            ret = ih264d_insert_st_node(ps_dec->ps_dpb_mgr,
+                                        ps_dec->ps_cur_pic,
+                                        ps_dec->u1_pic_buf_id,
+                                        ps_cur_slice->u2_frame_num);
+            if(ret != OK)
+                return ret;
+
+            /* Set longTermIdx = 0, MaxLongTermFrameIdx = 0 */
+            ret = ih264d_delete_st_node_or_make_lt(
+                            ps_dec->ps_dpb_mgr,
+                            ps_cur_slice->u2_frame_num, 0,
+                            ps_cur_slice->u1_field_pic_flag);
+            if(ret != OK)
+                return ret;
+            ps_dec->ps_dpb_mgr->u1_max_lt_pic_idx_plus1 = 1;
+        }
+
+        ih264d_update_default_index_list(ps_dec->ps_dpb_mgr);
+    }
+
+    /* Build the picture-type flags of what was just decoded */
+    if(!ps_cur_slice->u1_field_pic_flag)
+    {
+        u1_pic_type = TOP_REF | BOT_REF;
+    }
+    else if(ps_cur_slice->u1_bottom_field_flag)
+    {
+        u1_pic_type |= BOT_FLD;
+        if(u1_nal_ref_idc)
+            u1_pic_type |= BOT_REF;
+    }
+    else
+    {
+        u1_pic_type |= TOP_FLD;
+        if(u1_nal_ref_idc)
+            u1_pic_type |= TOP_REF;
+    }
+    ps_dec->ps_cur_pic->u1_pic_type |= u1_pic_type;
+
+#if ROW_ACCESSES_STAT
+    {
+        H264_DEC_DEBUG_PRINT("Row_Accesses_BeforeBB = %6d, Row_Accesses_AfterBB = %6d \n\n",
+                        gui_Row_Accesses_BeforeBB, gui_Row_Accesses_AfterBB);
+        gui_Row_Accesses_BeforeBBTotal += gui_Row_Accesses_BeforeBB;
+        gui_Row_Accesses_AfterBBTotal += gui_Row_Accesses_AfterBB;
+        gui_Row_Accesses_AfterBB = 0;
+        gui_Row_Accesses_BeforeBB = 0;
+    }
+#endif
+
+    /* A field was decoded: flip between first and second field */
+    if(ps_cur_slice->u1_field_pic_flag)
+    {
+        H264_DEC_DEBUG_PRINT("Toggling secondField\n");
+        ps_dec->u1_second_field = 1 - ps_dec->u1_second_field;
+    }
+
+    return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_dpb_size */
+/* */
+/* Description : This function calculates the DPB size in frames */
+/* Inputs : ps_seq - current sequence params */
+/* */
+/* Globals : None */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : DPB in frames */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 04 2005 NS Draft */
+/* */
+/*****************************************************************************/
+/*
+ * Returns the DPB size in frames (clamped to 1..16) for the level of
+ * ps_seq, capped at ps_dec->u4_level_at_init, and the picture dimensions of
+ * ps_seq. Unknown levels use the same byte budget as level 31.
+ */
+WORD32 ih264d_get_dpb_size(dec_seq_params_t *ps_seq, dec_struct_t *ps_dec)
+{
+    UWORD8 u1_level_idc = ps_seq->u1_level_idc;
+    WORD32 i4_size;
+
+#if DPB_HACK
+    u1_level_idc = (u1_level_idc < 30) ? 30 : u1_level_idc;
+    u1_level_idc = (u1_level_idc > 30) ? 30 : u1_level_idc;
+#endif
+
+    /* Never exceed the level the decoder was initialised for */
+    u1_level_idc = MIN(u1_level_idc, ps_dec->u4_level_at_init);
+
+    /* DPB byte budget for each level */
+    switch(u1_level_idc)
+    {
+        case 10:
+            i4_size = 152064;
+            break;
+        case 11:
+            i4_size = 345600;
+            break;
+        case 12:
+        case 13:
+        case 20:
+            i4_size = 912384;
+            break;
+        case 21:
+            i4_size = 1824768;
+            break;
+        case 22:
+        case 30:
+            i4_size = 3110400;
+            break;
+        case 32:
+            i4_size = 7864320;
+            break;
+        case 40:
+        case 41:
+        case 42:
+            i4_size = 12582912;
+            break;
+        case 50:
+            i4_size = 42393600;
+            break;
+        case 51:
+            i4_size = 70778880;
+            break;
+        case 31:
+        default:
+            /* An unknown level is not treated as an error */
+            i4_size = 6912000;
+            break;
+    }
+
+    /* Temporary hack to run Tractor Cav/Cab/MbAff Profiler ps_bitstrm */
+#if DPB_HACK
+    i4_size = 6912000;
+#endif
+
+    /* Bytes -> frames: divide by the MB count of a frame (the height is
+       doubled when frame_mbs_only_flag is 0), then by 384 bytes per MB
+       (256 * 1.5) */
+    i4_size = i4_size
+                    / (ps_seq->u2_frm_wd_in_mbs
+                                    * (ps_seq->u2_frm_ht_in_mbs
+                                                    << (1
+                                                                    - ps_seq->u1_frame_mbs_only_flag)));
+    i4_size = i4_size / 384;
+
+    /* Clamp to the range 1..16 */
+    if(i4_size > 16)
+        i4_size = 16;
+    if(i4_size < 1)
+        i4_size = 1;
+
+    return (i4_size);
+}
+
+/*
+ * Returns the DPB size in frames (clamped to 1..16) for the given level and
+ * frame dimensions in macroblocks, rounding the division by 384 bytes per
+ * macroblock upwards. Returns -1 for an unknown level.
+ */
+WORD32 ih264d_get_dpb_size_new(UWORD32 u4_level_idc,
+                               UWORD32 u2_frm_wd_in_mbs,
+                               UWORD32 u2_frm_ht_in_mbs)
+{
+    UWORD32 i4_size = 0;
+
+    /* DPB byte budget for each level */
+    switch(u4_level_idc)
+    {
+        case 10:
+            i4_size = 152064;
+            break;
+        case 11:
+            i4_size = 345600;
+            break;
+        case 12:
+        case 13:
+        case 20:
+            i4_size = 912384;
+            break;
+        case 21:
+            i4_size = 1824768;
+            break;
+        case 22:
+        case 30:
+            i4_size = 3110400;
+            break;
+        case 31:
+            i4_size = 6912000;
+            break;
+        case 32:
+            i4_size = 7864320;
+            break;
+        case 40:
+        case 41:
+        case 42:
+            i4_size = 12582912;
+            break;
+        case 50:
+            i4_size = 42393600;
+            break;
+        case 51:
+            i4_size = 70778880;
+            break;
+        default:
+            return -1;
+    }
+
+    /* Bytes per macroblock of the frame, then frames (rounded up) */
+    i4_size = i4_size / (u2_frm_wd_in_mbs * (u2_frm_ht_in_mbs));
+    i4_size = (i4_size + 383) / 384;
+
+    /* Clamp to the range 1..16 */
+    if(i4_size > 16)
+        i4_size = 16;
+    if(i4_size < 1)
+        i4_size = 1;
+
+    return (i4_size);
+}
+
+/**
+ * Maps a level to the maximum number of reference pictures allowed for it.
+ *
+ * Levels 1.0 through 3.0 each have a dedicated MAX_REF_LEVEL_x_y limit;
+ * every other level value falls back to H264_MAX_REF_PICS.
+ *
+ * @param u1_level  level identifier, compared against the H264_LEVEL_x_y
+ *                  constants
+ *
+ * @return maximum number of reference pictures for that level
+ */
+UWORD8 ih264d_max_possible_ref_pics(UWORD8 u1_level)
+{
+    UWORD8 u1_max_refs = H264_MAX_REF_PICS;
+
+    switch(u1_level)
+    {
+        case H264_LEVEL_1_0:
+            u1_max_refs = MAX_REF_LEVEL_1_0;
+            break;
+        case H264_LEVEL_1_1:
+            u1_max_refs = MAX_REF_LEVEL_1_1;
+            break;
+        case H264_LEVEL_1_2:
+            u1_max_refs = MAX_REF_LEVEL_1_2;
+            break;
+        case H264_LEVEL_1_3:
+            u1_max_refs = MAX_REF_LEVEL_1_3;
+            break;
+        case H264_LEVEL_2_0:
+            u1_max_refs = MAX_REF_LEVEL_2_0;
+            break;
+        case H264_LEVEL_2_1:
+            u1_max_refs = MAX_REF_LEVEL_2_1;
+            break;
+        case H264_LEVEL_2_2:
+            u1_max_refs = MAX_REF_LEVEL_2_2;
+            break;
+        case H264_LEVEL_3_0:
+            u1_max_refs = MAX_REF_LEVEL_3_0;
+            break;
+        default:
+            /* keep the generic limit */
+            break;
+    }
+
+    return u1_max_refs;
+}
+
+/**
+ * Tells whether the current SPS differs from the previously recorded one in
+ * any of the fields tracked in prev_seq_params_t: frame width and height in
+ * macroblocks, level, profile, frame_mbs_only_flag and
+ * direct_8x8_inference_flag.
+ *
+ * @param ps_prv  snapshot of the previously used sequence parameters
+ * @param ps_cur  sequence parameters of the current picture
+ *
+ * @return 1 if any tracked field differs, 0 otherwise
+ */
+UWORD8 ih264d_is_sps_changed(prev_seq_params_t * ps_prv,
+                             dec_seq_params_t * ps_cur)
+{
+    if(ps_prv->u2_frm_wd_in_mbs != ps_cur->u2_frm_wd_in_mbs)
+        return 1;
+
+    if(ps_prv->u1_level_idc != ps_cur->u1_level_idc)
+        return 1;
+
+    if(ps_prv->u1_profile_idc != ps_cur->u1_profile_idc)
+        return 1;
+
+    if(ps_cur->u2_frm_ht_in_mbs != ps_prv->u2_frm_ht_in_mbs)
+        return 1;
+
+    if(ps_cur->u1_frame_mbs_only_flag != ps_prv->u1_frame_mbs_only_flag)
+        return 1;
+
+    if(ps_cur->u1_direct_8x8_inference_flag
+                    != ps_prv->u1_direct_8x8_inference_flag)
+        return 1;
+
+    return 0;
+}
+
+/**
+ * Initialises ps_dec->u1_recon_mb_grp (the number of macroblocks handled as
+ * one group) and the values derived from it.
+ *
+ * - When the current SPS has u1_mb_aff_flag == 0 the group is one full row,
+ *   i.e. ps_dec->u2_frm_wd_in_mbs; otherwise it is PARSE_MB_GROUP_4.
+ * - u1_recon_mb_grp_pair is set to half of the group size.
+ * - u4_num_mbs_prev_nmb is set to the group size.
+ *
+ * NOTE(review): the field name suggests u1_recon_mb_grp is 8 bits wide while
+ * the frame width field is 16 bits; confirm that very wide frames cannot
+ * truncate here.
+ *
+ * @param ps_dec  decoder context; ps_cur_sps must already be set
+ *
+ * @return OK on success, ERROR_MB_GROUP_ASSGN_T if the resulting group size
+ *         is zero
+ */
+WORD32 ih264d_init_dec_mb_grp(dec_struct_t *ps_dec)
+{
+    dec_seq_params_t *ps_seq = ps_dec->ps_cur_sps;
+
+    if(0 == ps_seq->u1_mb_aff_flag)
+    {
+        /* NMB set to width in MBs for non-mbaff cases */
+        ps_dec->u1_recon_mb_grp = ps_dec->u2_frm_wd_in_mbs;
+    }
+    else
+    {
+        ps_dec->u1_recon_mb_grp = PARSE_MB_GROUP_4;
+    }
+
+    ps_dec->u1_recon_mb_grp_pair = ps_dec->u1_recon_mb_grp >> 1;
+
+    if(!ps_dec->u1_recon_mb_grp)
+    {
+        return ERROR_MB_GROUP_ASSGN_T;
+    }
+
+    ps_dec->u4_num_mbs_prev_nmb = ps_dec->u1_recon_mb_grp;
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : get_numbuf_dpb_bank \endif
+ *
+ * \brief
+ *    Computes how many frame buffers fit into the reference picture memory
+ *    record (ps_mem_tab[MEM_REC_REF_PIC]).
+ *
+ *    One frame is a luma plane of u2_frm_wd_y * (u2_pic_ht + 4 * PAD_LEN_Y_V)
+ *    bytes plus two chroma planes of a quarter of that size each; the luma
+ *    size and the combined chroma size are each rounded up to a multiple of
+ *    128 bytes.  In shared display buffer mode the plane sizes are replaced
+ *    by a small placeholder (64) for the chroma formats handled below.
+ *
+ * \return
+ *    Number of frame buffers that fit into the memory record
+ **************************************************************************
+ */
+static WORD32 get_numbuf_dpb_bank(dec_struct_t *ps_dec)
+{
+    WORD32 i4_DPB_size;
+    WORD32 i4_num_buf_alloc;
+    UWORD32 Ysize;
+    UWORD32 UVsize;
+    UWORD32 one_frm_size;
+    UWORD32 luma_height;
+
+    luma_height = ps_dec->u2_pic_ht;
+
+    i4_DPB_size = ps_dec->ps_mem_tab[MEM_REC_REF_PIC].u4_mem_size;
+
+    Ysize = (ps_dec->u2_frm_wd_y) * (luma_height + (PAD_LEN_Y_V << 2));
+
+    UVsize = Ysize >> 2;
+
+    if(ps_dec->u4_share_disp_buf == 1)
+    {
+        /* In case of buffers getting shared between application and library
+         there is no need of reference memtabs. Instead of setting the size
+         to zero, it is reduced to a small size to ensure that changes
+         in the code are minimal */
+        if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+                        || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU)
+                        || (ps_dec->u1_chroma_format == IV_YUV_420P))
+        {
+            Ysize = 64;
+        }
+        if(ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+        {
+            UVsize = 64;
+        }
+    }
+
+    /* Round luma and (both) chroma planes up to 128-byte multiples */
+    one_frm_size = (((Ysize + 127) >> 7) << 7)
+                    + ((((UVsize << 1) + 127) >> 7) << 7);
+    i4_num_buf_alloc = i4_DPB_size / (one_frm_size);
+
+    return i4_num_buf_alloc;
+}
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_init_pic \endif
+ *
+ * \brief
+ * Initializes the decoder state for a new picture.
+ *
+ * Records frame_num, POC, PPS and SPS in the decoder context and derives
+ * the maximum MB address and the frame height in MBs. When the decoder has
+ * not been initialised yet, or when ih264d_is_sps_changed() reports a
+ * change, it additionally: chooses the number of picture buffers, derives
+ * u1_max_dec_frame_buffering and i4_display_delay, releases the buffers
+ * held so far (re-initialisation only), re-creates the picture buffers and
+ * the MV bank, snapshots the SPS fields used for change detection, resets
+ * the display sequence counters and sets up the default CABAC context
+ * entry.
+ *
+ * \param ps_dec Decoder context
+ * \param u2_frame_num frame_num of the current picture
+ * \param i4_poc Picture order count of the current picture
+ * \param ps_pps Picture parameter set in use; its ps_sps is taken as the
+ * current SPS
+ *
+ * \return
+ * OK on Success and Error code otherwise
+ *
+ * \note
+ * This function is called when first slice of the
+ * NON -IDR picture is encountered.
+ **************************************************************************
+ */
+WORD32 ih264d_init_pic(dec_struct_t *ps_dec,
+ UWORD16 u2_frame_num,
+ WORD32 i4_poc,
+ dec_pic_params_t *ps_pps)
+{
+ dec_seq_params_t *ps_seq = ps_pps->ps_sps;
+ prev_seq_params_t * ps_prev_seq_params = &ps_dec->s_prev_seq_params;
+ WORD32 i4_pic_bufs;
+ WORD32 ret;
+
+ /* Record the identity of the picture and the parameter sets in use */
+ ps_dec->ps_cur_slice->u2_frame_num = u2_frame_num;
+ ps_dec->ps_cur_slice->i4_poc = i4_poc;
+ ps_dec->ps_cur_pps = ps_pps;
+ ps_dec->ps_cur_pps->pv_codec_handle = ps_dec;
+
+ ps_dec->ps_cur_sps = ps_seq;
+ ps_dec->ps_dpb_mgr->i4_max_frm_num = ps_seq->u2_u4_max_pic_num_minus1
+ + 1;
+
+ ps_dec->ps_dpb_mgr->u2_pic_ht = ps_dec->u2_pic_ht;
+ ps_dec->ps_dpb_mgr->u2_pic_wd = ps_dec->u2_pic_wd;
+ /* -1 marks picture type, frame type and content type as not yet known */
+ ps_dec->i4_pic_type = -1;
+ ps_dec->i4_frametype = -1;
+ ps_dec->i4_content_type = -1;
+
+ /*--------------------------------------------------------------------*/
+ /* Get the value of MaxMbAddress and frmheight in Mbs */
+ /* (height >> 4 gives MB rows; one more shift halves it for fields) */
+ /*--------------------------------------------------------------------*/
+ ps_seq->u2_max_mb_addr =
+ (ps_seq->u2_frm_wd_in_mbs
+ * (ps_dec->u2_pic_ht
+ >> (4
+ + ps_dec->ps_cur_slice->u1_field_pic_flag)))
+ - 1;
+ ps_dec->u2_frm_ht_in_mbs = (ps_dec->u2_pic_ht
+ >> (4 + ps_dec->ps_cur_slice->u1_field_pic_flag));
+
+
+ /***************************************************************************/
+ /* On first use, or when a tracked SPS field has changed, release the */
+ /* current picture buffers and set them up afresh */
+ /***************************************************************************/
+ if(!ps_dec->u1_init_dec_flag
+ || ih264d_is_sps_changed(ps_prev_seq_params, ps_seq))
+ {
+
+
+ /* NOTE(review): ps_dec_in and ps_dec_out are not used in this block */
+ ivd_video_decode_ip_t *ps_dec_in = ps_dec->pv_dec_in;
+ ivd_video_decode_op_t *ps_dec_out = ps_dec->pv_dec_out;
+
+ /* Number of picture buffers: what fits in the reference memory when */
+ /* the library owns the buffers, else the application's buffer count */
+ if(ps_dec->u4_share_disp_buf == 0)
+ {
+ i4_pic_bufs = get_numbuf_dpb_bank(ps_dec);
+ }
+ else
+ {
+ i4_pic_bufs = (WORD32)ps_dec->u4_num_disp_bufs;
+ }
+
+ ps_dec->u1_pic_bufs = CLIP_U8(i4_pic_bufs);
+
+ if(ps_dec->u4_share_disp_buf == 0)
+ ps_dec->u1_pic_bufs = MIN(ps_dec->u1_pic_bufs,
+ (H264_MAX_REF_PICS * 2));
+
+ ps_dec->u1_max_dec_frame_buffering = ih264d_get_dpb_size(ps_seq,
+ ps_dec);
+
+ /* Shared mode uses at least 5, then both modes are capped by the */
+ /* init-time reference frame count and the available picture buffers */
+ if(ps_dec->u4_share_disp_buf)
+ ps_dec->u1_max_dec_frame_buffering = MAX(
+ ps_dec->u1_max_dec_frame_buffering, 5);
+
+ ps_dec->u1_max_dec_frame_buffering = MIN(
+ ps_dec->u1_max_dec_frame_buffering,
+ ps_dec->u4_num_ref_frames_at_init);
+ ps_dec->u1_max_dec_frame_buffering = MIN(
+ ps_dec->u1_max_dec_frame_buffering,
+ ps_dec->u1_pic_bufs);
+
+// ps_dec->u1_pic_bufs = ps_dec->i1_max_dec_frame_buffering;
+
+ /* Fix is for handling one pic in and one pic out incase of */
+ /* MMCO 5 or IDR */
+
+ ps_dec->i4_display_delay = MIN(ps_dec->u4_num_reorder_frames_at_init,
+ ps_dec->u1_max_dec_frame_buffering);
+
+ /* With VUI present, further cap the delay by num_reorder_frames + 1 */
+ /* (doubled when the sequence is not frame_mbs_only) */
+ if(1 == ps_seq->u1_vui_parameters_present_flag)
+ {
+ if(ps_seq->u1_frame_mbs_only_flag == 1)
+ ps_dec->i4_display_delay = MIN(
+ (UWORD32 )ps_dec->i4_display_delay,
+ ((UWORD32 )ps_seq->s_vui.u4_num_reorder_frames
+ + 1));
+ else
+ ps_dec->i4_display_delay = MIN(
+ (UWORD32 )ps_dec->i4_display_delay,
+ ((UWORD32 )ps_seq->s_vui.u4_num_reorder_frames
+ + 1) * 2);
+ }
+
+ /* Temporary hack to run Tractor Cav/Cab/MbAff Profiler streams also for CAFI1_SVA_C.264 in conformance*/
+ /* Re-initialisation: release what the DPB and display list hold */
+ if(ps_dec->u1_init_dec_flag)
+ {
+ ih264d_release_pics_in_dpb((void *)ps_dec,
+ ps_dec->u1_pic_bufs);
+ ih264d_release_display_bufs(ps_dec);
+ ih264d_reset_ref_bufs(ps_dec->ps_dpb_mgr);
+ }
+
+ /*********************************************************************/
+ /* Configuring decoder parameters based on level and then */
+ /* fresh pointer initialisation in decoder scratch and state buffers */
+ /* (MB group is redone on first init or when the level crosses 3.0) */
+ /*********************************************************************/
+ if(!ps_dec->u1_init_dec_flag ||
+ ((ps_seq->u1_level_idc < H264_LEVEL_3_0) ^ (ps_prev_seq_params->u1_level_idc < H264_LEVEL_3_0)))
+ {
+ ret = ih264d_init_dec_mb_grp(ps_dec);
+ if(ret != OK)
+ return ret;
+ }
+
+ ret = ih264d_create_pic_buffers(ps_dec->u1_pic_bufs,
+ ps_dec);
+ if(ret != OK)
+ return ret;
+
+ ih264d_get_memory_dec_params(ps_dec);
+
+ ret = ih264d_create_mv_bank(ps_dec, ps_dec->u2_pic_wd,
+ ps_dec->u2_pic_ht);
+ if(ret != OK)
+ return ret;
+
+ /* In shared mode, set all of them as used by display */
+ if(ps_dec->u4_share_disp_buf == 1)
+ {
+ WORD32 i;
+
+ for(i = 0; i < ps_dec->u1_pic_bufs; i++)
+ {
+ ih264_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mgr, i,
+ BUF_MGR_IO);
+ }
+ }
+
+ /* Mark the decoder initialised and snapshot the SPS fields that */
+ /* ih264d_is_sps_changed() compares on the next picture */
+ ps_dec->u1_init_dec_flag = 1;
+ ps_prev_seq_params->u2_frm_wd_in_mbs = ps_seq->u2_frm_wd_in_mbs;
+ ps_prev_seq_params->u1_level_idc = ps_seq->u1_level_idc;
+ ps_prev_seq_params->u1_profile_idc = ps_seq->u1_profile_idc;
+ ps_prev_seq_params->u2_frm_ht_in_mbs = ps_seq->u2_frm_ht_in_mbs;
+ ps_prev_seq_params->u1_frame_mbs_only_flag =
+ ps_seq->u1_frame_mbs_only_flag;
+ ps_prev_seq_params->u1_direct_8x8_inference_flag =
+ ps_seq->u1_direct_8x8_inference_flag;
+
+ /* Restart display sequence numbering */
+ ps_dec->i4_cur_display_seq = 0;
+ ps_dec->i4_prev_max_display_seq = 0;
+ ps_dec->i4_max_poc = 0;
+
+ {
+ /* 0th entry of CtxtIncMbMap will be always be containing default values
+ for CABAC context representing MB not available */
+ /* The default entry is the element just before p_ctxt_inc_mb_map */
+ ctxt_inc_mb_info_t *p_DefCtxt = ps_dec->p_ctxt_inc_mb_map - 1;
+ UWORD8 *pu1_temp;
+ WORD8 i;
+ p_DefCtxt->u1_mb_type = CAB_SKIP;
+
+ p_DefCtxt->u1_cbp = 0x0f;
+ p_DefCtxt->u1_intra_chroma_pred_mode = 0;
+
+ p_DefCtxt->u1_yuv_dc_csbp = 0x7;
+
+ p_DefCtxt->u1_transform8x8_ctxt = 0;
+
+ /* Zero the first 4 bytes of i1_ref_idx and the first 16 of u1_mv */
+ pu1_temp = (UWORD8*)p_DefCtxt->i1_ref_idx;
+ for(i = 0; i < 4; i++, pu1_temp++)
+ (*pu1_temp) = 0;
+ pu1_temp = (UWORD8*)p_DefCtxt->u1_mv;
+ for(i = 0; i < 16; i++, pu1_temp++)
+ (*pu1_temp) = 0;
+ ps_dec->ps_def_ctxt_mb_info = p_DefCtxt;
+ }
+
+ }
+ /* reset the "DPB commands read" flag */
+ ps_dec->ps_dpb_cmds->u1_dpb_commands_read = 0;
+
+ return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_get_next_display_field */
+/* */
+/* Description : Fetches the next picture queued in the display manager */
+/* and fills the display output structure (dimensions, */
+/* strides, buffer pointers, timestamp) for it. */
+/* */
+/* Inputs : 1. ps_dec Decoder context */
+/* 2. ps_out_buffer Application output buffers; their */
+/* pointers are the default plane pointers */
+/* 3. pv_disp_op Display output structure to fill */
+/* */
+/* Globals : None */
+/* */
+/* Processing : The display manager is queried under process_disp_mutex. */
+/* In shared display buffer mode the plane pointers are */
+/* taken from the matching entry of ps_dec->disp_bufs. */
+/* Outputs : pv_disp_op and ps_dec->s_disp_frame_info are updated; */
+/* ps_dec->u4_num_fld_in_frm is set to 0 or 2 */
+/* Returns : 0 when a picture was fetched, -1 when none was available */
+/* (u4_error_code is then set to 1) */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 27 05 2005 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 ih264d_get_next_display_field(dec_struct_t * ps_dec,
+ ivd_out_bufdesc_t *ps_out_buffer,
+ ivd_get_display_frame_op_t *pv_disp_op)
+{
+ pic_buffer_t *pic_buf;
+
+ /* NOTE(review): i1_cur_fld is declared but never used */
+ UWORD8 i1_cur_fld;
+ WORD32 u4_api_ret = -1;
+ WORD32 i4_disp_buf_id;
+ iv_yuv_buf_t *ps_op_frm;
+
+
+
+ ps_op_frm = &(ps_dec->s_disp_frame_info);
+ H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
+ pic_buf = (pic_buffer_t *)ih264_disp_mgr_get(
+ (disp_mgr_t *)ps_dec->pv_disp_buf_mgr, &i4_disp_buf_id);
+ /* Defaults for the "nothing to display" case */
+ ps_dec->u4_num_fld_in_frm = 0;
+ u4_api_ret = -1;
+ pv_disp_op->u4_ts = -1;
+ pv_disp_op->e_output_format = ps_dec->u1_chroma_format;
+
+ pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_out_buffer->pu1_bufs[0];
+ pv_disp_op->s_disp_frm_buf.pv_u_buf = ps_out_buffer->pu1_bufs[1];
+ pv_disp_op->s_disp_frm_buf.pv_v_buf = ps_out_buffer->pu1_bufs[2];
+ if(pic_buf != NULL)
+ {
+ pv_disp_op->e4_fld_type = 0;
+ pv_disp_op->u4_disp_buf_id = i4_disp_buf_id;
+
+ /* NOTE(review): display height is doubled here; presumably */
+ /* u2_disp_height is stored per field - confirm */
+ ps_op_frm->u4_y_ht = pic_buf->u2_disp_height << 1;
+ ps_op_frm->u4_u_ht = ps_op_frm->u4_v_ht = ps_op_frm->u4_y_ht >> 1;
+ ps_op_frm->u4_y_wd = pic_buf->u2_disp_width;
+
+ ps_op_frm->u4_u_wd = ps_op_frm->u4_v_wd = ps_op_frm->u4_y_wd >> 1;
+
+ ps_op_frm->u4_y_strd = pic_buf->u2_frm_wd_y;
+ ps_op_frm->u4_u_strd = ps_op_frm->u4_v_strd = pic_buf->u2_frm_wd_uv;
+
+ /* Timestamp travels with the picture buffer */
+ pv_disp_op->u4_ts = pic_buf->u4_ts;
+
+ /* set the start of the Y, U and V buffer pointer for display */
+ ps_op_frm->pv_y_buf = pic_buf->pu1_buf1 + pic_buf->u2_crop_offset_y;
+ ps_op_frm->pv_u_buf = pic_buf->pu1_buf2 + pic_buf->u2_crop_offset_uv;
+ ps_op_frm->pv_v_buf = pic_buf->pu1_buf3 + pic_buf->u2_crop_offset_uv;
+ /* Two increments: the counter ends at 2 for a fetched picture */
+ ps_dec->u4_num_fld_in_frm++;
+ ps_dec->u4_num_fld_in_frm++;
+ u4_api_ret = 0;
+
+ if(pic_buf->u1_picturetype == 0)
+ pv_disp_op->u4_progressive_frame_flag = 1;
+ else
+ pv_disp_op->u4_progressive_frame_flag = 0;
+
+ } H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+ pv_disp_op->u4_error_code = u4_api_ret;
+ pv_disp_op->e_pic_type = 0xFFFFFFFF; //Junk;
+
+ if(u4_api_ret)
+ {
+ pv_disp_op->u4_error_code = 1; //put a proper error code here
+ }
+ else
+ {
+
+ //Release the buffer if being sent for display
+ /* NOTE(review): dest_inc_Y and dest_inc_UV are never used, and temp */
+ /* is assigned below but never read */
+ UWORD32 temp;
+ UWORD32 dest_inc_Y = 0, dest_inc_UV = 0;
+
+ /* Reported luma width never exceeds the luma stride */
+ pv_disp_op->s_disp_frm_buf.u4_y_wd = temp = MIN(ps_op_frm->u4_y_wd,
+ ps_op_frm->u4_y_strd);
+ pv_disp_op->s_disp_frm_buf.u4_u_wd = pv_disp_op->s_disp_frm_buf.u4_y_wd
+ >> 1;
+ pv_disp_op->s_disp_frm_buf.u4_v_wd = pv_disp_op->s_disp_frm_buf.u4_y_wd
+ >> 1;
+
+ pv_disp_op->s_disp_frm_buf.u4_y_ht = ps_op_frm->u4_y_ht;
+ pv_disp_op->s_disp_frm_buf.u4_u_ht = pv_disp_op->s_disp_frm_buf.u4_y_ht
+ >> 1;
+ pv_disp_op->s_disp_frm_buf.u4_v_ht = pv_disp_op->s_disp_frm_buf.u4_y_ht
+ >> 1;
+ /* Non-shared mode: output stride equals the output width. */
+ /* Shared mode: output stride is the decoder's own luma stride. */
+ if(0 == ps_dec->u4_share_disp_buf)
+ {
+ pv_disp_op->s_disp_frm_buf.u4_y_strd =
+ pv_disp_op->s_disp_frm_buf.u4_y_wd;
+ pv_disp_op->s_disp_frm_buf.u4_u_strd =
+ pv_disp_op->s_disp_frm_buf.u4_y_wd >> 1;
+ pv_disp_op->s_disp_frm_buf.u4_v_strd =
+ pv_disp_op->s_disp_frm_buf.u4_y_wd >> 1;
+
+ }
+ else
+ {
+ pv_disp_op->s_disp_frm_buf.u4_y_strd = ps_op_frm->u4_y_strd;
+ }
+
+ /* An application-specified display width can only enlarge the stride */
+ if(ps_dec->u4_app_disp_width)
+ {
+ pv_disp_op->s_disp_frm_buf.u4_y_strd = MAX(
+ ps_dec->u4_app_disp_width,
+ pv_disp_op->s_disp_frm_buf.u4_y_strd);
+ }
+
+ pv_disp_op->u4_error_code = 0;
+ /* Planar 4:2:0: chroma strides are half the luma stride */
+ if(pv_disp_op->e_output_format == IV_YUV_420P)
+ {
+ UWORD32 i;
+ pv_disp_op->s_disp_frm_buf.u4_u_strd =
+ pv_disp_op->s_disp_frm_buf.u4_y_strd >> 1;
+ pv_disp_op->s_disp_frm_buf.u4_v_strd =
+ pv_disp_op->s_disp_frm_buf.u4_y_strd >> 1;
+
+ pv_disp_op->s_disp_frm_buf.u4_u_wd = ps_op_frm->u4_y_wd >> 1;
+ pv_disp_op->s_disp_frm_buf.u4_v_wd = ps_op_frm->u4_y_wd >> 1;
+
+ if(1 == ps_dec->u4_share_disp_buf)
+ {
+ pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_op_frm->pv_y_buf;
+
+ /* Find the display buffer whose luma base matches this picture */
+ /* and take the chroma plane pointers from the same entry */
+ for(i = 0; i < MAX_DISP_BUFS_NEW; i++)
+ {
+ UWORD8 *buf = ps_dec->disp_bufs[i].buf[0];
+ buf += ps_dec->disp_bufs[i].u4_ofst[0];
+ if(((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf
+ - pic_buf->u2_crop_offset_y) == buf)
+ {
+ buf = ps_dec->disp_bufs[i].buf[1];
+ buf += ps_dec->disp_bufs[i].u4_ofst[1];
+ pv_disp_op->s_disp_frm_buf.pv_u_buf = buf
+ + pic_buf->u2_crop_offset_uv;
+
+ buf = ps_dec->disp_bufs[i].buf[2];
+ buf += ps_dec->disp_bufs[i].u4_ofst[2];
+ pv_disp_op->s_disp_frm_buf.pv_v_buf = buf
+ + pic_buf->u2_crop_offset_uv;
+ }
+ }
+ }
+
+ }
+ /* Semi-planar 4:2:0: the U entry carries the luma stride and width, */
+ /* the V entry is zeroed */
+ else if((pv_disp_op->e_output_format == IV_YUV_420SP_UV)
+ || (pv_disp_op->e_output_format == IV_YUV_420SP_VU))
+ {
+ pv_disp_op->s_disp_frm_buf.u4_u_strd =
+ pv_disp_op->s_disp_frm_buf.u4_y_strd;
+ pv_disp_op->s_disp_frm_buf.u4_v_strd = 0;
+
+ if(1 == ps_dec->u4_share_disp_buf)
+ {
+ UWORD32 i;
+
+ pv_disp_op->s_disp_frm_buf.pv_y_buf = ps_op_frm->pv_y_buf;
+
+ /* Same display buffer lookup as in the planar case */
+ for(i = 0; i < MAX_DISP_BUFS_NEW; i++)
+ {
+ UWORD8 *buf = ps_dec->disp_bufs[i].buf[0];
+ buf += ps_dec->disp_bufs[i].u4_ofst[0];
+ if((UWORD8 *)pv_disp_op->s_disp_frm_buf.pv_y_buf
+ - pic_buf->u2_crop_offset_y == buf)
+ {
+ buf = ps_dec->disp_bufs[i].buf[1];
+ buf += ps_dec->disp_bufs[i].u4_ofst[1];
+ pv_disp_op->s_disp_frm_buf.pv_u_buf = buf
+ + pic_buf->u2_crop_offset_uv;
+ ;
+
+ buf = ps_dec->disp_bufs[i].buf[2];
+ buf += ps_dec->disp_bufs[i].u4_ofst[2];
+ pv_disp_op->s_disp_frm_buf.pv_v_buf = buf
+ + pic_buf->u2_crop_offset_uv;
+ ;
+ }
+ }
+ }
+ pv_disp_op->s_disp_frm_buf.u4_u_wd =
+ pv_disp_op->s_disp_frm_buf.u4_y_wd;
+ pv_disp_op->s_disp_frm_buf.u4_v_wd = 0;
+
+ }
+ /* Single-plane output formats: all chroma fields are zeroed */
+ else if((pv_disp_op->e_output_format == IV_RGB_565)
+ || (pv_disp_op->e_output_format == IV_YUV_422ILE))
+ {
+
+ pv_disp_op->s_disp_frm_buf.u4_u_strd = 0;
+ pv_disp_op->s_disp_frm_buf.u4_v_strd = 0;
+ pv_disp_op->s_disp_frm_buf.u4_u_wd = 0;
+ pv_disp_op->s_disp_frm_buf.u4_v_wd = 0;
+ pv_disp_op->s_disp_frm_buf.u4_u_ht = 0;
+ pv_disp_op->s_disp_frm_buf.u4_v_ht = 0;
+
+ }
+
+
+ }
+
+ return u4_api_ret;
+}
+
+
+/**
+ * Hands a display buffer back after ih264d_get_next_display_field().
+ *
+ * - If pv_disp_op->u4_error_code is not 1 (a picture was returned): under
+ *   process_disp_mutex, the buffer is released from the picture buffer
+ *   manager with BUF_MGR_IO (non-shared mode) or flagged in
+ *   u4_disp_buf_mapping (shared mode).
+ * - If u4_error_code is 1 and a flush is in progress (u1_flushfrm == 1):
+ *   in shared mode every mapped display buffer is released, the
+ *   "to be freed" table is cleared and the first u1_pic_bufs mapping
+ *   entries are set again; the flush flag is then cleared.
+ *
+ * @param ps_dec      decoder context
+ * @param pv_disp_op  display output structure filled by the get call
+ */
+void ih264d_release_display_field(dec_struct_t *ps_dec,
+                                  ivd_get_display_frame_op_t *pv_disp_op)
+{
+    UWORD32 u4_idx;
+
+    if(1 != pv_disp_op->u4_error_code)
+    {
+        /* A picture was delivered: give its buffer back */
+        H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
+
+        if(0 == ps_dec->u4_share_disp_buf)
+        {
+            ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                                  pv_disp_op->u4_disp_buf_id,
+                                  BUF_MGR_IO);
+        }
+        else
+        {
+            ps_dec->u4_disp_buf_mapping[pv_disp_op->u4_disp_buf_id] = 1;
+        }
+
+        H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+        return;
+    }
+
+    /* Nothing was delivered; only a pending flush needs handling */
+    if(1 != ps_dec->u1_flushfrm)
+    {
+        return;
+    }
+
+    if(1 == ps_dec->u4_share_disp_buf)
+    {
+        H264_MUTEX_LOCK(&ps_dec->process_disp_mutex);
+        for(u4_idx = 0; u4_idx < (MAX_DISP_BUFS_NEW); u4_idx++)
+        {
+            if(1 == ps_dec->u4_disp_buf_mapping[u4_idx])
+            {
+                ih264_buf_mgr_release((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+                                      u4_idx,
+                                      BUF_MGR_IO);
+                ps_dec->u4_disp_buf_mapping[u4_idx] = 0;
+            }
+        }
+        H264_MUTEX_UNLOCK(&ps_dec->process_disp_mutex);
+
+        memset(ps_dec->u4_disp_buf_to_be_freed, 0,
+               (MAX_DISP_BUFS_NEW) * sizeof(UWORD32));
+
+        for(u4_idx = 0; u4_idx < ps_dec->u1_pic_bufs; u4_idx++)
+        {
+            ps_dec->u4_disp_buf_mapping[u4_idx] = 1;
+        }
+    }
+
+    ps_dec->u1_flushfrm = 0;
+}
+/**
+ * One step of the bumping process: once the POC/buffer-id map holds at
+ * least i4_display_delay entries, the displayable entry with the smallest
+ * POC is handed to the display manager with the next display sequence
+ * number and removed from the map.
+ *
+ * Map entries whose buffer id is -1 (free) or DO_NOT_DISP are skipped when
+ * searching for the minimum.
+ *
+ * @param ps_dec  decoder context
+ *
+ * @return OK, or ERROR_GAPS_IN_FRM_NUM when the selected buffer id is
+ *         DO_NOT_DISP
+ */
+WORD32 ih264d_assign_display_seq(dec_struct_t *ps_dec)
+{
+    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+    WORD32 (*pai4_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
+    WORD32 i4_best_poc = 0x7fffffff;
+    WORD32 i4_best_buf_id = -1;
+    WORD32 i4_best_idx = -1;
+    WORD32 i4_slot;
+
+    /* Not enough pictures queued yet: nothing to bump */
+    if(ps_dpb_mgr->i1_poc_buf_id_entries < ps_dec->i4_display_delay)
+    {
+        return OK;
+    }
+
+    for(i4_slot = 0; i4_slot < MAX_FRAMES; i4_slot++)
+    {
+        if((-1 == pai4_map[i4_slot][0])
+                        || (DO_NOT_DISP == pai4_map[i4_slot][0]))
+        {
+            continue;
+        }
+
+        if(pai4_map[i4_slot][1] < i4_best_poc)
+        {
+            i4_best_poc = pai4_map[i4_slot][1];
+            i4_best_buf_id = pai4_map[i4_slot][0];
+            i4_best_idx = i4_slot;
+        }
+    }
+
+    if((i4_best_idx != -1) && (DO_NOT_DISP != i4_best_buf_id))
+    {
+        ps_dec->i4_cur_display_seq++;
+        ih264_disp_mgr_add((disp_mgr_t *)ps_dec->pv_disp_buf_mgr,
+                           i4_best_buf_id,
+                           ps_dec->i4_cur_display_seq,
+                           ps_dec->apv_buf_id_pic_buf_map[i4_best_buf_id]);
+
+        /* Free the map slot */
+        pai4_map[i4_best_idx][0] = -1;
+        pai4_map[i4_best_idx][1] = 0x7fffffff;
+        ps_dpb_mgr->i1_poc_buf_id_entries--;
+    }
+    else if(DO_NOT_DISP == i4_best_buf_id)
+    {
+        return ERROR_GAPS_IN_FRM_NUM;
+    }
+
+    return OK;
+}
+
+/**
+ * Flushes the POC/buffer-id map in increasing POC order (bumping process
+ * used when mmco = 5 or when the buffers are re-created).
+ *
+ * After ih264d_delete_nonref_nondisplay_pics(), each occupied map entry is
+ * taken in order of smallest POC: entries with a real buffer id are queued
+ * in the display manager with the next display sequence number, entries
+ * marked DO_NOT_DISP are just dropped.  Every processed slot is reset.
+ * Finally the entry count and i4_max_poc are cleared and
+ * i4_prev_max_display_seq is advanced by
+ * i4_max_poc + u1_max_dec_frame_buffering + 1.
+ *
+ * The search state is reset on every pass and the loop stops as soon as no
+ * occupied slot is left, so a stale entry count can neither index the map
+ * with -1 nor queue the same buffer twice.
+ *
+ * @param ps_dec  decoder context
+ */
+void ih264d_release_display_bufs(dec_struct_t *ps_dec)
+{
+    WORD32 i, j;
+    WORD32 i4_min_poc;
+    WORD32 i4_min_poc_buf_id;
+    WORD32 i4_min_index;
+    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+    WORD32 (*i4_poc_buf_id_map)[3] = ps_dpb_mgr->ai4_poc_buf_id_map;
+
+    ih264d_delete_nonref_nondisplay_pics(ps_dpb_mgr);
+
+    for(j = 0; j < ps_dpb_mgr->i1_poc_buf_id_entries; j++)
+    {
+        /* Fresh search on every pass */
+        i4_min_poc = 0x7fffffff;
+        i4_min_poc_buf_id = -1;
+        i4_min_index = -1;
+
+        for(i = 0; i < MAX_FRAMES; i++)
+        {
+            if(i4_poc_buf_id_map[i][0] == -1)
+            {
+                continue;
+            }
+
+            /* First occupied slot always qualifies, so an entry whose POC */
+            /* equals the initial maximum is not left behind               */
+            if((-1 == i4_min_index)
+                            || (i4_poc_buf_id_map[i][1] < i4_min_poc))
+            {
+                i4_min_poc = i4_poc_buf_id_map[i][1];
+                i4_min_poc_buf_id = i4_poc_buf_id_map[i][0];
+                i4_min_index = i;
+            }
+        }
+
+        if(-1 == i4_min_index)
+        {
+            /* Entry count was larger than the number of occupied slots */
+            break;
+        }
+
+        if(DO_NOT_DISP != i4_min_poc_buf_id)
+        {
+            ps_dec->i4_cur_display_seq++;
+            ih264_disp_mgr_add(
+                            (disp_mgr_t *)ps_dec->pv_disp_buf_mgr,
+                            i4_min_poc_buf_id, ps_dec->i4_cur_display_seq,
+                            ps_dec->apv_buf_id_pic_buf_map[i4_min_poc_buf_id]);
+        }
+
+        /* Reset the slot whether or not it was displayed */
+        i4_poc_buf_id_map[i4_min_index][0] = -1;
+        i4_poc_buf_id_map[i4_min_index][1] = 0x7fffffff;
+        i4_poc_buf_id_map[i4_min_index][2] = 0;
+    }
+
+    ps_dpb_mgr->i1_poc_buf_id_entries = 0;
+    ps_dec->i4_prev_max_display_seq = ps_dec->i4_prev_max_display_seq
+                    + ps_dec->i4_max_poc + ps_dec->u1_max_dec_frame_buffering
+                    + 1;
+    ps_dec->i4_max_poc = 0;
+}
+
+/**
+ * Assigns picture numbers to all short-term reference frames relative to
+ * the current frame_num (section 8.2.4.1).
+ *
+ * For every node of the short-term list:
+ *   PicNum = FrameNum - MaxFrameNum   if FrameNum > current frame_num
+ *   PicNum = FrameNum                 otherwise
+ * The value is stored in the picture buffer and in the DPB node, and the
+ * buffer's long-term frame index is set to MAX_REF_BUFS + 1.  For field
+ * pictures the per-field numbers are 2 * PicNum plus a parity term derived
+ * from the current slice's bottom_field_flag.
+ *
+ * When gaps in frame_num are allowed and present, the recorded start and
+ * end frame numbers of each gap are wrapped in the same way.
+ *
+ * @param ps_dec  decoder context
+ */
+void ih264d_assign_pic_num(dec_struct_t *ps_dec)
+{
+    dpb_manager_t *ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+    const UWORD8 u1_fld_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag;
+    const WORD32 i4_max_frame_num =
+                    ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1 + 1;
+    const WORD32 i4_cur_frame_num = ps_dec->ps_cur_pic->i4_frame_num;
+    struct dpb_info_t *ps_node;
+    WORD8 i1_idx;
+
+    /* Walk the short-term list from its head */
+    ps_node = ps_dpb_mgr->ps_dpb_st_head;
+    for(i1_idx = 0; i1_idx < ps_dpb_mgr->u1_num_st_ref_bufs; i1_idx++)
+    {
+        WORD32 i4_pic_num = ps_node->ps_pic_buf->i4_frame_num;
+
+        /* A frame_num larger than the current one has wrapped around */
+        if(i4_pic_num > i4_cur_frame_num)
+        {
+            i4_pic_num -= i4_max_frame_num;
+        }
+
+        ps_node->ps_pic_buf->i4_pic_num = i4_pic_num;
+        ps_node->i4_frame_num = i4_pic_num;
+        ps_node->ps_pic_buf->u1_long_term_frm_idx = MAX_REF_BUFS + 1;
+
+        if(u1_fld_pic_flag)
+        {
+            /* Per-field picture numbers for top and bottom fields */
+            ps_node->s_top_field.i4_pic_num = i4_pic_num * 2
+                            + !(ps_dec->ps_cur_slice->u1_bottom_field_flag);
+            ps_node->s_bot_field.i4_pic_num = i4_pic_num * 2
+                            + ps_dec->ps_cur_slice->u1_bottom_field_flag;
+        }
+
+        ps_node = ps_node->ps_prev_short;
+    }
+
+    if(!ps_dec->ps_cur_sps->u1_gaps_in_frame_num_value_allowed_flag
+                    || !ps_dpb_mgr->u1_num_gaps)
+    {
+        return;
+    }
+
+    /* Wrap the frame numbers recorded for each gap sequence */
+    for(i1_idx = 0; i1_idx < MAX_FRAMES; i1_idx++)
+    {
+        WORD32 i4_gap_start = ps_dpb_mgr->ai4_gaps_start_frm_num[i1_idx];
+        WORD32 i4_gap_end;
+
+        if(INVALID_FRAME_NUM == i4_gap_start)
+        {
+            continue;
+        }
+
+        if(i4_gap_start > i4_cur_frame_num)
+        {
+            i4_gap_start -= i4_max_frame_num;
+        }
+        ps_dpb_mgr->ai4_gaps_start_frm_num[i1_idx] = i4_gap_start;
+
+        i4_gap_end = ps_dpb_mgr->ai4_gaps_end_frm_num[i1_idx];
+        if(i4_gap_end > i4_cur_frame_num)
+        {
+            i4_gap_end -= i4_max_frame_num;
+        }
+        ps_dpb_mgr->ai4_gaps_end_frm_num[i1_idx] = i4_gap_end;
+    }
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_update_qp \endif
+ *
+ * \brief
+ *    Applies a QP delta to the decoder's luma QP and refreshes everything
+ *    derived from it: the rem6/div6 pairs for Y, U and V and the three
+ *    4x4 inverse-quantisation scale table pointers.  The chroma QPs are
+ *    looked up in gau1_ih264d_qp_scale_cr after adding the PPS chroma
+ *    offsets and clipping to [0, 51].
+ *
+ * \param ps_dec  Decoder context (ps_cur_pps must be valid)
+ * \param i1_qp   QP delta; accepted range is [-26, 25]
+ *
+ * \return
+ *    OK on success, ERROR_INV_RANGE_QP_T when the delta or the resulting
+ *    QP is out of range (the decoder state is left untouched in that case)
+ **************************************************************************
+ */
+WORD32 ih264d_update_qp(dec_struct_t * ps_dec, const WORD8 i1_qp)
+{
+    WORD32 i4_val;
+
+    /* New luma QP, wrapped into 0..51 */
+    i4_val = (ps_dec->u1_qp + i1_qp + 52) % 52;
+
+    if((i1_qp < -26) || (i1_qp > 25) || (i4_val < 0) || (i4_val > 51))
+    {
+        return ERROR_INV_RANGE_QP_T;
+    }
+
+    /* Luma */
+    ps_dec->u1_qp = i4_val;
+    ps_dec->u1_qp_y_rem6 = ps_dec->u1_qp % 6;
+    ps_dec->u1_qp_y_div6 = ps_dec->u1_qp / 6;
+
+    /* First chroma component */
+    i4_val = CLIP3(0, 51,
+                   ps_dec->u1_qp + ps_dec->ps_cur_pps->i1_chroma_qp_index_offset);
+    ps_dec->u1_qp_u_rem6 = MOD(gau1_ih264d_qp_scale_cr[12 + i4_val], 6);
+    ps_dec->u1_qp_u_div6 = DIV(gau1_ih264d_qp_scale_cr[12 + i4_val], 6);
+
+    /* Second chroma component */
+    i4_val = CLIP3(0, 51,
+                   ps_dec->u1_qp + ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset);
+    ps_dec->u1_qp_v_rem6 = MOD(gau1_ih264d_qp_scale_cr[12 + i4_val], 6);
+    ps_dec->u1_qp_v_div6 = DIV(gau1_ih264d_qp_scale_cr[12 + i4_val], 6);
+
+    /* Inverse-quantisation scale tables are selected by the rem6 values */
+    ps_dec->pu2_quant_scale_y =
+                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_y_rem6];
+    ps_dec->pu2_quant_scale_u =
+                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_u_rem6];
+    ps_dec->pu2_quant_scale_v =
+                    gau2_ih264_iquant_scale_4x4[ps_dec->u1_qp_v_rem6];
+
+    return OK;
+}
+
+/**
+ * Handles a gap between the previous reference frame_num and the frame_num
+ * of the current picture.
+ *
+ * Nothing is done when the current frame_num is the expected successor of
+ * u2_prev_ref_frame_num (modulo MaxFrameNum), when a field picture repeats
+ * the previous reference frame_num, or for an IDR slice whose frame_num is
+ * not beyond the expected one.
+ *
+ * Otherwise a free slot in the DPB manager's gap tables is claimed, the
+ * start/end frame numbers of the gap are recorded there, and for every
+ * missing frame_num:
+ *  - non-reference, non-display pictures are dropped from the DPB,
+ *  - for POC types other than 0 a POC is computed and the display sequence
+ *    bookkeeping (i4_max_poc, i4_prev_max_display_seq) is updated,
+ *  - a picture is bumped to display if the POC map is full,
+ *  - a DO_NOT_DISP placeholder is inserted in the display list,
+ *  - ih264d_do_mmco_for_gaps() is run with the SPS reference frame count.
+ *
+ * Side effect: ps_dec->ps_cur_slice->u1_field_pic_flag is cleared once a
+ * gap is detected.
+ *
+ * @param ps_dec        decoder context
+ * @param u2_frame_num  frame_num of the current picture
+ *
+ * @return OK (0) on success, ERROR_DBP_MANAGER_T when no gap slot is free,
+ *         or the error code of a failing helper
+ */
+WORD32 ih264d_decode_gaps_in_frame_num(dec_struct_t *ps_dec,
+                                       UWORD16 u2_frame_num)
+{
+    UWORD32 u4_next_frm_num, u4_start_frm_num;
+    UWORD32 u4_max_frm_num;
+    pocstruct_t s_tmp_poc;
+    WORD32 i4_poc;
+    dec_slice_params_t *ps_cur_slice;
+    dec_pic_params_t *ps_pic_params;
+    WORD8 i1_gap_idx;
+    WORD32 *i4_gaps_start_frm_num;
+    dpb_manager_t *ps_dpb_mgr;
+    WORD8 *pi1_gaps_per_seq;
+    WORD32 ret;
+
+    ps_cur_slice = ps_dec->ps_cur_slice;
+    if(ps_cur_slice->u1_field_pic_flag)
+    {
+        /* Second field of the same frame: no gap */
+        if(ps_dec->u2_prev_ref_frame_num == u2_frame_num)
+            return 0;
+    }
+
+    /* Expected frame_num, wrapped at MaxFrameNum */
+    u4_next_frm_num = ps_dec->u2_prev_ref_frame_num + 1;
+    u4_max_frm_num = ps_dec->ps_cur_sps->u2_u4_max_pic_num_minus1 + 1;
+
+    if(u4_next_frm_num >= u4_max_frm_num)
+    {
+        u4_next_frm_num -= u4_max_frm_num;
+    }
+
+    if(u4_next_frm_num == u2_frame_num)
+    {
+        return (0);
+    }
+
+    if((ps_dec->u1_nal_unit_type == IDR_SLICE_NAL)
+                    && (u4_next_frm_num >= u2_frame_num))
+    {
+        return (0);
+    }
+    u4_start_frm_num = u4_next_frm_num;
+
+    /* POC syntax elements used for the missing frames are all zero */
+    s_tmp_poc.i4_pic_order_cnt_lsb = 0;
+    s_tmp_poc.i4_delta_pic_order_cnt_bottom = 0;
+    s_tmp_poc.i4_delta_pic_order_cnt[0] = 0;
+    s_tmp_poc.i4_delta_pic_order_cnt[1] = 0;
+
+    ps_pic_params = ps_dec->ps_cur_pps;
+    ps_cur_slice->u1_field_pic_flag = 0;
+
+    ps_dpb_mgr = ps_dec->ps_dpb_mgr;
+
+    /* Find a empty slot to store gap seqn info */
+    i4_gaps_start_frm_num = ps_dpb_mgr->ai4_gaps_start_frm_num;
+    for(i1_gap_idx = 0; i1_gap_idx < MAX_FRAMES; i1_gap_idx++)
+    {
+        if(INVALID_FRAME_NUM == i4_gaps_start_frm_num[i1_gap_idx])
+            break;
+    }
+    if(MAX_FRAMES == i1_gap_idx)
+    {
+        return ERROR_DBP_MANAGER_T;
+    }
+
+    i4_poc = 0;
+    i4_gaps_start_frm_num[i1_gap_idx] = u4_start_frm_num;
+    ps_dpb_mgr->ai4_gaps_end_frm_num[i1_gap_idx] = u2_frame_num - 1;
+    pi1_gaps_per_seq = ps_dpb_mgr->ai1_gaps_per_seq;
+    pi1_gaps_per_seq[i1_gap_idx] = 0;
+
+    while(u4_next_frm_num != u2_frame_num)
+    {
+        ih264d_delete_nonref_nondisplay_pics(ps_dpb_mgr);
+        if(ps_pic_params->ps_sps->u1_pic_order_cnt_type)
+        {
+            /* Derive the POC of the missing frame */
+            ret = ih264d_decode_pic_order_cnt(0, u4_next_frm_num,
+                                              &ps_dec->s_prev_pic_poc,
+                                              &s_tmp_poc, ps_cur_slice,
+                                              ps_pic_params, 1, 0, 0,
+                                              &i4_poc);
+            if(ret != OK)
+                return ret;
+
+            /* Display seq no calculations */
+            if(i4_poc >= ps_dec->i4_max_poc)
+                ps_dec->i4_max_poc = i4_poc;
+            /* IDR Picture or POC wrap around */
+            if(i4_poc == 0)
+            {
+                ps_dec->i4_prev_max_display_seq =
+                                ps_dec->i4_prev_max_display_seq
+                                                + ps_dec->i4_max_poc
+                                                + ps_dec->u1_max_dec_frame_buffering
+                                                + 1;
+                ps_dec->i4_max_poc = 0;
+            }
+
+            ps_cur_slice->u1_mmco_equalto5 = 0;
+            ps_cur_slice->u2_frame_num = u4_next_frm_num;
+        }
+
+        /* Make room in the POC map before inserting the placeholder */
+        if(ps_dpb_mgr->i1_poc_buf_id_entries
+                        >= ps_dec->u1_max_dec_frame_buffering)
+        {
+            ret = ih264d_assign_display_seq(ps_dec);
+            if(ret != OK)
+                return ret;
+        }
+
+        /* The missing frame is tracked but never displayed */
+        ret = ih264d_insert_pic_in_display_list(
+                        ps_dec->ps_dpb_mgr, (WORD8) DO_NOT_DISP,
+                        (WORD32)(ps_dec->i4_prev_max_display_seq + i4_poc),
+                        u4_next_frm_num);
+        if(ret != OK)
+            return ret;
+
+        pi1_gaps_per_seq[i1_gap_idx]++;
+        ret = ih264d_do_mmco_for_gaps(ps_dpb_mgr,
+                                      ps_dec->ps_cur_sps->u1_num_ref_frames);
+        if(ret != OK)
+            return ret;
+
+        ih264d_delete_nonref_nondisplay_pics(ps_dpb_mgr);
+
+        /* Advance to the next missing frame_num, wrapping at MaxFrameNum */
+        u4_next_frm_num++;
+        if(u4_next_frm_num >= u4_max_frm_num)
+        {
+            u4_next_frm_num -= u4_max_frm_num;
+        }
+    }
+
+    return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_create_pic_buffers \endif
+ *
+ * \brief
+ * Sets up the display manager, the picture buffer manager and
+ * u1_num_of_buf pic_buffer_t descriptors inside the MEM_REC_PIC_BUF_MGR
+ * memory record, points every descriptor at its luma/chroma planes and
+ * registers it with the picture buffer manager.
+ *
+ * Plane memory is taken from the MEM_REC_REF_PIC record, except when
+ * ps_dec->u4_share_disp_buf is set and ps_dec->disp_bufs[i].buf[0] is
+ * non-NULL; in that case the luma pointer (and, for non-420P formats,
+ * the chroma pointers) are placed inside the shared display buffers.
+ *
+ * \param u1_num_of_buf : Number of picture buffers to set up.
+ * \param ps_dec        : Decoder context; supplies the memory records,
+ *                        frame dimensions and display buffers.
+ *
+ * \return
+ * OK on success. ERROR_BUF_MGR (also written to ps_dec->i4_error_code)
+ * when ih264_buf_mgr_add() fails or when the bytes consumed exceed the
+ * size of either memory record.
+ *
+ * \note
+ * The size comparison against the memory records is done after the loop,
+ * i.e. after the descriptors have already been written.
+ **************************************************************************
+ */
+WORD32 ih264d_create_pic_buffers(UWORD8 u1_num_of_buf,
+ dec_struct_t *ps_dec)
+{
+ struct pic_buffer_t *ps_pic_buf;
+ UWORD8 i;
+ UWORD32 u4_luma_size, u4_chroma_size;
+ UWORD8 u1_frm = ps_dec->ps_cur_sps->u1_frame_mbs_only_flag; /* not referenced below */
+ WORD32 j; /* not referenced below */
+ UWORD32 u4_pic_buf_mem_used, u4_ref_buf_mem_used;
+ UWORD8 *pu1_pic_buf_mem_base, *pu1_ref_buf_mem_base;
+
+ u4_pic_buf_mem_used = 0;
+ pu1_pic_buf_mem_base = ps_dec->ps_mem_tab[MEM_REC_PIC_BUF_MGR].pv_base;
+
+ ps_dec->pv_disp_buf_mgr = (void *)(pu1_pic_buf_mem_base
+ + u4_pic_buf_mem_used); /* display manager sits at offset 0 of the record */
+ u4_pic_buf_mem_used += sizeof(disp_mgr_t);
+ ih264_disp_mgr_init((disp_mgr_t *)ps_dec->pv_disp_buf_mgr);
+
+ ps_dec->pv_pic_buf_mgr =
+ (void *)(pu1_pic_buf_mem_base + u4_pic_buf_mem_used); /* buffer manager follows the display manager */
+ u4_pic_buf_mem_used += sizeof(buf_mgr_t) + ithread_get_mutex_lock_size();
+ ih264_buf_mgr_init((buf_mgr_t *)ps_dec->pv_pic_buf_mgr);
+
+ ps_pic_buf = (pic_buffer_t *)(pu1_pic_buf_mem_base + u4_pic_buf_mem_used);
+ u4_pic_buf_mem_used += sizeof(struct pic_buffer_t)
+ * (H264_MAX_REF_PICS * 2); /* room reserved for H264_MAX_REF_PICS * 2 descriptors */
+
+ u4_luma_size = ps_dec->u2_frm_wd_y * ps_dec->u2_frm_ht_y; /* bytes stepped per luma plane */
+ u4_chroma_size = ps_dec->u2_frm_wd_uv * ps_dec->u2_frm_ht_uv; /* bytes stepped per chroma plane */
+
+ {
+ if(ps_dec->u4_share_disp_buf == 1)
+ {
+ /* In case of buffers getting shared between application and library
+ there is no need of reference memtabs. Instead of setting the i4_size
+ to zero, it is reduced to a small i4_size to ensure that changes
+ in the code are minimal */
+ if((ps_dec->u1_chroma_format == IV_YUV_420SP_UV)
+ || (ps_dec->u1_chroma_format == IV_YUV_420SP_VU)
+ || (ps_dec->u1_chroma_format == IV_YUV_420P))
+ {
+ u4_luma_size = 64;
+ }
+
+ if(ps_dec->u1_chroma_format == IV_YUV_420SP_UV) /* NOTE(review): only 420SP_UV shrinks chroma; 420SP_VU keeps the full size - confirm intended */
+
+ {
+ u4_chroma_size = 64;
+ }
+
+ }
+ }
+
+ pu1_ref_buf_mem_base = ps_dec->ps_mem_tab[MEM_REC_REF_PIC].pv_base;
+ u4_ref_buf_mem_used = 0;
+
+ /* Lay out one luma and one chroma plane per reference buffer */
+ for(i = 0; i < u1_num_of_buf; i++)
+ {
+ UWORD32 u4_offset;
+ WORD32 buf_ret;
+ UWORD8 *pu1_luma, *pu1_chroma;
+
+ pu1_luma = pu1_ref_buf_mem_base + u4_ref_buf_mem_used;
+ u4_ref_buf_mem_used += u4_luma_size;
+ pu1_chroma = pu1_ref_buf_mem_base + u4_ref_buf_mem_used;
+ u4_ref_buf_mem_used += u4_chroma_size;
+
+ /* Offset to the start of the pic from the top left corner of the frame
+ buffer */
+
+ if((0 == ps_dec->u4_share_disp_buf)
+ || (NULL == ps_dec->disp_bufs[i].buf[0]))
+ {
+ UWORD32 pad_len_h, pad_len_v;
+
+ u4_offset = ps_dec->u2_frm_wd_y * (PAD_LEN_Y_V << 1) + PAD_LEN_Y_H;
+ ps_pic_buf->pu1_buf1 = (UWORD8 *)(pu1_luma) + u4_offset;
+
+ pad_len_h = MAX(PAD_LEN_UV_H, (PAD_LEN_Y_H >> 1));
+ pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
+
+ u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
+
+ ps_pic_buf->pu1_buf2 = (UWORD8 *)(pu1_chroma) + u4_offset;
+ ps_pic_buf->pu1_buf3 = (UWORD8 *)(NULL) + u4_offset; /* NULL-based: holds only the offset, no plane memory is attached here */
+
+ }
+ else
+ {
+ UWORD32 pad_len_h, pad_len_v;
+ u4_offset = ps_dec->u2_frm_wd_y * (PAD_LEN_Y_V << 1) + PAD_LEN_Y_H;
+ ps_pic_buf->pu1_buf1 = (UWORD8 *)ps_dec->disp_bufs[i].buf[0]
+ + u4_offset;
+
+ ps_dec->disp_bufs[i].u4_ofst[0] = u4_offset; /* record the luma offset alongside the display buffer */
+
+ if(ps_dec->u1_chroma_format == IV_YUV_420P)
+ {
+ pad_len_h = MAX(PAD_LEN_UV_H * YUV420SP_FACTOR,
+ (PAD_LEN_Y_H >> 1));
+ pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
+
+ u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
+ ps_pic_buf->pu1_buf2 = (UWORD8 *)(pu1_chroma) + u4_offset; /* 420P: chroma stays in the reference record, not the display buffer */
+ ps_pic_buf->pu1_buf3 = (UWORD8 *)(NULL) + u4_offset; /* NULL-based: holds only the offset */
+
+ ps_dec->disp_bufs[i].u4_ofst[1] = u4_offset;
+ ps_dec->disp_bufs[i].u4_ofst[2] = u4_offset;
+
+ }
+ else
+ {
+ pad_len_h = MAX(PAD_LEN_UV_H * YUV420SP_FACTOR,
+ (PAD_LEN_Y_H >> 1));
+ pad_len_v = MAX(PAD_LEN_UV_V, PAD_LEN_Y_V);
+
+ u4_offset = ps_dec->u2_frm_wd_uv * pad_len_v + pad_len_h;
+ ps_pic_buf->pu1_buf2 = (UWORD8 *)(ps_dec->disp_bufs[i].buf[1])
+ + u4_offset;
+ ps_pic_buf->pu1_buf3 = (UWORD8 *)(ps_dec->disp_bufs[i].buf[1])
+ + u4_offset; /* same address as pu1_buf2: both derive from buf[1] */
+
+ ps_dec->disp_bufs[i].u4_ofst[1] = u4_offset;
+ ps_dec->disp_bufs[i].u4_ofst[2] = u4_offset;
+
+ }
+
+ }
+
+ ps_pic_buf->u2_frm_ht_y = ps_dec->u2_frm_ht_y;
+ ps_pic_buf->u2_frm_ht_uv = ps_dec->u2_frm_ht_uv;
+ ps_pic_buf->u2_frm_wd_y = ps_dec->u2_frm_wd_y;
+ ps_pic_buf->u2_frm_wd_uv = ps_dec->u2_frm_wd_uv;
+
+ ps_pic_buf->u1_pic_buf_id = i;
+
+ buf_ret = ih264_buf_mgr_add((buf_mgr_t *)ps_dec->pv_pic_buf_mgr,
+ ps_pic_buf, i);
+ if(0 != buf_ret)
+ {
+ ps_dec->i4_error_code = ERROR_BUF_MGR;
+ return ERROR_BUF_MGR;
+ }
+
+ ps_dec->apv_buf_id_pic_buf_map[i] = (void *)ps_pic_buf; /* buffer id -> descriptor lookup */
+ ps_pic_buf++;
+ }
+
+ if((u4_ref_buf_mem_used > ps_dec->ps_mem_tab[MEM_REC_REF_PIC].u4_mem_size) ||
+ (u4_pic_buf_mem_used > ps_dec->ps_mem_tab[MEM_REC_PIC_BUF_MGR].u4_mem_size)) /* NOTE(review): validated only after the descriptors above were written */
+ {
+ ps_dec->i4_error_code = ERROR_BUF_MGR;
+ return ERROR_BUF_MGR;
+ }
+
+ if(1 == ps_dec->u4_share_disp_buf)
+ {
+ for(i = 0; i < u1_num_of_buf; i++)
+ ps_dec->u4_disp_buf_mapping[i] = 1; /* flag every buffer as mapped; exact meaning of 1 is defined by the users of this table */
+ }
+ return OK;
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_get_memory_dec_params \endif
+ *
+ * \brief
+ * Distributes the memory records in ps_dec->ps_mem_tab[] among the
+ * decoder's working pointers. No memory is requested here: every pointer
+ * is either the base of a record or base + a running byte offset inside
+ * the MEM_REC_INTERNAL_SCRATCH / MEM_REC_INTERNAL_PERSIST records.
+ * After the layout it clears a few of the regions, seeds the initial and
+ * modified DPB pointer lists and resolves the fixed CABAC context
+ * pointers into the context table.
+ *
+ * \param ps_dec: Pointer to dec_struct_t.
+ *
+ * \return
+ * i16_status, which is initialised to 0 and never modified in this
+ * function.
+ *
+ * \note
+ * The running offsets into the scratch and persistent records are not
+ * compared against the records' u4_mem_size anywhere in this function.
+ *
+ **************************************************************************
+ */
+//WORD16 i16_res_coeff[2 * 3600 * (MB_LUM_SIZE + 2 * MB_CHROM_SIZE)];
+//pred_info_t s_pred_frame[4000 * 60];
+//pred_info_t *ps_pred_frame;
+
+WORD16 ih264d_get_memory_dec_params(dec_struct_t * ps_dec)
+{
+ struct MemReq s_MemReq; /* not referenced below */
+ struct MemBlock *p_MemBlock; /* not referenced below */
+
+ pred_info_t *ps_pred_frame; /* not referenced below */
+ dec_mb_info_t *ps_frm_mb_info; /* not referenced below (the ps_dec member is used instead) */
+ dec_slice_struct_t *ps_dec_slice_buf; /* not referenced below (the ps_dec member is used instead) */
+ UWORD8 *pu1_dec_mb_map, *pu1_recon_mb_map; /* not referenced below */
+ UWORD16 *pu2_slice_num_map; /* not referenced below */
+
+ WORD16 *pi16_res_coeff; /* not referenced below */
+ WORD16 i16_status = 0;
+ UWORD8 uc_frmOrFld = (1 - ps_dec->ps_cur_sps->u1_frame_mbs_only_flag); /* 0 when frame_mbs_only_flag is 1, else 1; used as a shift count */
+ UWORD16 u4_luma_wd = ps_dec->u2_frm_wd_y; /* 16-bit despite the u4_ prefix */
+ UWORD16 u4_chroma_wd = ps_dec->u2_frm_wd_uv; /* 16-bit despite the u4_ prefix */
+ WORD8 c_i = 0; /* not referenced below */
+ dec_seq_params_t *ps_sps = ps_dec->ps_cur_sps;
+ UWORD32 u4_total_mbs = ps_sps->u2_total_num_of_mbs << uc_frmOrFld;
+ UWORD32 u4_wd_mbs = ps_dec->u2_frm_wd_in_mbs;
+ UWORD32 u4_ht_mbs = ps_dec->u2_frm_ht_in_mbs;
+ UWORD32 u4_blk_wd;
+ UWORD32 ui_size = 0; /* not referenced below */
+ UWORD32 u4_int_scratch_size = 0, u4_ref_pred_size = 0; /* not referenced below */
+ UWORD8 *pu1_buf;
+
+ ps_dec->ps_deblk_pic = ps_dec->ps_mem_tab[MEM_REC_DEBLK_MB_INFO].pv_base;
+
+ ps_dec->pu1_dec_mb_map = ps_dec->ps_mem_tab[MEM_REC_PARSE_MAP].pv_base;
+
+ ps_dec->pu1_recon_mb_map = ps_dec->ps_mem_tab[MEM_REC_PROC_MAP].pv_base;
+
+ ps_dec->pu2_slice_num_map =
+ ps_dec->ps_mem_tab[MEM_REC_SLICE_NUM_MAP].pv_base;
+
+ ps_dec->ps_dec_slice_buf = ps_dec->ps_mem_tab[MEM_REC_SLICE_HDR].pv_base;
+ pu1_buf = (UWORD8 *)ps_dec->ps_dec_slice_buf;
+ pu1_buf += sizeof(dec_slice_struct_t) * u4_total_mbs; /* skip one slice struct per MB */
+ ps_dec->pv_map_ref_idx_to_poc_buf = (void *)pu1_buf; /* map buffer shares the MEM_REC_SLICE_HDR record, after the slice structs */
+
+ ps_dec->ps_frm_mb_info = ps_dec->ps_mem_tab[MEM_REC_MB_INFO].pv_base;
+ memset(ps_dec->ps_frm_mb_info, 0, ps_dec->ps_mem_tab[MEM_REC_MB_INFO].u4_mem_size);
+
+ ps_dec->ps_pred = ps_dec->ps_mem_tab[MEM_REC_PRED_INFO].pv_base;
+
+ ps_dec->pi2_coeff_data = ps_dec->ps_mem_tab[MEM_REC_COEFF_DATA].pv_base;
+
+ ps_dec->pv_pic_tu_coeff_data = (void *)(ps_dec->pi2_coeff_data + MB_LUM_SIZE); /* starts MB_LUM_SIZE elements into the coefficient record */
+
+ /* scratch memory layout: each pointer = scratch base + running offset */
+ {
+ UWORD8 *pu1_scratch_mem_base;
+ UWORD32 u4_scratch_mem_used;
+
+ pu1_scratch_mem_base =
+ ps_dec->ps_mem_tab[MEM_REC_INTERNAL_SCRATCH].pv_base;
+ u4_scratch_mem_used = 0;
+
+ ps_dec->ppv_map_ref_idx_to_poc = (void *)(pu1_scratch_mem_base
+ + u4_scratch_mem_used);
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); /* applied after the pointer was taken; the offset is still 0 here */
+ u4_scratch_mem_used += ((TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC)
+ * sizeof(void *));
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+ memset(ps_dec->ppv_map_ref_idx_to_poc, 0, (TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC)
+ * sizeof(void *));
+
+ ps_dec->p_cabac_ctxt_table_t = (void *)(pu1_scratch_mem_base
+ + u4_scratch_mem_used);
+ u4_scratch_mem_used += (sizeof(bin_ctxt_model_t) * NUM_CABAC_CTXTS);
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+
+ ps_dec->ps_left_mb_ctxt_info = (void *)(pu1_scratch_mem_base
+ + u4_scratch_mem_used);
+ u4_scratch_mem_used += sizeof(ctxt_inc_mb_info_t);
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+
+ ps_dec->pu4_defI_wts_ofsts = (void *)(pu1_scratch_mem_base
+ + u4_scratch_mem_used);
+ u4_scratch_mem_used +=
+ sizeof(UWORD32)
+ * (ps_sps->u1_num_ref_frames
+ * ps_sps->u1_num_ref_frames); /* one UWORD32 per (ref frame, ref frame) pair */
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+
+ ps_dec->pu1_ref_buff = (void *)(pu1_scratch_mem_base
+ + u4_scratch_mem_used);
+ u4_scratch_mem_used += MAX_REF_BUF_SIZE;
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+ ps_dec->pi2_pred1 =
+ (void *)(pu1_scratch_mem_base + u4_scratch_mem_used);
+ u4_scratch_mem_used += ((sizeof(WORD16)) * PRED_BUFFER_WIDTH
+ * PRED_BUFFER_HEIGHT);
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+
+ ps_dec->pu1_temp_mc_buffer = (void *)(pu1_scratch_mem_base
+ + u4_scratch_mem_used);
+ u4_scratch_mem_used += sizeof(UWORD8) * (MB_LUM_SIZE); /* NOTE(review): the only step here not followed by ALIGN64 */
+
+ ps_dec->ps_parse_mb_data = (void *)(pu1_scratch_mem_base
+ + u4_scratch_mem_used);
+ u4_scratch_mem_used += sizeof(parse_pmbarams_t)
+ * (ps_dec->u1_recon_mb_grp);
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+
+ ps_dec->ps_parse_part_params = (void *)(pu1_scratch_mem_base
+ + u4_scratch_mem_used);
+ u4_scratch_mem_used += sizeof(parse_part_params_t)
+ * ((ps_dec->u1_recon_mb_grp) << 4); /* 16 entries per MB of the recon group */
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+
+ ps_dec->ps_dpb_mgr->ps_init_dpb[0][0] =
+ (struct pic_buffer_t*)(pu1_scratch_mem_base
+ + u4_scratch_mem_used); /* only entry [0][0] is set here; the rest are filled in further down */
+ u4_scratch_mem_used += 2 * MAX_REF_BUFS * sizeof(struct pic_buffer_t);
+
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+ ps_dec->ps_dpb_mgr->ps_init_dpb[1][0] =
+ (struct pic_buffer_t*)(pu1_scratch_mem_base + u4_scratch_mem_used);
+ u4_scratch_mem_used += 2 * MAX_REF_BUFS * sizeof(struct pic_buffer_t);
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+ ps_dec->pu4_mbaff_wt_mat = (UWORD32 *)(pu1_scratch_mem_base + u4_scratch_mem_used);
+
+ u4_scratch_mem_used += (sizeof(UWORD32) * 3
+ * (MAX_FRAMES * MAX_FRAMES))
+ << 3; /* eight times the size of one 3-entry-per-pair table */
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used);
+
+ ps_dec->pu4_wts_ofsts_mat = (UWORD32 *)(pu1_scratch_mem_base + u4_scratch_mem_used);
+ u4_scratch_mem_used += sizeof(UWORD32) * 2 * 3
+ * (MAX_FRAMES * MAX_FRAMES);
+ u4_scratch_mem_used = ALIGN64(u4_scratch_mem_used); /* final offset is not compared with the record size */
+ }
+ /********************************************************************/
+ /* NOTE(review): no deblk/scratch size check is performed below;    */
+ /* this block only initialises what was laid out above              */
+ /********************************************************************/
+ {
+ /************************************************************/
+ /* Post allocation Initialisations */
+ /************************************************************/
+ memset(ps_dec->ppv_map_ref_idx_to_poc, 0,
+ (TOTAL_LIST_ENTRIES + PAD_MAP_IDX_POC) * sizeof(void *)); /* same region was already cleared in the scratch block above */
+ ps_dec->ppv_map_ref_idx_to_poc += OFFSET_MAP_IDX_POC; /* stored pointer is advanced past the first OFFSET_MAP_IDX_POC entries */
+
+ {
+ UWORD32 u4_ref_size;
+ u4_ref_size = MAX_REF_BUF_SIZE;
+
+ {
+
+ ps_dec->ps_parse_cur_slice = &(ps_dec->ps_dec_slice_buf[0]); /* parse, decode and compute-bs slice pointers all start at slice 0 */
+ ps_dec->ps_decode_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
+ ps_dec->ps_computebs_cur_slice = &(ps_dec->ps_dec_slice_buf[0]);
+ ps_dec->ps_parse_cur_slice->slice_header_done = 0;
+
+ ps_dec->ps_pred_start = ps_dec->ps_pred;
+ ps_dec->u4_ref_buf_size = u4_ref_size;
+ }
+ }
+
+ {
+ UWORD8 i;
+ struct pic_buffer_t *ps_init_dpb;
+ ps_init_dpb = ps_dec->ps_dpb_mgr->ps_init_dpb[0][0];
+ for(i = 0; i < 2 * MAX_REF_BUFS; i++) /* list 0: point init and mod entries at consecutive pic_buffer_t slots */
+ {
+ ps_init_dpb->pu1_buf1 = NULL;
+ ps_init_dpb->u1_long_term_frm_idx = MAX_REF_BUFS + 1;
+ ps_dec->ps_dpb_mgr->ps_init_dpb[0][i] = ps_init_dpb;
+ ps_dec->ps_dpb_mgr->ps_mod_dpb[0][i] = ps_init_dpb;
+ ps_init_dpb++;
+ }
+
+ ps_init_dpb = ps_dec->ps_dpb_mgr->ps_init_dpb[1][0];
+ for(i = 0; i < 2 * MAX_REF_BUFS; i++) /* list 1: same treatment as list 0 */
+ {
+ ps_init_dpb->pu1_buf1 = NULL;
+ ps_init_dpb->u1_long_term_frm_idx = MAX_REF_BUFS + 1;
+ ps_dec->ps_dpb_mgr->ps_init_dpb[1][i] = ps_init_dpb;
+ ps_dec->ps_dpb_mgr->ps_mod_dpb[1][i] = ps_init_dpb;
+ ps_init_dpb++;
+ }
+ }
+ }
+
+ /* persistent memory layout: each pointer = persistent base + running offset */
+
+ {
+ UWORD8 *pu1_persitent_mem_base;
+ UWORD32 u4_persistent_mem_used;
+
+ pu1_persitent_mem_base =
+ ps_dec->ps_mem_tab[MEM_REC_INTERNAL_PERSIST].pv_base;
+ u4_persistent_mem_used = 0;
+
+ ps_dec->ps_deblk_top_mb = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += ((u4_wd_mbs
+ * sizeof(deblkmb_neighbour_t)) << uc_frmOrFld); /* one entry per MB column, doubled when uc_frmOrFld is 1 */
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->ps_left_mvpred_addr = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += (sizeof(neighbouradd_t) << 2); /* four neighbouradd_t entries */
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->p_ctxt_inc_mb_map = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += ((sizeof(ctxt_inc_mb_info_t))
+ * (((u4_wd_mbs + 1) << uc_frmOrFld) + 1)); /* the trailing +1 pairs with the pointer increment near the end of the function */
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->ps_mv_p[0] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += (sizeof(mv_pred_t) * ps_dec->u1_recon_mb_grp
+ * 16);
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->ps_mv_p[1] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += (sizeof(mv_pred_t) * ps_dec->u1_recon_mb_grp
+ * 16);
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ {
+ UWORD8 i;
+ for(i = 0; i < MV_SCRATCH_BUFS; i++)
+ {
+
+ ps_dec->ps_mv_top_p[i] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += (sizeof(mv_pred_t)
+ * ps_dec->u1_recon_mb_grp * 4);
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ }
+ }
+
+ {
+ UWORD32 u4_numRows = MB_SIZE << 1;
+
+ /* Allocate memory for ping, pong and left reconstruction buffers */
+ u4_blk_wd = ((ps_dec->u1_recon_mb_grp << 4) >> 1) + 8;
+
+ ps_dec->pu1_y_scratch[0] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->pu1_y_scratch[1] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ u4_numRows = BLK8x8SIZE << 1; /* chroma scratch: rows and width recomputed from the 8x8 block size */
+ u4_blk_wd = ((ps_dec->u1_recon_mb_grp << 3) >> 1) + 8;
+
+ ps_dec->pu1_u_scratch[0] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->pu1_v_scratch[0] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->pu1_u_scratch[1] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->pu1_v_scratch[1] = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * u4_numRows * u4_blk_wd;
+ u4_persistent_mem_used += 32; /* extra 32 bytes after the last scratch buffer; reason not stated in this file */
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ }
+
+ ps_dec->pu1_y_intra_pred_line = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * (u4_luma_wd + 16) * 2;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->pu1_u_intra_pred_line = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * (u4_chroma_wd + 16) * 2
+ * YUV420SP_FACTOR;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->pu1_v_intra_pred_line = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += sizeof(UWORD8) * (u4_chroma_wd + 16) * 2;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->ps_nbr_mb_row = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ if(ps_dec->u1_separate_parse)
+ {
+ u4_persistent_mem_used += sizeof(mb_neigbour_params_t)
+ * ((u4_wd_mbs + 1) * u4_ht_mbs); /* separate parse: (width + 1) entries for every MB row */
+ memset(ps_dec->ps_nbr_mb_row, 0, sizeof(mb_neigbour_params_t)
+ * ((u4_wd_mbs + 1) * u4_ht_mbs));
+ }
+ else
+ {
+ u4_persistent_mem_used += sizeof(mb_neigbour_params_t)
+ * ((u4_wd_mbs + 1) << uc_frmOrFld); /* otherwise one row of (width + 1), doubled when uc_frmOrFld is 1 */
+ memset(ps_dec->ps_nbr_mb_row, 0, sizeof(mb_neigbour_params_t)
+ * ((u4_wd_mbs + 1) << uc_frmOrFld));
+
+ }
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+ ps_dec->s_pad_mgr.pu1_row_y = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += ps_dec->u2_frm_wd_y;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->s_pad_mgr.pu1_row_u = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += ps_dec->u2_frm_wd_uv;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->s_pad_mgr.pu1_row_v = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += ps_dec->u2_frm_wd_uv;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->s_pad_mgr.pu1_mb_y = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += ((MB_SIZE + 4) << uc_frmOrFld) * PAD_LEN_Y_H;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->s_pad_mgr.pu1_mb_u = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += ((BLK8x8SIZE + 2) << uc_frmOrFld)
+ * PAD_LEN_UV_H;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used);
+
+ ps_dec->s_pad_mgr.pu1_mb_v = (void *)(pu1_persitent_mem_base
+ + u4_persistent_mem_used);
+ u4_persistent_mem_used += ((BLK8x8SIZE + 2) << uc_frmOrFld)
+ * PAD_LEN_UV_H;
+ u4_persistent_mem_used = ALIGN64(u4_persistent_mem_used); /* final offset is not compared with the record size */
+ }
+
+ /* Post allocation initializations: clear the start of each intra pred line */
+ memset(ps_dec->pu1_y_intra_pred_line, 0,
+ sizeof(UWORD8) * u4_luma_wd + PAD_LEN_Y_H); /* clears width + PAD_LEN_Y_H bytes; the region reserved above is (width + 16) * 2 */
+ memset(ps_dec->pu1_u_intra_pred_line, 0,
+ sizeof(UWORD8) * u4_chroma_wd + PAD_LEN_UV_H);
+ memset(ps_dec->pu1_v_intra_pred_line, 0,
+ sizeof(UWORD8) * u4_chroma_wd + PAD_LEN_UV_H);
+
+ /* 0th entry of CtxtIncMbMap will be always be containing default values
+ for CABAC context representing MB not available */
+ ps_dec->p_ctxt_inc_mb_map += 1;
+ /* Post allocation Increment Actions */
+
+ /***************************************************************************/
+ /*Initialize cabac context pointers for every SE that has fixed contextIdx */
+ /***************************************************************************/
+ {
+ bin_ctxt_model_t * const p_cabac_ctxt_table_t =
+ ps_dec->p_cabac_ctxt_table_t;
+ bin_ctxt_model_t * * p_coeff_abs_level_minus1_t =
+ ps_dec->p_coeff_abs_level_minus1_t;
+ bin_ctxt_model_t * * p_cbf_t = ps_dec->p_cbf_t;
+
+ ps_dec->p_mb_field_dec_flag_t = p_cabac_ctxt_table_t
+ + MB_FIELD_DECODING_FLAG;
+ ps_dec->p_prev_intra4x4_pred_mode_flag_t = p_cabac_ctxt_table_t
+ + PREV_INTRA4X4_PRED_MODE_FLAG;
+ ps_dec->p_rem_intra4x4_pred_mode_t = p_cabac_ctxt_table_t
+ + REM_INTRA4X4_PRED_MODE;
+ ps_dec->p_intra_chroma_pred_mode_t = p_cabac_ctxt_table_t
+ + INTRA_CHROMA_PRED_MODE;
+ ps_dec->p_mb_qp_delta_t = p_cabac_ctxt_table_t + MB_QP_DELTA;
+ ps_dec->p_ref_idx_t = p_cabac_ctxt_table_t + REF_IDX;
+ ps_dec->p_mvd_x_t = p_cabac_ctxt_table_t + MVD_X;
+ ps_dec->p_mvd_y_t = p_cabac_ctxt_table_t + MVD_Y;
+ p_cbf_t[0] = p_cabac_ctxt_table_t + CBF + 0; /* five CBF groups, spaced 4 contexts apart */
+ p_cbf_t[1] = p_cabac_ctxt_table_t + CBF + 4;
+ p_cbf_t[2] = p_cabac_ctxt_table_t + CBF + 8;
+ p_cbf_t[3] = p_cabac_ctxt_table_t + CBF + 12;
+ p_cbf_t[4] = p_cabac_ctxt_table_t + CBF + 16;
+ ps_dec->p_cbp_luma_t = p_cabac_ctxt_table_t + CBP_LUMA;
+ ps_dec->p_cbp_chroma_t = p_cabac_ctxt_table_t + CBP_CHROMA;
+
+ p_coeff_abs_level_minus1_t[LUMA_DC_CTXCAT] = p_cabac_ctxt_table_t
+ + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_0_OFFSET;
+
+ p_coeff_abs_level_minus1_t[LUMA_AC_CTXCAT] = p_cabac_ctxt_table_t
+ + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_1_OFFSET;
+
+ p_coeff_abs_level_minus1_t[LUMA_4X4_CTXCAT] = p_cabac_ctxt_table_t
+ + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_2_OFFSET;
+
+ p_coeff_abs_level_minus1_t[CHROMA_DC_CTXCAT] = p_cabac_ctxt_table_t
+ + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_3_OFFSET;
+
+ p_coeff_abs_level_minus1_t[CHROMA_AC_CTXCAT] = p_cabac_ctxt_table_t
+ + COEFF_ABS_LEVEL_MINUS1 + COEFF_ABS_LEVEL_CAT_4_OFFSET;
+
+ p_coeff_abs_level_minus1_t[LUMA_8X8_CTXCAT] = p_cabac_ctxt_table_t
+ + COEFF_ABS_LEVEL_MINUS1_8X8
+ + COEFF_ABS_LEVEL_CAT_5_OFFSET; /* the 8x8 category uses the _8X8 base, unlike categories 0-4 */
+
+ /********************************************************/
+ /* context for the high profile related syntax elements */
+ /* This is maintained separately in s_high_profile */
+ /********************************************************/
+ {
+
+ ps_dec->s_high_profile.ps_transform8x8_flag = p_cabac_ctxt_table_t
+ + TRANSFORM_SIZE_8X8_FLAG;
+
+ ps_dec->s_high_profile.ps_sigcoeff_8x8_frame = p_cabac_ctxt_table_t
+ + SIGNIFICANT_COEFF_FLAG_8X8_FRAME;
+
+ ps_dec->s_high_profile.ps_last_sigcoeff_8x8_frame =
+ p_cabac_ctxt_table_t
+ + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FRAME;
+
+ ps_dec->s_high_profile.ps_coeff_abs_levelminus1 =
+ p_cabac_ctxt_table_t + COEFF_ABS_LEVEL_MINUS1_8X8;
+
+ ps_dec->s_high_profile.ps_sigcoeff_8x8_field = p_cabac_ctxt_table_t
+ + SIGNIFICANT_COEFF_FLAG_8X8_FIELD;
+
+ ps_dec->s_high_profile.ps_last_sigcoeff_8x8_field =
+ p_cabac_ctxt_table_t
+ + LAST_SIGNIFICANT_COEFF_FLAG_8X8_FIELD;
+
+ }
+
+ }
+ return (i16_status); /* always 0: nothing above assigns i16_status */
+}
+
+/*!
+ **************************************************************************
+ * \if Function name : ih264d_create_mv_bank \endif
+ *
+ * \brief
+ *    Lays out the motion-vector bank inside the decoder's memory records
+ *    and registers one col_mv_buf_t per buffer with the MV buffer manager.
+ *
+ *    The MEM_REC_MV_BUF_MGR record holds, in order: the buffer manager
+ *    (plus its mutex), an array of H264_MAX_REF_PICS * 2 col_mv_buf_t
+ *    descriptors, and one col-zero-flag buffer per MV buffer.
+ *    The MEM_REC_MVBANK record holds one mv_pred_t buffer per MV buffer.
+ *    The first (ui_width * OFFSET_MV_BANK_ROW) >> 4 entries of each
+ *    mv_pred_t buffer are zeroed and the published pv_mv pointer is placed
+ *    just after them.
+ *
+ *    u1_max_dec_frame_buffering + 1 buffers are created.
+ *
+ * \param pv_dec    : Decoder context (dec_struct_t *).
+ * \param ui_width  : Frame width used to size the per-buffer areas.
+ * \param ui_height : Frame height used to size the per-buffer areas.
+ *
+ * \return
+ *    OK on success. ERROR_BUF_MGR (also written to ps_dec->i4_error_code)
+ *    when the layout does not fit in the memory records, when more
+ *    buffers are requested than descriptors were reserved, or when
+ *    ih264_buf_mgr_add() fails.
+ *
+ * \note
+ *    Every capacity check is made before the corresponding memory is
+ *    written, so an undersized record is reported instead of overrun.
+ *
+ **************************************************************************
+ */
+WORD32 ih264d_create_mv_bank(void *pv_dec,
+                             UWORD32 ui_width,
+                             UWORD32 ui_height)
+{
+    /* 32-bit counter: a UWORD8 could never exceed a bound of 256 */
+    UWORD32 i;
+    UWORD32 u4_num_bufs;
+    UWORD32 col_flag_buffer_size, mvpred_buffer_size;
+    UWORD8 *pu1_mv_buf_mgr_base, *pu1_mv_bank_base;
+    UWORD32 u4_mv_buf_mgr_mem_used, u4_mv_bank_mem_used;
+    col_mv_buf_t *ps_col_mv;
+    mv_pred_t *ps_mv;
+    UWORD8 *pu1_col_zero_flag_buf;
+    dec_struct_t *ps_dec = (dec_struct_t *)pv_dec;
+    WORD32 buf_ret;
+
+    u4_num_bufs = (UWORD32)ps_dec->u1_max_dec_frame_buffering + 1;
+
+    pu1_mv_buf_mgr_base = ps_dec->ps_mem_tab[MEM_REC_MV_BUF_MGR].pv_base;
+    u4_mv_buf_mgr_mem_used = 0;
+    col_flag_buffer_size = ((ui_width * ui_height) >> 4);
+
+    pu1_mv_bank_base = ps_dec->ps_mem_tab[MEM_REC_MVBANK].pv_base;
+    u4_mv_bank_mem_used = 0;
+    mvpred_buffer_size = sizeof(mv_pred_t)
+                    * ((ui_width * (ui_height + PAD_MV_BANK_ROW)) >> 4);
+
+    /* Fixed part of the manager record: manager + mutex, then descriptors */
+    ps_dec->pv_mv_buf_mgr = (void *)(pu1_mv_buf_mgr_base + u4_mv_buf_mgr_mem_used);
+    u4_mv_buf_mgr_mem_used += sizeof(buf_mgr_t) + ithread_get_mutex_lock_size();
+
+    ps_col_mv = (col_mv_buf_t *)(pu1_mv_buf_mgr_base + u4_mv_buf_mgr_mem_used);
+    u4_mv_buf_mgr_mem_used += sizeof(col_mv_buf_t) * (H264_MAX_REF_PICS * 2);
+    u4_mv_buf_mgr_mem_used = ALIGN128(u4_mv_buf_mgr_mem_used);
+
+    /* Nothing has been written yet: reject a record that cannot even hold
+     * the fixed part, or a buffer count beyond the reserved descriptors. */
+    if((u4_num_bufs > (UWORD32)(H264_MAX_REF_PICS * 2)) ||
+       (u4_mv_buf_mgr_mem_used > ps_dec->ps_mem_tab[MEM_REC_MV_BUF_MGR].u4_mem_size))
+    {
+        ps_dec->i4_error_code = ERROR_BUF_MGR;
+        return ERROR_BUF_MGR;
+    }
+
+    ih264_buf_mgr_init((buf_mgr_t *)ps_dec->pv_mv_buf_mgr);
+
+    for(i = 0; i < u4_num_bufs; i++)
+    {
+        /* Both running offsets are known to be within their records here,
+         * so the subtractions cannot wrap; verify the next slice fits
+         * before the memset below touches it. */
+        if((col_flag_buffer_size >
+                        (ps_dec->ps_mem_tab[MEM_REC_MV_BUF_MGR].u4_mem_size
+                                        - u4_mv_buf_mgr_mem_used)) ||
+           (mvpred_buffer_size >
+                        (ps_dec->ps_mem_tab[MEM_REC_MVBANK].u4_mem_size
+                                        - u4_mv_bank_mem_used)))
+        {
+            ps_dec->i4_error_code = ERROR_BUF_MGR;
+            return ERROR_BUF_MGR;
+        }
+
+        pu1_col_zero_flag_buf = pu1_mv_buf_mgr_base + u4_mv_buf_mgr_mem_used;
+        u4_mv_buf_mgr_mem_used += col_flag_buffer_size;
+
+        ps_mv = (mv_pred_t *)(pu1_mv_bank_base + u4_mv_bank_mem_used);
+        u4_mv_bank_mem_used += mvpred_buffer_size;
+
+        /* Zero the leading rows, then publish the pointer just past them */
+        memset(ps_mv, 0, ((ui_width * OFFSET_MV_BANK_ROW) >> 4) * sizeof(mv_pred_t));
+        ps_mv += (ui_width * OFFSET_MV_BANK_ROW) >> 4;
+
+        ps_col_mv->pv_col_zero_flag = (void *)pu1_col_zero_flag_buf;
+        ps_col_mv->pv_mv = (void *)ps_mv;
+        buf_ret = ih264_buf_mgr_add((buf_mgr_t *)ps_dec->pv_mv_buf_mgr, ps_col_mv,
+                                    (WORD32)i);
+        if(0 != buf_ret)
+        {
+            ps_dec->i4_error_code = ERROR_BUF_MGR;
+            return ERROR_BUF_MGR;
+        }
+        ps_col_mv++;
+    }
+
+    return OK;
+
+}
+
+
+/*
+ * Scatters the packed levels of one 4x4 transform unit into a coefficient
+ * block. The set bits of u2_sig_coeff_map are visited from the highest
+ * bit downwards; for each one the next value of ai2_level[] is stored at
+ * the output position given by pu1_inv_scan[bit index]. Output positions
+ * whose bit is clear are left untouched.
+ */
+void ih264d_unpack_coeff4x4_dc_4x4blk(tu_sblk4x4_coeff_data_t *ps_tu_4x4,
+                                      WORD16 *pi2_out_coeff_data,
+                                      UWORD8 *pu1_inv_scan)
+{
+    WORD16 *pi2_next_level = &ps_tu_4x4->ai2_level[0];
+    UWORD16 u2_pending = ps_tu_4x4->u2_sig_coeff_map;
+
+    for(; 0 != u2_pending; pi2_next_level++)
+    {
+        /* CLZ counts over 32 bits, hence 31 - count is the top set bit */
+        WORD32 i4_lead_zeros = CLZ(u2_pending);
+        WORD32 i4_bit = 31 - i4_lead_zeros;
+        WORD32 i4_out_pos;
+
+        RESET_BIT(u2_pending, i4_bit);
+
+        i4_out_pos = pu1_inv_scan[i4_bit];
+        pi2_out_coeff_data[i4_out_pos] = *pi2_next_level;
+    }
+}
diff --git a/decoder/ih264d_utils.h b/decoder/ih264d_utils.h
new file mode 100755
index 0000000..a1a64d5
--- /dev/null
+++ b/decoder/ih264d_utils.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef _IH264D_UTILS_H_
+#define _IH264D_UTILS_H_
+/*!
+**************************************************************************
+* \file ih264d_utils.h
+*
+* \brief
+* Contains declarations of routines
+* that handle start-of-picture and end-of-picture processing
+*
+* \date
+* 19/12/2002
+*
+* \author AI
+**************************************************************************
+*/
+#include "ih264d_defs.h"
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_structs.h"
+#include "ih264d_parse_cavlc.h"
+
+void pad_frm_buff_vert(dec_struct_t *ps_dec);
+
+UWORD8 ih264d_is_end_of_pic(UWORD16 u2_frame_num,
+ UWORD8 u1_nal_ref_idc,
+ pocstruct_t *ps_cur_poc,
+ pocstruct_t *ps_prev_poc,
+ dec_slice_params_t * ps_prev_slice,
+ UWORD8 u1_pic_order_cnt_type,
+ UWORD8 u1_nal_unit_type,
+ UWORD32 u4_idr_pic_id,
+ UWORD8 u1_field_pic_flag,
+ UWORD8 u1_bottom_field_flag);
+
+WORD32 ih264d_end_of_pic_processing(dec_struct_t * ps_dec);
+
+WORD32 ih264d_init_pic(dec_struct_t *ps_dec,
+ UWORD16 u2_frame_num,
+ WORD32 i4_poc,
+ dec_pic_params_t * ps_pps);
+
+WORD32 ih264d_end_of_pic_processing(dec_struct_t * ps_dec); /* NOTE(review): repeats the identical prototype declared a few lines above */
+WORD32 ih264d_decode_pic_order_cnt(UWORD8 u1_is_idr_slice,
+ UWORD32 u2_frame_num, /* 32-bit despite the u2_ prefix */
+ pocstruct_t *ps_prev_poc,
+ pocstruct_t *ps_cur_poc,
+ dec_slice_params_t *ps_cur_slice,
+ dec_pic_params_t * ps_pps,
+ UWORD8 u1_nal_ref_idc,
+ UWORD8 u1_bottom_field_flag,
+ UWORD8 u1_field_pic_flag,
+ WORD32 *pi4_poc); /* output: computed POC is written through this pointer by the caller's usage - TODO confirm in the definition */
+void ih264d_release_display_bufs(dec_struct_t *ps_dec);
+WORD32 ih264d_assign_display_seq(dec_struct_t *ps_dec);
+void ih264d_assign_pic_num(dec_struct_t *ps_dec);
+
+void ih264d_unpack_coeff4x4_dc_4x4blk(tu_sblk4x4_coeff_data_t *ps_tu_4x4,
+ WORD16 *pi2_out_coeff_data,
+ UWORD8 *pu1_inv_scan);
+
+WORD32 ih264d_update_qp(dec_struct_t * ps_dec, const WORD8 i1_qp);
+WORD32 ih264d_decode_gaps_in_frame_num(dec_struct_t *ps_dec,
+ UWORD16 u2_frame_num);
+
+WORD32 ih264d_get_next_display_field(dec_struct_t * ps_dec,
+ ivd_out_bufdesc_t *ps_out_buffer,
+ ivd_get_display_frame_op_t *pv_disp_op);
+
+void ih264d_release_display_field(dec_struct_t *ps_dec,
+ ivd_get_display_frame_op_t *pv_disp_op);
+void ih264d_close_video_decoder(iv_obj_t *iv_obj_t); /* NOTE(review): the parameter is named after its own type */
+WORD32 ih264d_get_dpb_size_new(UWORD32 u4_level_idc,
+ UWORD32 width,
+ UWORD32 height);
+WORD32 ih264d_get_next_nal_unit(UWORD8 *pu1_buf,
+ UWORD32 u4_cur_pos,
+ UWORD32 u4_max_ofst,
+ UWORD32 *pu4_length_of_start_code);
+
+
+#endif /* _IH264D_UTILS_H_ */
diff --git a/decoder/ih264d_vui.c b/decoder/ih264d_vui.c
new file mode 100755
index 0000000..87276bd
--- /dev/null
+++ b/decoder/ih264d_vui.c
@@ -0,0 +1,233 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : ih264d_vui.c */
+/* */
+/* Description : This file contains routines to parse VUI NAL's */
+/* */
+/* List of Functions : ih264d_parse_hrd_parametres, ih264d_parse_vui_parametres */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 25 05 2005 NS Draft */
+/* */
+/*****************************************************************************/
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_vui.h"
+#include "ih264d_bitstrm.h"
+#include "ih264d_parse_cavlc.h"
+#include "ih264d_structs.h"
+#include "ih264d_error_handler.h"
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_parse_hrd_parametres */
+/* */
+/* Description : This function parses hrd_t parametres */
+/* Inputs : ps_hrd pointer to HRD params */
+/* ps_bitstrm Bitstream */
+/* Globals : None */
+/* Processing : Parses HRD params */
+/* Outputs : Fields of *ps_hrd are filled from the bitstream */
+/* Returns : OK, or ERROR_INV_SPS_PPS_T if cpb_cnt exceeds 31 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 05 2002 NS Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_hrd_parametres(hrd_t *ps_hrd, dec_bit_stream_t *ps_bitstrm)
+{
+    UWORD32 *pu4_ofst = &ps_bitstrm->u4_ofst;
+    UWORD32 *pu4_buf = ps_bitstrm->pu4_buffer;
+    UWORD8 u1_cpb = 0;
+    UWORD8 u1_num_cpb;
+
+    /* The count is written to *ps_hrd before it is range-checked. */
+    ps_hrd->u4_cpb_cnt = 1 + ih264d_uev(pu4_ofst, pu4_buf);
+    if(ps_hrd->u4_cpb_cnt > 31)
+    {
+        return ERROR_INV_SPS_PPS_T;
+    }
+
+    ps_hrd->u1_bit_rate_scale = ih264d_get_bits_h264(ps_bitstrm, 4);
+    ps_hrd->u1_cpb_size_scale = ih264d_get_bits_h264(ps_bitstrm, 4);
+
+    /* Per-CPB entries: bit rate and CPB size (both 1 + ue(v)), then CBR flag. */
+    u1_num_cpb = (UWORD8)ps_hrd->u4_cpb_cnt;
+    while(u1_cpb < u1_num_cpb)
+    {
+        ps_hrd->u4_bit_rate[u1_cpb] = 1 + ih264d_uev(pu4_ofst, pu4_buf);
+        ps_hrd->u4_cpb_size[u1_cpb] = 1 + ih264d_uev(pu4_ofst, pu4_buf);
+        ps_hrd->u1_cbr_flag[u1_cpb] = ih264d_get_bits_h264(ps_bitstrm, 1);
+        u1_cpb++;
+    }
+
+    /* Three 5-bit fields stored with +1, followed by a plain 5-bit field. */
+    ps_hrd->u1_initial_cpb_removal_delay = 1 + ih264d_get_bits_h264(ps_bitstrm, 5);
+    ps_hrd->u1_cpb_removal_delay_length = 1 + ih264d_get_bits_h264(ps_bitstrm, 5);
+    ps_hrd->u1_dpb_output_delay_length = 1 + ih264d_get_bits_h264(ps_bitstrm, 5);
+    ps_hrd->u1_time_offset_length = ih264d_get_bits_h264(ps_bitstrm, 5);
+
+    return OK;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : ih264d_parse_vui_parametres */
+/* */
+/* Description : This function parses VUI NALs. */
+/* Inputs : ps_vu4 pointer to VUI params */
+/* ps_bitstrm Bitstream */
+/* Globals : None */
+/* Processing : Parses VUI NAL's units and stores the info */
+/* Outputs : Fields of *ps_vu4 are filled from the bitstream */
+/* Returns : OK, or the error code returned by HRD parsing */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 05 2002 NS Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 ih264d_parse_vui_parametres(vui_t *ps_vu4,
+                                   dec_bit_stream_t *ps_bitstrm)
+{
+    UWORD32 *pu4_ofst = &ps_bitstrm->u4_ofst;
+    UWORD32 *pu4_buf = ps_bitstrm->pu4_buffer;
+    UWORD8 u1_present;
+    WORD32 i4_status;
+
+    /* Aspect ratio: 8-bit idc, explicit 16-bit width/height for extended SAR */
+    u1_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+    if(u1_present)
+    {
+        ps_vu4->u1_aspect_ratio_idc = (UWORD8)ih264d_get_bits_h264(ps_bitstrm, 8);
+        if(ps_vu4->u1_aspect_ratio_idc == VUI_EXTENDED_SAR)
+        {
+            ps_vu4->u2_sar_width = ih264d_get_bits_h264(ps_bitstrm, 16);
+            ps_vu4->u2_sar_height = ih264d_get_bits_h264(ps_bitstrm, 16);
+        }
+    }
+
+    /* Overscan flag, guarded by its own presence bit */
+    u1_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+    if(u1_present)
+    {
+        ps_vu4->u1_overscan_appropriate_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
+    }
+
+    /* Video format and range, with an optional colour description */
+    u1_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+    if(u1_present)
+    {
+        ps_vu4->u1_video_format = ih264d_get_bits_h264(ps_bitstrm, 3);
+        ps_vu4->u1_video_full_range_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
+
+        u1_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+        if(u1_present)
+        {
+            ps_vu4->u1_colour_primaries = ih264d_get_bits_h264(ps_bitstrm, 8);
+            ps_vu4->u1_tfr_chars = ih264d_get_bits_h264(ps_bitstrm, 8);
+            ps_vu4->u1_matrix_coeffs = ih264d_get_bits_h264(ps_bitstrm, 8);
+        }
+    }
+
+    /* Two ue(v) values stored in the 8-bit u1_cr_* fields */
+    u1_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+    if(u1_present)
+    {
+        ps_vu4->u1_cr_top_field = ih264d_uev(pu4_ofst, pu4_buf);
+        ps_vu4->u1_cr_bottom_field = ih264d_uev(pu4_ofst, pu4_buf);
+    }
+
+    /* Timing info: two 32-bit values and the fixed-frame-rate flag */
+    u1_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+    if(u1_present)
+    {
+        ps_vu4->u4_num_units_in_tick = ih264d_get_bits_h264(ps_bitstrm, 32);
+        ps_vu4->u4_time_scale = ih264d_get_bits_h264(ps_bitstrm, 32);
+        ps_vu4->u1_fixed_frame_rate_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
+    }
+
+    /* NAL HRD parameters; a parse failure aborts VUI parsing */
+    ps_vu4->u1_nal_hrd_params_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+    if(ps_vu4->u1_nal_hrd_params_present)
+    {
+        i4_status = ih264d_parse_hrd_parametres(&ps_vu4->s_nal_hrd, ps_bitstrm);
+        if(OK != i4_status)
+        {
+            return i4_status;
+        }
+    }
+
+    /* VCL HRD parameters, handled the same way */
+    ps_vu4->u1_vcl_hrd_params_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+    if(ps_vu4->u1_vcl_hrd_params_present)
+    {
+        i4_status = ih264d_parse_hrd_parametres(&ps_vu4->s_vcl_hrd, ps_bitstrm);
+        if(OK != i4_status)
+        {
+            return i4_status;
+        }
+    }
+
+    /* The low-delay flag is read only when some HRD block was signalled */
+    if(ps_vu4->u1_nal_hrd_params_present || ps_vu4->u1_vcl_hrd_params_present)
+    {
+        ps_vu4->u1_low_delay_hrd_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
+    }
+    ps_vu4->u1_pic_struct_present_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
+
+    /* MV/size limits and reorder/buffering depths, guarded by a presence bit */
+    u1_present = ih264d_get_bits_h264(ps_bitstrm, 1);
+    if(!u1_present)
+    {
+        /* Not signalled: fall back to a large reorder depth */
+        ps_vu4->u4_num_reorder_frames = 64;
+    }
+    else
+    {
+        ps_vu4->u1_mv_over_pic_boundaries_flag = ih264d_get_bits_h264(ps_bitstrm, 1);
+        ps_vu4->u4_max_bytes_per_pic_denom = ih264d_uev(pu4_ofst, pu4_buf);
+        ps_vu4->u4_max_bits_per_mb_denom = ih264d_uev(pu4_ofst, pu4_buf);
+        ps_vu4->u4_log2_max_mv_length_horz = ih264d_uev(pu4_ofst, pu4_buf);
+        ps_vu4->u4_log2_max_mv_length_vert = ih264d_uev(pu4_ofst, pu4_buf);
+        ps_vu4->u4_num_reorder_frames = ih264d_uev(pu4_ofst, pu4_buf);
+        ps_vu4->u4_max_dec_frame_buffering = ih264d_uev(pu4_ofst, pu4_buf);
+    }
+
+    return OK;
+}
diff --git a/decoder/ih264d_vui.h b/decoder/ih264d_vui.h
new file mode 100755
index 0000000..e380a5b
--- /dev/null
+++ b/decoder/ih264d_vui.h
@@ -0,0 +1,96 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : ih264d_vui.h */
+/* */
+/* Description : This file contains structures and prototypes for VUI parsing */
+/* */
+/* List of Functions : ih264d_parse_vui_parametres (declaration only) */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 25 05 2005 NS Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _IH264D_VUI_H_
+#define _IH264D_VUI_H_
+
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264d_bitstrm.h"
+
+#define VUI_EXTENDED_SAR 255
+
+typedef struct /* HRD parameters as filled by ih264d_parse_hrd_parametres() */
+{
+ UWORD32 u4_cpb_cnt; /* 1 + ue(v); the parser rejects values above 31 */
+ UWORD8 u1_bit_rate_scale; /* 4-bit field */
+ UWORD8 u1_cpb_size_scale; /* 4-bit field */
+ UWORD32 u4_bit_rate[32]; /* per CPB entry: 1 + ue(v) */
+ UWORD32 u4_cpb_size[32]; /* per CPB entry: 1 + ue(v) */
+ UWORD8 u1_cbr_flag[32]; /* per CPB entry: 1-bit flag */
+ UWORD8 u1_initial_cpb_removal_delay; /* 1 + 5-bit field */
+ UWORD8 u1_cpb_removal_delay_length; /* 1 + 5-bit field */
+ UWORD8 u1_dpb_output_delay_length; /* 1 + 5-bit field */
+ UWORD8 u1_time_offset_length; /* 5-bit field, stored as read */
+} hrd_t;
+
+typedef struct /* VUI fields as filled by ih264d_parse_vui_parametres() */
+{
+ UWORD8 u1_aspect_ratio_idc; /* 8 bits; parsed only when the aspect-ratio presence bit is set */
+ UWORD16 u2_sar_width; /* 16 bits; parsed only when idc == VUI_EXTENDED_SAR */
+ UWORD16 u2_sar_height; /* 16 bits; parsed only when idc == VUI_EXTENDED_SAR */
+ UWORD8 u1_overscan_appropriate_flag; /* 1 bit; parsed only when its presence bit is set */
+ UWORD8 u1_video_format; /* 3 bits; optional */
+ UWORD8 u1_video_full_range_flag; /* 1 bit; parsed together with u1_video_format */
+ UWORD8 u1_colour_primaries; /* 8 bits; optional colour description */
+ UWORD8 u1_tfr_chars; /* 8 bits; optional colour description */
+ UWORD8 u1_matrix_coeffs; /* 8 bits; optional colour description */
+ UWORD8 u1_cr_top_field; /* ue(v) value stored in 8 bits; optional */
+ UWORD8 u1_cr_bottom_field; /* ue(v) value stored in 8 bits; optional */
+ UWORD32 u4_num_units_in_tick; /* 32 bits; optional timing info */
+ UWORD32 u4_time_scale; /* 32 bits; optional timing info */
+ UWORD8 u1_fixed_frame_rate_flag; /* 1 bit; parsed together with the timing info */
+ UWORD8 u1_nal_hrd_params_present; /* 1 bit; always parsed */
+ hrd_t s_nal_hrd; /* parsed only when u1_nal_hrd_params_present is set */
+ UWORD8 u1_vcl_hrd_params_present; /* 1 bit; always parsed */
+ hrd_t s_vcl_hrd; /* parsed only when u1_vcl_hrd_params_present is set */
+ UWORD8 u1_low_delay_hrd_flag; /* 1 bit; parsed when either HRD block is present */
+ UWORD8 u1_pic_struct_present_flag; /* 1 bit; always parsed */
+ UWORD8 u1_mv_over_pic_boundaries_flag; /* 1 bit; optional restriction block */
+ UWORD32 u4_max_bytes_per_pic_denom; /* ue(v); optional restriction block */
+ UWORD32 u4_max_bits_per_mb_denom; /* ue(v); optional restriction block */
+ UWORD32 u4_log2_max_mv_length_horz; /* ue(v); optional restriction block */
+ UWORD32 u4_log2_max_mv_length_vert; /* ue(v); optional restriction block */
+ UWORD32 u4_num_reorder_frames; /* ue(v); the parser sets 64 when the restriction block is absent */
+ UWORD32 u4_max_dec_frame_buffering; /* ue(v); optional restriction block */
+} vui_t;
+
+WORD32 ih264d_parse_vui_parametres(vui_t *ps_vu4, /* fills *ps_vu4; returns OK or an HRD parsing error code */
+ dec_bit_stream_t *ps_bitstrm);
+#endif /* _IH264D_VUI_H_ */
+
diff --git a/decoder/iv.h b/decoder/iv.h
new file mode 100755
index 0000000..3a2ebf5
--- /dev/null
+++ b/decoder/iv.h
@@ -0,0 +1,420 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* iv.h
+*
+* @brief
+* This file contains all the necessary structure and enumeration
+* definitions needed for the Application Program Interface(API) of the
+* Ittiam Video and Image codecs
+*
+* @author
+* 100239(RCY)
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IV_H
+#define _IV_H
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+
+/* IV_API_CALL_STATUS_T:This is only to return the FAIL/PASS status to the */
+/* application for the current API call */
+
+typedef enum {
+ IV_STATUS_NA = 0x7FFFFFFF,
+ IV_SUCCESS = 0x0,
+ IV_FAIL = 0x1,
+}IV_API_CALL_STATUS_T;
+
+/* IV_MEM_TYPE_T: This enumeration defines the type of memory (internal or */
+/* external) along with the cacheable/non-cacheable attributes */
+
+typedef enum {
+ IV_NA_MEM_TYPE = 0x7FFFFFFF,
+ IV_INTERNAL_CACHEABLE_PERSISTENT_MEM = 0x1,
+ IV_INTERNAL_CACHEABLE_SCRATCH_MEM = 0x2,
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM = 0x3,
+ IV_EXTERNAL_CACHEABLE_SCRATCH_MEM = 0x4,
+ IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x5,
+ IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x6,
+ IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x7,
+ IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x8
+}IV_MEM_TYPE_T;
+
+/* IV_COLOR_FORMAT_T: This enumeration lists all the color formats that are */
+/* used in video/image codecs */
+
+typedef enum {
+ IV_CHROMA_NA = 0x7FFFFFFF,
+ IV_YUV_420P = 0x1,
+ IV_YUV_422P = 0x2,
+ IV_420_UV_INTL = 0x3,
+ IV_YUV_422IBE = 0x4,
+ IV_YUV_422ILE = 0x5,
+ IV_YUV_444P = 0x6,
+ IV_YUV_411P = 0x7,
+ IV_GRAY = 0x8,
+ IV_RGB_565 = 0x9,
+ IV_RGB_24 = 0xa,
+ IV_YUV_420SP_UV = 0xb,
+ IV_YUV_420SP_VU = 0xc,
+ IV_RGBA_8888 = 0xd
+}IV_COLOR_FORMAT_T;
+
+/* IV_PICTURE_CODING_TYPE_T: VOP/Frame coding type Enumeration */
+
+typedef enum {
+ IV_NA_FRAME = 0x7FFFFFFF,
+ IV_I_FRAME = 0x0,
+ IV_P_FRAME = 0x1,
+ IV_B_FRAME = 0x2,
+ IV_IDR_FRAME = 0x3,
+ IV_II_FRAME = 0x4,
+ IV_IP_FRAME = 0x5,
+ IV_IB_FRAME = 0x6,
+ IV_PI_FRAME = 0x7,
+ IV_PP_FRAME = 0x8,
+ IV_PB_FRAME = 0x9,
+ IV_BI_FRAME = 0xa,
+ IV_BP_FRAME = 0xb,
+ IV_BB_FRAME = 0xc,
+ IV_MBAFF_I_FRAME = 0xd,
+ IV_MBAFF_P_FRAME = 0xe,
+ IV_MBAFF_B_FRAME = 0xf,
+ IV_MBAFF_IDR_FRAME = 0x10,
+ IV_NOT_CODED_FRAME = 0x11,
+ IV_FRAMETYPE_DEFAULT = IV_I_FRAME
+}IV_PICTURE_CODING_TYPE_T;
+
+/* IV_FLD_TYPE_T: field type Enumeration */
+
+typedef enum {
+ IV_NA_FLD = 0x7FFFFFFF,
+ IV_TOP_FLD = 0x0,
+ IV_BOT_FLD = 0x1,
+ IV_FLD_TYPE_DEFAULT = IV_TOP_FLD
+}IV_FLD_TYPE_T;
+
+/* IV_CONTENT_TYPE_T: Video content type */
+
+typedef enum {
+ IV_CONTENTTYPE_NA = 0x7FFFFFFF,
+ IV_PROGRESSIVE = 0x0,
+ IV_INTERLACED = 0x1,
+ IV_PROGRESSIVE_FRAME = 0x2,
+ IV_INTERLACED_FRAME = 0x3,
+ IV_INTERLACED_TOPFIELD = 0x4,
+ IV_INTERLACED_BOTTOMFIELD = 0x5,
+ IV_CONTENTTYPE_DEFAULT = IV_PROGRESSIVE,
+}IV_CONTENT_TYPE_T;
+
+/* IV_API_COMMAND_TYPE_T:API command type */
+typedef enum {
+ IV_CMD_NA = 0x7FFFFFFF,
+ IV_CMD_GET_NUM_MEM_REC = 0x0,
+ IV_CMD_FILL_NUM_MEM_REC = 0x1,
+ IV_CMD_RETRIEVE_MEMREC = 0x2,
+ IV_CMD_INIT = 0x3,
+ IV_CMD_DUMMY_ELEMENT = 0x4,
+}IV_API_COMMAND_TYPE_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/* IV_OBJ_T: This structure defines the handle for the codec instance */
+
+typedef struct {
+ /**
+ * u4_size of the structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * Pointer to the API function pointer table of the codec
+ */
+ void *pv_fxns;
+
+ /**
+ * Pointer to the handle of the codec
+ */
+ void *pv_codec_handle;
+}iv_obj_t;
+
+/* iv_mem_rec_t: This structure defines the memory record holder which will */
+/* be used by the codec to communicate its memory requirements to the */
+/* application through appropriate API functions */
+
+typedef struct {
+ /**
+ * u4_size of the structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * Pointer to the memory allocated by the application
+ */
+ void *pv_base;
+
+ /**
+ * u4_size of the memory to be allocated
+ */
+ UWORD32 u4_mem_size;
+
+ /**
+ * Alignment of the memory pointer
+ */
+ UWORD32 u4_mem_alignment;
+ /**
+ * Nature of the memory to be allocated
+ */
+ IV_MEM_TYPE_T e_mem_type;
+}iv_mem_rec_t;
+
+/* IV_YUV_BUF_T: This structure defines attributes for the yuv buffer */
+
+typedef struct {
+ /**
+ * u4_size of the structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * Pointer to Luma (Y) Buffer
+ */
+
+ void *pv_y_buf;
+ /**
+ * Pointer to Chroma (Cb) Buffer
+ */
+ void *pv_u_buf;
+
+ /**
+ * Pointer to Chroma (Cr) Buffer
+ */
+ void *pv_v_buf;
+
+ /**
+ * Width of the Luma (Y) Buffer
+ */
+ UWORD32 u4_y_wd;
+
+ /**
+ * Height of the Luma (Y) Buffer
+ */
+ UWORD32 u4_y_ht;
+
+ /**
+ * Stride/Pitch of the Luma (Y) Buffer
+ */
+ UWORD32 u4_y_strd;
+
+ /**
+ * Width of the Chroma (Cb) Buffer
+ */
+ UWORD32 u4_u_wd;
+
+ /**
+ * Height of the Chroma (Cb) Buffer
+ */
+ UWORD32 u4_u_ht;
+
+ /**
+ * Stride/Pitch of the Chroma (Cb) Buffer
+ */
+ UWORD32 u4_u_strd;
+
+ /**
+ * Width of the Chroma (Cr) Buffer
+ */
+ UWORD32 u4_v_wd;
+
+ /**
+ * Height of the Chroma (Cr) Buffer
+ */
+ UWORD32 u4_v_ht;
+
+ /**
+ * Stride/Pitch of the Chroma (Cr) Buffer
+ */
+ UWORD32 u4_v_strd;
+}iv_yuv_buf_t;
+
+/*****************************************************************************/
+/* Get Number of Memory Records */
+/*****************************************************************************/
+
+/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_GET_NUM_MEM_REC */
+
+
+typedef struct {
+ /**
+ * u4_size of the structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * cmd
+ */
+ IV_API_COMMAND_TYPE_T e_cmd;
+}iv_num_mem_rec_ip_t;
+
+
+typedef struct {
+ /**
+ * u4_size of the structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * error code
+ */
+ UWORD32 u4_error_code;
+
+ /**
+ * num_mem_rec
+ */
+ UWORD32 u4_num_mem_rec;
+}iv_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Fill Memory Records */
+/*****************************************************************************/
+
+/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_FILL_NUM_MEM_REC */
+
+
+typedef struct {
+ /**
+ * u4_size of the structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * cmd
+ */
+ IV_API_COMMAND_TYPE_T e_cmd;
+
+ /**
+ * pointer to array of memrecords structures should be filled by codec
+ with details of memory resource requirements
+ */
+ iv_mem_rec_t *pv_mem_rec_location;
+
+ /**
+ * maximum width for which codec should request memory requirements
+ */
+ UWORD32 u4_max_frm_wd;
+
+ /**
+ * maximum height for which codec should request memory requirements
+ */
+ UWORD32 u4_max_frm_ht;
+}iv_fill_mem_rec_ip_t;
+
+
+typedef struct {
+ /**
+ * u4_size of the structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * error_code
+ */
+ UWORD32 u4_error_code;
+
+ /**
+ * no of memory record structures which are filled by codec
+ */
+ UWORD32 u4_num_mem_rec_filled;
+}iv_fill_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Retrieve Memory Records */
+/*****************************************************************************/
+
+/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_RETRIEVE_MEMREC */
+
+
+
+typedef struct {
+ /**
+ * size of this structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * command; IV_CMD_RETRIEVE_MEMREC for this call
+ */
+ IV_API_COMMAND_TYPE_T e_cmd;
+
+ /**
+ * array of memory-record structures that the codec fills with all the memory resources it holds
+ */
+ iv_mem_rec_t *pv_mem_rec_location;
+}iv_retrieve_mem_rec_ip_t;
+
+
+typedef struct {
+ /**
+ * u4_size of the structure
+ */
+ UWORD32 u4_size;
+
+ /**
+ * error_code
+ */
+ UWORD32 u4_error_code;
+
+ /**
+ * no of memory records filled by codec
+ */
+ UWORD32 u4_num_mem_rec_filled;
+}iv_retrieve_mem_rec_op_t;
+
+
+
+#endif /* _IV_H */
+
diff --git a/decoder/ivd.h b/decoder/ivd.h
new file mode 100755
index 0000000..955b81f
--- /dev/null
+++ b/decoder/ivd.h
@@ -0,0 +1,585 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ivd.h */
+/* */
+/* Description : This file contains all the necessary structure and */
+/* enumeration definitions needed for the Application */
+/* Program Interface(API) of the Ittiam Video Decoders */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 08 2010 100239(RCY) Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _IVD_H
+#define _IVD_H
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IVD_VIDDEC_MAX_IO_BUFFERS 64
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/* IVD_ARCH_T: Architecture Enumeration */
+typedef enum
+{
+ ARCH_NA = 0x7FFFFFFF,
+ ARCH_ARM_NONEON = 0x0,
+ ARCH_ARM_A9Q,
+ ARCH_ARM_A9A,
+ ARCH_ARM_A9,
+ ARCH_ARM_A7,
+ ARCH_ARM_A5,
+ ARCH_ARM_A15,
+ ARCH_ARM_NEONINTR,
+ ARCH_ARMV8_GENERIC,
+ ARCH_X86_GENERIC = 0x100,
+ ARCH_X86_SSSE3,
+ ARCH_X86_SSE42,
+ ARCH_X86_AVX2,
+ ARCH_MIPS_GENERIC = 0x200,
+ ARCH_MIPS_32
+}IVD_ARCH_T;
+
+/* IVD_SOC_T: SOC Enumeration */
+typedef enum
+{
+ SOC_NA = 0x7FFFFFFF,
+ SOC_GENERIC = 0x0,
+ SOC_HISI_37X = 0x100,
+}IVD_SOC_T;
+
+/* IVD_FRAME_SKIP_MODE_T:Skip mode Enumeration */
+
+typedef enum {
+ IVD_SKIP_NONE = 0x7FFFFFFF,
+ IVD_SKIP_P = 0x1,
+ IVD_SKIP_B = 0x2,
+ IVD_SKIP_I = 0x3,
+ IVD_SKIP_IP = 0x4,
+ IVD_SKIP_IB = 0x5,
+ IVD_SKIP_PB = 0x6,
+ IVD_SKIP_IPB = 0x7,
+ IVD_SKIP_IDR = 0x8,
+ IVD_SKIP_DEFAULT = IVD_SKIP_NONE,
+}IVD_FRAME_SKIP_MODE_T;
+
+/* IVD_VIDEO_DECODE_MODE_T: Set decoder to decode either frame worth of data */
+/* or only header worth of data */
+
+typedef enum {
+ IVD_DECODE_MODE_NA = 0x7FFFFFFF,
+
+ /* This enables the codec to process all decodable units */
+ IVD_DECODE_FRAME = 0x0,
+
+ /* This enables the codec to decode header only */
+ IVD_DECODE_HEADER = 0x1,
+
+
+
+}IVD_VIDEO_DECODE_MODE_T;
+
+
+/* IVD_DISPLAY_FRAME_OUT_MODE_T: Video Display Frame Output Mode */
+
+typedef enum {
+
+ IVD_DISPLAY_ORDER_NA = 0x7FFFFFFF,
+ /* To set codec to fill output buffers in display order */
+ IVD_DISPLAY_FRAME_OUT = 0x0,
+
+ /* To set codec to fill output buffers in decode order */
+ IVD_DECODE_FRAME_OUT = 0x1,
+}IVD_DISPLAY_FRAME_OUT_MODE_T;
+
+
+/* IVD_API_COMMAND_TYPE_T:API command type */
+typedef enum {
+ IVD_CMD_VIDEO_NA = 0x7FFFFFFF,
+ IVD_CMD_VIDEO_CTL = IV_CMD_DUMMY_ELEMENT + 1,
+ IVD_CMD_VIDEO_DECODE,
+ IVD_CMD_GET_DISPLAY_FRAME,
+ IVD_CMD_REL_DISPLAY_FRAME,
+ IVD_CMD_SET_DISPLAY_FRAME
+}IVD_API_COMMAND_TYPE_T;
+
+/* IVD_CONTROL_API_COMMAND_TYPE_T: Video Control API command type */
+
+typedef enum {
+ IVD_CMD_NA = 0x7FFFFFFF,
+ IVD_CMD_CTL_GETPARAMS = 0x0,
+ IVD_CMD_CTL_SETPARAMS = 0x1,
+ IVD_CMD_CTL_RESET = 0x2,
+ IVD_CMD_CTL_SETDEFAULT = 0x3,
+ IVD_CMD_CTL_FLUSH = 0x4,
+ IVD_CMD_CTL_GETBUFINFO = 0x5,
+ IVD_CMD_CTL_GETVERSION = 0x6,
+ IVD_CMD_CTL_CODEC_SUBCMD_START = 0x7
+}IVD_CONTROL_API_COMMAND_TYPE_T;
+
+
+/* IVD_ERROR_BITS_T: A UWORD32 container will be used for reporting the error*/
+/* code to the application. The first 8 bits starting from LSB have been */
+/* reserved for the codec to report internal error details. The rest of the */
+/* bits will be generic for all video decoders and each bit has an associated*/
+/* meaning as mentioned below. The unused bit fields are reserved for future */
+/* extensions and will be zero in the current implementation */
+
+typedef enum { /* each value is a bit position in the UWORD32 error word, not a mask */
+ /* Bit 8 - Applied concealment. */
+ IVD_APPLIEDCONCEALMENT = 0x8,
+ /* Bit 9 - Insufficient input data. */
+ IVD_INSUFFICIENTDATA = 0x9,
+ /* Bit 10 - Data problem/corruption. */
+ IVD_CORRUPTEDDATA = 0xa,
+ /* Bit 11 - Header problem/corruption. */
+ IVD_CORRUPTEDHEADER = 0xb,
+ /* Bit 12 - Unsupported feature/parameter in input. */
+ IVD_UNSUPPORTEDINPUT = 0xc,
+ /* Bit 13 - Unsupported input parameter or configuration. */
+ IVD_UNSUPPORTEDPARAM = 0xd,
+ /* Bit 14 - Fatal error (stop the codec). If there is an */
+ /* error and this bit is not set, the error is a recoverable one. */
+ IVD_FATALERROR = 0xe,
+ /* Bit 15 - Invalid bitstream. Applies when Bitstream/YUV frame */
+ /* buffer for encode/decode call is made with non-valid or zero u4_size */
+ /* data */
+ IVD_INVALID_BITSTREAM = 0xf,
+ /* Bit 16 - Incomplete bitstream. */
+ IVD_INCOMPLETE_BITSTREAM = 0x10,
+ IVD_ERROR_BITS_T_DUMMY_ELEMENT = 0x7FFFFFFF
+}IVD_ERROR_BITS_T;
+
+
+/* IVD_ERROR_CODES_T: Video decoder API error codes */
+typedef enum {
+ IVD_ERROR_NONE = 0x0,
+ IVD_NUM_MEM_REC_FAILED = 0x1,
+ IVD_NUM_REC_NOT_SUFFICIENT = 0x2,
+ IVD_FILL_MEM_REC_FAILED = 0x3,
+ IVD_REQUESTED_WIDTH_NOT_SUPPPORTED = 0x4,
+ IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED = 0x5,
+ IVD_INIT_DEC_FAILED = 0x6,
+ IVD_INIT_DEC_NOT_SUFFICIENT = 0x7,
+ IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED = 0x8,
+ IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED = 0x9,
+ IVD_INIT_DEC_MEM_NOT_ALIGNED = 0xa,
+ IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED = 0xb,
+ IVD_INIT_DEC_MEM_REC_NOT_SUFFICIENT = 0xc,
+ IVD_GET_VERSION_DATABUFFER_SZ_INSUFFICIENT = 0xd,
+ IVD_BUFFER_SIZE_SET_TO_ZERO = 0xe,
+ IVD_UNEXPECTED_END_OF_STREAM = 0xf,
+ IVD_SEQUENCE_HEADER_NOT_DECODED = 0x10,
+ IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED = 0x11,
+ IVD_MAX_FRAME_LIMIT_REACHED = 0x12,
+ IVD_IP_API_STRUCT_SIZE_INCORRECT = 0x13,
+ IVD_OP_API_STRUCT_SIZE_INCORRECT = 0x14,
+ IVD_HANDLE_NULL = 0x15,
+ IVD_HANDLE_STRUCT_SIZE_INCORRECT = 0x16,
+ IVD_INVALID_HANDLE_NULL = 0x17,
+ IVD_INVALID_API_CMD = 0x18,
+ IVD_UNSUPPORTED_API_CMD = 0x19,
+ IVD_MEM_REC_STRUCT_SIZE_INCORRECT = 0x1a,
+ IVD_DISP_FRM_ZERO_OP_BUFS = 0x1b,
+ IVD_DISP_FRM_OP_BUF_NULL = 0x1c,
+ IVD_DISP_FRM_ZERO_OP_BUF_SIZE = 0x1d,
+ IVD_DEC_FRM_BS_BUF_NULL = 0x1e,
+ IVD_SET_CONFG_INVALID_DEC_MODE = 0x1f,
+ IVD_SET_CONFG_UNSUPPORTED_DISP_WIDTH = 0x20,
+ IVD_RESET_FAILED = 0x21,
+ IVD_INIT_DEC_MEM_REC_OVERLAP_ERR = 0x22,
+ IVD_INIT_DEC_MEM_REC_BASE_NULL = 0x23,
+ IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR = 0x24,
+ IVD_INIT_DEC_MEM_REC_INSUFFICIENT_SIZE = 0x25,
+ IVD_INIT_DEC_MEM_REC_INCORRECT_TYPE = 0x26,
+ IVD_DEC_NUMBYTES_INV = 0x27,
+ IVD_DEC_REF_BUF_NULL = 0x28,
+ IVD_DEC_FRM_SKIPPED = 0x29,
+ IVD_RES_CHANGED = 0x2a,
+ IVD_DUMMY_ELEMENT_FOR_CODEC_EXTENSIONS = 0x300,
+}IVD_ERROR_CODES_T;
+
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+/* structure for passing output buffers to codec during get display buffer */
+/* call */
+typedef struct {
+
+ /* number of output buffers */
+ UWORD32 u4_num_bufs;
+
+ /* list of pointers to output buffers */
+ UWORD8 *pu1_bufs[IVD_VIDDEC_MAX_IO_BUFFERS];
+
+ /* sizes of each output buffer */
+ UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+
+}ivd_out_bufdesc_t;
+
+/*****************************************************************************/
+/* Initialize decoder */
+/*****************************************************************************/
+
+/* e_cmd = IV_CMD_INIT (enumerator of IV_API_COMMAND_TYPE_T in iv.h) */
+
+
+typedef struct {
+ /* size of this structure */
+ UWORD32 u4_size;
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ /* number of memrecords which are allocated on request of codec through
+ fill mem records */
+ UWORD32 u4_num_mem_rec;
+ /* maximum width for which codec should be initialized */
+ UWORD32 u4_frm_max_wd;
+ /* maximum height for which codec should be initialized */
+ UWORD32 u4_frm_max_ht;
+ /* format in which codec has to give out frame data for display */
+ IV_COLOR_FORMAT_T e_output_format;
+ /* pointer to memrecord array, which contains allocated resources */
+ iv_mem_rec_t *pv_mem_rec_location;
+}ivd_init_ip_t;
+
+
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ UWORD32 u4_error_code;
+}ivd_init_op_t;
+
+
+/*****************************************************************************/
+/* Video Decode */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_DECODE */
+
+
+typedef struct {
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ UWORD32 u4_ts;
+ UWORD32 u4_num_Bytes;
+ void *pv_stream_buffer;
+
+ /* output buffer desc */
+ ivd_out_bufdesc_t s_out_buffer;
+
+}ivd_video_decode_ip_t;
+
+
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ UWORD32 u4_error_code;
+ UWORD32 u4_num_bytes_consumed;
+ UWORD32 u4_pic_wd;
+ UWORD32 u4_pic_ht;
+ IV_PICTURE_CODING_TYPE_T e_pic_type;
+ UWORD32 u4_frame_decoded_flag;
+ UWORD32 u4_new_seq;
+
+ UWORD32 u4_output_present;
+ UWORD32 u4_progressive_frame_flag;
+ UWORD32 u4_is_ref_flag;
+ IV_COLOR_FORMAT_T e_output_format;
+ iv_yuv_buf_t s_disp_frm_buf;
+ IV_FLD_TYPE_T e4_fld_type;
+ UWORD32 u4_ts;
+ UWORD32 u4_disp_buf_id;
+}ivd_video_decode_op_t;
+
+
+/*****************************************************************************/
+/* Get Display Frame */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_GET_DISPLAY_FRAME */
+
+typedef struct
+{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+
+ IVD_API_COMMAND_TYPE_T e_cmd;
+
+ /* output buffer desc */
+ ivd_out_bufdesc_t s_out_buffer;
+
+}ivd_get_display_frame_ip_t;
+
+
+typedef struct
+{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ UWORD32 u4_error_code;
+ UWORD32 u4_progressive_frame_flag;
+ IV_PICTURE_CODING_TYPE_T e_pic_type;
+ UWORD32 u4_is_ref_flag;
+ IV_COLOR_FORMAT_T e_output_format;
+ iv_yuv_buf_t s_disp_frm_buf;
+ IV_FLD_TYPE_T e4_fld_type;
+ UWORD32 u4_ts;
+ UWORD32 u4_disp_buf_id;
+}ivd_get_display_frame_op_t;
+
+/*****************************************************************************/
+/* Set Display Frame */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_SET_DISPLAY_FRAME */
+
+typedef struct
+{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+
+ IVD_API_COMMAND_TYPE_T e_cmd;
+
+ UWORD32 num_disp_bufs;
+
+ /* output buffer desc */
+ ivd_out_bufdesc_t s_disp_buffer[IVD_VIDDEC_MAX_IO_BUFFERS];
+
+}ivd_set_display_frame_ip_t;
+
+
+typedef struct
+{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ UWORD32 u4_error_code;
+}ivd_set_display_frame_op_t;
+
+
+/*****************************************************************************/
+/* Release Display Frame */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_REL_DISPLAY_FRAME */
+
+typedef struct
+{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ UWORD32 u4_disp_buf_id;
+}ivd_rel_display_frame_ip_t;
+
+
+/* Output arguments for the release-display-frame API call. */
+typedef struct
+{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* error/status code for the call */
+ UWORD32 u4_error_code;
+}ivd_rel_display_frame_op_t;
+
+/*****************************************************************************/
+/* Video control Flush */
+/*****************************************************************************/
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd = IVD_CMD_ctl_FLUSH */
+
+
+
+/* Input arguments for the video-control flush call (command + sub-command only). */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* top-level API command */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ /* control sub-command */
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ivd_ctl_flush_ip_t;
+
+
+/* Output arguments for the video-control flush call. */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* error/status code for the call */
+ UWORD32 u4_error_code;
+}ivd_ctl_flush_op_t;
+
+/*****************************************************************************/
+/* Video control reset */
+/*****************************************************************************/
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd = IVD_CMD_ctl_RESET */
+
+
+/* Input arguments for the video-control reset call (command + sub-command only). */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* top-level API command */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ /* control sub-command */
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ivd_ctl_reset_ip_t;
+
+
+/* Output arguments for the video-control reset call. */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* error/status code for the call */
+ UWORD32 u4_error_code;
+}ivd_ctl_reset_op_t;
+
+
+/*****************************************************************************/
+/* Video control Set Params */
+/*****************************************************************************/
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_SETPARAMS */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_SETDEFAULT */
+
+
+
+/*
+ * Input arguments for the video-control set-params / set-default calls.
+ * NOTE(review): field notes are inferred from names and types; confirm
+ * against the codec implementation.
+ */
+typedef struct {
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* top-level API command */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ /* control sub-command */
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+ /* video decode mode */
+ IVD_VIDEO_DECODE_MODE_T e_vid_dec_mode;
+ /* presumably the display width - TODO confirm units/meaning */
+ UWORD32 u4_disp_wd;
+ /* frame skip mode */
+ IVD_FRAME_SKIP_MODE_T e_frm_skip_mode;
+ /* display frame output mode */
+ IVD_DISPLAY_FRAME_OUT_MODE_T e_frm_out_mode;
+}ivd_ctl_set_config_ip_t;
+
+
+/* Output arguments for the video-control set-params / set-default calls. */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* error/status code for the call */
+ UWORD32 u4_error_code;
+}ivd_ctl_set_config_op_t;
+
+/*****************************************************************************/
+/* Video control:Get Buf Info */
+/*****************************************************************************/
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_GETBUFINFO */
+
+
+/* Input arguments for the video-control get-buffer-info call (command + sub-command only). */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* top-level API command */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ /* control sub-command */
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ivd_ctl_getbufinfo_ip_t;
+
+
+/*
+ * Output arguments for the video-control get-buffer-info call: buffer counts
+ * and per-buffer minimum sizes, each array holding IVD_VIDDEC_MAX_IO_BUFFERS
+ * entries.
+ */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* error/status code for the call */
+ UWORD32 u4_error_code;
+ /* no of display buffer sets required by codec */
+ UWORD32 u4_num_disp_bufs;
+ /* no of input buffers required for codec */
+ UWORD32 u4_min_num_in_bufs;
+ /* no of output buffers required for codec */
+ UWORD32 u4_min_num_out_bufs;
+ /* sizes of each input buffer required */
+ UWORD32 u4_min_in_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+ /* sizes of each output buffer required */
+ UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+}ivd_ctl_getbufinfo_op_t;
+
+
+/*****************************************************************************/
+/* Video control:Getstatus Call */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_GETPARAMS */
+
+
+/* Input arguments for the video-control get-status call (command + sub-command only). */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* top-level API command */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ /* control sub-command */
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ivd_ctl_getstatus_ip_t;
+
+
+/*
+ * Output arguments for the video-control get-status call. Carries the same
+ * buffer-requirement fields as ivd_ctl_getbufinfo_op_t plus stream properties.
+ * NOTE(review): units of the picture/rate fields are not visible here; confirm.
+ */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* error/status code for the call */
+ UWORD32 u4_error_code;
+ /* no of display buffer sets required by codec */
+ UWORD32 u4_num_disp_bufs;
+ /* picture height */
+ UWORD32 u4_pic_ht;
+ /* picture width */
+ UWORD32 u4_pic_wd;
+ /* frame rate */
+ UWORD32 u4_frame_rate;
+ /* bit rate */
+ UWORD32 u4_bit_rate;
+ /* content type */
+ IV_CONTENT_TYPE_T e_content_type;
+ /* output chroma/colour format */
+ IV_COLOR_FORMAT_T e_output_chroma_format;
+ /* no of input buffers required for codec */
+ UWORD32 u4_min_num_in_bufs;
+ /* no of output buffers required for codec */
+ UWORD32 u4_min_num_out_bufs;
+ /* sizes of each input buffer required */
+ UWORD32 u4_min_in_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+ /* sizes of each output buffer required */
+ UWORD32 u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+}ivd_ctl_getstatus_op_t;
+
+
+/*****************************************************************************/
+/* Video control:Get Version Info */
+/*****************************************************************************/
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_CTL */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_ctl_GETVERSION */
+
+
+/* Input arguments for the video-control get-version call. */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* top-level API command */
+ IVD_API_COMMAND_TYPE_T e_cmd;
+ /* control sub-command */
+ IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+ /* caller-supplied buffer; presumably receives the version information - TODO confirm */
+ void *pv_version_buffer;
+ /* size of pv_version_buffer (presumably in bytes - TODO confirm) */
+ UWORD32 u4_version_buffer_size;
+}ivd_ctl_getversioninfo_ip_t;
+
+
+/* Output arguments for the video-control get-version call. */
+typedef struct{
+ /* u4_size of the structure */
+ UWORD32 u4_size;
+ /* error/status code for the call */
+ UWORD32 u4_error_code;
+}ivd_ctl_getversioninfo_op_t;
+
+#endif /* __IVD_H__ */
+
diff --git a/decoder/mips/ih264d_function_selector.c b/decoder/mips/ih264d_function_selector.c
new file mode 100755
index 0000000..13680ed
--- /dev/null
+++ b/decoder/mips/ih264d_function_selector.c
@@ -0,0 +1,66 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264d_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in the H.264 decoder
+*
+* @author
+* Naveen
+*
+* @par List of Functions:
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_error.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+
+#include "ih264d_structs.h"
+#include "ih264d_function_selector.h"
+
+/*
+ * Installs the decoder function pointers for this (decoder/mips) build by
+ * delegating entirely to ih264d_init_function_ptr_generic().
+ */
+void ih264d_init_function_ptr(dec_struct_t *ps_codec)
+{
+ ih264d_init_function_ptr_generic(ps_codec);
+}
+/*
+ * Records the processor architecture in the codec context. This variant
+ * unconditionally stores ARCH_NA.
+ */
+void ih264d_init_arch(dec_struct_t *ps_codec)
+{
+ ps_codec->e_processor_arch = ARCH_NA;
+}
diff --git a/decoder/x86/ih264d_function_selector.c b/decoder/x86/ih264d_function_selector.c
new file mode 100755
index 0000000..9fc5c39
--- /dev/null
+++ b/decoder/x86/ih264d_function_selector.c
@@ -0,0 +1,94 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264d_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in the H.264 decoder
+*
+* @author
+* Naveen
+*
+* @par List of Functions:
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_error.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+
+#include "ih264d_structs.h"
+#include "ih264d_function_selector.h"
+
+/*
+ * Installs the decoder function pointers according to
+ * ps_codec->e_processor_arch.
+ *
+ * The generic set is always installed first. Then:
+ *   - ARCH_X86_GENERIC : the generic initialiser is invoked once more;
+ *   - ARCH_X86_SSSE3   : the SSSE3 initialiser is applied on top;
+ *   - anything else (including ARCH_X86_SSE42): SSSE3 followed by SSE4.2.
+ */
+void ih264d_init_function_ptr(dec_struct_t *ps_codec)
+{
+    /* Baseline: start from the generic initialisation. */
+    ih264d_init_function_ptr_generic(ps_codec);
+
+    if(ARCH_X86_GENERIC == ps_codec->e_processor_arch)
+    {
+        ih264d_init_function_ptr_generic(ps_codec);
+    }
+    else if(ARCH_X86_SSSE3 == ps_codec->e_processor_arch)
+    {
+        ih264d_init_function_ptr_ssse3(ps_codec);
+    }
+    else
+    {
+        /* ARCH_X86_SSE42 and every unlisted value: SSSE3 first, then SSE4.2. */
+        ih264d_init_function_ptr_ssse3(ps_codec);
+        ih264d_init_function_ptr_sse42(ps_codec);
+    }
+}
+/*
+ * Records the processor architecture in the codec context.
+ *
+ * The value is chosen at compile time from the DEFAULT_ARCH build macro:
+ *   D_ARCH_X86_SSE42 -> ARCH_X86_SSE42
+ *   D_ARCH_X86_SSSE3 -> ARCH_X86_SSSE3
+ *   D_ARCH_X86_AVX2  -> ARCH_X86_AVX2
+ *   any other value  -> ARCH_X86_GENERIC
+ * When DEFAULT_ARCH is not defined at all, ARCH_X86_SSE42 is used.
+ *
+ * ps_codec: codec context; only e_processor_arch is written.
+ */
+void ih264d_init_arch(dec_struct_t *ps_codec)
+{
+#ifdef DEFAULT_ARCH
+#if DEFAULT_ARCH == D_ARCH_X86_SSE42
+    ps_codec->e_processor_arch = ARCH_X86_SSE42;
+#elif DEFAULT_ARCH == D_ARCH_X86_SSSE3
+    ps_codec->e_processor_arch = ARCH_X86_SSSE3;
+#elif DEFAULT_ARCH == D_ARCH_X86_AVX2
+    /* Store the ARCH_* enumerator like the sibling branches; D_ARCH_X86_AVX2
+     * is only the build-time selector that DEFAULT_ARCH is compared against.
+     * NOTE(review): assumes ARCH_X86_AVX2 is declared next to the other
+     * ARCH_X86_* values - confirm in the header. */
+    ps_codec->e_processor_arch = ARCH_X86_AVX2;
+#else
+    ps_codec->e_processor_arch = ARCH_X86_GENERIC;
+#endif
+#else
+    ps_codec->e_processor_arch = ARCH_X86_SSE42;
+#endif
+
+}
diff --git a/decoder/x86/ih264d_function_selector_sse42.c b/decoder/x86/ih264d_function_selector_sse42.c
new file mode 100755
index 0000000..0c493d2
--- /dev/null
+++ b/decoder/x86/ih264d_function_selector_sse42.c
@@ -0,0 +1,95 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264d_function_selector_sse42.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264d_init_function_ptr_sse42
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_error.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+
+#include "ih264d_structs.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief Point the weighted-prediction, 4x4 inverse-quant/inverse-transform
+* and inverse-Hadamard function pointers of the codec context at their
+* SSE4.2 implementations
+*
+* @par Description: only the pointers listed in the body are written; all
+* other function pointers of the codec context are left untouched
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264d_init_function_ptr_sse42(dec_struct_t *ps_codec)
+{
+    /* Luma weighted prediction: default, explicit and bi-directional. */
+    ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_sse42;
+    ps_codec->pf_weighted_pred_luma = ih264_weighted_pred_luma_sse42;
+    ps_codec->pf_weighted_bi_pred_luma = ih264_weighted_bi_pred_luma_sse42;
+
+    /* Chroma weighted prediction: default, explicit and bi-directional. */
+    ps_codec->pf_default_weighted_pred_chroma = ih264_default_weighted_pred_chroma_sse42;
+    ps_codec->pf_weighted_pred_chroma = ih264_weighted_pred_chroma_sse42;
+    ps_codec->pf_weighted_bi_pred_chroma = ih264_weighted_bi_pred_chroma_sse42;
+
+    /* 4x4 inverse quant + inverse transform + recon, and inverse Hadamard. */
+    ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_sse42;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_sse42;
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42;
+}
diff --git a/decoder/x86/ih264d_function_selector_ssse3.c b/decoder/x86/ih264d_function_selector_ssse3.c
new file mode 100755
index 0000000..1786213
--- /dev/null
+++ b/decoder/x86/ih264d_function_selector_ssse3.c
@@ -0,0 +1,181 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264d_function_selector_ssse3.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264d_init_function_ptr_ssse3
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264_error.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+
+#include "ih264d_structs.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief Point the intra-pred, padding, inverse-transform, deblocking and
+* inter-pred function pointers of the codec context at their SSSE3
+* implementations
+*
+* @par Description: the routine overwrites the function pointers listed in the
+* body; pointers not listed here are left untouched
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks pf_intra_pred_ref_filtering and apf_intra_pred_chroma[2] are
+* assigned functions whose names carry no _ssse3 suffix
+*
+*******************************************************************************
+*/
+void ih264d_init_function_ptr_ssse3(dec_struct_t *ps_codec)
+{
+
+
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 16x16 */
+ ps_codec->apf_intra_pred_luma_16x16[0] = ih264_intra_pred_luma_16x16_mode_vert_ssse3;
+ ps_codec->apf_intra_pred_luma_16x16[1] = ih264_intra_pred_luma_16x16_mode_horz_ssse3;
+ ps_codec->apf_intra_pred_luma_16x16[2] = ih264_intra_pred_luma_16x16_mode_dc_ssse3;
+ ps_codec->apf_intra_pred_luma_16x16[3] = ih264_intra_pred_luma_16x16_mode_plane_ssse3;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 4x4 */
+ ps_codec->apf_intra_pred_luma_4x4[0] = ih264_intra_pred_luma_4x4_mode_vert_ssse3;
+ ps_codec->apf_intra_pred_luma_4x4[1] = ih264_intra_pred_luma_4x4_mode_horz_ssse3;
+ ps_codec->apf_intra_pred_luma_4x4[2] = ih264_intra_pred_luma_4x4_mode_dc_ssse3;
+ ps_codec->apf_intra_pred_luma_4x4[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3;
+ ps_codec->apf_intra_pred_luma_4x4[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3;
+ ps_codec->apf_intra_pred_luma_4x4[5] = ih264_intra_pred_luma_4x4_mode_vert_r_ssse3;
+ ps_codec->apf_intra_pred_luma_4x4[6] = ih264_intra_pred_luma_4x4_mode_horz_d_ssse3;
+ ps_codec->apf_intra_pred_luma_4x4[7] = ih264_intra_pred_luma_4x4_mode_vert_l_ssse3;
+ ps_codec->apf_intra_pred_luma_4x4[8] = ih264_intra_pred_luma_4x4_mode_horz_u_ssse3;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_luma_8x8[0] = ih264_intra_pred_luma_8x8_mode_vert_ssse3;
+ ps_codec->apf_intra_pred_luma_8x8[1] = ih264_intra_pred_luma_8x8_mode_horz_ssse3;
+ ps_codec->apf_intra_pred_luma_8x8[2] = ih264_intra_pred_luma_8x8_mode_dc_ssse3;
+ ps_codec->apf_intra_pred_luma_8x8[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3;
+ ps_codec->apf_intra_pred_luma_8x8[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3;
+ ps_codec->apf_intra_pred_luma_8x8[5] = ih264_intra_pred_luma_8x8_mode_vert_r_ssse3;
+ ps_codec->apf_intra_pred_luma_8x8[6] = ih264_intra_pred_luma_8x8_mode_horz_d_ssse3;
+ ps_codec->apf_intra_pred_luma_8x8[7] = ih264_intra_pred_luma_8x8_mode_vert_l_ssse3;
+ ps_codec->apf_intra_pred_luma_8x8[8] = ih264_intra_pred_luma_8x8_mode_horz_u_ssse3;
+
+ /* NOTE(review): no _ssse3 suffix on this one - presumably the plain C version; confirm */
+ ps_codec->pf_intra_pred_ref_filtering = ih264_intra_pred_luma_8x8_mode_ref_filtering;
+
+ /* Init function pointers for intra pred leaf level functions chroma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_chroma[0] = ih264_intra_pred_chroma_8x8_mode_vert_ssse3;
+ ps_codec->apf_intra_pred_chroma[1] = ih264_intra_pred_chroma_8x8_mode_horz_ssse3;
+ /* NOTE(review): DC mode uses a function without the _ssse3 suffix - confirm this is intended */
+ ps_codec->apf_intra_pred_chroma[2] = ih264_intra_pred_chroma_8x8_mode_dc;
+ ps_codec->apf_intra_pred_chroma[3] = ih264_intra_pred_chroma_8x8_mode_plane_ssse3;
+
+
+ /* Padding functions */
+ ps_codec->pf_pad_left_luma = ih264_pad_left_luma_ssse3;
+ ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_ssse3;
+ ps_codec->pf_pad_right_luma = ih264_pad_right_luma_ssse3;
+ ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_ssse3;
+
+
+ /* Inverse quant + inverse transform + recon */
+ ps_codec->pf_iquant_itrans_recon_luma_4x4 = ih264_iquant_itrans_recon_4x4_ssse3;
+ ps_codec->pf_iquant_itrans_recon_luma_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_ssse3;
+ ps_codec->pf_iquant_itrans_recon_luma_8x8 = ih264_iquant_itrans_recon_8x8_ssse3;
+ ps_codec->pf_iquant_itrans_recon_luma_8x8_dc = ih264_iquant_itrans_recon_8x8_dc_ssse3;
+
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3;
+
+ /* Init fn ptr luma deblocking */
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_ssse3;
+ ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_ssse3;
+ ps_codec->pf_deblk_luma_vert_bs4_mbaff = ih264_deblk_luma_vert_bs4_mbaff_ssse3;
+ ps_codec->pf_deblk_luma_vert_bslt4_mbaff = ih264_deblk_luma_vert_bslt4_mbaff_ssse3;
+
+ ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_ssse3;
+ ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_ssse3;
+
+ /* Init fn ptr chroma deblocking */
+ ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_ssse3;
+ ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_ssse3;
+ ps_codec->pf_deblk_chroma_vert_bs4_mbaff = ih264_deblk_chroma_vert_bs4_mbaff_ssse3;
+ ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_ssse3;
+ ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_ssse3;
+ ps_codec->pf_deblk_chroma_vert_bslt4_mbaff = ih264_deblk_chroma_vert_bslt4_mbaff_ssse3;
+
+ /* Inter pred leaf level functions */
+
+ /* 16-entry table; several entries share one function (e.g. 1 and 3, 4 and 12).
+ * NOTE(review): the index presumably encodes the fractional MV position - confirm */
+ ps_codec->apf_inter_pred_luma[0] = ih264_inter_pred_luma_copy_ssse3;
+ ps_codec->apf_inter_pred_luma[1] = ih264_inter_pred_luma_horz_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[2] = ih264_inter_pred_luma_horz_ssse3;
+ ps_codec->apf_inter_pred_luma[3] = ih264_inter_pred_luma_horz_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[4] = ih264_inter_pred_luma_vert_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[5] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[6] = ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[7] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[8] = ih264_inter_pred_luma_vert_ssse3;
+ ps_codec->apf_inter_pred_luma[9] = ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
+ ps_codec->apf_inter_pred_luma[10] = ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3;
+ ps_codec->apf_inter_pred_luma[11] = ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3;
+ ps_codec->apf_inter_pred_luma[12] = ih264_inter_pred_luma_vert_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[13] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[14] = ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3;
+ ps_codec->apf_inter_pred_luma[15] = ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3;
+
+ ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_ssse3;
+
+
+ return;
+}
diff --git a/encoder.arm.mk b/encoder.arm.mk
new file mode 100755
index 0000000..81ed22f
--- /dev/null
+++ b/encoder.arm.mk
@@ -0,0 +1,47 @@
+# 32-bit ARM settings for the encoder library: include dirs, flags and sources
+# are collected in libavce_*_arm and appended to the LOCAL_*_arm variables at
+# the bottom of this file.
+libavce_inc_dir_arm += $(LOCAL_PATH)/encoder/arm
+libavce_inc_dir_arm += $(LOCAL_PATH)/common/arm
+
+libavce_cflags_arm += -DDISABLE_NEONINTR -DARM -DARMGCC
+
+# The function selector is compiled whether or not NEON is available.
+libavce_srcs_c_arm += encoder/arm/ih264e_function_selector.c
+
+# NEON assembly and the A9Q default arch are added only when
+# ARCH_ARM_HAVE_NEON is true.
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+libavce_srcs_c_arm += encoder/arm/ih264e_function_selector_a9q.c
+
+libavce_srcs_asm_arm += common/arm/ih264_resi_trans_quant_a9.s
+libavce_srcs_asm_arm += common/arm/ih264_iquant_itrans_recon_a9.s
+libavce_srcs_asm_arm += common/arm/ih264_iquant_itrans_recon_dc_a9.s
+libavce_srcs_asm_arm += common/arm/ih264_ihadamard_scaling_a9.s
+libavce_srcs_asm_arm += common/arm/ih264_deblk_chroma_a9.s
+libavce_srcs_asm_arm += common/arm/ih264_deblk_luma_a9.s
+libavce_srcs_asm_arm += common/arm/ih264_intra_pred_chroma_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_intra_pred_luma_16x16_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_intra_pred_luma_4x4_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_intra_pred_luma_8x8_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_inter_pred_chroma_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_inter_pred_luma_bilinear_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_inter_pred_luma_copy_a9q.s
+libavce_srcs_asm_arm += common/arm/ih264_padding_neon.s
+libavce_srcs_asm_arm += common/arm/ih264_mem_fns_neon.s
+libavce_srcs_asm_arm += common/arm/ih264_arm_memory_barrier.s
+
+# Encoder-only assembly
+libavce_srcs_asm_arm += encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
+libavce_srcs_asm_arm += encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
+libavce_srcs_asm_arm += encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
+libavce_srcs_asm_arm += encoder/arm/ih264e_half_pel.s
+libavce_srcs_asm_arm += encoder/arm/ih264e_fmt_conv.s
+
+#ME
+libavce_srcs_asm_arm += encoder/arm/ime_distortion_metrics_a9q.s
+
+libavce_cflags_arm += -DDEFAULT_ARCH=D_ARCH_ARM_A9Q
+
+else #No Neon
+libavce_cflags_arm += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
+endif #Neon check
+
+# Export the collected lists to the arm-specific module variables.
+LOCAL_SRC_FILES_arm += $(libavce_srcs_c_arm) $(libavce_srcs_asm_arm)
+LOCAL_C_INCLUDES_arm += $(libavce_inc_dir_arm)
+LOCAL_CFLAGS_arm += $(libavce_cflags_arm)
diff --git a/encoder.arm64.mk b/encoder.arm64.mk
new file mode 100755
index 0000000..845b481
--- /dev/null
+++ b/encoder.arm64.mk
@@ -0,0 +1,48 @@
+# 64-bit ARM settings for the encoder library: include dirs, flags and sources
+# are collected in libavce_*_arm64 and appended to the LOCAL_*_arm64 variables
+# at the bottom of this file.
+libavce_cflags_arm64 += -DARMV8
+libavce_cflags_arm64 += -DDISABLE_NEONINTR -DARM -DARMGCC
+
+libavce_inc_dir_arm64 += $(LOCAL_PATH)/encoder/arm
+libavce_inc_dir_arm64 += $(LOCAL_PATH)/encoder/armv8
+libavce_inc_dir_arm64 += $(LOCAL_PATH)/common/armv8
+
+# The function selector is compiled whether or not NEON is available.
+libavce_srcs_c_arm64 += encoder/arm/ih264e_function_selector.c
+
+# ARMv8 assembly and the ARMV8_GENERIC default arch are added only when
+# ARCH_ARM_HAVE_NEON is true.
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+libavce_srcs_c_arm64 += encoder/arm/ih264e_function_selector_av8.c
+
+libavce_srcs_asm_arm64 += common/armv8/ih264_resi_trans_quant_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_iquant_itrans_recon_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_iquant_itrans_recon_dc_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_ihadamard_scaling_av8.s
+
+libavce_srcs_asm_arm64 += common/armv8/ih264_intra_pred_chroma_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_16x16_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_4x4_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_intra_pred_luma_8x8_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_inter_pred_luma_copy_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_inter_pred_chroma_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_inter_pred_filters_luma_horz_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_inter_pred_filters_luma_vert_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_padding_neon_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_mem_fns_neon_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_deblk_luma_av8.s
+libavce_srcs_asm_arm64 += common/armv8/ih264_deblk_chroma_av8.s
+
+libavce_srcs_asm_arm64 += encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
+libavce_srcs_asm_arm64 += encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
+libavce_srcs_asm_arm64 += encoder/armv8/ih264e_half_pel_av8.s
+
+#ME
+libavce_srcs_asm_arm64 += encoder/armv8/ime_distortion_metrics_av8.s
+
+# Must go into the arm64 flag list: only libavce_cflags_arm64 is exported to
+# LOCAL_CFLAGS_arm64 below, so appending to the 32-bit list would leave the
+# arm64 build without DEFAULT_ARCH.
+libavce_cflags_arm64 += -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC
+else
+libavce_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
+endif
+
+
+
+
+# Export the collected lists to the arm64-specific module variables.
+LOCAL_SRC_FILES_arm64 += $(libavce_srcs_c_arm64) $(libavce_srcs_asm_arm64)
+LOCAL_C_INCLUDES_arm64 += $(libavce_inc_dir_arm64)
+LOCAL_CFLAGS_arm64 += $(libavce_cflags_arm64)
diff --git a/encoder.mips.mk b/encoder.mips.mk
new file mode 100755
index 0000000..92ae5de
--- /dev/null
+++ b/encoder.mips.mk
@@ -0,0 +1,7 @@
+# MIPS settings for the encoder library: only include dirs and the function
+# selector source are added; no MIPS-specific flags are set here.
+libavce_inc_dir_mips += $(LOCAL_PATH)/common/mips
+libavce_inc_dir_mips += $(LOCAL_PATH)/encoder/mips
+
+libavce_srcs_c_mips += encoder/mips/ih264e_function_selector.c
+
+# Export the collected lists to the mips-specific module variables.
+LOCAL_C_INCLUDES_mips += $(libavce_inc_dir_mips)
+LOCAL_SRC_FILES_mips += $(libavce_srcs_c_mips)
diff --git a/encoder.mips64.mk b/encoder.mips64.mk
new file mode 100755
index 0000000..5181fd9
--- /dev/null
+++ b/encoder.mips64.mk
@@ -0,0 +1,7 @@
+# MIPS64 settings for the encoder library: only include dirs and the function
+# selector source are added; no MIPS64-specific flags are set here.
+libavce_inc_dir_mips64 += $(LOCAL_PATH)/common/mips
+libavce_inc_dir_mips64 += $(LOCAL_PATH)/encoder/mips
+
+libavce_srcs_c_mips64 += encoder/mips/ih264e_function_selector.c
+
+# Export the mips64 lists built above. Referencing the 32-bit mips lists here
+# would leave the *_mips64 variables unused and make this file depend on
+# encoder.mips.mk having been included first.
+LOCAL_C_INCLUDES_mips64 += $(libavce_inc_dir_mips64)
+LOCAL_SRC_FILES_mips64 += $(libavce_srcs_c_mips64)
diff --git a/encoder.mk b/encoder.mk
new file mode 100755
index 0000000..5829118
--- /dev/null
+++ b/encoder.mk
@@ -0,0 +1,90 @@
+# Build rules for the libavcenc static library: arch-independent sources are
+# listed here, arch-specific additions come from the encoder.<arch>.mk files
+# included near the end.
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+libavce_source_dir := $(LOCAL_PATH)
+
+## Arch-common settings
+LOCAL_MODULE := libavcenc
+#LOCAL_32_BIT_ONLY := true
+
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+
+LOCAL_CFLAGS += -D_LIB -DMULTICORE -DANDROID -DNDEBUG -UHP_PL -DN_MB_ENABLE -URC_FIXED_POINT -fPIC
+LOCAL_CFLAGS += -O3 -DANDROID
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/encoder $(LOCAL_PATH)/common
+
+# Sources under common/
+libavce_srcs_c += common/ih264_resi_trans_quant.c
+libavce_srcs_c += common/ih264_iquant_itrans_recon.c
+libavce_srcs_c += common/ih264_ihadamard_scaling.c
+libavce_srcs_c += common/ih264_inter_pred_filters.c
+libavce_srcs_c += common/ih264_luma_intra_pred_filters.c
+libavce_srcs_c += common/ih264_chroma_intra_pred_filters.c
+libavce_srcs_c += common/ih264_padding.c
+libavce_srcs_c += common/ih264_mem_fns.c
+libavce_srcs_c += common/ih264_deblk_edge_filters.c
+libavce_srcs_c += common/ih264_deblk_tables.c
+libavce_srcs_c += common/ih264_cavlc_tables.c
+libavce_srcs_c += common/ih264_cabac_tables.c
+libavce_srcs_c += common/ih264_common_tables.c
+libavce_srcs_c += common/ih264_trans_data.c
+libavce_srcs_c += common/ih264_buf_mgr.c
+libavce_srcs_c += common/ih264_dpb_mgr.c
+libavce_srcs_c += common/ih264_list.c
+
+
+libavce_srcs_c += common/ithread.c
+
+# Encoder sources
+libavce_srcs_c += encoder/ih264e_globals.c
+libavce_srcs_c += encoder/ih264e_intra_modes_eval.c
+libavce_srcs_c += encoder/ih264e_half_pel.c
+libavce_srcs_c += encoder/ih264e_mc.c
+libavce_srcs_c += encoder/ih264e_me.c
+libavce_srcs_c += encoder/ih264e_rc_mem_interface.c
+libavce_srcs_c += encoder/ih264e_time_stamp.c
+libavce_srcs_c += encoder/ih264e_modify_frm_rate.c
+libavce_srcs_c += encoder/ih264e_rate_control.c
+libavce_srcs_c += encoder/ih264e_core_coding.c
+libavce_srcs_c += encoder/ih264e_deblk.c
+libavce_srcs_c += encoder/ih264e_api.c
+libavce_srcs_c += encoder/ih264e_process.c
+libavce_srcs_c += encoder/ih264e_encode.c
+libavce_srcs_c += encoder/ih264e_utils.c
+libavce_srcs_c += encoder/ih264e_version.c
+libavce_srcs_c += encoder/ih264e_bitstream.c
+libavce_srcs_c += encoder/ih264e_cavlc.c
+libavce_srcs_c += encoder/ih264e_encode_header.c
+libavce_srcs_c += encoder/ih264e_function_selector_generic.c
+libavce_srcs_c += encoder/ih264e_fmt_conv.c
+
+#Rate Control
+libavce_srcs_c += encoder/irc_rate_control_api.c
+libavce_srcs_c += encoder/irc_bit_allocation.c
+libavce_srcs_c += encoder/irc_cbr_buffer_control.c
+libavce_srcs_c += encoder/irc_est_sad.c
+libavce_srcs_c += encoder/irc_fixed_point_error_bits.c
+libavce_srcs_c += encoder/irc_frame_info_collector.c
+libavce_srcs_c += encoder/irc_mb_model_based.c
+libavce_srcs_c += encoder/irc_picture_type.c
+libavce_srcs_c += encoder/irc_rd_model.c
+libavce_srcs_c += encoder/irc_vbr_storage_vbv.c
+libavce_srcs_c += encoder/irc_vbr_str_prms.c
+
+#ME files
+libavce_srcs_c += encoder/ime.c
+libavce_srcs_c += encoder/ime_distortion_metrics.c
+
+
+
+# NOTE(review): libavce_srcs_asm is never appended to in this file; presumably
+# empty unless set elsewhere - confirm.
+LOCAL_SRC_FILES := $(libavce_srcs_c) $(libavce_srcs_asm)
+
+
+# Load the arch-specific settings
+include $(LOCAL_PATH)/encoder.arm.mk
+include $(LOCAL_PATH)/encoder.arm64.mk
+include $(LOCAL_PATH)/encoder.x86.mk
+include $(LOCAL_PATH)/encoder.x86_64.mk
+include $(LOCAL_PATH)/encoder.mips.mk
+include $(LOCAL_PATH)/encoder.mips64.mk
+
+include $(BUILD_STATIC_LIBRARY)
diff --git a/encoder.x86.mk b/encoder.x86.mk
new file mode 100755
index 0000000..e9b6a5f
--- /dev/null
+++ b/encoder.x86.mk
@@ -0,0 +1,37 @@
+libavce_cflags_x86 += -DX86 -DDISABLE_AVX2 -m32 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
+# Extra header search paths for the x86 sources listed below.
+libavce_inc_dir_x86 += $(LOCAL_PATH)/encoder/x86
+libavce_inc_dir_x86 += $(LOCAL_PATH)/common/x86
+# Function-selector sources (base, SSE4.2 and SSSE3 variants).
+libavce_srcs_c_x86 += encoder/x86/ih264e_function_selector.c
+libavce_srcs_c_x86 += encoder/x86/ih264e_function_selector_sse42.c
+libavce_srcs_c_x86 += encoder/x86/ih264e_function_selector_ssse3.c
+# Sources taken from common/x86 (SSSE3 files first, then SSE4.2 files).
+libavce_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_ihadamard_scaling_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_inter_pred_filters_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_mem_fns_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_padding_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_luma_intra_pred_filters_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_chroma_intra_pred_filters_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_deblk_chroma_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_deblk_luma_ssse3.c
+libavce_srcs_c_x86 += common/x86/ih264_iquant_itrans_recon_sse42.c
+libavce_srcs_c_x86 += common/x86/ih264_ihadamard_scaling_sse42.c
+libavce_srcs_c_x86 += common/x86/ih264_resi_trans_quant_sse42.c
+libavce_srcs_c_x86 += common/x86/ih264_weighted_pred_sse42.c
+# Sources taken from encoder/x86.
+libavce_srcs_c_x86 += encoder/x86/ih264e_half_pel_ssse3.c
+libavce_srcs_c_x86 += encoder/x86/ih264e_intra_modes_eval_ssse3.c
+libavce_srcs_c_x86 += encoder/x86/ime_distortion_metrics_sse42.c
+
+
+
+# Append the lists built above to the _x86-suffixed LOCAL_* variables.
+# libavce_srcs_asm_x86 is never assigned in this file, so it adds nothing
+# here unless it is set somewhere else.
+LOCAL_SRC_FILES_x86 += $(libavce_srcs_c_x86) $(libavce_srcs_asm_x86)
+LOCAL_C_INCLUDES_x86 += $(libavce_inc_dir_x86)
+LOCAL_CFLAGS_x86 += $(libavce_cflags_x86)
+
diff --git a/encoder.x86_64.mk b/encoder.x86_64.mk
new file mode 100755
index 0000000..deb004b
--- /dev/null
+++ b/encoder.x86_64.mk
@@ -0,0 +1,35 @@
+libavce_cflags_x86_64 += -DX86 -DDISABLE_AVX2 -m64 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
+# Extra header search paths for the x86_64 sources listed below.
+libavce_inc_dir_x86_64 += $(LOCAL_PATH)/encoder/x86
+libavce_inc_dir_x86_64 += $(LOCAL_PATH)/common/x86
+# Function-selector sources (base, SSE4.2 and SSSE3 variants).
+libavce_srcs_c_x86_64 += encoder/x86/ih264e_function_selector.c
+libavce_srcs_c_x86_64 += encoder/x86/ih264e_function_selector_sse42.c
+libavce_srcs_c_x86_64 += encoder/x86/ih264e_function_selector_ssse3.c
+# Sources taken from common/x86 (SSSE3 files first, then SSE4.2 files).
+libavce_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_dc_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_ihadamard_scaling_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_inter_pred_filters_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_mem_fns_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_padding_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_luma_intra_pred_filters_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_chroma_intra_pred_filters_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_deblk_chroma_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_deblk_luma_ssse3.c
+libavce_srcs_c_x86_64 += common/x86/ih264_iquant_itrans_recon_sse42.c
+libavce_srcs_c_x86_64 += common/x86/ih264_ihadamard_scaling_sse42.c
+libavce_srcs_c_x86_64 += common/x86/ih264_resi_trans_quant_sse42.c
+libavce_srcs_c_x86_64 += common/x86/ih264_weighted_pred_sse42.c
+# Sources taken from encoder/x86 (the same directory the 32-bit list uses).
+libavce_srcs_c_x86_64 += encoder/x86/ih264e_half_pel_ssse3.c
+libavce_srcs_c_x86_64 += encoder/x86/ih264e_intra_modes_eval_ssse3.c
+libavce_srcs_c_x86_64 += encoder/x86/ime_distortion_metrics_sse42.c
+# Append the lists built above to the _x86_64-suffixed LOCAL_* variables.
+# libavce_srcs_asm_x86_64 is never assigned in this file.
+LOCAL_SRC_FILES_x86_64 += $(libavce_srcs_c_x86_64) $(libavce_srcs_asm_x86_64)
+LOCAL_C_INCLUDES_x86_64 += $(libavce_inc_dir_x86_64)
+LOCAL_CFLAGS_x86_64 += $(libavce_cflags_x86_64)
+
+
+
diff --git a/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
new file mode 100755
index 0000000..fe0ce17
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra16x16_modes_a9q.s
@@ -0,0 +1,313 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC )
+@* and do the prediction.
+@*
+@* @par Description
+@* This function evaluates the first three 16x16 modes, computes the SAD of
+@* each, and writes the block predicted with the best mode to pu1_dst.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@** @param[in] pu1_ngbr_pels_i16
+@* UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[out] u4_intra_mode
+@* Pointer to the variable in which the best mode is returned
+@*
+@* @param[out] pu4_sadmin
+@* Pointer to the variable in which the minimum SAD is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Bit mask of the modes to evaluate (bit 0 VERT, bit 1 HORZ, bit 2 DC)
+@*
+@*
+@* @return none
+@*
+@******************************************************************************
+@*/
+@
+@void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+@ UWORD8 *pu1_ngbr_pels_i16,
+@ UWORD8 *pu1_dst,
+@ UWORD32 src_strd,
+@ UWORD32 dst_strd,
+@ WORD32 u4_n_avblty,
+@ UWORD32 *u4_intra_mode,
+@ WORD32 *pu4_sadmin,
+@ UWORD32 u4_valid_intra_modes)
+@
+.text
+.p2align 2
+
+ .global ih264e_evaluate_intra16x16_modes_a9q
+
+ih264e_evaluate_intra16x16_modes_a9q:
+
+@ Register arguments on entry:
+@ r0 = pu1_src, r1 = pu1_ngbr_pels_i16,
+@ r2 = pu1_dst, r3 = src_strd
+@ Stack arguments and the registers they are loaded into below:
+@ r4 = dst_strd,
+@ r5 = u4_n_avblty,
+@ r6 = u4_intra_mode,
+@ r7 = pu4_sadmin
+
+
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ ldr r5, [sp, #44] @r5 = u4_n_avblty (second stack argument, above the 40 bytes just pushed)
+
+
+ vpush {d8-d15} @64 more bytes: the stack arguments are read from [sp, #104] onwards below
+ vld1.32 {q4}, [r1]! @q4 = neighbour bytes 0..15 (the per-row horizontal predictors)
+ sub r6, r1, #1 @r6 -> neighbour byte 15, the horizontal predictor of row 0
+ add r1, r1, #1 @skip neighbour byte 16
+ mov r10, #0
+ vld1.32 {q5}, [r1]! @q5 = neighbour bytes 17..32 (the vertical predictor row)
+ mov r11, #0
+ mov r4, #0
+ @/* Left available? (bit 0 of u4_n_avblty) */
+ ands r7, r5, #01
+ movne r10, #1 @r10 = 1 when left is available
+
+ @/* Top available? (bit 2 of u4_n_avblty) */
+ ands r8, r5, #04
+ lsl r9, r10, #3 @r9 = 8 * left flag
+ movne r11, #1 @r11 = 1 when top is available
+ lsl r12, r11, #3 @r12 = 8 * top flag
+ adds r8, r9, r12 @r8 = rounding term (8 per available edge); Z set when neither is available
+
+
+ @/* Neither edge available: 128 is added to the DC value below */
+ moveq r4, #128
+
+
+
+@/* Finding DC value */
+ @----------------------
+ vaddl.u8 q15, d8, d9
+
+ vaddl.u8 q14, d10, d11
+
+ vadd.u16 q15, q14, q15
+ @ fold the eight 16-bit partial sums into lane 0 of d30
+ vadd.u16 d30, d31, d30
+ vpadd.u16 d30, d30
+ @ (second pairwise add completes the fold)
+ vpadd.u16 d30, d30
+ @---------------------
+ @ NOTE(review): the sum covers all 32 loaded bytes whatever u4_n_avblty says, so
+ @ a missing edge is presumably zero-filled by the caller - confirm.
+ vmov.u16 r7, d30[0] @r7 = sum of the bytes held in q4 and q5
+ add r7, r7, r8 @add the rounding term
+ add r11, r11, #3
+ add r8, r10, r11 @r8 = shift count = 3 + number of available edges
+
+ lsr r7, r8
+ add r7, r4, r7 @r4 is 128 when neither edge is available, otherwise 0
+ vld1.32 {q0}, [r0], r3 @ source row 0
+ vdup.8 q15, r7 @dc val
+
+@/* computing SADs for all three modes*/
+ ldrb r7, [r6]
+ vdup.8 q10, r7 @/HORIZONTAL VALUE ROW=0;
+ @/vertical row 0;
+ vabdl.u8 q8, d0, d10
+ vabdl.u8 q9, d1, d11
+ sub r6, r6, #1 @horizontal predictors are read at descending addresses
+ @/HORZ row 0;
+ vabdl.u8 q13, d0, d20
+ vabdl.u8 q14, d1, d21
+ mov r1, #15 @15 rows left for the loop
+ @/dc row 0;
+ vabdl.u8 q11, d0, d30
+ vabdl.u8 q12, d1, d31
+
+
+loop:
+ vld1.32 {q1}, [r0], r3 @row i
+ @/dc row i;
+ vabal.u8 q11, d2, d30
+ ldrb r7, [r6]
+ vabal.u8 q12, d3, d31
+
+ @/vertical row i;
+ vabal.u8 q8, d2, d10
+ vdup.8 q10, r7 @/HORIZONTAL VALUE ROW=i;
+ sub r6, r6, #1
+ vabal.u8 q9, d3, d11
+
+ subs r1, r1, #1
+ @/HORZ row i;
+ vabal.u8 q13, d2, d20
+ vabal.u8 q14, d3, d21
+ bne loop
+
+ @------------------------------------------------------------------------------
+ @ reduce each pair of 16-bit accumulators to one 32-bit SAD
+ vadd.i16 q9, q9, q8 @/VERT
+ vadd.i16 d18, d19, d18 @/VERT
+ vpaddl.u16 d18, d18 @/VERT
+ vadd.i16 q14, q13, q14 @/HORZ
+ vadd.i16 d28, d29, d28 @/HORZ
+ vpaddl.u32 d18, d18 @/VERT
+ vpaddl.u16 d28, d28 @/HORZ
+
+ vpaddl.u32 d28, d28 @/HORZ
+ vmov.u32 r8, d18[0] @ vert
+ vadd.i16 q12, q11, q12 @/DC
+ vmov.u32 r9, d28[0] @horz
+ mov r11, #1
+ vadd.i16 d24, d24, d25 @/DC
+ lsl r11 , #30 @r11 = 1 << 30, substituted for the SAD of every mode that is not valid
+
+ @-----------------------
+ ldr r0, [sp, #120] @ u4_valid_intra_modes
+ @--------------------------------------------
+ ands r7, r0, #01 @ vert mode valid? (bit 0)
+ moveq r8, r11
+ vpaddl.u16 d24, d24 @/DC
+
+ ands r6, r0, #02 @ horz mode valid? (bit 1)
+ moveq r9, r11
+ vpaddl.u32 d24, d24 @/DC
+
+ vmov.u32 r10, d24[0] @dc
+@--------------------------------
+ ldr r4, [sp, #104] @r4 = dst_strd,
+ ldr r7, [sp, #116] @r7 = pu4_sadmin
+@----------------------------------------------
+ ands r6, r0, #04 @ dc mode valid? (bit 2)
+ moveq r10, r11
+
+ @---------------------------
+ ldr r6, [sp, #112] @ R6 =MODE
+ @--------------------------
+ @ pick the smallest SAD; on a tie VERT wins over HORZ and DC, HORZ wins over DC
+ cmp r8, r9
+ bgt not_vert
+ cmp r8, r10
+ bgt do_dc
+
+ @/----------------------
+ @DO VERTICAL PREDICTION
+ str r8 , [r7] @MIN SAD
+ mov r8, #0
+ str r8 , [r6] @ MODE
+ vmov q15, q5 @every output row is the vertical predictor row
+
+ b do_dc_vert
+ @-----------------------------
+not_vert:
+ cmp r9, r10
+ bgt do_dc
+
+ @/----------------------
+ @DO HORIZONTAL
+ vdup.8 q5, d9[7] @0
+ str r9 , [r7] @MIN SAD
+ vdup.8 q6, d9[6] @1
+ mov r9, #1
+ vdup.8 q7, d9[5] @2
+ vst1.32 {d10, d11} , [r2], r4 @0
+ vdup.8 q8, d9[4] @3
+ str r9 , [r6] @ MODE
+ vdup.8 q9, d9[3] @4
+ vst1.32 {d12, d13} , [r2], r4 @1
+ vdup.8 q10, d9[2] @5
+ vst1.32 {d14, d15} , [r2], r4 @2
+ vdup.8 q11, d9[1] @6
+ vst1.32 {d16, d17} , [r2], r4 @3
+ vdup.8 q12, d9[0] @7
+ vst1.32 {d18, d19} , [r2], r4 @4
+ vdup.8 q13, d8[7] @8
+ vst1.32 {d20, d21} , [r2], r4 @5
+ vdup.8 q14, d8[6] @9
+ vst1.32 {d22, d23} , [r2], r4 @6
+ vdup.8 q15, d8[5] @10
+ vst1.32 {d24, d25} , [r2], r4 @7
+ vdup.8 q1, d8[4] @11
+ vst1.32 {d26, d27} , [r2], r4 @8
+ vdup.8 q2, d8[3] @12
+ vst1.32 {d28, d29} , [r2], r4 @9
+ vdup.8 q3, d8[2] @13
+ vst1.32 {d30, d31}, [r2], r4 @10
+ vdup.8 q5, d8[1] @14
+ vst1.32 {d2, d3} , [r2], r4 @11
+ vdup.8 q6, d8[0] @15
+ vst1.32 {d4, d5} , [r2], r4 @12
+
+ vst1.32 {d6, d7} , [r2], r4 @13
+
+ vst1.32 {d10, d11} , [r2], r4 @14
+
+ vst1.32 {d12, d13} , [r2], r4 @15
+ b end_func
+
+
+ @/-----------------------------
+
+do_dc: @/---------------------------------
+ @DO DC
+ str r10 , [r7] @MIN SAD
+ mov r10, #2
+ str r10 , [r6] @ MODE
+do_dc_vert: @ shared store: q15 holds either the DC value or the vertical predictor row
+ vst1.32 {d30, d31}, [r2], r4 @0
+ vst1.32 {d30, d31}, [r2], r4 @1
+ vst1.32 {d30, d31}, [r2], r4 @2
+ vst1.32 {d30, d31}, [r2], r4 @3
+ vst1.32 {d30, d31}, [r2], r4 @4
+ vst1.32 {d30, d31}, [r2], r4 @5
+ vst1.32 {d30, d31}, [r2], r4 @6
+ vst1.32 {d30, d31}, [r2], r4 @7
+ vst1.32 {d30, d31}, [r2], r4 @8
+ vst1.32 {d30, d31}, [r2], r4 @9
+ vst1.32 {d30, d31}, [r2], r4 @10
+ vst1.32 {d30, d31}, [r2], r4 @11
+ vst1.32 {d30, d31}, [r2], r4 @12
+ vst1.32 {d30, d31}, [r2], r4 @13
+ vst1.32 {d30, d31}, [r2], r4 @14
+ vst1.32 {d30, d31}, [r2], r4 @15
+ @/------------------
+end_func:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
diff --git a/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
new file mode 100755
index 0000000..568e623
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra4x4_modes_a9q.s
@@ -0,0 +1,529 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+
+.data
+.p2align 2
+@ Branch targets for the final 4x4 prediction, one word per intra mode 0..8.
+scratch_intrapred_luma_4x4_prediction:
+ .long ver, hor, d_c, dia_dl
+ .long dia_dr, ver_r, hor_d, ver_l
+ .long hor_u
+
+
+.text
+.p2align 2
+@ Offset from (scrintra_4x4 + 8) to the table above; the code at scrintra_4x4 adds pc to it.
+scratch_intrapred_luma_4x4_prediction_addr1:
+ .long scratch_intrapred_luma_4x4_prediction - scrintra_4x4 - 8
+
+
+
+@/**
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra 4x4 mode
+@* and do the prediction.
+@*
+@* @par Description
+@* This function evaluates 4x4 modes and compute corresponding sad
+@* and return the buffer predicted with best mode.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@** @param[in] pu1_ngbr_pels
+@* UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[out] u4_intra_mode
+@* Pointer to the variable in which the best mode is returned
+@*
+@* @param[out] pu4_sadmin
+@* Pointer to the variable in which the minimum cost is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Bit mask of the modes to evaluate (bit n enables mode n, n = 0..8)
+@*
+@* @param[in] u4_lambda
+@* Lambda value for computing cost from SAD
+@*
+@* @param[in] u4_predictd_mode
+@* Predicted mode for cost computation
+@*
+@*
+@*
+@* @return none
+@*
+@******************************************************************************
+@*/
+@void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
+@ UWORD8 *pu1_ngbr_pels,
+@ UWORD8 *pu1_dst,
+@ UWORD32 src_strd,
+@ UWORD32 dst_strd,
+@ WORD32 u4_n_avblty,
+@ UWORD32 *u4_intra_mode,
+@ WORD32 *pu4_sadmin,
+@ UWORD32 u4_valid_intra_modes,
+@ UWORD32 u4_lambda,
+@ UWORD32 u4_predictd_mode)
+
+
+
+ .global ih264e_evaluate_intra_4x4_modes_a9q
+
+ih264e_evaluate_intra_4x4_modes_a9q:
+
+@ Register arguments on entry:
+@ r0 = pu1_src, r1 = pu1_ngbr_pels,
+@ r2 = pu1_dst, r3 = src_strd
+@ Stack arguments and the registers they are loaded into below:
+@ r4 = dst_strd,
+@ r5 = u4_n_avblty,
+@ r6 = u4_intra_mode,
+@ r7 = pu4_sadmin
+@ r8 = u4_valid_intra_modes
+@ r0 = u4_lambda (loaded once the source rows have been read)
+@ r1 = u4_predictd_mode (loaded once the neighbours have been read)
+
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+
+@--------------------
+ ldr r5, [sp, #44] @r5 = u4_n_avblty,
+@----------------------
+ vpush {d8-d15}
+@Loading neighbours
+ vld1.32 {q0}, [r1] @q0 = neighbour bytes 0..15
+ add r4, r1, #12
+ vld1.8 d1[5], [r4] @byte 13 of q0 = neighbour byte 12
+ vld1.8 d1[7], [r1] @byte 15 of q0 = neighbour byte 0
+ @--------------------------------
+ ldr r8, [sp, #120] @u4_valid_intra_modes
+@----------------------------------------------
+
+
+
+@ LOADING pu1_src
+ vld1.32 {d20[0]}, [r0], r3 @q10 (d20, d21) collects the four 4-byte source rows
+ vext.8 q1, q0, q0, #1 @q1 = q0 rotated down by one byte
+ vld1.32 {d20[1]}, [r0], r3
+ mov r11, #1
+ vld1.32 {d21[0]}, [r0], r3
+ lsl r11, r11, #30 @r11 = running minimum cost, starts at 1 << 30
+ vld1.32 {d21[1]}, [r0], r3
+
+
+
+@--------------------------------
+ ldr r0, [sp, #124] @r0 =u4_lambda
+ ldr r1, [sp, #128] @r1 = u4_predictd_mode
+@------
+@ Each mode below: build the prediction, SAD it against q10, add the mode cost,
+@ and keep the mode in r12 when its cost is strictly below the minimum in r11.
+vert:
+ ands r10, r8, #01 @VERT sad ??
+ beq horz
+ vdup.32 q2, d2[1] @q2 = neighbour bytes 5..8 repeated for every row
+ vabdl.u8 q14, d4, d20
+ vabal.u8 q14, d4, d21
+ vadd.i16 d28, d29, d28
+ subs r6, r1, #0 @Z set when the predicted mode is 0 (VERT)
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2 @mode cost = 4 * lambda when this is not the predicted mode
+ vpaddl.u32 d28, d28 @/
+ moveq r6, r0 @mode cost = lambda when it is
+ vmov.u32 r9, d28[0] @ vert
+ add r9, r6, r9 @cost = SAD + mode cost
+
+ subs r6, r11, r9
+ movgt r11, r9 @new minimum
+ movgt r12, #0 @r12 = best mode so far
+
+horz:
+ ands r10, r8, #02 @HORZ sad ??
+ beq dc
+ vdup.32 q3, d0[0] @q3 = neighbour bytes 0..3 repeated
+ vmov.32 q4, q3
+ vtrn.8 q3, q4 @the transposes leave q3 = 4 x byte 3, 4 x byte 2, 4 x byte 1, 4 x byte 0
+ vtrn.16 d7, d6
+ vtrn.16 d9, d8
+ vtrn.32 d9, d7
+ vtrn.32 d8, d6
+ vabdl.u8 q14, d6, d20
+ subs r6, r1, #1 @Z set when the predicted mode is 1 (HORZ)
+ vabal.u8 q14, d7, d21
+ vadd.i16 d28, d29, d28
+ lslne r6, r0, #2
+ vpaddl.u16 d28, d28 @
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #1
+
+dc:
+ ands r10, r8, #04 @DC sad ??
+ beq diags
+ vext.8 q4, q0, q0, #5 @q4 = q0 rotated so that neighbour byte 5 comes first
+ vaddl.u8 q4, d0, d8
+ vpaddl.u16 d8, d8 @
+ vpaddl.u32 d8, d8 @/
+ vmov.u32 r4, d8[0] @r4 = neighbour bytes 0..3 + neighbour bytes 5..8
+ mov r14, #1
+ ands r10, r5, #1 @left available?
+ addne r4, r4, #2 @rounding
+ addne r14, r14, #1 @one more bit of shift
+ ands r10, r5, #4 @top available?
+ addne r4, r4, #2
+ addne r14, r14, #1
+ ands r10, r5, #5
+ moveq r4, #128 @neither edge available: DC = 128
+ moveq r14, #0
+ subs r6, r1, #2 @Z set when the predicted mode is 2 (DC)
+ lsr r4, r4, r14 @r4 = DC value; it is read again at d_c, so r4 must survive until then
+ vdup.8 q4, r4
+ lslne r6, r0, #2
+ vabdl.u8 q14, d8, d20
+ vabal.u8 q14, d9, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+@ NOTE(review): the sum above always covers both edges, so a missing edge is presumably zero-filled by the caller - confirm.
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #2
+
+diags:
+ ands r10, r8, #504 @/* if modes other than VERT, HORZ and DC are valid ????*/
+ beq pred
+ @/* Performing FILT11 and FILT121 operation for all neighbour values*/
+ vext.8 q5, q0, q0, #2
+ vaddl.u8 q6, d0, d2
+ vaddl.u8 q7, d1, d3
+ vaddl.u8 q8, d10, d2
+ vaddl.u8 q9, d11, d3
+ vadd.u16 q12, q10, q11 @NOTE(review): q11 and q12 are both rewritten below before being read; this add looks redundant
+ vqrshrun.s16 d10, q6, #1 @q5 = (n[i] + n[i+1] + 1) >> 1
+ vqrshrun.s16 d11, q7, #1
+ vadd.u16 q11, q6, q8
+ vadd.u16 q12, q7, q9
+ vqrshrun.s16 d12, q11, #2 @q6 = (n[i] + 2*n[i+1] + n[i+2] + 2) >> 2
+ vqrshrun.s16 d13, q12, #2
+ mov r14, #0
+ vdup.32 q13 , r14
+ mov r14, #-1
+ vmov.i32 d26[0], r14 @d26 = mask selecting the lower 32 bits of a d register
+
+diag_dl:
+ ands r10, r8, #0x08 @DIAG_DL sad ??
+ beq diag_dr
+@ prediction is assembled in q7 (d14, d15), 32 bits at a time, through the masks
+ vext.8 q15, q6, q6, #5
+ vbit.32 d14, d30, d26
+ vext.8 q15, q6, q6, #15
+ vbit.32 d15, d31, d26
+ vext.8 q15, q6, q6, #2
+ vext.32 q14, q13, q13, #3 @d28 = mask selecting the upper 32 bits of a d register
+ vbit.32 d14, d30, d28
+ vext.8 q15, q6, q6, #4
+ vbit.32 d15, d30, d28
+ vabdl.u8 q14, d14, d20
+ subs r6, r1, #3 @Z set when the predicted mode is 3 (DIAG_DL)
+ vabal.u8 q14, d15, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #3
+
+diag_dr:
+ ands r10, r8, #16 @DIAG_DR sad ??
+ beq vert_r
+@ prediction is assembled in q8 (d16, d17)
+ vext.8 q15, q6, q6, #3
+ vbit.32 d16, d30, d26
+ vext.8 q15, q6, q6, #1
+ vbit.32 d17, d30, d26
+ vext.8 q15, q6, q6, #4
+ vext.32 q14, q13, q13, #3 @d28 = mask selecting the upper 32 bits of a d register
+ vbit.32 d17, d31, d28
+ vext.8 q15, q6, q6, #6
+ vbit.32 d16, d31, d28
+ vabdl.u8 q14, d16, d20
+ subs r6, r1, #4 @Z set when the predicted mode is 4 (DIAG_DR)
+ vabal.u8 q14, d17, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #4
+
+vert_r:
+ ands r10, r8, #32 @VERT_R sad ??
+ beq horz_d
+ vext.8 q15, q5, q5, #4 @prediction is assembled in q9 (d18, d19)
+ vbit.32 d18, d30, d26
+ vext.8 q15, q5, q5, #3
+ vbit.32 d19, d30, d26
+ vext.32 q14, q13, q13, #3 @d28 = mask selecting the upper 32 bits of a d register
+ vext.8 q15, q6, q6, #15
+ vbit.32 d18, d30, d28
+ vext.8 q15, q6, q6, #14
+ vbit.32 d19, d30, d28
+ mov r14, #0
+ vdup.32 q14 , r14
+ mov r14, #0xff
+ vmov.i8 d28[0], r14 @d28 = mask selecting byte 0 only
+ vext.8 q15, q6, q6, #2
+ vbit.32 d19, d30, d28
+ vext.32 q14, q14, q14, #3 @d28 = mask selecting byte 4 only
+ subs r6, r1, #5 @Z set when the predicted mode is 5 (VERT_R)
+ vext.8 q15, q6, q6, #13
+ vbit.32 d19, d30, d28
+ lslne r6, r0, #2
+ vabdl.u8 q14, d18, d20
+ vabal.u8 q14, d19, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #5
+
+horz_d:
+ vmov.8 q1, q5 @done before the HORZ_D test: horz_u below also reads q1
+ vmov.8 q15, q6
+ vzip.8 q1, q15 @q1 = bytes of q5 and q6 interleaved
+
+ ands r10, r8, #64 @HORZ_D sad ??
+ beq vert_l
+ vext.8 q15, q6, q6, #2 @prediction is assembled in q4 (d8, d9)
+ vbit.32 d8, d30, d26
+ mov r14, #0
+ vdup.32 q14 , r14
+ mov r14, #0xff
+ vmov.i8 d28[0], r14 @d28 = mask selecting byte 0 only
+ vext.8 q15, q5, q5, #3
+ vbit.32 d8, d30, d28
+ vext.8 q15, q1, q1, #2
+ vbit.32 d9, d30, d26
+ vext.32 q14, q13, q13, #3 @d28 = mask selecting the upper 32 bits of a d register
+ vbit.32 d8, d2, d28
+ subs r6, r1, #6 @Z set when the predicted mode is 6 (HORZ_D)
+ vext.8 q15, q1, q1, #12
+ vbit.32 d9, d30, d28
+ vabdl.u8 q14, d8, d20
+ vabal.u8 q14, d9, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #6
+vert_l:
+ ands r10, r8, #128 @VERT_L sad ??
+ beq horz_u
+ vext.8 q15, q5, q5, #5 @prediction is assembled in q12 (d24, d25)
+ vbit.32 d24, d30, d26
+ vext.8 q15, q15, q15, #1
+ vbit.32 d25, d30, d26
+ vext.8 q15, q6, q6, #1
+ vext.32 q14, q13, q13, #3 @d28 = mask selecting the upper 32 bits of a d register
+ vbit.32 d24, d30, d28
+ vext.8 q15, q15, q15, #1
+ subs r6, r1, #7 @Z set when the predicted mode is 7 (VERT_L)
+ vbit.32 d25, d30, d28
+ vabdl.u8 q14, d24, d20
+ vabal.u8 q14, d25, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #7
+
+horz_u:
+ ands r10, r8, #256 @HORZ_U sad ??
+ beq pred
+ vrev64.8 q5, q1 @prediction is assembled in q1 (d2, d3); q5 and q6 are reused as scratch
+ vdup.8 q1, d0[0]
+ vext.8 q6, q6, #7
+ mov r14, #0
+ vdup.32 q14 , r14
+ mov r14, #0xff
+ vmov.i8 d28[0], r14 @d28 = mask selecting byte 0 only
+ vbit.32 d11, d13, d28
+ movw r14, #0xffff
+ vmov.i16 d28[0], r14 @d28 = mask selecting bytes 0..1
+ vext.8 q6, q5, q5, #7
+ subs r6, r1, #8 @Z set when the predicted mode is 8 (HORZ_U)
+ vbit.32 d3, d12, d28
+ vext.8 q6, q5, q5, #3
+ vbit.32 d2, d12, d26
+ vext.32 q14, q13, q13, #3 @d28 = mask selecting the upper 32 bits of a d register
+ vext.8 q6, q5, q5, #1
+ vbit.32 d2, d12, d28
+ vabdl.u8 q14, d2, d20
+ vabal.u8 q14, d3, d21
+ vadd.i16 d28, d29, d28
+ vpaddl.u16 d28, d28 @
+ lslne r6, r0, #2
+ vpaddl.u32 d28, d28 @/
+ vmov.u32 r9, d28[0] @
+
+
+ moveq r6, r0 @
+ add r9, r6, r9
+
+ subs r6, r11, r9
+ movgt r11, r9
+ movgt r12, #8
+
+pred: @/* Doing final prediction */
+@---------------------------
+ ldr r7, [sp, #116] @r7 = pu4_sadmin
+ ldr r6, [sp, #112] @ R6 =MODE
+@--------------------------
+ str r11, [r7] @/STORING MIN SAD*/
+ str r12, [r6] @/FINAL MODE*/
+@ NOTE(review): r12 is only written by the movgt instructions above; if no mode was
+@ accepted it still holds the caller's value when it indexes the table below.
+ ldr r3, scratch_intrapred_luma_4x4_prediction_addr1
+scrintra_4x4:
+ add r3, r3, pc @r3 = address of scratch_intrapred_luma_4x4_prediction
+ lsl r12, r12, #2 @one 4-byte entry per mode
+ add r3, r3, r12
+
+ ldr r5, [r3] @r5 = branch target for the chosen mode
+ and r5, r5, #0xfffffffe @clear bit 0 of the target address
+
+ bx r5
+
+@ each target below copies the prediction built during evaluation into q15
+ver:
+ vext.8 q0, q0, q0, #1
+ vdup.32 q15, d0[1] @neighbour bytes 5..8 repeated for every row
+ b store
+
+hor:
+ vmov.32 q15, q3
+ b store
+
+d_c:
+ vdup.8 q15, r4 @r4 still holds the DC value computed at dc
+ b store
+
+dia_dl:
+ vmov.32 q15, q7
+ b store
+
+dia_dr:
+ vmov.32 q15, q8
+ b store
+
+ver_r:
+ vmov.32 q15, q9
+ b store
+
+hor_d:
+ vmov.32 q15, q4
+ b store
+
+ver_l:
+ vmov.32 q15, q12
+ b store
+
+hor_u:
+ vmov.32 q15, q1
+
+store: @/* storing to pu1_dst*/
+
+ ldr r4, [sp, #104] @r4 = dst_strd,
+
+ vst1.32 {d30[0]}, [r2], r4
+ vst1.32 {d30[1]}, [r2], r4
+ vst1.32 {d31[0]}, [r2], r4
+ vst1.32 {d31[1]}, [r2], r4
+
+
+end_func:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
+
+
diff --git a/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
new file mode 100755
index 0000000..e4dfca8
--- /dev/null
+++ b/encoder/arm/ih264e_evaluate_intra_chroma_modes_a9q.s
@@ -0,0 +1,346 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@* @brief :Evaluate best intra chroma mode (among VERT, HORZ and DC )
+@* and do the prediction.
+@*
+@* @par Description
+@* This function evaluates the first three intra chroma modes, computes the SAD
+@* of each, and writes the block predicted with the best mode to pu1_dst.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@** @param[in] pu1_ngbr_pels
+@* UWORD8 pointer to neighbouring pels
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@* @param[in] u4_n_avblty
+@* availability of neighbouring pixels
+@*
+@* @param[out] u4_intra_mode
+@* Pointer to the variable in which the best mode is returned
+@*
+@* @param[out] pu4_sadmin
+@* Pointer to the variable in which the minimum SAD is returned
+@*
+@* @param[in] u4_valid_intra_modes
+@* Bit mask of the modes to evaluate (bit 0 DC, bit 1 HORZ, bit 2 VERT)
+@*
+@*
+@* @return none
+@*
+@******************************************************************************
+@*/
+@
+@void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+@ UWORD8 *pu1_ngbr_pels_i16,
+@ UWORD8 *pu1_dst,
+@ UWORD32 src_strd,
+@ UWORD32 dst_strd,
+@ WORD32 u4_n_avblty,
+@ UWORD32 *u4_intra_mode,
+@ WORD32 *pu4_sadmin,
+@ UWORD32 u4_valid_intra_modes)
+@
+.text
+.p2align 2
+
+ .global ih264e_evaluate_intra_chroma_modes_a9q
+
+ih264e_evaluate_intra_chroma_modes_a9q:
+
+@ Register arguments on entry:
+@ r0 = pu1_src, r1 = pu1_ngbr_pels_i16,
+@ r2 = pu1_dst, r3 = src_strd
+@ Stack arguments and the registers they are loaded into below:
+@ r4 = dst_strd,
+@ r5 = u4_n_avblty,
+@ r6 = u4_intra_mode,
+@ r7 = pu4_sadmin
+
+
+
+ stmfd sp!, {r4-r12, r14} @store register values to stack
+ @-----------------------
+ ldr r5, [sp, #44] @r5 = u4_n_avblty,
+ @-------------------------
+ mov r12, r1 @keep the neighbour pointer; r1 is advanced by the loads below
+ vpush {d8-d15}
+ vld1.32 {q4}, [r1]! @q4 = neighbour bytes 0..15
+ add r1, r1, #2 @skip neighbour bytes 16..17
+ vld1.32 {q5}, [r1]! @q5 = neighbour bytes 18..33
+
+ vuzp.u8 q4, q5 @q4 = even-indexed bytes, q5 = odd-indexed bytes of the 32 loaded
+
+ vpaddl.u8 d8, d8
+ vpadd.u16 d8, d8 @each pair of adds leaves sums over groups of four bytes
+
+ vpaddl.u8 d9, d9
+ vpadd.u16 d9, d9
+
+ vpaddl.u8 d10, d10
+ vpadd.u16 d10, d10
+
+ vpaddl.u8 d11, d11
+
+ and r7, r5, #5 @keep the left (bit 0) and top (bit 2) availability bits
+ vpadd.u16 d11, d11
+ subs r8, r7, #5
+ beq all_available
+ subs r8, r7, #4
+ beq top_available
+ subs r8, r7, #1
+ beq left_available
+ mov r10, #128 @neither edge available: every DC value is 128
+ vdup.8 q14, r10
+ vdup.8 q15, r10
+ b sad
+
+all_available:
+ vzip.u16 q4, q5
+ vext.16 q6, q4, q4, #2
+ vadd.u16 q7, q5, q6
+ vqrshrn.u16 d14, q7, #3
+ vqrshrn.u16 d15, q4, #2
+ vqrshrn.u16 d16, q5, #2
+ vdup.16 d28, d14[0] @d28/d29 = DC predictors for rows 0..3, d30/d31 for rows 4..7
+ vdup.16 d29, d16[1]
+ vdup.16 d30, d15[0]
+ vdup.16 d31, d14[1]
+ b sad
+top_available:
+ vzip.u16 q4, q5
+ vqrshrn.u16 d16, q5, #2
+ vdup.16 d28, d16[0]
+ vdup.16 d29, d16[1]
+ vdup.16 d30, d16[0]
+ vdup.16 d31, d16[1]
+ b sad
+left_available:
+ vzip.u16 q4, q5
+ vqrshrn.u16 d16, q4, #2
+ vdup.16 d28, d16[3]
+ vdup.16 d29, d16[3]
+ vdup.16 d30, d16[2]
+ vdup.16 d31, d16[2]
+
+
+sad:
+ vld1.32 {q4}, [r12]! @reload neighbour bytes 0..15 (q4 and q5 were used as scratch above)
+ sub r8, r12, #2 @r8 -> neighbour bytes 14..15, the horizontal pair of row 0
+ add r12, r12, #2
+ vld1.32 {q5}, [r12]! @q5 = neighbour bytes 18..33, the vertical predictor row
+ add r12, r0, r3, lsl #2 @r12 -> source row 4
+ sub r10, r8, #8 @r10 -> neighbour bytes 6..7, the horizontal pair of row 4
+ vld1.32 {q0}, [r0], r3
+ ldrh r9, [r8]
+ vdup.16 q10, r9 @ row 0
+
+ @/vertical row 0;
+ vabdl.u8 q8, d0, d10
+ vabdl.u8 q9, d1, d11
+ sub r8, r8, #2
+ vld1.32 {q1}, [r12], r3
+
+ @/HORZ row 0;
+ vabdl.u8 q13, d0, d20
+ vabdl.u8 q7, d1, d21
+ ldrh r9, [r10]
+ @/dc row 0;
+ vabdl.u8 q11, d0, d28
+ vabdl.u8 q12, d1, d29
+
+
+ vdup.16 q10, r9 @ row 4
+ @/vertical row 4;
+ vabal.u8 q8, d2, d10
+ vabal.u8 q9, d3, d11
+ sub r10, r10, #2
+
+ @/HORZ row 4;
+ vabal.u8 q13, d2, d20
+ vabal.u8 q7, d3, d21
+ @/dc row 4;
+ vabal.u8 q11, d2, d30
+ vabal.u8 q12, d3, d31
+
+ mov r11, #3 @three more iterations, each handling rows i and i+4
+
+loop:
+ vld1.32 {q0}, [r0], r3
+ ldrh r9, [r8]
+
+
+ @/vertical row i;
+ vabal.u8 q8, d0, d10
+ vabal.u8 q9, d1, d11
+
+ vdup.16 q10, r9 @ row i
+ vld1.32 {q1}, [r12], r3
+ sub r8, r8, #2
+ @/HORZ row i;
+ vabal.u8 q13, d0, d20
+ vabal.u8 q7, d1, d21
+ ldrh r9, [r10]
+ @/dc row i;
+ vabal.u8 q11, d0, d28
+ vabal.u8 q12, d1, d29
+ sub r10, r10, #2
+
+ vdup.16 q10, r9 @ row i+4
+ @/vertical row i+4;
+ vabal.u8 q8, d2, d10
+ vabal.u8 q9, d3, d11
+ subs r11, r11, #1
+
+ @/HORZ row i+4;
+ vabal.u8 q13, d2, d20
+ vabal.u8 q7, d3, d21
+ @/dc row i+4;
+ vabal.u8 q11, d2, d30
+ vabal.u8 q12, d3, d31
+ bne loop
+
+
+
+@-------------------------------------------
+@ reduce each pair of 16-bit accumulators to one 32-bit SAD
+ vadd.i16 q9, q9, q8 @/VERT
+ vadd.i16 q7, q13, q7 @/HORZ
+ vadd.i16 q12, q11, q12 @/DC
+ vadd.i16 d18, d19, d18 @/VERT
+ vadd.i16 d14, d15, d14 @/HORZ
+ vadd.i16 d24, d24, d25 @/DC
+ vpaddl.u16 d18, d18 @/VERT
+ vpaddl.u16 d14, d14 @/HORZ
+ vpaddl.u16 d24, d24 @/DC
+ vpaddl.u32 d18, d18 @/VERT
+ vpaddl.u32 d14, d14 @/HORZ
+ vpaddl.u32 d24, d24 @/DC
+
+
+
+ vmov.u32 r8, d18[0] @ vert
+ vmov.u32 r9, d14[0] @horz
+ vmov.u32 r10, d24[0] @dc
+
+ mov r11, #1
+@-----------------------
+ ldr r0, [sp, #120] @ u4_valid_intra_modes
+@--------------------------------------------
+
+
+ lsl r11 , #30 @r11 = 1 << 30, substituted for the SAD of every mode that is not valid
+
+ ands r7, r0, #04 @ vert mode valid? (bit 2)
+ moveq r8, r11
+
+ ands r6, r0, #02 @ horz mode valid? (bit 1)
+ moveq r9, r11
+
+ ands r6, r0, #01 @ dc mode valid? (bit 0)
+ moveq r10, r11
+
+
+ @---------------------------
+ ldr r4, [sp, #104] @r4 = dst_strd,
+ ldr r6, [sp, #112] @ R6 =MODE
+ ldr r7, [sp, #116] @r7 = pu4_sadmin
+
+ @--------------------------
+ @ pick the smallest SAD; on a tie DC wins over HORZ and VERT, HORZ wins over VERT
+ cmp r10, r9
+ bgt not_dc
+ cmp r10, r8
+ bgt do_vert
+
+ @/----------------------
+ @DO DC PREDICTION
+ str r10 , [r7] @MIN SAD
+ mov r10, #0
+ str r10 , [r6] @ MODE
+ b do_dc_vert
+ @-----------------------------
+
+not_dc:
+ cmp r9, r8
+ bgt do_vert
+ @/----------------------
+ @DO HORIZONTAL
+
+ vdup.16 q10, d9[3] @/HORIZONTAL VALUE ROW=0;
+ str r9 , [r7] @MIN SAD
+ mov r9, #1
+ vdup.16 q11, d9[2] @/HORIZONTAL VALUE ROW=1;
+ str r9 , [r6] @ MODE
+ vdup.16 q12, d9[1] @/HORIZONTAL VALUE ROW=2;
+ vst1.32 {d20, d21} , [r2], r4 @0
+ vdup.16 q13, d9[0] @/HORIZONTAL VALUE ROW=3;
+ vst1.32 {d22, d23} , [r2], r4 @1
+ vdup.16 q14, d8[3] @/HORIZONTAL VALUE ROW=4;
+ vst1.32 {d24, d25} , [r2], r4 @2
+ vdup.16 q15, d8[2] @/HORIZONTAL VALUE ROW=5;
+ vst1.32 {d26, d27} , [r2], r4 @3
+ vdup.16 q1, d8[1] @/HORIZONTAL VALUE ROW=6;
+ vst1.32 {d28, d29} , [r2], r4 @4
+ vdup.16 q2, d8[0] @/HORIZONTAL VALUE ROW=7;
+ vst1.32 {d30, d31} , [r2], r4 @5
+ vst1.32 {d2, d3} , [r2], r4 @6
+ vst1.32 {d4, d5} , [r2], r4 @7
+ b end_func
+
+do_vert:
+ @DO VERTICAL PREDICTION
+ str r8 , [r7] @MIN SAD
+ mov r8, #2
+ str r8 , [r6] @ MODE
+ vmov q15, q5 @both halves of the block use the vertical predictor row
+ vmov q14, q5
+
+do_dc_vert: @ shared store: q14 feeds rows 0..3, q15 feeds rows 4..7
+ vst1.32 {d28, d29} , [r2], r4 @0
+ vst1.32 {d28, d29} , [r2], r4 @1
+ vst1.32 {d28, d29} , [r2], r4 @2
+ vst1.32 {d28, d29} , [r2], r4 @3
+ vst1.32 {d30, d31} , [r2], r4 @4
+ vst1.32 {d30, d31} , [r2], r4 @5
+ vst1.32 {d30, d31} , [r2], r4 @6
+ vst1.32 {d30, d31} , [r2], r4 @7
+
+
+end_func:
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
+
+
+
diff --git a/encoder/arm/ih264e_fmt_conv.s b/encoder/arm/ih264e_fmt_conv.s
new file mode 100755
index 0000000..2bf1479
--- /dev/null
+++ b/encoder/arm/ih264e_fmt_conv.s
@@ -0,0 +1,329 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+.text
+.p2align 2
+@/**
+
+@/*****************************************************************************
+@*
+@* Function Name : ih264e_fmt_conv_420p_to_420sp_a9q()
+@*
+@* Description   : Converts an image from planar YUV420 (separate Y, U and V
+@*                 planes) to semi-planar YUV420 (Y plane plus a plane of
+@*                 interleaved U,V bytes). The Y plane is copied first unless
+@*                 convert_uv_only is 1, in which case only chroma is written.
+@*
+@* Arguments     : R0          pu1_y
+@*                 R1          pu1_u
+@*                 R2          pu1_v
+@*                 R3          pu1_dest_y
+@*                 [R13 #40]   pu1_dest_uv
+@*                 [R13 #44]   u2_height
+@*                 [R13 #48]   u2_width
+@*                 [R13 #52]   u2_stridey
+@*                 [R13 #56]   u2_strideu
+@*                 [R13 #60]   u2_stridev (not read; u2_strideu steps both planes)
+@*                 [R13 #64]   u2_dest_stride_y
+@*                 [R13 #68]   u2_dest_stride_uv
+@*                 [R13 #72]   convert_uv_only
+@*                 Stack offsets are relative to R13 after the 40-byte register
+@*                 save on entry; R13 must not move before they are all read.
+@*
+@* Values Returned : None
+@*
+@* Register Usage  : R0 - R8, D0 - D1
+@* Stack Usage     : 40 Bytes
+@* Interruptibility : Interruptible
+@*
+@* Assumptions     : Image Width: even and at least 16. A width that is not a
+@*                   multiple of 16 is handled by re-copying an overlapping
+@*                   tail of each row.
+@*                   Image Height: Assumed to be even.
+@* Revision History :
+@* DD MM YYYY Author(s) Changes (Describe the changes made)
+@* 07 06 2010 Varshita Draft
+@* 07 06 2010 Naveen Kr T Completed
+@*****************************************************************************/
+    .global ih264e_fmt_conv_420p_to_420sp_a9q
+
+ih264e_fmt_conv_420p_to_420sp_a9q:
+
+    @// Save the core registers on the stack (10 words = 40 bytes)
+    stmfd         sp!, {r4-r12, lr}
+
+    ldr           r4, [sp, #72]         @// Load convert_uv_only
+
+    cmp           r4, #1
+    beq           yuv420sp_uv_chroma    @// Chroma only: skip the Y plane copy
+    @/* Do the preprocessing before the main loops start */
+    @// Load the parameters from stack
+    ldr           r4, [sp, #44]         @// Load u2_height from stack
+    ldr           r5, [sp, #48]         @// Load u2_width from stack
+    ldr           r7, [sp, #52]         @// Load u2_stridey from stack
+    ldr           r8, [sp, #64]         @// Load u2_dest_stride_y from stack
+    sub           r7, r7, r5            @// Source increment
+    sub           r8, r8, r5            @// Destination increment
+
+    @// No vpush here: only d0/d1 are used, and sp must not move before the chroma loads
+yuv420sp_uv_row_loop_y:
+    mov           r6, r5
+
+yuv420sp_uv_col_loop_y:
+    pld           [r0, #128]
+    vld1.8        {d0, d1}, [r0]!
+    vst1.8        {d0, d1}, [r3]!
+    sub           r6, r6, #16
+    cmp           r6, #15
+    bgt           yuv420sp_uv_col_loop_y
+
+    cmp           r6, #0
+    beq           yuv420sp_uv_row_loop_end_y
+    @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
+    @//Ex if width is 162, above loop will process 160 pixels. And
+    @//Both source and destination will point to 146th pixel and then 16 bytes will be read
+    @// and written using VLD1 and VST1
+    rsb           r6, r6, #16
+    sub           r0, r0, r6
+    sub           r3, r3, r6
+
+    vld1.8        {d0, d1}, [r0]!
+    vst1.8        {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_y:
+    add           r0, r0, r7
+    add           r3, r3, r8
+    subs          r4, r4, #1
+    bgt           yuv420sp_uv_row_loop_y
+
+yuv420sp_uv_chroma:
+
+    ldr           r3, [sp, #40]         @// Load pu1_dest_uv from stack
+
+    ldr           r4, [sp, #44]         @// Load u2_height from stack
+
+    ldr           r5, [sp, #48]         @// Load u2_width from stack
+
+
+    ldr           r7, [sp, #56]         @// Load u2_strideu from stack
+
+    ldr           r8, [sp, #68]         @// Load u2_dest_stride_uv from stack
+
+    sub           r7, r7, r5, lsr #1    @// Source increment (chroma row is width / 2 bytes)
+
+    sub           r8, r8, r5            @// Destination increment (interleaved row is width bytes)
+
+    mov           r5, r5, lsr #1        @// Chroma width  = u2_width / 2
+    mov           r4, r4, lsr #1        @// Chroma height = u2_height / 2
+    @// d8-d15 are never touched, so nothing is pushed here either; the stack is
+    @// identical on the luma+chroma path and on the chroma-only path
+yuv420sp_uv_row_loop_uv:
+    mov           r6, r5
+
+
+yuv420sp_uv_col_loop_uv:
+    pld           [r1, #128]
+    pld           [r2, #128]
+    vld1.8        d0, [r1]!
+    vld1.8        d1, [r2]!
+    vst2.8        {d0, d1}, [r3]!       @// Interleave 8 U and 8 V bytes into 16 output bytes
+    sub           r6, r6, #8
+    cmp           r6, #7
+    bgt           yuv420sp_uv_col_loop_uv
+
+    cmp           r6, #0
+    beq           yuv420sp_uv_row_loop_end_uv
+    @//If the chroma width is not a multiple of 8, go back by few bytes to ensure 8 bytes can be read
+    @//Ex if width is 162, chroma width is 81 and the above loop will process 80 samples. Both
+    @//sources then point to the 73rd sample and the destination goes back twice as many bytes;
+    @// 8 bytes of U and of V are read with VLD1 and written interleaved using VST2
+    rsb           r6, r6, #8
+    sub           r1, r1, r6
+    sub           r2, r2, r6
+    sub           r3, r3, r6, lsl #1
+
+    vld1.8        d0, [r1]!
+    vld1.8        d1, [r2]!
+    vst2.8        {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_uv:
+    add           r1, r1, r7
+    add           r2, r2, r7
+    add           r3, r3, r8
+    subs          r4, r4, #1
+    bgt           yuv420sp_uv_row_loop_uv
+    @//POP THE REGISTERS
+    @// (core registers only: no NEON registers were saved, so there is no vpop)
+    ldmfd         sp!, {r4-r12, pc}
+
+
+
+
+
+@ /**
+@ *******************************************************************************
+@ *
+@ * @brief ih264e_fmt_conv_422i_to_420sp_a9q
+@ * Converts an interleaved YUV 4:2:2 image to YUV 4:2:0 semi-planar
+@ *
+@ *
+@ *
+@ *Inputs : r0 - pu1_y - UWORD8 pointer to the output y plane.
+@ * r1 - pu1_u - UWORD8 pointer to the output interleaved chroma plane.
+@ * r2 - pu1_v - UWORD8 pointer to v plane (not used by this routine).
+@ * r3 - pu2_yuv422i - UWORD16 pointer to the input yuv422i image.
+@ * stack + 40 - u4_width - Width of the Y plane (offsets are after the 40-byte register save).
+@ * 44 - u4_height - Height of the Y plane.
+@ * 48 - u4_stride_y - Stride in pixels (bytes) of Y plane.
+@ * 52 - u4_stride_u - Stride in bytes of the interleaved chroma plane.
+@ * 56 - u4_stride_v - Stride of V plane (not read by this routine).
+@ * 60 - u4_stride_yuv422i- Stride in pixels of yuv422i image (2 bytes per pixel).
+@ *
+@ * @par Description
+@ * Processes the image two rows at a time. Each 4-byte group of the input holds
+@ * two luma and two chroma samples: bytes 1 and 3 are written to the Y plane,
+@ * bytes 0 and 2 are the chroma samples.
+@ *
+@ * Luma of both rows is copied unchanged. The chroma of the two rows is
+@ * combined with a rounding average (vrhadd.u8) and stored once, byte 0 and
+@ * byte 2 samples interleaved, giving one chroma row per pair of input rows.
+@ *
+@ * @param[out] pu1_y
+@ * Output Y pointer
+@ *
+@ * @param[out] pu1_u
+@ * Output pointer of the interleaved chroma plane
+@ *
+@ * @param[in] pu1_v
+@ * Not used by this routine
+@ *
+@ * @param[in] pu2_yuv422i
+@ * Input interleaved 4:2:2 image
+@ *
+@ * @param[in] u4_width, u4_height, u4_stride_y, u4_stride_u, u4_stride_yuv422i
+@ * Passed on the stack at the offsets listed above
+@ *
+@ *
+@ *
+@ * @returns none (r0 is not set to a status value)
+@ *
+@ * @remarks
+@ * u4_stride_v is not read; chroma is written only through pu1_u.
+@ * There is no tail handling: the inner loop runs u4_width >> 4 times and the
+@ * outer loop u4_height >> 1 times, and both loops execute at least once.
+@ * In assembly width should be aligned to 16 and height to 2.
+@ * lr (r14) is used as a scratch register after being saved on the stack.
+@ *
+@ *
+@ * Revision History :
+@ * DD MM YYYY Author(s) Changes (Describe the changes made)
+@ * 07 06 2010 Harinarayanan K K Adapted to 422p
+@ *
+@ *******************************************************************************
+@ */
+
+@//`
+@*/
+ .global ih264e_fmt_conv_422i_to_420sp_a9q
+ih264e_fmt_conv_422i_to_420sp_a9q:
+ stmfd sp!, {r4-r12, lr} @// Save the registers which are used (10 words = 40 bytes)
+
+
+
+ @/* Do the preprocessing before the main loops start */
+ @// Load the parameters from stack
+ ldr r4, [sp, #48] @// Load u4_stride_y from stack
+
+ ldr r5, [sp, #60] @// Load u4_stride_yuv422i from stack
+ add r6, r0, r4 @// pu1_y_nxt_row = pu1_y + u4_stride_y
+
+ ldr r7, [sp, #40] @// Load u4_width from stack
+ add r8, r3, r5, lsl #1 @// pu2_yuv422i_nxt_row = pu2_yuv422i_y + u4_stride_yuv422i(2 Bytes for each pixel)
+
+ ldr r9, [sp, #52] @// Load u4_stride_u from stack
+ sub r12, r4, r7 @// u2_offset1 = u4_stride_y - u4_width
+
+@LDR r10,[sp,#56] ;// Load u4_stride_v from stack
+ sub r14, r5, r7 @// u2_offset_yuv422i = u4_stride_yuv422i - u4_width (in pixels)
+
+ ldr r11, [sp, #44] @// Load u4_height from stack
+ sub r9, r9, r7 @// u2_offset2 = u4_stride_u - u4_width (an interleaved chroma row is u4_width bytes)
+
+@ SUB r10,r10,r7,ASR #1 ;// u2_offset3 = u4_stride_v - u4_width >> 1
+ mov r14, r14, lsl #1 @// u2_offset_yuv422i = u2_offset_yuv422i * 2 (pixels to bytes)
+
+ mov r7, r7, asr #4 @// u4_width = u4_width / 16 (u4_width >> 4)
+ mov r11, r11, asr #1 @// u4_height = u4_height / 2 (u4_height >> 1)
+
+ add r4, r12, r4 @// u2_offset1 = u2_offset1 + u4_stride_y (skip the row handled via r6)
+ add r5, r14, r5, lsl #1 @// u2_offset_yuv422i = u2_offset_yuv422i + u4_stride_yuv422i * 2 (skip the row handled via r8)
+
+ vpush {d8-d15} @// NOTE(review): only d0-d7 are used below, so this save looks unnecessary - confirm
+
+@// Register Assignment
+@// pu1_y - r0
+@// pu1_y_nxt_row - r6
+@// pu1_u - r1
+@// pu1_v - r2 (never accessed)
+@// pu2_yuv422i - r3
+@// pu2_yuv422i_nxt_row - r8
+@// u2_offset1 - r4
+@// u2_offset2 - r9
+@// u2_offset3 - r10 (never set up; the code computing it is commented out)
+@// u2_offset_yuv422i - r5
+@// u4_width / 16 - r7
+@// u4_height / 2 - r11
+@// inner loop count - r12
+yuv420_to_yuv422i_hight_loop:
+
+ mov r12, r7 @// Inner loop count = u4_width / 16
+
+yuv420_to_yuv422i_width_loop:
+ vld4.8 {d0, d1, d2, d3}, [r3]! @// Load 16 pixels (32 bytes) of row 1, de-interleaved by byte position
+ vld4.8 {d4, d5, d6, d7}, [r8]! @// Load 16 pixels (32 bytes) of row 2, de-interleaved by byte position
+ subs r12, r12, #1 @// Flags are consumed by the bgt at the end of the loop body
+
+ vrhadd.u8 d0, d0, d4 @// Rounding average of the byte 0 chroma samples of both rows
+ vrhadd.u8 d2, d2, d6 @// Rounding average of the byte 2 chroma samples of both rows
+
+ vst2.8 {d1, d3}, [r0]! @// Store the 16 elements of row1 Y
+ vst2.8 {d5, d7}, [r6]! @// Store the 16 elements of row2 Y
+
+ vst2.8 {d0, d2}, [r1]! @// Store 8 interleaved chroma pairs (16 bytes) for the row pair
+
+ bgt yuv420_to_yuv422i_width_loop
+
+ @// Update the buffer pointer so that they will refer to next pair of rows
+ add r0, r0, r4 @// pu1_y = pu1_y + u2_offset1
+ add r6, r6, r4 @// pu1_y_nxt_row = pu1_y_nxt_row + u2_offset1
+
+ add r1, r1, r9 @// pu1_u = pu1_u + u2_offset2
+ subs r11, r11, #1 @// Flags are consumed by the bgt below (the adds in between do not set flags)
+
+ add r3, r3, r5 @// pu2_yuv422i = pu2_yuv422i + u2_offset_yuv422i
+
+ add r8, r8, r5 @// pu2_yuv422i_nxt_row = pu2_yuv422i_nxt_row + u2_offset_yuv422i
+ bgt yuv420_to_yuv422i_hight_loop
+ vpop {d8-d15}
+ ldmfd sp!, {r4-r12, pc} @// Restore the register which are used
+
+
+
diff --git a/encoder/arm/ih264e_function_selector.c b/encoder/arm/ih264e_function_selector.c
new file mode 100755
index 0000000..bb181c1
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector.c
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in h264
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Select the function pointer set of the codec context (ARMv8 build)
+*
+* @par Description: installs the generic function pointers first and then,
+* unless the configured architecture is ARCH_ARM_NONEON, overrides them with
+* the ARMv8 NEON set
+*
+* @param[in] pv_codec
+* Codec context pointer (a codec_t *, passed as void *)
+*
+* @returns none
+*
+* @remarks every architecture value other than ARCH_ARM_NONEON selects the
+* NEON set; that covers ARCH_ARM_A53, ARCH_ARM_A57, ARCH_ARM_V8_NEON and any
+* value not listed here
+*
+*******************************************************************************
+*/
+#ifdef ARMV8
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+
+    /* baseline: the generic function pointers are always installed */
+    ih264e_init_function_ptr_generic(ps_codec);
+
+    /* only an explicit request for a non-NEON target keeps the baseline */
+    if(ARCH_ARM_NONEON == ps_codec->s_cfg.e_arch)
+    {
+        return;
+    }
+
+    /* NEON overrides for all remaining architecture values */
+    ih264e_init_function_ptr_neon_av8(ps_codec);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Report the default architecture of the encoder (ARMV8 build)
+*
+* @par Description: This routine returns a fixed, compile-time default; no
+* detection of the executing environment is performed here
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* always ARCH_ARM_V8_NEON
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void)
+{
+ return ARCH_ARM_V8_NEON;
+}
+
+#else
+
+/**
+* @brief Select the function pointer set of the codec context (non-ARMv8 build)
+*
+* @par Description: installs the generic function pointers first and then,
+* unless the configured architecture is ARCH_ARM_NONEON, overrides them with
+* the a9q NEON set (ARCH_ARM_A9Q/A9A/A9/A7/A5/A15 and any unlisted value)
+*
+* @param[in] pv_codec
+* Codec context pointer (a codec_t *, passed as void *)
+*/
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+
+    ih264e_init_function_ptr_generic(ps_codec);
+    if(ARCH_ARM_NONEON == ps_codec->s_cfg.e_arch)
+    {
+        return;
+    }
+    ih264e_init_function_ptr_neon_a9q(ps_codec);
+}
+
+IV_ARCH_T ih264e_default_arch(void)
+{
+ return ARCH_ARM_A9Q; /* fixed default when ARMV8 is not defined; nothing is detected at run time */
+}
+
+#endif
diff --git a/encoder/arm/ih264e_function_selector_a9q.c b/encoder/arm/ih264e_function_selector_a9q.c
new file mode 100755
index 0000000..8b2879b
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector_a9q.c
@@ -0,0 +1,252 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_a9q.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_neon_a9q
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Point the function pointers of the codec context at the ARM NEON
+* (a9 / a9q) implementations
+*
+* @par Description: assigns the intra pred, transform, deblocking, padding,
+* inter pred, SAD, memory, intra mode evaluation, csc and half pel pointers
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks a few entries are assigned functions without an _a9 / _a9q suffix
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec)
+{
+ WORD32 i= 0;
+
+ /* curr proc ctxt */
+ process_ctxt_t *ps_proc = NULL;
+ me_ctxt_t *ps_me_ctxt = NULL;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 16x16 */
+ ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_a9q;
+ ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_a9q;
+ ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_a9q;
+ ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_a9q;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 4x4 */
+ ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_a9q;
+ ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_a9q;
+ ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_a9q;
+ ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_a9q;
+ ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_a9q;
+ ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_a9q;
+ ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_a9q;
+ ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_a9q;
+ ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_a9q;
+
+ /* Intra 8x8 luma intra pred leaf level functions. NOTE(review): index 1 is
+ * not assigned here - presumably left as set by an earlier init; confirm */
+ ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_a9q;
+ ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_a9q;
+ ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_a9q;
+ ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_a9q;
+ ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_a9q;
+ ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_a9q;
+ ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_a9q;
+ ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_a9q;
+
+ /* Init function pointers for intra pred leaf level functions chroma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_a9q;
+ ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_a9q;
+ ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_a9q;
+ ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_a9q;
+
+ /* Init forward transform fn ptr (the 8x8 entry has no _a9 suffix) */
+ ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+ ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_a9;
+ ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_a9;
+ ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_a9;
+ ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_a9;
+
+ /* Init inverse transform fn ptr (the 8x8 entry has no _a9 suffix) */
+ ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8;
+ ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_a9;
+ ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_a9;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_a9;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_a9;
+ ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_a9;
+ ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_a9;
+ ps_codec->pf_interleave_copy = ih264_interleave_copy_a9;
+
+ /* Init fn ptr luma core coding (index 2 is not assigned here) */
+ ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+ ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+ ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+ /* Init fn ptr chroma core coding */
+ ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+ ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+ /* Init fn ptr luma deblocking */
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_a9;
+ ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_a9;
+ ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_a9;
+ ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_a9;
+
+ /* Init fn ptr chroma deblocking */
+ ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_a9;
+ ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_a9;
+ ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_a9;
+ ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_a9;
+
+ /* write mb syntax layer */
+ ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+ ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+ /* Padding Functions (the bottom padding entry has no _a9q suffix) */
+ ps_codec->pf_pad_top = ih264_pad_top_a9q;
+ ps_codec->pf_pad_bottom = ih264_pad_bottom;
+ ps_codec->pf_pad_left_luma = ih264_pad_left_luma_a9q;
+ ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_a9q;
+ ps_codec->pf_pad_right_luma = ih264_pad_right_luma_a9q;
+ ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_a9q;
+
+ /* Inter pred leaf level functions */
+ ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_a9q;
+ ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_a9q;
+ ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_a9q;
+ ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear_a9q;
+ ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_a9q;
+
+ /* sad me level functions */
+ ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+ ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+ ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+
+ /* memory handling operations */
+ ps_codec->pf_mem_cpy = ih264_memcpy_a9q;
+ ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_a9q;
+ ps_codec->pf_mem_set = ih264_memset_a9q;
+ ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_a9q;
+
+ /* sad me level functions, set in the me context of every process context */
+ for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+ {
+ ps_proc = &ps_codec->as_process[i];
+ ps_me_ctxt = &ps_proc->s_me_ctxt;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_a9q;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_a9q;
+ ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_a9q;
+ ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_a9q;
+ ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_a9q;
+ ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_a9q;
+ ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_a9q;
+ ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_a9q;
+ }
+
+ /* intra mode eval -encoder level function */
+ ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_a9q;
+ ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_a9q;
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_a9q;
+
+ /* csc */
+ ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp_a9q;
+ ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp_a9q;
+
+ /* Half pel generation function - encoder level*/
+ ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_a9q;
+ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_a9q;
+
+ return ;
+ }
+
diff --git a/encoder/arm/ih264e_function_selector_av8.c b/encoder/arm/ih264e_function_selector_av8.c
new file mode 100755
index 0000000..173c2d5
--- /dev/null
+++ b/encoder/arm/ih264e_function_selector_av8.c
@@ -0,0 +1,259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_av8.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_neon_av8
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec)
+{
+
+ WORD32 i= 0;
+
+ /* curr proc ctxt */
+ process_ctxt_t *ps_proc = NULL;
+ me_ctxt_t *ps_me_ctxt = NULL;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 16x16 */
+ ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_av8;
+ ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_av8;
+ ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_av8;
+ ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_av8;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 4x4 */
+ ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_av8;
+ ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_av8;
+ ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_av8;
+ ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_av8;
+ ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_av8;
+ ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_av8;
+ ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_av8;
+ ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_av8;
+ ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_av8;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_av8;
+ ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_av8;
+ ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_av8;
+ ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_av8;
+ ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_av8;
+ ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_av8;
+ ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_av8;
+ ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_av8;
+
+ /* Init function pointers for intra pred leaf level functions chroma
+ * Intra 8x8 */
+ ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc_av8;
+ ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_av8;
+ ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_av8;
+ ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_av8;
+
+
+ /* Init forward transform fn ptr */
+ ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+ ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_av8;
+ ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_av8;
+ ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_av8;
+ ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_av8;
+
+ /* Init inverse transform fn ptr */
+ ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8_av8;
+ ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_av8;
+ ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_av8;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_av8;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_av8;
+ ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_av8;
+ ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv_av8;
+ ps_codec->pf_interleave_copy = ih264_interleave_copy_av8;
+
+ /* Init fn ptr luma core coding */
+ ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+ ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+ ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+ /* Init fn ptr chroma core coding */
+ ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+ ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+ /* Init fn ptr luma deblocking */
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_av8;
+ ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_av8;
+ ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_av8;
+ ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_av8;
+
+ /* Init fn ptr chroma deblocking */
+ ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_av8;
+ ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_av8;
+ ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_av8;
+ ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_av8;
+
+ /* write mb syntax layer */
+ ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+ ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+ /* Padding Functions */
+ ps_codec->pf_pad_top = ih264_pad_top_av8;
+ ps_codec->pf_pad_bottom = ih264_pad_bottom;
+ ps_codec->pf_pad_left_luma = ih264_pad_left_luma_av8;
+ ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_av8;
+ ps_codec->pf_pad_right_luma = ih264_pad_right_luma_av8;
+ ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_av8;
+
+ /* Inter pred leaf level functions */
+ ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_av8;
+ ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_av8;
+ ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_av8;
+ ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+ ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_av8;
+
+ /* sad me level functions */
+ ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+ ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+ ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+
+ /* memory handling operations */
+ ps_codec->pf_mem_cpy = ih264_memcpy_av8;
+ ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_av8;
+ ps_codec->pf_mem_set = ih264_memset_av8;
+ ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_av8;
+
+ /* sad me level functions */
+ for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+ {
+ ps_proc = &ps_codec->as_process[i];
+ ps_me_ctxt = &ps_proc->s_me_ctxt;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_av8;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_av8;
+ ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_av8;
+ ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_av8;
+ ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog_av8;
+ ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog_av8;
+ ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_av8;
+ ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_av8;
+ }
+
+ /* intra mode eval -encoder level function */
+ ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_av8;
+ ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_av8;
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+ /* csc */
+ ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+ ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+ /* Half pel generation functions - encoder level */
+ ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_av8;
+ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_av8;
+
+ return ;
+ }
+
diff --git a/encoder/arm/ih264e_half_pel.s b/encoder/arm/ih264e_half_pel.s
new file mode 100755
index 0000000..1b9a87a
--- /dev/null
+++ b/encoder/arm/ih264e_half_pel.s
@@ -0,0 +1,951 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ih264e_half_pel.s
+@ *
+@ * @brief
+@ *
+@ *
+@ * @author
+@ * Ittiam
+@ *
+@ * @par List of Functions:
+@ * ih264e_sixtapfilter_horz
+@ * ih264e_sixtap_filter_2dvh_vert
+@
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@ */
+
+
+.text @ assemble what follows into the text (code) section
+.p2align 2 @ align the next location to a 2^2 = 4-byte boundary
+
+@ /**
+@/*******************************************************************************
+@*
+@* @brief
+@* Inter-prediction luma filter for horizontal input (filter run for width = 17 and height = 16)
+@*
+@* @par Description:
+@* Applies a 6-tap horizontal filter. The output is clipped to 8 bits.
+@* See sec 8.4.2.2.1 titled "Luma sample interpolation process".
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst
+@* UWORD8 pointer to the destination
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride
+@*
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst,
+@ WORD32 src_strd,
+@ WORD32 dst_strd);
+
+
+.equ HALFPEL_WIDTH , 17 + 1 @ loaded into r14 below as the row-loop count; 17 is made even because two rows are processed per iteration
+@ ih264e_sixtapfilter_horz_a9q: 6-tap horizontal filter, out = sat_u8((a0 - 5*a1 + 20*a2 + 20*a3 - 5*a4 + a5 + 16) >> 5).
+@ Per the prototype above: r0 = pu1_src, r1 = pu1_dst, r2 = src_strd, r3 = dst_strd. Each iteration filters two rows and stores 24 bytes per row.
+ .global ih264e_sixtapfilter_horz_a9q
+ih264e_sixtapfilter_horz_a9q:
+ stmfd sp!, {lr} @ save the return address
+
+ vmov.i8 d0, #5 @ d0 = 5 in every byte lane (weight of taps a1 and a4)
+ sub r0, r0, #2 @ move the source pointer back 2 bytes so each lane's 6-tap window starts at a0
+
+ vmov.i8 d1, #20 @ d1 = 20 in every byte lane (weight of taps a2 and a3)
+ mov r14, #HALFPEL_WIDTH @ r14 = rows remaining
+ vpush {d8-d15} @ save d8-d15, which q4-q7 overwrite as accumulators below
+
+filter_horz_loop:
+
+
+ vld1.8 {d2, d3, d4}, [r0], r2 @// Load 24 bytes of row0, then r0 += r2
+ vld1.8 {d5, d6, d7}, [r0], r2 @// Load 24 bytes of row1, then r0 += r2
+
+ @// Processing row0 and row1
+ @// column1/2/3 = loaded bytes 0-7/8-15/16-23 of a row; "vext #k" yields tap a[k] for each lane of a column
+ vext.8 d31, d2, d3, #5 @//extract a[5] (column1,row0)
+ vext.8 d30, d3, d4, #5 @//extract a[5] (column2,row0)
+
+ vaddl.u8 q4, d31, d2 @// a0 + a5 (column1,row0)
+ vext.8 d29, d4, d4, #5 @//extract a[5] (column3,row0); no 4th register, so lanes 3-7 wrap around within d4
+ vaddl.u8 q5, d30, d3 @// a0 + a5 (column2,row0)
+ vext.8 d28, d5, d6, #5 @//extract a[5] (column1,row1)
+ vaddl.u8 q6, d29, d4 @// a0 + a5 (column3,row0)
+ vext.8 d27, d6, d7, #5 @//extract a[5] (column2,row1)
+ vaddl.u8 q7, d28, d5 @// a0 + a5 (column1,row1)
+ vext.8 d26, d7, d7, #5 @//extract a[5] (column3,row1); lanes 3-7 wrap around within d7
+
+ vaddl.u8 q8, d27, d6 @// a0 + a5 (column2,row1)
+ vext.8 d31, d2, d3, #2 @//extract a[2] (column1,row0)
+ vaddl.u8 q9, d26, d7 @// a0 + a5 (column3,row1)
+ vext.8 d30, d3, d4, #2 @//extract a[2] (column2,row0)
+ vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vext.8 d29, d4, d4, #2 @//extract a[2] (column3,row0)
+ vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vext.8 d28, d5, d6, #2 @//extract a[2] (column1,row1)
+ vmlal.u8 q6, d29, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vext.8 d27, d6, d7, #2 @//extract a[2] (column2,row1)
+ vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 (column1,row1)
+ vext.8 d26, d7, d7, #2 @//extract a[2] (column3,row1)
+
+ vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 (column2,row1)
+ vext.8 d31, d2, d3, #3 @//extract a[3] (column1,row0)
+ vmlal.u8 q9, d26, d1 @// a0 + a5 + 20a2 (column3,row1)
+ vext.8 d30, d3, d4, #3 @//extract a[3] (column2,row0)
+ vmlal.u8 q4, d31, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vext.8 d29, d4, d4, #3 @//extract a[3] (column3,row0)
+ vmlal.u8 q5, d30, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vext.8 d28, d5, d6, #3 @//extract a[3] (column1,row1)
+ vmlal.u8 q6, d29, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vext.8 d27, d6, d7, #3 @//extract a[3] (column2,row1)
+ vmlal.u8 q7, d28, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ vext.8 d26, d7, d7, #3 @//extract a[3] (column3,row1)
+
+ vmlal.u8 q8, d27, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row1)
+ vext.8 d31, d2, d3, #1 @//extract a[1] (column1,row0)
+ vmlal.u8 q9, d26, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row1)
+ vext.8 d30, d3, d4, #1 @//extract a[1] (column2,row0)
+ vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vext.8 d29, d4, d4, #1 @//extract a[1] (column3,row0)
+ vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vext.8 d28, d5, d6, #1 @//extract a[1] (column1,row1)
+ vmlsl.u8 q6, d29, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vext.8 d27, d6, d7, #1 @//extract a[1] (column2,row1)
+ vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ vext.8 d26, d7, d7, #1 @//extract a[1] (column3,row1)
+
+ vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+ vext.8 d31, d2, d3, #4 @//extract a[4] (column1,row0)
+ vmlsl.u8 q9, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row1)
+ vext.8 d30, d3, d4, #4 @//extract a[4] (column2,row0)
+ vmlsl.u8 q4, d31, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ vext.8 d29, d4, d4, #4 @//extract a[4] (column3,row0)
+ vmlsl.u8 q5, d30, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ vext.8 d28, d5, d6, #4 @//extract a[4] (column1,row1)
+ vmlsl.u8 q6, d29, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ vext.8 d27, d6, d7, #4 @//extract a[4] (column2,row1)
+ vmlsl.u8 q7, d28, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ vext.8 d26, d7, d7, #4 @//extract a[4] (column3,row1)
+
+ vmlsl.u8 q8, d27, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+ vmlsl.u8 q9, d26, d0 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row1)
+ @// Narrow each 16-bit sum: rounding shift right by 5, then saturate to unsigned 8 bits
+ vqrshrun.s16 d20, q4, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vqrshrun.s16 d21, q5, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vqrshrun.s16 d22, q6, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vqrshrun.s16 d23, q7, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ vqrshrun.s16 d24, q8, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+ vqrshrun.s16 d25, q9, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row1)
+
+ vst1.8 {d20, d21, d22}, [r1], r3 @//Store 24 bytes of dest row0, then r1 += r3
+ vst1.8 {d23, d24, d25}, [r1], r3 @//Store 24 bytes of dest row1, then r1 += r3
+
+ subs r14, r14, #2 @ two rows done; sets the Z flag when no rows remain
+
+ bne filter_horz_loop @ loop until the row counter reaches zero
+
+ vpop {d8-d15} @ restore the NEON registers saved on entry
+ ldmfd sp!, {pc} @ return by popping the saved lr into pc
+
+
+
+
+
+
+
+
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* This function implements a two stage cascaded six tap filter. It
+@* applies the six tap filter in the vertical direction on the
+@* predictor values, followed by applying the same filter in the
+@* horizontal direction on the output of the first stage. The six tap
+@* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+@* interpolation process"
+@* (Filter run for width = 17 and height =17)
+@* @par Description:
+@* The function interpolates
+@* the predictors first in the vertical direction and then in the
+@* horizontal direction to output the (1/2,1/2). The output of the first
+@* stage of the filter is stored in the buffer pointed to by pi16_pred1(only in C)
+@* in 16 bit precision.
+@*
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source
+@*
+@* @param[out] pu1_dst1
+@* UWORD8 pointer to the destination(vertical filtered output)
+@*
+@* @param[out] pu1_dst2
+@* UWORD8 pointer to the destination (output after applying the horizontal filter to the intermediate vertical output)
+@*
+@* @param[in] src_strd
+@* integer source stride
+@*
+@* @param[in] dst_strd
+@* integer destination stride of pu1_dst
+@*
+@* @param[in] pi16_pred1
+@* Pointer to 16-bit intermediate buffer (used only in C)
+@*
+@* @param[in] pi16_pred1_strd
+@* integer destination stride of pi16_pred1
+@*
+@*
+@* @returns
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+@ UWORD8 *pu1_dst1,
+@ UWORD8 *pu1_dst2,
+@ WORD32 src_strd,
+@ WORD32 dst_strd,
+@ WORD32 *pi16_pred1,/* Pointer to 16bit intermediate buffer (used only in c)*/
+@ WORD32 pi16_pred1_strd)
+
+
+
+
+ .global ih264e_sixtap_filter_2dvh_vert_a9q
+
+ih264e_sixtap_filter_2dvh_vert_a9q:
+ stmfd sp!, {r10, r11, r12, lr}
+
+@//r0 - pu1_ref
+@//r3 - u4_ref_width
+ vpush {d8-d15}
+ @// Load six rows for vertical interpolation
+ lsl r12, r3, #1
+ sub r0, r0, r12
+ sub r0, r0, #2
+ vld1.8 {d2, d3, d4}, [r0], r3
+ vld1.8 {d5, d6, d7}, [r0], r3
+ vld1.8 {d8, d9, d10}, [r0], r3
+ mov r12, #5
+ vld1.8 {d11, d12, d13}, [r0], r3
+ mov r14, #20
+ vld1.8 {d14, d15, d16}, [r0], r3
+ vmov.16 d0[0], r12
+ vmov.16 d0[1], r14
+ vld1.8 {d17, d18, d19}, [r0], r3
+ vmov.i8 d1, #20
+
+@// r12 - u2_buff1_width
+@// r14 - u2_buff2_width
+ ldr r12, [sp, #80]
+ add r11, r1, #6
+
+ mov r14, r12
+
+ mov r10, #3 @loop counter
+
+
+filter_2dvh_loop:
+
+ @// ////////////// ROW 1 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d2, d17 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d8, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d11, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d5, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d14, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+
+ vaddl.u8 q11, d3, d18 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d9, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d12, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d6, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d15, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d4, d19 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d10, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d13, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d7, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d16, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vqrshrun.s16 d2, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d3, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d4, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d2, d2, d3, #2
+ vst1.8 {d3, d4}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d2}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q1, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q1, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q1, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q1, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q1, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q1, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d2, d3, d4}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 2 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d5, d2 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d11, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d14, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d8, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d17, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d6, d3 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d12, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d15, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d9, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d18, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d7, d4 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d13, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d16, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d10, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d19, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d5, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d6, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d7, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d5, d5, d6, #2
+ vst1.8 {d6, d7}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d5}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q3, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q3, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q3, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q3, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q3, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q3, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d5, d6, d7}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 3 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d8, d5 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d14, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d17, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d11, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d2, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d9, d6 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d15, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d18, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d12, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d3, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d10, d7 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d16, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d19, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d13, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d4, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d8, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d9, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d10, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d8, d8, d9, #2
+ vst1.8 {d9, d10}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d8}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q4, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q4, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q4, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q4, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q4, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q4, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d8, d9, d10}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 4 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d11, d8 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d17, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d2, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d14, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d5, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d12, d9 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d18, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d3, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d15, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d6, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d13, d10 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d19, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d4, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d16, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d7, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d11, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d12, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d13, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d11, d11, d12, #2
+ vst1.8 {d12, d13}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d11}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q6, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q6, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q6, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q6, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q6, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q6, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d11, d12, d13}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 5 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vaddl.u8 q10, d14, d11 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d2, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d5, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d17, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d8, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d15, d12 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d3, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d6, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d18, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d9, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d16, d13 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d4, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d7, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d19, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d10, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d14, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d15, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d16, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d14, d14, d15, #2
+ vst1.8 {d15, d16}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d14}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q7, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q7, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q7, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q7, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q7, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q7, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d14, d15, d16}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ @//VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ @//VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ @//VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ @//VST1.8 {D26,D27,D28},[r2],r14 ;// store 1/2,1,2 grif values
+ @// ////////////// ROW 6 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+
+ cmp r10, #1 @// if it 17 rows are complete skip
+ beq filter_2dvh_skip_row
+ vaddl.u8 q10, d17, d14 @// a0 + a5 (column1,row0)
+ vmov.i8 d31, #5
+ vmlal.u8 q10, d5, d1 @// a0 + a5 + 20a2 (column1,row0)
+ vmlal.u8 q10, d8, d1 @// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ vmlsl.u8 q10, d2, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ vmlsl.u8 q10, d11, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vaddl.u8 q11, d18, d15 @// a0 + a5 (column2,row0)
+ vmlal.u8 q11, d6, d1 @// a0 + a5 + 20a2 (column2,row0)
+ vmlal.u8 q11, d9, d1 @// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ vmlsl.u8 q11, d3, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ vmlsl.u8 q11, d12, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+ vext.16 d30, d20, d21, #2 @//extract a[2] (set1)
+
+ vaddl.u8 q12, d19, d16 @// a0 + a5 (column3,row0)
+ vext.16 d29, d20, d21, #3 @//extract a[3] (set1)
+ vmlal.u8 q12, d7, d1 @// a0 + a5 + 20a2 (column3,row0)
+ vmlal.u8 q12, d10, d1 @// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ vmlsl.u8 q12, d4, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ vmlsl.u8 q12, d13, d31 @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ vqrshrun.s16 d17, q10, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ vext.16 d31, d21, d22, #1 @//extract a[5] (set1)
+ vqrshrun.s16 d18, q11, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ vext.16 d28, d20, d21, #1 @//extract a[1] (set1)
+
+ vaddl.s16 q13, d31, d20 @// a0 + a5 (set1)
+ vext.16 d31, d22, d23, #1 @//extract a[5] (set2)
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set1)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set1)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ vmlsl.s16 q13, d21, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ vext.16 d30, d21, d22, #2 @//extract a[2] (set2)
+
+ vqrshrun.s16 d19, q12, #5 @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ vext.16 d29, d21, d22, #3 @//extract a[3] (set2)
+
+ vext.16 d28, d21, d22, #1 @//extract a[1] (set2)
+ vaddl.s16 q10, d31, d21 @// a0 + a5 (set2)
+ vmlal.s16 q10, d30, d0[1] @// a0 + a5 + 20a2 (set2)
+ vmlal.s16 q10, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set2)
+ vmlsl.s16 q10, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ vmlsl.s16 q10, d22, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ vext.16 d31, d23, d24, #1 @//extract a[5] (set3)
+
+ vext.8 d17, d17, d18, #2
+ vst1.8 {d18, d19}, [r11], r12 @// store row1 - 1,1/2 grid
+ vst1.8 {d17}, [r1], r12 @// store row1 - 1,1/2 grid
+
+ vext.16 d30, d22, d23, #2 @//extract a[2] (set3)
+ vext.16 d29, d22, d23, #3 @//extract a[3] (set3)
+
+ vaddl.s16 q9, d31, d22 @// a0 + a5 (set3)
+ vext.16 d28, d22, d23, #1 @//extract a[1] (set3)
+ vmlal.s16 q9, d30, d0[1] @// a0 + a5 + 20a2 (set3)
+ vmlal.s16 q9, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set3)
+ vmlsl.s16 q9, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ vmlsl.s16 q9, d23, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ vext.16 d31, d24, d25, #1 @//extract a[5] (set4)
+
+ vshrn.s32 d21, q10, #8 @// shift by 8 and later we will shift by 2 more with rounding (set2)
+ vext.16 d30, d23, d24, #2 @//extract a[2] (set4)
+ vshrn.s32 d20, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set1)
+ vext.16 d29, d23, d24, #3 @//extract a[3] (set4)
+
+ vaddl.s16 q13, d31, d23 @// a0 + a5 (set4)
+ vext.16 d28, d23, d24, #1 @//extract a[1] (set4)
+ vext.16 d31, d25, d25, #1 @//extract a[5] (set5) ;//here only first element in the row is valid
+ vmlal.s16 q13, d30, d0[1] @// a0 + a5 + 20a2 (set4)
+ vmlal.s16 q13, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set4)
+ vmlsl.s16 q13, d28, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ vmlsl.s16 q13, d24, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ vext.16 d30, d24, d25, #2 @//extract a[2] (set5)
+
+ vaddl.s16 q11, d31, d24 @// a0 + a5 (set5)
+ vext.16 d29, d24, d25, #3 @//extract a[3] (set5)
+
+ vext.16 d31, d24, d25, #1 @//extract a[1] (set5)
+ vshrn.s32 d28, q9, #8 @// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ vld1.8 {d17, d18, d19}, [r0], r3 @// Load next Row data
+ vmlal.s16 q11, d30, d0[1] @// a0 + a5 + 20a2 (set5)
+ vmlal.s16 q11, d29, d0[1] @// a0 + a5 + 20a2 + 20a3 (set5)
+ vmlsl.s16 q11, d31, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ vmlsl.s16 q11, d25, d0[0] @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ vshrn.s32 d29, q13, #8 @// shift by 8 and later we will shift by 2 more with rounding (set4)
+ vqrshrun.s16 d26, q10, #2 @// half,half gird set1,2
+
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+
+ subs r10, r10, #1 @//decrement loop counter
+
+ bne filter_2dvh_loop
+
+
+@// Process first vertical interpolated row
+@// each column is
+ @// ////////////// ROW 13 ///////////////////////
+
+@// Process first vertical interpolated row
+@// each column is
+ vpop {d8-d15}
+ ldmfd sp!, {r10, r11, r12, pc}
+
+filter_2dvh_skip_row:
+
+ vqrshrun.s16 d27, q14, #2 @// half,half gird set3,4
+ vshrn.s32 d28, q11, #8 @// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ vqrshrun.s16 d28, q14, #2 @// half,half gird set5
+
+ vst1.8 {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
+ vpop {d8-d15}
+ ldmfd sp!, {r10, r11, r12, pc}
+
+
+
+
diff --git a/encoder/arm/ih264e_platform_macros.h b/encoder/arm/ih264e_platform_macros.h
new file mode 100755
index 0000000..39cac96
--- /dev/null
+++ b/encoder/arm/ih264e_platform_macros.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_platform_macros.h
+*
+* @brief
+* Contains platform specific routines used for codec context initialization
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* the codec context (neon_a9q variant)
+*
+* @par Description: one of the three per-architecture initializers declared
+* in this header (neon_a9q, neon_av8, generic).
+* NOTE(review): presumably installs the ARM NEON "_a9q" routines; the
+* definition is not part of this header - confirm there.
+*
+* @param[in] ps_codec
+* Codec context pointer. codec_t is not declared by this header, so its
+* declaration must already be visible wherever this header is included.
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* the codec context (neon_av8 variant)
+*
+* @par Description: one of the three per-architecture initializers declared
+* in this header (neon_a9q, neon_av8, generic).
+* NOTE(review): presumably installs the ARMv8 NEON "_av8" routines; the
+* definition is not part of this header - confirm there.
+*
+* @param[in] ps_codec
+* Codec context pointer. codec_t is not declared by this header, so its
+* declaration must already be visible wherever this header is included.
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* the codec context (generic variant)
+*
+* @par Description: one of the three per-architecture initializers declared
+* in this header (neon_a9q, neon_av8, generic).
+* NOTE(review): presumably installs the portable, non-NEON routines; the
+* definition is not part of this header - confirm there.
+*
+* @param[in] ps_codec
+* Codec context pointer. codec_t is not declared by this header, so its
+* declaration must already be visible wherever this header is included.
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* the codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context based on the architecture in use.
+* NOTE(review): presumably dispatches to one of the per-architecture
+* initializers declared in this header - confirm against the definition.
+*
+* @param[in] pv_codec
+* Codec context, passed as an untyped pointer (unlike the per-architecture
+* initializers in this header, which take a codec_t pointer)
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the environment
+* in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture (IV_ARCH_T is not declared by this header, so its declaration
+* must already be visible wherever this header is included)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/arm/ime_distortion_metrics_a9q.s b/encoder/arm/ime_distortion_metrics_a9q.s
new file mode 100755
index 0000000..b58911e
--- /dev/null
+++ b/encoder/arm/ime_distortion_metrics_a9q.s
@@ -0,0 +1,1353 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+
+@/**
+@******************************************************************************
+@*
+@*
+@* @brief
+@* This file contains definitions of routines that compute distortion
+@* between two macro/sub blocks of identical dimensions
+@*
+@* @author
+@* Ittiam
+@*
+@* @par List of Functions:
+@* - ime_compute_sad_16x16_a9q()
+@* - ime_compute_sad_16x16_fast_a9q()
+@* - ime_compute_sad_16x8_a9q()
+@* - ime_compute_sad_16x16_ea8_a9q()
+@* - ime_calculate_sad2_prog_a9q()
+@* - ime_calculate_sad3_prog_a9q()
+@* - ime_calculate_sad4_prog_a9q()
+@* - ime_sub_pel_compute_sad_16x16_a9q()
+@* - ime_compute_satqd_16x16_lumainter_a9q()
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes an approximate SAD between two 16x16 blocks (fast mode)
+@*
+@* @par Description
+@*  Both strides are doubled on entry, so only eight alternate rows (starting
+@*  with the first one) are compared; the 8-row sum is then doubled before it
+@*  is stored. There is no early exit: i4_max_sad is never read.
+@*
+@* @param[in] pu1_src (r0)
+@*  UWORD8 pointer to the source block
+@*
+@* @param[in] pu1_dst (r1)
+@*  UWORD8 pointer to the block compared against the source (only read)
+@*
+@* @param[in] src_strd (r2)
+@*  integer source stride
+@*
+@* @param[in] dst_strd (r3)
+@*  integer stride of pu1_dst
+@*
+@* @param[in] i4_max_sad (first stack word)
+@*  ignored by this routine
+@*
+@* @param[out] pi4_mb_distortion (second stack word)
+@*  receives the 32-bit SAD
+@*
+@* @remarks
+@*  Uses only d0-d7 and d16-d19, so the callee-saved NEON registers d8-d15
+@*  are left untouched, as the AAPCS requires.
+@*
+@******************************************************************************
+@*/
+.text
+.p2align 2
+    .global ime_compute_sad_16x16_fast_a9q
+ime_compute_sad_16x16_fast_a9q:
+
+    stmfd         sp!, {r12, lr}
+    lsl           r2, r2, #1            @ advance two source rows per load
+    lsl           r3, r3, #1            @ advance two pu1_dst rows per load
+
+    vld1.8        {d4, d5}, [r0], r2    @ src, 1st sampled row
+    vld1.8        {d6, d7}, [r1], r3    @ dst, 1st sampled row
+    mov           r12, #6               @ 6 sampled rows left to load, 2 per pass
+    vld1.8        {d16, d17}, [r0], r2  @ src, 2nd sampled row
+    vabdl.u8      q0, d6, d4            @ q0/q1 = |dst - src| widened to 16 bit
+    vabdl.u8      q1, d7, d5
+    vld1.8        {d18, d19}, [r1], r3  @ dst, 2nd sampled row
+
+loop_sad_16x16_fast:
+
+    vld1.8        {d4, d5}, [r0], r2
+    vabal.u8      q0, d18, d16          @ accumulate the row loaded one step earlier
+    vabal.u8      q1, d19, d17
+    vld1.8        {d6, d7}, [r1], r3
+    subs          r12, #2
+    vld1.8        {d16, d17}, [r0], r2
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+    vld1.8        {d18, d19}, [r1], r3
+
+    bne           loop_sad_16x16_fast
+
+    vabal.u8      q0, d18, d16          @ last sampled row
+    vabal.u8      q1, d19, d17
+
+    vadd.i16      q0, q0, q1            @ reduce 16 lanes -> 8 -> 4
+    vadd.i16      d0, d1, d0
+
+    ldr           r12, [sp, #12]        @ pi4_mb_distortion: 8 bytes pushed + 2nd stack word
+    vpaddl.u16    d0, d0                @ 4 x u16 -> 2 x u32
+    vpaddl.u32    d0, d0                @ 2 x u32 -> 1 x u64, low word is the sum
+    vshl.u32      d0, d0, #1            @ double: only half of the rows were visited
+    vst1.32       {d0[0]}, [r12]
+
+    ldmfd         sp!, {r12, pc}
+
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x8 blocks
+@*
+@* @par Description
+@*  Sums |pu1_dst - pu1_src| over 8 rows of 16 bytes. There is no early
+@*  exit: the maximum-SAD argument is never read.
+@*
+@* @param[in] pu1_src (r0)
+@*  UWORD8 pointer to the source block
+@*
+@* @param[in] pu1_dst (r1)
+@*  UWORD8 pointer to the block compared against the source (only read)
+@*
+@* @param[in] src_strd (r2)
+@*  integer source stride
+@*
+@* @param[in] dst_strd (r3)
+@*  integer stride of pu1_dst
+@*
+@* @param[in] u4_max_sad (first stack word)
+@*  ignored by this routine
+@*
+@* @param[out] pi4_mb_distortion (second stack word)
+@*  receives the 32-bit SAD
+@*
+@* @remarks
+@*  Uses only d0-d7 and d16-d19, so the callee-saved NEON registers d8-d15
+@*  are left untouched, as the AAPCS requires.
+@*
+@******************************************************************************
+@*/
+    .global ime_compute_sad_16x8_a9q
+ime_compute_sad_16x8_a9q:
+
+    stmfd         sp!, {r12, lr}
+
+    vld1.8        {d4, d5}, [r0], r2    @ src row 0
+    vld1.8        {d6, d7}, [r1], r3    @ dst row 0
+    mov           r12, #6               @ 6 rows left to load, 2 per pass
+    vld1.8        {d16, d17}, [r0], r2  @ src row 1
+    vabdl.u8      q0, d6, d4            @ q0/q1 = |dst - src| widened to 16 bit
+    vabdl.u8      q1, d7, d5
+    vld1.8        {d18, d19}, [r1], r3  @ dst row 1
+
+loop_sad_16x8:
+
+    vld1.8        {d4, d5}, [r0], r2
+    vabal.u8      q0, d18, d16          @ accumulate the row loaded one step earlier
+    vabal.u8      q1, d19, d17
+    vld1.8        {d6, d7}, [r1], r3
+    subs          r12, #2
+    vld1.8        {d16, d17}, [r0], r2
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+    vld1.8        {d18, d19}, [r1], r3
+
+    bne           loop_sad_16x8
+
+    vabal.u8      q0, d18, d16          @ row 7
+    vabal.u8      q1, d19, d17
+
+    vadd.i16      q0, q0, q1            @ reduce 16 lanes -> 8 -> 4
+    vadd.i16      d0, d1, d0
+
+    ldr           r12, [sp, #12]        @ pi4_mb_distortion: 8 bytes pushed + 2nd stack word
+    vpaddl.u16    d0, d0                @ 4 x u16 -> 2 x u32
+    vpaddl.u32    d0, d0                @ 2 x u32 -> 1 x u64, low word is the sum
+    vst1.32       {d0[0]}, [r12]
+
+    ldmfd         sp!, {r12, pc}
+
+
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
+@*
+@* @par Description
+@*  Pass 1 sums |pu1_dst - pu1_src| over rows 0, 2, ..., 14 and stores that
+@*  partial sum to *pi4_mb_distortion. If the partial sum is greater than
+@*  i4_max_sad (signed compare) the routine returns, leaving the partial sum
+@*  in place. Otherwise pass 2 adds rows 1, 3, ..., 15 and the full 16-row
+@*  SAD overwrites the partial value.
+@*
+@* @param[in] pu1_src (r0)
+@*  UWORD8 pointer to the source block
+@*
+@* @param[in] pu1_dst (r1)
+@*  UWORD8 pointer to the block compared against the source (only read)
+@*
+@* @param[in] src_strd (r2)
+@*  integer source stride
+@*
+@* @param[in] dst_strd (r3)
+@*  integer stride of pu1_dst
+@*
+@* @param[in] i4_max_sad (first stack word)
+@*  integer maximum allowed distortion, tested after the first 8 rows
+@*
+@* @param[out] pi4_mb_distortion (second stack word)
+@*  receives the 32-bit SAD (partial on early exit, full otherwise)
+@*
+@* @remarks
+@*  Uses only d0-d7 and d16-d21, so the callee-saved NEON registers d8-d15
+@*  are left untouched, as the AAPCS requires.
+@*
+@******************************************************************************
+@*/
+    .global ime_compute_sad_16x16_ea8_a9q
+
+ime_compute_sad_16x16_ea8_a9q:
+
+    stmfd         sp!, {r5-r7, lr}
+    lsl           r2, r2, #1            @ advance two source rows per load
+    lsl           r3, r3, #1            @ advance two pu1_dst rows per load
+
+    vld1.8        {d4, d5}, [r0], r2    @ src row 0
+    vld1.8        {d6, d7}, [r1], r3    @ dst row 0
+    mov           r5, #6                @ 6 even rows left to load, 2 per pass
+    vld1.8        {d16, d17}, [r0], r2  @ src row 2
+    vabdl.u8      q0, d6, d4            @ q0/q1 = |dst - src| widened to 16 bit
+    vabdl.u8      q1, d7, d5
+    vld1.8        {d18, d19}, [r1], r3  @ dst row 2
+    ldrd          r6, r7, [sp, #16]     @ 16 bytes pushed: r6 = i4_max_sad, r7 = pi4_mb_distortion
+
+loop_sad_16x16_ea8_1:
+
+    vld1.8        {d4, d5}, [r0], r2
+    vabal.u8      q0, d18, d16          @ accumulate the row loaded one step earlier
+    vabal.u8      q1, d19, d17
+    vld1.8        {d6, d7}, [r1], r3
+    subs          r5, #2
+    vld1.8        {d16, d17}, [r0], r2
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+    vld1.8        {d18, d19}, [r1], r3
+
+    bne           loop_sad_16x16_ea8_1
+
+    vabal.u8      q0, d18, d16          @ row 14
+    sub           r0, r0, r2, lsl #3    @ rewind the 8 doubled-stride steps ...
+    vabal.u8      q1, d19, d17
+    sub           r1, r1, r3, lsl #3
+
+    vadd.i16      q10, q0, q1           @ reduce a copy; q0/q1 keep accumulating
+    add           r0, r0, r2, asr #1    @ ... and move down one original row (row 1)
+    vadd.i16      d20, d20, d21
+    add           r1, r1, r3, asr #1
+
+    vpaddl.u16    d20, d20
+    vld1.8        {d4, d5}, [r0], r2    @ src row 1
+    vld1.8        {d6, d7}, [r1], r3    @ dst row 1
+    vpaddl.u32    d20, d20              @ low word = SAD of the 8 even rows
+    vld1.8        {d16, d17}, [r0], r2  @ src row 3
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+
+    vst1.32       {d20[0]}, [r7]        @ publish the partial SAD
+    ldr           r5, [r7]
+    cmp           r5, r6
+    bgt           end_func_16x16_ea8    @ partial SAD already above the limit
+
+    vld1.8        {d18, d19}, [r1], r3  @ dst row 3
+    mov           r5, #6                @ 6 odd rows left to load, 2 per pass
+
+loop_sad_16x16_ea8_2:
+
+    vld1.8        {d4, d5}, [r0], r2
+    vabal.u8      q0, d18, d16
+    vabal.u8      q1, d19, d17
+    vld1.8        {d6, d7}, [r1], r3
+    subs          r5, #2
+    vld1.8        {d16, d17}, [r0], r2
+    vabal.u8      q0, d6, d4
+    vabal.u8      q1, d7, d5
+    vld1.8        {d18, d19}, [r1], r3
+
+    bne           loop_sad_16x16_ea8_2
+
+    vabal.u8      q0, d18, d16          @ row 15
+    vabal.u8      q1, d19, d17
+
+    vadd.i16      q0, q0, q1            @ reduce 16 lanes -> 8 -> 4
+    vadd.i16      d0, d1, d0
+
+    vpaddl.u16    d0, d0                @ 4 x u16 -> 2 x u32
+    vpaddl.u32    d0, d0                @ low word = SAD of all 16 rows
+
+    vst1.32       {d0[0]}, [r7]
+
+end_func_16x16_ea8:
+
+    ldmfd         sp!, {r5-r7, pc}
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : ime_calculate_sad2_prog_a9q()
+@//
+@// Detail Description : Computes, in one pass over a 16x16 source block, its
+@//                      SAD against two 16x16 reference blocks that share
+@//                      one stride. psad[0] receives the SAD against ref1,
+@//                      psad[1] the SAD against ref2 (32 bit each).
+@//
+@// Register usage     : Only d0-d7 and d16-d27 are used, so the callee-saved
+@//                      NEON registers d8-d15 are left untouched, as the
+@//                      AAPCS requires.
+@//
+@// Platform           : CortexA8/NEON .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+    .global ime_calculate_sad2_prog_a9q
+
+ime_calculate_sad2_prog_a9q:
+
+    @ r0    = ref1             <UWORD8 *>
+    @ r1    = ref2             <UWORD8 *>
+    @ r2    = src              <UWORD8 *>
+    @ r3    = RefBufferWidth   <UWORD32>
+    @ stack = CurBufferWidth   <UWORD32>, psad <UWORD32 *>
+
+    stmfd         sp!, {r4-r5, lr}
+
+    @ Three words (12 bytes) were pushed, so the first stack argument is at
+    @ sp + 12; sp + 8 holds the saved lr.
+    ldr           r4, [sp, #12]         @ load src stride to r4
+    mov           r5, #14               @ 14 rows left to load after the first two
+
+    @Row 1
+    vld1.8        {d0, d1}, [r2], r4    @ load src Row 1
+    vld1.8        {d2, d3}, [r0], r3    @ load ref1 Row 1
+    vld1.8        {d4, d5}, [r1], r3    @ load ref2 Row 1
+
+    @Row 2
+    vld1.8        {d6, d7}, [r2], r4    @ load src Row 2
+    vabdl.u8      q12, d2, d0           @ q12/q13 accumulate |ref1 - src|
+    vabdl.u8      q13, d3, d1
+    vld1.8        {d20, d21}, [r0], r3  @ load ref1 Row 2
+    vabdl.u8      q8, d4, d0            @ q8/q9 accumulate |ref2 - src|
+    vabdl.u8      q9, d5, d1
+    vld1.8        {d22, d23}, [r1], r3  @ load ref2 Row 2
+
+loop_sad2_prog:
+
+    subs          r5, #2                @ NEON ops below leave the flags alone
+    @Row 1
+    vld1.8        {d0, d1}, [r2], r4    @ load src Row 1
+    vabal.u8      q12, d20, d6
+    vabal.u8      q13, d21, d7
+    vld1.8        {d2, d3}, [r0], r3    @ load ref1 Row 1
+    vabal.u8      q8, d22, d6
+    vabal.u8      q9, d23, d7
+    vld1.8        {d4, d5}, [r1], r3    @ load ref2 Row 1
+
+    @Row 2
+    vld1.8        {d6, d7}, [r2], r4    @ load src Row 2
+    vabal.u8      q12, d2, d0
+    vabal.u8      q13, d3, d1
+    vld1.8        {d20, d21}, [r0], r3  @ load ref1 Row 2
+    vabal.u8      q8, d4, d0
+    vabal.u8      q9, d5, d1
+    vld1.8        {d22, d23}, [r1], r3  @ load ref2 Row 2
+
+    bne           loop_sad2_prog
+
+    vabal.u8      q12, d20, d6          @ last row
+    vabal.u8      q13, d21, d7
+    vabal.u8      q8, d22, d6
+    vabal.u8      q9, d23, d7
+
+    @ Compute SAD
+
+    vadd.u16      q12, q12, q13         @ Q12 : sad_ref1
+    vadd.u16      q8, q8, q9            @ Q8  : sad_ref2
+
+    vadd.u16      d24, d24, d25
+    ldr           r5, [sp, #16]         @ loading pi4_sad to r5 (12 bytes pushed + 2nd stack word)
+    vadd.u16      d16, d16, d17
+
+    vpadd.u16     d24, d24, d16         @ low half: ref1 partials, high half: ref2 partials
+    vpaddl.u16    d24, d24              @ {sad_ref1, sad_ref2} as 2 x u32
+
+    vst1.64       {d24}, [r5]
+
+    ldmfd         sp!, {r4-r5, pc}
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : ime_calculate_sad3_prog_a9q()
+@//
+@// Detail Description : Computes, in one pass over a 16x16 source block, its
+@//                      SAD against three 16x16 reference blocks that share
+@//                      one stride. psad[0], psad[1] and psad[2] receive the
+@//                      SAD against ref1, ref2 and ref3 (32 bit each).
+@//
+@// Register usage     : d8-d15 are used as row buffers; because the AAPCS
+@//                      makes them callee-saved they are pushed on entry and
+@//                      popped before returning.
+@//
+@// Platform           : CortexA8/NEON .
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+    .global ime_calculate_sad3_prog_a9q
+
+ime_calculate_sad3_prog_a9q:
+
+    @ r0    = ref1             <UWORD8 *>
+    @ r1    = ref2             <UWORD8 *>
+    @ r2    = ref3             <UWORD8 *>
+    @ r3    = src              <UWORD8 *>
+    @ stack = RefBufferWidth <UWORD32>, CurBufferWidth <UWORD32>, psad <UWORD32 *>
+
+
+    stmfd         sp!, {r4-r6, lr}
+    vpush         {d8-d15}              @ callee-saved NEON registers used below
+
+    @ 16 bytes of core registers + 64 bytes of NEON registers were pushed,
+    @ so the stack arguments start at sp + 80.
+    ldrd          r4, r5, [sp, #80]     @ load ref stride to r4, src stride to r5
+    mov           r6, #14               @ 14 rows left to load after the first two
+
+    @ Row 1
+    vld1.8        {d0, d1}, [r3], r5    @ load src Row 1
+    vld1.8        {d2, d3}, [r0], r4    @ load ref1 Row 1
+    vld1.8        {d4, d5}, [r1], r4    @ load ref2 Row 1
+    vabdl.u8      q8, d2, d0            @ q8/q9   accumulate |ref1 - src|
+    vabdl.u8      q9, d3, d1
+    vld1.8        {d6, d7}, [r2], r4    @ load ref3 Row 1
+    vabdl.u8      q10, d4, d0           @ q10/q11 accumulate |ref2 - src|
+    vabdl.u8      q11, d5, d1
+
+    @ Row 2
+    vld1.8        {d8, d9}, [r3], r5    @ load src Row 2
+    vabdl.u8      q12, d6, d0           @ q12/q13 accumulate |ref3 - src|
+    vabdl.u8      q13, d7, d1
+    vld1.8        {d10, d11}, [r0], r4  @ load ref1 Row 2
+    vld1.8        {d12, d13}, [r1], r4  @ load ref2 Row 2
+    vabal.u8      q8, d10, d8
+    vabal.u8      q9, d11, d9
+    vld1.8        {d14, d15}, [r2], r4  @ load ref3 Row 2
+    vabal.u8      q10, d12, d8
+    vabal.u8      q11, d13, d9
+
+loop_sad3_prog:
+
+    @Row 1
+    vld1.8        {d0, d1}, [r3], r5    @ load src Row 1
+    vabal.u8      q12, d14, d8          @ ref3 of the previous Row 2
+    vabal.u8      q13, d15, d9
+    vld1.8        {d2, d3}, [r0], r4    @ load ref1 Row 1
+    vld1.8        {d4, d5}, [r1], r4    @ load ref2 Row 1
+    vabal.u8      q8, d2, d0
+    vabal.u8      q9, d3, d1
+    vld1.8        {d6, d7}, [r2], r4    @ load ref3 Row 1
+    vabal.u8      q10, d4, d0
+    vabal.u8      q11, d5, d1
+
+    @Row 2
+    vld1.8        {d8, d9}, [r3], r5    @ load src Row 2
+    vabal.u8      q12, d6, d0
+    vabal.u8      q13, d7, d1
+    vld1.8        {d10, d11}, [r0], r4  @ load ref1 Row 2
+    subs          r6, #2                @ NEON ops below leave the flags alone
+    vld1.8        {d12, d13}, [r1], r4  @ load ref2 Row 2
+    vabal.u8      q8, d10, d8
+    vabal.u8      q9, d11, d9
+    vld1.8        {d14, d15}, [r2], r4  @ load ref3 Row 2
+    vabal.u8      q10, d12, d8
+    vabal.u8      q11, d13, d9
+
+    bne           loop_sad3_prog
+
+    vabal.u8      q12, d14, d8          @ ref3 of the last row
+    vabal.u8      q13, d15, d9
+
+    @ Compute SAD
+
+    vadd.u16      q8, q8, q9            @ Q8  : sad_ref1
+    vadd.u16      q10, q10, q11         @ Q10 : sad_ref2
+    vadd.u16      q12, q12, q13         @ Q12 : sad_ref3
+
+    vadd.u16      d16, d16, d17
+    vadd.u16      d20, d20, d21
+    vadd.u16      d24, d24, d25
+
+    vpadd.u16     d16, d16, d20         @ low half: ref1 partials, high half: ref2 partials
+    vpadd.u16     d24, d24, d24
+
+    ldr           r6, [sp, #88]         @ loading pi4_sad to r6 (80 bytes pushed + 3rd stack word)
+    vpaddl.u16    d16, d16              @ {sad_ref1, sad_ref2} as 2 x u32
+    vpaddl.u16    d24, d24              @ sad_ref3 in the low u32
+
+    vst1.64       {d16}, [r6]!          @ psad[0], psad[1]; r6 now points at psad[2]
+    vst1.32       {d24[0]}, [r6]
+
+    vpop          {d8-d15}
+    ldmfd         sp!, {r4-r6, pc}
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) for sub-pel motion estimation
+@*
+@* @par Description
+@* This function computes the 16x16 SAD of the source block against all the
+@* 8 half pel points in a single pass over the 16 rows.
+@*
+@* @par Arguments as read by the code
+@* r0 - source block, stepped by the source stride
+@* r1 - half x block, stepped by the reference stride
+@* r2 - half y block, stepped by the reference stride
+@* r3 - half xy block, stepped by the reference stride
+@* 1st stack argument - source stride
+@* 2nd stack argument - reference stride
+@* 3rd stack argument - pi4_sad
+@*
+@* @param[out] pi4_sad
+@* integer evaluated sad
+@* pi4_sad[0] - half x
+@* pi4_sad[1] - half x - 1
+@* pi4_sad[2] - half y
+@* pi4_sad[3] - half y - strd
+@* pi4_sad[4] - half xy
+@* pi4_sad[5] - half xy - 1
+@* pi4_sad[6] - half xy - strd
+@* pi4_sad[7] - half xy - 1 - strd
+@*
+@* @remarks
+@* q4-q7 (d8-d15) are callee-saved under the AAPCS and are used as scratch
+@* here, so they are saved on entry and restored on exit. The 64 bytes this
+@* adds to the frame are included in every stack-argument offset below.
+@*
+@******************************************************************************
+@*/
+
+.text
+.p2align 2
+
+ .global ime_sub_pel_compute_sad_16x16_a9q
+
+ime_sub_pel_compute_sad_16x16_a9q:
+
+ stmfd sp!, {r4-r11, lr} @store register values to stack (36 bytes)
+ vpush {d8-d15} @save callee-saved NEON registers (64 bytes)
+
+ ldr r9, [sp, #100] @ source stride, 1st stack argument (36 + 64)
+ ldr r10, [sp, #104] @ reference stride, 2nd stack argument
+
+ sub r4, r1, #1 @ x left
+ sub r5, r2, r10 @ y top
+
+ sub r6, r3, #1 @ xy left
+ sub r7, r3, r10 @ xy top
+
+ sub r8, r7, #1 @ xy top-left
+ mov r11, #15 @ row 0 is started in the prologue, 15 loop passes follow
+
+ vld1.8 {d0, d1}, [r0], r9 @ src
+ vld1.8 {d2, d3}, [r5], r10 @ y top LOAD
+ vld1.8 {d4, d5}, [r7], r10 @ xy top LOAD
+ vld1.8 {d6, d7}, [r8], r10 @ xy top-left LOAD
+
+ vabdl.u8 q6, d2, d0 @ y top ABS1
+ vabdl.u8 q7, d4, d0 @ xy top ABS1
+ vld1.8 {d8, d9}, [r1], r10 @ x LOAD
+ vabdl.u8 q8, d6, d0 @ xy top-left ABS1
+ vabdl.u8 q9, d8, d0 @ x ABS1
+ vld1.8 {d10, d11}, [r4], r10 @ x left LOAD
+
+ vabal.u8 q6, d3, d1 @ y top ABS2
+ vabal.u8 q7, d5, d1 @ xy top ABS2
+ vld1.8 {d2, d3}, [r2], r10 @ y LOAD
+ vabal.u8 q8, d7, d1 @ xy top-left ABS2
+ vabal.u8 q9, d9, d1 @ x ABS2
+ vld1.8 {d4, d5}, [r3], r10 @ xy LOAD
+
+ vabdl.u8 q10, d10, d0 @ x left ABS1
+ vabdl.u8 q11, d2, d0 @ y ABS1
+ vld1.8 {d6, d7}, [r6], r10 @ xy left LOAD
+ vabdl.u8 q12, d4, d0 @ xy ABS1
+ vabdl.u8 q13, d6, d0 @ xy left ABS1
+
+loop_sub_pel_16x16:
+
+ @ The y / xy / xy left rows loaded for the current source row are reused,
+ @ after the next source row is loaded, as that row's y top / xy top /
+ @ xy top-left rows, so the "top" pointers are only read in the prologue.
+
+ vabal.u8 q10, d11, d1 @ x left ABS2
+ vabal.u8 q11, d3, d1 @ y ABS2
+ subs r11, #1
+ vabal.u8 q12, d5, d1 @ xy ABS2
+ vabal.u8 q13, d7, d1 @ xy left ABS2
+
+ vld1.8 {d0, d1}, [r0], r9 @ src
+ vabal.u8 q6, d2, d0 @ y top ABS1
+ vabal.u8 q7, d4, d0 @ xy top ABS1
+ vld1.8 {d8, d9}, [r1], r10 @ x LOAD
+ vabal.u8 q8, d6, d0 @ xy top-left ABS1
+ vabal.u8 q9, d8, d0 @ x ABS1
+ vld1.8 {d10, d11}, [r4], r10 @ x left LOAD
+
+ vabal.u8 q6, d3, d1 @ y top ABS2
+ vabal.u8 q7, d5, d1 @ xy top ABS2
+ vld1.8 {d2, d3}, [r2], r10 @ y LOAD
+ vabal.u8 q8, d7, d1 @ xy top-left ABS2
+ vabal.u8 q9, d9, d1 @ x ABS2
+ vld1.8 {d4, d5}, [r3], r10 @ xy LOAD
+
+ vabal.u8 q10, d10, d0 @ x left ABS1
+ vabal.u8 q11, d2, d0 @ y ABS1
+ vld1.8 {d6, d7}, [r6], r10 @ xy left LOAD
+ vabal.u8 q12, d4, d0 @ xy ABS1
+ vabal.u8 q13, d6, d0 @ xy left ABS1
+
+ bne loop_sub_pel_16x16
+
+ vabal.u8 q10, d11, d1 @ x left ABS2
+ vabal.u8 q11, d3, d1 @ y ABS2
+ vabal.u8 q12, d5, d1 @ xy ABS2
+ vabal.u8 q13, d7, d1 @ xy left ABS2
+
+ @ Fold each 8 x u16 accumulator down to 4 x u16
+ vadd.i16 d0, d18, d19 @ x
+ vadd.i16 d3, d12, d13 @ y top
+ vadd.i16 d6, d14, d15 @ xy top
+ vadd.i16 d5, d26, d27 @ xy left
+ vadd.i16 d1, d20, d21 @ x left
+ vadd.i16 d2, d22, d23 @ y
+ vadd.i16 d4, d24, d25 @ xy
+ vadd.i16 d7, d16, d17 @ xy top left
+
+ @ Pair the candidates up in output order: two u16 partial sums each
+ vpadd.i16 d0, d0, d1 @ x, x left
+ vpadd.i16 d2, d2, d3 @ y, y top
+ vpadd.i16 d4, d4, d5 @ xy, xy left
+ vpadd.i16 d6, d6, d7 @ xy top, xy top left
+
+ @ Final widening add: each d register now holds two 32-bit SADs
+ vpaddl.u16 d0, d0
+ vpaddl.u16 d2, d2
+ ldr r11, [sp, #108] @ pi4_sad, 3rd stack argument (44 + 64)
+ vpaddl.u16 d4, d4
+ vpaddl.u16 d6, d6
+
+ vst1.32 {d0}, [r11]! @ pi4_sad[0..1]
+ vst1.32 {d2}, [r11]! @ pi4_sad[2..3]
+ vst1.32 {d4}, [r11]! @ pi4_sad[4..5]
+ vst1.32 {d6}, [r11]! @ pi4_sad[6..7]
+
+ vpop {d8-d15} @restore callee-saved NEON registers
+ ldmfd sp!, {r4-r11, pc} @Restoring registers from stack
+
+
+
+@/**
+@******************************************************************************
+@*
+@* @brief computes distortion (SAD) between 2 16x16 blocks
+@*
+@* @par Description
+@* This function computes the SAD between 2 16x16 blocks. This implementation
+@* always evaluates the complete block: i4_max_sad is never read, so there is
+@* no early exit.
+@*
+@* @param[in] pu1_src
+@* UWORD8 pointer to the source (r0)
+@*
+@* @param[in] pu1_dst
+@* UWORD8 pointer to the block the source is compared against (r1); it is
+@* only read
+@*
+@* @param[in] src_strd
+@* integer source stride (r2)
+@*
+@* @param[in] dst_strd
+@* integer destination stride (r3)
+@*
+@* @param[in] i4_max_sad
+@* integer maximum allowed distortion (1st stack argument, unused here)
+@*
+@* @param[out] pi4_mb_distortion
+@* integer evaluated sad (2nd stack argument)
+@*
+@* @remarks
+@* d8-d11 are callee-saved under the AAPCS and are used as scratch here, so
+@* they are saved on entry and restored on exit.
+@*
+@******************************************************************************
+@*/
+
+.text
+.p2align 2
+
+ .global ime_compute_sad_16x16_a9q
+
+ime_compute_sad_16x16_a9q:
+
+
+ stmfd sp!, {r12, r14} @store register values to stack (8 bytes)
+ vpush {d8-d11} @save callee-saved NEON registers (32 bytes)
+
+ vld1.8 {d4, d5}, [r0], r2
+ vld1.8 {d6, d7}, [r1], r3
+
+ mov r12, #14 @ rows left for the loop, two per pass (7 passes)
+ vld1.8 {d8, d9}, [r0], r2
+ vabdl.u8 q0, d4, d6
+ vld1.8 {d10, d11}, [r1], r3
+ vabdl.u8 q1, d5, d7
+
+loop_sad_16x16:
+
+ vld1.8 {d4, d5}, [r0], r2
+ vabal.u8 q0, d8, d10
+ vld1.8 {d6, d7}, [r1], r3
+ vabal.u8 q1, d9, d11
+
+ vld1.8 {d8, d9}, [r0], r2
+ vabal.u8 q0, d4, d6
+ subs r12, #2
+ vld1.8 {d10, d11}, [r1], r3
+ vabal.u8 q1, d5, d7
+
+ bne loop_sad_16x16
+
+ vabal.u8 q0, d8, d10 @ last row, loaded by the final loop pass
+ vabal.u8 q1, d9, d11
+
+ vadd.i16 q0, q0, q1
+ vadd.i16 d0, d1, d0
+ ldr r12, [sp, #44] @ pi4_mb_distortion, 2nd stack argument (12 + 32)
+
+ vpaddl.u16 d0, d0
+ vpaddl.u32 d0, d0
+ vst1.32 {d0[0]}, [r12]
+
+ vpop {d8-d11} @restore callee-saved NEON registers
+ ldmfd sp!, {r12, pc} @Restoring registers from stack
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name : ime_calculate_sad4_prog_a9q()
+@//
+@// Detail Description : This function finds the 16x16 SAD of the current block
+@// against the left, right, top and bottom neighbours
+@// of temp_frame in one pass.
+@// Results are stored as psad[0] = left,
+@// psad[1] = right, psad[2] = top, psad[3] = bottom.
+@//
+@// Platform : CortexA8/NEON .
+@//
+@// Remarks : d8-d15 are callee-saved under the AAPCS and are
+@// used as scratch here, so they are saved on entry
+@// and restored on exit.
+@//
+@//-----------------------------------------------------------------------------
+@*/
+
+ .global ime_calculate_sad4_prog_a9q
+
+ime_calculate_sad4_prog_a9q:
+ @ r0 = temp_frame <UWORD8 *>
+ @ r1 = buffer_ptr <UWORD8 *>
+ @ r2 = RefBufferWidth <UWORD32>
+ @ r3 = CurBufferWidth <UWORD32>
+ @ stack = psad <UWORD32 *> {1st stack argument}
+
+ stmfd sp!, {r4-r7, lr} @ 20 bytes
+ vpush {d8-d15} @ save callee-saved NEON registers (64 bytes)
+
+ @UWORD8 *left_ptr = temp_frame - 1;
+ @UWORD8 *right_ptr = temp_frame + 1;
+ @UWORD8 *top_ptr = temp_frame - RefBufferWidth;
+ @UWORD8 *bot_ptr = temp_frame + RefBufferWidth;
+
+ mov r7, #14 @ rows left for the loop, two per pass (7 passes)
+ sub r4, r0, #0x01 @r4 = left_ptr
+ add r5, r0, #0x1 @r5 = right_ptr
+ sub r6, r0, r2 @r6 = top_ptr
+ add r0, r0, r2 @r0 = bot_ptr
+ @r1 = buffer_ptr
+
+ @D0:D1 : buffer
+ @D2:D3 : top
+ @D4:D5 : left
+ @D6:D7 : right
+ @D8:D9 : bottom
+
+ @Row 1
+ vld1.8 {d0, d1}, [r1], r3 @ load src Row 1
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 1
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 1
+
+ vabdl.u8 q5, d2, d0
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 1
+ vabdl.u8 q6, d3, d1
+
+ vabdl.u8 q7, d0, d4
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 1
+ vabdl.u8 q8, d1, d5
+
+ @Row 2
+ vabdl.u8 q9, d0, d6
+ vld1.8 {d26, d27}, [r1], r3 @ load src Row 2
+ vabdl.u8 q10, d1, d7
+
+ vabdl.u8 q11, d0, d8
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 2
+ vabdl.u8 q12, d1, d9
+
+loop_sad4_prog:
+
+ vabal.u8 q5, d26, d2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 2
+ vabal.u8 q6, d27, d3
+
+ vabal.u8 q7, d26, d4
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 2
+ vabal.u8 q8, d27, d5
+
+ vabal.u8 q9, d26, d6
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 2
+ vabal.u8 q10, d27, d7
+
+ @Row 1
+ vabal.u8 q11, d26, d8
+ vld1.8 {d0, d1}, [r1], r3 @ load src Row 1
+ vabal.u8 q12, d27, d9
+
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 1
+ subs r7, #2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 1
+
+ vabal.u8 q5, d0, d2
+
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 1
+ vabal.u8 q6, d1, d3
+
+ vabal.u8 q7, d0, d4
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 1
+ vabal.u8 q8, d1, d5
+
+ @Row 2
+ vabal.u8 q9, d0, d6
+ vld1.8 {d26, d27}, [r1], r3 @ load src Row 2
+ vabal.u8 q10, d1, d7
+
+ vabal.u8 q11, d0, d8
+ vld1.8 {d2, d3}, [r6], r2 @ load top Row 2
+ vabal.u8 q12, d1, d9
+
+ bne loop_sad4_prog
+
+ @ Last row: its src and top rows were loaded by the final loop pass
+ vabal.u8 q5, d26, d2
+ vld1.8 {d4, d5}, [r4], r2 @ load left Row 2
+ vabal.u8 q6, d27, d3
+
+ vabal.u8 q7, d26, d4
+ vld1.8 {d6, d7}, [r5], r2 @ load right Row 2
+ vabal.u8 q8, d27, d5
+
+ vabal.u8 q9, d26, d6
+ vld1.8 {d8, d9}, [r0], r2 @ load bottom Row 2
+ vabal.u8 q10, d27, d7
+
+ vabal.u8 q11, d26, d8
+ vabal.u8 q12, d27, d9
+
+ @;Q5:Q6 : sad_top
+ @;Q7:Q8 : sad_left
+ @;Q9:Q10 : sad_right
+ @;Q11:Q12 : sad_bot
+
+ vadd.u16 q5, q5, q6
+ vadd.u16 q7, q7, q8
+ vadd.u16 q9, q9, q10
+ vadd.u16 q11, q11, q12
+
+ @; Free :-
+ @; Q6,Q8,Q10,Q12
+
+ @;Q5 -> D10:D11
+ @;Q7 -> D14:D15
+ @;Q9 -> D18:D19
+ @;Q11 -> D22:D23
+
+ vadd.u16 d10, d10, d11
+ vadd.u16 d14, d14, d15
+ vadd.u16 d18, d18, d19
+ vadd.u16 d22, d22, d23
+
+ @;D10 : sad_top
+ @;D14 : sad_left
+ @;D18 : sad_right
+ @;D22 : sad_bot
+
+
+ vpaddl.u16 d11, d10
+ vpaddl.u16 d15, d14
+ vpaddl.u16 d19, d18
+ vpaddl.u16 d23, d22
+
+ @;D11 : sad_top
+ @;D15 : sad_left
+ @;D19 : sad_right
+ @;D23 : sad_bot
+
+ vpaddl.u32 d10, d11
+ vpaddl.u32 d22, d23
+ vpaddl.u32 d14, d15
+ vpaddl.u32 d18, d19
+
+ @;D10 : sad_top
+ @;D14 : sad_left
+ @;D18 : sad_right
+ @;D22 : sad_bot
+
+ ldr r4, [sp, #84] @ psad, 1st stack argument (20 + 64)
+
+ vsli.64 d10, d22, #32 @ d10 = {sad_top, sad_bot}
+ vsli.64 d14, d18, #32 @ d14 = {sad_left, sad_right}
+
+ vst1.64 {d14}, [r4]! @ psad[0] = left, psad[1] = right
+ vst1.64 {d10}, [r4]! @ psad[2] = top, psad[3] = bottom
+
+ vpop {d8-d15} @ restore callee-saved NEON registers
+ ldmfd sp!, {r4-r7, pc}
+
+
+
+
+@*****************************************************************************
+@*
+@* Function Name : ime_compute_satqd_16x16_lumainter_a9q
+@* Description : This function computes SAD for a 16x16 block.
+@ : It also computes if any 4x4 block will have a nonzero coefficient after transform and quant
+@ : Once any block is flagged nonzero, the remaining blocks are processed
+@ : by a lighter SAD-only path.
+@
+@ Arguments : R0 :pointer to src buffer
+@ R1 :pointer to est buffer
+@ R2 :source stride
+@ R3 :est stride
+@ STACK :Threshold pointer, distortion pointer, is_nonzero pointer
+@*
+@* Values Returned : NONE
+@*
+@* Register Usage : R0-R11, Q0-Q15
+@* Stack Usage : r4-r12, lr (40 bytes) and d8-d15 (64 bytes)
+@* Cycles : Around
+@* Interruptibility : Interruptible
+@*
+@* Known Limitations
+@* \Assumptions :
+@*
+@* Remarks : d8-d15 are callee-saved under the AAPCS and are used
+@* as scratch here, so they are saved on entry and
+@* restored on exit.
+@*
+@* Revision History :
+@* DD MM YYYY Author(s) Changes
+@* 14 04 2014 Harinarayanan K K First version
+@*
+@*****************************************************************************
+ .global ime_compute_satqd_16x16_lumainter_a9q
+ime_compute_satqd_16x16_lumainter_a9q:
+ @R0 :pointer to src buffer
+ @R1 :pointer to est buffer
+ @R2 :Source stride
+ @R3 :Pred stride
+ @R4 :Threshold pointer
+ @R5 :Distortion,ie SAD
+ @R6 :is nonzero
+
+ push {r4-r12, lr} @push all the variables first (40 bytes)
+ vpush {d8-d15} @save callee-saved NEON registers (64 bytes)
+ ldr r4, [sp, #104] @load the threshold address, 1st stack argument (40 + 64)
+
+ mov r8, #8 @Number of 4x8 blocks to be processed
+ mov r10, #0 @Sad
+ mov r7, #0 @Nonzero info
+ @----------------------------------------------------
+
+ vld1.u8 d30, [r0], r2 @I load 8 pix src row 1
+
+ vld1.u8 d31, [r1], r3 @I load 8 pix pred row 1
+
+ vld1.u8 d28, [r0], r2 @I load 8 pix src row 2
+
+ vld1.u8 d29, [r1], r3 @I load 8 pix pred row 2
+
+ vld1.u8 d26, [r0], r2 @I load 8 pix src row 3
+ vabdl.u8 q0, d30, d31 @I Abs diff r1 blk 12
+
+ vld1.u8 d27, [r1], r3 @I load 8 pix pred row 3
+
+ vld1.u8 d24, [r0], r2 @I load 8 pix src row 4
+
+ vld1.u8 d25, [r1], r3 @I load 8 pix pred row 4
+ vabdl.u8 q1, d28, d29 @I Abs diff r2 blk 12
+
+ vld1.u16 {q11}, [r4] @I load the threshold
+ vabdl.u8 q2, d26, d27 @I Abs diff r3 blk 12
+
+ vabdl.u8 q3, d24, d25 @I Abs diff r4 blk 12
+
+
+
+core_loop:
+ @S1 S2 S3 S4 A1 A2 A3 A4
+ @S5 S6 S7 S8 A5 A6 A7 A8
+ @S9 S10 S11 S12 A9 A10 A11 A12
+ @S13 S14 S15 S16 A13 A14 A15 A16
+ ands r11, r8, #1 @II See if we are at even or odd block
+ vadd.u16 q4 , q0, q3 @I Add r1 r4
+ lsl r11, r2, #2 @II Move back src 4 rows
+
+ subeq r0, r0, r11 @II Move back src 4 rows if we are at even block
+ vadd.u16 q5 , q1, q2 @I Add r2 r3
+ addeq r0, r0, #8 @II Move src 8 cols forward if we are at even block
+
+ lsl r11, r3, #2 @II Move back pred 4 rows
+ vtrn.16 d8 , d10 @I trnspse 1
+ subeq r1, r1, r11 @II Move back pred 4 rows if we are at even block
+
+ addeq r1, r1, #8 @II Move pred 8 cols forward if we are at even block
+ vtrn.16 d9 , d11 @I trnspse 2
+ subne r0, r0, #8 @II Src 8 cols back for odd rows
+
+ subne r1, r1, #8 @II Pred 8 cols back for odd rows
+ vtrn.32 d10, d11 @I trnspse 4
+
+
+ vtrn.32 d8 , d9 @I trnspse 3
+ vswp d10, d11 @I rearrange so that the q4 and q5 add properly
+ @D8 S1 S4 A1 A4
+ @D9 S2 S3 A2 A3
+ @D11 S1 S4 A1 A4
+ @D10 S2 S3 A2 A3
+
+ vadd.s16 q6, q4, q5 @I Get s1 s4
+ vld1.u8 d30, [r0], r2 @II load first 8 pix src row 1
+
+ vtrn.s16 d12, d13 @I Get s2 s3
+ @D12 S1 S4 A1 A4
+ @D13 S2 S3 A2 A3
+
+ vshl.s16 q7, q6 , #1 @I si = si<<1
+ vld1.u8 d31, [r1], r3 @II load first 8 pix pred row 1
+
+ vpadd.s16 d16, d12, d13 @I (s1 + s4) (s2 + s3)
+ vld1.u8 d28, [r0], r2 @II load first 8 pix src row 2
+ @ D16 S14 A14 S23 A23
+ vrev32.16 d0, d16 @I
+ vuzp.s16 d16, d0 @I
+ @D16 S14 S23 A14 A23
+ vadd.s16 d17, d12, d13 @I (s1 + s2) (s3 + s4)
+ vld1.u8 d29, [r1], r3 @II load first 8 pix pred row 2
+ @D17 S12 S34 A12 A34
+
+ vrev32.16 q9, q7 @I Rearrange si's
+ @Q9 Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
+
+ @D12 S1 S4 A1 A4
+ @D19 Z3 Z2 Y3 Y2
+ vsub.s16 d8, d12, d19 @I (s1 - (s3<<1)) (s4 - (s2<<1))
+ vld1.u8 d26, [r0], r2 @II load first 8 pix src row 3
+ @D13 S2 S3 A2 A3
+ @D18 Z4 Z1 Y4 Y1
+ vsub.s16 d9, d13, d18 @I (s2 - (s4<<1)) (s3 - (s1<<1))
+ vld1.u8 d27, [r1], r3 @II load first 8 pix pred row 3
+ @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+
+ @D16 S14 S23 A14 A23
+ vpadd.s16 d10, d16, d17 @I Get sad by adding s1 s2 s3 s4
+ vld1.u8 d24, [r0], r2 @II load first 8 pix src row 4
+ @D22 SAD1 SAD2 junk junk
+
+
+ @Q8 S2 S1 A2 A1 S6 S3 A6 A3
+ @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+ vtrn.32 q8, q4 @I Rearrange to make ls of each block together
+ @Q8 S2 S1 S8 S5 S6 S3 S7 S4
+ @Q10 A2 A1 A8 A5 A6 A3 A7 A4
+
+
+ ldrh r11, [r4, #16] @I Load the threshold for DC val blk 1
+ vdup.s16 q6, d10[0] @I Get the sad blk 1
+ vabdl.u8 q0, d30, d31 @II Abs diff r1 blk 12
+
+ vshl.s16 q7, q6, #1 @I sad_2 = sad_1<<1
+ vmov.s16 r9, d10[0] @I Get the sad for block 1
+
+ vsub.s16 q9, q7, q8 @I Add to the lss
+ vmov.s16 r5, d10[1] @I Get the sad for block 2
+
+ vcle.s16 q7, q11, q9 @I Compare the lss with the thresholds
+ vld1.u8 d25, [r1], r3 @II load first 8 pix pred row 4
+
+ vdup.s16 q15, d10[1] @I Get the sad blk 2
+ vabdl.u8 q1, d28, d29 @II Abs diff r2 blk 12
+
+
+ vshl.s16 q14, q15, #1 @I sad_2 = sad_1<<1
+ vsub.s16 q3, q14, q4 @I Add to the lss
+ vcle.s16 q15, q11, q3 @I Compare the lss with the thresholds
+
+ ADD R10, R10, R9 @I Add to the global sad blk 1
+ vtrn.u8 q15, q7 @I get all comparison bits to one reg
+ vabdl.u8 q2, d26, d27 @II Abs diff r3 blk 12
+
+ ADD R10, R10, R5 @I Add to the global sad blk 2
+ vshr.u8 q14, q15, #7 @I Shift the bits so that no overflow occurs
+ cmp r11, r9 @I Compare with threshold blk 1
+
+ movle r7, #0xf @I If not met mark it by moving non zero val to R7 blk 1
+ vadd.u8 d28, d28, d29 @I Add the bits
+ cmp r11, r5 @I Compare with threshold blk 2
+
+ movle r7, #0xf @I If not met mark it by moving non zero val to R7 blk 2
+ vpadd.u8 d28, d28, d29 @I Add the bits
+
+ vmov.u32 r11, d28[0] @I Since a set bit now represents an unsatisfied condition store it in r11
+ vabdl.u8 q3, d24, d25 @II Abs diff r4 blk 12
+
+ orr r7, r7, r11 @I merge the comparison bits into r7
+
+
+ sub r8, r8, #1 @I Decrement block count
+
+ cmp r7, #0 @I If we have at least one non zero block
+ bne compute_sad_only @I if a non zero block is there, from now on compute sad only
+
+ cmp r8, #1 @I See if we are at the last block
+ bne core_loop @I If the blocks are zero, lets continue the satdq
+
+
+ @EPILOGUE for core loop
+ @S1 S2 S3 S4 A1 A2 A3 A4
+ @S5 S6 S7 S8 A5 A6 A7 A8
+ @S9 S10 S11 S12 A9 A10 A11 A12
+ @S13 S14 S15 S16 A13 A14 A15 A16
+ vadd.u16 q4 , q0, q3 @Add r1 r4
+ vadd.u16 q5 , q1, q2 @Add r2 r3
+ @D8 S1 S2 S2 S1
+ @D10 S4 S3 S3 S4
+ @D9 A1 A2 A2 A1
+ @D11 A4 A3 A3 A4
+ vtrn.16 d8 , d10 @I trnspse 1
+ vtrn.16 d9 , d11 @I trnspse 2
+ vtrn.32 d8 , d9 @I trnspse 3
+ vtrn.32 d10, d11 @I trnspse 4
+
+ vswp d10, d11 @I rearrange so that the q4 and q5 add properly
+ @D8 S1 S4 A1 A4
+ @D9 S2 S3 A2 A3
+ @D11 S1 S4 A1 A4
+ @D10 S2 S3 A2 A3
+ vadd.s16 q6, q4, q5 @Get s1 s4
+ vtrn.s16 d12, d13 @Get s2 s3
+ @D12 S1 S4 A1 A4
+ @D13 S2 S3 A2 A3
+
+ vshl.s16 q7, q6 , #1 @si = si<<1
+ vmov.s16 r9, d10[0] @Overwritten below once d10 holds the sads
+
+ vpadd.s16 d16, d12, d13 @(s1 + s4) (s2 + s3)
+ vmov.s16 r5, d10[1] @Overwritten below once d10 holds the sads
+ @D16 S14 A14 S23 A23
+ vrev32.16 d30, d16 @
+ vuzp.s16 d16, d30 @
+ @D16 S14 S23 A14 A23
+ vadd.s16 d17, d12, d13 @(s1 + s2) (s3 + s4)
+ @D17 S12 S34 A12 A34
+
+ vrev32.16 q9, q7 @Rearrange si's
+ @Q9 Z4,Z1,Y4,Y1,Z3,Z2,Y3,Y2
+
+ @D12 S1 S4 A1 A4
+ @D19 Z3 Z2 Y3 Y2
+ vsub.s16 d8, d12, d19 @(s1 - (s3<<1)) (s4 - (s2<<1))
+ @D13 S2 S3 A2 A3
+ @D18 Z4 Z1 Y4 Y1
+ vsub.s16 d9, d13, d18 @(s2 - (s4<<1)) (s3 - (s1<<1))
+ @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+
+ @D16 S14 S23 A14 A23
+ vpadd.s16 d10, d16, d17 @I Get sad by adding s1 s2 s3 s4
+ @D22 SAD1 SAD2 junk junk
+ vmov.u16 r9, d10[0] @Get the sad for block 1
+ vmov.u16 r5, d10[1] @Get the sad for block 2
+
+ @Q8 S2 S1 A2 A1 S6 S3 A6 A3
+ @Q10 S8 S5 A8 A5 S7 S4 A7 A4
+ ldrh r11, [r4, #16] @Load the threshold for DC val blk 1
+ vtrn.32 q8, q4 @Rearrange to make ls of each block together
+ ADD R10, R10, R9 @Add to the global sad blk 1
+
+ @Q8 S2 S1 S8 S5 S6 S3 S7 S4
+ @Q10 A2 A1 A8 A5 A6 A3 A7 A4
+
+ vld1.u16 {q11}, [r4] @load the threshold
+ ADD R10, R10, R5 @Add to the global sad blk 2
+
+ vdup.u16 q6, d10[0] @Get the sad blk 1
+
+ cmp r11, r9 @Compare with threshold blk 1
+ vshl.u16 q7, q6, #1 @sad_2 = sad_1<<1
+
+ vsub.s16 q9, q7, q8 @Add to the lss
+
+ vcle.s16 q15, q11, q9 @Compare the lss with the thresholds
+ movle r7, #0xf @If not met mark it by moving non zero val to R7 blk 1
+
+ cmp r11, r5 @Compare with threshold blk 2
+ vdup.u16 q14, d10[1] @Get the sad blk 2
+
+ vshl.u16 q13, q14, #1 @sad_2 = sad_1<<1
+ vsub.s16 q12, q13, q4 @Add to the lss
+ vcle.s16 q14, q11, q12 @Compare the lss with the thresholds
+ movle r7, #0xf @If not met mark it by moving non zero val to R7 blk 2
+
+ vtrn.u8 q14, q15 @get all comparison bits to one reg
+ vshr.u8 q14, q14, #7 @Shift the bits so that no overflow occurs
+ vadd.u8 d28, d28, d29 @Add the bits
+ vpadd.u8 d28, d28, d29 @Add the bits
+ vmov.u32 r11, d28[0] @Since a set bit now represents an unsatisfied condition store it in r11
+ orr r7, r7, r11 @merge the comparison bits into r7
+
+ b funcend_sad_16x16 @Since all blocks are processed now, go to end
+
+compute_sad_only: @This block computes SAD only, so will be lighter
+ @It will start processing at an odd block
+ @It will compute sad for odd block,
+ @and then for two blocks at a time
+ @The counter is r8, hence r8 blocks will be processed
+
+ and r11, r8, #1 @Get the last bit of counter
+ cmp r11, #0 @See if we are at even or odd block
+ @if the blk is even we just have to set the pointer to the
+ @start of current row
+
+ lsleq r11, r2, #2 @I Move back src 4 rows
+ subeq r0, r0, r11 @I Move back src 4 rows if we are at even block
+
+ lsleq r11, r3, #2 @I Move back pred 4 rows
+ subeq r1, r1, r11 @I Move back pred 4 rows if we are at even block
+ beq skip_odd_blk @If the blk is odd we have to compute sad
+
+
+ vadd.u16 q4, q0, q1 @Add SAD of row1 and row2
+ vadd.u16 q5, q2, q3 @Add SAD of row3 and row4
+ vadd.u16 q6, q4, q5 @Add SAD of row 1-4
+ vadd.u16 d14, d12, d13 @Add Blk1 and blk2
+ vpadd.u16 d16, d14, d15 @Add col 1-2 and 3-4
+ vpadd.u16 d18, d16, d17 @Add col 12-34
+
+ vmov.u16 r9, d18[0] @Move sad to arm
+ ADD R10, R10, R9 @Add to the global sad
+
+ sub r8, r8, #1 @Dec counter
+ cmp r8, #0 @See if we processed last block
+ beq funcend_sad_16x16 @if processed last block goto end of func
+
+ sub r0, r0, #8 @Since we processed odd block move back src by 8 cols
+ sub r1, r1, #8 @Since we processed odd block move back pred by 8 cols
+
+skip_odd_blk:
+
+ vmov.s16 q0, #0 @Initialize the accumulator
+ vmov.s16 q1, #0 @Initialize the accumulator
+
+ vld1.u8 {q15}, [r0], r2 @load src r1
+ vld1.u8 {q14}, [r1], r3 @load pred r1
+
+ vld1.u8 {q13}, [r0], r2 @load src r2
+ vld1.u8 {q12}, [r1], r3 @load pred r2
+
+ vld1.u8 {q11}, [r0], r2 @load src r3
+ vld1.u8 {q10}, [r1], r3 @load pred r3
+
+ vld1.u8 {q9}, [r0], r2 @load src r4
+ vld1.u8 {q8}, [r1], r3 @load pred r4
+
+ cmp r8, #2
+ beq sad_epilouge
+
+sad_loop:
+
+ vabal.u8 q0, d30, d28 @I accumulate Abs diff R1
+ vabal.u8 q1, d31, d29 @I accumulate Abs diff R1
+
+ vld1.u8 {q15}, [r0], r2 @II load r1 src
+ vabal.u8 q0, d26, d24 @I accumulate Abs diff R2
+
+ vld1.u8 {q14}, [r1], r3 @II load r1 pred
+ vabal.u8 q1, d27, d25 @I accumulate Abs diff R2
+
+ vld1.u8 {q13}, [r0], r2 @II load r2 src
+ vabal.u8 q0, d22, d20 @I accumulate Abs diff R3
+
+ vld1.u8 {q12}, [r1], r3 @II load r2 pred
+ vabal.u8 q1, d23, d21 @I accumulate Abs diff R3
+
+ vld1.u8 {q11}, [r0], r2 @II load r3 src
+ vabal.u8 q0, d18, d16 @I accumulate Abs diff R4
+
+
+ sub r8, r8, #2 @Since we process 16 pix at a time, dec by 2
+ vld1.u8 {q10}, [r1], r3 @II load r3 pred
+ vabal.u8 q1, d19, d17 @I accumulate Abs diff R4
+
+ cmp r8, #2 @Check if last loop
+ vld1.u8 {q9}, [r0], r2 @II load r4 src
+ vld1.u8 {q8}, [r1], r3 @II load r4 pred
+
+ bne sad_loop @Go back to SAD computation
+
+sad_epilouge:
+ vabal.u8 q0, d30, d28 @Accumulate Abs diff R1
+ vabal.u8 q1, d31, d29 @Accumulate Abs diff R1
+
+ vabal.u8 q0, d26, d24 @Accumulate Abs diff R2
+ vabal.u8 q1, d27, d25 @Accumulate Abs diff R2
+
+ vabal.u8 q0, d22, d20 @Accumulate Abs diff R3
+ vabal.u8 q1, d23, d21 @Accumulate Abs diff R3
+
+ vabal.u8 q0, d18, d16 @Accumulate Abs diff R4
+ vabal.u8 q1, d19, d17 @Accumulate Abs diff R4
+
+ vadd.u16 q2, q0, q1 @ADD two accumulators
+ vadd.u16 d6, d4, d5 @Add two blk sad
+ vpadd.u16 d8, d6, d7 @Add col 1-2 and 3-4 sad
+ vpadd.u16 d10, d8, d9 @Add col 12-34 sad
+
+ vmov.u16 r9, d10[0] @move SAD to ARM
+ ADD R10, R10, R9 @Add to the global sad
+
+funcend_sad_16x16: @End of function process
+ ldr r5, [sp, #108] @distortion pointer, 2nd stack argument (44 + 64)
+ ldr r6, [sp, #112] @is_nonzero pointer, 3rd stack argument (48 + 64)
+
+ str r7, [r6] @Store the is zero reg
+ str r10, [r5] @Store sad
+
+ vpop {d8-d15} @restore callee-saved NEON registers
+ pop {r4-r12, pc}
+
+
diff --git a/encoder/arm/ime_platform_macros.h b/encoder/arm/ime_platform_macros.h
new file mode 100755
index 0000000..0f5b2f2
--- /dev/null
+++ b/encoder/arm/ime_platform_macros.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+/* USADA8(src, est, sad): sad += sum of ABS(src[i] - est[i]) for i = 0..3.
+ * ABS is not defined in this header and must be visible at the point of use.
+ * src and est are each expanded four times, so pass side-effect-free values. */
+#define USADA8(src,est,sad) \
+ ((sad) += ABS((src)[0]-(est)[0]) + \
+ ABS((src)[1]-(est)[1]) + \
+ ABS((src)[2]-(est)[2]) + \
+ ABS((src)[3]-(est)[3]))
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
new file mode 100755
index 0000000..c442077
--- /dev/null
+++ b/encoder/armv8/ih264e_evaluate_intra16x16_modes_av8.s
@@ -0,0 +1,592 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+
+///**
+//******************************************************************************
+//*
+//* @brief :Evaluate best intra 16x16 mode (among VERT, HORZ and DC )
+//* and do the prediction.
+//*
+//* @par Description
+//* This function evaluates first three 16x16 modes and compute corresponding sad
+//* and return the buffer predicted with best mode.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//** @param[in] pu1_ngbr_pels_i16
+//* UWORD8 pointer to neighbouring pels
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] u4_n_avblty
+//* availability of neighbouring pixels
+//*
+//* @param[out] u4_intra_mode
+//* Pointer to the variable in which best mode is returned
+//*
+//* @param[out] pu4_sadmin
+//* Pointer to the variable in which minimum sad is returned
+//*
+//* @param[in] u4_valid_intra_modes
+//* Says what all modes are valid
+//*
+//*
+//* @return none
+//*
+//******************************************************************************
+//*/
+//
+//void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+// UWORD8 *pu1_ngbr_pels_i16,
+// UWORD8 *pu1_dst,
+// UWORD32 src_strd,
+// UWORD32 dst_strd,
+// WORD32 u4_n_avblty,
+// UWORD32 *u4_intra_mode,
+// WORD32 *pu4_sadmin,
+// UWORD32 u4_valid_intra_modes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.globl ih264e_evaluate_intra16x16_modes_av8
+
+ih264e_evaluate_intra16x16_modes_av8:
+
+//x0 = pu1_src,
+//x1 = pu1_ngbr_pels_i16,
+//x2 = pu1_dst,
+//x3 = src_strd,
+//x4 = dst_strd,
+//x5 = u4_n_avblty,
+//x6 = u4_intra_mode,
+//x7 = pu4_sadmin
+
+
+
+ // STMFD sp!, {x4-x12, x14} //store register values to stack
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+ ldr x16, [sp, #80]
+ mov x17, x4
+ mov x18, x5
+ mov x14, x6
+ mov x15, x7
+
+
+ sub v0.16b, v0.16b, v0.16b
+ sub v1.16b, v1.16b, v1.16b
+ mov w10, #0
+ mov w11 , #3
+
+ ands x6, x5, #0x01
+ beq top_available //LEFT NOT AVAILABLE
+ ld1 {v0.16b}, [x1]
+ add w10, w10, #8
+ add w11, w11, #1
+top_available:
+ ands x6, x5, #0x04
+ beq none_available
+ add x6, x1, #17
+ ld1 {v1.16b}, [x6]
+ add w10, w10, #8
+ add w11, w11, #1
+ b summation
+none_available:
+ cmp x5, #0
+ bne summation
+ mov w6, #128
+ dup v30.16b, w6
+ dup v31.16b, w6
+ b sad_comp
+summation:
+ uaddl v2.8h, v0.8b, v1.8b
+ uaddl2 v3.8h, v0.16b, v1.16b
+ dup v10.8h, w10
+ neg w11, w11
+ dup v20.8h, w11
+ add v0.8h, v2.8h, v3.8h
+ mov v1.d[0], v0.d[1]
+ add v0.4h, v0.4h, v1.4h
+ addp v0.4h, v0.4h , v0.4h
+ addp v0.4h, v0.4h , v0.4h
+ add v0.4h, v0.4h, v10.4h
+ uqshl v0.8h, v0.8h, v20.8h
+ sqxtun v0.8b, v0.8h
+
+ dup v30.16b, v0.b[0]
+ dup v31.16b, v0.b[0]
+
+
+sad_comp:
+ ld1 { v0.2s, v1.2s }, [x0], x3 // source x0w 0
+
+ ld1 { v2.2s, v3.2s}, [x0], x3 //row 1
+
+ ld1 { v4.2s, v5.2s}, [x0], x3 //row 2
+
+ ld1 { v6.2s, v7.2s}, [x0], x3 //row 3
+
+ //---------------------
+
+ //values for vertical prediction
+ add x6, x1, #17
+ ld1 {v10.8b}, [x6], #8
+ ld1 {v11.8b}, [x6], #8
+ ld1 {v9.16b}, [x1]
+
+
+
+ dup v20.8b, v9.b[15] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8b, v9.b[15] ///HORIZONTAL VALUE ROW=0//
+
+
+///* computing SADs for all three modes*/
+ ///vertical row 0@
+ uabdl v16.8h, v0.8b, v10.8b
+ uabdl v18.8h, v1.8b, v11.8b
+
+ ///HORZ row 0@
+ uabdl v26.8h, v0.8b, v20.8b
+ uabdl v28.8h, v1.8b, v21.8b
+
+ ///dc row 0@
+ uabdl v22.8h, v0.8b, v30.8b
+ uabdl v24.8h, v1.8b, v31.8b
+
+
+
+
+
+ dup v20.8b, v9.b[14] ///HORIZONTAL VALUE ROW=1//
+ dup v21.8b, v9.b[14]
+
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ld1 { v0.2s, v1.2s }, [x0], x3 //row 4
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v28.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8b, v9.b[13] ///HORIZONTAL VALUE ROW=2//
+ dup v21.8b, v9.b[13]
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ld1 { v2.2s, v3.2s}, [x0], x3 //row 5
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v28.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8b, v9.b[12] ///HORIZONTAL VALUE ROW=3//
+ dup v21.8b, v9.b[12]
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ld1 { v4.2s, v5.2s}, [x0], x3 //row 6
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v28.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+//----------------------------------------------------------------------------------------------
+
+ dup v20.8b, v9.b[11] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8b, v9.b[11]
+
+ ///vertical row 0@
+ uabal v16.8h, v0.8b, v10.8b
+ uabal v18.8h, v1.8b, v11.8b
+
+ ld1 { v6.2s, v7.2s}, [x0], x3 //row 7
+ ///HORZ row 0@
+ uabal v26.8h, v0.8b, v20.8b
+ uabal v28.8h, v1.8b, v21.8b
+
+ ///dc row 0@
+ uabal v22.8h, v0.8b, v30.8b
+ uabal v24.8h, v1.8b, v31.8b
+
+ dup v20.8b, v9.b[10] ///HORIZONTAL VALUE ROW=1//
+ dup v21.8b, v9.b[10]
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ld1 { v0.2s, v1.2s }, [x0], x3 //row 8
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v28.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8b, v9.b[9] ///HORIZONTAL VALUE ROW=2//
+ dup v21.8b, v9.b[9]
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ld1 { v2.2s, v3.2s}, [x0], x3 //row 9
+
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v28.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8b, v9.b[8] ///HORIZONTAL VALUE ROW=3//
+ dup v21.8b, v9.b[8]
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ld1 { v4.2s, v5.2s}, [x0], x3 //row 10
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v28.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+
+
+//-------------------------------------------
+
+ dup v20.8b, v9.b[7] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8b, v9.b[7]
+
+ ///vertical row 0@
+ uabal v16.8h, v0.8b, v10.8b
+ uabal v18.8h, v1.8b, v11.8b
+
+ ld1 { v6.2s, v7.2s}, [x0], x3 //row11
+
+ ///HORZ row 0@
+ uabal v26.8h, v0.8b, v20.8b
+ uabal v28.8h, v1.8b, v21.8b
+
+ ///dc row 0@
+ uabal v22.8h, v0.8b, v30.8b
+ uabal v24.8h, v1.8b, v31.8b
+
+ dup v20.8b, v9.b[6] ///HORIZONTAL VALUE ROW=1//
+ dup v21.8b, v9.b[6]
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ld1 { v0.2s, v1.2s }, [x0], x3 //row12
+
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v28.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8b, v9.b[5] ///HORIZONTAL VALUE ROW=2//
+ dup v21.8b, v9.b[5]
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ld1 { v2.2s, v3.2s}, [x0], x3 //row13
+
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v28.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8b, v9.b[4] ///HORIZONTAL VALUE ROW=3//
+ dup v21.8b, v9.b[4]
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ld1 { v4.2s, v5.2s}, [x0], x3 //row14
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v28.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+ //-----------------------------------------------------------------
+
+ dup v20.8b, v9.b[3] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8b, v9.b[3]
+
+ ///vertical row 0@
+ uabal v16.8h, v0.8b, v10.8b
+ uabal v18.8h, v1.8b, v11.8b
+
+ ld1 { v6.2s, v7.2s}, [x0], x3 //row15
+
+ ///HORZ row 0@
+ uabal v26.8h, v0.8b, v20.8b
+ uabal v28.8h, v1.8b, v21.8b
+
+ ///dc row 0@
+ uabal v22.8h, v0.8b, v30.8b
+ uabal v24.8h, v1.8b, v31.8b
+
+ dup v20.8b, v9.b[2] ///HORIZONTAL VALUE ROW=1//
+ dup v21.8b, v9.b[2]
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v28.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8b, v9.b[1] ///HORIZONTAL VALUE ROW=2//
+ dup v21.8b, v9.b[1]
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v28.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8b, v9.b[0] ///HORIZONTAL VALUE ROW=3//
+ dup v21.8b, v9.b[0]
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v28.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+ //------------------------------------------------------------------------------
+
+
+ //vert sum
+
+ add v16.8h, v16.8h , v18.8h
+ mov v18.d[0], v16.d[1]
+ add v16.4h, v16.4h , v18.4h
+ uaddlp v16.2s, v16.4h
+ addp v16.2s, v16.2s, v16.2s
+ smov x8, v16.s[0] //dc
+
+
+ //horz sum
+
+ add v26.8h, v26.8h , v28.8h
+ mov v28.d[0], v26.d[1]
+ add v26.4h, v26.4h , v28.4h
+ uaddlp v26.2s, v26.4h
+ addp v26.2s, v26.2s, v26.2s
+ smov x9, v26.s[0]
+
+ //dc sum
+
+ add v24.8h, v22.8h , v24.8h ///DC
+ mov v25.d[0], v24.d[1]
+ add v24.4h, v24.4h , v25.4h ///DC
+ uaddlp v24.2s, v24.4h ///DC
+ addp v24.2s, v24.2s, v24.2s ///DC
+ smov x10, v24.s[0] //dc
+
+
+ //-----------------------
+ mov x11, #1
+ lsl x11, x11, #30
+
+ mov x0, x16
+ //--------------------------------------------
+ ands x7, x0, #01 // vert mode valid????????????
+ csel x8, x11, x8, eq
+
+
+ ands x6, x0, #02 // horz mode valid????????????
+ csel x9, x11, x9, eq
+
+ ands x6, x0, #04 // dc mode valid????????????
+ csel x10, x11, x10, eq
+
+
+
+
+//--------------------------------
+
+ mov x4, x17
+ mov x7, x15
+ mov x6, x14
+
+ //---------------------------
+
+ //--------------------------
+
+ cmp x8, x9
+ bgt not_vert
+ cmp x8, x10
+ bgt do_dc
+
+ ///----------------------
+ //DO VERTICAL PREDICTION
+ str x8 , [x7] //MIN SAD
+ mov x8, #0
+ str x8 , [x6] // MODE
+ add x6, x1, #17
+ ld1 {v30.16b}, [x6]
+ b do_dc_vert
+ //-----------------------------
+not_vert: cmp x9, x10
+ bgt do_dc
+
+ ///----------------------
+ //DO HORIZONTAL
+ str x9 , [x7] //MIN SAD
+ mov x9, #1
+ str x9 , [x6] // MODE
+
+ ld1 {v0.16b}, [x1]
+ dup v10.16b, v0.b[15]
+ dup v11.16b, v0.b[14]
+ dup v12.16b, v0.b[13]
+ dup v13.16b, v0.b[12]
+ st1 {v10.16b}, [x2], x4
+ dup v14.16b, v0.b[11]
+ st1 {v11.16b}, [x2], x4
+ dup v15.16b, v0.b[10]
+ st1 {v12.16b}, [x2], x4
+ dup v16.16b, v0.b[9]
+ st1 {v13.16b}, [x2], x4
+ dup v17.16b, v0.b[8]
+ st1 {v14.16b}, [x2], x4
+ dup v18.16b, v0.b[7]
+ st1 {v15.16b}, [x2], x4
+ dup v19.16b, v0.b[6]
+ st1 {v16.16b}, [x2], x4
+ dup v20.16b, v0.b[5]
+ st1 {v17.16b}, [x2], x4
+ dup v21.16b, v0.b[4]
+ st1 {v18.16b}, [x2], x4
+ dup v22.16b, v0.b[3]
+ st1 {v19.16b}, [x2], x4
+ dup v23.16b, v0.b[2]
+ st1 {v20.16b}, [x2], x4
+ dup v24.16b, v0.b[1]
+ st1 {v21.16b}, [x2], x4
+ dup v25.16b, v0.b[0]
+ st1 {v22.16b}, [x2], x4
+ st1 {v23.16b}, [x2], x4
+ st1 {v24.16b}, [x2], x4
+ st1 {v25.16b}, [x2], x4
+
+
+
+ b end_func
+
+
+ ///-----------------------------
+
+do_dc: ///---------------------------------
+ //DO DC
+ str x10 , [x7] //MIN SAD
+ mov x10, #2
+ str x10 , [x6] // MODE
+do_dc_vert:
+ st1 {v30.4s}, [x2], x4 //0
+ st1 {v30.4s}, [x2], x4 //1
+ st1 {v30.4s}, [x2], x4 //2
+ st1 {v30.4s}, [x2], x4 //3
+ st1 {v30.4s}, [x2], x4 //4
+ st1 {v30.4s}, [x2], x4 //5
+ st1 {v30.4s}, [x2], x4 //6
+ st1 {v30.4s}, [x2], x4 //7
+ st1 {v30.4s}, [x2], x4 //8
+ st1 {v30.4s}, [x2], x4 //9
+ st1 {v30.4s}, [x2], x4 //10
+ st1 {v30.4s}, [x2], x4 //11
+ st1 {v30.4s}, [x2], x4 //12
+ st1 {v30.4s}, [x2], x4 //13
+ st1 {v30.4s}, [x2], x4 //14
+ st1 {v30.4s}, [x2], x4 //15
+ ///------------------
+end_func:
+ // LDMFD sp!,{x4-x12,PC} //Restoring registers from stack
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
diff --git a/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
new file mode 100755
index 0000000..b02afd1
--- /dev/null
+++ b/encoder/armv8/ih264e_evaluate_intra_chroma_modes_av8.s
@@ -0,0 +1,467 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+
+///**
+//******************************************************************************
+//*
+//* @brief :Evaluate the best intra chroma mode (among VERT, HORZ and DC)
+//* and do the prediction.
+//*
+//* @par Description
+//* This function evaluates the first three intra chroma modes, computes the corresponding SADs
+//* and returns the buffer predicted with the best mode.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] pu1_ngbr_pels_i16
+//* UWORD8 pointer to neighbouring pels
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] u4_n_avblty
+//* availability of neighbouring pixels
+//*
+//* @param[out] u4_intra_mode
+//* Pointer to the variable in which best mode is returned
+//*
+//* @param[out] pu4_sadmin
+//* Pointer to the variable in which minimum sad is returned
+//*
+//* @param[in] u4_valid_intra_modes
+//* Says what all modes are valid
+//*
+//*
+//* @return none
+//*
+//******************************************************************************
+//*/
+//
+//void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+// UWORD8 *pu1_ngbr_pels_i16,
+// UWORD8 *pu1_dst,
+// UWORD32 src_strd,
+// UWORD32 dst_strd,
+// WORD32 u4_n_avblty,
+// UWORD32 *u4_intra_mode,
+// WORD32 *pu4_sadmin,
+// UWORD32 u4_valid_intra_modes)
+//
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+.global ih264e_evaluate_intra_chroma_modes_av8
+
+ih264e_evaluate_intra_chroma_modes_av8:
+// Computes the SAD of the source block against the VERT, HORZ and DC predictions and writes the prediction with the smallest SAD to pu1_dst.
+//x0 = pu1_src,
+//x1 = pu1_ngbr_pels_i16,
+//x2 = pu1_dst,
+//x3 = src_strd,
+//x4 = dst_strd,
+//x5 = u4_n_avblty,
+//x6 = u4_intra_mode,
+//x7 = pu4_sadmin
+//stack = u4_valid_intra_modes (ninth argument)
+// Outputs: *pu4_sadmin = smallest SAD, *u4_intra_mode = 0 (DC), 1 (HORZ) or 2 (VERT); ties prefer DC, then HORZ, then VERT.
+
+
+ // Save registers (push_v_regs is a macro from ih264_neon_macros.s)
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ //-----------------------
+ ldr x16, [sp, #80] // u4_valid_intra_modes from the caller's stack; offset presumably skips the registers saved above - confirm against push_v_regs
+ mov w17, w4 // keep dst_strd; the 32-bit move zero-extends the UWORD32 (upper half of x4 is unspecified on entry)
+ mov w3, w3 // zero-extend the UWORD32 src_strd before it is used as a 64-bit post-index offset
+ mov x14, x6 // keep u4_intra_mode pointer (x6 is reused as scratch)
+ mov x15, x7 // keep pu4_sadmin pointer (x7 is reused as scratch)
+
+ mov x19, #5
+ ands x6, x5, x19 // keep availability bits 0 and 2 only
+ beq none_available
+ cmp x6, #1
+ beq left_only_available
+ cmp x6, #4
+ beq top_only_available
+
+all_available:
+ ld1 {v0.8b, v1.8b}, [x1] // 16 neighbour bytes at offset 0 (the ones used by left_only_available)
+ add x6, x1, #18
+ ld1 {v2.8b, v3.8b}, [x6] // 16 neighbour bytes at offset 18 (the ones used by top_only_available)
+ uxtl v0.8h, v0.8b
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ uxtl v2.8h, v2.8b
+ uxtl v3.8h, v3.8b
+ addp v2.4s, v2.4s , v2.4s
+ addp v3.4s, v3.4s , v3.4s
+ addp v2.4s, v2.4s , v2.4s
+ addp v3.4s, v3.4s , v3.4s
+ rshrn v5.8b, v0.8h, #2
+ dup v21.8h, v5.h[0]
+ rshrn v6.8b, v3.8h, #2
+ dup v20.8h, v6.h[0]
+ add v1.8h, v1.8h, v2.8h
+ rshrn v1.8b, v1.8h, #3
+ dup v23.8h, v1.h[0]
+ mov v20.d[0], v23.d[0]
+ add v0.8h, v0.8h, v3.8h
+ rshrn v0.8b, v0.8h, #3
+ dup v23.8h, v0.h[0]
+ mov v31.d[0], v23.d[0]
+ mov v28.d[0], v20.d[0]
+ mov v29.d[0], v20.d[1]
+ mov v30.d[0], v21.d[0]
+ b sad_comp
+
+left_only_available:
+ ld1 {v0.8b, v1.8b}, [x1]
+ uxtl v0.8h, v0.8b
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ rshrn v0.8b, v0.8h, #2
+ rshrn v1.8b, v1.8h, #2
+
+ dup v28.8h , v1.h[0]
+ dup v29.8h , v1.h[0]
+ dup v30.8h, v0.h[0]
+ dup v31.8h, v0.h[0]
+ b sad_comp
+
+top_only_available:
+ add x6, x1, #18
+ ld1 {v0.8b, v1.8b}, [x6]
+ uxtl v0.8h, v0.8b
+ uxtl v1.8h, v1.8b
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ addp v0.4s, v0.4s , v0.4s
+ addp v1.4s, v1.4s , v1.4s
+ rshrn v0.8b, v0.8h, #2
+ rshrn v1.8b, v1.8h, #2
+ dup v28.8h , v0.h[0]
+ dup v30.8h, v1.h[0]
+ mov v29.d[0], v30.d[1]
+ mov v30.d[0], v28.d[0]
+ mov v31.d[0], v30.d[1]
+ b sad_comp
+none_available:
+ mov w20, #128 // no neighbours: every DC predictor byte is 128
+ dup v28.16b, w20
+ dup v29.16b, w20
+ dup v30.16b, w20
+ dup v31.16b, w20
+
+
+
+sad_comp:
+ add x6, x1, #18
+ ld1 {v10.8b, v11.8b}, [x6] // vertical values
+
+ ld1 {v27.8h}, [x1] // horizontal values: one halfword per row, h[7] for row 0 down to h[0] for row 7
+
+ dup v20.8h, v27.h[7] ///HORIZONTAL VALUE ROW=0//
+ dup v21.8h, v27.h[7]
+
+ ld1 { v0.8b, v1.8b}, [x0], x3
+
+
+ ///vertical row 0@
+ uabdl v16.8h, v0.8b, v10.8b
+ uabdl v18.8h, v1.8b, v11.8b
+
+ ///HORZ row 0@
+ uabdl v26.8h, v0.8b, v20.8b
+ uabdl v14.8h, v1.8b, v21.8b
+
+ ld1 {v2.8b, v3.8b}, [x0], x3
+
+
+
+ ///dc row 0@
+ uabdl v22.8h, v0.8b, v28.8b
+ uabdl v24.8h, v1.8b, v29.8b
+
+
+ dup v20.8h, v27.h[6]
+ dup v21.8h, v27.h[6] ///HORIZONTAL VALUE ROW=1//
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ld1 { v4.8b, v5.8b}, [x0], x3
+
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v14.8h, v3.8b, v21.8b
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v28.8b
+ uabal v24.8h, v3.8b, v29.8b
+
+ dup v20.8h, v27.h[5]
+ dup v21.8h, v27.h[5] ///HORIZONTAL VALUE ROW=2//
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ld1 { v6.8b, v7.8b}, [x0], x3
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v14.8h, v5.8b, v21.8b
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v28.8b
+ uabal v24.8h, v5.8b, v29.8b
+
+ dup v20.8h, v27.h[4]
+ dup v21.8h, v27.h[4] ///HORIZONTAL VALUE ROW=3//
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v14.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v28.8b
+ uabal v24.8h, v7.8b, v29.8b
+
+ //---------------------------------------------------------------------------------------------- rows 4-7: DC uses v30/v31 from here on
+ ld1 { v0.8b, v1.8b}, [x0], x3
+
+
+ dup v20.8h, v27.h[3]
+ dup v21.8h, v27.h[3] ///HORIZONTAL VALUE ROW=0//
+
+ ///vertical row 0@
+ uabal v16.8h, v0.8b, v10.8b
+ uabal v18.8h, v1.8b, v11.8b
+
+ ///HORZ row 0@
+ uabal v26.8h, v0.8b, v20.8b
+ uabal v14.8h, v1.8b, v21.8b
+
+ ld1 { v2.8b, v3.8b}, [x0], x3
+
+ ///dc row 0@
+ uabal v22.8h, v0.8b, v30.8b
+ uabal v24.8h, v1.8b, v31.8b
+
+ dup v20.8h, v27.h[2]
+ dup v21.8h, v27.h[2] ///HORIZONTAL VALUE ROW=1//
+
+ ///vertical row 1@
+ uabal v16.8h, v2.8b, v10.8b
+ uabal v18.8h, v3.8b, v11.8b
+
+ ///HORZ row 1@
+ uabal v26.8h, v2.8b, v20.8b
+ uabal v14.8h, v3.8b, v21.8b
+
+ ld1 { v4.8b, v5.8b}, [x0], x3
+
+ ///dc row 1@
+ uabal v22.8h, v2.8b, v30.8b
+ uabal v24.8h, v3.8b, v31.8b
+
+ dup v20.8h, v27.h[1]
+ dup v21.8h, v27.h[1] ///HORIZONTAL VALUE ROW=2//
+
+ ///vertical row 2@
+ uabal v16.8h, v4.8b, v10.8b
+ uabal v18.8h, v5.8b, v11.8b
+
+ ///HORZ row 2@
+ uabal v26.8h, v4.8b, v20.8b
+ uabal v14.8h, v5.8b, v21.8b
+
+ ld1 {v6.8b, v7.8b}, [x0], x3
+
+ ///dc row 2@
+ uabal v22.8h, v4.8b, v30.8b
+ uabal v24.8h, v5.8b, v31.8b
+
+ dup v20.8h, v27.h[0]
+ dup v21.8h, v27.h[0] ///HORIZONTAL VALUE ROW=3//
+
+ ///vertical row 3@
+ uabal v16.8h, v6.8b, v10.8b
+ uabal v18.8h, v7.8b, v11.8b
+
+ ///HORZ row 3@
+ uabal v26.8h, v6.8b, v20.8b
+ uabal v14.8h, v7.8b, v21.8b
+
+ ///dc row 3@
+ uabal v22.8h, v6.8b, v30.8b
+ uabal v24.8h, v7.8b, v31.8b
+
+
+//-------------------------------------------
+
+
+//vert sum
+
+ add v16.8h, v16.8h , v18.8h
+ mov v18.d[0], v16.d[1]
+ add v16.4h, v16.4h , v18.4h
+ uaddlp v16.2s, v16.4h
+ addp v16.2s, v16.2s, v16.2s
+ smov x8, v16.s[0] // x8 = SAD of the vertical prediction
+
+
+ //horz sum
+
+ add v26.8h, v26.8h , v14.8h
+ mov v14.d[0], v26.d[1]
+ add v26.4h, v26.4h , v14.4h
+ uaddlp v26.2s, v26.4h
+ addp v26.2s, v26.2s, v26.2s
+ smov x9, v26.s[0] // x9 = SAD of the horizontal prediction
+
+ //dc sum
+
+ add v24.8h, v22.8h , v24.8h ///DC
+ mov v25.d[0], v24.d[1]
+ add v24.4h, v24.4h , v25.4h ///DC
+ uaddlp v24.2s, v24.4h ///DC
+ addp v24.2s, v24.2s, v24.2s ///DC
+ smov x10, v24.s[0] //dc
+
+
+
+
+ mov x11, #1
+//-----------------------
+ mov x0, x16 // u4_valid_intra_modes
+
+//--------------------------------------------
+
+
+ lsl x11, x11, #30 // x11 = 1 << 30, the SAD substituted for any mode that is not valid
+
+ ands x7, x0, #04 // vert mode valid????????????
+ csel x8, x11, x8, eq
+
+ ands x6, x0, #02 // horz mode valid????????????
+ csel x9, x11, x9, eq
+
+ ands x6, x0, #01 // dc mode valid????????????
+ csel x10, x11, x10, eq
+
+
+ //---------------------------
+
+ mov x4, x17 // dst_strd (zero-extended in the prologue)
+ mov x6, x14 // u4_intra_mode pointer
+ mov x7, x15 // pu4_sadmin pointer
+
+ //--------------------------
+
+ cmp x10, x9
+ bgt not_dc
+ cmp x10, x8
+ bgt do_vert
+
+ ///----------------------
+ //DO DC PREDICTION
+ str w10 , [x7] //MIN SAD (32-bit store: pu4_sadmin points to a WORD32)
+
+ mov x10, #0
+ str w10 , [x6] // MODE (32-bit store: u4_intra_mode points to a UWORD32)
+
+ b do_dc_vert
+ //-----------------------------
+
+not_dc:
+ cmp x9, x8
+ bgt do_vert
+ ///----------------------
+ //DO HORIZONTAL
+ str w9 , [x7] //MIN SAD (32-bit store: pu4_sadmin points to a WORD32)
+
+ mov x10, #1
+ str w10 , [x6] // MODE (32-bit store: u4_intra_mode points to a UWORD32)
+ ld1 {v0.8h}, [x1]
+
+ dup v10.8h, v0.h[7]
+ dup v11.8h, v0.h[6]
+ dup v12.8h, v0.h[5]
+ dup v13.8h, v0.h[4]
+ st1 {v10.8h}, [x2], x4
+ dup v14.8h, v0.h[3]
+ st1 {v11.8h}, [x2], x4
+ dup v15.8h, v0.h[2]
+ st1 {v12.8h}, [x2], x4
+ dup v16.8h, v0.h[1]
+ st1 {v13.8h}, [x2], x4
+ dup v17.8h, v0.h[0]
+ st1 {v14.8h}, [x2], x4
+ st1 {v15.8h}, [x2], x4
+ st1 {v16.8h}, [x2], x4
+ st1 {v17.8h}, [x2], x4
+
+ b end_func
+
+do_vert:
+ //DO VERTICAL PREDICTION
+ str w8 , [x7] //MIN SAD (32-bit store: pu4_sadmin points to a WORD32)
+ mov x8, #2
+ str w8 , [x6] // MODE (32-bit store: u4_intra_mode points to a UWORD32)
+ add x6, x1, #18
+ ld1 {v28.8b, v29.8b}, [x6] // vertical values
+ ld1 {v30.8b, v31.8b}, [x6] // vertical values
+
+do_dc_vert:
+ st1 {v28.2s, v29.2s} , [x2], x4 //0
+ st1 {v28.2s, v29.2s} , [x2], x4 //1
+ st1 {v28.2s, v29.2s} , [x2], x4 //2
+ st1 {v28.2s, v29.2s} , [x2], x4 //3
+ st1 {v30.2s, v31.2s} , [x2], x4 //4
+ st1 {v30.2s, v31.2s} , [x2], x4 //5
+ st1 {v30.2s, v31.2s} , [x2], x4 //6
+ st1 {v30.2s, v31.2s} , [x2], x4 //7
+
+end_func:
+ // Restore the registers saved in the prologue, in reverse order
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
diff --git a/encoder/armv8/ih264e_half_pel_av8.s b/encoder/armv8/ih264e_half_pel_av8.s
new file mode 100755
index 0000000..6dbd8f8
--- /dev/null
+++ b/encoder/armv8/ih264e_half_pel_av8.s
@@ -0,0 +1,1024 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// * ih264e_half_pel.s
+// *
+// * @brief
+// *
+// *
+// * @author
+// * Ittiam
+// *
+// * @par List of Functions:
+// * ih264e_sixtapfilter_horz
+// * ih264e_sixtap_filter_2dvh_vert
+//
+// *
+// * @remarks
+// * None
+// *
+// *******************************************************************************
+// */
+
+
+.text
+.p2align 2
+.include "ih264_neon_macros.s"
+
+// /**
+///*******************************************************************************
+//*
+//* @brief
+//* Interprediction luma filter for horizontal input(Filter run for width = 17 and height =16)
+//*
+//* @par Description:
+//* Applies a 6 tap horizontal filter .The output is clipped to 8 bits
+//* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst
+//* UWORD8 pointer to the destination
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst,
+// WORD32 src_strd,
+// WORD32 dst_strd);
+
+
+.equ halfpel_width , 17 + 1 //( make it even, two rows are processed at a time)
+
+
+ .global ih264e_sixtapfilter_horz_av8
+ih264e_sixtapfilter_horz_av8: // x0 = pu1_src, x1 = pu1_dst, x2 = src_strd, x3 = dst_strd
+ sxtw x2, w2 // src_strd is a WORD32: sign-extend before using it as a 64-bit post-index offset
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+ sxtw x3, w3 // dst_strd is a WORD32: sign-extend before the 64-bit arithmetic below
+ movi v0.8b, #5
+ sub x0, x0, #2 // start two pixels left of the block so taps a0..a5 are available
+ sub x3, x3, #16 // each row is stored as 16 bytes (post-incremented) plus one halfword, so advance by dst_strd - 16 afterwards
+ movi v1.8b, #20
+ mov x14, #16 // row counter
+
+filter_horz_loop:
+ // Each iteration filters two rows: 24 source bytes are loaded per row and
+ // 18 output bytes (16 + one halfword) are stored per row.
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x2 //// Load row0
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x2 //// Load row1
+
+ //// Processing row0 and row1
+
+ ext v31.8b, v2.8b , v3.8b , #5
+ ext v30.8b, v3.8b , v4.8b , #5
+
+ uaddl v8.8h, v31.8b, v2.8b //// a0 + a5 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #5
+ uaddl v10.8h, v30.8b, v3.8b //// a0 + a5 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #5
+ uaddl v12.8h, v29.8b, v4.8b //// a0 + a5 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #5
+ uaddl v14.8h, v28.8b, v5.8b //// a0 + a5 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #5
+
+ uaddl v16.8h, v27.8b, v6.8b //// a0 + a5 (column2,row1)
+ ext v31.8b, v2.8b , v3.8b , #2
+ uaddl v18.8h, v26.8b, v7.8b //// a0 + a5 (column3,row1)
+ ext v30.8b, v3.8b , v4.8b , #2
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #2
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #2
+ umlal v12.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #2
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #2
+
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 (column2,row1)
+ ext v31.8b, v2.8b , v3.8b , #3
+ umlal v18.8h, v26.8b, v1.8b //// a0 + a5 + 20a2 (column3,row1)
+ ext v30.8b, v3.8b , v4.8b , #3
+ umlal v8.8h, v31.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #3
+ umlal v10.8h, v30.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #3
+ umlal v12.8h, v29.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #3
+ umlal v14.8h, v28.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #3
+
+ umlal v16.8h, v27.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row1)
+ ext v31.8b, v2.8b , v3.8b , #1
+ umlal v18.8h, v26.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row1)
+ ext v30.8b, v3.8b , v4.8b , #1
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #1
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #1
+ umlsl v12.8h, v29.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #1
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #1
+
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row1)
+ ext v31.8b, v2.8b , v3.8b , #4
+ umlsl v18.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row1)
+ ext v30.8b, v3.8b , v4.8b , #4
+ umlsl v8.8h, v31.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ ext v29.8b, v4.8b , v4.8b , #4
+ umlsl v10.8h, v30.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ ext v28.8b, v5.8b , v6.8b , #4
+ umlsl v12.8h, v29.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ ext v27.8b, v6.8b , v7.8b , #4
+ umlsl v14.8h, v28.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row1)
+ ext v26.8b, v7.8b , v7.8b , #4
+
+ umlsl v16.8h, v27.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row1)
+ umlsl v18.8h, v26.8b, v0.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row1)
+
+ sqrshrun v20.8b, v8.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ sqrshrun v21.8b, v10.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ sqrshrun v22.8b, v12.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ sqrshrun v23.8b, v14.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row1)
+ sqrshrun v24.8b, v16.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row1)
+ sqrshrun v25.8b, v18.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row1)
+
+ st1 {v20.8b, v21.8b}, [x1], #16 ////Store dest row0
+ st1 {v22.h}[0], [x1], x3
+ st1 {v23.8b, v24.8b}, [x1], #16 ////Store dest row1
+ st1 {v25.h}[0], [x1], x3
+
+ subs x14, x14, #2 // decrement counter
+
+ bne filter_horz_loop
+
+
+ // Restore the registers saved in the prologue, in reverse order
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+
+
+
+
+
+
+
+///**
+//*******************************************************************************
+//*
+//* @brief
+//* This function implements a two stage cascaded six tap filter. It
+//* applies the six tap filter in the vertical direction on the
+//* predictor values, followed by applying the same filter in the
+//* horizontal direction on the output of the first stage. The six tap
+//* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+//* interpolation process"
+//* (Filter run for width = 17 and height =17)
+//* @par Description:
+//* The function interpolates
+//* the predictors first in the vertical direction and then in the
+//* horizontal direction to output the (1/2,1/2). The output of the first
+//* stage of the filter is stored in the buffer pointed to by pi16_pred1(only in C)
+//* in 16 bit precision.
+//*
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[out] pu1_dst1
+//* UWORD8 pointer to the destination(vertical filtered output)
+//*
+//* @param[out] pu1_dst2
+//* UWORD8 pointer to the destination (output after applying the horizontal filter to the intermediate vertical output)
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride of pu1_dst
+//*
+//* @param[in]pi16_pred1
+//* Pointer to 16bit intermediate buffer(used only in c)
+//*
+//* @param[in] pi16_pred1_strd
+//* integer destination stride of pi16_pred1
+//*
+//*
+//* @returns
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//*/
+//void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+// UWORD8 *pu1_dst1,
+// UWORD8 *pu1_dst2,
+// WORD32 src_strd,
+// WORD32 dst_strd,
+// WORD32 *pi16_pred1,/* Pointer to 16bit intermmediate buffer (used only in c)*/
+// WORD32 pi16_pred1_strd)
+
+
+
+
+ .global ih264e_sixtap_filter_2dvh_vert_av8
+
+ih264e_sixtap_filter_2dvh_vert_av8:
+ // STMFD sp!,{x10,x11,x12,x14}
+ push_v_regs
+ stp x19, x20, [sp, #-16]!
+
+////x0 - pu1_ref
+////x3 - u4_ref_width
+
+ //// Load six rows for vertical interpolation
+ lsl x12, x3, #1
+ sub x0, x0, x12
+ sub x0, x0, #2
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x3
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x3
+ ld1 {v8.8b, v9.8b, v10.8b}, [x0], x3
+ mov x12, #5
+ ld1 {v11.8b, v12.8b, v13.8b}, [x0], x3
+ mov x14, #20
+ ld1 {v14.8b, v15.8b, v16.8b}, [x0], x3
+ mov v0.4h[0], w12
+ mov v0.4h[1], w14
+ ld1 {v17.8b, v18.8b, v19.8b}, [x0], x3
+ movi v1.8b, #20
+
+//// x12 - u2_buff1_width
+//// x14 - u2_buff2_width
+ mov x12, x4
+ add x11, x1, #16
+
+ mov x14, x12
+
+ mov x10, #3 //loop counter
+ sub x16 , x12, #8
+ sub x19, x14, #16
+filter_2dvh_loop:
+
+ //// ////////////// ROW 1 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v2.8b, v17.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v8.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v11.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v5.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v14.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+
+ uaddl v22.8h, v3.8b, v18.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v9.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v12.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v6.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v15.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ ext v30.8b, v20.8b , v21.8b , #4
+ mov v23.d[0], v22.d[1]
+
+
+ uaddl v24.8h, v4.8b, v19.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v10.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v13.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v7.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v16.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ sqrshrun v2.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v3.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v4.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+ mov v21.d[0], v20.d[1]
+ ext v2.8b, v2.8b , v3.8b , #2
+ ext v3.8b, v3.8b , v4.8b , #2
+ ext v4.8b, v4.8b , v4.8b , #2
+
+ st1 {v2.8b, v3.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v4.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v2.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v2.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v2.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v2.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v2.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v2.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v2.8b, v3.8b, v4.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 2 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v5.8b, v2.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v11.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v14.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v8.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v17.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v6.8b, v3.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v12.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v15.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v9.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v18.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v7.8b, v4.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v13.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v16.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v10.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v19.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v5.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v6.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v7.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v5.8b, v5.8b , v6.8b , #2
+ ext v6.8b, v6.8b , v7.8b , #2
+ ext v7.8b, v7.8b , v7.8b , #2
+
+ st1 {v5.8b, v6.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v7.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v6.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v6.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v6.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v6.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v6.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v6.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v5.8b, v6.8b, v7.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 3 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v8.8b, v5.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v14.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v17.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v11.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v2.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v9.8b, v6.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v15.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v18.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v12.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v3.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v10.8b, v7.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v16.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v19.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v13.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v4.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 { v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v8.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v9.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v10.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v8.8b, v8.8b , v9.8b , #2
+ ext v9.8b, v9.8b , v10.8b , #2
+ ext v10.8b, v10.8b , v10.8b , #2
+
+ st1 {v8.8b, v9.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v10.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v8.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v8.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v8.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v8.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v8.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v8.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v8.8b, v9.8b, v10.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 4 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v11.8b, v8.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v17.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v2.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v14.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v5.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v12.8b, v9.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v18.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v3.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v15.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v6.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v13.8b, v10.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v19.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v4.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v16.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v7.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v11.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v12.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v13.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v11.8b, v11.8b , v12.8b , #2
+ ext v12.8b, v12.8b , v13.8b , #2
+ ext v13.8b, v13.8b , v13.8b , #2
+
+ st1 {v11.8b, v12.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v13.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v12.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v12.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v12.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v12.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v12.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v12.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v11.8b, v12.8b, v13.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 5 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+ uaddl v20.8h, v14.8b, v11.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v2.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v5.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v17.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v8.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v15.8b, v12.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v3.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v6.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v18.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v9.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v16.8b, v13.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v4.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v7.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v19.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v10.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v14.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v15.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v16.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v14.8b, v14.8b , v15.8b , #2
+ ext v15.8b, v15.8b , v16.8b , #2
+ ext v16.8b, v16.8b , v16.8b , #2
+
+ st1 {v14.8b, v15.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v16.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v14.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v14.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v14.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v14.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v14.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v14.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v14.8b, v15.8b, v16.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+
+ ////VQRSHRUN.s16 D27,Q14,#2 ;// half,half gird set3,4
+ ////VSHRN.s32 D28,Q11,#8 ;// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ ////VQRSHRUN.s16 D28,Q14,#2 ;// half,half gird set5
+
+ ////VST1.8 {D26,D27,D28},[x2],x14 ;// store 1/2,1,2 grif values
+ //// ////////////// ROW 6 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+
+ cmp x10, #1 //// if it 17 rows are complete skip
+ beq filter_2dvh_skip_row
+ uaddl v20.8h, v17.8b, v14.8b //// a0 + a5 (column1,row0)
+ movi v31.8b, #5
+ umlal v20.8h, v5.8b, v1.8b //// a0 + a5 + 20a2 (column1,row0)
+ umlal v20.8h, v8.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column1,row0)
+ umlsl v20.8h, v2.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column1,row0)
+ umlsl v20.8h, v11.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column1,row0)
+ mov v21.d[0], v20.d[1]
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ uaddl v22.8h, v18.8b, v15.8b //// a0 + a5 (column2,row0)
+ umlal v22.8h, v6.8b, v1.8b //// a0 + a5 + 20a2 (column2,row0)
+ umlal v22.8h, v9.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column2,row0)
+ umlsl v22.8h, v3.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column2,row0)
+ umlsl v22.8h, v12.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column2,row0)
+ mov v23.d[0], v22.d[1]
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+ ext v30.8b, v20.8b , v21.8b , #4
+
+ uaddl v24.8h, v19.8b, v16.8b //// a0 + a5 (column3,row0)
+ ext v29.8b, v20.8b , v21.8b , #6
+ umlal v24.8h, v7.8b, v1.8b //// a0 + a5 + 20a2 (column3,row0)
+ umlal v24.8h, v10.8b, v1.8b //// a0 + a5 + 20a2 + 20a3 (column3,row0)
+ umlsl v24.8h, v4.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 (column3,row0)
+ umlsl v24.8h, v13.8b, v31.8b //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (column3,row0)
+ mov v25.d[0], v24.d[1]
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ sqrshrun v17.8b, v20.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column1,row0)
+ ext v31.8b, v21.8b , v22.8b , #2
+ sqrshrun v18.8b, v22.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column2,row0)
+ ext v28.8b, v20.8b , v21.8b , #2
+
+ saddl v26.4s, v31.4h, v20.4h //// a0 + a5 (set1)
+ ext v31.8b, v22.8b , v23.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set1)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set1)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set1)
+ smlsl v26.4s, v21.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set1)
+ ext v30.8b, v21.8b , v22.8b , #4
+
+ sqrshrun v19.8b, v24.8h, #5 //// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5 (column3,row0)
+ ext v29.8b, v21.8b , v22.8b , #6
+
+ ext v28.8b, v21.8b , v22.8b , #2
+ saddl v20.4s, v31.4h, v21.4h //// a0 + a5 (set2)
+ smlal v20.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set2)
+ smlal v20.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set2)
+ smlsl v20.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set2)
+ smlsl v20.4s, v22.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set2)
+ ext v31.8b, v23.8b , v24.8b , #2
+
+ ext v17.8b, v17.8b , v18.8b , #2
+ ext v18.8b, v18.8b , v19.8b , #2
+ ext v19.8b, v19.8b , v19.8b , #2
+
+ st1 {v17.8b, v18.8b}, [x1], x12 //// store row1 - 1,1/2 grid
+ st1 {v19.h}[0], [x11], x12 //// store row1 - 1,1/2 grid
+
+ ext v30.8b, v22.8b , v23.8b , #4
+ ext v29.8b, v22.8b , v23.8b , #6
+
+ saddl v18.4s, v31.4h, v22.4h //// a0 + a5 (set3)
+ ext v28.8b, v22.8b , v23.8b , #2
+ smlal v18.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set3)
+ smlal v18.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set3)
+ smlsl v18.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set3)
+ smlsl v18.4s, v23.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set3)
+ ext v31.8b, v24.8b , v25.8b , #2
+
+ shrn v21.4h, v20.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set2)
+ ext v30.8b, v23.8b , v24.8b , #4
+ shrn v20.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set1)
+ ext v29.8b, v23.8b , v24.8b , #6
+
+ saddl v26.4s, v31.4h, v23.4h //// a0 + a5 (set4)
+ ext v28.8b, v23.8b , v24.8b , #2
+ ext v31.8b, v25.8b , v25.8b , #2
+ smlal v26.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set4)
+ smlal v26.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set4)
+ smlsl v26.4s, v28.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set4)
+ smlsl v26.4s, v24.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set4)
+ ext v30.8b, v24.8b , v25.8b , #4
+
+ saddl v22.4s, v31.4h, v24.4h //// a0 + a5 (set5)
+ ext v29.8b, v24.8b , v25.8b , #6
+
+ ext v31.8b, v24.8b , v25.8b , #2
+ shrn v28.4h, v18.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set3)
+
+ ld1 {v17.8b, v18.8b, v19.8b}, [x0], x3 //// Load next Row data
+ smlal v22.4s, v30.4h, v0.4h[1] //// a0 + a5 + 20a2 (set5)
+ smlal v22.4s, v29.4h, v0.4h[1] //// a0 + a5 + 20a2 + 20a3 (set5)
+ smlsl v22.4s, v31.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 (set5)
+ smlsl v22.4s, v25.4h, v0.4h[0] //// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 (set5)
+ shrn v29.4h, v26.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set4)
+ mov v20.d[1], v21.d[0]
+ sqrshrun v26.8b, v20.8h, #2 //// half,half gird set1,2
+
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+
+ subs x10, x10, #1 ////decrement loop counter
+
+ bne filter_2dvh_loop
+
+
+//// Process first vertical interpolated row
+//// each column is
+ //// ////////////// ROW 13 ///////////////////////
+
+//// Process first vertical interpolated row
+//// each column is
+
+ // LDMFD sp!,{x10,x11,x12,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+filter_2dvh_skip_row:
+ mov v28.d[1], v29.d[0]
+ sqrshrun v27.8b, v28.8h, #2 //// half,half gird set3,4
+ shrn v28.4h, v22.4s, #8 //// shift by 8 and later we will shift by 2 more with rounding (set5)
+
+ sqrshrun v28.8b, v28.8h, #2 //// half,half gird set5
+
+ st1 {v26.8b, v27.8b}, [x2], #16 //// store 1/2,1,2 grif values
+ st1 {v28.h}[0], [x2], x19 //// store 1/2,1,2 grif values
+ // LDMFD sp!,{x10,x11,x12,pc}
+ ldp x19, x20, [sp], #16
+ pop_v_regs
+ ret
+
+
+///*****************************************
+
+
+
+
+
+
+ .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ih264e_platform_macros.h b/encoder/armv8/ih264e_platform_macros.h
new file mode 100755
index 0000000..39cac96
--- /dev/null
+++ b/encoder/armv8/ih264e_platform_macros.h
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_platform_macros.h
+*
+* @brief
+* Contains platform specific routines used for codec context initialization
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context (neon_a9q variant)
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks NOTE(review): presumably installs the a9q NEON routines - confirm
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_a9q(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context (neon_av8 variant)
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks NOTE(review): presumably installs the av8 NEON routines - confirm
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_neon_av8(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context (generic variant)
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks NOTE(review): presumably installs the portable routines - confirm
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context based on the architecture in use
+*
+* @param[in] pv_codec
+* Codec context pointer, passed as an untyped (void *) pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder's execution environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/armv8/ime_distortion_metrics_av8.s b/encoder/armv8/ime_distortion_metrics_av8.s
new file mode 100755
index 0000000..99ebc8a
--- /dev/null
+++ b/encoder/armv8/ime_distortion_metrics_av8.s
@@ -0,0 +1,978 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+//**
+
+///**
+//******************************************************************************
+//*
+//*
+//* @brief
+//* This file contains definitions of routines that compute distortion
+//* between two macro/sub blocks of identical dimensions
+//*
+//* @author
+//* Ittiam
+//*
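+//* @par List of Functions:
+//* - ime_compute_sad_16x16_av8(), ime_compute_sad_16x16_fast_av8()
+//* - ime_compute_sad_16x16_ea8_av8(), ime_compute_sad_16x8_av8()
+//* - ime_calculate_sad2_prog_av8(), ime_calculate_sad3_prog_av8()
+//* - ime_calculate_sad4_prog_av8(), ime_sub_pel_compute_sad_16x16_av8()
+//* - ime_compute_satqd_16x16_lumainter_av8()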
+//*
+//* @remarks
+//* None
+//*
+//*******************************************************************************
+//
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+//*
+//* @par Description
+//* This function estimates the SAD between two 16x16 blocks from every
+//* second row only (8 of the 16 rows) and doubles the sum. It has no early
+//* exit: i4_max_sad is accepted for interface compatibility but not read.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] pu1_dst
+//* UWORD8 pointer to the block the source is compared against (read only)
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] i4_max_sad
+//* integer maximum allowed distortion (unused by this routine)
+//*
+//* @param[out] pi4_mb_distortion
+//* pointer to the integer that receives the evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+.text
+.p2align 2
+// push_v_regs / pop_v_regs: spill d8-d15 to the stack (four 16-byte pairs, 64 bytes) and reload them in reverse order.
+.macro push_v_regs
+ stp d8, d9, [sp, #-16]!
+ stp d10, d11, [sp, #-16]!
+ stp d12, d13, [sp, #-16]!
+ stp d14, d15, [sp, #-16]!
+.endm
+.macro pop_v_regs
+ ldp d14, d15, [sp], #16
+ ldp d12, d13, [sp], #16
+ ldp d10, d11, [sp], #16
+ ldp d8, d9, [sp], #16
+.endm
+
+ .global ime_compute_sad_16x16_fast_av8
+ime_compute_sad_16x16_fast_av8:
+ // x0 = src, x1 = est, w2 = src stride, w3 = est stride, x5 = pi4_mb_distortion (w4, the max SAD, is not read)
+ // Only every second row is sampled (8 rows in total); the sum is doubled before it is stored.
+ push_v_regs
+ sxtw x2, w2 // strides are 32-bit arguments: the upper halves of x2/x3 are unspecified on entry
+ sxtw x3, w3
+ lsl x2, x2, #1 // step two rows at a time
+ lsl x3, x3, #1
+ mov x6, #2 // 2 iterations x 4 sampled rows
+ movi v30.8h, #0 // eight 16-bit partial sums
+
+core_loop_ime_compute_sad_16x16_fast_av8:
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabal v30.8h, v4.8b, v5.8b
+ uabal2 v30.8h, v4.16b, v5.16b
+
+ uabal v30.8h, v6.8b, v7.8b
+ uabal2 v30.8h, v6.16b, v7.16b
+
+ subs x6, x6, #1
+ bne core_loop_ime_compute_sad_16x16_fast_av8
+
+ addp v30.8h, v30.8h, v30.8h // fold the 8 partial sums ...
+ uaddlp v30.4s, v30.8h // ... widening to 32 bits ...
+ addp v30.2s, v30.2s, v30.2s // ... into lane 0
+ shl v30.2s, v30.2s, #1 // scale the 8-row sum to a 16-row estimate
+ st1 {v30.s}[0], [x5]
+ pop_v_regs
+ ret
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x8 blocks
+//*
+//*
+//* @par Description
+//* This function computes the SAD between two blocks that are 16 pixels wide
+//* and 8 rows high. All 8 rows are always summed; the maximum-SAD argument is
+//* not read, so there is no early exit in this implementation.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] pu1_dst
+//* UWORD8 pointer to the block the source is compared against (read only)
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] u4_max_sad
+//* integer maximum allowed distortion (unused by this routine)
+//*
+//* @param[out] pi4_mb_distortion
+//* pointer to the integer that receives the evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+//
+ .global ime_compute_sad_16x8_av8
+ime_compute_sad_16x8_av8:
+ // x0 = src, x1 = est, w2 = src stride, w3 = est stride, x5 = pi4_mb_distortion
+ sxtw x2, w2 // strides are 32-bit arguments: the upper halves of x2/x3 are unspecified on entry
+ sxtw x3, w3
+ push_v_regs
+ mov x6, #2 // 2 iterations x 4 rows = 8 rows
+ movi v30.8h, #0
+
+core_loop_ime_compute_sad_16x8_av8:
+
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabal v30.8h, v4.8b, v5.8b
+ uabal2 v30.8h, v4.16b, v5.16b
+
+ uabal v30.8h, v6.8b, v7.8b
+ uabal2 v30.8h, v6.16b, v7.16b
+
+ subs x6, x6, #1
+ bne core_loop_ime_compute_sad_16x8_av8
+
+
+ addp v30.8h, v30.8h, v30.8h
+ uaddlp v30.4s, v30.8h
+ addp v30.2s, v30.2s, v30.2s
+
+ st1 {v30.s}[0], [x5]
+ pop_v_regs
+ ret
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks with early exit
+//*
+//* @par Description
+//* The 8 even rows are summed first. If that partial SAD is greater than
+//* i4_max_sad it is stored and the routine returns; otherwise the 8 odd rows
+//* are added and the full 16x16 SAD is stored.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] pu1_dst
+//* UWORD8 pointer to the block the source is compared against (read only)
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] i4_max_sad
+//* integer maximum allowed distortion (signed 32-bit compare)
+//*
+//* @param[out] pi4_mb_distortion
+//* pointer to the integer that receives the evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+
+ .global ime_compute_sad_16x16_ea8_av8
+ime_compute_sad_16x16_ea8_av8:
+ push_v_regs
+ sxtw x2, w2 // strides are 32-bit arguments: the upper halves of x2/x3 are unspecified on entry
+ sxtw x3, w3
+ movi v30.8h, #0
+ add x7, x0, x2 // x7/x8: second row of src/est, start of the odd-row pass
+ add x8, x1, x3
+
+ lsl x2, x2, #1 // from here on every load skips one row
+ lsl x3, x3, #1
+
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+ ld1 {v8.16b}, [x0], x2
+ ld1 {v9.16b}, [x1], x3
+ ld1 {v10.16b}, [x0], x2
+ ld1 {v11.16b}, [x1], x3
+ ld1 {v12.16b}, [x0], x2
+ ld1 {v13.16b}, [x1], x3
+ ld1 {v14.16b}, [x0], x2
+ ld1 {v15.16b}, [x1], x3
+ ld1 {v16.16b}, [x0], x2
+ ld1 {v17.16b}, [x1], x3
+ ld1 {v18.16b}, [x0], x2
+ ld1 {v19.16b}, [x1], x3
+
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ uabal v30.8h, v8.8b, v9.8b
+ uabal2 v30.8h, v8.16b, v9.16b
+
+ uabal v30.8h, v10.8b, v11.8b
+ uabal2 v30.8h, v10.16b, v11.16b
+
+ uabal v30.8h, v12.8b, v13.8b
+ uabal2 v30.8h, v12.16b, v13.16b
+
+ uabal v30.8h, v14.8b, v15.8b
+ uabal2 v30.8h, v14.16b, v15.16b
+
+ uabal v30.8h, v16.8b, v17.8b
+ uabal2 v30.8h, v16.16b, v17.16b
+
+ uabal v30.8h, v18.8b, v19.8b
+ uabal2 v30.8h, v18.16b, v19.16b
+
+ addp v31.8h, v30.8h, v30.8h // even-row SAD goes to v31; v30 keeps the running partial sums
+ uaddlp v31.4s, v31.8h
+ addp v31.2s, v31.2s, v31.2s
+ mov w6, v31.s[0]
+ cmp w6, w4
+ bgt end_func_16x16 // early exit: the even rows alone already exceed i4_max_sad
+
+ //second pass: the 8 odd rows
+ ld1 {v0.16b}, [x7], x2
+ ld1 {v1.16b}, [x8], x3
+ ld1 {v2.16b}, [x7], x2
+ ld1 {v3.16b}, [x8], x3
+ ld1 {v8.16b}, [x7], x2
+ ld1 {v9.16b}, [x8], x3
+ ld1 {v10.16b}, [x7], x2
+ ld1 {v11.16b}, [x8], x3
+ ld1 {v12.16b}, [x7], x2
+ ld1 {v13.16b}, [x8], x3
+ ld1 {v14.16b}, [x7], x2
+ ld1 {v15.16b}, [x8], x3
+ ld1 {v16.16b}, [x7], x2
+ ld1 {v17.16b}, [x8], x3
+ ld1 {v18.16b}, [x7], x2
+ ld1 {v19.16b}, [x8], x3
+
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ uabal v30.8h, v8.8b, v9.8b
+ uabal2 v30.8h, v8.16b, v9.16b
+
+ uabal v30.8h, v10.8b, v11.8b
+ uabal2 v30.8h, v10.16b, v11.16b
+
+ uabal v30.8h, v12.8b, v13.8b
+ uabal2 v30.8h, v12.16b, v13.16b
+
+ uabal v30.8h, v14.8b, v15.8b
+ uabal2 v30.8h, v14.16b, v15.16b
+
+ uabal v30.8h, v16.8b, v17.8b
+ uabal2 v30.8h, v16.16b, v17.16b
+
+ uabal v30.8h, v18.8b, v19.8b
+ uabal2 v30.8h, v18.16b, v19.16b
+
+ addp v31.8h, v30.8h, v30.8h
+ uaddlp v31.4s, v31.8h
+ addp v31.2s, v31.2s, v31.2s
+
+end_func_16x16:
+ st1 {v31.s}[0], [x5]
+ pop_v_regs
+ ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name : ime_calculate_sad2_prog_av8()
+////
+//// Detail Description : This function finds the SADs of one 16x16 source MB
+//// against two reference MBs in a single pass
+////
+//// Platform : ARMv8 (AArch64) / NEON
+////
+////-----------------------------------------------------------------------------
+//*/
+
+ .global ime_calculate_sad2_prog_av8
+ime_calculate_sad2_prog_av8:
+
+ // x0 = ref1 <UWORD8 *>
+ // x1 = ref2 <UWORD8 *>
+ // x2 = src <UWORD8 *>
+ // w3 = RefBufferWidth, w4 = CurBufferWidth (32-bit; on AArch64 all six arguments arrive in registers)
+ // x5 = psad <UWORD32 *>, receives psad[0] (ref1) and psad[1] (ref2)
+ push_v_regs
+ sxtw x3, w3 // strides are 32-bit arguments: the upper halves of x3/x4 are unspecified on entry
+ sxtw x4, w4
+ mov x6, #4 // 4 iterations x 4 rows = 16 rows
+ movi v30.8h, #0 // partial sums against ref1
+ movi v31.8h, #0 // partial sums against ref2
+
+core_loop_ime_calculate_sad2_prog_av8:
+
+ ld1 {v0.16b}, [x0], x3
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x2], x4
+
+ ld1 {v3.16b}, [x0], x3
+ ld1 {v4.16b}, [x1], x3
+ ld1 {v5.16b}, [x2], x4
+
+ uabal v30.8h, v0.8b, v2.8b
+ uabal2 v30.8h, v0.16b, v2.16b
+ uabal v31.8h, v1.8b, v2.8b
+ uabal2 v31.8h, v1.16b, v2.16b
+
+ uabal v30.8h, v3.8b, v5.8b
+ uabal2 v30.8h, v3.16b, v5.16b
+ uabal v31.8h, v4.8b, v5.8b
+ uabal2 v31.8h, v4.16b, v5.16b
+
+ ld1 {v6.16b}, [x0], x3
+ ld1 {v7.16b}, [x1], x3
+ ld1 {v8.16b}, [x2], x4
+
+ ld1 {v9.16b}, [x0], x3
+ ld1 {v10.16b}, [x1], x3
+ ld1 {v11.16b}, [x2], x4
+
+ uabal v30.8h, v6.8b, v8.8b
+ uabal2 v30.8h, v6.16b, v8.16b
+ uabal v31.8h, v7.8b, v8.8b
+ uabal2 v31.8h, v7.16b, v8.16b
+
+ uabal v30.8h, v9.8b, v11.8b
+ uabal2 v30.8h, v9.16b, v11.16b
+ uabal v31.8h, v10.8b, v11.8b
+ uabal2 v31.8h, v10.16b, v11.16b
+
+ subs x6, x6, #1
+ bne core_loop_ime_calculate_sad2_prog_av8
+
+ // reduce: lanes 0-3 carry the ref1 partial sums, lanes 4-7 the ref2 partial sums
+ addp v30.8h, v30.8h, v31.8h
+ uaddlp v30.4s, v30.8h
+ addp v30.4s, v30.4s, v30.4s // lane 0 = SAD(ref1), lane 1 = SAD(ref2)
+
+ st1 {v30.2s}, [x5]
+ pop_v_regs
+ ret
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name : ime_calculate_sad3_prog_av8()
+////
+//// Detail Description : This function finds the SADs of one 16x16 source MB
+//// against three reference MBs in a single pass
+////
+//// Platform : ARMv8 (AArch64) / NEON
+////
+////-----------------------------------------------------------------------------
+//*/
+
+ .global ime_calculate_sad3_prog_av8
+ime_calculate_sad3_prog_av8:
+
+ // x0 = ref1 <UWORD8 *>
+ // x1 = ref2 <UWORD8 *>
+ // x2 = ref3 <UWORD8 *>
+ // x3 = src <UWORD8 *>
+ // w4 = RefBufferWidth <UWORD32>, w5 = CurBufferWidth <UWORD32>
+ // x6 = psad <UWORD32 *>, receives psad[0..2] for ref1, ref2, ref3
+
+ // All seven arguments arrive in registers on AArch64; x7 is the loop counter so that x6 (psad) survives.
+ push_v_regs
+ sxtw x4, w4 // strides are 32-bit arguments: the upper halves of x4/x5 are unspecified on entry
+ sxtw x5, w5
+ mov x7, #8 // 8 iterations x 2 rows = 16 rows
+ movi v29.8h, #0
+ movi v30.8h, #0
+ movi v31.8h, #0
+
+core_loop_ime_calculate_sad3_prog_av8:
+
+ ld1 {v0.16b}, [x0], x4
+ ld1 {v1.16b}, [x1], x4
+ ld1 {v2.16b}, [x2], x4
+ ld1 {v3.16b}, [x3], x5
+
+ uabal v29.8h, v0.8b, v3.8b
+ uabal2 v29.8h, v0.16b, v3.16b
+ uabal v30.8h, v1.8b, v3.8b
+ uabal2 v30.8h, v1.16b, v3.16b
+ uabal v31.8h, v2.8b, v3.8b
+ uabal2 v31.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x4
+ ld1 {v5.16b}, [x1], x4
+ ld1 {v6.16b}, [x2], x4
+ ld1 {v7.16b}, [x3], x5
+
+ uabal v29.8h, v4.8b, v7.8b
+ uabal2 v29.8h, v4.16b, v7.16b
+ uabal v30.8h, v5.8b, v7.8b
+ uabal2 v30.8h, v5.16b, v7.16b
+ uabal v31.8h, v6.8b, v7.8b
+ uabal2 v31.8h, v6.16b, v7.16b
+
+ subs x7, x7, #1
+ bne core_loop_ime_calculate_sad3_prog_av8
+
+ addp v29.8h, v29.8h, v30.8h // lanes 0-3: ref1 partial sums, lanes 4-7: ref2 partial sums
+ addp v31.8h, v31.8h, v31.8h // ref3 partial sums (duplicated)
+ uaddlp v29.4s, v29.8h
+ uaddlp v31.4s, v31.8h
+ addp v29.4s, v29.4s, v31.4s // lanes: SAD(ref1), SAD(ref2), SAD(ref3), SAD(ref3)
+
+ st1 {v29.2s}, [x6], #8 // psad[0], psad[1]
+ st1 {v29.s}[2], [x6] // psad[2]
+ pop_v_regs
+ ret
+
+
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) for sub-pel motion estimation
+//*
+//* @par Description
+//* This function computes the SAD of a 16x16 source for all the 8 half pel points
+//*
+//* @param[out] pi4_sad
+//* integer evaluated sad
+//* pi4_sad[0] - half x
+//* pi4_sad[1] - half x - 1
+//* pi4_sad[2] - half y
+//* pi4_sad[3] - half y - 1
+//* pi4_sad[4] - half xy
+//* pi4_sad[5] - half xy - 1
+//* pi4_sad[6] - half xy - strd
+//* pi4_sad[7] - half xy - 1 - strd
+//*
+//* @remarks
+//* x0 = src, x1/x2/x3 = half x/y/xy planes, w4 = src stride, w5 = ref stride, x6 = pi4_sad
+//******************************************************************************
+//*/
+
+.text
+.p2align 2
+
+ .global ime_sub_pel_compute_sad_16x16_av8
+ime_sub_pel_compute_sad_16x16_av8:
+ push_v_regs
+ sxtw x4, w4 // strides are 32-bit arguments: the upper halves of x4/x5 are unspecified on entry
+ sxtw x5, w5
+ sub x7, x1, #1 //x left
+ sub x8, x2, x5 //y top
+ sub x9, x3, #1 //xy left
+ sub x10, x3, x5 //xy top
+ sub x11, x10, #1 //xy top left
+
+ movi v24.8h, #0
+ movi v25.8h, #0
+ movi v26.8h, #0
+ movi v27.8h, #0
+ movi v28.8h, #0
+ movi v29.8h, #0
+ movi v30.8h, #0
+ movi v31.8h, #0
+
+ mov x12, #16
+core_loop_ime_sub_pel_compute_sad_16x16_av8:
+
+ ld1 {v0.16b}, [x0], x4 //src
+ ld1 {v1.16b}, [x1], x5 //x
+ ld1 {v2.16b}, [x7], x5 //x left
+ ld1 {v3.16b}, [x2], x5 //y
+ ld1 {v9.16b}, [x8], x5 //y top
+ ld1 {v10.16b}, [x3], x5 //xy
+ ld1 {v11.16b}, [x9], x5 //xy left
+ ld1 {v12.16b}, [x10], x5 //xy top
+ ld1 {v13.16b}, [x11], x5 //xy top left
+
+ uabal v24.8h, v0.8b, v1.8b
+ uabal2 v24.8h, v0.16b, v1.16b
+ uabal v25.8h, v0.8b, v2.8b
+ uabal2 v25.8h, v0.16b, v2.16b
+ uabal v26.8h, v0.8b, v3.8b
+ uabal2 v26.8h, v0.16b, v3.16b
+ uabal v27.8h, v0.8b, v9.8b
+ uabal2 v27.8h, v0.16b, v9.16b
+ uabal v28.8h, v0.8b, v10.8b
+ uabal2 v28.8h, v0.16b, v10.16b
+ uabal v29.8h, v0.8b, v11.8b
+ uabal2 v29.8h, v0.16b, v11.16b
+ uabal v30.8h, v0.8b, v12.8b
+ uabal2 v30.8h, v0.16b, v12.16b
+ uabal v31.8h, v0.8b, v13.8b
+ uabal2 v31.8h, v0.16b, v13.16b
+
+ subs x12, x12, #1
+ bne core_loop_ime_sub_pel_compute_sad_16x16_av8
+
+ addp v24.8h, v24.8h, v25.8h
+ addp v26.8h, v26.8h, v27.8h
+ addp v28.8h, v28.8h, v29.8h
+ addp v30.8h, v30.8h, v31.8h
+
+ uaddlp v24.4s, v24.8h
+ uaddlp v26.4s, v26.8h
+ uaddlp v28.4s, v28.8h
+ uaddlp v30.4s, v30.8h
+
+ addp v24.4s, v24.4s, v26.4s // pi4_sad[0..3]
+ addp v25.4s, v28.4s, v30.4s // pi4_sad[4..7]
+
+ st1 {v24.4s-v25.4s}, [x6]
+ pop_v_regs
+ ret
+
+
+///**
+//******************************************************************************
+//*
+//* @brief computes distortion (SAD) between 2 16x16 blocks
+//*
+//* @par Description
+//* This function computes the SAD between two 16x16 blocks over all 16
+//* rows. i4_max_sad is not read by this routine, so no early exit is taken
+//* here; the complete SAD is always stored.
+//*
+//* @param[in] pu1_src
+//* UWORD8 pointer to the source
+//*
+//* @param[in] pu1_dst
+//* UWORD8 pointer to the block the source is compared against (read only)
+//*
+//* @param[in] src_strd
+//* integer source stride
+//*
+//* @param[in] dst_strd
+//* integer destination stride
+//*
+//* @param[in] i4_max_sad
+//* integer maximum allowed distortion (unused by this routine)
+//*
+//* @param[out] pi4_mb_distortion
+//* pointer to the integer that receives the evaluated sad
+//*
+//* @remarks
+//*
+//******************************************************************************
+//*/
+ .global ime_compute_sad_16x16_av8
+ime_compute_sad_16x16_av8:
+ push_v_regs
+ sxtw x2, w2 // strides are 32-bit arguments: the upper halves of x2/x3 are unspecified on entry
+ sxtw x3, w3
+ mov x6, #4 // 4 iterations x 4 rows = 16 rows
+ movi v30.8h, #0
+
+core_loop_ime_compute_sad_16x16_av8:
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+
+ uabal v30.8h, v0.8b, v1.8b
+ uabal2 v30.8h, v0.16b, v1.16b
+
+ uabal v30.8h, v2.8b, v3.8b
+ uabal2 v30.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabal v30.8h, v4.8b, v5.8b
+ uabal2 v30.8h, v4.16b, v5.16b
+
+ uabal v30.8h, v6.8b, v7.8b
+ uabal2 v30.8h, v6.16b, v7.16b
+
+ subs x6, x6, #1
+ bne core_loop_ime_compute_sad_16x16_av8
+
+ addp v30.8h, v30.8h, v30.8h
+ uaddlp v30.4s, v30.8h
+ addp v30.2s, v30.2s, v30.2s
+
+ st1 {v30.s}[0], [x5]
+ pop_v_regs
+ ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name : ime_calculate_sad4_prog_av8()
+////
+//// Detail Description : This function finds the SADs of a 16x16 source block
+//// against 4 reference blocks (left, right, top, bottom) in one pass
+////
+//// Platform : ARMv8 (AArch64) / NEON
+////
+////-----------------------------------------------------------------------------
+//*/
+
+ .global ime_calculate_sad4_prog_av8
+ime_calculate_sad4_prog_av8: // x0 = ref, x1 = src, w2 = ref stride, w3 = src stride, x4 = psad (4 results)
+ push_v_regs
+ sxtw x2, w2 // strides are 32-bit arguments: the upper halves of x2/x3 are unspecified on entry
+ sxtw x3, w3
+ sub x5, x0, #1 //left
+ add x6, x0, #1 //right
+ sub x7, x0, x2 //top
+ add x8, x0, x2 //bottom
+
+ movi v28.8h, #0
+ movi v29.8h, #0
+ movi v30.8h, #0
+ movi v31.8h, #0
+
+ mov x9, #16
+core_loop_ime_calculate_sad4_prog_av8:
+ ld1 {v0.16b}, [x1], x3
+ ld1 {v1.16b}, [x5], x2
+ ld1 {v2.16b}, [x6], x2
+ ld1 {v3.16b}, [x7], x2
+ ld1 {v9.16b}, [x8], x2
+
+ uabal v28.8h, v0.8b, v1.8b
+ uabal2 v28.8h, v0.16b, v1.16b
+ uabal v29.8h, v0.8b, v2.8b
+ uabal2 v29.8h, v0.16b, v2.16b
+ uabal v30.8h, v0.8b, v3.8b
+ uabal2 v30.8h, v0.16b, v3.16b
+ uabal v31.8h, v0.8b, v9.8b
+ uabal2 v31.8h, v0.16b, v9.16b
+
+ subs x9, x9, #1
+ bne core_loop_ime_calculate_sad4_prog_av8
+
+ addp v28.8h, v28.8h, v29.8h
+ addp v30.8h, v30.8h, v31.8h
+ uaddlp v28.4s, v28.8h
+ uaddlp v30.4s, v30.8h
+
+ addp v28.4s, v28.4s, v30.4s // lanes: left, right, top, bottom
+ st1 {v28.4s}, [x4]
+ pop_v_regs
+ ret
+
+
+
+//*****************************************************************************
+//*
+//* Function Name : ime_compute_satqd_16x16_lumainter_av8
+//* Description : This function computes SAD for a 16x16 block.
+// : It also computes if any 4x4 block will have a nonzero coefficient after transform and quant
+// : Once a group of 4x4 blocks trips a threshold the remaining rows are only summed (plain SAD)
+//
+// Arguments : x0 :pointer to src buffer
+// x1 :pointer to est buffer
+// w2 :source stride
+// w3 :est stride
+// x4 :pointer to 9 16-bit thresholds
+// x5 :pointer to the 32-bit distortion (SAD) result
+// x6 :pointer to the 32-bit is_nonzero result (0 or 1)
+//*
+//* Values Returned : NONE
+//*
+//* Register Usage : x0-x7, v0-v31 (d8-d15 are saved and restored)
+//* Stack Usage : 64 bytes
+//* Cycles : Around
+//* Interruptibility : Interruptible
+//*
+//* Known Limitations
+//* \Assumptions :
+//*
+//* Revision History :
+//* DD MM YYYY Author(s) Changes
+//* 14 04 2014 Harinarayanan K K First version
+//*
+//*****************************************************************************
+ .global ime_compute_satqd_16x16_lumainter_av8
+ime_compute_satqd_16x16_lumainter_av8:
+ //x0 :pointer to src buffer
+ //x1 :pointer to est buffer
+ //x2 :Source stride
+ //x3 :Pred stride
+ //x4 :Threshold pointer
+ //x5 :Distortion,ie SAD
+ //x6 :is nonzero
+ //x7 :loop counter
+ push_v_regs
+ //d8-d15 are saved once by push_v_regs; that covers every use of v8-v15 below
+ //strides are 32-bit arguments, so sign extend them before they are used as 64-bit post-index offsets
+ sxtw x2, w2
+ sxtw x3, w3
+
+ ld1 {v30.8h}, [x4]
+
+ dup v20.4h, v30.h[1] //ls1
+ dup v24.4h, v30.h[0] //ls2
+ dup v21.4h, v30.h[5] //ls3
+ dup v25.4h, v30.h[7] //ls4
+ dup v22.4h, v30.h[3] //ls5
+ dup v26.4h, v30.h[4] //ls6
+ dup v23.4h, v30.h[6] //ls7
+ dup v27.4h, v30.h[2] //ls8
+
+ mov v20.d[1], v24.d[0]
+ mov v21.d[1], v25.d[0]
+ mov v22.d[1], v26.d[0]
+ mov v23.d[1], v27.d[0]
+
+ add x4, x4, #16
+ ld1 {v29.h}[0], [x4]
+ dup v29.4h, v29.h[0]
+
+ movi v31.8h, #0
+
+ mov x7, #4
+core_loop_satqd_ime_compute_satqd_16x16_lumainter:
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabdl v10.8h, v0.8b, v1.8b
+ uabdl2 v15.8h, v0.16b, v1.16b
+ uabdl v11.8h, v2.8b, v3.8b
+ uabdl2 v16.8h, v2.16b, v3.16b
+ uabdl v12.8h, v4.8b, v5.8b
+ uabdl2 v17.8h, v4.16b, v5.16b
+ uabdl v13.8h, v6.8b, v7.8b
+ uabdl2 v18.8h, v6.16b, v7.16b
+
+ add v0.8h, v10.8h, v13.8h
+ add v1.8h, v11.8h, v12.8h
+ add v2.8h, v15.8h, v18.8h
+ add v3.8h, v16.8h, v17.8h
+
+ //v0 : S1 S4 S4 S1 A1 A4 A4 A1
+ //v1 : S2 S3 S3 S2 A2 A3 A3 A2
+ //v2 : B1 B4 B4 B1 X1 X4 X4 X1
+ //v3 : B3 B2 B2 B3 X3 X2 X2 X3
+
+ trn1 v4.8h, v0.8h, v1.8h
+ trn2 v5.8h, v0.8h, v1.8h
+ trn1 v6.8h, v2.8h, v3.8h
+ trn2 v7.8h, v2.8h, v3.8h
+
+ trn1 v0.4s, v4.4s, v6.4s
+ trn2 v2.4s, v4.4s, v6.4s
+ trn1 v1.4s, v5.4s, v7.4s
+ trn2 v3.4s, v5.4s, v7.4s
+
+ add v4.8h, v0.8h, v3.8h
+ add v5.8h, v1.8h, v2.8h
+ //v4 : S1 S2 B1 B2 A1 A2 X1 X2
+ //v5 : S4 S3 B4 B3 A4 A3 X4 X3
+
+ //compute sad for each 4x4 block
+ add v6.8h, v4.8h, v5.8h
+ addp v19.8h, v6.8h, v6.8h
+ //duplicate the sad into 128 bit so that we can compare using 128bit
+ add v31.4h, v31.4h, v19.4h
+
+ //sad_2 = sad_1<<1;
+ shl v28.8h, v19.8h, #1
+
+ //sad_2 - pu2_thrsh
+ sub v24.8h, v28.8h, v20.8h
+ sub v25.8h, v28.8h, v21.8h
+ sub v26.8h, v28.8h, v22.8h
+ sub v27.8h, v28.8h, v23.8h
+
+ trn1 v0.4s, v4.4s, v5.4s
+ trn2 v1.4s, v4.4s, v5.4s
+ //v0 : S1 S2 S4 S3 A1 A2 A4 A3
+ //v1 : B1 B2 B4 B3 X1 X2 X4 X3
+
+ trn1 v4.8h, v0.8h, v1.8h
+ trn2 v5.8h, v0.8h, v1.8h
+ //v4 : S1 B1 S4 B4 A1 X1 A4 X4
+ //v5 : S2 B2 S3 B3 A2 X2 A3 X3
+
+ mov v7.s[0], v4.s[1]
+ mov v7.s[1], v4.s[3]
+ mov v6.s[0], v5.s[1] // V4 //S1 B1 A1 X1
+ mov v6.s[1], v5.s[3] // V5 //S2 B2 A2 X2
+ mov v4.s[1], v4.s[2] // V6 //S3 B3 A3 X3
+ mov v5.s[1], v5.s[2] // V7 //S4 B4 A4 X4
+
+ shl v0.4h, v4.4h, #1 //S1<<1
+ shl v1.4h, v5.4h, #1 //S2<<1
+ shl v2.4h, v6.4h, #1 //S3<<1
+ shl v3.4h, v7.4h, #1 //S4<<1
+
+ add v8.4h, v5.4h, v6.4h //(s2[j] + s3[j]))
+ add v9.4h, v4.4h, v7.4h //(s1[j] + s4[j]))
+ add v10.4h, v6.4h, v7.4h //(s3[j] + s4[j]))
+ sub v11.4h, v6.4h, v0.4h //(s3[j] - (s1[j]<<1))
+ sub v12.4h, v7.4h, v1.4h //(s4[j] - (s2[j]<<1))
+ add v13.4h, v4.4h, v5.4h //(s1[j] + s2[j]))
+ sub v14.4h, v5.4h, v3.4h //(s2[j] - (s4[j]<<1)))
+ sub v15.4h, v4.4h, v2.4h //(s1[j] - (s3[j]<<1)))
+
+ mov v8.d[1], v9.d[0]
+ mov v10.d[1], v11.d[0]
+ mov v12.d[1], v13.d[0]
+ mov v14.d[1], v15.d[0]
+
+ cmge v0.8h, v24.8h, v8.8h //ls1 ls2
+ cmge v1.8h, v25.8h, v10.8h //ls3 ls4
+ cmge v2.8h, v26.8h, v12.8h //ls5 ls6
+ cmge v3.8h, v27.8h, v14.8h //ls7 ls8
+ cmge v4.4h, v19.4h, v29.4h //sad
+
+ orr v0.16b, v0.16b, v1.16b
+ orr v2.16b, v2.16b, v3.16b
+ orr v2.16b, v0.16b, v2.16b
+ xtn v2.8b, v2.8h
+ orr v2.8b, v2.8b, v4.8b
+
+ //if the comparison is non zero, out
+ mov x4, v2.d[0]
+ cmp x4, #0
+ bne core_loop_compute_sad_pre
+
+ subs x7, x7, #1
+ bne core_loop_satqd_ime_compute_satqd_16x16_lumainter
+ b satdq_end_func
+
+
+ //plain SAD over the rows that are left once a threshold comparison has fired
+core_loop_compute_sad:
+ ld1 {v0.16b}, [x0], x2
+ ld1 {v1.16b}, [x1], x3
+ ld1 {v2.16b}, [x0], x2
+ ld1 {v3.16b}, [x1], x3
+
+ uabal v31.8h, v0.8b, v1.8b
+ uabal2 v31.8h, v0.16b, v1.16b
+
+ uabal v31.8h, v2.8b, v3.8b
+ uabal2 v31.8h, v2.16b, v3.16b
+
+ ld1 {v4.16b}, [x0], x2
+ ld1 {v5.16b}, [x1], x3
+ ld1 {v6.16b}, [x0], x2
+ ld1 {v7.16b}, [x1], x3
+
+ uabal v31.8h, v4.8b, v5.8b
+ uabal2 v31.8h, v4.16b, v5.16b
+
+ uabal v31.8h, v6.8b, v7.8b
+ uabal2 v31.8h, v6.16b, v7.16b
+
+core_loop_compute_sad_pre:
+ subs x7, x7, #1
+ bne core_loop_compute_sad
+
+satdq_end_func:
+
+ //is_nonzero = (x4 != 0) ? 1 : 0
+ mov x7, #1
+ cmp x4, #0
+ csel x7, x4, x7, eq
+ str w7, [x6]
+
+ addp v31.8h, v31.8h, v31.8h
+ uaddlp v31.4s, v31.8h
+ addp v31.2s, v31.2s, v31.2s
+ st1 {v31.s}[0], [x5]
+
+ pop_v_regs
+ ret
+ .section .note.gnu-stack,"",%progbits
diff --git a/encoder/armv8/ime_platform_macros.h b/encoder/armv8/ime_platform_macros.h
new file mode 100755
index 0000000..0f5b2f2
--- /dev/null
+++ b/encoder/armv8/ime_platform_macros.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+/* USADA8: adds to sad the sum of |src[i] - est[i]| for i = 0..3 (ABS must be visible at the point of use). */
+#define USADA8(src,est,sad) \
+ ((sad) += ABS((src)[0]-(est)[0]) + \
+ ABS((src)[1]-(est)[1]) + \
+ ABS((src)[2]-(est)[2]) + \
+ ABS((src)[3]-(est)[3]))
+/* Arguments are parenthesised so pointer expressions such as (p + 4) index correctly; src and est are evaluated four times. */
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/ih264e.h b/encoder/ih264e.h
new file mode 100755
index 0000000..15a9d8f
--- /dev/null
+++ b/encoder/ih264e.h
@@ -0,0 +1,620 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ih264e.h */
+/* */
+/* Description : This file contains all the necessary structure and */
+/* enumeration definitions needed for the Application */
+/* Program Interface(API) of the Ittiam H.264 */
+/* Encoder on Cortex A8 - Neon platform */
+/* */
+/* List of Functions : ih264e_api_function */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 08 2010 100239(RCY) Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _IH264E_H_
+#define _IH264E_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "iv2.h"
+#include "ive2.h"
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* API Function Prototype */
+/*****************************************************************************/
+IV_STATUS_T ih264e_api_function(iv_obj_t *ps_handle, void *pv_api_ip,void *pv_api_op); /* NOTE(review): pv_api_ip / pv_api_op presumably point at the *_ip_t / *_op_t structures declared below - confirm in the definition */
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+ IH264E_CMD_CTL_SET_ME_INFO_ENABLE, /* the only enumerator; has the value 0 */
+}IH264E_CMD_CTL_SUB_CMDS;
+
+
+/*****************************************************************************/
+/* Extended Structures */
+/*****************************************************************************/
+/* Each memory-record structure below has exactly one member, s_ive_ip or s_ive_op, of the matching iv_*_t type. */
+/*****************************************************************************/
+/* Get Number of Memory Records */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ iv_num_mem_rec_ip_t s_ive_ip;
+}ih264e_num_mem_rec_ip_t;
+
+
+typedef struct
+{
+ iv_num_mem_rec_op_t s_ive_op;
+}ih264e_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Fill Memory Records */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ iv_fill_mem_rec_ip_t s_ive_ip;
+}ih264e_fill_mem_rec_ip_t;
+
+
+typedef struct
+{
+ iv_fill_mem_rec_op_t s_ive_op;
+}ih264e_fill_mem_rec_op_t;
+
+/*****************************************************************************/
+/* Retrieve Memory Records */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ iv_retrieve_mem_rec_ip_t s_ive_ip;
+}ih264e_retrieve_mem_rec_ip_t;
+
+
+typedef struct
+{
+ iv_retrieve_mem_rec_op_t s_ive_op;
+}ih264e_retrieve_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Initialize encoder */
+/*****************************************************************************/
+/* Both structures have a single member: the ive_init_ip_t / ive_init_op_t they wrap. */
+typedef struct
+{
+ ive_init_ip_t s_ive_ip;
+}ih264e_init_ip_t;
+
+
+typedef struct
+{
+ ive_init_op_t s_ive_op;
+}ih264e_init_op_t;
+
+/* Buffer queue / dequeue / recon structures: each has a single member, the matching ive_*_ip_t or ive_*_op_t. */
+/*****************************************************************************/
+/* Queue Input raw buffer - Send the YUV buffer to be encoded */
+/*****************************************************************************/
+typedef struct
+{
+ ive_queue_inp_ip_t s_ive_ip;
+}ih264e_queue_inp_ip_t;
+
+typedef struct
+{
+ ive_queue_inp_op_t s_ive_op;
+}ih264e_queue_inp_op_t;
+
+/*****************************************************************************/
+/* Dequeue Input raw buffer - Get free YUV buffer from the encoder */
+/*****************************************************************************/
+typedef struct
+{
+ ive_dequeue_inp_ip_t s_ive_ip;
+}ih264e_dequeue_inp_ip_t;
+
+typedef struct
+{
+ ive_dequeue_inp_op_t s_ive_op;
+}ih264e_dequeue_inp_op_t;
+
+
+/*****************************************************************************/
+/* Queue Output bitstream buffer - Send the bitstream buffer to be filled */
+/*****************************************************************************/
+typedef struct
+{
+ ive_queue_out_ip_t s_ive_ip;
+}ih264e_queue_out_ip_t;
+
+typedef struct
+{
+ ive_queue_out_op_t s_ive_op;
+}ih264e_queue_out_op_t;
+
+/*****************************************************************************/
+/* Dequeue Output bitstream buffer - Get the bitstream buffer filled */
+/*****************************************************************************/
+typedef struct
+{
+ ive_dequeue_out_ip_t s_ive_ip;
+}ih264e_dequeue_out_ip_t;
+
+typedef struct
+{
+ ive_dequeue_out_op_t s_ive_op;
+}ih264e_dequeue_out_op_t;
+
+
+/*****************************************************************************/
+/* Get Recon data - Get the reconstructed data from encoder */
+/*****************************************************************************/
+typedef struct
+{
+ ive_get_recon_ip_t s_ive_ip;
+}ih264e_get_recon_ip_t;
+
+typedef struct
+{
+ ive_get_recon_op_t s_ive_op;
+}ih264e_get_recon_op_t;
+/*****************************************************************************/
+/* Video control Flush */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ ive_ctl_flush_ip_t s_ive_ip;
+}ih264e_ctl_flush_ip_t;
+
+
+typedef struct
+{
+ ive_ctl_flush_op_t s_ive_op;
+}ih264e_ctl_flush_op_t;
+
+/*****************************************************************************/
+/* Video control reset */
+/*****************************************************************************/
+
+
+typedef struct
+{
+ ive_ctl_reset_ip_t s_ive_ip;
+}ih264e_ctl_reset_ip_t;
+
+
+typedef struct
+{
+ ive_ctl_reset_op_t s_ive_op;
+}ih264e_ctl_reset_op_t;
+
+
+/*****************************************************************************/
+/* Video control: Get Buf Info */
+/*****************************************************************************/
+
+
+typedef struct /* H.264 encoder input struct for get-buf-info; wraps the ive-level struct */
+{
+ ive_ctl_getbufinfo_ip_t s_ive_ip; /* sole member; its u4_size is checked against sizeof(ih264e_ctl_getbufinfo_ip_t) in api_check_struct_sanity() */
+}ih264e_ctl_getbufinfo_ip_t;
+
+
+
+typedef struct /* H.264 encoder output struct for get-buf-info; wraps the ive-level struct */
+{
+ ive_ctl_getbufinfo_op_t s_ive_op; /* sole member; its u4_size is checked against sizeof(ih264e_ctl_getbufinfo_op_t) in api_check_struct_sanity() */
+}ih264e_ctl_getbufinfo_op_t;
+
+
+
+/*****************************************************************************/
+/* Video control: Get Version Info */
+/*****************************************************************************/
+
+
+typedef struct /* H.264 encoder input struct for get-version-info; wraps the ive-level struct */
+{
+ ive_ctl_getversioninfo_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_getversioninfo_ip_t;
+
+
+
+typedef struct /* H.264 encoder output struct for get-version-info; wraps the ive-level struct */
+{
+ ive_ctl_getversioninfo_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_getversioninfo_op_t;
+
+/*****************************************************************************/
+/* Video control: Set default params */
+/*****************************************************************************/
+
+
+typedef struct /* H.264 encoder input struct for set-default; wraps the ive-level struct */
+{
+ ive_ctl_setdefault_ip_t s_ive_ip; /* sole member; its u4_size is checked against sizeof(ih264e_ctl_setdefault_ip_t) in api_check_struct_sanity() */
+}ih264e_ctl_setdefault_ip_t;
+
+
+
+typedef struct /* H.264 encoder output struct for set-default; wraps the ive-level struct */
+{
+ ive_ctl_setdefault_op_t s_ive_op; /* sole member; its u4_size is checked against sizeof(ih264e_ctl_setdefault_op_t) in api_check_struct_sanity() */
+}ih264e_ctl_setdefault_op_t;
+
+/*****************************************************************************/
+/* Video control: Set IPE params */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-IPE-params; wraps the ive-level struct */
+{
+ ive_ctl_set_ipe_params_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_ipe_params_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-IPE-params; wraps the ive-level struct */
+{
+ ive_ctl_set_ipe_params_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_ipe_params_op_t;
+
+/*****************************************************************************/
+/* Video control: Set Frame dimensions */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-dimensions; wraps the ive-level struct */
+{
+ ive_ctl_set_dimensions_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_dimensions_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-dimensions; wraps the ive-level struct */
+{
+ ive_ctl_set_dimensions_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_dimensions_op_t;
+
+/*****************************************************************************/
+/* Video control: Set Frame rates */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-frame-rate; wraps the ive-level struct */
+{
+ ive_ctl_set_frame_rate_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_frame_rate_ip_t;
+typedef struct /* H.264 encoder output struct for set-frame-rate; wraps the ive-level struct */
+{
+ ive_ctl_set_frame_rate_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_frame_rate_op_t;
+
+
+/*****************************************************************************/
+/* Video control: Set Bitrate */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-bitrate; wraps the ive-level struct */
+{
+ ive_ctl_set_bitrate_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_bitrate_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-bitrate; wraps the ive-level struct */
+{
+ ive_ctl_set_bitrate_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_bitrate_op_t;
+
+
+/*****************************************************************************/
+/* Video control: Set Frame type */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-frame-type; wraps the ive-level struct */
+{
+ ive_ctl_set_frame_type_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_frame_type_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-frame-type; wraps the ive-level struct */
+{
+ ive_ctl_set_frame_type_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_frame_type_op_t;
+
+/*****************************************************************************/
+/* Video control: Set Encode mode */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-enc-mode; wraps the ive-level struct */
+{
+ ive_ctl_set_enc_mode_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_enc_mode_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-enc-mode; wraps the ive-level struct */
+{
+ ive_ctl_set_enc_mode_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_enc_mode_op_t;
+
+/*****************************************************************************/
+/* Video control: Set QP */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-QP; wraps the ive-level struct */
+{
+ ive_ctl_set_qp_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_qp_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-QP; wraps the ive-level struct */
+{
+ ive_ctl_set_qp_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_qp_op_t;
+
+/*****************************************************************************/
+/* Video control: Set AIR params */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-AIR-params; wraps the ive-level struct */
+{
+ ive_ctl_set_air_params_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_air_params_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-AIR-params; wraps the ive-level struct */
+{
+ ive_ctl_set_air_params_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_air_params_op_t;
+
+/*****************************************************************************/
+/* Video control: Set VBV params */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-VBV-params; wraps the ive-level struct */
+{
+ ive_ctl_set_vbv_params_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_vbv_params_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-VBV-params; wraps the ive-level struct */
+{
+ ive_ctl_set_vbv_params_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_vbv_params_op_t;
+
+/*****************************************************************************/
+/* Video control: Set Processor Details (number of cores) */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-num-cores; wraps the ive-level struct */
+{
+ ive_ctl_set_num_cores_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_num_cores_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-num-cores; wraps the ive-level struct */
+{
+ ive_ctl_set_num_cores_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_num_cores_op_t;
+
+/*****************************************************************************/
+/* Video control: Set Motion estimation params */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-ME-params; wraps the ive-level struct */
+{
+ ive_ctl_set_me_params_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_me_params_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-ME-params; wraps the ive-level struct */
+{
+ ive_ctl_set_me_params_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_me_params_op_t;
+
+/*****************************************************************************/
+/* Video control: Set GOP params */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-GOP-params; wraps the ive-level struct */
+{
+ ive_ctl_set_gop_params_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_gop_params_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-GOP-params; wraps the ive-level struct */
+{
+ ive_ctl_set_gop_params_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_gop_params_op_t;
+
+/*****************************************************************************/
+/* Video control: Set Deblock params */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-deblock-params; wraps the ive-level struct */
+{
+ ive_ctl_set_deblock_params_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_deblock_params_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-deblock-params; wraps the ive-level struct */
+{
+ ive_ctl_set_deblock_params_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_deblock_params_op_t;
+
+/*****************************************************************************/
+/* Video control: Set Profile params */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for set-profile-params; wraps the ive-level struct */
+{
+ ive_ctl_set_profile_params_ip_t s_ive_ip; /* sole member: base input arguments */
+}ih264e_ctl_set_profile_params_ip_t;
+
+typedef struct /* H.264 encoder output struct for set-profile-params; wraps the ive-level struct */
+{
+ ive_ctl_set_profile_params_op_t s_ive_op; /* sole member: base output arguments */
+}ih264e_ctl_set_profile_params_op_t;
+
+/*****************************************************************************/
+/* Synchronous video encode call */
+/*****************************************************************************/
+typedef struct /* H.264 encoder input struct for the video-encode call; wraps the ive-level struct */
+{
+ ive_video_encode_ip_t s_ive_ip; /* sole member; its u4_size is checked against sizeof(ih264e_video_encode_ip_t) in api_check_struct_sanity() */
+}ih264e_video_encode_ip_t;
+
+typedef struct /* H.264 encoder output struct for the video-encode call; wraps the ive-level struct */
+{
+ ive_video_encode_op_t s_ive_op; /* sole member; its u4_size is checked against sizeof(ih264e_video_encode_op_t) in api_check_struct_sanity() */
+}ih264e_video_encode_op_t;
+
+
+/* Macroblock type codes. Values must fit in 8 bits because they are assigned to WORD8 fields */
+typedef enum
+{
+ INTRA16x16 = 0, /* value 0 */
+ INTRA4x4, /* value 1 */
+ INTER16x16 /* value 2 */
+}IV_MB_TYPE_T;
+
+/*****************************************************************************/
+/* Pic info structures */
+/*****************************************************************************/
+typedef struct /* picture-level info, variant 1: QP and picture type */
+{
+ /** Quantization parameter (QP) of the picture */
+ UWORD32 u4_qp;
+
+ /** Picture coding type, as an IV_PICTURE_CODING_TYPE_T value */
+ IV_PICTURE_CODING_TYPE_T e_frame_type;
+
+}ih264e_pic_info1_t;
+
+typedef struct /* picture-level info, variant 2: variant 1 fields plus deblock level */
+{
+ /** Quantization parameter (QP) of the picture */
+ UWORD32 u4_qp;
+
+ /** Picture coding type, as an IV_PICTURE_CODING_TYPE_T value */
+ IV_PICTURE_CODING_TYPE_T e_frame_type;
+
+ /** Disable deblock level (0: Enable completely, 3: Disable completely); meaning of 1 and 2 is not stated here */
+ UWORD32 u4_disable_deblock_level;
+
+}ih264e_pic_info2_t;
+
+
+/*****************************************************************************/
+/* MB info structures */
+/*****************************************************************************/
+typedef struct /* one motion vector as a pair of signed 16-bit components */
+{
+ /** Motion vector X component (units/precision not stated here) */
+ WORD16 i2_mv_x;
+
+ /** Motion vector Y component (units/precision not stated here) */
+ WORD16 i2_mv_y;
+}ih264e_mv_t;
+
+typedef struct /* MB info, variant 1: type plus one MV or one intra mode */
+{
+ /** MB type (intra / inter); presumably an IV_MB_TYPE_T value -- confirm */
+ WORD8 i1_mb_type;
+ union /* anonymous union: the two arrays below share storage */
+ {
+ ih264e_mv_t as_mv[1]; /* single motion vector */
+
+ /** Intra mode (single entry) */
+ WORD8 ai1_intra_mode[1];
+ };
+}ih264e_mb_info1_t;
+
+typedef struct /* MB info, variant 2: variant 1 fields plus a SAD value */
+{
+ /** MB type (intra / inter); presumably an IV_MB_TYPE_T value -- confirm */
+ WORD8 i1_mb_type;
+
+
+ /** SAD (presumably sum of absolute differences for the MB -- confirm) */
+ UWORD16 u2_sad;
+
+ union /* anonymous union: the two arrays below share storage */
+ {
+ ih264e_mv_t as_mv[1]; /* single motion vector */
+
+ /** Intra mode (single entry) */
+ WORD8 ai1_intra_mode[1];
+ };
+
+
+}ih264e_mb_info2_t;
+
+typedef struct /* MB info, variant 3: type plus up to 4 MVs or 16 intra modes */
+{
+ /** MB type (intra / inter); presumably an IV_MB_TYPE_T value -- confirm */
+ WORD8 i1_mb_type;
+
+ union /* anonymous union: the two arrays below share storage */
+ {
+ ih264e_mv_t as_mv[4]; /* four motion vectors */
+
+ /** Intra modes (16 entries) */
+ WORD8 ai1_intra_mode[16];
+ };
+
+}ih264e_mb_info3_t;
+
+typedef struct /* MB info, variant 4: type, intra mode, SAD, and 16 MVs or 16 intra modes */
+{
+ /** MB type (intra / inter); presumably an IV_MB_TYPE_T value -- confirm */
+ WORD8 i1_mb_type;
+
+ /** Intra mode (single value; separate from the array inside the union) */
+ WORD8 i1_intra_mode;
+
+ /** SAD (presumably sum of absolute differences for the MB -- confirm) */
+ UWORD16 u2_sad;
+
+ union /* anonymous union: the two arrays below share storage */
+ {
+ ih264e_mv_t as_mv[16]; /* sixteen motion vectors */
+
+ /** Intra modes (16 entries) */
+ WORD8 ai1_intra_mode[16];
+ };
+
+
+
+}ih264e_mb_info4_t;
+
+/* Union of all MB info variants. Add any new variant here: this type is used to calculate the max size needed for memory allocation */
+typedef struct
+{
+ union /* anonymous union: all variants share storage, so the size is that of the largest */
+ {
+ ih264e_mb_info1_t s_mb_info1;
+ ih264e_mb_info2_t s_mb_info2;
+ ih264e_mb_info3_t s_mb_info3;
+ ih264e_mb_info4_t s_mb_info4;
+ };
+}ih264e_mb_info_t;
+
+#ifdef __cplusplus
+} /* closing brace for extern "C" */
+#endif
+#endif /* _IH264E_H_ */
diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c
new file mode 100755
index 0000000..e5c66ea
--- /dev/null
+++ b/encoder/ih264e_api.c
@@ -0,0 +1,5559 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_api.c
+*
+* @brief
+* Contains api function definitions for H264 encoder
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - api_check_struct_sanity()
+* - ih264e_codec_update_config()
+* - ih264e_set_default_params()
+* - ih264e_init()
+* - ih264e_get_num_rec()
+* - ih264e_fill_num_mem_rec()
+* - ih264e_init_mem_rec()
+* - ih264e_retrieve_memrec()
+* - ih264e_set_flush_mode()
+* - ih264e_get_buf_info()
+* - ih264e_set_dimensions()
+* - ih264e_set_frame_rate()
+* - ih264e_set_bit_rate()
+* - ih264e_set_frame_type()
+* - ih264e_set_qp()
+* - ih264e_set_enc_mode()
+* - ih264e_set_vbv_params()
+* - ih264_set_air_params()
+* - ih264_set_me_params()
+* - ih264_set_ipe_params()
+* - ih264_set_gop_params()
+* - ih264_set_profile_params()
+* - ih264_set_deblock_params()
+* - ih264e_set_num_cores()
+* - ih264e_reset()
+* - ih264e_ctl()
+* - ih264e_api_function()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include Files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_size_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "ih264_buf_mgr.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "ih264e_core_coding.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_list.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_common_tables.h"
+#include "ih264e_master.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_version.h"
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control, /* opaque rate-control pointer */
+ iv_mem_rec_t *ps_mem, /* memory record handed to the callee */
+ ITT_FUNC_TYPE_E e_func_type); /* NOTE(review): forward declaration only; definition and exact semantics are not visible in this part of the file -- confirm */
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Used to test arguments for corresponding API call
+*
+* @par Description:
+* For each command the arguments are validated
+*
+* @param[in] ps_handle
+* Codec handle at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input structure
+*
+* @param[out] pv_api_op
+* Pointer to output structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T api_check_struct_sanity(iv_obj_t *ps_handle,
+ void *pv_api_ip,
+ void *pv_api_op)
+{
+ /* api call */
+ WORD32 command = IV_CMD_NA;
+
+ /* input structure expected by the api call */
+ UWORD32 *pu4_api_ip = pv_api_ip;
+
+ /* output structure expected by the api call */
+ UWORD32 *pu4_api_op = pv_api_op;
+
+ /* temp var */
+ WORD32 i, j;
+
+ if (NULL == pv_api_op || NULL == pv_api_ip)
+ {
+ return (IV_FAIL);
+ }
+
+ /* get command */
+ command = pu4_api_ip[1];
+
+ /* set error code */
+ pu4_api_op[1] = 0;
+
+ /* error checks on handle */
+ switch (command)
+ {
+ case IV_CMD_GET_NUM_MEM_REC:
+ case IV_CMD_FILL_NUM_MEM_REC:
+ break;
+
+ case IV_CMD_INIT:
+ if (ps_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL;
+ return IV_FAIL;
+ }
+
+ if (ps_handle->u4_size != sizeof(iv_obj_t))
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ break;
+
+ case IVE_CMD_QUEUE_INPUT:
+ case IVE_CMD_QUEUE_OUTPUT:
+ case IVE_CMD_DEQUEUE_OUTPUT:
+ case IVE_CMD_GET_RECON:
+ case IV_CMD_RETRIEVE_MEMREC:
+ case IVE_CMD_VIDEO_CTL:
+ case IVE_CMD_VIDEO_ENCODE:
+
+ if (ps_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_HANDLE_NULL;
+ return IV_FAIL;
+ }
+
+ if (ps_handle->u4_size != sizeof(iv_obj_t))
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_handle->pv_fxns != ih264e_api_function)
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_API_FUNCTION_PTR_NULL;
+ return IV_FAIL;
+ }
+
+ if (ps_handle->pv_codec_handle == NULL)
+ {
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_INVALID_CODEC_HANDLE;
+ return IV_FAIL;
+ }
+ break;
+
+ default:
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD;
+ return IV_FAIL;
+ }
+
+ /* error checks on input output structures */
+ switch (command)
+ {
+ case IV_CMD_GET_NUM_MEM_REC:
+ {
+ ih264e_num_mem_rec_ip_t *ps_ip = pv_api_ip;
+ ih264e_num_mem_rec_op_t *ps_op = pv_api_op;
+
+ ps_op->s_ive_op.u4_error_code = 0;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_num_mem_rec_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_num_mem_rec_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+ break;
+ }
+
+ case IV_CMD_FILL_NUM_MEM_REC:
+ {
+ ih264e_fill_mem_rec_ip_t *ps_ip = pv_api_ip;
+ ih264e_fill_mem_rec_op_t *ps_op = pv_api_op;
+
+ iv_mem_rec_t *ps_mem_rec = NULL;
+
+ WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+ WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+
+ ps_op->s_ive_op.u4_error_code = 0;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_fill_mem_rec_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_fill_mem_rec_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (max_wd < MIN_WD || max_wd > MAX_WD)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (max_ht < MIN_HT || max_ht > MAX_HT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ /* verify number of mem rec ptr */
+ if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+ return (IV_FAIL);
+ }
+
+ /* verify number of mem records */
+ if (ps_ip->s_ive_ip.u4_num_mem_rec != MEM_REC_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT;
+ return IV_FAIL;
+ }
+
+ /* check mem records sizes are correct */
+ ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+ }
+
+ case IV_CMD_INIT:
+ {
+ ih264e_init_ip_t *ps_ip = pv_api_ip;
+ ih264e_init_op_t *ps_op = pv_api_op;
+
+ iv_mem_rec_t *ps_mem_rec = NULL;
+
+ WORD32 max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+ WORD32 max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+
+ ps_op->s_ive_op.u4_error_code = 0;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_init_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_init_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (max_wd < MIN_WD || max_wd > MAX_WD)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (max_ht < MIN_HT || max_ht > MAX_HT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_ref_cnt != 1)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_reorder_cnt != 0)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_10)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_1B)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_11)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_12)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_13)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_20)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_21)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_22)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_30)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_31)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_32)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_40)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_41)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_42)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_50)
+ && (ps_ip->s_ive_ip.u4_max_level != IH264_LEVEL_51))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420P)
+ && (ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420SP_UV)
+ && (ps_ip->s_ive_ip.e_recon_color_fmt != IV_YUV_420SP_VU))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_rc_mode != IVE_RC_NONE)
+ && (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_STORAGE)
+ && (ps_ip->s_ive_ip.e_rc_mode != IVE_RC_CBR_NON_LOW_DELAY))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_framerate > DEFAULT_MAX_FRAMERATE)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_FRAME_RATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_bitrate > DEFAULT_MAX_BITRATE)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_BITRATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_num_bframes != 0)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_BFRAMES_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.e_content_type != IV_PROGRESSIVE)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_CONTENT_TYPE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_srch_rng_x > DEFAULT_MAX_SRCH_RANGE_X)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_srch_rng_y > DEFAULT_MAX_SRCH_RANGE_Y)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_NONE)
+ && (ps_ip->s_ive_ip.e_slice_mode != IVE_SLICE_MODE_BLOCKS))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_SLICE_TYPE_INPUT_INVALID;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+ {
+ if (ps_ip->s_ive_ip.u4_slice_param == 0
+ || ps_ip->s_ive_ip.u4_slice_param > ((UWORD32)max_ht >> 4))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_SLICE_PARAM_INPUT_INVALID;
+ return (IV_FAIL);
+ }
+ }
+
+ if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+ return (IV_FAIL);
+ }
+
+ /* verify number of mem records */
+ if (ps_ip->s_ive_ip.u4_num_mem_rec != MEM_REC_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT;
+ return (IV_FAIL);
+ }
+
+ ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+
+ /* check memrecords sizes are correct */
+ for (i = 0; i <((WORD32)ps_ip->s_ive_ip.u4_num_mem_rec); i++)
+ {
+ if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ /* check memrecords pointers are not NULL */
+ if (ps_mem_rec[i].pv_base == NULL)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_BASE_POINTER_NULL;
+ return IV_FAIL;
+ }
+ }
+
+ /* verify memtabs for overlapping regions */
+ {
+ void *start[MEM_REC_CNT];
+ void *end[MEM_REC_CNT];
+
+ start[0] = (ps_mem_rec[0].pv_base);
+ end[0] = ((UWORD8 *) ps_mem_rec[0].pv_base)
+ + ps_mem_rec[0].u4_mem_size - 1;
+
+ for (i = 1; i < MEM_REC_CNT; i++)
+ {
+ /* This array is populated to check memtab overlap */
+ start[i] = (ps_mem_rec[i].pv_base);
+ end[i] = ((UWORD8 *) ps_mem_rec[i].pv_base)
+ + ps_mem_rec[i].u4_mem_size - 1;
+
+ for (j = 0; j < i; j++)
+ {
+ if ((start[i] >= start[j]) && (start[i] <= end[j]))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_OVERLAP_ERR;
+ return IV_FAIL;
+ }
+
+ if ((end[i] >= start[j]) && (end[i] <= end[j]))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_OVERLAP_ERR;
+ return IV_FAIL;
+ }
+
+ if ((start[i] < start[j]) && (end[i] > end[j]))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_OVERLAP_ERR;
+ return IV_FAIL;
+ }
+ }
+ }
+ }
+
+ /* re-validate mem records with init config */
+ {
+ /* mem records */
+ iv_mem_rec_t s_mem_rec_ittiam_api[MEM_REC_CNT];
+
+ /* api interface structs */
+ ih264e_fill_mem_rec_ip_t s_ip;
+ ih264e_fill_mem_rec_op_t s_op;
+
+ /* error status */
+ IV_STATUS_T e_status;
+
+ /* temp var */
+ WORD32 i;
+
+ s_ip.s_ive_ip.u4_size = sizeof(ih264e_fill_mem_rec_ip_t);
+ s_op.s_ive_op.u4_size = sizeof(ih264e_fill_mem_rec_op_t);
+
+ s_ip.s_ive_ip.e_cmd = IV_CMD_FILL_NUM_MEM_REC;
+ s_ip.s_ive_ip.ps_mem_rec = s_mem_rec_ittiam_api;
+ s_ip.s_ive_ip.u4_max_wd = max_wd;
+ s_ip.s_ive_ip.u4_max_ht = max_ht;
+ s_ip.s_ive_ip.u4_num_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec;
+ s_ip.s_ive_ip.u4_max_level = ps_ip->s_ive_ip.u4_max_level;
+ s_ip.s_ive_ip.u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt;
+ s_ip.s_ive_ip.u4_max_reorder_cnt =
+ ps_ip->s_ive_ip.u4_max_reorder_cnt;
+ s_ip.s_ive_ip.e_color_format = ps_ip->s_ive_ip.e_inp_color_fmt;
+ s_ip.s_ive_ip.u4_max_srch_rng_x =
+ ps_ip->s_ive_ip.u4_max_srch_rng_x;
+ s_ip.s_ive_ip.u4_max_srch_rng_y =
+ ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ s_mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t);
+ }
+
+ /* fill mem records */
+ e_status = ih264e_api_function(NULL, (void *) &s_ip,
+ (void *) &s_op);
+
+ if (IV_FAIL == e_status)
+ {
+ ps_op->s_ive_op.u4_error_code = s_op.s_ive_op.u4_error_code;
+ return (IV_FAIL);
+ }
+
+ /* verify mem records */
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ if (ps_mem_rec[i].u4_mem_size
+ < s_mem_rec_ittiam_api[i].u4_mem_size)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_INSUFFICIENT_SIZE;
+
+ return IV_FAIL;
+ }
+
+ if (ps_mem_rec[i].u4_mem_alignment
+ != s_mem_rec_ittiam_api[i].u4_mem_alignment)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_ALIGNMENT_ERR;
+
+ return IV_FAIL;
+ }
+
+ if (ps_mem_rec[i].e_mem_type
+ != s_mem_rec_ittiam_api[i].e_mem_type)
+ {
+ UWORD32 check = IV_SUCCESS;
+ UWORD32 diff = s_mem_rec_ittiam_api[i].e_mem_type
+ - ps_mem_rec[i].e_mem_type;
+
+ if ((ps_mem_rec[i].e_mem_type
+ <= IV_EXTERNAL_CACHEABLE_SCRATCH_MEM)
+ && (s_mem_rec_ittiam_api[i].e_mem_type
+ >= IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM))
+ {
+ check = IV_FAIL;
+ }
+
+ if (3 != (s_mem_rec_ittiam_api[i].e_mem_type % 4))
+ {
+ /* It is not IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM or
+ * IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM */
+
+ if ((diff < 1) || (diff > 3))
+ {
+ /* Difference between 1 and 3 is okay for all cases other than the
+ * two filtered with the MOD condition above */
+ check = IV_FAIL;
+ }
+ }
+ else
+ {
+ if (diff == 1)
+ {
+ /* This particular case is when codec asked for External Persistent,
+ * but got Internal Scratch */
+ check = IV_FAIL;
+ }
+ if ((diff != 2) && (diff != 3))
+ {
+ check = IV_FAIL;
+ }
+ }
+
+ if (check == IV_FAIL)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_INCORRECT_TYPE;
+
+ return IV_FAIL;
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case IVE_CMD_QUEUE_INPUT:
+ case IVE_CMD_QUEUE_OUTPUT:
+ case IVE_CMD_DEQUEUE_OUTPUT:
+ case IVE_CMD_GET_RECON:
+ break;
+
+ case IV_CMD_RETRIEVE_MEMREC:
+ {
+ ih264e_retrieve_mem_rec_ip_t *ps_ip = pv_api_ip;
+ ih264e_retrieve_mem_rec_op_t *ps_op = pv_api_op;
+
+ iv_mem_rec_t *ps_mem_rec = NULL;
+
+ ps_op->s_ive_op.u4_error_code = 0;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_retrieve_mem_rec_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_retrieve_mem_rec_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (NULL == ps_ip->s_ive_ip.ps_mem_rec)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL;
+ return (IV_FAIL);
+ }
+
+ ps_mem_rec = ps_ip->s_ive_ip.ps_mem_rec;
+
+ /* check memrecords sizes are correct */
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ if (ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ }
+ break;
+ }
+
+ case IVE_CMD_VIDEO_ENCODE:
+ {
+ ih264e_video_encode_ip_t *ps_ip = pv_api_ip;
+ ih264e_video_encode_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size != sizeof(ih264e_video_encode_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+
+ if (ps_op->s_ive_op.u4_size != sizeof(ih264e_video_encode_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1 << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT;
+ return (IV_FAIL);
+ }
+ break;
+ }
+
+ case IVE_CMD_VIDEO_CTL:
+ {
+ /* ptr to input structure */
+ WORD32 *pu4_ptr_cmd = pv_api_ip;
+
+ /* sub command */
+ WORD32 sub_command = pu4_ptr_cmd[2];
+
+ switch (sub_command)
+ {
+ case IVE_CMD_CTL_SETDEFAULT:
+ {
+ ih264e_ctl_setdefault_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_setdefault_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_setdefault_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_setdefault_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+ break;
+ }
+
+ case IVE_CMD_CTL_GETBUFINFO:
+ {
+ codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+ ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_getbufinfo_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_getbufinfo_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_wd < MIN_WD)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_wd > ps_codec->s_cfg.u4_max_wd)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_ht < MIN_HT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_ht > ps_codec->s_cfg.u4_max_ht)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420P)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_422ILE)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_UV)
+ && (ps_ip->s_ive_ip.e_inp_color_fmt != IV_YUV_420SP_VU))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+ break;
+ }
+
+ case IVE_CMD_CTL_GETVERSION:
+ {
+ ih264e_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_getversioninfo_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_getversioninfo_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_getversioninfo_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.pu1_version == NULL)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_FLUSH:
+ {
+ ih264e_ctl_flush_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_flush_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_flush_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_flush_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_RESET:
+ {
+ ih264e_ctl_reset_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_reset_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_reset_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_reset_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_NUM_CORES:
+ {
+ ih264e_ctl_set_num_cores_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_num_cores_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_num_cores_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_num_cores_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_num_cores < 1)
+ || (ps_ip->s_ive_ip.u4_num_cores > MAX_NUM_CORES))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_NUM_CORES;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_DIMENSIONS:
+ {
+ codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+ ih264e_ctl_set_dimensions_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_dimensions_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_dimensions_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_dimensions_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_wd < MIN_WD)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_wd > ps_codec->s_cfg.u4_max_wd)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_WIDTH_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_ht < MIN_HT)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_ht > ps_codec->s_cfg.u4_max_ht)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HEIGHT_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_FRAMERATE:
+ {
+ ih264e_ctl_set_frame_rate_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_frame_rate_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_frame_rate_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_frame_rate_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (((ps_ip->s_ive_ip.u4_src_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE)
+ || ((ps_ip->s_ive_ip.u4_tgt_frame_rate * 1000) > DEFAULT_MAX_FRAMERATE))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_FRAME_RATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if ((ps_ip->s_ive_ip.u4_src_frame_rate == 0)
+ || (ps_ip->s_ive_ip.u4_tgt_frame_rate == 0))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_FRAME_RATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_tgt_frame_rate
+ > ps_ip->s_ive_ip.u4_src_frame_rate)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE;
+ return (IV_FAIL);
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_BITRATE:
+ {
+ ih264e_ctl_set_bitrate_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_bitrate_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_bitrate_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_bitrate_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_target_bitrate > DEFAULT_MAX_BITRATE)
+ || (ps_ip->s_ive_ip.u4_target_bitrate == 0))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_BITRATE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_FRAMETYPE:
+ {
+ ih264e_ctl_set_frame_type_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_frame_type_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_frame_type_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_frame_type_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.e_frame_type != IV_NA_FRAME)
+ && (ps_ip->s_ive_ip.e_frame_type != IV_I_FRAME)
+ && (ps_ip->s_ive_ip.e_frame_type != IV_P_FRAME)
+ && (ps_ip->s_ive_ip.e_frame_type != IV_IDR_FRAME))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_FORCE_FRAME_INPUT;
+ return IV_FAIL;
+ }
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_ME_PARAMS:
+ {
+ codec_t *ps_codec = (codec_t *) (ps_handle->pv_codec_handle);
+
+ ih264e_ctl_set_me_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_me_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_me_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_me_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_me_speed_preset != FULL_SRCH)
+ && (ps_ip->s_ive_ip.u4_me_speed_preset != DMND_SRCH)
+ && (ps_ip->s_ive_ip.u4_me_speed_preset != HEX_SRCH))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_ME_SPEED_PRESET;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enable_hpel != 0)
+ && (ps_ip->s_ive_ip.u4_enable_hpel != 1))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_HALFPEL_OPTION;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enable_qpel != 0)
+ && (ps_ip->s_ive_ip.u4_enable_qpel != 1))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_QPEL_OPTION;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enable_fast_sad != 0)
+ && (ps_ip->s_ive_ip.u4_enable_fast_sad != 1))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_FAST_SAD_OPTION;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_enable_alt_ref > 255)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_ALT_REF_OPTION;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_srch_rng_x
+ > ps_codec->s_cfg.u4_max_srch_rng_x)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ if (ps_ip->s_ive_ip.u4_srch_rng_y
+ > ps_codec->s_cfg.u4_max_srch_rng_y)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED;
+ return (IV_FAIL);
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_IPE_PARAMS:
+ {
+ ih264e_ctl_set_ipe_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_ipe_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_ipe_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_ipe_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enable_intra_4x4 != 0)
+ && (ps_ip->s_ive_ip.u4_enable_intra_4x4 != 1))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_INTRA4x4_OPTION;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_CONFIG)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_SLOWEST)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_NORMAL)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FAST)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_HIGH_SPEED)
+ && (ps_ip->s_ive_ip.u4_enc_speed_preset != IVE_FASTEST))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_ENC_SPEED_PRESET;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_GOP_PARAMS:
+ {
+ ih264e_ctl_set_gop_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_gop_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_gop_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_gop_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE)
+ || (ps_ip->s_ive_ip.u4_i_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_INTRA_FRAME_INTERVAL;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_idr_frm_interval < DEFAULT_MIN_INTRA_FRAME_RATE)
+ || (ps_ip->s_ive_ip.u4_idr_frm_interval > DEFAULT_MAX_INTRA_FRAME_RATE))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_IDR_FRAME_INTERVAL;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_num_b_frames != 0)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_BFRAMES_NOT_SUPPORTED;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_DEBLOCK_PARAMS:
+ {
+ ih264e_ctl_set_deblock_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_deblock_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_deblock_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_deblock_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_0)
+ && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_2)
+ && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_3)
+ && (ps_ip->s_ive_ip.u4_disable_deblock_level != DISABLE_DEBLK_LEVEL_4))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_DEBLOCKING_TYPE_INPUT;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_QP:
+ {
+ ih264e_ctl_set_qp_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_qp_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_qp_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_qp_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_qp_max > MAX_H264_QP)
+ || (ps_ip->s_ive_ip.u4_p_qp_max > MAX_H264_QP)
+ || (ps_ip->s_ive_ip.u4_b_qp_max > MAX_H264_QP))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_MAX_FRAME_QP;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_qp_min > ps_ip->s_ive_ip.u4_i_qp_max)
+ || (ps_ip->s_ive_ip.u4_p_qp_min > ps_ip->s_ive_ip.u4_p_qp_max)
+ || (ps_ip->s_ive_ip.u4_b_qp_min > ps_ip->s_ive_ip.u4_b_qp_max))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_MIN_FRAME_QP;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_qp > ps_ip->s_ive_ip.u4_i_qp_max)
+ || (ps_ip->s_ive_ip.u4_p_qp > ps_ip->s_ive_ip.u4_p_qp_max)
+ || (ps_ip->s_ive_ip.u4_b_qp > ps_ip->s_ive_ip.u4_b_qp_max))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_i_qp < ps_ip->s_ive_ip.u4_i_qp_min)
+ || (ps_ip->s_ive_ip.u4_p_qp < ps_ip->s_ive_ip.u4_p_qp_min)
+ || (ps_ip->s_ive_ip.u4_b_qp < ps_ip->s_ive_ip.u4_b_qp_min))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |= IH264E_INVALID_INIT_QP;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_ENC_MODE:
+ {
+ ih264e_ctl_set_enc_mode_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_enc_mode_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_enc_mode_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_enc_mode_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_HEADER)
+ && (ps_ip->s_ive_ip.e_enc_mode != IVE_ENC_MODE_PICTURE))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_ENC_OPERATION_MODE;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_VBV_PARAMS:
+ {
+ ih264e_ctl_set_vbv_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_vbv_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_vbv_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_vbv_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.u4_vbv_buffer_delay < DEFAULT_MIN_BUFFER_DELAY)
+ || (ps_ip->s_ive_ip.u4_vbv_buffer_delay > DEFAULT_MAX_BUFFER_DELAY))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_BUFFER_DELAY;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_AIR_PARAMS:
+ {
+ ih264e_ctl_set_air_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_air_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_air_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_air_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if ((ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_NONE)
+ && (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_CYCLIC)
+ && (ps_ip->s_ive_ip.e_air_mode != IVE_AIR_MODE_RANDOM))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_AIR_MODE;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_air_refresh_period == 0)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_INVALID_AIR_REFRESH_PERIOD;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ case IVE_CMD_CTL_SET_PROFILE_PARAMS:
+ {
+ ih264e_ctl_set_profile_params_ip_t *ps_ip = pv_api_ip;
+ ih264e_ctl_set_profile_params_op_t *ps_op = pv_api_op;
+
+ if (ps_ip->s_ive_ip.u4_size
+ != sizeof(ih264e_ctl_set_profile_params_ip_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_op->s_ive_op.u4_size
+ != sizeof(ih264e_ctl_set_profile_params_op_t))
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT;
+ return IV_FAIL;
+ }
+
+ if (ps_ip->s_ive_ip.e_profile != IV_PROFILE_BASE)
+ {
+ ps_op->s_ive_op.u4_error_code |= 1
+ << IVE_UNSUPPORTEDPARAM;
+ ps_op->s_ive_op.u4_error_code |=
+ IH264E_PROFILE_NOT_SUPPORTED;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ default:
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_SUB_CMD;
+ return IV_FAIL;
+ }
+
+ break;
+ }
+
+ default:
+ *(pu4_api_op + 1) |= 1 << IVE_UNSUPPORTEDPARAM;
+ *(pu4_api_op + 1) |= IVE_ERR_INVALID_API_CMD;
+ return IV_FAIL;
+ }
+
+ return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Updates the encoder configuration parameters
+*
+* @par Description:
+* Updates the encoder configuration parameters from the given config set and
+* initializes/reinitializes the codec parameters according to the new
+* configuration.
+*
+* @param[in,out] ps_codec
+* Pointer to codec context (its stored configuration is updated in place)
+*
+* @param[in] ps_cfg
+* Pointer to config param set
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec,
+ cfg_params_t *ps_cfg)
+{
+ /* config params */
+ cfg_params_t *ps_curr_cfg = &ps_codec->s_cfg;
+
+ /* error status */
+ IH264E_ERROR_T err = IH264E_SUCCESS;
+
+ /* temp var */
+ UWORD32 u4_init_rc = 0;
+
+ /***********************/
+ /* UPDATE CODEC CONFIG */
+ /***********************/
+ if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DIMENSIONS)
+ {
+ UWORD32 wd_aln = ALIGN16(ps_cfg->u4_wd);
+ UWORD32 ht_aln = ALIGN16(ps_cfg->u4_ht);
+
+ if (ps_curr_cfg->u4_wd != wd_aln || ps_curr_cfg->u4_ht != ht_aln
+ || ps_curr_cfg->u4_strd != ps_cfg->u4_strd
+ || ps_curr_cfg->u4_disp_wd != ps_cfg->u4_disp_wd
+ || ps_curr_cfg->u4_disp_ht != ps_cfg->u4_disp_ht)
+ {
+ ps_curr_cfg->u4_wd = wd_aln;
+ ps_curr_cfg->u4_ht = ht_aln;
+ ps_curr_cfg->u4_strd = ps_cfg->u4_strd;
+
+ if (ps_curr_cfg->u4_strd == 0)
+ {
+ ps_curr_cfg->u4_strd = ps_curr_cfg->u4_wd;
+ }
+
+ ps_curr_cfg->u4_disp_wd = ps_cfg->u4_disp_wd;
+ ps_curr_cfg->u4_disp_ht = ps_cfg->u4_disp_ht;
+
+ ps_curr_cfg->i4_wd_mbs = ps_curr_cfg->u4_wd >> 4;
+ ps_curr_cfg->i4_ht_mbs = ps_curr_cfg->u4_ht >> 4;
+
+ ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd;
+ ps_codec->i4_rec_strd = ALIGN16(ps_cfg->u4_wd) + PAD_WD;
+
+ /* If number of MBs in a frame changes the air map also changes.
+ * Hence recompute air map also reset air pic cnt */
+ if (ps_codec->s_cfg.e_air_mode != IVE_AIR_MODE_NONE)
+ {
+ /* re-init the air map */
+ ih264e_init_air_map(ps_codec);
+
+ /* reset air counter */
+ ps_codec->i4_air_pic_cnt = -1;
+ }
+
+ /* initialize mv bank buffer manager */
+ err = ih264e_mv_buf_mgr_add_bufs(ps_codec);
+ if (err != IH264E_SUCCESS)
+ return err;
+
+ /* initialize ref bank buffer manager */
+ err = ih264e_pic_buf_mgr_add_bufs(ps_codec);
+ if (err != IH264E_SUCCESS)
+ return err;
+
+ /* since dimension changed, start new sequence by forcing IDR */
+ ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+
+ /* in case dimension changes, we need to reinitialize RC as the
+ * old model shall not fit further */
+ u4_init_rc = 1;
+
+ /* when the dimension changes, the header needs to be regenerated */
+ ps_codec->i4_header_mode = 1;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMERATE)
+ {
+ /* temp var */
+ UWORD32 u4_src_ticks, u4_tgt_ticks;
+
+ u4_src_ticks = ih264e_frame_time_get_src_ticks(
+ ps_codec->s_rate_control.pps_frame_time);
+
+ u4_tgt_ticks = ih264e_frame_time_get_tgt_ticks(
+ ps_codec->s_rate_control.pps_frame_time);
+
+ /* Change frame rate */
+ if (ps_codec->s_cfg.u4_src_frame_rate
+ != ps_cfg->u4_src_frame_rate * 1000)
+ {
+ ps_codec->s_cfg.u4_src_frame_rate = ps_cfg->u4_src_frame_rate
+ * 1000;
+
+ ih264e_frame_time_update_src_frame_rate(
+ ps_codec->s_rate_control.pps_frame_time,
+ ps_codec->s_cfg.u4_src_frame_rate);
+
+ ih264_time_stamp_update_frame_rate(
+ ps_codec->s_rate_control.pps_time_stamp,
+ ps_codec->s_cfg.u4_src_frame_rate);
+
+ irc_change_frame_rate(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_cfg.u4_src_frame_rate,
+ u4_src_ticks, u4_tgt_ticks);
+ }
+
+ if (ps_codec->s_cfg.u4_tgt_frame_rate
+ != ps_cfg->u4_tgt_frame_rate * 1000)
+ {
+ ps_codec->s_cfg.u4_tgt_frame_rate = ps_cfg->u4_tgt_frame_rate
+ * 1000;
+
+ ih264e_frame_time_update_tgt_frame_rate(
+ ps_codec->s_rate_control.pps_frame_time,
+ ps_codec->s_cfg.u4_tgt_frame_rate);
+
+ irc_change_frame_rate(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_cfg.u4_src_frame_rate,
+ u4_src_ticks, u4_tgt_ticks);
+
+ irc_change_frm_rate_for_bit_alloc(
+ ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_cfg.u4_tgt_frame_rate);
+ }
+
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_BITRATE)
+ {
+ if (ps_curr_cfg->u4_target_bitrate != ps_cfg->u4_target_bitrate)
+ {
+ if (IVE_RC_NONE != ps_curr_cfg->e_rc_mode)
+ irc_change_avg_bit_rate(
+ ps_codec->s_rate_control.pps_rate_control_api,
+ ps_cfg->u4_target_bitrate);
+
+ ps_curr_cfg->u4_target_bitrate = ps_cfg->u4_target_bitrate;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_FRAMETYPE)
+ {
+ switch (ps_cfg->e_frame_type)
+ {
+ case IV_I_FRAME:
+ ps_codec->force_curr_frame_type = IV_I_FRAME;
+ break;
+
+ case IV_IDR_FRAME:
+ ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+ break;
+
+ case IV_P_FRAME:
+ default:
+ break;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_ME_PARAMS)
+ {
+ if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+ {
+ ps_codec->s_cfg.u4_enable_hpel = ps_cfg->u4_enable_hpel;
+ ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad;
+ ps_codec->s_cfg.u4_me_speed_preset = ps_cfg->u4_me_speed_preset;
+ ps_codec->s_cfg.u4_enable_qpel = ps_cfg->u4_enable_qpel;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST)
+ {
+ ps_codec->s_cfg.u4_enable_fast_sad = ps_cfg->u4_enable_fast_sad;
+ }
+ ps_codec->s_cfg.u4_srch_rng_x = ps_cfg->u4_srch_rng_x;
+ ps_codec->s_cfg.u4_srch_rng_y = ps_cfg->u4_srch_rng_y;
+
+ if (ps_codec->s_cfg.u4_enable_alt_ref != ps_cfg->u4_enable_alt_ref)
+ {
+ ps_codec->s_cfg.u4_enable_alt_ref = ps_cfg->u4_enable_alt_ref;
+ ps_codec->u4_is_curr_frm_ref = 1;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_IPE_PARAMS)
+ {
+ ps_curr_cfg->u4_enc_speed_preset = ps_cfg->u4_enc_speed_preset;
+
+ if (ps_curr_cfg->u4_enc_speed_preset == IVE_SLOWEST)
+ {/* high quality */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ ps_curr_cfg->u4_enable_fast_sad = 0;
+
+ /* enable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 1;
+ ps_codec->luma_energy_compaction[1] =
+ ih264e_code_luma_intra_macroblock_4x4_rdopt_on;
+
+ /* half pel on */
+ ps_curr_cfg->u4_enable_hpel = 1;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_NORMAL)
+ {/* normal */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ ps_curr_cfg->u4_enable_fast_sad = 0;
+
+ /* enable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 1;
+
+ /* sub pel off */
+ ps_curr_cfg->u4_enable_hpel = 1;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FAST)
+ {/* normal */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ ps_curr_cfg->u4_enable_fast_sad = 0;
+
+ /* disable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+ /* half pel on */
+ ps_curr_cfg->u4_enable_hpel = 1;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 1;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_HIGH_SPEED)
+ {/* fast */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ ps_curr_cfg->u4_enable_fast_sad = 0;
+
+ /* disable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+ /* sub pel off */
+ ps_curr_cfg->u4_enable_hpel = 0;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_FASTEST)
+ {/* fastest */
+ /* enable diamond search */
+ ps_curr_cfg->u4_me_speed_preset = DMND_SRCH;
+ //u4_num_layers = 4;
+
+ /* disable intra 4x4 */
+ ps_curr_cfg->u4_enable_intra_4x4 = 0;
+
+ /* sub pel off */
+ ps_curr_cfg->u4_enable_hpel = 0;
+
+ /* deblocking off */
+ ps_curr_cfg->u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 1;
+ }
+ else if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+ {
+ ps_curr_cfg->u4_enable_intra_4x4 = ps_cfg->u4_enable_intra_4x4;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_GOP_PARAMS)
+ {
+ if (ps_curr_cfg->u4_i_frm_interval != ps_cfg->u4_i_frm_interval)
+ {
+ ps_curr_cfg->u4_i_frm_interval = ps_cfg->u4_i_frm_interval;
+
+ /* reset air counter */
+ ps_codec->i4_air_pic_cnt = -1;
+
+ /* re-init air map */
+ ih264e_init_air_map(ps_codec);
+
+ /*Effect intra frame interval change*/
+
+ irc_change_intra_frm_int_call(
+ ps_codec->s_rate_control.pps_rate_control_api,
+ ps_curr_cfg->u4_i_frm_interval);
+ }
+
+ ps_curr_cfg->u4_idr_frm_interval = ps_cfg->u4_idr_frm_interval;
+
+ ps_curr_cfg->u4_num_b_frames = ps_cfg->u4_num_b_frames;
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_DEBLOCK_PARAMS)
+ {
+ if (ps_curr_cfg->u4_enc_speed_preset == IVE_CONFIG)
+ {
+ ps_curr_cfg->u4_disable_deblock_level =
+ ps_cfg->u4_disable_deblock_level;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_QP)
+ {
+ UWORD8 au1_init_qp[MAX_PIC_TYPE];
+ UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+ ps_codec->s_cfg.u4_i_qp_max = ps_cfg->u4_i_qp_max;
+ ps_codec->s_cfg.u4_i_qp_min = ps_cfg->u4_i_qp_min;
+ ps_codec->s_cfg.u4_i_qp = ps_cfg->u4_i_qp;
+
+ ps_codec->s_cfg.u4_p_qp_max = ps_cfg->u4_p_qp_max;
+ ps_codec->s_cfg.u4_p_qp_min = ps_cfg->u4_p_qp_min;
+ ps_codec->s_cfg.u4_p_qp = ps_cfg->u4_p_qp;
+
+ ps_codec->s_cfg.u4_b_qp_max = ps_cfg->u4_b_qp_max;
+ ps_codec->s_cfg.u4_b_qp_min = ps_cfg->u4_b_qp_min;
+ ps_codec->s_cfg.u4_b_qp = ps_cfg->u4_b_qp;
+
+ /* update rc lib with modified qp */
+ au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+ au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+ au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+ irc_change_init_qp(ps_codec->s_rate_control.pps_rate_control_api,
+ au1_init_qp);
+
+ au1_min_max_qp[2 * I_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+ au1_min_max_qp[2 * I_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+ au1_min_max_qp[2 * P_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+ au1_min_max_qp[2 * P_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+ au1_min_max_qp[2 * B_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+ au1_min_max_qp[2 * B_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+ irc_change_min_max_qp(ps_codec->s_rate_control.pps_rate_control_api,
+ au1_min_max_qp);
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_ENC_MODE)
+ {
+ ps_codec->s_cfg.e_enc_mode = ps_cfg->e_enc_mode;
+
+ if (ps_codec->s_cfg.e_enc_mode == IVE_ENC_MODE_HEADER)
+ {
+ ps_codec->i4_header_mode = 1;
+ ps_codec->s_cfg.e_enc_mode = IVE_ENC_MODE_PICTURE;
+ }
+ else
+ {
+ ps_codec->i4_header_mode = 0;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_VBV_PARAMS
+ && IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode)
+ {
+ ps_codec->s_cfg.u4_vbv_buf_size = ps_cfg->u4_vbv_buf_size;
+ ps_codec->s_cfg.u4_vbv_buffer_delay = ps_cfg->u4_vbv_buffer_delay;
+
+ // irc_change_buffer_delay(ps_codec->s_rate_control.pps_rate_control_api, ps_codec->s_cfg.u4_vbv_buffer_delay);
+
+ // TODO: remove this when the support for changing buffer dynamically
+ // is yet to be added.
+ u4_init_rc = 1;
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_AIR_PARAMS)
+ {
+ if (ps_curr_cfg->e_air_mode != ps_cfg->e_air_mode
+ || ps_curr_cfg->u4_air_refresh_period
+ != ps_cfg->u4_air_refresh_period)
+ {
+ ps_curr_cfg->e_air_mode = ps_cfg->e_air_mode;
+ ps_curr_cfg->u4_air_refresh_period = ps_cfg->u4_air_refresh_period;
+
+ ih264e_init_air_map(ps_codec);
+
+ /* reset air counter */
+ ps_codec->i4_air_pic_cnt = -1;
+ }
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_PROFILE_PARAMS)
+ {
+ ps_codec->s_cfg.e_profile = ps_cfg->e_profile;
+ }
+ else if (ps_cfg->e_cmd == IVE_CMD_CTL_SET_NUM_CORES)
+ {
+ ps_codec->s_cfg.u4_num_cores = ps_cfg->u4_num_cores;
+ }
+
+ /* reset RC model */
+ if (u4_init_rc)
+ {
+ /* init qp */
+ UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+ /* min max qp */
+ UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+ /* init i,p,b qp */
+ au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+ au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+ au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+ /* init min max qp */
+ au1_min_max_qp[2 * I_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+ au1_min_max_qp[2 * I_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+ au1_min_max_qp[2 * P_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+ au1_min_max_qp[2 * P_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+ au1_min_max_qp[2 * B_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+ au1_min_max_qp[2 * B_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+ /* get rc mode */
+ switch (ps_codec->s_cfg.e_rc_mode)
+ {
+ case IVE_RC_STORAGE:
+ ps_codec->s_rate_control.e_rc_type = VBR_STORAGE;
+ break;
+
+ case IVE_RC_CBR_NON_LOW_DELAY:
+ ps_codec->s_rate_control.e_rc_type = CBR_NLDRC;
+ break;
+
+ case IVE_RC_CBR_LOW_DELAY:
+ ps_codec->s_rate_control.e_rc_type = CBR_LDRC;
+ break;
+
+ case IVE_RC_NONE:
+ ps_codec->s_rate_control.e_rc_type = CONST_QP;
+ break;
+
+ default:
+ break;
+ }
+
+ /* init rate control */
+ ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_rate_control.pps_frame_time,
+ ps_codec->s_rate_control.pps_time_stamp,
+ ps_codec->s_rate_control.pps_pd_frm_rate,
+ ps_codec->s_cfg.u4_max_framerate,
+ ps_codec->s_cfg.u4_src_frame_rate,
+ ps_codec->s_cfg.u4_tgt_frame_rate,
+ ps_codec->s_rate_control.e_rc_type,
+ ps_codec->s_cfg.u4_target_bitrate,
+ ps_codec->s_cfg.u4_max_bitrate,
+ ps_codec->s_cfg.u4_vbv_buffer_delay,
+ ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
+ H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+ ps_codec->s_cfg.u4_max_level);
+ }
+
+ return err;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills an encoder config structure with default values
+*
+* @par Description:
+*  Writes a default into every field assigned below. Called from ih264e_init()
+*  so that the codec keeps working even if set_params is never invoked
+*
+* @param[in] ps_cfg
+*  Pointer to the encoder config params that receive the defaults
+*
+* @returns  IV_SUCCESS always
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_default_params(cfg_params_t *ps_cfg)
+{
+    /* maximum dimensions, reference / reorder counts and level */
+    ps_cfg->u4_max_wd = MAX_WD;
+    ps_cfg->u4_max_ht = MAX_HT;
+    ps_cfg->u4_max_ref_cnt = MAX_REF_CNT;
+    ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT;
+    ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL;
+
+    /* input / recon formats */
+    ps_cfg->e_inp_color_fmt = IV_YUV_420SP_UV;
+    ps_cfg->u4_enable_recon = DEFAULT_RECON_ENABLE;
+    ps_cfg->e_recon_color_fmt = IV_YUV_420P;
+
+    /* speed preset, rate control mode and upper bounds */
+    ps_cfg->u4_enc_speed_preset = IVE_FASTEST;
+    ps_cfg->e_rc_mode = DEFAULT_RC;
+    ps_cfg->u4_max_framerate = DEFAULT_MAX_FRAMERATE;
+    ps_cfg->u4_max_bitrate = DEFAULT_MAX_BITRATE;
+    ps_cfg->u4_max_num_bframes = 0;
+    ps_cfg->e_content_type = IV_PROGRESSIVE;
+    ps_cfg->u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X;
+    ps_cfg->u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y;
+
+    /* slice mode, architecture and soc */
+    ps_cfg->e_slice_mode = IVE_SLICE_MODE_NONE;
+    ps_cfg->u4_slice_param = DEFAULT_SLICE_PARAM;
+    ps_cfg->e_arch = ih264e_default_arch();
+    ps_cfg->e_soc = SOC_GENERIC;
+
+    /* display / coded dimensions and stride */
+    ps_cfg->u4_disp_wd = MAX_WD;
+    ps_cfg->u4_disp_ht = MAX_HT;
+    ps_cfg->u4_wd = MAX_WD;
+    ps_cfg->u4_ht = MAX_HT;
+    ps_cfg->u4_strd = ALIGN16(MAX_WD);
+
+    /* frame rates and target bitrate */
+    ps_cfg->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE;
+    ps_cfg->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE;
+    ps_cfg->u4_target_bitrate = DEFAULT_BITRATE;
+
+    /* frame type and encode mode */
+    ps_cfg->e_frame_type = IV_NA_FRAME;
+    ps_cfg->e_enc_mode = IVE_ENC_MODE_DEFAULT;
+
+    /* initial qp for i, p, b pictures */
+    ps_cfg->u4_i_qp = DEFAULT_I_QP;
+    ps_cfg->u4_p_qp = DEFAULT_P_QP;
+    ps_cfg->u4_b_qp = DEFAULT_B_QP;
+
+    /* qp limits for i, p, b pictures */
+    ps_cfg->u4_i_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_i_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->u4_p_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_p_qp_max = DEFAULT_QP_MAX;
+    ps_cfg->u4_b_qp_min = DEFAULT_QP_MIN;
+    ps_cfg->u4_b_qp_max = DEFAULT_QP_MAX;
+
+    /* adaptive intra refresh */
+    ps_cfg->e_air_mode = DEFAULT_AIR_MODE;
+    ps_cfg->u4_air_refresh_period = DEFAULT_AIR_REFRESH_PERIOD;
+
+    /* vbv */
+    ps_cfg->u4_vbv_buffer_delay = DEFAULT_VBV_DELAY;
+    ps_cfg->u4_vbv_buf_size = DEFAULT_VBV_SIZE;
+
+    /* number of cores */
+    ps_cfg->u4_num_cores = DEFAULT_NUM_CORES;
+
+    /* motion estimation and mode decision tools */
+    ps_cfg->u4_me_speed_preset = DEFAULT_ME_SPEED_PRESET;
+    ps_cfg->u4_enable_hpel = DEFAULT_HPEL;
+    ps_cfg->u4_enable_qpel = DEFAULT_QPEL;
+    ps_cfg->u4_enable_intra_4x4 = DEFAULT_I4;
+    ps_cfg->u4_enable_intra_8x8 = DEFAULT_I8;
+    ps_cfg->u4_enable_intra_16x16 = DEFAULT_I16;
+    ps_cfg->u4_enable_fast_sad = DEFAULT_ENABLE_FAST_SAD;
+    ps_cfg->u4_enable_satqd = DEFAULT_ENABLE_SATQD;
+
+    /* min sad is chosen from the satqd setting written just above */
+    if (ps_cfg->u4_enable_satqd == DEFAULT_ENABLE_SATQD)
+    {
+        ps_cfg->i4_min_sad = DEFAULT_MIN_SAD_ENABLE;
+    }
+    else
+    {
+        ps_cfg->i4_min_sad = DEFAULT_MIN_SAD_DISABLE;
+    }
+
+    /* search range */
+    ps_cfg->u4_srch_rng_x = DEFAULT_SRCH_RNG_X;
+    ps_cfg->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y;
+
+    /* gop structure */
+    ps_cfg->u4_i_frm_interval = DEFAULT_I_INTERVAL;
+    ps_cfg->u4_idr_frm_interval = DEFAULT_IDR_INTERVAL;
+    ps_cfg->u4_num_b_frames = DEFAULT_B_FRAMES;
+
+    /* deblocking and profile */
+    ps_cfg->u4_disable_deblock_level = DEFAULT_DISABLE_DEBLK_LEVEL;
+    ps_cfg->e_profile = DEFAULT_PROFILE;
+
+    /* time stamp, validity and command of this config entry */
+    ps_cfg->u4_timestamp_low = 0;
+    ps_cfg->u4_timestamp_high = 0;
+    ps_cfg->u4_is_valid = 1;
+    ps_cfg->e_cmd = IVE_CMD_CT_NA;
+
+    /* dimensions in units of 16, derived from the max width / height */
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4;
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4;
+
+    /* entropy coding mode and prediction flags */
+    ps_cfg->u4_entropy_coding_mode = CAVLC;
+    ps_cfg->u4_weighted_prediction = 0;
+    ps_cfg->u4_constrained_intra_pred = 0;
+
+    /* pic / mb info types */
+    ps_cfg->u4_pic_info_type = 0;
+    ps_cfg->u4_mb_info_type = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialize encoder context. This will be called by init_mem_rec and during
+*  codec reset
+*
+* @par Description:
+*  Resets the counters, flags and ids held in the codec context, initializes
+*  the control and entropy mutexes, marks every SPS / PPS entry invalid, sets
+*  up the process and entropy job queues and attaches them to every process
+*  context, initializes the buffer managers and the dpb manager, installs the
+*  function pointers and clears the per context set rate control status
+*
+* @param[in] ps_codec
+*  Codec context pointer
+*
+* @returns  IV_SUCCESS on success. The function exits early through
+*  RETURN_IF(..., IV_FAIL) when either job queue could not be initialized
+*
+* @remarks
+*  NOTE(review): i4_init_done is set to 1 before the job queues are created,
+*  so it is already set when the function bails out on a job queue failure.
+*  Confirm that callers rely on the return value rather than on this flag
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_init(codec_t *ps_codec)
+{
+    /* enc config param set */
+    cfg_params_t *ps_cfg = &(ps_codec->s_cfg);
+
+    /* temp var */
+    WORD32 i;
+
+    /* coded pic count */
+    ps_codec->i4_coded_pic_cnt = 0;
+
+    /* Number of API calls to encode are made */
+    ps_codec->i4_encode_api_call_cnt = -1;
+
+    /* Indicates no header has been generated yet */
+    ps_codec->u4_header_generated = 0;
+
+    /* Number of pictures encoded */
+    ps_codec->i4_pic_cnt = -1;
+
+    /* Number of threads created */
+    ps_codec->i4_proc_thread_cnt = 0;
+
+    /* ctl mutex init */
+    ithread_mutex_init(ps_codec->pv_ctl_mutex);
+
+    /* Set encoder chroma format: VU interleaved input is kept as is, every
+     * other input format maps to UV interleaved */
+    ps_codec->e_codec_color_format =
+                    (ps_cfg->e_inp_color_fmt == IV_YUV_420SP_VU) ?
+                                    IV_YUV_420SP_VU : IV_YUV_420SP_UV;
+
+    /* Number of continuous frames where deblocking was disabled */
+    ps_codec->i4_disable_deblk_pic_cnt = 0;
+
+    /* frame num */
+    ps_codec->i4_frame_num = -1;
+
+    /* no frame type is forced for the current frame at start up */
+    ps_codec->force_curr_frame_type = IV_NA_FRAME;
+
+    /* idr_pic_id */
+    ps_codec->i4_idr_pic_id = -1;
+
+    /* Flush mode */
+    ps_codec->i4_flush_mode = 0;
+
+    /* Encode header mode */
+    ps_codec->i4_header_mode = 0;
+
+    /* Encode generate header */
+    ps_codec->i4_gen_header = 0;
+
+    /* To signal successful completion of init */
+    ps_codec->i4_init_done = 1;
+
+    /* To signal that at least one picture was processed */
+    ps_codec->i4_first_pic_done = 0;
+
+    /* Reset Codec */
+    ps_codec->i4_reset_flag = 0;
+
+    /* Current error code */
+    ps_codec->i4_error_code = IH264E_SUCCESS;
+
+    /* threshold residue */
+    ps_codec->u4_thres_resi = 1;
+
+    /* inter gating enable */
+    ps_codec->u4_inter_gate = 0;
+
+    /* entropy mutex init */
+    ithread_mutex_init(ps_codec->pv_entropy_mutex);
+
+    /* sps id */
+    ps_codec->i4_sps_id = 0;
+
+    /* pps id */
+    ps_codec->i4_pps_id = 0;
+
+    /* Process thread created status. memset() takes a size in bytes, so the
+     * entry count is scaled by the element size to clear every entry */
+    memset(ps_codec->ai4_process_thread_created, 0,
+           MAX_PROCESS_THREADS
+                           * sizeof(ps_codec->ai4_process_thread_created[0]));
+
+    /* Number of MBs processed together */
+    ps_codec->i4_proc_nmb = 8;
+
+    /* Previous POC msb */
+    ps_codec->i4_prev_poc_msb = 0;
+
+    /* Previous POC lsb */
+    ps_codec->i4_prev_poc_lsb = -1;
+
+    /* max Previous POC lsb */
+    ps_codec->i4_max_prev_poc_lsb = -1;
+
+    /* sps, pps status */
+    {
+        sps_t *ps_sps = ps_codec->ps_sps_base;
+        pps_t *ps_pps = ps_codec->ps_pps_base;
+
+        for (i = 0; i < MAX_SPS_CNT; i++)
+        {
+            ps_sps->i1_sps_valid = 0;
+            ps_sps++;
+        }
+
+        for (i = 0; i < MAX_PPS_CNT; i++)
+        {
+            ps_pps->i1_pps_valid = 0;
+            ps_pps++;
+        }
+    }
+
+    {
+        WORD32 max_mb_rows = ps_cfg->i4_ht_mbs;
+
+        /* two jobs per row of MBs */
+        WORD32 num_jobs = max_mb_rows * 2;
+        WORD32 clz;
+
+        /* Use next power of two number of entries*/
+        clz = CLZ(num_jobs);
+        num_jobs = 1 << (32 - clz);
+
+        /* init process jobq */
+        ps_codec->pv_proc_jobq = ih264_list_init(
+                        ps_codec->pv_proc_jobq_buf,
+                        ps_codec->i4_proc_jobq_buf_size, num_jobs,
+                        sizeof(job_t), 10);
+        RETURN_IF((ps_codec->pv_proc_jobq == NULL), IV_FAIL);
+        ih264_list_reset(ps_codec->pv_proc_jobq);
+
+        /* init entropy jobq */
+        ps_codec->pv_entropy_jobq = ih264_list_init(
+                        ps_codec->pv_entropy_jobq_buf,
+                        ps_codec->i4_entropy_jobq_buf_size, num_jobs,
+                        sizeof(job_t), 10);
+        RETURN_IF((ps_codec->pv_entropy_jobq == NULL), IV_FAIL);
+        ih264_list_reset(ps_codec->pv_entropy_jobq);
+    }
+
+    /* Update the jobq context to all the threads */
+    for (i = 0; i < MAX_PROCESS_CTXT; i++)
+    {
+        ps_codec->as_process[i].pv_proc_jobq = ps_codec->pv_proc_jobq;
+        ps_codec->as_process[i].pv_entropy_jobq = ps_codec->pv_entropy_jobq;
+
+        /* i4_id always stays between 0 and MAX_PROCESS_THREADS */
+        ps_codec->as_process[i].i4_id =
+                        (i >= MAX_PROCESS_THREADS) ?
+                                        (i - MAX_PROCESS_THREADS) : i;
+        ps_codec->as_process[i].ps_codec = ps_codec;
+
+        ps_codec->as_process[i].s_entropy.pv_proc_jobq = ps_codec->pv_proc_jobq;
+        ps_codec->as_process[i].s_entropy.pv_entropy_jobq =
+                        ps_codec->pv_entropy_jobq;
+        ps_codec->as_process[i].s_entropy.i4_abs_pic_order_cnt = -1;
+    }
+
+    /* Initialize MV Bank buffer manager */
+    ps_codec->pv_mv_buf_mgr = ih264_buf_mgr_init(ps_codec->pv_mv_buf_mgr_base);
+
+    /* Initialize Picture buffer manager for reference buffers*/
+    ps_codec->pv_ref_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_ref_buf_mgr_base);
+
+    /* Initialize Picture buffer manager for input buffers*/
+    ps_codec->pv_inp_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_inp_buf_mgr_base);
+
+    /* Initialize buffer manager for output buffers*/
+    ps_codec->pv_out_buf_mgr = ih264_buf_mgr_init(
+                    ps_codec->pv_out_buf_mgr_base);
+
+    /* buffer cnt in buffer manager */
+    ps_codec->i4_inp_buf_cnt = 0;
+    ps_codec->i4_out_buf_cnt = 0;
+    ps_codec->i4_ref_buf_cnt = 0;
+
+    /* picture buffer structures start out zeroed */
+    ps_codec->ps_pic_buf = (pic_buf_t *) ps_codec->pv_pic_buf_base;
+    memset(ps_codec->ps_pic_buf, 0, BUF_MGR_MAX_CNT * sizeof(pic_buf_t));
+
+    /* Initialize dpb manager */
+    ih264_dpb_mgr_init((dpb_mgr_t*) ps_codec->pv_dpb_mgr);
+
+    /* reference sets: zero everything, then set each pic cnt to -1 */
+    memset(ps_codec->as_ref_set, 0,
+           sizeof(ref_set_t) * (MAX_DPB_SIZE + MAX_CTXT_SETS));
+    for (i = 0; i < (MAX_DPB_SIZE + MAX_CTXT_SETS); i++)
+    {
+        ps_codec->as_ref_set[i].i4_pic_cnt = -1;
+    }
+
+    /* fn ptr init */
+    ih264e_init_function_ptr(ps_codec);
+
+    /* reset status flags */
+    for (i = 0; i < MAX_CTXT_SETS; i++)
+    {
+        ps_codec->au4_entropy_thread_active[i] = 0;
+        ps_codec->ai4_pic_cnt[i] = -1;
+
+        ps_codec->s_rate_control.pre_encode_skip[i] = 0;
+        ps_codec->s_rate_control.post_encode_skip[i] = 0;
+    }
+
+    ps_codec->s_rate_control.num_intra_in_prev_frame = 0;
+    ps_codec->s_rate_control.i4_avg_activity = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Reports the number of memory records the codec needs
+*
+* @par Description:
+*  Writes MEM_REC_CNT into the u4_num_mem_rec field of the output structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (not used by this call)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure; read as ih264e_num_mem_rec_op_t
+*
+* @returns  IV_SUCCESS always
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_get_num_rec(void *pv_api_ip, void *pv_api_op)
+{
+    /* output structure of the api call */
+    ih264e_num_mem_rec_op_t *ps_num_rec_op = pv_api_op;
+
+    /* nothing is read from the input structure */
+    UNUSED(pv_api_ip);
+
+    ps_num_rec_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Fills memory records of the codec
+*
+* @par Description:
+* Fills codec memory requirements
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
+{
+ /* api call I/O structures */
+ ih264e_fill_mem_rec_ip_t *ps_ip = pv_api_ip;
+ ih264e_fill_mem_rec_op_t *ps_op = pv_api_op;
+
+ /* profile / level info */
+ WORD32 level;
+ WORD32 num_reorder_frames;
+ WORD32 num_ref_frames;
+
+ /* mem records */
+ WORD32 no_of_mem_rec;
+ iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec;
+
+ /* frame dimensions */
+ WORD32 max_wd_luma, max_ht_luma;
+ WORD32 max_mb_rows, max_mb_cols, max_mb_cnt;
+
+ /* temp var */
+ WORD32 i;
+
+ /* error status */
+ IV_STATUS_T status = IV_SUCCESS;
+
+ /* profile / level info */
+ level = ps_ip->s_ive_ip.u4_max_level;
+ num_reorder_frames = ps_ip->s_ive_ip.u4_max_reorder_cnt;
+ num_ref_frames = ps_ip->s_ive_ip.u4_max_ref_cnt;
+
+ /* mem records */
+ ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec;
+ no_of_mem_rec = ps_ip->s_ive_ip.u4_num_mem_rec;
+
+ /* frame dimensions */
+ max_ht_luma = ps_ip->s_ive_ip.u4_max_ht;
+ max_wd_luma = ps_ip->s_ive_ip.u4_max_wd;
+ max_ht_luma = ALIGN16(max_ht_luma);
+ max_wd_luma = ALIGN16(max_wd_luma);
+ max_mb_rows = max_ht_luma / MB_SIZE;
+ max_mb_cols = max_wd_luma / MB_SIZE;
+ max_mb_cnt = max_mb_rows * max_mb_cols;
+
+ /* validate params */
+ if ((level < MIN_LEVEL) || (level > MAX_LEVEL))
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+ level = MAX_LEVEL;
+ }
+
+ if (num_ref_frames > MAX_REF_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+ num_ref_frames = MAX_REF_CNT;
+ }
+
+ if (num_reorder_frames > MAX_REF_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+ num_reorder_frames = MAX_REF_CNT;
+ }
+
+ /* Set all memory records as persistent and alignment as 128 by default */
+ ps_mem_rec = ps_mem_rec_base;
+ for (i = 0; i < no_of_mem_rec; i++)
+ {
+ ps_mem_rec->u4_mem_alignment = 128;
+ ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+ ps_mem_rec++;
+ }
+
+ /************************************************************************
+ * Request memory for h264 encoder handle *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_IV_OBJ];
+ {
+ ps_mem_rec->u4_mem_size = sizeof(iv_obj_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_IV_OBJ, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for h264 encoder context *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CODEC];
+ {
+ ps_mem_rec->u4_mem_size = sizeof(codec_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CODEC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for entropy context *
+ * In multi core encoding, each row is assumed to be launched on a *
+ * thread. The rows below can only start after its neighbors are coded *
+ * The status of an mb coded/uncoded is signaled via entropy map. *
+ * 1. One word32 to store skip run cnt *
+ * 2. mb entropy map (mb status entropy coded/uncoded). The size*
+ * of the entropy map is max mb cols. Further allocate one *
+ * more additional row to evade checking for row -1. *
+ * 3. size of bit stream buffer to store bit stream ctxt. *
+ * 4. Entropy coding is dependent on nnz coefficient count for *
+ * the neighbor blocks. It is sufficient to maintain one row *
+ * worth of nnz as entropy for lower row waits on entropy map*
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size of skip mb run */
+ total_size += sizeof(WORD32);
+ total_size = ALIGN8(total_size);
+
+ /* size in bytes to store entropy status of an entire frame */
+ total_size += (max_mb_cols * max_mb_rows);
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+ total_size = ALIGN128(total_size);
+
+ /* size of bit stream buffer */
+ total_size += sizeof(bitstrm_t);
+ total_size = ALIGN128(total_size);
+
+ /* top nnz luma */
+ total_size += (max_mb_cols * 4 * sizeof(UWORD8));
+ total_size = ALIGN128(total_size);
+
+ /* top nnz cbcr */
+ total_size += (max_mb_cols * 4 * sizeof(UWORD8));
+ total_size = ALIGN128(total_size);
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * The residue coefficients that needs to be entropy coded are packed *
+ * at a buffer space by the proc threads. The entropy thread shall *
+ * read from the buffer space, unpack them and encode the same. The *
+ * buffer space required to pack a row of mbs are as follows. *
+ * Assuming transform_8x8_flag is disabled, *
+ * In the worst case, 1 mb contains 1 dc 4x4 luma sub block, followed *
+ * by 16 ac 4x4 luma sub blocks, 2 dc chroma 2x2 sub blocks, followed *
+ * by 8 ac 4x4 chroma sub blocks. *
+ * For the sake of simplicity we assume that all sub blocks are of *
+ * type 4x4. The packing of each 4x4 is depicted by the structure *
+ * tu_sblk_coeff_data_t *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA];
+ {
+ /* temp var */
+ WORD32 size = 0;
+
+ /* size of coeff data of 1 mb */
+ size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS;
+
+ /* size of coeff data of 1 row of mb's */
+ size *= max_mb_cols;
+
+ /* align to avoid any false sharing across threads */
+ size = ALIGN64(size);
+
+ /* size for one full frame */
+ size *= max_mb_rows;
+
+ /* size of each proc buffer set (ping, pong) */
+ size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_COEFF_DATA, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * while encoding an mb, the mb header data is signaled to the entropy*
+ * thread by writing to a buffer space. the size of header data per mb *
+ * is assumed to be 40 bytes *
+ * TODO: revisit this inference *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_HEADER_DATA];
+ {
+ /* temp var */
+ WORD32 size;
+
+ /* size per MB */
+ size = 40;
+
+ /* size for 1 row of mbs */
+ size = size * max_mb_cols;
+
+ /* align to avoid any false sharing across threads */
+ size = ALIGN64(size);
+
+ /* size for one full frame */
+ size *= max_mb_rows;
+
+ /* size of each proc buffer set (ping, pong) */
+ size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_HEADER_DATA, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Size for holding mv_buf_t for each MV Bank. *
+ * Note this allocation is done for BUF_MGR_MAX_CNT instead of *
+ * MAX_DPB_SIZE or max_dpb_size for following reasons *
+ * max_dpb_size will be based on max_wd and max_ht *
+ * For higher max_wd and max_ht this number will be smaller than *
+ * MAX_DPB_SIZE But during actual initialization number of buffers *
+ * allocated can be more. *
+ * *
+ * One extra MV Bank is needed to hold current pics MV bank. *
+ * Since this is only a structure allocation and not actual buffer *
+ * allocation, it is allocated for BUF_MGR_MAX_CNT entries *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBANK];
+ {
+ /* max luma samples */
+ WORD32 max_luma_samples = 0;
+
+ /* determine max luma samples */
+ for (i = 0; i < 16; i++)
+ if (level ==(WORD32)gas_ih264_lvl_tbl[i].u4_level_idc)
+ max_luma_samples = gas_ih264_lvl_tbl[i].u4_max_fs
+ << (BLK_SIZE + BLK_SIZE);
+
+ ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+
+ /************************************************************************
+ * Allocate for pu_map, enc_pu_t and pic_pu_idx for each MV bank *
+ * Note: Number of luma samples is not max_wd * max_ht here, instead it *
+ * is set to maximum number of luma samples allowed at the given level. *
+ * This is done to ensure that any stream with width and height lesser *
+ * than max_wd and max_ht is supported. Number of buffers required can *
+ * be greater for lower width and heights at a given level and this *
+ * increased number of buffers might require more memory than what *
+ * max_wd and max_ht buffer would have required Also note one extra *
+ * buffer is allocated to store current pictures MV bank. *
+ ***********************************************************************/
+
+ ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(mv_buf_t);
+
+ ps_mem_rec->u4_mem_size += (num_ref_frames + num_reorder_frames
+ + MAX_CTXT_SETS)
+ * ih264e_get_pic_mv_bank_size(max_luma_samples);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MVBANK, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * While encoding inter slices, to compute the cost of encoding an mb *
+ * with the mv's at hand, we employ the expression cost = sad + lambda *
+ * x mv_bits. Here mv_bits is the total number of bits taken to represe*
+ * nt the mv in the stream. The mv bits for all the possible mv are *
+ * stored in the look up table. The mem record for this look up table *
+ * is given below. *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBITS];
+ {
+ /* max srch range x */
+ UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+
+ /* max srch range y */
+ UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+ /* max srch range */
+ UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y);
+
+ /* due to subpel */
+ u4_max_srch_range <<= 2;
+
+ /* due to mv on either direction */
+ u4_max_srch_range = (u4_max_srch_range << 1);
+
+ /* due to pred mv + zero */
+ u4_max_srch_range = (u4_max_srch_range << 1) + 1;
+
+ u4_max_srch_range = ALIGN128(u4_max_srch_range);
+
+ ps_mem_rec->u4_mem_size = u4_max_srch_range;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MVBITS, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for SPS *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SPS];
+ {
+ ps_mem_rec->u4_mem_size = MAX_SPS_CNT * sizeof(sps_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SPS, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for PPS *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PPS];
+ {
+ ps_mem_rec->u4_mem_size = MAX_PPS_CNT * sizeof(pps_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PPS, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for Slice Header *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_HDR];
+ {
+ ps_mem_rec->u4_mem_size = MAX_CTXT_SETS * MAX_SLICE_HDR_CNT
+ * sizeof(slice_header_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SLICE_HDR, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory for Adaptive Intra Refresh *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_AIR_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* intra coded map */
+ total_size += max_mb_cnt;
+ total_size *= MAX_CTXT_SETS;
+
+ /* mb refresh map */
+ total_size += sizeof(UWORD16) * max_mb_cnt;
+
+ /* alignment */
+ total_size = ALIGN128(total_size);
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_AIR_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * In multi slice encoding, this memory record helps tracking the start*
+ * of slice with reference to mb. *
+ * MEM RECORD for holding *
+ * 1. mb slice map *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to slice index of all mbs of a frame */
+ total_size = ALIGN64(max_mb_cnt);
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_SLICE_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold thread handles for each processing thread *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_THREAD_HANDLE];
+ {
+ WORD32 handle_size = ithread_get_handle_size();
+
+ ps_mem_rec->u4_mem_size = MAX_PROCESS_THREADS * handle_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_THREAD_HANDLE, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold mutex for control calls *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CTL_MUTEX];
+ {
+ ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size();
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CTL_MUTEX, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold mutex for entropy calls *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_MUTEX];
+ {
+ ps_mem_rec->u4_mem_size = ithread_get_mutex_lock_size();
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY_MUTEX, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold process jobs *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_JOBQ];
+ {
+ /* One process job per row of MBs */
+ /* Allocate for two pictures, so that wrap around can be handled easily */
+ WORD32 num_jobs = max_mb_rows * 2;
+
+ WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
+
+ ps_mem_rec->u4_mem_size = job_queue_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_JOBQ, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold entropy jobs *
+ ***********************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_JOBQ];
+ {
+ /* One entropy job per row of MBs */
+ /* Allocate for two pictures, so that wrap around can be handled easily */
+ WORD32 num_jobs = max_mb_rows * 2;
+
+ WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
+
+ ps_mem_rec->u4_mem_size = job_queue_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ENTROPY_JOBQ, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * In multi core encoding, each row is assumed to be launched on a *
+ * thread. The rows below can only start after its neighbors are coded *
+ * The status of an mb coded/uncoded is signaled via proc map. *
+ * MEM RECORD for holding *
+ * 1. mb proc map (mb status core coded/uncoded) *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store mb core coding status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * mem record for holding a particular MB is deblocked or not *
+ * 1. mb deblk map (mb status deblocked/not deblocked) *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_DBLK_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store mb deblock status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ total_size = ALIGN64(total_size);
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_DBLK_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * mem record for holding a particular MB's me is done or not *
+ * 1. mb me map *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ME_MAP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store mb me status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_ME_MAP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * size for holding dpb manager context *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_DPB_MGR];
+ {
+ ps_mem_rec->u4_mem_size = sizeof(dpb_mgr_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_DPB_MGR, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * luma or chroma core coding involves mb estimation, error computation *
+ * between the estimated signal and the actual signal, transform the *
+ * error, quantize the error, then inverse transform and inverse *
+ * quantize the residue and add the result back to estimated signal. *
+ * To perform all these, a set of temporary buffers are needed. *
+ * MEM RECORD for holding scratch buffers *
+ * 1. prediction buffer used during mb mode analysis *
+ * 2 temp. reference buffer when intra 4x4 with rdopt on is *
+ * enabled *
+ * - when intra 4x4 is enabled, rdopt is on, to store the *
+ * reconstructed values and use them later this temp. buffer *
+ * is used. *
+ * 3. prediction buffer used during intra mode analysis *
+ * 4. prediction buffer used during intra 16x16 plane mode *
+ * analysis *
+ * 5. prediction buffer used during intra chroma mode analysis *
+ * 6. prediction buffer used during intra chroma 16x16 plane *
+ * mode analysis *
+ * 7. forward transform output buffer *
+ * - to store the error between estimated and the actual *
+ * input and to store the fwd transformed quantized output *
+ * 8. forward transform output buffer *
+ * - when intra 4x4 is enabled, rdopt is on, to store the *
+ * fwd transform values and use them later this temp. buffer *
+ * is used. *
+ * 9. temporary buffer for inverse transform *
+ * - temporary buffer used in inverse transform and inverse *
+ * quantization *
+ * A. Buffers for holding half_x , half_y and half_xy planes *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH];
+ {
+ WORD32 total_size = 0;
+
+ /* size to hold prediction buffer */
+ total_size += sizeof(UWORD8) * 16 * 16;
+ total_size = ALIGN64(total_size);
+
+ /* size to hold recon for intra 4x4 buffer */
+ total_size += sizeof(UWORD8) * 16 * 16;
+ total_size = ALIGN64(total_size);
+
+ /* prediction buffer intra 16x16 */
+ total_size += sizeof(UWORD8) * 16 * 16;
+ total_size = ALIGN64(total_size);
+
+ /* prediction buffer intra 16x16 plane*/
+ total_size += sizeof(UWORD8) * 16 * 16;
+ total_size = ALIGN64(total_size);
+
+ /* prediction buffer intra chroma*/
+ total_size += sizeof(UWORD8) * 16 * 8;
+ total_size = ALIGN64(total_size);
+
+ /* prediction buffer intra chroma plane*/
+ total_size += sizeof(UWORD8) * 16 * 8;
+ total_size = ALIGN64(total_size);
+
+ /* size to hold fwd transform output */
+ total_size += sizeof(WORD16) * SIZE_TRANS_BUFF;
+ total_size = ALIGN64(total_size);
+
+ /* size to hold fwd transform output */
+ total_size += sizeof(WORD16) * SIZE_TRANS_BUFF;
+ total_size = ALIGN64(total_size);
+
+ /* size to hold temporary data during inverse transform */
+ total_size += sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS;
+ total_size = ALIGN64(total_size);
+
+ /* Buffers for holding half_x , half_y and half_xy planes */
+ total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+ total_size = ALIGN64(total_size);
+
+ total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+ total_size = ALIGN64(total_size);
+
+ total_size += sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+ total_size = ALIGN64(total_size);
+
+ /* Allocate for each process thread */
+ total_size *= MAX_PROCESS_CTXT;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_PROC_SCRATCH, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * When transform_8x8_flag is disabled, the size of a sub block is *
+ * 4x4 and when the transform_8x8_flag is enabled the size of the sub *
+ * block is 8x8. The threshold matrix and the forward scaling list *
+ * is of the size of the sub block. *
+ * MEM RECORD for holding *
+ * 1. quantization parameters for plane y, cb, cr *
+ * - threshold matrix for quantization *
+ * - forward weight matrix *
+ * - satqd threshold matrix *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_QUANT_PARAM];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* quantization parameter list for planes y,cb and cr */
+ total_size += ALIGN64(sizeof(quant_params_t)) * 3;
+
+ /* size of threshold matrix for quantization
+ * (assuming the transform_8x8_flag is disabled).
+ * for all 3 planes */
+ total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3;
+
+ /* size of forward weight matrix for quantization
+ * (assuming the transform_8x8_flag is disabled).
+ * for all 3 planes */
+ total_size += ALIGN64(sizeof(WORD16) * 4 * 4) * 3;
+
+ /* Size for SATDQ threshold matrix for planes y, cb and cr */
+ total_size += ALIGN64(sizeof(UWORD16) * 9) * 3;
+
+ /* total size per each proc thread */
+ total_size *= MAX_PROCESS_CTXT;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_QUANT_PARAM, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * While computing blocking strength for the current mb, the csbp, mb *
+ * type for the neighboring mbs are necessary. memtab for storing top *
+ * row mbtype and csbp is evaluated here. *
+ * *
+ * when encoding intra 4x4 or intra 8x8 the submb types are estimated *
+ * and sent. The estimation is dependent on neighbor mbs. For this *
+ * store the top row sub mb types for intra mbs *
+ * *
+ * During motion vector prediction, the curr mb mv is predicted from *
+ * neighbors left, top, top right and sometimes top left depending on *
+ * the availability. The top and top right content is accessed from *
+ * the memtab specified below. *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_TOP_ROW_SYN_INFO];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store 1 row of mb_info_t */
+ /* one additional mb, to avoid checking end of row condition */
+ total_size += (max_mb_cols + 1) * sizeof(mb_info_t);
+
+ /* size in bytes to store 1 row of intra macroblock sub modes */
+ total_size += max_mb_cols * sizeof(UWORD8) * 16;
+
+ /* size in bytes to store 1 row + 1 of enc_pu_t */
+ /* one additional mb, to avoid checking end of row condition */
+ total_size += (max_mb_cols + 1) * sizeof(enc_pu_t);
+
+ /* total size per proc ctxt */
+ total_size = ALIGN128(total_size);
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_TOP_ROW_SYN_INFO, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * When transform_8x8_flag is enabled, the mb is partitioned into *
+ * 4 sub blocks. This corresponds to 1 vertical left edge and 1 *
+ * vertical inner edge, 1 horizontal top edge and 1 horizontal *
+ * inner edge per mb. Further, when transform_8x8_flag is disabled, *
+ * the mb is partitioned into 16 sub blocks. This corresponds to *
+ * 1 vertical left edge and 3 vertical inner edges, 1 horizontal top *
+ * edge and 3 horizontal inner edges per mb. *
+ * MEM RECORD for holding *
+ * 1. vertical edge blocking strength *
+ * 2. horizontal edge blocking strength *
+ * 3. mb qp *
+ * all are frame level *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_BS_QP];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store vertical edge bs, horizontal edge bs and qp of every mb*/
+ WORD32 vert_bs_size, horz_bs_size, qp_size;
+
+ /* vertical edge bs = total number of vertical edges * number of bytes per each edge */
+ /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0),
+ * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+ vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+ /* horizontal edge bs = total number of horizontal edges * number of bytes per each edge */
+ /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0),
+ * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+ horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+ /* qp of each mb requires 1 byte */
+ qp_size = ALIGN64(max_mb_cnt);
+
+ /* total size */
+ total_size = vert_bs_size + horz_bs_size + qp_size;
+
+ /* total size per each proc ctxt */
+ total_size *= MAX_CTXT_SETS;
+
+ ps_mem_rec->u4_mem_size = total_size;
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_BS_QP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * size for holding input picture buffer manager context *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_INP_PIC];
+ {
+ ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_INP_PIC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * size for holding output buffer manager context *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_OUT];
+ {
+ ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_OUT, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Size for color space conversion *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CSC];
+ {
+ /* We need memory for a single frame of 420 sp per context set, i.e.
+ * (wd * ht) for luma and (wd * ht / 2) for chroma */
+ ps_mem_rec->u4_mem_size = MAX_CTXT_SETS
+ * ((3 * max_ht_luma * max_wd_luma) >> 1);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_CSC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Size for holding pic_buf_t for each reference picture *
+ * Note this allocation is done for BUF_MGR_MAX_CNT instead of *
+ * MAX_DPB_SIZE or max_dpb_size for following reasons *
+ * max_dpb_size will be based on max_wd and max_ht *
+ * For higher max_wd and max_ht this number will be smaller than *
+ * MAX_DPB_SIZE But during actual initialization number of buffers *
+ * allocated can be more. *
+ * *
+ * Also to handle display depth application can allocate more than *
+ * what codec asks for in case of non-shared mode *
+ * Since this is only a structure allocation and not actual buffer *
+ * allocation, it is allocated for BUF_MGR_MAX_CNT entries *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_REF_PIC];
+ {
+ ps_mem_rec->u4_mem_size = ih264_buf_mgr_size();
+ ps_mem_rec->u4_mem_size += BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+ /************************************************************************
+ * Note: Number of luma samples is not max_wd * max_ht here, instead it *
+ * is set to maximum number of luma samples allowed at the given level. *
+ * This is done to ensure that any stream with width and height lesser *
+ * than max_wd and max_ht is supported. Number of buffers required can *
+ * be greater for lower width and heights at a given level and this *
+ * increased number of buffers might require more memory than what *
+ * max_wd and max_ht buffer would have required. Number of buffers is *
+ * doubled in order to return one frame at a time instead of sending *
+ * multiple outputs during dpb full case. Also note one extra buffer is *
+ * allocated to store current picture. *
+ * *
+ * Half-pel planes for each reference buffer are allocated along with *
+ * the reference buffer. So each reference buffer is 4 times the *
+ * required size. This way buffer management for the half-pel planes is *
+ * easier and while using the half-pel planes in MC, an offset can be *
+ * used from a single pointer *
+ ***********************************************************************/
+ ps_mem_rec->u4_mem_size += HPEL_PLANES_CNT
+ * ih264e_get_total_pic_buf_size(
+ max_wd_luma * max_ht_luma, level,
+ PAD_WD, PAD_HT, num_ref_frames,
+ num_reorder_frames);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_REF_PIC, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * Request memory to hold mem recs to be returned during retrieve call *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_BACKUP];
+ {
+ ps_mem_rec->u4_mem_size = MEM_REC_CNT * sizeof(iv_mem_rec_t);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_BACKUP, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * size for memory required by NMB info structs and buffer for storing *
+ * half pel plane *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
+ {
+ ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * MAX_NMB
+ * (sizeof(mb_info_nmb_t)
+ + MB_SIZE * MB_SIZE * sizeof(UWORD8));
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_INFO_NMB, ps_mem_rec->u4_mem_size);
+
+ /************************************************************************
+ * RC mem records *
+ ************************************************************************/
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_RC];
+ {
+ ih264e_get_rate_control_mem_tab(NULL, ps_mem_rec, FILL_MEMTAB);
+ }
+ DEBUG("\nMemory record Id %d = %d \n", MEM_REC_RC, ps_mem_rec->u4_mem_size);
+
+ /* Each memtab size is aligned to next multiple of 128 bytes */
+ /* This is to ensure all the memtabs start at different cache lines */
+ ps_mem_rec = ps_mem_rec_base;
+ for (i = 0; i < MEM_REC_CNT; i++)
+ {
+ ps_mem_rec->u4_mem_size = ALIGN128(ps_mem_rec->u4_mem_size);
+ ps_mem_rec++;
+ }
+
+ ps_op->s_ive_op.u4_num_mem_rec = MEM_REC_CNT;
+
+ DEBUG("Num mem recs in fill call : %d\n", ps_op->s_ive_op.u4_num_mem_rec);
+
+ return (status);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Initializes from mem records passed to the codec
+*
+* @par Description:
+* Initializes pointers based on mem records passed
+*
+* @param[in] ps_codec_obj
+* Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
+ void *pv_api_ip,
+ void *pv_api_op)
+{
+ /* api call I/O structures */
+ ih264e_init_ip_t *ps_ip = pv_api_ip;
+ ih264e_init_op_t *ps_op = pv_api_op;
+
+ /* mem records */
+ iv_mem_rec_t *ps_mem_rec_base, *ps_mem_rec;
+
+ /* codec variables */
+ codec_t * ps_codec;
+ cfg_params_t *ps_cfg;
+
+ /* frame dimensions */
+ WORD32 max_wd_luma, max_ht_luma;
+ WORD32 max_mb_rows, max_mb_cols, max_mb_cnt;
+
+ /* temp var */
+ WORD32 i;
+ WORD32 status = IV_SUCCESS;
+
+ /* frame dimensions */
+ max_ht_luma = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+ max_wd_luma = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+ max_mb_rows = max_ht_luma / MB_SIZE;
+ max_mb_cols = max_wd_luma / MB_SIZE;
+ max_mb_cnt = max_mb_rows * max_mb_cols;
+
+ /* mem records */
+ ps_mem_rec_base = ps_ip->s_ive_ip.ps_mem_rec;
+
+ /* Init mem records */
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CODEC];
+ {
+ ps_codec_obj->pv_codec_handle = ps_mem_rec->pv_base;
+ ps_codec = (codec_t *) (ps_codec_obj->pv_codec_handle);
+ }
+
+ /* Note this memset cannot be done in init() call, since init will be called
+ during reset as well. And calling this during reset will mean all pointers
+ need to be reinitialized */
+ memset(ps_codec, 0, sizeof(codec_t));
+
+ /* Set default Config Params */
+ ps_cfg = &ps_codec->s_cfg;
+ ih264e_set_default_params(ps_cfg);
+
+ /* Update config params as per input */
+ ps_cfg->u4_max_wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+ ps_cfg->u4_max_ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+ ps_cfg->i4_wd_mbs = ps_cfg->u4_max_wd >> 4;
+ ps_cfg->i4_ht_mbs = ps_cfg->u4_max_ht >> 4;
+ ps_cfg->u4_max_ref_cnt = ps_ip->s_ive_ip.u4_max_ref_cnt;
+ ps_cfg->u4_max_reorder_cnt = ps_ip->s_ive_ip.u4_max_reorder_cnt;
+ ps_cfg->u4_max_level = ps_ip->s_ive_ip.u4_max_level;
+ ps_cfg->e_inp_color_fmt = ps_ip->s_ive_ip.e_inp_color_fmt;
+ ps_cfg->e_recon_color_fmt = ps_ip->s_ive_ip.e_recon_color_fmt;
+ ps_cfg->u4_max_framerate = ps_ip->s_ive_ip.u4_max_framerate;
+ ps_cfg->u4_max_bitrate = ps_ip->s_ive_ip.u4_max_bitrate;
+ ps_cfg->u4_max_num_bframes = ps_ip->s_ive_ip.u4_max_num_bframes;
+ ps_cfg->e_content_type = ps_ip->s_ive_ip.e_content_type;
+ ps_cfg->u4_max_srch_rng_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+ ps_cfg->u4_max_srch_rng_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+ ps_cfg->e_slice_mode = ps_ip->s_ive_ip.e_slice_mode;
+ ps_cfg->u4_slice_param = ps_ip->s_ive_ip.u4_slice_param;
+ ps_cfg->e_arch = ps_ip->s_ive_ip.e_arch;
+ ps_cfg->e_soc = ps_ip->s_ive_ip.e_soc;
+ ps_cfg->u4_enable_recon = ps_ip->s_ive_ip.u4_enable_recon;
+ ps_cfg->e_rc_mode = ps_ip->s_ive_ip.e_rc_mode;
+
+ /* Validate params */
+ if ((ps_ip->s_ive_ip.u4_max_level < MIN_LEVEL)
+ || (ps_ip->s_ive_ip.u4_max_level > MAX_LEVEL))
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_CODEC_LEVEL_NOT_SUPPORTED;
+ ps_cfg->u4_max_level = DEFAULT_MAX_LEVEL;
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_ref_cnt > MAX_REF_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REF_UNSUPPORTED;
+ ps_cfg->u4_max_ref_cnt = MAX_REF_CNT;
+ }
+
+ if (ps_ip->s_ive_ip.u4_max_reorder_cnt > MAX_REF_CNT)
+ {
+ ps_op->s_ive_op.u4_error_code |= IH264E_NUM_REORDER_UNSUPPORTED;
+ ps_cfg->u4_max_reorder_cnt = MAX_REF_CNT;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_BACKUP];
+ {
+ ps_codec->ps_mem_rec_backup = (iv_mem_rec_t *) ps_mem_rec->pv_base;
+
+ memcpy(ps_codec->ps_mem_rec_backup, ps_mem_rec_base,
+ MEM_REC_CNT * sizeof(iv_mem_rec_t));
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY];
+ {
+ /* temp var */
+ WORD32 size = 0, offset;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ /* base ptr */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* reset size */
+ size = 0;
+
+ /* skip mb run */
+ ps_codec->as_process[i].s_entropy.pi4_mb_skip_run =
+ (void *) (pu1_buf + size);
+ size += sizeof(WORD32);
+ size = ALIGN8(size);
+
+ /* entropy map */
+ ps_codec->as_process[i].s_entropy.pu1_entropy_map =
+ (void *) (pu1_buf + size + max_mb_cols);
+ /* size in bytes to store entropy status of an entire frame */
+ size += (max_mb_cols * max_mb_rows);
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ size += max_mb_cols;
+ size = ALIGN128(size);
+
+ /* bit stream ptr */
+ ps_codec->as_process[i].s_entropy.ps_bitstrm = (void *) (pu1_buf
+ + size);
+ size += sizeof(bitstrm_t);
+ size = ALIGN128(size);
+
+ /* nnz luma */
+ ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma =
+ (void *) (pu1_buf + size);
+ size += (max_mb_cols * 4 * sizeof(UWORD8));
+ size = ALIGN128(size);
+
+ /* nnz chroma */
+ ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr =
+ (void *) (pu1_buf + size);
+ size += (max_mb_cols * 4 * sizeof(UWORD8));
+ size = ALIGN128(size);
+ offset = size;
+ }
+ else
+ {
+ /* base ptr */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* start at the offset recorded at the end of the first half's layout */
+ size = offset;
+
+ /* skip mb run */
+ ps_codec->as_process[i].s_entropy.pi4_mb_skip_run =
+ (void *) (pu1_buf + size);
+ size += sizeof(WORD32);
+ size = ALIGN8(size);
+
+ /* entropy map */
+ ps_codec->as_process[i].s_entropy.pu1_entropy_map =
+ (void *) (pu1_buf + size + max_mb_cols);
+ /* size in bytes to store entropy status of an entire frame */
+ size += (max_mb_cols * max_mb_rows);
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ size += max_mb_cols;
+ size = ALIGN128(size);
+
+ /* bit stream ptr */
+ ps_codec->as_process[i].s_entropy.ps_bitstrm = (void *) (pu1_buf
+ + size);
+ size += sizeof(bitstrm_t);
+ size = ALIGN128(size);
+
+ /* nnz luma */
+ ps_codec->as_process[i].s_entropy.pu1_top_nnz_luma =
+ (void *) (pu1_buf + size);
+ size += (max_mb_cols * 4 * sizeof(UWORD8));
+ size = ALIGN128(size);
+
+ /* nnz chroma */
+ ps_codec->as_process[i].s_entropy.pu1_top_nnz_cbcr =
+ (void *) (pu1_buf + size);
+ size += (max_mb_cols * 4 * sizeof(UWORD8));
+ size = ALIGN128(size);
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_COEFF_DATA];
+ {
+ /* temp var */
+ WORD32 size = 0, size_of_row;
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* size of coeff data of 1 mb */
+ size += sizeof(tu_sblk_coeff_data_t) * MAX_4x4_SUBBLKS;
+
+ /* size of coeff data of 1 row of mb's */
+ size *= max_mb_cols;
+
+ /* align to avoid false sharing */
+ size = ALIGN64(size);
+ size_of_row = size;
+
+ /* size for one full frame */
+ size *= max_mb_rows;
+
+ ps_codec->u4_size_coeff_data = size_of_row;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf;
+ ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data =
+ pu1_buf;
+ }
+ else
+ {
+ ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf + size;
+ ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data = pu1_buf
+ + size;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_HEADER_DATA];
+ {
+ /* temp var */
+ WORD32 size, size_of_row;
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* size of header data of 1 mb */
+ size = 40;
+
+ /* size for 1 row of mbs */
+ size = size * max_mb_cols;
+
+ /* align to avoid any false sharing across threads */
+ size = ALIGN64(size);
+ size_of_row = size;
+
+ /* size for one full frame */
+ size *= max_mb_rows;
+
+ ps_codec->u4_size_header_data = size_of_row;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf;
+ ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
+ pu1_buf;
+ }
+ else
+ {
+ ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf + size;
+ ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
+ pu1_buf + size;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBANK];
+ {
+ /* size of buf mgr struct */
+ WORD32 size = ih264_buf_mgr_size();
+
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* mv buffer mgr */
+ ps_codec->pv_mv_buf_mgr_base = pu1_buf;
+
+ /* mv bank */
+ ps_codec->pv_mv_bank_buf_base = pu1_buf + size;
+ ps_codec->i4_total_mv_bank_size = ps_mem_rec->u4_mem_size - size;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MVBITS];
+ {
+ /* max srch range x */
+ UWORD32 u4_srch_range_x = ps_ip->s_ive_ip.u4_max_srch_rng_x;
+
+ /* max srch range y */
+ UWORD32 u4_srch_range_y = ps_ip->s_ive_ip.u4_max_srch_rng_y;
+
+ /* max srch range */
+ UWORD32 u4_max_srch_range = MAX(u4_srch_range_x, u4_srch_range_y);
+
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* due to subpel */
+ u4_max_srch_range <<= 2;
+
+// /* due to mv on either direction */
+// u4_max_srch_range = (u4_max_srch_range << 1);
+
+ /* due to pred mv + zero */
+ u4_max_srch_range = (u4_max_srch_range << 1) + 1;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ /* me ctxt */
+ me_ctxt_t *ps_mem_ctxt = &(ps_codec->as_process[i].s_me_ctxt);
+
+ /* init at zero mv */
+ ps_mem_ctxt->pu1_mv_bits = pu1_buf + u4_max_srch_range;
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SPS];
+ {
+ ps_codec->ps_sps_base = (sps_t *) ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PPS];
+ {
+ ps_codec->ps_pps_base = (pps_t *) ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_HDR];
+ {
+ ps_codec->ps_slice_hdr_base = ps_mem_rec->pv_base;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base;
+ }
+ else
+ {
+ /* temp var */
+ WORD32 size = MAX_SLICE_HDR_CNT * sizeof(slice_header_t);
+ void *pv_buf = (UWORD8 *) ps_mem_rec->pv_base + size;
+
+ ps_codec->as_process[i].ps_slice_hdr_base = pv_buf;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_AIR_MAP];
+ {
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf;
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf
+ + max_mb_cnt;
+ }
+ }
+
+ ps_codec->pu2_intr_rfrsh_map = (UWORD16 *) (pu1_buf + max_mb_cnt * 2);
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_SLICE_MAP];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf_ping, *pu1_buf_pong;
+
+ /* init pointer */
+ pu1_buf_ping = ps_mem_rec->pv_base;
+ pu1_buf_pong = pu1_buf_ping + ALIGN64(max_mb_cnt);
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping;
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_slice_idx = pu1_buf_pong;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_THREAD_HANDLE];
+ {
+ WORD32 handle_size = ithread_get_handle_size();
+
+ for (i = 0; i < MAX_PROCESS_THREADS; i++)
+ {
+ ps_codec->apv_proc_thread_handle[i] = (UWORD8 *) ps_mem_rec->pv_base
+ + (i * handle_size);
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CTL_MUTEX];
+ {
+ ps_codec->pv_ctl_mutex = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_MUTEX];
+ {
+ ps_codec->pv_entropy_mutex = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_JOBQ];
+ {
+ ps_codec->pv_proc_jobq_buf = ps_mem_rec->pv_base;
+ ps_codec->i4_proc_jobq_buf_size = ps_mem_rec->u4_mem_size;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ENTROPY_JOBQ];
+ {
+ ps_codec->pv_entropy_jobq_buf = ps_mem_rec->pv_base;
+ ps_codec->i4_entropy_jobq_buf_size = ps_mem_rec->u4_mem_size;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_MAP];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store mb core coding status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols;
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_proc_map = pu1_buf + total_size
+ + max_mb_cols;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_DBLK_MAP];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store mb deblock status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ /*Align the memory offsets*/
+ total_size = ALIGN64(total_size);
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols;
+
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_deblk_map = pu1_buf + total_size
+ + max_mb_cols;
+
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_ME_MAP];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store mb me status of an entire frame */
+ total_size = max_mb_cnt;
+
+ /* add an additional 1 row of bytes to evade the special case of row 0 */
+ total_size += max_mb_cols;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols;
+ }
+ else
+ {
+ ps_codec->as_process[i].pu1_me_map = pu1_buf + total_size
+ + max_mb_cols;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_DPB_MGR];
+ {
+ ps_codec->pv_dpb_mgr = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_PROC_SCRATCH];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+ /* sizes of pred buffers, fwd transform output, temp buffer for inverse transform and half pel plane */
+ WORD32 size_pred_luma, size_pred_chroma, size_fwd, size_inv, size_hp;
+
+ /* temp var */
+ WORD32 size = 0;
+
+ /* size to hold intra/inter prediction buffer */
+ size_pred_luma = sizeof(UWORD8) * 16 * 16;
+ size_pred_chroma = sizeof(UWORD8) * 8 * 16;
+
+ /* size to hold fwd transform output */
+ size_fwd = sizeof(WORD16) * SIZE_TRANS_BUFF;
+
+ /* size to hold temporary data during inverse transform */
+ size_inv = sizeof(WORD32) * SIZE_TMP_BUFF_ITRANS;
+
+ /* size to hold half pel plane buffers */
+ size_hp = sizeof(UWORD8) * (HP_BUFF_WD * HP_BUFF_HT);
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ /* prediction buffer */
+ ps_codec->as_process[i].pu1_pred_mb = (void *) (pu1_buf + size);
+ ps_codec->as_process[i].i4_pred_strd = 16;
+ size += size_pred_luma;
+ size = ALIGN64(size);
+
+ /* prediction buffer */
+ ps_codec->as_process[i].pu1_ref_mb_intra_4x4 = (void *) (pu1_buf
+ + size);
+ size += size_pred_luma;
+ size = ALIGN64(size);
+
+ /* prediction buffer intra 16x16 */
+ ps_codec->as_process[i].pu1_pred_mb_intra_16x16 = (void *) (pu1_buf
+ + size);
+ size += size_pred_luma;
+ size = ALIGN64(size);
+
+ /* prediction buffer intra 16x16 plane*/
+ ps_codec->as_process[i].pu1_pred_mb_intra_16x16_plane =
+ (void *) (pu1_buf + size);
+ size += size_pred_luma;
+ size = ALIGN64(size);
+
+ /* prediction buffer intra chroma*/
+ ps_codec->as_process[i].pu1_pred_mb_intra_chroma = (void *) (pu1_buf
+ + size);
+ size += size_pred_chroma;
+ size = ALIGN64(size);
+
+ /* prediction buffer intra chroma plane*/
+ ps_codec->as_process[i].pu1_pred_mb_intra_chroma_plane =
+ (void *) (pu1_buf + size);
+ size += size_pred_chroma;
+ size = ALIGN64(size);
+
+ /* Fwd transform output */
+ ps_codec->as_process[i].pi2_res_buf = (void *) (pu1_buf + size);
+ ps_codec->as_process[i].i4_res_strd = 16;
+ size += size_fwd;
+ size = ALIGN64(size);
+
+ /* Fwd transform output */
+ ps_codec->as_process[i].pi2_res_buf_intra_4x4 = (void *) (pu1_buf
+ + size);
+ size += size_fwd;
+ size = ALIGN64(size);
+
+ /* scratch buffer used during inverse transform */
+ ps_codec->as_process[i].pv_scratch_buff = (void *) (pu1_buf + size);
+ size += size_inv;
+ size = ALIGN64(size);
+
+ /* Buffers for holding half_x , half_y and half_xy values */
+ ps_codec->as_process[i].pu1_half_x = (void *) (pu1_buf + size);
+ size += size_hp;
+ size = ALIGN64(size);
+
+ ps_codec->as_process[i].pu1_half_y = (void *) (pu1_buf + size);
+ size += size_hp;
+ size = ALIGN64(size);
+
+ ps_codec->as_process[i].pu1_half_xy = (void *) (pu1_buf + size);
+ size += size_hp;
+ size = ALIGN64(size);
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_QUANT_PARAM];
+ {
+ /* pointer to storage space */
+ UWORD8 *pu1_buf = (UWORD8 *) ps_mem_rec->pv_base;
+
+ /* size of qp, threshold matrix, fwd scaling list for one plane */
+ WORD32 size_quant_param, size_thres_mat, size_fwd_weight_mat,
+ size_satqd_weight_mat;
+
+ /* temp var */
+ WORD32 total_size = 0;
+
+ /* size of quantization parameter list of 1 plane */
+ size_quant_param = ALIGN64(sizeof(quant_params_t));
+
+ /* size of threshold matrix for quantization
+ * (assuming the transform_8x8_flag is disabled).
+ * for 1 plane */
+ size_thres_mat = ALIGN64(sizeof(WORD16) * 4 * 4);
+
+ /* size of forward weight matrix for quantization
+ * (assuming the transform_8x8_flag is disabled).
+ * for 1 plane */
+ size_fwd_weight_mat = ALIGN64(sizeof(WORD16) * 4 * 4);
+
+ /* size of SATQD matrix*/
+ size_satqd_weight_mat = ALIGN64(sizeof(UWORD16) * 9);
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ quant_params_t **ps_qp_params = ps_codec->as_process[i].ps_qp_params;
+
+ /* quantization param structure */
+ ps_qp_params[0] = (quant_params_t *) (pu1_buf + total_size);
+ total_size = total_size + size_quant_param;
+ ps_qp_params[1] = (quant_params_t *) (pu1_buf + total_size);
+ total_size = total_size + size_quant_param;
+ ps_qp_params[2] = (quant_params_t *) (pu1_buf + total_size);
+ total_size = total_size + size_quant_param;
+
+ /* threshold matrix for quantization */
+ ps_qp_params[0]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_thres_mat;
+ ps_qp_params[1]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_thres_mat;
+ ps_qp_params[2]->pu2_thres_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_thres_mat;
+
+ /* fwd weight matrix */
+ ps_qp_params[0]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_fwd_weight_mat;
+ ps_qp_params[1]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_fwd_weight_mat;
+ ps_qp_params[2]->pu2_weigh_mat = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_fwd_weight_mat;
+
+ /* threshold matrix for SATQD */
+ ps_qp_params[0]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_satqd_weight_mat;
+ ps_qp_params[1]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_satqd_weight_mat;
+ ps_qp_params[2]->pu2_sad_thrsh = (void *) (pu1_buf + total_size);
+ total_size = total_size + size_satqd_weight_mat;
+
+ total_size = ALIGN128(total_size);
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_TOP_ROW_SYN_INFO];
+ {
+ /* total size of the mem record */
+ WORD32 total_size = 0, size_csbp, size_intra_modes, size_mv;
+
+ /* pointer to buffer */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* size in bytes to store 1 row of mb_info_t */
+ /* one additional mb, to avoid checking end of row condition */
+ size_csbp = (max_mb_cols + 1) * sizeof(mb_info_t);
+
+ /* size in bytes to store 1 row of intra macroblock sub modes */
+ size_intra_modes = max_mb_cols * sizeof(UWORD8) * 16;
+
+ /* size in bytes to store 1 row + 1 of enc_pu_t */
+ /* one additional mb, to avoid checking end of row condition */
+ size_mv = (max_mb_cols + 1) * sizeof(enc_pu_t);
+
+ /* total size per proc ctxt */
+ total_size = size_csbp + size_intra_modes + size_mv;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
+ (mb_info_t *) pu1_buf;
+ ps_codec->as_process[i].pu1_top_mb_intra_modes_base = pu1_buf
+ + size_csbp;
+ ps_codec->as_process[i].ps_top_row_pu_base =
+ (enc_pu_t *) (pu1_buf + size_csbp
+ + size_intra_modes);
+ }
+ else
+ {
+ ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
+ (mb_info_t *) (pu1_buf + total_size);
+ ps_codec->as_process[i].pu1_top_mb_intra_modes_base = pu1_buf
+ + total_size + size_csbp;
+ ps_codec->as_process[i].ps_top_row_pu_base =
+ (enc_pu_t *) (pu1_buf + total_size + size_csbp
+ + size_intra_modes);
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_BS_QP];
+ {
+ UWORD8 *pu1_buf_ping, *pu1_buf_pong;
+
+ /* total size of the mem record */
+ WORD32 total_size = 0;
+
+ /* size in bytes to store vertical edge bs, horizontal edge bs and qp of every mb*/
+ WORD32 vert_bs_size, horz_bs_size, qp_size;
+
+ /* vertical edge bs = total number of vertical edges * number of bytes per each edge */
+ /* total num of v edges = total mb * 4 (assuming transform_8x8_flag = 0),
+ * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+ vert_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+ /* horizontal edge bs = total number of horizontal edges * number of bytes per each edge */
+ /* total num of h edges = total mb * 4 (assuming transform_8x8_flag = 0),
+ * each edge is formed by 4 pairs of subblks, requiring 4 bytes to storing bs */
+ horz_bs_size = ALIGN64(max_mb_cnt * 4 * 4);
+
+ /* qp of each mb requires 1 byte */
+ qp_size = ALIGN64(max_mb_cnt);
+
+ /* total size */
+ total_size = vert_bs_size + horz_bs_size + qp_size;
+
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ if (i < MAX_PROCESS_CTXT / 2)
+ {
+ pu1_buf_ping = (UWORD8 *) ps_mem_rec->pv_base;
+
+ /* vertical edge bs storage space */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs =
+ (UWORD32 *) pu1_buf_ping;
+ pu1_buf_ping += vert_bs_size;
+
+ /* horizontal edge bs storage space */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs =
+ (UWORD32 *) pu1_buf_ping;
+ pu1_buf_ping += horz_bs_size;
+
+ /* qp */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp =
+ (UWORD8 *) pu1_buf_ping;
+ pu1_buf_ping += qp_size;
+ }
+ else
+ {
+ pu1_buf_pong = (UWORD8 *) ps_mem_rec->pv_base;
+ pu1_buf_pong += total_size;
+
+ /* vertical edge bs storage space */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_vert_bs =
+ (UWORD32 *) pu1_buf_pong;
+ pu1_buf_pong += vert_bs_size;
+
+ /* horizontal edge bs storage space */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu4_pic_horz_bs =
+ (UWORD32 *) pu1_buf_pong;
+ pu1_buf_pong += horz_bs_size;
+
+ /* qp */
+ ps_codec->as_process[i].s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp =
+ (UWORD8 *) pu1_buf_pong;
+ pu1_buf_pong += qp_size;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_INP_PIC];
+ {
+ ps_codec->pv_inp_buf_mgr_base = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_OUT];
+ {
+ ps_codec->pv_out_buf_mgr_base = ps_mem_rec->pv_base;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_CSC];
+ {
+ ps_codec->pu1_y_csc_buf_base = ps_mem_rec->pv_base;
+ ps_codec->pu1_uv_csc_buf_base = (UWORD8 *) ps_mem_rec->pv_base
+ + (max_ht_luma * max_wd_luma);
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_REF_PIC];
+ {
+ /* size of buf mgr struct */
+ WORD32 size = ih264_buf_mgr_size();
+
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* pic buffer mgr */
+ ps_codec->pv_ref_buf_mgr_base = pu1_buf;
+
+ /* picture bank */
+ ps_codec->pv_pic_buf_base = pu1_buf + size;
+ ps_codec->i4_total_pic_buf_size = ps_mem_rec->u4_mem_size - size;
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
+ {
+ /* temp var */
+ UWORD8 *pu1_buf = ps_mem_rec->pv_base;
+
+ /* size of nmb ctxt */
+ WORD32 size = MAX_NMB * sizeof(mb_info_nmb_t);
+
+ UWORD32 nmb_cntr, subpel_buf_size;
+
+ /* init nmb info structure pointer in all proc ctxts */
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ ps_codec->as_process[i].ps_nmb_info = (mb_info_nmb_t *) (pu1_buf);
+
+ pu1_buf += size;
+ }
+
+ subpel_buf_size = MB_SIZE * MB_SIZE * sizeof(UWORD8);
+
+ /* adjusting pointers for nmb halfpel buffer */
+ for (i = 0; i < MAX_PROCESS_CTXT; i++)
+ {
+ mb_info_nmb_t* ps_mb_info_nmb =
+ &ps_codec->as_process[i].ps_nmb_info[0];
+
+ for (nmb_cntr = 0; nmb_cntr < MAX_NMB; nmb_cntr++)
+ {
+ ps_mb_info_nmb[nmb_cntr].pu1_best_sub_pel_buf = pu1_buf;
+
+ pu1_buf = pu1_buf + subpel_buf_size;
+
+ ps_mb_info_nmb[nmb_cntr].u4_bst_spel_buf_strd = MB_SIZE;
+ }
+ }
+ }
+
+ ps_mem_rec = &ps_mem_rec_base[MEM_REC_RC];
+ {
+ ih264e_get_rate_control_mem_tab(&ps_codec->s_rate_control, ps_mem_rec,
+ USE_BASE);
+ }
+
+ /* init codec ctxt */
+ status = ih264e_init(ps_codec);
+
+ return status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Hands the encoder's memory records back to the application
+*
+* @par Description:
+*  When i4_init_done is not 1 the call fails with IH264E_INIT_NOT_DONE and the
+*  fatal-error bit set. Otherwise the worker threads are joined, MEM_REC_CNT
+*  records are copied from the codec's backup table into the caller's array,
+*  and ih264_list_free / ithread_mutex_destroy / ih264_buf_mgr_free are called
+*  on the job queues, the control and entropy mutexes and the mv, ref, input
+*  and output buffer managers.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure; its ps_mem_rec array receives the
+*  records
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns IV_SUCCESS, or IV_FAIL when the codec has not been initialised
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_retrieve_memrec(iv_obj_t *ps_codec_obj,
+                                     void *pv_api_ip,
+                                     void *pv_api_op)
+{
+    /* typed views of the API arguments */
+    ih264e_retrieve_mem_rec_ip_t *ps_rtrv_ip = pv_api_ip;
+    ih264e_retrieve_mem_rec_op_t *ps_rtrv_op = pv_api_op;
+
+    /* encoder context behind the API handle */
+    codec_t *ps_enc = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    if (1 == ps_enc->i4_init_done)
+    {
+        /* worker threads are joined before anything is torn down */
+        ih264e_join_threads(ps_enc);
+
+        /* report the records that were handed to the encoder library */
+        memcpy(ps_rtrv_ip->s_ive_ip.ps_mem_rec, ps_enc->ps_mem_rec_backup,
+               MEM_REC_CNT * (sizeof(iv_mem_rec_t)));
+        ps_rtrv_op->s_ive_op.u4_num_mem_rec_filled = MEM_REC_CNT;
+
+        /* job queues */
+        ih264_list_free(ps_enc->pv_entropy_jobq);
+        ih264_list_free(ps_enc->pv_proc_jobq);
+
+        /* mutexes */
+        ithread_mutex_destroy(ps_enc->pv_ctl_mutex);
+        ithread_mutex_destroy(ps_enc->pv_entropy_mutex);
+
+        /* buffer managers */
+        ih264_buf_mgr_free((buf_mgr_t *)ps_enc->pv_mv_buf_mgr);
+        ih264_buf_mgr_free((buf_mgr_t *)ps_enc->pv_ref_buf_mgr);
+        ih264_buf_mgr_free((buf_mgr_t *)ps_enc->pv_inp_buf_mgr);
+        ih264_buf_mgr_free((buf_mgr_t *)ps_enc->pv_out_buf_mgr);
+
+        return IV_SUCCESS;
+    }
+
+    /* init was never completed: flag a fatal error, leave everything alone */
+    ps_rtrv_op->s_ive_op.u4_error_code |= 1 << IVE_FATALERROR;
+    ps_rtrv_op->s_ive_op.u4_error_code |= IH264E_INIT_NOT_DONE;
+
+    return IV_FAIL;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Puts the encoder into flush mode
+*
+* @par Description:
+*  Sets i4_flush_mode in the codec context to 1 and clears the error code of
+*  the output structure
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (not used)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns IV_SUCCESS
+*
+* @remarks Only the flag is raised here; this routine does no other work
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_flush_mode(iv_obj_t *ps_codec_obj,
+                                    void *pv_api_ip,
+                                    void *pv_api_op)
+{
+    /* output structure of the flush control call */
+    ih264e_ctl_flush_op_t *ps_flush_op = pv_api_op;
+
+    /* encoder context behind the API handle */
+    codec_t *ps_enc = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    UNUSED(pv_api_ip);
+
+    /* raise the flush flag */
+    ps_enc->i4_flush_mode = 1;
+
+    /* nothing can go wrong in this call */
+    ps_flush_op->s_ive_op.u4_error_code = 0;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets encoder buffer requirements
+*
+* @par Description:
+*  From the max width and max height given in the input structure (each passed
+*  through ALIGN16) and the input colour format, fills in the number of input
+*  components, the minimum size of each input component, the number and
+*  minimum size of output (bitstream) components and the minimum number of
+*  input and output buffers, and returns this information to the caller.
+*
+*  If the colour format is none of IV_YUV_420P, IV_YUV_422ILE, IV_RGB_565,
+*  IV_RGBA_8888, IV_YUV_420SP_UV or IV_YUV_420SP_VU, the input component count
+*  and the first three minimum input sizes are reported as 0 instead of being
+*  left unwritten.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level (not used)
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_get_buf_info(iv_obj_t *ps_codec_obj,
+                                  void *pv_api_ip,
+                                  void *pv_api_op)
+{
+    /* ctrl call I/O structures */
+    ih264e_ctl_getbufinfo_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_getbufinfo_op_t *ps_op = pv_api_op;
+
+    /* frame dimensions after ALIGN16 */
+    WORD32 wd = ALIGN16(ps_ip->s_ive_ip.u4_max_wd);
+    WORD32 ht = ALIGN16(ps_ip->s_ive_ip.u4_max_ht);
+    WORD32 i;
+
+    UNUSED(ps_codec_obj);
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    /* Defined fallback: without this an unrecognised colour format would
+     * leave the input-buffer fields of the output structure unwritten */
+    ps_op->s_ive_op.u4_inp_comp_cnt = 0;
+    ps_op->s_ive_op.au4_min_in_buf_size[0] = 0;
+    ps_op->s_ive_op.au4_min_in_buf_size[1] = 0;
+    ps_op->s_ive_op.au4_min_in_buf_size[2] = 0;
+
+    /* Number of components in input buffers required for codec &
+     * Minimum sizes of each component in input buffer required */
+    switch (ps_ip->s_ive_ip.e_inp_color_fmt)
+    {
+        case IV_YUV_420P:
+            /* full size luma plane, two quarter size chroma planes */
+            ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420_COMP;
+            ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht;
+            ps_op->s_ive_op.au4_min_in_buf_size[1] = (wd >> 1) * (ht >> 1);
+            ps_op->s_ive_op.au4_min_in_buf_size[2] = (wd >> 1) * (ht >> 1);
+            break;
+
+        case IV_YUV_422ILE:
+            /* single buffer, 2 bytes per pixel */
+            ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_422ILE_COMP;
+            ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2;
+            break;
+
+        case IV_RGB_565:
+            /* single buffer, 2 bytes per pixel */
+            ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGB565_COMP;
+            ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 2;
+            break;
+
+        case IV_RGBA_8888:
+            /* single buffer, 4 bytes per pixel */
+            ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_RGBA8888_COMP;
+            ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht * 4;
+            break;
+
+        case IV_YUV_420SP_UV:
+        case IV_YUV_420SP_VU:
+            /* luma plane plus one half height chroma plane */
+            ps_op->s_ive_op.u4_inp_comp_cnt = MIN_RAW_BUFS_420SP_COMP;
+            ps_op->s_ive_op.au4_min_in_buf_size[0] = wd * ht;
+            ps_op->s_ive_op.au4_min_in_buf_size[1] = wd * (ht >> 1);
+            break;
+
+        default:
+            /* unrecognised format: keep the zeroed fallback values */
+            break;
+    }
+
+    /* Number of components in output buffers required for codec &
+     * Minimum sizes of each component in output buffer required */
+    ps_op->s_ive_op.u4_out_comp_cnt = MIN_BITS_BUFS_COMP;
+
+    for (i = 0; i < (WORD32) ps_op->s_ive_op.u4_out_comp_cnt; i++)
+    {
+        ps_op->s_ive_op.au4_min_out_buf_size[i] = (wd * ht * 3) >> 1;
+    }
+
+    ps_op->s_ive_op.u4_min_inp_bufs = MIN_INP_BUFS;
+    ps_op->s_ive_op.u4_min_out_bufs = MIN_OUT_BUFS;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the picture dimensions in the config structure
+*
+* @par Description:
+*  Stores the requested width and height as display dimensions, stores their
+*  ALIGN16 values as coded dimensions, derives the size in macroblocks
+*  (coded dimension >> 4), and copies the stride and the time stamp
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_dimensions(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_dimensions_ip_t *ps_dim_ip = pv_api_ip;
+    ih264e_ctl_set_dimensions_op_t *ps_dim_op = pv_api_op;
+
+    ps_dim_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_dim_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_dim_ip->s_ive_ip.u4_timestamp_high;
+
+    /* display size and stride are taken over unchanged */
+    ps_cfg->u4_disp_wd = ps_dim_ip->s_ive_ip.u4_wd;
+    ps_cfg->u4_disp_ht = ps_dim_ip->s_ive_ip.u4_ht;
+    ps_cfg->u4_strd = ps_dim_ip->s_ive_ip.u4_strd;
+
+    /* coded width and the number of macroblock columns */
+    ps_cfg->u4_wd = ALIGN16(ps_dim_ip->s_ive_ip.u4_wd);
+    ps_cfg->i4_wd_mbs = ps_cfg->u4_wd >> 4;
+
+    /* coded height and the number of macroblock rows */
+    ps_cfg->u4_ht = ALIGN16(ps_dim_ip->s_ive_ip.u4_ht);
+    ps_cfg->i4_ht_mbs = ps_cfg->u4_ht >> 4;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records source and target frame rates in the config structure
+*
+* @par Description:
+*  Copies u4_src_frame_rate, u4_tgt_frame_rate and the time stamp from the
+*  input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_frame_rate(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_frame_rate_ip_t *ps_rate_ip = pv_api_ip;
+    ih264e_ctl_set_frame_rate_op_t *ps_rate_op = pv_api_op;
+
+    ps_rate_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_rate_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_rate_ip->s_ive_ip.u4_timestamp_high;
+
+    /* frame rates */
+    ps_cfg->u4_tgt_frame_rate = ps_rate_ip->s_ive_ip.u4_tgt_frame_rate;
+    ps_cfg->u4_src_frame_rate = ps_rate_ip->s_ive_ip.u4_src_frame_rate;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the target bit rate in the config structure
+*
+* @par Description:
+*  Copies u4_target_bitrate and the time stamp from the input structure into
+*  the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_bit_rate(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_bitrate_ip_t *ps_br_ip = pv_api_ip;
+    ih264e_ctl_set_bitrate_op_t *ps_br_op = pv_api_op;
+
+    ps_br_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_br_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_br_ip->s_ive_ip.u4_timestamp_high;
+
+    /* bit rate */
+    ps_cfg->u4_target_bitrate = ps_br_ip->s_ive_ip.u4_target_bitrate;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the requested frame type in the config structure
+*
+* @par Description:
+*  Copies e_frame_type and the time stamp from the input structure into the
+*  config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks not a sticky tag
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_frame_type(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_frame_type_ip_t *ps_ftype_ip = pv_api_ip;
+    ih264e_ctl_set_frame_type_op_t *ps_ftype_op = pv_api_op;
+
+    ps_ftype_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_ftype_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_ftype_ip->s_ive_ip.u4_timestamp_high;
+
+    /* frame type */
+    ps_cfg->e_frame_type = ps_ftype_ip->s_ive_ip.e_frame_type;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the quantization parameters in the config structure
+*
+* @par Description:
+*  Copies the minimum, maximum and default qp for I, P and B frames, together
+*  with the time stamp, from the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_qp(void *pv_api_ip,
+                                 void *pv_api_op,
+                                 cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_qp_ip_t *ps_ip = pv_api_ip;
+    ih264e_ctl_set_qp_op_t *ps_op = pv_api_op;
+
+    ps_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_ip->s_ive_ip.u4_timestamp_high;
+
+    /* lower bounds */
+    ps_cfg->u4_i_qp_min = ps_ip->s_ive_ip.u4_i_qp_min;
+    ps_cfg->u4_p_qp_min = ps_ip->s_ive_ip.u4_p_qp_min;
+    ps_cfg->u4_b_qp_min = ps_ip->s_ive_ip.u4_b_qp_min;
+
+    /* upper bounds */
+    ps_cfg->u4_i_qp_max = ps_ip->s_ive_ip.u4_i_qp_max;
+    ps_cfg->u4_p_qp_max = ps_ip->s_ive_ip.u4_p_qp_max;
+    ps_cfg->u4_b_qp_max = ps_ip->s_ive_ip.u4_b_qp_max;
+
+    /* default values */
+    ps_cfg->u4_i_qp = ps_ip->s_ive_ip.u4_i_qp;
+    ps_cfg->u4_p_qp = ps_ip->s_ive_ip.u4_p_qp;
+    ps_cfg->u4_b_qp = ps_ip->s_ive_ip.u4_b_qp;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the encoding mode in the config structure
+*
+* @par Description:
+*  Copies e_enc_mode and the time stamp from the input structure into the
+*  config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_enc_mode(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_enc_mode_ip_t *ps_mode_ip = pv_api_ip;
+    ih264e_ctl_set_enc_mode_op_t *ps_mode_op = pv_api_op;
+
+    ps_mode_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_mode_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_mode_ip->s_ive_ip.u4_timestamp_high;
+
+    /* encoding mode */
+    ps_cfg->e_enc_mode = ps_mode_ip->s_ive_ip.e_enc_mode;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the vbv parameters in the config structure
+*
+* @par Description:
+*  Copies u4_vbv_buf_size, u4_vbv_buffer_delay and the time stamp from the
+*  input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264e_set_vbv_params(void *pv_api_ip,
+                                         void *pv_api_op,
+                                         cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_vbv_params_ip_t *ps_vbv_ip = pv_api_ip;
+    ih264e_ctl_set_vbv_params_op_t *ps_vbv_op = pv_api_op;
+
+    ps_vbv_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_vbv_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_vbv_ip->s_ive_ip.u4_timestamp_high;
+
+    /* vbv settings */
+    ps_cfg->u4_vbv_buffer_delay = ps_vbv_ip->s_ive_ip.u4_vbv_buffer_delay;
+    ps_cfg->u4_vbv_buf_size = ps_vbv_ip->s_ive_ip.u4_vbv_buf_size;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the AIR parameters in the config structure
+*
+* @par Description:
+*  Copies e_air_mode, u4_air_refresh_period and the time stamp from the input
+*  structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_air_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_air_params_ip_t *ps_air_ip = pv_api_ip;
+    ih264e_ctl_set_air_params_op_t *ps_air_op = pv_api_op;
+
+    ps_air_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_air_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_air_ip->s_ive_ip.u4_timestamp_high;
+
+    /* AIR settings */
+    ps_cfg->u4_air_refresh_period = ps_air_ip->s_ive_ip.u4_air_refresh_period;
+    ps_cfg->e_air_mode = ps_air_ip->s_ive_ip.e_air_mode;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the motion estimation parameters in the config structure
+*
+* @par Description:
+*  Copies the hpel, qpel, fast sad and alt ref enables, the x and y search
+*  ranges, the me speed preset and the time stamp from the input structure
+*  into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_me_params(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_me_params_ip_t *ps_me_ip = pv_api_ip;
+    ih264e_ctl_set_me_params_op_t *ps_me_op = pv_api_op;
+
+    ps_me_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_me_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_me_ip->s_ive_ip.u4_timestamp_high;
+
+    /* search window and speed preset */
+    ps_cfg->u4_srch_rng_x = ps_me_ip->s_ive_ip.u4_srch_rng_x;
+    ps_cfg->u4_srch_rng_y = ps_me_ip->s_ive_ip.u4_srch_rng_y;
+    ps_cfg->u4_me_speed_preset = ps_me_ip->s_ive_ip.u4_me_speed_preset;
+
+    /* feature enables */
+    ps_cfg->u4_enable_hpel = ps_me_ip->s_ive_ip.u4_enable_hpel;
+    ps_cfg->u4_enable_qpel = ps_me_ip->s_ive_ip.u4_enable_qpel;
+    ps_cfg->u4_enable_fast_sad = ps_me_ip->s_ive_ip.u4_enable_fast_sad;
+    ps_cfg->u4_enable_alt_ref = ps_me_ip->s_ive_ip.u4_enable_alt_ref;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the intra/inter prediction estimation parameters in the config
+*  structure
+*
+* @par Description:
+*  Copies u4_enable_intra_4x4, u4_enc_speed_preset and the time stamp from
+*  the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_ipe_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_ipe_params_ip_t *ps_ipe_ip = pv_api_ip;
+    ih264e_ctl_set_ipe_params_op_t *ps_ipe_op = pv_api_op;
+
+    ps_ipe_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_ipe_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_ipe_ip->s_ive_ip.u4_timestamp_high;
+
+    /* prediction estimation settings */
+    ps_cfg->u4_enc_speed_preset = ps_ipe_ip->s_ive_ip.u4_enc_speed_preset;
+    ps_cfg->u4_enable_intra_4x4 = ps_ipe_ip->s_ive_ip.u4_enable_intra_4x4;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the GOP parameters in the config structure
+*
+* @par Description:
+*  Copies u4_i_frm_interval, u4_idr_frm_interval, u4_num_b_frames and the
+*  time stamp from the input structure into the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_gop_params(void *pv_api_ip,
+                                        void *pv_api_op,
+                                        cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_gop_params_ip_t *ps_gop_ip = pv_api_ip;
+    ih264e_ctl_set_gop_params_op_t *ps_gop_op = pv_api_op;
+
+    ps_gop_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_gop_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_gop_ip->s_ive_ip.u4_timestamp_high;
+
+    /* GOP structure */
+    ps_cfg->u4_num_b_frames = ps_gop_ip->s_ive_ip.u4_num_b_frames;
+    ps_cfg->u4_idr_frm_interval = ps_gop_ip->s_ive_ip.u4_idr_frm_interval;
+    ps_cfg->u4_i_frm_interval = ps_gop_ip->s_ive_ip.u4_i_frm_interval;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the profile in the config structure
+*
+* @par Description:
+*  Copies e_profile and the time stamp from the input structure into the
+*  config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IV_STATUS_T ih264_set_profile_params(void *pv_api_ip,
+                                            void *pv_api_op,
+                                            cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_profile_params_ip_t *ps_prof_ip = pv_api_ip;
+    ih264e_ctl_set_profile_params_op_t *ps_prof_op = pv_api_op;
+
+    ps_prof_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_prof_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_prof_ip->s_ive_ip.u4_timestamp_high;
+
+    /* profile */
+    ps_cfg->e_profile = ps_prof_ip->s_ive_ip.e_profile;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the disable deblock level in the config structure
+*
+* @par Description:
+*  Copies u4_disable_deblock_level and the time stamp from the input
+*  structure into the config structure. Level 0 means no disabling and level
+*  4 means disable completely. 1, 2, 3 are intermediate levels that control
+*  amount of deblocking done.
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264_set_deblock_params(void *pv_api_ip,
+                                       void *pv_api_op,
+                                       cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_deblock_params_ip_t *ps_dblk_ip = pv_api_ip;
+    ih264e_ctl_set_deblock_params_op_t *ps_dblk_op = pv_api_op;
+
+    ps_dblk_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_dblk_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_dblk_ip->s_ive_ip.u4_timestamp_high;
+
+    /* deblock level */
+    ps_cfg->u4_disable_deblock_level = ps_dblk_ip->s_ive_ip.u4_disable_deblock_level;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Records the number of cores in the config structure
+*
+* @par Description:
+*  Stores the smaller of the requested u4_num_cores and MAX_PROCESS_THREADS,
+*  together with the time stamp, in the config structure
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @param[out] ps_cfg
+*  Pointer to config structure to be updated
+*
+* @returns IV_SUCCESS
+*
+* @remarks The number of encoder threads is limited to MAX_PROCESS_THREADS
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_set_num_cores(void *pv_api_ip,
+                                   void *pv_api_op,
+                                   cfg_params_t *ps_cfg)
+{
+    /* typed views of the API arguments */
+    ih264e_ctl_set_num_cores_ip_t *ps_cores_ip = pv_api_ip;
+    ih264e_ctl_set_num_cores_op_t *ps_cores_op = pv_api_op;
+
+    ps_cores_op->s_ive_op.u4_error_code = 0;
+
+    /* time stamp attached to this config update */
+    ps_cfg->u4_timestamp_low = ps_cores_ip->s_ive_ip.u4_timestamp_low;
+    ps_cfg->u4_timestamp_high = ps_cores_ip->s_ive_ip.u4_timestamp_high;
+
+    /* never ask for more threads than the encoder supports */
+    ps_cfg->u4_num_cores = MIN(ps_cores_ip->s_ive_ip.u4_num_cores, MAX_PROCESS_THREADS);
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Resets encoder state
+*
+* @par Description:
+* Resets encoder state by calling ih264e_init()
+*
+* @param[in] ps_codec_obj
+* Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_reset(iv_obj_t *ps_codec_obj,
+                           void *pv_api_ip,
+                           void *pv_api_op)
+{
+    /* output structure of the reset ctrl call */
+    ih264e_ctl_reset_op_t *ps_reset_op = pv_api_op;
+
+    /* codec context attached to the API level handle */
+    codec_t *ps_ctxt = (codec_t *) (ps_codec_obj->pv_codec_handle);
+
+    /* the input structure is not read by this call */
+    UNUSED(pv_api_ip);
+
+    if (NULL == ps_ctxt)
+    {
+        /* no context to re-initialize: flag it, the call still returns success */
+        ps_reset_op->s_ive_op.u4_error_code = IH264E_INIT_NOT_DONE;
+        return IV_SUCCESS;
+    }
+
+    ps_reset_op->s_ive_op.u4_error_code = 0;
+    ih264e_init(ps_ctxt);
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Codec control call
+*
+* @par Description:
+*  Claims a config params set and dispatches to the handler of the sub-command
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns error status
+*
+* @remarks fails with IH264E_INIT_NOT_DONE if called before a successful init
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_ctl(iv_obj_t *ps_codec_obj,
+                         void *pv_api_ip,
+                         void *pv_api_op)
+{
+    /* codec ctxt */
+    codec_t *ps_codec = (codec_t *) ps_codec_obj->pv_codec_handle;
+
+    /* ctrl call I/O structures */
+    ih264e_ctl_setdefault_ip_t *ps_ctl_ip = pv_api_ip;
+    ih264e_ctl_setdefault_op_t *ps_ctl_op = pv_api_op;
+
+    /* ctrl call sub cmd */
+    IVE_CONTROL_API_COMMAND_TYPE_T sub_cmd = ps_ctl_ip->s_ive_ip.e_sub_cmd;
+
+    /* error status */
+    IV_STATUS_T ret = 0;
+
+    /* temp var */
+    WORD32 i;
+    cfg_params_t *ps_cfg = NULL;
+
+    /* control call is for configuring encoding params, this is not to be called
+     * before a successful init call */
+    if (ps_codec->i4_init_done != 1)
+    {
+        ps_ctl_op->s_ive_op.u4_error_code |= 1 << IVE_FATALERROR;
+        ps_ctl_op->s_ive_op.u4_error_code |= IH264E_INIT_NOT_DONE;
+        return IV_FAIL;
+    }
+
+    /* make it thread safe */
+    ithread_mutex_lock(ps_codec->pv_ctl_mutex);
+
+    /* find a free config param set to hold current parameters */
+    for (i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
+    {
+        if (0 == ps_codec->as_cfg[i].u4_is_valid)
+        {
+            ps_cfg = &ps_codec->as_cfg[i];
+            break;
+        }
+    }
+
+    /* If none is free, then start overwriting from the head config params */
+    if (NULL == ps_cfg)
+    {
+        ps_cfg = &ps_codec->as_cfg[0];
+    }
+
+    ps_cfg->u4_is_valid = 1;
+
+    ps_cfg->e_cmd = sub_cmd;
+
+    switch (sub_cmd)
+    {
+        case IVE_CMD_CTL_SET_DIMENSIONS:
+            ret = ih264e_set_dimensions(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_FRAMERATE:
+            ret = ih264e_set_frame_rate(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_BITRATE:
+            ret = ih264e_set_bit_rate(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_FRAMETYPE:
+            ret = ih264e_set_frame_type(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_QP:
+            ret = ih264e_set_qp(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_ENC_MODE:
+            ret = ih264e_set_enc_mode(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_VBV_PARAMS:
+            ret = ih264e_set_vbv_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_AIR_PARAMS:
+            ret = ih264_set_air_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_ME_PARAMS:
+            ret = ih264_set_me_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_IPE_PARAMS:
+            ret = ih264_set_ipe_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_GOP_PARAMS:
+            ret = ih264_set_gop_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_PROFILE_PARAMS:
+            ret = ih264_set_profile_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_SET_DEBLOCK_PARAMS:
+            ret = ih264_set_deblock_params(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        case IVE_CMD_CTL_RESET:
+
+            /* served right away: release the claimed set (via ps_cfg, i may be past the array) */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_reset(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_SETDEFAULT:
+        {
+            /* ctrl call I/O structures */
+            ih264e_ctl_setdefault_op_t *ps_op = pv_api_op;
+
+            /* served right away: release the claimed set (via ps_cfg, i may be past the array) */
+            ps_cfg->u4_is_valid = 0;
+
+            /* error status */
+            ret = ih264e_set_default_params(ps_cfg);
+
+            ps_op->s_ive_op.u4_error_code = ret;
+
+            break;
+        }
+
+        case IVE_CMD_CTL_FLUSH:
+
+            /* served right away: release the claimed set (via ps_cfg, i may be past the array) */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_set_flush_mode(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_GETBUFINFO:
+
+            /* served right away: release the claimed set (via ps_cfg, i may be past the array) */
+            ps_cfg->u4_is_valid = 0;
+
+            ret = ih264e_get_buf_info(ps_codec_obj, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_CTL_GETVERSION:
+        {
+            /* ctrl call I/O structures */
+            ih264e_ctl_getversioninfo_ip_t *ps_ip = pv_api_ip;
+            ih264e_ctl_getversioninfo_op_t *ps_op = pv_api_op;
+
+            /* served right away: release the claimed set (via ps_cfg, i may be past the array) */
+            ps_cfg->u4_is_valid = 0;
+
+            /* error status */
+            ps_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+            if (ps_ip->s_ive_ip.u4_version_bufsize <= 0)
+            {
+                ps_op->s_ive_op.u4_error_code =
+                                IH264E_CXA_VERS_BUF_INSUFFICIENT;
+                ret = IV_FAIL;
+            }
+            else
+            {
+                ret = ih264e_get_version((CHAR *) ps_ip->s_ive_ip.pu1_version,
+                                         ps_ip->s_ive_ip.u4_version_bufsize);
+
+                if (ret != IV_SUCCESS)
+                {
+                    ps_op->s_ive_op.u4_error_code =
+                                    IH264E_CXA_VERS_BUF_INSUFFICIENT;
+                    ret = IV_FAIL;
+                }
+            }
+            break;
+        }
+
+        case IVE_CMD_CTL_SET_NUM_CORES:
+            ret = ih264e_set_num_cores(pv_api_ip, pv_api_op, ps_cfg);
+            break;
+
+        default:
+            /* nothing to queue: release the claimed set (via ps_cfg, i may be past the array) */
+            ps_cfg->u4_is_valid = 0;
+
+            DEBUG("Warning !! unrecognized control api command \n");
+            break;
+    }
+
+    ithread_mutex_unlock(ps_codec->pv_ctl_mutex);
+
+    return ret;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Codec entry point function. All the function calls to the codec are done
+* using this function with different values specified in command
+*
+* @par Description:
+* Arguments are tested for validity and then based on the command
+* appropriate function is called
+*
+* @param[in] ps_handle
+* API level handle for codec
+*
+* @param[in] pv_api_ip
+* Input argument structure
+*
+* @param[out] pv_api_op
+* Output argument structure
+*
+* @returns error_status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_api_function(iv_obj_t *ps_handle,
+                                void *pv_api_ip,
+                                void *pv_api_op)
+{
+    /*
+     * the api command is read from the second 32 bit word of the input
+     * structure
+     */
+    const WORD32 *pi4_ip_words = (const WORD32 *) pv_api_ip;
+
+    /* api command */
+    WORD32 i4_cmd = IV_CMD_NA;
+
+    /* status reported by the command handler */
+    WORD32 i4_status;
+
+    /* validate input / output structures before the command is read */
+    if (IV_SUCCESS != api_check_struct_sanity(ps_handle, pv_api_ip, pv_api_op))
+    {
+        DEBUG("error code = %d\n", *((UWORD32 *)pv_api_op + 1));
+        return IV_FAIL;
+    }
+
+    /* structures passed the sanity check, fetch the command */
+    i4_cmd = pi4_ip_words[1];
+
+    /* hand over to the handler of the requested command */
+    switch (i4_cmd)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+            i4_status = ih264e_get_num_rec(pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_FILL_NUM_MEM_REC:
+            i4_status = ih264e_fill_num_mem_rec(pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_INIT:
+            i4_status = ih264e_init_mem_rec(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IV_CMD_RETRIEVE_MEMREC:
+            i4_status = ih264e_retrieve_memrec(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_VIDEO_CTL:
+            i4_status = ih264e_ctl(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        case IVE_CMD_VIDEO_ENCODE:
+            i4_status = ih264e_encode(ps_handle, pv_api_ip, pv_api_op);
+            break;
+
+        default:
+            /* command not handled by this entry point */
+            i4_status = IV_FAIL;
+            break;
+    }
+    return (IV_STATUS_T) i4_status;
+}
diff --git a/encoder/ih264e_bitstream.c b/encoder/ih264e_bitstream.c
new file mode 100755
index 0000000..e5bfbe4
--- /dev/null
+++ b/encoder/ih264e_bitstream.c
@@ -0,0 +1,472 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_bitstream.c
+*
+* @brief
+* This file contains function definitions related to bitstream generation
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_bitstrm_init()
+* - ih264e_put_bits()
+* - ih264e_put_bit()
+* - ih264e_put_rbsp_trailing_bits()
+* - ih264e_put_uev()
+* - ih264e_put_sev()
+* - ih264e_put_nal_start_code_prefix()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_debug.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Initializes the encoder bitstream engine
+*
+* @par Description
+*  This routine needs to be called at start of slice/frame encode
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] pu1_bitstrm_buf
+*  bitstream buffer pointer where the encoded stream is generated in byte order
+*
+* @param[in] u4_max_bitstrm_size
+*  indicates maximum bitstream buffer size. (in bytes)
+*  If actual stream size exceeds the maximum size, encoder should
+*   1. Not corrupt data beyond u4_max_bitstrm_size bytes
+*   2. Report an error back to application indicating overflow
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_bitstrm_init(bitstrm_t *ps_bitstrm,
+                                   UWORD8 *pu1_bitstrm_buf,
+                                   UWORD32 u4_max_bitstrm_size)
+{
+    /* start with an empty scratch word and no pending zero bytes */
+    ps_bitstrm->i4_zero_bytes_run = 0;
+    ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE;
+    ps_bitstrm->u4_cur_word = 0;
+
+    /* attach the caller's buffer and begin writing at its first byte */
+    ps_bitstrm->u4_strm_buf_offset = 0;
+    ps_bitstrm->u4_max_strm_size = u4_max_bitstrm_size;
+    ps_bitstrm->pu1_strm_buffer = pu1_bitstrm_buf;
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief puts a code with specified number of bits into the bitstream
+*
+* @par Description
+* inserts code_len number of bits from lsb of code_val into the
+* bitstream. updates context members like u4_cur_word, u4_strm_buf_offset and
+* i4_bits_left_in_cw. If the total bytes (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @param[in] code_len
+* indicates code length (in bits) of code_val that would be inserted in
+* bitstream buffer size. Range of length[1:WORD_SIZE]
+*
+* @remarks Assumptions: all bits from bit position code_len to msb of
+* code_val shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bits(bitstrm_t *ps_bitstrm,
+                               UWORD32 u4_code_val,
+                               WORD32 code_len)
+{
+    UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word;
+    WORD32 bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw;
+
+
+    /* check assumptions made in the module */
+    ASSERT(code_len > 0 && code_len <= WORD_SIZE);
+
+    ASSERT((WORD_SIZE == code_len) ||
+           (0 == (u4_code_val >> code_len)));
+
+
+    /* sanity check on the bitstream engine state */
+    ASSERT(bits_left_in_cw > 0 && bits_left_in_cw <= WORD_SIZE);
+
+    ASSERT(ps_bitstrm->i4_zero_bytes_run <= EPB_ZERO_BYTES);
+
+    ASSERT(ps_bitstrm->pu1_strm_buffer != NULL);
+
+
+    if(bits_left_in_cw > code_len)
+    {
+        /*******************************************************************/
+        /* insert the code in local bitstream word and return              */
+        /* code is inserted in position of bits left (post decrement)      */
+        /*******************************************************************/
+        bits_left_in_cw -= code_len;
+        u4_cur_word |= (u4_code_val << bits_left_in_cw);
+
+        ps_bitstrm->u4_cur_word = u4_cur_word;
+        ps_bitstrm->i4_bits_left_in_cw = bits_left_in_cw;
+
+        return(IH264E_SUCCESS);
+    }
+    else
+    {
+        /********************************************************************/
+        /* 1. insert partial code corresponding to bits left in cur word    */
+        /* 2. flush all the bits of cur word to bitstream                   */
+        /* 3. insert emulation prevention bytes while flushing the bits     */
+        /* 4. insert remaining bits of code starting from msb of cur word   */
+        /* 5. update bitsleft in current word and stream buffer offset      */
+        /********************************************************************/
+        UWORD32 u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset;
+
+        UWORD32 u4_max_strm_size = ps_bitstrm->u4_max_strm_size;
+
+        WORD32 zero_run = ps_bitstrm->i4_zero_bytes_run;
+
+        UWORD8* pu1_strm_buf = ps_bitstrm->pu1_strm_buffer;
+
+        WORD32 i, rem_bits = (code_len - bits_left_in_cw);
+
+
+        /*********************************************************************/
+        /* Bitstream overflow check                                          */
+        /* NOTE: budgets the word plus the worst case of 2 epb bytes for it  */
+        /*********************************************************************/
+        if((u4_strm_buf_offset + (WORD_SIZE>>3) + 2) >= u4_max_strm_size)
+        {
+            /* return without corrupting the buffer beyond its size */
+            return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+        }
+
+        /* insert partial code corresponding to bits left in cur word */
+        u4_cur_word |= u4_code_val >> rem_bits;
+
+        for(i = WORD_SIZE; i > 0; i -= 8)
+        {
+            /* flush the bits in cur word byte by byte and copy to stream */
+            UWORD8 u1_next_byte = (u4_cur_word >> (i-8)) & 0xFF;
+
+            PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run);
+        }
+
+        /* remaining bits go to the msb side; rem_bits == 0 avoids a shift by WORD_SIZE */
+        u4_cur_word = rem_bits ? (u4_code_val << (WORD_SIZE - rem_bits)) : 0;
+
+        /* update the state variables and return success */
+        ps_bitstrm->u4_cur_word = u4_cur_word;
+        ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE - rem_bits;
+        ps_bitstrm->i4_zero_bytes_run = zero_run;
+        ps_bitstrm->u4_strm_buf_offset = u4_strm_buf_offset;
+        return (IH264E_SUCCESS);
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief inserts a 1-bit code into the bitstream
+*
+* @par Description
+* inserts 1bit lsb of code_val into the bitstream
+* updates context members like u4_cur_word, u4_strm_buf_offset and
+* i4_bits_left_in_cw. If the total words (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @remarks Assumptions: all bits from bit position 1 to msb of code_val
+* shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bit(bitstrm_t *ps_bitstrm, UWORD32 u4_code_val)
+{
+    /* a single bit is just a code of length one */
+    return ih264e_put_bits(ps_bitstrm, u4_code_val, 1);
+}
+
+/**
+******************************************************************************
+*
+* @brief inserts rbsp trailing bits at the end of stream buffer (NAL)
+*
+* @par Description
+* inserts rbsp trailing bits, updates context members like u4_cur_word and
+* i4_bits_left_in_cw and flushes the same in the bitstream buffer. If the
+* total words (u4_strm_buf_offset) exceeds max available size
+* (u4_max_strm_size), returns error without corrupting data beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_rbsp_trailing_bits(bitstrm_t *ps_bitstrm)
+{
+    WORD32 i;
+    UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word;
+    WORD32 bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw;
+    WORD32 bytes_left_in_cw = (bits_left_in_cw - 1) >> 3;
+
+    UWORD32 u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset;
+    UWORD32 u4_max_strm_size = ps_bitstrm->u4_max_strm_size;
+    WORD32 zero_run = ps_bitstrm->i4_zero_bytes_run;
+    UWORD8* pu1_strm_buf = ps_bitstrm->pu1_strm_buffer;
+
+    /*********************************************************************/
+    /* Bitstream overflow check                                          */
+    /* NOTE: budgets the bytes to flush plus the worst case of 2 epb     */
+    /*********************************************************************/
+    if((u4_strm_buf_offset + (WORD_SIZE>>3) - bytes_left_in_cw + 2) >=
+                    u4_max_strm_size)
+    {
+        /* return without corrupting the buffer beyond its size */
+        return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* insert a 1 at the end of current word (unsigned shift: may hit bit 31) */
+    u4_cur_word |= ((UWORD32)1 << (bits_left_in_cw - 1));
+
+    /* the word is filled from the msb side, so only its most significant */
+    /* (WORD_SIZE/8 - bytes_left_in_cw) bytes hold data and get flushed   */
+
+    for(i = WORD_SIZE; i > (bytes_left_in_cw*8); i -= 8)
+    {
+        /* flush the bits in cur word byte by byte and copy to stream */
+        UWORD8 u1_next_byte = (u4_cur_word >> (i-8)) & 0xFF;
+
+        PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run);
+    }
+
+    /* update the stream offset */
+    ps_bitstrm->u4_strm_buf_offset = u4_strm_buf_offset;
+
+    /* Default init values for scratch variables of bitstream context */
+    ps_bitstrm->u4_cur_word = 0;
+    ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE;
+    ps_bitstrm->i4_zero_bytes_run = 0;
+
+    return (IH264E_SUCCESS);
+}
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of a unsigned integer into bitstream
+*
+* @par Description
+* computes uev code for given syntax element and inserts the same into
+* bitstream by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_num
+* unsigned integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_uev(bitstrm_t *ps_bitstrm, UWORD32 u4_code_num)
+{
+    /* exp-golomb bit string is codeNum + 1 */
+    UWORD32 u4_code_word = u4_code_num + 1;
+
+    /* number of significant bits in the bit string */
+    UWORD32 u4_num_sig_bits;
+
+    GETRANGE(u4_num_sig_bits, u4_code_word);
+
+    /* (sig bits - 1) leading zeros followed by the bit string itself */
+    return ih264e_put_bits(ps_bitstrm,
+                           u4_code_word,
+                           (2 * u4_num_sig_bits - 1));
+}
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of a signed integer into bitstream
+*
+* @par Description
+* computes sev code for given syntax element and inserts the same into
+* bitstream by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] syntax_elem
+* signed integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_sev(bitstrm_t *ps_bitstrm, WORD32 syntax_elem)
+{
+    UWORD32 u4_mapped_num;
+    UWORD32 u4_code_word;
+    UWORD32 u4_num_sig_bits;
+
+    /* map the signed syntax element to its unsigned codeNum first, */
+    /* then code that number like an unsigned exp-golomb value      */
+    if(syntax_elem > 0)
+    {
+        /* positive x maps to 2x - 1 */
+        u4_mapped_num = (syntax_elem << 1) - 1;
+    }
+    else
+    {
+        /* zero and negative x map to 2 * abs(x) */
+        u4_mapped_num = ((-syntax_elem) << 1);
+    }
+
+    /* exp-golomb bit string is codeNum + 1 */
+    u4_code_word = u4_mapped_num + 1;
+
+    /* number of significant bits in the bit string */
+    GETRANGE(u4_num_sig_bits, u4_code_word);
+
+    /* (sig bits - 1) leading zeros followed by the bit string itself */
+    return ih264e_put_bits(ps_bitstrm,
+                           u4_code_word,
+                           (2 * u4_num_sig_bits - 1));
+}
+
+/**
+******************************************************************************
+*
+* @brief insert NAL start code prefix (0x000001) into bitstream with an option
+* of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001)
+*
+* @par Description
+* Although start code prefix could have been put by calling ih264e_put_bits(),
+* ih264e_put_nal_start_code_prefix() is specially added to make sure emulation
+* prevention insertion is not done for the NAL start code prefix which will
+* surely happen otherwise by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] insert_leading_zero_8bits
+* flag indicating if one more zero bytes needs to prefixed before start code
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_nal_start_code_prefix(bitstrm_t *ps_bitstrm,
+                                                WORD32 insert_leading_zero_8bits)
+{
+    /* position in the stream buffer where the next byte gets written */
+    UWORD32 u4_wr_idx = ps_bitstrm->u4_strm_buf_offset;
+
+    /* start of the stream buffer */
+    UWORD8 *pu1_out = ps_bitstrm->pu1_strm_buffer;
+
+    /* worst case is the 4 byte form; bail out if that might not fit */
+    if(ps_bitstrm->u4_max_strm_size <= (u4_wr_idx + 4))
+    {
+        return(IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* optional leading zero byte */
+    if(insert_leading_zero_8bits)
+    {
+        pu1_out[u4_wr_idx++] = 0x00;
+    }
+
+    /*
+     * start code prefix 0x00 00 01, written directly so that no emulation
+     * prevention byte gets inserted into it
+     */
+    pu1_out[u4_wr_idx++] = 0x00;
+    pu1_out[u4_wr_idx++] = 0x00;
+    pu1_out[u4_wr_idx++] = 0x01;
+
+    /* store the advanced write position back in the context */
+    ps_bitstrm->u4_strm_buf_offset = u4_wr_idx;
+
+    return (IH264E_SUCCESS);
+}
+
diff --git a/encoder/ih264e_bitstream.h b/encoder/ih264e_bitstream.h
new file mode 100755
index 0000000..21360cc
--- /dev/null
+++ b/encoder/ih264e_bitstream.h
@@ -0,0 +1,401 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_bitstream.h
+*
+* @brief
+* This file contains encoder bitstream engine related structures and
+* interface prototypes
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_BITSTREAM_H_
+#define IH264E_BITSTREAM_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief number of bits in the bitstream scratch word (u4_cur_word)
+******************************************************************************
+ */
+#define WORD_SIZE 32
+
+/**
+******************************************************************************
+ * @brief zero byte run length at which INSERT_EPB() checks the next byte
+******************************************************************************
+ */
+#define EPB_ZERO_BYTES 2
+
+/**
+******************************************************************************
+ * @brief Emulation prevention byte value written by PUTBYTE_EPB()
+******************************************************************************
+ */
+#define EPB_BYTE 0x03
+
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief true when zero_run == EPB_ZERO_BYTES and next_byte is 0x00..0x03
+******************************************************************************
+ */
+#define INSERT_EPB(zero_run, next_byte) \
+    (((zero_run) == EPB_ZERO_BYTES) && (0 == ((next_byte) & 0xFC)))
+
+/**
+******************************************************************************
+ * @brief sets r to 1-based msb position of value (1 if 0); value is read twice
+******************************************************************************
+ */
+#if !MSVC
+#define GETRANGE(r,value) \
+{ \
+    (r) = 0; \
+    if(0 == (value)) \
+        (r) = 1; \
+    else \
+    { \
+        (r) = 32-CLZ((value)); \
+    }\
+}
+#else
+#define GETRANGE(r,value) \
+{ \
+    unsigned long msb_one_bit = 0; \
+    (r) = _BitScanReverse(&msb_one_bit, (value)) ? (UWORD32)(msb_one_bit + 1) : 1 ; \
+}
+#endif
+
+/**
+******************************************************************************
+ * @brief sets bits to 2 * (range of x + 1) - 1, the put_uev() code length of x
+******************************************************************************
+ */
+#define UE_LENGTH(bits,x) \
+{ \
+    UWORD32 r_bit; \
+    GETRANGE(r_bit,((x)+1)) \
+    (bits) =(((r_bit - 1) << 1)+1); \
+}
+
+/**
+******************************************************************************
+ * @brief Inserts 1 byte and Emulation Prevention Byte(if any) into bitstream
+ * Updates off and zero_run in place; no bounds check; args read several times
+******************************************************************************
+ */
+#define PUTBYTE_EPB(ptr,off,byte,zero_run) \
+{ \
+    if( INSERT_EPB(zero_run, byte) ) \
+    { \
+        (ptr)[(off)] = EPB_BYTE; \
+        (off)++; \
+        (zero_run) = 0; \
+    } \
+    \
+    (ptr)[(off)] = (byte); \
+    (off)++; \
+    (zero_run) = (byte) ? 0 : (zero_run)+1; \
+}
+
+/**
+******************************************************************************
+ * @brief Byte aligns the stream by calling ih264e_put_rbsp_trailing_bits()
+******************************************************************************
+ */
+#define BYTE_ALIGNMENT(ps_bitstrm) ih264e_put_rbsp_trailing_bits(ps_bitstrm)
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Bitstream context for encoder
+******************************************************************************
+ */
+typedef struct bitstrm
+{
+ /** points to start of stream buffer (set by ih264e_bitstrm_init()). */
+ UWORD8 *pu1_strm_buffer;
+
+ /**
+ * max bitstream size (in bytes).
+ * Encoded stream shall not exceed this size.
+ */
+ UWORD32 u4_max_strm_size;
+
+ /**
+ * byte offset (w.r.t pu1_strm_buffer) where next byte would be written.
+ * Bitstream engine is meant to never write at or beyond
+ * u4_max_strm_size bytes
+ */
+ UWORD32 u4_strm_buf_offset;
+
+ /**
+ * current bitstream word; a scratch word holding at most WORD_SIZE
+ * bits, filled from the msb side. It is copied to the stream buffer
+ * when the word is full
+ */
+ UWORD32 u4_cur_word;
+
+ /**
+ * number of bits still free in u4_cur_word.
+ * bits from msb down to i4_bits_left_in_cw are already in use; the
+ * next bits are inserted from position [i4_bits_left_in_cw-1].
+ * Range of this variable [1 : WORD_SIZE]
+ */
+ WORD32 i4_bits_left_in_cw;
+
+ /**
+ * number of consecutive zero bytes propagated from the previous
+ * word. It is used for emulation prevention byte insertion in the stream
+ */
+ WORD32 i4_zero_bytes_run;
+
+} bitstrm_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Initializes the encoder bitstream engine
+*
+* @par Description
+* This routine needs to be called at start of slice/frame encode
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] pu1_bitstrm_buf
+* bitstream buffer pointer where the encoded stream is generated in byte order
+*
+* @param[in] u4_max_bitstrm_size
+* indicates maximum bitstream buffer size. (in bytes)
+* If actual stream size exceeds the maximum size, encoder should
+* 1. Not corrupt data beyond u4_max_bitstrm_size bytes
+* 2. Report an error back to application indicating overflow
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_bitstrm_init
+ (
+ bitstrm_t *ps_bitstrm,
+ UWORD8 *pu1_bitstrm_buf,
+ UWORD32 u4_max_bitstrm_size
+ );
+
+/**
+******************************************************************************
+*
+* @brief puts a code with specified number of bits into the bitstream
+*
+* @par Description
+* inserts code_len number of bits from lsb of code_val into the
+* bitstream. If the total bytes (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @param[in] code_len
+* indicates the number of bits (taken from the lsb of code_val) that would be
+* inserted in the bitstream buffer.
+*
+* @remarks Assumptions: all bits from bit position code_len to msb of
+* code_val shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bits
+ (
+ bitstrm_t *ps_bitstrm,
+ UWORD32 u4_code_val,
+ WORD32 code_len
+ );
+
+/**
+******************************************************************************
+*
+* @brief inserts a 1-bit code into the bitstream
+*
+* @par Description
+* inserts 1bit lsb of code_val into the bitstream
+* updates context members like u4_cur_word, u4_strm_buf_offset and
+* i4_bits_left_in_cw. If the total bytes (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @remarks Assumptions: all bits from bit position 1 to msb of code_val
+* shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_bit
+ (
+ bitstrm_t *ps_bitstrm,
+ UWORD32 u4_code_val
+ );
+
+/**
+******************************************************************************
+*
+* @brief inserts rbsp trailing bits at the end of stream buffer (NAL)
+*
+* @par Description
+* inserts rbsp trailing bits, updates context members like u4_cur_word and
+* i4_bits_left_in_cw and flushes the same in the bitstream buffer. If the
+* total bytes (u4_strm_buf_offset) exceeds max available size
+* (u4_max_strm_size), returns error without corrupting data beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_rbsp_trailing_bits
+ (
+ bitstrm_t *ps_bitstrm
+ );
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of an unsigned integer into bitstream
+*
+* @par Description
+* computes uev code for given syntax element and inserts the same into
+* bitstream by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_num
+* unsigned integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_uev
+ (
+ bitstrm_t *ps_bitstrm,
+ UWORD32 u4_code_num
+ );
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of a signed integer into bitstream
+*
+* @par Description
+* computes sev code for given syntax element and inserts the same into
+* bitstream by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] syntax_elem
+* signed integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_sev
+ (
+ bitstrm_t *ps_bitstrm,
+ WORD32 syntax_elem
+ );
+
+/**
+******************************************************************************
+*
+* @brief insert NAL start code prefix (0x000001) into bitstream with an option
+* of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001)
+*
+* @par Description
+* Although start code prefix could have been put by calling ih264e_put_bits(),
+* ih264e_put_nal_start_code_prefix() is specially added to make sure emulation
+* prevention insertion is not done for the NAL start code prefix which will
+* surely happen otherwise by calling ih264e_put_bits() interface.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] insert_leading_zero_8bits
+* flag indicating whether one more zero byte needs to be prefixed before start code
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_put_nal_start_code_prefix
+ (
+ bitstrm_t *ps_bitstrm,
+ WORD32 insert_leading_zero_8bits
+ );
+
+#endif /* IH264E_BITSTREAM_H_ */
diff --git a/encoder/ih264e_cavlc.c b/encoder/ih264e_cavlc.c
new file mode 100755
index 0000000..1341dcd
--- /dev/null
+++ b/encoder/ih264e_cavlc.c
@@ -0,0 +1,1448 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_cavlc.c
+*
+* @brief
+* Contains all the routines to code syntax elements and residuals when entropy
+* coding chosen is CAVLC
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_compute_zeroruns_and_trailingones()
+* - ih264e_write_coeff4x4_cavlc()
+* - ih264e_write_coeff8x8_cavlc()
+* - ih264e_encode_residue()
+* - ih264e_write_islice_mb()
+* - ih264e_write_pslice_mb()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_encode_header.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Computes, for one residual block, the run of zeros preceding every non-zero
+*  coefficient, the number of trailing ones, the sign of each trailing one and
+*  the total number of zeros scanned.
+*
+* @par Description
+*  The significant coefficient map is consumed lsb first, one bit per scan
+*  position. Every set bit corresponds to the next entry of pi2_res_block.
+*  Only the last three non-zero coefficients are inspected for +1 / -1; any
+*  other level among them resets the trailing one count and sign bits.
+*
+* @param[in] pi2_res_block
+*  Pointer to residual block containing levels in scan order
+*
+* @param[in] u4_total_coeff
+*  Total non-zero coefficients in that sub block
+*
+* @param[out] pu1_zero_run
+*  Array receiving the run of zeros before each non-zero coefficient. The
+*  first 16 entries are cleared on entry, so it must hold at least 16 bytes.
+*
+* @param[in] u4_sig_coeff_map
+*  significant coefficient map (bit n set => scan position n is non-zero)
+*
+* @returns u4_totzero_sign_trailone
+*  Bits  0-7  contain the number of trailing ones.
+*  Bits  8-15 contain the bitwise sign information of the trailing ones
+*             (bit k set => k-th trailing one encountered is -1).
+*  Bits 16-23 contain the total number of zeros.
+*
+* @remarks
+*  Assumes u4_sig_coeff_map has at least u4_total_coeff bits set; the scan
+*  loops only terminate once that many set bits have been consumed.
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_compute_zeroruns_and_trailingones(WORD16 *pi2_res_block,
+                                                        UWORD32 u4_total_coeff,
+                                                        UWORD8 *pu1_zero_run,
+                                                        UWORD32 u4_sig_coeff_map)
+{
+    UWORD32 i = 0;
+    UWORD32 u4_nnz_coeff = 0;
+    WORD32 i4_run = -1;
+    UWORD32 u4_sign = 0;
+    UWORD32 u4_tot_zero = 0;
+    UWORD32 u4_trailing1 = 0;
+    WORD32 i4_val;
+    UWORD32 u4_totzero_sign_trailone;
+
+    /*
+     * Clear the 16 run entries byte by byte. Writing them through a UWORD32
+     * alias would break the effective type rules and could be a misaligned
+     * store, since the buffer is only guaranteed to be byte aligned.
+     */
+    for (i = 0; i < 16; i++)
+    {
+        pu1_zero_run[i] = 0;
+    }
+
+    /* 'i' doubles as the scan position counter from here on */
+    i = 0;
+
+    /* Compute Runs of zeros for all nnz coefficients except the last 3 */
+    if (u4_total_coeff > 3)
+    {
+        for (i = 0; u4_nnz_coeff < (u4_total_coeff - 3); i++)
+        {
+            i4_run++;
+
+            i4_val = (u4_sig_coeff_map & 0x1);
+            u4_sig_coeff_map >>= 1;
+
+            if (i4_val != 0)
+            {
+                pu1_zero_run[u4_nnz_coeff++] = i4_run;
+                i4_run = -1;
+            }
+        }
+    }
+
+    /* Compute T1's, Signof(T1's) and Runs of zeros for the last 3 */
+    while (u4_nnz_coeff != u4_total_coeff)
+    {
+        i4_run++;
+
+        i4_val = (u4_sig_coeff_map & 0x1);
+        u4_sig_coeff_map >>= 1;
+
+        if (i4_val != 0)
+        {
+            WORD16 i2_level = pi2_res_block[u4_nnz_coeff];
+
+            /* the run is recorded irrespective of the level value */
+            pu1_zero_run[u4_nnz_coeff] = i4_run;
+
+            if (i2_level == 1)
+            {
+                u4_trailing1++;
+            }
+            else if (i2_level == -1)
+            {
+                u4_sign |= 1 << u4_trailing1;
+                u4_trailing1++;
+            }
+            else
+            {
+                /* a level other than +/-1 breaks the chain of trailing ones */
+                u4_trailing1 = 0;
+                u4_sign = 0;
+            }
+
+            i4_run = -1;
+            u4_nnz_coeff++;
+        }
+        i++;
+    }
+
+    /* scan positions visited minus non-zero coefficients = zeros */
+    u4_tot_zero = i - u4_total_coeff;
+    u4_totzero_sign_trailone = (u4_tot_zero << 16) | (u4_sign << 8) | u4_trailing1;
+
+    return (u4_totzero_sign_trailone);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates CAVLC coded bit stream for the given residual block
+*
+* @par Description
+* Writes, in this order: the coeff token, the sign bits of the trailing ones,
+* the level codes of the remaining coefficients (walking pi2_res_block from
+* the highest index downwards), the total zeros code (only when the block is
+* not full) and the run before codes. A block with no coefficients only
+* gets its coeff token written.
+*
+* @param[in] pi2_res_block
+* Pointer to residual block containing levels in scan order
+*
+* @param[in] u4_total_coeff
+* Total non-zero coefficients in the sub block
+*
+* @param[in] u4_block_type
+* block type; used as index into the per block type maximum coefficient
+* count table and must not exceed CAVLC_CHROMA_4x4_AC
+*
+* @param[out] pu1_zero_run
+* Scratch array that is filled with the run of zeros of every coefficient
+* by ih264e_compute_zeroruns_and_trailingones() and read back here while
+* writing the run before codes
+*
+* @param[in] u4_nc
+* average of non zero coeff from top and left blocks (when available).
+* (u4_nc >> 1) selects the coeff token table; values above 3 select the
+* fixed 6 bit code
+*
+* @param[in, out] ps_bit_stream
+* structure pointing to a buffer holding output bit stream
+*
+* @param[in] u4_sig_coeff_map
+* significant coefficient map of the residual block
+*
+* @returns
+* error code of the most recent ih264e_put_bits() call; the status of
+* earlier calls is overwritten
+*
+* @remarks
+* If the block type is CAVLC_CHROMA_4x4_DC, then u4_nc is non-significant
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_write_coeff4x4_cavlc(WORD16 *pi2_res_block,
+ UWORD32 u4_total_coeff,
+ ENTROPY_BLK_TYPE u4_block_type,
+ UWORD8 *pu1_zero_run,
+ UWORD32 u4_nc,
+ bitstrm_t *ps_bit_stream,
+ UWORD32 u4_sig_coeff_map)
+{
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+ UWORD32 u4_totzero_sign_trailone = 0;
+ UWORD32 u4_trailing_ones = 0;
+ UWORD32 u4_tot_zeros = 0;
+ UWORD32 u4_remaining_coeff = 0;
+ UWORD32 u4_sign1 = 0;
+ UWORD32 u4_max_num_coeff = 0;
+ const UWORD32 au4_max_num_nnz_coeff[] = {16, 15, 16, 4, 15}; /* max coeffs per block, indexed by u4_block_type */
+
+ /* validate inputs */
+ ASSERT(u4_block_type <= CAVLC_CHROMA_4x4_AC);
+
+ u4_max_num_coeff = au4_max_num_nnz_coeff[u4_block_type];
+
+ ASSERT(u4_total_coeff <= u4_max_num_coeff);
+
+ if (!u4_total_coeff)
+ {
+ UWORD32 u4_codeword = 15; /* all ones; trimmed to u4_codesize bits below */
+ UWORD32 u4_codesize = 1;
+ if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+ {
+ u4_codeword = 1;
+ u4_codesize = 2;
+ DEBUG("\n[%d numcoeff, %d numtrailing ones]",u4_total_coeff, 0);
+ ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+ ENTROPY_TRACE("\tnumber of trailing ones ",0);
+ }
+ else
+ {
+ UWORD32 u4_vlcnum = u4_nc >> 1;
+
+ /* write coeff_token */
+ if (u4_vlcnum > 3)
+ {
+ /* Num-FLC */
+ u4_codeword = 3;
+ u4_codesize = 6;
+ }
+ else
+ {
+ /* Num-VLC 0, 1, 2 */
+ if (u4_vlcnum > 1)
+ {
+ u4_vlcnum = 2;
+ }
+ u4_codesize <<= u4_vlcnum; /* 1, 2 or 4 bits */
+ u4_codeword >>= (4 - u4_codesize); /* 15 -> 1, 3 or 15 for sizes 1, 2, 4 */
+ }
+
+ DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]",u4_total_coeff, 0, u4_nc);
+ ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+ ENTROPY_TRACE("\tnC ",u4_nc);
+ }
+
+
+ DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+ ENTROPY_TRACE("\tcodeword ",u4_codeword);
+ ENTROPY_TRACE("\tcodesize ",u4_codesize);
+
+ error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+ return error_status;
+ }
+ else
+ {
+ /* Compute zero run, number of trailing ones and their sign. */
+ u4_totzero_sign_trailone =
+ ih264e_compute_zeroruns_and_trailingones(pi2_res_block,
+ u4_total_coeff,
+ pu1_zero_run,
+ u4_sig_coeff_map);
+ u4_trailing_ones = u4_totzero_sign_trailone & 0xFF; /* byte 0 : trailing ones count */
+ u4_sign1 = (u4_totzero_sign_trailone >> 8)& 0xFF; /* byte 1 : trailing ones sign bits */
+ u4_tot_zeros = (u4_totzero_sign_trailone >> 16) & 0xFF; /* byte 2 : total zeros */
+ u4_remaining_coeff = u4_total_coeff - u4_trailing_ones; /* coefficients that need a level code */
+
+ /* write coeff_token */
+ {
+ UWORD32 u4_codeword;
+ UWORD32 u4_codesize;
+ if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+ {
+ u4_codeword = gu1_code_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff-1];
+ u4_codesize = gu1_size_coeff_token_table_chroma[u4_trailing_ones][u4_total_coeff-1];
+
+ DEBUG("\n[%d numcoeff, %d numtrailing ones]",u4_total_coeff, u4_trailing_ones);
+ ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+ ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+ }
+ else
+ {
+ UWORD32 u4_vlcnum = u4_nc >> 1;
+
+ if (u4_vlcnum > 3)
+ {
+ /* Num-FLC */
+ u4_codeword = ((u4_total_coeff-1) << 2 ) + u4_trailing_ones; /* 4 bits of (coeff - 1), 2 bits of trailing ones */
+ u4_codesize = 6;
+ }
+ else
+ {
+ /* Num-VLC 0, 1, 2 */
+ if (u4_vlcnum > 1)
+ {
+ u4_vlcnum = 2;
+ }
+ u4_codeword = gu1_code_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff-1];
+ u4_codesize = gu1_size_coeff_token_table[u4_vlcnum][u4_trailing_ones][u4_total_coeff-1];
+ }
+
+ DEBUG("\n[%d numcoeff, %d numtrailing ones, %d nnz]",u4_total_coeff, u4_trailing_ones, u4_nc);
+ ENTROPY_TRACE("\tnumber of non zero coeffs ",u4_total_coeff);
+ ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+ ENTROPY_TRACE("\tnC ",u4_nc);
+ }
+
+ DEBUG("\nCOEFF TOKEN 0: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+ ENTROPY_TRACE("\tcodeword ",u4_codeword);
+ ENTROPY_TRACE("\tcodesize ",u4_codesize);
+
+ error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+ }
+
+ /* write sign of trailing ones */
+ if (u4_trailing_ones)
+ {
+ DEBUG("\nT1's: %d u4_codeword, %d u4_codesize",u4_sign1, u4_trailing_ones);
+ error_status = ih264e_put_bits(ps_bit_stream, u4_sign1, u4_trailing_ones); /* one sign bit per trailing one */
+ ENTROPY_TRACE("\tnumber of trailing ones ",u4_trailing_ones);
+ ENTROPY_TRACE("\tsign of trailing ones ",u4_sign1);
+ }
+
+ /* write level codes */
+ if (u4_remaining_coeff)
+ {
+ WORD32 i4_level = pi2_res_block[u4_remaining_coeff-1]; /* start with the last level that is not a trailing one */
+ UWORD32 u4_escape;
+ UWORD32 u4_suffix_length = 0; // Level-VLC[N]
+ UWORD32 u4_abs_level, u4_abs_level_actual = 0;
+ WORD32 i4_sign;
+ const UWORD32 u4_rndfactor[] = {0, 0, 1, 3, 7, 15, 31}; /* indexed by suffix length; equals (1 << (n - 1)) - 1 for n >= 1 */
+
+ DEBUG("\n \t%d coeff,",i4_level);
+ ENTROPY_TRACE("\tcoeff ",i4_level);
+
+ if (u4_trailing_ones < 3)
+ {
+ /* If there are less than 3 T1s, then the magnitude of the first non-T1 level is reduced by one (incremented if negative, decremented if positive) */
+ if (i4_level < 0)
+ {
+ i4_level += 1;
+ }
+ else
+ {
+ i4_level -= 1;
+ }
+
+ u4_abs_level_actual = 1; /* compensates the adjustment above once u4_abs_level is added */
+
+ /* Initialize VLC table (Suffix Length) to encode the level */
+ if (u4_total_coeff > 10)
+ {
+ u4_suffix_length = 1;
+ }
+ }
+
+ i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1)); /* presumably -1 for negative levels, 0 otherwise (relies on arithmetic right shift) */
+ u4_abs_level = ((i4_level + i4_sign) ^ i4_sign); /* branch free absolute value */
+
+ u4_abs_level_actual += u4_abs_level;
+
+ u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length;
+
+ while (1)
+ {
+ UWORD32 u4_codesize;
+ UWORD32 u4_codeword;
+ UWORD32 u4_codeval;
+
+ u4_remaining_coeff--;
+
+GATHER_CAVLC_STATS1();
+
+ {
+ u4_codeval = u4_abs_level << 1;
+ u4_codeval = u4_codeval - 2 - i4_sign; /* 2 * (abs - 1), plus 1 when the level is negative */
+
+ if ((!u4_suffix_length) && (u4_escape > 7) && (u4_abs_level < 16))
+ {
+ u4_codeword = (1 << 4) + (u4_codeval - 14); /* marker bit followed by a 4 bit suffix */
+ u4_codesize = 19;
+ }
+ else if (u4_escape > 7)
+ {
+ u4_codeword = (1 << 12) + (u4_codeval - (15 << u4_suffix_length)); /* marker bit followed by a 12 bit suffix */
+ u4_codesize = 28;
+ if (!u4_suffix_length)
+ {
+ u4_codeword -= 15;
+ }
+ }
+ else
+ {
+ u4_codeword = (1 << u4_suffix_length) + (u4_codeval & ((1 << u4_suffix_length)-1)); /* marker bit followed by the suffix bits */
+ u4_codesize = (u4_codeval >> u4_suffix_length) + 1 + u4_suffix_length; /* prefix zeros + marker + suffix */
+ }
+ }
+
+ /*put the level code in bitstream*/
+ DEBUG("\nLEVEL: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+ ENTROPY_TRACE("\tcodeword ",u4_codeword);
+ ENTROPY_TRACE("\tcodesize ",u4_codesize);
+ error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+ if (u4_remaining_coeff == 0) break;
+
+ /*update suffix length for next level*/
+ if (u4_suffix_length == 0)
+ {
+ u4_suffix_length++;
+ }
+ if (u4_suffix_length < 6)
+ {
+ if (u4_abs_level_actual > gu1_threshold_vlc_level[u4_suffix_length])
+ {
+ u4_suffix_length++;
+ }
+ }
+
+ /* next level */
+ i4_level = pi2_res_block[u4_remaining_coeff-1];
+
+ DEBUG("\n \t%d coeff,",i4_level);
+ ENTROPY_TRACE("\tcoeff ",i4_level);
+
+ i4_sign = (i4_level >> (sizeof(WORD32) * CHAR_BIT - 1));
+ u4_abs_level = ((i4_level + i4_sign) ^ i4_sign);
+
+ u4_abs_level_actual = u4_abs_level; /* only the first level is adjusted, so actual == coded from here on */
+
+ u4_escape = (u4_abs_level + u4_rndfactor[u4_suffix_length]) >> u4_suffix_length;
+ }
+ }
+
+ DEBUG("\n \t %d totalzeros",u4_tot_zeros);
+ ENTROPY_TRACE("\ttotal zeros ",u4_tot_zeros);
+
+ /* Write Total Zeros (skipped when the block already holds the maximum number of coefficients) */
+ if (u4_total_coeff < u4_max_num_coeff)
+ {
+ WORD32 index;
+ UWORD32 u4_codeword;
+ UWORD32 u4_codesize;
+
+ if (u4_block_type == CAVLC_CHROMA_4x4_DC)
+ {
+ UWORD8 gu1_index_zero_table_chroma[] = {0, 4, 7}; /* start offset in the chroma tables for total coeff 1, 2, 3 */
+ index = gu1_index_zero_table_chroma[u4_total_coeff-1] + u4_tot_zeros;
+ u4_codesize = gu1_size_zero_table_chroma[index];
+ u4_codeword = gu1_code_zero_table_chroma[index];
+ }
+ else
+ {
+ index = gu1_index_zero_table[u4_total_coeff-1] + u4_tot_zeros;
+ u4_codesize = gu1_size_zero_table[index];
+ u4_codeword = gu1_code_zero_table[index];
+ }
+
+ DEBUG("\nTOTAL ZEROS: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+ ENTROPY_TRACE("\tcodeword ",u4_codeword);
+ ENTROPY_TRACE("\tcodesize ",u4_codesize);
+ error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+ }
+
+ /* Write Run Before */
+ if (u4_tot_zeros)
+ {
+ UWORD32 u4_max_num_coef = u4_total_coeff-1; /* index of the last coefficient; walks down to 1 */
+ UWORD32 u4_codeword;
+ UWORD32 u4_codesize;
+ UWORD32 u4_zeros_left = u4_tot_zeros;
+
+ while (u4_max_num_coef)
+ {
+ UWORD32 u4_run_before = pu1_zero_run[u4_max_num_coef];
+ UWORD32 u4_index;
+
+ if (u4_zeros_left > MAX_ZERO_LEFT)
+ {
+ u4_index = gu1_index_run_table[MAX_ZERO_LEFT]; /* all larger zeros left values share one table entry */
+ }
+ else
+ {
+ u4_index = gu1_index_run_table[u4_zeros_left - 1];
+ }
+
+ u4_codesize = gu1_size_run_table[u4_index + u4_run_before];
+ u4_codeword = gu1_code_run_table[u4_index + u4_run_before];
+
+ DEBUG("\nRUN BEFORE ZEROS: %d u4_codeword, %d u4_codesize",u4_codeword, u4_codesize);
+ ENTROPY_TRACE("\tcodeword ",u4_codeword);
+ ENTROPY_TRACE("\tcodesize ",u4_codesize);
+ error_status = ih264e_put_bits(ps_bit_stream, u4_codeword, u4_codesize);
+
+ u4_zeros_left -= u4_run_before;
+ if (!u4_zeros_left)
+ {
+ break; /* no zeros left to signal; remaining runs are implicitly zero */
+ }
+ u4_max_num_coef--;
+ }
+ }
+ }
+
+ return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Writes the CAVLC bit stream of the four 4x4 blocks that make up one 8x8
+*  sub block
+*
+* @par Description
+*  The four blocks are visited in the order 0, 1, 2, 3. Blocks 0 and 1 use left
+*  nnz slot 0, blocks 2 and 3 use left nnz slot 1; blocks 0 and 2 use top nnz
+*  slot 0, blocks 1 and 3 use top nnz slot 1. For every block the context
+*  value is built from the available left / top nnz (rounded average when
+*  both are available), the block's own nnz is then stored into both slots
+*  and the block is written with ih264e_write_coeff4x4_cavlc().
+*
+* @param[in] ps_ent_ctxt
+*  Pointer to entropy context (supplies the bit stream handle and the zero
+*  run scratch array)
+*
+* @param[in] pi2_res_block
+*  Pointers to the residual blocks of the four 4x4 blocks
+*  (containing levels in scan order)
+*
+* @param[in] pu1_nnz
+*  Total non-zero coefficients of each of the four 4x4 blocks
+*
+* @param[in] pu2_sig_coeff_map
+*  Significant coefficient map of each of the four 4x4 blocks
+*
+* @param[in] u4_block_type
+*  entropy coding block type
+*
+* @param[in] u4_ngbr_avlb
+*  packed neighbor availability, one byte per 4x4 block (byte n in memory
+*  order belongs to block n): low nibble non-zero => left neighbor available,
+*  high nibble non-zero => top neighbor available
+*
+* @param[in, out] pu1_top_nnz
+*  pointer to the two top nnz slots of this 8x8; updated with the nnz of the
+*  blocks written
+*
+* @param[in, out] pu1_left_nnz
+*  pointer to the two left nnz slots of this 8x8; updated with the nnz of the
+*  blocks written
+*
+* @returns error status of the last 4x4 block written
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_write_coeff8x8_cavlc(entropy_ctxt_t *ps_ent_ctxt,
+                                                  WORD16 **pi2_res_block,
+                                                  UWORD8 *pu1_nnz,
+                                                  UWORD16 *pu2_sig_coeff_map,
+                                                  ENTROPY_BLK_TYPE u4_block_type,
+                                                  UWORD32 u4_ngbr_avlb,
+                                                  UWORD8 *pu1_top_nnz,
+                                                  UWORD8 *pu1_left_nnz)
+{
+    /* nnz slot consulted and updated by each 4x4 block, in block order */
+    static const UWORD8 au1_left_slot[4] = {0, 0, 1, 1};
+    static const UWORD8 au1_top_slot[4] = {0, 1, 0, 1};
+
+    IH264E_ERROR_T e_status = IH264E_SUCCESS;
+    bitstrm_t *ps_strm = ps_ent_ctxt->ps_bitstrm;
+    UWORD8 *pu1_runs = ps_ent_ctxt->au1_zero_run;
+
+    /* byte wise view of the packed availability word */
+    UWORD8 *pu1_avail = (void *)(&u4_ngbr_avlb);
+    UWORD32 u4_blk;
+
+    for (u4_blk = 0; u4_blk < 4; u4_blk++)
+    {
+        UWORD8 *pu1_left = pu1_left_nnz + au1_left_slot[u4_blk];
+        UWORD8 *pu1_top = pu1_top_nnz + au1_top_slot[u4_blk];
+        UWORD8 u1_left_avail = pu1_avail[u4_blk] & 0x0F;
+        UWORD8 u1_top_avail = pu1_avail[u4_blk] & 0xF0;
+        UWORD32 u4_ctx = 0;
+
+        /* context = sum of available neighbors, averaged when both exist */
+        if (u1_left_avail)
+        {
+            u4_ctx += *pu1_left;
+        }
+        if (u1_top_avail)
+        {
+            u4_ctx += *pu1_top;
+        }
+        if (u1_left_avail && u1_top_avail)
+        {
+            u4_ctx = (u4_ctx + 1) >> 1;
+        }
+
+        /* this block becomes the left / top neighbor of later blocks */
+        *pu1_top = pu1_nnz[u4_blk];
+        *pu1_left = *pu1_top;
+
+        e_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[u4_blk],
+                                               pu1_nnz[u4_blk],
+                                               u4_block_type,
+                                               pu1_runs,
+                                               u4_ctx,
+                                               ps_strm,
+                                               pu2_sig_coeff_map[u4_blk]);
+    }
+
+    return e_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function encodes luma and chroma residues of a macro block when
+* the entropy coding mode chosen is cavlc.
+*
+* @param[in] ps_ent_ctxt
+* Pointer to entropy context
+*
+* @param[in] u4_mb_type
+* current mb type
+*
+* @param[in] u4_cbp
+* coded block pattern for the current mb
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static IH264E_ERROR_T ih264e_encode_residue(entropy_ctxt_t *ps_ent_ctxt,
+ UWORD32 u4_mb_type,
+ UWORD32 u4_cbp)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+ /* packed residue */
+ void *pv_mb_coeff_data = ps_ent_ctxt->pv_mb_coeff_data;
+
+ /* bit stream buffer */
+ bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+ /* zero run */
+ UWORD8 *pu1_zero_run = ps_ent_ctxt->au1_zero_run;
+
+ /* temp var */
+ UWORD32 u4_nC, u4_ngbr_avlb;
+ UWORD8 au1_nnz[4], *pu1_ngbr_avlb, *pu1_top_nnz, *pu1_left_nnz;
+ UWORD16 au2_sig_coeff_map[4];
+ WORD16 *pi2_res_block[4];
+ UWORD8 *pu1_slice_idx = ps_ent_ctxt->pu1_slice_idx;
+ tu_sblk_coeff_data_t *ps_mb_coeff_data;
+ ENTROPY_BLK_TYPE e_entropy_blk_type = CAVLC_LUMA_4x4;
+
+ /* ngbr availability */
+ UWORD8 u1_mb_a, u1_mb_b;
+
+ /* cbp */
+ UWORD32 u4_cbp_luma = u4_cbp & 0xF, u4_cbp_chroma = u4_cbp >> 4;
+
+ /* mb indices */
+ WORD32 i4_mb_x, i4_mb_y;
+
+ /* derive neighbor availability */
+ i4_mb_x = ps_ent_ctxt->i4_mb_x;
+ i4_mb_y = ps_ent_ctxt->i4_mb_y;
+ pu1_slice_idx += (i4_mb_y * ps_ent_ctxt->i4_wd_mbs);
+ /* left macroblock availability */
+ u1_mb_a = (i4_mb_x == 0 ||
+ (pu1_slice_idx[i4_mb_x - 1 ] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+ /* top macroblock availability */
+ u1_mb_b = (i4_mb_y == 0 ||
+ (pu1_slice_idx[i4_mb_x-ps_ent_ctxt->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+
+ pu1_ngbr_avlb = (void *)(&u4_ngbr_avlb);
+ pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x];
+ pu1_left_nnz = (UWORD8 *)&ps_ent_ctxt->u4_left_nnz_luma;
+
+ /* encode luma residue */
+
+ /* mb type intra 16x16 */
+ if (u4_mb_type == I16x16)
+ {
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ /* estimate nnz for the current mb */
+ u4_nC = 0;
+ if (u1_mb_a)
+ u4_nC += pu1_left_nnz[0];
+ if (u1_mb_b)
+ u4_nC += pu1_top_nnz[0];
+ if (u1_mb_a && u1_mb_b)
+ u4_nC = (u4_nC + 1) >> 1;
+
+ /* encode dc block */
+ ENTROPY_TRACE("Luma DC blk idx %d",0);
+ error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_LUMA_4x4_DC, pu1_zero_run, u4_nC, ps_bitstream, au2_sig_coeff_map[0]);
+
+ e_entropy_blk_type = CAVLC_LUMA_4x4_AC;
+ }
+
+ if (u4_cbp_luma & 1)
+ {
+ /* encode ac block index 8x8 = 0*/
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+ /* derive sub block neighbor availability */
+
+ pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a);
+ pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1;
+ pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a);
+ pu1_ngbr_avlb[3] = 0x11;
+ /* encode sub blk */
+ ENTROPY_TRACE("Luma blk idx %d",0);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+ }
+ else
+ {
+ pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+ pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+ }
+
+ if (u4_cbp_luma & 2)
+ {
+ /* encode ac block index 8x8 = 1*/
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ /* derive sub block neighbor availability */
+ pu1_ngbr_avlb[1] = pu1_ngbr_avlb[0] = (u1_mb_b << 4) | 1;
+ pu1_ngbr_avlb[3] = pu1_ngbr_avlb[2] = 0x11;
+ /* encode sub blk */
+ ENTROPY_TRACE("Luma blk idx %d",1);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz+2, pu1_left_nnz);
+ }
+ else
+ {
+ (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0;
+ pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+ }
+
+ if (u4_cbp_luma & 0x4)
+ {
+ /* encode ac block index 8x8 = 2*/
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ /* derive sub block neighbor availability */
+ pu1_ngbr_avlb[2] = pu1_ngbr_avlb[0] = (1 << 4) | u1_mb_a;
+ pu1_ngbr_avlb[1] = pu1_ngbr_avlb[3] = 0x11;
+ /* encode sub blk */
+ ENTROPY_TRACE("Luma blk idx %d",2);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz, (pu1_left_nnz+2));
+ }
+ else
+ {
+ pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+ (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0;
+ }
+
+ if (u4_cbp_luma & 0x8)
+ {
+ /* encode ac block index 8x8 = 3*/
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ /* derive sub block neighbor availability */
+ u4_ngbr_avlb = 0x11111111;
+ /* encode sub blk */
+ ENTROPY_TRACE("Luma blk idx %d",3);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, e_entropy_blk_type, u4_ngbr_avlb, pu1_top_nnz+2, pu1_left_nnz+2);
+ }
+ else
+ {
+ (pu1_top_nnz + 2)[0] = (pu1_top_nnz + 2)[1] = 0;
+ (pu1_left_nnz + 2)[0] = (pu1_left_nnz + 2)[1] = 0;
+ }
+
+ /* encode chroma residue */
+ if (u4_cbp_chroma & 3)
+ {
+ /* parse packed coeff data structure for residual data */
+ /* cb, cr */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+
+ /* encode dc block */
+ /* cb, cr */
+ ENTROPY_TRACE("Chroma DC blk idx %d",0);
+ error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[0], au1_nnz[0], CAVLC_CHROMA_4x4_DC, pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[0]);
+ ENTROPY_TRACE("Chroma DC blk idx %d",1);
+ error_status = ih264e_write_coeff4x4_cavlc(pi2_res_block[1], au1_nnz[1], CAVLC_CHROMA_4x4_DC, pu1_zero_run, 0, ps_bitstream, au2_sig_coeff_map[1]);
+ }
+
+ pu1_top_nnz = ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x];
+ pu1_left_nnz = (UWORD8 *) &ps_ent_ctxt->u4_left_nnz_cbcr;
+
+ /* encode sub blk */
+ if (u4_cbp_chroma & 0x2)
+ {
+ /* encode ac block index 8x8 = 0*/
+ /* derive sub block neighbor availability */
+ pu1_ngbr_avlb[0] = (u1_mb_b << 4) | (u1_mb_a);
+ pu1_ngbr_avlb[1] = (u1_mb_b << 4) | 1;
+ pu1_ngbr_avlb[2] = (1 << 4) | (u1_mb_a);
+ pu1_ngbr_avlb[3] = 0x11;
+
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ ENTROPY_TRACE("Chroma AC blk idx %d",0);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+ }
+ else
+ {
+ pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+ pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+ }
+
+ pu1_top_nnz += 2;
+ pu1_left_nnz += 2;
+
+ /* encode sub blk */
+ if (u4_cbp_chroma & 0x2)
+ {
+ /* parse packed coeff data structure for residual data */
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[0], au2_sig_coeff_map[0], pi2_res_block[0]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[1], au2_sig_coeff_map[1], pi2_res_block[1]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[2], au2_sig_coeff_map[2], pi2_res_block[2]);
+ PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, au1_nnz[3], au2_sig_coeff_map[3], pi2_res_block[3]);
+
+ ENTROPY_TRACE("Chroma AC blk idx %d",1);
+ error_status = ih264e_write_coeff8x8_cavlc(ps_ent_ctxt, pi2_res_block, au1_nnz, au2_sig_coeff_map, CAVLC_CHROMA_4x4_AC, u4_ngbr_avlb, pu1_top_nnz, pu1_left_nnz);
+ }
+ else
+ {
+ pu1_top_nnz[0] = pu1_top_nnz[1] = 0;
+ pu1_left_nnz[0] = pu1_left_nnz[1] = 0;
+ }
+
+ /* store the index of the next mb coeff data */
+ ps_ent_ctxt->pv_mb_coeff_data = pv_mb_coeff_data;
+
+ return error_status;
+}
+/* Bit position of the stream: byte offset * 8 plus (32 - bits left in the current code word) */
+#define GET_NUM_BITS(ps_bitstream) ((((ps_bitstream)->u4_strm_buf_offset) << 3) + 32 - ((ps_bitstream)->i4_bits_left_in_cw))
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates CAVLC coded bit stream for an Intra Slice.
+*
+* @description
+* The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
+* (if present), mb qp delta, coded block pattern, chroma mb mode and
+* luma/chroma residue. These syntax elements are written as directed by table
+* 7.3.5 of h264 specification.
+*
+* @param[in] ps_ent_ctxt
+* pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+ /* bit stream ptr */
+ bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+ /* packed header data */
+ UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+ /* mb header info */
+ /*
+ * mb_tpm : packed byte holding mb type (bits 0-3) plus intra modes (bits 4-7)
+ * mb_type : low nibble of mb_tpm
+ * cbp : coded block pattern, luma in bits 0-3 and chroma above them
+ * mb_qp_delta : mb qp delta, read as a signed byte
+ * chroma_intra_mode : chroma intra mode, mb_tpm bits 6-7
+ * luma_intra_mode : luma intra 16x16 mode, mb_tpm bits 4-5
+ */
+ WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+ WORD8 mb_qp_delta;
+
+ /* temp var */
+ WORD32 i, mb_type_stream;
+
+ WORD32 bitstream_start_offset, bitstream_end_offset;
+
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+
+ /********************************************************************/
+ /* BEGIN HEADER GENERATION */
+ /********************************************************************/
+
+ /* mb header info: these three bytes are read for every mb */
+ mb_tpm = *pu1_byte++;
+ cbp = *pu1_byte++;
+ mb_qp_delta = *pu1_byte++;
+
+ /* mb type */
+ mb_type = mb_tpm & 0xF;
+ /* intra 16x16: the mb type code word also carries luma mode and cbp */
+ if (mb_type == I16x16)
+ {
+ UWORD32 u4_cbp_l, u4_cbp_c;
+
+ u4_cbp_c = (cbp >> 4);
+ u4_cbp_l = (cbp & 0xF);
+ luma_intra_mode = (mb_tpm >> 4) & 3;
+ chroma_intra_mode = (mb_tpm >> 6);
+ /* fold luma mode, chroma cbp and the "all luma blocks coded" case into mb type */
+ mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12;
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else if (mb_type == I4x4)
+ {
+ /* mb sub blk modes */
+ WORD32 intra_pred_mode_flag, rem_intra_mode;
+ WORD32 byte;
+
+ chroma_intra_mode = (mb_tpm >> 6);
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type");
+
+ for (i = 0; i < 16; i += 2)
+ {
+ /* first sub blk of the pair: low nibble of the packed byte */
+ byte = *pu1_byte++;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode: bits 1-3 of the nibble, sent only when the flag is 0 */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+
+ /* second sub blk of the pair: high nibble of the same byte */
+ byte >>= 4;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode: bits 1-3 of the nibble, sent only when the flag is 0 */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+ }
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else if (mb_type == I8x8)
+ {
+ /* transform 8x8 flag */
+ UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+
+ /* mb sub blk modes */
+ WORD32 intra_pred_mode_flag, rem_intra_mode;
+ WORD32 byte;
+
+ chroma_intra_mode = (mb_tpm >> 6);
+ /* NOTE(review): ASSERT(0) suggests this path is not expected to run - confirm */
+ ASSERT(0);
+
+ /* write mb type (same code word 0 as the I4x4 branch) */
+ PUT_BITS_UEV(ps_bitstream, 0, error_status, "mb type");
+
+ /* u4_transform_size_8x8_flag */
+ PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag");
+
+ /* write sub block modes */
+ for (i = 0; i < 4; i++)
+ {
+ /* first mode of the packed byte: low nibble */
+ byte = *pu1_byte++;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+
+ /* second mode of the packed byte: high nibble */
+ byte >>= 4;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+ }
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else
+ {
+ }
+
+ /* coded_block_pattern (not sent for I16x16, where the mb type carries it) */
+ if (mb_type != I16x16)
+ {
+ PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][0], error_status, "coded_block_pattern");
+ }
+
+ if (cbp || mb_type == I16x16)
+ {
+ /* mb_qp_delta */
+ PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
+ }
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+ ps_ent_ctxt->u4_header_bits[0] += bitstream_end_offset - bitstream_start_offset;
+
+ /* Starting bitstream offset for residue */
+ bitstream_start_offset = bitstream_end_offset;
+
+ /* residual. NOTE(review): this assignment replaces any error_status set by the header writes above - confirm */
+ error_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp);
+
+ /* Ending bitstream offset for residue in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+ ps_ent_ctxt->u4_residue_bits[0] += bitstream_end_offset - bitstream_start_offset;
+
+ /* store the index of the next mb syntax layer */
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+ return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates CAVLC coded bit stream for Inter slices
+*
+* @description
+* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+* (if present), mb qp delta, coded block pattern, chroma mb mode and
+* luma/chroma residue. These syntax elements are written as directed by table
+* 7.3.5 of h264 specification
+*
+* @param[in] ps_ent_ctxt
+* pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+ /* bit stream ptr */
+ bitstrm_t *ps_bitstream = ps_ent_ctxt->ps_bitstrm;
+
+ /* packed header data */
+ UWORD8 *pu1_byte = ps_ent_ctxt->pv_mb_header_data;
+
+ /* mb header info */
+ /*
+ * mb_tpm : packed byte holding mb type (bits 0-3) plus intra modes (bits 4-7)
+ * mb_type : low nibble of mb_tpm
+ * cbp : coded block pattern, luma in bits 0-3 and chroma above them
+ * mb_qp_delta : mb qp delta, read as a signed byte
+ * chroma_intra_mode : chroma intra mode, mb_tpm bits 6-7
+ * luma_intra_mode : luma intra 16x16 mode, mb_tpm bits 4-5
+ * cbptable : column of gu1_cbp_map_tables; starts at 1, set to 0 for I4x4/I8x8
+ * is_inter : index into the header/residue bit counters (0 intra, 1 inter/skip)
+ */
+ WORD32 mb_tpm, mb_type, cbp, chroma_intra_mode, luma_intra_mode;
+ WORD8 mb_qp_delta;
+
+ /* temp var */
+ WORD32 i, mb_type_stream, cbptable = 1;
+
+ WORD32 is_inter = 0;
+
+ WORD32 bitstream_start_offset, bitstream_end_offset;
+
+ /* Starting bitstream offset for header in bits */
+ bitstream_start_offset = GET_NUM_BITS(ps_bitstream);
+
+ /********************************************************************/
+ /* BEGIN HEADER GENERATION */
+ /********************************************************************/
+
+ /* mb header info: only the first byte is read before the skip check */
+ mb_tpm = *pu1_byte++;
+
+ /* mb type */
+ mb_type = mb_tpm & 0xF;
+
+ /* check for skip */
+ if (mb_type == PSKIP)
+ {
+ UWORD32 *nnz;
+
+ is_inter = 1;
+
+ /* increment skip counter */
+ (*ps_ent_ctxt->pi4_mb_skip_run)++;
+
+ /* store the index of the next mb syntax layer */
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+ /* set left and top nnz of luma and cbcr to zero */
+ ps_ent_ctxt->u4_left_nnz_luma = 0;
+ nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_luma[ps_ent_ctxt->i4_mb_x];
+ *nnz = 0;
+ ps_ent_ctxt->u4_left_nnz_cbcr = 0;
+ nnz = (UWORD32 *)ps_ent_ctxt->pu1_top_nnz_cbcr[ps_ent_ctxt->i4_mb_x];
+ *nnz = 0;
+
+ /* residual, invoked as P16x16 with cbp 0 */
+ error_status = ih264e_encode_residue(ps_ent_ctxt, P16x16, 0);
+
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+ ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+ return error_status;
+ }
+
+ /* remaining mb header info */
+ cbp = *pu1_byte++;
+ mb_qp_delta = *pu1_byte++;
+
+ /* mb skip run: count of skipped mbs accumulated since the last coded mb */
+ PUT_BITS_UEV(ps_bitstream, *ps_ent_ctxt->pi4_mb_skip_run, error_status, "mb skip run");
+
+ /* reset skip counter */
+ *ps_ent_ctxt->pi4_mb_skip_run = 0;
+
+ /* intra 16x16: the mb type code word also carries luma mode and cbp */
+ if (mb_type == I16x16)
+ {
+ UWORD32 u4_cbp_l, u4_cbp_c;
+
+ is_inter = 0;
+
+ u4_cbp_c = (cbp >> 4);
+ u4_cbp_l = (cbp & 0xF);
+ luma_intra_mode = (mb_tpm >> 4) & 3;
+ chroma_intra_mode = (mb_tpm >> 6);
+
+ mb_type_stream = luma_intra_mode + 1 + (u4_cbp_c << 2) + (u4_cbp_l == 15) * 12;
+ /* intra mb type code words are offset by 5 in this function (I4x4 is written as 5) */
+ mb_type_stream += 5;
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, mb_type_stream, error_status, "mb type");
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else if (mb_type == I4x4)
+ {
+ /* mb sub blk modes */
+ WORD32 intra_pred_mode_flag, rem_intra_mode;
+ WORD32 byte;
+
+ is_inter = 0;
+
+ chroma_intra_mode = (mb_tpm >> 6);
+ cbptable = 0;
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type");
+
+ for (i = 0; i < 16; i += 2)
+ {
+ /* first sub blk of the pair: low nibble of the packed byte */
+ byte = *pu1_byte++;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode: bits 1-3 of the nibble, sent only when the flag is 0 */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+
+ /* second sub blk of the pair: high nibble of the same byte */
+ byte >>= 4;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode: bits 1-3 of the nibble, sent only when the flag is 0 */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+ }
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else if (mb_type == I8x8)
+ {
+ /* transform 8x8 flag */
+ UWORD32 u4_transform_size_8x8_flag = ps_ent_ctxt->i1_transform_8x8_mode_flag;
+
+ /* mb sub blk modes */
+ WORD32 intra_pred_mode_flag, rem_intra_mode;
+ WORD32 byte;
+
+ is_inter = 0;
+
+ chroma_intra_mode = (mb_tpm >> 6);
+ cbptable = 0;
+ /* NOTE(review): ASSERT(0) suggests this path is not expected to run - confirm */
+ ASSERT(0);
+
+ /* write mb type (same code word 5 as the I4x4 branch) */
+ PUT_BITS_UEV(ps_bitstream, 5, error_status, "mb type");
+
+ /* u4_transform_size_8x8_flag */
+ PUT_BITS(ps_bitstream, u4_transform_size_8x8_flag, 1, error_status, "u4_transform_size_8x8_flag");
+
+ /* write sub block modes */
+ for (i = 0; i < 4; i++)
+ {
+ /* first mode of the packed byte: low nibble */
+ byte = *pu1_byte++;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+
+ /* second mode of the packed byte: high nibble */
+ byte >>= 4;
+
+ intra_pred_mode_flag = byte & 0x1;
+
+ /* prev_intra4x4_pred_mode_flag */
+ PUT_BITS(ps_bitstream, intra_pred_mode_flag, 1, error_status, "prev_intra4x4_pred_mode_flag");
+
+ /* rem_intra4x4_pred_mode */
+ if (!intra_pred_mode_flag)
+ {
+ rem_intra_mode = (byte & 0xF) >> 1;
+ PUT_BITS(ps_bitstream, rem_intra_mode, 3, error_status, "rem_intra4x4_pred_mode");
+ }
+ }
+
+ /* intra_chroma_pred_mode */
+ PUT_BITS_UEV(ps_bitstream, chroma_intra_mode, error_status, "intra_chroma_pred_mode");
+ }
+ else
+ {
+ /* inter macro block partition cnt, indexed by mb_type - 3 */
+ const UWORD8 au1_part_cnt[] = { 1, 2, 2, 4 };
+
+ /* mv ptr: (x, y) WORD16 pairs follow the header. NOTE(review): confirm pu1_byte is 2-byte aligned here */
+ WORD16 *pi2_mv_ptr = (WORD16 *)pu1_byte;
+
+ /* number of partitions for the current mb */
+ UWORD32 u4_part_cnt = au1_part_cnt[mb_type - 3];
+
+ is_inter = 1;
+
+ /* write mb type */
+ PUT_BITS_UEV(ps_bitstream, mb_type - 3, error_status, "mb type");
+
+ for (i = 0; i < (WORD32)u4_part_cnt; i++)
+ {
+ PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv x");
+
+ PUT_BITS_SEV(ps_bitstream, *pi2_mv_ptr++, error_status, "mv y");
+ }
+ /* resume byte-wise header parsing after the motion vectors */
+ pu1_byte = (UWORD8 *)pi2_mv_ptr;
+ }
+
+ /* coded_block_pattern (not sent for I16x16, where the mb type carries it) */
+ if (mb_type != I16x16)
+ {
+ PUT_BITS_UEV(ps_bitstream, gu1_cbp_map_tables[cbp][cbptable], error_status, "coded_block_pattern");
+ }
+
+ if (cbp || mb_type == I16x16)
+ {
+ /* mb_qp_delta */
+ PUT_BITS_SEV(ps_bitstream, mb_qp_delta, error_status, "mb_qp_delta");
+ }
+
+
+ /* Ending bitstream offset for header in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+ ps_ent_ctxt->u4_header_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+ /* start bitstream offset for residue in bits */
+ bitstream_start_offset = bitstream_end_offset;
+
+ /* residual. NOTE(review): this assignment replaces any error_status set by the header writes above - confirm */
+ error_status = ih264e_encode_residue(ps_ent_ctxt, mb_type, cbp);
+
+ /* Ending bitstream offset for residue in bits */
+ bitstream_end_offset = GET_NUM_BITS(ps_bitstream);
+
+ ps_ent_ctxt->u4_residue_bits[is_inter] += bitstream_end_offset - bitstream_start_offset;
+
+ /* store the index of the next mb syntax layer */
+ ps_ent_ctxt->pv_mb_header_data = pu1_byte;
+
+ return error_status;
+}
diff --git a/encoder/ih264e_cavlc.h b/encoder/ih264e_cavlc.h
new file mode 100755
index 0000000..86f4cd4
--- /dev/null
+++ b/encoder/ih264e_cavlc.h
@@ -0,0 +1,112 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_cavlc.h
+*
+* @brief
+* This file contains enumerations, macros and extern declarations of H264
+* cavlc tables
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_CAVLC_H_
+#define IH264E_CAVLC_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+/* Unpack one packed 4x4 coeff record found at pv_mb_coeff_data and advance pv_mb_coeff_data past it */
+#define PARSE_COEFF_DATA_BLOCK_4x4(pv_mb_coeff_data, ps_mb_coeff_data, u4_nnz, u4_sig_coeff_map, pi2_res_block) \
+ { /* NOTE(review): bare brace block; a do { } while (0) wrapper would be safer inside if/else - confirm callers */ \
+ ps_mb_coeff_data = pv_mb_coeff_data; \
+ u4_nnz = ps_mb_coeff_data->i4_sig_map_nnz & 0xff; /* low byte: nnz count */ \
+ if (u4_nnz)\
+ {\
+ u4_sig_coeff_map = ps_mb_coeff_data->i4_sig_map_nnz >> 16; /* bits 16 and up: significance map */ \
+ pi2_res_block = ps_mb_coeff_data->ai2_residue; \
+ pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue + u4_nnz; /* skip the u4_nnz stored residues */ \
+ }\
+ else\
+ {\
+ pv_mb_coeff_data = ps_mb_coeff_data->ai2_residue; /* empty block: map and residue outputs are left untouched */ \
+ }\
+ }
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates CAVLC coded bit stream for an Intra Slice.
+*
+* @description
+* The mb syntax layer for intra slices constitutes luma mb mode, luma sub modes
+* (if present), mb qp delta, coded block pattern, chroma mb mode and
+* luma/chroma residue. These syntax elements are written as directed by table
+* 7.3.5 of h264 specification.
+*
+* @param[in] ps_ent_ctxt
+* pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_islice_mb(entropy_ctxt_t *ps_ent_ctxt);
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function generates CAVLC coded bit stream for Inter slices
+*
+* @description
+* The mb syntax layer for inter slices constitutes luma mb mode, luma sub modes
+* (if present), mb qp delta, coded block pattern, chroma mb mode and
+* luma/chroma residue. These syntax elements are written as directed by table
+* 7.3.5 of h264 specification
+*
+* @param[in] ps_ent_ctxt
+* pointer to entropy context
+*
+* @returns error code
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_write_pslice_mb(entropy_ctxt_t *ps_ent_ctxt);
+
+#endif /* IH264E_CAVLC_H_ */
diff --git a/encoder/ih264e_config.h b/encoder/ih264e_config.h
new file mode 100755
index 0000000..2446cdb
--- /dev/null
+++ b/encoder/ih264e_config.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_config.h
+*
+* @brief
+* contains any necessary declarations/definitions that are used during codec
+* build
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_CONFIG_H_
+#define IH264E_CONFIG_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define CAVLC_LEVEL_STATS 0
+#define GATING_STATS 0
+#define DEBUG_PRINT 0
+#define ENABLE_TRACE 0
+#define DEBUG_RC 0
+#define TRACE_SUPPORT 0
+
+#endif /* IH264E_CONFIG_H_ */
diff --git a/encoder/ih264e_core_coding.c b/encoder/ih264e_core_coding.c
new file mode 100755
index 0000000..5ba18de
--- /dev/null
+++ b/encoder/ih264e_core_coding.c
@@ -0,0 +1,2365 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_core_coding.c
+ *
+ * @brief
+ * This file contains routines that perform luma and chroma core coding for
+ * intra macroblocks
+ *
+ * @author
+ * ittiam
+ *
+ * @par List of Functions:
+ * - ih264e_pack_l_mb_i16()
+ * - ih264e_pack_c_mb_i8()
+ * - ih264e_code_luma_intra_macroblock_16x16()
+ * - ih264e_code_luma_intra_macroblock_4x4()
+ * - ih264e_code_chroma_intra_macroblock_8x8()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264_trans_data.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_globals.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_mc.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the DCT transform, then the Hadamard transform
+* and quantization for a macroblock when the mb mode is intra 16x16 mode
+*
+* @par Description:
+* First cf4 is done on all 16 4x4 blocks of the 16x16 input block.
+* Then hadamard transform is done on the DC coefficients
+* Quantization is then performed on the 16x16 block, 4x4 wise
+* @param[in] ps_codec : codec context supplying pf_resi_trans_quant_4x4 and pf_hadamard_quant_4x4
+* @param[in] pu1_src
+* Pointer to source sub-block
+*
+* @param[in] pu1_pred
+* Pointer to prediction sub-block
+*
+* @param[out] pi2_out
+* Pointer to residual sub-block
+* The output will be in linear format
+* The first 16 continuous locations will contain the values of Dc block
+* After DC block and a stride 1st AC block will follow
+* After one more stride next AC block will follow
+* The blocks will be in raster scan order
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* Prediction stride
+*
+* @param[in] dst_strd
+* Destination stride
+*
+* @param[in] pu2_scale_matrix
+* The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+* Threshold matrix
+*
+* @param[in] u4_qbits
+* 15+QP/6
+*
+* @param[in] u4_round_factor
+* Round factor for quant
+*
+* @param[out] pu1_nnz
+* Memory to store the non-zeros after transform
+* The first byte will be the nnz of DC block
+* From the next byte the AC nnzs will be stored in raster scan order
+*
+* @param[in] u4_dc_flag
+* Signals if Dc transform is to be done or not
+* 1 -> Dc transform will be done
+* 0 -> Dc transform will not be done
+*
+* @remarks
+* When u4_dc_flag is 0 the function returns right after the 16 4x4 transforms
+*******************************************************************************
+*/
+void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec,
+ UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_out,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix,
+ UWORD32 u4_qbits,
+ UWORD32 u4_round_factor,
+ UWORD8 *pu1_nnz,
+ UWORD32 u4_dc_flag)
+
+{
+ WORD32 blk_cntr;
+ WORD32 i4_offsetx, i4_offsety;
+ UWORD8 *pu1_curr_src, *pu1_curr_pred;
+ /* start of the output buffer; element blk_cntr is handed to each 4x4 call as its dc slot */
+ WORD16 *pi2_dc_str = pi2_out;
+
+ /* Move to the ac addresses; entry 0 of pu1_nnz and row 0 of pi2_out are kept for dc */
+ pu1_nnz++;
+ pi2_out += dst_strd;
+
+ for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++)
+ {
+ IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety);
+
+ pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
+ pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
+
+ ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred,
+ pi2_out + blk_cntr * dst_strd,
+ src_strd, pred_strd, pu2_scale_matrix,
+ pu2_threshold_matrix, u4_qbits,
+ u4_round_factor, &pu1_nnz[blk_cntr],
+ &pi2_dc_str[blk_cntr]);
+
+ }
+ /* without the dc transform there is nothing more to do */
+ if (!u4_dc_flag)
+ return;
+
+ /*
+ * In case of i16x16, we need to remove the contribution of dc coeffs into
+ * nnz of each block. We are doing that in the packing function
+ */
+
+ /* Adjust pointers to point to dc values */
+ pi2_out -= dst_strd;
+ pu1_nnz--;
+ /* the dc block is quantized with qbits + 1 and twice the round factor */
+ u4_qbits++;
+ u4_round_factor <<= 1;
+
+ ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix,
+ pu2_threshold_matrix, u4_qbits,
+ u4_round_factor, &pu1_nnz[0]);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the intra 16x16 inverse transform process for H264
+* it includes inverse Dc transform, inverse quant and then inverse transform
+*
+* @par Description:
+*
+* @param[in] pi2_src
+* Input data, 16x16 size
+* First 16 mem locations will have the DC coeffs in raster scan order in linear fashion
+* after a stride, the 1st AC block will be present, again in raster scan order
+* Then each AC block of the 16x16 block will follow in raster scan order
+*
+* @param[in] pu1_pred
+* The predicted data, 16x16 size
+* Block by block form
+*
+* @param[in] pu1_out
+* Output 16x16
+* In block by block form
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* input stride for prediction buffer
+*
+* @param[in] out_strd
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+* QP/6
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least 20 in size
+*
+* @param[in] u4_cntrl
+* Controls the transform path
+* total Last 17 bits are used
+* the 16th th bit will correspond to DC block
+* and 32-17 will correspond to the ac blocks in raster scan order
+* bit equaling zero indicates that the entire 4x4 block is zero for DC
+* For AC blocks a bit equaling zero will mean that all 15 AC coeffs of the block are zero
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
+*
+* @returns
+* none
+*
+* @remarks
+* The all zero case must be taken care outside
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec,
+                                                    WORD16 *pi2_src,
+                                                    UWORD8 *pu1_pred,
+                                                    UWORD8 *pu1_out,
+                                                    WORD32 src_strd,
+                                                    WORD32 pred_strd,
+                                                    WORD32 out_strd,
+                                                    const UWORD16 *pu2_iscale_mat,
+                                                    const UWORD16 *pu2_weigh_mat,
+                                                    UWORD32 qp_div,
+                                                    UWORD32 u4_cntrl,
+                                                    UWORD32 u4_dc_trans_flag,
+                                                    WORD32 *pi4_tmp)
+{
+    /* Index of the first coeff the 4x4 kernels inverse-quantise */
+    WORD32 i4_iq_first;
+
+    /*
+     * Pending-block bit queues, drained with DEQUEUE_BLKID_FROM_CONTROL
+     *   u4_ac_queue   : 4x4 blocks that go through the full (ac) path
+     *   u4_dc_queue   : 4x4 blocks that go through the dc-only path
+     *   u4_copy_queue : 4x4 blocks with neither bit set; the prediction
+     *                   is copied to the output unchanged
+     */
+    UWORD32 u4_ac_queue, u4_dc_queue, u4_copy_queue;
+
+    /* Id of the 4x4 block currently being processed */
+    UWORD32 u4_blk;
+
+    /* Position of that block inside the macroblock */
+    WORD32 i4_x, i4_y;
+
+    UWORD8 *pu1_pred_4x4, *pu1_out_4x4;
+
+    /* Location of the dc coeff of block 0 and the step to the next block's dc */
+    WORD16 *pi2_dc_base;
+    UWORD32 u4_dc_step;
+
+    if (u4_dc_trans_flag)
+    {
+        /*
+         * The dc coeffs need the inverse dc transform first. Only after it
+         * has run can the dc bits of the control word be derived.
+         */
+        UWORD32 u4_i, u4_nz_dc = 0;
+
+        i4_iq_first = 1;
+
+        /* Inverse hadamard; source and destination are both pi2_src */
+        ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat,
+                                           pu2_weigh_mat, qp_div, pi4_tmp);
+
+        /* One bit per non-zero dc coeff, block 0 in bit 15 */
+        for (u4_i = 0; u4_i < DC_COEFF_CNT_LUMA_MB; u4_i++)
+        {
+            u4_nz_dc |= ((pi2_src[u4_i] != 0) << (15 - u4_i));
+        }
+
+        /* Keep a dc bit only where the corresponding ac bit is clear */
+        u4_nz_dc = (~(u4_cntrl >> 16) & u4_nz_dc);
+
+        /* Merge ac and dc bits back into the control word */
+        u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA)
+                        | (u4_nz_dc & CNTRL_FLAG_DC_MASK_LUMA);
+
+        /* dc values are read from the first row of src, one per block */
+        pi2_dc_base = pi2_src;
+        u4_dc_step = 1;
+    }
+    else
+    {
+        i4_iq_first = 0;
+
+        /* dc values are read from the blocks themselves, src_strd apart */
+        pi2_dc_base = pi2_src + src_strd;
+        u4_dc_step = src_strd;
+    }
+
+    /* The ac blocks start from the second row of src */
+    pi2_src += src_strd;
+
+    /* Split the control word into the three work queues */
+    u4_ac_queue = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
+    u4_dc_queue = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
+    u4_copy_queue = (~(u4_dc_queue | u4_ac_queue)) & 0xFFFF0000;
+
+    /* Pass 1 : dc-only blocks */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_dc_queue, u4_blk);
+    while (u4_blk < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        WORD32 i4_dc_off = u4_dc_step * u4_blk;
+        WORD16 *pi2_dc = pi2_dc_base + i4_dc_off;
+
+        IND2SUB_LUMA_MB(u4_blk, i4_x, i4_y);
+
+        pu1_pred_4x4 = pu1_pred + i4_x + i4_y * pred_strd;
+        pu1_out_4x4 = pu1_out + i4_x + i4_y * out_strd;
+
+        ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc, pu1_pred_4x4,
+                                                pu1_out_4x4, pred_strd,
+                                                out_strd, pu2_iscale_mat,
+                                                pu2_weigh_mat, qp_div, NULL,
+                                                i4_iq_first, pi2_dc);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_dc_queue, u4_blk);
+    }
+
+    /* Pass 2 : ac / mixed blocks */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_ac_queue, u4_blk);
+    while (u4_blk < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        WORD32 i4_ac_off = src_strd * u4_blk;
+
+        IND2SUB_LUMA_MB(u4_blk, i4_x, i4_y);
+
+        pu1_pred_4x4 = pu1_pred + i4_x + i4_y * pred_strd;
+        pu1_out_4x4 = pu1_out + i4_x + i4_y * out_strd;
+
+        ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_ac_off,
+                                             pu1_pred_4x4, pu1_out_4x4,
+                                             pred_strd, out_strd,
+                                             pu2_iscale_mat, pu2_weigh_mat,
+                                             qp_div, (WORD16*) pi4_tmp,
+                                             i4_iq_first,
+                                             pi2_dc_base + u4_blk);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_ac_queue, u4_blk);
+    }
+
+    /* Pass 3 : blocks without coeffs, output is a copy of the prediction */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_copy_queue, u4_blk);
+    while (u4_blk < NUM_LUMA4x4_BLOCKS_IN_MB)
+    {
+        IND2SUB_LUMA_MB(u4_blk, i4_x, i4_y);
+
+        pu1_pred_4x4 = pu1_pred + i4_x + i4_y * pred_strd;
+        pu1_out_4x4 = pu1_out + i4_x + i4_y * out_strd;
+
+        ps_codec->pf_inter_pred_luma_copy(pu1_pred_4x4, pu1_out_4x4,
+                                          pred_strd, out_strd,
+                                          SIZE_4X4_BLK_HRZ,
+                                          SIZE_4X4_BLK_VERT, 0, 0);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_copy_queue, u4_blk);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Forward transform, dc (hadamard) transform and quantization of a chroma
+*  macroblock
+*
+* @par Description:
+*  The 4x4 residue/transform/quant kernel is run on every 4x4 chroma block
+*  (NUM_CHROMA4x4_BLOCKS_IN_MB of them, U blocks first and then V blocks).
+*  The dc value of each block is collected on the side, and the 2x2 hadamard +
+*  quant kernel is then run on the collected dc values of both planes.
+*
+* @param[in] ps_codec
+*  Codec context; supplies the function pointers used here
+*
+* @param[in] pu1_src
+*  Pointer to source sub-block
+*  The input is in interleaved format for two chroma planes
+*
+* @param[in] pu1_pred
+*  Pointer to prediction sub-block
+*  Prediction is in interleaved format
+*
+* @param[out] pi2_out
+*  Pointer to the quantized coefficients
+*  The dc kernel writes its output at pi2_out itself; the first ac block is
+*  written at pi2_out + out_strd and every following block one out_strd
+*  further on
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  Prediction stride
+*
+* @param[in] out_strd
+*  Distance, in coefficients, between two consecutive blocks in pi2_out
+*
+* @param[in] pu2_scale_matrix
+*  The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+*  Threshold matrix
+*
+* @param[in] u4_qbits
+*  15+QP/6 (the dc kernel is called with u4_qbits + 1)
+*
+* @param[in] u4_round_factor
+*  Round factor for quant (the dc kernel is called with twice this value)
+*
+* @param[out] pu1_nnz_c
+*  Memory to store the non-zero counts after transform
+*  Byte 0 holds the nnz of the U dc block, bytes 1-4 the nnz of the U blocks,
+*  byte 5 the nnz of the V dc block and bytes 6-9 the nnz of the V blocks
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec,
+                                                UWORD8 *pu1_src,
+                                                UWORD8 *pu1_pred,
+                                                WORD16 *pi2_out,
+                                                WORD32 src_strd,
+                                                WORD32 pred_strd,
+                                                WORD32 out_strd,
+                                                const UWORD16 *pu2_scale_matrix,
+                                                const UWORD16 *pu2_threshold_matrix,
+                                                UWORD32 u4_qbits,
+                                                UWORD32 u4_round_factor,
+                                                UWORD8 *pu1_nnz_c)
+{
+    /* dc value of every 4x4 block, in block order */
+    WORD16 ai2_dc_coeffs[8];
+
+    /* nnz of the two dc blocks as reported by the dc kernel */
+    UWORD8 au1_dc_nnz[2];
+
+    /* The ac data sits one stride / one nnz entry after the dc data */
+    WORD16 *pi2_ac_out = pi2_out + out_strd;
+    UWORD8 *pu1_ac_nnz = pu1_nnz_c + 1;
+
+    WORD32 i4_blk;
+
+    for (i4_blk = 0; i4_blk < NUM_CHROMA4x4_BLOCKS_IN_MB; i4_blk++)
+    {
+        WORD32 i4_x, i4_y;
+        UWORD8 *pu1_src_4x4, *pu1_pred_4x4;
+
+        IND2SUB_CHROMA_MB(i4_blk, i4_x, i4_y);
+
+        pu1_src_4x4 = pu1_src + i4_x + i4_y * src_strd;
+        pu1_pred_4x4 = pu1_pred + i4_x + i4_y * pred_strd;
+
+        /*
+         * Blocks 4..7 get one extra nnz slot of offset so that the slot of
+         * the second dc block (pu1_nnz_c[5]) is skipped
+         */
+        ps_codec->pf_resi_trans_quant_chroma_4x4(
+                        pu1_src_4x4, pu1_pred_4x4,
+                        pi2_ac_out + i4_blk * out_strd, src_strd, pred_strd,
+                        pu2_scale_matrix, pu2_threshold_matrix, u4_qbits,
+                        u4_round_factor, &pu1_ac_nnz[i4_blk + (i4_blk > 3)],
+                        &ai2_dc_coeffs[i4_blk]);
+    }
+
+    /* dc pass: one more qbit and a doubled round factor */
+    ps_codec->pf_hadamard_quant_2x2_uv(ai2_dc_coeffs, pi2_out,
+                                       pu2_scale_matrix, pu2_threshold_matrix,
+                                       u4_qbits + 1, u4_round_factor << 1,
+                                       au1_dc_nnz);
+
+    /* Publish the dc nnzs */
+    pu1_nnz_c[0] = au1_dc_nnz[0];
+    pu1_nnz_c[5] = au1_dc_nnz[1];
+}
+
+/**
+*******************************************************************************
+* @brief
+*  Inverse dc transform, inverse quantization, inverse transform and
+*  reconstruction of a chroma MB
+*
+* @par Description:
+*  When the control word flags a dc block, the 2x2 inverse hadamard is run on
+*  the dc coeffs of both planes. Every 4x4 block is then reconstructed through
+*  one of three paths: dc-only, full 4x4 (ac), or a plain copy of the
+*  prediction when the block has no coefficients at all.
+*
+* @param[in] ps_codec
+*  Codec context; supplies the function pointers used here
+*
+* @param[in] pi2_src
+*  Input coefficients
+*  The first 4 locations contain the dc coeffs of the U plane, the next 4 the
+*  dc coeffs of the V plane. The ac blocks start at pi2_src + src_strd, one
+*  block every src_strd, U blocks first and V blocks after them
+*
+* @param[in] pu1_pred
+*  The predicted data, U and V interleaved
+*
+* @param[out] pu1_out
+*  Reconstructed output, U and V interleaved
+*
+* @param[in] src_strd
+*  Source stride
+*
+* @param[in] pred_strd
+*  input stride for prediction buffer
+*
+* @param[in] out_strd
+*  input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+*  Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+*  weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+*  QP/6
+*
+* @param[in] u4_cntrl
+*  Controls the transform path
+*  Bit 15 corresponds to the dc block of the U plane, bit 14 to the dc block
+*  of the V plane
+*  Bits 31-28 indicate the ac blocks of the U plane in raster scan order
+*  Bits 27-24 indicate the ac blocks of the V plane in raster scan order
+*  A set bit implies that there is at least one non zero coeff in the block
+*
+* @param[in] pi4_tmp
+*  Temporary buffer handed to the 4x4 kernel
+*
+* @returns
+*  none
+*
+* @remarks
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec,
+                                                    WORD16 *pi2_src,
+                                                    UWORD8 *pu1_pred,
+                                                    UWORD8 *pu1_out,
+                                                    WORD32 src_strd,
+                                                    WORD32 pred_strd,
+                                                    WORD32 out_strd,
+                                                    const UWORD16 *pu2_iscale_mat,
+                                                    const UWORD16 *pu2_weigh_mat,
+                                                    UWORD32 qp_div,
+                                                    UWORD32 u4_cntrl,
+                                                    WORD32 *pi4_tmp)
+{
+    /*
+     * Pending-block bit queues, drained with DEQUEUE_BLKID_FROM_CONTROL
+     *   u4_ac_queue   : 4x4 blocks that go through the full (ac) path
+     *   u4_dc_queue   : 4x4 blocks that go through the dc-only path
+     *   u4_copy_queue : 4x4 blocks with neither bit set
+     */
+    UWORD32 u4_ac_queue, u4_dc_queue, u4_copy_queue;
+
+    /* Id of the 4x4 block currently being processed */
+    WORD32 i4_blk;
+
+    /* Position of that block inside the macroblock */
+    WORD32 i4_x, i4_y;
+
+    UWORD8 *pu1_pred_4x4, *pu1_out_4x4;
+
+    /* Stand-in dc value used when no dc block is present */
+    WORD16 i2_no_dc = 0;
+
+    /* Location of the dc coeff of block 0 and the step to the next block's dc */
+    WORD16 *pi2_dc_base;
+    WORD32 i4_dc_step;
+
+    if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
+    {
+        UWORD32 u4_i, u4_nz_dc = 0;
+
+        /* Inverse hadamard for the u and v dc blocks, in place */
+        ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src,
+                                              pu2_iscale_mat, pu2_weigh_mat,
+                                              qp_div, NULL);
+
+        /* One bit per non-zero dc coeff, block 0 in bit 15 */
+        for (u4_i = 0; u4_i < 8; u4_i++)
+        {
+            u4_nz_dc |= ((pi2_src[u4_i] != 0) << (15 - u4_i));
+        }
+
+        /* Keep a dc bit only where the corresponding ac bit is clear */
+        u4_nz_dc = (~(u4_cntrl >> 16) & u4_nz_dc);
+
+        /* Merge ac and dc bits back into the control word */
+        u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA)
+                        | (u4_nz_dc & CNTRL_FLAG_DC_MASK_CHROMA);
+
+        /* The dc coeffs were just written to pi2_src; read them from there */
+        pi2_dc_base = pi2_src;
+        i4_dc_step = 1;
+    }
+    else
+    {
+        /* No dc block: every block sees the same zero dc value */
+        u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
+        pi2_dc_base = &i2_no_dc;
+        i4_dc_step = 0;
+    }
+
+    /* The ac blocks start from the second row of src */
+    pi2_src += src_strd;
+
+    /* Split the control word into the three work queues */
+    u4_ac_queue = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
+    u4_dc_queue = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
+    u4_copy_queue = (~(u4_dc_queue | u4_ac_queue)) & 0xFF000000;
+
+    /* Pass 1 : dc-only blocks */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_dc_queue, i4_blk);
+    while (i4_blk < 8)
+    {
+        WORD32 i4_dc_off = i4_blk * i4_dc_step;
+
+        IND2SUB_CHROMA_MB(i4_blk, i4_x, i4_y);
+
+        pu1_pred_4x4 = pu1_pred + i4_x + i4_y * pred_strd;
+        pu1_out_4x4 = pu1_out + i4_x + i4_y * out_strd;
+
+        ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc(
+                        pi2_dc_base + i4_dc_off, pu1_pred_4x4,
+                        pu1_out_4x4, pred_strd, out_strd, NULL, NULL, 0,
+                        NULL, pi2_dc_base + i4_dc_off);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_dc_queue, i4_blk);
+    }
+
+    /* Pass 2 : ac / mixed blocks */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_ac_queue, i4_blk);
+    while (i4_blk < 8)
+    {
+        WORD32 i4_ac_off = src_strd * i4_blk;
+        WORD32 i4_dc_off = i4_dc_step * i4_blk;
+
+        IND2SUB_CHROMA_MB(i4_blk, i4_x, i4_y);
+
+        pu1_pred_4x4 = pu1_pred + i4_x + i4_y * pred_strd;
+        pu1_out_4x4 = pu1_out + i4_x + i4_y * out_strd;
+
+        ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_ac_off,
+                                                    pu1_pred_4x4,
+                                                    pu1_out_4x4,
+                                                    pred_strd, out_strd,
+                                                    pu2_iscale_mat,
+                                                    pu2_weigh_mat, qp_div,
+                                                    (WORD16 *) pi4_tmp,
+                                                    pi2_dc_base + i4_dc_off);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_ac_queue, i4_blk);
+    }
+
+    /* Pass 3 : blocks without coeffs, output is a copy of the prediction */
+    DEQUEUE_BLKID_FROM_CONTROL(u4_copy_queue, i4_blk);
+    while (i4_blk < 8)
+    {
+        IND2SUB_CHROMA_MB(i4_blk, i4_x, i4_y);
+
+        pu1_pred_4x4 = pu1_pred + i4_x + i4_y * pred_strd;
+        pu1_out_4x4 = pu1_out + i4_x + i4_y * out_strd;
+
+        ps_codec->pf_interleave_copy(pu1_pred_4x4, pu1_out_4x4,
+                                     pred_strd, out_strd, SIZE_4X4_BLK_VERT,
+                                     SIZE_4X4_BLK_HRZ);
+
+        DEQUEUE_BLKID_FROM_CONTROL(u4_copy_queue, i4_blk);
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i16x16 luma mb for entropy coding
+*
+* @par Description
+*  An i16 macro block contains two classes of units, dc 4x4 block and
+*  4x4 ac blocks. while packing the mb, the dc block is sent first, and
+*  the 16 ac blocks are sent next in scan order. Each and every block is
+*  represented by 3 parameters (nnz, significant coefficient map and the
+*  residue coefficients itself). If a 4x4 unit does not have any coefficients
+*  then only nnz is sent. Inside a 4x4 block the individual coefficients are
+*  sent in scan order.
+*
+*  Each block is written as one 32 bit word holding the nnz in its low half
+*  and the significance map in its high half, followed by the non zero
+*  coefficients. This is repeated for 1 dc + 16 ac blocks.
+*
+* @param[in] pi2_res_mb
+*  pointer to residue mb; the dc block is at pi2_res_mb, the ac blocks start
+*  at pi2_res_mb + i4_res_strd, one block every i4_res_strd
+*
+* @param[in, out] pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients; advanced past the data
+*  written here
+*
+* @param[in] i4_res_strd
+*  residual block stride
+*
+* @param[in, out] u1_cbp_l
+*  coded block pattern luma; set to 15 as soon as one ac block has non zero
+*  ac coefficients, otherwise left untouched. It is read back at the end, so
+*  the caller is expected to pass it in initialised
+*
+* @param[in] pu1_nnz
+*  number of non zero coefficients; entry 0 is the dc block, entries 1..16
+*  are the 4x4 units
+*
+* @param[out] pu4_cntrl
+*  Control signal for inverse transform of 16x16 blocks; bit 15 flags the dc
+*  block, bit (31 - blk) flags ac block blk
+*
+* @return none
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb,
+                          void **pv_mb_coeff_data,
+                          WORD32 i4_res_strd,
+                          UWORD8 *u1_cbp_l,
+                          UWORD8 *pu1_nnz,
+                          UWORD32 *pu4_cntrl)
+{
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order;
+
+    /* number of non zeros in sub block */
+    UWORD32 u4_nnz;
+
+    /* order in which the 4x4 blocks are visited */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* temp var */
+    UWORD32 coeff_cnt, mask, b4, u4_cntrl = 0;
+
+    /* DC and AC coeff pointers */
+    WORD16 *pi2_res_mb_ac, *pi2_res_mb_dc;
+
+    /********************************************************/
+    /*  pack dc coeff data for entropy coding               */
+    /********************************************************/
+
+    pi2_res_mb_dc = pi2_res_mb;
+    pu1_scan_order = gu1_luma_scan_order_dc;
+
+    u4_nnz = *pu1_nnz;
+
+    /* write number of non zero coefficients */
+    ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+    if (u4_nnz)
+    {
+        /* walk the dc block in scan order until all nnz coeffs are found */
+        for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+        {
+            if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
+            {
+                /* write residue */
+                ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
+                u4_s_map |= mask;
+            }
+            mask <<= 1;
+        }
+        /* write significant coeff map */
+        ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+        (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+        /* Set DC bit in ctrl code */
+        u4_cntrl = 0x00008000;
+    }
+    else
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+    }
+
+    /********************************************************/
+    /*  pack ac coeff data for entropy coding               */
+    /********************************************************/
+
+    pu1_nnz ++;
+    pu1_scan_order = gu1_luma_scan_order;
+    pi2_res_mb += i4_res_strd; /* Move to AC block */
+
+    /* remembered so that the ac headers can be dropped if no ac is coded */
+    ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
+
+    for (b4 = 0; b4 < 16; b4++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_nnz = pu1_nnz[u1_scan_order[b4]];
+
+        /* Jump according to the scan order */
+        pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
+
+        /*
+         * Since this is a i16x16 block, we should not count dc coeff on indi
+         * vidual 4x4 blocks to nnz. But due to the implementation of 16x16
+         * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
+         * here
+         */
+        u4_nnz -= (pi2_res_mb_ac[0] != 0);
+
+        /* write number of non zero coefficients */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            /* scan starts at position 1: position 0 is the dc coeff */
+            for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
+                {
+                    /* write residue */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
+                    u4_s_map |= mask;
+                }
+                mask <<= 1;
+            }
+            /* write significant coeff map */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+            *u1_cbp_l = 15;
+
+            /*
+             * 1U, not 1: for block 0 the shift count is 31 and shifting a
+             * signed 1 into the sign bit is undefined behaviour
+             */
+            u4_cntrl |= (1U << (31 - u1_scan_order[b4]));
+        }
+        else
+        {
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+
+    }
+
+    /* no ac block was coded: rewind over the 16 empty ac headers */
+    if (!(*u1_cbp_l))
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
+    }
+
+    /* Store the cntrl signal */
+    (*pu4_cntrl) = u4_cntrl;
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of a luma mb (without dc transform)
+*  for entropy coding
+*
+* @par Description
+*  The macro block is packed as 16 4x4 blocks, sent in scan order. Each and
+*  every block is represented by 3 parameters (nnz, significant coefficient
+*  map and the residue coefficients itself). If a 4x4 unit does not have any
+*  coefficients then only nnz is sent. Inside a 4x4 block the individual
+*  coefficients are sent in scan order.
+*
+*  Each block is written as one 32 bit word holding the nnz in its low half
+*  and the significance map in its high half, followed by the non zero
+*  coefficients.
+*
+*  When u4_thres_resi is set, a coefficient cost is accumulated per 8x8 unit
+*  and per macro block. An 8x8 unit whose cost does not exceed
+*  LUMA_SUB_BLOCK_SKIP_THRESHOLD, or a macro block whose cost does not exceed
+*  LUMA_BLOCK_SKIP_THRESHOLD, is dropped: its cbp bits, control bits and nnz
+*  entries are cleared and the packed data is rewound.
+*
+* @param[in] pi2_res_mb
+*  pointer to residue mb; the first row (i4_res_strd coefficients) is skipped
+*
+* @param[in, out] pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients; advanced past the data
+*  written here
+*
+* @param[in] i4_res_strd
+*  residual block stride
+*
+* @param[in, out] u1_cbp_l
+*  coded block pattern luma, one bit per 8x8 unit; bits are or-ed in, so the
+*  caller is expected to pass it in initialised
+*
+* @param[in, out] pu1_nnz
+*  number of non zero coefficients in each 4x4 unit; entry 0 is skipped.
+*  Entries of dropped units are reset to 0
+*
+* @param[in] u4_thres_resi
+*  non zero enables the cost based dropping described above
+*
+* @param[out] pu4_cntrl
+*  Control signal for inverse transform; bit (31 - blk) flags 4x4 block blk
+*
+* @return none
+*
+* @remarks Killing coffs not yet coded
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb(WORD16 *pi2_res_mb,
+                      void **pv_mb_coeff_data,
+                      WORD32 i4_res_strd,
+                      UWORD8 *u1_cbp_l,
+                      UWORD8 *pu1_nnz,
+                      UWORD32 u4_thres_resi,
+                      UWORD32 *pu4_cntrl)
+{
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+    /* number of non zeros in sub block */
+    UWORD32 u4_nnz;
+
+    /* pointer to residual sub block */
+    WORD16 *pi2_res_sb;
+
+    /* order in which the 4x4 blocks are visited (8x8 unit by 8x8 unit) */
+    const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+    /* coeff cost */
+    const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
+
+    /* temp var */
+    UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;
+
+    /* temp var */
+    WORD32 i4_res_val, i4_run = -1;
+
+    /* When Hadamard transform is disabled, first row values are dont care, ignore them */
+    pi2_res_mb += i4_res_strd;
+
+    /* When Hadamard transform is disabled, first unit value is dont care, ignore this */
+    pu1_nnz ++;
+
+    /* rewind points for a dropped 8x8 unit / a dropped macro block */
+    ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
+
+    /********************************************************/
+    /*  pack coeff data for entropy coding                  */
+    /********************************************************/
+
+    for (b4 = 0; b4 < 16; b4++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        b8 = b4 >> 2;
+
+        u4_nnz = pu1_nnz[u1_scan_order[b4]];
+
+        /* Jump according to the scan order */
+        pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
+
+        /* write number of non zero coefficients */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                /* number of runs of zero before, this is used to compute coeff cost */
+                i4_run++;
+
+                i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+
+                if (i4_res_val)
+                {
+                    /* write residue */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
+                    u4_s_map |= mask;
+
+                    if (u4_thres_resi)
+                    {
+                        /* compute coeff cost */
+                        if (i4_res_val == 1 || i4_res_val == -1)
+                        {
+                            if (i4_run < 6)
+                                u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
+                        }
+                        else
+                            u4_b8_coeff_cost += 9;
+
+                        i4_run = -1;
+                    }
+                }
+
+                mask <<= 1;
+            }
+
+            /* write significant coeff map */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+            /* cbp */
+            *u1_cbp_l |= (1 << b8);
+
+            /*
+             * Cntrl map for inverse transform computation
+             *
+             * Every coded block is flagged in the ac half of the control
+             * word. 1U, not 1: for block 0 the shift count is 31 and shifting
+             * a signed 1 into the sign bit is undefined behaviour.
+             *
+             * NOTE(review): this code used to select a dc-only bit position
+             * when coeff_cnt == 0. coeff_cnt is always >= 1 here because the
+             * scan loop above runs at least once, so that selection never
+             * took effect and dc-only blocks have always been flagged as ac
+             * blocks. Routing them through the dc path would need a
+             * deliberate change.
+             */
+            u4_cntrl |= (1U << (31 - u1_scan_order[b4]));
+        }
+        else
+        {
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+
+        /* Decide if the 8x8 unit has to be sent for entropy coding? */
+        if ((b4+1) % 4 == 0)
+        {
+            if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
+                            (*u1_cbp_l & (1 << b8)) )
+            {
+                /*
+                 * When we want to reset the full 8x8 block, we have to reset
+                 * both the dc and ac coeff bits hence we have the symmetric
+                 * arrangement of bits
+                 */
+                const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};
+
+                /* restore cbp */
+                *u1_cbp_l = (*u1_cbp_l & (~(1 << b8)));
+
+                /* correct cntrl flag */
+                u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);
+
+                /* correct nnz */
+                pu1_nnz[u1_scan_order[b4 - 3]] = 0;
+                pu1_nnz[u1_scan_order[b4 - 2]] = 0;
+                pu1_nnz[u1_scan_order[b4 - 1]] = 0;
+                pu1_nnz[u1_scan_order[b4]] = 0;
+
+                /* reset blk cost */
+                u4_b8_coeff_cost = 0;
+            }
+
+            /* nothing coded in this 8x8 unit: rewind over its four headers */
+            if (!(*u1_cbp_l & (1 << b8)))
+            {
+                (*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
+            }
+
+            u4_mb_coeff_cost += u4_b8_coeff_cost;
+
+            /* start afresh for the next 8x8 unit */
+            u4_b8_coeff_cost = 0;
+            i4_run = -1;
+            ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
+        }
+    }
+
+    /* drop the whole macro block when its total cost is small enough */
+    if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD)
+                    && (*u1_cbp_l))
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
+        *u1_cbp_l = 0;
+        u4_cntrl = 0;
+        memset(pu1_nnz, 0, 16);
+    }
+
+    (*pu4_cntrl) = u4_cntrl;
+
+    return;
+}
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i8x8 chroma mb for entropy coding
+*
+* @par Description
+*  An i8 chroma macro block contains two classes of units, dc 2x2 block and
+*  4x4 ac blocks. while packing the mb, the dc blocks of both planes are sent
+*  first, and the 4 ac blocks of each plane are sent next in scan order. Each
+*  and every block is represented by 3 parameters (nnz, significant
+*  coefficient map and the residue coefficients itself). If a 4x4 unit does
+*  not have any coefficients then only nnz is sent. Inside a 4x4 block the
+*  individual coefficients are sent in scan order.
+*
+*  Each block is written as one 32 bit word holding the nnz in its low half
+*  and the significance map in its high half, followed by the non zero
+*  coefficients.
+*
+*  When u4_thres_resi is set, a coefficient cost is accumulated per plane and
+*  the ac blocks of a plane whose cost stays below CHROMA_BLOCK_SKIP_THRESHOLD
+*  are dropped: four empty headers are written instead, and the control bits
+*  and nnz entries of that plane are cleared.
+*
+* @param[in] pi2_res_mb
+*  pointer to residue mb; U dc at +0, V dc at +4, U ac blocks from
+*  + i4_res_strd, V ac blocks from + 5 * i4_res_strd
+*
+* @param[in, out] pv_mb_coeff_data
+*  buffer pointing to packed residue coefficients; advanced past the data
+*  written here
+*
+* @param[in] i4_res_strd
+*  residual block stride
+*
+* @param[in, out] u1_cbp_c
+*  coded block pattern chroma; set to 1 when a dc block is coded and to 2
+*  when the ac blocks of a plane are coded. It is read before being written,
+*  so the caller is expected to pass it in initialised
+*
+* @param[in, out] pu1_nnz
+*  number of non zero coefficients: entry 0 U dc, entries 1-4 U blocks,
+*  entry 5 V dc, entries 6-9 V blocks. Entries of dropped planes are reset
+*  to 0
+*
+* @param[in] u4_thres_resi
+*  non zero enables the cost based dropping described above
+*
+* @param[out] pu4_cntrl
+*  Control signal for inverse transform; bit 15 / 14 flag the U / V dc block,
+*  bits 31-28 the U ac blocks and bits 27-24 the V ac blocks
+*
+* @param[in] u4_swap_uv
+*  Swaps the order of U and V planes in entropy bitstream (expected 0 or 1)
+*
+* @return none
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
+                      void **pv_mb_coeff_data,
+                      WORD32 i4_res_strd,
+                      UWORD8 *u1_cbp_c,
+                      UWORD8 *pu1_nnz,
+                      UWORD32 u4_thres_resi,
+                      UWORD32 *pu4_cntrl,
+                      UWORD32 u4_swap_uv)
+{
+    /* pointer to packed sub block buffer space */
+    tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data);
+    tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac;
+
+    /* nnz pointer */
+    UWORD8 *pu1_nnz_ac, *pu1_nnz_dc;
+
+    /* nnz counter */
+    UWORD32 u4_nnz_cnt;
+
+    /* significant coefficient map */
+    UWORD32 u4_s_map;
+
+    /* pointer to scanning matrix */
+    const UWORD8 *pu1_scan_order;
+
+    /* no of non zero coefficients in the current sub block */
+    UWORD32 u4_nnz;
+
+    /* pointer to residual sub block, res val */
+    WORD16 *pi2_res_sb, i2_res_val;
+
+    /* temp var */
+    UWORD32 coeff_cnt, mask, b4, plane;
+
+    /* temp var */
+    UWORD32 u4_coeff_cost;
+    WORD32 i4_run;
+
+    /* coeff cost */
+    const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
+
+    /* pointer to packed buffer space */
+    UWORD32 *pu4_mb_coeff_data = NULL;
+
+    /* ac coded block pattern */
+    UWORD8 u1_cbp_ac;
+
+    /* Variable to store the current bit pos in cntrl variable */
+    UWORD32 cntrl_pos;
+
+    /********************************************************/
+    /*  pack dc coeff data for entropy coding               */
+    /********************************************************/
+    pu1_scan_order = gu1_chroma_scan_order_dc;
+    pi2_res_sb = pi2_res_mb;
+    pu1_nnz_dc = pu1_nnz;
+    (*pu4_cntrl) = 0;
+    cntrl_pos = 15;
+    ps_mb_coeff_data_dc = (*pv_mb_coeff_data);
+
+    /* Color space conversion between SP_UV and SP_VU
+     * We always assume SP_UV for all the processing
+     * Hence to get proper stream output we need to swap U and V channels here
+     *
+     * For that there are two paths we need to look for
+     * One is the path to bitstream, these variables should have the proper
+     * input configured UV or VU
+     * For the other path the inverse transform variables should have whatever
+     * ordering the input had
+     */
+
+    if (u4_swap_uv)
+    {
+        pu1_nnz_dc += 5; /* Move to NNZ of V plane */
+        pi2_res_sb += 4; /* Move to DC coeff of V plane */
+
+        cntrl_pos = 14;  /* Control bit for V plane */
+    }
+
+    for (plane = 0; plane < 2; plane++)
+    {
+        ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_nnz = *pu1_nnz_dc;
+        /* write number of non zero coefficients U/V */
+        ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+        if (u4_nnz)
+        {
+            for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+            {
+                i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+                if (i2_res_val)
+                {
+                    /* write residue U/V */
+                    ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
+                    u4_s_map |= mask;
+                }
+                mask <<= 1;
+            }
+            /* write significant coeff map U/V */
+            ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+            *u1_cbp_c = 1;
+
+            (*pu4_cntrl) |= (1U << cntrl_pos);
+        }
+        else
+        {
+            (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+        }
+
+        /* step to the other plane's dc block */
+        if (u4_swap_uv)
+        {
+            cntrl_pos++;     /* Control bit for U plane */
+            pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
+            pi2_res_sb -= 4; /* Move to DC coeff of U plane */
+        }
+        else
+        {
+            cntrl_pos--;     /* Control bit for V plane */
+            pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
+            pi2_res_sb += 4; /* Move to DC coeff of V plane */
+        }
+    }
+
+    /********************************************************/
+    /*  pack ac coeff data for entropy coding               */
+    /********************************************************/
+
+    pu1_scan_order = gu1_chroma_scan_order;
+    ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
+
+    if (u4_swap_uv)
+    {
+        pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane, ie 1 dc row + 4 ac rows */
+        cntrl_pos = 27;                            /* V ac blocks use bits 27-24 */
+        pu1_nnz_ac = pu1_nnz + 6;                  /* V ac nnz: 1 dc + 4 ac + 1 dc */
+    }
+    else
+    {
+        pi2_res_sb = pi2_res_mb + i4_res_strd;     /* Move to U plane, ie 1 dc row */
+        cntrl_pos = 31;                            /* U ac blocks use bits 31-28 */
+        pu1_nnz_ac = pu1_nnz + 1;                  /* U ac nnz: 1 dc */
+    }
+
+    for (plane = 0; plane < 2; plane++)
+    {
+        /* start of this plane's four ac headers, needed if they get reset */
+        pu4_mb_coeff_data = (*pv_mb_coeff_data);
+
+        u4_coeff_cost = 0;
+        i4_run = -1;
+
+        /* get the current cbp, so that it automatically
+         * gets reverted in case of zero ac values */
+        u1_cbp_ac = *u1_cbp_c;
+
+        for (b4 = 0; b4 < 4; b4++)
+        {
+            ps_mb_coeff_data = (*pv_mb_coeff_data);
+
+            u4_nnz = *pu1_nnz_ac;
+
+            /*
+             * We are scanning only ac coeffs, but the nnz is for the
+             * complete 4x4 block. Hence we have to discount the nnz contributed
+             * by the dc coefficient
+             */
+            u4_nnz -= (pi2_res_sb[0]!=0);
+
+            /* write number of non zero coefficients U/V */
+            ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
+
+            if (u4_nnz)
+            {
+                for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
+                {
+                    i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
+
+                    i4_run++;
+
+                    if (i2_res_val)
+                    {
+                        /* write residue U/V */
+                        ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
+                        u4_s_map |= mask;
+
+                        if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) )
+                        {
+                            /* compute coeff cost */
+                            if (i2_res_val == 1 || i2_res_val == -1)
+                            {
+                                if (i4_run < 6)
+                                    u4_coeff_cost += pu1_coeff_cost[i4_run];
+                            }
+                            else
+                                u4_coeff_cost += 9;
+
+                            i4_run = -1;
+                        }
+                    }
+                    mask <<= 1;
+                }
+
+                /* write significant coeff map U/V */
+                ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+                u1_cbp_ac = 2;
+
+                /*
+                 * 1U, not 1: cntrl_pos can be 31 and shifting a signed 1
+                 * into the sign bit is undefined behaviour
+                 */
+                (*pu4_cntrl) |= (1U << cntrl_pos);
+            }
+            else
+            {
+                (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+            }
+
+            pu1_nnz_ac++;
+            pi2_res_sb += i4_res_strd;
+            cntrl_pos--;
+        }
+
+        /* reset block */
+        if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
+        {
+            /* replace whatever was packed for this plane by 4 empty headers */
+            pu4_mb_coeff_data[0] = 0;
+            pu4_mb_coeff_data[1] = 0;
+            pu4_mb_coeff_data[2] = 0;
+            pu4_mb_coeff_data[3] = 0;
+            (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;
+
+            /* Generate the control signal */
+            /* Zero out the current plane's AC coefficients */
+            (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF);
+
+            /* Similarly do for the NNZ also */
+            *(pu1_nnz_ac - 4) = 0;
+            *(pu1_nnz_ac - 3) = 0;
+            *(pu1_nnz_ac - 2) = 0;
+            *(pu1_nnz_ac - 1) = 0;
+        }
+        else
+        {
+            *u1_cbp_c = u1_cbp_ac;
+        }
+
+        /* position on the ac blocks of the other plane */
+        if (u4_swap_uv)
+        {
+            pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane, ie 1 dc row */
+            cntrl_pos = 31;                        /* U ac blocks use bits 31-28 */
+            pu1_nnz_ac = pu1_nnz + 1;              /* U ac nnz: 1 dc */
+        }
+        else
+        {
+            /* pi2_res_sb and cntrl_pos already point at the V plane */
+            pu1_nnz_ac = pu1_nnz + 6;              /* Go to nnz of V plane */
+        }
+    }
+
+    /* restore the ptr basing on cbp */
+    if (*u1_cbp_c == 0)
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
+    }
+    else if (*u1_cbp_c == 1)
+    {
+        (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
+    }
+
+    return ;
+}
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i16x16
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i16x16, the mb is first
+* predicted using one of i16x16 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is transformed (hierarchical transform i.e., dct followed by hada-
+* -mard), quantized. The quantized coefficients are packed in scan order for
+* entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* pointer to reconstructed macro block (written by the inverse path below) */
+ UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+
+ /* pointer to src macro block */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+
+ /* pointer to prediction macro block; selected below from the intra mode */
+ UWORD8 *pu1_pred_mb = NULL;
+
+ /* pointer to residual macro block */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+ /* intra mode */
+ UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
+
+ /* coded block pattern */
+ UWORD8 u1_cbp_l = 0;
+
+ /* number of non zero coeffs: 5 words of storage, accessed bytewise through pu1_nnz */
+ UWORD32 au4_nnz[5];
+ UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz;
+
+ /* Control signal for itrans; filled in by ih264e_pack_l_mb_i16 below */
+ UWORD32 u4_cntrl;
+
+ /* quantization parameters */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* pointer to packed mb coeff data */
+ void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+ /* init nnz */
+ au4_nnz[0] = 0;
+ au4_nnz[1] = 0;
+ au4_nnz[2] = 0;
+ au4_nnz[3] = 0;
+ au4_nnz[4] = 0;
+
+ if (u1_intra_mode == PLANE_I16x16) /* plane mode has its own prediction buffer */
+ {
+ pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane;
+ }
+ else
+ {
+ pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16;
+ }
+
+ /********************************************************/
+ /* error estimation, */
+ /* transform */
+ /* quantization */
+ /********************************************************/
+ ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+ pu1_pred_mb, pi2_res_mb,
+ i4_src_strd, i4_pred_strd,
+ i4_res_strd,
+ ps_qp_params->pu2_scale_mat,
+ ps_qp_params->pu2_thres_mat,
+ ps_qp_params->u1_qbits,
+ ps_qp_params->u4_dead_zone,
+ pu1_nnz, ENABLE_DC_TRANSFORM);
+
+ /********************************************************/
+ /* pack coeff data for entropy coding */
+ /********************************************************/
+ ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
+ pu1_nnz, &u4_cntrl);
+
+ /********************************************************/
+ /* inverse quantization, */
+ /* inverse transform, */
+ /* reconstruction */
+ /********************************************************/
+ /*
+ * If the reference frame is not to be computed (u4_compute_recon is 0),
+ * we only need the right and bottom border 4x4 blocks to predict the next
+ * intra blocks, hence only their control bits are kept (mask 0x111F8000)
+ */
+ if (!ps_proc->u4_compute_recon)
+ {
+ u4_cntrl &= 0x111F8000;
+ }
+
+ if (u4_cntrl)
+ {
+ ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+ ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
+ i4_res_strd, i4_pred_strd, i4_rec_strd,
+ ps_qp_params->pu2_iscale_mat,
+ ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+ u4_cntrl, ENABLE_DC_TRANSFORM,
+ ps_proc->pv_scratch_buff);
+ }
+ else /* control word is zero: the prediction is copied to the recon buffer as is */
+ {
+ ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd,
+ i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
+ 0);
+ }
+
+ return (u1_cbp_l);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i4x4, the mb is first
+* predicted using one of i4x4 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is dct transformed and quantized. The quantized coefficients are
+* packed in scan order for entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* pointer to reconstructed macro block (re-pointed at each 4x4 sub-block in the loop) */
+ UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
+
+ /* pointer to src macro block (re-pointed at each 4x4 sub-block in the loop) */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* pointer to residual macro block */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+ /* pointer to neighbors: left (a), top (b), top-right (c), top-left (d) */
+ UWORD8 *pu1_mb_a;
+ UWORD8 *pu1_mb_b;
+ UWORD8 *pu1_mb_c;
+ UWORD8 *pu1_mb_d;
+
+ /* intra mode (this initial value is overwritten per sub-block inside the loop) */
+ UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
+
+ /* neighbor availability */
+ WORD32 i4_ngbr_avbl;
+
+ /* neighbor pels for intra prediction; as filled below: [0..3] left pels in
+ * reverse row order, [4] top-left, [5..8] top, [9..12] top-right */
+ UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+ /* coded block pattern */
+ UWORD8 u1_cbp_l = 0;
+
+ /* number of non zero coeffs of the current sub-block */
+ UWORD8 u1_nnz;
+
+ /* quantization parameters */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* pointer to packed mb coeff data */
+ void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+ /* packed coeff data of the current sub-block, and its position at the start of the 8x8 block */
+ tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
+
+ /* no of non zero coefficients in the current sub block */
+ UWORD32 u4_nnz_cnt;
+
+ /* significant coefficient map */
+ UWORD32 u4_s_map;
+
+ /* pointer to scanning matrix */
+ const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+ /* Dummy variable for the 4x4 transform function (receives the DC output, unused here) */
+ WORD16 i2_dc_dummy;
+
+ /* temp var */
+ UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
+
+ /* Process 16 4x4 luma sub-blocks of the MB in scan order */
+ for (b8 = 0; b8 < 4; b8++)
+ {
+ u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
+ u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
+
+ /* remember where this 8x8 block's coeff data starts, so that the pointer
+ * can be rewound if the cbp bit of the 8x8 block turns out to be zero */
+ ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
+
+ for (b4 = 0; b4 < 4; b4++)
+ {
+ /* index of pel in MB */
+ u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
+ u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
+
+ /* Initialize source and reference pointers */
+ pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd);
+ pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd);
+
+ /* pointer to left of ref sub-block */
+ pu1_mb_a = pu1_ref_mb - 1;
+ /* pointer to top of ref sub-block */
+ pu1_mb_b = pu1_ref_mb - i4_rec_strd;
+ /* pointer to topright of ref sub-block */
+ pu1_mb_c = pu1_mb_b + 4;
+ /* pointer to topleft of ref sub-block */
+ pu1_mb_d = pu1_mb_b - 1;
+
+ /* fetch the neighbor availability stored for this sub-block */
+ i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+
+ /* sub block intra mode */
+ u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
+
+ /********************************************************/
+ /* gather prediction pels from neighbors for prediction */
+ /********************************************************/
+ /* left pels: stored bottom row first, i.e. row i goes to index 3 - i */
+ if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
+ {
+ for (i = 0; i < 4; i++)
+ pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4, 0, 4);
+ }
+
+ /* top pels */
+ if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4 + 5, 0, 4);
+ }
+ /* top left pel */
+ if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
+ {
+ pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+ }
+ else
+ {
+ pu1_ngbr_pels_i4[4] = 0;
+ }
+ /* top right pels: copied when available; otherwise, when the top row is
+ * available, the last top pel (index 8) is replicated; when neither is
+ * available, entries 9..12 are left as they were */
+ if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
+ {
+ memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4);
+ }
+ else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
+ {
+ memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4);
+ }
+
+ /********************************************************/
+ /* prediction */
+ /********************************************************/
+ (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4,
+ pu1_pred_mb, 0,
+ i4_pred_strd,
+ i4_ngbr_avbl);
+
+ /********************************************************/
+ /* error estimation, */
+ /* transform */
+ /* quantization */
+ /********************************************************/
+ ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb,
+ pi2_res_mb, i4_src_strd,
+ i4_pred_strd,
+ ps_qp_params->pu2_scale_mat,
+ ps_qp_params->pu2_thres_mat,
+ ps_qp_params->u1_qbits,
+ ps_qp_params->u4_dead_zone,
+ &u1_nnz, &i2_dc_dummy);
+
+ /********************************************************/
+ /* pack coeff data for entropy coding */
+ /********************************************************/
+ ps_mb_coeff_data = *pv_mb_coeff_data;
+
+ /* write number of non zero coefficients (low bits of i4_sig_map_nnz) */
+ ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
+
+ if (u1_nnz)
+ {
+ /* walk the coefficients in scan order until all u1_nnz non-zero ones are collected */
+ for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++)
+ {
+ if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
+ {
+ /* write residue */
+ ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
+ u4_s_map |= mask;
+ }
+ mask <<= 1;
+ }
+ /* write significant coeff map into the upper 16 bits */
+ ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+
+ /* update ptr to coeff data */
+ (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+ /* cbp: one bit per 8x8 block */
+ u1_cbp_l |= (1 << b8);
+ }
+ else
+ {
+ (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+ }
+
+ /********************************************************/
+ /* inverse quantization, */
+ /* inverse transform, */
+ /* reconstruction */
+ /********************************************************/
+ /* If the frame is not to be used for P frame reference or dumping recon
+ * we only will use the recon for only predicting intra Mbs
+ * This will need only right and bottom edge 4x4 blocks recon
+ * Hence we selectively enable them
+ *
+ * NOTE(review): 0xF888 sets bits 3, 7, 11 and 12..15, which are the right
+ * and bottom edge blocks in raster numbering, while (b8 << 2) + b4 looks
+ * like a scan-order index - confirm the mask selects the intended blocks
+ */
+ if (ps_proc->u4_compute_recon || (0xF888 & (1 << ((b8 << 2) + b4))))
+ {
+ if (u1_nnz)
+ ps_codec->pf_iquant_itrans_recon_4x4(
+ pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
+ /*No input stride,*/i4_pred_strd,
+ i4_rec_strd, ps_qp_params->pu2_iscale_mat,
+ ps_qp_params->pu2_weigh_mat,
+ ps_qp_params->u1_qp_div,
+ ps_proc->pv_scratch_buff, 0, 0);
+ else /* no residue: the prediction is copied to the recon buffer as is */
+ ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb,
+ i4_pred_strd, i4_rec_strd,
+ BLK_SIZE, BLK_SIZE, NULL,
+ 0);
+ }
+
+ }
+
+ /* if the 8x8 block has no residue, nothing needs to be sent to entropy:
+ * rewind the coeff data pointer to where this 8x8 block started */
+ if (!(u1_cbp_l & (1 << b8)))
+ {
+ *pv_mb_coeff_data = ps_mb_coeff_data_b8;
+ }
+ }
+
+ return (u1_cbp_l);
+}
+
+/**
+*******************************************************************************
+*
+* @brief packs luma coefficients of an i4x4 intra mb whose residue is already available
+*
+* @par Description:
+* Unlike ih264e_code_luma_intra_macroblock_4x4, this routine performs no
+* prediction, transform or quantization. It reads the residue and the non zero
+* coefficient counts already stored in the process context (pi2_res_buf_intra_4x4,
+* au4_nnz_intra_4x4), packs the coefficients in scan order for entropy coding and
+* copies the stored intra 4x4 recon (pu1_ref_mb_intra_4x4) to the recon buffer.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* pointer to the intra 4x4 recon held in the process context (source of the final copy) */
+ UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4;
+
+ /* pointer to recon buffer (destination of the final copy) */
+ UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
+
+ /* pointer to residual macro block; advanced by MB_SIZE entries per 4x4 sub-block */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
+
+ /* strides */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* number of non zero coeffs, one byte per 4x4 sub-block */
+ UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
+
+ /* coded block pattern */
+ UWORD8 u1_cbp_l = 0;
+
+ /* pointer to packed mb coeff data */
+ void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+ /* packed coeff data of the current sub-block, and its position at the start of the 8x8 block */
+ tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
+
+ /* no of non zero coefficients in the current sub block */
+ UWORD32 u4_nnz_cnt;
+
+ /* significant coefficient map */
+ UWORD32 u4_s_map;
+
+ /* pointer to scanning matrix */
+ const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
+
+ /* temp var */
+ UWORD32 b8, b4, coeff_cnt, mask;
+
+ /* Process 16 4x4 luma sub-blocks of the MB in scan order */
+ for (b8 = 0; b8 < 4; b8++)
+ {
+ /* remember where this 8x8 block's coeff data starts, so that the pointer
+ * can be rewound if the cbp bit of the 8x8 block turns out to be zero */
+ ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
+
+ for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
+ {
+ /********************************************************/
+ /* pack coeff data for entropy coding */
+ /********************************************************/
+ ps_mb_coeff_data = *pv_mb_coeff_data;
+
+ /* write number of non zero coefficients (low bits of i4_sig_map_nnz) */
+ ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz;
+
+ if (*pu1_nnz)
+ {
+ /* walk the coefficients in scan order until all *pu1_nnz non-zero ones are collected */
+ for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++)
+ {
+ if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
+ {
+ /* write residue */
+ ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
+ u4_s_map |= mask;
+ }
+ mask <<= 1;
+ }
+ /* write significant coeff map into the upper 16 bits */
+ ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
+
+ /* update ptr to coeff data */
+ (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
+
+ /* cbp: one bit per 8x8 block */
+ u1_cbp_l |= (1 << b8);
+ }
+ else
+ {
+ (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
+ }
+ }
+
+ /* if the 8x8 block has no residue, nothing needs to be sent to entropy:
+ * rewind the coeff data pointer to where this 8x8 block started */
+ if (!(u1_cbp_l & (1 << b8)))
+ {
+ *pv_mb_coeff_data = ps_mb_coeff_data_b8;
+ }
+ }
+
+ /* copy the stored intra 4x4 recon (stride MB_SIZE) to the recon buffer (stride i4_rec_strd) */
+ ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
+
+ return (u1_cbp_l);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding for intra macro blocks
+*
+* @par Description:
+* If the current MB is to be intra coded with mb type chroma I8x8, the MB is
+* first predicted using intra 8x8 prediction filters. The predicted data is
+* compared with the input for error and the error is transformed. The DC
+* coefficients of each transformed sub blocks are further transformed using
+* Hadamard transform. The resulting coefficients are quantized, packed and sent
+* for entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_c
+* coded block pattern chroma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* pointer to reconstructed macro block (written by the inverse path below) */
+ UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
+
+ /* pointer to src macro block */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
+
+ /* pointer to prediction macro block; selected below from the intra mode */
+ UWORD8 *pu1_pred_mb = NULL;
+
+ /* pointer to residual macro block */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+ /* intra mode */
+ UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
+
+ /* coded block pattern */
+ UWORD8 u1_cbp_c = 0;
+
+ /* number of non zero coeffs */
+ UWORD8 au1_nnz[18] = {0};
+
+ /* quantization parameters (index 1 of ps_qp_params; the luma routines use index 0) */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
+
+ /* Control signal for inverse transform; filled in by ih264e_pack_c_mb below */
+ UWORD32 u4_cntrl;
+
+ /* pointer to packed mb coeff data */
+ void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+ /* See if we need to swap U and V planes for entropy (input format is 420SP with VU order) */
+ UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
+
+ if (PLANE_CH_I8x8 == u1_intra_mode) /* plane mode has its own prediction buffer */
+ {
+ pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
+ }
+ else
+ {
+ pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
+ }
+
+ /********************************************************/
+ /* error estimation, */
+ /* transform */
+ /* quantization */
+ /********************************************************/
+ ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+ pu1_pred_mb, pi2_res_mb,
+ i4_src_strd, i4_pred_strd,
+ i4_res_strd,
+ ps_qp_params->pu2_scale_mat,
+ ps_qp_params->pu2_thres_mat,
+ ps_qp_params->u1_qbits,
+ ps_qp_params->u4_dead_zone,
+ au1_nnz);
+
+ /********************************************************/
+ /* pack coeff data for entropy coding */
+ /********************************************************/
+ ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
+ au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
+
+ /********************************************************/
+ /* inverse quantization, */
+ /* inverse transform, */
+ /* reconstruction */
+ /********************************************************/
+ ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb,
+ pu1_pred_mb, pu1_ref_mb,
+ i4_res_strd, i4_pred_strd,
+ i4_rec_strd,
+ ps_qp_params->pu2_iscale_mat,
+ ps_qp_params->pu2_weigh_mat,
+ ps_qp_params->u1_qp_div,
+ u4_cntrl,
+ ps_proc->pv_scratch_buff); /* NOTE(review): unlike the inter chroma path, u4_cntrl is not masked when u4_compute_recon is 0 - confirm this is intended */
+ return (u1_cbp_c);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when mode is inter
+*
+* @par Description:
+* If the current mb is to be coded as inter the mb is predicted based on the
+* sub mb partitions and corresponding motion vectors generated by ME. Then,
+* error is computed between the input blk and the estimated blk. This error is
+* transformed, quantized. The quantized coefficients are packed in scan order
+* for entropy coding
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* pointer to reconstructed macro block (written by the inverse path below) */
+ UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
+
+ /* pointer to src macro block */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* pointer to residual macro block */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+ /* coded block pattern */
+ UWORD8 u1_cbp_l = 0;
+
+ /* Control signal of itrans */
+ UWORD32 u4_cntrl;
+
+ /* number of non zero coeffs, accessed bytewise over ps_proc->au4_nnz */
+ UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz;
+
+ /* quantization parameters */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* pointer to packed mb coeff data */
+ void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+ /* pseudo pred buffer: starts as the pred buffer and is passed by address to
+ * ih264e_motion_comp_luma, which presumably may redirect it - confirm */
+ UWORD8 *pu1_pseudo_pred = pu1_pred_mb;
+
+ /* pseudo pred buffer stride (also passed by address to motion compensation) */
+ WORD32 i4_pseudo_pred_strd = i4_pred_strd;
+
+ /* init nnz */
+ ps_proc->au4_nnz[0] = 0;
+ ps_proc->au4_nnz[1] = 0;
+ ps_proc->au4_nnz[2] = 0;
+ ps_proc->au4_nnz[3] = 0;
+ ps_proc->au4_nnz[4] = 0;
+
+ /********************************************************/
+ /* prediction */
+ /********************************************************/
+ ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd);
+
+ /********************************************************/
+ /* error estimation, */
+ /* transform */
+ /* quantization */
+ /********************************************************/
+ if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0) /* residual coding is skipped only when the min SAD was reached and it is zero */
+ {
+ ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+ pu1_pseudo_pred, pi2_res_mb,
+ i4_src_strd,
+ i4_pseudo_pred_strd,
+ i4_res_strd,
+ ps_qp_params->pu2_scale_mat,
+ ps_qp_params->pu2_thres_mat,
+ ps_qp_params->u1_qbits,
+ ps_qp_params->u4_dead_zone,
+ pu1_nnz,
+ DISABLE_DC_TRANSFORM);
+
+ /********************************************************/
+ /* pack coeff data for entropy coding */
+ /********************************************************/
+ ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
+ pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl);
+ }
+ else /* nothing to code: empty cbp and an all-zero control word */
+ {
+ u1_cbp_l = 0;
+ u4_cntrl = 0;
+ }
+
+ /********************************************************/
+ /* inverse quantization, */
+ /* inverse transform, */
+ /* reconstruction */
+ /********************************************************/
+
+ /* If the frame is not to be used for P frame reference or dumping recon
+ * we will use the recon only for predicting intra MBs.
+ * This will need only right and bottom edge 4x4 blocks recon.
+ * Hence we selectively enable them using control signal (including DC)
+ */
+ if (ps_proc->u4_compute_recon != 1)
+ {
+ u4_cntrl &= 0x111F0000;
+ }
+
+ if (u4_cntrl)
+ {
+ ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+ ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb,
+ i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd,
+ ps_qp_params->pu2_iscale_mat,
+ ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+ u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM,
+ ps_proc->pv_scratch_buff);
+ }
+ else /* control word is zero: the prediction is copied to the recon buffer as is */
+ {
+ ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb,
+ i4_pseudo_pred_strd, i4_rec_strd,
+ MB_SIZE, MB_SIZE, NULL, 0);
+ }
+
+
+ return (u1_cbp_l);
+}
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding for inter macro blocks
+*
+* @par Description:
+* If the current mb is to be coded as an inter predicted mb, prediction is done
+* based on the sub mb partitions and the corresponding motion vectors generated
+* by ME. Then, error is computed between the input blk and the estimated blk.
+* This error is transformed and quantized. The quantized coefficients
+* are packed in scan order for
+* entropy coding.
+*
+* @param[in] ps_proc
+* pointer to the current macro block context
+*
+* @returns u1_cbp_c
+* coded block pattern chroma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* pointer to reconstructed macro block (written by the inverse path below) */
+ UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma;
+
+ /* pointer to src macro block */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* pointer to residual macro block */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_res_strd = ps_proc->i4_res_strd;
+
+ /* coded block pattern */
+ UWORD8 u1_cbp_c = 0;
+
+ /* Control signal for inverse transform; filled in by ih264e_pack_c_mb below */
+ UWORD32 u4_cntrl;
+
+ /* number of non zero coeffs */
+ UWORD8 au1_nnz[10] = {0};
+
+ /* quantization parameters (index 1 of ps_qp_params; the luma routines use index 0) */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
+
+ /* pointer to packed mb coeff data */
+ void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
+
+ /* See if we need to swap U and V planes for entropy (input format is 420SP with VU order) */
+ UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
+
+ /********************************************************/
+ /* prediction */
+ /********************************************************/
+ ih264e_motion_comp_chroma(ps_proc);
+
+ /********************************************************/
+ /* error estimation, */
+ /* transform */
+ /* quantization */
+ /********************************************************/
+ ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
+ pu1_pred_mb, pi2_res_mb,
+ i4_src_strd, i4_pred_strd,
+ i4_res_strd,
+ ps_qp_params->pu2_scale_mat,
+ ps_qp_params->pu2_thres_mat,
+ ps_qp_params->u1_qbits,
+ ps_qp_params->u4_dead_zone,
+ au1_nnz);
+
+ /********************************************************/
+ /* pack coeff data for entropy coding */
+ /********************************************************/
+ ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
+ au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
+
+ /********************************************************/
+ /* inverse quantization, */
+ /* inverse transform, */
+ /* reconstruction */
+ /********************************************************/
+
+ /* If the frame is not to be used for P frame reference or dumping recon
+ * we will use the recon only for predicting intra MBs.
+ * This will need only right and bottom edge 4x4 blocks recon.
+ * Hence we selectively enable them using control signal (including DC)
+ */
+ if (!ps_proc->u4_compute_recon)
+ {
+ u4_cntrl &= 0x7700C000;
+ }
+
+ if (u4_cntrl)
+ {
+ ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
+ ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb,
+ i4_res_strd, i4_pred_strd, i4_rec_strd,
+ ps_qp_params->pu2_iscale_mat,
+ ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
+ u4_cntrl, ps_proc->pv_scratch_buff);
+ }
+ else /* control word is zero: the prediction is copied to the recon buffer as is */
+ {
+ /* NOTE(review): dimensions are (MB_SIZE >> 1, MB_SIZE) - presumably height
+ * then width of an interleaved UV block; confirm against the copy routine */
+ ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd,
+ i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
+ NULL, 0);
+ }
+
+ return (u1_cbp_c);
+}
diff --git a/encoder/ih264e_core_coding.h b/encoder/ih264e_core_coding.h
new file mode 100755
index 0000000..1237d25
--- /dev/null
+++ b/encoder/ih264e_core_coding.h
@@ -0,0 +1,653 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_core_coding.h
+*
+* @brief
+* This file contains extern declarations of core coding routines
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_CORE_CODING_H_
+#define IH264E_CORE_CODING_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Enable/Disable Hadamard transform of DC Coeff's
+******************************************************************************
+ */
+#define DISABLE_DC_TRANSFORM 0
+#define ENABLE_DC_TRANSFORM 1
+
+/**
+*******************************************************************************
+ * @brief bit masks for DC and AC control flags
+*******************************************************************************
+ */
+
+#define DC_COEFF_CNT_LUMA_MB 16
+#define NUM_4X4_BLKS_LUMA_MB_ROW 4
+#define NUM_LUMA4x4_BLOCKS_IN_MB 16
+#define NUM_CHROMA4x4_BLOCKS_IN_MB 8
+
+#define SIZE_4X4_BLK_HRZ TRANS_SIZE_4
+#define SIZE_4X4_BLK_VERT TRANS_SIZE_4
+
+#define CNTRL_FLAG_DC_MASK_LUMA 0x0000FFFF
+#define CNTRL_FLAG_AC_MASK_LUMA 0xFFFF0000
+
+#define CNTRL_FLAG_AC_MASK_CHROMA_U 0xF0000000
+#define CNTRL_FLAG_DC_MASK_CHROMA_U 0x0000F000
+
+#define CNTRL_FLAG_AC_MASK_CHROMA_V 0x0F000000
+#define CNTRL_FLAG_DC_MASK_CHROMA_V 0x00000F00
+
+#define CNTRL_FLAG_AC_MASK_CHROMA ( CNTRL_FLAG_AC_MASK_CHROMA_U | CNTRL_FLAG_AC_MASK_CHROMA_V )
+#define CNTRL_FLAG_DC_MASK_CHROMA ( CNTRL_FLAG_DC_MASK_CHROMA_U | CNTRL_FLAG_DC_MASK_CHROMA_V )
+
+#define CNTRL_FLAG_DCBLK_MASK_CHROMA 0x0000C000
+
+/**
+*******************************************************************************
+ * @brief macros for transforms
+*******************************************************************************
+ */
+/*
+ * Stores CLZ(u4_cntrl) in blk_lin_id and then masks u4_cntrl with
+ * (0x7FFFFFFF >> blk_lin_id), i.e. bit (31 - blk_lin_id) and every bit above
+ * it are cleared. CLZ is defined elsewhere; presumably it counts leading
+ * zeros, in which case this removes the most significant set bit of u4_cntrl
+ * and reports its distance from bit 31 - TODO confirm.
+ *
+ * Both arguments are assigned to and are evaluated more than once, so they
+ * must be plain lvalues without side effects. The expansion is a brace block
+ * followed by ';', so it cannot be used as the body of an if that has an
+ * else branch.
+ */
+#define DEQUEUE_BLKID_FROM_CONTROL( u4_cntrl, blk_lin_id) \
+{ \
+ blk_lin_id = CLZ(u4_cntrl); \
+ u4_cntrl &= (0x7FFFFFFF >> blk_lin_id); \
+};
+
+/*
+ * Converts a linear block index into x/y offsets on a grid that is four
+ * blocks wide: column (u4_blk_id % 4) and row (u4_blk_id / 4), each
+ * multiplied by 4.
+ *
+ * i4_offset_x and i4_offset_y are assigned to. u4_blk_id is not parenthesized
+ * in the expansion, so pass a simple identifier or constant rather than an
+ * expression.
+ */
+#define IND2SUB_LUMA_MB(u4_blk_id,i4_offset_x,i4_offset_y) \
+{ \
+ i4_offset_x = (u4_blk_id % 4) << 2; \
+ i4_offset_y = (u4_blk_id / 4) << 2; \
+}
+
+/*
+ * Converts a linear chroma block index into x/y offsets:
+ *   i4_offset_x = 8 * (bit 0 of u4_blk_id), plus 1 when u4_blk_id > 3
+ *   i4_offset_y = 4 * (bit 1 of u4_blk_id)
+ * NOTE(review): the extra 1 for indices above 3 presumably selects the second
+ * plane of an interleaved U/V buffer - confirm against the callers.
+ *
+ * i4_offset_x and i4_offset_y are assigned to. u4_blk_id is evaluated three
+ * times and is not parenthesized, so pass a simple identifier or constant.
+ */
+#define IND2SUB_CHROMA_MB(u4_blk_id,i4_offset_x,i4_offset_y) \
+{ \
+ i4_offset_x = ((u4_blk_id & 0x1 ) << 3) + (u4_blk_id > 3); \
+ i4_offset_y = (u4_blk_id & 0x2) << 1; \
+}
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the DCT transform, then the Hadamard transform
+* and quantization for a macroblock when the mb mode is intra 16x16 mode
+*
+* @par Description:
+* First cf4 is done on all 16 4x4 blocks of the 16x16 input block.
+* Then hadamard transform is done on the DC coefficients
+* Quantization is then performed on the 16x16 block, 4x4 wise
+*
+* @param[in] pu1_src
+* Pointer to source sub-block
+*
+* @param[in] pu1_pred
+* Pointer to prediction sub-block
+*
+* @param[in] pi2_out
+* Pointer to residual sub-block
+* The output will be in linear format
+* The first 16 continuous locations will contain the values of Dc block
+* After DC block and a stride 1st AC block will follow
+* After one more stride next AC block will follow
+* The blocks will be in raster scan order
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* Prediction stride
+*
+* @param[in] dst_strd
+* Destination stride
+*
+* @param[in] pu2_scale_matrix
+* The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+* Threshold matrix
+*
+* @param[in] u4_qbits
+* 15+QP/6
+*
+* @param[in] u4_round_factor
+* Round factor for quant
+*
+* @param[out] pu1_nnz
+* Memory to store the non-zeros after transform
+* The first byte will be the nnz of DC block
+* From the next byte the AC nnzs will be stored in raster scan order
+*
+* @param u4_dc_flag
+* Signals if Dc transform is to be done or not
+* 1 -> Dc transform will be done
+* 0 -> Dc transform will not be done
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_resi_trans_dctrans_quant(
+ codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred,
+ WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 dst_strd, const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+ UWORD32 u4_round_factor, UWORD8 *pu1_nnz, UWORD32 u4_dc_flag);
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the intra 16x16 inverse transform process for H264
+* it includes inverse Dc transform, inverse quant and then inverse transform
+*
+* @par Description:
+*
+* @param[in] pi2_src
+* Input data, 16x16 size
+* First 16 mem locations will have the DC coeffs in raster scan order in linear fashion
+* after a stride 1st AC block will be present, again in raster scan order
+* Then each AC block of the 16x16 block will follow in raster scan order
+*
+* @param[in] pu1_pred
+* The predicted data, 16x16 size
+* Block by block form
+*
+* @param[in] pu1_out
+* Output 16x16
+* In block by block form
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* input stride for prediction buffer
+*
+* @param[in] out_strd
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+* QP/6
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least 20 in size
+*
+* @param[in] u4_cntrl
+* Controls the transform path
+* In total, the last 17 bits are used
+* the 16th bit will correspond to DC block
+* and 32-17 will correspond to the ac blocks in raster scan order
+* bit equaling zero indicates that the entire 4x4 block is zero for DC
+* For AC blocks a bit equaling zero will mean that all 15 AC coeffs of the block are zero
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
+*
+* @returns
+* none
+*
+* @remarks
+* The all zero case must be taken care outside
+*
+*******************************************************************************
+*/
+void ih264e_luma_16x16_idctrans_iquant_itrans_recon(
+ codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred,
+ UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl,
+ UWORD32 u4_dc_trans_flag, WORD32 *pi4_tmp);
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs the DCT transform, then the Hadamard transform
+* and quantization for a chroma macroblock
+*
+* @par Description:
+* First cf4 is done on all 4x4 blocks of the 8x8 input block
+* Then hadamard transform is done on the DC coefficients
+* Quantization is then performed on the 8x8 block, 4x4 wise
+*
+* @param[in] pu1_src
+* Pointer to source sub-block
+* The input is in interleaved format for two chroma planes
+*
+* @param[in] pu1_pred
+* Pointer to prediction sub-block
+* Prediction is in inter leaved format
+*
+* @param[in] pi2_out
+* Pointer to residual sub-block
+* The output will be in linear format
+* The first 4 continuous locations will contain the values of DC block for U
+* and then next 4 will contain for V.
+* After DC block and a stride 1st AC block of U plane will follow
+* After one more stride next AC block of V plane will follow
+* The blocks will be in raster scan order
+*
+* After all the AC blocks of U plane AC blocks of V plane will follow in exact
+* same way
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* Prediction stride
+*
+* @param[in] dst_strd
+* Destination stride
+*
+* @param[in] pu2_scale_matrix
+* The quantization matrix for 4x4 transform
+*
+* @param[in] pu2_threshold_matrix
+* Threshold matrix
+*
+* @param[in] u4_qbits
+* 15+QP/6
+*
+* @param[in] u4_round_factor
+* Round factor for quant
+*
+* @param[out] pu1_nnz
+* Memory to store the non-zeros after transform
+* The first byte will be the nnz of DC block for U plane
+* From the next byte the AC nnzs will be stored in raster scan order
+* The fifth byte will be nnz of Dc block of V plane
+* Then Ac blocks will follow
+*
+* @param u4_dc_flag
+* Signals if Dc transform is to be done or not
+* 1 -> Dc transform will be done
+* 0 -> Dc transform will not be done
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_resi_trans_dctrans_quant(
+ codec_t *ps_codec, UWORD8 *pu1_src, UWORD8 *pu1_pred,
+ WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 out_strd, const UWORD16 *pu2_scale_matrix,
+ const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits,
+ UWORD32 u4_round_factor, UWORD8 *pu1_nnz_c);
+
+/**
+*******************************************************************************
+* @brief
+* This function performs the inverse transform with process for chroma MB of H264
+*
+* @par Description:
+* Does inverse DC transform ,inverse quantization inverse transform
+*
+* @param[in] pi2_src
+* Input data, 16x16 size
+* The input is in the form of, first 4 locations will contain DC coeffs of
+* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
+* in raster scan order will follow, each block as linear array in raster scan order.
+* After a stride next AC block will follow. After all AC blocks of U plane
+* V plane AC blocks will follow in exact same order.
+*
+* @param[in] pu1_pred
+* The predicted data, 8x16 size, U and V interleaved
+*
+* @param[in] pu1_out
+* Output 8x16, U and V interleaved
+*
+* @param[in] src_strd
+* Source stride
+*
+* @param[in] pred_strd
+* input stride for prediction buffer
+*
+* @param[in] out_strd
+* input stride for output buffer
+*
+* @param[in] pu2_iscale_mat
+* Inverse quantization matrix for 4x4 transform
+*
+* @param[in] pu2_weigh_mat
+* weight matrix of 4x4 transform
+*
+* @param[in] qp_div
+* QP/6
+*
+* @param[in] pi4_tmp
+* Input temporary buffer
+* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of DC coeffs for chroma * number of planes
+* in size
+*
+* @param[in] u4_cntrl
+* Controls the transform path
+* the 15th bit will correspond to DC block of U plane, 14th will indicate the V plane DC block
+* bits 31-28 will indicate AC blocks of U plane in raster scan order
+* bits 27-24 will indicate AC blocks of V plane in raster scan order
+* A bit value of 1 implies that there is at least one non zero coeff in a block
+*
+* @returns
+* none
+*
+* @remarks
+*******************************************************************************
+*/
+void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
+ codec_t *ps_codec, WORD16 *pi2_src, UWORD8 *pu1_pred,
+ UWORD8 *pu1_out, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 out_strd, const UWORD16 *pu2_iscale_mat,
+ const UWORD16 *pu2_weigh_mat, UWORD32 qp_div, UWORD32 u4_cntrl,
+ WORD32 *pi4_tmp);
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i16x16 luma mb for entropy coding
+*
+* @par Description
+* An i16 macro block contains two classes of units, dc 4x4 block and
+* 4x4 ac blocks. while packing the mb, the dc block is sent first, and
+* the 16 ac blocks are sent next in scan order. Each and every block is
+* represented by 3 parameters (nnz, significant coefficient map and the
+* residue coefficients itself). If a 4x4 unit does not have any coefficients
+* then only nnz is sent. Inside a 4x4 block the individual coefficients are
+* sent in scan order.
+*
+* The first byte of each block will be nnz of the block, if it is non zero,
+* a 2 byte significance map is sent. This is followed by nonzero coefficients.
+* This is repeated for 1 dc + 16 ac blocks.
+*
+* @param[in] pi2_res_mb
+* pointer to residue mb
+*
+* @param[in, out] pv_mb_coeff_data
+* buffer pointing to packed residue coefficients
+*
+* @param[in] i4_res_strd
+* residual block stride
+*
+* @param[out] u1_cbp_l
+* coded block pattern luma
+*
+* @param[in] pu1_nnz
+* number of non zero coefficients in each 4x4 unit
+*
+* @param[out] pu4_cntrl
+* Control signal for inverse transform of 16x16 blocks
+*
+* @return none
+*
+* @ remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, void **pv_mb_coeff_data,
+ WORD32 i4_res_strd, UWORD8 *u1_cbp_l, UWORD8 *pu1_nnz,
+ UWORD32 *pu4_cntrl);
+
+/**
+******************************************************************************
+*
+* @brief This function packs residue of an i8x8 chroma mb for entropy coding
+*
+* @par Description
+* An i8 chroma macro block contains two classes of units, dc 2x2 block and
+* 4x4 ac blocks. while packing the mb, the dc block is sent first, and
+* the 4 ac blocks are sent next in scan order. Each and every block is
+* represented by 3 parameters (nnz, significant coefficient map and the
+* residue coefficients itself). If a 4x4 unit does not have any coefficients
+* then only nnz is sent. Inside a 4x4 block the individual coefficients are
+* sent in scan order.
+*
+* The first byte of each block will be nnz of the block, if it is non zero,
+* a 2 byte significance map is sent. This is followed by nonzero coefficients.
+* This is repeated for 1 dc + 4 ac blocks.
+*
+* @param[in] pi2_res_mb
+* pointer to residue mb
+*
+* @param[in, out] pv_mb_coeff_data
+* buffer pointing to packed residue coefficients
+*
+* @param[in] i4_res_strd
+* residual block stride
+*
+* @param[out] u1_cbp_c
+* coded block pattern chroma
+*
+* @param[in] pu1_nnz
+* number of non zero coefficients in each 4x4 unit
+*
+* @param[out] pu4_cntrl
+* Control signal for inverse transform
+*
+* @param[in] u4_swap_uv
+* Swaps the order of U and V planes in entropy bitstream
+*
+* @return none
+*
+* @ remarks
+*
+******************************************************************************
+*/
+void ih264e_pack_c_mb(WORD16 *pi2_res_mb, void **pv_mb_coeff_data,
+ WORD32 i4_res_strd, UWORD8 *u1_cbp_c, UWORD8 *pu1_nnz,
+ UWORD32 u4_kill_coffs_flag, UWORD32 *pu4_cntrl,
+ UWORD32 u4_swap_uv);
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i16x16
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i16x16, the mb is first
+* predicted using one of i16x16 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is transformed (hierarchical transform i.e., dct followed by hada-
+* -mard), quantized. The quantized coefficients are packed in scan order for
+* entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_16x16
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i4x4, the mb is first
+* predicted using one of i4x4 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is dct transformed and quantized. The quantized coefficients are
+* packed in scan order for entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief performs luma core coding when intra mode is i4x4
+*
+* @par Description:
+* If the current mb is to be coded as intra of mb type i4x4, the mb is first
+* predicted using one of i4x4 prediction filters, basing on the intra mode
+* chosen. Then, error is computed between the input blk and the estimated blk.
+* This error is dct transformed and quantized. The quantized coefficients are
+* packed in scan order for entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief performs chroma core coding for intra macro blocks
+*
+* @par Description:
+* If the current MB is to be intra coded with mb type chroma I8x8, the MB is
+* first predicted using intra 8x8 prediction filters. The predicted data is
+* compared with the input for error and the error is transformed. The DC
+* coefficients of each transformed sub blocks are further transformed using
+* Hadamard transform. The resulting coefficients are quantized, packed and sent
+* for entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_c
+* coded block pattern chroma
+*
+* @remarks
+* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
+* mentioned in h.264 specification
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_intra_macroblock_8x8
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+* @brief performs luma core coding when mode is inter
+*
+* @par Description:
+* If the current mb is to be coded as inter predicted mb,based on the sub mb
+* partitions and corresponding motion vectors generated by ME, prediction is done.
+* Then, error is computed between the input blk and the estimated blk.
+* This error is transformed ( dct and with out hadamard), quantized. The
+* quantized coefficients are packed in scan order for entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_l
+* coded block pattern luma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_luma_inter_macroblock_16x16
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+* @brief performs chroma core coding for inter macro blocks
+*
+* @par Description:
+* If the current mb is to be coded as inter predicted mb, based on the sub mb
+* partitions and corresponding motion vectors generated by ME, prediction is done.
+* Then, error is computed between the input blk and the estimated blk.
+* This error is transformed, quantized. The quantized coefficients
+* are packed in scan order for entropy coding.
+*
+* @param[in] ps_proc_ctxt
+* pointer to the current macro block context
+*
+* @returns u1_cbp_c
+* coded block pattern chroma
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_code_chroma_inter_macroblock_8x8
+ (
+ process_ctxt_t *ps_proc
+ );
+
+#endif /* IH264E_CORE_CODING_H_ */
diff --git a/encoder/ih264e_deblk.c b/encoder/ih264e_deblk.c
new file mode 100755
index 0000000..8a11bdb
--- /dev/null
+++ b/encoder/ih264e_deblk.c
@@ -0,0 +1,854 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_deblk.c
+ *
+ * @brief
+ * This file contains functions that are associated with deblocking
+ *
+ * @author
+ * ittiam
+ *
+ * @par List of Functions:
+ * - ih264e_fill_bs_1mv_1ref_non_mbaff
+ * - ih264e_calculate_csbp
+ * - ih264e_compute_bs
+ * - ih264e_filter_top_edge
+ * - ih264e_filter_left_edge
+ * - ih264e_deblock_mb
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264_trans_data.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_deblk_tables.h"
+#include "ih264e_deblk.h"
+
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief BS Table Lookup
+* input : row index 0 or 1 and a 4 bit pattern as column index
+* output : four 8 bit values packed in one 32 bit word, one value per bit
+* of the pattern. Pattern bit 0 controls the most significant
+* byte and pattern bit 3 the least significant byte. A set bit
+* gives the value 2; a clear bit gives 0 in row 0 and 1 in row 1.
+* @remarks none
+******************************************************************************
+*/
+static const UWORD32 gu4_bs_table[][16] =
+{
+ {
+ 0x00000000, 0x02000000, 0x00020000, 0x02020000,
+ 0x00000200, 0x02000200, 0x00020200, 0x02020200,
+ 0x00000002, 0x02000002, 0x00020002, 0x02020002,
+ 0x00000202, 0x02000202, 0x00020202, 0x02020202
+ },
+ {
+ 0x01010101, 0x02010101, 0x01020101, 0x02020101,
+ 0x01010201, 0x02010201, 0x01020201, 0x02020201,
+ 0x01010102, 0x02010102, 0x01020102, 0x02020102,
+ 0x01010202, 0x02010202, 0x01020202, 0x02020202
+ }
+};
+
+/**
+******************************************************************************
+* @brief Transpose Matrix used in BS
+* input : 4 bit pattern
+* output : 16 bit value in which input bit k is moved to bit 4*k (the
+* least significant bit of nibble k); all other bits are zero
+* @remarks OR-ing the entries for four nibbles, shifted left by 0, 1, 2 and 3,
+* transposes a 4x4 bit matrix held in a 16 bit word
+******************************************************************************
+*/
+static const UWORD16 ih264e_gu2_4x4_v2h_reorder[16] =
+{
+ 0x0000, 0x0001, 0x0010, 0x0011,
+ 0x0100, 0x0101, 0x0110, 0x0111,
+ 0x1000, 0x1001, 0x1010, 0x1011,
+ 0x1100, 0x1101, 0x1110, 0x1111
+};
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Fill BS value for all the edges of an mb
+*
+* @par Description:
+* Writes four horizontal and four vertical BS words for one mb. Words 1..3
+* (edges inside the mb) depend only on the coded sub block patterns and are
+* looked up in row 0 of gu4_bs_table. Word 0 (the edge shared with the top or
+* left mb) is 0x04040404 when that neighbour is intra; otherwise it is looked
+* up in the row of gu4_bs_table selected by whether either motion vector
+* component differs from the neighbour's by 4 or more.
+*
+* @param[out] pu4_horz_bs
+* Pointer to the four horizontal BS words of this mb
+*
+* @param[out] pu4_vert_bs
+* Pointer to the four vertical BS words of this mb
+*
+* @param[in] u4_left_mb_csbp
+* coded sub block pattern of left mb
+*
+* @param[in] u4_top_mb_csbp
+* coded sub block pattern of top mb
+*
+* @param[in] u4_cur_mb_csbp
+* coded sub block pattern of curr mb
+*
+* @param[in] ps_leftMvPred
+* MV of left mb (read only when u1_left_intra is zero)
+*
+* @param[in] ps_topMvPred
+* MV of top mb (read only when u1_top_intra is zero)
+*
+* @param[in] ps_curMvPred
+* MV of curr mb
+*
+* @param[in] u1_left_intra
+* is left intra
+*
+* @param[in] u1_top_intra
+* is top intra
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static void ih264e_fill_bs_1mv_1ref_non_mbaff(UWORD32 *pu4_horz_bs,
+ UWORD32 *pu4_vert_bs,
+ UWORD32 u4_left_mb_csbp,
+ UWORD32 u4_top_mb_csbp,
+ UWORD32 u4_cur_mb_csbp,
+ mv_t *ps_leftMvPred,
+ mv_t *ps_topMvPred,
+ mv_t *ps_curMvPred,
+ UWORD8 u1_left_intra,
+ UWORD8 u1_top_intra)
+{
+ /* motion vectors of blks p & q */
+ WORD16 i16_qMv0, i16_qMv1, i16_pMv0, i16_pMv1;
+
+ /* temp var */
+ UWORD32 u4_lft_flag, u4_top_flag;
+ const UWORD32 *bs_map;
+ UWORD32 u4_reordered_vert_bs_enc, u4_temp;
+
+ /* Coded Pattern for Horizontal Edge */
+ /*-----------------------------------------------------------------------*/
+ /*u4_nbr_horz_csbp=11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C|15T|14T|13T|12T */
+ /*-----------------------------------------------------------------------*/
+ UWORD32 u4_nbr_horz_csbp = (u4_cur_mb_csbp << 4) | (u4_top_mb_csbp >> 12);
+ /* a bit is set when either of the two blocks sharing the edge is coded */
+ UWORD32 u4_horz_bs_enc = u4_cur_mb_csbp | u4_nbr_horz_csbp;
+
+ /* Coded Pattern for Vertical Edge */
+ /*-----------------------------------------------------------------------*/
+ /*u4_left_mb_masked_csbp = 15L|0|0|0|11L|0|0|0|7L|0|0|0|3L|0|0|0 */
+ /*-----------------------------------------------------------------------*/
+ UWORD32 u4_left_mb_masked_csbp = u4_left_mb_csbp & CSBP_RIGHT_BLOCK_MASK;
+
+ /*-----------------------------------------------------------------------*/
+ /*u4_cur_mb_masked_csbp =14C|13C|12C|x|10C|9C|8C|x|6C|5C|4C|x|2C|1C|0C|x */
+ /*-----------------------------------------------------------------------*/
+ UWORD32 u4_cur_mb_masked_csbp =(u4_cur_mb_csbp<<1)&(~CSBP_LEFT_BLOCK_MASK);
+
+ /*-----------------------------------------------------------------------*/
+ /*u4_nbr_vert_csbp=14C|13C|12C|15L|10C|9C|8C|11L|6C|5C|4C|7L|2C|1C|0C|3L */
+ /*-----------------------------------------------------------------------*/
+ UWORD32 u4_nbr_vert_csbp = (u4_cur_mb_masked_csbp) | (u4_left_mb_masked_csbp >> 3);
+ /* a bit is set when either of the two blocks sharing the edge is coded */
+ UWORD32 u4_vert_bs_enc = u4_cur_mb_csbp | u4_nbr_vert_csbp;
+
+ /* BS Calculation for edges inside the MB (row 0 of the table is used) */
+
+ /* BS calculation for 1 2 3 horizontal boundary */
+ bs_map = gu4_bs_table[0];
+ pu4_horz_bs[1] = bs_map[(u4_horz_bs_enc >> 4) & 0xF];
+ pu4_horz_bs[2] = bs_map[(u4_horz_bs_enc >> 8) & 0xF];
+ pu4_horz_bs[3] = bs_map[(u4_horz_bs_enc >> 12) & 0xF];
+
+ /* BS calculation for 5 6 7 vertical boundary */
+ /* Do 4x4 transpose of u4_vert_bs_enc by using look up table for reorder, */
+ /* so that each nibble of the result holds the four bits of one vertical  */
+ /* edge (input bit 4*r + c ends up at bit 4*c + r)                        */
+ u4_reordered_vert_bs_enc = ih264e_gu2_4x4_v2h_reorder[u4_vert_bs_enc & 0xF];
+
+ u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 4) & 0xF];
+ u4_reordered_vert_bs_enc |= (u4_temp << 1);
+
+ u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 8) & 0xF];
+ u4_reordered_vert_bs_enc |= (u4_temp << 2);
+
+ u4_temp = ih264e_gu2_4x4_v2h_reorder[(u4_vert_bs_enc >> 12) & 0xF];
+ u4_reordered_vert_bs_enc |= (u4_temp << 3);
+
+ pu4_vert_bs[1] = bs_map[(u4_reordered_vert_bs_enc >> 4) & 0xF];
+ pu4_vert_bs[2] = bs_map[(u4_reordered_vert_bs_enc >> 8) & 0xF];
+ pu4_vert_bs[3] = bs_map[(u4_reordered_vert_bs_enc >> 12) & 0xF];
+
+
+ /* BS Calculation for MB Boundary Edges */
+ i16_qMv0 = ps_curMvPred->i2_mvx;
+ i16_qMv1 = ps_curMvPred->i2_mvy;
+
+ if (u1_top_intra)
+ {
+ pu4_horz_bs[0] = 0x04040404;
+ }
+ else
+ {
+ i16_pMv0 = ps_topMvPred->i2_mvx;
+ i16_pMv1 = ps_topMvPred->i2_mvy;
+
+ /* NOTE(review): threshold of 4 is presumably in quarter sample units, */
+ /* i.e. one full sample - confirm against the mv_t definition          */
+ u4_top_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) |
+ (ABS((i16_pMv1 - i16_qMv1)) >= 4);
+
+ /* !! reduces the flag to 0 or 1 so it can select the table row */
+ bs_map = gu4_bs_table[!!u4_top_flag];
+ pu4_horz_bs[0] = bs_map[u4_horz_bs_enc & 0xF];
+ }
+
+ if (u1_left_intra)
+ {
+ pu4_vert_bs[0] = 0x04040404;
+ }
+ else
+ {
+ i16_pMv0 = ps_leftMvPred->i2_mvx;
+ i16_pMv1 = ps_leftMvPred->i2_mvy;
+
+
+ u4_lft_flag = (ABS((i16_pMv0 - i16_qMv0)) >= 4 ) |
+ (ABS((i16_pMv1 - i16_qMv1)) >= 4);
+
+ /* !! reduces the flag to 0 or 1 so it can select the table row */
+ bs_map = gu4_bs_table[!!u4_lft_flag];
+ pu4_vert_bs[0] = bs_map[u4_reordered_vert_bs_enc & 0xF];
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief calculate coded subblock pattern from nnz
+*
+* @par Description:
+* Reads ps_proc->au4_nnz as a byte array, skips its first byte and builds a
+* 16 bit pattern from the next 16 bytes: bit i of the result is set when
+* byte (i + 1) is non zero.
+*
+* @param[in] ps_proc
+* process context (only au4_nnz is read)
+*
+* @returns csbp
+* 16 bit coded sub block pattern in the low bits of a UWORD32
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static UWORD32 ih264e_calculate_csbp(process_ctxt_t *ps_proc)
+{
+ /* number of non zeros for each tx blk */
+ UWORD8 *pu1_curr_nnz = (UWORD8 *)ps_proc->au4_nnz;
+
+ /* csbp */
+ UWORD32 u4_csbp = 0;
+
+ /* temp var */
+ WORD32 i4_i;
+
+ /* skip the first byte; presumably it holds the nnz of the DC block */
+ /* - TODO confirm against the writer of au4_nnz                     */
+ pu1_curr_nnz += 1;
+
+ /* Creating Subblock pattern for current MB */
+ /* 15C|14C|13C|12C|11C|10C|9C|8C|7C|6C|5C|4C|3C|2C|1C|0C */
+ for (i4_i = 0; i4_i < 16; i4_i++ )
+ {
+ /* !! maps any non zero count to 1 before it is shifted into place */
+ u4_csbp |= ((!!*(pu1_curr_nnz + i4_i))<< i4_i);
+ }
+
+ return u4_csbp;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function computes blocking strength for an mb
+*
+* @par Description:
+* Writes the four vertical and four horizontal BS words of the mb addressed
+* by the bs context (i4_mb_x, i4_mb_y). For an intra mb the words are set to
+* constants (4 on the mb boundary edge, 3 on the inner edges). Otherwise the
+* csbp of the current mb is computed, stored in ps_proc->u4_csbp, and the BS
+* words are filled from the current, left and top mb information.
+*
+* @param[in] ps_proc
+* process context
+*
+* @returns none
+*
+* @remarks In this module it is assumed that there is only a single reference
+* frame and is always the most recently used anchor frame.
+* For a non intra mb in the first column / first row, the stored left / top
+* neighbour information in ps_proc is overwritten (csbp = 0, not intra,
+* mv = mv of current mb) before it is used.
+* ps_proc->u4_csbp is not updated for an intra mb.
+*
+*******************************************************************************
+*/
+void ih264e_compute_bs(process_ctxt_t * ps_proc)
+{
+ /* deblk bs context */
+ bs_ctxt_t *ps_bs = &(ps_proc->s_deblk_ctxt.s_bs_ctxt);
+
+ /* vertical blocking strength */
+ UWORD32 *pu4_pic_vert_bs;
+
+ /* horizontal blocking strength */
+ UWORD32 *pu4_pic_horz_bs;
+
+ /* mb indices */
+ WORD32 i4_mb_x, i4_mb_y;
+
+ /* is intra */
+ WORD32 i4_intra;
+
+ /* temp var */
+ WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+ /* init indices */
+ i4_mb_x = ps_bs->i4_mb_x;
+ i4_mb_y = ps_bs->i4_mb_y;
+
+ /* init pointers: four BS words per mb, mbs stored in raster order */
+ pu4_pic_vert_bs = ps_bs->pu4_pic_vert_bs + ((i4_mb_y * i4_wd_mbs) + i4_mb_x) * 4;
+ pu4_pic_horz_bs = ps_bs->pu4_pic_horz_bs + ((i4_mb_y * i4_wd_mbs) + i4_mb_x) * 4;
+
+ /* is intra? */
+ i4_intra = ps_proc->u4_is_intra;
+
+ /* compute blocking strength */
+ if (i4_intra)
+ {
+ /* word 0 is the mb boundary edge, words 1..3 are the inner edges */
+ pu4_pic_vert_bs[0] = 0x04040404;
+ pu4_pic_vert_bs[1] = pu4_pic_vert_bs[2] = pu4_pic_vert_bs[3] = 0x03030303;
+
+ pu4_pic_horz_bs[0] = 0x04040404;
+ pu4_pic_horz_bs[1] = pu4_pic_horz_bs[2] = pu4_pic_horz_bs[3] = 0x03030303;
+ }
+ else
+ {
+ /* left mb syntax info */
+ mb_info_t *ps_left_mb_syntax_ele = &ps_proc->s_left_mb_syntax_ele;
+
+ /* top mb syntax info */
+ mb_info_t *ps_top_mb_syntax_ele = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
+
+ /* top row motion vector info */
+ enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
+
+ /* csbp for curr mb */
+ ps_proc->u4_csbp = ih264e_calculate_csbp(ps_proc);
+
+ /* csbp for ngbrs */
+ /* first column: replace the left neighbour data with neutral values */
+ /* (no coded blocks, not intra, same mv as the current mb)           */
+ if (i4_mb_x == 0)
+ {
+ ps_left_mb_syntax_ele->u4_csbp = 0;
+ ps_left_mb_syntax_ele->u2_is_intra = 0;
+ ps_proc->s_left_mb_pu.s_l0_mv = ps_proc->ps_pu->s_l0_mv;
+ }
+ /* first row: same treatment for the top neighbour data */
+ if (i4_mb_y == 0)
+ {
+ ps_top_mb_syntax_ele->u4_csbp = 0;
+ ps_top_mb_syntax_ele->u2_is_intra = 0;
+ ps_top_row_pu->s_l0_mv = ps_proc->ps_pu->s_l0_mv;
+ }
+
+ ih264e_fill_bs_1mv_1ref_non_mbaff(pu4_pic_horz_bs,
+ pu4_pic_vert_bs,
+ ps_left_mb_syntax_ele->u4_csbp,
+ ps_top_mb_syntax_ele->u4_csbp,
+ ps_proc->u4_csbp,
+ &ps_proc->s_left_mb_pu.s_l0_mv,
+ &ps_top_row_pu->s_l0_mv,
+ &ps_proc->ps_pu->s_l0_mv,
+ ps_left_mb_syntax_ele->u2_is_intra,
+ ps_top_mb_syntax_ele->u2_is_intra);
+ }
+
+ return ;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking of top horizontal edge
+*
+* @par Description:
+* This function performs deblocking of top horizontal edge
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @param[in] ps_proc
+* pointer to proc context
+*
+* @param[in] pu1_mb_qp
+* pointer to mb quantization param
+*
+* @param[in] pu1_cur_pic_luma
+* pointer to recon buffer luma
+*
+* @param[in] pu1_cur_pic_chroma
+* pointer to recon buffer chroma
+*
+* @param[in] pu4_pic_horz_bs
+* pointer to horizontal blocking strength
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static void ih264e_filter_top_edge(codec_t *ps_codec,
+ process_ctxt_t *ps_proc,
+ UWORD8 *pu1_mb_qp,
+ UWORD8 *pu1_cur_pic_luma,
+ UWORD8 *pu1_cur_pic_chroma,
+ UWORD32 *pu4_pic_horz_bs)
+{
+ /* recon buffer stride, passed unchanged to the luma and chroma filters */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* deblk params */
+ UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma, u4_qp_p, u4_qp_q;
+ UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+ /* collect qp of top (p) & curr (q) mb; the top mb's qp sits i4_wd_mbs entries before the current one */
+ u4_qp_p = pu1_mb_qp[-ps_proc->i4_wd_mbs];
+ u4_qp_q = pu1_mb_qp[0];
+
+ /********/
+ /* luma */
+ /********/
+ u4_qp_luma = (u4_qp_p + u4_qp_q + 1) >> 1; /* rounded average of the two mb qps */
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+ u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+ u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+ /**********/
+ /* chroma */
+ /**********/
+ u4_qp_chroma = (gu1_qpc_fqpi[u4_qp_p] + gu1_qpc_fqpi[u4_qp_q] + 1) >> 1; /* each qp is mapped through gu1_qpc_fqpi before averaging */
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+ u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+ u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+ /* deblk edge */
+ /* top horizontal edge: bs word 0x04040404 selects the strong filter, any other value the normal filter */
+ if (pu4_pic_horz_bs[0] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+ }
+ else
+ {
+ /* normal filter; the clip table row is selected by the index A value */
+ ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_horz_bs[0],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+
+ ps_codec->pf_deblk_chroma_horz_bslt4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma,
+ u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[0],
+ gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking of left vertical edge
+*
+* @par Description:
+* This function performs deblocking of left vertical edge
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @param[in] ps_proc
+* pointer to proc context
+*
+* @param[in] pu1_mb_qp
+* pointer to mb quantization param
+*
+* @param[in] pu1_cur_pic_luma
+* pointer to recon buffer luma
+*
+* @param[in] pu1_cur_pic_chroma
+* pointer to recon buffer chroma
+*
+* @param[in] pu4_pic_vert_bs
+* pointer to vertical blocking strength
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static void ih264e_filter_left_edge(codec_t *ps_codec,
+ process_ctxt_t *ps_proc,
+ UWORD8 *pu1_mb_qp,
+ UWORD8 *pu1_cur_pic_luma,
+ UWORD8 *pu1_cur_pic_chroma,
+ UWORD32 *pu4_pic_vert_bs)
+{
+ /* recon buffer stride, passed unchanged to the luma and chroma filters */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* deblk params */
+ UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma, u4_qp_p, u4_qp_q;
+ UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+ /* collect qp of left (p) & curr (q) mb; the left mb's qp is the entry just before the current one */
+ u4_qp_p = pu1_mb_qp[-1];
+ u4_qp_q = pu1_mb_qp[0];
+
+ /********/
+ /* luma */
+ /********/
+ u4_qp_luma = (u4_qp_p + u4_qp_q + 1) >> 1; /* rounded average of the two mb qps */
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+ u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+ u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+ /**********/
+ /* chroma */
+ /**********/
+ u4_qp_chroma = (gu1_qpc_fqpi[u4_qp_p] + gu1_qpc_fqpi[u4_qp_q] + 1) >> 1; /* each qp is mapped through gu1_qpc_fqpi before averaging */
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+ u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+ u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+ /* deblk edge: bs word 0x04040404 selects the strong filter, any other value the normal filter */
+ if (pu4_pic_vert_bs[0] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+ }
+ else
+ {
+ /* normal filter; the clip table row is selected by the index A value */
+ ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma, i4_rec_strd,
+ u4_alpha_luma, u4_beta_luma,
+ pu4_pic_vert_bs[0],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+
+ ps_codec->pf_deblk_chroma_vert_bslt4(pu1_cur_pic_chroma, i4_rec_strd, u4_alpha_chroma,
+ u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[0],
+ gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking on an mb
+*
+* @par Description:
+* This function performs deblocking on an mb
+*
+* @param[in] ps_proc
+* process context corresponding to the job
+*
+* @param[in] ps_deblk
+* pointer to deblock context
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_deblock_mb(process_ctxt_t *ps_proc, deblk_ctxt_t * ps_deblk)
+{
+ /* codec ctxt */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* ngbr availability: u1_mb_a gates the left edge, u1_mb_b gates the top edge */
+ UWORD8 u1_mb_a, u1_mb_b;
+
+ /* mb indices */
+ WORD32 i4_mb_x = ps_deblk->i4_mb_x, i4_mb_y = ps_deblk->i4_mb_y;
+
+ /* pic qp ptr */
+ UWORD8 *pu1_pic_qp = ps_deblk->s_bs_ctxt.pu1_pic_qp;
+
+ /* vertical blocking strength */
+ UWORD32 *pu4_pic_vert_bs = ps_deblk->s_bs_ctxt.pu4_pic_vert_bs;
+
+ /* horizontal blocking strength */
+ UWORD32 *pu4_pic_horz_bs = ps_deblk->s_bs_ctxt.pu4_pic_horz_bs;
+
+ /* src buffers luma */
+ UWORD8 *pu1_cur_pic_luma = ps_deblk->pu1_cur_pic_luma;
+
+ /* src buffers chroma */
+ UWORD8 *pu1_cur_pic_chroma = ps_deblk->pu1_cur_pic_chroma;
+
+ /* recon buffer stride */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* deblk params */
+ UWORD32 u4_alpha_luma, u4_beta_luma, u4_qp_luma, u4_idx_A_luma, u4_idx_B_luma;
+ UWORD32 u4_alpha_chroma, u4_beta_chroma, u4_qp_chroma, u4_idx_A_chroma, u4_idx_B_chroma;
+
+ /* raster scan index of the mb; used below to offset the per-mb qp and bs pointers */
+ UWORD32 push_ptr = (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x;
+
+ /* derive neighbor availability */
+ /* In slice mode the edges of mbs that lie on the slice boundary are not deblocked */
+ /* deblocking filter idc '2' */
+ if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE)
+ {
+ /* slice index */
+ UWORD8 *pu1_slice_idx = ps_deblk->pu1_slice_idx;
+
+ pu1_slice_idx += (i4_mb_y * ps_proc->i4_wd_mbs);
+ /* left macroblock availability: not the first column and same slice index as the left mb */
+ u1_mb_a = (i4_mb_x == 0 ||
+ (pu1_slice_idx[i4_mb_x - 1 ] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+ /* top macroblock availability: not the first row and same slice index as the top mb */
+ u1_mb_b = (i4_mb_y == 0 ||
+ (pu1_slice_idx[i4_mb_x-ps_proc->i4_wd_mbs] != pu1_slice_idx[i4_mb_x]))? 0 : 1;
+ }
+ else
+ {
+ /* left macroblock availability */
+ u1_mb_a = (i4_mb_x == 0)? 0 : 1;
+ /* top macroblock availability */
+ u1_mb_b = (i4_mb_y == 0)? 0 : 1;
+ }
+
+ pu1_pic_qp += push_ptr;
+ pu4_pic_vert_bs += push_ptr * 4;
+ pu4_pic_horz_bs += push_ptr * 4;
+
+ /********/
+ /* luma */
+ /********/
+ u4_qp_luma = pu1_pic_qp[0]; /* internal edges use the current mb's qp only */
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_luma = MIN(51, u4_qp_luma + 0);
+ u4_idx_B_luma = MIN(51, u4_qp_luma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_luma = gu1_ih264_alpha_table[u4_idx_A_luma];
+ u4_beta_luma = gu1_ih264_beta_table[u4_idx_B_luma];
+
+ /**********/
+ /* chroma */
+ /**********/
+ u4_qp_chroma = gu1_qpc_fqpi[u4_qp_luma];
+
+ /* filter offset A and filter offset B have to be received from slice header */
+ /* TODO : for now lets set these offsets as zero */
+
+
+ u4_idx_A_chroma = MIN(51, u4_qp_chroma + 0);
+ u4_idx_B_chroma = MIN(51, u4_qp_chroma + 0);
+
+ /* alpha, beta computation */
+ u4_alpha_chroma = gu1_ih264_alpha_table[u4_idx_A_chroma];
+ u4_beta_chroma = gu1_ih264_beta_table[u4_idx_B_chroma];
+
+ /* Deblock vertical edges */
+ /* left vertical edge 0 - filtered only when the left mb is available */
+ if (u1_mb_a)
+ {
+ ih264e_filter_left_edge(ps_codec, ps_proc, pu1_pic_qp, pu1_cur_pic_luma, pu1_cur_pic_chroma, pu4_pic_vert_bs);
+ }
+
+ /* vertical edge 1 (luma only) */
+ if (pu4_pic_vert_bs[1] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 4, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 4, i4_rec_strd,
+ u4_alpha_luma, u4_beta_luma,
+ pu4_pic_vert_bs[1],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+ }
+
+ /* vertical edge 2 (the only internal vertical edge that is also filtered for chroma) */
+ if (pu4_pic_vert_bs[2] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 8, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ ps_codec->pf_deblk_chroma_vert_bs4(pu1_cur_pic_chroma + 8, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 8, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_vert_bs[2],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+
+ ps_codec->pf_deblk_chroma_vert_bslt4(pu1_cur_pic_chroma + 8, i4_rec_strd, u4_alpha_chroma,
+ u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_vert_bs[2],
+ gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+ }
+
+ /* vertical edge 3 (luma only) */
+ if (pu4_pic_vert_bs[3] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_vert_bs4(pu1_cur_pic_luma + 12, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_vert_bslt4(pu1_cur_pic_luma + 12, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_vert_bs[3],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+ }
+
+ /* Deblock Horizontal edges */
+ /* Horizontal edge 0 - filtered only when the top mb is available */
+ if (u1_mb_b)
+ {
+ ih264e_filter_top_edge(ps_codec, ps_proc, pu1_pic_qp, pu1_cur_pic_luma, pu1_cur_pic_chroma, pu4_pic_horz_bs);
+ }
+
+ /* horizontal edge 1 (luma only) */
+ if (pu4_pic_horz_bs[1] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_horz_bs[1],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+ }
+
+ /* horizontal edge 2 (the only internal horizontal edge that is also filtered for chroma) */
+ if (pu4_pic_horz_bs[2] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 8 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ ps_codec->pf_deblk_chroma_horz_bs4(pu1_cur_pic_chroma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_chroma, u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 8 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_horz_bs[2],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+
+ ps_codec->pf_deblk_chroma_horz_bslt4(pu1_cur_pic_chroma + 4 * i4_rec_strd, i4_rec_strd, u4_alpha_chroma,
+ u4_beta_chroma, u4_alpha_chroma, u4_beta_chroma, pu4_pic_horz_bs[2],
+ gu1_ih264_clip_table[u4_idx_A_chroma], gu1_ih264_clip_table[u4_idx_A_chroma]);
+ }
+
+ /* horizontal edge 3 (luma only) */
+ if (pu4_pic_horz_bs[3] == 0x04040404)
+ {
+ /* strong filter */
+ ps_codec->pf_deblk_luma_horz_bs4(pu1_cur_pic_luma + 12 * i4_rec_strd, i4_rec_strd, u4_alpha_luma, u4_beta_luma);
+ }
+ else
+ {
+ /* normal filter */
+ ps_codec->pf_deblk_luma_horz_bslt4(pu1_cur_pic_luma + 12 * i4_rec_strd, i4_rec_strd, u4_alpha_luma,
+ u4_beta_luma, pu4_pic_horz_bs[3],
+ gu1_ih264_clip_table[u4_idx_A_luma]);
+ }
+
+ return ;
+}
diff --git a/encoder/ih264e_deblk.h b/encoder/ih264e_deblk.h
new file mode 100755
index 0000000..9b3b67b
--- /dev/null
+++ b/encoder/ih264e_deblk.h
@@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_deblk.h
+*
+* @brief
+* This file contains extern declarations of deblocking routines
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_DEBLK_H_
+#define IH264E_DEBLK_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief masks to extract csbp
+******************************************************************************
+ */
+#define CSBP_LEFT_BLOCK_MASK 0x1111
+#define CSBP_RIGHT_BLOCK_MASK 0x8888
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief This function computes blocking strength for an mb
+*
+* @par Description:
+* This function computes blocking strength for an mb
+*
+* @param[in] ps_proc
+* process context
+*
+* @returns none
+*
+* @remarks In this module it is assumed that there is only a single reference
+* frame and that it is always the most recently used anchor frame
+*
+*******************************************************************************
+*/
+void ih264e_compute_bs(process_ctxt_t * ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking on an mb
+*
+* @par Description:
+* This function performs deblocking on an mb
+*
+* @param[in] ps_proc
+* process context corresponding to the job
+*
+* @param[in] ps_deblk
+* pointer to deblock context
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_deblock_mb(process_ctxt_t *ps_proc, deblk_ctxt_t * ps_deblk);
+
+#endif /* IH264E_DEBLK_H_ */
diff --git a/encoder/ih264e_debug.h b/encoder/ih264e_debug.h
new file mode 100755
index 0000000..5cb0434
--- /dev/null
+++ b/encoder/ih264e_debug.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_debug.h
+*
+* @brief
+* This file contains extern declarations of routines that could be helpful
+* for debugging purposes.
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+******************************************************************************
+*/
+
+#ifndef IH264E_DEBUG_H_
+#define IH264E_DEBUG_H_
+
+#if DEBUG_RC
+/* DEBUG_RC enabled: each macro forwards to the matching ih264e_debug_* routine; the ';' is part of the expansion */
+#define DEBUG_DUMP_QP(pic_cnt, qp, num_cores) \
+ ih264e_debug_dump_qp(pic_cnt, qp, num_cores);
+
+#define DEBUG_DUMP_RC(ps_rc) ih264e_debug_print_rc(ps_rc);
+
+#define DEBUG_DUMP_COST_SAD_PU(ps_proc) ih264e_debug_dump_cost_sad_pu(ps_proc);
+
+#define DEBUG_DUMP_INP_TO_RC_POST_ENC(ps_frame_info, pic_cnt, num_cores) \
+ ih264e_debug_dump_inp_to_post_enc(ps_frame_info, pic_cnt, num_cores);
+
+#else
+/* DEBUG_RC disabled: expand to a valid no-op statement ('(void);' alone does not compile); arguments are not evaluated */
+#define DEBUG_DUMP_QP(pic_cnt, qp, num_cores) ((void)0);
+
+#define DEBUG_DUMP_RC(ps_rc) ((void)0);
+
+#define DEBUG_DUMP_COST_SAD_PU(ps_proc) ((void)0);
+
+#define DEBUG_DUMP_INP_TO_RC_POST_ENC(ps_frame_info, pic_cnt, num_cores) ((void)0);
+
+#endif
+
+#endif /* IH264E_DEBUG_H_ */
diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h
new file mode 100755
index 0000000..76929ef
--- /dev/null
+++ b/encoder/ih264e_defs.h
@@ -0,0 +1,538 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_defs.h
+*
+* @brief
+* Definitions used in the encoder
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_DEFS_H_
+#define IH264E_DEFS_H_
+
+
+/*****************************************************************************/
+/* Width and height restrictions */
+/*****************************************************************************/
+/**
+ * Minimum width supported by codec
+ */
+#define MIN_WD 16
+
+/**
+ * Maximum width supported by codec
+ */
+
+#define MAX_WD 1920
+
+/**
+ * Minimum height supported by codec
+ */
+#define MIN_HT 16
+
+/**
+ * Maximum height supported by codec
+ */
+
+#define MAX_HT 1920
+
+/*****************************************************************************/
+/* Padding sizes */
+/*****************************************************************************/
+/**
+ * Padding used for top of the frame
+ */
+#define PAD_TOP 32
+
+/**
+ * Padding used for bottom of the frame
+ */
+#define PAD_BOT 32
+
+/**
+ * Padding used at left of the frame
+ */
+#define PAD_LEFT 32
+
+/**
+ * Padding used at right of the frame
+ */
+#define PAD_RIGHT 32
+/**
+ * Padding for width
+ */
+#define PAD_WD (PAD_LEFT + PAD_RIGHT)
+/**
+ * Padding for height
+ */
+#define PAD_HT (PAD_TOP + PAD_BOT)
+
+/*
+ * buffer width and height for half pel buffers
+ */
+#define HP_BUFF_WD 24
+#define HP_BUFF_HT 18
+
+/*****************************************************************************/
+/* Number of frame restrictions */
+/*****************************************************************************/
+/**
+ * Maximum number of reference buffers in DPB manager
+ */
+#define MAX_REF_CNT 32
+
+/*****************************************************************************/
+/* Num cores related defs */
+/*****************************************************************************/
+/**
+ * Maximum number of cores
+ */
+#define MAX_NUM_CORES 8
+
+/**
+ * Maximum number of threads for pixel processing
+ */
+#define MAX_PROCESS_THREADS MAX_NUM_CORES
+
+/**
+ * Maximum process context sets
+ * Used to stagger encoding of MAX_CTXT_SETS in parallel
+ */
+#define MAX_CTXT_SETS 2
+/**
+ * Maximum number of contexts
+ * Kept as MAX_CTXT_SETS times the number of cores, to make it easier to initialize
+ * the contexts from master thread; parenthesised so the product stays intact inside larger expressions
+ */
+#define MAX_PROCESS_CTXT (MAX_NUM_CORES * MAX_CTXT_SETS)
+
+/*****************************************************************************/
+/* Profile and level restrictions */
+/*****************************************************************************/
+/**
+ * Max level supported by the codec
+ */
+#define MAX_LEVEL IH264_LEVEL_51
+
+/**
+ * Min level supported by the codec
+ */
+#define MIN_LEVEL IH264_LEVEL_10
+
+/**
+ * Maximum number of slice headers that are held in memory simultaneously
+ * For single core implementation only 1 slice header is enough.
+ * But for multi-core parsing thread needs to ensure that slice headers are
+ * stored till the last CB in a slice is decoded.
+ * Parsing thread has to wait till last CB of a slice is consumed before reusing or
+ * overwriting the slice header
+ * MAX_SLICE_HDR_CNT is assumed to be a power of 2
+ */
+
+#define LOG2_MAX_SLICE_HDR_CNT 8
+#define MAX_SLICE_HDR_CNT (1 << LOG2_MAX_SLICE_HDR_CNT)
+
+/* Generic declarations */
+#define DEFAULT_MAX_LEVEL 40
+#define DEFAULT_RECON_ENABLE 0
+#define DEFAULT_RC IVE_RC_STORAGE
+#define DEFAULT_MAX_FRAMERATE 120000
+#define DEFAULT_MAX_BITRATE 20000000
+#define DEFAULT_MAX_SRCH_RANGE_X 256
+#define DEFAULT_MAX_SRCH_RANGE_Y 256
+#define DEFAULT_SLICE_PARAM 256
+#define DEFAULT_SRC_FRAME_RATE 30000
+#define DEFAULT_TGT_FRAME_RATE 30000
+#define DEFAULT_BITRATE 6000000
+#define DEFAULT_QP_MIN 10
+#define DEFAULT_QP_MAX 51
+#define DEFAULT_I_QP 25
+#define DEFAULT_P_QP 28
+#define DEFAULT_B_QP 28
+#define DEFAULT_AIR_MODE IVE_AIR_MODE_NONE
+#define DEFAULT_AIR_REFRESH_PERIOD 30
+#define DEFAULT_VBV_DELAY 1000
+#define DEFAULT_VBV_SIZE 16800000 /* level 3.1 */
+#define DEFAULT_NUM_CORES 1
+#define DEFAULT_ME_SPEED_PRESET 100
+#define DEFAULT_HPEL 1
+#define DEFAULT_QPEL 1
+#define DEFAULT_I4 1
+#define DEFAULT_I8 0
+#define DEFAULT_I16 1
+#define DEFAULT_ENABLE_FAST_SAD 0
+#define DEFAULT_ENABLE_SATQD 1
+#define DEFAULT_MIN_SAD_ENABLE 0
+#define DEFAULT_MIN_SAD_DISABLE (-1) /* parenthesised so the sign cannot bind to a neighbouring operator */
+#define DEFAULT_SRCH_RNG_X 64
+#define DEFAULT_SRCH_RNG_Y 48
+#define DEFAULT_I_INTERVAL 30
+#define DEFAULT_IDR_INTERVAL 1000
+#define DEFAULT_B_FRAMES 0
+#define DEFAULT_DISABLE_DEBLK_LEVEL 0
+#define DEFAULT_PROFILE IV_PROFILE_BASE
+#define DEFAULT_MIN_INTRA_FRAME_RATE 1
+#define DEFAULT_MAX_INTRA_FRAME_RATE 2147483647
+#define DEFAULT_MIN_BUFFER_DELAY 30
+#define DEFAULT_MAX_BUFFER_DELAY 20000
+#define DEFAULT_STRIDE 0
+#define DEFAULT_ENC_SPEED_PRESET IVE_USER_DEFINED
+#define DEFAULT_PRE_ENC_ME 0
+#define DEFAULT_PRE_ENC_IPE 0
+
+/** Maximum number of entries in input buffer list */
+#define MAX_INP_BUF_LIST_ENTRIES 32
+
+/** Maximum number of entries in output buffer list */
+#define MAX_OUT_BUF_LIST_ENTRIES 32
+
+/** Maximum number of entries in recon buffer list used within the encoder */
+#define MAX_REC_LIST_ENTRIES 16
+
+/** Number of buffers created to hold half-pel planes for every reference buffer */
+ #define HPEL_PLANES_CNT 1
+
+/**
+ *****************************************************************************
+ * Macro to compute total size required to hold one set of scaling matrices; expands to a brace block that assigns to its (parenthesised) argument
+ *****************************************************************************
+ */
+#define SCALING_MAT_SIZE(m_scaling_mat_size) \
+{ \
+ (m_scaling_mat_size) = 6 * TRANS_SIZE_4 * TRANS_SIZE_4; \
+ (m_scaling_mat_size) += 6 * TRANS_SIZE_8 * TRANS_SIZE_8; \
+ (m_scaling_mat_size) += 6 * TRANS_SIZE_16 * TRANS_SIZE_16; \
+ (m_scaling_mat_size) += 2 * TRANS_SIZE_32 * TRANS_SIZE_32; \
+}
+
+/**
+ ******************************************************************************
+ * @brief Macros to get raster scan position of a block[8x8] / sub block[4x4]: X is bit 0 of the index, Y is the index >> 1 (arguments parenthesised so expression arguments expand correctly)
+ ******************************************************************************
+ */
+#define GET_BLK_RASTER_POS_X(x) (((x) & 0x01))
+#define GET_BLK_RASTER_POS_Y(y) (((y) >> 1))
+#define GET_SUB_BLK_RASTER_POS_X(x) (((x) & 0x01))
+#define GET_SUB_BLK_RASTER_POS_Y(y) (((y) >> 1))
+
+#define NUM_RC_MEMTABS 17
+
+/**
+ ***************************************************************************
+ * Enum to hold various mem records being requested
+ ****************************************************************************
+ */
+enum
+{
+ /**
+ * Codec Object at API level
+ */
+ MEM_REC_IV_OBJ,
+
+ /**
+ * Codec context
+ */
+ MEM_REC_CODEC,
+
+ /**
+ * entropy context
+ */
+ MEM_REC_ENTROPY,
+
+ /**
+ * Buffer to hold coeff data
+ */
+ MEM_REC_MB_COEFF_DATA,
+
+ /**
+ * Buffer to hold mb header data (per the record name; verify against the allocator)
+ */
+ MEM_REC_MB_HEADER_DATA,
+
+ /**
+ * Motion vector bank
+ */
+ MEM_REC_MVBANK,
+
+ /**
+ * Motion vector bits
+ */
+ MEM_REC_MVBITS,
+
+ /**
+ * Holds mem records passed to the codec.
+ */
+ MEM_REC_BACKUP,
+
+ /**
+ * Holds SPS
+ */
+ MEM_REC_SPS,
+
+ /**
+ * Holds PPS
+ */
+ MEM_REC_PPS,
+
+ /**
+ * Holds Slice Headers
+ */
+ MEM_REC_SLICE_HDR,
+
+ /**
+ * Contains map indicating slice index per MB basis
+ */
+ MEM_REC_SLICE_MAP,
+
+ /**
+ * Holds thread handles
+ */
+ MEM_REC_THREAD_HANDLE,
+
+ /**
+ * Holds control call mutex
+ */
+ MEM_REC_CTL_MUTEX,
+
+ /**
+ * Holds entropy call mutex
+ */
+ MEM_REC_ENTROPY_MUTEX,
+
+ /**
+ * Holds memory for Process JOB Queue
+ */
+ MEM_REC_PROC_JOBQ,
+
+ /**
+ * Holds memory for Entropy JOB Queue
+ */
+ MEM_REC_ENTROPY_JOBQ,
+
+ /**
+ * Contains status map indicating processing status per MB basis
+ */
+ MEM_REC_PROC_MAP,
+
+ /**
+ * Contains status map indicating deblocking status per MB basis
+ */
+ MEM_REC_DBLK_MAP,
+
+ /*
+ * Contains AIR map and mask
+ */
+ MEM_REC_AIR_MAP,
+
+ /**
+ * Contains status map indicating ME status per MB basis
+ */
+ MEM_REC_ME_MAP,
+
+ /**
+ * Holds dpb manager context
+ */
+ MEM_REC_DPB_MGR,
+
+ /**
+ * Holds intermediate buffers needed during processing stage
+ * Memory for process contexts is allocated in this memtab
+ */
+ MEM_REC_PROC_SCRATCH,
+
+ /**
+ * NOTE(review): original text duplicated the MEM_REC_BS_QP description; presumably holds quantization parameters - confirm
+ */
+ MEM_REC_QUANT_PARAM,
+
+ /**
+ * Holds top row syntax information
+ */
+ MEM_REC_TOP_ROW_SYN_INFO,
+
+ /**
+ * Holds buffers for vert_bs, horz_bs and QP (all frame level)
+ */
+ MEM_REC_BS_QP,
+
+ /**
+ * Holds input buffer manager context
+ */
+ MEM_REC_INP_PIC,
+
+ /**
+ * Holds output buffer manager context
+ */
+ MEM_REC_OUT,
+
+ /**
+ * Holds picture buffer manager context and array of pic_buf_ts
+ * Also holds reference picture buffers in non-shared mode
+ */
+ MEM_REC_REF_PIC,
+
+ /*
+ * Mem record for color space conversion
+ */
+ MEM_REC_CSC,
+
+ /**
+ * NMB info struct
+ */
+ MEM_REC_MB_INFO_NMB,
+
+ /**
+ * First rate control memory record; MEM_REC_CNT below reserves NUM_RC_MEMTABS entries starting here
+ */
+ MEM_REC_RC,
+
+ /**
+ * Place holder to compute number of memory records.
+ */
+ MEM_REC_CNT = MEM_REC_RC + NUM_RC_MEMTABS,
+
+ /*
+ * Do not add anything below
+ */
+};
+
+#define DISABLE_DEBLOCK_INTERVAL 8
+
+/**
+ ****************************************************************************
+ * Disable deblock levels
+ * Level 0 enables deblocking completely and level 4 disables completely
+ * Other levels are intermediate values to control deblocking level
+ ****************************************************************************
+ */
+enum
+{
+ /**
+ * Enable deblocking completely
+ */
+ DISABLE_DEBLK_LEVEL_0,
+
+ /**
+ * Disable only within MB edges - Not supported currently
+ */
+ DISABLE_DEBLK_LEVEL_1,
+
+ /**
+ * Enable deblocking once in every interval of pictures (presumably DISABLE_DEBLOCK_INTERVAL)
+ * and for I slices
+ */
+ DISABLE_DEBLK_LEVEL_2,
+
+ /**
+ * Enable deblocking only for I slices
+ */
+ DISABLE_DEBLK_LEVEL_3,
+
+ /**
+ * Disable deblocking completely
+ */
+ DISABLE_DEBLK_LEVEL_4
+};
+
+/**
+ ****************************************************************************
+ * Number of buffers for I/O based on format
+ ****************************************************************************
+ */
+
+/** Minimum number of input buffers */
+#define MIN_INP_BUFS 2
+
+/** Minimum number of output buffers */
+#define MIN_OUT_BUFS 1
+
+/** Minimum number of components in bitstream buffer */
+#define MIN_BITS_BUFS_COMP 1
+
+/** Minimum number of components in raw buffer */
+#define MIN_RAW_BUFS_420_COMP 3
+#define MIN_RAW_BUFS_422ILE_COMP 1
+#define MIN_RAW_BUFS_RGB565_COMP 1
+#define MIN_RAW_BUFS_RGBA8888_COMP 1
+#define MIN_RAW_BUFS_420SP_COMP 2
+
+#define MAX_NMB 120
+
+/** Maximum number of active config parameter sets */
+#define MAX_ACTIVE_CONFIG_PARAMS 32
+
+/**
+******************************************************************************
+ * @brief Thresholds for luma & chroma to determine if the 8x8 subblock needs
+ * to be encoded or skipped
+******************************************************************************
+*/
+#define LUMA_SUB_BLOCK_SKIP_THRESHOLD 4
+#define LUMA_BLOCK_SKIP_THRESHOLD 5
+#define CHROMA_BLOCK_SKIP_THRESHOLD 4
+
+/**
+******************************************************************************
+ * @brief defines the first byte of a NAL unit
+ * forbidden zero bit - nal_ref_idc - nal_unit_type
+******************************************************************************
+*/
+/* [0 - 11 - 00111] */
+#define NAL_SPS_FIRST_BYTE 0x67
+
+/* [0 - 11 - 01000] */
+#define NAL_PPS_FIRST_BYTE 0x68
+
+/* [0 - 11 - 00001] */
+#define NAL_SLICE_FIRST_BYTE 0x61
+
+/* [0 - 00 - 00001] */
+#define NAL_NON_REF_SLICE_FIRST_BYTE 0x01
+
+/* [0 - 11 - 00101] */
+#define NAL_IDR_SLICE_FIRST_BYTE 0x65
+
+/* [0 - 00 - 01100] */
+#define NAL_FILLER_FIRST_BYTE 0x0C
+
+/* [0 - 00 - 00110] */
+#define NAL_SEI_FIRST_BYTE 0x06
+
+#define H264_ALLOC_INTER_FRM_INTV 1
+
+#define H264_MPEG_QP_MAP 191
+
+#define MPEG2_QP_ELEM (H264_MPEG_QP_MAP + 1)
+#define H264_QP_ELEM (MAX_H264_QP + 1)
+
+#define H264_INIT_QUANT_I 26
+#define H264_INIT_QUANT_P 34
+
+#endif /*IH264E_DEFS_H_*/
diff --git a/encoder/ih264e_encode.c b/encoder/ih264e_encode.c
new file mode 100755
index 0000000..ffc6fb7
--- /dev/null
+++ b/encoder/ih264e_encode.c
@@ -0,0 +1,580 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_encode.c
+*
+* @brief
+* This file contains functions for encoding the input yuv frame in synchronous
+* api mode
+*
+* @author
+* ittiam
+*
+* List of Functions
+* - ih264e_join_threads()
+* - ih264e_wait_for_thread()
+* - ih264e_encode()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_debug.h"
+#include "ih264_structs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_list.h"
+#include "ih264e_error.h"
+#include "ih264e_defs.h"
+#include "ih264_padding.h"
+#include "ih264e_bitstream.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_structs.h"
+#include "ih264e_master.h"
+#include "ih264e_process.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_utils.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264e_debug.h"
+#ifdef LOGO_EN
+#include "ih264e_ittiam_logo.h"
+#endif
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  Joins the worker threads spawned for the current frame
+*
+* @par Description
+*  Visits the first i4_proc_thread_cnt thread slots. Every slot whose
+*  ai4_process_thread_created flag is set is joined and its flag is cleared;
+*  slots whose flag is not set are skipped. The thread count is reset to zero
+*  once all slots have been visited.
+*
+* @param[in] ps_codec
+*  pointer to codec context
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_join_threads(codec_t *ps_codec)
+{
+    /* temp var */
+    WORD32 i;
+    WORD32 ret = 0;
+
+    /* join spawned threads */
+    for (i = 0; i < ps_codec->i4_proc_thread_cnt; i++)
+    {
+        /* The index must advance even when a slot was never marked as
+         * created; otherwise the loop would spin on that slot forever. */
+        if (!ps_codec->ai4_process_thread_created[i])
+        {
+            continue;
+        }
+
+        ret = ithread_join(ps_codec->apv_proc_thread_handle[i], NULL);
+        if (ret != 0)
+        {
+            printf("pthread Join Failed");
+            /* note: assert() is compiled out when NDEBUG is defined */
+            assert(0);
+        }
+
+        ps_codec->ai4_process_thread_created[i] = 0;
+    }
+
+    ps_codec->i4_proc_thread_cnt = 0;
+}
+
+/**
+******************************************************************************
+*
+* @brief Yields the processor and then sleeps the calling thread
+*
+* @par Description
+*  Calls ithread_yield() followed by ithread_usleep(sleep_us). Yielding only
+*  ends the current time slice early so that another ready thread may run;
+*  sleeping keeps the caller off the processor for the requested duration even
+*  if no other thread wants to run.
+*
+* @param[in] sleep_us
+*  sleep duration, forwarded unchanged to ithread_usleep(). Going by the
+*  parameter and function names the unit is microseconds - confirm against
+*  ithread.h
+*
+* @returns IH264E_SUCCESS, unconditionally
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_wait_for_thread(UWORD32 sleep_us)
+{
+    /* first let any other ready thread have the processor ... */
+    ithread_yield();
+    /* ... then block for the requested duration */
+    ithread_usleep(sleep_us);
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* Encodes in synchronous api mode
+*
+* @par Description
+* This routine processes input yuv, encodes it and outputs bitstream and recon.
+* Per call it resets the output structure, copies the i/o buffer descriptors
+* into local structures, increments the api-call and picture counters, applies
+* pending configuration entries, and then either emits SPS/PPS only (header
+* mode) or encodes the frame using the calling thread plus u4_num_cores - 1
+* spawned threads before handing the buffers back to the application.
+*
+* @param[in] ps_codec_obj
+* Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure (used as ih264e_video_encode_ip_t)
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure (used as ih264e_video_encode_op_t)
+*
+* @returns IV_SUCCESS on the return statements visible in this function.
+* Failures are reported through SET_ERROR_ON_RETURN, which is defined elsewhere
+* and presumably records the error code and returns IV_FAIL - confirm against
+* the macro definition.
+*
+******************************************************************************
+*/
+WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+
+ /* codec ctxt */
+ codec_t *ps_codec = (codec_t *)ps_codec_obj->pv_codec_handle;
+
+ /* input frame to encode */
+ ih264e_video_encode_ip_t *ps_video_encode_ip = pv_api_ip;
+
+ /* output buffer to write stream */
+ ih264e_video_encode_op_t *ps_video_encode_op = pv_api_op;
+
+ /* i/o structures */
+ inp_buf_t s_inp_buf;
+ out_buf_t s_out_buf;
+
+ /* temp var */
+ WORD32 ctxt_sel = 0, i;
+
+ /********************************************************************/
+ /* BEGIN INIT */
+ /********************************************************************/
+ /* reset output structure */
+ ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+ ps_video_encode_op->s_ive_op.output_present = 0;
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_NA_FRAME;
+
+ /* copy input info. to internal structure */
+ s_inp_buf.s_raw_buf = ps_video_encode_ip->s_ive_ip.s_inp_buf;
+ s_inp_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
+ s_inp_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
+ s_inp_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
+ s_inp_buf.pv_mb_info = ps_video_encode_ip->s_ive_ip.pv_mb_info;
+ s_inp_buf.u4_mb_info_type = ps_video_encode_ip->s_ive_ip.u4_mb_info_type;
+ s_inp_buf.pv_pic_info = ps_video_encode_ip->s_ive_ip.pv_pic_info;
+ s_inp_buf.u4_pic_info_type = ps_video_encode_ip->s_ive_ip.u4_pic_info_type;
+
+ /* copy output info. to internal structure */
+ s_out_buf.s_bits_buf = ps_video_encode_ip->s_ive_ip.s_out_buf;
+ s_out_buf.u4_is_last = ps_video_encode_ip->s_ive_ip.u4_is_last;
+ s_out_buf.u4_timestamp_low = ps_video_encode_ip->s_ive_ip.u4_timestamp_low;
+ s_out_buf.u4_timestamp_high = ps_video_encode_ip->s_ive_ip.u4_timestamp_high;
+
+ /* api call cnt */
+ ps_codec->i4_encode_api_call_cnt += 1;
+
+ /* curr pic cnt */
+ ps_codec->i4_pic_cnt += 1;
+
+ /* codec context selector */
+ /* alternates between 0 and 1 on successive api calls */
+ ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+ /* reset status flags */
+ /* -1 marks "no picture in this context set"; it is tested further below */
+ ps_codec->ai4_pic_cnt[ctxt_sel] = -1;
+ ps_codec->s_rate_control.post_encode_skip[ctxt_sel] = 0;
+ ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = 0;
+
+ /* pass output buffer to codec */
+ ps_codec->as_out_buf[ctxt_sel] = s_out_buf;
+
+ /* initialize codec ctxt with default params for the first encode api call */
+ /* NOTE(review): the counter was incremented above, so this test can only be
+ * true if it held -1 before this call - confirm the initial value */
+ if (ps_codec->i4_encode_api_call_cnt == 0)
+ {
+ ih264e_codec_init(ps_codec);
+ }
+
+ /* parse configuration params */
+ /* an entry is applied when its timestamp equals the input's timestamp, or
+ * when either timestamp word is -1 once cast to WORD32; applied entries are
+ * then marked invalid so they are used only once */
+ for (i = 0; i < MAX_ACTIVE_CONFIG_PARAMS; i++)
+ {
+ cfg_params_t *ps_cfg = &ps_codec->as_cfg[i];
+
+ if (1 == ps_cfg->u4_is_valid)
+ {
+ if ( ((ps_cfg->u4_timestamp_high == s_inp_buf.u4_timestamp_high) &&
+ (ps_cfg->u4_timestamp_low == s_inp_buf.u4_timestamp_low)) ||
+ ((WORD32)ps_cfg->u4_timestamp_high == -1) ||
+ ((WORD32)ps_cfg->u4_timestamp_low == -1) )
+ {
+ error_status |= ih264e_codec_update_config(ps_codec, ps_cfg);
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_UNSUPPORTEDPARAM,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ ps_cfg->u4_is_valid = 0;
+ }
+ }
+ }
+
+ /******************************************************************
+ * INSERT LOGO
+ *****************************************************************/
+#ifdef LOGO_EN
+ if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL &&
+ ps_codec->i4_header_mode != 1)
+ {
+ ih264e_insert_logo(s_inp_buf.s_raw_buf.apv_bufs[0],
+ s_inp_buf.s_raw_buf.apv_bufs[1],
+ s_inp_buf.s_raw_buf.apv_bufs[2],
+ s_inp_buf.s_raw_buf.au4_strd[0],
+ 0,
+ 0,
+ ps_codec->s_cfg.e_inp_color_fmt,
+ ps_codec->s_cfg.u4_disp_wd,
+ ps_codec->s_cfg.u4_disp_ht);
+ }
+#endif /*LOGO_EN*/
+
+ if (ps_codec->i4_encode_api_call_cnt == 0)
+ {
+ /********************************************************************/
+ /* number of mv/ref bank buffers used by the codec, */
+ /* 1 to handle curr frame */
+ /* 1 to store information of ref frame */
+ /* 1 more additional because of the codec employs 2 ctxt sets */
+ /* to assist asynchronous API */
+ /********************************************************************/
+
+ /* initialize mv bank buffer manager */
+ error_status |= ih264e_mv_buf_mgr_add_bufs(ps_codec);
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ /* initialize ref bank buffer manager */
+ error_status |= ih264e_pic_buf_mgr_add_bufs(ps_codec);
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ /* for the first frame, generate header when not requested explicitly */
+ if (ps_codec->i4_header_mode == 0 &&
+ ps_codec->u4_header_generated == 0)
+ {
+ ps_codec->i4_gen_header = 1;
+ }
+ }
+
+ /* generate header and return when encoder is operated in header mode */
+ if (ps_codec->i4_header_mode == 1)
+ {
+ /* whenever the header is generated, this implies a start of sequence
+ * and a sequence needs to be started with IDR
+ */
+ ps_codec->force_curr_frame_type = IV_IDR_FRAME;
+
+ /* generate header */
+ error_status |= ih264e_generate_sps_pps(ps_codec);
+
+ /* the two decrements below undo the increments made at the top of this
+ * call */
+ /* api call cnt */
+ ps_codec->i4_encode_api_call_cnt --;
+
+ /* curr pic cnt */
+ ps_codec->i4_pic_cnt --;
+
+ /* header mode tag is not sticky */
+ ps_codec->i4_header_mode = 0;
+
+ /* send the input to app */
+ ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+
+ /* send the output to app */
+ ps_video_encode_op->s_ive_op.output_present = 1;
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
+ ps_video_encode_op->s_ive_op.s_out_buf = ps_codec->as_out_buf[ctxt_sel].s_bits_buf;
+
+ /* error status */
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ /* indicates that header has been generated previously */
+ ps_codec->u4_header_generated = 1;
+
+ return IV_SUCCESS;
+ }
+
+
+ /* a NULL first plane pointer means there is no frame to encode in this call */
+ if (s_inp_buf.s_raw_buf.apv_bufs[0] != NULL)
+ {
+ /* array giving pic cnt that is being processed in curr context set */
+ ps_codec->ai4_pic_cnt[ctxt_sel] = ps_codec->i4_pic_cnt;
+
+ /* initialize all relevant process ctxts */
+ error_status |= ih264e_pic_init(ps_codec, &s_inp_buf);
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ if (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0)
+ {
+ /* proc ctxt base idx */
+ WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
+
+ /* proc ctxt */
+ process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+
+ WORD32 ret = 0;
+
+ /* number of addl. threads to be created */
+ WORD32 num_thread_cnt = ps_codec->s_cfg.u4_num_cores - 1;
+
+ /* NOTE(review): workers are handed as_process[i + 1], without the
+ * proc_ctxt_select offset applied to ps_proc above - confirm this is
+ * intended */
+ for (i = 0; i < num_thread_cnt; i++)
+ {
+ ret = ithread_create(ps_codec->apv_proc_thread_handle[i],
+ NULL,
+ (void*)ih264e_process_thread,
+ &ps_codec->as_process[i + 1]);
+ if (ret != 0)
+ {
+ printf("pthread Create Failed");
+ /* assert() is compiled out when NDEBUG is defined; execution then
+ * continues and the slot is still marked as created below */
+ assert(0);
+ }
+
+ ps_codec->ai4_process_thread_created[i] = 1;
+
+ ps_codec->i4_proc_thread_cnt++;
+ }
+
+
+ /* launch job */
+ /* the calling thread does its share of the work here */
+ ih264e_process_thread(ps_proc);
+
+ /* Join threads at the end of encoding a frame */
+ ih264e_join_threads(ps_codec);
+
+ ih264_list_reset(ps_codec->pv_proc_jobq);
+
+ ih264_list_reset(ps_codec->pv_entropy_jobq);
+ }
+ }
+
+ /* true only when a picture was assigned to this context set above */
+ if (-1 != ps_codec->ai4_pic_cnt[ctxt_sel])
+ {
+ /* proc ctxt base idx */
+ WORD32 proc_ctxt_select = ctxt_sel * MAX_PROCESS_THREADS;
+
+ /* proc ctxt */
+ process_ctxt_t *ps_proc = &ps_codec->as_process[proc_ctxt_select];
+
+ /* receive output back from codec */
+ s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+ /* send the output to app */
+ ps_video_encode_op->s_ive_op.output_present = 1;
+ ps_video_encode_op->s_ive_op.dump_recon = 1;
+ ps_video_encode_op->s_ive_op.s_out_buf = s_out_buf.s_bits_buf;
+ ps_video_encode_op->s_ive_op.u4_error_code = IV_SUCCESS;
+
+ /* receive input back from codec */
+ s_inp_buf = ps_proc->s_inp_buf;
+
+ /* send the input to app */
+ ps_video_encode_op->s_ive_op.s_inp_buf = s_inp_buf.s_raw_buf;
+
+ if (ps_codec->s_cfg.u4_enable_recon &&
+ ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 0)
+ {
+ /* error status */
+ IH264_ERROR_T ret = IH264_SUCCESS;
+
+ /* recon buffer */
+ rec_buf_t *ps_rec_buf = &ps_codec->as_rec_buf[ctxt_sel];
+
+ ps_video_encode_op->s_ive_op.s_recon_buf = ps_video_encode_ip->s_ive_ip.s_recon_buf;
+
+ /* copy/convert the recon buffer and return */
+ ih264e_fmt_conv(ps_codec, &ps_rec_buf->s_pic_buf,
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[0],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[1],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.apv_bufs[2],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[0],
+ ps_video_encode_ip->s_ive_ip.s_recon_buf.au4_wd[1],
+ 0,
+ ps_codec->s_cfg.u4_disp_ht);
+
+ ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_rec_buf->s_pic_buf.i4_buf_id, BUF_MGR_IO);
+ if (IH264_SUCCESS != ret)
+ {
+ SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+ }
+ }
+
+ /* release buffers from ref list */
+ if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1)
+ {
+ /* pic info */
+ pic_buf_t *ps_cur_pic;
+
+ /* mv info */
+ mv_buf_t *ps_cur_mv_buf;
+
+ /* error status */
+ IH264_ERROR_T ret = IH264_SUCCESS;
+
+ /* Decrement coded pic count */
+ ps_codec->i4_coded_pic_cnt--;
+
+ /* find the ref set entry that belongs to the current picture count */
+ /* since the skipped frame may not be on reference list, we may not have an MV bank
+ * hence free only if we have allocated */
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (ps_codec->i4_pic_cnt == ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ ps_codec->as_ref_set[i].i4_pic_cnt = -1;
+ ps_codec->as_ref_set[i].i4_poc = -1;
+
+ ps_cur_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+
+ ps_cur_mv_buf = ps_codec->as_ref_set[i].ps_mv_buf;
+
+ /* release this frame from reference list */
+ ret = ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_REF);
+ SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+
+ ret = ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_REF);
+ SET_ERROR_ON_RETURN((IH264E_ERROR_T)ret,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+ break;
+ }
+ }
+ }
+
+ /* a skipped frame (pre- or post-encode) has no recon to dump and keeps the
+ * IV_NA_FRAME type set at the top of this function */
+ if ((ps_codec->s_rate_control.post_encode_skip[ctxt_sel] == 1) ||
+ (ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] == 1))
+ {
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
+ }
+ else
+ {
+ /* set output pic type */
+ if (ps_codec->i4_slice_type == PSLICE)
+ {
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_P_FRAME;
+ }
+ else if (ps_codec->i4_slice_type == ISLICE && ps_codec->u4_is_idr != 1)
+ {
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_I_FRAME;
+ }
+ else
+ {
+ ps_video_encode_op->s_ive_op.u4_encoded_frame_type = IV_IDR_FRAME;
+ }
+ }
+
+ /* loop through to get the error status */
+ /* NOTE(review): indexed by ctxt_sel + i, whereas ps_proc above uses
+ * ctxt_sel * MAX_PROCESS_THREADS - confirm which slots hold the error
+ * codes */
+ for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
+ {
+ error_status |= ps_codec->as_process[ctxt_sel + i].i4_error_code;
+ }
+ SET_ERROR_ON_RETURN(error_status,
+ IVE_FATALERROR,
+ ps_video_encode_op->s_ive_op.u4_error_code,
+ IV_FAIL);
+ }
+
+ /* s_inp_buf may by now be the copy received back from ps_proc rather than
+ * the caller's input; on the last buffer no output or recon is reported */
+ if (1 == s_inp_buf.u4_is_last)
+ {
+ ps_video_encode_op->s_ive_op.output_present = 0;
+ ps_video_encode_op->s_ive_op.dump_recon = 0;
+ }
+
+ return IV_SUCCESS;
+}
diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c
new file mode 100755
index 0000000..67e5409
--- /dev/null
+++ b/encoder/ih264e_encode_header.c
@@ -0,0 +1,1187 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_encode_header.c
+*
+* @brief
+* This file contains function definitions related to header encoding.
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_generate_nal_unit_header()
+* - ih264e_generate_sps()
+* - ih264e_generate_pps()
+* - ih264e_generate_slice_header()
+* - ih264e_get_level()
+* - ih264e_populate_sps()
+* - ih264e_populate_pps()
+* - ih264e_populate_slice_header()
+* - ih264e_add_filler_nal_unit()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264e_config.h"
+#include "ih264e_trace.h"
+#include "ih264_typedefs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_encode_header.h"
+#include "ih264_common_tables.h"
+#include "ih264_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Writes the one-byte NAL unit header (section 7.4.1)
+*
+* @par Description
+*  The byte is laid out as forbidden_zero_bit (1 bit, always 0), nal_ref_idc
+*  (2 bits) and nal_unit_type (5 bits), and is written with a single 8-bit
+*  PUT_BITS.
+*
+* @param[inout] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] nal_unit_type
+*  nal type to be inserted; asserted to lie in 1..31
+*
+* @param[in] nal_ref_idc
+*  nal ref idc to be inserted; placed in bits 5 and 6 of the header byte
+*
+* @return status as updated by PUT_BITS, starting from IH264E_SUCCESS
+*
+******************************************************************************
+*/
+static WORD32 ih264e_generate_nal_unit_header(bitstrm_t *ps_bitstrm,
+                                              WORD32 nal_unit_type,
+                                              WORD32 nal_ref_idc)
+{
+    WORD32 status = IH264E_SUCCESS;
+    WORD32 header_byte;
+
+    /* nal_unit_type has to fit in its 5-bit field and must not be zero */
+    ASSERT((nal_unit_type > 0) && (nal_unit_type < 32));
+
+    /* forbidden_zero_bit (0) | nal_ref_idc | nal_unit_type */
+    header_byte = (nal_ref_idc << 5) + nal_unit_type;
+
+    /* 8 bits = 1 forbidden zero bit + 2 nal_ref_idc + 5 nal_unit_type */
+    PUT_BITS(ps_bitstrm, header_byte, 8, status, "nal_unit_header");
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Writes a Sequence Parameter Set NAL unit
+*
+* @par Description
+*  Emits, in order: the start code prefix, the NAL unit header (type 7,
+*  nal_ref_idc 3), the SPS syntax elements taken from ps_sps, and the rbsp
+*  trailing bits. Scaling lists and VUI parameters are NOT written, even when
+*  their presence flags are set in ps_sps; only the flags themselves go out.
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] ps_sps
+*  pointer to structure containing SPS data
+*
+* @return status that starts as IH264E_SUCCESS and is updated by every write
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps)
+{
+    WORD32 status = IH264E_SUCCESS;
+    WORD32 ref_idx;
+
+    /* start code, then NAL header: nal_unit_type = 7 (SPS), nal_ref_idc = 3 */
+    status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+    status |= ih264e_generate_nal_unit_header(ps_bitstrm, 7, 3);
+
+    /* ---- profile, constraint flags, level, id ---- */
+    PUT_BITS(ps_bitstrm, ps_sps->u1_profile_idc, 8,
+             status, "profile_idc");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set0_flag, 1,
+             status, "constrained_set0_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set1_flag, 1,
+             status, "constrained_set1_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set2_flag, 1,
+             status, "constrained_set2_flag");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_constraint_set3_flag, 1,
+             status, "constrained_set3_flag");
+    PUT_BITS(ps_bitstrm, 0, 4,
+             status, "reserved_zero_four_bits");
+    PUT_BITS(ps_bitstrm, ps_sps->u1_level_idc, 8,
+             status, "level_idc");
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_sps_id,
+                 status, "seq_parameter_set_id");
+
+    /* ---- fields that are only sent for High profile and above ---- */
+    if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)
+    {
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_chroma_format_idc,
+                     status, "chroma_format_idc");
+
+        if (CHROMA_FMT_IDC_YUV444 == ps_sps->u1_chroma_format_idc)
+        {
+            PUT_BITS(ps_bitstrm, ps_sps->i1_residual_colour_transform_flag, 1,
+                     status, "i1_residual_colour_transform_flag");
+        }
+
+        PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_luma - 8),
+                     status, "bit_depth_luma_minus8");
+        PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_bit_depth_chroma - 8),
+                     status, "bit_depth_chroma_minus8");
+        PUT_BITS(ps_bitstrm, ps_sps->i1_qpprime_y_zero_transform_bypass_flag, 1,
+                 status, "qpprime_y_zero_transform_bypass_flag");
+        PUT_BITS(ps_bitstrm, ps_sps->i1_seq_scaling_matrix_present_flag, 1,
+                 status, "seq_scaling_matrix_present_flag");
+
+        /* TODO_LATER: the seq_scaling_list itself is not written yet, even
+         * if i1_seq_scaling_matrix_present_flag is set */
+    }
+
+    /* ---- frame numbering and picture order count ---- */
+    PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_frame_num - 4),
+                 status, "log2_max_frame_num_minus4");
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_pic_order_cnt_type,
+                 status, "pic_order_cnt_type");
+
+    switch (ps_sps->i1_pic_order_cnt_type)
+    {
+        case 0:
+        {
+            PUT_BITS_UEV(ps_bitstrm, (ps_sps->i1_log2_max_pic_order_cnt_lsb - 4),
+                         status, "log2_max_pic_order_cnt_lsb_minus4");
+            break;
+        }
+        case 1:
+        {
+            PUT_BITS(ps_bitstrm, ps_sps->i1_delta_pic_order_always_zero_flag, 1,
+                     status, "delta_pic_order_always_zero_flag");
+            PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_non_ref_pic,
+                         status, "offset_for_non_ref_pic");
+            PUT_BITS_SEV(ps_bitstrm, ps_sps->i4_offset_for_top_to_bottom_field,
+                         status, "offset_for_top_to_bottom_field");
+            PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle,
+                         status, "num_ref_frames_in_pic_order_cnt_cycle");
+
+            /* one signed offset per reference frame in the cycle */
+            for (ref_idx = 0;
+                 ref_idx < ps_sps->u1_num_ref_frames_in_pic_order_cnt_cycle;
+                 ref_idx++)
+            {
+                PUT_BITS_SEV(ps_bitstrm, ps_sps->ai4_offset_for_ref_frame[ref_idx],
+                             status, "offset_for_ref_frame");
+            }
+            break;
+        }
+        default:
+        {
+            /* no additional syntax is written for other types */
+            break;
+        }
+    }
+
+    /* ---- reference frames and picture dimensions ---- */
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->u1_max_num_ref_frames,
+                 status, "num_ref_frames");
+    PUT_BITS(ps_bitstrm, ps_sps->i1_gaps_in_frame_num_value_allowed_flag, 1,
+             status, "gaps_in_frame_num_value_allowed_flag");
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_width_in_mbs_minus1,
+                 status, "pic_width_in_mbs_minus1");
+    PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_height_in_map_units_minus1,
+                 status, "pic_height_in_map_units_minus1");
+
+    /* ---- frame / field coding ---- */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_frame_mbs_only_flag, 1,
+             status, "frame_mbs_only_flag");
+    if (0 == ps_sps->i1_frame_mbs_only_flag)
+    {
+        PUT_BITS(ps_bitstrm, ps_sps->i1_mb_adaptive_frame_field_flag, 1,
+                 status, "mb_adaptive_frame_field_flag");
+    }
+
+    PUT_BITS(ps_bitstrm, ps_sps->i1_direct_8x8_inference_flag, 1,
+             status, "direct_8x8_inference_flag");
+
+    /* ---- cropping rectangle ---- */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_frame_cropping_flag, 1,
+             status, "frame_cropping_flag");
+    if (0 != ps_sps->i1_frame_cropping_flag)
+    {
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_left_offset,
+                     status, "frame_crop_left_offset");
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_right_offset,
+                     status, "frame_crop_right_offset");
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_top_offset,
+                     status, "frame_crop_top_offset");
+        PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_frame_crop_bottom_offset,
+                     status, "frame_crop_bottom_offset");
+    }
+
+    /* ---- VUI: only the presence flag is written; the VUI parameters
+     * themselves are still to be added to the bitstream ---- */
+    PUT_BITS(ps_bitstrm, ps_sps->i1_vui_parameters_present_flag, 1,
+             status, "vui_parameters_present_flag");
+
+    /* rbsp trailing bits */
+    status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Writes a Picture Parameter Set NAL unit
+*
+* @par Description
+*  Generates the Picture Parameter Set as per Section 7.3.2.2: start code
+*  prefix, the fixed header byte NAL_PPS_FIRST_BYTE, the PPS syntax elements
+*  taken from ps_pps, and the rbsp trailing bits. Slice group map syntax and
+*  scaling lists are NOT written, even when ps_pps indicates them.
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] ps_pps
+*  pointer to structure containing PPS data
+*
+* @param[in] ps_sps
+*  pointer to the SPS; only u1_profile_idc is read, to decide whether the
+*  High-profile fields are written
+*
+* @return status that starts as IH264E_SUCCESS and is updated by every write
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps, sps_t *ps_sps)
+{
+    WORD32 status = IH264E_SUCCESS;
+
+    /* start code followed by the fixed PPS NAL header byte */
+    status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+    PUT_BITS(ps_bitstrm, NAL_PPS_FIRST_BYTE, 8,
+             status, "pps_header");
+
+    /* ---- parameter set ids ---- */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_pps_id,
+                 status, "pic_parameter_set_id");
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_sps_id,
+                 status, "seq_parameter_set_id");
+
+    /* ---- entropy coding mode (0 - VLC, 1 - CABAC) and pic order flag ---- */
+    PUT_BITS(ps_bitstrm, ps_pps->u1_entropy_coding_mode_flag, 1,
+             status, "Entropy coding : 0-VLC; 1 - CABAC");
+    PUT_BITS(ps_bitstrm, ps_pps->u1_pic_order_present_flag, 1,
+             status, "Pic order present flag");
+
+    /* ---- slice groups, sent as (count - 1) ---- */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->u1_num_slice_groups - 1,
+                 status, "Number of slice groups");
+
+    /* TODO_LATER: when u1_num_slice_groups > 1 the slice group map type would
+     * have to follow here; nothing is written for that case at present */
+
+    /* ---- default active reference index counts, sent as (count - 1) ---- */
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l0_default_active - 1,
+                 status, "num_ref_idx_l0_default_active_minus1");
+    PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l1_default_active - 1,
+                 status, "num_ref_idx_l1_default_active_minus1");
+
+    /* ---- weighted prediction: 1-bit flag, then the 2-bit bipred idc ---- */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_pred_flag, 1,
+             status, "weighted_pred_flag");
+    PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_bipred_idc, 2,
+             status, "weighted_bipred_idc");
+
+    /* ---- initial qp / qs (sent relative to 26) and chroma qp offset ---- */
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qp - 26,
+                 status, "pic_init_qp_minus26");
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qs - 26,
+                 status, "pic_init_qs_minus26");
+    PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_chroma_qp_index_offset,
+                 status, "chroma_qp_index_offset");
+
+    /* ---- single-bit control flags ---- */
+    PUT_BITS(ps_bitstrm, ps_pps->i1_deblocking_filter_control_present_flag, 1,
+             status, "deblocking_filter_control_present_flag");
+    PUT_BITS(ps_bitstrm, ps_pps->i1_constrained_intra_pred_flag, 1,
+             status, "constrained_intra_pred_flag");
+    PUT_BITS(ps_bitstrm, ps_pps->i1_redundant_pic_cnt_present_flag, 1,
+             status, "redundant_pic_cnt_present_flag");
+
+    /* ---- fields that are only sent for High profile and above ---- */
+    if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)
+    {
+        PUT_BITS(ps_bitstrm, ps_pps->i1_transform_8x8_mode_flag, 1,
+                 status, "transform_8x8_mode_flag");
+        PUT_BITS(ps_bitstrm, ps_pps->i1_pic_scaling_matrix_present_flag, 1,
+                 status, "pic_scaling_matrix_present_flag");
+
+        /* TODO_LATER: the scaling lists are not written yet, even if
+         * i1_pic_scaling_matrix_present_flag is set */
+
+        PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_second_chroma_qp_index_offset,
+                     status, "Second chroma QP offset");
+    }
+
+    /* rbsp trailing bits */
+    status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates Slice Header
+*
+* @par Description
+*  Writes a NAL start code prefix, the NAL unit header and the slice header
+*  syntax elements (slice_header() syntax, Section 7.3.3 of the H.264 spec)
+*  into the bitstream. Reference list reordering commands, the prediction
+*  weight table, adaptive reference marking commands and
+*  slice_group_change_cycle are not emitted; their branches are placeholders.
+*
+* @param[inout] ps_bitstrm
+*  pointer to bitstream context for generating slice header
+*
+* @param[in] ps_slice_hdr
+*  pointer to slice header params
+*
+* @param[in] ps_pps
+*  pointer to pps params referred by slice
+*
+* @param[in] ps_sps
+*  pointer to sps params referred by slice
+*
+* @return IH264E_SUCCESS, or the bitwise OR of the status codes returned by
+*  the bitstream writers
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_slice_header(bitstrm_t *ps_bitstrm,
+                                    slice_header_t *ps_slice_hdr,
+                                    pps_t *ps_pps,
+                                    sps_t *ps_sps)
+{
+
+    WORD32 return_status = IH264E_SUCCESS;
+
+    /* Insert start code */
+    return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* Insert Nal Unit Header */
+    return_status |= ih264e_generate_nal_unit_header(ps_bitstrm, ps_slice_hdr->i1_nal_unit_type, ps_slice_hdr->i1_nal_unit_idc);
+
+    /* first_mb_in_slice */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_first_mb_in_slice, return_status, "first_mb_in_slice");
+
+    /* slice_type */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_slice_type, return_status, "slice_type");
+
+    /* pic_parameter_set_id */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_pps_id, return_status, "pic_parameter_set_id");
+
+    /* frame_num, coded with log2_max_frame_num bits */
+    PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_frame_num, ps_sps->i1_log2_max_frame_num, return_status, "frame_num");
+
+    if (!ps_sps->i1_frame_mbs_only_flag)
+    {
+        /* field_pic_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_field_pic_flag, 1, return_status, "field_pic_flag");
+
+        if (ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* bottom_field_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_bottom_field_flag, 1, return_status, "bottom_field_flag");
+        }
+    }
+
+    /* nal_unit_type 5 is the IDR slice type; only IDR slices carry idr_pic_id */
+    if (ps_slice_hdr->i1_nal_unit_type == 5)
+    {
+        /* u2_idr_pic_id */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u2_idr_pic_id, return_status, "u2_idr_pic_id");
+    }
+
+    if (ps_sps->i1_pic_order_cnt_type == 0)
+    {
+        /* pic_order_cnt_lsb */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i4_pic_order_cnt_lsb, ps_sps->i1_log2_max_pic_order_cnt_lsb, return_status, "pic_order_cnt_lsb");
+
+        if (ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* delta_pic_order_cnt_bottom */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i4_delta_pic_order_cnt_bottom, return_status, "delta_pic_order_cnt_bottom");
+        }
+    }
+
+    if (ps_sps->i1_pic_order_cnt_type == 1 && !ps_sps->i1_delta_pic_order_always_zero_flag)
+    {
+        /* delta_pic_order_cnt[0] */
+        PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[0], return_status, "delta_pic_order_cnt[0]");
+
+        if (ps_pps->u1_pic_order_present_flag && !ps_slice_hdr->i1_field_pic_flag)
+        {
+            /* delta_pic_order_cnt[1] */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->ai4_delta_pic_order_cnt[1], return_status, "delta_pic_order_cnt[1]");
+        }
+    }
+
+    if (ps_pps->i1_redundant_pic_cnt_present_flag)
+    {
+        /* redundant_pic_cnt */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_redundant_pic_cnt, return_status, "redundant_pic_cnt");
+    }
+
+    if (ps_slice_hdr->u1_slice_type == BSLICE)
+    {
+        /* direct_spatial_mv_pred_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_direct_spatial_mv_pred_flag, 1, return_status, "direct_spatial_mv_pred_flag");
+    }
+
+    if (ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == BSLICE)
+    {
+        /* num_ref_idx_active_override_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_num_ref_idx_active_override_flag, 1, return_status, "num_ref_idx_active_override_flag");
+
+        if (ps_slice_hdr->u1_num_ref_idx_active_override_flag)
+        {
+            /* num_ref_idx_l0_active_minus1 */
+            PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status, "num_ref_idx_l0_active_minus1");
+
+            /* The list 1 count is part of the override and must only be
+             * present when the override flag is set; otherwise a decoder
+             * would read it as the next syntax element */
+            if (ps_slice_hdr->u1_slice_type == BSLICE)
+            {
+                /* num_ref_idx_l1_active_minus1 */
+                PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status, "num_ref_idx_l1_active_minus1");
+            }
+        }
+    }
+
+    /* ref_idx_reordering */
+    /* TODO: ref_idx_reordering commands are not emitted when the flag is set */
+    /* NOTE(review): the spec also carries ref_pic_list_reordering_flag_l1 for
+     * B slices; it is not written here - confirm before enabling B slices */
+    if ((ps_slice_hdr->u1_slice_type != ISLICE) && (ps_slice_hdr->u1_slice_type != SISLICE))
+    {
+        /* ref_pic_list_reordering_flag_l0 */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_ref_idx_reordering_flag_l0, 1, return_status, "ref_pic_list_reordering_flag_l0");
+
+        if (ps_slice_hdr->u1_ref_idx_reordering_flag_l0)
+        {
+
+        }
+    }
+
+    if ((ps_pps->i1_weighted_pred_flag &&
+                    (ps_slice_hdr->u1_slice_type == PSLICE || ps_slice_hdr->u1_slice_type == SPSLICE)) ||
+                    (ps_slice_hdr->u1_weighted_bipred_idc == 1 && ps_slice_hdr->u1_slice_type == BSLICE))
+    {
+        /* TODO_LATER: Currently there is no support for weighted prediction.
+         This needs to be updated when the support is added */
+    }
+
+    /* reference picture marking is present only for reference pictures */
+    if (ps_slice_hdr->i1_nal_unit_idc != 0)
+    {
+        if (ps_slice_hdr->i1_nal_unit_type == 5)
+        {
+            /* no_output_of_prior_pics_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_no_output_of_prior_pics_flag , 1, return_status, "no_output_of_prior_pics_flag ");
+
+            /* long_term_reference_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_long_term_reference_flag , 1, return_status, "long_term_reference_flag ");
+        }
+        else
+        {
+            /* adaptive_ref_pic_marking_mode_flag  */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag , 1, return_status, "adaptive_ref_pic_marking_mode_flag ");
+
+            if (ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag)
+            {
+                /* TODO: if the reference picture marking mode is adaptive
+                 add these fields in the bit-stream */
+            }
+        }
+    }
+
+    if (ps_slice_hdr->u1_entropy_coding_mode_flag && ps_slice_hdr->u1_slice_type != ISLICE &&
+                    ps_slice_hdr->u1_slice_type != SISLICE)
+    {
+        /* cabac_init_idc */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_cabac_init_idc, return_status, "cabac_init_idc");
+    }
+
+    /* slice_qp_delta, relative to the qp signalled in the pps */
+    PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_qp - ps_pps->i1_pic_init_qp, return_status, "slice_qp_delta");
+
+    if (ps_slice_hdr->u1_slice_type == SPSLICE || ps_slice_hdr->u1_slice_type == SISLICE)
+    {
+        if (ps_slice_hdr->u1_slice_type == SPSLICE)
+        {
+            /* sp_for_switch_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->u1_sp_for_switch_flag , 1, return_status, "sp_for_switch_flag");
+        }
+        /* slice_qs_delta */
+        PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->u1_slice_qs - ps_pps->i1_pic_init_qs, return_status, "slice_qs_delta");
+    }
+
+    if (ps_pps->i1_deblocking_filter_control_present_flag)
+    {
+        /* disable_deblocking_filter_idc */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->u1_disable_deblocking_filter_idc, return_status, "disable_deblocking_filter_idc");
+
+        /* the filter offsets are sent only when the idc is not 1 */
+        if (ps_slice_hdr->u1_disable_deblocking_filter_idc != 1)
+        {
+            /* slice_alpha_c0_offset_div2 */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_alpha_c0_offset_div2, return_status, "slice_alpha_c0_offset_div2");
+
+            /* slice_beta_offset_div2 */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_beta_offset_div2, return_status, "slice_beta_offset_div2");
+        }
+    }
+
+    if (ps_slice_hdr->u1_num_slice_groups_minus1 > 0 &&
+                    ps_pps->u1_slice_group_map_type >= 3 &&
+                    ps_pps->u1_slice_group_map_type <= 5)
+    {
+        /* slice_group_change_cycle */
+        /* TODO_LATER: Currently the number of slice groups minus 1 is 0.
+         * If this is not the case, we have to add Slice group map type to the bit stream */
+    }
+
+    return return_status;
+}
+
+
+
+/**
+******************************************************************************
+*
+* @brief Populates sps structure
+*
+* @par Description
+*  Fills the sps structure from the encoder configuration for later use in
+*  header generation. Profile is chosen from the enabled coding tools, level
+*  is taken from the configured maximum level, and the remaining fields are
+*  set to the fixed values this encoder uses.
+*
+* @param[in] ps_codec
+*  pointer to encoder context
+*
+* @param[out] ps_sps
+*  pointer to sps params that needs to be populated
+*
+* @return IH264E_SUCCESS (no step in this function can fail)
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
+{
+    /* active config parameters */
+    cfg_params_t *ps_cfg = &(ps_codec->s_cfg);
+
+    /* profile */
+    /*
+     * Baseline profile supports, 8 bits per sample, 4:2:0 format, CAVLC.
+     * B frames are not allowed. Further, Flexible mb ordering, Redundant slices, Arbitrary slice ordering are supported.
+     * The constrained baseline profile is baseline profile minus ASO, FMO and redundant slices.
+     * To the constrained baseline profile if we add support for B slices, support for encoding interlaced frames,
+     * support for weighted prediction and introduce CABAC entropy coding then we have Main Profile.
+     */
+    if ((ps_cfg->u4_num_b_frames) || (ps_cfg->e_content_type != IV_PROGRESSIVE) ||
+        (ps_cfg->u4_entropy_coding_mode == CABAC) || (ps_cfg->u4_weighted_prediction))
+    {
+        ps_sps->u1_profile_idc = IH264_PROFILE_MAIN;
+    }
+    else
+    {
+        ps_sps->u1_profile_idc = IH264_PROFILE_BASELINE;
+    }
+
+    /* level: taken as is from the configured maximum level */
+    ps_sps->u1_level_idc = ps_cfg->u4_max_level;
+
+    /* constrained flags */
+    /*
+     * baseline profile automatically implies set 0 flag
+     */
+    ps_sps->u1_constraint_set0_flag = (ps_sps->u1_profile_idc == IH264_PROFILE_BASELINE);
+    /*
+     * main profile automatically implies set 1 flag
+     * Although the encoder says it supports Baseline profile it actually supports constrained
+     * baseline profile as ASO, FMO and redundant slices are not supported
+     */
+    ps_sps->u1_constraint_set1_flag = (ps_sps->u1_profile_idc <= IH264_PROFILE_MAIN);
+    /*
+     * extended profile is not supported
+     */
+    ps_sps->u1_constraint_set2_flag = 0x00;
+    /*
+     * level 1b or level 11
+     * For baseline and main profiles, level 1b is signalled as level_idc 11
+     * together with constraint_set3_flag equal to 1; with the flag at 0 the
+     * stream would be marked as level 1.1 instead.
+     */
+    if (ps_sps->u1_level_idc == IH264_LEVEL_1B)
+    {
+        ps_sps->u1_constraint_set3_flag = 1;
+        ps_sps->u1_level_idc = IH264_LEVEL_11;
+    }
+    else
+    {
+        ps_sps->u1_constraint_set3_flag = 0;
+    }
+
+    /* active sps id */
+    ps_sps->u1_sps_id = ps_codec->i4_sps_id;
+
+    if (ps_sps->u1_profile_idc >= IH264_PROFILE_HIGH)
+    {
+        /* chroma format idc */
+        ps_sps->u1_chroma_format_idc = CHROMA_FMT_IDC_YUV420;
+
+        /* residual_colour_transform_flag */
+        ps_sps->i1_residual_colour_transform_flag = 0;
+
+        /* luma bit depth 8 */
+        ps_sps->i1_bit_depth_luma = 8;
+
+        /* chroma bit depth 8 */
+        ps_sps->i1_bit_depth_chroma = 8;
+
+        /* qpprime_y_zero_transform_bypass_flag */
+        ps_sps->i1_qpprime_y_zero_transform_bypass_flag = 0;
+
+        /* seq_scaling_matrix_present_flag */
+        ps_sps->i1_seq_scaling_matrix_present_flag = 0;
+
+        if (ps_sps->i1_seq_scaling_matrix_present_flag)
+        {
+            /* TODO_LATER: Will be enabled once scaling list support is added */
+        }
+    }
+
+    /* log2_max_frame_num (the minus4 offset is not applied here) */
+    ps_sps->i1_log2_max_frame_num = 16;
+
+    /* pic_order_cnt_type: 2 by default, 0 when alt ref is enabled */
+    ps_sps->i1_pic_order_cnt_type = 2;
+
+    if (ps_cfg->u4_enable_alt_ref)
+    {
+        ps_sps->i1_pic_order_cnt_type = 0;
+    }
+
+    /* log2_max_pic_order_cnt_lsb (the minus4 offset is not applied here) */
+    ps_sps->i1_log2_max_pic_order_cnt_lsb = 8;
+
+    /* TODO : add support for other poc types */
+    if (ps_sps->i1_pic_order_cnt_type == 0)
+    {
+
+    }
+    else if (ps_sps->i1_pic_order_cnt_type == 1)
+    {
+
+    }
+
+    /* num_ref_frames */
+    /* FIXME : Fix this hard coding */
+    ps_sps->u1_max_num_ref_frames = 1;
+
+    /* gaps_in_frame_num_value_allowed_flag */
+    ps_sps->i1_gaps_in_frame_num_value_allowed_flag = 0;
+
+    /* pic width in mb - 1 */
+    ps_sps->i2_pic_width_in_mbs_minus1 = ps_cfg->i4_wd_mbs - 1;
+
+    /* pic height in mb - 1 */
+    ps_sps->i2_pic_height_in_map_units_minus1 = ps_cfg->i4_ht_mbs - 1;
+
+    /* frame_mbs_only_flag, no support for interlace encoding */
+    ps_sps->i1_frame_mbs_only_flag = 1;
+
+    /* mb_adaptive_frame_field_flag */
+    if (ps_sps->i1_frame_mbs_only_flag == 0)
+    {
+        ps_sps->i1_mb_adaptive_frame_field_flag = 0;
+    }
+
+    /* direct_8x8_inference_flag */
+    ps_sps->i1_direct_8x8_inference_flag = 0;
+
+    /* cropping params */
+    /*NOTE : Cropping values depend on the chroma format
+     * For our case ,decoder interprets the cropping values as 2*num pixels
+     * Hence the difference in the disp width and width must be halved before sending
+     * to get the expected results
+     */
+    ps_sps->i1_frame_cropping_flag = 0;
+    ps_sps->i2_frame_crop_left_offset = 0;
+    ps_sps->i2_frame_crop_right_offset = (ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd)>>1;
+    ps_sps->i2_frame_crop_top_offset = 0;
+    ps_sps->i2_frame_crop_bottom_offset = (ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht)>>1;
+
+    /* signal cropping only when at least one offset is non zero */
+    if (ps_sps->i2_frame_crop_left_offset ||
+                    ps_sps->i2_frame_crop_right_offset ||
+                    ps_sps->i2_frame_crop_top_offset ||
+                    ps_sps->i2_frame_crop_bottom_offset)
+    {
+        ps_sps->i1_frame_cropping_flag = 1;
+    }
+
+    /* vui params */
+    ps_sps->i1_vui_parameters_present_flag = 0;
+
+    if (ps_sps->i1_vui_parameters_present_flag)
+    {
+        /* populate vui params */
+    }
+
+    /* nothing above can fail; report success instead of the stale
+     * IH264E_FAIL initial value that used to be returned */
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates pps structure
+*
+* @par Description
+*  Fills the pps structure for later use in header generation. Ids, entropy
+*  coding mode and constrained intra prediction come from the encoder
+*  context; every other field is set to a fixed value.
+*
+* @param[in] ps_codec
+*  pointer to encoder context
+*
+* @param[out] ps_pps
+*  pointer to pps params that needs to be populated
+*
+* @return IH264E_SUCCESS always
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_pps(codec_t *ps_codec, pps_t *ps_pps)
+{
+    /* ---- values taken from the encoder context ---- */
+
+    /* seq_parameter_set_id and pic_parameter_set_id */
+    ps_pps->u1_sps_id = ps_codec->i4_sps_id;
+    ps_pps->u1_pps_id = ps_codec->i4_pps_id;
+
+    /* entropy_coding_mode, as configured */
+    ps_pps->u1_entropy_coding_mode_flag = ps_codec->s_cfg.u4_entropy_coding_mode;
+
+    /* ---- fixed values ---- */
+
+    /* pic_order_present_flag is unset for POC type 2 */
+    ps_pps->u1_pic_order_present_flag = 0;
+
+    /* Only one slice group is supported.
+     * TODO_LATER: with more than one slice group the slice group map type
+     * has to be added to the bit stream */
+    ps_pps->u1_num_slice_groups = 1;
+
+    /* default active reference counts for list 0 and list 1 */
+    /* FIXME : fix this hard coded value */
+    ps_pps->i1_num_ref_idx_l0_default_active = 1;
+    ps_pps->i1_num_ref_idx_l1_default_active = 1;
+
+    /* weighted prediction for now is disabled */
+    ps_pps->i1_weighted_pred_flag = 0;
+    ps_pps->i1_weighted_bipred_idc = 0;
+
+    /* The intent is to not signal qp from pps. Rather send the same in
+     * slice headers, so init qp, init qs and the chroma offset are zeroed */
+    ps_pps->i1_pic_init_qp = 0;
+    ps_pps->i1_pic_init_qs = 0;
+    ps_pps->i1_chroma_qp_index_offset = 0;
+
+    /* deblocking filter flags present in slice header */
+    ps_pps->i1_deblocking_filter_control_present_flag = 1;
+
+    /* constrained intra prediction, as configured */
+    ps_pps->i1_constrained_intra_pred_flag = ps_codec->s_cfg.u4_constrained_intra_pred;
+
+    /* sending redundant slices is not supported for now */
+    ps_pps->i1_redundant_pic_cnt_present_flag = 0;
+
+    ps_pps->u1_slice_group_map_type = 0;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates slice header structure
+*
+* @par Description
+*  Fills the slice header structure from the process context, for later use
+*  in header generation. Also advances the codec's coded picture count when
+*  the slice starts at macroblock 0.
+*
+* @param[in] ps_proc
+*  pointer to proc context
+*
+* @param[out] ps_slice_hdr
+*  pointer to slice header structure that needs to be populated
+*
+* @param[in] ps_pps
+*  pointer to pps params structure referred by the slice
+*
+* @param[in] ps_sps
+*  pointer to sps params referred by the pps
+*
+* @return IH264E_SUCCESS always
+*
+******************************************************************************
+*/
+WORD32 ih264e_populate_slice_header(process_ctxt_t *ps_proc,
+                                    slice_header_t *ps_slice_hdr,
+                                    pps_t *ps_pps,
+                                    sps_t *ps_sps)
+{
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* entropy context */
+    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+    /* 3 when the current frame is used as reference, 0 otherwise */
+    ps_slice_hdr->i1_nal_unit_idc = (ps_codec->u4_is_curr_frm_ref) ? 3 : 0;
+
+    /* start mb address */
+    ps_slice_hdr->u2_first_mb_in_slice = ps_entropy->i4_mb_start_add;
+
+    /* slice type */
+    ps_slice_hdr->u1_slice_type = ps_proc->i4_slice_type;
+
+    /* pic_parameter_set_id */
+    ps_slice_hdr->u1_pps_id = ps_pps->u1_pps_id;
+
+    /* Separate color plane flag is 0,
+     * hence the syntax element color_plane_id not included */
+
+    /* frame num */
+    ps_slice_hdr->i4_frame_num = ps_proc->i4_frame_num;
+
+    /* no support for interlace encoding: field_pic_flag stays 0, so
+     * bottom_field_flag is never needed */
+    if (!ps_sps->i1_frame_mbs_only_flag)
+    {
+        ps_slice_hdr->i1_field_pic_flag = 0;
+    }
+
+    /* nal unit type 5 with an idr pic id for IDR pictures, type 1 otherwise */
+    if (ps_proc->u4_is_idr)
+    {
+        ps_slice_hdr->i1_nal_unit_type = 5;
+        ps_slice_hdr->u2_idr_pic_id = ps_proc->u4_idr_pic_id;
+    }
+    else
+    {
+        ps_slice_hdr->i1_nal_unit_type = 1;
+    }
+
+    /* POC type 0: lsb is the coded picture count wrapped to the lsb range.
+     * POC type 1 is not populated. */
+    if (ps_sps->i1_pic_order_cnt_type == 0)
+    {
+        WORD32 i4_poc_lsb = ps_codec->i4_coded_pic_cnt;
+
+        i4_poc_lsb %= (1 << ps_sps->i1_log2_max_pic_order_cnt_lsb);
+        ps_slice_hdr->i4_pic_order_cnt_lsb = i4_poc_lsb;
+    }
+
+    /* count a picture once, on the slice that starts at mb 0 */
+    if (0 == ps_slice_hdr->u2_first_mb_in_slice)
+    {
+        ps_codec->i4_coded_pic_cnt++;
+    }
+
+    /*
+     * redundant slices are not currently supported.
+     * Hence the syntax element redundant slice cnt is not initialized.
+     * direct_spatial_mv_pred_flag is not initialized for B slices either.
+     */
+
+    /* num_ref_idx_active_override_flag: never overridden, so the active
+     * reference counts are left untouched */
+    if (ps_proc->i4_slice_type == PSLICE || ps_proc->i4_slice_type == SPSLICE || ps_proc->i4_slice_type == BSLICE)
+    {
+        ps_slice_hdr->u1_num_ref_idx_active_override_flag = 0;
+    }
+
+    /* ref_idx_reordering */
+    /* TODO: ref_idx_reordering */
+    if (!((ps_proc->i4_slice_type == ISLICE) || (ps_proc->i4_slice_type == SISLICE)))
+    {
+        /* ref_pic_list_reordering_flag_l0 */
+        ps_slice_hdr->u1_ref_idx_reordering_flag_l0 = 0;
+    }
+
+    /* TODO_LATER: Currently there is no support for weighted prediction.
+     This needs to be updated when the support is added */
+
+    /* reference picture marking flags, only for reference pictures */
+    if (ps_slice_hdr->i1_nal_unit_idc != 0)
+    {
+        if (ps_slice_hdr->i1_nal_unit_type == 5)
+        {
+            /* no_output_of_prior_pics_flag */
+            ps_slice_hdr->u1_no_output_of_prior_pics_flag = 0;
+
+            /* long_term_reference_flag */
+            ps_slice_hdr->u1_long_term_reference_flag = 0;
+        }
+        else
+        {
+            /* adaptive_ref_pic_marking_mode_flag */
+            /* TODO: if the reference picture marking mode is adaptive
+             add these fields in the bit-stream */
+            ps_slice_hdr->u1_adaptive_ref_pic_marking_mode_flag = 0;
+        }
+    }
+
+    /* entropy coding mode flag; cabac_init_idc is not initialized here */
+    ps_slice_hdr->u1_entropy_coding_mode_flag = ps_entropy->u1_entropy_coding_mode_flag;
+
+    /* slice qp; sp_for_switch_flag and slice qs are not initialized here */
+    ps_slice_hdr->i1_slice_qp = ps_proc->u4_frame_qp;
+
+    if (ps_pps->i1_deblocking_filter_control_present_flag)
+    {
+        /* disable_deblocking_filter_idc */
+        ps_slice_hdr->u1_disable_deblocking_filter_idc = ps_proc->u4_disable_deblock_level;
+
+        /* offsets are zeroed whenever the idc is not 1 */
+        if (ps_slice_hdr->u1_disable_deblocking_filter_idc != 1)
+        {
+            /* slice_alpha_c0_offset_div2 */
+            ps_slice_hdr->i1_slice_alpha_c0_offset_div2 = 0;
+
+            /* slice_beta_offset_div2 */
+            ps_slice_hdr->i1_slice_beta_offset_div2 = 0;
+        }
+    }
+
+    /* slice_group_change_cycle */
+    /* TODO_LATER: Currently the number of slice groups minus 1 is 0.
+     * If this is not the case, we have to add Slice group map type to the bit stream */
+    ps_slice_hdr->u1_num_slice_groups_minus1 = 0;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief inserts FILLER Nal Unit.
+*
+* @par Description
+*  In constant bit rate rc mode, when the bits generated by the codec is
+*  underflowing the target bit rate, the encoder library inserts filler nal unit.
+*  Filling is done in units of 4 bytes: the requested byte count is floored
+*  to whole words, and the first word is the nal header byte plus three
+*  0xFF bytes.
+*
+* @param[in] ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in] insert_fill_bytes
+*  Number of fill bytes to be inserted
+*
+* @return IH264E_BITSTREAM_BUFFER_OVERFLOW when the fill bytes do not fit in
+*  the stream buffer, otherwise the OR of the bitstream writer status codes
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_add_filler_nal_unit(bitstrm_t *ps_bitstrm,
+                                          WORD32 insert_fill_bytes)
+{
+    IH264E_ERROR_T return_status = IH264E_SUCCESS;
+
+    /* whole 4 byte words still to be written after the first one */
+    WORD32 i4_words_left;
+
+    /* Insert the NAL start code */
+    return_status |= ih264e_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* bail out if the requested fill would not fit in the buffer */
+    if (ps_bitstrm->u4_strm_buf_offset + insert_fill_bytes >= ps_bitstrm->u4_max_strm_size)
+    {
+        return (IH264E_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* First word: Nal Unit Header followed by three fill bytes */
+    PUT_BITS(ps_bitstrm, NAL_FILLER_FIRST_BYTE, 8, return_status, "filler_header");
+
+    PUT_BITS(ps_bitstrm, 0xFFFFFF, 24, return_status, "fill bytes");
+
+    /****************************************************/
+    /* Floor the number of bytes to be stuffed to WORD  */
+    /* units and discount the word written above. In    */
+    /* case stuffing is <= 4 bytes, nothing more is     */
+    /* stuffed                                          */
+    /****************************************************/
+    for (i4_words_left = (insert_fill_bytes >> 2) - 1; i4_words_left > 0; i4_words_left--)
+    {
+        /* one word of fill bytes */
+        PUT_BITS(ps_bitstrm, 0xFFFFFFFF, 32, return_status, "fill bytes");
+    }
+
+    return_status |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return return_status;
+}
+
diff --git a/encoder/ih264e_encode_header.h b/encoder/ih264e_encode_header.h
new file mode 100755
index 0000000..acae5b6
--- /dev/null
+++ b/encoder/ih264e_encode_header.h
@@ -0,0 +1,278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_encode_header.h
+*
+* @brief
+* This file contains structures and interface prototypes for h264 bitstream
+* header encoding
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_ENCODE_HEADER_H_
+#define IH264E_ENCODE_HEADER_H_
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Macro to put a code with specified number of bits into the
+ * bitstream
+******************************************************************************
+ */
+#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val, syntax_string) \
+ ENTROPY_TRACE(syntax_string, code_val);\
+ ret_val |= ih264e_put_bits((ps_bitstrm), (code_val), (code_len))
+
+/**
+******************************************************************************
+ * @brief Macro to put a code with specified number of bits into the
+ * bitstream using 0th order exponential Golomb encoding for
+ * signed numbers
+******************************************************************************
+ */
+#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val, syntax_string) \
+ ENTROPY_TRACE(syntax_string, code_val);\
+ ret_val |= ih264e_put_uev((ps_bitstrm), (code_val))
+
+/**
+******************************************************************************
+ * @brief Macro to put a code with specified number of bits into the
+ * bitstream using 0th order exponential Golomb encoding for
+ * signed numbers
+******************************************************************************
+ */
+#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val, syntax_string) \
+ ENTROPY_TRACE(syntax_string, code_val);\
+ ret_val |= ih264e_put_sev((ps_bitstrm), (code_val))
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Generates SPS (Sequence Parameter Set)
+*
+* @par Description
+* This function generates Sequence Parameter Set header as per the spec
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] ps_sps
+* pointer to structure containing SPS data
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_sps
+ (
+ bitstrm_t *ps_bitstrm,
+ sps_t *ps_sps
+ );
+
+/**
+******************************************************************************
+*
+* @brief Generates PPS (Picture Parameter Set)
+*
+* @par Description
+* Generate Picture Parameter Set as per Section 7.3.2.2
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] ps_pps
+* pointer to structure containing PPS data
+*
+* @param[in] ps_sps
+* pointer to structure containing SPS data; its profile_idc decides whether
+* the high profile fields are written
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_pps
+ (
+ bitstrm_t *ps_bitstrm,
+ pps_t *ps_pps,
+ sps_t *ps_sps
+ );
+
+/**
+******************************************************************************
+*
+* @brief Generates Slice Header
+*
+* @par Description
+* Generate Slice Header (start code, NAL unit header and slice header syntax
+* elements)
+*
+* @param[inout] ps_bitstrm
+* pointer to bitstream context for generating slice header
+*
+* @param[in] ps_slice_hdr
+* pointer to slice header params
+*
+* @param[in] ps_pps
+* pointer to pps params referred by slice
+*
+* @param[in] ps_sps
+* pointer to sps params referred by slice
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_generate_slice_header
+ (
+ bitstrm_t *ps_bitstrm,
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps
+ );
+
+/**
+******************************************************************************
+*
+* @brief Populates sps structure
+*
+* @par Description
+* Populates sps structure for its use in header generation
+*
+* @param[in] ps_codec
+* pointer to encoder context
+*
+* @param[out] ps_sps
+* pointer to sps params that needs to be populated
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_sps
+ (
+ codec_t *ps_codec,
+ sps_t *ps_sps
+ );
+
+/**
+******************************************************************************
+*
+* @brief Populates pps structure
+*
+* @par Description
+* Populates pps structure for its use in header generation
+*
+* @param[in] ps_codec
+* pointer to encoder context
+*
+* @param[out] ps_pps
+* pointer to pps params that needs to be populated
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_populate_pps
+ (
+ codec_t *ps_codec,
+ pps_t *ps_pps
+ );
+
+
+/**
+******************************************************************************
+*
+* @brief Populates slice header structure
+*
+* @par Description
+* Populates slice header structure for its use in header generation
+*
+* @param[in] ps_proc
+* pointer to proc context
+*
+* @param[out] ps_slice_hdr
+* pointer to slice header structure that needs to be populated
+*
+* @param[in] ps_pps
+* pointer to pps params structure referred by the slice
+*
+* @param[in] ps_sps
+* pointer to sps params referred by the pps
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ih264e_populate_slice_header
+ (
+ process_ctxt_t *ps_proc,
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps
+ );
+
+
+/**
+******************************************************************************
+*
+* @brief inserts FILLER Nal Unit.
+*
+* @par Description
+* In constant bit rate rc mode, when the bits generated by the codec is
+* underflowing the target bit rate, the encoder library inserts filler nal unit.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] insert_fill_bytes
+* Number of fill bytes to be inserted
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_add_filler_nal_unit
+ (
+ bitstrm_t *ps_bitstrm,
+ WORD32 insert_fill_bytes
+ );
+
+
+#endif //IH264E_ENCODE_HEADER_H_
diff --git a/encoder/ih264e_error.h b/encoder/ih264e_error.h
new file mode 100755
index 0000000..8fe9dac
--- /dev/null
+++ b/encoder/ih264e_error.h
@@ -0,0 +1,229 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_error.h
+*
+* @brief
+* Definitions related to error handling
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_ERROR_H_
+#define IH264E_ERROR_H_
+
+/**
+******************************************************************************
+* @brief Records an extended error status and returns from the calling function
+*
+* @par Description
+*  When error differs from IH264E_SUCCESS, stores ((1 << severity) | error) in
+*  out_status and executes 'return (ret_code);' in the enclosing function.
+*  When error equals IH264E_SUCCESS nothing happens.
+*
+* @param[in] error
+*  error code to test
+*
+* @param[in] severity
+*  bit position that is set in out_status in addition to the error code
+*
+* @param[out] out_status
+*  lvalue that receives the combined status
+*
+* @param[in] ret_code
+*  value returned from the enclosing function on error
+*
+* @remarks
+*  - every argument is parenthesized so that compound expressions such as
+*    'a & b' are compared/combined as a whole
+*  - error is evaluated twice; pass an expression without side effects
+*  - the expansion is a plain if-block (kept for compatibility with existing
+*    call sites), so brace it when used as the body of an unbraced if/else
+******************************************************************************
+*/
+#define SET_ERROR_ON_RETURN(error, severity, out_status, ret_code) \
+    if ((error) != IH264E_SUCCESS) \
+    {\
+        (out_status) = ((1 << (severity)) | (error));\
+        return (ret_code);\
+    }
+
+
+/**
+******************************************************************************
+ * @brief Extended error codes of the H264 encoder (bitstream and codec classes)
+******************************************************************************
+ */
+typedef enum
+{
+ /* NOTE: the ive error codes end at 0x80 */
+ IVE_ERR_CODEC_EXTENSIONS = 0x80,
+
+ /* bit stream error start (same value as IVE_ERR_CODEC_EXTENSIONS) */
+ IH264E_BITSTREAM_ERROR_START = IVE_ERR_CODEC_EXTENSIONS,
+
+ /* codec error start (0x10 above the bit stream error start) */
+ IH264E_CODEC_ERROR_START = IH264E_BITSTREAM_ERROR_START + 0x10,
+
+ /** no error */
+ IH264E_SUCCESS = 0,
+
+ /** bitstream init failure, buffer ptr not aligned to WORD (32bits) */
+ IH264E_BITSTREAM_BUFPTR_ALIGN_FAIL = IH264E_BITSTREAM_ERROR_START + 0x01,
+
+ /** bitstream init failure, buf size not multiple of WORD size (32bits) */
+ IH264E_BITSTREAM_BUFSIZE_ALIGN_FAIL = IH264E_BITSTREAM_ERROR_START + 0x02,
+
+ /** bitstream runtime failure, buf size limit exceeded during encode */
+ IH264E_BITSTREAM_BUFFER_OVERFLOW = IH264E_BITSTREAM_ERROR_START + 0x03,
+
+ /**width not set within supported limit */
+ IH264E_WIDTH_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x01,
+
+ /**height not set within supported limit */
+ IH264E_HEIGHT_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x02,
+
+ /**Unsupported number of reference pictures passed as an argument */
+ IH264E_NUM_REF_UNSUPPORTED = IH264E_CODEC_ERROR_START + 0x03,
+
+ /**Unsupported number of reorder frames passed as an argument */
+ IH264E_NUM_REORDER_UNSUPPORTED = IH264E_CODEC_ERROR_START + 0x04,
+
+ /**codec level not supported */
+ IH264E_CODEC_LEVEL_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x05,
+
+ /**input chroma format not supported */
+ IH264E_INPUT_CHROMA_FORMAT_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x06,
+
+ /**recon chroma format not supported */
+ IH264E_RECON_CHROMA_FORMAT_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x07,
+
+ /**rate control option configured is not supported */
+ IH264E_RATE_CONTROL_MODE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x08,
+
+ /**frame rate configured is not supported */
+ IH264E_FRAME_RATE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x09,
+
+ /**bit rate configured is not supported */
+ IH264E_BITRATE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0A,
+
+ /**B frames setting not supported */
+ IH264E_BFRAMES_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0B,
+
+ /**content type not supported */
+ IH264E_CONTENT_TYPE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0C,
+
+ /**unsupported horizontal search range */
+ IH264E_HORIZONTAL_SEARCH_RANGE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0D,
+
+ /**unsupported vertical search range */
+ IH264E_VERTICAL_SEARCH_RANGE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x0E,
+
+ /**Unsupported slice type input */
+ IH264E_SLICE_TYPE_INPUT_INVALID = IH264E_CODEC_ERROR_START + 0x0F,
+
+ /**unsupported architecture type */
+ IH264E_ARCH_TYPE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x10,
+
+ /**unsupported soc type */
+ IH264E_SOC_TYPE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x11,
+
+ /**target frame rate exceeds source frame rate */
+ IH264E_TGT_FRAME_RATE_EXCEEDS_SRC_FRAME_RATE = IH264E_CODEC_ERROR_START + 0x12,
+
+ /**invalid force frame input */
+ IH264E_INVALID_FORCE_FRAME_INPUT = IH264E_CODEC_ERROR_START + 0x13,
+
+ /**invalid me speed preset */
+ IH264E_INVALID_ME_SPEED_PRESET = IH264E_CODEC_ERROR_START + 0x14,
+
+ /**invalid encoder speed preset */
+ IH264E_INVALID_ENC_SPEED_PRESET = IH264E_CODEC_ERROR_START + 0x15,
+
+ /**invalid deblocking param */
+ IH264E_INVALID_DEBLOCKING_TYPE_INPUT = IH264E_CODEC_ERROR_START + 0x16,
+
+ /**invalid max qp */
+ IH264E_INVALID_MAX_FRAME_QP = IH264E_CODEC_ERROR_START + 0x17,
+
+ /**invalid min qp */
+ IH264E_INVALID_MIN_FRAME_QP = IH264E_CODEC_ERROR_START + 0x18,
+
+ /**invalid init qp */
+ IH264E_INVALID_INIT_QP = IH264E_CODEC_ERROR_START + 0x19,
+
+ /**version buffer size is insufficient */
+ IH264E_CXA_VERS_BUF_INSUFFICIENT = IH264E_CODEC_ERROR_START + 0x1A,
+
+ /**init not done */
+ IH264E_INIT_NOT_DONE = IH264E_CODEC_ERROR_START + 0x1B,
+
+ /**invalid air mode input */
+ IH264E_INVALID_AIR_MODE = IH264E_CODEC_ERROR_START + 0x1C,
+
+ /**invalid air refresh period */
+ IH264E_INVALID_AIR_REFRESH_PERIOD = IH264E_CODEC_ERROR_START + 0x1D,
+
+ /**Insufficient memory allocated for MV Bank */
+ IH264E_INSUFFICIENT_MEM_MVBANK = IH264E_CODEC_ERROR_START + 0x1E,
+
+ /**Insufficient memory allocated for picture buffers */
+ IH264E_INSUFFICIENT_MEM_PICBUF = IH264E_CODEC_ERROR_START + 0x1F,
+
+ /**Buffer manager error */
+ IH264E_BUF_MGR_ERROR = IH264E_CODEC_ERROR_START + 0x20,
+
+ /**No free MV Bank buffer available to store current pic */
+ IH264E_NO_FREE_MVBANK = IH264E_CODEC_ERROR_START + 0x21,
+
+ /**No free picture buffer available to store current pic */
+ IH264E_NO_FREE_PICBUF = IH264E_CODEC_ERROR_START + 0x22,
+
+ /**Invalid encoder operation mode */
+ IH264E_INVALID_ENC_OPERATION_MODE = IH264E_CODEC_ERROR_START + 0x23,
+
+ /**Invalid half pel option */
+ IH264E_INVALID_HALFPEL_OPTION = IH264E_CODEC_ERROR_START + 0x24,
+
+ /**Invalid quarter pel option */
+ IH264E_INVALID_QPEL_OPTION = IH264E_CODEC_ERROR_START + 0x25,
+
+ /**Invalid fast sad option */
+ IH264E_INVALID_FAST_SAD_OPTION = IH264E_CODEC_ERROR_START + 0x26,
+
+ /**Invalid intra 4x4 option */
+ IH264E_INVALID_INTRA4x4_OPTION = IH264E_CODEC_ERROR_START + 0x27,
+
+ /**Invalid intra frame interval */
+ IH264E_INVALID_INTRA_FRAME_INTERVAL = IH264E_CODEC_ERROR_START + 0x28,
+
+ /**Invalid idr frame interval */
+ IH264E_INVALID_IDR_FRAME_INTERVAL = IH264E_CODEC_ERROR_START + 0x29,
+
+ /**Invalid buffer delay */
+ IH264E_INVALID_BUFFER_DELAY = IH264E_CODEC_ERROR_START + 0x2A,
+
+ /**Invalid num cores */
+ IH264E_INVALID_NUM_CORES = IH264E_CODEC_ERROR_START + 0x2B,
+
+ /**profile not supported */
+ IH264E_PROFILE_NOT_SUPPORTED = IH264E_CODEC_ERROR_START + 0x2C,
+
+ /**Invalid slice param input */
+ IH264E_SLICE_PARAM_INPUT_INVALID = IH264E_CODEC_ERROR_START + 0x2D,
+
+ /**Invalid alt ref option */
+ IH264E_INVALID_ALT_REF_OPTION = IH264E_CODEC_ERROR_START + 0x2E,
+
+ /**No free picture buffer available to store recon pic */
+ IH264E_NO_FREE_RECONBUF = IH264E_CODEC_ERROR_START + 0x2F,
+
+ /**generic failure code (-1); original note: meant to ensure enum is 32 bits wide */
+ IH264E_FAIL = -1,
+
+}IH264E_ERROR_T;
+
+
+#endif /* IH264E_ERROR_H_ */
diff --git a/encoder/ih264e_fmt_conv.c b/encoder/ih264e_fmt_conv.c
new file mode 100755
index 0000000..393d6ca
--- /dev/null
+++ b/encoder/ih264e_fmt_conv.c
@@ -0,0 +1,864 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_fmt_conv.c
+*
+* @brief
+* Contains functions for format conversion or frame copy of output buffer
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_fmt_conv_420sp_to_rgb565()
+* - ih264e_fmt_conv_420sp_to_rgba8888()
+* - ih264e_fmt_conv_420sp_to_420sp()
+* - ih264e_fmt_conv_420sp_to_420sp_swap_uv()
+* - ih264e_fmt_conv_420sp_to_420p()
+* - ih264e_fmt_conv_420p_to_420sp()
+* - ih264e_fmt_conv_422i_to_420sp()
+* - ih264e_fmt_conv()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ih264_buf_mgr.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_fmt_conv.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/* Clips (luma + offset) per channel to 8 bits and packs the result as RGB565 */
+static UWORD16 ih264e_pack_rgb565(WORD32 i4_luma,
+                                  WORD16 i2_r,
+                                  WORD16 i2_g,
+                                  WORD16 i2_b)
+{
+    UWORD32 u4_blue = CLIP_U8(i4_luma + i2_b);
+    UWORD32 u4_green = CLIP_U8(i4_luma + i2_g);
+    UWORD32 u4_red = CLIP_U8(i4_luma + i2_r);
+
+    /* keep the 5 / 6 / 5 most significant bits of R / G / B */
+    u4_blue >>= 3;
+    u4_green >>= 2;
+    u4_red >>= 3;
+
+    return (UWORD16)((u4_red << 11) | (u4_green << 5) | u4_blue);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to RGB565
+*
+* @par Description
+*  The picture is processed in blocks of 2x2 luma samples. Each block shares one
+*  U/V pair from which the R, G and B offsets are derived with the fixed point
+*  coefficients COEFF1..COEFF4 (result shifted right by 13). Each luma sample
+*  plus offset is clipped to 8 bits and packed as 5 bits R, 6 bits G, 5 bits B.
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (interleaved, order selected by is_u_first)
+*
+* @param[out] pu2_rgb_dst
+*  Output RGB565 pointer
+*
+* @param[in] wd
+*  Width (consumed two luma columns at a time)
+*
+* @param[in] ht
+*  Height (consumed two luma rows at a time)
+*
+* @param[in] src_y_strd
+*  Input Y stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_strd
+*  Output stride, in units of 16 bit pixels
+*
+* @param[in] is_u_first
+*  Non zero if U precedes V in the interleaved chroma buffer
+*
+* @returns None
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
+                                     UWORD8 *pu1_uv_src,
+                                     UWORD16 *pu2_rgb_dst,
+                                     WORD32 wd,
+                                     WORD32 ht,
+                                     WORD32 src_y_strd,
+                                     WORD32 src_uv_strd,
+                                     WORD32 dst_strd,
+                                     WORD32 is_u_first)
+{
+    /* chroma samples: U and V sit next to each other in the interleaved plane */
+    UWORD8 *pu1_u = pu1_uv_src + (is_u_first ? 0 : 1);
+    UWORD8 *pu1_v = pu1_uv_src + (is_u_first ? 1 : 0);
+
+    /* two luma rows and two output rows are walked in lock step */
+    UWORD8 *pu1_y_top = pu1_y_src;
+    UWORD8 *pu1_y_bot = pu1_y_src + src_y_strd;
+    UWORD16 *pu2_dst_top = pu2_rgb_dst;
+    UWORD16 *pu2_dst_bot = pu2_rgb_dst + dst_strd;
+
+    WORD16 i2_row, i2_col;
+
+    for (i2_row = 0; i2_row < (ht >> 1); i2_row++)
+    {
+        const WORD16 i2_num_pairs = (wd >> 1);
+
+        for (i2_col = 0; i2_col < i2_num_pairs; i2_col++)
+        {
+            /* colour offsets shared by the whole 2x2 block */
+            const WORD32 i4_u = *pu1_u - 128;
+            const WORD32 i4_v = *pu1_v - 128;
+            const WORD16 i2_b = (i4_u * COEFF4) >> 13;
+            const WORD16 i2_g = (i4_u * COEFF2 + i4_v * COEFF3) >> 13;
+            const WORD16 i2_r = (i4_v * COEFF1) >> 13;
+
+            pu1_u += 2;
+            pu1_v += 2;
+
+            /* pixels 0 and 1 : upper row */
+            pu2_dst_top[0] = ih264e_pack_rgb565(pu1_y_top[0], i2_r, i2_g, i2_b);
+            pu2_dst_top[1] = ih264e_pack_rgb565(pu1_y_top[1], i2_r, i2_g, i2_b);
+
+            /* pixels 2 and 3 : lower row */
+            pu2_dst_bot[0] = ih264e_pack_rgb565(pu1_y_bot[0], i2_r, i2_g, i2_b);
+            pu2_dst_bot[1] = ih264e_pack_rgb565(pu1_y_bot[1], i2_r, i2_g, i2_b);
+
+            pu1_y_top += 2;
+            pu1_y_bot += 2;
+            pu2_dst_top += 2;
+            pu2_dst_bot += 2;
+        }
+
+        /* rewind by the row width and step to the next chroma row */
+        pu1_u += src_uv_strd - wd;
+        pu1_v += src_uv_strd - wd;
+
+        /* luma advances by two rows */
+        pu1_y_top += (src_y_strd << 1) - wd;
+        pu1_y_bot += (src_y_strd << 1) - wd;
+
+        /* output advances by two rows as well */
+        pu2_dst_top = pu2_dst_bot - wd + dst_strd;
+        pu2_dst_bot += (dst_strd << 1) - wd;
+    }
+}
+
+/* Clips (luma + offset) per channel to 8 bits and packs (R << 16) | (G << 8) | B */
+static UWORD32 ih264e_pack_rgba8888(WORD32 i4_luma,
+                                    WORD16 i2_r,
+                                    WORD16 i2_g,
+                                    WORD16 i2_b)
+{
+    UWORD32 u4_blue = CLIP_U8(i4_luma + i2_b);
+    UWORD32 u4_green = CLIP_U8(i4_luma + i2_g);
+    UWORD32 u4_red = CLIP_U8(i4_luma + i2_r);
+
+    return ((u4_red << 16) | (u4_green << 8) | (u4_blue << 0));
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to 32 bit RGB
+*
+* @par Description
+*  The picture is processed in blocks of 2x2 luma samples. Each block shares one
+*  U/V pair from which the R, G and B offsets are derived with the fixed point
+*  coefficients COEFF1..COEFF4 (result shifted right by 13). Each luma sample
+*  plus offset is clipped to 8 bits; the output word holds R in bits 16..23,
+*  G in bits 8..15 and B in bits 0..7. Bits 24..31 are written as zero.
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (interleaved, order selected by is_u_first)
+*
+* @param[out] pu4_rgba_dst
+*  Output pointer, one 32 bit word per pixel
+*
+* @param[in] wd
+*  Width (consumed two luma columns at a time)
+*
+* @param[in] ht
+*  Height (consumed two luma rows at a time)
+*
+* @param[in] src_y_strd
+*  Input Y stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_strd
+*  Output stride, in units of 32 bit pixels
+*
+* @param[in] is_u_first
+*  Non zero if U precedes V in the interleaved chroma buffer
+*
+* @returns None
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
+                                       UWORD8 *pu1_uv_src,
+                                       UWORD32 *pu4_rgba_dst,
+                                       WORD32 wd,
+                                       WORD32 ht,
+                                       WORD32 src_y_strd,
+                                       WORD32 src_uv_strd,
+                                       WORD32 dst_strd,
+                                       WORD32 is_u_first)
+{
+    /* chroma samples: U and V sit next to each other in the interleaved plane */
+    UWORD8 *pu1_u = pu1_uv_src + (is_u_first ? 0 : 1);
+    UWORD8 *pu1_v = pu1_uv_src + (is_u_first ? 1 : 0);
+
+    /* two luma rows and two output rows are walked in lock step */
+    UWORD8 *pu1_y_top = pu1_y_src;
+    UWORD8 *pu1_y_bot = pu1_y_src + src_y_strd;
+    UWORD32 *pu4_dst_top = pu4_rgba_dst;
+    UWORD32 *pu4_dst_bot = pu4_rgba_dst + dst_strd;
+
+    WORD16 i2_row, i2_col;
+
+    for (i2_row = 0; i2_row < (ht >> 1); i2_row++)
+    {
+        const WORD16 i2_num_pairs = (wd >> 1);
+
+        for (i2_col = 0; i2_col < i2_num_pairs; i2_col++)
+        {
+            /* colour offsets shared by the whole 2x2 block */
+            const WORD32 i4_u = *pu1_u - 128;
+            const WORD32 i4_v = *pu1_v - 128;
+            const WORD16 i2_b = (i4_u * COEFF4) >> 13;
+            const WORD16 i2_g = (i4_u * COEFF2 + i4_v * COEFF3) >> 13;
+            const WORD16 i2_r = (i4_v * COEFF1) >> 13;
+
+            pu1_u += 2;
+            pu1_v += 2;
+
+            /* pixels 0 and 1 : upper row */
+            pu4_dst_top[0] = ih264e_pack_rgba8888(pu1_y_top[0], i2_r, i2_g, i2_b);
+            pu4_dst_top[1] = ih264e_pack_rgba8888(pu1_y_top[1], i2_r, i2_g, i2_b);
+
+            /* pixels 2 and 3 : lower row */
+            pu4_dst_bot[0] = ih264e_pack_rgba8888(pu1_y_bot[0], i2_r, i2_g, i2_b);
+            pu4_dst_bot[1] = ih264e_pack_rgba8888(pu1_y_bot[1], i2_r, i2_g, i2_b);
+
+            pu1_y_top += 2;
+            pu1_y_bot += 2;
+            pu4_dst_top += 2;
+            pu4_dst_bot += 2;
+        }
+
+        /* rewind by the row width and step to the next chroma row */
+        pu1_u += src_uv_strd - wd;
+        pu1_v += src_uv_strd - wd;
+
+        /* luma advances by two rows */
+        pu1_y_top += (src_y_strd << 1) - wd;
+        pu1_y_bot += (src_y_strd << 1) - wd;
+
+        /* output advances by two rows as well */
+        pu4_dst_top = pu4_dst_bot - wd + dst_strd;
+        pu4_dst_bot += (dst_strd << 1) - wd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used for copying a 420SP buffer
+*
+* @par Description
+*  Copies ht rows of wd bytes of luma followed by (ht >> 1) rows of wd bytes of
+*  interleaved chroma, honouring the individual source and destination strides
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input UV pointer (UV is interleaved either in UV or VU format)
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_uv_dst
+*  Output UV pointer (UV is interleaved in the same format as that of input)
+*
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
+*
+* @param[in] src_y_strd
+*  Input Y Stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output UV stride
+*
+* @returns None
+*
+* @remarks A partial frame copy can be done by passing appropriate source and
+*  destination pointers together with appropriate values for wd and ht
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
+                                    UWORD8 *pu1_uv_src,
+                                    UWORD8 *pu1_y_dst,
+                                    UWORD8 *pu1_uv_dst,
+                                    WORD32 wd,
+                                    WORD32 ht,
+                                    WORD32 src_y_strd,
+                                    WORD32 src_uv_strd,
+                                    WORD32 dst_y_strd,
+                                    WORD32 dst_uv_strd)
+{
+    UWORD8 *pu1_in, *pu1_out;
+    WORD32 rows_left;
+
+    /* luma plane : ht rows */
+    pu1_in = pu1_y_src;
+    pu1_out = pu1_y_dst;
+    for (rows_left = ht; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_out, pu1_in, wd);
+        pu1_out += dst_y_strd;
+        pu1_in += src_y_strd;
+    }
+
+    /* interleaved chroma plane : half the rows, same number of bytes per row */
+    pu1_in = pu1_uv_src;
+    pu1_out = pu1_uv_dst;
+    for (rows_left = (ht >> 1); rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_out, pu1_in, wd);
+        pu1_out += dst_uv_strd;
+        pu1_in += src_uv_strd;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Function used for copying a 420SP buffer while swapping U and V
+*
+* @par Description
+*  Luma is copied row by row. In the interleaved chroma plane the two bytes of
+*  every pair are exchanged while copying, i.e. UV input becomes VU output and
+*  vice versa
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input interleaved chroma pointer
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_uv_dst
+*  Output interleaved chroma pointer (byte order of each pair swapped)
+*
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
+*
+* @param[in] src_y_strd
+*  Input Y Stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output UV stride
+*
+* @returns None
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
+                                            UWORD8 *pu1_uv_src,
+                                            UWORD8 *pu1_y_dst,
+                                            UWORD8 *pu1_uv_dst,
+                                            WORD32 wd,
+                                            WORD32 ht,
+                                            WORD32 src_y_strd,
+                                            WORD32 src_uv_strd,
+                                            WORD32 dst_y_strd,
+                                            WORD32 dst_uv_strd)
+{
+    UWORD8 *pu1_in, *pu1_out;
+    WORD32 rows_left;
+
+    /* luma plane : plain row copy */
+    pu1_in = pu1_y_src;
+    pu1_out = pu1_y_dst;
+    for (rows_left = ht; rows_left > 0; rows_left--)
+    {
+        memcpy(pu1_out, pu1_in, wd);
+        pu1_out += dst_y_strd;
+        pu1_in += src_y_strd;
+    }
+
+    /* chroma plane : exchange the two bytes of every interleaved pair */
+    pu1_in = pu1_uv_src;
+    pu1_out = pu1_uv_dst;
+    for (rows_left = (ht >> 1); rows_left > 0; rows_left--)
+    {
+        WORD32 col = 0;
+
+        while (col < wd)
+        {
+            pu1_out[col] = pu1_in[col + 1];
+            pu1_out[col + 1] = pu1_in[col];
+            col += 2;
+        }
+
+        pu1_out += dst_uv_strd;
+        pu1_in += src_uv_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert a 420SP buffer to 420P
+*
+* @par Description
+*  Optionally copies the luma plane, then de-interleaves the chroma plane into
+*  separate U and V planes of (wd >> 1) x (ht >> 1) samples
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_uv_src
+*  Input interleaved chroma pointer
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_u_dst
+*  Output U pointer
+*
+* @param[out] pu1_v_dst
+*  Output V pointer
+*
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
+*
+* @param[in] src_y_strd
+*  Input Y Stride
+*
+* @param[in] src_uv_strd
+*  Input UV stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output stride, used for both the U and the V plane
+*
+* @param[in] is_u_first
+*  Non zero if U precedes V in the interleaved input
+*
+* @param[in] disable_luma_copy
+*  When non zero the luma plane is not copied
+*
+* @returns None
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
+                                   UWORD8 *pu1_uv_src,
+                                   UWORD8 *pu1_y_dst,
+                                   UWORD8 *pu1_u_dst,
+                                   UWORD8 *pu1_v_dst,
+                                   WORD32 wd,
+                                   WORD32 ht,
+                                   WORD32 src_y_strd,
+                                   WORD32 src_uv_strd,
+                                   WORD32 dst_y_strd,
+                                   WORD32 dst_uv_strd,
+                                   WORD32 is_u_first,
+                                   WORD32 disable_luma_copy)
+{
+    UWORD8 *pu1_uv_row;
+    WORD32 u_ofst, v_ofst;
+    WORD32 chroma_wd, rows_left, col;
+
+    if (0 == disable_luma_copy)
+    {
+        /* luma plane : plain row copy */
+        UWORD8 *pu1_in = pu1_y_src;
+        UWORD8 *pu1_out = pu1_y_dst;
+
+        for (rows_left = ht; rows_left > 0; rows_left--)
+        {
+            memcpy(pu1_out, pu1_in, wd);
+            pu1_out += dst_y_strd;
+            pu1_in += src_y_strd;
+        }
+    }
+
+    /* position of U and V inside every interleaved pair */
+    u_ofst = is_u_first ? 0 : 1;
+    v_ofst = 1 - u_ofst;
+
+    pu1_uv_row = pu1_uv_src;
+    chroma_wd = wd >> 1;
+
+    /* de-interleave U and V and copy to destination */
+    for (rows_left = (ht >> 1); rows_left > 0; rows_left--)
+    {
+        for (col = 0; col < chroma_wd; col++)
+        {
+            pu1_u_dst[col] = pu1_uv_row[(col << 1) + u_ofst];
+            pu1_v_dst[col] = pu1_uv_row[(col << 1) + v_ofst];
+        }
+
+        pu1_u_dst += dst_uv_strd;
+        pu1_v_dst += dst_uv_strd;
+        pu1_uv_row += src_uv_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to perform color space conversion from 420P to 420SP
+*
+* @par Description
+*  Optionally copies the luma plane, then interleaves the separate U and V
+*  planes into a single chroma plane (U first, V second in every pair)
+*
+* @param[in] pu1_y_src
+*  Input Y pointer
+*
+* @param[in] pu1_u_src
+*  Input U pointer
+*
+* @param[in] pu1_v_src
+*  Input V pointer
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_uv_dst
+*  Output UV pointer
+*
+* @param[in] u2_height
+*  Height (note: height precedes width in the argument list)
+*
+* @param[in] u2_width
+*  Width
+*
+* @param[in] src_y_strd
+*  Input Y Stride
+*
+* @param[in] src_u_strd
+*  Input U stride
+*
+* @param[in] src_v_strd
+*  Input V stride
+*
+* @param[in] dst_y_strd
+*  Output Y stride
+*
+* @param[in] dst_uv_strd
+*  Output UV stride
+*
+* @param[in] convert_uv_only
+*  Flag to indicate if only UV copy needs to be done (luma copy is skipped
+*  when non zero)
+*
+* @returns none
+*
+* @remarks
+*  - Chroma dimensions are rounded up: ((u2_width + 1) >> 1) samples per row
+*    and ((u2_height + 1) >> 1) rows
+*  - Every chroma row is addressed from its own row start, so an odd width
+*    (which produces u2_width + 1 interleaved bytes) no longer shifts the
+*    following rows by one byte
+*  - A partial frame copy can be done by passing appropriate source and
+*    destination pointers together with appropriate width and height
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_420p_to_420sp(UWORD8 *pu1_y_src,
+                                   UWORD8 *pu1_u_src,
+                                   UWORD8 *pu1_v_src,
+                                   UWORD8 *pu1_y_dst,
+                                   UWORD8 *pu1_uv_dst,
+                                   UWORD16 u2_height,
+                                   UWORD16 u2_width,
+                                   UWORD16 src_y_strd,
+                                   UWORD16 src_u_strd,
+                                   UWORD16 src_v_strd,
+                                   UWORD16 dst_y_strd,
+                                   UWORD16 dst_uv_strd,
+                                   UWORD32 convert_uv_only)
+{
+    UWORD32 u4_rows_uv, u4_width_uv;
+    UWORD32 i, j;
+
+    if (0 == convert_uv_only)
+    {
+        /* Copy Y buffer */
+        UWORD8 *pu1_src = pu1_y_src;
+        UWORD8 *pu1_dst = pu1_y_dst;
+
+        for (i = 0; i < u2_height; i++)
+        {
+            memcpy(pu1_dst, pu1_src, u2_width);
+            pu1_dst += dst_y_strd;
+            pu1_src += src_y_strd;
+        }
+    }
+
+    /* chroma is subsampled by two in both directions, rounded up */
+    u4_rows_uv = ((UWORD32)u2_height + 1) >> 1;
+    u4_width_uv = ((UWORD32)u2_width + 1) >> 1;
+
+    /* Interleave Cb and Cr buffers */
+    for (i = 0; i < u4_rows_uv; i++)
+    {
+        for (j = 0; j < u4_width_uv; j++)
+        {
+            pu1_uv_dst[(j << 1) + 0] = pu1_u_src[j];
+            pu1_uv_dst[(j << 1) + 1] = pu1_v_src[j];
+        }
+
+        /* step every plane by exactly one stride, independent of the width */
+        pu1_uv_dst += dst_uv_strd;
+        pu1_u_src += src_u_strd;
+        pu1_v_src += src_v_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used to convert 422 interleaved to 420sp
+*
+* @par Description
+*  Every group of four input bytes is read as Cb, Y0, Cr, Y1. Two input rows are
+*  processed per iteration: luma is taken from both rows, chroma only from the
+*  even row (chroma of the odd row is not read)
+*
+* @param[out] pu1_y_buf
+*  Output Y pointer
+*
+* @param[out] pu1_u_buf
+*  Output u pointer
+*
+* @param[out] pu1_v_buf
+*  Output V pointer
+*
+* @param[in] pu1_422i_buf
+*  Input 422i pointer
+*
+* @param[in] u4_y_width
+*  Width of Y component
+*
+* @param[in] u4_y_height
+*  Height of Y component
+*
+* @param[in] u4_y_stride
+*  Stride of pu1_y_buf
+*
+* @param[in] u4_u_stride
+*  Stride of pu1_u_buf
+*
+* @param[in] u4_v_stride
+*  Stride of pu1_v_buf
+*
+* @param[in] u4_422i_stride
+*  Stride of pu1_422i_buf (one input row spans u4_422i_stride << 1 bytes)
+*
+* @returns None
+*
+* @remarks For conversion
+*  pu1_v_buf = pu1_u_buf+1
+*  u4_u_stride = u4_v_stride
+*
+*  The extra parameters are for maintaining API with assembly function
+*
+*******************************************************************************
+*/
+void ih264e_fmt_conv_422i_to_420sp(UWORD8 *pu1_y_buf,
+                                   UWORD8 *pu1_u_buf,
+                                   UWORD8 *pu1_v_buf,
+                                   UWORD8 *pu1_422i_buf,
+                                   WORD32 u4_y_width,
+                                   WORD32 u4_y_height,
+                                   WORD32 u4_y_stride,
+                                   WORD32 u4_u_stride,
+                                   WORD32 u4_v_stride,
+                                   WORD32 u4_422i_stride)
+{
+    /* input rows (even / odd) of the current row pair */
+    UWORD8 *pu1_src_even = pu1_422i_buf;
+    UWORD8 *pu1_src_odd = pu1_422i_buf + (u4_422i_stride << 1);
+
+    /* output luma rows (even / odd) and the single output chroma row */
+    UWORD8 *pu1_luma_even = pu1_y_buf;
+    UWORD8 *pu1_luma_odd = pu1_y_buf + u4_y_stride;
+    UWORD8 *pu1_cb = pu1_u_buf;
+    UWORD8 *pu1_cr = pu1_v_buf;
+
+    WORD32 row = 0;
+
+    while (row < u4_y_height)
+    {
+        WORD32 byte_ofst;
+
+        /* every 4 input bytes (Cb Y0 Cr Y1) describe two pixels */
+        for (byte_ofst = 0; byte_ofst < (u4_y_width << 1); byte_ofst += 4)
+        {
+            const WORD32 pix = byte_ofst >> 1;
+            const UWORD8 u1_cb = pu1_src_even[byte_ofst];
+            const UWORD8 u1_cr = pu1_src_even[byte_ofst + 2];
+
+            pu1_cb[pix] = u1_cb;
+            pu1_cr[pix] = u1_cr;
+
+            pu1_luma_even[pix] = pu1_src_even[byte_ofst + 1];
+            pu1_luma_even[pix + 1] = pu1_src_even[byte_ofst + 3];
+
+            pu1_luma_odd[pix] = pu1_src_odd[byte_ofst + 1];
+            pu1_luma_odd[pix + 1] = pu1_src_odd[byte_ofst + 3];
+        }
+
+        /* two input rows consumed */
+        pu1_src_even += (u4_422i_stride << 2);
+        pu1_src_odd += (u4_422i_stride << 2);
+
+        /* two luma rows produced */
+        pu1_luma_even += (u4_y_stride << 1);
+        pu1_luma_odd += (u4_y_stride << 1);
+
+        /* one chroma row produced */
+        pu1_cb += u4_u_stride;
+        pu1_cr += u4_v_stride;
+
+        row += 2;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function used for format conversion or frame copy
+*
+* @par Description
+*  Copies or converts num_rows rows, starting at cur_row, of the picture held in
+*  ps_pic into the caller supplied output planes. The source is read as 420SP
+*  with stride ps_codec->i4_rec_strd; the output format is selected by
+*  ps_codec->s_cfg.e_recon_color_fmt
+*
+* @param[in] ps_codec
+*  Pointer to codec context (supplies display width, recon stride and formats)
+*
+* @param[in] ps_pic
+*  Picture buffer whose pu1_luma / pu1_chroma planes are the source
+*
+* @param[out] pu1_y_dst
+*  Output Y pointer
+*
+* @param[out] pu1_u_dst
+*  Output U/UV pointer ( UV is interleaved in the same format as that of input)
+*
+* @param[out] pu1_v_dst
+*  Output V pointer ( used in 420P output case)
+*
+* @param[in] u4_dst_y_strd
+*  Stride of destination Y buffer
+*
+* @param[in] u4_dst_uv_strd
+*  Stride of destination U/V buffer
+*
+* @param[in] cur_row
+*  First luma row to be processed
+*
+* @param[in] num_rows
+*  Number of luma rows to be processed; 0 returns immediately
+*
+* @returns error status (IH264E_SUCCESS on every path of this function)
+*
+* @remarks
+*  - Assumes that the stride of U and V buffers are same.
+*    This is correct in most cases
+*    If a case comes where this is not true we need to modify the fmt conversion
+*    functions called inside also
+*  - For any recon format other than 420SP_UV, 420SP_VU and 420P nothing is
+*    copied and IH264E_SUCCESS is still returned
+*  - NOTE(review): 420SP_UV and 420SP_VU both take the plain copy path; no U/V
+*    swap is performed here - confirm this is intended
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec,
+                               pic_buf_t *ps_pic,
+                               UWORD8 *pu1_y_dst,
+                               UWORD8 *pu1_u_dst,
+                               UWORD8 *pu1_v_dst,
+                               UWORD32 u4_dst_y_strd,
+                               UWORD32 u4_dst_uv_strd,
+                               WORD32 cur_row,
+                               WORD32 num_rows)
+{
+    IH264E_ERROR_T ret = IH264E_SUCCESS;
+    UWORD8 *pu1_y_src, *pu1_uv_src;
+    UWORD8 *pu1_y_dst_tmp, *pu1_u_dst_tmp;
+    WORD32 wd;
+
+    /* nothing to copy */
+    if (0 == num_rows)
+        return ret;
+
+    wd = ps_codec->s_cfg.u4_disp_wd;
+
+    /* first source row of luma and of the (vertically subsampled) chroma */
+    pu1_y_src = ps_pic->pu1_luma + cur_row * ps_codec->i4_rec_strd;
+    pu1_uv_src = ps_pic->pu1_chroma + (cur_row / 2) * ps_codec->i4_rec_strd;
+
+    /* matching first destination rows */
+    pu1_y_dst_tmp = pu1_y_dst + cur_row * u4_dst_y_strd;
+    pu1_u_dst_tmp = pu1_u_dst + (cur_row / 2) * u4_dst_uv_strd;
+
+    if ((IV_YUV_420SP_UV == ps_codec->s_cfg.e_recon_color_fmt) ||
+        (IV_YUV_420SP_VU == ps_codec->s_cfg.e_recon_color_fmt))
+    {
+        /* semi-planar output : row wise copy of luma and interleaved chroma */
+        ih264e_fmt_conv_420sp_to_420sp(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
+                                       pu1_u_dst_tmp, wd, num_rows,
+                                       ps_codec->i4_rec_strd,
+                                       ps_codec->i4_rec_strd, u4_dst_y_strd,
+                                       u4_dst_uv_strd);
+    }
+    else if (IV_YUV_420P == ps_codec->s_cfg.e_recon_color_fmt)
+    {
+        /* planar output : chroma is de-interleaved, luma copy stays enabled */
+        WORD32 is_u_first =
+            (IV_YUV_420SP_UV == ps_codec->e_codec_color_format) ? 1 : 0;
+        /* the V plane pointer is only needed (and only offset) for planar output */
+        UWORD8 *pu1_v_dst_tmp = pu1_v_dst + (cur_row / 2) * u4_dst_uv_strd;
+
+        ih264e_fmt_conv_420sp_to_420p(pu1_y_src, pu1_uv_src, pu1_y_dst_tmp,
+                                      pu1_u_dst_tmp, pu1_v_dst_tmp, wd,
+                                      num_rows, ps_codec->i4_rec_strd,
+                                      ps_codec->i4_rec_strd, u4_dst_y_strd,
+                                      u4_dst_uv_strd, is_u_first, 0);
+    }
+
+    return(ret);
+}
+
diff --git a/encoder/ih264e_fmt_conv.h b/encoder/ih264e_fmt_conv.h
new file mode 100755
index 0000000..6b33bf0
--- /dev/null
+++ b/encoder/ih264e_fmt_conv.h
@@ -0,0 +1,142 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_fmt_conv.h
+*
+* @brief
+* The file contains extern declarations of color space conversion routines
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_FMT_CONV_H_
+#define IH264E_FMT_CONV_H_
+
+/* Fixed point YUV to RGB coefficients; products are shifted right by 13 */
+#define COEFF1 (13073)   /* (V - 128) contribution to R */
+#define COEFF2 (-3207)   /* (U - 128) contribution to G */
+#define COEFF3 (-6664)   /* (V - 128) contribution to G */
+#define COEFF4 (16530)   /* (U - 128) contribution to B */
+/* Frame copy / format conversion entry point (defined in ih264e_fmt_conv.c) */
+IH264E_ERROR_T ih264e_fmt_conv(codec_t *ps_codec,
+ pic_buf_t *ps_pic,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_u_dst,
+ UWORD8 *pu1_v_dst,
+ UWORD32 u4_dst_y_strd,
+ UWORD32 u4_dst_uv_strd,
+ WORD32 cur_row,
+ WORD32 num_rows);
+/* Function type shared by the 420SP to 32 bit RGB converters */
+typedef void ih264e_fmt_conv_420sp_to_rgba8888_ft(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD32 *pu4_rgba_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_strd,
+ WORD32 is_u_first);
+/* Function type shared by the 420SP to RGB565 converters */
+typedef void ih264e_fmt_conv_420sp_to_rgb565_ft(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD16 *pu2_rgb_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_strd,
+ WORD32 is_u_first);
+/* Function type shared by the 420SP to 420SP copy routines */
+typedef void ih264e_fmt_conv_420sp_to_420sp_ft(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_uv_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_y_strd,
+ WORD32 dst_uv_strd);
+/* Function type shared by the 420SP to 420P (chroma de-interleave) routines */
+typedef void ih264e_fmt_conv_420sp_to_420p_ft(UWORD8 *pu1_y_src,
+ UWORD8 *pu1_uv_src,
+ UWORD8 *pu1_y_dst,
+ UWORD8 *pu1_u_dst,
+ UWORD8 *pu1_v_dst,
+ WORD32 wd,
+ WORD32 ht,
+ WORD32 src_y_strd,
+ WORD32 src_uv_strd,
+ WORD32 dst_y_strd,
+ WORD32 dst_uv_strd,
+ WORD32 is_u_first,
+ WORD32 disable_luma_copy);
+/* Function type shared by the 420P to 420SP routines; height precedes width */
+typedef void ih264e_fmt_conv_420p_to_420sp_ft(UWORD8 *pu1_y_src, UWORD8 *pu1_u_src, UWORD8 *pu1_v_src,
+ UWORD8 *pu1_y_dst, UWORD8 *pu1_uv_dst,
+ UWORD16 u2_height, UWORD16 u2_width, UWORD16 src_y_strd,
+ UWORD16 src_u_strd, UWORD16 src_v_strd,
+ UWORD16 dst_y_strd, UWORD16 dst_uv_strd,
+ UWORD32 convert_uv_only);
+/* Function type shared by the 422 interleaved to 420SP routines */
+typedef void ih264e_fmt_conv_422i_to_420sp_ft(UWORD8 *pu1_y_buf,UWORD8 *pu1_u_buf,UWORD8 *pu1_v_buf,
+ UWORD8 *pu1_422i_buf,
+ WORD32 u4_y_width,WORD32 u4_y_height,
+ WORD32 u4_y_stride,WORD32 u4_u_stride,WORD32 u4_v_stride,
+ WORD32 u4_422i_stride);
+
+
+/* Generic C function declarations (defined in ih264e_fmt_conv.c) */
+ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888;
+ih264e_fmt_conv_420sp_to_rgb565_ft ih264e_fmt_conv_420sp_to_rgb565;
+ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp;
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p;
+ih264e_fmt_conv_420p_to_420sp_ft ih264e_fmt_conv_420p_to_420sp;
+ih264e_fmt_conv_422i_to_420sp_ft ih264e_fmt_conv_422i_to_420sp;
+
+/* A9Q function declarations (_a9q suffix) */
+ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888_a9q;
+ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp_a9q;
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_a9q;
+ih264e_fmt_conv_420p_to_420sp_ft ih264e_fmt_conv_420p_to_420sp_a9q;
+ih264e_fmt_conv_422i_to_420sp_ft ih264e_fmt_conv_422i_to_420sp_a9q;
+
+
+/* A9A function declarations (_a9a suffix) */
+ih264e_fmt_conv_420sp_to_rgba8888_ft ih264e_fmt_conv_420sp_to_rgba8888_a9a;
+ih264e_fmt_conv_420sp_to_420sp_ft ih264e_fmt_conv_420sp_to_420sp_a9a;
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_a9a;
+
+/* ssse31 function declarations (_ssse31 suffix) */
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_ssse31;
+
+/* sse42 function declarations (_sse42 suffix) */
+ih264e_fmt_conv_420sp_to_420p_ft ih264e_fmt_conv_420sp_to_420p_sse42;
+
+#endif /* IH264E_FMT_CONV_H_ */
diff --git a/encoder/ih264e_function_selector_generic.c b/encoder/ih264e_function_selector_generic.c
new file mode 100755
index 0000000..65f943a
--- /dev/null
+++ b/encoder/ih264e_function_selector_generic.c
@@ -0,0 +1,259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_generic.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_generic
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* the codec context with the generic (architecture independent) C routines
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec)
+{
+    WORD32 i = 0;
+
+    /* process context being set up and its motion estimation context */
+    process_ctxt_t *ps_proc = NULL;
+    me_ctxt_t *ps_me_ctxt = NULL;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 16x16 */
+    ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert;
+    ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz;
+    ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc;
+    ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 4x4 */
+    ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert;
+    ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz;
+    ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc;
+    ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl;
+    ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr;
+    ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r;
+    ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d;
+    ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l;
+    ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u;
+
+    /* Init function pointers for intra pred leaf level functions luma
+     * Intra 8x8 (slot 1, horizontal, is filled too so that no mode is left unset) */
+    ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_8_l[1] = ih264_intra_pred_luma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl;
+    ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr;
+    ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r;
+    ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d;
+    ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l;
+    ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u;
+
+    /* Init function pointers for intra pred leaf level functions chroma
+     * Intra 8x8 */
+    ps_codec->apf_intra_pred_c[0] = ih264_intra_pred_chroma_8x8_mode_dc;
+    ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz;
+    ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert;
+    ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane;
+
+    /* Init luma forward transform fn ptr */
+    ps_codec->pf_resi_trans_quant_8x8 = ih264_resi_trans_quant_8x8;
+    ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4;
+    ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4;
+    ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4;
+    ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv;
+
+    /* Init inverse transform fn ptr */
+    ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8;
+    ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4;
+    ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4;
+    ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc;
+
+    ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4;
+    ps_codec->pf_ihadamard_scaling_2x2_uv = ih264_ihadamard_scaling_2x2_uv;
+    ps_codec->pf_interleave_copy = ih264_interleave_copy;
+
+    /* Init fn ptr luma core coding
+     * NOTE(review): slot [2] is not assigned here - confirm it is never called */
+    ps_codec->luma_energy_compaction[0] = ih264e_code_luma_intra_macroblock_16x16;
+    ps_codec->luma_energy_compaction[1] = ih264e_code_luma_intra_macroblock_4x4;
+    ps_codec->luma_energy_compaction[3] = ih264e_code_luma_inter_macroblock_16x16;
+
+    /* Init fn ptr chroma core coding */
+    ps_codec->chroma_energy_compaction[0] = ih264e_code_chroma_intra_macroblock_8x8;
+    ps_codec->chroma_energy_compaction[1] = ih264e_code_chroma_inter_macroblock_8x8;
+
+    /* Init fn ptr luma deblocking */
+    ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4;
+    ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4;
+    ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4;
+    ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4;
+
+    /* Init fn ptr chroma deblocking */
+    ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4;
+    ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4;
+    ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4;
+    ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4;
+
+    /* write mb syntax layer */
+    ps_codec->pf_write_mb_syntax_layer[ISLICE] = ih264e_write_islice_mb;
+    ps_codec->pf_write_mb_syntax_layer[PSLICE] = ih264e_write_pslice_mb;
+
+    /* Padding Functions */
+    ps_codec->pf_pad_top = ih264_pad_top;
+    ps_codec->pf_pad_bottom = ih264_pad_bottom;
+    ps_codec->pf_pad_left_luma = ih264_pad_left_luma;
+    ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma;
+    ps_codec->pf_pad_right_luma = ih264_pad_right_luma;
+    ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma;
+
+    /* Inter pred leaf level functions */
+    ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy;
+    ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz;
+    ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert;
+    ps_codec->pf_inter_pred_luma_bilinear = ih264_inter_pred_luma_bilinear;
+    ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma;
+
+    /* sad me level functions (codec level copies) */
+    ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16;
+    ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
+    ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8;
+
+    /* memory handling operations */
+    ps_codec->pf_mem_cpy = ih264_memcpy;
+    ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8;
+    ps_codec->pf_mem_set = ih264_memset;
+    ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8;
+
+    /* sad me level functions, repeated for the ME context of every process context */
+    for (i = 0; i < (MAX_PROCESS_CTXT); i++)
+    {
+        ps_proc = &ps_codec->as_process[i];
+
+        ps_me_ctxt = &ps_proc->s_me_ctxt;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16;
+        ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast;
+        ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8;
+        ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog;
+        ps_me_ctxt->pf_ime_compute_sad3_diamond = ime_calculate_sad3_prog;
+        ps_me_ctxt->pf_ime_compute_sad2_diamond = ime_calculate_sad2_prog;
+        ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16;
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter;
+    }
+
+    /* intra mode eval -encoder level function */
+    ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes;
+    ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes;
+    ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes;
+
+    /* csc */
+    ps_codec->pf_ih264e_conv_420p_to_420sp = ih264e_fmt_conv_420p_to_420sp;
+    ps_codec->pf_ih264e_fmt_conv_422i_to_420sp = ih264e_fmt_conv_422i_to_420sp;
+
+    /* Half pel generation function - encoder level*/
+    ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz;
+    ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert;
+}
diff --git a/encoder/ih264e_globals.c b/encoder/ih264e_globals.c
new file mode 100755
index 0000000..e2b46a4
--- /dev/null
+++ b/encoder/ih264e_globals.c
@@ -0,0 +1,261 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_globals.c
+*
+* @brief
+* Contains definitions of global variables used across the encoder
+*
+* @author
+* ittiam
+*
+* @par List of functions
+*
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+
+/*****************************************************************************/
+/* Extern global definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input : qp (one entry per qp, 52 entries), output : lambda
+* @remarks lambda = 0.85 * pow(2, (qp - 12)/3), when SSD is used as metric
+* for computing distortion (Bit rate estimation for cost function of H.264/
+* AVC by Mohd Golam Sarwer et. al.) If the use of distortion metric is SAD
+* rather than SSD in the stage of encoding, consider sqrt(lambda) simply to
+* adjust lambda for the lack of squaring operation in the error computation
+* (from rate distortion optimization for video compression by sullivan).
+* The entries below match sqrt(lambda) rounded to the nearest integer.
+******************************************************************************
+*/
+const UWORD16 gu2_qp_lambda[52]=
+{
+ 0, 0, 0, 0, 0, 0, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 3, 3, 3,
+ 4, 4, 5, 5, 6, 7, 7, 8,
+ 9, 10, 12, 13, 15, 17, 19, 21,
+ 23, 26, 30, 33, 37, 42, 47, 53,
+ 59, 66, 74, 83,
+};
+
+/**
+******************************************************************************
+* @brief Lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input : qp
+* output : lambda
+* @remarks lambda = pow(2, (qp - 12)/6); every entry for qp < 12 is stored as 0
+******************************************************************************
+*/
+const UWORD8 gu1_qp0[52]=
+{
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 4,
+ 4, 4, 5, 6, 6, 7, 8, 9,
+ 10, 11, 13, 14, 16, 18, 20, 23,
+ 25, 29, 32, 36, 40, 45, 51, 57,
+ 64, 72, 81, 91,
+};
+
+/**
+******************************************************************************
+* @brief unsigned exp. Golomb code lengths to assign cost to a coefficient of
+* mb types.
+* input : Integer (0 to 31), output : codelength in bits
+* @remarks Refer sec. 9-1 in h264 specification
+* The entries below equal 2 * floor(log2(input + 1)) + 1.
+******************************************************************************
+*/
+const UWORD8 u1_uev_codelength[32] =
+{
+ 1, 3, 3, 5, 5, 5, 5, 7,
+ 7, 7, 7, 7, 7, 7, 7, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 11,
+};
+
+
+/**
+******************************************************************************
+* @brief Look up table to assign cost to a coefficient of a residual block
+* based on its surrounding coefficients
+* input : Numbers of T1's
+* output : coeff_cost
+* @remarks Refer Section 2.3 Elimination of single coefficients in inter
+* macroblocks in document JVT-O079
+******************************************************************************
+*/
+const UWORD8 gu1_coeff_cost[6] =
+{
+ 3, 2, 2, 1, 1, 1
+};
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for luma 4x4 block
+* input : scan index
+* output : scan location
+* @remarks Zig-zag order; locations are raster positions with 4 entries per row
+******************************************************************************
+*/
+const UWORD8 gu1_luma_scan_order[16] =
+{
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for chroma AC block
+* input : scan index
+* output : scan location
+* @remarks Same sequence as gu1_luma_scan_order without its first entry (0)
+******************************************************************************
+*/
+const UWORD8 gu1_chroma_scan_order[15] =
+{
+ 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for luma 4x4 dc block
+* input : scan index
+* output : scan location
+* @remarks : Contents are identical to gu1_luma_scan_order
+******************************************************************************
+*/
+const UWORD8 gu1_luma_scan_order_dc[16] =
+{
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for chroma 2x2 dc block
+* input : scan index
+* output : scan location
+* @remarks Identity mapping: scan index and scan location coincide
+******************************************************************************
+*/
+const UWORD8 gu1_chroma_scan_order_dc[4] =
+{
+ 0, 1, 2, 3
+};
+
+/**
+******************************************************************************
+* @brief choice of motion vectors to be used during mv prediction
+* input : formatted reference idx comparison metric
+* output : mv prediction has to be median or a simple straight forward
+* selection from neighbors (inputs 1, 2, 4 give 0, 1, 2; all others give -1).
+* @remarks If only one of the candidate blocks has a reference frame equal to
+* the current block then use the same block as the final predictor. A simple
+* look up table to assist this mv prediction condition
+******************************************************************************
+*/
+const WORD8 gi1_mv_pred_condition[8] =
+{
+ -1, 0, 1, -1, 2, -1, -1, -1
+};
+
+/**
+******************************************************************************
+* @brief maps the h264 quantizer to the mpeg2 quantizer scale
+* input : h264 qp, output : equivalent mpeg 2 qp
+* @remarks nominal relation: mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1]
+* NOTE(review): entries equal round(2 ^ ((h264qp - 4) / 6)) (3 at qp 12), not the formula above - confirm
+******************************************************************************
+*/
+const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM] =
+{
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 4,
+ 4, 4, 5, 6, 6, 7, 8, 9,
+ 10, 11, 13, 14, 16, 18, 20, 23,
+ 25, 29, 32, 36, 40, 45, 51, 57,
+ 64, 72, 81, 91, 102, 114, 128, 144,
+ 161, 181, 203, 228,
+};
+
+/**
+******************************************************************************
+* @brief maps the mpeg2 quantizer to the h264 quantizer scale
+* input : mpeg2 qp
+* output : equivalent h264qp
+* @remarks MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32
+* k = 0 (for intra) k = sign(QFij)
+* H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6)
+*
+* Excluding the portion of R(QP%6,i,j) that is due to
+* the DCT scale factors, the 6 entries after dividing by 64 (2^6)
+* correspond to dequant values of
+* 2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375.
+* (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc)
+*
+* Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2
+* (the actual mapping seems to be to MPEG2 qscale of 2.5),
+* and the fact that the effective h264 quantizer changes by
+* a factor of 2 for every 6 steps, the following mapping is
+* obtained:
+* h264qp = 6*(log2(mpeg2qscale/2)) + 12.
+* NOTE(review): the entries follow the 2.5 variant (1 -> 4, 2 -> 10, 4 -> 16); entry 0 is 0.
+* Note that the quant matrix entry assumed for the above
+* equality is 16. Hence when the mpeg2 quant matrix entries
+* are all 16, this lookup can be used as is (which is the
+* default inter quant matrix in mpeg-2).
+******************************************************************************
+*/
+const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM] =
+{
+ 0, 4, 10, 14, 16, 18, 20, 21, 22, 23, 24, 25, 26, 26, 27, 27,
+ 28, 29, 29, 29, 30, 30, 31, 31, 32, 32, 32, 33, 33, 33, 33, 34,
+ 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 37,
+ 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40,
+ 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42,
+ 42, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45,
+ 45, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47,
+ 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+};
+
diff --git a/encoder/ih264e_globals.h b/encoder/ih264e_globals.h
new file mode 100755
index 0000000..4c3de23
--- /dev/null
+++ b/encoder/ih264e_globals.h
@@ -0,0 +1,192 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_globals.h
+*
+* @brief
+* Contains declarations of global variables for H264 encoder
+*
+* @author
+* Ittiam
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_GLOBALS_H_
+#define IH264E_GLOBALS_H_
+
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Computes the lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input : qp
+* output : lambda
+* @remarks lambda = 0.85 * pow(2, (qp - 12)/3), when SSD is used as metric
+* for computing distortion (Bit rate estimation for cost function of H.264/
+* AVC by Mohd Golam Sarwer et. al.) If the use of distortion metric is SAD
+* rather than SSD in the stage of encoding, consider sqrt(lambda) simply to
+* adjust lambda for the lack of squaring operation in the error computation
+* (from rate distortion optimization for video compression by sullivan).
+******************************************************************************
+*/
+extern const UWORD16 gu2_qp_lambda[52];
+
+/**
+******************************************************************************
+* @brief Computes the lambda for varying quantizer scales that would be used to
+* compute the RD cost while deciding on the MB modes.
+* input : qp
+* output : lambda
+* @remarks lambda = pow(2, (qp - 12)/6). When Lagrangian multiplier is disabled
+* the same constant is used across mode decision and mv decisions.
+******************************************************************************
+*/
+extern const UWORD8 gu1_qp0[52];
+
+/**
+******************************************************************************
+* @brief unsigned exp. Golomb code lengths to assign cost to a coefficient of
+* mb types.
+* input : Integer
+* output : codelength
+* @remarks Refer sec. 9-1 in h264 specification
+******************************************************************************
+*/
+extern const UWORD8 u1_uev_codelength[32];
+
+/**
+******************************************************************************
+* @brief Look up table to assign cost to a coefficient of a residual block
+* basing on its surrounding coefficients
+* input : Numbers of T1's
+* output : coeff_cost
+* @remarks Refer Section 2.3 Elimination of single coefficients in inter
+* macroblocks in document JVT-O079
+******************************************************************************
+*/
+extern const UWORD8 gu1_coeff_cost[6];
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for luma 4x4 block
+* input : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+******************************************************************************
+*/
+extern const UWORD8 gu1_luma_scan_order[16];
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for chroma AC block
+* input : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 32
+******************************************************************************
+*/
+extern const UWORD8 gu1_chroma_scan_order[15];
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for luma 4x4 dc block
+* input : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+******************************************************************************
+*/
+extern const UWORD8 gu1_luma_scan_order_dc[16];
+
+/**
+******************************************************************************
+* @brief Indices map to raster scan for chroma 2x2 dc block
+* input : scan index
+* output : scan location
+* @remarks The scan order assumes the stride to access the next row is 16
+******************************************************************************
+*/
+extern const UWORD8 gu1_chroma_scan_order_dc[4];
+
+
+/**
+******************************************************************************
+* @brief choice of motion vectors to be used during mv prediction
+* input : formatted reference idx comparison metric
+* output : mv prediction has to be median or a simple straight forward selec
+* tion from neighbors.
+* @remarks If only one of the candidate blocks has a reference frame equal to
+ the current block then use the same block as the final predictor. A simple
+ look up table to assist this mv prediction condition
+******************************************************************************
+*/
+extern const WORD8 gi1_mv_pred_condition[8];
+
+
+/**
+******************************************************************************
+* @brief maps the h264 quantizer to the mpeg2 quantizer scale
+* input : h264 qp
+* output : equivalent mpeg 2 qp
+* @remarks mpeg2qscale = 2 ^ [((h264qp - 12) / 6) + 1]
+******************************************************************************
+*/
+extern const UWORD8 gau1_h264_to_mpeg2_qmap[H264_QP_ELEM];
+
+/**
+******************************************************************************
+* @brief maps the mpeg2 quantizer to the h264 quantizer scale
+* input : mpeg2 qp
+* output : equivalent h264 qp
+* @remarks MPEG-2 dequantization: (2*QFij + k)*Wij*qscale/32
+* k = 0 (for intra) k = sign(QFij)
+* H.264 dequantization: (QFij*R(QP%6,i,j))>>(6 - QP/6)
+*
+* Excluding the portion of R(QP%6,i,j) that is due to
+* the DCT scale factors, the 6 entries after dividing by 64 (2^6)
+* correspond to dequant values of
+* 2.5, 2.8125, 3.125, 3.5625, 3.9375, 4.4375.
+* (a=0.5 b=sqrt(2/5) - refer to JVT-B038.doc)
+*
+* Assuming that h264Qp=12 corresponds to MPEG2 qscale of 2
+* (the actual mapping seems to be to MPEG2 qscale of 2.5),
+* and the fact that the effective h264 quantizer changes by
+* a factor of 2 for every 6 steps, the following mapping is
+* obtained:
+* h264qp = 6*(log2(mpeg2qscale/2)) + 12.
+*
+* Note that the quant matrix entry assumed for the above
+* equality is 16. Hence when the mpeg2 quant matrix entries
+* are all 16, this lookup can be used as is (which is the
+* default inter quant matrix in mpeg-2).
+******************************************************************************
+*/
+extern const UWORD8 gau1_mpeg2_to_h264_qmap[MPEG2_QP_ELEM];
+
+
+#endif /* IH264E_GLOBALS_H_ */
diff --git a/encoder/ih264e_half_pel.c b/encoder/ih264e_half_pel.c
new file mode 100755
index 0000000..cb475a1
--- /dev/null
+++ b/encoder/ih264e_half_pel.c
@@ -0,0 +1,226 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_half_pel.c
+*
+* @brief
+* This file contains functions that are used for computing subpixel planes
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_sixtapfilter_horz
+* - ih264e_sixtap_filter_2dvh_vert
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_half_pel.h"
+#include "ih264_macros.h"
+#include "ih264e_half_pel.h"
+#include "ih264e_debug.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Interprediction luma filter for horizontal input (Filter run for width = 17
+* and height =16)
+*
+* @par Description:
+* Applies a 6 tap horizontal filter .The output is clipped to 8 bits
+* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
+                              UWORD8 *pu1_dst,
+                              WORD32 src_strd,
+                              WORD32 dst_strd)
+{
+    /* Plane dimensions and indices are kept signed so that no stride arithmetic
+     * is carried out in unsigned 32 bit (which wraps on LP64 if stride < width) */
+    const WORD32 wd = HP_PL_WD;
+    const WORD32 ht = MB_SIZE;
+    WORD32 row, col;
+
+    /* the six tap window of output x covers source samples x - 2 .. x + 3 */
+    pu1_src -= 2;
+
+    for (row = 0; row < ht; row++)
+    {
+        for (col = 0; col < wd; col++)
+        {
+            /* mirrored samples of the window share one filter coefficient */
+            WORD32 sum = ih264_g_six_tap[0] * (pu1_src[col] + pu1_src[col + 5])
+                       + ih264_g_six_tap[1] * (pu1_src[col + 1] + pu1_src[col + 4])
+                       + ih264_g_six_tap[2] * (pu1_src[col + 2] + pu1_src[col + 3]);
+
+            /* round, divide by 32 and saturate to 8 bits */
+            pu1_dst[col] = CLIP_U8((sum + 16) >> 5);
+        }
+
+        /* move both planes to the start of their next row */
+        pu1_src += src_strd;
+        pu1_dst += dst_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function implements a two stage cascaded six tap filter. It applies
+* the six tap filter in the vertical direction on the predictor values,
+* followed by applying the same filter in the horizontal direction on the
+* output of the first stage. The six tap filtering operation is described in
+* sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for
+* width = 17 and height = 17)
+*
+* @par Description:
+* The function interpolates the predictors first in the vertical direction and
+* then in the horizontal direction to output the (1/2,1/2). The output of the
+* first stage of the filter is stored unrounded in the buffer pointed to by
+* pi4_pred (only in C) as WORD32 values.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst1
+* UWORD8 pointer to the destination (vertically filtered half pel output)
+*
+* @param[out] pu1_dst2
+* UWORD8 pointer to the destination (output after applying horizontal filter to
+* the intermediate vertical output)
+*
+* @param[in] src_strd
+* integer source stride
+
+* @param[in] dst_strd
+* integer destination stride of pu1_dst
+*
+* @param[in] pi4_pred
+* Pointer to the WORD32 intermediate buffer (used only in c)
+*
+* @param[in] i4_pred_strd
+* integer stride of the pi4_pred intermediate buffer
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
+                                    UWORD8 *pu1_dst1,
+                                    UWORD8 *pu1_dst2,
+                                    WORD32 src_strd,
+                                    WORD32 dst_strd,
+                                    WORD32 *pi4_pred,
+                                    WORD32 i4_pred_strd)
+{
+    const WORD32 num_rows = HP_PL_HT;
+    const WORD32 num_cols = HP_PL_WD;
+    WORD32 *pi4_vert_row = pi4_pred;
+    WORD32 y, x;
+
+    /* Stage 1: unrounded vertical six tap sums; two extra columns on the left
+     * and three on the right are produced for the horizontal stage below */
+    for (y = 0; y < num_rows; y++, pu1_src += src_strd, pi4_vert_row += i4_pred_strd)
+    {
+        for (x = -2; x <= num_cols + 2; x++)
+        {
+            const UWORD8 *pu1_col = pu1_src + x;
+            pi4_vert_row[x] =
+                ih264_g_six_tap[0] * (pu1_col[-2 * src_strd] + pu1_col[3 * src_strd]) +
+                ih264_g_six_tap[1] * (pu1_col[-1 * src_strd] + pu1_col[2 * src_strd]) +
+                ih264_g_six_tap[2] * (pu1_col[0] + pu1_col[1 * src_strd]);
+        }
+    }
+
+    /* Stage 2: horizontal six tap filter over the stage 1 sums gives pu1_dst2;
+     * the centre stage 1 sum alone, rounded and clipped, gives pu1_dst1 */
+    for (y = 0; y < num_rows; y++)
+    {
+        for (x = 0; x < num_cols; x++)
+        {
+            const WORD32 *pi4_win = pi4_pred + x;
+            WORD32 i4_hv = (pi4_win[-2] + pi4_win[3]) +
+                           ih264_g_six_tap[1] * (pi4_win[-1] + pi4_win[2]) +
+                           ih264_g_six_tap[2] * (pi4_win[0] + pi4_win[1]);
+
+            pu1_dst2[x] = CLIP_U8((i4_hv + 512) >> 10);
+            pu1_dst1[x] = CLIP_U8((pi4_win[0] + 16) >> 5);
+        }
+        pi4_pred += i4_pred_strd;
+        pu1_dst2 += dst_strd;
+        pu1_dst1 += dst_strd;
+    }
+}
+
diff --git a/encoder/ih264e_half_pel.h b/encoder/ih264e_half_pel.h
new file mode 100755
index 0000000..92bd37f
--- /dev/null
+++ b/encoder/ih264e_half_pel.h
@@ -0,0 +1,162 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_half_pel.h
+ *
+ * @brief
+ * Contains extern declarations of subpel functions used by the encoder
+ *
+ * @author
+ * ittiam
+ *
+ * @remarks
+ * none
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_HALF_PEL_H_
+#define IH264E_HALF_PEL_H_
+
+/*****************************************************************************/
+/* Global constants */
+/*****************************************************************************/
+/*
+ * Dimensions of subpel plane buffers (parenthesized, safe inside expressions)
+ */
+#define HP_PL_WD (MB_SIZE + 1)
+#define HP_PL_HT (MB_SIZE + 1)
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Interprediction luma filter for horizontal input (Filter run for width = 17
+* and height =16)
+*
+* @par Description:
+* Applies a 6 tap horizontal filter .The output is clipped to 8 bits
+* sec 8.4.2.2.1 titled "Luma sample interpolation process"
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+typedef void ih264e_sixtapfilter_horz_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd);
+
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz; /* unsuffixed variant (presumably the C version - confirm) */
+
+/* arm assembly (a9q and av8 variants) */
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_a9q;
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_av8;
+
+/* x86 intrinsics (ssse3 variant) */
+ih264e_sixtapfilter_horz_ft ih264e_sixtapfilter_horz_ssse3;
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function implements a two stage cascaded six tap filter. It applies
+* the six tap filter in the vertical direction on the predictor values,
+* followed by applying the same filter in the horizontal direction on the
+* output of the first stage. The six tap filtering operation is described in
+* sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for
+* width = 17 and height = 17)
+*
+* @par Description:
+* The function interpolates the predictors first in the vertical direction and
+* then in the horizontal direction to output the (1/2,1/2). The output of the
+* first stage of the filter is stored in the buffer pointed to by
+* pi4_pred (only in C) as WORD32 values.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst1
+* UWORD8 pointer to the destination (Vertical filtered output)
+*
+* @param[out] pu1_dst2
+* UWORD8 pointer to the destination (output after applying horizontal filter
+* to the intermediate vertical output)
+*
+* @param[in] src_strd
+* integer source stride
+
+* @param[in] dst_strd
+* integer destination stride of pu1_dst
+*
+* @param[in] pi4_pred
+* Pointer to WORD32 intermediate buffer (used only in c)
+*
+* @param[in] i4_pred_strd
+* integer stride of the intermediate buffer pi4_pred
+*
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+typedef void ih264e_sixtap_filter_2dvh_vert_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst1,
+ UWORD8 *pu1_dst2,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 *pi4_pred,
+ WORD32 i4_pred_strd);
+
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert; /* unsuffixed variant (presumably the C version - confirm) */
+
+/* arm assembly (a9q and av8 variants) */
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_a9q;
+
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_av8;
+
+/* x86 intrinsics (ssse3 variant) */
+ih264e_sixtap_filter_2dvh_vert_ft ih264e_sixtap_filter_2dvh_vert_ssse3;
+
+#endif /* IH264E_HALF_PEL_H_ */
diff --git a/encoder/ih264e_intra_modes_eval.c b/encoder/ih264e_intra_modes_eval.c
new file mode 100755
index 0000000..b41d717
--- /dev/null
+++ b/encoder/ih264e_intra_modes_eval.c
@@ -0,0 +1,2296 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_intra_modes_eval.c
+*
+* @brief
+* This file contains definitions of routines that perform rate distortion
+* analysis on a macroblock if they are to be coded as intra.
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_derive_nghbr_avbl_of_mbs()
+* - ih264e_derive_ngbr_avbl_of_mb_partitions()
+* - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
+* - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
+* - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
+* - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton()
+* - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
+* - ih264e_evaluate_intra16x16_modes()
+* - ih264e_evaluate_intra4x4_modes()
+* - ih264e_evaluate_intra_chroma_modes()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264e_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ime_distortion_metrics.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_globals.h"
+#include "ime_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+* derivation process for macroblock availability
+*
+* @par Description
+* Derives the availability of the left (A), top (B), top right (C) and top
+* left (D) macroblocks of the current macroblock and stores one flag per
+* neighbor (1 - available, 0 - not available) in ps_proc->ps_ngbr_avbl.
+*
+* A neighbor is flagged as available only when
+* - the current macroblock is not on the picture border facing that neighbor
+* (first column for A and D, first row for B, C and D, last column for C)
+* - its entry in ps_proc->pu1_slice_idx equals the entry of the current
+* macroblock
+*
+* @param[in] ps_proc
+* pointer to proc context (handle). The fields i4_mb_x, i4_mb_y, i4_wd_mbs
+* and pu1_slice_idx are read, the structure pointed to by ps_ngbr_avbl is
+* written
+*
+* @remarks Based on section 6.4.5 in H264 spec
+* An entry of the slice index map that belongs to a neighbor is read only
+* after the picture border checks for that neighbor have passed
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc)
+{
+ /* position of the current mb (in mb units) */
+ const WORD32 mb_col = ps_proc->i4_mb_x;
+ const WORD32 mb_row = ps_proc->i4_mb_y;
+
+ /* number of mbs in a row of the picture */
+ const WORD32 mbs_per_row = ps_proc->i4_wd_mbs;
+
+ /* entry of the current mb in the slice index map. Neighbors are */
+ /* addressed relative to this entry and are read only after the border */
+ /* checks below have passed */
+ UWORD8 *pu1_curr = ps_proc->pu1_slice_idx + ((mb_row * mbs_per_row) + mb_col);
+
+ /* structure receiving the availability flags */
+ block_neighbors_t *ps_flags = ps_proc->ps_ngbr_avbl;
+
+ /* picture border tests; a neighbor beyond the border of the picture */
+ /* does not exist */
+ const WORD32 on_first_col = (mb_col == 0);
+ const WORD32 on_last_col = (mb_col == mbs_per_row - 1);
+ const WORD32 on_first_row = (mb_row == 0);
+
+ /**********************************************************************/
+ /* The macroblock is marked as available, unless one of the following */
+ /* conditions is true in which case the macroblock shall be marked as */
+ /* not available. */
+ /* 1. mbAddr < 0 */
+ /* 2. mbAddr > CurrMbAddr */
+ /* 3. the macroblock with address mbAddr belongs to a different slice */
+ /* than the macroblock with address CurrMbAddr */
+ /**********************************************************************/
+
+ /* left macroblock (A) : previous entry of the map */
+ if (!on_first_col && pu1_curr[-1] == pu1_curr[0])
+ {
+ ps_flags->u1_mb_a = 1;
+ }
+ else
+ {
+ ps_flags->u1_mb_a = 0;
+ }
+
+ /* top macroblock (B) : one row of entries back */
+ if (!on_first_row && pu1_curr[-mbs_per_row] == pu1_curr[0])
+ {
+ ps_flags->u1_mb_b = 1;
+ }
+ else
+ {
+ ps_flags->u1_mb_b = 0;
+ }
+
+ /* top right macroblock (C) : entry following that of the top mb */
+ if (!on_last_col && !on_first_row && pu1_curr[1 - mbs_per_row] == pu1_curr[0])
+ {
+ ps_flags->u1_mb_c = 1;
+ }
+ else
+ {
+ ps_flags->u1_mb_c = 0;
+ }
+
+ /* top left macroblock (D) : entry preceding that of the top mb */
+ if (!on_first_col && !on_first_row && pu1_curr[-1 - mbs_per_row] == pu1_curr[0])
+ {
+ ps_flags->u1_mb_d = 1;
+ }
+ else
+ {
+ ps_flags->u1_mb_d = 0;
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* derivation process for subblock/partition availability
+*
+* @par Description
+* Maps a pel position, given relative to the current macroblock, to the
+* availability of the macroblock that holds the pel. Positions 0-15
+* (inclusive) lie inside the current macroblock, every other value addresses
+* a neighbor :
+*
+*                 x < 0      0 <= x <= 15    x >= 16
+*  y < 0          u1_mb_d    u1_mb_b         u1_mb_c
+*  0 <= y <= 15   u1_mb_a    1               0
+*  y >= 16        0          0               0
+*
+* @param[in] ps_ngbr_avbl
+* pointer to the neighbor availability flags of the current macroblock
+*
+* @param[in] i1_pel_pos_x
+* column position of the pel wrt the current macroblock
+*
+* @param[in] i1_pel_pos_y
+* row position of the pel wrt the current macroblock
+*
+* @remarks Assumptions: before calling this function it is assumed that
+* the neighbor availability of the current macroblock is already derived.
+* Based on table 6-3 of H264 specification
+*
+* @return availability status (yes or no)
+*
+******************************************************************************
+*/
+UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
+ WORD8 i1_pel_pos_x,
+ WORD8 i1_pel_pos_y)
+{
+ /* column(-1) : the pel belongs to a macroblock on the left */
+ const WORD32 is_left_col = (i1_pel_pos_x < 0);
+
+ /* column(+16) : the pel belongs to a macroblock on the right */
+ const WORD32 is_right_col = (i1_pel_pos_x >= 16);
+
+ /* rows(+16) : bottom left, bottom and bottom right are not available */
+ if (i1_pel_pos_y >= 16)
+ {
+ return 0;
+ }
+
+ /* row(-1) : the pel belongs to the row of macroblocks on top */
+ if (i1_pel_pos_y < 0)
+ {
+ if (is_left_col)
+ {
+ /* current mb topleft availability */
+ return ps_ngbr_avbl->u1_mb_d;
+ }
+
+ if (is_right_col)
+ {
+ /* current mb top right availability */
+ return ps_ngbr_avbl->u1_mb_c;
+ }
+
+ /* current mb top availability */
+ return ps_ngbr_avbl->u1_mb_b;
+ }
+
+ /* all rows of a macroblock */
+ if (is_left_col)
+ {
+ /* current mb left availability */
+ return ps_ngbr_avbl->u1_mb_a;
+ }
+
+ if (is_right_col)
+ {
+ /* current mb right availability */
+ return 0;
+ }
+
+ /* current mb availability */
+ /* availability of the partition is dependent on the position of the */
+ /* partition inside the mb. Although the availability is declared as 1 */
+ /* in all cases these needs to be corrected somewhere else and this is */
+ /* not done in here */
+ return 1;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 16x16 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 16x16 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc
+* pointer to process context (handle)
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
+* the SAD and cost are one and the same.
+*
+* @return none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD(distortion metric) of the 16x16 macroblock */
+ WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
+
+ /* lambda */
+ UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+ /* cost = distortion + lambda*rate */
+ WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX;
+
+ /* intra mode */
+ UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
+
+ /* neighbor pels for intra prediction */
+ /* layout : [0..15] left pels (bottom to top), [16] top left pel, [17..32] top pels */
+ UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
+
+ /* neighbor availability */
+ WORD32 i4_ngbr_avbl;
+
+ /* pointer to src macro block */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
+ UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; /* recon buffer, source of the neighbor pels */
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
+ UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* pointer to neighbors left, top, topleft */
+ UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
+ UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
+ UWORD8 *pu1_mb_d = pu1_mb_b - 1;
+
+ /* valid intra modes map */
+ UWORD32 u4_valid_intra_modes;
+
+ /* lut for valid intra modes, indexed by i4_ngbr_avbl (bit 0 - left, bit 1 - top left, bit 2 - top) */
+ const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15}; /* bit 3 (plane) is set only when left, top and top left are all available */
+
+ /* temp var */
+ UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
+
+ /* init temp var */
+ if (ps_proc->i4_slice_type == PSLICE)
+ {
+ offset = 5; /* added to the mode when indexing u1_uev_codelength below */
+ u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
+ }
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
+ * basing on neighbors available and hence evade the computation of neighbor availability totally. */
+ /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
+ i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1);
+ ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
+
+ /* gather prediction pels from the neighbors, if particular set is not available
+ * it is set to zero*/
+ /* left pels */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_a)
+ {
+ for(i = 0; i < 16; i++)
+ pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd]; /* row i of the left column lands at index 15 - i */
+ }
+ else
+ {
+ ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE);
+ }
+ /* top pels */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_b)
+ {
+ ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16);
+ /*for(i = 0; i < 16; i++)
+ pu1_ngbr_pels_i16[16+1+i] = pu1_mb_b[i];*/
+ }
+ else
+ {
+ ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE);
+ }
+ /* topleft pels */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_d)
+ pu1_ngbr_pels_i16[16] = *pu1_mb_d;
+ else
+ pu1_ngbr_pels_i16[16] = 0;
+
+ /* set valid intra modes for evaluation */
+// u4_valid_intra_modes = 15;
+//// ih264e_filter_intra16x16modes(pu1_mb_curr, i4_src_strd, &u4_valid_intra_modes);
+// if (!ps_proc->ps_ngbr_avbl->u1_mb_a)
+// u4_valid_intra_modes &= ~(1 << HORZ_I16x16);
+// if (!ps_proc->ps_ngbr_avbl->u1_mb_b)
+// u4_valid_intra_modes &= ~(1 << VERT_I16x16);
+//// if (!ps_proc->ps_ngbr_avbl->u1_mb_a || !ps_proc->ps_ngbr_avbl->u1_mb_b || !ps_proc->ps_ngbr_avbl->u1_mb_d)
+// if (i4_ngbr_avbl != 7)
+// u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
+
+ u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; /* lut form of the reference logic kept in the comment above */
+
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
+ u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
+
+ /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
+ ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16,
+ i4_src_strd, i4_pred_strd,
+ i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least,
+ u4_valid_intra_modes);
+
+ /* cost = distortion + lambda*rate */
+ i4_mb_cost_least = i4_mb_distortion_least;
+
+ /* plane mode is evaluated only when bit 3 of the map is set; with the IVE_FASTEST preset only for I slices */
+ if (( (u4_valid_intra_modes >> 3) & 1) != 0 && (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST ||
+ ps_proc->i4_slice_type == ISLICE))
+ {
+ /* intra prediction for PLANE mode*/
+ (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
+
+ /* evaluate distortion between the actual blk and the estimated blk for the given mode */
+ ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion);
+
+ /* cost = distortion + lambda*rate */
+ i4_mb_cost = i4_mb_distortion;
+
+ /* update the least cost information if necessary */
+ if(i4_mb_cost < i4_mb_distortion_least)
+ {
+ u4_intra_mode = PLANE_I16x16;
+
+ i4_mb_cost_least = i4_mb_cost;
+ i4_mb_distortion_least = i4_mb_distortion;
+ }
+ }
+
+ u4_best_intra_16x16_mode = u4_intra_mode;
+
+ DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
+
+ ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
+
+ /* cost = distortion + lambda*rate */
+ i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode];
+
+
+ /* update the type of the mb if necessary */
+ if (i4_mb_cost_least < ps_proc->i4_mb_cost)
+ {
+ ps_proc->i4_mb_cost = i4_mb_cost_least;
+ ps_proc->i4_mb_distortion = i4_mb_distortion_least;
+ ps_proc->u4_mb_type = I16x16;
+ }
+
+ return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 8x8 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc
+* pointer to proc ctxt
+*
+* @remarks Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: TODO: This function needs to be tested
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD(distortion metric) of an 8x8 block */
+ WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+ /* lambda */
+ UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+ /* cost = distortion + lambda*rate; the total starts at lambda (one bit) */
+ WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
+
+ /* cost of signalling the mode : one bit when it equals the estimated mode, four bits otherwise */
+ UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+ /* intra mode */
+ UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
+
+ /* neighbor pels for intra prediction */
+ UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
+
+ /* pointer to curr partition */
+ UWORD8 *pu1_mb_curr;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+ /* neighbors left, top, top left */
+ UWORD8 *pu1_mb_a;
+ UWORD8 *pu1_mb_b;
+ UWORD8 *pu1_mb_d;
+
+ /* neighbor availability */
+ WORD32 i4_ngbr_avbl;
+ block_neighbors_t s_ngbr_avbl;
+
+ /* temp vars : 8x8 block index and pel offset of the block inside the mb */
+ UWORD32 b8, u4_pix_x, u4_pix_y;
+
+ /* ngbr mb syntax information */
+ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+ mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+ /* valid intra modes map */
+ UWORD32 u4_valid_intra_modes;
+
+ /* the four 8x8 blocks of the mb are visited in raster order */
+ for(b8 = 0; b8 < 4; b8++)
+ {
+ u4_pix_x = (b8 & 0x01) << 3;
+ u4_pix_y = (b8 >> 1) << 3;
+
+ pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+ /* when rdopt is off, we use the input as reference for constructing prediction buffer */
+ /* as opposed to using the recon pels. (open loop intra prediction) */
+ pu1_mb_a = pu1_mb_curr - 1; /* pointer to left neighbor pel (source buffer) */
+ pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top neighbor pel (source buffer) */
+ pu1_mb_d = pu1_mb_b - 1; /* pointer to top left neighbor pel (source buffer) */
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+ /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+ /* NOTE(review): the UWORD32 offsets are passed as WORD8 arguments; 0 - 1 is presumably meant to arrive as -1 - confirm */
+ s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
+ s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
+ s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
+ s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
+
+ /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
+ /* NOTE(review): bit 4 repeats the left availability (presumably it stands for bottom left) - confirm */
+ i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) +
+ (s_ngbr_avbl.u1_mb_a << 4);
+ /* if top partition is available and top right is not available for intra prediction, then */
+ /* padd top right samples using top sample and make top right also available */
+ /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+ ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
+
+
+ ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
+ i4_src_strd, i4_ngbr_avbl);
+
+ i4_partition_cost_least = INT_MAX;
+ /* set valid intra modes for evaluation : start with all nine modes, then drop those needing missing neighbors */
+ u4_valid_intra_modes = 0x1ff;
+
+ /* NOTE(review): the I4x4 mode names are used as bit positions for the 8x8 modes; presumably both enums share values - confirm */
+ if (!s_ngbr_avbl.u1_mb_b)
+ {
+ u4_valid_intra_modes &= ~(1 << VERT_I4x4);
+ u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
+ u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
+ }
+ if (!s_ngbr_avbl.u1_mb_a)
+ {
+ u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
+ u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
+ }
+ if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
+ {
+ u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
+ u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
+ u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
+ }
+
+ /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */
+ if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+ {
+ u4_estimated_intra_8x8_mode = DC_I8x8;
+ }
+ else
+ {
+ /* both default to DC when the neighboring mb is neither I8x8 nor I4x4 */
+ UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
+ UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
+
+ if (u4_pix_x == 0)
+ { /* left neighbor lies in the left mb */
+ if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+ {
+ u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1];
+ }
+ else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+ {
+ /* NOTE(review): H264 sec 8.3.2.1 appears to use 4x4 block 1 of the 8x8 block for the left neighbor (2 is for top) - TODO confirm */
+ u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2];
+ }
+ }
+ else
+ { /* left neighbor is the previous 8x8 block of the current mb */
+ u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1];
+ }
+
+ if (u4_pix_y == 0)
+ { /* top neighbor lies in the top mb */
+ if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+ {
+ u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2];
+ }
+ else if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+ {
+ u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2];
+ }
+ }
+ else
+ { /* top neighbor is the 8x8 block above, inside the current mb */
+ u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2];
+ }
+
+ u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
+ }
+
+ /* perform intra mode 8x8 evaluation : walk the bits of the map, bit n stands for mode VERT_I8x8 + n */
+ for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1)
+ {
+ if ( (u4_valid_intra_modes & 1) == 0)
+ continue;
+
+ /* intra prediction */
+ (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl);
+
+ /* evaluate distortion between the actual blk and the estimated blk for the given mode */
+ ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion);
+
+ i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits);
+
+ /* update the least cost information if necessary */
+ if (i4_partition_cost < i4_partition_cost_least)
+ {
+ i4_partition_cost_least = i4_partition_cost;
+ i4_partition_distortion_least = i4_partition_distortion;
+ u4_best_intra_8x8_mode = u4_intra_mode;
+ }
+ }
+ /* macroblock cost and distortion : accumulated over the four 8x8 blocks */
+ i4_total_cost += i4_partition_cost_least;
+ i4_total_distortion += i4_partition_distortion_least;
+ /* mb partition mode */
+ ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
+
+ }
+
+ /* update the type of the mb if necessary */
+ if (i4_total_cost < ps_proc->i4_mb_cost)
+ {
+ ps_proc->i4_mb_cost = i4_total_cost;
+ ps_proc->i4_mb_distortion = i4_total_distortion;
+ ps_proc->u4_mb_type = I8x8;
+ }
+
+ return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 4x4 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 4x4 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc ctxt
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+* 24*lambda is added to the SAD before comparison with the best SAD for
+* inter prediction. This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+ /* Overview: visits the sixteen 4x4 luma partitions of the current
+ * macroblock (four 8x8 blocks, four 4x4 partitions each). For every
+ * partition it gathers the neighbor pels from the source buffer (open loop),
+ * derives the estimated mode from the left/top partitions and calls
+ * pf_ih264e_evaluate_intra_4x4_modes, which reports the best mode and its
+ * cost through the two pointers passed to it. The partition costs are summed
+ * and, if the total is below ps_proc->i4_mb_cost, the macroblock is marked
+ * I4x4.
+ *
+ * Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD (distortion metric) of a 4x4 block, and its sum over the macroblock */
+ WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+ /* lambda */
+ UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+ /* cost = distortion + lambda*rate
+ * The running total starts at (24 + 1) * lambda. NOTE(review): the header
+ * comment of this function attributes 24 * lambda to JVT-O079; the extra
+ * 1 * lambda is not explained here - presumably mb type signalling, confirm. */
+ WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
+
+ /* mode signalling cost that is subtracted from the partition cost to get
+ * the distortion: one lambda when the best mode equals the estimated mode,
+ * four lambdas otherwise */
+ UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+ /* intra mode */
+ UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
+
+ /* neighbor pels for intra prediction */
+ UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+ /* pointer to curr partition */
+ UWORD8 *pu1_mb_curr;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* strides */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+ /* neighbors left, top, top right, top left */
+ UWORD8 *pu1_mb_a;
+ UWORD8 *pu1_mb_b;
+ UWORD8 *pu1_mb_c;
+ UWORD8 *pu1_mb_d;
+
+ /* neighbor availability: packed as bit0 = left (a), bit1 = top left (d),
+ * bit2 = top (b), bit3 = top right (c) */
+ WORD32 i4_ngbr_avbl;
+ block_neighbors_t s_ngbr_avbl;
+
+ /* temp vars */
+ UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
+
+ /* scan order inside 4x4 block: translates a raster partition index
+ * ((x >> 2) + y, with y a multiple of 4) into the (b8 << 2) + b4 index
+ * that the mode arrays below are written with */
+ const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+ /* ngbr sub mb modes: the top row keeps 16 mode entries per macroblock column */
+ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+ mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+ /* valid intra modes map: u2_valid_modes is indexed with the low three
+ * availability bits (left, top left, top) and yields the bitmask of modes
+ * handed to the evaluator */
+ UWORD32 u4_valid_intra_modes;
+ UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
+
+ i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3);
+ memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); /* per partition availability for this MB level pattern */
+
+ for (b8 = 0; b8 < 4; b8++)
+ {
+ u4_blk_x = (b8 & 0x01) << 3;
+ u4_blk_y = (b8 >> 1) << 3;
+ for (b4 = 0; b4 < 4; b4++)
+ {
+ u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
+ u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
+
+ pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+ /* when rdopt is off, we use the input as reference for constructing prediction buffer */
+ /* as opposed to using the recon pels. (open loop intra prediction) */
+ pu1_mb_a = pu1_mb_curr - 1; /* pel to the left of this partition's first row */
+ pu1_mb_b = pu1_mb_curr - i4_src_strd; /* row above this partition */
+ pu1_mb_c = pu1_mb_b + 4; /* row above, four pels to the right (top right) */
+ pu1_mb_d = pu1_mb_b - 1; /* pel above and to the left (top left) */
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+ /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+
+ i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+ s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
+ s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
+ s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
+ s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
+ /* set valid intra modes for evaluation */
+ u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
+
+ /* if top partition is available and top right is not available for intra prediction, then */
+ /* padd top right samples using top sample and make top right also available */
+ /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+
+ /* gather prediction pels from the neighbors
+ * Layout written below: [0..3] left column stored bottom to top,
+ * [4] top left, [5..8] top row, [9..12] top right row.
+ * Unavailable left / top / top left entries are zeroed. */
+ if (s_ngbr_avbl.u1_mb_a)
+ {
+ for(i = 0; i < 4; i++)
+ pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd];
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4, 0, 4);
+ }
+
+ if (s_ngbr_avbl.u1_mb_b)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4 + 5, 0, 4);
+ }
+
+ if (s_ngbr_avbl.u1_mb_d)
+ pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+ else
+ pu1_ngbr_pels_i4[4] = 0;
+
+ if (s_ngbr_avbl.u1_mb_c)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+ }
+ else if (s_ngbr_avbl.u1_mb_b)
+ {
+ /* top right missing but top present: replicate the last top pel and
+ * flag top right as available in the local struct only; entries
+ * [9..12] are left untouched when top is missing as well */
+ memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
+ s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
+ }
+
+ i4_partition_cost_least = INT_MAX;
+
+ /* predict the intra 4x4 mode for the current partition (for evaluating cost)
+ * DC when left or top is unavailable; otherwise the smaller of the left
+ * and top partition modes, each of which stays DC when it lies in a
+ * neighboring macroblock that is neither I4x4 nor I8x8 */
+ if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+ {
+ u4_estimated_intra_4x4_mode = DC_I4x4;
+ }
+ else
+ {
+ UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
+ UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
+
+ if (u4_pix_x == 0) /* left partition belongs to the left macroblock */
+ {
+ if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
+ }
+ else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
+ }
+ }
+ else /* left partition is inside this macroblock, already decided */
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
+ }
+
+ if (u4_pix_y == 0) /* top partition belongs to the top macroblock */
+ {
+ if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+ {
+ u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
+ }
+ else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+ {
+ u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
+ }
+ }
+ else /* top partition is inside this macroblock, already decided */
+ {
+ u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
+ }
+
+ u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
+ }
+
+ ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
+
+ /* mode evaluation and prediction
+ * i4_ngbr_avbl still holds the table value for this partition; the top
+ * right fix-up above only changed s_ngbr_avbl */
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
+ pu1_ngbr_pels_i4,
+ pu1_pred_mb, i4_src_strd,
+ i4_pred_strd, i4_ngbr_avbl,
+ &u4_best_intra_4x4_mode,
+ &i4_partition_cost_least,
+ u4_valid_intra_modes,
+ u4_lambda,
+ u4_estimated_intra_4x4_mode);
+
+
+ i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits);
+
+ DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
+ /* macroblock distortion */
+ i4_total_distortion += i4_partition_distortion_least;
+ i4_total_cost += i4_partition_cost_least;
+ /* mb partition mode */
+ ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
+ }
+ }
+
+ /* update the type of the mb if necessary: I4x4 wins only when its total
+ * cost is strictly below the best cost recorded so far */
+ if (i4_total_cost < ps_proc->i4_mb_cost)
+ {
+ ps_proc->i4_mb_cost = i4_total_cost;
+ ps_proc->i4_mb_distortion = i4_total_distortion;
+ ps_proc->u4_mb_type = I4x4;
+ }
+
+ return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief evaluate best intra 4x4 mode (rate distortion opt on)
+*
+* @par Description
+* This function evaluates all the possible intra 4x4 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream. In this variant the neighbor pels of each 4x4
+* partition are read from reconstructed data: from the recon picture buffer
+* along the left/top macroblock edge, and from pu1_ref_mb_intra_4x4 inside the
+* macroblock. After a partition's mode is chosen, its residual is transformed
+* and quantized and the partition is reconstructed, so that the following
+* partitions can predict from it.
+*
+* @param[in] ps_proc
+* pointer to proc ctxt
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+* 24*lambda is added to the SAD before comparison with the best SAD for
+* inter prediction. This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none. On success of the cost comparison, ps_proc->i4_mb_cost,
+* ps_proc->i4_mb_distortion and ps_proc->u4_mb_type are updated.
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD (distortion metric) of a 4x4 block, and its sum over the macroblock */
+ WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
+
+ /* lambda */
+ UWORD32 u4_lambda = ps_proc->u4_lambda;
+
+ /* cost = distortion + lambda*rate
+ * The running total starts at (24 + 1) * lambda. NOTE(review): 24 * lambda
+ * is the JVT-O079 bias mentioned in the header; the extra 1 * lambda is not
+ * explained here - presumably mb type signalling, confirm. */
+ WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
+
+ /* mode signalling cost that is subtracted from the partition cost to get
+ * the distortion: one lambda when the best mode equals the estimated mode,
+ * four lambdas otherwise */
+ UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
+
+ /* intra mode */
+ UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
+
+ /* neighbor pels for intra prediction */
+ UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
+
+ /* pointer to curr partition (source), to the buffers the left / top
+ * neighbors are read from, and to this partition inside the intra 4x4
+ * reference buffer */
+ UWORD8 *pu1_mb_curr;
+ UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
+ UWORD8 *pu1_ref_mb_intra_4x4;
+
+ /* pointer to residual macro block; advanced by MB_SIZE per partition */
+ WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
+
+ /* pointer to prediction macro block */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
+
+ /* strides; the left / top reference strides depend on which buffer the
+ * neighbor is taken from */
+ WORD32 i4_src_strd = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_ref_strd_left, i4_ref_strd_top;
+
+ /* neighbors left, top, top right, top left */
+ UWORD8 *pu1_mb_a;
+ UWORD8 *pu1_mb_b;
+ UWORD8 *pu1_mb_c;
+ UWORD8 *pu1_mb_d;
+
+ /* number of non zero coeffs; one byte per partition, advanced in the loop */
+ UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
+
+ /* quantization parameters */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* neighbor availability: packed as bit0 = left (a), bit1 = top left (d),
+ * bit2 = top (b), bit3 = top right (c) */
+ WORD32 i4_ngbr_avbl;
+ block_neighbors_t s_ngbr_avbl;
+
+ /* temp vars */
+ UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
+
+ /* scan order inside 4x4 block: translates a raster partition index
+ * ((x >> 2) + y, with y a multiple of 4) into the (b8 << 2) + b4 index
+ * that the mode arrays below are written with */
+ const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+ /* ngbr sub mb modes: the top row keeps 16 mode entries per macroblock column */
+ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
+ mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+ /* valid intra modes map: u2_valid_modes is indexed with the low three
+ * availability bits (left, top left, top) and yields the bitmask of modes
+ * handed to the evaluator */
+ UWORD32 u4_valid_intra_modes;
+ UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
+
+ /* Dummy variable for 4x4 trans function */
+ WORD16 i2_dc_dummy;
+
+ /* compute ngbr availability for sub blks */
+ i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3);
+ memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
+
+ for(b8 = 0; b8 < 4; b8++)
+ {
+ u4_blk_x = (b8 & 0x01) << 3;
+ u4_blk_y = (b8 >> 1) << 3;
+ for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
+ {
+ u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
+ u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
+
+ pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
+ pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
+ if (u4_pix_x == 0) /* left neighbor lies outside this MB: use the recon buffer */
+ {
+ i4_ref_strd_left = ps_proc->i4_rec_strd;
+ pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left);
+ }
+ else /* left neighbor is an earlier partition of this MB */
+ {
+ i4_ref_strd_left = i4_pred_strd;
+ pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
+ }
+ if (u4_pix_y == 0) /* top neighbor lies outside this MB: use the recon buffer */
+ {
+ i4_ref_strd_top = ps_proc->i4_rec_strd;
+ pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top);
+ }
+ else /* top neighbor is an earlier partition of this MB */
+ {
+ i4_ref_strd_top = i4_pred_strd;
+ pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
+ }
+
+ pu1_mb_a = pu1_mb_ref_left - 1; /* pel to the left of this partition's first row */
+ pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* row above this partition */
+ pu1_mb_c = pu1_mb_b + 4; /* row above, four pels to the right (top right) */
+ if (u4_pix_y == 0)
+ pu1_mb_d = pu1_mb_b - 1;
+ else
+ pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* top left, reached through the left reference buffer */
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
+ /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
+
+ i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
+ s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
+ s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
+ s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
+ s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
+ /* set valid intra modes for evaluation */
+ u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
+
+ /* if top partition is available and top right is not available for intra prediction, then */
+ /* padd top right samples using top sample and make top right also available */
+ /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
+
+ /* gather prediction pels from the neighbors
+ * Layout written below: [0..3] left column stored bottom to top,
+ * [4] top left, [5..8] top row, [9..12] top right row.
+ * Unavailable left / top / top left entries are zeroed. */
+ if (s_ngbr_avbl.u1_mb_a)
+ {
+ for(i = 0; i < 4; i++)
+ pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left];
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4,0,4);
+ }
+ if(s_ngbr_avbl.u1_mb_b)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
+ }
+ else
+ {
+ memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
+ }
+ if (s_ngbr_avbl.u1_mb_d)
+ pu1_ngbr_pels_i4[4] = *pu1_mb_d;
+ else
+ pu1_ngbr_pels_i4[4] = 0;
+ if (s_ngbr_avbl.u1_mb_c)
+ {
+ memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
+ }
+ else if (s_ngbr_avbl.u1_mb_b)
+ {
+ /* top right missing but top present: replicate the last top pel and
+ * flag top right as available in the local struct only; entries
+ * [9..12] are left untouched when top is missing as well */
+ memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
+ s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
+ }
+
+ i4_partition_cost_least = INT_MAX;
+
+ /* predict the intra 4x4 mode for the current partition (for evaluating cost)
+ * DC when left or top is unavailable; otherwise the smaller of the left
+ * and top partition modes, each of which stays DC when it lies in a
+ * neighboring macroblock that is neither I4x4 nor I8x8 */
+ if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
+ {
+ u4_estimated_intra_4x4_mode = DC_I4x4;
+ }
+ else
+ {
+ UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
+ UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
+
+ if (u4_pix_x == 0) /* left partition belongs to the left macroblock */
+ {
+ if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
+ }
+ else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
+ }
+ }
+ else /* left partition is inside this macroblock, already decided */
+ {
+ u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
+ }
+
+ if (u4_pix_y == 0) /* top partition belongs to the top macroblock */
+ {
+ if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
+ {
+ u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
+ }
+ else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
+ {
+ u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
+ }
+ }
+ else /* top partition is inside this macroblock, already decided */
+ {
+ u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
+ }
+
+ u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
+ }
+
+ ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
+
+ /* mode evaluation and prediction
+ * i4_ngbr_avbl still holds the table value for this partition; the top
+ * right fix-up above only changed s_ngbr_avbl */
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
+ pu1_ngbr_pels_i4,
+ pu1_pred_mb, i4_src_strd,
+ i4_pred_strd, i4_ngbr_avbl,
+ &u4_best_intra_4x4_mode,
+ &i4_partition_cost_least,
+ u4_valid_intra_modes,
+ u4_lambda,
+ u4_estimated_intra_4x4_mode);
+
+
+ i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits);
+
+ DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
+
+ /* macroblock distortion */
+ i4_total_distortion += i4_partition_distortion_least;
+ i4_total_cost += i4_partition_cost_least;
+
+ /* mb partition mode */
+ ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
+
+
+ /********************************************************/
+ /* error estimation, */
+ /* transform */
+ /* quantization */
+ /********************************************************/
+ ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb,
+ pi2_res_mb, i4_src_strd,
+ i4_pred_strd,
+ /* No op stride, this implies a buff of length 1x16 */
+ ps_qp_params->pu2_scale_mat,
+ ps_qp_params->pu2_thres_mat,
+ ps_qp_params->u1_qbits,
+ ps_qp_params->u4_dead_zone,
+ pu1_nnz, &i2_dc_dummy);
+
+ /********************************************************/
+ /* ierror estimation, */
+ /* itransform */
+ /* iquantization */
+ /* NOTE(review): pu1_ref_mb_intra_4x4 is presumably the */
+ /* recon output that later partitions read as neighbors */
+ /* - confirm against pf_iquant_itrans_recon_4x4 */
+ /********************************************************/
+ ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb,
+ pu1_ref_mb_intra_4x4,
+ i4_pred_strd, i4_pred_strd,
+ ps_qp_params->pu2_iscale_mat,
+ ps_qp_params->pu2_weigh_mat,
+ ps_qp_params->u1_qp_div,
+ ps_proc->pv_scratch_buff, 0,
+ NULL);
+ }
+ }
+
+ /* update the type of the mb if necessary: I4x4 wins only when its total
+ * cost is strictly below the best cost recorded so far */
+ if (i4_total_cost < ps_proc->i4_mb_cost)
+ {
+ ps_proc->i4_mb_cost = i4_total_cost;
+ ps_proc->i4_mb_distortion = i4_total_distortion;
+ ps_proc->u4_mb_type = I4x4;
+ }
+
+ return ;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best chroma intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible chroma intra 8x8 modes and finds
+* the mode that best represents the macroblock (least distortion) and occupies
+* fewer bits in the bitstream. The neighbor pels are gathered from the chroma
+* recon buffer; the PLANE mode is predicted and measured separately from the
+* modes handled by pf_ih264e_evaluate_intra_chroma_modes.
+*
+* @param[in] ps_proc
+* pointer to macroblock context (handle)
+*
+* @remarks
+* For chroma best intra pred mode is calculated based only on SAD
+*
+* @returns none. The chosen mode is stored in ps_proc->u1_c_i8_mode and the
+* neighbor availability in ps_proc->i4_chroma_neighbor_avail_8x8_mb; the
+* distortion itself is not stored.
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
+{
+ /* Codec Context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* SAD (distortion metric): i4_mb_distortion receives the PLANE mode SAD,
+ * i4_chroma_mb_distortion tracks the least SAD found so far */
+ WORD32 i4_mb_distortion, i4_chroma_mb_distortion;
+
+ /* intra mode */
+ UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8;
+
+ /* neighbor pels for intra prediction
+ * Layout written below: [0..15] left column as byte pairs stored bottom to
+ * top, [16..17] top left pair, [18..33] top row (16 bytes) */
+ UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels;
+
+ /* pointer to curr macro block (source) and to its position in the recon buffer */
+ UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
+ UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
+
+ /* pointer to prediction macro block; PLANE prediction has its own buffer */
+ UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
+ UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
+
+ /* strides */
+ WORD32 i4_src_strd_c = ps_proc->i4_src_strd;
+ WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+ WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd;
+
+ /* neighbors left, top, top left
+ * left and top left sit two bytes before the block (presumably one
+ * interleaved Cb/Cr pair - confirm against the buffer format) */
+ UWORD8 *pu1_mb_a = pu1_ref_mb - 2;
+ UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c;
+ UWORD8 *pu1_mb_d = pu1_mb_b - 2;
+
+ /* neighbor availability
+ * u1_valid_intra_modes is indexed by i4_ngbr_avbl (bit0 left, bit1 top
+ * left, bit2 top) and yields a bitmask of chroma modes; bit 3 (value 8) is
+ * the PLANE mode tested further below.
+ * NOTE(review): the entries for indices 2, 3 and 6 enable PLANE without
+ * both left and top being available - confirm these combinations cannot
+ * occur. */
+ const UWORD8 u1_valid_intra_modes[8] = {1, 3, 9, 11, 5, 7, 13, 15,};
+ WORD32 i4_ngbr_avbl;
+
+ /* valid intra modes map */
+ UWORD32 u4_valid_intra_modes;
+
+ /* temp var */
+ UWORD8 i;
+
+ /* locating neighbors that are available for prediction */
+ /* TODO : update the neighbor availability information basing on constrained intra pred information */
+ /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines
+ * basing on neighbors available and hence evade the computation of neighbor availability totally. */
+ /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
+ i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1);
+ ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl;
+
+ /* gather prediction pels from the neighbors */
+ /* left pels: row (i / 2) of the left column goes to entries 14 - i and
+ * 15 - i, i.e. the eight byte pairs are stored bottom to top */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_a)
+ {
+ for (i = 0; i < 16; i += 2)
+ {
+ pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c];
+ pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1];
+ }
+ }
+ else
+ {
+ ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE);
+ }
+
+ /* top pels */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_b)
+ {
+ ps_codec->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16);
+ }
+ else
+ {
+ ps_codec->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE);
+ }
+
+ /* top left pels; entries 16 and 17 are left untouched when top left is
+ * not available */
+ if (ps_proc->ps_ngbr_avbl->u1_mb_d)
+ {
+ pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d;
+ pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1);
+ }
+
+ u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
+
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) /* the fast preset never evaluates PLANE */
+ u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8);
+
+ i4_chroma_mb_distortion = INT_MAX;
+
+ /* perform intra mode chroma 8x8 evaluation */
+ /* intra prediction */
+ ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_curr_mb,
+ pu1_ngbr_pels_c_i8x8,
+ pu1_pred_mb,
+ i4_src_strd_c,
+ i4_pred_strd,
+ i4_ngbr_avbl,
+ &u4_best_chroma_intra_8x8_mode,
+ &i4_chroma_mb_distortion,
+ u4_valid_intra_modes);
+
+ if (u4_valid_intra_modes & 8)/* if Chroma PLANE is valid*/
+ {
+ (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, i4_pred_strd, i4_ngbr_avbl);
+
+ /* evaluate distortion(sad); the current least distortion is passed in
+ * alongside the output pointer */
+ ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, i4_chroma_mb_distortion, &i4_mb_distortion);
+
+ /* update the least distortion information if necessary */
+ if(i4_mb_distortion < i4_chroma_mb_distortion)
+ {
+ i4_chroma_mb_distortion = i4_mb_distortion;
+ u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8;
+ }
+ }
+
+ DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, u4_best_chroma_intra_8x8_mode);
+
+ ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode;
+
+ return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief
+* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
+* prediction.
+*
+* @par Description
+* This function evaluates the first three 16x16 modes, computes the SAD of
+* each against the source block and, when the least SAD is strictly smaller
+* than the value already held in *pu4_sadmin, writes the prediction of the
+* winning mode to pu1_dst. When it is not smaller, *pu4_sadmin, *u4_intra_mode
+* and pu1_dst are left untouched. Equal SADs are resolved in the order VERT,
+* HORZ, DC.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+* UWORD8 pointer to neighbouring pels; entries [0..15] are read as the left
+* column stored bottom to top, entries [17..32] as the top row
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+* Pointer to the variable holding the SAD to beat on entry and in which the
+* minimum sad is returned
+*
+* @param[in] u4_valid_intra_modes
+* Says what all modes are valid (bit 0 VERT, bit 1 HORZ, bit 2 DC)
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels_i16,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes)
+{
+ UWORD8 *pu1_neighbour;
+ UWORD8 *pu1_src_temp = pu1_src;
+ UWORD8 left = 0, top = 0;
+ WORD32 u4_dcval = 0; /* running sum of neighbor pels, later the DC prediction value */
+ WORD32 i, j;
+ WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
+ i4_min_sad = INT_MAX;
+ UWORD8 val;
+
+ left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); /* used as 0/1 in the shift below; assumes the mask is 0x1 - confirm */
+ top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; /* assumes the mask is 0x4 so that this becomes 0/1 - confirm */
+
+ /* left available: accumulate the HORZ SAD row by row and add the left
+ * column (plus a rounding term of 8) to the DC sum */
+ if (left)
+ {
+ i4_sad_horz = 0;
+
+ for (i = 0; i < 16; i++)
+ {
+ val = pu1_ngbr_pels_i16[15 - i];
+
+ u4_dcval += val;
+
+ for (j = 0; j < 16; j++)
+ {
+ i4_sad_horz += ABS(val - pu1_src_temp[j]);
+ }
+
+ pu1_src_temp += src_strd;
+ }
+ u4_dcval += 8;
+ }
+
+ pu1_src_temp = pu1_src;
+ /* top available: accumulate the VERT SAD row by row and add the top row
+ * (plus a rounding term of 8) to the DC sum */
+ if (top)
+ {
+ i4_sad_vert = 0;
+
+ for (i = 0; i < 16; i++)
+ {
+ u4_dcval += pu1_ngbr_pels_i16[17 + i];
+
+ for (j = 0; j < 16; j++)
+ {
+ i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
+ }
+ pu1_src_temp += src_strd;
+
+ }
+ u4_dcval += 8;
+ }
+
+ u4_dcval = (u4_dcval) >> (3 + left + top); /* average over the 16 or 32 pels that were summed */
+
+ pu1_src_temp = pu1_src;
+
+ /* none available: the sum is still zero, so the DC value becomes 128 */
+ u4_dcval += (left == 0) * (top == 0) * 128;
+
+ i4_sad_dc = 0;
+
+ for (i = 0; i < 16; i++)
+ {
+ for (j = 0; j < 16; j++)
+ {
+ i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
+ }
+ pu1_src_temp += src_strd;
+ }
+
+ if ((u4_valid_intra_modes & 04) == 0)/* If DC is disabled */
+ i4_sad_dc = INT_MAX;
+
+ if ((u4_valid_intra_modes & 01) == 0)/* If VERT is disabled */
+ i4_sad_vert = INT_MAX;
+
+ if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */
+ i4_sad_horz = INT_MAX;
+
+ i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
+
+ /* Finding Minimum sad and doing corresponding prediction; nothing is
+ * written unless the minimum beats the SAD supplied by the caller */
+ if (i4_min_sad < *pu4_sadmin)
+ {
+ *pu4_sadmin = i4_min_sad;
+ if (i4_min_sad == i4_sad_vert)
+ {
+ *u4_intra_mode = VERT_I16x16;
+ pu1_neighbour = pu1_ngbr_pels_i16 + 17;
+ for (j = 0; j < 16; j++) /* every row is a copy of the top row */
+ {
+ memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
+ pu1_dst += dst_strd;
+ }
+ }
+ else if (i4_min_sad == i4_sad_horz)
+ {
+ *u4_intra_mode = HORZ_I16x16;
+ for (j = 0; j < 16; j++) /* every row is filled with its left pel */
+ {
+ val = pu1_ngbr_pels_i16[15 - j];
+ memset(pu1_dst, val, MB_SIZE);
+ pu1_dst += dst_strd;
+ }
+ }
+ else
+ {
+ *u4_intra_mode = DC_I16x16;
+ for (j = 0; j < 16; j++) /* every row is filled with the DC value */
+ {
+ memset(pu1_dst, u4_dcval, MB_SIZE);
+ pu1_dst += dst_strd;
+ }
+ }
+ }
+ return;
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* Evaluate best intra 4x4 mode and perform prediction.
+*
+* @par Description
+* This function evaluates 4x4 modes and compute corresponding sad
+* and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[in] pu4_sadmin
+* Pointer to the variable in which minimum cost is returned
+*
+* @param[in] u4_valid_intra_modes
+* Says what all modes are valid
+*
+* @param[in] u4_lambda
+* Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+* Predicted mode for cost computation
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes,
+ UWORD32 u4_lambda,
+ UWORD32 u4_predictd_mode)
+{
+ UWORD8 *pu1_src_temp = pu1_src;
+ UWORD8 *pu1_pred = pu1_ngbr_pels;
+ UWORD8 left = 0, top = 0;
+ UWORD8 u1_pred_val = 0;
+ UWORD8 u1_pred_vals[4] = {0};
+ UWORD8 *pu1_pred_val = NULL;
+ /* To store FILT121 operated values*/
+ UWORD8 u1_pred_vals_diag_121[15] = {0};
+ /* To store FILT11 operated values*/
+ UWORD8 u1_pred_vals_diag_11[15] = {0};
+ UWORD8 u1_pred_vals_vert_r[8] = {0};
+ UWORD8 u1_pred_vals_horz_d[10] = {0};
+ UWORD8 u1_pred_vals_horz_u[10] = {0};
+ WORD32 u4_dcval = 0;
+ WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+ INT_MAX, INT_MAX, INT_MAX, INT_MAX};
+
+ WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+ INT_MAX, INT_MAX, INT_MAX, INT_MAX};
+ WORD32 i, i4_min_cost = INT_MAX;
+
+ left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+ top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+ /* Computing SAD */
+
+ /* VERT mode valid */
+ if (u4_valid_intra_modes & 1)
+ {
+ pu1_pred = pu1_ngbr_pels + 5;
+ i4_sad[VERT_I4x4] = 0;
+ i4_cost[VERT_I4x4] = 0;
+
+ USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
+
+ i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ /* HORZ mode valid */
+ if (u4_valid_intra_modes & 2)
+ {
+ i4_sad[HORZ_I4x4] = 0;
+ i4_cost[HORZ_I4x4] =0;
+ pu1_src_temp = pu1_src;
+
+ u1_pred_val = pu1_ngbr_pels[3];
+
+ i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+ + ABS(pu1_src_temp[1] - u1_pred_val)
+ + ABS(pu1_src_temp[2] - u1_pred_val)
+ + ABS(pu1_src_temp[3] - u1_pred_val);
+ pu1_src_temp += src_strd;
+
+ u1_pred_val = pu1_ngbr_pels[2];
+
+ i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+ + ABS(pu1_src_temp[1] - u1_pred_val)
+ + ABS(pu1_src_temp[2] - u1_pred_val)
+ + ABS(pu1_src_temp[3] - u1_pred_val);
+ pu1_src_temp += src_strd;
+
+ u1_pred_val = pu1_ngbr_pels[1];
+
+ i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+ + ABS(pu1_src_temp[1] - u1_pred_val)
+ + ABS(pu1_src_temp[2] - u1_pred_val)
+ + ABS(pu1_src_temp[3] - u1_pred_val);
+ pu1_src_temp += src_strd;
+
+ u1_pred_val = pu1_ngbr_pels[0];
+
+ i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
+ + ABS(pu1_src_temp[1] - u1_pred_val)
+ + ABS(pu1_src_temp[2] - u1_pred_val)
+ + ABS(pu1_src_temp[3] - u1_pred_val);
+
+ i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ /* DC mode valid */
+ if (u4_valid_intra_modes & 4)
+ {
+ i4_sad[DC_I4x4] = 0;
+ i4_cost[DC_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+
+ if (left)
+ u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2]
+ + pu1_ngbr_pels[3] + 2;
+ if (top)
+ u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7]
+ + pu1_ngbr_pels[8] + 2;
+
+ u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
+
+ /* none available */
+ memset(u1_pred_vals, u4_dcval, 4);
+ USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
+ pu1_src_temp += src_strd;
+
+ i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ /* if modes other than VERT, HORZ and DC are valid */
+ if (u4_valid_intra_modes > 7)
+ {
+ pu1_pred = pu1_ngbr_pels;
+ pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
+
+ /* Performing FILT121 and FILT11 operation for all neighbour values*/
+ for (i = 0; i < 13; i++)
+ {
+ u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
+ u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
+
+ pu1_pred++;
+ }
+
+ if (u4_valid_intra_modes & 8)/* DIAG_DL */
+ {
+ i4_sad[DIAG_DL_I4x4] = 0;
+ i4_cost[DIAG_DL_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+ pu1_pred_val = u1_pred_vals_diag_121 + 5;
+
+ USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
+ pu1_src_temp += src_strd;
+ i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ if (u4_valid_intra_modes & 16)/* DIAG_DR */
+ {
+ i4_sad[DIAG_DR_I4x4] = 0;
+ i4_cost[DIAG_DR_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+ pu1_pred_val = u1_pred_vals_diag_121 + 3;
+
+ USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
+ pu1_src_temp += src_strd;
+ i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+
+ }
+
+ if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/
+ {
+ i4_sad[VERT_R_I4x4] = 0;
+
+ pu1_src_temp = pu1_src;
+ u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
+ memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
+ u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
+ memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
+
+ pu1_pred_val = u1_pred_vals_diag_11 + 4;
+ USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
+ pu1_pred_val = u1_pred_vals_diag_121 + 3;
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4),
+ i4_sad[VERT_R_I4x4]);
+
+ i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/
+ {
+ i4_sad[HORZ_D_I4x4] = 0;
+
+ pu1_src_temp = pu1_src;
+ u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
+ memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
+ u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
+ u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
+ u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
+ u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
+ u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
+ u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
+
+ pu1_pred_val = u1_pred_vals_horz_d;
+ USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
+
+ i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/
+ {
+ i4_sad[VERT_L_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+ pu1_pred_val = u1_pred_vals_diag_11 + 5;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+ pu1_src_temp += src_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 5;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+ pu1_src_temp += src_strd;
+ pu1_pred_val = u1_pred_vals_diag_11 + 6;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+ pu1_src_temp += src_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 6;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
+
+ i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/
+ {
+ i4_sad[HORZ_U_I4x4] = 0;
+ pu1_src_temp = pu1_src;
+ u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
+ u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
+ u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
+ u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
+ u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
+ u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
+
+ memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
+
+ pu1_pred_val = u1_pred_vals_horz_u;
+ USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
+ pu1_src_temp += src_strd;
+ USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
+
+ i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ?
+ u4_lambda : 4 * u4_lambda);
+ }
+
+ i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]),
+ MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
+ MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
+
+ }
+ else
+ {
+ /* Only first three modes valid */
+ i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
+ }
+
+ *pu4_sadmin = i4_min_cost;
+
+ if (i4_min_cost == i4_cost[0])
+ {
+ *u4_intra_mode = VERT_I4x4;
+ pu1_pred_val = pu1_ngbr_pels + 5;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ }
+ else if (i4_min_cost == i4_cost[1])
+ {
+ *u4_intra_mode = HORZ_I4x4;
+ memset(pu1_dst, pu1_ngbr_pels[3], 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, pu1_ngbr_pels[2], 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, pu1_ngbr_pels[1], 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, pu1_ngbr_pels[0], 4);
+ }
+ else if (i4_min_cost == i4_cost[2])
+ {
+ *u4_intra_mode = DC_I4x4;
+ memset(pu1_dst, u4_dcval, 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, u4_dcval, 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, u4_dcval, 4);
+ pu1_dst += dst_strd;
+ memset(pu1_dst, u4_dcval, 4);
+ }
+
+ else if (i4_min_cost == i4_cost[3])
+ {
+ *u4_intra_mode = DIAG_DL_I4x4;
+ pu1_pred_val = u1_pred_vals_diag_121 + 5;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 1), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 3), 4);
+ }
+ else if (i4_min_cost == i4_cost[4])
+ {
+ *u4_intra_mode = DIAG_DR_I4x4;
+ pu1_pred_val = u1_pred_vals_diag_121 + 3;
+
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val - 1), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val - 2), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val - 3), 4);
+ }
+
+ else if (i4_min_cost == i4_cost[5])
+ {
+ *u4_intra_mode = VERT_R_I4x4;
+ pu1_pred_val = u1_pred_vals_diag_11 + 4;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 3;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
+ }
+ else if (i4_min_cost == i4_cost[6])
+ {
+ *u4_intra_mode = HORZ_D_I4x4;
+ pu1_pred_val = u1_pred_vals_horz_d;
+ memcpy(pu1_dst, (pu1_pred_val + 6), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 4), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ }
+ else if (i4_min_cost == i4_cost[7])
+ {
+ *u4_intra_mode = VERT_L_I4x4;
+ pu1_pred_val = u1_pred_vals_diag_11 + 5;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 5;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ pu1_pred_val = u1_pred_vals_diag_11 + 6;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ pu1_pred_val = u1_pred_vals_diag_121 + 6;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ }
+ else if (i4_min_cost == i4_cost[8])
+ {
+ *u4_intra_mode = HORZ_U_I4x4;
+ pu1_pred_val = u1_pred_vals_horz_u;
+ memcpy(pu1_dst, (pu1_pred_val), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 2), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 4), 4);
+ pu1_dst += dst_strd;
+ memcpy(pu1_dst, (pu1_pred_val + 6), 4);
+ pu1_dst += dst_strd;
+ }
+
+ return;
+}
+
+/**
+******************************************************************************
+*
+* @brief:
+* Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the prediction.
+*
+* @par Description
+* Computes the SAD of the source block against the HORZ, VERT and DC chroma predictions and,
+* only if the least SAD is below *pu4_sadmin, writes the prediction of that mode to pu1_dst.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source (read as 8 rows of 8 interleaved U,V pairs)
+*
+* @param[in] pu1_ngbr_pels
+* UWORD8 pointer to neighbouring pels (left read from [0..15], top from [18..33], U,V interleaved)
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination (written only when a lower SAD is found)
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels (tested with LEFT_MB_AVAILABLE_MASK and TOP_MB_AVAILABLE_MASK)
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned (untouched when no lower SAD is found)
+*
+* @param[in,out] pu4_sadmin
+* Pointer to the SAD to beat on entry; overwritten with the minimum SAD when that is lower
+*
+* @param[in] u4_valid_intra_modes
+* Bit mask of enabled modes: bit 0 - DC, bit 1 - HORZ, bit 2 - VERT
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes)
+{
+ UWORD8 *pu1_neighbour;
+ UWORD8 *pu1_src_temp = pu1_src;
+ UWORD8 left = 0, top = 0;
+ WORD32 u4_dcval_u_l[2] = { 0, 0 }, /* sums of left neighbours for 'U', two separate sets - [0] for rows 0-3 and [1] for rows 4-7 */
+ u4_dcval_u_t[2] = { 0, 0 }; /* sums of top neighbours for 'U' - [0] for columns 0-3 and [1] for columns 4-7 */
+
+ WORD32 u4_dcval_v_l[2] = { 0, 0 }, /* sums of left neighbours for 'V', split like the 'U' sums */
+ u4_dcval_v_t[2] = { 0, 0 }; /* sums of top neighbours for 'V', split like the 'U' sums */
+
+ WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX,
+ i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
+ UWORD8 val_u, val_v;
+
+ WORD32 u4_dc_val[2][2][2];/* -----------
+ | | | Chroma can have four
+ | 00 | 01 | separate dc value...
+ ----------- u4_dc_val corresponds to this dc values
+ | | | indexed as u4_dc_val[row][col][0] for U and u4_dc_val[row][col][1] for V
+ | 10 | 11 |
+ ----------- */
+ left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); /* used as a shift count below, so presumably the mask is bit 0 - TODO confirm */
+ top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; /* presumably the mask is bit 2, giving 0 or 1 - TODO confirm */
+
+ /* Evaluating HORZ */
+ if (left)/* If left is available */
+ {
+ i4_sad_horz = 0;
+
+ for (i = 0; i < 8; i++)
+ {
+ val_v = pu1_ngbr_pels[15 - 2 * i]; /* left pels are read from index 15 downwards as row i increases */
+ val_u = pu1_ngbr_pels[15 - 2 * i - 1];
+ row = i / 4;
+ u4_dcval_u_l[row] += val_u; /* accumulate the left sums needed later for DC */
+ u4_dcval_v_l[row] += val_v;
+ for (j = 0; j < 8; j++)
+ {
+ i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode */
+ i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
+ }
+
+ pu1_src_temp += src_strd;
+ }
+ u4_dcval_u_l[0] += 2; /* rounding term for the right shifts in the DC computation below */
+ u4_dcval_u_l[1] += 2;
+ u4_dcval_v_l[0] += 2;
+ u4_dcval_v_l[1] += 2;
+ }
+
+ /* Evaluating VERT */
+ pu1_src_temp = pu1_src;
+ if (top) /* top available */
+ {
+ i4_sad_vert = 0;
+
+ for (i = 0; i < 8; i++) /* i is the source row for the SAD and also the top column for the DC sums */
+ {
+ col = i / 4;
+
+ val_u = pu1_ngbr_pels[18 + i * 2];
+ val_v = pu1_ngbr_pels[18 + i * 2 + 1];
+ u4_dcval_u_t[col] += val_u; /* accumulate the top sums needed later for DC */
+ u4_dcval_v_t[col] += val_v;
+
+ for (j = 0; j < 16; j++)
+ {
+ i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode over 16 interleaved bytes */
+ }
+ pu1_src_temp += src_strd;
+
+ }
+ u4_dcval_u_t[0] += 2; /* rounding term for the right shifts in the DC computation below */
+ u4_dcval_u_t[1] += 2;
+ u4_dcval_v_t[0] += 2;
+ u4_dcval_v_t[1] += 2;
+ }
+
+ /* computing DC value */
+ /* Equation 8-128 in spec */
+ u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
+ u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
+ u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
+ u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
+
+ if (top)
+ {
+ /* Equation 8-132 in spec */
+ u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
+ u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
+ }
+ else
+ {
+ u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left); /* top missing: fall back to the left sum of rows 0-3 */
+ u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
+ }
+
+ if (left)
+ {
+ u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
+ u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
+ }
+ else
+ {
+ u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top); /* left missing: fall back to the top sum of columns 0-3 */
+ u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
+ }
+
+ if (!(left || top))
+ {
+ /* none available: every DC value defaults to 128 */
+ u4_dc_val[0][0][0] = u4_dc_val[0][0][1] =
+ u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
+ u4_dc_val[1][0][0] = u4_dc_val[1][0][1] =
+ u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
+ }
+
+ /* Evaluating DC */
+ pu1_src_temp = pu1_src;
+ i4_sad_dc = 0;
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ col = j / 4;
+ row = i / 4;
+ val_u = u4_dc_val[row][col][0];
+ val_v = u4_dc_val[row][col][1];
+
+ i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode */
+ i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
+ }
+ pu1_src_temp += src_strd;
+ }
+
+ if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled */
+ i4_sad_dc = INT_MAX;
+ if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */
+ i4_sad_horz = INT_MAX;
+ if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled */
+ i4_sad_vert = INT_MAX;
+
+ i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
+
+ /* Finding Minimum sad and doing corresponding prediction; outputs stay untouched when the caller's SAD is not beaten */
+ if (i4_min_sad < *pu4_sadmin)
+ {
+ *pu4_sadmin = i4_min_sad;
+
+ if (i4_min_sad == i4_sad_dc) /* on equal SADs DC is preferred, then HORZ, then VERT */
+ {
+ *u4_intra_mode = DC_CH_I8x8;
+ for (i = 0; i < 8; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ col = j / 4;
+ row = i / 4;
+
+ pu1_dst[2 * j] = u4_dc_val[row][col][0];
+ pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
+ }
+ pu1_dst += dst_strd;
+ }
+ }
+ else if (i4_min_sad == i4_sad_horz)
+ {
+ *u4_intra_mode = HORZ_CH_I8x8;
+ for (j = 0; j < 8; j++) /* here j is the row and i the column, unlike the loops above */
+ {
+ val_v = pu1_ngbr_pels[15 - 2 * j];
+ val_u = pu1_ngbr_pels[15 - 2 * j - 1];
+
+ for (i = 0; i < 8; i++)
+ {
+ pu1_dst[2 * i] = val_u;
+ pu1_dst[2 * i + 1] = val_v;
+
+ }
+ pu1_dst += dst_strd;
+ }
+ }
+ else
+ {
+ *u4_intra_mode = VERT_CH_I8x8;
+ pu1_neighbour = pu1_ngbr_pels + 18; /* start of the top neighbours */
+ for (j = 0; j < 8; j++)
+ {
+ memcpy(pu1_dst, pu1_neighbour, MB_SIZE); /* same top row copied to every output row; MB_SIZE is presumably 16 - TODO confirm */
+ pu1_dst += dst_strd;
+ }
+ }
+ }
+
+ return;
+}
diff --git a/encoder/ih264e_intra_modes_eval.h b/encoder/ih264e_intra_modes_eval.h
new file mode 100755
index 0000000..c8402e5
--- /dev/null
+++ b/encoder/ih264e_intra_modes_eval.h
@@ -0,0 +1,418 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_intra_modes_eval.h
+*
+* @brief
+* This file contains declarations of routines that perform rate distortion
+* analysis on a macroblock if coded as intra.
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_INTRA_MODES_EVAL_H_
+#define IH264E_INTRA_MODES_EVAL_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+* derivation process for macroblock availability
+*
+* @par Description
+* Calculates the availability of the left, top, topright and topleft macroblocks.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc context (handle)
+*
+* @remarks Based on section 6.4.5 in H264 spec
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_derive_nghbr_avbl_of_mbs
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* derivation process for subblock/partition availability
+*
+* @par Description
+* Calculates the availability of the left, top, topright and topleft subblock
+* or partitions.
+*
+* @param[in] s_ngbr_avbl
+* pointer to the block neighbour availability structure (block_neighbors_t)
+*
+* @param[in] i1_pel_pos_x
+* column position of the pel wrt the current block
+*
+* @param[in] i1_pel_pos_y
+* row position of the pel in wrt current block
+*
+* @remarks Assumptions: before calling this function it is assumed that
+* the neighbor availability of the current macroblock is already derived.
+* Based on table 6-3 of H264 specification
+*
+* @return availability status (yes or no)
+*
+******************************************************************************
+*/
+UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions
+ (
+ block_neighbors_t *s_ngbr_avbl,
+ WORD8 i1_pel_pos_x,
+ WORD8 i1_pel_pos_y
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 16x16 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 16x16 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to process context (handle)
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
+* the SAD and cost are one and the same.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 8x8 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc ctxt
+*
+* @remarks Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: TODO: This function needs to be tested
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 4x4 mode (rate distortion opt on)
+*
+* @par Description
+* This function evaluates all the possible intra 4x4 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc ctxt
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+* 24*lambda is added to the SAD before comparison with the best SAD for
+* inter prediction. This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 4x4 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible intra 4x4 modes and finds the mode
+* that best represents the macro-block (least distortion) and occupies fewer
+* bits in the bit-stream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to proc ctxt
+*
+* @remarks
+* Ideally the cost of encoding a macroblock is calculated as
+* (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
+* input block and the reconstructed block and rate is the number of bits taken
+* to place the macroblock in the bit-stream. In this routine the rate does not
+* exactly point to the total number of bits it takes, rather it points to header
+* bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
+* and residual bits fall in to texture bits the number of bits taken to encoding
+* mbtype is considered as rate, we compute cost. Further we will approximate
+* the distortion as the deviation b/w input and the predicted block as opposed
+* to input and reconstructed block.
+*
+* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
+* 24*lambda is added to the SAD before comparison with the best SAD for
+* inter prediction. This is an empirical value to prevent using too many intra
+* blocks.
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best chroma intra 8x8 mode (rate distortion opt off)
+*
+* @par Description
+* This function evaluates all the possible chroma intra 8x8 modes and finds
+* the mode that best represents the macroblock (least distortion) and occupies
+* fewer bits in the bitstream.
+*
+* @param[in] ps_proc_ctxt
+* pointer to macroblock context (handle)
+*
+* @remarks
+* For chroma best intra pred mode is calculated based only on SAD
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff
+ (
+ process_ctxt_t *ps_proc_ctxt
+ );
+
+
+/**
+******************************************************************************
+*
+* @brief
+* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
+* prediction.
+*
+* @par Description
+* This function evaluates first three 16x16 modes and compute corresponding sad
+* and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[in,out] pu4_sadmin
+* Pointer to the variable in which minimum sad is returned
+*
+* @param[in] u4_valid_intra_modes
+* Says what all modes are valid
+*
+* @returns none
+*
+******************************************************************************
+*/
+typedef void ih264e_evaluate_intra_modes_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels_i16,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes);
+
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes;
+
+/* assembly */
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_a9q;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_a9q;
+
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_av8;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_av8;
+
+/* x86 intrinsics */
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_ssse3;
+ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_ssse3;
+
+/**
+******************************************************************************
+*
+* @brief
+* Evaluate best intra 4x4 mode and perform prediction.
+*
+* @par Description
+* This function evaluates 4x4 modes and compute corresponding sad
+* and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+* Pointer to the variable in which minimum cost is returned
+*
+* @param[in] u4_valid_intra_modes
+* Says what all modes are valid
+*
+* @param[in] u4_lambda
+* Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+* Predicted mode for cost computation
+*
+* @returns none
+*
+******************************************************************************
+*/
+typedef void ih264e_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes,
+ UWORD32 u4_lambda,
+ UWORD32 u4_predictd_mode);
+
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes;
+
+/* x86 intrinsics */
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_ssse3;
+
+/* assembly */
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_a9q;
+ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_av8;
+
+#endif /* IH264E_INTRA_MODES_EVAL_H_ */
diff --git a/encoder/ih264e_list.h b/encoder/ih264e_list.h
new file mode 100755
index 0000000..782c007
--- /dev/null
+++ b/encoder/ih264e_list.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_list.h
+*
+* @brief
+* The file contains declarations of functions for encoder queue management
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_LIST_H_
+#define IH264E_LIST_H_
+
+
+#endif /* IH264E_LIST_H_ */
diff --git a/encoder/ih264e_master.h b/encoder/ih264e_master.h
new file mode 100755
index 0000000..6c7505a
--- /dev/null
+++ b/encoder/ih264e_master.h
@@ -0,0 +1,132 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_master.h
+*
+* @brief
+* Contains declarations of functions used by master thread
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MASTER_H_
+#define IH264E_MASTER_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+* This function joins all the spawned threads after successful completion of
+* their tasks
+*
+* @par Description
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @returns none
+*
+******************************************************************************
+*/
+void ih264e_join_threads(codec_t *ps_codec);
+
+/**
+******************************************************************************
+*
+* @brief This function puts the current thread to sleep for a duration
+* of sleep_us
+*
+* @par Description
+* ithread_yield() method causes the calling thread to yield execution to another
+* thread that is ready to run on the current processor. The operating system
+* selects the thread to yield to. ithread_usleep blocks the current thread for
+* the specified number of microseconds. In other words, yield just says,
+* end my timeslice prematurely, look around for other threads to run. If there
+* is nothing better than me, continue. Sleep says I don't want to run for x
+* microseconds. Even if no other thread wants to run, don't make me run.
+*
+* @param[in] sleep_us
+* thread sleep duration
+*
+* @returns error_status
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_wait_for_thread(UWORD32 sleep_us);
+
+/**
+******************************************************************************
+*
+* @brief
+* Encodes in synchronous api mode
+*
+* @par Description
+* This routine processes input yuv, encodes it and outputs bitstream and recon
+*
+* @param[in] ps_codec_obj
+* Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+* Pointer to input argument structure
+*
+* @param[out] pv_api_op
+* Pointer to output argument structure
+*
+* @returns Status
+*
+******************************************************************************
+*/
+WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op);
+
+/**
+*******************************************************************************
+*
+* @brief update encoder configuration parameters
+*
+* @par Description:
+* updates encoder configuration parameters from the given config set.
+* Initialize/reinitialize codec parameters according to new configurations.
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] ps_cfg
+* Pointer to config param set
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_update_config(codec_t *ps_codec, cfg_params_t *ps_cfg);
+
+#endif /* IH264E_MASTER_H_ */
diff --git a/encoder/ih264e_mc.c b/encoder/ih264e_mc.c
new file mode 100755
index 0000000..2dd0974
--- /dev/null
+++ b/encoder/ih264e_mc.c
@@ -0,0 +1,320 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_mc.c
+*
+* @brief
+* Contains definition of functions for motion compensation
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_motion_comp_luma()
+* - ih264e_motion_comp_chroma()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_mc.h"
+#include "ih264e_half_pel.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+*  selects (and, if required, copies) the luma prediction of an inter mb
+*
+* @par Description
+*  For every partition of the current mb the prediction source is picked from
+*  its l0 motion vector: when the half pel bit (bit 1) of either mv component
+*  is set, the best sub pel buffer of the proc ctxt is used; otherwise the
+*  full pel reference buffer, displaced by the integer part of the mv, is used.
+*  If the mb consists of a single partition nothing is copied. The selected
+*  source pointer and its stride are handed back through the output arguments
+*  so that the caller can use them in place of the pred buffer. With more than
+*  one partition each partition is copied into the pred buffer of the proc
+*  ctxt and the output arguments are left untouched.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @param[out] pu1_pseudo_pred
+*  receives the pseudo prediction buffer pointer (single partition case only)
+*
+* @param[out] pi4_pseudo_pred_strd
+*  receives the pseudo prediction buffer stride (single partition case only)
+*
+* @return none
+*
+* @remarks The quarter pel bit (bit 0) of the mv does not take part in the
+*  buffer selection. Assumes half pel buffers for the entire frame are
+*  populated.
+*
+******************************************************************************
+*/
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
+                             UWORD8 **pu1_pseudo_pred,
+                             WORD32 *pi4_pseudo_pred_strd)
+{
+    /* codec context, provides the copy kernel */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* stride of the full pel reference buffer */
+    const WORD32 i4_fpel_strd = ps_proc->i4_rec_strd;
+
+    /* stride of the pred buffer */
+    const WORD32 i4_dst_strd = ps_proc->i4_pred_strd;
+
+    /* partition counter */
+    UWORD32 u4_prtn;
+
+    for (u4_prtn = 0; u4_prtn < ps_proc->u4_num_sub_partitions; u4_prtn++)
+    {
+        /* partition being processed and its quarter pel resolution mv */
+        enc_pu_t *ps_pu = ps_proc->ps_pu + u4_prtn;
+        WORD32 i4_mvx = ps_pu->s_l0_mv.i2_mvx;
+        WORD32 i4_mvy = ps_pu->s_l0_mv.i2_mvy;
+
+        /* prediction source chosen for this partition */
+        UWORD8 *pu1_src;
+        WORD32 i4_src_strd;
+
+        if (((i4_mvx | i4_mvy) & 0x2) != 0)
+        {
+            /* half pel bit set in x or y : take the best sub pel buffer */
+            pu1_src = ps_proc->pu1_best_subpel_buf;
+            i4_src_strd = ps_proc->u4_bst_spel_buf_strd;
+        }
+        else
+        {
+            /* full pel : reference buffer moved by the integer part of the mv */
+            pu1_src = ps_proc->pu1_ref_buf_luma + ((i4_mvy >> 2) * i4_fpel_strd) + (i4_mvx >> 2);
+            i4_src_strd = i4_fpel_strd;
+        }
+
+        if (ps_proc->u4_num_sub_partitions == 1)
+        {
+            /* single partition : no copy, the source itself acts as the    */
+            /* prediction buffer for the transform stages of the caller     */
+            *pu1_pseudo_pred = pu1_src;
+            *pi4_pseudo_pred_strd = i4_src_strd;
+        }
+        else
+        {
+            /* partition size in pixels (b4_wd / b4_ht are in 4 pixel units, minus 1) */
+            UWORD32 u4_wd = (ps_pu->b4_wd + 1) << 2;
+            UWORD32 u4_ht = (ps_pu->b4_ht + 1) << 2;
+
+            /* location of the partition inside the pred buffer */
+            UWORD8 *pu1_dst = ps_proc->pu1_pred_mb + 4 * ps_pu->b4_pos_y * i4_dst_strd + 4 * ps_pu->b4_pos_x;
+
+            ps_codec->pf_inter_pred_luma_copy(pu1_src, pu1_dst, i4_src_strd, i4_dst_strd, u4_ht, u4_wd, NULL, 0);
+        }
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief
+*  performs motion compensation for chroma mb
+*
+* @par Description
+*  For every partition of the current mb, the l0 motion vector is split into
+*  a whole sample displacement (mv >> 3) and a fractional offset (mv & 7).
+*  The reference pointer is moved by the whole sample displacement and the
+*  codec's chroma inter prediction kernel is invoked with the fractional
+*  offsets to fill the pred buffer of the proc ctxt.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt
+*
+* @return none
+*
+* @remarks The whole sample displacement relies on '>>' of a negative value
+*  being an arithmetic shift (implementation defined in C). The horizontal
+*  displacement is scaled by 2 bytes per sample position - presumably because
+*  the chroma planes are stored interleaved; confirm against the buffer
+*  layout.
+*
+******************************************************************************
+*/
+void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
+{
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* stride of the reference buffer */
+    WORD32 i4_ref_strd = ps_proc->i4_rec_strd;
+
+    /* pred buffer stride */
+    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
+
+    /* partition idx */
+    UWORD32 u4_num_prtn;
+
+    for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+    {
+        /* motion vector, size and position of the current partition */
+        enc_pu_t *ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
+
+        /* whole sample part of the motion vector */
+        WORD32 i4_mv_x = ps_curr_pu->s_l0_mv.i2_mvx >> 3;
+        WORD32 i4_mv_y = ps_curr_pu->s_l0_mv.i2_mvy >> 3;
+
+        /* fractional part of the motion vector (lowest three bits) */
+        UWORD8 u1_dx = ps_curr_pu->s_l0_mv.i2_mvx & 0x7;
+        UWORD8 u1_dy = ps_curr_pu->s_l0_mv.i2_mvy & 0x7;
+
+        /* width and height of the partition */
+        UWORD32 wd = (ps_curr_pu->b4_wd + 1) << 1;
+        UWORD32 ht = (ps_curr_pu->b4_ht + 1) << 1;
+
+        /* Motion compensated location in the reference buffer. The horizontal */
+        /* displacement is multiplied instead of shifted : i4_mv_x can be      */
+        /* negative and left shifting a negative value is undefined behaviour. */
+        UWORD8 *pu1_ref = ps_proc->pu1_ref_buf_chroma + (i4_mv_y * i4_ref_strd) + (i4_mv_x * 2);
+
+        /* NOTE(review): the position scaling below only matters for partitions */
+        /* that do not start at the mb origin - verify it if such partitions    */
+        /* are ever enabled.                                                    */
+        UWORD8 *pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 2 * ps_curr_pu->b4_pos_x;
+
+        ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd,
+                                       u1_dx, u1_dy, ht, wd);
+    }
+}
diff --git a/encoder/ih264e_mc.h b/encoder/ih264e_mc.h
new file mode 100755
index 0000000..965e1d1
--- /dev/null
+++ b/encoder/ih264e_mc.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_mc.h
+*
+* @brief
+* This file contains declarations of routines that perform motion compensation
+* of luma and chroma macroblocks.
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MC_H_
+#define IH264E_MC_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief
+* performs motion compensation for a luma mb for the given mv.
+*
+* @par Description
+* This routine performs motion compensation of an inter mb. When the inter
+* mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
+* to pred buffer. In this case the function returns pointer and stride of the
+* ref. buffer and this info is used in place of pred buffer elsewhere.
+* In other cases, the pred buffer is populated via copy / filtering + copy
+* (q pel cases) and returned.
+*
+* @param[in] ps_proc
+* pointer to current proc ctxt
+*
+* @param[out] pu1_pseudo_pred
+* pseudo prediction buffer
+*
+* @param[out] pi4_pseudo_pred_strd
+* pseudo pred buffer stride
+*
+* @return none
+*
+* @remarks Assumes half pel buffers for the entire frame are populated.
+*
+******************************************************************************
+*/
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
+ UWORD8 **pu1_pseudo_pred,
+ WORD32 *pi4_pseudo_pred_strd);
+
+/**
+******************************************************************************
+*
+* @brief
+* performs motion compensation for chroma mb
+*
+* @par Description
+* Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
+* according to the motion vectors given
+*
+* @param[in] ps_proc
+* pointer to current proc ctxt
+*
+* @return none
+*
+* @remarks Assumes half pel and quarter pel buffers for the entire frame are
+* populated.
+******************************************************************************
+*/
+void ih264e_motion_comp_chroma
+ (
+ process_ctxt_t *ps_proc
+ );
+
+
+#endif // IH264E_MC_H_
diff --git a/encoder/ih264e_me.c b/encoder/ih264e_me.c
new file mode 100755
index 0000000..9e8d7a3
--- /dev/null
+++ b/encoder/ih264e_me.c
@@ -0,0 +1,1153 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_me.c
+ *
+ * @brief
+ * Contains definition of functions for motion estimation
+ *
+ * @author
+ * ittiam
+ *
+ * @par List of Functions:
+ * - ih264e_init_mv_bits()
+ * - ih264e_skip_analysis_chroma()
+ * - ih264e_skip_analysis_luma()
+ * - ih264e_analyse_skip()
+ * - ih264e_get_search_candidates()
+ * - ih264e_find_skip_motion_vector()
+ * - ih264e_get_mv_predictor()
+ * - ih264e_mv_pred()
+ * - ih264e_mv_pred_me()
+ * - ih264e_init_me()
+ * - ih264e_compute_me()
+ * - ih264e_compute_me_nmb()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ime_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_globals.h"
+#include "ih264_macros.h"
+#include "ih264e_me.h"
+#include "ime.h"
+#include "ime_distortion_metrics.h"
+#include "ih264_debug.h"
+#include "ithread.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_mc.h"
+#include "ih264e_debug.h"
+#include "ih264e_half_pel.h"
+#include "ime_statistics.h"
+#include "ih264e_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills the table holding the codeword length of every motion vector
+*  difference that can occur within the maximum search range
+*
+* @par Description
+*  Entry 0 of the table is set to 1. All other entries are filled in groups of
+*  equal code length, starting at 3 and growing by 2 per group, symmetrically
+*  for the positive and the negative value of the same magnitude.
+*
+* @param[in,out] ps_me_ctxt
+*  Pointer to me ctxt. Its pu1_mv_bits table is written with both positive
+*  and negative indices, hence it has to point into the interior of its
+*  backing storage.
+*
+* @remarks The lengths of the code words are derived from signed exponential
+*  Golomb codes.
+*
+*******************************************************************************
+*/
+void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
+{
+    /* code length of the current group, its exclusive upper bound */
+    WORD32 i4_len, i4_len_limit;
+
+    /* largest mv difference, magnitude iterator */
+    WORD32 i4_max_delta, i4_mag;
+
+    /* largest code number and its range */
+    UWORD32 u4_code_num, u4_range;
+
+    /* bounds of the current group in unsigned / signed exp golomb domain */
+    UWORD32 u4_uev_lo, u4_uev_hi, u4_sev_lo, u4_sev_hi;
+
+    /* max srch range, scaled to sub pel units (x4) and to delta mv (x2) */
+    i4_max_delta = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
+    i4_max_delta <<= 3;
+
+    /* codeNum for positive integer = 2x-1 : Table9-3 */
+    u4_code_num = (i4_max_delta << 1);
+
+    /* range of the largest code number (GETRANGE presumably yields its bit width - confirm) */
+    GETRANGE(u4_range, u4_code_num);
+
+    i4_len_limit = 2 * u4_range - 1;
+
+    /* zero mv difference */
+    ps_me_ctxt->pu1_mv_bits[0] = 1;
+
+    for (i4_len = 3; i4_len < i4_len_limit; i4_len += 2)
+    {
+        u4_uev_lo = (1 << (i4_len >> 1));
+        u4_uev_hi = 2 * u4_uev_lo - 1;
+
+        u4_sev_lo = u4_uev_lo >> 1;
+        u4_sev_hi = u4_uev_hi >> 1;
+
+        DEBUG("\n%d min, %d max %d codesize", u4_sev_lo, u4_sev_hi, i4_len);
+
+        /* same length for +magnitude and -magnitude */
+        for (i4_mag = u4_sev_lo; i4_mag <= (WORD32)u4_sev_hi; i4_mag++)
+        {
+            ps_me_ctxt->pu1_mv_bits[i4_mag] = i4_len;
+            ps_me_ctxt->pu1_mv_bits[-i4_mag] = i4_len;
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Converts one quarter pel mv component into a clipped full pel value
+*
+* @par Description:
+*  Adds 2 and shifts right by 2 (quarter pel to full pel with rounding), then
+*  applies CLIP3 with the two bounds in the order given.
+*
+* @param[in] i4_qpel
+*  mv component in quarter pel units
+*
+* @param[in] i4_bound_a
+*  first bound handed to CLIP3
+*
+* @param[in] i4_bound_b
+*  second bound handed to CLIP3
+*
+* @returns the clipped full pel mv component
+*
+*******************************************************************************
+*/
+static WORD32 ih264e_qpel_to_clipped_fpel(WORD32 i4_qpel,
+                                          WORD32 i4_bound_a,
+                                          WORD32 i4_bound_b)
+{
+    WORD32 i4_fpel = (i4_qpel + 2) >> 2;
+
+    return CLIP3(i4_bound_a, i4_bound_b, i4_fpel);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Determines the valid candidates for which the initial search shall happen.
+*  The best of these candidates is used to center the diamond pixel search.
+*
+* @par Description: The candidate list always starts with the (0,0) mv. The
+*  left mv is appended when the left mb is available. The top mv is appended
+*  when the top mb is available, followed by the top right mv if that mb is
+*  available, else by the top left mv if that one is available. After that the
+*  mv predictor is computed (ih264e_mv_pred_me) and stored in the mb partition
+*  context, the skip mv is derived (ih264e_find_skip_motion_vector) and appended
+*  as the last candidate. All candidates except (0,0) are rounded to full pel
+*  units and clipped to the search range of the me context.
+*
+* @param[in] ps_proc
+*  pointer to current proc ctxt (neighbour mvs, availability, pred / skip mv)
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context; as_mv_init_search, u4_num_candidates and
+*  s_mb_part.s_mv_pred are written
+*
+* @returns none. The list of MVs to be used for priming the full pel search
+*  and the number of such MVs are stored in the me context.
+*
+* @remarks
+*  Assumptions : 1. Assumes Single reference frame
+*                2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
+                                         me_ctxt_t *ps_me_ctxt)
+{
+    /* curr mb index x */
+    WORD32 i4_mb_x = ps_proc->i4_mb_x;
+
+    /* ngbr availability */
+    block_neighbors_t *ps_avbl = ps_proc->ps_ngbr_avbl;
+
+    /* skip mv */
+    mv_t *ps_skip_mv = ps_proc->ps_skip_mv;
+
+    /* mb part info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* srch range */
+    WORD32 i4_rng_n = ps_me_ctxt->i4_srch_range_n;
+    WORD32 i4_rng_s = ps_me_ctxt->i4_srch_range_s;
+    WORD32 i4_rng_e = ps_me_ctxt->i4_srch_range_e;
+    WORD32 i4_rng_w = ps_me_ctxt->i4_srch_range_w;
+
+    /* motion vectors of the left, top, top left and top right mbs */
+    mv_t *ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_l0_mv;
+    mv_t *ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_l0_mv;
+    mv_t *ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_l0_mv;
+    mv_t *ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_l0_mv;
+
+    /* neighbour mvs that qualify as candidates (at most left, top, top right/left) */
+    mv_t *aps_ngbr_mv[3];
+    UWORD32 u4_num_ngbr = 0;
+
+    /* num of search candidates, loop counter */
+    UWORD32 u4_num_candidates = 0;
+    UWORD32 u4_i;
+
+    /************************************************************/
+    /* The zero motion vector is always the first candidate     */
+    /************************************************************/
+    ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx = 0;
+    ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy = 0;
+    u4_num_candidates++;
+
+    /************************************************************/
+    /* Collect the available neighbours : left, top and then    */
+    /* top right, falling back to top left                      */
+    /************************************************************/
+    if (ps_avbl->u1_mb_a)
+    {
+        aps_ngbr_mv[u4_num_ngbr++] = ps_left_mv;
+    }
+
+    if (ps_avbl->u1_mb_b)
+    {
+        aps_ngbr_mv[u4_num_ngbr++] = ps_top_mv;
+
+        if (ps_avbl->u1_mb_c)
+        {
+            aps_ngbr_mv[u4_num_ngbr++] = ps_top_right_mv;
+        }
+        else if (ps_avbl->u1_mb_d)
+        {
+            aps_ngbr_mv[u4_num_ngbr++] = ps_top_left_mv;
+        }
+    }
+
+    /************************************************************/
+    /* Store the neighbour candidates in full pel units         */
+    /************************************************************/
+    for (u4_i = 0; u4_i < u4_num_ngbr; u4_i++)
+    {
+        ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx =
+                        ih264e_qpel_to_clipped_fpel(aps_ngbr_mv[u4_i]->i2_mvx, i4_rng_w, i4_rng_e);
+        ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy =
+                        ih264e_qpel_to_clipped_fpel(aps_ngbr_mv[u4_i]->i2_mvy, i4_rng_n, i4_rng_s);
+
+        u4_num_candidates++;
+    }
+
+    /********************************************************************/
+    /*                            MV Prediction                         */
+    /********************************************************************/
+    ih264e_mv_pred_me(ps_proc);
+
+    ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv->i2_mvx;
+    ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv->i2_mvy;
+
+    /************************************************************/
+    /* Get the skip motion vector                               */
+    /************************************************************/
+    ih264e_find_skip_motion_vector(ps_proc, 1);
+
+    /************************************************************/
+    /* The skip motion vector is the last candidate             */
+    /************************************************************/
+    ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvx =
+                    ih264e_qpel_to_clipped_fpel(ps_skip_mv->i2_mvx, i4_rng_w, i4_rng_e);
+    ps_me_ctxt->as_mv_init_search[u4_num_candidates].i2_mvy =
+                    ih264e_qpel_to_clipped_fpel(ps_skip_mv->i2_mvy, i4_rng_n, i4_rng_s);
+
+    u4_num_candidates++;
+
+    ASSERT(u4_num_candidates <= 5);
+
+    ps_me_ctxt->u4_num_candidates = u4_num_candidates;
+}
+
+/**
+*******************************************************************************
+*
+* @brief The function gives the skip motion vector
+*
+* @par Description:
+*  The skip mv is (0,0) when the left or the top mb is not available, or when
+*  the left or the top mb has ref idx 0 together with a zero motion vector.
+*  In every other case the skip mv equals the predicted mv held in the proc
+*  ctxt (ps_pred_mv).
+*
+* @param[in,out] ps_proc
+*  Process context. Neighbour info and ps_pred_mv are read, the mv pointed to
+*  by ps_skip_mv is written.
+*
+* @param[in] u4_for_me
+*  1 : use the neighbour information kept for motion estimation
+*  (s_left_mb_pu_ME / ps_top_row_pu_ME), any other value : use s_left_mb_pu /
+*  ps_top_row_pu
+*
+* @returns none
+*
+* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+*  specification.
+*
+*******************************************************************************
+*/
+void ih264e_find_skip_motion_vector(process_ctxt_t *ps_proc, UWORD32 u4_for_me)
+{
+    /* skip mv */
+    mv_t *ps_skip_mv = ps_proc->ps_skip_mv;
+
+    /* left mb motion vector info */
+    enc_pu_t *ps_left_mb_pu = (u4_for_me == 1) ? &ps_proc->s_left_mb_pu_ME
+                                               : &ps_proc->s_left_mb_pu;
+
+    /* top mb motion vector info */
+    enc_pu_t *ps_top_mb_pu = ((u4_for_me == 1) ? ps_proc->ps_top_row_pu_ME
+                                               : ps_proc->ps_top_row_pu) + ps_proc->i4_mb_x;
+
+    /* predicted mv is used only if both neighbours exist and neither of them */
+    /* is a (ref idx 0, zero mv) block                                         */
+    WORD32 i4_use_pred_mv =
+            ps_proc->ps_ngbr_avbl->u1_mb_a &&
+            ps_proc->ps_ngbr_avbl->u1_mb_b &&
+            ((ps_left_mb_pu->i1_l0_ref_idx | ps_left_mb_pu->s_l0_mv.i2_mvx | ps_left_mb_pu->s_l0_mv.i2_mvy) != 0) &&
+            ((ps_top_mb_pu->i1_l0_ref_idx | ps_top_mb_pu->s_l0_mv.i2_mvx | ps_top_mb_pu->s_l0_mv.i2_mvy) != 0);
+
+    if (i4_use_pred_mv)
+    {
+        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv->i2_mvx;
+        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv->i2_mvy;
+    }
+    else
+    {
+        ps_skip_mv->i2_mvx = 0;
+        ps_skip_mv->i2_mvy = 0;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief motion vector predictor
+*
+* @par Description:
+*  The routine calculates the motion vector predictor for a given block from
+*  the left, top and top right candidates. If exactly one candidate refers to
+*  the current reference frame (ref idx 0) its mv is the predictor, otherwise
+*  the predictor is the component wise median of the three candidate mvs.
+*
+* @param[in] ps_left_mb_pu
+*  pointer to left mb motion vector info
+*
+* @param[in] ps_top_row_pu
+*  pointer to top & top right mb motion vector info (entries [0] and [1])
+*
+* @param[out] ps_pred_mv
+*  receives the x & y components of the MV predictor
+*
+* @returns none
+*
+* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+*  specification.
+*  Assumptions : 1. Assumes Single reference frame
+*                2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
+                             enc_pu_t *ps_top_row_pu,
+                             mv_t *ps_pred_mv)
+{
+    /* Only a single reference frame is in use, hence the ref idx of the */
+    /* current block is always 0.                                         */
+    WORD32 i4_curr_ref_idx = 0;
+
+    /* bit 0 : left, bit 1 : top, bit 2 : top right refer to the curr ref frame */
+    UWORD32 u4_match_mask = 0;
+
+    if (ps_left_mb_pu->i1_l0_ref_idx == i4_curr_ref_idx)
+    {
+        u4_match_mask |= 0x1;
+    }
+    if (ps_top_row_pu[0].i1_l0_ref_idx == i4_curr_ref_idx)
+    {
+        u4_match_mask |= 0x2;
+    }
+    if (ps_top_row_pu[1].i1_l0_ref_idx == i4_curr_ref_idx)
+    {
+        u4_match_mask |= 0x4;
+    }
+
+    switch (u4_match_mask)
+    {
+        case 0x1:
+            /* only left matches */
+            ps_pred_mv->i2_mvx = ps_left_mb_pu->s_l0_mv.i2_mvx;
+            ps_pred_mv->i2_mvy = ps_left_mb_pu->s_l0_mv.i2_mvy;
+            break;
+
+        case 0x2:
+            /* only top matches */
+            ps_pred_mv->i2_mvx = ps_top_row_pu[0].s_l0_mv.i2_mvx;
+            ps_pred_mv->i2_mvy = ps_top_row_pu[0].s_l0_mv.i2_mvy;
+            break;
+
+        case 0x4:
+            /* only top right matches */
+            ps_pred_mv->i2_mvx = ps_top_row_pu[1].s_l0_mv.i2_mvx;
+            ps_pred_mv->i2_mvy = ps_top_row_pu[1].s_l0_mv.i2_mvy;
+            break;
+
+        default:
+            /* none or more than one match : median of the three candidates */
+            MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvx,
+                   ps_top_row_pu[0].s_l0_mv.i2_mvx,
+                   ps_top_row_pu[1].s_l0_mv.i2_mvx,
+                   ps_pred_mv->i2_mvx);
+            MEDIAN(ps_left_mb_pu->s_l0_mv.i2_mvy,
+                   ps_top_row_pu[0].s_l0_mv.i2_mvy,
+                   ps_top_row_pu[1].s_l0_mv.i2_mvy,
+                   ps_pred_mv->i2_mvy);
+            break;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs MV prediction
+*
+* @par Description:
+*  Prepares the left, top and top right neighbour entries for prediction and
+*  then derives the mv predictor via ih264e_get_mv_predictor(). A neighbour
+*  that is unavailable or intra coded gets ref idx -1 and a zero mv. When the
+*  top right mb is unavailable, the top left mb takes its place provided it is
+*  available and not intra coded.
+*
+* @param[in,out] ps_proc
+*  Process context corresponding to the job. The left mb pu and the top row
+*  pu entries at mb_x and mb_x + 1 may be overwritten in place; the predictor
+*  is written to the mv pointed to by ps_pred_mv.
+*
+* @returns none
+*
+* @remarks
+*  The neighbour availability and the intra / inter decisions of the
+*  neighbours have to be up to date before this function is called.
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred(process_ctxt_t *ps_proc)
+{
+    /* zero mv */
+    mv_t s_zero_mv = {0, 0};
+
+    /* mb neighbor availability */
+    block_neighbors_t *ps_avbl = ps_proc->ps_ngbr_avbl;
+
+    /* syntax elements of the top row, starting at the top mb */
+    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+
+    /* syntax elements of the top left mb */
+    mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
+
+    /* motion vector info of left, top left, top ([0]) and top right ([1]) mb */
+    enc_pu_t *ps_left_pu = &ps_proc->s_left_mb_pu;
+    enc_pu_t *ps_top_left_pu = &ps_proc->s_top_left_mb_pu;
+    enc_pu_t *ps_top_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
+    enc_pu_t *ps_top_right_pu = ps_top_pu + 1;
+
+    /* left : unusable if not available or intra */
+    if (!ps_avbl->u1_mb_a || (ps_proc->s_left_mb_syntax_ele.u2_is_intra == 1))
+    {
+        ps_left_pu->i1_l0_ref_idx = -1;
+        ps_left_pu->s_l0_mv = s_zero_mv;
+    }
+
+    /* top : unusable if not available or intra */
+    if (!ps_avbl->u1_mb_b || ps_top_syn->u2_is_intra)
+    {
+        ps_top_pu->i1_l0_ref_idx = -1;
+        ps_top_pu->s_l0_mv = s_zero_mv;
+    }
+
+    if (ps_avbl->u1_mb_c)
+    {
+        /* top right present : unusable only if it is intra */
+        if (ps_top_syn[1].u2_is_intra)
+        {
+            ps_top_right_pu->i1_l0_ref_idx = -1;
+            ps_top_right_pu->s_l0_mv = s_zero_mv;
+        }
+    }
+    else if (ps_avbl->u1_mb_d && !ps_top_left_syn->u2_is_intra)
+    {
+        /* top right missing : an available inter top left mb substitutes it */
+        ps_top_right_pu->i1_l0_ref_idx = ps_top_left_pu->i1_l0_ref_idx;
+        ps_top_right_pu->s_l0_mv = ps_top_left_pu->s_l0_mv;
+    }
+    else
+    {
+        /* neither top right nor a usable top left */
+        ps_top_right_pu->i1_l0_ref_idx = -1;
+        ps_top_right_pu->s_l0_mv = s_zero_mv;
+    }
+
+    ih264e_get_mv_predictor(ps_left_pu, ps_top_pu, ps_proc->ps_pred_mv);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Approximates the predicted motion vector used during motion estimation
+*
+* @par Description:
+*  Works on the ME-side neighbour state of the process context
+*  (s_left_mb_pu_ME, s_top_left_mb_pu_ME and ps_top_row_pu_ME). Neighbours that
+*  are flagged as unavailable are replaced by ref idx -1 with a zero mv; an
+*  unavailable top right neighbour falls back to the top left one when that is
+*  available. The prepared candidates are handed to ih264e_get_mv_predictor()
+*  together with ps_proc->ps_pred_mv.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks
+*  Motion estimation happens at nmb level. For cost calculations, mv is
+*  approximated using this function. The top and top right candidates are
+*  patched on a local copy, whereas the left candidate is patched in place
+*  inside the process context.
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred_me(process_ctxt_t *ps_proc)
+{
+    /* availability flags of the neighbouring mbs */
+    block_neighbors_t *ps_avail = ps_proc->ps_ngbr_avbl;
+
+    /* destination of the prediction */
+    mv_t *ps_mvp = ps_proc->ps_pred_mv;
+
+    /* left and top left candidates (ME copies kept in the process context) */
+    enc_pu_t *ps_left = &ps_proc->s_left_mb_pu_ME;
+    enc_pu_t *ps_top_left = &ps_proc->s_top_left_mb_pu_ME;
+
+    /* top ([0]) and top right ([1]) candidates in the shared ME top row */
+    enc_pu_t *ps_row = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+
+    /* private copy of the two top row entries; the shared row stays untouched */
+    enc_pu_t as_top[2];
+
+    /* value used for every invalidated candidate */
+    mv_t s_null_mv = {0, 0};
+
+    as_top[0] = ps_row[0];
+    as_top[1] = ps_row[1];
+
+    /* left candidate */
+    if (!ps_avail->u1_mb_a)
+    {
+        ps_left->i1_l0_ref_idx = -1;
+        ps_left->s_l0_mv = s_null_mv;
+    }
+
+    /* top candidate */
+    if (!ps_avail->u1_mb_b)
+    {
+        as_top[0].i1_l0_ref_idx = -1;
+        as_top[0].s_l0_mv = s_null_mv;
+    }
+
+    /* top right candidate: substitute top left when possible, else invalidate */
+    if (!ps_avail->u1_mb_c)
+    {
+        if (ps_avail->u1_mb_d)
+        {
+            as_top[1].i1_l0_ref_idx = ps_top_left->i1_l0_ref_idx;
+            as_top[1].s_l0_mv = ps_top_left->s_l0_mv;
+        }
+        else
+        {
+            as_top[1].i1_l0_ref_idx = -1;
+            as_top[1].s_l0_mv = s_null_mv;
+        }
+    }
+
+    ih264e_get_mv_predictor(ps_left, as_top, ps_mvp);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Prepares the me ctxt of a job before motion estimation is run
+*
+* @par Description:
+*  Copies the current luma source and reference buffer pointers of the process
+*  context into its me context and looks up the motion lambda for the mb qp
+*  already stored in the me context (table gu1_qp0).
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_me(process_ctxt_t *ps_proc)
+{
+    me_ctxt_t *ps_me = &ps_proc->s_me_ctxt;
+
+    /* lagrange multiplier for the motion cost, indexed by mb qp */
+    ps_me->u4_lambda_motion = gu1_qp0[ps_me->u1_mb_qp];
+
+    /* luma planes the search operates on: reference first, then source */
+    ps_me->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma;
+    ps_me->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current mb
+*
+* @par Description:
+* The current mb is compared with a list of mb's in the reference frame for
+* least cost. The mb that offers least cost is chosen as predicted mb and the
+* displacement of the predicted mb from index location of the current mb is
+* signaled as mv. The list of the mb's that are chosen in the reference frame
+* are dependent on the speed of the ME configured.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none. The selected motion vector is written to ps_proc->ps_pu; cost
+* and distortion are written to ps_proc->ps_cur_mb only when the ME cost is
+* lower than the cost already stored there.
+*
+* @remarks The mb is always described as one partition (one PU, ref idx 0).
+*
+*******************************************************************************
+*/
+void ih264e_compute_me(process_ctxt_t *ps_proc)
+{
+ /* me ctxt */
+ me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+// /* mb syntax elements of neighbors */
+// mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+// mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
+
+ /* mb part info (skip_mb_part_info receives the result of the skip evaluation) */
+ mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+ mb_part_ctxt skip_mb_part_info;
+
+ /* temp var */
+ WORD32 rows_above, rows_below, columns_left, columns_right,u4_use_stat_sad;
+
+ /* Motion vectors in full-pel units */
+ WORD16 mv_x, mv_y;
+
+ /* recon stride */
+ WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+ /* source buffer for half pel generation functions */
+ UWORD8 *pu1_hpel_src;
+
+ /* quantization parameters */
+ quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
+
+ /* SAD thresholds */
+ ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
+
+ /* Best half pel buffer and its stride (destination of the final sub pel copy) */
+ UWORD8 *pu1_best_subpel_buf = ps_proc->pu1_best_subpel_buf;
+ UWORD32 u4_bst_spel_strd = ps_proc->u4_bst_spel_buf_strd;
+
+ /* During evaluation for motion vectors do not search through padded regions */
+ /* Obtain number of rows and columns that are effective for computing for me evaluation */
+ rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
+ rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
+ columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
+ columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
+
+ /* init srch range */
+ /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
+ * on all sides.
+ */
+// ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, ps_me_ctxt->ai2_srch_boundaries[0]);
+// ps_me_ctxt->i4_srch_range_e = MIN(columns_right, ps_me_ctxt->ai2_srch_boundaries[0]);
+// ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, ps_me_ctxt->ai2_srch_boundaries[1]);
+// ps_me_ctxt->i4_srch_range_s = MIN(rows_below, ps_me_ctxt->ai2_srch_boundaries[1]);
+
+ ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+ ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
+ ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+ ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
+
+ /* this is to facilitate fast sub pel computation with minimal loads:
+ * the search window is shrunk by one pel on every side when half pel is on */
+ if (ps_me_ctxt->u4_enable_hpel)
+ {
+ ps_me_ctxt->i4_srch_range_w += 1;
+ ps_me_ctxt->i4_srch_range_e -= 1;
+ ps_me_ctxt->i4_srch_range_n += 1;
+ ps_me_ctxt->i4_srch_range_s -= 1;
+ }
+
+ /*Initialize the min sad option*/
+ ps_me_ctxt->u4_min_sad_reached = 0; /*Not yet found min sad*/
+ ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+
+ /************************************************************/
+ /* Get the seed motion vector candidates */
+ /************************************************************/
+ ih264e_get_search_candidates(ps_proc, ps_me_ctxt);
+
+ /************************************************************/
+ /* Init the MB part ctxt structure */
+ /************************************************************/
+ ps_mb_part->s_mv_curr.i2_mvx = 0;
+ ps_mb_part->s_mv_curr.i2_mvy = 0;
+ ps_mb_part->i4_mb_cost = INT_MAX;
+ ps_mb_part->i4_mb_distortion = INT_MAX;
+
+ /* With NMB changes this logic will not work as we cannot exit NME in between*/
+ /********************************************************************/
+ /* Analyse skip */
+ /********************************************************************/
+// if (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 0
+// && u4_frame_level_me == 0)
+// {
+// if ( (ps_proc->ps_ngbr_avbl->u1_mb_a && (ps_me_ctxt->u4_left_is_skip == 1)) ||
+// (ps_proc->ps_ngbr_avbl->u1_mb_b && ps_top_syn->u2_mb_type == PSKIP) ||
+// (ps_proc->ps_ngbr_avbl->u1_mb_d && ps_top_left_syn->u2_mb_type == PSKIP) )
+// {
+// if ( 0 == ih264e_analyse_skip(ps_proc, ps_me_ctxt) )
+// {
+// return;
+// }
+// }
+// }
+
+ /********************************************************************/
+ /* compute skip cost */
+ /********************************************************************/
+ /* See if we need to use modified sad (enabled together with satqd) */
+ u4_use_stat_sad = (ps_proc->ps_codec->s_cfg.u4_enable_satqd == 1);
+
+ /* init the cost of skip MB */
+ skip_mb_part_info.i4_mb_cost = INT_MAX;
+ ime_compute_skip_cost(ps_me_ctxt, ps_proc->ps_skip_mv, &skip_mb_part_info, u4_use_stat_sad);
+
+
+ /* the regular search is skipped when the flag is already set at this point
+ * (NOTE(review): presumably raised inside ime_compute_skip_cost - confirm) */
+ if (ps_me_ctxt->u4_min_sad_reached == 0)
+ {
+ /************************************************************/
+ /* Evaluate search candidates for initial mv pt. */
+ /************************************************************/
+ ime_evaluate_init_srchposn_16x16(ps_me_ctxt);
+
+ /********************************************************************/
+ /* full pel motion estimation */
+ /********************************************************************/
+ ime_full_pel_motion_estimation_16x16(ps_me_ctxt);
+
+ DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
+ (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
+
+ DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
+ /********************************************************************/
+ /* sub pel motion estimation */
+ /********************************************************************/
+ if (ps_me_ctxt->u4_enable_hpel)
+ {
+ /* motion vectors in terms of full pel values (stored mv >> 2) */
+ mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
+ mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
+
+ /* moving src pointer to the converged motion vector location*/
+ pu1_hpel_src = ps_me_ctxt->pu1_ref_buf_luma + mv_x + (mv_y * i4_rec_strd);
+
+ ps_me_ctxt->pu1_half_x = ps_proc->pu1_half_x;
+ ps_me_ctxt->pu1_half_y = ps_proc->pu1_half_y;
+ ps_me_ctxt->pu1_half_xy = ps_proc->pu1_half_xy;
+ ps_me_ctxt->u4_hp_buf_strd = HP_BUFF_WD;
+
+ /* half pel search is done for both sides of full pel,
+ * hence half_x of width x height = 17x16 is created
+ * starting from left half_x of converged full pel */
+ pu1_hpel_src -= 1;
+
+ /* computing half_x */
+ ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
+ ps_proc->pu1_half_x,
+ i4_rec_strd,
+ ps_me_ctxt->u4_hp_buf_strd);
+
+ /*
+ * Halfpel search is done for both sides of full pel,
+ * hence half_y of width x height = 16x17 is created
+ * starting from top half_y of converged full pel
+ * for half_xy top_left is required
+ * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
+ */
+
+ pu1_hpel_src -= i4_rec_strd;
+
+ /* computing half_y , and half_xy*/
+ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
+ pu1_hpel_src, ps_proc->pu1_half_y,
+ ps_proc->pu1_half_xy, i4_rec_strd,
+ ps_me_ctxt->u4_hp_buf_strd, ps_proc->ai16_pred1 + 3,
+ ps_me_ctxt->u4_hp_buf_strd);
+
+ ime_sub_pel_motion_estimation_16x16(ps_me_ctxt);
+ }
+ }
+
+ {
+
+ /* if skip gives a better cost than other search, copy the cost accordingly*/
+ if (skip_mb_part_info.i4_mb_cost < ps_mb_part->i4_mb_cost)
+ {
+ ps_mb_part->i4_mb_cost = skip_mb_part_info.i4_mb_cost;
+ ps_mb_part->i4_mb_distortion = skip_mb_part_info.i4_mb_distortion;
+ ps_mb_part->s_mv_curr.i2_mvx = skip_mb_part_info.s_mv_curr.i2_mvx;
+ ps_mb_part->s_mv_curr.i2_mvy = skip_mb_part_info.s_mv_curr.i2_mvy;
+ }
+ else
+ {
+ /*
+ * If the current MB has a sub pel component,
+ * we need to copy that to the best subpel buffer
+ * (only done when half pel is enabled and pu1_best_hpel_buf is non-NULL)
+ */
+ if (ps_me_ctxt->u4_enable_hpel && ps_mb_part->pu1_best_hpel_buf)
+ {
+ ps_codec->pf_inter_pred_luma_copy(ps_mb_part->pu1_best_hpel_buf,
+ pu1_best_subpel_buf,
+ ps_me_ctxt->u4_hp_buf_strd,
+ u4_bst_spel_strd, MB_SIZE,
+ MB_SIZE, NULL, 0);
+ }
+ }
+ }
+
+ DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 0);
+
+ /* update the type of the mb if necessary: only when the ME cost is lower
+ * than the cost currently recorded for this mb */
+ if (ps_me_ctxt->s_mb_part.i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
+ {
+ /* mb cost */
+ ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->s_mb_part.i4_mb_cost;
+
+ /* mb distortion */
+ ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->s_mb_part.i4_mb_distortion;
+
+ /* mb type */
+ ps_proc->ps_cur_mb->u4_mb_type = P16x16;
+ }
+
+ /* number of partitions (written unconditionally, even if the mb type was not updated) */
+ ps_proc->u4_num_sub_partitions = 1;
+ *(ps_proc->pu4_mb_pu_cnt) = 1;
+
+ /* position in-terms of PU */
+ ps_proc->ps_pu->b4_pos_x = 0;
+ ps_proc->ps_pu->b4_pos_y = 0;
+
+ /* PU size: both fields are set to 3 (presumably (16 / 4) - 1 for a 16x16
+ * partition - TODO confirm against the enc_pu_t definition) */
+ ps_proc->ps_pu->b4_wd = 3;
+ ps_proc->ps_pu->b4_ht = 3;
+
+ /* ref idx */
+ ps_proc->ps_pu->i1_l0_ref_idx = 0;
+
+ /* motion vector L0 */
+ ps_proc->ps_pu->s_l0_mv.i2_mvx = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx;
+ ps_proc->ps_pu->s_l0_mv.i2_mvy = ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy;
+
+ /* Update min sad conditions: propagate the flag and the min sad value from
+ * the me ctxt to the current mb */
+ if (ps_me_ctxt->u4_min_sad_reached == 1)
+ {
+ ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
+ ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current NMB
+*
+* @par Description:
+* Initializes input and output pointers required by the function ih264e_compute_me
+* and calls the function ih264e_compute_me in a loop to process NMBs.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] u4_nmb_count
+* Number of consecutive mbs to process, starting at ps_proc->i4_mb_x
+*
+* @returns none
+*
+* @remarks The per-mb cursors of ps_proc that the loop advances (ps_pu, i4_mb_x,
+* the luma / chroma buffer pointers and pu4_mb_pu_cnt) are rewound to their
+* entry values before returning.
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
+{
+ /* pic pu (saved so that ps_proc->ps_pu can be restored after the loop) */
+ enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
+
+ /* ME map: row of the current mb row */
+ UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
+
+ /* temp var */
+ UWORD32 u4_i;
+
+ ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
+ ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
+
+ for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
+ {
+ /* Wait for ME map: only needed below the first mb row */
+ if (ps_proc->i4_mb_y > 0)
+ {
+ /* Wait for top right ME to be done */
+ UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
+
+ /* spin, yielding the thread, until the map entry of the top right mb
+ * (clamped to the last mb of the row) becomes non-zero */
+ while (1)
+ {
+ volatile UWORD8 *pu1_buf;
+ WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
+
+ idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
+ pu1_buf = pu1_me_map_tp_rw + idx;
+ if(*pu1_buf)
+ break;
+ ithread_yield();
+ }
+ }
+
+ ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].s_skip_mv);
+ ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
+ ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].s_pred_mv);
+
+ ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
+
+ ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad; /* NOTE(review): overwritten below from ps_codec->u4_min_sad */
+ ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
+
+ ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
+ ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
+
+ /* Set the best subpel buf to the correct mb so that the buffer can be copied */
+ ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
+ ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
+
+ /* Set the min sad conditions (this is the value that is effective; it
+ * replaces the one taken from ps_proc->u4_min_sad above) */
+ ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
+ ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
+
+ /* Derive neighbor availability for the current macroblock */
+ ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+
+ /* init me */
+ ih264e_init_me(ps_proc);
+
+ ih264e_compute_me(ps_proc);
+
+ /* update top and left structs: the ME-side top left entries take the
+ * current top entries, the ME-side left pu takes the pu just written */
+ {
+ mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
+ mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
+ enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
+ enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
+ enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
+
+ *ps_top_left_syn = *ps_top_syn;
+
+ *ps_top_left_mb_pu = *ps_top_mv;
+ *ps_left_mb_pu = *ps_proc->ps_pu;
+ }
+
+ ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
+
+ /* Copy the min sad reached info (ps_cur_mb was set to &ps_nmb_info[u4_i]
+ * above, so source and destination are the same object here) */
+ ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
+ ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
+
+ /*
+ * To make sure that the MV map is properly sync to the
+ * cache we need to do a DDB
+ * (DATA_SYNC() is issued before this mb is marked as done in the ME map;
+ * "DDB" presumably refers to a data memory barrier - TODO confirm)
+ */
+ {
+ DATA_SYNC();
+
+ pu1_me_map[ps_proc->i4_mb_x] = 1;
+ }
+ ps_proc->i4_mb_x++;
+
+ ps_proc->s_me_ctxt.u4_left_is_intra = 0; /* the mb just estimated becomes the left neighbour and is flagged non-intra */
+ ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP);
+
+ /* update buffers pointers */
+ ps_proc->pu1_src_buf_luma += MB_SIZE;
+ ps_proc->pu1_rec_buf_luma += MB_SIZE;
+ ps_proc->pu1_ref_buf_luma += MB_SIZE;
+
+ /*
+ * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+ * the stride per MB is MB_SIZE
+ */
+ ps_proc->pu1_src_buf_chroma += MB_SIZE;
+ ps_proc->pu1_rec_buf_chroma += MB_SIZE;
+ ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+
+ ps_proc->pu4_mb_pu_cnt += 1;
+ }
+
+
+ ps_proc->ps_pu = ps_pu_begin; /* rewind everything the loop advanced */
+ ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
+
+ /* update buffers pointers */
+ ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
+ ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
+ ps_proc->pu1_ref_buf_luma -= MB_SIZE * u4_nmb_count;
+
+ /*
+ * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+ * the stride per MB is MB_SIZE
+ */
+ ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
+ ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
+ ps_proc->pu1_ref_buf_chroma -= MB_SIZE * u4_nmb_count;
+
+ ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
+}
diff --git a/encoder/ih264e_me.h b/encoder/ih264e_me.h
new file mode 100755
index 0000000..c4834a1
--- /dev/null
+++ b/encoder/ih264e_me.h
@@ -0,0 +1,278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_me.h
+ *
+ * @brief
+ * Contains the MEDIAN helper macro and the declarations of the motion
+ * estimation and motion vector prediction functions of the H264 encoder
+ *
+ * @author
+ * ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IH264E_ME_H_
+#define IH264E_ME_H_
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief compute median of 3 elements (a, b, c) and store the output
+ * in to result. This is used for mv prediction
+ *
+ * @remarks
+ * - Every argument is parenthesized in the expansion, so expressions such as
+ *   MEDIAN(x + 1, y, z, r) compare and assign the intended values.
+ * - a, b and c are evaluated more than once; pass expressions without side
+ *   effects.
+ * - The macro expands to a bare if / else statement (kept that way so that
+ *   existing call sites are unaffected); do not use it as the unbraced body
+ *   of another if that has an else branch.
+******************************************************************************
+ */
+
+#define MEDIAN(a, b, c, result) if ((a) > (b)){\
+ if ((b) > (c))\
+ (result) = (b);\
+ else {\
+ if ((a) > (c))\
+ (result) = (c);\
+ else \
+ (result) = (a);\
+ }\
+ }\
+ else {\
+ if ((c) > (b))\
+ (result) = (b);\
+ else {\
+ if ((c) > (a))\
+ (result) = (c);\
+ else \
+ (result) = (a);\
+ }\
+ }
+
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function populates the length of the codewords for motion vectors in the
+* range (-search range, search range) in pixels
+*
+* @param[in,out] ps_me
+* Pointer to me ctxt. This is the only argument of the function; the codeword
+* lengths are presumably stored inside this context - TODO confirm against the
+* definition
+*
+* @returns none
+*
+* @remarks The length of the code words are derived from signed exponential
+* Golomb codes.
+*
+*******************************************************************************
+*/
+void ih264e_init_mv_bits
+ (
+ me_ctxt_t *ps_me
+ );
+
+/**
+*******************************************************************************
+*
+* @brief The function gives the skip motion vector
+*
+* @par Description:
+* The function gives the skip motion vector
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] u4_for_me
+* NOTE(review): presumably selects whether the ME-side neighbour information
+* is used for the derivation - confirm against the definition
+*
+* @returns none (void). NOTE(review): the skip mv is presumably delivered
+* through the process context - confirm against the definition
+*
+* @remarks The code implements the logic as described in sec 8.4.1.1 in H264
+* specification.
+*
+*******************************************************************************
+*/
+void ih264e_find_skip_motion_vector
+ (
+ process_ctxt_t *ps_proc,
+ UWORD32 u4_for_me
+ );
+
+/**
+*******************************************************************************
+*
+* @brief motion vector predictor
+*
+* @par Description:
+* The routine calculates the motion vector predictor for a given block,
+* given the candidate MV predictors.
+*
+* @param[in] ps_left_mb_pu
+* pointer to left mb motion vector info
+*
+* @param[in] ps_top_row_pu
+* pointer to top & top right mb motion vector info
+*
+* @param[out] ps_pred_mv
+* pointer to candidate predictors for the current block
+*
+* @returns The x & y components of the MV predictor.
+*
+* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
+* specification.
+* Assumptions : 1. Assumes Single reference frame
+* 2. Assumes Only partition of size 16x16
+*
+*******************************************************************************
+*/
+void ih264e_get_mv_predictor
+ (
+ enc_pu_t *ps_left_mb_pu,
+ enc_pu_t *ps_top_row_pu,
+ mv_t *ps_pred_mv
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current mb
+*
+* @par Description:
+* The current mb is compared with a list of mb's in the reference frame for
+* least cost. The mb that offers least cost is chosen as predicted mb and the
+* displacement of the predicted mb from index location of the current mb is
+* signaled as mv. The list of the mb's that are chosen in the reference frame
+* are dependent on the speed of the ME configured.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function initializes me ctxt
+*
+* @par Description:
+* Before dispatching the current job to me thread, the me context associated
+* with the job is initialized.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_me(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs motion estimation for the current NMB
+*
+* @par Description:
+* Initializes input and output pointers required by the function ih264e_compute_me
+* and calls the function ih264e_compute_me in a loop to process NMBs.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_compute_me_nmb
+ (
+ process_ctxt_t *ps_proc,
+ UWORD32 u4_nmb_count
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs MV prediction
+*
+* @par Description:
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+* This function will update the MB availability since intra inter decision
+* should be done before the call
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function approximates Pred. MV
+*
+* @par Description:
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns none
+*
+* @remarks none
+* Motion estimation happens at nmb level. For cost calculations, mv is
+* approximated using this function
+*
+*******************************************************************************
+*/
+void ih264e_mv_pred_me
+ (
+ process_ctxt_t *ps_proc
+ );
+
+#endif /* IH264E_ME_H_ */
diff --git a/encoder/ih264e_modify_frm_rate.c b/encoder/ih264e_modify_frm_rate.c
new file mode 100755
index 0000000..bc0e873
--- /dev/null
+++ b/encoder/ih264e_modify_frm_rate.c
@@ -0,0 +1,240 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_modify_frm_rate.c
+*
+* @brief
+* Functions used to modify frame rate
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_pd_frm_rate_get_init_free_memtab()
+* - ih264e_init_pd_frm_rate()
+* - ih264e_update_pd_frm_rate()
+* - ih264e_get_pd_avg_frm_rate()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264e_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Memtab handling for the pull down frame rate context
+*
+* @par Description
+*  Describes the single memory record needed for a pd_frm_rate_t. For every
+*  function type other than GET_NUM_MEMTAB the record ps_memtab[0] is filled
+*  and passed to use_or_fill_base() together with the context handle.
+*
+* @param[in] pps_pd_frm_rate
+*  pull down frame rate context
+*
+* @param[in] ps_memtab
+*  Handle to memtab
+*
+* @param[in] e_func_type
+*  Function type (get memtab/ update memtab)
+*
+* @returns Number of memtabs used (always 1)
+*
+* @remarks For GET_NUM_MEMTAB and FILL_MEMTAB the handle is first pointed at a
+*  function-local static context, because no state memory exists yet at that
+*  stage and the handle must remain safe to dereference.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_rate,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type)
+{
+    /* stand-in context used while no state memory has been allocated */
+    static pd_frm_rate_t s_placeholder_ctxt;
+
+    WORD32 i4_num_memtabs = 0;
+
+    switch (e_func_type)
+    {
+        case GET_NUM_MEMTAB:
+        case FILL_MEMTAB:
+            (*pps_pd_frm_rate) = &s_placeholder_ctxt;
+            break;
+
+        default:
+            break;
+    }
+
+    /* memory record of the context structure itself */
+    if (GET_NUM_MEMTAB != e_func_type)
+    {
+        fill_memtab(&ps_memtab[i4_num_memtabs], sizeof(pd_frm_rate_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**) pps_pd_frm_rate, e_func_type);
+    }
+    i4_num_memtabs += 1;
+
+    return i4_num_memtabs;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Initializes the pull down frame rate state structure based on input
+* frame rate
+*
+* @par Description
+*  Stores the input frame rate, pre-fills one history slot per frame of a one
+*  second window (u4_input_frm_rate / 1000 slots) with that rate and resets the
+*  write index and the count of frames seen so far.
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_input_frm_rate
+*  Input frame rate in frame per 1000sec
+*
+* @returns none
+*
+* @remarks The number of pre-filled slots is limited to MAX_NUM_FRAME, the size
+*  of u4_cur_frm_rate[], so frame rates above MAX_NUM_FRAME fps can no longer
+*  write past the end of the history buffer.
+*
+*******************************************************************************
+*/
+void ih264e_init_pd_frm_rate(pd_frm_rate_t *ps_pd_frm_rate,
+                             UWORD32 u4_input_frm_rate)
+{
+    UWORD32 u4_i;
+
+    /* slots in use: one per frame of a one second window */
+    UWORD32 u4_num_slots = u4_input_frm_rate / 1000;
+
+    /* never index beyond the history buffer */
+    if (u4_num_slots > MAX_NUM_FRAME)
+    {
+        u4_num_slots = MAX_NUM_FRAME;
+    }
+
+    ps_pd_frm_rate->u4_input_frm_rate = u4_input_frm_rate;
+
+    for (u4_i = 0; u4_i < u4_num_slots; u4_i++)
+    {
+        ps_pd_frm_rate->u4_cur_frm_rate[u4_i] = u4_input_frm_rate;
+    }
+
+    ps_pd_frm_rate->u4_frm_num = 0;
+
+    ps_pd_frm_rate->u4_tot_frm_encoded = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update pull down frame rate
+*
+* @par Description
+* For each frame a run time frame rate value is sent based on whether a frame
+* is skipped or not. If it is skipped for pull down then the current frame
+* rate for the pull down period is signaled as 4/5th of the original frame
+* rate. Thus when this is averaged the frame rate gradually switches from the
+* input frame rate to 4/5th of input frame rate as and when more 3:2 pull
+* down patterns are detected
+*
+* @param[in] ps_pd_frm_rate
+*  Pull down frame rate context
+*
+* @param[in] u4_cur_frm_rate
+*  Run time frame rate of the current frame, in frame per 1000sec
+*
+* @returns none
+*
+* @remarks The history is a circular buffer of (u4_input_frm_rate / 1000)
+*  entries. That window is limited to MAX_NUM_FRAME, the size of
+*  u4_cur_frm_rate[], so the write index cannot run past the buffer for frame
+*  rates above MAX_NUM_FRAME fps.
+*
+*******************************************************************************
+*/
+void ih264e_update_pd_frm_rate(pd_frm_rate_t *ps_pd_frm_rate,
+                               UWORD32 u4_cur_frm_rate)
+{
+    /* length of the averaging window in frames, bounded by the buffer size */
+    UWORD32 u4_window = ps_pd_frm_rate->u4_input_frm_rate / 1000;
+
+    if (u4_window > MAX_NUM_FRAME)
+    {
+        u4_window = MAX_NUM_FRAME;
+    }
+
+    /* record the rate of this frame and advance the write index */
+    ps_pd_frm_rate->u4_cur_frm_rate[ps_pd_frm_rate->u4_frm_num] = u4_cur_frm_rate;
+
+    ps_pd_frm_rate->u4_frm_num++;
+
+    /* count the frames seen so far, saturating at the window length */
+    if (ps_pd_frm_rate->u4_tot_frm_encoded < u4_window)
+    {
+        ps_pd_frm_rate->u4_tot_frm_encoded++;
+    }
+
+    /* wrap the write index at the end of the window */
+    if (ps_pd_frm_rate->u4_frm_num >= u4_window)
+    {
+        ps_pd_frm_rate->u4_frm_num = 0;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief returns average frame rate in 1 sec duration
+*
+* @par Description
+*  Averages the last N frame in period(1 sec) and then gives that
+*  as the current frames frame rate. Thus this averages out the sudden
+*  variation in frame rate
+*
+* @param[in] ps_pd_frm_rate
+*  Handle to pull down frame rate context
+*
+* @returns average frame rate. When no frame has been recorded yet
+*  (u4_tot_frm_encoded == 0) the configured input frame rate is returned
+*  instead of dividing by zero.
+*
+* @remarks The number of averaged entries is limited to MAX_NUM_FRAME, the size
+*  of u4_cur_frm_rate[], so the history buffer is never read out of bounds.
+*
+*******************************************************************************
+*/
+UWORD32 ih264e_get_pd_avg_frm_rate(pd_frm_rate_t *ps_pd_frm_rate)
+{
+    UWORD32 u4_i;
+    UWORD32 u4_sum = 0;
+    UWORD32 u4_num_frms = ps_pd_frm_rate->u4_tot_frm_encoded;
+
+    /* nothing recorded yet: the best estimate is the configured rate */
+    if (0 == u4_num_frms)
+    {
+        return ps_pd_frm_rate->u4_input_frm_rate;
+    }
+
+    /* stay inside the history buffer */
+    if (u4_num_frms > MAX_NUM_FRAME)
+    {
+        u4_num_frms = MAX_NUM_FRAME;
+    }
+
+    for (u4_i = 0; u4_i < u4_num_frms; u4_i++)
+    {
+        u4_sum += ps_pd_frm_rate->u4_cur_frm_rate[u4_i];
+    }
+
+    return u4_sum / u4_num_frms;
+}
diff --git a/encoder/ih264e_modify_frm_rate.h b/encoder/ih264e_modify_frm_rate.h
new file mode 100755
index 0000000..c301e2c
--- /dev/null
+++ b/encoder/ih264e_modify_frm_rate.h
@@ -0,0 +1,182 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_modify_frm_rate.h
+*
+* @brief
+* Functions declarations used to modify frame rate
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_MODIFY_FRM_RATE_H_
+#define IH264E_MODIFY_FRM_RATE_H_
+
+/*****************************************************************************/
+/* Constant Definitions */
+/*****************************************************************************/
+
+#define MAX_NUM_FRAME 120 /* number of entries in pd_frm_rate_t::u4_cur_frm_rate */
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+typedef struct pd_frm_rate_t
+{
+ /*
+ * The input frame rate set in the encoder (frames per 1000 sec)
+ */
+ UWORD32 u4_input_frm_rate;
+
+ /*
+ * History of the frame rates signalled per frame (due to pull down)
+ */
+ UWORD32 u4_cur_frm_rate[MAX_NUM_FRAME];
+
+ /*
+ * Index in the above buffer at which the next frame rate is stored
+ */
+ UWORD32 u4_frm_num;
+
+ /*
+ * Number of valid entries in the above buffer. Counts the frames encoded
+ * and stops growing at u4_input_frm_rate / 1000 (frames in one second)
+ */
+ UWORD32 u4_tot_frm_encoded;
+
+}pd_frm_rate_t;
+
+typedef struct pd_frm_rate_t *pd_frm_rate_handle;
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to init pd frame rate memtab
+*
+* @par Description
+* Function to init pull down frame rate memtab
+*
+* @param[in] pps_pd_frm_rate
+* pull down frame rate context
+*
+* @param[in] ps_memtab
+* Handle to memtab
+*
+* @param[in] e_func_type
+* Function type (get memtab/ update memtab)
+*
+* @returns Number of memtabs used
+*
+* @remarks None
+*
+*******************************************************************************
+*/
+WORD32 ih264e_pd_frm_rate_get_init_free_memtab(pd_frm_rate_handle *pps_pd_frm_rate,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+/**
+*******************************************************************************
+*
+* @brief Initializes the pull down frame rate state structure based on input
+* frame rate
+*
+* @par Description
+* Initializes the pull down frame rate state structure based on input frame rate
+*
+* @param[in] ps_pd_frm_rate
+* Pull down frame rate context
+*
+* @param[in] u4_input_frm_rate
+* Input frame rate in frame per 1000sec
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_pd_frm_rate(pd_frm_rate_handle ps_pd_frm_rate,
+ UWORD32 u4_input_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update pull down frame rate
+*
+* @par Description
+* For each frame a run time frame rate value is sent based on whether a frame
+* is skipped or not. If it is skipped for pull down then the current frame
+* rate for the pull down period is signaled as 4/5th of the original frame
+* rate. Thus when this is averaged the frame rate gradually switches from the
+* input frame rate to 4/5th of input frame rate as and when more 3:2 pull
+* down patterns are detected
+*
+* @param[in] ps_pd_frm_rate
+* Pull down frame rate context
+*
+* @param[in] u4_cur_frm_rate
+* Run time frame rate signalled for the current frame
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_pd_frm_rate(pd_frm_rate_handle ps_pd_frm_rate,
+ UWORD32 u4_cur_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief returns average frame rate in 1 sec duration
+*
+* @par Description
+* Averages the last N frame in period(1 sec) and then gives that
+* as the current frames frame rate. Thus this averages out the sudden
+* variation in frame rate
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frame rate context
+*
+* @returns average frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD32 ih264e_get_pd_avg_frm_rate(pd_frm_rate_handle ps_pd_frm_rate);
+
+#endif /* IH264E_MODIFY_FRM_RATE_H_ */
diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c
new file mode 100755
index 0000000..9a468e9
--- /dev/null
+++ b/encoder/ih264e_process.c
@@ -0,0 +1,2369 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_process.c
+*
+* @brief
+* Contains functions for codec thread
+*
+* @author
+* Harish
+*
+* @par List of Functions:
+* - ih264e_generate_sps_pps()
+* - ih264e_init_entropy_ctxt()
+* - ih264e_entropy()
+* - ih264e_pack_header_data()
+* - ih264e_update_proc_ctxt()
+* - ih264e_init_proc_ctxt()
+* - ih264e_pad_recon_buffer()
+* - ih264e_dblk_pad_hpel_processing_n_mbs()
+* - ih264e_process()
+* - ih264e_set_rc_pic_params()
+* - ih264e_update_rc_post_enc()
+* - ih264e_process_thread()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_platform_macros.h"
+#include "ih264_macros.h"
+#include "ih264_error.h"
+#include "ih264_buf_mgr.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_list.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_process.h"
+#include "ithread.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_encode_header.h"
+#include "ih264e_globals.h"
+#include "ih264e_config.h"
+#include "ih264e_trace.h"
+#include "ih264e_statistics.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_deblk.h"
+#include "ih264e_me.h"
+#include "ih264e_debug.h"
+#include "ih264e_process.h"
+#include "ih264e_master.h"
+#include "ih264e_utils.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_padding.h"
+#include "ime_statistics.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Generates the sps and pps headers on request
+*
+* @par Description
+* Called when the encoder is set in header generation mode. The sps and pps
+* are populated from the codec context, written to the output buffer of the
+* current api call and control is returned to the caller.
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @return error code accumulated while generating the two headers
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
+{
+    /* ping-pong set selected by the parity of the encode api call count */
+    WORD32 i4_set_idx = ps_codec->i4_encode_api_call_cnt & 1;
+
+    /* output buffer belonging to the selected set */
+    out_buf_t *ps_out = &ps_codec->as_out_buf[i4_set_idx];
+
+    /* entropy context of the process context at the base of the selected set */
+    entropy_ctxt_t *ps_ent = &ps_codec->as_process[i4_set_idx * MAX_PROCESS_THREADS].s_entropy;
+
+    /* bitstream context used by that entropy context */
+    bitstrm_t *ps_strm = ps_ent->ps_bitstrm;
+
+    /* parameter set entries, resolved once the ids have been wrapped */
+    sps_t *ps_seq_set;
+    pps_t *ps_pic_set;
+
+    /********************************************************************/
+    /* attach the output buffer to the bitstream context                */
+    /********************************************************************/
+    ih264e_bitstrm_init(ps_strm, ps_out->s_bits_buf.pv_buf, ps_out->s_bits_buf.u4_bufsize);
+
+    /********************************************************************/
+    /* choose and populate the parameter set entries                    */
+    /********************************************************************/
+    /* the ids are not advanced here, only wrapped modulo MAX_PPS_CNT / MAX_SPS_CNT */
+    ps_codec->i4_pps_id %= MAX_PPS_CNT;
+    ps_codec->i4_sps_id %= MAX_SPS_CNT;
+
+    /* sps: locate the entry and populate it */
+    ps_seq_set = &ps_codec->ps_sps_base[ps_codec->i4_sps_id];
+    ih264e_populate_sps(ps_codec, ps_seq_set);
+
+    /* pps: locate the entry and populate it */
+    ps_pic_set = &ps_codec->ps_pps_base[ps_codec->i4_pps_id];
+    ih264e_populate_pps(ps_codec, ps_pic_set);
+
+    /********************************************************************/
+    /* write the headers                                                */
+    /********************************************************************/
+    /* the status of both writes is accumulated in the entropy context */
+    ps_ent->i4_error_code = IH264E_SUCCESS;
+    /* sps first */
+    ps_ent->i4_error_code |= ih264e_generate_sps(ps_strm, ps_seq_set);
+
+    /* pps next (takes the sps as an input) */
+    ps_ent->i4_error_code |= ih264e_generate_pps(ps_strm, ps_pic_set, ps_seq_set);
+
+    /********************************************************************/
+    /* report the number of bytes written through the output buffer     */
+    /********************************************************************/
+    ps_out->s_bits_buf.u4_bytes = ps_strm->u4_strm_buf_offset;
+
+    return ps_ent->i4_error_code;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Prepares the entropy context for an entropy coding job
+*
+* @par Description:
+* Derives, from the mb position and mb count already set in the entropy
+* context, the start and end mb addresses of the job and its slice index.
+* The start of frame flag is raised when the job begins at mb 0, and at the
+* start of a row the packed mb coeff/header data pointers are set to that row.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns IH264E_SUCCESS
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
+{
+    /* entropy context embedded in the process context */
+    entropy_ctxt_t *ps_ent = &ps_proc->s_entropy;
+
+    /* codec context, holds the sizes used to step the packed data pointers */
+    codec_t *ps_codec_ctxt = ps_proc->ps_codec;
+
+    /* mb address range [start, end) of the job, in raster scan order */
+    ps_ent->i4_mb_start_add = ps_ent->i4_mb_x + ps_ent->i4_mb_y * ps_ent->i4_wd_mbs;
+    ps_ent->i4_mb_end_add = ps_ent->i4_mb_cnt + ps_ent->i4_mb_start_add;
+
+    /* slice index of the first mb of the job */
+    ps_ent->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_ent->i4_mb_start_add];
+
+    /* a job that begins at mb 0 begins the frame: raise the sof flag */
+    if (0 == ps_ent->i4_mb_start_add)
+    {
+        ps_ent->i4_sof = 1;
+    }
+
+    /* at the first mb of a row, point the packed data pointers at that row */
+    if (0 == ps_ent->i4_mb_x)
+    {
+        UWORD8 *pu1_coeff_base = (UWORD8 *)ps_ent->pv_pic_mb_coeff_data;
+        UWORD8 *pu1_hdr_base = (UWORD8 *)ps_ent->pv_pic_mb_header_data;
+
+        /* packed mb coeff data */
+        ps_ent->pv_mb_coeff_data = pu1_coeff_base + ps_ent->i4_mb_y * ps_codec_ctxt->u4_size_coeff_data;
+
+        /* packed mb header data */
+        ps_ent->pv_mb_header_data = pu1_hdr_base + ps_ent->i4_mb_y * ps_codec_ctxt->u4_size_header_data;
+    }
+
+    /* nothing above can fail */
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief entry point for entropy coding
+*
+* @par Description
+* This function calls lower level functions to perform entropy coding for a
+* group (n rows) of mb's. After encoding 1 row of mb's, the function takes
+* back the control, updates the ctxt and calls lower level functions again.
+* This process is repeated till all the rows or group of mb's (which ever is
+* minimum) are coded
+*
+* @param[in] ps_proc
+* process context
+*
+* @returns error status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+/* (stream buffer offset * 8) + WORD_SIZE - bits left in the current code word */
+#define GET_NUM_BITS(ps_bitstream) ((((ps_bitstream)->u4_strm_buf_offset) << 3) + WORD_SIZE - ((ps_bitstream)->i4_bits_left_in_cw))
+IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
+{
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* entropy context */
+ entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+ /* sps */
+ sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
+
+ /* pps */
+ pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
+
+ /* slice header */
+ slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
+
+ /* slice type */
+ WORD32 i4_slice_type = ps_proc->i4_slice_type;
+
+ /* Bitstream structure */
+ bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
+
+ /* output buff (local copy, filled in only at start of frame / slice) */
+ out_buf_t s_out_buf;
+
+ /* proc map */
+ UWORD8 *pu1_proc_map;
+
+ /* entropy map */
+ UWORD8 *pu1_entropy_map_curr;
+
+ /* ping-pong set index (parity of the encode api call count) */
+ WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+
+ /* temp var */
+ WORD32 i4_wd_mbs, i4_ht_mbs;
+ UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
+
+ /********************************************************************/
+ /* BEGIN INIT */
+ /********************************************************************/
+
+ /* entropy encode start address */
+ u4_mb_idx = ps_entropy->i4_mb_start_add;
+
+ /* entropy encode end address */
+ u4_mb_end_idx = ps_entropy->i4_mb_end_add;
+
+ /* width in mbs */
+ i4_wd_mbs = ps_entropy->i4_wd_mbs;
+
+ /* height in mbs */
+ i4_ht_mbs = ps_entropy->i4_ht_mbs;
+
+ /* total mb cnt */
+ u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
+
+ /* proc map */
+ pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+
+ /* entropy map */
+ pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+
+ /********************************************************************/
+ /* @ start of frame / slice, */
+ /* initialize the output buffer, */
+ /* initialize the bit stream buffer, */
+ /* check if sps and pps headers have to be generated, */
+ /* populate and generate slice header */
+ /********************************************************************/
+ if (ps_entropy->i4_sof)
+ {
+ /********************************************************************/
+ /* initialize the output buffer */
+ /********************************************************************/
+ s_out_buf = ps_codec->as_out_buf[ctxt_sel];
+
+ /* is last frame to encode */
+ s_out_buf.u4_is_last = ps_entropy->u4_is_last;
+
+ /* time stamp. NOTE(review): set on the local copy only - confirm intended */
+ s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
+ s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
+
+ /********************************************************************/
+ /* initialize the bit stream buffer */
+ /********************************************************************/
+ ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
+
+ /********************************************************************/
+ /* BEGIN HEADER GENERATION */
+ /********************************************************************/
+ if (1 == ps_entropy->i4_gen_header)
+ {
+ /* generate sps */
+ ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
+
+ /* generate pps */
+ ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
+
+ /* reset i4_gen_header */
+ ps_entropy->i4_gen_header = 0;
+ }
+
+ /* populate slice header */
+ ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
+
+ /* generate slice header */
+ ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
+ ps_pps, ps_sps);
+
+ /* once start of frame / slice is done, you can reset it */
+ /* it is the responsibility of the caller to set this flag */
+ ps_entropy->i4_sof = 0;
+ }
+
+ /* begin entropy coding for the mb set */
+ while (u4_mb_idx < u4_mb_end_idx)
+ {
+ /* init ptrs/indices */
+ if (ps_entropy->i4_mb_x == i4_wd_mbs)
+ {
+ ps_entropy->i4_mb_y ++;
+ ps_entropy->i4_mb_x = 0;
+
+ /* packed mb coeff data */
+ ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
+ ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
+
+ /* packed mb header data */
+ ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
+ ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
+
+ /* proc map */
+ pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+
+ /* entropy map */
+ pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
+ }
+
+ DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
+ ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
+ ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
+
+ /* wait until the curr mb is core coded */
+ /* The wait for curr mb to be core coded is essential when entropy is launched
+ * as a separate job. The proc map entry is polled, yielding between polls
+ */
+ while (1)
+ {
+ volatile UWORD8 *pu1_buf1;
+ WORD32 idx = ps_entropy->i4_mb_x;
+
+ pu1_buf1 = pu1_proc_map + idx;
+ if(*pu1_buf1)
+ break;
+ ithread_yield();
+ }
+
+ /* write mb layer */
+ ps_codec->pf_write_mb_syntax_layer[i4_slice_type](ps_entropy);
+
+ /* set entropy map */
+ pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
+
+ u4_mb_idx ++;
+ ps_entropy->i4_mb_x ++;
+
+ if (ps_entropy->i4_mb_x == i4_wd_mbs)
+ {
+ /* if slices are enabled */
+ if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+ {
+ /* current slice index */
+ WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
+
+ /* slice map */
+ UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
+
+ /* No need to open a slice at end of frame. The current slice can be closed at the time
+ * of signaling eof flag.
+ */
+ if ( (u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx]))
+ {
+ /* flush pending mb skip run (the inner test repeats the outer one) */
+ if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
+ {
+ if (*ps_entropy->pi4_mb_skip_run)
+ {
+ PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
+ *ps_entropy->pi4_mb_skip_run = 0;
+ }
+ }
+
+ /* put rbsp trailing bits for the previous slice */
+ ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+ /* update slice header pointer */
+ i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
+ ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
+ ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
+
+ /* populate slice header */
+ ps_entropy->i4_mb_start_add = u4_mb_idx;
+ ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
+
+ /* generate slice header */
+ ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
+ ps_pps, ps_sps);
+ }
+ }
+
+ /* Don't execute any further instructions until store synchronization took place */
+ DATA_SYNC();
+ }
+ }
+
+ /* check for eof */
+ if (u4_mb_idx == u4_mb_cnt)
+ {
+ /* set end of frame flag */
+ ps_entropy->i4_eof = 1;
+ }
+
+ if (ps_entropy->i4_eof)
+ {
+ /* flush pending mb skip run (the inner test repeats the outer one) */
+ if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
+ {
+ if (*ps_entropy->pi4_mb_skip_run)
+ {
+ PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
+ *ps_entropy->pi4_mb_skip_run = 0;
+ }
+ }
+
+ /* put rbsp trailing bits */
+ ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
+
+ /* update current frame stats to rc library */
+ if (IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode)
+ {
+ /* number of bytes to stuff */
+ WORD32 i4_stuff_bytes;
+
+ /* update */
+ i4_stuff_bytes = ih264e_update_rc_post_enc(ps_codec, ctxt_sel, ps_proc->i4_pic_cnt);
+
+ /* post encode skip flagged: reset the offset so that no bytes are output */
+ if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
+ {
+ ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
+ }
+ else if (i4_stuff_bytes)
+ {
+ /* add filler nal units */
+ ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
+ }
+ }
+
+ /********************************************************************/
+ /* signal the output */
+ /********************************************************************/
+ ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = ps_entropy->ps_bitstrm->u4_strm_buf_offset;
+
+ DEBUG("entropy status %x", ps_entropy->i4_error_code);
+ }
+
+ /* allow threads to dequeue entropy jobs */
+ ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
+
+ return ps_entropy->i4_error_code;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Packs header information of a mb in to a buffer
+*
+* @par Description:
+* After deciding the mode info of a macroblock, the syntax elements
+* associated with the mb are packed and stored. The entropy thread unpacks
+* this buffer and generates the end bit stream.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns IH264E_SUCCESS
+*
+* @remarks
+* Bytes written at pv_mb_header_data, which is advanced past them:
+* I4x4   : mb type + (u1_c_i8_mode << 6), cbp, qp delta, 8 bytes of sub mb modes
+* I16x16 : mb type + (u1_l_i16_mode << 4) + (u1_c_i8_mode << 6), cbp, qp delta
+* P16x16 : mb type, cbp, qp delta, mvd x, mvd y (WORD16 each, native byte order)
+* PSKIP  : mb type
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
+{
+    /* curr mb type */
+    UWORD32 u4_mb_type = ps_proc->u4_mb_type;
+
+    /* pack mb syntax layer of curr mb (used for entropy coding) */
+    if (u4_mb_type == I4x4)
+    {
+        /* pointer to mb header storage space */
+        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+        /* temp var */
+        WORD32 i4, byte;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+        /* sub mb modes: two 4x4 blocks per byte, i4 advances twice per pass */
+        for (i4 = 0; i4 < 16; i4 ++)
+        {
+            byte = 0;
+
+            /* even block, low nibble: bit 0 set if the predicted mode is used, else mode in bits 1-3 */
+            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == ps_proc->au1_intra_luma_mb_4x4_modes[i4])
+            {
+                byte |= 1;
+            }
+            else if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
+            {
+                byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
+            }
+            else
+            {
+                byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
+            }
+
+            i4++;
+
+            /* odd block, high nibble: bit 4 set if the predicted mode is used, else mode in bits 5-7 */
+            if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == ps_proc->au1_intra_luma_mb_4x4_modes[i4])
+            {
+                byte |= 16;
+            }
+            else if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
+            {
+                byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
+            }
+            else
+            {
+                byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
+            }
+
+            *pu1_ptr++ = byte;
+        }
+
+        /* end of mb layer */
+        ps_proc->pv_mb_header_data = pu1_ptr;
+    }
+    else if (u4_mb_type == I16x16)
+    {
+        /* pointer to mb header storage space */
+        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+        /* end of mb layer */
+        ps_proc->pv_mb_header_data = pu1_ptr;
+    }
+    else if (u4_mb_type == P16x16)
+    {
+        /* pointer to mb header storage space */
+        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+        /* mv difference, staged here and copied out bytewise */
+        WORD16 i2_mvd;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = u4_mb_type;
+
+        /* cbp */
+        *pu1_ptr++ = ps_proc->u4_cbp;
+
+        /* mb qp delta */
+        *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
+
+        /* mvd x. pu1_ptr need not be 2 byte aligned here, hence memcpy and not a WORD16 store */
+        i2_mvd = ps_proc->ps_pu->s_l0_mv.i2_mvx - ps_proc->ps_pred_mv->i2_mvx;
+        memcpy(pu1_ptr, &i2_mvd, sizeof(i2_mvd));
+        pu1_ptr += sizeof(i2_mvd);
+
+        /* mvd y */
+        i2_mvd = ps_proc->ps_pu->s_l0_mv.i2_mvy - ps_proc->ps_pred_mv->i2_mvy;
+        memcpy(pu1_ptr, &i2_mvd, sizeof(i2_mvd));
+        pu1_ptr += sizeof(i2_mvd);
+
+        /* end of mb layer */
+        ps_proc->pv_mb_header_data = pu1_ptr;
+    }
+    else if (u4_mb_type == PSKIP)
+    {
+        /* pointer to mb header storage space */
+        UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
+
+        /* mb type plus mode */
+        *pu1_ptr++ = u4_mb_type;
+
+        /* end of mb layer */
+        ps_proc->pv_mb_header_data = pu1_ptr;
+    }
+
+    /* any other mb type writes nothing and leaves the pointer where it was */
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief update process context after encoding an mb. This involves preserving
+* the current mb information for later use, initialize the proc ctxt elements to
+* encode next mb.
+*
+* @par Description:
+* This function performs house keeping tasks after encoding an mb.
+* After encoding an mb, various elements of the process context needs to be
+* updated to encode the next mb. For instance, the source, recon and reference
+* pointers, mb indices have to be adjusted to the next mb. The slice index of
+* the current mb needs to be updated. If mb qp modulation is enabled, then if
+* the qp changes the quant param structure needs to be updated. Also, to encode
+* the next mb, the current mb info is used as part of mode prediction or mv
+* prediction. Hence the current mb info has to be preserved at top/top left/left
+* locations.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
+{
+ /* error status */
+ WORD32 error_status = IH264_SUCCESS;
+
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* curr mb indices */
+ WORD32 i4_mb_x = ps_proc->i4_mb_x;
+ WORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+ /* mb syntax elements of neighbors */
+ mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele;
+ mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
+ mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
+
+ /* curr mb type */
+ UWORD32 u4_mb_type = ps_proc->u4_mb_type;
+
+ /* curr mb type */
+ UWORD32 u4_is_intra = ps_proc->u4_is_intra;
+
+ /* width in mbs */
+ WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+ /*height in mbs*/
+ WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
+
+ /* proc map */
+ UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
+
+ /* deblk context */
+ deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+ /* deblk bs context */
+ bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
+
+ /* top row motion vector info */
+ enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
+
+ /* top left mb motion vector */
+ enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
+
+ /* left mb motion vector */
+ enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
+
+ /* sub mb modes */
+ UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
+
+// /* zero mv */
+// mv_t zero_mv = {0, 0};
+
+ /* Pad the MB to support non standard sizes */
+ UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
+ UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
+
+ /*************************************************************/
+ /* During MV prediction, when top right mb is not available, */
+ /* top left mb info. is used for prediction. Hence the curr */
+ /* top, which will be top left for the next mb needs to be */
+ /* preserved before updating it with curr mb info. */
+ /*************************************************************/
+
+ /* mb type, mb class, csbp */
+ *ps_top_left_syn = *ps_top_syn;
+
+ if (ps_proc->i4_slice_type == PSLICE)
+ {
+ /*****************************************/
+ /* update top left with top info results */
+ /*****************************************/
+
+ /* mv */
+ *ps_top_left_mb_pu = *ps_top_row_pu;
+ }
+
+ /*************************************************/
+ /* update top and left with curr mb info results */
+ /*************************************************/
+
+ /* mb type */
+ ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
+
+ /* mb class */
+ ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
+
+ /* csbp */
+ ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
+
+ /* distortion */
+ ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
+
+ if (u4_is_intra)
+ {
+ /* mb / sub mb modes */
+ if (I16x16 == u4_mb_type)
+ {
+ pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
+ }
+ else if (I4x4 == u4_mb_type)
+ {
+ ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
+ ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
+ }
+ else if (I8x8 == u4_mb_type)
+ {
+ memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
+ memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
+ }
+
+ if (ps_proc->i4_slice_type == PSLICE)
+ {
+ /* mv */
+ *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
+
+// /* reset ngbr mv's */
+// ps_top_row_pu->i1_l0_ref_idx = -1;
+// ps_top_row_pu->s_l0_mv = zero_mv;
+//
+// *ps_left_mb_pu = *ps_top_row_pu;
+ }
+ }
+ else
+ {
+ /* mv */
+ *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
+ }
+
+ /*
+ * Mark that the MB has been coded intra
+ * So that future AIRs can skip it
+ */
+ ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
+
+ /**************************************************/
+ /* pack mb header info. for entropy coding */
+ /**************************************************/
+ ih264e_pack_header_data(ps_proc);
+
+ /* update previous mb qp */
+ ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
+
+ /* store qp */
+ ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
+
+ /*
+ * We need to sync the cache to make sure that the nmv content of proc
+ * is updated to cache properly
+ */
+ DATA_SYNC();
+
+ /* Just before finishing the row, enqueue the job in to entropy queue.
+ * The master thread depending on its convenience shall dequeue it and
+ * performs entropy.
+ *
+ * WARN !! Placing this block post proc map update can cause queuing of
+ * entropy jobs in out of order.
+ */
+ if (i4_mb_x == i4_wd_mbs - 1)
+ {
+ /* job structures */
+ job_t s_job;
+
+ /* job class */
+ s_job.i4_cmd = CMD_ENTROPY;
+
+ /* number of mbs to be processed in the current job */
+ s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
+
+ /* job start index x */
+ s_job.i2_mb_x = 0;
+
+ /* job start index y */
+ s_job.i2_mb_y = ps_proc->i4_mb_y;
+
+ /* proc base idx */
+ s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
+
+ /* queue the job */
+ error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
+
+ if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
+ ih264_list_terminate(ps_codec->pv_entropy_jobq);
+ }
+
+ /* update proc map */
+ pu1_proc_map[i4_mb_x] = 1;
+
+ /**************************************************/
+ /* update proc ctxt elements for encoding next mb */
+ /**************************************************/
+ /* update indices */
+ i4_mb_x ++;
+ ps_proc->i4_mb_x = i4_mb_x;
+
+ if (ps_proc->i4_mb_x == i4_wd_mbs)
+ {
+ ps_proc->i4_mb_y++;
+ ps_proc->i4_mb_x = 0;
+ }
+
+ /* update slice index */
+ ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
+
+ /* update buffers pointers */
+ ps_proc->pu1_src_buf_luma += MB_SIZE;
+ ps_proc->pu1_rec_buf_luma += MB_SIZE;
+ ps_proc->pu1_ref_buf_luma += MB_SIZE;
+
+ /*
+ * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+ * the stride per MB is MB_SIZE
+ */
+ ps_proc->pu1_src_buf_chroma += MB_SIZE;
+ ps_proc->pu1_rec_buf_chroma += MB_SIZE;
+ ps_proc->pu1_ref_buf_chroma += MB_SIZE;
+
+ /* pad right edge */
+ if (u4_pad_right_sz && (ps_proc->i4_mb_x == i4_wd_mbs - 1))
+ {
+ ih264_pad_right_luma(
+ ps_proc->pu1_src_buf_luma + MB_SIZE - u4_pad_right_sz,
+ ps_proc->i4_src_strd, MB_SIZE, u4_pad_right_sz);
+
+ ih264_pad_right_chroma(
+ ps_proc->pu1_src_buf_chroma + MB_SIZE - u4_pad_right_sz,
+ ps_proc->i4_src_strd, BLK8x8SIZE, u4_pad_right_sz);
+ }
+
+ /* pad bottom edge */
+ if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == i4_ht_mbs - 1) &&
+ ps_proc->i4_mb_x != 0)
+ {
+ ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
+ ps_proc->i4_src_strd, MB_SIZE, u4_pad_bottom_sz);
+
+ ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2,
+ ps_proc->i4_src_strd, MB_SIZE, (u4_pad_bottom_sz / 2));
+ }
+
+ /* Reset cost, distortion params */
+ ps_proc->i4_mb_cost = INT_MAX;
+ ps_proc->i4_mb_distortion = SHRT_MAX;
+
+ ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
+
+ ps_proc->pu4_mb_pu_cnt += 1;
+
+ /* deblk ctxts */
+ if (ps_proc->u4_disable_deblock_level != 1)
+ {
+ /* indices */
+ ps_bs->i4_mb_x = ps_proc->i4_mb_x;
+ ps_bs->i4_mb_y = ps_proc->i4_mb_y;
+
+#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
+ ps_deblk->i4_mb_x ++;
+
+ ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+ /*
+ * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
+ * the stride per MB is MB_SIZE
+ */
+ ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+#endif
+ }
+
+ return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Set up a process context for the job it is about to execute.
+*
+* @par Description:
+*  Starting from the mb indices (i4_mb_x, i4_mb_y) already stored in the
+*  context, this routine derives the source, recon and reference buffer
+*  pointers, invokes the codec's colour conversion routine for planar 420 and
+*  interleaved 422 inputs, pads the bottom edge of the source when the job
+*  starts at the first mb of the last mb row, and resets the per-job state:
+*  packed coefficient and header pointers, slice index, mv buffers, mb type,
+*  lambda, cost and distortion, plus the deblocking, boundary strength and
+*  n-mb contexts.
+*
+* @param[in] ps_proc
+*  Pointer to the current process context
+*
+* @returns IH264E_SUCCESS in all cases
+*
+* @remarks The colour conversion assumes that the mbs of a job do not span
+*  multiple mb rows.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
+{
+    /* contexts reached through the process context */
+    codec_t *ps_enc = ps_proc->ps_codec;
+    deblk_ctxt_t *ps_dblk_ctxt = &ps_proc->s_deblk_ctxt;
+    bs_ctxt_t *ps_bs_ctxt = &(ps_dblk_ctxt->s_bs_ctxt);
+    n_mb_process_ctxt_t *ps_nmb_ctxt = &ps_proc->s_n_mb_ctxt;
+
+    /* mv buffer of the current frame */
+    mv_buf_t *ps_mv_buf = ps_proc->ps_cur_mv_buf;
+
+    /* quant params (first entry of the table) */
+    quant_params_t *ps_quant = ps_proc->ps_qp_params[0];
+
+    /* mb position at which the job starts */
+    WORD32 i4_col = ps_proc->i4_mb_x;
+    WORD32 i4_row = ps_proc->i4_mb_y;
+
+    /* source and recon strides */
+    WORD32 i4_strd_src = ps_proc->i4_src_strd;
+    WORD32 i4_strd_rec = ps_proc->i4_rec_strd;
+
+    /* input plane pointers handed to the colour conversion */
+    UWORD8 *pu1_in_y, *pu1_in_u, *pu1_in_v;
+
+    /* rows to pad at the bottom: configured height minus display height */
+    UWORD32 u4_bot_pad = ps_enc->s_cfg.u4_ht - ps_enc->s_cfg.u4_disp_ht;
+
+    /********************************************************************/
+    /*                            BEGIN INIT                            */
+    /********************************************************************/
+
+    /* mbs handled per pass of the process function, capped at MAX_NMB */
+    if (ps_proc->i4_wd_mbs > MAX_NMB)
+    {
+        ps_proc->i4_nmb_ntrpy = MAX_NMB;
+        ps_proc->u4_nmb_me = MAX_NMB;
+    }
+    else
+    {
+        ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
+        ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
+    }
+
+    /* luma pointers: one mb is MB_SIZE samples wide and MB_SIZE rows tall */
+    ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_col * MB_SIZE) + i4_strd_src * (i4_row * MB_SIZE);
+    ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_col * MB_SIZE) + i4_strd_rec * (i4_row * MB_SIZE);
+    ps_proc->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma_base + (i4_col * MB_SIZE) + i4_strd_rec * (i4_row * MB_SIZE);
+
+    /* chroma pointers: same MB_SIZE column step, BLK8x8SIZE rows per mb */
+    ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_col * MB_SIZE) + i4_strd_src * (i4_row * BLK8x8SIZE);
+    ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_col * MB_SIZE) + i4_strd_rec * (i4_row * BLK8x8SIZE);
+    ps_proc->pu1_ref_buf_chroma = ps_proc->pu1_ref_buf_chroma_base + (i4_col * MB_SIZE) + i4_strd_rec * (i4_row * BLK8x8SIZE);
+
+    /*
+     * Colour space conversion.
+     * NOTE : the mbs to process are assumed not to span multiple rows.
+     * Semi-planar inputs (and any other format) need no work here.
+     */
+    if (ps_enc->s_cfg.e_inp_color_fmt == IV_YUV_420P)
+    {
+        pu1_in_y = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_col * MB_SIZE) +
+                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_row * MB_SIZE);
+
+        pu1_in_u = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_col * BLK8x8SIZE) +
+                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_row * BLK8x8SIZE);
+
+        pu1_in_v = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_col * BLK8x8SIZE) +
+                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_row * BLK8x8SIZE);
+
+        ps_enc->pf_ih264e_conv_420p_to_420sp(
+                        pu1_in_y, pu1_in_u, pu1_in_v,
+                        ps_proc->pu1_src_buf_luma,
+                        ps_proc->pu1_src_buf_chroma, MB_SIZE,
+                        ps_proc->i4_wd_mbs * MB_SIZE,
+                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
+                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
+                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
+                        ps_proc->i4_src_strd, ps_proc->i4_src_strd, 1);
+    }
+    else if (ps_enc->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
+    {
+        /* the interleaved input advances by twice MB_SIZE per mb column */
+        pu1_in_y = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_col * MB_SIZE * 2)
+                        + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_row * MB_SIZE);
+
+        ps_enc->pf_ih264e_fmt_conv_422i_to_420sp(
+                        ps_proc->pu1_src_buf_luma,
+                        ps_proc->pu1_src_buf_chroma,
+                        ps_proc->pu1_src_buf_chroma + 1, pu1_in_y,
+                        ps_proc->i4_wd_mbs * MB_SIZE, MB_SIZE,
+                        ps_proc->i4_src_strd, ps_proc->i4_src_strd,
+                        ps_proc->i4_src_strd,
+                        ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
+    }
+
+    /* bottom edge of the source: only for the first mb of the last mb row */
+    if ((0 != u4_bot_pad) && (i4_row == ps_proc->i4_ht_mbs - 1) && (0 == i4_col))
+    {
+        UWORD8 *pu1_luma_pad = ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_bot_pad) * ps_proc->i4_src_strd;
+        UWORD8 *pu1_chroma_pad = ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_bot_pad) * ps_proc->i4_src_strd / 2;
+
+        ih264_pad_bottom(pu1_luma_pad, ps_proc->i4_src_strd, MB_SIZE, u4_bot_pad);
+
+        ih264_pad_bottom(pu1_chroma_pad, ps_proc->i4_src_strd, MB_SIZE, (u4_bot_pad / 2));
+    }
+
+    /* packed mb coefficient and header data, one slot per mb row */
+    ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_row * ps_enc->u4_size_coeff_data;
+    ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_row * ps_enc->u4_size_header_data;
+
+    /* slice index of the first mb of the job */
+    ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_row * ps_proc->i4_wd_mbs + i4_col];
+
+    /*********************************************************************/
+    /* ih264e_init_quant_params() is called at the pic init level and    */
+    /* would have initialized the qp.                                    */
+    /* TODO_LATER: quant params are currently assumed not to change      */
+    /* across mbs. When they do, update ps_qp_params accordingly.        */
+    /*********************************************************************/
+
+    /* mv buffer of the current row */
+    ps_proc->ps_pu = ps_mv_buf->ps_pic_pu + (i4_row * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+
+    /* mv buffer of the row above; row 0 has none and refers to itself */
+    ps_proc->ps_top_row_pu_ME = (0 == i4_row) ?
+                    ps_mv_buf->ps_pic_pu :
+                    ps_mv_buf->ps_pic_pu + ((i4_row - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+
+    /* per-mb pu counts of the current row */
+    ps_proc->pu4_mb_pu_cnt = ps_mv_buf->pu4_mb_pu_cnt + (i4_row * ps_proc->i4_wd_mbs);
+
+    /* default mb type */
+    ps_proc->u4_mb_type = I16x16;
+
+    /* lambda looked up from the mb qp */
+    ps_proc->u4_lambda = gu1_qp0[ps_quant->u1_mb_qp];
+
+    /* cost and distortion start at their maxima */
+    ps_proc->i4_mb_distortion = SHRT_MAX;
+    ps_proc->i4_mb_cost = INT_MAX;
+
+    /* at the start of a row the left / top-left neighbours carry no distortion */
+    if (0 == i4_col)
+    {
+        ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
+        ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
+        ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
+
+        /* at the start of the picture the whole top row is cleared as well */
+        if (0 == i4_row)
+        {
+            memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
+        }
+    }
+
+    /**********************/
+    /* init deblk context */
+    /**********************/
+    ps_dblk_ctxt->i4_mb_x = ps_proc->i4_mb_x;
+
+    /* Deblocking lags the current mb proc by one row. Intra prediction  */
+    /* has to use non deblocked samples as reference, so MB 0 of row 0   */
+    /* can be deblocked only once MB 0 of row 1 is encoded. For          */
+    /* simplicity deblocking lags proc by a full row.                    */
+    ps_dblk_ctxt->i4_mb_y = ps_proc->i4_mb_y - 1;
+
+    /* recon pointers of the row being deblocked */
+    ps_dblk_ctxt->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_strd_rec * (ps_dblk_ctxt->i4_mb_y * MB_SIZE);
+    ps_dblk_ctxt->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_strd_rec * (ps_dblk_ctxt->i4_mb_y * BLK8x8SIZE);
+
+    /* boundary strength context follows the mb being encoded */
+    ps_bs_ctxt->i4_mb_x = ps_proc->i4_mb_x;
+    ps_bs_ctxt->i4_mb_y = ps_proc->i4_mb_y;
+
+    /* n_mb_process context starts at column 0 of the deblocked row */
+    ps_nmb_ctxt->i4_mb_x = 0;
+    ps_nmb_ctxt->i4_mb_y = ps_dblk_ctxt->i4_mb_y;
+    ps_nmb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma padding of the recon picture
+*  around one mb
+*
+* @par Description:
+*  Depending on where the mb sits in the picture, the recon samples are
+*  replicated into the border: to the left for the first mb column, to the
+*  right for the last mb column, below the picture for the last mb of the last
+*  mb row, and above the picture for every mb of the first mb row. The left and
+*  right columns are tested independently, so a picture that is a single mb
+*  wide gets both of its side borders (and the bottom border) padded.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+*  Pointer to the recon luma samples of the mb at (i4_mb_x, i4_mb_y)
+*
+* @param[in] pu1_curr_pic_chroma
+*  Pointer to the recon chroma samples of the mb at (i4_mb_x, i4_mb_y)
+*
+* @param[in] i4_mb_x
+*  mb index x
+*
+* @param[in] i4_mb_y
+*  mb index y
+*
+* @param[in] i4_pad_ht
+*  number of luma rows to be padded to the left / right (chroma uses half)
+*
+* @returns IH264E_SUCCESS in all cases
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
+                                       UWORD8 *pu1_curr_pic_luma,
+                                       UWORD8 *pu1_curr_pic_chroma,
+                                       WORD32 i4_mb_x,
+                                       WORD32 i4_mb_y,
+                                       WORD32 i4_pad_ht)
+{
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* strides */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    if (i4_mb_x == 0)
+    {
+        /* padding left luma */
+        ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
+
+        /* padding left chroma */
+        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
+    }
+
+    /*
+     * Not an 'else': when the picture is one mb wide the same mb is both the
+     * first and the last column and needs its right (and bottom) border too.
+     * The left padding above must stay first, as the bottom padding below
+     * copies full rows.
+     */
+    if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+    {
+        /* padding right luma */
+        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
+
+        /* padding right chroma */
+        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
+
+        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
+        {
+            /*
+             * NOTE(review): stepping MB_SIZE + PAD_RIGHT past the last mb to
+             * reach the padding start looks like it relies on the recon
+             * stride being width + PAD_LEFT + PAD_RIGHT - confirm.
+             */
+            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
+            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
+
+            /* padding bottom luma */
+            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
+
+            /* padding bottom chroma */
+            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
+        }
+    }
+
+    if (i4_mb_y == 0)
+    {
+        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
+        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
+        WORD32 wd = MB_SIZE;
+
+        /* first column: start from the left border so that it is copied too */
+        if (i4_mb_x == 0)
+        {
+            pu1_rec_luma -= PAD_LEFT;
+            pu1_rec_chroma -= PAD_LEFT;
+
+            wd += PAD_LEFT;
+        }
+
+        /* last column (possibly the same mb): extend over the right border */
+        if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+        {
+            wd += PAD_RIGHT;
+        }
+
+        /* padding top luma */
+        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
+
+        /* padding top chroma */
+        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
+    }
+
+    return IH264E_SUCCESS;
+}
+
+
+
+
+/**
+*******************************************************************************
+*
+* @brief This function performs deblocking and recon padding for 'n' MBs
+*
+* @par Description:
+*  Deblocking lags encoding by one mb row. Each call checks whether 'n' newly
+*  encoded mbs have accumulated (or the end of the row is reached) and, if so,
+*  deblocks the corresponding mbs of the row above, pads the left / right
+*  borders of rows that can no longer change, and pads the top border once
+*  mb row 0 is final. When the last mb of the last row is reached, the last row
+*  is deblocked in one go and the bottom border is padded. Pictures with two
+*  mb rows never reach mb row 2, so their top border is padded at that point
+*  as well. Despite the function name, no half-pel generation is done here.
+*
+* @param[in] ps_proc
+*  Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+*  Recon luma of the current MB (used for left / right padding when
+*  deblocking is disabled)
+*
+* @param[in] pu1_curr_pic_chroma
+*  Recon chroma of the current MB (used for left / right padding when
+*  deblocking is disabled)
+*
+* @param[in] i4_mb_x
+*  Column value of current MB processed
+*
+* @param[in] i4_mb_y
+*  Current row processed
+*
+* @returns IH264E_SUCCESS in all cases
+*
+* @remarks Nothing beyond the deblock-disabled side padding is done while
+*  i4_mb_y is 0.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
+                                                     UWORD8 *pu1_curr_pic_luma,
+                                                     UWORD8 *pu1_curr_pic_chroma,
+                                                     WORD32 i4_mb_x,
+                                                     WORD32 i4_mb_y)
+{
+    /* codec context */
+    codec_t *ps_codec = ps_proc->ps_codec;
+
+    /* n_mb processing context */
+    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
+
+    /* deblk context */
+    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+    /* strides */
+    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
+
+    /* loop variables */
+    WORD32 row, i, j, col;
+
+    /* Padding Width */
+    UWORD32 u4_pad_wd;
+
+    /* deblk_map of the row being deblocked (as of function entry) */
+    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
+
+    /* deblk_map_previous row */
+    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
+
+    /* horizontal offset of the top padding source from the recon base */
+    WORD32 u4_pad_top = 0;
+
+    /* set when the needed mbs of the row above the deblocked row are done */
+    WORD32 u4_deblk_prev_row = 0;
+
+    /* Number of mbs to be processed */
+    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
+
+    /* Number of mbs actually processed
+     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
+    WORD32 i4_n_mb_process_count = 0;
+
+    UWORD8 *pu1_pad_bottom_src = NULL;
+
+    UWORD8 *pu1_pad_top_src = NULL;
+
+    UWORD8 *pu1_pad_src_luma = NULL;
+    UWORD8 *pu1_pad_src_chroma = NULL;
+
+    /* without deblocking the recon of the current mb is already final, */
+    /* so its side borders can be padded right away                     */
+    if (ps_proc->u4_disable_deblock_level == 1)
+    {
+        /* If left most MB is processed, then pad left */
+        if (i4_mb_x == 0)
+        {
+            /* padding left luma */
+            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+            /* padding left chroma */
+            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
+        }
+        /*last col*/
+        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
+        {
+            /* padding right luma */
+            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+            /* padding right chroma */
+            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
+        }
+    }
+
+    if (i4_mb_y > 0)
+    {
+        /* if number of mb's to be processed are less than 'N', go back.
+         * exception to the above clause is end of row */
+        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
+        {
+            return IH264E_SUCCESS;
+        }
+        else
+        {
+            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
+
+            u4_deblk_prev_row = 1;
+
+            /* checking whether the top rows are deblocked */
+            for (col = 0; col < i4_n_mb_process_count; col++)
+            {
+                u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
+            }
+
+            /* checking whether the top right MB is deblocked */
+            if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
+            {
+                u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
+            }
+
+            /* performing deblocking for required number of MBs */
+            if (ps_proc->u4_disable_deblock_level != 1)
+            {
+                /* Top or Top right MBs not deblocked */
+                if (u4_deblk_prev_row != 1)
+                {
+                    return IH264E_SUCCESS;
+                }
+
+                for (row = 0; row < i4_n_mb_process_count; row++)
+                {
+                    ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+                    /* the row above the deblocked row is final now: pad its sides */
+                    if (ps_deblk->i4_mb_y > 0)
+                    {
+                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
+                        {
+                            /* padding left luma */
+                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                            /* padding left chroma */
+                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
+                        }
+
+                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
+                        {
+                            /* padding right luma */
+                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                            /* padding right chroma */
+                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
+                        }
+                    }
+                    ps_deblk->i4_mb_x++;
+
+                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+
+                }
+            }
+            else
+            {
+                /* nothing to filter: just advance the deblk context */
+                ps_deblk->i4_mb_x += i4_n_mb_process_count;
+
+                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
+                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
+            }
+
+            /* while mb row 2 is encoded, mb row 0 is final: pad the top for this chunk */
+            if (i4_mb_y == 2)
+            {
+                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
+                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
+
+                if (ps_n_mb_ctxt->i4_mb_x == 0)
+                {
+                    u4_pad_wd += PAD_LEFT;
+                    u4_pad_top = -PAD_LEFT;
+                }
+
+                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+                {
+                    u4_pad_wd += PAD_RIGHT;
+                }
+
+                /* padding top luma */
+                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
+
+                /* padding top chroma */
+                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
+            }
+
+            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
+
+            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
+            {
+                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
+                {
+                    /* Bottom Padding is done in one stretch for the entire width */
+                    if (ps_proc->u4_disable_deblock_level != 1)
+                    {
+                        /* no further row will trigger deblocking of the last row: do it here */
+                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
+
+                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
+
+                        ps_n_mb_ctxt->i4_mb_x = 0;
+                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
+                        ps_deblk->i4_mb_x = 0;
+                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
+
+                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
+                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
+
+                        /* whole chunks of i4_n_mbs first, then the remainder */
+                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
+
+                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
+
+                        /*
+                         * NOTE(review): pu1_deblk_map was derived from the deblk row
+                         * at function entry, so the marks below land in that row and
+                         * not in the last row - confirm this is intended.
+                         */
+                        for (i = 0; i < j; i++)
+                        {
+                            for (col = 0; col < i4_n_mbs; col++)
+                            {
+                                ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+                                ps_deblk->i4_mb_x++;
+                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+                                ps_n_mb_ctxt->i4_mb_x++;
+                            }
+                        }
+
+                        for (col = 0; col < i4_n_mb_process_count; col++)
+                        {
+                            ih264e_deblock_mb(ps_proc, ps_deblk);
+
+                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
+
+                            ps_deblk->i4_mb_x++;
+                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
+                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
+                            ps_n_mb_ctxt->i4_mb_x++;
+                        }
+
+                        /* side borders of the last two mb rows, left first */
+                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
+
+                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
+
+                        /* padding left luma */
+                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                        /* padding left chroma */
+                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
+
+                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
+                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
+
+                        /* padding left luma */
+                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
+
+                        /* padding left chroma */
+                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
+
+                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
+
+                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
+
+                        /* padding right luma */
+                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                        /* padding right chroma */
+                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
+
+                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
+                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
+
+                        /* padding right luma */
+                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
+
+                        /* padding right chroma */
+                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
+
+                    }
+
+                    /* padding bottom luma */
+                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
+                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
+
+                    /* padding bottom chroma */
+                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
+                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
+
+                    /*
+                     * The chunked top padding above is tied to i4_mb_y == 2, a row
+                     * that does not exist when the picture has two mb rows or
+                     * fewer. Pad the top here instead, in one stretch for the
+                     * entire width like the bottom. Row 0 and its side borders are
+                     * final at this point. (A single mb row never gets here, as
+                     * this code sits under the i4_mb_y > 0 check.)
+                     */
+                    if (ps_proc->i4_ht_mbs <= 2)
+                    {
+                        /* padding top luma */
+                        pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
+                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
+
+                        /* padding top chroma */
+                        pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
+                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
+                    }
+                }
+            }
+        }
+    }
+
+    return IH264E_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+* The mb to be coded is taken and is evaluated over a predefined set of modes
+* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
+* is selected and using intra/inter prediction filters, prediction is carried out.
+* The deviation between src and pred signal constitutes error signal. This error
+* signal is transformed (hierarchical transform if necessary) and quantized. The
+* quantized residue is packed in to entropy buffer for entropy coding. This is
+* repeated for all the mb's enlisted under the job.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process(process_ctxt_t *ps_proc)
+{
+ /* error status */
+ WORD32 error_status = IH264_SUCCESS;
+
+ /* codec context */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* cbp luma, chroma */
+ UWORD32 u4_cbp_l, u4_cbp_c;
+
+ /* width in mbs */
+ WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+ /* loop var */
+ WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
+
+ /* valid modes */
+ UWORD32 u4_valid_modes = 0;
+
+ /* gate threshold */
+ WORD32 i4_gate_threshold = 0;
+
+ /* is intra */
+ WORD32 luma_idx, chroma_idx, is_intra;
+
+ /* temp variables */
+ WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+
+ /* list of modes for evaluation */
+ if (ps_proc->i4_slice_type == ISLICE)
+ {
+ /* enable intra 16x16 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+
+ /* enable intra 8x8 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
+
+ /* enable intra 4x4 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+ }
+ else if (ps_proc->i4_slice_type == PSLICE)
+ {
+ /* enable intra 16x16 */
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
+
+ /* enable intra 4x4 */
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+ {
+ u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
+ }
+
+ /* enable inter 16x16 */
+ u4_valid_modes |= (1 << P16x16);
+ }
+
+
+ /* init entropy */
+ ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
+ ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
+ ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
+
+ /* compute recon when :
+ * 1. current frame is to be used as a reference
+ * 2. dump recon for bit stream sanity check
+ */
+ ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
+ ps_codec->s_cfg.u4_enable_recon;
+
+ /* Encode 'n' macroblocks,
+ * 'n' being the number of mbs dictated by current proc ctxt */
+ for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
+ {
+ /* since we have not yet found sad, we have not yet got min sad */
+ /* we need to initialize these variables for each MB */
+ /* TODO how to get the min sad into the codec */
+ ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
+ ps_proc->u4_min_sad_reached = 0;
+
+ /* mb analysis */
+ {
+ /* temp var */
+ WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
+
+ /* force intra refresh ? */
+ WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
+ (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
+ (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
+
+ /* evaluate inter 16x16 modes */
+ if (u4_valid_modes & (1 << P16x16))
+ {
+ /* compute nmb me */
+ if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
+ {
+ ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
+ i4_wd_mbs - ps_proc->i4_mb_x));
+ }
+
+ /* set pointers to ME data appropriately for other modules to use */
+ {
+ UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
+
+ /* get the min sad condition for current mb */
+ ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
+ ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
+
+ ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_skip_mv);
+ ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
+ ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_pred_mv);
+
+ ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
+ ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
+ ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
+ ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
+ ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
+
+ /* get the best sub pel buffer */
+ ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
+ ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
+ }
+ ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+ }
+ else
+ {
+ /* Derive neighbor availability for the current macroblock */
+ ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
+
+ ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
+ }
+
+ /*
+ * If air says intra, we need to force the following code path to evaluate intra
+ * The easy way is just to say that the inter cost is too much
+ */
+ if (!i4_air_enable_inter)
+ {
+ ps_proc->u4_min_sad_reached = 0;
+ ps_proc->i4_mb_cost = INT_MAX;
+ ps_proc->i4_mb_distortion = INT_MAX;
+ }
+ else if (ps_proc->u4_mb_type == PSKIP)
+ {
+ goto UPDATE_MB_INFO;
+ }
+
+ /* wait until the proc of [top + 1] mb is computed.
+ * We wait till the proc dependencies are satisfied */
+ if(ps_proc->i4_mb_y > 0)
+ {
+ /* proc map */
+ UWORD8 *pu1_proc_map_top;
+
+ pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
+
+ while (1)
+ {
+ volatile UWORD8 *pu1_buf;
+ WORD32 idx = i4_mb_idx + 1;
+
+ idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
+ pu1_buf = pu1_proc_map_top + idx;
+ if(*pu1_buf)
+ break;
+ ithread_yield();
+ }
+ }
+
+ /* If we already have the minimum sad, there is no point in searching for sad again */
+ if (ps_proc->u4_min_sad_reached == 0)
+ {
+ /* intra gating in inter slices */
+ /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
+ if (i4_air_enable_inter && ps_proc->i4_slice_type == PSLICE && ps_codec->u4_inter_gate)
+ {
+ /* distortion of neighboring blocks */
+ WORD32 i4_distortion[4];
+
+ i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
+
+ i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
+
+ i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
+
+ i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
+
+ i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
+
+ }
+
+ /* If we are going to force intra we need to evaluate intra irrespective of gating */
+ if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
+ {
+ /* evaluate intra 4x4 modes */
+ if (u4_valid_modes & (1 << I4x4))
+ {
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+ {
+ ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
+ }
+ else
+ {
+ ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
+ }
+ }
+
+ /* evaluate intra 16x16 modes */
+ if (u4_valid_modes & (1 << I16x16))
+ {
+ ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
+ }
+
+ /* evaluate intra 8x8 modes */
+ if (u4_valid_modes & (1 << I8x8))
+ {
+ ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
+ }
+ }
+
+ }
+ }
+
+ /* is intra */
+ if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
+ {
+ luma_idx = ps_proc->u4_mb_type;
+ chroma_idx = 0;
+ is_intra = 1;
+
+ /* evaluate chroma blocks for intra */
+ ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
+ }
+ else
+ {
+ luma_idx = 3;
+ chroma_idx = 1;
+ is_intra = 0;
+ }
+ ps_proc->u4_is_intra = is_intra;
+
+ /* redo MV pred of neighbors in the case intra mb */
+ /* TODO : currently called unconditionally, needs to be called only in the case of intra
+ * to modify neighbors */
+ if (ps_proc->i4_slice_type != ISLICE)
+ {
+ ih264e_mv_pred(ps_proc);
+ }
+
+ /* Perform luma mb core coding */
+ u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
+
+ /* Perform luma mb core coding */
+ u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
+
+ /* coded block pattern */
+ ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
+
+ /* mb skip */
+ if (is_intra == 0)
+ {
+ if (ps_proc->u4_cbp == 0)
+ {
+ /* get skip mv */
+ UWORD32 u4_for_me = 0;
+ ih264e_find_skip_motion_vector(ps_proc,u4_for_me);
+
+ /* skip ? */
+ if (ps_proc->ps_skip_mv->i2_mvx == ps_proc->ps_pu->s_l0_mv.i2_mvx &&
+ ps_proc->ps_skip_mv->i2_mvy == ps_proc->ps_pu->s_l0_mv.i2_mvy)
+ {
+ ps_proc->u4_mb_type = PSKIP;
+ }
+ }
+ }
+
+UPDATE_MB_INFO:
+
+ /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
+ ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
+
+ /**********************************************************************/
+ /* if disable deblock level is '0' this implies enable deblocking for */
+ /* all edges of all macroblocks with out any restrictions */
+ /* */
+ /* if disable deblock level is '1' this implies disable deblocking for*/
+ /* all edges of all macroblocks with out any restrictions */
+ /* */
+ /* if disable deblock level is '2' this implies enable deblocking for */
+ /* all edges of all macroblocks except edges overlapping with slice */
+ /* boundaries. This option is not currently supported by the encoder */
+ /* hence the slice map should be of no significance to perform debloc */
+ /* king */
+ /**********************************************************************/
+
+ if (ps_proc->u4_compute_recon)
+ {
+ /* deblk context */
+ /* src pointers */
+ UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
+ UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
+
+ /* src indices */
+ UWORD32 i4_mb_x = ps_proc->i4_mb_x;
+ UWORD32 i4_mb_y = ps_proc->i4_mb_y;
+
+ /* compute blocking strength */
+ if (ps_proc->u4_disable_deblock_level != 1)
+ {
+ ih264e_compute_bs(ps_proc);
+ }
+
+ /* nmb deblocking and hpel and padding */
+ ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
+ pu1_cur_pic_chroma, i4_mb_x,
+ i4_mb_y);
+ }
+
+ /* update the context after for coding next mb */
+ error_status |= ih264e_update_proc_ctxt(ps_proc);
+
+ /* Once the last row is processed, mark the buffer status appropriately */
+ if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
+ {
+ /* Pointer to current picture buffer structure */
+ pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
+
+ /* Pointer to current picture's mv buffer structure */
+ mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
+
+ /**********************************************************************/
+ /* if disable deblock level is '0' this implies enable deblocking for */
+ /* all edges of all macroblocks with out any restrictions */
+ /* */
+ /* if disable deblock level is '1' this implies disable deblocking for*/
+ /* all edges of all macroblocks with out any restrictions */
+ /* */
+ /* if disable deblock level is '2' this implies enable deblocking for */
+ /* all edges of all macroblocks except edges overlapping with slice */
+ /* boundaries. This option is not currently supported by the encoder */
+ /* hence the slice map should be of no significance to perform debloc */
+ /* king */
+ /**********************************************************************/
+ error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
+
+ error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
+
+ if (ps_codec->s_cfg.u4_enable_recon)
+ {
+ /* pic cnt */
+ ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
+
+ /* rec buffers */
+ ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic;
+
+ /* is last? */
+ ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
+
+ /* frame time stamp */
+ ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
+ ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
+ }
+
+ }
+ }
+
+ DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
+
+ return error_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  function to receive frame qp and pic type before encoding
+*
+* @par Description:
+*  Before encoding the frame, this function calls the rc library for frame qp
+*  and picture type. A pending application request for an I/IDR frame is
+*  forwarded to the rc library first and is cleared again before returning
+*  (the request is not sticky). The rc picture type is then translated to the
+*  codec picture type, and promoted to IDR when the picture count is a
+*  multiple of the configured IDR interval or an IDR frame was requested.
+*
+* @param[in] ps_codec
+*  Pointer to codec context. u4_frame_qp and force_curr_frame_type are written;
+*  s_rate_control.e_pic_type is handed to the rc library by address.
+*
+* @param[in] cur_pic_cnt
+*  pic count
+*
+* @param[out] pi4_pic_type
+*  pic type
+*
+* @returns skip_src
+*  if the source frame rate and target frame rate are not identical, the encoder
+*  skips few source frames. skip_src is set when the source need not be encoded.
+*
+* @remarks
+*  An IDR interval of 0 is treated as "no periodic IDR": the interval test is
+*  skipped so that no modulo by zero is ever evaluated.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type)
+{
+    /* rate control context */
+    rate_control_ctxt_t *ps_rate_control = &ps_codec->s_rate_control;
+
+    /* frame qp, filled in by the rc library */
+    UWORD8 u1_frame_qp;
+
+    /* pic type */
+    PIC_TYPE_T pic_type = PIC_NA;
+
+    /* should src be skipped */
+    WORD32 skip_src = 0;
+
+    /* frames are assumed to arrive one time stamp apart */
+    WORD32 delta_time_stamp = 1;
+
+    /* see if the app requires any specific frame */
+    if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME)
+    {
+        irc_force_I_frame(ps_codec->s_rate_control.pps_rate_control_api);
+    }
+
+    /* call rate control lib to get curr pic type and qp to be used */
+    skip_src = ih264e_rc_pre_enc(ps_rate_control->pps_rate_control_api,
+                                 ps_rate_control->pps_pd_frm_rate,
+                                 ps_rate_control->pps_time_stamp,
+                                 ps_rate_control->pps_frame_time,
+                                 delta_time_stamp,
+                                 (ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs),
+                                 &ps_rate_control->e_pic_type,
+                                 &u1_frame_qp);
+
+    /* translate the rc picture type; unknown types leave pic_type at PIC_NA */
+    switch (ps_rate_control->e_pic_type)
+    {
+        case I_PIC:
+            pic_type = PIC_I;
+            break;
+
+        case P_PIC:
+            pic_type = PIC_P;
+            break;
+
+        case B_PIC:
+            pic_type = PIC_B;
+            break;
+
+        default:
+            break;
+    }
+
+    /* is idr? the interval is checked for zero first to avoid a modulo by zero */
+    if ((0 != ps_codec->s_cfg.u4_idr_frm_interval &&
+         0 == cur_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval) ||
+        ps_codec->force_curr_frame_type == IV_IDR_FRAME)
+    {
+        pic_type = PIC_IDR;
+    }
+
+    /* force frame tag is not sticky */
+    if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME)
+    {
+        ps_codec->force_curr_frame_type = IV_NA_FRAME;
+    }
+
+    /* qp : the rc library value is mapped through the mpeg2 -> h264 qp table */
+    ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_frame_qp];
+
+    /* pic type */
+    *pi4_pic_type = pic_type;
+
+    return skip_src;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Function to update rc context after encoding
+*
+* @par Description
+*  This function updates the rate control context after the frame is encoded.
+*  The per-thread frame statistics (number of mbs, sad and qp sums per mb type,
+*  intra mb cost, activity, and the bits gathered by
+*  ih264e_update_rc_bits_info()) of the u4_num_cores process contexts that
+*  belong to the selected context set are accumulated and passed on to the
+*  rate control library for updating the rc model.
+*
+* @param[in] ps_codec
+*  Handle to codec context
+*
+* @param[in] ctxt_sel
+*  frame context selector; non-zero selects the second half of the process
+*  context array (starting at MAX_PROCESS_CTXT / 2)
+*
+* @param[in] pic_cnt
+*  pic count
+*
+* @returns i4_stuffing_byte
+*  number of stuffing bytes (if necessary)
+*
+* @remarks
+*  When rate control flags a post encode skip for this context, i4_frame_num
+*  is decremented so the skipped frame does not consume a frame number.
+*  An unexpected ps_codec->pic_type asserts in debug builds; when asserts are
+*  compiled out the rc picture type falls back to P_PIC instead of being left
+*  uninitialized.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt)
+{
+    /* proc set base idx */
+    WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
+
+    /* first proc ctxt of the selected set */
+    process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
+
+    /* frame qp */
+    UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
+
+    /* cbr rc return status */
+    WORD32 i4_stuffing_byte = 0;
+
+    /* current frame stats */
+    frame_info_t s_frame_info;
+    picture_type_e rc_pic_type;
+
+    /* temp var */
+    WORD32 i, j;
+
+    /********************************************************************/
+    /*                            BEGIN INIT                            */
+    /********************************************************************/
+
+    /* init frame info */
+    irc_init_frame_info(&s_frame_info);
+
+    /* get frame info */
+    for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
+    {
+        /*****************************************************************/
+        /* One frame can be encoded by max of u4_num_cores threads       */
+        /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
+        /* u4_num_cores threads                                          */
+        /*****************************************************************/
+        for (j = 0; j < MAX_MB_TYPE; j++)
+        {
+            s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
+
+            s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
+
+            s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
+        }
+
+        s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
+
+        s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
+
+        /*****************************************************************/
+        /* gather number of residue and header bits consumed by the frame*/
+        /*****************************************************************/
+        ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
+    }
+
+    /* get pic type */
+    switch (ps_codec->pic_type)
+    {
+        case PIC_I:
+        case PIC_IDR:
+            rc_pic_type = I_PIC;
+            break;
+        case PIC_P:
+            rc_pic_type = P_PIC;
+            break;
+        case PIC_B:
+            rc_pic_type = B_PIC;
+            break;
+        default:
+            assert(0);
+            /* defined fallback for builds where assert() is compiled out */
+            rc_pic_type = P_PIC;
+            break;
+    }
+
+    /* update rc lib with current frame stats */
+    i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
+                                          &(s_frame_info),
+                                          ps_codec->s_rate_control.pps_pd_frm_rate,
+                                          ps_codec->s_rate_control.pps_time_stamp,
+                                          ps_codec->s_rate_control.pps_frame_time,
+                                          (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
+                                          &rc_pic_type,
+                                          pic_cnt,
+                                          &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
+                                          u1_frame_qp,
+                                          &ps_codec->s_rate_control.num_intra_in_prev_frame,
+                                          &ps_codec->s_rate_control.i4_avg_activity);
+
+    /* in case the frame needs to be skipped, the frame num should not be incremented */
+    if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
+    {
+        ps_codec->i4_frame_num --;
+    }
+
+    return i4_stuffing_byte;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* entry point of a spawned encoder thread
+*
+* @par Description:
+* The encoder thread dequeues a proc/entropy job from the encoder queue and
+* calls necessary routines. Each loop iteration first tries the entropy job
+* queue (under pv_entropy_mutex, and only while the entropy-active flag of the
+* current context set is clear), and otherwise falls back to a blocking
+* dequeue from the process job queue. A dequeued job is run on the process
+* context selected by the thread id plus the job's proc base index.
+*
+* The loop ends when the process queue dequeue fails for a thread with a
+* non-zero id, or, for the thread with id 0, when a subsequent blocking
+* entropy dequeue fails as well.
+*
+* @param[in] pv_proc
+* Process context corresponding to the thread
+*
+* @returns status of the last job queue dequeue. The OR-ed status of the jobs
+* that were executed is stored in i4_error_code of the process context last
+* used by this thread.
+*
+* @remarks
+* The entropy-active flag is set here when an entropy job is picked up; it is
+* not cleared in this function (presumably done by the entropy path - confirm).
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process_thread(void *pv_proc)
+{
+ /* status of the most recent list dequeue; this is the return value */
+ IH264_ERROR_T ret = IH264_SUCCESS;
+ /* OR of the status codes returned by the jobs run on this thread */
+ WORD32 error_status = IH264_SUCCESS;
+
+ /* proc ctxt */
+ process_ctxt_t *ps_proc = pv_proc;
+
+ /* codec ctxt */
+ codec_t *ps_codec = ps_proc->ps_codec;
+
+ /* structure to represent a processing job entry */
+ job_t s_job;
+
+ /* blocking flag for the entropy dequeue : it starts non-blocking and is
+ * switched to blocking by the thread with id 0 once its process queue
+ * dequeue has failed */
+ WORD32 is_blocking = 0;
+
+ /* set affinity */
+ ithread_set_affinity(ps_proc->i4_id);
+
+ while(1)
+ {
+ /* dequeue a job from the entropy queue */
+ {
+ int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
+
+ /* codec context selector : parity of the encode api call count */
+ WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+ volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
+
+ /* have the lock; if locking failed the entropy queue is skipped for this iteration */
+ if (error == 0)
+ {
+ if (*pu4_buf == 0)
+ {
+ /* no entropy threads are active, try dequeuing a job from the entropy queue */
+ ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
+ if (IH264_SUCCESS == ret)
+ {
+ *pu4_buf = 1;
+ ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+ goto WORKER;
+ }
+ else if(is_blocking)
+ {
+ ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+ break;
+ }
+ }
+ ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
+ }
+ }
+
+ /* dequeue a job from the process queue (blocking) */
+ ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
+ if (IH264_SUCCESS != ret)
+ {
+ if(ps_proc->i4_id)
+ break;
+ else
+ {
+ is_blocking = 1;
+ continue;
+ }
+ }
+
+WORKER:
+ /* choose appropriate proc context based on proc_base_idx */
+ ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
+
+ switch (s_job.i4_cmd)
+ {
+ case CMD_PROCESS:
+ ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
+ ps_proc->i4_mb_x = s_job.i2_mb_x;
+ ps_proc->i4_mb_y = s_job.i2_mb_y;
+
+ /* init process context (its return value is not checked here) */
+ ih264e_init_proc_ctxt(ps_proc);
+
+ /* core code all mbs enlisted under the current job */
+ error_status |= ih264e_process(ps_proc);
+ break;
+
+ case CMD_ENTROPY:
+ ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
+ ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
+ ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
+
+ /* init entropy (its return value is not checked here) */
+ ih264e_init_entropy_ctxt(ps_proc);
+
+ /* entropy code all mbs enlisted under the current job */
+ error_status |= ih264e_entropy(ps_proc);
+ break;
+
+ default:
+ error_status |= IH264_FAIL;
+ break;
+ }
+ }
+
+ /* send error code : stored in the proc context last selected by this thread */
+ ps_proc->i4_error_code = error_status;
+ return ret;
+}
diff --git a/encoder/ih264e_process.h b/encoder/ih264e_process.h
new file mode 100755
index 0000000..9715434
--- /dev/null
+++ b/encoder/ih264e_process.h
@@ -0,0 +1,364 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_process.h
+*
+* @brief
+* Contains functions for codec thread
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_PROCESS_H_
+#define IH264E_PROCESS_H_
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function generates sps, pps set on request
+*
+* @par Description
+* When the encoder is set in header generation mode, the following function
+* is called. This generates sps and pps headers and returns the control back
+* to caller.
+*
+* @param[in] ps_codec
+* pointer to codec context
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IH264E_ERROR_T ih264e_generate_sps_pps
+ (
+ codec_t *ps_codec
+ );
+
+/**
+*******************************************************************************
+*
+* @brief initialize entropy context.
+*
+* @par Description:
+* Before invoking the call to perform to entropy coding the entropy context
+* associated with the job needs to be initialized. This involves the start
+* mb address, end mb address, slice index and the pointer to location at
+* which the mb residue info and mb header info are packed.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief entry point for entropy coding
+*
+* @par Description
+* This function calls lower level functions to perform entropy coding for a
+* group (n rows) of mb's. After encoding 1 row of mb's, the function takes
+* back the control, updates the ctxt and calls lower level functions again.
+* This process is repeated till all the rows or group of mb's (which ever is
+* minimum) are coded
+*
+* @param[in] ps_proc
+* process context
+*
+* @returns error status
+*
+* @remarks
+* NOTE : It is assumed that this routine is invoked at the start of a slice,
+* so the slice header is generated by default.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief Packs header information of a mb in to a buffer
+*
+* @par Description:
+* After deciding the mode info of a macroblock, the syntax elements
+* associated with the mb are packed and stored. The entropy thread unpacks
+* this buffer and generates the end bit stream.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pack_header_data
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief update process context after encoding an mb. This involves preserving
+* the current mb information for later use, initialize the proc ctxt elements to
+* encode next mb.
+*
+* @par Description:
+* This function performs house keeping tasks after encoding an mb.
+* After encoding an mb, various elements of the process context needs to be
+* updated to encode the next mb. For instance, the source, recon and reference
+* pointers, mb indices have to be adjusted to the next mb. The slice index of
+* the current mb needs to be updated. If mb qp modulation is enabled, then if
+* the qp changes the quant param structure needs to be updated. Also, to encode
+* the next mb, the current mb info is used as part of mode prediction or mv
+* prediction. Hence the current mb info has to be preserved at top/top left/left
+* locations.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_proc_ctxt
+ (
+ process_ctxt_t *ps_proc
+ );
+
+/**
+*******************************************************************************
+*
+* @brief initialize process context.
+*
+* @par Description:
+* Before dispatching the current job to process thread, the process context
+* associated with the job is initialized. Usually every job aims to encode one
+* row of mb's. Based on the row indices provided by the job, the process
+* context's buffer ptrs, slice indices and other elements that are necessary
+* during core-coding are initialized.
+*
+* @param[in] ps_proc
+* Pointer to the current process context
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma padding
+*
+* @par Description:
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+* Pointer to luma buffer
+*
+* @param[in] pu1_curr_pic_chroma
+* Pointer to chroma buffer
+*
+* @param[in] i4_mb_x
+* mb index x
+*
+* @param[in] i4_mb_y
+* mb index y
+*
+* @param[in] i4_pad_ht
+* number of rows to be padded
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pad_recon_buffer
+ (
+ process_ctxt_t *ps_proc,
+ UWORD8 *pu1_curr_pic_luma,
+ UWORD8 *pu1_curr_pic_chroma,
+ WORD32 i4_mb_x,
+ WORD32 i4_mb_y,
+ WORD32 i4_pad_ht
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma half pel planes generation
+*
+* @par Description:
+*
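+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @param[in] pu1_curr_pic_luma
+* Pointer to luma buffer
+*
+* @param[in] i4_mb_x
+* mb index x
+*
+* @param[in] i4_mb_y
+* mb index y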
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_halfpel_generation
+ (
+ process_ctxt_t *ps_proc,
+ UWORD8 *pu1_curr_pic_luma,
+ WORD32 i4_mb_x,
+ WORD32 i4_mb_y
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function performs luma & chroma core coding for a set of mb's.
+*
+* @par Description:
+* The mb to be coded is taken and is evaluated over a predefined set of modes
+* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
+* is selected and using intra/inter prediction filters, prediction is carried out.
+* The deviation between src and pred signal constitutes error signal. This error
+* signal is transformed (hierarchical transform if necessary) and quantized. The
+* quantized residue is packed in to entropy buffer for entropy coding. This is
+* repeated for all the mb's enlisted under the job.
+*
+* @param[in] ps_proc
+* Process context corresponding to the job
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process(process_ctxt_t *ps_proc);
+
+/**
+*******************************************************************************
+*
+* @brief
+* function to receive frame qp and pic type before encoding
+*
+* @par Description:
+* Before encoding the frame, this function calls the rc library for frame qp
+* and picture type
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] cur_pic_cnt
+* pic count
+*
+* @param[out] pi4_pic_type
+* pic type
+*
+* @returns skip_src
+* if the source frame rate and target frame rate are not identical, the encoder
+* skips few source frames. skip_src is set when the source need not be encoded.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type);
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to update rc context after encoding
+*
+* @par Description
+* This function updates the rate control context after the frame is encoded.
+* Number of bits consumed by the current frame, frame distortion, frame cost,
+* number of intra/inter mb's, ... are passed on to rate control context for
+* updating the rc model.
+*
+* @param[in] ps_codec
+* Handle to codec context
+*
+* @param[in] ctxt_sel
+* frame context selector
+*
+* @param[in] pic_cnt
+* pic count
+*
+* @returns i4_stuffing_byte
+* number of stuffing bytes (if necessary)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt);
+
+/**
+*******************************************************************************
+*
+* @brief
+* entry point of a spawned encoder thread
+*
+* @par Description:
+* The encoder thread dequeues a proc/entropy job from the encoder queue and
+* calls necessary routines.
+*
+* @param[in] pv_proc
+* Process context corresponding to the thread
+*
+* @returns error status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_process_thread(void *pv_proc);
+
+#endif /* IH264E_PROCESS_H_ */
diff --git a/encoder/ih264e_rate_control.c b/encoder/ih264e_rate_control.c
new file mode 100755
index 0000000..1e2fe4f
--- /dev/null
+++ b/encoder/ih264e_rate_control.c
@@ -0,0 +1,801 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_rate_control.c
+*
+* @brief
+* Contains api function definitions for h264 rate control
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_rc_init()
+* - ih264e_rc_get_picture_details()
+* - ih264e_rc_pre_enc()
+* - ih264e_update_rc_mb_info()
+* - ih264e_rc_get_buffer_status()
+* - ih264e_rc_post_enc()
+* - ih264e_update_rc_bits_info()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_common_tables.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "irc_rate_control_api.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_modify_frm_rate.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "irc_trace_support.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Stub for the trace hook referenced by the rc library
+*
+* @par Description
+*  Accepts a printf-style argument list and discards it. Nothing is printed.
+*
+* @param[in] format
+*  Format string; ignored, as are any variadic arguments that follow it.
+*
+* @returns 0, always
+*
+* @remarks This function is used by the rc library for debugging purposes.
+*  However this function was not part of rc library. So this is defined here
+*  to resolve link issues.
+*
+*******************************************************************************
+*/
+int trace_printf(const WORD8 *format, ...)
+{
+    UNUSED(format);
+    return 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets up the rate control context and its helper modules
+*
+* @par Description
+*  Initializes, in this order, the frame time module, the pull down frame rate
+*  module and the time stamp module, then reads back the source/target tick
+*  counts from the frame time module and initializes the rate control library
+*  with rate control type, bitrates, frame rate, delay, intra/inter frame
+*  intervals, qp settings and the CPB size derived from the level table.
+*
+* @param[in] pv_rc_api
+*  Handle to rate control api
+*
+* @param[in] pv_frame_time
+*  Handle to frame time context
+*
+* @param[in] pv_time_stamp
+*  Handle to time stamp context
+*
+* @param[in] pv_pd_frm_rate
+*  Handle to pull down frame time context
+*
+* @param[in] u4_max_frm_rate
+*  Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+*  Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+*  Target frame rate
+*
+* @param[in] e_rate_control_type
+*  Rate control type
+*
+* @param[in] u4_avg_bit_rate
+*  Average bit rate
+*
+* @param[in] u4_peak_bit_rate
+*  Peak bit rate; not used for CBR_NLDRC, where the average bit rate is
+*  passed as the peak rate instead
+*
+* @param[in] u4_max_delay
+*  Maximum buffer delay
+*
+* @param[in] u4_intra_frame_interval
+*  Intra frame interval
+*
+* @param[in] pu1_init_qp
+*  Initial qp array[3]:[I][P][B]
+*
+* @param[in] i4_max_inter_frm_int
+*  Maximum inter frame interval
+*
+* @param[in] pu1_min_max_qp
+*  Min/max qp array[6]:[Imax][Imin][Pmax][Pmin][Bmax][Bmin]
+*
+* @param[in] u1_profile_level
+*  Encoder level, used to look up the CPB size in the level table
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_init(void *pv_rc_api,
+                    void *pv_frame_time,
+                    void *pv_time_stamp,
+                    void *pv_pd_frm_rate,
+                    UWORD32 u4_max_frm_rate,
+                    UWORD32 u4_src_frm_rate,
+                    UWORD32 u4_tgt_frm_rate,
+                    rc_type_e e_rate_control_type,
+                    UWORD32 u4_avg_bit_rate,
+                    UWORD32 u4_peak_bit_rate,
+                    UWORD32 u4_max_delay,
+                    UWORD32 u4_intra_frame_interval,
+                    UWORD8 *pu1_init_qp,
+                    WORD32 i4_max_inter_frm_int,
+                    UWORD8 *pu1_min_max_qp,
+                    UWORD8 u1_profile_level)
+{
+    /* CPB size : level table entry scaled by 1200 */
+    UWORD8 u1_lvl_idx = ih264e_get_lvl_idx(u1_profile_level);
+    UWORD32 u4_cpb_size = 1200 * gas_ih264_lvl_tbl[u1_lvl_idx].u4_max_cpb_size;
+
+    /* fixed settings handed to the rc library */
+    UWORD32 u4_floor_bit_rate = 0;
+    WORD32 i4_closed_gop = 0;
+
+    /* for CBR_NLDRC the peak rate is the average rate, otherwise the caller's peak */
+    UWORD32 u4_peak = (e_rate_control_type == CBR_NLDRC) ? u4_avg_bit_rate
+                                                         : u4_peak_bit_rate;
+    UWORD32 au4_peak_rates[2];
+
+    /* tick counts, available only after the frame time module is initialized */
+    UWORD32 u4_ticks_src = 0;
+    UWORD32 u4_ticks_tgt = 0;
+
+    au4_peak_rates[0] = u4_peak;
+    au4_peak_rates[1] = u4_peak;
+
+    /* frame time computation module */
+    ih264e_init_frame_time(pv_frame_time, u4_src_frm_rate, u4_tgt_frm_rate);
+
+    /* pull down frame rate, driven by the source (input) frame rate */
+    ih264e_init_pd_frm_rate(pv_pd_frm_rate, u4_src_frm_rate);
+
+    /* time stamp structure */
+    ih264e_init_time_stamp(pv_time_stamp, u4_max_frm_rate, u4_src_frm_rate);
+
+    u4_ticks_src = ih264e_frame_time_get_src_ticks(pv_frame_time);
+    u4_ticks_tgt = ih264e_frame_time_get_tgt_ticks(pv_frame_time);
+
+    /* rate control library */
+    irc_initialise_rate_control(
+                    pv_rc_api,                  /* rc handle */
+                    e_rate_control_type,        /* rc algorithm */
+                    0,                          /* mb activity on/off */
+                    u4_avg_bit_rate,            /* average bitrate */
+                    au4_peak_rates,             /* peak bitrate [I][P] */
+                    u4_floor_bit_rate,          /* minimum bitrate */
+                    u4_src_frm_rate,            /* source frame rate */
+                    u4_max_delay,               /* max buffer delay */
+                    u4_intra_frame_interval,    /* intra frame interval */
+                    pu1_init_qp,                /* init qp [I][P][B] */
+                    u4_cpb_size,                /* max vbv/cpb buffer size */
+                    i4_max_inter_frm_int,       /* max inter frame interval */
+                    i4_closed_gop,              /* open/closed gop */
+                    pu1_min_max_qp,             /* [Imax][Imin][Pmax][Pmin][Bmax][Bmin] */
+                    0,                          /* how to calc the I-frame estimated sad */
+                    u4_ticks_src,               /* LCM(src, tgt frame rate) / src frame rate */
+                    u4_ticks_tgt);              /* LCM(src, tgt frame rate) / tgt frame rate */
+}
+
+/**
+*******************************************************************************
+*
+* @brief Queries the rate control library for the current picture type
+*
+* @par Description
+*  Calls irc_get_picture_details() and hands back only the picture type
+*  (I/P/B); the picture id and display order number that the call also
+*  reports are discarded.
+*
+* @param[in] pv_rc_api
+*  Handle to Rate control api
+*
+* @returns
+*  Picture type. The local is preset to P_PIC before the query.
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api)
+{
+    /* result, preset to P_PIC */
+    picture_type_e e_pic_type = P_PIC;
+
+    /* outputs required by the rc api but not needed by the caller */
+    WORD32 i4_ignored_pic_id = 0;
+    WORD32 i4_ignored_disp_order = 0;
+
+    irc_get_picture_details(pv_rc_api,
+                            &i4_ignored_pic_id,
+                            &i4_ignored_disp_order,
+                            &e_pic_type);
+
+    return e_pic_type;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control output before encoding
+*
+* @par Description
+* This function is called before encoding the current frame and gets the qp
+* for the current frame from rate control module
+*
+* @param[in] ps_rate_control_api
+* Handle to rate control api
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frm rate context
+*
+* @param[in] ps_time_stamp
+* Handle to time stamp context
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] i4_delta_time_stamp
+* Time stamp difference between frames
+*
+* @param[in] i4_total_mb_in_frame
+* Total Macro Blocks in frame
+*
+* @param[in,out] pe_vop_coding_type
+* Picture coding type(I/P/B)
+*
+* @param[out] pu1_frame_qp
+* QP for current frame (written only when the frame is not skipped)
+*
+* @returns
+* Non-zero if the current frame is to be skipped, 0 if it is to be encoded
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api,
+                         void * ps_pd_frm_rate,
+                         void * ps_time_stamp,
+                         void * ps_frame_time,
+                         WORD32 i4_delta_time_stamp,
+                         WORD32 i4_total_mb_in_frame,
+                         picture_type_e *pe_vop_coding_type,
+                         UWORD8 *pu1_frame_qp)
+{
+    /* Kept 32 bit wide: i4_num_app_skips is compared against the WORD32
+       delta time stamp and an 8 bit counter can never reach a bound beyond
+       its own range, which would leave the catch-up loop spinning forever */
+    WORD32 i4_skip_src = 0, i4_num_app_skips = 0;
+    UWORD32 u4_src_not_skipped_for_dts = 0;
+
+    /* Variables for the update_frm_level_info */
+    WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE];
+    WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0};
+    WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0};
+    WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0};
+    WORD32 i4_total_frame_bits = 0;
+    WORD32 i4_total_hdr_bits = 0;
+    WORD32 i4_avg_mb_activity = 0;
+    WORD32 i4_intra_frm_cost = 0;
+    UWORD8 u1_is_scd = 0;
+
+    /* Set all the MBs to Intra (slot 0); skip updates carry no other statistics */
+    ai4_tot_mb_in_type[0] = i4_total_mb_in_frame;
+    ai4_tot_mb_in_type[1] = 0;
+
+    /* If delta time stamp is greater than 1, do rcupdate that many times */
+    for (i4_num_app_skips = 0; i4_num_app_skips < i4_delta_time_stamp - 1; i4_num_app_skips++)
+    {
+        /* Update the missing frame's frm_rate with 0 */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
+
+        /* Update the time stamp */
+        ih264e_update_time_stamp(ps_time_stamp);
+
+        /* Do a pre encode skip update */
+        irc_update_frame_level_info(ps_rate_control_api,
+                                    (*pe_vop_coding_type),
+                                    ai4_mb_type_sad,      /* Frame level SAD for each type of MB[Intra/Inter] */
+                                    i4_total_frame_bits,  /* Total frame bits actually consumed */
+                                    i4_total_hdr_bits,    /* Header bits for model update */
+                                    ai4_mb_type_tex_bits, /* Texture bits for each type of MB[Intra/Inter] */
+                                    ai4_tot_mb_type_qp,   /* Total qp of all MBs based on mb type */
+                                    ai4_tot_mb_in_type,   /* Total number of mbs in each mb type */
+                                    i4_avg_mb_activity,   /* Average mb activity in frame */
+                                    u1_is_scd,            /* Is a scene change detected at the current frame */
+                                    1,                    /* It is a pre-encode skip */
+                                    i4_intra_frm_cost,    /* Sum of Intra cost for each frame */
+                                    0);                   /* Is pic handling [irc_update_pic_handling_state] done before update */
+    }
+
+    /* Update the time stamp for the current frame */
+    ih264e_update_time_stamp(ps_time_stamp);
+
+    /* Check whether the source frame needs to be skipped */
+    i4_skip_src = ih264e_should_src_be_skipped(ps_frame_time,
+                                               i4_delta_time_stamp,
+                                               &u4_src_not_skipped_for_dts);
+
+    /***********************************************************************
+     Based on difference in source and target frame rate frames are skipped
+     ***********************************************************************/
+    if (i4_skip_src)
+    {
+        /* Update the missing frame's frm_rate with 0 */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate, 0);
+
+        /* Do a pre encode skip update */
+        irc_update_frame_level_info(ps_rate_control_api,
+                                    (*pe_vop_coding_type),
+                                    ai4_mb_type_sad,      /* Frame level SAD for each type of MB[Intra/Inter] */
+                                    i4_total_frame_bits,  /* Total frame bits actually consumed */
+                                    i4_total_hdr_bits,    /* Header bits for model update */
+                                    ai4_mb_type_tex_bits, /* Texture bits for each type of MB[Intra/Inter] */
+                                    ai4_tot_mb_type_qp,   /* Total qp of all MBs based on mb type */
+                                    ai4_tot_mb_in_type,   /* Total number of mbs in each mb type */
+                                    i4_avg_mb_activity,   /* Average mb activity in frame */
+                                    u1_is_scd,            /* Is a scene change detected at the current frame */
+                                    1,                    /* It is a pre-encode skip */
+                                    i4_intra_frm_cost,    /* Sum of Intra cost for each frame */
+                                    0);                   /* Is pic handling [irc_update_pic_handling_state] done before update */
+
+        /* Set the current frame type to NA */
+        *pe_vop_coding_type = BUF_PIC;
+    }
+    else
+    {
+#define MAX_FRAME_BITS 0x7FFFFFFF
+        WORD32 i4_avg_frm_rate, i4_source_frame_rate;
+
+        i4_source_frame_rate = ih264e_frame_time_get_src_frame_rate(ps_frame_time);
+
+        /* Update the frame rate of the frame present with the source frame
+           rate. NOTE(review): an older comment here spoke of doubling the
+           tgt_frame_rate to make up for application skips; the code does
+           not do that - confirm which behaviour is intended */
+        ih264e_update_pd_frm_rate(ps_pd_frm_rate,
+                                  i4_source_frame_rate);
+
+        /* Based on the update get the average frame rate */
+        i4_avg_frm_rate = ih264e_get_pd_avg_frm_rate(ps_pd_frm_rate);
+
+        /* Call the RC library function to change the frame_rate to the
+           actually achieved frm_rate */
+        irc_change_frm_rate_for_bit_alloc(ps_rate_control_api, i4_avg_frm_rate);
+
+        /* --------Rate control related things. Get pic type and frame Qp---------*/
+        /* Add picture to the stack. For IPP encoder we push the variable
+           into the stack and get back the variables by requesting RC.
+           This interface is designed for IPB encoder */
+        irc_add_picture_to_stack(ps_rate_control_api, 1);
+
+        /* Query the picture_type */
+        *pe_vop_coding_type = ih264e_rc_get_picture_details(ps_rate_control_api);
+
+        /* Get current frame Qp */
+        pu1_frame_qp[0] = (UWORD8)irc_get_frame_level_qp(ps_rate_control_api,
+                                                         (picture_type_e)(pe_vop_coding_type[0]),
+                                                         MAX_FRAME_BITS);
+    }
+
+    return (i4_skip_src);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update mb info for rate control context
+*
+* @par Description
+* After encoding a mb, information such as mb type, qp used, mb distortion
+* resulted in encoding the block and so on needs to be preserved for modeling
+* RC. This is preserved via this function call.
+*
+* @param[in] ps_frame_info
+* Handle Frame info context
+*
+* @param[in] pv_proc
+* Process context
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc)
+{
+    /* the opaque handle is the process context of the MB just encoded */
+    process_ctxt_t *ps_ctxt = pv_proc;
+
+    /* statistics slot: 0 when the MB is intra, 1 otherwise */
+    const WORD32 i4_slot = (ps_ctxt->u4_is_intra != 0) ? 0 : 1;
+
+    /* accumulate, for that slot, the MB distortion, the qp looked up
+       through gau1_h264_to_mpeg2_qmap and the MB count */
+    ps_frame_info->tot_mb_sad[i4_slot] += ps_ctxt->i4_mb_distortion;
+    ps_frame_info->qp_sum[i4_slot] += gau1_h264_to_mpeg2_qmap[ps_ctxt->u4_mb_qp];
+    ps_frame_info->num_mbs[i4_slot] += 1;
+
+    /* only intra MBs contribute to the intra cost sum */
+    if (i4_slot != 0)
+    {
+        return;
+    }
+
+    /* cost */
+    ps_frame_info->intra_mb_cost_sum += ps_ctxt->i4_mb_cost;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control buffer status
+*
+* @par Description
+* This function is used to get buffer status(underflow/overflow) by rate
+* control module
+*
+* @param[in] pv_rc_api
+* Handle to rate control api context
+*
+* @param[in] i4_total_frame_bits
+* Total frame bits
+*
+* @param[in] e_pic_type
+* Picture type
+*
+* @param[in] pi4_num_bits_to_prevent_vbv_underflow
+* Number of bits to prevent underflow
+*
+* @param[out] pu1_is_enc_buf_overflow
+* Buffer overflow indication flag
+*
+* @param[out] pu1_is_enc_buf_underflow
+* Buffer underflow indication flag
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_get_buffer_status(void *pv_rc_api,
+                                 WORD32 i4_total_frame_bits,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
+                                 UWORD8 *pu1_is_enc_buf_overflow,
+                                 UWORD8 *pu1_is_enc_buf_underflow)
+{
+    /* VBV status the rate control library reports for a frame of this size */
+    const vbv_buf_status_e e_status =
+        irc_get_buffer_status(pv_rc_api,
+                              i4_total_frame_bits,
+                              e_pic_type,
+                              pi4_num_bits_to_prevent_vbv_underflow);
+
+    /*
+     * The output flags mirror the VBV status rather than copy it:
+     *
+     *   VBV_OVERFLOW   -> *pu1_is_enc_buf_underflow = 1
+     *   VBV_UNDERFLOW  -> *pu1_is_enc_buf_overflow  = 1
+     *   any other      -> both flags cleared
+     *
+     * presumably because the flags describe the encoder side buffer while
+     * the status describes the VBV buffer - TODO confirm against the RC
+     * library. Each flag is stored exactly once, the underflow flag first.
+     */
+    *pu1_is_enc_buf_underflow = (e_status == VBV_OVERFLOW) ? 1 : 0;
+
+    *pu1_is_enc_buf_overflow = (e_status == VBV_UNDERFLOW) ? 1 : 0;
+
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update rate control module after encoding
+*
+* @par Description
+* This function is used to update the rate control module after the current
+* frame encoding is done with details such as bits consumed, SAD for I/P/B,
+* intra cost ,mb type and other
+*
+* @param[in] ps_rate_control_api
+* Handle to rate control api context
+*
+* @param[in] ps_frame_info
+* Handle to frame info context
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frame rate context
+*
+* @param[in] ps_time_stamp
+* Handle to time stamp context
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] i4_total_mb_in_frame
+* Total mb in frame
+*
+* @param[in] pe_vop_coding_type
+* Picture coding type
+*
+* @param[in] i4_is_first_frame
+* Is first frame
+*
+* @param[out] pi4_is_post_encode_skip
+* Post encoding skip flag
+*
+* @param[in] u1_frame_qp
+* Frame qp (unused)
+*
+* @param[in,out] pi4_num_intra_in_prev_frame
+* Number of intra mbs in previous frame
+*
+* @param[out] pi4_avg_activity
+* Average activity
+*
+* @returns Number of stuffing bytes (CBR bits to stuff >> 3)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_post_enc(void * ps_rate_control_api,
+ frame_info_t *ps_frame_info,
+ void * ps_pd_frm_rate,
+ void * ps_time_stamp,
+ void * ps_frame_time,
+ WORD32 i4_total_mb_in_frame,
+ picture_type_e *pe_vop_coding_type,
+ WORD32 i4_is_first_frame,
+ WORD32 *pi4_is_post_encode_skip,
+ UWORD8 u1_frame_qp,
+ WORD32 *pi4_num_intra_in_prev_frame,
+ WORD32 *pi4_avg_activity)
+{
+ /* Variables for the update_frm_level_info */
+ WORD32 ai4_tot_mb_in_type[MAX_MB_TYPE];
+ WORD32 ai4_tot_mb_type_qp[MAX_MB_TYPE] = {0, 0};
+ WORD32 ai4_mb_type_sad[MAX_MB_TYPE] = {0, 0};
+ WORD32 ai4_mb_type_tex_bits[MAX_MB_TYPE] = {0, 0};
+ WORD32 i4_total_frame_bits = 0;
+ WORD32 i4_total_hdr_bits = 0;
+ WORD32 i4_total_texturebits;
+ WORD32 i4_avg_mb_activity = 0;
+ WORD32 i4_intra_frm_cost = 0;
+ UWORD8 u1_is_scd = 0;
+ WORD32 i4_cbr_bits_to_stuff = 0;
+ UWORD32 u4_num_intra_in_prev_frame = *pi4_num_intra_in_prev_frame; /* snapshot; the pointee is overwritten below */
+ UNUSED(ps_pd_frm_rate);
+ UNUSED(ps_time_stamp);
+ UNUSED(ps_frame_time);
+ UNUSED(u1_frame_qp);
+ /* Read back the RC stats accumulated in the frame info context */
+ ai4_tot_mb_in_type[MB_TYPE_INTRA] = irc_fi_get_total_mb(ps_frame_info,MB_TYPE_INTRA);
+ ai4_tot_mb_in_type[MB_TYPE_INTER] = irc_fi_get_total_mb(ps_frame_info,MB_TYPE_INTER);
+ /* Per-type qp sums are taken from the frame info context; an older variant
+ derived them from the frame qp, which is why u1_frame_qp is now unused */
+ ai4_tot_mb_type_qp[MB_TYPE_INTRA] = irc_fi_get_total_mb_qp(ps_frame_info,MB_TYPE_INTRA);
+ ai4_tot_mb_type_qp[MB_TYPE_INTER] = irc_fi_get_total_mb_qp(ps_frame_info,MB_TYPE_INTER);
+ ai4_mb_type_sad[MB_TYPE_INTRA] = irc_fi_get_total_mb_sad(ps_frame_info,MB_TYPE_INTRA);
+ ai4_mb_type_sad[MB_TYPE_INTER] = irc_fi_get_total_mb_sad(ps_frame_info,MB_TYPE_INTER);
+ i4_intra_frm_cost = irc_fi_get_total_intra_mb_cost(ps_frame_info);
+ i4_avg_mb_activity = irc_fi_get_avg_activity(ps_frame_info);
+ i4_total_hdr_bits = irc_fi_get_total_header_bits(ps_frame_info);
+ i4_total_texturebits = irc_fi_get_total_mb_texture_bits(ps_frame_info,MB_TYPE_INTRA);
+ i4_total_texturebits += irc_fi_get_total_mb_texture_bits(ps_frame_info,MB_TYPE_INTER);
+ i4_total_frame_bits = i4_total_hdr_bits + i4_total_texturebits ;
+
+ *pi4_avg_activity = i4_avg_mb_activity;
+
+
+ /* Texture bits are not split per MB type here: intra is reported as 0 and inter gets total bits minus hdr bits */
+ ai4_mb_type_tex_bits[MB_TYPE_INTRA] = 0;
+ ai4_mb_type_tex_bits[MB_TYPE_INTER] = i4_total_frame_bits - i4_total_hdr_bits;
+
+ /* Set post encode skip to zero */
+ pi4_is_post_encode_skip[0]= 0;
+
+ /* For NLDRC, get the buffer status for stuffing or skipping */
+ if (irc_get_rc_type(ps_rate_control_api) == CBR_NLDRC)
+ {
+ WORD32 i4_get_num_bit_to_prevent_vbv_overflow;
+ UWORD8 u1_enc_buf_overflow,u1_enc_buf_underflow;
+
+ /* Getting the buffer status; both flags are written by the callee on every path */
+ ih264e_rc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
+ pe_vop_coding_type[0], &i4_get_num_bit_to_prevent_vbv_overflow,
+ &u1_enc_buf_overflow,&u1_enc_buf_underflow);
+
+ /* We skip the frame if decoder buffer is underflowing. But we never skip first I frame */
+ // NOTE(review): this test was once (i4_is_first_frame != 1); confirm the flag's polarity with the caller
+ if ((u1_enc_buf_overflow == 1) && (i4_is_first_frame != 0))
+ {
+ irc_post_encode_frame_skip(ps_rate_control_api, (picture_type_e)pe_vop_coding_type[0]);
+ // A skipped frame is accounted as 0 bits; no skip frame header size is added here
+ i4_total_frame_bits = 0;
+
+ *pi4_is_post_encode_skip = 1;
+
+ /* Adjust the GOP if in case we skipped an I-frame */
+ if (*pe_vop_coding_type == I_PIC)
+ irc_force_I_frame(ps_rate_control_api);
+
+ /* The coding type is left unchanged for a skipped frame */
+ // (an older variant forced it to P after writing a skip header)
+
+ /* Getting the buffer status again,to check if it underflows; the returned status is not used */
+ irc_get_buffer_status(ps_rate_control_api, i4_total_frame_bits,
+ (picture_type_e)pe_vop_coding_type[0], &i4_get_num_bit_to_prevent_vbv_overflow);
+
+ }
+
+ /* Encoder buffer underflow flag set (VBV_OVERFLOW from the RC library): stuff bits */
+ if (u1_enc_buf_underflow == 1)
+ {
+ /* The stuffing function is directly pulled out from split controller workspace.
+ encode_vop_data() function makes sure alignment data is dumped at the end of a
+ frame. Split controller was identifying this alignment byte, overwriting it with
+ the stuff data and then finally aligning the buffer. Here every thing is inside
+ the DSP. So, ideally encode_vop_data needn't align, and we can start stuffing directly.
+ But in that case, it'll break the logic for a normal frame.
+ Hence for simplicity, not changing this part since it is ok to align and
+ then overwrite since stuffing is not done for every frame */
+ i4_cbr_bits_to_stuff = irc_get_bits_to_stuff(ps_rate_control_api, i4_total_frame_bits, pe_vop_coding_type[0]);
+
+ /* Just add extra 32 bits to make sure we don't stuff lesser */
+ i4_cbr_bits_to_stuff += 32;
+
+ /* NOTE(review): stuffing must not exceed the outbuf size, but no such check is made in this function */
+ /* Add stuffed bits to total bits */
+ i4_total_frame_bits += i4_cbr_bits_to_stuff;
+ }
+ }
+
+#define ENABLE_SCD 1
+#if ENABLE_SCD
+ /* Scene change: a P picture with more than 2/3rd of its MBs intra and more than 1.1x the previous intra count */
+ if ((ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((2 * i4_total_mb_in_frame) / 3)) &&
+ (*pe_vop_coding_type == P_PIC) &&
+ (ai4_tot_mb_in_type[MB_TYPE_INTRA] > ((11 * (WORD32)u4_num_intra_in_prev_frame) / 10)))
+ {
+ u1_is_scd = 1;
+ }
+#endif
+
+ /* Update num intra mbs of this frame, unless the frame was skipped after encoding */
+ if (pi4_is_post_encode_skip[0] == 0)
+ {
+ *pi4_num_intra_in_prev_frame = ai4_tot_mb_in_type[MB_TYPE_INTRA];
+ }
+
+ /* Reset intra count to zero on an I frame (this overrides the update above) */
+ if (*pe_vop_coding_type == I_PIC)
+ {
+ *pi4_num_intra_in_prev_frame = 0;
+ }
+
+ /* Do an update of rate control after post encode */
+ irc_update_frame_level_info(ps_rate_control_api, /* RC state */
+ pe_vop_coding_type[0], /* PIC type */
+ ai4_mb_type_sad, /* SAD for [Intra/Inter] */
+ i4_total_frame_bits, /* Total frame bits (0 if skipped, plus any stuffing) */
+ i4_total_hdr_bits, /* Header bits */
+ ai4_mb_type_tex_bits, /* Texture bits for MB[Intra/Inter] */
+ ai4_tot_mb_type_qp, /* Qp sum for MB[Intra/Inter] */
+ ai4_tot_mb_in_type, /* MB count for MB[Intra/Inter] */
+ i4_avg_mb_activity, /* Average mb activity in frame */
+ u1_is_scd, /* Is a scene change detected */
+ 0, /* Not a pre encode skip */
+ (WORD32)i4_intra_frm_cost, /* Intra cost for frame */
+ 0); /* Pic handling not done outside */
+
+ return (i4_cbr_bits_to_stuff >> 3); /* bits to bytes */
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update bits consumed info to rate control context
+*
+* @par Description
+* Function to update bits consume info to rate control context
+*
+* @param[in] ps_frame_info
+* Frame info context
+*
+* @param[in] ps_entropy
+* Entropy context
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy)
+{
+    /* the opaque handle is the entropy context holding the bit counters */
+    entropy_ctxt_t *ps_ctxt = pv_entropy;
+    const WORD32 ai4_mb_type[2] = { MB_TYPE_INTRA, MB_TYPE_INTER };
+    WORD32 i;
+
+    /* add header and residue bits to the frame totals: intra first, then inter */
+    for (i = 0; i < 2; i++)
+    {
+        ps_frame_info->mb_header_bits[ai4_mb_type[i]] += ps_ctxt->u4_header_bits[ai4_mb_type[i]];
+        ps_frame_info->mb_texture_bits[ai4_mb_type[i]] += ps_ctxt->u4_residue_bits[ai4_mb_type[i]];
+    }
+}
+
diff --git a/encoder/ih264e_rate_control.h b/encoder/ih264e_rate_control.h
new file mode 100755
index 0000000..de9466a
--- /dev/null
+++ b/encoder/ih264e_rate_control.h
@@ -0,0 +1,351 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_rate_control.h
+*
+* @brief
+* This file contains function declarations of api functions for h264 rate
+* control
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_RATE_CONTROL_H_
+#define IH264E_RATE_CONTROL_H_
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function initializes rate control context and variables
+*
+* @par Description
+* This function initializes rate control type, source and target frame rate,
+* average and peak bitrate, intra-inter frame interval and initial
+* quantization parameter
+*
+* @param[in] pv_rc_api
+* Handle to rate control api
+*
+* @param[in] pv_frame_time
+* Handle to frame time context
+*
+* @param[in] pv_time_stamp
+* Handle to time stamp context
+*
+* @param[in] pv_pd_frm_rate
+* Handle to pull down frame time context
+*
+* @param[in] u4_max_frm_rate
+* Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+* Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+* Target frame rate
+*
+* @param[in] e_rate_control_type
+* Rate control type
+*
+* @param[in] u4_avg_bit_rate
+* Average bit rate
+*
+* @param[in] u4_peak_bit_rate
+* Peak bit rate
+*
+* @param[in] u4_max_delay
+* Maximum delay between frames
+*
+* @param[in] u4_intra_frame_interval
+* Intra frame interval
+*
+* @param[in] pu1_init_qp
+* Initial qp
+*
+* @param[in] i4_max_inter_frm_int
+* Maximum inter frame interval
+*
+* @param[in] pu1_min_max_qp
+* Array of min/max qp
+*
+* @param[in] u1_profile_level
+* Encoder profile level
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_init(void *pv_rc_api,
+ void *pv_frame_time,
+ void *pv_time_stamp,
+ void *pv_pd_frm_rate,
+ UWORD32 u4_max_frm_rate,
+ UWORD32 u4_src_frm_rate,
+ UWORD32 u4_tgt_frm_rate,
+ rc_type_e e_rate_control_type,
+ UWORD32 u4_avg_bit_rate,
+ UWORD32 u4_peak_bit_rate,
+ UWORD32 u4_max_delay,
+ UWORD32 u4_intra_frame_interval,
+ UWORD8 *pu1_init_qp,
+ WORD32 i4_max_inter_frm_int,
+ UWORD8 *pu1_min_max_qp,
+ UWORD8 u1_profile_level);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get picture details
+*
+* @par Description
+* This function returns the Picture type(I/P/B)
+*
+* @param[in] pv_rc_api
+* Handle to Rate control api
+*
+* @returns
+* Picture type
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+picture_type_e ih264e_rc_get_picture_details(void *pv_rc_api);
+
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control output before encoding
+*
+* @par Description
+* This function is called before encoding the current frame and gets the qp
+* for the current frame from rate control module
+*
+* @param[in] ps_rate_control_api
+* Handle to rate control api
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frm rate context
+*
+* @param[in] ps_time_stamp
+* Handle to time stamp context
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] i4_delta_time_stamp
+* Time stamp difference between frames
+*
+* @param[in] i4_total_mb_in_frame
+* Total Macro Blocks in frame
+*
+* @param[in,out] pe_vop_coding_type
+* Picture coding type(I/P/B)
+*
+* @param[out] pu1_frame_qp
+* QP for current frame (written only when the frame is not skipped)
+*
+* @returns
+* Non-zero if the current frame is to be skipped, 0 if it is to be encoded
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_pre_enc(void * ps_rate_control_api,
+ void * ps_pd_frm_rate,
+ void * ps_time_stamp,
+ void * ps_frame_time,
+ WORD32 i4_delta_time_stamp,
+ WORD32 i4_total_mb_in_frame,
+ picture_type_e *pe_vop_coding_type,
+ UWORD8 *pu1_frame_qp);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update mb info for rate control context
+*
+* @par Description
+* After encoding a mb, information such as mb type, qp used, mb distortion
+* resulted in encoding the block and so on needs to be preserved for modelling
+* RC. This is preserved via this function call.
+*
+* @param[in] ps_frame_info
+* Handle Frame info context
+*
+* @param[in] pv_proc
+* Process context
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_mb_info(frame_info_t *ps_frame_info, void *pv_proc);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get rate control buffer status
+*
+* @par Description
+* This function is used to get buffer status(underflow/overflow) by rate
+* control module
+*
+* @param[in] pv_rc_api
+* Handle to rate control api context
+*
+* @param[in] i4_total_frame_bits
+* Total frame bits
+*
+* @param[in] e_pic_type
+* Picture type
+*
+* @param[in] pi4_num_bits_to_prevent_vbv_underflow
+* Number of bits to prevent underflow
+*
+* @param[out] pu1_is_enc_buf_overflow
+* Buffer overflow indication flag
+*
+* @param[out] pu1_is_enc_buf_underflow
+* Buffer underflow indication flag
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_rc_get_buffer_status(void *pv_rc_api,
+ WORD32 i4_total_frame_bits,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_num_bits_to_prevent_vbv_underflow,
+ UWORD8 *pu1_is_enc_buf_overflow,
+ UWORD8 *pu1_is_enc_buf_underflow);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update rate control module after encoding
+*
+* @par Description
+* This function is used to update the rate control module after the current
+* frame encoding is done with details such as bits consumed, SAD for I/P/B,
+* intra cost ,mb type and other
+*
+* @param[in] ps_rate_control_api
+* Handle to rate control api context
+*
+* @param[in] ps_frame_info
+* Handle to frame info context
+*
+* @param[in] ps_pd_frm_rate
+* Handle to pull down frame rate context
+*
+* @param[in] ps_time_stamp
+* Handle to time stamp context
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] i4_total_mb_in_frame
+* Total mb in frame
+*
+* @param[in] pe_vop_coding_type
+* Picture coding type
+*
+* @param[in] i4_is_first_frame
+* Is first frame
+*
+* @param[out] pi4_is_post_encode_skip
+* Post encoding skip flag
+*
+* @param[in] u1_frame_qp
+* Frame qp (unused)
+*
+* @param[in,out] pi4_num_intra_in_prev_frame
+* Number of intra mbs in previous frame
+*
+* @param[out] pi4_avg_activity
+* Average activity
+*
+* @returns Number of stuffing bytes (CBR bits to stuff >> 3)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_rc_post_enc(void *ps_rate_control_api,
+ frame_info_t *ps_frame_info,
+ void *ps_pd_frm_rate,
+ void *ps_time_stamp,
+ void *ps_frame_time,
+ WORD32 i4_total_mb_in_frame,
+ picture_type_e *pe_vop_coding_type,
+ WORD32 i4_is_first_frame,
+ WORD32 *pi4_is_post_encode_skip,
+ UWORD8 u1_frame_qp,
+ WORD32 *pi4_num_intra_in_prev_frame,
+ WORD32 *pi4_avg_activity);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update bits consumed info to rate control context
+*
+* @par Description
+* Function to update bits consume info to rate control context
+*
+* @param[in] ps_frame_info
+* Frame info context
+*
+* @param[in] ps_entropy
+* Entropy context
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_rc_bits_info(frame_info_t *ps_frame_info, void *pv_entropy);
+
+#endif /* IH264E_RATE_CONTROL_H_ */
+
diff --git a/encoder/ih264e_rc_mem_interface.c b/encoder/ih264e_rc_mem_interface.c
new file mode 100755
index 0000000..e4d5781
--- /dev/null
+++ b/encoder/ih264e_rc_mem_interface.c
@@ -0,0 +1,395 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_rc_mem_interface.c
+*
+* @brief
+* This file contains api function definitions for rate control memtabs
+*
+* @author
+* ittiam
+*
+* List of Functions
+* - fill_memtab()
+* - use_or_fill_base()
+* - ih264e_map_rc_mem_recs_to_itt_api()
+* - ih264e_map_itt_mem_rec_to_rc_mem_rec()
+* - ih264e_get_rate_control_mem_tab()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User Include Files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264_size_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_common_tables.h"
+#include "ih264_list.h"
+#include "ih264e_error.h"
+#include "ih264e_defs.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_master.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_utils.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264e_statistics.h"
+#include "ih264e_error.h"
+#include "ih264e_utils.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_cavlc.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "irc_common.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "ih264e_modify_frm_rate.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par Description
+* This function fills memory record attributes
+*
+* @param[in] ps_mem_tab
+* pointer to mem records
+*
+* @param[in] u4_size
+* size of the record
+*
+* @param[in] i4_alignment
+* memory alignment size
+*
+* @param[in] e_usage
+* usage
+*
+* @param[in] e_mem_region
+* mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab,
+                 WORD32 u4_size,
+                 WORD32 i4_alignment,
+                 ITT_MEM_USAGE_TYPE_E e_usage,
+                 ITT_MEM_REGION_E e_mem_region)
+{
+    /* round the size up to a multiple of the alignment; the mask form is
+       only exact when i4_alignment is a power of two */
+    const WORD32 i4_mask = i4_alignment - 1;
+
+    ps_mem_tab->e_mem_region = e_mem_region;
+    ps_mem_tab->e_usage = e_usage;
+    ps_mem_tab->i4_alignment = i4_alignment;
+    ps_mem_tab->u4_size = (u4_size + i4_mask) & ~i4_mask;
+}
+
+/**
+******************************************************************************
+*
+* @brief This function stores a base pointer in, or reads it back from, a memtab
+*
+* @par Description
+* FILL_BASE copies ptr_to_be_filled[0] into the memtab; USE_BASE copies the memtab base into ptr_to_be_filled[0]
+*
+* @param[in,out] ps_mem_tab
+* pointer to mem records
+*
+* @param[in,out] ptr_to_be_filled
+* handle to the memory record storage space
+*
+* @param[in] e_func_type
+* enum that dictates fill memory records or use memory records
+*
+* @return 0 on success (or for an unrecognised e_func_type), -1 if the source pointer is null
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab,
+                        void **ptr_to_be_filled,
+                        ITT_FUNC_TYPE_E e_func_type)
+{
+    /*
+     * FILL_BASE : remember the caller's pointer in the memtab
+     * USE_BASE  : hand the memtab's base pointer back to the caller
+     * otherwise : nothing to do, reported as success
+     * In both active modes a null source pointer is an error (-1) and the
+     * destination is left untouched.
+     */
+    if (e_func_type == FILL_BASE)
+    {
+        if (ptr_to_be_filled[0] == 0)
+        {
+            return (-1);
+        }
+
+        ps_mem_tab->pv_base = ptr_to_be_filled[0];
+    }
+    else if (e_func_type == USE_BASE)
+    {
+        if (ps_mem_tab->pv_base == 0)
+        {
+            return (-1);
+        }
+
+        ptr_to_be_filled[0] = ps_mem_tab->pv_base;
+    }
+
+    return (0);
+}
+
+/**
+******************************************************************************
+*
+* @brief This function maps rc mem records structure to encoder lib mem records
+* structure
+*
+* @par Description
+* This function maps rc mem records structure to encoder lib mem records
+* structure
+*
+* @param[out] ps_mem
+* pointer to encoder lib mem records
+*
+* @param[in] rc_memtab
+* pointer to rc mem records
+*
+* @param[in] num_mem_recs
+* number of memory records
+*
+* @return void
+*
+******************************************************************************
+*/
+void ih264e_map_rc_mem_recs_to_itt_api(iv_mem_rec_t *ps_mem,
+                                       itt_memtab_t *rc_memtab,
+                                       UWORD32 num_mem_recs)
+{
+    UWORD32 u4_rec;
+
+    for (u4_rec = 0; u4_rec < num_mem_recs; u4_rec++)
+    {
+        /* size and alignment are taken from the matching rc record */
+        const itt_memtab_t *ps_rc_rec = &rc_memtab[u4_rec];
+        UWORD32 u4_rec_size = ps_rc_rec->u4_size;
+        UWORD32 u4_rec_align = ps_rc_rec->i4_alignment;
+
+        /* we always ask for external persistent cacheable memory */
+        FILL_MEMTAB(ps_mem, u4_rec, u4_rec_size, u4_rec_align,
+                    IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM);
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief This function maps encoder lib mem records structure to RC memory
+* records structure
+*
+* @par Description
+* This function maps encoder lib mem records structure to RC memory
+* records structure
+*
+* @param[in] ps_mem
+* pointer to encoder lib mem records
+*
+* @param[out] rc_memtab
+* pointer to rc mem records
+*
+* @param[in] num_mem_recs
+* Number of memory records
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_map_itt_mem_rec_to_rc_mem_rec(iv_mem_rec_t *ps_mem,
+                                          itt_memtab_t *rc_memtab,
+                                          UWORD32 num_mem_recs)
+{
+    UWORD32 u4_idx;
+
+    for (u4_idx = 0; u4_idx < num_mem_recs; u4_idx++)
+    {
+        const iv_mem_rec_t *ps_src = &ps_mem[u4_idx];
+        itt_memtab_t *ps_dst = &rc_memtab[u4_idx];
+        /* alignment, size and base pointer are carried over unchanged */
+        ps_dst->i4_alignment = ps_src->u4_mem_alignment;
+        ps_dst->u4_size = ps_src->u4_mem_size;
+        ps_dst->pv_base = ps_src->pv_base;
+
+        /* only DDR memory is available */
+        ps_dst->e_mem_region = DDR;
+        ps_dst->e_usage = PERSISTENT;
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief Get memtabs for rate control
+*
+* @par Description
+* This routine is used to Get/init memtabs for rate control
+*
+* @param[in] pv_rate_control
+* pointer to rate control context (handle)
+*
+* @param[in] ps_mem
+* pointer to encoder lib mem records
+*
+* @param[in] e_func_type
+* enum that dictates fill memory records or Init memory records
+*
+* @return total number of mem records
+*
+******************************************************************************
+*/
+WORD32 ih264e_get_rate_control_mem_tab(void *pv_rate_control,
+                                       iv_mem_rec_t *ps_mem,
+                                       ITT_FUNC_TYPE_E e_func_type)
+{
+    /* per-call scratch table: kept off static storage so concurrent callers cannot clash */
+    itt_memtab_t as_itt_memtab[NUM_RC_MEMTABS] = {{0}};
+    WORD32 i4_num_memtab = 0, j = 0;
+    void *refptr2[4] = {NULL, NULL, NULL, NULL};
+    void **refptr1[4];
+    rate_control_ctxt_t *ps_rate_control = pv_rate_control;
+
+    /* default to local dummy handles; the context's handles are used for USE/FILL_BASE */
+    for (j = 0; j < 4; j++)
+        refptr1[j] = &(refptr2[j]);
+    j = 0;
+
+    if (e_func_type == USE_BASE || e_func_type == FILL_BASE)
+    {
+        refptr1[1] = &ps_rate_control->pps_frame_time;
+        refptr1[2] = &ps_rate_control->pps_time_stamp;
+        refptr1[3] = &ps_rate_control->pps_pd_frm_rate;
+        refptr1[0] = &ps_rate_control->pps_rate_control_api;
+    }
+
+    /* Get the total number of memtabs used by Rate Controller */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab((rate_control_api_t **)refptr1[0], NULL, GET_NUM_MEMTAB);
+    /* Seed the rc-side records from the application's records */
+    ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    /* Fill the memtabs used by Rate Controller */
+    i4_num_memtab = irc_rate_control_num_fill_use_free_memtab((rate_control_api_t **)refptr1[0],as_itt_memtab+j,e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Frame time Module */
+    i4_num_memtab = ih264e_frame_time_get_init_free_memtab((frame_time_t **)refptr1[1], NULL, GET_NUM_MEMTAB);
+    /* Seed the rc-side records from the application's records */
+    ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    /* Fill the memtabs used by Frame time Module */
+    i4_num_memtab = ih264e_frame_time_get_init_free_memtab((frame_time_t **)refptr1[1], as_itt_memtab+j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Time stamp Module */
+    i4_num_memtab = ih264e_time_stamp_get_init_free_memtab((time_stamp_t **)refptr1[2], NULL, GET_NUM_MEMTAB);
+    /* Seed the rc-side records from the application's records */
+    ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    /* Fill the memtabs used by Time Stamp Module */
+    i4_num_memtab = ih264e_time_stamp_get_init_free_memtab((time_stamp_t **)refptr1[2], as_itt_memtab+j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    j += i4_num_memtab;
+
+    /* Get the total number of memtabs used by Frame rate Module */
+    i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab((pd_frm_rate_t **)refptr1[3], NULL, GET_NUM_MEMTAB);
+    /* Seed the rc-side records from the application's records */
+    ih264e_map_itt_mem_rec_to_rc_mem_rec((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    /* Fill the memtabs used by Frame Rate Module */
+    i4_num_memtab = ih264e_pd_frm_rate_get_init_free_memtab((pd_frm_rate_t **)refptr1[3], as_itt_memtab+j, e_func_type);
+    /* Mapping ittiam memtabs to App. memtabs */
+    ih264e_map_rc_mem_recs_to_itt_api((&ps_mem[j]), as_itt_memtab+j, i4_num_memtab);
+    j += i4_num_memtab;
+    return j; /* Total MemTabs Needed by Rate Control Module */
+}
diff --git a/encoder/ih264e_rc_mem_interface.h b/encoder/ih264e_rc_mem_interface.h
new file mode 100755
index 0000000..a2946a7
--- /dev/null
+++ b/encoder/ih264e_rc_mem_interface.h
@@ -0,0 +1,179 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_rc_mem_interface.h
+*
+* @brief
+* This file contains function declaration and structures for rate control
+* memtabs
+*
+* @author
+* ittiam
+*
+* @remarks
+* The rate control library is a global library across various codecs. It
+* anticipates certain structures definitions. Those definitions are to be
+* imported from global workspace. Instead of that, the structures needed for
+* rc library are copied in to this file and exported to rc library. If the
+* structures / enums / ... in the global workspace change, this file also needs
+* to be modified accordingly.
+*
+******************************************************************************
+*/
+#ifndef IH264E_RC_MEM_INTERFACE_H_
+#define IH264E_RC_MEM_INTERFACE_H_
+
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+/* Fills entry m_j of an iv_mem_rec_t array; m_pv_mem_rec and m_j are evaluated four times */
+#define FILL_MEMTAB(m_pv_mem_rec, m_j, m_mem_size, m_align, m_type) \
+{ \
+    (m_pv_mem_rec)[(m_j)].u4_size = sizeof(iv_mem_rec_t); \
+    (m_pv_mem_rec)[(m_j)].u4_mem_size = (m_mem_size); \
+    (m_pv_mem_rec)[(m_j)].u4_mem_alignment = (m_align); \
+    (m_pv_mem_rec)[(m_j)].e_mem_type = (m_type); \
+}
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+ ALIGN_BYTE = 1,
+ ALIGN_WORD16 = 2,
+ ALIGN_WORD32 = 4,
+ ALIGN_WORD64 = 8,
+ ALIGN_128_BYTE = 128
+}ITT_MEM_ALIGNMENT_TYPE_E;
+
+typedef enum
+{
+ SCRATCH = 0,
+ PERSISTENT = 1,
+ WRITEONCE = 2
+}ITT_MEM_USAGE_TYPE_E;
+
+typedef enum
+{
+ L1D = 0,
+ SL2 = 1,
+ DDR = 3
+}ITT_MEM_REGION_E;
+
+typedef enum
+{
+ GET_NUM_MEMTAB = 0, /* callers pass this to obtain only the number of memtabs */
+ FILL_MEMTAB = 1, /* request to fill the memory record attributes */
+ USE_BASE = 2, /* use_or_fill_base copies a record's pv_base out to the caller's pointer */
+ FILL_BASE =3 /* use_or_fill_base saves the caller's pointer into a record's pv_base */
+}ITT_FUNC_TYPE_E;
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/* Memory record exchanged with the rate control library.
+ NOTE : must stay an exact replica of IALG_MemRec; replicate any change to it here */
+typedef struct
+{
+ /* Size in bytes */
+ UWORD32 u4_size;
+
+ /* Alignment in bytes */
+ WORD32 i4_alignment;
+
+ /* Memory region in which the record is to be placed */
+ ITT_MEM_REGION_E e_mem_region;
+
+ /* Whether the memory is scratch, persistent or write-once */
+ ITT_MEM_USAGE_TYPE_E e_usage;
+
+ /* Base pointer for allocated memory */
+ void *pv_base;
+} itt_memtab_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par Description
+* This function fills memory record attributes
+*
+* @param[in] ps_mem_tab
+* pointer to mem records
+*
+* @param[in] u4_size
+* size of the record
+*
+* @param[in] i4_alignment
+* memory alignment size
+*
+* @param[in] e_usage
+* usage
+*
+* @param[in] e_mem_region
+* mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab, WORD32 u4_size, WORD32 i4_alignment,
+ ITT_MEM_USAGE_TYPE_E e_usage, ITT_MEM_REGION_E e_mem_region);
+
+/**
+******************************************************************************
+*
+* @brief This function stores or retrieves the base pointer of a memory record
+*
+* @par Description
+* FILL_BASE saves ptr_to_be_filled[0] in pv_base; USE_BASE copies pv_base back out
+*
+* @param[in,out] ps_mem_tab
+* pointer to mem records
+*
+* @param[in,out] ptr_to_be_filled
+* handle to the memory record storage space
+*
+* @param[in] e_func_type
+* enum that dictates fill memory records or use memory records
+*
+* @return 0 on success (or nothing to do), -1 if the pointer to be copied is NULL
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled,
+ ITT_FUNC_TYPE_E e_func_type);
+
+
+#endif // IH264E_RC_MEM_INTERFACE_H_
+
diff --git a/encoder/ih264e_statistics.h b/encoder/ih264e_statistics.h
new file mode 100755
index 0000000..0ab33ca
--- /dev/null
+++ b/encoder/ih264e_statistics.h
@@ -0,0 +1,141 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_statistics.h
+*
+* @brief
+* Contains macros for generating stats about h264 encoder
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_STATISTICS_H_
+#define IH264E_STATISTICS_H_
+
+#if CAVLC_LEVEL_STATS
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @brief In cavlc encoding, a lut is used for encoding levels. It is not possible
+ * to use a look up for all possible levels. The extent to which the look up is
+ * generated is based on the statistics collected in the following global variables.
+ *
+ * gu4_cavlc_level_bin_lt_4 counts the coefficients with abs(level) < 4
+ * gu4_cavlc_level_bin_lt_16 counts the coefficients with 4 <= abs(level) < 16
+ * gu4_cavlc_level_bin_lt_32 counts the coefficients with 16 <= abs(level) < 32
+ * and so on ...
+ ******************************************************************************
+ */
+extern UWORD32 gu4_cavlc_level_bin_lt_4;
+extern UWORD32 gu4_cavlc_level_bin_lt_16;
+extern UWORD32 gu4_cavlc_level_bin_lt_32;
+extern UWORD32 gu4_cavlc_level_bin_lt_64;
+extern UWORD32 gu4_cavlc_level_bin_lt_128;
+extern UWORD32 gu4_cavlc_level_bin_else_where;
+extern UWORD32 gu4_cavlc_level_lut_hit_rate;
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief print cavlc stats
+******************************************************************************
+*/
+void print_cavlc_level_stats(void);
+
+#define GATHER_CAVLC_STATS1() \
+ if (u4_abs_level < 4)\
+ gu4_cavlc_level_bin_lt_4 ++; \
+ else if (u4_abs_level < 16) \
+ gu4_cavlc_level_bin_lt_16 ++; \
+ else if (u4_abs_level < 32) \
+ gu4_cavlc_level_bin_lt_32 ++; \
+ else if (u4_abs_level < 64) \
+ gu4_cavlc_level_bin_lt_64 ++; \
+ else if (u4_abs_level < 128) \
+ gu4_cavlc_level_bin_lt_128 ++; \
+ else \
+ gu4_cavlc_level_bin_else_where ++;
+
+#define GATHER_CAVLC_STATS2() \
+ gu4_cavlc_level_lut_hit_rate ++;
+
+#else
+
+#define GATHER_CAVLC_STATS1()
+
+#define GATHER_CAVLC_STATS2()
+
+#endif
+
+
+#if GATING_STATS
+
+/*****************************************************************************/
+/* Extern global declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief During encoding at the fastest preset, sometimes, when the inter
+* threshold is lower than the predefined threshold, intra analysis is not done.
+* The variable below keeps track of the number of MBs for which intra analysis
+* is not done
+******************************************************************************
+*/
+extern UWORD32 gu4_mb_gated_cnt;
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief print gating stats
+******************************************************************************
+*/
+void print_gating_stats(void);
+
+#define GATHER_GATING_STATS() \
+ gu4_mb_gated_cnt ++;
+
+#else
+
+#define GATHER_GATING_STATS()
+
+#endif
+
+
+#endif /* IH264E_STATISTICS_H_ */
diff --git a/encoder/ih264e_structs.h b/encoder/ih264e_structs.h
new file mode 100755
index 0000000..1043a53
--- /dev/null
+++ b/encoder/ih264e_structs.h
@@ -0,0 +1,2566 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_structs.h
+*
+* @brief
+* Structure definitions used in the encoder
+*
+* @author
+* Harish
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_STRUCTS_H_
+#define IH264E_STRUCTS_H_
+
+/*****************************************************************************/
+/* Extern Function type definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief intra prediction filters leaf level
+******************************************************************************
+ */
+typedef void (*pf_intra_pred)(UWORD8 *pu1_src, UWORD8 *pu1_dst,
+ WORD32 src_strd, WORD32 dst_strd,
+ WORD32 ui_neighboravailability);
+
+/**
+******************************************************************************
+ * @brief inter prediction filters leaf level
+******************************************************************************
+ */
+
+typedef void (*pf_inter_pred_luma_bilinear)(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst,
+ WORD32 src_strd1, WORD32 src_strd2, WORD32 dst_strd,
+ WORD32 height, WORD32 width);
+
+/**
+******************************************************************************
+ * @brief fwd transform leaf level
+******************************************************************************
+ */
+typedef void (*pf_trans_quant)(UWORD8*pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out,
+ WORD32 i4_src_stride, UWORD32 u4_pred_stride, UWORD32 u4_dst_stride,
+ const UWORD16 *pu2_scale_mat, const UWORD16 *pu2_thresh_mat,
+ UWORD32 u4_qbit, UWORD32 u4_round_fact, UWORD8 *pu1_nnz);
+
+typedef void (*pf_iquant_itrans)(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out,
+ WORD32 i4_src_stride, UWORD32 u4_pred_stride, UWORD32 u4_out_stride,
+ const UWORD16 *pu2_iscale_mat, const UWORD16 *pu2_weigh_mat,
+ UWORD32 qp_div, WORD32 *pi4_tmp);
+
+/**
+******************************************************************************
+ * @brief Padding leaf level
+******************************************************************************
+ */
+typedef void (*pf_pad)(UWORD8 *pu1_src, WORD32 src_strd, WORD32 wd, WORD32 pad_size);
+
+/**
+******************************************************************************
+ * @brief memory handling leaf level
+******************************************************************************
+ */
+typedef void (*pf_memcpy)(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+
+typedef void (*pf_memset)(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+typedef void (*pf_memcpy_mul8)(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes);
+
+typedef void (*pf_memset_mul8)(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes);
+
+/**
+******************************************************************************
+ * @brief Sad computation
+******************************************************************************
+ */
+typedef void (*pf_compute_sad)(UWORD8 *pu1_src, UWORD8 *pu1_est,
+ UWORD32 src_strd, UWORD32 est_strd,
+ WORD32 i4_max_sad, WORD32 *pi4_mb_distortion);
+
+/**
+******************************************************************************
+ * @brief Intra mode eval:encoder level
+******************************************************************************
+ */
+typedef void (*pf_evaluate_intra_modes)(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels_i16, UWORD8 *pu1_dst,
+ UWORD32 src_strd, UWORD32 dst_strd,
+ WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes);
+
+typedef void (*pf_evaluate_intra_4x4_modes)(UWORD8 *pu1_src, UWORD8 *pu1_ngbr_pels, UWORD8 *pu1_dst,
+ UWORD32 src_strd, UWORD32 dst_strd,
+ WORD32 u4_n_avblty, UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes, UWORD32 u4_lambda,
+ UWORD32 u4_predictd_mode);
+
+/**
+******************************************************************************
+ * @brief half_pel generation :encoder level
+******************************************************************************
+ */
+typedef void (*pf_sixtapfilter_horz)(UWORD8 *pu1_src, UWORD8 *pu1_dst,
+ WORD32 src_strd, WORD32 dst_strd);
+
+typedef void (*pf_sixtap_filter_2dvh_vert)(UWORD8 *pu1_src, UWORD8 *pu1_dst1, UWORD8 *pu1_dst2,
+ WORD32 src_strd, WORD32 dst_strd,
+ WORD32 *pi16_pred1,
+ WORD32 pi16_pred1_strd);
+/**
+******************************************************************************
+ * @brief color space conversion
+******************************************************************************
+ */
+typedef void (*pf_fmt_conv_420p_to_420sp)(UWORD8 *pu1_y_src, UWORD8 *pu1_u_src, UWORD8 *pu1_v_src,
+ UWORD8 *pu1_y_dst, UWORD8 *pu1_uv_dst,
+ UWORD16 u2_height, UWORD16 u2_width,
+ UWORD16 src_y_strd, UWORD16 src_u_strd, UWORD16 src_v_strd,
+ UWORD16 dst_y_strd, UWORD16 dst_uv_strd,
+ UWORD32 convert_uv_only);
+
+typedef void (*pf_fmt_conv_422ile_to_420sp)(UWORD8 *pu1_y_buf, UWORD8 *pu1_u_buf, UWORD8 *pu1_v_buf,
+ UWORD8 *pu1_422i_buf,
+ WORD32 u4_y_width, WORD32 u4_y_height, WORD32 u4_y_stride,
+ WORD32 u4_u_stride, WORD32 u4_v_stride,
+ WORD32 u4_422i_stride);
+
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @enum CODEC_STATE_T
+ * @brief codec state
+ ******************************************************************************
+ */
+typedef enum
+{
+ INIT_DONE,
+ HEADER_DONE,
+ FIRST_FRAME_DONE,
+} CODEC_STATE_T;
+
+
+/**
+ ******************************************************************************
+ * @enum JOBQ_CMD_T
+ * @brief list of job commands (used during job instantiation)
+ ******************************************************************************
+ */
+typedef enum
+{
+ CMD_PROCESS,
+ CMD_ENTROPY,
+ CMD_FMTCONV,
+ CMD_ME,
+}JOBQ_CMD_T;
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+ * PU information
+ */
+typedef struct
+{
+
+ /**
+ * L0 Motion Vector
+ */
+ mv_t s_l0_mv;
+
+ /**
+ * PU X position in terms of min PU (4x4) units
+ */
+ UWORD32 b4_pos_x : 4;
+
+ /**
+ * PU Y position in terms of min PU (4x4) units
+ */
+ UWORD32 b4_pos_y : 4;
+
+ /**
+ * PU width in pixels = (b4_wd + 1) << 2
+ */
+ UWORD32 b4_wd : 2;
+
+ /**
+ * PU height in pixels = (b4_ht + 1) << 2
+ */
+ UWORD32 b4_ht : 2;
+
+ /**
+ * L0 Ref index
+ */
+ WORD8 i1_l0_ref_idx;
+
+} enc_pu_t;
+
+typedef struct _codec_t codec_t;
+
+typedef struct
+{
+ /** Descriptor of raw buffer */
+ iv_raw_buf_t s_raw_buf;
+
+ /** Lower 32bits of time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if the current buffer is last buffer */
+ UWORD32 u4_is_last;
+
+ /** Flag to indicate if mb info is sent along with input buffer */
+ UWORD32 u4_mb_info_type;
+
+ /** Size of the mb info structure */
+ UWORD32 u4_mb_info_size;
+
+ /** Buffer containing mb info if mb_info_type is non-zero */
+ void *pv_mb_info;
+
+ /** Flag to indicate if pic info is sent along with input buffer */
+ UWORD32 u4_pic_info_type;
+
+ /** Buffer containing pic info if pic_info_type is non-zero */
+ void *pv_pic_info;
+
+}inp_buf_t;
+
+typedef struct
+{
+ /** Descriptor of bitstream buffer */
+ iv_bits_buf_t s_bits_buf;
+
+ /** Lower 32bits of time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if the current buffer is last buffer */
+ UWORD32 u4_is_last;
+
+}out_buf_t;
+
+typedef struct
+{
+ /** Descriptor of picture buffer */
+ pic_buf_t s_pic_buf;
+
+ /** Lower 32bits of time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to the above buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if the current buffer is last buffer */
+ UWORD32 u4_is_last;
+
+ /** Picture count corresponding to current picture */
+ WORD32 i4_pic_cnt;
+
+}rec_buf_t;
+
+typedef struct
+{
+ /** maximum width for which codec should request memory requirements */
+ UWORD32 u4_max_wd;
+
+ /** maximum height for which codec should request memory requirements */
+ UWORD32 u4_max_ht;
+
+ /** Maximum number of reference frames */
+ UWORD32 u4_max_ref_cnt;
+
+ /** Maximum number of reorder frames */
+ UWORD32 u4_max_reorder_cnt;
+
+ /** Maximum level supported */
+ UWORD32 u4_max_level;
+
+ /** Input color format */
+ IV_COLOR_FORMAT_T e_inp_color_fmt;
+
+ /** Flag to enable/disable - To be used only for debugging/testing */
+ UWORD32 u4_enable_recon;
+
+ /** Recon color format */
+ IV_COLOR_FORMAT_T e_recon_color_fmt;
+
+ /** Encoder Speed preset - Value between 0 (slowest) and 100 (fastest) */
+ IVE_SPEED_CONFIG u4_enc_speed_preset;
+
+ /** Rate control mode */
+ IVE_RC_MODE_T e_rc_mode;
+
+ /** Maximum frame rate to be supported */
+ UWORD32 u4_max_framerate;
+
+ /** Maximum bitrate to be supported */
+ UWORD32 u4_max_bitrate;
+
+ /** Maximum number of consecutive B frames */
+ UWORD32 u4_max_num_bframes;
+
+ /** Content type Interlaced/Progressive */
+ IV_CONTENT_TYPE_T e_content_type;
+
+ /** Maximum search range to be used in X direction */
+ UWORD32 u4_max_srch_rng_x;
+
+ /** Maximum search range to be used in Y direction */
+ UWORD32 u4_max_srch_rng_y;
+
+ /** Slice Mode */
+ IVE_SLICE_MODE_T e_slice_mode;
+
+ /** Slice parameter */
+ UWORD32 u4_slice_param;
+
+ /** Processor architecture */
+ IV_ARCH_T e_arch;
+
+ /** SOC details */
+ IV_SOC_T e_soc;
+
+ /** Input width to be sent in bitstream */
+ UWORD32 u4_disp_wd;
+
+ /** Input height to be sent in bitstream */
+ UWORD32 u4_disp_ht;
+
+ /** Input width */
+ UWORD32 u4_wd;
+
+ /** Input height */
+ UWORD32 u4_ht;
+
+ /** Input stride */
+ UWORD32 u4_strd;
+
+ /** Source frame rate */
+ UWORD32 u4_src_frame_rate;
+
+ /** Target frame rate */
+ UWORD32 u4_tgt_frame_rate;
+
+ /** Target bitrate in kilobits per second */
+ UWORD32 u4_target_bitrate;
+
+ /** Force current frame type */
+ IV_PICTURE_CODING_TYPE_T e_frame_type;
+
+ /** Encoder mode */
+ IVE_ENC_MODE_T e_enc_mode;
+
+ /** Set initial Qp for I pictures */
+ UWORD32 u4_i_qp;
+
+ /** Set initial Qp for P pictures */
+ UWORD32 u4_p_qp;
+
+ /** Set initial Qp for B pictures */
+ UWORD32 u4_b_qp;
+
+ /** Set minimum Qp for I pictures */
+ UWORD32 u4_i_qp_min;
+
+ /** Set maximum Qp for I pictures */
+ UWORD32 u4_i_qp_max;
+
+ /** Set minimum Qp for P pictures */
+ UWORD32 u4_p_qp_min;
+
+ /** Set maximum Qp for P pictures */
+ UWORD32 u4_p_qp_max;
+
+ /** Set minimum Qp for B pictures */
+ UWORD32 u4_b_qp_min;
+
+ /** Set maximum Qp for B pictures */
+ UWORD32 u4_b_qp_max;
+
+ /** Adaptive intra refresh mode */
+ IVE_AIR_MODE_T e_air_mode;
+
+ /** Adaptive intra refresh period in frames */
+ UWORD32 u4_air_refresh_period;
+
+ /** VBV buffer delay */
+ UWORD32 u4_vbv_buffer_delay;
+
+ /** VBV buffer size */
+ UWORD32 u4_vbv_buf_size;
+
+ /** Number of cores to be used */
+ UWORD32 u4_num_cores;
+
+ /** ME speed preset - Value between 0 (slowest) and 100 (fastest) */
+ UWORD32 u4_me_speed_preset;
+
+ /** Flag to enable/disable half pel motion estimation */
+ UWORD32 u4_enable_hpel;
+
+ /** Flag to enable/disable quarter pel motion estimation */
+ UWORD32 u4_enable_qpel;
+
+ /** Flag to enable/disable intra 4x4 analysis */
+ UWORD32 u4_enable_intra_4x4;
+
+ /** Flag to enable/disable intra 8x8 analysis */
+ UWORD32 u4_enable_intra_8x8;
+
+ /** Flag to enable/disable intra 16x16 analysis */
+ UWORD32 u4_enable_intra_16x16;
+
+ /** Flag to enable/disable fast SAD approximation */
+ UWORD32 u4_enable_fast_sad;
+
+ /*flag to enable/disable alternate reference frames */
+ UWORD32 u4_enable_alt_ref;
+
+ /** Flag to enable/disable computation of SATQD in ME */
+ UWORD32 u4_enable_satqd;
+
+ /*Minimum SAD to search for*/
+ WORD32 i4_min_sad;
+
+ /** Maximum search range in X direction for farthest reference */
+ UWORD32 u4_srch_rng_x;
+
+ /** Maximum search range in Y direction for farthest reference */
+ UWORD32 u4_srch_rng_y;
+
+ /** I frame interval */
+ UWORD32 u4_i_frm_interval;
+
+ /** IDR frame interval */
+ UWORD32 u4_idr_frm_interval;
+
+ /** consecutive B frames */
+ UWORD32 u4_num_b_frames;
+
+ /** Disable deblock level (0: Enable completely, 3: Disable completely) */
+ UWORD32 u4_disable_deblock_level;
+
+ /** Profile */
+ IV_PROFILE_T e_profile;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to say if the current config parameter set is valid
+ * Will be zero to start with and will be set to 1, when configured
+ * Once encoder uses the parameter set, this will be set to zero */
+ UWORD32 u4_is_valid;
+
+ /** Command associated with this config param set */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_cmd;
+
+ /** Input width in mbs */
+ UWORD32 i4_wd_mbs;
+
+ /** Input height in mbs */
+ UWORD32 i4_ht_mbs;
+
+ /** entropy coding mode flag */
+ UWORD32 u4_entropy_coding_mode;
+
+ /** enable weighted prediction */
+ UWORD32 u4_weighted_prediction;
+
+ /** enable constrained intra prediction */
+ UWORD32 u4_constrained_intra_pred;
+
+ /** Pic info type */
+ UWORD32 u4_pic_info_type;
+ /**
+ * MB info type
+ */
+ UWORD32 u4_mb_info_type;
+
+}cfg_params_t;
+
+
+
+/** Structure to hold format conversion context */
+typedef struct
+{
+ /** Current row for which format conversion should be done */
+ WORD32 i4_cur_row;
+
+ /** Number of rows for which format conversion should be done */
+ WORD32 i4_num_rows;
+
+}fmt_conv_t;
+
+
+/**
+ * Structure to represent a processing job entry
+ */
+typedef struct
+{
+ /**
+ * Command
+ */
+ WORD32 i4_cmd;
+
+ /**
+ * MB x of the starting MB
+ */
+ WORD16 i2_mb_x;
+
+ /**
+ * MB y of the starting MB
+ */
+
+ WORD16 i2_mb_y;
+
+ /**
+ * Number of MBs that need to be processed in this job
+ */
+ WORD16 i2_mb_cnt;
+
+ /**
+ * Process contexts base index
+ * Will toggle between 0 and MAX_PROCESS_THREADS
+ */
+ WORD16 i2_proc_base_idx;
+
+} job_t;
+
+
+/**
+ * Structure to represent a MV Bank buffer
+ */
+typedef struct
+{
+ /**
+ * Pointer to hold num PUs each MB in a picture
+ */
+ UWORD32 *pu4_mb_pu_cnt;
+
+ /**
+ * Pointer to hold enc_pu_t for each PU in a picture
+ */
+ enc_pu_t *ps_pic_pu;
+
+ /**
+ * Pointer to hold PU map for each MB in a picture
+ */
+ UWORD8 *pu1_pic_pu_map;
+
+ /**
+ * Pointer to hold the slice map (UWORD16 entries, despite the pu1_ prefix)
+ */
+ UWORD16 *pu1_pic_slice_map;
+
+ /**
+ * Absolute POC for the current MV Bank
+ */
+ WORD32 i4_abs_poc;
+
+ /**
+ * Buffer Id
+ */
+ WORD32 i4_buf_id;
+
+} mv_buf_t;
+
+
+/**
+ * Reference set containing the picture count, the POC and pointers to MV buf and pic buf
+ */
+typedef struct
+{
+ /** Picture count */
+ WORD32 i4_pic_cnt;
+
+ /** POC */
+ WORD32 i4_poc;
+
+ /** Pointer to the picture buffer */
+ pic_buf_t *ps_pic_buf;
+
+ /** Pointer to the mv buffer */
+ mv_buf_t *ps_mv_buf;
+
+}ref_set_t;
+
+typedef struct
+{
+
+ /**
+ * Pointer to current PPS
+ */
+ pps_t *ps_pps;
+
+ /**
+ * Pointer to current SPS
+ */
+ sps_t *ps_sps;
+
+ /**
+ * Pointer to current slice header structure
+ */
+ slice_header_t *ps_slice_hdr;
+
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+
+ WORD32 i4_mb_y;
+
+ /**
+ * Current PU structure - set to MB enc_pu_t pointer at the start of MB processing and incremented
+ * for every TU
+ */
+ enc_pu_t *ps_pu;
+
+ /**
+ * Pointer to frame level enc_pu_t for the current frame being parsed
+ * where MVs and Intra pred modes will be updated
+ */
+ enc_pu_t *ps_pic_pu;
+
+ /**
+ * Pointer to hold the number of PUs in each MB of a picture
+ */
+ UWORD32 *pu4_mb_pu_cnt;
+
+ /** PU Index map per MB. The indices in this map are w.r.t picture pu array and not
+ * w.r.t MB pu array.
+ * This will be used during mv prediction and since neighbors will have different MB pu map
+ * it will be easier if they all have indices w.r.t picture level PU array rather than MB level
+ * PU array.
+ * pu1_pic_pu_map is the map w.r.t MB's enc_pu_t array
+ */
+ UWORD32 *pu4_pic_pu_idx_map;
+
+ /**
+ * Pointer to pu_map for the current frame being parsed
+ * where MVs and Intra pred modes will be updated
+ */
+ UWORD8 *pu1_pic_pu_map;
+
+ /**
+ * PU count in current MB
+ */
+ WORD32 i4_mb_pu_cnt;
+
+ /**
+ * Start PU index of current MB. NOTE(review): inferred from the field name; the original comment duplicated "PU count in current MB" - confirm
+ */
+ WORD32 i4_mb_start_pu_idx;
+
+ /**
+ * Top availability for current MB level
+ */
+ UWORD8 u1_top_mb_avail;
+
+ /**
+ * Top right availability for current MB level
+ */
+ UWORD8 u1_top_rt_mb_avail;
+ /**
+ * Top left availability for current MB level
+ */
+ UWORD8 u1_top_lt_mb_avail;
+ /**
+ * Left availability for current MB level
+ */
+ UWORD8 u1_left_mb_avail;
+
+}mv_ctxt_t;
+
+typedef struct
+{
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * MB's x position within a Slice in raster scan in MB units
+ */
+ WORD32 i4_mb_slice_x;
+
+ /**
+ * MB's y position within a Slice in raster scan in MB units
+ */
+ WORD32 i4_mb_slice_y;
+
+ /**
+ * Vertical boundary strength, two bits per edge.
+ * Stored in format. BS[15] | BS[14] | .. |BS[0]
+ */
+ UWORD32 *pu4_pic_vert_bs;
+
+ /**
+ * Horizontal boundary strength (per field name), two bits per edge.
+ * Stored in format. BS[15] | BS[14] | .. |BS[0]
+ */
+ UWORD32 *pu4_pic_horz_bs;
+
+ /**
+ * Qp array stored for each mb
+ */
+ UWORD8 *pu1_pic_qp;
+
+}bs_ctxt_t;
+
+typedef struct
+{
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * Structure (embedded by value) that contains BS and QP frame level arrays
+ */
+ bs_ctxt_t s_bs_ctxt;
+
+ /**
+ * Pointer to 0th luma pixel in current pic
+ */
+ UWORD8 *pu1_cur_pic_luma;
+
+ /**
+ * Pointer to 0th chroma pixel in current pic
+ */
+ UWORD8 *pu1_cur_pic_chroma;
+
+ /**
+ * Points to the array of slice indices which is used to identify the slice
+ * to which each MB in a frame belongs.
+ */
+ UWORD8 *pu1_slice_idx;
+
+}deblk_ctxt_t;
+
+
+/**
+ ******************************************************************************
+ * @brief Structure to hold data and flags for 'n' mb processing for
+ * deblocking, padding and half pel generation.
+ ******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * MB's x position last processed + 1
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position currently being processed.
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * Number of MBs processed in a stretch
+ */
+ WORD32 i4_n_mbs;
+
+}n_mb_process_ctxt_t;
+
+
+/**
+******************************************************************************
+ * @brief Structure to hold coefficient info for a 4x4 subblock.
+ * The following can be used to type-cast coefficient data that is stored
+ * per subblock. Note that though ai2_residue is declared as an array that
+ * holds 16 coefficients, only the first few entries will be valid. Next
+ * subblocks data starts after the valid number of coefficients. Number
+ * of non-zero coefficients will be derived using number of non-zero bits
+ * in sig coeff map
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * significant coefficient map and nnz are packed into
+ * the msb (2 bytes) and lsb (2 bytes) respectively
+ */
+ WORD32 i4_sig_map_nnz;
+
+ /**
+ * array of non zero residue coefficients
+ */
+ WORD16 ai2_residue[16];
+
+}tu_sblk_coeff_data_t;
+
+/**
+******************************************************************************
+ * @brief Structure contains few common state variables such as MB indices,
+ * current SPS, PPS etc which are to be used in the entropy thread. By keeping
+ * it a different structure it is being explicitly signaled that these
+ * variables are specific to entropy threads context and other threads should
+ * not update these elements
+******************************************************************************
+ */
+typedef struct
+{
+
+ /**
+ * start of frame / start of slice flag
+ */
+ WORD32 i4_sof;
+
+ /**
+ * end of frame / end of slice flag
+ */
+ WORD32 i4_eof;
+
+ /**
+ * generate header upon request
+ */
+ WORD32 i4_gen_header;
+
+ /**
+ * seq_parameter_set_id
+ */
+ UWORD32 u4_sps_id;
+
+ /**
+ * Pointer to base of sequence parameter set structure array
+ */
+ sps_t *ps_sps_base;
+
+ /**
+ * pic_parameter_set_id
+ */
+ UWORD32 u4_pps_id;
+
+ /**
+ * Pointer to base of Picture parameter set structure array
+ */
+ pps_t *ps_pps_base;
+
+ /**
+ * Current slice idx
+ */
+ WORD32 i4_cur_slice_idx;
+
+ /**
+ * Points to the array of slice indices which is used to identify the independent slice
+ * to which each MB in a frame belongs.
+ */
+ UWORD8 *pu1_slice_idx;
+
+ /**
+ * Pointer to base of slice header structure array
+ */
+ slice_header_t *ps_slice_hdr_base;
+
+ /**
+ * entropy status
+ */
+ UWORD8 *pu1_entropy_map;
+
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * MB count. NOTE(review): inferred from the field name; the original comment duplicated "MB start address" - confirm
+ */
+ WORD32 i4_mb_cnt;
+
+ /**
+ * MB start address
+ */
+ WORD32 i4_mb_start_add;
+
+ /**
+ * MB end address
+ */
+ WORD32 i4_mb_end_add;
+
+ /**
+ * Input width in mbs
+ */
+ WORD32 i4_wd_mbs;
+
+ /**
+ * Input height in mbs
+ */
+ WORD32 i4_ht_mbs;
+
+ /**
+ * Pointer to the bitstream structure
+ */
+ bitstrm_t *ps_bitstrm;
+
+ /**
+ * transform_8x8_mode_flag
+ */
+ WORD8 i1_transform_8x8_mode_flag;
+
+ /**
+ * entropy_coding_mode_flag. NOTE(review): name carries a u1_ prefix but the type is WORD8
+ */
+ WORD8 u1_entropy_coding_mode_flag;
+
+ /**
+ * Pointer to the top row nnz for luma
+ */
+ UWORD8 (*pu1_top_nnz_luma)[4];
+
+ /**
+ * left nnz for luma
+ */
+ UWORD32 u4_left_nnz_luma;
+
+ /**
+ * Zero runs before for the mb (16-entry array stored in place, not a pointer)
+ */
+ UWORD8 au1_zero_run[16];
+
+ /**
+ * Pointer to the top row nnz for chroma
+ */
+ UWORD8 (*pu1_top_nnz_cbcr)[4];
+
+ /**
+ * left nnz for chroma. NOTE(review): name carries a u4_ prefix but the type is UWORD8
+ */
+ UWORD8 u4_left_nnz_cbcr;
+
+ /**
+ * Pointer to frame level mb subblock coeff data
+ */
+ void *pv_pic_mb_coeff_data;
+
+ /**
+ * Pointer to mb subblock coeff data and number of subblocks and scan idx
+ * Incremented each time a coded subblock is processed
+ */
+ void *pv_mb_coeff_data;
+
+ /**
+ * Pointer to frame level mb header data
+ */
+ void *pv_pic_mb_header_data;
+
+ /**
+ * Pointer to mb header data and
+ * incremented each time a coded mb is encoded
+ */
+ void *pv_mb_header_data;
+
+ /**
+ * Error code during parse stage
+ */
+ IH264E_ERROR_T i4_error_code;
+
+ /**
+ * Void pointers to the process and entropy job queue contexts
+ */
+ void *pv_proc_jobq, *pv_entropy_jobq;
+
+ /**
+ * Flag to signal end of frame
+ */
+ WORD32 i4_end_of_frame;
+
+ /**
+ * Abs POC count of the frame
+ */
+ WORD32 i4_abs_pic_order_cnt;
+
+ /**
+ * Pointer to mb skip run
+ */
+ WORD32 *pi4_mb_skip_run;
+
+ /**
+ * Flag to signal end of sequence
+ */
+ UWORD32 u4_is_last;
+
+ /**
+ * Lower 32bits of time-stamp corresponding to the buffer being encoded
+ */
+ UWORD32 u4_timestamp_low;
+
+ /**
+ * Upper 32bits of time-stamp corresponding to the buffer being encoded
+ */
+ UWORD32 u4_timestamp_high;
+
+ /**
+ * Current Picture count - used for synchronization
+ */
+ WORD32 i4_pic_cnt;
+
+ /**
+ * Number of bits consumed by header for I and P mb types (MAX_MB_TYPE entries)
+ */
+ UWORD32 u4_header_bits[MAX_MB_TYPE];
+
+ /**
+ * Number of bits consumed by residue for I and P mb types (MAX_MB_TYPE entries)
+ */
+ UWORD32 u4_residue_bits[MAX_MB_TYPE];
+
+} entropy_ctxt_t;
+
+/**
+******************************************************************************
+* @brief macro block info.
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * is intra flag. NOTE(review): inferred from the field name; the original comment duplicated "mb type" - confirm
+ */
+ UWORD16 u2_is_intra;
+
+ /**
+ * mb type
+ */
+ UWORD16 u2_mb_type;
+
+ /**
+ * csbp
+ */
+ UWORD32 u4_csbp;
+
+ /**
+ * mb distortion
+ */
+ WORD32 i4_mb_distortion;
+
+}mb_info_t;
+
+/**
+******************************************************************************
+* @brief structure representing the neighbor availability of a mb
+* or subblk or any other partition
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * left blk/subblk/partition
+ */
+ UWORD8 u1_mb_a;
+
+ /**
+ * top blk/subblk/partition
+ */
+ UWORD8 u1_mb_b;
+
+ /**
+ * top right blk/subblk/partition
+ */
+ UWORD8 u1_mb_c;
+
+ /**
+ * top left blk/subblk/partition
+ */
+ UWORD8 u1_mb_d;
+
+}block_neighbors_t;
+
+/**
+ ******************************************************************************
+ * @brief MB info related variables used during NMB processing
+ ******************************************************************************
+ */
+typedef struct
+{
+ UWORD32 u4_mb_type;
+ UWORD32 u4_min_sad;
+ UWORD32 u4_min_sad_reached;
+ WORD32 i4_mb_cost;
+ WORD32 i4_mb_distortion;
+
+
+ mv_t s_skip_mv;
+ mv_t s_pred_mv;
+
+ block_neighbors_t s_ngbr_avbl;
+
+ /*
+ * Pointer to the buffer holding the best subpel data for each MB of NMB
+ */
+ UWORD8 *pu1_best_sub_pel_buf;
+
+ /*
+ * Stride for the best subpel buffer
+ */
+ UWORD32 u4_bst_spel_buf_strd;
+
+}mb_info_nmb_t;
+
+/**
+ ******************************************************************************
+ * @brief Pixel processing thread context
+ ******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * entropy context
+ */
+ entropy_ctxt_t s_entropy;
+
+ /**
+ * me context
+ */
+ me_ctxt_t s_me_ctxt;
+
+ /**
+ * Pointer to codec context
+ */
+ codec_t *ps_codec;
+
+ /**
+ * N mb process context
+ */
+ n_mb_process_ctxt_t s_n_mb_ctxt;
+
+ /**
+ * Source pointer to current MB luma
+ */
+ UWORD8 *pu1_src_buf_luma;
+
+ /**
+ * Source pointer to current MB chroma
+ */
+ UWORD8 *pu1_src_buf_chroma;
+
+ /**
+ * Recon pointer to current MB luma
+ */
+ UWORD8 *pu1_rec_buf_luma;
+
+ /**
+ * Recon pointer to current MB chroma
+ */
+ UWORD8 *pu1_rec_buf_chroma;
+
+ /**
+ * Ref pointer to current MB luma
+ */
+ UWORD8 *pu1_ref_buf_luma;
+
+ /**
+ * Ref pointer to current MB chroma
+ */
+ UWORD8 *pu1_ref_buf_chroma;
+
+ /**
+ * pointer to luma plane of input buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_src_buf_luma_base;
+
+ /**
+ * pointer to luma plane of reconstructed buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_rec_buf_luma_base;
+
+ /**
+ * pointer to luma plane of ref buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_ref_buf_luma_base;
+
+ /**
+ * pointer to chroma plane of input buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_src_buf_chroma_base;
+
+ /*
+ * Buffer for color space conversion of luma
+ */
+ UWORD8 *pu1_y_csc_buf;
+
+ /*
+ * Buffer for color space conversion of chroma (NOTE(review): per field name; original comment said luma - confirm)
+ */
+
+ UWORD8 *pu1_uv_csc_buf;
+
+ /**
+ * pointer to chroma plane of reconstructed buffer (base :: mb (0,0))
+ */
+ UWORD8 *pu1_rec_buf_chroma_base;
+
+ /**
+ * pointer to chroma plane of ref buffer (base :: mb (0,0)) (NOTE(review): per field name; original comment said reconstructed - confirm)
+ */
+ UWORD8 *pu1_ref_buf_chroma_base;
+
+ /**
+ * Pointer to ME NMB info
+ */
+ mb_info_nmb_t *ps_nmb_info;
+
+ mb_info_nmb_t *ps_cur_mb;
+
+ /**
+ * source stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_src_strd;
+
+ /**
+ * recon stride & ref stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_rec_strd;
+
+ /**
+ * Offset for half pel x plane from the pic buf
+ */
+ UWORD32 u4_half_x_offset;
+
+ /**
+ * Offset for half pel y plane from half x plane
+ */
+ UWORD32 u4_half_y_offset;
+
+ /**
+ * Offset for half pel xy plane from half y plane
+ */
+ UWORD32 u4_half_xy_offset;
+
+ /**
+ * pred buffer pointer (temp buffer 1)
+ */
+ UWORD8 *pu1_pred_mb;
+
+ /**
+ * pred buffer pointer (prediction buffer for intra 16x16)
+ */
+ UWORD8 *pu1_pred_mb_intra_16x16;
+
+ /**
+ * pred buffer pointer (prediction buffer for intra 16x16_plane)
+ */
+ UWORD8 *pu1_pred_mb_intra_16x16_plane;
+
+ /**
+ * pred buffer pointer (prediction buffer for intra chroma)
+ */
+ UWORD8 *pu1_pred_mb_intra_chroma;
+
+ /**
+ * pred buffer pointer (prediction buffer for intra chroma plane)
+ */
+ UWORD8 *pu1_pred_mb_intra_chroma_plane;
+
+ /**
+ * temp. reference buffer ptr for intra 4x4 when rdopt is on
+ */
+ UWORD8 *pu1_ref_mb_intra_4x4;
+
+ /**
+ * prediction buffer stride
+ */
+ WORD32 i4_pred_strd;
+
+ /**
+ * transform buffer pointer (temp buffer 2)
+ */
+ WORD16 *pi2_res_buf;
+
+ /**
+ * temp. transform buffer ptr for intra 4x4 when rdopt is on
+ */
+ WORD16 *pi2_res_buf_intra_4x4;
+
+ /**
+ * transform buffer stride
+ */
+ WORD32 i4_res_strd;
+
+ /**
+ * scratch buffer for inverse transform (temp buffer 3)
+ */
+ void *pv_scratch_buff;
+
+ /**
+ * frame num
+ */
+ WORD32 i4_frame_num;
+
+ /**
+ * start address of frame / sub-frame
+ */
+ WORD32 i4_frame_strt_add;
+
+ /**
+ * IDR pic
+ */
+ UWORD32 u4_is_idr;
+
+ /**
+ * idr_pic_id
+ */
+ UWORD32 u4_idr_pic_id;
+
+ /**
+ * Input width in mbs
+ */
+ WORD32 i4_wd_mbs;
+
+ /**
+ * Input height in mbs
+ */
+ WORD32 i4_ht_mbs;
+
+ /**
+ * slice_type
+ */
+ WORD32 i4_slice_type;
+
+ /**
+ * Current slice idx
+ */
+ WORD32 i4_cur_slice_idx;
+
+ /**
+ * MB's x position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_x;
+
+ /**
+ * MB's y position within a picture in raster scan in MB units
+ */
+ WORD32 i4_mb_y;
+
+ /**
+ * MB's x position within a Slice in raster scan in MB units
+ */
+ WORD32 i4_mb_slice_x;
+
+ /**
+ * MB's y position within a Slice in raster scan in MB units
+ */
+ WORD32 i4_mb_slice_y;
+
+ /**
+ * mb type
+ */
+ UWORD32 u4_mb_type;
+
+ /**
+ * is intra
+ */
+ UWORD32 u4_is_intra;
+
+ /**
+ * mb neighbor availability pointer
+ */
+ block_neighbors_t *ps_ngbr_avbl;
+
+ /**
+ * lambda (lagrange multiplier for cost computation)
+ */
+ UWORD32 u4_lambda;
+
+ /**
+ * mb distortion
+ */
+ WORD32 i4_mb_distortion;
+
+ /**
+ * mb cost
+ */
+ WORD32 i4_mb_cost;
+
+ /********************************************************************/
+ /* i4_ngbr_avbl_16x16_mb - ngbr avbl of curr mb */
+ /* ai4_neighbor_avail_8x8_subblks - ngbr avbl of all 8x8 sub blocks of curr mb */
+ /* au1_ngbr_avbl_4x4_subblks - ngbr avbl of all 4x4 sub blocks of curr mb */
+ /* i4_chroma_neighbor_avail_8x8_mb - chroma ngbr avbl of curr mb */
+ /********************************************************************/
+ WORD32 i4_ngbr_avbl_16x16_mb;
+ WORD32 ai4_neighbor_avail_8x8_subblks[4];
+ UWORD8 au1_ngbr_avbl_4x4_subblks[16];
+ WORD32 i4_chroma_neighbor_avail_8x8_mb;
+
+ /**
+ * array to store the mode of mb sub blocks
+ */
+ UWORD8 au1_intra_luma_mb_4x4_modes[16];
+
+ /**
+ * array to store the predicted mode of mb sub blks
+ */
+ UWORD8 au1_predicted_intra_luma_mb_4x4_modes[16];
+
+ /**
+ * macro block intra 16x16 mode
+ */
+ UWORD8 u1_l_i16_mode;
+
+ /**
+ * array to store the mode of the macro block intra 8x8 4 modes
+ */
+ UWORD8 au1_intra_luma_mb_8x8_modes[4];
+
+ /**
+ * intra chroma mb mode
+ */
+ UWORD8 u1_c_i8_mode;
+
+ /********************************************************************/
+ /* array to store pixels from the neighborhood for intra prediction */
+ /* i16 - 16 left pels + 1 top left pel + 16 top pels = 33 pels */
+ /* i8 - 8 lpels + 1 tlpels + 8 tpels + 8 tr pels = 25 pels */
+ /* i4 - 4 lpels + 1 tlpels + 4 tpels + 4 tr pels = 13 pels */
+ /* ic - (8 left pels + 1 top left pel + 8 top pels) * 2 = 34 pels */
+ /********************************************************************/
+ UWORD8 au1_ngbr_pels[34];
+
+ /**
+ * array for 8x8 intra pels filtering (temp buff 4)
+ */
+ UWORD8 au1_neighbor_pels_i8x8_unfiltered[25];
+
+ /**
+ * Number of sub partitions in the inter pred MB
+ */
+ UWORD32 u4_num_sub_partitions;
+
+ /**
+ * Pointer to hold the number of PUs in each MB of a picture
+ */
+ UWORD32 *pu4_mb_pu_cnt;
+
+ /**
+ * Pointer to the array of structures having motion vectors, size
+ * and position of sub partitions
+ */
+ enc_pu_t *ps_pu;
+
+ /**
+ * predicted motion vector
+ */
+ mv_t *ps_pred_mv;
+
+ /**
+ * top row mb syntax information base
+ * In normal working scenarios, for a given context set,
+ * the mb syntax info pointer is identical across all process threads.
+ * But when the hard bound on slices are enabled, in multi core, frame
+ * is partitioned in to sections equal to set number of cores and each
+ * partition is run independently. In this scenario, a ctxt set will alone
+ * appear to run multiple frames at a time. For this to occur, the common
+ * pointers across the proc ctxt should disappear.
+ *
+ * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+ * across individual ctxts when byte bnd per slice is enabled.
+ */
+ mb_info_t *ps_top_row_mb_syntax_ele_base;
+
+ /**
+ * top row mb syntax information
+ */
+ mb_info_t *ps_top_row_mb_syntax_ele;
+
+ /**
+ * left mb syntax information
+ */
+ mb_info_t s_left_mb_syntax_ele;
+
+ /**
+ * top left mb syntax information
+ */
+ mb_info_t s_top_left_mb_syntax_ele;
+
+ /**
+ * top left mb syntax information, _ME variant (NOTE(review): original comment duplicated the one above; presumably the copy used by ME - confirm)
+ */
+
+ mb_info_t s_top_left_mb_syntax_ME;
+
+ /**
+ * left mb motion vector, _ME variant
+ */
+ enc_pu_t s_left_mb_pu_ME;
+
+ /**
+ * top left mb motion vector, _ME variant
+ */
+ enc_pu_t s_top_left_mb_pu_ME;
+
+
+ /**
+ * mb neighbor availability (stored by value, not a pointer)
+ */
+ block_neighbors_t s_ngbr_avbl;
+
+ /**
+ * In case the macroblock type is intra, the intra modes of all
+ * partitions for the left mb are stored in the array below
+ */
+ UWORD8 au1_left_mb_intra_modes[16];
+
+ /**
+ * In case the macroblock type is intra, the intra modes of all
+ * partitions for the top mb are stored in the array below
+ *
+ * In normal working scenarios, for a given context set,
+ * the mb syntax info pointer is identical across all process threads.
+ * But when the hard bound on slices are enabled, in multi core, frame
+ * is partitioned in to sections equal to set number of cores and each
+ * partition is run independently. In this scenario, a ctxt set will alone
+ * appear to run multiple frames at a time. For this to occur, the common
+ * pointers across the proc ctxt should disappear.
+ *
+ * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+ * across individual ctxts when byte bnd per slice is enabled.
+ */
+ UWORD8 *pu1_top_mb_intra_modes_base;
+
+ /**
+ * In case the macroblock type is intra, the intra modes of all
+ * partitions for the top mb are stored in the array below
+ */
+ UWORD8 *pu1_top_mb_intra_modes;
+
+ /**
+ * skip motion vector info
+ */
+ mv_t *ps_skip_mv;
+
+ /**
+ * left mb motion vector
+ */
+ enc_pu_t s_left_mb_pu;
+
+ /**
+ * top left mb motion vector
+ */
+ enc_pu_t s_top_left_mb_pu;
+
+ /**
+ * top row motion vector info
+ *
+ * In normal working scenarios, for a given context set,
+ * the top row pu pointer is identical across all process threads.
+ * But when the hard bound on slices are enabled, in multi core, frame
+ * is partitioned in to sections equal to set number of cores and each
+ * partition is run independently. In this scenario, a ctxt set will alone
+ * appear to run multiple frames at a time. For this to occur, the common
+ * pointers across the proc ctxt should disappear.
+ *
+ * This is done by allocating MAX_PROCESS_THREADS memory and distributing
+ * across individual ctxts when byte bnd per slice is enabled.
+ */
+ enc_pu_t *ps_top_row_pu_base;
+
+ /**
+ * top row motion vector info
+ */
+ enc_pu_t *ps_top_row_pu;
+
+ enc_pu_t *ps_top_row_pu_ME;
+
+ /**
+ * coded block pattern
+ */
+ UWORD32 u4_cbp;
+
+ /**
+ * csbp
+ */
+ UWORD32 u4_csbp;
+
+ /**
+ * number of non zero coeffs
+ */
+ UWORD32 au4_nnz[5];
+
+ /**
+ * number of non zero coeffs for intra 4x4 when rdopt is on
+ */
+ UWORD32 au4_nnz_intra_4x4[4];
+
+ /**
+ * frame qp & mb qp
+ */
+ UWORD32 u4_frame_qp, u4_mb_qp;
+
+ /**
+ * mb qp previous
+ */
+ UWORD32 u4_mb_qp_prev;
+
+ /**
+ * quantization parameters for luma & chroma planes
+ */
+ quant_params_t *ps_qp_params[3];
+
+ /**
+ * Pointer to frame level mb subblock coeff data
+ */
+ void *pv_pic_mb_coeff_data;
+
+ /**
+ * Pointer to mb subblock coeff data and number of subblocks and scan idx
+ * Incremented each time a coded subblock is processed
+ */
+ void *pv_mb_coeff_data;
+
+ /**
+ * Pointer to frame level mb header data
+ */
+ void *pv_pic_mb_header_data;
+
+ /**
+ * Pointer to mb header data and
+ * incremented each time a coded mb is encoded
+ */
+ void *pv_mb_header_data;
+
+ /**
+ * Signal that pic_init is called first time
+ */
+ WORD32 i4_first_pic_init;
+
+ /**
+ * Current MV Bank's buffer ID
+ */
+ WORD32 i4_cur_mv_bank_buf_id;
+
+ /**
+ * Void pointers to the process and entropy job queue contexts
+ */
+ void *pv_proc_jobq, *pv_entropy_jobq;
+
+ /**
+ * Number of MBs to be processed in the current Job
+ */
+ WORD32 i4_mb_cnt;
+
+ /**
+ * ID for the current context - Used for debugging
+ */
+ WORD32 i4_id;
+
+ /**
+ * Pointer to current picture buffer structure
+ */
+ pic_buf_t *ps_cur_pic;
+
+ /**
+ * Pointer to current picture's mv buffer structure
+ */
+ mv_buf_t *ps_cur_mv_buf;
+
+ /**
+ * Flag to indicate if ps_proc was initialized at least once in a frame.
+ * This is needed to handle cases where a core starts to handle format
+ * conversion jobs directly
+ */
+ WORD32 i4_init_done;
+
+ /**
+ * Process status: one byte per MB
+ */
+ UWORD8 *pu1_proc_map;
+
+ /**
+ * Deblk status: one byte per MB
+ */
+ UWORD8 *pu1_deblk_map;
+
+ /**
+ * ME status: one byte per MB (NOTE(review): per field name; original comment said "Process status" - confirm)
+ */
+ UWORD8 *pu1_me_map;
+
+ /*
+ * Intra refresh mask.
+ * Indicates if an Mb is coded in intra mode within the current AIR interval
+ * NOTE Refreshes after each AIR period
+ * NOTE The map is shared between process
+ */
+ UWORD8 *pu1_is_intra_coded;
+
+ /**
+ * Disable deblock level (0: Enable completely, 3: Disable completely)
+ */
+ UWORD32 u4_disable_deblock_level;
+
+ /**
+ * Structure that contains deblock context (stored by value, not a pointer)
+ */
+ deblk_ctxt_t s_deblk_ctxt;
+
+ /**
+ * Points to the array of slice indices which is used to identify the independent
+ * slice to which each MB in a frame belongs.
+ */
+ UWORD8 *pu1_slice_idx;
+
+ /**
+ * Pointer to base of slice header structure array
+ */
+ slice_header_t *ps_slice_hdr_base;
+
+ /**
+ * Number of mb's to process in one loop (NOTE(review): presumably for entropy, per the _ntrpy suffix - confirm)
+ */
+ WORD32 i4_nmb_ntrpy;
+
+ /**
+ * Number of mb's to process in one loop (NOTE(review): presumably for ME, per the _me suffix - confirm)
+ */
+ UWORD32 u4_nmb_me;
+
+ /**
+ * Structure for current input buffer
+ */
+ inp_buf_t s_inp_buf;
+
+ /**
+ * api call cnt
+ */
+ WORD32 i4_encode_api_call_cnt;
+
+ /**
+ * Current Picture count - used for synchronization
+ */
+ WORD32 i4_pic_cnt;
+
+ /**
+ * Intermediate buffer for interpred leaf level functions (NOTE(review): name carries an ai16_ prefix but the element type is WORD32)
+ */
+ WORD32 ai16_pred1[HP_BUFF_WD * HP_BUFF_HT];
+
+ /**
+ * Reference picture for the current picture
+ * TODO: Only 1 reference assumed currently
+ */
+ pic_buf_t *ps_ref_pic;
+
+ /**
+ * frame info used by RC
+ */
+ frame_info_t s_frame_info;
+
+ /*
+ * NOTE NOT PERSISTENT INSIDE FUNCTIONS
+ * Min sad for current MB
+ * will be populated initially
+ * Once a sad less than or equal to u4_min_sad is reached, the value will be copied to the variable
+ */
+ UWORD32 u4_min_sad;
+
+ /*
+ * indicates whether we have reached minimum sad or not
+ */
+ UWORD32 u4_min_sad_reached;
+
+ /**
+ * Current error code
+ */
+ WORD32 i4_error_code;
+
+ /*
+ * Enables or disables computation of recon
+ */
+ UWORD32 u4_compute_recon;
+
+ /*
+ * Buffer for holding half_x (1/2,1 - interpolated)
+ * values when halfpel generation
+ * for the entire plane is not enabled
+ */
+ UWORD8 *pu1_half_x;
+
+ /*
+ * Buffer for holding half_y (1,1/2 - interpolated)
+ * values when halfpel generation
+ * for the entire plane is not enabled
+ */
+ UWORD8 *pu1_half_y;
+
+ /*
+ * Buffer for holding half_xy (1/2,1/2 - interpolated)
+ * values when halfpel generation
+ * for the entire plane is not enabled
+ *
+ */
+ UWORD8 *pu1_half_xy;
+
+ /*
+ * Buffer holding best sub pel values
+ */
+ UWORD8 *pu1_best_subpel_buf;
+
+ /*
+ * Stride for buffer holding best sub pel
+ */
+ UWORD32 u4_bst_spel_buf_strd;
+
+} process_ctxt_t;
+
+/**
+ ******************************************************************************
+ * @brief Rate control related variables
+ ******************************************************************************
+ */
+typedef struct
+{
+ void *pps_rate_control_api;
+
+ void *pps_frame_time;
+
+ void *pps_time_stamp;
+
+ void *pps_pd_frm_rate;
+
+ /**
+ * frame rate pull down (one entry per context set, MAX_CTXT_SETS)
+ */
+ WORD32 pre_encode_skip[MAX_CTXT_SETS];
+
+ /**
+ * skip frame (cbr) (one entry per context set, MAX_CTXT_SETS)
+ */
+ WORD32 post_encode_skip[MAX_CTXT_SETS];
+
+ /**
+ * rate control type
+ */
+ rc_type_e e_rc_type;
+
+ /**
+ * pic type
+ */
+ picture_type_e e_pic_type;
+
+ /**
+ * intra cnt in previous frame
+ */
+ WORD32 num_intra_in_prev_frame;
+
+ /**
+ * avg activity of prev frame
+ */
+ WORD32 i4_avg_activity;
+
+}rate_control_ctxt_t;
+
+/**
+ * Codec context
+ */
+struct _codec_t
+{
+ /**
+ * Number of coded pictures
+ */
+ WORD32 i4_coded_pic_cnt;
+
+ /**
+ * Number of encode frame API calls made
+ */
+ WORD32 i4_encode_api_call_cnt;
+
+ /**
+ * Number of pictures encoded
+ */
+ WORD32 i4_pic_cnt;
+
+ /**
+ * Number of threads created
+ */
+ WORD32 i4_proc_thread_cnt;
+
+ /**
+ * Mutex used to keep the control calls thread-safe
+ */
+ void *pv_ctl_mutex;
+
+ /**
+ * Current active config parameters
+ */
+ cfg_params_t s_cfg;
+
+ /**
+ * Array containing the config parameter sets
+ */
+ cfg_params_t as_cfg[MAX_ACTIVE_CONFIG_PARAMS];
+
+ /**
+ * Color format used by encoder internally
+ */
+ IV_COLOR_FORMAT_T e_codec_color_format;
+
+ /**
+ * source stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_src_strd;
+
+ /**
+ * recon stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_rec_strd;
+
+ /**
+ * Flag to enable/disable deblocking of a frame
+ */
+ WORD32 i4_disable_deblk_pic;
+
+ /**
+ * Number of continuous frames where deblocking was disabled
+ */
+ WORD32 i4_disable_deblk_pic_cnt;
+
+ /**
+ * frame type
+ */
+ PIC_TYPE_T pic_type;
+
+ /**
+ * frame qp
+ */
+ UWORD32 u4_frame_qp;
+
+ /**
+ * frame num
+ */
+ WORD32 i4_frame_num;
+
+ /**
+ * slice_type
+ */
+ WORD32 i4_slice_type;
+
+ /*
+ * Force current frame to specific type
+ */
+ IV_PICTURE_CODING_TYPE_T force_curr_frame_type;
+
+ /**
+ * IDR pic
+ */
+ UWORD32 u4_is_idr;
+
+ /**
+ * idr_pic_id
+ */
+ WORD32 i4_idr_pic_id;
+
+ /**
+ * Flush mode
+ */
+ WORD32 i4_flush_mode;
+
+ /**
+ * Encode header mode
+ */
+ WORD32 i4_header_mode;
+
+ /**
+ * Flag to indicate if header has already
+ * been generated when i4_api_call_cnt 0
+ */
+ UWORD32 u4_header_generated;
+
+ /**
+ * Encode generate header
+ */
+ WORD32 i4_gen_header;
+
+ /**
+ * To signal successful completion of init
+ */
+ WORD32 i4_init_done;
+
+ /**
+ * To signal that at least one picture was decoded
+ */
+ WORD32 i4_first_pic_done;
+
+ /**
+ * Reset flag - Codec is reset if this flag is set
+ */
+ WORD32 i4_reset_flag;
+
+ /**
+ * Current error code
+ */
+ WORD32 i4_error_code;
+
+ /**
+ * threshold residue
+ */
+ WORD32 u4_thres_resi;
+
+ /**
+ * disable intra inter gating
+ */
+ UWORD32 u4_inter_gate;
+
+ /**
+ * Holds mem records passed during init.
+ * This will be used to return the mem records during retrieve call
+ */
+ iv_mem_rec_t *ps_mem_rec_backup;
+
+ /**
+ * Flag to determine if the entropy thread is active
+ */
+ volatile UWORD32 au4_entropy_thread_active[MAX_CTXT_SETS];
+
+ /**
+ * Mutex used to keep the entropy calls thread-safe
+ */
+ void *pv_entropy_mutex;
+
+ /**
+ * Job queue buffer base
+ */
+ void *pv_proc_jobq_buf, *pv_entropy_jobq_buf;
+
+ /**
+ * Job Queue mem tab size
+ */
+ WORD32 i4_proc_jobq_buf_size, i4_entropy_jobq_buf_size;
+
+ /**
+ * Memory for MV Bank buffer manager
+ */
+ void *pv_mv_buf_mgr_base;
+
+ /**
+ * MV Bank buffer manager
+ */
+ void *pv_mv_buf_mgr;
+
+ /**
+ * Pointer to MV Buf structure array
+ */
+ void *ps_mv_buf;
+
+ /**
+ * Base address for Motion Vector bank buffer
+ */
+ void *pv_mv_bank_buf_base;
+
+ /**
+ * MV Bank size allocated
+ */
+ WORD32 i4_total_mv_bank_size;
+
+ /**
+ * Memory for Picture buffer manager for reference pictures
+ */
+ void *pv_ref_buf_mgr_base;
+
+ /**
+ * Picture buffer manager for reference pictures
+ */
+ void *pv_ref_buf_mgr;
+
+ /**
+ * Number of reference buffers added to the buffer manager
+ */
+ WORD32 i4_ref_buf_cnt;
+
+ /**
+ * Pointer to Pic Buf structure array
+ */
+ void *ps_pic_buf;
+
+ /**
+ * Base address for Picture buffer
+ */
+ void *pv_pic_buf_base;
+
+ /**
+ * Total pic buffer size allocated
+ */
+ WORD32 i4_total_pic_buf_size;
+
+ /**
+ * Memory for Buffer manager for output buffers
+ */
+ void *pv_out_buf_mgr_base;
+
+ /**
+ * Buffer manager for output buffers
+ */
+ void *pv_out_buf_mgr;
+
+ /**
+ * Current output buffer's buffer ID
+ */
+ WORD32 i4_out_buf_id;
+
+ /**
+ * Number of output buffers added to the buffer manager
+ */
+ WORD32 i4_out_buf_cnt;
+
+ /**
+ * Memory for Picture buffer manager for input buffers
+ */
+ void *pv_inp_buf_mgr_base;
+
+ /**
+ * Picture buffer manager for input buffers
+ */
+ void *pv_inp_buf_mgr;
+
+ /**
+ * Current input buffer's buffer ID
+ */
+ WORD32 i4_inp_buf_id;
+
+ /**
+ * Number of input buffers added to the buffer manager
+ */
+ WORD32 i4_inp_buf_cnt;
+
+ /**
+ * Current input buffer
+ */
+ pic_buf_t *ps_inp_buf;
+
+ /**
+ * Pointer to dpb manager structure
+ */
+ void *pv_dpb_mgr;
+
+ /**
+ * Pointer to base of Sequence parameter set structure array
+ */
+ sps_t *ps_sps_base;
+
+ /**
+ * Pointer to base of Picture parameter set structure array
+ */
+ pps_t *ps_pps_base;
+
+ /**
+ * seq_parameter_set_id
+ */
+ WORD32 i4_sps_id;
+
+ /**
+ * pic_parameter_set_id
+ */
+ WORD32 i4_pps_id;
+
+ /**
+ * Pointer to base of slice header structure array
+ */
+ slice_header_t *ps_slice_hdr_base;
+
+ /**
+ * packed residue coeff data size for 1 row of mbs
+ */
+ UWORD32 u4_size_coeff_data;
+
+ /**
+ * packed header data size for 1 row of mbs
+ */
+ UWORD32 u4_size_header_data;
+
+ /**
+ * Processing context - One for each processing thread
+ * Create two sets, each set used for alternate frames
+ */
+ process_ctxt_t as_process[MAX_PROCESS_CTXT];
+
+ /**
+ * Thread handle for each of the processing threads
+ */
+ void *apv_proc_thread_handle[MAX_PROCESS_THREADS];
+
+ /**
+ * Thread created flag for each of the processing threads
+ */
+ WORD32 ai4_process_thread_created[MAX_PROCESS_THREADS];
+
+ /**
+ * Void pointer to process job context
+ */
+ void *pv_proc_jobq, *pv_entropy_jobq;
+
+ /**
+ * Number of MBs processed together for better instruction cache handling
+ */
+ WORD32 i4_proc_nmb;
+
+ /**
+ * Previous POC lsb
+ */
+ WORD32 i4_prev_poc_lsb;
+
+ /**
+ * Previous POC msb
+ */
+ WORD32 i4_prev_poc_msb;
+
+ /**
+ * Max POC lsb that has arrived till now
+ */
+ WORD32 i4_max_prev_poc_lsb;
+
+ /**
+ * Context for format conversion
+ */
+ fmt_conv_t s_fmt_conv;
+
+ /**
+ * Absolute pic order count
+ */
+ WORD32 i4_abs_pic_order_cnt;
+
+ /**
+ * Pic order count of lsb
+ */
+ WORD32 i4_pic_order_cnt_lsb;
+
+ /**
+ * Array giving current picture being processed in each context set
+ */
+ WORD32 ai4_pic_cnt[MAX_CTXT_SETS];
+
+ /*
+ * Min sad to search for
+ */
+ UWORD32 u4_min_sad;
+
+ /**
+ * Reference picture set
+ */
+ ref_set_t as_ref_set[MAX_DPB_SIZE + MAX_CTXT_SETS];
+
+ /*
+ * Air pic cnt
+ * Contains the number of pictures that have been encoded with air
+ * This value is modulo air refresh period
+ */
+ WORD32 i4_air_pic_cnt;
+
+ /*
+ * Intra refresh map
+ * Stores the frames at which intra refresh should occur for a MB
+ */
+ UWORD16 *pu2_intr_rfrsh_map;
+
+ /*
+ * Alternate reference frames
+ * Indicates if the current frame is used as a reference frame
+ */
+ UWORD32 u4_is_curr_frm_ref;
+
+ /*
+ * Memory for color space conversion for luma plane
+ */
+ UWORD8 *pu1_y_csc_buf_base;
+
+ /*
+ * Memory for color space conversion for chroma plane
+ */
+ UWORD8 *pu1_uv_csc_buf_base;
+
+ /**
+ * Function pointers for intra pred leaf level functions luma
+ */
+ pf_intra_pred apf_intra_pred_16_l[MAX_I16x16];
+ pf_intra_pred apf_intra_pred_8_l[MAX_I8x8];
+ pf_intra_pred apf_intra_pred_4_l[MAX_I4x4];
+
+ /**
+ * Function pointers for intra pred leaf level functions chroma
+ */
+ pf_intra_pred apf_intra_pred_c[MAX_CH_I8x8];
+
+ /**
+ * luma core coding function pointer
+ */
+ UWORD8 (*luma_energy_compaction[4])(process_ctxt_t *ps_proc);
+
+ /**
+ * chroma core coding function pointer
+ */
+ UWORD8 (*chroma_energy_compaction[2])(process_ctxt_t *ps_proc);
+
+ /**
+ * forward transform for intra blk of mb type 16x16
+ */
+ ih264_luma_16x16_resi_trans_dctrans_quant_ft *pf_resi_trans_dctrans_quant_16x16;
+
+ /**
+ * inverse transform for intra blk of mb type 16x16
+ */
+ ih264_luma_16x16_idctrans_iquant_itrans_recon_ft *pf_idctrans_iquant_itrans_recon_16x16;
+
+ /**
+ * forward transform for 4x4 blk luma
+ */
+ ih264_resi_trans_quant_ft *pf_resi_trans_quant_4x4;
+
+ /**
+ * forward transform for 4x4 blk chroma
+ */
+ ih264_resi_trans_quant_ft *pf_resi_trans_quant_chroma_4x4;
+
+ /*
+ * hadamard transform and quant for a 4x4 block
+ */
+ ih264_hadamard_quant_ft *pf_hadamard_quant_4x4;
+
+ /*
+ * hadamard transform and quant for a 2x2 chroma (uv) block
+ */
+ ih264_hadamard_quant_ft *pf_hadamard_quant_2x2_uv;
+
+ /**
+ * inverse transform for 4x4 blk
+ */
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_4x4;
+
+ /**
+ * inverse transform for chroma 4x4 blk
+ */
+ ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4;
+
+ /**
+ * inverse transform for 4x4 blk with only single dc coeff
+ */
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_4x4_dc;
+
+ /**
+ * inverse transform for chroma 4x4 blk with only single dc coeff
+ */
+ ih264_iquant_itrans_recon_chroma_ft *pf_iquant_itrans_recon_chroma_4x4_dc;
+
+ /*
+ * Inverse hadamard transform and iquant for a 4x4 block
+ */
+ ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_4x4;
+
+ /*
+ * Inverse hadamard transform and iquant for a 2x2 chroma (uv) block
+ */
+ ih264_ihadamard_scaling_ft *pf_ihadamard_scaling_2x2_uv;
+
+ /*
+ * Function for interleave copy*
+ */
+ ih264_interleave_copy_ft *pf_interleave_copy;
+
+ /**
+ * forward transform for 8x8 blk
+ */
+ ih264_resi_trans_quant_ft *pf_resi_trans_quant_8x8;
+
+ /**
+ * inverse transform for 8x8 blk
+ */
+ ih264_iquant_itrans_recon_ft *pf_iquant_itrans_recon_8x8;
+
+ /**
+ * forward transform for chroma MB
+ */
+ ih264_chroma_8x8_resi_trans_dctrans_quant_ft *pf_resi_trans_dctrans_quant_8x8_chroma;
+
+ /**
+ * inverse transform for chroma MB
+ */
+ ih264_idctrans_iquant_itrans_recon_ft *pf_idctrans_iquant_itrans_recon_8x8_chroma;
+
+ /**
+ * deblock vertical luma edge with blocking strength 4
+ */
+ ih264_deblk_edge_bs4_ft *pf_deblk_luma_vert_bs4;
+
+ /**
+ * deblock vertical chroma edge with blocking strength 4
+ */
+ ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_vert_bs4;
+
+ /**
+ * deblock vertical luma edge with blocking strength less than 4
+ */
+ ih264_deblk_edge_bslt4_ft *pf_deblk_luma_vert_bslt4;
+
+ /**
+ * deblock vertical chroma edge with blocking strength less than 4
+ */
+ ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_vert_bslt4;
+
+ /**
+ * deblock horizontal luma edge with blocking strength 4
+ */
+ ih264_deblk_edge_bs4_ft *pf_deblk_luma_horz_bs4;
+
+ /**
+ * deblock horizontal chroma edge with blocking strength 4
+ */
+ ih264_deblk_chroma_edge_bs4_ft *pf_deblk_chroma_horz_bs4;
+
+ /**
+ * deblock horizontal luma edge with blocking strength less than 4
+ */
+ ih264_deblk_edge_bslt4_ft *pf_deblk_luma_horz_bslt4;
+
+ /**
+ * deblock horizontal chroma edge with blocking strength less than 4
+ */
+ ih264_deblk_chroma_edge_bslt4_ft *pf_deblk_chroma_horz_bslt4;
+
+
+ /**
+ * functions for padding
+ */
+ pf_pad pf_pad_top;
+ pf_pad pf_pad_bottom;
+ pf_pad pf_pad_left_luma;
+ pf_pad pf_pad_left_chroma;
+ pf_pad pf_pad_right_luma;
+ pf_pad pf_pad_right_chroma;
+
+ /**
+ * Inter pred leaf level functions
+ */
+ ih264_inter_pred_luma_ft *pf_inter_pred_luma_copy;
+ ih264_inter_pred_luma_ft *pf_inter_pred_luma_horz;
+ ih264_inter_pred_luma_ft *pf_inter_pred_luma_vert;
+ pf_inter_pred_luma_bilinear pf_inter_pred_luma_bilinear;
+ ih264_inter_pred_chroma_ft *pf_inter_pred_chroma;
+
+ /**
+ * fn ptrs for compute sad routines
+ */
+ ime_compute_sad_ft *apf_compute_sad_16x16[2];
+ ime_compute_sad_ft *pf_compute_sad_16x8;
+
+ /**
+ * fn ptrs for memory handling operations
+ */
+ pf_memcpy pf_mem_cpy;
+ pf_memset pf_mem_set;
+ pf_memcpy_mul8 pf_mem_cpy_mul8;
+ pf_memset_mul8 pf_mem_set_mul8;
+
+ /**
+ * intra mode eval -encoder level function
+ */
+ pf_evaluate_intra_modes pf_ih264e_evaluate_intra16x16_modes;
+ pf_evaluate_intra_modes pf_ih264e_evaluate_intra_chroma_modes;
+ pf_evaluate_intra_4x4_modes pf_ih264e_evaluate_intra_4x4_modes;
+
+ /* Half pel generation function - encoder level
+ *
+ */
+ pf_sixtapfilter_horz pf_ih264e_sixtapfilter_horz;
+ pf_sixtap_filter_2dvh_vert pf_ih264e_sixtap_filter_2dvh_vert;
+
+ /**
+ * color space conversion from YUV 420P to YUV 420Sp
+ */
+ pf_fmt_conv_420p_to_420sp pf_ih264e_conv_420p_to_420sp;
+
+
+ /**
+ * color space conversion from YUV 422 interleaved to YUV 420Sp
+ */
+ pf_fmt_conv_422ile_to_420sp pf_ih264e_fmt_conv_422i_to_420sp;
+
+ /**
+ * write mb layer for a given slice I, P, B
+ */
+ IH264E_ERROR_T (*pf_write_mb_syntax_layer[3]) ( entropy_ctxt_t *ps_ent_ctxt );
+
+
+ /**
+ * Output buffer
+ */
+ out_buf_t as_out_buf[MAX_CTXT_SETS];
+
+ /**
+ * recon buffer
+ */
+ rec_buf_t as_rec_buf[MAX_CTXT_SETS];
+
+ /**
+ * rate control context
+ */
+ rate_control_ctxt_t s_rate_control;
+};
+#endif /* IH264E_STRUCTS_H_ */
diff --git a/encoder/ih264e_time_stamp.c b/encoder/ih264e_time_stamp.c
new file mode 100755
index 0000000..a6a7f3c
--- /dev/null
+++ b/encoder/ih264e_time_stamp.c
@@ -0,0 +1,748 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_time_stamp.c
+*
+* @brief
+* This file contains functions used for source and target time stamp management
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - gcd()
+* - ih264e_get_range()
+* - ih264e_frame_time_get_init_free_memtab()
+* - ih264e_init_frame_time()
+* - ih264e_should_src_be_skipped()
+* - ih264e_time_stamp_get_init_free_memtab()
+* - ih264e_init_time_stamp()
+* - ih264e_update_time_stamp()
+* - ih264e_frame_time_get_src_frame_rate()
+* - ih264e_frame_time_get_tgt_frame_rate()
+* - ih264e_frame_time_get_src_ticks()
+* - ih264e_frame_time_get_tgt_ticks()
+* - ih264e_frame_time_get_src_time()
+* - ih264e_frame_time_get_tgt_time()
+* - ih264e_frame_time_update_src_frame_rate()
+* - ih264e_frame_time_update_tgt_frame_rate()
+* - ih264_time_stamp_update_frame_rate()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* user include files */
+#include "irc_datatypes.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ih264_defs.h"
+#include "ih264e_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_structs.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "irc_rate_control_api.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to compute gcd of two numbers
+*
+* @par Description
+* Function to compute gcd of two numbers
+*
+* @param[in] i4_x
+* value 1
+*
+* @param[in] i4_y
+* value 2
+*
+* @returns
+* GCD(value 1, value 2)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static WORD32 gcd(WORD32 i4_x, WORD32 i4_y)
+{
+    /* Order the operands so that i4_x <= i4_y before iterating. The exchange
+     * goes through a temporary: an add/subtract swap overflows when the sum
+     * of the two values does not fit in WORD32, which is undefined behaviour
+     * for a signed type */
+    if (i4_x > i4_y)
+    {
+        WORD32 i4_swap = i4_x;
+
+        i4_x = i4_y;
+        i4_y = i4_swap;
+    }
+
+    /* Euclid's algorithm: replace (x, y) by (y, x mod y) until y reaches 0 */
+    while (i4_y != 0)
+    {
+        WORD32 i4_rem = i4_x % i4_y;
+
+        i4_x = i4_y;
+        i4_y = i4_rem;
+    }
+
+    /* i4_x holds the result; it is 0 when both inputs are 0 */
+    return (i4_x);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to determine number of bits required to represent a given
+* value
+*
+* @par Description
+* This function determines the number of bits required to represent the given
+* value. It is used to find out number of bits to read when the data size is
+* not fixed (e.g. vop_time_increment_resolution).
+*
+* @param[in] u4_value
+* Value for which the number of bits required to represent is to be determined
+*
+* @param[in] u1_no_of_bits
+* Represents the value's word type = 8/16/32
+*
+* @returns
+* The number of bits required to represent the given number
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+static UWORD8 ih264e_get_range(UWORD32 u4_value, UWORD8 u1_no_of_bits)
+{
+    UWORD8 count;
+    UWORD32 temp;
+    const UWORD8 u1_half_bits = (UWORD8) (u1_no_of_bits >> 1);
+
+    /* Every mask below is built from an unsigned 1. With u1_no_of_bits == 32
+     * the top-bit mask is a shift by 31, which is undefined when applied to
+     * a signed int constant */
+    if (u4_value > ((((UWORD32) 1) << u1_half_bits) - 1))
+    {
+        /* Value does not fit in the lower half: scan the upper half from the
+         * most significant bit downwards */
+        temp = ((UWORD32) 1) << (u1_no_of_bits - 1);
+        for (count = 0; count < u1_half_bits; count++)
+        {
+            if ((temp & u4_value) != 0)
+            {
+                return (UWORD8) (u1_no_of_bits - count);
+            }
+            else
+            {
+                temp >>= 1;
+            }
+        }
+        return 0;
+    }
+    else
+    {
+        /* Value fits in the lower half: scan it from its top bit down to
+         * bit 1. Bit 0 is not tested, so both 0 and 1 report a range of 1 */
+        temp = ((UWORD32) 1) << (u1_half_bits - 1);
+        for (count = 0; count < (u1_half_bits - 1); count++)
+        {
+            if ((temp & u4_value) != 0)
+            {
+                return (UWORD8) (u1_half_bits - count);
+            }
+            else
+            {
+                temp >>= 1;
+            }
+        }
+        return 1;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to init frame time memtabs
+*
+* @par Description
+* Function to init frame time memtabs
+*
+* @param[in] pps_frame_time
+* Pointer to frame time contexts
+*
+* @param[in] ps_memtab
+* Pointer to memtab
+*
+* @param[in] e_func_type
+* Function type (get memtabs/init memtabs)
+*
+* @returns
+* number of memtabs used
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time,
+                                              itt_memtab_t *ps_memtab,
+                                              ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Placeholder object the handle is pointed at while no real state
+     * memory is available, so that the handle can be dereferenced safely */
+    static frame_time_t s_placeholder_frame_time;
+
+    /* This module describes exactly one memtab entry */
+    const WORD32 i4_num_memtabs = 1;
+
+    /* Counting pass: only the number of entries is reported */
+    if (e_func_type == GET_NUM_MEMTAB)
+    {
+        *pps_frame_time = &s_placeholder_frame_time;
+        return i4_num_memtabs;
+    }
+
+    /* Fill pass also starts from the placeholder */
+    if (e_func_type == FILL_MEMTAB)
+    {
+        *pps_frame_time = &s_placeholder_frame_time;
+    }
+
+    /* Describe the frame time state in entry 0 and let use_or_fill_base()
+     * act on that entry and the handle according to e_func_type */
+    fill_memtab(&ps_memtab[0], sizeof(frame_time_t),
+                ALIGN_128_BYTE, PERSISTENT, DDR);
+    use_or_fill_base(&ps_memtab[0], (void**) pps_frame_time, e_func_type);
+
+    return i4_num_memtabs;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to init frame time context
+*
+* @par Description
+* Frame time structure stores the time of the source and the target frames to
+* be encoded. Based on the time we decide whether or not to encode the source
+* frame
+*
+* @param[in] ps_frame_time
+* Pointer Frame time context
+*
+* @param[in] u4_src_frm_rate
+* Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+* Target frame rate
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_frame_time(frame_time_t *ps_frame_time,
+                            UWORD32 u4_src_frm_rate,
+                            UWORD32 u4_tgt_frm_rate)
+{
+    /* The common time base on which source and target frame times advance
+     * is the least common multiple of the two rates. Both rates are used as
+     * divisors below, so neither may be zero */
+    WORD32 i4_gcd = gcd(u4_src_frm_rate, u4_tgt_frm_rate);
+
+    /* Divide by the gcd before multiplying. Because the gcd divides the
+     * source rate exactly, (src / gcd) * tgt equals (src * tgt) / gcd, but
+     * the full product of the two rates is never formed and so cannot wrap
+     * around in 32 bits */
+    ps_frame_time->common_time_base = (u4_src_frm_rate / i4_gcd)
+                    * u4_tgt_frm_rate;
+
+    /* Number of ticks by which source and target time advance per frame */
+    ps_frame_time->u4_src_frm_time_incr = ps_frame_time->common_time_base
+                    / u4_src_frm_rate;
+    ps_frame_time->u4_tgt_frm_time_incr = ps_frame_time->common_time_base
+                    / u4_tgt_frm_rate;
+
+    /* Initialise the source and target times to 0 (RESET) */
+    ps_frame_time->u4_src_frm_time = 0;
+    ps_frame_time->u4_tgt_frm_time = 0;
+
+    /* Initialize the number of frms not to be skipped to 0 */
+    ps_frame_time->u4_num_frms_dont_skip = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to check if frame can be skipped
+*
+* @par Description
+* Based on the source and target frame time and the delta time stamp
+* we decide whether to code the source or not.
+* This is based on the assumption
+* that the source frame rate is greater than target frame rate.
+* Updates the time_stamp structure
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] u4_delta_time_stamp
+* Time stamp difference between frames
+*
+* @param[out] pu4_frm_not_skipped_for_dts
+* Flag to indicate if frame is already skipped by application
+*
+* @returns
+* Flag to skip frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_should_src_be_skipped(frame_time_t *ps_frame_time,
+                                    UWORD32 u4_delta_time_stamp,
+                                    UWORD32 *pu4_frm_not_skipped_for_dts)
+{
+    /* Source time as it will be after this frame has been accounted for */
+    const UWORD32 u4_next_src_time = ps_frame_time->u4_src_frm_time
+                    + ps_frame_time->u4_src_frm_time_incr;
+
+    /* Skip when the target time is ahead of the current source time and is
+     * not behind the next source time either */
+    UWORD8 u1_skip = (UWORD8) ((ps_frame_time->u4_tgt_frm_time > ps_frame_time->u4_src_frm_time)
+                    && (ps_frame_time->u4_tgt_frm_time >= u4_next_src_time));
+
+    /* The source time advances on every frame */
+    ps_frame_time->u4_src_frm_time = u4_next_src_time;
+
+    /* The target time advances only when the source frame is coded */
+    if (0 == u1_skip)
+    {
+        ps_frame_time->u4_tgt_frm_time += ps_frame_time->u4_tgt_frm_time_incr;
+    }
+
+    /* Once both times land on the common time base together, wrap both
+     * back to zero */
+    if (((WORD32) ps_frame_time->u4_src_frm_time == ps_frame_time->common_time_base)
+                    && ((WORD32) ps_frame_time->u4_tgt_frm_time == ps_frame_time->common_time_base))
+    {
+        ps_frame_time->u4_src_frm_time = 0;
+        ps_frame_time->u4_tgt_frm_time = 0;
+    }
+
+    /* Accumulate how many frames need not be skipped on account of the
+     * delta time stamp */
+    ps_frame_time->u4_num_frms_dont_skip += (u4_delta_time_stamp - 1);
+
+    /* A frame due to be skipped is coded instead while the don't-skip
+     * count is non-zero; the output flag reports that override */
+    if (u1_skip && (0 != ps_frame_time->u4_num_frms_dont_skip))
+    {
+        u1_skip = 0;
+        *pu4_frm_not_skipped_for_dts = 1;
+        ps_frame_time->u4_num_frms_dont_skip -= 1;
+    }
+    else
+    {
+        *pu4_frm_not_skipped_for_dts = 0;
+    }
+
+    return (u1_skip);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to initialize time stamp memtabs
+*
+* @par Description
+* Function to initialize time stamp memtabs
+*
+* @param[in] pps_time_stamp
+* Pointer to time stamp context
+*
+* @param[in] ps_memtab
+* Pointer to memtab
+*
+* @param[in] e_func_type
+* Function type (Get memtab/ init memtab)
+*
+* @returns
+* number of memtabs used
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp,
+                                              itt_memtab_t *ps_memtab,
+                                              ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Placeholder object the handle is pointed at while no real state
+     * memory is available, so that the handle can be dereferenced safely */
+    static time_stamp_t s_placeholder_time_stamp;
+
+    /* This module describes exactly one memtab entry */
+    const WORD32 i4_num_memtabs = 1;
+
+    /* Counting pass: only the number of entries is reported */
+    if (e_func_type == GET_NUM_MEMTAB)
+    {
+        *pps_time_stamp = &s_placeholder_time_stamp;
+        return i4_num_memtabs;
+    }
+
+    /* Fill pass also starts from the placeholder */
+    if (e_func_type == FILL_MEMTAB)
+    {
+        *pps_time_stamp = &s_placeholder_time_stamp;
+    }
+
+    /* Describe the time stamp state in entry 0 and let use_or_fill_base()
+     * act on that entry and the handle according to e_func_type */
+    fill_memtab(&ps_memtab[0], sizeof(time_stamp_t),
+                ALIGN_128_BYTE, PERSISTENT, DDR);
+    use_or_fill_base(&ps_memtab[0], (void**) pps_time_stamp, e_func_type);
+
+    return i4_num_memtabs;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to initialize time stamp context
+*
+* @par Description
+* Time stamp structure stores the time stamp data that
+* needs to be sent in to the header of MPEG4. Based on the
+* max target frame rate the vop_time increment resolution is set
+* so as to support all the frame rates below max frame rate.
+* A support till the third decimal point is assumed.
+*
+* @param[in] ps_time_stamp
+* Pointer to time stamp structure
+*
+* @param[in] u4_max_frm_rate
+* Maximum frame rate
+*
+* @param[in] u4_src_frm_rate
+* Source frame rate
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_time_stamp(time_stamp_t *ps_time_stamp,
+                            UWORD32 u4_max_frm_rate,
+                            UWORD32 u4_src_frm_rate)
+{
+    /* The max frame rate is expected not to exceed 60000. A larger value
+     * is halved before use and the flag records that it was scaled */
+    const WORD32 i4_is_scaled = (u4_max_frm_rate > 60000) ? 1 : 0;
+
+    if (i4_is_scaled)
+    {
+        u4_max_frm_rate >>= 1;
+    }
+    ps_time_stamp->is_max_frame_rate_scaled = i4_is_scaled;
+
+    /* Time increment resolution and the number of bits needed for it */
+    ps_time_stamp->u4_vop_time_incr_res = u4_max_frm_rate;
+    ps_time_stamp->u4_vop_time_incr_range = ih264e_get_range(u4_max_frm_rate, 32);
+
+    /* Increment per source frame; the factor 1000 is there since the frame
+     * rate is in millisec */
+    ps_time_stamp->u4_vop_time_incr =
+                    (1000 * ps_time_stamp->u4_vop_time_incr_res) / u4_src_frm_rate;
+
+    /* All running times start from zero */
+    ps_time_stamp->u4_prev_tgt_vop_time = 0;
+    ps_time_stamp->u4_cur_tgt_vop_time = 0;
+    ps_time_stamp->u4_vop_time = 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update time stamp context
+*
+* @par Description
+* Vop time is incremented by increment value. When vop time goes
+* more than the vop time resolution set the modulo time base to
+* 1 and reduce the vop time by vop time resolution so that the
+* excess value is present in vop time and get accumulated over time
+* so that the corresponding frame rate is achieved at a average of
+* 1000 seconds
+*
+* @param[in] ps_time_stamp
+* Pointer to time stamp structure
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_time_stamp(time_stamp_t *ps_time_stamp)
+{
+    /* The time stamp is fetched after this update, so keep a copy of the
+     * vop time as it was before advancing */
+    ps_time_stamp->u4_cur_tgt_vop_time = ps_time_stamp->u4_vop_time;
+
+    /* Advance by one increment */
+    ps_time_stamp->u4_vop_time = ps_time_stamp->u4_vop_time
+                    + ps_time_stamp->u4_vop_time_incr;
+
+    /* On reaching the resolution, take one resolution off so that only the
+     * excess is carried forward */
+    if (!(ps_time_stamp->u4_vop_time < ps_time_stamp->u4_vop_time_incr_res))
+    {
+        ps_time_stamp->u4_vop_time = ps_time_stamp->u4_vop_time
+                        - ps_time_stamp->u4_vop_time_incr_res;
+    }
+}
+
+/****************************************************************************
+ Run-Time Modifying functions
+****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source frame rate
+*
+* @par Description
+* Function to get source frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* source frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_frame_rate(frame_time_t *ps_frame_time)
+{
+    /* The per-frame increment was derived as common_time_base / rate, so
+     * dividing the time base by the increment gives the rate back */
+    WORD32 i4_src_frm_rate;
+
+    i4_src_frm_rate = ps_frame_time->common_time_base
+                    / ps_frame_time->u4_src_frm_time_incr;
+
+    return i4_src_frm_rate;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target frame rate
+*
+* @par Description
+* Function to get target frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* target frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_frame_rate(frame_time_t *ps_frame_time)
+{
+    /* The per-frame increment was derived as common_time_base / rate, so
+     * dividing the time base by the increment gives the rate back */
+    WORD32 i4_tgt_frm_rate;
+
+    i4_tgt_frm_rate = ps_frame_time->common_time_base
+                    / ps_frame_time->u4_tgt_frm_time_incr;
+
+    return i4_tgt_frm_rate;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source time increment
+*
+* @par Description
+* Function to get source time increment
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* source time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_ticks(frame_time_t *ps_frame_time)
+{
+    /* Ticks between two source frames, narrowed to the signed return type */
+    return (WORD32) ps_frame_time->u4_src_frm_time_incr;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target time increment
+*
+* @par Description
+* Function to get target time increment
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* target time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_ticks(frame_time_t *ps_frame_time)
+{
+    /* Ticks between two target frames, narrowed to the signed return type */
+    return (WORD32) ps_frame_time->u4_tgt_frm_time_incr;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get src frame time
+*
+* @par Description
+* Function to get src frame time
+*
+* @param[in] frame_time
+* Pointer to frame time context
+*
+* @returns
+* src frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_time(frame_time_t *frame_time)
+{
+    /* Current source frame time, converted to the signed return type */
+    return (WORD32) frame_time->u4_src_frm_time;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to get tgt frame time
+*
+* @par Description
+* Function to get tgt frame time
+*
+* @param[in] frame_time
+* Pointer to frame time context
+*
+* @returns
+* tgt frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_time(frame_time_t *frame_time)
+{
+    /* Current target frame time, converted to the signed return type */
+    return (WORD32) frame_time->u4_tgt_frm_time;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update source frame time with a new source frame rate
+*
+* @par Description
+* Function to update source frame time with a new source frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @param[in] src_frm_rate
+* source frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_src_frame_rate(frame_time_t *ps_frame_time,
+                                             WORD32 src_frm_rate)
+{
+    /* The target frame rate is unchanged, so recover it from the current
+     * state (common_time_base / target increment) */
+    const WORD32 i4_cur_tgt_rate =
+                    ih264e_frame_time_get_tgt_frame_rate(ps_frame_time);
+
+    /* Rebuild the whole context from the new source rate and the old
+     * target rate; this also resets the frame times */
+    ih264e_init_frame_time(ps_frame_time, src_frm_rate, i4_cur_tgt_rate);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update target frame time with a new target frame rate
+*
+* @par Description
+* Function to update target frame time with a new target frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @param[in] tgt_frm_rate
+* target frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_tgt_frame_rate(frame_time_t *ps_frame_time,
+                                             WORD32 tgt_frm_rate)
+{
+    /* The source frame rate is unchanged, so recover it from the current
+     * state (common_time_base / source increment) */
+    const WORD32 i4_cur_src_rate =
+                    ih264e_frame_time_get_src_frame_rate(ps_frame_time);
+
+    /* Rebuild the whole context from the old source rate and the new
+     * target rate; this also resets the frame times */
+    ih264e_init_frame_time(ps_frame_time, i4_cur_src_rate, tgt_frm_rate);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function to update the time stamp increment with a new source frame rate
+*
+* @par Description
+* When the frame rate changes the time increment is modified by appropriate ticks
+*
+* @param[in] ps_time_stamp
+* Pointer to time stamp structure
+*
+* @param[in] src_frm_rate
+* source frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264_time_stamp_update_frame_rate(time_stamp_t *ps_time_stamp,
+                                        UWORD32 src_frm_rate)
+{
+    /* Recompute the per-frame increment against the unchanged resolution;
+     * the factor 1000 is there since the frame rate is in millisec */
+    ps_time_stamp->u4_vop_time_incr =
+                    (1000 * ps_time_stamp->u4_vop_time_incr_res) / src_frm_rate;
+}
diff --git a/encoder/ih264e_time_stamp.h b/encoder/ih264e_time_stamp.h
new file mode 100755
index 0000000..1ee559d
--- /dev/null
+++ b/encoder/ih264e_time_stamp.h
@@ -0,0 +1,498 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_time_stamp.h
+*
+* @brief
+* This file contains function declarations used for managing input and output
+* frame time stamps
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_TIME_STAMP_H_
+#define IH264E_TIME_STAMP_H_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+ * Parameters for Src/Tgt frames that are encoded
+ */
+typedef struct frame_time_t
+{
+ /* Common time base (= LCM) of the source and target frame rates, in ticks */
+ WORD32 common_time_base;
+
+ /* Number of ticks between two source frames */
+ UWORD32 u4_src_frm_time_incr;
+
+ /* Number of ticks between two target frames */
+ UWORD32 u4_tgt_frm_time_incr;
+
+ /* Source frame time: measured modulo the common time base and
+ incremented by u4_src_frm_time_incr */
+ UWORD32 u4_src_frm_time;
+
+ /* Target frame time: measured modulo the common time base and
+ incremented by u4_tgt_frm_time_incr */
+ UWORD32 u4_tgt_frm_time;
+
+ /* Number of frames that are not to be skipped, while maintaining
+ tgt_frm_rate, because of delta_time_stamp */
+ UWORD32 u4_num_frms_dont_skip;
+}frame_time_t;
+
+typedef struct frame_time_t *frame_time_handle; /* pointer handle to a frame time context */
+
+/**
+ * Parameters that go in the bitstream based on tgt_frm_rate
+ * 1) Initialize the vop_time_incr_res with the max_frame_rate (in frames per 1000 bits)
+ * - To represent all kinds of frame rates
+ * 2) Decide the vop_time_incr based on the source frame rate
+ * - The decoder would like to know which source frame is encoded i.e. the source time
+ * id of the target frame encoded and there by adjusting its time of delay
+ * 3) vop_time increments every source frame and whenever a frame is encoded (target frame),
+ * the encoder queries the vop time of the source frame and sends it in the bit stream.
+ * 4) Since the Source frame skip logic is taken care by the frame_time module, whenever the
+ * encoder queries the time stamp module (which gets updated outside the encoder) the
+ * time stamp module would have the source time
+ */
+typedef struct time_stamp_t
+{
+ /* vop_time_incr_res is an integer that indicates
+ the number of evenly spaced subintervals, called ticks,
+ within one modulo time. */
+ UWORD32 u4_vop_time_incr_res;
+
+ /* Number of bits needed to represent vop_time_incr_res */
+ UWORD32 u4_vop_time_incr_range;
+
+ /* The number of ticks elapsed between two source vops */
+ UWORD32 u4_vop_time_incr;
+
+ /* Incremented by vop_time_incr for every source frame.
+ Represents the time offset after a modulo_time_base = 1 is sent
+ in the bit stream */
+ UWORD32 u4_vop_time;
+
+ /* Temporary copy of the vop time: update is called before query
+ (get time stamp), so the value for the current target frame is
+ kept separately in cur_tgt_vop_time */
+ UWORD32 u4_cur_tgt_vop_time;
+ /* NOTE(review): undocumented in the original; presumably the vop time of the previous target frame - confirm */
+ UWORD32 u4_prev_tgt_vop_time;
+
+ /* This variable is set to 1 if we scale max frame rate by a factor of 2.
+ For mpeg4 standard, we just have 16bits and we can't accommodate more than 60000 as frame rate.
+ So we scale it and work with it */
+ WORD32 is_max_frame_rate_scaled;
+} time_stamp_t;
+
+typedef struct time_stamp_t *time_stamp_handle; /* pointer handle to a time stamp context */
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to init frame time context
+*
+* @par Description
+* Frame time structure stores the time of the source and the target frames to
+* be encoded. Based on the time we decide whether or not to encode the source
+* frame
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @param[in] u4_src_frm_rate
+* Source frame rate
+*
+* @param[in] u4_tgt_frm_rate
+* Target frame rate
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_frame_time(frame_time_t *ps_frame_time,
+ UWORD32 u4_src_frm_rate,
+ UWORD32 u4_tgt_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to check if frame can be skipped
+*
+* @par Description
+* Based on the source and target frame time and the delta time stamp
+* we decide whether to code the source or not.
+* This is based on the assumption
+* that the source frame rate is greater than the target frame rate.
+* Updates the time_stamp structure
+*
+* @param[in] ps_frame_time
+* Handle to frame time context
+*
+* @param[in] u4_delta_time_stamp
+* Time stamp difference between frames
+*
+* @param[out] pu4_frm_not_skipped_for_dts
+* Flag to indicate if frame is already skipped by application
+*
+* @returns
+* Flag to skip frame
+*
+* @remarks
+*
+*******************************************************************************
+*/
+UWORD8 ih264e_should_src_be_skipped(frame_time_t *ps_frame_time,
+ UWORD32 u4_delta_time_stamp,
+ UWORD32 *pu4_frm_not_skipped_for_dts);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to initialize time stamp context
+*
+* @par Description
+* Time stamp structure stores the time stamp data that
+* needs to be sent in to the header of MPEG4. Based on the
+* max target frame rate the vop_time increment resolution is set
+* so as to support all the frame rates below max frame rate.
+* A support till the third decimal point is assumed.
+*
+* @param[in] time_stamp
+* Pointer to time stamp structure
+*
+* @param[in] max_frm_rate
+* Maximum frame rate
+*
+* @param[in] src_frm_rate
+* Source frame rate
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_time_stamp(time_stamp_handle time_stamp,
+ UWORD32 max_frm_rate,
+ UWORD32 src_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update time stamp context
+*
+* @par Description
+* Vop time is incremented by increment value. When vop time goes
+* more than the vop time resolution set the modulo time base to
+* 1 and reduce the vop time by vop time resolution so that the
+* excess value is present in vop time and get accumulated over time
+* so that the corresponding frame rate is achieved at a average of
+* 1000 seconds
+*
+* @param[in] time_stamp
+* Pointer to time stamp structure
+*
+* @returns
+* none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_update_time_stamp(time_stamp_handle time_stamp);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to init frame time memtabs
+*
+* @par Description
+* Function to init frame time memtabs
+*
+* @param[in] pps_frame_time
+* Pointer to frame time contexts
+*
+* @param[in] ps_memtab
+* Pointer to memtab
+*
+* @param[in] e_func_type
+* Function type (get memtabs/init memtabs)
+*
+* @returns
+* WORD32 value (the time stamp variant documents it as number of memtabs used)
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_init_free_memtab(frame_time_handle *pps_frame_time,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to initialize time stamp memtabs
+*
+* @par Description
+* Function to initialize time stamp memtabs
+*
+* @param[in] pps_time_stamp
+* Pointer to time stamp context
+*
+* @param[in] ps_memtab
+* Pointer to memtab
+*
+* @param[in] e_func_type
+* Function type (get memtab / init memtab)
+*
+* @returns
+* number of memtabs used
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_time_stamp_get_init_free_memtab(time_stamp_handle *pps_time_stamp,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/****************************************************************************
+ Run-Time Modifying functions
+****************************************************************************/
+/**
+*******************************************************************************
+*
+* @brief Function to get source frame rate
+*
+* @par Description
+* Function to get source frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* source frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_frame_rate(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target frame rate
+*
+* @par Description
+* Function to get target frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* target frame rate
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_frame_rate(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get source time increment
+*
+* @par Description
+* Function to get source time increment
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* source time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_ticks(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get target time increment
+*
+* @par Description
+* Function to get target time increment
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @returns
+* target time increment
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_ticks(frame_time_t *ps_frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get src frame time
+*
+* @par Description
+* Function to get src frame time
+*
+* @param[in] frame_time
+* Pointer to frame time context
+*
+* @returns
+* src frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_src_time(frame_time_t *frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to get tgt frame time
+*
+* @par Description
+* Function to get tgt frame time
+*
+* @param[in] frame_time
+* Pointer to frame time context
+*
+* @returns
+* tgt frame time
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_frame_time_get_tgt_time(frame_time_t *frame_time);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update source frame time with a new source frame rate
+*
+* @par Description
+* Function to update source frame time with a new source frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @param[in] src_frm_rate
+* source frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_src_frame_rate(frame_time_t *ps_frame_time, WORD32 src_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update target frame time with a new target frame rate
+*
+* @par Description
+* Function to update target frame time with a new target frame rate
+*
+* @param[in] ps_frame_time
+* Pointer to frame time context
+*
+* @param[in] tgt_frm_rate
+* target frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_frame_time_update_tgt_frame_rate(frame_time_t *ps_frame_time, WORD32 tgt_frm_rate);
+
+/**
+*******************************************************************************
+*
+* @brief Function to update the time stamp increment with a new source frame rate
+*
+* @par Description
+* When the frame rate changes the time increment is modified by appropriate ticks
+*
+* @param[in] ps_time_stamp
+* Pointer to time stamp structure
+*
+* @param[in] src_frm_rate
+* source frame rate
+*
+* @returns
+* None
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264_time_stamp_update_frame_rate(time_stamp_t *ps_time_stamp, UWORD32 src_frm_rate);
+
+#endif /*IH264E_TIME_STAMP_H_*/
+
diff --git a/encoder/ih264e_trace.h b/encoder/ih264e_trace.h
new file mode 100755
index 0000000..8134524
--- /dev/null
+++ b/encoder/ih264e_trace.h
@@ -0,0 +1,161 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ih264e_trace.h
+*
+* @brief
+* This file contains extern declarations of routines that could be helpful
+* for debugging purposes.
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_TRACE_H_
+#define IH264E_TRACE_H_
+
+#if ENABLE_TRACE
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Data for the trace functionality
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * File the trace macros write to; they print nothing while g_enc_trace.fp is NULL
+ */
+ FILE *fp;
+}enc_trace_t;
+
+/*****************************************************************************/
+/* Extern variable declarations */
+/*****************************************************************************/
+extern enc_trace_t g_enc_trace;
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief defines flag used for enabling trace
+******************************************************************************
+ */
+
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Macro to print "syntax_string : value" to the trace file and flush it (no-op if fp is NULL)
+******************************************************************************
+ */
+
+#define ENTROPY_TRACE(syntax_string, value) \
+ { \
+ if(g_enc_trace.fp) \
+ { \
+ fprintf( g_enc_trace.fp, "%-40s : %d\n", syntax_string, value ); \
+ fflush ( g_enc_trace.fp); \
+ } \
+ }
+
+
+/**
+******************************************************************************
+ * @brief Macro to print CABAC trace messages; prints only when range is
+ * non-zero and the trace file is open (note: range is evaluated twice)
+******************************************************************************
+ */
+#define AEV_TRACE(string, value, range) \
+    { \
+        if((range) && g_enc_trace.fp) { \
+            fprintf( g_enc_trace.fp, "%-40s:%8d R:%d\n", string, value, (range)); \
+            fflush ( g_enc_trace.fp); \
+        } }
+
+#else
+
+/* With trace disabled both macros expand to nothing, so call sites need no #if guards */
+#define ENTROPY_TRACE(syntax_string, value)
+
+#define AEV_TRACE(string, value, range)
+
+#endif
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+
+/**
+******************************************************************************
+*
+* @brief Dummy trace init when trace is disabled in encoder
+*
+* @par Description
+* This routine needs to be called at start of trace
+*
+* @param[in] pu1_file_name
+* Name of file where trace outputs need to be stored (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+extern WORD32 ih264e_trace_init
+ (
+ const char *pu1_file_name
+ );
+
+/**
+******************************************************************************
+*
+* @brief Dummy trace de-init function when trace is disabled
+*
+* @par Description
+* This routine needs to be called at end of trace
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+extern WORD32 ih264e_trace_deinit
+ (
+ void
+ );
+
+#endif // IH264E_TRACE_H_
diff --git a/encoder/ih264e_trace_support.h b/encoder/ih264e_trace_support.h
new file mode 100755
index 0000000..c35bd4f
--- /dev/null
+++ b/encoder/ih264e_trace_support.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_trace_support.h
+*
+* @brief
+* This file contains extern declarations of routines that could be helpful
+* for debugging purposes.
+*
+* @author
+* Harish
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef TRACE_SUPPORT_H_
+#define TRACE_SUPPORT_H_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+typedef struct /* trace buffer bookkeeping; field notes below are inferred from names only - confirm */
+{
+ WORD8 * pu1_buf; /* presumably the buffer that trace text is written into */
+ WORD32 i4_offset; /* presumably the current offset within pu1_buf */
+ WORD32 i4_max_size; /* presumably the capacity of pu1_buf */
+}trace_support_t;
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size);
+
+int trace_printf(const WORD8 *format, ...);
+
+#endif // TRACE_SUPPORT_H_
diff --git a/encoder/ih264e_utils.c b/encoder/ih264e_utils.c
new file mode 100755
index 0000000..f0086cb
--- /dev/null
+++ b/encoder/ih264e_utils.c
@@ -0,0 +1,1804 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_utils.c
+*
+* @brief
+* Contains miscellaneous utility functions used by the encoder
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_get_min_level()
+* - ih264e_get_lvl_idx()
+* - ih264e_get_dpb_size()
+* - ih264e_get_total_pic_buf_size()
+* - ih264e_get_pic_mv_bank_size()
+* - ih264e_pic_buf_mgr_add_bufs()
+* - ih264e_mv_buf_mgr_add_bufs()
+* - ih264e_init_quant_params()
+* - ih264e_init_air_map()
+* - ih264e_codec_init()
+* - ih264e_pic_init()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* system include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/* user include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ithread.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264_macros.h"
+#include "ih264_common_tables.h"
+#include "ih264_debug.h"
+#include "ih264_trans_data.h"
+#include "ih264e_defs.h"
+#include "ih264e_globals.h"
+#include "ih264_buf_mgr.h"
+#include "ih264_dpb_mgr.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_utils.h"
+#include "ih264e_config.h"
+#include "ih264e_statistics.h"
+#include "ih264e_trace.h"
+#include "ih264_list.h"
+#include "ih264e_encode_header.h"
+#include "ih264e_me.h"
+#include "ime_defs.h"
+#include "ime.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_core_coding.h"
+#include "ih264e_rc_mem_interface.h"
+#include "ih264e_time_stamp.h"
+#include "ih264e_debug.h"
+#include "ih264e_process.h"
+#include "ih264e_master.h"
+#include "irc_rate_control_api.h"
+#include "ime_statistics.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Used to get minimum level index for a given picture size
+*
+* @par Description:
+* Gets the minimum level index and then gets corresponding level.
+* Also used to ignore invalid levels like 2.3, 3.3 etc
+*
+* @param[in] pic_size
+* Picture size, compared against the per-level maximum luma picture sizes
+*
+* @returns Minimum level (entry of gai4_ih264_levels) supporting the picture size
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_min_level(WORD32 pic_size)
+{
+    WORD32 idx = 0;
+
+    /* Advance to the first level whose maximum luma picture size can hold
+     * pic_size; idx ends at MAX_LEVEL when no table entry is large enough */
+    while ((idx < MAX_LEVEL) && (pic_size > gai4_ih264_max_luma_pic_size[idx]))
+    {
+        idx++;
+    }
+
+    /* NOTE(review): with idx == MAX_LEVEL this reads gai4_ih264_levels[MAX_LEVEL];
+     * confirm the table has an entry at that index */
+    return gai4_ih264_levels[idx];
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Used to get level index for a given level
+*
+* @par Description:
+* Converts from level_idc (which is multiplied by 30) to an index that can be
+* used as a lookup. Also used to ignore invalid levels like 2.2 , 3.2 etc
+*
+* @param[in] level
+* Level of the stream
+*
+* @returns Level index for a given level
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_lvl_idx(WORD32 level)
+{
+    if (level < IH264_LEVEL_11)
+    {
+        return 0;
+    }
+    if (level < IH264_LEVEL_12)
+    {
+        return 1;
+    }
+    if (level < IH264_LEVEL_13)
+    {
+        return 2;
+    }
+    if (level < IH264_LEVEL_20)
+    {
+        return 3;
+    }
+    if (level < IH264_LEVEL_21)
+    {
+        return 4;
+    }
+    if (level < IH264_LEVEL_22)
+    {
+        return 5;
+    }
+    if (level < IH264_LEVEL_30)
+    {
+        return 6;
+    }
+    if (level < IH264_LEVEL_31)
+    {
+        return 7;
+    }
+    if (level < IH264_LEVEL_32)
+    {
+        return 8;
+    }
+    if (level < IH264_LEVEL_40)
+    {
+        return 9;
+    }
+    if (level < IH264_LEVEL_41)
+    {
+        return 10;
+    }
+    if (level < IH264_LEVEL_42)
+    {
+        return 11;
+    }
+    if (level < IH264_LEVEL_50)
+    {
+        return 12;
+    }
+
+    /* NOTE(review): the chain stops at IH264_LEVEL_50, so any level at or above
+     * it gets index 0, the same as the lowest level - confirm this is intended */
+    return 0;
+}
+
+/**
+*******************************************************************************
+*
+* @brief returns maximum number of pictures allowed in dpb for a given level
+*
+* @par Description:
+* For given width, height and level, number of pictures allowed in decoder
+* picture buffer is computed as per Annex A.3.1
+*
+* @param[in] level
+* level of the bit-stream
+*
+* @param[in] pic_size
+* width * height
+*
+* @returns Number of buffers in DPB
+*
+* @remarks
+* From annexure A.3.1 of H264 specification,
+* max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to
+* Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and
+* MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size
+* presented in the look up table gas_ih264_lvl_tbl is in units of 512
+* bytes. Hence the expression is modified accordingly.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size)
+{
+    /* dpb size of the level from gas_ih264_lvl_tbl; stays 0 if level is absent */
+    WORD32 lvl_dpb_size = 0;
+    WORD32 num_frames;
+    WORD32 idx = 0;
+
+    /* visit all 16 table entries; a match does not end the scan */
+    while (idx < 16)
+    {
+        if ((WORD32)gas_ih264_lvl_tbl[idx].u4_level_idc == level)
+        {
+            lvl_dpb_size = gas_ih264_lvl_tbl[idx].u4_max_dpb_size;
+        }
+        idx++;
+    }
+
+    /* from Annexure A.3.1 h264 specification; NOTE(review): pic_size == 0 divides by zero */
+    num_frames = MIN( 1024 * lvl_dpb_size / ( pic_size * 3 ), MAX_DPB_SIZE );
+
+    return num_frames;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Used to get reference picture buffer size for a given level and
+* and padding used
+*
+* @par Description:
+* Used to get reference picture buffer size for a given level and padding used
+* Each picture is padded on all four sides
+*
+* @param[in] pic_size
+* Number of luma samples (Width * Height)
+*
+* @param[in] level
+* Level
+*
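+* @param[in] horz_pad
+* Total padding used in horizontal direction
+*
+* @param[in] vert_pad
+* Total padding used in vertical direction
+*
+* @param[in] num_ref_frames, num_reorder_frames
+* Frame counts; their sum plus MAX_CTXT_SETS is the number of buffers sized
+*
+* @returns Total picture buffer size
+*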
+*******************************************************************************
+*/
+WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size,
+                                     WORD32 level,
+                                     WORD32 horz_pad,
+                                     WORD32 vert_pad,
+                                     WORD32 num_ref_frames,
+                                     WORD32 num_reorder_frames)
+{
+    /* Row of the level tables that corresponds to the given level */
+    WORD32 tbl_idx = ih264e_get_lvl_idx(level);
+
+    /* Maximum number of luma samples in a picture at this level */
+    WORD32 luma_samples = gai4_ih264_max_luma_pic_size[tbl_idx];
+
+    /* Luma plus chroma: 3/2 of the luma samples */
+    WORD32 pic_samples = luma_samples * 3 / 2;
+
+    /* Maximum width and minimum height read from the level tables */
+    WORD32 wd_max = gai4_ih264_max_wd_ht[tbl_idx];
+    WORD32 ht_min = gai4_ih264_min_wd_ht[tbl_idx];
+
+    /* The larger of the two pads is applied in both directions */
+    WORD32 pad_max = MAX(horz_pad, vert_pad);
+
+    /* Number of buffers the total covers: reference frames, reorder frames
+     * and MAX_CTXT_SETS more */
+    WORD32 buf_cnt = (num_ref_frames + num_reorder_frames + MAX_CTXT_SETS);
+
+    /* Samples in the padded border of one buffer (derivation below) */
+    WORD32 pad_samples;
+
+    /* Running total that is returned */
+    WORD32 total;
+
+    /* pic_size takes no part in the computation; only the level tables
+     * decide the picture dimensions */
+    UNUSED(pic_size);
+
+    /*
+     * Every buffer must hold (Wd + horz_pad) * (Ht + vert_pad) samples, i.e.
+     *   (Wd * Ht) + (horz_pad * vert_pad) + Wd * vert_pad + Ht * horz_pad
+     *
+     * The (Wd * Ht) part, chroma included, is covered by pic_samples.
+     *
+     * For the border both pads are replaced by pad_max, which gives
+     *   (pad_max * pad_max) + pad_max * (Wd + Ht)
+     *
+     * Width and height can change, and Wd + Ht is worst when one dimension
+     * is at its maximum and the other at its minimum, hence wd_max and ht_min.
+     *
+     * Both parts are multiplied by buf_cnt below.
+     */
+    pad_samples = (pad_max * pad_max) + pad_max * (wd_max + ht_min);
+
+    /* Number of bytes in reference pictures */
+    total = pic_samples * buf_cnt;
+
+    /* Account for padding area */
+    total += pad_samples * buf_cnt;
+
+    return total;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Returns MV bank buffer size for a given number of luma samples
+*
+* @par Description:
+* For given number of luma samples one MV bank size is computed.
+* Each MV bank includes pu_map and enc_pu_t for all the min PUs(4x4) in a picture
+*
+* @param[in] num_luma_samples
+* Max number of luma pixels in the frame
+*
+* @returns Total MV Bank size
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)
+{
+    /* number of macroblocks in the picture */
+    WORD32 mb_cnt = num_luma_samples / (MB_SIZE * MB_SIZE);
+
+    /* number of minimum-size PUs in the picture */
+    WORD32 pu_cnt = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+
+    /* accumulated bank size */
+    WORD32 bank_bytes = 0;
+
+    /* one WORD32 per MB: start index of that MB's enc_pu_t entries.
+     * NOTE(review): the original comment asks for one extra entry, but only mb_cnt are counted - confirm */
+    bank_bytes += mb_cnt * sizeof(WORD32);
+
+    /* pu_map: one byte per PU */
+    bank_bytes += pu_cnt;
+
+    /* one enc_pu_t per PU */
+    bank_bytes += pu_cnt * sizeof(enc_pu_t);
+
+    return bank_bytes;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets up the pic_buf_t entries and registers them with the reference
+*  buffer manager
+*
+* @par Description:
+*  The memory pointed to by ps_codec->ps_pic_buf starts with an array of
+*  BUF_MGR_MAX_CNT pic_buf_t structures; picture data follows it. For each of
+*  the ps_codec->i4_ref_buf_cnt buffers the luma and chroma pointers are
+*  placed past the top/left padding and the entry is added to
+*  ps_codec->pv_ref_buf_mgr.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns
+*  IH264E_SUCCESS, IH264E_INSUFFICIENT_MEM_PICBUF when the remaining memory
+*  cannot hold another luma + chroma pair, or IH264E_BUF_MGR_ERROR when the
+*  buffer manager rejects an entry. On failure the same code is also stored
+*  in ps_codec->i4_error_code.
+*
+* @remarks
+*  NOTE(review): the remaining-memory check subtracts one luma + chroma set per
+*  buffer while the data pointer advances by HPEL_PLANES_CNT such sets -
+*  confirm against the code that computes i4_total_pic_buf_size.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec)
+{
+    /* number of picture buffers to register */
+    const WORD32 num_bufs = ps_codec->i4_ref_buf_cnt;
+
+    /* memory left for picture data once the pic_buf_t array is set aside */
+    WORD32 bytes_left = ps_codec->i4_total_pic_buf_size
+                    - BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+    /* descriptor being filled */
+    pic_buf_t *ps_desc = (pic_buf_t *) ps_codec->ps_pic_buf;
+
+    /* picture data begins right after the pic_buf_t array */
+    UWORD8 *pu1_mem = ((UWORD8 *) ps_codec->ps_pic_buf)
+                    + BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
+
+    /* sizes of one padded luma plane, one chroma plane and the pair */
+    const WORD32 luma_bytes = (ps_codec->i4_rec_strd)
+                    * (ps_codec->s_cfg.u4_ht + PAD_HT);
+    const WORD32 chroma_bytes = luma_bytes >> 1;
+    const WORD32 plane_pair_bytes = luma_bytes + chroma_bytes;
+
+    WORD32 buf_idx = 0;
+
+    /* Register buffers one by one; bail out as soon as memory runs short */
+    while (buf_idx < num_bufs)
+    {
+        WORD32 add_status;
+
+        bytes_left -= plane_pair_bytes;
+
+        if (bytes_left < 0)
+        {
+            ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_PICBUF;
+            return IH264E_INSUFFICIENT_MEM_PICBUF;
+        }
+
+        /* luma starts after PAD_TOP rows and PAD_LEFT columns */
+        ps_desc->pu1_luma = pu1_mem + ps_codec->i4_rec_strd * PAD_TOP
+                        + PAD_LEFT;
+        pu1_mem += luma_bytes;
+
+        /* chroma starts after PAD_TOP / 2 rows and PAD_LEFT columns */
+        ps_desc->pu1_chroma = pu1_mem
+                        + ps_codec->i4_rec_strd * (PAD_TOP / 2) + PAD_LEFT;
+        pu1_mem += chroma_bytes;
+
+        add_status = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_ref_buf_mgr,
+                                       ps_desc, buf_idx);
+
+        if (0 != add_status)
+        {
+            ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR;
+            return IH264E_BUF_MGR_ERROR;
+        }
+
+        /* step over the remaining (HPEL_PLANES_CNT - 1) plane sets */
+        pu1_mem += (HPEL_PLANES_CNT - 1) * plane_pair_bytes;
+
+        ps_desc++;
+        buf_idx++;
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Registers MV bank buffers with the MV buffer manager
+*
+* @par Description:
+*  The memory at ps_codec->pv_mv_bank_buf_base starts with an array of
+*  BUF_MGR_MAX_CNT mv_buf_t structures; the MV banks follow it. Each bank is
+*  ih264e_get_pic_mv_bank_size() long and is split into
+*   - pu4_mb_pu_cnt  : one WORD32 per MB
+*   - pu1_pic_pu_map : one byte per min PU
+*   - ps_pic_pu      : one enc_pu_t per min PU
+*  One bank is added to ps_codec->pv_mv_buf_mgr for each of the
+*  ps_codec->i4_ref_buf_cnt buffers.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns
+*  IH264E_SUCCESS, IH264E_INSUFFICIENT_MEM_MVBANK when the remaining memory
+*  cannot hold another bank, or IH264E_BUF_MGR_ERROR when the buffer manager
+*  rejects an entry. On failure the same code is also stored in
+*  ps_codec->i4_error_code.
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
+{
+    /* number of MV banks to register */
+    const WORD32 num_banks = ps_codec->i4_ref_buf_cnt;
+
+    /* luma samples of the picture, both dimensions aligned to 16 */
+    const WORD32 num_luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd)
+                    * ALIGN16(ps_codec->s_cfg.u4_ht);
+
+    /* MB and min PU counts for that many samples */
+    const WORD32 mb_cnt = num_luma_samples / (MB_SIZE * MB_SIZE);
+    const WORD32 pu_cnt = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+
+    /* size of a single bank */
+    const WORD32 bank_bytes = ih264e_get_pic_mv_bank_size(num_luma_samples);
+
+    /* memory left for banks once the mv_buf_t array is set aside */
+    WORD32 bytes_left = ps_codec->i4_total_mv_bank_size
+                    - (BUF_MGR_MAX_CNT * sizeof(mv_buf_t));
+
+    /* descriptor being filled and start of its bank */
+    mv_buf_t *ps_bank = ps_codec->pv_mv_bank_buf_base;
+    UWORD8 *pu1_mem = ps_codec->pv_mv_bank_buf_base;
+
+    WORD32 bank_idx;
+
+    /* the mv_buf_t array lives at the base of the memory */
+    ps_codec->ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
+
+    /* banks begin right after the mv_buf_t array */
+    pu1_mem += BUF_MGR_MAX_CNT * sizeof(mv_buf_t);
+
+    for (bank_idx = 0; bank_idx < num_banks; bank_idx++)
+    {
+        IH264_ERROR_T add_status;
+
+        /* start of the pu map, right behind the per MB WORD32 entries */
+        UWORD8 *pu1_map;
+
+        bytes_left -= bank_bytes;
+
+        if (bytes_left < 0)
+        {
+            ps_codec->i4_error_code = IH264E_INSUFFICIENT_MEM_MVBANK;
+            return IH264E_INSUFFICIENT_MEM_MVBANK;
+        }
+
+        pu1_map = pu1_mem + mb_cnt * sizeof(WORD32);
+
+        ps_bank->pu4_mb_pu_cnt = (UWORD32 *) pu1_mem;
+        ps_bank->pu1_pic_pu_map = pu1_map;
+        ps_bank->ps_pic_pu = (enc_pu_t *) (pu1_map + pu_cnt);
+
+        add_status = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
+                                       ps_bank, bank_idx);
+
+        if (IH264_SUCCESS != add_status)
+        {
+            ps_codec->i4_error_code = IH264E_BUF_MGR_ERROR;
+            return IH264E_BUF_MGR_ERROR;
+        }
+
+        pu1_mem += bank_bytes;
+        ps_bank++;
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Fills the per plane quantization parameters of a process context
+*
+* @par Description:
+*  For each of the planes Y, U and V the structure ps_proc->ps_qp_params[plane]
+*  is filled with: the plane qp, qp / 6, qp % 6, qbits, the forward and inverse
+*  scale matrix pointers (selected by qp % 6), the forward threshold matrix
+*  (table entry right shifted by 8 - qp / 6), a weight of 16 per coefficient
+*  and the dead zone. The dead zone is halved for slices that are neither
+*  ISLICE nor SISLICE. When s_cfg.u4_enable_satqd is set, the nine
+*  pu2_sad_thrsh entries are derived as well.
+*  Luma uses qp directly; both chroma planes use gu1_qpc_fqpi[qp].
+*
+* @param[in] ps_proc
+*  pointer to process context
+*
+* @param[in] qp
+*  quantization parameter; used as an index into gu1_qpc_fqpi without a
+*  range check
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp)
+{
+    /* bases of the forward and inverse 4x4 scale tables */
+    const UWORD16 *pu2_fwd_scale_base = gu2_quant_scale_matrix_4x4;
+    const UWORD16 *pu2_inv_scale_base = gau2_ih264_iquant_scale_matrix_4x4;
+
+    /* qp of every plane, indexed by Y, U, V */
+    UWORD32 au4_plane_qp[3];
+
+    /* plane being initialised */
+    COMPONENT_TYPE comp;
+
+    /* luma takes qp as is, both chroma planes take the mapped qp */
+    au4_plane_qp[Y] = qp;
+    au4_plane_qp[U] = gu1_qpc_fqpi[qp];
+    au4_plane_qp[V] = gu1_qpc_fqpi[qp];
+
+    for (comp = Y; comp <= V; comp += 1)
+    {
+        /* params of this plane */
+        quant_params_t *ps_params = ps_proc->ps_qp_params[comp];
+
+        /* qp / 6 and qp % 6 */
+        const UWORD32 u4_div6 = (au4_plane_qp[comp] / 6);
+        const UWORD32 u4_mod6 = (au4_plane_qp[comp] % 6);
+
+        /* forward threshold table row for this qp % 6 */
+        const UWORD16 *pu2_thresh_row = gu2_forward_quant_threshold_4x4
+                        + (u4_mod6 * 16);
+
+        WORD32 k;
+
+        /* scalar fields */
+        ps_params->u1_mb_qp = au4_plane_qp[comp];
+        ps_params->u1_qp_div = u4_div6;
+        ps_params->u1_qp_rem = u4_mod6;
+        ps_params->u1_qbits = QP_BITS_h264_4x4 + u4_div6;
+
+        /* forward and inverse scale matrices */
+        ps_params->pu2_scale_mat = pu2_fwd_scale_base + (u4_mod6 * 16);
+        ps_params->pu2_iscale_mat = pu2_inv_scale_base + (u4_mod6 * 16);
+
+        /* threshold and weight for each of the 16 coefficients */
+        for (k = 0; k < 16; k++)
+        {
+            ps_params->pu2_thres_mat[k] = pu2_thresh_row[k]
+                            >> (8 - u4_div6);
+            ps_params->pu2_weigh_mat[k] = 16;
+        }
+
+        /* rounding constant depends on qp / 6 ... */
+        ps_params->u4_dead_zone =
+                        gu4_forward_quant_round_factor_4x4[u4_div6];
+
+        /* ... and is halved unless the slice is ISLICE or SISLICE */
+        if (!(ps_proc->i4_slice_type == ISLICE
+                        || ps_proc->i4_slice_type == SISLICE))
+        {
+            ps_params->u4_dead_zone >>= 1;
+        }
+
+        /* SATQD thresholds, only when enabled in the config */
+        if (ps_proc->ps_codec->s_cfg.u4_enable_satqd)
+        {
+            const UWORD16 *pu2_sm = ps_params->pu2_scale_mat;
+
+            UWORD32 u4_numer = ((1 << (ps_params->u1_qbits)) - ps_params->u4_dead_zone);
+
+            ps_params->pu2_sad_thrsh[0] = u4_numer / MAX(pu2_sm[3], pu2_sm[11]);
+            ps_params->pu2_sad_thrsh[1] = u4_numer / MAX(pu2_sm[1], pu2_sm[9]);
+            ps_params->pu2_sad_thrsh[2] = u4_numer / pu2_sm[15];
+            ps_params->pu2_sad_thrsh[3] = u4_numer / pu2_sm[7];
+            ps_params->pu2_sad_thrsh[4] = u4_numer / MAX(pu2_sm[12], pu2_sm[14]);
+            ps_params->pu2_sad_thrsh[5] = u4_numer / MAX(pu2_sm[4], pu2_sm[6]);
+            ps_params->pu2_sad_thrsh[6] = u4_numer / pu2_sm[13];
+            ps_params->pu2_sad_thrsh[7] = u4_numer / pu2_sm[5];
+            ps_params->pu2_sad_thrsh[8] = u4_numer / MAX(MAX3(pu2_sm[0], pu2_sm[2], pu2_sm[8]), pu2_sm[10]);
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Initialize AIR mb frame map
+*
+* @par Description:
+*  Writes one entry per MB (s_cfg.i4_wd_mbs * s_cfg.i4_ht_mbs entries) into
+*  ps_codec->pu2_intr_rfrsh_map:
+*   - IVE_AIR_MODE_CYCLIC : entry = mb index % refresh period
+*   - IVE_AIR_MODE_RANDOM : entry = r % refresh period, where r follows
+*     r = (r * 32719 + 3) % 32749 starting from r = 1
+*  For any other mode the map is left untouched.
+*
+* @param[in] ps_codec
+*  Pointer to codec context
+*
+* @returns IH264E_SUCCESS in all cases
+*
+* @remarks
+*  s_cfg.u4_air_refresh_period is used as a modulus and is not checked
+*  against zero here.
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec)
+{
+    /* map to be filled */
+    UWORD16 *pu2_map = ps_codec->pu2_intr_rfrsh_map;
+
+    /* configured mode and refresh period */
+    const IVE_AIR_MODE_T mode = ps_codec->s_cfg.e_air_mode;
+    const UWORD32 period = ps_codec->s_cfg.u4_air_refresh_period;
+
+    /* number of MBs in the picture */
+    const UWORD32 total_mbs = ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs;
+
+    UWORD32 mb_idx;
+
+    if (IVE_AIR_MODE_CYCLIC == mode)
+    {
+        for (mb_idx = 0; mb_idx < total_mbs; mb_idx++)
+        {
+            pu2_map[mb_idx] = mb_idx % period;
+        }
+    }
+    else if (IVE_AIR_MODE_RANDOM == mode)
+    {
+        /* state of the pseudo random sequence */
+        UWORD32 rand_state = 1;
+
+        for (mb_idx = 0; mb_idx < total_mbs; mb_idx++)
+        {
+            rand_state = (rand_state * 32719 + 3) % 32749;
+            pu2_map[mb_idx] = rand_state % period;
+        }
+    }
+
+    return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Codec level initializations
+*
+* @par Description:
+* Initializes the codec with parameters that needs to be set before encoding
+* first frame. In order:
+* - unless the speed preset is IVE_CONFIG, overrides the ME, intra 4x4, hpel,
+* deblock level and inter gate settings according to the preset
+* - if AIR is enabled, fills the AIR map and sets i4_air_pic_cnt to -1
+* - maps the configured qp values through gau1_h264_to_mpeg2_qmap, selects
+* the rc type and calls ih264e_rc_init
+* - sets source stride, recon stride and the reference buffer count
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @returns error_status (IH264E_SUCCESS in all cases)
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec)
+{
+ /********************************************************************
+ * INITIALIZE CODEC CONTEXT *
+ ********************************************************************/
+ /* encoder presets; with IVE_CONFIG the configured values are kept as is */
+ if (ps_codec->s_cfg.u4_enc_speed_preset != IVE_CONFIG)
+ {
+ if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
+ {/* high quality */
+ /* enable diamond search */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+ ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+ /* enable intra 4x4, evaluated with rdopt on */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 1;
+ ps_codec->luma_energy_compaction[1] =
+ ih264e_code_luma_intra_macroblock_4x4_rdopt_on;
+
+ /* hpel on */
+ ps_codec->s_cfg.u4_enable_hpel = 1;
+
+ /* deblocking on for every picture (DISABLE_DEBLK_LEVEL_0) */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_NORMAL)
+ {/* normal */
+ /* enable diamond search */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+ ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+ /* enable intra 4x4 */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 1;
+
+ /* hpel on */
+ ps_codec->s_cfg.u4_enable_hpel = 1;
+
+ /* deblocking on for every picture (DISABLE_DEBLK_LEVEL_0) */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
+ {/* fast */
+ /* enable diamond search */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+ ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+ /* disable intra 4x4 */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+ /* hpel on */
+ ps_codec->s_cfg.u4_enable_hpel = 1;
+
+ /* deblocking on for every picture (DISABLE_DEBLK_LEVEL_0) */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_0;
+
+ /* enabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 1;
+ }
+ else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_HIGH_SPEED)
+ {/* high speed */
+ /* enable diamond search */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+ ps_codec->s_cfg.u4_enable_fast_sad = 0;
+
+ /* disable intra 4x4 */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+ /* sub pel off */
+ ps_codec->s_cfg.u4_enable_hpel = 0;
+
+ /* deblocking off */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+ /* disabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 0;
+ }
+ else if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
+ {/* fastest */
+ /* enable diamond search; u4_enable_fast_sad is not overridden in
+ * this preset and keeps its configured value */
+ ps_codec->s_cfg.u4_me_speed_preset = DMND_SRCH;
+
+ /* disable intra 4x4 */
+ ps_codec->s_cfg.u4_enable_intra_4x4 = 0;
+
+ /* sub pel off */
+ ps_codec->s_cfg.u4_enable_hpel = 0;
+
+ /* deblocking off */
+ ps_codec->s_cfg.u4_disable_deblock_level = DISABLE_DEBLK_LEVEL_4;
+
+ /* enabled intra inter gating in Inter slices */
+ ps_codec->u4_inter_gate = 1;
+ }
+ }
+
+ /*****************************************************************
+ * Initialize AIR inside codec
+ * The status returned by ih264e_init_air_map is not checked here
+ *****************************************************************/
+ if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode)
+ {
+ ih264e_init_air_map(ps_codec);
+
+ ps_codec->i4_air_pic_cnt = -1;
+ }
+
+ /****************************************************/
+ /* INITIALIZE RATE CONTROL */
+ /****************************************************/
+ {
+ /* init qp */
+ UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+ /* min max qp, stored as (min, max) pairs per picture type */
+ UWORD8 au1_min_max_qp[2 * MAX_PIC_TYPE];
+
+ /* init i,p,b qp, each mapped through gau1_h264_to_mpeg2_qmap */
+ au1_init_qp[0] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp];
+ au1_init_qp[1] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp];
+ au1_init_qp[2] = gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp];
+
+ /* init min max qp, each mapped through gau1_h264_to_mpeg2_qmap */
+ au1_min_max_qp[2 * I_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_min];
+ au1_min_max_qp[2 * I_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_i_qp_max];
+
+ au1_min_max_qp[2 * P_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_min];
+ au1_min_max_qp[2 * P_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_p_qp_max];
+
+ au1_min_max_qp[2 * B_PIC] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_min];
+ au1_min_max_qp[2 * B_PIC + 1] =
+ gau1_h264_to_mpeg2_qmap[ps_codec->s_cfg.u4_b_qp_max];
+
+ /* get rc mode */
+ switch (ps_codec->s_cfg.e_rc_mode)
+ {
+ case IVE_RC_STORAGE:
+ ps_codec->s_rate_control.e_rc_type = VBR_STORAGE;
+ break;
+ case IVE_RC_CBR_NON_LOW_DELAY:
+ ps_codec->s_rate_control.e_rc_type = CBR_NLDRC;
+ break;
+ case IVE_RC_CBR_LOW_DELAY:
+ ps_codec->s_rate_control.e_rc_type = CBR_LDRC;
+ break;
+ case IVE_RC_NONE:
+ ps_codec->s_rate_control.e_rc_type = CONST_QP;
+ break;
+ default: /* any other mode leaves e_rc_type unchanged */
+ break;
+ }
+
+ /* init rate control */
+ ih264e_rc_init(ps_codec->s_rate_control.pps_rate_control_api,
+ ps_codec->s_rate_control.pps_frame_time,
+ ps_codec->s_rate_control.pps_time_stamp,
+ ps_codec->s_rate_control.pps_pd_frm_rate,
+ ps_codec->s_cfg.u4_max_framerate,
+ ps_codec->s_cfg.u4_src_frame_rate,
+ ps_codec->s_cfg.u4_tgt_frame_rate,
+ ps_codec->s_rate_control.e_rc_type,
+ ps_codec->s_cfg.u4_target_bitrate,
+ ps_codec->s_cfg.u4_max_bitrate,
+ ps_codec->s_cfg.u4_vbv_buffer_delay,
+ ps_codec->s_cfg.u4_i_frm_interval, au1_init_qp,
+ H264_ALLOC_INTER_FRM_INTV, au1_min_max_qp,
+ ps_codec->s_cfg.u4_max_level);
+ }
+
+ /* src stride */
+ ps_codec->i4_src_strd = ps_codec->s_cfg.u4_strd;
+
+ /* recon stride : max width aligned to 16 plus PAD_WD */
+ ps_codec->i4_rec_strd = ALIGN16(ps_codec->s_cfg.u4_max_wd) + PAD_WD;
+
+ /* ref buf cnt : max ref cnt + max reorder cnt + MAX_CTXT_SETS */
+ ps_codec->i4_ref_buf_cnt = ps_codec->s_cfg.u4_max_ref_cnt
+ + ps_codec->s_cfg.u4_max_reorder_cnt;
+ ps_codec->i4_ref_buf_cnt += MAX_CTXT_SETS;
+
+ DEBUG_HISTOGRAM_INIT();
+
+ return IH264E_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Picture level initializations
+*
+* @par Description:
+* Before beginning to encode the frame, the current function initializes all
+* the ctxts (proc, entropy, me, ...) basing on the input configured params.
+* It locates space for storing recon in the encoder picture buffer set, fetches
+* reference frame from encoder picture buffer set. Calls RC pre-enc to get
+* qp and pic type for the current frame. Queues proc jobs so that
+* the other threads can begin encoding. In brief, this function sets up the
+* tone for the entire encoder.
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] ps_inp_buf
+* Pointer to input buffer context
+*
+* @returns error_status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
+{
+ /* error status */
+ IH264E_ERROR_T error_status = IH264E_SUCCESS;
+ IH264_ERROR_T ret = IH264_SUCCESS;
+
+ /* mv buff bank */
+ mv_buf_t *ps_mv_buf = NULL;
+ WORD32 cur_mv_bank_buf_id;
+
+ /* recon buffer set */
+ pic_buf_t *ps_cur_pic;
+ WORD32 cur_pic_buf_id;
+ UWORD8 *pu1_cur_pic_luma, *pu1_cur_pic_chroma;
+
+ /* ref buffer set */
+ pic_buf_t *ps_ref_pic;
+ WORD32 ref_set_id;
+
+ /* pic time stamp */
+ UWORD32 u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
+ UWORD32 u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
+
+ /* indices to access curr/prev frame info */
+ WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+
+ /* curr pic type */
+ PIC_TYPE_T *pic_type = &ps_codec->pic_type;
+
+ /* should src be skipped */
+ WORD32 *skip_src = &ps_codec->s_rate_control.pre_encode_skip[ctxt_sel];
+
+ /* Diamond search Iteration Max Cnt */
+ UWORD32 u4_num_layers =
+ (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST) ?
+ (NUM_LAYERS >> 2) : NUM_LAYERS;
+
+ /* enable fast sad */
+ UWORD32 u4_enable_fast_sad = ps_codec->s_cfg.u4_enable_fast_sad;
+
+ /********************************************************************/
+ /* INITIALIZE CODEC CONTEXT */
+ /********************************************************************/
+
+ /* pre enc rc call */
+ *skip_src = ih264e_set_rc_pic_params(ps_codec,
+ ps_codec->i4_encode_api_call_cnt,
+ (WORD32 *) pic_type);
+ if (*skip_src == 1)
+ {
+ ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_inp_buf =
+ *ps_inp_buf;
+
+ /* inform output bytes generated as zero */
+ ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 0;
+
+ return error_status;
+ }
+
+ /********************************************************************/
+ /* Alternate reference frame */
+ /********************************************************************/
+ if (ps_codec->s_cfg.u4_enable_alt_ref)
+ {
+ if (PIC_IDR == *pic_type || PIC_I == *pic_type)
+ {
+ ps_codec->u4_is_curr_frm_ref = 1;
+ }
+ else
+ {
+ ps_codec->u4_is_curr_frm_ref = 1;
+ if(ps_codec->i4_encode_api_call_cnt % (ps_codec->s_cfg.u4_enable_alt_ref + 1))
+ ps_codec->u4_is_curr_frm_ref = 0;
+ }
+
+ if ((ps_codec->u4_is_curr_frm_ref == 1) || (ps_codec->i4_frame_num < 0))
+ {
+ ps_codec->i4_frame_num++;
+ }
+ }
+ else
+ {
+ ps_codec->u4_is_curr_frm_ref = 1;
+
+ ps_codec->i4_frame_num++;
+ }
+
+ /* slice_type */
+ ps_codec->i4_slice_type = PSLICE;
+
+ if ((PIC_I == *pic_type) || (PIC_IDR == *pic_type))
+ {
+ ps_codec->i4_slice_type = ISLICE;
+ }
+ else if (PIC_P == *pic_type)
+ {
+ ps_codec->i4_slice_type = PSLICE;
+ }
+
+ /* is this an IDR pic */
+ ps_codec->u4_is_idr = 0;
+
+ if (PIC_IDR == *pic_type)
+ {
+ /* set idr flag */
+ ps_codec->u4_is_idr = 1;
+
+ /* reset frame num */
+ ps_codec->i4_frame_num = 0;
+
+ /* idr_pic_id */
+ ps_codec->i4_idr_pic_id++;
+ }
+
+ /* set deblock disable flags based on disable deblock level */
+ ps_codec->i4_disable_deblk_pic = 1;
+
+ if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_0)
+ {
+ /* enable deblocking */
+ ps_codec->i4_disable_deblk_pic = 0;
+ }
+ else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_2)
+ {
+ /* enable deblocking after a period of frames */
+ if (ps_codec->i4_disable_deblk_pic_cnt == DISABLE_DEBLOCK_INTERVAL
+ || ps_codec->i4_slice_type == ISLICE)
+ {
+ ps_codec->i4_disable_deblk_pic = 0;
+ }
+ }
+ else if (ps_codec->s_cfg.u4_disable_deblock_level == DISABLE_DEBLK_LEVEL_3)
+ {
+ if (ps_codec->i4_slice_type == ISLICE)
+ {
+ ps_codec->i4_disable_deblk_pic = 0;
+ }
+ }
+
+ if (ps_codec->i4_disable_deblk_pic)
+ {
+ ps_codec->i4_disable_deblk_pic_cnt++;
+ }
+ else
+ {
+ ps_codec->i4_disable_deblk_pic_cnt = 0;
+ }
+
+ /* In slice mode - lets not deblk mb edges that lie along slice boundaries */
+ if (ps_codec->i4_disable_deblk_pic == 0)
+ {
+ if (ps_codec->s_cfg.e_slice_mode != IVE_SLICE_MODE_NONE)
+ {
+ ps_codec->i4_disable_deblk_pic = 2;
+ }
+ }
+
+ /* error status */
+ ps_codec->i4_error_code = IH264E_SUCCESS;
+
+ /* populate header */
+ if (ps_codec->i4_gen_header)
+ {
+ /* sps */
+ sps_t *ps_sps = NULL;
+
+ /* pps */
+ pps_t *ps_pps = NULL;
+
+ /*ps_codec->i4_pps_id ++;*/
+ ps_codec->i4_pps_id %= MAX_PPS_CNT;
+
+ /*ps_codec->i4_sps_id ++;*/
+ ps_codec->i4_sps_id %= MAX_SPS_CNT;
+
+ /* populate sps header */
+ ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
+ ih264e_populate_sps(ps_codec, ps_sps);
+
+ /* populate pps header */
+ ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
+ ih264e_populate_pps(ps_codec, ps_pps);
+ }
+
+ /* Reference and MV bank Buffer Manager */
+ {
+ /* min pic cnt among the list of pics stored in ref list */
+ WORD32 min_pic_cnt;
+
+ /* max pic cnt among the list of pics stored in ref list */
+ WORD32 max_pic_cnt;
+
+ /* temp var */
+ WORD32 i;
+
+ ps_ref_pic = NULL;
+
+ /* get reference picture when necessary */
+ /* Only nearest picture encoded (max pic cnt) is used as reference */
+ if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
+ {
+ max_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt;
+
+ ps_ref_pic = ps_codec->as_ref_set[0].ps_pic_buf;
+
+ /* loop through to get the max pic cnt among the list of pics stored in ref list */
+ for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (max_pic_cnt < ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ max_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt;
+ ps_ref_pic = ps_codec->as_ref_set[i].ps_pic_buf;
+ }
+ }
+ }
+
+ /* get a location at which the curr pic info can be stored for future reference */
+ ref_set_id = -1;
+
+ for (i = 0; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (-1 == ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ ref_set_id = i;
+ break;
+ }
+ }
+
+ /* If all the entries in the ref_set array are filled, then remove the entry with least pic_cnt */
+ if (ref_set_id == -1)
+ {
+ /* pic info */
+ pic_buf_t *ps_cur_pic;
+
+ /* mv info */
+ mv_buf_t *ps_cur_mv_buf;
+
+ ref_set_id = 0;
+ min_pic_cnt = ps_codec->as_ref_set[0].i4_pic_cnt;
+
+ /* loop through to get the min pic cnt among the list of pics stored in ref list */
+ for (i = 1; i < ps_codec->i4_ref_buf_cnt; i++)
+ {
+ if (min_pic_cnt > ps_codec->as_ref_set[i].i4_pic_cnt)
+ {
+ min_pic_cnt = ps_codec->as_ref_set[i].i4_pic_cnt;
+ ref_set_id = i;
+ }
+ }
+
+ ps_cur_pic = ps_codec->as_ref_set[ref_set_id].ps_pic_buf;
+
+ ps_cur_mv_buf = ps_codec->as_ref_set[ref_set_id].ps_mv_buf;
+
+ /* release this frame from reference list */
+ ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr,
+ ps_cur_mv_buf->i4_buf_id, BUF_MGR_REF);
+
+ ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr,
+ ps_cur_pic->i4_buf_id, BUF_MGR_REF);
+ }
+
+ if (ps_codec->s_cfg.u4_enable_recon)
+ {
+ ret = ih264_buf_mgr_check_free((buf_mgr_t *)ps_codec->pv_ref_buf_mgr);
+
+ if (ret != IH264_SUCCESS)
+ {
+ return IH264E_NO_FREE_RECONBUF;
+ }
+ }
+ }
+
+ {
+ /*****************************************************************/
+ /* Get free MV Bank to hold current picture's motion vector data */
+ /* If there are no free buffers then return with an error code. */
+ /* If the buffer is to be freed by another thread, change the */
+ /* following to call thread yield and wait for buffer to be freed*/
+ /*****************************************************************/
+ ps_mv_buf = (mv_buf_t *) ih264_buf_mgr_get_next_free(
+ (buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
+ &cur_mv_bank_buf_id);
+
+ if (NULL == ps_mv_buf)
+ {
+ ps_codec->i4_error_code = IH264E_NO_FREE_MVBANK;
+ return IH264E_NO_FREE_MVBANK;
+ }
+
+ /* mark the buffer as needed for reference if the curr pic is available for ref */
+ if (ps_codec->u4_is_curr_frm_ref)
+ {
+ ih264_buf_mgr_set_status(ps_codec->pv_mv_buf_mgr,
+ cur_mv_bank_buf_id, BUF_MGR_REF);
+ }
+
+ /* Set current ABS poc to ps_mv_buf, so that while freeing a reference buffer
+ * corresponding mv buffer can be found by looping through ps_codec->ps_mv_buf array
+ * and getting a buffer id to free
+ */
+ ps_mv_buf->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
+
+ ps_mv_buf->i4_buf_id = cur_mv_bank_buf_id;
+ }
+
+ {
+ /*****************************************************************/
+ /* Get free pic buf to hold current picture's recon data */
+ /* If there are no free buffers then return with an error code. */
+ /* If the buffer is to be freed by another thread, change the */
+ /* following to call thread yield and wait for buffer to be freed*/
+ /*****************************************************************/
+ ps_cur_pic = (pic_buf_t *) ih264_buf_mgr_get_next_free(
+ (buf_mgr_t *) ps_codec->pv_ref_buf_mgr,
+ &cur_pic_buf_id);
+
+ if (NULL == ps_cur_pic)
+ {
+ ps_codec->i4_error_code = IH264E_NO_FREE_PICBUF;
+ return IH264E_NO_FREE_PICBUF;
+ }
+
+ /* mark the buffer as needed for reference if the curr pic is available for ref */
+ if (1 == ps_codec->u4_is_curr_frm_ref)
+ {
+ ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
+ BUF_MGR_REF);
+ }
+
+ /* Mark the current buffer as needed for IO if recon is enabled */
+ if (1 == ps_codec->s_cfg.u4_enable_recon)
+ {
+ ih264_buf_mgr_set_status(ps_codec->pv_ref_buf_mgr, cur_pic_buf_id,
+ BUF_MGR_IO);
+ }
+
+ /* Associate input timestamp with current buffer */
+ ps_cur_pic->u4_timestamp_high = ps_inp_buf->u4_timestamp_high;
+ ps_cur_pic->u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
+
+ ps_cur_pic->i4_abs_poc = ps_codec->i4_abs_pic_order_cnt;
+ ps_cur_pic->i4_poc_lsb = ps_codec->i4_pic_order_cnt_lsb;
+
+ ps_cur_pic->i4_buf_id = cur_pic_buf_id;
+
+ pu1_cur_pic_luma = ps_cur_pic->pu1_luma;
+ pu1_cur_pic_chroma = ps_cur_pic->pu1_chroma;
+ }
+
+ /* in case the current picture is used for reference then add it to the reference set */
+ if (ps_codec->u4_is_curr_frm_ref
+ && ((*pic_type == PIC_IDR) || (*pic_type == PIC_I)
+ || (*pic_type == PIC_P)))
+ {
+ ps_codec->as_ref_set[ref_set_id].i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+ /* TODO: Currently pic_cnt and poc are same - Once frame drops are introduced change appropriately */
+ ps_codec->as_ref_set[ref_set_id].i4_poc = ps_codec->i4_pic_cnt;
+
+ ps_codec->as_ref_set[ref_set_id].ps_mv_buf = ps_mv_buf;
+
+ ps_codec->as_ref_set[ref_set_id].ps_pic_buf = ps_cur_pic;
+ }
+
+ /********************************************************************/
+ /* INITIALIZE PROCESS CONTEXT */
+ /********************************************************************/
+ {
+ /* temp var */
+ WORD32 i, j = 0;
+
+ /* curr proc ctxt */
+ process_ctxt_t *ps_proc = NULL;
+
+ j = ctxt_sel * MAX_PROCESS_THREADS;
+
+ /* begin init */
+ for (i = j; i < (j + MAX_PROCESS_THREADS); i++)
+ {
+ ps_proc = &ps_codec->as_process[i];
+
+ /* luma src buffer */
+ if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
+ {
+ ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
+ }
+ else
+ {
+ ps_proc->pu1_src_buf_luma_base =
+ ps_inp_buf->s_raw_buf.apv_bufs[0];
+ }
+
+ /* chroma src buffer */
+ if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE
+ || ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P)
+ {
+ ps_proc->pu1_src_buf_chroma_base =
+ ps_codec->pu1_uv_csc_buf_base;
+ }
+ else
+ {
+ ps_proc->pu1_src_buf_chroma_base =
+ ps_inp_buf->s_raw_buf.apv_bufs[1];
+ }
+
+ /* luma rec buffer */
+ ps_proc->pu1_rec_buf_luma_base = pu1_cur_pic_luma;
+
+ /* chroma rec buffer */
+ ps_proc->pu1_rec_buf_chroma_base = pu1_cur_pic_chroma;
+
+ /* src stride */
+ ps_proc->i4_src_strd = ps_codec->i4_src_strd;
+
+ /* rec stride */
+ ps_proc->i4_rec_strd = ps_codec->i4_rec_strd;
+
+ /* frame num */
+ ps_proc->i4_frame_num = ps_codec->i4_frame_num;
+
+ /* is idr */
+ ps_proc->u4_is_idr = ps_codec->u4_is_idr;
+
+ /* idr pic id */
+ ps_proc->u4_idr_pic_id = ps_codec->i4_idr_pic_id;
+
+ /* slice_type */
+ ps_proc->i4_slice_type = ps_codec->i4_slice_type;
+
+ /* Input width in mbs */
+ ps_proc->i4_wd_mbs = ps_codec->s_cfg.i4_wd_mbs;
+
+ /* Input height in mbs */
+ ps_proc->i4_ht_mbs = ps_codec->s_cfg.i4_ht_mbs;
+
+ /* Half x plane offset from pic buf */
+ ps_proc->u4_half_x_offset = 0;
+
+ /* Half y plane offset from half x plane */
+ ps_proc->u4_half_y_offset = 0;
+
+ /* Half x plane offset from half y plane */
+ ps_proc->u4_half_xy_offset = 0;
+
+ /* top row syntax elements */
+ ps_proc->ps_top_row_mb_syntax_ele =
+ ps_proc->ps_top_row_mb_syntax_ele_base;
+
+ ps_proc->pu1_top_mb_intra_modes =
+ ps_proc->pu1_top_mb_intra_modes_base;
+
+ ps_proc->ps_top_row_pu = ps_proc->ps_top_row_pu_base;
+
+ /* initialize quant params */
+ ps_proc->u4_frame_qp = ps_codec->u4_frame_qp;
+ ps_proc->u4_mb_qp = ps_codec->u4_frame_qp;
+ ih264e_init_quant_params(ps_proc, ps_proc->u4_frame_qp);
+
+ /* previous mb qp*/
+ ps_proc->u4_mb_qp_prev = ps_proc->u4_frame_qp;
+
+ /* Reset frame info */
+ memset(&ps_proc->s_frame_info, 0, sizeof(frame_info_t));
+
+ /* initialize proc, deblk and ME map */
+ if (i == j)
+ {
+ /* row '-1' */
+ memset(ps_proc->pu1_proc_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+ /* row 0 to ht in mbs */
+ memset(ps_proc->pu1_proc_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+ /* row '-1' */
+ memset(ps_proc->pu1_deblk_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+ /* row 0 to ht in mbs */
+ memset(ps_proc->pu1_deblk_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+ /* row '-1' */
+ memset(ps_proc->pu1_me_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+ /* row 0 to ht in mbs */
+ memset(ps_proc->pu1_me_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+
+ /* at the start of air refresh period, reset intra coded map */
+ if (IVE_AIR_MODE_NONE != ps_codec->s_cfg.e_air_mode)
+ {
+ ps_codec->i4_air_pic_cnt = (ps_codec->i4_air_pic_cnt + 1)
+ % ps_codec->s_cfg.u4_air_refresh_period;
+
+ if (!ps_codec->i4_air_pic_cnt)
+ {
+ memset(ps_proc->pu1_is_intra_coded, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+ }
+ }
+ }
+
+ /* deblock level */
+ ps_proc->u4_disable_deblock_level = ps_codec->i4_disable_deblk_pic;
+
+ /* slice index map */
+ /* no slice */
+ if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_NONE)
+ {
+ memset(ps_proc->pu1_slice_idx, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+ }
+ /* generate slices for every 'n' rows, 'n' is given through slice param */
+ else if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
+ {
+ /* slice idx map */
+ UWORD8 *pu1_slice_idx = ps_proc->pu1_slice_idx;
+
+ /* temp var */
+ WORD32 i4_mb_y = 0, slice_idx = 0, cnt;
+
+ while (i4_mb_y < ps_proc->i4_ht_mbs)
+ {
+ if (i4_mb_y +(WORD32)ps_codec->s_cfg.u4_slice_param < ps_proc->i4_ht_mbs)
+ {
+ cnt = ps_codec->s_cfg.u4_slice_param * ps_proc->i4_wd_mbs;
+ i4_mb_y += ps_codec->s_cfg.u4_slice_param;
+ }
+ else
+ {
+ cnt = (ps_proc->i4_ht_mbs - i4_mb_y) * ps_proc->i4_wd_mbs;
+ i4_mb_y += (ps_proc->i4_ht_mbs - i4_mb_y);
+ }
+ memset(pu1_slice_idx, slice_idx, cnt);
+ slice_idx++;
+ pu1_slice_idx += cnt;
+ }
+ }
+
+ /* Current MV Bank's buffer ID */
+ ps_proc->i4_cur_mv_bank_buf_id = cur_mv_bank_buf_id;
+
+ /* Pointer to current picture buffer structure */
+ ps_proc->ps_cur_pic = ps_cur_pic;
+
+ /* Pointer to current pictures mv buffers */
+ ps_proc->ps_cur_mv_buf = ps_mv_buf;
+
+ /* pointer to ref picture */
+ ps_proc->ps_ref_pic = ps_ref_pic;
+
+ if ((*pic_type != PIC_IDR) && (*pic_type != PIC_I))
+ {
+ /* ref pointer luma */
+ ps_proc->pu1_ref_buf_luma_base = ps_ref_pic->pu1_luma;
+
+ /* ref pointer chroma */
+ ps_proc->pu1_ref_buf_chroma_base = ps_ref_pic->pu1_chroma;
+ }
+
+ /* Structure for current input buffer */
+ ps_proc->s_inp_buf = *ps_inp_buf;
+
+ /* Number of encode frame API calls made */
+ ps_proc->i4_encode_api_call_cnt = ps_codec->i4_encode_api_call_cnt;
+
+ /* Current Picture count */
+ ps_proc->i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+ /* error status */
+ ps_proc->i4_error_code = 0;
+
+ /********************************************************************/
+ /* INITIALIZE ENTROPY CONTEXT */
+ /********************************************************************/
+ {
+ entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
+
+ /* start of frame */
+ ps_entropy->i4_sof = 0;
+
+ /* end of frame */
+ ps_entropy->i4_eof = 0;
+
+ /* generate header */
+ ps_entropy->i4_gen_header = ps_codec->i4_gen_header;
+
+ /* sps ref_set_id */
+ ps_entropy->u4_sps_id = ps_codec->i4_sps_id;
+
+ /* sps base */
+ ps_entropy->ps_sps_base = ps_codec->ps_sps_base;
+
+ /* sps id */
+ ps_entropy->u4_pps_id = ps_codec->i4_pps_id;
+
+ /* sps base */
+ ps_entropy->ps_pps_base = ps_codec->ps_pps_base;
+
+ /* slice map */
+ ps_entropy->pu1_slice_idx = ps_proc->pu1_slice_idx;
+
+ /* slice hdr base */
+ ps_entropy->ps_slice_hdr_base = ps_proc->ps_slice_hdr_base;
+
+ /* initialize entropy map */
+ if (i == j)
+ {
+ /* row '-1' */
+ memset(ps_entropy->pu1_entropy_map - ps_proc->i4_wd_mbs, 1, ps_proc->i4_wd_mbs);
+ /* row 0 to ht in mbs */
+ memset(ps_entropy->pu1_entropy_map, 0, ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs);
+ }
+
+ /* wd in mbs */
+ ps_entropy->i4_wd_mbs = ps_proc->i4_wd_mbs;
+
+ /* ht in mbs */
+ ps_entropy->i4_ht_mbs = ps_proc->i4_ht_mbs;
+
+ /* transform_8x8_mode_flag */
+ ps_entropy->i1_transform_8x8_mode_flag = 0;
+
+ /* entropy_coding_mode_flag */
+ ps_entropy->u1_entropy_coding_mode_flag =
+ ps_codec->s_cfg.u4_entropy_coding_mode;
+
+ /* error code */
+ ps_entropy->i4_error_code = IH264E_SUCCESS;
+
+ /* mb skip run */
+ *(ps_proc->s_entropy.pi4_mb_skip_run) = 0;
+
+ /* last frame to encode */
+ ps_proc->s_entropy.u4_is_last = ps_inp_buf->u4_is_last;
+
+ /* Current Picture count */
+ ps_proc->s_entropy.i4_pic_cnt = ps_codec->i4_pic_cnt;
+
+ /* time stamps */
+ ps_entropy->u4_timestamp_low = u4_timestamp_low;
+ ps_entropy->u4_timestamp_high = u4_timestamp_high;
+
+ /* init frame statistics */
+ ps_entropy->u4_header_bits[MB_TYPE_INTRA] = 0;
+ ps_entropy->u4_header_bits[MB_TYPE_INTER] = 0;
+ ps_entropy->u4_residue_bits[MB_TYPE_INTRA] = 0;
+ ps_entropy->u4_residue_bits[MB_TYPE_INTER] = 0;
+ }
+
+ /********************************************************************/
+ /* INITIALIZE DEBLOCK CONTEXT */
+ /********************************************************************/
+ {
+ /* deblk ctxt */
+ deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
+
+ /* slice idx map */
+ ps_deblk->pu1_slice_idx = ps_proc->pu1_slice_idx;
+ }
+
+ /********************************************************************/
+ /* INITIALIZE ME CONTEXT */
+ /********************************************************************/
+ {
+ /* me ctxt */
+ me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+ /* srch range x */
+ ps_me_ctxt->ai2_srch_boundaries[0] =
+ ps_codec->s_cfg.u4_srch_rng_x;
+
+ /* srch range y */
+ ps_me_ctxt->ai2_srch_boundaries[1] =
+ ps_codec->s_cfg.u4_srch_rng_y;
+
+ /* src stride */
+ ps_me_ctxt->i4_src_strd = ps_codec->i4_src_strd;
+
+ /* rec stride */
+ ps_me_ctxt->i4_rec_strd = ps_codec->i4_rec_strd;
+
+ /* Half x plane offset from pic buf */
+ ps_me_ctxt->u4_half_x_offset = ps_proc->u4_half_x_offset;
+
+ /* Half y plane offset from half x plane */
+ ps_me_ctxt->u4_half_y_offset = ps_proc->u4_half_y_offset;
+
+ /* Half x plane offset from half y plane */
+ ps_me_ctxt->u4_half_xy_offset = ps_proc->u4_half_xy_offset;
+
+ /* enable fast sad */
+ ps_me_ctxt->u4_enable_fast_sad = u4_enable_fast_sad;
+
+ /* half pel */
+ ps_me_ctxt->u4_enable_hpel = ps_codec->s_cfg.u4_enable_hpel;
+
+ /* Diamond search Iteration Max Cnt */
+ ps_me_ctxt->u4_num_layers = u4_num_layers;
+
+ /* me speed preset */
+ ps_me_ctxt->u4_me_speed_preset =
+ ps_codec->s_cfg.u4_me_speed_preset;
+
+ /* qp */
+ ps_me_ctxt->u1_mb_qp = ps_codec->u4_frame_qp;
+
+ if ((i == 0) && (0 == ps_codec->i4_pic_cnt))
+ {
+ /* init mv bits tables */
+ ih264e_init_mv_bits(ps_me_ctxt);
+ }
+ }
+
+ ps_proc->ps_ngbr_avbl = &(ps_proc->s_ngbr_avbl);
+
+ }
+
+ /* reset encoder header */
+ ps_codec->i4_gen_header = 0;
+ }
+
+ /********************************************************************/
+ /* ADD JOBS TO THE QUEUE */
+ /********************************************************************/
+ {
+ /* job structures */
+ job_t s_job;
+
+ /* temp var */
+ WORD32 i;
+
+ /* job class */
+ s_job.i4_cmd = CMD_PROCESS;
+
+ /* number of mbs to be processed in the current job */
+ s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
+
+ /* job start index x */
+ s_job.i2_mb_x = 0;
+
+ /* proc base idx */
+ s_job.i2_proc_base_idx = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
+
+ for (i = 0; i < (WORD32)ps_codec->s_cfg.i4_ht_mbs; i++)
+ {
+ /* job start index y */
+ s_job.i2_mb_y = i;
+
+ /* queue the job */
+ ret = ih264_list_queue(ps_codec->pv_proc_jobq, &s_job, 1);
+ if (ret != IH264_SUCCESS)
+ {
+ ps_codec->i4_error_code = ret;
+ return IH264E_FAIL;
+ }
+ }
+
+ /* Once all the jobs are queued, terminate the queue */
+ /* Since the threads are created and deleted in each call, terminating
+ here is not an issue */
+ ih264_list_terminate(ps_codec->pv_proc_jobq);
+ }
+
+ return error_status;
+}
diff --git a/encoder/ih264e_utils.h b/encoder/ih264e_utils.h
new file mode 100755
index 0000000..651dad9
--- /dev/null
+++ b/encoder/ih264e_utils.h
@@ -0,0 +1,327 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_utils.h
+*
+* @brief
+* Contains declarations of miscellaneous utility functions used by the encoder
+*
+* @author
+* Harish
+*
+* @par List of Functions:
+* -ih264e_get_min_level()
+* -ih264e_get_lvl_idx()
+* -ih264e_get_dpb_size()
+* -ih264e_get_total_pic_buf_size()
+* -ih264e_get_pic_mv_bank_size()
+* -ih264e_pic_buf_mgr_add_bufs()
+* -ih264e_mv_buf_mgr_add_bufs()
+* -ih264e_init_quant_params()
+* -ih264e_init_air_map()
+* -ih264e_codec_init()
+* -ih264e_pic_init()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_UTILS_H_
+#define IH264E_UTILS_H_
+
+/**
+*******************************************************************************
+*
+* @brief
+* Used to get minimum level index for a given picture size
+*
+* @par Description:
+* Gets the minimum level index and then gets corresponding level.
+* Also used to ignore invalid levels like 2.3, 3.3 etc
+*
+* @param[in] pic_size
+* Size of the picture
+*
+* @returns Level corresponding to the minimum level index for the given picture size
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_min_level(WORD32 pic_size);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Used to get level index for a given level
+*
+* @par Description:
+* Converts from level_idc (which is multiplied by 30) to an index that can be
+* used as a lookup. Also used to ignore invalid levels like 2.2 , 3.2 etc
+*
+* @param[in] level
+* Level of the stream
+*
+* @returns Level index for a given level
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_lvl_idx(WORD32 level);
+
+/**
+*******************************************************************************
+*
+* @brief returns maximum number of pictures allowed in dpb for a given level
+*
+* @par Description:
+* For given width, height and level, number of pictures allowed in decoder
+* picture buffer is computed as per Annex A.3.1
+*
+* @param[in] level
+* level of the bit-stream
+*
+* @param[in] pic_size
+* width * height
+*
+* @returns Number of buffers in DPB
+*
+* @remarks
+* From annexure A.3.1 of H264 specification,
+* max_dec_frame_buffering <= MaxDpbSize, where MaxDpbSize is equal to
+* Min( 1024 * MaxDPB / ( PicWidthInMbs * FrameHeightInMbs * 384 ), 16 ) and
+* MaxDPB is given in Table A-1 in units of 1024 bytes. However the MaxDPB size
+* presented in the look up table gas_ih264_lvl_tbl is in units of 512
+* bytes. Hence the expression is modified accordingly.
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_dpb_size(WORD32 level, WORD32 pic_size);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Used to get reference picture buffer size for a given level and
+* padding used
+*
+* @par Description:
+* Used to get reference picture buffer size for a given level and padding used
+* Each picture is padded on all four sides
+*
+* @param[in] pic_size
+* Number of luma samples (Width * Height)
+*
+* @param[in] level
+* Level
+*
+* @param[in] horz_pad
+* Total padding used in horizontal direction
+*
+* @param[in] vert_pad
+* Total padding used in vertical direction
+*
+* @returns Total picture buffer size
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_total_pic_buf_size(WORD32 pic_size, WORD32 level,
+ WORD32 horz_pad, WORD32 vert_pad,
+ WORD32 num_ref_frames,
+ WORD32 num_reorder_frames);
+
+/**
+*******************************************************************************
+*
+* @brief Returns MV bank buffer size for a given number of luma samples
+*
+* @par Description:
+* For given number of luma samples one MV bank size is computed.
+* Each MV bank includes pu_map and enc_pu_t for all the min PUs(4x4) in a picture
+*
+* @param[in] num_luma_samples
+* Max number of luma pixels in the frame
+*
+* @returns Total MV Bank size
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Function to initialize ps_pic_buf structs add pic buffers to
+* buffer manager in case of non-shared mode
+*
+* @par Description:
+* Function to initialize ps_pic_buf structs add pic buffers to
+* buffer manager in case of non-shared mode
+* To be called once per stream or for every reset
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @returns error status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_buf_mgr_add_bufs(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Function to add buffers to MV Bank buffer manager
+*
+* @par Description:
+* Function to add buffers to MV Bank buffer manager. To be called once per
+* stream or for every reset
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @returns error status
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Function to initialize quant params structure
+*
+* @par Description:
+* The forward quantization modules depends on qp/6, qp mod 6, forward scale
+* matrix, forward threshold matrix, weight list. The inverse quantization
+* modules depends on qp/6, qp mod 6, inverse scale matrix, weight list.
+* These params are initialized in this function.
+*
+* @param[in] ps_proc
+* pointer to process context
+*
+* @param[in] qp
+* quantization parameter
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ih264e_init_quant_params(process_ctxt_t *ps_proc, int qp);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Initialize AIR mb frame Map
+*
+* @par Description:
+* Initialize AIR mb frame map
+* MB frame map indicates which frame an Mb should be coded as intra according to AIR
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @returns error_status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_init_air_map(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Codec level initializations
+*
+* @par Description:
+* Initializes the codec with parameters that needs to be set before encoding
+* first frame
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] ps_inp_buf
+* Pointer to input buffer context
+*
+* @returns error_status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_codec_init(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+* Picture level initializations
+*
+* @par Description:
+* Before beginning to encode the frame, the current function initializes all
+* the ctxts (proc, entropy, me, ...) basing on the input configured params.
+* It locates space for storing recon in the encoder picture buffer set, fetches
+* reference frame from encoder picture buffer set. Calls RC pre-enc to get
+* qp and pic type for the current frame. Queues proc jobs so that
+* the other threads can begin encoding. In brief, this function sets up the
+* tone for the entire encoder.
+*
+* @param[in] ps_codec
+* Pointer to codec context
+*
+* @param[in] ps_inp_buf
+* Pointer to input buffer context
+*
+* @returns error_status
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf);
+
+#endif /* IH264E_UTILS_H_ */
diff --git a/encoder/ih264e_version.c b/encoder/ih264e_version.c
new file mode 100755
index 0000000..3dcba8d
--- /dev/null
+++ b/encoder/ih264e_version.c
@@ -0,0 +1,143 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_version.c
+*
+* @brief
+* Contains version info for H264 encoder
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ih264e_get_version()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* system include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* user include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "ih264_defs.h"
+#include "ih264_debug.h"
+#include "ih264_structs.h"
+#include "ih264e_version.h"
+
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+ * Name of the codec and target platform (All Cortex A processors in this case)
+ */
+#define CODEC_NAME "H264ENC"
+/**
+ * Codec release type, production or evaluation
+ */
+#define CODEC_RELEASE_TYPE "production"
+/**
+ * Version string. First two digits signify major version and last two minor
+ */
+#define CODEC_RELEASE_VER "01.00"
+/**
+ * Vendor name
+ */
+#define CODEC_VENDOR "ITTIAM"
+
+#define MAX_STRLEN 511
+/**
+*******************************************************************************
+* Formats the version string into version_string, which must be a buffer of at
+* least MAX_STRLEN bytes. The result is always NUL-terminated; output that does
+* not fit in MAX_STRLEN bytes is truncated.
+*
+* Expands to a single snprintf() expression without a trailing semicolon, so
+* it is used like a function call: VERSION(buf, name, type, ver, vendor);
+*******************************************************************************
+*/
+#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor) \
+    snprintf((version_string), MAX_STRLEN, \
+             "@(#)Id:%s_%s Ver:%s Released by %s Build: %s @ %s", \
+             (codec_name), (codec_release_type), (codec_release_ver), \
+             (codec_vendor), __DATE__, __TIME__)
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills the version info in the given char pointer
+*
+* @par Description:
+*  Builds the version string in a local buffer and copies it, including the
+*  terminating NUL, to pc_version when the caller's buffer is large enough
+*
+* @param[out] pc_version
+*  Buffer that receives the version string
+*
+* @param[in] u4_version_bufsize
+*  Size of the buffer passed
+*
+* @returns IV_SUCCESS when the string was copied; IV_FAIL when pc_version is
+*  NULL or the buffer is too small (pc_version is not written in that case)
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize)
+{
+    CHAR ac_version_tmp[MAX_STRLEN];
+    size_t version_size;
+
+    /* nothing to fill in without a destination */
+    if (NULL == pc_version)
+    {
+        return IV_FAIL;
+    }
+
+    VERSION(ac_version_tmp, CODEC_NAME, CODEC_RELEASE_TYPE, CODEC_RELEASE_VER,
+            CODEC_VENDOR);
+
+    /* space needed by the caller, terminating NUL included */
+    version_size = strnlen(ac_version_tmp, MAX_STRLEN) + 1;
+
+    if (u4_version_bufsize < version_size)
+    {
+        return IV_FAIL;
+    }
+
+    memcpy(pc_version, ac_version_tmp, version_size);
+    return IV_SUCCESS;
+}
diff --git a/encoder/ih264e_version.h b/encoder/ih264e_version.h
new file mode 100755
index 0000000..303a1e2
--- /dev/null
+++ b/encoder/ih264e_version.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_version.h
+*
+* @brief
+* Contains the declaration of the function that reports the H264 encoder version
+*
+* @author
+* ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IH264E_VERSION_H_
+#define IH264E_VERSION_H_
+
+/**
+*******************************************************************************
+*
+* @brief
+* Fills the version info in the given char pointer
+*
+* @par Description:
+* Fills the version info in the given char pointer
+*
+* @param[out] pc_version
+* Pointer to hold version info
+*
+* @param[in] u4_version_bufsize
+* Size of the buffer passed
+*
+* @returns error status
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_STATUS_T ih264e_get_version(CHAR *pc_version, UWORD32 u4_version_bufsize);
+
+#endif /* IH264E_VERSION_H_ */
diff --git a/encoder/ime.c b/encoder/ime.c
new file mode 100755
index 0000000..c89aaab
--- /dev/null
+++ b/encoder/ime.c
@@ -0,0 +1,836 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ime.c
+ *
+ * @brief
+ *
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * -
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime.h"
+#include "ime_statistics.h"
+
+/**
+*******************************************************************************
+*
+* @brief Diamond Search
+*
+* @par Description:
+*  Starting from the current mv of the mb partition, the function evaluates the
+*  four vertices of a radius-1 diamond, (x-1, y), (x+1, y), (x, y-1) and
+*  (x, y+1), and moves the centre to the vertex with the least cost. This is
+*  repeated for at most ps_me_ctxt->u4_num_layers layers. The search stops
+*  early when a vertex would fall outside the search range, or when no vertex
+*  beats the centre; in the latter case ps_me_ctxt->s_mb_part.u4_exit is set.
+*
+*  cost = sad + u4_lambda_motion * (mv bits of x + mv bits of y), where the mv
+*  bits are looked up in ps_me_ctxt->pu1_mv_bits.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. Search range, strides, buffers, lambda and number of
+*  layers are read from it; the mb partition context inside it is updated
+*
+* @returns none. If a position cheaper than the cost already stored in the mb
+*  partition context is found, its i4_mb_cost, i4_mb_distortion and s_mv_curr
+*  are updated
+*
+* @remarks Diamond Srch, radius is 1
+*
+*******************************************************************************
+*/
+void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt)
+{
+    /* MB partition info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* lagrange parameter */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* srch range */
+    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
+    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
+    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
+    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
+
+    /* pointers to src macro block and reference */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* least cost */
+    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
+
+    /* least sad */
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+
+    /* predicted mv, subtracted from the candidate to index the mv bits table */
+    WORD32 i4_mv_pred_x = ps_mb_part->s_mv_pred.i2_mvx;
+    WORD32 i4_mv_pred_y = ps_mb_part->s_mv_pred.i2_mvy;
+
+    /* centre of the diamond: mv with best sad during initial evaluation */
+    WORD16 i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
+    WORD16 i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
+
+    /* best position found so far */
+    WORD16 i2_mv_u_x = i2_mvx;
+    WORD16 i2_mv_u_y = i2_mvy;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* vertex offsets {dx, dy}; entry k is the position whose sad is taken */
+    /* from i4_sad[k]                                                      */
+    static const WORD32 ai4_dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
+
+    /* temp var */
+    WORD32 i4_sad[4];
+    UWORD8 *pu1_ref;
+    WORD32 i;
+
+    /* Diamond search Iteration Max Cnt */
+    UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
+
+    while (u4_num_layers--)
+    {
+        /* FIXME : is this the right way to check for out of bounds ? */
+        if ( (i2_mvx - 1 < i4_srch_range_w) ||
+             (i2_mvx + 1 > i4_srch_range_e) ||
+             (i2_mvy - 1 < i4_srch_range_n) ||
+             (i2_mvy + 1 > i4_srch_range_s) )
+        {
+            break;
+        }
+
+        pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
+
+        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
+                                                pu1_curr_mb,
+                                                i4_ref_strd,
+                                                i4_src_strd,
+                                                i4_sad);
+
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
+
+        /* compute the cost of every vertex and keep the cheapest one */
+        for (i = 0; i < 4; i++)
+        {
+            WORD32 i4_cand_x = i2_mvx + ai4_dia[i][0];
+            WORD32 i4_cand_y = i2_mvy + ai4_dia[i][1];
+
+            /* multiply rather than shift: mv components can be negative and */
+            /* left shifting a negative value is undefined behaviour         */
+            WORD32 i4_cost = i4_sad[i] + u4_lambda_motion
+                            * ( pu1_mv_bits[(i4_cand_x * 4) - i4_mv_pred_x]
+                              + pu1_mv_bits[(i4_cand_y * 4) - i4_mv_pred_y] );
+
+            if (i4_cost_least > i4_cost)
+            {
+                i4_cost_least = i4_cost;
+                i4_distortion_least = i4_sad[i];
+
+                i2_mv_u_x = (WORD16)i4_cand_x;
+                i2_mv_u_y = (WORD16)i4_cand_y;
+            }
+        }
+
+        /* no vertex is better than the centre, the search has converged */
+        if ((i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
+        {
+            ps_mb_part->u4_exit = 1;
+            break;
+        }
+
+        /* re-centre the diamond on the best vertex */
+        i2_mvx = i2_mv_u_x;
+        i2_mvy = i2_mv_u_y;
+    }
+
+    if (i4_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->i4_mb_cost = i4_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
+    }
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector among the tentative mv
+* candidates chosen.
+*
+* @par Description:
+*  This function determines the position in the search window at which the motion
+*  estimation should begin in order to minimise the number of search iterations.
+*  Every distinct candidate in ps_me_ctxt->as_mv_init_search is evaluated as
+*  cost = sad + u4_lambda_motion * (mv bits of x + mv bits of y); candidates
+*  that repeat an earlier entry of the list are skipped.
+*
+* @param[in,out] ps_me_ctxt
+*  pointer to me context. Candidate list, buffers, strides and lambda are read
+*  from it; the mb partition context inside it is updated
+*
+* @returns none. If the best candidate is cheaper than the cost already stored
+*  in the mb partition context, its u4_srch_pos_idx, i4_mb_cost,
+*  i4_mb_distortion and s_mv_curr are updated
+*
+* @remarks Evaluation of the skip candidate with a bias favouring skip mode was
+*  present here only as disabled code and is not performed
+*
+*******************************************************************************
+*/
+void ime_evaluate_init_srchposn_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    )
+{
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* candidate mv cnt */
+    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates;
+
+    /* list of candidate mvs */
+    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search;
+
+    /* pointers to src macro block and reference */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* enabled fast sad computation */
+    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
+
+    /* SAD(distortion metric) of the 16x16 block */
+    WORD32 i4_mb_distortion;
+
+    /* cost = distortion + u4_lambda_motion * rate */
+    WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
+
+    /* mb partitions info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* temp var */
+    UWORD32 i, j, u4_srch_pos_idx = 0;
+    UWORD8 *pu1_ref = NULL;
+
+    /* Carry out a search using each of the candidate motion vector pairs as predictors. */
+    /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
+    for (i = 0; i < u4_num_candidates; i++)
+    {
+        /* a candidate identical to an earlier one need not be evaluated again */
+        WORD32 i4_is_duplicate = 0;
+
+        for (j = 0; j < i; j++)
+        {
+            if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
+                 (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) )
+            {
+                i4_is_duplicate = 1;
+                break;
+            }
+        }
+
+        if (i4_is_duplicate)
+        {
+            continue;
+        }
+
+        /* adjust ref pointer */
+        pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
+
+        /* compute distortion; the least cost so far is passed along to the sad function */
+        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
+        DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
+
+        /* compute cost. Multiply rather than shift: mv components can be */
+        /* negative and left shifting a negative value is undefined       */
+        i4_mb_cost = i4_mb_distortion + u4_lambda_motion
+                        * ( pu1_mv_bits[(ps_mv_list[i].i2_mvx * 4) - ps_mb_part->s_mv_pred.i2_mvx]
+                          + pu1_mv_bits[(ps_mv_list[i].i2_mvy * 4) - ps_mb_part->s_mv_pred.i2_mvy] );
+
+        if (i4_mb_cost < i4_mb_cost_least)
+        {
+            i4_mb_cost_least = i4_mb_cost;
+
+            i4_distortion_least = i4_mb_distortion;
+
+            u4_srch_pos_idx = i;
+        }
+    }
+
+    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
+    {
+        ps_mb_part->u4_srch_pos_idx = u4_srch_pos_idx;
+        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+        ps_mb_part->i4_mb_distortion = i4_distortion_least;
+        ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[u4_srch_pos_idx].i2_mvx;
+        ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[u4_srch_pos_idx].i2_mvy;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching full pixel predictor within the search
+* range
+*
+* @par Description:
+* Narrows the search window so that it stays within ai2_srch_boundaries[] of
+* the current best candidate (s_mb_part.s_mv_curr), runs the search selected by
+* u4_me_speed_preset, and finally scales s_mv_curr by 4 (the sub pel stage
+* treats s_mv_curr >> 2 as the full pel mv).
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context; i4_srch_range_w/e/n/s and s_mb_part.s_mv_curr are
+* updated
+*
+* @returns none
+*
+* @remarks Only DMND_SRCH is handled; any other preset trips assert(0)
+*
+*******************************************************************************
+*/
+void ime_full_pel_motion_estimation_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    )
+{
+    /* mb part info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /******************************************************************/
+    /* Modify Search range about initial candidate instead of zero mv */
+    /******************************************************************/
+    /*
+     * FIXME: The motion vectors in a way can become unbounded. It may so happen that
+     * MV might exceed the limit of the profile configured.
+     */
+    ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
+                                      -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
+    ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
+                                      ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
+    ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
+                                      -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
+    ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
+                                      ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
+
+    /************************************************************/
+    /* Traverse about best initial candidate for mv             */
+    /************************************************************/
+    switch (ps_me_ctxt->u4_me_speed_preset)
+    {
+        case DMND_SRCH:
+            ime_diamond_search_16x16(ps_me_ctxt);
+            break;
+        default:
+            assert(0);
+            break;
+    }
+
+    /* scale the mv by 4. A multiplication is used because left shifting */
+    /* a negative value is undefined behaviour in C                      */
+    ps_mb_part->s_mv_curr.i2_mvx = (WORD16)(ps_mb_part->s_mv_curr.i2_mvx * 4);
+    ps_mb_part->s_mv_curr.i2_mvy = (WORD16)(ps_mb_part->s_mv_curr.i2_mvy * 4);
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching sub pixel predictor within the search
+* range
+*
+* @par Description:
+* Evaluates the eight half pel positions around the full pel motion vector
+* held in s_mb_part.s_mv_curr. Their SADs come from one call to
+* pf_ime_sub_pel_compute_sad_16x16; each is turned into a cost
+* (SAD + lambda * mv bits) and compared with the cost already stored in
+* s_mb_part. The position with the least cost is written back to s_mb_part.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context. s_mb_part (cost, distortion, mv, pu1_best_hpel_buf)
+* is updated; unless HP_PL is defined, pu1_half_x / pu1_half_y / pu1_half_xy
+* are also repointed when a position of that plane becomes the best one
+*
+* @returns none
+*
+* @remarks pu1_best_hpel_buf stays NULL when no half pel position improves on
+* the incoming cost (and always when HP_PL is defined)
+*
+*******************************************************************************
+*/
+void ime_sub_pel_motion_estimation_16x16
+    (
+        me_ctxt_t *ps_me_ctxt
+    )
+{
+    /* pointer to src macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->u4_hp_buf_strd;
+
+    /**************************************************************/
+    /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
+    /* left side of full pel, pu1_half_y to the one on the top    */
+    /* side and pu1_half_xy to the one on the top left side.      */
+    /* pf_ime_sub_pel_compute_sad_16x16 expects the right, bottom */
+    /* and bottom right half pel planes instead, hence the        */
+    /* adjustments made here                                      */
+    /**************************************************************/
+    UWORD8 *pu1_ref_mb_half_x = ps_me_ctxt->pu1_half_x + 1;
+    UWORD8 *pu1_ref_mb_half_y = ps_me_ctxt->pu1_half_y + 1 + i4_ref_strd;
+    UWORD8 *pu1_ref_mb_half_xy = ps_me_ctxt->pu1_half_xy + 1 + i4_ref_strd;
+
+    /* mb partitions info */
+    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->s_mb_part;
+
+    /* SAD(distortion metric) of an mb */
+    WORD32 i4_mb_distortion;
+    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
+
+    /* cost = distortion + u4_lambda_motion * rate */
+    WORD32 i4_mb_cost;
+    WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
+
+    /* Best half pel buffer */
+    UWORD8 *pu1_best_hpel_buf = NULL;
+
+    /* mv bits */
+    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
+
+    /* lambda - lagrange constant */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* best mv found so far, starts as the incoming mv */
+    WORD16 i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
+    WORD16 i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
+
+    /* full pel mv (>> 2) brought back to the units of s_mv_curr. A        */
+    /* multiplication is used because left shifting a negative value is    */
+    /* undefined behaviour in C                                            */
+    WORD32 i4_mvx_base = (ps_mb_part->s_mv_curr.i2_mvx >> 2) * 4;
+    WORD32 i4_mvy_base = (ps_mb_part->s_mv_curr.i2_mvy >> 2) * 4;
+
+    /**************************************/
+    /* layout of ai4_sad                  */
+    /* [0] - half x =  1, half y =  0     */
+    /* [1] - half x = -1, half y =  0     */
+    /* [2] - half x =  0, half y =  1     */
+    /* [3] - half x =  0, half y = -1     */
+    /* [4] - half x =  1, half y =  1     */
+    /* [5] - half x = -1, half y =  1     */
+    /* [6] - half x =  1, half y = -1     */
+    /* [7] - half x = -1, half y = -1     */
+    /**************************************/
+    WORD32 ai4_sad[8];
+
+    /* temp var */
+    WORD32 i, j;
+
+    /************************************************************/
+    /* Evaluate half pel                                        */
+    /************************************************************/
+    ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
+                                                 pu1_ref_mb_half_y,
+                                                 pu1_ref_mb_half_xy,
+                                                 i4_src_strd, i4_ref_strd,
+                                                 ai4_sad);
+
+    /* Half x plane : i = 0 is the right position, i = 1 the left one */
+    for(i = 0; i < 2; i++)
+    {
+        WORD32 mv_x_tmp = i4_mvx_base + 2 - (i * 4);
+        WORD32 mv_y_tmp = i4_mvy_base;
+
+        i4_mb_distortion = ai4_sad[i];
+
+        /* compute cost */
+        i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
+                        + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
+
+        if (i4_mb_cost < i4_mb_cost_least)
+        {
+            i4_mb_cost_least = i4_mb_cost;
+            i4_distortion_least = i4_mb_distortion;
+            i2_mv_u_x = (WORD16)mv_x_tmp;
+            i2_mv_u_y = (WORD16)mv_y_tmp;
+
+#ifndef HP_PL /*choosing whether left or right half_x*/
+            ps_me_ctxt->pu1_half_x = pu1_ref_mb_half_x - i;
+            pu1_best_hpel_buf = pu1_ref_mb_half_x - i;
+#endif
+        }
+    }
+
+    /* Half y plane : i = 0 is the bottom position, i = 1 the top one */
+    for(i = 0; i < 2; i++)
+    {
+        WORD32 mv_x_tmp = i4_mvx_base;
+        WORD32 mv_y_tmp = i4_mvy_base + 2 - (i * 4);
+
+        i4_mb_distortion = ai4_sad[2 + i];
+
+        /* compute cost */
+        i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
+                        + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
+
+        if (i4_mb_cost < i4_mb_cost_least)
+        {
+            i4_mb_cost_least = i4_mb_cost;
+            i4_distortion_least = i4_mb_distortion;
+            i2_mv_u_x = (WORD16)mv_x_tmp;
+            i2_mv_u_y = (WORD16)mv_y_tmp;
+
+#ifndef HP_PL/*choosing whether top or bottom half_y*/
+            ps_me_ctxt->pu1_half_y = pu1_ref_mb_half_y - i*(i4_ref_strd);
+            pu1_best_hpel_buf = pu1_ref_mb_half_y - i*(i4_ref_strd);
+#endif
+        }
+    }
+
+    /* Half xy plane : j selects bottom (0) / top (1), i selects right (0) / left (1) */
+    for(j = 0; j < 2; j++)
+    {
+        for(i = 0; i < 2; i++)
+        {
+            WORD32 mv_x_tmp = i4_mvx_base + 2 - (i * 4);
+            WORD32 mv_y_tmp = i4_mvy_base + 2 - (j * 4);
+
+            i4_mb_distortion = ai4_sad[4 + i + 2 * j];
+
+            /* compute cost */
+            i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
+                            + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
+
+            if (i4_mb_cost < i4_mb_cost_least)
+            {
+                i4_mb_cost_least = i4_mb_cost;
+                i4_distortion_least = i4_mb_distortion;
+                i2_mv_u_x = (WORD16)mv_x_tmp;
+                i2_mv_u_y = (WORD16)mv_y_tmp;
+
+#ifndef HP_PL /*choosing between four half_xy */
+                ps_me_ctxt->pu1_half_xy = pu1_ref_mb_half_xy - j*(i4_ref_strd) - i;
+                pu1_best_hpel_buf = pu1_ref_mb_half_xy - j*(i4_ref_strd) - i;
+#endif
+            }
+        }
+    }
+
+    ps_mb_part->i4_mb_cost = i4_mb_cost_least;
+    ps_mb_part->i4_mb_distortion = i4_distortion_least;
+    ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
+    ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
+    ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes cost of skip macroblocks
+*
+* @par Description:
+* When the skip mv lies inside the search range and sits on a full pel
+* position, the SAD of the current mb against the reference at that position
+* is computed, a bias favouring skip is subtracted, and the result replaces the
+* entry in ps_smb_part_info if it is not worse than the cost stored there.
+* Otherwise the function returns without touching any state.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me ctxt; i4_min_sad and u4_min_sad_reached are updated when the
+* computed distortion does not exceed i4_min_sad (or, with u4_use_stat_sad,
+* when the sad routine reports u4_is_nonzero == 0)
+*
+* @param[in] pv_skip_mv
+* pointer to skip mv (an ime_mv_t)
+*
+* @param[in,out] ps_smb_part_info
+* mb partition info receiving cost, distortion and mv of the skip candidate
+*
+* @param[in] u4_use_stat_sad
+* 1 selects pf_ime_compute_sad_stat_luma_16x16, any other value selects
+* pf_ime_compute_sad_16x16
+*
+* @returns none
+*
+* @remarks
+* NOTE: while computing the skip cost, do not enable early exit from compute
+* sad function because, a negative bias gets added later
+*
+*******************************************************************************
+*/
+void ime_compute_skip_cost
+    (
+        me_ctxt_t *ps_me_ctxt,
+        void *pv_skip_mv,
+        mb_part_ctxt *ps_smb_part_info,
+        UWORD32 u4_use_stat_sad
+    )
+{
+    /* pointers to src & ref macro block */
+    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
+    UWORD8 *pu1_ref_mb = ps_me_ctxt->pu1_ref_buf_luma;
+
+    /* strides */
+    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
+    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
+
+    /* enabled fast sad computation */
+    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
+
+    /* SAD(distortion metric) of an mb */
+    WORD32 i4_mb_distortion;
+
+    /* cost = distortion + u4_lambda_motion * rate */
+    WORD32 i4_mb_cost;
+
+    /* Motion vectors in full-pel units */
+    WORD16 mv_x, mv_y;
+
+    /* lambda - lagrange constant */
+    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
+
+    /* skip mv */
+    ime_mv_t *ps_skip_mv = pv_skip_mv, s_clip_skip_mv;
+
+    /* temp var */
+    UWORD8 *pu1_ref;
+    UWORD32 u4_is_nonzero;
+
+    /* NOTE(review): the skip mv is compared with i4_srch_range_* as is, yet it */
+    /* is divided by 4 below to obtain the full pel mv - confirm that both are  */
+    /* expressed in the same units                                              */
+    s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, ps_skip_mv->i2_mvx);
+    s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, ps_skip_mv->i2_mvy);
+
+    if ((s_clip_skip_mv.i2_mvx != ps_skip_mv->i2_mvx) ||
+        (s_clip_skip_mv.i2_mvy != ps_skip_mv->i2_mvy))
+    {
+        /* skip motion vector not with in bounds */
+        /* it is possible that mv is already evaluated */
+        return ;
+    }
+
+    mv_x = (ps_skip_mv->i2_mvx + 2) >> 2;
+    mv_y = (ps_skip_mv->i2_mvy + 2) >> 2;
+
+    /* only skip mvs on a full pel position are evaluated here. The scaling */
+    /* uses a multiplication because left shifting a negative value is      */
+    /* undefined behaviour in C                                             */
+    if ((mv_x * 4) != ps_skip_mv->i2_mvx || (mv_y * 4) != ps_skip_mv->i2_mvy)
+    {
+        return ;
+    }
+
+    /* adjust ref pointer */
+    pu1_ref = pu1_ref_mb + mv_x + (mv_y * i4_ref_strd);
+
+    if(u4_use_stat_sad == 1)
+    {
+        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd,
+                                                       ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion, &u4_is_nonzero);
+
+        /*
+         * NOTE The check here is two fold.
+         * One is checking if the min sad, which is a configurable parameter,
+         * has been reached. If so, no further mode evaluation is needed.
+         * Similarly, when u4_is_nonzero is 0 there is no point in doing any
+         * further mode evaluation, as sad is a non negative quantity.
+         */
+        /*
+         * NOTE to disable the zero check, it is enough to set u4_is_nonzero
+         * to a non zero value
+         */
+        if(u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
+        {
+            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
+            ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
+        }
+    }
+    else
+    {
+        /* INT_MAX as the sad limit, see the remark about early exit above */
+        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, INT_MAX, &i4_mb_distortion);
+
+        if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
+        {
+            ps_me_ctxt->i4_min_sad = i4_mb_distortion;
+            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
+        }
+    }
+
+    /* for skip mode cost & distortion are identical
+     * But we shall add a bias to favor skip mode.
+     * Doc. JVT B118 Suggests SKIP_BIAS as 16.
+     * TODO : Empirical analysis of SKIP_BIAS is necessary */
+#define SKIP_BIAS 8
+    i4_mb_cost = i4_mb_distortion - (u4_lambda_motion * SKIP_BIAS);
+#undef SKIP_BIAS
+
+    if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
+    {
+        ps_smb_part_info->i4_mb_cost = i4_mb_cost;
+        ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
+        ps_smb_part_info->s_mv_curr.i2_mvx = ps_skip_mv->i2_mvx;
+        ps_smb_part_info->s_mv_curr.i2_mvy = ps_skip_mv->i2_mvy;
+    }
+}
+
diff --git a/encoder/ime.h b/encoder/ime.h
new file mode 100755
index 0000000..5c039e8
--- /dev/null
+++ b/encoder/ime.h
@@ -0,0 +1,209 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ime.h
+ *
+ * @brief
+ * Contains the NUM_LAYERS constant and the extern declarations of the 16x16 motion estimation routines
+ *
+ * @author
+ * Ittiam
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+#ifndef IME_H_
+#define IME_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Number of iterations before exiting during diamond search
+ * NOTE(review): the code reading this value is not visible from this header;
+ * presumably ime_diamond_search_16x16 - confirm
+******************************************************************************
+ */
+#define NUM_LAYERS 16
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+
+/**
+*******************************************************************************
+*
+* @brief Diamond Search
+*
+* @par Description:
+* This function computes the sad at vertices of several layers of diamond grid
+* at a time. The number of layers of diamond grid that would be evaluated is
+* configurable. The function computes the sad at vertices of a diamond grid. If
+* the sad at the center of the diamond grid is lesser than the sad at any other
+* point of the diamond grid, the function marks the candidate Mb partition as
+* mv.
+*
+* @param[in] ps_me_ctxt
+* pointer to me context (the only argument of this function)
+*
+* @returns none (declared void). NOTE(review): the resulting mv, distortion and
+* cost are presumably written back through ps_me_ctxt - the definition is not
+* visible from this header, confirm
+*
+* @remarks This module cannot be part of the final product due to its lack of
+* computational feasibility. This is only for quality eval purposes.
+*
+*******************************************************************************
+*/
+extern void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt);
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes the best motion vector among the tentative mv
+* candidates chosen.
+*
+* @par Description:
+* This function determines the position in the search window at which the motion
+* estimation should begin in order to minimise the number of search iterations.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context. It supplies the candidate list, lambda, the fast sad
+* flag and the source / reference buffers, and holds the mb partition ctxt
+* (s_mb_part) that receives the result
+*
+* @returns none (declared void); the chosen mv and its distortion and cost are
+* stored in ps_me_ctxt->s_mb_part when they beat the cost already stored there
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_evaluate_init_srchposn_16x16
+ (
+ me_ctxt_t *ps_me_ctxt
+ );
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching full pixel predictor within the search
+* range
+*
+* @par Description:
+* This function restricts the search range about the current best candidate,
+* runs the search algorithm selected by u4_me_speed_preset and leaves the
+* resulting mv, scaled by 4, in s_mb_part.s_mv_curr.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context (the only argument of this function)
+*
+* @returns none (declared void); results are stored in ps_me_ctxt->s_mb_part
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_full_pel_motion_estimation_16x16
+ (
+ me_ctxt_t *ps_me_ctxt
+ );
+
+/**
+*******************************************************************************
+*
+* @brief Searches for the best matching sub pixel predictor within the search
+* range
+*
+* @par Description:
+* This function evaluates the half pel sample points around the full pel
+* motion vector. The vector with least cost is chosen as the mv for the
+* current mb.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me context (the only argument of this function); s_mb_part is
+* updated with the chosen mv, its distortion and cost, and the best half pel
+* buffer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+extern void ime_sub_pel_motion_estimation_16x16
+ (
+ me_ctxt_t *ps_me_ctxt
+ );
+
+/**
+*******************************************************************************
+*
+* @brief This function computes cost of skip macroblocks
+*
+* @par Description:
+* Computes the cost of coding the current mb with the given skip mv and stores
+* it in ps_smb_part_info when it is not worse than the cost held there.
+*
+* @param[in,out] ps_me_ctxt
+* pointer to me ctxt
+*
+* @param[in] pv_skip_mv
+* pointer to skip mv (passed as void *, read as an ime_mv_t)
+*
+* @param[in,out] ps_smb_part_info
+* mb partition info that receives cost, distortion and mv of the skip candidate
+*
+* @param[in] u4_use_stat_sad
+* 1 selects the sad routine that also reports whether the block is non zero
+*
+* @returns none
+*
+* @remarks
+* NOTE: while computing the skip cost, do not enable early exit from compute
+* sad function because, a negative bias gets added later
+*
+*******************************************************************************
+*/
+extern void ime_compute_skip_cost
+ (
+ me_ctxt_t *ps_me_ctxt,
+ void *pv_skip_mv,
+ mb_part_ctxt *ps_smb_part_info,
+ UWORD32 u4_use_stat_sad
+ );
+
+
+#endif /* IME_H_ */
diff --git a/encoder/ime_defs.h b/encoder/ime_defs.h
new file mode 100755
index 0000000..14d9c55
--- /dev/null
+++ b/encoder/ime_defs.h
@@ -0,0 +1,59 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_defs.h
+*
+* @brief
+* Constant definitions (candidate labels, direction labels, MB size, search presets) used by the ime module
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IME_DEFS_H_
+#define _IME_DEFS_H_
+
+
+/* Macros to label the initial search candidates. They serve as indices into
+ * the candidate mv list (e.g. SKIP_CAND selects the skip candidate).
+ * NOTE(review): confirm the ordering against the code that fills the list */
+#define SKIP_CAND 0
+#define ZERO_CAND 1
+#define LEFT_CAND 2
+#define TOP_CAND 3
+#define TOPR_CAND 4
+
+#define NONE 0 /* NOTE(review): NONE..BOTTOM look like direction labels; their users are not visible here */
+#define LEFT 1
+#define RIGHT 2
+#define TOP 3
+#define BOTTOM 4
+
+#define MB_SIZE 16 /* width and height of a macroblock, in pixels */
+
+#define FULL_SRCH 0 /* FULL_SRCH..HEX_SRCH: search algorithm ids, compared against u4_me_speed_preset */
+#define DMND_SRCH 100 /* diamond search, the only id handled by ime_full_pel_motion_estimation_16x16 */
+#define NSTEP_SRCH 50
+#define HEX_SRCH 75
+
+#endif /*_IME_DEFS_H_*/
+
diff --git a/encoder/ime_distortion_metrics.c b/encoder/ime_distortion_metrics.c
new file mode 100755
index 0000000..23a1fbc
--- /dev/null
+++ b/encoder/ime_distortion_metrics.c
@@ -0,0 +1,1262 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics.c
+*
+* @brief
+* This file contains definitions of routines that compute distortion
+* between two macro/sub blocks of identical dimensions
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ime_sub_pel_compute_sad_16x16()
+* - ime_calculate_sad4_prog()
+* - ime_calculate_sad3_prog()
+* - ime_calculate_sad2_prog()
+* - ime_compute_sad_16x16()
+* - ime_compute_sad_16x16_fast()
+* - ime_compute_sad_16x16_ea8()
+* - ime_compute_sad_8x8()
+* - ime_compute_sad_4x4()
+* - ime_compute_sad_16x8()
+* - ime_compute_satqd_16x16_lumainter()
+* - ime_compute_satqd_8x16_chroma()
+* - ime_compute_satqd_16x16_lumaintra()
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime_statistics.h"
+#include "ime_platform_macros.h"
+#include "ime_distortion_metrics.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) at all subpel points about the src location
+*
+* @par Description
+* Computes, for a 16x16 source block, the SAD against eight reference blocks:
+* the three half pel blocks handed in and their left / top / top-left
+* neighbours inside the same half pel planes.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ref_half_x
+* UWORD8 pointer to half pel buffer (read at offsets 0 and -1)
+*
+* @param[in] pu1_ref_half_y
+* UWORD8 pointer to half pel buffer (read at offsets 0 and -ref_strd)
+*
+* @param[in] pu1_ref_half_xy
+* UWORD8 pointer to half pel buffer (read at offsets 0, -1, -ref_strd and
+* -ref_strd - 1)
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] ref_strd
+* integer ref stride
+*
+* @param[out] pi4_sad
+* integer evaluated sad
+* pi4_sad[0] - half x
+* pi4_sad[1] - half x - 1
+* pi4_sad[2] - half y
+* pi4_sad[3] - half y - strd
+* pi4_sad[4] - half xy
+* pi4_sad[5] - half xy - 1
+* pi4_sad[6] - half xy - strd
+* pi4_sad[7] - half xy - 1 - strd
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
+                                   UWORD8 *pu1_ref_half_x,
+                                   UWORD8 *pu1_ref_half_y,
+                                   UWORD8 *pu1_ref_half_xy,
+                                   WORD32 src_strd,
+                                   WORD32 ref_strd,
+                                   WORD32 *pi4_sad)
+{
+    /* one read pointer per evaluated position, in pi4_sad[] order */
+    UWORD8 *apu1_ref[8];
+    WORD32 i4_row, i4_col, i4_pos;
+
+    apu1_ref[0] = pu1_ref_half_x;
+    apu1_ref[1] = pu1_ref_half_x - 1;
+    apu1_ref[2] = pu1_ref_half_y;
+    apu1_ref[3] = pu1_ref_half_y - ref_strd;
+    apu1_ref[4] = pu1_ref_half_xy;
+    apu1_ref[5] = pu1_ref_half_xy - 1;
+    apu1_ref[6] = pu1_ref_half_xy - ref_strd;
+    apu1_ref[7] = pu1_ref_half_xy - ref_strd - 1;
+
+    memset(pi4_sad, 0, 8 * sizeof(WORD32));
+
+    for(i4_row = 0; i4_row < MB_SIZE; i4_row++)
+    {
+        for(i4_pos = 0; i4_pos < 8; i4_pos++)
+        {
+            UWORD8 *pu1_ref = apu1_ref[i4_pos];
+            WORD32 i4_row_sad = 0;
+
+            for(i4_col = 0; i4_col < MB_SIZE; i4_col++)
+            {
+                WORD32 i4_diff = pu1_src[i4_col] - pu1_ref[i4_col];
+
+                i4_row_sad += ABS(i4_diff);
+            }
+
+            pi4_sad[i4_pos] += i4_row_sad;
+
+            /* step this position to its next row */
+            apu1_ref[i4_pos] = pu1_ref + ref_strd;
+        }
+
+        pu1_src += src_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function computes the sad at vertices of diamond grid
+* centered at reference pointer and at unit distance from it.
+*
+* @param[in] pu1_ref
+* UWORD8 pointer to the reference (centre of the diamond)
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+* integer reference stride
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[out] pi4_sad
+* pointer to integer array evaluated sad
+* pi4_sad[0] - left   (pu1_ref - 1)
+* pi4_sad[1] - right  (pu1_ref + 1)
+* pi4_sad[2] - top    (pu1_ref - ref_strd)
+* pi4_sad[3] - bottom (pu1_ref + ref_strd)
+*
+* @returns none; the four sads are written to pi4_sad
+*
+* @remarks Rows are advanced by the signed strides, so strides smaller than
+* MB_SIZE (or negative) do not wrap an unsigned row offset
+*
+*******************************************************************************
+*/
+void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
+    UWORD8 *left_ptr = pu1_ref - 1;
+    UWORD8 *right_ptr = pu1_ref + 1;
+    UWORD8 *top_ptr = pu1_ref - ref_strd;
+    UWORD8 *bot_ptr = pu1_ref + ref_strd;
+
+    /* temp var */
+    WORD32 row, col;
+
+    memset(pi4_sad, 0, 4 * sizeof(WORD32));
+
+    for(row = 0; row < MB_SIZE; row++)
+    {
+        for(col = 0; col < MB_SIZE; col++)
+        {
+            WORD32 src = pu1_src[col];
+            WORD32 diff;
+
+            diff = src - left_ptr[col];
+            pi4_sad[0] += ABS(diff);
+
+            diff = src - right_ptr[col];
+            pi4_sad[1] += ABS(diff);
+
+            diff = src - top_ptr[col];
+            pi4_sad[2] += ABS(diff);
+
+            diff = src - bot_ptr[col];
+            pi4_sad[3] += ABS(diff);
+        }
+
+        left_ptr += ref_strd;
+        right_ptr += ref_strd;
+        top_ptr += ref_strd;
+        bot_ptr += ref_strd;
+
+        pu1_src += src_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function evaluates the sad of a 16x16 source block
+* against three reference blocks in one pass.
+*
+* @param[in] pu1_ref1, pu1_ref2, pu1_ref3
+* UWORD8 pointers to the three references
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+* integer reference stride
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in,out] pi4_sad
+* pointer to integer array evaluated sad; pi4_sad[k] belongs to reference k + 1
+*
+* @returns none; the sads are delivered through pi4_sad
+*
+* @remarks Unlike ime_calculate_sad4_prog, pi4_sad is not cleared here.
+* NOTE(review): USADA8 presumably accumulates into its third argument, in
+* which case callers must zero pi4_sad[0..2] first - confirm against the
+* USADA8 definition
+*
+*******************************************************************************
+*/
+void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
+                             UWORD8 *pu1_ref2,
+                             UWORD8 *pu1_ref3,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    /* temp var */
+    WORD32 i4_row, i4_grp;
+
+    /* distance from the end of one row to the start of the next. Kept   */
+    /* signed so that a stride below MB_SIZE does not wrap to a huge     */
+    /* unsigned pointer offset                                           */
+    WORD32 i4_ref_row_gap = ref_strd - MB_SIZE;
+    WORD32 i4_src_row_gap = src_strd - MB_SIZE;
+
+    for(i4_row = 0; i4_row < MB_SIZE; i4_row++)
+    {
+        /* a row is consumed in MB_SIZE / 4 steps of 4 bytes each */
+        for(i4_grp = 0; i4_grp < (MB_SIZE / 4); i4_grp++)
+        {
+            USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
+            USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
+            USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
+            pu1_src += 4;
+            pu1_ref1 += 4;
+            pu1_ref2 += 4;
+            pu1_ref3 += 4;
+        }
+
+        pu1_src += i4_src_row_gap;
+        pu1_ref1 += i4_ref_row_gap;
+        pu1_ref2 += i4_ref_row_gap;
+        pu1_ref3 += i4_ref_row_gap;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad
+*
+* @par Description: This function evaluates the sad of a 16x16 source block
+* against two reference blocks in one pass.
+*
+* @param[in] pu1_ref1, pu1_ref2
+* UWORD8 pointers to the two references
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+* integer reference stride
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in,out] pi4_sad
+* pointer to integer array evaluated sad; pi4_sad[k] belongs to reference k + 1
+*
+* @returns none; the sads are delivered through pi4_sad
+*
+* @remarks Unlike ime_calculate_sad4_prog, pi4_sad is not cleared here.
+* NOTE(review): USADA8 presumably accumulates into its third argument, in
+* which case callers must zero pi4_sad[0..1] first - confirm against the
+* USADA8 definition
+*
+*******************************************************************************
+*/
+void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
+                             UWORD8 *pu1_ref2,
+                             UWORD8 *pu1_src,
+                             WORD32 ref_strd,
+                             WORD32 src_strd,
+                             WORD32 *pi4_sad)
+{
+    /* temp var */
+    WORD32 i4_row, i4_grp;
+
+    /* distance from the end of one row to the start of the next. Kept   */
+    /* signed so that a stride below MB_SIZE does not wrap to a huge     */
+    /* unsigned pointer offset                                           */
+    WORD32 i4_ref_row_gap = ref_strd - MB_SIZE;
+    WORD32 i4_src_row_gap = src_strd - MB_SIZE;
+
+    for(i4_row = 0; i4_row < MB_SIZE; i4_row++)
+    {
+        /* a row is consumed in MB_SIZE / 4 steps of 4 bytes each */
+        for(i4_grp = 0; i4_grp < (MB_SIZE / 4); i4_grp++)
+        {
+            USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
+            USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
+            pu1_src += 4;
+            pu1_ref1 += 4;
+            pu1_ref2 += 4;
+        }
+
+        pu1_src += i4_src_row_gap;
+        pu1_ref1 += i4_ref_row_gap;
+        pu1_ref2 += i4_ref_row_gap;
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks
+*
+* @par Description
+* Accumulates the SAD row by row (16 rows of 16 bytes). After every row the
+* running SAD is compared with i4_max_sad; as soon as it is larger, the
+* partial SAD is written out and the function stops. To get the distortion
+* of the entire block, pass an i4_max_sad the SAD can not exceed.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (partial if the early exit was taken)
+*
+* @remarks
+* USADA8 is defined outside this block; presumably it adds the sum of
+* absolute differences of 4 bytes to its third argument.
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16(UWORD8 *pu1_src,
+                           UWORD8 *pu1_est,
+                           WORD32 src_strd,
+                           WORD32 est_strd,
+                           WORD32 i4_max_sad,
+                           WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    /*
+     * Signed row steps: an unsigned 32 bit step wraps for strides below 16
+     * and is then zero extended when added to a 64 bit pointer.
+     */
+    WORD32 i4_src_row_adv = src_strd - 16;
+    WORD32 i4_est_row_adv = est_strd - 16;
+    UWORD32 i, u4_grp;
+
+GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
+
+    /* i counts the rows that are still to be processed */
+    for(i = 16; i > 0; i--)
+    {
+        /* four groups of 4 bytes make up one row */
+        for(u4_grp = 0; u4_grp < 4; u4_grp++)
+        {
+            USADA8(pu1_src, pu1_est, i4_sad);
+            pu1_src += 4;
+            pu1_est += 4;
+        }
+
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+
+GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
+
+            break;
+        }
+
+        pu1_src += i4_src_row_adv;
+        pu1_est += i4_est_row_adv;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+*
+* @par Description
+* Accumulates the SAD over every second row only (rows 0, 2, ... 14, each
+* 16 bytes wide) and reports twice that value. There is no early exit:
+* i4_max_sad is accepted for signature compatibility and is not used.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* unused
+*
+* @param[out] pi4_mb_distortion
+* twice the sad of the sampled rows
+*
+* @remarks
+* USADA8 is defined outside this block; presumably it adds the sum of
+* absolute differences of 4 bytes to its third argument.
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
+                                UWORD8 *pu1_est,
+                                WORD32 src_strd,
+                                WORD32 est_strd,
+                                WORD32 i4_max_sad,
+                                WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    /*
+     * Step from the end of one sampled row to the start of the next sampled
+     * row (two strides down). Signed, because an unsigned 32 bit step wraps
+     * for small strides and is then zero extended when added to a 64 bit
+     * pointer.
+     */
+    WORD32 i4_src_row_adv = 2 * src_strd - 16;
+    WORD32 i4_est_row_adv = 2 * est_strd - 16;
+    WORD32 i4_row, i4_grp;
+
+    UNUSED(i4_max_sad);
+
+    for(i4_row = 0; i4_row < 16; i4_row += 2)
+    {
+        /* four groups of 4 bytes make up one row */
+        for(i4_grp = 0; i4_grp < 4; i4_grp++)
+        {
+            USADA8(pu1_src, pu1_est, i4_sad);
+            pu1_src += 4;
+            pu1_est += 4;
+        }
+
+        pu1_src += i4_src_row_adv;
+        pu1_est += i4_est_row_adv;
+    }
+
+    /* only half of the rows were visited, scale the result up */
+    *pi4_mb_distortion = (i4_sad << 1);
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 8x8 blocks
+*
+* @par Description
+* Accumulates the SAD row by row (8 rows of 8 bytes). After every row the
+* running SAD is compared with i4_max_sad; as soon as it is larger, the
+* partial SAD is written out and the function stops. To get the distortion
+* of the entire block, pass an i4_max_sad the SAD can not exceed.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (partial if the early exit was taken)
+*
+* @remarks
+*
+******************************************************************************
+ */
+
+void ime_compute_sad_8x8(UWORD8 *pu1_src,
+                         UWORD8 *pu1_est,
+                         WORD32 src_strd,
+                         WORD32 est_strd,
+                         WORD32 i4_max_sad,
+                         WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    /*
+     * Signed row steps: an unsigned 32 bit step wraps for strides below 8
+     * and is then zero extended when added to a 64 bit pointer.
+     */
+    WORD32 i4_src_row_adv = src_strd - 8;
+    WORD32 i4_est_row_adv = est_strd - 8;
+    WORD32 i4_row, i4_col;
+    WORD16 i2_diff;
+
+    for(i4_row = 0; i4_row < 8; i4_row++)
+    {
+        for(i4_col = 0; i4_col < 8; i4_col++)
+        {
+            /* SAD */
+            i2_diff = *pu1_src++ - *pu1_est++;
+            i4_sad += ABS(i2_diff);
+        }
+
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+            break;
+        }
+
+        pu1_src += i4_src_row_adv;
+        pu1_est += i4_est_row_adv;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 4x4 blocks
+*
+* @par Description
+* Feeds 4 rows of 4 bytes each to USADA8 and reports the accumulated value.
+* There is no early exit: i4_max_sad is accepted for signature compatibility
+* and is not used.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* unused
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad
+*
+* @remarks
+* USADA8 is defined outside this block; presumably it adds the sum of
+* absolute differences of 4 bytes to its third argument.
+*
+******************************************************************************
+*/
+void ime_compute_sad_4x4
+    (
+        UWORD8 *pu1_src,
+        UWORD8 *pu1_est,
+        WORD32 src_strd,
+        WORD32 est_strd,
+        WORD32 i4_max_sad,
+        WORD32 *pi4_mb_distortion
+    )
+{
+    WORD32 i4_acc = 0;
+    WORD32 i4_row;
+
+    UNUSED(i4_max_sad);
+
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        USADA8(pu1_src, pu1_est, i4_acc);
+
+        /* the pointers are not moved past the last row */
+        if(i4_row < 3)
+        {
+            pu1_src += src_strd;
+            pu1_est += est_strd;
+        }
+    }
+
+    *pi4_mb_distortion = i4_acc;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x8 blocks
+*
+*
+* @par Description
+* Accumulates the SAD row by row (8 rows of 16 bytes). After every row the
+* running SAD is compared with i4_max_sad; as soon as it is larger, the
+* partial SAD is written out and the function stops. To get the distortion
+* of the entire block, pass an i4_max_sad the SAD can not exceed.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (partial if the early exit was taken)
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x8
+    (
+        UWORD8 *pu1_src,
+        UWORD8 *pu1_est,
+        WORD32 src_strd,
+        WORD32 est_strd,
+        WORD32 i4_max_sad,
+        WORD32 *pi4_mb_distortion
+    )
+{
+    WORD32 i4_sad = 0;
+    /*
+     * Signed row steps: an unsigned 32 bit step wraps for strides below 16
+     * and is then zero extended when added to a 64 bit pointer.
+     */
+    WORD32 i4_src_row_adv = src_strd - 16;
+    WORD32 i4_est_row_adv = est_strd - 16;
+    UWORD32 i, u4_col;
+    WORD16 i2_diff;
+
+GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
+
+    /* i counts the rows that are still to be processed */
+    for(i = 8; i > 0; i--)
+    {
+        for(u4_col = 0; u4_col < 16; u4_col++)
+        {
+            /* SAD */
+            i2_diff = *pu1_src++ - *pu1_est++;
+            i4_sad += ABS(i2_diff);
+        }
+
+        /* early exit */
+        if(i4_max_sad < i4_sad)
+        {
+
+GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
+
+            break;
+        }
+
+        pu1_src += i4_src_row_adv;
+        pu1_est += i4_est_row_adv;
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks, with a single
+* early exit check after half of the rows
+*
+* @par Description
+* The SAD is accumulated in two passes. The first pass covers rows
+* 0, 2, ... 14. If the running SAD is then larger than i4_max_sad, that
+* partial SAD is written out and the function stops. Otherwise the second
+* pass adds rows 1, 3, ... 15 and the SAD of the whole block is written out.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (first pass only if the early exit was taken)
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
+                               UWORD8 *pu1_est,
+                               WORD32 src_strd,
+                               WORD32 est_strd,
+                               WORD32 i4_max_sad,
+                               WORD32 *pi4_mb_distortion)
+{
+    WORD32 i4_sad = 0;
+    /*
+     * Step from the end of one visited row to the start of the next visited
+     * row (two strides down). Signed, because an unsigned 32 bit step wraps
+     * for small strides and is then zero extended when added to a 64 bit
+     * pointer.
+     */
+    WORD32 i4_src_row_adv = 2 * src_strd - 16;
+    WORD32 i4_est_row_adv = 2 * est_strd - 16;
+    WORD32 i4_pass, i4_row, i4_col;
+    WORD16 i2_diff;
+
+    /* pass 0 starts on row 0, pass 1 on row 1 */
+    for(i4_pass = 0; i4_pass < 2; i4_pass++)
+    {
+        UWORD8 *pu1_src_row = pu1_src + i4_pass * src_strd;
+        UWORD8 *pu1_est_row = pu1_est + i4_pass * est_strd;
+
+        for(i4_row = 0; i4_row < 8; i4_row++)
+        {
+            for(i4_col = 0; i4_col < 16; i4_col++)
+            {
+                /* SAD */
+                i2_diff = *pu1_src_row++ - *pu1_est_row++;
+                i4_sad += ABS(i2_diff);
+            }
+
+            pu1_src_row += i4_src_row_adv;
+            pu1_est_row += i4_est_row_adv;
+        }
+
+        /* early exit, checked once after the first pass only */
+        if((0 == i4_pass) && (i4_max_sad < i4_sad))
+        {
+            break;
+        }
+    }
+
+    *pi4_mb_distortion = i4_sad;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief This function computes SAD between two 16x16 blocks.
+* It also reports whether any of the sixteen 4x4 sub blocks reaches one of the
+* supplied thresholds. Per the original description this is meant to tell
+* whether the block will be zero after H264 transform and quant.
+* NOTE(review): the original text said "Intra 16x16 blocks" while the function
+* name says lumainter - confirm which is intended.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] pu2_thrsh
+* Thresholds; entries 0 to 8 are read
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (sum of the SADs of all sixteen 4x4 sub blocks)
+*
+* @param[out] pu4_is_non_zero
+* Pointer to store the result of the threshold test: 1 if at least one sub
+* block reached a threshold, 0 otherwise
+*
+* @remarks
+* Once one sub block has reached a threshold, the threshold test is skipped
+* for the remaining sub blocks; only the SAD keeps being accumulated.
+*
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ UWORD16 *pu2_thrsh,
+ WORD32 *pi4_mb_distortion,
+ UWORD32 *pu4_is_non_zero)
+{
+ UWORD32 i,j;
+ WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
+ UWORD8 *pu1_src_lp,*pu1_est_lp;
+ /* despite its name this is a flag: 1 once any sub block reached a threshold */
+ UWORD32 sad = 0;
+
+ (*pi4_mb_distortion) = 0;
+ /* i walks the 4 rows of sub blocks, j the 4 sub blocks within such a row */
+ for(i=0;i<4;i++)
+ {
+ for(j=0;j<4;j++)
+ {
+ pu1_src_lp = pu1_src + 4*j;
+ pu1_est_lp = pu1_est + 4*j;
+
+ /* row 0: s1 takes columns 0 and 3, s4 takes columns 1 and 2 */
+ s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+ s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 1: s2 takes columns 0 and 3, s3 takes columns 1 and 2 */
+ s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+ s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 2: same split as row 1 */
+ s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+ s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 3: same split as row 0 */
+ s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+ s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+ /* SAD of the complete 4x4 sub block */
+ sad_1 = s1+s2+s3+s4;
+
+ /* the threshold test only runs while no sub block has tripped yet */
+ if(sad == 0)
+ {
+ sad_2 = sad_1<<1;
+
+ ls1 = sad_2 -(s2 + s3);
+ ls2 = sad_2 -(s1 + s4);
+ ls3 = sad_2 -(s3 + s4);
+ ls4 = sad_2 -(s3 - (s1<<1));
+ ls5 = sad_2 -(s4 - (s2<<1));
+ ls6 = sad_2 -(s1 + s2);
+ ls7 = sad_2 -(s2 - (s4<<1));
+ ls8 = sad_2 -(s1 - (s3<<1));
+
+ /* reaching any single threshold is enough to set the flag */
+ if(
+ pu2_thrsh[8] <= sad_1 ||
+ pu2_thrsh[0] <= ls2 ||
+ pu2_thrsh[1] <= ls1 ||
+ pu2_thrsh[2] <= ls8 ||
+ pu2_thrsh[3] <= ls5 ||
+
+ pu2_thrsh[4] <= ls6 ||
+ pu2_thrsh[5] <= ls3 ||
+ pu2_thrsh[6] <= ls7 ||
+ pu2_thrsh[7] <= ls4
+
+ )sad = 1;
+ }
+ (*pi4_mb_distortion) += sad_1;
+ }
+ /* move down to the next row of 4x4 sub blocks */
+ pu1_src += (src_strd *4);
+ pu1_est += (est_strd *4);
+ }
+ *pu4_is_non_zero = sad;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief threshold test on the SAD terms of 2 interleaved chroma blocks
+*
+*
+* @par Description
+* For each of the two interleaved planes (byte offsets 0 and 1, samples two
+* bytes apart) the 4x4 sub blocks are visited and SAD based terms are compared
+* against thrsh[1] to thrsh[15]. The function returns as soon as one term
+* reaches its threshold. Per plane, the accumulated SAD is finally compared
+* against twice thrsh[0].
+* There is no provision for early exit on max_sad; that parameter is unused.
+*
+* NOTE(review): this function writes nothing through its parameters and
+* returns void, so none of the values it computes are visible to the caller.
+* The original description said that zero or the SAD is given back; the code
+* does not do that.
+*
+* NOTE(review): per plane the loops cover 16 samples in each of 16 rows
+* (4 sub blocks of 8 bytes across, 4 sub block rows of 4 lines). This looks
+* larger than the 16x8 interleaved block named in the original brief -
+* confirm the intended extents before use.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] max_sad
+* unused
+*
+* @param[in] thrsh
+* Thresholds; entries 0 to 15 are read
+*
+* @remarks
+* Function code is not updated.
+* Will require debugging and minor modifications
+*
+******************************************************************************
+*/
+void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 max_sad,
+ UWORD16 *thrsh)
+{
+ WORD32 i,j,plane;
+ WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
+ UWORD8 *pu1_src_lp,*pu1_est_lp,*pu1_src_plane,*pu1_est_plane;
+ /* SAD accumulated over the sub blocks of the current plane */
+ WORD32 sad =0;
+ UNUSED(max_sad);
+
+ /* remember the block origin so the second plane can start at origin + 1 */
+ pu1_src_plane = pu1_src;
+ pu1_est_plane = pu1_est;
+
+ for(plane =0;plane<2;plane++)
+ {
+ for(i=0;i<4;i++)
+ {
+ for(j=0;j<4;j++)
+ {
+ /* a 4 sample wide sub block spans 8 interleaved bytes */
+ pu1_src_lp = pu1_src + 8*j;
+ pu1_est_lp = pu1_est + 8*j;
+
+ /* row 0: s1 takes samples 0 and 3 (bytes 0, 6), s4 samples 1 and 2 (bytes 2, 4) */
+ s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+ s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 1: s2 takes samples 0 and 3, s3 samples 1 and 2 */
+ s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+ s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 2: same split as row 1 */
+ s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+ s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 3: same split as row 0 */
+ s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
+ s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
+
+ /* SAD of the complete 4x4 sub block and twice that value */
+ sad_1 = s1+s2+s3+s4;
+ sad_2 = sad_1<<1;
+
+ ls1 = sad_2 -(s2 + s3);
+ ls2 = sad_2 -(s1 + s4);
+ ls3 = sad_2 -(s3 + s4);
+ ls4 = sad_2 -(s3 - (s1<<1));
+ ls5 = sad_2 -(s4 - (s2<<1));
+ ls6 = sad_2 -(s1 + s2);
+ ls7 = sad_2 -(s2 - (s4<<1));
+ ls8 = sad_2 -(s1 - (s3<<1));
+
+ /* every term has to stay below its threshold, otherwise the function returns */
+ if(
+ //thrsh[0] > sad_1 && Chroma Dc is checked later
+ thrsh[1] > ls1 &&
+ thrsh[2] > sad_1 &&
+ thrsh[3] > ls2 &&
+
+ thrsh[4] > ls3 &&
+ thrsh[5] > ls4 &&
+ thrsh[6] > ls3 &&
+ thrsh[7] > ls5 &&
+
+ thrsh[8] > sad_1 &&
+ thrsh[9] > ls1 &&
+ thrsh[10]> sad_1 &&
+ thrsh[11]> ls2 &&
+
+ thrsh[12]> ls6 &&
+ thrsh[13]> ls7 &&
+ thrsh[14]> ls6 &&
+ thrsh[15]> ls8
+ )
+ {
+ /* all terms are below their thresholds; nothing is done here */
+ }
+ else
+ return ;
+
+ sad += sad_1;
+ }
+ /* move down to the next row of sub blocks */
+ pu1_src += (src_strd *4);
+ pu1_est += (est_strd *4);
+ }
+ /* check of the plane total against twice thrsh[0]; the total is cleared for the next plane */
+ if(sad < (thrsh[0]<<1))sad = 0;
+ else return ;
+
+ /* second plane: same origin, shifted by one byte */
+ pu1_src = pu1_src_plane+1;
+ pu1_est = pu1_est_plane+1;
+ }
+ return ;
+}
+
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks and flags the 4x4
+* sub blocks that reach a threshold
+*
+* @par Description
+* The SAD of each of the sixteen 4x4 sub blocks is added to the block
+* distortion. For every sub block, SAD based terms are compared against
+* thrsh[1] to thrsh[15]; the sub block is flagged 0 when all terms are below
+* their thresholds and 1 otherwise. Finally the block distortion is compared
+* against 4 times thrsh[0] to set the first flag. If that comparison passes and
+* no sub block was flagged, the reported distortion is forced to 0.
+* There is no provision for early exit; max_sad is unused.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] max_sad
+* unused
+*
+* @param[in] thrsh
+* Thresholds; entries 0 to 15 are read
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad, or 0 (see description)
+*
+* @param[out] sig_nz_sad
+* array of 17 flags: entry 0 holds the result of the thrsh[0] comparison,
+* entries 1 to 16 hold the per sub block flags in raster order
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 max_sad,
+ UWORD16 *thrsh,
+ WORD32 *pi4_mb_distortion,
+ UWORD8 *sig_nz_sad)
+{
+ UWORD32 i,j;
+ /* partial sums of the 4 sub blocks of the current sub block row; sad[] holds twice the sub block SAD */
+ WORD16 s1[4],s2[4],s3[4],s4[4],sad[4];
+ UWORD8 *pu1_src_lp,*pu1_est_lp;
+ UWORD8 *sig_sad_dc;
+ /* 1 once any sub block has been flagged */
+ UWORD32 nz_sad_sig = 0;
+ UNUSED(max_sad);
+ *pi4_mb_distortion =0;
+
+ /* entry 0 is written last; the per sub block flags start at entry 1 */
+ sig_sad_dc = sig_nz_sad;
+ sig_nz_sad++;
+
+ for(i=0;i<4;i++)
+ {
+ /* first gather the partial sums of the 4 sub blocks of this row */
+ for(j=0;j<4;j++)
+ {
+ pu1_src_lp = pu1_src + 4*j;
+ pu1_est_lp = pu1_est + 4*j;
+
+ /* row 0: s1 takes columns 0 and 3, s4 takes columns 1 and 2 */
+ s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+ s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 1: s2 takes columns 0 and 3, s3 takes columns 1 and 2 */
+ s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+ s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 2: same split as row 1 */
+ s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+ s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+ pu1_src_lp += src_strd;
+ pu1_est_lp += est_strd;
+
+ /* row 3: same split as row 0 */
+ s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
+ s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
+
+ /* twice the SAD of the sub block */
+ sad[j] = ((s1[j]+s2[j]+s3[j]+s4[j])<<1);
+ }
+
+ /* then run the threshold test for each of them */
+ for(j=0;j<4;j++)
+ {
+
+ if(
+ //thrsh[0] > (sad[j] >> 1) &&Dc goes in the other part
+ thrsh[1] > (sad[j] -(s2[j] + s3[j])) &&
+ thrsh[2] > (sad[j]>>1) &&
+ thrsh[3] > (sad[j] -(s1[j] + s4[j])) &&
+
+ thrsh[4] > (sad[j] -(s3[j] + s4[j])) &&
+ thrsh[5] > (sad[j] -(s3[j] - (s1[j]<<1))) &&
+ thrsh[6] > (sad[j] -(s3[j] + s4[j])) &&
+ thrsh[7] > (sad[j] -(s4[j] - (s2[j]<<1))) &&
+
+ thrsh[8] > (sad[j]>>1) &&
+ thrsh[9] > (sad[j] -(s2[j] + s3[j])) &&
+ thrsh[10]> (sad[j]>>1) &&
+ thrsh[11]> (sad[j] -(s1[j] + s4[j])) &&
+
+ thrsh[12]> (sad[j] -(s1[j] + s2[j])) &&
+ thrsh[13]> (sad[j] -(s2[j] - (s4[j]<<1))) &&
+ thrsh[14]> (sad[j] -(s1[j] + s2[j])) &&
+ thrsh[15]> (sad[j] -(s1[j] - (s3[j]<<1)))
+ )
+ {
+ //sad[j] = 0; /*set current sad to be zero*/
+ sig_nz_sad[j] = 0;/*Signal that the sad is zero*/
+ }
+ else
+ {
+ sig_nz_sad[j] = 1;/*signal that sad is non zero*/
+ nz_sad_sig = 1;
+ }
+
+ /* the distortion always receives the plain sub block SAD */
+ (*pi4_mb_distortion) += (sad[j]>>1);
+ //if((*pi4_mb_distortion) >= max_sad)return; /*return or some thing*/
+ }
+
+ /* next row of sub blocks: 4 flags further, 4 lines down */
+ sig_nz_sad += 4;
+ pu1_src += (src_strd *4);
+ pu1_est += (est_strd *4);
+ }
+
+ /* the shift binds tighter than the comparison: distortion < (thrsh[0] << 2) */
+ if((*pi4_mb_distortion) < thrsh[0]<<2)
+ {
+ *sig_sad_dc = 0;
+ /* nothing was flagged at all: report a distortion of 0 */
+ if(nz_sad_sig == 0)(*pi4_mb_distortion) = 0;
+ }
+ else *sig_sad_dc = 1;
+}
+
diff --git a/encoder/ime_distortion_metrics.h b/encoder/ime_distortion_metrics.h
new file mode 100755
index 0000000..a30e1fc
--- /dev/null
+++ b/encoder/ime_distortion_metrics.h
@@ -0,0 +1,170 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics.h
+*
+* @brief
+* This file contains declarations of routines that compute distortion
+* between two macro/sub blocks of identical dimensions
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef IME_DISTORTION_METRICS_H_
+#define IME_DISTORTION_METRICS_H_
+
+
+/*****************************************************************************/
+/* Type definitions for function prototypes */
+/*****************************************************************************/
+
+/* SAD between a source block and an estimated block, with a maximum SAD argument */
+typedef void ime_compute_sad_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 i4_max_sad,
+ WORD32 *pi4_mb_distortion);
+
+/* SADs of one source block around a single reference pointer; results go to pi4_sad */
+typedef void ime_compute_sad4_diamond(UWORD8 *pu1_ref,
+ UWORD8 *pu1_src,
+ WORD32 ref_strd,
+ WORD32 src_strd,
+ WORD32 *pi4_sad);
+
+/* SADs of one source block against three reference pointers; results go to pi4_sad */
+typedef void ime_compute_sad3_diamond(UWORD8 *pu1_ref1,
+ UWORD8 *pu1_ref2,
+ UWORD8 *pu1_ref3,
+ UWORD8 *pu1_src,
+ WORD32 ref_strd,
+ WORD32 src_strd,
+ WORD32 *pi4_sad);
+
+/* SADs of one source block against two reference pointers; results go to pi4_sad */
+typedef void ime_compute_sad2_diamond(UWORD8 *pu1_ref1,
+ UWORD8 *pu1_ref2,
+ UWORD8 *pu1_src,
+ WORD32 ref_strd,
+ WORD32 src_strd,
+ WORD32 *pi4_sad);
+
+/* SADs of one source block against the half_x, half_y and half_xy reference buffers */
+typedef void ime_sub_pel_compute_sad_16x16_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_ref_half_x,
+ UWORD8 *pu1_ref_half_y,
+ UWORD8 *pu1_ref_half_xy,
+ WORD32 src_strd,
+ WORD32 ref_strd,
+ WORD32 *pi4_sad);
+
+/* SAD plus a threshold based flag; same parameter list as ime_compute_satqd_16x16_lumainter_ft */
+typedef void ime_compute_sad_stat(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ UWORD16 *pu2_thrsh,
+ WORD32 *pi4_mb_distortion,
+ UWORD32 *pu4_is_zero);
+
+/*
+ * SAD plus a threshold based flag for luma inter blocks.
+ * NOTE(review): the last parameter is named pu4_is_zero here but
+ * pu4_is_non_zero in the definition of ime_compute_satqd_16x16_lumainter;
+ * confirm which polarity callers expect.
+ */
+typedef void ime_compute_satqd_16x16_lumainter_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ UWORD16 *pu2_thrsh,
+ WORD32 *pi4_mb_distortion,
+ UWORD32 *pu4_is_zero);
+
+/* Threshold test for chroma blocks; this signature has no output parameter */
+typedef void ime_compute_satqd_8x16_chroma_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 i4_max_sad,
+ UWORD16 *thrsh);
+
+/* SAD plus per sub block flags (sig_nz_sad) for luma intra blocks */
+typedef void ime_compute_satqd_16x16_lumaintra_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 i4_max_sad,
+ UWORD16 *thrsh,
+ WORD32 *pi4_mb_distortion,
+ UWORD8 *sig_nz_sad);
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/* Variants without an architecture suffix */
+ime_compute_sad_ft ime_compute_sad_16x16;
+ime_compute_sad_ft ime_compute_sad_16x16_fast;
+ime_compute_sad_ft ime_compute_sad_16x8;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8;
+ime_compute_sad_ft ime_compute_sad_8x8;
+ime_compute_sad_ft ime_compute_sad_4x4;
+ime_compute_sad4_diamond ime_calculate_sad4_prog;
+ime_compute_sad3_diamond ime_calculate_sad3_prog;
+ime_compute_sad2_diamond ime_calculate_sad2_prog;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16;
+ime_compute_sad_stat ime_compute_16x16_sad_stat;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter;
+ime_compute_satqd_8x16_chroma_ft ime_compute_satqd_8x16_chroma;
+ime_compute_satqd_16x16_lumaintra_ft ime_compute_satqd_16x16_lumaintra;
+
+/*SSE4.2 Declarations*/
+/* no _sse42 variants are declared for the 8x8, 4x4, sad3, sad2, sad_stat, chroma and lumaintra functions */
+ime_compute_sad_ft ime_compute_sad_16x16_sse42;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_sse42;
+ime_compute_sad_ft ime_compute_sad_16x8_sse42;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_sse42;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_sse42;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_sse42;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_sse42;
+
+/* assembly */
+/* _a9q / _a9 suffixed variants; none are declared for the 8x8, 4x4, chroma and lumaintra functions */
+ime_compute_sad_ft ime_compute_sad_16x16_a9q;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_a9q;
+ime_compute_sad_ft ime_compute_sad_16x8_a9q;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_a9q;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_a9q;
+ime_compute_sad3_diamond ime_calculate_sad3_prog_a9q;
+ime_compute_sad2_diamond ime_calculate_sad2_prog_a9q;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_a9q;
+ime_compute_sad_stat ime_compute_16x16_sad_stat_a9;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_a9q;
+
+
+/* assembly - AV8 declarations */
+/* same set of functions as the a9q group above, with the _av8 suffix */
+ime_compute_sad_ft ime_compute_sad_16x16_av8;
+ime_compute_sad_ft ime_compute_sad_16x16_fast_av8;
+ime_compute_sad_ft ime_compute_sad_16x8_av8;
+ime_compute_sad_ft ime_compute_sad_16x16_ea8_av8;
+ime_compute_sad4_diamond ime_calculate_sad4_prog_av8;
+ime_compute_sad3_diamond ime_calculate_sad3_prog_av8;
+ime_compute_sad2_diamond ime_calculate_sad2_prog_av8;
+ime_sub_pel_compute_sad_16x16_ft ime_sub_pel_compute_sad_16x16_av8;
+ime_compute_sad_stat ime_compute_16x16_sad_stat_av8;
+ime_compute_satqd_16x16_lumainter_ft ime_compute_satqd_16x16_lumainter_av8;
+
+
+#endif /* IME_DISTORTION_METRICS_H_ */
+
+
diff --git a/encoder/ime_macros.h b/encoder/ime_macros.h
new file mode 100755
index 0000000..a7b8c65
--- /dev/null
+++ b/encoder/ime_macros.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_macros.h
+*
+* @brief
+* Helper macros (ABS, MAX, MIN, CLIP3, UNUSED) used in the code
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IME_MACROS_H_
+#define _IME_MACROS_H_
+
+/*
+ * Every argument is wrapped in parentheses inside the expansion, so that an
+ * argument containing a lower precedence operator (for example a & b or
+ * c ? d : e) is still evaluated as one unit.
+ * ABS, MAX, MIN and CLIP3 evaluate their arguments more than once: do not pass
+ * expressions with side effects such as i++ or function calls.
+ */
+#define ABS(x) ((x) < 0 ? (-(x)) : (x))
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+/* y limited to the range [miny, maxy] */
+#define CLIP3(miny, maxy, y) (((y) < (miny)) ? (miny) : (((y) > (maxy)) ? (maxy) : (y)))
+/* marks a deliberately unused parameter or variable */
+#define UNUSED(x) ((void)(x))
+
+#endif /*_IME_MACROS_H_*/
diff --git a/encoder/ime_statistics.h b/encoder/ime_statistics.h
new file mode 100755
index 0000000..eeacaf2
--- /dev/null
+++ b/encoder/ime_statistics.h
@@ -0,0 +1,86 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_statistics.h
+*
+* @brief
+* Compile time switches and macros for SAD early exit statistics and debug histograms
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IME_STATISTICS_H_
+#define _IME_STATISTICS_H_
+/* set to 1 to route the DEBUG_*HISTOGRAM* macros to the debug_* functions */
+#define DEBUG_HISTOGRAM_ENABLE 0
+/* set to 1 to count SAD early exits through the GATHER_*_SAD_EE_STATS macros */
+#define SAD_EXIT_STATS 0
+
+
+#if SAD_EXIT_STATS
+
+/**
+******************************************************************************
+* @brief While computing sad, if we want to do a early exit, how often we
+* should check if the sad computed till now has exceeded min sad param is
+* chosen statistically.
+******************************************************************************
+*/
+/* one counter per possible index 0..16 (16x16) and 0..8 (16x8) */
+extern UWORD32 gu4_16x16_sad_ee_stats[16+1];
+extern UWORD32 gu4_16x8_sad_ee_stats[8+1];
+
+/**
+******************************************************************************
+* @brief print sad early exit stats
+******************************************************************************
+*/
+extern void print_sad_ee_stats(void);
+
+/*
+ * Increment counter i of the given array.
+ * The expansion already ends with a semicolon, so a call written with its own
+ * semicolon yields an extra empty statement; keep calls inside braces.
+ */
+#define GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, i) \
+ gu4_16x16_sad_ee_stats[i]++;
+#define GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, i) \
+ gu4_16x8_sad_ee_stats[i]++;
+
+#else
+
+/* statistics disabled: the macros expand to nothing */
+#define GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, i)
+#define GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, i)
+
+#endif
+
+
+#if DEBUG_HISTOGRAM_ENABLE
+#define DEBUG_HISTOGRAM_INIT() debug_histogram_init()
+#define DEBUG_HISTOGRAM_DUMP(condition) if(condition) debug_histogram_dump()
+#define DEBUG_MV_HISTOGRAM_ADD(mv_x, mv_y) debug_mv_histogram_add(mv_x, mv_y)
+#define DEBUG_SAD_HISTOGRAM_ADD(sad, level) debug_sad_histogram_add(sad, level)
+#else
+#define DEBUG_HISTOGRAM_INIT()
+#define DEBUG_HISTOGRAM_DUMP(condition)
+#define DEBUG_MV_HISTOGRAM_ADD(mv_x, mv_y)
+#define DEBUG_SAD_HISTOGRAM_ADD(sad, level)
+#endif
+
+
+
+#endif /*_IME_STATISTICS_H_*/
diff --git a/encoder/ime_structs.h b/encoder/ime_structs.h
new file mode 100755
index 0000000..7819b91
--- /dev/null
+++ b/encoder/ime_structs.h
@@ -0,0 +1,305 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ime_structs.h
+ *
+ * @brief
+ * Structures used by motion estimation: motion vector, MB partition info and ME context
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * -
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#ifndef _IME_STRUCTS_H_
+#define _IME_STRUCTS_H_
+
+/**
+ * Motion vector: a pair of signed 16-bit components (WORD16)
+ */
+typedef struct
+{
+ /**
+ * Horizontal component of the motion vector
+ */
+ WORD16 i2_mvx;
+
+ /**
+ * Vertical component of the motion vector
+ */
+ WORD16 i2_mvy;
+} ime_mv_t;
+
+
+/**
+**************************************************************************
+* @brief mb_part_ctxt
+*
+* Structure that would hold the information for individual MB partitions
+* gathered during the full pel ME stage
+**************************************************************************
+*/
+typedef struct
+{
+ /**
+ * best mvs
+ */
+ ime_mv_t s_mv_curr;
+
+ /**
+ * mv predictor
+ */
+ ime_mv_t s_mv_pred;
+
+ /**
+ * SAD associated with the MB partition
+ */
+ WORD32 i4_mb_distortion;
+
+ /**
+ * cost for the MB partition
+ */
+ WORD32 i4_mb_cost;
+
+ /**
+ * Search position for least cost among the list of candidates
+ */
+ UWORD32 u4_srch_pos_idx;
+
+ /**
+ * Exit indicator. NOTE(review): the original comment here was a copy of the one on u4_srch_pos_idx; exact meaning to be confirmed
+ */
+ UWORD32 u4_exit;
+
+ /*
+ * Buffer corresponding to best half pel cost
+ */
+ UWORD8 *pu1_best_hpel_buf;
+
+} mb_part_ctxt;
+
+
+/**
+**************************************************************************
+* @brief me_ctxt_t
+*
+* Structure encapsulating the parameters used in the motion estimation
+* context
+**************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Ref pointer to current MB luma
+ */
+ UWORD8 *pu1_ref_buf_luma;
+
+ /**
+ * Src pointer to current MB luma
+ */
+ UWORD8 *pu1_src_buf_luma;
+
+ /**
+ * source stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_src_strd;
+
+ /**
+ * recon stride
+ * (strides for luma and chroma are the same)
+ */
+ WORD32 i4_rec_strd;
+
+ /**
+ * Offset for half pel x plane from the pic buf
+ */
+ UWORD32 u4_half_x_offset;
+
+ /**
+ * Offset for half pel y plane from half x plane
+ */
+ UWORD32 u4_half_y_offset;
+
+ /**
+ * Offset for half pel xy plane from half y plane
+ */
+ UWORD32 u4_half_xy_offset;
+
+ /**
+ * Search range in the X, Y axis in terms of pixels (NOTE(review): 'ai2' prefix, but the elements are WORD32)
+ */
+ WORD32 ai2_srch_boundaries[2];
+
+ /**
+ * Search range in the north direction in terms of pixels
+ */
+ WORD32 i4_srch_range_n;
+
+ /**
+ * Search range in the south direction in terms of pixels
+ */
+ WORD32 i4_srch_range_s;
+
+ /**
+ * Search range in the east direction in terms of pixels
+ */
+ WORD32 i4_srch_range_e;
+
+ /**
+ * Search range in the west direction in terms of pixels
+ */
+ WORD32 i4_srch_range_w;
+
+ /**
+ * left mb motion vector
+ */
+ ime_mv_t s_left_mv;
+
+ /**
+ * top left mb motion vector
+ */
+ ime_mv_t s_top_left_mv;
+
+ /**
+ * Number of valid candidates for the Initial search position
+ */
+ UWORD32 u4_num_candidates;
+
+ /**
+ * Motion vector predictors derived from neighbouring blocks, used as initial
+ * search candidates (NOTE(review): text used to say "six block partitions"; the array holds 5)
+ */
+ ime_mv_t as_mv_init_search[5];
+
+ /**
+ * mv bits
+ */
+ UWORD8 *pu1_mv_bits;
+
+ /**
+ * lambda (lagrange multiplier for cost computation)
+ */
+ UWORD32 u4_lambda_motion;
+
+ /**
+ * enabled fast sad computation
+ */
+ UWORD32 u4_enable_fast_sad;
+
+ /*
+ * Enable SKIP block prediction based on SATQD
+ */
+ UWORD32 u4_enable_stat_sad;
+
+ /*
+ * Minimum distortion to search for
+ * */
+ WORD32 i4_min_sad;
+
+ /*
+ * Signal that minimum sad has been reached in ME
+ * */
+ UWORD32 u4_min_sad_reached;
+
+ /**
+ * Flag to enable/disable half pel motion estimation
+ */
+ UWORD32 u4_enable_hpel;
+
+ /**
+ * Diamond search Iteration Max Cnt
+ */
+ UWORD32 u4_num_layers;
+
+ /**
+ * encoder me speed
+ */
+ UWORD32 u4_me_speed_preset;
+
+ UWORD32 u4_left_is_intra; /* presumably set when the left MB is intra coded - confirm against the writer */
+
+ UWORD32 u4_left_is_skip; /* presumably set when the left MB is skipped - confirm against the writer */
+
+ /**
+ * Structure to store the MB partition info
+ */
+ mb_part_ctxt s_mb_part;
+ /*
+ * Threshold to compare the sad with
+ */
+ UWORD16 *pu2_sad_thrsh;
+
+ /**
+ * fn ptrs for compute sad routines
+ */
+ ime_compute_sad_ft *pf_ime_compute_sad_16x16[2];
+ ime_compute_sad_ft *pf_ime_compute_sad_16x8;
+ ime_compute_sad4_diamond *pf_ime_compute_sad4_diamond;
+ ime_compute_sad3_diamond *pf_ime_compute_sad3_diamond;
+ ime_compute_sad2_diamond *pf_ime_compute_sad2_diamond;
+ ime_sub_pel_compute_sad_16x16_ft *pf_ime_sub_pel_compute_sad_16x16;
+
+ /*
+ * Function pointers for SATQD
+ */
+ ime_compute_sad_stat *pf_ime_compute_sad_stat_luma_16x16;
+
+ /**
+ * Qp
+ */
+ UWORD8 u1_mb_qp;
+
+ /*
+ * Buffers for holding half_x , half_y and half_xy
+ * values when halfpel generation
+ * for the entire plane is not enabled
+ */
+ UWORD8 *pu1_half_x;
+ UWORD8 *pu1_half_y;
+ UWORD8 *pu1_half_xy;
+
+
+ /*
+ * Buffer to store the best halfpel plane
+ */
+ UWORD8 *pu1_hpel_buf;
+
+ /*
+ * Stride for hpel buffer
+ */
+ UWORD32 u4_hpel_buf_strd;
+
+ WORD32 u4_hp_buf_strd; /* NOTE(review): 'u4' prefix on a WORD32; how it differs from u4_hpel_buf_strd is not evident here */
+
+} me_ctxt_t;
+
+
+#endif // _IME_STRUCTS_H_
+
diff --git a/encoder/ime_typedefs.h b/encoder/ime_typedefs.h
new file mode 100755
index 0000000..d36632d
--- /dev/null
+++ b/encoder/ime_typedefs.h
@@ -0,0 +1,50 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_typedefs.h
+*
+* @brief
+* Type definitions used in the code
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IME_TYPEDEFS_H_
+#define _IME_TYPEDEFS_H_
+
+
+typedef unsigned char UWORD8; /* intended: unsigned 8-bit */
+typedef unsigned short UWORD16; /* intended: unsigned 16-bit */
+typedef unsigned int UWORD32; /* intended: unsigned 32-bit */
+typedef unsigned long UWORD64; /* NOTE(review): 'long' is only guaranteed >= 32 bits (32-bit on ILP32/LLP64 targets) - confirm 64-bit use */
+
+typedef signed char WORD8; /* intended: signed 8-bit */
+typedef short WORD16; /* intended: signed 16-bit */
+typedef int WORD32; /* intended: signed 32-bit */
+typedef long WORD64; /* NOTE(review): same width caveat as UWORD64 */
+
+typedef char CHAR; /* plain char; signedness is implementation-defined */
+
+#endif /*_IME_TYPEDEFS_H_*/
diff --git a/encoder/irc_bit_allocation.c b/encoder/irc_bit_allocation.c
new file mode 100755
index 0000000..1dfd9de
--- /dev/null
+++ b/encoder/irc_bit_allocation.c
@@ -0,0 +1,859 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/** Includes */
+#include <stdio.h>
+#include <string.h>
+#include "irc_datatypes.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_trace_support.h"
+
+/** Macros: MIN() is fully parenthesized so it composes safely inside larger expressions (arguments may be evaluated twice) **/
+#define MIN(x,y) (((x) < (y)) ? (x) : (y))
+
+/* Remaining-bits-in-period state; embedded in bit_allocation_t as s_rbip */
+typedef struct
+{
+ /* using var_q number as it can cross 31 bits for large intra frame interval */
+ number_t vq_rem_bits_in_period;
+
+ /* Inputs cached so that later changes can be applied as deltas */
+ WORD32 i4_tot_frms_in_gop; /* frames in a GOP, summed over picture types */
+
+ WORD32 i4_num_intra_frm_interval; /* number of intra frame intervals (GOPs) making up the period */
+
+ WORD32 i4_bits_per_frm; /* average bits per frame used to size the period */
+
+} rem_bit_in_prd_t;
+
+typedef struct bit_allocation_t
+{
+ rem_bit_in_prd_t s_rbip; /* remaining bits in period and the inputs it was derived from */
+
+ /* A universal constant giving the relative complexity between pictures (NOTE(review): 'i2' prefix on WORD32; values are in K_Q fixed point) */
+ WORD32 i2_K[MAX_PIC_TYPE];
+
+ /* To get a estimate of the header bits consumed */
+ WORD32 i4_prev_frm_header_bits[MAX_PIC_TYPE];
+
+ WORD32 i4_bits_per_frm; /* average bits per frame */
+
+ WORD32 i4_num_gops_in_period; /* working value; raised on scene change and stepped back down at GOP ends */
+
+ /* Num gops as set by rate control module */
+ WORD32 i4_actual_num_gops_in_period;
+
+ number_t vq_saved_bits; /* initialised to 0 in irc_ba_init_bit_allocation() */
+
+ WORD32 i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES]; /* per-frame bits derived from the peak bit rates */
+
+ WORD32 i4_min_bits_per_frm; /* per-frame bits derived from the minimum bit rate */
+
+ /* Error bits module */
+ error_bits_handle ps_error_bits;
+
+ /* Storing frame rate */
+ WORD32 i4_frame_rate;
+
+ WORD32 i4_bit_rate; /* last configured bit rate */
+
+ WORD32 ai4_peak_bit_rate[MAX_NUM_DRAIN_RATES]; /* last configured peak bit rates */
+
+} bit_allocation_t;
+/* Returns the number of frames in a GOP, summed over all picture types reported by the picture-handling module. */
+static WORD32 get_number_of_frms_in_a_gop(pic_handling_handle ps_pic_handling)
+{
+    WORD32 ai4_frms_per_type[MAX_PIC_TYPE];
+    WORD32 i4_frm_total = 0;
+    WORD32 i4_type;
+
+    /* Fetch the per-picture-type frame counts of a GOP */
+    irc_pic_type_get_frms_in_gop(ps_pic_handling, ai4_frms_per_type);
+
+    /* Accumulate the counts of every picture type */
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        i4_frm_total += ai4_frms_per_type[i4_type];
+    }
+    return i4_frm_total;
+}
+/* Initialises ps_rbip: rem_bits_in_period = bits_per_frm * frames_in_gop * num_intra_frm_interval, and caches the inputs. */
+static void init_rbip(rem_bit_in_prd_t *ps_rbip,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_bits_per_frm,
+ WORD32 i4_num_intra_frm_interval)
+{
+ WORD32 i4_tot_frms_in_gop = get_number_of_frms_in_a_gop(ps_pic_handling);
+
+ /* rem_bits_in_period = bits_per_frm * tot_frms_in_gop * num_intra_frm_interval */
+ {
+ number_t vq_bits_per_frm, vq_tot_frms_in_gop, vq_num_intra_frm_interval;
+ number_t *pvq_rem_bits_in_period = &ps_rbip->vq_rem_bits_in_period;
+
+ SET_VAR_Q(vq_bits_per_frm, i4_bits_per_frm, 0);
+ SET_VAR_Q(vq_tot_frms_in_gop, i4_tot_frms_in_gop, 0);
+ SET_VAR_Q(vq_num_intra_frm_interval, i4_num_intra_frm_interval, 0);
+
+ /* rem_bits_in_period = bits_per_frm * tot_frms_in_gop */
+ mult32_var_q(vq_bits_per_frm, vq_tot_frms_in_gop,
+ pvq_rem_bits_in_period);
+
+ /* rem_bits_in_period *= num_intra_frm_interval */
+ mult32_var_q(vq_num_intra_frm_interval, pvq_rem_bits_in_period[0],
+ pvq_rem_bits_in_period);
+ }
+
+ /*
+ * Cache the inputs: check_update_rbip() and irc_ba_change_rbip() compare
+ * new values against these to compute the change in remaining bits
+ */
+ ps_rbip->i4_tot_frms_in_gop = i4_tot_frms_in_gop;
+ ps_rbip->i4_num_intra_frm_interval = i4_num_intra_frm_interval;
+ ps_rbip->i4_bits_per_frm = i4_bits_per_frm;
+}
+/* If the GOP frame count changed, adds bits_per_frm * num_intra_frm_interval * (new - old frame count) to rem_bits_in_period. */
+static void check_update_rbip(rem_bit_in_prd_t *ps_rbip,
+ pic_handling_handle ps_pic_handling)
+{
+ /*
+ * NOTE: Intra frame interval changes after the first I frame that is
+ * encoded in a GOP
+ */
+ WORD32 i4_new_tot_frms_in_gop = get_number_of_frms_in_a_gop(
+ ps_pic_handling);
+
+ if(i4_new_tot_frms_in_gop != ps_rbip->i4_tot_frms_in_gop)
+ {
+ WORD32 i4_rem_frames_in_period = /* change in frame count over the whole period; may be negative */
+ ps_rbip->i4_num_intra_frm_interval
+ * (i4_new_tot_frms_in_gop
+ - ps_rbip->i4_tot_frms_in_gop);
+
+ number_t vq_rem_frms_in_period, s_bits_per_frm, vq_delta_bits_in_period;
+
+ SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frames_in_period, 0);
+ SET_VAR_Q(s_bits_per_frm, ps_rbip->i4_bits_per_frm, 0);
+
+ /* delta_bits_in_period = bits_per_frm * rem_frms_in_period */
+ mult32_var_q(s_bits_per_frm, vq_rem_frms_in_period,
+ &vq_delta_bits_in_period);
+
+ /* rem_bits_in_period += delta_bits_in_period */
+ add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+ &ps_rbip->vq_rem_bits_in_period);
+ }
+ /* Remember the new frame count (a no-op when nothing changed) */
+ ps_rbip->i4_tot_frms_in_gop = i4_new_tot_frms_in_gop;
+}
+/* Re-checks the GOP size via check_update_rbip(), then adds i4_num_of_bits (may be negative or 0) to rem_bits_in_period. */
+static void irc_ba_update_rbip(rem_bit_in_prd_t *ps_rbip,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_num_of_bits)
+{
+ number_t vq_num_bits;
+
+ check_update_rbip(ps_rbip, ps_pic_handling);
+
+ /* rem_bits_in_period += num_of_bits */
+ SET_VAR_Q(vq_num_bits, i4_num_of_bits, 0);
+ add32_var_q(vq_num_bits, ps_rbip->vq_rem_bits_in_period,
+ &ps_rbip->vq_rem_bits_in_period);
+}
+/* Adjusts rem_bits_in_period for a new bits_per_frm and/or a new num_intra_frm_interval, then stores both new values. */
+static void irc_ba_change_rbip(rem_bit_in_prd_t *ps_rbip,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_new_bits_per_frm,
+ WORD32 i4_new_num_intra_frm_interval)
+{
+ WORD32 ai4_rem_frms_in_period[MAX_PIC_TYPE], i4_rem_frms_in_gop, i;
+ irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, ai4_rem_frms_in_period);
+
+ i4_rem_frms_in_gop = 0; /* frames still to come in the current GOP, over all picture types */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ i4_rem_frms_in_gop += ai4_rem_frms_in_period[i];
+
+ if(i4_new_bits_per_frm != ps_rbip->i4_bits_per_frm)
+ {
+ WORD32 i4_rem_frms_in_period = (ps_rbip->i4_num_intra_frm_interval - 1) /* full GOPs after the current one + rest of current GOP */
+ * ps_rbip->i4_tot_frms_in_gop + i4_rem_frms_in_gop;
+
+ number_t vq_rem_frms_in_period, vq_delta_bits_per_frm,
+ vq_delta_bits_in_period;
+
+ /* delta_bits_per_frm = new_bits_per_frm - old_bits_per_frm */
+ SET_VAR_Q(vq_delta_bits_per_frm,
+ (i4_new_bits_per_frm - ps_rbip->i4_bits_per_frm), 0);
+
+ SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frms_in_period, 0);
+
+ /* delta_bits_in_period = delta_bits_per_frm * rem_frms_in_period */
+ mult32_var_q(vq_delta_bits_per_frm, vq_rem_frms_in_period,
+ &vq_delta_bits_in_period);
+
+ /* ps_rbip->rem_bits_in_period += delta_bits_in_period */
+ add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+ &ps_rbip->vq_rem_bits_in_period);
+ }
+
+ if(i4_new_num_intra_frm_interval != ps_rbip->i4_num_intra_frm_interval)
+ {
+ WORD32 i4_rem_frms_in_period = ps_rbip->i4_tot_frms_in_gop /* frames added to (or removed from) the period; may be negative */
+ * (i4_new_num_intra_frm_interval
+ - ps_rbip->i4_num_intra_frm_interval);
+
+ number_t vq_rem_frms_in_period, vq_new_bits_per_frm,
+ vq_delta_bits_in_period;
+
+ /* the added/removed frames are valued at the new bits_per_frm */
+ SET_VAR_Q(vq_new_bits_per_frm, i4_new_bits_per_frm, 0);
+
+ SET_VAR_Q(vq_rem_frms_in_period, i4_rem_frms_in_period, 0);
+
+ /* delta_bits_in_period = new_bits_per_frm * rem_frms_in_period */
+ mult32_var_q(vq_new_bits_per_frm, vq_rem_frms_in_period,
+ &vq_delta_bits_in_period);
+
+ /* ps_rbip->rem_bits_in_period += delta_bits_in_period */
+ add32_var_q(vq_delta_bits_in_period, ps_rbip->vq_rem_bits_in_period,
+ &ps_rbip->vq_rem_bits_in_period);
+ }
+ /* Update the new value */
+ ps_rbip->i4_num_intra_frm_interval = i4_new_num_intra_frm_interval;
+ ps_rbip->i4_bits_per_frm = i4_new_bits_per_frm;
+}
+/* Memtab hook for bit allocation: one memtab for bit_allocation_t plus those of the error-bits module; returns the memtab count. */
+WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_t **pps_bit_allocation,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type)
+{
+ WORD32 i4_mem_tab_idx = 0;
+ static bit_allocation_t s_bit_allocation_temp;
+
+ /*
+ * Hack for all alloc, during which we don't have any state memory.
+ * Dereferencing can cause issues, so point at a static dummy instead
+ */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_bit_allocation) = &s_bit_allocation_temp;
+
+ /* memtab for the bit allocation state structure */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(bit_allocation_t),
+ ALIGN_128_BYTE, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void**)pps_bit_allocation, /* index 0 == i4_mem_tab_idx at this point */
+ e_func_type);
+ }
+ i4_mem_tab_idx++;
+ /* let the error-bits module account for its own memtabs, starting at the next slot */
+ i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab(
+ &pps_bit_allocation[0]->ps_error_bits,
+ &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ return (i4_mem_tab_idx);
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_init_bit_allocation
+ Description : Initialize the bit_allocation structure from the bit rate, frame rate, peak and minimum bit rates.
+ ******************************************************************************/
+void irc_ba_init_bit_allocation(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_num_intra_frm_interval,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frm_rate,
+ WORD32 *i4_peak_bit_rate,
+ WORD32 i4_min_bitrate)
+{
+ WORD32 i;
+ WORD32 i4_bits_per_frm, i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES];
+
+ /* Calculate the bits per frame (presumably i4_frm_rate is scaled by 1000 - confirm) */
+ X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frm_rate, i4_bits_per_frm);
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frm_rate,
+ i4_max_bits_per_frm[i]);
+ }
+ /* Initialize the bits_per_frame */
+ ps_bit_allocation->i4_bits_per_frm = i4_bits_per_frm;
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_bit_allocation->i4_max_bits_per_frm[i] = i4_max_bits_per_frm[i];
+ }
+ X_PROD_Y_DIV_Z(i4_min_bitrate, 1000, i4_frm_rate,
+ ps_bit_allocation->i4_min_bits_per_frm);
+
+ /*
+ * Initialize the rem_bits in period
+ * The first gop in case of an OPEN GOP may have fewer B_PICs,
+ * That condition is not taken care of
+ */
+ init_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, i4_bits_per_frm,
+ i4_num_intra_frm_interval);
+
+ /* Initialize the num_gops_in_period */
+ ps_bit_allocation->i4_num_gops_in_period = i4_num_intra_frm_interval;
+ ps_bit_allocation->i4_actual_num_gops_in_period = i4_num_intra_frm_interval;
+
+ /* Relative complexity between I, P and B frames, in K_Q fixed point */
+ ps_bit_allocation->i2_K[I_PIC] = (1 << K_Q);
+ ps_bit_allocation->i2_K[P_PIC] = I_TO_P_RATIO;
+ ps_bit_allocation->i2_K[B_PIC] = (P_TO_B_RATIO * I_TO_P_RATIO) >> K_Q;
+
+ /* Initialize the saved bits to 0*/
+ SET_VAR_Q(ps_bit_allocation->vq_saved_bits, 0, 0);
+
+ /* Update the error bits module with average bits */
+ irc_init_error_bits(ps_bit_allocation->ps_error_bits, i4_frm_rate,
+ i4_bit_rate);
+ /* Store the input for implementing change in values */
+ ps_bit_allocation->i4_frame_rate = i4_frm_rate;
+ ps_bit_allocation->i4_bit_rate = i4_bit_rate;
+ /* No header bits have been observed yet for any picture type */
+ memset(ps_bit_allocation->i4_prev_frm_header_bits, 0, sizeof(ps_bit_allocation->i4_prev_frm_header_bits));
+ for(i=0;i<MAX_NUM_DRAIN_RATES;i++)
+ ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i];
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_get_cur_frm_est_texture_bits
+ Description : Based on remaining bits in period and rd_model
+ the number of texture bits for the current frame is estimated (never negative).
+ ******************************************************************************/
+WORD32 irc_ba_get_cur_frm_est_texture_bits(bit_allocation_t *ps_bit_allocation,
+ rc_rd_model_handle *pps_rd_model,
+ est_sad_handle ps_est_sad,
+ pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type)
+{
+ WORD32 i, j;
+ WORD32 i4_est_texture_bits_for_frm;
+ number_t vq_rem_texture_bits;
+ number_t vq_complexity_estimate[MAX_PIC_TYPE];
+ WORD32 i4_rem_frms_in_period[MAX_PIC_TYPE], i4_frms_in_period[MAX_PIC_TYPE];
+ number_t vq_max_consumable_bits;
+ number_t vq_rem_frms_in_period[MAX_PIC_TYPE], vq_est_texture_bits_for_frm;
+ number_t vq_prev_hdr_bits[MAX_PIC_TYPE];
+
+ WORD32 complexity_est = 0;
+
+ /* Get the rem_frms_in_gop & the frms_in_gop from the pic_type state struct */
+ irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, i4_rem_frms_in_period);
+ irc_pic_type_get_frms_in_gop(ps_pic_handling, i4_frms_in_period);
+
+ /* Depending on the number of gops in a period, find the num_frms_in_prd */
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ i4_rem_frms_in_period[j] += (i4_frms_in_period[j]
+ * (ps_bit_allocation->i4_num_gops_in_period - 1));
+ i4_frms_in_period[j] *= ps_bit_allocation->i4_num_gops_in_period;
+ }
+
+ /* Bring rem_bits_in_period up to date (adds 0 bits; only the GOP size is
+ re-checked), then build the var-Q operands used to remove the header bits */
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ SET_VAR_Q(vq_rem_frms_in_period[i], i4_rem_frms_in_period[i], 0);
+ SET_VAR_Q(vq_prev_hdr_bits[i],
+ ps_bit_allocation->i4_prev_frm_header_bits[i], 0);
+ }
+ {
+ /*
+ *rem_texture_bits = rem_bits_in_period -
+ *(rem_frms_in_period[I_PIC] * prev_frm_header_bits[I_PIC]) -
+ *(rem_frms_in_period[P_PIC] * prev_frm_header_bits[P_PIC]) -
+ *(rem_frms_in_period[B_PIC] * prev_frm_header_bits[B_PIC]);
+ */
+ number_t vq_rem_hdr_bits;
+ vq_rem_texture_bits = ps_bit_allocation->s_rbip.vq_rem_bits_in_period;
+
+ mult32_var_q(vq_prev_hdr_bits[I_PIC], vq_rem_frms_in_period[I_PIC],
+ &vq_rem_hdr_bits);
+ sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+
+ mult32_var_q(vq_prev_hdr_bits[P_PIC], vq_rem_frms_in_period[P_PIC],
+ &vq_rem_hdr_bits);
+ sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+
+ mult32_var_q(vq_prev_hdr_bits[B_PIC], vq_rem_frms_in_period[B_PIC],
+ &vq_rem_hdr_bits);
+ sub32_var_q(vq_rem_texture_bits, vq_rem_hdr_bits, &vq_rem_texture_bits);
+ }
+ {
+ /* max_consumable_bits =
+ *(rem_frms_in_period[I_PIC] * max_bits_per_frm[0] ) +
+ *(rem_frms_in_period[P_PIC] + rem_frms_in_period[B_PIC] ) * max_bits_per_frm[1];
+ */
+ number_t vq_max_bits, vq_max_bits_per_frm[2];
+
+ SET_VAR_Q(vq_max_bits_per_frm[0],
+ ps_bit_allocation->i4_max_bits_per_frm[0], 0);
+ SET_VAR_Q(vq_max_bits_per_frm[1],
+ ps_bit_allocation->i4_max_bits_per_frm[1], 0);
+
+ mult32_var_q(vq_rem_frms_in_period[I_PIC], vq_max_bits_per_frm[0],
+ &vq_max_bits);
+ vq_max_consumable_bits = vq_max_bits;
+
+ mult32_var_q(vq_rem_frms_in_period[P_PIC], vq_max_bits_per_frm[1],
+ &vq_max_bits);
+ add32_var_q(vq_max_bits, vq_max_consumable_bits,
+ &vq_max_consumable_bits);
+
+ mult32_var_q(vq_rem_frms_in_period[B_PIC], vq_max_bits_per_frm[1],
+ &vq_max_bits);
+ add32_var_q(vq_max_bits, vq_max_consumable_bits,
+ &vq_max_consumable_bits);
+ }
+
+ /* rem_texture_bits = MIN(rem_texture_bits, max_consumable_bits) */
+ MIN_VARQ(vq_max_consumable_bits, vq_rem_texture_bits, vq_rem_texture_bits);
+
+ /* The bits are then allocated based on the relative complexity of the
+ current frame with respect to that of the rest of the frames in period */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ number_t vq_lin_mod_coeff, vq_est_sad, vq_K;
+
+ /* Getting the linear model coefficient */
+ vq_lin_mod_coeff = irc_get_linear_coefficient(pps_rd_model[i]);
+
+ /* Getting the estimated SAD */
+ SET_VAR_Q(vq_est_sad, irc_get_est_sad(ps_est_sad,i), 0);
+
+ /* Making K factor a var Q format */
+ SET_VAR_Q(vq_K, ps_bit_allocation->i2_K[i], K_Q);
+
+ /* Complexity_estimate = [ (lin_mod_coeff * estimated_sad) / K factor ] */
+ mult32_var_q(vq_lin_mod_coeff, vq_est_sad, &vq_lin_mod_coeff);
+ div32_var_q(vq_lin_mod_coeff, vq_K, &vq_complexity_estimate[i]);
+ }
+
+ /*
+ * For simple cases, one of the complexities go to zero and in those cases
+ * distribute the bits evenly among frames based on I_TO_P_BIT_RATIO
+ */
+
+ /* Also check the B-pictures complexity only in case they are present*/
+ if(i4_frms_in_period[B_PIC] == 0)
+ {
+ complexity_est = (vq_complexity_estimate[I_PIC] /* non-zero only when every estimate tested is non-zero */
+ && vq_complexity_estimate[P_PIC]);
+ }
+ else
+ {
+ complexity_est = (vq_complexity_estimate[I_PIC]
+ && vq_complexity_estimate[P_PIC]
+ && vq_complexity_estimate[B_PIC]);
+ }
+
+ if(complexity_est)
+ {
+ /*
+ * Estimated texture bits =
+ * (remaining bits) * (cur frm complexity)
+ * ---------------------------------------
+ * (num_i_frm*i_frm_complexity) + (num_p_frm*pfrm_complexity)
+ * + (b_frm * b_frm_cm)
+ */
+ mult32_var_q(vq_rem_texture_bits, vq_complexity_estimate[e_pic_type],
+ &vq_rem_texture_bits);
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ mult32_var_q(vq_rem_frms_in_period[i], vq_complexity_estimate[i],
+ &vq_rem_frms_in_period[i]);
+ }
+ /* the denominator is accumulated in place in vq_rem_frms_in_period[I_PIC] */
+ add32_var_q(vq_rem_frms_in_period[I_PIC], vq_rem_frms_in_period[P_PIC],
+ &vq_rem_frms_in_period[I_PIC]);
+
+ add32_var_q(vq_rem_frms_in_period[I_PIC], vq_rem_frms_in_period[B_PIC],
+ &vq_rem_frms_in_period[I_PIC]);
+
+ div32_var_q(vq_rem_texture_bits, vq_rem_frms_in_period[I_PIC],
+ &vq_est_texture_bits_for_frm);
+
+ number_t_to_word32(vq_est_texture_bits_for_frm,
+ &i4_est_texture_bits_for_frm);
+ }
+ else
+ {
+ number_t vq_i_to_p_bit_ratio, vq_rem_frms;
+
+ SET_VAR_Q(vq_i_to_p_bit_ratio, I_TO_P_BIT_RATIO, 0);
+
+ /* rem_frms = ((I_TO_P_BIT_RATIO * rem_frms_in_period[I_PIC]) +
+ * rem_frms_in_period[P_PIC] + rem_frms_in_period[B_PIC]);
+ */
+ mult32_var_q(vq_rem_frms_in_period[I_PIC], vq_i_to_p_bit_ratio,
+ &vq_rem_frms);
+ add32_var_q(vq_rem_frms_in_period[P_PIC], vq_rem_frms, &vq_rem_frms);
+ add32_var_q(vq_rem_frms_in_period[B_PIC], vq_rem_frms, &vq_rem_frms);
+
+ /* est_texture_bits_for_frm = rem_texture_bits / rem_frms */
+ div32_var_q(vq_rem_texture_bits, vq_rem_frms,
+ &vq_est_texture_bits_for_frm);
+ number_t_to_word32(vq_est_texture_bits_for_frm,
+ &i4_est_texture_bits_for_frm);
+ /* an I picture gets I_TO_P_BIT_RATIO times the per-frame share */
+ i4_est_texture_bits_for_frm =
+ (I_PIC == e_pic_type) ?
+ (i4_est_texture_bits_for_frm
+ * I_TO_P_BIT_RATIO) :
+ i4_est_texture_bits_for_frm;
+ }
+
+ /*
+ * If the remaining bits in the period becomes negative then the estimated
+ * texture bits would also become negative. This would send a feedback to
+ * the model which may go for a toss. Thus sending the minimum possible
+ * value = 0
+ */
+ if(i4_est_texture_bits_for_frm < 0)
+ {
+ i4_est_texture_bits_for_frm = 0;
+ }
+
+ return (i4_est_texture_bits_for_frm);
+}
+
+/******************************************************************************
+ Function Name : irc_ba_get_cur_frm_est_header_bits
+ Description : Returns the header bits stored for the previous frame of the
+ given picture type, used as the estimate for the current frame.
+ ******************************************************************************/
+WORD32 irc_ba_get_cur_frm_est_header_bits(bit_allocation_t *ps_bit_allocation,
+ picture_type_e e_pic_type)
+{
+ return (ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type]);
+}
+/* Returns rem_bits_in_period converted to WORD32, after re-checking the GOP size (update with 0 bits). */
+WORD32 irc_ba_get_rem_bits_in_period(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling)
+{
+ WORD32 i4_rem_bits_in_gop = 0;
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+ number_t_to_word32(ps_bit_allocation->s_rbip.vq_rem_bits_in_period,
+ &i4_rem_bits_in_gop);
+ return (i4_rem_bits_in_gop);
+}
+
+/*******************************************************************************
+ Function Name : irc_ba_update_cur_frm_consumed_bits
+ Description : Charges the bits consumed by the current frame to the period, stores
+ its header bits, and handles scene-change and end-of-GOP adjustments.
+ ******************************************************************************/
+void irc_ba_update_cur_frm_consumed_bits(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_total_frame_bits,
+ WORD32 i4_model_updation_hdr_bits,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_scd,
+ WORD32 i4_last_frm_in_gop)
+{
+ WORD32 i4_error_bits = irc_get_error_bits(ps_bit_allocation->ps_error_bits);
+
+ /* Update the remaining bits in period: subtract the frame bits, add back the error bits */
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ (-i4_total_frame_bits + i4_error_bits));
+
+ /*
+ * Update the header bits so that it can be used as an estimate to the next
+ * frame
+ */
+ if(u1_is_scd)
+ {
+ /*
+ * In case of SCD, even though the frame type is P, it is equivalent to
+ * a I frame and so the corresponding header bits is updated
+ */
+ ps_bit_allocation->i4_prev_frm_header_bits[I_PIC] =
+ i4_model_updation_hdr_bits;
+
+#define MAX_NUM_GOPS_IN_PERIOD (3)
+ if(ps_bit_allocation->i4_num_gops_in_period < MAX_NUM_GOPS_IN_PERIOD)
+ {
+ /*
+ * Whenever there is a scene change increase the number of gops by
+ * 2 so that the number of bits allocated is not very constrained
+ */
+ ps_bit_allocation->i4_num_gops_in_period += 2;
+ /* Add the extra bits in GOP to remaining bits in period */
+ irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ ps_bit_allocation->i4_bits_per_frm,
+ ps_bit_allocation->i4_num_gops_in_period);
+ }
+ }
+ else
+ {
+ ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type] =
+ i4_model_updation_hdr_bits;
+ }
+
+ if(i4_last_frm_in_gop)
+ {
+ WORD32 i4_num_bits_in_a_gop = get_number_of_frms_in_a_gop(
+ ps_pic_handling) * ps_bit_allocation->i4_bits_per_frm;
+ /*
+ * If the number of gops in period has been increased due to scene
+ * change, slowly bring in down across the gops
+ */
+ if(ps_bit_allocation->i4_num_gops_in_period
+ > ps_bit_allocation->i4_actual_num_gops_in_period)
+ {
+ ps_bit_allocation->i4_num_gops_in_period--;
+ irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ ps_bit_allocation->i4_bits_per_frm,
+ ps_bit_allocation->i4_num_gops_in_period);
+ }
+ /*
+ * Add one GOP worth of bits for the next GOP. Any surplus or deficit
+ * already in rem_bits_in_period carries over into it
+ */
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ i4_num_bits_in_a_gop);
+ }
+ /* Update the lower modules */
+ irc_update_error_bits(ps_bit_allocation->ps_error_bits);
+}
+/* Applies a new bit rate, frame rate and peak bit rates: recomputes the per-frame bits, rescales rem_bits_in_period, updates error bits. */
+void irc_ba_change_remaining_bits_in_period(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frame_rate,
+ WORD32 *i4_peak_bit_rate)
+{
+ WORD32 i4_new_avg_bits_per_frm;
+ WORD32 i4_new_peak_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ WORD32 i4_rem_frms_in_period[MAX_PIC_TYPE];
+ int i;
+
+ /* Calculate the new per frame bits */
+ X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frame_rate, i4_new_avg_bits_per_frm);
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frame_rate,
+ i4_new_peak_bits_per_frm[i]);
+ }
+
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_bit_allocation->i4_max_bits_per_frm[i] = i4_new_peak_bits_per_frm[i];
+ }
+
+ /*
+ * Query the remaining frames in the GOP from the pic_type state struct.
+ * NOTE(review): i4_rem_frms_in_period is not read again in this function;
+ * irc_ba_change_rbip() below performs its own query
+ */
+ irc_pic_type_get_rem_frms_in_gop(ps_pic_handling, i4_rem_frms_in_period);
+
+ /*
+ * If the difference > 0(/ <0), the remaining bits in period needs to be
+ * increased(/decreased) based on the remaining number of frames
+ */
+ irc_ba_change_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ i4_new_avg_bits_per_frm,
+ ps_bit_allocation->i4_num_gops_in_period);
+
+ /* Update the new average bits per frame */
+ ps_bit_allocation->i4_bits_per_frm = i4_new_avg_bits_per_frm;
+ /* change the lower modules state */
+ irc_change_bitrate_in_error_bits(ps_bit_allocation->ps_error_bits,
+ i4_bit_rate);
+ irc_change_frm_rate_in_error_bits(ps_bit_allocation->ps_error_bits,
+ i4_frame_rate);
+
+ /* Store the modified frame rate, bit rate and peak bit rates */
+ ps_bit_allocation->i4_frame_rate = i4_frame_rate;
+ ps_bit_allocation->i4_bit_rate = i4_bit_rate;
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i];
+}
+/* Stores new peak bit rates and recomputes i4_max_bits_per_frm[] from them using the stored frame rate. */
+void irc_ba_change_ba_peak_bit_rate(bit_allocation_t *ps_bit_allocation,
+ WORD32 *ai4_peak_bit_rate)
+{
+ WORD32 i;
+
+ /* Calculate the maximum bits per frame for each drain rate */
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(ai4_peak_bit_rate[i], 1000,
+ ps_bit_allocation->i4_frame_rate,
+ ps_bit_allocation->i4_max_bits_per_frm[i]);
+ ps_bit_allocation->ai4_peak_bit_rate[i] = ai4_peak_bit_rate[i];
+ }
+}
+
+/******************************************************************************
+ * @brief Modifies the remaining bits in period for the GOP which has a forced
+ * I frame (fif). Since fif would cause a new gop to be created, we need to add
+ * the number of encoded frames in the fif GOP worth of bits to remaining bits
+ * in period, i.e. bits_per_frm * frames reported by the pic handling module
+ ******************************************************************************/
+void irc_ba_change_rem_bits_in_prd_at_force_I_frame(bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling)
+{
+ WORD32 i4_frms_in_period;
+ i4_frms_in_period = irc_pic_type_get_frms_in_gop_force_I_frm(
+ ps_pic_handling);
+ irc_ba_update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,
+ ps_bit_allocation->i4_bits_per_frm * i4_frms_in_period);
+}
+
+/******************************************************************************
+ * @brief Keeps the remaining bits in period within the range the period can
+ *        carry, using the saved-bits pool as the balancing account.
+ *
+ *        With N = i4_num_gops_in_period * frames in a GOP:
+ *            max_drain_bits      = N * i4_max_bits_inflow_per_frm
+ *            min_bits_for_period = N * i4_min_bits_per_frm
+ *
+ *        - rem_bits > max_drain_bits: the excess is moved into saved_bits
+ *          and rem_bits is clipped to max_drain_bits.
+ *        - rem_bits < min_bits_for_period: the (negative) shortfall is added
+ *          to saved_bits and rem_bits is raised to min_bits_for_period.
+ *        - otherwise, when saved_bits > 0:
+ *          MIN(saved_bits, max_drain_bits - rem_bits) is moved from
+ *          saved_bits into rem_bits.
+ *
+ * @param ps_bit_allocation           bit allocation state (updated in place)
+ * @param ps_pic_handling             picture handling state, queried for the
+ *                                    number of frames in a GOP
+ * @param i4_cur_buf_size             unused
+ * @param i4_max_buf_size             unused
+ * @param i4_max_bits_inflow_per_frm  per-frame inflow used to derive
+ *                                    max_drain_bits
+ * @param i4_tot_frame_bits           unused
+ ******************************************************************************/
+void irc_ba_check_and_update_bit_allocation(bit_allocation_t *ps_bit_allocation,
+                                            pic_handling_handle ps_pic_handling,
+                                            WORD32 i4_cur_buf_size,
+                                            WORD32 i4_max_buf_size,
+                                            WORD32 i4_max_bits_inflow_per_frm,
+                                            WORD32 i4_tot_frame_bits)
+{
+
+    number_t vq_max_drain_bits, vq_extra_bits, vq_less_bits,
+                    vq_allocated_saved_bits, vq_min_bits_for_period;
+    WORD32 i4_num_frms_in_period = get_number_of_frms_in_a_gop(ps_pic_handling);
+    number_t vq_rem_bits_in_period, vq_num_frms_in_period, vq_zero;
+    WORD32 b_rem_bits_gt_max_drain, b_rem_bits_lt_min_bits,
+                    b_saved_bits_gt_zero;
+    rem_bit_in_prd_t *ps_rbip = &ps_bit_allocation->s_rbip;
+
+    UNUSED(i4_cur_buf_size);
+    UNUSED(i4_max_buf_size);
+    UNUSED(i4_tot_frame_bits);
+
+    /*
+     * max_drain_bits = num_gops_in_period * num_frms_in_period *
+     *                  max_bits_inflow_per_frm
+     */
+    SET_VAR_Q(vq_num_frms_in_period,
+              (ps_bit_allocation->i4_num_gops_in_period * i4_num_frms_in_period),
+              0);
+    SET_VAR_Q(vq_max_drain_bits, i4_max_bits_inflow_per_frm, 0);
+    SET_VAR_Q(vq_zero, 0, 0);
+    mult32_var_q(vq_max_drain_bits, vq_num_frms_in_period, &vq_max_drain_bits);
+
+    /*
+     * min_bits_for_period = num_gops_in_period * num_frms_in_period *
+     *                       min_bits_per_frm
+     */
+    SET_VAR_Q(vq_min_bits_for_period, ps_bit_allocation->i4_min_bits_per_frm,
+              0);
+    mult32_var_q(vq_min_bits_for_period, vq_num_frms_in_period,
+                 &vq_min_bits_for_period);
+
+    /* Snapshot of rem_bits_in_period taken before any adjustment below */
+    vq_rem_bits_in_period = ps_rbip->vq_rem_bits_in_period;
+
+    /* Evaluate rem_bits_in_period > max_drain_bits */
+    VQ_A_GT_VQ_B(ps_rbip->vq_rem_bits_in_period, vq_max_drain_bits,
+                 b_rem_bits_gt_max_drain);
+
+    /* Evaluate rem_bits_in_period < min_bits_for_period */
+    VQ_A_LT_VQ_B(ps_rbip->vq_rem_bits_in_period, vq_min_bits_for_period,
+                 b_rem_bits_lt_min_bits);
+
+    /*
+     * Evaluate saved_bits > 0. This must be the greater-than comparison:
+     * the last branch below hands out bits from the pool, which is only
+     * meaningful while the pool is positive. A less-than test would leave a
+     * positive pool unused and subtract a negative pool from rem_bits.
+     */
+    VQ_A_GT_VQ_B(ps_bit_allocation->vq_saved_bits, vq_zero,
+                 b_saved_bits_gt_zero);
+
+    /* (i4_rem_bits_in_period > i4_max_drain_bits) */
+    if(b_rem_bits_gt_max_drain)
+    {
+        /* extra_bits = rem_bits_in_period - max_drain_bits */
+        sub32_var_q(ps_rbip->vq_rem_bits_in_period, vq_max_drain_bits,
+                    &vq_extra_bits);
+
+        /* saved_bits += extra_bits */
+        add32_var_q(ps_bit_allocation->vq_saved_bits, vq_extra_bits,
+                    &ps_bit_allocation->vq_saved_bits);
+
+        /* rem_bits_in_period = vq_max_drain_bits */
+        ps_rbip->vq_rem_bits_in_period = vq_max_drain_bits;
+    }
+    else if(b_rem_bits_lt_min_bits)
+    {
+        /* extra_bits(-ve) = rem_bits_in_period - i4_min_bits_for_period */
+        sub32_var_q(ps_rbip->vq_rem_bits_in_period, vq_min_bits_for_period,
+                    &vq_extra_bits);
+
+        /* saved_bits += extra_bits(-ve) */
+        add32_var_q(ps_bit_allocation->vq_saved_bits, vq_extra_bits,
+                    &ps_bit_allocation->vq_saved_bits);
+
+        /* rem_bits_in_period = min_bits_for_period */
+        ps_rbip->vq_rem_bits_in_period = vq_min_bits_for_period;
+    }
+    else if(b_saved_bits_gt_zero)
+    {
+        /* less_bits = max_drain_bits - rem_bits_in_period */
+        sub32_var_q(vq_max_drain_bits, vq_rem_bits_in_period, &vq_less_bits);
+
+        /* allocated_saved_bits = MIN (less_bits, saved_bits) */
+        MIN_VARQ(ps_bit_allocation->vq_saved_bits, vq_less_bits,
+                 vq_allocated_saved_bits);
+
+        /* rem_bits_in_period += allocated_saved_bits */
+        add32_var_q(ps_rbip->vq_rem_bits_in_period, vq_allocated_saved_bits,
+                    &ps_rbip->vq_rem_bits_in_period);
+
+        /* saved_bits -= allocated_saved_bits */
+        sub32_var_q(ps_bit_allocation->vq_saved_bits, vq_allocated_saved_bits,
+                    &ps_bit_allocation->vq_saved_bits);
+    }
+}
+
+/* Reads back the frame rate currently held in the bit allocation state */
+WORD32 irc_ba_get_frame_rate(bit_allocation_t *ps_bit_allocation)
+{
+    WORD32 i4_cur_frame_rate = ps_bit_allocation->i4_frame_rate;
+
+    return i4_cur_frame_rate;
+}
+
+/* Reads back the bit rate currently held in the bit allocation state */
+WORD32 irc_ba_get_bit_rate(bit_allocation_t *ps_bit_allocation)
+{
+    WORD32 i4_cur_bit_rate = ps_bit_allocation->i4_bit_rate;
+
+    return i4_cur_bit_rate;
+}
+
+/*
+ * Copies the MAX_NUM_DRAIN_RATES peak bit rates held in the bit allocation
+ * state into the caller supplied array pi4_peak_bit_rate.
+ */
+void irc_ba_get_peak_bit_rate(bit_allocation_t *ps_bit_allocation,
+                              WORD32 *pi4_peak_bit_rate)
+{
+    WORD32 i4_rate_idx = 0;
+
+    while(i4_rate_idx < MAX_NUM_DRAIN_RATES)
+    {
+        pi4_peak_bit_rate[i4_rate_idx] =
+                        ps_bit_allocation->ai4_peak_bit_rate[i4_rate_idx];
+        i4_rate_idx++;
+    }
+}
diff --git a/encoder/irc_bit_allocation.h b/encoder/irc_bit_allocation.h
new file mode 100755
index 0000000..19ba0df
--- /dev/null
+++ b/encoder/irc_bit_allocation.h
@@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*
+ * Public interface of the rate control bit allocation module.
+ *
+ * This header includes nothing itself: WORD32, UWORD8, itt_memtab_t,
+ * ITT_FUNC_TYPE_E, pic_handling_handle, rc_rd_model_handle, est_sad_handle
+ * and picture_type_e must already be declared by the including file.
+ *
+ * NOTE(review): identifiers that begin with an underscore followed by an
+ * uppercase letter are reserved in C; consider renaming this include guard.
+ */
+#ifndef _BIT_ALLOCATION_H_
+#define _BIT_ALLOCATION_H_
+
+/*
+ * Handle to the bit allocation state. struct bit_allocation_t is not defined
+ * in this header, so includers see it only as an incomplete type.
+ */
+typedef struct bit_allocation_t *bit_allocation_handle;
+
+/*
+ * Memory-table entry point of the module (definition not visible here).
+ * Presumably follows the same GET_NUM_MEMTAB / FILL_MEMTAB protocol as the
+ * other *_num_fill_use_free_memtab functions - confirm against the definition.
+ */
+WORD32 irc_ba_num_fill_use_free_memtab(bit_allocation_handle *pps_bit_allocation,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/*
+ * Initializes the bit allocation state.
+ * NOTE(review): u4_peak_bit_rate is a WORD32 pointer despite its u4_ prefix.
+ */
+void irc_ba_init_bit_allocation(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_num_intra_frm_interval,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frm_rate,
+ WORD32 *u4_peak_bit_rate,
+ WORD32 i4_min_bitrate);
+
+/* Estimates the number of texture bits required by the current frame */
+WORD32 irc_ba_get_cur_frm_est_texture_bits(bit_allocation_handle ps_bit_allocation,
+ rc_rd_model_handle *pps_rd_model,
+ est_sad_handle ps_est_sad,
+ pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type);
+
+/* Estimate the number of header bits required by the current frame */
+WORD32 irc_ba_get_cur_frm_est_header_bits(bit_allocation_handle ps_bit_allocation,
+ picture_type_e e_pic_type);
+
+/* Get the remaining bits allocated in the period */
+WORD32 irc_ba_get_rem_bits_in_period(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling);
+
+/* Returns the frame rate stored in the bit allocation state */
+WORD32 irc_ba_get_frame_rate(bit_allocation_handle ps_bit_allocation);
+
+/* Returns the bit rate stored in the bit allocation state */
+WORD32 irc_ba_get_bit_rate(bit_allocation_handle ps_bit_allocation);
+/* Copies the stored peak bit rates (MAX_NUM_DRAIN_RATES entries) to the caller */
+void irc_ba_get_peak_bit_rate(bit_allocation_handle ps_bit_allocation,
+ WORD32 *pi4_peak_bit_rate);
+
+/* Updates the bit allocation module with the actual encoded values */
+void irc_ba_update_cur_frm_consumed_bits(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_total_frame_bits,
+ WORD32 i4_model_updation_hdr_bits,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_scd,
+ WORD32 i4_last_frm_in_gop);
+
+/*
+ * Clips the remaining bits in period between the minimum bits promised for
+ * the period and the maximum bits that can be drained in it, moving the
+ * difference to or from the saved-bits pool. i4_cur_buf_size,
+ * i4_max_buf_size and i4_tot_frame_bits are not used by the implementation.
+ */
+void irc_ba_check_and_update_bit_allocation(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_cur_buf_size,
+ WORD32 i4_max_buf_size,
+ WORD32 i4_max_bits_inflow_per_frm,
+ WORD32 i4_tot_frame_bits);
+
+/* Based on the change in frame/bit rate update the remaining bits in period */
+void irc_ba_change_remaining_bits_in_period(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frame_rate,
+ WORD32 *i4_peak_bit_rate);
+
+/*
+ * Change the gop size in the middle of a current gop
+ * NOTE(review): unlike the other prototypes this one and
+ * update_rem_frms_in_period below carry no irc_ba_ prefix, and their
+ * definitions are not visible here - confirm they are still implemented.
+ */
+void change_gop_size(bit_allocation_handle ps_bit_allocation,
+ WORD32 i4_intra_frm_interval,
+ WORD32 i4_inter_frm_interval,
+ WORD32 i4_num_intra_frm_interval);
+
+void update_rem_frms_in_period(bit_allocation_handle ps_bit_allocation,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_first_frm,
+ WORD32 i4_intra_frm_interval,
+ WORD32 i4_num_intra_frm_interval);
+
+/*
+ * Updates the remaining bits in period when an I frame is forced: passes
+ * bits_per_frm * (frame count reported by pic handling for the forced-I GOP)
+ * to the remaining-bits update.
+ */
+void irc_ba_change_rem_bits_in_prd_at_force_I_frame(bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling);
+
+/*
+ * Stores new peak bit rates and re-derives the maximum bits per frame for
+ * each drain rate from them and the stored frame rate.
+ */
+void irc_ba_change_ba_peak_bit_rate(bit_allocation_handle ps_bit_allocation,
+ WORD32 *ai4_peak_bit_rate);
+#endif
diff --git a/encoder/irc_cbr_buffer_control.c b/encoder/irc_cbr_buffer_control.c
new file mode 100755
index 0000000..c179a28
--- /dev/null
+++ b/encoder/irc_cbr_buffer_control.c
@@ -0,0 +1,653 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_common.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_trace_support.h"
+
+/*
+ * State of the encoder-side buffer model used for CBR and VBR streaming.
+ * Arrays of size MAX_NUM_DRAIN_RATES are indexed 0 for I_PIC and 1 for every
+ * other picture type by the functions in this file.
+ */
+typedef struct cbr_buffer_t
+{
+ /* Buffer size = Delay * Bitrate (CBR), capped at u4_max_vbv_buf_size */
+ WORD32 i4_buffer_size;
+
+ /* Bits drained from the buffer per frame, one entry per drain rate */
+ WORD32 i4_drain_bits_per_frame[MAX_NUM_DRAIN_RATES];
+
+ /* Encoder Buffer Fullness */
+ WORD32 i4_ebf;
+
+ /* Upper threshold of the Buffer (buffer size minus one eighth of it) */
+ WORD32 i4_upr_thr[MAX_PIC_TYPE];
+
+ /* Lower threshold of the Buffer (set to the per-frame drain bits) */
+ WORD32 i4_low_thr[MAX_PIC_TYPE];
+
+ /* Stuffing threshold equal to error bits per second in the drain bits
+ * fixed point computation */
+ WORD32 i4_stuffing_threshold;
+
+ /* For error due to bits per frame calculation */
+ error_bits_handle aps_bpf_error_bits[MAX_NUM_DRAIN_RATES];
+
+ /* Whether the buffer model is used for CBR or VBR streaming
+ * (1 when both configured bit rates are equal, 0 otherwise) */
+ WORD32 i4_is_cbr_mode;
+
+ /* Input parameters stored for initialization */
+ /* Bit rates last configured, one per drain rate */
+ WORD32 ai4_bit_rate[MAX_NUM_DRAIN_RATES];
+
+ /* Buffer delay last configured; treated as milliseconds when the CBR
+ * buffer size is derived from it */
+ WORD32 i4_max_delay;
+
+ /* Number of pictures in the delay period, used for the VBR buffer size */
+ WORD32 ai4_num_pics_in_delay_period[MAX_PIC_TYPE];
+
+ /* Target frame rate last configured */
+ WORD32 i4_tgt_frm_rate;
+
+ /* Upper limit applied to i4_buffer_size */
+ UWORD32 u4_max_vbv_buf_size;
+
+} cbr_buffer_t;
+
+/******************************************************************************
+ * @brief Memory-table entry point of the CBR buffer module.
+ *
+ *        Reserves one memtab entry for the cbr_buffer_t state and then lets
+ *        the error-bits module claim its entries, once per drain rate.
+ *        For GET_NUM_MEMTAB nothing is written to ps_memtab; the entries are
+ *        only counted.
+ *
+ * @param pps_cbr_buffer  address of the state pointer
+ * @param ps_memtab       memtab array, starting at this module's first entry
+ * @param e_func_type     requested operation
+ *
+ * @return number of memtab entries used by this module and its sub-modules
+ ******************************************************************************/
+WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_t **pps_cbr_buffer,
+                                               itt_memtab_t *ps_memtab,
+                                               ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Stand-in state for the calls that are made before any memory exists */
+    static cbr_buffer_t s_cbr_buffer_temp;
+    WORD32 i4_num_entries = 0;
+    WORD32 i4_rate_idx;
+
+    /*
+     * While counting or filling memtabs there is no real state yet; point the
+     * handle at the stand-in so that the member access below stays valid.
+     */
+    if(GET_NUM_MEMTAB == e_func_type || FILL_MEMTAB == e_func_type)
+    {
+        *pps_cbr_buffer = &s_cbr_buffer_temp;
+    }
+
+    /* Entry 0 : the cbr_buffer_t state itself */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        fill_memtab(&ps_memtab[i4_num_entries], sizeof(cbr_buffer_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_cbr_buffer, e_func_type);
+    }
+    i4_num_entries++;
+
+    /* Remaining entries : one error-bits state per drain rate */
+    i4_rate_idx = 0;
+    while(i4_rate_idx < MAX_NUM_DRAIN_RATES)
+    {
+        i4_num_entries += irc_error_bits_num_fill_use_free_memtab(
+                        &(*pps_cbr_buffer)->aps_bpf_error_bits[i4_rate_idx],
+                        &ps_memtab[i4_num_entries], e_func_type);
+        i4_rate_idx++;
+    }
+
+    return i4_num_entries;
+}
+
+/******************************************************************************
+ * @brief Initialize the CBR VBV buffer state.
+ * This could however be used for VBR streaming VBV also
+ *
+ * @param ps_cbr_buffer             state to initialize
+ * @param i4_buffer_delay           buffer delay (used as ms for the CBR size)
+ * @param i4_tgt_frm_rate           target frame rate
+ * @param i4_bit_rate               MAX_NUM_DRAIN_RATES bit rates; equal
+ *                                  entries [0] and [1] select CBR mode
+ * @param u4_num_pics_in_delay_prd  MAX_PIC_TYPE picture counts in the delay
+ *                                  period (entries 0 and 1 size the VBR buffer)
+ * @param u4_vbv_buf_size           upper limit for the buffer size
+ ******************************************************************************/
+void irc_init_cbr_buffer(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_buffer_delay,
+ WORD32 i4_tgt_frm_rate,
+ WORD32 *i4_bit_rate,
+ UWORD32 *u4_num_pics_in_delay_prd,
+ UWORD32 u4_vbv_buf_size)
+{
+ WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ int i;
+
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, i4_tgt_frm_rate,
+ i4_bits_per_frm[i]);
+ /* Drain rate = bitrate/(framerate/1000) */
+ ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+ /* Initialize the bits per frame error bits calculation */
+ irc_init_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+ i4_tgt_frm_rate, i4_bit_rate[i]);
+ }
+
+ /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/
+ /* Equal bit rates for both drain rates would mean CBR mode */
+ if(i4_bit_rate[0] == i4_bit_rate[1])
+ {
+ X_PROD_Y_DIV_Z(i4_bit_rate[0], i4_buffer_delay, 1000,
+ ps_cbr_buffer->i4_buffer_size);
+ ps_cbr_buffer->i4_is_cbr_mode = 1;
+ }
+ else
+ {
+ /* VBR streaming case which has different drain rates for I and P */
+ /* NOTE(review): the products are formed in unsigned arithmetic
+ * (UWORD32 * WORD32) before being stored in a WORD32 */
+ ps_cbr_buffer->i4_buffer_size = u4_num_pics_in_delay_prd[0]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+ + u4_num_pics_in_delay_prd[1]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ ps_cbr_buffer->i4_is_cbr_mode = 0;
+ }
+
+ /* The buffer can never be larger than the VBV buffer size passed in */
+ if(ps_cbr_buffer->i4_buffer_size > (WORD32)u4_vbv_buf_size)
+ {
+ ps_cbr_buffer->i4_buffer_size = u4_vbv_buf_size;
+ }
+
+ /* Initially Encoder buffer fullness is zero */
+ ps_cbr_buffer->i4_ebf = 0;
+
+ /* tgt_frame_rate is divided by 1000 because, an approximate value is fine
+ * as this is just a threshold below which stuffing is done to avoid buffer
+ * underflow due to fixed point error in drain rate
+ */
+ ps_cbr_buffer->i4_stuffing_threshold = (i4_bit_rate[0]
+ - (i4_bits_per_frm[0] * (i4_tgt_frm_rate / 1000)));
+
+ for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+ {
+ /*
+ * Upper threshold is the same for every picture type: the buffer
+ * size minus one eighth of it (buffer_size - (buffer_size >> 3)).
+ */
+ WORD32 i4_index;
+ /* Picture type 0 uses drain rate 0, all other types use drain rate 1 */
+ i4_index = i4_i > 0 ? 1 : 0;
+ ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+ - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+ /*
+ * For both I and P frame Lower threshold is equal to drain rate.Even if
+ * the encoder consumes zero bits it should have enough bits to drain
+ */
+ ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+ }
+
+ /* Storing the input parameters for using it for change functions */
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i];
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_cbr_buffer->ai4_num_pics_in_delay_period[i] =
+ u4_num_pics_in_delay_prd[i];
+ }
+ ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate;
+ ps_cbr_buffer->i4_max_delay = i4_buffer_delay;
+ ps_cbr_buffer->u4_max_vbv_buf_size = u4_vbv_buf_size;
+}
+
+/******************************************************************************
+ * @brief Constrains the target bits of a frame using the buffer state.
+ *
+ *        ceiling = upper threshold - current buffer fullness
+ *        floor   = lower threshold - (current buffer fullness - drain bits)
+ *        Both limits are floored at zero. A target above the ceiling becomes
+ *        the ceiling; otherwise a target below the floor becomes the floor.
+ *        The ceiling test is made first, so it wins if the limits cross.
+ *
+ * @param ps_cbr_buffer  buffer state (read only)
+ * @param i4_tgt_bits    bits proposed for the frame
+ * @param e_pic_type     picture type of the frame
+ *
+ * @return the constrained target bits
+ ******************************************************************************/
+WORD32 irc_cbr_buffer_constraint_check(cbr_buffer_t *ps_cbr_buffer,
+                                       WORD32 i4_tgt_bits,
+                                       picture_type_e e_pic_type)
+{
+    /* I pictures drain at rate 0, every other picture type at rate 1 */
+    WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_drain_bits = ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+    WORD32 i4_ceiling;
+    WORD32 i4_floor;
+
+    /* Room left below the upper threshold, never negative */
+    i4_ceiling = ps_cbr_buffer->i4_upr_thr[e_pic_type] - ps_cbr_buffer->i4_ebf;
+    if(i4_ceiling < 0)
+    {
+        i4_ceiling = 0;
+    }
+
+    /*
+     * Bits needed so that the buffer stays above the lower threshold once
+     * this frame's drain has been taken out, never negative
+     */
+    i4_floor = ps_cbr_buffer->i4_low_thr[e_pic_type]
+                    - (ps_cbr_buffer->i4_ebf - i4_drain_bits);
+    if(i4_floor < 0)
+    {
+        i4_floor = 0;
+    }
+
+    if(i4_tgt_bits > i4_ceiling)
+    {
+        return i4_ceiling;
+    }
+    if(i4_tgt_bits < i4_floor)
+    {
+        return i4_floor;
+    }
+    return i4_tgt_bits;
+}
+
+/******************************************************************************
+ * @brief Constrains the target bits of a frame for VBR streaming.
+ *
+ *        Only an upper limit applies: the room left between the current
+ *        buffer fullness and the upper threshold, floored at zero.
+ *
+ * @param ps_cbr_buffer  buffer state (read only)
+ * @param i4_tgt_bits    bits proposed for the frame
+ * @param e_pic_type     picture type of the frame
+ *
+ * @return i4_tgt_bits, or the limit if i4_tgt_bits exceeds it
+ ******************************************************************************/
+WORD32 irc_vbr_stream_buffer_constraint_check(cbr_buffer_t *ps_cbr_buffer,
+                                              WORD32 i4_tgt_bits,
+                                              picture_type_e e_pic_type)
+{
+    WORD32 i4_headroom = ps_cbr_buffer->i4_upr_thr[e_pic_type]
+                    - ps_cbr_buffer->i4_ebf;
+
+    /* A buffer already above the threshold leaves no room at all */
+    if(i4_headroom < 0)
+    {
+        i4_headroom = 0;
+    }
+
+    return (i4_tgt_bits > i4_headroom) ? i4_headroom : i4_tgt_bits;
+}
+
+/******************************************************************************
+ * @brief Reports what the buffer would look like after the given frame,
+ *        without modifying the buffer state.
+ *
+ *        The model tracks the encoder buffer whereas the VBV status describes
+ *        the decoder buffer, so the two conditions are reported swapped:
+ *          - encoder buffer above its size            -> VBV_UNDERFLOW
+ *          - encoder buffer below stuffing threshold  -> VBV_OVERFLOW
+ *          - anything else                            -> VBV_NORMAL
+ *
+ * @param ps_cbr_buffer                     buffer state
+ * @param i4_tot_consumed_bits              bits consumed by the frame
+ * @param pi4_num_bits_to_prevent_overflow  out: buffer size minus the
+ *                                          resulting (clamped) fullness
+ * @param e_pic_type                        picture type of the frame
+ *
+ * @return the VBV buffer status
+ ******************************************************************************/
+vbv_buf_status_e irc_get_cbr_buffer_status(cbr_buffer_t *ps_cbr_buffer,
+                                           WORD32 i4_tot_consumed_bits,
+                                           WORD32 *pi4_num_bits_to_prevent_overflow,
+                                           picture_type_e e_pic_type)
+{
+    /* I pictures use drain rate 0, every other picture type drain rate 1 */
+    WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_err_bits = irc_get_error_bits(
+                    ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    WORD32 i4_drain_bits = ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+    WORD32 i4_fullness = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits;
+    vbv_buf_status_e e_vbv_status = VBV_NORMAL;
+
+    if(i4_fullness <= ps_cbr_buffer->i4_buffer_size)
+    {
+        /* The frame fits: take out the drain and the fixed point error bits */
+        i4_fullness -= (i4_drain_bits + i4_err_bits);
+
+        /* Encoder buffer running dry is a decoder buffer overflow */
+        if(i4_fullness < ps_cbr_buffer->i4_stuffing_threshold)
+        {
+            e_vbv_status = VBV_OVERFLOW;
+        }
+
+        if(i4_fullness < 0)
+        {
+            i4_fullness = 0;
+        }
+    }
+    else
+    {
+        /* Encoder buffer overrun is a decoder buffer underflow */
+        e_vbv_status = VBV_UNDERFLOW;
+        i4_fullness = ps_cbr_buffer->i4_buffer_size;
+    }
+
+    *pi4_num_bits_to_prevent_overflow = (ps_cbr_buffer->i4_buffer_size
+                    - i4_fullness);
+
+    return e_vbv_status;
+}
+
+/*******************************************************************************
+ * @brief Moves the buffer model forward by one encoded frame.
+ *
+ *        The consumed bits are added to the encoder buffer fullness, the
+ *        per-frame drain and the fixed point error bits are removed, and the
+ *        result is kept within [0, buffer size].
+ *
+ * @param ps_cbr_buffer         buffer state (i4_ebf is updated)
+ * @param i4_tot_consumed_bits  bits consumed by the frame
+ * @param e_pic_type            picture type of the frame
+ ******************************************************************************/
+void irc_update_cbr_buffer(cbr_buffer_t *ps_cbr_buffer,
+                           WORD32 i4_tot_consumed_bits,
+                           picture_type_e e_pic_type)
+{
+    /* I pictures use drain rate 0, every other picture type drain rate 1 */
+    WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_err_bits = irc_get_error_bits(
+                    ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    WORD32 i4_drain_bits = ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+    WORD32 i4_fullness;
+
+    /* Bits in, then drain and fixed point error bits out */
+    i4_fullness = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits;
+    i4_fullness -= (i4_drain_bits + i4_err_bits);
+
+    if(i4_fullness < 0)
+    {
+        i4_fullness = 0;
+    }
+
+    /* Not expected when stuffing is working; clamp and report it */
+    if(i4_fullness > ps_cbr_buffer->i4_buffer_size)
+    {
+        trace_printf(
+                        (const WORD8*)"Error: Should not be coming here with stuffing\n");
+        i4_fullness = ps_cbr_buffer->i4_buffer_size;
+    }
+
+    ps_cbr_buffer->i4_ebf = i4_fullness;
+}
+
+/*******************************************************************************
+ * @brief Number of bits to stuff so that the buffer ends up exactly at the
+ *        stuffing threshold after this frame.
+ *
+ *        Solves
+ *          stuffing_threshold = ebf + consumed - drain - error + stuff_bits
+ *        for stuff_bits.
+ *
+ * @param ps_cbr_buffer         buffer state (read only)
+ * @param i4_tot_consumed_bits  bits consumed by the frame
+ * @param e_pic_type            picture type of the frame
+ *
+ * @return the number of bits to stuff
+ ******************************************************************************/
+WORD32 irc_get_cbr_bits_to_stuff(cbr_buffer_t *ps_cbr_buffer,
+                                 WORD32 i4_tot_consumed_bits,
+                                 picture_type_e e_pic_type)
+{
+    /* I pictures use drain rate 0, every other picture type drain rate 1 */
+    WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_err_bits = irc_get_error_bits(
+                    ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    WORD32 i4_drain_bits = ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+
+    /* What has to be in the buffer to land on the threshold after draining */
+    WORD32 i4_bits_needed = i4_drain_bits + i4_err_bits
+                    + ps_cbr_buffer->i4_stuffing_threshold;
+    /* What is in the buffer once this frame has been added */
+    WORD32 i4_bits_present = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits;
+
+    return i4_bits_needed - i4_bits_present;
+}
+
+/*******************************************************************************
+ * @brief Applies a new set of picture counts for the delay period.
+ *
+ *        Has an effect in VBR streaming mode only: the buffer size is
+ *        recomputed from the picture counts and the per-frame drain bits,
+ *        capped at the maximum VBV buffer size, and the upper thresholds are
+ *        refreshed. In CBR mode the call changes nothing.
+ *
+ * @param ps_cbr_buffer             buffer state
+ * @param u4_num_pics_in_delay_prd  MAX_PIC_TYPE picture counts
+ ******************************************************************************/
+void irc_change_cbr_vbv_num_pics_in_delay_period(cbr_buffer_t *ps_cbr_buffer,
+                                                 UWORD32 *u4_num_pics_in_delay_prd)
+{
+    WORD32 i4_pic_type;
+    WORD32 i4_upr_thr;
+
+    /* The CBR buffer size does not depend on the picture counts */
+    if(ps_cbr_buffer->i4_is_cbr_mode)
+    {
+        return;
+    }
+
+    ps_cbr_buffer->i4_buffer_size =
+                    u4_num_pics_in_delay_prd[0]
+                                    * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+                                    + u4_num_pics_in_delay_prd[1]
+                                                    * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+    if(ps_cbr_buffer->i4_buffer_size
+                    > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+
+    /* Upper threshold : buffer size minus one eighth of it */
+    i4_upr_thr = ps_cbr_buffer->i4_buffer_size
+                    - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        ps_cbr_buffer->i4_upr_thr[i4_pic_type] = i4_upr_thr;
+
+        /* Keep the new counts for the later change functions */
+        ps_cbr_buffer->ai4_num_pics_in_delay_period[i4_pic_type] =
+                        u4_num_pics_in_delay_prd[i4_pic_type];
+    }
+}
+
+/******************************************************************************
+ * @brief update the state for change in target frame rate
+ *
+ * Re-derives the per-frame drain bits from the stored bit rates and the new
+ * frame rate, then refreshes the VBR buffer size, the stuffing threshold and
+ * the upper/lower thresholds.
+ *
+ * @param ps_cbr_buffer    buffer state (updated in place)
+ * @param i4_tgt_frm_rate  new target frame rate
+ ******************************************************************************/
+void irc_change_cbr_vbv_tgt_frame_rate(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 i4_tgt_frm_rate)
+{
+ WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ int i;
+
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(ps_cbr_buffer->ai4_bit_rate[i], 1000, i4_tgt_frm_rate,
+ i4_bits_per_frm[i]);
+ /* Drain rate = bitrate/(framerate/1000) */
+ ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+ /* Pass the new frame rate on to the error bits module */
+ irc_change_frm_rate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+ i4_tgt_frm_rate);
+ }
+
+ /*
+ * In CBR mode the buffer size is left as it is here. Only the VBR
+ * streaming size, which is built from the per-frame drain bits, is
+ * recomputed.
+ */
+ if(!ps_cbr_buffer->i4_is_cbr_mode)
+ {
+ /* VBR streaming case which has different drain rates for I and P */
+ ps_cbr_buffer->i4_buffer_size =
+ ps_cbr_buffer->ai4_num_pics_in_delay_period[0]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+ + ps_cbr_buffer->ai4_num_pics_in_delay_period[1]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+ }
+
+ /* The buffer can never be larger than the stored maximum VBV size */
+ if(ps_cbr_buffer->i4_buffer_size
+ > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+ {
+ ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+ }
+
+ /*
+ * Tgt_frame_rate is divided by 1000 because an approximate value is fine as
+ * this is just a threshold below which stuffing is done to avoid buffer
+ * underflow due to fixed point error in drain rate
+ */
+ ps_cbr_buffer->i4_stuffing_threshold = (ps_cbr_buffer->ai4_bit_rate[0]
+ - (i4_bits_per_frm[0] * (i4_tgt_frm_rate / 1000)));
+
+ for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+ {
+ /*
+ * Upper threshold is the same for every picture type: the buffer
+ * size minus one eighth of it (buffer_size - (buffer_size >> 3)).
+ */
+ WORD32 i4_index;
+ /* Picture type 0 uses drain rate 0, all other types use drain rate 1 */
+ i4_index = i4_i > 0 ? 1 : 0;
+ ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+ - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+ /*
+ * For both I and P frame Lower threshold is equal to drain rate.
+ * Even if the encoder consumes zero bits it should have enough bits to
+ * drain
+ */
+ ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+ }
+
+ /* Storing the input parameters for using it for change functions */
+ ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate;
+}
+
+/*******************************************************************************
+ * @brief Change the state for change in bit rate
+ *
+ * Re-derives the per-frame drain bits from the new bit rates and the stored
+ * frame rate, re-evaluates the CBR/VBR mode, and refreshes the buffer size,
+ * the stuffing threshold and the upper/lower thresholds.
+ *
+ * @param ps_cbr_buffer  buffer state (updated in place)
+ * @param i4_bit_rate    MAX_NUM_DRAIN_RATES new bit rates; equal entries [0]
+ *                       and [1] select CBR mode
+ ******************************************************************************/
+void irc_change_cbr_vbv_bit_rate(cbr_buffer_t *ps_cbr_buffer,
+ WORD32 *i4_bit_rate)
+{
+ WORD32 i4_i, i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ int i;
+
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, ps_cbr_buffer->i4_tgt_frm_rate,
+ i4_bits_per_frm[i]);
+ /* Drain rate = bitrate/(framerate/1000) */
+ ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+ /* Pass the new bit rate on to the error bits module */
+ irc_change_bitrate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i],
+ i4_bit_rate[i]);
+ }
+
+ /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms*/
+ if(i4_bit_rate[0] == i4_bit_rate[1]) /* This would mean CBR mode */
+ {
+ X_PROD_Y_DIV_Z(i4_bit_rate[0], ps_cbr_buffer->i4_max_delay, 1000,
+ ps_cbr_buffer->i4_buffer_size);
+ ps_cbr_buffer->i4_is_cbr_mode = 1;
+ }
+ else
+ {
+ /* VBR streaming case which has different drain rates for I and P */
+ ps_cbr_buffer->i4_buffer_size =
+ ps_cbr_buffer->ai4_num_pics_in_delay_period[0]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[0]
+ + ps_cbr_buffer->ai4_num_pics_in_delay_period[1]
+ * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+ ps_cbr_buffer->i4_is_cbr_mode = 0;
+ }
+
+ /* The buffer can never be larger than the stored maximum VBV size */
+ if(ps_cbr_buffer->i4_buffer_size
+ > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+ {
+ ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+ }
+
+ /*
+ * tgt_frame_rate is divided by 1000 because
+ * an approximate value is fine as this is just a threshold below which
+ * stuffing is done to avoid buffer underflow due to fixed point
+ * error in drain rate
+ */
+ ps_cbr_buffer->i4_stuffing_threshold = (i4_bit_rate[0]
+ - (i4_bits_per_frm[0]
+ * (ps_cbr_buffer->i4_tgt_frm_rate / 1000)));
+
+ for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+ {
+ /*
+ * Upper threshold is the same for every picture type: the buffer
+ * size minus one eighth of it (buffer_size - (buffer_size >> 3)).
+ */
+
+ WORD32 i4_index;
+ /* Picture type 0 uses drain rate 0, all other types use drain rate 1 */
+ i4_index = i4_i > 0 ? 1 : 0;
+ ps_cbr_buffer->i4_upr_thr[i4_i] = ps_cbr_buffer->i4_buffer_size
+ - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+ /* For both I and P frame Lower threshold is equal to drain rate.
+ * Even if the encoder consumes zero bits it should have enough bits to
+ * drain
+ */
+ ps_cbr_buffer->i4_low_thr[i4_i] = i4_bits_per_frm[i4_index];
+ }
+
+ /* Storing the input parameters for using it for change functions */
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i];
+ }
+}
+
+/*******************************************************************************
+ * @brief Applies a new buffer delay.
+ *
+ *        In CBR mode the buffer size is recomputed as bit rate * delay / 1000
+ *        (the delay being in ms); in VBR streaming mode the size is kept. In
+ *        both modes the size is then capped at the maximum VBV buffer size
+ *        and the upper thresholds are refreshed.
+ *
+ * @param ps_cbr_buffer    buffer state (updated in place)
+ * @param i4_buffer_delay  new buffer delay
+ ******************************************************************************/
+void irc_change_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer,
+                                 WORD32 i4_buffer_delay)
+{
+    WORD32 i4_pic_type;
+    WORD32 i4_upr_thr;
+
+    if(ps_cbr_buffer->i4_is_cbr_mode)
+    {
+        X_PROD_Y_DIV_Z(ps_cbr_buffer->ai4_bit_rate[0], i4_buffer_delay, 1000,
+                       ps_cbr_buffer->i4_buffer_size);
+    }
+
+    if(ps_cbr_buffer->i4_buffer_size
+                    > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+
+    /*
+     * Upper threshold is the same for every picture type : the buffer size
+     * minus one eighth of it
+     */
+    i4_upr_thr = ps_cbr_buffer->i4_buffer_size
+                    - (ps_cbr_buffer->i4_buffer_size >> 3);
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        ps_cbr_buffer->i4_upr_thr[i4_pic_type] = i4_upr_thr;
+    }
+
+    /* Keep the new delay for the later change functions */
+    ps_cbr_buffer->i4_max_delay = i4_buffer_delay;
+}
+
+/* Reads back the buffer delay last stored in the buffer state */
+WORD32 irc_get_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer)
+{
+    WORD32 i4_cur_delay = ps_cbr_buffer->i4_max_delay;
+
+    return i4_cur_delay;
+}
+
+/* Reads back the buffer size currently held in the buffer state */
+WORD32 irc_get_cbr_buffer_size(cbr_buffer_t *ps_cbr_buffer)
+{
+    WORD32 i4_cur_size = ps_cbr_buffer->i4_buffer_size;
+
+    return i4_cur_size;
+}
diff --git a/encoder/irc_cbr_buffer_control.h b/encoder/irc_cbr_buffer_control.h
new file mode 100755
index 0000000..2534961
--- /dev/null
+++ b/encoder/irc_cbr_buffer_control.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : irc_cbr_buffer_control.h */
+/* */
+/* Description : This file contains all the necessary declarations */
+/* for cbr_buffer_control functions */
+/* */
+/* */
+/* List of Functions : <List the functions defined in this file> */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 05 2008 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef CBR_BUFFER_CONTROL_H
+#define CBR_BUFFER_CONTROL_H
+
+/* Clip Number into [Min, Max] in place; expands to one statement (add ';') */
+#define CLIP(Number,Max,Min) do { if((Number) > (Max)) (Number) = (Max); \
+ else if((Number) < (Min)) (Number) = (Min); } while(0)
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct cbr_buffer_t *cbr_buffer_handle;
+
+WORD32 irc_cbr_buffer_num_fill_use_free_memtab(cbr_buffer_handle *pps_cbr_buffer,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/* Initialize the cbr Buffer*/
+void irc_init_cbr_buffer(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_buffer_delay,
+ WORD32 i4_tgt_frm_rate,
+ WORD32 *i4_bit_rate,
+ UWORD32 *u4_num_pics_in_delay_prd,
+ UWORD32 u4_vbv_buf_size);
+
+/* Check for tgt bits with in CBR buffer*/
+WORD32 irc_cbr_buffer_constraint_check(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tgt_bits,
+ picture_type_e e_pic_type);
+
+/* Get the buffer status with the current consumed bits*/
+vbv_buf_status_e irc_get_cbr_buffer_status(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ WORD32 *pi4_num_bits_to_prevent_overflow,
+ picture_type_e e_pic_type);
+
+/* Update the CBR buffer at the end of the VOP*/
+void irc_update_cbr_buffer(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type);
+
+/*Get the bits needed to stuff in case of Underflow*/
+WORD32 irc_get_cbr_bits_to_stuff(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type);
+
+WORD32 irc_get_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer);
+
+WORD32 irc_get_cbr_buffer_size(cbr_buffer_handle ps_cbr_buffer);
+
+WORD32 irc_vbr_stream_buffer_constraint_check(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tgt_bits,
+ picture_type_e e_pic_type);
+
+void irc_change_cbr_vbv_bit_rate(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 *i4_bit_rate);
+
+void irc_change_cbr_vbv_tgt_frame_rate(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tgt_frm_rate);
+
+void irc_change_cbr_vbv_num_pics_in_delay_period(cbr_buffer_handle ps_cbr_buffer,
+ UWORD32 *u4_num_pics_in_delay_prd);
+
+void irc_change_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_buffer_delay);
+#endif /* CBR_BUFFER_CONTROL_H */
+
diff --git a/encoder/irc_cntrl_param.h b/encoder/irc_cntrl_param.h
new file mode 100755
index 0000000..82235f7
--- /dev/null
+++ b/encoder/irc_cntrl_param.h
@@ -0,0 +1,59 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RC_CNTRL_PARAM_H_
+#define _RC_CNTRL_PARAM_H_
+
+/* This file should contain only enumerations exported to codec by RC */
+
+/* RC algo type */
+typedef enum
+{
+ VBR_STORAGE = 0,
+ VBR_STORAGE_DVD_COMP = 1,
+ VBR_STREAMING = 2,
+ CONST_QP = 3,
+ CBR_LDRC = 4,
+ CBR_NLDRC = 5
+
+} rc_type_e;
+
+/* Picture type structure*/
+typedef enum
+{
+ BUF_PIC = -1, I_PIC = 0, P_PIC, B_PIC, MAX_PIC_TYPE
+
+} picture_type_e;
+
+/* MB Type structure*/
+typedef enum
+{
+ /* Based on MB TYPES added the array size increases */
+ MB_TYPE_INTRA, MB_TYPE_INTER, MAX_MB_TYPE
+} mb_type_e;
+
+typedef enum
+{
+ VBV_NORMAL, VBV_UNDERFLOW, VBV_OVERFLOW, VBR_CAUTION
+
+} vbv_buf_status_e;
+
+#endif
+
diff --git a/encoder/irc_common.h b/encoder/irc_common.h
new file mode 100755
index 0000000..c341de4
--- /dev/null
+++ b/encoder/irc_common.h
@@ -0,0 +1,104 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RC_COMMON_H_
+#define _RC_COMMON_H_
+
+/****************************************************************************
+ NOTE : Put only those things into this file which are common across many
+ files, say I_TO_P_BIT_RATIO macro is used across irc_bit_allocation.c
+ and irc_rate_control_api.c.If anything is exclusive only to one file,
+ define it in the same file
+
+ This file is an RC private file. It should not be exported to Codec
+ ****************************************************************************/
+
+#define UNUSED(x) ((void)(x))
+
+typedef float number_t;
+/* number_t arithmetic helpers: each stores its result through pointer c */
+#define mult32_var_q(a,b,c) (*(c) = (a) * (b))
+/* Division that yields the dividend unchanged when the divisor is zero */
+#define div32_var_q(a,b,c) (*(c) = (((b) == 0) ? (a) : ((a) / (b))))
+
+#define add32_var_q(a,b,c) (*(c) = (a) + (b))
+
+#define sub32_var_q(a,b,c) (*(c) = (a) - (b))
+/* sqrt() must be declared (<math.h>) wherever this macro is expanded */
+#define sqrt32_var_q(a, c) (*(c) = sqrt(a))
+/* Conversions between floating point values and WORD32 via plain casts */
+#define number_t_to_word32(num_a, a) (*(a) = (WORD32)(num_a))
+
+#define convert_float_to_fix(a_f, a) (*(a) = (WORD32)(a_f))
+
+#define convert_fix_to_float(a, a_f) (*(a_f) = (float)(a))
+/* Assigns b to a as a float; the third argument is not used */
+#define SET_VAR_Q(a,b,c) {(a) = (float)(b);}
+
+
+/* Defines the maximum and the minimum quantizer allowed in the stream.*/
+#define MAX_MPEG2_QP 255 /* 127*/
+
+/* Bits ratio between I and P frame */
+#define I_TO_P_BIT_RATIO 5
+
+/* Calculates P = (X*Y/Z) (Assuming all the four are in integers)*/
+#define X_PROD_Y_DIV_Z(X1,Y1,Z1,P1)\
+{\
+ number_t vq_a,vq_b,vq_c;\
+ SET_VAR_Q(vq_a,(X1),0);\
+ SET_VAR_Q(vq_b,(Y1),0);\
+ SET_VAR_Q(vq_c,(Z1),0);\
+ mult32_var_q(vq_a,vq_b,&vq_a);\
+ div32_var_q(vq_a,vq_c,&vq_a);\
+ number_t_to_word32(vq_a,&(P1));\
+}
+#define VQ_A_LT_VQ_B(A,B, Z) Z = A < B;
+#define VQ_A_GT_VQ_B(A,B, Z) Z = A > B;
+
+/* Z=MAX(A,B) where A, B and Z are var_q variables */
+#define MAX_VARQ(A,B, Z)\
+{\
+ WORD32 a_gt_b;\
+ VQ_A_GT_VQ_B((A), (B), a_gt_b);\
+ (Z) = (a_gt_b) ? (A) : (B);\
+}
+
+/* Z=MIN(A,B) where A, B and Z are var_q variables */
+#define MIN_VARQ(A,B, Z)\
+{\
+ WORD32 a_lt_b;\
+ VQ_A_LT_VQ_B((A), (B), a_lt_b);\
+ (Z) = (a_lt_b) ? (A) : (B);\
+}
+
+/* Maximum number of drain-rates supported. Currently a maximum of only 2
+ drain-rates supported. One for
+ I pictures and the other for P & B pictures */
+#define MAX_NUM_DRAIN_RATES 2
+
+/* The ratios between I to P and P to B Qp is specified here */
+#define K_Q 4
+#define I_TO_P_RATIO (19) /* In K_Q Q factor */
+#define P_TO_B_RATIO (21) /* In K_Q Q factor */
+#define P_TO_I_RATIO (13) /* In K_Q Q factor */
+
+#endif /* _RC_COMMON_H_ */
+
diff --git a/encoder/irc_datatypes.h b/encoder/irc_datatypes.h
new file mode 100755
index 0000000..8e4685a
--- /dev/null
+++ b/encoder/irc_datatypes.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* irc_datatypes.h
+*
+* @brief
+* Type definitions used in the code
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IH264_TYPEDEFS_H_
+#define _IH264_TYPEDEFS_H_
+
+
+/*****************************************************************************/
+/* Unsigned data types */
+/*****************************************************************************/
+typedef unsigned char UWORD8;
+typedef unsigned short UWORD16;
+typedef unsigned int UWORD32;
+typedef unsigned long long UWORD64;
+
+
+/*****************************************************************************/
+/* Signed data types */
+/*****************************************************************************/
+typedef signed char WORD8;
+typedef short WORD16;
+typedef int WORD32;
+
+
+/*****************************************************************************/
+/* Miscellaneous data types */
+/*****************************************************************************/
+typedef char CHAR;
+typedef double DOUBLE;
+
+#endif /* _IH264_TYPEDEFS_H_ */
diff --git a/encoder/irc_est_sad.c b/encoder/irc_est_sad.c
new file mode 100755
index 0000000..0d8abc2
--- /dev/null
+++ b/encoder/irc_est_sad.c
@@ -0,0 +1,260 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_est_sad.h"
+#include "irc_common.h"
+
+typedef struct est_sad_t
+{
+ WORD32 i4_use_est_intra_sad;
+
+ /* Previous frame SAD */
+ UWORD32 au4_prev_frm_sad[MAX_PIC_TYPE];
+
+ /* Current (nth) ifi average P frame SAD */
+ UWORD32 u4_n_p_frm_ifi_avg_sad;
+
+ /* (n-1)th ifi average P frame SAD */
+ UWORD32 u4_n_1_p_frm_ifi_avg_sad;
+
+ /* (n-2)th ifi average P frame SAD */
+ UWORD32 u4_n_2_p_frm_ifi_avg_sad;
+
+ /* number of ifi encoded till now */
+ WORD32 i4_num_ifi_encoded;
+
+ /* number of P frames in the current IFI */
+ WORD32 i4_num_p_frm_in_cur_ifi;
+
+} est_sad_t;
+
+WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_t **pps_est_sad,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type)
+{
+ WORD32 i4_mem_tab_idx = 0;
+ static est_sad_t s_est_sad;
+
+ /* Hack for all alloc, during which we don't have any state memory.
+ * Dereferencing can cause issues, so point at a static dummy instead
+ */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_est_sad) = &s_est_sad;
+
+ /* One memtab entry, sized for the est_sad_t state structure */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(est_sad_t),
+ ALIGN_128_BYTE, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void**)pps_est_sad, e_func_type);
+ }
+ i4_mem_tab_idx++;
+
+ return (i4_mem_tab_idx);
+}
+
+void irc_init_est_sad(est_sad_t *ps_est_sad, WORD32 i4_use_est_intra_sad)
+{
+ WORD32 i;
+ ps_est_sad->i4_use_est_intra_sad = i4_use_est_intra_sad;
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_est_sad->au4_prev_frm_sad[i] = 0;
+ }
+
+ ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0;
+ ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = 0;
+ ps_est_sad->u4_n_2_p_frm_ifi_avg_sad = 0;
+ ps_est_sad->i4_num_ifi_encoded = 0;
+ ps_est_sad->i4_num_p_frm_in_cur_ifi = 0;
+}
+
+void irc_reset_est_sad(est_sad_t *ps_est_sad)
+{
+ irc_init_est_sad(ps_est_sad, ps_est_sad->i4_use_est_intra_sad);
+}
+
+/*
+ * Get estimated SAD can be called at any point. The various use cases are:
+ * 1) When an I frame is getting encoded,
+ * - get the estimate for P => No issues since we use the last coded P frame
+ * value
+ * - get the estimate for I => This call has two cases:
+ * => a) if num_ifi_encoded is less than 2
+ * then return the previous encoded I frame sad
+ * => b) otherwise we scale the prev I sad by the ratio of
+ * (n-1) ifi P to (n-2) ifi P
+ * 2) When a P frame is getting encoded,
+ * - get the estimate for P => No issues since we use the last coded P frame value
+ * - get the estimate for I => Similar to 1), we have the same two cases.
+ * In case b), if frames have already been accumulated in the current
+ * ifi, their running average is used as the (n-1) ifi P value
+ */
+UWORD32 irc_get_est_sad(est_sad_t *ps_est_sad, picture_type_e e_pic_type)
+{
+ if(ps_est_sad->i4_use_est_intra_sad)
+ {
+ UWORD32 u4_estimated_sad;
+ if(e_pic_type == P_PIC)
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[P_PIC];
+ }
+ else if(e_pic_type == B_PIC)
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[B_PIC];
+ }
+ else
+ {
+ if(ps_est_sad->i4_num_ifi_encoded < 2)
+ {
+ /*
+ * Fewer than two IFIs have been encoded, so use the previous I
+ * frame's SAD
+ */
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC];
+ }
+ else
+ {
+ /*
+ * The current IFI only holds the running sum of its frame SADs,
+ * so it is averaged here before being used as the (n-1) value
+ */
+ UWORD32 u4_n_1_p_frm_ifi_avg_sad, u4_n_2_p_frm_ifi_avg_sad;
+ number_t vq_n_1_p_frm_ifi_avg_sad, vq_n_2_p_frm_ifi_avg_sad;
+ number_t vq_prev_frm_sad_i;
+
+ /*
+ * If frames have been accumulated in the current IFI, use their
+ * average as (n-1) and the stored (n-1) average as (n-2)
+ */
+ if(ps_est_sad->i4_num_p_frm_in_cur_ifi)
+ {
+ u4_n_1_p_frm_ifi_avg_sad =
+ (ps_est_sad->u4_n_p_frm_ifi_avg_sad
+ / ps_est_sad->i4_num_p_frm_in_cur_ifi);
+ u4_n_2_p_frm_ifi_avg_sad =
+ ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+ }
+ else
+ {
+ u4_n_1_p_frm_ifi_avg_sad =
+ ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+ u4_n_2_p_frm_ifi_avg_sad =
+ ps_est_sad->u4_n_2_p_frm_ifi_avg_sad;
+ }
+
+ /*
+ * If either of the two average SADs is zero we just return
+ * the previous I frame SAD
+ */
+ if(u4_n_1_p_frm_ifi_avg_sad && u4_n_2_p_frm_ifi_avg_sad)
+ {
+ SET_VAR_Q(vq_prev_frm_sad_i,
+ ps_est_sad->au4_prev_frm_sad[I_PIC], 0);
+ SET_VAR_Q(vq_n_1_p_frm_ifi_avg_sad,
+ u4_n_1_p_frm_ifi_avg_sad, 0);
+ SET_VAR_Q(vq_n_2_p_frm_ifi_avg_sad,
+ u4_n_2_p_frm_ifi_avg_sad, 0);
+ /*
+ * Estimated SAD = prev I frame SAD * (n-1)th ifi P frame avg SAD
+ * / (n-2)th ifi P frame avg SAD, evaluated in number_t (float).
+ * NOTE(review): the result is converted with a (WORD32) cast; a
+ * value above the WORD32 range would be undefined - confirm bound
+ */
+ mult32_var_q(vq_prev_frm_sad_i, vq_n_1_p_frm_ifi_avg_sad,
+ &vq_prev_frm_sad_i);
+ div32_var_q(vq_prev_frm_sad_i, vq_n_2_p_frm_ifi_avg_sad,
+ &vq_prev_frm_sad_i);
+ number_t_to_word32(vq_prev_frm_sad_i,
+ (WORD32*)&u4_estimated_sad);
+ }
+ else
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC];
+ }
+ }
+ }
+ return u4_estimated_sad;
+ }
+ else
+ {
+ return ps_est_sad->au4_prev_frm_sad[e_pic_type];
+ }
+}
+
+void irc_update_actual_sad(est_sad_t *ps_est_sad,
+ UWORD32 u4_actual_sad,
+ picture_type_e e_pic_type)
+{
+ ps_est_sad->au4_prev_frm_sad[e_pic_type] = u4_actual_sad;
+
+ if(ps_est_sad->i4_use_est_intra_sad)
+ {
+ if(e_pic_type == I_PIC)
+ {
+ /* Count IFIs only up to 2; two are needed to estimate I frame SAD */
+ if(ps_est_sad->i4_num_ifi_encoded < 2)
+ ps_est_sad->i4_num_ifi_encoded++;
+
+ /* Turn the SAD sum accumulated since the last I frame into an average */
+ if(ps_est_sad->i4_num_p_frm_in_cur_ifi)
+ {
+ ps_est_sad->u4_n_p_frm_ifi_avg_sad /=
+ ps_est_sad->i4_num_p_frm_in_cur_ifi;
+ }
+ else
+ {
+ ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0;
+ }
+ /* Push the (n-1)th average SAD to the (n-2)th average SAD */
+ ps_est_sad->u4_n_2_p_frm_ifi_avg_sad =
+ ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+ /* Push the nth average SAD to the (n-1)th average SAD */
+ ps_est_sad->u4_n_1_p_frm_ifi_avg_sad =
+ ps_est_sad->u4_n_p_frm_ifi_avg_sad;
+ /* Restart the SAD sum and the frame count for the new IFI */
+ ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0;
+ ps_est_sad->i4_num_p_frm_in_cur_ifi = 0;
+ }
+ else
+ {
+ ps_est_sad->u4_n_p_frm_ifi_avg_sad += u4_actual_sad;
+ ps_est_sad->i4_num_p_frm_in_cur_ifi++;
+ }
+ }
+}
+
+void irc_update_actual_sad_for_intra(est_sad_t *ps_est_sad,
+ WORD32 i4_intra_frm_cost)
+{
+ if(!(ps_est_sad->i4_use_est_intra_sad))
+ {
+ irc_update_actual_sad(ps_est_sad, i4_intra_frm_cost, I_PIC);
+ }
+}
diff --git a/encoder/irc_est_sad.h b/encoder/irc_est_sad.h
new file mode 100755
index 0000000..c8238c9
--- /dev/null
+++ b/encoder/irc_est_sad.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _EST_SAD_H_
+#define _EST_SAD_H_
+
+/*
+ * "est_sad_t->i4_use_est_intra_sad" Flag to control how the I frame SAD is estimated.
+ * If set to zero
+ * - it uses the Intra sad calculated by the previous P frame as
+ * the estimated sad for the current I frame
+ * else
+ * - it uses the ratio of P frame sads of the previous two GOPS and
+ * scales the I Frame sad with this ratio to estimate the current
+ * I frame SAD
+ */
+
+/* Estimating the Average SAD for the current picture type is done by:
+ * 1) if picture_type is I
+ * - Estimated SAD = (n-1)th intra frame interval(ifi) P frame Avg SAD *
+ * ( prev I frame SAD / (n-2)th intra frame interval(ifi) P frame Avg SAD)
+ * - if only one IFI is encoded use the previous I frame SAD
+ * 2) if picture type is P
+ * - Estimated SAD is the previous P frame SAD
+ * 3) The first P frame in an IFI could use a little better logic to decide the
+ * estimated SAD but currently we assume the last coded P frame's SAD
+ */
+
+typedef struct est_sad_t *est_sad_handle;
+
+WORD32 irc_est_sad_num_fill_use_free_memtab(est_sad_handle *est_sad,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+void irc_init_est_sad(est_sad_handle est_sad, WORD32 i4_use_est_frame_sad);
+
+UWORD32 irc_get_est_sad(est_sad_handle est_sad, picture_type_e e_pic_type);
+
+void irc_update_actual_sad(est_sad_handle est_sad,
+ UWORD32 u4_actual_sad,
+ picture_type_e e_pic_type);
+
+void irc_update_actual_sad_for_intra(est_sad_handle est_sad,
+ WORD32 i4_intra_frm_cost);
+
+void irc_reset_est_sad(est_sad_handle ps_est_sad);
+#endif
diff --git a/encoder/irc_fixed_point_error_bits.c b/encoder/irc_fixed_point_error_bits.c
new file mode 100755
index 0000000..42dcfc5
--- /dev/null
+++ b/encoder/irc_fixed_point_error_bits.c
@@ -0,0 +1,185 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+
+typedef struct error_bits_t
+{
+ /* Max tgt frm rate so that dynamic change in frm rate can be handled */
+ WORD32 i4_max_tgt_frm_rate;
+
+ /* Cur frm rate */
+ WORD32 i4_cur_tgt_frm_rate;
+
+ /* tgt frame rate*/
+ WORD32 i4_tgt_frm_rate;
+
+ /* tgt frm rate increment */
+ WORD32 i4_tgt_frm_rate_incr;
+
+ /* flag to indicate 1 second is up */
+ UWORD8 u1_compute_error_bits;
+
+ /* Bitrate/frame rate value added over a period */
+ WORD32 i4_accum_bitrate;
+
+ /* bitrate */
+ WORD32 i4_bitrate;
+
+} error_bits_t;
+
+WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_t **pps_error_bits,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type)
+{
+ WORD32 i4_mem_tab_idx = 0;
+ static error_bits_t s_error_bits_temp;
+
+ /*
+ * Hack for all alloc, during which we don't have any state memory.
+ * Dereferencing can cause issues
+ */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_error_bits) = &s_error_bits_temp;
+
+ /* For src rate control state structure */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(error_bits_t),
+ ALIGN_128_BYTE, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void**)pps_error_bits, e_func_type);
+ }
+ i4_mem_tab_idx++;
+
+ return (i4_mem_tab_idx);
+}
+
+/*******************************************************************************
+ * @brief Initializes the error bits state (error due to fixed point division)
+ ******************************************************************************/
+void irc_init_error_bits(error_bits_t *ps_error_bits,
+ WORD32 i4_max_tgt_frm_rate,
+ WORD32 i4_bitrate)
+{
+ /* Initializing the parameters*/
+ ps_error_bits->i4_cur_tgt_frm_rate = 0;
+ ps_error_bits->i4_max_tgt_frm_rate = i4_max_tgt_frm_rate;
+
+ /* Value by which i4_cur_tgt_frm_rate is incremented every VOP*/
+ ps_error_bits->i4_tgt_frm_rate_incr = 1000;
+
+ /*Compute error bits is set to 1 at the end of 1 second*/
+ ps_error_bits->u1_compute_error_bits = 0;
+ ps_error_bits->i4_tgt_frm_rate = i4_max_tgt_frm_rate;
+ ps_error_bits->i4_accum_bitrate = 0;
+ ps_error_bits->i4_bitrate = i4_bitrate;
+}
+
+/*******************************************************************************
+ * @brief Updates the error state
+ ******************************************************************************/
+void irc_update_error_bits(error_bits_t *ps_error_bits)
+{
+ WORD32 i4_bits_per_frame;
+
+ X_PROD_Y_DIV_Z(ps_error_bits->i4_bitrate, 1000,
+ ps_error_bits->i4_tgt_frm_rate, i4_bits_per_frame);
+
+ /*
+ * i4_cur_tgt_frm_rate is incremented at the end of every VOP by
+ * i4_tgt_frm_rate_incr; the accumulator restarts after a completed second
+ */
+ ps_error_bits->i4_cur_tgt_frm_rate += ps_error_bits->i4_tgt_frm_rate_incr;
+ if(ps_error_bits->u1_compute_error_bits == 1)
+ {
+ ps_error_bits->i4_accum_bitrate = 0;
+ }
+ ps_error_bits->i4_accum_bitrate += i4_bits_per_frame;
+
+ /*
+ * Once the current tgt frm rate reaches the max tgt frame rate, 1 second
+ * is up: keep the remainder and flag that error bits are to be computed
+ */
+ if(ps_error_bits->i4_cur_tgt_frm_rate >= ps_error_bits->i4_max_tgt_frm_rate)
+ {
+ ps_error_bits->i4_cur_tgt_frm_rate -=
+ ps_error_bits->i4_max_tgt_frm_rate;
+ ps_error_bits->u1_compute_error_bits = 1;
+ }
+ else
+ {
+ ps_error_bits->u1_compute_error_bits = 0;
+ }
+}
+
+/*******************************************************************************
+ * @brief Returns the error bits for the current frame if there are any
+ *
+ ******************************************************************************/
+WORD32 irc_get_error_bits(error_bits_t *ps_error_bits)
+{
+ WORD32 i4_error_bits = 0;
+
+ /*If 1s is up calculate error for the last 1s worth of frames*/
+ if(ps_error_bits->u1_compute_error_bits == 1)
+ {
+ /*Error = Actual bitrate - bits_per_frame * num of frames*/
+ i4_error_bits = ps_error_bits->i4_bitrate
+ - ps_error_bits->i4_accum_bitrate;
+ }
+
+ return (i4_error_bits);
+}
+
+/*******************************************************************************
+ * @brief Change the frame rate parameter for the error bits state
+ ******************************************************************************/
+void irc_change_frm_rate_in_error_bits(error_bits_t *ps_error_bits,
+ WORD32 i4_tgt_frm_rate)
+{
+ /* A zero rate would divide by zero below, so such a request is ignored */
+ if(i4_tgt_frm_rate == 0) return;
+ /* Value by which i4_cur_tgt_frm_rate is incremented every VOP*/
+ ps_error_bits->i4_tgt_frm_rate_incr = (ps_error_bits->i4_max_tgt_frm_rate
+ * 1000) / i4_tgt_frm_rate;
+ ps_error_bits->i4_tgt_frm_rate = i4_tgt_frm_rate;
+}
+
+/*******************************************************************************
+ * @brief Change the bitrate value for error bits module
+ ******************************************************************************/
+void irc_change_bitrate_in_error_bits(error_bits_t *ps_error_bits,
+ WORD32 i4_bitrate)
+{
+ ps_error_bits->i4_bitrate = i4_bitrate;
+}
+
diff --git a/encoder/irc_fixed_point_error_bits.h b/encoder/irc_fixed_point_error_bits.h
new file mode 100755
index 0000000..4ddf1eb
--- /dev/null
+++ b/encoder/irc_fixed_point_error_bits.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : irc_fixed_point_error_bits.h */
+/* */
+/* Description : This file contains all the necessary declarations */
+/* for fixed_point_error_bits functions */
+/* */
+/* */
+/* List of Functions : <List the functions defined in this file> */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 05 2008 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef FIXED_POINT_ERROR_BITS_H
+#define FIXED_POINT_ERROR_BITS_H
+
+typedef struct error_bits_t *error_bits_handle;
+
+WORD32 irc_error_bits_num_fill_use_free_memtab(error_bits_handle *pps_error_bits,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+void irc_init_error_bits(error_bits_handle ps_error_bits,
+ WORD32 i4_max_tgt_frm_rate,
+ WORD32 i4_bitrate);
+
+void irc_update_error_bits(error_bits_handle ps_error_bits);
+
+WORD32 irc_get_error_bits(error_bits_handle ps_error_bits);
+
+void irc_change_frm_rate_in_error_bits(error_bits_handle ps_error_bits,
+ WORD32 i4_tgt_frm_rate);
+
+void irc_change_bitrate_in_error_bits(error_bits_handle ps_error_bits,
+ WORD32 i4_bitrate);
+
+#endif
+
diff --git a/encoder/irc_frame_info_collector.c b/encoder/irc_frame_info_collector.c
new file mode 100755
index 0000000..65f24c4
--- /dev/null
+++ b/encoder/irc_frame_info_collector.c
@@ -0,0 +1,177 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/******************************************************************************/
+/* File Includes */
+/******************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+
+/* Resets every per-MB-type counter and every frame-level sum to zero */
+void irc_init_frame_info(frame_info_t *frame_info)
+{
+    WORD32 mb_type;
+
+    /* Frame-wide accumulators */
+    frame_info->other_header_bits = 0;
+    frame_info->activity_sum = 0;
+    frame_info->intra_mb_cost_sum = 0;
+
+    /* Accumulators kept separately for each MB type */
+    for(mb_type = MAX_MB_TYPE - 1; mb_type >= 0; mb_type--)
+    {
+        frame_info->num_mbs[mb_type] = 0;
+        frame_info->qp_sum[mb_type] = 0;
+        frame_info->tot_mb_sad[mb_type] = 0;
+        frame_info->mb_header_bits[mb_type] = 0;
+        frame_info->mb_texture_bits[mb_type] = 0;
+    }
+}
+
+/******************************************************************************
+ * GET Functions: Sending back collected information to the rate control module
+ ******************************************************************************/
+/* Returns the MB header bits summed over all MB types plus other_header_bits */
+WORD32 irc_fi_get_total_header_bits(frame_info_t *frame_info)
+{
+    WORD32 mb_type = 0;
+    WORD32 header_bits = 0;
+
+    while(mb_type < MAX_MB_TYPE)
+    {
+        header_bits += frame_info->mb_header_bits[mb_type];
+        mb_type++;
+    }
+
+    /* Non-MB header bits are added last */
+    header_bits += frame_info->other_header_bits;
+
+    return header_bits;
+}
+
+/* Returns the texture bits summed over all MB types */
+WORD32 irc_fi_get_total_texture_bits(frame_info_t *frame_info)
+{
+    WORD32 mb_type = 0;
+    WORD32 texture_bits = 0;
+
+    while(mb_type < MAX_MB_TYPE)
+    {
+        texture_bits += frame_info->mb_texture_bits[mb_type];
+        mb_type++;
+    }
+
+    return texture_bits;
+}
+
+/* Returns the SAD summed over all MB types */
+WORD32 irc_fi_get_total_frame_sad(frame_info_t *frame_info)
+{
+    WORD32 mb_type = 0;
+    WORD32 frame_sad = 0;
+
+    while(mb_type < MAX_MB_TYPE)
+    {
+        frame_sad += frame_info->tot_mb_sad[mb_type];
+        mb_type++;
+    }
+
+    return frame_sad;
+}
+
+/*
+ * Returns the integer average QP over all MBs of the frame
+ * (sum of qp_sum[] / sum of num_mbs[]), or 0 when no MB has been counted.
+ */
+WORD32 irc_fi_get_average_qp(frame_info_t *frame_info)
+{
+    WORD32 mb_type;
+    WORD32 qp_acc = 0;
+    WORD32 mb_cnt = 0;
+
+    for(mb_type = 0; mb_type < MAX_MB_TYPE; mb_type++)
+    {
+        qp_acc += frame_info->qp_sum[mb_type];
+        mb_cnt += frame_info->num_mbs[mb_type];
+    }
+
+    /* Guard against division by zero for an empty frame */
+    return (mb_cnt != 0) ? (qp_acc / mb_cnt) : 0;
+}
+
+/*
+ * Returns the integer average header bits per MB of type mb_type, or 0 when
+ * no MB of that type has been counted. mb_type is used as an array index
+ * without a range check.
+ */
+WORD32 irc_fi_get_avg_mb_header(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    const WORD32 mb_cnt = frame_info->num_mbs[mb_type];
+
+    if(mb_cnt == 0)
+    {
+        return 0;
+    }
+
+    return (frame_info->mb_header_bits[mb_type] / mb_cnt);
+}
+
+/* Returns the texture bits accumulated for MBs of type mb_type */
+WORD32 irc_fi_get_total_mb_texture_bits(frame_info_t *frame_info,
+                                        UWORD8 mb_type)
+{
+    const WORD32 *pi4_texture_bits = frame_info->mb_texture_bits;
+
+    return pi4_texture_bits[mb_type];
+}
+
+/* Returns the SAD accumulated for MBs of type mb_type */
+WORD32 irc_fi_get_total_mb_sad(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    const WORD32 *pi4_sad = frame_info->tot_mb_sad;
+
+    return pi4_sad[mb_type];
+}
+
+/*
+ * Returns the QP sum accumulated for MBs of type mb_type, or 0 when no MB of
+ * that type has been counted.
+ */
+WORD32 irc_fi_get_total_mb_qp(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    if(frame_info->num_mbs[mb_type] == 0)
+    {
+        return 0;
+    }
+
+    return frame_info->qp_sum[mb_type];
+}
+
+/* Returns the number of MBs counted for type mb_type */
+WORD32 irc_fi_get_total_mb(frame_info_t *frame_info, UWORD8 mb_type)
+{
+    const WORD32 *pi4_mb_cnt = frame_info->num_mbs;
+
+    return pi4_mb_cnt[mb_type];
+}
+
+/* Returns the number of MBs counted under MB_TYPE_INTRA */
+WORD32 irc_fi_get_num_intra_mb(frame_info_t *frame_info)
+{
+    const WORD32 num_intra_mbs = frame_info->num_mbs[MB_TYPE_INTRA];
+
+    return num_intra_mbs;
+}
+
+/*
+ * Returns activity_sum divided by the total number of MBs counted in the
+ * frame (integer division), or 0 when no MB has been counted.
+ */
+WORD32 irc_fi_get_avg_activity(frame_info_t *frame_info)
+{
+    WORD32 mb_type = 0;
+    WORD32 mb_cnt = 0;
+
+    while(mb_type < MAX_MB_TYPE)
+    {
+        mb_cnt += frame_info->num_mbs[mb_type];
+        mb_type++;
+    }
+
+    /* Guard against division by zero for an empty frame */
+    return (mb_cnt != 0) ? (frame_info->activity_sum / mb_cnt) : 0;
+}
+
+/* Returns the accumulated intra MB cost of the frame */
+WORD32 irc_fi_get_total_intra_mb_cost(frame_info_t *frame_info)
+{
+    const WORD32 intra_cost = frame_info->intra_mb_cost_sum;
+
+    return intra_cost;
+}
diff --git a/encoder/irc_frame_info_collector.h b/encoder/irc_frame_info_collector.h
new file mode 100755
index 0000000..58dc467
--- /dev/null
+++ b/encoder/irc_frame_info_collector.h
@@ -0,0 +1,109 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _FRAME_INFO_COLLECTOR_H_
+#define _FRAME_INFO_COLLECTOR_H_
+
+/*
+ * Per-frame statistics accumulator. The per-type arrays are indexed by MB
+ * type (0 .. MAX_MB_TYPE - 1); everything is reset by irc_init_frame_info()
+ * and filled through the FI_UPDATE_* macros.
+ */
+typedef struct
+{
+ /* Number of MBs in each type */
+ WORD32 num_mbs[MAX_MB_TYPE];
+
+ /* Sum of all MB SADs of each MB type */
+ WORD32 tot_mb_sad[MAX_MB_TYPE];
+
+ /* Sum of QPs for each mb type */
+ WORD32 qp_sum[MAX_MB_TYPE];
+
+ /* Header bits consumed other than MB headers */
+ WORD32 other_header_bits;
+
+ /* Header bits consumed for each type of MBs */
+ WORD32 mb_header_bits[MAX_MB_TYPE];
+
+ /* Texture bits consumed for each type of MBs */
+ WORD32 mb_texture_bits[MAX_MB_TYPE];
+
+ /* Sum of all MB activity */
+ WORD32 activity_sum;
+
+ /* Sum of all the Intra MB cost values for the entire frame */
+ WORD32 intra_mb_cost_sum;
+
+} frame_info_t;
+
+/* Resets all counters of frame_info to zero */
+void irc_init_frame_info(frame_info_t *frame_info);
+
+/*
+ * Update functions: Collecting information from encoder
+ *
+ * Each macro expands to a brace-enclosed block, not to do { } while(0).
+ * Writing "if(x) FI_UPDATE_...(...); else ..." therefore fails to compile,
+ * because the trailing ';' ends the if statement before the else.
+ * mb_type is used directly as an array index without a range check.
+ */
+
+/* Adds header_bits to the non-MB header bit count */
+#define FI_UPDATE_OTHER_HEADER_BITS(frame_info,header_bits)\
+ {(frame_info)->other_header_bits += (header_bits);}
+
+/* Adds header_bits to the MB header bit count of mb_type */
+#define FI_UPDATE_MB_HEADER(frame_info,header_bits,mb_type)\
+ {(frame_info)->mb_header_bits[(mb_type)] += (header_bits);}
+
+/* Adds texture_bits to the texture bit count of mb_type */
+#define FI_UPDATE_MB_TEXTURE(frame_info,texture_bits,mb_type)\
+ {(frame_info)->mb_texture_bits[(mb_type)] += (texture_bits);}
+
+/* Adds mb_sad to the SAD sum of mb_type */
+#define FI_UPDATE_MB_SAD(frame_info,mb_sad,mb_type)\
+ {(frame_info)->tot_mb_sad[(mb_type)] += (mb_sad);}
+
+/*
+ * Adds qp to the QP sum of mb_type AND increments the MB count of mb_type.
+ * This is the only macro that touches num_mbs; frame_info and mb_type are
+ * evaluated twice.
+ */
+#define FI_UPDATE_MB_QP(frame_info,qp,mb_type)\
+ {(frame_info)->qp_sum[(mb_type)] += (qp);(frame_info)->num_mbs[(mb_type)]++;}
+
+/* Adds mb_activity to the frame activity sum */
+#define FI_UPDATE_ACTIVITY(frame_info,mb_activity)\
+ {(frame_info)->activity_sum += (mb_activity);}
+
+/* Adds intra_mb_cost to the frame intra MB cost sum */
+#define FI_UPDATE_INTRA_MB_COST(frame_info,intra_mb_cost)\
+ {(frame_info)->intra_mb_cost_sum += (intra_mb_cost);}
+
+/*
+ * GET Functions: Sending back collected information to the rate control module
+ */
+
+/* Frame Level Model Information */
+
+/* MB header bits of all MB types plus the other (non-MB) header bits */
+WORD32 irc_fi_get_total_header_bits(frame_info_t *frame_info);
+
+/* Texture bits summed over all MB types */
+WORD32 irc_fi_get_total_texture_bits(frame_info_t *frame_info);
+
+/* Sum of QPs / number of MBs over all MB types; 0 if no MB was counted */
+WORD32 irc_fi_get_average_qp(frame_info_t *frame_info);
+
+/* SAD summed over all MB types */
+WORD32 irc_fi_get_total_frame_sad(frame_info_t *frame_info);
+
+/* Activity sum / number of MBs over all MB types; 0 if no MB was counted */
+WORD32 irc_fi_get_avg_activity(frame_info_t *frame_info);
+
+/* Number of Intra MBs for Scene Change Detection */
+WORD32 irc_fi_get_num_intra_mb(frame_info_t *frame_info);
+
+/* MB Level Model Information */
+
+/* Header bits / number of MBs for the given MB type; 0 if none was counted */
+WORD32 irc_fi_get_avg_mb_header(frame_info_t *frame_info, UWORD8 mb_type);
+
+/* Texture bits accumulated for the given MB type */
+WORD32 irc_fi_get_total_mb_texture_bits(frame_info_t *frame_info,
+ UWORD8 mb_type);
+
+/* SAD accumulated for the given MB type */
+WORD32 irc_fi_get_total_mb_sad(frame_info_t *frame_info, UWORD8 mb_type);
+
+/* QP sum accumulated for the given MB type; 0 if none was counted */
+WORD32 irc_fi_get_total_mb_qp(frame_info_t *frame_info, UWORD8 mb_type);
+
+/* Number of MBs counted for the given MB type */
+WORD32 irc_fi_get_total_mb(frame_info_t *frame_info, UWORD8 mb_type);
+
+/* Accumulated intra MB cost of the frame */
+WORD32 irc_fi_get_total_intra_mb_cost(frame_info_t *frame_info);
+#endif
diff --git a/encoder/irc_mb_model_based.c b/encoder/irc_mb_model_based.c
new file mode 100755
index 0000000..880ee19
--- /dev/null
+++ b/encoder/irc_mb_model_based.c
@@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_mb_model_based.h"
+
+/* State of the MB-level rate control (activity based QP modulation) */
+typedef struct mb_rate_control_t
+{
+ /* Frame Qp (written by irc_mb_init_frame_level) */
+ UWORD8 u1_frm_qp;
+
+ /*
+ * Estimated average activity for the current frame (updated with the
+ * previous frame activity since it is independent of picture type whether
+ * it is I or P). A value of 0 makes irc_get_mb_qp skip the modulation.
+ */
+ WORD32 i4_avg_activity;
+
+} mb_rate_control_t;
+
+/*
+ * Memtab helper for the MB rate control state.
+ *
+ * GET_NUM_MEMTAB : only points *pps_mb_rate_control at a static dummy.
+ * FILL_MEMTAB    : points *pps_mb_rate_control at the static dummy, then
+ *                  fills the memtab entry and calls use_or_fill_base().
+ * other values   : fills the memtab entry and calls use_or_fill_base().
+ *
+ * Always returns 1, the number of memtab entries used by this module.
+ */
+WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_t **pps_mb_rate_control,
+                                         itt_memtab_t *ps_memtab,
+                                         ITT_FUNC_TYPE_E e_func_type)
+{
+    /*
+     * During allocation no state memory exists yet, so the handle is pointed
+     * at this dummy to keep any dereference of it valid.
+     */
+    static mb_rate_control_t s_mb_rate_control_temp;
+    itt_memtab_t *ps_entry;
+
+    if(e_func_type == GET_NUM_MEMTAB)
+    {
+        (*pps_mb_rate_control) = &s_mb_rate_control_temp;
+        return 1;
+    }
+
+    if(e_func_type == FILL_MEMTAB)
+    {
+        (*pps_mb_rate_control) = &s_mb_rate_control_temp;
+    }
+
+    /* Single memtab entry: the MB rate control state structure */
+    ps_entry = &ps_memtab[0];
+    fill_memtab(ps_entry, sizeof(mb_rate_control_t), ALIGN_128_BYTE,
+                PERSISTENT, DDR);
+    use_or_fill_base(ps_entry, (void**)pps_mb_rate_control, e_func_type);
+
+    return 1;
+}
+
+/*******************************************************************************
+ MB LEVEL API FUNCTIONS
+ ******************************************************************************/
+
+/******************************************************************************
+ Description : Initialize the MB level rate control state to default values.
+               Both the average activity and the frame QP are set, so that no
+               field is left indeterminate if irc_get_frm_level_qp() or
+               irc_get_mb_qp() runs before irc_mb_init_frame_level().
+ ******************************************************************************/
+void irc_init_mb_level_rc(mb_rate_control_t *ps_mb_rate_control)
+{
+    /* Zero average activity: irc_get_mb_qp() then skips the QP modulation */
+    ps_mb_rate_control->i4_avg_activity = 0;
+
+    /* Frame QP was previously left uninitialized by this routine */
+    ps_mb_rate_control->u1_frm_qp = 0;
+}
+
+/******************************************************************************
+ Description : Stores the frame level QP decision in the MB rate control
+               state; irc_get_mb_qp() uses it as the base QP
+ ******************************************************************************/
+void irc_mb_init_frame_level(mb_rate_control_t *ps_mb_rate_control,
+                             UWORD8 u1_frame_qp)
+{
+    mb_rate_control_t *ps_state = ps_mb_rate_control;
+
+    ps_state->u1_frm_qp = u1_frame_qp;
+}
+
+/******************************************************************************
+ Description : Resets the stored average MB activity to 0 (done whenever
+               there is SCD). With a zero average, irc_get_mb_qp() returns
+               the unmodulated frame QP.
+ ******************************************************************************/
+void irc_reset_mb_activity(mb_rate_control_t *ps_mb_rate_control)
+{
+    mb_rate_control_t *ps_state = ps_mb_rate_control;
+
+    ps_state->i4_avg_activity = 0;
+}
+
+/******************************************************************************
+ Description : Calculates the mb level qp.
+               pi4_mb_qp[0] receives the frame level (model) QP, which is used
+               for updating the rate control model.
+               pi4_mb_qp[1] receives the QP used for encoding the MB: the
+               frame QP scaled by (2 * act + avg) / (act + 2 * avg) with
+               rounding and capped at 1.5 * frame QP. The scaling is skipped
+               when the stored average activity is 0 or the frame QP is >= 100.
+ ******************************************************************************/
+void irc_get_mb_qp(mb_rate_control_t *ps_mb_rate_control,
+                   WORD32 i4_cur_mb_activity,
+                   WORD32 *pi4_mb_qp)
+{
+    const WORD32 i4_frm_qp = ps_mb_rate_control->u1_frm_qp;
+    WORD32 i4_mod_qp = i4_frm_qp;
+
+    /* Model based QP */
+    pi4_mb_qp[0] = i4_frm_qp;
+
+    if((0 != ps_mb_rate_control->i4_avg_activity) && (i4_frm_qp < 100))
+    {
+        const WORD32 i4_avg_act = ps_mb_rate_control->i4_avg_activity;
+        const WORD32 i4_scale_num = (2 * i4_cur_mb_activity) + i4_avg_act;
+        const WORD32 i4_scale_den = i4_cur_mb_activity + (2 * i4_avg_act);
+        const WORD32 i4_qp_cap = (3 * ps_mb_rate_control->u1_frm_qp) >> 1;
+
+        /* Half the denominator is added for round-to-nearest division */
+        i4_mod_qp = ((i4_scale_num * i4_frm_qp) + (i4_scale_den >> 1))
+                        / i4_scale_den;
+
+        /* Never exceed 1.5 times the frame QP */
+        if(i4_mod_qp > i4_qp_cap)
+        {
+            i4_mod_qp = i4_qp_cap;
+        }
+    }
+
+    /* Activity modulated QP, used for encoding the MB */
+    pi4_mb_qp[1] = i4_mod_qp;
+}
+
+/*******************************************************************************
+ Description : Returns the frame level QP last stored by
+               irc_mb_init_frame_level()
+ ******************************************************************************/
+UWORD8 irc_get_frm_level_qp(mb_rate_control_t *ps_mb_rate_control)
+{
+    const UWORD8 u1_qp = ps_mb_rate_control->u1_frm_qp;
+
+    return u1_qp;
+}
+
+/*******************************************************************************
+ Description : Stores the average activity collected at frame level; it is
+               used by irc_get_mb_qp() to modulate the MB QP
+ ******************************************************************************/
+void irc_mb_update_frame_level(mb_rate_control_t *ps_mb_rate_control,
+                               WORD32 i4_avg_activity)
+{
+    mb_rate_control_t *ps_state = ps_mb_rate_control;
+
+    ps_state->i4_avg_activity = i4_avg_activity;
+}
diff --git a/encoder/irc_mb_model_based.h b/encoder/irc_mb_model_based.h
new file mode 100755
index 0000000..aad520a
--- /dev/null
+++ b/encoder/irc_mb_model_based.h
@@ -0,0 +1,57 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _MB_MODEL_BASED_H_
+#define _MB_MODEL_BASED_H_
+
+/* Opaque handle: struct mb_rate_control_t is only forward-declared here */
+typedef struct mb_rate_control_t *mb_rate_control_handle;
+
+/* Memtab helper for the MB rate control state; action chosen by e_func_type */
+WORD32 irc_mbrc_num_fill_use_free_memtab(mb_rate_control_handle *pps_mb_rate_control,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/* Initializing the state structure */
+void irc_init_mb_level_rc(mb_rate_control_handle ps_mb_rate_control);
+
+/* MB parameters that are to be initialized at a frame level */
+void irc_mb_init_frame_level(mb_rate_control_handle ps_mb_rate_control,
+ UWORD8 u1_frame_qp);
+
+/*
+ * MB Level call to get the mb_level QP.
+ * pi4_mb_qp must have room for two values: [0] frame level (model) QP,
+ * [1] activity modulated QP used for encoding the MB.
+ */
+void irc_get_mb_qp(mb_rate_control_handle ps_mb_rate_control,
+ WORD32 i4_cur_mb_activity,
+ WORD32 *pi4_mb_qp);
+
+/* MB Parameters that are to be updated at a frame level */
+void irc_mb_update_frame_level(mb_rate_control_handle ps_mb_rate_control,
+ WORD32 i4_avg_activity);
+
+/****************************************************************************
+ CONTROL FUNCTIONS FROM FRAME LEVEL
+ ****************************************************************************/
+
+/* Returns the stored frame level QP */
+UWORD8 irc_get_frm_level_qp(mb_rate_control_handle ps_mb_rate_control);
+
+/* Disables activity based qp modulation */
+void irc_reset_mb_activity(mb_rate_control_handle ps_mb_rate_control);
+
+#endif
+
diff --git a/encoder/irc_mem_req_and_acq.h b/encoder/irc_mem_req_and_acq.h
new file mode 100755
index 0000000..a2946a7
--- /dev/null
+++ b/encoder/irc_mem_req_and_acq.h
@@ -0,0 +1,179 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* irc_mem_req_and_acq.h
+*
+* @brief
+* This file contains function declaration and structures for rate control
+* memtabs
+*
+* @author
+* ittiam
+*
+* @remarks
+* The rate control library is a global library across various codecs. It
+* anticipates certain structures definitions. Those definitions are to be
+* imported from global workspace. Instead of that, the structures needed for
+* rc library are copied in to this file and exported to rc library. If the
+* structures / enums / ... in the global workspace change, this file also needs
+* to be modified accordingly.
+*
+******************************************************************************
+*/
+#ifndef IH264E_RC_MEM_INTERFACE_H_
+#define IH264E_RC_MEM_INTERFACE_H_
+
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*
+ * Fills entry m_j of the record array m_pv_mem_rec with size, alignment and
+ * memory type.
+ * - The fields written (u4_mem_size, u4_mem_alignment, e_mem_type) and the
+ *   type iv_mem_rec_t are not declared in this file; they are not members of
+ *   itt_memtab_t. NOTE(review): presumably meant for iv_mem_rec_t records -
+ *   confirm against the codec API header.
+ * - Macro arguments are not parenthesized in the expansion, so pass simple
+ *   expressions only.
+ * - The macro shares its name with the FILL_MEMTAB enumerator of
+ *   ITT_FUNC_TYPE_E. Being function-like, it is expanded only when the name
+ *   is followed by '('.
+ */
+#define FILL_MEMTAB(m_pv_mem_rec, m_j, m_mem_size, m_align, m_type) \
+{ \
+ m_pv_mem_rec[m_j].u4_size = sizeof(iv_mem_rec_t); \
+ m_pv_mem_rec[m_j].u4_mem_size = m_mem_size; \
+ m_pv_mem_rec[m_j].u4_mem_alignment = m_align; \
+ m_pv_mem_rec[m_j].e_mem_type = m_type; \
+}
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* Memory alignment in bytes (each enumerator's value is the byte count) */
+typedef enum
+{
+ ALIGN_BYTE = 1,
+ ALIGN_WORD16 = 2,
+ ALIGN_WORD32 = 4,
+ ALIGN_WORD64 = 8,
+ ALIGN_128_BYTE = 128
+}ITT_MEM_ALIGNMENT_TYPE_E;
+
+/* How a memory block is used (scratch, persistent or write-once) */
+typedef enum
+{
+ SCRATCH = 0,
+ PERSISTENT = 1,
+ WRITEONCE = 2
+}ITT_MEM_USAGE_TYPE_E;
+
+/* Memory region a block is placed in; note that the value 2 is not used */
+typedef enum
+{
+ L1D = 0,
+ SL2 = 1,
+ DDR = 3
+}ITT_MEM_REGION_E;
+
+/*
+ * Operation requested from the *_num_fill_use_free_memtab functions and from
+ * use_or_fill_base()
+ */
+typedef enum
+{
+ GET_NUM_MEMTAB = 0,
+ FILL_MEMTAB = 1,
+ USE_BASE = 2,
+ FILL_BASE =3
+}ITT_FUNC_TYPE_E;
+
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/*
+ * Memory table entry describing one memory block requested by the rate
+ * control library.
+ * NOTE : This should be an exact replica of IALG_MemRec, any change in
+ * IALG_MemRec must be replicated here
+ */
+typedef struct
+{
+ /* Size in bytes */
+ UWORD32 u4_size;
+
+ /* Alignment in bytes */
+ WORD32 i4_alignment;
+
+ /* decides which memory region to be placed */
+ ITT_MEM_REGION_E e_mem_region;
+
+ /* memory is scratch or persistent */
+ ITT_MEM_USAGE_TYPE_E e_usage;
+
+ /* Base pointer for allocated memory */
+ void *pv_base;
+} itt_memtab_t;
+
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief This function fills memory record attributes
+*
+* @par Description
+* This function fills memory record attributes
+*
+* @param[in] ps_mem_tab
+* pointer to mem records
+*
+* @param[in] u4_size
+* size of the record (declared as WORD32 despite the u4_ prefix)
+*
+* @param[in] i4_alignment
+* memory alignment size
+*
+* @param[in] e_usage
+* usage
+*
+* @param[in] e_mem_region
+* mem region
+*
+* @return void
+*
+******************************************************************************
+*/
+void fill_memtab(itt_memtab_t *ps_mem_tab, WORD32 u4_size, WORD32 i4_alignment,
+ ITT_MEM_USAGE_TYPE_E e_usage, ITT_MEM_REGION_E e_mem_region);
+
+/**
+******************************************************************************
+*
+* @brief This function uses or fills the base pointer of a memory record
+*
+* @par Description
+* Depending on e_func_type, the base pointer is exchanged between the memory
+* record and the caller's handle.
+* NOTE(review): the exact direction for each e_func_type value is not visible
+* in this header - confirm against the definition.
+*
+* @param[in] ps_mem_tab
+* pointer to mem records
+*
+* @param[in] ptr_to_be_filled
+* handle to the memory record storage space
+*
+* @param[in] e_func_type
+* enum that dictates fill memory records or use memory records
+*
+* @return WORD32 (meaning not visible in this header; callers in this module
+* ignore it)
+*
+******************************************************************************
+*/
+WORD32 use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled,
+ ITT_FUNC_TYPE_E e_func_type);
+
+
+#endif // IH264E_RC_MEM_INTERFACE_H_
+
diff --git a/encoder/irc_picture_type.c b/encoder/irc_picture_type.c
new file mode 100755
index 0000000..186188c
--- /dev/null
+++ b/encoder/irc_picture_type.c
@@ -0,0 +1,1585 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include "stdio.h"
+#include "string.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_picture_type.h"
+#include "irc_trace_support.h"
+
+/* Sizes the pic stack: as_pic_stack holds MAX_INTER_FRM_INT + 2 entries */
+#define MAX_INTER_FRM_INT 10
+
+/******************************Pic_details ************************************/
+/* Description of one picture held in the pic stack */
+typedef struct
+{
+ /* The id sent by the codec */
+ WORD32 i4_pic_id;
+
+ /* The pics come in, in this order */
+ WORD32 i4_pic_disp_order_no;
+
+ /* I,P,B */
+ picture_type_e e_pic_type;
+
+} pic_details_t;
+
+/**************************Pic_handling structure *****************************/
+/*
+ * State of the picture type / GOP structure handling. It is initialized by
+ * irc_init_pic_handling().
+ */
+typedef struct pic_handling_t
+{
+ /***************************************************************************
+ * Inputs from the codec
+ **************************************************************************/
+
+ /* Number of frames after which an I frame will repeat in display order */
+ WORD32 i4_intra_frm_int;
+
+ /* (num_b_pics_in_subgop + 1) */
+ WORD32 i4_inter_frm_int;
+
+ /* After these many buffered frames, the pics are encoded */
+ WORD32 i4_max_inter_frm_int;
+
+ /* OPEN or CLOSED */
+ WORD32 i4_is_gop_closed;
+
+ /* The pic stack */
+ /* Stack used to store the input pics in encode order */
+ pic_details_t as_pic_stack[MAX_INTER_FRM_INT + 2];
+
+ /***************************************************************************
+ * Counters
+ **************************************************************************/
+
+ /* Decides whether a B or ref pic */
+ WORD32 i4_buf_pic_no;
+
+ /* Current pic's number in displayed, and gets reset after an I-frm */
+ WORD32 i4_pic_disp_order_no;
+
+ /* Number of P frms that have come, in the current gop, so far */
+ WORD32 i4_p_count_in_gop;
+
+ /* Number of B frms that have come, in the current gop, so far */
+ WORD32 i4_b_count_in_gop;
+
+ /* Number of B frms that have come, in the current subgop, so far */
+ WORD32 i4_b_count_in_subgop;
+
+ /***************************************************************************
+ * Indices to the pic stack (Since we store the pics in the encode order,
+ * these vars are modified to meet that)
+ **************************************************************************/
+
+ /* B_PIC index */
+ WORD32 i4_b_pic_idx;
+
+ /* I,P PIC index */
+ WORD32 i4_ref_pic_idx;
+
+ /***************************************************************************
+ * Variables operating on the input pics
+ **************************************************************************/
+
+ /* Flag denoting whether it's the first gop or not */
+ WORD32 i4_is_first_gop;
+
+ /* Number of B_PICs in an incomplete subgop */
+ WORD32 i4_b_in_incomp_subgop;
+
+ /* In CLOSED_GOPs, even if inter_frm_int > 1, there can be 2 continuous
+ * P_PICs at the GOP end. This takes values of 0 or 1 */
+ WORD32 i4_extra_p;
+
+ /***************************************************************************
+ * Arrays storing the number of frms in the gop
+ **************************************************************************/
+
+ /* In the steady state, what's the pic distribution in display order */
+ WORD32 i4_frms_in_gop[MAX_PIC_TYPE];
+
+ /*
+ * In case of a change in inter frm int call, the pic distribution in
+ * that gop in display order
+ */
+ WORD32 i4_frms_in_cur_gop[MAX_PIC_TYPE];
+
+ /*
+ * This is used to denote the number of frms remaining to be encoded in the
+ * current gop
+ */
+ WORD32 i4_rem_frms_in_gop[MAX_PIC_TYPE];
+
+ /***************************************************************************
+ * Variables operating on the output pics
+ **************************************************************************/
+
+ /* Counts the frms encoded in a gop */
+ WORD32 i4_coded_pic_no;
+
+ /* Counts from the start of stack to the end repeatedly */
+ WORD32 i4_stack_count;
+
+ /***************************************************************************
+ * Tracking a change in the inputs from the codec
+ **************************************************************************/
+
+ /* A flag that is set when the codec calls for a change in inter_frm_int */
+ WORD32 i4_change_in_inter_frm_int;
+
+ /*
+ * When a change_in_inter_frm_int is called, this stores the new
+ * inter_frm_int
+ */
+ WORD32 i4_new_inter_frm_int;
+
+ /*
+ * When a change_in_inter_frm_int is called in the middle of a gop,this
+ * stores the B_PICs in the incomplete subgop of the mixed gop
+ */
+ WORD32 i4_b_in_incomp_subgop_mix_gop;
+
+ /*
+ * For a CLOSED GOP, when a change_in_inter_frm_int is called in the middle
+ * of a gop,this is a flag denoting if there is an extra P_PIC in the mixed
+ * gop
+ */
+ WORD32 i4_extra_p_mix_gop;
+
+ /* A flag that is set when the codec calls for a change in intra_frm_int */
+ WORD32 i4_change_in_intra_frm_int;
+
+ /*
+ * When a change_in_intra_frm_int is called, this stores the new
+ * intra_frm_int
+ */
+ WORD32 i4_new_intra_frm_int;
+
+ /***************************************************************************
+ * Previous pic_stack_indices & details
+ **************************************************************************/
+ /* Zeroed by irc_init_pic_handling */
+ pic_details_t s_prev_pic_details;
+
+ /* Initialized to 2, the same start value as i4_b_pic_idx */
+ WORD32 i4_prev_b_pic_idx;
+
+ /* Initialized to 0. NOTE(review): presumably a last-frame-of-gop flag */
+ WORD32 i4_last_frm_in_gop;
+
+ /* Initialized to 0. NOTE(review): presumably set once a gop is encoded */
+ WORD32 i4_first_gop_encoded;
+
+ /* NITT TBR */
+ /* Initialized to I_PIC */
+ picture_type_e e_previous_pic_type;
+
+ /* Initialized to 0. NOTE(review): presumably a pending force-I request */
+ WORD32 i4_force_I_frame;
+
+ /* Initialized to 0 */
+ WORD32 i4_forced_I_frame_cur_frame;
+
+ /* Sum of i4_rem_frms_in_gop[] over all pic types, taken in start_new_gop */
+ WORD32 i4_sum_remaining_frm_in_gop;
+
+ /* Initialized to 0 */
+ WORD32 i4_mod_temp_ref_cnt;
+
+ /*
+ * Number of frames of the gop encoded so far (B count + P count + 1),
+ * stored in start_new_gop
+ */
+ WORD32 i4_frames_in_fif_gop;
+
+ /* Initialized to the intra frame interval passed to irc_init_pic_handling */
+ WORD32 i4_prev_intra_frame_interval;
+
+} pic_handling_t;
+
+/*
+ * Forward declaration; defined later in this file.
+ * NOTE(review): presumably recomputes the pic distribution after a change in
+ * intra/inter frame interval - confirm against the definition.
+ */
+static void irc_update_pic_distbn(pic_handling_t *ps_pic_handling,
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_inter_frm_int,
+ WORD32 i4_gop_boundary);
+
+/*
+ * Forward declaration; defined later in this file. irc_init_pic_handling()
+ * calls it to obtain the per-picture-type frame counts of a gop together
+ * with the number of B pics in an incomplete subgop and the extra P flag.
+ */
+static void find_pic_distbn_in_gop(WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_inter_frm_int,
+ WORD32 i4_is_gop_closed,
+ WORD32 *pi4_b_in_incomp_subgop,
+ WORD32 *pi4_extra_p);
+
+/*
+ * Memtab helper for the pic handling state.
+ *
+ * GET_NUM_MEMTAB : only points *pps_pic_handling at a static dummy.
+ * FILL_MEMTAB    : points *pps_pic_handling at the static dummy, then fills
+ *                  the memtab entry and calls use_or_fill_base().
+ * other values   : fills the memtab entry and calls use_or_fill_base().
+ *
+ * Always returns 1, the number of memtab entries used by this module.
+ */
+WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_t **pps_pic_handling,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type)
+{
+    /*
+     * During allocation no state memory exists yet, so the handle is pointed
+     * at this dummy to keep any dereference of it valid.
+     */
+    static pic_handling_t s_pic_handling_temp;
+    itt_memtab_t *ps_entry;
+
+    if(e_func_type == GET_NUM_MEMTAB)
+    {
+        (*pps_pic_handling) = &s_pic_handling_temp;
+        return 1;
+    }
+
+    if(e_func_type == FILL_MEMTAB)
+    {
+        (*pps_pic_handling) = &s_pic_handling_temp;
+    }
+
+    /* Single memtab entry: the pic handling state structure */
+    ps_entry = &ps_memtab[0];
+    fill_memtab(ps_entry, sizeof(pic_handling_t), ALIGN_128_BYTE, PERSISTENT,
+                DDR);
+    use_or_fill_base(ps_entry, (void**)pps_pic_handling, e_func_type);
+
+    return 1;
+}
+
+/******************************************************************************
+ Description : Initializes the pic handling state struct.
+               The inter frame interval starts out equal to
+               i4_max_inter_frm_int; the steady state pic distribution is
+               obtained from find_pic_distbn_in_gop() and copied to the
+               current-gop and remaining-frames arrays.
+ *****************************************************************************/
+void irc_init_pic_handling(pic_handling_t *ps_pic_handling,
+                           WORD32 i4_intra_frm_int,
+                           WORD32 i4_max_inter_frm_int,
+                           WORD32 i4_is_gop_closed)
+{
+    pic_handling_t *ps_ph = ps_pic_handling;
+
+    /* Pic stack and previous pic details start out cleared */
+    memset(ps_ph->as_pic_stack, 0, sizeof(ps_ph->as_pic_stack));
+    memset(&ps_ph->s_prev_pic_details, 0, sizeof(ps_ph->s_prev_pic_details));
+
+    /* Codec parameters */
+    ps_ph->i4_intra_frm_int = i4_intra_frm_int;
+    ps_ph->i4_inter_frm_int = i4_max_inter_frm_int;
+    ps_ph->i4_max_inter_frm_int = i4_max_inter_frm_int;
+    ps_ph->i4_is_gop_closed = i4_is_gop_closed;
+
+    /* No pending change in the codec parameters */
+    ps_ph->i4_change_in_inter_frm_int = 0;
+    ps_ph->i4_new_inter_frm_int = i4_max_inter_frm_int;
+    ps_ph->i4_change_in_intra_frm_int = 0;
+    ps_ph->i4_new_intra_frm_int = i4_intra_frm_int;
+    ps_ph->i4_prev_intra_frame_interval = i4_intra_frm_int;
+
+    /* Input side counters */
+    ps_ph->i4_buf_pic_no = 0;
+    ps_ph->i4_pic_disp_order_no = 0;
+    ps_ph->i4_is_first_gop = 1;
+    ps_ph->i4_p_count_in_gop = 0;
+    ps_ph->i4_b_count_in_gop = 0;
+    ps_ph->i4_b_count_in_subgop = 0;
+
+    /* Indices into the pic stack */
+    ps_ph->i4_ref_pic_idx = 0;
+    ps_ph->i4_b_pic_idx = 2;
+    ps_ph->i4_prev_b_pic_idx = 2;
+
+    /* Output side counters */
+    ps_ph->i4_coded_pic_no = -1;
+    ps_ph->i4_stack_count = -1;
+
+    /* Remaining bookkeeping */
+    ps_ph->e_previous_pic_type = I_PIC;
+    ps_ph->i4_force_I_frame = 0;
+    ps_ph->i4_forced_I_frame_cur_frame = 0;
+    ps_ph->i4_sum_remaining_frm_in_gop = 0;
+    ps_ph->i4_mod_temp_ref_cnt = 0;
+    ps_ph->i4_last_frm_in_gop = 0;
+    ps_ph->i4_first_gop_encoded = 0;
+    ps_ph->i4_frames_in_fif_gop = 0;
+
+    /* Pic distribution in the gop, on which the bit allocation depends */
+    find_pic_distbn_in_gop(ps_ph->i4_frms_in_gop, i4_intra_frm_int,
+                           i4_max_inter_frm_int, i4_is_gop_closed,
+                           &ps_ph->i4_b_in_incomp_subgop,
+                           &ps_ph->i4_extra_p);
+
+    /* Current and remaining distributions start as the steady state one */
+    memcpy(ps_ph->i4_frms_in_cur_gop, ps_ph->i4_frms_in_gop,
+           sizeof(ps_ph->i4_frms_in_cur_gop));
+    memcpy(ps_ph->i4_rem_frms_in_gop, ps_ph->i4_frms_in_gop,
+           sizeof(ps_ph->i4_rem_frms_in_gop));
+
+    /* Mixed-gop values mirror the results just computed */
+    ps_ph->i4_b_in_incomp_subgop_mix_gop = ps_ph->i4_b_in_incomp_subgop;
+    ps_ph->i4_extra_p_mix_gop = ps_ph->i4_extra_p;
+}
+
+/*******************************************************************************
+ * @brief Registers a request to change the intra frame interval
+ *
+ * Nothing is applied here: the requested value is stored and a "change
+ * pending" flag is raised. irc_add_pic_to_stack() checks that flag and applies
+ * the stored value.
+ *
+ * @param ps_pic_handling  picture handling state to update
+ * @param i4_intra_frm_int requested intra frame interval
+ ******************************************************************************/
+void irc_pic_handling_register_new_int_frm_interval(pic_handling_t *ps_pic_handling,
+                                                    WORD32 i4_intra_frm_int)
+{
+    /* Remember the requested value, then mark the change as pending */
+    ps_pic_handling->i4_new_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_change_in_intra_frm_int = 1;
+}
+
+/*******************************************************************************
+ * @brief Registers a request to change the inter frame interval
+ *
+ * Only stores the requested value and raises the "change pending" flag;
+ * irc_add_pic_to_stack() checks that flag and applies the stored value.
+ *
+ * @param ps_pic_handling  picture handling state to update
+ * @param i4_inter_frm_int requested inter frame interval
+ ******************************************************************************/
+void irc_pic_handling_register_new_inter_frm_interval(pic_handling_t *ps_pic_handling,
+                                                      WORD32 i4_inter_frm_int)
+{
+    /* Remember the requested value, then mark the change as pending */
+    ps_pic_handling->i4_new_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_change_in_inter_frm_int = 1;
+}
+
+/*******************************************************************************
+ * @brief Ends the current gop early and prepares the state for a fresh one
+ *
+ * Used by irc_add_pic_to_stack() on the force-I-frame paths. It
+ *  - resets the input side counters and clears the first-gop flag,
+ *  - reloads the mixed-gop helpers from their regular-gop counterparts,
+ *  - records how many frames the interrupted gop contained and how many
+ *    frames were still remaining in it,
+ *  - reloads the current/remaining frame counts from i4_frms_in_gop.
+ *
+ * @param ps_pic_handling picture handling state (updated in place)
+ ******************************************************************************/
+static void start_new_gop(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_type;
+    WORD32 i4_frms_left = 0;
+
+    /* Input side counters restart; from now on this is not the first gop */
+    ps_pic_handling->i4_buf_pic_no = 0;
+    ps_pic_handling->i4_pic_disp_order_no = 0;
+    ps_pic_handling->i4_is_first_gop = 0;
+
+    ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+    if(ps_pic_handling->i4_is_gop_closed)
+    {
+        ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                        ps_pic_handling->i4_b_in_incomp_subgop;
+    }
+
+    /*
+     * Number of frames of the interrupted gop seen so far
+     * (B count + P count + 1), captured before the forced I frame
+     */
+    ps_pic_handling->i4_frames_in_fif_gop =
+                    ps_pic_handling->i4_b_count_in_gop
+                    + ps_pic_handling->i4_p_count_in_gop + 1;
+
+    /*
+     * Per picture type: add the still-remaining frames to the running sum
+     * before the remaining count is overwritten with the full gop count
+     */
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        i4_frms_left += ps_pic_handling->i4_rem_frms_in_gop[i4_type];
+
+        ps_pic_handling->i4_frms_in_cur_gop[i4_type] =
+                        ps_pic_handling->i4_frms_in_gop[i4_type];
+        ps_pic_handling->i4_rem_frms_in_gop[i4_type] =
+                        ps_pic_handling->i4_frms_in_cur_gop[i4_type];
+    }
+    ps_pic_handling->i4_sum_remaining_frm_in_gop = i4_frms_left;
+}
+
+/*******************************************************************************
+ * @brief Fills the pic_stack with the incoming pics in encode order
+ *
+ * Steps performed, in this order:
+ *  1) If a force-I-frame request is pending, decide whether the current gop
+ *     has to be ended now (start_new_gop) and adjust the B picture index.
+ *  2) Apply a registered change of the intra frame interval and/or of the
+ *     inter frame interval through irc_update_pic_distbn.
+ *  3) Classify the incoming picture as B, I or P and store its id, type and
+ *     display order number in as_pic_stack (B pictures at i4_b_pic_idx,
+ *     I/P pictures at i4_ref_pic_idx), then advance the index that was used.
+ *  4) Advance the input side counters, do the end-of-gop bookkeeping and
+ *     write the modified local copies back into the state struct.
+ *
+ * @param ps_pic_handling picture handling state (read and updated)
+ * @param i4_enc_pic_id   id of the incoming picture, stored in the pic_stack
+ ******************************************************************************/
+void irc_add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id)
+{
+ /* Declarations */
+ WORD32 i4_inter_frm_int, i4_max_inter_frm_int,
+ i4_intra_frm_int, i4_new_inter_frm_int;
+ WORD32 i4_is_gop_closed;
+ WORD32 i4_buf_pic_no, i4_pic_disp_order_no;
+ WORD32 i4_b_pic_idx, i4_ref_pic_idx;
+ WORD32 i4_is_first_gop, i4_b_in_incomp_subgop, i4_p_count_in_gop,
+ i4_b_count_in_gop, i4_b_count_in_subgop;
+ WORD32 i, i4_p_frms_in_prd, i4_b_frms_in_prd,
+ i4_num_b_in_subgop, i4_extra_p;
+ WORD32 i4_condn_for_change_in_inter_frm_int;
+ picture_type_e e_previous_pic_type, e_cur_pic_type;
+ WORD32 i4_force_I_frame;
+
+ /*
+ * Initialize the local vars with the state struct values needed by the
+ * change calls
+ */
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+ i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+
+ i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
+ i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
+ i4_is_first_gop = ps_pic_handling->i4_is_first_gop;
+ i4_new_inter_frm_int = ps_pic_handling->i4_new_inter_frm_int;
+ e_previous_pic_type = ps_pic_handling->e_previous_pic_type;
+ i4_force_I_frame = ps_pic_handling->i4_force_I_frame;
+
+ /* Force I frame :
+ * Two different cases
+ * 1)OPEN_GOP: New GOP is started after number of B pictures in the last
+ * sub gop of a gop to mimic the GOP structure.
+ * 2)Closed GOP:Wait till P frame at input and The frame after a P frame
+ * a new GOP is started to mimic the GOP structure.
+ */
+ if(i4_force_I_frame)
+ {
+ WORD32 i4_temp_is_gop_closed;
+ WORD32 i4_codn = 0; /* open-gop trigger, evaluated below */
+ /* A special case of Open GOP where it behaves like Closed GOP */
+ if((i4_intra_frm_int % i4_inter_frm_int) == 1)
+ {
+ i4_temp_is_gop_closed = 1;
+ }
+ else
+ {
+ i4_temp_is_gop_closed = i4_is_gop_closed;
+ }
+ /* Get the current picture type to aid decision to force an I frame
+ * (same B/I/P classification as used further down to fill the stack) */
+ if((i4_buf_pic_no % i4_inter_frm_int)
+ && !(i4_is_gop_closed&& (i4_b_count_in_gop == i4_b_frms_in_prd)))
+ {
+ e_cur_pic_type = B_PIC;
+ }
+ else
+ {
+ if(i4_pic_disp_order_no == 0)
+ {
+ e_cur_pic_type = I_PIC;
+ }
+ else
+ {
+ e_cur_pic_type = P_PIC;
+ }
+ }
+ if((i4_intra_frm_int % i4_inter_frm_int) == 0)
+ {
+ i4_codn = (e_cur_pic_type == P_PIC);
+ }
+ else
+ {
+ i4_codn = (ps_pic_handling->i4_b_count_in_subgop
+ == ps_pic_handling->i4_b_in_incomp_subgop);
+ }
+ if(e_cur_pic_type == I_PIC)
+ {
+ /*
+ * Don't do anything. Resetting the force I frame flag
+ * since the current picture type is already an I frame
+ */
+ i4_force_I_frame = 0;
+ }
+ else if(i4_inter_frm_int == 1)
+ {
+ /* IPP case, force I frame immediately */
+ start_new_gop(ps_pic_handling);
+ }
+ else if((!i4_temp_is_gop_closed) && i4_codn)
+ {
+ start_new_gop(ps_pic_handling);
+ if(ps_pic_handling->i4_b_count_in_subgop)
+ {
+ ps_pic_handling->i4_b_pic_idx += 1;
+ ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+ }
+ else if(i4_temp_is_gop_closed && (e_previous_pic_type == P_PIC)
+ && (e_cur_pic_type != P_PIC))
+ {
+ start_new_gop(ps_pic_handling);
+ ps_pic_handling->i4_b_pic_idx++;
+ ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+ i4_is_first_gop = ps_pic_handling->i4_is_first_gop; /* start_new_gop may have cleared it */
+ }
+
+
+ /***********************CHANGE_INTRA_FRM_INTERVAL**************************
+ *
+ * Call the irc_update_pic_distbn if
+ * 1)Change in intra frm interval flag is set
+ * 2)It's the first B_PIC of a gop
+ */
+ if((ps_pic_handling->i4_change_in_intra_frm_int == 1)
+ && ((i4_pic_disp_order_no == 1)))
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_new_intra_frm_int,
+ ps_pic_handling->i4_inter_frm_int, 1);
+
+ ps_pic_handling->i4_change_in_intra_frm_int = 0;
+
+ if(ps_pic_handling->i4_new_intra_frm_int == 1)
+ {
+ ps_pic_handling->i4_pic_disp_order_no = 0;
+ }
+ }
+ /*********************CHANGE_INTER_FRM_INTERVAL****************************/
+ /* Call irc_update_pic_distbn if
+ * 1)Change in inter frm interval flag is set
+ * 2)It's the first B_PIC after gop/subgop start, and
+ * 3)The new inter-frm-interval won't cross the intra_frm_interval
+ */
+ if((ps_pic_handling->i4_change_in_inter_frm_int == 1)
+ && ((i4_buf_pic_no % i4_inter_frm_int == 1)
+ || (i4_pic_disp_order_no == 1) || (i4_inter_frm_int == 1)))
+ {
+ /*
+ * Condition which checks if the new inter_frm_int will cross the
+ * intra_frm_int
+ */
+ i4_condn_for_change_in_inter_frm_int = ((i4_pic_disp_order_no
+ + i4_new_inter_frm_int - 1) < i4_intra_frm_int);
+
+ if(i4_condn_for_change_in_inter_frm_int)
+ {
+ /*If the inter_frm_int = 1, then the b_pic_idx needs to be modified */
+ if(i4_inter_frm_int == 1)
+ {
+ ps_pic_handling->i4_b_pic_idx = (1
+ + ps_pic_handling->i4_ref_pic_idx)
+ % (i4_max_inter_frm_int + 1);
+ }
+
+ /*
+ * Depending on the gop/subgop boundary, call the change_inter_frm_int
+ *
+ * TO DO: make a single call, change the name of the fxn to
+ * update_state,
+ * where state = frms_in_gop + b_incomp_subgop + extra_p
+ */
+
+ /* GOP boundary */
+ if(i4_pic_disp_order_no == 1)
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int, 1);
+ }
+ /* Subgop boundary */
+ else
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int, 0);
+ }
+
+ ps_pic_handling->i4_change_in_inter_frm_int = 0;
+ ps_pic_handling->i4_new_inter_frm_int =
+ ps_pic_handling->i4_inter_frm_int;
+ }
+
+ }
+
+ /* Initialize the local vars with the state struct values
+ * (re-read here because the force-I and interval-change steps above may
+ * have modified the state struct) */
+ i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
+ i4_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx;
+ i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+ i4_p_count_in_gop = ps_pic_handling->i4_p_count_in_gop;
+ i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
+ i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
+ i4_p_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[P_PIC];
+ i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
+ i4_extra_p = ps_pic_handling->i4_extra_p_mix_gop;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+
+ /* Initializing the prev_state vars */
+ ps_pic_handling->i4_prev_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
+
+ i4_num_b_in_subgop = (i4_inter_frm_int - 1);
+
+ /*********************** Fill the stack ***********************************/
+ /* The next part of the code is organized as
+ *
+ * if(B_PIC conditions satisfied)
+ * {
+ * Fill the pic_stack using the b_pic_index
+ * Update the b_pic_index and the other b_pic related vars for the
+ * next B_PIC
+ * }
+ * else
+ * {
+ * if(I_PIC conditions are satisfied)
+ * {
+ * Fill the pic_stack using the ref_pic_index
+ * Update the ref_pic_index and the other ref_pic related vars for the next
+ * I_PIC/P_PIC
+ * }
+ * else
+ * {
+ * Fill the pic_stack using the ref_pic_index
+ * Update the ref_pic_index and the other ref_pic related vars for the next
+ * I_PIC/P_PIC
+ * }
+ * }
+ */
+ /*
+ * Condition for a B_PIC -
+ * 1) Other than the first I_PIC and the periodically appearing P_PICs, after
+ * every inter_frm_int, rest all pics are B_PICs
+ * 2) In case of CLOSED_GOP, the last frame of the gop has to be a P_PIC
+ */
+
+ if((i4_buf_pic_no % i4_inter_frm_int)&& !(i4_is_gop_closed
+ && (i4_b_count_in_gop == i4_b_frms_in_prd))) /**** B_PIC ****/
+ {
+ /* Fill the pic_stack */
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_id = i4_enc_pic_id;
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].e_pic_type = B_PIC;
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_disp_order_no =
+ i4_pic_disp_order_no;
+
+ /* Store Pic type*/
+ e_previous_pic_type = B_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(&ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_b_pic_idx],
+ sizeof(pic_details_t));
+
+ i4_b_count_in_gop++;
+ i4_b_count_in_subgop++;
+
+ /* Update the i4_b_pic_idx */
+ if(!i4_is_gop_closed)
+ {
+ /* If this B_PIC features in one of the complete subgops */
+ if((i4_b_count_in_subgop < i4_num_b_in_subgop)
+ && !(i4_b_count_in_gop == i4_b_frms_in_prd))
+ {
+ i4_b_pic_idx++;
+ }
+ else /* Else if this B_PIC is the last one in a subgop or gop */
+ {
+ /*
+ * If this is the last B_PIC of a GOP, depending on the number
+ * of incomp B_pics in the subgop, there can be either only I
+ * or I,P pics between this and the next B_PIC
+ */
+ if(i4_b_count_in_gop == i4_b_frms_in_prd)
+ {
+ i4_b_pic_idx += (2 + (!i4_b_in_incomp_subgop)); /*Prev*/
+ i4_b_count_in_gop = 0;
+ }
+ /*
+ * For the last B_PIC of a subgop, there's always a P b/w
+ * this & the next B_PIC
+ */
+ else
+ {
+ i4_b_pic_idx += 2;
+ }
+ i4_b_count_in_subgop = 0;
+ }
+ }
+ else
+ {
+ /* For the last B_PIC of a gop
+ * Normally,there will be 3 pics (P,I,P) between this and the next
+ * B_PIC for a CLOSED gop, except when
+ * 1)Number of P_pics in the gop = 1
+ * 2)There is an extra P at the end of the gop
+ */
+ if(i4_b_count_in_gop == i4_b_frms_in_prd)
+ {
+ i4_b_pic_idx += (3 + ((i4_b_in_incomp_subgop == 0)
+ && (i4_p_frms_in_prd> 1)
+ && (i4_pic_disp_order_no
+ != (i4_p_frms_in_prd+ i4_b_frms_in_prd- 1))));
+
+ i4_b_count_in_subgop = 0;
+ }
+ /* For a B_PIC which is not the last one in a subgop */
+ else if(i4_b_count_in_subgop < i4_num_b_in_subgop)
+ {
+ i4_b_pic_idx++;
+ }
+ else /* For the last B_PIC of a subgop */
+ {
+ i4_b_pic_idx += 2;
+ i4_b_count_in_subgop = 0;
+ }
+ }
+ i4_b_pic_idx %= (i4_max_inter_frm_int + 1); /* wrap around the pic_stack */
+ }
+ /*********** I or P pic *********/
+ else
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_id = i4_enc_pic_id;
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_disp_order_no =
+ i4_pic_disp_order_no;
+ /* Store Pic type (overwritten with P_PIC in the P_PIC branch below) */
+ e_previous_pic_type = I_PIC;
+
+ /**** I_PIC ****/
+ if(i4_pic_disp_order_no == 0)
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = I_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(&ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
+ sizeof(pic_details_t));
+ /*
+ * In case of an I-frame depending on OPEN or CLOSED gop,
+ * the ref_pic_idx changes
+ */
+ if((!i4_is_gop_closed) && (i4_is_first_gop == 0))
+ {
+ if((i4_p_frms_in_prd <= 1) && (i4_b_in_incomp_subgop == 0))
+ {
+ i4_ref_pic_idx++;
+ }
+ /*
+ * From the 2nd gop onwards, the I and first P frame are
+ * separated by the num_b_in_incomp_subgop
+ */
+ else
+ {
+ i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
+ }
+
+ ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+ ps_pic_handling->i4_b_in_incomp_subgop;
+ }
+ else
+ {
+ i4_ref_pic_idx++;
+ }
+
+ i4_b_count_in_gop = 0;
+ i4_p_count_in_gop = 0;
+ i4_b_count_in_subgop = 0;
+
+ }
+ /**** P_PIC ****/
+ else
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = P_PIC;
+ /* Store Pic type*/
+ e_previous_pic_type = P_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(&ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
+ sizeof(pic_details_t));
+
+ i4_p_count_in_gop++;
+ ps_pic_handling->i4_prev_intra_frame_interval = i4_intra_frm_int;
+
+ /*
+ * In case of a P-frame depending on OPEN or CLOSED gop, the
+ * ref_pic_idx changes
+ */
+ if(i4_is_gop_closed && (i4_p_count_in_gop == i4_p_frms_in_prd))
+ {
+ /*
+ * For the last P_PIC in a gop, if extra_p or incomp_b are
+ * present, the number of such pics between this and the next
+ * ref_pic is (i4_b_in_incomp_subgop + 1)
+ */
+ if((i4_p_count_in_gop > 1)
+ && (i4_b_in_incomp_subgop || i4_extra_p))
+ {
+ i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
+ }
+ else
+ {
+ i4_ref_pic_idx += i4_inter_frm_int;
+ }
+ }
+ else
+ {
+ i4_ref_pic_idx += i4_inter_frm_int;
+ }
+ }
+
+ i4_ref_pic_idx %= (i4_max_inter_frm_int + 1); /* wrap around the pic_stack */
+ }
+
+ /* Update those variables working on the input frames */
+ i4_pic_disp_order_no++;
+ i4_buf_pic_no++;
+
+ /* For any gop
+ * NOTE(review): this test reads the display order number still stored in
+ * the state struct, which does not include the increment just applied to
+ * the local copy above - confirm that this is intended */
+ if(ps_pic_handling->i4_pic_disp_order_no
+ == (i4_max_inter_frm_int - 1- ((!i4_is_gop_closed)
+ * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
+ {
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_pic_handling->i4_rem_frms_in_gop[i] =
+ ps_pic_handling->i4_frms_in_cur_gop[i];
+ }
+
+ if((!i4_is_gop_closed) && (i4_is_first_gop)
+ && (ps_pic_handling->i4_rem_frms_in_gop[B_PIC]
+ > ps_pic_handling->i4_b_in_incomp_subgop_mix_gop))
+ {
+ ps_pic_handling->i4_rem_frms_in_gop[B_PIC] =
+ ps_pic_handling->i4_frms_in_cur_gop[B_PIC]
+ - ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+ }
+ }
+
+ /* End of GOP updates */
+ if(i4_pic_disp_order_no == (i4_p_frms_in_prd + i4_b_frms_in_prd + 1))
+ {
+ /* Now, the end of gop updates */
+ i4_pic_disp_order_no = 0;
+ i4_buf_pic_no = 0;
+ i4_is_first_gop = 0;
+ ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+
+ if(i4_is_gop_closed)
+ {
+ ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+ ps_pic_handling->i4_b_in_incomp_subgop;
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_pic_handling->i4_frms_in_cur_gop[i] =
+ ps_pic_handling->i4_frms_in_gop[i];
+ }
+ }
+
+ /* Updating the vars which work on the encoded pics */
+ /* For the first gop */
+ if(((ps_pic_handling->i4_is_first_gop)
+ && (ps_pic_handling->i4_pic_disp_order_no
+ == (i4_max_inter_frm_int - 1)))
+ || (i4_intra_frm_int == 1))
+ {
+ ps_pic_handling->i4_coded_pic_no = 0;
+ ps_pic_handling->i4_stack_count = 0;
+ }
+
+ /* Update the state struct with the modifiable local vars */
+ ps_pic_handling->i4_buf_pic_no = i4_buf_pic_no;
+ ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
+ ps_pic_handling->i4_b_pic_idx = i4_b_pic_idx;
+ ps_pic_handling->i4_ref_pic_idx = i4_ref_pic_idx;
+ ps_pic_handling->i4_is_first_gop = i4_is_first_gop;
+ ps_pic_handling->i4_p_count_in_gop = i4_p_count_in_gop;
+ ps_pic_handling->i4_b_count_in_gop = i4_b_count_in_gop;
+ ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
+ ps_pic_handling->e_previous_pic_type = e_previous_pic_type;
+ ps_pic_handling->i4_force_I_frame = i4_force_I_frame;
+}
+
+/*******************************************************************************
+ * @brief Returns the picture type, id and display order number for the frame
+ *        to be encoded
+ *
+ * While i4_stack_count is negative there is nothing to encode yet and a
+ * placeholder (BUF_PIC, id -1, display order -1) is reported. Otherwise the
+ * pic_stack entry at i4_stack_count is reported. Two adjustments may apply:
+ *  - if a force-I-frame request is pending and the entry is an I_PIC, the
+ *    request is consumed and the forced-I bookkeeping is set up;
+ *  - for an open gop, once i4_first_gop_encoded is set, the reported display
+ *    order number is shifted by i4_b_in_incomp_subgop and wrapped.
+ *
+ * @param ps_pic_handling        picture handling state (may be updated)
+ * @param pi4_pic_id             [out] id of the picture to encode
+ * @param pi4_pic_disp_order_no  [out] display order number of that picture
+ * @param pe_pic_type            [out] type of that picture
+ ******************************************************************************/
+void irc_get_pic_from_stack(pic_handling_t *ps_pic_handling,
+                            WORD32 *pi4_pic_id,
+                            WORD32 *pi4_pic_disp_order_no,
+                            picture_type_e *pe_pic_type)
+{
+    pic_details_t s_out;
+
+    if(ps_pic_handling->i4_stack_count >= 0)
+    {
+        WORD32 i4_top = ps_pic_handling->i4_stack_count;
+
+        memcpy(&s_out, &ps_pic_handling->as_pic_stack[i4_top],
+               sizeof(pic_details_t));
+
+        /* A pending forced I frame is consumed by the first I_PIC seen */
+        if((ps_pic_handling->i4_force_I_frame == 1)
+                        && (s_out.e_pic_type == I_PIC))
+        {
+            /* Tells the caller that the remaining bits have to change */
+            ps_pic_handling->i4_forced_I_frame_cur_frame = 1;
+            ps_pic_handling->i4_force_I_frame = 0;
+
+            /*
+             * Number of pictures in the new gop whose temporal reference
+             * has to be modified
+             */
+            ps_pic_handling->i4_mod_temp_ref_cnt =
+                            ps_pic_handling->i4_b_in_incomp_subgop + 1;
+            ps_pic_handling->i4_first_gop_encoded = 1;
+        }
+
+        /*
+         * In MPEG2 the temporal reference of the first displayed frame of a
+         * gop is 0. With an open gop, the B_PICs of the last subgop may be
+         * coded as part of the next gop, so the display order number is
+         * shifted to reflect the temporal reference.
+         */
+        if((!ps_pic_handling->i4_is_gop_closed)
+                        && (ps_pic_handling->i4_first_gop_encoded))
+        {
+            WORD32 i4_shifted_order =
+                            ps_pic_handling->as_pic_stack[i4_top].i4_pic_disp_order_no
+                            + ps_pic_handling->i4_b_in_incomp_subgop;
+
+            if(ps_pic_handling->i4_mod_temp_ref_cnt)
+            {
+                /*
+                 * After a forced I frame the interrupted gop only had
+                 * i4_frames_in_fif_gop frames, so wrap on that length
+                 */
+                s_out.i4_pic_disp_order_no = i4_shifted_order
+                                % (ps_pic_handling->i4_frames_in_fif_gop);
+                ps_pic_handling->i4_mod_temp_ref_cnt--;
+            }
+            else
+            {
+                s_out.i4_pic_disp_order_no = i4_shifted_order
+                                % (ps_pic_handling->i4_prev_intra_frame_interval);
+            }
+        }
+    }
+    else
+    {
+        /* Nothing to encode yet: report a placeholder entry */
+        s_out.e_pic_type = BUF_PIC;
+        s_out.i4_pic_disp_order_no = -1;
+        s_out.i4_pic_id = -1;
+    }
+
+    /* Hand the result over to the codec */
+    *pi4_pic_id = s_out.i4_pic_id;
+    *pi4_pic_disp_order_no = s_out.i4_pic_disp_order_no;
+    *pe_pic_type = s_out.e_pic_type;
+}
+
+/*******************************************************************************
+ * @brief Updates the picture handling state whenever there is a change in the
+ *        intra or inter frame interval
+ *
+ * Recomputes the I/P/B distribution of a regular gop from the new intervals
+ * and, depending on where the change happens, the distribution of the gop
+ * currently being buffered:
+ *  - at a gop boundary the current gop simply takes the new distribution;
+ *  - inside a gop the current gop becomes a "mixed" gop: the part already
+ *    buffered keeps the old structure, the rest uses the new inter frame
+ *    interval.
+ *
+ * @param ps_pic_handling  picture handling state (updated in place)
+ * @param i4_intra_frm_int new intra frame interval; values <= 0 are ignored
+ *                         and the current interval is kept
+ * @param i4_inter_frm_int new inter frame interval; values <= 0 or larger
+ *                         than i4_max_inter_frm_int are ignored and the
+ *                         current interval is kept
+ * @param i4_gop_boundary  non-zero if the change happens at a gop boundary,
+ *                         0 if it happens at a subgop boundary inside a gop
+ ******************************************************************************/
+static void irc_update_pic_distbn(pic_handling_t *ps_pic_handling,
+                                  WORD32 i4_intra_frm_int,
+                                  WORD32 i4_inter_frm_int,
+                                  WORD32 i4_gop_boundary)
+{
+    /* Declarations */
+    WORD32 i4_is_gop_closed;
+    WORD32 i, i4_prev_inter_frm_int, i4_max_inter_frm_int, i4_pic_disp_order_no;
+    WORD32 i4_b_in_incomp_subgop, i4_extra_p,
+                    i4_b_in_incomp_subgop_mix_gop, i4_extra_p_mix_gop;
+    WORD32 i4_pb_frms_till_prev_p;
+    WORD32 ai4_diff_in_frms[MAX_PIC_TYPE];
+
+    /* Initialize the local vars from the state struct */
+    i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+    i4_prev_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+    i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;
+    i4_extra_p = ps_pic_handling->i4_extra_p;
+    i4_b_in_incomp_subgop_mix_gop =
+                    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+    i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p_mix_gop;
+    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+
+    /* Frames covered by the P pictures already buffered in this gop */
+    i4_pb_frms_till_prev_p = (ps_pic_handling->i4_p_count_in_gop
+                    * i4_prev_inter_frm_int);
+
+    /* Check for the validity of the intra_frm_int */
+    if(i4_intra_frm_int <= 0)
+    {
+        i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+    }
+    /*
+     * Check for the validity of the inter_frm_int.
+     * Zero has to be rejected as well: the interval is used as a divisor in
+     * find_pic_distbn_in_gop() and as a modulus in irc_add_pic_to_stack().
+     */
+    if((i4_inter_frm_int > i4_max_inter_frm_int) || (i4_inter_frm_int <= 0))
+    {
+        i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    }
+
+    /* Keep a copy of the older frms_in_gop */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ai4_diff_in_frms[i] = ps_pic_handling->i4_frms_in_cur_gop[i];
+    }
+
+    /* Update all the variables which are calculated from the inter_frm_int */
+
+    /* Get the new pic distribution in the gop */
+    find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_gop, i4_intra_frm_int,
+                           i4_inter_frm_int, i4_is_gop_closed,
+                           &i4_b_in_incomp_subgop, &i4_extra_p);
+
+    /* Find the other related variables */
+    if(i4_gop_boundary == 0)
+    {
+        /*
+         * Since the inter frame interval has changed inside a gop, the
+         * current gop will be a mixed gop. So, we need to find the values of
+         * the related variables for the part of the gop still to come
+         */
+        find_pic_distbn_in_gop(ps_pic_handling->i4_frms_in_cur_gop,
+                               (i4_intra_frm_int - i4_pb_frms_till_prev_p),
+                               i4_inter_frm_int, i4_is_gop_closed,
+                               &i4_b_in_incomp_subgop_mix_gop,
+                               &i4_extra_p_mix_gop);
+
+        /* Add back the P and B pictures already buffered in this gop */
+        ps_pic_handling->i4_frms_in_cur_gop[P_PIC] +=
+                        ps_pic_handling->i4_p_count_in_gop;
+        ps_pic_handling->i4_frms_in_cur_gop[B_PIC] +=
+                        ps_pic_handling->i4_b_count_in_gop;
+    }
+    else
+    {
+        /*
+         * Since the inter_frm_interval has changed at a gop boundary, the
+         * new gop will have all the subgops with the new inter_frm_interval
+         */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_frms_in_cur_gop[i] =
+                            ps_pic_handling->i4_frms_in_gop[i];
+        }
+
+        i4_b_in_incomp_subgop_mix_gop = i4_b_in_incomp_subgop;
+        i4_extra_p_mix_gop = i4_extra_p;
+    }
+
+    /*
+     * For bit-allocation the rem_frms_in_gop need to be updated.
+     * Check needed: the encoding is happening on the same gop as the
+     * buffering. The comparison uses the mixed-gop B count still stored in
+     * the state struct, i.e. the value from before this update.
+     */
+    if(ps_pic_handling->i4_pic_disp_order_no
+                    >= (i4_max_inter_frm_int - 1 - ((!i4_is_gop_closed)
+                                    * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
+    {
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_rem_frms_in_gop[i] +=
+                            (ps_pic_handling->i4_frms_in_cur_gop[i]
+                                            - ai4_diff_in_frms[i]);
+        }
+    }
+
+    /* Update the vars which will affect the proper filling of the pic_stack */
+    if(i4_pic_disp_order_no == 0) /*Check if redundant*/
+    {
+        ps_pic_handling->i4_buf_pic_no = 0;
+    }
+    else
+    {
+        ps_pic_handling->i4_buf_pic_no = 1;
+    }
+
+    ps_pic_handling->i4_b_count_in_subgop = 0;
+
+    /* Update the state struct with the new intervals and derived values */
+    ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_b_in_incomp_subgop = i4_b_in_incomp_subgop;
+    ps_pic_handling->i4_extra_p = i4_extra_p;
+    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+                    i4_b_in_incomp_subgop_mix_gop;
+    ps_pic_handling->i4_extra_p_mix_gop = i4_extra_p_mix_gop;
+
+}
+
+/* *****************************************************************************
+ * @brief Distributes the frames of a gop as I, P and B based on the intra and
+ *        inter frame intervals
+ *
+ * A gop always holds exactly one I picture. With an intra frame interval of 1
+ * there are no P or B pictures at all. Otherwise the P count depends on
+ * whether the gop is closed, and every remaining frame is a B picture.
+ *
+ * @param i4_frms_in_gop          [out] number of I, P and B pictures in a gop
+ * @param i4_intra_frm_int        intra frame interval
+ * @param i4_inter_frm_int        inter frame interval; used as a divisor
+ *                                unless i4_intra_frm_int is 1
+ * @param i4_is_gop_closed        non-zero for a closed gop
+ * @param pi4_b_in_incomp_subgop  [out] number of B pictures in the incomplete
+ *                                subgop
+ * @param pi4_extra_p             [out] 1 if a closed gop has an extra P
+ *                                picture, else 0
+ ******************************************************************************/
+static void find_pic_distbn_in_gop(WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+                                   WORD32 i4_intra_frm_int,
+                                   WORD32 i4_inter_frm_int,
+                                   WORD32 i4_is_gop_closed,
+                                   WORD32 *pi4_b_in_incomp_subgop,
+                                   WORD32 *pi4_extra_p)
+{
+    i4_frms_in_gop[I_PIC] = 1;
+
+    /* All I frames: nothing else to distribute */
+    if(i4_intra_frm_int == 1)
+    {
+        i4_frms_in_gop[P_PIC] = 0;
+        i4_frms_in_gop[B_PIC] = 0;
+        *pi4_b_in_incomp_subgop = 0;
+        *pi4_extra_p = 0;
+        return;
+    }
+
+    if(!i4_is_gop_closed)
+    {
+        i4_frms_in_gop[P_PIC] = ((i4_intra_frm_int - 1) / i4_inter_frm_int);
+        *pi4_extra_p = 0;
+    }
+    else
+    {
+        WORD32 i4_subgops = (i4_intra_frm_int - 2) / i4_inter_frm_int;
+
+        i4_frms_in_gop[P_PIC] = i4_subgops + 1;
+
+        /*
+         * An extra P is present when (intra_frm_int - 2) is an exact
+         * multiple of the inter frame interval
+         */
+        *pi4_extra_p = ((i4_subgops * i4_inter_frm_int)
+                        == (i4_intra_frm_int - 2)) ? 1 : 0;
+    }
+
+    /* Whatever is neither the I picture nor a P picture is a B picture */
+    i4_frms_in_gop[B_PIC] = (i4_intra_frm_int - 1 - i4_frms_in_gop[P_PIC]);
+
+    /* B pictures left over after the (inter_frm_int - 1) B's per subgop */
+    *pi4_b_in_incomp_subgop = (i4_frms_in_gop[B_PIC] - (i4_inter_frm_int - 1)
+                    * ((i4_intra_frm_int - 1) / i4_inter_frm_int));
+}
+
+/* Returns the intra frame interval currently stored in the state */
+WORD32 irc_pic_type_get_intra_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_interval = ps_pic_handling->i4_intra_frm_int;
+
+    return i4_interval;
+}
+
+/* Returns the inter frame interval currently stored in the state */
+WORD32 irc_pic_type_get_inter_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_interval = ps_pic_handling->i4_inter_frm_int;
+
+    return i4_interval;
+}
+
+/*
+ * Copies the per-picture-type count of frames still remaining in the gop
+ * (i4_rem_frms_in_gop) into the caller supplied array
+ */
+void irc_pic_type_get_rem_frms_in_gop(pic_handling_t *ps_pic_handling,
+                                      WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE])
+{
+    /* The whole state array is copied in one go */
+    memcpy(ai4_rem_frms_in_gop,
+           ps_pic_handling->i4_rem_frms_in_gop,
+           sizeof(ps_pic_handling->i4_rem_frms_in_gop));
+}
+
+/*
+ * Returns i4_frames_in_fif_gop, the frame count start_new_gop() records for
+ * the gop that a forced I frame ends early
+ */
+WORD32 irc_pic_type_get_frms_in_gop_force_I_frm(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_frames = ps_pic_handling->i4_frames_in_fif_gop;
+
+    return i4_frames;
+}
+
+/*
+ * Copies the per-picture-type frame counts of the current gop
+ * (i4_frms_in_cur_gop) into the caller supplied array
+ */
+void irc_pic_type_get_frms_in_gop(pic_handling_t *ps_pic_handling,
+                                  WORD32 ai4_frms_in_gop[MAX_PIC_TYPE])
+{
+    /* The whole state array is copied in one go */
+    memcpy(ai4_frms_in_gop,
+           ps_pic_handling->i4_frms_in_cur_gop,
+           sizeof(ps_pic_handling->i4_frms_in_cur_gop));
+}
+
+/* Returns the display order number currently stored in the state */
+WORD32 irc_pic_type_get_disp_order_no(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+
+    return i4_disp_order_no;
+}
+
+/*
+ * Raises the force-I-frame request flag; irc_add_pic_to_stack() and
+ * irc_get_pic_from_stack() act on it and clear it again
+ */
+void irc_set_force_I_frame_flag(pic_handling_t *ps_pic_handling)
+{
+    ps_pic_handling->i4_force_I_frame = 1;
+}
+/*
+ * Returns the flag that irc_get_pic_from_stack() sets when it hands out an
+ * I_PIC while a force-I-frame request is pending
+ */
+WORD32 irc_get_forced_I_frame_cur_frm_flag(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_flag = ps_pic_handling->i4_forced_I_frame_cur_frame;
+
+    return i4_flag;
+}
+/* Clears the "current frame is a forced I frame" flag */
+void irc_reset_forced_I_frame_cur_frm_flag(pic_handling_t *ps_pic_handling)
+{
+    ps_pic_handling->i4_forced_I_frame_cur_frame = 0;
+}
+
+/******************************************************************************/
+/* Functions that work on the encoded frames */
+/******************************************************************************/
+
+/******************************************************************************
+ Function Name : irc_update_pic_handling
+ Description   : Will be called only for the frames to be encoded.
+                 Advances the read position in the pic_stack (wrapping after
+                 i4_max_inter_frm_int + 1 entries), takes the encoded picture
+                 off the remaining-frames count of its type and, once no
+                 frames of any type remain, reloads the remaining counts from
+                 the current gop and flags the end of the gop.
+ Inputs        : ps_pic_handling - picture handling state (updated in place)
+                 e_pic_type      - type of the picture that was encoded; used
+                                   as an index into i4_rem_frms_in_gop
+ *****************************************************************************/
+void irc_update_pic_handling(pic_handling_t *ps_pic_handling,
+                             picture_type_e e_pic_type)
+{
+    WORD32 i4_stack_size = ps_pic_handling->i4_max_inter_frm_int + 1;
+    WORD32 i4_gop_done;
+    WORD32 i4_type;
+
+    /* Move on to the next stack entry, wrapping at the end of the stack */
+    ps_pic_handling->i4_stack_count++;
+    if(ps_pic_handling->i4_stack_count == i4_stack_size)
+    {
+        ps_pic_handling->i4_stack_count = 0;
+    }
+
+    /* One frame less of this type remains in the gop */
+    ps_pic_handling->i4_rem_frms_in_gop[e_pic_type]--;
+
+    /*
+     * Assumption : Rem_frms_in_gop needs to be taken care of, for every
+     * change in frms
+     */
+    i4_gop_done = (ps_pic_handling->i4_rem_frms_in_gop[I_PIC] <= 0)
+                    && (ps_pic_handling->i4_rem_frms_in_gop[P_PIC] <= 0)
+                    && (ps_pic_handling->i4_rem_frms_in_gop[B_PIC] <= 0);
+
+    if(!i4_gop_done)
+    {
+        ps_pic_handling->i4_last_frm_in_gop = 0;
+        return;
+    }
+
+    /* Gop exhausted: reload the remaining counts from the current gop */
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        ps_pic_handling->i4_rem_frms_in_gop[i4_type] =
+                        ps_pic_handling->i4_frms_in_cur_gop[i4_type];
+    }
+
+    ps_pic_handling->i4_last_frm_in_gop = 1;
+    ps_pic_handling->i4_first_gop_encoded = 1;
+}
+
+/*
+ * Returns the flag maintained by irc_update_pic_handling(): 1 if the frame
+ * last passed to it exhausted the gop, 0 otherwise
+ */
+WORD32 irc_is_last_frame_in_gop(pic_handling_handle ps_pic_handling)
+{
+    WORD32 i4_is_last = ps_pic_handling->i4_last_frm_in_gop;
+
+    return i4_is_last;
+}
+
+/******************************************************************************
+ Function Name : irc_skip_encoded_frame
+ Description : Needs to go to the current pic in the pic_stack.
+ If it's B_PIC don't do anything
+ If it's a reference picture, push all but the last B_PICs
+ in the current subgop one place down (i.e. just copy their
+ pic_details) and move the last B_PIC in that subgop to the
+ next slot of the skipped picture and convert its pic_type
+ to that of the reference picture
+ Inputs : ps_pic_handling - picture handling state; the pic_stack and
+ i4_rem_frms_in_gop may be modified
+ e_pic_type - type of the picture that was skipped
+ Notes : The pic_stack is treated as a circular buffer of
+ (i4_max_inter_frm_int + 1) entries.
+ Nothing is done either when the slot following the current
+ one is already the next reference picture slot.
+ *****************************************************************************/
+void irc_skip_encoded_frame(pic_handling_t *ps_pic_handling,
+ picture_type_e e_pic_type)
+{
+ pic_details_t s_pic_details;
+ WORD32 i4_stack_count, i4_next_ref_pic_idx, i4_pic_idx;
+ WORD32 i4_max_inter_frm_int, i4_last_b_pic_idx, i4_first_b_pic_idx;
+ WORD32 i4_next_pic_idx;
+
+ /* State variables used to initialize the local vars (Not to be changed) */
+ i4_stack_count = ps_pic_handling->i4_stack_count;
+ i4_next_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx;
+ i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+
+ i4_next_pic_idx = ((i4_stack_count + 1) % (i4_max_inter_frm_int + 1));
+
+ /*
+ * Check what is the encoded frm_type
+ * Changing a B_PIC to a ref_pic is not reqd if
+ * there are no B_PICs referring from the skipped ref_pic
+ */
+ if(((e_pic_type == P_PIC) || (e_pic_type == I_PIC))
+ && (i4_next_pic_idx != i4_next_ref_pic_idx))
+ {
+ /* Go to the last B_PIC before the next_ref_pic
+ * (the slot just before it, wrapping around the stack) */
+ if(i4_next_ref_pic_idx == 0)
+ {
+ i4_last_b_pic_idx = i4_max_inter_frm_int;
+ }
+ else
+ {
+ i4_last_b_pic_idx = (i4_next_ref_pic_idx - 1);
+ }
+
+ /* Keep a copy of the last B_PIC pic_details */
+ memcpy(&s_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_last_b_pic_idx],
+ sizeof(pic_details_t));
+
+ i4_pic_idx = i4_last_b_pic_idx;
+ i4_first_b_pic_idx = (i4_stack_count + 1) % (i4_max_inter_frm_int + 1);
+
+ /*
+ * All the B_PICs other than the last one, need to be shifted one place
+ * in the stack. The walk goes backwards from the last B_PIC slot down to
+ * the current slot and is skipped entirely when the first and last
+ * B_PIC slots coincide.
+ */
+ while((i4_pic_idx != i4_stack_count)
+ && (i4_first_b_pic_idx != i4_last_b_pic_idx))
+ {
+ if(i4_pic_idx == 0)
+ {
+ i4_pic_idx = i4_max_inter_frm_int;
+ }
+ else
+ {
+ i4_pic_idx--;
+ }
+
+ memcpy(&ps_pic_handling->as_pic_stack[(i4_pic_idx + 1)
+ % (i4_max_inter_frm_int + 1)],
+ &ps_pic_handling->as_pic_stack[i4_pic_idx],
+ sizeof(pic_details_t));
+
+ }
+
+ /*
+ * Copy the last B_PIC pic_details to the first B_PIC place and change
+ * its pic type to the ref_PIC
+ */
+ /* e_ref_pic_type: the slot is always marked P_PIC here, whichever
+ * reference picture type was skipped */
+ ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].e_pic_type = P_PIC;
+
+ ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].i4_pic_disp_order_no =
+ s_pic_details.i4_pic_disp_order_no;
+ ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].i4_pic_id =
+ s_pic_details.i4_pic_id;
+
+ /* Change the rem_frms_in_prd so that the update works properly:
+ * one B_PIC has become a P_PIC */
+ if(ps_pic_handling->i4_rem_frms_in_gop[B_PIC] > 0)
+ {
+ ps_pic_handling->i4_rem_frms_in_gop[B_PIC]--;
+ ps_pic_handling->i4_rem_frms_in_gop[P_PIC]++;
+ }
+ }
+
+}
+
+/******************************************************************************
+ Function Name : flush_frame
+ Description   : After a flush there will be no further valid frames, so the
+                 last frame cannot stay a B_PIC: it would have no reference
+                 frame (input is in display order).
+
+                 The function looks at the last picture added to the
+                 pic_stack (s_prev_pic_details):
+                 - if it was a B_PIC, its id and display order number are
+                   copied to the next reference slot as a P_PIC and the slot
+                   it was originally stored in is invalidated;
+                 - otherwise the next reference slot is invalidated, and the
+                   next B slot too unless the inter frame interval is 1.
+
+                 Invalidated slots carry MAX_PIC_TYPE / -1 / -1 so that the
+                 codec can tell when all buffered frames are flushed.
+ *****************************************************************************/
+void irc_flush_frame_from_pic_stack(pic_handling_t *ps_pic_handling)
+{
+    pic_details_t s_last_added;
+
+    /* Stack positions involved (only read here, never modified) */
+    WORD32 i4_last_b_slot = ps_pic_handling->i4_prev_b_pic_idx;
+    WORD32 i4_ref_slot = ps_pic_handling->i4_ref_pic_idx;
+    WORD32 i4_b_slot = ps_pic_handling->i4_b_pic_idx;
+
+    memcpy(&s_last_added, &ps_pic_handling->s_prev_pic_details,
+           sizeof(pic_details_t));
+
+    if(s_last_added.e_pic_type != B_PIC)
+    {
+        /*
+         * Last added picture was a reference picture: invalidate the next
+         * reference slot so the codec knows the flush is complete
+         */
+        ps_pic_handling->as_pic_stack[i4_ref_slot].e_pic_type = MAX_PIC_TYPE;
+        ps_pic_handling->as_pic_stack[i4_ref_slot].i4_pic_id = -1;
+        ps_pic_handling->as_pic_stack[i4_ref_slot].i4_pic_disp_order_no = -1;
+
+        /* The next B slot is invalidated too, unless the interval is 1 */
+        if(ps_pic_handling->i4_inter_frm_int != 1)
+        {
+            ps_pic_handling->as_pic_stack[i4_b_slot].e_pic_type = MAX_PIC_TYPE;
+            ps_pic_handling->as_pic_stack[i4_b_slot].i4_pic_id = -1;
+            ps_pic_handling->as_pic_stack[i4_b_slot].i4_pic_disp_order_no = -1;
+        }
+        return;
+    }
+
+    /* Promote the trailing B_PIC: store it as a P_PIC in the reference slot */
+    ps_pic_handling->as_pic_stack[i4_ref_slot].i4_pic_disp_order_no =
+                    s_last_added.i4_pic_disp_order_no;
+    ps_pic_handling->as_pic_stack[i4_ref_slot].i4_pic_id =
+                    s_last_added.i4_pic_id;
+    ps_pic_handling->as_pic_stack[i4_ref_slot].e_pic_type = P_PIC;
+
+    /*
+     * Invalidate the slot the B_PIC originally went into, so the codec knows
+     * when all the buffered frames are flushed
+     */
+    ps_pic_handling->as_pic_stack[i4_last_b_slot].e_pic_type = MAX_PIC_TYPE;
+    ps_pic_handling->as_pic_stack[i4_last_b_slot].i4_pic_id = -1;
+    ps_pic_handling->as_pic_stack[i4_last_b_slot].i4_pic_disp_order_no = -1;
+}
+
+/******************************************************************************
+ Function Name : irc_add_pic_to_stack_re_enc
+ Description : In case of a re-enc, we can assume the pictures to be coming
+ in the encode order.
+ In case of re-encoder basically, there are 3 problematic cases.
+ 1)Inter_frm_int is not known to start with
+ 2)Inter_frm_int can keep changing
+ 3)Intra_frm_int set by the application and that actually in the
+ decoded bitstream may be different
+ Inputs : ps_pic_handling - pic handling state
+ i4_enc_pic_id - pic id stored in the stack entry of this picture
+ e_pic_type - picture type of the incoming picture
+ Returns : -1 when the number of B_PICs in the current subgop exceeds
+ max_inter_frm_int (the function returns before the picture is
+ added to the stack), 0 otherwise
+ *****************************************************************************/
+WORD32 irc_add_pic_to_stack_re_enc(pic_handling_t *ps_pic_handling,
+ WORD32 i4_enc_pic_id,
+ picture_type_e e_pic_type)
+{
+ WORD32 i4_b_count_in_subgop;
+ WORD32 i4_max_inter_frm_int, i4_inter_frm_int, i4_intra_frm_int;
+ WORD32 i4_pic_disp_order_no;
+ WORD32 i4_is_gop_closed;
+ picture_type_e e_out_pic_type;
+ WORD32 i4_b_in_incomp_subgop;
+
+ /* Check if a change in intra_frm_int call has been made; if so, apply the
+ * registered new intra_frm_int and clear the request flag */
+ if(ps_pic_handling->i4_change_in_intra_frm_int == 1)
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_new_intra_frm_int,
+ ps_pic_handling->i4_inter_frm_int, 1);
+ ps_pic_handling->i4_change_in_intra_frm_int = 0;
+ }
+
+ /* Check if a change in inter_frm_int call has been made; if so, apply the
+ * registered new inter_frm_int and clear the request flag */
+ if(ps_pic_handling->i4_change_in_inter_frm_int == 1)
+ {
+ irc_update_pic_distbn(ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int, 1);
+
+ ps_pic_handling->i4_change_in_inter_frm_int = 0;
+ }
+
+ /* Initialize the local vars with the state vars (read only after the
+ * pending interval changes above have been applied) */
+ i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
+ i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+ i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;
+
+ e_out_pic_type = e_pic_type;
+
+ /* Initially the rate_control assumes an IPP sequence */
+ if(e_pic_type == B_PIC)
+ {
+ /* Update the number of B_PICs in a subgop */
+ i4_b_count_in_subgop++;
+
+ if(i4_b_count_in_subgop > i4_max_inter_frm_int)
+ {
+ return (-1);
+ }
+
+ /* If the number of B_PICs exceed the set inter_frm_int then
+ change the inter_frm_int */
+ if(i4_b_count_in_subgop > (i4_inter_frm_int - 1))
+ {
+ i4_inter_frm_int = (i4_b_count_in_subgop + 1);
+
+ irc_update_pic_distbn(ps_pic_handling, i4_intra_frm_int,
+ i4_inter_frm_int, 0);
+ }
+ }
+ else if((e_pic_type == I_PIC) || (e_pic_type == P_PIC))
+ {
+ /* If the B_PICs in the prev subgop were fewer than the current
+ * (inter_frm_int-1) and none of these conditions occur, it'll mean the
+ * decrease in the inter_frm_int
+ * 1)End of a GOP
+ * 2)Beginning of an OPEN_GOP
+ */
+ if((i4_b_count_in_subgop < (i4_inter_frm_int - 1))
+ && !((!i4_is_gop_closed)
+ && (i4_b_count_in_subgop
+ >= i4_b_in_incomp_subgop))
+ && !((i4_pic_disp_order_no
+ + (i4_inter_frm_int - 1
+ - i4_b_count_in_subgop))
+ > i4_intra_frm_int))
+ {
+ i4_inter_frm_int = (i4_b_count_in_subgop + 1);
+
+ irc_update_pic_distbn(ps_pic_handling, i4_intra_frm_int,
+ i4_inter_frm_int, 0);
+ }
+
+ /* Reset the number of B_PICs in a subgop */
+ i4_b_count_in_subgop = 0;
+ }
+
+ /* Update the frame level vars */
+ i4_pic_disp_order_no++;
+
+ /* End of gop condition
+ * Two cases can arise :
+ * 1) The intra_frm_int set by the application is greater than the actual
+ * bitstream intra_frm_int (i.e. we will get an I frame before
+ * pic_disp_order_no goes to intra_frm_int)
+ * 2) The intra_frm_int set by the application is smaller than the actual
+ * bitstream intra_frm_int (i.e. we won't get an I_PIC even if
+ * pic_disp_order_no goes to intra_frm_int)
+ * Constraints :
+ * 1) I_PIC cannot be changed to B_PIC
+ * 2) B_PIC cannot be changed to I_PIC
+ */
+ if(i4_pic_disp_order_no >= i4_intra_frm_int)
+ {
+ if(e_pic_type != B_PIC)
+ {
+ e_out_pic_type = I_PIC;
+ }
+ else
+ {
+ e_out_pic_type = B_PIC;
+ ps_pic_handling->i4_rem_frms_in_gop[B_PIC]++;
+ ps_pic_handling->i4_frms_in_cur_gop[B_PIC]++;
+ ps_pic_handling->i4_frms_in_gop[B_PIC]++;
+ }
+ }
+ else
+ {
+ if((e_pic_type == I_PIC) && (!ps_pic_handling->i4_is_first_gop))
+ {
+ e_out_pic_type = P_PIC;
+ ps_pic_handling->i4_rem_frms_in_gop[P_PIC]++;
+ ps_pic_handling->i4_frms_in_cur_gop[P_PIC]++;
+ ps_pic_handling->i4_frms_in_gop[P_PIC]++;
+ }
+ else
+ {
+ e_out_pic_type = e_pic_type;
+ }
+ }
+
+ /* Update the frm_vars at the end of the gop: once the count reaches the
+ * P and B frames of the current gop plus one, restart the display order
+ * numbering and leave the first-gop state */
+ if(i4_pic_disp_order_no
+ == (ps_pic_handling->i4_frms_in_cur_gop[P_PIC]
+ + ps_pic_handling->i4_frms_in_cur_gop[B_PIC]
+ + 1))
+ {
+ i4_pic_disp_order_no = 0;
+ ps_pic_handling->i4_is_first_gop = 0;
+ }
+
+ /* Update the vars working on the encoded pics */
+ if((ps_pic_handling->i4_is_first_gop)
+ && (ps_pic_handling->i4_stack_count == -1))
+ {
+ ps_pic_handling->i4_coded_pic_no = 0;
+ ps_pic_handling->i4_stack_count = 0;
+ }
+
+ /* Add the pic_details to the pic_stack. The display order number stored
+ * is the state value, i.e. the one from before the increment/reset done
+ * above, since the local copy is written back only below */
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].e_pic_type =
+ e_out_pic_type;
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_disp_order_no =
+ ps_pic_handling->i4_pic_disp_order_no;
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_id =
+ i4_enc_pic_id;
+
+ /* Writing back those values which need to be updated */
+ ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
+ ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
+ ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
+
+ return (0);
+}
diff --git a/encoder/irc_picture_type.h b/encoder/irc_picture_type.h
new file mode 100755
index 0000000..1af5424
--- /dev/null
+++ b/encoder/irc_picture_type.h
@@ -0,0 +1,95 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _PIC_HANDLING_H_
+#define _PIC_HANDLING_H_
+
+/*
+ * Basic Understanding:
+ * irc_add_pic_to_stack(_re_enc):
+ * These functions convert the input (or display) order to encoding order.
+ * irc_add_pic_to_stack_re_enc returns 0 on success and -1 when the picture
+ * could not be added.
+ *
+ * pic_handling_handle is a pointer to struct pic_handling_t; the layout of
+ * that struct is not declared in this header, so users of this interface
+ * only pass the handle around.
+ * */
+typedef struct pic_handling_t *pic_handling_handle;
+
+WORD32 irc_pic_handling_num_fill_use_free_memtab(pic_handling_handle *pps_pic_handling,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+void irc_init_pic_handling(pic_handling_handle ps_pic_handling,
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_max_inter_frm_int,
+ WORD32 i4_is_gop_closed);
+
+void irc_add_pic_to_stack(pic_handling_handle ps_pic_handling,
+ WORD32 i4_enc_pic_id);
+
+WORD32 irc_add_pic_to_stack_re_enc(pic_handling_handle ps_pic_handling,
+ WORD32 i4_enc_pic_id,
+ picture_type_e e_pic_type);
+
+void irc_get_pic_from_stack(pic_handling_handle ps_pic_handling,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no,
+ picture_type_e *pe_pic_type);
+
+WORD32 irc_is_last_frame_in_gop(pic_handling_handle ps_pic_handling);
+
+void irc_flush_frame_from_pic_stack(pic_handling_handle ps_pic_handling);
+
+/* NITT TBR The below two functions should be made a single function */
+void irc_skip_encoded_frame(pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type);
+
+void irc_update_pic_handling(pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type);
+
+/*
+ * Function returns the number of frames that have been encoded in the GOP in
+ * which the force I frame takes effect
+ */
+WORD32 irc_pic_type_get_frms_in_gop_force_I_frm(pic_handling_handle ps_pic_handling);
+
+void irc_set_force_I_frame_flag(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_get_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+void irc_reset_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+/* Normal get functions (the array getters fill one entry per picture type) */
+WORD32 irc_pic_type_get_inter_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_pic_type_get_intra_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 irc_pic_type_get_disp_order_no(pic_handling_handle ps_pic_handling);
+
+void irc_pic_handling_register_new_int_frm_interval(pic_handling_handle ps_pic_handling,
+ WORD32 i4_intra_frm_int);
+
+void irc_pic_handling_register_new_inter_frm_interval(pic_handling_handle ps_pic_handling,
+ WORD32 i4_inter_frm_int);
+
+void irc_pic_type_get_rem_frms_in_gop(pic_handling_handle ps_pic_handling,
+ WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE]);
+
+void irc_pic_type_get_frms_in_gop(pic_handling_handle ps_pic_handling,
+ WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]);
+
+#endif /* _PIC_HANDLING_H_ */
+
diff --git a/encoder/irc_rate_control_api.c b/encoder/irc_rate_control_api.c
new file mode 100755
index 0000000..6c6586e
--- /dev/null
+++ b/encoder/irc_rate_control_api.c
@@ -0,0 +1,1600 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include "stdio.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rd_model.h"
+#include "irc_est_sad.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_picture_type.h"
+#include "irc_bit_allocation.h"
+#include "irc_mb_model_based.h"
+#include "irc_cbr_buffer_control.h"
+#include "irc_vbr_str_prms.h"
+#include "irc_rate_control_api.h"
+#include "irc_rate_control_api_structs.h"
+#include "irc_trace_support.h"
+
+#define DEV_Q 4 /* Q format (shift) of the deviation range factors, i.e. scale 1 << DEV_Q = 16 */
+#define HI_DEV_FCTR 22 /* upper deviation factor 1.4 in Q4: 1.4 * 16 = 22.4, stored as 22 */
+#define LO_DEV_FCTR 12 /* lower deviation factor 0.75 in Q4: 0.75 * 16 = 12 */
+/*
+ * Upper / lower QP limit derived from the previous QP: Qprev scaled by the
+ * deviation factor and rounded to nearest. The argument is parenthesized so
+ * that the WORD32 cast applies to the whole argument expression.
+ */
+#define GET_HI_DEV_QP(Qprev) ((((WORD32)(Qprev)) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+#define GET_LO_DEV_QP(Qprev) ((((WORD32)(Qprev)) * LO_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+/* Clamps Qc to [lo_d, hi_d]; the lower limit is checked first */
+#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d)) ? (lo_d) : (((Qc) > (hi_d)) ? (hi_d) : (Qc)))
+
+/*****************************************************************************/
+/* Restricts the quantization parameter variation within delta */
+/*****************************************************************************/
+/* static WORD32 restrict_swing(WORD32 cur_qp, WORD32 prev_qp, WORD32 delta_qp)
+ {
+ if((cur_qp) - (prev_qp) > (delta_qp)) (cur_qp) = (prev_qp) + (delta_qp) ;
+ if((prev_qp) - (cur_qp) > (delta_qp)) (cur_qp) = (prev_qp) - (delta_qp) ;
+ return cur_qp;
+ }*/
+
+/*****************************************************************************
+ Function Name : irc_rate_control_num_fill_use_free_memtab
+ Description   : Handles the memtab entry of the rate control api state as
+                 requested by e_func_type and then forwards the same request
+                 to all the lower modules (bit allocation, cbr buffer, est
+                 sad, mb rate control, vbr storage vbv, one rd model per
+                 picture type, pic handling)
+ Inputs        : pps_rate_control_api - pointer to RC api pointer
+                 ps_memtab            - Memtab pointer
+                 e_func_type          - requested memtab operation
+ Returns       : one (for the api state entry) plus the sum of the values
+                 returned by the lower modules
+ *****************************************************************************/
+WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_control_api,
+                                                 itt_memtab_t *ps_memtab,
+                                                 ITT_FUNC_TYPE_E e_func_type)
+{
+    static rate_control_api_t s_temp_rc_api;
+    rate_control_api_t *ps_rc_api;
+    WORD32 i4_num_memtabs;
+    WORD32 i4_pic_type;
+
+    /*
+     * While counting or filling memtabs no state memory exists yet. Point
+     * the handle at a static dummy so that the member addresses taken below
+     * do not dereference an invalid pointer.
+     */
+    if((e_func_type == GET_NUM_MEMTAB) || (e_func_type == FILL_MEMTAB))
+    {
+        *pps_rate_control_api = &s_temp_rc_api;
+    }
+
+    /* Entry 0 belongs to the rate control api state structure itself */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[0], sizeof(rate_control_api_t),
+                    ALIGN_128_BYTE, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void**)pps_rate_control_api,
+                         e_func_type);
+    }
+    i4_num_memtabs = 1;
+
+    /* Read the handle only now, after it may have been set above */
+    ps_rc_api = *pps_rate_control_api;
+
+    /* Memory requirement of the lower modules, in a fixed order */
+    i4_num_memtabs += irc_ba_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_bit_allocation,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_cbr_buffer_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_cbr_buffer,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_est_sad_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_est_sad,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_mbrc_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_mb_rate_control,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    i4_num_memtabs += irc_vbr_vbv_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_vbr_storage_vbv,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    /* One rd model per picture type */
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        i4_num_memtabs += irc_rd_model_num_fill_use_free_memtab(
+                        &ps_rc_api->aps_rd_model[i4_pic_type],
+                        &ps_memtab[i4_num_memtabs], e_func_type);
+    }
+
+    i4_num_memtabs += irc_pic_handling_num_fill_use_free_memtab(
+                    &ps_rc_api->ps_pic_handling,
+                    &ps_memtab[i4_num_memtabs], e_func_type);
+
+    return (i4_num_memtabs);
+}
+
+/*****************************************************************************
+ Function Name : irc_initialise_rate_control
+ Description   : Initialise the rate control structure and, unless the rate
+                 control type is CONST_QP, the rd models, the buffer model,
+                 the SAD estimation and the bit allocation modules
+ Inputs        : ps_rate_control_api     - api struct
+                 e_rate_control_type     - VBR, CBR (NLDRC/LDRC), VBR_STREAMING
+                 u1_is_mb_level_rc_on    - enabling mb level RC
+                 u4_avg_bit_rate         - bit rate to be achieved across the
+                                           entire file size
+                 pu4_peak_bit_rate       - max possible drain rates
+                                           (MAX_NUM_DRAIN_RATES entries)
+                 u4_min_bit_rate         - forwarded to the bit allocation
+                                           module
+                 u4_frame_rate           - number of frames in 1000 seconds
+                 u4_max_delay            - maximum delay; frames in the delay
+                                           period are computed as
+                                           u4_frame_rate * u4_max_delay / 1000000
+                 u4_intra_frame_interval - num frames between two I frames
+                 pu1_init_qp             - init_qp for I,P,B
+                 u4_max_vbv_buff_size    - forwarded to the vbv / cbr buffer
+                                           initialisation
+                 i4_max_inter_frm_int    - forwarded to pic handling
+                 i4_is_gop_closed        - forwarded to pic handling
+                 pu1_min_max_qp          - 2 entries per picture type; entry
+                                           (2 * pic_type + 1) is used as the
+                                           maximum qp
+                 i4_use_est_intra_sad    - forwarded to the SAD estimation
+                                           module
+                 u4_src_ticks            - forwarded to the VBR streaming
+                 u4_tgt_ticks              parameter initialisation
+ Notes         : The frame-rate products are evaluated in 64 bit. In 32 bit
+                 u4_avg_bit_rate * 1000 wraps around for bit rates above
+                 roughly 4.29 Mbps and would corrupt i4_prev_frm_est_bits.
+ *****************************************************************************/
+void irc_initialise_rate_control(rate_control_api_t *ps_rate_control_api,
+                                 rc_type_e e_rate_control_type,
+                                 UWORD8 u1_is_mb_level_rc_on,
+                                 UWORD32 u4_avg_bit_rate,
+                                 UWORD32 *pu4_peak_bit_rate,
+                                 UWORD32 u4_min_bit_rate,
+                                 UWORD32 u4_frame_rate,
+                                 UWORD32 u4_max_delay,
+                                 UWORD32 u4_intra_frame_interval,
+                                 UWORD8 *pu1_init_qp,
+                                 UWORD32 u4_max_vbv_buff_size,
+                                 WORD32 i4_max_inter_frm_int,
+                                 WORD32 i4_is_gop_closed,
+                                 UWORD8 *pu1_min_max_qp,
+                                 WORD32 i4_use_est_intra_sad,
+                                 UWORD32 u4_src_ticks,
+                                 UWORD32 u4_tgt_ticks)
+{
+    WORD32 i;
+    /* 64 bit intermediate: the product can exceed the UWORD32 range */
+    UWORD32 u4_frms_in_delay_prd =
+                    (UWORD32)(((unsigned long long)u4_frame_rate * u4_max_delay)
+                                    / 1000000);
+    ps_rate_control_api->e_rc_type = e_rate_control_type;
+    ps_rate_control_api->u1_is_mb_level_rc_on = u1_is_mb_level_rc_on;
+
+    trace_printf((const WORD8*)"RC type = %d\n", e_rate_control_type);
+
+    /* Set the avg_bitrate_changed flag for each pic_type to 0 */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_avg_bitrate_changed[i] = 0;
+    }
+
+    /* Initialize the pic_handling module */
+    irc_init_pic_handling(ps_rate_control_api->ps_pic_handling,
+                          (WORD32)u4_intra_frame_interval, i4_max_inter_frm_int,
+                          i4_is_gop_closed);
+
+    /*** Initialize the rate control modules ***/
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+        /* Initialize the model parameter structures */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            irc_init_frm_rc_rd_model(ps_rate_control_api->aps_rd_model[i],
+                                     MAX_FRAMES_MODELLED);
+        }
+
+        /* Initialize the buffer mechanism */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            /* Assuming both the peak bit rates are same for a VBR_STORAGE and
+             VBR_STORAGE_DVD_COMP */
+            if(pu4_peak_bit_rate[0] != pu4_peak_bit_rate[1])
+            {
+                trace_printf((const WORD8*)"For VBR_STORAGE and VBR_STORAGE_DVD_COMP the peak bit rates should be same\n");
+            }
+            irc_init_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv,
+                             (WORD32)pu4_peak_bit_rate[0],
+                             (WORD32)u4_frame_rate,
+                             (WORD32)u4_max_vbv_buff_size);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            UWORD32 u4_avg_bit_rate_copy[MAX_NUM_DRAIN_RATES];
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                u4_avg_bit_rate_copy[i] = u4_avg_bit_rate;
+            }
+            /* In case of CBR the num pics in delay is ignored */
+            for(i = 0; i < MAX_PIC_TYPE; i++)
+                au4_num_pics_in_delay_prd[i] = 0;
+
+            irc_init_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                u4_max_delay, u4_frame_rate,
+                                (WORD32 *)u4_avg_bit_rate_copy,
+                                au4_num_pics_in_delay_prd,
+                                u4_max_vbv_buff_size);
+        }
+        else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+        {
+            irc_init_vbv_str_prms(&ps_rate_control_api->s_vbr_str_prms,
+                                  u4_intra_frame_interval, u4_src_ticks,
+                                  u4_tgt_ticks, u4_frms_in_delay_prd);
+
+            /* Get the number of pics of each type in delay period */
+            irc_get_vsp_num_pics_in_dly_prd(
+                            &ps_rate_control_api->s_vbr_str_prms,
+                            au4_num_pics_in_delay_prd);
+
+            irc_init_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                u4_max_delay, u4_frame_rate,
+                                (WORD32 *)pu4_peak_bit_rate,
+                                au4_num_pics_in_delay_prd,
+                                u4_max_vbv_buff_size);
+        }
+
+        /* Initialize the SAD estimation module */
+        irc_init_est_sad(ps_rate_control_api->ps_est_sad, i4_use_est_intra_sad);
+
+        /* Initialize the bit allocation module according to VBR or CBR */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type == VBR_STREAMING)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            irc_ba_init_bit_allocation(ps_rate_control_api->ps_bit_allocation,
+                                       ps_rate_control_api->ps_pic_handling,
+                                       VBR_BIT_ALLOC_PERIOD, u4_avg_bit_rate,
+                                       u4_frame_rate,
+                                       (WORD32 *)pu4_peak_bit_rate,
+                                       u4_min_bit_rate);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            irc_ba_init_bit_allocation(ps_rate_control_api->ps_bit_allocation,
+                                       ps_rate_control_api->ps_pic_handling,
+                                       CBR_BIT_ALLOC_PERIOD, u4_avg_bit_rate,
+                                       u4_frame_rate,
+                                       (WORD32 *)pu4_peak_bit_rate,
+                                       u4_min_bit_rate);
+        }
+
+        /*
+         * u1_scd_detected will be initialized to 1 when a Scene change is
+         * detected
+         */
+        ps_rate_control_api->u1_scd_detected = 0;
+    }
+
+    /* Initialize the init_qp, the previous frame qp and the min/max qp pair */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_init_qp[i] = pu1_init_qp[i];
+        ps_rate_control_api->au1_prev_frm_qp[i] = pu1_init_qp[i];
+        ps_rate_control_api->au1_min_max_qp[(i << 1)] =
+                        pu1_min_max_qp[(i << 1)];
+        ps_rate_control_api->au1_min_max_qp[(i << 1) + 1] =
+                        pu1_min_max_qp[(i << 1) + 1];
+    }
+
+    /* Initialize the is_first_frm_encoded */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_rate_control_api->au1_is_first_frm_coded[i] = 0;
+    }
+    ps_rate_control_api->u1_is_first_frm = 1;
+
+    /*
+     * Control flag for delayed impact after a change in peak bitrate has been
+     * made
+     */
+    ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change = 0;
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        ps_rate_control_api->au4_new_peak_bit_rate[i] = pu4_peak_bit_rate[i];
+    }
+
+    /* Initialize the mb level rate control module */
+    irc_init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+
+    /*
+     * Average bits per frame (frame rate is in frames per 1000 seconds).
+     * The multiplication is done in 64 bit so that it cannot wrap around.
+     */
+    ps_rate_control_api->i4_prev_frm_est_bits =
+                    (WORD32)(((unsigned long long)u4_avg_bit_rate * 1000)
+                                    / u4_frame_rate);
+
+    ps_rate_control_api->prev_ref_pic_type = I_PIC;
+}
+
+/******************************************************************************
+ *Description : Adds a picture to the picture stack by forwarding the pic id
+ *              to irc_add_pic_to_stack of the pic handling module
+ ******************************************************************************/
+void irc_add_picture_to_stack(rate_control_api_t *rate_control_api,
+                              WORD32 i4_enc_pic_id)
+{
+    pic_handling_handle ps_pic_handling = rate_control_api->ps_pic_handling;
+
+    /* The pic handling module places the pic in the stack in encode order */
+    irc_add_pic_to_stack(ps_pic_handling, i4_enc_pic_id);
+}
+
+void irc_add_picture_to_stack_re_enc(rate_control_api_t *rate_control_api,
+                                     WORD32 i4_enc_pic_id,
+                                     picture_type_e e_pic_type)
+{
+    pic_handling_handle ps_pic_handling = rate_control_api->ps_pic_handling;
+
+    /*
+     * A re-encoder hands over the pics in encode order already, hence they
+     * need not be buffered up. The status returned by the pic handling
+     * module is not passed on, as this wrapper has no return value.
+     */
+    (void)irc_add_pic_to_stack_re_enc(ps_pic_handling, i4_enc_pic_id,
+                                      e_pic_type);
+}
+
+/*******************************************************************************
+ Description : Fetches the pic id, the display order number and the picture
+               type of a picture from the pic handling module
+               (irc_get_pic_from_stack) into the three output pointers
+ ******************************************************************************/
+void irc_get_picture_details(rate_control_handle rate_control_api,
+                             WORD32 *pi4_pic_id,
+                             WORD32 *pi4_pic_disp_order_no,
+                             picture_type_e *pe_pic_type)
+{
+    pic_handling_handle ps_pic_handling = rate_control_api->ps_pic_handling;
+
+    /* The picture type is decided by the pic handling state */
+    irc_get_pic_from_stack(ps_pic_handling, pi4_pic_id, pi4_pic_disp_order_no,
+                           pe_pic_type);
+}
+
+/*******************************************************************************
+ * Description : Gets the frame level qp for the given picture type
+ * Inputs : ps_rate_control_api - rate control api state
+ * e_pic_type - picture type of the frame for which the qp is requested
+ * i4_ud_max_bits - user-defined maximum bits; for pictures other than
+ * I_PIC the estimated texture bits are clipped to
+ * (i4_ud_max_bits - estimated header bits)
+ * Returns : the frame level qp; 0 when the rate control type is not one of
+ * the supported ones, au1_init_qp[e_pic_type] for CONST_QP
+ ******************************************************************************/
+UWORD8 irc_get_frame_level_qp(rate_control_api_t *ps_rate_control_api,
+ picture_type_e e_pic_type,
+ WORD32 i4_ud_max_bits)
+{
+ UWORD8 u1_frame_qp, i;
+
+ if((ps_rate_control_api->e_rc_type != VBR_STORAGE)
+ && (ps_rate_control_api->e_rc_type != VBR_STORAGE_DVD_COMP)
+ && (ps_rate_control_api->e_rc_type != CBR_NLDRC)
+ && (ps_rate_control_api->e_rc_type != CONST_QP)
+ && (ps_rate_control_api->e_rc_type != VBR_STREAMING))
+ {
+ trace_printf((const WORD8*)(const WORD8*)" Only VBR,NLDRC and CONST QP supported for now \n");
+ return (0);
+ }
+
+ if(ps_rate_control_api->e_rc_type != CONST_QP)
+ {
+ UWORD8 u1_is_first_frm_coded = 1;
+
+ /* Check whether at least one frame of each picture type has been encoded */
+ /* Check whether it is an IPP or IPB kind of encoding: I and P must have
+ * been coded (only I when the intra frame interval is 1); for a B_PIC
+ * all the picture types must have been coded */
+ if((ps_rate_control_api->au1_is_first_frm_coded[I_PIC]
+ && ps_rate_control_api->au1_is_first_frm_coded[P_PIC])
+ || ((irc_pic_type_get_intra_frame_interval(
+ ps_rate_control_api->ps_pic_handling)
+ == 1)
+ && (ps_rate_control_api->au1_is_first_frm_coded[I_PIC])))
+ {
+ if(e_pic_type != B_PIC)
+ u1_is_first_frm_coded = 1;
+ else
+ {
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ u1_is_first_frm_coded &=
+ ps_rate_control_api->au1_is_first_frm_coded[i];
+ }
+ }
+ }
+ else
+ {
+ u1_is_first_frm_coded = 0;
+ }
+
+ if(u1_is_first_frm_coded)
+ {
+ WORD32 i4_cur_est_texture_bits, i4_cur_est_header_bits;
+ WORD32 i4_cur_est_bits;
+ UWORD32 u4_estimated_sad;
+
+ /* Force I frame updation of rem_bits_in_frame*/
+ if(irc_get_forced_I_frame_cur_frm_flag(
+ ps_rate_control_api->ps_pic_handling) == 1)
+ {
+ irc_ba_change_rem_bits_in_prd_at_force_I_frame(
+ ps_rate_control_api->ps_bit_allocation,
+ ps_rate_control_api->ps_pic_handling);
+ irc_reset_forced_I_frame_cur_frm_flag(
+ ps_rate_control_api->ps_pic_handling);
+ }
+
+ /* Get the estimated texture bits allocated for the current frame*/
+ i4_cur_est_texture_bits = irc_ba_get_cur_frm_est_texture_bits(
+ ps_rate_control_api->ps_bit_allocation,
+ ps_rate_control_api->aps_rd_model,
+ ps_rate_control_api->ps_est_sad,
+ ps_rate_control_api->ps_pic_handling, e_pic_type);
+
+ /* Get the estimated header bits*/
+ i4_cur_est_header_bits = irc_ba_get_cur_frm_est_header_bits(
+ ps_rate_control_api->ps_bit_allocation, e_pic_type);
+
+ /* Total estimated bits */
+ i4_cur_est_bits = i4_cur_est_header_bits + i4_cur_est_texture_bits;
+
+ trace_printf((const WORD8*)"ft %d, etb = %d, eb %d, ", e_pic_type,
+ i4_cur_est_texture_bits, i4_cur_est_bits);
+
+ /* Threshold the estimated bits based on the buffer fullness*/
+ if(ps_rate_control_api->e_rc_type == VBR_STORAGE)
+ {
+ WORD32 i4_cur_frm_max_bit_possible;
+ i4_cur_frm_max_bit_possible = irc_get_max_target_bits(
+ ps_rate_control_api->ps_vbr_storage_vbv);
+
+ if(i4_cur_est_bits > i4_cur_frm_max_bit_possible)
+ {
+ /* Assuming header would consume the same amount of bits */
+ i4_cur_est_texture_bits = i4_cur_frm_max_bit_possible
+ - i4_cur_est_header_bits;
+ }
+ }
+ else if(ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+ {
+ WORD32 i4_rem_bits_in_gop, i4_rem_frms_in_gop, i; /* NOTE: this i shadows the outer UWORD8 i */
+ WORD32 i4_cur_frm_max_bit_possible,
+ ai4_rem_frms_in_gop[MAX_PIC_TYPE];
+ irc_pic_type_get_rem_frms_in_gop(
+ ps_rate_control_api->ps_pic_handling,
+ ai4_rem_frms_in_gop);
+ i4_rem_bits_in_gop = irc_get_rem_bits_in_period(
+ ps_rate_control_api);
+ i4_rem_frms_in_gop = 0;
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ i4_rem_frms_in_gop += ai4_rem_frms_in_gop[i];
+
+ /* Threshold the bits based on estimated buffer fullness */
+ i4_cur_frm_max_bit_possible = irc_get_max_tgt_bits_dvd_comp(
+ ps_rate_control_api->ps_vbr_storage_vbv,
+ i4_rem_bits_in_gop, i4_rem_frms_in_gop,
+ e_pic_type);
+
+ if(i4_cur_est_bits > i4_cur_frm_max_bit_possible)
+ {
+ /* Assuming header would consume the same amount of bits */
+ i4_cur_est_texture_bits = i4_cur_frm_max_bit_possible
+ - i4_cur_est_header_bits;
+
+ }
+ }
+ else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+ {
+ WORD32 i4_cur_frm_bits_acc_buffer =
+ irc_cbr_buffer_constraint_check(
+ ps_rate_control_api->ps_cbr_buffer,
+ i4_cur_est_bits, e_pic_type);
+
+ /* Assuming the header would consume the same amount of bits */
+ i4_cur_est_texture_bits = i4_cur_frm_bits_acc_buffer
+ - i4_cur_est_header_bits;
+
+ }
+ else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+ {
+ WORD32 i4_cur_frm_bits_acc_buffer =
+ irc_vbr_stream_buffer_constraint_check(
+ ps_rate_control_api->ps_cbr_buffer,
+ i4_cur_est_bits, e_pic_type);
+
+ /* Assuming the header would consume the same amount of bits */
+ i4_cur_est_texture_bits = i4_cur_frm_bits_acc_buffer
+ - i4_cur_est_header_bits;
+ }
+
+ trace_printf((const WORD8*)"emtb = %d, ", i4_cur_est_texture_bits);
+
+ /*
+ * If the estimated texture bits go to values less than zero
+ * due to buffer underflow, make the estimated target bits to go
+ * to zero
+ */
+ if(i4_cur_est_texture_bits < 0)
+ i4_cur_est_texture_bits = 0;
+
+ ps_rate_control_api->i4_prev_frm_est_bits = (i4_cur_est_texture_bits
+ + i4_cur_est_header_bits);
+
+ /* Clip est_texture_bits according to the user-defined max value
+ * (not applied to I_PIC) */
+ if((i4_cur_est_texture_bits
+ > (i4_ud_max_bits - i4_cur_est_header_bits))
+ && (e_pic_type != I_PIC))
+ {
+ i4_cur_est_texture_bits = (i4_ud_max_bits
+ - i4_cur_est_header_bits);
+ trace_printf((const WORD8*)"udcb = %d, ",
+ i4_ud_max_bits - i4_cur_est_header_bits);
+ }
+
+ /* Calculate the estimated SAD for corresponding frame*/
+ u4_estimated_sad = irc_get_est_sad(ps_rate_control_api->ps_est_sad,
+ e_pic_type);
+
+ /* Query the model for the Qp for the corresponding frame*/
+
+ /*
+ * The check is because the model gives a negative QP when the
+ * i4_cur_est_texture_bits is less than or equal to 0
+ * [This is a bug in the model]. As a temporary fix, the frame QP
+ * is being set to the max QP allowed
+ */
+ if(i4_cur_est_texture_bits > 0)
+ {
+ u1_frame_qp = irc_find_qp_for_target_bits(
+ ps_rate_control_api->aps_rd_model[e_pic_type],
+ i4_cur_est_texture_bits,
+ u4_estimated_sad,
+ ps_rate_control_api->au1_min_max_qp[(e_pic_type
+ << 1)],
+ ps_rate_control_api->au1_min_max_qp[(e_pic_type
+ << 1) + 1]);
+ }
+ else
+ {
+ u1_frame_qp = ps_rate_control_api->au1_min_max_qp[(e_pic_type
+ << 1) + 1];
+ }
+
+ trace_printf((const WORD8*)"ehb %d, etb %d, fqp %d, es %d, eb %d, ",
+ i4_cur_est_header_bits, i4_cur_est_texture_bits,
+ u1_frame_qp, u4_estimated_sad, i4_cur_est_bits);
+
+ /* Restrict the QP swing unless the average bit rate has changed
+ * for this picture type; in that case the restriction is skipped
+ * once and the flag is cleared (else branch below) */
+ if(ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] == 0)
+ {
+ WORD32 prev_qp;
+ WORD32 hi_dev_qp, lo_dev_qp;
+ /* Restricting the qp swing around the qp of the previous
+ * reference picture type */
+ prev_qp = ps_rate_control_api->au1_prev_frm_qp[ps_rate_control_api->prev_ref_pic_type];
+
+ if(ps_rate_control_api->prev_ref_pic_type != e_pic_type)
+ {
+ if(e_pic_type == I_PIC)
+ {
+ /*
+ * Constrain I-frame QP to be within specified limit of
+ * prev_ref_qp/Kp
+ */
+ prev_qp = (P_TO_I_RATIO * prev_qp + (1 << (K_Q - 1)))
+ >> (K_Q);
+ }
+ else if(e_pic_type == P_PIC)
+ {
+ /*
+ * Constrain P-frame QP to be within specified limit of
+ * Kp*prev_ref_qp
+ */
+ prev_qp = (I_TO_P_RATIO * prev_qp + (1 << (K_Q - 1)))
+ >> (K_Q);
+ }
+ else if(ps_rate_control_api->prev_ref_pic_type == P_PIC)
+ {
+ /* current frame is B-pic */
+ /* Constrain B-frame QP to be within specified limit of
+ * prev_ref_qp/Kb
+ */
+ prev_qp = (P_TO_B_RATIO * prev_qp + (1 << (K_Q - 1)))
+ >> (K_Q);
+ }
+ else /* if(ps_rate_control_api->prev_ref_pic_type == I_PIC*/
+ {
+ /* current frame is B-pic */
+ /*
+ * Constrain B-frame QP to be within specified limit of
+ * prev_ref_qp/Kb. The previous reference is an I-pic
+ * here, so both the I to P and the P to B ratios are
+ * applied
+ */
+ prev_qp = (P_TO_B_RATIO * I_TO_P_RATIO * prev_qp
+ + (1 << (K_Q + K_Q - 1)))
+ >> (K_Q + K_Q);
+ }
+ }
+
+ hi_dev_qp = GET_HI_DEV_QP(prev_qp);
+ /*
+ * For lower QPs, due to the scale factor and fixed point
+ * arithmetic, the hi_dev_qp can be the same as the prev qp, in
+ * which case the QP gets stuck at the lowermost value and can
+ * never change. To avoid this, for lower qps the hi_dev_qp
+ * should be made slightly more than prev_qp
+ */
+ if(prev_qp == hi_dev_qp)
+ {
+ hi_dev_qp += 1;
+ }
+ lo_dev_qp = GET_LO_DEV_QP(prev_qp);
+ u1_frame_qp = (UWORD8)CLIP_QP((WORD32)u1_frame_qp, hi_dev_qp, lo_dev_qp);
+ }
+ else
+ {
+ ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] = 0;
+ }
+ }
+ else
+ {
+ /*
+ * The u1_is_first_frm_coded gets reset
+ * a) at start of sequence
+ * b) whenever there is a scene change.
+ * In both cases since we do not have any estimate about the
+ * current frame, we just send in the previous frame qp value. In
+ * the scene change case the previous QP is incremented by 4. This
+ * is done because the Scene changed VOP will have over consumed
+ * and chances of future frames skipping is very high. For the
+ * init case, the previous frame QP is initialized with the init qp
+ */
+ if((ps_rate_control_api->u1_scd_detected)
+ && (ps_rate_control_api->e_rc_type != CONST_QP))
+ {
+ /*
+ * If scene change is detected, I frame Qp would have been
+ * updated
+ */
+ /* Use a QP calculated in the prev update fxn */
+ u1_frame_qp = ps_rate_control_api->u1_frm_qp_after_scd;
+ }
+ else
+ {
+ u1_frame_qp = ps_rate_control_api->au1_prev_frm_qp[e_pic_type];
+ }
+ }
+ }
+ else
+ {
+ u1_frame_qp = ps_rate_control_api->au1_init_qp[e_pic_type];
+ }
+
+ trace_printf((const WORD8*)"fqp %d\n", u1_frame_qp);
+
+ return (u1_frame_qp);
+}
+
+/*******************************************************************************
+ * Function Name : irc_get_buffer_status
+ * Description   : Asks the buffer model that belongs to the configured rate
+ *                 control type what the buffer state would be once the current
+ *                 frame has consumed i4_total_frame_bits.
+ * Outputs       : pi4_num_bits_to_prevent_vbv_underflow[0] is filled in by the
+ *                 buffer model; for VBR_STORAGE it receives the maximum VBV
+ *                 buffer size. It is left untouched for any other rc type.
+ * Returns       : vbv_buf_status_e (VBV_NORMAL when no buffer model applies)
+ ******************************************************************************/
+vbv_buf_status_e irc_get_buffer_status(rate_control_api_t *ps_rate_control_api,
+                                       WORD32 i4_total_frame_bits,
+                                       picture_type_e e_pic_type,
+                                       WORD32 *pi4_num_bits_to_prevent_vbv_underflow)
+{
+    vbv_buf_status_e e_status = VBV_NORMAL;
+
+    switch(ps_rate_control_api->e_rc_type)
+    {
+        case VBR_STORAGE_DVD_COMP:
+            /* Status for the current total consumed bits and error bits */
+            e_status = irc_get_vbv_buffer_status(
+                            ps_rate_control_api->ps_vbr_storage_vbv,
+                            i4_total_frame_bits,
+                            pi4_num_bits_to_prevent_vbv_underflow);
+
+            trace_printf((const WORD8*)"e_buf_status = %d\n", e_status);
+            break;
+
+        case VBR_STORAGE:
+            /* No underflow in this mode: report the max buffer size instead */
+            pi4_num_bits_to_prevent_vbv_underflow[0] =
+                            irc_get_max_vbv_buf_size(
+                                    ps_rate_control_api->ps_vbr_storage_vbv);
+            e_status = VBV_NORMAL;
+            break;
+
+        case CBR_NLDRC:
+        case VBR_STREAMING:
+            /*
+             * Both modes query the CBR buffer. For VBR_streaming, error bits
+             * are computed according to peak bitrate.
+             */
+            e_status = irc_get_cbr_buffer_status(
+                            ps_rate_control_api->ps_cbr_buffer,
+                            i4_total_frame_bits,
+                            pi4_num_bits_to_prevent_vbv_underflow, e_pic_type);
+            break;
+
+        default:
+            /* No buffer model for the remaining rate control types */
+            break;
+    }
+
+    return e_status;
+}
+
+/*******************************************************************************
+ Function Name : irc_update_pic_handling_state
+ Description   : Forwards the picture type of the current frame to the picture
+                 handling module via irc_update_pic_handling().
+ ******************************************************************************/
+void irc_update_pic_handling_state(rate_control_api_t *ps_rate_control_api,
+                                   picture_type_e e_pic_type)
+{
+    irc_update_pic_handling(ps_rate_control_api->ps_pic_handling,
+                            e_pic_type);
+}
+
+/******************************************************************************
+ Function Name : irc_update_frame_level_info
+ Description   : Updates the frame level information into the rate control
+                 structure after a frame has been encoded (or skipped).
+
+                 ps_rate_control_api        - rate control state
+                 e_pic_type                 - type of the frame just coded
+                 pi4_mb_type_sad            - SAD per MB type (MAX_MB_TYPE)
+                 i4_total_frame_bits        - total bits consumed by the frame
+                 i4_model_updation_hdr_bits - header bits passed on to the bit
+                                              allocation module
+                 pi4_mb_type_tex_bits       - texture bits per MB type
+                 pi4_tot_mb_type_qp         - accumulated QP per MB type
+                 pi4_tot_mb_in_type         - number of MBs per MB type
+                 i4_avg_activity            - average activity of the frame
+                 u1_is_scd                  - scene change flag; ignored when
+                                              the inter frame interval is > 1
+                 i4_is_it_a_skip            - non-zero if the frame was skipped
+                 i4_intra_frm_cost          - intra cost of the frame
+                 i4_is_pic_handling_done    - non-zero if pic handling has
+                                              already been updated by the caller
+ ******************************************************************************/
+void irc_update_frame_level_info(rate_control_api_t *ps_rate_control_api,
+                                 picture_type_e e_pic_type,
+                                 WORD32 *pi4_mb_type_sad,
+                                 WORD32 i4_total_frame_bits,
+                                 WORD32 i4_model_updation_hdr_bits,
+                                 WORD32 *pi4_mb_type_tex_bits,
+                                 WORD32 *pi4_tot_mb_type_qp,
+                                 WORD32 *pi4_tot_mb_in_type,
+                                 WORD32 i4_avg_activity,
+                                 UWORD8 u1_is_scd,
+                                 WORD32 i4_is_it_a_skip,
+                                 WORD32 i4_intra_frm_cost,
+                                 WORD32 i4_is_pic_handling_done)
+{
+    UWORD8 u1_num_skips = 0;
+    WORD32 i;
+    UWORD32 u4_frame_sad = 0;
+    WORD32 i4_tot_texture_bits = 0;
+    WORD32 i4_tot_mbs = 0;
+    WORD32 i4_avg_qp = 0;
+
+    /* SCD not supported in case of IPB encoder */
+    if(u1_is_scd && (irc_pic_type_get_inter_frame_interval(
+                    ps_rate_control_api->ps_pic_handling) > 1))
+    {
+        u1_is_scd = 0;
+    }
+    trace_printf((const WORD8*)"i4_total_frame_bits %d\n", i4_total_frame_bits);
+
+    if(!i4_is_it_a_skip && !i4_is_pic_handling_done)
+    {
+        /* Update the pic_handling struct */
+        irc_update_pic_handling(ps_rate_control_api->ps_pic_handling,
+                                e_pic_type);
+    }
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        if(!i4_is_it_a_skip)
+        {
+            WORD32 i4_new_period_flag;
+
+            /******************************************************************
+             Calculate the total values from the individual values
+             ******************************************************************/
+            for(i = 0; i < MAX_MB_TYPE; i++)
+            {
+                u4_frame_sad += pi4_mb_type_sad[i];
+                i4_tot_texture_bits += pi4_mb_type_tex_bits[i];
+                i4_avg_qp += pi4_tot_mb_type_qp[i];
+                i4_tot_mbs += pi4_tot_mb_in_type[i];
+            }
+
+            /* Calculate the average QP */
+            if(i4_tot_mbs != 0)
+            {
+                i4_avg_qp /= i4_tot_mbs;
+            }
+            else
+            {
+                /*
+                 * No MB was reported for this frame. Dividing by zero is
+                 * undefined, so keep the QP history of this picture type
+                 * unchanged by reusing its previous frame QP.
+                 */
+                i4_avg_qp = ps_rate_control_api->au1_prev_frm_qp[e_pic_type];
+            }
+
+            if(ps_rate_control_api->u1_is_mb_level_rc_on)
+            {
+                /*
+                 * The model needs to take into consideration the average
+                 * activity of the entire frame while estimating the QP. Thus
+                 * the frame sad values are scaled by the average activity
+                 * before updating it into the model.
+                 */
+                if(!i4_avg_activity)
+                {
+                    i4_avg_activity = 1;
+                }
+                i4_intra_frm_cost *= i4_avg_activity;
+                u4_frame_sad *= i4_avg_activity;
+            }
+
+            /******************************************************************
+             Update the bit allocation module
+             NOTE: For bit allocation module, the pic_type should not be
+             modified to that of 'I', in case of a SCD.
+             ******************************************************************/
+            i4_new_period_flag = irc_is_last_frame_in_gop(
+                            ps_rate_control_api->ps_pic_handling);
+            irc_ba_update_cur_frm_consumed_bits(
+                            ps_rate_control_api->ps_bit_allocation,
+                            ps_rate_control_api->ps_pic_handling,
+                            i4_total_frame_bits, i4_model_updation_hdr_bits,
+                            e_pic_type, u1_is_scd, i4_new_period_flag);
+
+            if(1 == i4_new_period_flag
+                            && ((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                                            || (ps_rate_control_api->e_rc_type
+                                                            == VBR_STORAGE_DVD_COMP)))
+            {
+                irc_ba_check_and_update_bit_allocation(
+                                ps_rate_control_api->ps_bit_allocation,
+                                ps_rate_control_api->ps_pic_handling,
+                                irc_get_cur_vbv_buf_size(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                irc_get_max_vbv_buf_size(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                irc_get_max_bits_per_tgt_frm(
+                                                ps_rate_control_api->ps_vbr_storage_vbv),
+                                i4_total_frame_bits);
+            }
+        }
+
+        /**********************************************************************
+         Update the buffer status
+         *********************************************************************/
+        /*
+         * This update is done after overflow and underflow handling to
+         * account for the actual bits dumped
+         */
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                        || (ps_rate_control_api->e_rc_type
+                                        == VBR_STORAGE_DVD_COMP))
+        {
+            irc_update_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv,
+                               i4_total_frame_bits);
+        }
+        else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+        {
+            irc_update_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                  i4_total_frame_bits, e_pic_type);
+        }
+        else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+        {
+            UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+            irc_get_vsp_num_pics_in_dly_prd(
+                            &ps_rate_control_api->s_vbr_str_prms,
+                            au4_num_pics_in_delay_prd);
+
+            irc_update_cbr_buffer(ps_rate_control_api->ps_cbr_buffer,
+                                  i4_total_frame_bits, e_pic_type);
+
+            irc_update_vbr_str_prms(&ps_rate_control_api->s_vbr_str_prms,
+                                    e_pic_type);
+
+            irc_change_cbr_vbv_num_pics_in_delay_period(
+                            ps_rate_control_api->ps_cbr_buffer,
+                            au4_num_pics_in_delay_prd);
+
+            /*
+             * If the change_in_peak_bitrate flag is set, after the delay period
+             * update the peak_bitrate and the buffer parameters
+             */
+            if(!ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+            {
+                irc_ba_change_ba_peak_bit_rate(
+                                ps_rate_control_api->ps_bit_allocation,
+                                (WORD32 *)&ps_rate_control_api->au4_new_peak_bit_rate[0]);
+                irc_change_cbr_vbv_bit_rate(
+                                ps_rate_control_api->ps_cbr_buffer,
+                                (WORD32 *)&ps_rate_control_api->au4_new_peak_bit_rate[0]);
+            }
+            if(ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+            {
+                ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change--;
+            }
+        }
+
+        if(!i4_is_it_a_skip)
+        {
+            /*******************************************************************
+             Handle the SCENE CHANGE DETECTED
+             1) Make the picture type as I, so that updation happens as if it is
+             an I frame
+             2) Reset model, SAD and flag to restart the estimation process
+             ******************************************************************/
+            if(u1_is_scd)
+            {
+                WORD32 i4_frm_qp_after_scd;
+                UWORD32 u4_prev_I_frm_sad;
+
+                e_pic_type = I_PIC;
+
+                /* Scale scd qp based on SCD Frm sad and previous I Frm sad */
+                /* frm_qp_after_scd = (avg_qp * cur_frm_sad)/prev_I_frm_sad */
+
+                /*
+                 * QP for the next frame should take care of
+                 * 1) due to scene change, the current picture has consumed more
+                 * bits
+                 * 2) relative complexity of the previous scene and the current
+                 * scene
+                 */
+
+                /* Get the intra SAD for the previous scene */
+                u4_prev_I_frm_sad = irc_get_est_sad(
+                                ps_rate_control_api->ps_est_sad, I_PIC);
+
+                /*
+                 * Scale the QP based on the SAD ratio of the current pic and
+                 * previous scene intra SAD
+                 */
+                X_PROD_Y_DIV_Z(i4_avg_qp, u4_frame_sad, u4_prev_I_frm_sad,
+                               i4_frm_qp_after_scd);
+
+                /* Limit the next frame qp by 50% across both the sides */
+                if(i4_frm_qp_after_scd > ((i4_avg_qp * 3) >> 1))
+                {
+                    i4_frm_qp_after_scd = (i4_avg_qp * 3) >> 1;
+                }
+                else if(i4_frm_qp_after_scd < (i4_avg_qp >> 1))
+                {
+                    i4_frm_qp_after_scd = (i4_avg_qp >> 1);
+                }
+
+                /*
+                 * Ensure that the next frame QP is within the min_max limit of
+                 * QP allowed
+                 */
+                if(i4_frm_qp_after_scd
+                                > ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1) + 1])
+                {
+                    i4_frm_qp_after_scd =
+                                    ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                    << 1) + 1];
+                }
+                else if(i4_frm_qp_after_scd
+                                < ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                << 1)])
+                {
+                    i4_frm_qp_after_scd =
+                                    ps_rate_control_api->au1_min_max_qp[(e_pic_type
+                                                    << 1)];
+                }
+
+                /* Update the state var */
+                ps_rate_control_api->u1_frm_qp_after_scd =
+                                (UWORD8)i4_frm_qp_after_scd;
+
+                /* re-set model */
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                {
+                    irc_reset_frm_rc_rd_model(
+                                    ps_rate_control_api->aps_rd_model[i]);
+                }
+
+                /* Reset the SAD estimation module */
+                irc_reset_est_sad(ps_rate_control_api->ps_est_sad);
+
+                /* Reset flag */
+                for(i = 0; i < MAX_PIC_TYPE; i++)
+                {
+                    ps_rate_control_api->au1_is_first_frm_coded[i] = 0;
+                }
+
+                /* Reset the MB Rate control */
+                irc_init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+
+                /* Set u1_scd_detected flag */
+                ps_rate_control_api->u1_scd_detected = 1;
+
+                /*
+                 * Initialize the I picture QP according to the average QP of
+                 * the SCD pic
+                 */
+                ps_rate_control_api->au1_prev_frm_qp[I_PIC] = (UWORD8)i4_avg_qp;
+
+                trace_printf((const WORD8*)"SCD DETECTED\n");
+            }
+            else
+            {
+                ps_rate_control_api->u1_scd_detected = 0;
+                /**************************************************************
+                 Update the Qp used by the current frame
+                 **************************************************************/
+                ps_rate_control_api->au1_prev_frm_qp[e_pic_type] =
+                                (UWORD8)i4_avg_qp;
+            }
+
+            /********************************************************************
+             Update the model of the corresponding picture type
+             NOTE: For SCD, we force the frame type from 'P' to that of a 'I'
+             ******************************************************************/
+            /*
+             * For very simple sequences no bits are consumed by texture. These
+             * frames do not add any information to the model and so not added
+             */
+            if(i4_tot_texture_bits && u4_frame_sad)
+            {
+                irc_add_frame_to_rd_model(
+                                ps_rate_control_api->aps_rd_model[e_pic_type],
+                                i4_tot_texture_bits, (UWORD8)i4_avg_qp,
+                                u4_frame_sad, u1_num_skips);
+
+                /*
+                 * At least one proper frame is added into the model. Until
+                 * that keep using the initial QP
+                 */
+                ps_rate_control_api->au1_is_first_frm_coded[e_pic_type] = 1;
+            }
+
+            if(i4_avg_activity)
+            {
+                /* Update the mb_level model */
+                irc_mb_update_frame_level(
+                                ps_rate_control_api->ps_mb_rate_control,
+                                i4_avg_activity);
+            }
+
+            /******************************************************************
+             Update the sad estimation module
+             NOTE: For SCD, we force the frame type from 'P' to that of a 'I'
+             ******************************************************************/
+            if(u4_frame_sad)
+            {
+                irc_update_actual_sad(ps_rate_control_api->ps_est_sad,
+                                      u4_frame_sad, e_pic_type);
+
+                irc_update_actual_sad_for_intra(ps_rate_control_api->ps_est_sad,
+                                                i4_intra_frm_cost);
+            }
+
+            /*
+             * Update the variable which denotes that a frame has been
+             * encountered
+             */
+            ps_rate_control_api->u1_is_first_frm = 0;
+        }
+    }
+
+    /* Store the prev encoded picture type for restricting Qp swing */
+    if((e_pic_type == I_PIC) || (e_pic_type == P_PIC))
+    {
+        ps_rate_control_api->prev_ref_pic_type = e_pic_type;
+    }
+
+    trace_printf((const WORD8*)"ft %d,hb %d,tb %d,qp %d,fs %d\n", e_pic_type,
+                 i4_model_updation_hdr_bits, i4_tot_texture_bits, i4_avg_qp,
+                 u4_frame_sad);
+
+    return;
+}
+
+/*******************************************************************************
+ MB Level API functions
+ ******************************************************************************/
+
+/******************************************************************************
+ Function Name : irc_init_mb_rc_frame_level
+ Description   : Hands the frame QP to the MB level rate control module
+                 through irc_mb_init_frame_level().
+ ******************************************************************************/
+void irc_init_mb_rc_frame_level(rate_control_api_t *ps_rate_control_api,
+                                UWORD8 u1_frame_qp)
+{
+    irc_mb_init_frame_level(ps_rate_control_api->ps_mb_rate_control,
+                            u1_frame_qp);
+}
+
+/******************************************************************************
+ Function Name : irc_get_mb_level_qp
+ Description   : Get the mb level qp.
+                 pi4_mb_qp[0] is the QP fed back to rate control and
+                 pi4_mb_qp[1] is the QP used for quantising the MB. With MB
+                 level rc off both carry the frame level QP; with it on they
+                 come from irc_get_mb_qp() and pi4_mb_qp[1] is limited to the
+                 min/max QP configured for e_pic_type.
+ *****************************************************************************/
+void irc_get_mb_level_qp(rate_control_api_t *ps_rate_control_api,
+                         WORD32 i4_cur_mb_activity,
+                         WORD32 *pi4_mb_qp,
+                         picture_type_e e_pic_type)
+{
+    WORD32 i4_lower_limit;
+    WORD32 i4_upper_limit;
+
+    if(!ps_rate_control_api->u1_is_mb_level_rc_on)
+    {
+        WORD32 i4_frm_qp = irc_get_frm_level_qp(
+                        ps_rate_control_api->ps_mb_rate_control);
+
+        pi4_mb_qp[0] = i4_frm_qp; /* Used as feedback for the rate control */
+        pi4_mb_qp[1] = i4_frm_qp; /* Used for quantising the MB */
+        return;
+    }
+
+    irc_get_mb_qp(ps_rate_control_api->ps_mb_rate_control, i4_cur_mb_activity,
+                  pi4_mb_qp);
+
+    /* Min QP sits at the even index, max QP at the following odd index */
+    i4_lower_limit = ps_rate_control_api->au1_min_max_qp[e_pic_type << 1];
+    i4_upper_limit = ps_rate_control_api->au1_min_max_qp[(e_pic_type << 1) + 1];
+
+    /* Truncating the QP to the Max and Min Qp values possible */
+    if(pi4_mb_qp[1] < i4_lower_limit)
+    {
+        pi4_mb_qp[1] = i4_lower_limit;
+    }
+    if(pi4_mb_qp[1] > i4_upper_limit)
+    {
+        pi4_mb_qp[1] = i4_upper_limit;
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_get_bits_to_stuff
+ Description   : Gets the bits to stuff to prevent Underflow of Encoder Buffer.
+                 The value is obtained from the CBR buffer module.
+ *****************************************************************************/
+WORD32 irc_get_bits_to_stuff(rate_control_api_t *ps_rate_control_api,
+                             WORD32 i4_tot_consumed_bits,
+                             picture_type_e e_pic_type)
+{
+    /* Get the CBR bits to stuff */
+    return irc_get_cbr_bits_to_stuff(ps_rate_control_api->ps_cbr_buffer,
+                                     i4_tot_consumed_bits, e_pic_type);
+}
+
+/****************************************************************************
+ Function Name : irc_get_prev_frm_est_bits
+ Description   : Returns previous frame estimated bits as stored in the rate
+                 control state (i4_prev_frm_est_bits).
+ *****************************************************************************/
+WORD32 irc_get_prev_frm_est_bits(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_est_bits = ps_rate_control_api->i4_prev_frm_est_bits;
+
+    return i4_est_bits;
+}
+
+/******************************************************************************
+ Control Level API functions
+ Logic: The control call sets the state structure of the rate control api
+ accordingly such that the next process call would implement the same.
+ ******************************************************************************/
+
+/* Registers a new inter frame interval with the picture handling module */
+void irc_change_inter_frm_int_call(rate_control_api_t *ps_rate_control_api,
+                                   WORD32 i4_inter_frm_int)
+{
+    irc_pic_handling_register_new_inter_frm_interval(
+                    ps_rate_control_api->ps_pic_handling,
+                    i4_inter_frm_int);
+}
+
+/*
+ * Registers a new intra frame interval with the picture handling module and,
+ * for VBR_STREAMING only, also with the VBR streaming parameters.
+ */
+void irc_change_intra_frm_int_call(rate_control_api_t *ps_rate_control_api,
+                                   WORD32 i4_intra_frm_int)
+{
+    irc_pic_handling_register_new_int_frm_interval(
+                    ps_rate_control_api->ps_pic_handling,
+                    i4_intra_frm_int);
+
+    if(ps_rate_control_api->e_rc_type != VBR_STREAMING)
+    {
+        return;
+    }
+
+    irc_change_vsp_ifi(&ps_rate_control_api->s_vbr_str_prms, i4_intra_frm_int);
+}
+
+/****************************************************************************
+ Function Name : irc_change_avg_bit_rate
+ Description   : Whenever the average bit rate changes, the excess bits
+                 between the changed bit rate and the old one are
+                 re-distributed in the bit allocation module. For CBR_NLDRC the
+                 CBR buffer is given the new rate for every drain rate, and
+                 once a frame has been encountered the per picture type
+                 "average bitrate changed" flags are raised.
+ *****************************************************************************/
+void irc_change_avg_bit_rate(rate_control_api_t *ps_rate_control_api,
+                             UWORD32 u4_average_bit_rate)
+{
+    int idx;
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        /*
+         * Bit Allocation Module: distribute the excess/deficit bits between
+         * the old and the new bit rate to all the remaining frames
+         */
+        irc_ba_change_remaining_bits_in_period(
+                        ps_rate_control_api->ps_bit_allocation,
+                        ps_rate_control_api->ps_pic_handling,
+                        u4_average_bit_rate,
+                        irc_ba_get_frame_rate(
+                                        ps_rate_control_api->ps_bit_allocation),
+                        (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+    }
+
+    if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+    {
+        /* Every drain rate of the CBR buffer gets the new average rate */
+        UWORD32 au4_drain_rates[MAX_NUM_DRAIN_RATES];
+
+        for(idx = 0; idx < MAX_NUM_DRAIN_RATES; idx++)
+        {
+            au4_drain_rates[idx] = u4_average_bit_rate;
+        }
+        irc_change_cbr_vbv_bit_rate(ps_rate_control_api->ps_cbr_buffer,
+                                    (WORD32 *)(au4_drain_rates));
+    }
+
+    /*
+     * The flags below are raised only when the bit rate changes at run time,
+     * i.e. after a frame has been encountered. RC init is done during create
+     * and a change call may be issued just before the first process call; in
+     * that case the model has not stabilized and the QP swing restriction
+     * must stay active, otherwise the QP could move to very bad values right
+     * after the init QP. The distinction relies on u1_is_first_frm and is
+     * not reliable if the bit rate is changed within the first few frames.
+     */
+    if(ps_rate_control_api->u1_is_first_frm != 0)
+    {
+        return;
+    }
+
+    for(idx = 0; idx < MAX_PIC_TYPE; idx++)
+    {
+        ps_rate_control_api->au1_avg_bitrate_changed[idx] = 1;
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_change_frame_rate
+ Description   : Does the necessary changes whenever there is a change in
+                 frame rate: updates the buffer model of the active rate
+                 control type and then lets the bit allocation module
+                 re-distribute the remaining bits. Nothing is done for
+                 CONST_QP.
+ *****************************************************************************/
+void irc_change_frame_rate(rate_control_api_t *ps_rate_control_api,
+                           UWORD32 u4_frame_rate,
+                           UWORD32 u4_src_ticks,
+                           UWORD32 u4_tgt_ticks)
+{
+    UWORD32 u4_delay_prd_frms;
+
+    if(ps_rate_control_api->e_rc_type == CONST_QP)
+    {
+        return;
+    }
+
+    /* Number of frames in the CBR buffer delay period at the new rate */
+    u4_delay_prd_frms = ((u4_frame_rate
+                    * irc_get_cbr_buffer_delay(
+                                    ps_rate_control_api->ps_cbr_buffer))
+                    / 1000000);
+
+    if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                    || (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP))
+    {
+        irc_change_vbr_vbv_frame_rate(ps_rate_control_api->ps_vbr_storage_vbv,
+                                      u4_frame_rate);
+    }
+    else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+    {
+        irc_change_cbr_vbv_tgt_frame_rate(ps_rate_control_api->ps_cbr_buffer,
+                                          u4_frame_rate);
+    }
+    else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+    {
+        UWORD32 au4_pics_per_type[MAX_PIC_TYPE];
+
+        /* Refresh the VBR streaming parameters first ... */
+        irc_change_vsp_tgt_ticks(&ps_rate_control_api->s_vbr_str_prms,
+                                 u4_tgt_ticks);
+        irc_change_vsp_src_ticks(&ps_rate_control_api->s_vbr_str_prms,
+                                 u4_src_ticks);
+        irc_change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms,
+                            u4_delay_prd_frms);
+
+        /* ... then propagate the resulting picture counts to the buffer */
+        irc_get_vsp_num_pics_in_dly_prd(&ps_rate_control_api->s_vbr_str_prms,
+                                        au4_pics_per_type);
+        irc_change_cbr_vbv_tgt_frame_rate(ps_rate_control_api->ps_cbr_buffer,
+                                          u4_frame_rate);
+        irc_change_cbr_vbv_num_pics_in_delay_period(
+                        ps_rate_control_api->ps_cbr_buffer,
+                        au4_pics_per_type);
+    }
+
+    /*
+     * Bit Allocation Module: distribute the excess/deficit bits between the
+     * old and the new frame rate to all the remaining frames
+     */
+    irc_ba_change_remaining_bits_in_period(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling,
+                    irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+                    u4_frame_rate,
+                    (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+}
+
+/****************************************************************************
+ Function Name : irc_change_frm_rate_for_bit_alloc
+ Description   : Applies a frame rate change to the bit allocation module and,
+                 for the VBR storage modes, to the max bits per target frame
+                 of the VBR storage VBV. Nothing is done for CONST_QP.
+ *****************************************************************************/
+void irc_change_frm_rate_for_bit_alloc(rate_control_api_t *ps_rate_control_api,
+                                       UWORD32 u4_frame_rate)
+{
+    if(ps_rate_control_api->e_rc_type == CONST_QP)
+    {
+        return;
+    }
+
+    /*
+     * Bit Allocation Module: distribute the excess/deficit bits between the
+     * old and the new frame rate to all the remaining frames
+     */
+    irc_ba_change_remaining_bits_in_period(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling,
+                    irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+                    u4_frame_rate,
+                    (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+
+    if((ps_rate_control_api->e_rc_type == VBR_STORAGE)
+                    || (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP))
+    {
+        irc_change_vbr_max_bits_per_tgt_frm(
+                        ps_rate_control_api->ps_vbr_storage_vbv,
+                        u4_frame_rate);
+    }
+}
+
+/*
+ * Installs a new initial QP per picture type. The same value is also written
+ * to the previous frame QP of that picture type.
+ */
+void irc_change_init_qp(rate_control_api_t *ps_rate_control_api,
+                        UWORD8 *pu1_init_qp)
+{
+    WORD32 i4_pic_type;
+
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        UWORD8 u1_qp = pu1_init_qp[i4_pic_type];
+
+        ps_rate_control_api->au1_init_qp[i4_pic_type] = u1_qp;
+        ps_rate_control_api->au1_prev_frm_qp[i4_pic_type] = u1_qp;
+    }
+}
+
+/*
+ * Copies the QP limits of every picture type into the rate control state.
+ * pu1_min_max_qp holds two entries per picture type, at indices (type << 1)
+ * and (type << 1) + 1.
+ */
+void irc_change_min_max_qp(rate_control_api_t *ps_rate_control_api,
+                           UWORD8 *pu1_min_max_qp)
+{
+    WORD32 i4_entry;
+    WORD32 i4_num_entries = (MAX_PIC_TYPE << 1);
+
+    for(i4_entry = 0; i4_entry < i4_num_entries; i4_entry++)
+    {
+        ps_rate_control_api->au1_min_max_qp[i4_entry] =
+                        pu1_min_max_qp[i4_entry];
+    }
+}
+
+/****************************************************************************
+ Function Name : irc_change_peak_bit_rate
+ Description   : Does the necessary changes whenever there is a change in
+                 peak bit rate.
+                 - VBR_STORAGE / VBR_STORAGE_DVD_COMP: applied immediately to
+                   the VBR storage VBV and the bit allocation module.
+                 - VBR_STREAMING: applied immediately before the first frame,
+                   otherwise stored and deferred by the number of pictures in
+                   the delay period.
+                 - Other rate control types: no action.
+ Inputs        : pu4_peak_bit_rate - MAX_NUM_DRAIN_RATES new peak bit rates
+ Returns       : RC_OK, or RC_BENIGN_ERR when (VBR_STREAMING) a previous
+                 deferred change had not yet taken effect
+ *****************************************************************************/
+WORD32 irc_change_peak_bit_rate(rate_control_api_t *ps_rate_control_api,
+                                UWORD32 *pu4_peak_bit_rate)
+{
+    WORD32 i4_ret_val = RC_OK;
+    int i;
+
+    /*
+     * Buffer Mechanism Module: Re-initialize the number of bits consumed per
+     * frame
+     */
+    if(ps_rate_control_api->e_rc_type == VBR_STORAGE
+                    || ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+    {
+        /* Send the new peak bit rate and the old frame rate */
+        irc_change_vbr_vbv_bit_rate(ps_rate_control_api->ps_vbr_storage_vbv,
+                                    pu4_peak_bit_rate[0]);
+        irc_ba_change_ba_peak_bit_rate(ps_rate_control_api->ps_bit_allocation,
+                                       (WORD32 *)pu4_peak_bit_rate);
+
+        for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+        {
+            ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                            pu4_peak_bit_rate[i];
+        }
+    }
+    else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+    {
+        if(ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change)
+        {
+            /*
+             * Means that change in peak bit rate has been made twice before the
+             * previous change could take effect
+             */
+            i4_ret_val = RC_BENIGN_ERR;
+        }
+        /*
+         * If the change happens before encoding the first frame make the
+         * effect immediately else delay the effect
+         */
+        if(ps_rate_control_api->u1_is_first_frm)
+        {
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                                pu4_peak_bit_rate[i];
+            }
+            irc_ba_change_ba_peak_bit_rate(
+                            ps_rate_control_api->ps_bit_allocation,
+                            (WORD32 *)pu4_peak_bit_rate);
+            irc_change_cbr_vbv_bit_rate(ps_rate_control_api->ps_cbr_buffer,
+                                        (WORD32 *)pu4_peak_bit_rate);
+        }
+        else
+        {
+            /*
+             * Scratch output for irc_get_vsp_num_pics_in_dly_prd(); only its
+             * return value is used here. The array is sized MAX_PIC_TYPE like
+             * at the other call sites of that function in this file; sizing
+             * it with MAX_NUM_DRAIN_RATES risks writing past the array.
+             */
+            UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+            /*
+             * Else store the number of frames after which the effect should
+             * happen and then update the peak bitrate
+             */
+            ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change =
+                            irc_get_vsp_num_pics_in_dly_prd(
+                                            &ps_rate_control_api->s_vbr_str_prms,
+                                            au4_num_pics_in_delay_prd);
+            for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+            {
+                ps_rate_control_api->au4_new_peak_bit_rate[i] =
+                                pu4_peak_bit_rate[i];
+            }
+        }
+    }
+
+    return (i4_ret_val);
+}
+
+/*
+ * Applies a new buffer delay.
+ *  - CBR_NLDRC    : the delay is handed to the CBR buffer.
+ *  - VBR_STREAMING: the number of frames in the delay period is recomputed
+ *                   from the current frame rate, stored in the VBR streaming
+ *                   parameters, and the resulting per picture type counts are
+ *                   passed to the CBR buffer.
+ *  - other types  : no action.
+ */
+void irc_change_buffer_delay(rate_control_api_t *ps_rate_control_api,
+                             UWORD32 u4_buffer_delay)
+{
+    UWORD32 u4_frms_in_delay_prd = ((irc_ba_get_frame_rate(
+                    ps_rate_control_api->ps_bit_allocation) * u4_buffer_delay)
+                    / 1000000);
+
+    /* Initialize the rate control modules */
+    if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+    {
+        irc_change_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer,
+                                    u4_buffer_delay);
+    }
+    /*
+     * s_vbr_str_prms and the delay period picture counts of the CBR buffer
+     * are maintained only for VBR_STREAMING everywhere else in this file, so
+     * this branch is keyed on VBR_STREAMING and not on the VBR storage modes.
+     */
+    else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+    {
+        UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+        irc_change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms,
+                            u4_frms_in_delay_prd);
+
+        /* Get the number of pics of each type in delay period */
+        irc_get_vsp_num_pics_in_dly_prd(&ps_rate_control_api->s_vbr_str_prms,
+                                        au4_num_pics_in_delay_prd);
+
+        irc_change_cbr_vbv_num_pics_in_delay_period(
+                        ps_rate_control_api->ps_cbr_buffer,
+                        au4_num_pics_in_delay_prd);
+    }
+}
+
+/* Getter functions to get the current rate control parameters */
+/* Returns the frame rate held by the bit allocation module */
+UWORD32 irc_get_frame_rate(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_frame_rate;
+
+    u4_frame_rate = irc_ba_get_frame_rate(
+                    ps_rate_control_api->ps_bit_allocation);
+    return u4_frame_rate;
+}
+
+/* Returns the bit rate held by the bit allocation module */
+UWORD32 irc_get_bit_rate(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_bit_rate;
+
+    u4_bit_rate = irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation);
+    return u4_bit_rate;
+}
+
+/*
+ * Returns entry i4_index of the stored new peak bit rates. The index is not
+ * range checked here; the array is filled with MAX_NUM_DRAIN_RATES entries
+ * elsewhere in this file.
+ */
+UWORD32 irc_get_peak_bit_rate(rate_control_api_t *ps_rate_control_api,
+                              WORD32 i4_index)
+{
+    UWORD32 u4_peak_bit_rate;
+
+    u4_peak_bit_rate = ps_rate_control_api->au4_new_peak_bit_rate[i4_index];
+    return u4_peak_bit_rate;
+}
+
+/* Returns the intra frame interval reported by the picture handling module */
+UWORD32 irc_get_intra_frame_interval(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_intra_frm_int;
+
+    u4_intra_frm_int = irc_pic_type_get_intra_frame_interval(
+                    ps_rate_control_api->ps_pic_handling);
+    return u4_intra_frm_int;
+}
+
+/* Returns the inter frame interval reported by the picture handling module */
+UWORD32 irc_get_inter_frame_interval(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_inter_frm_int;
+
+    u4_inter_frm_int = irc_pic_type_get_inter_frame_interval(
+                    ps_rate_control_api->ps_pic_handling);
+    return u4_inter_frm_int;
+}
+
+/* Returns the rate control type stored in the rate control state */
+rc_type_e irc_get_rc_type(rate_control_api_t *ps_rate_control_api)
+{
+    rc_type_e e_type = ps_rate_control_api->e_rc_type;
+
+    return e_type;
+}
+
+/*
+ * Returns (bit rate * 1000) / frame rate, evaluated through X_PROD_Y_DIV_Z
+ * with both rates taken from the bit allocation module.
+ */
+WORD32 irc_get_bits_per_frame(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_frame_bits;
+
+    X_PROD_Y_DIV_Z(irc_ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+                   (UWORD32)1000,
+                   irc_ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation),
+                   i4_frame_bits);
+
+    return i4_frame_bits;
+}
+
+/* Returns the buffer delay reported by the CBR buffer module */
+UWORD32 irc_get_max_delay(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_delay;
+
+    u4_delay = irc_get_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer);
+    return u4_delay;
+}
+
+/* Returns the display order number reported by the picture handling module */
+UWORD32 irc_get_seq_no(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_disp_order_no;
+
+    u4_disp_order_no = irc_pic_type_get_disp_order_no(
+                    ps_rate_control_api->ps_pic_handling);
+    return u4_disp_order_no;
+}
+
+/*
+ * Returns the number of frames remaining in the GOP: the per picture type
+ * counts from the picture handling module, summed over all picture types.
+ */
+UWORD32 irc_get_rem_frames_in_gop(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 ai4_rem_per_type[MAX_PIC_TYPE];
+    UWORD32 u4_total = 0;
+    WORD32 i4_type;
+
+    /* Fetch the remaining frames of each picture type */
+    irc_pic_type_get_rem_frms_in_gop(ps_rate_control_api->ps_pic_handling,
+                                     ai4_rem_per_type);
+
+    /* Accumulate them into a single count */
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        u4_total += ai4_rem_per_type[i4_type];
+    }
+
+    return u4_total;
+}
+
+/****************************************************************************
+ Function Name : irc_flush_buf_frames
+ Description   : API call to flush the buffered up frames; delegates to
+                 irc_flush_frame_from_pic_stack() of picture handling.
+ *****************************************************************************/
+void irc_flush_buf_frames(rate_control_api_t *ps_rate_control_api)
+{
+    irc_flush_frame_from_pic_stack(
+                    ps_rate_control_api->ps_pic_handling);
+}
+
+/****************************************************************************
+ Function Name : irc_post_encode_frame_skip
+ Description   : API call to report to the picture handling module, via
+                 irc_skip_encoded_frame(), that a frame of the given picture
+                 type was skipped after encode.
+ *****************************************************************************/
+void irc_post_encode_frame_skip(rate_control_api_t *ps_rate_control_api,
+                                picture_type_e e_pic_type)
+{
+    irc_skip_encoded_frame(ps_rate_control_api->ps_pic_handling,
+                           e_pic_type);
+}
+
+/****************************************************************************
+ Function Name : irc_force_I_frame
+ Description   : API call to force an I frame; sets the force I frame flag
+                 in the picture handling module.
+ *****************************************************************************/
+void irc_force_I_frame(rate_control_api_t *ps_rate_control_api)
+{
+    irc_set_force_I_frame_flag(
+                    ps_rate_control_api->ps_pic_handling);
+}
+
+/****************************************************************************
+ * Function Name : irc_get_rem_bits_in_period
+ * Description   : API call to get the remaining bits in the period as
+ *                 reported by the bit allocation module
+ *****************************************************************************/
+WORD32 irc_get_rem_bits_in_period(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_rem_bits;
+
+    i4_rem_bits = irc_ba_get_rem_bits_in_period(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling);
+    return i4_rem_bits;
+}
+
+/****************************************************************************
+ * Function Name : irc_get_vbv_buf_fullness
+ * Description   : API call to get VBV buffer fullness, i.e. the current
+ *                 buffer size of the VBR storage VBV
+ ******************************************************************************/
+WORD32 irc_get_vbv_buf_fullness(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_fullness;
+
+    i4_fullness = irc_get_cur_vbv_buf_size(
+                    ps_rate_control_api->ps_vbr_storage_vbv);
+    return i4_fullness;
+}
+
+/*
+ * Returns the buffer size of the active buffer model: the CBR buffer size for
+ * CBR_NLDRC and VBR_STREAMING, the maximum VBV buffer size of the VBR storage
+ * VBV for every other rate control type.
+ */
+WORD32 irc_get_vbv_buf_size(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_uses_cbr_buffer =
+                    (ps_rate_control_api->e_rc_type == CBR_NLDRC)
+                    || (ps_rate_control_api->e_rc_type == VBR_STREAMING);
+
+    if(!i4_uses_cbr_buffer)
+    {
+        return (irc_get_max_vbv_buf_size(
+                        ps_rate_control_api->ps_vbr_storage_vbv));
+    }
+
+    return (irc_get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer));
+}
+
+/*
+ * Returns the value of irc_vbv_get_vbv_buf_fullness() for the VBR storage VBV
+ * and the given number of bits.
+ */
+WORD32 irc_get_vbv_fulness_with_cur_bits(rate_control_api_t *ps_rate_control_api,
+                                         UWORD32 u4_bits)
+{
+    WORD32 i4_fullness;
+
+    i4_fullness = irc_vbv_get_vbv_buf_fullness(
+                    ps_rate_control_api->ps_vbr_storage_vbv, u4_bits);
+    return i4_fullness;
+}
+
+/* Passes the average MB activity to the MB level rate control module */
+void irc_set_avg_mb_act(rate_control_api_t *ps_rate_control_api,
+                        WORD32 i4_avg_activity)
+{
+    irc_mb_update_frame_level(ps_rate_control_api->ps_mb_rate_control,
+                              i4_avg_activity);
+}
diff --git a/encoder/irc_rate_control_api.h b/encoder/irc_rate_control_api.h
new file mode 100755
index 0000000..0173037
--- /dev/null
+++ b/encoder/irc_rate_control_api.h
@@ -0,0 +1,188 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RATE_CONTROL_API_H_
+#define _RATE_CONTROL_API_H_
+
+#define RC_OK 0
+#define RC_FAIL -1
+#define RC_BENIGN_ERR -2
+
+/* This file should only contain RC API function declarations */
+
+typedef struct rate_control_api_t *rate_control_handle;
+
+WORD32 irc_rate_control_num_fill_use_free_memtab(rate_control_handle *pps_rate_control_api,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+void irc_initialise_rate_control(rate_control_handle ps_rate_control_api,
+ rc_type_e e_rate_control_type,
+ UWORD8 u1_is_mb_level_rc_on,
+ UWORD32 u4_avg_bit_rate,
+ UWORD32 *pu4_peak_bit_rate,
+ UWORD32 u4_min_bit_rate,
+ UWORD32 u4_frame_rate,
+ UWORD32 u4_max_delay,
+ UWORD32 u4_intra_frame_interval,
+ UWORD8 *pu1_init_qp,
+ UWORD32 u4_max_vbv_buff_size,
+ WORD32 i4_max_inter_frm_int,
+ WORD32 i4_is_gop_closed,
+ UWORD8 *pu1_min_max_qp,
+ WORD32 i4_use_est_intra_sad,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_tgt_ticks);
+
+/*****************************************************************************
+ Process level API functions (FRAME LEVEL)
+ *****************************************************************************/
+void irc_flush_buf_frames(rate_control_handle ps_rate_control_api);
+
+void irc_post_encode_frame_skip(rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type);
+
+void irc_add_picture_to_stack(rate_control_handle rate_control_api,
+ WORD32 i4_enc_pic_id);
+
+void irc_add_picture_to_stack_re_enc(rate_control_handle rate_control_api,
+ WORD32 i4_enc_pic_id,
+ picture_type_e e_pic_type);
+
+void irc_get_picture_details(rate_control_handle rate_control_api,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no,
+ picture_type_e *pe_pic_type);
+
+/* Gets the frame level Qp */
+UWORD8 irc_get_frame_level_qp(rate_control_handle rate_control_api,
+ picture_type_e pic_type,
+ WORD32 i4_max_frm_bits);
+
+vbv_buf_status_e irc_get_buffer_status(rate_control_handle rate_control_api,
+ WORD32 i4_total_frame_bits,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_num_bits_to_prevent_vbv_underflow);
+
+WORD32 irc_get_prev_frm_est_bits(rate_control_handle ps_rate_control_api);
+
+void irc_update_pic_handling_state(rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type);
+
+void irc_update_frame_level_info(rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_mb_type_sad,
+ WORD32 i4_total_frame_bits,
+ WORD32 i4_model_updation_hdr_bits,
+ WORD32 *pi4_mb_type_tex_bits,
+ WORD32 *pi4_tot_mb_type_qp,
+ WORD32 *pi4_tot_mb_in_type,
+ WORD32 i4_avg_activity,
+ UWORD8 u1_is_scd,
+ WORD32 i4_is_it_a_skip,
+ WORD32 i4_intra_frm_cost,
+ WORD32 i4_is_pic_handling_done);
+
+/*****************************************************************************
+ MB LEVEL API (just wrapper functions)
+ *****************************************************************************/
+
+void irc_init_mb_rc_frame_level(rate_control_handle ps_rate_control_api,
+ UWORD8 u1_frame_qp);/* Current frame qp*/
+
+void irc_get_mb_level_qp(rate_control_handle ps_rate_control_api,
+ WORD32 i4_cur_mb_activity,
+ WORD32 *pi4_mb_qp,
+ picture_type_e e_pic_type);
+
+WORD32 irc_get_bits_to_stuff(rate_control_handle ps_rate_control_api,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type);
+
+/******************************************************************************
+ Control Level API functions
+ Logic: The control call sets the state structure of the rate control api
+ accordingly such that the next process call would implement the same.
+ ******************************************************************************/
+
+void irc_change_inter_frm_int_call(rate_control_handle ps_rate_control_api,
+ WORD32 i4_inter_frm_int);
+
+void irc_change_intra_frm_int_call(rate_control_handle ps_rate_control_api,
+ WORD32 i4_intra_frm_int);
+
+void irc_change_avg_bit_rate(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_average_bit_rate);
+
+void irc_change_frame_rate(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_frame_rate,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_target_ticks);
+
+void irc_change_frm_rate_for_bit_alloc(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_frame_rate);
+
+void irc_change_init_qp(rate_control_handle ps_rate_control_api,
+ UWORD8 *init_qp);
+
+WORD32 irc_change_peak_bit_rate(rate_control_handle ps_rate_control_api,
+ UWORD32 *u4_peak_bit_rate);
+
+void irc_change_buffer_delay(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_buffer_delay);
+
+void irc_force_I_frame(rate_control_handle ps_rate_control_api);
+
+void irc_change_min_max_qp(rate_control_handle ps_rate_control_api,
+ UWORD8 *u1_min_max_qp);
+
+/********************************************************************************
+ Getter functions
+ For getting the current state of the rate control structures
+ ********************************************************************************/
+
+UWORD32 irc_get_frame_rate(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_bit_rate(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_intra_frame_interval(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_inter_frame_interval(rate_control_handle ps_rate_control_api);
+
+rc_type_e irc_get_rc_type(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_bits_per_frame(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_peak_bit_rate(rate_control_handle ps_rate_control_api,
+ WORD32 i4_index);
+
+UWORD32 irc_get_max_delay(rate_control_handle ps_rate_control_api);
+
+UWORD32 irc_get_seq_no(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_rem_bits_in_period(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_buf_fullness(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_buf_size(rate_control_handle ps_rate_control_api);
+
+WORD32 irc_get_vbv_fulness_with_cur_bits(rate_control_handle ps_rate_control_api,
+ UWORD32 u4_bits);
+#endif
diff --git a/encoder/irc_rate_control_api_structs.h b/encoder/irc_rate_control_api_structs.h
new file mode 100755
index 0000000..ba39e7f
--- /dev/null
+++ b/encoder/irc_rate_control_api_structs.h
@@ -0,0 +1,93 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _RATE_CONTROL_API_STRUCTS_H_
+#define _RATE_CONTROL_API_STRUCTS_H_
+
+/*
+ * The following definitions were present in irc_cntrl_param.h, moved to this
+ * file as it is used by irc_rate_control_api.c
+ */
+
+/* num_frm_in_period = BIT_ALLOC_PERIOD*intra_frame_interval */
+#define VBR_BIT_ALLOC_PERIOD 3
+#define CBR_BIT_ALLOC_PERIOD 1
+
+/* Rate control state structure: the algorithm type plus handles to every RC sub-module */
+typedef struct rate_control_api_t
+{
+ /* RC Algorithm */
+ rc_type_e e_rc_type;
+
+ /* Whether MB level rc is enabled or not */
+ UWORD8 u1_is_mb_level_rc_on;
+
+ /* Picture handling struct */
+ pic_handling_handle ps_pic_handling;
+
+ /* RD model handles, one per picture type */
+ rc_rd_model_handle aps_rd_model[MAX_PIC_TYPE];
+
+ /* VBR storage VBV structure */
+ vbr_storage_vbv_handle ps_vbr_storage_vbv;
+
+ /* Calculate the estimated SAD */
+ est_sad_handle ps_est_sad;
+
+ /* Allocation of bits for each frame */
+ bit_allocation_handle ps_bit_allocation;
+
+ /* Init Qp(also used for Const Qp scenarios) */
+ UWORD8 au1_init_qp[MAX_PIC_TYPE];
+
+ /* MB Level rate control state structure */
+ mb_rate_control_handle ps_mb_rate_control;
+ /* One entry per picture type; NOTE(review): by name, set once a frame of that type is coded - confirm */
+ UWORD8 au1_is_first_frm_coded[MAX_PIC_TYPE];
+ /* One entry per picture type; NOTE(review): by name, Qp of the previous frame of that type - confirm */
+ UWORD8 au1_prev_frm_qp[MAX_PIC_TYPE];
+ /* CBR buffer module; irc_get_vbv_buf_size() reads it for CBR_NLDRC and VBR_STREAMING */
+ cbr_buffer_handle ps_cbr_buffer;
+ /* NOTE(review): by name, scene change detection flag - confirm against the .c file */
+ UWORD8 u1_scd_detected;
+ /* NOTE(review): by name, frame Qp to use after a scene change - confirm */
+ UWORD8 u1_frm_qp_after_scd;
+ /* One entry per picture type; NOTE(review): by name, marks a pending average bit rate change - confirm */
+ UWORD8 au1_avg_bitrate_changed[MAX_PIC_TYPE];
+
+ UWORD8 u1_is_first_frm;
+ /* Two entries per picture type; NOTE(review): presumably {min, max} pairs - confirm the layout */
+ UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)];
+
+ WORD32 i4_prev_frm_est_bits;
+
+ vbr_str_prms_t s_vbr_str_prms;
+
+ /* Store the values which are to be impacted after a delay */
+ UWORD32 u4_frms_in_delay_prd_for_peak_bit_rate_change;
+ /* One entry per drain rate (MAX_NUM_DRAIN_RATES) */
+ UWORD32 au4_new_peak_bit_rate[MAX_NUM_DRAIN_RATES];
+
+ picture_type_e prev_ref_pic_type;
+
+} rate_control_api_t;
+
+#endif/*_RATE_CONTROL_API_STRUCTS_H_*/
+
diff --git a/encoder/irc_rd_model.c b/encoder/irc_rd_model.c
new file mode 100755
index 0000000..f5c0737
--- /dev/null
+++ b/encoder/irc_rd_model.c
@@ -0,0 +1,565 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/****************************************************************************/
+/* File Name : irc_rd_model.c */
+/* */
+/* Description : Implements all the Functions to Model the */
+/* Rate Distortion Behaviour of the Codec over the Last */
+/* Few Frames. */
+/* */
+/* List of Functions : irc_update_frame_rd_model */
+/* estimate_mpeg2_qp_for_resbits */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 21 06 2006 Sarat Initial Version */
+/****************************************************************************/
+
+/* System include files */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "math.h"
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_rd_model.h"
+#include "irc_rd_model_struct.h"
+
+
+WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_t **pps_rc_rd_model,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type)
+{ /* Memtab helper for the RD model state; always returns 1, the number of memtabs it uses */
+ WORD32 i4_mem_tab_idx = 0;
+ static rc_rd_model_t s_rc_rd_model_temp; /* placeholder target for the query passes below */
+
+ /*
+ * Hack for the GET_NUM_MEMTAB / FILL_MEMTAB passes, during which we don't have
+ * any state memory: point the handle at a static dummy, as dereferencing can cause issues
+ */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_rc_rd_model) = &s_rc_rd_model_temp;
+
+ /* Memtab entry for the RD model state structure (skipped when only counting) */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(rc_rd_model_t),
+ ALIGN_128_BYTE, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void**)pps_rc_rd_model, e_func_type); /* index 0 equals i4_mem_tab_idx here */
+ }
+ i4_mem_tab_idx++;
+
+ return (i4_mem_tab_idx);
+}
+
+void irc_init_frm_rc_rd_model(rc_rd_model_t *ps_rd_model,
+ UWORD8 u1_max_frames_modelled)
+{
+ /* Empty model: no frames stored and the write position at slot 0 */
+ ps_rd_model->u1_num_frms_in_model = 0;
+ ps_rd_model->u1_curr_frm_counter = 0;
+ ps_rd_model->u1_max_frms_to_model = u1_max_frames_modelled; /* upper bound for u1_num_frms_in_model */
+ /* All three linear model coefficients start at zero; the history arrays are not cleared here */
+ ps_rd_model->model_coeff_a_lin_wo_int = 0;
+ ps_rd_model->model_coeff_b_lin_wo_int = 0;
+ ps_rd_model->model_coeff_c_lin_wo_int = 0;
+}
+
+void irc_reset_frm_rc_rd_model(rc_rd_model_t *ps_rd_model)
+{ /* Same as irc_init_frm_rc_rd_model() except that u1_max_frms_to_model is left untouched */
+ ps_rd_model->u1_num_frms_in_model = 0;
+ ps_rd_model->u1_curr_frm_counter = 0;
+ /* Zero the linear model coefficients */
+ ps_rd_model->model_coeff_a_lin_wo_int = 0;
+ ps_rd_model->model_coeff_b_lin_wo_int = 0;
+ ps_rd_model->model_coeff_c_lin_wo_int = 0;
+}
+
+static UWORD8 find_model_coeffs(UWORD32 *pi4_res_bits,
+ UWORD32 *pi4_sad_h264,
+ UWORD8 *pu1_num_skips,
+ UWORD8 *pui_avg_mpeg2_qp,
+ UWORD8 u1_num_frms,
+ UWORD8 u1_model_used,
+ WORD8 *pi1_frame_index,
+ model_coeff *pmc_model_coeff,
+ model_coeff *pmc_model_coeff_lin,
+ model_coeff *pmc_model_coeff_lin_wo_int,
+ rc_rd_model_t *ps_rd_model)
+{ /* Writes {b, a, c} of the intercept-free linear model to pmc_model_coeff_lin_wo_int; returns u1_model_used unchanged */
+ UWORD32 i;
+ UWORD8 u1_num_frms_used = 0;
+ UWORD8 u1_frm_indx;
+ /* With both model switches off, these three arguments are not used at all */
+#if !(ENABLE_QUAD_RC_MODEL||ENABLE_LIN_MODEL_WITH_INTERCEPT)
+ UNUSED(pu1_num_skips);
+ UNUSED(pmc_model_coeff);
+ UNUSED(pmc_model_coeff_lin);
+#endif
+ float sum_y = 0; /* regression accumulators: y = bits, x = SAD / Qp */
+ float sum_x_y = 0;
+ float sum_x2_y = 0;
+ float sum_x = 0;
+ float sum_x2 = 0;
+ float sum_x3 = 0;
+ float sum_x4 = 0;
+
+ float x0, y0;
+ float model_coeff_a = 0.0, model_coeff_b = 0.0, model_coeff_c = 0.0;
+ /* Accumulate the moments over every selected frame; -1 marks an unused slot */
+ for(i = 0; i < u1_num_frms; i++)
+ {
+ if(-1 == pi1_frame_index[i])
+ continue;
+
+ u1_frm_indx = (UWORD8)pi1_frame_index[i];
+
+ y0 = (float)(pi4_res_bits[u1_frm_indx]);
+ x0 = (float)(pi4_sad_h264[u1_frm_indx]
+ / (float)pui_avg_mpeg2_qp[u1_frm_indx]); /* float division: SAD / Qp */
+
+ sum_y += y0;
+ sum_x_y += x0 * y0;
+ sum_x2_y += x0 * x0 * y0;
+ sum_x += x0;
+ sum_x2 += x0 * x0;
+ sum_x3 += x0 * x0 * x0;
+ sum_x4 += x0 * x0 * x0 * x0;
+ u1_num_frms_used++;
+ }
+ /* NOTE(review): float division by zero if no frame was used; none of these averages is read below */
+ sum_y /= u1_num_frms_used;
+ sum_x_y /= u1_num_frms_used;
+ sum_x2_y /= u1_num_frms_used;
+ sum_x /= u1_num_frms_used;
+ sum_x2 /= u1_num_frms_used;
+ sum_x3 /= u1_num_frms_used;
+ sum_x4 /= u1_num_frms_used;
+ /* The coefficient actually returned comes from the most recently stored frame only */
+ {
+ UWORD8 u1_curr_frame_index;
+ UWORD8 u1_avgqp_prvfrm;
+ UWORD32 u4_prevfrm_bits, u4_prevfrm_sad;
+ /* Slot written last = write position - 1, wrapping to the end of the circular buffer */
+ u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+ if(0 == u1_curr_frame_index)
+ u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+ else
+ u1_curr_frame_index--;
+
+ u1_avgqp_prvfrm = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+ u4_prevfrm_bits = ps_rd_model->pi4_res_bits[u1_curr_frame_index];
+ u4_prevfrm_sad = ps_rd_model->pi4_sad[u1_curr_frame_index];
+ /* a = bits * Qp / SAD; NOTE(review): the product is formed in 32-bit unsigned before the cast */
+ if(0 != u4_prevfrm_sad)
+ model_coeff_a = (float)(u4_prevfrm_bits * u1_avgqp_prvfrm)
+ / u4_prevfrm_sad;
+ else
+ model_coeff_a = 0; /* zero SAD: no usable slope */
+
+ model_coeff_b = 0;
+ model_coeff_c = 0;
+ /* Output order is {b, a, c} */
+ pmc_model_coeff_lin_wo_int[0] = model_coeff_b;
+ pmc_model_coeff_lin_wo_int[1] = model_coeff_a;
+ pmc_model_coeff_lin_wo_int[2] = model_coeff_c;
+ }
+
+ return u1_model_used;
+}
+
+static void irc_update_frame_rd_model(rc_rd_model_t *ps_rd_model)
+{ /* Selects stored frames, calls find_model_coeffs() and stores the resulting a/b/c coefficients in the model */
+ WORD8 pi1_frame_index[MAX_FRAMES_MODELLED],
+ pi1_frame_index_initial[MAX_FRAMES_MODELLED];
+
+ UWORD8 u1_num_skips_temp;
+ UWORD8 u1_avg_mpeg2_qp_temp, u1_min_mpeg2_qp, u1_max_mpeg2_qp;
+ UWORD8 u1_num_frms_input, u1_num_active_frames, u1_reject_frame;
+ UWORD32 u4_num_skips;
+
+ UWORD8 u1_min2_mpeg2_qp, u1_max2_mpeg2_qp;
+ UWORD8 u1_min_qp_frame_indx, u1_max_qp_frame_indx;
+ UWORD8 pu1_num_frames[MPEG2_QP_ELEM];
+ model_coeff model_coeff_array[3], model_coeff_array_lin[3],
+ model_coeff_array_lin_wo_int[3];
+ UWORD32 i;
+ UWORD8 u1_curr_frame_index;
+
+ u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+ /* Step back one slot (with wrap-around) to reach the most recently stored frame */
+ if(0 == u1_curr_frame_index)
+ u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+ else
+ u1_curr_frame_index--;
+
+ /************************************************************************/
+ /* Rearrange data to be fed into a Linear Regression Module */
+ /* Module finds a,b,c such that */
+ /* y = ax + bx^2 + c */
+ /************************************************************************/
+ u4_num_skips = 0; /* never changed afterwards, so the (1 == u4_num_skips) tests below cannot fire */
+ u1_num_frms_input = 0;
+ memset(pu1_num_frames, 0, MPEG2_QP_ELEM);
+ memset(pi1_frame_index, -1, MAX_FRAMES_MODELLED); /* -1 marks an unused slot */
+ u1_min_mpeg2_qp = MAX_MPEG2_QP;
+ u1_max_mpeg2_qp = 0;
+
+ u1_num_active_frames = ps_rd_model->u1_num_frms_in_model;
+ if(u1_num_active_frames > MAX_ACTIVE_FRAMES)
+ {
+ u1_num_active_frames = MAX_ACTIVE_FRAMES;
+ }
+
+ /************************************************************************/
+ /* Choose the set of Points to be used for MSE fit of Quadratic model */
+ /* Points chosen are spread across the Qp range. Max of 2 points are */
+ /* chosen for a Qp. */
+ /************************************************************************/
+ for(i = 0; i < u1_num_active_frames; i++)
+ {
+ u1_reject_frame = 0;
+ u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+ u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+ /* Reject frames with skips, and any third frame at the same Qp */
+ if((0 == u4_num_skips) && (0 != u1_num_skips_temp))
+ u1_reject_frame = 1;
+ if((1 == u4_num_skips) && (u1_num_skips_temp > 1))
+ u1_reject_frame = 1;
+ if(pu1_num_frames[u1_avg_mpeg2_qp_temp] >= 2) /* NOTE(review): assumes stored Qp <= MAX_MPEG2_QP - confirm */
+ u1_reject_frame = 1;
+ /* The most recent frame is always kept */
+ if(0 == i)
+ u1_reject_frame = 0;
+
+ if(0 == u1_reject_frame)
+ {
+ pi1_frame_index[u1_num_frms_input] = (WORD8)u1_curr_frame_index;
+ pu1_num_frames[u1_avg_mpeg2_qp_temp] += 1;
+ /* Track the Qp range covered by the accepted frames */
+ if(u1_min_mpeg2_qp > u1_avg_mpeg2_qp_temp)
+ u1_min_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+ if(u1_max_mpeg2_qp < u1_avg_mpeg2_qp_temp)
+ u1_max_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+
+ u1_num_frms_input++;
+ }
+ /* Move to the next older slot of the circular buffer */
+ if(0 == u1_curr_frame_index)
+ u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+ else
+ u1_curr_frame_index--;
+ }
+
+ /************************************************************************/
+ /* Add Pivot Points to the Data set to be used for finding Quadratic */
+ /* Model Coeffs. These will help in constraining the shape of Quadratic*/
+ /* to adapt too much to the Local deviations. */
+ /************************************************************************/
+ u1_min2_mpeg2_qp = u1_min_mpeg2_qp;
+ u1_max2_mpeg2_qp = u1_max_mpeg2_qp;
+ u1_min_qp_frame_indx = INVALID_FRAME_INDEX;
+ u1_max_qp_frame_indx = INVALID_FRAME_INDEX;
+
+ /* Loop running over the Stored Frame Level Data
+ to find frames of MinQp and MaxQp */
+ for(; i < ps_rd_model->u1_num_frms_in_model; i++) /* runs only when more than MAX_ACTIVE_FRAMES frames are stored */
+ {
+ u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+ u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+ /* NOTE(review): this continue skips the index step at the bottom, so the same slot is read again next iteration */
+ if(((0 == u4_num_skips) && (0 != u1_num_skips_temp))
+ || ((1 == u4_num_skips) && (u1_num_skips_temp > 1)))
+ continue;
+
+ if(u1_min2_mpeg2_qp > u1_avg_mpeg2_qp_temp)
+ {
+ u1_min2_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+ u1_min_qp_frame_indx = u1_curr_frame_index;
+ }
+ if(u1_max2_mpeg2_qp < u1_avg_mpeg2_qp_temp)
+ {
+ u1_max2_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+ u1_max_qp_frame_indx = u1_curr_frame_index;
+ }
+ if(0 == u1_curr_frame_index)
+ u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+ else
+ u1_curr_frame_index--;
+ }
+
+ /* Add the Chosen Points to the regression data set */
+ if(INVALID_FRAME_INDEX != u1_min_qp_frame_indx)
+ { /* NOTE(review): no bound check on u1_num_frms_input against MAX_FRAMES_MODELLED here - confirm it cannot be full */
+ pi1_frame_index[u1_num_frms_input] = (WORD8)u1_min_qp_frame_indx;
+ u1_num_frms_input++;
+ }
+ if(INVALID_FRAME_INDEX != u1_max_qp_frame_indx)
+ {
+ pi1_frame_index[u1_num_frms_input] = (WORD8)u1_max_qp_frame_indx;
+ u1_num_frms_input++;
+ }
+ memcpy(pi1_frame_index_initial, pi1_frame_index, MAX_FRAMES_MODELLED); /* the copy is not read again in this function */
+
+ /***** Call the Module to Return the Coeffs for the Fed Data *****/
+ ps_rd_model->u1_model_used = find_model_coeffs(ps_rd_model->pi4_res_bits,
+ ps_rd_model->pi4_sad,
+ ps_rd_model->pu1_num_skips,
+ ps_rd_model->pu1_avg_qp,
+ u1_num_frms_input,
+ ps_rd_model->u1_model_used,
+ pi1_frame_index,
+ model_coeff_array,
+ model_coeff_array_lin,
+ model_coeff_array_lin_wo_int,
+ ps_rd_model);
+ /* find_model_coeffs() returns the coefficients in the order {b, a, c} */
+ ps_rd_model->model_coeff_b_lin_wo_int = model_coeff_array_lin_wo_int[0];
+ ps_rd_model->model_coeff_a_lin_wo_int = model_coeff_array_lin_wo_int[1];
+ ps_rd_model->model_coeff_c_lin_wo_int = model_coeff_array_lin_wo_int[2];
+}
+
+UWORD32 irc_estimate_bits_for_qp(rc_rd_model_t *ps_rd_model,
+ UWORD32 u4_estimated_sad,
+ UWORD8 u1_avg_qp)
+{
+ /* Returns linear coeff a * (SAD / Qp), truncated to UWORD32; SAD / Qp is an integer quotient */
+ /* Qp 0 would be an integer division by zero (undefined behaviour), so it is treated as Qp 1 */
+ UWORD32 u4_qp = (0 != u1_avg_qp) ? u1_avg_qp : 1;
+ float fl_num_bits = ps_rd_model->model_coeff_a_lin_wo_int
+ * ((float)(u4_estimated_sad / u4_qp));
+ return ((UWORD32)fl_num_bits);
+}
+
+UWORD8 irc_find_qp_for_target_bits(rc_rd_model_t *ps_rd_model,
+ UWORD32 u4_target_res_bits,
+ UWORD32 u4_estimated_sad,
+ UWORD8 u1_min_qp,
+ UWORD8 u1_max_qp)
+{ /* Returns the Qp, clamped to [u1_min_qp, u1_max_qp], at which the linear model predicts u4_target_res_bits */
+ UWORD8 u1_qp;
+ float x_value = 1.0, f_qp;
+
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+ /* Invert bits = a * (SAD / Qp): x_value is the SAD / Qp ratio that yields the target */
+ {
+ x_value = (float)u4_target_res_bits
+ / ps_rd_model->model_coeff_a_lin_wo_int;
+ }
+ /* "> 0" also rejects the NaN from 0 / 0, which would skip every clamp and make the final cast undefined */
+ if(x_value > 0)
+ f_qp = u4_estimated_sad / x_value;
+ else
+ f_qp = 255;
+
+ if(f_qp > 255)
+ f_qp = 255;
+
+ /* Truncating the QP to the Max and Min Qp values possible */
+ if(f_qp < u1_min_qp)
+ f_qp = u1_min_qp;
+ if(f_qp > u1_max_qp)
+ f_qp = u1_max_qp;
+
+ u1_qp = (UWORD8)(f_qp + 0.5); /* round to nearest */
+
+ return u1_qp;
+}
+
+void irc_add_frame_to_rd_model(rc_rd_model_t *ps_rd_model,
+ UWORD32 i4_res_bits,
+ UWORD8 u1_avg_mp2qp,
+ UWORD32 i4_sad_h264,
+ UWORD8 u1_num_skips)
+{ /* Stores one frame's statistics in the circular history and recomputes the model coefficients */
+ UWORD8 u1_curr_frame_index;
+ u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+
+ /* Insert the present frame data into the RD model state memory */
+ ps_rd_model->pi4_res_bits[u1_curr_frame_index] = i4_res_bits;
+ ps_rd_model->pi4_sad[u1_curr_frame_index] = i4_sad_h264;
+ ps_rd_model->pu1_num_skips[u1_curr_frame_index] = u1_num_skips;
+ ps_rd_model->pu1_avg_qp[u1_curr_frame_index] = u1_avg_mp2qp;
+ /* Advance the write position, wrapping at MAX_FRAMES_MODELLED */
+ ps_rd_model->u1_curr_frm_counter++;
+ if(MAX_FRAMES_MODELLED == ps_rd_model->u1_curr_frm_counter)
+ ps_rd_model->u1_curr_frm_counter = 0;
+ /* The stored-frame count saturates at u1_max_frms_to_model */
+ if(ps_rd_model->u1_num_frms_in_model < ps_rd_model->u1_max_frms_to_model)
+ {
+ ps_rd_model->u1_num_frms_in_model++;
+ }
+ irc_update_frame_rd_model(ps_rd_model); /* refresh the coefficients with the new frame included */
+}
+
+/*****************************************************************************
+ *Function Name : irc_calc_per_frm_bits
+ *Description : Returns the bits for the current pic-type and writes its Qp, using the per pic-type linear models
+ *Inputs : ps_rd_model
+ * - array of RD models, indexed by pic-type
+ * pu2_num_pics_of_a_pic_type
+ * - N1, N2,...Nk
+ * pu1_update_pic_type_model
+ * - flag which tells whether or not to update model
+ * coefficients of a particular pic-type
+ * u1_num_pic_types
+ * - value of k
+ * pu4_num_skip_of_a_pic_type
+ * - the number of skips of that pic-type. It "may" be used to
+ * update the model coefficients at a later point. Right now
+ * it is not being used at all.
+ * u1_base_pic_type
+ * - base pic type index wrt which alpha & beta are calculated
+ * pfl_gamma
+ * - gamma_i = beta_i / alpha_i
+ * pfl_eta
+ * - per pic-type factor; enters eff_A and the SAD/Qp of the current frame
+ * u1_curr_pic_type
+ * - the current pic-type for which the targetted bits need to
+ * be computed
+ * u4_bits_for_sub_gop
+ * - the number of bits to be consumed for the remaining part of
+ * sub-gop
+ * u4_curr_estimated_sad
+ * - estimated SAD of the current frame; divided by the SAD/Qp ratio to get the Qp
+ * pu1_curr_pic_type_qp
+ * - output of this function
+ *****************************************************************************/
+
+WORD32 irc_calc_per_frm_bits(rc_rd_model_t *ps_rd_model,
+ UWORD16 *pu2_num_pics_of_a_pic_type,
+ UWORD8 *pu1_update_pic_type_model,
+ UWORD8 u1_num_pic_types,
+ UWORD32 *pu4_num_skip_of_a_pic_type,
+ UWORD8 u1_base_pic_type,
+ float *pfl_gamma,
+ float *pfl_eta,
+ UWORD8 u1_curr_pic_type,
+ UWORD32 u4_bits_for_sub_gop,
+ UWORD32 u4_curr_estimated_sad,
+ UWORD8 *pu1_curr_pic_type_qp)
+{
+ WORD32 i4_per_frm_bits_Ti;
+ UWORD8 u1_i;
+ rc_rd_model_t *ps_rd_model_of_pic_type;
+
+ UNUSED(pu4_num_skip_of_a_pic_type);
+ UNUSED(u1_base_pic_type);
+
+ /* First part of this function updates all the model coefficients */
+ /*for all the pic-types */
+ {
+ for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+ {
+ if((0 != pu2_num_pics_of_a_pic_type[u1_i])
+ && (1 == pu1_update_pic_type_model[u1_i]))
+ {
+ irc_update_frame_rd_model(&ps_rd_model[u1_i]);
+ }
+ }
+ }
+
+ /*
+ * The second part of this function deals with solving the
+ * equation using all the pic-types models
+ */
+ {
+ UWORD8 u1_combined_model_used;
+
+ /* solve the equation */
+ {
+ model_coeff eff_A;
+ float fl_sad_by_qp_base;
+ float fl_sad_by_qp_curr_frm = 1.0;
+ float fl_qp_curr_frm;
+ float fl_bits_for_curr_frm = 0;
+
+
+
+ /* If the combined chosen model is linear model without an intercept */
+
+ u1_combined_model_used = PREV_FRAME_MODEL;
+ {
+ eff_A = 0.0;
+ /* eff_A = sum over pic-types of (eta_i + N_i - 1) * a_i * gamma_i */
+ for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+ {
+ ps_rd_model_of_pic_type = ps_rd_model + u1_i;
+
+ eff_A += ((pfl_eta[u1_i]
+ + pu2_num_pics_of_a_pic_type[u1_i]- 1)
+ * ps_rd_model_of_pic_type->model_coeff_a_lin_wo_int
+ * pfl_gamma[u1_i]);
+ }
+ /* NOTE(review): eff_A is 0 when every coefficient is 0, giving inf/NaN below - confirm callers exclude this */
+ fl_sad_by_qp_base = u4_bits_for_sub_gop / eff_A;
+
+ fl_sad_by_qp_curr_frm = fl_sad_by_qp_base
+ * pfl_gamma[u1_curr_pic_type]
+ * pfl_eta[u1_curr_pic_type];
+
+ ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type;
+ /* bits for this frame = a_curr * (SAD / Qp)_curr */
+ fl_bits_for_curr_frm =
+ ps_rd_model_of_pic_type->model_coeff_a_lin_wo_int
+ * fl_sad_by_qp_curr_frm;
+ }
+
+ /*
+ * Store the model that was finally used to calculate Qp.
+ * This is so that the same model is used in further calculations
+ * for this picture.
+ */
+ ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type;
+ ps_rd_model_of_pic_type->u1_model_used = u1_combined_model_used;
+
+ i4_per_frm_bits_Ti = (WORD32)(fl_bits_for_curr_frm + 0.5); /* round to nearest */
+ /* Qp = estimated SAD / (SAD / Qp); a non-positive or NaN ratio selects 255 */
+ if(fl_sad_by_qp_curr_frm > 0)
+ fl_qp_curr_frm = (float)u4_curr_estimated_sad
+ / fl_sad_by_qp_curr_frm;
+ else
+ fl_qp_curr_frm = 255;
+
+ if(fl_qp_curr_frm > 255)
+ fl_qp_curr_frm = 255;
+
+ *pu1_curr_pic_type_qp = (fl_qp_curr_frm + 0.5); /* rounded on conversion to UWORD8 */
+
+ }
+ }
+ return (i4_per_frm_bits_Ti);
+}
+
+model_coeff irc_get_linear_coefficient(rc_rd_model_t *ps_rd_model)
+{ /* Getter for coefficient a of the intercept-free linear model */
+ return (ps_rd_model->model_coeff_a_lin_wo_int);
+}
+
+
diff --git a/encoder/irc_rd_model.h b/encoder/irc_rd_model.h
new file mode 100755
index 0000000..8be31c1
--- /dev/null
+++ b/encoder/irc_rd_model.h
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Name : irc_rd_model.h */
+/* */
+/* Description : Implements all the Functions to Model the */
+/* Rate Distortion Behaviour of the Codec over the Last */
+/* Few Frames. */
+/* */
+/* List of Functions : irc_update_frame_rd_model */
+/* estimate_mpeg2_qp_for_resbits */
+/* update_mb_rd_model */
+/* find_model_coeffs */
+/* refine_set_of_points */
+/* init_mb_rd_model */
+/* irc_add_frame_to_rd_model */
+/* irc_find_qp_for_target_bits */
+/* */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 21 06 2006 Sarat Initial Version */
+/*****************************************************************************/
+
+#ifndef RC_RD_MODEL
+#define RC_RD_MODEL
+
+#define MAX_FRAMES_MODELLED 16
+
+typedef float model_coeff;
+typedef struct rc_rd_model_t *rc_rd_model_handle;
+
+WORD32 irc_rd_model_num_fill_use_free_memtab(rc_rd_model_handle *pps_rc_rd_model,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+/* Interface Functions */
+/* Initialise the rate distortion model */
+void irc_init_frm_rc_rd_model(rc_rd_model_handle ps_rd_model,
+ UWORD8 u1_max_frames_modelled);
+
+/* Reset the rate distortion model */
+void irc_reset_frm_rc_rd_model(rc_rd_model_handle ps_rd_model);
+
+/* Returns the Qp to be used for the given bits and SAD, clamped to [u1_min_qp, u1_max_qp] */
+UWORD8 irc_find_qp_for_target_bits(rc_rd_model_handle ps_rd_model,
+ UWORD32 u4_target_res_bits,
+ UWORD32 u4_estimated_sad,
+ UWORD8 u1_min_qp,
+ UWORD8 u1_max_qp);
+
+/* Updates the frame level statistics after encoding a frame */
+void irc_add_frame_to_rd_model(rc_rd_model_handle ps_rd_model,
+ UWORD32 i4_res_bits,
+ UWORD8 u1_avg_mp2qp,
+ UWORD32 i4_sad_h264,
+ UWORD8 u1_num_skips);
+
+UWORD32 irc_estimate_bits_for_qp(rc_rd_model_handle ps_rd_model,
+ UWORD32 u4_estimated_sad,
+ UWORD8 u1_avg_qp);
+
+/* Get the Linear model coefficient */
+model_coeff irc_get_linear_coefficient(rc_rd_model_handle ps_rd_model);
+
+WORD32 irc_calc_per_frm_bits(rc_rd_model_handle ps_rd_model,
+ UWORD16 *pu2_num_pics_of_a_pic_type,
+ UWORD8 *pu1_update_pic_type_model,
+ UWORD8 u1_num_pic_types,
+ UWORD32 *pu4_num_skip_of_a_pic_type,
+ UWORD8 u1_base_pic_type,
+ float *pfl_gamma,
+ float *pfl_eta,
+ UWORD8 u1_curr_pic_type,
+ UWORD32 u4_bits_for_sub_gop,
+ UWORD32 u4_curr_estimated_sad,
+ UWORD8 *pu1_curr_pic_type_qp);
+#endif
+
diff --git a/encoder/irc_rd_model_struct.h b/encoder/irc_rd_model_struct.h
new file mode 100755
index 0000000..dc4c0ea
--- /dev/null
+++ b/encoder/irc_rd_model_struct.h
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef RC_RD_MODEL_STRUCT
+#define RC_RD_MODEL_STRUCT
+
+/* Enable or disable the QUAD model and the linear model with intercept */
+#define ENABLE_QUAD_RC_MODEL 0
+#define ENABLE_LIN_MODEL_WITH_INTERCEPT 0
+
+/* Number of elements for QP */
+#define MPEG2_QP_ELEM (MAX_MPEG2_QP + 1)
+
+
+#if ENABLE_QUAD_RC_MODEL
+#define QUAD 1
+#define MIN_FRAMES_FOR_QUAD_MODEL 5
+#endif
+
+#define MAX_ACTIVE_FRAMES 16
+#define MIN_FRAMES_FOR_LIN_MODEL 3
+#define INVALID_FRAME_INDEX 255
+
+#define UP_THR_SM 1 /* (1 / pow(2,4)) = 0.0625 */
+#define UP_THR_E 4
+
+#define LO_THR_SM 368 /* (368.64 / pow(2,14)) = 0.0225 */
+#define LO_THR_E 14
+
+#define LIN_DEV_THR_SM 1 /* (1 / pow(2,2)) = 0.25 */
+#define LIN_DEV_THR_E 2
+
+#define PREV_FRAME_MODEL 2
+
+/* Q Factors used for fixed point calculation */
+#define Q_FORMAT_GAMMA 8
+#define Q_FORMAT_ETA 8
+
+typedef struct rc_rd_model_t
+{ /* Per picture-type RD model: a circular history of frame statistics plus the fitted coefficients */
+ UWORD8 u1_curr_frm_counter; /* next slot to write; wraps at MAX_FRAMES_MODELLED */
+ UWORD8 u1_num_frms_in_model; /* number of stored frames, capped at u1_max_frms_to_model */
+ UWORD8 u1_max_frms_to_model; /* cap set by irc_init_frm_rc_rd_model() */
+ UWORD8 u1_model_used; /* set to PREV_FRAME_MODEL by the model code */
+ /* Circular history; all four arrays share the same slot index */
+ UWORD32 pi4_res_bits[MAX_FRAMES_MODELLED];
+ UWORD32 pi4_sad[MAX_FRAMES_MODELLED];
+
+ UWORD8 pu1_num_skips[MAX_FRAMES_MODELLED];
+ UWORD8 pu1_avg_qp[MAX_FRAMES_MODELLED];
+ UWORD8 au1_num_frames[MPEG2_QP_ELEM]; /* NOTE(review): not referenced by irc_rd_model.c as shown - confirm use */
+ /* Coefficients of the intercept-free linear model; only a is non-zero when computed by find_model_coeffs() */
+ model_coeff model_coeff_a_lin_wo_int;
+ model_coeff model_coeff_b_lin_wo_int;
+ model_coeff model_coeff_c_lin_wo_int;
+} rc_rd_model_t;
+
+#endif /* RC_RD_MODEL_STRUCT */
diff --git a/encoder/irc_trace_support.h b/encoder/irc_trace_support.h
new file mode 100755
index 0000000..c35bd4f
--- /dev/null
+++ b/encoder/irc_trace_support.h
@@ -0,0 +1,61 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* irc_trace_support.h
+*
+* @brief
+* This file contains extern declarations of routines that could be helpful
+* for debugging purposes.
+*
+* @author
+* Harish
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef TRACE_SUPPORT_H_
+#define TRACE_SUPPORT_H_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/*
+ * Book-keeping for the trace output buffer.
+ * NOTE(review): offset/size semantics are inferred from the field names -
+ * confirm against the implementation of trace_printf().
+ */
+typedef struct
+{
+ /* Trace buffer; declared as WORD8 * although the name carries a pu1_ prefix */
+ WORD8 * pu1_buf;
+ /* Presumably the current write position inside pu1_buf */
+ WORD32 i4_offset;
+ /* Presumably the capacity of pu1_buf */
+ WORD32 i4_max_size;
+}trace_support_t;
+
+/*****************************************************************************/
+/* Extern function declarations */
+/*****************************************************************************/
+
+/* Registers the buffer pu1_buf of i4_size units as the trace destination */
+void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size);
+
+/*
+ * Variadic, printf-like trace routine taking a format string.
+ * NOTE(review): meaning of the int return value is not visible here - confirm.
+ */
+int trace_printf(const WORD8 *format, ...);
+
+#endif // TRACE_SUPPORT_H_
diff --git a/encoder/irc_vbr_storage_vbv.c b/encoder/irc_vbr_storage_vbv.c
new file mode 100755
index 0000000..23e9959
--- /dev/null
+++ b/encoder/irc_vbr_storage_vbv.c
@@ -0,0 +1,368 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_common.h"
+#include "irc_cntrl_param.h"
+#include "irc_mem_req_and_acq.h"
+#include "irc_fixed_point_error_bits.h"
+#include "irc_vbr_storage_vbv.h"
+#include "irc_trace_support.h"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+
+/* State of the VBR storage VBV (decoder buffer) model */
+typedef struct vbr_storage_vbv_t
+{
+ /* Buffer capacity; set once by irc_init_vbr_vbv() */
+ WORD32 i4_max_buf_size;
+ /* Current buffer level; initialised to the capacity (buffer starts full) */
+ WORD32 i4_cur_buf_size;
+ /* max_bit_rate * 1000 / frame_rate: bits entering the buffer per frame period */
+ WORD32 i4_max_bits_inflow_per_frm_period;
+ /* Same quantity computed against the target frame rate */
+ WORD32 i4_max_bits_per_tgt_frm;
+ /* Storing input variables */
+ WORD32 i4_max_bit_rate;
+ WORD32 i4_max_frame_rate;
+ /* Error bits calculation module */
+ error_bits_handle ps_error_bits;
+
+} vbr_storage_vbv_t;
+
+/*
+ * Saturating add: pi4_accumulator[0] += i4_input, clamped to the 32-bit signed
+ * range instead of wrapping around.
+ */
+static void overflow_avoided_summation(WORD32 *pi4_accumulator, WORD32 i4_input)
+{
+    const WORD32 i4_acc = *pi4_accumulator;
+
+    /* Positive accumulator: clamp when the headroom below the maximum is too small */
+    if((i4_acc > 0) && (i4_input > ((int)0x7fffffff - i4_acc)))
+    {
+        *pi4_accumulator = 0x7fffffff;
+        return;
+    }
+
+    /* Negative accumulator: clamp when the input would push it past the minimum */
+    if((i4_acc < 0) && (i4_input < ((int)0x80000000 - i4_acc)))
+    {
+        *pi4_accumulator = 0x80000000;
+        return;
+    }
+
+    /* No overflow possible: plain addition */
+    *pi4_accumulator = i4_acc + i4_input;
+}
+
+/*
+ * Memtab handling for the VBR storage VBV module and its error-bits
+ * sub-module. Depending on e_func_type it counts, fills or uses the memtab
+ * entries. Returns the number of memtab entries consumed by this module
+ * (one for the state structure plus those of the error-bits module).
+ */
+WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_t **pps_vbr_storage_vbv,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type)
+{
+ WORD32 i4_mem_tab_idx = 0;
+ static vbr_storage_vbv_t s_vbr_storage_vbv_temp;
+
+ /*
+ * Workaround for the GET_NUM_MEMTAB / FILL_MEMTAB passes, during which no
+ * state memory exists yet: point the handle at a static dummy so that the
+ * dereference further below is safe.
+ */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_vbr_storage_vbv) = &s_vbr_storage_vbv_temp;
+
+ /* Memtab entry for the vbr_storage_vbv_t state structure itself */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(&ps_memtab[i4_mem_tab_idx], sizeof(vbr_storage_vbv_t),
+ ALIGN_128_BYTE, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void**)pps_vbr_storage_vbv,
+ e_func_type);
+ }
+ i4_mem_tab_idx++;
+
+ /* Entries of the error-bits sub-module follow the state structure entry */
+ i4_mem_tab_idx += irc_error_bits_num_fill_use_free_memtab(
+ &pps_vbr_storage_vbv[0]->ps_error_bits,
+ &ps_memtab[i4_mem_tab_idx], e_func_type);
+ return (i4_mem_tab_idx);
+}
+
+/*
+ * Initialises the VBV model: the buffer starts full, the per-frame inflow is
+ * derived from the maximum bit rate and the frame rate, and the error-bits
+ * sub-module is initialised with the same rates.
+ */
+void irc_init_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                      WORD32 i4_max_bit_rate,
+                      WORD32 i4_frm_rate,
+                      WORD32 i4_max_vbv_buff_size)
+{
+    vbr_storage_vbv_t *ps_vbv = ps_vbr_storage_vbv;
+
+    /* Keep the inputs for later run-time changes */
+    ps_vbv->i4_max_bit_rate = i4_max_bit_rate;
+    ps_vbv->i4_max_frame_rate = i4_frm_rate;
+
+    /* The buffer is assumed to be full at start-up */
+    ps_vbv->i4_cur_buf_size = i4_max_vbv_buff_size;
+    ps_vbv->i4_max_buf_size = i4_max_vbv_buff_size;
+
+    /* Max bits flowing into the decoder between two frames */
+    X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, i4_frm_rate,
+                   ps_vbv->i4_max_bits_inflow_per_frm_period);
+    ps_vbv->i4_max_bits_per_tgt_frm =
+                    ps_vbv->i4_max_bits_inflow_per_frm_period;
+
+    /* Error bits module tracks the rounding error of the division above */
+    irc_init_error_bits(ps_vbv->ps_error_bits, i4_frm_rate, i4_max_bit_rate);
+}
+
+/*
+ * Advances the VBV model by one frame: adds one frame period of inflow
+ * (plus error bits), clamps to the buffer capacity, then removes the bits of
+ * the frame just decoded. Finally advances the error-bits state.
+ */
+void irc_update_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                        WORD32 i4_total_bits_decoded)
+{
+    WORD32 i4_inflow = irc_get_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    i4_inflow = ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                    + i4_inflow;
+
+    /* Fill during the interval between two decoded frames (saturating) */
+    overflow_avoided_summation(&i4_level, i4_inflow);
+
+    /* The buffer cannot hold more than its capacity */
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    /* Drain the frame that was just decoded */
+    ps_vbr_storage_vbv->i4_cur_buf_size = i4_level - i4_total_bits_decoded;
+
+    irc_update_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+}
+
+/*
+ * Returns the buffer level at the time the next frame is decoded, i.e. the
+ * maximum number of bits that frame may consume without VBV underflow.
+ * The stored state is not modified.
+ */
+WORD32 irc_get_max_target_bits(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+    WORD32 i4_inflow = irc_get_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+
+    i4_inflow = ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                    + i4_inflow;
+
+    /* Level after one more frame period of inflow (saturating add) */
+    overflow_avoided_summation(&i4_level, i4_inflow);
+
+    /* Clamp to the buffer capacity */
+    return (i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+                    ? ps_vbr_storage_vbv->i4_max_buf_size
+                    : i4_level;
+}
+
+/****************************************************************************
+ Function Name : irc_get_vbv_buffer_status
+ Description : Classifies the VBV buffer state that would result from
+ decoding a frame of i4_total_frame_bits bits. The stored
+ state is not modified.
+ Inputs : VBV state, total bits of the frame
+ Outputs : pi4_num_bits_to_prevent_vbv_underflow[0] receives the buffer
+ level just before the frame is decoded, i.e. the largest
+ frame size that does not underflow the buffer
+ Returns : VBV_UNDERFLOW, VBV_OVERFLOW, VBR_CAUTION (level below a
+ quarter of the buffer size) or VBV_NORMAL
+ *****************************************************************************/
+vbv_buf_status_e irc_get_vbv_buffer_status(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                           WORD32 i4_total_frame_bits,
+                                           WORD32 *pi4_num_bits_to_prevent_vbv_underflow)
+{
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+    WORD32 i4_inflow = irc_get_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+
+    /* Inflow of one frame period, corrected by the fixed point error bits */
+    i4_inflow = ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                    + i4_inflow;
+    overflow_avoided_summation(&i4_level, i4_inflow);
+
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    /* Level before decoding == largest frame that still fits */
+    pi4_num_bits_to_prevent_vbv_underflow[0] = i4_level;
+
+    /* Level after decoding the frame */
+    i4_level -= i4_total_frame_bits;
+
+    if(i4_level < 0)
+    {
+        return VBV_UNDERFLOW;
+    }
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        return VBV_OVERFLOW;
+    }
+    if(i4_level < (ps_vbr_storage_vbv->i4_max_buf_size >> 2))
+    {
+        return VBR_CAUTION;
+    }
+    return VBV_NORMAL;
+}
+
+/*
+ * Returns 0 when the current buffer level is below half of the buffer size,
+ * 1 otherwise.
+ */
+UWORD8 irc_restrict_swing_dvd_comp(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    const WORD32 i4_half_buf_size = ps_vbr_storage_vbv->i4_max_buf_size >> 1;
+
+    return (UWORD8)((ps_vbr_storage_vbv->i4_cur_buf_size < i4_half_buf_size)
+                    ? 0 : 1);
+}
+
+/* Returns the VBV buffer capacity (i4_max_buf_size) */
+WORD32 irc_get_max_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    const WORD32 i4_capacity = ps_vbr_storage_vbv->i4_max_buf_size;
+
+    return i4_capacity;
+}
+
+/* Returns the stored current VBV buffer level (i4_cur_buf_size) */
+WORD32 irc_get_cur_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    const WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    return i4_level;
+}
+
+/* Returns the maximum number of bits entering the buffer per frame period */
+WORD32 irc_get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    const WORD32 i4_inflow =
+                    ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period;
+
+    return i4_inflow;
+}
+
+/* Returns the maximum number of bits per frame at the target frame rate */
+WORD32 irc_get_max_bits_per_tgt_frm(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    const WORD32 i4_bits_per_tgt_frm =
+                    ps_vbr_storage_vbv->i4_max_bits_per_tgt_frm;
+
+    return i4_bits_per_tgt_frm;
+}
+
+/*
+ * Returns the buffer level that would remain after one more frame period of
+ * inflow (clamped to the buffer capacity) followed by the removal of u4_bits.
+ * The stored state is not modified.
+ */
+WORD32 irc_vbv_get_vbv_buf_fullness(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                    UWORD32 u4_bits)
+{
+    WORD32 i4_inflow = irc_get_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    i4_inflow = ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period
+                    + i4_inflow;
+
+    /* Saturating add of the inflow, then clamp to the capacity */
+    overflow_avoided_summation(&i4_level, i4_inflow);
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    /* Remove the bits of the frame under consideration */
+    i4_level -= u4_bits;
+
+    return i4_level;
+}
+
+/*
+ * Returns the maximum number of bits the next picture may use so that the
+ * buffer level after decoding it does not fall below a desired level.
+ *
+ * The desired level is 10% of the buffer size for an I picture. For other
+ * picture types it moves from the current level towards 95% of the buffer
+ * size (less the average remaining bits per frame), spread over the frames
+ * remaining in the GOP.
+ *
+ * Negative i4_rem_bits_in_gop is treated as 0 and a non-positive
+ * i4_rem_frms_in_gop as 1. The result is never negative.
+ */
+WORD32 irc_get_max_tgt_bits_dvd_comp(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                     WORD32 i4_rem_bits_in_gop,
+                                     WORD32 i4_rem_frms_in_gop,
+                                     picture_type_e e_pic_type)
+{
+    WORD32 i4_dbf_max, i4_dbf_min, i4_dbf_prev, i4_vbv_size, i4_dbf_desired;
+    WORD32 i4_max_tgt_bits;
+
+    i4_vbv_size = ps_vbr_storage_vbv->i4_max_buf_size;
+
+    /*
+     * p% of the buffer size. Computing (p * size) / 100 directly overflows a
+     * 32 bit signed integer for buffer sizes above ~22.6 Mbit, so the product
+     * is split into quotient and remainder parts; the result is identical to
+     * (p * size) / 100 whenever that expression does not overflow.
+     */
+    i4_dbf_max = (i4_vbv_size / 100) * 95 + ((i4_vbv_size % 100) * 95) / 100;
+    i4_dbf_min = (i4_vbv_size / 100) * 10 + ((i4_vbv_size % 100) * 10) / 100;
+    i4_dbf_prev = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    if(i4_rem_bits_in_gop < 0)
+        i4_rem_bits_in_gop = 0;
+    if(i4_rem_frms_in_gop <= 0)
+        i4_rem_frms_in_gop = 1;
+
+    if(e_pic_type == I_PIC)
+    {
+        i4_dbf_desired = i4_dbf_min;
+    }
+    else
+    {
+        i4_dbf_desired = (i4_dbf_max - i4_rem_bits_in_gop / i4_rem_frms_in_gop
+                        - i4_dbf_prev) / i4_rem_frms_in_gop;
+        i4_dbf_desired += i4_dbf_prev;
+    }
+
+    /*
+     * Buffer level just before the next picture is decoded. Uses the same
+     * saturating add as the other buffer level computations in this file so
+     * that a large level plus inflow cannot wrap around.
+     */
+    overflow_avoided_summation(
+                    &i4_dbf_prev,
+                    ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period);
+    if(i4_dbf_prev > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_dbf_prev = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    i4_max_tgt_bits = MAX(0, (i4_dbf_prev - i4_dbf_desired));
+    return (i4_max_tgt_bits);
+}
+
+/*
+ * Run-time change of the frame rate: recomputes the per-frame-period inflow
+ * from the stored maximum bit rate and forwards the new rate to the
+ * error-bits module.
+ */
+void irc_change_vbr_vbv_frame_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                   WORD32 i4_frm_rate)
+{
+    vbr_storage_vbv_t *ps_vbv = ps_vbr_storage_vbv;
+
+    /* Remember the new input value */
+    ps_vbv->i4_max_frame_rate = i4_frm_rate;
+
+    /* Max bits flowing into the decoder between two frames */
+    X_PROD_Y_DIV_Z(ps_vbv->i4_max_bit_rate, 1000, i4_frm_rate,
+                   ps_vbv->i4_max_bits_inflow_per_frm_period);
+
+    /* Keep the lower module in sync */
+    irc_change_frm_rate_in_error_bits(ps_vbv->ps_error_bits, i4_frm_rate);
+}
+
+/*
+ * Run-time change of the maximum bit rate: recomputes the per-frame-period
+ * inflow using the stored frame rate and forwards the new bit rate to the
+ * error-bits module.
+ */
+void irc_change_vbr_vbv_bit_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                 WORD32 i4_max_bit_rate)
+{
+    vbr_storage_vbv_t *ps_vbv = ps_vbr_storage_vbv;
+
+    /* Remember the new input value */
+    ps_vbv->i4_max_bit_rate = i4_max_bit_rate;
+
+    /* Max bits flowing into the decoder between two frames */
+    X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, ps_vbv->i4_max_frame_rate,
+                   ps_vbv->i4_max_bits_inflow_per_frm_period);
+
+    /* Keep the lower module in sync */
+    irc_change_bitrate_in_error_bits(ps_vbv->ps_error_bits, i4_max_bit_rate);
+}
+
+/*
+ * Recomputes the maximum number of bits per frame at the given target frame
+ * rate from the stored maximum bit rate.
+ */
+void irc_change_vbr_max_bits_per_tgt_frm(vbr_storage_vbv_t *ps_vbr_storage_vbv,
+                                         WORD32 i4_tgt_frm_rate)
+{
+    vbr_storage_vbv_t *ps_vbv = ps_vbr_storage_vbv;
+
+    X_PROD_Y_DIV_Z(ps_vbv->i4_max_bit_rate, 1000, i4_tgt_frm_rate,
+                   ps_vbv->i4_max_bits_per_tgt_frm);
+}
diff --git a/encoder/irc_vbr_storage_vbv.h b/encoder/irc_vbr_storage_vbv.h
new file mode 100755
index 0000000..c53c66d
--- /dev/null
+++ b/encoder/irc_vbr_storage_vbv.h
@@ -0,0 +1,119 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _VBR_STORAGE_VBV_H_
+#define _VBR_STORAGE_VBV_H_
+/******************************************************************************
+VBR STORAGE (VBV):
+Max. buffer filling rate: Rmax
+Max. buffer size: Bmax (as specified by level and profile)
+Current Buffer Level: Bcur
+Frame Rate: F
+
+For a storage scenario, the initial buffer size is assumed to be max. For every
+frame the Maximum bits filled in to the buffer is given by Rmaxfrm = Rmax/F. If
+the buffer overflows then the buffer is thresholded to the max buffer size.
+
+ (overflow)
+ B(0) /|
+---|--------------/-|------------------------------ Bmax
+ | / |
+ | /|/ |
+ | /| / |
+ | / | /|/ |
+ |/ | / | /|
+ |/ |/ |
+ |
+ |
+-----------------------|---------------------------
+ |<->| |
+(1/F)=>1/frame_rate (underflow)
+
+
+ B"(i) - Bits in buffer just before decoding a frame.
+ B'(i) - Bits in buffer just after decoding a frame.
+
+
+ B(0) (initBuffer size) = Bmax.
+ B'(i) = B"(i) - bits_decoded
+ B"(i) = Min( Bmax, B'(i-1) + Rmaxfrm)
+
+Overflow Scenario: In the VBR case only a maximum filling rate (input bit rate) is specified,
+so buffer overflow is not an issue (the buffer filling rate can be reduced to any value
+below this rate).
+
+Underflow Scenario: B'(i) must never drop below 0; if it does, the buffer underflows. To
+prevent this, the number of bits decoded for a frame must not exceed B"(i),
+which is equal to Min( Bmax, B'(i-1) + Rmaxfrm).
+****************************************************************************************/
+
+/* Opaque handle to the VBV state; the structure is defined in the .c file */
+typedef struct vbr_storage_vbv_t* vbr_storage_vbv_handle;
+
+/* Counts / fills / uses the memtab entries of this module; returns the number of entries */
+WORD32 irc_vbr_vbv_num_fill_use_free_memtab(vbr_storage_vbv_handle *pps_vbr_storage_vbv,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/* Initialises the vbv buffer status */
+void irc_init_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 max_bit_rate, /* In bits/sec*/
+ WORD32 max_frm_rate, /* In frames/1000 sec*/
+ WORD32 i4_max_vbv_buff_size); /* in bits*/
+
+/* Updates the buffer after decoding a frame */
+void irc_update_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_total_bits_decoded);
+
+/* gets the max_number of bits that can be decoded out of the VBV without underflow */
+WORD32 irc_get_max_target_bits(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+/* Returns the max bits flowing into the buffer in one frame period */
+WORD32 irc_get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+/* Returns the max bits per frame at the target frame rate */
+WORD32 irc_get_max_bits_per_tgt_frm(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+/* Returns the stored current buffer level */
+WORD32 irc_get_cur_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+/* Queries the VBV buffer for the buffer status */
+vbv_buf_status_e irc_get_vbv_buffer_status(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_total_frame_bits,
+ WORD32 *pi4_num_bits_to_prevent_vbv_underflow);
+
+/* Returns 0 when the buffer level is below half of the buffer size, else 1 */
+UWORD8 irc_restrict_swing_dvd_comp(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+/* Returns the buffer capacity */
+WORD32 irc_get_max_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+
+/* Returns the buffer level after one frame period of inflow and removal of u4_bits */
+WORD32 irc_vbv_get_vbv_buf_fullness(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ UWORD32 u4_bits);
+
+/* Returns the max bits for the next picture given the remaining bits / frames in the GOP */
+WORD32 irc_get_max_tgt_bits_dvd_comp(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_rem_bits_in_gop,
+ WORD32 i4_rem_frms_in_gop,
+ picture_type_e e_pic_type);
+
+/* Changing input values at run time */
+void irc_change_vbr_vbv_bit_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_max_bit_rate);
+
+void irc_change_vbr_vbv_frame_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_frm_rate);
+
+void irc_change_vbr_max_bits_per_tgt_frm(vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_tgt_frm_rate);
+#endif
+
diff --git a/encoder/irc_vbr_str_prms.c b/encoder/irc_vbr_str_prms.c
new file mode 100755
index 0000000..29055c2
--- /dev/null
+++ b/encoder/irc_vbr_str_prms.c
@@ -0,0 +1,199 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "irc_datatypes.h"
+#include "irc_cntrl_param.h"
+#include "irc_vbr_str_prms.h"
+
+/******************************************************************************
+ Function Name : irc_init_vbv_str_prms
+ Description : Stores the stream parameters and derives the number of I
+ and P frames that fall inside the delay period, together
+ with the position of the next intra period.
+ Return Values : void
+ NOTE(review) : u4_intra_frm_interval * u4_tgt_ticks is used as a divisor
+ and is not checked against zero here - callers must ensure
+ both are non-zero.
+ *****************************************************************************/
+void irc_init_vbv_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+                           UWORD32 u4_intra_frm_interval,
+                           UWORD32 u4_src_ticks,
+                           UWORD32 u4_tgt_ticks,
+                           UWORD32 u4_frms_in_delay_period)
+{
+    UWORD32 u4_delay_prd_ticks, u4_intra_prd_ticks;
+    UWORD32 u4_num_i_frms, u4_num_p_frms;
+
+    /* Keep the inputs */
+    p_vbr_str_prms->u4_intra_frame_int = u4_intra_frm_interval;
+    p_vbr_str_prms->u4_tgt_ticks = u4_tgt_ticks;
+    p_vbr_str_prms->u4_src_ticks = u4_src_ticks;
+    p_vbr_str_prms->u4_frms_in_delay_prd = u4_frms_in_delay_period;
+
+    /*
+     * Number of I frames in the delay period =
+     * ceil(delay period in ticks / intra period in ticks).
+     * The remaining frames of the delay period are counted as P frames.
+     */
+    u4_delay_prd_ticks = u4_frms_in_delay_period * u4_src_ticks;
+    u4_intra_prd_ticks = u4_intra_frm_interval * u4_tgt_ticks;
+
+    u4_num_i_frms = u4_delay_prd_ticks / u4_intra_prd_ticks;
+    if((u4_num_i_frms * u4_intra_prd_ticks) < u4_delay_prd_ticks)
+    {
+        /* Round up */
+        u4_num_i_frms++;
+    }
+    u4_num_p_frms = u4_frms_in_delay_period - u4_num_i_frms;
+
+    p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC] = u4_num_i_frms;
+    p_vbr_str_prms->u4_num_pics_in_delay_prd[P_PIC] = u4_num_p_frms;
+
+    /* Position of the first intra frame beyond the counted ones */
+    p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = u4_num_i_frms
+                    * u4_intra_prd_ticks;
+
+    p_vbr_str_prms->u4_cur_pos_in_src_ticks = 0;
+    p_vbr_str_prms->u4_pic_num = 0;
+}
+
+/*
+ * Copies the I and P picture counts of the delay period into
+ * pu4_num_pics_in_delay_prd (indexed by I_PIC / P_PIC) and returns the
+ * number of frames in the delay period.
+ */
+WORD32 irc_get_vsp_num_pics_in_dly_prd(vbr_str_prms_t *p_vbr_str_prms,
+                                       UWORD32 *pu4_num_pics_in_delay_prd)
+{
+    const UWORD32 *pu4_counts = p_vbr_str_prms->u4_num_pics_in_delay_prd;
+
+    pu4_num_pics_in_delay_prd[I_PIC] = pu4_counts[I_PIC];
+    pu4_num_pics_in_delay_prd[P_PIC] = pu4_counts[P_PIC];
+
+    return (WORD32)p_vbr_str_prms->u4_frms_in_delay_prd;
+}
+
+/******************************************************************************
+ Function Name : irc_update_vbr_str_prms
+ Description : Called after every encoded picture. Removes that picture
+ from the I / P counts of the delay period, adds the picture
+ that newly enters the delay period (an I picture when the
+ next intra position falls inside the period, else a P
+ picture) and advances the current position by one source
+ tick interval.
+ *****************************************************************************/
+void irc_update_vbr_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+                             picture_type_e e_pic_type)
+{
+    UWORD32 u4_delay_prd_end;
+
+    /* The picture just encoded leaves the delay period */
+    p_vbr_str_prms->u4_num_pics_in_delay_prd[(e_pic_type == I_PIC) ? I_PIC
+                                                                   : P_PIC]--;
+
+    /* End of the delay period measured from the current position */
+    u4_delay_prd_end = p_vbr_str_prms->u4_cur_pos_in_src_ticks
+                    + (p_vbr_str_prms->u4_frms_in_delay_prd
+                                    * p_vbr_str_prms->u4_src_ticks);
+
+    if(u4_delay_prd_end < p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks)
+    {
+        /* Next I frame is still outside the delay period: a P frame enters */
+        p_vbr_str_prms->u4_num_pics_in_delay_prd[P_PIC]++;
+    }
+    else
+    {
+        /*
+         * Next I frame enters the delay period: re-base the intra position
+         * on the current position, move it one intra period ahead and
+         * restart the picture / position counters.
+         */
+        p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks =
+                        (p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks
+                                        - p_vbr_str_prms->u4_cur_pos_in_src_ticks)
+                        + (p_vbr_str_prms->u4_intra_frame_int
+                                        * p_vbr_str_prms->u4_tgt_ticks);
+        p_vbr_str_prms->u4_num_pics_in_delay_prd[I_PIC]++;
+        p_vbr_str_prms->u4_pic_num = 0;
+        p_vbr_str_prms->u4_cur_pos_in_src_ticks = 0;
+    }
+
+    p_vbr_str_prms->u4_pic_num++;
+    p_vbr_str_prms->u4_cur_pos_in_src_ticks += p_vbr_str_prms->u4_src_ticks;
+}
+
+/* Writes the stored source and target ticks to the two output locations */
+void irc_get_vsp_src_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                               UWORD32 *pu4_src_ticks,
+                               UWORD32 *pu4_tgt_ticks)
+{
+    *pu4_src_ticks = p_vbr_str_prms->u4_src_ticks;
+    *pu4_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+}
+
+/*******************************************************************************
+ Function Name : irc_change_vsp_ifi
+ Description : Takes in a new intra frame interval and re-initialises the
+ stream parameters with it, keeping the stored source ticks,
+ target ticks and delay period.
+ ******************************************************************************/
+void irc_change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms,
+                        UWORD32 u4_intra_frame_int)
+{
+    const UWORD32 u4_src_ticks = p_vbr_str_prms->u4_src_ticks;
+    const UWORD32 u4_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+    const UWORD32 u4_delay_frms = p_vbr_str_prms->u4_frms_in_delay_prd;
+
+    irc_init_vbv_str_prms(p_vbr_str_prms, u4_intra_frame_int, u4_src_ticks,
+                          u4_tgt_ticks, u4_delay_frms);
+}
+
+/*
+ * Rescales the distance to the next I frame from the stored target ticks to
+ * u4_tgt_ticks and recomputes the position of the next I frame.
+ *
+ * LIMITATIONS:
+ * Currently no support is available for dynamic change in source frame rate.
+ *
+ * NOTE(review): only u4_intra_prd_pos_in_tgt_ticks is written; the stored
+ * u4_tgt_ticks keeps its previous value after this call - confirm that this
+ * is intended.
+ */
+void irc_change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                              UWORD32 u4_tgt_ticks)
+{
+    const UWORD32 u4_cur_pos = p_vbr_str_prms->u4_cur_pos_in_src_ticks;
+    const UWORD32 u4_old_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+    UWORD32 u4_rem_intra_prd;
+
+    /* Remaining distance to the next I frame, in units of the old target ticks */
+    u4_rem_intra_prd = (p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks
+                    - u4_cur_pos) / u4_old_tgt_ticks;
+
+    /* Same number of units expressed in the new target ticks */
+    p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = (u4_rem_intra_prd
+                    * u4_tgt_ticks) + u4_cur_pos;
+}
+
+/*
+ * Re-initialises the stream parameters with new source ticks, keeping the
+ * stored intra frame interval, target ticks and delay period.
+ */
+void irc_change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms,
+                              UWORD32 u4_src_ticks)
+{
+    const UWORD32 u4_intra_frame_int = p_vbr_str_prms->u4_intra_frame_int;
+    const UWORD32 u4_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+    const UWORD32 u4_delay_frms = p_vbr_str_prms->u4_frms_in_delay_prd;
+
+    irc_init_vbv_str_prms(p_vbr_str_prms, u4_intra_frame_int, u4_src_ticks,
+                          u4_tgt_ticks, u4_delay_frms);
+}
+
+/*
+ * Re-initialises the stream parameters with a new number of frames in the
+ * delay period, keeping the stored intra frame interval and tick values.
+ */
+void irc_change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms,
+                         UWORD32 u4_frms_in_delay_period)
+{
+    const UWORD32 u4_intra_frame_int = p_vbr_str_prms->u4_intra_frame_int;
+    const UWORD32 u4_src_ticks = p_vbr_str_prms->u4_src_ticks;
+    const UWORD32 u4_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+
+    irc_init_vbv_str_prms(p_vbr_str_prms, u4_intra_frame_int, u4_src_ticks,
+                          u4_tgt_ticks, u4_frms_in_delay_period);
+}
diff --git a/encoder/irc_vbr_str_prms.h b/encoder/irc_vbr_str_prms.h
new file mode 100755
index 0000000..34301d8
--- /dev/null
+++ b/encoder/irc_vbr_str_prms.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef _VBR_STR_PRMS_H_
+#define _VBR_STR_PRMS_H_
+
+/* Stream parameters used to track the I / P picture mix inside the delay period */
+typedef struct
+{
+ /* Number of pictures of each type inside the delay period (indexed by picture type) */
+ UWORD32 u4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+ /* Picture counter; reset whenever a new I frame enters the delay period */
+ UWORD32 u4_pic_num;
+ /* Position of the next intra frame not yet counted in the delay period */
+ UWORD32 u4_intra_prd_pos_in_tgt_ticks;
+ /* Current position; advances by u4_src_ticks per picture */
+ UWORD32 u4_cur_pos_in_src_ticks;
+ /* Intra frame interval */
+ UWORD32 u4_intra_frame_int;
+ /* Source and target tick values as passed to irc_init_vbv_str_prms() */
+ UWORD32 u4_src_ticks;
+ UWORD32 u4_tgt_ticks;
+ /* Number of frames in the delay period */
+ UWORD32 u4_frms_in_delay_prd;
+} vbr_str_prms_t;
+
+/* Stores the inputs and computes the I / P picture counts of the delay period */
+void irc_init_vbv_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_intra_frm_interval,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_tgt_ticks,
+ UWORD32 u4_frms_in_delay_period);
+
+/* Copies the I / P picture counts out; returns the frames in the delay period */
+WORD32 irc_get_vsp_num_pics_in_dly_prd(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 *pu4_num_pics_in_delay_prd);
+
+/* Returns the stored source and target ticks through the output pointers */
+void irc_get_vsp_src_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 *pu4_src_ticks,
+ UWORD32 *pu4_tgt_ticks);
+
+/* Updates the I / P picture counts after a picture of type e_pic_type */
+void irc_update_vbr_str_prms(vbr_str_prms_t *p_vbr_str_prms,
+ picture_type_e e_pic_type);
+
+/* Run-time change of the intra frame interval (re-initialises the parameters) */
+void irc_change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_intra_frame_int);
+
+/* Run-time change of the target ticks (rescales the next I frame position) */
+void irc_change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_tgt_ticks);
+
+/* Run-time change of the source ticks (re-initialises the parameters) */
+void irc_change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_src_ticks);
+
+/* Run-time change of the frames in the delay period (re-initialises the parameters) */
+void irc_change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_frms_in_delay_period);
+
+#endif
+
diff --git a/encoder/ithread.h b/encoder/ithread.h
new file mode 100755
index 0000000..82170a5
--- /dev/null
+++ b/encoder/ithread.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : ithread.h */
+/* */
+/* Description : This file contains all the necessary structure and */
+/* enumeration definitions needed for the Application */
+/* Program Interface(API) of the */
+/* Thread Abstraction Layer */
+/* */
+/* List of Functions : ithread_get_handle_size() */
+/* ithread_get_mutex_lock_size() */
+/* ithread_create() */
+/* ithread_exit() */
+/* ithread_join() */
+/* ithread_get_mutex_struct_size() */
+/* ithread_mutex_init() */
+/* ithread_mutex_destroy() */
+/* ithread_mutex_lock() */
+/* ithread_mutex_unlock() */
+/* ithread_yield() */
+/* ithread_sleep() */
+/* ithread_msleep() */
+/* ithread_usleep() */
+/* ithread_get_sem_struct_size() */
+/* ithread_sem_init() */
+/* ithread_sem_post() */
+/* ithread_sem_wait() */
+/* ithread_sem_destroy() */
+/* ithread_set_affinity() */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 06 09 2012 Harish Initial Version */
+/* */
+/*****************************************************************************/
+
+#ifndef _ITHREAD_H_
+#define _ITHREAD_H_
+
+/*
+ * Thread abstraction layer API. All objects (thread handles, mutexes,
+ * semaphores) are passed as untyped pointers; the *_size() functions report
+ * sizes for the corresponding objects.
+ * NOTE(review): the WORD32 return values are presumably status codes -
+ * confirm against the implementation.
+ */
+
+/* Threads */
+UWORD32 ithread_get_handle_size(void);
+
+UWORD32 ithread_get_mutex_lock_size(void);
+
+/* NOTE(review): strt is presumably the thread entry point - confirm */
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument);
+
+void ithread_exit(void *val_ptr);
+
+WORD32 ithread_join(void *thread_id, void ** val_ptr);
+
+/* Mutexes */
+WORD32 ithread_get_mutex_struct_size(void);
+
+WORD32 ithread_mutex_init(void *mutex);
+
+WORD32 ithread_mutex_destroy(void *mutex);
+
+WORD32 ithread_mutex_lock(void *mutex);
+
+WORD32 ithread_mutex_unlock(void *mutex);
+
+/* Scheduling / sleeping */
+void ithread_yield(void);
+
+void ithread_sleep(UWORD32 u4_time);
+
+void ithread_msleep(UWORD32 u4_time_ms);
+
+void ithread_usleep(UWORD32 u4_time_us);
+
+/* Semaphores */
+UWORD32 ithread_get_sem_struct_size(void);
+
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value);
+
+WORD32 ithread_sem_post(void *sem);
+
+WORD32 ithread_sem_wait(void *sem);
+
+WORD32 ithread_sem_destroy(void *sem);
+
+/* CPU affinity */
+WORD32 ithread_set_affinity(WORD32 core_id);
+#endif /* _ITHREAD_H_ */
diff --git a/encoder/iv2.h b/encoder/iv2.h
new file mode 100755
index 0000000..538bb1e
--- /dev/null
+++ b/encoder/iv2.h
@@ -0,0 +1,386 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* iv2.h
+*
+* @brief
+* This file contains all the necessary structure and enumeration
+* definitions needed for the Application Program Interface(API) of the
+* Ittiam Video codecs. This is version 2 of the Ittiam Video API.
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IV2_H_
+#define _IV2_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IV_MAX_RAW_COMPONENTS 4
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+
+/** Function status */
+typedef enum{
+ IV_STATUS_NA = 0x7FFFFFFF,
+ IV_SUCCESS = 0x0,
+ IV_FAIL = 0x1,
+}IV_STATUS_T;
+
+
+/** Defines the types of memory */
+typedef enum {
+ IV_NA_MEM_TYPE = 0x7FFFFFFF,
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM = 0x0,
+ IV_EXTERNAL_CACHEABLE_SCRATCH_MEM = 0x1,
+ IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x2,
+ IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x3,
+ IV_INTERNAL_CACHEABLE_PERSISTENT_MEM = 0x10,
+ IV_INTERNAL_CACHEABLE_SCRATCH_MEM = 0x11,
+ IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x12,
+ IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x13,
+}IV_MEM_TYPE_T;
+
+/* The color formats used in video/image codecs */
+
+typedef enum {
+ IV_CHROMA_NA = 0x7FFFFFFF,
+ IV_YUV_420P = 0x0,
+ IV_YUV_420SP_UV = 0x1,
+ IV_YUV_420SP_VU = 0x2,
+
+ IV_YUV_422P = 0x10,
+ IV_YUV_422IBE = 0x11,
+ IV_YUV_422ILE = 0x12,
+
+ IV_YUV_444P = 0x20,
+ IV_YUV_411P = 0x21,
+
+ IV_GRAY = 0x30,
+
+ IV_RGB_565 = 0x31,
+ IV_RGB_24 = 0x32,
+ IV_RGBA_8888 = 0x33
+}IV_COLOR_FORMAT_T;
+
+/** Frame/Field coding types */
+typedef enum {
+ IV_NA_FRAME = 0x7FFFFFFF,
+ IV_I_FRAME = 0x0,
+ IV_P_FRAME = 0x1,
+ IV_B_FRAME = 0x2,
+ IV_IDR_FRAME = 0x3,
+ IV_II_FRAME = 0x4,
+ IV_IP_FRAME = 0x5,
+ IV_IB_FRAME = 0x6,
+ IV_PI_FRAME = 0x7,
+ IV_PP_FRAME = 0x8,
+ IV_PB_FRAME = 0x9,
+ IV_BI_FRAME = 0xa,
+ IV_BP_FRAME = 0xb,
+ IV_BB_FRAME = 0xc,
+ IV_MBAFF_I_FRAME = 0xd,
+ IV_MBAFF_P_FRAME = 0xe,
+ IV_MBAFF_B_FRAME = 0xf,
+ IV_MBAFF_IDR_FRAME = 0x10,
+ IV_NOT_CODED_FRAME = 0x11,
+ IV_FRAMETYPE_DEFAULT = IV_I_FRAME
+}IV_PICTURE_CODING_TYPE_T;
+
+/** Field type */
+typedef enum {
+ IV_NA_FLD = 0x7FFFFFFF,
+ IV_TOP_FLD = 0x0,
+ IV_BOT_FLD = 0x1,
+ IV_FLD_TYPE_DEFAULT = IV_TOP_FLD
+}IV_FLD_TYPE_T;
+
+/** Video content type progressive/interlaced etc */
+typedef enum {
+ IV_CONTENTTYPE_NA = 0x7FFFFFFF,
+ IV_PROGRESSIVE = 0x0,
+ IV_INTERLACED = 0x1,
+ IV_PROGRESSIVE_FRAME = 0x2,
+ IV_INTERLACED_FRAME = 0x3,
+ IV_INTERLACED_TOPFIELD = 0x4,
+ IV_INTERLACED_BOTTOMFIELD = 0x5,
+ IV_CONTENTTYPE_DEFAULT = IV_PROGRESSIVE,
+}IV_CONTENT_TYPE_T;
+
+/** Profile */
+typedef enum
+{
+ IV_PROFILE_NA = 0x7FFFFFFF,
+ IV_PROFILE_BASE = 0x0,
+ IV_PROFILE_MAIN = 0x1,
+ IV_PROFILE_HIGH = 0x2,
+
+
+ IV_PROFILE_SIMPLE = 0x100,
+ IV_PROFILE_ADVSIMPLE = 0x101,
+ IV_PROFILE_DEFAULT = IV_PROFILE_BASE,
+}IV_PROFILE_T;
+
+
+/** Architecture Enumeration */
+typedef enum
+{
+ ARCH_NA = 0x7FFFFFFF,
+ ARCH_ARM_NONEON = 0x0,
+ ARCH_ARM_A9Q,
+ ARCH_ARM_A9A,
+ ARCH_ARM_A9,
+ ARCH_ARM_A7,
+ ARCH_ARM_A5,
+ ARCH_ARM_A15,
+ ARCH_ARM_NEONINTR,
+ ARCH_X86_GENERIC,
+ ARCH_X86_SSSE3,
+ ARCH_X86_SSE42,
+ ARCH_ARM_A53,
+ ARCH_ARM_A57,
+ ARCH_ARM_V8_NEON
+}IV_ARCH_T;
+
+/** SOC Enumeration */
+typedef enum
+{
+ SOC_NA = 0x7FFFFFFF,
+ SOC_GENERIC = 0x0,
+ SOC_HISI_37X
+}IV_SOC_T;
+
+
+/** API command type */
+typedef enum {
+ IV_CMD_NA = 0x7FFFFFFF,
+ IV_CMD_GET_NUM_MEM_REC = 0x0,
+ IV_CMD_FILL_NUM_MEM_REC = 0x1,
+ IV_CMD_RETRIEVE_MEMREC = 0x2,
+ IV_CMD_INIT = 0x3,
+ /* Do not add anything after the following entry */
+ IV_CMD_EXTENSIONS = 0x100
+}IV_API_COMMAND_TYPE_T;
+
+/*****************************************************************************/
+/* Structure Definitions */
+/*****************************************************************************/
+
+/** This structure defines the handle for the codec instance */
+
+typedef struct{
+ /** size of the structure */
+ UWORD32 u4_size;
+ /** Pointer to the API function pointer table of the codec */
+ void *pv_fxns;
+ /** Pointer to the handle of the codec */
+ void *pv_codec_handle;
+}iv_obj_t;
+
+/** This structure defines the memory record holder which will *
+ * be used by the codec to communicate its memory requirements to the *
+ * application through appropriate API functions */
+
+typedef struct {
+ /** Size of the structure */
+ UWORD32 u4_size;
+ /** Pointer to the memory allocated by the application */
+ void *pv_base;
+ /** Size of the memory to be allocated */
+ UWORD32 u4_mem_size;
+ /** Alignment of the memory pointer */
+ UWORD32 u4_mem_alignment;
+ /** Type of the memory to be allocated */
+ IV_MEM_TYPE_T e_mem_type;
+}iv_mem_rec_t;
+
+/** This structure defines attributes for the raw buffer */
+typedef struct {
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Color format */
+ IV_COLOR_FORMAT_T e_color_fmt;
+
+ /** Pointer to each component */
+ void *apv_bufs[IV_MAX_RAW_COMPONENTS];
+
+ /** Width of each component */
+ UWORD32 au4_wd[IV_MAX_RAW_COMPONENTS];
+
+ /** Height of each component */
+ UWORD32 au4_ht[IV_MAX_RAW_COMPONENTS];
+
+ /** Stride of each component */
+ UWORD32 au4_strd[IV_MAX_RAW_COMPONENTS];
+
+}iv_raw_buf_t;
+
+/** This structure defines attributes for the bitstream buffer */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Pointer to buffer */
+ void *pv_buf;
+
+ /** Number of valid bytes in the buffer */
+ UWORD32 u4_bytes;
+
+ /** Allocated size of the buffer */
+ UWORD32 u4_bufsize;
+
+}iv_bits_buf_t;
+/*****************************************************************************/
+/* Get Number of Memory Records */
+/*****************************************************************************/
+
+/** Input structure : Get number of memory records */
+typedef struct {
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type */
+ IV_API_COMMAND_TYPE_T e_cmd;
+}iv_num_mem_rec_ip_t;
+
+/** Output structure : Get number of memory records */
+typedef struct{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Number of memory records that will be used by the codec */
+ UWORD32 u4_num_mem_rec;
+}iv_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Fill Memory Records */
+/*****************************************************************************/
+
+/** Input structure : Fill memory records */
+
+typedef struct {
+ /** Size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type */
+ IV_API_COMMAND_TYPE_T e_cmd;
+
+ /** Number of memory records */
+ UWORD32 u4_num_mem_rec;
+
+ /** Pointer to an array of memory record structures, to be filled by the
+ codec with details of its memory resource requirements */
+ iv_mem_rec_t *ps_mem_rec;
+
+ /** Maximum width for which the codec should report memory requirements */
+ UWORD32 u4_max_wd;
+
+ /** Maximum height for which the codec should report memory requirements */
+ UWORD32 u4_max_ht;
+
+ /** Maximum number of reference frames */
+ UWORD32 u4_max_ref_cnt;
+
+ /** Maximum number of reorder frames */
+ UWORD32 u4_max_reorder_cnt;
+
+ /** Maximum level supported */
+ UWORD32 u4_max_level;
+
+ /** Color format that the codec supports for input/output */
+ IV_COLOR_FORMAT_T e_color_format;
+
+ /** Maximum search range to be used in X direction */
+ UWORD32 u4_max_srch_rng_x;
+
+ /** Maximum search range to be used in Y direction */
+ UWORD32 u4_max_srch_rng_y;
+
+}iv_fill_mem_rec_ip_t;
+
+
+/** Output structure : Fill memory records */
+typedef struct{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** no of memory record structures which are filled by codec */
+ UWORD32 u4_num_mem_rec;
+}iv_fill_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/* Retrieve Memory Records */
+/*****************************************************************************/
+
+/** Input structure : Retrieve memory records */
+
+typedef struct {
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type */
+ IV_API_COMMAND_TYPE_T e_cmd;
+
+ /** array of structures where codec should fill with all memory requested earlier */
+ iv_mem_rec_t *ps_mem_rec;
+}iv_retrieve_mem_rec_ip_t;
+
+
+typedef struct{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** no of memory record structures which are filled by codec */
+ UWORD32 u4_num_mem_rec_filled;
+}iv_retrieve_mem_rec_op_t;
+
+#endif /* _IV2_H_ */
+
diff --git a/encoder/ive2.h b/encoder/ive2.h
new file mode 100755
index 0000000..8cb0fd1
--- /dev/null
+++ b/encoder/ive2.h
@@ -0,0 +1,1445 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ive2.h
+*
+* @brief
+* This file contains all the necessary structure and enumeration
+* definitions needed for the Application Program Interface(API) of the
+* Ittiam Video Encoders. This is version 2.
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IVE2_H_
+#define _IVE2_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/** Maximum number of components in I/O Buffers */
+#define IVE_MAX_IO_BUFFER_COMPONENTS 4
+
+/** Maximum number of reference pictures */
+#define IVE_MAX_REF 16
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/** Slice modes */
+typedef enum
+{
+ IVE_SLICE_MODE_NA = 0x7FFFFFFF,
+ IVE_SLICE_MODE_NONE = 0x0,
+
+ IVE_SLICE_MODE_BYTES = 0x1,
+ IVE_SLICE_MODE_BLOCKS = 0x2,
+}IVE_SLICE_MODE_T;
+
+/** Adaptive Intra refresh modes */
+typedef enum
+{
+ IVE_AIR_MODE_NA = 0x7FFFFFFF,
+ IVE_AIR_MODE_NONE = 0x0,
+ IVE_AIR_MODE_CYCLIC = 0x1,
+ IVE_AIR_MODE_RANDOM = 0x2,
+ IVE_AIR_MODE_DISTORTION = 0x3,
+}IVE_AIR_MODE_T;
+
+/** Rate control modes */
+typedef enum
+{
+ IVE_RC_NA = 0x7FFFFFFF,
+ IVE_RC_NONE = 0x0,
+ IVE_RC_STORAGE = 0x1,
+ IVE_RC_CBR_NON_LOW_DELAY = 0x2,
+ IVE_RC_CBR_LOW_DELAY = 0x3,
+ IVE_RC_TWOPASS = 0x4,
+ IVE_RC_RATECONTROLPRESET_DEFAULT = IVE_RC_STORAGE
+}IVE_RC_MODE_T;
+
+/** Encoder mode */
+typedef enum
+{
+ IVE_ENC_MODE_NA = 0x7FFFFFFF,
+ IVE_ENC_MODE_HEADER = 0x1,
+ IVE_ENC_MODE_PICTURE = 0x0,
+ IVE_ENC_MODE_DEFAULT = IVE_ENC_MODE_PICTURE,
+}IVE_ENC_MODE_T;
+
+/** Speed Config */
+typedef enum IVE_SPEED_CONFIG
+{
+ IVE_QUALITY_DUMMY = 0x7FFFFFFF,
+ IVE_CONFIG = 0,
+ IVE_SLOWEST = 1,
+ IVE_NORMAL = 2,
+ IVE_FAST = 3,
+ IVE_HIGH_SPEED = 4,
+ IVE_FASTEST = 5,
+}IVE_SPEED_CONFIG;
+
+/** API command type */
+typedef enum
+{
+ IVE_CMD_VIDEO_NA = 0x7FFFFFFF,
+ IVE_CMD_VIDEO_CTL = IV_CMD_EXTENSIONS + 1,
+ IVE_CMD_VIDEO_ENCODE,
+ IVE_CMD_QUEUE_INPUT,
+ IVE_CMD_DEQUEUE_INPUT,
+ IVE_CMD_QUEUE_OUTPUT,
+ IVE_CMD_DEQUEUE_OUTPUT,
+ IVE_CMD_GET_RECON,
+}IVE_API_COMMAND_TYPE_T;
+
+/** Video Control API command type */
+typedef enum
+{
+ IVE_CMD_CT_NA = 0x7FFFFFFF,
+ IVE_CMD_CTL_SETDEFAULT = 0x0,
+ IVE_CMD_CTL_SET_DIMENSIONS = 0x1,
+ IVE_CMD_CTL_SET_FRAMERATE = 0x2,
+ IVE_CMD_CTL_SET_BITRATE = 0x3,
+ IVE_CMD_CTL_SET_FRAMETYPE = 0x4,
+ IVE_CMD_CTL_SET_QP = 0x5,
+ IVE_CMD_CTL_SET_ENC_MODE = 0x6,
+ IVE_CMD_CTL_SET_VBV_PARAMS = 0x7,
+ IVE_CMD_CTL_SET_AIR_PARAMS = 0x8,
+ IVE_CMD_CTL_SET_ME_PARAMS = 0X9,
+ IVE_CMD_CTL_SET_GOP_PARAMS = 0XA,
+ IVE_CMD_CTL_SET_PROFILE_PARAMS = 0XB,
+ IVE_CMD_CTL_SET_DEBLOCK_PARAMS = 0XC,
+ IVE_CMD_CTL_SET_IPE_PARAMS = 0XD,
+ IVE_CMD_CTL_SET_NUM_CORES = 0x30,
+ IVE_CMD_CTL_RESET = 0xA0,
+ IVE_CMD_CTL_FLUSH = 0xB0,
+ IVE_CMD_CTL_GETBUFINFO = 0xC0,
+ IVE_CMD_CTL_GETVERSION = 0xC1,
+ IVE_CMD_CTL_CODEC_SUBCMD_START = 0x100,
+}IVE_CONTROL_API_COMMAND_TYPE_T;
+
+/* IVE_ERROR_BITS_T: A UWORD32 container will be used for reporting the error*/
+/* code to the application. The first 8 bits starting from LSB have been */
+/* reserved for the codec to report internal error details. The rest of the */
+/* bits will be generic for all video encoders and each bit has an associated*/
+/* meaning as mentioned below. The unused bit fields are reserved for future */
+/* extensions and will be zero in the current implementation */
+typedef enum {
+
+ /* Bit 8 - Unsupported input parameter or configuration. */
+ IVE_UNSUPPORTEDPARAM = 0x8,
+
+ /* Bit 9 - Fatal error (stop the codec).If there is an */
+ /* error and this bit is not set, the error is a recoverable one. */
+ IVE_FATALERROR = 0x9,
+
+ IVE_ERROR_BITS_T_DUMMY_ELEMENT = 0x7FFFFFFF
+}IVE_ERROR_BITS_T;
+
+/* IVE_ERROR_CODES_T: The list of error codes depicting the possible error */
+/* scenarios that can be encountered while encoding */
+typedef enum
+{
+
+ IVE_ERR_NA = 0x7FFFFFFF,
+ IVE_ERR_NONE = 0x00,
+ IVE_ERR_INVALID_API_CMD = 0x01,
+ IVE_ERR_INVALID_API_SUB_CMD = 0x02,
+ IVE_ERR_IP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x03,
+ IVE_ERR_OP_GET_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x04,
+ IVE_ERR_IP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x05,
+ IVE_ERR_OP_FILL_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x06,
+ IVE_ERR_IP_INIT_API_STRUCT_SIZE_INCORRECT = 0x07,
+ IVE_ERR_OP_INIT_API_STRUCT_SIZE_INCORRECT = 0x08,
+ IVE_ERR_IP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x09,
+ IVE_ERR_OP_RETRIEVE_MEM_REC_API_STRUCT_SIZE_INCORRECT = 0x0A,
+ IVE_ERR_IP_ENCODE_API_STRUCT_SIZE_INCORRECT = 0x0B,
+ IVE_ERR_OP_ENCODE_API_STRUCT_SIZE_INCORRECT = 0x0C,
+ IVE_ERR_IP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT = 0x0D,
+ IVE_ERR_OP_CTL_SETDEF_API_STRUCT_SIZE_INCORRECT = 0x0E,
+ IVE_ERR_IP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT = 0x0F,
+ IVE_ERR_OP_CTL_GETBUFINFO_API_STRUCT_SIZE_INCORRECT = 0x10,
+ IVE_ERR_IP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT = 0x11,
+ IVE_ERR_OP_CTL_GETVERSION_API_STRUCT_SIZE_INCORRECT = 0x12,
+ IVE_ERR_IP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT = 0x13,
+ IVE_ERR_OP_CTL_FLUSH_API_STRUCT_SIZE_INCORRECT = 0x14,
+ IVE_ERR_IP_CTL_RESET_API_STRUCT_SIZE_INCORRECT = 0x15,
+ IVE_ERR_OP_CTL_RESET_API_STRUCT_SIZE_INCORRECT = 0x16,
+ IVE_ERR_IP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT = 0x17,
+ IVE_ERR_OP_CTL_SETCORES_API_STRUCT_SIZE_INCORRECT = 0x18,
+ IVE_ERR_IP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT = 0x19,
+ IVE_ERR_OP_CTL_SETDIM_API_STRUCT_SIZE_INCORRECT = 0x1A,
+ IVE_ERR_IP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT = 0x1B,
+ IVE_ERR_OP_CTL_SETFRAMERATE_API_STRUCT_SIZE_INCORRECT = 0x1C,
+ IVE_ERR_IP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT = 0x1D,
+ IVE_ERR_OP_CTL_SETBITRATE_API_STRUCT_SIZE_INCORRECT = 0x1E,
+ IVE_ERR_IP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT = 0x1F,
+ IVE_ERR_OP_CTL_SETFRAMETYPE_API_STRUCT_SIZE_INCORRECT = 0x20,
+ IVE_ERR_IP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x21,
+ IVE_ERR_OP_CTL_SETMEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x22,
+ IVE_ERR_IP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x23,
+ IVE_ERR_OP_CTL_SETIPEPARAMS_API_STRUCT_SIZE_INCORRECT = 0x24,
+ IVE_ERR_IP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x25,
+ IVE_ERR_OP_CTL_SETGOPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x26,
+ IVE_ERR_IP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT = 0x27,
+ IVE_ERR_OP_CTL_SETDEBLKPARAMS_API_STRUCT_SIZE_INCORRECT = 0x28,
+ IVE_ERR_IP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x29,
+ IVE_ERR_OP_CTL_SETQPPARAMS_API_STRUCT_SIZE_INCORRECT = 0x2A,
+ IVE_ERR_FILL_NUM_MEM_RECS_POINTER_NULL = 0x2B,
+ IVE_ERR_NUM_MEM_REC_NOT_SUFFICIENT = 0x2C,
+ IVE_ERR_MEM_REC_STRUCT_SIZE_INCORRECT = 0x2D,
+ IVE_ERR_MEM_REC_BASE_POINTER_NULL = 0x2E,
+ IVE_ERR_MEM_REC_OVERLAP_ERR = 0x2F,
+ IVE_ERR_MEM_REC_INSUFFICIENT_SIZE = 0x30,
+ IVE_ERR_MEM_REC_ALIGNMENT_ERR = 0x31,
+ IVE_ERR_MEM_REC_INCORRECT_TYPE = 0x32,
+ IVE_ERR_HANDLE_NULL = 0x33,
+ IVE_ERR_HANDLE_STRUCT_SIZE_INCORRECT = 0x34,
+ IVE_ERR_API_FUNCTION_PTR_NULL = 0x35,
+ IVE_ERR_INVALID_CODEC_HANDLE = 0x36,
+ IVE_ERR_CTL_GET_VERSION_BUFFER_IS_NULL = 0x37,
+ IVE_ERR_IP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT = 0x38,
+ IVE_ERR_OP_CTL_SETAIRPARAMS_API_STRUCT_SIZE_INCORRECT = 0x39,
+ IVE_ERR_IP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT = 0x3A,
+ IVE_ERR_OP_CTL_SETENCMODE_API_STRUCT_SIZE_INCORRECT = 0x3B,
+ IVE_ERR_IP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT = 0x3C,
+ IVE_ERR_OP_CTL_SETVBVPARAMS_API_STRUCT_SIZE_INCORRECT = 0x3D,
+ IVE_ERR_IP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT = 0x3E,
+ IVE_ERR_OP_CTL_SETPROFILE_API_STRUCT_SIZE_INCORRECT = 0x3F,
+
+}IVE_ERROR_CODES_T;
+
+
+/*****************************************************************************/
+/* Initialize encoder */
+/*****************************************************************************/
+
+/** Input structure : Initialize the encoder */
+typedef struct
+{
+ /** Size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type */
+ IV_API_COMMAND_TYPE_T e_cmd;
+
+ /** Number of memory records */
+ UWORD32 u4_num_mem_rec;
+
+ /** Pointer to an array of memory record structures.
+ NOTE(review): original text matched iv_fill_mem_rec_ip_t; confirm who fills these at init */
+ iv_mem_rec_t *ps_mem_rec;
+
+ /** Maximum width for which the codec should report memory requirements */
+ UWORD32 u4_max_wd;
+
+ /** Maximum height for which the codec should report memory requirements */
+ UWORD32 u4_max_ht;
+
+ /** Maximum number of reference frames */
+ UWORD32 u4_max_ref_cnt;
+
+ /** Maximum number of reorder frames */
+ UWORD32 u4_max_reorder_cnt;
+
+ /** Maximum level supported */
+ UWORD32 u4_max_level;
+
+ /** Input color format */
+ IV_COLOR_FORMAT_T e_inp_color_fmt;
+
+ /** Flag to enable/disable recon - To be used only for debugging/testing */
+ UWORD32 u4_enable_recon;
+
+ /** Recon color format */
+ IV_COLOR_FORMAT_T e_recon_color_fmt;
+
+ /** Rate control mode */
+ IVE_RC_MODE_T e_rc_mode;
+
+ /** Maximum frame rate to be supported */
+ UWORD32 u4_max_framerate;
+
+ /** Maximum bitrate to be supported */
+ UWORD32 u4_max_bitrate;
+
+ /** Maximum number of consecutive B frames */
+ UWORD32 u4_max_num_bframes;
+
+ /** Content type Interlaced/Progressive */
+ IV_CONTENT_TYPE_T e_content_type;
+
+ /** Maximum search range to be used in X direction */
+ UWORD32 u4_max_srch_rng_x;
+
+ /** Maximum search range to be used in Y direction */
+ UWORD32 u4_max_srch_rng_y;
+
+ /** Slice Mode */
+ IVE_SLICE_MODE_T e_slice_mode;
+
+ /** Slice parameter */
+ UWORD32 u4_slice_param;
+
+ /** Processor architecture */
+ IV_ARCH_T e_arch;
+
+ /** SOC details */
+ IV_SOC_T e_soc;
+
+
+}ive_init_ip_t;
+
+/** Output structure : Initialize the encoder */
+typedef struct
+{
+ /** Size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_init_op_t;
+
+
+/*****************************************************************************/
+/* Video Encode - Deprecated */
+/*****************************************************************************/
+
+typedef struct
+{
+ /** Size of the structure */
+ UWORD32 u4_size;
+
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Descriptor for input raw buffer */
+ iv_raw_buf_t s_inp_buf;
+
+ /** NOTE(review): purpose unclear; original comment duplicated pv_pic_info's - confirm */
+ void *pv_bufs;
+
+ /** Flag to indicate if mb info is sent along with input buffer */
+ UWORD32 u4_mb_info_type;
+
+ /** Buffer containing mb info if mb_info_type is non-zero */
+ void *pv_mb_info;
+
+ /** Flag to indicate if pic info is sent along with input buffer */
+ UWORD32 u4_pic_info_type;
+
+ /** Buffer containing pic info if pic_info_type is non-zero */
+ void *pv_pic_info;
+
+ /** Lower 32 bits of input time stamp */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32 bits of input time stamp */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if this is the last input in the stream */
+ UWORD32 u4_is_last;
+
+ /** Descriptor for output bit-stream buffer */
+ iv_bits_buf_t s_out_buf;
+
+ /** Descriptor for recon buffer */
+ iv_raw_buf_t s_recon_buf;
+
+}ive_video_encode_ip_t;
+
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** error code */
+ UWORD32 u4_error_code;
+
+ /* Output present */
+ WORD32 output_present;
+
+ /* dump recon */
+ WORD32 dump_recon;
+
+ /* encoded frame type */
+ UWORD32 u4_encoded_frame_type;
+
+ /** Descriptor for input raw buffer freed from codec */
+ iv_raw_buf_t s_inp_buf;
+
+ /** Descriptor for output bit-stream buffer */
+ iv_bits_buf_t s_out_buf;
+
+ /** Descriptor for recon buffer */
+ iv_raw_buf_t s_recon_buf;
+
+}ive_video_encode_op_t;
+
+/*****************************************************************************/
+/* Queue Input raw buffer - Send the YUV buffer to be encoded */
+/*****************************************************************************/
+/** Input structure : Queue input buffer to the encoder */
+typedef struct
+{
+ /** Size of the structure */
+ UWORD32 u4_size;
+
+ /** Command : IVE_CMD_QUEUE_INPUT */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Descriptor for input raw buffer */
+ iv_raw_buf_t s_inp_buf;
+
+ /** Flag to indicate if mb info is sent along with input buffer */
+ UWORD32 u4_mb_info_type;
+
+ /** Size of the mb info structure */
+ UWORD32 u4_mb_info_size;
+
+ /** Buffer containing mb info if mb_info_type is non-zero */
+ void *pv_mb_info;
+
+ /** Flag to indicate if pic info is sent along with input buffer */
+ UWORD32 u4_pic_info_type;
+
+ /** Buffer containing pic info if pic_info_type is non-zero */
+ void *pv_pic_info;
+
+ /** Lower 32 bits of input time stamp */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32 bits of input time stamp */
+ UWORD32 u4_timestamp_high;
+
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+
+ /** Flag to indicate if this is the last input in the stream */
+ UWORD32 u4_is_last;
+
+}ive_queue_inp_ip_t;
+
+/** Output structure : Queue input buffer to the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_queue_inp_op_t;
+
+/*****************************************************************************/
+/* Dequeue Input raw buffer - Get free YUV buffer from the encoder */
+/*****************************************************************************/
+/** Input structure : Dequeue input buffer from the encoder */
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command: IVE_CMD_DEQUEUE_INPUT */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+
+}ive_dequeue_inp_ip_t;
+
+/** Output structure : Dequeue input buffer from the encoder */
+typedef struct
+{
+ /** Size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Buffer descriptor of the buffer returned from encoder */
+ iv_raw_buf_t s_inp_buf;
+
+ /** Flag to indicate if mb info is sent along with input buffer */
+ UWORD32 u4_mb_info_type;
+
+ /** Size of the mb info structure */
+ UWORD32 u4_mb_info_size;
+
+ /** Buffer containing mb info if mb_info_type is non-zero */
+ void *pv_mb_info;
+
+ /** Flag to indicate if pic info is sent along with input buffer */
+ UWORD32 u4_pic_info_type;
+
+ /** Buffer containing pic info if pic_info_type is non-zero */
+ void *pv_pic_info;
+
+ /** Lower 32 bits of input time stamp */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32 bits of input time stamp */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if this is the last input in the stream */
+ UWORD32 u4_is_last;
+
+
+}ive_dequeue_inp_op_t;
+
+/*****************************************************************************/
+/* Queue Output bitstream buffer - Send the bitstream buffer to be filled */
+/*****************************************************************************/
+/** Input structure : Queue output buffer to the encoder */
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command : IVE_CMD_QUEUE_OUTPUT */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Descriptor for output bit-stream buffer */
+ iv_bits_buf_t s_out_buf;
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+
+ /** Flag to indicate if this is the last output in the stream */
+ UWORD32 u4_is_last;
+
+}ive_queue_out_ip_t;
+
+/** Output structure : Queue output buffer to the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+}ive_queue_out_op_t;
+
+
+/*****************************************************************************/
+/* Dequeue Output bitstream buffer - Get the bitstream buffer filled */
+/*****************************************************************************/
+/** Input structure : Dequeue output buffer from the encoder */
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command : IVE_CMD_DEQUEUE_OUTPUT */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+}ive_dequeue_out_ip_t;
+
+/** Output structure : Dequeue output buffer from the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Descriptor for output bit-stream buffer */
+ iv_bits_buf_t s_out_buf;
+
+ /** Lower 32bits of timestamp corresponding to this buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of timestamp corresponding to this buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if this is the last output in the stream */
+ UWORD32 u4_is_last;
+
+}ive_dequeue_out_op_t;
+
+/*****************************************************************************/
+/* Get Recon data - Get the reconstructed data from encoder */
+/*****************************************************************************/
+/** Input structure : Get recon data from the encoder */
+
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command : IVE_CMD_GET_RECON */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Flag to enable/disable blocking the current API call */
+ UWORD32 u4_is_blocking;
+
+ /** Descriptor for recon buffer */
+ iv_raw_buf_t s_recon_buf;
+
+ /** Flag to indicate if this is the last recon in the stream */
+ UWORD32 u4_is_last;
+
+}ive_get_recon_ip_t;
+
+/** Output structure : Get recon data from the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Lower 32bits of time stamp corresponding to this buffer */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to this buffer */
+ UWORD32 u4_timestamp_high;
+
+ /** Flag to indicate if this is the last recon in the stream */
+ UWORD32 u4_is_last;
+
+}ive_get_recon_op_t;
+
+/*****************************************************************************/
+/* Video control Flush */
+/*****************************************************************************/
+
+/** Input structure : Flush all the buffers from the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_FLUSH */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ive_ctl_flush_ip_t;
+
+/** Output structure : Flush all the buffers from the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_flush_op_t;
+
+/*****************************************************************************/
+/* Video control reset */
+/*****************************************************************************/
+/** Input structure : Reset the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_RESET */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+}ive_ctl_reset_ip_t;
+
+/** Output structure : Reset the encoder */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_reset_op_t;
+
+/*****************************************************************************/
+/* Video control:Get Buf Info */
+/*****************************************************************************/
+
+/** Input structure : Get encoder buffer requirements */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_GETBUFINFO */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** maximum width for which codec should request memory requirements */
+ UWORD32 u4_max_wd;
+
+ /** maximum height for which codec should request memory requirements */
+ UWORD32 u4_max_ht;
+
+ /** Input color format */
+ IV_COLOR_FORMAT_T e_inp_color_fmt;
+
+}ive_ctl_getbufinfo_ip_t;
+
+/** Output structure : Get encoder buffer requirements */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+ /** Minimum number of input buffers required for codec */
+ UWORD32 u4_min_inp_bufs;
+
+ /** Minimum number of output buffers required for codec */
+ UWORD32 u4_min_out_bufs;
+
+ /** Number of components in input buffers required for codec */
+ UWORD32 u4_inp_comp_cnt;
+
+ /** Number of components in output buffers required for codec */
+ UWORD32 u4_out_comp_cnt;
+
+ /** Minimum sizes of each component in input buffer required */
+ UWORD32 au4_min_in_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS];
+
+ /** Minimum sizes of each component in output buffer required */
+ UWORD32 au4_min_out_buf_size[IVE_MAX_IO_BUFFER_COMPONENTS];
+
+}ive_ctl_getbufinfo_op_t;
+
+
+
+
+/*****************************************************************************/
+/* Video control:Get Version Info */
+/*****************************************************************************/
+
+/** Input structure : Get encoder version information */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_GETVERSION */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Buffer where version info will be returned */
+ UWORD8 *pu1_version;
+
+ /** Size of the buffer allocated for version info */
+ UWORD32 u4_version_bufsize;
+}ive_ctl_getversioninfo_ip_t;
+
+/** Output structure : Get encoder version information */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_getversioninfo_op_t;
+
+
+/*****************************************************************************/
+/* Video control:set default params */
+/*****************************************************************************/
+/** Input structure : Set default encoder parameters */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SETDEFAULT */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_setdefault_ip_t;
+
+/** Output structure : Set default encoder parameters */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_setdefault_op_t;
+
+/*****************************************************************************/
+/* Video control Set Frame dimensions */
+/*****************************************************************************/
+
+/** Input structure : Set frame dimensions */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_DIMENSIONS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Input width */
+ UWORD32 u4_wd;
+
+ /** Input height */
+ UWORD32 u4_ht;
+
+ /** Input stride */
+ UWORD32 u4_strd;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_dimensions_ip_t;
+
+/** Output structure : Set frame dimensions */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_dimensions_op_t;
+
+
+/*****************************************************************************/
+/* Video control Set Frame rates */
+/*****************************************************************************/
+
+/** Input structure : Set frame rate */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_FRAMERATE */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Source frame rate */
+ UWORD32 u4_src_frame_rate;
+
+ /** Target frame rate */
+ UWORD32 u4_tgt_frame_rate;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_frame_rate_ip_t;
+
+/** Output structure : Set frame rate */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_frame_rate_op_t;
+
+/*****************************************************************************/
+/* Video control Set Bitrate */
+/*****************************************************************************/
+
+/** Input structure : Set bitrate */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_BITRATE */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Target bitrate in kilobits per second */
+ UWORD32 u4_target_bitrate;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_bitrate_ip_t;
+
+/** Output structure : Set bitrate */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_bitrate_op_t;
+
+/*****************************************************************************/
+/* Video control Set Frame type */
+/*****************************************************************************/
+
+/** Input structure : Set frametype */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_FRAMETYPE */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Force current frame type */
+ IV_PICTURE_CODING_TYPE_T e_frame_type;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_frame_type_ip_t;
+
+/** Output structure : Set frametype */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_frame_type_op_t;
+
+/*****************************************************************************/
+/* Video control Set Encode mode */
+/*****************************************************************************/
+
+/** Input structure : Set encode mode */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_ENC_MODE */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Encoder mode */
+ IVE_ENC_MODE_T e_enc_mode;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_enc_mode_ip_t;
+
+/** Output structure : Set encode mode */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+
+}ive_ctl_set_enc_mode_op_t;
+
+/*****************************************************************************/
+/* Video control Set QP */
+/*****************************************************************************/
+
+/** Input structure : Set QP */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_QP */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Set initial Qp for I pictures */
+ UWORD32 u4_i_qp;
+
+ /** Set initial Qp for P pictures */
+ UWORD32 u4_p_qp;
+
+ /** Set initial Qp for B pictures */
+ UWORD32 u4_b_qp;
+
+ /** Set minimum Qp for I pictures */
+ UWORD32 u4_i_qp_min;
+
+ /** Set maximum Qp for I pictures */
+ UWORD32 u4_i_qp_max;
+
+ /** Set minimum Qp for P pictures */
+ UWORD32 u4_p_qp_min;
+
+ /** Set maximum Qp for P pictures */
+ UWORD32 u4_p_qp_max;
+
+ /** Set minimum Qp for B pictures */
+ UWORD32 u4_b_qp_min;
+
+ /** Set maximum Qp for B pictures */
+ UWORD32 u4_b_qp_max;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+
+}ive_ctl_set_qp_ip_t;
+
+/** Output structure : Set QP */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_qp_op_t;
+
+/*****************************************************************************/
+/* Video control Set AIR params */
+/*****************************************************************************/
+
+/** Input structure : Set AIR params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_AIR_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Adaptive intra refresh mode */
+ IVE_AIR_MODE_T e_air_mode;
+
+ /** Adaptive intra refresh period in frames */
+ UWORD32 u4_air_refresh_period;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+
+}ive_ctl_set_air_params_ip_t;
+
+/** Output structure : Set AIR params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_air_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set VBV params */
+/*****************************************************************************/
+
+/** Input structure : Set VBV params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_VBV_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** VBV buffer delay */
+ UWORD32 u4_vbv_buffer_delay;
+
+ /** VBV buffer size */
+ UWORD32 u4_vbv_buf_size;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+
+}ive_ctl_set_vbv_params_ip_t;
+
+/** Output structure : Set VBV params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_vbv_params_op_t;
+
+
+/*****************************************************************************/
+/* Video control Set Processor Details */
+/*****************************************************************************/
+
+/** Input structure : Set processor details */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_NUM_CORES */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Total number of cores to be used */
+ UWORD32 u4_num_cores;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_num_cores_ip_t;
+
+/** Output structure : Set processor details */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_num_cores_op_t;
+
+/*****************************************************************************/
+/* Video control Set Intra Prediction estimation params */
+/*****************************************************************************/
+
+/** Input structure : Set IPE params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_IPE_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Flag to enable/disable intra 4x4 analysis */
+ UWORD32 u4_enable_intra_4x4;
+
+ /** Flag to enable/disable pre-enc stage of Intra Pred estimation */
+ UWORD32 u4_pre_enc_ipe;
+
+ /** Speed preset, held as an IVE_SPEED_CONFIG value. NOTE(review): the "0 (slowest) to 100 (fastest)" range is unconfirmed for this type */
+ IVE_SPEED_CONFIG u4_enc_speed_preset;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_ipe_params_ip_t;
+
+/** Output structure : Set IPE Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_ipe_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Motion estimation params */
+/*****************************************************************************/
+
+/** Input structure : Set ME Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_ME_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Flag to enable/disable pre-enc stage of Motion estimation */
+ UWORD32 u4_pre_enc_me;
+
+ /** Speed preset - Value between 0 (slowest) and 100 (fastest) */
+ UWORD32 u4_me_speed_preset;
+
+ /** Flag to enable/disable half pel motion estimation */
+ UWORD32 u4_enable_hpel;
+
+ /** Flag to enable/disable quarter pel motion estimation */
+ UWORD32 u4_enable_qpel;
+
+ /** Flag to enable/disable fast SAD approximation */
+ UWORD32 u4_enable_fast_sad;
+
+ /** Flag to enable/disable alternate reference frames */
+ UWORD32 u4_enable_alt_ref;
+
+ /** Maximum search range in X direction for farthest reference */
+ UWORD32 u4_srch_rng_x;
+
+ /** Maximum search range in Y direction for farthest reference */
+ UWORD32 u4_srch_rng_y;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_me_params_ip_t;
+
+/** Output structure : Set ME Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_me_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set GOP params */
+/*****************************************************************************/
+
+/** Input structure : Set GOP Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_GOP_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** I frame interval */
+ UWORD32 u4_i_frm_interval;
+
+ /** IDR frame interval */
+ UWORD32 u4_idr_frm_interval;
+
+ /** consecutive B frames */
+ UWORD32 u4_num_b_frames;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_gop_params_ip_t;
+
+/** Output structure : Set GOP params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_gop_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Deblock params */
+/*****************************************************************************/
+
+/** Input structure : Set Deblock Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_DEBLOCK_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Disable deblock level (0: Enable completely, 3: Disable completely) */
+ UWORD32 u4_disable_deblock_level;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_deblock_params_ip_t;
+
+/** Output structure : Set Deblock Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_deblock_params_op_t;
+
+/*****************************************************************************/
+/* Video control Set Profile params */
+/*****************************************************************************/
+
+/** Input structure : Set Profile Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Command type : IVE_CMD_VIDEO_CTL */
+ IVE_API_COMMAND_TYPE_T e_cmd;
+
+ /** Sub command type : IVE_CMD_CTL_SET_PROFILE_PARAMS */
+ IVE_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+
+ /** Profile */
+ IV_PROFILE_T e_profile;
+
+ /** Lower 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_low;
+
+ /** Upper 32bits of time stamp corresponding to input buffer,
+ * from which this command takes effect */
+ UWORD32 u4_timestamp_high;
+
+}ive_ctl_set_profile_params_ip_t;
+
+/** Output structure : Set Profile Params */
+typedef struct
+{
+ /** size of the structure */
+ UWORD32 u4_size;
+
+ /** Return error code */
+ UWORD32 u4_error_code;
+}ive_ctl_set_profile_params_op_t;
+
+
+#endif /* _IVE2_H_ */
+
diff --git a/encoder/mips/ih264e_function_selector.c b/encoder/mips/ih264e_function_selector.c
new file mode 100755
index 0000000..58ec4d0
--- /dev/null
+++ b/encoder/mips/ih264e_function_selector.c
@@ -0,0 +1,110 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in h264
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] pv_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    /* Only the generic initializer is run here; no other variant is selected */
+    ih264e_init_function_ptr_generic((codec_t *)pv_codec);
+}
+
+IV_ARCH_T ih264e_default_arch(void)
+{
+ return ARCH_NA; /* ih264e_init_function_ptr() in this file runs only the generic initializer */
+}
+
diff --git a/encoder/mips/ih264e_platform_macros.h b/encoder/mips/ih264e_platform_macros.h
new file mode 100755
index 0000000..ed1edd4
--- /dev/null
+++ b/encoder/mips/ih264e_platform_macros.h
@@ -0,0 +1,135 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_platform_macros.h
+ *
+ * @brief
+ * Contains platform specific routines used for codec context initialization
+ *
+ * @author
+ * ittiam
+ *
+ * @remarks
+ * none
+ *
+ *******************************************************************************
+ */
+
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+#define DATA_SYNC()
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] pv_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+/**
+*******************************************************************************
+*
+* @brief Data Memory Barrier, Data Synchronization Barrier
+*
+*
+* @par Description: DATA_SYNC() expands to nothing in this header. But on arm platforms,
+*
+* Data Memory Barrier acts as a memory barrier. It ensures that all explicit
+* memory accesses that appear in program order before the DMB instruction are
+* observed before any explicit memory accesses that appear in program order
+* after the DMB instruction. It does not affect the ordering of any other
+* instructions executing on the processor
+*
+* Data Synchronization Barrier acts as a special kind of memory barrier. No
+* instruction in program order after this instruction executes until this instruction
+* completes. This instruction completes when:
+* 1. All explicit memory accesses before this instruction complete.
+* 2. All Cache, Branch predictor and TLB maintenance operations before
+* this instruction complete.
+*
+* @param[in] void
+*
+* @returns void
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/mips/ime_platform_macros.h b/encoder/mips/ime_platform_macros.h
new file mode 100755
index 0000000..18e2e8f
--- /dev/null
+++ b/encoder/mips/ime_platform_macros.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+/* Adds to sad the sum of |src[i] - est[i]| for i = 0..3; each argument is expanded more than once, so pass side-effect-free expressions */
+#define USADA8(src,est,sad) \
+ ((sad) += ABS((src)[0]-(est)[0]) + \
+ ABS((src)[1]-(est)[1]) + \
+ ABS((src)[2]-(est)[2]) + \
+ ABS((src)[3]-(est)[3]))
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/encoder/x86/ih264e_function_selector.c b/encoder/x86/ih264e_function_selector.c
new file mode 100755
index 0000000..429cdab
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector.c
@@ -0,0 +1,141 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in h264
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include Files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264e_defs.h"
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] pv_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec)
+{
+    codec_t *ps_codec = (codec_t *)pv_codec;
+
+    /* The generic initializer always runs first, whatever the arch */
+    ih264e_init_function_ptr_generic(ps_codec);
+
+    if(ARCH_X86_GENERIC == ps_codec->s_cfg.e_arch)
+    {
+        /* Same sequence as before: the generic initializer runs a second time */
+        ih264e_init_function_ptr_generic(ps_codec);
+        return;
+    }
+
+    /* ARCH_X86_SSSE3 stops after the SSSE3 initializer; all other values add SSE4.2 */
+    ih264e_init_function_ptr_ssse3(ps_codec);
+    if(ARCH_X86_SSSE3 != ps_codec->s_cfg.e_arch)
+        ih264e_init_function_ptr_sse42(ps_codec);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void)
+{
+ return ARCH_X86_SSE42; /* for this value ih264e_init_function_ptr() runs the SSSE3 and then the SSE4.2 initializer */
+}
+
+
diff --git a/encoder/x86/ih264e_function_selector_sse42.c b/encoder/x86/ih264e_function_selector_sse42.c
new file mode 100755
index 0000000..6fa6308
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector_sse42.c
@@ -0,0 +1,146 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_sse42.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_sse42
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Install the SSE4.2 variants of the transform and motion estimation
+* distortion function pointers in the codec context
+*
+* @par Description: overwrites the forward/inverse transform and SAD function
+* pointers of the codec context, and the function pointers of the ME context
+* of every process context, with their SSE4.2 implementations
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks function pointers not assigned here are left unchanged
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_sse42(codec_t *ps_codec)
+{
+ WORD32 i;
+ process_ctxt_t *ps_proc = NULL;
+ me_ctxt_t *ps_me_ctxt = NULL;
+
+ /* Init luma forward transform fn ptr */
+ ps_codec->pf_resi_trans_quant_4x4 = ih264_resi_trans_quant_4x4_sse42;
+ ps_codec->pf_resi_trans_quant_chroma_4x4 = ih264_resi_trans_quant_chroma_4x4_sse42;
+ ps_codec->pf_hadamard_quant_4x4 = ih264_hadamard_quant_4x4_sse42;
+ ps_codec->pf_hadamard_quant_2x2_uv = ih264_hadamard_quant_2x2_uv_sse42;
+
+ /* Init inverse transform fn ptr */
+ ps_codec->pf_iquant_itrans_recon_4x4 = ih264_iquant_itrans_recon_4x4_sse42;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4 = ih264_iquant_itrans_recon_chroma_4x4_sse42;
+ ps_codec->pf_ihadamard_scaling_4x4 = ih264_ihadamard_scaling_4x4_sse42;
+
+ /* sad me level functions (codec context copies) */
+ ps_codec->apf_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42;
+ ps_codec->apf_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42;
+ ps_codec->pf_compute_sad_16x8 = ime_compute_sad_16x8_sse42;
+
+ /* sad me level functions (ME context of each process context) */
+ for(i = 0; i < (MAX_PROCESS_CTXT); i++)
+ {
+ ps_proc = &ps_codec->as_process[i];
+ ps_me_ctxt = &ps_proc->s_me_ctxt;
+
+ ps_me_ctxt->pf_ime_compute_sad_16x16[0] = ime_compute_sad_16x16_sse42;
+ ps_me_ctxt->pf_ime_compute_sad_16x16[1] = ime_compute_sad_16x16_fast_sse42;
+ ps_me_ctxt->pf_ime_compute_sad_16x8 = ime_compute_sad_16x8_sse42;
+ ps_me_ctxt->pf_ime_compute_sad4_diamond = ime_calculate_sad4_prog_sse42;
+ ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16 = ime_sub_pel_compute_sad_16x16_sse42;
+ ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16 = ime_compute_satqd_16x16_lumainter_sse42;
+ }
+}
diff --git a/encoder/x86/ih264e_function_selector_ssse3.c b/encoder/x86/ih264e_function_selector_ssse3.c
new file mode 100755
index 0000000..7401e53
--- /dev/null
+++ b/encoder/x86/ih264e_function_selector_ssse3.c
@@ -0,0 +1,190 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_function_selector_ssse3.c
+*
+* @brief
+* Contains functions to initialize function pointers of codec context
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ih264e_init_function_ptr_ssse3
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+
+/* System Include files */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User Include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_defs.h"
+#include "ih264_size_defs.h"
+#include "ih264e_defs.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_distortion_metrics.h"
+#include "ime_structs.h"
+#include "ih264_defs.h"
+#include "ih264_error.h"
+#include "ih264_structs.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+#include "ih264e_structs.h"
+#include "ih264e_platform_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264e_defs.h"
+#include "ih264e_structs.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ih264e_core_coding.h"
+#include "ih264_cavlc_tables.h"
+#include "ih264e_cavlc.h"
+#include "ih264_padding.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264_mem_fns.h"
+#include "ih264e_fmt_conv.h"
+#include "ih264e_half_pel.h"
+
+/**
+*******************************************************************************
+*
+* @brief Install the SSSE3 variants of the intra/inter prediction, inverse
+* transform, deblocking, padding and memory function pointers
+*
+* @par Description: overwrites function pointers of the codec context with
+* their SSSE3 implementations
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks entries that are not assigned here (for example
+* apf_intra_pred_8_l[1] and apf_intra_pred_c[0]) are left untouched, so they
+* must have been initialized by the caller beforehand
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_ssse3(codec_t *ps_codec)
+{
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 16x16 */
+ ps_codec->apf_intra_pred_16_l[0] = ih264_intra_pred_luma_16x16_mode_vert_ssse3;
+ ps_codec->apf_intra_pred_16_l[1] = ih264_intra_pred_luma_16x16_mode_horz_ssse3;
+ ps_codec->apf_intra_pred_16_l[2] = ih264_intra_pred_luma_16x16_mode_dc_ssse3;
+ ps_codec->apf_intra_pred_16_l[3] = ih264_intra_pred_luma_16x16_mode_plane_ssse3;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 4x4 */
+ ps_codec->apf_intra_pred_4_l[0] = ih264_intra_pred_luma_4x4_mode_vert_ssse3;
+ ps_codec->apf_intra_pred_4_l[1] = ih264_intra_pred_luma_4x4_mode_horz_ssse3;
+ ps_codec->apf_intra_pred_4_l[2] = ih264_intra_pred_luma_4x4_mode_dc_ssse3;
+ ps_codec->apf_intra_pred_4_l[3] = ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3;
+ ps_codec->apf_intra_pred_4_l[4] = ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3;
+ ps_codec->apf_intra_pred_4_l[5] = ih264_intra_pred_luma_4x4_mode_vert_r_ssse3;
+ ps_codec->apf_intra_pred_4_l[6] = ih264_intra_pred_luma_4x4_mode_horz_d_ssse3;
+ ps_codec->apf_intra_pred_4_l[7] = ih264_intra_pred_luma_4x4_mode_vert_l_ssse3;
+ ps_codec->apf_intra_pred_4_l[8] = ih264_intra_pred_luma_4x4_mode_horz_u_ssse3;
+
+ /* Init function pointers for intra pred leaf level functions luma
+ * Intra 8x8 (index 1 is not overridden here) */
+ ps_codec->apf_intra_pred_8_l[0] = ih264_intra_pred_luma_8x8_mode_vert_ssse3;
+ ps_codec->apf_intra_pred_8_l[2] = ih264_intra_pred_luma_8x8_mode_dc_ssse3;
+ ps_codec->apf_intra_pred_8_l[3] = ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3;
+ ps_codec->apf_intra_pred_8_l[4] = ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3;
+ ps_codec->apf_intra_pred_8_l[5] = ih264_intra_pred_luma_8x8_mode_vert_r_ssse3;
+ ps_codec->apf_intra_pred_8_l[6] = ih264_intra_pred_luma_8x8_mode_horz_d_ssse3;
+ ps_codec->apf_intra_pred_8_l[7] = ih264_intra_pred_luma_8x8_mode_vert_l_ssse3;
+ ps_codec->apf_intra_pred_8_l[8] = ih264_intra_pred_luma_8x8_mode_horz_u_ssse3;
+
+ /* Init function pointers for intra pred leaf level functions chroma
+ * Intra 8x8 (index 0 is not overridden here) */
+ ps_codec->apf_intra_pred_c[1] = ih264_intra_pred_chroma_8x8_mode_horz_ssse3;
+ ps_codec->apf_intra_pred_c[2] = ih264_intra_pred_chroma_8x8_mode_vert_ssse3;
+ ps_codec->apf_intra_pred_c[3] = ih264_intra_pred_chroma_8x8_mode_plane_ssse3;
+
+ /* Init inverse transform fn ptr */
+ ps_codec->pf_iquant_itrans_recon_8x8 = ih264_iquant_itrans_recon_8x8_ssse3;
+ ps_codec->pf_iquant_itrans_recon_4x4_dc = ih264_iquant_itrans_recon_4x4_dc_ssse3;
+ ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc = ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3;
+
+ /* Init fn ptr luma deblocking */
+ ps_codec->pf_deblk_luma_vert_bs4 = ih264_deblk_luma_vert_bs4_ssse3;
+ ps_codec->pf_deblk_luma_vert_bslt4 = ih264_deblk_luma_vert_bslt4_ssse3;
+ ps_codec->pf_deblk_luma_horz_bs4 = ih264_deblk_luma_horz_bs4_ssse3;
+ ps_codec->pf_deblk_luma_horz_bslt4 = ih264_deblk_luma_horz_bslt4_ssse3;
+ /* Init fn ptr chroma deblocking */
+ ps_codec->pf_deblk_chroma_vert_bs4 = ih264_deblk_chroma_vert_bs4_ssse3;
+ ps_codec->pf_deblk_chroma_vert_bslt4 = ih264_deblk_chroma_vert_bslt4_ssse3;
+ ps_codec->pf_deblk_chroma_horz_bs4 = ih264_deblk_chroma_horz_bs4_ssse3;
+ ps_codec->pf_deblk_chroma_horz_bslt4 = ih264_deblk_chroma_horz_bslt4_ssse3;
+
+ /* Padding Functions */
+ ps_codec->pf_pad_left_luma = ih264_pad_left_luma_ssse3;
+ ps_codec->pf_pad_left_chroma = ih264_pad_left_chroma_ssse3;
+ ps_codec->pf_pad_right_luma = ih264_pad_right_luma_ssse3;
+ ps_codec->pf_pad_right_chroma = ih264_pad_right_chroma_ssse3;
+
+ /* Inter pred leaf level functions */
+ ps_codec->pf_inter_pred_luma_copy = ih264_inter_pred_luma_copy_ssse3;
+ ps_codec->pf_inter_pred_luma_horz = ih264_inter_pred_luma_horz_ssse3;
+ ps_codec->pf_inter_pred_luma_vert = ih264_inter_pred_luma_vert_ssse3;
+ ps_codec->pf_inter_pred_chroma = ih264_inter_pred_chroma_ssse3;
+
+ /* memory handling operations */
+ ps_codec->pf_mem_cpy_mul8 = ih264_memcpy_mul_8_ssse3;
+ ps_codec->pf_mem_set_mul8 = ih264_memset_mul_8_ssse3;
+
+ /* intra mode eval - encoder level functions */
+ ps_codec->pf_ih264e_evaluate_intra16x16_modes = ih264e_evaluate_intra16x16_modes_ssse3;
+ ps_codec->pf_ih264e_evaluate_intra_4x4_modes = ih264e_evaluate_intra_4x4_modes_ssse3;
+ ps_codec->pf_ih264e_evaluate_intra_chroma_modes = ih264e_evaluate_intra_chroma_modes_ssse3;
+
+ /* Half pel generation functions - encoder level */
+ ps_codec->pf_ih264e_sixtapfilter_horz = ih264e_sixtapfilter_horz_ssse3;
+ ps_codec->pf_ih264e_sixtap_filter_2dvh_vert = ih264e_sixtap_filter_2dvh_vert_ssse3;
+}
diff --git a/encoder/x86/ih264e_half_pel_ssse3.c b/encoder/x86/ih264e_half_pel_ssse3.c
new file mode 100755
index 0000000..42580fa
--- /dev/null
+++ b/encoder/x86/ih264e_half_pel_ssse3.c
@@ -0,0 +1,487 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_half_pel_ssse3.c
+ *
+ * @brief
+ * Contains the x86 intrinsic function definitions for 6-tap horizontal filter
+ * and cascaded 2D filter used in motion estimation in H264 encoder.
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * ih264e_sixtapfilter_horz_ssse3
+ * ih264e_sixtap_filter_2dvh_vert_ssse3
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <limits.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ithread.h"
+#include "ih264_platform_macros.h"
+#include "ih264_defs.h"
+#include "ih264e_half_pel.h"
+#include "ih264_macros.h"
+#include "ih264e_half_pel.h"
+#include "ih264e_debug.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/*
+*******************************************************************************
+*
+* @brief
+* Inter prediction luma filter for horizontal input (filter run for
+* width = 17 and height = 16)
+*
+* @par Description:
+* Applies a 6 tap horizontal filter (1, -5, 20, 20, -5, 1) with rounding
+* (+16, >>5) and clips to 8 bits; see sec. 8.4.2.2.1 "Luma sample interpolation process"
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source; pu1_src[-2] .. pu1_src[21] are read per row
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination; 17 bytes are written per row
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @returns
+* None
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void ih264e_sixtapfilter_horz_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd)
+{
+ WORD32 ht;
+ WORD32 tmp;
+
+ __m128i src_r0_16x8b, src_r1_16x8b, src_r0_sht_16x8b, src_r1_sht_16x8b;
+ __m128i src_r0_t1_16x8b, src_r1_t1_16x8b;
+
+ __m128i res_r0_t1_8x16b, res_r0_t2_8x16b, res_r0_t3_8x16b;
+ __m128i res_r1_t1_8x16b, res_r1_t2_8x16b, res_r1_t3_8x16b;
+
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+ __m128i const_val16_8x16b;
+
+ ht = 16;
+ pu1_src -= 2; // the filter input starts from x[-2] (till x[3])
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ const_val16_8x16b = _mm_set1_epi16(16);
+
+ //Row0 : a0 a1 a2 a3 a4 a5 a6 a7 a8 a9..... (loaded from pu1_src)
+ //Row0 : b0 b1 b2 b3 b4 b5 b6 b7 b8 b9..... (loaded from pu1_src + 8)
+ //b0 is same as a8. Similarly other bn pixels are same as a(n+8) pixels.
+
+ do
+ {
+ src_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); //a0 a1 a2 a3 a4 a5 a6 a7 a8 a9....a15
+ src_r1_16x8b = _mm_loadu_si128((__m128i *)(pu1_src + 8)); //b0 b1 b2 b3 b4 b5 b6 b7 b8 b9....b15
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_16x8b, 1); //a1 a2 a3 a4 a5 a6 a7 a8 a9....a15 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_16x8b, 1); //b1 b2 b3 b4 b5 b6 b7 b8 b9....b15 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a0 a1 a1 a2 a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b0 b1 b1 b2 b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8
+
+ res_r0_t1_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff0_1_16x8b); //a0*c0+a1*c1 a1*c0+a2*c1 a2*c0+a3*c1 a3*c0+a4*c1
+ //a4*c0+a5*c1 a5*c0+a6*c1 a6*c0+a7*c1 a7*c0+a8*c1
+ res_r1_t1_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff0_1_16x8b); //b0*c0+b1*c1 b1*c0+b2*c1 b2*c0+b3*c1 b3*c0+b4*c1
+ //b4*c0+b5*c1 b5*c0+b6*c1 b6*c0+b7*c1 b7*c0+b8*c1
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a2 a3 a4 a5 a6 a7 a8 a9....a15 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b2 b3 b4 b5 b6 b7 b8 b9....b15 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a3 a4 a5 a6 a7 a8 a9....a15 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b3 b4 b5 b6 b7 b8 b9....b15 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a2 a3 a3 a4 a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b2 b3 b3 b4 b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10
+
+ res_r0_t2_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff2_3_16x8b); //a2*c2+a3*c3 a3*c2+a4*c3 a4*c2+a5*c3 a5*c2+a6*c3
+ //a6*c2+a7*c3 a7*c2+a8*c3 a8*c2+a9*c3 a9*c2+a10*c3
+ res_r1_t2_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff2_3_16x8b); //b2*c2+b3*c3 b3*c2+b4*c3 b4*c2+b5*c3 b5*c2+b6*c3
+ //b6*c2+b7*c3 b7*c2+b8*c3 b8*c2+b9*c3 b9*c2+b10*c3
+
+ src_r0_16x8b = _mm_srli_si128(src_r0_16x8b, 2); //a4 a5 a6 a7 a8 a9....a15 0 0 0 0
+ src_r1_16x8b = _mm_srli_si128(src_r1_16x8b, 2); //b4 b5 b6 b7 b8 b9....b15 0 0 0 0
+
+ src_r0_sht_16x8b = _mm_srli_si128(src_r0_sht_16x8b, 2); //a5 a6 a7 a8 a9....a15 0 0 0 0 0
+ src_r1_sht_16x8b = _mm_srli_si128(src_r1_sht_16x8b, 2); //b5 b6 b7 b8 b9....b15 0 0 0 0 0
+
+ src_r0_t1_16x8b = _mm_unpacklo_epi8(src_r0_16x8b, src_r0_sht_16x8b); //a4 a5 a5 a6 a6 a7 a7 a8 a8 a9 a9 a10 a10 a11 a11 a12
+ src_r1_t1_16x8b = _mm_unpacklo_epi8(src_r1_16x8b, src_r1_sht_16x8b); //b4 b5 b5 b6 b6 b7 b7 b8 b8 b9 b9 b10 b10 b11 b11 b12
+
+ res_r0_t3_8x16b = _mm_maddubs_epi16(src_r0_t1_16x8b, coeff4_5_16x8b); //a4*c4+a5*c5 a5*c4+a6*c5 a6*c4+a7*c5 a7*c4+a8*c5
+ //a8*c4+a9*c5 a9*c4+a10*c5 a10*c4+a11*c5 a11*c4+a12*c5
+ res_r1_t3_8x16b = _mm_maddubs_epi16(src_r1_t1_16x8b, coeff4_5_16x8b); //b4*c4+b5*c5 b5*c4+b6*c5 b6*c4+b7*c5 b7*c4+b8*c5
+ //b8*c4+b9*c5 b9*c4+b10*c5 b10*c4+b11*c5 b11*c4+b12*c5
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t2_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t2_8x16b);
+ res_r0_t3_8x16b = _mm_add_epi16(res_r0_t3_8x16b, const_val16_8x16b);
+ res_r1_t3_8x16b = _mm_add_epi16(res_r1_t3_8x16b, const_val16_8x16b);
+ res_r0_t1_8x16b = _mm_add_epi16(res_r0_t1_8x16b, res_r0_t3_8x16b);
+ res_r1_t1_8x16b = _mm_add_epi16(res_r1_t1_8x16b, res_r1_t3_8x16b);
+
+ tmp = ((pu1_src[18] + pu1_src[19]) << 2) - pu1_src[17] - pu1_src[20]; //17th output pixel is computed in scalar code
+ tmp = pu1_src[16] + pu1_src[21] + (5 * tmp); //tmp may be negative here, so multiply instead of left shifting
+
+ res_r0_t1_8x16b = _mm_srai_epi16(res_r0_t1_8x16b, 5); //shifting right by 5 bits.
+ res_r1_t1_8x16b = _mm_srai_epi16(res_r1_t1_8x16b, 5);
+ tmp = (tmp + 16) >> 5;
+
+ src_r0_16x8b = _mm_packus_epi16(res_r0_t1_8x16b, res_r1_t1_8x16b);
+ pu1_dst[16] = CLIP_U8(tmp);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, src_r0_16x8b);
+
+ ht--;
+ pu1_src += src_strd;
+ pu1_dst += dst_strd;
+ }
+ while(ht > 0);
+}
+
+/*
+*******************************************************************************
+*
+* @brief
+* This function implements a two stage cascaded six tap filter. It
+* applies the six tap filter in the vertical direction on the
+* predictor values, followed by applying the same filter in the
+* horizontal direction on the output of the first stage. The six tap
+* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
+* interpolation process" (Filter run for width = 17 and height =17)
+*
+* @par Description:
+* The function interpolates the predictors first in the vertical direction
+* and then in the horizontal direction to output the (1/2,1/2). The output
+* of the first stage of the filter is stored in the buffer pointed to by
+* pi4_pred1 in 16 bit precision (22 values per row, 17 rows).
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_dst1
+* UWORD8 pointer to the destination(Vertical filtered output)
+*
+* @param[out] pu1_dst2
+* UWORD8 pointer to the destination(output after applying horizontal filter
+* to the intermediate vertical output)
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride of pu1_dst1 and pu1_dst2
+*
+* @param[out] pi4_pred1
+* Pointer to the intermediate buffer; accessed as 16 bit (WORD16) elements
+*
+* @param[in] pred1_strd
+* stride of pi4_pred1; it is doubled internally before use on the WORD16 view
+*
+* @returns
+* None
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+void ih264e_sixtap_filter_2dvh_vert_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_dst1,
+ UWORD8 *pu1_dst2,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD32 *pi4_pred1,
+ WORD32 pred1_strd)
+{
+ WORD32 ht;
+ WORD16 *pi2_pred1;
+
+ ht = 17;
+ pi2_pred1 = (WORD16 *)pi4_pred1;
+ pred1_strd = pred1_strd << 1;
+
+ // Vertical 6-tap filter
+ {
+ __m128i src1_r0_16x8b, src1_r1_16x8b, src1_r2_16x8b;
+ __m128i src1_r3_16x8b, src1_r4_16x8b, src1_r5_16x8b;
+ __m128i src2_r0_16x8b, src2_r1_16x8b, src2_r2_16x8b;
+ __m128i src2_r3_16x8b, src2_r4_16x8b, src2_r5_16x8b;
+
+ __m128i src_r0r1_16x8b, src_r2r3_16x8b, src_r4r5_16x8b;
+
+ __m128i res_t1_8x16b, res_t2_8x16b, res_t3_8x16b;
+ __m128i coeff0_1_16x8b, coeff2_3_16x8b, coeff4_5_16x8b;
+
+ coeff0_1_16x8b = _mm_set1_epi32(0xFB01FB01); //c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_16x8b = _mm_set1_epi32(0x14141414); //c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_16x8b = _mm_set1_epi32(0x01FB01FB); //c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5 c4 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+
+ pu1_src -= 2;
+ pu1_src -= src_strd << 1; // the filter input starts from x[-2] (till x[3])
+
+ // Loading first five rows to start first row processing.
+ // 22 values loaded in each row.
+ src1_r0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ src2_r0_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+ pu1_src += src_strd;
+
+ src1_r1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ src2_r1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+ pu1_src += src_strd;
+
+ src1_r2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ src2_r2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+ pu1_src += src_strd;
+
+ src1_r3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ src2_r3_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+ pu1_src += src_strd;
+
+ src1_r4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ src2_r4_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+ pu1_src += src_strd;
+
+ do
+ {
+ src1_r5_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ src2_r5_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + 14));
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src1_r0_16x8b, src1_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src1_r2_16x8b, src1_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src1_r4_16x8b, src1_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)pi2_pred1, res_t1_8x16b);
+
+ src_r0r1_16x8b = _mm_unpackhi_epi8(src1_r0_16x8b, src1_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpackhi_epi8(src1_r2_16x8b, src1_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpackhi_epi8(src1_r4_16x8b, src1_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_pred1 + 8), res_t1_8x16b);
+
+ src_r0r1_16x8b = _mm_unpacklo_epi8(src2_r0_16x8b, src2_r1_16x8b);
+ src_r2r3_16x8b = _mm_unpacklo_epi8(src2_r2_16x8b, src2_r3_16x8b);
+ src_r4r5_16x8b = _mm_unpacklo_epi8(src2_r4_16x8b, src2_r5_16x8b);
+
+ res_t1_8x16b = _mm_maddubs_epi16(src_r0r1_16x8b, coeff0_1_16x8b);
+ res_t2_8x16b = _mm_maddubs_epi16(src_r2r3_16x8b, coeff2_3_16x8b);
+ res_t3_8x16b = _mm_maddubs_epi16(src_r4r5_16x8b, coeff4_5_16x8b);
+
+ res_t1_8x16b = _mm_add_epi16(res_t1_8x16b, res_t2_8x16b);
+ res_t1_8x16b = _mm_add_epi16(res_t3_8x16b, res_t1_8x16b);
+
+ _mm_storeu_si128((__m128i *)(pi2_pred1 + 14), res_t1_8x16b);
+
+ src1_r0_16x8b = src1_r1_16x8b;
+ src1_r1_16x8b = src1_r2_16x8b;
+ src1_r2_16x8b = src1_r3_16x8b;
+ src1_r3_16x8b = src1_r4_16x8b;
+ src1_r4_16x8b = src1_r5_16x8b;
+
+ src2_r0_16x8b = src2_r1_16x8b;
+ src2_r1_16x8b = src2_r2_16x8b;
+ src2_r2_16x8b = src2_r3_16x8b;
+ src2_r3_16x8b = src2_r4_16x8b;
+ src2_r4_16x8b = src2_r5_16x8b;
+
+ ht--;
+ pu1_src += src_strd;
+ pi2_pred1 += pred1_strd;
+ }
+ while(ht > 0);
+ }
+
+ ht = 17;
+ pi2_pred1 = (WORD16 *)pi4_pred1;
+
+ // Horizontal 6-tap filter
+ {
+ WORD32 temp;
+
+ __m128i src_r0_8x16b, src_r1_8x16b, src_r2_8x16b, src_r3_8x16b;
+ __m128i src_r4_8x16b, src_r5_8x16b;
+ __m128i src_r0r1_8x16b, src_r2r3_8x16b, src_r4r5_8x16b;
+ __m128i res_vert1_8x16b, res_vert2_8x16b, res_16x8b;
+
+ __m128i res_t0_4x32b, res_t1_4x32b, res_t2_4x32b, res_t3_4x32b;
+ __m128i res_c0_8x16b, res_c1_8x16b;
+
+ __m128i coeff0_1_8x16b, coeff2_3_8x16b, coeff4_5_8x16b;
+ __m128i const_val512_4x32b, const_val16_8x16b;
+
+ coeff0_1_8x16b = _mm_set1_epi32(0xFFFB0001); //c0 c1 c0 c1 c0 c1 c0 c1
+ coeff2_3_8x16b = _mm_set1_epi32(0x00140014); //c2 c3 c2 c3 c2 c3 c2 c3
+ coeff4_5_8x16b = _mm_set1_epi32(0x0001FFFB); //c4 c5 c4 c5 c4 c5 c4 c5
+ //c0 = c5 = 1, c1 = c4 = -5, c2 = c3 = 20
+ const_val512_4x32b = _mm_set1_epi32(512);
+ const_val16_8x16b = _mm_set1_epi16(16);
+
+ do
+ {
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 1));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 2));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 3));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 4));
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 5));
+
+ res_vert1_8x16b = _mm_add_epi16(src_r2_8x16b, const_val16_8x16b);
+ res_vert1_8x16b = _mm_srai_epi16(res_vert1_8x16b, 5); //shifting right by 5 bits.
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_c0_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+
+ src_r0_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8));
+ src_r1_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 1));
+ src_r2_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 2));
+ src_r3_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 3));
+ src_r4_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 4));
+ src_r5_8x16b = _mm_loadu_si128((__m128i *)(pi2_pred1 + 8 + 5));
+
+ res_vert2_8x16b = _mm_add_epi16(src_r2_8x16b, const_val16_8x16b);
+ res_vert2_8x16b = _mm_srai_epi16(res_vert2_8x16b, 5); //shifting right by 5 bits.
+
+ src_r0r1_8x16b = _mm_unpacklo_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpacklo_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpacklo_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t0_4x32b = _mm_srai_epi32(res_t1_4x32b ,10);
+
+ src_r0r1_8x16b = _mm_unpackhi_epi16(src_r0_8x16b, src_r1_8x16b);
+ src_r2r3_8x16b = _mm_unpackhi_epi16(src_r2_8x16b, src_r3_8x16b);
+ src_r4r5_8x16b = _mm_unpackhi_epi16(src_r4_8x16b, src_r5_8x16b);
+
+ res_t1_4x32b = _mm_madd_epi16(src_r0r1_8x16b, coeff0_1_8x16b);
+ res_t2_4x32b = _mm_madd_epi16(src_r2r3_8x16b, coeff2_3_8x16b);
+ res_t3_4x32b = _mm_madd_epi16(src_r4r5_8x16b, coeff4_5_8x16b);
+
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t2_4x32b);
+ res_t3_4x32b = _mm_add_epi32(res_t3_4x32b, const_val512_4x32b);
+ res_t1_4x32b = _mm_add_epi32(res_t1_4x32b, res_t3_4x32b);
+ res_t1_4x32b = _mm_srai_epi32(res_t1_4x32b, 10);
+
+ res_c1_8x16b = _mm_packs_epi32(res_t0_4x32b, res_t1_4x32b);
+
+ res_16x8b = _mm_packus_epi16(res_vert1_8x16b, res_vert2_8x16b);
+ _mm_storeu_si128((__m128i *)pu1_dst1, res_16x8b);
+ pu1_dst1[16] = CLIP_U8((pi2_pred1[18] + 16) >> 5);
+
+ res_16x8b = _mm_packus_epi16(res_c0_8x16b, res_c1_8x16b);
+ _mm_storeu_si128((__m128i *)pu1_dst2, res_16x8b);
+ temp = 4 * (pi2_pred1[18] + pi2_pred1[19]) - pi2_pred1[17] - pi2_pred1[20]; //17th output pixel is computed in scalar code
+ temp = pi2_pred1[16] + pi2_pred1[21] + 5 * temp; //operands may be negative, so multiply instead of left shifting
+ pu1_dst2[16] = CLIP_U8((temp + 512) >> 10);
+
+ ht--;
+ pi2_pred1 += pred1_strd;
+ pu1_dst1 += dst_strd;
+ pu1_dst2 += dst_strd;
+ }
+ while(ht > 0);
+ }
+}
diff --git a/encoder/x86/ih264e_intra_modes_eval_ssse3.c b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
new file mode 100755
index 0000000..657921f
--- /dev/null
+++ b/encoder/x86/ih264e_intra_modes_eval_ssse3.c
@@ -0,0 +1,1259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264e_intra_modes_eval_ssse3.c
+*
+* @brief
+* This file contains definitions of routines that perform rate distortion
+* analysis on a macroblock if they are to be coded as intra.
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* ih264e_evaluate_intra16x16_modes_ssse3
+* ih264e_evaluate_intra_4x4_modes_ssse3
+* ih264e_evaluate_intra_chroma_modes_ssse3
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include <immintrin.h>
+
+/* User include files */
+#include "ih264e_config.h"
+#include "ih264_typedefs.h"
+#include "ih264e_defs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264_debug.h"
+#include "ih264_defs.h"
+#include "ih264_macros.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_structs.h"
+#include "ih264_common_tables.h"
+#include "ih264_trans_quant_itrans_iquant.h"
+#include "ih264_inter_pred_filters.h"
+#include "ih264_mem_fns.h"
+#include "ih264_padding.h"
+#include "ih264_intra_pred_filters.h"
+#include "ih264_deblk_edge_filters.h"
+#include "ime_distortion_metrics.h"
+#include "ih264e_error.h"
+#include "ih264e_bitstream.h"
+#include "ime_structs.h"
+
+#include "irc_cntrl_param.h"
+#include "irc_frame_info_collector.h"
+#include "ih264e_rate_control.h"
+
+#include "ih264e_structs.h"
+#include "ih264e_intra_modes_eval.h"
+#include "ih264e_globals.h"
+#include "ime_platform_macros.h"
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+******************************************************************************
+*
+* @brief
+* evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
+* prediction.
+*
+* @par Description
+* This function evaluates the enabled modes among the first three 16x16 modes,
+* computes their SADs and, if the best SAD beats *pu4_sadmin, writes that mode's prediction to pu1_dst.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels_i16
+* UWORD8 pointer to neighbouring pels: bytes 0..15 are read as the left column (byte 15 pairs with row 0), 16 top pels are read from byte 17
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] n_avblty
+* availability of neighbouring pixels (tested with LEFT_MB_AVAILABLE_MASK and TOP_MB_AVAILABLE_MASK)
+*
+* @param[out] u4_intra_mode
+* pointer to the variable in which best mode is returned (written only when *pu4_sadmin is improved)
+*
+* @param[in,out] pu4_sadmin
+* pointer to the smallest SAD so far; overwritten only when a smaller SAD is found here
+*
+* @param[in] u4_valid_intra_modes
+* bit mask of modes to evaluate: bit 0 = VERT, bit 1 = HORZ, bit 2 = DC
+*
+* @return
+* None
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra16x16_modes_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels_i16,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes)
+{
+ UWORD8 *pu1_src_temp;
+
+ WORD32 left, top, horz_flag, vert_flag, dc_flag;
+ WORD32 sad_vert, sad_horz, sad_dc, min_sad;
+
+ WORD32 cnt, dcval;
+ WORD32 src_strd2, src_strd3, src_strd4;
+ WORD32 dst_strd2, dst_strd3, dst_strd4;
+
+ __m128i src1_16x8b, src2_16x8b, src3_16x8b, src4_16x8b;
+ __m128i val1_16x8b, val2_16x8b, val3_16x8b, val4_16x8b;
+ __m128i sad1_8x16b, sad2_8x16b, sad3_8x16b, sad4_8x16b;
+
+ __m128i sad_8x16b, val_16x8b, zero_vector;
+ /* modes that are not evaluated keep INT_MAX so they lose the MIN3 comparison below */
+ sad_vert = INT_MAX;
+ sad_horz = INT_MAX;
+ sad_dc = INT_MAX;
+ /* precompute 2x, 3x and 4x strides: every pass below handles four rows */
+ src_strd2 = src_strd << 1;
+ src_strd4 = src_strd << 2;
+ src_strd3 = src_strd + src_strd2;
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd4 = dst_strd << 2;
+ dst_strd3 = dst_strd + dst_strd2;
+
+ left = (n_avblty & LEFT_MB_AVAILABLE_MASK);
+ top = (n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; /* NOTE(review): the >> 2 presumes the top mask is bit 2 - confirm */
+
+ zero_vector = _mm_setzero_si128();
+ /* HORZ needs the left neighbours and VERT the top ones; DC is evaluated whenever its bit is set */
+ horz_flag = left && ((u4_valid_intra_modes & 02) != 0);
+ vert_flag = top && ((u4_valid_intra_modes & 01) != 0);
+ dc_flag = (u4_valid_intra_modes & 04) != 0;
+
+ if(horz_flag)
+ {
+ pu1_src_temp = pu1_src;
+ /* rows 0-3: the left pels are read in reverse, index 15 pairs with row 0 */
+ val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[15]);
+ val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[14]);
+ val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[13]);
+ val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[12]);
+
+ src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+ src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+ src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+ src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+ /* _mm_sad_epu8 yields one partial sum per 8-byte half of each row */
+ sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+ sad2_8x16b = _mm_sad_epu8(val2_16x8b, src2_16x8b);
+ sad3_8x16b = _mm_sad_epu8(val3_16x8b, src3_16x8b);
+ sad4_8x16b = _mm_sad_epu8(val4_16x8b, src4_16x8b);
+ /* two rounds of packing leave the eight partial sums of four rows in eight 16-bit lanes */
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+ sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+
+ cnt = 11; /* left-pel index for row 4 */
+ sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+ do
+ {
+ pu1_src_temp += src_strd4;
+
+ val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt]);
+ val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 1]);
+ val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 2]);
+ val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 3]);
+
+ src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+ src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+ src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+ src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+ sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+ sad2_8x16b = _mm_sad_epu8(val2_16x8b, src2_16x8b);
+ sad3_8x16b = _mm_sad_epu8(val3_16x8b, src3_16x8b);
+ sad4_8x16b = _mm_sad_epu8(val4_16x8b, src4_16x8b);
+
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+ sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+
+ cnt -= 4;
+ sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b);
+ }
+ while(cnt >= 0); /* three passes: cnt = 11, 7, 3 (rows 4-15) */
+ /* three horizontal adds fold the eight 16-bit lanes into lane 0 */
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+
+ sad_horz = _mm_extract_epi16(sad_8x16b, 0);
+ }
+
+ if(vert_flag)
+ {
+ pu1_src_temp = pu1_src;
+
+ val1_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17)); /* the 16 top pels, compared against every source row */
+
+ src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+ src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+ src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+ src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+ sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+ sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b);
+ sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b);
+ sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b);
+
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+ sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+
+ cnt = 11; /* used purely as a pass counter in this loop */
+ sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+ do
+ {
+ pu1_src_temp += src_strd4;
+
+ src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+ src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+ src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+ src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+ sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+ sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b);
+ sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b);
+ sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b);
+
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+ sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+
+ cnt -= 4;
+ sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b);
+ }
+ while(cnt >= 0); /* three passes: cnt = 11, 7, 3 (rows 4-15) */
+
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+
+ sad_vert = _mm_extract_epi16(sad_8x16b, 0);
+ }
+ /* the DC value is computed even when DC is not evaluated; the final else branch below reuses it */
+ dcval = 0;
+
+ if(left)
+ {
+ val_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels_i16);
+ dcval += 8; /* rounding term, 8 per available side */
+ /* SAD against zero sums the 16 bytes; the two half sums land in words 0 and 4 */
+ sad1_8x16b = _mm_sad_epu8(val_16x8b, zero_vector);
+ dcval += _mm_extract_epi16(sad1_8x16b, 0);
+ dcval += _mm_extract_epi16(sad1_8x16b, 4);
+ }
+ if(top)
+ {
+ val_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17));
+ dcval += 8;
+
+ sad1_8x16b = _mm_sad_epu8(val_16x8b, zero_vector);
+ dcval += _mm_extract_epi16(sad1_8x16b, 0);
+ dcval += _mm_extract_epi16(sad1_8x16b, 4);
+ }
+ dcval = dcval >> (3 + left + top); /* NOTE(review): relies on left and top each being 0 or 1 - confirm the mask values */
+ dcval += ((left == 0) & (top == 0)) << 7; /* no neighbours: dcval is still 0 here and becomes 128 */
+
+ if(dc_flag)
+ {
+ pu1_src_temp = pu1_src;
+ val1_16x8b = _mm_set1_epi8(dcval);
+
+ src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+ src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+ src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+ src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+ sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+ sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b);
+ sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b);
+ sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b);
+
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+ sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+
+ cnt = 12;
+ sad_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+ do
+ {
+ pu1_src_temp += src_strd4;
+
+ src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src_temp);
+ src2_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd));
+ src3_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd2));
+ src4_16x8b = _mm_loadu_si128((__m128i *)(pu1_src_temp + src_strd3));
+
+ sad1_8x16b = _mm_sad_epu8(val1_16x8b, src1_16x8b);
+ sad2_8x16b = _mm_sad_epu8(val1_16x8b, src2_16x8b);
+ sad3_8x16b = _mm_sad_epu8(val1_16x8b, src3_16x8b);
+ sad4_8x16b = _mm_sad_epu8(val1_16x8b, src4_16x8b);
+
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad2_8x16b);
+ sad3_8x16b = _mm_packs_epi32(sad3_8x16b, sad4_8x16b);
+ sad1_8x16b = _mm_packs_epi32(sad1_8x16b, sad3_8x16b);
+
+ cnt -= 4;
+ sad_8x16b = _mm_add_epi16(sad_8x16b, sad1_8x16b);
+ }
+ while(cnt > 0); /* three passes: cnt = 12, 8, 4 (rows 4-15) */
+
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+ sad_8x16b = _mm_hadd_epi16(sad_8x16b, sad_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0);
+ }
+
+ // Write the prediction only if the best SAD found here beats the caller's current minimum
+ min_sad = MIN3(sad_horz, sad_vert, sad_dc);
+ if(min_sad < *pu4_sadmin)
+ {
+ *pu4_sadmin = min_sad;
+ if(min_sad == sad_vert) /* on ties VERT is preferred, then HORZ, then DC */
+ {
+ *u4_intra_mode = VERT_I16x16;
+ val1_16x8b = _mm_loadu_si128((__m128i *)(pu1_ngbr_pels_i16 + 17));
+ cnt = 15;
+ do
+ {
+ _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), val1_16x8b);
+
+ cnt -= 4;
+ pu1_dst += dst_strd4;
+ }
+ while(cnt > 0); /* four passes: cnt = 15, 11, 7, 3 (16 rows) */
+ }
+ else if(min_sad == sad_horz)
+ {
+ *u4_intra_mode = HORZ_I16x16;
+ cnt = 15; /* left-pel index for row 0 */
+ do
+ {
+ val1_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt]);
+ val2_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 1]);
+ val3_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 2]);
+ val4_16x8b = _mm_set1_epi8(pu1_ngbr_pels_i16[cnt - 3]);
+
+ _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val2_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val3_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), val4_16x8b);
+
+ cnt -= 4;
+ pu1_dst += dst_strd4;
+ }
+ while(cnt >= 0); /* four passes: cnt = 15, 11, 7, 3 (16 rows) */
+ }
+ else
+ {
+ *u4_intra_mode = DC_I16x16;
+ val1_16x8b = _mm_set1_epi8(dcval);
+ cnt = 15;
+ do
+ {
+ _mm_storeu_si128((__m128i *)pu1_dst, val1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), val1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), val1_16x8b);
+ _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), val1_16x8b);
+
+ cnt -= 4;
+ pu1_dst += dst_strd4;
+ }
+ while(cnt > 0); /* four passes: cnt = 15, 11, 7, 3 (16 rows) */
+ }
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief :Evaluate best intra 4x4 mode and do the prediction.
+*
+* @par Description
+* This function evaluates the enabled intra 4x4 modes, computes SAD and cost for each,
+* and writes the prediction of the cheapest mode to pu1_dst.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+* UWORD8 pointer to neighbouring pels; 16 bytes are loaded: l3 l2 l1 l0, top-left, then the top row from byte 5
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* Pointer to the variable in which best mode is returned
+*
+* @param[out] pu4_sadmin
+* Pointer to the variable in which minimum cost is returned (always written)
+*
+* @param[in] u4_valid_intra_modes
+* Bit mask of valid modes: 1 VERT, 2 HORZ, 4 DC, 8 DIAG_DL, 16 DIAG_DR, 32 VERT_R, 64 HORZ_D, 128 VERT_L, 256 HORZ_U
+*
+* @param[in] u4_lambda
+* Lambda value for computing cost from SAD
+*
+* @param[in] u4_predictd_mode
+* Predicted mode for cost computation
+*
+* @return none
+*
+******************************************************************************
+*/
+void ih264e_evaluate_intra_4x4_modes_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes,
+ UWORD32 u4_lambda,
+ UWORD32 u4_predictd_mode)
+{
+ WORD32 left, top;
+ WORD32 sad[MAX_I4x4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+ INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+ WORD32 cost[MAX_I4x4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
+ INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+ /* modes that are not evaluated keep INT_MAX in sad[] and cost[] */
+ WORD32 min_cost;
+ WORD32 lambda4 = u4_lambda << 2; /* mode cost charged when a mode differs from u4_predictd_mode */
+ WORD32 dst_strd2, dst_strd3;
+
+ __m128i left_top_16x8b, src_16x8b, pred0_16x8b, sad_8x16b;
+ __m128i pred1_16x8b, pred2_16x8b, pred3_16x8b, pred4_16x8b;
+ __m128i pred5_16x8b, pred6_16x8b, pred7_16x8b, pred8_16x8b;
+ __m128i shuffle_16x8b, zero_vector, mask_low_32b;
+
+ left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+ top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+ dst_strd2 = dst_strd << 1;
+ dst_strd3 = dst_strd + dst_strd2;
+
+ // Load the 4x4 source block (low 4 bytes of each row, packed row after row into src_16x8b) and 16 neighbour bytes
+ {
+ __m128i row1_16x8b, row2_16x8b;
+
+ row1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ row2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+ left_top_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels);
+
+ pu1_src += src_strd << 1;
+ src_16x8b = _mm_unpacklo_epi32(row1_16x8b, row2_16x8b);
+
+ row1_16x8b = _mm_loadl_epi64((__m128i *)pu1_src);
+ row2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_src + src_strd));
+ zero_vector = _mm_setzero_si128();
+
+ row1_16x8b = _mm_unpacklo_epi32(row1_16x8b, row2_16x8b);
+ src_16x8b = _mm_unpacklo_epi64(src_16x8b, row1_16x8b);
+ }
+
+ /* Computing SADs*/
+ if(u4_valid_intra_modes & 1)/* VERT mode valid? */
+ {
+ pred0_16x8b = _mm_srli_si128(left_top_16x8b, 5); /* the top row starts at byte 5 */
+ pred0_16x8b = _mm_shuffle_epi32(pred0_16x8b, 0); /* replicate t0..t3 into all four rows */
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred0_16x8b);
+ /* SAD = sum of the two 64-bit halves; cost adds lambda when the mode equals u4_predictd_mode, else 4 * lambda */
+ sad[VERT_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[VERT_I4x4] = sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 2)/* HORZ mode valid? */
+ {
+ shuffle_16x8b = _mm_setr_epi8(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0);
+ pred1_16x8b = _mm_shuffle_epi8(left_top_16x8b, shuffle_16x8b);
+ /* row r of the prediction is filled with neighbour byte 3 - r */
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred1_16x8b);
+
+ sad[HORZ_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[HORZ_I4x4] = sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 4)/* DC mode valid? */
+ {
+ if(top + left)
+ {
+ WORD32 shft = 1, dcval = 0;
+
+ __m128i val_16x8b, temp_16x8b, temp_8x16b;
+
+ val_16x8b = _mm_setzero_si128();
+
+ if(top)
+ {
+ temp_16x8b = _mm_srli_si128(left_top_16x8b, 5);
+ val_16x8b = _mm_alignr_epi8(temp_16x8b, val_16x8b, 4); /* t0..t3 enter at the top 4 bytes */
+ shft ++;
+ dcval += 2;
+ }
+ if(left)
+ {
+ val_16x8b = _mm_alignr_epi8(left_top_16x8b, val_16x8b, 4); /* l3..l0 enter at the top 4 bytes, earlier data moves down by 4 */
+ shft++;
+ dcval += 2;
+ }
+ /* the collected pels sit in the upper 8 bytes, so their sum is word 4 of the SAD against zero */
+ temp_8x16b = _mm_sad_epu8(val_16x8b, zero_vector);
+ dcval += _mm_extract_epi16(temp_8x16b, 4);
+ dcval = dcval >> shft; /* (sum + 2) >> 2 with one side, (sum + 4) >> 3 with both */
+ pred2_16x8b = _mm_set1_epi8(dcval);
+ }
+ else
+ pred2_16x8b = _mm_set1_epi8(128); /* neither top nor left available */
+
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred2_16x8b);
+
+ sad[DC_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[DC_I4x4] = sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes > 7)/* any mode beyond VERT, HORZ and DC requested? */
+ {
+ __m128i w11_16x8b, w121_16x8b;
+ __m128i temp1_16x8b, temp2_16x8b;
+
+ /* Performing FILT121 and FILT11 operation for all neighbour values*/
+ {
+ __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b;
+ __m128i const_2_8x16b;
+
+ const_2_8x16b = _mm_set1_epi16(2);
+
+ temp1_8x16b = _mm_unpacklo_epi8(left_top_16x8b, zero_vector); //l3 l2 l1 l0 tl t0 t1 t2
+ temp2_8x16b = _mm_slli_si128(temp1_8x16b, 2); // 0 l3 l2 l1 l0 tl t0 t1
+ temp2_8x16b = _mm_shufflelo_epi16(temp2_8x16b, 0xe5); //l3 l3 l2 l1 l0 tl t0 t1
+
+ temp1_8x16b = _mm_add_epi16(temp1_8x16b, temp2_8x16b); //l3+l3 l3+l2 l2+l1... t1+t2
+ temp2_8x16b = _mm_slli_si128(temp1_8x16b, 2); //l3+l3 l3+l3 l3+l2... t0+t1
+ temp2_8x16b = _mm_shufflelo_epi16(temp2_8x16b, 0xe5);
+ temp1_8x16b = _mm_add_epi16(temp1_8x16b, temp2_8x16b); //4*l3 l3+2*l3+l2 l3+2*l2+l1... t0+2*t1+t2
+
+ temp1_8x16b = _mm_add_epi16(const_2_8x16b, temp1_8x16b); //4*l3+2 3*l3+l2+2 l3+2*l2+l1+2.. t0+2*t1+t2+2
+ temp1_8x16b = _mm_srli_epi16(temp1_8x16b, 2);
+
+ temp1_16x8b = _mm_srli_si128(left_top_16x8b, 1);
+ w11_16x8b = _mm_avg_epu8(left_top_16x8b, temp1_16x8b); /* FILT11: rounded average of each neighbour byte with the next one */
+
+ temp2_16x8b = _mm_srli_si128(left_top_16x8b, 6);
+ temp2_8x16b = _mm_unpacklo_epi8(temp2_16x8b, zero_vector); //t1 t2 t3 t4 t5 t6 t7 0
+ temp3_8x16b = _mm_srli_si128(temp2_8x16b, 2); //t2 t3 t4 t5 t6 t7 0 0
+ temp3_8x16b = _mm_shufflehi_epi16(temp3_8x16b, 0xd4); //t2 t3 t4 t5 t6 t7 t7 0
+
+ temp2_8x16b = _mm_add_epi16(temp2_8x16b, temp3_8x16b); //t1+t2 t2+t3... t6+t7 t7+t7 0
+ temp3_8x16b = _mm_srli_si128(temp2_8x16b, 2); //t2+t3 t3+t4... t7+t7 0 0
+ temp2_8x16b = _mm_add_epi16(temp2_8x16b, temp3_8x16b); //t1+2*t2+t3 t2+2*t3+t4.. t6+2*t7+t7 t7+t7 0
+
+ temp2_8x16b = _mm_add_epi16(const_2_8x16b, temp2_8x16b); //t1+2*t2+t3+2 t2+2*t3+t4+2 t3+2*t4+t5+2... t6+2*t7+t7+2 t7+t7+2 2
+ temp2_8x16b = _mm_srli_epi16(temp2_8x16b, 2);
+
+ w121_16x8b = _mm_packus_epi16(temp1_8x16b, temp2_8x16b); /* FILT121 results: low 8 bytes from temp1, high 8 bytes from temp2 */
+ }
+ /* each mode below builds its 16 predicted bytes by shuffling the filtered neighbours, then computes SAD and cost as above */
+ if(u4_valid_intra_modes & 8)/* DIAG_DL */
+ {
+ shuffle_16x8b = _mm_setr_epi8( 7, 8, 9, 10,
+ 8, 9, 10, 11,
+ 9, 10, 11, 12,
+ 10, 11, 12, 13);
+ pred3_16x8b = _mm_shuffle_epi8(w121_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred3_16x8b);
+
+ sad[DIAG_DL_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[DIAG_DL_I4x4] = sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 16)/* DIAG_DR */
+ {
+ shuffle_16x8b = _mm_setr_epi8(5, 6, 7, 8,
+ 4, 5, 6, 7,
+ 3, 4, 5, 6,
+ 2, 3, 4, 5);
+ pred4_16x8b = _mm_shuffle_epi8(w121_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred4_16x8b);
+
+ sad[DIAG_DR_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[DIAG_DR_I4x4] = sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 32)/* VERT_R mode valid? */
+ {
+ temp1_16x8b = _mm_srli_si128(w121_16x8b, 1);
+ temp1_16x8b = _mm_unpacklo_epi64(temp1_16x8b, w11_16x8b); /* low half: FILT121 bytes 1..8, high half: FILT11 bytes 0..7 */
+ shuffle_16x8b = _mm_setr_epi8(12, 13, 14, 15,
+ 4, 5, 6, 7,
+ 3, 12, 13, 14,
+ 2, 4, 5, 6);
+ pred5_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred5_16x8b);
+
+ sad[VERT_R_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[VERT_R_I4x4] = sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 64)/* HORZ_D mode valid? */
+ {
+ temp1_16x8b = _mm_unpacklo_epi64(w121_16x8b, w11_16x8b); /* low half: FILT121 bytes 0..7, high half: FILT11 bytes 0..7 */
+ shuffle_16x8b = _mm_setr_epi8(11, 5, 6, 7,
+ 10, 4, 11, 5,
+ 9, 3, 10, 4,
+ 8, 2, 9, 3);
+ pred6_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred6_16x8b);
+
+ sad[HORZ_D_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[HORZ_D_I4x4] = sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 128)/* VERT_L mode valid? */
+ {
+ temp1_16x8b = _mm_srli_si128(w121_16x8b, 5);
+ temp2_16x8b = _mm_srli_si128(w11_16x8b, 5);
+ temp1_16x8b = _mm_unpacklo_epi64(temp1_16x8b, temp2_16x8b); /* low half: FILT121 bytes 5..12, high half: FILT11 bytes 5..12 */
+ shuffle_16x8b = _mm_setr_epi8(8, 9, 10, 11,
+ 2, 3, 4, 5,
+ 9, 10, 11, 12,
+ 3, 4, 5, 6);
+ pred7_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred7_16x8b);
+
+ sad[VERT_L_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[VERT_L_I4x4] = sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ? u4_lambda: lambda4);
+ }
+
+ if(u4_valid_intra_modes & 256)/* HORZ_U mode valid? */
+ {
+ temp1_16x8b = _mm_unpacklo_epi64(w121_16x8b, w11_16x8b);
+ shuffle_16x8b = _mm_setr_epi8(10, 3, 9, 2,
+ 9, 2, 8, 1,
+ 8, 1, 0, 0,
+ 0, 0, 0, 0);
+ pred8_16x8b = _mm_shuffle_epi8(temp1_16x8b, shuffle_16x8b);
+ sad_8x16b = _mm_sad_epu8(src_16x8b, pred8_16x8b);
+
+ sad[HORZ_U_I4x4] = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ cost[HORZ_U_I4x4] = sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ? u4_lambda: lambda4);
+ }
+ /* cheapest of all nine costs; modes that were not evaluated still hold INT_MAX */
+ min_cost = MIN3(MIN3(cost[0], cost[1], cost[2]),
+ MIN3(cost[3], cost[4], cost[5]),
+ MIN3(cost[6], cost[7], cost[8]));
+ }
+ else
+ { /* only the first three modes can be valid */
+ min_cost = MIN3(cost[0], cost[1], cost[2]);
+ }
+
+ *pu4_sadmin = min_cost; /* written unconditionally; the previous value is not consulted */
+ /* select the first mode whose cost equals the minimum (lowest cost[] index wins ties); pred0_16x8b ends up holding its prediction */
+ if(min_cost == cost[0])
+ {
+ *u4_intra_mode = VERT_I4x4;
+ }
+ else if(min_cost == cost[1])
+ {
+ *u4_intra_mode = HORZ_I4x4;
+ pred0_16x8b = pred1_16x8b;
+ }
+ else if(min_cost == cost[2])
+ {
+ *u4_intra_mode = DC_I4x4;
+ pred0_16x8b = pred2_16x8b;
+ }
+ else if(min_cost == cost[3])
+ {
+ *u4_intra_mode = DIAG_DL_I4x4;
+ pred0_16x8b = pred3_16x8b;
+ }
+ else if(min_cost == cost[4])
+ {
+ *u4_intra_mode = DIAG_DR_I4x4;
+ pred0_16x8b = pred4_16x8b;
+ }
+ else if(min_cost == cost[5])
+ {
+ *u4_intra_mode = VERT_R_I4x4;
+ pred0_16x8b = pred5_16x8b;
+ }
+ else if(min_cost == cost[6])
+ {
+ *u4_intra_mode = HORZ_D_I4x4;
+ pred0_16x8b = pred6_16x8b;
+ }
+ else if(min_cost == cost[7])
+ {
+ *u4_intra_mode = VERT_L_I4x4;
+ pred0_16x8b = pred7_16x8b;
+ }
+ else if(min_cost == cost[8])
+ {
+ *u4_intra_mode = HORZ_U_I4x4;
+ pred0_16x8b = pred8_16x8b;
+ }
+ /* NOTE(review): if no mode bit is set, every cost is INT_MAX, the VERT branch is taken and pred0_16x8b is never assigned - confirm callers always enable a mode */
+ mask_low_32b = _mm_set1_epi8(0xff);
+ mask_low_32b = _mm_srli_si128(mask_low_32b, 12); /* 0xff remains in the low 4 bytes only */
+ /* store 4 bytes per destination row, shifting the next row's bytes into the low dword each time */
+ _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)pu1_dst);
+ pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4);
+ _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd));
+ pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4);
+ _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd2));
+ pred0_16x8b = _mm_srli_si128(pred0_16x8b, 4);
+ _mm_maskmoveu_si128(pred0_16x8b, mask_low_32b, (char*)(pu1_dst + dst_strd3));
+
+}
+
+/**
+******************************************************************************
+*
+* @brief
+* Evaluate best intra chroma mode (among VERT, HORZ and DC) and do the prediction.
+*
+* @par Description
+* This function evaluates first three intra chroma modes and compute corresponding sad
+* and return the buffer predicted with best mode.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_ngbr_pels
+* UWORD8 pointer to neighbouring pels
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] dst_strd
+* integer destination stride
+*
+* @param[in] u4_n_avblty
+* availability of neighbouring pixels
+*
+* @param[out] u4_intra_mode
+* pointer to the variable in which best mode is returned
+*
+* @param[in] pu4_sadmin
+* pointer to the variable in which minimum sad is returned
+*
+* @param[in] u4_valid_intra_modes
+* says what all modes are valid
+*
+* @return
+* none
+*
+******************************************************************************
+*/
+
+void ih264e_evaluate_intra_chroma_modes_ssse3(UWORD8 *pu1_src,
+ UWORD8 *pu1_ngbr_pels,
+ UWORD8 *pu1_dst,
+ UWORD32 src_strd,
+ UWORD32 dst_strd,
+ WORD32 u4_n_avblty,
+ UWORD32 *u4_intra_mode,
+ WORD32 *pu4_sadmin,
+ UWORD32 u4_valid_intra_modes)
+{
+ WORD32 left, top;
+ WORD32 sad_vert = INT_MAX, sad_horz = INT_MAX, sad_dc = INT_MAX, min_sad;
+
+ __m128i src1_16x8b, src2_16x8b, src3_16x8b, src4_16x8b;
+ __m128i src5_16x8b, src6_16x8b, src7_16x8b, src8_16x8b;
+
+ __m128i top_16x8b, left_16x8b;
+ __m128i pred1_16x8b, pred2_16x8b;
+ __m128i tmp1_8x16b, tmp2_8x16b, sad_8x16b;
+
+ left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
+ top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
+
+ //Loading source
+ {
+ src1_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src2_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src3_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src4_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src5_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src6_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src7_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ pu1_src += src_strd;
+ src8_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
+ }
+
+ if(left)
+ {
+ left_16x8b = _mm_loadu_si128((__m128i *)pu1_ngbr_pels);
+
+ if(u4_valid_intra_modes & 02) //If HORZ mode is valid
+ {
+ __m128i left_tmp_16x8b, left_sh_16x8b;
+ __m128i const_14_15_16x8b;
+
+ const_14_15_16x8b = _mm_set1_epi16(0x0f0e);
+ left_sh_16x8b = _mm_slli_si128(left_16x8b, 2);
+
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 1
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 2
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred2_16x8b);
+
+ left_tmp_16x8b = _mm_slli_si128(left_16x8b, 4);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 3
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 4
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred2_16x8b);
+
+ left_tmp_16x8b = _mm_slli_si128(left_tmp_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 5
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 6
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+
+ left_tmp_16x8b = _mm_slli_si128(left_tmp_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ pred1_16x8b = _mm_shuffle_epi8(left_tmp_16x8b, const_14_15_16x8b); //row 7
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 8
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_horz = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ }
+
+ if(top)
+ {
+ UWORD8 *pu1_top;
+
+ pu1_top = pu1_ngbr_pels + 2 * BLK8x8SIZE + 2;
+ top_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
+
+ if(u4_valid_intra_modes & 04) //If VERT mode is valid
+ {
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, top_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, top_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, top_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, top_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, top_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, top_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, top_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, top_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_vert = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ }
+
+ if(u4_valid_intra_modes & 01) //If DC mode is valid
+ {
+ if(left && top)
+ {
+ WORD32 left_up_u, left_down_u, left_up_v, left_down_v;
+ WORD32 top_left_u, top_right_u, top_left_v, top_right_v;
+ WORD32 dc_1u, dc_1v, dc_2u, dc_2v;
+
+ __m128i val_sh_16x8b;
+ __m128i intrlv_mask_8x16b, zero_vector;
+
+ intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+ zero_vector = _mm_setzero_si128();
+
+ val_sh_16x8b = _mm_srli_si128(left_16x8b, 1);
+
+ tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_16x8b);
+ tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, val_sh_16x8b);
+ tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+ tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+ left_up_u = _mm_extract_epi16(tmp1_8x16b, 4);
+ left_up_v = _mm_extract_epi16(tmp2_8x16b, 4);
+ left_down_u = _mm_extract_epi16(tmp1_8x16b, 0);
+ left_down_v = _mm_extract_epi16(tmp2_8x16b, 0);
+
+ val_sh_16x8b = _mm_srli_si128(top_16x8b, 1);
+
+ tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_16x8b);
+ tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, val_sh_16x8b);
+ tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+ tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+ top_left_u = _mm_extract_epi16(tmp1_8x16b, 0);
+ top_left_v = _mm_extract_epi16(tmp2_8x16b, 0);
+ top_right_u = _mm_extract_epi16(tmp1_8x16b, 4);
+ top_right_v = _mm_extract_epi16(tmp2_8x16b, 4);
+
+ // First four rows
+ dc_1u = (left_up_u + top_left_u + 4) >> 3;
+ dc_1v = (left_up_v + top_left_v + 4) >> 3;
+ dc_2u = (top_right_u + 2) >> 2;
+ dc_2v = (top_right_v + 2) >> 2;
+
+ pred1_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+ dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ // Second four rows
+ dc_1u = (left_down_u + 2) >> 2;
+ dc_1v = (left_down_v + 2) >> 2;
+ dc_2u = (left_down_u + top_right_u + 4) >> 3;
+ dc_2v = (left_down_v + top_right_v + 4) >> 3;
+
+ pred2_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+ dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred2_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred2_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ else if(left)
+ {
+ WORD32 left_up_u, left_down_u, left_up_v, left_down_v;
+ WORD32 dc_u, dc_v;
+
+ __m128i left_sh_16x8b;
+ __m128i intrlv_mask_8x16b, zero_vector;
+
+ intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+ zero_vector = _mm_setzero_si128();
+
+ left_sh_16x8b = _mm_srli_si128(left_16x8b, 1);
+
+ tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_16x8b);
+ tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, left_sh_16x8b);
+ tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+ tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+ left_up_u = _mm_extract_epi16(tmp1_8x16b, 4);
+ left_up_v = _mm_extract_epi16(tmp2_8x16b, 4);
+ left_down_u = _mm_extract_epi16(tmp1_8x16b, 0);
+ left_down_v = _mm_extract_epi16(tmp2_8x16b, 0);
+
+ // First four rows
+ dc_u = (left_up_u + 2) >> 2;
+ dc_v = (left_up_v + 2) >> 2;
+
+ pred1_16x8b = _mm_set1_epi16(dc_u | (dc_v << 8));
+
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ // Second four rows
+ dc_u = (left_down_u + 2) >> 2;
+ dc_v = (left_down_v + 2) >> 2;
+
+ pred2_16x8b = _mm_set1_epi16(dc_u | (dc_v << 8));
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred2_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred2_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred2_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred2_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ else if(top)
+ {
+ WORD32 top_left_u, top_right_u, top_left_v, top_right_v;
+ WORD32 dc_1u, dc_1v, dc_2u, dc_2v;
+
+ __m128i top_sh_16x8b;
+ __m128i intrlv_mask_8x16b, zero_vector;
+
+ intrlv_mask_8x16b = _mm_set1_epi16(0x00ff);
+ zero_vector = _mm_setzero_si128();
+
+ top_sh_16x8b = _mm_srli_si128(top_16x8b, 1);
+
+ tmp1_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_16x8b);
+ tmp2_8x16b = _mm_and_si128(intrlv_mask_8x16b, top_sh_16x8b);
+ tmp1_8x16b = _mm_sad_epu8(zero_vector, tmp1_8x16b);
+ tmp2_8x16b = _mm_sad_epu8(zero_vector, tmp2_8x16b);
+
+ top_left_u = _mm_extract_epi16(tmp1_8x16b, 0);
+ top_left_v = _mm_extract_epi16(tmp2_8x16b, 0);
+ top_right_u = _mm_extract_epi16(tmp1_8x16b, 4);
+ top_right_v = _mm_extract_epi16(tmp2_8x16b, 4);
+
+ dc_1u = (top_left_u + 2) >> 2;
+ dc_1v = (top_left_v + 2) >> 2;
+ dc_2u = (top_right_u + 2) >> 2;
+ dc_2v = (top_right_v + 2) >> 2;
+
+ pred1_16x8b = _mm_setr_epi8(dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v, dc_1u, dc_1v,
+ dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v, dc_2u, dc_2v);
+
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ else
+ {
+ pred1_16x8b = _mm_set1_epi8(128);
+
+ tmp1_8x16b = _mm_sad_epu8(src1_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src2_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(tmp1_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src3_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src4_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src5_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src6_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ tmp1_8x16b = _mm_sad_epu8(src7_16x8b, pred1_16x8b);
+ tmp2_8x16b = _mm_sad_epu8(src8_16x8b, pred1_16x8b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp1_8x16b);
+ sad_8x16b = _mm_add_epi16(sad_8x16b, tmp2_8x16b);
+
+ sad_dc = _mm_extract_epi16(sad_8x16b, 0) + _mm_extract_epi16(sad_8x16b, 4);
+ }
+ }
+
+ min_sad = MIN3(sad_horz, sad_vert, sad_dc);
+
+ /* Finding minimum SAD and doing corresponding prediction*/
+ if(min_sad < *pu4_sadmin)
+ {
+ *pu4_sadmin = min_sad;
+
+ if(min_sad == sad_dc)
+ {
+ *u4_intra_mode = DC_CH_I8x8;
+
+ if(!left)
+ pred2_16x8b = pred1_16x8b;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ }
+ else if(min_sad == sad_horz)
+ {
+ __m128i left_sh_16x8b, const_14_15_16x8b;
+
+ *u4_intra_mode = HORZ_CH_I8x8;
+
+ const_14_15_16x8b = _mm_set1_epi16(0x0f0e);
+
+ left_sh_16x8b = _mm_slli_si128(left_16x8b, 2);
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 1
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 2
+
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+ left_16x8b = _mm_slli_si128(left_16x8b, 4);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 3
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 4
+
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+ left_16x8b = _mm_slli_si128(left_16x8b, 4);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 5
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 6
+
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+
+ left_16x8b = _mm_slli_si128(left_16x8b, 4);
+ left_sh_16x8b = _mm_slli_si128(left_sh_16x8b, 4);
+ pred1_16x8b = _mm_shuffle_epi8(left_16x8b, const_14_15_16x8b); //row 7
+ pred2_16x8b = _mm_shuffle_epi8(left_sh_16x8b, const_14_15_16x8b); //row 8
+
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred1_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, pred2_16x8b);
+ }
+ else
+ {
+ *u4_intra_mode = VERT_CH_I8x8;
+
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ pu1_dst += dst_strd;
+ _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
+ }
+ }
+}
diff --git a/encoder/x86/ih264e_platform_macros.h b/encoder/x86/ih264e_platform_macros.h
new file mode 100755
index 0000000..b4dfadd
--- /dev/null
+++ b/encoder/x86/ih264e_platform_macros.h
@@ -0,0 +1,154 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ih264e_platform_macros.h
+ *
+ * @brief
+ * Contains platform specific routines used for codec context initialization
+ *
+ * @author
+ * ittiam
+ *
+ * @remarks
+ * none
+ *
+ *******************************************************************************
+ */
+
+
+#ifndef IH264E_PLATFORM_MACROS_H_
+#define IH264E_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_generic(codec_t *ps_codec);
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr_ssse3(codec_t *ps_codec);
+void ih264e_init_function_ptr_sse42(codec_t *ps_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the intra/inter/transform/deblk function pointers of
+* codec context
+*
+* @par Description: the current routine initializes the function pointers of
+* codec context basing on the architecture in use
+*
+* @param[in] ps_codec
+* Codec context pointer
+*
+* @returns none
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ih264e_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief Determine the architecture of the encoder executing environment
+*
+* @par Description: This routine returns the architecture of the
+* environment in which the current encoder is being tested
+*
+* @param[in] void
+*
+* @returns IV_ARCH_T
+* architecture
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+IV_ARCH_T ih264e_default_arch(void);
+
+/**
+*******************************************************************************
+*
+* @brief Data Memory Barrier, Data Synchronization Barrier
+*
+*
+* @par Description: These functions do nothing on the x86 side. On ARM platforms:
+*
+* Data Memory Barrier acts as a memory barrier. It ensures that all explicit
+* memory accesses that appear in program order before the DMB instruction are
+* observed before any explicit memory accesses that appear in program order
+* after the DMB instruction. It does not affect the ordering of any other
+* instructions executing on the processor
+*
+* Data Synchronization Barrier acts as a special kind of memory barrier. No
+* instruction in program order after this instruction executes until this instruction
+* completes. This instruction completes when:
+* 1. All explicit memory accesses before this instruction complete.
+* 2. All Cache, Branch predictor and TLB maintenance operations before
+* this instruction complete.
+*
+* @param[in] void
+*
+* @returns void
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+
+#endif /* IH264E_PLATFORM_MACROS_H_ */
diff --git a/encoder/x86/ime_distortion_metrics_sse42.c b/encoder/x86/ime_distortion_metrics_sse42.c
new file mode 100755
index 0000000..0876788
--- /dev/null
+++ b/encoder/x86/ime_distortion_metrics_sse42.c
@@ -0,0 +1,1940 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ime_distortion_metrics_sse42.c
+*
+* @brief
+* This file contains definitions of routines that compute distortion
+* between two macro/sub blocks of identical dimensions
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ime_compute_sad_16x16_sse42()
+* - ime_compute_sad_16x16_fast_sse42()
+* - ime_compute_sad_16x16_ea8_sse42()
+* - ime_compute_sad_16x8_sse42()
+* - ime_calculate_sad4_prog_sse42()
+* - ime_sub_pel_compute_sad_16x16_sse42()
+* - ime_compute_satqd_16x16_lumainter_sse42()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "ime_typedefs.h"
+#include "ime_defs.h"
+#include "ime_macros.h"
+#include "ime_statistics.h"
+#include "ime_platform_macros.h"
+#include "ime_distortion_metrics.h"
+#include <immintrin.h>
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks
+*
+* @par Description
+* This function computes the SAD between two 16x16 blocks, four rows at a
+* time, using unaligned 16-byte loads. All 16 rows are always processed:
+* i4_max_sad is never read here, so this variant has no early exit.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the block that is compared against the source
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the block pointed to by pu1_est
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion (not used by this implementation)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad
+*
+* @remarks
+* The stored value is the sum of the two 64-bit lane totals of sad_val.
+******************************************************************************
+*/
+void ime_compute_sad_16x16_sse42(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 i4_max_sad, // never read in this function
+ WORD32 *pi4_mb_distortion)
+{
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i est_r0, est_r1, est_r2, est_r3;
+ __m128i res_r0, res_r1, res_r2, res_r3;
+ __m128i sad_val; // running SAD, one partial total per 64-bit lane
+ int val1, val2;
+
+ // Rows 0-3: one unaligned 16-byte load per row, then per-row SAD
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0); // SAD of bytes 0-7 and of bytes 8-15, one sum per 64-bit lane
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(res_r0, res_r1); // first group initialises the accumulator
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ // Rows 4-7: advance both pointers by four rows and accumulate
+ pu1_src += 4*src_strd;
+ pu1_est += 4*est_strd;
+
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0);
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(sad_val, res_r0);
+ sad_val = _mm_add_epi64(sad_val, res_r1);
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ // Rows 8-11: advance both pointers by four rows and accumulate
+ pu1_src += 4*src_strd;
+ pu1_est += 4*est_strd;
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0);
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(sad_val, res_r0);
+ sad_val = _mm_add_epi64(sad_val, res_r1);
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ // Rows 12-15: advance both pointers by four rows and accumulate
+ pu1_src += 4*src_strd;
+ pu1_est += 4*est_strd;
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0);
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(sad_val, res_r0);
+ sad_val = _mm_add_epi64(sad_val, res_r1);
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ val1 = _mm_extract_epi32(sad_val,0); // low 32 bits of the lower 64-bit lane
+ val2 = _mm_extract_epi32(sad_val, 2); // low 32 bits of the upper 64-bit lane
+ *pi4_mb_distortion = (val1+val2); // total SAD over all 16 rows
+
+ return;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x8 blocks
+*
+*
+* @par Description
+* This function computes the SAD between two blocks that are 16 bytes wide
+* and 8 rows tall, four rows at a time, using unaligned 16-byte loads.
+* i4_max_sad is never read here, so this variant has no early exit.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the block that is compared against the source
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the block pointed to by pu1_est
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion (not used by this implementation)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad
+*
+* @remarks
+* The stored value is the sum of the two 64-bit lane totals of sad_val.
+******************************************************************************
+*/
+void ime_compute_sad_16x8_sse42(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 i4_max_sad, // never read in this function
+ WORD32 *pi4_mb_distortion)
+{
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i est_r0, est_r1, est_r2, est_r3;
+ __m128i res_r0, res_r1, res_r2, res_r3;
+ __m128i sad_val; // running SAD, one partial total per 64-bit lane
+ int val1, val2;
+
+ // Rows 0-3: one unaligned 16-byte load per row, then per-row SAD
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0); // SAD of bytes 0-7 and of bytes 8-15, one sum per 64-bit lane
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(res_r0, res_r1); // first group initialises the accumulator
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ // Rows 4-7: advance both pointers by four rows and accumulate
+ pu1_src += 4*src_strd;
+ pu1_est += 4*est_strd;
+
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 3*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 3*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0);
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(sad_val, res_r0);
+ sad_val = _mm_add_epi64(sad_val, res_r1);
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ val1 = _mm_extract_epi32(sad_val,0); // low 32 bits of the lower 64-bit lane
+ val2 = _mm_extract_epi32(sad_val, 2); // low 32 bits of the upper 64-bit lane
+ *pi4_mb_distortion = (val1+val2); // total SAD over all 8 rows
+ return;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks, with early exit
+*
+* @par Description
+* This function first computes the SAD of the 8 even rows. If that partial
+* SAD is greater than i4_max_sad it is stored and the function returns;
+* otherwise the 8 odd rows are added and the SAD of all 16 rows is stored.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the block that is compared against the source
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the block pointed to by pu1_est
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion, compared against the even-row SAD
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (even rows only when the early exit is taken)
+*
+* @remarks
+* The early-exit test is made once, after the 8 even rows.
+******************************************************************************
+*/
+void ime_compute_sad_16x16_ea8_sse42(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 i4_max_sad,
+ WORD32 *pi4_mb_distortion)
+{
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i est_r0, est_r1, est_r2, est_r3;
+ __m128i res_r0, res_r1, res_r2, res_r3;
+ __m128i sad_val; // running SAD, one partial total per 64-bit lane
+ WORD32 val1, val2;
+ WORD32 i4_sad; // SAD of the even rows, used for the early-exit test
+ UWORD8 *pu1_src_temp = pu1_src + src_strd; // row 1 of the source, start of the odd-row pass
+ UWORD8 *pu1_est_temp = pu1_est + est_strd; // row 1 of the compared block
+
+ // Rows 0,2,4,6: loads step by two rows, so only even rows are read
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0); // SAD of bytes 0-7 and of bytes 8-15, one sum per 64-bit lane
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(res_r0, res_r1); // first group initialises the accumulator
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ // Rows 8,10,12,14: advance both pointers by eight rows and accumulate
+ pu1_src += 8*src_strd;
+ pu1_est += 8*est_strd;
+
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0);
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(sad_val, res_r0);
+ sad_val = _mm_add_epi64(sad_val, res_r1);
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ pu1_src = pu1_src_temp; // rewind to row 1 for the odd-row pass
+ pu1_est = pu1_est_temp;
+
+ val1 = _mm_extract_epi32(sad_val, 0); // low 32 bits of the lower 64-bit lane
+ val2 = _mm_extract_epi32(sad_val, 2); // low 32 bits of the upper 64-bit lane
+
+ i4_sad = val1 + val2; // SAD of the 8 even rows
+ if (i4_max_sad < i4_sad) // early exit: the even rows alone already exceed the limit
+ {
+ *pi4_mb_distortion = i4_sad; // partial SAD (even rows only) is reported
+ return ;
+ }
+ // Rows 1,3,5,7: sad_val still holds the even-row totals and keeps accumulating
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0);
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(sad_val, res_r0);
+ sad_val = _mm_add_epi64(sad_val, res_r1);
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ // Rows 9,11,13,15: advance both pointers by eight rows and accumulate
+ pu1_src += 8*src_strd;
+ pu1_est += 8*est_strd;
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2*src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4*src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6*src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2*est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4*est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6*est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0);
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(sad_val, res_r0);
+ sad_val = _mm_add_epi64(sad_val, res_r1);
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ val1 = _mm_extract_epi32(sad_val, 0);
+ val2 = _mm_extract_epi32(sad_val, 2);
+ *pi4_mb_distortion = (val1+val2); // total SAD over all 16 rows
+
+ return;
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
+*
+* @par Description
+* This function computes the SAD of alternate (even) rows only and stores
+* twice that value, i.e. the SAD of the whole block is assumed to be
+* approximately twice the SAD of its even rows. There is no early exit.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the block that is compared against the source
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the block pointed to by pu1_est
+*
+* @param[in] i4_max_sad
+* integer maximum allowed distortion (not used by this implementation)
+*
+* @param[out] pi4_mb_distortion
+* integer evaluated sad (even-row SAD multiplied by two)
+*
+* @remarks
+* Odd rows are never read by this function.
+******************************************************************************
+*/
+void ime_compute_sad_16x16_fast_sse42(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ WORD32 i4_max_sad, // never read in this function
+ WORD32 *pi4_mb_distortion)
+{
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i est_r0, est_r1, est_r2, est_r3;
+ __m128i res_r0, res_r1, res_r2, res_r3;
+ __m128i sad_val; // running SAD, one partial total per 64-bit lane
+ WORD32 val1, val2;
+ WORD32 i4_sad; // SAD of the 8 even rows
+ UWORD8 *pu1_src_temp = pu1_src + src_strd; // row 1 of the source; only used by the dead stores below
+ UWORD8 *pu1_est_temp = pu1_est + est_strd; // row 1 of the compared block; likewise
+
+ // Rows 0,2,4,6: loads step by two rows, so only even rows are read
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2 * src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4 * src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6 * src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2 * est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4 * est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6 * est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0); // SAD of bytes 0-7 and of bytes 8-15, one sum per 64-bit lane
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(res_r0, res_r1); // first group initialises the accumulator
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ // Rows 8,10,12,14: advance both pointers by eight rows and accumulate
+ pu1_src += 8 * src_strd;
+ pu1_est += 8 * est_strd;
+
+ src_r0 = _mm_loadu_si128((__m128i *) (pu1_src));
+ src_r1 = _mm_loadu_si128((__m128i *) (pu1_src + 2 * src_strd));
+ src_r2 = _mm_loadu_si128((__m128i *) (pu1_src + 4 * src_strd));
+ src_r3 = _mm_loadu_si128((__m128i *) (pu1_src + 6 * src_strd));
+
+ est_r0 = _mm_loadu_si128((__m128i *) (pu1_est));
+ est_r1 = _mm_loadu_si128((__m128i *) (pu1_est + 2 * est_strd));
+ est_r2 = _mm_loadu_si128((__m128i *) (pu1_est + 4 * est_strd));
+ est_r3 = _mm_loadu_si128((__m128i *) (pu1_est + 6 * est_strd));
+
+ res_r0 = _mm_sad_epu8(src_r0, est_r0);
+ res_r1 = _mm_sad_epu8(src_r1, est_r1);
+ res_r2 = _mm_sad_epu8(src_r2, est_r2);
+ res_r3 = _mm_sad_epu8(src_r3, est_r3);
+
+ sad_val = _mm_add_epi64(sad_val, res_r0);
+ sad_val = _mm_add_epi64(sad_val, res_r1);
+ sad_val = _mm_add_epi64(sad_val, res_r2);
+ sad_val = _mm_add_epi64(sad_val, res_r3);
+
+ pu1_src = pu1_src_temp; // NOTE(review): dead store, pu1_src is not read again
+ pu1_est = pu1_est_temp; // NOTE(review): dead store, pu1_est is not read again
+
+ val1 = _mm_extract_epi32(sad_val, 0); // low 32 bits of the lower 64-bit lane
+ val2 = _mm_extract_epi32(sad_val, 2); // low 32 bits of the upper 64-bit lane
+
+ i4_sad = val1 + val2;
+ *pi4_mb_distortion = (i4_sad<<1); // doubled to approximate the SAD of all 16 rows
+ return;
+}
+
+/**
+*******************************************************************************
+*
+* @brief compute sad at the four unit-distance points around the reference
+*
+* @par Description: For a block that is 16 pixels wide and 16 rows high, this
+* function computes the sad of the source against the reference displaced by
+* one pixel to the left, to the right, one row up and one row down.
+*
+* @param[in] pu1_ref
+* UWORD8 pointer to the reference (centre of the diamond)
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[in] ref_strd
+* integer reference stride
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[out] pi4_sad
+* pointer to integer array receiving the four evaluated sads
+* pi4_sad[0] - left   (pu1_ref - 1)
+* pi4_sad[1] - right  (pu1_ref + 1)
+* pi4_sad[2] - top    (pu1_ref - ref_strd)
+* pi4_sad[3] - bottom (pu1_ref + ref_strd)
+*
+* @returns none; the results are written to pi4_sad
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+void ime_calculate_sad4_prog_sse42(UWORD8 *pu1_ref,
+                                   UWORD8 *pu1_src,
+                                   WORD32 ref_strd,
+                                   WORD32 src_strd,
+                                   WORD32 *pi4_sad)
+{
+    /* first row of each candidate, at unit distance from pu1_ref */
+    UWORD8 *pu1_cand_left = pu1_ref - 1;
+    UWORD8 *pu1_cand_right = pu1_ref + 1;
+    UWORD8 *pu1_cand_top = pu1_ref - ref_strd;
+    UWORD8 *pu1_cand_bot = pu1_ref + ref_strd;
+
+    __m128i cur_src;
+    __m128i acc_left, acc_right, acc_top, acc_bot;
+    WORD32 lo_sum, hi_sum;
+    WORD32 row;
+
+    // Row 0 seeds the four accumulators
+    cur_src = _mm_loadu_si128((__m128i *) (pu1_src));
+    acc_left = _mm_sad_epu8(cur_src, _mm_loadu_si128((__m128i *) (pu1_cand_left)));
+    acc_right = _mm_sad_epu8(cur_src, _mm_loadu_si128((__m128i *) (pu1_cand_right)));
+    acc_top = _mm_sad_epu8(cur_src, _mm_loadu_si128((__m128i *) (pu1_cand_top)));
+    acc_bot = _mm_sad_epu8(cur_src, _mm_loadu_si128((__m128i *) (pu1_cand_bot)));
+
+    // Rows 1 to 15: advance every pointer one row, then accumulate
+    for(row = 1; row < 16; row++)
+    {
+        pu1_src += src_strd;
+        pu1_cand_left += ref_strd;
+        pu1_cand_right += ref_strd;
+        pu1_cand_top += ref_strd;
+        pu1_cand_bot += ref_strd;
+
+        cur_src = _mm_loadu_si128((__m128i *) (pu1_src));
+
+        acc_left = _mm_add_epi64(acc_left,
+                        _mm_sad_epu8(cur_src, _mm_loadu_si128((__m128i *) (pu1_cand_left))));
+        acc_right = _mm_add_epi64(acc_right,
+                        _mm_sad_epu8(cur_src, _mm_loadu_si128((__m128i *) (pu1_cand_right))));
+        acc_top = _mm_add_epi64(acc_top,
+                        _mm_sad_epu8(cur_src, _mm_loadu_si128((__m128i *) (pu1_cand_top))));
+        acc_bot = _mm_add_epi64(acc_bot,
+                        _mm_sad_epu8(cur_src, _mm_loadu_si128((__m128i *) (pu1_cand_bot))));
+    }
+
+    // Each accumulator holds one partial sum per 64-bit half; 32-bit lanes
+    // 0 and 2 are the low words of those halves
+    lo_sum = _mm_extract_epi32(acc_left, 0);
+    hi_sum = _mm_extract_epi32(acc_left, 2);
+    pi4_sad[0] = (lo_sum + hi_sum);
+
+    lo_sum = _mm_extract_epi32(acc_right, 0);
+    hi_sum = _mm_extract_epi32(acc_right, 2);
+    pi4_sad[1] = (lo_sum + hi_sum);
+
+    lo_sum = _mm_extract_epi32(acc_top, 0);
+    hi_sum = _mm_extract_epi32(acc_top, 2);
+    pi4_sad[2] = (lo_sum + hi_sum);
+
+    lo_sum = _mm_extract_epi32(acc_bot, 0);
+    hi_sum = _mm_extract_epi32(acc_bot, 2);
+    pi4_sad[3] = (lo_sum + hi_sum);
+}
+
+/**
+******************************************************************************
+*
+* @brief computes distortion (SAD) at all subpel points about the src location
+*
+* @par Description
+* This function computes SAD at all points at a subpel distance from the
+* current source location.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source
+*
+* @param[out] pu1_ref_half_x
+* UWORD8 pointer to half pel buffer
+*
+* @param[out] pu1_ref_half_y
+* UWORD8 pointer to half pel buffer
+*
+* @param[out] pu1_ref_half_xy
+* UWORD8 pointer to half pel buffer
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] ref_strd
+* integer ref stride
+*
+* @param[out] pi4_sad
+* integer evaluated sad
+* pi4_sad[0] - half x
+* pi4_sad[1] - half x - 1
+* pi4_sad[2] - half y
+* pi4_sad[3] - half y - 1
+* pi4_sad[4] - half xy
+* pi4_sad[5] - half xy - 1
+* pi4_sad[6] - half xy - strd
+* pi4_sad[7] - half xy - 1 - strd
+*
+* @remarks
+*
+******************************************************************************
+*/
+void ime_sub_pel_compute_sad_16x16_sse42(UWORD8 *pu1_src,
+ UWORD8 *pu1_ref_half_x,
+ UWORD8 *pu1_ref_half_y,
+ UWORD8 *pu1_ref_half_xy,
+ WORD32 src_strd,
+ WORD32 ref_strd,
+ WORD32 *pi4_sad)
+{
+ UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
+ UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
+ UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
+ UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
+ UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
+ WORD32 val1, val2;
+
+ __m128i src, ref_half_x, ref_half_y, ref_half_xy;
+ __m128i ref_half_x_left, ref_half_y_top, ref_half_xy_left, ref_half_xy_top, ref_half_xy_top_left;
+ __m128i res_r0, res_r1, res_r2, res_r3, res_r4, res_r5, res_r6, res_r7;
+ __m128i sad_r0, sad_r1, sad_r2, sad_r3, sad_r4, sad_r5, sad_r6, sad_r7;
+ // Row 0 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ sad_r0 = _mm_sad_epu8(src, ref_half_x);
+ sad_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ sad_r2 = _mm_sad_epu8(src, ref_half_y);
+ sad_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ sad_r4 = _mm_sad_epu8(src, ref_half_xy);
+ sad_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ sad_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ sad_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 1 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 2 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 3 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 4 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+
+ // Row 5 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 6 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 7 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 8 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 9 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 10 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 11 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 12 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 13 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 14 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ pu1_src += src_strd;
+ pu1_ref_half_x += ref_strd;
+ pu1_ref_half_x_left += ref_strd;
+ pu1_ref_half_y += ref_strd;
+ pu1_ref_half_y_top += ref_strd;
+ pu1_ref_half_xy += ref_strd;
+ pu1_ref_half_xy_left += ref_strd;
+ pu1_ref_half_xy_top += ref_strd;
+ pu1_ref_half_xy_top_left += ref_strd;
+
+ // Row 15 sad calculation
+ src = _mm_loadu_si128((__m128i *) (pu1_src));
+ ref_half_x = _mm_loadu_si128((__m128i *) (pu1_ref_half_x));
+ ref_half_y = _mm_loadu_si128((__m128i *) (pu1_ref_half_y));
+ ref_half_xy = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy));
+ ref_half_x_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_x_left));
+ ref_half_y_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_y_top));
+ ref_half_xy_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_left));
+ ref_half_xy_top = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top));
+ ref_half_xy_top_left = _mm_loadu_si128((__m128i *) (pu1_ref_half_xy_top_left));
+
+ res_r0 = _mm_sad_epu8(src, ref_half_x);
+ res_r1 = _mm_sad_epu8(src, ref_half_x_left);
+ res_r2 = _mm_sad_epu8(src, ref_half_y);
+ res_r3 = _mm_sad_epu8(src, ref_half_y_top);
+ res_r4 = _mm_sad_epu8(src, ref_half_xy);
+ res_r5 = _mm_sad_epu8(src, ref_half_xy_left);
+ res_r6 = _mm_sad_epu8(src, ref_half_xy_top);
+ res_r7 = _mm_sad_epu8(src, ref_half_xy_top_left);
+
+ sad_r0 = _mm_add_epi64(sad_r0, res_r0);
+ sad_r1 = _mm_add_epi64(sad_r1, res_r1);
+ sad_r2 = _mm_add_epi64(sad_r2, res_r2);
+ sad_r3 = _mm_add_epi64(sad_r3, res_r3);
+ sad_r4 = _mm_add_epi64(sad_r4, res_r4);
+ sad_r5 = _mm_add_epi64(sad_r5, res_r5);
+ sad_r6 = _mm_add_epi64(sad_r6, res_r6);
+ sad_r7 = _mm_add_epi64(sad_r7, res_r7);
+
+ val1 = _mm_extract_epi32(sad_r0, 0);
+ val2 = _mm_extract_epi32(sad_r0, 2);
+ pi4_sad[0] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r1, 0);
+ val2 = _mm_extract_epi32(sad_r1, 2);
+ pi4_sad[1] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r2, 0);
+ val2 = _mm_extract_epi32(sad_r2, 2);
+ pi4_sad[2] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r3, 0);
+ val2 = _mm_extract_epi32(sad_r3, 2);
+ pi4_sad[3] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r4, 0);
+ val2 = _mm_extract_epi32(sad_r4, 2);
+ pi4_sad[4] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r5, 0);
+ val2 = _mm_extract_epi32(sad_r5, 2);
+ pi4_sad[5] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r6, 0);
+ val2 = _mm_extract_epi32(sad_r6, 2);
+ pi4_sad[6] = (val1 + val2);
+
+ val1 = _mm_extract_epi32(sad_r7, 0);
+ val2 = _mm_extract_epi32(sad_r7, 2);
+ pi4_sad[7] = (val1 + val2);
+
+ return;
+}
+/**
+* @brief Computes the SAD between a 16x16 source block and its 16x16 estimate
+* and tests every 4x4 sub-block against the supplied thresholds
+*
+* The block is walked as four bands of four rows, each band as two 8-pixel
+* halves, each half holding two 4x4 sub-blocks. Every sub-block SAD is added
+* to *pi4_mb_distortion. Until a sub-block fails, eight values derived from
+* its partial sums are compared with pu2_thrsh[0..7] and its SAD with
+* pu2_thrsh[8]; a test passes only when the threshold is strictly greater.
+* After the first failure only the SAD is accumulated.
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the source block
+*
+* @param[in] pu1_est
+* UWORD8 pointer to the estimated block
+*
+* @param[in] src_strd
+* integer source stride
+*
+* @param[in] est_strd
+* integer stride of the estimated block
+*
+* @param[in] pu2_thrsh
+* Thresholds: elements 0..7 are read as one vector, element 8 as a scalar
+*
+* @param[out] pi4_mb_distortion
+* Sum of the SADs of all sixteen 4x4 sub-blocks
+*
+* @param[out] pu4_is_zero
+* 0 if every test passed, 1 otherwise. NOTE(review): despite the name, 1 marks a failed test - confirm with callers
+*****************************************************************************/
+void ime_compute_satqd_16x16_lumainter_sse42(UWORD8 *pu1_src,
+ UWORD8 *pu1_est,
+ WORD32 src_strd,
+ WORD32 est_strd,
+ UWORD16 *pu2_thrsh,
+ WORD32 *pi4_mb_distortion,
+ UWORD32 *pu4_is_zero)
+{
+ __m128i src_r0, src_r1, src_r2, src_r3;
+ __m128i est_r0, est_r1, est_r2, est_r3;
+ __m128i temp0, temp1, temp2, temp3, temp4;
+ __m128i zero = _mm_setzero_si128(); // all bits reset to zero
+ __m128i all_one = _mm_set1_epi8(0xFF); // all bits set: XOR mask for NOT and mask for the all-zero test
+ __m128i sad_b1, sad_b2, threshold;
+ WORD16 sad_1, sad_2;
+ WORD32 i;
+ UWORD32 flag = 0; // latched to 1 by the first sub-block that fails a threshold test
+ WORD32 test1, test2;
+ threshold = _mm_loadu_si128((__m128i *) pu2_thrsh); // pu2_thrsh[0..7], one threshold per 16-bit lane
+ (*pi4_mb_distortion) = 0;
+ /* One iteration per band of four rows: left 8 columns first, then the right 8 columns */
+ for (i=0; i<4; i++)
+ {
+ src_r0 = _mm_loadl_epi64((__m128i *) pu1_src); //Row 0 - Block1 and 2
+ src_r1 = _mm_loadl_epi64((__m128i *) (pu1_src + src_strd)); //Row 1 - Block1 and 2
+ src_r2 = _mm_loadl_epi64((__m128i *) (pu1_src + 2 * src_strd)); //Row 2 - Block1 and 2
+ src_r3 = _mm_loadl_epi64((__m128i *) (pu1_src + 3 * src_strd)); //Row 3 - Block1 and 2
+
+ src_r0 = _mm_cvtepu8_epi16(src_r0);
+ src_r1 = _mm_cvtepu8_epi16(src_r1);
+ src_r2 = _mm_cvtepu8_epi16(src_r2);
+ src_r3 = _mm_cvtepu8_epi16(src_r3);
+ /* Same four rows of the estimate, widened to 16-bit lanes like the source */
+ est_r0 = _mm_loadl_epi64((__m128i *) pu1_est);
+ est_r1 = _mm_loadl_epi64((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadl_epi64((__m128i *) (pu1_est + 2 * est_strd));
+ est_r3 = _mm_loadl_epi64((__m128i *) (pu1_est + 3 * est_strd));
+
+ est_r0 = _mm_cvtepu8_epi16(est_r0);
+ est_r1 = _mm_cvtepu8_epi16(est_r1);
+ est_r2 = _mm_cvtepu8_epi16(est_r2);
+ est_r3 = _mm_cvtepu8_epi16(est_r3);
+ /* Per-pixel absolute difference |src - est| in 16-bit lanes */
+ src_r0 = _mm_sub_epi16(src_r0, est_r0);
+ src_r1 = _mm_sub_epi16(src_r1, est_r1);
+ src_r2 = _mm_sub_epi16(src_r2, est_r2);
+ src_r3 = _mm_sub_epi16(src_r3, est_r3);
+
+ src_r0 = _mm_abs_epi16(src_r0);
+ src_r1 = _mm_abs_epi16(src_r1);
+ src_r2 = _mm_abs_epi16(src_r2);
+ src_r3 = _mm_abs_epi16(src_r3);
+ /* Fold rows column by column: row0 + row3 and row1 + row2 */
+ src_r0 = _mm_add_epi16(src_r0, src_r3); //s1 s4 s4 s1 a1 a4 a4 a1
+ src_r1 = _mm_add_epi16(src_r1, src_r2); //s2 s3 s3 s2 a2 a3 a3 a2
+
+ //SAD calculation
+ temp0 = _mm_add_epi16(src_r0, src_r1); //s1+s2 s4+s3 s4+s3 s1+s2 a1+a2 a4+a3 a4+a3 a1+a2
+ temp0 = _mm_hadd_epi16(temp0, zero);
+ temp0 = _mm_hadd_epi16(temp0, zero); //sad1, sad2 - 16bit values
+
+ sad_1 = _mm_extract_epi16(temp0, 0);
+ sad_2 = _mm_extract_epi16(temp0, 1);
+
+ (*pi4_mb_distortion) += sad_1 + sad_2;
+ /* Threshold tests run only until the first failure has been latched */
+ if (flag == 0) {
+ sad_b1 = _mm_set1_epi16((sad_1 << 1));
+ sad_b2 = _mm_set1_epi16((sad_2 << 1));
+ /* 0x9c reorders the four columns of each sub-block to (c0, c3, c1, c2) so that hadd pairs outer and inner columns */
+ src_r0 = _mm_shufflelo_epi16(src_r0, 0x9c); //Block 0 s1 s1 s4 s4 a1 a4 a4 a1
+ src_r0 = _mm_shufflehi_epi16(src_r0, 0x9c); //Block 1 s1 s1 s4 s4 a1 a1 a4 a4
+
+ src_r1 = _mm_shufflelo_epi16(src_r1, 0x9c); //Block 0 s2 s2 s3 s3 a2 a3 a3 a2
+ src_r1 = _mm_shufflehi_epi16(src_r1, 0x9c); //Block 1 s2 s2 s3 s3 a2 a2 a3 a3
+
+ src_r0 = _mm_hadd_epi16(src_r0, zero); //s1 s4 a1 a4 0 0 0 0
+ src_r1 = _mm_hadd_epi16(src_r1, zero); //s2 s3 a2 a3 0 0 0 0
+
+ temp0 = _mm_slli_epi16(src_r0, 1);//s1<<1 s4<<1 a1<<1 a4<<1 0 0 0 0
+ temp1 = _mm_slli_epi16(src_r1, 1);//s2<<1 s3<<1 a2<<1 a3<<1 0 0 0 0
+
+ temp0 = _mm_shufflelo_epi16(temp0, 0xb1);//s4<<1 s1<<1 a4<<1 a1<<1 0 0 0 0
+ temp1 = _mm_shufflelo_epi16(temp1, 0xb1);//s3<<1 s2<<1 a3<<1 a2<<1 0 0 0 0
+
+ temp2 = _mm_sub_epi16(src_r0, temp1);//(s1-s3<<1) (s4-s2<<1) (a1-a3<<1) (a4-a2<<1) 0 0 0 0
+ temp3 = _mm_sub_epi16(src_r1, temp0);//(s2-s4<<1) (s3-s1<<1) (a2-a4<<1) (a3-a1<<1) 0 0 0 0
+
+ temp4 = _mm_add_epi16(src_r0, src_r1);//s1+s2 s4+s3 a1+a2 a4+a3 0 0 0 0
+
+ temp0 = _mm_hadd_epi16(src_r0, zero); //s1+s4 a1+a4 0 0 0 0 0 0
+ temp1 = _mm_hadd_epi16(src_r1, zero); //s2+s3 a2+a3 0 0 0 0 0 0
+
+ temp0 = _mm_unpacklo_epi16(temp0, temp1);//s1+s4 s2+s3 a1+a4 a2+a3 0 0 0 0
+
+ temp0 = _mm_unpacklo_epi32(temp0, temp2);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1)
+ temp1 = _mm_unpacklo_epi32(temp4, temp3);//s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+ temp2 = _mm_unpacklo_epi64(temp0, temp1);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1)
+ temp3 = _mm_unpackhi_epi64(temp0, temp1); //a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+ sad_b1 = _mm_sub_epi16(sad_b1, temp2); //lsi values Block0 : 2*sad minus each of the eight terms
+ sad_b2 = _mm_sub_epi16(sad_b2, temp3); //lsi values Block1 : 2*sad minus each of the eight terms
+
+ temp0 = _mm_cmpgt_epi16(threshold, sad_b1); //if any threshold[i]>ls[i], corresponding 16-bit value in temp becomes 0xffff
+
+ temp1 = _mm_cmpgt_epi16(threshold, sad_b2);
+
+ temp0 = _mm_xor_si128(temp0, all_one); //Xor with 1 => NOT operation
+ temp1 = _mm_xor_si128(temp1, all_one);
+ /* After the NOT a set lane means threshold <= value; _mm_test_all_zeros yields 1 only when no lane is set */
+ test1 = _mm_test_all_zeros(temp0, all_one);
+ test2 = _mm_test_all_zeros(temp1, all_one);
+ /* Latch the flag when any lane failed or when a sub-block SAD reaches pu2_thrsh[8] */
+ if (test1 == 0 || test2 == 0 || pu2_thrsh[8] <= sad_1
+ || pu2_thrsh[8] <= sad_2)
+ flag = 1;
+ }
+ /* Right half of the band: the same processing on columns 8..15 */
+ pu1_src += 8;
+ pu1_est += 8;
+
+ src_r0 = _mm_loadl_epi64((__m128i *) pu1_src); //Row 0 - Block1 and 2
+ src_r1 = _mm_loadl_epi64((__m128i *) (pu1_src + src_strd)); //Row 1 - Block1 and 2
+ src_r2 = _mm_loadl_epi64((__m128i *) (pu1_src + 2 * src_strd)); //Row 2 - Block1 and 2
+ src_r3 = _mm_loadl_epi64((__m128i *) (pu1_src + 3 * src_strd)); //Row 3 - Block1 and 2
+
+ src_r0 = _mm_cvtepu8_epi16(src_r0);
+ src_r1 = _mm_cvtepu8_epi16(src_r1);
+ src_r2 = _mm_cvtepu8_epi16(src_r2);
+ src_r3 = _mm_cvtepu8_epi16(src_r3);
+
+ est_r0 = _mm_loadl_epi64((__m128i *) pu1_est);
+ est_r1 = _mm_loadl_epi64((__m128i *) (pu1_est + est_strd));
+ est_r2 = _mm_loadl_epi64((__m128i *) (pu1_est + 2 * est_strd));
+ est_r3 = _mm_loadl_epi64((__m128i *) (pu1_est + 3 * est_strd));
+
+ est_r0 = _mm_cvtepu8_epi16(est_r0);
+ est_r1 = _mm_cvtepu8_epi16(est_r1);
+ est_r2 = _mm_cvtepu8_epi16(est_r2);
+ est_r3 = _mm_cvtepu8_epi16(est_r3);
+
+ src_r0 = _mm_sub_epi16(src_r0, est_r0);
+ src_r1 = _mm_sub_epi16(src_r1, est_r1);
+ src_r2 = _mm_sub_epi16(src_r2, est_r2);
+ src_r3 = _mm_sub_epi16(src_r3, est_r3);
+
+ src_r0 = _mm_abs_epi16(src_r0);
+ src_r1 = _mm_abs_epi16(src_r1);
+ src_r2 = _mm_abs_epi16(src_r2);
+ src_r3 = _mm_abs_epi16(src_r3);
+
+ src_r0 = _mm_add_epi16(src_r0, src_r3); //s1 s4 s4 s1 a1 a4 a4 a1
+ src_r1 = _mm_add_epi16(src_r1, src_r2); //s2 s3 s3 s2 a2 a3 a3 a2
+
+ //SAD calculation
+ temp0 = _mm_add_epi16(src_r0, src_r1);
+ temp0 = _mm_hadd_epi16(temp0, zero);
+ temp0 = _mm_hadd_epi16(temp0, zero); //sad1, sad2 - 16bit values
+
+ sad_1 = _mm_extract_epi16(temp0, 0);
+ sad_2 = _mm_extract_epi16(temp0, 1);
+
+ (*pi4_mb_distortion) += sad_1 + sad_2;
+ /* Same threshold tests as for the left half, again skipped once the flag is latched */
+ if (flag == 0) {
+ sad_b1 = _mm_set1_epi16((sad_1 << 1));
+ sad_b2 = _mm_set1_epi16((sad_2 << 1));
+
+ src_r0 = _mm_shufflelo_epi16(src_r0, 0x9c); //Block 0 s1 s1 s4 s4 a1 a4 a4 a1
+ src_r0 = _mm_shufflehi_epi16(src_r0, 0x9c); //Block 1 s1 s1 s4 s4 a1 a1 a4 a4
+
+ src_r1 = _mm_shufflelo_epi16(src_r1, 0x9c); //Block 0 s2 s2 s3 s3 a2 a3 a3 a2
+ src_r1 = _mm_shufflehi_epi16(src_r1, 0x9c); //Block 1 s2 s2 s3 s3 a2 a2 a3 a3
+
+ src_r0 = _mm_hadd_epi16(src_r0, zero); //s1 s4 a1 a4 0 0 0 0
+ src_r1 = _mm_hadd_epi16(src_r1, zero); //s2 s3 a2 a3 0 0 0 0
+
+ temp0 = _mm_slli_epi16(src_r0, 1);//s1<<1 s4<<1 a1<<1 a4<<1 0 0 0 0
+ temp1 = _mm_slli_epi16(src_r1, 1);//s2<<1 s3<<1 a2<<1 a3<<1 0 0 0 0
+
+ temp0 = _mm_shufflelo_epi16(temp0, 0xb1);//s4<<1 s1<<1 a4<<1 a1<<1 0 0 0 0
+ temp1 = _mm_shufflelo_epi16(temp1, 0xb1);//s3<<1 s2<<1 a3<<1 a2<<1 0 0 0 0
+
+ temp2 = _mm_sub_epi16(src_r0, temp1);//(s1-s3<<1) (s4-s2<<1) (a1-a3<<1) (a4-a2<<1) 0 0 0 0
+ temp3 = _mm_sub_epi16(src_r1, temp0);//(s2-s4<<1) (s3-s1<<1) (a2-a4<<1) (a3-a1<<1) 0 0 0 0
+
+ temp4 = _mm_add_epi16(src_r0, src_r1);//s1+s2 s4+s3 a1+a2 a4+a3 0 0 0 0
+
+ temp0 = _mm_hadd_epi16(src_r0, zero); //s1+s4 a1+a4 0 0 0 0 0 0
+ temp1 = _mm_hadd_epi16(src_r1, zero); //s2+s3 a2+a3 0 0 0 0 0 0
+
+ temp0 = _mm_unpacklo_epi16(temp0, temp1);//s1+s4 s2+s3 a1+a4 a2+a3 0 0 0 0
+
+ temp0 = _mm_unpacklo_epi32(temp0, temp2);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1)
+ temp1 = _mm_unpacklo_epi32(temp4, temp3);//s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+ temp2 = _mm_unpacklo_epi64(temp0, temp1);//s1+s4 s2+s3 (s1-s3<<1) (s4-s2<<1) s1+s2 s4+s3 (s2-s4<<1) (s3-s1<<1)
+ temp3 = _mm_unpackhi_epi64(temp0, temp1); //a1+a4 a2+a3 (a1-a3<<1) (a4-a2<<1) a1+a2 a4+a3 (a2-a4<<1) (a3-a1<<1)
+
+ sad_b1 = _mm_sub_epi16(sad_b1, temp2); //lsi values Block0 : 2*sad minus each of the eight terms
+ sad_b2 = _mm_sub_epi16(sad_b2, temp3); //lsi values Block1 : 2*sad minus each of the eight terms
+
+ temp0 = _mm_cmpgt_epi16(threshold, sad_b1); //if any threshold[i]>ls[i], corresponding 16-bit value in temp becomes 0xffff
+
+ temp1 = _mm_cmpgt_epi16(threshold, sad_b2);
+
+ temp0 = _mm_xor_si128(temp0, all_one); //Xor with 1 => NOT operation
+ temp1 = _mm_xor_si128(temp1, all_one);
+
+ test1 = _mm_test_all_zeros(temp0, all_one);
+ test2 = _mm_test_all_zeros(temp1, all_one);
+
+ if (test1 == 0 || test2 == 0 || pu2_thrsh[8] <= sad_1
+ || pu2_thrsh[8] <= sad_2)
+ flag = 1;
+ }
+ /* Step back to column 0 and down four rows for the next band */
+ pu1_src += 4*src_strd - 8;
+ pu1_est += 4*est_strd - 8;
+ }
+ /* Publish the latched flag: 1 once any sub-block failed a threshold test, 0 otherwise */
+ *pu4_is_zero = flag;
+}
diff --git a/encoder/x86/ime_platform_macros.h b/encoder/x86/ime_platform_macros.h
new file mode 100755
index 0000000..18e2e8f
--- /dev/null
+++ b/encoder/x86/ime_platform_macros.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ime_platform_macros.h
+*
+* @brief
+* Platform specific Macro definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IME_PLATFORM_MACROS_H_
+#define _IME_PLATFORM_MACROS_H_
+
+/*****************************************************************************/
+/* Function macro definitions */
+/*****************************************************************************/
+/* USADA8: sad += ABS(src[i] - est[i]) for i = 0..3. Arguments and the whole expansion are parenthesised so that expressions such as (p + 4) expand as intended; ABS is expected to be provided by the including file. */
+#define USADA8(src,est,sad) \
+ ((sad) += ABS((src)[0]-(est)[0]) + \
+ ABS((src)[1]-(est)[1]) + \
+ ABS((src)[2]-(est)[2]) + \
+ ABS((src)[3]-(est)[3]))
+
+
+#endif /* _IME_PLATFORM_MACROS_H_ */
diff --git a/test/Android.mk b/test/Android.mk
new file mode 100755
index 0000000..0085832
--- /dev/null
+++ b/test/Android.mk
@@ -0,0 +1,8 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+# Each application below has its own makefile located next to this file
+# encoder: include encoder.mk from this directory
+include $(LOCAL_PATH)/encoder.mk
+
+# decoder: include decoder.mk from this directory
+include $(LOCAL_PATH)/decoder.mk
diff --git a/test/decoder.mk b/test/decoder.mk
new file mode 100755
index 0000000..1a49a92
--- /dev/null
+++ b/test/decoder.mk
@@ -0,0 +1,13 @@
+LOCAL_PATH := $(call my-dir)
+# Build rules for the avcdec executable (source: decoder/main.c)
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := avcdec
+LOCAL_MODULE_TAGS := optional
+# PROFILE_ENABLE turns on the timing macros in decoder/main.c; MD5_DISABLE turns calc_md5_cksum into an empty macro there
+LOCAL_CFLAGS := -DPROFILE_ENABLE -DARM -DMD5_DISABLE -fPIC
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/../decoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/decoder/
+LOCAL_SRC_FILES := decoder/main.c
+LOCAL_STATIC_LIBRARIES := libavcdec
+
+include $(BUILD_EXECUTABLE)
diff --git a/test/decoder/main.c b/test/decoder/main.c
new file mode 100755
index 0000000..0076ce9
--- /dev/null
+++ b/test/decoder/main.c
@@ -0,0 +1,3196 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : main.c */
+/* */
+/* Description : Contains an application that demonstrates use of H264*/
+/* decoder API */
+/* */
+/* List of Functions : */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 Harish Initial Version */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef X86_MINGW
+#include <signal.h>
+#endif
+
+#ifndef IOS
+#include <malloc.h>
+#endif
+#ifdef IOS_DISPLAY
+#include "cast_types.h"
+#else
+#include "ih264_typedefs.h"
+#endif
+
+#include "iv.h"
+#include "ivd.h"
+#include "ih264d.h"
+#include "ithread.h"
+
+#ifdef WINDOWS_TIMER
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+/* Application-wide constants */
+#define ALIGN8(x) ((((x) + 7) >> 3) << 3) /* rounds x up to the next multiple of 8 */
+#define NUM_DISPLAY_BUFFERS 4
+#define DEFAULT_FPS 30
+
+#define ENABLE_DEGRADE 0
+#define MAX_DISP_BUFFERS 64 /* capacity of the s_disp_* arrays in vid_dec_ctx_t */
+#define EXTRA_DISP_BUFFERS 8
+#define STRLENGTH 1000 /* size of the file name buffers in vid_dec_ctx_t */
+
+//#define TEST_FLUSH
+#define FLUSH_FRM_CNT 100 /* presumably only relevant when TEST_FLUSH is enabled - confirm */
+//#define APP_EXTRA_BUFS 1
+
+#ifdef IOS
+#define PATHLENMAX 500 /* size of filename_with_path below */
+char filename_with_path[PATHLENMAX];
+#endif
+/* TIMER is the clock sample type: LARGE_INTEGER or struct timeval when PROFILE_ENABLE is defined, a WORD32 otherwise */
+#ifdef PROFILE_ENABLE
+ #ifdef WINDOWS_TIMER
+ typedef LARGE_INTEGER TIMER;
+ #else
+ //#ifdef GCC_TIMER
+ typedef struct timeval TIMER;
+ //#endif
+ #endif
+#else
+ typedef WORD32 TIMER;
+#endif
+/* GETTIME(timer) samples the clock into *timer. ELAPSEDTIME(start, end, elapsed, frequency) stores end - start into elapsed. Both expand to nothing without PROFILE_ENABLE. */
+#ifdef PROFILE_ENABLE
+ #ifdef WINDOWS_TIMER
+ #define GETTIME(timer) QueryPerformanceCounter(timer);
+ #else
+ //#ifdef GCC_TIMER
+ #define GETTIME(timer) gettimeofday(timer,NULL);
+ //#endif
+ #endif
+ /* Windows flavour: only LowPart of the counters is used, scaled by 1000000 / frequency.LowPart. gettimeofday flavour: microseconds from tv_sec and tv_usec, frequency is ignored. Note: GETTIME and the gettimeofday ELAPSEDTIME already end in ';'. */
+ #ifdef WINDOWS_TIMER
+ #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \
+ { \
+ TIMER s_temp_time; \
+ s_temp_time.LowPart = s_end_timer.LowPart - s_start_timer.LowPart ; \
+ s_elapsed_time = (UWORD32) ( ((DOUBLE)s_temp_time.LowPart / (DOUBLE)frequency.LowPart ) * 1000000); \
+ }
+ #else
+ //#ifdef GCC_TIMER
+ #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \
+ s_elapsed_time = ((s_end_timer.tv_sec - s_start_timer.tv_sec) * 1000000) + (s_end_timer.tv_usec - s_start_timer.tv_usec);
+ //#endif
+ #endif
+
+#else
+ #define GETTIME(timer)
+ #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency)
+#endif
+
+
+/* Function declarations: MD5 checksum helper and the optional display backends (SDL, GDL, FBDEV, IOS) */
+#ifndef MD5_DISABLE
+void calc_md5_cksum(UWORD8 *pu1_inbuf,UWORD32 u4_stride,UWORD32 u4_width,UWORD32 u4_height,UWORD8 *pu1_cksum_p );
+#else /* MD5_DISABLE: calls to calc_md5_cksum expand to nothing */
+#define calc_md5_cksum(a, b, c, d, e)
+#endif
+#ifdef SDL_DISPLAY /* SDL backend: the eight entry points below match the function pointers in vid_dec_ctx_t */
+void* sdl_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+void sdl_alloc_disp_buffers(void *);
+void sdl_display(void *, WORD32 );
+void sdl_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+void sdl_disp_deinit(void *);
+void sdl_disp_usleep(UWORD32);
+IV_COLOR_FORMAT_T sdl_get_color_fmt(void);
+UWORD32 sdl_get_stride(void);
+#endif
+/* gdl_* backend, enabled by INTEL_CE5300: same eight signatures as the sdl_* set */
+#ifdef INTEL_CE5300
+void* gdl_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+void gdl_alloc_disp_buffers(void *);
+void gdl_display(void *, WORD32 );
+void gdl_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+void gdl_disp_deinit(void *);
+void gdl_disp_usleep(UWORD32);
+IV_COLOR_FORMAT_T gdl_get_color_fmt(void);
+UWORD32 gdl_get_stride(void);
+#endif
+/* fbd_* backend, enabled by FBDEV_DISPLAY: same eight signatures as the sdl_* set */
+#ifdef FBDEV_DISPLAY
+void* fbd_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+void fbd_alloc_disp_buffers(void *);
+void fbd_display(void *, WORD32 );
+void fbd_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+void fbd_disp_deinit(void *);
+void fbd_disp_usleep(UWORD32);
+IV_COLOR_FORMAT_T fbd_get_color_fmt(void);
+UWORD32 fbd_get_stride(void);
+#endif
+/* ios_* backend, enabled by IOS_DISPLAY: same eight signatures as the sdl_* set */
+#ifdef IOS_DISPLAY
+void* ios_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+void ios_alloc_disp_buffers(void *);
+void ios_display(void *, WORD32 );
+void ios_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+void ios_disp_deinit(void *);
+void ios_disp_usleep(UWORD32);
+IV_COLOR_FORMAT_T ios_get_color_fmt(void);
+UWORD32 ios_get_stride(void);
+#endif
+/* Application context of the decoder test program: option values, file names, display queues and display backend hooks */
+typedef struct
+{
+ UWORD32 u4_piclen_flag;
+ UWORD32 u4_file_save_flag;
+ UWORD32 u4_chksum_save_flag;
+ UWORD32 u4_max_frm_ts;
+ IV_COLOR_FORMAT_T e_output_chroma_format;
+ IVD_ARCH_T e_arch;
+ IVD_SOC_T e_soc;
+ UWORD32 dump_q_rd_idx;
+ UWORD32 dump_q_wr_idx;
+ WORD32 disp_q_wr_idx;
+ WORD32 disp_q_rd_idx;
+ /* NOTE(review): "cocodec_obj" is presumably the codec instance handle - confirm against its users */
+ void *cocodec_obj;
+ UWORD32 u4_share_disp_buf;
+ UWORD32 num_disp_buf;
+ UWORD32 b_pic_present;
+ UWORD32 u4_disable_dblk_level;
+ WORD32 i4_degrade_type;
+ WORD32 i4_degrade_pics;
+ UWORD32 u4_num_cores;
+ UWORD32 disp_delay;
+ WORD32 trace_enable;
+ CHAR ac_trace_fname[STRLENGTH];
+ CHAR ac_piclen_fname[STRLENGTH];
+ CHAR ac_ip_fname[STRLENGTH];
+ CHAR ac_op_fname[STRLENGTH];
+ CHAR ac_op_chksum_fname[STRLENGTH];
+ ivd_out_bufdesc_t s_disp_buffers[MAX_DISP_BUFFERS];
+ iv_yuv_buf_t s_disp_frm_queue[MAX_DISP_BUFFERS];
+ UWORD32 s_disp_frm_id_queue[MAX_DISP_BUFFERS];
+ UWORD32 loopback;
+ UWORD32 display;
+ UWORD32 full_screen;
+ UWORD32 fps;
+ UWORD32 max_wd;
+ UWORD32 max_ht;
+ UWORD32 max_level;
+
+ UWORD32 u4_strd;
+
+ /* For signalling to display thread */
+ UWORD32 u4_pic_wd;
+ UWORD32 u4_pic_ht;
+
+ /* For IOS display */
+ WORD32 i4_screen_wd;
+ WORD32 i4_screen_ht;
+
+ //UWORD32 u4_output_present;
+ WORD32 quit;
+ WORD32 paused;
+
+
+ void *pv_disp_ctx;
+ void *display_thread_handle;
+ WORD32 display_thread_created;
+ volatile WORD32 display_init_done;
+ volatile WORD32 display_deinit_flag;
+ /* Display backend hooks; the signatures match the sdl_/gdl_/fbd_/ios_ prototypes declared earlier in this file */
+ void *(*disp_init)(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+ void (*alloc_disp_buffers)(void *);
+ void (*display_buffer)(void *, WORD32);
+ void (*set_disp_buffers)(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+ void (*disp_deinit)(void *);
+ void (*disp_usleep)(UWORD32);
+ IV_COLOR_FORMAT_T (*get_color_fmt)(void);
+ UWORD32 (*get_stride)(void);
+} vid_dec_ctx_t;
+
+
+/* Identifier of a command line option; every value except INVALID has an entry in argument_mapping */
+typedef enum
+{
+ INVALID,
+ HELP,
+ VERSION,
+ INPUT_FILE,
+ OUTPUT,
+ CHKSUM,
+ SAVE_OUTPUT,
+ SAVE_CHKSUM,
+ CHROMA_FORMAT,
+ NUM_FRAMES,
+ NUM_CORES,
+ DISABLE_DEBLOCK_LEVEL,
+ SHARE_DISPLAY_BUF,
+ LOOPBACK,
+ DISPLAY,
+ FULLSCREEN,
+ FPS,
+ TRACE,
+ MAX_WD,
+ MAX_HT,
+ MAX_LEVEL,
+ CONFIG,
+
+ DEGRADE_TYPE,
+ DEGRADE_PICS,
+ ARCH,
+ SOC,
+ PICLEN,
+ PICLEN_FILE,
+} ARGUMENT_T;
+/* One row of the option table: short name, long name, option identifier and help text */
+typedef struct
+{
+ CHAR argument_shortname[4];
+ CHAR argument_name[128];
+ ARGUMENT_T argument;
+ CHAR description[512];
+} argument_t;
+/* Option table: short name, long name, option id, help text. Options without a short form use "--" as the short name, so every real short name must be unique. */
+static const argument_t argument_mapping[] =
+{
+ {"-h", "--help", HELP,
+ "Print this help\n"},
+ { "-c", "--config", CONFIG,
+ "config file (Default: test.cfg)\n" },
+
+ {"-v", "--version", VERSION,
+ "Version information\n"},
+ {"-i", "--input", INPUT_FILE,
+ "Input file\n"},
+ {"-o", "--output", OUTPUT,
+ "Output file\n"},
+ {"--", "--piclen", PICLEN,
+ "Flag to signal if the decoder has to use a file containing number of bytes in each picture to be fed in each call\n"},
+ {"--", "--piclen_file", PICLEN_FILE,
+ "File containing number of bytes in each picture - each line containing one i4_size\n"},
+ {"--", "--chksum", CHKSUM,
+ "Output MD5 Checksum file\n"},
+ { "-s", "--save_output", SAVE_OUTPUT,
+ "Save Output file\n" },
+ { "--", "--save_chksum", SAVE_CHKSUM,
+ "Save Check sum file\n" },
+ {"--", "--chroma_format", CHROMA_FORMAT,
+ "Output Chroma format Supported values YUV_420P, YUV_422ILE, RGB_565, YUV_420SP_UV, YUV_420SP_VU\n" },
+ { "-n", "--num_frames", NUM_FRAMES,
+ "Number of frames to be decoded\n" },
+ { "--", "--num_cores", NUM_CORES,
+ "Number of cores to be used\n" },
+ { "--", "--share_display_buf", SHARE_DISPLAY_BUF,
+ "Enable shared display buffer mode\n" },
+ {"--", "--disable_deblock_level", DISABLE_DEBLOCK_LEVEL,
+ "Disable deblocking level : 0 to 4 - 0 Enable deblocking 4 Disable deblocking completely\n"},
+ { "--", "--loopback", LOOPBACK,
+ "Enable playback in a loop\n" },
+ { "--", "--display", DISPLAY,
+ "Enable display (uses SDL)\n" },
+ { "--", "--fullscreen", FULLSCREEN,
+ "Enable full screen (Only for GDL and SDL)\n" },
+ { "--", "--fps", FPS,
+ "FPS to be used for display \n" },
+ {"--", "--trace", TRACE, /* long form only: "-i" already belongs to --input */
+ "Trace file\n"},
+ { "--", "--max_wd", MAX_WD,
+ "Maximum width (Default: 2560) \n" },
+ { "--", "--max_ht", MAX_HT,
+ "Maximum height (Default: 1600)\n" },
+
+ { "--", "--max_level", MAX_LEVEL,
+ "Maximum Decoder Level (Default: 50)\n" },
+
+ {"--", "--degrade_type", DEGRADE_TYPE,
+ "Degrade type : 0: No degrade 0th bit set : Disable SAO 1st bit set : Disable deblocking 2nd bit set : Faster inter prediction filters 3rd bit set : Fastest inter prediction filters\n" },
+ {"--", "--degrade_pics", DEGRADE_PICS,
+ "Degrade pics : 0 : No degrade 1 : Only on non-reference frames 2 : Do not degrade every 4th or key frames 3 : All non-key frames 4 : All frames\n"},
+
+ {"--", "--arch", ARCH,
+ "Set Architecture. Supported values ARM_NONEON, ARM_A9Q, ARM_A7, ARM_A5, ARM_NEONINTR,ARMV8_GENERIC, X86_GENERIC, X86_SSSE3, X86_SSE4 \n" },
+ {"--", "--soc", SOC,
+ "Set SOC. Supported values GENERIC, HISI_37X \n" },
+
+};
+
+#define PEAK_WINDOW_SIZE 8 /* NOTE(review): use is not visible in this part of the file */
+#define MAX_FRAME_WIDTH 2560 /* same number the --max_wd help text quotes as the default */
+#define MAX_FRAME_HEIGHT 1600 /* same number the --max_ht help text quotes as the default */
+#define MAX_LEVEL_SUPPORTED 50 /* same number the --max_level help text quotes as the default */
+#define MAX_REF_FRAMES 16 /* NOTE(review): use is not visible in this part of the file */
+#define MAX_REORDER_FRAMES 16 /* NOTE(review): use is not visible in this part of the file */
+#define DEFAULT_SHARE_DISPLAY_BUF 0 /* presumably the initial value of u4_share_disp_buf - confirm at the point of use */
+#define STRIDE 0 /* NOTE(review): use is not visible in this part of the file */
+#define DEFAULT_NUM_CORES 1 /* presumably the initial value of u4_num_cores - confirm at the point of use */
+
+
+#define DUMP_SINGLE_BUF 0 /* non-zero selects the single-fwrite paths in dump_output() */
+#define IV_ISFATALERROR(x) (((x) >> IVD_FATALERROR) & 0x1) /* 1 when bit number IVD_FATALERROR of x is set, else 0 */
+
+#define ivd_api_function ih264d_api_function /* every ivd_api_function() call in this file resolves to the H.264 decoder entry point */
+
+#ifdef IOS /* the trace file name buffer exists only in IOS builds */
+char filename_trace[PATHLENMAX]; /* NOTE(review): writer/reader of this buffer are not visible here */
+#endif
+
+#if ANDROID_NDK
+/*****************************************************************************/
+/* */
+/* Function Name : raise */
+/* */
+/* Description : Needed as a workaround when the application is built in */
+/* Android NDK. This is an exception to be called for divide*/
+/* by zero error */
+/* */
+/* Inputs : a */
+/* Globals : */
+/* Processing : None */
+/* */
+/* Outputs : */
+/* Returns : */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+int raise(int a)
+{
+    /* Stub: the argument is ignored; the message is printed and success (0) is reported. */
+    (void)a;
+
+    printf("Divide by zero\n");
+
+    return 0;
+}
+#endif
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Names : ih264a_aligned_malloc, ih264a_aligned_free              */
+/*                                                                           */
+/*  Description    : Platform wrappers for aligned allocation and release.   */
+/*                   Exactly one pair is compiled, chosen by which platform  */
+/*                   macro is defined:                                       */
+/*                     _WIN32    : _aligned_malloc / _aligned_free           */
+/*                     IOS       : malloc / free (alignment is ignored)      */
+/*                     otherwise : memalign / free                           */
+/*                                                                           */
+/*  Inputs         : alignment - requested alignment in bytes                */
+/*                   i4_size   - number of bytes to allocate                 */
+/*                   pv_buf    - buffer previously returned by               */
+/*                               ih264a_aligned_malloc                       */
+/*  Returns        : ih264a_aligned_malloc returns whatever the underlying   */
+/*                   allocator returns; ih264a_aligned_free returns nothing  */
+/*                                                                           */
+/*  Issues         : The selection is a single #if/#elif/#else chain keyed   */
+/*                   on defined-ness, so a build always gets exactly one     */
+/*                   definition of each function.                            */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+#if defined(_WIN32)
+
+void * ih264a_aligned_malloc(WORD32 alignment, WORD32 i4_size)
+{
+    /* Note the argument order: the CRT call takes the size first */
+    return (void *)_aligned_malloc(i4_size, alignment);
+}
+
+void ih264a_aligned_free(void *pv_buf)
+{
+    /* Memory from _aligned_malloc must be released with _aligned_free */
+    _aligned_free(pv_buf);
+}
+
+#elif defined(IOS)
+
+void * ih264a_aligned_malloc(WORD32 alignment, WORD32 i4_size)
+{
+    /* The requested alignment is not applied on this platform */
+    (void)alignment;
+    return malloc(i4_size);
+}
+
+void ih264a_aligned_free(void *pv_buf)
+{
+    free(pv_buf);
+}
+
+#else
+
+void * ih264a_aligned_malloc(WORD32 alignment, WORD32 i4_size)
+{
+    return memalign(alignment, i4_size);
+}
+
+void ih264a_aligned_free(void *pv_buf)
+{
+    free(pv_buf);
+}
+
+#endif
+/*****************************************************************************/
+/* */
+/* Function Name : set_degrade */
+/* */
+/* Description : Control call to set degrade level */
+/* */
+/* */
+/* Inputs : codec_obj - Codec Handle */
+/* type - degrade type, a bitmask of the flags below */
+/* 0 : No degrade */
+/* 1st bit : Disable SAO */
+/* 2nd bit : Disable Deblock */
+/* 3rd bit : Faster MC for non-ref */
+/* 4th bit : Fastest MC for non-ref */
+/* pics - Pictures that are degraded */
+/* 0 : No degrade */
+/* 1 : Non-ref pictures */
+/* 2 : Pictures at given interval are not degraded */
+/* 3 : All non-key pictures */
+/* 4 : All pictures */
+/* Globals : */
+/* Processing : Calls degrade control to the codec */
+/* */
+/* Outputs : */
+/* Returns : Control call return i4_status */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T set_degrade(void *codec_obj, UWORD32 type, WORD32 pics)
+{
+    /* Fills an IH264D_CMD_CTL_DEGRADE control request and hands it to the   */
+    /* codec; the codec's status is returned unchanged.                      */
+    ih264d_ctl_degrade_ip_t s_degrade_ip;
+    ih264d_ctl_degrade_op_t s_degrade_op;
+    IV_API_CALL_STATUS_T e_ret;
+
+    /* Command selection */
+    s_degrade_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_degrade_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_DEGRADE;
+
+    /* Structure sizes */
+    s_degrade_ip.u4_size = sizeof(s_degrade_ip);
+    s_degrade_op.u4_size = sizeof(s_degrade_op);
+
+    /* Degrade parameters; the non-degrade interval is fixed at 4 */
+    s_degrade_ip.i4_degrade_type = type;
+    s_degrade_ip.i4_degrade_pics = pics;
+    s_degrade_ip.i4_nondegrade_interval = 4;
+
+    e_ret = ivd_api_function((iv_obj_t *)codec_obj,
+                             (void *)&s_degrade_ip,
+                             (void *)&s_degrade_op);
+
+    if(IV_SUCCESS != e_ret)
+    {
+        printf("Error in setting degrade level \n");
+    }
+
+    return e_ret;
+}
+
+
+
+/*****************************************************************************/
+/* */
+/* Function Name : enable_skipb_frames */
+/* */
+/* Description : Control call to enable skipping of b frames */
+/* */
+/* */
+/* Inputs : codec_obj : Codec handle */
+/* Globals : */
+/* Processing : Calls enable skip B frames control */
+/* */
+/* Outputs : */
+/* Returns : Control call return i4_status */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T enable_skipb_frames(void *codec_obj,
+                                         vid_dec_ctx_t *ps_app_ctx)
+{
+    /* Issues IVD_CMD_CTL_SETPARAMS with the frame skip mode set to          */
+    /* IVD_SKIP_B and returns the codec's status.                            */
+    ivd_ctl_set_config_ip_t s_cfg_ip;
+    ivd_ctl_set_config_op_t s_cfg_op;
+    IV_API_CALL_STATUS_T e_ret;
+
+    /* Command selection and structure sizes */
+    s_cfg_ip.u4_size = sizeof(s_cfg_ip);
+    s_cfg_op.u4_size = sizeof(s_cfg_op);
+    s_cfg_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_cfg_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+
+    /* Parameters: only the skip mode differs between the skip helpers */
+    s_cfg_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+    s_cfg_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+    s_cfg_ip.e_frm_skip_mode = IVD_SKIP_B;
+    s_cfg_ip.u4_disp_wd = ps_app_ctx->u4_strd;
+
+    e_ret = ivd_api_function((iv_obj_t *)codec_obj,
+                             (void *)&s_cfg_ip,
+                             (void *)&s_cfg_op);
+
+    if(IV_SUCCESS != e_ret)
+    {
+        printf("Error in Enable SkipB frames \n");
+    }
+
+    return e_ret;
+}
+/*****************************************************************************/
+/* */
+/* Function Name : disable_skipb_frames */
+/* */
+/* Description : Control call to disable skipping of b frames */
+/* */
+/* */
+/* Inputs : codec_obj : Codec handle */
+/* Globals : */
+/* Processing : Calls disable B frame skip control */
+/* */
+/* Outputs : */
+/* Returns : Control call return i4_status */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T disable_skipb_frames(void *codec_obj,
+                                          vid_dec_ctx_t *ps_app_ctx)
+{
+    /* Issues IVD_CMD_CTL_SETPARAMS with the frame skip mode set to          */
+    /* IVD_SKIP_NONE and returns the codec's status.                         */
+    ivd_ctl_set_config_ip_t s_cfg_ip;
+    ivd_ctl_set_config_op_t s_cfg_op;
+    IV_API_CALL_STATUS_T e_ret;
+
+    /* Command selection and structure sizes */
+    s_cfg_ip.u4_size = sizeof(s_cfg_ip);
+    s_cfg_op.u4_size = sizeof(s_cfg_op);
+    s_cfg_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_cfg_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+
+    /* Parameters */
+    s_cfg_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+    s_cfg_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+    s_cfg_ip.e_frm_skip_mode = IVD_SKIP_NONE;
+    s_cfg_ip.u4_disp_wd = ps_app_ctx->u4_strd;
+
+    e_ret = ivd_api_function((iv_obj_t *)codec_obj,
+                             (void *)&s_cfg_ip,
+                             (void *)&s_cfg_op);
+
+    if(IV_SUCCESS != e_ret)
+    {
+        printf("Error in Disable SkipB frames\n");
+    }
+
+    return e_ret;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : enable_skippb_frames */
+/* */
+/* Description : Control call to enable skipping of P & B frames */
+/* */
+/* */
+/* Inputs : codec_obj : Codec handle */
+/* Globals : */
+/* Processing : Calls enable skip P and B frames control */
+/* */
+/* Outputs : */
+/* Returns : Control call return i4_status */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T enable_skippb_frames(void *codec_obj,
+                                          vid_dec_ctx_t *ps_app_ctx)
+{
+    /* Issues IVD_CMD_CTL_SETPARAMS with the frame skip mode set to          */
+    /* IVD_SKIP_PB and returns the codec's status.                           */
+    ivd_ctl_set_config_ip_t s_cfg_ip;
+    ivd_ctl_set_config_op_t s_cfg_op;
+    IV_API_CALL_STATUS_T e_ret;
+
+    /* Command selection and structure sizes */
+    s_cfg_ip.u4_size = sizeof(s_cfg_ip);
+    s_cfg_op.u4_size = sizeof(s_cfg_op);
+    s_cfg_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_cfg_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+
+    /* Parameters */
+    s_cfg_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+    s_cfg_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+    s_cfg_ip.e_frm_skip_mode = IVD_SKIP_PB;
+    s_cfg_ip.u4_disp_wd = ps_app_ctx->u4_strd;
+
+    e_ret = ivd_api_function((iv_obj_t *)codec_obj,
+                             (void *)&s_cfg_ip,
+                             (void *)&s_cfg_op);
+
+    if(IV_SUCCESS != e_ret)
+    {
+        printf("Error in Enable SkipPB frames\n");
+    }
+
+    return e_ret;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : disable_skippb_frames */
+/* */
+/* Description : Control call to disable skipping of P and B frames */
+/* */
+/* */
+/* Inputs : codec_obj : Codec handle */
+/* Globals : */
+/* Processing : Calls disable P and B frame skip control */
+/* */
+/* Outputs : */
+/* Returns : Control call return i4_status */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T disable_skippb_frames(void *codec_obj,
+                                           vid_dec_ctx_t *ps_app_ctx)
+{
+    /* Issues IVD_CMD_CTL_SETPARAMS with the frame skip mode set to          */
+    /* IVD_SKIP_NONE and returns the codec's status.                         */
+    ivd_ctl_set_config_ip_t s_cfg_ip;
+    ivd_ctl_set_config_op_t s_cfg_op;
+    IV_API_CALL_STATUS_T e_ret;
+
+    /* Command selection and structure sizes */
+    s_cfg_ip.u4_size = sizeof(s_cfg_ip);
+    s_cfg_op.u4_size = sizeof(s_cfg_op);
+    s_cfg_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_cfg_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+
+    /* Parameters */
+    s_cfg_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+    s_cfg_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+    s_cfg_ip.e_frm_skip_mode = IVD_SKIP_NONE;
+    s_cfg_ip.u4_disp_wd = ps_app_ctx->u4_strd;
+
+    e_ret = ivd_api_function((iv_obj_t *)codec_obj,
+                             (void *)&s_cfg_ip,
+                             (void *)&s_cfg_op);
+
+    if(IV_SUCCESS != e_ret)
+    {
+        printf("Error in Disable SkipPB frames\n");
+    }
+
+    return e_ret;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : release_disp_frame */
+/* */
+/* Description : Calls release display control - Used to signal to the */
+/* decoder that this particular buffer has been displayed */
+/* and that the codec is now free to write to this buffer */
+/* */
+/* */
+/* Inputs : codec_obj : Codec Handle */
+/* buf_id : Buffer Id of the buffer to be released */
+/* This id would have been returned earlier by */
+/* the codec */
+/* Globals : */
+/* Processing : Calls Release Display call */
+/* */
+/* Outputs : */
+/* Returns : Status of release display call */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T release_disp_frame(void *codec_obj, UWORD32 buf_id)
+{
+    /* Sends IVD_CMD_REL_DISPLAY_FRAME for display buffer 'buf_id' and       */
+    /* returns the codec's status.                                           */
+    ivd_rel_display_frame_ip_t s_rel_ip;
+    ivd_rel_display_frame_op_t s_rel_op;
+    IV_API_CALL_STATUS_T e_ret;
+
+    s_rel_ip.u4_size = sizeof(s_rel_ip);
+    s_rel_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
+    s_rel_ip.u4_disp_buf_id = buf_id;
+
+    s_rel_op.u4_size = sizeof(s_rel_op);
+
+    e_ret = ivd_api_function((iv_obj_t *)codec_obj,
+                             (void *)&s_rel_ip,
+                             (void *)&s_rel_op);
+
+    if(IV_SUCCESS != e_ret)
+    {
+        printf("Error in Release Disp frame\n");
+    }
+
+    return e_ret;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : get_version */
+/* */
+/* Description : Control call to get codec version */
+/* */
+/* */
+/* Inputs : codec_obj : Codec handle */
+/* Globals : */
+/* Processing : Calls get version control and prints the version string */
+/* */
+/* Outputs : */
+/* Returns : Control call return i4_status */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T get_version(void *codec_obj)
+{
+    /* Asks the codec (IVD_CMD_CTL_GETVERSION) to write its version string   */
+    /* into a local 512-byte buffer and prints it; on failure the status and */
+    /* the codec's error code are printed instead.                           */
+    UWORD8 au1_version[512];
+    ivd_ctl_getversioninfo_ip_t s_ver_ip;
+    ivd_ctl_getversioninfo_op_t s_ver_op;
+    IV_API_CALL_STATUS_T e_ret;
+
+    s_ver_ip.u4_size = sizeof(s_ver_ip);
+    s_ver_op.u4_size = sizeof(s_ver_op);
+
+    s_ver_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_ver_ip.e_sub_cmd = IVD_CMD_CTL_GETVERSION;
+
+    s_ver_ip.pv_version_buffer = au1_version;
+    s_ver_ip.u4_version_buffer_size = sizeof(au1_version);
+
+    e_ret = ivd_api_function((iv_obj_t *)codec_obj,
+                             (void *)&s_ver_ip,
+                             (void *)&s_ver_op);
+
+    if(IV_SUCCESS == e_ret)
+    {
+        /* Printed through the request field, exactly as handed to the codec */
+        printf("Ittiam Decoder Version number: %s\n",
+               (char *)s_ver_ip.pv_version_buffer);
+    }
+    else
+    {
+        printf("Error in Getting Version number e_dec_status = %d u4_error_code = %x\n",
+               e_ret, s_ver_op.u4_error_code);
+    }
+
+    return e_ret;
+}
+/*****************************************************************************/
+/* */
+/* Function Name : codec_exit */
+/* */
+/* Description : handles unrecoverable errors */
+/* Inputs : Error message */
+/* Globals : None */
+/* Processing : Prints error message to console and exits. */
+/* Outputs : Error message to the console */
+/* Returns : None */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 06 2006 Sankar Creation */
+/* */
+/*****************************************************************************/
+void codec_exit(CHAR *pc_err_message)
+{
+    /* Print the message followed by a newline on stdout, then terminate     */
+    /* the process with exit status -1. This function does not return.       */
+    fprintf(stdout, "%s\n", pc_err_message);
+
+    exit(-1);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : dump_output */
+/* */
+/* Description : Used to dump output YUV */
+/* Inputs : App context, disp output desc, File pointer */
+/* Globals : None */
+/* Processing : Dumps to a file */
+/* Returns : None */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 06 2006 Sankar Creation */
+/* */
+/*****************************************************************************/
+void dump_output(vid_dec_ctx_t *ps_app_ctx,
+                 iv_yuv_buf_t *ps_disp_frm_buf,
+                 UWORD32 u4_disp_frm_id,
+                 FILE *ps_op_file,
+                 FILE *ps_op_chksum_file,
+                 WORD32 i4_op_frm_ts,
+                 UWORD32 file_save,
+                 UWORD32 chksum_save)
+
+{
+    /* Releases a display buffer back to the codec and, when requested,      */
+    /* writes its pixels to ps_op_file (file_save != 0) and its per-plane    */
+    /* MD5 digests to ps_op_chksum_file (chksum_save != 0).                  */
+    /*                                                                       */
+    /* Shared display buffer mode: the incoming frame and its id are first   */
+    /* stored in a ring of MAX_DISP_BUFFERS entries. Only once               */
+    /* i4_op_frm_ts >= disp_delay - 1 is the oldest stored frame taken out,  */
+    /* released and dumped; before that the function returns without         */
+    /* releasing or writing anything.                                        */
+    /*                                                                       */
+    /* Checksums are produced for IV_YUV_420P output only.                   */
+    /* Pixel data is written only when file_save is non-zero, in every       */
+    /* chroma format, so a checksum-only call never touches ps_op_file.      */
+
+    UWORD32 i;
+    iv_yuv_buf_t s_dump_disp_frm_buf;
+    UWORD32 u4_disp_id;
+
+    memset(&s_dump_disp_frm_buf, 0, sizeof(iv_yuv_buf_t));
+
+    if(ps_app_ctx->u4_share_disp_buf)
+    {
+        /* Wrap both ring indices before they are used */
+        if(ps_app_ctx->dump_q_wr_idx == MAX_DISP_BUFFERS)
+            ps_app_ctx->dump_q_wr_idx = 0;
+
+        if(ps_app_ctx->dump_q_rd_idx == MAX_DISP_BUFFERS)
+            ps_app_ctx->dump_q_rd_idx = 0;
+
+        /* Store the incoming frame descriptor and its buffer id */
+        ps_app_ctx->s_disp_frm_queue[ps_app_ctx->dump_q_wr_idx] =
+                        *ps_disp_frm_buf;
+        ps_app_ctx->s_disp_frm_id_queue[ps_app_ctx->dump_q_wr_idx] =
+                        u4_disp_frm_id;
+        ps_app_ctx->dump_q_wr_idx++;
+
+        if((WORD32)i4_op_frm_ts >= (WORD32)(ps_app_ctx->disp_delay - 1))
+        {
+            /* Enough frames are held back: take out the oldest one */
+            s_dump_disp_frm_buf =
+                            ps_app_ctx->s_disp_frm_queue[ps_app_ctx->dump_q_rd_idx];
+            u4_disp_id =
+                            ps_app_ctx->s_disp_frm_id_queue[ps_app_ctx->dump_q_rd_idx];
+            ps_app_ctx->dump_q_rd_idx++;
+        }
+        else
+        {
+            /* Still filling the delay window: nothing to release or dump */
+            return;
+        }
+    }
+    else
+    {
+        s_dump_disp_frm_buf = *ps_disp_frm_buf;
+        u4_disp_id = u4_disp_frm_id;
+    }
+
+    /* The buffer is handed back to the codec before its contents are        */
+    /* written out below (order kept from the original implementation).      */
+    release_disp_frame(ps_app_ctx->cocodec_obj, u4_disp_id);
+
+    if(0 == file_save && 0 == chksum_save)
+        return;
+
+    if(NULL == s_dump_disp_frm_buf.pv_y_buf)
+        return;
+
+    if(ps_app_ctx->e_output_chroma_format == IV_YUV_420P)
+    {
+#if DUMP_SINGLE_BUF
+        {
+            UWORD8 *buf = s_dump_disp_frm_buf.pv_y_buf - 80 - (s_dump_disp_frm_buf.u4_y_strd * 80);
+
+            UWORD32 i4_size = s_dump_disp_frm_buf.u4_y_strd * ((s_dump_disp_frm_buf.u4_y_ht + 160) + (s_dump_disp_frm_buf.u4_u_ht + 80));
+            fwrite(buf, 1, i4_size ,ps_op_file);
+
+        }
+#else
+        if(0 != file_save)
+        {
+            UWORD8 *buf;
+
+            /* Each plane is written row by row: 'wd' bytes are emitted and  */
+            /* the pointer then advances by the (possibly larger) stride.    */
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_y_strd;
+            }
+
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_u_strd;
+            }
+
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_v_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_v_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_v_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_v_strd;
+            }
+
+        }
+
+        if(0 != chksum_save)
+        {
+            /* One 16-byte MD5 digest per plane, written in Y, U, V order */
+            UWORD8 au1_y_chksum[16];
+            UWORD8 au1_u_chksum[16];
+            UWORD8 au1_v_chksum[16];
+            calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_y_buf,
+                           s_dump_disp_frm_buf.u4_y_strd,
+                           s_dump_disp_frm_buf.u4_y_wd,
+                           s_dump_disp_frm_buf.u4_y_ht,
+                           au1_y_chksum);
+            calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_u_buf,
+                           s_dump_disp_frm_buf.u4_u_strd,
+                           s_dump_disp_frm_buf.u4_u_wd,
+                           s_dump_disp_frm_buf.u4_u_ht,
+                           au1_u_chksum);
+            calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_v_buf,
+                           s_dump_disp_frm_buf.u4_v_strd,
+                           s_dump_disp_frm_buf.u4_v_wd,
+                           s_dump_disp_frm_buf.u4_v_ht,
+                           au1_v_chksum);
+
+            fwrite(au1_y_chksum, sizeof(UWORD8), 16, ps_op_chksum_file);
+            fwrite(au1_u_chksum, sizeof(UWORD8), 16, ps_op_chksum_file);
+            fwrite(au1_v_chksum, sizeof(UWORD8), 16, ps_op_chksum_file);
+        }
+#endif
+    }
+    else if((ps_app_ctx->e_output_chroma_format == IV_YUV_420SP_UV)
+                    || (ps_app_ctx->e_output_chroma_format == IV_YUV_420SP_VU))
+    {
+#if DUMP_SINGLE_BUF
+        {
+
+            UWORD8 *buf = s_dump_disp_frm_buf.pv_y_buf - 24 - (s_dump_disp_frm_buf.u4_y_strd * 40);
+
+            UWORD32 i4_size = s_dump_disp_frm_buf.u4_y_strd * ((s_dump_disp_frm_buf.u4_y_ht + 80) + (s_dump_disp_frm_buf.u4_u_ht + 40));
+            fwrite(buf, 1, i4_size ,ps_op_file);
+        }
+#else
+        /* Guarded like the 420P path: no pixel output unless file_save is set */
+        if(0 != file_save)
+        {
+            UWORD8 *buf;
+
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_y_strd;
+            }
+
+            /* The second plane is written using the 'u' descriptor fields */
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_u_strd;
+            }
+        }
+#endif
+    }
+    else if(ps_app_ctx->e_output_chroma_format == IV_RGBA_8888)
+    {
+        if(0 != file_save)
+        {
+            UWORD8 *buf;
+
+            /* Width and stride are multiplied by 4 to get byte counts */
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd * 4, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_y_strd * 4;
+            }
+        }
+    }
+    else
+    {
+        if(0 != file_save)
+        {
+            UWORD8 *buf;
+
+            /* Remaining formats: full rows of stride * 2 bytes are written  */
+            /* NOTE(review): this writes the stride, not the width - confirm */
+            /* that is intended for the 2-bytes-per-pixel formats.           */
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_strd * 2, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_y_strd * 2;
+            }
+        }
+    }
+
+    /* fflush(NULL) would flush every open output stream, so a stream is     */
+    /* flushed only when it was actually supplied.                           */
+    if(NULL != ps_op_file)
+        fflush(ps_op_file);
+
+    if(NULL != ps_op_chksum_file)
+        fflush(ps_op_chksum_file);
+
+}
+
+
+/*****************************************************************************/
+/* */
+/* Function Name : print_usage */
+/* */
+/* Description : Prints argument format */
+/* */
+/* */
+/* Inputs : */
+/* Globals : */
+/* Processing : Prints argument format */
+/* */
+/* Outputs : */
+/* Returns : */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+void print_usage(void)
+{
+    /* Prints a "Usage:" banner followed by one entry per table row: the     */
+    /* long option name padded to 32 columns, a tab, a space and the help    */
+    /* text. Line breaks come from the help texts themselves.                */
+    const WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_mapping[0]);
+    WORD32 idx;
+
+    printf("\nUsage:\n");
+
+    for(idx = 0; idx < num_entries; idx++)
+    {
+        const argument_t *ps_entry = &argument_mapping[idx];
+
+        printf("%-32s\t %s", ps_entry->argument_name, ps_entry->description);
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : get_argument */
+/* */
+/* Description : Gets argument for a given string */
+/* */
+/* */
+/* Inputs : name */
+/* Globals : */
+/* Processing : Searches the given string in the array and returns */
+/* appropriate argument ID */
+/* */
+/* Outputs : Argument ID */
+/* Returns : Argument ID */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+ARGUMENT_T get_argument(CHAR *name)
+{
+    /* Returns the id of the first table row whose long name equals 'name',  */
+    /* or whose short name equals 'name' provided that short name is not the */
+    /* "--" placeholder. Returns INVALID when no row matches.                */
+    const WORD32 num_entries = sizeof(argument_mapping) / sizeof(argument_mapping[0]);
+    WORD32 idx;
+
+    for(idx = 0; idx < num_entries; idx++)
+    {
+        const argument_t *ps_entry = &argument_mapping[idx];
+
+        /* Long form always counts */
+        if(0 == strcmp(ps_entry->argument_name, name))
+            return ps_entry->argument;
+
+        /* Rows carrying the "--" placeholder have no short form to match */
+        if(0 == strcmp(ps_entry->argument_shortname, "--"))
+            continue;
+
+        if(0 == strcmp(ps_entry->argument_shortname, name))
+            return ps_entry->argument;
+    }
+
+    return INVALID;
+}
+
+/*****************************************************************************/
+/* */
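+/* Function Name : parse_argument */
+/* */
+/* Description : Applies one option/value pair to the application context */
+/* */
+/* */
+/* Inputs : ps_app_ctx : Application context */
+/* argument : Option name, in short or long form */
+/* value : Value string supplied for the option */
+/* Globals : */
+/* Processing : Maps the option name to its argument ID and stores the */
+/* parsed value in the matching application context field */
+/* */
+/* Outputs : Application context fields updated */
+/* Returns : None */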
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+void parse_argument(vid_dec_ctx_t *ps_app_ctx, CHAR *argument, CHAR *value)
+{
+    /* Resolves 'argument' through get_argument() and stores 'value' in the  */
+    /* application context field that belongs to that option. Options with   */
+    /* no handler here (and unknown names) are reported and ignored.         */
+    const ARGUMENT_T e_option = get_argument(argument);
+
+    switch(e_option)
+    {
+        /* ---- Options that store nothing ---- */
+        case HELP:
+            print_usage();
+            exit(-1);
+
+        case VERSION:
+            break;
+
+        /* ---- File names: first whitespace-delimited token of 'value' ---- */
+        case INPUT_FILE:
+            sscanf(value, "%s", ps_app_ctx->ac_ip_fname);
+            break;
+
+        case OUTPUT:
+            sscanf(value, "%s", ps_app_ctx->ac_op_fname);
+            break;
+
+        case CHKSUM:
+            sscanf(value, "%s", ps_app_ctx->ac_op_chksum_fname);
+            break;
+
+        case PICLEN_FILE:
+            sscanf(value, "%s", ps_app_ctx->ac_piclen_fname);
+            break;
+
+        /* ---- Plain integer options ---- */
+        case SAVE_OUTPUT:
+            sscanf(value, "%d", &ps_app_ctx->u4_file_save_flag);
+            break;
+
+        case SAVE_CHKSUM:
+            sscanf(value, "%d", &ps_app_ctx->u4_chksum_save_flag);
+            break;
+
+        case NUM_FRAMES:
+            sscanf(value, "%d", &ps_app_ctx->u4_max_frm_ts);
+            break;
+
+        case NUM_CORES:
+            sscanf(value, "%d", &ps_app_ctx->u4_num_cores);
+            break;
+
+        case DEGRADE_PICS:
+            sscanf(value, "%d", &ps_app_ctx->i4_degrade_pics);
+            break;
+
+        case DEGRADE_TYPE:
+            sscanf(value, "%d", &ps_app_ctx->i4_degrade_type);
+            break;
+
+        case SHARE_DISPLAY_BUF:
+            sscanf(value, "%d", &ps_app_ctx->u4_share_disp_buf);
+            break;
+
+        case LOOPBACK:
+            sscanf(value, "%d", &ps_app_ctx->loopback);
+            break;
+
+        case FULLSCREEN:
+            sscanf(value, "%d", &ps_app_ctx->full_screen);
+            break;
+
+        case MAX_WD:
+            sscanf(value, "%d", &ps_app_ctx->max_wd);
+            break;
+
+        case MAX_HT:
+            sscanf(value, "%d", &ps_app_ctx->max_ht);
+            break;
+
+        case MAX_LEVEL:
+            sscanf(value, "%d", &ps_app_ctx->max_level);
+            break;
+
+        case PICLEN:
+            sscanf(value, "%d", &ps_app_ctx->u4_piclen_flag);
+            break;
+
+        case DISABLE_DEBLOCK_LEVEL:
+            sscanf(value, "%d", &ps_app_ctx->u4_disable_dblk_level);
+            break;
+
+        /* ---- Integer options with extra handling ---- */
+        case DISPLAY:
+            /* Honoured only when a display backend is compiled in */
+#if defined(SDL_DISPLAY) || defined(FBDEV_DISPLAY) || defined(INTEL_CE5300) || defined(IOS_DISPLAY)
+            sscanf(value, "%d", &ps_app_ctx->display);
+#else
+            ps_app_ctx->display = 0;
+#endif
+            break;
+
+        case FPS:
+            sscanf(value, "%d", &ps_app_ctx->fps);
+            /* Zero or negative rates fall back to the default */
+            if(ps_app_ctx->fps <= 0)
+            {
+                ps_app_ctx->fps = DEFAULT_FPS;
+            }
+            break;
+
+        /* ---- Named values ---- */
+        case CHROMA_FORMAT:
+            if(0 == strcmp(value, "YUV_420P"))
+            {
+                ps_app_ctx->e_output_chroma_format = IV_YUV_420P;
+            }
+            else if(0 == strcmp(value, "YUV_422ILE"))
+            {
+                ps_app_ctx->e_output_chroma_format = IV_YUV_422ILE;
+            }
+            else if(0 == strcmp(value, "RGB_565"))
+            {
+                ps_app_ctx->e_output_chroma_format = IV_RGB_565;
+            }
+            else if(0 == strcmp(value, "RGBA_8888"))
+            {
+                ps_app_ctx->e_output_chroma_format = IV_RGBA_8888;
+            }
+            else if(0 == strcmp(value, "YUV_420SP_UV"))
+            {
+                ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_UV;
+            }
+            else if(0 == strcmp(value, "YUV_420SP_VU"))
+            {
+                ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_VU;
+            }
+            else
+            {
+                printf("\nInvalid colour format setting it to IV_YUV_420P\n");
+                ps_app_ctx->e_output_chroma_format = IV_YUV_420P;
+            }
+            break;
+
+        case ARCH:
+            if(0 == strcmp(value, "ARM_NONEON"))
+            {
+                ps_app_ctx->e_arch = ARCH_ARM_NONEON;
+            }
+            else if(0 == strcmp(value, "ARM_A9Q"))
+            {
+                ps_app_ctx->e_arch = ARCH_ARM_A9Q;
+            }
+            else if(0 == strcmp(value, "ARM_A7"))
+            {
+                ps_app_ctx->e_arch = ARCH_ARM_A7;
+            }
+            else if(0 == strcmp(value, "ARM_A5"))
+            {
+                ps_app_ctx->e_arch = ARCH_ARM_A5;
+            }
+            else if(0 == strcmp(value, "ARM_NEONINTR"))
+            {
+                ps_app_ctx->e_arch = ARCH_ARM_NEONINTR;
+            }
+            else if(0 == strcmp(value, "X86_GENERIC"))
+            {
+                ps_app_ctx->e_arch = ARCH_X86_GENERIC;
+            }
+            else if(0 == strcmp(value, "X86_SSSE3"))
+            {
+                ps_app_ctx->e_arch = ARCH_X86_SSSE3;
+            }
+            else if(0 == strcmp(value, "X86_SSE42"))
+            {
+                ps_app_ctx->e_arch = ARCH_X86_SSE42;
+            }
+            else if(0 == strcmp(value, "X86_AVX2"))
+            {
+                ps_app_ctx->e_arch = ARCH_X86_AVX2;
+            }
+            else if(0 == strcmp(value, "MIPS_GENERIC"))
+            {
+                ps_app_ctx->e_arch = ARCH_MIPS_GENERIC;
+            }
+            else if(0 == strcmp(value, "MIPS_32"))
+            {
+                ps_app_ctx->e_arch = ARCH_MIPS_32;
+            }
+            else if(0 == strcmp(value, "ARMV8_GENERIC"))
+            {
+                ps_app_ctx->e_arch = ARCH_ARMV8_GENERIC;
+            }
+            else
+            {
+                printf("\nInvalid Arch. Setting it to ARM_A9Q\n");
+                ps_app_ctx->e_arch = ARCH_ARM_A9Q;
+            }
+            break;
+
+        case SOC:
+            if(0 == strcmp(value, "GENERIC"))
+            {
+                ps_app_ctx->e_soc = SOC_GENERIC;
+            }
+            else if(0 == strcmp(value, "HISI_37X"))
+            {
+                ps_app_ctx->e_soc = SOC_HISI_37X;
+            }
+            else
+            {
+                /* Any other string is taken as a numeric SOC id, not rejected */
+                ps_app_ctx->e_soc = atoi(value);
+            }
+            break;
+
+        /* ---- Unknown names and options without a handler ---- */
+        case INVALID:
+        default:
+            printf("Ignoring argument : %s\n", argument);
+            break;
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : read_cfg_file */
+/* */
+/* Description : Reads arguments from a configuration file */
+/* */
+/* */
+/* Inputs : ps_app_ctx : Application context */
+/* fp_cfg_file : Configuration file handle */
+/* Globals : */
+/* Processing : Parses the arguments and fills in the application context*/
+/* */
+/* Outputs : Arguments parsed */
+/* Returns : None */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+void read_cfg_file(vid_dec_ctx_t *ps_app_ctx, FILE *fp_cfg_file)
+{
+    /* Reads the configuration file line by line. Each line is split into    */
+    /* up to three whitespace-separated tokens - option name, value and a    */
+    /* description that is ignored - and the first two are handed to         */
+    /* parse_argument(). Lines with no tokens are skipped.                   */
+    /*                                                                       */
+    /* Every token buffer is as large as the line buffer, so a token can     */
+    /* never be longer than its destination.                                 */
+    CHAR line[STRLENGTH];
+    CHAR description[STRLENGTH];
+    CHAR value[STRLENGTH];
+    CHAR argument[STRLENGTH];
+
+    /* fgets returns NULL at end of file or on a read error */
+    while(NULL != fgets(line, STRLENGTH, fp_cfg_file))
+    {
+        /* Reset every token so a line that carries only an option name      */
+        /* passes an empty value instead of leftover or uninitialised data.  */
+        argument[0] = '\0';
+        value[0] = '\0';
+        description[0] = '\0';
+
+        sscanf(line, "%s %s %s", argument, value, description);
+        if('\0' == argument[0])
+            continue;
+
+        parse_argument(ps_app_ctx, argument, value);
+    }
+}
+
+/*!
+**************************************************************************
+* \if Function name : dispq_producer_dequeue \endif
+*
+* \brief
+* This function gets a free buffer index where display data can be written
+* This is a blocking call and can be exited by setting quit to true in
+* the application context
+*
+* \param[in] ps_app_ctx : Pointer to application context
+*
+* \return
+* returns Next free buffer index for producer
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 dispq_producer_dequeue(vid_dec_ctx_t *ps_app_ctx)
+{
+    /* The queue counts as full when advancing the write index by one would  */
+    /* land on the read index. While it is full, sleep 1 ms at a time and    */
+    /* return -1 as soon as quit is seen after a sleep. Otherwise return the */
+    /* current write index.                                                  */
+    WORD32 wr_slot;
+
+    for(;;)
+    {
+        if(((ps_app_ctx->disp_q_wr_idx + 1) % NUM_DISPLAY_BUFFERS) != ps_app_ctx->disp_q_rd_idx)
+            break;
+
+        ithread_msleep(1);
+
+        if(ps_app_ctx->quit)
+            return (-1);
+    }
+
+    wr_slot = ps_app_ctx->disp_q_wr_idx;
+
+    return wr_slot;
+}
+
+/*!
+**************************************************************************
+* \if Function name : dispq_producer_queue \endif
+*
+* \brief
+* This function adds buffer which can be displayed
+*
+* \param[in] ps_app_ctx : Pointer to application context
+*
+* \return
+* returns 0 always
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 dispq_producer_queue(vid_dec_ctx_t *ps_app_ctx)
+{
+    /* Advance the write index by one slot, wrapping to 0 when it reaches    */
+    /* NUM_DISPLAY_BUFFERS. Always returns 0.                                */
+    if(NUM_DISPLAY_BUFFERS == ++ps_app_ctx->disp_q_wr_idx)
+    {
+        ps_app_ctx->disp_q_wr_idx = 0;
+    }
+
+    return 0;
+}
+/*!
+**************************************************************************
+* \if Function name : dispq_consumer_dequeue \endif
+*
+* \brief
+* This function gets the index of the next queued buffer to be displayed
+* This is a blocking call and can be exited by setting quit to true in
+* the application context
+*
+* \param[in] ps_app_ctx : Pointer to application context
+*
+* \return
+* returns Current read index of the display queue, or -1 if quit was set while waiting
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 dispq_consumer_dequeue(vid_dec_ctx_t *ps_app_ctx)
+{
+    /* The queue counts as empty when the write and read indices are equal.  */
+    /* While it is empty, sleep 1 ms at a time and return -1 as soon as quit */
+    /* is seen after a sleep. Otherwise return the current read index.       */
+    WORD32 rd_slot;
+
+    for(;;)
+    {
+        if(ps_app_ctx->disp_q_wr_idx != ps_app_ctx->disp_q_rd_idx)
+            break;
+
+        ithread_msleep(1);
+
+        if(ps_app_ctx->quit)
+            return (-1);
+    }
+
+    rd_slot = ps_app_ctx->disp_q_rd_idx;
+
+    return rd_slot;
+}
+
+/*!
+**************************************************************************
+* \if Function name : dispq_consumer_queue \endif
+*
+* \brief
+* This function advances the display queue read index, wrapping to 0 at NUM_DISPLAY_BUFFERS
+*
+* \param[in] ps_app_ctx : Pointer to application context
+*
+* \return
+* returns 0 always
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 dispq_consumer_queue(vid_dec_ctx_t *ps_app_ctx)
+{
+    /* Advance the read index by one slot, wrapping to 0 when it reaches     */
+    /* NUM_DISPLAY_BUFFERS. Always returns 0.                                */
+    if(NUM_DISPLAY_BUFFERS == ++ps_app_ctx->disp_q_rd_idx)
+    {
+        ps_app_ctx->disp_q_rd_idx = 0;
+    }
+
+    return 0;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : display_thread */
+/* */
+/* Description : Thread to display the frame */
+/* */
+/* */
+/* Inputs : pv_ctx : Application context */
+/* */
+/* Globals : */
+/* Processing : Wait for a buffer to get produced by decoder and display */
+/* that frame */
+/* */
+/* Outputs : */
+/* Returns : None */
+/* */
+/* Issues : Pause followed by quit can lead to a deadlock condition */
+/* If the decoder lags initially and then speeds up, */
+/* display will also run at a faster rate until it reaches */
+/* equilibrium with respect to the initial time */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 05 2013 100578 Initial Version */
+/* */
+/*****************************************************************************/
+
+/* Display thread entry point.                                               */
+/*                                                                           */
+/* pv_ctx is the application context (vid_dec_ctx_t *). The thread           */
+/* initialises the display through the function pointers stored in the       */
+/* context, sets display_init_done, and then repeatedly takes an index from  */
+/* the display queue, shows it and paces itself to the configured frame      */
+/* rate. It leaves the loop when quit is set, waits for display_deinit_flag  */
+/* to become non-zero, de-initialises the display and exits the thread.      */
+/* Returns 0.                                                                */
+WORD32 display_thread(void *pv_ctx)
+{
+    vid_dec_ctx_t *ps_app_ctx = (vid_dec_ctx_t *) pv_ctx;
+
+    UWORD32 frm_duration; /* in us */
+    UWORD32 current_time;
+    UWORD32 expected_time;
+    TIMER s_end_timer;
+    TIMER s_first_frame_time;
+    UWORD32 first_frame_displayed;
+
+#ifdef WINDOWS_TIMER
+    TIMER frequency;
+#endif
+
+#ifdef WINDOWS_TIMER
+    QueryPerformanceFrequency ( &frequency);
+#endif
+    first_frame_displayed = 0;
+    expected_time = 0;
+
+    /* A frame rate of zero would make the division below trap, so fall     */
+    /* back to the default frame rate in that case.                         */
+    if(0 == ps_app_ctx->fps)
+        frm_duration = 1000000 / DEFAULT_FPS;
+    else
+        frm_duration = 1000000 / ps_app_ctx->fps;
+
+    /* Init display and allocate display buffers */
+    ps_app_ctx->pv_disp_ctx = (void *)ps_app_ctx->disp_init(ps_app_ctx->u4_pic_wd,
+                                                            ps_app_ctx->u4_pic_ht,
+                                                            ps_app_ctx->i4_screen_wd,
+                                                            ps_app_ctx->i4_screen_ht,
+                                                            ps_app_ctx->max_wd,
+                                                            ps_app_ctx->max_ht,
+                                                            ps_app_ctx->full_screen,
+                                                            &ps_app_ctx->quit,
+                                                            &ps_app_ctx->paused);
+    ps_app_ctx->alloc_disp_buffers(ps_app_ctx->pv_disp_ctx);
+
+    /* Signal the creator of this thread that the display is ready */
+    ps_app_ctx->display_init_done = 1;
+
+    while(1)
+    {
+        WORD32 rd_idx;
+
+        /* Blocks until an entry is available or quit has been requested */
+        rd_idx = dispq_consumer_dequeue(ps_app_ctx);
+        if (ps_app_ctx->quit)
+            break;
+
+        ps_app_ctx->display_buffer(ps_app_ctx->pv_disp_ctx, rd_idx);
+
+        /* All later timestamps are measured relative to the first frame */
+        if(0 == first_frame_displayed)
+        {
+            GETTIME(&s_first_frame_time);
+            first_frame_displayed = 1;
+        }
+
+        /*********************************************************************/
+        /* Sleep based on the expected time of arrival of current buffer and */
+        /* the Current frame                                                 */
+        /*********************************************************************/
+
+        GETTIME(&s_end_timer);
+        ELAPSEDTIME(s_first_frame_time,s_end_timer,current_time,frequency);
+
+        /* time in micro second */
+        expected_time += frm_duration;
+
+        /* Sleep for the remaining time if we are early; otherwise resync   */
+        /* the expected time to the current time so the lag does not        */
+        /* accumulate.                                                      */
+        if(current_time < expected_time)
+            ps_app_ctx->disp_usleep((expected_time - current_time));
+        else
+            expected_time = current_time;
+
+        /* Release the displayed entry back to the queue */
+        dispq_consumer_queue(ps_app_ctx);
+
+    }
+
+    /* Keep the display alive until display_deinit_flag is raised */
+    while(0 == ps_app_ctx->display_deinit_flag)
+    {
+        ps_app_ctx->disp_usleep(1000);
+    }
+    ps_app_ctx->disp_deinit(ps_app_ctx->pv_disp_ctx);
+
+    /* destroy the display thread */
+    ithread_exit(ps_app_ctx->display_thread_handle);
+
+    return 0;
+}
+
+/* Stalls the caller while an older per-frame output file still exists.      */
+/*                                                                           */
+/* fname       : printf-style file name pattern that is expanded with a      */
+/*               frame index (the caller passes a pattern containing "%d").  */
+/* cur_frm_idx : index of the frame about to be written.                     */
+/*                                                                           */
+/* When cur_frm_idx is at least 64, the name for frame (cur_frm_idx - 64) is */
+/* generated and the function polls every 5 ms until that file can no longer */
+/* be opened for reading. Nothing is done for earlier frames, or when the    */
+/* generated name does not fit in the local buffer.                          */
+void output_write_stall(CHAR *fname, UWORD32 cur_frm_idx)
+{
+    const UWORD8 threshold = 64;
+    CHAR past_fname[1000];
+    FILE *fp_fast_file = NULL;
+    int name_len;
+
+    if (cur_frm_idx < threshold)
+        return;
+
+    /* fname is used as the format string by design; bound the write so an  */
+    /* over-long pattern cannot overflow past_fname.                        */
+    name_len = snprintf(past_fname, sizeof(past_fname), fname,
+                        cur_frm_idx - threshold);
+    if ((name_len < 0) || ((size_t)name_len >= sizeof(past_fname)))
+        return;
+
+    while ((fp_fast_file = fopen(past_fname, "rb")) != NULL)
+    {
+        fclose(fp_fast_file);
+        /* Wait until the resource is released by a third party app */
+        ithread_msleep(5);
+    }
+}
+
+/* Puts the decoder in flush mode and dumps the frames it still returns.     */
+/*                                                                           */
+/* codec_obj          : decoder instance the API calls are issued on         */
+/* ps_app_ctx         : application context (file names, save flags, ...)    */
+/* ps_out_buf         : output buffer descriptor passed on to the decoder    */
+/* pu1_bs_buf         : bitstream buffer passed on to the decode call        */
+/* pu4_op_frm_ts      : in/out count of frames output so far; incremented    */
+/*                      for every frame dumped here                          */
+/* ps_op_file         : output file; replaced by a per-frame file when the   */
+/*                      output file name contains "%d"                       */
+/* ps_op_chksum_file  : checksum file handed to dump_output                  */
+/* u4_ip_frm_ts       : timestamp passed on to the decode call               */
+/* u4_bytes_remaining : byte count passed on to the decode call              */
+/*                                                                           */
+/* Each iteration issues a flush control call followed by a decode call and  */
+/* dumps the frame if one is present. The loop ends when *pu4_op_frm_ts      */
+/* reaches u4_max_frm_ts + disp_delay, or when either API call fails.        */
+void flush_output(iv_obj_t *codec_obj,
+                  vid_dec_ctx_t *ps_app_ctx,
+                  ivd_out_bufdesc_t *ps_out_buf,
+                  UWORD8 *pu1_bs_buf,
+                  UWORD32 *pu4_op_frm_ts,
+                  FILE *ps_op_file,
+                  FILE *ps_op_chksum_file,
+                  UWORD32 u4_ip_frm_ts,
+                  UWORD32 u4_bytes_remaining)
+{
+    WORD32 ret;
+
+    do
+    {
+
+        ivd_ctl_flush_ip_t s_ctl_ip;
+        ivd_ctl_flush_op_t s_ctl_op;
+        ivd_video_decode_ip_t s_video_decode_ip;
+        ivd_video_decode_op_t s_video_decode_op;
+
+        if(*pu4_op_frm_ts >= (ps_app_ctx->u4_max_frm_ts + ps_app_ctx->disp_delay))
+            break;
+
+        s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH;
+        s_ctl_ip.u4_size = sizeof(ivd_ctl_flush_ip_t);
+        s_ctl_op.u4_size = sizeof(ivd_ctl_flush_op_t);
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+                               (void *)&s_ctl_op);
+
+        /* Without flush mode there is nothing more to drain */
+        if(ret != IV_SUCCESS)
+        {
+            printf("Error in Setting the decoder in flush mode\n");
+            break;
+        }
+
+        s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
+        s_video_decode_ip.u4_ts = u4_ip_frm_ts;
+        s_video_decode_ip.pv_stream_buffer = pu1_bs_buf;
+        s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining;
+        s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t);
+        s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[0] =
+                        ps_out_buf->u4_min_out_buf_size[0];
+        s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[1] =
+                        ps_out_buf->u4_min_out_buf_size[1];
+        s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[2] =
+                        ps_out_buf->u4_min_out_buf_size[2];
+
+        s_video_decode_ip.s_out_buffer.pu1_bufs[0] =
+                        ps_out_buf->pu1_bufs[0];
+        s_video_decode_ip.s_out_buffer.pu1_bufs[1] =
+                        ps_out_buf->pu1_bufs[1];
+        s_video_decode_ip.s_out_buffer.pu1_bufs[2] =
+                        ps_out_buf->pu1_bufs[2];
+        s_video_decode_ip.s_out_buffer.u4_num_bufs =
+                        ps_out_buf->u4_num_bufs;
+
+        s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t);
+
+        /*****************************************************************************/
+        /* API Call: Video Decode                                                    */
+        /*****************************************************************************/
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip,
+                               (void *)&s_video_decode_op);
+
+        if(1 == s_video_decode_op.u4_output_present)
+        {
+            CHAR cur_fname[1000];
+            CHAR *extn = NULL;
+            /* The objective is to dump the decoded frames into separate files instead of
+             * dumping all the frames in one common file. Also, the number of dumped frames
+             * at any given instance of time cannot exceed 'frame_memory'
+             */
+            if(ps_app_ctx->u4_file_save_flag)
+            {
+                /* Per-frame files are used only when the name holds a %d */
+                extn = strstr(ps_app_ctx->ac_op_fname,"%d");
+                if (extn != NULL)
+                {
+                    output_write_stall(ps_app_ctx->ac_op_fname,*pu4_op_frm_ts);
+                    /* Generate output file names; bounded so a long    */
+                    /* pattern cannot overflow cur_fname                */
+                    snprintf(cur_fname, sizeof(cur_fname),
+                             ps_app_ctx->ac_op_fname, *pu4_op_frm_ts);
+                    /* Open Output file */
+                    ps_op_file = fopen(cur_fname,"wb");
+                    if (NULL == ps_op_file)
+                    {
+                        CHAR ac_error_str[STRLENGTH];
+                        snprintf(ac_error_str, sizeof(ac_error_str),
+                                 "Could not open output file %s",
+                                 cur_fname);
+
+                        codec_exit(ac_error_str);
+                    }
+                }
+            }
+
+            dump_output(ps_app_ctx, &(s_video_decode_op.s_disp_frm_buf),
+                        s_video_decode_op.u4_disp_buf_id, ps_op_file,
+                        ps_op_chksum_file,
+                        *pu4_op_frm_ts, ps_app_ctx->u4_file_save_flag,
+                        ps_app_ctx->u4_chksum_save_flag);
+            /* extn is non-NULL only when a per-frame file was opened above */
+            if (extn != NULL)
+                fclose(ps_op_file);
+            (*pu4_op_frm_ts)++;
+        }
+    }
+    while(IV_SUCCESS == ret);
+
+}
+
+#ifdef X86_MINGW
+/* Handler installed for SIGSEGV: reports the fault on stdout and terminates */
+/* the process with exit status -1.                                          */
+void sigsegv_handler()
+{
+    const int exit_status = -1;
+
+    printf("Segmentation fault, Exiting.. \n");
+    exit(exit_status);
+}
+#endif
+
+/* Default stride callback, used unless a display backend overrides it. */
+/* Always reports a stride of 0.                                        */
+UWORD32 default_get_stride(void)
+{
+    const UWORD32 default_stride = 0;
+
+    return default_stride;
+}
+
+
+/* Default colour-format callback, used unless a display backend overrides */
+/* it. Always reports IV_YUV_420P.                                         */
+IV_COLOR_FORMAT_T default_get_color_fmt(void)
+{
+    const IV_COLOR_FORMAT_T default_fmt = IV_YUV_420P;
+
+    return default_fmt;
+}
+/*****************************************************************************/
+/* */
+/* Function Name : main */
+/* */
+/* Description : Application to demonstrate codec API */
+/* */
+/* */
+/* Inputs : argc - Number of arguments */
+/* argv[] - Arguments */
+/* Globals : */
+/* Processing : Shows how to use create, process, control and delete */
+/* */
+/* Outputs : Codec output in a file */
+/* Returns : */
+/* */
+/* Issues : Assumes both PROFILE_ENABLE to be */
+/* defined for multithread decode-display working */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* 09 05 2013 100578 Multithread decode-display */
+/*****************************************************************************/
+#ifdef IOS
+int h264dec_main(char * homedir,char *documentdir, int screen_wd, int screen_ht)
+#else
+int main(WORD32 argc, CHAR *argv[])
+#endif
+{
+ CHAR ac_cfg_fname[STRLENGTH];
+ FILE *fp_cfg_file = NULL;
+ FILE *ps_piclen_file = NULL;
+ FILE *ps_ip_file = NULL;
+ FILE *ps_op_file = NULL;
+ FILE *ps_op_chksum_file = NULL;
+ WORD32 ret;
+ CHAR ac_error_str[STRLENGTH];
+ vid_dec_ctx_t s_app_ctx;
+ UWORD8 *pu1_bs_buf;
+
+ ivd_out_bufdesc_t *ps_out_buf;
+ UWORD32 u4_num_bytes_dec = 0;
+ UWORD32 file_pos = 0;
+ IV_API_CALL_STATUS_T e_dec_status;
+ UWORD32 u4_ip_frm_ts = 0, u4_op_frm_ts = 0;
+
+ WORD32 u4_bytes_remaining = 0;
+ void *pv_mem_rec_location;
+ UWORD32 u4_num_mem_recs;
+ UWORD32 i;
+ UWORD32 u4_ip_buf_len;
+ UWORD32 frm_cnt = 0;
+ WORD32 total_bytes_comsumed;
+ UWORD32 max_op_frm_ts;
+
+#ifdef PROFILE_ENABLE
+ UWORD32 u4_tot_cycles = 0;
+ UWORD32 u4_tot_fmt_cycles = 0;
+ UWORD32 peak_window[PEAK_WINDOW_SIZE];
+ UWORD32 peak_window_idx = 0;
+ UWORD32 peak_avg_max = 0;
+#ifdef INTEL_CE5300
+ UWORD32 time_consumed = 0;
+ UWORD32 bytes_consumed = 0;
+#endif
+#endif
+
+#ifdef WINDOWS_TIMER
+ TIMER frequency;
+#endif
+ WORD32 width = 0, height = 0;
+ iv_obj_t *codec_obj;
+#if defined(GPU_BUILD) && !defined(X86)
+// int ioctl_init();
+// ioctl_init();
+#endif
+
+#ifdef X86_MINGW
+ //For getting printfs without any delay
+ setvbuf(stdout, NULL, _IONBF, 0);
+ setvbuf(stderr, NULL, _IONBF, 0);
+#endif
+#ifdef IOS
+ sprintf(filename_trace, "%s/iostrace.txt", homedir );
+ printf("\ntrace file name = %s",filename_trace);
+#endif
+
+#ifdef X86_MINGW
+ {
+ signal(SIGSEGV, sigsegv_handler);
+ }
+#endif
+
+
+#ifndef IOS
+ /* Usage */
+ if(argc < 2)
+ {
+ printf("Using test.cfg as configuration file \n");
+ strcpy(ac_cfg_fname, "test.cfg");
+ }
+ else if(argc == 2)
+ {
+ strcpy(ac_cfg_fname, argv[1]);
+ }
+
+#else
+ strcpy(ac_cfg_fname, "test.cfg");
+
+#endif
+
+
+ /***********************************************************************/
+ /* Initialize Application parameters */
+ /***********************************************************************/
+
+ strcpy(s_app_ctx.ac_ip_fname, "\0");
+ s_app_ctx.dump_q_wr_idx = 0;
+ s_app_ctx.dump_q_rd_idx = 0;
+ s_app_ctx.display_thread_created = 0;
+ s_app_ctx.disp_q_wr_idx = 0;
+ s_app_ctx.disp_q_rd_idx = 0;
+ s_app_ctx.disp_delay = 0;
+ s_app_ctx.loopback = 0;
+ s_app_ctx.display = 0;
+ s_app_ctx.full_screen = 0;
+ s_app_ctx.u4_piclen_flag = 0;
+ s_app_ctx.fps = DEFAULT_FPS;
+ file_pos = 0;
+ total_bytes_comsumed = 0;
+ u4_ip_frm_ts = 0;
+ u4_op_frm_ts = 0;
+#ifdef PROFILE_ENABLE
+ memset(peak_window, 0, sizeof(WORD32) * PEAK_WINDOW_SIZE);
+#endif
+ s_app_ctx.u4_share_disp_buf = DEFAULT_SHARE_DISPLAY_BUF;
+ s_app_ctx.u4_num_cores = DEFAULT_NUM_CORES;
+ s_app_ctx.i4_degrade_type = 0;
+ s_app_ctx.i4_degrade_pics = 0;
+ s_app_ctx.max_wd = 0;
+ s_app_ctx.max_ht = 0;
+ s_app_ctx.max_level = 0;
+ s_app_ctx.e_arch = ARCH_ARM_A9Q;
+ s_app_ctx.e_soc = SOC_GENERIC;
+
+ s_app_ctx.u4_strd = STRIDE;
+
+ s_app_ctx.display_thread_handle = malloc(ithread_get_handle_size());
+ s_app_ctx.quit = 0;
+ s_app_ctx.paused = 0;
+ //s_app_ctx.u4_output_present = 0;
+
+ s_app_ctx.get_stride = &default_get_stride;
+
+ s_app_ctx.get_color_fmt = &default_get_color_fmt;
+
+ /* Set function pointers for display */
+#ifdef SDL_DISPLAY
+ s_app_ctx.disp_init = &sdl_disp_init;
+ s_app_ctx.alloc_disp_buffers = &sdl_alloc_disp_buffers;
+ s_app_ctx.display_buffer = &sdl_display;
+ s_app_ctx.set_disp_buffers = &sdl_set_disp_buffers;
+ s_app_ctx.disp_deinit = &sdl_disp_deinit;
+ s_app_ctx.disp_usleep = &sdl_disp_usleep;
+ s_app_ctx.get_color_fmt = &sdl_get_color_fmt;
+ s_app_ctx.get_stride = &sdl_get_stride;
+#endif
+
+#ifdef FBDEV_DISPLAY
+ s_app_ctx.disp_init = &fbd_disp_init;
+ s_app_ctx.alloc_disp_buffers = &fbd_alloc_disp_buffers;
+ s_app_ctx.display_buffer = &fbd_display;
+ s_app_ctx.set_disp_buffers = &fbd_set_disp_buffers;
+ s_app_ctx.disp_deinit = &fbd_disp_deinit;
+ s_app_ctx.disp_usleep = &fbd_disp_usleep;
+ s_app_ctx.get_color_fmt = &fbd_get_color_fmt;
+ s_app_ctx.get_stride = &fbd_get_stride;
+#endif
+
+#ifdef INTEL_CE5300
+ s_app_ctx.disp_init = &gdl_disp_init;
+ s_app_ctx.alloc_disp_buffers = &gdl_alloc_disp_buffers;
+ s_app_ctx.display_buffer = &gdl_display;
+ s_app_ctx.set_disp_buffers = &gdl_set_disp_buffers;
+ s_app_ctx.disp_deinit = &gdl_disp_deinit;
+ s_app_ctx.disp_usleep = &gdl_disp_usleep;
+ s_app_ctx.get_color_fmt = &gdl_get_color_fmt;
+ s_app_ctx.get_stride = &gdl_get_stride;
+#endif
+
+#ifdef IOS_DISPLAY
+ s_app_ctx.disp_init = &ios_disp_init;
+ s_app_ctx.alloc_disp_buffers = &ios_alloc_disp_buffers;
+ s_app_ctx.display_buffer = &ios_display;
+ s_app_ctx.set_disp_buffers = &ios_set_disp_buffers;
+ s_app_ctx.disp_deinit = &ios_disp_deinit;
+ s_app_ctx.disp_usleep = &ios_disp_usleep;
+ s_app_ctx.get_color_fmt = &ios_get_color_fmt;
+ s_app_ctx.get_stride = &ios_get_stride;
+#endif
+
+ s_app_ctx.display_deinit_flag = 0;
+ s_app_ctx.e_output_chroma_format = IV_YUV_420SP_UV;
+ /*************************************************************************/
+ /* Parse arguments */
+ /*************************************************************************/
+
+#ifndef IOS
+ /* Read command line arguments */
+ if(argc > 2)
+ {
+ for(i = 1; i < (UWORD32)argc; i += 2)
+ {
+ if(CONFIG == get_argument(argv[i]))
+ {
+ strcpy(ac_cfg_fname, argv[i + 1]);
+ if((fp_cfg_file = fopen(ac_cfg_fname, "r")) == NULL)
+ {
+ sprintf(ac_error_str, "Could not open Configuration file %s",
+ ac_cfg_fname);
+ codec_exit(ac_error_str);
+ }
+ read_cfg_file(&s_app_ctx, fp_cfg_file);
+ fclose(fp_cfg_file);
+ }
+ else
+ {
+ parse_argument(&s_app_ctx, argv[i], argv[i + 1]);
+ }
+ }
+ }
+ else
+ {
+ if((fp_cfg_file = fopen(ac_cfg_fname, "r")) == NULL)
+ {
+ sprintf(ac_error_str, "Could not open Configuration file %s",
+ ac_cfg_fname);
+ codec_exit(ac_error_str);
+ }
+ read_cfg_file(&s_app_ctx, fp_cfg_file);
+ fclose(fp_cfg_file);
+ }
+#else
+ sprintf(filename_with_path, "%s/%s", homedir, ac_cfg_fname);
+ if((fp_cfg_file = fopen(filename_with_path, "r")) == NULL)
+ {
+ sprintf(ac_error_str, "Could not open Configuration file %s",
+ ac_cfg_fname);
+ codec_exit(ac_error_str);
+
+ }
+ read_cfg_file(&s_app_ctx, fp_cfg_file);
+ fclose(fp_cfg_file);
+
+#endif
+#ifdef PRINT_PICSIZE
+ /* If the binary is used for only getting number of bytes in each picture, then disable the following features */
+ s_app_ctx.u4_piclen_flag = 0;
+ s_app_ctx.u4_file_save_flag = 0;
+ s_app_ctx.u4_chksum_save_flag = 0;
+ s_app_ctx.i4_degrade_pics = 0;
+ s_app_ctx.i4_degrade_type = 0;
+ s_app_ctx.loopback = 0;
+ s_app_ctx.u4_share_disp_buf = 0;
+ s_app_ctx.display = 0;
+#endif
+
+ /* If display is enabled, then turn off shared mode and get color format that is supported by display */
+ if(1 == s_app_ctx.display)
+ {
+ s_app_ctx.u4_share_disp_buf = 0;
+ s_app_ctx.e_output_chroma_format = s_app_ctx.get_color_fmt();
+ }
+ if(strcmp(s_app_ctx.ac_ip_fname, "\0") == 0)
+ {
+ printf("\nNo input file given for decoding\n");
+ exit(-1);
+ }
+
+
+
+ /***********************************************************************/
+ /* create the file object for input file */
+ /***********************************************************************/
+#ifdef IOS
+ sprintf(filename_with_path, "%s/%s", homedir, s_app_ctx.ac_ip_fname);
+ ps_ip_file = fopen(filename_with_path, "rb");
+#else
+ ps_ip_file = fopen(s_app_ctx.ac_ip_fname, "rb");
+#endif
+ if(NULL == ps_ip_file)
+ {
+ sprintf(ac_error_str, "Could not open input file %s",
+ s_app_ctx.ac_ip_fname);
+ codec_exit(ac_error_str);
+ }
+ /***********************************************************************/
+ /* create the file object for input file */
+ /***********************************************************************/
+ if(1 == s_app_ctx.u4_piclen_flag)
+ {
+#ifdef IOS
+ sprintf(filename_with_path, "%s/%s", homedir, s_app_ctx.ac_piclen_fname);
+ ps_piclen_file = fopen(filename_with_path, "rb");
+#else
+ ps_piclen_file = fopen(s_app_ctx.ac_piclen_fname, "rb");
+#endif
+ if(NULL == ps_piclen_file)
+ {
+ sprintf(ac_error_str, "Could not open piclen file %s",
+ s_app_ctx.ac_piclen_fname);
+ codec_exit(ac_error_str);
+ }
+ }
+
+ /***********************************************************************/
+ /* create the file object for output file */
+ /***********************************************************************/
+
+ /* If the filename does not contain %d, then output will be dumped to
+ a single file and it is opened here */
+ if((1 == s_app_ctx.u4_file_save_flag) && (strstr(s_app_ctx.ac_op_fname,"%d") == NULL))
+ {
+#ifdef IOS
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctx.ac_op_fname);
+ ps_op_file = fopen(filename_with_path,"wb");
+#else
+ ps_op_file = fopen(s_app_ctx.ac_op_fname, "wb");
+#endif
+
+ if(NULL == ps_op_file)
+ {
+ sprintf(ac_error_str, "Could not open output file %s",
+ s_app_ctx.ac_op_fname);
+ codec_exit(ac_error_str);
+ }
+ }
+
+ /***********************************************************************/
+ /* create the file object for check sum file */
+ /***********************************************************************/
+ if(1 == s_app_ctx.u4_chksum_save_flag)
+ {
+#if IOS
+ sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctx.ac_op_chksum_fname);
+ ps_op_chksum_file = fopen(filename_with_path,"wb");
+#else
+ ps_op_chksum_file = fopen(s_app_ctx.ac_op_chksum_fname, "wb");
+#endif
+ if(NULL == ps_op_chksum_file)
+ {
+ sprintf(ac_error_str, "Could not open check sum file %s",
+ s_app_ctx.ac_op_chksum_fname);
+ codec_exit(ac_error_str);
+ }
+ }
+ /***********************************************************************/
+ /* Create decoder instance */
+ /***********************************************************************/
+ {
+
+ ps_out_buf = (ivd_out_bufdesc_t *)malloc(sizeof(ivd_out_bufdesc_t));
+
+ {
+ iv_num_mem_rec_ip_t s_no_of_mem_rec_query_ip;
+ iv_num_mem_rec_op_t s_no_of_mem_rec_query_op;
+
+ s_no_of_mem_rec_query_ip.u4_size = sizeof(s_no_of_mem_rec_query_ip);
+ s_no_of_mem_rec_query_op.u4_size = sizeof(s_no_of_mem_rec_query_op);
+ s_no_of_mem_rec_query_ip.e_cmd = IV_CMD_GET_NUM_MEM_REC;
+
+ /*****************************************************************************/
+ /* API Call: Get Number of Mem Records */
+ /*****************************************************************************/
+ e_dec_status = ivd_api_function(
+ NULL, (void*)&s_no_of_mem_rec_query_ip,
+ (void*)&s_no_of_mem_rec_query_op);
+ if(IV_SUCCESS != e_dec_status)
+ {
+ sprintf(ac_error_str, "Error in get mem records");
+ codec_exit(ac_error_str);
+ }
+
+ u4_num_mem_recs = s_no_of_mem_rec_query_op.u4_num_mem_rec;
+ }
+
+ pv_mem_rec_location = malloc(u4_num_mem_recs * sizeof(iv_mem_rec_t));
+ if(pv_mem_rec_location == NULL)
+ {
+ sprintf(ac_error_str, "Allocation failure for mem_rec_location");
+ codec_exit(ac_error_str);
+
+ }
+
+ {
+ ih264d_fill_mem_rec_ip_t s_fill_mem_rec_ip;
+ ih264d_fill_mem_rec_op_t s_fill_mem_rec_op;
+ iv_mem_rec_t *ps_mem_rec;
+ UWORD32 total_size;
+
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.e_cmd =
+ IV_CMD_FILL_NUM_MEM_REC;
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location =
+ (iv_mem_rec_t *)pv_mem_rec_location;
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd =
+ (s_app_ctx.max_wd == 0) ? MAX_FRAME_WIDTH : s_app_ctx.max_wd;
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht =
+ (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht;
+ s_fill_mem_rec_ip.i4_level = (s_app_ctx.max_level == 0) ? MAX_LEVEL_SUPPORTED : s_app_ctx.max_level;
+ s_fill_mem_rec_ip.u4_num_ref_frames = MAX_REF_FRAMES;
+ s_fill_mem_rec_ip.u4_num_reorder_frames = MAX_REORDER_FRAMES;
+ s_fill_mem_rec_ip.u4_share_disp_buf = s_app_ctx.u4_share_disp_buf;
+ s_fill_mem_rec_ip.e_output_format =
+ (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format;
+ s_fill_mem_rec_ip.u4_num_extra_disp_buf = EXTRA_DISP_BUFFERS;
+
+ s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_size =
+ sizeof(ih264d_fill_mem_rec_ip_t);
+ s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_size =
+ sizeof(ih264d_fill_mem_rec_op_t);
+
+ ps_mem_rec = (iv_mem_rec_t *)pv_mem_rec_location;
+ for(i = 0; i < u4_num_mem_recs; i++)
+ ps_mem_rec[i].u4_size = sizeof(iv_mem_rec_t);
+
+ /*****************************************************************************/
+ /* API Call: Fill Mem Records */
+ /*****************************************************************************/
+
+ e_dec_status = ivd_api_function(NULL,
+ (void *)&s_fill_mem_rec_ip,
+ (void *)&s_fill_mem_rec_op);
+
+ u4_num_mem_recs =
+ s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_num_mem_rec_filled;
+
+ if(IV_SUCCESS != e_dec_status)
+ {
+ sprintf(ac_error_str, "Error in fill mem records: %x",s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_error_code);
+ codec_exit(ac_error_str);
+ }
+
+ ps_mem_rec = (iv_mem_rec_t *)pv_mem_rec_location;
+ total_size = 0;
+ for(i = 0; i < u4_num_mem_recs; i++)
+ {
+ ps_mem_rec->pv_base = ih264a_aligned_malloc(ps_mem_rec->u4_mem_alignment,
+ ps_mem_rec->u4_mem_size);
+ if(ps_mem_rec->pv_base == NULL)
+ {
+ sprintf(ac_error_str,
+ "\nAllocation failure for mem record id %d i4_size %d\n",
+ i, ps_mem_rec->u4_mem_size);
+ codec_exit(ac_error_str);
+
+ }
+ total_size += ps_mem_rec->u4_mem_size;
+ ps_mem_rec++;
+ }
+ printf("\nTotal memory for codec %d\n", total_size);
+ }
+ /*****************************************************************************/
+ /* API Call: Initialize the Decoder */
+ /*****************************************************************************/
+ {
+ ih264d_init_ip_t s_init_ip;
+ ih264d_init_op_t s_init_op;
+ void *fxns = &ivd_api_function;
+ iv_mem_rec_t *mem_tab;
+
+ mem_tab = (iv_mem_rec_t*)pv_mem_rec_location;
+ s_init_ip.s_ivd_init_ip_t.e_cmd = (IVD_API_COMMAND_TYPE_T)IV_CMD_INIT;
+ s_init_ip.s_ivd_init_ip_t.pv_mem_rec_location = mem_tab;
+ s_init_ip.s_ivd_init_ip_t.u4_frm_max_wd = (s_app_ctx.max_wd == 0) ? MAX_FRAME_WIDTH : s_app_ctx.max_wd;
+ s_init_ip.s_ivd_init_ip_t.u4_frm_max_ht = (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht;
+ s_init_ip.i4_level = (s_app_ctx.max_level == 0) ? MAX_LEVEL_SUPPORTED : s_app_ctx.max_level;
+ s_init_ip.u4_num_ref_frames = MAX_REF_FRAMES;
+ s_init_ip.u4_num_reorder_frames = MAX_REORDER_FRAMES;
+ s_init_ip.u4_share_disp_buf = s_app_ctx.u4_share_disp_buf;
+ s_init_ip.u4_num_extra_disp_buf = EXTRA_DISP_BUFFERS;
+ s_init_ip.s_ivd_init_ip_t.u4_num_mem_rec = u4_num_mem_recs;
+ s_init_ip.s_ivd_init_ip_t.e_output_format =
+ (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format;
+ s_init_ip.s_ivd_init_ip_t.u4_size = sizeof(ih264d_init_ip_t);
+ s_init_op.s_ivd_init_op_t.u4_size = sizeof(ih264d_init_op_t);
+
+ codec_obj = (iv_obj_t*)mem_tab[0].pv_base;
+ codec_obj->pv_fxns = fxns;
+ codec_obj->u4_size = sizeof(iv_obj_t);
+
+ s_app_ctx.cocodec_obj = codec_obj;
+
+ ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_init_ip,
+ (void *)&s_init_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str, "Error in Init %8x\n",
+ s_init_op.s_ivd_init_op_t.u4_error_code);
+ codec_exit(ac_error_str);
+ }
+
+ /*****************************************************************************/
+ /* Input and output buffer allocation */
+ /*****************************************************************************/
+ {
+
+ ivd_ctl_getbufinfo_ip_t s_ctl_ip;
+ ivd_ctl_getbufinfo_op_t s_ctl_op;
+
+ s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
+ s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
+ s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
+ ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_ip,
+ (void *)&s_ctl_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str, "Error in Get Buf Info %x", s_ctl_op.u4_error_code);
+ codec_exit(ac_error_str);
+ }
+
+ /* Allocate input buffer */
+ u4_ip_buf_len = s_ctl_op.u4_min_in_buf_size[0];
+ pu1_bs_buf = (UWORD8 *)malloc(u4_ip_buf_len);
+
+ if(pu1_bs_buf == NULL)
+ {
+ sprintf(ac_error_str,
+ "\nAllocation failure for input buffer of i4_size %d",
+ u4_ip_buf_len);
+ codec_exit(ac_error_str);
+ }
+ s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs;
+ /* Allocate output buffer only if display buffers are not shared */
+ /* Or if shared and output is 420P */
+ if((0 == s_app_ctx.u4_share_disp_buf) || (IV_YUV_420P == s_app_ctx.e_output_chroma_format))
+ {
+ UWORD32 outlen;
+ ps_out_buf->u4_min_out_buf_size[0] =
+ s_ctl_op.u4_min_out_buf_size[0];
+ ps_out_buf->u4_min_out_buf_size[1] =
+ s_ctl_op.u4_min_out_buf_size[1];
+ ps_out_buf->u4_min_out_buf_size[2] =
+ s_ctl_op.u4_min_out_buf_size[2];
+
+ outlen = s_ctl_op.u4_min_out_buf_size[0];
+ if(s_ctl_op.u4_min_num_out_bufs > 1)
+ outlen += s_ctl_op.u4_min_out_buf_size[1];
+
+ if(s_ctl_op.u4_min_num_out_bufs > 2)
+ outlen += s_ctl_op.u4_min_out_buf_size[2];
+
+ ps_out_buf->pu1_bufs[0] = (UWORD8 *)malloc(outlen);
+ if(ps_out_buf->pu1_bufs[0] == NULL)
+ {
+ sprintf(ac_error_str,
+ "\nAllocation failure for output buffer of i4_size %d",
+ outlen);
+ codec_exit(ac_error_str);
+ }
+
+ if(s_ctl_op.u4_min_num_out_bufs > 1)
+ ps_out_buf->pu1_bufs[1] = ps_out_buf->pu1_bufs[0]
+ + (s_ctl_op.u4_min_out_buf_size[0]);
+
+ if(s_ctl_op.u4_min_num_out_bufs > 2)
+ ps_out_buf->pu1_bufs[2] = ps_out_buf->pu1_bufs[1]
+ + (s_ctl_op.u4_min_out_buf_size[1]);
+
+ ps_out_buf->u4_num_bufs = s_ctl_op.u4_min_num_out_bufs;
+ }
+
+ }
+ }
+
+ }
+
+
+ /*************************************************************************/
+ /* set num of cores */
+ /*************************************************************************/
+ {
+
+ ih264d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip;
+ ih264d_ctl_set_num_cores_op_t s_ctl_set_cores_op;
+
+ s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_set_cores_ip.e_sub_cmd =(IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_NUM_CORES;
+ s_ctl_set_cores_ip.u4_num_cores = s_app_ctx.u4_num_cores;
+ s_ctl_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
+ s_ctl_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
+
+ ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_set_cores_ip,
+ (void *)&s_ctl_set_cores_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str, "\nError in setting number of cores");
+ codec_exit(ac_error_str);
+ }
+
+ }
+
+ /*************************************************************************/
+ /* set processsor */
+ /*************************************************************************/
+ {
+
+ ih264d_ctl_set_processor_ip_t s_ctl_set_num_processor_ip;
+ ih264d_ctl_set_processor_op_t s_ctl_set_num_processor_op;
+
+ s_ctl_set_num_processor_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_set_num_processor_ip.e_sub_cmd =(IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_PROCESSOR;
+ s_ctl_set_num_processor_ip.u4_arch = s_app_ctx.e_arch;
+ s_ctl_set_num_processor_ip.u4_soc = s_app_ctx.e_soc;
+ s_ctl_set_num_processor_ip.u4_size = sizeof(ih264d_ctl_set_processor_ip_t);
+ s_ctl_set_num_processor_op.u4_size = sizeof(ih264d_ctl_set_processor_op_t);
+
+ ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_set_num_processor_ip,
+ (void *)&s_ctl_set_num_processor_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str, "\nError in setting Processor type");
+ codec_exit(ac_error_str);
+ }
+
+ }
+
+
+ /*****************************************************************************/
+ /* Decode header to get width and height and buffer sizes */
+ /*****************************************************************************/
+ {
+
+ ivd_ctl_set_config_ip_t s_ctl_ip;
+ ivd_ctl_set_config_op_t s_ctl_op;
+
+ ivd_video_decode_ip_t s_video_decode_ip;
+ ivd_video_decode_op_t s_video_decode_op;
+
+ s_ctl_ip.u4_disp_wd = STRIDE;
+ if(1 == s_app_ctx.display)
+ s_ctl_ip.u4_disp_wd = s_app_ctx.get_stride();
+
+ s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE;
+ s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+ s_ctl_ip.e_vid_dec_mode = IVD_DECODE_HEADER;
+ s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+ s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
+ s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
+
+ ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_ip,
+ (void *)&s_ctl_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str,
+ "\nError in setting the codec in header decode mode");
+ codec_exit(ac_error_str);
+ }
+
+ do
+ {
+ WORD32 numbytes;
+ if(0 == s_app_ctx.u4_piclen_flag)
+ {
+ fseek(ps_ip_file, file_pos, SEEK_SET);
+ numbytes = u4_ip_buf_len;
+ }
+ else
+ {
+ WORD32 entries;
+ entries = fscanf(ps_piclen_file, "%d\n", &numbytes);
+ if(1 != entries)
+ numbytes = u4_ip_buf_len;
+ }
+
+ u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8), numbytes,
+ ps_ip_file);
+
+ if(0 == u4_bytes_remaining)
+ {
+ sprintf(ac_error_str, "\nUnable to read from input file");
+ codec_exit(ac_error_str);
+ }
+
+ s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
+ s_video_decode_ip.u4_ts = u4_ip_frm_ts;
+ s_video_decode_ip.pv_stream_buffer = pu1_bs_buf;
+ s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining;
+ s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t);
+ s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t);
+
+ /*****************************************************************************/
+ /* API Call: Header Decode */
+ /*****************************************************************************/
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip,
+ (void *)&s_video_decode_op);
+
+ if(ret != IV_SUCCESS)
+ {
+ printf("Error in header decode %x\n", s_video_decode_op.u4_error_code);
+ // codec_exit(ac_error_str);
+ }
+
+ u4_num_bytes_dec = s_video_decode_op.u4_num_bytes_consumed;
+#ifndef PROFILE_ENABLE
+ printf("%d\n",s_video_decode_op.u4_num_bytes_consumed);
+#endif
+ file_pos += u4_num_bytes_dec;
+ total_bytes_comsumed += u4_num_bytes_dec;
+ }while(ret != IV_SUCCESS);
+
+ /* copy pic_wd and pic_ht to initialize buffers */
+ s_app_ctx.u4_pic_wd = s_video_decode_op.u4_pic_wd;
+ s_app_ctx.u4_pic_ht = s_video_decode_op.u4_pic_ht;
+
+#if IOS_DISPLAY
+ s_app_ctx.i4_screen_wd = screen_wd;
+ s_app_ctx.i4_screen_ht = screen_ht;
+#endif
+
+ /* Create display thread and wait for the display buffers to be initialized */
+ if(1 == s_app_ctx.display)
+ {
+ if(0 == s_app_ctx.display_thread_created)
+ {
+ s_app_ctx.display_init_done = 0;
+ ithread_create(s_app_ctx.display_thread_handle, NULL,
+ (void *) &display_thread, (void *) &s_app_ctx);
+ s_app_ctx.display_thread_created = 1;
+
+ while(1)
+ {
+ if(s_app_ctx.display_init_done)
+ break;
+
+ ithread_msleep(1);
+ }
+ }
+
+ s_app_ctx.u4_strd = s_app_ctx.get_stride();
+ }
+ }
+
+ /*************************************************************************/
+ /* Get actual number of output buffers requried, which is dependent */
+ /* on ps_bitstrm properties such as width, height and level etc */
+ /* This is needed mainly for shared display mode */
+ /*************************************************************************/
+ //if(1 == s_app_ctx.u4_share_disp_buf)
+ {
+ ivd_ctl_getbufinfo_ip_t s_ctl_ip;
+ ivd_ctl_getbufinfo_op_t s_ctl_op;
+ WORD32 outlen = 0;
+
+ s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
+ s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
+ s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+ (void *)&s_ctl_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str, "Error in Get Buf Info %x", s_ctl_op.u4_error_code);
+ codec_exit(ac_error_str);
+ }
+
+#ifdef APP_EXTRA_BUFS
+ s_app_ctx.disp_delay = EXTRA_DISP_BUFFERS;
+ s_ctl_op.u4_num_disp_bufs += EXTRA_DISP_BUFFERS;
+#endif
+
+ /*****************************************************************************/
+ /* API Call: Allocate display buffers for display buffer shared case */
+ /*****************************************************************************/
+
+ for(i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
+ {
+
+ s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[0] =
+ s_ctl_op.u4_min_out_buf_size[0];
+ s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[1] =
+ s_ctl_op.u4_min_out_buf_size[1];
+ s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[2] =
+ s_ctl_op.u4_min_out_buf_size[2];
+
+ outlen = s_ctl_op.u4_min_out_buf_size[0];
+ if(s_ctl_op.u4_min_num_out_bufs > 1)
+ outlen += s_ctl_op.u4_min_out_buf_size[1];
+
+ if(s_ctl_op.u4_min_num_out_bufs > 2)
+ outlen += s_ctl_op.u4_min_out_buf_size[2];
+
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[0] = (UWORD8 *)malloc(outlen);
+
+ if(s_app_ctx.s_disp_buffers[i].pu1_bufs[0] == NULL)
+ {
+ sprintf(ac_error_str,
+ "\nAllocation failure for output buffer of i4_size %d",
+ outlen);
+ codec_exit(ac_error_str);
+ }
+
+ if(s_ctl_op.u4_min_num_out_bufs > 1)
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[1] =
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[0]
+ + (s_ctl_op.u4_min_out_buf_size[0]);
+
+ if(s_ctl_op.u4_min_num_out_bufs > 2)
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[2] =
+ s_app_ctx.s_disp_buffers[i].pu1_bufs[1]
+ + (s_ctl_op.u4_min_out_buf_size[1]);
+
+ s_app_ctx.s_disp_buffers[i].u4_num_bufs =
+ s_ctl_op.u4_min_num_out_bufs;
+ }
+ s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs;
+
+ /*****************************************************************************/
+ /* API Call: Send the allocated display buffers to codec */
+ /*****************************************************************************/
+ {
+ ivd_set_display_frame_ip_t s_set_display_frame_ip;
+ ivd_set_display_frame_op_t s_set_display_frame_op;
+
+ s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME;
+ s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t);
+ s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t);
+
+ s_set_display_frame_ip.num_disp_bufs = s_app_ctx.num_disp_buf;
+
+ memcpy(&(s_set_display_frame_ip.s_disp_buffer),
+ &(s_app_ctx.s_disp_buffers),
+ s_ctl_op.u4_num_disp_bufs * sizeof(ivd_out_bufdesc_t));
+
+ ret = ivd_api_function((iv_obj_t *)codec_obj,
+ (void *)&s_set_display_frame_ip,
+ (void *)&s_set_display_frame_op);
+
+ if(IV_SUCCESS != ret)
+ {
+ sprintf(ac_error_str, "Error in Set display frame");
+ codec_exit(ac_error_str);
+ }
+
+ }
+
+ }
+
+ /*************************************************************************/
+ /* Get frame dimensions for display buffers such as x_offset,y_offset */
+ /* etc. This information might be needed to set display buffer */
+ /* offsets in case of shared display buffer mode */
+ /*************************************************************************/
+ {
+
+ ih264d_ctl_get_frame_dimensions_ip_t s_ctl_get_frame_dimensions_ip;
+ ih264d_ctl_get_frame_dimensions_op_t s_ctl_get_frame_dimensions_op;
+
+ s_ctl_get_frame_dimensions_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_get_frame_dimensions_ip.e_sub_cmd =
+ (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_BUFFER_DIMENSIONS;
+ s_ctl_get_frame_dimensions_ip.u4_size =
+ sizeof(ih264d_ctl_get_frame_dimensions_ip_t);
+ s_ctl_get_frame_dimensions_op.u4_size =
+ sizeof(ih264d_ctl_get_frame_dimensions_op_t);
+
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_get_frame_dimensions_ip,
+ (void *)&s_ctl_get_frame_dimensions_op);
+ if(IV_SUCCESS != ret)
+ {
+ sprintf(ac_error_str, "Error in Get buffer Dimensions");
+ codec_exit(ac_error_str);
+ }
+
+/*
+ printf("Frame offsets due to padding\n");
+ printf("s_ctl_get_frame_dimensions_op.x_offset[0] %d s_ctl_get_frame_dimensions_op.y_offset[0] %d\n",
+ s_ctl_get_frame_dimensions_op.u4_x_offset[0],
+ s_ctl_get_frame_dimensions_op.u4_y_offset[0]);
+*/
+ }
+
+
+
+ /*************************************************************************/
+ /* Set the decoder in frame decode mode. It was set in header decode */
+ /* mode earlier */
+ /*************************************************************************/
+ {
+
+ ivd_ctl_set_config_ip_t s_ctl_ip;
+ ivd_ctl_set_config_op_t s_ctl_op;
+
+ s_ctl_ip.u4_disp_wd = STRIDE;
+ if(1 == s_app_ctx.display)
+ s_ctl_ip.u4_disp_wd = s_app_ctx.get_stride();
+ s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE;
+
+ s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+ s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+ s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+ s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
+
+ s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
+
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, (void *)&s_ctl_op);
+
+ if(IV_SUCCESS != ret)
+ {
+ sprintf(ac_error_str, "Error in Set Parameters");
+ //codec_exit(ac_error_str);
+ }
+
+ }
+ /*************************************************************************/
+ /* If required disable deblocking and sao at given level */
+ /*************************************************************************/
+
+ set_degrade(codec_obj, s_app_ctx.i4_degrade_type, s_app_ctx.i4_degrade_pics);
+#ifdef WINDOWS_TIMER
+ QueryPerformanceFrequency ( &frequency);
+#endif
+#ifndef PRINT_PICSIZE
+ get_version(codec_obj);
+#endif
+ max_op_frm_ts = (s_app_ctx.u4_max_frm_ts > 0)? (s_app_ctx.u4_max_frm_ts + s_app_ctx.disp_delay): 0xffffffff;
+ while(u4_op_frm_ts < max_op_frm_ts)
+ {
+
+#ifdef TEST_FLUSH
+ if(u4_ip_frm_ts == FLUSH_FRM_CNT)
+ {
+ ivd_ctl_flush_ip_t s_ctl_ip;
+ ivd_ctl_flush_op_t s_ctl_op;
+
+ s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH;
+ s_ctl_ip.u4_size = sizeof(ivd_ctl_flush_ip_t);
+ s_ctl_op.u4_size = sizeof(ivd_ctl_flush_op_t);
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+ (void *)&s_ctl_op);
+
+ if(ret != IV_SUCCESS)
+ {
+ printf("Error in Setting the decoder in flush mode\n");
+ }
+// file_pos = 0;
+
+// fseek(ps_ip_file, file_pos, SEEK_SET);
+
+ }
+#endif
+ if(u4_ip_frm_ts < s_app_ctx.num_disp_buf)
+ {
+ release_disp_frame(codec_obj, u4_ip_frm_ts);
+ }
+
+
+ /*************************************************************************/
+ /* set num of cores */
+ /*************************************************************************/
+#ifdef DYNAMIC_NUMCORES
+ {
+
+ ih264d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip;
+ ih264d_ctl_set_num_cores_op_t s_ctl_set_cores_op;
+
+ s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_set_cores_ip.e_sub_cmd = IH264D_CMD_CTL_SET_NUM_CORES;
+ s_ctl_set_cores_ip.u4_num_cores = 1 + 3 * (u4_ip_frm_ts % 2);
+ s_ctl_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
+ s_ctl_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
+
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_cores_ip,
+ (void *)&s_ctl_set_cores_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str, "\nError in setting number of cores");
+ codec_exit(ac_error_str);
+ }
+
+ }
+#endif
+ /***********************************************************************/
+ /* Seek the file to start of current frame, this is equivalent of */
+ /* having a parser which tells the start of current frame */
+ /***********************************************************************/
+ {
+ WORD32 numbytes;
+
+ if(0 == s_app_ctx.u4_piclen_flag)
+ {
+ fseek(ps_ip_file, file_pos, SEEK_SET);
+ numbytes = u4_ip_buf_len;
+ }
+ else
+ {
+ WORD32 entries;
+ entries = fscanf(ps_piclen_file, "%d\n", &numbytes);
+ if(1 != entries)
+ numbytes = u4_ip_buf_len;
+ }
+
+ u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8),
+ numbytes, ps_ip_file);
+
+ if(u4_bytes_remaining == 0)
+ {
+ if(1 == s_app_ctx.loopback)
+ {
+ file_pos = 0;
+ if(0 == s_app_ctx.u4_piclen_flag)
+ {
+ fseek(ps_ip_file, file_pos, SEEK_SET);
+ numbytes = u4_ip_buf_len;
+ }
+ else
+ {
+ WORD32 entries;
+ entries = fscanf(ps_piclen_file, "%d\n", &numbytes);
+ if(1 != entries)
+ numbytes = u4_ip_buf_len;
+ }
+
+
+ u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8),
+ numbytes, ps_ip_file);
+ }
+ else
+ break;
+ }
+ }
+
+ /*********************************************************************/
+ /* Following calls can be enabled at different times */
+ /*********************************************************************/
+#if ENABLE_DEGRADE
+ if(u4_op_frm_ts >= 10000)
+ disable_deblocking(codec_obj, 4);
+
+ if(u4_op_frm_ts == 30000)
+ enable_deblocking(codec_obj);
+
+ if(u4_op_frm_ts == 10000)
+ enable_skippb_frames(codec_obj);
+
+ if(u4_op_frm_ts == 60000)
+ disable_skippb_frames(codec_obj);
+
+ if(u4_op_frm_ts == 30000)
+ enable_skipb_frames(codec_obj);
+
+ if(u4_op_frm_ts == 60000)
+ disable_skipb_frames(codec_obj);
+#endif
+
+
+ {
+ ivd_video_decode_ip_t s_video_decode_ip;
+ ivd_video_decode_op_t s_video_decode_op;
+#ifdef PROFILE_ENABLE
+ UWORD32 s_elapsed_time;
+ TIMER s_start_timer;
+ TIMER s_end_timer;
+#endif
+
+
+ s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
+ s_video_decode_ip.u4_ts = u4_ip_frm_ts;
+ s_video_decode_ip.pv_stream_buffer = pu1_bs_buf;
+ s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining;
+ s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t);
+ s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[0] =
+ ps_out_buf->u4_min_out_buf_size[0];
+ s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[1] =
+ ps_out_buf->u4_min_out_buf_size[1];
+ s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[2] =
+ ps_out_buf->u4_min_out_buf_size[2];
+
+ s_video_decode_ip.s_out_buffer.pu1_bufs[0] =
+ ps_out_buf->pu1_bufs[0];
+ s_video_decode_ip.s_out_buffer.pu1_bufs[1] =
+ ps_out_buf->pu1_bufs[1];
+ s_video_decode_ip.s_out_buffer.pu1_bufs[2] =
+ ps_out_buf->pu1_bufs[2];
+ s_video_decode_ip.s_out_buffer.u4_num_bufs =
+ ps_out_buf->u4_num_bufs;
+ s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t);
+
+ /* Get display buffer pointers */
+ if(1 == s_app_ctx.display)
+ {
+ WORD32 wr_idx;
+
+ wr_idx = dispq_producer_dequeue(&s_app_ctx);
+
+ if(s_app_ctx.quit)
+ break;
+
+ s_app_ctx.set_disp_buffers(s_app_ctx.pv_disp_ctx, wr_idx,
+ &s_video_decode_ip.s_out_buffer.pu1_bufs[0],
+ &s_video_decode_ip.s_out_buffer.pu1_bufs[1],
+ &s_video_decode_ip.s_out_buffer.pu1_bufs[2]);
+ }
+
+ /*****************************************************************************/
+ /* API Call: Video Decode */
+ /*****************************************************************************/
+
+ GETTIME(&s_start_timer);
+
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip,
+ (void *)&s_video_decode_op);
+
+
+ GETTIME(&s_end_timer);
+ ELAPSEDTIME(s_start_timer,s_end_timer,s_elapsed_time,frequency);
+#ifdef PROFILE_ENABLE
+ {
+ UWORD32 peak_avg, id;
+ u4_tot_cycles += s_elapsed_time;
+ peak_window[peak_window_idx++] = s_elapsed_time;
+ if(peak_window_idx == PEAK_WINDOW_SIZE)
+ peak_window_idx = 0;
+ peak_avg = 0;
+ for(id = 0; id < PEAK_WINDOW_SIZE; id++)
+ {
+ peak_avg += peak_window[id];
+ }
+ peak_avg /= PEAK_WINDOW_SIZE;
+ if(peak_avg > peak_avg_max)
+ peak_avg_max = peak_avg;
+ frm_cnt++;
+
+ printf("FrameNum: %4d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d Output: %2d NumBytes: %6d \n",
+ frm_cnt, s_elapsed_time, u4_tot_cycles / frm_cnt, peak_avg_max, s_video_decode_op.u4_output_present, s_video_decode_op.u4_num_bytes_consumed);
+
+ }
+#ifdef INTEL_CE5300
+ time_consumed += s_elapsed_time;
+ bytes_consumed += s_video_decode_op.u4_num_bytes_consumed;
+ if (!(frm_cnt % (s_app_ctx.fps)))
+ {
+ time_consumed = time_consumed/s_app_ctx.fps;
+ printf("Average decode time(micro sec) for the last second = %6d\n",time_consumed);
+ printf("Average bitrate(kb) for the last second = %6d\n",(bytes_consumed * 8) / 1024);
+ time_consumed = 0;
+ bytes_consumed = 0;
+
+ }
+#endif
+#else
+ printf("%d\n",s_video_decode_op.u4_num_bytes_consumed);
+#endif
+
+ if(ret != IV_SUCCESS)
+ {
+ printf("Error in video Frame decode : ret %x Error %x\n", ret,
+ s_video_decode_op.u4_error_code);
+ }
+
+ if((IV_SUCCESS != ret) &&
+ ((s_video_decode_op.u4_error_code & 0xFF) == IVD_RES_CHANGED))
+ {
+ ivd_ctl_reset_ip_t s_ctl_ip;
+ ivd_ctl_reset_op_t s_ctl_op;
+
+ flush_output(codec_obj, &s_app_ctx, ps_out_buf,
+ pu1_bs_buf, &u4_op_frm_ts,
+ ps_op_file, ps_op_chksum_file,
+ u4_ip_frm_ts, u4_bytes_remaining);
+
+ s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET;
+ s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t);
+ s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t);
+
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+ (void *)&s_ctl_op);
+ if(IV_SUCCESS != ret)
+ {
+ sprintf(ac_error_str, "Error in Reset");
+ codec_exit(ac_error_str);
+ }
+ /*************************************************************************/
+ /* set num of cores */
+ /*************************************************************************/
+ {
+
+ ih264d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip;
+ ih264d_ctl_set_num_cores_op_t s_ctl_set_cores_op;
+
+ s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_set_cores_ip.e_sub_cmd =(IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_NUM_CORES;
+ s_ctl_set_cores_ip.u4_num_cores = s_app_ctx.u4_num_cores;
+ s_ctl_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t);
+ s_ctl_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t);
+
+ ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_set_cores_ip,
+ (void *)&s_ctl_set_cores_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str, "\nError in setting number of cores");
+ codec_exit(ac_error_str);
+ }
+
+ }
+ /*************************************************************************/
+ /* set processor */
+ /*************************************************************************/
+
+ {
+
+ ih264d_ctl_set_processor_ip_t s_ctl_set_num_processor_ip;
+ ih264d_ctl_set_processor_op_t s_ctl_set_num_processor_op;
+
+ s_ctl_set_num_processor_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+ s_ctl_set_num_processor_ip.e_sub_cmd =(IVD_CONTROL_API_COMMAND_TYPE_T) IH264D_CMD_CTL_SET_PROCESSOR;
+ s_ctl_set_num_processor_ip.u4_arch = s_app_ctx.e_arch;
+ s_ctl_set_num_processor_ip.u4_soc = s_app_ctx.e_soc;
+ s_ctl_set_num_processor_ip.u4_size = sizeof(ih264d_ctl_set_processor_ip_t);
+ s_ctl_set_num_processor_op.u4_size = sizeof(ih264d_ctl_set_processor_op_t);
+
+ ret = ivd_api_function((iv_obj_t*)codec_obj, (void *)&s_ctl_set_num_processor_ip,
+ (void *)&s_ctl_set_num_processor_op);
+ if(ret != IV_SUCCESS)
+ {
+ sprintf(ac_error_str, "\nError in setting Processor type");
+ codec_exit(ac_error_str);
+ }
+
+ }
+
+ }
+
+
+ if((1 == s_app_ctx.display) &&
+ (1 == s_video_decode_op.u4_output_present))
+ {
+ dispq_producer_queue(&s_app_ctx);
+ }
+
+ if(IV_B_FRAME == s_video_decode_op.e_pic_type)
+ s_app_ctx.b_pic_present |= 1;
+
+ u4_num_bytes_dec = s_video_decode_op.u4_num_bytes_consumed;
+
+ file_pos += u4_num_bytes_dec;
+ total_bytes_comsumed += u4_num_bytes_dec;
+ u4_ip_frm_ts++;
+
+
+ if(1 == s_video_decode_op.u4_output_present)
+ {
+
+ CHAR cur_fname[1000];
+ CHAR *extn = NULL;
+ /* The objective is to dump the decoded frames into separate files instead of
+ * dumping all the frames in one common file. Also, the number of dumped frames
+ * at any given instance of time cannot exceed 'frame_memory'
+ */
+ if(s_app_ctx.u4_file_save_flag)
+ {
+ /* Locate the position of extension yuv */
+ extn = strstr(s_app_ctx.ac_op_fname,"%d");
+ if (extn != NULL)
+ {
+ output_write_stall(s_app_ctx.ac_op_fname,u4_op_frm_ts);
+ /* Generate output file names */
+ sprintf(cur_fname,s_app_ctx.ac_op_fname,u4_op_frm_ts);
+ /* Open Output file */
+ ps_op_file = fopen(cur_fname,"wb");
+ if (NULL == ps_op_file)
+ {
+ sprintf(ac_error_str, "Could not open output file %s",
+ cur_fname);
+
+ codec_exit(ac_error_str);
+ }
+ }
+ }
+
+ width = s_video_decode_op.s_disp_frm_buf.u4_y_wd;
+ height = s_video_decode_op.s_disp_frm_buf.u4_y_ht;
+ dump_output(&s_app_ctx, &(s_video_decode_op.s_disp_frm_buf),
+ s_video_decode_op.u4_disp_buf_id, ps_op_file,
+ ps_op_chksum_file,
+ u4_op_frm_ts, s_app_ctx.u4_file_save_flag,
+ s_app_ctx.u4_chksum_save_flag);
+
+ u4_op_frm_ts++;
+ if (extn != NULL)
+ fclose(ps_op_file);
+
+ }
+ else
+ {
+ if((s_video_decode_op.u4_error_code >> IVD_FATALERROR) & 1)
+ {
+ printf("Fatal error\n");
+ break;
+ }
+ }
+
+ }
+ }
+
+ /***********************************************************************/
+ /* To get the last decoded frames, call process with NULL input */
+ /***********************************************************************/
+ flush_output(codec_obj, &s_app_ctx, ps_out_buf,
+ pu1_bs_buf, &u4_op_frm_ts,
+ ps_op_file, ps_op_chksum_file,
+ u4_ip_frm_ts, u4_bytes_remaining);
+
+ /* set disp_end u4_flag */
+ s_app_ctx.quit = 1;
+
+
+#ifdef PROFILE_ENABLE
+ printf("Summary\n");
+ printf("Input filename : %s\n", s_app_ctx.ac_ip_fname);
+ printf("Output Width : %-4d\n", width);
+ printf("Output Height : %-4d\n", height);
+
+ if(frm_cnt)
+ {
+ double avg = u4_tot_cycles / frm_cnt;
+ double bytes_avg = total_bytes_comsumed / frm_cnt;
+ double bitrate = (bytes_avg * 8 * s_app_ctx.fps)/1000000;
+ printf("Bitrate @ %2d fps(mbps) : %-6.2f\n", s_app_ctx.fps, bitrate);
+ printf("Average decode time(micro sec) : %-6d\n", (WORD32)avg);
+ printf("Avg Peak decode time(%2d frames) : %-6d\n", PEAK_WINDOW_SIZE, (WORD32)peak_avg_max);
+ avg = (u4_tot_cycles + u4_tot_fmt_cycles)* 1.0 / frm_cnt;
+
+ if(0 == s_app_ctx.u4_share_disp_buf)
+ printf("FPS achieved (with format conv) : %-3.2f\n", 1000000/avg);
+ else
+ printf("FPS achieved : %-3.2f\n", 1000000/avg);
+ }
+#endif
+ /***********************************************************************/
+ /* Clear the decoder, close all the files, free all the memory */
+ /***********************************************************************/
+ if(1 == s_app_ctx.display)
+ {
+ s_app_ctx.display_deinit_flag = 1;
+ /* wait for display to finish */
+ if(s_app_ctx.display_thread_created)
+ {
+ ithread_join(s_app_ctx.display_thread_handle, NULL);
+ }
+ free(s_app_ctx.display_thread_handle);
+ }
+
+ {
+ iv_retrieve_mem_rec_ip_t s_retrieve_dec_ip;
+ iv_retrieve_mem_rec_op_t s_retrieve_dec_op;
+ s_retrieve_dec_ip.pv_mem_rec_location = (iv_mem_rec_t *)pv_mem_rec_location;
+
+ s_retrieve_dec_ip.e_cmd = IV_CMD_RETRIEVE_MEMREC;
+ s_retrieve_dec_ip.u4_size = sizeof(iv_retrieve_mem_rec_ip_t);
+ s_retrieve_dec_op.u4_size = sizeof(iv_retrieve_mem_rec_op_t);
+
+ ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_retrieve_dec_ip,
+ (void *)&s_retrieve_dec_op);
+
+ if(IV_SUCCESS != ret)
+ {
+ sprintf(ac_error_str, "Error in Retrieve Memrec");
+ codec_exit(ac_error_str);
+ }
+
+ {
+ iv_mem_rec_t *ps_mem_rec;
+ UWORD16 u2_i;
+
+ u4_num_mem_recs = s_retrieve_dec_op.u4_num_mem_rec_filled;
+
+ ps_mem_rec = s_retrieve_dec_ip.pv_mem_rec_location;
+
+ for(u2_i = 0; u2_i < u4_num_mem_recs; u2_i++)
+ {
+ ih264a_aligned_free(ps_mem_rec->pv_base);
+ ps_mem_rec++;
+ }
+ free(s_retrieve_dec_ip.pv_mem_rec_location);
+ }
+
+ }
+ /***********************************************************************/
+ /* Close all the files and free all the memory */
+ /***********************************************************************/
+ {
+ fclose(ps_ip_file);
+
+ if((1 == s_app_ctx.u4_file_save_flag) && (strstr(s_app_ctx.ac_op_fname,"%d") == NULL))
+ {
+ fclose(ps_op_file);
+ }
+ if(1 == s_app_ctx.u4_chksum_save_flag)
+ {
+ fclose(ps_op_chksum_file);
+ }
+
+ }
+
+ if(0 == s_app_ctx.u4_share_disp_buf)
+ {
+ free(ps_out_buf->pu1_bufs[0]);
+ }
+
+ for(i = 0; i < s_app_ctx.num_disp_buf; i++)
+ {
+ free(s_app_ctx.s_disp_buffers[i].pu1_bufs[0]);
+ }
+
+ free(ps_out_buf);
+ free(pu1_bs_buf);
+
+ return (0);
+}
diff --git a/test/encoder.mk b/test/encoder.mk
new file mode 100755
index 0000000..9a0980e
--- /dev/null
+++ b/test/encoder.mk
@@ -0,0 +1,13 @@
+LOCAL_PATH := $(call my-dir)
+# Builds the avcenc test executable from the sources under test/encoder/.
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := avcenc
+LOCAL_MODULE_TAGS := optional
+# PROFILE_ENABLE selects the real GETTIME/ELAPSEDTIME macros in encoder/app.h; MD5_DISABLE turns calc_md5_cksum() into an empty macro there.
+LOCAL_CFLAGS := -DPROFILE_ENABLE -DARM -DMD5_DISABLE -fPIC -pie
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/../encoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/encoder/
+LOCAL_SRC_FILES := encoder/main.c encoder/psnr.c encoder/input.c encoder/output.c encoder/recon.c
+LOCAL_STATIC_LIBRARIES := libavcenc
+
+include $(BUILD_EXECUTABLE)
diff --git a/test/encoder/app.h b/test/encoder/app.h
new file mode 100755
index 0000000..8c409b8
--- /dev/null
+++ b/test/encoder/app.h
@@ -0,0 +1,348 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/* */
+/* File Name : app.h */
+/* */
+/* Description : This file contains all the necessary structure and */
+/* enumeration definitions needed for the Application */
+/* */
+/* List of Functions : */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 08 2010 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _APP_H_
+#define _APP_H_
+
+#include "iv2.h"
+#include "ive2.h"
+#ifdef WINDOWS_TIMER
+#include <windows.h>
+#endif
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define MAX(a, b) (((a) > (b)) ? (a) : (b)) /* larger of a and b; fully parenthesised, but each argument may be evaluated twice */
+#define MIN(a, b) (((a) < (b)) ? (a) : (b)) /* smaller of a and b; fully parenthesised, but each argument may be evaluated twice */
+
+#define ALIGN16(x) ((((x) + 15) >> 4) << 4)
+
+/*****************************************************************************/
+/* Constant Macros: array bounds and DEFAULT_* values for the encoder test app */
+/*****************************************************************************/
+
+#define DEFAULT_NUM_INPUT_BUFS 1
+#define DEFAULT_MAX_INPUT_BUFS 32 /* length of app_ctxt_t.as_input_buf[] */
+
+#define DEFAULT_NUM_OUTPUT_BUFS 1
+#define DEFAULT_MAX_OUTPUT_BUFS 32 /* length of app_ctxt_t.as_output_buf[] */
+
+#define DEFAULT_NUM_RECON_BUFS 1
+#define DEFAULT_MAX_RECON_BUFS DEFAULT_NUM_RECON_BUFS /* length of app_ctxt_t.as_recon_buf[] */
+
+
+#define LEN_STATUS_BUFFER (10 * 1024)
+#define MAX_VBV_BUFF_SIZE (120 * 16384)
+#define MAX_NUM_IO_BUFS 3
+
+#define DEFAULT_MAX_REF_FRM 1
+#define DEFAULT_MAX_REORDER_FRM 0
+#define DEFAULT_QP_MIN 0
+#define DEFAULT_QP_MAX 51 /* also the value of DEFAULT_I/P/B_QP_MAX below */
+#define DEFAULT_MAX_BITRATE 20000000
+#define DEFAULT_MAX_SRCH_RANGE_X 256
+#define DEFAULT_MAX_SRCH_RANGE_Y 256
+#define DEFAULT_MAX_FRAMERATE 120000
+#define DEFAULT_NUM_CORES 1
+#define DEFAULT_NUM_CORES_PRE_ENC 0
+#define DEFAULT_FPS 30
+#define DEFAULT_ENC_SPEED 100
+
+#define DEFAULT_MEM_REC_CNT 0
+#define DEFAULT_RECON_ENABLE 0
+#define DEFAULT_CHKSUM_ENABLE 0
+#define DEFAULT_START_FRM 0
+#define DEFAULT_NUM_FRMS 0xFFFFFFFF /* all-ones 32-bit value; NOTE(review): presumably means "no frame limit" - confirm in main.c */
+#define DEFAULT_INP_COLOR_FMT IV_YUV_420SP_UV
+#define DEFAULT_RECON_COLOR_FMT IV_YUV_420P
+#define DEFAULT_LOOPBACK 0
+#define DEFAULT_SRC_FRAME_RATE 30
+#define DEFAULT_TGT_FRAME_RATE 30
+#define DEFAULT_MAX_WD 1920
+#define DEFAULT_MAX_HT 1920
+#define DEFAULT_MAX_LEVEL 50
+#define DEFAULT_STRIDE 0
+#define DEFAULT_WD 0
+#define DEFAULT_HT 0
+#define DEFAULT_PSNR_ENABLE 0
+#define DEFAULT_ME_SPEED 100
+#define DEFAULT_ENABLE_FAST_SAD 0
+#define DEFAULT_ENABLE_ALT_REF 0
+#define DEFAULT_RC 1
+#define DEFAULT_BITRATE 6000000
+#define DEFAULT_I_QP 25
+#define DEFAULT_I_QP_MAX DEFAULT_QP_MAX
+#define DEFAULT_I_QP_MIN 0
+#define DEFAULT_P_QP 28
+#define DEFAULT_P_QP_MAX DEFAULT_QP_MAX
+#define DEFAULT_P_QP_MIN 0
+#define DEFAULT_B_QP 28
+#define DEFAULT_B_QP_MAX DEFAULT_QP_MAX
+#define DEFAULT_B_QP_MIN 0
+#define DEFAULT_AIR 0
+#define DEFAULT_AIR_REFRESH_PERIOD 30
+#define DEFAULT_SRCH_RNG_X 64
+#define DEFAULT_SRCH_RNG_Y 48
+#define DEFAULT_I_INTERVAL 30
+#define DEFAULT_IDR_INTERVAL 1000
+#define DEFAULT_B_FRAMES 0
+#define DEFAULT_DISABLE_DEBLK_LEVEL 0
+#define DEFAULT_HPEL 1
+#define DEFAULT_QPEL 1
+#define DEFAULT_I4 1
+#define DEFAULT_EPROFILE IV_PROFILE_BASE
+#define DEFAULT_SLICE_MODE 0
+#define DEFAULT_SLICE_PARAM 256
+#define DEFAULT_ARCH ARCH_ARM_A9Q
+#define STRLENGTH 500 /* size of every ac_*_fname[] array in app_ctxt_t */
+
+
+/*****************************************************************************/
+/* profile Macros */
+/*****************************************************************************/
+#ifdef PROFILE_ENABLE
+ #ifdef WINDOWS_TIMER
+ typedef LARGE_INTEGER TIMER; /* type passed to QueryPerformanceCounter() by GETTIME */
+ #else
+ typedef struct timeval TIMER; /* type passed to gettimeofday() by GETTIME */
+ #endif
+#else
+ typedef WORD32 TIMER; /* placeholder: app_ctxt_t declares TIMER members even when profiling is off */
+#endif
+
+#ifdef PROFILE_ENABLE
+ #ifdef WINDOWS_TIMER
+ #define GETTIME(timer) QueryPerformanceCounter(timer);
+ #else
+ /* GETTIME(timer): fills the TIMER that 'timer' points to; the expansion already ends in ';' */
+ #define GETTIME(timer) gettimeofday(timer,NULL);
+ /* (the X86_MINGW guard that used to wrap this definition is disabled) */
+ #endif
+
+ #ifdef WINDOWS_TIMER
+ #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \
+ { \
+ TIMER s_temp_time; \
+ s_temp_time.LowPart = s_end_timer.LowPart - s_start_timer.LowPart ; \
+ s_elapsed_time = (UWORD32) ( ((DOUBLE)s_temp_time.LowPart / (DOUBLE)frequency.LowPart ) * 1000000); \
+ }
+ #else
+ /* ELAPSEDTIME: s_elapsed_time = end - start in microseconds; 'frequency' is unused in this variant */
+ #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \
+ s_elapsed_time = ((s_end_timer.tv_sec - s_start_timer.tv_sec) * 1000000) + (s_end_timer.tv_usec - s_start_timer.tv_usec);
+ /* The Windows variant above subtracts only the LowPart of the counters and scales by 1000000 / frequency.LowPart */
+ #endif
+
+#else
+ #define GETTIME(timer) /* profiling off: expands to nothing */
+ #define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) /* profiling off: expands to nothing */
+#endif
+
+
+/*****************************************************************************/
+/* Structure definitions */
+/*****************************************************************************/
+typedef struct /* element type of app_ctxt_t.as_input_buf[] */
+{
+ UWORD8 *pu1_buf;
+ UWORD32 u4_buf_size; /* NOTE(review): presumably the size of pu1_buf - confirm units where it is allocated */
+ UWORD32 u4_timestamp_low; /* NOTE(review): low/high look like two 32-bit halves of one timestamp - confirm */
+ UWORD32 u4_timestamp_high;
+ UWORD32 u4_is_free;
+ void *pv_mb_info; /* same name as the pointer parameter of read_mb_info() */
+ void *pv_pic_info; /* same name as the pointer parameter of read_pic_info() */
+}input_buf_t;
+
+typedef struct /* element type of app_ctxt_t.as_output_buf[]; same members as recon_buf_t */
+{
+ UWORD8 *pu1_buf;
+ UWORD32 u4_buf_size; /* NOTE(review): presumably the size of pu1_buf - confirm units where it is allocated */
+ UWORD32 u4_timestamp_low; /* NOTE(review): low/high look like two 32-bit halves of one timestamp - confirm */
+ UWORD32 u4_timestamp_high;
+ UWORD32 u4_is_free;
+}output_buf_t;
+
+typedef struct /* element type of app_ctxt_t.as_recon_buf[]; same members as output_buf_t */
+{
+ UWORD8 *pu1_buf;
+ UWORD32 u4_buf_size; /* NOTE(review): presumably the size of pu1_buf - confirm units where it is allocated */
+ UWORD32 u4_timestamp_low; /* NOTE(review): low/high look like two 32-bit halves of one timestamp - confirm */
+ UWORD32 u4_timestamp_high;
+ UWORD32 u4_is_free;
+}recon_buf_t;
+
+typedef struct /* encoder test-app context; passed as app_ctxt_t *ps_app_ctxt to the helpers declared in this header */
+{
+ iv_obj_t *ps_enc;
+ iv_mem_rec_t *ps_mem_rec;
+ UWORD32 u4_num_mem_rec;
+ UWORD32 u4_recon_enable;
+ UWORD32 u4_chksum_enable;
+ UWORD32 u4_mb_info_type;
+ UWORD32 u4_pic_info_type;
+ UWORD32 u4_mb_info_size;
+ UWORD32 u4_pic_info_size;
+ UWORD32 u4_start_frm;
+ UWORD32 u4_max_num_frms;
+ UWORD32 u4_total_bytes;
+ UWORD32 u4_pics_cnt;
+ IV_COLOR_FORMAT_T e_inp_color_fmt;
+ IV_COLOR_FORMAT_T e_recon_color_fmt;
+ IV_ARCH_T e_arch;
+ IV_SOC_T e_soc;
+
+ WORD32 header_generated;
+ void *pv_codec_obj;
+
+ UWORD32 u4_num_cores;
+ UWORD32 u4_pre_enc_me;
+ UWORD32 u4_pre_enc_ipe;
+ CHAR ac_ip_fname[STRLENGTH];
+ CHAR ac_op_fname[STRLENGTH];
+ CHAR ac_recon_fname[STRLENGTH];
+ CHAR ac_chksum_fname[STRLENGTH];
+ CHAR ac_mb_info_fname[STRLENGTH];
+ CHAR ac_pic_info_fname[STRLENGTH];
+
+ /* Streams; NOTE(review): presumably opened from the ac_*_fname paths above - confirm in main.c */
+ FILE *fp_ip;
+ FILE *fp_op;
+ FILE *fp_recon;
+ FILE *fp_chksum;
+ FILE *fp_psnr_ip;
+ FILE *fp_mb_info;
+ FILE *fp_pic_info;
+ FILE *fp_dump_op;
+
+ /* Encode settings; many have a same-named DEFAULT_* macro in this header */
+ UWORD32 u4_loopback;
+ UWORD32 u4_max_frame_rate;
+ UWORD32 u4_src_frame_rate;
+ UWORD32 u4_tgt_frame_rate;
+ UWORD32 u4_max_wd;
+ UWORD32 u4_max_ht;
+ UWORD32 u4_max_level;
+
+ UWORD32 u4_strd;
+
+ UWORD32 u4_wd;
+ UWORD32 u4_ht;
+
+ UWORD32 u4_psnr_enable;
+
+
+ UWORD32 u4_enc_speed;
+ UWORD32 u4_me_speed;
+ UWORD32 u4_enable_fast_sad;
+ UWORD32 u4_enable_alt_ref;
+ UWORD32 u4_rc;
+ UWORD32 u4_max_bitrate;
+ UWORD32 u4_bitrate;
+ UWORD32 u4_i_qp,u4_i_qp_max,u4_i_qp_min;
+ UWORD32 u4_p_qp,u4_p_qp_max,u4_p_qp_min;
+ UWORD32 u4_b_qp,u4_b_qp_max,u4_b_qp_min;
+ UWORD32 u4_air;
+ UWORD32 u4_air_refresh_period;
+ UWORD32 u4_srch_rng_x;
+ UWORD32 u4_srch_rng_y;
+ UWORD32 u4_i_interval;
+ UWORD32 u4_idr_interval;
+ UWORD32 u4_b_frames;
+ UWORD32 u4_disable_deblk_level;
+ UWORD32 u4_hpel;
+ UWORD32 u4_qpel;
+ UWORD32 u4_enable_intra_4x4;
+ IV_PROFILE_T e_profile;
+
+ UWORD32 u4_slice_mode;
+ UWORD32 u4_slice_param;
+
+ void *pv_input_thread_handle;
+ void *pv_output_thread_handle;
+ void *pv_recon_thread_handle;
+
+ ih264e_ctl_getbufinfo_op_t s_get_buf_info_op;
+ input_buf_t as_input_buf[DEFAULT_MAX_INPUT_BUFS];
+ output_buf_t as_output_buf[DEFAULT_MAX_OUTPUT_BUFS];
+ recon_buf_t as_recon_buf[DEFAULT_MAX_RECON_BUFS];
+
+ DOUBLE adbl_psnr[3];
+ UWORD32 u4_psnr_cnt;
+ UWORD8 *pu1_psnr_buf;
+ UWORD32 u4_psnr_buf_size; /* 32-bit to match the u4_ prefix; an 8-bit field cannot hold a size above 255 */
+
+ UWORD32 u4_vbv_buffer_delay;
+ UWORD32 u4_vbv_buf_size;
+
+ TIMER enc_start_time;
+ TIMER enc_last_time;
+ WORD32 avg_time;
+
+ /* TIMER is typedef'd in the profile macros section of this header */
+} app_ctxt_t;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+void codec_exit(CHAR *pc_err_message);
+void allocate_input(app_ctxt_t *ps_app_ctxt);
+void allocate_output(app_ctxt_t *ps_app_ctxt);
+void allocate_recon(app_ctxt_t *ps_app_ctxt);
+
+IV_STATUS_T read_input(FILE *fp, iv_raw_buf_t *ps_raw_buf);
+IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf);
+IV_STATUS_T write_output(FILE *fp, UWORD8 *pu1_buf, WORD32 num_bytes);
+
+IV_STATUS_T read_mb_info(app_ctxt_t *ps_app_ctxt, void *pv_mb_info);
+IV_STATUS_T read_pic_info(app_ctxt_t *ps_app_ctxt, void *pv_pic_info); /* defined in input.c */
+
+void * ih264a_aligned_malloc(WORD32 alignment, WORD32 size);
+void ih264a_aligned_free(void *pv_buf); /* NOTE(review): presumably must pair with ih264a_aligned_malloc() - confirm */
+
+void free_input(app_ctxt_t *ps_app_ctxt);
+void free_recon(app_ctxt_t *ps_app_ctxt);
+void free_output(app_ctxt_t *ps_app_ctxt);
+
+void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD8 *pu1_buf, IV_COLOR_FORMAT_T e_color_fmt);
+
+#ifndef MD5_DISABLE
+void calc_md5_cksum(UWORD8 *pu1_inbuf,UWORD32 u4_stride,UWORD32 u4_width,UWORD32 u4_height,UWORD8 *pu1_cksum_p );
+#else
+#define calc_md5_cksum(a, b, c, d, e) /* MD5 disabled: the call expands to nothing, so its arguments are not evaluated */
+#endif
+
+#endif /* _APP_H_ */
diff --git a/test/encoder/input.c b/test/encoder/input.c
new file mode 100755
index 0000000..aa52b45
--- /dev/null
+++ b/test/encoder/input.c
@@ -0,0 +1,312 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/time.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "app.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/* Reads one pic info record from ps_app_ctxt->fp_pic_info into pv_pic_info. */
+/* The record size follows u4_pic_info_type (1 or 2) and is also stored in   */
+/* u4_pic_info_size; any other type reads zero bytes. Returns IV_FAIL when   */
+/* fewer bytes than requested were read, IV_SUCCESS otherwise.               */
+IV_STATUS_T read_pic_info(app_ctxt_t *ps_app_ctxt, void *pv_pic_info)
+{
+    WORD32 rec_bytes = 0;
+    WORD32 got_bytes;
+
+    if(1 == ps_app_ctxt->u4_pic_info_type)
+    {
+        rec_bytes = sizeof(ih264e_pic_info1_t);
+        ps_app_ctxt->u4_pic_info_size = sizeof(ih264e_pic_info1_t);
+    }
+    else if(2 == ps_app_ctxt->u4_pic_info_type)
+    {
+        rec_bytes = sizeof(ih264e_pic_info2_t);
+        ps_app_ctxt->u4_pic_info_size = sizeof(ih264e_pic_info2_t);
+    }
+
+    /* The read is issued even for an unknown type (zero bytes requested) */
+    got_bytes = fread(pv_pic_info, 1, rec_bytes, ps_app_ctxt->fp_pic_info);
+
+    return (got_bytes == rec_bytes) ? IV_SUCCESS : IV_FAIL;
+}
+
+/* Reads the MB info for one frame from ps_app_ctxt->fp_mb_info into        */
+/* pv_mb_info. The per-MB record size follows u4_mb_info_type (1..4) and is */
+/* also stored in u4_mb_info_size; any other type reads zero bytes. The MB  */
+/* count is derived from the 16-aligned width and height. Returns IV_FAIL   */
+/* on a short read, IV_SUCCESS otherwise.                                   */
+IV_STATUS_T read_mb_info(app_ctxt_t *ps_app_ctxt, void *pv_mb_info)
+{
+    size_t entry_bytes;
+    WORD32 mb_cnt;
+    WORD32 want_bytes;
+    WORD32 got_bytes;
+
+    mb_cnt = ALIGN16(ps_app_ctxt->u4_wd) * ALIGN16(ps_app_ctxt->u4_ht);
+    mb_cnt = mb_cnt / 256;
+
+    switch(ps_app_ctxt->u4_mb_info_type)
+    {
+        case 1:
+            entry_bytes = sizeof(ih264e_mb_info1_t);
+            break;
+        case 2:
+            entry_bytes = sizeof(ih264e_mb_info2_t);
+            break;
+        case 3:
+            entry_bytes = sizeof(ih264e_mb_info3_t);
+            break;
+        case 4:
+            entry_bytes = sizeof(ih264e_mb_info4_t);
+            break;
+        default:
+            entry_bytes = 0;
+            break;
+    }
+
+    /* The context field is only touched for a recognised type */
+    if(0 != entry_bytes)
+    {
+        ps_app_ctxt->u4_mb_info_size = entry_bytes;
+    }
+
+    want_bytes = entry_bytes * mb_cnt;
+    got_bytes = fread(pv_mb_info, 1, want_bytes, ps_app_ctxt->fp_mb_info);
+
+    return (got_bytes == want_bytes) ? IV_SUCCESS : IV_FAIL;
+}
+
+/* Fills the planes described by ps_raw_buf from fp, one row at a time.     */
+/* IV_YUV_422ILE uses a single plane, IV_YUV_420P three planes and every    */
+/* other format two planes. For each plane au4_ht rows of au4_wd bytes are  */
+/* read and the destination advances by au4_strd after every row.           */
+/* Returns IV_FAIL on the first short read, IV_SUCCESS otherwise.           */
+IV_STATUS_T read_input(FILE *fp, iv_raw_buf_t *ps_raw_buf)
+{
+    WORD32 plane_cnt;
+    WORD32 plane;
+
+    if(IV_YUV_422ILE == ps_raw_buf->e_color_fmt)
+        plane_cnt = 1;
+    else if(IV_YUV_420P == ps_raw_buf->e_color_fmt)
+        plane_cnt = 3;
+    else
+        plane_cnt = 2;
+
+    for(plane = 0; plane < plane_cnt; plane++)
+    {
+        WORD32 row_bytes = ps_raw_buf->au4_wd[plane];
+        WORD32 row_cnt = ps_raw_buf->au4_ht[plane];
+        WORD32 pitch = ps_raw_buf->au4_strd[plane];
+        UWORD8 *pu1_row = ps_raw_buf->apv_bufs[plane];
+        WORD32 row;
+
+        for(row = 0; row < row_cnt; row++, pu1_row += pitch)
+        {
+            WORD32 got = fread(pu1_row, sizeof(UWORD8), row_bytes, fp);
+
+            if(got != row_bytes)
+                return IV_FAIL;
+        }
+    }
+
+    return IV_SUCCESS;
+}
+
+
+/* Writes the planes described by ps_raw_buf to fp, one row at a time.      */
+/* IV_YUV_422ILE uses a single plane, IV_YUV_420P three planes and every    */
+/* other format two planes. For each plane au4_ht rows of au4_wd bytes are  */
+/* written and the source advances by au4_strd after every row.             */
+/* Returns IV_FAIL on the first short write, IV_SUCCESS otherwise.          */
+IV_STATUS_T dump_input(FILE *fp, iv_raw_buf_t *ps_raw_buf)
+{
+    WORD32 plane_cnt;
+    WORD32 plane;
+
+    if(IV_YUV_422ILE == ps_raw_buf->e_color_fmt)
+        plane_cnt = 1;
+    else if(IV_YUV_420P == ps_raw_buf->e_color_fmt)
+        plane_cnt = 3;
+    else
+        plane_cnt = 2;
+
+    for(plane = 0; plane < plane_cnt; plane++)
+    {
+        WORD32 row_bytes = ps_raw_buf->au4_wd[plane];
+        WORD32 row_cnt = ps_raw_buf->au4_ht[plane];
+        WORD32 pitch = ps_raw_buf->au4_strd[plane];
+        UWORD8 *pu1_row = ps_raw_buf->apv_bufs[plane];
+        WORD32 row;
+
+        for(row = 0; row < row_cnt; row++, pu1_row += pitch)
+        {
+            WORD32 put = fwrite(pu1_row, sizeof(UWORD8), row_bytes, fp);
+
+            if(put != row_bytes)
+                return IV_FAIL;
+        }
+    }
+
+    return IV_SUCCESS;
+}
+
+/* Allocates the input buffer pool of the application context.              */
+/* The number of buffers is the codec's u4_min_inp_bufs raised to at least  */
+/* DEFAULT_NUM_INPUT_BUFS and capped at DEFAULT_MAX_INPUT_BUFS. Each entry  */
+/* gets a 16-byte aligned picture buffer, an MB info buffer sized for the   */
+/* maximum dimensions and a pic info buffer, and is then marked free.       */
+/* Any allocation failure ends the program through codec_exit().            */
+void allocate_input(app_ctxt_t *ps_app_ctxt)
+{
+    ih264e_ctl_getbufinfo_op_t *ps_buf_info = &ps_app_ctxt->s_get_buf_info_op;
+    CHAR ac_error[STRLENGTH];
+    WORD32 buf_cnt;
+    WORD32 luma_bytes;
+    WORD32 chroma_bytes;
+    WORD32 frame_bytes;
+    WORD32 mb_cnt;
+    WORD32 mb_info_bytes;
+    WORD32 idx;
+
+    buf_cnt = MAX(DEFAULT_NUM_INPUT_BUFS, ps_buf_info->s_ive_op.u4_min_inp_bufs);
+    buf_cnt = MIN(DEFAULT_MAX_INPUT_BUFS, buf_cnt);
+
+    /* One allocation holds luma followed by both chroma components */
+    luma_bytes = ps_buf_info->s_ive_op.au4_min_in_buf_size[0];
+    chroma_bytes = ps_buf_info->s_ive_op.au4_min_in_buf_size[1] +
+                   ps_buf_info->s_ive_op.au4_min_in_buf_size[2];
+    frame_bytes = luma_bytes + chroma_bytes;
+
+    mb_cnt = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht);
+    mb_cnt = mb_cnt / 256;
+    mb_info_bytes = (WORD32)(mb_cnt * sizeof(ih264e_mb_info_t));
+
+    /* Clear the whole array so that unused entries have u4_is_free == 0 */
+    memset(ps_app_ctxt->as_input_buf, 0, sizeof(input_buf_t) * DEFAULT_MAX_INPUT_BUFS);
+
+    for(idx = 0; idx < buf_cnt; idx++)
+    {
+        input_buf_t *ps_entry = &ps_app_ctxt->as_input_buf[idx];
+        UWORD8 *pu1_mem;
+
+        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, frame_bytes);
+        if(NULL == pu1_mem)
+        {
+            sprintf(ac_error, "Allocation failed for input buffer of size %d\n",
+                    frame_bytes);
+            codec_exit(ac_error);
+        }
+        ps_entry->pu1_buf = pu1_mem;
+
+        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, mb_info_bytes);
+        if(NULL == pu1_mem)
+        {
+            sprintf(ac_error, "Allocation failed for mb info buffer of size %d\n",
+                    mb_info_bytes);
+            codec_exit(ac_error);
+        }
+        ps_entry->pv_mb_info = pu1_mem;
+
+        pu1_mem = (UWORD8 *)ih264a_aligned_malloc(16, sizeof(ih264e_pic_info2_t));
+        if(NULL == pu1_mem)
+        {
+            sprintf(ac_error, "Allocation failed for pic info buffer of size %d\n",
+                    (WORD32) sizeof(ih264e_pic_info2_t));
+            codec_exit(ac_error);
+        }
+        ps_entry->pv_pic_info = pu1_mem;
+
+        ps_entry->u4_buf_size = frame_bytes;
+        ps_entry->u4_is_free = 1;
+    }
+}
+
+
+/* Releases the three buffers of every entry in the input buffer pool.      */
+/* The entry count is recomputed with the same clamp that allocate_input()  */
+/* applies to the codec's u4_min_inp_bufs.                                  */
+void free_input(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 buf_cnt;
+    WORD32 idx;
+
+    buf_cnt = MAX(DEFAULT_NUM_INPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_inp_bufs);
+    buf_cnt = MIN(DEFAULT_MAX_INPUT_BUFS, buf_cnt);
+
+    for(idx = 0; idx < buf_cnt; idx++)
+    {
+        input_buf_t *ps_entry = &ps_app_ctxt->as_input_buf[idx];
+
+        ih264a_aligned_free(ps_entry->pu1_buf);
+        ih264a_aligned_free(ps_entry->pv_mb_info);
+        ih264a_aligned_free(ps_entry->pv_pic_info);
+    }
+}
+
diff --git a/test/encoder/main.c b/test/encoder/main.c
new file mode 100755
index 0000000..b02958a
--- /dev/null
+++ b/test/encoder/main.c
@@ -0,0 +1,2512 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/time.h>
+#include <malloc.h>
+
+#ifdef WINDOWS_TIMER
+#include "windows.h"
+#endif
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "app.h"
+#include "psnr.h"
+
+/* Function declarations */
+/* calc_md5_cksum() is also declared in app.h (included above) with the     */
+/* same MD5_DISABLE switch; when MD5_DISABLE is defined every call expands  */
+/* to nothing.                                                              */
+#ifndef MD5_DISABLE
+void calc_md5_cksum(UWORD8 *pu1_inbuf,UWORD32 u4_stride,UWORD32 u4_width,UWORD32 u4_height,UWORD8 *pu1_cksum_p );
+#else
+#define calc_md5_cksum(a, b, c, d, e)
+#endif
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* Identifier for each option understood by the test application.           */
+/* get_argument() maps an option string to one of these values and returns  */
+/* INVALID when nothing matches; parse_argument() switches on the result.   */
+typedef enum
+{
+ INVALID,
+ HELP,
+ INPUT_FILE,
+ OUTPUT_FILE,
+ RECON_FILE,
+ RECON_ENABLE,
+ CHKSUM_ENABLE,
+ CHKSUM_FILE,
+ INPUT_CHROMA_FORMAT,
+ RECON_CHROMA_FORMAT,
+ MAX_WD,
+ MAX_HT,
+ WD,
+ HT,
+ MAX_LEVEL,
+ ENC_SPEED,
+ ME_SPEED,
+ START_FRM,
+ NUM_FRMS,
+ MAX_FRAMERATE,
+ SRC_FRAMERATE,
+ TGT_FRAMERATE,
+ RC,
+ MAX_BITRATE,
+ BITRATE,
+ I_QP,
+ P_QP,
+ B_QP,
+ I_QP_MAX,
+ P_QP_MAX,
+ B_QP_MAX,
+ I_QP_MIN,
+ P_QP_MIN,
+ B_QP_MIN,
+ AIR,
+ AIR_REFRESH_PERIOD,
+ ARCH,
+ SOC,
+ NUMCORES,
+ PRE_ENC_ME,
+ PRE_ENC_IPE,
+ HPEL,
+ QPEL,
+ SRCH_RNG_X,
+ SRCH_RNG_Y,
+ I_INTERVAL,
+ IDR_INTERVAL,
+ B_FRMS,
+ DISABLE_DBLK,
+ PROFILE,
+ FAST_SAD,
+ ALT_REF,
+ DISABLE_DEBLOCK_LEVEL,
+ PSNR,
+ SLICE_MODE,
+ SLICE_PARAM,
+ CONFIG,
+ LOOPBACK,
+ VBV_DELAY,
+ VBV_SIZE,
+ INTRA_4x4_ENABLE,
+ MB_INFO_FILE,
+ MB_INFO_TYPE,
+ PIC_INFO_FILE,
+ PIC_INFO_TYPE,
+} ARGUMENT_T;
+
+/* One row of the option table: the spellings of an option, its identifier  */
+/* and the help text printed by print_usage().                              */
+typedef struct
+{
+ /* Short spelling, e.g. "-i"; "--" marks an option without a short form */
+ CHAR argument_shortname[8];
+ /* Long spelling, e.g. "--input" */
+ CHAR argument_name[128];
+ /* Identifier returned by get_argument() for this row */
+ ARGUMENT_T argument;
+ /* Help text; print_usage() emits it verbatim, so it carries its own newline */
+ CHAR description[512];
+} argument_t;
+
+/* Option table consulted by get_argument() and print_usage().              */
+/* Each row: short name ("--" when there is none), long name, identifier,   */
+/* help text. The help text must end with a newline because print_usage()   */
+/* prints it verbatim. Help strings are kept in step with what              */
+/* parse_argument(), get_speed_preset() and the ARCH/PROFILE parsers accept. */
+static const argument_t argument_mapping[] =
+    {
+        { "--", "--help", HELP, "Print this help\n" },
+        { "-i", "--input", INPUT_FILE, "Input file\n" },
+        { "-o", "--output", OUTPUT_FILE, "Output file\n" },
+        { "--", "--recon_enable", RECON_ENABLE, "Recon enable flag\n" },
+        { "-r", "--recon", RECON_FILE, "Recon file \n" },
+        { "--", "--input_chroma_format", INPUT_CHROMA_FORMAT,
+            "Input Chroma format Supported values YUV_420P, YUV_420SP_UV, YUV_420SP_VU\n" },
+        { "--", "--recon_chroma_format", RECON_CHROMA_FORMAT,
+            "Recon Chroma format Supported values YUV_420P, YUV_420SP_UV, YUV_420SP_VU\n" },
+        { "-w", "--width", WD, "Width of input file\n" },
+        { "-h", "--height", HT, "Height of input file\n" },
+        { "--", "--start_frame", START_FRM, "Starting frame number\n" },
+        { "-f", "--num_frames", NUM_FRMS, "Number of frames to be encoded\n" },
+        { "--", "--rc", RC, "Rate control mode 0: Constant Qp, 1: Storage, 2: CBR non low delay, 3: CBR low delay \n" },
+        { "--", "--max_framerate", MAX_FRAMERATE, "Maximum frame rate \n" },
+        { "--", "--tgt_framerate", TGT_FRAMERATE, "Target frame rate \n" },
+        { "--", "--src_framerate", SRC_FRAMERATE, "Source frame rate \n" },
+        { "--", "--i_interval", I_INTERVAL, "Intra frame interval \n" },
+        { "--", "--idr_interval", IDR_INTERVAL, "IDR frame interval \n" },
+        { "--", "--bframes", B_FRMS, "Consecutive B frames \n" },
+        /* The value is matched by name in get_speed_preset(), not parsed as a number */
+        { "--", "--speed", ENC_SPEED, "Encoder speed preset. Supported values CONFIG, SLOWEST, NORMAL, FAST, HIGH_SPEED, FASTEST\n" },
+        { "--", "--me_speed", ME_SPEED, "Motion estimation speed preset 0 (slowest) and 100 (fastest)\n" },
+        { "--", "--fast_sad", FAST_SAD, "Flag for faster sad execution\n" },
+        { "--", "--alt_ref", ALT_REF , "Flag to enable alternate reference frames\n" },
+        { "--", "--hpel", HPEL, "Flag to enable/disable Half pel estimation \n" },
+        { "--", "--qpel", QPEL, "Flag to enable/disable Quarter pel estimation \n" },
+        { "--", "--disable_deblock_level", DISABLE_DEBLOCK_LEVEL,
+            "Disable deblock level - 0 : Enables deblock completely, 1: enables for I and 8th frame , 2: Enables for I only, 3 : disables completely\n" },
+        { "--", "--search_range_x", SRCH_RNG_X, "Search range for X \n" },
+        { "--", "--search_range_y", SRCH_RNG_Y, "Search range for Y \n" },
+        { "--", "--psnr", PSNR, "Enable PSNR computation (Disable while benchmarking performance) \n" },
+        { "--", "--pre_enc_me", PRE_ENC_ME, "Flag to enable/disable Pre Enc Motion Estimation\n" },
+        { "--", "--pre_enc_ipe", PRE_ENC_IPE, "Flag to enable/disable Pre Enc Intra prediction Estimation\n" },
+        { "-n", "--num_cores", NUMCORES, "Number of cores to be used\n" },
+        { "--", "--adaptive_intra_refresh", AIR ,"Adaptive Intra Refresh enable/disable\n"},
+        { "--", "--air_refresh_period", AIR_REFRESH_PERIOD,"adaptive intra refresh period\n"},
+        { "--", "--slice", SLICE_MODE, "Slice mode- 0 :No slice, 1: Bytes per slice, 2: MB/CTB per slice \n" },
+        { "--", "--slice_param", SLICE_PARAM, "Slice param value based on slice mode. Slice mode of 1 implies number of bytes per slice, 2 implies number of MBs/CTBs, for 0 value is neglected \n" },
+        { "--", "--max_wd", MAX_WD, "Maximum width (Default: 1920) \n" },
+        { "--", "--max_ht", MAX_HT, "Maximum height (Default: 1088)\n" },
+        { "--", "--max_level", MAX_LEVEL, "Maximum Level (Default: 50)\n" },
+        /* List mirrors the strings compared in parse_argument() for ARCH */
+        { "--", "--arch", ARCH, "Set Architecture. Supported values ARM_NONEON, ARM_A9Q, ARM_A7, ARM_A5, ARM_NEONINTR, X86_GENERIC, X86_SSSE3, X86_SSE42, ARM_A53, ARM_A57, ARM_V8_NEON \n" },
+        { "--", "--soc", SOC, "Set SOC. Supported values GENERIC, HISI_37X \n" },
+        { "--", "--chksum", CHKSUM_FILE, "Recon MD5 Checksum file\n" },
+        { "--", "--chksum_enable", CHKSUM_ENABLE, "Recon MD5 Checksum enable flag\n"},
+        { "-c", "--config", CONFIG, "config file (Default: enc.cfg)\n" },
+        { "--", "--loopback", LOOPBACK, "Enable encoding in a loop\n" },
+        { "--", "--profile", PROFILE, "Profile mode: Supported values BASE, MAIN, HIGH\n" },
+        { "--", "--max_bitrate", MAX_BITRATE, "Max bitrate\n"},
+        { "--", "--bitrate", BITRATE, "Target bitrate\n"},
+        { "--", "--qp_i", I_QP, "QP for I frames\n"},
+        { "--", "--qp_p", P_QP, "QP for P frames\n"},
+        { "--", "--qp_b", B_QP, "QP for B frames\n"},
+        { "--", "--qp_i_max", I_QP_MAX, "Max QP for I frames\n"},
+        { "--", "--qp_p_max", P_QP_MAX, "Max QP for P frames\n"},
+        { "--", "--qp_b_max", B_QP_MAX, "Max QP for B frames\n"},
+        { "--", "--qp_i_min", I_QP_MIN, "Min QP for I frames\n"},
+        { "--", "--qp_p_min", P_QP_MIN, "Min QP for P frames\n"},
+        { "--", "--qp_b_min", B_QP_MIN, "Min QP for B frames\n"},
+        { "--", "--vbv_delay", VBV_DELAY, "VBV buffer delay\n"},
+        { "--", "--vbv_size", VBV_SIZE, "VBV buffer size\n"},
+        { "-i4", "--intra_4x4_enable", INTRA_4x4_ENABLE, "Intra 4x4 enable \n" },
+        { "--", "--mb_info_file", MB_INFO_FILE, "MB info file\n"},
+        { "--", "--mb_info_type", MB_INFO_TYPE, "MB info type\n"},
+        { "--", "--pic_info_file", PIC_INFO_FILE, "Pic info file\n"},
+        { "--", "--pic_info_type", PIC_INFO_TYPE, "Pic info type\n"},
+    };
+
+
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+
+#if(defined X86) && (defined X86_MINGW)
+/*****************************************************************************/
+/*                                                                           */
+/* Function Name : ih264a_aligned_malloc / ih264a_aligned_free               */
+/*                                                                           */
+/* Description : Platform wrappers for aligned allocation and release.       */
+/* One of three variants is compiled:                                        */
+/* - X86 and X86_MINGW defined : _aligned_malloc / _aligned_free             */
+/* - IOS non-zero : plain malloc / free, the alignment argument is ignored   */
+/* - otherwise : memalign / free                                             */
+/*                                                                           */
+/* Inputs : alignment - requested alignment in bytes                         */
+/* size - number of bytes to allocate                                        */
+/* pv_buf - buffer previously returned by ih264a_aligned_malloc              */
+/* Globals : None                                                            */
+/* Processing : Forwards to the platform allocator                           */
+/*                                                                           */
+/* Outputs : None                                                            */
+/* Returns : ih264a_aligned_malloc returns whatever the underlying allocator */
+/* returns; callers in this application check the result against NULL        */
+/*                                                                           */
+/* Issues : A buffer must be released with the free routine of the same      */
+/* variant (_aligned_malloc memory must go to _aligned_free)                 */
+/*                                                                           */
+/* Revision History:                                                         */
+/*                                                                           */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+void * ih264a_aligned_malloc(WORD32 alignment, WORD32 size)
+{
+ return _aligned_malloc(size, alignment);
+}
+
+void ih264a_aligned_free(void *pv_buf)
+{
+ _aligned_free(pv_buf);
+ return;
+}
+
+#elif IOS
+
+/* NOTE(review): alignment is not honoured here; presumably the platform */
+/* malloc already returns sufficiently aligned memory - confirm */
+void * ih264a_aligned_malloc(WORD32 alignment, WORD32 size)
+{
+ return malloc(size);
+}
+
+void ih264a_aligned_free(void *pv_buf)
+{
+ free(pv_buf);
+ return;
+}
+
+#else
+
+void * ih264a_aligned_malloc(WORD32 alignment, WORD32 size)
+{
+ return memalign(alignment, size);
+}
+
+void ih264a_aligned_free(void *pv_buf)
+{
+ free(pv_buf);
+ return;
+}
+
+#endif
+
+/*****************************************************************************/
+/* */
+/* Function Name : codec_exit */
+/* */
+/* Description : handles unrecoverable errors */
+/* Inputs : Error message */
+/* Globals : None */
+/* Processing : Prints error message to console and exits. */
+/* Outputs : Error message to the console */
+/* Returns : None */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 06 2006 Sankar Creation */
+/* */
+/*****************************************************************************/
+void codec_exit(CHAR *pc_err_message)
+{
+    /* Emit the message followed by a newline on stdout, then terminate */
+    puts(pc_err_message);
+    exit(-1);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/* Function Name : get_chroma_fmt                                            */
+/*                                                                           */
+/* Description : Converts a colour format name to its enum value             */
+/* Inputs : value - colour format name, e.g. "YUV_420P"                      */
+/* Globals : None                                                            */
+/* Processing : Compares the name against the known format names; an         */
+/* unknown name prints a warning and selects IV_YUV_420P                     */
+/* Outputs : Warning on the console for an unknown name                      */
+/* Returns : Matching IV_COLOR_FORMAT_T value                                */
+/*                                                                           */
+/* Issues :                                                                  */
+/*                                                                           */
+/*****************************************************************************/
+IV_COLOR_FORMAT_T get_chroma_fmt(CHAR *value)
+{
+    static const struct
+    {
+        const CHAR *pc_name;
+        IV_COLOR_FORMAT_T e_fmt;
+    } as_fmt_map[] =
+    {
+        { "YUV_420P", IV_YUV_420P },
+        { "YUV_422ILE", IV_YUV_422ILE },
+        { "RGB_565", IV_RGB_565 },
+        { "RGBA_8888", IV_RGBA_8888 },
+        { "YUV_420SP_UV", IV_YUV_420SP_UV },
+        { "YUV_420SP_VU", IV_YUV_420SP_VU },
+    };
+    const WORD32 map_cnt = sizeof(as_fmt_map) / sizeof(as_fmt_map[0]);
+    WORD32 idx;
+
+    for(idx = 0; idx < map_cnt; idx++)
+    {
+        if(0 == strcmp(value, as_fmt_map[idx].pc_name))
+            return as_fmt_map[idx].e_fmt;
+    }
+
+    printf("\nInvalid colour format setting it to IV_YUV_420P\n");
+    return IV_YUV_420P;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/* Function Name : get_speed_preset                                          */
+/*                                                                           */
+/* Description : Converts a speed preset name to its enum value              */
+/* Inputs : value - preset name, e.g. "FASTEST"                              */
+/* Globals : None                                                            */
+/* Processing : Compares the name against the known preset names; an         */
+/* unknown name prints a warning and selects IVE_FASTEST                     */
+/* Outputs : Warning on the console for an unknown name                      */
+/* Returns : Matching IVE_SPEED_CONFIG value                                 */
+/*                                                                           */
+/* Issues :                                                                  */
+/*                                                                           */
+/*****************************************************************************/
+IVE_SPEED_CONFIG get_speed_preset(CHAR *value)
+{
+    static const struct
+    {
+        const CHAR *pc_name;
+        IVE_SPEED_CONFIG e_preset;
+    } as_preset_map[] =
+    {
+        { "CONFIG", IVE_CONFIG },
+        { "SLOWEST", IVE_SLOWEST },
+        { "NORMAL", IVE_NORMAL },
+        { "FAST", IVE_FAST },
+        { "HIGH_SPEED", IVE_HIGH_SPEED },
+        { "FASTEST", IVE_FASTEST },
+    };
+    const WORD32 map_cnt = sizeof(as_preset_map) / sizeof(as_preset_map[0]);
+    WORD32 idx;
+
+    for(idx = 0; idx < map_cnt; idx++)
+    {
+        if(0 == strcmp(value, as_preset_map[idx].pc_name))
+            return as_preset_map[idx].e_preset;
+    }
+
+    printf("\nInvalid speed preset, setting it to IVE_FASTEST\n");
+    return IVE_FASTEST;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : print_usage */
+/* */
+/* Description : Prints argument format */
+/* */
+/* */
+/* Inputs : */
+/* Globals : */
+/* Processing : Prints argument format */
+/* */
+/* Outputs : */
+/* Returns : */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+
+void print_usage(void)
+{
+    const argument_t *ps_entry = argument_mapping;
+    const argument_t *ps_end = argument_mapping +
+                    (sizeof(argument_mapping) / sizeof(argument_t));
+
+    printf("\nUsage:\n");
+
+    /* One line per table row: long option name, then its help text */
+    for(; ps_entry < ps_end; ps_entry++)
+    {
+        printf("%-32s\t %s", ps_entry->argument_name, ps_entry->description);
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : get_argument */
+/* */
+/* Description : Gets argument for a given string */
+/* */
+/* */
+/* Inputs : name */
+/* Globals : */
+/* Processing : Searches the given string in the array and returns */
+/* appropriate argument ID */
+/* */
+/* Outputs : Argument ID */
+/* Returns : Argument ID */
+/* */
+/* Issues : */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+ARGUMENT_T get_argument(CHAR *name)
+{
+    const WORD32 entry_cnt = sizeof(argument_mapping) / sizeof(argument_t);
+    WORD32 idx;
+
+    for(idx = 0; idx < entry_cnt; idx++)
+    {
+        const argument_t *ps_entry = &argument_mapping[idx];
+
+        /* Long name always counts */
+        if(0 == strcmp(ps_entry->argument_name, name))
+            return ps_entry->argument;
+
+        /* "--" is the placeholder for "no short name" and never matches */
+        if(0 == strcmp(ps_entry->argument_shortname, "--"))
+            continue;
+
+        if(0 == strcmp(ps_entry->argument_shortname, name))
+            return ps_entry->argument;
+    }
+
+    return INVALID;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/* Function Name : parse_argument                                            */
+/*                                                                           */
+/* Description : Applies one option/value pair to the application context    */
+/*                                                                           */
+/*                                                                           */
+/* Inputs : ps_app_ctxt - application context to update                      */
+/* argument - option string (long or short spelling)                         */
+/* value - value string for the option                                       */
+/* Globals : argument_mapping (through get_argument)                         */
+/* Processing : Looks up the option ID with get_argument() and stores the    */
+/* converted value in the matching context field. HELP prints                */
+/* the usage and exits. IDs without a case are reported as                   */
+/* ignored.                                                                  */
+/*                                                                           */
+/* Outputs : Updated context; console messages for invalid values            */
+/* Returns : None                                                            */
+/*                                                                           */
+/* Issues : sscanf return values are not checked, so a value that cannot     */
+/* be converted leaves the field unchanged                                   */
+/*                                                                           */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+void parse_argument(app_ctxt_t *ps_app_ctxt, CHAR *argument, CHAR *value)
+{
+ ARGUMENT_T arg;
+
+ arg = get_argument(argument);
+ switch(arg)
+ {
+ case HELP:
+ print_usage();
+ exit(-1);
+ break;
+ /* NOTE(review): numeric fields are read with %d although the u4_ prefix */
+ /* suggests UWORD32 storage (u4_slice_param is declared UWORD32 in app.h) */
+ /* - confirm whether %u is intended */
+ case SLICE_MODE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_slice_mode);
+ break;
+ case SLICE_PARAM:
+ sscanf(value, "%d", &ps_app_ctxt->u4_slice_param);
+ break;
+ /* File names: %s copies the first whitespace delimited token with no */
+ /* length limit. NOTE(review): confirm the destination buffers are large */
+ /* enough for any value the caller can pass */
+ case INPUT_FILE:
+ sscanf(value, "%s", ps_app_ctxt->ac_ip_fname);
+ break;
+
+ case OUTPUT_FILE:
+ sscanf(value, "%s", ps_app_ctxt->ac_op_fname);
+ break;
+
+ case RECON_FILE:
+ sscanf(value, "%s", ps_app_ctxt->ac_recon_fname);
+ break;
+
+ case RECON_ENABLE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_recon_enable);
+ break;
+
+ case CHKSUM_FILE:
+ sscanf(value, "%s", ps_app_ctxt->ac_chksum_fname);
+ break;
+
+ case CHKSUM_ENABLE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_chksum_enable);
+ break;
+
+ case MB_INFO_FILE:
+ sscanf(value, "%s", ps_app_ctxt->ac_mb_info_fname);
+ break;
+
+ case MB_INFO_TYPE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_mb_info_type);
+ break;
+
+ case PIC_INFO_FILE:
+ sscanf(value, "%s", ps_app_ctxt->ac_pic_info_fname);
+ break;
+
+ case PIC_INFO_TYPE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_pic_info_type);
+ break;
+
+ case INPUT_CHROMA_FORMAT:
+ ps_app_ctxt->e_inp_color_fmt = get_chroma_fmt(value);
+ break;
+
+ case RECON_CHROMA_FORMAT:
+ ps_app_ctxt->e_recon_color_fmt = get_chroma_fmt(value);
+ break;
+
+ case MAX_WD:
+ sscanf(value, "%d", &ps_app_ctxt->u4_max_wd);
+ break;
+
+ case MAX_HT:
+ sscanf(value, "%d", &ps_app_ctxt->u4_max_ht);
+ break;
+
+ case WD:
+ sscanf(value, "%d", &ps_app_ctxt->u4_wd);
+ break;
+
+ case HT:
+ sscanf(value, "%d", &ps_app_ctxt->u4_ht);
+ break;
+
+ case MAX_LEVEL:
+ sscanf(value, "%d", &ps_app_ctxt->u4_max_level);
+ break;
+
+ /* The speed preset is given by name, not as a number */
+ case ENC_SPEED:
+ ps_app_ctxt->u4_enc_speed = get_speed_preset(value);
+ break;
+
+ case ME_SPEED:
+ sscanf(value, "%d", &ps_app_ctxt->u4_me_speed);
+ break;
+
+ case START_FRM:
+ sscanf(value, "%d", &ps_app_ctxt->u4_start_frm);
+ break;
+
+ case NUM_FRMS:
+ sscanf(value, "%d", &ps_app_ctxt->u4_max_num_frms);
+ break;
+
+ /* Frame rates fall back to their defaults when the parsed value is not */
+ /* positive. NOTE(review): if these fields are unsigned, "<= 0" is only */
+ /* true for 0 and a negative input is not caught - confirm the type */
+ case MAX_FRAMERATE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_max_frame_rate);
+ if(ps_app_ctxt->u4_max_frame_rate <= 0)
+ ps_app_ctxt->u4_max_frame_rate = DEFAULT_MAX_FRAMERATE;
+ break;
+
+ case SRC_FRAMERATE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_src_frame_rate);
+ if(ps_app_ctxt->u4_src_frame_rate <= 0)
+ ps_app_ctxt->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE;
+ break;
+
+ case TGT_FRAMERATE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_tgt_frame_rate);
+ if(ps_app_ctxt->u4_tgt_frame_rate <= 0)
+ ps_app_ctxt->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE;
+ break;
+
+ case RC:
+ sscanf(value, "%d", &ps_app_ctxt->u4_rc);
+ break;
+
+ case MAX_BITRATE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_max_bitrate);
+ break;
+
+ case BITRATE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_bitrate);
+ break;
+
+ case I_QP:
+ sscanf(value, "%d", &ps_app_ctxt->u4_i_qp);
+ break;
+
+ case I_QP_MAX:
+ sscanf(value, "%d", &ps_app_ctxt->u4_i_qp_max);
+ break;
+
+ case I_QP_MIN:
+ sscanf(value, "%d", &ps_app_ctxt->u4_i_qp_min);
+ break;
+
+ case P_QP:
+ sscanf(value, "%d", &ps_app_ctxt->u4_p_qp);
+ break;
+
+ case P_QP_MAX:
+ sscanf(value, "%d", &ps_app_ctxt->u4_p_qp_max);
+ break;
+
+ case P_QP_MIN:
+ sscanf(value, "%d", &ps_app_ctxt->u4_p_qp_min);
+ break;
+
+ case B_QP:
+ sscanf(value, "%d", &ps_app_ctxt->u4_b_qp);
+ break;
+
+ case B_QP_MAX:
+ sscanf(value, "%d", &ps_app_ctxt->u4_b_qp_max);
+ break;
+
+ case B_QP_MIN:
+ sscanf(value, "%d", &ps_app_ctxt->u4_b_qp_min);
+ break;
+
+ case AIR:
+ sscanf(value, "%d", &ps_app_ctxt->u4_air);
+ break;
+
+ /* Architecture is matched by exact name; an unknown name prints a */
+ /* message and selects ARCH_ARM_A9Q */
+ case ARCH:
+ if((strcmp(value, "ARM_NONEON")) == 0)
+ ps_app_ctxt->e_arch = ARCH_ARM_NONEON;
+ else if((strcmp(value, "ARM_A9Q")) == 0)
+ ps_app_ctxt->e_arch = ARCH_ARM_A9Q;
+ else if((strcmp(value, "ARM_A7")) == 0)
+ ps_app_ctxt->e_arch = ARCH_ARM_A7;
+ else if((strcmp(value, "ARM_A5")) == 0)
+ ps_app_ctxt->e_arch = ARCH_ARM_A5;
+ else if((strcmp(value, "ARM_NEONINTR")) == 0)
+ ps_app_ctxt->e_arch = ARCH_ARM_NEONINTR;
+ else if((strcmp(value, "X86_GENERIC")) == 0)
+ ps_app_ctxt->e_arch = ARCH_X86_GENERIC;
+ else if((strcmp(value, "X86_SSSE3")) == 0)
+ ps_app_ctxt->e_arch = ARCH_X86_SSSE3;
+ else if((strcmp(value, "X86_SSE42")) == 0)
+ ps_app_ctxt->e_arch = ARCH_X86_SSE42;
+ else if((strcmp(value, "ARM_A53")) == 0)
+ ps_app_ctxt->e_arch = ARCH_ARM_A53;
+ else if((strcmp(value, "ARM_A57")) == 0)
+ ps_app_ctxt->e_arch = ARCH_ARM_A57;
+ else if((strcmp(value, "ARM_V8_NEON")) == 0)
+ ps_app_ctxt->e_arch = ARCH_ARM_V8_NEON;
+ else
+ {
+ printf("\nInvalid Arch. Setting it to ARM_A9Q\n");
+ ps_app_ctxt->e_arch = ARCH_ARM_A9Q;
+ }
+
+ break;
+ /* An unknown SOC name silently selects SOC_GENERIC (no message) */
+ case SOC:
+ if((strcmp(value, "GENERIC")) == 0)
+ ps_app_ctxt->e_soc = SOC_GENERIC;
+ else if((strcmp(value, "HISI_37X")) == 0)
+ ps_app_ctxt->e_soc = SOC_HISI_37X;
+ else
+ {
+ ps_app_ctxt->e_soc = SOC_GENERIC;
+ }
+ break;
+
+ case NUMCORES:
+ sscanf(value, "%d", &ps_app_ctxt->u4_num_cores);
+ break;
+
+ case LOOPBACK:
+ sscanf(value, "%d", &ps_app_ctxt->u4_loopback);
+ break;
+
+ case PRE_ENC_ME:
+ sscanf(value, "%d", &ps_app_ctxt->u4_pre_enc_me);
+ break;
+
+ case PRE_ENC_IPE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_pre_enc_ipe);
+ break;
+
+ case HPEL:
+ sscanf(value, "%d", &ps_app_ctxt->u4_hpel);
+ break;
+
+ case QPEL:
+ sscanf(value, "%d", &ps_app_ctxt->u4_qpel);
+ break;
+
+ case SRCH_RNG_X:
+ sscanf(value, "%d", &ps_app_ctxt->u4_srch_rng_x);
+ break;
+
+ case SRCH_RNG_Y:
+ sscanf(value, "%d", &ps_app_ctxt->u4_srch_rng_y);
+ break;
+
+ case I_INTERVAL:
+ sscanf(value, "%d", &ps_app_ctxt->u4_i_interval);
+ break;
+
+ case IDR_INTERVAL:
+ sscanf(value, "%d", &ps_app_ctxt->u4_idr_interval);
+ break;
+
+ case B_FRMS:
+ sscanf(value, "%d", &ps_app_ctxt->u4_b_frames);
+ break;
+
+ case DISABLE_DEBLOCK_LEVEL:
+ sscanf(value, "%d", &ps_app_ctxt->u4_disable_deblk_level);
+ break;
+
+ case VBV_DELAY:
+ sscanf(value, "%d", &ps_app_ctxt->u4_vbv_buffer_delay);
+ break;
+
+ case VBV_SIZE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_vbv_buf_size);
+ break;
+
+ case FAST_SAD:
+ sscanf(value, "%d", &ps_app_ctxt->u4_enable_fast_sad);
+ break;
+
+ case ALT_REF:
+ sscanf(value, "%d", &ps_app_ctxt->u4_enable_alt_ref);
+ break;
+
+ case AIR_REFRESH_PERIOD:
+ sscanf(value, "%d", &ps_app_ctxt->u4_air_refresh_period);
+ break;
+
+ /* Profile is matched by exact name; an unknown name prints a message */
+ /* and selects IV_PROFILE_BASE */
+ case PROFILE:
+ if((strcmp(value, "BASE")) == 0)
+ ps_app_ctxt->e_profile = IV_PROFILE_BASE;
+ else if((strcmp(value, "MAIN")) == 0)
+ ps_app_ctxt->e_profile = IV_PROFILE_MAIN;
+ else if((strcmp(value, "HIGH")) == 0)
+ ps_app_ctxt->e_profile = IV_PROFILE_HIGH;
+ else
+ {
+ printf("\nInvalid profile. Setting it to BASE\n");
+ ps_app_ctxt->e_profile = IV_PROFILE_BASE;
+ }
+ break;
+
+ case PSNR:
+ sscanf(value, "%d", &ps_app_ctxt->u4_psnr_enable);
+ break;
+
+ case INTRA_4x4_ENABLE:
+ sscanf(value, "%d", &ps_app_ctxt->u4_enable_intra_4x4);
+ break;
+
+
+ /* Also reached by IDs that have no case above (CONFIG, DISABLE_DBLK) */
+ case INVALID:
+ default:
+ printf("Ignoring argument : %s\n", argument);
+ break;
+ }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/* Function Name : read_cfg_file                                             */
+/*                                                                           */
+/* Description : Reads arguments from a configuration file                   */
+/*                                                                           */
+/*                                                                           */
+/* Inputs : ps_app_ctxt : Application context                                */
+/* fp_cfg : Configuration file handle, opened for reading                    */
+/* Globals :                                                                 */
+/* Processing : Reads the file line by line. The first two whitespace        */
+/* separated tokens of a line are the option name and its                    */
+/* value; anything after them is ignored. Lines without any                  */
+/* token are skipped. Each pair is handed to parse_argument().               */
+/* A line that has a name but no value passes an empty value                 */
+/* string instead of reusing the previous line's value.                      */
+/*                                                                           */
+/* Outputs : Arguments parsed                                                */
+/* Returns : None                                                            */
+/*                                                                           */
+/* Issues : Lines longer than STRLENGTH - 1 characters are split by fgets    */
+/* and the remainder is parsed as a separate line                            */
+/*                                                                           */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes */
+/* 07 09 2012 100189 Initial Version */
+/* */
+/*****************************************************************************/
+void read_cfg_file(app_ctxt_t *ps_app_ctxt, FILE *fp_cfg)
+{
+    CHAR line[STRLENGTH];
+    CHAR value[STRLENGTH];
+    CHAR argument[STRLENGTH];
+
+    /* Loop on the fgets result: this ends on end of file and also on a */
+    /* read error, where feof() would stay false for ever */
+    while(NULL != fgets(line, STRLENGTH, fp_cfg))
+    {
+        /* Reset both tokens so nothing leaks in from the previous line */
+        argument[0] = '\0';
+        value[0] = '\0';
+
+        /* Each token is shorter than the line, so it fits its buffer */
+        sscanf(line, "%s %s", argument, value);
+        if(argument[0] == '\0')
+            continue;
+
+        parse_argument(ps_app_ctxt, argument, value);
+    }
+}
+
+/* Prints the usage text followed by the given message and terminates the */
+/* program; codec_exit() calls exit(), so this function does not return. */
+void invalid_argument_exit(CHAR *pc_err_message)
+{
+ print_usage();
+ codec_exit(pc_err_message);
+}
+
+/* Checks the mandatory settings of the application context. The first    */
+/* failing check prints the usage plus an error message and ends the       */
+/* program through invalid_argument_exit(). Checked in this order: input   */
+/* file name, output file name, recon file name (when recon is enabled),   */
+/* checksum file name (when checksum is enabled), width, height, number of */
+/* frames.                                                                  */
+void validate_params(app_ctxt_t *ps_app_ctxt)
+{
+    CHAR ac_error[STRLENGTH];
+
+    if('\0' == ps_app_ctxt->ac_ip_fname[0])
+        invalid_argument_exit("Specify input file");
+
+    if('\0' == ps_app_ctxt->ac_op_fname[0])
+        invalid_argument_exit("Specify output file");
+
+    if(1 == ps_app_ctxt->u4_recon_enable)
+    {
+        if('\0' == ps_app_ctxt->ac_recon_fname[0])
+            invalid_argument_exit("Specify recon file");
+    }
+
+    if(1 == ps_app_ctxt->u4_chksum_enable)
+    {
+        if('\0' == ps_app_ctxt->ac_chksum_fname[0])
+            invalid_argument_exit("Specify checksum file");
+    }
+
+    /* Width and height are compared as signed so that a negative input */
+    /* is rejected as well */
+    if((WORD32)ps_app_ctxt->u4_wd <= 0)
+    {
+        sprintf(ac_error, "Invalid width: %d", ps_app_ctxt->u4_wd);
+        invalid_argument_exit(ac_error);
+    }
+
+    if((WORD32)ps_app_ctxt->u4_ht <= 0)
+    {
+        sprintf(ac_error, "Invalid height: %d", ps_app_ctxt->u4_ht);
+        invalid_argument_exit(ac_error);
+    }
+
+    if((WORD32)ps_app_ctxt->u4_max_num_frms == 0)
+    {
+        sprintf(ac_error, "Invalid number of frames to be encoded: %d", ps_app_ctxt->u4_max_num_frms);
+        invalid_argument_exit(ac_error);
+    }
+}
+
+/* Fills every field of the application context with its start-up default.
+ * Handles and pointers are cleared, file-name buffers are made empty strings,
+ * and encoder settings take the DEFAULT_* values (or the literal shown). */
+void init_default_params(app_ctxt_t *ps_app_ctxt)
+{
+    /* Codec handle, memory records and feature switches */
+    ps_app_ctxt->ps_enc = NULL;
+    ps_app_ctxt->ps_mem_rec = NULL;
+    ps_app_ctxt->u4_num_mem_rec = DEFAULT_MEM_REC_CNT;
+    ps_app_ctxt->u4_recon_enable = DEFAULT_RECON_ENABLE;
+    ps_app_ctxt->u4_chksum_enable = DEFAULT_CHKSUM_ENABLE;
+    ps_app_ctxt->u4_mb_info_type = 0;
+    ps_app_ctxt->u4_pic_info_type = 0;
+    ps_app_ctxt->u4_mb_info_size = 0;
+    ps_app_ctxt->u4_pic_info_size = 0;
+
+    /* Frame range and running counters */
+    ps_app_ctxt->u4_start_frm = DEFAULT_START_FRM;
+    ps_app_ctxt->u4_max_num_frms = DEFAULT_NUM_FRMS;
+    ps_app_ctxt->u4_total_bytes = 0;
+    ps_app_ctxt->u4_pics_cnt = 0;
+
+    /* Colour formats and platform selection */
+    ps_app_ctxt->e_inp_color_fmt = DEFAULT_INP_COLOR_FMT;
+    ps_app_ctxt->e_recon_color_fmt = DEFAULT_RECON_COLOR_FMT;
+    ps_app_ctxt->e_arch = DEFAULT_ARCH;
+    ps_app_ctxt->e_soc = SOC_GENERIC;
+    ps_app_ctxt->header_generated = 0;
+    ps_app_ctxt->pv_codec_obj = NULL;
+    ps_app_ctxt->u4_num_cores = DEFAULT_NUM_CORES;
+    ps_app_ctxt->u4_pre_enc_me = 0;
+    ps_app_ctxt->u4_pre_enc_ipe = 0;
+
+    /* File names start out as empty strings */
+    ps_app_ctxt->ac_ip_fname[0] = '\0';
+    ps_app_ctxt->ac_op_fname[0] = '\0';
+    ps_app_ctxt->ac_recon_fname[0] = '\0';
+    ps_app_ctxt->ac_chksum_fname[0] = '\0';
+    ps_app_ctxt->ac_mb_info_fname[0] = '\0';
+    /* The pic info name is handed to fopen() by synchronous_encode(), so it
+     * must be a valid (empty) string like the other file names. */
+    ps_app_ctxt->ac_pic_info_fname[0] = '\0';
+
+    /* File handles */
+    ps_app_ctxt->fp_ip = NULL;
+    ps_app_ctxt->fp_op = NULL;
+    ps_app_ctxt->fp_recon = NULL;
+    ps_app_ctxt->fp_chksum = NULL;
+    ps_app_ctxt->fp_psnr_ip = NULL;
+    ps_app_ctxt->fp_mb_info = NULL;
+    ps_app_ctxt->fp_pic_info = NULL;
+
+    /* Input looping, frame rates and picture geometry */
+    ps_app_ctxt->u4_loopback = DEFAULT_LOOPBACK;
+    ps_app_ctxt->u4_max_frame_rate = DEFAULT_MAX_FRAMERATE;
+    ps_app_ctxt->u4_src_frame_rate = DEFAULT_SRC_FRAME_RATE;
+    ps_app_ctxt->u4_tgt_frame_rate = DEFAULT_TGT_FRAME_RATE;
+    ps_app_ctxt->u4_max_wd = DEFAULT_MAX_WD;
+    ps_app_ctxt->u4_max_ht = DEFAULT_MAX_HT;
+    ps_app_ctxt->u4_max_level = DEFAULT_MAX_LEVEL;
+    ps_app_ctxt->u4_strd = DEFAULT_STRIDE;
+    ps_app_ctxt->u4_wd = DEFAULT_WD;
+    ps_app_ctxt->u4_ht = DEFAULT_HT;
+    ps_app_ctxt->u4_psnr_enable = DEFAULT_PSNR_ENABLE;
+
+    /* Speed presets and motion estimation switches */
+    ps_app_ctxt->u4_enc_speed = IVE_FASTEST;
+    ps_app_ctxt->u4_me_speed = DEFAULT_ME_SPEED;
+    ps_app_ctxt->u4_enable_fast_sad = DEFAULT_ENABLE_FAST_SAD;
+    ps_app_ctxt->u4_enable_alt_ref = DEFAULT_ENABLE_ALT_REF;
+
+    /* Rate control, bit rates and QP values / limits */
+    ps_app_ctxt->u4_rc = DEFAULT_RC;
+    ps_app_ctxt->u4_max_bitrate = DEFAULT_MAX_BITRATE;
+    ps_app_ctxt->u4_bitrate = DEFAULT_BITRATE;
+    ps_app_ctxt->u4_i_qp = DEFAULT_I_QP;
+    ps_app_ctxt->u4_p_qp = DEFAULT_P_QP;
+    ps_app_ctxt->u4_b_qp = DEFAULT_B_QP;
+    ps_app_ctxt->u4_i_qp_min = DEFAULT_QP_MIN;
+    ps_app_ctxt->u4_i_qp_max = DEFAULT_QP_MAX;
+    ps_app_ctxt->u4_p_qp_min = DEFAULT_QP_MIN;
+    ps_app_ctxt->u4_p_qp_max = DEFAULT_QP_MAX;
+    ps_app_ctxt->u4_b_qp_min = DEFAULT_QP_MIN;
+    ps_app_ctxt->u4_b_qp_max = DEFAULT_QP_MAX;
+
+    /* AIR, search range, GOP structure and coding tools */
+    ps_app_ctxt->u4_air = DEFAULT_AIR;
+    ps_app_ctxt->u4_air_refresh_period = DEFAULT_AIR_REFRESH_PERIOD;
+    ps_app_ctxt->u4_srch_rng_x = DEFAULT_SRCH_RNG_X;
+    ps_app_ctxt->u4_srch_rng_y = DEFAULT_SRCH_RNG_Y;
+    ps_app_ctxt->u4_i_interval = DEFAULT_I_INTERVAL;
+    ps_app_ctxt->u4_idr_interval = DEFAULT_IDR_INTERVAL;
+    ps_app_ctxt->u4_b_frames = DEFAULT_B_FRAMES;
+    ps_app_ctxt->u4_disable_deblk_level = DEFAULT_DISABLE_DEBLK_LEVEL;
+    ps_app_ctxt->u4_hpel = DEFAULT_HPEL;
+    ps_app_ctxt->u4_qpel = DEFAULT_QPEL;
+    ps_app_ctxt->u4_enable_intra_4x4 = DEFAULT_I4;
+    ps_app_ctxt->e_profile = DEFAULT_EPROFILE;
+    ps_app_ctxt->u4_slice_mode = DEFAULT_SLICE_MODE;
+    ps_app_ctxt->u4_slice_param = DEFAULT_SLICE_PARAM;
+
+    /* Thread handles */
+    ps_app_ctxt->pv_input_thread_handle = NULL;
+    ps_app_ctxt->pv_output_thread_handle = NULL;
+    ps_app_ctxt->pv_recon_thread_handle = NULL;
+
+    /* VBV settings */
+    ps_app_ctxt->u4_vbv_buf_size = 0;
+    ps_app_ctxt->u4_vbv_buffer_delay = 1000;
+
+    /* PSNR accumulators and scratch buffer */
+    ps_app_ctxt->adbl_psnr[0] = 0.0;
+    ps_app_ctxt->adbl_psnr[1] = 0.0;
+    ps_app_ctxt->adbl_psnr[2] = 0.0;
+    ps_app_ctxt->u4_psnr_cnt = 0;
+    ps_app_ctxt->pu1_psnr_buf = NULL;
+    ps_app_ctxt->u4_psnr_buf_size = 0;
+
+    return;
+}
+
+/* Sends an IVE_CMD_CTL_SET_DIMENSIONS control call to the encoder using the
+ * width, height and stride held in the application context. If the call does
+ * not return IV_SUCCESS, the reported error code is formatted into a message
+ * and passed to codec_exit(). */
+void set_dimensions(app_ctxt_t *ps_app_ctxt,
+                    UWORD32 u4_timestamp_low,
+                    UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_dimensions_ip_t s_ip;
+    ih264e_ctl_set_dimensions_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_DIMENSIONS;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.u4_wd = ps_app_ctxt->u4_wd;
+    s_ip.s_ive_ip.u4_ht = ps_app_ctxt->u4_ht;
+    s_ip.s_ive_ip.u4_strd = ps_app_ctxt->u4_strd;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set frame dimensions = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+/* Sends an IVE_CMD_CTL_SET_FRAMERATE control call to the encoder using the
+ * source and target frame rates held in the application context. If the call
+ * does not return IV_SUCCESS, the reported error code is formatted into a
+ * message and passed to codec_exit(). */
+void set_frame_rate(app_ctxt_t *ps_app_ctxt,
+                    UWORD32 u4_timestamp_low,
+                    UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_frame_rate_ip_t s_ip;
+    ih264e_ctl_set_frame_rate_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_FRAMERATE;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.u4_src_frame_rate = ps_app_ctxt->u4_src_frame_rate;
+    s_ip.s_ive_ip.u4_tgt_frame_rate = ps_app_ctxt->u4_tgt_frame_rate;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set frame rate = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+
+/* Sends an IVE_CMD_CTL_SET_IPE_PARAMS control call to the encoder using the
+ * intra 4x4 enable flag and encoder speed preset held in the application
+ * context. If the call does not return IV_SUCCESS, the reported error code is
+ * formatted into a message and passed to codec_exit(). */
+void set_ipe_params(app_ctxt_t *ps_app_ctxt,
+                    UWORD32 u4_timestamp_low,
+                    UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_ipe_params_ip_t s_ip;
+    ih264e_ctl_set_ipe_params_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_IPE_PARAMS;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.u4_enc_speed_preset = ps_app_ctxt->u4_enc_speed;
+    s_ip.s_ive_ip.u4_enable_intra_4x4 = ps_app_ctxt->u4_enable_intra_4x4;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set ipe params = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+/* Sends an IVE_CMD_CTL_SET_BITRATE control call to the encoder using the
+ * target bit rate held in the application context. If the call does not
+ * return IV_SUCCESS, the reported error code is formatted into a message and
+ * passed to codec_exit(). */
+void set_bit_rate(app_ctxt_t *ps_app_ctxt,
+                  UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_bitrate_ip_t s_ip;
+    ih264e_ctl_set_bitrate_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_BITRATE;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.u4_target_bitrate = ps_app_ctxt->u4_bitrate;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set bit rate = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+
+/* Sends an IVE_CMD_CTL_SET_FRAMETYPE control call to the encoder carrying the
+ * caller-supplied e_frame_type. If the call does not return IV_SUCCESS, the
+ * reported error code is formatted into a message and passed to codec_exit(). */
+void set_frame_type(app_ctxt_t *ps_app_ctxt,
+                    UWORD32 u4_timestamp_low,
+                    UWORD32 u4_timestamp_high,
+                    IV_PICTURE_CODING_TYPE_T e_frame_type)
+{
+    ih264e_ctl_set_frame_type_ip_t s_ip;
+    ih264e_ctl_set_frame_type_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_FRAMETYPE;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.e_frame_type = e_frame_type;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set frame type = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+/* Sends an IVE_CMD_CTL_SET_QP control call to the encoder using the I/P/B
+ * frame QP values and their min/max limits held in the application context.
+ * If the call does not return IV_SUCCESS, the reported error code is
+ * formatted into a message and passed to codec_exit(). */
+void set_qp(app_ctxt_t *ps_app_ctxt,
+            UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_qp_ip_t s_ip;
+    ih264e_ctl_set_qp_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_QP;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* I frame QP and limits */
+    s_ip.s_ive_ip.u4_i_qp_min = ps_app_ctxt->u4_i_qp_min;
+    s_ip.s_ive_ip.u4_i_qp = ps_app_ctxt->u4_i_qp;
+    s_ip.s_ive_ip.u4_i_qp_max = ps_app_ctxt->u4_i_qp_max;
+
+    /* P frame QP and limits */
+    s_ip.s_ive_ip.u4_p_qp_min = ps_app_ctxt->u4_p_qp_min;
+    s_ip.s_ive_ip.u4_p_qp = ps_app_ctxt->u4_p_qp;
+    s_ip.s_ive_ip.u4_p_qp_max = ps_app_ctxt->u4_p_qp_max;
+
+    /* B frame QP and limits */
+    s_ip.s_ive_ip.u4_b_qp_min = ps_app_ctxt->u4_b_qp_min;
+    s_ip.s_ive_ip.u4_b_qp = ps_app_ctxt->u4_b_qp;
+    s_ip.s_ive_ip.u4_b_qp_max = ps_app_ctxt->u4_b_qp_max;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set qp 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+/* Sends an IVE_CMD_CTL_SET_ENC_MODE control call to the encoder carrying the
+ * caller-supplied e_enc_mode. If the call does not return IV_SUCCESS, the
+ * reported error code is formatted into a message and passed to codec_exit(). */
+void set_enc_mode(app_ctxt_t *ps_app_ctxt,
+                  UWORD32 u4_timestamp_low, UWORD32 u4_timestamp_high,
+                  IVE_ENC_MODE_T e_enc_mode)
+{
+    ih264e_ctl_set_enc_mode_ip_t s_ip;
+    ih264e_ctl_set_enc_mode_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_ENC_MODE;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.e_enc_mode = e_enc_mode;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set in header encode mode = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+
+/* Sends an IVE_CMD_CTL_SET_VBV_PARAMS control call to the encoder using the
+ * VBV buffer size and buffer delay held in the application context. If the
+ * call does not return IV_SUCCESS, the reported error code is formatted into
+ * a message and passed to codec_exit(). */
+void set_vbv_params(app_ctxt_t *ps_app_ctxt,
+                    UWORD32 u4_timestamp_low,
+                    UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_vbv_params_ip_t s_vbv_ip;
+    ih264e_ctl_set_vbv_params_op_t s_vbv_op;
+    IV_STATUS_T status;
+
+    s_vbv_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_vbv_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_VBV_PARAMS;
+
+    s_vbv_ip.s_ive_ip.u4_vbv_buf_size = ps_app_ctxt->u4_vbv_buf_size;
+    s_vbv_ip.s_ive_ip.u4_vbv_buffer_delay =
+                    ps_app_ctxt->u4_vbv_buffer_delay;
+
+    s_vbv_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+    s_vbv_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+
+    s_vbv_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_vbv_params_ip_t);
+    s_vbv_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_vbv_params_op_t);
+
+    status = ih264e_api_function(ps_app_ctxt->ps_enc,&s_vbv_ip,&s_vbv_op);
+    if(status != IV_SUCCESS)
+    {
+        CHAR ac_error[STRLENGTH];
+        /* Message names the VBV parameters (it previously read "VBC") */
+        sprintf(ac_error, "Unable to set VBV params = 0x%x\n",
+                s_vbv_op.s_ive_op.u4_error_code);
+        codec_exit(ac_error);
+    }
+    return;
+}
+
+/* Sends an IVE_CMD_CTL_SET_AIR_PARAMS control call to the encoder using the
+ * AIR mode and AIR refresh period held in the application context. If the
+ * call does not return IV_SUCCESS, the reported error code is formatted into
+ * a message and passed to codec_exit(). */
+void set_air_params(app_ctxt_t *ps_app_ctxt,
+                    UWORD32 u4_timestamp_low,
+                    UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_air_params_ip_t s_ip;
+    ih264e_ctl_set_air_params_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_AIR_PARAMS;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.e_air_mode = ps_app_ctxt->u4_air;
+    s_ip.s_ive_ip.u4_air_refresh_period = ps_app_ctxt->u4_air_refresh_period;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set air params = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+/* Sends an IVE_CMD_CTL_SET_ME_PARAMS control call to the encoder using the
+ * motion estimation settings held in the application context (fast SAD,
+ * alt ref, half/quarter pel enables, ME speed preset and search range). If
+ * the call does not return IV_SUCCESS, the reported error code is formatted
+ * into a message and passed to codec_exit(). */
+void set_me_params(app_ctxt_t *ps_app_ctxt,
+                   UWORD32 u4_timestamp_low,
+                   UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_me_params_ip_t s_ip;
+    ih264e_ctl_set_me_params_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_ME_PARAMS;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Speed preset and search range */
+    s_ip.s_ive_ip.u4_me_speed_preset = ps_app_ctxt->u4_me_speed;
+    s_ip.s_ive_ip.u4_srch_rng_x = ps_app_ctxt->u4_srch_rng_x;
+    s_ip.s_ive_ip.u4_srch_rng_y = ps_app_ctxt->u4_srch_rng_y;
+
+    /* Tool enables */
+    s_ip.s_ive_ip.u4_enable_hpel = ps_app_ctxt->u4_hpel;
+    s_ip.s_ive_ip.u4_enable_qpel = ps_app_ctxt->u4_qpel;
+    s_ip.s_ive_ip.u4_enable_fast_sad = ps_app_ctxt->u4_enable_fast_sad;
+    s_ip.s_ive_ip.u4_enable_alt_ref = ps_app_ctxt->u4_enable_alt_ref;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set me params = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+
+/* Sends an IVE_CMD_CTL_SET_GOP_PARAMS control call to the encoder using the
+ * I frame interval, IDR frame interval and B frame count held in the
+ * application context. If the call does not return IV_SUCCESS, the reported
+ * error code is formatted into a message and passed to codec_exit(). */
+void set_gop_params(app_ctxt_t *ps_app_ctxt,
+                    UWORD32 u4_timestamp_low,
+                    UWORD32 u4_timestamp_high)
+{
+    IV_STATUS_T status;
+
+    ih264e_ctl_set_gop_params_ip_t s_gop_params_ip;
+    ih264e_ctl_set_gop_params_op_t s_gop_params_op;
+
+    s_gop_params_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_gop_params_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_GOP_PARAMS;
+
+    s_gop_params_ip.s_ive_ip.u4_i_frm_interval = ps_app_ctxt->u4_i_interval;
+    s_gop_params_ip.s_ive_ip.u4_idr_frm_interval = ps_app_ctxt->u4_idr_interval;
+    s_gop_params_ip.s_ive_ip.u4_num_b_frames = ps_app_ctxt->u4_b_frames;
+
+    s_gop_params_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+    s_gop_params_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+
+    s_gop_params_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_gop_params_ip_t);
+    s_gop_params_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_gop_params_op_t);
+
+    status = ih264e_api_function(ps_app_ctxt->ps_enc, &s_gop_params_ip,
+                                 &s_gop_params_op);
+    if(status != IV_SUCCESS)
+    {
+        CHAR ac_error[STRLENGTH];
+        /* Name the GOP call in the message; it used to say "ME params",
+         * which made a GOP failure indistinguishable from set_me_params(). */
+        sprintf(ac_error, "Unable to set GOP params = 0x%x\n",
+                s_gop_params_op.s_ive_op.u4_error_code);
+        codec_exit(ac_error);
+    }
+    return;
+}
+
+/* Sends an IVE_CMD_CTL_SET_PROFILE_PARAMS control call to the encoder using
+ * the profile held in the application context. If the call does not return
+ * IV_SUCCESS, the reported error code is formatted into a message and passed
+ * to codec_exit(). */
+void set_profile_params(app_ctxt_t *ps_app_ctxt,
+                        UWORD32 u4_timestamp_low,
+                        UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_profile_params_ip_t s_ip;
+    ih264e_ctl_set_profile_params_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_PROFILE_PARAMS;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.e_profile = ps_app_ctxt->e_profile;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to set profile params = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+/* Sends an IVE_CMD_CTL_SET_DEBLOCK_PARAMS control call to the encoder using
+ * the disable-deblock level held in the application context. If the call does
+ * not return IV_SUCCESS, the reported error code is formatted into a message
+ * and passed to codec_exit(). */
+void set_deblock_params(app_ctxt_t *ps_app_ctxt,
+                        UWORD32 u4_timestamp_low,
+                        UWORD32 u4_timestamp_high)
+{
+    ih264e_ctl_set_deblock_params_ip_t s_ip;
+    ih264e_ctl_set_deblock_params_op_t s_op;
+    IV_STATUS_T e_ret;
+
+    /* Structure sizes */
+    s_ip.s_ive_ip.u4_size = sizeof(s_ip);
+    s_op.s_ive_op.u4_size = sizeof(s_op);
+
+    /* Command, sub-command and time stamp */
+    s_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+    s_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_DEBLOCK_PARAMS;
+    s_ip.s_ive_ip.u4_timestamp_low = u4_timestamp_low;
+    s_ip.s_ive_ip.u4_timestamp_high = u4_timestamp_high;
+
+    /* Payload */
+    s_ip.s_ive_ip.u4_disable_deblock_level = ps_app_ctxt->u4_disable_deblk_level;
+
+    e_ret = ih264e_api_function(ps_app_ctxt->ps_enc, &s_ip, &s_op);
+    if(IV_SUCCESS == e_ret)
+        return;
+
+    {
+        CHAR ac_msg[STRLENGTH];
+        sprintf(ac_msg, "Unable to enable/disable deblock params = 0x%x\n",
+                s_op.s_ive_op.u4_error_code);
+        codec_exit(ac_msg);
+    }
+}
+
+#define PEAK_WINDOW_SIZE 8 /* Entries in synchronous_encode()'s peak_window[]: the per-frame encode times averaged for the peak average */
+
+void synchronous_encode(iv_obj_t *ps_enc, app_ctxt_t *ps_app_ctxt)
+{
+ ih264e_video_encode_ip_t ih264e_video_encode_ip;
+ ih264e_video_encode_op_t ih264e_video_encode_op;
+
+ ive_video_encode_ip_t *ps_video_encode_ip = &ih264e_video_encode_ip.s_ive_ip;
+ ive_video_encode_op_t *ps_video_encode_op = &ih264e_video_encode_op.s_ive_op;
+
+ iv_raw_buf_t *ps_inp_raw_buf = &ps_video_encode_ip->s_inp_buf;
+
+ IV_STATUS_T status = IV_SUCCESS;
+
+ WORD32 i, read_failed = 0, is_last = 0, buff_size = 0, num_bytes = 0;
+ UWORD32 u4_total_time = 0;
+ UWORD8 *pu1_buf = NULL;
+ UWORD32 u4_timestamp_low, u4_timestamp_high;
+ void *pv_mb_info = NULL, *pv_pic_info = NULL;
+
+ TIMER curtime ;
+#ifdef WINDOWS_TIMER
+ TIMER frequency;
+#endif
+ WORD32 peak_window[PEAK_WINDOW_SIZE] = {0};
+ WORD32 peak_window_idx = 0;
+ WORD32 peak_avg_max = 0, timetaken = 0;
+ iv_raw_buf_t s_inp_buf, s_recon_buf;
+ CHAR ac_error[STRLENGTH];
+ WORD32 end_of_frames=0;
+
+ u4_timestamp_low = 0;
+ u4_timestamp_high = 0;
+
+ /*************************************************************************/
+ /* Allocate I/O Buffers */
+ /*************************************************************************/
+ allocate_input(ps_app_ctxt);
+ allocate_output(ps_app_ctxt);
+ allocate_recon(ps_app_ctxt);
+
+ /* init psnr */
+ init_psnr(ps_app_ctxt);
+
+ /* open file pointers */
+ ps_app_ctxt->fp_ip = fopen(ps_app_ctxt->ac_ip_fname, "rb");
+ if(NULL == ps_app_ctxt->fp_ip)
+ {
+ sprintf(ac_error, "Unable to open input file for reading: %s", ps_app_ctxt->ac_ip_fname);
+ invalid_argument_exit(ac_error);
+ }
+
+ ps_app_ctxt->fp_op = fopen(ps_app_ctxt->ac_op_fname, "wb");
+ if(NULL == ps_app_ctxt->fp_op)
+ {
+ sprintf(ac_error, "Unable to open output file for writing: %s", ps_app_ctxt->ac_op_fname);
+ invalid_argument_exit(ac_error);
+ }
+
+ if(1 == ps_app_ctxt->u4_recon_enable)
+ {
+ ps_app_ctxt->fp_recon = fopen(ps_app_ctxt->ac_recon_fname, "wb");
+ if(NULL == ps_app_ctxt->fp_recon)
+ {
+ sprintf(ac_error, "Unable to open recon file for writing: %s", ps_app_ctxt->ac_recon_fname);
+ invalid_argument_exit(ac_error);
+ }
+ }
+
+ if(1 == ps_app_ctxt->u4_chksum_enable)
+ {
+ ps_app_ctxt->fp_chksum = fopen(ps_app_ctxt->ac_chksum_fname, "wb");
+ if(NULL == ps_app_ctxt->fp_chksum)
+ {
+ sprintf(ac_error, "Unable to open checksum file for writing: %s", ps_app_ctxt->ac_chksum_fname);
+ invalid_argument_exit(ac_error);
+ }
+ }
+
+#if 0 //Input buffer dump
+ //if(1 == ps_app_ctxt->u4_psnr_enable)
+ {
+ ps_app_ctxt->fp_dump_op = fopen("D:\\dump\\inp.yuv", "wb");
+ if(NULL == ps_app_ctxt->fp_dump_op)
+ {
+ sprintf(ac_error, "Unable to open output file for input dump: %s", "D:\\dump\\inp.yuv");
+ invalid_argument_exit(ac_error);
+ }
+ }
+#endif //Input buffer dump
+
+ /* If PSNR is enabled, open input file again and hold a different file pointer
+ * This makes it easy to compute PSNR without adding dependency between input and recon threads
+ */
+ if(1 == ps_app_ctxt->u4_psnr_enable)
+ {
+ ps_app_ctxt->fp_psnr_ip = fopen(ps_app_ctxt->ac_ip_fname, "rb");
+ if(NULL == ps_app_ctxt->fp_psnr_ip)
+ {
+ sprintf(ac_error, "Unable to open input file for reading: %s", ps_app_ctxt->ac_ip_fname);
+ invalid_argument_exit(ac_error);
+ }
+ }
+
+ if(0 != ps_app_ctxt->u4_mb_info_type)
+ {
+ ps_app_ctxt->fp_mb_info = fopen(ps_app_ctxt->ac_mb_info_fname, "rb");
+ if(NULL == ps_app_ctxt->fp_mb_info)
+ {
+ sprintf(ac_error, "Unable to open MB info file for reading: %s", ps_app_ctxt->ac_mb_info_fname);
+ invalid_argument_exit(ac_error);
+ }
+ }
+ if (ps_app_ctxt->u4_pic_info_type)
+ {
+ ps_app_ctxt->fp_pic_info = fopen(ps_app_ctxt->ac_pic_info_fname, "rb");
+ if(NULL == ps_app_ctxt->fp_pic_info)
+ {
+ sprintf(ac_error, "Unable to open Pic info file for reading: %s", ps_app_ctxt->ac_pic_info_fname);
+ invalid_argument_exit(ac_error);
+ }
+ }
+
+ GETTIME(&ps_app_ctxt->enc_start_time);
+ ps_app_ctxt->enc_last_time = ps_app_ctxt->enc_start_time;
+
+ while(1)
+ {
+
+
+
+
+
+ /******************************************************************************/
+ /****************** Input Initialization **************************************/
+ /******************************************************************************/
+
+ for(i = 0; i < DEFAULT_MAX_INPUT_BUFS; i++)
+ {
+ if(ps_app_ctxt->as_input_buf[i].u4_is_free)
+ {
+ pu1_buf = ps_app_ctxt->as_input_buf[i].pu1_buf;
+ pv_mb_info = ps_app_ctxt->as_input_buf[i].pv_mb_info;
+ pv_pic_info = ps_app_ctxt->as_input_buf[i].pv_pic_info;
+ ps_app_ctxt->as_input_buf[i].u4_is_free = 0;
+ break;
+ }
+ }
+
+ ps_video_encode_ip->u4_size = sizeof(ih264e_video_encode_ip_t);
+ ps_video_encode_op->u4_size = sizeof(ih264e_video_encode_op_t);
+
+ ps_video_encode_ip->e_cmd = IVE_CMD_VIDEO_ENCODE;
+ ps_video_encode_ip->pv_bufs = pu1_buf;
+ ps_video_encode_ip->pv_mb_info = pv_mb_info;
+ ps_video_encode_ip->pv_pic_info = pv_pic_info;
+ ps_video_encode_ip->u4_pic_info_type = ps_app_ctxt->u4_pic_info_type;
+ /*
+ * Since the buffers are used for reading,
+ * And after each row we have a stride we nned to calculate
+ * the luma size according to the stride
+ */
+ ps_inp_raw_buf->e_color_fmt = ps_app_ctxt->e_inp_color_fmt;
+
+ /* Initialize for 420SP */
+ if(IV_YUV_420SP_UV == ps_app_ctxt->e_inp_color_fmt||
+ IV_YUV_420SP_VU == ps_app_ctxt->e_inp_color_fmt)
+ {
+ /*init luma buffer*/
+ ps_inp_raw_buf->apv_bufs[0] = pu1_buf;
+
+ /*Init chroma buffer*/
+ pu1_buf += (ps_app_ctxt->u4_strd) * ALIGN16(ps_app_ctxt->u4_ht);
+ ps_inp_raw_buf->apv_bufs[1] = pu1_buf;
+
+ ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd;
+ ps_inp_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd;
+
+ ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht;
+ ps_inp_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2;
+
+ ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd;
+ ps_inp_raw_buf->au4_strd[1] = ps_app_ctxt->u4_strd;
+ }
+ else if(IV_YUV_420P == ps_app_ctxt->e_inp_color_fmt)
+ {
+ /* init buffers */
+ ps_inp_raw_buf->apv_bufs[0] = pu1_buf;
+ pu1_buf += (ps_app_ctxt->u4_strd) * ALIGN16(ps_app_ctxt->u4_ht);
+ ps_inp_raw_buf->apv_bufs[1] = pu1_buf;
+ pu1_buf += (ps_app_ctxt->u4_strd >> 1) * (ALIGN16(ps_app_ctxt->u4_ht) >> 1);
+ ps_inp_raw_buf->apv_bufs[2] = pu1_buf;
+
+ ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd;
+ ps_inp_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd / 2;
+ ps_inp_raw_buf->au4_wd[2] = ps_app_ctxt->u4_wd / 2;
+
+ ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht;
+ ps_inp_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2;
+ ps_inp_raw_buf->au4_ht[2] = ps_app_ctxt->u4_ht / 2;
+
+ ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd;
+ ps_inp_raw_buf->au4_strd[1] = ps_app_ctxt->u4_strd / 2;
+ ps_inp_raw_buf->au4_strd[2] = ps_app_ctxt->u4_strd / 2;
+
+ }
+ else if(IV_YUV_422ILE == ps_app_ctxt->e_inp_color_fmt)
+ {
+ /*init luma buffer*/
+ ps_inp_raw_buf->apv_bufs[0] = pu1_buf;
+
+ ps_inp_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd * 2;
+
+ ps_inp_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht;
+
+ ps_inp_raw_buf->au4_strd[0] = ps_app_ctxt->u4_strd *2;
+ }
+
+ while(1)
+ {
+ IV_STATUS_T mb_info_status = IV_SUCCESS, pic_info_status = IV_SUCCESS;
+ read_failed = 0;
+ status = read_input(ps_app_ctxt->fp_ip, ps_inp_raw_buf);
+ if (ps_app_ctxt->u4_mb_info_type != 0)
+ {
+ mb_info_status = read_mb_info(ps_app_ctxt, pv_mb_info);
+ }
+ if (ps_app_ctxt->u4_pic_info_type != 0)
+ {
+ pic_info_status = read_pic_info(ps_app_ctxt, pv_pic_info);
+ }
+ if((IV_SUCCESS != status) || (IV_SUCCESS != mb_info_status)
+ || (IV_SUCCESS != pic_info_status))
+ {
+ if(0 == ps_app_ctxt->u4_loopback)
+ {
+ is_last = 1;
+ read_failed = 1;
+
+ break;
+ }
+ else
+ fseek(ps_app_ctxt->fp_ip, 0, SEEK_SET);
+ }
+ else
+ break;
+ }
+
+ /******************************************************************************/
+ /****************** Output Initialization *************************************/
+ /******************************************************************************/
+
+ for(i = 0; i < DEFAULT_MAX_OUTPUT_BUFS; i++)
+ {
+ if(ps_app_ctxt->as_output_buf[i].u4_is_free)
+ {
+ pu1_buf = ps_app_ctxt->as_output_buf[i].pu1_buf;
+ buff_size = ps_app_ctxt->as_output_buf[i].u4_buf_size;
+ ps_app_ctxt->as_output_buf[i].u4_is_free = 0;
+ break;
+ }
+ }
+ ps_video_encode_ip->s_out_buf.pv_buf = pu1_buf;
+ ps_video_encode_ip->s_out_buf.u4_bytes = 0;
+ ps_video_encode_ip->s_out_buf.u4_bufsize = buff_size;
+
+ /******************************************************************************/
+ /****************** Recon Initialization **************************************/
+ /******************************************************************************/
+ init_raw_buf_descr(ps_app_ctxt, &s_recon_buf, ps_app_ctxt->as_recon_buf[0].pu1_buf, ps_app_ctxt->e_recon_color_fmt);
+
+ if(ps_app_ctxt->u4_psnr_enable)
+ init_raw_buf_descr(ps_app_ctxt, &s_inp_buf, ps_app_ctxt->pu1_psnr_buf, ps_app_ctxt->e_inp_color_fmt);
+
+ ps_video_encode_ip->s_recon_buf = s_recon_buf;
+
+ /******************************************************************************/
+ /************************* Un Initialized *************************************/
+ /******************************************************************************/
+ if(0 == ps_app_ctxt->u4_loopback)
+ {
+ /* If input file is read completely and loopback is not enabled,
+ * then exit the loop */
+ if(feof(ps_app_ctxt->fp_ip))
+ {
+ is_last = 1;
+ }
+ }
+
+
+ /* If last frame, send input null to get back encoded frames */
+ if ( is_last == 1 || ((ps_app_ctxt->u4_max_num_frms) <= u4_timestamp_low) )
+ {
+ is_last = 1;
+ ps_inp_raw_buf->apv_bufs[0] = NULL;
+ ps_inp_raw_buf->apv_bufs[1] = NULL;
+ ps_inp_raw_buf->apv_bufs[2] = NULL;
+ end_of_frames = 1;
+ }
+
+ ps_video_encode_ip->u4_is_last = is_last;
+ ps_video_encode_ip->u4_mb_info_type = ps_app_ctxt->u4_mb_info_type;
+ ps_video_encode_ip->u4_pic_info_type = ps_app_ctxt->u4_pic_info_type;;
+ ps_video_encode_op->s_out_buf.pv_buf= 0;
+ ps_video_encode_ip->u4_timestamp_high = u4_timestamp_high;
+ ps_video_encode_ip->u4_timestamp_low = u4_timestamp_low;
+
+
+ GETTIME(&ps_app_ctxt->enc_last_time);
+
+ status = ih264e_api_function(ps_enc, &ih264e_video_encode_ip, &ih264e_video_encode_op);
+
+ if (IV_SUCCESS != status)
+ {
+ printf("Encode Frame failed = 0x%x\n", ih264e_video_encode_op.s_ive_op.u4_error_code);
+ break;
+ }
+
+#ifdef WINDOWS_TIMER
+ QueryPerformanceFrequency ( &frequency);
+#endif
+ GETTIME(&curtime);
+ ELAPSEDTIME(ps_app_ctxt->enc_last_time, curtime, timetaken, frequency);
+ ps_app_ctxt->enc_last_time = curtime;
+
+#ifdef PROFILE_ENABLE
+ {
+ WORD32 peak_avg, id;
+ u4_total_time += timetaken;
+ peak_window[peak_window_idx++] = timetaken;
+ if(peak_window_idx == PEAK_WINDOW_SIZE)
+ peak_window_idx = 0;
+ peak_avg = 0;
+ for(id = 0; id < PEAK_WINDOW_SIZE; id++)
+ {
+ peak_avg += peak_window[id];
+ }
+ peak_avg /= PEAK_WINDOW_SIZE;
+ if (peak_avg > peak_avg_max)
+ peak_avg_max = peak_avg;
+ }
+#endif
+
+ /******************************************************************************/
+ /****************** Writing Output ********************************************/
+ /******************************************************************************/
+ num_bytes = 0;
+ /* Break if all the encoded frames are taken from encoder */
+ if(1 == end_of_frames && 0 == ps_video_encode_op->output_present)
+ {
+ break;
+ }
+ if(1 == ps_video_encode_op->output_present)
+ {
+ num_bytes = ps_video_encode_op->s_out_buf.u4_bytes;
+ buff_size = ps_video_encode_op->s_out_buf.u4_bufsize;
+ pu1_buf = (UWORD8*)ps_video_encode_op->s_out_buf.pv_buf;
+
+ status = write_output(ps_app_ctxt->fp_op, pu1_buf, num_bytes);
+ if(IV_SUCCESS != status)
+ {
+ printf("Error: Unable to write to output file\n");
+ break;
+ }
+
+ /* Reuse of freed input buffer */
+ for(i = 0; i < DEFAULT_MAX_INPUT_BUFS; i++)
+ {
+ if(ps_app_ctxt->as_input_buf[i].pu1_buf == ps_video_encode_op->s_inp_buf.apv_bufs[0])
+ {
+ ps_app_ctxt->as_input_buf[i].u4_is_free = 1;
+ break;
+ }
+ }
+
+ /* Reuse of freed output buffer */
+ for(i = 0; i < DEFAULT_MAX_OUTPUT_BUFS; i++)
+ {
+ if(ps_app_ctxt->as_output_buf[i].pu1_buf == ps_video_encode_op->s_out_buf.pv_buf)
+ {
+ ps_app_ctxt->as_output_buf[i].u4_is_free = 1;
+ break;
+ }
+ }
+ }
+
+ if (ps_video_encode_op->dump_recon == 1)
+ {
+ ps_app_ctxt->u4_pics_cnt++;
+
+ ps_app_ctxt->avg_time = u4_total_time / ps_app_ctxt->u4_pics_cnt;
+ if (ps_app_ctxt->u4_psnr_enable == 0)
+ {
+ UWORD8 u1_pic_type[][5] = { "IDR", "I", "P","NA" };
+ WORD32 lookup_idx = 0;
+
+ if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME)
+ {
+ lookup_idx = 0;
+ }
+ else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME)
+ {
+ lookup_idx = 1;
+ }
+ else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME)
+ {
+ lookup_idx = 2;
+ }
+ else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME)
+ {
+ lookup_idx = 3;
+ }
+
+ printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, num_bytes, timetaken, ps_app_ctxt->avg_time, peak_avg_max);
+ }
+
+ ps_app_ctxt->u4_total_bytes += num_bytes;
+
+ /******************************************************************************/
+ /****************** Writing Recon ********************************************/
+ /******************************************************************************/
+ if(1 == ps_video_encode_op->output_present)
+ {
+ s_recon_buf = ps_video_encode_op->s_recon_buf;
+
+ /* Dump recon when enabled, and output bytes != 0*/
+ if(ps_app_ctxt->u4_recon_enable)
+ {
+ status = write_recon(ps_app_ctxt->fp_recon, &s_recon_buf);
+ if(IV_SUCCESS != status)
+ {
+ printf("Error: Unable to write to recon file\n");
+ break;
+ }
+ }
+
+
+ if(ps_app_ctxt->u4_psnr_enable)
+ {
+ read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf);
+ compute_psnr(ps_app_ctxt, &s_recon_buf, &s_inp_buf);
+ }
+
+
+ if(ps_app_ctxt->u4_chksum_enable)
+ {
+ WORD32 comp;
+ WORD32 num_comp;
+ num_comp = 2;
+ if(IV_YUV_420P == s_recon_buf.e_color_fmt)
+ num_comp = 3;
+
+ for(comp = 0; comp < num_comp; comp++ )
+ {
+ UWORD8 au1_chksum[16];
+
+ calc_md5_cksum((UWORD8 *)s_recon_buf.apv_bufs[comp],
+ s_recon_buf.au4_strd[comp],
+ s_recon_buf.au4_wd[comp],
+ s_recon_buf.au4_ht[comp],
+ au1_chksum);
+
+ fwrite(au1_chksum, sizeof(UWORD8), 16, ps_app_ctxt->fp_chksum);
+ }
+ }
+
+
+ }
+ }
+ else
+ {
+ if (ps_app_ctxt->u4_psnr_enable == 0)
+ {
+ UWORD8 u1_pic_type[][5] = { "IDR", "I", "P", "NA" };
+ WORD32 lookup_idx = 0;
+
+ if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_IDR_FRAME)
+ {
+ lookup_idx = 0;
+ }
+ else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_I_FRAME)
+ {
+ lookup_idx = 1;
+ }
+ else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_P_FRAME)
+ {
+ lookup_idx = 2;
+ }
+ else if (ih264e_video_encode_op.s_ive_op.u4_encoded_frame_type == IV_NA_FRAME)
+ {
+ lookup_idx = 3;
+ }
+
+ printf("[%s] PicNum %4d Bytes Generated %6d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d\n", u1_pic_type[lookup_idx], ps_app_ctxt->u4_pics_cnt, num_bytes, timetaken, ps_app_ctxt->avg_time, peak_avg_max);
+ }
+ else
+ {
+ read_input(ps_app_ctxt->fp_psnr_ip, &s_inp_buf);
+ }
+ }
+#if 0 //Input buffer dump
+ /*Dump input buffers to a file*/
+ dump_input(ps_app_ctxt->fp_dump_op, ps_inp_raw_buf);
+#endif //Input buffer dump
+
+ if(is_last)
+ break;
+
+ u4_timestamp_low++;
+ }
+
+ /* Pic count is 1 more than the actual number of frames encoded, because the last call is made only to fetch the remaining output */
+ ps_app_ctxt->u4_pics_cnt--;
+
+ if(ps_app_ctxt->u4_psnr_enable)
+ {
+ print_average_psnr(ps_app_ctxt);
+ }
+
+ /* house keeping operations */
+ fclose(ps_app_ctxt->fp_ip);
+ fclose(ps_app_ctxt->fp_op);
+ if(1 == ps_app_ctxt->u4_recon_enable)
+ {
+ fclose(ps_app_ctxt->fp_recon);
+ }
+ if(1 == ps_app_ctxt->u4_chksum_enable)
+ {
+ fclose(ps_app_ctxt->fp_chksum);
+ }
+ if(1 == ps_app_ctxt->u4_psnr_enable)
+ {
+ fclose(ps_app_ctxt->fp_psnr_ip);
+ }
+
+ if(0 != ps_app_ctxt->u4_mb_info_type)
+ {
+ fclose(ps_app_ctxt->fp_mb_info);
+ }
+ if (ps_app_ctxt->u4_pic_info_type)
+ {
+ fclose(ps_app_ctxt->fp_pic_info);
+ }
+
+ free_input(ps_app_ctxt);
+ free_output(ps_app_ctxt);
+ free_recon(ps_app_ctxt);
+}
+
+/*****************************************************************************
+ *
+ *  Function Name : main  (built as h264enc_main(homedir) when IOS is defined)
+ *
+ *  Description   : Application to demonstrate codec API
+ *
+ *  Inputs        : argc   - Number of arguments
+ *                  argv[] - Arguments. With no argument "enc.cfg" is read,
+ *                           with one argument it names the configuration
+ *                           file, otherwise the arguments are consumed as
+ *                           <option> <value> pairs.
+ *                  homedir - (IOS build only) directory that holds enc.cfg
+ *                           and is prefixed to the file names in the context
+ *  Globals       :
+ *  Processing    : Shows how to use create, process, control and delete:
+ *                  query and allocate the memory records, initialise the
+ *                  encoder, apply the control settings held in the context,
+ *                  run synchronous_encode(), print a summary and release
+ *                  the memory records.
+ *
+ *  Outputs       : Codec output in a file
+ *  Returns       : 0 when the end of the function is reached. Every failure
+ *                  is handed to codec_exit(); the code assumes that call
+ *                  does not return - TODO confirm against its definition.
+ *
+ *  Issues        : Assumes both PROFILE_ENABLE to be
+ *                  defined for multithread decode-display working
+ *                  NOTE(review): this remark mentions decode/display and may
+ *                  have been carried over from another application - confirm.
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes
+ *         20 11 2013   100189          Initial Version
+ *****************************************************************************/
+#ifdef IOS
+int h264enc_main(char * homedir)
+#else
+int main(int argc, char *argv[])
+#endif
+{
+    /* Config Parameters for Encoding */
+    app_ctxt_t s_app_ctxt;
+
+    /* error string */
+    CHAR ac_error[STRLENGTH];
+
+    /* config file name */
+    CHAR ac_cfg_fname[STRLENGTH];
+
+    /* error status */
+    IV_STATUS_T status = IV_SUCCESS;
+
+    /* temp var */
+    CHAR filename_with_path[STRLENGTH];
+    WORD32 num_mem_recs;
+    iv_obj_t *ps_enc;
+    WORD32 i;
+    FILE *fp_cfg = NULL;
+
+#ifdef X86_MINGW
+
+    /* For getting printfs without any delay in eclipse */
+    setvbuf(stdout, NULL, _IONBF, 0);
+    setvbuf(stderr, NULL, _IONBF, 0);
+
+#endif
+
+    init_default_params(&s_app_ctxt);
+
+#ifndef IOS
+
+    /* Usage */
+    if(argc < 2)
+    {
+        printf("Using enc.cfg as configuration file \n");
+        strcpy(ac_cfg_fname, "enc.cfg");
+    }
+    else if(argc == 2)
+    {
+        /* Bounded copy: the name comes from the command line and may be
+         * longer than the buffer */
+        snprintf(ac_cfg_fname, sizeof(ac_cfg_fname), "%s", argv[1]);
+    }
+
+#endif
+
+    /*************************************************************************/
+    /* Parse arguments                                                       */
+    /*************************************************************************/
+
+#ifndef IOS
+
+    /* Read command line arguments */
+    if(argc > 2)
+    {
+        /* Arguments are consumed as <option> <value> pairs */
+        for(i = 1; i < argc; i += 2)
+        {
+            /* A trailing option without a value would make argv[i + 1] the
+             * terminating NULL entry of argv */
+            if(i + 1 >= argc)
+            {
+                snprintf(ac_error, sizeof(ac_error),
+                         "Missing value for argument %s", argv[i]);
+                codec_exit(ac_error);
+                /* Only reached if codec_exit() returns */
+                break;
+            }
+
+            if(CONFIG == get_argument(argv[i]))
+            {
+                snprintf(ac_cfg_fname, sizeof(ac_cfg_fname), "%s", argv[i + 1]);
+                if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL)
+                {
+                    snprintf(ac_error, sizeof(ac_error),
+                             "Could not open Configuration file %s",
+                             ac_cfg_fname);
+                    codec_exit(ac_error);
+                }
+                read_cfg_file(&s_app_ctxt, fp_cfg);
+                fclose(fp_cfg);
+            }
+            else
+            {
+                parse_argument(&s_app_ctxt, argv[i], argv[i + 1]);
+            }
+        }
+    }
+    else
+    {
+        if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL)
+        {
+            snprintf(ac_error, sizeof(ac_error),
+                     "Could not open Configuration file %s",
+                     ac_cfg_fname);
+            codec_exit(ac_error);
+        }
+        read_cfg_file(&s_app_ctxt, fp_cfg);
+        fclose(fp_cfg);
+    }
+
+#else
+
+    snprintf(filename_with_path, sizeof(filename_with_path), "%s/%s", homedir, "enc.cfg");
+    if((fp_cfg = fopen(filename_with_path, "r")) == NULL)
+    {
+        /* Report the path that was actually opened; ac_cfg_fname is never
+         * filled in for this build */
+        snprintf(ac_error, sizeof(ac_error),
+                 "Could not open Configuration file %s",
+                 filename_with_path);
+        codec_exit(ac_error);
+
+    }
+    read_cfg_file(&s_app_ctxt, fp_cfg);
+    fclose(fp_cfg);
+
+#endif
+
+
+    validate_params(&s_app_ctxt);
+
+
+    /*************************************************************************/
+    /* Getting Number of MemRecords                                          */
+    /*************************************************************************/
+    {
+        ih264e_num_mem_rec_ip_t s_num_mem_rec_ip;
+        ih264e_num_mem_rec_op_t s_num_mem_rec_op;
+
+        s_num_mem_rec_ip.s_ive_ip.u4_size = sizeof(ih264e_num_mem_rec_ip_t);
+        s_num_mem_rec_op.s_ive_op.u4_size = sizeof(ih264e_num_mem_rec_op_t);
+
+        s_num_mem_rec_ip.s_ive_ip.e_cmd = IV_CMD_GET_NUM_MEM_REC;
+
+        /* No codec instance exists yet, hence the null handle */
+        status = ih264e_api_function(0, &s_num_mem_rec_ip, &s_num_mem_rec_op);
+
+        if(status != IV_SUCCESS)
+        {
+            sprintf(ac_error, "Get number of memory records failed = 0x%x\n", s_num_mem_rec_op.s_ive_op.u4_error_code);
+            codec_exit(ac_error);
+        }
+
+        s_app_ctxt.u4_num_mem_rec = num_mem_recs = s_num_mem_rec_op.s_ive_op.u4_num_mem_rec;
+    }
+
+    /* Allocate array to hold memory records */
+    s_app_ctxt.ps_mem_rec = (iv_mem_rec_t *) malloc(num_mem_recs * sizeof(iv_mem_rec_t));
+    if(NULL == s_app_ctxt.ps_mem_rec)
+    {
+
+        sprintf(ac_error, "Unable to allocate memory for hold memory records: Size %d", (WORD32)(num_mem_recs * sizeof(iv_mem_rec_t)));
+        codec_exit(ac_error);
+    }
+
+    /* Put every record into a known empty state before the codec fills it */
+    {
+        iv_mem_rec_t *ps_mem_rec;
+        ps_mem_rec = s_app_ctxt.ps_mem_rec;
+        for(i = 0; i < num_mem_recs; i++)
+        {
+            ps_mem_rec->u4_size = sizeof(iv_mem_rec_t);
+            ps_mem_rec->pv_base = NULL;
+            ps_mem_rec->u4_mem_size = 0;
+            ps_mem_rec->u4_mem_alignment = 0;
+            ps_mem_rec->e_mem_type = IV_NA_MEM_TYPE;
+
+            ps_mem_rec++;
+        }
+    }
+
+    /*************************************************************************/
+    /* Getting MemRecords Attributes                                         */
+    /*************************************************************************/
+    {
+        ih264e_fill_mem_rec_ip_t s_fill_mem_rec_ip;
+        ih264e_fill_mem_rec_op_t s_fill_mem_rec_op;
+
+        s_fill_mem_rec_ip.s_ive_ip.u4_size = sizeof(ih264e_fill_mem_rec_ip_t);
+        s_fill_mem_rec_op.s_ive_op.u4_size = sizeof(ih264e_fill_mem_rec_op_t);
+
+        s_fill_mem_rec_ip.s_ive_ip.e_cmd = IV_CMD_FILL_NUM_MEM_REC;
+        s_fill_mem_rec_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec;
+        s_fill_mem_rec_ip.s_ive_ip.u4_num_mem_rec = s_app_ctxt.u4_num_mem_rec;
+        s_fill_mem_rec_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd;
+        s_fill_mem_rec_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht;
+        s_fill_mem_rec_ip.s_ive_ip.u4_max_level = s_app_ctxt.u4_max_level;
+        s_fill_mem_rec_ip.s_ive_ip.e_color_format = DEFAULT_INP_COLOR_FMT;
+        s_fill_mem_rec_ip.s_ive_ip.u4_max_ref_cnt = DEFAULT_MAX_REF_FRM;
+        s_fill_mem_rec_ip.s_ive_ip.u4_max_reorder_cnt = DEFAULT_MAX_REORDER_FRM;
+        s_fill_mem_rec_ip.s_ive_ip.u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X;
+        s_fill_mem_rec_ip.s_ive_ip.u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y;
+
+        status = ih264e_api_function(0, &s_fill_mem_rec_ip, &s_fill_mem_rec_op);
+
+        if(status != IV_SUCCESS)
+        {
+            sprintf(ac_error, "Fill memory records failed = 0x%x\n",
+                    s_fill_mem_rec_op.s_ive_op.u4_error_code);
+            codec_exit(ac_error);
+        }
+    }
+
+    /*************************************************************************/
+    /* Allocating Memory for Mem Records                                     */
+    /*************************************************************************/
+    {
+        WORD32 total_size;
+        iv_mem_rec_t *ps_mem_rec;
+        total_size = 0;
+
+        ps_mem_rec = s_app_ctxt.ps_mem_rec;
+        for(i = 0; i < num_mem_recs; i++)
+        {
+            ps_mem_rec->pv_base = ih264a_aligned_malloc(ps_mem_rec->u4_mem_alignment,
+                                                        ps_mem_rec->u4_mem_size);
+            if(ps_mem_rec->pv_base == NULL)
+            {
+                sprintf(ac_error, "Allocation failure for mem record id %d size %d\n",
+                        i, ps_mem_rec->u4_mem_size);
+                codec_exit(ac_error);
+            }
+            total_size += ps_mem_rec->u4_mem_size;
+
+            ps_mem_rec++;
+        }
+        printf("\nTotal memory for codec %d\n", total_size);
+    }
+
+
+    /*************************************************************************/
+    /* Codec Instance Creation                                               */
+    /*************************************************************************/
+    {
+        ih264e_init_ip_t s_init_ip;
+        ih264e_init_op_t s_init_op;
+
+        /* The codec handle lives in the memory of the first record */
+        ps_enc = s_app_ctxt.ps_mem_rec[0].pv_base;
+        ps_enc->u4_size = sizeof(iv_obj_t);
+        ps_enc->pv_fxns = ih264e_api_function;
+        s_app_ctxt.ps_enc = ps_enc;
+
+        s_init_ip.s_ive_ip.u4_size = sizeof(ih264e_init_ip_t);
+        s_init_op.s_ive_op.u4_size = sizeof(ih264e_init_op_t);
+
+        s_init_ip.s_ive_ip.e_cmd = IV_CMD_INIT;
+        s_init_ip.s_ive_ip.u4_num_mem_rec = s_app_ctxt.u4_num_mem_rec;
+        s_init_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec;
+        s_init_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd;
+        s_init_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht;
+        s_init_ip.s_ive_ip.u4_max_ref_cnt = DEFAULT_MAX_REF_FRM;
+        s_init_ip.s_ive_ip.u4_max_reorder_cnt = DEFAULT_MAX_REORDER_FRM;
+        s_init_ip.s_ive_ip.u4_max_level = s_app_ctxt.u4_max_level;
+        s_init_ip.s_ive_ip.e_inp_color_fmt = s_app_ctxt.e_inp_color_fmt;
+        /* Recon output is needed by the recon dump, PSNR and checksum options */
+        if(s_app_ctxt.u4_recon_enable || s_app_ctxt.u4_psnr_enable || s_app_ctxt.u4_chksum_enable)
+        {
+            s_init_ip.s_ive_ip.u4_enable_recon = 1;
+        }
+        else
+        {
+            s_init_ip.s_ive_ip.u4_enable_recon = 0;
+        }
+        s_init_ip.s_ive_ip.e_recon_color_fmt = s_app_ctxt.e_recon_color_fmt;
+        s_init_ip.s_ive_ip.e_rc_mode = s_app_ctxt.u4_rc;
+        s_init_ip.s_ive_ip.u4_max_framerate = s_app_ctxt.u4_max_frame_rate;
+        s_init_ip.s_ive_ip.u4_max_bitrate = s_app_ctxt.u4_max_bitrate;
+        s_init_ip.s_ive_ip.u4_max_num_bframes = DEFAULT_B_FRAMES;
+        s_init_ip.s_ive_ip.e_content_type = IV_PROGRESSIVE;
+        s_init_ip.s_ive_ip.u4_max_srch_rng_x = DEFAULT_MAX_SRCH_RANGE_X;
+        s_init_ip.s_ive_ip.u4_max_srch_rng_y = DEFAULT_MAX_SRCH_RANGE_Y;
+        s_init_ip.s_ive_ip.e_slice_mode = s_app_ctxt.u4_slice_mode;
+        s_init_ip.s_ive_ip.u4_slice_param = s_app_ctxt.u4_slice_param;
+        s_init_ip.s_ive_ip.e_arch = s_app_ctxt.e_arch;
+        s_init_ip.s_ive_ip.e_soc = s_app_ctxt.e_soc;
+
+        status = ih264e_api_function(ps_enc, &s_init_ip, &s_init_op);
+
+        if(status != IV_SUCCESS)
+        {
+            sprintf(ac_error, "Init memory records failed = 0x%x\n",
+                    s_init_op.s_ive_op.u4_error_code);
+            codec_exit(ac_error);
+        }
+    }
+
+    /*************************************************************************/
+    /* set processor details                                                 */
+    /*************************************************************************/
+    {
+        ih264e_ctl_set_num_cores_ip_t s_ctl_set_num_cores_ip;
+        ih264e_ctl_set_num_cores_op_t s_ctl_set_num_cores_op;
+        s_ctl_set_num_cores_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+        s_ctl_set_num_cores_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_SET_NUM_CORES;
+        s_ctl_set_num_cores_ip.s_ive_ip.u4_num_cores = s_app_ctxt.u4_num_cores;
+        s_ctl_set_num_cores_ip.s_ive_ip.u4_timestamp_high = 0;
+        s_ctl_set_num_cores_ip.s_ive_ip.u4_timestamp_low = 0;
+        s_ctl_set_num_cores_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_set_num_cores_ip_t);
+
+        s_ctl_set_num_cores_op.s_ive_op.u4_size = sizeof(ih264e_ctl_set_num_cores_op_t);
+
+        status = ih264e_api_function(ps_enc, (void *) &s_ctl_set_num_cores_ip,
+                                     (void *) &s_ctl_set_num_cores_op);
+        if(status != IV_SUCCESS)
+        {
+            sprintf(ac_error, "Unable to set processor params = 0x%x\n",
+                    s_ctl_set_num_cores_op.s_ive_op.u4_error_code);
+            codec_exit(ac_error);
+        }
+
+    }
+
+    /*************************************************************************/
+    /* Get Codec Version                                                     */
+    /*************************************************************************/
+    {
+        ih264e_ctl_getversioninfo_ip_t s_ctl_set_getversioninfo_ip;
+        ih264e_ctl_getversioninfo_op_t s_ctl_set_getversioninfo_op;
+        CHAR ac_version_string[STRLENGTH];
+        s_ctl_set_getversioninfo_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+        s_ctl_set_getversioninfo_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_GETVERSION;
+        s_ctl_set_getversioninfo_ip.s_ive_ip.pu1_version = (UWORD8 *)ac_version_string;
+        s_ctl_set_getversioninfo_ip.s_ive_ip.u4_version_bufsize = sizeof(ac_version_string);
+        s_ctl_set_getversioninfo_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_getversioninfo_ip_t);
+        s_ctl_set_getversioninfo_op.s_ive_op.u4_size = sizeof(ih264e_ctl_getversioninfo_op_t);
+
+        status = ih264e_api_function(ps_enc, (void *) &s_ctl_set_getversioninfo_ip,
+                                     (void *) &s_ctl_set_getversioninfo_op);
+        if(status != IV_SUCCESS)
+        {
+            sprintf(ac_error, "Unable to get codec version = 0x%x\n",
+                    s_ctl_set_getversioninfo_op.s_ive_op.u4_error_code);
+            codec_exit(ac_error);
+        }
+        printf("CODEC VERSION %s\n", ac_version_string);
+    }
+
+    /*************************************************************************/
+    /* Get I/O Buffer Requirement                                            */
+    /*************************************************************************/
+    {
+        ih264e_ctl_getbufinfo_ip_t s_get_buf_info_ip;
+        ih264e_ctl_getbufinfo_op_t s_get_buf_info_op;
+
+        s_get_buf_info_ip.s_ive_ip.u4_size = sizeof(ih264e_ctl_getbufinfo_ip_t);
+        s_get_buf_info_op.s_ive_op.u4_size = sizeof(ih264e_ctl_getbufinfo_op_t);
+
+        s_get_buf_info_ip.s_ive_ip.e_cmd = IVE_CMD_VIDEO_CTL;
+        s_get_buf_info_ip.s_ive_ip.e_sub_cmd = IVE_CMD_CTL_GETBUFINFO;
+        s_get_buf_info_ip.s_ive_ip.u4_max_ht = s_app_ctxt.u4_max_ht;
+        s_get_buf_info_ip.s_ive_ip.u4_max_wd = s_app_ctxt.u4_max_wd;
+        s_get_buf_info_ip.s_ive_ip.e_inp_color_fmt = s_app_ctxt.e_inp_color_fmt;
+
+        status = ih264e_api_function(ps_enc, &s_get_buf_info_ip, &s_get_buf_info_op);
+
+        if (status != IV_SUCCESS)
+        {
+            sprintf(ac_error, "Unable to get I/O buffer requirements = 0x%x\n",
+                    s_get_buf_info_op.s_ive_op.u4_error_code);
+            codec_exit(ac_error);
+        }
+        /* Kept in the context; the buffer allocators read it later */
+        s_app_ctxt.s_get_buf_info_op = s_get_buf_info_op;
+    }
+
+    /*****************************************************************************/
+    /* Add the following initializations based on the parameters in context      */
+    /*****************************************************************************/
+
+
+    /*****************************************************************************/
+    /* Video control Set Frame dimensions                                        */
+    /*****************************************************************************/
+    s_app_ctxt.u4_strd = ALIGN16(s_app_ctxt.u4_wd);
+    set_dimensions(&s_app_ctxt, 0, 0);
+
+    /*****************************************************************************/
+    /* Video control Set Frame rates                                             */
+    /*****************************************************************************/
+    set_frame_rate(&s_app_ctxt, 0, 0);
+
+    /*****************************************************************************/
+    /* Video control Set IPE Params                                              */
+    /*****************************************************************************/
+    set_ipe_params(&s_app_ctxt, 0, 0);
+
+    /*****************************************************************************/
+    /* Video control Set Bitrate                                                 */
+    /*****************************************************************************/
+    set_bit_rate(&s_app_ctxt, 0, 0);
+
+    /*****************************************************************************/
+    /* Video control Set QP                                                      */
+    /*****************************************************************************/
+    set_qp(&s_app_ctxt,0,0);
+
+    /*****************************************************************************/
+    /* Video control Set AIR params                                              */
+    /*****************************************************************************/
+    set_air_params(&s_app_ctxt,0,0);
+
+    /*****************************************************************************/
+    /* Video control Set VBV params                                              */
+    /*****************************************************************************/
+    set_vbv_params(&s_app_ctxt,0,0);
+
+    /*****************************************************************************/
+    /* Video control Set Motion estimation params                                */
+    /*****************************************************************************/
+    set_me_params(&s_app_ctxt,0,0);
+
+    /*****************************************************************************/
+    /* Video control Set GOP params                                              */
+    /*****************************************************************************/
+    set_gop_params(&s_app_ctxt, 0, 0);
+
+    /*****************************************************************************/
+    /* Video control Set Deblock params                                          */
+    /*****************************************************************************/
+    set_deblock_params(&s_app_ctxt, 0, 0);
+
+    /*****************************************************************************/
+    /* Video control Set Profile params                                          */
+    /*****************************************************************************/
+    set_profile_params(&s_app_ctxt, 0, 0);
+
+    /*****************************************************************************/
+    /* Video control Set in Encode header mode                                   */
+    /*****************************************************************************/
+    set_enc_mode(&s_app_ctxt, 0, 0, IVE_ENC_MODE_PICTURE);
+
+#ifdef IOS
+    /* Correct file paths */
+    snprintf(filename_with_path, sizeof(filename_with_path), "%s/%s", homedir, s_app_ctxt.ac_ip_fname);
+    strcpy (s_app_ctxt.ac_ip_fname, filename_with_path);
+
+    snprintf(filename_with_path, sizeof(filename_with_path), "%s/%s", homedir, s_app_ctxt.ac_op_fname);
+    strcpy (s_app_ctxt.ac_op_fname, filename_with_path);
+
+    snprintf(filename_with_path, sizeof(filename_with_path), "%s/%s", homedir, s_app_ctxt.ac_recon_fname);
+    strcpy (s_app_ctxt.ac_recon_fname, filename_with_path);
+
+    snprintf(filename_with_path, sizeof(filename_with_path), "%s/%s", homedir, s_app_ctxt.ac_chksum_fname);
+    strcpy (s_app_ctxt.ac_chksum_fname, filename_with_path);
+
+    snprintf(filename_with_path, sizeof(filename_with_path), "%s/%s", homedir, s_app_ctxt.ac_mb_info_fname);
+    strcpy (s_app_ctxt.ac_mb_info_fname, filename_with_path);
+
+    snprintf(filename_with_path, sizeof(filename_with_path), "%s/%s", homedir, s_app_ctxt.ac_pic_info_fname);
+    strcpy (s_app_ctxt.ac_pic_info_fname, filename_with_path);
+#endif
+
+    /*************************************************************************/
+    /* begin encoding                                                        */
+    /*************************************************************************/
+
+    synchronous_encode(ps_enc, &s_app_ctxt);
+
+    {
+        DOUBLE bytes_per_frame;
+        DOUBLE bytes_per_second;
+        WORD32 achieved_bitrate;
+
+        /* Divide in floating point so the fractional bytes per frame are
+         * not dropped before scaling up to a bitrate */
+        if(s_app_ctxt.u4_pics_cnt != 0)
+            bytes_per_frame = (DOUBLE)(s_app_ctxt.u4_total_bytes) / (s_app_ctxt.u4_pics_cnt);
+        else
+            bytes_per_frame = 0;
+
+        bytes_per_second = (bytes_per_frame * s_app_ctxt.u4_tgt_frame_rate);
+
+        achieved_bitrate = bytes_per_second * 8;
+
+        printf("\nEncoding Completed\n");
+        printf("Summary\n");
+        printf("Input filename : %s\n", s_app_ctxt.ac_ip_fname);
+        printf("Output filename : %s\n", s_app_ctxt.ac_op_fname);
+        printf("Output Width : %-4d\n", s_app_ctxt.u4_wd);
+        printf("Output Height : %-4d\n", s_app_ctxt.u4_ht);
+        printf("Target Bitrate (bps) : %-4d\n", s_app_ctxt.u4_bitrate);
+        printf("Achieved Bitrate (bps) : %-4d\n", achieved_bitrate);
+        printf("Average Time per Frame : %-4d\n", s_app_ctxt.avg_time);
+        printf("Achieved FPS : %-4.2f\n", 1000000.0 / s_app_ctxt.avg_time);
+    }
+
+
+    /*************************************************************************/
+    /* Close Codec Instance                                                  */
+    /*************************************************************************/
+    {
+        ih264e_retrieve_mem_rec_ip_t s_retrieve_mem_ip;
+        ih264e_retrieve_mem_rec_op_t s_retrieve_mem_op;
+        iv_mem_rec_t *ps_mem_rec;
+        s_retrieve_mem_ip.s_ive_ip.u4_size =
+                        sizeof(ih264e_retrieve_mem_rec_ip_t);
+        s_retrieve_mem_op.s_ive_op.u4_size =
+                        sizeof(ih264e_retrieve_mem_rec_op_t);
+
+        s_retrieve_mem_ip.s_ive_ip.e_cmd = IV_CMD_RETRIEVE_MEMREC;
+        s_retrieve_mem_ip.s_ive_ip.ps_mem_rec = s_app_ctxt.ps_mem_rec;
+
+        status = ih264e_api_function(ps_enc, &s_retrieve_mem_ip,
+                                     &s_retrieve_mem_op);
+
+        if(status != IV_SUCCESS)
+        {
+            sprintf(ac_error, "Unable to retrieve memory records = 0x%x\n",
+                    s_retrieve_mem_op.s_ive_op.u4_error_code);
+            codec_exit(ac_error);
+        }
+
+        /* Free memory records */
+        ps_mem_rec = s_app_ctxt.ps_mem_rec;
+        for(i = 0; i < num_mem_recs; i++)
+        {
+            ih264a_aligned_free(ps_mem_rec->pv_base);
+            ps_mem_rec++;
+        }
+
+        /* Finally release the record array itself */
+        free(s_app_ctxt.ps_mem_rec);
+
+    }
+
+    return 0;
+}
+
+
+#ifdef ANDROID_NDK
+/* Stand-ins for two runtime symbols, compiled only when ANDROID_NDK is defined. */
+
+/* Replacement for raise(): ignores the signal number, prints a fixed message
+ * and reports success (0) without delivering any signal.
+ * NOTE(review): the message suggests this exists for the runtime's integer
+ * divide-by-zero path, which is presumed to call raise() - confirm against
+ * the toolchain in use. */
+int raise(int a)
+{
+ printf("Divide by zero\n");
+ return 0;
+}
+/* Empty assertion-failure handler: all three arguments are ignored and control
+ * simply returns to the caller.
+ * NOTE(review): presumably the hook the ARM toolchain's assert() expands to;
+ * with this stub a failed assertion would not stop the program - confirm. */
+void __aeabi_assert(const char *assertion, const char *file, unsigned int line)
+{
+ return;
+}
+#endif
diff --git a/test/encoder/output.c b/test/encoder/output.c
new file mode 100755
index 0000000..e0f27dd
--- /dev/null
+++ b/test/encoder/output.c
@@ -0,0 +1,109 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/time.h>
+/* User include files */
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "app.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define PEAK_WINDOW_SIZE 8
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/*
+ * Writes num_bytes bytes from pu1_buf to the stream fp and flushes the stream.
+ *
+ * fp        : open output stream
+ * pu1_buf   : start of the data to write
+ * num_bytes : number of bytes to write; 0 writes nothing and still flushes
+ *
+ * Returns IV_SUCCESS when every byte was handed to the stream and the flush
+ * succeeded, IV_FAIL for a negative count, a short write or a failed flush.
+ */
+IV_STATUS_T write_output(FILE *fp, UWORD8 *pu1_buf, WORD32 num_bytes)
+{
+    size_t bytes_written;
+
+    /* A negative count would turn into a huge size_t inside fwrite() */
+    if(num_bytes < 0)
+    {
+        return IV_FAIL;
+    }
+
+    /* fwrite() returns the item count as size_t; compare in that type */
+    bytes_written = fwrite(pu1_buf, sizeof(UWORD8), (size_t)num_bytes, fp);
+    if(bytes_written != (size_t)num_bytes)
+    {
+        return IV_FAIL;
+    }
+
+    /* Data still sitting in the stream buffer has not reached the file, so a
+     * failed flush is a failed write */
+    if(0 != fflush(fp))
+    {
+        return IV_FAIL;
+    }
+
+    return IV_SUCCESS;
+}
+
+/*
+ * Allocates the bitstream output buffers and records them in
+ * ps_app_ctxt->as_output_buf[], marking each allocated entry as free.
+ *
+ * The number of buffers is the larger of DEFAULT_NUM_OUTPUT_BUFS and the
+ * minimum count stored in s_get_buf_info_op, capped at
+ * DEFAULT_MAX_OUTPUT_BUFS. Every buffer has the size stored in
+ * au4_min_out_buf_size[0] and is requested with 16-byte alignment.
+ * An allocation failure is reported through codec_exit().
+ */
+void allocate_output(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 idx;
+    WORD32 num_bufs;
+    WORD32 buf_size;
+
+    /* Buffer count: at least the default, never more than the array holds */
+    num_bufs = MAX(DEFAULT_NUM_OUTPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_out_bufs);
+    num_bufs = MIN(DEFAULT_MAX_OUTPUT_BUFS, num_bufs);
+
+    buf_size = ps_app_ctxt->s_get_buf_info_op.s_ive_op.au4_min_out_buf_size[0];
+
+    /* Clear the whole table first so that unused slots read as not free */
+    memset(ps_app_ctxt->as_output_buf, 0, sizeof(output_buf_t) * DEFAULT_MAX_OUTPUT_BUFS);
+
+    idx = 0;
+    while(idx < num_bufs)
+    {
+        UWORD8 *pu1_new_buf = (UWORD8 *)ih264a_aligned_malloc(16, buf_size);
+
+        if(NULL == pu1_new_buf)
+        {
+            CHAR ac_error[STRLENGTH];
+            sprintf(ac_error, "Allocation failed for output buffer of size %d\n",
+                    buf_size);
+            codec_exit(ac_error);
+        }
+
+        /* Publish the buffer and make it available for use */
+        ps_app_ctxt->as_output_buf[idx].pu1_buf = pu1_new_buf;
+        ps_app_ctxt->as_output_buf[idx].u4_buf_size = buf_size;
+        ps_app_ctxt->as_output_buf[idx].u4_is_free = 1;
+
+        idx++;
+    }
+}
+
+/*
+ * Releases the output buffers held in ps_app_ctxt->as_output_buf[].
+ *
+ * The buffer count is recomputed with the same rule allocate_output() uses:
+ * the larger of DEFAULT_NUM_OUTPUT_BUFS and the stored minimum count, capped
+ * at DEFAULT_MAX_OUTPUT_BUFS.
+ */
+void free_output(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 num_bufs;
+    WORD32 idx;
+
+    num_bufs = MAX(DEFAULT_NUM_OUTPUT_BUFS, ps_app_ctxt->s_get_buf_info_op.s_ive_op.u4_min_out_bufs);
+    num_bufs = MIN(DEFAULT_MAX_OUTPUT_BUFS, num_bufs);
+
+    idx = 0;
+    while(idx < num_bufs)
+    {
+        ih264a_aligned_free(ps_app_ctxt->as_output_buf[idx].pu1_buf);
+        idx++;
+    }
+}
+
diff --git a/test/encoder/psnr.c b/test/encoder/psnr.c
new file mode 100755
index 0000000..c9bb6a1
--- /dev/null
+++ b/test/encoder/psnr.c
@@ -0,0 +1,242 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <sys/time.h>
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "app.h"
+#include "psnr.h"
+
+/*****************************************************************************
+ *
+ *  Function Name : init_psnr
+ *
+ *  Description   : Initialize PSNR for the Y, U, V component
+ *
+ *  Inputs        : ps_app_ctxt - application context holding the PSNR state
+ *
+ *  Globals       :
+ *
+ *  Processing    : Zeroes the three per-component PSNR accumulators and the
+ *                  count of pictures measured
+ *
+ *  Outputs       : adbl_psnr[0..2] and u4_psnr_cnt in the context
+ *
+ *  Returns       : None
+ *
+ *  Issues        :
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         28 12 2005   Ittiam          Draft
+ *
+ *****************************************************************************/
+void init_psnr(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 comp;
+
+    /* No picture has been measured yet */
+    ps_app_ctxt->u4_psnr_cnt = 0;
+
+    /* Clear the running sum of each of the three components */
+    for(comp = 0; comp < 3; comp++)
+    {
+        ps_app_ctxt->adbl_psnr[comp] = 0;
+    }
+}
+
+
+/*****************************************************************************
+ *
+ *  Function Name : compute_psnr
+ *
+ *  Description   : Computes the PSNR for the Y, U, V component
+ *
+ *  Inputs        : ps_app_ctxt - application context; receives the running
+ *                                PSNR sums and the picture count
+ *                  ps_buf1     - first picture; its au4_wd / au4_ht give the
+ *                                number of samples compared per component
+ *                  ps_buf2     - second picture
+ *
+ *  Globals       :
+ *
+ *  Processing    : For each of the three components the mean squared
+ *                  difference between the two pictures is converted with
+ *                  20 * log10(255 / sqrt(mse)); a component with no
+ *                  difference is reported as 100. When a buffer is
+ *                  IV_YUV_420SP_UV, U and V are read from the interleaved
+ *                  second plane with a step of two bytes.
+ *
+ *  Outputs       : One line on stdout with the picture number and the three
+ *                  values; adbl_psnr[] is incremented by them and
+ *                  u4_psnr_cnt by one
+ *
+ *  Returns       : None
+ *
+ *  Issues        : NOTE(review): for semi-planar buffers the loop still takes
+ *                  its sample count from au4_wd[comp] / au4_ht[comp]; whether
+ *                  those hold per-component sample counts for the interleaved
+ *                  plane is not visible here - confirm.
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         28 12 2005   Ittiam          Draft
+ *
+ *****************************************************************************/
+void compute_psnr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_buf1, iv_raw_buf_t *ps_buf2)
+{
+    WORD32 i, j;
+    WORD32 comp;
+    DOUBLE df_psnr[3];
+    WORD32 wd, ht, strd1, strd2;
+    UWORD8 *pu1_buf1, *pu1_buf2;
+    WORD32 incr1, incr2;
+
+    printf("\nPicNum %4d\t ", ps_app_ctxt->u4_psnr_cnt);
+
+    for(comp = 0; comp < 3; comp++)
+    {
+        df_psnr[comp] = 0;
+        pu1_buf1 = (UWORD8 *)ps_buf1->apv_bufs[comp];
+        pu1_buf2 = (UWORD8 *)ps_buf2->apv_bufs[comp];
+        wd = ps_buf1->au4_wd[comp];
+        ht = ps_buf1->au4_ht[comp];
+        strd1 = ps_buf1->au4_strd[comp];
+        strd2 = ps_buf2->au4_strd[comp];
+        /* Distance in bytes between consecutive samples of one component */
+        incr1 = 1;
+        incr2 = 1;
+
+        /* NOTE(review): the original tested IV_YUV_420SP_UV twice here; the
+         * else-branches below suggest a second semi-planar order was meant to
+         * be accepted as well. Behaviour is kept as it was - confirm. */
+        if(IV_YUV_420SP_UV == ps_buf1->e_color_fmt)
+        {
+            switch(comp)
+            {
+                case 0:
+                    pu1_buf1 = ps_buf1->apv_bufs[0];
+                    break;
+                case 1:
+                    if(IV_YUV_420SP_UV == ps_buf1->e_color_fmt)
+                        pu1_buf1 = (UWORD8 *)ps_buf1->apv_bufs[1];
+                    else
+                        pu1_buf1 = (UWORD8 *)ps_buf1->apv_bufs[1] + 1;
+                    incr1 = 2;
+                    break;
+                case 2:
+                    if(IV_YUV_420SP_UV == ps_buf1->e_color_fmt)
+                        pu1_buf1 = (UWORD8 *)ps_buf1->apv_bufs[1] + 1;
+                    else
+                        pu1_buf1 = ps_buf1->apv_bufs[1];
+                    incr1 = 2;
+                    break;
+            }
+        }
+        /* NOTE(review): same duplicated test as above, for the second buffer */
+        if(IV_YUV_420SP_UV == ps_buf2->e_color_fmt)
+        {
+            switch(comp)
+            {
+                case 0:
+                    pu1_buf2 = ps_buf2->apv_bufs[0];
+                    break;
+                case 1:
+                    if(IV_YUV_420SP_UV == ps_buf2->e_color_fmt)
+                        pu1_buf2 = ps_buf2->apv_bufs[1];
+                    else
+                        pu1_buf2 = (UWORD8 *)ps_buf2->apv_bufs[1] + 1;
+                    /* The step belongs to buffer 2, not buffer 1 */
+                    incr2 = 2;
+                    break;
+                case 2:
+                    if(IV_YUV_420SP_UV == ps_buf2->e_color_fmt)
+                        pu1_buf2 = (UWORD8 *)ps_buf2->apv_bufs[1] + 1;
+                    else
+                        pu1_buf2 = ps_buf2->apv_bufs[1];
+                    incr2 = 2;
+                    break;
+            }
+        }
+
+        for(i = 0; i < ht; i++)
+        {
+            /* Walk the row with separate cursors so the row start can be
+             * advanced by exactly one stride, whatever the sample step is */
+            UWORD8 *pu1_row1 = pu1_buf1;
+            UWORD8 *pu1_row2 = pu1_buf2;
+
+            for(j = 0; j < wd; j++)
+            {
+                WORD32 diff;
+                diff = (*pu1_row1 - *pu1_row2);
+                pu1_row1 += incr1;
+                pu1_row2 += incr2;
+                df_psnr[comp] += diff * diff;
+            }
+            pu1_buf1 += strd1;
+            pu1_buf2 += strd2;
+        }
+        /* Sum of squared differences -> mean -> PSNR */
+        df_psnr[comp] /= (wd * ht);
+        if(df_psnr[comp])
+            df_psnr[comp] = 20 * log10(255 / sqrt(df_psnr[comp]));
+        else
+            df_psnr[comp] = 100;
+
+        ps_app_ctxt->adbl_psnr[comp] += df_psnr[comp];
+        switch(comp)
+        {
+            case 0:
+                printf("Y :");
+                break;
+            case 1:
+                printf("U :");
+                break;
+            case 2:
+                printf("V :");
+                break;
+            default:
+                break;
+        }
+        printf("%2.2f\t", df_psnr[comp]);
+
+    }
+
+    ps_app_ctxt->u4_psnr_cnt++;
+}
+
+
+/*****************************************************************************
+ *
+ *  Function Name : print_average_psnr
+ *
+ *  Description   : Computes the average PSNR for the Y, U, V component
+ *
+ *  Inputs        : ps_app_ctxt - application context holding the PSNR sums
+ *                                (adbl_psnr[]) and picture count (u4_psnr_cnt)
+ *
+ *  Globals       :
+ *
+ *  Processing    : Divides each accumulated sum by the picture count
+ *
+ *  Outputs       : A blank line followed by one line per component on stdout
+ *
+ *  Returns       : None
+ *
+ *  Issues        :
+ *
+ *  Revision History:
+ *
+ *         DD MM YYYY   Author(s)       Changes (Describe the changes made)
+ *         28 12 2005   Ittiam          Draft
+ *
+ *****************************************************************************/
+void print_average_psnr(app_ctxt_t *ps_app_ctxt)
+{
+    /* Blank line ahead of the summary */
+    printf("\n");
+
+    /* Luma */
+    printf("Avg PSNR Y : %-2.2f\n",
+           (ps_app_ctxt->adbl_psnr[0] / ps_app_ctxt->u4_psnr_cnt));
+
+    /* Chroma U */
+    printf("Avg PSNR U : %-2.2f\n",
+           (ps_app_ctxt->adbl_psnr[1] / ps_app_ctxt->u4_psnr_cnt));
+
+    /* Chroma V */
+    printf("Avg PSNR V : %-2.2f\n",
+           (ps_app_ctxt->adbl_psnr[2] / ps_app_ctxt->u4_psnr_cnt));
+}
+
diff --git a/test/encoder/psnr.h b/test/encoder/psnr.h
new file mode 100755
index 0000000..fd388cf
--- /dev/null
+++ b/test/encoder/psnr.h
@@ -0,0 +1,62 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/****************************************************************************/
+/* */
+/* File Name : psnr.h */
+/* */
+/* Description : Contains functions for psnr computation */
+/* */
+/* List of Functions : init_psnr */
+/* compute_psnr */
+/* print_average_psnr */
+/* Issues / Problems : */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes) */
+/* */
+/****************************************************************************/
+#ifndef PSNR_H
+#define PSNR_H
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/* NOTE: this header includes nothing itself, so app_ctxt_t and iv_raw_buf_t */
+/* must already be declared by the including file.                           */
+
+/* Definition is not visible from this header; presumably prepares the PSNR  */
+/* state kept in the application context - TODO confirm.                     */
+void init_psnr(app_ctxt_t *ps_app_ctxt);
+
+/* Compares the samples of ps_buf1 and ps_buf2 per component, prints the     */
+/* resulting PSNR values and adds them to the running sums in ps_app_ctxt.   */
+void compute_psnr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_buf1, iv_raw_buf_t *ps_buf2);
+
+/* Prints the running PSNR sums divided by the number of accumulated values. */
+void print_average_psnr(app_ctxt_t *ps_app_ctxt);
+
+/* GET_AVERAGE_PSNR_Y expands to the print call when COMPUTE_PSNR is         */
+/* non-zero and to the constant 0 otherwise.                                 */
+#if COMPUTE_PSNR
+#define GET_AVERAGE_PSNR_Y(print) print_average_psnr(print)
+#else /* !COMPUTE_PSNR */
+#define GET_AVERAGE_PSNR_Y(print) 0
+#endif /* COMPUTE_PSNR */
+
+#endif /* PSNR_H */
+
+
diff --git a/test/encoder/recon.c b/test/encoder/recon.c
new file mode 100755
index 0000000..7fd0f5c
--- /dev/null
+++ b/test/encoder/recon.c
@@ -0,0 +1,221 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/time.h>
+/* User include files */
+
+#include "ih264_typedefs.h"
+#include "iv2.h"
+#include "ive2.h"
+#include "ih264e.h"
+#include "app.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*
+ * Writes the visible samples of every plane of a raw picture to a file.
+ *
+ * fp         : destination stream, written with fwrite() and flushed at the end
+ * ps_raw_buf : picture descriptor; 3 planes are written for IV_YUV_420P and
+ *              2 planes for every other colour format
+ *
+ * Each plane is written row by row: au4_wd[comp] bytes per row, au4_ht[comp]
+ * rows. Consecutive rows are au4_strd[comp] bytes apart in memory, so padding
+ * between rows is skipped and never reaches the file.
+ *
+ * Returns IV_FAIL as soon as a row is written short, IV_SUCCESS otherwise.
+ */
+IV_STATUS_T write_recon(FILE *fp, iv_raw_buf_t *ps_raw_buf)
+{
+    WORD32 wd, ht, strd;
+    UWORD8 *pu1_buf;
+    WORD32 i;
+    WORD32 comp;
+    WORD32 num_comp;
+
+    num_comp = 2;
+    if(IV_YUV_420P == ps_raw_buf->e_color_fmt)
+        num_comp = 3;
+
+    for(comp = 0; comp < num_comp; comp++)
+    {
+        wd = ps_raw_buf->au4_wd[comp];
+        ht = ps_raw_buf->au4_ht[comp];
+        strd = ps_raw_buf->au4_strd[comp];
+        pu1_buf = ps_raw_buf->apv_bufs[comp];
+
+        /* A stride of zero means "not set": treat the rows as packed,
+         * the same convention init_raw_buf_descr() applies */
+        if(0 == strd)
+            strd = wd;
+
+        for(i = 0; i < ht; i++)
+        {
+            /* Keep the count in fwrite's own type instead of narrowing it */
+            size_t bytes = fwrite(pu1_buf, sizeof(UWORD8), wd, fp);
+            if(bytes != (size_t)wd)
+            {
+                return(IV_FAIL);
+            }
+            /* Step to the next row by the stride, not the width, so that
+             * buffers with padded rows are written correctly */
+            pu1_buf += strd;
+        }
+    }
+
+    fflush(fp);
+    return IV_SUCCESS;
+}
+/*
+ * Allocates one picture buffer of pic_size bytes through
+ * ih264a_aligned_malloc(16, pic_size).
+ *
+ * On failure the error text is handed to codec_exit(). Whether codec_exit()
+ * returns is not visible here; if it does, NULL is returned to the caller.
+ */
+static UWORD8 *alloc_recon_pic_buf(WORD32 pic_size)
+{
+    UWORD8 *pu1_buf = (UWORD8 *)ih264a_aligned_malloc(16, pic_size);
+
+    if(NULL == pu1_buf)
+    {
+        CHAR ac_error[STRLENGTH];
+        /* Bounded formatting: the message can never overrun ac_error */
+        snprintf(ac_error, sizeof(ac_error),
+                 "Allocation failed for recon buffer of size %d\n",
+                 pic_size);
+        codec_exit(ac_error);
+    }
+    return pu1_buf;
+}
+
+/*
+ * Allocates the recon picture buffers of the application context and, when
+ * u4_psnr_enable is set, one extra picture buffer used for PSNR.
+ *
+ * Every buffer is sized for a YUV420 picture of the maximum dimensions:
+ * a luma plane of ALIGN16(u4_max_wd) x ALIGN16(u4_max_ht) bytes plus two
+ * chroma planes of a quarter of that size each.
+ *
+ * Each of the DEFAULT_NUM_RECON_BUFS entries in as_recon_buf[] is marked free.
+ */
+void allocate_recon(app_ctxt_t *ps_app_ctxt)
+{
+    WORD32 num_bufs;
+    WORD32 pic_size;
+    WORD32 luma_size;
+    WORD32 chroma_size;
+    WORD32 i;
+
+    num_bufs = DEFAULT_NUM_RECON_BUFS;
+
+    /* Size of buffer for YUV420/420SP */
+    luma_size = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht);
+    chroma_size = (luma_size) / 4;
+    pic_size = luma_size + chroma_size * 2;
+
+    for(i = 0; i < num_bufs; i++)
+    {
+        ps_app_ctxt->as_recon_buf[i].pu1_buf = alloc_recon_pic_buf(pic_size);
+        ps_app_ctxt->as_recon_buf[i].u4_buf_size = pic_size;
+        ps_app_ctxt->as_recon_buf[i].u4_is_free = 1;
+    }
+
+    if(ps_app_ctxt->u4_psnr_enable)
+    {
+        ps_app_ctxt->pu1_psnr_buf = alloc_recon_pic_buf(pic_size);
+        ps_app_ctxt->u4_psnr_buf_size = pic_size;
+    }
+    return;
+}
+
+/*
+ * Releases the picture buffers held by the application context: all
+ * DEFAULT_NUM_RECON_BUFS recon buffers and, when u4_psnr_enable is set,
+ * the PSNR buffer as well. The stored pointers are left untouched.
+ */
+void free_recon(app_ctxt_t *ps_app_ctxt)
+{
+    const WORD32 buf_cnt = DEFAULT_NUM_RECON_BUFS;
+    WORD32 idx = 0;
+
+    while(idx < buf_cnt)
+    {
+        ih264a_aligned_free(ps_app_ctxt->as_recon_buf[idx].pu1_buf);
+        idx++;
+    }
+
+    /* The PSNR buffer is only released when PSNR is enabled */
+    if(0 != ps_app_ctxt->u4_psnr_enable)
+        ih264a_aligned_free(ps_app_ctxt->pu1_psnr_buf);
+}
+
+
+
+/*
+ * Fills a raw buffer descriptor for a picture of the current resolution
+ * (u4_wd x u4_ht of the application context) stored in pu1_buf.
+ *
+ * Plane 0 starts at pu1_buf and plane 1 follows after
+ * ALIGN16(u4_wd) * ALIGN16(u4_ht) bytes. For IV_YUV_420P, plane 2 starts a
+ * quarter of that size after plane 1 and both chroma planes are half width
+ * and half height. For every other format plane 2 is NULL with zero
+ * dimensions, and plane 1 is full width and half height.
+ *
+ * Strides are set equal to the plane widths. Everything is recomputed on each
+ * call so that the application can change resolution between calls.
+ */
+void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD8 *pu1_buf, IV_COLOR_FORMAT_T e_color_fmt)
+{
+    WORD32 luma_size = ALIGN16(ps_app_ctxt->u4_wd) * ALIGN16(ps_app_ctxt->u4_ht);
+    WORD32 chroma_size = (luma_size) / 4;
+    UWORD8 *pu1_plane = pu1_buf;
+
+    ps_raw_buf->u4_size = sizeof(iv_raw_buf_t);
+    ps_raw_buf->e_color_fmt = e_color_fmt;
+
+    /* Luma plane */
+    ps_raw_buf->apv_bufs[0] = pu1_plane;
+    ps_raw_buf->au4_wd[0] = ps_app_ctxt->u4_wd;
+    ps_raw_buf->au4_ht[0] = ps_app_ctxt->u4_ht;
+    ps_raw_buf->au4_strd[0] = ps_app_ctxt->u4_wd;
+
+    /* First chroma plane starts right after the aligned luma area */
+    pu1_plane += luma_size;
+    ps_raw_buf->apv_bufs[1] = pu1_plane;
+    ps_raw_buf->au4_ht[1] = ps_app_ctxt->u4_ht / 2;
+
+    if(IV_YUV_420P == e_color_fmt)
+    {
+        /* Planar: two separate chroma planes of half width */
+        pu1_plane += chroma_size;
+        ps_raw_buf->apv_bufs[2] = pu1_plane;
+
+        ps_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd / 2;
+        ps_raw_buf->au4_strd[1] = ps_app_ctxt->u4_wd / 2;
+
+        ps_raw_buf->au4_wd[2] = ps_app_ctxt->u4_wd / 2;
+        ps_raw_buf->au4_ht[2] = ps_app_ctxt->u4_ht / 2;
+        ps_raw_buf->au4_strd[2] = ps_app_ctxt->u4_wd / 2;
+    }
+    else
+    {
+        /* 420SP layout: one full-width chroma plane, no third plane */
+        ps_raw_buf->apv_bufs[2] = NULL;
+
+        ps_raw_buf->au4_wd[1] = ps_app_ctxt->u4_wd;
+        ps_raw_buf->au4_strd[1] = ps_app_ctxt->u4_wd;
+
+        ps_raw_buf->au4_wd[2] = 0;
+        ps_raw_buf->au4_ht[2] = 0;
+        ps_raw_buf->au4_strd[2] = 0;
+    }
+
+    /* If stride is not initialized, then use width as stride */
+    if(0 == ps_raw_buf->au4_strd[0])
+    {
+        WORD32 comp;
+        for(comp = 0; comp < 3; comp++)
+        {
+            ps_raw_buf->au4_strd[comp] = ps_raw_buf->au4_wd[comp];
+        }
+    }
+}
+
+