diff options
Diffstat (limited to 'libvpx')
112 files changed, 5940 insertions, 4221 deletions
diff --git a/libvpx/build/make/configure.sh b/libvpx/build/make/configure.sh index 2fbcfe5..514c442 100755 --- a/libvpx/build/make/configure.sh +++ b/libvpx/build/make/configure.sh @@ -274,6 +274,7 @@ TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RAND}.asm" clean_temp_files() { rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM} + enabled gcov && rm -f ${TMP_C%.c}.gcno ${TMP_CC%.cc}.gcno } # diff --git a/libvpx/build/make/gen_msvs_proj.sh b/libvpx/build/make/gen_msvs_proj.sh index 5936370..df91435 100755 --- a/libvpx/build/make/gen_msvs_proj.sh +++ b/libvpx/build/make/gen_msvs_proj.sh @@ -162,7 +162,8 @@ generate_filter() { done done fi - if [ "$pat" == "c" ] || [ "$pat" == "cc" ] ; then + if [ "$pat" == "c" ] || \ + [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then for plat in "${platforms[@]}"; do for cfg in Debug Release; do open_tag FileConfiguration \ @@ -561,7 +562,7 @@ generate_vcproj() { close_tag Configurations open_tag Files - generate_filter srcs "Source Files" "c;cc;def;odl;idl;hpj;bat;asm;asmx" + generate_filter srcs "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx" generate_filter hdrs "Header Files" "h;hm;inl;inc;xsd" generate_filter resrcs "Resource Files" "rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav" generate_filter resrcs "Build Files" "mk" diff --git a/libvpx/build/make/gen_msvs_vcxproj.sh b/libvpx/build/make/gen_msvs_vcxproj.sh index 7c8871b..23990a4 100755 --- a/libvpx/build/make/gen_msvs_vcxproj.sh +++ b/libvpx/build/make/gen_msvs_vcxproj.sh @@ -174,7 +174,8 @@ generate_filter() { done done close_tag CustomBuild - elif [ "$pat" == "c" ] || [ "$pat" == "cc" ] ; then + elif [ "$pat" == "c" ] || \ + [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then open_tag ClCompile \ Include=".\\$f" # Separate file names with Condition? 
@@ -524,7 +525,7 @@ generate_vcxproj() { done open_tag ItemGroup - generate_filter "Source Files" "c;cc;def;odl;idl;hpj;bat;asm;asmx;s" + generate_filter "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s" close_tag ItemGroup open_tag ItemGroup generate_filter "Header Files" "h;hm;inl;inc;xsd" diff --git a/libvpx/build/make/obj_int_extract.c b/libvpx/build/make/obj_int_extract.c index 819ce9d..2e50f38 100644 --- a/libvpx/build/make/obj_int_extract.c +++ b/libvpx/build/make/obj_int_extract.c @@ -34,6 +34,18 @@ int log_msg(const char *fmt, ...) { } #if defined(__GNUC__) && __GNUC__ + +#if defined(FORCE_PARSE_ELF) + +#if defined(__MACH__) +#undef __MACH__ +#endif + +#if !defined(__ELF__) +#define __ELF__ +#endif +#endif + #if defined(__MACH__) #include <mach-o/loader.h> diff --git a/libvpx/examples.mk b/libvpx/examples.mk index 87be5a8..fa5d66c 100644 --- a/libvpx/examples.mk +++ b/libvpx/examples.mk @@ -70,7 +70,7 @@ vp9_spatial_scalable_encoder.SRCS += vpxstats.c vpxstats.h vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder -ifeq ($(CONFIG_SHARED),no) +ifneq ($(CONFIG_SHARED),yes) EXAMPLES-$(CONFIG_VP9_ENCODER) += resize_util.c endif diff --git a/libvpx/examples/vp9_spatial_scalable_encoder.c b/libvpx/examples/vp9_spatial_scalable_encoder.c index 5c80d34..64e62ef 100644 --- a/libvpx/examples/vp9_spatial_scalable_encoder.c +++ b/libvpx/examples/vp9_spatial_scalable_encoder.c @@ -67,13 +67,22 @@ static const arg_def_t pass_arg = ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)"); static const arg_def_t fpf_name_arg = ARG_DEF(NULL, "fpf", 1, "First pass statistics file name"); +static const arg_def_t min_q_arg = + ARG_DEF(NULL, "min-q", 1, "Minimum quantizer"); +static const arg_def_t max_q_arg = + ARG_DEF(NULL, "max-q", 1, "Maximum quantizer"); +static const arg_def_t min_bitrate_arg = + ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate"); +static const arg_def_t 
max_bitrate_arg = + ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate"); static const arg_def_t *svc_args[] = { &encoding_mode_arg, &frames_arg, &width_arg, &height_arg, &timebase_arg, &bitrate_arg, &skip_frames_arg, &layers_arg, &kf_dist_arg, &scale_factors_arg, &quantizers_arg, - &quantizers_keyframe_arg, &passes_arg, &pass_arg, - &fpf_name_arg, NULL + &quantizers_keyframe_arg, &passes_arg, &pass_arg, + &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg, + &max_bitrate_arg, NULL }; static const SVC_ENCODING_MODE default_encoding_mode = @@ -120,6 +129,8 @@ static void parse_command_line(int argc, const char **argv_, int passes = 0; int pass = 0; const char *fpf_file_name = NULL; + unsigned int min_bitrate = 0; + unsigned int max_bitrate = 0; // initialize SvcContext with parameters that will be passed to vpx_svc_init svc_ctx->log_level = SVC_LOG_DEBUG; @@ -186,6 +197,14 @@ static void parse_command_line(int argc, const char **argv_, } } else if (arg_match(&arg, &fpf_name_arg, argi)) { fpf_file_name = arg.val; + } else if (arg_match(&arg, &min_q_arg, argi)) { + enc_cfg->rc_min_quantizer = arg_parse_uint(&arg); + } else if (arg_match(&arg, &max_q_arg, argi)) { + enc_cfg->rc_max_quantizer = arg_parse_uint(&arg); + } else if (arg_match(&arg, &min_bitrate_arg, argi)) { + min_bitrate = arg_parse_uint(&arg); + } else if (arg_match(&arg, &max_bitrate_arg, argi)) { + max_bitrate = arg_parse_uint(&arg); } else { ++argj; } @@ -221,6 +240,17 @@ static void parse_command_line(int argc, const char **argv_, app_input->pass = pass; } + if (enc_cfg->rc_target_bitrate > 0) { + if (min_bitrate > 0) { + enc_cfg->rc_2pass_vbr_minsection_pct = + min_bitrate * 100 / enc_cfg->rc_target_bitrate; + } + if (max_bitrate > 0) { + enc_cfg->rc_2pass_vbr_maxsection_pct = + max_bitrate * 100 / enc_cfg->rc_target_bitrate; + } + } + // Check for unrecognized options for (argi = argv; *argi; ++argi) if (argi[0][0] == '-' && strlen(argi[0]) > 1) diff --git 
a/libvpx/examples/vpx_temporal_scalable_patterns.c b/libvpx/examples/vpx_temporal_scalable_patterns.c index b25953f..5cb4ee9 100644 --- a/libvpx/examples/vpx_temporal_scalable_patterns.c +++ b/libvpx/examples/vpx_temporal_scalable_patterns.c @@ -437,6 +437,7 @@ int main(int argc, char **argv) { vpx_codec_err_t res; unsigned int width; unsigned int height; + int speed; int frame_avail; int got_data; int flags = 0; @@ -457,7 +458,7 @@ int main(int argc, char **argv) { // Check usage and arguments. if (argc < 11) { die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> " - "<rate_num> <rate_den> <frame_drop_threshold> <mode> " + "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> " "<Rate_0> ... <Rate_nlayers-1> \n", argv[0]); } @@ -473,12 +474,12 @@ int main(int argc, char **argv) { die("Invalid resolution: %d x %d", width, height); } - layering_mode = strtol(argv[9], NULL, 0); + layering_mode = strtol(argv[10], NULL, 0); if (layering_mode < 0 || layering_mode > 12) { - die("Invalid mode (0..12) %s", argv[9]); + die("Invalid layering mode (0..12) %s", argv[10]); } - if (argc != 10 + mode_to_num_layers[layering_mode]) { + if (argc != 11 + mode_to_num_layers[layering_mode]) { die("Invalid number of arguments"); } @@ -501,12 +502,17 @@ int main(int argc, char **argv) { cfg.g_timebase.num = strtol(argv[6], NULL, 0); cfg.g_timebase.den = strtol(argv[7], NULL, 0); - for (i = 10; (int)i < 10 + mode_to_num_layers[layering_mode]; ++i) { - cfg.ts_target_bitrate[i - 10] = strtol(argv[i], NULL, 0); + speed = strtol(argv[8], NULL, 0); + if (speed < 0) { + die("Invalid speed setting: must be positive"); + } + + for (i = 11; (int)i < 11 + mode_to_num_layers[layering_mode]; ++i) { + cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0); } // Real time parameters. 
- cfg.rc_dropframe_thresh = strtol(argv[8], NULL, 0); + cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0); cfg.rc_end_usage = VPX_CBR; cfg.rc_resize_allowed = 0; cfg.rc_min_quantizer = 2; @@ -563,14 +569,16 @@ int main(int argc, char **argv) { if (vpx_codec_enc_init(&codec, encoder->interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); - vpx_codec_control(&codec, VP8E_SET_CPUUSED, -6); - vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1); - if (strncmp(encoder->name, "vp9", 3) == 0) { - vpx_codec_control(&codec, VP8E_SET_CPUUSED, 5); - vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); - vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0); - if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) { - die_codec(&codec, "Failed to set SVC"); + if (strncmp(encoder->name, "vp8", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1); + } else if (strncmp(encoder->name, "vp9", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); + vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); + vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0); + if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) { + die_codec(&codec, "Failed to set SVC"); } } vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); diff --git a/libvpx/test/android/scrape_gtest_log.py b/libvpx/test/android/scrape_gtest_log.py new file mode 100644 index 0000000..487845c --- /dev/null +++ b/libvpx/test/android/scrape_gtest_log.py @@ -0,0 +1,57 @@ +# Copyright (c) 2014 The WebM project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +"""Standalone script which parses a gtest log for json. + +Json is returned returns as an array. This script is used by the libvpx +waterfall to gather json results mixed in with gtest logs. This is +dubious software engineering. +""" + +import getopt +import json +import os +import re +import sys + + +def main(): + if len(sys.argv) != 3: + print "Expects a file to write json to!" + exit(1) + + try: + opts, _ = \ + getopt.getopt(sys.argv[1:], \ + 'o:', ['output-json=']) + except getopt.GetOptError: + print 'scrape_gtest_log.py -o <output_json>' + sys.exit(2) + + output_json = '' + for opt, arg in opts: + if opt in ('-o', '--output-json'): + output_json = os.path.join(arg) + + blob = sys.stdin.read() + json_string = '[' + ','.join('{' + x + '}' for x in + re.findall(r'{([^}]*.?)}', blob)) + ']' + print blob + + output = json.dumps(json.loads(json_string), indent=4, sort_keys=True) + print output + + path = os.path.dirname(output_json) + if path and not os.path.exists(path): + os.makedirs(path) + + outfile = open(output_json, 'w') + outfile.write(output) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/libvpx/test/datarate_test.cc b/libvpx/test/datarate_test.cc index f673adc..e8604a6 100644 --- a/libvpx/test/datarate_test.cc +++ b/libvpx/test/datarate_test.cc @@ -570,9 +570,9 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) { << " The datarate for the file is greater than target by too much, " "for layer: " << j; // Expect some frame drops in this test: for this 200 frames test, - // expect at least 10% and not more than 50% drops. + // expect at least 10% and not more than 60% drops. 
ASSERT_GE(num_drops_, 20); - ASSERT_LE(num_drops_, 100); + ASSERT_LE(num_drops_, 120); } } @@ -581,8 +581,4 @@ VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large, ::testing::Values(::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), ::testing::Range(2, 7)); -// TODO(marpan): Speed 7 fails on one of these tests (likely just a threshold -// needs to be changed), so for now test up to speed 6, and start at 2 (since -// speed 0 and 1 are slow). Allow speed 7 (for real-time mode) after -// looking into/fix failing issue. } // namespace diff --git a/libvpx/test/external_frame_buffer_test.cc b/libvpx/test/external_frame_buffer_test.cc index 2e7adc1..54c79e9 100644 --- a/libvpx/test/external_frame_buffer_test.cc +++ b/libvpx/test/external_frame_buffer_test.cc @@ -210,7 +210,7 @@ class ExternalFrameBufferMD5Test ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_)); ASSERT_EQ(VPX_CODEC_OK, decoder->SetFrameBufferFunctions( - GetVp9FrameBuffer, ReleaseVP9FrameBuffer, this)); + GetVP9FrameBuffer, ReleaseVP9FrameBuffer, this)); } } @@ -242,7 +242,7 @@ class ExternalFrameBufferMD5Test // Callback to get a free external frame buffer. Return value < 0 is an // error. 
- static int GetVp9FrameBuffer(void *user_priv, size_t min_size, + static int GetVP9FrameBuffer(void *user_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferMD5Test *const md5Test = reinterpret_cast<ExternalFrameBufferMD5Test*>(user_priv); @@ -462,5 +462,7 @@ TEST_F(ExternalFrameBufferTest, SetAfterDecode) { } VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test, - ::testing::ValuesIn(libvpx_test::kVP9TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP9TestVectors, + libvpx_test::kVP9TestVectors + + libvpx_test::kNumVP9TestVectors)); } // namespace diff --git a/libvpx/test/pp_filter_test.cc b/libvpx/test/pp_filter_test.cc index ff7bb08..86c2b0e 100644 --- a/libvpx/test/pp_filter_test.cc +++ b/libvpx/test/pp_filter_test.cc @@ -25,7 +25,7 @@ typedef void (*post_proc_func_t)(unsigned char *src_ptr, namespace { -class Vp8PostProcessingFilterTest +class VP8PostProcessingFilterTest : public ::testing::TestWithParam<post_proc_func_t> { public: virtual void TearDown() { @@ -36,7 +36,7 @@ class Vp8PostProcessingFilterTest // Test routine for the VP8 post-processing function // vp8_post_proc_down_and_across_mb_row_c. -TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { +TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) { // Size of the underlying data block that will be filtered. 
const int block_width = 16; const int block_height = 16; @@ -91,7 +91,7 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { for (int i = 0; i < block_height; ++i) { for (int j = 0; j < block_width; ++j) { EXPECT_EQ(expected_data[i], pixel_ptr[j]) - << "Vp8PostProcessingFilterTest failed with invalid filter output"; + << "VP8PostProcessingFilterTest failed with invalid filter output"; } pixel_ptr += output_stride; } @@ -101,11 +101,11 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) { vpx_free(flimits); }; -INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest, +INSTANTIATE_TEST_CASE_P(C, VP8PostProcessingFilterTest, ::testing::Values(vp8_post_proc_down_and_across_mb_row_c)); #if HAVE_SSE2 -INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest, +INSTANTIATE_TEST_CASE_P(SSE2, VP8PostProcessingFilterTest, ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2)); #endif diff --git a/libvpx/test/set_roi.cc b/libvpx/test/set_roi.cc index e28f511..5b054f4 100644 --- a/libvpx/test/set_roi.cc +++ b/libvpx/test/set_roi.cc @@ -26,7 +26,7 @@ using libvpx_test::ACMRandom; namespace { -TEST(Vp8RoiMapTest, ParameterCheck) { +TEST(VP8RoiMapTest, ParameterCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; diff --git a/libvpx/test/svc_test.cc b/libvpx/test/svc_test.cc index dff2ec7..fb9277b 100644 --- a/libvpx/test/svc_test.cc +++ b/libvpx/test/svc_test.cc @@ -31,6 +31,7 @@ class SvcTest : public ::testing::Test { SvcTest() : codec_iface_(0), test_file_name_("hantro_collage_w352h288.yuv"), + stats_file_name_("hantro_collage_w352h288.stat"), codec_initialized_(false), decoder_(0) { memset(&svc_, 0, sizeof(svc_)); @@ -73,6 +74,7 @@ class SvcTest : public ::testing::Test { struct vpx_codec_enc_cfg codec_enc_; vpx_codec_iface_t *codec_iface_; std::string test_file_name_; + std::string stats_file_name_; bool codec_initialized_; Decoder *decoder_; }; @@ 
-397,4 +399,74 @@ TEST_F(SvcTest, FirstPassEncode) { EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); } +TEST_F(SvcTest, SecondPassEncode) { + svc_.spatial_layers = 2; + codec_enc_.g_pass = VPX_RC_LAST_PASS; + + FILE *const stats_file = libvpx_test::OpenTestDataFile(stats_file_name_); + ASSERT_TRUE(stats_file != NULL) << "Stats file open failed. Filename: " + << stats_file; + + struct vpx_fixed_buf stats_buf; + fseek(stats_file, 0, SEEK_END); + stats_buf.sz = static_cast<size_t>(ftell(stats_file)); + fseek(stats_file, 0, SEEK_SET); + + stats_buf.buf = malloc(stats_buf.sz); + ASSERT_TRUE(stats_buf.buf != NULL); + const size_t bytes_read = fread(stats_buf.buf, 1, stats_buf.sz, stats_file); + ASSERT_EQ(bytes_read, stats_buf.sz); + fclose(stats_file); + codec_enc_.rc_twopass_stats_in = stats_buf; + + vpx_codec_err_t res = + vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + ASSERT_EQ(VPX_CODEC_OK, res); + codec_initialized_ = true; + + libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, + codec_enc_.g_timebase.den, + codec_enc_.g_timebase.num, 0, 30); + // FRAME 0 + video.Begin(); + // This frame is a keyframe. + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_)); + + vpx_codec_err_t res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + // FRAME 1 + video.Next(); + // This is a P-frame. 
+ res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); + + res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + // FRAME 2 + video.Next(); + // This is a P-frame. + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_)); + + res_dec = decoder_->DecodeFrame( + static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), + vpx_svc_get_frame_size(&svc_)); + ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError(); + + free(stats_buf.buf); +} + } // namespace diff --git a/libvpx/test/test-data.sha1 b/libvpx/test/test-data.sha1 index 6f718ef..cf2ad1e 100644 --- a/libvpx/test/test-data.sha1 +++ b/libvpx/test/test-data.sha1 @@ -1,4 +1,5 @@ d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv +998cec53307c94aa5835aaf8d5731f6a3c7c2e5a hantro_collage_w352h288.stat b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m 5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf @@ -588,3 +589,49 @@ b3c48382cf7d0454e83a02497c229d27720f9e20 vp90-2-11-size-351x287.webm.md5 92a756469fa438220524e7fa6ac1d38c89514d17 vp90-2-12-droppable_2.ivf.md5 c21e97e4ba486520118d78b01a5cb6e6dc33e190 vp90-2-12-droppable_3.ivf 601abc9e4176c70f82ac0381365e9b151fdd24cd vp90-2-12-droppable_3.ivf.md5 +61c640dad23cd4f7ad811b867e7b7e3521f4e3ba vp90-2-13-largescaling.webm +bca1b02eebdb088fa3f389fe0e7571e75a71f523 vp90-2-13-largescaling.webm.md5 +c740708fa390806eebaf669909c1285ab464f886 vp90-2-14-resize-fp-tiles-1-2.webm +c7b85ffd8e11500f73f52e7dc5a47f57c393d47f vp90-2-14-resize-fp-tiles-1-2.webm.md5 
+ec8faa352a08f7033c60f29f80d505e2d7daa103 vp90-2-14-resize-fp-tiles-1-4.webm +6852c783fb421bda5ded3d4c5a3ffc46de03fbc1 vp90-2-14-resize-fp-tiles-1-4.webm.md5 +8af61853ac0d07c4cb5bf7c2016661ba350b3497 vp90-2-14-resize-fp-tiles-1-8.webm +571353bac89fea60b5706073409aa3c0d42aefe9 vp90-2-14-resize-fp-tiles-1-8.webm.md5 +b1c187ed69931496b82ec194017a79831bafceef vp90-2-14-resize-fp-tiles-1-16.webm +1c199a41afe42ce303944d70089eaaa2263b4a09 vp90-2-14-resize-fp-tiles-1-16.webm.md5 +8eaae5a6f2dff934610b0c7a917d7f583ba74aa5 vp90-2-14-resize-fp-tiles-2-1.webm +db18fcf915f7ffaea6c39feab8bda6c1688af011 vp90-2-14-resize-fp-tiles-2-1.webm.md5 +bc3046d138941e2a20e9ceec0ff6d25c25d12af3 vp90-2-14-resize-fp-tiles-4-1.webm +393211b808030d09a79927b17a4374b2f68a60ae vp90-2-14-resize-fp-tiles-4-1.webm.md5 +6e8f8e31721a0f7f68a2964e36e0e698c2e276b1 vp90-2-14-resize-fp-tiles-8-1.webm +491fd3cd78fb0577bfe905bb64bbf64bd7d29140 vp90-2-14-resize-fp-tiles-8-1.webm.md5 +cc5958da2a7edf739cd2cfeb18bd05e77903087e vp90-2-14-resize-fp-tiles-16-1.webm +0b58daf55aaf9063bf5b4fb33393d18b417dc428 vp90-2-14-resize-fp-tiles-16-1.webm.md5 +821eeecc9d8c6a316134dd42d1ff057787d8047b vp90-2-14-resize-fp-tiles-2-4.webm +374c549f2839a3d0b732c4e3650700144037e76c vp90-2-14-resize-fp-tiles-2-4.webm.md5 +dff8c8e49aacea9f4c7f22cb882da984e2a1b405 vp90-2-14-resize-fp-tiles-2-8.webm +e5b8820a7c823b21297d6e889e57ec401882c210 vp90-2-14-resize-fp-tiles-2-8.webm.md5 +77629e4b23e32896aadf6e994c78bd4ffa1c7797 vp90-2-14-resize-fp-tiles-2-16.webm +1937f5df032664ac345d4613ad4417b4967b1230 vp90-2-14-resize-fp-tiles-2-16.webm.md5 +380ba5702bb1ec7947697314ab0300b5c56a1665 vp90-2-14-resize-fp-tiles-4-2.webm +fde7b30d2aa64c1e851a4852f655d79fc542cf66 vp90-2-14-resize-fp-tiles-4-2.webm.md5 +dc784b258ffa2abc2ae693d11792acf0bb9cb74f vp90-2-14-resize-fp-tiles-8-2.webm +edf26f0130aeee8342d49c2c8f0793ad008782d9 vp90-2-14-resize-fp-tiles-8-2.webm.md5 +8e575789fd63ebf69e8eff1b9a4351a249a73bee vp90-2-14-resize-fp-tiles-16-2.webm 
+b6415318c1c589a1f64b9d569ce3cabbec2e0d52 vp90-2-14-resize-fp-tiles-16-2.webm.md5 +e3adc944a11c4c5517e63664c84ebb0847b64d81 vp90-2-14-resize-fp-tiles-4-8.webm +03cba0532bc90a05b1990db830bf5701e24e7982 vp90-2-14-resize-fp-tiles-4-8.webm.md5 +3b27a991eb6d78dce38efab35b7db682e8cbbee3 vp90-2-14-resize-fp-tiles-4-16.webm +5d16b7f82bf59f802724ddfd97abb487150b1c9d vp90-2-14-resize-fp-tiles-4-16.webm.md5 +d5fed8c28c1d4c7e232ebbd25cf758757313ed96 vp90-2-14-resize-fp-tiles-8-4.webm +5a8ff8a52cbbde7bfab569beb6d971c5f8b904f7 vp90-2-14-resize-fp-tiles-8-4.webm.md5 +17a5faa023d77ee9dad423a4e0d3145796bbc500 vp90-2-14-resize-fp-tiles-16-4.webm +2ef8daa3c3e750fd745130d0a76a39fe86f0448f vp90-2-14-resize-fp-tiles-16-4.webm.md5 +9361e031f5cc990d8740863e310abb5167ae351e vp90-2-14-resize-fp-tiles-8-16.webm +57f13a2197486584f4e1a4f82ad969f3abc5a1a2 vp90-2-14-resize-fp-tiles-8-16.webm.md5 +5803fc6fcbfb47b7661f3fcc6499158a32b56675 vp90-2-14-resize-fp-tiles-16-8.webm +be0fe64a1a4933696ff92d93f9bdecdbd886dc13 vp90-2-14-resize-fp-tiles-16-8.webm.md5 +0ac0f6d20a0afed77f742a3b9acb59fd7b9cb093 vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm +1765315acccfe6cd12230e731369fcb15325ebfa vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 +4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8 vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +1ef480392112b3509cb190afbb96f9a38dd9fbac vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 diff --git a/libvpx/test/test.mk b/libvpx/test/test.mk index 175bc52..92664e2 100644 --- a/libvpx/test/test.mk +++ b/libvpx/test/test.mk @@ -122,6 +122,7 @@ endif # CONFIG_SHARED ## TEST DATA ## LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv +LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.stat LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m @@ -693,8 +694,54 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_2.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_2.ivf.md5 
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-14-resize-fp-tiles-2-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm +LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 
ifeq ($(CONFIG_DECODE_PERF_TESTS),yes) # BBB VP9 streams diff --git a/libvpx/test/test_vector_test.cc b/libvpx/test/test_vector_test.cc index 53b7636..9ba18da 100644 --- a/libvpx/test/test_vector_test.cc +++ b/libvpx/test/test_vector_test.cc @@ -89,8 +89,12 @@ TEST_P(TestVectorTest, MD5Match) { } VP8_INSTANTIATE_TEST_CASE(TestVectorTest, - ::testing::ValuesIn(libvpx_test::kVP8TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP8TestVectors, + libvpx_test::kVP8TestVectors + + libvpx_test::kNumVP8TestVectors)); VP9_INSTANTIATE_TEST_CASE(TestVectorTest, - ::testing::ValuesIn(libvpx_test::kVP9TestVectors)); + ::testing::ValuesIn(libvpx_test::kVP9TestVectors, + libvpx_test::kVP9TestVectors + + libvpx_test::kNumVP9TestVectors)); } // namespace diff --git a/libvpx/test/test_vectors.cc b/libvpx/test/test_vectors.cc index aba8a3c..ff3c389 100644 --- a/libvpx/test/test_vectors.cc +++ b/libvpx/test/test_vectors.cc @@ -12,8 +12,10 @@ namespace libvpx_test { +#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) + #if CONFIG_VP8_DECODER -const char *kVP8TestVectors[kNumVp8TestVectors] = { +const char *const kVP8TestVectors[] = { "vp80-00-comprehensive-001.ivf", "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf", "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf", @@ -47,9 +49,10 @@ const char *kVP8TestVectors[kNumVp8TestVectors] = { "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf", "vp80-06-smallsize.ivf" }; +const int kNumVP8TestVectors = NELEMENTS(kVP8TestVectors); #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER -const char *kVP9TestVectors[kNumVp9TestVectors] = { +const char *const kVP9TestVectors[] = { "vp90-2-00-quantizer-00.webm", "vp90-2-00-quantizer-01.webm", "vp90-2-00-quantizer-02.webm", "vp90-2-00-quantizer-03.webm", "vp90-2-00-quantizer-04.webm", "vp90-2-00-quantizer-05.webm", @@ -161,8 +164,22 @@ const char *kVP9TestVectors[kNumVp9TestVectors] = { "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm", 
"vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf", "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf", - "vp91-2-04-yv444.webm" + "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm", + "vp90-2-14-resize-fp-tiles-1-16.webm", + "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", + "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm", + "vp90-2-14-resize-fp-tiles-16-1.webm", "vp90-2-14-resize-fp-tiles-16-2.webm", + "vp90-2-14-resize-fp-tiles-16-4.webm", + "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", + "vp90-2-14-resize-fp-tiles-16-8.webm", "vp90-2-14-resize-fp-tiles-1-8.webm", + "vp90-2-14-resize-fp-tiles-2-16.webm", "vp90-2-14-resize-fp-tiles-2-1.webm", + "vp90-2-14-resize-fp-tiles-2-4.webm", "vp90-2-14-resize-fp-tiles-2-8.webm", + "vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm", + "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm", + "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm", + "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm" }; +const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); #endif // CONFIG_VP9_DECODER } // namespace libvpx_test diff --git a/libvpx/test/test_vectors.h b/libvpx/test/test_vectors.h index d5ecc96..8e1aabb 100644 --- a/libvpx/test/test_vectors.h +++ b/libvpx/test/test_vectors.h @@ -16,14 +16,13 @@ namespace libvpx_test { #if CONFIG_VP8_DECODER -const int kNumVp8TestVectors = 62; -extern const char *kVP8TestVectors[kNumVp8TestVectors]; +extern const int kNumVP8TestVectors; +extern const char *const kVP8TestVectors[]; #endif #if CONFIG_VP9_DECODER -const int kNumVp9TestVectors = 223; - -extern const char *kVP9TestVectors[kNumVp9TestVectors]; +extern const int kNumVP9TestVectors; +extern const char *const kVP9TestVectors[]; #endif // CONFIG_VP9_DECODER } // namespace libvpx_test diff --git a/libvpx/test/tools_common.sh b/libvpx/test/tools_common.sh new file mode 100755 index 0000000..cd79771 --- 
/dev/null +++ b/libvpx/test/tools_common.sh @@ -0,0 +1,437 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file contains shell code shared by test scripts for libvpx tools. +set -e + +# Sets $VPX_TOOL_TEST to the name specified by positional parameter one. +test_begin() { + VPX_TOOL_TEST="${1}" +} + +# Clears the VPX_TOOL_TEST variable after confirming that $VPX_TOOL_TEST matches +# positional parameter one. +test_end() { + if [ "$1" != "${VPX_TOOL_TEST}" ]; then + echo "FAIL completed test mismatch!." + echo " completed test: ${1}" + echo " active test: ${VPX_TOOL_TEST}." + return 1 + fi + VPX_TOOL_TEST='<unset>' +} + +# Echoes the target configuration being tested. +test_configuration_target() { + vpx_config_mk="${LIBVPX_CONFIG_PATH}/config.mk" + # Find the TOOLCHAIN line, split it using ':=' as the field separator, and + # print the last field to get the value. Then pipe the value to tr to consume + # any leading/trailing spaces while allowing tr to echo the output to stdout. + awk -F ':=' '/TOOLCHAIN/ { print $NF }' "${vpx_config_mk}" | tr -d ' ' +} + +# Trap function used for failure reports and tool output directory removal. +# When the contents of $VPX_TOOL_TEST do not match the string '<unset>', reports +# failure of test stored in $VPX_TOOL_TEST. 
+cleanup() { + if [ -n "${VPX_TOOL_TEST}" ] && [ "${VPX_TOOL_TEST}" != '<unset>' ]; then + echo "FAIL: $VPX_TOOL_TEST" + fi + if [ -n "${VPX_TEST_OUTPUT_DIR}" ] && [ -d "${VPX_TEST_OUTPUT_DIR}" ]; then + rm -rf "${VPX_TEST_OUTPUT_DIR}" + fi +} + +# Echoes the git hash portion of the VERSION_STRING variable defined in +# $LIBVPX_CONFIG_PATH/config.mk to stdout, or the version number string when +# no git hash is contained in VERSION_STRING. +config_hash() { + vpx_config_mk="${LIBVPX_CONFIG_PATH}/config.mk" + # Find VERSION_STRING line, split it with "-g" and print the last field to + # output the git hash to stdout. + vpx_version=$(awk -F -g '/VERSION_STRING/ {print $NF}' "${vpx_config_mk}") + # Handle two situations here: + # 1. The default case: $vpx_version is a git hash, so echo it unchanged. + # 2. When being run a non-dev tree, the -g portion is not present in the + # version string: It's only the version number. + # In this case $vpx_version is something like 'VERSION_STRING=v1.3.0', so + # we echo only what is after the '='. + echo "${vpx_version##*=}" +} + +# Echoes the short form of the current git hash. +current_hash() { + if git --version > /dev/null 2>&1; then + (cd "$(dirname "${0}")" + git rev-parse --short HEAD) + else + # Return the config hash if git is unavailable: Fail silently, git hashes + # are used only for warnings. + config_hash + fi +} + +# Echoes warnings to stdout when git hash in vpx_config.h does not match the +# current git hash. +check_git_hashes() { + hash_at_configure_time=$(config_hash) + hash_now=$(current_hash) + + if [ "${hash_at_configure_time}" != "${hash_now}" ]; then + echo "Warning: git hash has changed since last configure." + fi +} + +# This script requires that the LIBVPX_BIN_PATH, LIBVPX_CONFIG_PATH, and +# LIBVPX_TEST_DATA_PATH variables are in the environment: Confirm that +# the variables are set and that they all evaluate to directory paths. +verify_vpx_test_environment() { + if [ ! 
-d "${LIBVPX_BIN_PATH}" ]; then + echo "The LIBVPX_BIN_PATH environment variable must be set." + return 1 + fi + if [ ! -d "${LIBVPX_CONFIG_PATH}" ]; then + echo "The LIBVPX_CONFIG_PATH environment variable must be set." + return 1 + fi + if [ ! -d "${LIBVPX_TEST_DATA_PATH}" ]; then + echo "The LIBVPX_TEST_DATA_PATH environment variable must be set." + return 1 + fi +} + +# Greps vpx_config.h in LIBVPX_CONFIG_PATH for positional parameter one, which +# should be a LIBVPX preprocessor flag. Echoes yes to stdout when the feature +# is available. +vpx_config_option_enabled() { + vpx_config_option="${1}" + vpx_config_file="${LIBVPX_CONFIG_PATH}/vpx_config.h" + config_line=$(grep "${vpx_config_option}" "${vpx_config_file}") + if echo "${config_line}" | egrep -q '1$'; then + echo yes + fi +} + +# Echoes yes when output of test_configuration_target() contains win32 or win64. +is_windows_target() { + if test_configuration_target \ + | grep -q -e win32 -e win64 > /dev/null 2>&1; then + echo yes + fi +} + +# Echoes yes to stdout when the file named by positional parameter one exists +# in LIBVPX_BIN_PATH (with .exe appended on Windows targets), and is executable. +vpx_tool_available() { + tool_name="${1}" + if [ "$(is_windows_target)" = "yes" ]; then + tool_name="${tool_name}.exe" + fi + [ -x "${LIBVPX_BIN_PATH}/${tool_name}" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP8_DECODER. +vp8_decode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP8_DECODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP8_ENCODER. +vp8_encode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP8_ENCODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP9_DECODER.
+vp9_decode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP9_DECODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_VP9_ENCODER. +vp9_encode_available() { + [ "$(vpx_config_option_enabled CONFIG_VP9_ENCODER)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpx_config_option_enabled() reports yes for +# CONFIG_WEBM_IO. +webm_io_available() { + [ "$(vpx_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes +} + +# Echoes yes to stdout when vpxdec exists according to vpx_tool_available(). +vpxdec_available() { + [ -n "$(vpx_tool_available vpxdec)" ] && echo yes +} + +# Wrapper function for running vpxdec in noblit mode. Requires that +# LIBVPX_BIN_PATH points to the directory containing vpxdec. Positional +# parameter one is used as the input file path. Positional parameter two, when +# present, is interpreted as a boolean flag that means the input should be sent +# to vpxdec via pipe from cat instead of directly. +vpxdec() { + input="${1}" + pipe_input=${2} + + if [ $# -gt 0 ]; then + # shift away $1 and $2 (when present) so only the remaining arguments are + # passed to vpxdec via $@. + shift $(( $# > 1 ? 2 : $# )) + fi + + decoder="${LIBVPX_BIN_PATH}/vpxdec" + + if [ "$(is_windows_target)" = "yes" ]; then + decoder="${decoder}.exe" + fi + + if [ -z "${pipe_input}" ]; then + "${decoder}" "$input" --summary --noblit "$@" > /dev/null 2>&1 + else + cat "${input}" | "${decoder}" - --summary --noblit "$@" > /dev/null 2>&1 + fi +} + +# Echoes yes to stdout when vpxenc exists according to vpx_tool_available(). +vpxenc_available() { + [ -n "$(vpx_tool_available vpxenc)" ] && echo yes +} + +# Wrapper function for running vpxenc. Positional parameters are interpreted as +# follows: +# 1 - codec name +# 2 - input width +# 3 - input height +# 4 - number of frames to encode +# 5 - path to input file +# 6 - path to output file +# Note: The output file path must end in .ivf to output an IVF file.
+# 7 - extra flags +# Note: Extra flags currently supports a special case: when set to "-" +# input is piped to vpxenc via cat. +vpxenc() { + encoder="${LIBVPX_BIN_PATH}/vpxenc" + codec="${1}" + width=${2} + height=${3} + frames=${4} + input=${5} + output="${VPX_TEST_OUTPUT_DIR}/${6}" + extra_flags=${7} + + if [ "$(is_windows_target)" = "yes" ]; then + encoder="${encoder}.exe" + fi + + # Because --ivf must be within the command line to get IVF from vpxenc. + if echo "${output}" | egrep -q 'ivf$'; then + use_ivf=--ivf + else + unset use_ivf + fi + + if [ "${extra_flags}" = "-" ]; then + pipe_input=yes + extra_flags=${8} + else + unset pipe_input + fi + + if [ -z "${pipe_input}" ]; then + "${encoder}" --codec=${codec} --width=${width} --height=${height} \ + --limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" \ + "${input}" > /dev/null 2>&1 + else + cat "${input}" \ + | "${encoder}" --codec=${codec} --width=${width} --height=${height} \ + --limit=${frames} ${use_ivf} ${extra_flags} --output="${output}" - \ + > /dev/null 2>&1 + fi + + if [ ! -e "${output}" ]; then + # Return non-zero exit status: output file doesn't exist, so something + # definitely went wrong. + return 1 + fi +} + +# Filters strings from positional parameter one using the filter specified by +# positional parameter two. Filter behavior depends on the presence of a third +# positional parameter. When parameter three is present, strings that match the +# filter are excluded. When omitted, strings matching the filter are included. +# The filtered string is echoed to stdout. +filter_strings() { + strings=${1} + filter=${2} + exclude=${3} + + if [ -n "${exclude}" ]; then + # When positional parameter three exists the caller wants to remove strings. + # Tell grep to invert matches using the -v argument. 
+ exclude='-v' + else + unset exclude + fi + + if [ -n "${filter}" ]; then + for s in ${strings}; do + if echo "${s}" | egrep -q ${exclude} "${filter}" > /dev/null 2>&1; then + filtered_strings="${filtered_strings} ${s}" + fi + done + else + filtered_strings="${strings}" + fi + echo "${filtered_strings}" +} + +# Runs user test functions passed via positional parameters one and two. +# Functions in positional parameter one are treated as environment verification +# functions and are run unconditionally. Functions in positional parameter two +# are run according to the rules specified in vpx_test_usage(). +run_tests() { + env_tests="verify_vpx_test_environment ${1}" + tests_to_filter="${2}" + + if [ "${VPX_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then + # Filter out DISABLED tests. + tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude) + fi + + if [ -n "${VPX_TEST_FILTER}" ]; then + # Remove tests not matching the user's filter. + tests_to_filter=$(filter_strings "${tests_to_filter}" ${VPX_TEST_FILTER}) + fi + + tests_to_run="${env_tests} ${tests_to_filter}" + + check_git_hashes + + # Run tests. + for test in ${tests_to_run}; do + test_begin "${test}" + "${test}" + [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo " PASS ${test}" + test_end "${test}" + done + + tested_config="$(test_configuration_target) @ $(current_hash)" + echo $(basename "${0%.*}"): Done, all tests pass for ${tested_config}. +} + +vpx_test_usage() { +cat << EOF + Usage: ${0##*/} [arguments] + --bin-path <path to libvpx binaries directory> + --config-path <path to libvpx config directory> + --filter <filter>: User test filter. Only tests matching filter are run. + --run-disabled-tests: Run disabled tests. + --help: Display this message and exit. + --test-data-path <path to libvpx test data directory> + --verbose: Verbose output. + + When the --bin-path option is not specified the script attempts to use + \$LIBVPX_BIN_PATH and then the current directory. 
+ + When the --config-path option is not specified the script attempts to use + \$LIBVPX_CONFIG_PATH and then the current directory. + + When the --test-data-path option is not specified the script attempts to use + \$LIBVPX_TEST_DATA_PATH and then the current directory. +EOF +} + +# Returns non-zero (failure) when required environment variables are empty +# strings. +vpx_test_check_environment() { + if [ -z "${LIBVPX_BIN_PATH}" ] || \ + [ -z "${LIBVPX_CONFIG_PATH}" ] || \ + [ -z "${LIBVPX_TEST_DATA_PATH}" ]; then + return 1 + fi +} + +# Parse the command line. +while [ -n "$1" ]; do + case "$1" in + --bin-path) + LIBVPX_BIN_PATH="$2" + shift + ;; + --config-path) + LIBVPX_CONFIG_PATH="$2" + shift + ;; + --filter) + VPX_TEST_FILTER="$2" + shift + ;; + --run-disabled-tests) + VPX_TEST_RUN_DISABLED_TESTS=yes + ;; + --help) + vpx_test_usage + exit + ;; + --test-data-path) + LIBVPX_TEST_DATA_PATH="$2" + shift + ;; + --verbose) + VPX_TEST_VERBOSE_OUTPUT=yes + ;; + *) + vpx_test_usage + exit 1 + ;; + esac + shift +done + +# Handle running the tests from a build directory without arguments when running +# the tests on *nix/macosx. +LIBVPX_BIN_PATH="${LIBVPX_BIN_PATH:-.}" +LIBVPX_CONFIG_PATH="${LIBVPX_CONFIG_PATH:-.}" +LIBVPX_TEST_DATA_PATH="${LIBVPX_TEST_DATA_PATH:-.}" + +# Create a temporary directory for output files, and a trap to clean it up. +if [ -n "${TMPDIR}" ]; then + VPX_TEST_TEMP_ROOT="${TMPDIR}" +elif [ -n "${TEMPDIR}" ]; then + VPX_TEST_TEMP_ROOT="${TEMPDIR}" +else + VPX_TEST_TEMP_ROOT=/tmp +fi + +VPX_TEST_RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}') +VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_${VPX_TEST_RAND}" + +if ! mkdir -p "${VPX_TEST_OUTPUT_DIR}" || \ + [ ! -d "${VPX_TEST_OUTPUT_DIR}" ]; then + echo "${0##*/}: Cannot create output directory, giving up."
+ echo "${0##*/}: VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}" + exit 1 +fi + +trap cleanup EXIT + +if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then +cat << EOF +$(basename "${0%.*}") test configuration: + LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH} + LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH} + LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH} + VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR} + VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT} + VPX_TEST_FILTER=${VPX_TEST_FILTER} + VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS} +EOF +fi diff --git a/libvpx/test/vp8_fdct4x4_test.cc b/libvpx/test/vp8_fdct4x4_test.cc index e3c292e..bdbf74e 100644 --- a/libvpx/test/vp8_fdct4x4_test.cc +++ b/libvpx/test/vp8_fdct4x4_test.cc @@ -68,7 +68,7 @@ void reference_idct4x4(const int16_t *input, int16_t *output) { using libvpx_test::ACMRandom; -TEST(Vp8FdctTest, SignBiasCheck) { +TEST(VP8FdctTest, SignBiasCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int16_t test_input_block[16]; int16_t test_output_block[16]; @@ -127,7 +127,7 @@ TEST(Vp8FdctTest, SignBiasCheck) { << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; }; -TEST(Vp8FdctTest, RoundTripErrorCheck) { +TEST(VP8FdctTest, RoundTripErrorCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; double total_error = 0; diff --git a/libvpx/test/vp9_thread_test.cc b/libvpx/test/vp9_thread_test.cc index a78cdea..5523f20 100644 --- a/libvpx/test/vp9_thread_test.cc +++ b/libvpx/test/vp9_thread_test.cc @@ -153,6 +153,66 @@ TEST(VP9DecodeMTTest, MTDecode2) { } } +// Test tile quantity changes within one file. 
+TEST(VP9DecodeMTTest, MTDecode3) { + static const struct { + const char *name; + const char *expected_md5; + } files[] = { + { "vp90-2-14-resize-fp-tiles-1-16.webm", + "0cd5e632c326297e975f38949c31ea94" }, + { "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", + "5c78a96a42e7f4a4f6b2edcdb791e44c" }, + { "vp90-2-14-resize-fp-tiles-1-2.webm", + "e030450ae85c3277be2a418769df98e2" }, + { "vp90-2-14-resize-fp-tiles-1-4.webm", + "312eed4e2b64eb7a4e7f18916606a430" }, + { "vp90-2-14-resize-fp-tiles-16-1.webm", + "1755c16d8af16a9cb3fe7338d90abe52" }, + { "vp90-2-14-resize-fp-tiles-16-2.webm", + "500300592d3fcb6f12fab25e48aaf4df" }, + { "vp90-2-14-resize-fp-tiles-16-4.webm", + "47c48379fa6331215d91c67648e1af6e" }, + { "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", + "eecf17290739bc708506fa4827665989" }, + { "vp90-2-14-resize-fp-tiles-16-8.webm", + "29b6bb54e4c26b5ca85d5de5fed94e76" }, + { "vp90-2-14-resize-fp-tiles-1-8.webm", + "1b6f175e08cd82cf84bb800ac6d1caa3" }, + { "vp90-2-14-resize-fp-tiles-2-16.webm", + "ca3b03e4197995d8d5444ede7a6c0804" }, + { "vp90-2-14-resize-fp-tiles-2-1.webm", + "99aec065369d70bbb78ccdff65afed3f" }, + { "vp90-2-14-resize-fp-tiles-2-4.webm", + "22d0ebdb49b87d2920a85aea32e1afd5" }, + { "vp90-2-14-resize-fp-tiles-2-8.webm", + "c2115cf051c62e0f7db1d4a783831541" }, + { "vp90-2-14-resize-fp-tiles-4-16.webm", + "c690d7e1719b31367564cac0af0939cb" }, + { "vp90-2-14-resize-fp-tiles-4-1.webm", + "a926020b2cc3e15ad4cc271853a0ff26" }, + { "vp90-2-14-resize-fp-tiles-4-2.webm", + "42699063d9e581f1993d0cf890c2be78" }, + { "vp90-2-14-resize-fp-tiles-4-8.webm", + "7f76d96036382f45121e3d5aa6f8ec52" }, + { "vp90-2-14-resize-fp-tiles-8-16.webm", + "76a43fcdd7e658542913ea43216ec55d" }, + { "vp90-2-14-resize-fp-tiles-8-1.webm", + "8e3fbe89486ca60a59299dea9da91378" }, + { "vp90-2-14-resize-fp-tiles-8-2.webm", + "ae96f21f21b6370cc0125621b441fc52" }, + { "vp90-2-14-resize-fp-tiles-8-4.webm", + "3eb4f24f10640d42218f7fd7b9fd30d4" }, + }; + + for (int i = 0; i < 
static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) { + for (int t = 2; t <= 8; ++t) { + EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str()) + << "threads = " << t; + } + } +} + INSTANTIATE_TEST_CASE_P(Synchronous, VP9WorkerThreadTest, ::testing::Bool()); } // namespace diff --git a/libvpx/test/vpxdec.sh b/libvpx/test/vpxdec.sh new file mode 100755 index 0000000..d236f97 --- /dev/null +++ b/libvpx/test/vpxdec.sh @@ -0,0 +1,65 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests vpxdec. To add new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to vpxdec_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf" +VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm" + +# Environment check: Make sure input is available. +vpxdec_verify_environment() { + if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ]; then + echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." 
+ return 1 + fi +} + +vpxdec_can_decode_vp8() { + if [ "$(vpxdec_available)" = "yes" ] && \ + [ "$(vp8_decode_available)" = "yes" ]; then + echo yes + fi +} + +vpxdec_can_decode_vp9() { + if [ "$(vpxdec_available)" = "yes" ] && \ + [ "$(vp9_decode_available)" = "yes" ]; then + echo yes + fi +} + +vpxdec_vp8_ivf() { + if [ "$(vpxdec_can_decode_vp8)" = "yes" ]; then + vpxdec "${VP8_IVF_FILE}" + fi +} + +vpxdec_vp8_ivf_pipe_input() { + if [ "$(vpxdec_can_decode_vp8)" = "yes" ]; then + vpxdec "${VP8_IVF_FILE}" - + fi +} + +vpxdec_vp9_webm() { + if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \ + [ "$(webm_io_available)" = "yes" ]; then + vpxdec "${VP9_WEBM_FILE}" + fi +} + +vpxdec_tests="vpxdec_vp8_ivf + vpxdec_vp8_ivf_pipe_input + vpxdec_vp9_webm" + +run_tests vpxdec_verify_environment "${vpxdec_tests}" diff --git a/libvpx/test/vpxenc.sh b/libvpx/test/vpxenc.sh new file mode 100755 index 0000000..89e4eb3 --- /dev/null +++ b/libvpx/test/vpxenc.sh @@ -0,0 +1,96 @@ +#!/bin/sh +## +## Copyright (c) 2014 The WebM project authors. All Rights Reserved. +## +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. +## +## This file tests vpxenc using hantro_collage_w352h288.yuv as input. To add +## new tests to this file, do the following: +## 1. Write a shell function (this is your test). +## 2. Add the function to vpxenc_tests (on a new line). +## +. $(dirname $0)/tools_common.sh + +YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv" +YUV_RAW_INPUT_WIDTH=352 +YUV_RAW_INPUT_HEIGHT=288 +TEST_FRAMES=10 + +# Environment check: Make sure input is available. +vpxenc_verify_environment() { + if [ ! 
-e "${YUV_RAW_INPUT}" ]; then + echo "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH." + return 1 + fi +} + +vpxenc_can_encode_vp8() { + if [ "$(vpxenc_available)" = "yes" ] && \ + [ "$(vp8_encode_available)" = "yes" ]; then + echo yes + fi +} + +vpxenc_can_encode_vp9() { + if [ "$(vpxenc_available)" = "yes" ] && \ + [ "$(vp9_encode_available)" = "yes" ]; then + echo yes + fi +} + +vpxenc_vp8_ivf() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.ivf + fi +} + +vpxenc_vp8_ivf_pipe_input() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.ivf - + fi +} + +vpxenc_vp8_webm() { + if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && + [ "$(webm_io_available)" = "yes" ] ; then + vpxenc vp8 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp8.webm + fi +} + +vpxenc_vp9_ivf() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9.ivf + fi +} + +vpxenc_vp9_webm() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && + [ "$(webm_io_available)" = "yes" ] ; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9.webm + fi +} + +DISABLED_vpxenc_vp9_ivf_lossless() { + if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then + vpxenc vp9 ${YUV_RAW_INPUT_WIDTH} ${YUV_RAW_INPUT_HEIGHT} ${TEST_FRAMES} \ + "${YUV_RAW_INPUT}" vp9_lossless.ivf --lossless + fi +} + +vpxenc_tests="vpxenc_vp8_ivf + vpxenc_vp8_webm + vpxenc_vp8_ivf_pipe_input + vpxenc_vp9_ivf + vpxenc_vp9_webm + DISABLED_vpxenc_vp9_ivf_lossless" + +run_tests vpxenc_verify_environment "${vpxenc_tests}" diff --git a/libvpx/third_party/libwebm/README.webm b/libvpx/third_party/libwebm/README.webm index b13c8cb..2c7570d 100644 --- 
a/libvpx/third_party/libwebm/README.webm +++ b/libvpx/third_party/libwebm/README.webm @@ -1,5 +1,5 @@ URL: https://chromium.googlesource.com/webm/libwebm -Version: 630a0e3c338e1b32bddf513a2dad807908d2976a +Version: a7118d8ec564e9db841da1eb01f547f3229f240a License: BSD License File: LICENSE.txt diff --git a/libvpx/third_party/libwebm/mkvmuxerutil.cpp b/libvpx/third_party/libwebm/mkvmuxerutil.cpp index 96350e9..18060e9 100644 --- a/libvpx/third_party/libwebm/mkvmuxerutil.cpp +++ b/libvpx/third_party/libwebm/mkvmuxerutil.cpp @@ -292,11 +292,11 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) { if (WriteID(writer, type)) return false; - const int32 length = strlen(value); + const uint64 length = strlen(value); if (WriteUInt(writer, length)) return false; - if (writer->Write(value, length)) + if (writer->Write(value, static_cast<const uint32>(length))) return false; return true; diff --git a/libvpx/third_party/libwebm/mkvreader.cpp b/libvpx/third_party/libwebm/mkvreader.cpp index cb3567f..b4b2459 100644 --- a/libvpx/third_party/libwebm/mkvreader.cpp +++ b/libvpx/third_party/libwebm/mkvreader.cpp @@ -14,13 +14,20 @@ namespace mkvparser { MkvReader::MkvReader() : - m_file(NULL) -{ + m_file(NULL), + reader_owns_file_(true) { } -MkvReader::~MkvReader() -{ +MkvReader::MkvReader(FILE* fp) : + m_file(fp), + reader_owns_file_(false) { + GetFileSize(); +} + +MkvReader::~MkvReader() { + if (reader_owns_file_) Close(); + m_file = NULL; } int MkvReader::Open(const char* fileName) @@ -42,12 +49,17 @@ int MkvReader::Open(const char* fileName) if (m_file == NULL) return -1; #endif + return !GetFileSize(); +} +bool MkvReader::GetFileSize() { + if (m_file == NULL) + return false; #ifdef _MSC_VER int status = _fseeki64(m_file, 0L, SEEK_END); if (status) - return -1; //error + return false; //error m_length = _ftelli64(m_file); #else @@ -56,16 +68,19 @@ int MkvReader::Open(const char* fileName) #endif assert(m_length >= 0); + if (m_length < 0) + return false; + 
#ifdef _MSC_VER status = _fseeki64(m_file, 0L, SEEK_SET); if (status) - return -1; //error + return false; //error #else fseek(m_file, 0L, SEEK_SET); #endif - return 0; + return true; } void MkvReader::Close() diff --git a/libvpx/third_party/libwebm/mkvreader.hpp b/libvpx/third_party/libwebm/mkvreader.hpp index adcc29f..8ebdd99 100644 --- a/libvpx/third_party/libwebm/mkvreader.hpp +++ b/libvpx/third_party/libwebm/mkvreader.hpp @@ -21,6 +21,7 @@ class MkvReader : public IMkvReader MkvReader& operator=(const MkvReader&); public: MkvReader(); + MkvReader(FILE* fp); virtual ~MkvReader(); int Open(const char*); @@ -29,8 +30,15 @@ public: virtual int Read(long long position, long length, unsigned char* buffer); virtual int Length(long long* total, long long* available); private: + + // Determines the size of the file. This is called either by the constructor + // or by the Open function depending on file ownership. Returns true on + // success. + bool GetFileSize(); + long long m_length; FILE* m_file; + bool reader_owns_file_; }; } //end namespace mkvparser diff --git a/libvpx/tools_common.h b/libvpx/tools_common.h index 58894de..549e895 100644 --- a/libvpx/tools_common.h +++ b/libvpx/tools_common.h @@ -22,10 +22,12 @@ #endif #if defined(_MSC_VER) -/* MSVS doesn't define off_t, and uses _f{seek,tell}i64. */ -typedef __int64 off_t; +/* MSVS uses _f{seek,tell}i64. */ #define fseeko _fseeki64 #define ftello _ftelli64 +typedef long _off_t; // NOLINT - MSVS compatible type +typedef __int64 off_t; // fseeki64 compatible type +#define _OFF_T_DEFINED #elif defined(_WIN32) /* MinGW defines off_t as long and uses f{seek,tell}o64/off64_t for large * files. 
*/ diff --git a/libvpx/vp8/common/loopfilter.c b/libvpx/vp8/common/loopfilter.c index 19857a7..7a07e76 100644 --- a/libvpx/vp8/common/loopfilter.c +++ b/libvpx/vp8/common/loopfilter.c @@ -15,7 +15,6 @@ #include "onyxc_int.h" #include "vpx_mem/vpx_mem.h" -typedef unsigned char uc; static void lf_init_lut(loop_filter_info_n *lfi) { diff --git a/libvpx/vp8/common/postproc.c b/libvpx/vp8/common/postproc.c index e3bee32..7d0fbf6 100644 --- a/libvpx/vp8/common/postproc.c +++ b/libvpx/vp8/common/postproc.c @@ -303,8 +303,8 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, i { d[r&15] = (rv2[r&127] + sum + s[0]) >> 4; } - - s[-8*pitch] = d[(r-8)&15]; + if (r >= 8) + s[-8*pitch] = d[(r-8)&15]; s += pitch; } } diff --git a/libvpx/vp8/common/x86/postproc_mmx.asm b/libvpx/vp8/common/x86/postproc_mmx.asm index 5cf110b..8be3431 100644 --- a/libvpx/vp8/common/x86/postproc_mmx.asm +++ b/libvpx/vp8/common/x86/postproc_mmx.asm @@ -204,13 +204,16 @@ sym(vp8_mbpost_proc_down_mmx): and rcx, 15 movd DWORD PTR [rsp+rcx*4], mm1 ;d[rcx*4] + cmp edx, 8 + jl .skip_assignment + mov rcx, rdx sub rcx, 8 - and rcx, 15 movd mm1, DWORD PTR [rsp+rcx*4] ;d[rcx*4] - movd [rsi], mm1 + +.skip_assignment lea rsi, [rsi+rax] lea rdi, [rdi+rax] diff --git a/libvpx/vp8/common/x86/postproc_sse2.asm b/libvpx/vp8/common/x86/postproc_sse2.asm index 00f84a3..f53daa7 100644 --- a/libvpx/vp8/common/x86/postproc_sse2.asm +++ b/libvpx/vp8/common/x86/postproc_sse2.asm @@ -425,13 +425,16 @@ sym(vp8_mbpost_proc_down_xmm): and rcx, 15 movq QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8] + cmp edx, 8 + jl .skip_assignment + mov rcx, rdx sub rcx, 8 - and rcx, 15 movq mm0, [rsp + rcx*8] ;d[rcx*8] - movq [rsi], mm0 + +.skip_assignment lea rsi, [rsi+rax] lea rdi, [rdi+rax] diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c index 08ab27a..f44ada1 100644 --- a/libvpx/vp9/common/vp9_alloccommon.c +++ b/libvpx/vp9/common/vp9_alloccommon.c @@ -17,22 +17,21 @@ #include 
"vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_systemdependent.h" -void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) { - const int stride = cm->mode_info_stride; +static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) { int i; - // Clear down top border row - vpx_memset(mi, 0, sizeof(MODE_INFO) * stride); + // Top border row + vpx_memset(mi, 0, sizeof(*mi) * cm->mi_stride); - // Clear left border column - for (i = 1; i < cm->mi_rows + 1; i++) - vpx_memset(&mi[i * stride], 0, sizeof(MODE_INFO)); + // Left border column + for (i = 1; i < cm->mi_rows + 1; ++i) + vpx_memset(&mi[i * cm->mi_stride], 0, sizeof(*mi)); } static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) { cm->mi_cols = aligned_width >> MI_SIZE_LOG2; cm->mi_rows = aligned_height >> MI_SIZE_LOG2; - cm->mode_info_stride = cm->mi_cols + MI_BLOCK_SIZE; + cm->mi_stride = cm->mi_cols + MI_BLOCK_SIZE; cm->mb_cols = (cm->mi_cols + 1) >> 1; cm->mb_rows = (cm->mi_rows + 1) >> 1; @@ -40,19 +39,17 @@ static void set_mb_mi(VP9_COMMON *cm, int aligned_width, int aligned_height) { } static void setup_mi(VP9_COMMON *cm) { - cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; + cm->mi = cm->mip + cm->mi_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; - vpx_memset(cm->mip, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); + vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); - vpx_memset(cm->mi_grid_base, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * - sizeof(*cm->mi_grid_base)); + vpx_memset(cm->mi_grid_base, 0, cm->mi_stride * (cm->mi_rows + 1) * + sizeof(*cm->mi_grid_base)); - 
vp9_update_mode_info_border(cm, cm->prev_mip); + clear_mi_border(cm, cm->prev_mip); } static int alloc_mi(VP9_COMMON *cm, int mi_size) { @@ -108,6 +105,12 @@ void vp9_free_frame_buffers(VP9_COMMON *cm) { vpx_free(cm->last_frame_seg_map); cm->last_frame_seg_map = NULL; + + vpx_free(cm->above_context); + cm->above_context = NULL; + + vpx_free(cm->above_seg_context); + cm->above_seg_context = NULL; } int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { @@ -123,7 +126,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { set_mb_mi(cm, aligned_width, aligned_height); free_mi(cm); - if (alloc_mi(cm, cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE))) + if (alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE))) goto fail; setup_mi(cm); @@ -134,6 +137,21 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { if (!cm->last_frame_seg_map) goto fail; + vpx_free(cm->above_context); + cm->above_context = + (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * + MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) + goto fail; + + vpx_free(cm->above_seg_context); + cm->above_seg_context = + (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), + sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) + goto fail; + return 0; fail: @@ -142,12 +160,11 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) { } int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { - int i; - const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); const int ss_x = cm->subsampling_x; const int ss_y = cm->subsampling_y; + int i; vp9_free_frame_buffers(cm); @@ -172,7 +189,7 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { set_mb_mi(cm, aligned_width, aligned_height); - if (alloc_mi(cm, cm->mode_info_stride * (cm->mi_rows + MI_BLOCK_SIZE))) + if 
(alloc_mi(cm, cm->mi_stride * (cm->mi_rows + MI_BLOCK_SIZE))) goto fail; setup_mi(cm); @@ -182,6 +199,19 @@ int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) { if (!cm->last_frame_seg_map) goto fail; + cm->above_context = + (ENTROPY_CONTEXT *)vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * + MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) + goto fail; + + cm->above_seg_context = + (PARTITION_CONTEXT *)vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), + sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) + goto fail; + return 0; fail: @@ -194,10 +224,6 @@ void vp9_remove_common(VP9_COMMON *cm) { vp9_free_internal_frame_buffers(&cm->int_frame_buffers); } -void vp9_initialize_common() { - vp9_init_neighbors(); -} - void vp9_update_frame_size(VP9_COMMON *cm) { const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(cm->height, MI_SIZE_LOG2); @@ -220,8 +246,8 @@ void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { cm->mi_grid_base = temp2; // Update the upper left visible macroblock ptrs. 
- cm->mi = cm->mip + cm->mode_info_stride + 1; - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; + cm->mi = cm->mip + cm->mi_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; } diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h index fca6935..06636a9 100644 --- a/libvpx/vp9/common/vp9_alloccommon.h +++ b/libvpx/vp9/common/vp9_alloccommon.h @@ -12,26 +12,23 @@ #ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ #define VP9_COMMON_VP9_ALLOCCOMMON_H_ -#include "vp9/common/vp9_onyxc_int.h" - #ifdef __cplusplus extern "C" { #endif -void vp9_initialize_common(); +struct VP9Common; -void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi); +void vp9_remove_common(struct VP9Common *cm); -void vp9_remove_common(VP9_COMMON *cm); +int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height); -int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height); -int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height); -void vp9_free_frame_buffers(VP9_COMMON *cm); +int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height); +void vp9_free_frame_buffers(struct VP9Common *cm); -void vp9_update_frame_size(VP9_COMMON *cm); +void vp9_update_frame_size(struct VP9Common *cm); -void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm); +void vp9_swap_mi_and_prev_mi(struct VP9Common *cm); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/common/vp9_blockd.c b/libvpx/vp9/common/vp9_blockd.c index e1d1318..fedfb18 100644 --- a/libvpx/vp9/common/vp9_blockd.c +++ b/libvpx/vp9/common/vp9_blockd.c @@ -40,7 +40,7 @@ void vp9_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, 
foreach_transformed_block_visitor visit, void *arg) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO* mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi; // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h index ca5a0c2..55320a6 100644 --- a/libvpx/vp9/common/vp9_blockd.h +++ b/libvpx/vp9/common/vp9_blockd.h @@ -120,23 +120,23 @@ static INLINE int mi_width_log2(BLOCK_SIZE sb_type) { // This structure now relates to 8x8 block regions. typedef struct { - MB_PREDICTION_MODE mode, uv_mode; - MV_REFERENCE_FRAME ref_frame[2]; + // Common for both INTER and INTRA blocks + BLOCK_SIZE sb_type; + MB_PREDICTION_MODE mode; TX_SIZE tx_size; - int_mv mv[2]; // for each reference frame used - int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; - - uint8_t mode_context[MAX_REF_FRAMES]; - - unsigned char skip; // 0=need to decode coeffs, 1=no coefficients - unsigned char segment_id; // Segment id for this block. 
+ uint8_t skip; + uint8_t segment_id; + uint8_t seg_id_predicted; // valid only when temporal_update is enabled - // Flags used for prediction status of various bit-stream signals - unsigned char seg_id_predicted; + // Only for INTRA blocks + MB_PREDICTION_MODE uv_mode; + // Only for INTER blocks + MV_REFERENCE_FRAME ref_frame[2]; + int_mv mv[2]; + int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; + uint8_t mode_context[MAX_REF_FRAMES]; INTERP_FILTER interp_filter; - - BLOCK_SIZE sb_type; } MB_MODE_INFO; typedef struct { @@ -204,11 +204,10 @@ typedef struct RefBuffer { typedef struct macroblockd { struct macroblockd_plane plane[MAX_MB_PLANE]; - int mode_info_stride; + int mi_stride; // A NULL indicates that the 8x8 is not part of the image - MODE_INFO **mi_8x8; - MODE_INFO **prev_mi_8x8; + MODE_INFO **mi; int up_available; int left_available; @@ -232,11 +231,10 @@ typedef struct macroblockd { /* Inverse transform function pointers. */ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); - const InterpKernel *interp_kernel; - int corrupted; - /* Y,U,V,(A) */ + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; @@ -244,8 +242,6 @@ typedef struct macroblockd { PARTITION_CONTEXT left_seg_context[8]; } MACROBLOCKD; - - static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) { const BLOCK_SIZE subsize = subsize_lookup[partition][bsize]; @@ -257,7 +253,7 @@ extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd) { - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; if (plane_type != PLANE_TYPE_Y || is_inter_block(mbmi)) return DCT_DCT; @@ -266,7 +262,7 @@ static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, 
const MACROBLOCKD *xd, int ib) { - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi)) return DCT_DCT; diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h index 15bf8eb..6788eb6 100644 --- a/libvpx/vp9/common/vp9_entropy.h +++ b/libvpx/vp9/common/vp9_entropy.h @@ -175,7 +175,7 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, static const INLINE scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, PLANE_TYPE type, int block_idx) { - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) { return &vp9_default_scan_orders[tx_size]; diff --git a/libvpx/vp9/common/vp9_entropymode.c b/libvpx/vp9/common/vp9_entropymode.c index f2c81bc..5b00b00 100644 --- a/libvpx/vp9/common/vp9_entropymode.c +++ b/libvpx/vp9/common/vp9_entropymode.c @@ -465,11 +465,10 @@ void vp9_setup_past_independence(VP9_COMMON *cm) { } if (frame_is_intra_only(cm)) - vpx_memset(cm->prev_mip, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) * + sizeof(*cm->prev_mip)); - vpx_memset(cm->mip, 0, - cm->mode_info_stride * (cm->mi_rows + 1) * sizeof(MODE_INFO)); + vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); vp9_zero(cm->ref_frame_sign_bias); diff --git a/libvpx/vp9/common/vp9_entropymv.c b/libvpx/vp9/common/vp9_entropymv.c index 197b7c0..5bb0482 100644 --- a/libvpx/vp9/common/vp9_entropymv.c +++ b/libvpx/vp9/common/vp9_entropymv.c @@ -8,14 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ - #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymv.h" #define MV_COUNT_SAT 20 #define MV_MAX_UPDATE_FACTOR 128 -/* Integer pel reference mv threshold for use of high-precision 1/8 mv */ +// Integer pel reference mv threshold for use of high-precision 1/8 mv #define COMPANDED_MVREF_THRESH 8 const vp9_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { @@ -49,32 +48,30 @@ const vp9_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { static const nmv_context default_nmv_context = { {32, 64, 96}, - { // NOLINT - { /* vert component */ // NOLINT - 128, /* sign */ - {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, /* class */ - {216}, /* class0 */ - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */ - {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ - {64, 96, 64}, /* fp */ - 160, /* class0_hp bit */ - 128, /* hp */ + { + { // Vertical component + 128, // sign + {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class + {216}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp }, - { /* hor component */ // NOLINT - 128, /* sign */ - {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, /* class */ - {208}, /* class0 */ - {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */ - {{128, 128, 64}, {96, 112, 64}}, /* class0_fp */ - {64, 96, 64}, /* fp */ - 160, /* class0_hp bit */ - 128, /* hp */ + { // Horizontal component + 128, // sign + {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class + {208}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp } }, }; -#define mv_class_base(c) ((c) ? 
(CLASS0_SIZE << (c + 2)) : 0) - static const uint8_t log_in_base_2[] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -121,9 +118,13 @@ static const uint8_t log_in_base_2[] = { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 }; +static INLINE int mv_class_base(MV_CLASS_TYPE c) { + return c ? CLASS0_SIZE << (c + 2) : 0; +} + MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { - const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? MV_CLASS_10 : - (MV_CLASS_TYPE)log_in_base_2[z >> 3]; + const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? + MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; if (offset) *offset = z - mv_class_base(c); return c; diff --git a/libvpx/vp9/common/vp9_enums.h b/libvpx/vp9/common/vp9_enums.h index e96e769..068284f 100644 --- a/libvpx/vp9/common/vp9_enums.h +++ b/libvpx/vp9/common/vp9_enums.h @@ -25,6 +25,23 @@ extern "C" { #define MI_MASK (MI_BLOCK_SIZE - 1) +// Bitstream profiles indicated by 2 bits in the uncompressed header. +// 00: Profile 0. 4:2:0 only. +// 10: Profile 1. adds 4:4:4, 4:2:2, alpha. +// 01: Profile 2. Supports 10-bit and 12-bit color only. +// 11: Undefined profile. 
+typedef enum BITSTREAM_PROFILE { + PROFILE_0, + PROFILE_1, + PROFILE_2, + MAX_PROFILES +} BITSTREAM_PROFILE; + +typedef enum BIT_DEPTH { + BITS_8, + BITS_10, + BITS_12 +} BIT_DEPTH; typedef enum BLOCK_SIZE { BLOCK_4X4, @@ -94,6 +111,12 @@ typedef enum { SRGB = 7 // RGB } COLOR_SPACE; +typedef enum { + VP9_LAST_FLAG = 1 << 0, + VP9_GOLD_FLAG = 1 << 1, + VP9_ALT_FLAG = 1 << 2, +} VP9_REFFRAME; + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c index af8afed..3ac5a05 100644 --- a/libvpx/vp9/common/vp9_loopfilter.c +++ b/libvpx/vp9/common/vp9_loopfilter.c @@ -228,6 +228,12 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { } } +static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, + const MB_MODE_INFO *mbmi) { + return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]] + [mode_lf_lut[mbmi->mode]]; +} + void vp9_loop_filter_init(VP9_COMMON *cm) { loop_filter_info_n *lfi = &cm->lf_info; struct loopfilter *lf = &cm->lf; @@ -493,27 +499,25 @@ static void build_masks(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, const int shift_uv, LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->mbmi.sb_type; - const TX_SIZE tx_size_y = mi->mbmi.tx_size; - const TX_SIZE tx_size_uv = get_uv_tx_size(&mi->mbmi); - const int skip = mi->mbmi.skip; - const int seg = mi->mbmi.segment_id; - const int ref = mi->mbmi.ref_frame[0]; - const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]]; - uint64_t *left_y = &lfm->left_y[tx_size_y]; - uint64_t *above_y = &lfm->above_y[tx_size_y]; - uint64_t *int_4x4_y = &lfm->int_4x4_y; - uint16_t *left_uv = &lfm->left_uv[tx_size_uv]; - uint16_t *above_uv = &lfm->above_uv[tx_size_uv]; - uint16_t *int_4x4_uv = &lfm->int_4x4_uv; + const MB_MODE_INFO *mbmi = &mi->mbmi; + const BLOCK_SIZE block_size = mbmi->sb_type; + const TX_SIZE tx_size_y = mbmi->tx_size; + const TX_SIZE tx_size_uv = 
get_uv_tx_size(mbmi); + const int filter_level = get_filter_level(lfi_n, mbmi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; int i; - int w = num_8x8_blocks_wide_lookup[block_size]; - int h = num_8x8_blocks_high_lookup[block_size]; // If filter level is 0 we don't loop filter. if (!filter_level) { return; } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { vpx_memset(&lfm->lfl_y[index], filter_level, w); @@ -540,7 +544,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. - if (skip && ref > INTRA_FRAME) + if (mbmi->skip && is_inter_block(mbmi)) return; // Here we are adding a mask for the transform size. The transform @@ -561,12 +565,11 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // boundaries. These differ from the 4x4 boundaries on the outside edge of // an 8x8 in that the internal ones can be skipped and don't depend on // the prediction block size. 
- if (tx_size_y == TX_4X4) { + if (tx_size_y == TX_4X4) *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; - } - if (tx_size_uv == TX_4X4) { + + if (tx_size_uv == TX_4X4) *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; - } } // This function does the same thing as the one above with the exception that @@ -575,22 +578,20 @@ static void build_masks(const loop_filter_info_n *const lfi_n, static void build_y_mask(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, LOOP_FILTER_MASK *lfm) { - const BLOCK_SIZE block_size = mi->mbmi.sb_type; - const TX_SIZE tx_size_y = mi->mbmi.tx_size; - const int skip = mi->mbmi.skip; - const int seg = mi->mbmi.segment_id; - const int ref = mi->mbmi.ref_frame[0]; - const int filter_level = lfi_n->lvl[seg][ref][mode_lf_lut[mi->mbmi.mode]]; - uint64_t *left_y = &lfm->left_y[tx_size_y]; - uint64_t *above_y = &lfm->above_y[tx_size_y]; - uint64_t *int_4x4_y = &lfm->int_4x4_y; + const MB_MODE_INFO *mbmi = &mi->mbmi; + const BLOCK_SIZE block_size = mbmi->sb_type; + const TX_SIZE tx_size_y = mbmi->tx_size; + const int filter_level = get_filter_level(lfi_n, mbmi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; int i; - int w = num_8x8_blocks_wide_lookup[block_size]; - int h = num_8x8_blocks_high_lookup[block_size]; if (!filter_level) { return; } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { vpx_memset(&lfm->lfl_y[index], filter_level, w); @@ -601,7 +602,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= left_prediction_mask[block_size] << shift_y; - if (skip && ref > INTRA_FRAME) + if (mbmi->skip && is_inter_block(mbmi)) return; *above_y |= (size_mask[block_size] & @@ -610,9 
+611,8 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) << shift_y; - if (tx_size_y == TX_4X4) { + if (tx_size_y == TX_4X4) *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y; - } } // This function sets up the bit masks for the entire 64x64 region represented @@ -868,13 +868,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); } -static uint8_t build_lfi(const loop_filter_info_n *lfi_n, - const MB_MODE_INFO *mbmi) { - const int seg = mbmi->segment_id; - const int ref = mbmi->ref_frame[0]; - return lfi_n->lvl[seg][ref][mode_lf_lut[mbmi->mode]]; -} - static void filter_selectively_vert(uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, @@ -916,7 +909,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm, const int ss_y = plane->subsampling_y; const int row_step = 1 << ss_x; const int col_step = 1 << ss_y; - const int row_step_stride = cm->mode_info_stride * row_step; + const int row_step_stride = cm->mi_stride * row_step; struct buf_2d *const dst = &plane->dst; uint8_t* const dst0 = dst->buf; unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; @@ -953,7 +946,7 @@ static void filter_block_plane_non420(VP9_COMMON *cm, // Filter level can vary per MI if (!(lfl[(r << 3) + (c >> ss_x)] = - build_lfi(&cm->lf_info, &mi[0].mbmi))) + get_filter_level(&cm->lf_info, &mi[0].mbmi))) continue; // Build masks based on the transform size of each block @@ -1208,7 +1201,7 @@ void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, xd->plane[1].subsampling_x == 1); for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int plane; @@ -1217,8 +1210,8 @@ 
void vp9_loop_filter_rows(const YV12_BUFFER_CONFIG *frame_buffer, // TODO(JBB): Make setup_mask work for non 420. if (use_420) - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, - cm->mode_info_stride, &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, + &lfm); for (plane = 0; plane < num_planes; ++plane) { if (use_420) diff --git a/libvpx/vp9/common/vp9_mvref_common.c b/libvpx/vp9/common/vp9_mvref_common.c index 9f2c2df..1aab362 100644 --- a/libvpx/vp9/common/vp9_mvref_common.c +++ b/libvpx/vp9/common/vp9_mvref_common.c @@ -148,28 +148,30 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. If it's the second motion vector it will also // skip all additional processing and jump to done! -#define ADD_MV_REF_LIST(MV) \ +#define ADD_MV_REF_LIST(mv) \ do { \ if (refmv_count) { \ - if ((MV).as_int != mv_ref_list[0].as_int) { \ - mv_ref_list[refmv_count] = (MV); \ + if ((mv).as_int != mv_ref_list[0].as_int) { \ + mv_ref_list[refmv_count] = (mv); \ goto Done; \ } \ } else { \ - mv_ref_list[refmv_count++] = (MV); \ + mv_ref_list[refmv_count++] = (mv); \ } \ } while (0) // If either reference frame is different, not INTRA, and they // are different from each other scale and add the mv to our list. 
-#define IF_DIFF_REF_FRAME_ADD_MV(CANDIDATE) \ +#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \ do { \ - if ((CANDIDATE)->ref_frame[0] != ref_frame) \ - ADD_MV_REF_LIST(scale_mv((CANDIDATE), 0, ref_frame, ref_sign_bias)); \ - if ((CANDIDATE)->ref_frame[1] != ref_frame && \ - has_second_ref(CANDIDATE) && \ - (CANDIDATE)->mv[1].as_int != (CANDIDATE)->mv[0].as_int) \ - ADD_MV_REF_LIST(scale_mv((CANDIDATE), 1, ref_frame, ref_sign_bias)); \ + if (is_inter_block(mbmi)) { \ + if ((mbmi)->ref_frame[0] != ref_frame) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \ + if (has_second_ref(mbmi) && \ + (mbmi)->ref_frame[1] != ref_frame && \ + (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ + ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \ + } \ } while (0) @@ -193,11 +195,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, int block, int mi_row, int mi_col) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; - const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi ? - xd->prev_mi_8x8[0] : NULL; + const MODE_INFO *prev_mi = cm->prev_mi + ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] + : NULL; + const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL; + + const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; - const MB_MODE_INFO *const prev_mbmi = cm->coding_use_prev_mi && prev_mi ? 
- &prev_mi->mbmi : NULL; + int different_ref_found = 0; int context_counter = 0; @@ -210,8 +215,8 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < 2; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *const candidate_mi = xd->mi_8x8[mv_ref->col + mv_ref->row - * xd->mode_info_stride]; + const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]; const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. context_counter += mode_2_counter[candidate->mode]; @@ -230,9 +235,8 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (; i < MVREF_NEIGHBOURS; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col + - mv_ref->row - * xd->mode_info_stride]->mbmi; + const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]->mbmi; different_ref_found = 1; if (candidate->ref_frame[0] == ref_frame) @@ -257,19 +261,17 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi_8x8[mv_ref->col + - mv_ref->row - * xd->mode_info_stride]->mbmi; + const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row + * xd->mi_stride]->mbmi; // If the candidate is INTRA we don't want to consider its mv. - if (is_inter_block(candidate)) - IF_DIFF_REF_FRAME_ADD_MV(candidate); + IF_DIFF_REF_FRAME_ADD_MV(candidate); } } } // Since we still don't have a candidate we'll try the last frame. 
- if (prev_mbmi && is_inter_block(prev_mbmi)) + if (prev_mbmi) IF_DIFF_REF_FRAME_ADD_MV(prev_mbmi); Done: @@ -318,7 +320,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, int ref, int mi_row, int mi_col, int_mv *nearest, int_mv *near) { int_mv mv_list[MAX_MV_REF_CANDIDATES]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; b_mode_info *bmi = mi->bmi; int n; diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h index 52889f7..fe9cc9e 100644 --- a/libvpx/vp9/common/vp9_onyxc_int.h +++ b/libvpx/vp9/common/vp9_onyxc_int.h @@ -126,7 +126,7 @@ typedef struct VP9Common { int MBs; int mb_rows, mi_rows; int mb_cols, mi_cols; - int mode_info_stride; + int mi_stride; /* profile settings */ TX_MODE tx_mode; @@ -179,7 +179,10 @@ typedef struct VP9Common { FRAME_COUNTS counts; unsigned int current_video_frame; - int version; + BITSTREAM_PROFILE profile; + + // BITS_8 in versions 0 and 1, BITS_10 or BITS_12 in version 2 + BIT_DEPTH bit_depth; #if CONFIG_VP9_POSTPROC struct postproc_state postproc_state; @@ -202,6 +205,9 @@ typedef struct VP9Common { // Handles memory for the codec. 
InternalFrameBufferList int_frame_buffers; + + PARTITION_CONTEXT *above_seg_context; + ENTROPY_CONTEXT *above_context; } VP9_COMMON; static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { @@ -234,24 +240,33 @@ static INLINE int mi_cols_aligned_to_sb(int n_mis) { return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); } +static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) { + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->plane[i].dqcoeff = xd->dqcoeff[i]; + xd->above_context[i] = cm->above_context + + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + } + + xd->above_seg_context = cm->above_seg_context; + xd->mi_stride = cm->mi_stride; +} + static INLINE const vp9_prob* get_partition_probs(const VP9_COMMON *cm, int ctx) { return cm->frame_type == KEY_FRAME ? vp9_kf_partition_probs[ctx] : cm->fc.partition_prob[ctx]; } -static INLINE void set_skip_context( - MACROBLOCKD *xd, - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE], - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16], - int mi_row, int mi_col) { +static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { const int above_idx = mi_col * 2; const int left_idx = (mi_row * 2) & 15; int i; - for (i = 0; i < MAX_MB_PLANE; i++) { + for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; - pd->above_context = above_context[i] + (above_idx >> pd->subsampling_x); - pd->left_context = left_context[i] + (left_idx >> pd->subsampling_y); + pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; + pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; } } @@ -269,27 +284,27 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, xd->left_available = (mi_col > tile->mi_col_start); } -static INLINE void set_prev_mi(VP9_COMMON *cm) { - const int use_prev_in_find_mv_refs = cm->width == cm->last_width && - cm->height == cm->last_height && - !cm->intra_only && 
- cm->last_show_frame; +static INLINE MODE_INFO *get_prev_mi(VP9_COMMON *cm) { + const int use_prev_mi = cm->coding_use_prev_mi && + cm->width == cm->last_width && + cm->height == cm->last_height && + !cm->intra_only && + cm->last_show_frame; // Special case: set prev_mi to NULL when the previous mode info // context cannot be used. - cm->prev_mi = use_prev_in_find_mv_refs ? - cm->prev_mip + cm->mode_info_stride + 1 : NULL; + return use_prev_mi ? &cm->prev_mip[cm->mi_stride + 1] : NULL; } static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { return cm->frame_type == KEY_FRAME || cm->intra_only; } -static INLINE void update_partition_context( - PARTITION_CONTEXT *above_seg_context, - PARTITION_CONTEXT left_seg_context[8], - int mi_row, int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) { - PARTITION_CONTEXT *const above_ctx = above_seg_context + mi_col; - PARTITION_CONTEXT *const left_ctx = left_seg_context + (mi_row & MI_MASK); +static INLINE void update_partition_context(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE subsize, + BLOCK_SIZE bsize) { + PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; + PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); // num_4x4_blocks_wide_lookup[bsize] / 2 const int bs = num_8x8_blocks_wide_lookup[bsize]; @@ -301,12 +316,11 @@ static INLINE void update_partition_context( vpx_memset(left_ctx, partition_context_lookup[subsize].left, bs); } -static INLINE int partition_plane_context( - const PARTITION_CONTEXT *above_seg_context, - const PARTITION_CONTEXT left_seg_context[8], - int mi_row, int mi_col, BLOCK_SIZE bsize) { - const PARTITION_CONTEXT *above_ctx = above_seg_context + mi_col; - const PARTITION_CONTEXT *left_ctx = left_seg_context + (mi_row & MI_MASK); +static INLINE int partition_plane_context(const MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; + const 
PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); const int bsl = mi_width_log2(bsize); const int bs = 1 << bsl; diff --git a/libvpx/vp9/common/vp9_postproc.h b/libvpx/vp9/common/vp9_postproc.h index b07d5d0..ebebc1a 100644 --- a/libvpx/vp9/common/vp9_postproc.h +++ b/libvpx/vp9/common/vp9_postproc.h @@ -13,6 +13,7 @@ #define VP9_COMMON_VP9_POSTPROC_H_ #include "vpx_ports/mem.h" +#include "vpx_scale/yv12config.h" #include "vp9/common/vp9_ppflags.h" #ifdef __cplusplus diff --git a/libvpx/vp9/common/vp9_ppflags.h b/libvpx/vp9/common/vp9_ppflags.h index 8168935..e8b04d2 100644 --- a/libvpx/vp9/common/vp9_ppflags.h +++ b/libvpx/vp9/common/vp9_ppflags.h @@ -33,10 +33,12 @@ typedef struct { int post_proc_flag; int deblocking_level; int noise_level; +#if CONFIG_POSTPROC_VISUALIZER int display_ref_frame_flag; int display_mb_modes_flag; int display_b_modes_flag; int display_mv_flag; +#endif // CONFIG_POSTPROC_VISUALIZER } vp9_ppflags_t; #ifdef __cplusplus diff --git a/libvpx/vp9/common/vp9_pred_common.c b/libvpx/vp9/common/vp9_pred_common.c index 197bcb6..bc9d6ef 100644 --- a/libvpx/vp9/common/vp9_pred_common.c +++ b/libvpx/vp9/common/vp9_pred_common.c @@ -348,7 +348,7 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { // left of the entries corresponding to real blocks. // The prediction flags in these dummy entries are initialized to 0. 
int vp9_get_tx_size_context(const MACROBLOCKD *xd) { - const int max_tx_size = max_txsize_lookup[xd->mi_8x8[0]->mbmi.sb_type]; + const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type]; const MB_MODE_INFO *const above_mbmi = get_mbmi(get_above_mi(xd)); const MB_MODE_INFO *const left_mbmi = get_mbmi(get_left_mi(xd)); const int has_above = above_mbmi != NULL; diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h index 6c7a0d3..1a7ba86 100644 --- a/libvpx/vp9/common/vp9_pred_common.h +++ b/libvpx/vp9/common/vp9_pred_common.h @@ -19,11 +19,11 @@ extern "C" { #endif static INLINE const MODE_INFO *get_above_mi(const MACROBLOCKD *const xd) { - return xd->up_available ? xd->mi_8x8[-xd->mode_info_stride] : NULL; + return xd->up_available ? xd->mi[-xd->mi_stride] : NULL; } static INLINE const MODE_INFO *get_left_mi(const MACROBLOCKD *const xd) { - return xd->left_available ? xd->mi_8x8[-1] : NULL; + return xd->left_available ? xd->mi[-1] : NULL; } int vp9_get_segment_id(VP9_COMMON *cm, const uint8_t *segment_ids, diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c index 005f370..e722d6a 100644 --- a/libvpx/vp9/common/vp9_reconinter.c +++ b/libvpx/vp9/common/vp9_reconinter.c @@ -144,8 +144,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi_8x8[0]; + const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -193,8 +194,7 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + (scaled_mv.col >> SUBPEL_BITS); inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, xd->interp_kernel, - xs, ys); + subpel_x, subpel_y, sf, 
w, h, ref, kernel, xs, ys); } } @@ -212,7 +212,7 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) @@ -248,8 +248,9 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; - const MODE_INFO *mi = xd->mi_8x8[0]; + const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -308,10 +309,8 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, y0_16 = sf->scale_value_y(y0_16, sf); // Map the top left corner of the block into the reference frame. - // NOTE: This must be done in this way instead of - // sf->scale_value_x(x_start + x, sf). - x0 = sf->scale_value_x(x_start, sf) + sf->scale_value_x(x, sf); - y0 = sf->scale_value_y(y_start, sf) + sf->scale_value_y(y, sf); + x0 = sf->scale_value_x(x_start + x, sf); + y0 = sf->scale_value_y(y_start + y, sf); // Scale the MV and incorporate the sub-pixel offset of the block // in the reference frame. 
@@ -379,7 +378,7 @@ static void dec_build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, - subpel_y, sf, w, h, ref, xd->interp_kernel, xs, ys); + subpel_y, sf, w, h, ref, kernel, xs, ys); } } @@ -396,7 +395,7 @@ void vp9_dec_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi_8x8[0]->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) diff --git a/libvpx/vp9/common/vp9_reconintra.c b/libvpx/vp9/common/vp9_reconintra.c index 915c1c1..44951b5 100644 --- a/libvpx/vp9/common/vp9_reconintra.c +++ b/libvpx/vp9/common/vp9_reconintra.c @@ -347,6 +347,8 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + vpx_memset(left_col, 129, 64); + // left if (left_available) { if (xd->mb_to_bottom_edge < 0) { @@ -366,8 +368,6 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; } - } else { - vpx_memset(left_col, 129, bs); } // TODO(hkuang) do not extend 2*bs pixels for all modes. 
diff --git a/libvpx/vp9/common/vp9_rtcd_defs.pl b/libvpx/vp9/common/vp9_rtcd_defs.pl index e4cd9d4..b455592 100644 --- a/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -380,6 +380,10 @@ specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc"; add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc"; +add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; +specialize qw/vp9_get_sse_sum_16x16 sse2/; +$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2; + add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc"; @@ -636,7 +640,7 @@ add_proto qw/void vp9_sad4x4x8/, "const uint8_t *src_ptr, int src_stride, const specialize qw/vp9_sad4x4x8 sse4/; add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; -specialize qw/vp9_sad64x64x4d sse2/; +specialize qw/vp9_sad64x64x4d sse2 avx2/; add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad32x64x4d sse2/; @@ -651,7 +655,7 @@ add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, co specialize qw/vp9_sad16x32x4d sse2/; add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"; -specialize qw/vp9_sad32x32x4d sse2/; +specialize qw/vp9_sad32x32x4d sse2 avx2/; add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int 
ref_stride, unsigned int *sad_array"; specialize qw/vp9_sad16x16x4d sse2/; diff --git a/libvpx/vp9/common/vp9_scale.c b/libvpx/vp9/common/vp9_scale.c index e0f1e34..d3405fc 100644 --- a/libvpx/vp9/common/vp9_scale.c +++ b/libvpx/vp9/common/vp9_scale.c @@ -13,11 +13,11 @@ #include "vp9/common/vp9_scale.h" static INLINE int scaled_x(int val, const struct scale_factors *sf) { - return val * sf->x_scale_fp >> REF_SCALE_SHIFT; + return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); } static INLINE int scaled_y(int val, const struct scale_factors *sf) { - return val * sf->y_scale_fp >> REF_SCALE_SHIFT; + return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); } static int unscaled_value(int val, const struct scale_factors *sf) { diff --git a/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c index 7e9cc84..b84db97 100644 --- a/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c +++ b/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c @@ -33,10 +33,11 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { }; #if defined(__clang__) -# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) +# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \ + (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0) # define MM256_BROADCASTSI128_SI256(x) \ _mm_broadcastsi128_si256((__m128i const *)&(x)) -# else // clang > 3.3 +# else // clang > 3.3, and not 5.0 on macosx. 
# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) # endif // clang <= 3.3 #elif defined(__GNUC__) diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c index 6926657..9b63961 100644 --- a/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/libvpx/vp9/decoder/vp9_decodeframe.c @@ -187,41 +187,13 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { xd->plane[i].dequant = cm->uv_dequant[q_index]; } -// Allocate storage for each tile column. -// TODO(jzern): when max_threads <= 1 the same storage could be used for each -// tile. -static void alloc_tile_storage(VP9D_COMP *pbi, int tile_rows, int tile_cols) { - VP9_COMMON *const cm = &pbi->common; - const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - int i; - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. - CHECK_MEM_ERROR(cm, pbi->above_context[0], - vpx_realloc(pbi->above_context[0], - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols)); - for (i = 1; i < MAX_MB_PLANE; ++i) { - pbi->above_context[i] = pbi->above_context[0] + - i * sizeof(*pbi->above_context[0]) * - 2 * aligned_mi_cols; - } - - // This is sized based on the entire frame. Each tile operates within its - // column bounds. 
- CHECK_MEM_ERROR(cm, pbi->above_seg_context, - vpx_realloc(pbi->above_seg_context, - sizeof(*pbi->above_seg_context) * - aligned_mi_cols)); -} - static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, TX_SIZE tx_size, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; if (eob > 0) { TX_TYPE tx_type; - const int plane_type = pd->plane_type; + const PLANE_TYPE plane_type = pd->plane_type; int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); switch (tx_size) { case TX_4X4: @@ -269,11 +241,11 @@ struct intra_args { static void predict_and_reconstruct_intra_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct intra_args *const args = arg; + struct intra_args *const args = (struct intra_args *)arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; const MB_PREDICTION_MODE mode = (plane == 0) ? 
get_y_mode(mi, block) : mi->mbmi.uv_mode; int x, y; @@ -305,7 +277,7 @@ struct inter_args { static void reconstruct_inter_block(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct inter_args *args = arg; + struct inter_args *args = (struct inter_args *)arg; VP9_COMMON *const cm = args->cm; MACROBLOCKD *const xd = args->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -319,36 +291,36 @@ static void reconstruct_inter_block(int plane, int block, *args->eobtotal += eob; } -static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, - const TileInfo *const tile, - BLOCK_SIZE bsize, int mi_row, int mi_col) { +static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int x_mis = MIN(bw, cm->mi_cols - mi_col); const int y_mis = MIN(bh, cm->mi_rows - mi_row); - const int offset = mi_row * cm->mode_info_stride + mi_col; + const int offset = mi_row * cm->mi_stride + mi_col; int x, y; - xd->mi_8x8 = cm->mi_grid_visible + offset; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset; - xd->mi_8x8[0] = &cm->mi[offset]; - xd->mi_8x8[0]->mbmi.sb_type = bsize; + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + xd->mi[0]->mbmi.sb_type = bsize; for (y = 0; y < y_mis; ++y) for (x = !y; x < x_mis; ++x) - xd->mi_8x8[y * cm->mode_info_stride + x] = xd->mi_8x8[0]; + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; - set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col); + set_skip_context(xd, mi_row, mi_col); // Distance of Mb to the various image edges. 
These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); + return &xd->mi[0]->mbmi; } static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, int idx, int mi_row, int mi_col) { - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; xd->block_refs[idx] = ref_buffer; if (!vp9_is_valid_scale(&ref_buffer->sf)) @@ -364,17 +336,12 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE bsize) { const int less8x8 = bsize < BLOCK_8X8; - MB_MODE_INFO *mbmi; - - set_offsets(cm, xd, tile, bsize, mi_row, mi_col); + MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col); vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; - // Has to be called after set_offsets - mbmi = &xd->mi_8x8[0]->mbmi; - if (mbmi->skip) { reset_skip_context(xd, bsize); } else { @@ -393,8 +360,6 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, if (has_second_ref(mbmi)) set_ref(cm, xd, 1, mi_row, mi_col); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - // Prediction vp9_dec_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); @@ -414,16 +379,14 @@ static void decode_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd, int hbs, int mi_row, int mi_col, BLOCK_SIZE bsize, vp9_reader *r) { - const int ctx = partition_plane_context(xd->above_seg_context, - xd->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; 
const int has_cols = (mi_col + hbs) < cm->mi_cols; PARTITION_TYPE p; if (has_rows && has_cols) - p = vp9_read_tree(r, vp9_partition_tree, probs); + p = (PARTITION_TYPE)vp9_read_tree(r, vp9_partition_tree, probs); else if (!has_rows && has_cols) p = vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; else if (has_rows && !has_cols) @@ -481,8 +444,7 @@ static void decode_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(xd->above_seg_context, xd->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void setup_token_decoder(const uint8_t *data, @@ -650,9 +612,7 @@ static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { read_frame_size(rb, &cm->display_width, &cm->display_height); } -static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { - VP9_COMMON *cm = &pbi->common; - +static void apply_frame_size(VP9_COMMON *cm, int width, int height) { if (cm->width != width || cm->height != height) { // Change in frame size. // TODO(agrange) Don't test width/height, check overall size. 
@@ -679,18 +639,15 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { } } -static void setup_frame_size(VP9D_COMP *pbi, - struct vp9_read_bit_buffer *rb) { +static void setup_frame_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { int width, height; read_frame_size(rb, &width, &height); - apply_frame_size(pbi, width, height); - setup_display_size(&pbi->common, rb); + apply_frame_size(cm, width, height); + setup_display_size(cm, rb); } -static void setup_frame_size_with_refs(VP9D_COMP *pbi, +static void setup_frame_size_with_refs(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - VP9_COMMON *const cm = &pbi->common; - int width, height; int found = 0, i; for (i = 0; i < REFS_PER_FRAME; ++i) { @@ -710,22 +667,11 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame with invalid size"); - apply_frame_size(pbi, width, height); + apply_frame_size(cm, width, height); setup_display_size(cm, rb); } -static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd, - int tile_row, int tile_col) { - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) - xd->above_context[i] = pbi->above_context[i]; - - // see note in alloc_tile_storage(). 
- xd->above_seg_context = pbi->above_seg_context; -} - -static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, +static void decode_tile(VP9Decoder *pbi, const TileInfo *const tile, vp9_reader *r) { const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const cm = &pbi->common; @@ -830,15 +776,15 @@ typedef struct TileBuffer { int col; // only used with multi-threaded decoding } TileBuffer; -static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { +static const uint8_t *decode_tiles(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; - const uint8_t *const data_end = pbi->source + pbi->source_sz; const uint8_t *end = NULL; vp9_reader r; @@ -847,11 +793,11 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
- vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * 2 * aligned_cols); + vpx_memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); - vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_cols); + vpx_memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_cols); // Load tile data into tile_buffers for (tile_row = 0; tile_row < tile_rows; ++tile_row) { @@ -878,7 +824,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { vp9_tile_init(&tile, cm, tile_row, col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &r); - setup_tile_context(pbi, xd, tile_row, col); decode_tile(pbi, &tile, &r); if (last_tile) @@ -889,17 +834,6 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { return end; } -static void setup_tile_macroblockd(TileWorkerData *const tile_data) { - MACROBLOCKD *xd = &tile_data->xd; - struct macroblockd_plane *const pd = xd->plane; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - pd[i].dqcoeff = tile_data->dqcoeff[i]; - vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); - } -} - static int tile_worker_hook(void *arg1, void *arg2) { TileWorkerData *const tile_data = (TileWorkerData*)arg1; const TileInfo *const tile = (TileInfo*)arg2; @@ -931,10 +865,11 @@ static int compare_tile_buffers(const void *a, const void *b) { } } -static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { +static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; const uint8_t *bit_reader_end = NULL; - const uint8_t *const data_end = pbi->source + pbi->source_sz; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; @@ -947,12 +882,16 @@ static const uint8_t 
*decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { assert(tile_rows == 1); (void)tile_rows; - if (num_workers > pbi->num_tile_workers) { + // TODO(jzern): See if we can remove the restriction of passing in max + // threads to the decoder. + if (pbi->num_tile_workers == 0) { + const int num_threads = pbi->oxcf.max_threads & ~1; int i; + // TODO(jzern): Allocate one less worker, as in the current code we only + // use num_threads - 1 workers. CHECK_MEM_ERROR(cm, pbi->tile_workers, - vpx_realloc(pbi->tile_workers, - num_workers * sizeof(*pbi->tile_workers))); - for (i = pbi->num_tile_workers; i < num_workers; ++i) { + vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); + for (i = 0; i < num_threads; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; ++pbi->num_tile_workers; @@ -960,7 +899,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { CHECK_MEM_ERROR(cm, worker->data1, vpx_memalign(32, sizeof(TileWorkerData))); CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); - if (i < num_workers - 1 && !vp9_worker_reset(worker)) { + if (i < num_threads - 1 && !vp9_worker_reset(worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile decoder thread creation failed"); } @@ -968,17 +907,16 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { } // Reset tile decoding hook - for (n = 0; n < pbi->num_tile_workers; ++n) { + for (n = 0; n < num_workers; ++n) { pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook; } // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
- vpx_memset(pbi->above_context[0], 0, - sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * - 2 * aligned_mi_cols); - vpx_memset(pbi->above_seg_context, 0, - sizeof(*pbi->above_seg_context) * aligned_mi_cols); + vpx_memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + vpx_memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); // Load tile data into tile_buffers for (n = 0; n < tile_cols; ++n) { @@ -1023,11 +961,10 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; vp9_tile_init(tile, tile_data->cm, 0, buf->col); - setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader); - setup_tile_context(pbi, &tile_data->xd, 0, buf->col); - setup_tile_macroblockd(tile_data); + init_macroblockd(cm, &tile_data->xd); + vp9_zero(tile_data->xd.dqcoeff); worker->had_error = 0; if (i == num_workers - 1 || n == tile_cols - 1) { @@ -1072,12 +1009,13 @@ static void error_handler(void *data) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -#define RESERVED \ - if (vp9_rb_read_bit(rb)) \ - vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ - "Reserved bit must be unset") +static BITSTREAM_PROFILE read_profile(struct vp9_read_bit_buffer *rb) { + int profile = vp9_rb_read_bit(rb); + profile |= vp9_rb_read_bit(rb) << 1; + return (BITSTREAM_PROFILE) profile; +} -static size_t read_uncompressed_header(VP9D_COMP *pbi, +static size_t read_uncompressed_header(VP9Decoder *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; size_t sz; @@ -1089,8 +1027,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); - cm->version = vp9_rb_read_bit(rb); - RESERVED; + cm->profile = read_profile(rb); + if (cm->profile >= MAX_PROFILES) + vpx_internal_error(&cm->error, 
VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); cm->show_existing_frame = vp9_rb_read_bit(rb); if (cm->show_existing_frame) { @@ -1115,11 +1055,12 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, if (cm->frame_type == KEY_FRAME) { check_sync_code(cm, rb); - - cm->color_space = vp9_rb_read_literal(rb, 3); // colorspace + if (cm->profile > PROFILE_1) + cm->bit_depth = vp9_rb_read_bit(rb) ? BITS_12 : BITS_10; + cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3); if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_x = vp9_rb_read_bit(rb); cm->subsampling_y = vp9_rb_read_bit(rb); vp9_rb_read_bit(rb); // has extra plane @@ -1127,7 +1068,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->subsampling_y = cm->subsampling_x = 1; } } else { - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { cm->subsampling_y = cm->subsampling_x = 0; vp9_rb_read_bit(rb); // has extra plane } else { @@ -1143,7 +1084,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->frame_refs[i].buf = get_frame_new_buffer(cm); } - setup_frame_size(pbi, rb); + setup_frame_size(cm, rb); } else { cm->intra_only = cm->show_frame ? 
0 : vp9_rb_read_bit(rb); @@ -1154,7 +1095,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, check_sync_code(cm, rb); pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); - setup_frame_size(pbi, rb); + setup_frame_size(cm, rb); } else { pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES); @@ -1166,7 +1107,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb); } - setup_frame_size_with_refs(pbi, rb); + setup_frame_size_with_refs(cm, rb); cm->allow_high_precision_mv = vp9_rb_read_bit(rb); cm->interp_filter = read_interp_filter(rb); @@ -1214,7 +1155,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, return sz; } -static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, +static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; @@ -1314,14 +1255,12 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) { } #endif // NDEBUG -int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { - int i; +int vp9_decode_frame(VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; - const uint8_t *data = pbi->source; - const uint8_t *const data_end = pbi->source + pbi->source_sz; - struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler }; const size_t first_partition_size = read_uncompressed_header(pbi, &rb); const int keyframe = cm->frame_type == KEY_FRAME; @@ -1347,7 +1286,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { pbi->do_loopfilter_inline = (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { - CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_malloc(sizeof(LFWorkerData))); + CHECK_MEM_ERROR(cm, 
pbi->lf_worker.data1, + vpx_memalign(32, sizeof(LFWorkerData))); pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, @@ -1355,21 +1295,15 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { } } - alloc_tile_storage(pbi, tile_rows, tile_cols); - - xd->mode_info_stride = cm->mode_info_stride; - if (cm->coding_use_prev_mi) - set_prev_mi(cm); - else - cm->prev_mi = NULL; + init_macroblockd(cm, &pbi->mb); + cm->prev_mi = get_prev_mi(cm); setup_plane_dequants(cm, xd, cm->base_qindex); vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp9_zero(cm->counts); - for (i = 0; i < MAX_MB_PLANE; ++i) - vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); + vp9_zero(xd->dqcoeff); xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); @@ -1378,9 +1312,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { // single-frame tile decoding. 
if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && cm->frame_parallel_decoding_mode) { - *p_data_end = decode_tiles_mt(pbi, data + first_partition_size); + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); } else { - *p_data_end = decode_tiles(pbi, data + first_partition_size); + *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); } new_fb->corrupted |= xd->corrupted; diff --git a/libvpx/vp9/decoder/vp9_decodeframe.h b/libvpx/vp9/decoder/vp9_decodeframe.h index 4537bc8..8a19daf 100644 --- a/libvpx/vp9/decoder/vp9_decodeframe.h +++ b/libvpx/vp9/decoder/vp9_decodeframe.h @@ -17,10 +17,13 @@ extern "C" { #endif struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; void vp9_init_dequantizer(struct VP9Common *cm); -int vp9_decode_frame(struct VP9Decompressor *cpi, const uint8_t **p_data_end); + +int vp9_decode_frame(struct VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end); #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c index 06a21ea..3618f12 100644 --- a/libvpx/vp9/decoder/vp9_decodemv.c +++ b/libvpx/vp9/decoder/vp9_decodemv.c @@ -63,7 +63,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_SIZE max_tx_size, vp9_reader *r) { const int ctx = vp9_get_tx_size_context(xd); const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc.tx_probs); - TX_SIZE tx_size = (TX_SIZE)vp9_read(r, tx_probs[0]); + int tx_size = vp9_read(r, tx_probs[0]); if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { tx_size += vp9_read(r, tx_probs[1]); if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) @@ -72,7 +72,7 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, if (!cm->frame_parallel_decoding_mode) ++get_tx_counts(max_tx_size, ctx, &cm->counts.tx)[tx_size]; - return tx_size; + return (TX_SIZE)tx_size; } static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD 
*xd, TX_MODE tx_mode, @@ -104,7 +104,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; int segment_id; if (!seg->enabled) @@ -121,7 +121,7 @@ static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; int predicted_segment_id, segment_id; @@ -161,10 +161,10 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, static void read_intra_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - const MODE_INFO *above_mi = xd->mi_8x8[-cm->mode_info_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; + const MODE_INFO *above_mi = xd->mi[-cm->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; const BLOCK_SIZE bsize = mbmi->sb_type; int i; @@ -237,14 +237,15 @@ static int read_mv_component(vp9_reader *r, static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, nmv_context_counts *counts, int allow_hp) { - const MV_JOINT_TYPE j = vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); + const MV_JOINT_TYPE joint_type = + (MV_JOINT_TYPE)vp9_read_tree(r, vp9_mv_joint_tree, ctx->joints); const int use_hp = allow_hp && vp9_use_mv_hp(ref); MV diff = {0, 0}; - if (mv_joint_vertical(j)) + if (mv_joint_vertical(joint_type)) diff.row = read_mv_component(r, &ctx->comps[0], use_hp); - if (mv_joint_horizontal(j)) + if (mv_joint_horizontal(joint_type)) diff.col = read_mv_component(r, &ctx->comps[1], use_hp); vp9_inc_mv(&diff, counts); @@ -276,7 +277,8 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, FRAME_COUNTS *const counts = &cm->counts; if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - ref_frame[0] = vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); + ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id, + SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); @@ -513,7 +515,7 @@ static void read_inter_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, const TileInfo *const tile, int mi_row, int mi_col, vp9_reader *r) { - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; diff --git a/libvpx/vp9/decoder/vp9_decoder.c b/libvpx/vp9/decoder/vp9_decoder.c index ebb329c..fd74478 100644 --- a/libvpx/vp9/decoder/vp9_decoder.c +++ b/libvpx/vp9/decoder/vp9_decoder.c @@ -104,23 +104,14 @@ void vp9_initialize_dec() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); vp9_init_quant_tables(); init_done = 1; } } -static void init_macroblockd(VP9D_COMP *const pbi) { - 
MACROBLOCKD *xd = &pbi->mb; - struct macroblockd_plane *const pd = xd->plane; - int i; - - for (i = 0; i < MAX_MB_PLANE; ++i) - pd[i].dqcoeff = pbi->dqcoeff[i]; -} - -VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { - VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP)); +VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf) { + VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *const cm = pbi ? &pbi->common : NULL; if (!cm) @@ -128,12 +119,9 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_zero(*pbi); - // Initialize the references to not point to any frame buffers. - memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); - if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; - vp9_remove_decompressor(pbi); + vp9_decoder_remove(pbi); return NULL; } @@ -142,9 +130,13 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_rtcd(); + // Initialize the references to not point to any frame buffers. + vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + + cm->current_video_frame = 0; pbi->oxcf = *oxcf; pbi->ready_for_new_data = 1; - cm->current_video_frame = 0; + pbi->decoded_key_frame = 0; // vp9_init_dequantizer() is first called here. 
Add check in // frame_init_dequantizer() to avoid unnecessary calling of @@ -154,22 +146,17 @@ VP9D_COMP *vp9_create_decompressor(VP9D_CONFIG *oxcf) { vp9_loop_filter_init(cm); cm->error.setjmp = 0; - pbi->decoded_key_frame = 0; - - init_macroblockd(pbi); vp9_worker_init(&pbi->lf_worker); return pbi; } -void vp9_remove_decompressor(VP9D_COMP *pbi) { +void vp9_decoder_remove(VP9Decoder *pbi) { + VP9_COMMON *const cm = &pbi->common; int i; - if (!pbi) - return; - - vp9_remove_common(&pbi->common); + vp9_remove_common(cm); vp9_worker_end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); for (i = 0; i < pbi->num_tile_workers; ++i) { @@ -181,16 +168,11 @@ void vp9_remove_decompressor(VP9D_COMP *pbi) { vpx_free(pbi->tile_workers); if (pbi->num_tile_workers) { - VP9_COMMON *const cm = &pbi->common; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; - VP9LfSync *const lf_sync = &pbi->lf_row_sync; - - vp9_loop_filter_dealloc(lf_sync, sb_rows); + vp9_loop_filter_dealloc(&pbi->lf_row_sync, sb_rows); } - vpx_free(pbi->above_context[0]); - vpx_free(pbi->above_seg_context); vpx_free(pbi); } @@ -200,7 +182,7 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a, a->uv_height == b->uv_height && a->uv_width == b->uv_width; } -vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP9_COMMON *cm = &pbi->common; @@ -227,17 +209,15 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_COMP *pbi, } -vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { - VP9_COMMON *cm = &pbi->common; RefBuffer *ref_buf = NULL; - /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the - * encoder is using the frame buffers for. 
This is just a stub to keep the - * vpxenc --test-decode functionality working, and will be replaced in a - * later commit that adds VP9-specific controls for this functionality. - */ + // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + // encoder is using the frame buffers for. This is just a stub to keep the + // vpxenc --test-decode functionality working, and will be replaced in a + // later commit that adds VP9-specific controls for this functionality. if (ref_frame_flag == VP9_LAST_FLAG) { ref_buf = &cm->frame_refs[0]; } else if (ref_frame_flag == VP9_GOLD_FLAG) { @@ -245,13 +225,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, } else if (ref_frame_flag == VP9_ALT_FLAG) { ref_buf = &cm->frame_refs[2]; } else { - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); - return pbi->common.error.error_code; + return cm->error.error_code; } if (!equal_dimensions(ref_buf->buf, sd)) { - vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); } else { int *ref_fb_ptr = &ref_buf->idx; @@ -268,11 +248,11 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_COMP *pbi, vp8_yv12_copy_frame(sd, ref_buf->buf); } - return pbi->common.error.error_code; + return cm->error.error_code; } -int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { +int vp9_get_reference_dec(VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb) { VP9_COMMON *cm = &pbi->common; if (index < 0 || index >= REF_FRAMES) @@ -283,7 +263,7 @@ int vp9_get_reference_dec(VP9D_COMP *pbi, int index, YV12_BUFFER_CONFIG **fb) { } /* If any buffer updating is signaled it should be done here. 
*/ -static void swap_frame_buffers(VP9D_COMP *pbi) { +static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; VP9_COMMON *const cm = &pbi->common; @@ -307,35 +287,24 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { cm->frame_refs[ref_index].idx = INT_MAX; } -int vp9_receive_compressed_data(VP9D_COMP *pbi, +int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource, int64_t time_stamp) { - VP9_COMMON *cm = NULL; + VP9_COMMON *const cm = &pbi->common; const uint8_t *source = *psource; int retcode = 0; - /*if(pbi->ready_for_new_data == 0) - return -1;*/ - - if (!pbi) - return -1; - - cm = &pbi->common; cm->error.error_code = VPX_CODEC_OK; - pbi->source = source; - pbi->source_sz = size; - - if (pbi->source_sz == 0) { - /* This is used to signal that we are missing frames. - * We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. - * - * TODO(jkoleszar): Error concealment is undefined and non-normative - * at this point, but if it becomes so, [0] may not always be the correct - * thing to do here. - */ + if (size == 0) { + // This is used to signal that we are missing frames. + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. if (cm->frame_refs[0].idx != INT_MAX) cm->frame_refs[0].buf->corrupted = 1; } @@ -349,14 +318,13 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. 
- * - * TODO(jkoleszar): Error concealment is undefined and non-normative - * at this point, but if it becomes so, [0] may not always be the correct - * thing to do here. - */ + // We do not know if the missing frame(s) was supposed to update + // any of the reference buffers, but we act conservative and + // mark only the last buffer as corrupted. + // + // TODO(jkoleszar): Error concealment is undefined and non-normative + // at this point, but if it becomes so, [0] may not always be the correct + // thing to do here. if (cm->frame_refs[0].idx != INT_MAX) cm->frame_refs[0].buf->corrupted = 1; @@ -368,7 +336,7 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, cm->error.setjmp = 1; - retcode = vp9_decode_frame(pbi, psource); + retcode = vp9_decode_frame(pbi, source, source + size, psource); if (retcode < 0) { cm->error.error_code = VPX_CODEC_ERROR; @@ -430,13 +398,12 @@ int vp9_receive_compressed_data(VP9D_COMP *pbi, pbi->ready_for_new_data = 0; pbi->last_time_stamp = time_stamp; - pbi->source_sz = 0; cm->error.setjmp = 0; return retcode; } -int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, +int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags) { int ret = -1; @@ -455,19 +422,12 @@ int vp9_get_raw_frame(VP9D_COMP *pbi, YV12_BUFFER_CONFIG *sd, #if CONFIG_VP9_POSTPROC ret = vp9_post_proc_frame(&pbi->common, sd, flags); #else - - if (pbi->common.frame_to_show) { *sd = *pbi->common.frame_to_show; sd->y_width = pbi->common.width; sd->y_height = pbi->common.height; sd->uv_width = sd->y_width >> pbi->common.subsampling_x; sd->uv_height = sd->y_height >> pbi->common.subsampling_y; - ret = 0; - } else { - ret = -1; - } - #endif /*!CONFIG_POSTPROC*/ vp9_clear_system_state(); return ret; diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h index 4cbff45..c9dc251 100644 --- a/libvpx/vp9/decoder/vp9_decoder.h +++ b/libvpx/vp9/decoder/vp9_decoder.h @@ -31,30 
+31,17 @@ typedef struct { int width; int height; int version; - int postprocess; int max_threads; int inv_tile_order; - int input_partition; } VP9D_CONFIG; -typedef enum { - VP9_LAST_FLAG = 1, - VP9_GOLD_FLAG = 2, - VP9_ALT_FLAG = 4 -} VP9_REFFRAME; - -typedef struct VP9Decompressor { +typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); DECLARE_ALIGNED(16, VP9_COMMON, common); - DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); - VP9D_CONFIG oxcf; - const uint8_t *source; - size_t source_sz; - int64_t last_time_stamp; int ready_for_new_data; @@ -72,37 +59,34 @@ typedef struct VP9Decompressor { int num_tile_workers; VP9LfSync lf_row_sync; - - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - PARTITION_CONTEXT *above_seg_context; -} VP9D_COMP; +} VP9Decoder; void vp9_initialize_dec(); -int vp9_receive_compressed_data(struct VP9Decompressor *pbi, +int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, const uint8_t **dest, int64_t time_stamp); -int vp9_get_raw_frame(struct VP9Decompressor *pbi, +int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp9_ppflags_t *flags); -vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -vpx_codec_err_t vp9_set_reference_dec(struct VP9Decompressor *pbi, +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); -int vp9_get_reference_dec(struct VP9Decompressor *pbi, +int vp9_get_reference_dec(struct VP9Decoder *pbi, int index, YV12_BUFFER_CONFIG **fb); -struct VP9Decompressor *vp9_create_decompressor(VP9D_CONFIG *oxcf); +struct VP9Decoder *vp9_decoder_create(const VP9D_CONFIG *oxcf); -void vp9_remove_decompressor(struct VP9Decompressor *pbi); +void vp9_decoder_remove(struct VP9Decoder *pbi); #ifdef __cplusplus } // extern "C" diff --git 
a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c index 52e78cd..860da53 100644 --- a/libvpx/vp9/decoder/vp9_detokenize.c +++ b/libvpx/vp9/decoder/vp9_detokenize.c @@ -86,7 +86,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type, const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; - const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); + const int ref = is_inter_block(&xd->mi[0]->mbmi); int band, c = 0; const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; diff --git a/libvpx/vp9/decoder/vp9_dthread.c b/libvpx/vp9/decoder/vp9_dthread.c index 4df8509..9b124c9 100644 --- a/libvpx/vp9/decoder/vp9_dthread.c +++ b/libvpx/vp9/decoder/vp9_dthread.c @@ -99,7 +99,7 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, for (r = start; r < stop; r += num_lf_workers) { const int mi_row = r << MI_BLOCK_SIZE_LOG2; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mode_info_stride; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; for (c = 0; c < sb_cols; ++c) { const int mi_col = c << MI_BLOCK_SIZE_LOG2; @@ -108,8 +108,7 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, sync_read(lf_sync, r, c); vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mode_info_stride, - &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm); for (plane = 0; plane < num_planes; ++plane) { vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); @@ -133,13 +132,15 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { // VP9 decoder: Implement multi-threaded loopfilter that uses the tile // threads. 
-void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, +void vp9_loop_filter_frame_mt(VP9Decoder *pbi, VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); int i; // Allocate memory used in thread synchronization. @@ -169,7 +170,16 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows); // Set up loopfilter thread data. - for (i = 0; i < pbi->num_tile_workers; ++i) { + // The decoder is using num_workers instead of pbi->num_tile_workers + // because it has been observed that using more threads on the + // loopfilter, than there are tile columns in the frame will hurt + // performance on Android. This is because the system will only + // schedule the tile decode workers on cores equal to the number + // of tile columns. Then if the decoder tries to use more threads for the + // loopfilter, it will hurt performance because of contention. If the + // multithreading code changes in the future then the number of workers + // used by the loopfilter should be revisited. 
+ for (i = 0; i < num_workers; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; LFWorkerData *const lf_data = &tile_data->lfdata; @@ -185,10 +195,10 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, lf_data->y_only = y_only; // always do all planes in decoder lf_data->lf_sync = &pbi->lf_row_sync; - lf_data->num_lf_workers = pbi->num_tile_workers; + lf_data->num_lf_workers = num_workers; // Start loopfiltering - if (i == pbi->num_tile_workers - 1) { + if (i == num_workers - 1) { vp9_worker_execute(worker); } else { vp9_worker_launch(worker); @@ -196,7 +206,7 @@ void vp9_loop_filter_frame_mt(VP9D_COMP *pbi, } // Wait till all rows are finished - for (i = 0; i < pbi->num_tile_workers; ++i) { + for (i = 0; i < num_workers; ++i) { vp9_worker_sync(&pbi->tile_workers[i]); } } diff --git a/libvpx/vp9/decoder/vp9_dthread.h b/libvpx/vp9/decoder/vp9_dthread.h index 6d4450f..005bd7b 100644 --- a/libvpx/vp9/decoder/vp9_dthread.h +++ b/libvpx/vp9/decoder/vp9_dthread.h @@ -18,13 +18,12 @@ struct macroblockd; struct VP9Common; -struct VP9Decompressor; +struct VP9Decoder; typedef struct TileWorkerData { struct VP9Common *cm; vp9_reader bit_reader; DECLARE_ALIGNED(16, struct macroblockd, xd); - DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); // Row-based parallel loopfilter data LFWorkerData lfdata; @@ -51,7 +50,7 @@ void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync, void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows); // Multi-threaded loopfilter that uses the tile threads. 
-void vp9_loop_filter_frame_mt(struct VP9Decompressor *pbi, +void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi, struct VP9Common *cm, struct macroblockd *xd, int frame_filter_level, diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.c b/libvpx/vp9/encoder/vp9_aq_complexity.c new file mode 100644 index 0000000..47ad8d8 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_aq_complexity.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <limits.h> +#include <math.h> + +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/encoder/vp9_segmentation.h" + +static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = + {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + +void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + struct segmentation *const seg = &cm->seg; + + // Make SURE use of floating point in this function is safe. + vp9_clear_system_state(); + + if (cm->frame_type == KEY_FRAME || + cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { + int segment; + + // Clear down the segment map. + vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); + + // Clear down the complexity map used for rd. + vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + + vp9_enable_segmentation(seg); + vp9_clearall_segfeatures(seg); + + // Select delta coding method. + seg->abs_delta = SEGMENT_DELTADATA; + + // Segment 0 "Q" feature is disabled so it defaults to the baseline Q. + vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + + // Use some of the segments for in frame Q adjustment. 
+ for (segment = 1; segment < 2; segment++) { + const int qindex_delta = + vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, + in_frame_q_adj_ratio[segment]); + vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + } + } +} + +// Select a segment for the current SB64 +void vp9_select_in_frame_q_segment(VP9_COMP *cpi, + int mi_row, int mi_col, + int output_enabled, int projected_rate) { + VP9_COMMON *const cm = &cpi->common; + + const int mi_offset = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = MIN(cm->mi_cols - mi_col, bw); + const int ymis = MIN(cm->mi_rows - mi_row, bh); + int complexity_metric = 64; + int x, y; + + unsigned char segment; + + if (!output_enabled) { + segment = 0; + } else { + // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). + // It is converted to bits * 256 units. + const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / + (bw * bh); + + if (projected_rate < (target_rate / 4)) { + segment = 1; + } else { + segment = 0; + } + + if (target_rate > 0) { + complexity_metric = + clamp((int)((projected_rate * 64) / target_rate), 16, 255); + } + } + + // Fill in the entires in the segment map corresponding to this SB64. + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; + cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = + (unsigned char)complexity_metric; + } + } +} diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.h b/libvpx/vp9/encoder/vp9_aq_complexity.h new file mode 100644 index 0000000..af031a4 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_aq_complexity.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ +#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +// Select a segment for the current SB64. +void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, int mi_row, int mi_col, + int output_enabled, int projected_rate); + + +// This function sets up a set of segments with delta Q values around +// the baseline frame quantizer. +void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ diff --git a/libvpx/vp9/encoder/vp9_craq.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index 40437c7..7879091 100644 --- a/libvpx/vp9/encoder/vp9_craq.c +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -11,7 +11,7 @@ #include <limits.h> #include <math.h> -#include "vp9/encoder/vp9_craq.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/common/vp9_seg_common.h" @@ -19,19 +19,69 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +struct CYCLIC_REFRESH { + // Percentage of super-blocks per frame that are targeted as candidates + // for cyclic refresh. + int max_sbs_perframe; + // Maximum q-delta as percentage of base q. + int max_qdelta_perc; + // Block size below which we don't apply cyclic refresh. + BLOCK_SIZE min_block_size; + // Superblock starting index for cycling through the frame. + int sb_index; + // Controls how long a block will need to wait to be refreshed again. + int time_for_refresh; + // Actual number of (8x8) blocks that were applied delta-q (segment 1). 
+ int num_seg_blocks; + // Actual encoding bits for segment 1. + int actual_seg_bits; + // RD mult. parameters for segment 1. + int rdmult; + // Cyclic refresh map. + signed char *map; + // Projected rate and distortion for the current superblock. + int64_t projected_rate_sb; + int64_t projected_dist_sb; + // Thresholds applied to projected rate/distortion of the superblock. + int64_t thresh_rate_sb; + int64_t thresh_dist_sb; +}; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { + CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); + if (cr == NULL) + return NULL; + + cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map)); + if (cr->map == NULL) { + vpx_free(cr); + return NULL; + } + + return cr; +} + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { + vpx_free(cr->map); + vpx_free(cr); +} // Check if we should turn off cyclic refresh based on bitrate condition. -static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) { +static int apply_cyclic_refresh_bitrate(const VP9_COMMON *cm, + const RATE_CONTROL *rc) { // Turn off cyclic refresh if bits available per frame is not sufficiently // larger than bit cost of segmentation. Segment map bit cost should scale // with number of seg blocks, so compare available bits to number of blocks. // Average bits available per frame = av_per_frame_bandwidth // Number of (8x8) blocks in frame = mi_rows * mi_cols; - float factor = 0.5; - int number_blocks = cpi->common.mi_rows * cpi->common.mi_cols; + const float factor = 0.5; + const int number_blocks = cm->mi_rows * cm->mi_cols; // The condition below corresponds to turning off at target bitrates: // ~24kbps for CIF, 72kbps for VGA (at 30fps). - if (cpi->rc.av_per_frame_bandwidth < factor * number_blocks) + // Also turn off at very small frame sizes, to avoid too large fraction of + // superblocks to be refreshed per frame. Threshold below is less than QCIF. 
+ if (rc->av_per_frame_bandwidth < factor * number_blocks || + number_blocks / 64 < 5) return 0; else return 1; @@ -41,11 +91,9 @@ static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi) { // (lower-qp coding). Decision can be based on various factors, such as // size of the coding block (i.e., below min_block size rejected), coding // mode, and rate/distortion. -static int candidate_refresh_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int bsize, - int use_rd) { - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; +static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, + const MB_MODE_INFO *mbmi, + BLOCK_SIZE bsize, int use_rd) { if (use_rd) { // If projected rate is below the thresh_rate (well below target, // so undershoot expected), accept it for lower-qp coding. @@ -56,18 +104,18 @@ static int candidate_refresh_aq(VP9_COMP *const cpi, // 2) mode is non-zero mv and projected distortion is above thresh_dist // 3) mode is an intra-mode (we may want to allow some of this under // another thresh_dist) - else if ((bsize < cr->min_block_size) || - (mi->mbmi.mv[0].as_int != 0 && - cr->projected_dist_sb > cr->thresh_dist_sb) || - !is_inter_block(&mi->mbmi)) + else if (bsize < cr->min_block_size || + (mbmi->mv[0].as_int != 0 && + cr->projected_dist_sb > cr->thresh_dist_sb) || + !is_inter_block(mbmi)) return 0; else return 1; } else { // Rate/distortion not used for update. - if ((bsize < cr->min_block_size) || - (mi->mbmi.mv[0].as_int != 0) || - !is_inter_block(&mi->mbmi)) + if (bsize < cr->min_block_size || + mbmi->mv[0].as_int != 0 || + !is_inter_block(mbmi)) return 0; else return 1; @@ -77,33 +125,31 @@ static int candidate_refresh_aq(VP9_COMP *const cpi, // Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), // check if we should reset the segment_id, and update the cyclic_refresh map // and segmentation map. 
-void vp9_update_segment_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int mi_row, - int mi_col, - int bsize, - int use_rd) { - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; - VP9_COMMON *const cm = &cpi->common; +void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; + const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd); // Default is to not update the refresh map. int new_map_value = cr->map[block_index]; int x = 0; int y = 0; - int current_segment = mi->mbmi.segment_id; - int refresh_this_block = candidate_refresh_aq(cpi, mi, bsize, use_rd); + // Check if we should reset the segment_id for this block. - if (current_segment && !refresh_this_block) - mi->mbmi.segment_id = 0; + if (mbmi->segment_id > 0 && !refresh_this_block) + mbmi->segment_id = 0; // Update the cyclic refresh map, to be used for setting segmentation map // for the next frame. If the block will be refreshed this frame, mark it // as clean. The magnitude of the -ve influences how long before we consider // it for refresh again. 
- if (mi->mbmi.segment_id == 1) { + if (mbmi->segment_id == 1) { new_map_value = -cr->time_for_refresh; } else if (refresh_this_block) { // Else if it is accepted as candidate for refresh, and has not already @@ -121,54 +167,54 @@ void vp9_update_segment_aq(VP9_COMP *const cpi, for (x = 0; x < xmis; x++) { cr->map[block_index + y * cm->mi_cols + x] = new_map_value; cpi->segmentation_map[block_index + y * cm->mi_cols + x] = - mi->mbmi.segment_id; + mbmi->segment_id; } // Keep track of actual number (in units of 8x8) of blocks in segment 1 used // for encoding this frame. - if (mi->mbmi.segment_id) + if (mbmi->segment_id) cr->num_seg_blocks += xmis * ymis; } // Setup cyclic background refresh: set delta q and segmentation map. -void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) { +void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = &cpi->cyclic_refresh; + const RATE_CONTROL *const rc = &cpi->rc; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; struct segmentation *const seg = &cm->seg; - unsigned char *seg_map = cpi->segmentation_map; - int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cpi); + unsigned char *const seg_map = cpi->segmentation_map; + const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); // Don't apply refresh on key frame or enhancement layer frames. if (!apply_cyclic_refresh || - (cpi->common.frame_type == KEY_FRAME) || + (cm->frame_type == KEY_FRAME) || (cpi->svc.temporal_layer_id > 0)) { // Set segmentation map to 0 and disable. 
vpx_memset(seg_map, 0, cm->mi_rows * cm->mi_cols); vp9_disable_segmentation(&cm->seg); - if (cpi->common.frame_type == KEY_FRAME) - cr->mb_index = 0; + if (cm->frame_type == KEY_FRAME) + cr->sb_index = 0; return; } else { int qindex_delta = 0; - int mbs_in_frame = cm->mi_rows * cm->mi_cols; - int i, x, y, block_count, bl_index, bl_index2; - int sum_map, new_value, mi_row, mi_col, xmis, ymis, qindex2; + int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; + int xmis, ymis, x, y, qindex2; // Rate target ratio to set q delta. - float rate_ratio_qdelta = 2.0; + const float rate_ratio_qdelta = 2.0; vp9_clear_system_state(); // Some of these parameters may be set via codec-control function later. - cr->max_mbs_perframe = 10; + cr->max_sbs_perframe = 10; cr->max_qdelta_perc = 50; - cr->min_block_size = BLOCK_16X16; + cr->min_block_size = BLOCK_8X8; cr->time_for_refresh = 1; // Set rate threshold to some fraction of target (and scaled by 256). - cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 2; + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2; // Distortion threshold, quadratic in Q, scale factor to be adjusted. cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * vp9_convert_qindex_to_q(cm->base_qindex)); if (cpi->sf.use_nonrd_pick_mode) { // May want to be more conservative with thresholds in non-rd mode for now // as rate/distortion are derived from model based on prediction residual. - cr->thresh_rate_sb = (cpi->rc.sb64_target_rate * 256) >> 3; + cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3; cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * vp9_convert_qindex_to_q(cm->base_qindex)); } @@ -195,73 +241,84 @@ void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi) { vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); // Set the q delta for segment 1. 
- qindex_delta = vp9_compute_qdelta_by_rate(cpi, + qindex_delta = vp9_compute_qdelta_by_rate(rc, cm->frame_type, cm->base_qindex, rate_ratio_qdelta); // TODO(marpan): Incorporate the actual-vs-target rate over/undershoot from // previous encoded frame. - if ((-qindex_delta) > cr->max_qdelta_perc * cm->base_qindex / 100) { + if (-qindex_delta > cr->max_qdelta_perc * cm->base_qindex / 100) qindex_delta = -cr->max_qdelta_perc * cm->base_qindex / 100; - } // Compute rd-mult for segment 1. qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); cr->rdmult = vp9_compute_rd_mult(cpi, qindex2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qindex_delta); - // Number of target macroblocks to get the q delta (segment 1). - block_count = cr->max_mbs_perframe * mbs_in_frame / 100; - // Set the segmentation map: cycle through the macroblocks, starting at + + sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; + sbs_in_frame = sb_cols * sb_rows; + // Number of target superblocks to get the q delta (segment 1). + block_count = cr->max_sbs_perframe * sbs_in_frame / 100; + // Set the segmentation map: cycle through the superblocks, starting at // cr->mb_index, and stopping when either block_count blocks have been found // to be refreshed, or we have passed through whole frame. - // Note the setting of seg_map below is done in two steps (one over 8x8) - // and then another over SB, in order to keep the value constant over SB. - // TODO(marpan): Do this in one pass in SB order. - assert(cr->mb_index < mbs_in_frame); - i = cr->mb_index; + assert(cr->sb_index < sbs_in_frame); + i = cr->sb_index; do { - // If the macroblock is as a candidate for clean up then mark it - // for possible boost/refresh (segment 1). The segment id may get reset to - // 0 later if the macroblock gets coded anything other than ZEROMV. 
- if (cr->map[i] == 0) { - seg_map[i] = 1; - block_count--; - } else if (cr->map[i] < 0) { - cr->map[i]++; + int sum_map = 0; + // Get the mi_row/mi_col corresponding to superblock index i. + int sb_row_index = (i / sb_cols); + int sb_col_index = i - sb_row_index * sb_cols; + int mi_row = sb_row_index * MI_BLOCK_SIZE; + int mi_col = sb_col_index * MI_BLOCK_SIZE; + assert(mi_row >= 0 && mi_row < cm->mi_rows); + assert(mi_col >= 0 && mi_col < cm->mi_cols); + bl_index = mi_row * cm->mi_cols + mi_col; + // Loop through all 8x8 blocks in superblock and update map. + xmis = MIN(cm->mi_cols - mi_col, + num_8x8_blocks_wide_lookup[BLOCK_64X64]); + ymis = MIN(cm->mi_rows - mi_row, + num_8x8_blocks_high_lookup[BLOCK_64X64]); + for (y = 0; y < ymis; y++) { + for (x = 0; x < xmis; x++) { + const int bl_index2 = bl_index + y * cm->mi_cols + x; + // If the block is as a candidate for clean up then mark it + // for possible boost/refresh (segment 1). The segment id may get + // reset to 0 later if block gets coded anything other than ZEROMV. + if (cr->map[bl_index2] == 0) { + seg_map[bl_index2] = 1; + sum_map++; + } else if (cr->map[bl_index2] < 0) { + cr->map[bl_index2]++; + } + } + } + // Enforce constant segment over superblock. + // If segment is partial over superblock, reset to either all 1 or 0. + if (sum_map > 0 && sum_map < xmis * ymis) { + const int new_value = (sum_map >= xmis * ymis / 2); + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) + seg_map[bl_index + y * cm->mi_cols + x] = new_value; } i++; - if (i == mbs_in_frame) { + if (i == sbs_in_frame) { i = 0; } - } while (block_count && i != cr->mb_index); - cr->mb_index = i; - // Enforce constant segment map over superblock. 
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { - bl_index = mi_row * cm->mi_cols + mi_col; - xmis = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - ymis = num_8x8_blocks_high_lookup[BLOCK_64X64]; - xmis = MIN(cm->mi_cols - mi_col, xmis); - ymis = MIN(cm->mi_rows - mi_row, ymis); - sum_map = 0; - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - bl_index2 = bl_index + y * cm->mi_cols + x; - sum_map += seg_map[bl_index2]; - } - new_value = 0; - // If segment is partial over superblock, reset. - if (sum_map > 0 && sum_map < xmis * ymis) { - if (sum_map < xmis * ymis / 2) - new_value = 0; - else - new_value = 1; - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - bl_index2 = bl_index + y * cm->mi_cols + x; - seg_map[bl_index2] = new_value; - } - } - } + if (sum_map >= xmis * ymis /2) + block_count--; + } while (block_count && i != cr->sb_index); + cr->sb_index = i; } } + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb) { + cr->projected_rate_sb = rate_sb; + cr->projected_dist_sb = dist_sb; +} + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { + return cr->rdmult; +} diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h new file mode 100644 index 0000000..f556d65 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ +#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ + +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9_COMP; + +struct CYCLIC_REFRESH; +typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; + +CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols); + +void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr); + +// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), +// check if we should reset the segment_id, and update the cyclic_refresh map +// and segmentation map. +void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, + MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize, int use_rd); + +// Setup cyclic background refresh: set delta q and segmentation map. +void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi); + +void vp9_cyclic_refresh_set_rate_and_dist_sb(CYCLIC_REFRESH *cr, + int64_t rate_sb, int64_t dist_sb); + +int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ diff --git a/libvpx/vp9/encoder/vp9_vaq.c b/libvpx/vp9/encoder/vp9_aq_variance.c index c71c171..ae2a163 100644 --- a/libvpx/vp9/encoder/vp9_vaq.c +++ b/libvpx/vp9/encoder/vp9_aq_variance.c @@ -10,7 +10,7 @@ #include <math.h> -#include "vp9/encoder/vp9_vaq.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" @@ -99,7 +99,7 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) { continue; } - qindex_delta = vp9_compute_qdelta(cpi, base_q, base_q * Q_RATIO(i)); + qindex_delta = vp9_compute_qdelta(&cpi->rc, base_q, base_q * Q_RATIO(i)); vp9_set_segdata(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q, qindex_delta); vp9_enable_segfeature(seg, SEGMENT_ID(i), SEG_LVL_ALT_Q); diff --git a/libvpx/vp9/encoder/vp9_vaq.h b/libvpx/vp9/encoder/vp9_aq_variance.h index c73114a..381fe50 100644 --- a/libvpx/vp9/encoder/vp9_vaq.h +++ 
b/libvpx/vp9/encoder/vp9_aq_variance.h @@ -9,8 +9,8 @@ */ -#ifndef VP9_ENCODER_VP9_VAQ_H_ -#define VP9_ENCODER_VP9_VAQ_H_ +#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_ +#define VP9_ENCODER_VP9_AQ_VARIANCE_H_ #include "vp9/encoder/vp9_onyx_int.h" @@ -31,4 +31,4 @@ int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); } // extern "C" #endif -#endif // VP9_ENCODER_VP9_VAQ_H_ +#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_ diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c index 1b4a6cc..8d2afb9 100644 --- a/libvpx/vp9/encoder/vp9_bitstream.c +++ b/libvpx/vp9/encoder/vp9_bitstream.c @@ -192,7 +192,7 @@ static void write_segment_id(vp9_writer *w, const struct segmentation *seg, static void write_ref_frames(const VP9_COMP *cpi, vp9_writer *w) { const VP9_COMMON *const cm = &cpi->common; const MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int is_compound = has_second_ref(mbmi); const int segment_id = mbmi->segment_id; @@ -336,7 +336,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, MODE_INFO **mi_8x8, const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const struct segmentation *const seg = &cm->seg; const MODE_INFO *const mi = mi_8x8[0]; - const MODE_INFO *const above_mi = mi_8x8[-xd->mode_info_stride]; + const MODE_INFO *const above_mi = mi_8x8[-xd->mi_stride]; const MODE_INFO *const left_mi = xd->left_available ? 
mi_8x8[-1] : NULL; const MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; @@ -375,15 +375,15 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->mb.e_mbd; MODE_INFO *m; - xd->mi_8x8 = cm->mi_grid_visible + (mi_row * cm->mode_info_stride + mi_col); - m = xd->mi_8x8[0]; + xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); + m = xd->mi[0]; set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type], cm->mi_rows, cm->mi_cols); if (frame_is_intra_only(cm)) { - write_mb_modes_kf(cpi, xd->mi_8x8, w); + write_mb_modes_kf(cpi, xd->mi, w); } else { pack_inter_mode_mvs(cpi, m, w); } @@ -392,12 +392,10 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, pack_mb_tokens(w, tok, tok_end); } -static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, +static void write_partition(VP9_COMMON *cm, MACROBLOCKD *xd, + int hbs, int mi_row, int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) { - VP9_COMMON *const cm = &cpi->common; - const int ctx = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vp9_prob *const probs = get_partition_probs(cm, ctx); const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; @@ -415,21 +413,24 @@ static void write_partition(VP9_COMP *cpi, int hbs, int mi_row, int mi_col, } } -static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes_sb(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int bsl = b_width_log2(bsize); const int bs = (1 << bsl) / 4; PARTITION_TYPE 
partition; BLOCK_SIZE subsize; - MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mode_info_stride + mi_col]; + MODE_INFO *m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; partition = partition_lookup[bsl][m->mbmi.sb_type]; - write_partition(cpi, bs, mi_row, mi_col, partition, bsize, w); + write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); @@ -465,29 +466,30 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile, // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } -static void write_modes(VP9_COMP *cpi, const TileInfo *const tile, +static void write_modes(VP9_COMP *cpi, + const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, TOKENEXTRA *tok_end) { int mi_row, mi_col; for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { - vp9_zero(cpi->left_seg_context); + vp9_zero(cpi->mb.e_mbd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64); + write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, + BLOCK_64X64); } } -static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { +static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, + vp9_coeff_stats *coef_branch_ct) { vp9_coeff_probs_model *coef_probs = cpi->frame_coef_probs[tx_size]; vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size]; unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size]; - vp9_coeff_stats *coef_branch_ct = 
cpi->frame_branch_ct[tx_size]; int i, j, k, l, m; for (i = 0; i < PLANE_TYPES; ++i) { @@ -510,16 +512,16 @@ static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size) { } static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, - TX_SIZE tx_size) { + TX_SIZE tx_size, + vp9_coeff_stats *frame_branch_ct) { vp9_coeff_probs_model *new_frame_coef_probs = cpi->frame_coef_probs[tx_size]; vp9_coeff_probs_model *old_frame_coef_probs = cpi->common.fc.coef_probs[tx_size]; - vp9_coeff_stats *frame_branch_ct = cpi->frame_branch_ct[tx_size]; const vp9_prob upd = DIFF_UPDATE_PROB; const int entropy_nodes_update = UNCONSTRAINED_NODES; int i, j, k, l, t; switch (cpi->sf.use_fast_coef_updates) { - case 0: { + case TWO_LOOP: { /* dry run to see if there is any udpate at all needed */ int savings = 0; int update[2] = {0, 0}; @@ -594,14 +596,14 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi, return; } - case 1: - case 2: { + case ONE_LOOP: + case ONE_LOOP_REDUCED: { const int prev_coef_contexts_to_update = - cpi->sf.use_fast_coef_updates == 2 ? COEFF_CONTEXTS >> 1 - : COEFF_CONTEXTS; + cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED ? + COEFF_CONTEXTS >> 1 : COEFF_CONTEXTS; const int coef_band_to_update = - cpi->sf.use_fast_coef_updates == 2 ? COEF_BANDS >> 1 - : COEF_BANDS; + cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED ? 
+ COEF_BANDS >> 1 : COEF_BANDS; int updates = 0; int noupdates_before_first = 0; for (i = 0; i < PLANE_TYPES; ++i) { @@ -667,13 +669,15 @@ static void update_coef_probs(VP9_COMP *cpi, vp9_writer* w) { const TX_MODE tx_mode = cpi->common.tx_mode; const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; + vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; + vp9_clear_system_state(); for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size) - build_tree_distribution(cpi, tx_size); + build_tree_distribution(cpi, tx_size, frame_branch_ct[tx_size]); for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - update_coef_probs_common(w, cpi, tx_size); + update_coef_probs_common(w, cpi, tx_size, frame_branch_ct[tx_size]); } static void encode_loopfilter(struct loopfilter *lf, @@ -930,7 +934,7 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; - vpx_memset(cpi->above_seg_context, 0, sizeof(*cpi->above_seg_context) * + vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); tok[0][0] = cpi->tok; @@ -1027,19 +1031,22 @@ static void write_sync_code(struct vp9_write_bit_buffer *wb) { vp9_wb_write_literal(wb, VP9_SYNC_CODE_2, 8); } +static void write_profile(BITSTREAM_PROFILE profile, + struct vp9_write_bit_buffer *wb) { + assert(profile < MAX_PROFILES); + vp9_wb_write_bit(wb, profile & 1); + vp9_wb_write_bit(wb, profile >> 1); +} + static void write_uncompressed_header(VP9_COMP *cpi, struct vp9_write_bit_buffer *wb) { VP9_COMMON *const cm = &cpi->common; vp9_wb_write_literal(wb, VP9_FRAME_MARKER, 2); - // bitstream version. - // 00 - profile 0. 4:2:0 only - // 10 - profile 1. 
adds 4:4:4, 4:2:2, alpha - vp9_wb_write_bit(wb, cm->version); - vp9_wb_write_bit(wb, 0); + write_profile(cm->profile, wb); - vp9_wb_write_bit(wb, 0); + vp9_wb_write_bit(wb, 0); // show_existing_frame vp9_wb_write_bit(wb, cm->frame_type); vp9_wb_write_bit(wb, cm->show_frame); vp9_wb_write_bit(wb, cm->error_resilient_mode); @@ -1047,16 +1054,20 @@ static void write_uncompressed_header(VP9_COMP *cpi, if (cm->frame_type == KEY_FRAME) { const COLOR_SPACE cs = UNKNOWN; write_sync_code(wb); + if (cm->profile > PROFILE_1) { + assert(cm->bit_depth > BITS_8); + vp9_wb_write_bit(wb, cm->bit_depth - BITS_10); + } vp9_wb_write_literal(wb, cs, 3); if (cs != SRGB) { vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255] - if (cm->version == 1) { + if (cm->profile >= PROFILE_1) { vp9_wb_write_bit(wb, cm->subsampling_x); vp9_wb_write_bit(wb, cm->subsampling_y); vp9_wb_write_bit(wb, 0); // has extra plane } } else { - assert(cm->version == 1); + assert(cm->profile == PROFILE_1); vp9_wb_write_bit(wb, 0); // has extra plane } @@ -1184,7 +1195,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { uint8_t *data = dest; - size_t first_part_size; + size_t first_part_size, uncompressed_hdr_size; struct vp9_write_bit_buffer wb = {data, 0}; struct vp9_write_bit_buffer saved_wb; @@ -1192,7 +1203,8 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { saved_wb = wb; vp9_wb_write_literal(&wb, 0, 16); // don't know in advance first part. 
size - data += vp9_rb_bytes_written(&wb); + uncompressed_hdr_size = vp9_rb_bytes_written(&wb); + data += uncompressed_hdr_size; vp9_compute_update_table(); diff --git a/libvpx/vp9/encoder/vp9_block.h b/libvpx/vp9/encoder/vp9_block.h index 888984c..7729d84 100644 --- a/libvpx/vp9/encoder/vp9_block.h +++ b/libvpx/vp9/encoder/vp9_block.h @@ -157,7 +157,6 @@ struct macroblock { // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; - DECLARE_ALIGNED(16, uint8_t, token_cache[1024]); int optimize; @@ -197,7 +196,8 @@ struct macroblock { // TODO(jingning): the variables used here are little complicated. need further // refactoring on organizing the temporary buffers, when recursive // partition down to 4x4 block size is enabled. -static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) { +static INLINE PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, + BLOCK_SIZE bsize) { switch (bsize) { case BLOCK_64X64: return &x->sb64_context; diff --git a/libvpx/vp9/encoder/vp9_craq.h b/libvpx/vp9/encoder/vp9_craq.h deleted file mode 100644 index 1f81f3e..0000000 --- a/libvpx/vp9/encoder/vp9_craq.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2014 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_VP9_CRAQ_H_ -#define VP9_ENCODER_VP9_CRAQ_H_ - -#include "vp9/encoder/vp9_onyx_int.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Check if we should turn off cyclic refresh based on bitrate condition. 
-static int apply_cyclic_refresh_bitrate(VP9_COMP *const cpi); - -// Check if this coding block, of size bsize, should be considered for refresh -// (lower-qp coding). -static int candidate_refresh_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int bsize, - int use_rd); - -// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), -// check if we should reset the segment_id, and update the cyclic_refresh map -// and segmentation map. -void vp9_update_segment_aq(VP9_COMP *const cpi, - MODE_INFO *const mi, - int mi_row, - int mi_col, - int bsize, - int use_rd); - -// Setup cyclic background refresh: set delta q and segmentation map. -void vp9_setup_cyclic_refresh_aq(VP9_COMP *const cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_CRAQ_H_ diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c index 2f6c33d..61a5022 100644 --- a/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/libvpx/vp9/encoder/vp9_encodeframe.c @@ -30,6 +30,9 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" @@ -38,8 +41,6 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_tokenize.h" -#include "vp9/encoder/vp9_vaq.h" -#include "vp9/encoder/vp9_craq.h" #define GF_ZEROMV_ZBIN_BOOST 0 #define LF_ZEROMV_ZBIN_BOOST 0 @@ -162,15 +163,14 @@ static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col) { - const int idx_str = xd->mode_info_stride * mi_row + mi_col; - xd->mi_8x8 = cm->mi_grid_visible + idx_str; - xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - xd->mi_8x8[0] = cm->mi + idx_str; + const int idx_str = xd->mi_stride * 
mi_row + mi_col; + xd->mi = cm->mi_grid_visible + idx_str; + xd->mi[0] = cm->mi + idx_str; } -static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col, +static int is_block_in_mb_map(const VP9_COMP *cpi, int mi_row, int mi_col, BLOCK_SIZE bsize) { - VP9_COMMON *const cm = &cpi->common; + const VP9_COMMON *const cm = &cpi->common; const int mb_rows = cm->mb_rows; const int mb_cols = cm->mb_cols; const int mb_row = mi_row >> 1; @@ -194,6 +194,16 @@ static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col, return 0; } +static int check_active_map(const VP9_COMP *cpi, const MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + if (cpi->active_map_enabled && !x->e_mbd.lossless) { + return is_block_in_mb_map(cpi, mi_row, mi_col, bsize); + } else { + return 1; + } +} + static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; @@ -207,20 +217,15 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, const int idx_map = mb_row * cm->mb_cols + mb_col; const struct segmentation *const seg = &cm->seg; - set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col); + set_skip_context(xd, mi_row, mi_col); // Activity map pointer x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; - - if (cpi->active_map_enabled && !x->e_mbd.lossless) { - x->in_active_map = is_block_in_mb_map(cpi, mi_row, mi_col, bsize); - } else { - x->in_active_map = 1; - } + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); set_modeinfo_offsets(cm, xd, mi_row, mi_col); - mbmi = &xd->mi_8x8[0]->mbmi; + mbmi = &xd->mi[0]->mbmi; // Set up destination pointers. 
vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); @@ -253,22 +258,6 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, } vp9_init_plane_quantizers(cpi, x); - if (seg->enabled && cpi->seg0_cnt > 0 && - !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) { - cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; - } else { - const int y = mb_row & ~3; - const int x = mb_col & ~3; - const int p16 = ((mb_row & 1) << 1) + (mb_col & 1); - const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1); - const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1; - const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1; - - cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) - << 16) / cm->MBs; - } - x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id]; } else { mbmi->segment_id = 0; @@ -276,19 +265,18 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, } } -static void duplicate_modeinfo_in_sb(VP9_COMMON * const cm, +static void duplicate_mode_info_in_sb(VP9_COMMON * const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { const int block_width = num_8x8_blocks_wide_lookup[bsize]; const int block_height = num_8x8_blocks_high_lookup[bsize]; - const int mis = xd->mode_info_stride; int i, j; for (j = 0; j < block_height; ++j) for (i = 0; i < block_width; ++i) { if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols) - xd->mi_8x8[j * mis + i] = xd->mi_8x8[0]; + xd->mi[j * xd->mi_stride + i] = xd->mi[0]; } } @@ -299,8 +287,8 @@ static void set_block_size(VP9_COMP * const cpi, if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col); - xd->mi_8x8[0]->mbmi.sb_type = bsize; - duplicate_modeinfo_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); + xd->mi[0]->mbmi.sb_type = bsize; + 
duplicate_mode_info_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); } } @@ -496,13 +484,13 @@ static void choose_partitioning(VP9_COMP *cpi, if (cm->frame_type != KEY_FRAME) { vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf); - xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.sb_type = BLOCK_64X64; vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, - xd->mi_8x8[0]->mbmi.ref_mvs[LAST_FRAME], + xd->mi[0]->mbmi.ref_mvs[LAST_FRAME], &nearest_mv, &near_mv); - xd->mi_8x8[0]->mbmi.mv[0] = nearest_mv; + xd->mi[0]->mbmi.mv[0] = nearest_mv; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64); d = xd->plane[0].dst.buf; @@ -829,52 +817,6 @@ static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { adjust_act_zbin(cpi, x); } -// Select a segment for the current SB64 -static void select_in_frame_q_segment(VP9_COMP *cpi, - int mi_row, int mi_col, - int output_enabled, int projected_rate) { - VP9_COMMON *const cm = &cpi->common; - - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; - const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; - const int xmis = MIN(cm->mi_cols - mi_col, bw); - const int ymis = MIN(cm->mi_rows - mi_row, bh); - int complexity_metric = 64; - int x, y; - - unsigned char segment; - - if (!output_enabled) { - segment = 0; - } else { - // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). 
- // It is converted to bits * 256 units - const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / - (bw * bh); - - if (projected_rate < (target_rate / 4)) { - segment = 1; - } else { - segment = 0; - } - - if (target_rate > 0) { - complexity_metric = - clamp((int)((projected_rate * 64) / target_rate), 16, 255); - } - } - - // Fill in the entires in the segment map corresponding to this SB64 - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; - cpi->complexity_map[mi_offset + y * cm->mi_cols + x] = - (unsigned char)complexity_metric; - } - } -} - static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled) { @@ -885,32 +827,37 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; MODE_INFO *mi = &ctx->mic; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - MODE_INFO *mi_addr = xd->mi_8x8[0]; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + MODE_INFO *mi_addr = xd->mi[0]; const struct segmentation *const seg = &cm->seg; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; assert(mi->mbmi.sb_type == bsize); - // For in frame adaptive Q copy over the chosen segment id into the - // mode innfo context for the chosen mode / partition. - if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ || - cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && - output_enabled) { - // Check for reseting segment_id and update cyclic map. 
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) { - vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1); + *mi_addr = *mi; + + // If segmentation in use + if (seg->enabled && output_enabled) { + // For in frame complexity AQ copy the segment id from the segment map. + if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + const uint8_t *const map = seg->update_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mi_addr->mbmi.segment_id = + vp9_get_segment_id(cm, map, bsize, mi_row, mi_col); + } + // Else for cyclic refresh mode update the segment map, set the segment id + // and then update the quantizer. + else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); vp9_init_plane_quantizers(cpi, x); } - mi->mbmi.segment_id = xd->mi_8x8[0]->mbmi.segment_id; } - *mi_addr = *mi; - max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; @@ -932,7 +879,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, for (x_idx = 0; x_idx < mi_width; x_idx++) if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { - xd->mi_8x8[x_idx + y * mis] = mi_addr; + xd->mi[x_idx + y * mis] = mi_addr; } if (cpi->oxcf.aq_mode) @@ -1051,7 +998,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, } set_offsets(cpi, tile, mi_row, mi_col, bsize); - mbmi = &xd->mi_8x8[0]->mbmi; + mbmi = &xd->mi[0]->mbmi; mbmi->sb_type = bsize; for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -1101,12 +1048,12 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, (mi_col <= 1) || (mi_col >= (cm->mi_cols - 2)); if (!is_edge && (complexity > 128)) x->rdmult += ((x->rdmult * (complexity - 128)) / 256); - } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + } else if (aq_mode == CYCLIC_REFRESH_AQ) { const uint8_t *const 
map = cm->seg.update_map ? cpi->segmentation_map - : cm->last_frame_seg_map; + : cm->last_frame_seg_map; // If segment 1, use rdmult for that segment. if (vp9_get_segment_id(cm, map, bsize, mi_row, mi_col)) - x->rdmult = cpi->cyclic_refresh.rdmult; + x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); } // Find best coding mode & reconstruct the MB so it is available @@ -1129,8 +1076,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, vp9_clear_system_state(); *totalrate = (int)round(*totalrate * rdmult_ratio); } - } else if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) || - (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)) { + } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) { x->rdmult = orig_rdmult; } } @@ -1139,7 +1085,7 @@ static void update_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const MACROBLOCK *const x = &cpi->mb; const MACROBLOCKD *const xd = &x->e_mbd; - const MODE_INFO *const mi = xd->mi_8x8[0]; + const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; if (!frame_is_intra_only(cm)) { @@ -1206,21 +1152,21 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy( - cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), + xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), a + num_4x4_blocks_wide * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(cpi->above_seg_context + mi_col, sa, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl, - sizeof(cpi->left_seg_context[0]) * mi_height); + 
vpx_memcpy(xd->above_seg_context + mi_col, sa, + sizeof(*xd->above_seg_context) * mi_width); + vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, + sizeof(xd->left_seg_context[0]) * mi_height); } static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], @@ -1239,20 +1185,20 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, for (p = 0; p < MAX_MB_PLANE; ++p) { vpx_memcpy( a + num_4x4_blocks_wide * p, - cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), + xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); vpx_memcpy( l + num_4x4_blocks_high * p, - cpi->left_context[p] + xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } - vpx_memcpy(sa, cpi->above_seg_context + mi_col, - sizeof(*cpi->above_seg_context) * mi_width); - vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK), - sizeof(cpi->left_seg_context[0]) * mi_height); + vpx_memcpy(sa, xd->above_seg_context + mi_col, + sizeof(*xd->above_seg_context) * mi_width); + vpx_memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), + sizeof(xd->left_seg_context[0]) * mi_height); } static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, @@ -1284,6 +1230,8 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; @@ -1293,8 +1241,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, return; if (bsize >= BLOCK_8X8) { - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, 
bsize); subsize = *get_sb_partitioning(x, bsize); } else { ctx = 0; @@ -1349,8 +1296,7 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } // Check to see if the given partition size is allowed for a specified number @@ -1382,7 +1328,7 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO **mi_8x8, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int row8x8_remaining = tile->mi_row_end - mi_row; int col8x8_remaining = tile->mi_col_end - mi_col; int block_row, block_col; @@ -1418,15 +1364,79 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } +static void constrain_copy_partitioning(VP9_COMP *const cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + MODE_INFO **prev_mi_8x8, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + const int mis = cm->mi_stride; + const int row8x8_remaining = tile->mi_row_end - mi_row; + const int col8x8_remaining = tile->mi_col_end - mi_col; + MODE_INFO *const mi_upper_left = cm->mi + mi_row * mis + mi_col; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + int block_row, block_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // If the SB64 is all "in image". 
+ if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { + for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { + const int index = block_row * mis + block_col; + MODE_INFO *prev_mi = prev_mi_8x8[index]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + // Use previous partition if block size is not larger than bsize. + if (prev_mi && sb_type <= bsize) { + int block_row2, block_col2; + for (block_row2 = 0; block_row2 < bh; ++block_row2) { + for (block_col2 = 0; block_col2 < bw; ++block_col2) { + const int index2 = (block_row + block_row2) * mis + + block_col + block_col2; + prev_mi = prev_mi_8x8[index2]; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[index2] = cm->mi + offset; + mi_8x8[index2]->mbmi.sb_type = prev_mi->mbmi.sb_type; + } + } + } + } else { + // Otherwise, use fixed partition of size bsize. + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } + } else { + // Else this is a partial SB64, copy previous partition. + for (block_row = 0; block_row < 8; ++block_row) { + for (block_col = 0; block_col < 8; ++block_col) { + MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; + const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[block_row * mis + block_col] = cm->mi + offset; + mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; + } + } + } + } +} + static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, MODE_INFO **prev_mi_8x8) { - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int block_row, block_col; for (block_row = 0; block_row < 8; ++block_row) { for (block_col = 0; block_col < 8; ++block_col) { MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; const BLOCK_SIZE sb_type = prev_mi ? 
prev_mi->mbmi.sb_type : 0; + if (prev_mi) { const ptrdiff_t offset = prev_mi - cm->prev_mi; mi_8x8[block_row * mis + block_col] = cm->mi + offset; @@ -1436,8 +1446,127 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, } } +const struct { + int row; + int col; +} coord_lookup[16] = { + // 32x32 index = 0 + {0, 0}, {0, 2}, {2, 0}, {2, 2}, + // 32x32 index = 1 + {0, 4}, {0, 6}, {2, 4}, {2, 6}, + // 32x32 index = 2 + {4, 0}, {4, 2}, {6, 0}, {6, 2}, + // 32x32 index = 3 + {4, 4}, {4, 6}, {6, 4}, {6, 6}, +}; + +static void set_source_var_based_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + int mi_row, int mi_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *x = &cpi->mb; + const int mis = cm->mi_stride; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; + int r, c; + MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; + + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); + + // In-image SB64 + if ((col8x8_remaining >= MI_BLOCK_SIZE) && + (row8x8_remaining >= MI_BLOCK_SIZE)) { + const int src_stride = x->plane[0].src.stride; + const int pre_stride = cpi->Last_Source->y_stride; + const uint8_t *src = x->plane[0].src.buf; + const int pre_offset = (mi_row * MI_SIZE) * pre_stride + + (mi_col * MI_SIZE); + const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset; + const int thr_32x32 = cpi->sf.source_var_thresh; + const int thr_64x64 = thr_32x32 << 1; + int i, j; + int index; + diff d32[4]; + int use16x16 = 0; + + for (i = 0; i < 4; i++) { + diff d16[4]; + + for (j = 0; j < 4; j++) { + int b_mi_row = coord_lookup[i * 4 + j].row; + int b_mi_col = coord_lookup[i * 4 + j].col; + int b_offset = b_mi_row * MI_SIZE * src_stride + + b_mi_col * MI_SIZE; + + vp9_get_sse_sum_16x16(src + b_offset, + src_stride, + pre_src + b_offset, + pre_stride, &d16[j].sse, &d16[j].sum); + + d16[j].var = d16[j].sse - + (((uint32_t)d16[j].sum * d16[j].sum) >> 8); + + 
index = b_mi_row * mis + b_mi_col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_16X16; + + // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition + // size to further improve quality. + } + + if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 && + d16[2].var < thr_32x32 && d16[3].var < thr_32x32) { + d32[i].sse = d16[0].sse; + d32[i].sum = d16[0].sum; + + for (j = 1; j < 4; j++) { + d32[i].sse += d16[j].sse; + d32[i].sum += d16[j].sum; + } + + d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10); + + index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col; + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = BLOCK_32X32; + + if (!((cm->current_video_frame - 1) % + cpi->sf.search_type_check_frequency)) + cpi->use_large_partition_rate += 1; + } else { + use16x16 = 1; + } + } + + if (!use16x16) { + if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 && + d32[2].var < thr_64x64 && d32[3].var < thr_64x64) { + mi_8x8[0] = mi_upper_left; + mi_8x8[0]->mbmi.sb_type = BLOCK_64X64; + } + } + } else { // partial in-image SB64 + BLOCK_SIZE bsize = BLOCK_16X16; + int bh = num_8x8_blocks_high_lookup[bsize]; + int bw = num_8x8_blocks_wide_lookup[bsize]; + + for (r = 0; r < MI_BLOCK_SIZE; r += bh) { + for (c = 0; c < MI_BLOCK_SIZE; c += bw) { + int index = r * mis + c; + // Find a partition size that fits + bsize = find_partition_size(bsize, + (row8x8_remaining - r), + (col8x8_remaining - c), &bh, &bw); + mi_8x8[index] = mi_upper_left + index; + mi_8x8[index]->mbmi.sb_type = bsize; + } + } + } +} + static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int block_row, block_col; if (cm->prev_mi) { @@ -1455,22 +1584,21 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { return 0; } -static void update_state_rt(VP9_COMP *cpi, const PICK_MODE_CONTEXT *ctx, +static void 
update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, int bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; - // TODO(jingning) We might need PICK_MODE_CONTEXT to buffer coding modes - // associated with variable block sizes. Otherwise, remove this ctx - // from argument list. - (void)ctx; + *(xd->mi[0]) = ctx->mic; - // Check for reseting segment_id and update cyclic map. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && seg->enabled) { - vp9_update_segment_aq(cpi, xd->mi_8x8[0], mi_row, mi_col, bsize, 1); + // For in frame adaptive Q, check for reseting the segment_id and updating + // the cyclic refresh map. + if ((cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) && seg->enabled) { + vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, + mi_row, mi_col, bsize, 1); vp9_init_plane_quantizers(cpi, x); } @@ -1482,11 +1610,13 @@ static void update_state_rt(VP9_COMP *cpi, const PICK_MODE_CONTEXT *ctx, ++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter]; } } + + x->skip = ctx->skip; } static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, - TOKENEXTRA **tp, int mi_row, int mi_col, - int output_enabled, BLOCK_SIZE bsize) { + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { MACROBLOCK *const x = &cpi->mb; if (bsize < BLOCK_8X8) { @@ -1495,6 +1625,7 @@ static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, if (x->ab_index > 0) return; } + set_offsets(cpi, tile, mi_row, mi_col, bsize); update_state_rt(cpi, get_block_context(x, bsize), mi_row, mi_col, bsize); @@ -1510,6 +1641,8 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, int output_enabled, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = 
&x->e_mbd; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; @@ -1520,10 +1653,9 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, if (bsize >= BLOCK_8X8) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const int idx_str = xd->mode_info_stride * mi_row + mi_col; + const int idx_str = xd->mi_stride * mi_row + mi_col; MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; - ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = mi_8x8[0]->mbmi.sb_type; } else { ctx = 0; @@ -1582,8 +1714,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) - update_partition_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, subsize, bsize); + update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void rd_use_partition(VP9_COMP *cpi, @@ -1594,12 +1725,10 @@ static void rd_use_partition(VP9_COMP *cpi, int do_recon) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int mis = cm->mode_info_stride; + MACROBLOCKD *const xd = &x->e_mbd; + const int mis = cm->mi_stride; const int bsl = b_width_log2(bsize); - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - const int ms = num_4x4_blocks_wide / 2; - const int mh = num_4x4_blocks_high / 2; + const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2; const int bss = (1 << bsl) / 4; int i, pl; PARTITION_TYPE partition = PARTITION_NONE; @@ -1618,10 +1747,14 @@ static void rd_use_partition(VP9_COMP *cpi, BLOCK_SIZE sub_subsize = BLOCK_4X4; int splits_below = 0; BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; + int do_partition_search = 1; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + assert(num_4x4_blocks_wide_lookup[bsize] == + 
num_4x4_blocks_high_lookup[bsize]); + partition = partition_lookup[bsl][bs_type]; subsize = get_subsize(bsize, partition); @@ -1641,9 +1774,22 @@ static void rd_use_partition(VP9_COMP *cpi, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); } - if (cpi->sf.partition_search_type == SEARCH_PARTITION && + if (!x->in_active_map) { + do_partition_search = 0; + if (mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { + *(get_sb_partitioning(x, bsize)) = bsize; + bs_type = mi_8x8[0]->mbmi.sb_type = bsize; + subsize = bsize; + partition = PARTITION_NONE; + } + } + if (do_partition_search && + cpi->sf.partition_search_type == SEARCH_PARTITION && cpi->sf.adjust_partitioning_from_last_frame) { // Check if any of the sub blocks are further split. if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { @@ -1661,15 +1807,13 @@ static void rd_use_partition(VP9_COMP *cpi, // If partition is not none try none unless each of the 4 splits are split // even further.. 
if (partition != PARTITION_NONE && !splits_below && - mi_row + (ms >> 1) < cm->mi_rows && - mi_col + (ms >> 1) < cm->mi_cols) { + mi_row + (mi_step >> 1) < cm->mi_rows && + mi_col + (mi_step >> 1) < cm->mi_cols) { *(get_sb_partitioning(x, bsize)) = bsize; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize, get_block_context(x, bsize), INT64_MAX); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (none_rate < INT_MAX) { none_rate += x->partition_cost[pl][PARTITION_NONE]; @@ -1694,14 +1838,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) { + bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { int rt = 0; int64_t dt = 0; update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &rt, &dt, subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1719,14 +1863,14 @@ static void rd_use_partition(VP9_COMP *cpi, &last_part_dist, subsize, get_block_context(x, subsize), INT64_MAX); if (last_part_rate != INT_MAX && - bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) { + bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { int rt = 0; int64_t dt = 0; update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &rt, &dt, 
subsize, get_block_context(x, subsize), INT64_MAX); if (rt == INT_MAX || dt == INT64_MAX) { last_part_rate = INT_MAX; @@ -1742,8 +1886,8 @@ static void rd_use_partition(VP9_COMP *cpi, last_part_rate = 0; last_part_dist = 0; for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (ms >> 1); - int y_idx = (i >> 1) * (ms >> 1); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; int rt; int64_t dt; @@ -1769,18 +1913,20 @@ static void rd_use_partition(VP9_COMP *cpi, assert(0); } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rate < INT_MAX) { last_part_rate += x->partition_cost[pl][partition]; last_part_rd = RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist); } - if (cpi->sf.adjust_partitioning_from_last_frame + if (do_partition_search + && cpi->sf.adjust_partitioning_from_last_frame && cpi->sf.partition_search_type == SEARCH_PARTITION && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 - && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows) - && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) { + && (mi_row + mi_step < cm->mi_rows || + mi_row + (mi_step >> 1) == cm->mi_rows) + && (mi_col + mi_step < cm->mi_cols || + mi_col + (mi_step >> 1) == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rate = 0; chosen_dist = 0; @@ -1788,8 +1934,8 @@ static void rd_use_partition(VP9_COMP *cpi, // Split partition. 
for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2); - int y_idx = (i >> 1) * (num_4x4_blocks_wide >> 2); + int x_idx = (i & 1) * (mi_step >> 1); + int y_idx = (i >> 1) * (mi_step >> 1); int rt = 0; int64_t dt = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; @@ -1823,14 +1969,11 @@ static void rd_use_partition(VP9_COMP *cpi, encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0, split_subsize); - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row + y_idx, mi_col + x_idx, + pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rate += x->partition_cost[pl][PARTITION_NONE]; } - pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rate < INT_MAX) { chosen_rate += x->partition_cost[pl][PARTITION_SPLIT]; chosen_rd = RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist); @@ -1868,14 +2011,14 @@ static void rd_use_partition(VP9_COMP *cpi, // and and if necessary apply a Q delta using segmentation to get // closer to the target. 
if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, - output_enabled, chosen_rate); - } - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = chosen_rate; - cpi->cyclic_refresh.projected_dist_sb = chosen_dist; + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, + output_enabled, chosen_rate); } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + chosen_rate, chosen_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } @@ -1923,7 +2066,7 @@ static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8, *min_block_size = MIN(*min_block_size, sb_type); *max_block_size = MAX(*max_block_size, sb_type); } - index += xd->mode_info_stride; + index += xd->mi_stride; } } @@ -1939,77 +2082,71 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { // Look at neighboring blocks and set a min and max partition size based on // what they chose. 
static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, - int row, int col, + int mi_row, int mi_col, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { - VP9_COMMON * const cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MODE_INFO ** mi_8x8 = xd->mi_8x8; - MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8; - + MODE_INFO **mi_8x8 = xd->mi; const int left_in_image = xd->left_available && mi_8x8[-1]; const int above_in_image = xd->up_available && - mi_8x8[-xd->mode_info_stride]; - MODE_INFO ** above_sb64_mi_8x8; - MODE_INFO ** left_sb64_mi_8x8; + mi_8x8[-xd->mi_stride]; + MODE_INFO **above_sb64_mi_8x8; + MODE_INFO **left_sb64_mi_8x8; - int row8x8_remaining = tile->mi_row_end - row; - int col8x8_remaining = tile->mi_col_end - col; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; - + BLOCK_SIZE min_size = BLOCK_4X4; + BLOCK_SIZE max_size = BLOCK_64X64; // Trap case where we do not have a prediction. - if (!left_in_image && !above_in_image && - ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) { - *min_block_size = BLOCK_4X4; - *max_block_size = BLOCK_64X64; - } else { + if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" - *min_block_size = BLOCK_64X64; - *max_block_size = BLOCK_4X4; + min_size = BLOCK_64X64; + max_size = BLOCK_4X4; // NOTE: each call to get_sb_partition_size_range() uses the previous // passed in values for min and max as a starting point. 
- // // Find the min and max partition used in previous frame at this location - if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) { - get_sb_partition_size_range(cpi, prev_mi_8x8, - min_block_size, max_block_size); + if (cm->frame_type != KEY_FRAME) { + MODE_INFO **const prev_mi = + &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; + get_sb_partition_size_range(cpi, prev_mi, &min_size, &max_size); } - // Find the min and max partition sizes used in the left SB64 if (left_in_image) { left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, left_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); } - // Find the min and max partition sizes used in the above SB64. if (above_in_image) { - above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE]; + above_sb64_mi_8x8 = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE]; get_sb_partition_size_range(cpi, above_sb64_mi_8x8, - min_block_size, max_block_size); + &min_size, &max_size); + } + // adjust observed min and max + if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { + min_size = min_partition_size[min_size]; + max_size = max_partition_size[max_size]; } } - // adjust observed min and max - if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { - *min_block_size = min_partition_size[*min_block_size]; - *max_block_size = max_partition_size[*max_block_size]; - } - - // Check border cases where max and min from neighbours may not be legal. - *max_block_size = find_partition_size(*max_block_size, - row8x8_remaining, col8x8_remaining, - &bh, &bw); - *min_block_size = MIN(*min_block_size, *max_block_size); + // Check border cases where max and min from neighbors may not be legal. 
+ max_size = find_partition_size(max_size, + row8x8_remaining, col8x8_remaining, + &bh, &bw); + min_size = MIN(min_size, max_size); // When use_square_partition_only is true, make sure at least one square // partition is allowed by selecting the next smaller square size as // *min_block_size. if (cpi->sf.use_square_partition_only && - next_square_size[*max_block_size] < *min_block_size) { - *min_block_size = next_square_size[*max_block_size]; + next_square_size[max_size] < min_size) { + min_size = next_square_size[max_size]; } + *min_block_size = min_size; + *max_block_size = max_size; } static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { @@ -2029,7 +2166,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int64_t *dist, int do_recon, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + MACROBLOCKD *const xd = &x->e_mbd; + const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; TOKENEXTRA *tp_orig = *tp; @@ -2042,8 +2180,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, int do_split = bsize >= BLOCK_8X8; int do_rect = 1; // Override skipping rectangular partition operations for edge blocks - const int force_horz_split = (mi_row + ms >= cm->mi_rows); - const int force_vert_split = (mi_col + ms >= cm->mi_cols); + const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); + const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); const int xss = x->e_mbd.plane[1].subsampling_x; const int yss = x->e_mbd.plane[1].subsampling_y; @@ -2069,6 +2207,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (bsize == BLOCK_16X16) { set_offsets(cpi, tile, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); + } else { + x->in_active_map = check_active_map(cpi, x, 
mi_row, mi_col, bsize); } // Determine partition types in search according to the speed features. @@ -2110,9 +2250,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ctx, best_rd); if (this_rate != INT_MAX) { if (bsize >= BLOCK_8X8) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rate += x->partition_cost[pl][PARTITION_NONE]; } sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); @@ -2157,8 +2295,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (do_split) { subsize = get_subsize(bsize, PARTITION_SPLIT); for (i = 0; i < 4 && sum_rd < best_rd; ++i) { - const int x_idx = (i & 1) * ms; - const int y_idx = (i >> 1) * ms; + const int x_idx = (i & 1) * mi_step; + const int y_idx = (i >> 1) * mi_step; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; @@ -2182,9 +2320,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd && i == 4) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2216,7 +2352,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { + if (sum_rd < best_rd && mi_row + mi_step < cm->mi_rows) { update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); @@ -2228,7 +2364,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) get_block_context(x, 
subsize)->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, + rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -2240,9 +2376,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_HORZ]; sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2269,7 +2403,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); - if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { + if (sum_rd < best_rd && mi_col + mi_step < cm->mi_cols) { update_state(cpi, get_block_context(x, subsize), mi_row, mi_col, subsize, 0); encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); @@ -2281,7 +2415,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, partition_none_allowed) get_block_context(x, subsize)->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, + rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); if (this_rate == INT_MAX) { @@ -2293,9 +2427,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, } } if (sum_rd < best_rd) { - pl = partition_plane_context(cpi->above_seg_context, - cpi->left_seg_context, - mi_row, mi_col, bsize); + pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rate += x->partition_cost[pl][PARTITION_VERT]; 
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); if (sum_rd < best_rd) { @@ -2323,13 +2455,14 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, // and and if necessary apply a Q delta using segmentation to get // closer to the target. if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, best_rate); - } - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = best_rate; - cpi->cyclic_refresh.projected_dist_sb = best_dist; + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); } if (bsize == BLOCK_64X64) { @@ -2344,11 +2477,13 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + SPEED_FEATURES *const sf = &cpi->sf; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; @@ -2359,7 +2494,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, BLOCK_SIZE i; MACROBLOCK *x = &cpi->mb; - if (cpi->sf.adaptive_pred_interp_filter) { + if (sf->adaptive_pred_interp_filter) { for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) { const int num_4x4_w = num_4x4_blocks_wide_lookup[i]; const int num_4x4_h = num_4x4_blocks_high_lookup[i]; @@ 
-2373,63 +2508,69 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, vp9_zero(cpi->mb.pred_mv); - if ((cpi->sf.partition_search_type == SEARCH_PARTITION && - cpi->sf.use_lastframe_partitioning) || - cpi->sf.partition_search_type == FIXED_PARTITION || - cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { - const int idx_str = cm->mode_info_stride * mi_row + mi_col; + if ((sf->partition_search_type == SEARCH_PARTITION && + sf->use_lastframe_partitioning) || + sf->partition_search_type == FIXED_PARTITION || + sf->partition_search_type == VAR_BASED_PARTITION || + sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { + const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; cpi->mb.source_variance = UINT_MAX; - if (cpi->sf.partition_search_type == FIXED_PARTITION) { + if (sf->partition_search_type == FIXED_PARTITION) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - cpi->sf.always_this_block_size); + sf->always_this_block_size); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) { + } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) { BLOCK_SIZE bsize; set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); - } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) { + } else if (sf->partition_search_type == VAR_BASED_PARTITION) { choose_partitioning(cpi, tile, mi_row, mi_col); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } else { if 
((cm->current_video_frame - % cpi->sf.last_partitioning_redo_frequency) == 0 + % sf->last_partitioning_redo_frequency) == 0 || cm->prev_mi == 0 || cm->show_frame == 0 || cm->frame_type == KEY_FRAME || cpi->rc.is_src_frame_alt_ref - || ((cpi->sf.use_lastframe_partitioning == + || ((sf->use_lastframe_partitioning == LAST_FRAME_PARTITION_LOW_MOTION) && sb_has_motion(cm, prev_mi_8x8))) { // If required set upper and lower partition size limits - if (cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { - copy_partitioning(cm, mi_8x8, prev_mi_8x8); + if (sf->constrain_copy_partition && + sb_has_motion(cm, prev_mi_8x8)) + constrain_copy_partitioning(cpi, tile, mi_8x8, prev_mi_8x8, + mi_row, mi_col, BLOCK_16X16); + else + copy_partitioning(cm, mi_8x8, prev_mi_8x8); rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1); } } } else { // If required set upper and lower partition size limits - if (cpi->sf.auto_min_max_partition_size) { + if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile, mi_row, mi_col, - &cpi->sf.min_partition_size, - &cpi->sf.max_partition_size); + &sf->min_partition_size, + &sf->max_partition_size); } rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, INT64_MAX); @@ -2444,9 +2585,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); x->act_zbin_adj = 0; - cpi->seg0_idx = 0; - - xd->mode_info_stride = cm->mode_info_stride; // Copy data over into macro block data structures. 
vp9_setup_src_planes(x, cpi->Source, 0, 0); @@ -2458,27 +2596,16 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); - xd->mi_8x8[0]->mbmi.mode = DC_PRED; - xd->mi_8x8[0]->mbmi.uv_mode = DC_PRED; - - vp9_zero(cm->counts.y_mode); - vp9_zero(cm->counts.uv_mode); - vp9_zero(cm->counts.inter_mode); - vp9_zero(cm->counts.partition); - vp9_zero(cm->counts.intra_inter); - vp9_zero(cm->counts.comp_inter); - vp9_zero(cm->counts.single_ref); - vp9_zero(cm->counts.comp_ref); - vp9_zero(cm->counts.tx); - vp9_zero(cm->counts.skip); + xd->mi[0]->mbmi.mode = DC_PRED; + xd->mi[0]->mbmi.uv_mode = DC_PRED; // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(cpi->above_context[0], 0, - sizeof(*cpi->above_context[0]) * + vpx_memset(xd->above_context[0], 0, + sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); - vpx_memset(cpi->above_seg_context, 0, - sizeof(*cpi->above_seg_context) * aligned_mi_cols); + vpx_memset(xd->above_seg_context, 0, + sizeof(*xd->above_seg_context) * aligned_mi_cols); } static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { @@ -2508,100 +2635,15 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { } } -static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) { - if (!mi_8x8[y * mis + x]->mbmi.skip) - return 0; - } - } - - return 1; -} - -static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs, - TX_SIZE tx_size) { - int x, y; - - for (y = 0; y < ymbs; y++) { - for (x = 0; x < xmbs; x++) - mi_8x8[y * mis + x]->mbmi.tx_size = tx_size; - } -} - -static void reset_skip_txfm_size_b(const VP9_COMMON *cm, int mis, - TX_SIZE max_tx_size, int bw, int bh, - int mi_row, int mi_col, - MODE_INFO **mi_8x8) { - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { - return; - } else { - const 
MB_MODE_INFO *const mbmi = &mi_8x8[0]->mbmi; - if (mbmi->tx_size > max_tx_size) { - const int ymbs = MIN(bh, cm->mi_rows - mi_row); - const int xmbs = MIN(bw, cm->mi_cols - mi_col); - - assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) || - get_skip_flag(mi_8x8, mis, ymbs, xmbs)); - set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size); - } - } -} - -static void reset_skip_txfm_size_sb(VP9_COMMON *cm, MODE_INFO **mi_8x8, - TX_SIZE max_tx_size, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const int mis = cm->mode_info_stride; - int bw, bh; - const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; - - bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type]; - bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type]; - - if (bw == bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, bs, mi_row, mi_col, - mi_8x8); - } else if (bw == bs && bh < bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, bs, hbs, mi_row + hbs, - mi_col, mi_8x8 + hbs * mis); - } else if (bw < bs && bh == bs) { - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, mi_col, - mi_8x8); - reset_skip_txfm_size_b(cm, mis, max_tx_size, hbs, bs, mi_row, - mi_col + hbs, mi_8x8 + hbs); - } else { - const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize]; - int n; - - assert(bw < bs && bh < bs); - - for (n = 0; n < 4; n++) { - const int mi_dc = hbs * (n & 1); - const int mi_dr = hbs * (n >> 1); - - reset_skip_txfm_size_sb(cm, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size, - mi_row + mi_dr, mi_col + mi_dc, subsize); - } - } -} - static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) { int mi_row, mi_col; - const int mis = cm->mode_info_stride; - MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible; + const int mis = cm->mi_stride; + MODE_INFO **mi_ptr = cm->mi_grid_visible; - for (mi_row = 
0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) { - mi_8x8 = mi_ptr; - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) { - reset_skip_txfm_size_sb(cm, mi_8x8, txfm_max, mi_row, mi_col, - BLOCK_64X64); + for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { + for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { + if (mi_ptr[mi_col]->mbmi.tx_size > txfm_max) + mi_ptr[mi_col]->mbmi.tx_size = txfm_max; } } } @@ -2680,16 +2722,347 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; set_offsets(cpi, tile, mi_row, mi_col, bsize); - xd->mi_8x8[0]->mbmi.sb_type = bsize; + xd->mi[0]->mbmi.sb_type = bsize; if (!frame_is_intra_only(cm)) { vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize); } else { MB_PREDICTION_MODE intramode = DC_PRED; - set_mode_info(&xd->mi_8x8[0]->mbmi, bsize, intramode); + set_mode_info(&xd->mi[0]->mbmi, bsize, intramode); + } + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); +} + +static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, + int mi_row, int mi_col, + BLOCK_SIZE bsize, BLOCK_SIZE subsize) { + MACROBLOCKD *xd = &x->e_mbd; + int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + PARTITION_TYPE partition = partition_lookup[bsl][subsize]; + + assert(bsize >= BLOCK_8X8); + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + switch (partition) { + case PARTITION_NONE: + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + break; + case PARTITION_VERT: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + + if (mi_col + hbs < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row, mi_col + hbs); + 
*(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, bsize); + } + break; + case PARTITION_HORZ: + *get_sb_index(x, subsize) = 0; + set_modeinfo_offsets(cm, xd, mi_row, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); + if (mi_row + hbs < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + set_modeinfo_offsets(cm, xd, mi_row + hbs, mi_col); + *(xd->mi[0]) = get_block_context(x, subsize)->mic; + duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, bsize); + } + break; + case PARTITION_SPLIT: + *get_sb_index(x, subsize) = 0; + fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 1; + fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 2; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, + *(get_sb_partitioning(x, subsize))); + *get_sb_index(x, subsize) = 3; + fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, + *(get_sb_partitioning(x, subsize))); + break; + default: + break; + } +} + +static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *rate, + int64_t *dist, int do_recon, int64_t best_rd) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; + TOKENEXTRA *tp_orig = *tp; + PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); + int i; + BLOCK_SIZE subsize; + int this_rate, sum_rate = 0, best_rate = INT_MAX; + int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX; + int64_t sum_rd = 0; + int do_split = bsize >= BLOCK_8X8; + int do_rect = 1; + // Override skipping rectangular partition operations for edge blocks + const int force_horz_split = (mi_row + ms >= cm->mi_rows); + const int 
force_vert_split = (mi_col + ms >= cm->mi_cols); + const int xss = x->e_mbd.plane[1].subsampling_x; + const int yss = x->e_mbd.plane[1].subsampling_y; + + int partition_none_allowed = !force_horz_split && !force_vert_split; + int partition_horz_allowed = !force_vert_split && yss <= xss && + bsize >= BLOCK_8X8; + int partition_vert_allowed = !force_horz_split && xss <= yss && + bsize >= BLOCK_8X8; + (void) *tp_orig; + + if (bsize < BLOCK_8X8) { + // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 + // there is nothing to be done. + if (x->ab_index != 0) { + *rate = 0; + *dist = 0; + return; + } + } + + assert(num_8x8_blocks_wide_lookup[bsize] == + num_8x8_blocks_high_lookup[bsize]); + + x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize); + + // Determine partition types in search according to the speed features. + // The threshold set here has to be of square block size. + if (cpi->sf.auto_min_max_partition_size) { + partition_none_allowed &= (bsize <= cpi->sf.max_partition_size && + bsize >= cpi->sf.min_partition_size); + partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_horz_split); + partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size && + bsize > cpi->sf.min_partition_size) || + force_vert_split); + do_split &= bsize > cpi->sf.min_partition_size; + } + if (cpi->sf.use_square_partition_only) { + partition_horz_allowed &= force_horz_split; + partition_vert_allowed &= force_vert_split; + } + + if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed)) + do_split = 0; + + // PARTITION_NONE + if (partition_none_allowed) { + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, bsize); + ctx->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate != INT_MAX) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_NONE]; + sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, 
this_dist); + if (sum_rd < best_rd) { + int64_t stop_thresh = 4096; + int64_t stop_thresh_rd; + + best_rate = this_rate; + best_dist = this_dist; + best_rd = sum_rd; + if (bsize >= BLOCK_8X8) + *(get_sb_partitioning(x, bsize)) = bsize; + + // Adjust threshold according to partition size. + stop_thresh >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh); + // If obtained distortion is very small, choose current partition + // and stop splitting. + if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) { + do_split = 0; + do_rect = 0; + } + } + } + if (!x->in_active_map) { + do_split = 0; + do_rect = 0; + } + } + + // store estimated motion vector + store_pred_mv(x, ctx); + + // PARTITION_SPLIT + sum_rd = 0; + if (do_split) { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + sum_rate += x->partition_cost[pl][PARTITION_SPLIT]; + subsize = get_subsize(bsize, PARTITION_SPLIT); + for (i = 0; i < 4 && sum_rd < best_rd; ++i) { + const int x_idx = (i & 1) * ms; + const int y_idx = (i >> 1) * ms; + + if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) + continue; + + *get_sb_index(x, subsize) = i; + load_pred_mv(x, ctx); + + nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, + subsize, &this_rate, &this_dist, 0, + best_rd - sum_rd); + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } else { + // skip rectangular partition test when larger block size + // gives better rd cost + if (cpi->sf.less_rectangular_check) + do_rect &= !partition_none_allowed; + } + } + + // PARTITION_HORZ + if (partition_horz_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_HORZ); + 
*get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_HORZ]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + } + } + if (sum_rd < best_rd) { + best_rd = sum_rd; + best_rate = sum_rate; + best_dist = sum_dist; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + + // PARTITION_VERT + if (partition_vert_allowed && do_rect) { + subsize = get_subsize(bsize, PARTITION_VERT); + + *get_sb_index(x, subsize) = 0; + if (cpi->sf.adaptive_motion_search) + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, + &this_rate, &this_dist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) { + *get_sb_index(x, subsize) = 1; + + load_pred_mv(x, ctx); + + nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, + &this_rate, &this_dist, subsize); + + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; + + if (this_rate == INT_MAX) { + sum_rd = INT64_MAX; + } else { + int pl = partition_plane_context(xd, mi_row, mi_col, bsize); + this_rate += x->partition_cost[pl][PARTITION_VERT]; + sum_rate += this_rate; + sum_dist += this_dist; + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, 
sum_dist); + } + } + if (sum_rd < best_rd) { + best_rate = sum_rate; + best_dist = sum_dist; + best_rd = sum_rd; + *(get_sb_partitioning(x, bsize)) = subsize; + } + } + + *rate = best_rate; + *dist = best_dist; + + if (best_rate == INT_MAX) + return; + + // update mode info array + fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, + *(get_sb_partitioning(x, bsize))); + + if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) { + int output_enabled = (bsize == BLOCK_64X64); + + // Check the projected output rate for this SB against it's target + // and and if necessary apply a Q delta using segmentation to get + // closer to the target. + if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { + vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled, + best_rate); + } + + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + best_rate, best_dist); + + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); + } + + if (bsize == BLOCK_64X64) { + assert(tp_orig < *tp); + assert(best_rate < INT_MAX); + assert(best_dist < INT64_MAX); + } else { + assert(tp_orig == *tp); } - duplicate_modeinfo_in_sb(cm, xd, mi_row, mi_col, bsize); } static void nonrd_use_partition(VP9_COMP *cpi, @@ -2701,35 +3074,34 @@ static void nonrd_use_partition(VP9_COMP *cpi, int *totrate, int64_t *totdist) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; PARTITION_TYPE partition; BLOCK_SIZE subsize; - int rate; - int64_t dist; + int rate = INT_MAX; + int64_t dist = INT64_MAX; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - if (bsize >= BLOCK_8X8) { - subsize = mi_8x8[0]->mbmi.sb_type; - } else { - subsize = BLOCK_4X4; - } - + subsize = (bsize >= BLOCK_8X8) ? 
mi_8x8[0]->mbmi.sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; switch (partition) { case PARTITION_NONE: nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; break; case PARTITION_VERT: *get_sb_index(x, subsize) = 0; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; if (mi_col + hbs < cm->mi_cols) { *get_sb_index(x, subsize) = 1; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs, &rate, &dist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -2740,10 +3112,12 @@ static void nonrd_use_partition(VP9_COMP *cpi, case PARTITION_HORZ: *get_sb_index(x, subsize) = 0; nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize); + get_block_context(x, subsize)->mic.mbmi = xd->mi[0]->mbmi; if (mi_row + hbs < cm->mi_rows) { *get_sb_index(x, subsize) = 1; nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, &rate, &dist, subsize); + get_block_context(x, subsize)->mic.mbmi = mi_8x8[0]->mbmi; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -2753,7 +3127,6 @@ static void nonrd_use_partition(VP9_COMP *cpi, break; case PARTITION_SPLIT: subsize = get_subsize(bsize, PARTITION_SPLIT); - *get_sb_index(x, subsize) = 0; nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, subsize, output_enabled, totrate, totdist); @@ -2790,10 +3163,9 @@ static void nonrd_use_partition(VP9_COMP *cpi, } if (bsize == BLOCK_64X64 && output_enabled) { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - cpi->cyclic_refresh.projected_rate_sb = *totrate; - cpi->cyclic_refresh.projected_dist_sb = *totdist; - } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + 
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh, + *totrate, *totdist); encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize); } } @@ -2801,132 +3173,102 @@ static void nonrd_use_partition(VP9_COMP *cpi, static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &cpi->mb.e_mbd; int mi_col; // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + vpx_memset(&xd->left_context, 0, sizeof(xd->left_context)); + vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { - int dummy_rate; - int64_t dummy_dist; - const int idx_str = cm->mode_info_stride * mi_row + mi_col; + int dummy_rate = 0; + int64_t dummy_dist = 0; + const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - - BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? 
- cpi->sf.always_this_block_size : - get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); + BLOCK_SIZE bsize; cpi->mb.source_variance = UINT_MAX; + vp9_zero(cpi->mb.pred_mv); // Set the partition type of the 64X64 block - if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) - choose_partitioning(cpi, tile, mi_row, mi_col); - else if (cpi->sf.partition_search_type == REFERENCE_PARTITION) { - if (cpi->sf.partition_check) { - MACROBLOCK *x = &cpi->mb; - int rate1, rate2, rate3; - int64_t dist1, dist2, dist3; - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_8X8); + switch (cpi->sf.partition_search_type) { + case VAR_BASED_PARTITION: + choose_partitioning(cpi, tile, mi_row, mi_col); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - 0, &rate1, &dist1); - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_16X16); + 1, &dummy_rate, &dummy_dist); + break; + case SOURCE_VAR_BASED_PARTITION: + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - 0, &rate2, &dist2); - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, BLOCK_32X32); + 1, &dummy_rate, &dummy_dist); + break; + case VAR_BASED_FIXED_PARTITION: + case FIXED_PARTITION: + bsize = cpi->sf.partition_search_type == FIXED_PARTITION ? 
+ cpi->sf.always_this_block_size : + get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); + set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - 0, &rate3, &dist3); - - if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) < - RDCOST(x->rdmult, x->rddiv, rate2, dist2)) { - if (RDCOST(x->rdmult, x->rddiv, rate1, dist1) < - RDCOST(x->rdmult, x->rddiv, rate3, dist3)) - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - BLOCK_8X8); - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - BLOCK_32X32); + 1, &dummy_rate, &dummy_dist); + break; + case REFERENCE_PARTITION: + if (cpi->sf.partition_check || sb_has_motion(cm, prev_mi_8x8)) { + nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1, INT64_MAX); } else { - if (RDCOST(x->rdmult, x->rddiv, rate2, dist2) < - RDCOST(x->rdmult, x->rddiv, rate3, dist3)) - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - BLOCK_16X16); - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, - BLOCK_32X32); - } - - } else { - if (!sb_has_motion(cm, prev_mi_8x8)) copy_partitioning(cm, mi_8x8, prev_mi_8x8); - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); - } + nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rate, &dummy_dist); + } + break; + default: + assert(0); } - else - set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize); - - nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1, - &dummy_rate, &dummy_dist); } } // end RTC play code static void encode_frame_internal(VP9_COMP *cpi) { - int mi_row; + SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// 
cm->frame_type); - - vp9_zero(cm->counts.switchable_interp); - vp9_zero(cpi->tx_stepdown_count); - - xd->mi_8x8 = cm->mi_grid_visible; - // required for vp9_frame_init_quantizer - xd->mi_8x8[0] = cm->mi; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; - vp9_zero(cm->counts.mv); + vp9_zero(cm->counts); vp9_zero(cpi->coef_counts); - vp9_zero(cm->counts.eob_branch); + vp9_zero(cpi->tx_stepdown_count); + vp9_zero(cpi->rd_comp_pred_diff); + vp9_zero(cpi->rd_filter_diff); + vp9_zero(cpi->rd_tx_select_diff); + vp9_zero(cpi->rd_tx_select_threshes); - // Set frame level transform size use case cm->tx_mode = select_tx_mode(cpi); - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 - && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); vp9_frame_init_quantizer(cpi); vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, cm->base_qindex); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. - init_encode_frame_mb_context(cpi); - - // Build a frame level activity map - build_activity_map(cpi); - } - - // Re-initialize encode frame context. init_encode_frame_mb_context(cpi); - vp9_zero(cpi->rd_comp_pred_diff); - vp9_zero(cpi->rd_filter_diff); - vp9_zero(cpi->rd_tx_select_diff); - vp9_zero(cpi->rd_tx_select_threshes); + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + build_activity_map(cpi); - set_prev_mi(cm); + cm->prev_mi = get_prev_mi(cm); - if (cpi->sf.use_nonrd_pick_mode) { + if (sf->use_nonrd_pick_mode) { // Initialize internal buffer pointers for rtc coding, where non-RD // mode decision is used and hence no buffer pointer swap needed. 
int i; @@ -2941,6 +3283,29 @@ static void encode_frame_internal(VP9_COMP *cpi) { p[i].eobs = ctx->eobs_pbuf[i][0]; } vp9_zero(x->zcoeff_blk); + + if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION && + cm->current_video_frame > 0) { + int check_freq = cpi->sf.search_type_check_frequency; + + if ((cm->current_video_frame - 1) % check_freq == 0) { + cpi->use_large_partition_rate = 0; + } + + if ((cm->current_video_frame - 1) % check_freq == 1) { + const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] - + b_width_log2_lookup[BLOCK_16X16]) + + (b_height_log2_lookup[BLOCK_32X32] - + b_height_log2_lookup[BLOCK_16X16])); + cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 * + mbs_in_b32x32 / cm->MBs; + } + + if ((cm->current_video_frame - 1) % check_freq >= 1) { + if (cpi->use_large_partition_rate < 15) + cpi->sf.partition_search_type = FIXED_PARTITION; + } + } } { @@ -2958,12 +3323,13 @@ static void encode_frame_internal(VP9_COMP *cpi) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileInfo tile; TOKENEXTRA *tp_old = tp; + int mi_row; // For each row of SBs in the frame vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) + if (sf->use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) encode_nonrd_sb_row(cpi, &tile, mi_row, &tp); else encode_rd_sb_row(cpi, &tile, mi_row, &tp); @@ -2978,18 +3344,18 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } - if (cpi->sf.skip_encode_sb) { + if (sf->skip_encode_sb) { int j; unsigned int intra_count = 0, inter_count = 0; for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { intra_count += cm->counts.intra_inter[j][0]; inter_count += cm->counts.intra_inter[j][1]; } - cpi->sf.skip_encode_frame = (intra_count << 2) < inter_count && - cm->frame_type != KEY_FRAME && - cm->show_frame; + 
sf->skip_encode_frame = (intra_count << 2) < inter_count && + cm->frame_type != KEY_FRAME && + cm->show_frame; } else { - cpi->sf.skip_encode_frame = 0; + sf->skip_encode_frame = 0; } #if 0 @@ -3023,33 +3389,31 @@ void vp9_encode_frame(VP9_COMP *cpi) { if (cpi->sf.frame_parameter_update) { int i; - REFERENCE_MODE reference_mode; - /* - * This code does a single RD pass over the whole frame assuming - * either compound, single or hybrid prediction as per whatever has - * worked best for that type of frame in the past. - * It also predicts whether another coding mode would have worked - * better that this coding mode. If that is the case, it remembers - * that for subsequent frames. - * It does the same analysis for transform size selection also. - */ + + // This code does a single RD pass over the whole frame assuming + // either compound, single or hybrid prediction as per whatever has + // worked best for that type of frame in the past. + // It also predicts whether another coding mode would have worked + // better that this coding mode. If that is the case, it remembers + // that for subsequent frames. + // It does the same analysis for transform size selection also. 
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type]; const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type]; /* prediction (compound, single or hybrid) mode selection */ - if (frame_type == 3 || !cm->allow_comp_inter_inter) - reference_mode = SINGLE_REFERENCE; + if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter) + cm->reference_mode = SINGLE_REFERENCE; else if (mode_thresh[COMPOUND_REFERENCE] > mode_thresh[SINGLE_REFERENCE] && mode_thresh[COMPOUND_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT] && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) - reference_mode = COMPOUND_REFERENCE; + cm->reference_mode = COMPOUND_REFERENCE; else if (mode_thresh[SINGLE_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT]) - reference_mode = SINGLE_REFERENCE; + cm->reference_mode = SINGLE_REFERENCE; else - reference_mode = REFERENCE_MODE_SELECT; + cm->reference_mode = REFERENCE_MODE_SELECT; if (cm->interp_filter == SWITCHABLE) { if (frame_type != ALTREF_FRAME && @@ -3065,9 +3429,6 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } - cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; - cm->reference_mode = reference_mode; - encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { @@ -3146,10 +3507,8 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } } else { - cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; cm->reference_mode = SINGLE_REFERENCE; - // Force the usage of the BILINEAR interp_filter. 
- cm->interp_filter = BILINEAR; + cm->interp_filter = SWITCHABLE; encode_frame_internal(cpi); } } @@ -3214,19 +3573,20 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO **mi_8x8 = xd->mi_8x8; + MODE_INFO **mi_8x8 = xd->mi; MODE_INFO *mi = mi_8x8[0]; MB_MODE_INFO *mbmi = &mi->mbmi; PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize); unsigned int segment_id = mbmi->segment_id; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && - (cpi->oxcf.aq_mode != COMPLEXITY_AQ && - cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) && - !cpi->sf.use_nonrd_pick_mode; + cpi->oxcf.aq_mode != COMPLEXITY_AQ && + cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && + cpi->sf.allow_skip_recode; + x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; @@ -3243,7 +3603,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, } } else { set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Adjust the zbin based on this MB rate. 
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.h b/libvpx/vp9/encoder/vp9_encodeframe.h index 72343cd..131e932 100644 --- a/libvpx/vp9/encoder/vp9_encodeframe.h +++ b/libvpx/vp9/encoder/vp9_encodeframe.h @@ -20,6 +20,12 @@ struct macroblock; struct yv12_buffer_config; struct VP9_COMP; +typedef struct { + unsigned int sse; + int sum; + unsigned int var; +} diff; + void vp9_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, int mi_col); diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c index fae03bf..5e98e4e 100644 --- a/libvpx/vp9/encoder/vp9_encodemb.c +++ b/libvpx/vp9/encoder/vp9_encodemb.c @@ -111,7 +111,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *p = &mb->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); + const int ref = is_inter_block(&xd->mi[0]->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); @@ -139,7 +139,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; - if (!is_inter_block(&mb->e_mbd.mi_8x8[0]->mbmi)) + if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi)) rdmult = (rdmult * 9) >> 4; rddiv = mb->rddiv; /* Initialize the sentinel node of the trellis. 
*/ @@ -452,7 +452,7 @@ void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct encode_b_args arg = {x, &ctx, &mbmi->skip}; int plane; @@ -477,7 +477,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; int16_t *coeff = BLOCK_OFFSET(p->coeff, block); @@ -562,7 +562,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, case TX_4X4: tx_type = get_tx_type_4x4(pd->plane_type, xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; - mode = plane == 0 ? get_y_mode(xd->mi_8x8[0], block) : mbmi->uv_mode; + mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? 
src_stride : dst_stride, @@ -608,14 +608,14 @@ void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block, void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { const MACROBLOCKD *const xd = &x->e_mbd; - struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip}; + struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip}; vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra, &arg); } int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) { - MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO * mbmi = &x->e_mbd.mi[0]->mbmi; x->skip_encode = 0; mbmi->mode = DC_PRED; mbmi->ref_frame[0] = INTRA_FRAME; diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c index 2a10bbf..9d44865 100644 --- a/libvpx/vp9/encoder/vp9_encodemv.c +++ b/libvpx/vp9/encoder/vp9_encodemv.c @@ -242,7 +242,7 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2], } void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd) { - const MODE_INFO *mi = xd->mi_8x8[0]; + const MODE_INFO *mi = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mi->mbmi; if (mbmi->sb_type < BLOCK_8X8) { diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c index c4c219b..db32ef8 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.c +++ b/libvpx/vp9/encoder/vp9_firstpass.c @@ -23,6 +23,7 @@ #include "vp9/common/vp9_reconinter.h" // vp9_setup_dst_planes() #include "vp9/common/vp9_systemdependent.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" @@ -34,7 +35,6 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_variance.h" #define OUTPUT_FPF 0 @@ -54,8 +54,6 @@ #define MIN_KF_BOOST 300 -#define DISABLE_RC_LONG_TERM_MEM 0 - #if CONFIG_MULTIPLE_ARF // Set 
MIN_GF_INTERVAL to 1 for the full decomposition. #define MIN_GF_INTERVAL 2 @@ -63,6 +61,8 @@ #define MIN_GF_INTERVAL 4 #endif +#define DISABLE_RC_LONG_TERM_MEM + static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; *a = *b; @@ -257,12 +257,22 @@ static void avg_stats(FIRSTPASS_STATS *section) { // harder frames. static double calculate_modified_err(const VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame) { - const struct twopass_rc *const twopass = &cpi->twopass; - const FIRSTPASS_STATS *const stats = &twopass->total_stats; - const double av_err = stats->ssim_weighted_pred_err / stats->count; - double modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(av_err), - cpi->oxcf.two_pass_vbrbias / 100.0); + const struct twopass_rc *twopass = &cpi->twopass; + const SVC *const svc = &cpi->svc; + const FIRSTPASS_STATS *stats; + double av_err; + double modified_error; + + if (svc->number_spatial_layers > 1 && + svc->number_temporal_layers == 1) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; + } + + stats = &twopass->total_stats; + av_err = stats->ssim_weighted_pred_err / stats->count; + modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / + DOUBLE_DIVIDE_CHECK(av_err), + cpi->oxcf.two_pass_vbrbias / 100.0); return fclamp(modified_error, twopass->modified_error_min, twopass->modified_error_max); @@ -326,15 +336,13 @@ static double simple_weight(const YV12_BUFFER_CONFIG *buf) { } // This function returns the maximum target rate per frame. 
-static int frame_max_bits(const VP9_COMP *cpi) { - int64_t max_bits = - ((int64_t)cpi->rc.av_per_frame_bandwidth * - (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100; - +static int frame_max_bits(const RATE_CONTROL *rc, const VP9_CONFIG *oxcf) { + int64_t max_bits = ((int64_t)rc->av_per_frame_bandwidth * + (int64_t)oxcf->two_pass_vbrmax_section) / 100; if (max_bits < 0) max_bits = 0; - else if (max_bits > cpi->rc.max_frame_bandwidth) - max_bits = cpi->rc.max_frame_bandwidth; + else if (max_bits > rc->max_frame_bandwidth) + max_bits = rc->max_frame_bandwidth; return (int)max_bits; } @@ -375,7 +383,7 @@ static unsigned int zz_motion_search(const MACROBLOCK *x) { const uint8_t *const ref = xd->plane[0].pre[0].buf; const int ref_stride = xd->plane[0].pre[0].stride; unsigned int sse; - vp9_variance_fn_t fn = get_block_variance_fn(xd->mi_8x8[0]->mbmi.sb_type); + vp9_variance_fn_t fn = get_block_variance_fn(xd->mi[0]->mbmi.sb_type); fn(src, src_stride, ref, ref_stride, &sse); return sse; } @@ -389,7 +397,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int num00, tmp_err, n, sr = 0; int step_param = 3; int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; int new_mv_mode_penalty = 256; const int quart_frm = MIN(cpi->common.width, cpi->common.height); @@ -533,8 +541,8 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); vp9_setup_dst_planes(xd, new_yv12, 0, 0); - xd->mi_8x8 = cm->mi_grid_visible; - xd->mi_8x8[0] = cm->mi; + xd->mi = cm->mi_grid_visible; + xd->mi[0] = cm->mi; vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); @@ -582,8 +590,8 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->left_available = 
(mb_col != 0); - xd->mi_8x8[0]->mbmi.sb_type = bsize; - xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.sb_type = bsize; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize], mb_col << 1, num_8x8_blocks_wide_lookup[bsize], @@ -702,11 +710,11 @@ void vp9_first_pass(VP9_COMP *cpi) { mv.as_mv.row *= 8; mv.as_mv.col *= 8; this_error = motion_error; - xd->mi_8x8[0]->mbmi.mode = NEWMV; - xd->mi_8x8[0]->mbmi.mv[0] = mv; - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; - xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; - xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE; + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0] = mv; + xd->mi[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; + xd->mi[0]->mbmi.ref_frame[1] = NONE; vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); vp9_encode_sby_pass1(x, bsize); sum_mvr += mv.as_mv.row; @@ -902,21 +910,21 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, const double section_err = fpstats->coded_error / fpstats->count; const double err_per_mb = section_err / num_mbs; + const double speed_term = 1.0 + ((double)cpi->speed * 0.04); if (section_target_bandwitdh <= 0) return rc->worst_quality; // Highest value allowed - target_norm_bits_per_mb = section_target_bandwitdh < (1 << 20) - ? (512 * section_target_bandwitdh) / num_mbs - : 512 * (section_target_bandwitdh / num_mbs); + target_norm_bits_per_mb = + ((uint64_t)section_target_bandwitdh << BPER_MB_NORMBITS) / num_mbs; // Try and pick a max Q that will be high enough to encode the // content at the given rate. 
for (q = rc->best_quality; q < rc->worst_quality; ++q) { const double err_correction_factor = calc_correction_factor(err_per_mb, ERR_DIVISOR, 0.5, 0.90, q); - const int bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q, - err_correction_factor); + const int bits_per_mb_at_this_q = + vp9_rc_bits_per_mb(INTER_FRAME, q, (err_correction_factor * speed_term)); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } @@ -931,10 +939,18 @@ int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, extern void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_init_second_pass(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; FIRSTPASS_STATS this_frame; const FIRSTPASS_STATS *start_pos; - struct twopass_rc *const twopass = &cpi->twopass; + struct twopass_rc *twopass = &cpi->twopass; const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int is_spatial_svc = (svc->number_spatial_layers > 1) && + (svc->number_temporal_layers == 1); + double frame_rate; + + if (is_spatial_svc) { + twopass = &svc->layer_context[svc->spatial_layer_id].twopass; + } zero_stats(&twopass->total_stats); zero_stats(&twopass->total_left_stats); @@ -945,30 +961,44 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->total_stats = *twopass->stats_in_end; twopass->total_left_stats = twopass->total_stats; + frame_rate = 10000000.0 * twopass->total_stats.count / + twopass->total_stats.duration; // Each frame can have a different duration, as the frame rate in the source // isn't guaranteed to be constant. The frame rate prior to the first frame // encoded in the second pass is a guess. However, the sum duration is not. // It is calculated based on the actual durations of all frames from the // first pass. 
- vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count / - twopass->total_stats.duration); + + if (is_spatial_svc) { + vp9_update_spatial_layer_framerate(cpi, frame_rate); + twopass->bits_left = + (int64_t)(twopass->total_stats.duration * + svc->layer_context[svc->spatial_layer_id].target_bandwidth / + 10000000.0); + } else { + vp9_new_framerate(cpi, frame_rate); + twopass->bits_left = (int64_t)(twopass->total_stats.duration * + oxcf->target_bandwidth / 10000000.0); + } cpi->output_framerate = oxcf->framerate; - twopass->bits_left = (int64_t)(twopass->total_stats.duration * - oxcf->target_bandwidth / 10000000.0); // Calculate a minimum intra value to be used in determining the IIratio // scores used in the second pass. We have this minimum to make sure // that clips that are static but "low complexity" in the intra domain // are still boosted appropriately for KF/GF/ARF. - twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; - twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + if (!is_spatial_svc) { + // We don't know the number of MBs for each layer at this point. + // So we will do it later. + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } // This variable monitors how far behind the second ref update is lagging. twopass->sr_update_lag = 1; - // Scan the first pass file and calculate an average Intra / Inter error score - // ratio for the sequence. + // Scan the first pass file and calculate an average Intra / Inter error + // score ratio for the sequence. { double sum_iiratio = 0.0; start_pos = twopass->stats_in; @@ -1027,8 +1057,8 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm, // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. This is to help with more optimal kf and gf positioning. 
-static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, - int still_interval, +static int detect_transition_to_still(struct twopass_rc *twopass, + int frame_interval, int still_interval, double loop_decay_rate, double last_decay_rate) { int trans_to_still = 0; @@ -1040,19 +1070,19 @@ static int detect_transition_to_still(VP9_COMP *cpi, int frame_interval, loop_decay_rate >= 0.999 && last_decay_rate < 0.9) { int j; - const FIRSTPASS_STATS *position = cpi->twopass.stats_in; + const FIRSTPASS_STATS *position = twopass->stats_in; FIRSTPASS_STATS tmp_next_frame; // Look ahead a few frames to see if static condition persists... for (j = 0; j < still_interval; ++j) { - if (EOF == input_stats(&cpi->twopass, &tmp_next_frame)) + if (EOF == input_stats(twopass, &tmp_next_frame)) break; if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) break; } - reset_fpf_position(&cpi->twopass, position); + reset_fpf_position(twopass, position); // Only if it does do we signal a transition to still. if (j == still_interval) @@ -1374,9 +1404,11 @@ void define_fixed_arf_period(VP9_COMP *cpi) { // Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { + RATE_CONTROL *const rc = &cpi->rc; + VP9_CONFIG *const oxcf = &cpi->oxcf; + struct twopass_rc *const twopass = &cpi->twopass; FIRSTPASS_STATS next_frame = { 0 }; const FIRSTPASS_STATS *start_pos; - struct twopass_rc *const twopass = &cpi->twopass; int i; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1395,16 +1427,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mv_ratio_accumulator_thresh; - const int max_bits = frame_max_bits(cpi); // Max bits for a single frame. - - unsigned int allow_alt_ref = cpi->oxcf.play_alternate && - cpi->oxcf.lag_in_frames; + // Max bits for a single frame. 
+ const int max_bits = frame_max_bits(rc, oxcf); + unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames; int f_boost = 0; int b_boost = 0; int flash_detected; int active_max_gf_interval; - RATE_CONTROL *const rc = &cpi->rc; twopass->gf_group_bits = 0; @@ -1476,7 +1506,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. - if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, + if (detect_transition_to_still(twopass, i, 5, loop_decay_rate, last_loop_decay_rate)) { allow_alt_ref = 0; break; @@ -1615,8 +1645,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the bits to be allocated to the group as a whole. if (twopass->kf_group_bits > 0 && twopass->kf_group_error_left > 0) { - twopass->gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits * - (gf_group_err / cpi->twopass.kf_group_error_left)); + twopass->gf_group_bits = (int64_t)(twopass->kf_group_bits * + (gf_group_err / twopass->kf_group_error_left)); } else { twopass->gf_group_bits = 0; } @@ -1705,10 +1735,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { { // Adjust KF group bits and error remaining. twopass->kf_group_error_left -= (int64_t)gf_group_err; - twopass->kf_group_bits -= twopass->gf_group_bits; - - if (twopass->kf_group_bits < 0) - twopass->kf_group_bits = 0; // If this is an arf update we want to remove the score for the overlay // frame at the end which will usually be very cheap to code. 
@@ -1725,11 +1751,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->gf_group_error_left = (int64_t)gf_group_err; } - twopass->gf_group_bits -= twopass->gf_bits; - - if (twopass->gf_group_bits < 0) - twopass->gf_group_bits = 0; - // This condition could fail if there are two kfs very close together // despite MIN_GF_INTERVAL and would cause a divide by 0 in the // calculation of alt_extra_bits. @@ -1738,8 +1759,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (boost >= 150) { const int pct_extra = MIN(20, (boost - 100) / 50); - const int alt_extra_bits = (int)((twopass->gf_group_bits * pct_extra) / - 100); + const int alt_extra_bits = (int)(( + MAX(twopass->gf_group_bits - twopass->gf_bits, 0) * + pct_extra) / 100); twopass->gf_group_bits -= alt_extra_bits; } } @@ -1768,40 +1790,36 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Allocate bits to a normal frame that is neither a gf an arf or a key frame. static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { + struct twopass_rc *twopass = &cpi->twopass; + // For a single frame. + const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); + // Calculate modified prediction error used in bit allocation. + const double modified_err = calculate_modified_err(cpi, this_frame); int target_frame_size; - double modified_err; double err_fraction; - const int max_bits = frame_max_bits(cpi); // Max for a single frame. - - // Calculate modified prediction error used in bit allocation. - modified_err = calculate_modified_err(cpi, this_frame); - if (cpi->twopass.gf_group_error_left > 0) + if (twopass->gf_group_error_left > 0) // What portion of the remaining GF group error is used by this frame. 
- err_fraction = modified_err / cpi->twopass.gf_group_error_left; + err_fraction = modified_err / twopass->gf_group_error_left; else err_fraction = 0.0; // How many of those bits available for allocation should we give it? - target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); + target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction); // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at // the top end. target_frame_size = clamp(target_frame_size, 0, - MIN(max_bits, (int)cpi->twopass.gf_group_bits)); + MIN(max_bits, (int)twopass->gf_group_bits)); // Adjust error and bits remaining. - cpi->twopass.gf_group_error_left -= (int64_t)modified_err; - cpi->twopass.gf_group_bits -= target_frame_size; - - if (cpi->twopass.gf_group_bits < 0) - cpi->twopass.gf_group_bits = 0; + twopass->gf_group_error_left -= (int64_t)modified_err; // Per frame bit target for this frame. vp9_rc_set_frame_target(cpi, target_frame_size); } -static int test_candidate_kf(VP9_COMP *cpi, +static int test_candidate_kf(struct twopass_rc *twopass, const FIRSTPASS_STATS *last_frame, const FIRSTPASS_STATS *this_frame, const FIRSTPASS_STATS *next_frame) { @@ -1822,7 +1840,7 @@ static int test_candidate_kf(VP9_COMP *cpi, ((next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) { int i; - const FIRSTPASS_STATS *start_pos = cpi->twopass.stats_in; + const FIRSTPASS_STATS *start_pos = twopass->stats_in; FIRSTPASS_STATS local_next_frame = *next_frame; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1859,7 +1877,7 @@ static int test_candidate_kf(VP9_COMP *cpi, old_boost_score = boost_score; // Get the next frame details - if (EOF == input_stats(&cpi->twopass, &local_next_frame)) + if (EOF == input_stats(twopass, &local_next_frame)) break; } @@ -1869,7 +1887,7 @@ static int test_candidate_kf(VP9_COMP *cpi, is_viable_kf = 1; } else { // Reset the file position - reset_fpf_position(&cpi->twopass, start_pos); + 
reset_fpf_position(twopass, start_pos); is_viable_kf = 0; } @@ -1882,16 +1900,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; RATE_CONTROL *const rc = &cpi->rc; struct twopass_rc *const twopass = &cpi->twopass; - FIRSTPASS_STATS last_frame; const FIRSTPASS_STATS first_frame = *this_frame; - FIRSTPASS_STATS next_frame; const FIRSTPASS_STATS *start_position = twopass->stats_in; - + FIRSTPASS_STATS next_frame; + FIRSTPASS_STATS last_frame; double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; - double boost_score = 0; - double loop_decay_rate; - + double boost_score = 0.0; double kf_mod_err = 0.0; double kf_group_err = 0.0; double recent_loop_decay[8] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; @@ -1929,8 +1944,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Provided that we are not at the end of the file... if (cpi->oxcf.auto_key && lookup_next_frame_stats(twopass, &next_frame) != EOF) { + double loop_decay_rate; + // Check for a scene cut. - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) + if (test_candidate_kf(twopass, &last_frame, this_frame, &next_frame)) break; // How fast is the prediction quality decaying? @@ -1946,7 +1963,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special check for transition or high motion followed by a // static scene. - if (detect_transition_to_still(cpi, i, cpi->key_frame_frequency - i, + if (detect_transition_to_still(twopass, i, cpi->key_frame_frequency - i, loop_decay_rate, decay_accumulator)) break; @@ -1999,7 +2016,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Calculate the number of bits that should be assigned to the kf group. if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) { // Maximum number of bits for a single normal frame (not key frame). 
- const int max_bits = frame_max_bits(cpi); + const int max_bits = frame_max_bits(rc, &cpi->oxcf); // Maximum number of bits allocated to the key frame group. int64_t max_grp_bits; @@ -2051,10 +2068,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // How fast is prediction quality decaying. if (!detect_flash(twopass, 0)) { - loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); + const double loop_decay_rate = get_prediction_decay_rate(&cpi->common, + &next_frame); decay_accumulator *= loop_decay_rate; - decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? MIN_DECAY_FACTOR : decay_accumulator; + decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR); } boost_score += (decay_accumulator * r); @@ -2085,7 +2102,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (1) { int kf_boost = (int)boost_score; int allocation_chunks; - int alt_kf_bits; if (kf_boost < (rc->frames_to_key * 3)) kf_boost = (rc->frames_to_key * 3); @@ -2119,14 +2135,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Prevent overflow. if (kf_boost > 1028) { - int divisor = kf_boost >> 10; + const int divisor = kf_boost >> 10; kf_boost /= divisor; allocation_chunks /= divisor; } - twopass->kf_group_bits = (twopass->kf_group_bits < 0) ? 0 - : twopass->kf_group_bits; - + twopass->kf_group_bits = MAX(0, twopass->kf_group_bits); // Calculate the number of bits to be spent on the key frame. twopass->kf_bits = (int)((double)kf_boost * ((double)twopass->kf_group_bits / allocation_chunks)); @@ -2136,11 +2150,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // then use an alternate calculation based on the kf error score // which should give a smaller key frame. 
if (kf_mod_err < kf_group_err / rc->frames_to_key) { - double alt_kf_grp_bits = ((double)twopass->bits_left * + double alt_kf_grp_bits = ((double)twopass->bits_left * (kf_mod_err * (double)rc->frames_to_key) / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left)); - alt_kf_bits = (int)((double)kf_boost * + const int alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks)); if (twopass->kf_bits > alt_kf_bits) @@ -2149,12 +2163,11 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Else if it is much harder than other frames in the group make sure // it at least receives an allocation in keeping with its relative // error score. - alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / + const int alt_kf_bits = (int)((double)twopass->bits_left * (kf_mod_err / DOUBLE_DIVIDE_CHECK(twopass->modified_error_left))); - if (alt_kf_bits > twopass->kf_bits) { + if (alt_kf_bits > twopass->kf_bits) twopass->kf_bits = alt_kf_bits; - } } twopass->kf_group_bits -= twopass->kf_bits; // Per frame bit target for this frame. 
@@ -2187,14 +2200,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; struct twopass_rc *const twopass = &cpi->twopass; - const int frames_left = (int)(twopass->total_stats.count - - cm->current_video_frame); + int frames_left; FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; double this_frame_intra_error; double this_frame_coded_error; int target; + LAYER_CONTEXT *lc = NULL; + int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1); + + if (is_spatial_svc) { + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + frames_left = (int)(twopass->total_stats.count - + lc->current_video_frame_in_layer); + } else { + frames_left = (int)(twopass->total_stats.count - + cm->current_video_frame); + } if (!twopass->stats_in) return; @@ -2207,9 +2230,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_clear_system_state(); + if (is_spatial_svc && twopass->kf_intra_err_min == 0) { + twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; + twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; + } + if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { twopass->active_worst_quality = cpi->oxcf.cq_level; - } else if (cm->current_video_frame == 0) { + } else if (cm->current_video_frame == 0 || + (is_spatial_svc && lc->current_video_frame_in_layer == 0)) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -2232,6 +2261,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Define next KF group and assign bits to it. 
this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); + // Don't place key frame in any enhancement layers in spatial svc + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 && + cpi->svc.spatial_layer_id > 0) { + cm->frame_type = INTER_FRAME; + } } else { cm->frame_type = INTER_FRAME; } @@ -2291,23 +2325,24 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { subtract_stats(&twopass->total_left_stats, &this_frame); } -void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { +void vp9_twopass_postencode_update(VP9_COMP *cpi) { #ifdef DISABLE_RC_LONG_TERM_MEM - cpi->twopass.bits_left -= cpi->rc.this_frame_target; + const uint64_t bits_used = cpi->rc.this_frame_target; #else - cpi->twopass.bits_left -= 8 * bytes_used; + const uint64_t bits_used = cpi->rc.projected_frame_size; +#endif + cpi->twopass.bits_left -= bits_used; + cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0); // Update bits left to the kf and gf groups to account for overshoot or // undershoot on these frames. - if (cm->frame_type == KEY_FRAME) { - cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - - cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); - } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) { - cpi->twopass.gf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - + if (cpi->common.frame_type == KEY_FRAME) { + // For key frames kf_group_bits already had the target bits subtracted out. + // So now update to the correct value based on the actual bits used. 
+ cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used; + } else { + cpi->twopass.kf_group_bits -= bits_used; + cpi->twopass.gf_group_bits -= bits_used; cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); } -#endif + cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); } diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h index bf7b5a1..7a16c8f 100644 --- a/libvpx/vp9/encoder/vp9_firstpass.h +++ b/libvpx/vp9/encoder/vp9_firstpass.h @@ -35,7 +35,7 @@ typedef struct { double new_mv_count; double duration; double count; - int spatial_layer_id; + int64_t spatial_layer_id; } FIRSTPASS_STATS; struct twopass_rc { @@ -95,8 +95,7 @@ int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh); // Post encode update of the rate control parameters for 2-pass -void vp9_twopass_postencode_update(struct VP9_COMP *cpi, - uint64_t bytes_used); +void vp9_twopass_postencode_update(struct VP9_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_lookahead.c b/libvpx/vp9/encoder/vp9_lookahead.c index a88d5ec..cf03e01 100644 --- a/libvpx/vp9/encoder/vp9_lookahead.c +++ b/libvpx/vp9/encoder/vp9_lookahead.c @@ -28,8 +28,8 @@ struct lookahead_ctx { /* Return the buffer at the given absolute index and increment the index */ -static struct lookahead_entry * pop(struct lookahead_ctx *ctx, - unsigned int *idx) { +static struct lookahead_entry *pop(struct lookahead_ctx *ctx, + unsigned int *idx) { unsigned int index = *idx; struct lookahead_entry *buf = ctx->buf + index; @@ -55,16 +55,19 @@ void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { } -struct lookahead_ctx * vp9_lookahead_init(unsigned int width, - unsigned int height, - unsigned int subsampling_x, - unsigned int subsampling_y, - unsigned int depth) { +struct lookahead_ctx *vp9_lookahead_init(unsigned int width, + unsigned int height, + unsigned int subsampling_x, + unsigned 
int subsampling_y, + unsigned int depth) { struct lookahead_ctx *ctx = NULL; // Clamp the lookahead queue depth depth = clamp(depth, 1, MAX_LAG_BUFFERS); + // Allocate memory to keep previous source frames available. + depth += MAX_PRE_FRAMES; + // Allocate the lookahead structures ctx = calloc(1, sizeof(*ctx)); if (ctx) { @@ -96,7 +99,7 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int mb_cols = (src->y_width + 15) >> 4; #endif - if (ctx->sz + 1 > ctx->max_sz) + if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); @@ -159,11 +162,11 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, } -struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx, - int drain) { +struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, + int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz)) { + if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; } @@ -171,16 +174,28 @@ struct lookahead_entry * vp9_lookahead_pop(struct lookahead_ctx *ctx, } -struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx, - int index) { +struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx, + int index) { struct lookahead_entry *buf = NULL; - if (index < (int)ctx->sz) { - index += ctx->read_idx; - if (index >= (int)ctx->max_sz) - index -= ctx->max_sz; - buf = ctx->buf + index; + if (index >= 0) { + // Forward peek + if (index < (int)ctx->sz) { + index += ctx->read_idx; + if (index >= (int)ctx->max_sz) + index -= ctx->max_sz; + buf = ctx->buf + index; + } + } else if (index < 0) { + // Backward peek + if (-index <= MAX_PRE_FRAMES) { + index += ctx->read_idx; + if (index < 0) + index += ctx->max_sz; + buf = ctx->buf + index; + } } + return buf; } diff --git a/libvpx/vp9/encoder/vp9_lookahead.h b/libvpx/vp9/encoder/vp9_lookahead.h index 
ff63c0d..046c533 100644 --- a/libvpx/vp9/encoder/vp9_lookahead.h +++ b/libvpx/vp9/encoder/vp9_lookahead.h @@ -20,6 +20,9 @@ extern "C" { #define MAX_LAG_BUFFERS 25 +// The max of past frames we want to keep in the queue. +#define MAX_PRE_FRAMES 1 + struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c index 6520389..44b171f 100644 --- a/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/libvpx/vp9/encoder/vp9_mbgraph.c @@ -61,8 +61,8 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, &sse); } - xd->mi_8x8[0]->mbmi.mode = NEWMV; - xd->mi_8x8[0]->mbmi.mv[0].as_mv = *dst_mv; + xd->mi[0]->mbmi.mode = NEWMV; + xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv; vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); @@ -145,7 +145,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; - xd->mi_8x8[0]->mbmi.mode = mode; + xd->mi[0]->mbmi.mode = mode; vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, @@ -252,7 +252,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, xd->plane[0].dst.stride = buf->y_stride; xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; - xd->mi_8x8[0] = &mi_local; + xd->mi[0] = &mi_local; mi_local.mbmi.sb_type = BLOCK_16X16; mi_local.mbmi.ref_frame[0] = LAST_FRAME; mi_local.mbmi.ref_frame[1] = NONE; @@ -370,7 +370,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) { else cpi->static_mb_pct = 0; - cpi->seg0_cnt = ncnt[0]; vp9_enable_segmentation(&cm->seg); } else { cpi->static_mb_pct = 0; diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c index 2ae8a2a..f7a02a4 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.c +++ b/libvpx/vp9/encoder/vp9_mcomp.c @@ -23,6 +23,11 @@ // #define NEW_DIAMOND_SEARCH +static INLINE const uint8_t *get_buf_from_mv(const 
struct buf_2d *buf, + const MV *mv) { + return &buf->buf[mv->row * buf->stride + mv->col]; +} + void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); @@ -370,9 +375,9 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, unsigned int sse; unsigned int whichdir; int thismse; - unsigned int halfiters = iters_per_step; - unsigned int quarteriters = iters_per_step; - unsigned int eighthiters = iters_per_step; + const unsigned int halfiters = iters_per_step; + const unsigned int quarteriters = iters_per_step; + const unsigned int eighthiters = iters_per_step; DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); const int y_stride = xd->plane[0].pre[0].stride; @@ -399,7 +404,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, // calculate central point error // TODO(yunqingwang): central pointer error was already calculated in full- // pixel search, and can be passed in this function. 
- comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); + vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -495,8 +500,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, - int do_init_search, - int do_refine, + int do_init_search, int do_refine, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, @@ -508,20 +512,15 @@ static int vp9_pattern_search(const MACROBLOCK *x, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, }; int i, j, s, t; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; - MV this_mv; int bestsad = INT_MAX; int thissad; - const uint8_t *base_offset; - const uint8_t *this_offset; int k = -1; - int best_site = -1; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_init_s = search_param_to_steps[search_param]; - const int *mvjsadcost = x->nmvjointsadcost; + const int *const mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; // adjust ref_mv to make sure it is within MV range @@ -530,13 +529,10 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc = ref_mv->col; // Work out the start point for the search - base_offset = xd->plane[0].pre[0].buf; - this_offset = base_offset + (br * in_what_stride) + bc; - this_mv.row = br; - this_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + bestsad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + 0x7fffffff) + 
mvsad_err_cost(ref_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -545,27 +541,25 @@ static int vp9_pattern_search(const MACROBLOCK *x, s = best_init_s; best_init_s = -1; for (t = 0; t <= s; ++t) { - best_site = -1; + int best_site = -1; if (check_bounds(x, br, bc, 1 << t)) { for (i = 0; i < num_candidates[t]; i++) { - this_mv.row = br + candidates[t][i].row; - this_mv.col = bc + candidates[t][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[t]; i++) { - this_mv.row = br + candidates[t][i].row; - this_mv.col = bc + candidates[t][i].col; + const MV this_mv = {br + candidates[t][i].row, + bc + candidates[t][i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -585,31 +579,30 @@ static int vp9_pattern_search(const MACROBLOCK *x, // If the center point is still the best, just skip this and move to // the refinement step. 
if (best_init_s != -1) { + int best_site = -1; s = best_init_s; - best_site = -1; + do { // No need to search all 6 points the 1st time if initial search was used if (!do_init_search || s != best_init_s) { if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { - this_mv.row = br + candidates[s][i].row; - this_mv.col = bc + candidates[s][i].col; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[s]; i++) { - this_mv.row = br + candidates[s][i].row; - this_mv.col = bc + candidates[s][i].col; + const MV this_mv = {br + candidates[s][i].row, + bc + candidates[s][i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * in_what_stride) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -632,24 +625,22 @@ static int vp9_pattern_search(const MACROBLOCK *x, if (check_bounds(x, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; - this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; - this_offset = base_offset + (this_mv.row * (in_what_stride)) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; 
i < PATTERN_CANDIDATES_REF; i++) { - this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; - this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; + const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, + bc + candidates[s][next_chkpts_indices[i]].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + (this_mv.row * (in_what_stride)) + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } @@ -666,29 +657,28 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Check 4 1-away neighbors if do_refine is true. // For most well-designed schemes do_refine will not be necessary. if (do_refine) { - static const MV neighbors[4] = { {0, -1}, { -1, 0}, {1, 0}, {0, 1} }; + static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; + for (j = 0; j < 16; j++) { - best_site = -1; + int best_site = -1; if (check_bounds(x, br, bc, 1)) { for (i = 0; i < 4; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; - this_offset = base_offset + this_mv.row * in_what_stride + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < 4; i++) { - this_mv.row = br + neighbors[i].row; - this_mv.col = bc + neighbors[i].col; + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; if (!is_mv_in(x, &this_mv)) continue; - this_offset = base_offset + this_mv.row * in_what_stride + - this_mv.col; - thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, - bestsad); + thissad = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + 
in_what->stride, bestsad); CHECK_BETTER } } @@ -705,8 +695,6 @@ static int vp9_pattern_search(const MACROBLOCK *x, best_mv->row = br; best_mv->col = bc; - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; return bestsad; } @@ -714,41 +702,32 @@ int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { - unsigned int unused; - const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *base_offset = xd->plane[0].pre[0].buf; - const uint8_t *this_offset = &base_offset[best_mv->row * in_what_stride + - best_mv->col]; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV mv = {best_mv->row * 8, best_mv->col * 8}; - return vfp->vf(what, what_stride, this_offset, in_what_stride, &unused) + + unsigned int unused; + + return vfp->vf(what->buf, what->stride, + get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); } int vp9_get_mvpred_av_var(const MACROBLOCK *x, - MV *best_mv, - const MV *center_mv, + const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { - unsigned int bestsad; - MV this_mv; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *base_offset = xd->plane[0].pre[0].buf; - const uint8_t *this_offset = base_offset + (best_mv->row * in_what_stride) + - best_mv->col; - this_mv.row = best_mv->row * 8; - this_mv.col = best_mv->col * 8; - return vfp->svaf(this_offset, in_what_stride, 0, 0, what, what_stride, - &bestsad, second_pred) + - (use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = {best_mv->row * 8, best_mv->col * 8}; + unsigned int unused; + + return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, + what->buf, what->stride, &unused, second_pred) + + (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); } @@ -908,7 +887,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, const int what_stride = x->plane[0].src.stride; const uint8_t *in_what; const int in_what_stride = xd->plane[0].pre[0].stride; - MV this_mv; unsigned int bestsad = INT_MAX; int ref_row, ref_col; @@ -960,8 +938,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, for (i = 0; i < 4; ++i) { if (sad_array[i] < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad = sad_array[i] + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -979,8 +956,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, bestsad); if (thissad < bestsad) { - this_mv.row = ref_row + tr; - this_mv.col = ref_col + tc + i; + const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1005,66 +981,49 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - int i, j, step; - const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row, ref_col; - + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; // search_param determines the length of the initial step and hence the number // of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = // (MAX_FIRST_STEP/4) pel... etc. 
const search_site *const ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; - const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; + const uint8_t *best_address; + int best_sad = INT_MAX; + int best_site = 0; + int last_site = 0; + int i, j, step; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - ref_row = ref_mv->row; - ref_col = ref_mv->col; + best_address = get_buf_from_mv(in_what, ref_mv); *num00 = 0; - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Work out the start point for the search - in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - best_address = in_what; + *best_mv = *ref_mv; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + best_sad = fn_ptr->sdf(what->buf, what->stride, + in_what->buf, in_what->stride, 0x7fffffff) + + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); i = 1; for (step = 0; step < tot_steps; step++) { for (j = 0; j < x->searches_per_step; j++) { - const MV this_mv = {best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *const check_here = ss[i].offset + best_address; - int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {best_mv->row + ss[i].mv.row, + best_mv->col + ss[i].mv.col}; + if (is_mv_in(x, &mv)) { + int sad = fn_ptr->sdf(what->buf, what->stride, + best_address + ss[i].offset, in_what->stride, + best_sad); + if (sad < best_sad) { + sad += 
mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = i; } } @@ -1083,14 +1042,14 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const MV this_mv = {best_mv->row + ss[best_site].mv.row, best_mv->col + ss[best_site].mv.col}; if (is_mv_in(x, &this_mv)) { - const uint8_t *const check_here = ss[best_site].offset + best_address; - int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + int sad = fn_ptr->sdf(what->buf, what->stride, + best_address + ss[best_site].offset, + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; best_mv->row += ss[best_site].mv.row; best_mv->col += ss[best_site].mv.col; best_address += ss[best_site].offset; @@ -1101,11 +1060,11 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, break; }; #endif - } else if (best_address == in_what) { + } else if (best_address == in_what->buf) { (*num00)++; } } - return bestsad; + return best_sad; } int vp9_diamond_search_sadx4(const MACROBLOCK *x, @@ -1331,10 +1290,8 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const MV *center_mv, MV *best_mv) { int r, c; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); const int col_min = MAX(ref_mv->col - 
distance, x->mv_col_min); @@ -1342,25 +1299,22 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - int best_sad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, - 0x7fffffff) + + int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); *best_mv = *ref_mv; for (r = row_min; r < row_max; ++r) { for (c = col_min; c < col_max; ++c) { - const MV this_mv = {r, c}; - const uint8_t *check_here = &in_what[r * in_what_stride + c]; - const int sad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - best_sad) + - mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + const MV mv = {r, c}; + const int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + + mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + sad_per_bit); if (sad < best_sad) { best_sad = sad; - *best_mv = this_mv; + *best_mv = mv; } } } @@ -1472,7 +1426,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, MV this_mv; unsigned int bestsad = INT_MAX; int r, c; - unsigned int thissad; int ref_row = ref_mv->row; int ref_col = ref_mv->col; @@ -1512,7 +1465,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); for (i = 0; i < 8; i++) { - thissad = (unsigned int)sad_array8[i]; + unsigned int thissad = (unsigned int)sad_array8[i]; if (thissad < bestsad) { this_mv.col = c; @@ -1537,12 +1490,12 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, fn_ptr->sdx3f(what, what_stride, check_here, 
in_what_stride, sad_array); for (i = 0; i < 3; i++) { - thissad = sad_array[i]; + unsigned int thissad = sad_array[i]; if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1557,8 +1510,8 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, } while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); + unsigned int thissad = fn_ptr->sdf(what, what_stride, + check_here, in_what_stride, bestsad); if (thissad < bestsad) { this_mv.col = c; @@ -1585,41 +1538,34 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost, int *mvcost[2], const MV *center_mv) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - - const int what_stride = x->plane[0].src.stride; - const uint8_t *const what = x->plane[0].src.buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - unsigned int bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int 
best_site = -1; for (j = 0; j < 4; j++) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; - unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, + error_per_bit); + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1633,7 +1579,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, ref_mv->col += neighbors[best_site].col; } } - return bestsad; + return best_sad; } int vp9_refining_search_sadx4(const MACROBLOCK *x, @@ -1643,74 +1589,64 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; - MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; - int i, j; - - const int what_stride = x->plane[0].src.stride; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *what = x->plane[0].src.buf; - const uint8_t *best_address = xd->plane[0].pre[0].buf + - (ref_mv->row * xd->plane[0].pre[0].stride) + - ref_mv->col; - + const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - unsigned int 
bestsad = fn_ptr->sdf(what, what_stride, best_address, - in_what_stride, 0x7fffffff) + + const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, + in_what->stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; i++) { int best_site = -1; - int all_in = ((ref_mv->row - 1) > x->mv_row_min) & - ((ref_mv->row + 1) < x->mv_row_max) & - ((ref_mv->col - 1) > x->mv_col_min) & - ((ref_mv->col + 1) < x->mv_col_max); + const int all_in = ((ref_mv->row - 1) > x->mv_row_min) & + ((ref_mv->row + 1) < x->mv_row_max) & + ((ref_mv->col - 1) > x->mv_col_min) & + ((ref_mv->col + 1) < x->mv_col_max); if (all_in) { - unsigned int sad_array[4]; - uint8_t const *block_offset[4] = { - best_address - in_what_stride, + unsigned int sads[4]; + const uint8_t *const positions[4] = { + best_address - in_what->stride, best_address - 1, best_address + 1, - best_address + in_what_stride + best_address + in_what->stride }; - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, - sad_array); + fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); - for (j = 0; j < 4; j++) { - if (sad_array[j] < bestsad) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, + for (j = 0; j < 4; ++j) { + if (sads[j] < best_sad) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + sads[j] += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (sad_array[j] < bestsad) { - bestsad = sad_array[j]; + if (sads[j] < best_sad) { + best_sad = sads[j]; best_site = j; } } } } else { - for (j = 0; j < 4; j++) { - const MV this_mv = {ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col}; - - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = neighbors[j].row * 
in_what_stride + - neighbors[j].col + best_address; - unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); - - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; + for (j = 0; j < 4; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; + + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), + in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, + mvjsadcost, mvsadcost, error_per_bit); + + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1723,12 +1659,11 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, } else { ref_mv->row += neighbors[best_site].row; ref_mv->col += neighbors[best_site].col; - best_address += (neighbors[best_site].row) * in_what_stride + - neighbors[best_site].col; + best_address = get_buf_from_mv(in_what, ref_mv); } } - return bestsad; + return best_sad; } // This function is called when we do joint motion search in comp_inter_inter @@ -1740,48 +1675,36 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv, const uint8_t *second_pred, int w, int h) { - const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; - int i, j; - - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address = &in_what[ref_mv->row * in_what_stride + - ref_mv->col]; - unsigned int thissad; - MV this_mv; + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV 
fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - - /* Get compound pred by averaging two pred blocks. */ - unsigned int bestsad = fn_ptr->sdaf(what, what_stride, - best_address, in_what_stride, - second_pred, 0x7fffffff) + + unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + second_pred, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + int i, j; for (i = 0; i < search_range; ++i) { int best_site = -1; - for (j = 0; j < 8; j++) { - this_mv.row = ref_mv->row + neighbors[j].row; - this_mv.col = ref_mv->col + neighbors[j].col; - - if (is_mv_in(x, &this_mv)) { - const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + - this_mv.col]; + for (j = 0; j < 8; ++j) { + const MV mv = {ref_mv->row + neighbors[j].row, + ref_mv->col + neighbors[j].col}; - thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride, - second_pred, bestsad); - if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + if (is_mv_in(x, &mv)) { + unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, + second_pred, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); - if (thissad < bestsad) { - bestsad = thissad; + if (sad < best_sad) { + best_sad = sad; best_site = j; } } @@ -1795,5 +1718,5 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, ref_mv->col += neighbors[best_site].col; } } - return bestsad; + return best_sad; } diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h index 917de75..f7b7c5e 100644 --- a/libvpx/vp9/encoder/vp9_mcomp.h +++ b/libvpx/vp9/encoder/vp9_mcomp.h @@ -42,8 +42,7 @@ int vp9_get_mvpred_var(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *vfp, int use_mvcost); int 
vp9_get_mvpred_av_var(const MACROBLOCK *x, - MV *best_mv, - const MV *center_mv, + const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, const vp9_variance_fn_ptr_t *vfp, int use_mvcost); diff --git a/libvpx/vp9/encoder/vp9_onyx_if.c b/libvpx/vp9/encoder/vp9_onyx_if.c index cccc1a9..3619ec8 100644 --- a/libvpx/vp9/encoder/vp9_onyx_if.c +++ b/libvpx/vp9/encoder/vp9_onyx_if.c @@ -27,8 +27,10 @@ #include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_tile_common.h" +#include "vp9/encoder/vp9_aq_complexity.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" +#include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_bitstream.h" -#include "vp9/encoder/vp9_craq.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_firstpass.h" @@ -38,17 +40,14 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_speed_features.h" +#if CONFIG_INTERNAL_STATS +#include "vp9/encoder/vp9_ssim.h" +#endif #include "vp9/encoder/vp9_temporal_filter.h" -#include "vp9/encoder/vp9_vaq.h" #include "vp9/encoder/vp9_resize.h" #include "vp9/encoder/vp9_svc_layercontext.h" -#define ALL_INTRA_MODES 0x3FF -#define INTRA_DC_ONLY 0x01 -#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) -#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) -#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) - void vp9_coef_tree_initialize(); #define DEFAULT_INTERP_FILTER SWITCHABLE @@ -62,30 +61,11 @@ void vp9_coef_tree_initialize(); // now so that HIGH_PRECISION is always // chosen. 
-// Masks for partially or completely disabling split mode -#define DISABLE_ALL_SPLIT 0x3F -#define DISABLE_ALL_INTER_SPLIT 0x1F -#define DISABLE_COMPOUND_SPLIT 0x18 -#define LAST_AND_INTRA_SPLIT_ONLY 0x1E - // Max rate target for 1080P and below encodes under normal circumstances // (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB #define MAX_MB_RATE 250 #define MAXRATE_1080P 2025000 -#if CONFIG_INTERNAL_STATS -extern double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, int lumamask, - double *weight); - - -extern double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, double *ssim_y, - double *ssim_u, double *ssim_v); - - -#endif - // #define OUTPUT_YUV_REC #ifdef OUTPUT_YUV_SRC @@ -103,9 +83,6 @@ FILE *keyfile; void vp9_init_quantizer(VP9_COMP *cpi); -static const double in_frame_q_adj_ratio[MAX_SEGMENTS] = - {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { switch (mode) { case NORMAL: @@ -144,17 +121,33 @@ static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { } } +static void setup_key_frame(VP9_COMP *cpi) { + vp9_setup_past_independence(&cpi->common); + + // All buffers are implicitly updated on key frames. 
+ cpi->refresh_golden_frame = 1; + cpi->refresh_alt_ref_frame = 1; +} + +static void setup_inter_frame(VP9_COMMON *cm) { + if (cm->error_resilient_mode || cm->intra_only) + vp9_setup_past_independence(cm); + + assert(cm->frame_context_idx < FRAME_CONTEXTS); + cm->fc = cm->frame_contexts[cm->frame_context_idx]; +} + void vp9_initialize_enc() { static int init_done = 0; if (!init_done) { - vp9_initialize_common(); + vp9_init_neighbors(); + vp9_init_quant_tables(); + vp9_coef_tree_initialize(); vp9_tokenize_initialize(); - vp9_init_quant_tables(); vp9_init_me_luts(); vp9_rc_init_minq_luts(); - // init_base_skip_probs(); vp9_entropy_mv_init(); vp9_entropy_mode_init(); init_done = 1; @@ -163,6 +156,7 @@ void vp9_initialize_enc() { static void dealloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + int i; // Delete sementation map vpx_free(cpi->segmentation_map); @@ -173,16 +167,19 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->coding_context.last_frame_seg_map_copy = NULL; vpx_free(cpi->complexity_map); - cpi->complexity_map = 0; - vpx_free(cpi->cyclic_refresh.map); - cpi->cyclic_refresh.map = 0; + cpi->complexity_map = NULL; + + vp9_cyclic_refresh_free(cpi->cyclic_refresh); + cpi->cyclic_refresh = NULL; + vpx_free(cpi->active_map); - cpi->active_map = 0; + cpi->active_map = NULL; vp9_free_frame_buffers(cm); vp9_free_frame_buffer(&cpi->last_frame_uf); vp9_free_frame_buffer(&cpi->scaled_source); + vp9_free_frame_buffer(&cpi->scaled_last_source); vp9_free_frame_buffer(&cpi->alt_ref_buffer); vp9_lookahead_destroy(cpi->lookahead); @@ -195,103 +192,59 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->mb_norm_activity_map); cpi->mb_norm_activity_map = 0; - vpx_free(cpi->above_context[0]); - cpi->above_context[0] = NULL; - - vpx_free(cpi->above_seg_context); - cpi->above_seg_context = NULL; -} - -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a target q value -int 
vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget) { - const RATE_CONTROL *const rc = &cpi->rc; - int start_index = rc->worst_quality; - int target_index = rc->worst_quality; - int i; - - // Convert the average q value to an index. - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - start_index = i; - if (vp9_convert_qindex_to_q(i) >= qstart) - break; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i]; + vpx_free(lc->rc_twopass_stats_in.buf); + lc->rc_twopass_stats_in.buf = NULL; + lc->rc_twopass_stats_in.sz = 0; } - - // Convert the q target to an index - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - target_index = i; - if (vp9_convert_qindex_to_q(i) >= qtarget) - break; - } - - return target_index - start_index; } -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a value that should equate to the given rate ratio. -int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, - double rate_target_ratio) { - int i; - int target_index = cpi->rc.worst_quality; +static void save_coding_context(VP9_COMP *cpi) { + CODING_CONTEXT *const cc = &cpi->coding_context; + VP9_COMMON *cm = &cpi->common; - // Look up the current projected bits per block for the base index - const int base_bits_per_mb = vp9_rc_bits_per_mb(cpi->common.frame_type, - base_q_index, 1.0); + // Stores a snapshot of key state variables which can subsequently be + // restored with a call to vp9_restore_coding_context. These functions are + // intended for use in a re-code loop in vp9_compress_frame where the + // quantizer value is adjusted between loop iterations. + vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); + vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); + vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - // Find the target bits per mb based on the base value and given ratio. 
- const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - // Convert the q target to an index - for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; ++i) { - target_index = i; - if (vp9_rc_bits_per_mb(cpi->common.frame_type, i, 1.0) <= - target_bits_per_mb ) - break; - } + vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, + cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - return target_index - base_q_index; -} - -// This function sets up a set of segments with delta Q values around -// the baseline frame quantizer. -static void setup_in_frame_q_adj(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - struct segmentation *const seg = &cm->seg; - - // Make SURE use of floating point in this function is safe. - vp9_clear_system_state(); + vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); + vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - if (cm->frame_type == KEY_FRAME || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - int segment; + cc->fc = cm->fc; +} - // Clear down the segment map - vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); +static void restore_coding_context(VP9_COMP *cpi) { + CODING_CONTEXT *const cc = &cpi->coding_context; + VP9_COMMON *cm = &cpi->common; - // Clear down the complexity map used for rd - vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols); + // Restore key state variables to the snapshot state stored in the + // previous call to vp9_save_coding_context. 
+ vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); + vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); + vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - vp9_enable_segmentation(seg); - vp9_clearall_segfeatures(seg); + vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - // Select delta coding method - seg->abs_delta = SEGMENT_DELTADATA; + vpx_memcpy(cm->last_frame_seg_map, + cpi->coding_context.last_frame_seg_map_copy, + (cm->mi_rows * cm->mi_cols)); - // Segment 0 "Q" feature is disabled so it defaults to the baseline Q - vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q); + vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); + vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - // Use some of the segments for in frame Q adjustment - for (segment = 1; segment < 2; segment++) { - const int qindex_delta = - vp9_compute_qdelta_by_rate(cpi, - cm->base_qindex, - in_frame_q_adj_ratio[segment]); - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); - } - } + cm->fc = cc->fc; } + static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; @@ -335,7 +288,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_map = 1; seg->update_data = 1; - qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 0.875); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); @@ -356,7 +309,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { seg->update_data = 1; seg->abs_delta = SEGMENT_DELTADATA; - qi_delta = vp9_compute_qdelta(cpi, rc->avg_q, rc->avg_q * 1.125); + qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); @@ -446,7 +399,7 @@ static void update_reference_segmentation_map(VP9_COMP 
*cpi) { uint8_t *cache = cache_ptr; for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++) cache[0] = mi_8x8[0]->mbmi.segment_id; - mi_8x8_ptr += cm->mode_info_stride; + mi_8x8_ptr += cm->mi_stride; cache_ptr += cm->mi_cols; } } @@ -455,557 +408,137 @@ static int is_slowest_mode(int mode) { } static void set_rd_speed_thresholds(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; int i; // Set baseline threshold values for (i = 0; i < MAX_MODES; ++i) - sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; - - sf->thresh_mult[THR_NEARESTMV] = 0; - sf->thresh_mult[THR_NEARESTG] = 0; - sf->thresh_mult[THR_NEARESTA] = 0; - - sf->thresh_mult[THR_DC] += 1000; - - sf->thresh_mult[THR_NEWMV] += 1000; - sf->thresh_mult[THR_NEWA] += 1000; - sf->thresh_mult[THR_NEWG] += 1000; - - sf->thresh_mult[THR_NEARMV] += 1000; - sf->thresh_mult[THR_NEARA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTLA] += 1000; - sf->thresh_mult[THR_COMP_NEARESTGA] += 1000; - - sf->thresh_mult[THR_TM] += 1000; - - sf->thresh_mult[THR_COMP_NEARLA] += 1500; - sf->thresh_mult[THR_COMP_NEWLA] += 2000; - sf->thresh_mult[THR_NEARG] += 1000; - sf->thresh_mult[THR_COMP_NEARGA] += 1500; - sf->thresh_mult[THR_COMP_NEWGA] += 2000; - - sf->thresh_mult[THR_ZEROMV] += 2000; - sf->thresh_mult[THR_ZEROG] += 2000; - sf->thresh_mult[THR_ZEROA] += 2000; - sf->thresh_mult[THR_COMP_ZEROLA] += 2500; - sf->thresh_mult[THR_COMP_ZEROGA] += 2500; - - sf->thresh_mult[THR_H_PRED] += 2000; - sf->thresh_mult[THR_V_PRED] += 2000; - sf->thresh_mult[THR_D45_PRED ] += 2500; - sf->thresh_mult[THR_D135_PRED] += 2500; - sf->thresh_mult[THR_D117_PRED] += 2500; - sf->thresh_mult[THR_D153_PRED] += 2500; - sf->thresh_mult[THR_D207_PRED] += 2500; - sf->thresh_mult[THR_D63_PRED] += 2500; + cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; + + cpi->rd_thresh_mult[THR_NEARESTMV] = 0; + cpi->rd_thresh_mult[THR_NEARESTG] = 0; + cpi->rd_thresh_mult[THR_NEARESTA] = 0; + + cpi->rd_thresh_mult[THR_DC] += 1000; + + cpi->rd_thresh_mult[THR_NEWMV] += 1000; + cpi->rd_thresh_mult[THR_NEWA] += 1000; + cpi->rd_thresh_mult[THR_NEWG] += 1000; + + cpi->rd_thresh_mult[THR_NEARMV] += 1000; + cpi->rd_thresh_mult[THR_NEARA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000; + + cpi->rd_thresh_mult[THR_TM] += 1000; + + cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000; + cpi->rd_thresh_mult[THR_NEARG] += 1000; + cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500; + cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000; + + cpi->rd_thresh_mult[THR_ZEROMV] += 2000; + cpi->rd_thresh_mult[THR_ZEROG] += 2000; + cpi->rd_thresh_mult[THR_ZEROA] += 2000; + cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500; + cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500; + + cpi->rd_thresh_mult[THR_H_PRED] += 2000; + cpi->rd_thresh_mult[THR_V_PRED] += 2000; + cpi->rd_thresh_mult[THR_D45_PRED ] += 2500; + cpi->rd_thresh_mult[THR_D135_PRED] += 2500; + cpi->rd_thresh_mult[THR_D117_PRED] += 2500; + cpi->rd_thresh_mult[THR_D153_PRED] += 2500; + cpi->rd_thresh_mult[THR_D207_PRED] += 2500; + cpi->rd_thresh_mult[THR_D63_PRED] += 2500; /* disable frame modes if flags not set */ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - sf->thresh_mult[THR_NEWMV ] = INT_MAX; - sf->thresh_mult[THR_NEARESTMV] = INT_MAX; - sf->thresh_mult[THR_ZEROMV ] = INT_MAX; - sf->thresh_mult[THR_NEARMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROMV ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARMV ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - sf->thresh_mult[THR_NEARESTG ] = INT_MAX; - sf->thresh_mult[THR_ZEROG ] = INT_MAX; - sf->thresh_mult[THR_NEARG ] = INT_MAX; - 
sf->thresh_mult[THR_NEWG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARG ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWG ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - sf->thresh_mult[THR_NEARESTA ] = INT_MAX; - sf->thresh_mult[THR_ZEROA ] = INT_MAX; - sf->thresh_mult[THR_NEARA ] = INT_MAX; - sf->thresh_mult[THR_NEWA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX; + cpi->rd_thresh_mult[THR_ZEROA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEARA ] = INT_MAX; + cpi->rd_thresh_mult[THR_NEWA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARLA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWLA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - sf->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - sf->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - sf->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEARGA ] = INT_MAX; + cpi->rd_thresh_mult[THR_COMP_NEWGA ] = INT_MAX; } } static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; + const SPEED_FEATURES *const sf = &cpi->sf; int i; for (i = 0; i < MAX_REFS; ++i) - sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; + cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; - sf->thresh_mult_sub8x8[THR_LAST] += 2500; - sf->thresh_mult_sub8x8[THR_GOLD] += 2500; - sf->thresh_mult_sub8x8[THR_ALTR] += 2500; - sf->thresh_mult_sub8x8[THR_INTRA] += 2500; - sf->thresh_mult_sub8x8[THR_COMP_LA] += 4500; - sf->thresh_mult_sub8x8[THR_COMP_GA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500; + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500; // Check for masked out split cases. - for (i = 0; i < MAX_REFS; i++) { + for (i = 0; i < MAX_REFS; i++) if (sf->disable_split_mask & (1 << i)) - sf->thresh_mult_sub8x8[i] = INT_MAX; - } + cpi->rd_thresh_mult_sub8x8[i] = INT_MAX; // disable mode test if frame flag is not set if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) - sf->thresh_mult_sub8x8[THR_LAST] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) - sf->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) - sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; + cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; } -static void set_good_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - int i; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? 
ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 32; - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - 
sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed == 4) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->reference_masking = 1; - sf->auto_mv_step_size = 1; - - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 200; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed >= 5) { - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->partition_search_type = FIXED_PARTITION; - sf->tx_size_search_method = frame_is_intra_only(cm) ? 
- USE_FULL_RD : USE_LARGESTALL; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->use_rd_breakout = 1; - sf->use_lp32x32fdct = 1; - sf->optimize_coefficients = 0; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->search_method = HEX; - sf->subpel_iters_per_step = 1; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->use_fast_coef_updates = 2; - sf->use_fast_coef_costing = 1; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } -} - -static void set_rt_speed_feature(VP9_COMMON *cm, - SPEED_FEATURES *sf, - int speed) { - sf->static_segmentation = 0; - sf->adaptive_rd_thresh = 1; - sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); - sf->encode_breakout_thresh = 1; - sf->use_fast_coef_costing = 1; - - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 8; - } - if (speed >= 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_interp_filter = 2; - sf->auto_mv_step_size = 1; - sf->reference_masking = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->encode_breakout_thresh = 200; - } - if (speed >= 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; 
- - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH - | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA - | FLAG_SKIP_INTRA_LOWVAR; - - sf->disable_filter_search_var_thresh = 100; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - sf->encode_breakout_thresh = 400; - } - if (speed >= 4) { - sf->optimize_coefficients = 0; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->use_fast_lpf_pick = 2; - sf->encode_breakout_thresh = 700; - } - if (speed >= 5) { - int i; - sf->last_partitioning_redo_frequency = 4; - sf->adaptive_rd_thresh = 5; - sf->use_fast_coef_costing = 0; - sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; - sf->adjust_partitioning_from_last_frame = - cm->last_frame_type != cm->frame_type || (0 == - (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency); - sf->subpel_force_stop = 1; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; - sf->frame_parameter_update = 0; - sf->encode_breakout_thresh = 1000; - sf->search_method = FAST_HEX; - sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); - sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->max_intra_bsize = BLOCK_32X32; - } - if (speed >= 6) { - sf->partition_check = - (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1); - sf->partition_search_type = REFERENCE_PARTITION; - 
sf->use_nonrd_pick_mode = 1; - sf->search_method = FAST_DIAMOND; - } - if (speed >= 7) { - sf->partition_search_type = VAR_BASED_FIXED_PARTITION; - sf->use_nonrd_pick_mode = 1; - sf->search_method = FAST_DIAMOND; - } - if (speed >= 8) { - int i; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0) - } -} - -void vp9_set_speed_features(VP9_COMP *cpi) { - SPEED_FEATURES *sf = &cpi->sf; - VP9_COMMON *cm = &cpi->common; - int speed = cpi->speed; - int i; - - // Convert negative speed to positive - if (speed < 0) - speed = -speed; - +static void set_speed_features(VP9_COMP *cpi) { #if CONFIG_INTERNAL_STATS + int i; for (i = 0; i < MAX_MODES; ++i) cpi->mode_chosen_counts[i] = 0; #endif - // best quality defaults - sf->frame_parameter_update = 1; - sf->search_method = NSTEP; - sf->recode_loop = ALLOW_RECODE; - sf->subpel_search_method = SUBPEL_TREE; - sf->subpel_iters_per_step = 2; - sf->subpel_force_stop = 0; - sf->optimize_coefficients = !cpi->oxcf.lossless; - sf->reduce_first_step_size = 0; - sf->auto_mv_step_size = 0; - sf->max_step_search_steps = MAX_MVSEARCH_STEPS; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; - sf->adaptive_rd_thresh = 0; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; - sf->tx_size_search_method = USE_FULL_RD; - sf->use_lp32x32fdct = 0; - sf->adaptive_motion_search = 0; - sf->adaptive_pred_interp_filter = 0; - sf->reference_masking = 0; - sf->partition_search_type = SEARCH_PARTITION; - sf->less_rectangular_check = 0; - sf->use_square_partition_only = 0; - sf->auto_min_max_partition_size = NOT_IN_USE; - sf->max_partition_size = BLOCK_64X64; - sf->min_partition_size = BLOCK_4X4; - sf->adjust_partitioning_from_last_frame = 0; - sf->last_partitioning_redo_frequency = 4; - sf->disable_split_mask = 0; - sf->mode_search_skip_flags = 0; - sf->disable_split_var_thresh = 0; - sf->disable_filter_search_var_thresh = 0; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = 
ALL_INTRA_MODES; - sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; - } - sf->use_rd_breakout = 0; - sf->skip_encode_sb = 0; - sf->use_uv_intra_rd_estimate = 0; - sf->use_fast_lpf_pick = 0; - sf->use_fast_coef_updates = 0; - sf->use_fast_coef_costing = 0; - sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set - sf->use_nonrd_pick_mode = 0; - sf->encode_breakout_thresh = 0; - for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 0; - sf->max_intra_bsize = BLOCK_64X64; - // This setting only takes effect when partition_search_type is set - // to FIXED_PARTITION. - sf->always_this_block_size = BLOCK_16X16; - - switch (cpi->oxcf.mode) { - case MODE_BESTQUALITY: - case MODE_SECONDPASS_BEST: // This is the best quality mode. - cpi->diamond_search_sad = vp9_full_range_search; - break; - case MODE_FIRSTPASS: - case MODE_GOODQUALITY: - case MODE_SECONDPASS: - set_good_speed_feature(cm, sf, speed); - break; - case MODE_REALTIME: - set_rt_speed_feature(cm, sf, speed); - break; - }; /* switch */ + vp9_set_speed_features(cpi); // Set rd thresholds based on mode and speed setting set_rd_speed_thresholds(cpi); set_rd_speed_thresholds_sub8x8(cpi); - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. - if (cpi->pass == 1) { - sf->optimize_coefficients = 0; - } - - // No recode for 1 pass. 
- if (cpi->pass == 0) { - sf->recode_loop = DISALLOW_RECODE; - sf->optimize_coefficients = 0; - } - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { cpi->mb.fwd_txm4x4 = vp9_fwht4x4; } - - if (cpi->sf.subpel_search_method == SUBPEL_TREE) { - cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; - cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; - } - - cpi->mb.optimize = cpi->sf.optimize_coefficients == 1 && cpi->pass != 1; - - if (cpi->encode_breakout && cpi->oxcf.mode == MODE_REALTIME && - sf->encode_breakout_thresh > cpi->encode_breakout) - cpi->encode_breakout = sf->encode_breakout_thresh; - - if (sf->disable_split_mask == DISABLE_ALL_SPLIT) - sf->adaptive_pred_interp_filter = 0; } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { @@ -1048,6 +581,13 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); + if (vp9_alloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate scaled last source buffer"); + vpx_free(cpi->tok); { @@ -1065,24 +605,12 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { CHECK_MEM_ERROR(cm, cpi->mb_norm_activity_map, vpx_calloc(sizeof(unsigned int), cm->mb_rows * cm->mb_cols)); - - // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm - // block where mi unit size is 8x8. 
- vpx_free(cpi->above_context[0]); - CHECK_MEM_ERROR(cm, cpi->above_context[0], - vpx_calloc(2 * mi_cols_aligned_to_sb(cm->mi_cols) * - MAX_MB_PLANE, - sizeof(*cpi->above_context[0]))); - - vpx_free(cpi->above_seg_context); - CHECK_MEM_ERROR(cm, cpi->above_seg_context, - vpx_calloc(mi_cols_aligned_to_sb(cm->mi_cols), - sizeof(*cpi->above_seg_context))); } static void update_frame_size(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; vp9_update_frame_size(cm); @@ -1101,6 +629,13 @@ static void update_frame_size(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to reallocate scaled source buffer"); + if (vp9_realloc_frame_buffer(&cpi->scaled_last_source, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to reallocate scaled last source buffer"); + { int y_stride = cpi->scaled_source.y_stride; @@ -1111,14 +646,7 @@ static void update_frame_size(VP9_COMP *cpi) { } } - { - int i; - for (i = 1; i < MAX_MB_PLANE; ++i) { - cpi->above_context[i] = cpi->above_context[0] + - i * sizeof(*cpi->above_context[0]) * 2 * - mi_cols_aligned_to_sb(cm->mi_cols); - } - } + init_macroblockd(cm, xd); } // Table that converts 0-63 Q range values passed in outside to the Qindex @@ -1153,10 +681,9 @@ void vp9_new_framerate(VP9_COMP *cpi, double framerate) { oxcf->framerate = framerate < 0.1 ? 
30 : framerate; cpi->output_framerate = cpi->oxcf.framerate; rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / - cpi->output_framerate); + cpi->output_framerate); rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth * - oxcf->two_pass_vbrmin_section / 100); - + oxcf->two_pass_vbrmin_section / 100); rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); @@ -1213,12 +740,12 @@ static void set_tile_limits(VP9_COMP *cpi) { static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; int i; cpi->oxcf = *oxcf; - cm->version = oxcf->version; + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; cm->width = oxcf->width; cm->height = oxcf->height; @@ -1231,43 +758,16 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { // Temporal scalability. cpi->svc.number_temporal_layers = oxcf->ts_number_layers; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && + cpi->oxcf.mode == MODE_SECONDPASS_BEST)) { vp9_init_layer_context(cpi); } // change includes all joint functionality vp9_change_config(cpi, oxcf); - // Initialize active best and worst q and average q values. 
- if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - rc->avg_frame_qindex[0] = cpi->oxcf.worst_allowed_q; - rc->avg_frame_qindex[1] = cpi->oxcf.worst_allowed_q; - rc->avg_frame_qindex[2] = cpi->oxcf.worst_allowed_q; - } else { - rc->avg_frame_qindex[0] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - rc->avg_frame_qindex[1] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - rc->avg_frame_qindex[2] = (cpi->oxcf.worst_allowed_q + - cpi->oxcf.best_allowed_q) / 2; - } - rc->last_q[0] = cpi->oxcf.best_allowed_q; - rc->last_q[1] = cpi->oxcf.best_allowed_q; - rc->last_q[2] = cpi->oxcf.best_allowed_q; - - // Initialise the starting buffer levels - rc->buffer_level = cpi->oxcf.starting_buffer_level; - rc->bits_off_target = cpi->oxcf.starting_buffer_level; - - rc->rolling_target_bits = rc->av_per_frame_bandwidth; - rc->rolling_actual_bits = rc->av_per_frame_bandwidth; - rc->long_rolling_target_bits = rc->av_per_frame_bandwidth; - rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth; - - rc->total_actual_bits = 0; - rc->total_target_vs_actual = 0; - cpi->static_mb_pct = 0; cpi->lst_fb_idx = 0; @@ -1281,15 +781,18 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { cpi->fixed_divide[i] = 0x80000 / i; } -void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { +void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; - if (!cpi || !oxcf) - return; + if (cm->profile != oxcf->profile) + cm->profile = oxcf->profile; + cm->bit_depth = oxcf->bit_depth; - if (cm->version != oxcf->version) { - cm->version = oxcf->version; - } + if (cm->profile <= PROFILE_1) + assert(cm->bit_depth == BITS_8); + else + assert(cm->bit_depth > BITS_8); cpi->oxcf = *oxcf; @@ -1325,15 +828,17 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { break; } - cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; - 
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; - cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; - cpi->oxcf.lossless = oxcf->lossless; - cpi->mb.e_mbd.itxm_add = cpi->oxcf.lossless ? vp9_iwht4x4_add - : vp9_idct4x4_add; - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - + if (cpi->oxcf.lossless) { + // In lossless mode, make sure right quantizer range and correct transform + // is set. + cpi->oxcf.worst_allowed_q = 0; + cpi->oxcf.best_allowed_q = 0; + cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; + } else { + cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; + } + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; cpi->refresh_golden_frame = 0; @@ -1382,17 +887,15 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. - cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target, - cpi->oxcf.maximum_buffer_size); - cpi->rc.buffer_level = MIN(cpi->rc.buffer_level, - cpi->oxcf.maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, cpi->oxcf.maximum_buffer_size); + rc->buffer_level = MIN(rc->buffer_level, cpi->oxcf.maximum_buffer_size); // Set up frame rate and related parameters rate control values. 
vp9_new_framerate(cpi, cpi->oxcf.framerate); // Set absolute upper and lower quality limits - cpi->rc.worst_quality = cpi->oxcf.worst_allowed_q; - cpi->rc.best_quality = cpi->oxcf.best_allowed_q; + rc->worst_quality = cpi->oxcf.worst_allowed_q; + rc->best_quality = cpi->oxcf.best_allowed_q; // active values should only be modified if out of new range @@ -1417,8 +920,9 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { } update_frame_size(cpi); - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { vp9_update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); } @@ -1434,7 +938,7 @@ void vp9_change_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { #else cpi->alt_ref_source = NULL; #endif - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; #if 0 // Experimental RD Code @@ -1455,7 +959,7 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) { mvjointsadcost[0] = 600; mvjointsadcost[1] = 300; mvjointsadcost[2] = 300; - mvjointsadcost[0] = 300; + mvjointsadcost[3] = 300; } static void cal_nmvsadcosts(int *mvsadcost[2]) { @@ -1611,7 +1115,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { int i, j; VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL; - RATE_CONTROL *const rc = cpi != NULL ? 
&cpi->rc : NULL; if (!cm) return NULL; @@ -1634,6 +1137,7 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->use_svc = 0; init_config(cpi, oxcf); + vp9_rc_init(&cpi->oxcf, cpi->pass, &cpi->rc); init_pick_mode_context(cpi); cm->current_video_frame = 0; @@ -1641,8 +1145,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { // Set reference frame sign bias for ALTREF frame to 1 (for now) cm->ref_frame_sign_bias[ALTREF_FRAME] = 1; - rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; - cpi->gold_is_last = 0; cpi->alt_is_last = 0; cpi->gold_is_alt = 0; @@ -1656,8 +1158,8 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); // Create a map used for cyclic background refresh. - CHECK_MEM_ERROR(cm, cpi->cyclic_refresh.map, - vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); + CHECK_MEM_ERROR(cm, cpi->cyclic_refresh, + vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols)); // And a place holder structure is the coding context // for use if we want to save and restore it @@ -1678,13 +1180,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; - - rc->frames_since_key = 8; // Sensible default for first frame. 
- rc->this_key_frame_forced = 0; - rc->next_key_frame_forced = 0; - - rc->source_alt_ref_pending = 0; - rc->source_alt_ref_active = 0; cpi->refresh_alt_ref_frame = 0; #if CONFIG_MULTIPLE_ARF @@ -1740,18 +1235,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->first_time_stamp_ever = INT64_MAX; - rc->frames_till_gf_update_due = 0; - - rc->ni_av_qi = cpi->oxcf.worst_allowed_q; - rc->ni_tot_qi = 0; - rc->ni_frames = 0; - rc->tot_q = 0.0; - rc->avg_q = vp9_convert_qindex_to_q(cpi->oxcf.worst_allowed_q); - - rc->rate_correction_factor = 1.0; - rc->key_frame_rate_correction_factor = 1.0; - rc->gf_rate_correction_factor = 1.0; - cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); cpi->mb.nmvcost[0] = &cpi->mb.nmvcosts[0][MV_MAX]; cpi->mb.nmvcost[1] = &cpi->mb.nmvcosts[1][MV_MAX]; @@ -1787,13 +1270,53 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { const size_t packet_sz = sizeof(FIRSTPASS_STATS); const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; - cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; - vp9_init_second_pass(cpi); + if (cpi->svc.number_spatial_layers > 1 + && cpi->svc.number_temporal_layers == 1) { + FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf; + FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = {0}; + int i; + + for (i = 0; i < oxcf->ss_number_layers; ++i) { + FIRSTPASS_STATS *const last_packet_for_layer = + &stats[packets - oxcf->ss_number_layers + i]; + const int layer_id = (int)last_packet_for_layer->spatial_layer_id; + const int packets_in_layer = (int)last_packet_for_layer->count + 1; + if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; + + vpx_free(lc->rc_twopass_stats_in.buf); + + lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz; + CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf, + 
vpx_malloc(lc->rc_twopass_stats_in.sz)); + lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf; + lc->twopass.stats_in = lc->twopass.stats_in_start; + lc->twopass.stats_in_end = lc->twopass.stats_in_start + + packets_in_layer - 1; + stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; + } + } + + for (i = 0; i < packets; ++i) { + const int layer_id = (int)stats[i].spatial_layer_id; + if (layer_id >= 0 && layer_id < oxcf->ss_number_layers + && stats_copy[layer_id] != NULL) { + *stats_copy[layer_id] = stats[i]; + ++stats_copy[layer_id]; + } + } + + vp9_init_second_pass_spatial_svc(cpi); + } else { + cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; + cpi->twopass.stats_in = cpi->twopass.stats_in_start; + cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; + + vp9_init_second_pass(cpi); + } } - vp9_set_speed_features(cpi); + set_speed_features(cpi); // Default rd threshold factors for mode selection for (i = 0; i < BLOCK_SIZES; ++i) { @@ -2046,53 +1569,42 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif } +static int64_t get_sse(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int width, int height) { + const int dw = width % 16; + const int dh = height % 16; + int64_t total_sse = 0; + unsigned int sse = 0; + int sum = 0; + int x, y; + + if (dw > 0) { + variance(&a[width - dw], a_stride, &b[width - dw], b_stride, + dw, height, &sse, &sum); + total_sse += sse; + } + if (dh > 0) { + variance(&a[(height - dh) * a_stride], a_stride, + &b[(height - dh) * b_stride], b_stride, + width - dw, dh, &sse, &sum); + total_sse += sse; + } -static uint64_t calc_plane_error(const uint8_t *orig, int orig_stride, - const uint8_t *recon, int recon_stride, - unsigned int cols, unsigned int rows) { - unsigned int row, col; - uint64_t total_sse = 0; - int diff; - - for (row = 0; row + 16 <= rows; row += 16) { - for (col = 0; col + 16 <= cols; col += 16) { - unsigned int sse; - - vp9_mse16x16(orig + col, orig_stride, recon + col, 
recon_stride, &sse); + for (y = 0; y < height / 16; ++y) { + const uint8_t *pa = a; + const uint8_t *pb = b; + for (x = 0; x < width / 16; ++x) { + vp9_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; - } - - /* Handle odd-sized width */ - if (col < cols) { - unsigned int border_row, border_col; - const uint8_t *border_orig = orig; - const uint8_t *border_recon = recon; - for (border_row = 0; border_row < 16; border_row++) { - for (border_col = col; border_col < cols; border_col++) { - diff = border_orig[border_col] - border_recon[border_col]; - total_sse += diff * diff; - } - - border_orig += orig_stride; - border_recon += recon_stride; - } + pa += 16; + pb += 16; } - orig += orig_stride * 16; - recon += recon_stride * 16; - } - - /* Handle odd-sized height */ - for (; row < rows; row++) { - for (col = 0; col < cols; col++) { - diff = orig[col] - recon[col]; - total_sse += diff * diff; - } - - orig += orig_stride; - recon += recon_stride; + a += 16 * a_stride; + b += 16 * b_stride; } return total_sse; @@ -2120,9 +1632,9 @@ static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, const int w = widths[i]; const int h = heights[i]; const uint32_t samples = w * h; - const uint64_t sse = calc_plane_error(a_planes[i], a_strides[i], - b_planes[i], b_strides[i], - w, h); + const uint64_t sse = get_sse(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); psnr->sse[1 + i] = sse; psnr->samples[1 + i] = samples; psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, 255.0, (double)sse); @@ -2521,7 +2033,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vpx_usec_timer_start(&timer); - vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick); + vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); @@ -2596,7 +2108,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { vp9_clear_system_state(); - recon_err = 
vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) fprintf(f, "%10u %10d %10d %10d %10d %10d " @@ -2652,7 +2164,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, int q) { VP9_COMMON *const cm = &cpi->common; vp9_clear_system_state(); - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cm, q); // Set up entropy context depending on frame type. The decoder mandates // the use of the default context, index 0, for keyframes and inter @@ -2660,21 +2172,21 @@ static void encode_without_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { - cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) + cm->frame_context_idx = cpi->refresh_alt_ref_frame; + + setup_inter_frame(cm); } // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - vp9_setup_cyclic_refresh_aq(cpi); + vp9_cyclic_refresh_setup(cpi); } // transform / motion compensation build reconstruction frame vp9_encode_frame(cpi); @@ -2709,7 +2221,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, do { vp9_clear_system_state(); - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cm, q); if (loop_count == 0) { // Set up entropy context depending on frame type. 
The decoder mandates @@ -2718,12 +2230,12 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. if (cm->frame_type == KEY_FRAME) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) { + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - } - vp9_setup_inter_frame(cpi); + + setup_inter_frame(cm); } } @@ -2732,7 +2244,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - setup_in_frame_q_adj(cpi); + vp9_setup_in_frame_q_adj(cpi); } // transform / motion compensation build reconstruction frame @@ -2748,13 +2260,13 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // accurate estimate of output frame size to determine if we need // to recode. 
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { - vp9_save_coding_context(cpi); + save_coding_context(cpi); cpi->dummy_packing = 1; if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size); rc->projected_frame_size = (int)(*size) << 3; - vp9_restore_coding_context(cpi); + restore_coding_context(cpi); if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; @@ -2767,7 +2279,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, rc->this_key_frame_forced && (rc->projected_frame_size < rc->max_frame_bandwidth)) { int last_q = q; - int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + int kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); int high_err_target = cpi->ambient_err; int low_err_target = cpi->ambient_err >> 1; @@ -2971,6 +2483,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } else { cpi->Source = cpi->un_scaled_source; } + + // Scale the last source buffer, if required. + if (cpi->unscaled_last_source != NULL) { + if (cm->mi_cols * MI_SIZE != cpi->unscaled_last_source->y_width || + cm->mi_rows * MI_SIZE != cpi->unscaled_last_source->y_height) { + scale_and_extend_frame_nonnormative(cpi->unscaled_last_source, + &cpi->scaled_last_source); + cpi->Last_Source = &cpi->scaled_last_source; + } else { + cpi->Last_Source = cpi->unscaled_last_source; + } + } + vp9_scale_references(cpi); vp9_clear_system_state(); @@ -3008,7 +2533,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set various flags etc to special state if it is a key frame. if (frame_is_intra_only(cm)) { - vp9_setup_key_frame(cpi); + setup_key_frame(cpi); // Reset the loop filter deltas and segmentation map. vp9_reset_segment_features(&cm->seg); @@ -3090,6 +2615,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_write_yuv_frame(cpi->Source); #endif + set_speed_features(cpi); + // Decide q and q bounds. 
q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index); @@ -3099,8 +2626,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH); } - vp9_set_speed_features(cpi); - if (cpi->sf.recode_loop == DISALLOW_RECODE) { encode_without_recode_loop(cpi, size, dest, q); } else { @@ -3111,7 +2636,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // fixed interval. Note the reconstruction error if it is the frame before // the force key frame if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) { - cpi->ambient_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); + cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } // If the encoder forced a KEY_FRAME decision @@ -3231,11 +2756,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Don't increment frame counters if this was an altref buffer // update not a real frame ++cm->current_video_frame; + if (cpi->use_svc) + vp9_inc_frame_in_layer(&cpi->svc); } // restore prev_mi - cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; + cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; + cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -3261,7 +2788,7 @@ static void Pass1Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, (void) frame_flags; vp9_rc_get_first_pass_params(cpi); - vp9_set_quantizer(cpi, find_fp_qindex()); + vp9_set_quantizer(&cpi->common, find_fp_qindex()); vp9_first_pass(cpi); } @@ -3272,7 +2799,7 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, vp9_rc_get_second_pass_params(cpi); encode_frame_to_data_rate(cpi, size, dest, frame_flags); - vp9_twopass_postencode_update(cpi, *size); + vp9_twopass_postencode_update(cpi); } static void check_initial_width(VP9_COMP *cpi, int subsampling_x, @@ -3306,7 +2833,7 @@ int 
vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags, vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); - if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) { + if (cm->profile == PROFILE_0 && (subsampling_x != 1 || subsampling_y != 1)) { vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, "Non-4:2:0 color space requires profile >= 1"); res = -1; @@ -3377,8 +2904,9 @@ void adjust_frame_rate(VP9_COMP *cpi) { int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + RATE_CONTROL *const rc = &cpi->rc; struct vpx_usec_timer cmptimer; YV12_BUFFER_CONFIG *force_src_buffer = NULL; MV_REFERENCE_FRAME ref_frame; @@ -3386,9 +2914,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (!cpi) return -1; + if (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2) { + vp9_restore_layer_context(cpi); + } + vpx_usec_timer_start(&cmptimer); cpi->source = NULL; + cpi->last_source = NULL; set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV); @@ -3400,7 +2933,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->refresh_alt_ref_frame = 0; // Should we code an alternate reference frame. 
- if (cpi->oxcf.play_alternate && cpi->rc.source_alt_ref_pending) { + if (cpi->oxcf.play_alternate && rc->source_alt_ref_pending) { int frames_to_arf; #if CONFIG_MULTIPLE_ARF @@ -3412,9 +2945,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, - cpi->next_frame_in_order; else #endif - frames_to_arf = cpi->rc.frames_till_gf_update_due; + frames_to_arf = rc->frames_till_gf_update_due; - assert(frames_to_arf <= cpi->rc.frames_to_key); + assert(frames_to_arf <= rc->frames_to_key); if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, frames_to_arf))) { #if CONFIG_MULTIPLE_ARF @@ -3426,7 +2959,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->oxcf.arnr_max_frames > 0) { // Produce the filtered ARF frame. // TODO(agrange) merge these two functions. - vp9_configure_arnr_filter(cpi, frames_to_arf, cpi->rc.gfu_boost); + vp9_configure_arnr_filter(cpi, frames_to_arf, rc->gfu_boost); vp9_temporal_filter_prepare(cpi, frames_to_arf); vp9_extend_frame_borders(&cpi->alt_ref_buffer); force_src_buffer = &cpi->alt_ref_buffer; @@ -3436,14 +2969,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->refresh_alt_ref_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 0; - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } else { - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; } } @@ -3451,25 +2984,32 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #if CONFIG_MULTIPLE_ARF int i; #endif + + // Get last frame source. 
+ if (cm->current_video_frame > 0) { + if ((cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL) + return -1; + } + if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) { cm->show_frame = 1; cm->intra_only = 0; #if CONFIG_MULTIPLE_ARF // Is this frame the ARF overlay. - cpi->rc.is_src_frame_alt_ref = 0; + rc->is_src_frame_alt_ref = 0; for (i = 0; i < cpi->arf_buffered; ++i) { if (cpi->source == cpi->alt_ref_source[i]) { - cpi->rc.is_src_frame_alt_ref = 1; + rc->is_src_frame_alt_ref = 1; cpi->refresh_golden_frame = 1; break; } } #else - cpi->rc.is_src_frame_alt_ref = cpi->alt_ref_source - && (cpi->source == cpi->alt_ref_source); + rc->is_src_frame_alt_ref = cpi->alt_ref_source && + (cpi->source == cpi->alt_ref_source); #endif - if (cpi->rc.is_src_frame_alt_ref) { + if (rc->is_src_frame_alt_ref) { // Current frame is an ARF overlay frame. #if CONFIG_MULTIPLE_ARF cpi->alt_ref_source[i] = NULL; @@ -3489,13 +3029,20 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->source) { cpi->un_scaled_source = cpi->Source = force_src_buffer ? 
force_src_buffer : &cpi->source->img; + + if (cpi->last_source != NULL) { + cpi->unscaled_last_source = &cpi->last_source->img; + } else { + cpi->unscaled_last_source = NULL; + } + *time_stamp = cpi->source->ts_start; *time_end = cpi->source->ts_end; *frame_flags = cpi->source->flags; #if CONFIG_MULTIPLE_ARF - if ((cm->frame_type != KEY_FRAME) && (cpi->pass == 2)) - cpi->rc.source_alt_ref_pending = is_next_frame_arf(cpi); + if (cm->frame_type != KEY_FRAME && cpi->pass == 2) + rc->source_alt_ref_pending = is_next_frame_arf(cpi); #endif } else { *size = 0; @@ -3518,7 +3065,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - vp9_update_layer_framerate(cpi); + vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } @@ -3568,8 +3115,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); - xd->interp_kernel = vp9_get_interp_kernel( - DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_vaq_init(); @@ -3578,7 +3123,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->pass == 1 && (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass1Encode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 2 && !cpi->use_svc) { + } else if (cpi->pass == 2 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass2Encode(cpi, size, dest, frame_flags); } else if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); @@ -3600,8 +3146,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Save layer specific state. 
- if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if ((cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { vp9_save_layer_context(cpi); } @@ -3844,28 +3391,12 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) { return; } -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference) { - int i, j; - int total = 0; - - const uint8_t *src = source->y_buffer; - const uint8_t *ref = reference->y_buffer; - - // Loop through the Y plane raw and reconstruction data summing - // (square differences) - for (i = 0; i < source->y_height; i += 16) { - for (j = 0; j < source->y_width; j += 16) { - unsigned int sse; - total += vp9_mse16x16(src + j, source->y_stride, - ref + j, reference->y_stride, &sse); - } - - src += 16 * source->y_stride; - ref += 16 * reference->y_stride; - } +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { + assert(a->y_crop_width == b->y_crop_width); + assert(a->y_crop_height == b->y_crop_height); - return total; + return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); } diff --git a/libvpx/vp9/encoder/vp9_onyx_int.h b/libvpx/vp9/encoder/vp9_onyx_int.h index f4b44ce..e30fb02 100644 --- a/libvpx/vp9/encoder/vp9_onyx_int.h +++ b/libvpx/vp9/encoder/vp9_onyx_int.h @@ -23,6 +23,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_lookahead.h" @@ -30,6 +31,7 @@ #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" #include 
"vp9/encoder/vp9_variance.h" @@ -114,75 +116,6 @@ typedef enum { } THR_MODES_SUB8X8; typedef enum { - DIAMOND = 0, - NSTEP = 1, - HEX = 2, - BIGDIA = 3, - SQUARE = 4, - FAST_HEX = 5, - FAST_DIAMOND = 6 -} SEARCH_METHODS; - -typedef enum { - USE_FULL_RD = 0, - USE_LARGESTINTRA, - USE_LARGESTINTRA_MODELINTER, - USE_LARGESTALL -} TX_SIZE_SEARCH_METHOD; - -typedef enum { - NOT_IN_USE = 0, - RELAXED_NEIGHBORING_MIN_MAX = 1, - STRICT_NEIGHBORING_MIN_MAX = 2 -} AUTO_MIN_MAX_MODE; - -typedef enum { - // Terminate search early based on distortion so far compared to - // qp step, distortion in the neighborhood of the frame, etc. - FLAG_EARLY_TERMINATE = 1 << 0, - - // Skips comp inter modes if the best so far is an intra mode. - FLAG_SKIP_COMP_BESTINTRA = 1 << 1, - - // Skips comp inter modes if the best single intermode so far does - // not have the same reference as one of the two references being - // tested. - FLAG_SKIP_COMP_REFMISMATCH = 1 << 2, - - // Skips oblique intra modes if the best so far is an inter mode. - FLAG_SKIP_INTRA_BESTINTER = 1 << 3, - - // Skips oblique intra modes at angles 27, 63, 117, 153 if the best - // intra so far is not one of the neighboring directions. - FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, - - // Skips intra modes other than DC_PRED if the source variance is small - FLAG_SKIP_INTRA_LOWVAR = 1 << 5, -} MODE_SEARCH_SKIP_LOGIC; - -typedef enum { - SUBPEL_TREE = 0, - // Other methods to come -} SUBPEL_SEARCH_METHODS; - -typedef enum { - LAST_FRAME_PARTITION_OFF = 0, - LAST_FRAME_PARTITION_LOW_MOTION = 1, - LAST_FRAME_PARTITION_ALL = 2 -} LAST_FRAME_PARTITION_METHOD; - -typedef enum { - // No recode. - DISALLOW_RECODE = 0, - // Allow recode for KF and exceeding maximum frame bandwidth. - ALLOW_RECODE_KFMAXBW = 1, - // Allow recode only for KF/ARF/GF frames. - ALLOW_RECODE_KFARFGF = 2, - // Allow recode for all frames based on bitrate constraints. - ALLOW_RECODE = 3, -} RECODE_LOOP_TYPE; - -typedef enum { // encode_breakout is disabled. 
ENCODE_BREAKOUT_DISABLED = 0, // encode_breakout is enabled. @@ -192,225 +125,6 @@ typedef enum { } ENCODE_BREAKOUT_TYPE; typedef enum { - // Search partitions using RD/NONRD criterion - SEARCH_PARTITION = 0, - - // Always use a fixed size partition - FIXED_PARTITION = 1, - - // Use a fixed size partition in every 64X64 SB, where the size is - // determined based on source variance - VAR_BASED_FIXED_PARTITION = 2, - - REFERENCE_PARTITION = 3, - - // Use an arbitrary partitioning scheme based on source variance within - // a 64X64 SB - VAR_BASED_PARTITION -} PARTITION_SEARCH_TYPE; - -typedef struct { - // Frame level coding parameter update - int frame_parameter_update; - - // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). - SEARCH_METHODS search_method; - - RECODE_LOOP_TYPE recode_loop; - - // Subpel_search_method can only be subpel_tree which does a subpixel - // logarithmic search that keeps stepping at 1/2 pixel units until - // you stop getting a gain, and then goes on to 1/4 and repeats - // the same process. Along the way it skips many diagonals. - SUBPEL_SEARCH_METHODS subpel_search_method; - - // Maximum number of steps in logarithmic subpel search before giving up. - int subpel_iters_per_step; - - // Control when to stop subpel search - int subpel_force_stop; - - // Thresh_mult is used to set a threshold for the rd score. A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. - int thresh_mult[MAX_MODES]; - int thresh_mult_sub8x8[MAX_REFS]; - - // This parameter controls the number of steps we'll do in a diamond - // search. - int max_step_search_steps; - - // This parameter controls which step in the n-step process we start at. - // It's changed adaptively based on circumstances. 
- int reduce_first_step_size; - - // If this is set to 1, we limit the motion search range to 2 times the - // largest motion vector found in the last frame. - int auto_mv_step_size; - - // Trellis (dynamic programming) optimization of quantized values (+1, 0). - int optimize_coefficients; - - // Always set to 0. If on it enables 0 cost background transmission - // (except for the initial transmission of the segmentation). The feature is - // disabled because the addition of very large block sizes make the - // backgrounds very to cheap to encode, and the segmentation we have - // adds overhead. - int static_segmentation; - - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. - int comp_inter_joint_search_thresh; - - // This variable is used to cap the maximum number of times we skip testing a - // mode to be evaluated. A high value means we will be faster. - int adaptive_rd_thresh; - - // Enables skipping the reconstruction step (idct, recon) in the - // intermediate steps assuming the last frame didn't have too many intra - // blocks and the q is less than a threshold. - int skip_encode_sb; - int skip_encode_frame; - - // This variable allows us to reuse the last frames partition choices - // (64x64 v 32x32 etc) for this frame. It can be set to only use the last - // frame as a starting point in low motion scenes or always use it. If set - // we use last partitioning_redo frequency to determine how often to redo - // the partitioning from scratch. Adjust_partitioning_from_last_frame - // enables us to adjust up or down one partitioning from the last frames - // partitioning. - LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; - - // Determine which method we use to determine transform size. 
We can choose - // between options like full rd, largest for prediction size, largest - // for intra and model coefs for the rest. - TX_SIZE_SEARCH_METHOD tx_size_search_method; - - // Low precision 32x32 fdct keeps everything in 16 bits and thus is less - // precise but significantly faster than the non lp version. - int use_lp32x32fdct; - - // TODO(JBB): remove this as its no longer used. - - // After looking at the first set of modes (set by index here), skip - // checking modes for reference frames that don't match the reference frame - // of the best so far. - int mode_skip_start; - - // TODO(JBB): Remove this. - int reference_masking; - - PARTITION_SEARCH_TYPE partition_search_type; - - // Used if partition_search_type = FIXED_SIZE_PARTITION - BLOCK_SIZE always_this_block_size; - - // Skip rectangular partition test when partition type none gives better - // rd than partition type split. - int less_rectangular_check; - - // Disable testing non square partitions. (eg 16x32) - int use_square_partition_only; - - // Sets min and max partition sizes for this 64x64 region based on the - // same 64x64 in last encoded frame, and the left and above neighbor. - AUTO_MIN_MAX_MODE auto_min_max_partition_size; - - // Min and max partition size we enable (block_size) as per auto - // min max, but also used by adjust partitioning, and pick_partitioning. - BLOCK_SIZE min_partition_size; - BLOCK_SIZE max_partition_size; - - // Whether or not we allow partitions one smaller or one greater than the last - // frame's partitioning. Only used if use_lastframe_partitioning is set. - int adjust_partitioning_from_last_frame; - - // How frequently we re do the partitioning from scratch. Only used if - // use_lastframe_partitioning is set. - int last_partitioning_redo_frequency; - - // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable - // it always, to allow it for only Last frame and Intra, disable it for all - // inter modes or to enable it always. 
- int disable_split_mask; - - // TODO(jingning): combine the related motion search speed features - // This allows us to use motion search at other sizes as a starting - // point for this motion search and limits the search range around it. - int adaptive_motion_search; - - // Allows sub 8x8 modes to use the prediction filter that was determined - // best for 8x8 mode. If set to 0 we always re check all the filters for - // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter - // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. - int adaptive_pred_interp_filter; - - // Search through variable block partition types in non-RD mode decision - // encoding process for RTC. - int partition_check; - - // Implements various heuristics to skip searching modes - // The heuristics selected are based on flags - // defined in the MODE_SEARCH_SKIP_HEURISTICS enum - unsigned int mode_search_skip_flags; - - // A source variance threshold below which the split mode is disabled - unsigned int disable_split_var_thresh; - - // A source variance threshold below which filter search is disabled - // Choose a very large value (UINT_MAX) to use 8-tap always - unsigned int disable_filter_search_var_thresh; - - // These bit masks allow you to enable or disable intra modes for each - // transform size separately. - int intra_y_mode_mask[TX_SIZES]; - int intra_uv_mode_mask[TX_SIZES]; - - // This variable enables an early break out of mode testing if the model for - // rd built from the prediction signal indicates a value that's much - // higher than the best rd we've seen so far. - int use_rd_breakout; - - // This enables us to use an estimate for intra rd based on dc mode rather - // than choosing an actual uv mode in the stage of encoding before the actual - // final encode. - int use_uv_intra_rd_estimate; - - // This feature controls how the loop filter level is determined: - // 0: Try the full image with different values. 
- // 1: Try a small portion of the image with different values. - // 2: Estimate the level based on quantizer and frame type - int use_fast_lpf_pick; - - // This feature limits the number of coefficients updates we actually do - // by only looking at counts from 1/2 the bands. - int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced - - // This flag controls the use of non-RD mode decision. - int use_nonrd_pick_mode; - - // This variable sets the encode_breakout threshold. Currently, it is only - // enabled in real time mode. - int encode_breakout_thresh; - - // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. - int disable_inter_mode_mask[BLOCK_SIZES]; - - // This feature controls whether we do the expensive context update and - // calculation in the rd coefficient costing loop. - int use_fast_coef_costing; - - // This variable controls the maximum block size where intra blocks can be - // used in inter frames. - // TODO(aconverse): Fold this into one of the other many mode skips - BLOCK_SIZE max_intra_bsize; -} SPEED_FEATURES; - -typedef enum { NORMAL = 0, FOURFIVE = 1, THREEFIVE = 2, @@ -418,44 +132,12 @@ typedef enum { } VPX_SCALING; typedef enum { - VP9_LAST_FLAG = 1 << 0, - VP9_GOLD_FLAG = 1 << 1, - VP9_ALT_FLAG = 1 << 2, -} VP9_REFFRAME; - -typedef enum { USAGE_LOCAL_FILE_PLAYBACK = 0, USAGE_STREAM_FROM_SERVER = 1, USAGE_CONSTRAINED_QUALITY = 2, USAGE_CONSTANT_QUALITY = 3, } END_USAGE; -typedef struct { - // Target percentage of blocks per frame that are cyclicly refreshed. - int max_mbs_perframe; - // Maximum q-delta as percentage of base q. - int max_qdelta_perc; - // Block size below which we don't apply cyclic refresh. - BLOCK_SIZE min_block_size; - // Macroblock starting index (unit of 8x8) for cycling through the frame. - int mb_index; - // Controls how long a block will need to wait to be refreshed again. 
- int time_for_refresh; - // Actual number of blocks that were applied delta-q (segment 1). - int num_seg_blocks; - // Actual encoding bits for segment 1. - int actual_seg_bits; - // RD mult. parameters for segment 1. - int rdmult; - // Cyclic refresh map. - signed char *map; - // Projected rate and distortion for the current superblock. - int64_t projected_rate_sb; - int64_t projected_dist_sb; - // Thresholds applied to projected rate/distortion of the superblock. - int64_t thresh_rate_sb; - int64_t thresh_dist_sb; -} CYCLIC_REFRESH; typedef enum { // Good Quality Fast Encoding. The encoder balances quality with the // amount of time it takes to encode the output. (speed setting @@ -503,10 +185,9 @@ typedef enum { AQ_MODE_COUNT // This should always be the last member of the enum } AQ_MODE; -typedef struct { - int version; // 4 versions of bitstream defined: - // 0 - best quality/slowest decode, - // 3 - lowest quality/fastest decode +typedef struct VP9_CONFIG { + BITSTREAM_PROFILE profile; + BIT_DEPTH bit_depth; int width; // width of data passed to the compressor int height; // height of data passed to the compressor double framerate; // set to passed in framerate @@ -550,6 +231,9 @@ typedef struct { int lossless; AQ_MODE aq_mode; // Adaptive Quantization mode + // Enable feature to reduce the frame quantization every x frames. 
+ int frame_periodic_boost; + // two pass datarate control int two_pass_vbrbias; // two pass datarate control tweaks int two_pass_vbrmin_section; @@ -598,23 +282,7 @@ typedef struct { } VP9_CONFIG; typedef struct VP9_COMP { - DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); - - DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); - -#if CONFIG_ALPHA - DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); -#endif - + QUANTS quants; MACROBLOCK mb; VP9_COMMON common; VP9_CONFIG oxcf; @@ -625,10 +293,14 @@ typedef struct VP9_COMP { #else struct lookahead_entry *alt_ref_source; #endif + struct lookahead_entry *last_source; YV12_BUFFER_CONFIG *Source; + YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames YV12_BUFFER_CONFIG *un_scaled_source; YV12_BUFFER_CONFIG scaled_source; + YV12_BUFFER_CONFIG *unscaled_last_source; + YV12_BUFFER_CONFIG scaled_last_source; int key_frame_frequency; @@ -671,6 +343,13 @@ typedef struct VP9_COMP { // Ambient reconstruction err target for force key frames int ambient_err; + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. 
+ int rd_thresh_mult[MAX_MODES]; + int rd_thresh_mult_sub8x8[MAX_REFS]; + int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; @@ -708,14 +387,12 @@ typedef struct VP9_COMP { vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; vp9_coeff_probs_model frame_coef_probs[TX_SIZES][PLANE_TYPES]; - vp9_coeff_stats frame_branch_ct[TX_SIZES][PLANE_TYPES]; struct vpx_codec_pkt_list *output_pkt_list; MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; int mbgraph_n_frames; // number of frames filled in the above int static_mb_pct; // % forced skip mbs by segmentation - int seg0_progress, seg0_idx, seg0_cnt; // for real time encoding int speed; @@ -747,7 +424,7 @@ typedef struct VP9_COMP { unsigned char *active_map; unsigned int active_map_enabled; - CYCLIC_REFRESH cyclic_refresh; + CYCLIC_REFRESH *cyclic_refresh; fractional_mv_step_fp *find_fractional_mv_step; fractional_mv_step_comp_fp *find_fractional_mv_step_comp; @@ -805,10 +482,6 @@ typedef struct VP9_COMP { unsigned int activity_avg; unsigned int *mb_activity_map; int *mb_norm_activity_map; - int output_partition; - - // Force next frame to intra when kf_auto says so. - int force_next_frame_intra; int droppable; @@ -823,6 +496,8 @@ typedef struct VP9_COMP { SVC svc; + int use_large_partition_rate; + #if CONFIG_MULTIPLE_ARF // ARF tracking variables. 
int multi_arf_enabled; @@ -840,13 +515,6 @@ typedef struct VP9_COMP { // Debug / test stats int64_t mode_test_hits[BLOCK_SIZES]; #endif - - // Y,U,V,(A) - ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; - ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; - - PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[8]; } VP9_COMP; void vp9_initialize_enc(); @@ -854,7 +522,7 @@ void vp9_initialize_enc(); struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf); void vp9_remove_compressor(VP9_COMP *cpi); -void vp9_change_config(VP9_COMP *cpi, VP9_CONFIG *oxcf); +void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf); // receive a frames worth of data. caller can assume that a copy of this // frame is made and not just a copy of the pointer.. @@ -903,8 +571,8 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc); int vp9_get_quantizer(struct VP9_COMP *cpi); -static int get_ref_frame_idx(const VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { +static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, + MV_REFERENCE_FRAME ref_frame) { if (ref_frame == LAST_FRAME) { return cpi->lst_fb_idx; } else if (ref_frame == GOLDEN_FRAME) { @@ -914,39 +582,45 @@ static int get_ref_frame_idx(const VP9_COMP *cpi, } } -static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON *const cm = &cpi->common; - return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, - ref_frame)]].buf; +static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( + VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { + VP9_COMMON * const cm = &cpi->common; + return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] + .buf; } -void vp9_set_speed_features(VP9_COMP *cpi); - -int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *reference); +// Intra only frames, golden frames (except alt ref overlays) and +// alt ref frames tend to be coded at a higher than ambient quality +static INLINE int 
vp9_frame_is_boosted(const VP9_COMP *cpi) { + return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); +} -void vp9_alloc_compressor_data(VP9_COMP *cpi); +static INLINE int get_token_alloc(int mb_rows, int mb_cols) { + // TODO(JBB): make this work for alpha channel and double check we can't + // exceed this token count if we have a 32x32 transform crossing a boundary + // at a multiple of 16. + // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full + // resolution. We assume up to 1 token per pixel, and then allow + // a head room of 4. + return mb_rows * mb_cols * (16 * 16 * 3 + 4); +} -int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget); +int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); -int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, - double rate_target_ratio); +void vp9_alloc_compressor_data(VP9_COMP *cpi); void vp9_scale_references(VP9_COMP *cpi); void vp9_update_reference_frames(VP9_COMP *cpi); -static int get_token_alloc(int mb_rows, int mb_cols) { - return mb_rows * mb_cols * (48 * 16 + 4); -} - extern const int q_trans[]; int64_t vp9_rescale(int64_t val, int64_t num, int denom); -static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, - MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { +static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, + MV_REFERENCE_FRAME ref0, + MV_REFERENCE_FRAME ref1) { xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0]; xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? 
ref1 - LAST_FRAME diff --git a/libvpx/vp9/encoder/vp9_picklpf.c b/libvpx/vp9/encoder/vp9_picklpf.c index b5f4901..3ac8522 100644 --- a/libvpx/vp9/encoder/vp9_picklpf.c +++ b/libvpx/vp9/encoder/vp9_picklpf.c @@ -10,16 +10,18 @@ #include <assert.h> #include <limits.h> + +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_quant_common.h" + #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" -#include "vp9/common/vp9_quant_common.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_scale/vpx_scale.h" -#include "vp9/common/vp9_alloccommon.h" -#include "vp9/common/vp9_loopfilter.h" -#include "./vpx_scale_rtcd.h" static int get_max_filter_level(VP9_COMP *cpi) { return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 @@ -28,12 +30,12 @@ static int get_max_filter_level(VP9_COMP *cpi) { static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, - MACROBLOCKD *const xd, VP9_COMMON *const cm, int filt_level, int partial_frame) { + VP9_COMMON *const cm = &cpi->common; int filt_err; - vp9_loop_filter_frame(cm, xd, filt_level, 1, partial_frame); - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); + vp9_loop_filter_frame(cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame); + filt_err = vp9_get_y_sse(sd, cm->frame_to_show); // Re-instate the unfiltered frame vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); @@ -43,7 +45,6 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial_frame) { - MACROBLOCKD *const xd = &cpi->mb.e_mbd; VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; const int min_filter_level = 0; @@ -64,7 +65,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Make a copy of the unfiltered 
/ processed recon buffer vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); - best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial_frame); + best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame); filt_best = filt_mid; ss_err[filt_mid] = best_err; @@ -86,7 +87,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, if (filt_direction <= 0 && filt_low != filt_mid) { // Get Low filter error score if (ss_err[filt_low] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, cm, filt_low, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_low, partial_frame); ss_err[filt_low] = filt_err; } else { filt_err = ss_err[filt_low]; @@ -105,7 +106,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Now look at filt_high if (filt_direction >= 0 && filt_high != filt_mid) { if (ss_err[filt_high] < 0) { - filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial_frame); + filt_err = try_filter_frame(sd, cpi, filt_high, partial_frame); ss_err[filt_high] = filt_err; } else { filt_err = ss_err[filt_high]; @@ -119,7 +120,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, // Half the step distance if the best filter value was the same as last time if (filt_best == filt_mid) { - filter_step = filter_step / 2; + filter_step /= 2; filt_direction = 0; } else { filt_direction = (filt_best < filt_mid) ? -1 : 1; @@ -131,25 +132,24 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, } void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, - int method) { + LPF_PICK_METHOD method) { VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; lf->sharpness_level = cm->frame_type == KEY_FRAME ? 
0 : cpi->oxcf.sharpness; - if (method == 2) { + if (method == LPF_PICK_FROM_Q) { const int min_filter_level = 0; const int max_filter_level = get_max_filter_level(cpi); const int q = vp9_ac_quant(cm->base_qindex, 0); // These values were determined by linear fitting the result of the - // searched level - // filt_guess = q * 0.316206 + 3.87252 - int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18; + // searched level, filt_guess = q * 0.316206 + 3.87252 + int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); if (cm->frame_type == KEY_FRAME) filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); } else { - search_filter_level(sd, cpi, method == 1); + search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); } } diff --git a/libvpx/vp9/encoder/vp9_picklpf.h b/libvpx/vp9/encoder/vp9_picklpf.h index 203ef87..7d08ddb 100644 --- a/libvpx/vp9/encoder/vp9_picklpf.h +++ b/libvpx/vp9/encoder/vp9_picklpf.h @@ -16,11 +16,13 @@ extern "C" { #endif +#include "vp9/encoder/vp9_onyx_int.h" + struct yv12_buffer_config; struct VP9_COMP; void vp9_pick_filter_level(const struct yv12_buffer_config *sd, - struct VP9_COMP *cpi, int method); + struct VP9_COMP *cpi, LPF_PICK_METHOD method); #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c index 6c84144..f3fe99c 100644 --- a/libvpx/vp9/encoder/vp9_pickmode.c +++ b/libvpx/vp9/encoder/vp9_pickmode.c @@ -29,9 +29,9 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, - int_mv *tmp_mv) { + int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int step_param; int sadpb = x->sadperbit16; @@ -76,8 +76,11 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, return; } } - - 
mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + assert(x->mv_best_ref_index[ref] <= 2); + if (x->mv_best_ref_index[ref] < 2) + mvp_full = mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; + else + mvp_full = x->pred_mv[ref].as_mv; mvp_full.col >>= 3; mvp_full.row >>= 3; @@ -125,14 +128,20 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } + + // calculate the bit cost on motion vector + mvp_full.row = tmp_mv->as_mv.row * 8; + mvp_full.col = tmp_mv->as_mv.col * 8; + *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, - MV *tmp_mv, int *rate_mv) { + MV *tmp_mv) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int ref = mbmi->ref_frame[0]; MV ref_mv = mbmi->ref_mvs[ref][0].as_mv; @@ -160,15 +169,13 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref]); - // calculate the bit cost on motion vector - *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } + + x->pred_mv[ref].as_mv = *tmp_mv; } static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, @@ -183,14 +190,12 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, &sse); + int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride, + 
pd->dst.buf, pd->dst.stride, &sse); - vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bs], + vp9_model_rd_from_var_lapndz(sse + var, 1 << num_pels_log2_lookup[bsize], pd->dequant[1] >> 3, &rate, &dist); - *out_rate_sum = rate; *out_dist_sum = dist << 3; } @@ -204,12 +209,12 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t *returndistortion, BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode, best_mode = ZEROMV; MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; + INTERP_FILTER best_pred_filter = EIGHTTAP; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, @@ -227,6 +232,13 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, intra_cost_penalty, 0); const int64_t intra_mode_cost = 50; + unsigned char segment_id = mbmi->segment_id; + const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; + const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + // Mode index conversion from MB_PREDICTION_MODE to THR_MODES for a ref frame. + int mode_idx[MB_MODE_COUNT] = {0}; + INTERP_FILTER filter_ref = SWITCHABLE; + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; @@ -244,20 +256,24 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ?
EIGHTTAP : cpi->common.interp_filter; mbmi->skip = 0; - mbmi->segment_id = 0; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); + mbmi->segment_id = segment_id; for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + if (xd->up_available) + filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter; + else if (xd->left_available) + filter_ref = xd->mi[-1]->mbmi.interp_filter; + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; @@ -270,6 +286,14 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[0] = ref_frame; + // Set conversion index for LAST_FRAME. 
+ if (ref_frame == LAST_FRAME) { + mode_idx[NEARESTMV] = THR_NEARESTMV; // LAST_FRAME, NEARESTMV + mode_idx[NEARMV] = THR_NEARMV; // LAST_FRAME, NEARMV + mode_idx[ZEROMV] = THR_ZEROMV; // LAST_FRAME, ZEROMV + mode_idx[NEWMV] = THR_NEWMV; // LAST_FRAME, NEWMV + } + for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate_mv = 0; @@ -277,18 +301,29 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (1 << INTER_OFFSET(this_mode))) continue; + if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] * + rd_thresh_freq_fact[this_mode] >> 5) || + rd_threshes[mode_idx[this_mode]] == INT_MAX) + continue; + if (this_mode == NEWMV) { + int rate_mode = 0; if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize])) continue; full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame]); + &frame_mv[NEWMV][ref_frame], &rate_mv); if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) continue; + rate_mode = x->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; + if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd) + continue; + sub_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame].as_mv, &rate_mv); + &frame_mv[NEWMV][ref_frame].as_mv); } if (this_mode != NEARESTMV) @@ -298,9 +333,63 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mode = this_mode; mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + // Search for the best prediction filter type, when the resulting + // motion vector is at sub-pixel accuracy level for luma component, i.e., + // the last three bits are not all zeros.
+ if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && + ((mbmi->mv[0].as_mv.row & 0x07) != 0 || + (mbmi->mv[0].as_mv.col & 0x07) != 0)) { + int64_t tmp_rdcost1 = INT64_MAX; + int64_t tmp_rdcost2 = INT64_MAX; + int64_t tmp_rdcost3 = INT64_MAX; + int pf_rate[3]; + int64_t pf_dist[3]; + + mbmi->interp_filter = EIGHTTAP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP], + &pf_dist[EIGHTTAP]); + tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP], + pf_dist[EIGHTTAP]); + + mbmi->interp_filter = EIGHTTAP_SHARP; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP], + &pf_dist[EIGHTTAP_SHARP]); + tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP], + pf_dist[EIGHTTAP_SHARP]); + + mbmi->interp_filter = EIGHTTAP_SMOOTH; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH], + &pf_dist[EIGHTTAP_SMOOTH]); + tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, + vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH], + pf_dist[EIGHTTAP_SMOOTH]); + + if (tmp_rdcost2 < tmp_rdcost1) { + if (tmp_rdcost2 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP_SHARP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } else { + if (tmp_rdcost1 < tmp_rdcost3) + mbmi->interp_filter = EIGHTTAP; + else + mbmi->interp_filter = EIGHTTAP_SMOOTH; + } + + rate = pf_rate[mbmi->interp_filter]; + dist = pf_dist[mbmi->interp_filter]; + } else { + mbmi->interp_filter = (filter_ref == SWITCHABLE) ? 
EIGHTTAP: filter_ref; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + } + rate += rate_mv; rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]] [INTER_OFFSET(this_mode)]; @@ -311,15 +400,17 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = rate; *returndistortion = dist; best_mode = this_mode; + best_pred_filter = mbmi->interp_filter; best_ref_frame = ref_frame; } } } mbmi->mode = best_mode; + mbmi->interp_filter = best_pred_filter; mbmi->ref_frame[0] = best_ref_frame; mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int; - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; // Perform intra prediction search, if the best SAD is above a certain // threshold. diff --git a/libvpx/vp9/encoder/vp9_quantize.c b/libvpx/vp9/encoder/vp9_quantize.c index 4ab8995..c092ee4 100644 --- a/libvpx/vp9/encoder/vp9_quantize.c +++ b/libvpx/vp9/encoder/vp9_quantize.c @@ -153,6 +153,7 @@ static void invert_quant(int16_t *quant, int16_t *shift, int d) { void vp9_init_quantizer(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + QUANTS *const quants = &cpi->quants; int i, q, quant; for (q = 0; q < QINDEX_RANGE; q++) { @@ -163,48 +164,49 @@ void vp9_init_quantizer(VP9_COMP *cpi) { // y quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q) : vp9_ac_quant(q, 0); - invert_quant(&cpi->y_quant[q][i], &cpi->y_quant_shift[q][i], quant); - cpi->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->y_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant); + quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->y_round[q][i] = (qrounding_factor * quant) >> 7; cm->y_dequant[q][i] = quant; // uv quant = i == 0 ? 
vp9_dc_quant(q, cm->uv_dc_delta_q) : vp9_ac_quant(q, cm->uv_ac_delta_q); - invert_quant(&cpi->uv_quant[q][i], &cpi->uv_quant_shift[q][i], quant); - cpi->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->uv_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->uv_quant[q][i], + &quants->uv_quant_shift[q][i], quant); + quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->uv_round[q][i] = (qrounding_factor * quant) >> 7; cm->uv_dequant[q][i] = quant; #if CONFIG_ALPHA // alpha quant = i == 0 ? vp9_dc_quant(q, cm->a_dc_delta_q) : vp9_ac_quant(q, cm->a_ac_delta_q); - invert_quant(&cpi->a_quant[q][i], &cpi->a_quant_shift[q][i], quant); - cpi->a_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); - cpi->a_round[q][i] = (qrounding_factor * quant) >> 7; + invert_quant(&quants->a_quant[q][i], &quants->a_quant_shift[q][i], quant); + quants->a_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); + quants->a_round[q][i] = (qrounding_factor * quant) >> 7; cm->a_dequant[q][i] = quant; #endif } for (i = 2; i < 8; i++) { - cpi->y_quant[q][i] = cpi->y_quant[q][1]; - cpi->y_quant_shift[q][i] = cpi->y_quant_shift[q][1]; - cpi->y_zbin[q][i] = cpi->y_zbin[q][1]; - cpi->y_round[q][i] = cpi->y_round[q][1]; + quants->y_quant[q][i] = quants->y_quant[q][1]; + quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1]; + quants->y_zbin[q][i] = quants->y_zbin[q][1]; + quants->y_round[q][i] = quants->y_round[q][1]; cm->y_dequant[q][i] = cm->y_dequant[q][1]; - cpi->uv_quant[q][i] = cpi->uv_quant[q][1]; - cpi->uv_quant_shift[q][i] = cpi->uv_quant_shift[q][1]; - cpi->uv_zbin[q][i] = cpi->uv_zbin[q][1]; - cpi->uv_round[q][i] = cpi->uv_round[q][1]; + quants->uv_quant[q][i] = quants->uv_quant[q][1]; + quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1]; + quants->uv_zbin[q][i] = quants->uv_zbin[q][1]; + quants->uv_round[q][i] = quants->uv_round[q][1]; cm->uv_dequant[q][i] = cm->uv_dequant[q][1]; #if CONFIG_ALPHA - 
cpi->a_quant[q][i] = cpi->a_quant[q][1]; - cpi->a_quant_shift[q][i] = cpi->a_quant_shift[q][1]; - cpi->a_zbin[q][i] = cpi->a_zbin[q][1]; - cpi->a_round[q][i] = cpi->a_round[q][1]; + quants->a_quant[q][i] = quants->a_quant[q][1]; + quants->a_quant_shift[q][i] = quants->a_quant_shift[q][1]; + quants->a_zbin[q][i] = quants->a_zbin[q][1]; + quants->a_round[q][i] = quants->a_round[q][1]; cm->a_dequant[q][i] = cm->a_dequant[q][1]; #endif } @@ -213,27 +215,28 @@ void vp9_init_quantizer(VP9_COMP *cpi) { void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const int segment_id = xd->mi_8x8[0]->mbmi.segment_id; + MACROBLOCKD *const xd = &x->e_mbd; + QUANTS *const quants = &cpi->quants; + const int segment_id = xd->mi[0]->mbmi.segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj; int i; // Y - x->plane[0].quant = cpi->y_quant[qindex]; - x->plane[0].quant_shift = cpi->y_quant_shift[qindex]; - x->plane[0].zbin = cpi->y_zbin[qindex]; - x->plane[0].round = cpi->y_round[qindex]; + x->plane[0].quant = quants->y_quant[qindex]; + x->plane[0].quant_shift = quants->y_quant_shift[qindex]; + x->plane[0].zbin = quants->y_zbin[qindex]; + x->plane[0].round = quants->y_round[qindex]; x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); xd->plane[0].dequant = cm->y_dequant[qindex]; // UV for (i = 1; i < 3; i++) { - x->plane[i].quant = cpi->uv_quant[qindex]; - x->plane[i].quant_shift = cpi->uv_quant_shift[qindex]; - x->plane[i].zbin = cpi->uv_zbin[qindex]; - x->plane[i].round = cpi->uv_round[qindex]; + x->plane[i].quant = quants->uv_quant[qindex]; + x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; + x->plane[i].zbin = quants->uv_zbin[qindex]; + x->plane[i].round = quants->uv_round[qindex]; x->plane[i].zbin_extra = 
(int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); xd->plane[i].dequant = cm->uv_dequant[qindex]; } @@ -273,9 +276,7 @@ void vp9_frame_init_quantizer(VP9_COMP *cpi) { vp9_init_plane_quantizers(cpi, &cpi->mb); } -void vp9_set_quantizer(struct VP9_COMP *cpi, int q) { - VP9_COMMON *const cm = &cpi->common; - +void vp9_set_quantizer(VP9_COMMON *cm, int q) { // quantizer has to be reinitialized with vp9_init_quantizer() if any // delta_q changes. cm->base_qindex = q; diff --git a/libvpx/vp9/encoder/vp9_quantize.h b/libvpx/vp9/encoder/vp9_quantize.h index f356b12..7d231df 100644 --- a/libvpx/vp9/encoder/vp9_quantize.h +++ b/libvpx/vp9/encoder/vp9_quantize.h @@ -17,12 +17,30 @@ extern "C" { #endif +typedef struct { + DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); + + DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); + +#if CONFIG_ALPHA + DECLARE_ALIGNED(16, int16_t, a_quant[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_quant_shift[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_zbin[QINDEX_RANGE][8]); + DECLARE_ALIGNED(16, int16_t, a_round[QINDEX_RANGE][8]); +#endif +} QUANTS; + void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan); struct VP9_COMP; - -void vp9_set_quantizer(struct VP9_COMP *cpi, int q); +struct VP9Common; void vp9_frame_init_quantizer(struct VP9_COMP *cpi); @@ -32,6 +50,8 @@ void vp9_init_plane_quantizers(struct VP9_COMP *cpi, MACROBLOCK *x); void vp9_init_quantizer(struct VP9_COMP *cpi); +void vp9_set_quantizer(struct VP9Common *cm, int q); + #ifdef __cplusplus } // extern "C" #endif diff --git 
a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c index edc48bb..3420816 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/libvpx/vp9/encoder/vp9_ratectrl.c @@ -35,9 +35,6 @@ #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 -// Bits Per MB at different Q (Multiplied by 512) -#define BPER_MB_NORMBITS 9 - // Tables relating active max Q to active min Q static int kf_low_motion_minq[QINDEX_RANGE]; static int kf_high_motion_minq[QINDEX_RANGE]; @@ -55,10 +52,9 @@ static int kf_low = 400; // formulaic approach to facilitate easier adjustment of the Q tables. // The formulae were derived from computing a 3rd order polynomial best // fit to the original data (after plotting real maxq vs minq (not q index)) -static int calculate_minq_index(double maxq, - double x3, double x2, double x1, double c) { +static int get_minq_index(double maxq, double x3, double x2, double x1) { int i; - const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq + c, + const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq); // Special case handling to deal with the step from q2.0 @@ -66,57 +62,26 @@ static int calculate_minq_index(double maxq, if (minqtarget <= 2.0) return 0; - for (i = 0; i < QINDEX_RANGE; i++) { + for (i = 0; i < QINDEX_RANGE; i++) if (minqtarget <= vp9_convert_qindex_to_q(i)) return i; - } return QINDEX_RANGE - 1; } -void vp9_rc_init_minq_luts(void) { +void vp9_rc_init_minq_luts() { int i; for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i); - - kf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.000001, - -0.0004, - 0.15, - 0.0); - kf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.000002, - -0.0012, - 0.50, - 0.0); - - gf_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, - 0.32, - 0.0); - gf_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.50, - 0.0); - afq_low_motion_minq[i] = calculate_minq_index(maxq, - 0.0000015, - -0.0009, 
- 0.33, - 0.0); - afq_high_motion_minq[i] = calculate_minq_index(maxq, - 0.0000021, - -0.00125, - 0.55, - 0.0); - inter_minq[i] = calculate_minq_index(maxq, - 0.00000271, - -0.00113, - 0.75, - 0.0); + kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15); + kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50); + gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32); + gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); + afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); + afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); } } @@ -138,79 +103,10 @@ int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, return (int)(0.5 + (enumerator * correction_factor / q)); } -void vp9_save_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Stores a snapshot of key state variables which can subsequently be - // restored with a call to vp9_restore_coding_context. These functions are - // intended for use in a re-code loop in vp9_compress_frame where the - // quantizer value is adjusted between loop iterations. - vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); - vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); - vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - - vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - - vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, - cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); - vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - - cc->fc = cm->fc; -} - -void vp9_restore_coding_context(VP9_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - VP9_COMMON *cm = &cpi->common; - - // Restore key state variables to the snapshot state stored in the - // previous call to vp9_save_coding_context. 
- vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); - vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); - vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - - vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - - vpx_memcpy(cm->last_frame_seg_map, - cpi->coding_context.last_frame_seg_map_copy, - (cm->mi_rows * cm->mi_cols)); - - vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); - vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - - cm->fc = cc->fc; -} - -void vp9_setup_key_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - - vp9_setup_past_independence(cm); - - /* All buffers are implicitly updated on key frames. */ - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; -} - -void vp9_setup_inter_frame(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - if (cm->error_resilient_mode || cm->intra_only) - vp9_setup_past_independence(cm); - - assert(cm->frame_context_idx < FRAME_CONTEXTS); - cm->fc = cm->frame_contexts[cm->frame_context_idx]; -} - -static int estimate_bits_at_q(int frame_kind, int q, int mbs, +static int estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor) { - const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor)); - - // Attempt to retain reasonable accuracy without overflow. The cutoff is - // chosen such that the maximum product of Bpm and MBs fits 31 bits. The - // largest Bpm takes 20 bits. - return (mbs > (1 << 11)) ? (bpm >> BPER_MB_NORMBITS) * mbs - : (bpm * mbs) >> BPER_MB_NORMBITS; + const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor)); + return ((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS; } int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { @@ -247,13 +143,12 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { // Update the buffer level for higher layers, given the encoded current layer. 
-static void update_layer_buffer_level(VP9_COMP *const cpi, - int encoded_frame_size) { +static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { int temporal_layer = 0; - int current_temporal_layer = cpi->svc.temporal_layer_id; + int current_temporal_layer = svc->temporal_layer_id; for (temporal_layer = current_temporal_layer + 1; - temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + temporal_layer < svc->number_temporal_layers; ++temporal_layer) { + LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer]; RATE_CONTROL *lrc = &lc->rc; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); @@ -283,10 +178,60 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->buffer_level = rc->bits_off_target; if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - update_layer_buffer_level(cpi, encoded_frame_size); + update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } +void vp9_rc_init(const VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc) { + if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + rc->avg_frame_qindex[0] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[1] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[2] = oxcf->worst_allowed_q; + } else { + rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + } + + rc->last_q[0] = oxcf->best_allowed_q; + rc->last_q[1] = oxcf->best_allowed_q; + rc->last_q[2] = oxcf->best_allowed_q; + + rc->buffer_level = oxcf->starting_buffer_level; + rc->bits_off_target = oxcf->starting_buffer_level; + + rc->rolling_target_bits = rc->av_per_frame_bandwidth; + rc->rolling_actual_bits = rc->av_per_frame_bandwidth; + rc->long_rolling_target_bits = 
rc->av_per_frame_bandwidth; + rc->long_rolling_actual_bits = rc->av_per_frame_bandwidth; + + rc->total_actual_bits = 0; + rc->total_target_vs_actual = 0; + + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; + rc->frames_since_key = 8; // Sensible default for first frame. + rc->this_key_frame_forced = 0; + rc->next_key_frame_forced = 0; + rc->source_alt_ref_pending = 0; + rc->source_alt_ref_active = 0; + + rc->frames_till_gf_update_due = 0; + + rc->ni_av_qi = oxcf->worst_allowed_q; + rc->ni_tot_qi = 0; + rc->ni_frames = 0; + + rc->tot_q = 0.0; + rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q); + + rc->rate_correction_factor = 1.0; + rc->key_frame_rate_correction_factor = 1.0; + rc->gf_rate_correction_factor = 1.0; +} + int vp9_rc_drop_frame(VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; @@ -330,6 +275,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { return cpi->rc.key_frame_rate_correction_factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) return cpi->rc.gf_rate_correction_factor; else @@ -342,6 +288,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { cpi->rc.key_frame_rate_correction_factor = factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !cpi->rc.is_src_frame_alt_ref && !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) cpi->rc.gf_rate_correction_factor = factor; else @@ -350,7 +297,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { } void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { - const int q = cpi->common.base_qindex; + const VP9_COMMON *const cm = &cpi->common; int correction_factor = 100; double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; @@ -363,8 +310,8 @@ void 
vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { // Work out how big we would have expected the frame to be at this Q given // the current correction factor. // Stay in double to avoid int overflow when values are large - projected_size_based_on_q = estimate_bits_at_q(cpi->common.frame_type, q, - cpi->common.MBs, + projected_size_based_on_q = estimate_bits_at_q(cm->frame_type, + cm->base_qindex, cm->MBs, rate_correction_factor); // Work out a size correction factor. if (projected_size_based_on_q > 0) @@ -388,20 +335,18 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { if (correction_factor > 102) { // We are not already at the worst allowable quality - correction_factor = - (int)(100 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 + ((correction_factor - 100) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor > MAX_BPB_FACTOR) rate_correction_factor = MAX_BPB_FACTOR; } else if (correction_factor < 99) { // We are not already at the best allowable quality - correction_factor = - (int)(100 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = - ((rate_correction_factor * correction_factor) / 100); + correction_factor = (int)(100 - ((100 - correction_factor) * + adjustment_limit)); + rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor < MIN_BPB_FACTOR) @@ -422,11 +367,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. 
- if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) - // Case where we would overflow int - target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS; - else - target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; + target_bits_per_mb = + ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; i = active_best_quality; @@ -493,6 +435,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // If buffer is below the optimal level, let the active_worst_quality go from // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). + const VP9_COMMON *const cm = &cpi->common; const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. @@ -500,9 +443,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { int64_t buff_lvl_step = 0; int adjustment = 0; int active_worst_quality; - if (cpi->common.frame_type == KEY_FRAME) + if (cm->frame_type == KEY_FRAME) return rc->worst_quality; - if (cpi->common.current_video_frame > 1) + if (cm->current_video_frame > 1) active_worst_quality = MIN(rc->worst_quality, rc->avg_frame_qindex[INTER_FRAME] * 5 / 4); else @@ -556,7 +499,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, (last_boosted_q * 0.75)); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { @@ -578,8 +521,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. 
q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } } else if (!rc->is_src_frame_alt_ref && !cpi->use_svc && @@ -635,7 +578,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -668,8 +611,8 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { // not first frame of one pass and kf_boost is set @@ -690,15 +633,15 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -801,7 +744,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -817,7 +760,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -850,8 +793,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex); - int delta_qindex = vp9_compute_qdelta(cpi, last_boosted_q, - (last_boosted_q * 0.75)); + int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, + last_boosted_q * 0.75); active_best_quality = MAX(qindex + delta_qindex, rc->best_quality); } else { // Not forced keyframe. @@ -875,15 +818,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * - q_adj_factor); + active_best_quality += vp9_compute_qdelta(rc, q_val, + q_val * q_adj_factor); } #else double current_q; // Force the KF quantizer to be 30% of the active_worst_quality. 
current_q = vp9_convert_qindex_to_q(active_worst_quality); active_best_quality = active_worst_quality - + vp9_compute_qdelta(cpi, current_q, current_q * 0.3); + + vp9_compute_qdelta(rc, current_q, current_q * 0.3); #endif } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { @@ -984,7 +927,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate. - if (cpi->rc.this_frame_target >= cpi->rc.max_frame_bandwidth) + if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; @@ -1000,7 +943,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, assert(level >= 0); new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level))); q = active_worst_quality + - vp9_compute_qdelta(cpi, current_q, new_q); + vp9_compute_qdelta(rc, current_q, new_q); *bottom_index = q; *top_index = q; @@ -1016,8 +959,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, } int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, - int *bottom_index, - int *top_index) { + int *bottom_index, int *top_index) { int q; if (cpi->pass == 0) { if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) @@ -1028,14 +970,14 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index); } - // JBB : This is realtime mode. In real time mode the first frame - // should be larger. Q of 0 is disabled because we force tx size to be + // Q of 0 is disabled because we force tx size to be // 16x16... 
if (cpi->sf.use_nonrd_pick_mode) { - if (cpi->common.current_video_frame == 0) - q /= 3; if (q == 0) q++; + if (cpi->sf.force_frame_boost == 1) + q -= cpi->sf.max_delta_qindex; + if (q < *bottom_index) *bottom_index = q; else if (q > *top_index) @@ -1053,28 +995,14 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, *frame_under_shoot_limit = 0; *frame_over_shoot_limit = INT_MAX; } else { - if (cpi->common.frame_type == KEY_FRAME) { - *frame_over_shoot_limit = this_frame_target * 9 / 8; - *frame_under_shoot_limit = this_frame_target * 7 / 8; - } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) { - *frame_over_shoot_limit = this_frame_target * 9 / 8; - *frame_under_shoot_limit = this_frame_target * 7 / 8; - } else { - // Stron overshoot limit for constrained quality - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - *frame_over_shoot_limit = this_frame_target * 11 / 8; - *frame_under_shoot_limit = this_frame_target * 2 / 8; - } else { - *frame_over_shoot_limit = this_frame_target * 11 / 8; - *frame_under_shoot_limit = this_frame_target * 5 / 8; - } - } - } + int recode_tolerance = + (cpi->sf.recode_tolerance * this_frame_target) / 100; + + *frame_over_shoot_limit = this_frame_target + recode_tolerance; + *frame_under_shoot_limit = this_frame_target - recode_tolerance; // For very small rate targets where the fractional adjustment - // (eg * 7/8) may be tiny make sure there is at least a minimum - // range. + // may be tiny make sure there is at least a minimum range. 
*frame_over_shoot_limit += 200; *frame_under_shoot_limit -= 200; if (*frame_under_shoot_limit < 0) @@ -1099,16 +1027,17 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) { static void update_alt_ref_frame_stats(VP9_COMP *cpi) { // this frame refreshes means next frames don't unless specified by user - cpi->rc.frames_since_golden = 0; + RATE_CONTROL *const rc = &cpi->rc; + rc->frames_since_golden = 0; #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif // Clear the alternate reference update pending flag. - cpi->rc.source_alt_ref_pending = 0; + rc->source_alt_ref_pending = 0; // Set the alternate reference frame active flag - cpi->rc.source_alt_ref_active = 1; + rc->source_alt_ref_active = 1; } static void update_golden_frame_stats(VP9_COMP *cpi) { @@ -1137,6 +1066,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; cm->last_frame_type = cm->frame_type; @@ -1146,7 +1076,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Post encode loop adjustment of Q prediction. vp9_rc_update_rate_correction_factors( cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF || - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); // Keep a record of last Q and ambient average Q. 
if (cm->frame_type == KEY_FRAME) { @@ -1155,7 +1085,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && - !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) { + !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) { rc->last_q[2] = cm->base_qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); @@ -1201,12 +1131,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Actual bits spent rc->total_actual_bits += rc->projected_frame_size; + rc->total_target_bits += (cm->show_frame ? rc->av_per_frame_bandwidth : 0); - // Debug stats - rc->total_target_vs_actual += (rc->this_frame_target - - rc->projected_frame_size); + rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame && + if (oxcf->play_alternate && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. update_alt_ref_frame_stats(cpi); @@ -1239,15 +1168,15 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { static const int af_ratio = 10; - const RATE_CONTROL *rc = &cpi->rc; + const RATE_CONTROL *const rc = &cpi->rc; int target; #if USE_ALTREF_FOR_ONE_PASS target = (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ? 
- (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) / - (cpi->rc.baseline_gf_interval + af_ratio - 1) : - (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) / - (cpi->rc.baseline_gf_interval + af_ratio - 1); + (rc->av_per_frame_bandwidth * rc->baseline_gf_interval * af_ratio) / + (rc->baseline_gf_interval + af_ratio - 1) : + (rc->av_per_frame_bandwidth * rc->baseline_gf_interval) / + (rc->baseline_gf_interval + af_ratio - 1); #else target = rc->av_per_frame_bandwidth; #endif @@ -1299,18 +1228,19 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; + const SVC *const svc = &cpi->svc; const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target = rc->av_per_frame_bandwidth; - if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (svc->number_temporal_layers > 1 && + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { // Note that for layers, av_per_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). 
- int current_temporal_layer = cpi->svc.temporal_layer_id; - const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer]; + int current_temporal_layer = svc->temporal_layer_id; + const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer]; target = lc->avg_frame_size; min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } @@ -1347,13 +1277,14 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - int target = cpi->rc.av_per_frame_bandwidth; + RATE_CONTROL *const rc = &cpi->rc; + int target = rc->av_per_frame_bandwidth; if ((cm->current_video_frame == 0) || (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % + (cpi->oxcf.auto_key && (rc->frames_since_key % cpi->key_frame_frequency == 0))) { cm->frame_type = KEY_FRAME; - cpi->rc.source_alt_ref_active = 0; + rc->source_alt_ref_active = 0; if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { target = calc_iframe_target_size_one_pass_cbr(cpi); } @@ -1364,8 +1295,8 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } } vp9_rc_set_frame_target(cpi, target); - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; + rc->frames_till_gf_update_due = INT_MAX; + rc->baseline_gf_interval = INT_MAX; } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { @@ -1392,3 +1323,46 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { rc->frames_till_gf_update_due = INT_MAX; rc->baseline_gf_interval = INT_MAX; } + +int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget) { + int start_index = rc->worst_quality; + int target_index = rc->worst_quality; + int i; + + // Convert the average q value to an index. 
+ for (i = rc->best_quality; i < rc->worst_quality; ++i) { + start_index = i; + if (vp9_convert_qindex_to_q(i) >= qstart) + break; + } + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_convert_qindex_to_q(i) >= qtarget) + break; + } + + return target_index - start_index; +} + +int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio) { + int target_index = rc->worst_quality; + int i; + + // Look up the current projected bits per block for the base index + const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0); + + // Find the target bits per mb based on the base value and given ratio. + const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); + + // Convert the q target to an index + for (i = rc->best_quality; i < rc->worst_quality; ++i) { + target_index = i; + if (vp9_rc_bits_per_mb(frame_type, i, 1.0) <= target_bits_per_mb ) + break; + } + + return target_index - qindex; +} diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h index ed6266f..7693c2b 100644 --- a/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/libvpx/vp9/encoder/vp9_ratectrl.h @@ -22,6 +22,9 @@ extern "C" { #define FRAME_OVERHEAD_BITS 200 +// Bits Per MB at different Q (Multiplied by 512) +#define BPER_MB_NORMBITS 9 + typedef struct { // Rate targetting variables int this_frame_target; @@ -58,7 +61,7 @@ typedef struct { int ni_av_qi; int ni_tot_qi; int ni_frames; - int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF + int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF double tot_q; double avg_q; @@ -75,7 +78,8 @@ typedef struct { int long_rolling_actual_bits; int64_t total_actual_bits; - int total_target_vs_actual; // debug stats + int64_t total_target_bits; + int64_t total_target_vs_actual; int worst_quality; int best_quality; @@ -83,17 +87,13 @@ typedef struct { } RATE_CONTROL; struct 
VP9_COMP; +struct VP9_CONFIG; -void vp9_save_coding_context(struct VP9_COMP *cpi); -void vp9_restore_coding_context(struct VP9_COMP *cpi); - -void vp9_setup_key_frame(struct VP9_COMP *cpi); -void vp9_setup_inter_frame(struct VP9_COMP *cpi); +void vp9_rc_init(const struct VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc); double vp9_convert_qindex_to_q(int qindex); -// initialize luts for minq -void vp9_rc_init_minq_luts(void); +void vp9_rc_init_minq_luts(); // Generally at the high level, the following flow is expected // to be enforced for rate control: @@ -166,6 +166,15 @@ int vp9_rc_clamp_pframe_target_size(const struct VP9_COMP *const cpi, // This function is called only from the vp9_rc_get_..._params() functions. void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target); +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a target q value +int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget); + +// Computes a q delta (in "q index" terms) to get from a starting q value +// to a value that should equate to the given rate ratio. +int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, + int qindex, double rate_target_ratio); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c index 2fd25ef..dcd2852 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.c +++ b/libvpx/vp9/encoder/vp9_rdopt.c @@ -244,7 +244,6 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { static void set_block_thresholds(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; - const SPEED_FEATURES *const sf = &cpi->sf; int i, bsize, segment_id; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { @@ -261,13 +260,13 @@ static void set_block_thresholds(VP9_COMP *cpi) { for (i = 0; i < MAX_MODES; ++i) cpi->rd_threshes[segment_id][bsize][i] = - sf->thresh_mult[i] < thresh_max ? 
sf->thresh_mult[i] * t / 4 + cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4 : INT_MAX; for (i = 0; i < MAX_REFS; ++i) { cpi->rd_thresh_sub8x8[segment_id][bsize][i] = - sf->thresh_mult_sub8x8[i] < thresh_max - ? sf->thresh_mult_sub8x8[i] * t / 4 + cpi->rd_thresh_mult_sub8x8[i] < thresh_max + ? cpi->rd_thresh_mult_sub8x8[i] * t / 4 : INT_MAX; } } @@ -433,7 +432,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int i; int64_t rate_sum = 0; int64_t dist_sum = 0; - const int ref = xd->mi_8x8[0]->mbmi.ref_frame[0]; + const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; for (i = 0; i < MAX_MB_PLANE; ++i) { @@ -557,7 +556,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, const int16_t *scan, const int16_t *nb, int use_fast_coef_costing) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct macroblock_plane *p = &x->plane[plane]; const struct macroblockd_plane *pd = &xd->plane[plane]; const PLANE_TYPE type = pd->plane_type; @@ -566,7 +565,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mbmi)]; - uint8_t *p_tok = x->token_cache; + uint8_t token_cache[32 * 32]; int pt = combine_entropy_contexts(*A, *L); int c, cost; // Check for consistency of tx_size with mode info @@ -584,7 +583,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, int v = qcoeff[0]; int prev_t = vp9_dct_value_tokens_ptr[v].token; cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v]; - p_tok[0] = vp9_pt_energy_class[prev_t]; + token_cache[0] = vp9_pt_energy_class[prev_t]; ++token_costs; // ac tokens @@ -597,9 +596,9 @@ static INLINE int cost_coeffs(MACROBLOCK *x, if (use_fast_coef_costing) { cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v]; } else { - pt = get_coef_context(nb, 
p_tok, c); + pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v]; - p_tok[rc] = vp9_pt_energy_class[t]; + token_cache[rc] = vp9_pt_energy_class[t]; } prev_t = t; if (!--band_left) { @@ -613,7 +612,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, if (use_fast_coef_costing) { cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; } else { - pt = get_coef_context(nb, p_tok, c); + pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[0][pt][EOB_TOKEN]; } } @@ -639,7 +638,7 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, &this_sse) >> shift; args->sse = this_sse >> shift; - if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) { + if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) { // TODO(jingning): tune the model to better capture the distortion. int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> (shift + 2); @@ -664,7 +663,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; int64_t rd1, rd2, rd; if (args->skip) @@ -750,7 +749,7 @@ static void txfm_rd_in_plane(MACROBLOCK *x, args.use_fast_coef_costing = use_fast_coef_casting; if (plane == 0) - xd->mi_8x8[0]->mbmi.tx_size = tx_size; + xd->mi[0]->mbmi.tx_size = tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); @@ -780,7 +779,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; mbmi->tx_size = MIN(max_tx_size, largest_tx_size); @@ -799,7 +798,7 @@ static void choose_txfm_size_from_rd(VP9_COMP 
*cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -882,7 +881,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, @@ -952,7 +951,7 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const TX_SIZE max_tx_size = max_txsize_lookup[bs]; TX_SIZE tx_size; @@ -995,7 +994,7 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t ref_best_rd) { int64_t sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; assert(bs == mbmi->sb_type); if (cpi->sf.tx_size_search_method != USE_FULL_RD) { @@ -1071,7 +1070,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, vpx_memcpy(ta, a, sizeof(ta)); vpx_memcpy(tl, l, sizeof(tl)); - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.tx_size = TX_4X4; for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; @@ -1100,7 +1099,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); int16_t 
*const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); - xd->mi_8x8[0]->bmi[block].as_mode = mode; + xd->mi[0]->bmi[block].as_mode = mode; vp9_predict_intra_block(xd, block, 1, TX_4X4, mode, x->skip_encode ? src : dst, @@ -1173,10 +1172,10 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, int64_t best_rd) { int i, j; const MACROBLOCKD *const xd = &mb->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; - const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; - const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; - const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; + MODE_INFO *const mic = xd->mi[0]; + const MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL; + const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; @@ -1243,7 +1242,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode_selected = DC_PRED; MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mic = xd->mi_8x8[0]; + MODE_INFO *const mic = xd->mi[0]; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; @@ -1257,8 +1256,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, /* Y Search for intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t local_tx_cache[TX_MODES]; - MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride]; - MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; + MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; + MODE_INFO *left_mi = xd->left_available ? 
xd->mi[-1] : NULL; if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode))) continue; @@ -1312,7 +1311,7 @@ static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi); int plane; int pnrate = 0, pnskip = 1; @@ -1369,7 +1368,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; - xd->mi_8x8[0]->mbmi.uv_mode = mode; + xd->mi[0]->mbmi.uv_mode = mode; super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd); @@ -1410,7 +1409,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - xd->mi_8x8[0]->mbmi.uv_mode = mode_selected; + xd->mi[0]->mbmi.uv_mode = mode_selected; return best_rd; } @@ -1421,7 +1420,7 @@ static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; int64_t unused; - x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED; + x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED; super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED]; @@ -1447,13 +1446,13 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? 
BLOCK_8X8 : bsize, max_tx_size); } - *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode; + *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode; } -static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, +static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode, int mode_context) { - MACROBLOCK *const x = &cpi->mb; - const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id; + const MACROBLOCK *const x = &cpi->mb; + const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id; // Don't account for mode here if segment skip is enabled. if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { @@ -1478,7 +1477,7 @@ static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i, int_mv seg_mvs[MAX_REF_FRAMES], int_mv *best_ref_mv[2], const int *mvjcost, int *mvcost[2]) { - MODE_INFO *const mic = xd->mi_8x8[0]; + MODE_INFO *const mic = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mic->mbmi; int thismvcost = 0; int idx, idy; @@ -1546,7 +1545,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCKD *xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; @@ -1560,6 +1559,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, int thisrate = 0, ref; const scan_order *so = &vp9_default_scan_orders[TX_4X4]; const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter); + for (ref = 0; ref < 1 + is_compound; ++ref) { const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i, pd->pre[ref].stride)]; @@ -1567,7 +1568,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, 
&xd->block_refs[ref]->sf, width, height, ref, - xd->interp_kernel, MV_PRECISION_Q3, + kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); } @@ -1643,7 +1644,7 @@ static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) { } static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { - MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; @@ -1658,7 +1659,7 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, struct buf_2d orig_pre[2]) { - MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; x->plane[0].src = orig_src; x->e_mbd.plane[0].pre[0] = orig_pre[0]; if (has_second_ref(mbmi)) @@ -1669,6 +1670,45 @@ static INLINE int mv_has_subpel(const MV *mv) { return (mv->row & 0x0F) || (mv->col & 0x0F); } +// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion. 
+// TODO(aconverse): Find out if this is still productive then clean up or remove +static int check_best_zero_mv( + const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int disable_inter_mode_mask, int this_mode, int ref_frame, + int second_ref_frame) { + if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && + frame_mv[this_mode][ref_frame].as_int == 0 && + (second_ref_frame == NONE || + frame_mv[this_mode][second_ref_frame].as_int == 0)) { + int rfc = mode_context[ref_frame]; + int c1 = cost_mv_ref(cpi, NEARMV, rfc); + int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); + int c3 = cost_mv_ref(cpi, ZEROMV, rfc); + + if (this_mode == NEARMV) { + if (c1 > c3) return 0; + } else if (this_mode == NEARESTMV) { + if (c2 > c3) return 0; + } else { + assert(this_mode == ZEROMV); + if (second_ref_frame == NONE) { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0)) + return 0; + } else { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 && + frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 && + frame_mv[NEARMV][second_ref_frame].as_int == 0)) + return 0; + } + } + } + return 1; +} + static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, BEST_SEG_INFO *bsi_buf, int filter_idx, @@ -1679,7 +1719,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, MB_PREDICTION_MODE this_mode; MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MODE_INFO *mi = xd->mi_8x8[0]; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; @@ -1737,43 +1777,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if 
(disable_inter_mode_mask & (1 << mode_idx)) continue; - // if we're near/nearest and mv == 0,0, compare to zeromv - if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && - (this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 && - (!has_second_rf || - frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) { - int rfc = mbmi->mode_context[mbmi->ref_frame[0]]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!has_second_rf) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 && - frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 && - frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0)) - continue; - } - } - } + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, + this_mode, mbmi->ref_frame[0], + mbmi->ref_frame[1])) + continue; vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above, @@ -2090,7 +2098,7 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, int i; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi_8x8[0]; + MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO *mbmi = &mi->mbmi; int mode_idx; @@ -2137,7 +2145,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size ) { MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = 
&xd->mi[0]->mbmi; int_mv this_mv; int i; int zero_seen = 0; @@ -2267,7 +2275,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, // restored if we decide to encode this way ctx->skip = x->skip; ctx->best_mode_index = mode_index; - ctx->mic = *xd->mi_8x8[0]; + ctx->mic = *xd->mi[0]; ctx->best_ref_mv[0].as_int = ref_mv->as_int; ctx->best_ref_mv[1].as_int = second_ref_mv->as_int; @@ -2318,7 +2326,7 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const VP9_COMMON *cm = &cpi->common; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *const mi = xd->mi_8x8[0]; + MODE_INFO *const mi = xd->mi[0]; int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; @@ -2350,9 +2358,9 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } -static INLINE int get_switchable_rate(const MACROBLOCK *x) { +int vp9_get_switchable_rate(const MACROBLOCK *x) { const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[ctx][mbmi->interp_filter]; @@ -2365,7 +2373,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int bestsme = INT_MAX; int further_steps, step_param; @@ -2531,13 +2539,14 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = 
&x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; int_mv ref_mv[2]; int ite, ref; // Prediction buffer from second frame. uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); + const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Do joint motion search in compound mode to get more accurate mv. struct buf_2d backup_yv12[2][MAX_MB_PLANE]; @@ -2591,7 +2600,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[refs[!id]].as_mv, &xd->block_refs[!id]->sf, pw, ph, 0, - xd->interp_kernel, MV_PRECISION_Q3, + kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); // Compound motion search on first ref frame. @@ -2692,7 +2701,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int64_t ref_best_rd) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_comp_pred = has_second_ref(mbmi); const int num_refs = is_comp_pred ? 
2 : 1; const int this_mode = mbmi->mode; @@ -2744,7 +2753,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return INT64_MAX; *rate2 += rate_mv; frame_mv[refs[0]].as_int = - xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; + xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; single_newmv[refs[0]].as_int = tmp_mv.as_int; } } @@ -2806,8 +2815,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { @@ -2877,8 +2885,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : *best_filter; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); - rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; + rs = cm->interp_filter == SWITCHABLE ? 
vp9_get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2908,7 +2915,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) - *rate2 += get_switchable_rate(x); + *rate2 += vp9_get_switchable_rate(x); if (!is_comp_pred) { if (!x->in_active_map) { @@ -3066,7 +3073,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, TX_SIZE max_uv_tx_size; x->skip_encode = 0; ctx->skip = 0; - xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; + xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; if (bsize >= BLOCK_8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, @@ -3075,7 +3082,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize, max_uv_tx_size); } else { @@ -3085,7 +3092,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size); } @@ -3108,7 +3115,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - ctx->mic = *xd->mi_8x8[0]; + ctx->mic = *xd->mi[0]; } int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, @@ -3121,9 +3128,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE block_size = 
get_plane_block_size(bsize, &xd->plane[0]); MB_PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; @@ -3189,7 +3195,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; @@ -3371,46 +3377,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } else { - // TODO(aconverse): Find out if this is still productive then clean up or - // remove - // if we're near/nearest and mv == 0,0, compare to zeromv if (x->in_active_map && - !(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && - (this_mode == NEARMV || this_mode == NEARESTMV || - this_mode == ZEROMV) && - frame_mv[this_mode][ref_frame].as_int == 0 && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && - (!comp_pred || frame_mv[this_mode][second_ref_frame].as_int == 0)) { - int rfc = mbmi->mode_context[ref_frame]; - int c1 = cost_mv_ref(cpi, NEARMV, rfc); - int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); - int c3 = cost_mv_ref(cpi, ZEROMV, rfc); - - if (this_mode == NEARMV) { - if (c1 > c3) - continue; - } else if (this_mode == NEARESTMV) { - if (c2 > c3) - continue; - } else { - assert(this_mode == ZEROMV); - if (!comp_pred) { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0)) - continue; - } else { - if ((c3 >= c2 && - frame_mv[NEARESTMV][ref_frame].as_int == 0 && - frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || - (c3 >= c1 && - frame_mv[NEARMV][ref_frame].as_int == 0 && - frame_mv[NEARMV][second_ref_frame].as_int == 0)) - continue; - } - } - } + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + if 
(!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + disable_inter_mode_mask, this_mode, ref_frame, + second_ref_frame)) + continue; } mbmi->mode = this_mode; @@ -3423,7 +3395,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, : cm->interp_filter; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { @@ -3788,9 +3759,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_rd_so_far) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const struct segmentation *seg = &cm->seg; - const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; int comp_pred, i; @@ -3850,7 +3820,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, block_size, mi_row, mi_col, + ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } @@ -3968,7 +3938,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // them for this frame. mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (comp_pred) { if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) @@ -4067,7 +4036,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? 
cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; - xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; + xd->mi[0]->mbmi.tx_size = TX_4X4; cpi->mask_filter_rd = 0; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) @@ -4091,7 +4060,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int newbest, rs; int64_t rs_rd; mbmi->interp_filter = switchable_filter_index; - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, &mbmi->ref_mvs[ref_frame][0], second_ref, @@ -4104,7 +4072,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; - rs = get_switchable_rate(x); + rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = @@ -4131,7 +4099,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tmp_best_skippable = skippable; tmp_best_mbmode = *mbmi; for (i = 0; i < 4; i++) { - tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; + tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; } pred_exists = 1; @@ -4156,7 +4124,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? 
tmp_best_filter : cm->interp_filter); - xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level @@ -4179,14 +4146,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, skippable = tmp_best_skippable; *mbmi = tmp_best_mbmode; for (i = 0; i < 4; i++) - xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i]; + xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; } rate2 += rate; distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) - rate2 += get_switchable_rate(x); + rate2 += vp9_get_switchable_rate(x); if (!mode_excluded) mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE @@ -4263,8 +4230,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } // Keep record of best inter rd with single reference - if (is_inter_block(&xd->mi_8x8[0]->mbmi) && - !has_second_ref(&xd->mi_8x8[0]->mbmi) && + if (is_inter_block(&xd->mi[0]->mbmi) && + !has_second_ref(&xd->mi[0]->mbmi) && !mode_excluded && this_rd < best_inter_rd) { best_inter_rd = this_rd; @@ -4304,7 +4271,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, sizeof(uint8_t) * ctx->num_4x4_blk); for (i = 0; i < 4; i++) - best_bmodes[i] = xd->mi_8x8[0]->bmi[i]; + best_bmodes[i] = xd->mi[0]->bmi[i]; // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -4453,13 +4420,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, x->skip |= best_skip2; if (!is_inter_block(&best_mbmode)) { for (i = 0; i < 4; i++) - xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode; + xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; } else { for (i = 0; i < 4; ++i) - vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); + vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); - mbmi->mv[0].as_int = 
xd->mi_8x8[0]->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int; + mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; + mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; } for (i = 0; i < REFERENCE_MODES; ++i) { diff --git a/libvpx/vp9/encoder/vp9_rdopt.h b/libvpx/vp9/encoder/vp9_rdopt.h index 6968fa6..a01dbd4 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.h +++ b/libvpx/vp9/encoder/vp9_rdopt.h @@ -40,6 +40,8 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, unsigned int qstep, int *rate, int64_t *dist); +int vp9_get_switchable_rate(const MACROBLOCK *x); + void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, MV_REFERENCE_FRAME ref_frame, diff --git a/libvpx/vp9/encoder/vp9_sad.c b/libvpx/vp9/encoder/vp9_sad.c index 58c5df4..9d8da0d 100644 --- a/libvpx/vp9/encoder/vp9_sad.c +++ b/libvpx/vp9/encoder/vp9_sad.c @@ -44,7 +44,7 @@ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src_ptr, int src_stride, \ const uint8_t *second_pred, \ unsigned int max_sad) { \ uint8_t comp_pred[m * n]; \ - comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ + vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \ return sad(src_ptr, src_stride, comp_pred, m, m, n); \ } diff --git a/libvpx/vp9/encoder/vp9_segmentation.c b/libvpx/vp9/encoder/vp9_segmentation.c index fd8fa53..9d3e6dc 100644 --- a/libvpx/vp9/encoder/vp9_segmentation.c +++ b/libvpx/vp9/encoder/vp9_segmentation.c @@ -133,8 +133,8 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - xd->mi_8x8 = mi_8x8; - segment_id = xd->mi_8x8[0]->mbmi.segment_id; + xd->mi = mi_8x8; + segment_id = xd->mi[0]->mbmi.segment_id; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); @@ -152,7 +152,7 @@ static void count_segs(VP9_COMP *cpi, const TileInfo *const tile, // Store the prediction status for this mb and 
update counts // as appropriate - xd->mi_8x8[0]->mbmi.seg_id_predicted = pred_flag; + xd->mi[0]->mbmi.seg_id_predicted = pred_flag; temporal_predictor_count[pred_context][pred_flag]++; if (!pred_flag) @@ -169,7 +169,7 @@ static void count_segs_sb(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; int bw, bh; const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; @@ -229,7 +229,7 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { vp9_prob t_pred_tree[SEG_TREE_PROBS]; vp9_prob t_nopred_prob[PREDICTION_PROBS]; - const int mis = cm->mode_info_stride; + const int mis = cm->mi_stride; MODE_INFO **mi_ptr, **mi; // Set default state for the segment tree probabilities and the diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c new file mode 100644 index 0000000..d6b6174 --- /dev/null +++ b/libvpx/vp9/encoder/vp9_speed_features.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <limits.h> + +#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_speed_features.h" + +#define ALL_INTRA_MODES ((1 << DC_PRED) | \ + (1 << V_PRED) | (1 << H_PRED) | \ + (1 << D45_PRED) | (1 << D135_PRED) | \ + (1 << D117_PRED) | (1 << D153_PRED) | \ + (1 << D207_PRED) | (1 << D63_PRED) | \ + (1 << TM_PRED)) +#define INTRA_DC_ONLY (1 << DC_PRED) +#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) +#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) +#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) + +// Masks for partially or completely disabling split mode +#define DISABLE_ALL_INTER_SPLIT ((1 << THR_COMP_GA) | \ + (1 << THR_COMP_LA) | \ + (1 << THR_ALTR) | \ + (1 << THR_GOLD) | \ + (1 << THR_LAST)) + +#define DISABLE_ALL_SPLIT ((1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT) + +#define DISABLE_COMPOUND_SPLIT ((1 << THR_COMP_GA) | (1 << THR_COMP_LA)) + +#define LAST_AND_INTRA_SPLIT_ONLY ((1 << THR_COMP_GA) | \ + (1 << THR_COMP_LA) | \ + (1 << THR_ALTR) | \ + (1 << THR_GOLD)) + +static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, + SPEED_FEATURES *sf, int speed) { + sf->adaptive_rd_thresh = 1; + sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW; + sf->allow_skip_recode = 1; + + if (speed >= 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->subpel_iters_per_step = 1; + sf->mode_skip_start = 10; + sf->adaptive_pred_interp_filter = 1; + + sf->recode_loop = ALLOW_RECODE_KFARFGF; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + + if (speed >= 2) { + sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->adaptive_pred_interp_filter = 2; + sf->reference_masking = 1; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->disable_filter_search_var_thresh = 100; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + } + + if (speed >= 3) { + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + + sf->recode_loop = ALLOW_RECODE_KFMAXBW; + sf->adaptive_rd_thresh = 3; + sf->mode_skip_start = 6; + sf->use_fast_coef_updates = ONE_LOOP_REDUCED; + sf->use_fast_coef_costing = 1; + } + + if (speed >= 4) { + sf->use_square_partition_only = 1; + sf->tx_size_search_method = USE_LARGESTALL; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->adaptive_rd_thresh = 4; + sf->mode_search_skip_flags |= 
FLAG_SKIP_COMP_REFMISMATCH | + FLAG_EARLY_TERMINATE; + sf->disable_filter_search_var_thresh = 200; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->use_lp32x32fdct = 1; + } + + if (speed >= 5) { + int i; + + sf->partition_search_type = FIXED_PARTITION; + sf->optimize_coefficients = 0; + sf->search_method = HEX; + sf->disable_filter_search_var_thresh = 500; + for (i = 0; i < TX_SIZES; ++i) { + sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; + } +} + +static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, + int speed) { + sf->static_segmentation = 0; + sf->adaptive_rd_thresh = 1; + sf->encode_breakout_thresh = 1; + sf->use_fast_coef_costing = 1; + + if (speed == 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 8; + } + + if (speed >= 2) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? 
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_interp_filter = 2; + sf->auto_mv_step_size = 1; + sf->reference_masking = 1; + + sf->disable_filter_search_var_thresh = 50; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->adaptive_rd_thresh = 2; + sf->use_lp32x32fdct = 1; + sf->mode_skip_start = 11; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->encode_breakout_thresh = 200; + } + + if (speed >= 3) { + sf->use_square_partition_only = 1; + sf->disable_filter_search_var_thresh = 100; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->constrain_copy_partition = 1; + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->subpel_iters_per_step = 1; + sf->use_fast_coef_updates = ONE_LOOP_REDUCED; + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + sf->allow_skip_recode = 0; + sf->optimize_coefficients = 0; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->lpf_pick = LPF_PICK_FROM_Q; + sf->encode_breakout_thresh = 700; + } + + if (speed >= 4) { + int i; + sf->last_partitioning_redo_frequency = 4; + sf->adaptive_rd_thresh = 5; + sf->use_fast_coef_costing = 0; + sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; + sf->adjust_partitioning_from_last_frame = + cm->last_frame_type != cm->frame_type || (0 == + (cm->current_video_frame + 1) % 
sf->last_partitioning_redo_frequency); + sf->subpel_force_stop = 1; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; + sf->frame_parameter_update = 0; + sf->encode_breakout_thresh = 1000; + sf->search_method = FAST_HEX; + sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); + sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->max_intra_bsize = BLOCK_32X32; + sf->allow_skip_recode = 1; + } + + if (speed >= 5) { + sf->max_partition_size = BLOCK_32X32; + sf->min_partition_size = BLOCK_8X8; + sf->partition_check = + (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1); + sf->force_frame_boost = cm->frame_type == KEY_FRAME || + (cm->current_video_frame % + (sf->last_partitioning_redo_frequency << 1) == 1); + sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15; + sf->partition_search_type = REFERENCE_PARTITION; + sf->use_nonrd_pick_mode = 1; + sf->search_method = FAST_DIAMOND; + sf->allow_skip_recode = 0; + } + + if (speed >= 6) { + // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION. + sf->partition_search_type = SOURCE_VAR_BASED_PARTITION; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 360; + + sf->use_nonrd_pick_mode = 1; + sf->search_method = FAST_DIAMOND; + } + + if (speed >= 7) { + int i; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV)); + } +} + +void vp9_set_speed_features(VP9_COMP *cpi) { + SPEED_FEATURES *const sf = &cpi->sf; + VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + const int speed = cpi->speed < 0 ? 
-cpi->speed : cpi->speed; + int i; + + // best quality defaults + sf->frame_parameter_update = 1; + sf->search_method = NSTEP; + sf->recode_loop = ALLOW_RECODE; + sf->subpel_search_method = SUBPEL_TREE; + sf->subpel_iters_per_step = 2; + sf->subpel_force_stop = 0; + sf->optimize_coefficients = !oxcf->lossless; + sf->reduce_first_step_size = 0; + sf->auto_mv_step_size = 0; + sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->comp_inter_joint_search_thresh = BLOCK_4X4; + sf->adaptive_rd_thresh = 0; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF; + sf->tx_size_search_method = USE_FULL_RD; + sf->use_lp32x32fdct = 0; + sf->adaptive_motion_search = 0; + sf->adaptive_pred_interp_filter = 0; + sf->reference_masking = 0; + sf->partition_search_type = SEARCH_PARTITION; + sf->less_rectangular_check = 0; + sf->use_square_partition_only = 0; + sf->auto_min_max_partition_size = NOT_IN_USE; + sf->max_partition_size = BLOCK_64X64; + sf->min_partition_size = BLOCK_4X4; + sf->adjust_partitioning_from_last_frame = 0; + sf->last_partitioning_redo_frequency = 4; + sf->constrain_copy_partition = 0; + sf->disable_split_mask = 0; + sf->mode_search_skip_flags = 0; + sf->force_frame_boost = 0; + sf->max_delta_qindex = 0; + sf->disable_split_var_thresh = 0; + sf->disable_filter_search_var_thresh = 0; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; + sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; + } + sf->use_rd_breakout = 0; + sf->skip_encode_sb = 0; + sf->use_uv_intra_rd_estimate = 0; + sf->allow_skip_recode = 0; + sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; + sf->use_fast_coef_updates = TWO_LOOP; + sf->use_fast_coef_costing = 0; + sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set + sf->use_nonrd_pick_mode = 0; + sf->encode_breakout_thresh = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->disable_inter_mode_mask[i] = 0; + sf->max_intra_bsize = BLOCK_64X64; + // This setting only takes effect when partition_search_type is set 
+ // to FIXED_PARTITION. + sf->always_this_block_size = BLOCK_16X16; + sf->search_type_check_frequency = 50; + sf->source_var_thresh = 100; + + // Recode loop tolerence %. + sf->recode_tolerance = 25; + + switch (oxcf->mode) { + case MODE_BESTQUALITY: + case MODE_SECONDPASS_BEST: // This is the best quality mode. + cpi->diamond_search_sad = vp9_full_range_search; + break; + case MODE_FIRSTPASS: + case MODE_GOODQUALITY: + case MODE_SECONDPASS: + set_good_speed_feature(cpi, cm, sf, speed); + break; + case MODE_REALTIME: + set_rt_speed_feature(cm, sf, speed); + break; + } + + // Slow quant, dct and trellis not worthwhile for first pass + // so make sure they are always turned off. + if (cpi->pass == 1) + sf->optimize_coefficients = 0; + + // No recode for 1 pass. + if (cpi->pass == 0) { + sf->recode_loop = DISALLOW_RECODE; + sf->optimize_coefficients = 0; + } + + if (sf->subpel_search_method == SUBPEL_TREE) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; + cpi->find_fractional_mv_step_comp = vp9_find_best_sub_pixel_comp_tree; + } + + cpi->mb.optimize = sf->optimize_coefficients == 1 && cpi->pass != 1; + + if (cpi->encode_breakout && oxcf->mode == MODE_REALTIME && + sf->encode_breakout_thresh > cpi->encode_breakout) + cpi->encode_breakout = sf->encode_breakout_thresh; + + if (sf->disable_split_mask == DISABLE_ALL_SPLIT) + sf->adaptive_pred_interp_filter = 0; + + if (!cpi->oxcf.frame_periodic_boost) { + sf->max_delta_qindex = 0; + } +} diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h new file mode 100644 index 0000000..72f548a --- /dev/null +++ b/libvpx/vp9/encoder/vp9_speed_features.h @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_ +#define VP9_ENCODER_VP9_SPEED_FEATURES_H_ + +#include "vp9/common/vp9_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + DIAMOND = 0, + NSTEP = 1, + HEX = 2, + BIGDIA = 3, + SQUARE = 4, + FAST_HEX = 5, + FAST_DIAMOND = 6 +} SEARCH_METHODS; + +typedef enum { + // No recode. + DISALLOW_RECODE = 0, + // Allow recode for KF and exceeding maximum frame bandwidth. + ALLOW_RECODE_KFMAXBW = 1, + // Allow recode only for KF/ARF/GF frames. + ALLOW_RECODE_KFARFGF = 2, + // Allow recode for all frames based on bitrate constraints. + ALLOW_RECODE = 3, +} RECODE_LOOP_TYPE; + +typedef enum { + SUBPEL_TREE = 0, + // Other methods to come +} SUBPEL_SEARCH_METHODS; + +typedef enum { + LAST_FRAME_PARTITION_OFF = 0, + LAST_FRAME_PARTITION_LOW_MOTION = 1, + LAST_FRAME_PARTITION_ALL = 2 +} LAST_FRAME_PARTITION_METHOD; + +typedef enum { + USE_FULL_RD = 0, + USE_LARGESTINTRA, + USE_LARGESTINTRA_MODELINTER, + USE_LARGESTALL +} TX_SIZE_SEARCH_METHOD; + +typedef enum { + NOT_IN_USE = 0, + RELAXED_NEIGHBORING_MIN_MAX = 1, + STRICT_NEIGHBORING_MIN_MAX = 2 +} AUTO_MIN_MAX_MODE; + +typedef enum { + // Try the full image with different values. + LPF_PICK_FROM_FULL_IMAGE, + // Try a small portion of the image with different values. + LPF_PICK_FROM_SUBIMAGE, + // Estimate the level based on quantizer and frame type + LPF_PICK_FROM_Q, +} LPF_PICK_METHOD; + +typedef enum { + // Terminate search early based on distortion so far compared to + // qp step, distortion in the neighborhood of the frame, etc. + FLAG_EARLY_TERMINATE = 1 << 0, + + // Skips comp inter modes if the best so far is an intra mode. + FLAG_SKIP_COMP_BESTINTRA = 1 << 1, + + // Skips comp inter modes if the best single intermode so far does + // not have the same reference as one of the two references being + // tested. 
+ FLAG_SKIP_COMP_REFMISMATCH = 1 << 2, + + // Skips oblique intra modes if the best so far is an inter mode. + FLAG_SKIP_INTRA_BESTINTER = 1 << 3, + + // Skips oblique intra modes at angles 27, 63, 117, 153 if the best + // intra so far is not one of the neighboring directions. + FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, + + // Skips intra modes other than DC_PRED if the source variance is small + FLAG_SKIP_INTRA_LOWVAR = 1 << 5, +} MODE_SEARCH_SKIP_LOGIC; + +typedef enum { + // Search partitions using RD/NONRD criterion + SEARCH_PARTITION = 0, + + // Always use a fixed size partition + FIXED_PARTITION = 1, + + // Use a fixed size partition in every 64X64 SB, where the size is + // determined based on source variance + VAR_BASED_FIXED_PARTITION = 2, + + REFERENCE_PARTITION = 3, + + // Use an arbitrary partitioning scheme based on source variance within + // a 64X64 SB + VAR_BASED_PARTITION, + + // Use non-fixed partitions based on source variance + SOURCE_VAR_BASED_PARTITION +} PARTITION_SEARCH_TYPE; + +typedef enum { + // Does a dry run to see if any of the contexts need to be updated or not, + // before the final run. + TWO_LOOP = 0, + + // No dry run conducted. + ONE_LOOP = 1, + + // No dry run, also only half the coef contexts and bands are updated. + // The rest are not updated at all. + ONE_LOOP_REDUCED = 2 +} FAST_COEFF_UPDATE; + +typedef struct { + // Frame level coding parameter update + int frame_parameter_update; + + // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). + SEARCH_METHODS search_method; + + RECODE_LOOP_TYPE recode_loop; + + // Subpel_search_method can only be subpel_tree which does a subpixel + // logarithmic search that keeps stepping at 1/2 pixel units until + // you stop getting a gain, and then goes on to 1/4 and repeats + // the same process. Along the way it skips many diagonals. + SUBPEL_SEARCH_METHODS subpel_search_method; + + // Maximum number of steps in logarithmic subpel search before giving up. 
+ int subpel_iters_per_step; + + // Control when to stop subpel search + int subpel_force_stop; + + // This parameter controls the number of steps we'll do in a diamond + // search. + int max_step_search_steps; + + // This parameter controls which step in the n-step process we start at. + // It's changed adaptively based on circumstances. + int reduce_first_step_size; + + // If this is set to 1, we limit the motion search range to 2 times the + // largest motion vector found in the last frame. + int auto_mv_step_size; + + // Trellis (dynamic programming) optimization of quantized values (+1, 0). + int optimize_coefficients; + + // Always set to 0. If on it enables 0 cost background transmission + // (except for the initial transmission of the segmentation). The feature is + // disabled because the addition of very large block sizes make the + // backgrounds very to cheap to encode, and the segmentation we have + // adds overhead. + int static_segmentation; + + // If 1 we iterate finding a best reference for 2 ref frames together - via + // a log search that iterates 4 times (check around mv for last for best + // error of combined predictor then check around mv for alt). If 0 we + // we just use the best motion vector found for each frame by itself. + int comp_inter_joint_search_thresh; + + // This variable is used to cap the maximum number of times we skip testing a + // mode to be evaluated. A high value means we will be faster. + int adaptive_rd_thresh; + + // Enables skipping the reconstruction step (idct, recon) in the + // intermediate steps assuming the last frame didn't have too many intra + // blocks and the q is less than a threshold. + int skip_encode_sb; + int skip_encode_frame; + // Speed feature to allow or disallow skipping of recode at block + // level within a frame. + int allow_skip_recode; + + // This variable allows us to reuse the last frames partition choices + // (64x64 v 32x32 etc) for this frame. 
It can be set to only use the last + // frame as a starting point in low motion scenes or always use it. If set + // we use last partitioning_redo frequency to determine how often to redo + // the partitioning from scratch. Adjust_partitioning_from_last_frame + // enables us to adjust up or down one partitioning from the last frames + // partitioning. + LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + + // Determine which method we use to determine transform size. We can choose + // between options like full rd, largest for prediction size, largest + // for intra and model coefs for the rest. + TX_SIZE_SEARCH_METHOD tx_size_search_method; + + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less + // precise but significantly faster than the non lp version. + int use_lp32x32fdct; + + // TODO(JBB): remove this as its no longer used. + + // After looking at the first set of modes (set by index here), skip + // checking modes for reference frames that don't match the reference frame + // of the best so far. + int mode_skip_start; + + // TODO(JBB): Remove this. + int reference_masking; + + PARTITION_SEARCH_TYPE partition_search_type; + + // Used if partition_search_type = FIXED_SIZE_PARTITION + BLOCK_SIZE always_this_block_size; + + // Skip rectangular partition test when partition type none gives better + // rd than partition type split. + int less_rectangular_check; + + // Disable testing non square partitions. (eg 16x32) + int use_square_partition_only; + + // Sets min and max partition sizes for this 64x64 region based on the + // same 64x64 in last encoded frame, and the left and above neighbor. + AUTO_MIN_MAX_MODE auto_min_max_partition_size; + + // Min and max partition size we enable (block_size) as per auto + // min max, but also used by adjust partitioning, and pick_partitioning. 
+ BLOCK_SIZE min_partition_size; + BLOCK_SIZE max_partition_size; + + // Whether or not we allow partitions one smaller or one greater than the last + // frame's partitioning. Only used if use_lastframe_partitioning is set. + int adjust_partitioning_from_last_frame; + + // How frequently we re do the partitioning from scratch. Only used if + // use_lastframe_partitioning is set. + int last_partitioning_redo_frequency; + + // This enables constrained copy partitioning, which, given an input block + // size bsize, will copy previous partition for partitions less than bsize, + // otherwise bsize partition is used. bsize is currently set to 16x16. + // Used for the case where motion is detected in superblock. + int constrain_copy_partition; + + // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable + // it always, to allow it for only Last frame and Intra, disable it for all + // inter modes or to enable it always. + int disable_split_mask; + + // TODO(jingning): combine the related motion search speed features + // This allows us to use motion search at other sizes as a starting + // point for this motion search and limits the search range around it. + int adaptive_motion_search; + + // Allows sub 8x8 modes to use the prediction filter that was determined + // best for 8x8 mode. If set to 0 we always re check all the filters for + // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter + // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. + int adaptive_pred_interp_filter; + + // Search through variable block partition types in non-RD mode decision + // encoding process for RTC. + int partition_check; + + // Use finer quantizer in every other few frames that run variable block + // partition type search. + int force_frame_boost; + + // Maximally allowed base quantization index fluctuation. 
+ int max_delta_qindex; + + // Implements various heuristics to skip searching modes + // The heuristics selected are based on flags + // defined in the MODE_SEARCH_SKIP_HEURISTICS enum + unsigned int mode_search_skip_flags; + + // A source variance threshold below which the split mode is disabled + unsigned int disable_split_var_thresh; + + // A source variance threshold below which filter search is disabled + // Choose a very large value (UINT_MAX) to use 8-tap always + unsigned int disable_filter_search_var_thresh; + + // These bit masks allow you to enable or disable intra modes for each + // transform size separately. + int intra_y_mode_mask[TX_SIZES]; + int intra_uv_mode_mask[TX_SIZES]; + + // This variable enables an early break out of mode testing if the model for + // rd built from the prediction signal indicates a value that's much + // higher than the best rd we've seen so far. + int use_rd_breakout; + + // This enables us to use an estimate for intra rd based on dc mode rather + // than choosing an actual uv mode in the stage of encoding before the actual + // final encode. + int use_uv_intra_rd_estimate; + + // This feature controls how the loop filter level is determined. + LPF_PICK_METHOD lpf_pick; + + // This feature limits the number of coefficients updates we actually do + // by only looking at counts from 1/2 the bands. + FAST_COEFF_UPDATE use_fast_coef_updates; + + // This flag controls the use of non-RD mode decision. + int use_nonrd_pick_mode; + + // This variable sets the encode_breakout threshold. Currently, it is only + // enabled in real time mode. + int encode_breakout_thresh; + + // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV + // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. + int disable_inter_mode_mask[BLOCK_SIZES]; + + // This feature controls whether we do the expensive context update and + // calculation in the rd coefficient costing loop. 
+ int use_fast_coef_costing; + + // This feature controls the tolerence vs target used in deciding whether to + // recode a frame. It has no meaning if recode is disabled. + int recode_tolerance; + + // This variable controls the maximum block size where intra blocks can be + // used in inter frames. + // TODO(aconverse): Fold this into one of the other many mode skips + BLOCK_SIZE max_intra_bsize; + + // The frequency that we check if SOURCE_VAR_BASED_PARTITION or + // FIXED_PARTITION search type should be used. + int search_type_check_frequency; + + // The threshold used in SOURCE_VAR_BASED_PARTITION search type. + int source_var_thresh; +} SPEED_FEATURES; + +struct VP9_COMP; + +void vp9_set_speed_features(struct VP9_COMP *cpi); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_ + diff --git a/libvpx/vp9/encoder/vp9_ssim.c b/libvpx/vp9/encoder/vp9_ssim.c index 1435191..026e6a8 100644 --- a/libvpx/vp9/encoder/vp9_ssim.c +++ b/libvpx/vp9/encoder/vp9_ssim.c @@ -8,8 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp9_rtcd.h" -#include "vp9/encoder/vp9_onyx_int.h" +#include "vp9/encoder/vp9_ssim.h" void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, diff --git a/libvpx/vp9/encoder/vp9_ssim.h b/libvpx/vp9/encoder/vp9_ssim.h new file mode 100644 index 0000000..a581c2c --- /dev/null +++ b/libvpx/vp9/encoder/vp9_ssim.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_ENCODER_VP9_SSIM_H_ +#define VP9_ENCODER_VP9_SSIM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vpx_scale/yv12config.h" + +double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + int lumamask, double *weight); + +double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_SSIM_H_ diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c index eba7bc6..c2b6263 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -14,17 +14,26 @@ #include "vp9/encoder/vp9_svc_layercontext.h" void vp9_init_layer_context(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; const VP9_CONFIG *const oxcf = &cpi->oxcf; - int temporal_layer = 0; - cpi->svc.spatial_layer_id = 0; - cpi->svc.temporal_layer_id = 0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + int layer; + int layer_end; + + svc->spatial_layer_id = 0; + svc->temporal_layer_id = 0; + + if (svc->number_temporal_layers > 1) { + layer_end = svc->number_temporal_layers; + } else { + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; - lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->ni_av_qi = q_trans[oxcf->worst_allowed_q]; + lc->current_video_frame_in_layer = 0; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->ni_av_qi = oxcf->worst_allowed_q; lrc->total_actual_bits = 0; lrc->total_target_vs_actual = 0; lrc->ni_tot_qi = 0; @@ -35,11 +44,19 @@ void vp9_init_layer_context(VP9_COMP *const 
cpi) { lrc->decimation_factor = 0; lrc->rate_correction_factor = 1.0; lrc->key_frame_rate_correction_factor = 1.0; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * - 1000; - lrc->buffer_level = - vp9_rescale((int)(oxcf->starting_buffer_level), - lc->target_bandwidth, 1000); + + if (svc->number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lrc->last_q[0] = oxcf->best_allowed_q; + lrc->last_q[1] = oxcf->best_allowed_q; + lrc->last_q[2] = oxcf->best_allowed_q; + } + + lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); lrc->bits_off_target = lrc->buffer_level; } } @@ -47,16 +64,29 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { // Update the layer context from a change_config() call. void vp9_update_layer_context_change_config(VP9_COMP *const cpi, const int target_bandwidth) { + SVC *const svc = &cpi->svc; const VP9_CONFIG *const oxcf = &cpi->oxcf; const RATE_CONTROL *const rc = &cpi->rc; - int temporal_layer = 0; + int layer; + int layer_end; float bitrate_alloc = 1.0; - for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; - ++temporal_layer) { - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + + if (svc->number_temporal_layers > 1) { + layer_end = svc->number_temporal_layers; + } else { + layer_end = svc->number_spatial_layers; + } + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; - lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000; - bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth; + + if (svc->number_temporal_layers > 1) { + lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + } else { + lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 
1000; + } + bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. lc->starting_buffer_level = (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); @@ -67,7 +97,11 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); // Update framerate-related quantities. - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + if (svc->number_temporal_layers > 1) { + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; + } else { + lc->framerate = oxcf->framerate; + } lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = rc->max_frame_bandwidth; // Update qp-related quantities. @@ -76,34 +110,70 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, } } -void vp9_update_layer_framerate(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; +static LAYER_CONTEXT *get_layer_context(SVC *svc) { + return svc->number_temporal_layers > 1 ? + &svc->layer_context[svc->temporal_layer_id] : + &svc->layer_context[svc->spatial_layer_id]; +} + +void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; const VP9_CONFIG *const oxcf = &cpi->oxcf; - LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + LAYER_CONTEXT *const lc = get_layer_context(svc); RATE_CONTROL *const lrc = &lc->rc; - lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + const int layer = svc->temporal_layer_id; + + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; // Update the average layer frame size (non-cumulative per-frame-bw). 
- if (temporal_layer == 0) { + if (layer == 0) { lc->avg_frame_size = lrc->av_per_frame_bandwidth; } else { - double prev_layer_framerate = oxcf->framerate / - oxcf->ts_rate_decimator[temporal_layer - 1]; - int prev_layer_target_bandwidth = - oxcf->ts_target_bitrate[temporal_layer - 1] * 1000; + const double prev_layer_framerate = + oxcf->framerate / oxcf->ts_rate_decimator[layer - 1]; + const int prev_layer_target_bandwidth = + oxcf->ts_target_bitrate[layer - 1] * 1000; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); } } +void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + RATE_CONTROL *const lrc = &lc->rc; + + lc->framerate = framerate; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->min_frame_bandwidth = (int)(lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); + lrc->max_frame_bandwidth = (int)(((int64_t)lrc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + lrc->max_gf_interval = 16; + + lrc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (lrc->max_gf_interval > oxcf->lag_in_frames - 1) + lrc->max_gf_interval = oxcf->lag_in_frames - 1; + + if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + + if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval) + lrc->max_gf_interval = lrc->static_scene_max_gf_interval; +} + void vp9_restore_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; - int frame_since_key = cpi->rc.frames_since_key; - int frame_to_key = cpi->rc.frames_to_key; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); 
+ const int old_frame_since_key = cpi->rc.frames_since_key; + const int old_frame_to_key = cpi->rc.frames_to_key; + cpi->rc = lc->rc; + cpi->twopass = lc->twopass; cpi->oxcf.target_bandwidth = lc->target_bandwidth; cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; @@ -111,17 +181,44 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->output_framerate = lc->framerate; // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). - cpi->rc.frames_since_key = frame_since_key; - cpi->rc.frames_to_key = frame_to_key; + if (cpi->svc.number_temporal_layers > 1) { + cpi->rc.frames_since_key = old_frame_since_key; + cpi->rc.frames_to_key = old_frame_to_key; + } } void vp9_save_layer_context(VP9_COMP *const cpi) { - int temporal_layer = cpi->svc.temporal_layer_id; - LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); + lc->rc = cpi->rc; - lc->target_bandwidth = (int)cpi->oxcf.target_bandwidth; - lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; - lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; - lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; + lc->twopass = cpi->twopass; + lc->target_bandwidth = (int)oxcf->target_bandwidth; + lc->starting_buffer_level = oxcf->starting_buffer_level; + lc->optimal_buffer_level = oxcf->optimal_buffer_level; + lc->maximum_buffer_size = oxcf->maximum_buffer_size; lc->framerate = cpi->output_framerate; } + +void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; + int i; + + for (i = 0; i < svc->number_spatial_layers; ++i) { + struct twopass_rc *const twopass = &svc->layer_context[i].twopass; + + svc->spatial_layer_id = i; + vp9_init_second_pass(cpi); + + twopass->total_stats.spatial_layer_id = i; + 
twopass->total_left_stats.spatial_layer_id = i; + } + svc->spatial_layer_id = 0; +} + +void vp9_inc_frame_in_layer(SVC *svc) { + LAYER_CONTEXT *const lc = (svc->number_temporal_layers > 1) + ? &svc->layer_context[svc->temporal_layer_id] + : &svc->layer_context[svc->spatial_layer_id]; + ++lc->current_video_frame_in_layer; +} diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h index e81b0b7..2abed30 100644 --- a/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -28,6 +28,8 @@ typedef struct { double framerate; int avg_frame_size; struct twopass_rc twopass; + struct vpx_fixed_buf rc_twopass_stats_in; + unsigned int current_video_frame_in_layer; } LAYER_CONTEXT; typedef struct { @@ -35,8 +37,8 @@ typedef struct { int temporal_layer_id; int number_spatial_layers; int number_temporal_layers; - // Layer context used for rate control in temporal CBR mode or spatial - // two pass mode. Defined for temporal or spatial layers for now. + // Layer context used for rate control in one pass temporal CBR mode or + // two pass spatial mode. Defined for temporal or spatial layers for now. // Does not support temporal combined with spatial RC. LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; } SVC; @@ -51,8 +53,12 @@ void vp9_update_layer_context_change_config(struct VP9_COMP *const cpi, const int target_bandwidth); // Prior to encoding the frame, update framerate-related quantities -// for the current layer. -void vp9_update_layer_framerate(struct VP9_COMP *const cpi); +// for the current temporal layer. +void vp9_update_temporal_layer_framerate(struct VP9_COMP *const cpi); + +// Update framerate-related quantities for the current spatial layer. +void vp9_update_spatial_layer_framerate(struct VP9_COMP *const cpi, + double framerate); // Prior to encoding the frame, set the layer context, for the current layer // to be encoded, to the cpi struct. 
@@ -61,6 +67,12 @@ void vp9_restore_layer_context(struct VP9_COMP *const cpi); // Save the layer context after encoding the frame. void vp9_save_layer_context(struct VP9_COMP *const cpi); +// Initialize second pass rc for spatial svc. +void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); + +// Increment number of video frames in layer +void vp9_inc_frame_in_layer(SVC *svc); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c index 6233116..0410273 100644 --- a/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -41,7 +41,10 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, struct scale_factors *scale, int x, int y) { const int which_mv = 0; - MV mv = { mv_row, mv_col }; + const MV mv = { mv_row, mv_col }; + const InterpKernel *const kernel = + vp9_get_interp_kernel(xd->mi[0]->mbmi.interp_filter); + enum mv_precision mv_precision_uv; int uv_stride; if (uv_block_size == 8) { @@ -58,7 +61,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, 16, 16, which_mv, - xd->interp_kernel, MV_PRECISION_Q3, x, y); + kernel, MV_PRECISION_Q3, x, y); vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_size, @@ -66,7 +69,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - xd->interp_kernel, mv_precision_uv, x, y); + kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_size, @@ -74,7 +77,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - xd->interp_kernel, mv_precision_uv, x, y); + kernel, mv_precision_uv, x, y); } void vp9_temporal_filter_apply_c(uint8_t *frame1, @@ -133,7 +136,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - 
MV *ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0].as_mv; + MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv; // Save input state struct buf_2d src = x->plane[0].src; @@ -250,8 +253,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (cpi->frames[frame] == NULL) continue; - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row = 0; - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0; + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0; if (frame == alt_ref_index) { filter_weight = 2; @@ -284,8 +287,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, cpi->frames[frame]->v_buffer + mb_uv_offset, cpi->frames[frame]->y_stride, mb_uv_height, - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.row, - mbd->mi_8x8[0]->bmi[0].as_mv[0].as_mv.col, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale, mb_col * 16, mb_row * 16); diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c index bb5f1c2..291ccb3 100644 --- a/libvpx/vp9/encoder/vp9_tokenize.c +++ b/libvpx/vp9/encoder/vp9_tokenize.c @@ -108,7 +108,7 @@ void vp9_coef_tree_initialize() { vp9_tokens_from_tree(vp9_coef_encodings, vp9_coef_tree); } -static void fill_value_tokens() { +void vp9_tokenize_initialize() { TOKENVALUE *const t = dct_value_tokens + DCT_MAX_VALUE; const vp9_extra_bit *const e = vp9_extra_bits; @@ -162,7 +162,6 @@ struct tokenize_b_args { VP9_COMP *cpi; MACROBLOCKD *xd; TOKENEXTRA **tp; - uint8_t *token_cache; }; static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, @@ -213,10 +212,10 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, VP9_COMP *cpi = args->cpi; MACROBLOCKD *xd = args->xd; TOKENEXTRA **tp = args->tp; - uint8_t *token_cache = args->token_cache; + uint8_t token_cache[32 * 32]; struct macroblock_plane *p = &cpi->mb.plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *mbmi = 
&xd->mi[0]->mbmi; int pt; /* near block/prev token context index */ int c; TOKENEXTRA *t = *tp; /* store tokens starting here */ @@ -310,12 +309,12 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; TOKENEXTRA *t_backup = *t; const int ctx = vp9_get_skip_context(xd); const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - struct tokenize_b_args arg = {cpi, xd, t, cpi->mb.token_cache}; + struct tokenize_b_args arg = {cpi, xd, t}; if (mbmi->skip) { if (!dry_run) cm->counts.skip[ctx][1] += skip_inc; @@ -333,7 +332,3 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, *t = t_backup; } } - -void vp9_tokenize_initialize() { - fill_value_tokens(); -} diff --git a/libvpx/vp9/encoder/vp9_variance.c b/libvpx/vp9/encoder/vp9_variance.c index 8bc3850..71867a9 100644 --- a/libvpx/vp9/encoder/vp9_variance.c +++ b/libvpx/vp9/encoder/vp9_variance.c @@ -216,7 +216,7 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -273,7 +273,7 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, 
sse); } @@ -330,7 +330,7 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -387,7 +387,7 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -417,6 +417,12 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, return (var - (((int64_t)avg * avg) >> 10)); } +void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); +} + unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, @@ -614,7 +620,7 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); } @@ -658,7 +664,7 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); - 
comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -703,7 +709,7 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, 1, 17, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -747,7 +753,7 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); - comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); + vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); } @@ -791,7 +797,7 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); - comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); + vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); } @@ -955,7 +961,7 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); - comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); + vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); } @@ -999,7 +1005,7 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, 
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1043,7 +1049,7 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); - comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); + vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -1089,6 +1095,23 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); - comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); + vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); } + + +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + int tmp; + tmp = pred[j] + ref[j]; + comp_pred[j] = (tmp + 1) >> 1; + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} diff --git a/libvpx/vp9/encoder/vp9_variance.h b/libvpx/vp9/encoder/vp9_variance.h index 3bc2091..62e20dc 100644 --- a/libvpx/vp9/encoder/vp9_variance.h +++ b/libvpx/vp9/encoder/vp9_variance.h @@ -100,21 +100,9 @@ typedef struct vp9_variance_vtable { vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; -static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, - int height, const uint8_t *ref, int 
ref_stride) { - int i, j; - - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - int tmp; - tmp = pred[j] + ref[j]; - comp_pred[j] = (tmp + 1) >> 1; - } - comp_pred += width; - pred += width; - ref += ref_stride; - } -} +void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride); + #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/encoder/vp9_write_bit_buffer.c b/libvpx/vp9/encoder/vp9_write_bit_buffer.c new file mode 100644 index 0000000..962d0ca --- /dev/null +++ b/libvpx/vp9/encoder/vp9_write_bit_buffer.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vp9/encoder/vp9_write_bit_buffer.h" + +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { + return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); +} + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { + const int off = (int)wb->bit_offset; + const int p = off / CHAR_BIT; + const int q = CHAR_BIT - 1 - off % CHAR_BIT; + if (q == CHAR_BIT -1) { + wb->bit_buffer[p] = bit << q; + } else { + wb->bit_buffer[p] &= ~(1 << q); + wb->bit_buffer[p] |= bit << q; + } + wb->bit_offset = off + 1; +} + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits) { + int bit; + for (bit = bits - 1; bit >= 0; bit--) + vp9_wb_write_bit(wb, (data >> bit) & 1); +} diff --git a/libvpx/vp9/encoder/vp9_write_bit_buffer.h b/libvpx/vp9/encoder/vp9_write_bit_buffer.h index 1795e05..073608d 100644 --- a/libvpx/vp9/encoder/vp9_write_bit_buffer.h +++ b/libvpx/vp9/encoder/vp9_write_bit_buffer.h @@ -24,29 +24,11 @@ struct vp9_write_bit_buffer { size_t bit_offset; }; -static size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb) { - return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); -} - -static void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit) { - const int off = (int)wb->bit_offset; - const int p = off / CHAR_BIT; - const int q = CHAR_BIT - 1 - off % CHAR_BIT; - if (q == CHAR_BIT -1) { - wb->bit_buffer[p] = bit << q; - } else { - wb->bit_buffer[p] &= ~(1 << q); - wb->bit_buffer[p] |= bit << q; - } - wb->bit_offset = off + 1; -} - -static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, - int data, int bits) { - int bit; - for (bit = bits - 1; bit >= 0; bit--) - vp9_wb_write_bit(wb, (data >> bit) & 1); -} +size_t vp9_rb_bytes_written(struct vp9_write_bit_buffer *wb); + +void vp9_wb_write_bit(struct vp9_write_bit_buffer *wb, int bit); + +void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, int data, int bits); #ifdef __cplusplus diff --git 
a/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c b/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c new file mode 100644 index 0000000..f31b176 --- /dev/null +++ b/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <immintrin.h> // AVX2 +#include "vpx/vpx_integer.h" + +void vp9_sad32x32x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 32 ; i++) { + // load src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = 
_mm256_add_epi32(sum_ref3, ref3_reg); + + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. + // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} + +void vp9_sad64x64x4d_avx2(uint8_t *src, + int src_stride, + uint8_t *ref[4], + int ref_stride, + unsigned int res[4]) { + __m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg; + __m256i ref1_reg, ref1next_reg, ref2_reg, ref2next_reg; + __m256i ref3_reg, ref3next_reg; + __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3; + __m256i sum_mlow, sum_mhigh; + int i; + uint8_t *ref0, *ref1, *ref2, *ref3; + + ref0 = ref[0]; + ref1 = ref[1]; + ref2 = ref[2]; + ref3 = ref[3]; + sum_ref0 = _mm256_set1_epi16(0); + sum_ref1 = _mm256_set1_epi16(0); + sum_ref2 = _mm256_set1_epi16(0); + sum_ref3 = _mm256_set1_epi16(0); + for (i = 0; i < 64 ; i++) { + // load 64 bytes from src and all refs + src_reg = _mm256_load_si256((__m256i *)(src)); + srcnext_reg = _mm256_load_si256((__m256i *)(src + 32)); + ref0_reg = _mm256_loadu_si256((__m256i *) (ref0)); + ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32)); + ref1_reg = _mm256_loadu_si256((__m256i *) (ref1)); + ref1next_reg = 
_mm256_loadu_si256((__m256i *) (ref1 + 32)); + ref2_reg = _mm256_loadu_si256((__m256i *) (ref2)); + ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32)); + ref3_reg = _mm256_loadu_si256((__m256i *) (ref3)); + ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 + 32)); + // sum of the absolute differences between every ref-i to src + ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg); + ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg); + ref2_reg = _mm256_sad_epu8(ref2_reg, src_reg); + ref3_reg = _mm256_sad_epu8(ref3_reg, src_reg); + ref0next_reg = _mm256_sad_epu8(ref0next_reg, srcnext_reg); + ref1next_reg = _mm256_sad_epu8(ref1next_reg, srcnext_reg); + ref2next_reg = _mm256_sad_epu8(ref2next_reg, srcnext_reg); + ref3next_reg = _mm256_sad_epu8(ref3next_reg, srcnext_reg); + + // sum every ref-i + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3_reg); + sum_ref0 = _mm256_add_epi32(sum_ref0, ref0next_reg); + sum_ref1 = _mm256_add_epi32(sum_ref1, ref1next_reg); + sum_ref2 = _mm256_add_epi32(sum_ref2, ref2next_reg); + sum_ref3 = _mm256_add_epi32(sum_ref3, ref3next_reg); + src+= src_stride; + ref0+= ref_stride; + ref1+= ref_stride; + ref2+= ref_stride; + ref3+= ref_stride; + } + { + __m128i sum; + + // in sum_ref-i the result is saved in the first 4 bytes + // the other 4 bytes are zeroed. 
+ // sum_ref1 and sum_ref3 are shifted left by 4 bytes + sum_ref1 = _mm256_slli_si256(sum_ref1, 4); + sum_ref3 = _mm256_slli_si256(sum_ref3, 4); + + // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 + sum_ref0 = _mm256_or_si256(sum_ref0, sum_ref1); + sum_ref2 = _mm256_or_si256(sum_ref2, sum_ref3); + + // merge every 64 bit from each sum_ref-i + sum_mlow = _mm256_unpacklo_epi64(sum_ref0, sum_ref2); + sum_mhigh = _mm256_unpackhi_epi64(sum_ref0, sum_ref2); + + // add the low 64 bit to the high 64 bit + sum_mlow = _mm256_add_epi32(sum_mlow, sum_mhigh); + + // add the low 128 bit to the high 128 bit + sum = _mm_add_epi32(_mm256_castsi256_si128(sum_mlow), + _mm256_extractf128_si256(sum_mlow, 1)); + + _mm_storeu_si128((__m128i *)(res), sum); + } +} diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c index 2e83260..152e1f4 100644 --- a/libvpx/vp9/vp9_cx_iface.c +++ b/libvpx/vp9/vp9_cx_iface.c @@ -21,7 +21,7 @@ struct vp9_extracfg { struct vpx_codec_pkt_list *pkt_list; - int cpu_used; /* available cpu percentage in 1/16 */ + int cpu_used; // available cpu percentage in 1/16 unsigned int enable_auto_alt_ref; unsigned int noise_sensitivity; unsigned int sharpness; @@ -32,15 +32,17 @@ struct vp9_extracfg { unsigned int arnr_strength; unsigned int arnr_type; vp8e_tuning tuning; - unsigned int cq_level; /* constrained quality level */ + unsigned int cq_level; // constrained quality level unsigned int rc_max_intra_bitrate_pct; unsigned int lossless; unsigned int frame_parallel_decoding_mode; AQ_MODE aq_mode; + unsigned int frame_periodic_boost; + BIT_DEPTH bit_depth; }; struct extraconfig_map { - int usage; + int usage; struct vp9_extracfg cfg; }; @@ -49,22 +51,24 @@ static const struct extraconfig_map extracfg_map[] = { 0, { // NOLINT NULL, - 0, /* cpu_used */ - 1, /* enable_auto_alt_ref */ - 0, /* noise_sensitivity */ - 0, /* sharpness */ - 0, /* static_thresh */ - 0, /* tile_columns */ - 0, /* tile_rows */ - 7, /* arnr_max_frames */ - 5, /* arnr_strength 
*/ - 3, /* arnr_type*/ - VP8_TUNE_PSNR, /* tuning*/ - 10, /* cq_level */ - 0, /* rc_max_intra_bitrate_pct */ - 0, /* lossless */ - 0, /* frame_parallel_decoding_mode */ - NO_AQ, /* aq_mode */ + 0, // cpu_used + 1, // enable_auto_alt_ref + 0, // noise_sensitivity + 0, // sharpness + 0, // static_thresh + 0, // tile_columns + 0, // tile_rows + 7, // arnr_max_frames + 5, // arnr_strength + 3, // arnr_type + VP8_TUNE_PSNR, // tuning + 10, // cq_level + 0, // rc_max_intra_bitrate_pct + 0, // lossless + 0, // frame_parallel_decoding_mode + NO_AQ, // aq_mode + 0, // frame_periodic_delta_q + BITS_8, // Bit depth } } }; @@ -101,15 +105,12 @@ static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { return VP9_LAST_FLAG; } -static vpx_codec_err_t -update_error_state(vpx_codec_alg_priv_t *ctx, - const struct vpx_internal_error_info *error) { - vpx_codec_err_t res; +static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, + const struct vpx_internal_error_info *error) { + const vpx_codec_err_t res = error->error_code; - if ((res = error->error_code)) - ctx->base.err_detail = error->has_detail - ? error->detail - : NULL; + if (res != VPX_CODEC_OK) + ctx->base.err_detail = error->has_detail ? 
error->detail : NULL; return res; } @@ -140,11 +141,11 @@ update_error_state(vpx_codec_alg_priv_t *ctx, if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean");\ } while (0) -static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { - RANGE_CHECK(cfg, g_w, 1, 65535); /* 16 bits available */ - RANGE_CHECK(cfg, g_h, 1, 65535); /* 16 bits available */ + RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available + RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); RANGE_CHECK_HI(cfg, g_profile, 3); @@ -152,12 +153,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); RANGE_CHECK_BOOL(extra_cfg, lossless); - if (extra_cfg->lossless) { - RANGE_CHECK_HI(cfg, rc_max_quantizer, 0); - RANGE_CHECK_HI(cfg, rc_min_quantizer, 0); - } RANGE_CHECK(extra_cfg, aq_mode, 0, AQ_MODE_COUNT - 1); - + RANGE_CHECK(extra_cfg, frame_periodic_boost, 0, 1); RANGE_CHECK_HI(cfg, g_threads, 64); RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS); RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q); @@ -165,37 +162,31 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); - // RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile); RANGE_CHECK_BOOL(cfg, rc_resize_allowed); RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); - RANGE_CHECK(cfg, ss_number_layers, 1, - VPX_SS_MAX_LAYERS); /*Spatial layers max */ - + 
RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); if (cfg->ts_number_layers > 1) { unsigned int i; - for (i = 1; i < cfg->ts_number_layers; ++i) { - if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i-1]) { + for (i = 1; i < cfg->ts_number_layers; ++i) + if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i - 1]) ERROR("ts_target_bitrate entries are not increasing"); - } - } - RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers-1], 1, 1); - for (i = cfg->ts_number_layers-2; i > 0; --i) { - if (cfg->ts_rate_decimator[i-1] != 2*cfg->ts_rate_decimator[i]) { + + RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1); + for (i = cfg->ts_number_layers - 2; i > 0; --i) + if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i]) ERROR("ts_rate_decimator factors are not powers of 2"); - } - } } - /* VP8 does not support a lower bound on the keyframe interval in - * automatic keyframe placement mode. - */ - if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist - && cfg->kf_min_dist > 0) + // VP8 does not support a lower bound on the keyframe interval in + // automatic keyframe placement mode. 
+ if (cfg->kf_mode != VPX_KF_DISABLED && + cfg->kf_min_dist != cfg->kf_max_dist && + cfg->kf_min_dist > 0) ERROR("kf_min_dist not supported in auto mode, use 0 " "or kf_max_dist instead."); @@ -225,22 +216,57 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if (cfg->rc_twopass_stats_in.sz % packet_sz) ERROR("rc_twopass_stats_in.sz indicates truncated packet."); - if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) - ERROR("rc_twopass_stats_in requires at least two packets."); + if (cfg->ss_number_layers > 1) { + int i; + unsigned int n_packets_per_layer[VPX_SS_MAX_LAYERS] = {0}; + + stats = cfg->rc_twopass_stats_in.buf; + for (i = 0; i < n_packets; ++i) { + const int layer_id = (int)stats[i].spatial_layer_id; + if (layer_id >= 0 && layer_id < (int)cfg->ss_number_layers) { + ++n_packets_per_layer[layer_id]; + } + } + + for (i = 0; i < (int)cfg->ss_number_layers; ++i) { + unsigned int layer_id; + if (n_packets_per_layer[i] < 2) { + ERROR("rc_twopass_stats_in requires at least two packets for each " + "layer."); + } + + stats = (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + + n_packets - cfg->ss_number_layers + i; + layer_id = (int)stats->spatial_layer_id; + + if (layer_id >= cfg->ss_number_layers + ||(int)(stats->count + 0.5) != n_packets_per_layer[layer_id] - 1) + ERROR("rc_twopass_stats_in missing EOS stats packet"); + } + } else { + if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) + ERROR("rc_twopass_stats_in requires at least two packets."); - stats = - (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1; + stats = + (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1; - if ((int)(stats->count + 0.5) != n_packets - 1) - ERROR("rc_twopass_stats_in missing EOS stats packet"); + if ((int)(stats->count + 0.5) != n_packets - 1) + ERROR("rc_twopass_stats_in missing EOS stats packet"); + } } + if (cfg->g_profile <= (unsigned int)PROFILE_1 && + extra_cfg->bit_depth > BITS_8) + ERROR("High bit-depth not 
supported in profile < 2"); + if (cfg->g_profile > (unsigned int)PROFILE_1 && + extra_cfg->bit_depth == BITS_8) + ERROR("Bit-depth 8 not supported in profile > 1"); return VPX_CODEC_OK; } static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, - const vpx_image_t *img) { + const vpx_image_t *img) { switch (img->fmt) { case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I420: @@ -252,19 +278,21 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, "supported."); } - if ((img->d_w != ctx->cfg.g_w) || (img->d_h != ctx->cfg.g_h)) + if (img->d_w != ctx->cfg.g_w || img->d_h != ctx->cfg.g_h) ERROR("Image size must match encoder init configuration size"); return VPX_CODEC_OK; } -static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, - const vpx_codec_enc_cfg_t *cfg, - const struct vp9_extracfg *extra_cfg) { - oxcf->version = cfg->g_profile; +static vpx_codec_err_t set_encoder_config( + VP9_CONFIG *oxcf, + const vpx_codec_enc_cfg_t *cfg, + const struct vp9_extracfg *extra_cfg) { + oxcf->profile = cfg->g_profile; oxcf->width = cfg->g_w; oxcf->height = cfg->g_h; + oxcf->bit_depth = extra_cfg->bit_depth; // guess a frame rate if out of whack, use 30 oxcf->framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num; if (oxcf->framerate > 180) @@ -296,9 +324,9 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, oxcf->target_bandwidth = cfg->rc_target_bitrate; oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; - oxcf->best_allowed_q = cfg->rc_min_quantizer; - oxcf->worst_allowed_q = cfg->rc_max_quantizer; - oxcf->cq_level = extra_cfg->cq_level; + oxcf->best_allowed_q = q_trans[cfg->rc_min_quantizer]; + oxcf->worst_allowed_q = q_trans[cfg->rc_max_quantizer]; + oxcf->cq_level = q_trans[extra_cfg->cq_level]; oxcf->fixed_q = -1; oxcf->under_shoot_pct = cfg->rc_undershoot_pct; @@ -344,6 +372,8 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, oxcf->aq_mode = extra_cfg->aq_mode; + oxcf->frame_periodic_boost = 
extra_cfg->frame_periodic_boost; + oxcf->ss_number_layers = cfg->ss_number_layers; if (oxcf->ss_number_layers > 1) { @@ -394,26 +424,25 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_config(vpx_codec_alg_priv_t *ctx, - const vpx_codec_enc_cfg_t *cfg) { +static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx, + const vpx_codec_enc_cfg_t *cfg) { vpx_codec_err_t res; - if ((cfg->g_w != ctx->cfg.g_w) || (cfg->g_h != ctx->cfg.g_h)) + if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) ERROR("Cannot change width or height after initialization"); - /* Prevent increasing lag_in_frames. This check is stricter than it needs - * to be -- the limit is not increasing past the first lag_in_frames - * value, but we don't track the initial config, only the last successful - * config. - */ - if ((cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames)) + // Prevent increasing lag_in_frames. This check is stricter than it needs + // to be -- the limit is not increasing past the first lag_in_frames + // value, but we don't track the initial config, only the last successful + // config. 
+ if (cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames) ERROR("Cannot increase lag_in_frames"); res = validate_config(ctx, cfg, &ctx->extra_cfg); if (res == VPX_CODEC_OK) { ctx->cfg = *cfg; - set_vp9e_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); } @@ -424,19 +453,19 @@ static vpx_codec_err_t vp9e_set_config(vpx_codec_alg_priv_t *ctx, int vp9_reverse_trans(int q); -static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { +static vpx_codec_err_t ctrl_get_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, + va_list args) { void *arg = va_arg(args, void *); #define MAP(id, var) case id: *(RECAST(id, arg)) = var; break - if (arg == NULL) return VPX_CODEC_INVALID_PARAM; + if (arg == NULL) + return VPX_CODEC_INVALID_PARAM; switch (ctrl_id) { - MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi)); - MAP(VP8E_GET_LAST_QUANTIZER_64, - vp9_reverse_trans(vp9_get_quantizer(ctx->cpi))); + MAP(VP8E_GET_LAST_QUANTIZER, vp9_get_quantizer(ctx->cpi)); + MAP(VP8E_GET_LAST_QUANTIZER_64, + vp9_reverse_trans(vp9_get_quantizer(ctx->cpi))); } return VPX_CODEC_OK; @@ -444,10 +473,9 @@ static vpx_codec_err_t get_param(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, - int ctrl_id, - va_list args) { - vpx_codec_err_t res = VPX_CODEC_OK; +static vpx_codec_err_t ctrl_set_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, + va_list args) { + vpx_codec_err_t res = VPX_CODEC_OK; struct vp9_extracfg extra_cfg = ctx->extra_cfg; #define MAP(id, var) case id: var = CAST(id, args); break; @@ -470,13 +498,14 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, MAP(VP9E_SET_FRAME_PARALLEL_DECODING, extra_cfg.frame_parallel_decoding_mode); MAP(VP9E_SET_AQ_MODE, extra_cfg.aq_mode); + MAP(VP9E_SET_FRAME_PERIODIC_BOOST, extra_cfg.frame_periodic_boost); } res = validate_config(ctx, &ctx->cfg, &extra_cfg); if (res == 
VPX_CODEC_OK) { ctx->extra_cfg = extra_cfg; - set_vp9e_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); } @@ -484,15 +513,13 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, #undef MAP } - -static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { - vpx_codec_err_t res = VPX_CODEC_OK; - struct vpx_codec_alg_priv *priv; - vpx_codec_enc_cfg_t *cfg; - unsigned int i; +static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { + vpx_codec_err_t res = VPX_CODEC_OK; if (ctx->priv == NULL) { - priv = calloc(1, sizeof(struct vpx_codec_alg_priv)); + int i; + vpx_codec_enc_cfg_t *cfg; + struct vpx_codec_alg_priv *priv = calloc(1, sizeof(*priv)); if (priv == NULL) return VPX_CODEC_MEM_ERROR; @@ -504,22 +531,20 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { ctx->priv->enc.total_encoders = 1; if (ctx->config.enc) { - /* Update the reference to the config structure to an - * internal copy. - */ + // Update the reference to the config structure to an + // internal copy. ctx->priv->alg_priv->cfg = *ctx->config.enc; ctx->config.enc = &ctx->priv->alg_priv->cfg; } - cfg = &ctx->priv->alg_priv->cfg; + cfg = &ctx->priv->alg_priv->cfg; - /* Select the extra vp6 configuration table based on the current - * usage value. If the current usage value isn't found, use the - * values for usage case 0. - */ + // Select the extra vp6 configuration table based on the current + // usage value. If the current usage value isn't found, use the + // values for usage case 0. 
for (i = 0; extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage; - i++) {} + ++i) {} priv->extra_cfg = extracfg_map[i].cfg; priv->extra_cfg.pkt_list = &priv->pkt_list.head; @@ -530,8 +555,8 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; priv->cx_data = (unsigned char *)malloc(priv->cx_data_sz); - - if (priv->cx_data == NULL) return VPX_CODEC_MEM_ERROR; + if (priv->cx_data == NULL) + return VPX_CODEC_MEM_ERROR; vp9_initialize_enc(); @@ -539,7 +564,7 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { if (res == VPX_CODEC_OK) { VP9_COMP *cpi; - set_vp9e_config(&ctx->priv->alg_priv->oxcf, + set_encoder_config(&ctx->priv->alg_priv->oxcf, &ctx->priv->alg_priv->cfg, &ctx->priv->alg_priv->extra_cfg); cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf); @@ -554,12 +579,12 @@ static vpx_codec_err_t vp9e_common_init(vpx_codec_ctx_t *ctx) { } -static vpx_codec_err_t vp9e_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - return vp9e_common_init(ctx); +static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { + return encoder_common_init(ctx); } -static vpx_codec_err_t vp9e_destroy(vpx_codec_alg_priv_t *ctx) { +static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) { free(ctx->cx_data); vp9_remove_compressor(ctx->cpi); free(ctx); @@ -604,10 +629,10 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { assert(ctx->pending_frame_count); assert(ctx->pending_frame_count <= 8); - /* Add the number of frames to the marker byte */ + // Add the number of frames to the marker byte marker |= ctx->pending_frame_count - 1; - /* Choose the magnitude */ + // Choose the magnitude for (mag = 0, mask = 0xff; mag < 4; mag++) { if (ctx->pending_frame_magnitude < mask) break; @@ -616,7 +641,7 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { } marker |= mag << 3; - /* Write the index */ + // Write the 
index index_sz = 2 + (mag + 1) * ctx->pending_frame_count; if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) { uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz; @@ -637,12 +662,12 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { return index_sz; } -static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, - const vpx_image_t *img, - vpx_codec_pts_t pts, - unsigned long duration, - vpx_enc_frame_flags_t flags, - unsigned long deadline) { +static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, + const vpx_image_t *img, + vpx_codec_pts_t pts, + unsigned long duration, + vpx_enc_frame_flags_t flags, + unsigned long deadline) { vpx_codec_err_t res = VPX_CODEC_OK; if (img) @@ -651,15 +676,15 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, pick_quickcompress_mode(ctx, duration, deadline); vpx_codec_pkt_list_init(&ctx->pkt_list); - /* Handle Flags */ - if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) - || ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) { + // Handle Flags + if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) || + ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) { ctx->base.err_detail = "Conflicting flags."; return VPX_CODEC_INVALID_PARAM; } - if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF - | VP8_EFLAG_NO_REF_ARF)) { + if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF)) { int ref = 7; if (flags & VP8_EFLAG_NO_REF_LAST) @@ -674,9 +699,9 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, vp9_use_as_reference(ctx->cpi, ref); } - if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF - | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF - | VP8_EFLAG_FORCE_ARF)) { + if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | + VP8_EFLAG_FORCE_ARF)) { int upd = 7; if (flags & VP8_EFLAG_NO_UPD_LAST) @@ -695,16 +720,16 @@ static 
vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, vp9_update_entropy(ctx->cpi, 0); } - /* Handle fixed keyframe intervals */ - if (ctx->cfg.kf_mode == VPX_KF_AUTO - && ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) { + // Handle fixed keyframe intervals + if (ctx->cfg.kf_mode == VPX_KF_AUTO && + ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) { if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) { flags |= VPX_EFLAG_FORCE_KF; ctx->fixed_kf_cntr = 1; } } - /* Initialize the encoder instance on the first frame. */ + // Initialize the encoder instance on the first frame. if (res == VPX_CODEC_OK && ctx->cpi != NULL) { unsigned int lib_flags; YV12_BUFFER_CONFIG sd; @@ -712,15 +737,15 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, size_t size, cx_data_sz; unsigned char *cx_data; - /* Set up internal flags */ + // Set up internal flags if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) ((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1; - /* Convert API flags to internal codec lib flags */ + // Convert API flags to internal codec lib flags lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? 
FRAMEFLAGS_KEY : 0; - /* vp8 use 10,000,000 ticks/second as time stamp */ - dst_time_stamp = pts * 10000000 * ctx->cfg.g_timebase.num + /* vp9 use 10,000,000 ticks/second as time stamp */ + dst_time_stamp = (pts * 10000000 * ctx->cfg.g_timebase.num) / ctx->cfg.g_timebase.den; dst_end_time_stamp = (pts + duration) * 10000000 * ctx->cfg.g_timebase.num / ctx->cfg.g_timebase.den; @@ -760,11 +785,11 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, cx_data, &dst_time_stamp, &dst_end_time_stamp, !img)) { if (size) { - vpx_codec_pts_t round, delta; + vpx_codec_pts_t round, delta; vpx_codec_cx_pkt_t pkt; - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; + VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; - /* Pack invisible frames with the next visible frame */ + // Pack invisible frames with the next visible frame if (cpi->common.show_frame == 0) { if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; @@ -776,7 +801,7 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, continue; } - /* Add the frame packet to the list of returned packets. */ + // Add the frame packet to the list of returned packets. 
round = (vpx_codec_pts_t)1000000 * ctx->cfg.g_timebase.num / 2 - 1; delta = (dst_end_time_stamp - dst_time_stamp); pkt.kind = VPX_CODEC_CX_FRAME_PKT; @@ -807,48 +832,25 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, if (cpi->droppable) pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE; - /*if (cpi->output_partition) - { - int i; - const int num_partitions = 1; - - pkt.data.frame.flags |= VPX_FRAME_IS_FRAGMENT; - - for (i = 0; i < num_partitions; ++i) - { - pkt.data.frame.buf = cx_data; - pkt.data.frame.sz = cpi->partition_sz[i]; - pkt.data.frame.partition_id = i; - // don't set the fragment bit for the last partition - if (i == (num_partitions - 1)) - pkt.data.frame.flags &= ~VPX_FRAME_IS_FRAGMENT; - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - cx_data += cpi->partition_sz[i]; - cx_data_sz -= cpi->partition_sz[i]; - } - } - else*/ - { - if (ctx->pending_cx_data) { - ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; - ctx->pending_frame_magnitude |= size; - ctx->pending_cx_data_sz += size; - size += write_superframe_index(ctx); - pkt.data.frame.buf = ctx->pending_cx_data; - pkt.data.frame.sz = ctx->pending_cx_data_sz; - ctx->pending_cx_data = NULL; - ctx->pending_cx_data_sz = 0; - ctx->pending_frame_count = 0; - ctx->pending_frame_magnitude = 0; - } else { - pkt.data.frame.buf = cx_data; - pkt.data.frame.sz = size; - } - pkt.data.frame.partition_id = -1; - vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); - cx_data += size; - cx_data_sz -= size; + if (ctx->pending_cx_data) { + ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; + ctx->pending_frame_magnitude |= size; + ctx->pending_cx_data_sz += size; + size += write_superframe_index(ctx); + pkt.data.frame.buf = ctx->pending_cx_data; + pkt.data.frame.sz = ctx->pending_cx_data_sz; + ctx->pending_cx_data = NULL; + ctx->pending_cx_data_sz = 0; + ctx->pending_frame_count = 0; + ctx->pending_frame_magnitude = 0; + } else { + pkt.data.frame.buf = cx_data; + pkt.data.frame.sz = 
size; } + pkt.data.frame.partition_id = -1; + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); + cx_data += size; + cx_data_sz -= size; } } } @@ -857,15 +859,14 @@ static vpx_codec_err_t vp9e_encode(vpx_codec_alg_priv_t *ctx, } -static const vpx_codec_cx_pkt_t *vp9e_get_cxdata(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) { +static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) { return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter); } -static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); if (frame != NULL) { YV12_BUFFER_CONFIG sd; @@ -879,10 +880,9 @@ static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { - vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); if (frame != NULL) { YV12_BUFFER_CONFIG sd; @@ -896,9 +896,8 @@ static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vp9_ref_frame_t *frame = va_arg(args, vp9_ref_frame_t *); if (frame != NULL) { @@ -912,9 +911,8 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list 
args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *); (void)ctr_id; @@ -934,14 +932,14 @@ static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx, } -static vpx_image_t *vp9e_get_preview(vpx_codec_alg_priv_t *ctx) { +static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; vp9_ppflags_t flags = {0}; if (ctx->preview_ppcfg.post_proc_flag) { - flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; - flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; - flags.noise_level = ctx->preview_ppcfg.noise_level; + flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; + flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; + flags.noise_level = ctx->preview_ppcfg.noise_level; } if (vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) { @@ -952,41 +950,36 @@ static vpx_image_t *vp9e_get_preview(vpx_codec_alg_priv_t *ctx) { } } -static vpx_codec_err_t vp9e_update_entropy(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { const int update = va_arg(args, int); vp9_update_entropy(ctx->cpi, update); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_update_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { const int ref_frame_flags = va_arg(args, int); vp9_update_reference(ctx->cpi, ref_frame_flags); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_use_reference(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { const int reference_flag = va_arg(args, int); vp9_use_as_reference(ctx->cpi, reference_flag); return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_roi_map(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static 
vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { // TODO(yaowu): Need to re-implement and test for VP9. return VPX_CODEC_INVALID_PARAM; } -static vpx_codec_err_t vp9e_set_activemap(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *); if (map) { @@ -999,8 +992,8 @@ static vpx_codec_err_t vp9e_set_activemap(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx, - int ctr_id, va_list args) { +static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *); if (mode) { @@ -1013,40 +1006,45 @@ static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, +static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { int data = va_arg(args, int); + const vpx_codec_enc_cfg_t *cfg = &ctx->cfg; vp9_set_svc(ctx->cpi, data); - // CBR mode for SVC with both temporal and spatial layers not yet supported. + // CBR or two pass mode for SVC with both temporal and spatial layers + // not yet supported. 
if (data == 1 && - ctx->cfg.rc_end_usage == VPX_CBR && - ctx->cfg.ss_number_layers > 1 && - ctx->cfg.ts_number_layers > 1) { + (cfg->rc_end_usage == VPX_CBR || + cfg->g_pass == VPX_RC_FIRST_PASS || + cfg->g_pass == VPX_RC_LAST_PASS) && + cfg->ss_number_layers > 1 && + cfg->ts_number_layers > 1) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { - vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *); - VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; - cpi->svc.spatial_layer_id = data->spatial_layer_id; - cpi->svc.temporal_layer_id = data->temporal_layer_id; + vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *); + VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; + SVC *const svc = &cpi->svc; + svc->spatial_layer_id = data->spatial_layer_id; + svc->temporal_layer_id = data->temporal_layer_id; // Checks on valid layer_id input. 
- if (cpi->svc.temporal_layer_id < 0 || - cpi->svc.temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { + if (svc->temporal_layer_id < 0 || + svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { return VPX_CODEC_INVALID_PARAM; } - if (cpi->svc.spatial_layer_id < 0 || - cpi->svc.spatial_layer_id >= (int)ctx->cfg.ss_number_layers) { + if (svc->spatial_layer_id < 0 || + svc->spatial_layer_id >= (int)ctx->cfg.ss_number_layers) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; } -static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_parameters_t *const params = va_arg(args, vpx_svc_parameters_t *); @@ -1067,135 +1065,139 @@ static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx, ctx->cfg.rc_max_quantizer = params->max_quantizer; ctx->cfg.rc_min_quantizer = params->min_quantizer; - set_vp9e_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); + set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); return VPX_CODEC_OK; } -static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = { - {VP8_SET_REFERENCE, vp9e_set_reference}, - {VP8_COPY_REFERENCE, vp9e_copy_reference}, - {VP8_SET_POSTPROC, vp9e_set_previewpp}, - {VP8E_UPD_ENTROPY, vp9e_update_entropy}, - {VP8E_UPD_REFERENCE, vp9e_update_reference}, - {VP8E_USE_REFERENCE, vp9e_use_reference}, - {VP8E_SET_ROI_MAP, vp9e_set_roi_map}, - {VP8E_SET_ACTIVEMAP, vp9e_set_activemap}, - {VP8E_SET_SCALEMODE, vp9e_set_scalemode}, - {VP8E_SET_CPUUSED, set_param}, - {VP8E_SET_NOISE_SENSITIVITY, set_param}, - {VP8E_SET_ENABLEAUTOALTREF, set_param}, - {VP8E_SET_SHARPNESS, set_param}, - {VP8E_SET_STATIC_THRESHOLD, set_param}, - {VP9E_SET_TILE_COLUMNS, set_param}, - {VP9E_SET_TILE_ROWS, set_param}, - {VP8E_GET_LAST_QUANTIZER, get_param}, - {VP8E_GET_LAST_QUANTIZER_64, get_param}, - {VP8E_SET_ARNR_MAXFRAMES, 
set_param}, - {VP8E_SET_ARNR_STRENGTH, set_param}, - {VP8E_SET_ARNR_TYPE, set_param}, - {VP8E_SET_TUNING, set_param}, - {VP8E_SET_CQ_LEVEL, set_param}, - {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param}, - {VP9E_SET_LOSSLESS, set_param}, - {VP9E_SET_FRAME_PARALLEL_DECODING, set_param}, - {VP9E_SET_AQ_MODE, set_param}, - {VP9_GET_REFERENCE, get_reference}, - {VP9E_SET_SVC, vp9e_set_svc}, - {VP9E_SET_SVC_PARAMETERS, vp9e_set_svc_parameters}, - {VP9E_SET_SVC_LAYER_ID, vp9e_set_svc_layer_id}, +static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { + {VP8_COPY_REFERENCE, ctrl_copy_reference}, + {VP8E_UPD_ENTROPY, ctrl_update_entropy}, + {VP8E_UPD_REFERENCE, ctrl_update_reference}, + {VP8E_USE_REFERENCE, ctrl_use_reference}, + + // Setters + {VP8_SET_REFERENCE, ctrl_set_reference}, + {VP8_SET_POSTPROC, ctrl_set_previewpp}, + {VP8E_SET_ROI_MAP, ctrl_set_roi_map}, + {VP8E_SET_ACTIVEMAP, ctrl_set_active_map}, + {VP8E_SET_SCALEMODE, ctrl_set_scale_mode}, + {VP8E_SET_CPUUSED, ctrl_set_param}, + {VP8E_SET_NOISE_SENSITIVITY, ctrl_set_param}, + {VP8E_SET_ENABLEAUTOALTREF, ctrl_set_param}, + {VP8E_SET_SHARPNESS, ctrl_set_param}, + {VP8E_SET_STATIC_THRESHOLD, ctrl_set_param}, + {VP9E_SET_TILE_COLUMNS, ctrl_set_param}, + {VP9E_SET_TILE_ROWS, ctrl_set_param}, + {VP8E_SET_ARNR_MAXFRAMES, ctrl_set_param}, + {VP8E_SET_ARNR_STRENGTH, ctrl_set_param}, + {VP8E_SET_ARNR_TYPE, ctrl_set_param}, + {VP8E_SET_TUNING, ctrl_set_param}, + {VP8E_SET_CQ_LEVEL, ctrl_set_param}, + {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_param}, + {VP9E_SET_LOSSLESS, ctrl_set_param}, + {VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_param}, + {VP9E_SET_AQ_MODE, ctrl_set_param}, + {VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_param}, + {VP9E_SET_SVC, ctrl_set_svc}, + {VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters}, + {VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id}, + + // Getters + {VP8E_GET_LAST_QUANTIZER, ctrl_get_param}, + {VP8E_GET_LAST_QUANTIZER_64, ctrl_get_param}, + {VP9_GET_REFERENCE, ctrl_get_reference}, + { -1, NULL}, 
}; -static vpx_codec_enc_cfg_map_t vp9e_usage_cfg_map[] = { +static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { { 0, { // NOLINT - 0, /* g_usage */ - 0, /* g_threads */ - 0, /* g_profile */ + 0, // g_usage + 0, // g_threads + 0, // g_profile - 320, /* g_width */ - 240, /* g_height */ - {1, 30}, /* g_timebase */ + 320, // g_width + 240, // g_height + {1, 30}, // g_timebase - 0, /* g_error_resilient */ + 0, // g_error_resilient - VPX_RC_ONE_PASS, /* g_pass */ + VPX_RC_ONE_PASS, // g_pass - 25, /* g_lag_in_frames */ + 25, // g_lag_in_frames - 0, /* rc_dropframe_thresh */ - 0, /* rc_resize_allowed */ - 60, /* rc_resize_down_thresold */ - 30, /* rc_resize_up_thresold */ + 0, // rc_dropframe_thresh + 0, // rc_resize_allowed + 60, // rc_resize_down_thresold + 30, // rc_resize_up_thresold - VPX_VBR, /* rc_end_usage */ + VPX_VBR, // rc_end_usage #if VPX_ENCODER_ABI_VERSION > (1 + VPX_CODEC_ABI_VERSION) - {0}, /* rc_twopass_stats_in */ + {0}, // rc_twopass_stats_in #endif - 256, /* rc_target_bandwidth */ - 0, /* rc_min_quantizer */ - 63, /* rc_max_quantizer */ - 100, /* rc_undershoot_pct */ - 100, /* rc_overshoot_pct */ - - 6000, /* rc_max_buffer_size */ - 4000, /* rc_buffer_initial_size; */ - 5000, /* rc_buffer_optimal_size; */ - - 50, /* rc_two_pass_vbrbias */ - 0, /* rc_two_pass_vbrmin_section */ - 2000, /* rc_two_pass_vbrmax_section */ - - /* keyframing settings (kf) */ - VPX_KF_AUTO, /* g_kfmode*/ - 0, /* kf_min_dist */ - 9999, /* kf_max_dist */ - - VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ - {0}, /* ss_target_bitrate */ - 1, /* ts_number_layers */ - {0}, /* ts_target_bitrate */ - {0}, /* ts_rate_decimator */ - 0, /* ts_periodicity */ - {0}, /* ts_layer_id */ + 256, // rc_target_bandwidth + 0, // rc_min_quantizer + 63, // rc_max_quantizer + 100, // rc_undershoot_pct + 100, // rc_overshoot_pct + + 6000, // rc_max_buffer_size + 4000, // rc_buffer_initial_size + 5000, // rc_buffer_optimal_size + + 50, // rc_two_pass_vbrbias + 0, // rc_two_pass_vbrmin_section + 
2000, // rc_two_pass_vbrmax_section + + // keyframing settings (kf) + VPX_KF_AUTO, // g_kfmode + 0, // kf_min_dist + 9999, // kf_max_dist + + VPX_SS_DEFAULT_LAYERS, // ss_number_layers + {0}, // ss_target_bitrate + 1, // ts_number_layers + {0}, // ts_target_bitrate + {0}, // ts_rate_decimator + 0, // ts_periodicity + {0}, // ts_layer_id #if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION) - "vp8.fpf" /* first pass filename */ + "vp8.fpf" // first pass filename #endif } }, { -1, {NOT_IMPLEMENTED}} }; - #ifndef VERSION_STRING #define VERSION_STRING #endif CODEC_INTERFACE(vpx_codec_vp9_cx) = { "WebM Project VP9 Encoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR | - VPX_CODEC_CAP_OUTPUT_PARTITION, - /* vpx_codec_caps_t caps; */ - vp9e_init, /* vpx_codec_init_fn_t init; */ - vp9e_destroy, /* vpx_codec_destroy_fn_t destroy; */ - vp9e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ - NOT_IMPLEMENTED, /* vpx_codec_get_mmap_fn_t get_mmap; */ - NOT_IMPLEMENTED, /* vpx_codec_set_mmap_fn_t set_mmap; */ + VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR, // vpx_codec_caps_t + encoder_init, // vpx_codec_init_fn_t + encoder_destroy, // vpx_codec_destroy_fn_t + encoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t + NOT_IMPLEMENTED, // vpx_codec_get_mmap_fn_t + NOT_IMPLEMENTED, // vpx_codec_set_mmap_fn_t { // NOLINT - NOT_IMPLEMENTED, /* vpx_codec_peek_si_fn_t peek_si; */ - NOT_IMPLEMENTED, /* vpx_codec_get_si_fn_t get_si; */ - NOT_IMPLEMENTED, /* vpx_codec_decode_fn_t decode; */ - NOT_IMPLEMENTED, /* vpx_codec_frame_get_fn_t frame_get; */ + NOT_IMPLEMENTED, // vpx_codec_peek_si_fn_t + NOT_IMPLEMENTED, // vpx_codec_get_si_fn_t + NOT_IMPLEMENTED, // vpx_codec_decode_fn_t + NOT_IMPLEMENTED, // vpx_codec_frame_get_fn_t }, { // NOLINT - vp9e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t peek_si; */ - vp9e_encode, /* vpx_codec_encode_fn_t encode; */ - vp9e_get_cxdata, /* vpx_codec_get_cx_data_fn_t frame_get; */ - vp9e_set_config, - 
NOT_IMPLEMENTED, - vp9e_get_preview, - } /* encoder functions */ + encoder_usage_cfg_map, // vpx_codec_enc_cfg_map_t + encoder_encode, // vpx_codec_encode_fn_t + encoder_get_cxdata, // vpx_codec_get_cx_data_fn_t + encoder_set_config, // vpx_codec_enc_config_set_fn_t + NOT_IMPLEMENTED, // vpx_codec_get_global_headers_fn_t + encoder_get_preview, // vpx_codec_get_preview_frame_fn_t + NOT_IMPLEMENTED , // vpx_codec_enc_mr_get_mem_loc_fn_t + } }; diff --git a/libvpx/vp9/vp9_dx_iface.c b/libvpx/vp9/vp9_dx_iface.c index 72701d9..5ed7484 100644 --- a/libvpx/vp9/vp9_dx_iface.c +++ b/libvpx/vp9/vp9_dx_iface.c @@ -8,44 +8,32 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include <stdlib.h> #include <string.h> -#include "vpx/vpx_decoder.h" -#include "vpx/vp8dx.h" -#include "vpx/internal/vpx_codec_internal.h" + #include "./vpx_version.h" + +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + #include "vp9/common/vp9_frame_buffers.h" + #include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_read_bit_buffer.h" + #include "vp9/vp9_iface_common.h" #define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? 
VPX_CODEC_CAP_POSTPROC : 0) -typedef vpx_codec_stream_info_t vp9_stream_info_t; -/* Structures for handling memory allocations */ -typedef enum { - VP9_SEG_ALG_PRIV = 256, - VP9_SEG_MAX -} mem_seg_id_t; -#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) - -static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si, - vpx_codec_flags_t flags); - -static const mem_req_t vp9_mem_req_segs[] = { - {VP9_SEG_ALG_PRIV, 0, 8, VPX_CODEC_MEM_ZERO, priv_sz}, - {VP9_SEG_MAX, 0, 0, 0, NULL} -}; +typedef vpx_codec_stream_info_t vp9_stream_info_t; struct vpx_codec_alg_priv { vpx_codec_priv_t base; - vpx_codec_mmap_t mmaps[NELEMENTS(vp9_mem_req_segs) - 1]; vpx_codec_dec_cfg_t cfg; vp9_stream_info_t si; - int defer_alloc; int decoder_init; - struct VP9Decompressor *pbi; + struct VP9Decoder *pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; #if CONFIG_POSTPROC_VISUALIZER @@ -66,86 +54,54 @@ struct vpx_codec_alg_priv { vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; }; -static unsigned long priv_sz(const vpx_codec_dec_cfg_t *si, - vpx_codec_flags_t flags) { - /* Although this declaration is constant, we can't use it in the requested - * segments list because we want to define the requested segments list - * before defining the private type (so that the number of memory maps is - * known) - */ - (void)si; - return sizeof(vpx_codec_alg_priv_t); -} - -static void vp9_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap) { - int i; - - ctx->priv = (vpx_codec_priv_t *)mmap->base; - ctx->priv->sz = sizeof(*ctx->priv); - ctx->priv->iface = ctx->iface; - ctx->priv->alg_priv = (struct vpx_codec_alg_priv *)mmap->base; - - for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++) - ctx->priv->alg_priv->mmaps[i].id = vp9_mem_req_segs[i].id; - - ctx->priv->alg_priv->mmaps[0] = *mmap; - ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); - ctx->priv->init_flags = ctx->init_flags; - - if (ctx->config.dec) { - /* Update the reference to the config structure to an 
internal copy. */ - ctx->priv->alg_priv->cfg = *ctx->config.dec; - ctx->config.dec = &ctx->priv->alg_priv->cfg; - } -} - -static void vp9_finalize_mmaps(vpx_codec_alg_priv_t *ctx) { - /* nothing to clean up */ -} - -static vpx_codec_err_t vp9_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - vpx_codec_err_t res = VPX_CODEC_OK; - +static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { // This function only allocates space for the vpx_codec_alg_priv_t // structure. More memory may be required at the time the stream // information becomes known. if (!ctx->priv) { - vpx_codec_mmap_t mmap; - - mmap.id = vp9_mem_req_segs[0].id; - mmap.sz = sizeof(vpx_codec_alg_priv_t); - mmap.align = vp9_mem_req_segs[0].align; - mmap.flags = vp9_mem_req_segs[0].flags; - - res = vpx_mmap_alloc(&mmap); - if (!res) { - vp9_init_ctx(ctx, &mmap); - - ctx->priv->alg_priv->defer_alloc = 1; + vpx_codec_alg_priv_t *alg_priv = vpx_memalign(32, sizeof(*alg_priv)); + if (alg_priv == NULL) + return VPX_CODEC_MEM_ERROR; + + vp9_zero(*alg_priv); + + ctx->priv = (vpx_codec_priv_t *)alg_priv; + ctx->priv->sz = sizeof(*ctx->priv); + ctx->priv->iface = ctx->iface; + ctx->priv->alg_priv = alg_priv; + ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si); + ctx->priv->init_flags = ctx->init_flags; + + if (ctx->config.dec) { + // Update the reference to the config structure to an internal copy. 
+ ctx->priv->alg_priv->cfg = *ctx->config.dec; + ctx->config.dec = &ctx->priv->alg_priv->cfg; } } - return res; + return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_destroy(vpx_codec_alg_priv_t *ctx) { - int i; - - vp9_remove_decompressor(ctx->pbi); - - for (i = NELEMENTS(ctx->mmaps) - 1; i >= 0; i--) { - if (ctx->mmaps[i].dtor) - ctx->mmaps[i].dtor(&ctx->mmaps[i]); +static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { + if (ctx->pbi) { + vp9_decoder_remove(ctx->pbi); + ctx->pbi = NULL; } + vpx_free(ctx); + return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz, - vpx_codec_stream_info_t *si) { - if (data_sz <= 8) return VPX_CODEC_UNSUP_BITSTREAM; - if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; +static vpx_codec_err_t decoder_peek_si(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si) { + if (data_sz <= 8) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (data + data_sz <= data) + return VPX_CODEC_INVALID_PARAM; si->is_kf = 0; si->w = si->h = 0; @@ -203,8 +159,8 @@ static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz, return VPX_CODEC_OK; } -static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, - vpx_codec_stream_info_t *si) { +static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, + vpx_codec_stream_info_t *si) { const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) ? 
sizeof(vp9_stream_info_t) : sizeof(vpx_codec_stream_info_t); @@ -214,7 +170,6 @@ static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } - static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { if (error->error_code) @@ -223,149 +178,122 @@ static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, return error->error_code; } -static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, - const uint8_t **data, unsigned int data_sz, - void *user_priv, int64_t deadline) { - vpx_codec_err_t res = VPX_CODEC_OK; +static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { + VP9_COMMON *const cm = &ctx->pbi->common; - ctx->img_avail = 0; + cm->new_fb_idx = -1; - /* Determine the stream parameters. Note that we rely on peek_si to - * validate that we have a buffer that does not wrap around the top - * of the heap. - */ - if (!ctx->si.h) - res = ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si); + if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { + cm->get_fb_cb = ctx->get_ext_fb_cb; + cm->release_fb_cb = ctx->release_ext_fb_cb; + cm->cb_priv = ctx->ext_priv; + } else { + cm->get_fb_cb = vp9_get_frame_buffer; + cm->release_fb_cb = vp9_release_frame_buffer; + if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to initialize internal frame buffers"); - /* Perform deferred allocations, if required */ - if (!res && ctx->defer_alloc) { - int i; + cm->cb_priv = &cm->int_frame_buffers; + } +} - for (i = 1; !res && i < NELEMENTS(ctx->mmaps); i++) { - vpx_codec_dec_cfg_t cfg; +static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { + cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; + cfg->deblocking_level = 4; + cfg->noise_level = 0; +} - cfg.w = ctx->si.w; - cfg.h = ctx->si.h; - ctx->mmaps[i].id = vp9_mem_req_segs[i].id; - ctx->mmaps[i].sz = vp9_mem_req_segs[i].sz; - 
ctx->mmaps[i].align = vp9_mem_req_segs[i].align; - ctx->mmaps[i].flags = vp9_mem_req_segs[i].flags; +static void set_ppflags(const vpx_codec_alg_priv_t *ctx, + vp9_ppflags_t *flags) { + flags->post_proc_flag = +#if CONFIG_POSTPROC_VISUALIZER + (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) | + (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | + (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | + (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) | +#endif + ctx->postproc_cfg.post_proc_flag; - if (!ctx->mmaps[i].sz) - ctx->mmaps[i].sz = vp9_mem_req_segs[i].calc_sz(&cfg, - ctx->base.init_flags); + flags->deblocking_level = ctx->postproc_cfg.deblocking_level; + flags->noise_level = ctx->postproc_cfg.noise_level; +#if CONFIG_POSTPROC_VISUALIZER + flags->display_ref_frame_flag = ctx->dbg_color_ref_frame_flag; + flags->display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; + flags->display_b_modes_flag = ctx->dbg_color_b_modes_flag; + flags->display_mv_flag = ctx->dbg_display_mv_flag; +#endif +} - res = vpx_mmap_alloc(&ctx->mmaps[i]); - } +static void init_decoder(vpx_codec_alg_priv_t *ctx) { + VP9D_CONFIG oxcf; + oxcf.width = ctx->si.w; + oxcf.height = ctx->si.h; + oxcf.version = 9; + oxcf.max_threads = ctx->cfg.threads; + oxcf.inv_tile_order = ctx->invert_tile_order; - if (!res) - vp9_finalize_mmaps(ctx); + ctx->pbi = vp9_decoder_create(&oxcf); + if (ctx->pbi == NULL) + return; - ctx->defer_alloc = 0; - } + vp9_initialize_dec(); - /* Initialize the decoder instance on the first frame*/ - if (!res && !ctx->decoder_init) { - res = vpx_validate_mmaps(&ctx->si, ctx->mmaps, - vp9_mem_req_segs, NELEMENTS(vp9_mem_req_segs), - ctx->base.init_flags); - - if (!res) { - VP9D_CONFIG oxcf; - struct VP9Decompressor *optr; - - vp9_initialize_dec(); - - oxcf.width = ctx->si.w; - oxcf.height = ctx->si.h; - oxcf.version = 9; - oxcf.postprocess = 0; - oxcf.max_threads = ctx->cfg.threads; - oxcf.inv_tile_order = ctx->invert_tile_order; - optr 
= vp9_create_decompressor(&oxcf); - - // If postprocessing was enabled by the application and a - // configuration has not been provided, default it. - if (!ctx->postproc_cfg_set && - (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { - ctx->postproc_cfg.post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; - ctx->postproc_cfg.deblocking_level = 4; - ctx->postproc_cfg.noise_level = 0; - } + // If postprocessing was enabled by the application and a + // configuration has not been provided, default it. + if (!ctx->postproc_cfg_set && + (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) + set_default_ppflags(&ctx->postproc_cfg); - if (!optr) { - res = VPX_CODEC_ERROR; - } else { - VP9D_COMP *const pbi = (VP9D_COMP*)optr; - VP9_COMMON *const cm = &pbi->common; + init_buffer_callbacks(ctx); +} - // Set index to not initialized. - cm->new_fb_idx = -1; +static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, + const uint8_t **data, unsigned int data_sz, + void *user_priv, int64_t deadline) { + YV12_BUFFER_CONFIG sd = { 0 }; + int64_t time_stamp = 0, time_end_stamp = 0; + vp9_ppflags_t flags = {0}; + VP9_COMMON *cm = NULL; - if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { - cm->get_fb_cb = ctx->get_ext_fb_cb; - cm->release_fb_cb = ctx->release_ext_fb_cb; - cm->cb_priv = ctx->ext_priv; - } else { - cm->get_fb_cb = vp9_get_frame_buffer; - cm->release_fb_cb = vp9_release_frame_buffer; + ctx->img_avail = 0; - if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to initialize internal frame buffers"); - cm->cb_priv = &cm->int_frame_buffers; - } + // Determine the stream parameters. Note that we rely on peek_si to + // validate that we have a buffer that does not wrap around the top + // of the heap. 
+ if (!ctx->si.h) { + const vpx_codec_err_t res = + ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si); + if (res != VPX_CODEC_OK) + return res; + } - ctx->pbi = optr; - } - } + // Initialize the decoder instance on the first frame + if (!ctx->decoder_init) { + init_decoder(ctx); + if (ctx->pbi == NULL) + return VPX_CODEC_ERROR; ctx->decoder_init = 1; } - if (!res && ctx->pbi) { - YV12_BUFFER_CONFIG sd; - int64_t time_stamp = 0, time_end_stamp = 0; - vp9_ppflags_t flags = {0}; + cm = &ctx->pbi->common; - if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { - flags.post_proc_flag = -#if CONFIG_POSTPROC_VISUALIZER - (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) | - (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | - (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) | - (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) | -#endif - ctx->postproc_cfg.post_proc_flag; + if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) + return update_error_state(ctx, &cm->error); - flags.deblocking_level = ctx->postproc_cfg.deblocking_level; - flags.noise_level = ctx->postproc_cfg.noise_level; -#if CONFIG_POSTPROC_VISUALIZER - flags.display_ref_frame_flag = ctx->dbg_color_ref_frame_flag; - flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; - flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; - flags.display_mv_flag = ctx->dbg_display_mv_flag; -#endif - } + if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) + set_ppflags(ctx, &flags); - if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) { - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; - res = update_error_state(ctx, &pbi->common.error); - } + if (vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) + return update_error_state(ctx, &cm->error); - if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp, - &time_end_stamp, &flags)) { - VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi; - VP9_COMMON *const cm = &pbi->common; - 
yuvconfig2image(&ctx->img, &sd, user_priv); + yuvconfig2image(&ctx->img, &sd, user_priv); + ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + ctx->img_avail = 1; - ctx->img.fb_priv = cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; - ctx->img_avail = 1; - } - } - - return res; + return VPX_CODEC_OK; } static void parse_superframe_index(const uint8_t *data, size_t data_sz, @@ -384,7 +312,7 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz, if (data_sz >= index_sz && data[data_sz - index_sz] == marker) { // found a valid superframe index uint32_t i, j; - const uint8_t *x = data + data_sz - index_sz + 1; + const uint8_t *x = &data[data_sz - index_sz + 1]; for (i = 0; i < frames; i++) { uint32_t this_sz = 0; @@ -399,18 +327,17 @@ static void parse_superframe_index(const uint8_t *data, size_t data_sz, } } -static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx, - const uint8_t *data, - unsigned int data_sz, - void *user_priv, - long deadline) { +static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, unsigned int data_sz, + void *user_priv, long deadline) { const uint8_t *data_start = data; const uint8_t *data_end = data + data_sz; vpx_codec_err_t res = VPX_CODEC_OK; uint32_t sizes[8]; int frames_this_pts, frame_count = 0; - if (data == NULL || data_sz == 0) return VPX_CODEC_INVALID_PARAM; + if (data == NULL || data_sz == 0) + return VPX_CODEC_INVALID_PARAM; parse_superframe_index(data, data_sz, sizes, &frames_this_pts); @@ -449,27 +376,27 @@ static vpx_codec_err_t vp9_decode(vpx_codec_alg_priv_t *ctx, assert(data_start >= data); assert(data_start <= data_end); - /* Early exit if there was a decode error */ + // Early exit if there was a decode error if (res) break; - /* Account for suboptimal termination by the encoder. */ + // Account for suboptimal termination by the encoder. 
while (data_start < data_end && *data_start == 0) data_start++; data_sz = (unsigned int)(data_end - data_start); } while (data_start < data_end); + return res; } -static vpx_image_t *vp9_get_frame(vpx_codec_alg_priv_t *ctx, - vpx_codec_iter_t *iter) { +static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, + vpx_codec_iter_t *iter) { vpx_image_t *img = NULL; if (ctx->img_avail) { - /* iter acts as a flip flop, so an image is only returned on the first - * call to get_frame. - */ + // iter acts as a flip flop, so an image is only returned on the first + // call to get_frame. if (!(*iter)) { img = &ctx->img; *iter = img; @@ -480,7 +407,7 @@ static vpx_image_t *vp9_get_frame(vpx_codec_alg_priv_t *ctx, return img; } -static vpx_codec_err_t vp9_set_fb_fn( +static vpx_codec_err_t decoder_set_fb_fn( vpx_codec_alg_priv_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { @@ -498,93 +425,24 @@ static vpx_codec_err_t vp9_set_fb_fn( return VPX_CODEC_ERROR; } -static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx, - vpx_codec_mmap_t *mmap, - vpx_codec_iter_t *iter) { - vpx_codec_err_t res; - const mem_req_t *seg_iter = (const mem_req_t *)*iter; - - /* Get address of next segment request */ - do { - if (!seg_iter) - seg_iter = vp9_mem_req_segs; - else if (seg_iter->id != VP9_SEG_MAX) - seg_iter++; - - *iter = (vpx_codec_iter_t)seg_iter; - - if (seg_iter->id != VP9_SEG_MAX) { - mmap->id = seg_iter->id; - mmap->sz = seg_iter->sz; - mmap->align = seg_iter->align; - mmap->flags = seg_iter->flags; - - if (!seg_iter->sz) - mmap->sz = seg_iter->calc_sz(ctx->config.dec, ctx->init_flags); - - res = VPX_CODEC_OK; - } else { - res = VPX_CODEC_LIST_END; - } - } while (!mmap->sz && res != VPX_CODEC_LIST_END); - - return res; -} - -static vpx_codec_err_t vp9_xma_set_mmap(vpx_codec_ctx_t *ctx, - const vpx_codec_mmap_t *mmap) { - vpx_codec_err_t res = VPX_CODEC_MEM_ERROR; - int i, done; - - if (!ctx->priv) { - if 
(mmap->id == VP9_SEG_ALG_PRIV) { - if (!ctx->priv) { - vp9_init_ctx(ctx, mmap); - res = VPX_CODEC_OK; - } - } - } - - done = 1; - - if (!res && ctx->priv->alg_priv) { - for (i = 0; i < NELEMENTS(ctx->priv->alg_priv->mmaps); i++) { - if (ctx->priv->alg_priv->mmaps[i].id == mmap->id) - if (!ctx->priv->alg_priv->mmaps[i].base) { - ctx->priv->alg_priv->mmaps[i] = *mmap; - res = VPX_CODEC_OK; - } - - done &= (ctx->priv->alg_priv->mmaps[i].base != NULL); - } - } - - if (done && !res) { - vp9_finalize_mmaps(ctx->priv->alg_priv); - res = ctx->iface->init(ctx, NULL); - } - - return res; -} - -static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); +static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { + vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); if (data) { - vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; + vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); - return vp9_set_reference_dec(ctx->pbi, + return vp9_set_reference_dec(&ctx->pbi->common, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; } } -static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { @@ -600,8 +458,8 @@ static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, } } -static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); if (data) { @@ -615,8 +473,8 @@ static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, } } 
-static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); @@ -632,8 +490,8 @@ static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, int ctr_id, #endif } -static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id, - va_list args) { +static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { #if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC int data = va_arg(args, int); @@ -652,14 +510,15 @@ static vpx_codec_err_t set_dbg_options(vpx_codec_alg_priv_t *ctx, int ctrl_id, #endif } -static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { - int *update_info = va_arg(args, int *); - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; +static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { + int *const update_info = va_arg(args, int *); if (update_info) { - *update_info = pbi->refresh_frame_flags; - + if (ctx->pbi) + *update_info = ctx->pbi->refresh_frame_flags; + else + return VPX_CODEC_ERROR; return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; @@ -667,14 +526,13 @@ static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, } -static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { +static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { int *corrupted = va_arg(args, int *); if (corrupted) { - VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi; - if (pbi) - *corrupted = pbi->common.frame_to_show->corrupted; + if (ctx->pbi) + *corrupted = ctx->pbi->common.frame_to_show->corrupted; else return VPX_CODEC_ERROR; return VPX_CODEC_OK; @@ -683,15 +541,15 @@ static vpx_codec_err_t 
get_frame_corrupted(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t get_display_size(vpx_codec_alg_priv_t *ctx, - int ctrl_id, va_list args) { +static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, + int ctrl_id, va_list args) { int *const display_size = va_arg(args, int *); if (display_size) { - const VP9D_COMP *const pbi = (VP9D_COMP*)ctx->pbi; - if (pbi) { - display_size[0] = pbi->common.display_width; - display_size[1] = pbi->common.display_height; + if (ctx->pbi) { + const VP9_COMMON *const cm = &ctx->pbi->common; + display_size[0] = cm->display_width; + display_size[1] = cm->display_height; } else { return VPX_CODEC_ERROR; } @@ -701,30 +559,33 @@ static vpx_codec_err_t get_display_size(vpx_codec_alg_priv_t *ctx, } } -static vpx_codec_err_t set_invert_tile_order(vpx_codec_alg_priv_t *ctx, - int ctr_id, - va_list args) { +static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, + int ctr_id, va_list args) { ctx->invert_tile_order = va_arg(args, int); return VPX_CODEC_OK; } -static vpx_codec_ctrl_fn_map_t ctf_maps[] = { - {VP8_SET_REFERENCE, set_reference}, - {VP8_COPY_REFERENCE, copy_reference}, - {VP8_SET_POSTPROC, set_postproc}, - {VP8_SET_DBG_COLOR_REF_FRAME, set_dbg_options}, - {VP8_SET_DBG_COLOR_MB_MODES, set_dbg_options}, - {VP8_SET_DBG_COLOR_B_MODES, set_dbg_options}, - {VP8_SET_DBG_DISPLAY_MV, set_dbg_options}, - {VP8D_GET_LAST_REF_UPDATES, get_last_ref_updates}, - {VP8D_GET_FRAME_CORRUPTED, get_frame_corrupted}, - {VP9_GET_REFERENCE, get_reference}, - {VP9D_GET_DISPLAY_SIZE, get_display_size}, - {VP9_INVERT_TILE_DECODE_ORDER, set_invert_tile_order}, +static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { + {VP8_COPY_REFERENCE, ctrl_copy_reference}, + + // Setters + {VP8_SET_REFERENCE, ctrl_set_reference}, + {VP8_SET_POSTPROC, ctrl_set_postproc}, + {VP8_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options}, + 
{VP8_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options}, + {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, + + // Getters + {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, + {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, + {VP9_GET_REFERENCE, ctrl_get_reference}, + {VP9D_GET_DISPLAY_SIZE, ctrl_get_display_size}, + { -1, NULL}, }; - #ifndef VERSION_STRING #define VERSION_STRING #endif @@ -732,22 +593,20 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = { "WebM Project VP9 Decoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | - VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, - /* vpx_codec_caps_t caps; */ - vp9_init, /* vpx_codec_init_fn_t init; */ - vp9_destroy, /* vpx_codec_destroy_fn_t destroy; */ - ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ - vp9_xma_get_mmap, /* vpx_codec_get_mmap_fn_t get_mmap; */ - vp9_xma_set_mmap, /* vpx_codec_set_mmap_fn_t set_mmap; */ + VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t + decoder_init, // vpx_codec_init_fn_t + decoder_destroy, // vpx_codec_destroy_fn_t + decoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t + NOT_IMPLEMENTED, // vpx_codec_get_mmap_fn_t + NOT_IMPLEMENTED, // vpx_codec_set_mmap_fn_t { // NOLINT - vp9_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */ - vp9_get_si, /* vpx_codec_get_si_fn_t get_si; */ - vp9_decode, /* vpx_codec_decode_fn_t decode; */ - vp9_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ - vp9_set_fb_fn, /* vpx_codec_set_fb_fn_t set_fb_fn; */ + decoder_peek_si, // vpx_codec_peek_si_fn_t + decoder_get_si, // vpx_codec_get_si_fn_t + decoder_decode, // vpx_codec_decode_fn_t + decoder_get_frame, // vpx_codec_frame_get_fn_t + decoder_set_fb_fn, // vpx_codec_set_fb_fn_t }, { // NOLINT - /* encoder functions */ NOT_IMPLEMENTED, NOT_IMPLEMENTED, NOT_IMPLEMENTED, diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk index b14e7e5..da6c0f8 100644 --- a/libvpx/vp9/vp9cx.mk +++ b/libvpx/vp9/vp9cx.mk @@ -30,6 +30,7 @@ VP9_CX_SRCS-yes += 
encoder/vp9_firstpass.c VP9_CX_SRCS-yes += encoder/vp9_block.h VP9_CX_SRCS-yes += encoder/vp9_writer.h VP9_CX_SRCS-yes += encoder/vp9_writer.c +VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.c VP9_CX_SRCS-yes += encoder/vp9_write_bit_buffer.h VP9_CX_SRCS-yes += encoder/vp9_bitstream.h VP9_CX_SRCS-yes += encoder/vp9_encodemb.h @@ -59,19 +60,24 @@ VP9_CX_SRCS-yes += encoder/vp9_pickmode.c VP9_CX_SRCS-yes += encoder/vp9_sad.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.h +VP9_CX_SRCS-yes += encoder/vp9_speed_features.c +VP9_CX_SRCS-yes += encoder/vp9_speed_features.h VP9_CX_SRCS-yes += encoder/vp9_subexp.c VP9_CX_SRCS-yes += encoder/vp9_subexp.h VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c VP9_CX_SRCS-yes += encoder/vp9_resize.c VP9_CX_SRCS-yes += encoder/vp9_resize.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.c +VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_ssim.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c VP9_CX_SRCS-yes += encoder/vp9_variance.c -VP9_CX_SRCS-yes += encoder/vp9_vaq.c -VP9_CX_SRCS-yes += encoder/vp9_vaq.h -VP9_CX_SRCS-yes += encoder/vp9_craq.c -VP9_CX_SRCS-yes += encoder/vp9_craq.h +VP9_CX_SRCS-yes += encoder/vp9_aq_variance.c +VP9_CX_SRCS-yes += encoder/vp9_aq_variance.h +VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.c +VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h +VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.c +VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.h ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c @@ -89,6 +95,7 @@ VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm +VP9_CX_SRCS-$(HAVE_AVX2) += 
encoder/x86/vp9_sad4d_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm diff --git a/libvpx/vpx/src/svc_encodeframe.c b/libvpx/vpx/src/svc_encodeframe.c index d48a761..76aacd2 100644 --- a/libvpx/vpx/src/svc_encodeframe.c +++ b/libvpx/vpx/src/svc_encodeframe.c @@ -524,9 +524,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_ctx->spatial_layers); return VPX_CODEC_INVALID_PARAM; } - // use SvcInternal value for number of layers to enable forcing single layer - // for first frame - si->layers = svc_ctx->spatial_layers; res = parse_quantizer_values(svc_ctx, si->quantizers, 0); if (res != VPX_CODEC_OK) return res; @@ -538,10 +535,13 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, res = parse_scale_factors(svc_ctx, si->scale_factors); if (res != VPX_CODEC_OK) return res; - // parse aggregate command line options + // Parse aggregate command line options. Options must start with + // "layers=xx" then followed by other options res = parse_options(svc_ctx, si->options); if (res != VPX_CODEC_OK) return res; + si->layers = svc_ctx->spatial_layers; + // Assign target bitrate for each layer. We calculate the ratio // from the resolution for now. 
// TODO(Minghai): Optimize the mechanism of allocating bits after @@ -583,8 +583,12 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, enc_cfg->rc_dropframe_thresh = 0; enc_cfg->rc_end_usage = VPX_CBR; enc_cfg->rc_resize_allowed = 0; - enc_cfg->rc_min_quantizer = 33; - enc_cfg->rc_max_quantizer = 33; + + if (enc_cfg->g_pass == VPX_RC_ONE_PASS) { + enc_cfg->rc_min_quantizer = 33; + enc_cfg->rc_max_quantizer = 33; + } + enc_cfg->rc_undershoot_pct = 100; enc_cfg->rc_overshoot_pct = 15; enc_cfg->rc_buf_initial_sz = 500; @@ -784,12 +788,17 @@ static void set_svc_parameters(SvcContext *svc_ctx, } layer_index = layer + VPX_SS_MAX_LAYERS - si->layers; - if (vpx_svc_is_keyframe(svc_ctx)) { - svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; - svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; + if (codec_ctx->config.enc->g_pass == VPX_RC_ONE_PASS) { + if (vpx_svc_is_keyframe(svc_ctx)) { + svc_params.min_quantizer = si->quantizer_keyframe[layer_index]; + svc_params.max_quantizer = si->quantizer_keyframe[layer_index]; + } else { + svc_params.min_quantizer = si->quantizer[layer_index]; + svc_params.max_quantizer = si->quantizer[layer_index]; + } } else { - svc_params.min_quantizer = si->quantizer[layer_index]; - svc_params.max_quantizer = si->quantizer[layer_index]; + svc_params.min_quantizer = codec_ctx->config.enc->rc_min_quantizer; + svc_params.max_quantizer = codec_ctx->config.enc->rc_max_quantizer; } svc_params.distance_from_i_frame = si->frame_within_gop; diff --git a/libvpx/vpx/vp8cx.h b/libvpx/vpx/vp8cx.h index 0b637d4..8944a26 100644 --- a/libvpx/vpx/vp8cx.h +++ b/libvpx/vpx/vp8cx.h @@ -192,6 +192,7 @@ enum vp8e_enc_control_id { VP9E_SET_TILE_ROWS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, + VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_SVC, VP9E_SET_SVC_PARAMETERS, @@ -364,6 +365,8 @@ VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_AQ_MODE, unsigned int) 
+VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PERIODIC_BOOST, unsigned int) + /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vpxenc.c b/libvpx/vpxenc.c index 1cd5e92..00d3e3e 100644 --- a/libvpx/vpxenc.c +++ b/libvpx/vpxenc.c @@ -400,13 +400,17 @@ static const arg_def_t frame_parallel_decoding = ARG_DEF( NULL, "frame-parallel", 1, "Enable frame parallel decodability features"); static const arg_def_t aq_mode = ARG_DEF( NULL, "aq-mode", 1, - "Adaptive q mode (0: off (by default), 1: variance 2: complexity)"); + "Adaptive q mode (0: off (by default), 1: variance 2: complexity, " + "3: cyclic refresh)"); +static const arg_def_t frame_periodic_boost = ARG_DEF( + NULL, "frame_boost", 1, + "Enable frame periodic boost (0: off (by default), 1: on)"); static const arg_def_t *vp9_args[] = { &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type, &tune_ssim, &cq_level, &max_intra_rate_pct, &lossless, - &frame_parallel_decoding, &aq_mode, + &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, NULL }; static const int vp9_arg_ctrl_map[] = { @@ -416,6 +420,7 @@ static const int vp9_arg_ctrl_map[] = { VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, + VP9E_SET_FRAME_PERIODIC_BOOST, 0 }; #endif @@ -1498,7 +1503,7 @@ static void print_time(const char *label, int64_t etl) { etl -= mins * 60; secs = etl; - fprintf(stderr, "[%3s %2"PRId64":%02"PRId64": % 02"PRId64"] ", + fprintf(stderr, "[%3s %2"PRId64":%02"PRId64":%02"PRId64"] ", label, hours, mins, secs); } else { fprintf(stderr, "[%3s unknown] ", label); |