h264: Implement local cache of the latest decoded pictures

libva only provides the reference images needed to decode the current picture, not the full decoded picture buffer (DPB). However, some codecs need the whole DPB in order to decode a picture. For example, the Allwinner hardware codec has an internal SRAM in which each picture gets a slot, and during each decoding run some metadata is generated from that SRAM content into a separate buffer. Therefore, each frame must be located at the same SRAM position every time so that the metadata can be reused properly. However, since libva only passes a few reference images, we can end up in a situation where multiple subsequent frames have the same set of reference images, but might all be used as references later on and therefore cannot be located at the same position. And from a more theoretical point of view, Linux expects a full-blown DPB in its H264 control. In order to work around this, we can create a shadow of the DPB by simply maintaining a list of 16 decoded images, each associated with its VAPictureH264 and an age. This age is the last time we used that frame as a reference. When a new picture is decoded, we either assign it to a free slot, or reuse the slot of the frame that hasn't been used as a reference for the longest time. This is a much simpler approach than the one documented in the H264 spec, but it shouldn't really be a problem since we don't manage the reference frames ourselves: we just reuse the ones passed by libva, which were themselves taken from the bitstream. As such, frames that are no longer supposed to be used for reference will stop being referenced, their age will stop increasing, and after a while we will garbage-collect their slot to store a much newer frame. Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
author: Maxime Ripard <maxime.ripard@bootlin.com> 2018-07-16 14:28:25 +0200
committer: Maxime Ripard <maxime.ripard@bootlin.com> 2018-07-17 15:30:33 +0200
commit: e7c09a336f74a688e6fb3609afd8c2687dee03c4 (patch)
tree: 924966b8a778a0cec122b45c66423ec2fca70f68 /src
parent: dadb3d344f5327b0e27c559f46051ca522887077 (diff)
download: libva-v4l2-request-e7c09a336f74a688e6fb3609afd8c2687dee03c4.tar.gz
libva-v4l2-request-e7c09a336f74a688e6fb3609afd8c2687dee03c4.tar.bz2
libva-v4l2-request-e7c09a336f74a688e6fb3609afd8c2687dee03c4.zip
4 files changed, 220 insertions, 36 deletions
diff --git a/src/context.c b/src/context.c
index 9712ad1..524c6a7 100644
--- a/src/context.c
+++ b/src/context.c
@@ -74,6 +74,7 @@ VAStatus SunxiCedrusCreateContext(VADriverContextP context,
 		status = VA_STATUS_ERROR_ALLOCATION_FAILED;
 		goto error;
 	}
+	memset(&context_object->dpb, 0, sizeof(context_object->dpb));
 
 	switch (config_object->profile) {
 	case VAProfileMPEG2Simple:
diff --git a/src/context.h b/src/context.h
index eeba6d5..5fcb91c 100644
--- a/src/context.h
+++ b/src/context.h
@@ -29,6 +29,7 @@
 #include <va/va_backend.h>
 
 #include "object_heap.h"
+#include "h264.h"
 
 #define CONTEXT(data, id)                                                      \
 	((struct object_context *)object_heap_lookup(&(data)->context_heap, id))
@@ -45,6 +46,9 @@ struct object_context {
 	int picture_width;
 	int picture_height;
 	int flags;
+
+	/* H264 only */
+	struct h264_dpb dpb;
 };
 
 VAStatus SunxiCedrusCreateContext(VADriverContextP context,
diff --git a/src/h264.c b/src/h264.c
index 8b2fae8..9514bb2 100644
--- a/src/h264.c
+++ b/src/h264.c
@@ -25,6 +25,7 @@
  */
 
 #include <assert.h>
+#include <limits.h>
 #include <string.h>
 
 #include <sys/ioctl.h>
@@ -41,19 +42,167 @@ enum h264_slice_type {
 	H264_SLICE_B    = 1,
 };
 
-static int h264_lookup_ref_pic(VAPictureParameterBufferH264 *VAPicture,
-			       unsigned int frame_num)
+static bool is_picture_null(VAPictureH264 *pic)
 {
-	int i;
+	return pic->picture_id == VA_INVALID_SURFACE;
+}
+
+static struct h264_dpb_entry *
+dpb_find_invalid_entry(struct object_context *context)
+{
+	unsigned int i;
+
+	for (i = 0; i < H264_DPB_SIZE; i++) {
+		struct h264_dpb_entry *entry = &context->dpb.entries[i];
+
+		if (!entry->valid && !entry->reserved)
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct h264_dpb_entry *
+dpb_find_oldest_unused_entry(struct object_context *context)
+{
+	unsigned int min_age = UINT_MAX;
+	unsigned int i;
+	struct h264_dpb_entry *match = NULL;
+
+	for (i = 0; i < H264_DPB_SIZE; i++) {
+		struct h264_dpb_entry *entry = &context->dpb.entries[i];
+
+		if (!entry->used && (entry->age < min_age)) {
+			min_age = entry->age;
+			match = entry;
+		}
+	}
+
+	return match;
+}
+
+static struct h264_dpb_entry *dpb_find_entry(struct object_context *context)
+{
+	struct h264_dpb_entry *entry;
+
+	entry = dpb_find_invalid_entry(context);
+	if (!entry)
+		entry = dpb_find_oldest_unused_entry(context);
+
+	return entry;
+}
 
-	for (i = 0; i < VAPicture->num_ref_frames; i++) {
-		VAPictureH264 *pic = &VAPicture->ReferenceFrames[i];
+static struct h264_dpb_entry *dpb_lookup(struct object_context *context,
+					 VAPictureH264 *pic, unsigned int *idx)
+{
+	unsigned int i;
+
+	for (i = 0; i < H264_DPB_SIZE; i++) {
+		struct h264_dpb_entry *entry = &context->dpb.entries[i];
+
+		if (!entry->valid)
+			continue;
 
-		if (frame_num == pic->frame_idx)
-			return i;
+		if (entry->pic.picture_id == pic->picture_id) {
+			if (idx)
+				*idx = i;
+
+			return entry;
+		}
 	}
 
-	return 0;
+	return NULL;
+}
+
+static void dpb_clear_entry(struct h264_dpb_entry *entry, bool reserved)
+{
+	memset(entry, 0, sizeof(*entry));
+
+	if (reserved)
+		entry->reserved = true;
+}
+
+static void dpb_insert(struct object_context *context, VAPictureH264 *pic,
+		       struct h264_dpb_entry *entry)
+{
+	if (is_picture_null(pic))
+		return;
+
+	if (dpb_lookup(context, pic, NULL))
+		return;
+
+	if (!entry)
+		entry = dpb_find_entry(context);
+
+	memcpy(&entry->pic, pic, sizeof(entry->pic));
+	entry->age = context->dpb.age;
+	entry->valid = true;
+	entry->reserved = false;
+
+	if (!(pic->flags & VA_PICTURE_H264_INVALID))
+		entry->used = true;
+}
+
+static void dpb_update(struct object_context *context,
+		       VAPictureParameterBufferH264 *parameters)
+{
+	unsigned int i;
+
+	context->dpb.age++;
+
+	for (i = 0; i < H264_DPB_SIZE; i++) {
+		struct h264_dpb_entry *entry = &context->dpb.entries[i];
+
+		entry->used = false;
+	}
+
+	for (i = 0; i < parameters->num_ref_frames; i++) {
+		VAPictureH264 *pic = &parameters->ReferenceFrames[i];
+		struct h264_dpb_entry *entry;
+
+		if (is_picture_null(pic))
+			continue;
+
+		entry = dpb_lookup(context, pic, NULL);
+		if (entry) {
+			entry->age = context->dpb.age;
+			entry->used = true;
+		} else {
+			dpb_insert(context, pic, NULL);
+		}
+	}
+}
+
+static void h264_fill_dpb(struct cedrus_data *data,
+			  struct object_context *context,
+			  struct v4l2_ctrl_h264_decode_param *decode)
+{
+	int i;
+
+	for (i = 0; i < H264_DPB_SIZE; i++) {
+		struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
+		struct h264_dpb_entry *entry = &context->dpb.entries[i];
+		struct object_surface *surface =
+			SURFACE(data, entry->pic.picture_id);
+
+		if (!entry->valid)
+			continue;
+
+		if (surface)
+			dpb->buf_index = surface->destination_index;
+
+		dpb->frame_num = entry->pic.frame_idx;
+		dpb->top_field_order_cnt = entry->pic.TopFieldOrderCnt;
+		dpb->bottom_field_order_cnt = entry->pic.BottomFieldOrderCnt;
+
+		dpb->flags = V4L2_H264_DPB_ENTRY_FLAG_VALID;
+
+		if (entry->used)
+			dpb->flags |= V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
+
+		if (entry->pic.flags & VA_PICTURE_H264_LONG_TERM_REFERENCE)
+			dpb->flags |= V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM;
+	}
 }
 
 static void h264_va_picture_to_v4l2(struct cedrus_data *driver_data,
@@ -63,32 +212,12 @@ static void h264_va_picture_to_v4l2(struct cedrus_data *driver_data,
 				    struct v4l2_ctrl_h264_pps *pps,
 				    struct v4l2_ctrl_h264_sps *sps)
 {
-	int i;
+	h264_fill_dpb(driver_data, context, decode);
 
 	decode->num_slices = VAPicture->num_ref_frames;
 	decode->top_field_order_cnt = VAPicture->CurrPic.TopFieldOrderCnt;
 	decode->bottom_field_order_cnt = VAPicture->CurrPic.BottomFieldOrderCnt;
 
-	for (i = 0; i < VAPicture->num_ref_frames; i++) {
-		struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
-		VAPictureH264 *pic = &VAPicture->ReferenceFrames[i];
-		struct object_surface *surface_object =
-			SURFACE(driver_data, pic->picture_id);
-
-		if (surface_object)
-			dpb->buf_index = surface_object->destination_index;
-
-		dpb->frame_num = pic->frame_idx;
-		dpb->top_field_order_cnt = pic->TopFieldOrderCnt;
-		dpb->bottom_field_order_cnt = pic->BottomFieldOrderCnt;
-
-		if (pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE)
-			dpb->flags |= V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM;
-
-		if (!(pic->flags & VA_PICTURE_H264_INVALID))
-			dpb->flags |= V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
-	}
-
 	pps->weighted_bipred_idc =
 		VAPicture->pic_fields.bits.weighted_bipred_idc;
 	pps->pic_init_qs_minus26 = VAPicture->pic_init_qs_minus26;
@@ -172,7 +301,6 @@ static void h264_va_slice_to_v4l2(struct cedrus_data *driver_data,
 				  struct v4l2_ctrl_h264_slice_param *slice)
 {
 	struct v4l2_h264_weight_factors *factors;
-	int i;
 
 	slice->size = VASlice->slice_data_size;
 	slice->header_bit_size = VASlice->slice_data_bit_offset;
@@ -187,21 +315,41 @@ static void h264_va_slice_to_v4l2(struct cedrus_data *driver_data,
 
 	if (((VASlice->slice_type % 5) == H264_SLICE_P) ||
 	    ((VASlice->slice_type % 5) == H264_SLICE_B)) {
+		unsigned int i;
+
 		slice->num_ref_idx_l0_active_minus1 =
 			VASlice->num_ref_idx_l0_active_minus1;
 
-		for (i = 0; i < VASlice->num_ref_idx_l0_active_minus1 + 1; i++)
-			slice->ref_pic_list0[i] = h264_lookup_ref_pic(
-				VAPicture, VASlice->RefPicList0[i].frame_idx);
+		for (i = 0; i < VASlice->num_ref_idx_l0_active_minus1 + 1; i++) {
+			VAPictureH264 *pic = &VASlice->RefPicList0[i];
+			struct h264_dpb_entry *entry;
+			unsigned int idx;
+
+			entry = dpb_lookup(context, pic, &idx);
+			if (!entry)
+				continue;
+
+			slice->ref_pic_list0[i] = idx;
+		}
 	}
 
 	if ((VASlice->slice_type % 5) == H264_SLICE_B) {
+		unsigned int i;
+
 		slice->num_ref_idx_l1_active_minus1 =
 			VASlice->num_ref_idx_l1_active_minus1;
 
-		for (i = 0; i < VASlice->num_ref_idx_l1_active_minus1 + 1; i++)
-			slice->ref_pic_list1[i] = h264_lookup_ref_pic(
-				VAPicture, VASlice->RefPicList1[i].frame_idx);
+		for (i = 0; i < VASlice->num_ref_idx_l1_active_minus1 + 1; i++) {
+			VAPictureH264 *pic = &VASlice->RefPicList1[i];
+			struct h264_dpb_entry *entry;
+			unsigned int idx;
+
+			entry = dpb_lookup(context, pic, &idx);
+			if (!entry)
+				continue;
+
+			slice->ref_pic_list1[i] = idx;
+		}
 	}
 
 	if (VASlice->direct_spatial_mv_pred_flag)
@@ -242,8 +390,18 @@ int h264_set_controls(struct cedrus_data *driver_data,
 	struct v4l2_ctrl_h264_slice_param slice = { 0 };
 	struct v4l2_ctrl_h264_pps pps = { 0 };
 	struct v4l2_ctrl_h264_sps sps = { 0 };
+	struct h264_dpb_entry *output;
 	int rc;
 
+	output = dpb_lookup(context, &surface->params.h264.picture.CurrPic,
+			    NULL);
+	if (!output)
+		output = dpb_find_entry(context);
+
+	dpb_clear_entry(output, true);
+
+	dpb_update(context, &surface->params.h264.picture);
+
 	h264_va_picture_to_v4l2(driver_data, context,
 				&surface->params.h264.picture,
 				&decode, &pps, &sps);
@@ -281,5 +439,7 @@ int h264_set_controls(struct cedrus_data *driver_data,
 	if (rc < 0)
 		return VA_STATUS_ERROR_OPERATION_FAILED;
 
+	dpb_insert(context, &surface->params.h264.picture.CurrPic, output);
+
 	return VA_STATUS_SUCCESS;
 }
diff --git a/src/h264.h b/src/h264.h
index acf4c33..00583bf 100644
--- a/src/h264.h
+++ b/src/h264.h
@@ -27,10 +27,29 @@
 #ifndef _H264_H_
 #define _H264_H_
 
+#include <stdbool.h>
+
+#include <va/va.h>
+
 struct object_context;
 struct object_surface;
 struct cedrus_data;
 
+#define H264_DPB_SIZE 16
+
+struct h264_dpb_entry {
+	VAPictureH264 pic;
+	unsigned int age;
+	bool used;
+	bool valid;
+	bool reserved;
+};
+
+struct h264_dpb {
+	struct h264_dpb_entry entries[H264_DPB_SIZE];
+	unsigned int age;
+};
+
 int h264_set_controls(struct cedrus_data *data,
 		      struct object_context *context,
 		      struct object_surface *surface);
author	Maxime Ripard <maxime.ripard@bootlin.com>	2018-07-16 14:28:25 +0200
committer	Maxime Ripard <maxime.ripard@bootlin.com>	2018-07-17 15:30:33 +0200
commit	e7c09a336f74a688e6fb3609afd8c2687dee03c4 (patch)
tree	924966b8a778a0cec122b45c66423ec2fca70f68 /src
parent	dadb3d344f5327b0e27c559f46051ca522887077 (diff)
download	libva-v4l2-request-e7c09a336f74a688e6fb3609afd8c2687dee03c4.tar.gz libva-v4l2-request-e7c09a336f74a688e6fb3609afd8c2687dee03c4.tar.bz2 libva-v4l2-request-e7c09a336f74a688e6fb3609afd8c2687dee03c4.zip