diff options
author | Chirayu Desai <chirayudesai1@gmail.com> | 2012-07-12 14:37:05 +0530 |
---|---|---|
committer | Chirayu Desai <chirayudesai1@gmail.com> | 2012-08-18 14:52:44 +0530 |
commit | 0a336cc1f20ec04f5af90cc455a769b8cc3138ea (patch) | |
tree | 35d50d8a1080e359e90c0ae062371b4208baaa75 /exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video | |
parent | f1587a360fb72dffce67917f3dbc8144165014da (diff) | |
download | android_hardware_samsung-0a336cc1f20ec04f5af90cc455a769b8cc3138ea.tar.gz android_hardware_samsung-0a336cc1f20ec04f5af90cc455a769b8cc3138ea.tar.bz2 android_hardware_samsung-0a336cc1f20ec04f5af90cc455a769b8cc3138ea.zip |
exynos3: initial commit
Patch Set 2: removed unused files,
as audio, camera and sensors are device specific
Patch Set 3: aries OMX
Patch Set 5: liblight patch for epic
Patch Set 9: liblight(s) is device specific, rmed it from here.
Change-Id: I57a3d5714037836bab441ee5a3e772c260fb21d4
Diffstat (limited to 'exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video')
15 files changed, 5972 insertions, 0 deletions
diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk new file mode 100644 index 0000000..4106a68 --- /dev/null +++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk @@ -0,0 +1,36 @@ + +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +LOCAL_MODULE_TAGS := optional + +ifeq ($(ARCH_ARM_HAVE_NEON),true) +LOCAL_SRC_FILES := \ + csc_yuv420_nv12t_y_neon.s \ + csc_yuv420_nv12t_uv_neon.s \ + csc_nv12t_yuv420_y_neon.s \ + csc_nv12t_yuv420_uv_neon.s \ + csc_interleave_memcpy.s \ + csc_deinterleave_memcpy.s + +else +LOCAL_SRC_FILES := \ + color_space_convertor.c + +endif + +LOCAL_MODULE := libseccsc + +LOCAL_CFLAGS := + +LOCAL_ARM_MODE := arm + +LOCAL_STATIC_LIBRARIES := + +LOCAL_SHARED_LIBRARIES := liblog + +LOCAL_C_INCLUDES := \ + $(SEC_CODECS)/video/mfc_c110/include + +include $(BUILD_STATIC_LIBRARY) + diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/color_space_convertor.c b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/color_space_convertor.c new file mode 100644 index 0000000..c1ac638 --- /dev/null +++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/color_space_convertor.c @@ -0,0 +1,1092 @@ +/* + * + * Copyright 2011 Samsung Electronics S.LSI Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * @file color_space_convertor.c + * @brief SEC_OMX specific define + * @author ShinWon Lee (shinwon.lee@samsung.com) + * @version 1.0 + * @history + * 2011.7.01 : Create + * + * NOTE(review): memcpy is called further down in this file, but only + * stdlib.h and color_space_convertor.h are included here; confirm that one + * of them declares memcpy (string.h is the standard header for it). + */ + +#include "stdlib.h" +#include "color_space_convertor.h" + +#define TILED_SIZE 64*32 /* NOTE(review): expansion is not parenthesized; presumably bytes per 64x32 tile - confirm */ + +/* + * De-interleaves src to dest1, dest2 + * + * Bytes at even offsets of src are written to dest1 and bytes at odd offsets + * to dest2. src_size/2 byte pairs are processed, so when src_size is odd the + * last byte of src is not read. + * + * @param dest1 + * Address of de-interleaved data[out], receives src[0], src[2], ... + * + * @param dest2 + * Address of de-interleaved data[out], receives src[1], src[3], ... + * + * @param src + * Address of interleaved data[in] + * + * @param src_size + * Size of interleaved data[in], in bytes; src_size/2 bytes are written to + * each of dest1 and dest2 + */ +void csc_deinterleave_memcpy(char *dest1, char *dest2, char *src, int src_size) +{ + int i = 0; + for(i=0; i<src_size/2; i++) { + dest1[i] = src[i*2]; + dest2[i] = src[i*2+1]; + } +} + +/* + * Interleaves src1, src2 to dest + * + * For each i below src_size, src1[i] is written to dest[2*i] and src2[i] to + * dest[2*i+1], so 2*src_size bytes of dest are written. + * + * @param dest + * Address of interleaved data[out] + * + * @param src1 + * Address of de-interleaved data[in], supplies the even offsets of dest + * + * @param src2 + * Address of de-interleaved data[in], supplies the odd offsets of dest + * + * @param src_size + * Size of de-interleaved data[in]: number of bytes read from each of src1 + * and src2 + */ +void csc_interleave_memcpy(char *dest, char *src1, char *src2, int src_size) +{ + int i = 0; + for(i=0; i<src_size; i++) { + dest[i*2] = src1[i]; + dest[i*2+1] = src2[i]; + } +} + +/* + * Converts tiled data to linear. + * 1. Y of NV12T to Y of YUV420P + * 2. Y of NV12T to Y of YUV420S + * 3. 
UV of NV12T to UV of YUV420S + * + * @param yuv420_dest + * Y or UV plane address of YUV420[out] + * + * @param nv12t_src + * Y or UV plane address of NV12T[in] + * + * @param yuv420_width + * Width of YUV420[in] + * + * @param yuv420_height + * Y: Height of YUV420, UV: Height/2 of YUV420[in] + */ +void csc_tiled_to_linear(char *yuv420_dest, char *nv12t_src, int yuv420_width, int yuv420_height) +{ + unsigned int i, j; + unsigned int tiled_x_index = 0, tiled_y_index = 0; + unsigned int aligned_x_size = 0; + unsigned int tiled_offset = 0, tiled_offset1 = 0, tiled_offset2 = 0, tiled_offset3 = 0; + unsigned int temp1 = 0, temp2 = 0; + + if (yuv420_width >= 1024) { + for (i=0; i<yuv420_height; i=i+1) { + tiled_offset = 0; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+2; + tiled_offset = tiled_offset<<11; + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + temp2 = 8; + } else { + temp2 = ((yuv420_height+31)>>5)<<5; + /* even fomula: x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + if ((i+32)<temp2) { + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*6; + tiled_offset3 = tiled_offset+2048*7; + temp2 = 8; + } else { + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + temp2 = 4; + } + } + temp1 = i&0x1F; + memcpy(yuv420_dest+yuv420_width*(i), nv12t_src+tiled_offset+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*1, nv12t_src+tiled_offset1+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*2, nv12t_src+tiled_offset2+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*3, nv12t_src+tiled_offset3+64*(temp1), 64); + + 
tiled_offset = tiled_offset+temp2*2048; + tiled_offset1 = tiled_offset1+temp2*2048; + tiled_offset2 = tiled_offset2+temp2*2048; + tiled_offset3 = tiled_offset3+temp2*2048; + memcpy(yuv420_dest+yuv420_width*(i)+64*4, nv12t_src+tiled_offset+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*5, nv12t_src+tiled_offset1+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*6, nv12t_src+tiled_offset2+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*7, nv12t_src+tiled_offset3+64*(temp1), 64); + + tiled_offset = tiled_offset+temp2*2048; + tiled_offset1 = tiled_offset1+temp2*2048; + tiled_offset2 = tiled_offset2+temp2*2048; + tiled_offset3 = tiled_offset3+temp2*2048; + memcpy(yuv420_dest+yuv420_width*(i)+64*8, nv12t_src+tiled_offset+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*9, nv12t_src+tiled_offset1+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*10, nv12t_src+tiled_offset2+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*11, nv12t_src+tiled_offset3+64*(temp1), 64); + + tiled_offset = tiled_offset+temp2*2048; + tiled_offset1 = tiled_offset1+temp2*2048; + tiled_offset2 = tiled_offset2+temp2*2048; + tiled_offset3 = tiled_offset3+temp2*2048; + memcpy(yuv420_dest+yuv420_width*(i)+64*12, nv12t_src+tiled_offset+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*13, nv12t_src+tiled_offset1+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*14, nv12t_src+tiled_offset2+64*(temp1), 64); + memcpy(yuv420_dest+yuv420_width*(i)+64*15, nv12t_src+tiled_offset3+64*(temp1), 64); + } + aligned_x_size = 1024; + } + + if ((yuv420_width-aligned_x_size) >= 512) { + for (i=0; i<yuv420_height; i=i+1) { + tiled_offset = 0; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+2; + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+temp1; + 
tiled_offset = tiled_offset<<11; + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + temp2 = 8; + } else { + temp2 = ((yuv420_height+31)>>5)<<5; + /* even fomula: x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + if ((i+32)<temp2) { + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*6; + tiled_offset3 = tiled_offset+2048*7; + temp2 = 8; + } else { + temp1 = aligned_x_size>>6; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + temp2 = 4; + } + } + temp1 = i&0x1F; + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i), nv12t_src+tiled_offset+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*1, nv12t_src+tiled_offset1+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*2, nv12t_src+tiled_offset2+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*3, nv12t_src+tiled_offset3+64*(temp1), 64); + + tiled_offset = tiled_offset+temp2*2048; + tiled_offset1 = tiled_offset1+temp2*2048; + tiled_offset2 = tiled_offset2+temp2*2048; + tiled_offset3 = tiled_offset3+temp2*2048; + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*4, nv12t_src+tiled_offset+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*5, nv12t_src+tiled_offset1+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*6, nv12t_src+tiled_offset2+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*7, nv12t_src+tiled_offset3+64*(temp1), 64); + } + aligned_x_size = aligned_x_size+512; + } + + if ((yuv420_width-aligned_x_size) >= 256) { + for (i=0; i<yuv420_height; i=i+1) { + tiled_offset = 0; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) 
{ + /* odd fomula: 2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+2; + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + } else { + temp2 = ((yuv420_height+31)>>5)<<5; + /* even fomula: x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + if ((i+32)<temp2) { + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*6; + tiled_offset3 = tiled_offset+2048*7; + } else { + temp1 = aligned_x_size>>6; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + } + } + temp1 = i&0x1F; + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i), nv12t_src+tiled_offset+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*1, nv12t_src+tiled_offset1+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*2, nv12t_src+tiled_offset2+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*3, nv12t_src+tiled_offset3+64*(temp1), 64); + } + aligned_x_size = aligned_x_size+256; + } + + if ((yuv420_width-aligned_x_size) >= 128) { + for (i=0; i<yuv420_height; i=i+2) { + tiled_offset = 0; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+2; + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + tiled_offset1 = tiled_offset+2048*1; + } else { + temp2 = 
((yuv420_height+31)>>5)<<5; + /* even fomula: x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + if ((i+32)<temp2) { + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + } else { + temp1 = aligned_x_size>>6; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + } + } + temp1 = i&0x1F; + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i), nv12t_src+tiled_offset+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64, nv12t_src+tiled_offset1+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+1), nv12t_src+tiled_offset+64*(temp1+1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+1)+64, nv12t_src+tiled_offset1+64*(temp1+1), 64); + } + aligned_x_size = aligned_x_size+128; + } + + if ((yuv420_width-aligned_x_size) >= 64) { + for (i=0; i<yuv420_height; i=i+4) { + tiled_offset = 0; + tiled_x_index = aligned_x_size>>6; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x+(x>>2)<<2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+tiled_x_index; + tiled_offset = tiled_offset+2; + temp1 = (tiled_x_index>>2)<<2; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + } else { + temp2 = ((yuv420_height+31)>>5)<<5; + if ((i+32)<temp2) { + /* even1 fomula: x+((x+2)>>2)<<2+x_block_num*y */ + temp1 = tiled_x_index+2; + temp1 = (temp1>>2)<<2; + tiled_offset = tiled_x_index+temp1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset+tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + } else { + /* even2 fomula: x+x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset+tiled_x_index; + tiled_offset = tiled_offset<<11; + } 
+ } + + temp1 = i&0x1F; + temp2 = aligned_x_size&0x3F; + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i), nv12t_src+tiled_offset+temp2+64*(temp1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+1), nv12t_src+tiled_offset+temp2+64*(temp1+1), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+2), nv12t_src+tiled_offset+temp2+64*(temp1+2), 64); + memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+3), nv12t_src+tiled_offset+temp2+64*(temp1+3), 64); + } + aligned_x_size = aligned_x_size+64; + } + + if (yuv420_width != aligned_x_size) { + for (i=0; i<yuv420_height; i=i+4) { + for (j=aligned_x_size; j<yuv420_width; j=j+4) { + tiled_offset = 0; + tiled_x_index = j>>6; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x+(x>>2)<<2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+tiled_x_index; + tiled_offset = tiled_offset+2; + temp1 = (tiled_x_index>>2)<<2; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + } else { + temp2 = ((yuv420_height+31)>>5)<<5; + if ((i+32)<temp2) { + /* even1 fomula: x+((x+2)>>2)<<2+x_block_num*y */ + temp1 = tiled_x_index+2; + temp1 = (temp1>>2)<<2; + tiled_offset = tiled_x_index+temp1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset+tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + } else { + /* even2 fomula: x+x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset+tiled_x_index; + tiled_offset = tiled_offset<<11; + } + } + + temp1 = i&0x1F; + temp2 = j&0x3F; + memcpy(yuv420_dest+j+yuv420_width*(i), nv12t_src+tiled_offset+temp2+64*(temp1), 4); + memcpy(yuv420_dest+j+yuv420_width*(i+1), nv12t_src+tiled_offset+temp2+64*(temp1+1), 4); + memcpy(yuv420_dest+j+yuv420_width*(i+2), nv12t_src+tiled_offset+temp2+64*(temp1+2), 4); + memcpy(yuv420_dest+j+yuv420_width*(i+3), 
nv12t_src+tiled_offset+temp2+64*(temp1+3), 4); + } + } + } +} + +/* + * Converts and Deinterleaves tiled data to linear + * 1. UV of NV12T to UV of YUV420P + * + * @param yuv420_u_dest + * U plane address of YUV420P[out] + * + * @param yuv420_v_dest + * V plane address of YUV420P[out] + * + * @param nv12t_src + * UV plane address of NV12T[in] + * + * @param yuv420_width + * Width of YUV420[in] + * + * @param yuv420_uv_height + * Height/2 of YUV420[in] + */ +void csc_tiled_to_linear_deinterleave(char *yuv420_u_dest, char *yuv420_v_dest, char *nv12t_uv_src, int yuv420_width, int yuv420_uv_height) +{ + unsigned int i, j; + unsigned int tiled_x_index = 0, tiled_y_index = 0; + unsigned int aligned_x_size = 0; + unsigned int tiled_offset = 0, tiled_offset1 = 0, tiled_offset2 = 0, tiled_offset3 = 0; + unsigned int temp1 = 0, temp2 = 0; + + if (yuv420_width >= 1024) { + for (i=0; i<yuv420_uv_height; i=i+1) { + tiled_offset = 0; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+2; + tiled_offset = tiled_offset<<11; + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + temp2 = 8; + } else { + temp2 = ((yuv420_uv_height+31)>>5)<<5; + /* even fomula: x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + if ((i+32)<temp2) { + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*6; + tiled_offset3 = tiled_offset+2048*7; + temp2 = 8; + } else { + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + temp2 = 4; + } + } + temp1 = i&0x1F; + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i), yuv420_v_dest+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64); + 
csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*1, yuv420_v_dest+yuv420_width/2*(i)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*2, yuv420_v_dest+yuv420_width/2*(i)+32*2, nv12t_uv_src+tiled_offset2+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*3, yuv420_v_dest+yuv420_width/2*(i)+32*3, nv12t_uv_src+tiled_offset3+64*(temp1), 64); + + tiled_offset = tiled_offset+temp2*2048; + tiled_offset1 = tiled_offset1+temp2*2048; + tiled_offset2 = tiled_offset2+temp2*2048; + tiled_offset3 = tiled_offset3+temp2*2048; + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*4, yuv420_v_dest+yuv420_width/2*(i)+32*4, nv12t_uv_src+tiled_offset+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*5, yuv420_v_dest+yuv420_width/2*(i)+32*5, nv12t_uv_src+tiled_offset1+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*6, yuv420_v_dest+yuv420_width/2*(i)+32*6, nv12t_uv_src+tiled_offset2+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*7, yuv420_v_dest+yuv420_width/2*(i)+32*7, nv12t_uv_src+tiled_offset3+64*(temp1), 64); + + tiled_offset = tiled_offset+temp2*2048; + tiled_offset1 = tiled_offset1+temp2*2048; + tiled_offset2 = tiled_offset2+temp2*2048; + tiled_offset3 = tiled_offset3+temp2*2048; + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*8, yuv420_v_dest+yuv420_width/2*(i)+32*8, nv12t_uv_src+tiled_offset+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*9, yuv420_v_dest+yuv420_width/2*(i)+32*9, nv12t_uv_src+tiled_offset1+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*10, yuv420_v_dest+yuv420_width/2*(i)+32*10, nv12t_uv_src+tiled_offset2+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*11, yuv420_v_dest+yuv420_width/2*(i)+32*11, nv12t_uv_src+tiled_offset3+64*(temp1), 64); + + tiled_offset = 
tiled_offset+temp2*2048; + tiled_offset1 = tiled_offset1+temp2*2048; + tiled_offset2 = tiled_offset2+temp2*2048; + tiled_offset3 = tiled_offset3+temp2*2048; + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*12, yuv420_v_dest+yuv420_width/2*(i)+32*12, nv12t_uv_src+tiled_offset+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*13, yuv420_v_dest+yuv420_width/2*(i)+32*13, nv12t_uv_src+tiled_offset1+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*14, yuv420_v_dest+yuv420_width/2*(i)+32*14, nv12t_uv_src+tiled_offset2+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*15, yuv420_v_dest+yuv420_width/2*(i)+32*15, nv12t_uv_src+tiled_offset3+64*(temp1), 64); + } + aligned_x_size = 1024; + } + + if ((yuv420_width-aligned_x_size) >= 512) { + for (i=0; i<yuv420_uv_height; i=i+1) { + tiled_offset = 0; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+2; + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + temp2 = 8; + } else { + temp2 = ((yuv420_uv_height+31)>>5)<<5; + /* even fomula: x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + if ((i+32)<temp2) { + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*6; + tiled_offset3 = tiled_offset+2048*7; + temp2 = 8; + } else { + temp1 = aligned_x_size>>6; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + 
temp2 = 4; + } + } + temp1 = i&0x1F; + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*2, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*2, nv12t_uv_src+tiled_offset2+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*3, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*3, nv12t_uv_src+tiled_offset3+64*(temp1), 64); + + tiled_offset = tiled_offset+temp2*2048; + tiled_offset1 = tiled_offset1+temp2*2048; + tiled_offset2 = tiled_offset2+temp2*2048; + tiled_offset3 = tiled_offset3+temp2*2048; + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*4, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*4, nv12t_uv_src+tiled_offset+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*5, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*5, nv12t_uv_src+tiled_offset1+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*6, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*6, nv12t_uv_src+tiled_offset2+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*7, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*7, nv12t_uv_src+tiled_offset3+64*(temp1), 64); + } + aligned_x_size = aligned_x_size+512; + } + + if ((yuv420_width-aligned_x_size) >= 256) { + for (i=0; i<yuv420_uv_height; i=i+1) { + tiled_offset = 0; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = 
tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+2; + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + } else { + temp2 = ((yuv420_uv_height+31)>>5)<<5; + /* even fomula: x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + if ((i+32)<temp2) { + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*6; + tiled_offset3 = tiled_offset+2048*7; + } else { + temp1 = aligned_x_size>>6; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + tiled_offset2 = tiled_offset+2048*2; + tiled_offset3 = tiled_offset+2048*3; + } + } + temp1 = i&0x1F; + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*2, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*2, nv12t_uv_src+tiled_offset2+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*3, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*3, nv12t_uv_src+tiled_offset3+64*(temp1), 64); + } + aligned_x_size = aligned_x_size+256; + } + + if ((yuv420_width-aligned_x_size) >= 128) { + for (i=0; i<yuv420_uv_height; i=i+2) { + tiled_offset = 0; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + 
tiled_offset = tiled_offset+2; + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + tiled_offset1 = tiled_offset+2048*1; + } else { + temp2 = ((yuv420_uv_height+31)>>5)<<5; + /* even fomula: x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + if ((i+32)<temp2) { + temp1 = aligned_x_size>>5; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + } else { + temp1 = aligned_x_size>>6; + tiled_offset = tiled_offset+(temp1<<11); + tiled_offset1 = tiled_offset+2048*1; + } + } + temp1 = i&0x1F; + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i+1), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i+1), nv12t_uv_src+tiled_offset+64*(temp1+1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i+1)+32*1, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i+1)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1+1), 64); + } + aligned_x_size = aligned_x_size+128; + } + + if ((yuv420_width-aligned_x_size) >= 64) { + for (i=0; i<yuv420_uv_height; i=i+2) { + tiled_offset = 0; + tiled_x_index = aligned_x_size>>6; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x+(x>>2)<<2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+tiled_x_index; + tiled_offset = tiled_offset+2; + temp1 = (tiled_x_index>>2)<<2; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + } else { + temp2 = 
((yuv420_uv_height+31)>>5)<<5; + if ((i+32)<temp2) { + /* even1 fomula: x+((x+2)>>2)<<2+x_block_num*y */ + temp1 = tiled_x_index+2; + temp1 = (temp1>>2)<<2; + tiled_offset = tiled_x_index+temp1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset+tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + } else { + /* even2 fomula: x+x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset+tiled_x_index; + tiled_offset = tiled_offset<<11; + } + } + temp1 = i&0x1F; + temp2 = aligned_x_size&0x3F; + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64); + csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i+1), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i+1), nv12t_uv_src+tiled_offset+64*(temp1+1), 64); + } + aligned_x_size = aligned_x_size+64; + } + + if (yuv420_width != aligned_x_size) { + for (i=0; i<yuv420_uv_height; i=i+2) { + for (j=aligned_x_size; j<yuv420_width; j=j+4) { + tiled_offset = 0; + tiled_x_index = j>>6; + tiled_y_index = i>>5; + if (tiled_y_index & 0x1) { + /* odd fomula: 2+x+(x>>2)<<2+x_block_num*(y-1) */ + tiled_offset = tiled_y_index-1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset*(temp1>>6); + tiled_offset = tiled_offset+tiled_x_index; + tiled_offset = tiled_offset+2; + temp1 = (tiled_x_index>>2)<<2; + tiled_offset = tiled_offset+temp1; + tiled_offset = tiled_offset<<11; + } else { + temp2 = ((yuv420_uv_height+31)>>5)<<5; + if ((i+32)<temp2) { + /* even1 fomula: x+((x+2)>>2)<<2+x_block_num*y */ + temp1 = tiled_x_index+2; + temp1 = (temp1>>2)<<2; + tiled_offset = tiled_x_index+temp1; + temp1 = ((yuv420_width+127)>>7)<<7; + tiled_offset = tiled_offset+tiled_y_index*(temp1>>6); + tiled_offset = tiled_offset<<11; + } else { + /* even2 fomula: x+x_block_num*y */ + temp1 = ((yuv420_width+127)>>7)<<7; + 
/*
 * Returns the byte offset of the 64x32 tile (tile_x, tile_y) inside an
 * NV12T plane whose linear size is width x height.
 *
 * @param tile_x
 *   Horizontal tile index (pixel x / 64)[in]
 *
 * @param tile_y
 *   Vertical tile index (pixel y / 32)[in]
 *
 * @param width
 *   Width of the linear plane in bytes[in]
 *
 * @param height
 *   Height of the linear plane in rows[in]
 */
static unsigned int csc_linear_to_tiled_offset(unsigned int tile_x, unsigned int tile_y,
                                               unsigned int width, unsigned int height)
{
    /* x_block_num: width rounded up to 128 bytes, counted in 64-byte tiles */
    unsigned int tiles_per_row = (((width+127)>>7)<<7)>>6;
    /* height rounded up to a whole number of 32-row tiles */
    unsigned int padded_height = ((height+31)>>5)<<5;
    unsigned int tile_index;

    if (tile_y & 0x1) {
        /* odd formula: 2+x+((x>>2)<<2)+x_block_num*(y-1) */
        tile_index = (tile_y-1)*tiles_per_row+tile_x+2+((tile_x>>2)<<2);
    } else if (((tile_y+1)<<5) < padded_height) {
        /* even1 formula: x+(((x+2)>>2)<<2)+x_block_num*y */
        tile_index = tile_x+(((tile_x+2)>>2)<<2)+tile_y*tiles_per_row;
    } else {
        /* even2 formula (even tile row with no tile row below it): x+x_block_num*y */
        tile_index = tile_y*tiles_per_row+tile_x;
    }

    /* every tile holds 64*32 = 2048 bytes */
    return tile_index<<11;
}

/*
 * Converts linear data to tiled.
 * 1. Y of YUV420P to Y of NV12T
 * 2. Y of YUV420S to Y of NV12T
 * 3. UV of YUV420S to UV of NV12T
 *
 * Only bytes that exist in the source plane are read: partial tiles at the
 * right and bottom edges are copied with their real width and row count, so
 * dimensions that are not multiples of 4 no longer read past the source.
 * Destination bytes that have no source pixel are left untouched.
 * Nothing is done when a pointer is null or a dimension is not positive.
 *
 * @param nv12t_dest
 *   Y or UV plane address of NV12T[out]
 *
 * @param yuv420_src
 *   Y or UV plane address of YUV420P(S)[in]
 *
 * @param yuv420_width
 *   Width of YUV420[in]
 *
 * @param yuv420_height
 *   Y: Height of YUV420, UV: Height/2 of YUV420[in]
 */
void csc_linear_to_tiled(char *nv12t_dest, char *yuv420_src, int yuv420_width, int yuv420_height)
{
    unsigned int width, height;
    unsigned int x, y, row;
    unsigned int rows_in_tile, bytes_in_row;
    char *tile;
    const char *line;

    if (!nv12t_dest || !yuv420_src || yuv420_width <= 0 || yuv420_height <= 0) {
        return;
    }

    width = (unsigned int)yuv420_width;
    height = (unsigned int)yuv420_height;

    for (y=0; y<height; y=y+32) {
        /* the bottom tile row may hold fewer than 32 source rows */
        rows_in_tile = height-y;
        if (rows_in_tile > 32) {
            rows_in_tile = 32;
        }

        for (x=0; x<width; x=x+64) {
            /* the right-most tile may hold fewer than 64 source bytes per row */
            bytes_in_row = width-x;
            if (bytes_in_row > 64) {
                bytes_in_row = 64;
            }

            tile = nv12t_dest+csc_linear_to_tiled_offset(x>>6, y>>5, width, height);
            line = yuv420_src+x+width*y;

            /* rows inside a tile are stored back to back, 64 bytes apart */
            for (row=0; row<rows_in_tile; row++) {
                memcpy(tile+64*row, line, bytes_in_row);
                line += width;
            }
        }
    }
}
/*
 * Returns the byte offset of the 64x32 tile (tile_x, tile_y) inside an
 * NV12T plane whose linear size is width x height.
 *
 * @param tile_x
 *   Horizontal tile index (byte x / 64)[in]
 *
 * @param tile_y
 *   Vertical tile index (row y / 32)[in]
 *
 * @param width
 *   Width of the linear plane in bytes[in]
 *
 * @param height
 *   Height of the linear plane in rows[in]
 */
static unsigned int csc_linear_to_tiled_interleave_offset(unsigned int tile_x, unsigned int tile_y,
                                                          unsigned int width, unsigned int height)
{
    /* x_block_num: width rounded up to 128 bytes, counted in 64-byte tiles */
    unsigned int tiles_per_row = (((width+127)>>7)<<7)>>6;
    /* height rounded up to a whole number of 32-row tiles */
    unsigned int padded_height = ((height+31)>>5)<<5;
    unsigned int tile_index;

    if (tile_y & 0x1) {
        /* odd formula: 2+x+((x>>2)<<2)+x_block_num*(y-1) */
        tile_index = (tile_y-1)*tiles_per_row+tile_x+2+((tile_x>>2)<<2);
    } else if (((tile_y+1)<<5) < padded_height) {
        /* even1 formula: x+(((x+2)>>2)<<2)+x_block_num*y */
        tile_index = tile_x+(((tile_x+2)>>2)<<2)+tile_y*tiles_per_row;
    } else {
        /* even2 formula (even tile row with no tile row below it): x+x_block_num*y */
        tile_index = tile_y*tiles_per_row+tile_x;
    }

    /* every tile holds 64*32 = 2048 bytes */
    return tile_index<<11;
}

/*
 * Converts and Interleaves linear to tiled
 * 1. UV of YUV420P to UV of NV12T
 *
 * Rows below yuv420_uv_height are never read: the bottom tile row is
 * processed with its real row count instead of in groups of 4 rows.
 * Nothing is done when a pointer is null or a dimension is not positive.
 *
 * @param nv12t_uv_dest
 *   UV plane address of NV12T[out]
 *
 * @param yuv420p_u_src
 *   U plane address of YUV420P[in]
 *
 * @param yuv420p_v_src
 *   V plane address of YUV420P[in]
 *
 * @param yuv420_width
 *   Width of YUV420[in]
 *
 * @param yuv420_uv_height
 *   Height/2 of YUV420[in]
 */
void csc_linear_to_tiled_interleave(char *nv12t_uv_dest, char *yuv420p_u_src, char *yuv420p_v_src, int yuv420_width, int yuv420_uv_height)
{
    unsigned int width, height, half_width;
    unsigned int x, y, row, col;
    unsigned int rows_in_tile;
    unsigned int plane_offset;
    char *tile;

    if (!nv12t_uv_dest || !yuv420p_u_src || !yuv420p_v_src ||
        yuv420_width <= 0 || yuv420_uv_height <= 0) {
        return;
    }

    width = (unsigned int)yuv420_width;
    height = (unsigned int)yuv420_uv_height;
    /* each source plane row holds width/2 samples */
    half_width = width/2;

    for (y=0; y<height; y=y+32) {
        /* the bottom tile row may hold fewer than 32 source rows */
        rows_in_tile = height-y;
        if (rows_in_tile > 32) {
            rows_in_tile = 32;
        }

        for (x=0; x<width; x=x+64) {
            tile = nv12t_uv_dest+csc_linear_to_tiled_interleave_offset(x>>6, y>>5, width, height);

            if (width-x >= 64) {
                /* full-width tile: 32 U and 32 V samples fill one 64-byte tile row */
                for (row=0; row<rows_in_tile; row++) {
                    plane_offset = x/2+half_width*(y+row);
                    csc_interleave_memcpy(tile+64*row, yuv420p_u_src+plane_offset, yuv420p_v_src+plane_offset, 32);
                }
            } else {
                /*
                 * partial tile at the right edge: 2 U and 2 V samples per call,
                 * the same call size the original code used.
                 * NOTE(review): when the width is not a multiple of 4 the last
                 * group still reads one sample past the end of the row; clamping
                 * it needs csc_interleave_memcpy to accept a count of 1 - confirm
                 * against its definition before changing.
                 */
                for (row=0; row<rows_in_tile; row++) {
                    for (col=0; x+col<width; col=col+4) {
                        plane_offset = (x+col)/2+half_width*(y+row);
                        csc_interleave_memcpy(tile+col+64*row, yuv420p_u_src+plane_offset, yuv420p_v_src+plane_offset, 2);
                    }
                }
            }
        }
    }
}
/*
 *
 * Copyright 2011 Samsung Electronics S.LSI Co. LTD
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * @file    csc_deinterleave_memcpy.s
 * @brief   SEC_OMX specific define
 * @author  ShinWon Lee (shinwon.lee@samsung.com)
 * @version 1.0
 * @history
 *   2011.7.01 : Create
 */

/*
 * De-interleaves src: even-indexed bytes go to dest1, odd-indexed bytes go
 * to dest2.
 *
 * r0   dest1      first destination[out]
 * r1   dest2      second destination[out]
 * r2   src        interleaved source[in]
 * r3   src_size   number of source bytes[in]; a trailing odd byte has no
 *                 partner and is not copied
 *
 * The copy runs in three stages: 256 source bytes per iteration, then at
 * most one 128-byte chunk, then one byte pair at a time.
 */
    .arch armv7-a
    .text
    .global csc_deinterleave_memcpy
    .type   csc_deinterleave_memcpy, %function
csc_deinterleave_memcpy:
    .fnstart

    @r0     dest1
    @r1     dest2
    @r2     src
    @r3     src_size
    @r4     i (source bytes consumed so far)
    @r5     temp1 (loop limit / bytes remaining)
    @r6     temp2
    @r7     temp3

    stmfd       sp!, {r4-r12,r14}       @ backup registers
    vpush       {d8-d15}                @ q4-q7 are callee-saved in the AAPCS

    mov         r4, #0
    cmp         r3, #256
    blt         LINEAR_SIZE_128

    bic         r5, r3, #0xFF           @ r5 = src_size rounded down to 256
LINEAR_SIZE_256_LOOP:
    pld         [r2, #64]
    vld2.8      {q0, q1}, [r2]!
    pld         [r2, #64]
    vld2.8      {q2, q3}, [r2]!
    pld         [r2, #64]
    vld2.8      {q4, q5}, [r2]!
    pld         [r2, #64]
    vld2.8      {q6, q7}, [r2]!
    pld         [r2, #64]
    vld2.8      {q8, q9}, [r2]!
    pld         [r2, #64]
    vld2.8      {q10, q11}, [r2]!
    vld2.8      {q12, q13}, [r2]!
    vld2.8      {q14, q15}, [r2]!

    vst1.8      {q0}, [r0]!
    vst1.8      {q2}, [r0]!
    vst1.8      {q4}, [r0]!
    vst1.8      {q6}, [r0]!
    vst1.8      {q8}, [r0]!
    vst1.8      {q10}, [r0]!
    vst1.8      {q12}, [r0]!
    vst1.8      {q14}, [r0]!

    vst1.8      {q1}, [r1]!
    vst1.8      {q3}, [r1]!
    vst1.8      {q5}, [r1]!
    vst1.8      {q7}, [r1]!
    vst1.8      {q9}, [r1]!
    vst1.8      {q11}, [r1]!
    vst1.8      {q13}, [r1]!
    vst1.8      {q15}, [r1]!

    add         r4, #256
    cmp         r4, r5
    blt         LINEAR_SIZE_256_LOOP

LINEAR_SIZE_128:
    sub         r5, r3, r4
    cmp         r5, #128                @ this stage consumes 128 source bytes
    blt         LINEAR_SIZE_2
    pld         [r2, #64]
    vld2.8      {q0, q1}, [r2]!
    pld         [r2, #64]
    vld2.8      {q2, q3}, [r2]!
    vld2.8      {q4, q5}, [r2]!
    vld2.8      {q6, q7}, [r2]!

    vst1.8      {q0}, [r0]!             @ stores follow load order
    vst1.8      {q2}, [r0]!
    vst1.8      {q4}, [r0]!
    vst1.8      {q6}, [r0]!

    vst1.8      {q1}, [r1]!
    vst1.8      {q3}, [r1]!
    vst1.8      {q5}, [r1]!
    vst1.8      {q7}, [r1]!

    add         r4, #128

LINEAR_SIZE_2:
    sub         r5, r3, r4
    cmp         r5, #2                  @ skip the tail when no full pair is left
    blt         RESTORE_REG
LINEAR_SIZE_2_LOOP:
    ldrb        r6, [r2], #1
    ldrb        r7, [r2], #1

    strb        r6, [r0], #1
    strb        r7, [r1], #1

    add         r4, #2
    sub         r5, r3, r4
    cmp         r5, #2
    bge         LINEAR_SIZE_2_LOOP

RESTORE_REG:
    vpop        {d8-d15}
    ldmfd       sp!, {r4-r12,r15}       @ restore registers
    .fnend
/*
 * @file    csc_interleave_memcpy.s
 * @brief   SEC_OMX specific define
 * @author  ShinWon Lee (shinwon.lee@samsung.com)
 * @version 1.0
 * @history
 *   2011.7.01 : Create
 */

/*
 * Interleaves src1 and src2 into dest: dest[2*k] = src1[k],
 * dest[2*k+1] = src2[k].
 *
 * r0   dest       interleaved destination[out], 2*src_size bytes are written
 * r1   src1       first source[in]
 * r2   src2       second source[in]
 * r3   src_size   number of bytes taken from EACH source[in]
 *
 * The copy runs in three stages: 128 bytes per source per iteration, then
 * 64 bytes per source, then one byte per source at a time.
 */
    .arch armv7-a
    .text
    .global csc_interleave_memcpy
    .type   csc_interleave_memcpy, %function
csc_interleave_memcpy:
    .fnstart

    @r0     dest
    @r1     src1
    @r2     src2
    @r3     src_size
    @r4     i (bytes consumed from each source so far)
    @r5     temp1 (loop limit / bytes remaining)
    @r6     temp2
    @r7     temp3

    stmfd       sp!, {r4-r12,r14}       @ backup registers
    vpush       {d8-d15}                @ q4-q7 are callee-saved in the AAPCS

    mov         r4, #0
    cmp         r3, #128
    blt         LINEAR_SIZE_64

    bic         r5, r3, #0x7F           @ r5 = src_size rounded down to 128
LINEAR_SIZE_128_LOOP:
    pld         [r1, #64]
    vld1.8      {q0}, [r1]!
    vld1.8      {q2}, [r1]!
    vld1.8      {q4}, [r1]!
    vld1.8      {q6}, [r1]!
    pld         [r2]
    vld1.8      {q8}, [r1]!
    vld1.8      {q10}, [r1]!
    vld1.8      {q12}, [r1]!
    vld1.8      {q14}, [r1]!
    pld         [r2, #64]
    vld1.8      {q1}, [r2]!
    vld1.8      {q3}, [r2]!
    vld1.8      {q5}, [r2]!
    vld1.8      {q7}, [r2]!
    vld1.8      {q9}, [r2]!
    vld1.8      {q11}, [r2]!
    vld1.8      {q13}, [r2]!
    vld1.8      {q15}, [r2]!

    vst2.8      {q0, q1}, [r0]!
    vst2.8      {q2, q3}, [r0]!
    vst2.8      {q4, q5}, [r0]!
    vst2.8      {q6, q7}, [r0]!
    vst2.8      {q8, q9}, [r0]!
    vst2.8      {q10, q11}, [r0]!
    pld         [r1]
    vst2.8      {q12, q13}, [r0]!
    vst2.8      {q14, q15}, [r0]!

    add         r4, #128
    cmp         r4, r5
    blt         LINEAR_SIZE_128_LOOP

LINEAR_SIZE_64:
    sub         r5, r3, r4
    cmp         r5, #64
    blt         LINEAR_SIZE_1
LINEAR_SIZE_64_LOOP:
    pld         [r2]
    vld1.8      {q0}, [r1]!
    vld1.8      {q2}, [r1]!
    vld1.8      {q4}, [r1]!
    vld1.8      {q6}, [r1]!
    vld1.8      {q1}, [r2]!
    vld1.8      {q3}, [r2]!
    vld1.8      {q5}, [r2]!
    vld1.8      {q7}, [r2]!

    vst2.8      {q0, q1}, [r0]!
    vst2.8      {q2, q3}, [r0]!
    pld         [r1]
    vst2.8      {q4, q5}, [r0]!
    vst2.8      {q6, q7}, [r0]!

    add         r4, #64
    sub         r5, r3, r4              @ loop again only if a full 64 bytes remain
    cmp         r5, #64
    bge         LINEAR_SIZE_64_LOOP

LINEAR_SIZE_1:
    cmp         r4, r3                  @ nothing left (also covers src_size <= 0)
    bge         RESTORE_REG
LINEAR_SIZE_1_LOOP:
    ldrb        r6, [r1], #1
    ldrb        r7, [r2], #1

    strb        r6, [r0], #1
    strb        r7, [r0], #1

    add         r4, #1
    cmp         r4, r3
    blt         LINEAR_SIZE_1_LOOP

RESTORE_REG:
    vpop        {d8-d15}
    ldmfd       sp!, {r4-r12,r15}       @ restore registers
    .fnend
UV of NV12T to UV of YUV420P + * + * @param yuv420_u_dest + * U plane address of YUV420P[out] + * + * @param yuv420_v_dest + * V plane address of YUV420P[out] + * + * @param nv12t_src + * UV plane address of NV12T[in] + * + * @param yuv420_width + * Width of YUV420[in] + * + * @param yuv420_uv_height + * Height/2 of YUV420[in] + */ + + .arch armv7-a + .text + .global csc_tiled_to_linear_deinterleave + .type csc_tiled_to_linear_deinterleave, %function +csc_tiled_to_linear_deinterleave: + .fnstart + + @r0 linear_u_dest + @r1 linear_v_dest + @r2 tiled_uv_src + @r3 linear_x_size + @r4 linear_y_size + @r5 j + @r6 i + @r7 tiled_addr + @r8 linear_addr + @r9 aligned_x_size + @r10 temp1 + @r11 temp2 + @r12 temp3 + @r14 temp4 + + stmfd sp!, {r4-r12,r14} @ backup registers + + ldr r4, [sp, #40] @ load linear_y_size to r4 + + mov r9, #0 + +LINEAR_X_SIZE_1024: + cmp r3, #1024 + blt LINEAR_X_SIZE_512 + + mov r6, #0 +LINEAR_X_SIZE_1024_LOOP: + mov r7, #0 @ tiled_offset = 0@ + mov r5, r6, asr #5 @ tiled_y_index = i>>5@ + and r10, r5, #0x1 + cmp r10, #0x1 + bne LINEAR_X_SIZE_1024_LOOP_EVEN +LINEAR_X_SIZE_1024_LOOP_ODD: + sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@ + add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@ + bic r10, #0x7F + mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@ + mul r7, r7, r10 + mov r5, #8 + mov r5, r5, lsl #11 + sub r5, r5, #32 + add r7, r7, #2 @ tiled_offset = tiled_offset+2@ + mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@ + add r11, r7, #2048 + add r12, r7, #4096 + add r14, r7, #6144 + b LINEAR_X_SIZE_1024_LOOP_MEMCPY + +LINEAR_X_SIZE_1024_LOOP_EVEN: + add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@ + bic r11, r11, #0x1F + add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@ + bic r10, #0x7F + mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@ + mul r7, r5, r10 + add r12, r6, #32 + cmp r12, r11 + mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@ + add r11, r7, #2048 + movlt r5, #8 + addlt r12, 
r7, #12288 + addlt r14, r7, #14336 + movge r5, #4 + addge r12, r7, #2048 + addge r14, r7, #2048 + mov r5, r5, lsl #11 + sub r5, r5, #32 + +LINEAR_X_SIZE_1024_LOOP_MEMCPY: + and r10, r6, #0x1F + mov r10, r10, lsl #6 + add r10, r2, r10 + + add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1) + add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1) + pld [r11] + vld2.8 {q0, q1}, [r7]! + pld [r11, #32] + add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1) + vld2.8 {q2, q3}, [r7], r5 + pld [r12] + vld2.8 {q4, q5}, [r11]! + pld [r12, #32] + add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1) + vld2.8 {q6, q7}, [r11], r5 + pld [r14] + vld2.8 {q8, q9}, [r12]! + pld [r14, #32] + mov r10, r3, asr #1 + vld2.8 {q10, q11}, [r12], r5 + mul r10, r10, r6 + vld2.8 {q12, q13}, [r14]! + vld2.8 {q14, q15}, [r14], r5 + + add r8, r0, r10 + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8]! + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8]! + vst1.8 {q8}, [r8]! + vst1.8 {q10}, [r8]! + vst1.8 {q12}, [r8]! + vst1.8 {q14}, [r8]! + + add r10, r1, r10 + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10]! + vst1.8 {q5}, [r10]! + vst1.8 {q7}, [r10]! + vst1.8 {q9}, [r10]! + vst1.8 {q11}, [r10]! + pld [r7] + vst1.8 {q13}, [r10]! + pld [r7, #32] + vst1.8 {q15}, [r10]! + + pld [r11] + vld2.8 {q0, q1}, [r7]! + pld [r11, #32] + vld2.8 {q2, q3}, [r7], r5 + pld [r12] + vld2.8 {q4, q5}, [r11]! + pld [r12, #32] + vld2.8 {q6, q7}, [r11], r5 + pld [r14] + vld2.8 {q8, q9}, [r12]! + pld [r14, #32] + vld2.8 {q10, q11}, [r12], r5 + vld2.8 {q12, q13}, [r14]! + vld2.8 {q14, q15}, [r14], r5 + + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8]! + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8]! + vst1.8 {q8}, [r8]! + vst1.8 {q10}, [r8]! + vst1.8 {q12}, [r8]! + vst1.8 {q14}, [r8]! + + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10]! + vst1.8 {q5}, [r10]! + vst1.8 {q7}, [r10]! + vst1.8 {q9}, [r10]! + vst1.8 {q11}, [r10]! + pld [r7] + vst1.8 {q13}, [r10]! + pld [r7, #32] + vst1.8 {q15}, [r10]! + + pld [r11] + vld2.8 {q0, q1}, [r7]! 
+ pld [r11, #32] + vld2.8 {q2, q3}, [r7], r5 + pld [r12] + vld2.8 {q4, q5}, [r11]! + pld [r12, #32] + vld2.8 {q6, q7}, [r11], r5 + pld [r14] + vld2.8 {q8, q9}, [r12]! + pld [r14, #32] + vld2.8 {q10, q11}, [r12], r5 + vld2.8 {q12, q13}, [r14]! + vld2.8 {q14, q15}, [r14], r5 + + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8]! + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8]! + vst1.8 {q8}, [r8]! + vst1.8 {q10}, [r8]! + vst1.8 {q12}, [r8]! + vst1.8 {q14}, [r8]! + + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10]! + vst1.8 {q5}, [r10]! + vst1.8 {q7}, [r10]! + vst1.8 {q9}, [r10]! + vst1.8 {q11}, [r10]! + pld [r7] + vst1.8 {q13}, [r10]! + pld [r7, #32] + vst1.8 {q15}, [r10]! + + pld [r11] + vld2.8 {q0, q1}, [r7]! + pld [r11, #32] + vld2.8 {q2, q3}, [r7] + pld [r12] + vld2.8 {q4, q5}, [r11]! + pld [r12, #32] + vld2.8 {q6, q7}, [r11] + pld [r14] + vld2.8 {q8, q9}, [r12]! + pld [r14, #32] + vld2.8 {q10, q11}, [r12] + vld2.8 {q12, q13}, [r14]! + vld2.8 {q14, q15}, [r14] + + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8]! + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8]! + vst1.8 {q8}, [r8]! + vst1.8 {q10}, [r8]! + vst1.8 {q12}, [r8]! + vst1.8 {q14}, [r8]! + + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10]! + vst1.8 {q5}, [r10]! + vst1.8 {q7}, [r10]! + vst1.8 {q9}, [r10]! + vst1.8 {q11}, [r10]! + add r6, #1 + vst1.8 {q13}, [r10]! + cmp r6, r4 + vst1.8 {q15}, [r10]! 
+ + blt LINEAR_X_SIZE_1024_LOOP + + mov r9, #1024 + +LINEAR_X_SIZE_512: + sub r10, r3, r9 + cmp r10, #512 + blt LINEAR_X_SIZE_256 + + mov r6, #0 +LINEAR_X_SIZE_512_LOOP: + mov r7, #0 @ tiled_offset = 0@ + mov r5, r6, asr #5 @ tiled_y_index = i>>5@ + and r10, r5, #0x1 + cmp r10, #0x1 + bne LINEAR_X_SIZE_512_LOOP_EVEN +LINEAR_X_SIZE_512_LOOP_ODD: + sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@ + add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@ + bic r10, #0x7F + mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@ + mul r7, r7, r10 + mov r5, #8 + mov r5, r5, lsl #11 + add r7, r7, #2 @ tiled_offset = tiled_offset+2@ + mov r10, r9, asr #5 + add r7, r7, r10 + mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@ + add r11, r7, #2048 + add r12, r7, #4096 + add r14, r7, #6144 + sub r5, r5, #32 + b LINEAR_X_SIZE_512_LOOP_MEMCPY + +LINEAR_X_SIZE_512_LOOP_EVEN: + add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@ + bic r11, r11, #0x1F + add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@ + bic r10, #0x7F + mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@ + mul r7, r5, r10 + add r12, r6, #32 + cmp r12, r11 + mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@ + movlt r5, #8 + movlt r10, r9, asr #5 + movge r10, r9, asr #6 + add r7, r7, r10, lsl #11 + add r11, r7, #2048 + addlt r12, r7, #12288 + addlt r14, r7, #14336 + movge r5, #4 + addge r12, r7, #4096 + addge r14, r7, #6144 + mov r5, r5, lsl #11 + sub r5, r5, #32 + +LINEAR_X_SIZE_512_LOOP_MEMCPY: + and r10, r6, #0x1F + mov r10, r10, lsl #6 + add r10, r2, r10 + + add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1) + add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1) + pld [r11] + vld2.8 {q0, q1}, [r7]! + pld [r11, #32] + add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1) + vld2.8 {q2, q3}, [r7], r5 + pld [r12] + vld2.8 {q4, q5}, [r11]! 
+ pld [r12, #32] + add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1) + vld2.8 {q6, q7}, [r11], r5 + pld [r14] + mov r10, r3, asr #1 + vld2.8 {q8, q9}, [r12]! + pld [r14, #32] + mul r10, r10, r6 + vld2.8 {q10, q11}, [r12], r5 + add r8, r0, r10 + vld2.8 {q12, q13}, [r14]! + add r8, r8, r9, asr #1 + vld2.8 {q14, q15}, [r14], r5 + + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8]! + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8]! + vst1.8 {q8}, [r8]! + vst1.8 {q10}, [r8]! + vst1.8 {q12}, [r8]! + add r10, r1, r10 + vst1.8 {q14}, [r8]! + + add r10, r10, r9, asr #1 + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10]! + vst1.8 {q5}, [r10]! + vst1.8 {q7}, [r10]! + vst1.8 {q9}, [r10]! + vst1.8 {q11}, [r10]! + pld [r7] + vst1.8 {q13}, [r10]! + pld [r7, #32] + vst1.8 {q15}, [r10]! + + pld [r11] + vld2.8 {q0, q1}, [r7]! + pld [r11, #32] + vld2.8 {q2, q3}, [r7] + pld [r12] + vld2.8 {q4, q5}, [r11]! + pld [r12, #32] + vld2.8 {q6, q7}, [r11] + pld [r14] + vld2.8 {q8, q9}, [r12]! + pld [r14, #32] + vld2.8 {q10, q11}, [r12] + vld2.8 {q12, q13}, [r14]! + vld2.8 {q14, q15}, [r14] + + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8]! + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8]! + vst1.8 {q8}, [r8]! + vst1.8 {q10}, [r8]! + vst1.8 {q12}, [r8]! + vst1.8 {q14}, [r8]! + + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10]! + vst1.8 {q5}, [r10]! + vst1.8 {q7}, [r10]! + vst1.8 {q9}, [r10]! + vst1.8 {q11}, [r10]! + add r6, #1 + vst1.8 {q13}, [r10]! + cmp r6, r4 + vst1.8 {q15}, [r10]! 
+ + blt LINEAR_X_SIZE_512_LOOP + + add r9, r9, #512 + +LINEAR_X_SIZE_256: + sub r10, r3, r9 + cmp r10, #256 + blt LINEAR_X_SIZE_128 + + mov r6, #0 +LINEAR_X_SIZE_256_LOOP: + mov r7, #0 @ tiled_offset = 0@ + mov r5, r6, asr #5 @ tiled_y_index = i>>5@ + and r10, r5, #0x1 + cmp r10, #0x1 + bne LINEAR_X_SIZE_256_LOOP_EVEN +LINEAR_X_SIZE_256_LOOP_ODD: + sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@ + add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@ + bic r10, #0x7F + mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@ + mul r7, r7, r10 + add r7, r7, #2 @ tiled_offset = tiled_offset+2@ + mov r10, r9, asr #5 + add r7, r7, r10 + mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@ + add r11, r7, #2048 + add r12, r7, #4096 + add r14, r7, #6144 + b LINEAR_X_SIZE_256_LOOP_MEMCPY + +LINEAR_X_SIZE_256_LOOP_EVEN: + add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@ + bic r11, r11, #0x1F + add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@ + bic r10, #0x7F + mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@ + mul r7, r5, r10 + mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@ + add r12, r6, #32 + cmp r12, r11 + movlt r10, r9, asr #5 + addlt r7, r7, r10, lsl #11 + addlt r11, r7, #2048 + addlt r12, r7, #12288 + addlt r14, r7, #14336 + movge r10, r9, asr #6 + addge r7, r7, r10, lsl #11 + addge r11, r7, #2048 + addge r12, r7, #4096 + addge r14, r7, #6144 + +LINEAR_X_SIZE_256_LOOP_MEMCPY: + and r10, r6, #0x1F + mov r10, r10, lsl #6 + add r10, r2, r10 + + add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1) + add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1) + pld [r11] + vld2.8 {q0, q1}, [r7]! + pld [r11, #32] + add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1) + vld2.8 {q2, q3}, [r7] + pld [r12] + vld2.8 {q4, q5}, [r11]! + pld [r12, #32] + add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1) + vld2.8 {q6, q7}, [r11] + pld [r14] + vld2.8 {q8, q9}, [r12]! 
+ pld [r14, #32] + mov r10, r3, asr #1 + vld2.8 {q10, q11}, [r12] + mul r10, r10, r6 + vld2.8 {q12, q13}, [r14]! + add r8, r0, r10 + vld2.8 {q14, q15}, [r14] + + add r8, r8, r9, asr #1 + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8]! + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8]! + vst1.8 {q8}, [r8]! + vst1.8 {q10}, [r8]! + vst1.8 {q12}, [r8]! + add r10, r1, r10 + vst1.8 {q14}, [r8]! + + add r10, r10, r9, asr #1 + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10]! + vst1.8 {q5}, [r10]! + vst1.8 {q7}, [r10]! + vst1.8 {q9}, [r10]! + vst1.8 {q11}, [r10]! + add r6, #1 + vst1.8 {q13}, [r10]! + cmp r6, r4 + vst1.8 {q15}, [r10]! + blt LINEAR_X_SIZE_256_LOOP + + add r9, r9, #256 + +LINEAR_X_SIZE_128: + sub r10, r3, r9 + cmp r10, #128 + blt LINEAR_X_SIZE_64 + + mov r6, #0 +LINEAR_X_SIZE_128_LOOP: + mov r7, #0 @ tiled_offset = 0@ + mov r5, r6, asr #5 @ tiled_y_index = i>>5@ + and r10, r5, #0x1 + cmp r10, #0x1 + bne LINEAR_X_SIZE_128_LOOP_EVEN +LINEAR_X_SIZE_128_LOOP_ODD: + sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@ + add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@ + bic r10, #0x7F + mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@ + mul r7, r7, r10 + add r7, r7, #2 @ tiled_offset = tiled_offset+2@ + mov r10, r9, asr #5 + add r7, r7, r10 + mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@ + add r11, r7, #2048 + b LINEAR_X_SIZE_128_LOOP_MEMCPY + +LINEAR_X_SIZE_128_LOOP_EVEN: + add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@ + bic r11, r11, #0x1F + add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@ + bic r10, #0x7F + mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@ + mul r7, r5, r10 + mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@ + add r12, r6, #32 + cmp r12, r11 + movlt r10, r9, asr #5 + movge r10, r9, asr #6 + add r7, r7, r10, lsl #11 + add r11, r7, #2048 + +LINEAR_X_SIZE_128_LOOP_MEMCPY: + and r10, r6, #0x1F + mov r10, r10, lsl #6 + add r10, r2, r10 + + add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1) + add r11, r11, r10 @ 
tiled_addr1 = tiled_src+64*(temp1) + pld [r11] + vld2.8 {q0, q1}, [r7]! + pld [r11, #32] + vld2.8 {q2, q3}, [r7]! + pld [r7] + vld2.8 {q4, q5}, [r11]! + mov r10, r3, asr #1 + pld [r7, #32] + vld2.8 {q6, q7}, [r11]! + mul r10, r10, r6 + pld [r11] + vld2.8 {q8, q9}, [r7]! + add r10, r10, r9, asr #1 + pld [r11, #32] + vld2.8 {q10, q11}, [r7]! + add r8, r0, r10 + vld2.8 {q12, q13}, [r11]! + mov r14, r3, asr #1 + vld2.8 {q14, q15}, [r11]! + + sub r14, r14, #48 + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8]! + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8], r14 + vst1.8 {q8}, [r8]! + vst1.8 {q10}, [r8]! + vst1.8 {q12}, [r8]! + vst1.8 {q14}, [r8]! + + add r10, r1, r10 + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10]! + vst1.8 {q5}, [r10]! + vst1.8 {q7}, [r10], r14 + vst1.8 {q9}, [r10]! + vst1.8 {q11}, [r10]! + add r6, #2 + vst1.8 {q13}, [r10]! + cmp r6, r4 + vst1.8 {q15}, [r10]! + + blt LINEAR_X_SIZE_128_LOOP + + add r9, r9, #128 + +LINEAR_X_SIZE_64: + sub r10, r3, r9 + cmp r10, #64 + blt LINEAR_X_SIZE_4 + + mov r5, r9 + mov r6, #0 + +LINEAR_X_SIZE_64_LOOP: + bl GET_TILED_OFFSET + +LINEAR_X_SIZE_64_LOOP_MEMCPY: + and r10, r6, #0x1F + mov r14, r3, asr #1 + mov r10, r10, lsl #6 + sub r14, r14, #16 + add r10, r2, r10 + + add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1) + pld [r7, #64] + vld2.8 {q0, q1}, [r7]! + mov r10, r3, asr #1 + pld [r7, #64] + vld2.8 {q2, q3}, [r7]! + mul r10, r10, r6 + vld2.8 {q4, q5}, [r7]! + add r10, r10, r9, asr #1 + vld2.8 {q6, q7}, [r7]! + add r8, r0, r10 + + vst1.8 {q0}, [r8]! + vst1.8 {q2}, [r8], r14 + vst1.8 {q4}, [r8]! + vst1.8 {q6}, [r8], r14 + + add r10, r1, r10 + vst1.8 {q1}, [r10]! + vst1.8 {q3}, [r10], r14 + add r6, #2 + vst1.8 {q5}, [r10]! 
+ cmp r6, r4 + vst1.8 {q7}, [r10], r14 + + blt LINEAR_X_SIZE_64_LOOP + + add r9, r9, #64 + +LINEAR_X_SIZE_4: + cmp r9, r3 + beq RESTORE_REG + + mov r6, #0 @ i = 0 +LINEAR_Y_SIZE_4_LOOP: + + mov r5, r9 @ j = aligned_x_size +LINEAR_X_SIZE_4_LOOP: + + bl GET_TILED_OFFSET + + mov r11, r3, asr #1 @ temp1 = linear_x_size/2 + mul r11, r11, r6 @ temp1 = temp1*(i) + add r11, r11, r5, asr #1 @ temp1 = temp1+j/2 + mov r12, r3, asr #1 @ temp2 = linear_x_size/2 + sub r12, r12, #1 @ temp2 = linear_x_size-1 + + add r8, r0, r11 @ linear_addr = linear_dest_u+temp1 + add r11, r1, r11 @ temp1 = linear_dest_v+temp1 + add r7, r2, r7 @ tiled_addr = tiled_src+tiled_addr + and r14, r6, #0x1F @ temp3 = i&0x1F@ + mov r14, r14, lsl #6 @ temp3 = temp3*64 + add r7, r7, r14 @ tiled_addr = tiled_addr+temp3 + and r14, r5, #0x3F @ temp3 = j&0x3F + add r7, r7, r14 @ tiled_addr = tiled_addr+temp3 + + ldrh r10, [r7], #2 + ldrh r14, [r7], #62 + strb r10, [r8], #1 + mov r10, r10, asr #8 + strb r10, [r11], #1 + strb r14, [r8], r12 + mov r14, r14, asr #8 + strb r14, [r11], r12 + + ldrh r10, [r7], #2 + ldrh r14, [r7], #62 + strb r10, [r8], #1 + mov r10, r10, asr #8 + strb r10, [r11], #1 + strb r14, [r8], r12 + mov r14, r14, asr #8 + strb r14, [r11], r12 + + add r5, r5, #4 @ j = j+4 + cmp r5, r3 @ j<linear_x_size + blt LINEAR_X_SIZE_4_LOOP + + add r6, r6, #2 @ i = i+4 + cmp r6, r4 @ i<linear_y_size + blt LINEAR_Y_SIZE_4_LOOP + +RESTORE_REG: + ldmfd sp!, {r4-r12,r15} @ restore registers + +GET_TILED_OFFSET: + stmfd sp!, {r14} + + mov r12, r6, asr #5 @ temp2 = i>>5 + mov r11, r5, asr #6 @ temp1 = j>>6 + + and r14, r12, #0x1 @ if (temp2 & 0x1) + cmp r14, #0x1 + bne GET_TILED_OFFSET_EVEN_FORMULA_1 + +GET_TILED_OFFSET_ODD_FORMULA: + sub r7, r12, #1 @ tiled_addr = temp2-1 + add r14, r3, #127 @ temp3 = linear_x_size+127 + bic r14, r14, #0x7F @ temp3 = (temp3 >>7)<<7 + mov r14, r14, asr #6 @ temp3 = temp3>>6 + mul r7, r7, r14 @ tiled_addr = tiled_addr*temp3 + add r7, r7, r11 @ tiled_addr = tiled_addr+temp1 + add 
r7, r7, #2 @ tiled_addr = tiled_addr+2 + bic r14, r11, #0x3 @ temp3 = (temp1>>2)<<2 + add r7, r7, r14 @ tiled_addr = tiled_addr+temp3 + mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11 + b GET_TILED_OFFSET_RETURN + +GET_TILED_OFFSET_EVEN_FORMULA_1: + add r14, r4, #31 @ temp3 = linear_y_size+31 + bic r14, r14, #0x1F @ temp3 = (temp3>>5)<<5 + sub r14, r14, #32 @ temp3 = temp3 - 32 + cmp r6, r14 @ if (i<(temp3-32)) { + bge GET_TILED_OFFSET_EVEN_FORMULA_2 + add r14, r11, #2 @ temp3 = temp1+2 + bic r14, r14, #3 @ temp3 = (temp3>>2)<<2 + add r7, r11, r14 @ tiled_addr = temp1+temp3 + add r14, r3, #127 @ temp3 = linear_x_size+127 + bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7 + mov r14, r14, asr #6 @ temp3 = temp3>>6 + mul r12, r12, r14 @ tiled_y_index = tiled_y_index*temp3 + add r7, r7, r12 @ tiled_addr = tiled_addr+tiled_y_index + mov r7, r7, lsl #11 @ + b GET_TILED_OFFSET_RETURN + +GET_TILED_OFFSET_EVEN_FORMULA_2: + add r14, r3, #127 @ temp3 = linear_x_size+127 + bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7 + mov r14, r14, asr #6 @ temp3 = temp3>>6 + mul r7, r12, r14 @ tiled_addr = temp2*temp3 + add r7, r7, r11 @ tiled_addr = tiled_addr+temp3 + mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11@ + +GET_TILED_OFFSET_RETURN: + ldmfd sp!, {r15} @ restore registers + .fnend diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s new file mode 100644 index 0000000..d71ee17 --- /dev/null +++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s @@ -0,0 +1,680 @@ +/* + * + * Copyright 2011 Samsung Electronics S.LSI Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file    csc_nv12t_yuv420_y_neon.s
+ * @brief   SEC_OMX specific define
+ * @author  ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ *   2011.7.01 : Create
+ */
+
+/*
+ * Converts tiled data to linear.
+ * 1. Y of NV12T to Y of YUV420P
+ * 2. Y of NV12T to Y of YUV420S
+ * 3. UV of NV12T to UV of YUV420S
+ *
+ * The plane is converted in vertical strips, widest first: one strip of
+ * 1024 columns (only if the width is at least 1024), then at most one strip
+ * each of 512, 256, 128 and 64 columns, and finally whatever columns remain
+ * in 4x4 byte blocks. r8 holds the number of columns already converted.
+ *
+ * Address arithmetic used throughout: a source tile index is shifted left by
+ * 11 (2048 bytes per tile), a row inside a tile is (i & 0x1F) * 64 bytes and
+ * a column inside a tile is (j & 0x3F) bytes.
+ *
+ * NOTE(review): the 128-, 64- and 4-column stages advance i by 2, 4 and 4
+ * rows per iteration, and the last stage copies 4 bytes per access, so the
+ * routine appears to expect a height that is a multiple of 4 and a width
+ * that is a multiple of 4 -- confirm against the callers.
+ *
+ * @param yuv420_dest (r0)
+ *   Y or UV plane address of YUV420[out]
+ *
+ * @param nv12t_src (r1)
+ *   Y or UV plane address of NV12T[in]
+ *
+ * @param yuv420_width (r2)
+ *   Width of YUV420[in]
+ *
+ * @param yuv420_height (r3)
+ *   Y: Height of YUV420, UV: Height/2 of YUV420[in]
+ */
+
+    .arch armv7-a
+    .text
+    .global csc_tiled_to_linear
+    .type csc_tiled_to_linear, %function
+csc_tiled_to_linear:
+    .fnstart
+
+    @ Register roles. r4 and r9 are also reused as scratch by some stages
+    @ (r4: tiled_y_index / tile stride, r9: temporary row offsets).
+    @r0     linear_dest
+    @r1     tiled_src
+    @r2     linear_x_size
+    @r3     linear_y_size
+    @r4     j
+    @r5     i
+    @r6     tiled_addr
+    @r7     linear_addr
+    @r8     aligned_x_size
+    @r9     aligned_y_size
+    @r10    temp1
+    @r11    temp2
+    @r12    temp3
+    @r14    temp4
+
+    stmfd       sp!, {r4-r12,r14}       @ backup registers
+
+    mov         r8, #0                  @ no columns converted yet
+    cmp         r2, #1024
+    blt         LINEAR_X_SIZE_512       @ width < 1024: skip the 1024-column stage
+
+    @ ---- Stage 1024: columns 0..1023, one row (i) per loop iteration ----
+LINEAR_X_SIZE_1024:
+
+    mov         r5, #0                  @ i = 0
+LINEAR_X_SIZE_1024_LOOP:
+    mov         r6, #0                  @ tiled_offset = 0@
+    mov         r4, r5, asr #5          @ tiled_y_index = i>>5@
+    and         r10, r4, #0x1
+    cmp         r10, #0x1
+    bne         LINEAR_X_SIZE_1024_LOOP_EVEN
+    @ Odd tile row: the four source tiles are consecutive (+0, +2048, +4096, +6144).
+LINEAR_X_SIZE_1024_LOOP_ODD:
+    sub         r6, r4, #1              @ tiled_offset = tiled_y_index-1@
+    add         r10, r2, #127           @ temp1 = ((linear_x_size+127)>>7)<<7@
+    bic         r10, #0x7F
+    mov         r10, r10, asr #6        @ tiled_offset = tiled_offset*(temp1>>6)@
+    mul         r6, r6, r10
+    mov         r4, #8
+    mov         r4, r4, lsl #11
+    sub         r4, r4, #32             @ r4 = 8*2048-32: with the preceding 32-byte load this steps 8 tiles ahead
+    add         r6, r6, #2              @ tiled_offset = tiled_offset+2@
+    mov         r6, r6, lsl #11         @ tiled_offset = tiled_offset<<11@
+    add         r11, r6, #2048
+    add         r12, r6, #4096
+    add         r14, r6, #6144
+    b           LINEAR_X_SIZE_1024_LOOP_MEMCPY
+
+    @ Even tile row: when i+32 is below the height rounded up to 32 the third
+    @ and fourth tiles sit 6 and 7 tiles after the first and the stride is
+    @ 8 tiles; otherwise the four tiles are consecutive and the stride is 4.
+LINEAR_X_SIZE_1024_LOOP_EVEN:
+    add         r11, r3, #31            @ temp2 = ((linear_y_size+31)>>5)<<5@
+    bic         r11, r11, #0x1F
+    add         r10, r2, #127           @ temp1 = ((linear_x_size+127)>>7)<<7@
+    bic         r10, #0x7F
+    mov         r10, r10, asr #6        @ tiled_offset = tiled_y_index*(temp1>>6)@
+    mul         r6, r4, r10
+    add         r12, r5, #32
+    cmp         r12, r11                @ flags consumed by the movlt/addlt/movge/addge below
+    mov         r6, r6, lsl #11         @ tiled_offset = tiled_offset<<11@
+    add         r11, r6, #2048
+    movlt       r4, #8
+    addlt       r12, r6, #12288
+    addlt       r14, r6, #14336
+    movge       r4, #4
+    addge       r12, r6, #4096
+    addge       r14, r6, #6144
+    mov         r4, r4, lsl #11
+    sub         r4, r4, #32             @ r4 = (8 or 4)*2048-32: tile stride minus the 32 bytes already consumed
+
+    @ Copy 4 groups of 256 bytes (4 tiles x 64 bytes each) to one linear row.
+LINEAR_X_SIZE_1024_LOOP_MEMCPY:
+    and         r10, r5, #0x1F
+    mov         r10, r10, lsl #6
+    add         r10, r1, r10            @ temp1 = tiled_src + 64*(i&0x1F)
+
+    add         r6, r6, r10             @ tiled_addr = tiled_src+64*(temp1)
+    add         r11, r11, r10           @ tiled_addr1 = tiled_src+64*(temp1)
+    pld         [r11]
+    vld1.8      {q0, q1}, [r6]!
+    pld         [r11, #32]
+    add         r12, r12, r10           @ tiled_addr2 = tiled_src+64*(temp1)
+    vld1.8      {q2, q3}, [r6], r4
+    pld         [r12]
+    vld1.8      {q4, q5}, [r11]!
+    pld         [r12, #32]
+    add         r14, r14, r10           @ tiled_addr3 = tiled_src+64*(temp1)
+    vld1.8      {q6, q7}, [r11], r4
+    pld         [r14]
+    vld1.8      {q8, q9}, [r12]!
+    pld         [r14, #32]
+    mul         r7, r2, r5              @ linear_addr = linear_x_size*i
+    vld1.8      {q10, q11}, [r12], r4
+    add         r7, r7, r0              @ linear_addr = linear_dest + linear_x_size*i
+    vld1.8      {q12, q13}, [r14]!
+    vld1.8      {q14, q15}, [r14], r4
+
+    vst1.8      {q0, q1}, [r7]!
+    vst1.8      {q2, q3}, [r7]!
+    vst1.8      {q4, q5}, [r7]!
+    vst1.8      {q6, q7}, [r7]!
+    vst1.8      {q8, q9}, [r7]!
+    vst1.8      {q10, q11}, [r7]!
+    pld         [r6]
+    vst1.8      {q12, q13}, [r7]!
+    pld         [r6, #32]
+    vst1.8      {q14, q15}, [r7]!
+
+    pld         [r11]
+    vld1.8      {q0, q1}, [r6]!
+    pld         [r11, #32]
+    vld1.8      {q2, q3}, [r6], r4
+
+    pld         [r12]
+    vld1.8      {q4, q5}, [r11]!
+    pld         [r12, #32]
+    vld1.8      {q6, q7}, [r11], r4
+    pld         [r14]
+    vld1.8      {q8, q9}, [r12]!
+    pld         [r14, #32]
+    vld1.8      {q10, q11}, [r12], r4
+    vld1.8      {q12, q13}, [r14]!
+    vld1.8      {q14, q15}, [r14], r4
+
+    vst1.8      {q0, q1}, [r7]!
+    vst1.8      {q2, q3}, [r7]!
+    vst1.8      {q4, q5}, [r7]!
+    vst1.8      {q6, q7}, [r7]!
+    vst1.8      {q8, q9}, [r7]!
+    vst1.8      {q10, q11}, [r7]!
+    pld         [r6]
+    vst1.8      {q12, q13}, [r7]!
+    pld         [r6, #32]
+    vst1.8      {q14, q15}, [r7]!
+
+    pld         [r11]
+    vld1.8      {q0, q1}, [r6]!
+    pld         [r11, #32]
+    vld1.8      {q2, q3}, [r6], r4
+    pld         [r12]
+    vld1.8      {q4, q5}, [r11]!
+    pld         [r12, #32]
+    vld1.8      {q6, q7}, [r11], r4
+    pld         [r14]
+    vld1.8      {q8, q9}, [r12]!
+    pld         [r14, #32]
+    vld1.8      {q10, q11}, [r12], r4
+    vld1.8      {q12, q13}, [r14]!
+    vld1.8      {q14, q15}, [r14], r4
+
+    vst1.8      {q0, q1}, [r7]!
+    vst1.8      {q2, q3}, [r7]!
+    vst1.8      {q4, q5}, [r7]!
+    vst1.8      {q6, q7}, [r7]!
+    vst1.8      {q8, q9}, [r7]!
+    vst1.8      {q10, q11}, [r7]!
+    pld         [r6]
+    vst1.8      {q12, q13}, [r7]!
+    pld         [r6, #32]
+    vst1.8      {q14, q15}, [r7]!
+
+    pld         [r11]
+    vld1.8      {q0, q1}, [r6]!
+    pld         [r11, #32]
+    vld1.8      {q2, q3}, [r6]
+    pld         [r12]
+    vld1.8      {q4, q5}, [r11]!
+    pld         [r12, #32]
+    vld1.8      {q6, q7}, [r11]
+    pld         [r14]
+    vld1.8      {q8, q9}, [r12]!
+    pld         [r14, #32]
+    vld1.8      {q10, q11}, [r12]
+    vld1.8      {q12, q13}, [r14]!
+    vld1.8      {q14, q15}, [r14]
+
+    vst1.8      {q0, q1}, [r7]!
+    vst1.8      {q2, q3}, [r7]!
+    vst1.8      {q4, q5}, [r7]!
+    vst1.8      {q6, q7}, [r7]!
+    vst1.8      {q8, q9}, [r7]!
+    vst1.8      {q10, q11}, [r7]!
+    add         r5, #1                  @ i = i+1
+    vst1.8      {q12, q13}, [r7]!
+    cmp         r5, r3                  @ i<linear_y_size (NEON stores do not alter the flags)
+    vst1.8      {q14, q15}, [r7]!
+
+    blt         LINEAR_X_SIZE_1024_LOOP
+
+    mov         r8, #1024               @ aligned_x_size = 1024
+
+    @ ---- Stage 512: 512 columns starting at aligned_x_size, one row per iteration ----
+LINEAR_X_SIZE_512:
+
+    sub         r14, r2, r8             @ remaining columns
+    cmp         r14, #512
+    blt         LINEAR_X_SIZE_256
+
+    mov         r5, #0                  @ i = 0
+LINEAR_X_SIZE_512_LOOP:
+    mov         r6, #0
+    mov         r4, r5, asr #5          @ tiled_y_index = i>>5
+    and         r10, r4, #0x1
+    cmp         r10, #0x1
+    bne         LINEAR_X_SIZE_512_LOOP_EVEN
+
+LINEAR_X_SIZE_512_LOOP_ODD:
+    sub         r6, r4, #1              @ tiled_offset = tiled_y_index-1
+    add         r10, r2, #127           @ temp1 = ((linear_x_size+127)>>7)<<7@
+    bic         r10, #0x7F
+    mov         r10, r10, asr #6        @ tiled_offset = tiled_offset*(temp1>>6)@
+    mul         r6, r6, r10
+    mov         r4, #8
+    mov         r4, r4, lsl #11
+    sub         r4, r4, #32             @ r4 = 8*2048-32 (tile stride minus the 32 bytes already consumed)
+    add         r6, r6, #2              @ tiled_offset = tiled_offset+2@
+    mov         r10, r8, asr #5         @ temp1 = aligned_x_size>>5@
+    add         r6, r6, r10             @ tiled_offset = tiled_offset+temp1@
+    mov         r6, r6, lsl #11         @ tiled_offset = tiled_offset<<11
+    add         r11, r6, #2048
+    add         r12, r6, #4096
+    add         r14, r6, #6144
+    b           LINEAR_X_SIZE_512_LOOP_MEMCPY
+
+LINEAR_X_SIZE_512_LOOP_EVEN:
+    add         r11, r3, #31            @ temp2 = ((linear_y_size+31)>>5)<<5@
+    bic         r11, r11, #0x1F
+    add         r10, r2, #127           @ temp1 = ((linear_x_size+127)>>7)<<7@
+    bic         r10, #0x7F
+    mov         r10, r10, asr #6        @ tiled_offset = tiled_y_index*(temp1>>6)@
+    mul         r6, r4, r10
+    add         r12, r5, #32
+    cmp         r12, r11                @ flags consumed by the conditional instructions below
+    mov         r6, r6, lsl #11         @ tiled_offset = tiled_offset<<11@
+    movlt       r4, #8
+    movlt       r10, r8, asr #5         @ temp1 = aligned_x_size>>5@
+    movge       r10, r8, asr #6         @ temp1 = aligned_x_size>>6@
+    add         r6, r6, r10, lsl #11    @ tiled_offset = tiled_offset+(temp1<<11)@
+    add         r11, r6, #2048
+    addlt       r12, r6, #12288
+    addlt       r14, r6, #14336
+    movge       r4, #4
+    addge       r12, r6, #4096
+    addge       r14, r6, #6144
+    mov         r4, r4, lsl #11
+    sub         r4, r4, #32             @ r4 = (8 or 4)*2048-32
+
+    @ Copy 2 groups of 256 bytes (4 tiles x 64 bytes each) to one linear row.
+LINEAR_X_SIZE_512_LOOP_MEMCPY:
+    and         r10, r5, #0x1F
+    mov         r10, r10, lsl #6
+    add         r10, r1, r10            @ temp1 = tiled_src + 64*(i&0x1F)
+
+    add         r6, r6, r10             @ tiled_addr = tiled_src+64*(temp1)
+    add         r11, r11, r10           @ tiled_addr1 = tiled_src+64*(temp1)
+    pld         [r11]
+    vld1.8      {q0, q1}, [r6]!
+    pld         [r11, #32]
+    add         r12, r12, r10           @ tiled_addr2 = tiled_src+64*(temp1)
+    vld1.8      {q2, q3}, [r6], r4
+    pld         [r12]
+    vld1.8      {q4, q5}, [r11]!
+    pld         [r12, #32]
+    add         r14, r14, r10           @ tiled_addr3 = tiled_src+64*(temp1)
+    vld1.8      {q6, q7}, [r11], r4
+    pld         [r14]
+    vld1.8      {q8, q9}, [r12]!
+    pld         [r14, #32]
+    mul         r7, r2, r5              @ linear_addr = linear_x_size*i
+    vld1.8      {q10, q11}, [r12], r4
+    add         r7, r7, r8              @ linear_addr = linear_addr+aligned_x_size
+    vld1.8      {q12, q13}, [r14]!
+    vld1.8      {q14, q15}, [r14], r4
+
+    add         r7, r7, r0              @ linear_addr = linear_dest+linear_addr
+    vst1.8      {q0, q1}, [r7]!
+    vst1.8      {q2, q3}, [r7]!
+    vst1.8      {q4, q5}, [r7]!
+    vst1.8      {q6, q7}, [r7]!
+    vst1.8      {q8, q9}, [r7]!
+    vst1.8      {q10, q11}, [r7]!
+    pld         [r6]
+    vst1.8      {q12, q13}, [r7]!
+    pld         [r6, #32]
+    vst1.8      {q14, q15}, [r7]!
+
+    pld         [r11]
+    vld1.8      {q0, q1}, [r6]!
+    pld         [r11, #32]
+    vld1.8      {q2, q3}, [r6], r4
+    pld         [r12]
+    vld1.8      {q4, q5}, [r11]!
+    pld         [r12, #32]
+    vld1.8      {q6, q7}, [r11], r4
+    pld         [r14]
+    vld1.8      {q8, q9}, [r12]!
+    pld         [r14, #32]
+    vld1.8      {q10, q11}, [r12], r4
+    vld1.8      {q12, q13}, [r14]!
+    vld1.8      {q14, q15}, [r14], r4
+
+    vst1.8      {q0, q1}, [r7]!
+    vst1.8      {q2, q3}, [r7]!
+    vst1.8      {q4, q5}, [r7]!
+    vst1.8      {q6, q7}, [r7]!
+    vst1.8      {q8, q9}, [r7]!
+    vst1.8      {q10, q11}, [r7]!
+    add         r5, #1                  @ i = i+1
+    vst1.8      {q12, q13}, [r7]!
+    cmp         r5, r3                  @ i<linear_y_size
+    vst1.8      {q14, q15}, [r7]!
+
+    blt         LINEAR_X_SIZE_512_LOOP
+
+    add         r8, r8, #512            @ aligned_x_size += 512
+
+    @ ---- Stage 256: 256 columns starting at aligned_x_size, one row per iteration ----
+LINEAR_X_SIZE_256:
+
+    sub         r14, r2, r8             @ remaining columns
+    cmp         r14, #256
+    blt         LINEAR_X_SIZE_128
+
+    mov         r5, #0                  @ i = 0
+LINEAR_X_SIZE_256_LOOP:
+    mov         r6, #0
+    mov         r4, r5, asr #5          @ tiled_y_index = i>>5
+    and         r10, r4, #0x1
+    cmp         r10, #0x1
+    bne         LINEAR_X_SIZE_256_LOOP_EVEN
+
+LINEAR_X_SIZE_256_LOOP_ODD:
+    sub         r6, r4, #1              @ tiled_offset = tiled_y_index-1
+    add         r10, r2, #127           @ temp1 = ((linear_x_size+127)>>7)<<7@
+    bic         r10, #0x7F
+    mov         r10, r10, asr #6        @ tiled_offset = tiled_offset*(temp1>>6)@
+    mul         r6, r6, r10
+    add         r6, r6, #2              @ tiled_offset = tiled_offset+2@
+    mov         r10, r8, asr #5         @ temp1 = aligned_x_size>>5@
+    add         r6, r6, r10             @ tiled_offset = tiled_offset+temp1@
+    mov         r6, r6, lsl #11         @ tiled_offset = tiled_offset<<11
+    add         r11, r6, #2048
+    add         r12, r6, #4096
+    add         r14, r6, #6144
+    b           LINEAR_X_SIZE_256_LOOP_MEMCPY
+
+LINEAR_X_SIZE_256_LOOP_EVEN:
+    add         r11, r3, #31            @ temp2 = ((linear_y_size+31)>>5)<<5@
+    bic         r11, r11, #0x1F
+    add         r10, r2, #127           @ temp1 = ((linear_x_size+127)>>7)<<7@
+    bic         r10, #0x7F
+    mov         r10, r10, asr #6        @ tiled_offset = tiled_y_index*(temp1>>6)@
+    mul         r6, r4, r10
+    mov         r6, r6, lsl #11         @ tiled_offset = tiled_offset<<11@
+    add         r12, r5, #32
+    cmp         r12, r11                @ flags consumed by the conditional instructions below
+    movlt       r10, r8, asr #5         @ temp1 = aligned_x_size>>5@
+    movge       r10, r8, asr #6         @ temp1 = aligned_x_size>>6@
+    add         r6, r6, r10, lsl #11    @ tiled_offset = tiled_offset+(temp1<<11)@
+    add         r11, r6, #2048
+    addlt       r12, r6, #12288
+    addlt       r14, r6, #14336
+    addge       r12, r6, #4096
+    addge       r14, r6, #6144
+
+    @ Copy 256 bytes (4 tiles x 64 bytes each) to one linear row.
+LINEAR_X_SIZE_256_LOOP_MEMCPY:
+    and         r10, r5, #0x1F
+    mov         r10, r10, lsl #6
+    add         r10, r1, r10            @ temp1 = tiled_src + 64*(i&0x1F)
+
+    add         r6, r6, r10             @ tiled_addr = tiled_src+64*(temp1)
+    add         r11, r11, r10           @ tiled_addr1 = tiled_src+64*(temp1)
+    pld         [r11]
+    vld1.8      {q0, q1}, [r6]!
+    pld         [r11, #32]
+    add         r12, r12, r10           @ tiled_addr2 = tiled_src+64*(temp1)
+    vld1.8      {q2, q3}, [r6]
+    pld         [r12]
+    vld1.8      {q4, q5}, [r11]!
+    pld         [r12, #32]
+    add         r14, r14, r10           @ tiled_addr3 = tiled_src+64*(temp1)
+    vld1.8      {q6, q7}, [r11]
+    pld         [r14]
+    mul         r7, r2, r5              @ linear_addr = linear_x_size*i
+    vld1.8      {q8, q9}, [r12]!
+    pld         [r14, #32]
+    add         r7, r7, r8              @ linear_addr = linear_addr+aligned_x_size
+    vld1.8      {q10, q11}, [r12]
+    add         r7, r7, r0              @ linear_addr = linear_dest+linear_addr
+    vld1.8      {q12, q13}, [r14]!
+    vld1.8      {q14, q15}, [r14]
+
+    vst1.8      {q0, q1}, [r7]!
+    vst1.8      {q2, q3}, [r7]!
+    vst1.8      {q4, q5}, [r7]!
+    vst1.8      {q6, q7}, [r7]!
+    vst1.8      {q8, q9}, [r7]!
+    vst1.8      {q10, q11}, [r7]!
+    add         r5, #1                  @ i = i+1
+    vst1.8      {q12, q13}, [r7]!
+    cmp         r5, r3                  @ i<linear_y_size
+    vst1.8      {q14, q15}, [r7]!
+
+    blt         LINEAR_X_SIZE_256_LOOP
+
+    add         r8, r8, #256            @ aligned_x_size += 256
+
+    @ ---- Stage 128: 128 columns starting at aligned_x_size, two rows per iteration ----
+LINEAR_X_SIZE_128:
+
+    sub         r14, r2, r8             @ remaining columns
+    cmp         r14, #128
+    blt         LINEAR_X_SIZE_64
+
+    mov         r5, #0                  @ i = 0
+LINEAR_X_SIZE_128_LOOP:
+    mov         r6, #0
+    mov         r4, r5, asr #5          @ tiled_y_index = i>>5
+    and         r10, r4, #0x1
+    cmp         r10, #0x1
+    bne         LINEAR_X_SIZE_128_LOOP_EVEN
+
+LINEAR_X_SIZE_128_LOOP_ODD:
+    sub         r6, r4, #1              @ tiled_offset = tiled_y_index-1
+    add         r10, r2, #127           @ temp1 = ((linear_x_size+127)>>7)<<7@
+    bic         r10, #0x7F
+    mov         r10, r10, asr #6        @ tiled_offset = tiled_offset*(temp1>>6)@
+    mul         r6, r6, r10
+    add         r6, r6, #2              @ tiled_offset = tiled_offset+2@
+    mov         r10, r8, asr #5         @ temp1 = aligned_x_size>>5@
+    add         r6, r6, r10             @ tiled_offset = tiled_offset+temp1@
+    mov         r6, r6, lsl #11         @ tiled_offset = tiled_offset<<11
+    add         r11, r6, #2048
+    b           LINEAR_X_SIZE_128_LOOP_MEMCPY
+
+LINEAR_X_SIZE_128_LOOP_EVEN:
+    add         r11, r3, #31            @ temp2 = ((linear_y_size+31)>>5)<<5@
+    bic         r11, r11, #0x1F
+    add         r10, r2, #127           @ temp1 = ((linear_x_size+127)>>7)<<7@
+    bic         r10, #0x7F
+    mov         r10, r10, asr #6        @ tiled_offset = tiled_y_index*(temp1>>6)@
+    mul         r6, r4, r10
+    mov         r6, r6, lsl #11         @ tiled_offset = tiled_offset<<11@
+    add         r12, r5, #32
+    cmp         r12, r11
+    movlt       r10, r8, asr #5         @ temp1 = aligned_x_size>>5@
+    movge       r10, r8, asr #6         @ temp1 = aligned_x_size>>6@
+    add         r6, r6, r10, lsl #11    @ tiled_offset = tiled_offset+(temp1<<11)@
+    add         r11, r6, #2048
+
+    @ r6 and r11 each supply 128 consecutive bytes: rows i and i+1 of two
+    @ adjacent tiles. q0-q3/q8-q11 form row i, q4-q7/q12-q15 form row i+1.
+LINEAR_X_SIZE_128_LOOP_MEMCPY:
+    and         r10, r5, #0x1F
+    mov         r10, r10, lsl #6
+    add         r10, r1, r10            @ temp1 = tiled_src + 64*(i&0x1F)
+
+    add         r6, r6, r10             @ tiled_addr = tiled_src+64*(temp1)
+    add         r11, r11, r10           @ tiled_addr1 = tiled_src+64*(temp1)
+    pld         [r6, #64]
+    vld1.8      {q0, q1}, [r6]!
+    pld         [r6, #64]
+    vld1.8      {q2, q3}, [r6]!
+    mul         r7, r2, r5              @ linear_addr = linear_x_size*i
+    pld         [r11]
+    vld1.8      {q4, q5}, [r6]!
+    add         r7, r7, r8              @ linear_addr = linear_addr+aligned_x_size
+    pld         [r11, #32]
+    vld1.8      {q6, q7}, [r6]
+    add         r7, r7, r0              @ linear_addr = linear_dest+linear_addr
+    pld         [r11, #64]
+    vld1.8      {q8, q9}, [r11]!
+    pld         [r11, #64]
+    vld1.8      {q10, q11}, [r11]!
+    vld1.8      {q12, q13}, [r11]!
+    vld1.8      {q14, q15}, [r11]
+
+    sub         r9, r2, #96             @ after 96 bytes of post-increment, +(linear_x_size-96) reaches the next row
+    vst1.8      {q0, q1}, [r7]!
+    vst1.8      {q2, q3}, [r7]!
+    vst1.8      {q8, q9}, [r7]!
+    vst1.8      {q10, q11}, [r7], r9
+    vst1.8      {q4, q5}, [r7]!
+    vst1.8      {q6, q7}, [r7]!
+    add         r5, #2                  @ i = i+2
+    vst1.8      {q12, q13}, [r7]!
+    cmp         r5, r3                  @ i<linear_y_size
+    vst1.8      {q14, q15}, [r7]
+
+    blt         LINEAR_X_SIZE_128_LOOP
+
+    add         r8, r8, #128            @ aligned_x_size += 128
+
+    @ ---- Stage 64: 64 columns starting at aligned_x_size, four rows per iteration ----
+LINEAR_X_SIZE_64:
+
+    sub         r14, r2, r8             @ remaining columns
+    cmp         r14, #64
+    blt         LINEAR_X_SIZE_4
+
+    mov         r5, #0                  @ i = 0
+    mov         r4, r8                  @ j = aligned_x_size
+
+LINEAR_X_SIZE_64_LOOP:
+
+    bl          GET_TILED_OFFSET        @ r6 = byte offset of the tile holding (j, i)
+
+    add         r6, r1, r6              @ tiled_addr = tiled_src+tiled_addr
+    and         r11, r5, #0x1F          @ temp2 = i&0x1F
+    mov         r11, r11, lsl #6        @ temp2 = 64*temp2
+    add         r6, r6, r11             @ tiled_addr = tiled_addr+temp2
+
+    pld         [r6, #64]
+    vld1.8      {q0, q1}, [r6]!         @ load {tiled_addr}
+    mul         r10, r2, r5             @ temp1 = linear_x_size*(i)
+    pld         [r6, #64]
+    vld1.8      {q2, q3}, [r6]!
+    pld         [r6, #64]
+    vld1.8      {q4, q5}, [r6]!         @ load {tiled_addr+64*1}
+    pld         [r6, #64]
+    vld1.8      {q6, q7}, [r6]!
+    pld         [r6, #64]
+    vld1.8      {q8, q9}, [r6]!         @ load {tiled_addr+64*2}
+    pld         [r6, #64]
+    vld1.8      {q10, q11}, [r6]!
+    add         r7, r0, r4              @ linear_addr = linear_dest+j
+    vld1.8      {q12, q13}, [r6]!       @ load {tiled_addr+64*3}
+    add         r7, r7, r10             @ linear_addr = linear_addr+temp1
+    vld1.8      {q14, q15}, [r6]!
+    sub         r10, r2, #32            @ temp1 = linear_x_size-32
+
+    vst1.8      {q0, q1}, [r7]!         @ store {linear_addr, 64}
+    vst1.8      {q2, q3}, [r7], r10
+    vst1.8      {q4, q5}, [r7]!         @ store {linear_addr+linear_x_size*1, 64}
+    vst1.8      {q6, q7}, [r7], r10
+    vst1.8      {q8, q9}, [r7]!         @ store {linear_addr+linear_x_size*2, 64}
+    vst1.8      {q10, q11}, [r7], r10
+    add         r5, #4                  @ i = i+4
+    vst1.8      {q12, q13}, [r7]!       @ store {linear_addr+linear_x_size*3, 64}
+    cmp         r5, r3                  @ i<linear_y_size
+    vst1.8      {q14, q15}, [r7], r10
+
+    blt         LINEAR_X_SIZE_64_LOOP
+
+    add         r8, r8, #64             @ aligned_x_size += 64
+
+    @ ---- Stage 4: remaining columns, copied as 4-byte x 4-row blocks ----
+LINEAR_X_SIZE_4:
+    cmp         r8, r2
+    beq         RESTORE_REG             @ nothing left: aligned_x_size == linear_x_size
+
+    mov         r5, #0                  @ i = 0
+LINEAR_Y_SIZE_4_LOOP:
+
+    mov         r4, r8                  @ j = aligned_x_size
+LINEAR_X_SIZE_4_LOOP:
+
+    bl          GET_TILED_OFFSET        @ r6 = byte offset of the tile holding (j, i); clobbers r10-r12
+
+    and         r10, r5, #0x1F          @ temp1 = i&0x1F
+    and         r11, r4, #0x3F          @ temp2 = j&0x3F
+
+    add         r6, r6, r1              @ tiled_addr = tiled_src+tiled_addr
+    add         r6, r6, r11             @ tiled_addr = tiled_addr+temp2
+    add         r6, r6, r10, lsl #6     @ tiled_addr = tiled_addr+64*temp1
+
+    ldr         r10, [r6], #64          @ 4 bytes from each of 4 consecutive tile rows
+    add         r7, r0, r4              @ linear_addr = linear_dest+j
+    ldr         r11, [r6], #64
+    mul         r9, r2, r5              @ r9 = linear_x_size*i
+    ldr         r12, [r6], #64
+    add         r7, r7, r9              @ linear_addr = linear_addr+linear_x_size*i
+    ldr         r14, [r6], #64          @ r14 is free here: lr is reloaded by every bl
+
+    str         r10, [r7], r2           @ one word per linear row, stepping by linear_x_size
+    str         r11, [r7], r2
+    str         r12, [r7], r2
+    str         r14, [r7], r2
+
+    add         r4, r4, #4              @ j = j+4
+    cmp         r4, r2                  @ j<linear_x_size
+    blt         LINEAR_X_SIZE_4_LOOP
+
+    add         r5, r5, #4              @ i = i+4
+    cmp         r5, r3                  @ i<linear_y_size
+    blt         LINEAR_Y_SIZE_4_LOOP
+
+RESTORE_REG:
+    ldmfd       sp!, {r4-r12,r15}       @ restore registers and return (the saved r14 is popped into pc)
+
+    @ Local helper, reached only through bl.
+    @   in : r5 = i (row), r4 = j (column), r2 = linear_x_size, r3 = linear_y_size
+    @   out: r6 = (tile index) << 11 for the tile containing (j, i)
+    @   clobbers r10, r11, r12 and the condition flags
+GET_TILED_OFFSET:
+
+    mov         r11, r5, asr #5         @ temp2 = i>>5
+    mov         r10, r4, asr #6         @ temp1 = j>>6
+
+    and         r12, r11, #0x1          @ if (temp2 & 0x1)
+    cmp         r12, #0x1
+    bne         GET_TILED_OFFSET_EVEN_FORMULA_1
+
+GET_TILED_OFFSET_ODD_FORMULA:
+    sub         r6, r11, #1             @ tiled_addr = temp2-1
+    add         r12, r2, #127           @ temp3 = linear_x_size+127
+    bic         r12, r12, #0x7F         @ temp3 = (temp3 >>7)<<7
+    mov         r12, r12, asr #6        @ temp3 = temp3>>6
+    mul         r6, r6, r12             @ tiled_addr = tiled_addr*temp3
+    add         r6, r6, r10             @ tiled_addr = tiled_addr+temp1
+    add         r6, r6, #2              @ tiled_addr = tiled_addr+2
+    bic         r12, r10, #0x3          @ temp3 = (temp1>>2)<<2
+    add         r6, r6, r12             @ tiled_addr = tiled_addr+temp3
+    mov         r6, r6, lsl #11         @ tiled_addr = tiled_addr<<11
+    b           GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_1:
+    add         r12, r3, #31            @ temp3 = linear_y_size+31
+    bic         r12, r12, #0x1F         @ temp3 = (temp3>>5)<<5
+    sub         r12, r12, #32           @ temp3 = temp3 - 32
+    cmp         r5, r12                 @ if (i < temp3) {   temp3 already has the 32 subtracted
+    bge         GET_TILED_OFFSET_EVEN_FORMULA_2
+    add         r12, r10, #2            @ temp3 = temp1+2
+    bic         r12, r12, #3            @ temp3 = (temp3>>2)<<2
+    add         r6, r10, r12            @ tiled_addr = temp1+temp3
+    add         r12, r2, #127           @ temp3 = linear_x_size+127
+    bic         r12, r12, #0x7F         @ temp3 = (temp3>>7)<<7
+    mov         r12, r12, asr #6        @ temp3 = temp3>>6
+    mul         r11, r11, r12           @ tiled_y_index = tiled_y_index*temp3
+    add         r6, r6, r11             @ tiled_addr = tiled_addr+tiled_y_index
+    mov         r6, r6, lsl #11         @ tiled_addr = tiled_addr<<11
+    b           GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_2:
+    add         r12, r2, #127           @ temp3 = linear_x_size+127
+    bic         r12, r12, #0x7F         @ temp3 = (temp3>>7)<<7
+    mov         r12, r12, asr #6        @ temp3 = temp3>>6
+    mul         r6, r11, r12            @ tiled_addr = temp2*temp3
+    add         r6, r6, r10             @ tiled_addr = tiled_addr+temp1
+    mov         r6, r6, lsl #11         @ tiled_addr = tiled_addr<<11@
+
+GET_TILED_OFFSET_RETURN:
+    mov         pc, lr                  @ return to the caller of GET_TILED_OFFSET
+    .fnend
+
diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s
new file mode 100644
index 0000000..dd2c879
--- /dev/null
+++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s
@@ -0,0 +1,573 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file    csc_yuv420_nv12t_uv_neon.s
+ * @brief   SEC_OMX specific define
+ * @author  ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ *   2011.7.01 : Create
+ */
+
+/*
+ * Converts and Interleaves linear to tiled
+ * 1.
UV of YUV420P to UV of NV12T + * + * @param nv12t_uv_dest + * UV plane address of NV12T[out] + * + * @param yuv420p_u_src + * U plane address of YUV420P[in] + * + * @param yuv420p_v_src + * V plane address of YUV420P[in] + * + * @param yuv420_width + * Width of YUV420[in] + * + * @param yuv420_uv_height + * Height/2 of YUV420[in] + */ + + .arch armv7-a + .text + .global csc_linear_to_tiled_interleave + .type csc_linear_to_tiled_interleave, %function +csc_linear_to_tiled_interleave: + .fnstart + + @r0 tiled_dest + @r1 linear_src_u + @r2 linear_src_v + @r3 linear_x_size + @r4 linear_y_size + @r5 j + @r6 i + @r7 tiled_addr + @r8 linear_addr + @r9 aligned_x_size + @r10 aligned_y_size + @r11 temp1 + @r12 temp2 + @r14 temp3 + + stmfd sp!, {r4-r12,r14} @ backup registers + + ldr r4, [sp, #40] @ load linear_y_size to r4 + + bic r10, r4, #0x1F @ aligned_y_size = (linear_y_size>>5)<<5 + bic r9, r3, #0x3F @ aligned_x_size = (linear_x_size>>6)<<6 + + mov r6, #0 @ i = 0 +LOOP_ALIGNED_Y_SIZE: + + mov r5, #0 @ j = 0 +LOOP_ALIGNED_X_SIZE: + + bl GET_TILED_OFFSET + + mov r11, r3, asr #1 @ temp1 = linear_x_size/2 + mul r11, r11, r6 @ temp1 = temp1*(i) + add r11, r11, r5, asr #1 @ temp1 = temp1+j/2 + mov r12, r3, asr #1 @ temp2 = linear_x_size/2 + sub r12, r12, #16 @ temp2 = linear_x_size-16 + + add r8, r1, r11 @ linear_addr = linear_src_u+temp1 + add r11, r2, r11 @ temp1 = linear_src_v+temp1 + add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! + vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! + vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! 
+ vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! + vst2.8 {q14, q15}, [r7]! + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! + vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! + vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! + vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! + vst2.8 {q14, q15}, [r7]! + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! + vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! + vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! + vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! + vst2.8 {q14, q15}, [r7]! + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! + vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! 
+ vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! + vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! + vst2.8 {q14, q15}, [r7]! + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! + vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! + vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! + vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! + vst2.8 {q14, q15}, [r7]! + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! + vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! + vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! + vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! + vst2.8 {q14, q15}, [r7]! + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! 
+ vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! + vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! + vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! + vst2.8 {q14, q15}, [r7]! + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! + vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! + vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! + vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! + vst2.8 {q14, q15}, [r7]! 
+ + add r5, r5, #64 @ j = j+64 + cmp r5, r9 @ j<aligned_x_size + blt LOOP_ALIGNED_X_SIZE + + add r6, r6, #32 @ i = i+32 + cmp r6, r10 @ i<aligned_y_size + blt LOOP_ALIGNED_Y_SIZE + + ldr r4, [sp, #40] @ load linear_y_size to r4 + cmp r6, r4 + beq LOOP_LINEAR_Y_SIZE_2_START + +LOOP_LINEAR_Y_SIZE_1: + + mov r5, #0 @ j = 0 +LOOP_ALIGNED_X_SIZE_1: + + bl GET_TILED_OFFSET + + mov r11, r3, asr #1 @ temp1 = linear_x_size/2 + mul r11, r11, r6 @ temp1 = temp1*(i) + add r11, r11, r5, asr #1 @ temp1 = temp1+j/2 + mov r12, r3, asr #1 @ temp2 = linear_x_size/2 + sub r12, r12, #16 @ temp2 = linear_x_size-16 + + add r8, r1, r11 @ linear_addr = linear_src_u+temp1 + add r11, r2, r11 @ temp1 = linear_src_v+temp1 + add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr + and r14, r6, #0x1F @ temp3 = i&0x1F@ + mov r14, r14, lsl #6 @ temp3 = temp3*64 + add r7, r7, r14 @ tiled_addr = tiled_addr+temp3 + + pld [r8, r3] + vld1.8 {q0}, [r8]! + vld1.8 {q2}, [r8], r12 + pld [r11, r3] + vld1.8 {q1}, [r11]! + vld1.8 {q3}, [r11], r12 + pld [r8, r3] + vld1.8 {q4}, [r8]! + vld1.8 {q6}, [r8], r12 + pld [r11, r3] + vld1.8 {q5}, [r11]! + vld1.8 {q7}, [r11], r12 + pld [r8, r3] + vld1.8 {q8}, [r8]! + vld1.8 {q10}, [r8], r12 + pld [r11, r3] + vld1.8 {q9}, [r11]! + vld1.8 {q11}, [r11], r12 + pld [r8, r3] + vld1.8 {q12}, [r8]! + vld1.8 {q14}, [r8], r12 + pld [r11, r3] + vld1.8 {q13}, [r11]! + vld1.8 {q15}, [r11], r12 + + vst2.8 {q0, q1}, [r7]! @ store {tiled_addr} + vst2.8 {q2, q3}, [r7]! + vst2.8 {q4, q5}, [r7]! @ store {tiled_addr+64*1} + vst2.8 {q6, q7}, [r7]! + vst2.8 {q8, q9}, [r7]! @ store {tiled_addr+64*2} + vst2.8 {q10, q11}, [r7]! + vst2.8 {q12, q13}, [r7]! @ store {tiled_addr+64*3} + vst2.8 {q14, q15}, [r7]! 
+ + add r5, r5, #64 @ j = j+64 + cmp r5, r9 @ j<aligned_x_size + blt LOOP_ALIGNED_X_SIZE_1 + + add r6, r6, #4 @ i = i+4 + cmp r6, r4 @ i<linear_y_size + blt LOOP_LINEAR_Y_SIZE_1 + +LOOP_LINEAR_Y_SIZE_2_START: + cmp r5, r3 + beq RESTORE_REG + + mov r6, #0 @ i = 0 +LOOP_LINEAR_Y_SIZE_2: + + mov r5, r9 @ j = aligned_x_size +LOOP_LINEAR_X_SIZE_2: + + bl GET_TILED_OFFSET + + mov r11, r3, asr #1 @ temp1 = linear_x_size/2 + mul r11, r11, r6 @ temp1 = temp1*(i) + add r11, r11, r5, asr #1 @ temp1 = temp1+j/2 + mov r12, r3, asr #1 @ temp2 = linear_x_size/2 + sub r12, r12, #1 @ temp2 = linear_x_size-1 + + add r8, r1, r11 @ linear_addr = linear_src_u+temp1 + add r11, r2, r11 @ temp1 = linear_src_v+temp1 + add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr + and r14, r6, #0x1F @ temp3 = i&0x1F@ + mov r14, r14, lsl #6 @ temp3 = temp3*64 + add r7, r7, r14 @ tiled_addr = tiled_addr+temp3 + and r14, r5, #0x3F @ temp3 = j&0x3F + add r7, r7, r14 @ tiled_addr = tiled_addr+temp3 + + ldrb r10, [r8], #1 + ldrb r14, [r11], #1 + mov r14, r14, lsl #8 + orr r10, r10, r14 + strh r10, [r7], #2 + ldrb r10, [r8], r12 + ldrb r14, [r11], r12 + mov r14, r14, lsl #8 + orr r10, r10, r14 + strh r10, [r7], #62 + + ldrb r10, [r8], #1 + ldrb r14, [r11], #1 + mov r14, r14, lsl #8 + orr r10, r10, r14 + strh r10, [r7], #2 + ldrb r10, [r8], r12 + ldrb r14, [r11], r12 + mov r14, r14, lsl #8 + orr r10, r10, r14 + strh r10, [r7], #62 + + ldrb r10, [r8], #1 + ldrb r14, [r11], #1 + mov r14, r14, lsl #8 + orr r10, r10, r14 + strh r10, [r7], #2 + ldrb r10, [r8], r12 + ldrb r14, [r11], r12 + mov r14, r14, lsl #8 + orr r10, r10, r14 + strh r10, [r7], #62 + + ldrb r10, [r8], #1 + ldrb r14, [r11], #1 + mov r14, r14, lsl #8 + orr r10, r10, r14 + strh r10, [r7], #2 + ldrb r10, [r8], r12 + ldrb r14, [r11], r12 + mov r14, r14, lsl #8 + orr r10, r10, r14 + strh r10, [r7], #62 + + add r5, r5, #4 @ j = j+4 + cmp r5, r3 @ j<linear_x_size + blt LOOP_LINEAR_X_SIZE_2 + + add r6, r6, #4 @ i = i+4 + cmp r6, r4 @ i<linear_y_size + 
blt LOOP_LINEAR_Y_SIZE_2 + +RESTORE_REG: + ldmfd sp!, {r4-r12,r15} @ restore registers + +GET_TILED_OFFSET: + stmfd sp!, {r14} + + mov r12, r6, asr #5 @ temp2 = i>>5 + mov r11, r5, asr #6 @ temp1 = j>>6 + + and r14, r12, #0x1 @ if (temp2 & 0x1) + cmp r14, #0x1 + bne GET_TILED_OFFSET_EVEN_FORMULA_1 + +GET_TILED_OFFSET_ODD_FORMULA: + sub r7, r12, #1 @ tiled_addr = temp2-1 + add r14, r3, #127 @ temp3 = linear_x_size+127 + bic r14, r14, #0x7F @ temp3 = (temp3 >>7)<<7 + mov r14, r14, asr #6 @ temp3 = temp3>>6 + mul r7, r7, r14 @ tiled_addr = tiled_addr*temp3 + add r7, r7, r11 @ tiled_addr = tiled_addr+temp1 + add r7, r7, #2 @ tiled_addr = tiled_addr+2 + bic r14, r11, #0x3 @ temp3 = (temp1>>2)<<2 + add r7, r7, r14 @ tiled_addr = tiled_addr+temp3 + mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11 + b GET_TILED_OFFSET_RETURN + +GET_TILED_OFFSET_EVEN_FORMULA_1: + add r14, r4, #31 @ temp3 = linear_y_size+31 + bic r14, r14, #0x1F @ temp3 = (temp3>>5)<<5 + sub r14, r14, #32 @ temp3 = temp3 - 32 + cmp r6, r14 @ if (i<(temp3-32)) { + bge GET_TILED_OFFSET_EVEN_FORMULA_2 + add r14, r11, #2 @ temp3 = temp1+2 + bic r14, r14, #3 @ temp3 = (temp3>>2)<<2 + add r7, r11, r14 @ tiled_addr = temp1+temp3 + add r14, r3, #127 @ temp3 = linear_x_size+127 + bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7 + mov r14, r14, asr #6 @ temp3 = temp3>>6 + mul r12, r12, r14 @ tiled_y_index = tiled_y_index*temp3 + add r7, r7, r12 @ tiled_addr = tiled_addr+tiled_y_index + mov r7, r7, lsl #11 @ + b GET_TILED_OFFSET_RETURN + +GET_TILED_OFFSET_EVEN_FORMULA_2: + add r14, r3, #127 @ temp3 = linear_x_size+127 + bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7 + mov r14, r14, asr #6 @ temp3 = temp3>>6 + mul r7, r12, r14 @ tiled_addr = temp2*temp3 + add r7, r7, r11 @ tiled_addr = tiled_addr+temp3 + mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11@ + +GET_TILED_OFFSET_RETURN: + ldmfd sp!, {r15} @ restore registers + .fnend + diff --git 
a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s
new file mode 100644
index 0000000..3f8932a
--- /dev/null
+++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s
@@ -0,0 +1,451 @@
/*
 *
 * Copyright 2011 Samsung Electronics S.LSI Co. LTD
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * @file    csc_yuv420_nv12t_y_neon.s
 * @brief   SEC_OMX specific define
 * @author  ShinWon Lee (shinwon.lee@samsung.com)
 * @version 1.0
 * @history
 *   2011.7.01 : Create
 */

/*
 * Converts linear data to tiled.
 * 1. Y of YUV420P to Y of NV12T
 * 2. Y of YUV420S to Y of NV12T
 * 3. UV of YUV420S to UV of NV12T
 *
 * @param nv12t_dest
 *   Y or UV plane address of NV12T[out]
 *
 * @param yuv420_src
 *   Y or UV plane address of YUV420P(S)[in]
 *
 * @param yuv420_width
 *   Width of YUV420[in]
 *
 * @param yuv420_height
 *   Y: Height of YUV420, UV: Height/2 of YUV420[in]
 *
 * Layout handled by this routine: a destination tile is 64 bytes wide and
 * 32 rows high (2048 bytes, hence the "lsl #11" on the tile index and the
 * "lsl #6" on the row-in-tile index).  The copy runs in three phases:
 *   1. whole 64x32 tiles, for i < aligned_y_size and j < aligned_x_size;
 *   2. the rows below aligned_y_size, four rows of 64 bytes per step,
 *      across the 64-byte aligned columns;
 *   3. the columns right of aligned_x_size, one 4x4 byte block per step
 *      (word loads/stores), for every row starting again at i = 0.
 *
 * NOTE(review): phases 2 and 3 advance i and j by 4, so width and height
 * are presumably expected to be multiples of 4 - confirm with callers.
 * NOTE(review): every loop is bottom-tested, so the phase 1 body runs once
 * even when aligned_x_size or aligned_y_size is 0 (width < 64 or
 * height < 32) - confirm callers never pass such sizes.
 */

    .arch armv7-a
    .text
    .global csc_linear_to_tiled
    .type csc_linear_to_tiled, %function
csc_linear_to_tiled:
    .fnstart

    @r0     tiled_dest
    @r1     linear_src
    @r2     linear_x_size
    @r3     linear_y_size
    @r4     j
    @r5     i
    @r6     nn(tiled_addr)
    @r7     mm(linear_addr)
    @r8     aligned_x_size
    @r9     aligned_y_size
    @r10    temp1
    @r11    temp2
    @r12    temp3
    @r14    temp4

    stmfd       sp!, {r4-r12,r14}       @ backup registers

    bic         r9, r3, #0x1F           @ aligned_y_size = (linear_y_size>>5)<<5
    bic         r8, r2, #0x3F           @ aligned_x_size = (linear_x_size>>6)<<6

    @ ---- phase 1: full 64x32 tiles --------------------------------------
    mov         r5, #0                  @ i = 0
LOOP_ALIGNED_Y_SIZE:

    mov         r4, #0                  @ j = 0
LOOP_ALIGNED_X_SIZE:

    bl          GET_TILED_OFFSET        @ r6 = tile byte offset for (i, j); clobbers r10-r12

    mul         r10, r2, r5             @ temp1 = linear_x_size*(i)
    add         r7, r1, r4              @ linear_addr = linear_src+j
    add         r7, r7, r10             @ linear_addr = linear_addr+temp1
    sub         r10, r2, #32            @ temp1 = linear_x_size-32 (post-increment that lands on the next source row)

    @ tile rows 0-3: each row is two 32-byte loads; pld prefetches two rows ahead
    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    add         r6, r0, r6              @ tiled_addr = tiled_dest+tiled_addr

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    @ tile rows 4-7
    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    @ tile rows 8-11
    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    @ tile rows 12-15
    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    @ tile rows 16-19
    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    @ tile rows 20-23
    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    @ tile rows 24-27
    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    @ tile rows 28-31
    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    add         r4, r4, #64             @ j = j+64
    cmp         r4, r8                  @ j<aligned_x_size
    blt         LOOP_ALIGNED_X_SIZE

    add         r5, r5, #32             @ i = i+32
    cmp         r5, r9                  @ i<aligned_y_size
    blt         LOOP_ALIGNED_Y_SIZE

    cmp         r5, r3                  @ i == linear_y_size: no partial tile row, skip phase 2
    beq         LOOP_LINEAR_Y_SIZE_2_START

    @ ---- phase 2: rows below aligned_y_size, 4 rows x 64 bytes per step --
LOOP_LINEAR_Y_SIZE_1:

    mov         r4, #0                  @ j = 0
LOOP_ALIGNED_X_SIZE_1:

    bl          GET_TILED_OFFSET        @ r6 = tile byte offset for (i, j); clobbers r10-r12

    mul         r10, r2, r5             @ temp1 = linear_x_size*(i)
    add         r7, r1, r4              @ linear_addr = linear_src+j
    add         r7, r7, r10             @ linear_addr = linear_addr+temp1
    sub         r10, r2, #32            @ temp1 = linear_x_size-32

    pld         [r7, r2, lsl #1]
    vld1.8      {q0, q1}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q2, q3}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q4, q5}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q6, q7}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q8, q9}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q10, q11}, [r7], r10
    pld         [r7, r2, lsl #1]
    vld1.8      {q12, q13}, [r7]!
    pld         [r7, r2, lsl #1]
    vld1.8      {q14, q15}, [r7], r10

    add         r6, r0, r6              @ tiled_addr = tiled_dest+tiled_addr
    and         r11, r5, #0x1F          @ temp2 = i&0x1F (row inside the tile)
    mov         r11, r11, lsl #6        @ temp2 = 64*temp2
    add         r6, r6, r11             @ tiled_addr = tiled_addr+temp2

    vst1.8      {q0, q1}, [r6]!
    vst1.8      {q2, q3}, [r6]!
    vst1.8      {q4, q5}, [r6]!
    vst1.8      {q6, q7}, [r6]!
    vst1.8      {q8, q9}, [r6]!
    vst1.8      {q10, q11}, [r6]!
    vst1.8      {q12, q13}, [r6]!
    vst1.8      {q14, q15}, [r6]!

    add         r4, r4, #64             @ j = j+64
    cmp         r4, r8                  @ j<aligned_x_size
    blt         LOOP_ALIGNED_X_SIZE_1

    add         r5, r5, #4              @ i = i+4
    cmp         r5, r3                  @ i<linear_y_size
    blt         LOOP_LINEAR_Y_SIZE_1

    @ ---- phase 3: columns right of aligned_x_size, 4x4 bytes per step ----
LOOP_LINEAR_Y_SIZE_2_START:
    cmp         r4, r2                  @ j left by the loops above == linear_x_size: no partial tile column
    beq         RESTORE_REG

    mov         r5, #0                  @ i = 0
LOOP_LINEAR_Y_SIZE_2:

    mov         r4, r8                  @ j = aligned_x_size
LOOP_LINEAR_X_SIZE_2:

    bl          GET_TILED_OFFSET        @ r6 = tile byte offset for (i, j); clobbers r10-r12

    mul         r10, r2, r5             @ temp1 = linear_x_size*(i)
    add         r7, r1, r4              @ linear_addr = linear_src+j
    add         r7, r7, r10             @ linear_addr = linear_addr+temp1

    add         r6, r0, r6              @ tiled_addr = tiled_dest+tiled_addr
    and         r11, r5, #0x1F          @ temp2 = i&0x1F (row inside the tile)
    mov         r11, r11, lsl #6        @ temp2 = 64*temp2
    add         r6, r6, r11             @ tiled_addr = tiled_addr+temp2
    and         r11, r4, #0x3F          @ temp2 = j&0x3F (column inside the tile)
    add         r6, r6, r11             @ tiled_addr = tiled_addr+temp2

    ldr         r10, [r7], r2           @ one 4-byte word from each of 4 consecutive source rows
    ldr         r11, [r7], r2
    ldr         r12, [r7], r2
    ldr         r14, [r7], r2           @ r14 is free here: the return address was consumed by the bl above
    str         r10, [r6], #64          @ destination rows are 64 bytes apart inside a tile
    str         r11, [r6], #64
    str         r12, [r6], #64
    str         r14, [r6], #64

    add         r4, r4, #4              @ j = j+4
    cmp         r4, r2                  @ j<linear_x_size
    blt         LOOP_LINEAR_X_SIZE_2

    add         r5, r5, #4              @ i = i+4
    cmp         r5, r3                  @ i<linear_y_size
    blt         LOOP_LINEAR_Y_SIZE_2

RESTORE_REG:
    ldmfd       sp!, {r4-r12,r15}       @ restore registers; the saved r14 is popped into pc, which returns

    @ ---------------------------------------------------------------------
    @ GET_TILED_OFFSET - local subroutine, entered with bl
    @   in : r4 = j, r5 = i, r2 = linear_x_size, r3 = linear_y_size
    @   out: r6 = tile index << 11 (byte offset of the tile, before adding
    @        tiled_dest)
    @   clobbers r10, r11, r12; returns with "mov pc, lr"
    @ With row = i>>5, col = j>>6 and w = (roundup(linear_x_size,128))>>6:
    @   odd row                      : (row-1)*w + col + 2 + (col & ~3)
    @   even row, not the last row   : row*w + col + ((col+2) & ~3)
    @   even row, last tile row      : row*w + col
    @ ---------------------------------------------------------------------
GET_TILED_OFFSET:

    mov         r11, r5, asr #5         @ temp2 = i>>5
    mov         r10, r4, asr #6         @ temp1 = j>>6

    and         r12, r11, #0x1          @ if (temp2 & 0x1)
    cmp         r12, #0x1
    bne         GET_TILED_OFFSET_EVEN_FORMULA_1

GET_TILED_OFFSET_ODD_FORMULA:
    sub         r6, r11, #1             @ tiled_addr = temp2-1
    add         r12, r2, #127           @ temp3 = linear_x_size+127
    bic         r12, r12, #0x7F         @ temp3 = (temp3 >>7)<<7
    mov         r12, r12, asr #6        @ temp3 = temp3>>6
    mul         r6, r6, r12             @ tiled_addr = tiled_addr*temp3
    add         r6, r6, r10             @ tiled_addr = tiled_addr+temp1
    add         r6, r6, #2              @ tiled_addr = tiled_addr+2
    bic         r12, r10, #0x3          @ temp3 = (temp1>>2)<<2
    add         r6, r6, r12             @ tiled_addr = tiled_addr+temp3
    mov         r6, r6, lsl #11         @ tiled_addr = tiled_addr<<11
    b           GET_TILED_OFFSET_RETURN

GET_TILED_OFFSET_EVEN_FORMULA_1:
    add         r12, r3, #31            @ temp3 = linear_y_size+31
    bic         r12, r12, #0x1F         @ temp3 = (temp3>>5)<<5
    sub         r12, r12, #32           @ temp3 = temp3 - 32
    cmp         r5, r12                 @ if (i < temp3) {   (temp3 already has the 32 subtracted)
    bge         GET_TILED_OFFSET_EVEN_FORMULA_2
    add         r12, r10, #2            @ temp3 = temp1+2
    bic         r12, r12, #3            @ temp3 = (temp3>>2)<<2
    add         r6, r10, r12            @ tiled_addr = temp1+temp3
    add         r12, r2, #127           @ temp3 = linear_x_size+127
    bic         r12, r12, #0x7F         @ temp3 = (temp3>>7)<<7
    mov         r12, r12, asr #6        @ temp3 = temp3>>6
    mul         r11, r11, r12           @ tiled_y_index = tiled_y_index*temp3
    add         r6, r6, r11             @ tiled_addr = tiled_addr+tiled_y_index
    mov         r6, r6, lsl #11         @ tiled_addr = tiled_addr<<11
    b           GET_TILED_OFFSET_RETURN

GET_TILED_OFFSET_EVEN_FORMULA_2:
    add         r12, r2, #127           @ temp3 = linear_x_size+127
    bic         r12, r12, #0x7F         @ temp3 = (temp3>>7)<<7
    mov         r12, r12, asr #6        @ temp3 = temp3>>6
    mul         r6, r11, r12            @ tiled_addr = temp2*temp3
    add         r6, r6, r10             @ tiled_addr = tiled_addr+temp1
    mov         r6, r6, lsl #11         @ tiled_addr = tiled_addr<<11

GET_TILED_OFFSET_RETURN:
    mov         pc, lr
    .fnend

diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/Android.mk b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/Android.mk
new file mode 100644
index 0000000..c15cd41
--- /dev/null
+++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/Android.mk
@@ -0,0 +1,26 @@

LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)

LOCAL_MODULE_TAGS := optional

LOCAL_SRC_FILES := \
	src/SsbSipMfcDecAPI.c

LOCAL_MODULE := libsecmfcdecapi



LOCAL_CFLAGS :=

LOCAL_ARM_MODE := arm

LOCAL_STATIC_LIBRARIES :=

LOCAL_SHARED_LIBRARIES := liblog

LOCAL_C_INCLUDES := \
	$(SEC_CODECS)/video/mfc_c110/include

include $(BUILD_STATIC_LIBRARY)

diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c
new file mode 100644
index 0000000..6c64ef8
--- /dev/null
+++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c
@@ -0,0 
+1,518 @@ +/* + * Copyright 2010 Samsung Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <fcntl.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <utils/Log.h> + +#include "mfc_interface.h" +#include "SsbSipMfcApi.h" + +#define _MFCLIB_MAGIC_NUMBER 0x92241000 + +#define USR_DATA_START_CODE (0x000001B2) +#define VOP_START_CODE (0x000001B6) +#define MP4_START_CODE (0x000001) + +static void getAByte(char *buff, int *code) +{ + int byte; + + *code = (*code << 8); + byte = (int)*buff; + byte &= 0xFF; + *code |= byte; +} + +static mfc_packed_mode isPBPacked(_MFCLIB *pCtx, int length) +{ + char *strmBuffer = NULL; + char *strmBufferEnd = NULL; + int startCode = 0xFFFFFFFF; + + strmBuffer = (char *)pCtx->virStrmBuf; + strmBufferEnd = (char *)pCtx->virStrmBuf + length; + + while (1) { + while (startCode != USR_DATA_START_CODE) { + if (startCode == VOP_START_CODE) { + ALOGV("isPBPacked: VOP START Found !!.....return\n"); + ALOGV("isPBPacked: Non Packed PB\n"); + return MFC_UNPACKED_PB; + } + getAByte(strmBuffer, &startCode); + strmBuffer++; + if (strmBuffer >= strmBufferEnd) + goto out; + } + ALOGV("isPBPacked: User Data Found !!\n"); + + do { + if (*strmBuffer == 'p') { + ALOGI("isPBPacked: Packed PB\n"); + return MFC_PACKED_PB; + } + getAByte(strmBuffer, &startCode); + strmBuffer++; + if (strmBuffer >= 
strmBufferEnd) + goto out; + } while ((startCode >> 8) != MP4_START_CODE); + } + +out: + ALOGV("isPBPacked: Non Packed PB\n"); + return MFC_UNPACKED_PB; +} + +void *SsbSipMfcDecOpen(void *value) +{ + int hMFCOpen; + unsigned int mapped_addr; + _MFCLIB *pCTX; + mfc_common_args DecArg; + int ret_code; + + pCTX = (_MFCLIB *)malloc(sizeof(_MFCLIB)); + if (pCTX == NULL) { + ALOGE("SsbSipMfcDecOpen: malloc failed.\n"); + return NULL; + } + memset(pCTX, 0, sizeof(_MFCLIB)); + + hMFCOpen = open(S5PC110_MFC_DEV_NAME, O_RDWR | O_NDELAY); + if (hMFCOpen < 0) { + ALOGE("SsbSipMfcDecOpen: MFC Open failure\n"); + return NULL; + } + + if (*(unsigned int *)value == NO_CACHE || + *(unsigned int *)value == CACHE) { + DecArg.args.buf_type = *(unsigned int *)value; + ret_code = ioctl(hMFCOpen, IOCTL_MFC_BUF_CACHE, &DecArg); + if (DecArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcDecOpenExt: IOCTL_MFC_BUF_CACHE (%d) failed\n", DecArg.ret_code); + } + } else { + ALOGE("SsbSipMfcDecOpenExt: value is invalid, value: %d\n", *(int *)value); + } + + mapped_addr = (unsigned int)mmap(0, MMAP_BUFFER_SIZE_MMAP, PROT_READ | PROT_WRITE, MAP_SHARED, hMFCOpen, 0); + if (!mapped_addr) { + ALOGE("SsbSipMfcDecOpen: FIMV5.0 driver address mapping failed\n"); + return NULL; + } + + pCTX->magic = _MFCLIB_MAGIC_NUMBER; + pCTX->hMFC = hMFCOpen; + pCTX->mapped_addr = mapped_addr; + pCTX->inter_buff_status = MFC_USE_NONE; + + return (void *)pCTX; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecInit(void *openHandle, SSBSIP_MFC_CODEC_TYPE codec_type, int Frameleng) +{ + int ret_code; + int packedPB = MFC_UNPACKED_PB; + mfc_common_args DecArg; + _MFCLIB *pCTX; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcDecSetConfig: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + memset(&DecArg, 0x00, sizeof(DecArg)); + + if ((codec_type != MPEG4_DEC) && + (codec_type != H264_DEC) && + (codec_type != H263_DEC) && + (codec_type != MPEG1_DEC) && + (codec_type != MPEG2_DEC) && + 
(codec_type != FIMV1_DEC) && + (codec_type != FIMV2_DEC) && + (codec_type != FIMV3_DEC) && + (codec_type != FIMV4_DEC) && + (codec_type != XVID_DEC) && + (codec_type != VC1RCV_DEC) && + (codec_type != VC1_DEC)) { + ALOGE("SsbSipMfcDecOpen: Undefined codec type.\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX->codec_type = codec_type; + + if ((pCTX->codec_type == MPEG4_DEC) || + (pCTX->codec_type == FIMV1_DEC) || + (pCTX->codec_type == FIMV2_DEC) || + (pCTX->codec_type == FIMV3_DEC) || + (pCTX->codec_type == FIMV4_DEC) || + (pCTX->codec_type == XVID_DEC)) + packedPB = isPBPacked(pCTX, Frameleng); + + /* init args */ + DecArg.args.dec_init.in_codec_type = pCTX->codec_type; + DecArg.args.dec_init.in_strm_size = Frameleng; + DecArg.args.dec_init.in_strm_buf = pCTX->phyStrmBuf; + DecArg.args.dec_init.in_packed_PB = packedPB; + + /* mem alloc args */ + DecArg.args.dec_init.in_mapped_addr = pCTX->mapped_addr; + + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_DEC_INIT, &DecArg); + if (DecArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcDecInit: IOCTL_MFC_DEC_INIT (%d) failed\n", DecArg.ret_code); + return MFC_RET_DEC_INIT_FAIL; + } + + pCTX->decOutInfo.img_width = DecArg.args.dec_init.out_img_width; + pCTX->decOutInfo.img_height = DecArg.args.dec_init.out_img_height; + pCTX->decOutInfo.buf_width = DecArg.args.dec_init.out_buf_width; + pCTX->decOutInfo.buf_height = DecArg.args.dec_init.out_buf_height; + + /* by RainAde : crop information */ + pCTX->decOutInfo.crop_top_offset = DecArg.args.dec_init.out_crop_top_offset; + pCTX->decOutInfo.crop_bottom_offset = DecArg.args.dec_init.out_crop_bottom_offset; + pCTX->decOutInfo.crop_left_offset = DecArg.args.dec_init.out_crop_left_offset; + pCTX->decOutInfo.crop_right_offset = DecArg.args.dec_init.out_crop_right_offset; + + pCTX->virFrmBuf.luma = DecArg.args.dec_init.out_u_addr.luma; + pCTX->virFrmBuf.chroma = DecArg.args.dec_init.out_u_addr.chroma; + + pCTX->phyFrmBuf.luma = DecArg.args.dec_init.out_p_addr.luma; + pCTX->phyFrmBuf.chroma 
= DecArg.args.dec_init.out_p_addr.chroma; + pCTX->sizeFrmBuf.luma = DecArg.args.dec_init.out_frame_buf_size.luma; + pCTX->sizeFrmBuf.chroma = DecArg.args.dec_init.out_frame_buf_size.chroma; + pCTX->inter_buff_status |= MFC_USE_YUV_BUFF; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecExe(void *openHandle, int lengthBufFill) +{ + int ret_code; + int Yoffset; + int Coffset; + _MFCLIB *pCTX; + mfc_common_args DecArg; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcDecExe: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + if ((lengthBufFill < 0) || (lengthBufFill > MAX_DECODER_INPUT_BUFFER_SIZE)) { + ALOGE("SsbSipMfcDecExe: lengthBufFill is invalid. (lengthBufFill=%d)\n", lengthBufFill); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + memset(&DecArg, 0x00, sizeof(DecArg)); + + DecArg.args.dec_exe.in_codec_type = pCTX->codec_type; + DecArg.args.dec_exe.in_strm_buf = pCTX->phyStrmBuf; + DecArg.args.dec_exe.in_strm_size = lengthBufFill; + DecArg.args.dec_exe.in_frm_buf.luma = pCTX->phyFrmBuf.luma; + DecArg.args.dec_exe.in_frm_buf.chroma = pCTX->phyFrmBuf.chroma; + DecArg.args.dec_exe.in_frm_size.luma = pCTX->sizeFrmBuf.luma; + DecArg.args.dec_exe.in_frm_size.chroma = pCTX->sizeFrmBuf.chroma; + DecArg.args.dec_exe.in_frametag = pCTX->in_frametag; + + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_DEC_EXE, &DecArg); + if (DecArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcDecExe: IOCTL_MFC_DEC_EXE failed(ret : %d)\n", DecArg.ret_code); + return MFC_RET_DEC_EXE_ERR; + } + + Yoffset = DecArg.args.dec_exe.out_display_Y_addr - DecArg.args.dec_exe.in_frm_buf.luma; + Coffset = DecArg.args.dec_exe.out_display_C_addr - DecArg.args.dec_exe.in_frm_buf.chroma; + + pCTX->decOutInfo.YPhyAddr = (void *)(DecArg.args.dec_exe.out_display_Y_addr); + pCTX->decOutInfo.CPhyAddr = (void *)(DecArg.args.dec_exe.out_display_C_addr); + pCTX->decOutInfo.YVirAddr = (void *)(pCTX->virFrmBuf.luma + Yoffset); + pCTX->decOutInfo.CVirAddr = (void 
*)(pCTX->virFrmBuf.chroma + Coffset); + pCTX->decOutInfo.timestamp_top = DecArg.args.dec_exe.out_timestamp_top; + pCTX->decOutInfo.timestamp_bottom = DecArg.args.dec_exe.out_timestamp_bottom; + pCTX->decOutInfo.consumedByte = DecArg.args.dec_exe.out_consume_bytes; + pCTX->decOutInfo.res_change = DecArg.args.dec_exe.out_res_change; + pCTX->decOutInfo.crop_top_offset = DecArg.args.dec_exe.out_crop_top_offset; + pCTX->decOutInfo.crop_bottom_offset = DecArg.args.dec_exe.out_crop_bottom_offset; + pCTX->decOutInfo.crop_left_offset = DecArg.args.dec_exe.out_crop_left_offset; + pCTX->decOutInfo.crop_right_offset = DecArg.args.dec_exe.out_crop_right_offset; + pCTX->out_frametag_top = DecArg.args.dec_exe.out_frametag_top; + pCTX->out_frametag_bottom = DecArg.args.dec_exe.out_frametag_bottom; + pCTX->displayStatus = DecArg.args.dec_exe.out_display_status; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecClose(void *openHandle) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args free_arg; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcDecClose: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + + if (pCTX->inter_buff_status & MFC_USE_YUV_BUFF) { + free_arg.args.mem_free.u_addr = pCTX->virFrmBuf.luma; + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_FREE_BUF, &free_arg); + free_arg.args.mem_free.u_addr = pCTX->virFrmBuf.chroma; + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_FREE_BUF, &free_arg); + } + + if (pCTX->inter_buff_status & MFC_USE_STRM_BUFF) { + free_arg.args.mem_free.u_addr = pCTX->virStrmBuf; + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_FREE_BUF, &free_arg); + } + + pCTX->inter_buff_status = MFC_USE_NONE; + + munmap((void *)pCTX->mapped_addr, MMAP_BUFFER_SIZE_MMAP); + close(pCTX->hMFC); + free(pCTX); + + return MFC_RET_OK; +} + +void *SsbSipMfcDecGetInBuf(void *openHandle, void **phyInBuf, int inputBufferSize) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args user_addr_arg, phys_addr_arg; + + if (inputBufferSize < 0) { 
+ ALOGE("SsbSipMfcDecGetInBuf: inputBufferSize = %d is invalid\n", inputBufferSize); + return NULL; + } + + if (openHandle == NULL) { + ALOGE("SsbSipMfcDecGetInBuf: openHandle is NULL\n"); + return NULL; + } + + pCTX = (_MFCLIB *)openHandle; + + user_addr_arg.args.mem_alloc.codec_type = pCTX->codec_type; + user_addr_arg.args.mem_alloc.buff_size = inputBufferSize; + user_addr_arg.args.mem_alloc.mapped_addr = pCTX->mapped_addr; + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_GET_IN_BUF, &user_addr_arg); + if (ret_code < 0) { + ALOGE("SsbSipMfcDecGetInBuf: IOCTL_MFC_GET_IN_BUF failed\n"); + return NULL; + } + pCTX->virStrmBuf = user_addr_arg.args.mem_alloc.out_uaddr; + pCTX->phyStrmBuf = user_addr_arg.args.mem_alloc.out_paddr; + pCTX->sizeStrmBuf = inputBufferSize; + pCTX->inter_buff_status |= MFC_USE_STRM_BUFF; + + *phyInBuf = (void *)pCTX->phyStrmBuf; + + return (void *)pCTX->virStrmBuf; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecSetInBuf(void *openHandle, void *phyInBuf, void *virInBuf, int inputBufferSize) +{ + _MFCLIB *pCTX; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcDecSetInBuf: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + + pCTX->phyStrmBuf = (int)phyInBuf; + pCTX->virStrmBuf = (int)virInBuf; + pCTX->sizeStrmBuf = inputBufferSize; + return MFC_RET_OK; +} + +SSBSIP_MFC_DEC_OUTBUF_STATUS SsbSipMfcDecGetOutBuf(void *openHandle, SSBSIP_MFC_DEC_OUTPUT_INFO *output_info) +{ + _MFCLIB *pCTX; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcDecGetOutBuf: openHandle is NULL\n"); + return MFC_GETOUTBUF_DISPLAY_END; + } + + pCTX = (_MFCLIB *)openHandle; + + output_info->YPhyAddr = pCTX->decOutInfo.YPhyAddr; + output_info->CPhyAddr = pCTX->decOutInfo.CPhyAddr; + + output_info->YVirAddr = pCTX->decOutInfo.YVirAddr; + output_info->CVirAddr = pCTX->decOutInfo.CVirAddr; + + output_info->img_width = pCTX->decOutInfo.img_width; + output_info->img_height= pCTX->decOutInfo.img_height; + + output_info->buf_width = 
pCTX->decOutInfo.buf_width; + output_info->buf_height= pCTX->decOutInfo.buf_height; + + /* by RainAde : for crop information */ + output_info->crop_top_offset = pCTX->decOutInfo.crop_top_offset; + output_info->crop_bottom_offset= pCTX->decOutInfo.crop_bottom_offset; + output_info->crop_left_offset = pCTX->decOutInfo.crop_left_offset; + output_info->crop_right_offset= pCTX->decOutInfo.crop_right_offset; + + if (pCTX->displayStatus == 0) + return MFC_GETOUTBUF_DISPLAY_END; + else if (pCTX->displayStatus == 1) + return MFC_GETOUTBUF_DISPLAY_DECODING; + else if (pCTX->displayStatus == 2) + return MFC_GETOUTBUF_DISPLAY_ONLY; + else + return MFC_GETOUTBUF_DECODING_ONLY; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecSetConfig(void *openHandle, SSBSIP_MFC_DEC_CONF conf_type, void *value) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args DecArg; + SSBSIP_MFC_IMG_RESOLUTION *img_resolution; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcDecSetConfig: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + if (value == NULL) { + ALOGE("SsbSipMfcDecSetConfig: value is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + memset(&DecArg, 0x00, sizeof(DecArg)); + + switch (conf_type) { + case MFC_DEC_SETCONF_POST_ENABLE: + case MFC_DEC_SETCONF_EXTRA_BUFFER_NUM: + case MFC_DEC_SETCONF_DISPLAY_DELAY: + case MFC_DEC_SETCONF_IS_LAST_FRAME: + case MFC_DEC_SETCONF_SLICE_ENABLE: + case MFC_DEC_SETCONF_CRC_ENABLE: + DecArg.args.set_config.in_config_param = conf_type; + DecArg.args.set_config.in_config_value[0] = *((unsigned int *)value); + DecArg.args.set_config.in_config_value[1] = 0; + break; + + case MFC_DEC_SETCONF_FIMV1_WIDTH_HEIGHT: + img_resolution = (SSBSIP_MFC_IMG_RESOLUTION *)value; + DecArg.args.set_config.in_config_param = conf_type; + DecArg.args.set_config.in_config_value[0] = img_resolution->width; + DecArg.args.set_config.in_config_value[1] = img_resolution->height; + break; + + case MFC_DEC_SETCONF_FRAME_TAG: + pCTX->in_frametag = 
*((int *)value); + return MFC_RET_OK; + + default: + ALOGE("SsbSipMfcDecSetConfig: No such conf_type is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_SET_CONFIG, &DecArg); + if (DecArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcDecSetConfig: IOCTL_MFC_SET_CONFIG failed(ret : %d, conf_type: %d)\n", DecArg.ret_code, conf_type); + return MFC_RET_DEC_SET_CONF_FAIL; + } + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecGetConfig(void *openHandle, SSBSIP_MFC_DEC_CONF conf_type, void *value) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args DecArg; + + SSBSIP_MFC_IMG_RESOLUTION *img_resolution; + SSBSIP_MFC_CROP_INFORMATION *crop_information; + MFC_CRC_DATA *crc_data; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcDecGetConfig: openHandle is NULL\n"); + return MFC_RET_FAIL; + } + + if (value == NULL) { + ALOGE("SsbSipMfcDecGetConfig: value is NULL\n"); + return MFC_RET_FAIL; + } + + pCTX = (_MFCLIB *)openHandle; + memset(&DecArg, 0x00, sizeof(DecArg)); + + switch (conf_type) { + case MFC_DEC_GETCONF_BUF_WIDTH_HEIGHT: + img_resolution = (SSBSIP_MFC_IMG_RESOLUTION *)value; + img_resolution->width = pCTX->decOutInfo.img_width; + img_resolution->height = pCTX->decOutInfo.img_height; + img_resolution->buf_width = pCTX->decOutInfo.buf_width; + img_resolution->buf_height = pCTX->decOutInfo.buf_height; + break; + + /* Added by RainAde */ + case MFC_DEC_GETCONF_CROP_INFO: + crop_information = (SSBSIP_MFC_CROP_INFORMATION*)value; + crop_information->crop_top_offset = pCTX->decOutInfo.crop_top_offset; + crop_information->crop_bottom_offset= pCTX->decOutInfo.crop_bottom_offset; + crop_information->crop_left_offset = pCTX->decOutInfo.crop_left_offset; + crop_information->crop_right_offset= pCTX->decOutInfo.crop_right_offset; + break; + + case MFC_DEC_GETCONF_CRC_DATA: + crc_data = (MFC_CRC_DATA *)value; + + DecArg.args.get_config.in_config_param = conf_type; + + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_GET_CONFIG, 
&DecArg); + if (DecArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcDecGetConfig: IOCTL_MFC_GET_CONFIG failed(ret : %d, conf_type: %d)\n", DecArg.ret_code, conf_type); + return MFC_RET_DEC_GET_CONF_FAIL; + } + crc_data->luma0 = DecArg.args.get_config.out_config_value[0]; + crc_data->chroma0 = DecArg.args.get_config.out_config_value[1]; + break; + + case MFC_DEC_GETCONF_FRAME_TAG: + *((unsigned int *)value) = pCTX->out_frametag_top; + break; + + default: + ALOGE("SsbSipMfcDecGetConfig: No such conf_type is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + + return MFC_RET_OK; +} diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/Android.mk b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/Android.mk new file mode 100644 index 0000000..b57346b --- /dev/null +++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/Android.mk @@ -0,0 +1,26 @@ + +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +LOCAL_MODULE_TAGS := optional + +LOCAL_SRC_FILES := \ + src/SsbSipMfcEncAPI.c + +LOCAL_MODULE := libsecmfcencapi + + + +LOCAL_CFLAGS := -DUSE_FIMC_FRAME_BUFFER + +LOCAL_ARM_MODE := arm + +LOCAL_STATIC_LIBRARIES := + +LOCAL_SHARED_LIBRARIES := liblog + +LOCAL_C_INCLUDES := \ + $(SEC_CODECS)/video/mfc_c110/include + +include $(BUILD_STATIC_LIBRARY) + diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c new file mode 100644 index 0000000..a27fcc0 --- /dev/null +++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c @@ -0,0 +1,686 @@ +/* + * Copyright 2010 Samsung Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <fcntl.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <utils/Log.h> + +#include "SsbSipMfcApi.h" +#include "mfc_interface.h" + +#define _MFCLIB_MAGIC_NUMBER 0x92241001 + +void *SsbSipMfcEncOpen(void *value) +{ + int hMFCOpen; + _MFCLIB *pCTX; + unsigned int mapped_addr; + mfc_common_args EncArg; + int ret_code; + + hMFCOpen = open(S5PC110_MFC_DEV_NAME, O_RDWR | O_NDELAY); + if (hMFCOpen < 0) { + ALOGE("SsbSipMfcEncOpen: MFC Open failure\n"); + return NULL; + } + + pCTX = (_MFCLIB *)malloc(sizeof(_MFCLIB)); + if (pCTX == NULL) { + ALOGE("SsbSipMfcEncOpen: malloc failed.\n"); + close(hMFCOpen); + return NULL; + } + + if (*(unsigned int *)value == NO_CACHE || + *(unsigned int *)value == CACHE) { + EncArg.args.buf_type = *(unsigned int *)value; + ret_code = ioctl(hMFCOpen, IOCTL_MFC_BUF_CACHE, &EncArg); + if (EncArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcDecOpenExt: IOCTL_MFC_BUF_CACHE (%d) failed\n", EncArg.ret_code); + } + } else { + ALOGE("SsbSipMfcDecOpenExt: value is invalid, value: %d\n", *(int *)value); + } + + mapped_addr = (unsigned int)mmap(0, MMAP_BUFFER_SIZE_MMAP, PROT_READ | PROT_WRITE, MAP_SHARED, hMFCOpen, 0); + if (!mapped_addr) { + ALOGE("SsbSipMfcEncOpen: FIMV5.0 driver address mapping failed\n"); + return NULL; + } + + memset(pCTX, 0, sizeof(_MFCLIB)); + + pCTX->magic = _MFCLIB_MAGIC_NUMBER; + pCTX->hMFC = hMFCOpen; + pCTX->mapped_addr = mapped_addr; + pCTX->inter_buff_status = 
MFC_USE_NONE; + + return (void *)pCTX; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncInit(void *openHandle, void *param) +{ + int ret_code; + int dpbBufSize; + + _MFCLIB *pCTX; + mfc_common_args EncArg; + mfc_common_args user_addr_arg, phys_addr_arg; + SSBSIP_MFC_ENC_H264_PARAM *h264_arg; + SSBSIP_MFC_ENC_MPEG4_PARAM *mpeg4_arg; + SSBSIP_MFC_ENC_H263_PARAM *h263_arg; + SSBSIP_MFC_CODEC_TYPE codec_type; + + pCTX = (_MFCLIB *)openHandle; + memset(&EncArg, 0, sizeof(mfc_common_args)); + + ALOGV("SsbSipMfcEncInit: Encode Init start\n"); + + mpeg4_arg = (SSBSIP_MFC_ENC_MPEG4_PARAM *)param; + codec_type = mpeg4_arg->codecType; + + if ((codec_type != MPEG4_ENC) && + (codec_type != H264_ENC) && + (codec_type != H263_ENC)) { + ALOGE("SsbSipMfcEncOpen: Undefined codec type.\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX->codec_type = codec_type; + + switch (pCTX->codec_type) { + case MPEG4_ENC: + ALOGV("SsbSipMfcEncInit: MPEG4 Encode\n"); + mpeg4_arg = (SSBSIP_MFC_ENC_MPEG4_PARAM *)param; + + pCTX->width = mpeg4_arg->SourceWidth; + pCTX->height = mpeg4_arg->SourceHeight; + break; + + case H263_ENC: + ALOGV("SsbSipMfcEncInit: H263 Encode\n"); + h263_arg = (SSBSIP_MFC_ENC_H263_PARAM *)param; + + pCTX->width = h263_arg->SourceWidth; + pCTX->height = h263_arg->SourceHeight; + break; + + case H264_ENC: + ALOGV("SsbSipMfcEncInit: H264 Encode\n"); + h264_arg = (SSBSIP_MFC_ENC_H264_PARAM *)param; + + pCTX->width = h264_arg->SourceWidth; + pCTX->height = h264_arg->SourceHeight; + break; + + default: + break; + } + + switch (pCTX->codec_type) { + case MPEG4_ENC: + mpeg4_arg = (SSBSIP_MFC_ENC_MPEG4_PARAM*)param; + + EncArg.args.enc_init_mpeg4.in_codec_type = pCTX->codec_type; + EncArg.args.enc_init_mpeg4.in_profile_level = ENC_PROFILE_LEVEL(mpeg4_arg->ProfileIDC, mpeg4_arg->LevelIDC); + + EncArg.args.enc_init_mpeg4.in_width = mpeg4_arg->SourceWidth; + EncArg.args.enc_init_mpeg4.in_height = mpeg4_arg->SourceHeight; + EncArg.args.enc_init_mpeg4.in_gop_num = mpeg4_arg->IDRPeriod; + if 
(mpeg4_arg->DisableQpelME) + EncArg.args.enc_init_mpeg4.in_qpelME_enable = 0; + else + EncArg.args.enc_init_mpeg4.in_qpelME_enable = 1; + + EncArg.args.enc_init_mpeg4.in_MS_mode = mpeg4_arg->SliceMode; + EncArg.args.enc_init_mpeg4.in_MS_size = mpeg4_arg->SliceArgument; + + if (mpeg4_arg->NumberBFrames > 2) { + ALOGE("SsbSipMfcEncInit: No such BframeNum is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_mpeg4.in_BframeNum = mpeg4_arg->NumberBFrames; + EncArg.args.enc_init_mpeg4.in_mb_refresh = mpeg4_arg->RandomIntraMBRefresh; + + /* rate control*/ + EncArg.args.enc_init_mpeg4.in_RC_frm_enable = mpeg4_arg->EnableFRMRateControl; + if ((mpeg4_arg->QSCodeMin > 51) || (mpeg4_arg->QSCodeMax > 51)) { + ALOGE("SsbSipMfcEncInit: No such Min/Max QP is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_mpeg4.in_RC_qbound = ENC_RC_QBOUND(mpeg4_arg->QSCodeMin, mpeg4_arg->QSCodeMax); + EncArg.args.enc_init_mpeg4.in_RC_rpara = mpeg4_arg->CBRPeriodRf; + + /* pad control */ + EncArg.args.enc_init_mpeg4.in_pad_ctrl_on = mpeg4_arg->PadControlOn; + if ((mpeg4_arg->LumaPadVal > 255) || (mpeg4_arg->CbPadVal > 255) || (mpeg4_arg->CrPadVal > 255)) { + ALOGE("SsbSipMfcEncInit: No such Pad value is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_mpeg4.in_luma_pad_val = mpeg4_arg->LumaPadVal; + EncArg.args.enc_init_mpeg4.in_cb_pad_val = mpeg4_arg->CbPadVal; + EncArg.args.enc_init_mpeg4.in_cr_pad_val = mpeg4_arg->CrPadVal; + EncArg.args.enc_init_mpeg4.in_frame_map = mpeg4_arg->FrameMap; + + EncArg.args.enc_init_mpeg4.in_time_increament_res = mpeg4_arg->TimeIncreamentRes; + EncArg.args.enc_init_mpeg4.in_time_vop_time_increament = mpeg4_arg->VopTimeIncreament; + EncArg.args.enc_init_mpeg4.in_RC_framerate = (mpeg4_arg->TimeIncreamentRes / mpeg4_arg->VopTimeIncreament); + EncArg.args.enc_init_mpeg4.in_RC_bitrate = mpeg4_arg->Bitrate; + if ((mpeg4_arg->FrameQp > 51) || (mpeg4_arg->FrameQp_P) > 51 || (mpeg4_arg->FrameQp_B 
> 51)) { + ALOGE("SsbSipMfcEncInit: No such FrameQp is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_mpeg4.in_frame_qp = mpeg4_arg->FrameQp; + if (mpeg4_arg->FrameQp_P) + EncArg.args.enc_init_mpeg4.in_frame_P_qp = mpeg4_arg->FrameQp_P; + else + EncArg.args.enc_init_mpeg4.in_frame_P_qp = mpeg4_arg->FrameQp; + if (mpeg4_arg->FrameQp_B) + EncArg.args.enc_init_mpeg4.in_frame_B_qp = mpeg4_arg->FrameQp_B; + else + EncArg.args.enc_init_mpeg4.in_frame_B_qp = mpeg4_arg->FrameQp; + + break; + + case H263_ENC: + h263_arg = (SSBSIP_MFC_ENC_H263_PARAM *)param; + + EncArg.args.enc_init_mpeg4.in_codec_type = pCTX->codec_type; + EncArg.args.enc_init_mpeg4.in_profile_level = ENC_PROFILE_LEVEL(66, 40); + EncArg.args.enc_init_mpeg4.in_width = h263_arg->SourceWidth; + EncArg.args.enc_init_mpeg4.in_height = h263_arg->SourceHeight; + EncArg.args.enc_init_mpeg4.in_gop_num = h263_arg->IDRPeriod; + EncArg.args.enc_init_mpeg4.in_mb_refresh = h263_arg->RandomIntraMBRefresh; + EncArg.args.enc_init_mpeg4.in_MS_mode = h263_arg->SliceMode; + EncArg.args.enc_init_mpeg4.in_MS_size = 0; + + /* rate control*/ + EncArg.args.enc_init_mpeg4.in_RC_frm_enable = h263_arg->EnableFRMRateControl; + if ((h263_arg->QSCodeMin > 51) || (h263_arg->QSCodeMax > 51)) { + ALOGE("SsbSipMfcEncInit: No such Min/Max QP is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_mpeg4.in_RC_qbound = ENC_RC_QBOUND(h263_arg->QSCodeMin, h263_arg->QSCodeMax); + EncArg.args.enc_init_mpeg4.in_RC_rpara = h263_arg->CBRPeriodRf; + + /* pad control */ + EncArg.args.enc_init_mpeg4.in_pad_ctrl_on = h263_arg->PadControlOn; + if ((h263_arg->LumaPadVal > 255) || (h263_arg->CbPadVal > 255) || (h263_arg->CrPadVal > 255)) { + ALOGE("SsbSipMfcEncInit: No such Pad value is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_mpeg4.in_luma_pad_val = h263_arg->LumaPadVal; + EncArg.args.enc_init_mpeg4.in_cb_pad_val = h263_arg->CbPadVal; + 
EncArg.args.enc_init_mpeg4.in_cr_pad_val = h263_arg->CrPadVal; + EncArg.args.enc_init_mpeg4.in_frame_map = mpeg4_arg->FrameMap; + + EncArg.args.enc_init_mpeg4.in_RC_framerate = h263_arg->FrameRate; + EncArg.args.enc_init_mpeg4.in_RC_bitrate = h263_arg->Bitrate; + if (h263_arg->FrameQp > 51) { + ALOGE("SsbSipMfcEncInit: No such FrameQp is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_mpeg4.in_frame_qp = h263_arg->FrameQp; + if (h263_arg->FrameQp_P) + EncArg.args.enc_init_mpeg4.in_frame_P_qp = h263_arg->FrameQp_P; + else + EncArg.args.enc_init_mpeg4.in_frame_P_qp = h263_arg->FrameQp; + + break; + + case H264_ENC: + h264_arg = (SSBSIP_MFC_ENC_H264_PARAM *)param; + + EncArg.args.enc_init_h264.in_codec_type = H264_ENC; + EncArg.args.enc_init_h264.in_profile_level = ENC_PROFILE_LEVEL(h264_arg->ProfileIDC, h264_arg->LevelIDC); + + EncArg.args.enc_init_h264.in_width = h264_arg->SourceWidth; + EncArg.args.enc_init_h264.in_height = h264_arg->SourceHeight; + EncArg.args.enc_init_h264.in_gop_num = h264_arg->IDRPeriod; + + if ((h264_arg->NumberRefForPframes > 2) || (h264_arg->NumberReferenceFrames > 2)) { + ALOGE("SsbSipMfcEncInit: No such ref Num is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_h264.in_reference_num = h264_arg->NumberReferenceFrames; + EncArg.args.enc_init_h264.in_ref_num_p = h264_arg->NumberRefForPframes; + + if ((h264_arg->SliceMode == 0) || (h264_arg->SliceMode == 1) || + (h264_arg->SliceMode == 2) || (h264_arg->SliceMode == 4)) { + EncArg.args.enc_init_h264.in_MS_mode = h264_arg->SliceMode; + } else { + ALOGE("SsbSipMfcEncInit: No such slice mode is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_h264.in_MS_size = h264_arg->SliceArgument; + + if (h264_arg->NumberBFrames > 2) { + ALOGE("SsbSipMfcEncInit: No such BframeNum is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_h264.in_BframeNum = h264_arg->NumberBFrames; + + 
EncArg.args.enc_init_h264.in_deblock_filt = h264_arg->LoopFilterDisable; + if ((abs(h264_arg->LoopFilterAlphaC0Offset) > 6) || (abs(h264_arg->LoopFilterBetaOffset) > 6)) { + ALOGE("SsbSipMfcEncInit: No such AlphaC0Offset or BetaOffset is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_h264.in_deblock_alpha_C0 = h264_arg->LoopFilterAlphaC0Offset; + EncArg.args.enc_init_h264.in_deblock_beta = h264_arg->LoopFilterBetaOffset; + + EncArg.args.enc_init_h264.in_symbolmode = h264_arg->SymbolMode; + EncArg.args.enc_init_h264.in_interlace_mode = h264_arg->PictureInterlace; + EncArg.args.enc_init_h264.in_transform8x8_mode = h264_arg->Transform8x8Mode; + + EncArg.args.enc_init_h264.in_mb_refresh = h264_arg->RandomIntraMBRefresh; + + /* pad control */ + EncArg.args.enc_init_h264.in_pad_ctrl_on = h264_arg->PadControlOn; + if ((h264_arg->LumaPadVal > 255) || (h264_arg->CbPadVal > 255) || (h264_arg->CrPadVal > 255)) { + ALOGE("SsbSipMfcEncInit: No such Pad value is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_h264.in_luma_pad_val = h264_arg->LumaPadVal; + EncArg.args.enc_init_h264.in_cb_pad_val = h264_arg->CbPadVal; + EncArg.args.enc_init_h264.in_cr_pad_val = h264_arg->CrPadVal; + EncArg.args.enc_init_mpeg4.in_frame_map = mpeg4_arg->FrameMap; + + /* rate control*/ + EncArg.args.enc_init_h264.in_RC_frm_enable = h264_arg->EnableFRMRateControl; + EncArg.args.enc_init_h264.in_RC_mb_enable = h264_arg->EnableMBRateControl; + EncArg.args.enc_init_h264.in_RC_framerate = h264_arg->FrameRate; + EncArg.args.enc_init_h264.in_RC_bitrate = h264_arg->Bitrate; + if (h264_arg->FrameQp > 51) { + ALOGE("SsbSipMfcEncInit: No such FrameQp is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_h264.in_frame_qp = h264_arg->FrameQp; + if (h264_arg->FrameQp_P) + EncArg.args.enc_init_h264.in_frame_P_qp = h264_arg->FrameQp_P; + else + EncArg.args.enc_init_h264.in_frame_P_qp = h264_arg->FrameQp; + if (h264_arg->FrameQp_B) + 
EncArg.args.enc_init_h264.in_frame_B_qp = h264_arg->FrameQp_B; + else + EncArg.args.enc_init_h264.in_frame_B_qp = h264_arg->FrameQp; + + if ((h264_arg->QSCodeMin > 51) || (h264_arg->QSCodeMax > 51)) { + ALOGE("SsbSipMfcEncInit: No such Min/Max QP is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + EncArg.args.enc_init_h264.in_RC_qbound = ENC_RC_QBOUND(h264_arg->QSCodeMin, h264_arg->QSCodeMax); + EncArg.args.enc_init_h264.in_RC_rpara = h264_arg->CBRPeriodRf; + EncArg.args.enc_init_h264.in_RC_mb_dark_disable = h264_arg->DarkDisable; + EncArg.args.enc_init_h264.in_RC_mb_smooth_disable = h264_arg->SmoothDisable; + EncArg.args.enc_init_h264.in_RC_mb_static_disable = h264_arg->StaticDisable; + EncArg.args.enc_init_h264.in_RC_mb_activity_disable = h264_arg->ActivityDisable; + + /* default setting */ + EncArg.args.enc_init_h264.in_md_interweight_pps = 0; + EncArg.args.enc_init_h264.in_md_intraweight_pps = 0; + break; + + default: + break; + } + + EncArg.args.enc_init_mpeg4.in_mapped_addr = pCTX->mapped_addr; + + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_ENC_INIT, &EncArg); + if (EncArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcEncInit: IOCTL_MFC_ENC_INIT (%d) failed\n", EncArg.ret_code); + return MFC_RET_ENC_INIT_FAIL; + } + + pCTX->virStrmBuf = EncArg.args.enc_init_mpeg4.out_u_addr.strm_ref_y; + pCTX->phyStrmBuf = EncArg.args.enc_init_mpeg4.out_p_addr.strm_ref_y; + pCTX->sizeStrmBuf = MAX_ENCODER_OUTPUT_BUFFER_SIZE; + pCTX->encodedHeaderSize = EncArg.args.enc_init_mpeg4.out_header_size; + + pCTX->virMvRefYC = EncArg.args.enc_init_mpeg4.out_u_addr.mv_ref_yc; + + pCTX->inter_buff_status |= MFC_USE_STRM_BUFF; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncExe(void *openHandle) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args EncArg; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcEncExe: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + + memset(&EncArg, 0x00, sizeof(mfc_common_args)); + + 
EncArg.args.enc_exe.in_codec_type = pCTX->codec_type; + EncArg.args.enc_exe.in_Y_addr = (unsigned int)pCTX->phyFrmBuf.luma; + EncArg.args.enc_exe.in_CbCr_addr = (unsigned int)pCTX->phyFrmBuf.chroma; + EncArg.args.enc_exe.in_Y_addr_vir = (unsigned int)pCTX->virFrmBuf.luma; + EncArg.args.enc_exe.in_CbCr_addr_vir = (unsigned int)pCTX->virFrmBuf.chroma; + EncArg.args.enc_exe.in_strm_st = (unsigned int)pCTX->phyStrmBuf; + EncArg.args.enc_exe.in_strm_end = (unsigned int)pCTX->phyStrmBuf + pCTX->sizeStrmBuf; + EncArg.args.enc_exe.in_frametag = pCTX->in_frametag; + if (pCTX->encode_cnt == 0) { + EncArg.args.enc_exe.in_strm_st = (unsigned int)pCTX->phyStrmBuf; + EncArg.args.enc_exe.in_strm_end = (unsigned int)pCTX->phyStrmBuf + pCTX->sizeStrmBuf; + } else { + EncArg.args.enc_exe.in_strm_st = (unsigned int)pCTX->phyStrmBuf + (MAX_ENCODER_OUTPUT_BUFFER_SIZE/2); + EncArg.args.enc_exe.in_strm_end = (unsigned int)pCTX->phyStrmBuf + (MAX_ENCODER_OUTPUT_BUFFER_SIZE/2) + pCTX->sizeStrmBuf; + } + + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_ENC_EXE, &EncArg); + if (EncArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcDecExe: IOCTL_MFC_ENC_EXE failed(ret : %d)\n", EncArg.ret_code); + return MFC_RET_ENC_EXE_ERR; + } + + pCTX->encodedDataSize = EncArg.args.enc_exe.out_encoded_size; + pCTX->encodedframeType = EncArg.args.enc_exe.out_frame_type; + pCTX->encoded_Y_paddr = EncArg.args.enc_exe.out_encoded_Y_paddr; + pCTX->encoded_C_paddr = EncArg.args.enc_exe.out_encoded_C_paddr; + pCTX->out_frametag_top = EncArg.args.enc_exe.out_frametag_top; + pCTX->out_frametag_bottom = EncArg.args.enc_exe.out_frametag_bottom; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncClose(void *openHandle) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args free_arg; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcEncClose: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + + if (pCTX->inter_buff_status & MFC_USE_YUV_BUFF) { + free_arg.args.mem_free.u_addr = 
pCTX->virFrmBuf.luma; + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_FREE_BUF, &free_arg); + } + + if (pCTX->inter_buff_status & MFC_USE_STRM_BUFF) { + free_arg.args.mem_free.u_addr = pCTX->virStrmBuf; + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_FREE_BUF, &free_arg); + free_arg.args.mem_free.u_addr = pCTX->virMvRefYC; + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_FREE_BUF, &free_arg); + } + + pCTX->inter_buff_status = MFC_USE_NONE; + + munmap((void *)pCTX->mapped_addr, MMAP_BUFFER_SIZE_MMAP); + close(pCTX->hMFC); + free(pCTX); + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncSetSize(void *openHandle, SSBSIP_MFC_CODEC_TYPE codecType, int nWidth, int nHeight) +{ + _MFCLIB *pCTX = (_MFCLIB *)openHandle; + + if (pCTX == NULL) + return MFC_RET_INVALID_PARAM; + + if (nWidth <= 0 || nHeight <= 0) + return MFC_RET_INVALID_PARAM; + pCTX->width = nWidth; + pCTX->height = nHeight; + + if ((H264_ENC != codecType) && + (MPEG4_ENC != codecType) && + (H263_ENC != codecType)) + return MFC_RET_INVALID_PARAM; + pCTX->codec_type = codecType; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncGetInBuf(void *openHandle, SSBSIP_MFC_ENC_INPUT_INFO *input_info) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args user_addr_arg, phys_addr_arg; + int y_size, c_size; + int aligned_y_size, aligned_c_size; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcEncGetInBuf: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + + user_addr_arg.args.mem_alloc.codec_type = pCTX->codec_type; + + y_size = pCTX->width * pCTX->height; + c_size = (pCTX->width * pCTX->height) >> 1; + + aligned_y_size = ALIGN_TO_8KB(ALIGN_TO_128B(pCTX->width) * ALIGN_TO_32B(pCTX->height)); + aligned_c_size = ALIGN_TO_8KB(ALIGN_TO_128B(pCTX->width) * ALIGN_TO_32B(pCTX->height/2)); + + /* Allocate luma & chroma buf */ + user_addr_arg.args.mem_alloc.buff_size = aligned_y_size + aligned_c_size; + user_addr_arg.args.mem_alloc.mapped_addr = pCTX->mapped_addr; + ret_code = 
ioctl(pCTX->hMFC, IOCTL_MFC_GET_IN_BUF, &user_addr_arg); + if (ret_code < 0) { + ALOGE("SsbSipMfcEncGetInBuf: IOCTL_MFC_GET_IN_BUF failed\n"); + return MFC_RET_ENC_GET_INBUF_FAIL; + } + pCTX->virFrmBuf.luma = user_addr_arg.args.mem_alloc.out_uaddr; + pCTX->virFrmBuf.chroma = user_addr_arg.args.mem_alloc.out_uaddr + (unsigned int)aligned_y_size; + pCTX->phyFrmBuf.luma = user_addr_arg.args.mem_alloc.out_paddr; + pCTX->phyFrmBuf.chroma = user_addr_arg.args.mem_alloc.out_paddr + (unsigned int)aligned_y_size; + + pCTX->sizeFrmBuf.luma = (unsigned int)y_size; + pCTX->sizeFrmBuf.chroma = (unsigned int)c_size; + pCTX->inter_buff_status |= MFC_USE_YUV_BUFF; + + input_info->YPhyAddr = (void*)pCTX->phyFrmBuf.luma; + input_info->CPhyAddr = (void*)pCTX->phyFrmBuf.chroma; + input_info->YVirAddr = (void*)pCTX->virFrmBuf.luma; + input_info->CVirAddr = (void*)pCTX->virFrmBuf.chroma; + + input_info->YSize = aligned_y_size; + input_info->CSize = aligned_c_size; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncSetInBuf(void *openHandle, SSBSIP_MFC_ENC_INPUT_INFO *input_info) +{ + _MFCLIB *pCTX; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcEncSetInBuf: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + ALOGV("SsbSipMfcEncSetInBuf: input_info->YPhyAddr & input_info->CPhyAddr should be 64KB aligned\n"); + + pCTX = (_MFCLIB *)openHandle; + + pCTX->phyFrmBuf.luma = (unsigned int)input_info->YPhyAddr; + pCTX->phyFrmBuf.chroma = (unsigned int)input_info->CPhyAddr; + pCTX->virFrmBuf.luma = (unsigned int)input_info->YVirAddr; + pCTX->virFrmBuf.chroma = (unsigned int)input_info->CVirAddr; + + pCTX->sizeFrmBuf.luma = (unsigned int)input_info->YSize; + pCTX->sizeFrmBuf.chroma = (unsigned int)input_info->CSize; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncGetOutBuf(void *openHandle, SSBSIP_MFC_ENC_OUTPUT_INFO *output_info) +{ + _MFCLIB *pCTX; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcEncGetOutBuf: openHandle is NULL\n"); + return 
MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + + output_info->headerSize = pCTX->encodedHeaderSize; + output_info->dataSize = pCTX->encodedDataSize; + + if (pCTX->encode_cnt == 0) { + output_info->StrmPhyAddr = (void *)pCTX->phyStrmBuf; + output_info->StrmVirAddr = (void *)pCTX->virStrmBuf; + } else { + output_info->StrmPhyAddr = (unsigned char *)pCTX->phyStrmBuf + (MAX_ENCODER_OUTPUT_BUFFER_SIZE/2); + output_info->StrmVirAddr = (unsigned char *)pCTX->virStrmBuf + (MAX_ENCODER_OUTPUT_BUFFER_SIZE/2); + } + + pCTX->encode_cnt ++; + pCTX->encode_cnt %= 2; + + if (pCTX->encodedframeType == 0) + output_info->frameType = MFC_FRAME_TYPE_NOT_CODED; + else if (pCTX->encodedframeType == 1) + output_info->frameType = MFC_FRAME_TYPE_I_FRAME; + else if (pCTX->encodedframeType == 2) + output_info->frameType = MFC_FRAME_TYPE_P_FRAME; + else if (pCTX->encodedframeType == 3) + output_info->frameType = MFC_FRAME_TYPE_B_FRAME; + else if (pCTX->encodedframeType == 4) + output_info->frameType = MFC_FRAME_TYPE_OTHERS; + else { + ALOGE("Strange encoded frame type = %d\n", pCTX->encodedframeType); + return MFC_RET_INVALID_PARAM; + } + + output_info->encodedYPhyAddr = (void *)pCTX->encoded_Y_paddr; + output_info->encodedCPhyAddr = (void *)pCTX->encoded_C_paddr; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncSetOutBuf(void *openHandle, void *phyOutbuf, void *virOutbuf, int outputBufferSize) +{ + _MFCLIB *pCTX; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcEncSetOutBuf: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + + pCTX->phyStrmBuf = (int)phyOutbuf; + pCTX->virStrmBuf = (int)virOutbuf; + pCTX->sizeStrmBuf = outputBufferSize; + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncSetConfig(void *openHandle, SSBSIP_MFC_ENC_CONF conf_type, void *value) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args EncArg; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcEncSetConfig: openHandle is NULL\n"); + 
return MFC_RET_INVALID_PARAM; + } + + if (value == NULL) { + ALOGE("SsbSipMfcEncSetConfig: value is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + memset(&EncArg, 0x00, sizeof(mfc_common_args)); + + switch (conf_type) { + case MFC_ENC_SETCONF_FRAME_TYPE: + case MFC_ENC_SETCONF_CHANGE_FRAME_RATE: + case MFC_ENC_SETCONF_CHANGE_BIT_RATE: + case MFC_ENC_SETCONF_ALLOW_FRAME_SKIP: + EncArg.args.set_config.in_config_param = conf_type; + EncArg.args.set_config.in_config_value[0] = *((unsigned int *) value); + EncArg.args.set_config.in_config_value[1] = 0; + break; + + case MFC_ENC_SETCONF_FRAME_TAG: + pCTX->in_frametag = *((int *)value); + return MFC_RET_OK; + + default: + ALOGE("SsbSipMfcEncSetConfig: No such conf_type is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + + ret_code = ioctl(pCTX->hMFC, IOCTL_MFC_SET_CONFIG, &EncArg); + if (EncArg.ret_code != MFC_RET_OK) { + ALOGE("SsbSipMfcEncSetConfig: IOCTL_MFC_SET_CONFIG failed(ret : %d)\n", EncArg.ret_code); + return MFC_RET_ENC_SET_CONF_FAIL; + } + + return MFC_RET_OK; +} + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncGetConfig(void *openHandle, SSBSIP_MFC_ENC_CONF conf_type, void *value) +{ + int ret_code; + _MFCLIB *pCTX; + mfc_common_args EncArg; + + pCTX = (_MFCLIB *)openHandle; + + if (openHandle == NULL) { + ALOGE("SsbSipMfcEncGetConfig: openHandle is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + if (value == NULL) { + ALOGE("SsbSipMfcEncGetConfig: value is NULL\n"); + return MFC_RET_INVALID_PARAM; + } + + pCTX = (_MFCLIB *)openHandle; + memset(&EncArg, 0x00, sizeof(mfc_common_args)); + + switch (conf_type) { + case MFC_ENC_GETCONF_FRAME_TAG: + *((unsigned int *)value) = pCTX->out_frametag_top; + break; + + default: + ALOGE("SsbSipMfcEncGetConfig: No such conf_type is supported.\n"); + return MFC_RET_INVALID_PARAM; + } + + return MFC_RET_OK; +} diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h 
b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h new file mode 100644 index 0000000..118e1ba --- /dev/null +++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h @@ -0,0 +1,332 @@ +/* + * Copyright 2010 Samsung Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _SSBSIP_MFC_API_H_ +#define _SSBSIP_MFC_API_H_ + +/*--------------------------------------------------------------------------------*/ +/* Definition */ +/*--------------------------------------------------------------------------------*/ +#define MAX_DECODER_INPUT_BUFFER_SIZE (1024 * 3072) +#define MAX_ENCODER_OUTPUT_BUFFER_SIZE (1024 * 3072) + +#define MMAP_BUFFER_SIZE_MMAP (35328*1024) // 34.5*1024*1024 + +#define S5PC110_MFC_DEV_NAME "/dev/s3c-mfc" + +/*--------------------------------------------------------------------------------*/ +/* Structure and Type */ +/*--------------------------------------------------------------------------------*/ +typedef enum { + H264_DEC, + VC1_DEC, /* VC1 advaced Profile decoding */ + MPEG4_DEC, + XVID_DEC, + MPEG1_DEC, + MPEG2_DEC, + H263_DEC, + VC1RCV_DEC, /* VC1 simple/main profile decoding */ + FIMV1_DEC, + FIMV2_DEC, + FIMV3_DEC, + FIMV4_DEC, + H264_ENC, + MPEG4_ENC, + H263_ENC, + UNKNOWN_TYPE +} SSBSIP_MFC_CODEC_TYPE; + +typedef enum { + DONT_CARE = 0, + I_FRAME = 1, + NOT_CODED = 2 +} SSBSIP_MFC_FORCE_SET_FRAME_TYPE; + +typedef enum { + NV12_LINEAR = 0, + 
NV12_TILE +} SSBSIP_MFC_INSTRM_MODE_TYPE; + +typedef enum { + NO_CACHE = 0, + CACHE = 1 +} SSBIP_MFC_BUFFER_TYPE; + +typedef enum { + MFC_DEC_SETCONF_POST_ENABLE = 1, + MFC_DEC_SETCONF_EXTRA_BUFFER_NUM, + MFC_DEC_SETCONF_DISPLAY_DELAY, + MFC_DEC_SETCONF_IS_LAST_FRAME, + MFC_DEC_SETCONF_SLICE_ENABLE, + MFC_DEC_SETCONF_CRC_ENABLE, + MFC_DEC_SETCONF_FIMV1_WIDTH_HEIGHT, + MFC_DEC_SETCONF_FRAME_TAG, + MFC_DEC_GETCONF_CRC_DATA, + MFC_DEC_GETCONF_BUF_WIDTH_HEIGHT, + MFC_DEC_GETCONF_CROP_INFO, + MFC_DEC_GETCONF_FRAME_TAG +} SSBSIP_MFC_DEC_CONF; + +typedef enum { + MFC_ENC_SETCONF_FRAME_TYPE = 100, + MFC_ENC_SETCONF_CHANGE_FRAME_RATE, + MFC_ENC_SETCONF_CHANGE_BIT_RATE, + MFC_ENC_SETCONF_FRAME_TAG, + MFC_ENC_SETCONF_ALLOW_FRAME_SKIP, + MFC_ENC_GETCONF_FRAME_TAG +} SSBSIP_MFC_ENC_CONF; + +typedef enum { + MFC_GETOUTBUF_STATUS_NULL = 0, + MFC_GETOUTBUF_DECODING_ONLY = 1, + MFC_GETOUTBUF_DISPLAY_DECODING, + MFC_GETOUTBUF_DISPLAY_ONLY, + MFC_GETOUTBUF_DISPLAY_END +} SSBSIP_MFC_DEC_OUTBUF_STATUS; + +typedef enum { + MFC_FRAME_TYPE_NOT_CODED, + MFC_FRAME_TYPE_I_FRAME, + MFC_FRAME_TYPE_P_FRAME, + MFC_FRAME_TYPE_B_FRAME, + MFC_FRAME_TYPE_OTHERS +} SSBSIP_MFC_FRAME_TYPE; + +typedef enum { + MFC_RET_OK = 1, + MFC_RET_FAIL = -1000, + MFC_RET_OPEN_FAIL = -1001, + MFC_RET_CLOSE_FAIL = -1002, + + MFC_RET_DEC_INIT_FAIL = -2000, + MFC_RET_DEC_EXE_TIME_OUT = -2001, + MFC_RET_DEC_EXE_ERR = -2002, + MFC_RET_DEC_GET_INBUF_FAIL = -2003, + MFC_RET_DEC_SET_INBUF_FAIL = -2004, + MFC_RET_DEC_GET_OUTBUF_FAIL = -2005, + MFC_RET_DEC_GET_CONF_FAIL = -2006, + MFC_RET_DEC_SET_CONF_FAIL = -2007, + + MFC_RET_ENC_INIT_FAIL = -3000, + MFC_RET_ENC_EXE_TIME_OUT = -3001, + MFC_RET_ENC_EXE_ERR = -3002, + MFC_RET_ENC_GET_INBUF_FAIL = -3003, + MFC_RET_ENC_SET_INBUF_FAIL = -3004, + MFC_RET_ENC_GET_OUTBUF_FAIL = -3005, + MFC_RET_ENC_SET_OUTBUF_FAIL = -3006, + MFC_RET_ENC_GET_CONF_FAIL = -3007, + MFC_RET_ENC_SET_CONF_FAIL = -3008, + + MFC_RET_INVALID_PARAM = -4000 +} SSBSIP_MFC_ERROR_CODE; + +typedef struct { + void 
*YPhyAddr; // [OUT] physical address of Y + void *CPhyAddr; // [OUT] physical address of CbCr + void *YVirAddr; // [OUT] virtual address of Y + void *CVirAddr; // [OUT] virtual address of CbCr + + int img_width; // [OUT] width of real image + int img_height; // [OUT] height of real image + int buf_width; // [OUT] width aligned to 16 + int buf_height; // [OUT] height alighed to 16 + + int timestamp_top; // [OUT] timestamp of top filed(This is used for interlaced stream) + int timestamp_bottom; // [OUT] timestamp of bottom filed(This is used for interlaced stream) + int consumedByte; // [OUT] the number of byte consumed during decoding + int res_change; // [OUT] whether resolution is changed or not. 0: not change, 1: increased, 2: decreased + int crop_top_offset; // [OUT] crop information, top_offset + int crop_bottom_offset; // [OUT] crop information, bottom_offset + int crop_left_offset; // [OUT] crop information, left_offset + int crop_right_offset; // [OUT] crop information, right_offset +} SSBSIP_MFC_DEC_OUTPUT_INFO; + +typedef struct { + void *YPhyAddr; // [IN/OUT] physical address of Y + void *CPhyAddr; // [IN/OUT] physical address of CbCr + void *YVirAddr; // [IN/OUT] virtual address of Y + void *CVirAddr; // [IN/OUT] virtual address of CbCr + int YSize; // [IN/OUT] input size of Y data + int CSize; // [IN/OUT] input size of CbCr data +} SSBSIP_MFC_ENC_INPUT_INFO; + +typedef struct { + unsigned int dataSize; // [OUT] encoded data size(without header) + unsigned int headerSize; // [OUT] encoded header size + unsigned int frameType; // [OUT] frame type of encoded stream + void *StrmPhyAddr; // [OUT] physical address of Y + void *StrmVirAddr; // [OUT] virtual address of Y + void *encodedYPhyAddr; // [OUT] physical address of Y which is flushed + void *encodedCPhyAddr; // [OUT] physical address of C which is flushed +} SSBSIP_MFC_ENC_OUTPUT_INFO; + +typedef struct { + // common parameters + SSBSIP_MFC_CODEC_TYPE codecType; // [IN] codec type + int SourceWidth; // 
[IN] width of video to be encoded + int SourceHeight; // [IN] height of video to be encoded + int IDRPeriod; // [IN] GOP number(interval of I-frame) + int SliceMode; // [IN] Multi slice mode + int RandomIntraMBRefresh; // [IN] cyclic intra refresh + int EnableFRMRateControl; // [IN] frame based rate control enable + int Bitrate; // [IN] rate control parameter(bit rate) + int FrameQp; // [IN] The quantization parameter of the frame + int FrameQp_P; // [IN] The quantization parameter of the P frame + int QSCodeMax; // [IN] Maximum Quantization value + int QSCodeMin; // [IN] Minimum Quantization value + int CBRPeriodRf; // [IN] Reaction coefficient parameter for rate control + int PadControlOn; // [IN] Enable padding control + int LumaPadVal; // [IN] Luma pel value used to fill padding area + int CbPadVal; // [IN] CB pel value used to fill padding area + int CrPadVal; // [IN] CR pel value used to fill padding area + int FrameMap; // [IN] Encoding input mode(tile mode or linear mode) + + // H.264 specific parameters + int ProfileIDC; // [IN] profile + int LevelIDC; // [IN] level + int FrameQp_B; // [IN] The quantization parameter of the B frame + int FrameRate; // [IN] rate control parameter(frame rate) + int SliceArgument; // [IN] MB number or byte number + int NumberBFrames; // [IN] The number of consecutive B frame inserted + int NumberReferenceFrames; // [IN] The number of reference pictures used + int NumberRefForPframes; // [IN] The number of reference pictures used for encoding P pictures + int LoopFilterDisable; // [IN] disable the loop filter + int LoopFilterAlphaC0Offset; // [IN] Alpha & C0 offset for H.264 loop filter + int LoopFilterBetaOffset; // [IN] Beta offset for H.264 loop filter + int SymbolMode; // [IN] The mode of entropy coding(CABAC, CAVLC) + int PictureInterlace; // [IN] Enables the interlace mode + int Transform8x8Mode; // [IN] Allow 8x8 transform(This is allowed only for high profile) + int EnableMBRateControl; // [IN] Enable macroblock-level 
rate control + int DarkDisable; // [IN] Disable adaptive rate control on dark region + int SmoothDisable; // [IN] Disable adaptive rate control on smooth region + int StaticDisable; // [IN] Disable adaptive rate control on static region + int ActivityDisable; // [IN] Disable adaptive rate control on high activity region +} SSBSIP_MFC_ENC_H264_PARAM; + +typedef struct { + // common parameters + SSBSIP_MFC_CODEC_TYPE codecType; // [IN] codec type + int SourceWidth; // [IN] width of video to be encoded + int SourceHeight; // [IN] height of video to be encoded + int IDRPeriod; // [IN] GOP number(interval of I-frame) + int SliceMode; // [IN] Multi slice mode + int RandomIntraMBRefresh; // [IN] cyclic intra refresh + int EnableFRMRateControl; // [IN] frame based rate control enable + int Bitrate; // [IN] rate control parameter(bit rate) + int FrameQp; // [IN] The quantization parameter of the frame + int FrameQp_P; // [IN] The quantization parameter of the P frame + int QSCodeMax; // [IN] Maximum Quantization value + int QSCodeMin; // [IN] Minimum Quantization value + int CBRPeriodRf; // [IN] Reaction coefficient parameter for rate control + int PadControlOn; // [IN] Enable padding control + int LumaPadVal; // [IN] Luma pel value used to fill padding area + int CbPadVal; // [IN] CB pel value used to fill padding area + int CrPadVal; // [IN] CR pel value used to fill padding area + int FrameMap; // [IN] Encoding input mode(tile mode or linear mode) + + // MPEG4 specific parameters + int ProfileIDC; // [IN] profile + int LevelIDC; // [IN] level + int FrameQp_B; // [IN] The quantization parameter of the B frame + int TimeIncreamentRes; // [IN] frame rate + int VopTimeIncreament; // [IN] frame rate + int SliceArgument; // [IN] MB number or byte number + int NumberBFrames; // [IN] The number of consecutive B frame inserted + int DisableQpelME; // [IN] disable quarter-pixel motion estimation +} SSBSIP_MFC_ENC_MPEG4_PARAM; + +typedef struct { + // common parameters + 
SSBSIP_MFC_CODEC_TYPE codecType; // [IN] codec type + int SourceWidth; // [IN] width of video to be encoded + int SourceHeight; // [IN] height of video to be encoded + int IDRPeriod; // [IN] GOP number(interval of I-frame) + int SliceMode; // [IN] Multi slice mode + int RandomIntraMBRefresh; // [IN] cyclic intra refresh + int EnableFRMRateControl; // [IN] frame based rate control enable + int Bitrate; // [IN] rate control parameter(bit rate) + int FrameQp; // [IN] The quantization parameter of the frame + int FrameQp_P; // [IN] The quantization parameter of the P frame + int QSCodeMax; // [IN] Maximum Quantization value + int QSCodeMin; // [IN] Minimum Quantization value + int CBRPeriodRf; // [IN] Reaction coefficient parameter for rate control + int PadControlOn; // [IN] Enable padding control + int LumaPadVal; // [IN] Luma pel value used to fill padding area + int CbPadVal; // [IN] CB pel value used to fill padding area + int CrPadVal; // [IN] CR pel value used to fill padding area + int FrameMap; // [IN] Encoding input mode(tile mode or linear mode) + + // H.263 specific parameters + int FrameRate; // [IN] rate control parameter(frame rate) +} SSBSIP_MFC_ENC_H263_PARAM; + +typedef struct { + int width; + int height; + int buf_width; + int buf_height; +} SSBSIP_MFC_IMG_RESOLUTION; + +typedef struct { + int crop_top_offset; + int crop_bottom_offset; + int crop_left_offset; + int crop_right_offset; +} SSBSIP_MFC_CROP_INFORMATION; + +#ifdef __cplusplus +extern "C" { +#endif + +/*--------------------------------------------------------------------------------*/ +/* Decoding APIs */ +/*--------------------------------------------------------------------------------*/ +void *SsbSipMfcDecOpen(void *value); +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecInit(void *openHandle, SSBSIP_MFC_CODEC_TYPE codec_type, int Frameleng); +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecExe(void *openHandle, int lengthBufFill); +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecClose(void *openHandle); + +void 
*SsbSipMfcDecGetInBuf(void *openHandle, void **phyInBuf, int inputBufferSize); +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecSetInBuf(void *openHandle, void *phyInBuf, void *virInBuf, int inputBufferSize); + +SSBSIP_MFC_DEC_OUTBUF_STATUS SsbSipMfcDecGetOutBuf(void *openHandle, SSBSIP_MFC_DEC_OUTPUT_INFO *output_info); + +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecSetConfig(void *openHandle, SSBSIP_MFC_DEC_CONF conf_type, void *value); +SSBSIP_MFC_ERROR_CODE SsbSipMfcDecGetConfig(void *openHandle, SSBSIP_MFC_DEC_CONF conf_type, void *value); + +/*--------------------------------------------------------------------------------*/ +/* Encoding APIs */ +/*--------------------------------------------------------------------------------*/ +void *SsbSipMfcEncOpen(void *value); +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncInit(void *openHandle, void *param); +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncExe(void *openHandle); +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncClose(void *openHandle); + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncSetSize(void *openHandle, SSBSIP_MFC_CODEC_TYPE codecType, int nWidth, int nHeight); +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncGetInBuf(void *openHandle, SSBSIP_MFC_ENC_INPUT_INFO *input_info); +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncSetInBuf(void *openHandle, SSBSIP_MFC_ENC_INPUT_INFO *input_info); + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncGetOutBuf(void *openHandle, SSBSIP_MFC_ENC_OUTPUT_INFO *output_info); +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncSetOutBuf (void *openHandle, void *phyOutbuf, void *virOutbuf, int outputBufferSize); + +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncSetConfig(void *openHandle, SSBSIP_MFC_ENC_CONF conf_type, void *value); +SSBSIP_MFC_ERROR_CODE SsbSipMfcEncGetConfig(void *openHandle, SSBSIP_MFC_ENC_CONF conf_type, void *value); + +#ifdef __cplusplus +} +#endif + +#endif /* _SSBSIP_MFC_API_H_ */ diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h new file mode 
100644 index 0000000..4ad5bda --- /dev/null +++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h @@ -0,0 +1,176 @@ +/* + * + * Copyright 2011 Samsung Electronics S.LSI Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @file color_space_convertor.h + * @brief SEC_OMX specific define. + * NV12T(tiled) layout: + * Each element is not pixel. It is 64x32 pixel block. + * uv pixel block is interleaved as u v u v u v ... + * y1 y2 y7 y8 y9 y10 y15 y16 + * y3 y4 y5 y6 y11 y12 y13 y14 + * y17 y18 y23 y24 y25 y26 y31 y32 + * y19 y20 y21 y22 y27 y28 y29 y30 + * uv1 uv2 uv7 uv8 uv9 uv10 uv15 uv16 + * uv3 uv4 uv5 uv6 uv11 uv12 uv13 uv14 + * YUV420Planar(linear) layout: + * Each element is not pixel. It is 64x32 pixel block. + * y1 y2 y3 y4 y5 y6 y7 y8 + * y9 y10 y11 y12 y13 y14 y15 y16 + * y17 y18 y19 y20 y21 y22 y23 y24 + * y25 y26 y27 y28 y29 y30 y31 y32 + * u1 u2 u3 u4 u5 u6 u7 u8 + * v1 v2 v3 v4 v5 v6 v7 v8 + * YUV420Semiplanar(linear) layout: + * Each element is not pixel. It is 64x32 pixel block. + * uv pixel block is interleaved as u v u v u v ... 
+ * y1 y2 y3 y4 y5 y6 y7 y8 + * y9 y10 y11 y12 y13 y14 y15 y16 + * y17 y18 y19 y20 y21 y22 y23 y24 + * y25 y26 y27 y28 y29 y30 y31 y32 + * uv1 uv2 uv3 uv4 uv5 uv6 uv7 uv8 + * uv9 uv10 uv11 uv12 uv13 uv14 uv15 uv16 + * @author ShinWon Lee (shinwon.lee@samsung.com) + * @version 1.0 + * @history + * 2011.7.01 : Create + */ + +#ifndef COLOR_SPACE_CONVERTOR_H_ +#define COLOR_SPACE_CONVERTOR_H_ + +/*--------------------------------------------------------------------------------*/ +/* Format Conversion API */ +/*--------------------------------------------------------------------------------*/ +/* C Code */ +/* + * De-interleaves src to dest1, dest2 + * + * @param dest1 + * Address of de-interleaved data[out] + * + * @param dest2 + * Address of de-interleaved data[out] + * + * @param src + * Address of interleaved data[in] + * + * @param src_size + * Size of interleaved data[in] + */ +void csc_deinterleave_memcpy(char *dest1, char *dest2, char *src, int src_size); + +/* + * Interleaves src1, src2 to dest + * + * @param dest + * Address of interleaved data[out] + * + * @param src1 + * Address of de-interleaved data[in] + * + * @param src2 + * Address of de-interleaved data[in] + * + * @param src_size + * Size of de-interleaved data[in] + */ +void csc_interleave_memcpy(char *dest, char *src1, char *src2, int src_size); + +/* + * Converts tiled data to linear. + * 1. Y of NV12T to Y of YUV420P + * 2. Y of NV12T to Y of YUV420S + * 3. UV of NV12T to UV of YUV420S + * + * @param yuv420_dest + * Y or UV plane address of YUV420[out] + * + * @param nv12t_src + * Y or UV plane address of NV12T[in] + * + * @param yuv420_width + * Width of YUV420[in] + * + * @param yuv420_height + * Y: Height of YUV420, UV: Height/2 of YUV420[in] + */ +void csc_tiled_to_linear(char *yuv420p_y_dest, char *nv12t_y_src, int yuv420p_width, int yuv420p_y_height); + +/* + * Converts and Deinterleaves tiled data to linear + * 1. 
UV of NV12T to UV of YUV420P + * + * @param yuv420_u_dest + * U plane address of YUV420P[out] + * + * @param yuv420_v_dest + * V plane address of YUV420P[out] + * + * @param nv12t_src + * UV plane address of NV12T[in] + * + * @param yuv420_width + * Width of YUV420[in] + * + * @param yuv420_uv_height + * Height/2 of YUV420[in] + */ +void csc_tiled_to_linear_deinterleave(char *yuv420p_u_dest, char *yuv420p_v_dest, char *nv12t_uv_src, int yuv420p_width, int yuv420p_uv_height); + +/* + * Converts linear data to tiled. + * 1. Y of YUV420P to Y of NV12T + * 2. Y of YUV420S to Y of NV12T + * 3. UV of YUV420S to UV of NV12T + * + * @param nv12t_dest + * Y or UV plane address of NV12T[out] + * + * @param yuv420_src + * Y or UV plane address of YUV420P(S)[in] + * + * @param yuv420_width + * Width of YUV420[in] + * + * @param yuv420_height + * Y: Height of YUV420, UV: Height/2 of YUV420[in] + */ +void csc_linear_to_tiled(char *nv12t_dest, char *yuv420p_src, int yuv420p_width, int yuv420p_y_height); + +/* + * Converts and Interleaves linear to tiled + * 1. UV of YUV420P to UV of NV12T + * + * @param nv12t_uv_dest + * UV plane address of NV12T[out] + * + * @param yuv420p_u_src + * U plane address of YUV420P[in] + * + * @param yuv420p_v_src + * V plane address of YUV420P[in] + * + * @param yuv420_width + * Width of YUV420[in] + * + * @param yuv420_uv_height + * Height/2 of YUV420[in] + */ +void csc_linear_to_tiled_interleave(char *nv12t_uv_dest, char *yuv420p_u_src, char *yuv420p_v_src, int yuv420p_width, int yuv420p_uv_height); + +#endif /*COLOR_SPACE_CONVERTOR_H_*/ diff --git a/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/mfc_interface.h b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/mfc_interface.h new file mode 100644 index 0000000..466860d --- /dev/null +++ b/exynos3/s5pc110/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/mfc_interface.h @@ -0,0 +1,347 @@ +/* + * Copyright 2010 Samsung Electronics Co. 
LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _MFC_INTERFACE_H_ +#define _MFC_INTERFACE_H_ + +#include "SsbSipMfcApi.h" + +#define IOCTL_MFC_DEC_INIT 0x00800001 +#define IOCTL_MFC_ENC_INIT 0x00800002 +#define IOCTL_MFC_DEC_EXE 0x00800003 +#define IOCTL_MFC_ENC_EXE 0x00800004 + +#define IOCTL_MFC_GET_IN_BUF 0x00800010 +#define IOCTL_MFC_FREE_BUF 0x00800011 +#define IOCTL_MFC_GET_PHYS_ADDR 0x00800012 + +#define IOCTL_MFC_SET_CONFIG 0x00800101 +#define IOCTL_MFC_GET_CONFIG 0x00800102 + +#define IOCTL_MFC_BUF_CACHE 0x00801000 + +/* MFC H/W support maximum 32 extra DPB */ +#define MFC_MAX_EXTRA_DPB 5 + +#define ENC_PROFILE_LEVEL(profile, level) ((profile) | ((level) << 8)) + +#define ENC_PROFILE_MPEG4_SP 0 +#define ENC_PROFILE_MPEG4_ASP 1 +#define ENC_PROFILE_H264_BP 0 +#define ENC_PROFILE_H264_MAIN 1 +#define ENC_PROFILE_H264_HIGH 2 + +#define ENC_RC_DISABLE 0 +#define ENC_RC_ENABLE_MACROBLOCK 1 +#define ENC_RC_ENABLE_FRAME 2 + +#define ENC_RC_QBOUND(min_qp, max_qp) ((min_qp) | ((max_qp) << 8)) +#define ENC_RC_MB_CTRL_DARK_DISABLE (1 << 3) +#define ENC_RC_MB_CTRL_SMOOTH_DISABLE (1 << 2) +#define ENC_RC_MB_CTRL_STATIC_DISABLE (1 << 1) +#define ENC_RC_MB_CTRL_ACTIVITY_DISABLE (1 << 0) + +#define ALIGN_TO_16B(x) ((((x) + (1 << 4) - 1) >> 4) << 4) +#define ALIGN_TO_32B(x) ((((x) + (1 << 5) - 1) >> 5) << 5) +#define ALIGN_TO_64B(x) ((((x) + (1 << 6) - 1) >> 6) << 6) +#define ALIGN_TO_128B(x) ((((x) + (1 << 7) - 1) >> 7) << 7) +#define 
ALIGN_TO_2KB(x) ((((x) + (1 << 11) - 1) >> 11) << 11) +#define ALIGN_TO_4KB(x) ((((x) + (1 << 12) - 1) >> 12) << 12) +#define ALIGN_TO_8KB(x) ((((x) + (1 << 13) - 1) >> 13) << 13) +#define ALIGN_TO_64KB(x) ((((x) + (1 << 16) - 1) >> 16) << 16) +#define ALIGN_TO_128KB(x) ((((x) + (1 << 17) - 1) >> 17) << 17) + +typedef struct { + int luma0; // per frame (or top field) + int chroma0; // per frame (or top field) + int luma1; // per frame (or bottom field) + int chroma1; // per frame (or bottom field) +} MFC_CRC_DATA; + +typedef enum { + MFC_USE_NONE = 0x00, + MFC_USE_YUV_BUFF = 0x01, + MFC_USE_STRM_BUFF = 0x10 +} mfc_interbuff_status; + +typedef enum { + MFC_UNPACKED_PB = 0, + MFC_PACKED_PB = 1 +} mfc_packed_mode; + +typedef struct tag_strm_ref_buf_arg { + unsigned int strm_ref_y; + unsigned int mv_ref_yc; +} mfc_strm_ref_buf_arg_t; + +typedef struct tag_frame_buf_arg { + unsigned int luma; + unsigned int chroma; +} mfc_frame_buf_arg_t; + +typedef struct { + SSBSIP_MFC_CODEC_TYPE in_codec_type; /* [IN] codec type */ + int in_width; /* [IN] width of YUV420 frame to be encoded */ + int in_height; /* [IN] height of YUV420 frame to be encoded */ + int in_profile_level; /* [IN] profile & level */ + int in_gop_num; /* [IN] GOP Number (interval of I-frame) */ + int in_frame_qp; /* [IN] the quantization parameter of the frame */ + int in_frame_P_qp; /* [IN] the quantization parameter of the P frame */ + int in_frame_B_qp; /* [IN] the quantization parameter of the B frame */ + + int in_RC_frm_enable; /* [IN] RC enable (0:disable, 1:frame level RC) */ + int in_RC_framerate; /* [IN] RC parameter (framerate) */ + int in_RC_bitrate; /* [IN] RC parameter (bitrate in kbps) */ + int in_RC_qbound; /* [IN] RC parameter (Q bound) */ + int in_RC_rpara; /* [IN] RC parameter (Reaction Coefficient) */ + + int in_MS_mode; /* [IN] Multi-slice mode (0:single, 1:multiple) */ + int in_MS_size; /* [IN] Multi-slice size (in num. 
of mb or byte) */ + int in_mb_refresh; /* [IN] Macroblock refresh */ + int in_interlace_mode; /* [IN] interlace mode(0:progressive, 1:interlace) */ + int in_BframeNum; /* [IN] B frame number */ + + int in_pad_ctrl_on; /* [IN] Enable (1) / Disable (0) padding */ + int in_luma_pad_val; /* [IN] pad value if pad_ctrl_on is Enable */ + int in_cb_pad_val; + int in_cr_pad_val; + + int in_frame_map; /* [IN] Encoding input NV12 type linear(0) TILE(1) */ + + unsigned int in_mapped_addr; + mfc_strm_ref_buf_arg_t out_u_addr; + mfc_strm_ref_buf_arg_t out_p_addr; + mfc_strm_ref_buf_arg_t out_buf_size; + unsigned int out_header_size; + + /* MPEG4 Only */ + int in_qpelME_enable; /* [IN] Quarter-pel MC enable(1:enable, 0:disable) */ + int in_time_increament_res; /* [IN] time increment resolution */ + int in_time_vop_time_increament; /* [IN] time increment */ +} mfc_enc_init_mpeg4_arg_t; + +typedef mfc_enc_init_mpeg4_arg_t mfc_enc_init_h263_arg_t; + +typedef struct { + SSBSIP_MFC_CODEC_TYPE in_codec_type; /* [IN] codec type */ + int in_width; /* [IN] width of YUV420 frame to be encoded */ + int in_height; /* [IN] height of YUV420 frame to be encoded */ + int in_profile_level; /* [IN] profile & level */ + int in_gop_num; /* [IN] GOP Number (interval of I-frame) */ + int in_frame_qp; /* [IN] the quantization parameter of the frame */ + int in_frame_P_qp; /* [IN] the quantization parameter of the P frame */ + int in_frame_B_qp; /* [IN] the quantization parameter of the B frame */ + + int in_RC_frm_enable; /* [IN] RC enable (0:disable, 1:frame level RC) */ + int in_RC_framerate; /* [IN] RC parameter (framerate) */ + int in_RC_bitrate; /* [IN] RC parameter (bitrate in kbps) */ + int in_RC_qbound; /* [IN] RC parameter (Q bound) */ + int in_RC_rpara; /* [IN] RC parameter (Reaction Coefficient) */ + + int in_MS_mode; /* [IN] Multi-slice mode (0:single, 1:multiple) */ + int in_MS_size; /* [IN] Multi-slice size (in num. 
of mb or byte) */ + int in_mb_refresh; /* [IN] Macroblock refresh */ + int in_interlace_mode; /* [IN] interlace mode(0:progressive, 1:interlace) */ + int in_BframeNum; + + int in_pad_ctrl_on; /* [IN] Enable padding control */ + int in_luma_pad_val; /* [IN] Luma pel value used to fill padding area */ + int in_cb_pad_val; /* [IN] CB pel value used to fill padding area */ + int in_cr_pad_val; /* [IN] CR pel value used to fill padding area */ + + int in_frame_map; /* [IN] Encoding input NV12 type linear(0) TILE(1) */ + + unsigned int in_mapped_addr; + mfc_strm_ref_buf_arg_t out_u_addr; + mfc_strm_ref_buf_arg_t out_p_addr; + mfc_strm_ref_buf_arg_t out_buf_size; + unsigned int out_header_size; + + /* H264 Only */ + int in_RC_mb_enable; /* [IN] RC enable (0:disable, 1:MB level RC) */ + int in_reference_num; /* [IN] The number of reference pictures used */ + int in_ref_num_p; /* [IN] The number of reference pictures used for P pictures */ + int in_RC_mb_dark_disable; /* [IN] Disable adaptive rate control on dark region */ + int in_RC_mb_smooth_disable; /* [IN] Disable adaptive rate control on smooth region */ + int in_RC_mb_static_disable; /* [IN] Disable adaptive rate control on static region */ + int in_RC_mb_activity_disable; /* [IN] Disable adaptive rate control on high activity region */ + int in_deblock_filt; /* [IN] disable the loop filter */ + int in_deblock_alpha_C0; /* [IN] Alpha & C0 offset for H.264 loop filter */ + int in_deblock_beta; /* [IN] Beta offset for H.264 loop filter */ + int in_symbolmode; /* [IN] The mode of entropy coding(CABAC, CAVLC) */ + int in_transform8x8_mode; /* [IN] Allow 8x8 transform(only for high profile) */ + int in_md_interweight_pps; /* [IN] Inter weighted parameter for mode decision */ + int in_md_intraweight_pps; /* [IN] Intra weighted parameter for mode decision */ +} mfc_enc_init_h264_arg_t; + +typedef struct { + SSBSIP_MFC_CODEC_TYPE in_codec_type; /* [IN] codec type */ + unsigned int in_Y_addr; /* [IN] In-buffer addr of Y component */ 
+ unsigned int in_CbCr_addr; /* [IN] In-buffer addr of CbCr component */ + unsigned int in_Y_addr_vir; /* [IN] In-buffer addr of Y component */ + unsigned int in_CbCr_addr_vir; /* [IN] In-buffer addr of CbCr component */ + unsigned int in_strm_st; /* [IN] Out-buffer start addr of encoded strm */ + unsigned int in_strm_end; /* [IN] Out-buffer end addr of encoded strm */ + int in_frametag; /* [IN] unique frame ID */ + + unsigned int out_frame_type; /* [OUT] frame type */ + int out_encoded_size; /* [OUT] Length of Encoded video stream */ + unsigned int out_encoded_Y_paddr; /* [OUT] physical Y address which is flushed */ + unsigned int out_encoded_C_paddr; /* [OUT] physical C address which is flushed */ + int out_frametag_top; /* [OUT] unique frame ID of an output frame or top field */ + int out_frametag_bottom; /* [OUT] unique frame ID of bottom field */ +} mfc_enc_exe_arg; + +typedef struct { + SSBSIP_MFC_CODEC_TYPE in_codec_type; /* [IN] codec type */ + unsigned int in_strm_buf; /* [IN] the physical address of STRM_BUF */ + int in_strm_size; /* [IN] size of video stream filled in STRM_BUF */ + int in_packed_PB; /* [IN] Is packed PB frame or not, 1: packedPB 0: unpacked */ + + int out_img_width; /* [OUT] width of YUV420 frame */ + int out_img_height; /* [OUT] height of YUV420 frame */ + int out_buf_width; /* [OUT] width of YUV420 frame */ + int out_buf_height; /* [OUT] height of YUV420 frame */ + int out_dpb_cnt; /* [OUT] the number of buffers which is necessary during decoding */ + + int out_crop_top_offset; /* [OUT] crop information, top offset */ + int out_crop_bottom_offset; /* [OUT] crop information, bottom offset */ + int out_crop_left_offset; /* [OUT] crop information, left offset */ + int out_crop_right_offset; /* [OUT] crop information, right offset */ + + mfc_frame_buf_arg_t in_frm_buf; /* [IN] the address of dpb FRAME_BUF */ + mfc_frame_buf_arg_t in_frm_size; /* [IN] size of dpb FRAME_BUF */ + unsigned int in_mapped_addr; + + mfc_frame_buf_arg_t out_u_addr; 
+ mfc_frame_buf_arg_t out_p_addr; + mfc_frame_buf_arg_t out_frame_buf_size; +} mfc_dec_init_arg_t; + +typedef struct { + SSBSIP_MFC_CODEC_TYPE in_codec_type; /* [IN] codec type */ + unsigned int in_strm_buf; /* [IN] the physical address of STRM_BUF */ + int in_strm_size; /* [IN] Size of video stream filled in STRM_BUF */ + mfc_frame_buf_arg_t in_frm_buf; /* [IN] the address of dpb FRAME_BUF */ + mfc_frame_buf_arg_t in_frm_size; /* [IN] size of dpb FRAME_BUF */ + int in_frametag; /* [IN] unique frame ID */ + + unsigned int out_display_Y_addr; /* [OUT] the physical address of display buf */ + unsigned int out_display_C_addr; /* [OUT] the physical address of display buf */ + int out_display_status; /* [OUT] whether display frame exists or not. */ + int out_timestamp_top; /* [OUT] presentation time of an output frame or top field */ + int out_timestamp_bottom; /* [OUT] presentation time of bottom field */ + int out_consume_bytes; /* [OUT] consumed bytes when decoding finished */ + int out_frametag_top; /* [OUT] unique frame ID of an output frame or top field */ + int out_frametag_bottom; /* [OUT] unique frame ID of bottom field */ + int out_res_change; /* [OUT] whether resolution is changed or not (0, 1, 2) */ + int out_crop_top_offset; /* [OUT] crop information, top offset */ + int out_crop_bottom_offset; /* [OUT] crop information, bottom offset */ + int out_crop_left_offset; /* [OUT] crop information, left offset */ + int out_crop_right_offset; /* [OUT] crop information, right offset */ +} mfc_dec_exe_arg_t; + +typedef struct { + int in_config_param; /* [IN] Configurable parameter type */ + int out_config_value[4]; /* [OUT] Values to get for the configurable parameter. */ +} mfc_get_config_arg_t; + +typedef struct { + int in_config_param; /* [IN] Configurable parameter type */ + int in_config_value[2]; /* [IN] Values to be set for the configurable parameter. 
*/ + int out_config_value_old[2]; /* [OUT] Old values of the configurable parameters */ +} mfc_set_config_arg_t; + +typedef struct tag_get_phys_addr_arg +{ + unsigned int u_addr; + unsigned int p_addr; +} mfc_get_phys_addr_arg_t; + +typedef struct tag_mem_alloc_arg +{ + SSBSIP_MFC_CODEC_TYPE codec_type; + int buff_size; + unsigned int mapped_addr; + unsigned int out_uaddr; + unsigned int out_paddr; +} mfc_mem_alloc_arg_t; + +typedef struct tag_mem_free_arg_t +{ + unsigned int u_addr; +} mfc_mem_free_arg_t; + +typedef enum { + MFC_BUFFER_NO_CACHE = 0, + MFC_BUFFER_CACHE = 1 +} mfc_buffer_type; + +typedef union { + mfc_enc_init_mpeg4_arg_t enc_init_mpeg4; + mfc_enc_init_h263_arg_t enc_init_h263; + mfc_enc_init_h264_arg_t enc_init_h264; + mfc_enc_exe_arg enc_exe; + + mfc_dec_init_arg_t dec_init; + mfc_dec_exe_arg_t dec_exe; + + mfc_get_config_arg_t get_config; + mfc_set_config_arg_t set_config; + + mfc_mem_alloc_arg_t mem_alloc; + mfc_mem_free_arg_t mem_free; + mfc_get_phys_addr_arg_t get_phys_addr; + + mfc_buffer_type buf_type; +} mfc_args; + +typedef struct tag_mfc_args { + SSBSIP_MFC_ERROR_CODE ret_code; /* [OUT] error code */ + mfc_args args; +} mfc_common_args; + +typedef struct { + int magic; + int hMFC; + int width; + int height; + int sizeStrmBuf; + mfc_frame_buf_arg_t sizeFrmBuf; + int displayStatus; + int inter_buff_status; + unsigned int virFreeStrmAddr; + unsigned int phyStrmBuf; + unsigned int virStrmBuf; + unsigned int virMvRefYC; + mfc_frame_buf_arg_t phyFrmBuf; + mfc_frame_buf_arg_t virFrmBuf; + unsigned int mapped_addr; + mfc_common_args MfcArg; + SSBSIP_MFC_CODEC_TYPE codec_type; + SSBSIP_MFC_DEC_OUTPUT_INFO decOutInfo; + unsigned int encodedHeaderSize; + int encodedDataSize; + unsigned int encodedframeType; + int in_frametag; + int out_frametag_top; + int out_frametag_bottom; + unsigned int encoded_Y_paddr; + unsigned int encoded_C_paddr; + unsigned int encode_cnt; +} _MFCLIB; + +#endif /* _MFC_INTERFACE_H_ */ |