summaryrefslogtreecommitdiffstats
path: root/common/x86/ih264_padding_ssse3.c
diff options
context:
space:
mode:
Diffstat (limited to 'common/x86/ih264_padding_ssse3.c')
-rwxr-xr-xcommon/x86/ih264_padding_ssse3.c335
1 files changed, 335 insertions, 0 deletions
diff --git a/common/x86/ih264_padding_ssse3.c b/common/x86/ih264_padding_ssse3.c
new file mode 100755
index 0000000..6dadd39
--- /dev/null
+++ b/common/x86/ih264_padding_ssse3.c
@@ -0,0 +1,335 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ih264_padding_ssse3.c
+*
+* @brief
+* Contains function definitions for Padding
+*
+* @author
+* Srinivas T
+*
+* @par List of Functions:
+* - ih264_pad_left_luma_ssse3()
+* - ih264_pad_left_chroma_ssse3()
+* - ih264_pad_right_luma_ssse3()
+* - ih264_pad_right_chroma_ssse3()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#include <string.h>
+#include <assert.h>
+#include "ih264_typedefs.h"
+#include "ih264_platform_macros.h"
+#include "ih264_mem_fns.h"
+#include "ih264_debug.h"
+
+#include <immintrin.h>
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Padding (luma block) at the left of a 2d array
+*
+* @par Description:
+* For each of the ht rows, the byte at the start of the row (pu1_src[0]) is
+* replicated into the pad_size bytes immediately to its left
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the first byte of the first row. The bytes in
+* [pu1_src - pad_size, pu1_src) of every row are overwritten
+*
+* @param[in] src_strd
+* integer source stride (added to pu1_src after every row)
+*
+* @param[in] ht
+* integer height of the array (number of rows padded)
+*
+* @param[in] pad_size
+* integer -padding size of the array. Expected to be a multiple of 8
+* (checked with ASSERT) because the pad is written 8 bytes at a time
+*
+* @returns
+* None
+*
+* @remarks
+* Exactly one source byte is read per row
+*
+*******************************************************************************
+*/
+
+void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src,
+                               WORD32 src_strd,
+                               WORD32 ht,
+                               WORD32 pad_size)
+{
+    WORD32 row;
+    WORD32 col;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(row = 0; row < ht; row++)
+    {
+        /* Broadcast only the edge byte. A 16-byte vector load from pu1_src */
+        /* would touch 15 bytes that are not needed and may lie outside the */
+        /* buffer                                                           */
+        const __m128i edge_16x8b = _mm_set1_epi8((char)pu1_src[0]);
+        UWORD8 *pu1_dst = pu1_src - pad_size;
+
+        /* Each store writes the low 8 bytes of the broadcast vector */
+        for(col = 0; col < pad_size; col += 8)
+        {
+            _mm_storel_epi64((__m128i *)(pu1_dst + col), edge_16x8b);
+        }
+        pu1_src += src_strd;
+    }
+}
+
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Padding (chroma block) at the left of a 2d array
+*
+* @par Description:
+* For each of the ht rows, the 2-byte pair at the start of the row
+* (pu1_src[0], pu1_src[1]) is replicated, pair by pair, into the pad_size
+* bytes immediately to its left. The pair is presumably one interleaved
+* chroma sample (to be confirmed against the caller)
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the first byte of the first row. The bytes in
+* [pu1_src - pad_size, pu1_src) of every row are overwritten
+*
+* @param[in] src_strd
+* integer source stride (added to pu1_src after every row)
+*
+* @param[in] ht
+* integer height of the array (number of rows padded)
+*
+* @param[in] pad_size
+* integer -padding size of the array, in bytes. Expected to be a multiple
+* of 8 (checked with ASSERT) because the pad is written 8 bytes at a time
+*
+* @returns
+* None
+*
+* @remarks
+* Exactly two source bytes are read per row
+*
+*******************************************************************************
+*/
+
+void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src,
+                                 WORD32 src_strd,
+                                 WORD32 ht,
+                                 WORD32 pad_size)
+{
+    WORD32 row;
+    WORD32 col;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(row = 0; row < ht; row++)
+    {
+        /* Read only the two edge bytes. A 16-byte vector load from pu1_src */
+        /* would touch 14 bytes that are not needed and may lie outside the */
+        /* buffer                                                           */
+        const __m128i first_16x8b = _mm_set1_epi8((char)pu1_src[0]);
+        const __m128i second_16x8b = _mm_set1_epi8((char)pu1_src[1]);
+        /* Interleave to get the byte pattern b0 b1 b0 b1 ... */
+        const __m128i edge_16x8b = _mm_unpacklo_epi8(first_16x8b, second_16x8b);
+        UWORD8 *pu1_dst = pu1_src - pad_size;
+
+        /* Each store writes the low 8 bytes (four pairs) of the vector */
+        for(col = 0; col < pad_size; col += 8)
+        {
+            _mm_storel_epi64((__m128i *)(pu1_dst + col), edge_16x8b);
+        }
+        pu1_src += src_strd;
+    }
+}
+
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Padding (luma block) at the right of a 2d array
+*
+* @par Description:
+* For each of the ht rows, the byte just before pu1_src (pu1_src[-1]) is
+* replicated into the pad_size bytes starting at pu1_src
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the first pad byte of the first row, i.e. one byte past
+* the byte that is replicated. The bytes in [pu1_src, pu1_src + pad_size)
+* of every row are overwritten
+*
+* @param[in] src_strd
+* integer source stride (added to pu1_src after every row)
+*
+* @param[in] ht
+* integer height of the array (number of rows padded)
+*
+* @param[in] pad_size
+* integer -padding size of the array. Expected to be a multiple of 8
+* (checked with ASSERT) because the pad is written 8 bytes at a time
+*
+* @returns
+* None
+*
+* @remarks
+* Exactly one source byte is read per row
+*
+*******************************************************************************
+*/
+
+void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src,
+                                WORD32 src_strd,
+                                WORD32 ht,
+                                WORD32 pad_size)
+{
+    WORD32 row;
+    WORD32 col;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(row = 0; row < ht; row++)
+    {
+        /* Broadcast only the edge byte. A 16-byte vector load starting at  */
+        /* pu1_src - 1 would read 15 bytes into the pad area and possibly   */
+        /* beyond the end of the buffer                                     */
+        const __m128i edge_16x8b = _mm_set1_epi8((char)pu1_src[-1]);
+        UWORD8 *pu1_dst = pu1_src;
+
+        /* Each store writes the low 8 bytes of the broadcast vector */
+        for(col = 0; col < pad_size; col += 8)
+        {
+            _mm_storel_epi64((__m128i *)(pu1_dst + col), edge_16x8b);
+        }
+        pu1_src += src_strd;
+    }
+}
+
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* Padding (chroma block) at the right of a 2d array
+*
+* @par Description:
+* For each of the ht rows, the 2-byte pair just before pu1_src
+* (pu1_src[-2], pu1_src[-1]) is replicated, pair by pair, into the pad_size
+* bytes starting at pu1_src. The pair is presumably one interleaved chroma
+* sample (to be confirmed against the caller)
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the first pad byte of the first row, i.e. one byte past
+* the pair that is replicated. The bytes in [pu1_src, pu1_src + pad_size)
+* of every row are overwritten
+*
+* @param[in] src_strd
+* integer source stride (added to pu1_src after every row)
+*
+* @param[in] ht
+* integer height of the array (number of rows padded)
+*
+* @param[in] pad_size
+* integer -padding size of the array, in bytes. Expected to be a multiple
+* of 8 (checked with ASSERT) because the pad is written 8 bytes at a time
+*
+* @returns
+* None
+*
+* @remarks
+* Exactly two source bytes are read per row
+*
+*******************************************************************************
+*/
+
+void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src,
+                                  WORD32 src_strd,
+                                  WORD32 ht,
+                                  WORD32 pad_size)
+{
+    WORD32 row;
+    WORD32 col;
+
+    ASSERT(pad_size % 8 == 0);
+
+    for(row = 0; row < ht; row++)
+    {
+        /* Read only the two edge bytes. A 16-byte vector load starting at  */
+        /* pu1_src - 2 would read 14 bytes into the pad area and possibly   */
+        /* beyond the end of the buffer                                     */
+        const __m128i first_16x8b = _mm_set1_epi8((char)pu1_src[-2]);
+        const __m128i second_16x8b = _mm_set1_epi8((char)pu1_src[-1]);
+        /* Interleave to get the byte pattern b0 b1 b0 b1 ... */
+        const __m128i edge_16x8b = _mm_unpacklo_epi8(first_16x8b, second_16x8b);
+        UWORD8 *pu1_dst = pu1_src;
+
+        /* Each store writes the low 8 bytes (four pairs) of the vector */
+        for(col = 0; col < pad_size; col += 8)
+        {
+            _mm_storel_epi64((__m128i *)(pu1_dst + col), edge_16x8b);
+        }
+        pu1_src += src_strd;
+    }
+}
+