diff --git a/selfdrive/camerad/cameras/camera_common.cc b/selfdrive/camerad/cameras/camera_common.cc index 7c4d2748f1..f3879bd2fe 100644 --- a/selfdrive/camerad/cameras/camera_common.cc +++ b/selfdrive/camerad/cameras/camera_common.cc @@ -260,7 +260,7 @@ kj::Array get_frame_image(const CameraBuf *b) { static kj::Array yuv420_to_jpeg(const CameraBuf *b, int thumbnail_width, int thumbnail_height) { // make the buffer big enough. jpeg_write_raw_data requires 16-pixels aligned height to be used. - std::unique_ptr buf(new uint8_t[(thumbnail_width * ((thumbnail_height + 15) & ~15) * 3) / 2]); + std::unique_ptr buf(new uint8_t[(thumbnail_width * ((thumbnail_height + 15) & ~15) * 3) / 2]); uint8_t *y_plane = buf.get(); uint8_t *u_plane = y_plane + thumbnail_width * thumbnail_height; uint8_t *v_plane = u_plane + (thumbnail_width * thumbnail_height) / 4; diff --git a/third_party/libyuv/build.txt b/third_party/libyuv/build.txt index 376e981ec7..50a319954e 100644 --- a/third_party/libyuv/build.txt +++ b/third_party/libyuv/build.txt @@ -1,4 +1,4 @@ git clone https://chromium.googlesource.com/libyuv/libyuv cd libyuv -git reset --hard 4a14cb2e81235ecd656e799aecaaf139db8ce4a2 cmake . +sudo make install diff --git a/third_party/libyuv/include/libyuv.h b/third_party/libyuv/include/libyuv.h index aeffd5ef7a..a06e1233ab 100644 --- a/third_party/libyuv/include/libyuv.h +++ b/third_party/libyuv/include/libyuv.h @@ -26,6 +26,7 @@ #include "libyuv/scale.h" #include "libyuv/scale_argb.h" #include "libyuv/scale_row.h" +#include "libyuv/scale_uv.h" #include "libyuv/version.h" #include "libyuv/video_common.h" diff --git a/third_party/libyuv/include/libyuv/basic_types.h b/third_party/libyuv/include/libyuv/basic_types.h index 5b760ee0d4..1bea67f2f2 100644 --- a/third_party/libyuv/include/libyuv/basic_types.h +++ b/third_party/libyuv/include/libyuv/basic_types.h @@ -11,79 +11,36 @@ #ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ #define INCLUDE_LIBYUV_BASIC_TYPES_H_ -#include // for NULL, size_t +#include // For size_t and NULL + +#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG) +#define INT_TYPES_DEFINED #if defined(_MSC_VER) && (_MSC_VER < 1600) #include // for uintptr_t on x86 +typedef unsigned __int64 uint64_t; +typedef __int64 int64_t; +typedef unsigned int uint32_t; +typedef int int32_t; +typedef unsigned short uint16_t; +typedef short int16_t; +typedef unsigned char uint8_t; +typedef signed char int8_t; #else -#include // for uintptr_t -#endif - -#ifndef GG_LONGLONG -#ifndef INT_TYPES_DEFINED -#define INT_TYPES_DEFINED -#ifdef COMPILER_MSVC -typedef unsigned __int64 uint64; -typedef __int64 int64; -#ifndef INT64_C -#define INT64_C(x) x ## I64 -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## UI64 -#endif -#define INT64_F "I64" -#else // COMPILER_MSVC -#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) -typedef unsigned long uint64; // NOLINT -typedef long int64; // NOLINT -#ifndef INT64_C -#define INT64_C(x) x ## L -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## UL -#endif -#define INT64_F "l" -#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) -typedef unsigned long long uint64; // NOLINT -typedef long long int64; // NOLINT -#ifndef INT64_C -#define INT64_C(x) x ## LL -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## ULL -#endif -#define INT64_F "ll" -#endif // __LP64__ -#endif // COMPILER_MSVC -typedef unsigned int uint32; -typedef int int32; -typedef unsigned short uint16; // NOLINT -typedef short int16; // NOLINT -typedef unsigned char uint8; -typedef signed char int8; +#include // for uintptr_t and C99 types +#endif // defined(_MSC_VER) && (_MSC_VER < 1600) +// Types are deprecated. Enable this macro for legacy types. +#ifdef LIBYUV_LEGACY_TYPES +typedef uint64_t uint64; +typedef int64_t int64; +typedef uint32_t uint32; +typedef int32_t int32; +typedef uint16_t uint16; +typedef int16_t int16; +typedef uint8_t uint8; +typedef int8_t int8; +#endif // LIBYUV_LEGACY_TYPES #endif // INT_TYPES_DEFINED -#endif // GG_LONGLONG - -// Detect compiler is for x86 or x64. -#if defined(__x86_64__) || defined(_M_X64) || \ - defined(__i386__) || defined(_M_IX86) -#define CPU_X86 1 -#endif -// Detect compiler is for ARM. -#if defined(__arm__) || defined(_M_ARM) -#define CPU_ARM 1 -#endif - -#ifndef ALIGNP -#ifdef __cplusplus -#define ALIGNP(p, t) \ - (reinterpret_cast(((reinterpret_cast(p) + \ - ((t) - 1)) & ~((t) - 1)))) -#else -#define ALIGNP(p, t) \ - ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */ -#endif -#endif #if !defined(LIBYUV_API) #if defined(_WIN32) || defined(__CYGWIN__) @@ -95,24 +52,17 @@ typedef signed char int8; #define LIBYUV_API #endif // LIBYUV_BUILDING_SHARED_LIBRARY #elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \ - (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \ - defined(LIBYUV_USING_SHARED_LIBRARY)) -#define LIBYUV_API __attribute__ ((visibility ("default"))) + (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \ + defined(LIBYUV_USING_SHARED_LIBRARY)) +#define LIBYUV_API __attribute__((visibility("default"))) #else #define LIBYUV_API #endif // __GNUC__ #endif // LIBYUV_API +// TODO(fbarchard): Remove bool macros. #define LIBYUV_BOOL int #define LIBYUV_FALSE 0 #define LIBYUV_TRUE 1 -// Visual C x86 or GCC little endian. -#if defined(__x86_64__) || defined(_M_X64) || \ - defined(__i386__) || defined(_M_IX86) || \ - defined(__arm__) || defined(_M_ARM) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define LIBYUV_LITTLE_ENDIAN -#endif - #endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ diff --git a/third_party/libyuv/include/libyuv/compare.h b/third_party/libyuv/include/libyuv/compare.h index 550712de6e..3353ad71c6 100644 --- a/third_party/libyuv/include/libyuv/compare.h +++ b/third_party/libyuv/include/libyuv/compare.h @@ -20,55 +20,88 @@ extern "C" { // Compute a hash for specified memory. Seed of 5381 recommended. LIBYUV_API -uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed); +uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed); + +// Hamming Distance +LIBYUV_API +uint64_t ComputeHammingDistance(const uint8_t* src_a, + const uint8_t* src_b, + int count); // Scan an opaque argb image and return fourcc based on alpha offset. // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. LIBYUV_API -uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height); +uint32_t ARGBDetect(const uint8_t* argb, + int stride_argb, + int width, + int height); // Sum Square Error - used to compute Mean Square Error or PSNR. LIBYUV_API -uint64 ComputeSumSquareError(const uint8* src_a, - const uint8* src_b, int count); +uint64_t ComputeSumSquareError(const uint8_t* src_a, + const uint8_t* src_b, + int count); LIBYUV_API -uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); +uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height); static const int kMaxPsnr = 128; LIBYUV_API -double SumSquareErrorToPsnr(uint64 sse, uint64 count); +double SumSquareErrorToPsnr(uint64_t sse, uint64_t count); LIBYUV_API -double CalcFramePsnr(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); +double CalcFramePsnr(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height); LIBYUV_API -double I420Psnr(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height); +double I420Psnr(const uint8_t* src_y_a, + int stride_y_a, + const uint8_t* src_u_a, + int stride_u_a, + const uint8_t* src_v_a, + int stride_v_a, + const uint8_t* src_y_b, + int stride_y_b, + const uint8_t* src_u_b, + int stride_u_b, + const uint8_t* src_v_b, + int stride_v_b, + int width, + int height); LIBYUV_API -double CalcFrameSsim(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); +double CalcFrameSsim(const uint8_t* src_a, + int stride_a, + const uint8_t* src_b, + int stride_b, + int width, + int height); LIBYUV_API -double I420Ssim(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height); +double I420Ssim(const uint8_t* src_y_a, + int stride_y_a, + const uint8_t* src_u_a, + int stride_u_a, + const uint8_t* src_v_a, + int stride_v_a, + const uint8_t* src_y_b, + int stride_y_b, + const uint8_t* src_u_b, + int stride_u_b, + const uint8_t* src_v_b, + int stride_v_b, + int width, + int height); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/compare_row.h b/third_party/libyuv/include/libyuv/compare_row.h index 781cad3e65..d8e82d721b 100644 --- a/third_party/libyuv/include/libyuv/compare_row.h +++ b/third_party/libyuv/include/libyuv/compare_row.h @@ -18,20 +18,23 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif - // Visual C 2012 required for AVX2. -#if defined(_M_IX86) && !defined(__clang__) && \ - defined(_MSC_VER) && _MSC_VER >= 1700 +#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ + _MSC_VER >= 1700 #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 @@ -42,39 +45,82 @@ extern "C" { #endif // clang >= 3.4 #endif // __clang__ -#if !defined(LIBYUV_DISABLE_X86) && \ - defined(_M_IX86) && (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) -#define HAS_HASHDJB2_AVX2 -#endif - // The following are available for Visual C and GCC: #if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) || defined(_M_IX86))) + (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86)) #define HAS_HASHDJB2_SSE41 #define HAS_SUMSQUAREERROR_SSE2 +#define HAS_HAMMINGDISTANCE_SSE42 #endif // The following are available for Visual C and clangcl 32 bit: -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \ + !defined(__clang__) && \ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) #define HAS_HASHDJB2_AVX2 #define HAS_SUMSQUAREERROR_AVX2 #endif +// The following are available for GCC and clangcl: +#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) +#define HAS_HAMMINGDISTANCE_SSSE3 +#endif + +// The following are available for GCC and clangcl: +#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \ + (defined(__x86_64__) || defined(__i386__)) +#define HAS_HAMMINGDISTANCE_AVX2 +#endif + // The following are available for Neon: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_SUMSQUAREERROR_NEON +#define HAS_HAMMINGDISTANCE_NEON +#endif + +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#define HAS_HAMMINGDISTANCE_MSA +#define HAS_SUMSQUAREERROR_MSA #endif -uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); +uint32_t HammingDistance_C(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_SSE42(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_SSSE3(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_AVX2(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t HammingDistance_MSA(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_C(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_SSE2(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_AVX2(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_NEON(const uint8_t* src_a, + const uint8_t* src_b, + int count); +uint32_t SumSquareError_MSA(const uint8_t* src_a, + const uint8_t* src_b, + int count); -uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed); -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); +uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed); +uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed); +uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/convert.h b/third_party/libyuv/include/libyuv/convert.h index d44485847b..5c2954f4c3 100644 --- a/third_party/libyuv/include/libyuv/convert.h +++ b/third_party/libyuv/include/libyuv/convert.h @@ -16,8 +16,8 @@ #include "libyuv/rotate.h" // For enum RotationMode. // TODO(fbarchard): fix WebRTC source to include following libyuv headers: -#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620 -#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620 +#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620 +#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620 #include "libyuv/planar_functions.h" // For WebRTC I420Rect, CopyPlane. b/618 #ifdef __cplusplus @@ -27,196 +27,835 @@ extern "C" { // Convert I444 to I420. LIBYUV_API -int I444ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I444ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I444 to NV12. +LIBYUV_API +int I444ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert I444 to NV21. +LIBYUV_API +int I444ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); // Convert I422 to I420. LIBYUV_API -int I422ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I422ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I422 to I444. +LIBYUV_API +int I422ToI444(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I422 to I210. +LIBYUV_API +int I422ToI210(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert MM21 to NV12. +LIBYUV_API +int MM21ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); -// Convert I411 to I420. +// Convert MM21 to I420. LIBYUV_API -int I411ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int MM21ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I422 to NV21. +LIBYUV_API +int I422ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); // Copy I420 to I420. #define I420ToI420 I420Copy LIBYUV_API -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I420Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I420 to I444. +LIBYUV_API +int I420ToI444(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Copy I010 to I010 +#define I010ToI010 I010Copy +#define H010ToH010 I010Copy +LIBYUV_API +int I010Copy(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert 10 bit YUV to 8 bit +#define H010ToH420 I010ToI420 +LIBYUV_API +int I010ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +#define H210ToH422 I210ToI422 +LIBYUV_API +int I210ToI422(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +#define H410ToH444 I410ToI444 +LIBYUV_API +int I410ToI444(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +#define H012ToH420 I012ToI420 +LIBYUV_API +int I012ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +#define H212ToH422 I212ToI422 +LIBYUV_API +int I212ToI422(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +#define H412ToH444 I412ToI444 +LIBYUV_API +int I412ToI444(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +#define I412ToI012 I410ToI010 +#define H410ToH010 I410ToI010 +#define H412ToH012 I410ToI010 +LIBYUV_API +int I410ToI010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +#define I212ToI012 I210ToI010 +#define H210ToH010 I210ToI010 +#define H212ToH012 I210ToI010 +LIBYUV_API +int I210ToI010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I010 to I410 +LIBYUV_API +int I010ToI410(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I012 to I412 +#define I012ToI412 I010ToI410 + +// Convert I210 to I410 +LIBYUV_API +int I210ToI410(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I212 to I412 +#define I212ToI412 I210ToI410 + +// Convert I010 to P010 +LIBYUV_API +int I010ToP010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert I210 to P210 +LIBYUV_API +int I210ToP210(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert I012 to P012 +LIBYUV_API +int I012ToP012(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert I212 to P212 +LIBYUV_API +int I212ToP212(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height); // Convert I400 (grey) to I420. LIBYUV_API -int I400ToI420(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I400ToI420(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert I400 (grey) to NV21. +LIBYUV_API +int I400ToNV21(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); #define J400ToJ420 I400ToI420 // Convert NV12 to I420. LIBYUV_API -int NV12ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int NV12ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert NV21 to I420. LIBYUV_API -int NV21ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int NV21ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert NV12 to NV24. +LIBYUV_API +int NV12ToNV24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert NV16 to NV24. +LIBYUV_API +int NV16ToNV24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert P010 to P410. +LIBYUV_API +int P010ToP410(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert P012 to P412. +#define P012ToP412 P010ToP410 + +// Convert P016 to P416. +#define P016ToP416 P010ToP410 + +// Convert P210 to P410. +LIBYUV_API +int P210ToP410(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert P212 to P412. +#define P212ToP412 P210ToP410 + +// Convert P216 to P416. +#define P216ToP416 P210ToP410 // Convert YUY2 to I420. LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int YUY2ToI420(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert UYVY to I420. LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int UYVYToI420(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert AYUV to NV12. +LIBYUV_API +int AYUVToNV12(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); -// Convert M420 to I420. +// Convert AYUV to NV21. LIBYUV_API -int M420ToI420(const uint8* src_m420, int src_stride_m420, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int AYUVToNV21(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); // Convert Android420 to I420. LIBYUV_API -int Android420ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int pixel_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int Android420ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // ARGB little endian (bgra in memory) to I420. LIBYUV_API -int ARGBToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToI420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // BGRA little endian (argb in memory) to I420. LIBYUV_API -int BGRAToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int BGRAToI420(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // ABGR little endian (rgba in memory) to I420. LIBYUV_API -int ABGRToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ABGRToI420(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGBA little endian (abgr in memory) to I420. LIBYUV_API -int RGBAToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int RGBAToI420(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB little endian (bgr in memory) to I420. LIBYUV_API -int RGB24ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int RGB24ToI420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// RGB little endian (bgr in memory) to J420. +LIBYUV_API +int RGB24ToJ420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB big endian (rgb in memory) to I420. LIBYUV_API -int RAWToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int RAWToI420(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// RGB big endian (rgb in memory) to J420. +LIBYUV_API +int RAWToJ420(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB16 (RGBP fourcc) little endian to I420. LIBYUV_API -int RGB565ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int RGB565ToI420(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB15 (RGBO fourcc) little endian to I420. LIBYUV_API -int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGB1555ToI420(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // RGB12 (R444 fourcc) little endian to I420. LIBYUV_API -int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGB4444ToI420(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// RGB little endian (bgr in memory) to J400. +LIBYUV_API +int RGB24ToJ400(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height); + +// RGB big endian (rgb in memory) to J400. +LIBYUV_API +int RAWToJ400(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height); -#ifdef HAVE_JPEG // src_width/height provided by capture. // dst_width/height for clipping determine final size. LIBYUV_API -int MJPGToI420(const uint8* sample, size_t sample_size, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, - int dst_width, int dst_height); +int MJPGToI420(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int src_width, + int src_height, + int dst_width, + int dst_height); + +// JPEG to NV21 +LIBYUV_API +int MJPGToNV21(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int src_width, + int src_height, + int dst_width, + int dst_height); + +// JPEG to NV12 +LIBYUV_API +int MJPGToNV12(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int src_width, + int src_height, + int dst_width, + int dst_height); // Query size of MJPG in pixels. LIBYUV_API -int MJPGSize(const uint8* sample, size_t sample_size, - int* width, int* height); -#endif +int MJPGSize(const uint8_t* sample, + size_t sample_size, + int* width, + int* height); // Convert camera sample to I420 with cropping, rotation and vertical flip. // "src_size" is needed to parse MJPG. @@ -238,18 +877,25 @@ int MJPGSize(const uint8* sample, size_t sample_size, // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' +// "fourcc" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. LIBYUV_API -int ConvertToI420(const uint8* src_frame, size_t src_size, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, +int ConvertToI420(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int crop_x, + int crop_y, + int src_width, + int src_height, + int crop_width, + int crop_height, enum RotationMode rotation, - uint32 format); + uint32_t fourcc); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/convert_argb.h b/third_party/libyuv/include/libyuv/convert_argb.h index dc03ac8d5d..f66d20ce19 100644 --- a/third_party/libyuv/include/libyuv/convert_argb.h +++ b/third_party/libyuv/include/libyuv/convert_argb.h @@ -14,274 +14,2139 @@ #include "libyuv/basic_types.h" #include "libyuv/rotate.h" // For enum RotationMode. - -// TODO(fbarchard): This set of functions should exactly match convert.h -// TODO(fbarchard): Add tests. Create random content of right size and convert -// with C vs Opt and or to I420 and compare. -// TODO(fbarchard): Some of these functions lack parameter setting. +#include "libyuv/scale.h" // For enum FilterMode. #ifdef __cplusplus namespace libyuv { extern "C" { #endif +// Conversion matrix for YUV to RGB +LIBYUV_API extern const struct YuvConstants kYuvI601Constants; // BT.601 +LIBYUV_API extern const struct YuvConstants kYuvJPEGConstants; // BT.601 full +LIBYUV_API extern const struct YuvConstants kYuvH709Constants; // BT.709 +LIBYUV_API extern const struct YuvConstants kYuvF709Constants; // BT.709 full +LIBYUV_API extern const struct YuvConstants kYuv2020Constants; // BT.2020 +LIBYUV_API extern const struct YuvConstants kYuvV2020Constants; // BT.2020 full + +// Conversion matrix for YVU to BGR +LIBYUV_API extern const struct YuvConstants kYvuI601Constants; // BT.601 +LIBYUV_API extern const struct YuvConstants kYvuJPEGConstants; // BT.601 full +LIBYUV_API extern const struct YuvConstants kYvuH709Constants; // BT.709 +LIBYUV_API extern const struct YuvConstants kYvuF709Constants; // BT.709 full +LIBYUV_API extern const struct YuvConstants kYvu2020Constants; // BT.2020 +LIBYUV_API extern const struct YuvConstants kYvuV2020Constants; // BT.2020 full + +// Macros for end swapped destination Matrix conversions. +// Swap UV and pass mirrored kYvuJPEGConstants matrix. +// TODO(fbarchard): Add macro for each Matrix function. +#define kYuvI601ConstantsVU kYvuI601Constants +#define kYuvJPEGConstantsVU kYvuJPEGConstants +#define kYuvH709ConstantsVU kYvuH709Constants +#define kYuvF709ConstantsVU kYvuF709Constants +#define kYuv2020ConstantsVU kYvu2020Constants +#define kYuvV2020ConstantsVU kYvuV2020Constants + +#define NV12ToABGRMatrix(a, b, c, d, e, f, g, h, i) \ + NV21ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i) +#define NV21ToABGRMatrix(a, b, c, d, e, f, g, h, i) \ + NV12ToARGBMatrix(a, b, c, d, e, f, g##VU, h, i) +#define NV12ToRAWMatrix(a, b, c, d, e, f, g, h, i) \ + NV21ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i) +#define NV21ToRAWMatrix(a, b, c, d, e, f, g, h, i) \ + NV12ToRGB24Matrix(a, b, c, d, e, f, g##VU, h, i) +#define I010ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \ + I010ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k) +#define I210ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \ + I210ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k) +#define I410ToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k) \ + I410ToARGBMatrix(a, b, e, f, c, d, g, h, i##VU, j, k) +#define I010ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \ + I010ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k) +#define I210ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \ + I210ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k) +#define I410ToAB30Matrix(a, b, c, d, e, f, g, h, i, j, k) \ + I410ToAR30Matrix(a, b, e, f, c, d, g, h, i##VU, j, k) +#define I420AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ + I420AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n) +#define I422AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ + I422AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n) +#define I444AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ + I444AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n) +#define I010AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ + I010AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n) +#define I210AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ + I210AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n) +#define I410AlphaToABGRMatrix(a, b, c, d, e, f, g, h, i, j, k, l, m, n) \ + I410AlphaToARGBMatrix(a, b, e, f, c, d, g, h, i, j, k##VU, l, m, n) + // Alias. #define ARGBToARGB ARGBCopy // Copy ARGB to ARGB. LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBCopy(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert I420 to ARGB. LIBYUV_API -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I420 to ABGR. +LIBYUV_API +int I420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert J420 to ARGB. +LIBYUV_API +int J420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); -// Duplicate prototype for function in convert_from.h for remoting. +// Convert J420 to ABGR. +LIBYUV_API +int J420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H420 to ARGB. LIBYUV_API -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int H420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H420 to ABGR. +LIBYUV_API +int H420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert U420 to ARGB. +LIBYUV_API +int U420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert U420 to ABGR. +LIBYUV_API +int U420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert I422 to ARGB. LIBYUV_API -int I422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I422 to ABGR. +LIBYUV_API +int I422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert J422 to ARGB. +LIBYUV_API +int J422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert J422 to ABGR. +LIBYUV_API +int J422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H422 to ARGB. +LIBYUV_API +int H422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H422 to ABGR. +LIBYUV_API +int H422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert U422 to ARGB. +LIBYUV_API +int U422ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert U422 to ABGR. +LIBYUV_API +int U422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert I444 to ARGB. LIBYUV_API -int I444ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I444 to ABGR. +LIBYUV_API +int I444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert J444 to ARGB. LIBYUV_API -int J444ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int J444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); -// Convert I444 to ABGR. +// Convert J444 to ABGR. +LIBYUV_API +int J444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H444 to ARGB. +LIBYUV_API +int H444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H444 to ABGR. +LIBYUV_API +int H444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert U444 to ARGB. +LIBYUV_API +int U444ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert U444 to ABGR. +LIBYUV_API +int U444ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert I010 to ARGB. +LIBYUV_API +int I010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I010 to ABGR. +LIBYUV_API +int I010ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H010 to ARGB. +LIBYUV_API +int H010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H010 to ABGR. +LIBYUV_API +int H010ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert U010 to ARGB. +LIBYUV_API +int U010ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert U010 to ABGR. +LIBYUV_API +int U010ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert I210 to ARGB. +LIBYUV_API +int I210ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I210 to ABGR. +LIBYUV_API +int I210ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert H210 to ARGB. +LIBYUV_API +int H210ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert H210 to ABGR. +LIBYUV_API +int H210ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert U210 to ARGB. LIBYUV_API -int I444ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int U210ToARGB(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); -// Convert I411 to ARGB. +// Convert U210 to ABGR. LIBYUV_API -int I411ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int U210ToABGR(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert I420 with Alpha to preattenuated ARGB. LIBYUV_API -int I420AlphaToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - const uint8* src_a, int src_stride_a, - uint8* dst_argb, int dst_stride_argb, - int width, int height, int attenuate); +int I420AlphaToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int attenuate); // Convert I420 with Alpha to preattenuated ABGR. LIBYUV_API -int I420AlphaToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - const uint8* src_a, int src_stride_a, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height, int attenuate); +int I420AlphaToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height, + int attenuate); + +// Convert I422 with Alpha to preattenuated ARGB. +LIBYUV_API +int I422AlphaToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int attenuate); + +// Convert I422 with Alpha to preattenuated ABGR. +LIBYUV_API +int I422AlphaToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height, + int attenuate); + +// Convert I444 with Alpha to preattenuated ARGB. +LIBYUV_API +int I444AlphaToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int attenuate); + +// Convert I444 with Alpha to preattenuated ABGR. +LIBYUV_API +int I444AlphaToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height, + int attenuate); // Convert I400 (grey) to ARGB. Reverse of ARGBToI400. LIBYUV_API -int I400ToARGB(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I400ToARGB(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert J400 (jpeg grey) to ARGB. LIBYUV_API -int J400ToARGB(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int J400ToARGB(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Alias. #define YToARGB I400ToARGB // Convert NV12 to ARGB. LIBYUV_API -int NV12ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int NV12ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert NV21 to ARGB. LIBYUV_API -int NV21ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int NV21ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); -// Convert M420 to ARGB. +// Convert NV12 to ABGR. LIBYUV_API -int M420ToARGB(const uint8* src_m420, int src_stride_m420, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int NV12ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert NV21 to ABGR. +LIBYUV_API +int NV21ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert NV12 to RGB24. +LIBYUV_API +int NV12ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert NV21 to RGB24. +LIBYUV_API +int NV21ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Convert NV21 to YUV24. +LIBYUV_API +int NV21ToYUV24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_yuv24, + int dst_stride_yuv24, + int width, + int height); + +// Convert NV12 to RAW. +LIBYUV_API +int NV12ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + +// Convert NV21 to RAW. +LIBYUV_API +int NV21ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); // Convert YUY2 to ARGB. LIBYUV_API -int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int YUY2ToARGB(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert UYVY to ARGB. LIBYUV_API -int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int UYVYToARGB(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); -// Convert J420 to ARGB. +// Convert I010 to AR30. LIBYUV_API -int J420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int I010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); -// Convert J422 to ARGB. +// Convert H010 to AR30. LIBYUV_API -int J422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int H010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); -// Convert J420 to ABGR. +// Convert I010 to AB30. LIBYUV_API -int J420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int I010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); -// Convert J422 to ABGR. +// Convert H010 to AB30. LIBYUV_API -int J422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int H010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); -// Convert H420 to ARGB. +// Convert U010 to AR30. LIBYUV_API -int H420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int U010ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); -// Convert H422 to ARGB. +// Convert U010 to AB30. LIBYUV_API -int H422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int U010ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); -// Convert H420 to ABGR. +// Convert I210 to AR30. LIBYUV_API -int H420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int I210ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); -// Convert H422 to ABGR. +// Convert I210 to AB30. +LIBYUV_API +int I210ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); + +// Convert H210 to AR30. +LIBYUV_API +int H210ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert H210 to AB30. LIBYUV_API -int H422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int H210ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); + +// Convert U210 to AR30. +LIBYUV_API +int U210ToAR30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert U210 to AB30. +LIBYUV_API +int U210ToAB30(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); // BGRA little endian (argb in memory) to ARGB. LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int BGRAToARGB(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // ABGR little endian (rgba in memory) to ARGB. LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ABGRToARGB(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGBA little endian (abgr in memory) to ARGB. LIBYUV_API -int RGBAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int RGBAToARGB(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Deprecated function name. #define BG24ToARGB RGB24ToARGB // RGB little endian (bgr in memory) to ARGB. LIBYUV_API -int RGB24ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int RGB24ToARGB(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGB big endian (rgb in memory) to ARGB. LIBYUV_API -int RAWToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int RAWToARGB(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// RGB big endian (rgb in memory) to RGBA. +LIBYUV_API +int RAWToRGBA(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height); // RGB16 (RGBP fourcc) little endian to ARGB. LIBYUV_API -int RGB565ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int RGB565ToARGB(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGB15 (RGBO fourcc) little endian to ARGB. LIBYUV_API -int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGB1555ToARGB(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // RGB12 (R444 fourcc) little endian to ARGB. LIBYUV_API -int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGB4444ToARGB(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Aliases +#define AB30ToARGB AR30ToABGR +#define AB30ToABGR AR30ToARGB +#define AB30ToAR30 AR30ToAB30 + +// Convert AR30 To ARGB. +LIBYUV_API +int AR30ToARGB(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert AR30 To ABGR. +LIBYUV_API +int AR30ToABGR(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert AR30 To AB30. +LIBYUV_API +int AR30ToAB30(const uint8_t* src_ar30, + int src_stride_ar30, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); + +// Convert AR64 to ARGB. +LIBYUV_API +int AR64ToARGB(const uint16_t* src_ar64, + int src_stride_ar64, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert AB64 to ABGR. +#define AB64ToABGR AR64ToARGB + +// Convert AB64 to ARGB. +LIBYUV_API +int AB64ToARGB(const uint16_t* src_ab64, + int src_stride_ab64, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert AR64 to ABGR. +#define AR64ToABGR AB64ToARGB + +// Convert AR64 To AB64. +LIBYUV_API +int AR64ToAB64(const uint16_t* src_ar64, + int src_stride_ar64, + uint16_t* dst_ab64, + int dst_stride_ab64, + int width, + int height); + +// Convert AB64 To AR64. +#define AB64ToAR64 AR64ToAB64 -#ifdef HAVE_JPEG // src_width/height provided by capture // dst_width/height for clipping determine final size. LIBYUV_API -int MJPGToARGB(const uint8* sample, size_t sample_size, - uint8* dst_argb, int dst_stride_argb, - int src_width, int src_height, - int dst_width, int dst_height); -#endif +int MJPGToARGB(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_argb, + int dst_stride_argb, + int src_width, + int src_height, + int dst_width, + int dst_height); + +// Convert Android420 to ARGB. +LIBYUV_API +int Android420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert Android420 to ABGR. +LIBYUV_API +int Android420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert NV12 to RGB565. +LIBYUV_API +int NV12ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); + +// Convert I422 to BGRA. +LIBYUV_API +int I422ToBGRA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height); + +// Convert I422 to ABGR. +LIBYUV_API +int I422ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +// Convert I422 to RGBA. +LIBYUV_API +int I422ToRGBA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height); + +LIBYUV_API +int I420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +LIBYUV_API +int I420ToBGRA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height); + +LIBYUV_API +int I420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); + +LIBYUV_API +int I420ToRGBA(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height); + +LIBYUV_API +int I420ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +LIBYUV_API +int I420ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + +LIBYUV_API +int H420ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +LIBYUV_API +int H420ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + +LIBYUV_API +int J420ToRGB24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +LIBYUV_API +int J420ToRAW(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); + +LIBYUV_API +int I420ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); + +LIBYUV_API +int J420ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); + +LIBYUV_API +int H420ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); + +LIBYUV_API +int I422ToRGB565(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); + +// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes). +// Values in dither matrix from 0 to 7 recommended. +// The order of the dither matrix is first byte is upper left. + +LIBYUV_API +int I420ToRGB565Dither(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const uint8_t* dither4x4, + int width, + int height); + +LIBYUV_API +int I420ToARGB1555(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb1555, + int dst_stride_argb1555, + int width, + int height); + +LIBYUV_API +int I420ToARGB4444(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb4444, + int dst_stride_argb4444, + int width, + int height); + +// Convert I420 to AR30. +LIBYUV_API +int I420ToAR30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert I420 to AB30. +LIBYUV_API +int I420ToAB30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); + +// Convert H420 to AR30. +LIBYUV_API +int H420ToAR30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert H420 to AB30. +LIBYUV_API +int H420ToAB30(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ab30, + int dst_stride_ab30, + int width, + int height); + +// Convert I420 to ARGB with matrix. +LIBYUV_API +int I420ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I422 to ARGB with matrix. +LIBYUV_API +int I422ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I444 to ARGB with matrix. +LIBYUV_API +int I444ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert 10 bit 420 YUV to ARGB with matrix. +LIBYUV_API +int I010ToAR30Matrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert 10 bit 420 YUV to ARGB with matrix. +LIBYUV_API +int I210ToAR30Matrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert 10 bit 444 YUV to ARGB with matrix. +LIBYUV_API +int I410ToAR30Matrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert 10 bit YUV to ARGB with matrix. +LIBYUV_API +int I010ToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// multiply 12 bit yuv into high bits to allow any number of bits. +LIBYUV_API +int I012ToAR30Matrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert 12 bit YUV to ARGB with matrix. +LIBYUV_API +int I012ToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert 10 bit 422 YUV to ARGB with matrix. +LIBYUV_API +int I210ToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert 10 bit 444 YUV to ARGB with matrix. +LIBYUV_API +int I410ToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert P010 to ARGB with matrix. +LIBYUV_API +int P010ToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert P210 to ARGB with matrix. +LIBYUV_API +int P210ToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert P010 to AR30 with matrix. +LIBYUV_API +int P010ToAR30Matrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert P210 to AR30 with matrix. +LIBYUV_API +int P210ToAR30Matrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// P012 and P010 use most significant bits so the conversion is the same. +// Convert P012 to ARGB with matrix. +#define P012ToARGBMatrix P010ToARGBMatrix +// Convert P012 to AR30 with matrix. +#define P012ToAR30Matrix P010ToAR30Matrix +// Convert P212 to ARGB with matrix. +#define P212ToARGBMatrix P210ToARGBMatrix +// Convert P212 to AR30 with matrix. +#define P212ToAR30Matrix P210ToAR30Matrix + +// Convert P016 to ARGB with matrix. +#define P016ToARGBMatrix P010ToARGBMatrix +// Convert P016 to AR30 with matrix. +#define P016ToAR30Matrix P010ToAR30Matrix +// Convert P216 to ARGB with matrix. +#define P216ToARGBMatrix P210ToARGBMatrix +// Convert P216 to AR30 with matrix. +#define P216ToAR30Matrix P210ToAR30Matrix + +// Convert I420 with Alpha to preattenuated ARGB with matrix. +LIBYUV_API +int I420AlphaToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate); + +// Convert I422 with Alpha to preattenuated ARGB with matrix. +LIBYUV_API +int I422AlphaToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate); + +// Convert I444 with Alpha to preattenuated ARGB with matrix. +LIBYUV_API +int I444AlphaToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate); + +// Convert I010 with Alpha to preattenuated ARGB with matrix. +LIBYUV_API +int I010AlphaToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate); + +// Convert I210 with Alpha to preattenuated ARGB with matrix. +LIBYUV_API +int I210AlphaToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate); + +// Convert I410 with Alpha to preattenuated ARGB with matrix. +LIBYUV_API +int I410AlphaToARGBMatrix(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate); + +// Convert NV12 to ARGB with matrix. +LIBYUV_API +int NV12ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert NV21 to ARGB with matrix. +LIBYUV_API +int NV21ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert NV12 to RGB565 with matrix. +LIBYUV_API +int NV12ToRGB565Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert NV12 to RGB24 with matrix. +LIBYUV_API +int NV12ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert NV21 to RGB24 with matrix. +LIBYUV_API +int NV21ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert Android420 to ARGB with matrix. +LIBYUV_API +int Android420ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I422 to RGBA with matrix. +LIBYUV_API +int I422ToRGBAMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgba, + int dst_stride_rgba, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I420 to RGBA with matrix. +LIBYUV_API +int I420ToRGBAMatrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgba, + int dst_stride_rgba, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I420 to RGB24 with matrix. +LIBYUV_API +int I420ToRGB24Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I420 to RGB565 with specified color matrix. +LIBYUV_API +int I420ToRGB565Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I420 to AR30 with matrix. +LIBYUV_API +int I420ToAR30Matrix(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I400 (grey) to ARGB. Reverse of ARGBToI400. +LIBYUV_API +int I400ToARGBMatrix(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height); + +// Convert I420 to ARGB with matrix and UV filter mode. +LIBYUV_API +int I420ToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert I422 to ARGB with matrix and UV filter mode. +LIBYUV_API +int I422ToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert I010 to AR30 with matrix and UV filter mode. +LIBYUV_API +int I010ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert I210 to AR30 with matrix and UV filter mode. +LIBYUV_API +int I210ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert I010 to ARGB with matrix and UV filter mode. +LIBYUV_API +int I010ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert I210 to ARGB with matrix and UV filter mode. +LIBYUV_API +int I210ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert I420 with Alpha to attenuated ARGB with matrix and UV filter mode. +LIBYUV_API +int I420AlphaToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + enum FilterMode filter); + +// Convert I422 with Alpha to attenuated ARGB with matrix and UV filter mode. +LIBYUV_API +int I422AlphaToARGBMatrixFilter(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + enum FilterMode filter); + +// Convert I010 with Alpha to attenuated ARGB with matrix and UV filter mode. +LIBYUV_API +int I010AlphaToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + enum FilterMode filter); + +// Convert I210 with Alpha to attenuated ARGB with matrix and UV filter mode. +LIBYUV_API +int I210AlphaToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + int attenuate, + enum FilterMode filter); + +// Convert P010 to ARGB with matrix and UV filter mode. +LIBYUV_API +int P010ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert P210 to ARGB with matrix and UV filter mode. +LIBYUV_API +int P210ToARGBMatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_argb, + int dst_stride_argb, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert P010 to AR30 with matrix and UV filter mode. +LIBYUV_API +int P010ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); + +// Convert P210 to AR30 with matrix and UV filter mode. +LIBYUV_API +int P210ToAR30MatrixFilter(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint8_t* dst_ar30, + int dst_stride_ar30, + const struct YuvConstants* yuvconstants, + int width, + int height, + enum FilterMode filter); // Convert camera sample to ARGB with cropping, rotation and vertical flip. -// "src_size" is needed to parse MJPG. +// "sample_size" is needed to parse MJPG. // "dst_stride_argb" number of bytes in a row of the dst_argb plane. // Normally this would be the same as dst_width, with recommended alignment // to 16 bytes for better efficiency. @@ -300,16 +2165,21 @@ int MJPGToARGB(const uint8* sample, size_t sample_size, // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' +// "fourcc" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. LIBYUV_API -int ConvertToARGB(const uint8* src_frame, size_t src_size, - uint8* dst_argb, int dst_stride_argb, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, +int ConvertToARGB(const uint8_t* sample, + size_t sample_size, + uint8_t* dst_argb, + int dst_stride_argb, + int crop_x, + int crop_y, + int src_width, + int src_height, + int crop_width, + int crop_height, enum RotationMode rotation, - uint32 format); + uint32_t fourcc); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/convert_from.h b/third_party/libyuv/include/libyuv/convert_from.h index 59c40474f1..32f42a6330 100644 --- a/third_party/libyuv/include/libyuv/convert_from.h +++ b/third_party/libyuv/include/libyuv/convert_from.h @@ -21,155 +21,179 @@ extern "C" { // See Also convert.h for conversions from formats to I420. -// I420Copy in convert to I420ToI420. - -LIBYUV_API -int I420ToI422(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int I420ToI444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int I420ToI411(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +// Convert 8 bit YUV to 10 bit. +#define H420ToH010 I420ToI010 +LIBYUV_API +int I420ToI010(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert 8 bit YUV to 12 bit. +#define H420ToH012 I420ToI012 +LIBYUV_API +int I420ToI012(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +LIBYUV_API +int I420ToI422(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +LIBYUV_API +int I420ToI444(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21. LIBYUV_API -int I400Copy(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -LIBYUV_API -int I420ToNV12(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -LIBYUV_API -int I420ToNV21(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -LIBYUV_API -int I420ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int I420ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int I420ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height); - -LIBYUV_API -int I420ToRGB24(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToRAW(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes). -// Values in dither matrix from 0 to 7 recommended. -// The order of the dither matrix is first byte is upper left. - -LIBYUV_API -int I420ToRGB565Dither(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - const uint8* dither4x4, int width, int height); - -LIBYUV_API -int I420ToARGB1555(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToARGB4444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I400Copy(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +LIBYUV_API +int I420ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +LIBYUV_API +int I420ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); + +LIBYUV_API +int I420ToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height); + +LIBYUV_API +int I420ToUYVY(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height); + +// The following are from convert_argb.h +// DEPRECATED: The prototypes will be removed in future. Use convert_argb.h + +// Convert I420 to ARGB. +LIBYUV_API +int I420ToARGB(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Convert I420 to ABGR. +LIBYUV_API +int I420ToABGR(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert I420 to specified format. // "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the // buffer has contiguous rows. Can be negative. A multiple of 16 is optimal. LIBYUV_API -int ConvertFromI420(const uint8* y, int y_stride, - const uint8* u, int u_stride, - const uint8* v, int v_stride, - uint8* dst_sample, int dst_sample_stride, - int width, int height, - uint32 format); +int ConvertFromI420(const uint8_t* y, + int y_stride, + const uint8_t* u, + int u_stride, + const uint8_t* v, + int v_stride, + uint8_t* dst_sample, + int dst_sample_stride, + int width, + int height, + uint32_t fourcc); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/convert_from_argb.h b/third_party/libyuv/include/libyuv/convert_from_argb.h index 8d7f02f8c4..2a488838a9 100644 --- a/third_party/libyuv/include/libyuv/convert_from_argb.h +++ b/third_party/libyuv/include/libyuv/convert_from_argb.h @@ -21,166 +21,322 @@ extern "C" { // Copy ARGB to ARGB. #define ARGBToARGB ARGBCopy LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBCopy(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert ARGB To BGRA. LIBYUV_API -int ARGBToBGRA(const uint8* src_argb, int src_stride_argb, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height); +int ARGBToBGRA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_bgra, + int dst_stride_bgra, + int width, + int height); // Convert ARGB To ABGR. LIBYUV_API -int ARGBToABGR(const uint8* src_argb, int src_stride_argb, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); +int ARGBToABGR(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_abgr, + int dst_stride_abgr, + int width, + int height); // Convert ARGB To RGBA. LIBYUV_API -int ARGBToRGBA(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height); +int ARGBToRGBA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgba, + int dst_stride_rgba, + int width, + int height); + +// Aliases +#define ARGBToAB30 ABGRToAR30 +#define ABGRToAB30 ARGBToAR30 + +// Convert ABGR To AR30. +LIBYUV_API +int ABGRToAR30(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Convert ARGB To AR30. +LIBYUV_API +int ARGBToAR30(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height); + +// Aliases +#define ABGRToRGB24 ARGBToRAW +#define ABGRToRAW ARGBToRGB24 // Convert ARGB To RGB24. LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height); +int ARGBToRGB24(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); // Convert ARGB To RAW. LIBYUV_API -int ARGBToRAW(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb, int dst_stride_rgb, - int width, int height); +int ARGBToRAW(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_raw, + int dst_stride_raw, + int width, + int height); // Convert ARGB To RGB565. LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); +int ARGBToRGB565(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + int width, + int height); // Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes). // Values in dither matrix from 0 to 7 recommended. // The order of the dither matrix is first byte is upper left. // TODO(fbarchard): Consider pointer to 2d array for dither4x4. -// const uint8(*dither)[4][4]; +// const uint8_t(*dither)[4][4]; LIBYUV_API -int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - const uint8* dither4x4, int width, int height); +int ARGBToRGB565Dither(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_rgb565, + int dst_stride_rgb565, + const uint8_t* dither4x4, + int width, + int height); // Convert ARGB To ARGB1555. LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height); +int ARGBToARGB1555(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb1555, + int dst_stride_argb1555, + int width, + int height); // Convert ARGB To ARGB4444. LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height); +int ARGBToARGB4444(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb4444, + int dst_stride_argb4444, + int width, + int height); // Convert ARGB To I444. LIBYUV_API -int ARGBToI444(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToI444(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Convert ARGB to AR64. +LIBYUV_API +int ARGBToAR64(const uint8_t* src_argb, + int src_stride_argb, + uint16_t* dst_ar64, + int dst_stride_ar64, + int width, + int height); + +// Convert ABGR to AB64. +#define ABGRToAB64 ARGBToAR64 + +// Convert ARGB to AB64. +LIBYUV_API +int ARGBToAB64(const uint8_t* src_argb, + int src_stride_argb, + uint16_t* dst_ab64, + int dst_stride_ab64, + int width, + int height); + +// Convert ABGR to AR64. +#define ABGRToAR64 ARGBToAB64 // Convert ARGB To I422. LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToI422(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert ARGB To I420. (also in convert.h) LIBYUV_API -int ARGBToI420(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToI420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert ARGB to J420. (JPeg full range I420). LIBYUV_API -int ARGBToJ420(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToJ420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert ARGB to J422. LIBYUV_API -int ARGBToJ422(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToJ422(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); -// Convert ARGB To I411. +// Convert ARGB to J400. (JPeg full range). LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int ARGBToJ400(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height); -// Convert ARGB to J400. (JPeg full range). +// Convert RGBA to J400. (JPeg full range). LIBYUV_API -int ARGBToJ400(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - int width, int height); +int RGBAToJ400(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height); // Convert ARGB to I400. LIBYUV_API -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); +int ARGBToI400(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); // Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB) LIBYUV_API -int ARGBToG(const uint8* src_argb, int src_stride_argb, - uint8* dst_g, int dst_stride_g, - int width, int height); +int ARGBToG(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_g, + int dst_stride_g, + int width, + int height); // Convert ARGB To NV12. LIBYUV_API -int ARGBToNV12(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); +int ARGBToNV12(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); // Convert ARGB To NV21. LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); +int ARGBToNV21(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); -// Convert ARGB To NV21. +// Convert ABGR To NV12. LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); +int ABGRToNV12(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert ABGR To NV21. +LIBYUV_API +int ABGRToNV21(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); // Convert ARGB To YUY2. LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height); +int ARGBToYUY2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height); // Convert ARGB To UYVY. LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height); +int ARGBToUYVY(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height); + +// RAW to JNV21 full range NV21 +LIBYUV_API +int RAWToJNV21(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/cpu_id.h b/third_party/libyuv/include/libyuv/cpu_id.h index 7c6c9aeb00..fb90c6c737 100644 --- a/third_party/libyuv/include/libyuv/cpu_id.h +++ b/third_party/libyuv/include/libyuv/cpu_id.h @@ -31,47 +31,93 @@ static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; -static const int kCpuHasSSE42 = 0x100; +static const int kCpuHasSSE42 = 0x100; // unused at this time. static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; static const int kCpuHasFMA3 = 0x1000; -static const int kCpuHasAVX3 = 0x2000; -// 0x2000, 0x4000, 0x8000 reserved for future X86 flags. +static const int kCpuHasF16C = 0x2000; +static const int kCpuHasGFNI = 0x4000; +static const int kCpuHasAVX512BW = 0x8000; +static const int kCpuHasAVX512VL = 0x10000; +static const int kCpuHasAVX512VNNI = 0x20000; +static const int kCpuHasAVX512VBMI = 0x40000; +static const int kCpuHasAVX512VBMI2 = 0x80000; +static const int kCpuHasAVX512VBITALG = 0x100000; +static const int kCpuHasAVX512VPOPCNTDQ = 0x200000; // These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x10000; -static const int kCpuHasDSPR2 = 0x20000; -static const int kCpuHasMSA = 0x40000; +static const int kCpuHasMIPS = 0x400000; +static const int kCpuHasMSA = 0x800000; -// Internal function used to auto-init. -LIBYUV_API -int InitCpuFlags(void); +// These flags are only valid on LOONGARCH processors. +static const int kCpuHasLOONGARCH = 0x2000000; +static const int kCpuHasLSX = 0x4000000; +static const int kCpuHasLASX = 0x8000000; -// Internal function for parsing /proc/cpuinfo. +// Optional init function. TestCpuFlag does an auto-init. +// Returns cpu_info flags. LIBYUV_API -int ArmCpuCaps(const char* cpuinfo_name); +int InitCpuFlags(void); // Detect CPU has SSE2 etc. // Test_flag parameter should be one of kCpuHas constants above. -// returns non-zero if instruction set is detected +// Returns non-zero if instruction set is detected static __inline int TestCpuFlag(int test_flag) { LIBYUV_API extern int cpu_info_; - return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag; +#ifdef __ATOMIC_RELAXED + int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED); +#else + int cpu_info = cpu_info_; +#endif + return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag; } +// Internal function for parsing /proc/cpuinfo. +LIBYUV_API +int ArmCpuCaps(const char* cpuinfo_name); +LIBYUV_API +int MipsCpuCaps(const char* cpuinfo_name); + // For testing, allow CPU flags to be disabled. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. // MaskCpuFlags(-1) to enable all cpu specific optimizations. // MaskCpuFlags(1) to disable all cpu specific optimizations. +// MaskCpuFlags(0) to reset state so next call will auto init. +// Returns cpu_info flags. LIBYUV_API -void MaskCpuFlags(int enable_flags); +int MaskCpuFlags(int enable_flags); + +// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags| +// should be a valid combination of the kCpuHas constants above and include +// kCpuInitialized. Use this method when running in a sandboxed process where +// the detection code might fail (as it might access /proc/cpuinfo). In such +// cases the cpu_info can be obtained from a non sandboxed process by calling +// InitCpuFlags() and passed to the sandboxed process (via command line +// parameters, IPC...) which can then call this method to initialize the CPU +// flags. +// Notes: +// - when specifying 0 for |cpu_flags|, the auto initialization is enabled +// again. +// - enabling CPU features that are not supported by the CPU will result in +// undefined behavior. +// TODO(fbarchard): consider writing a helper function that translates from +// other library CPU info to libyuv CPU info and add a .md doc that explains +// CPU detection. +static __inline void SetCpuFlags(int cpu_flags) { + LIBYUV_API extern int cpu_info_; +#ifdef __ATOMIC_RELAXED + __atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED); +#else + cpu_info_ = cpu_flags; +#endif +} // Low level cpuid for X86. Returns zeros on other CPUs. // eax is the info type that you want. // ecx is typically the cpu number, and should normally be zero. LIBYUV_API -void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info); +void CpuId(int info_eax, int info_ecx, int* cpu_info); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/loongson_intrinsics.h b/third_party/libyuv/include/libyuv/loongson_intrinsics.h new file mode 100644 index 0000000000..1d613defb1 --- /dev/null +++ b/third_party/libyuv/include/libyuv/loongson_intrinsics.h @@ -0,0 +1,1949 @@ +/* + * Copyright 2022 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef INCLUDE_LIBYUV_LOONGSON_INTRINSICS_H +#define INCLUDE_LIBYUV_LOONGSON_INTRINSICS_H + +/* + * Copyright (c) 2022 Loongson Technology Corporation Limited + * All rights reserved. + * Contributed by Shiyou Yin + * Xiwei Gu + * Lu Wang + * + * This file is a header file for loongarch builtin extension. + * + */ + +#ifndef LOONGSON_INTRINSICS_H +#define LOONGSON_INTRINSICS_H + +/** + * MAJOR version: Macro usage changes. + * MINOR version: Add new functions, or bug fixes. + * MICRO version: Comment changes or implementation changes. + */ +#define LSOM_VERSION_MAJOR 1 +#define LSOM_VERSION_MINOR 1 +#define LSOM_VERSION_MICRO 0 + +#define DUP2_ARG1(_INS, _IN0, _IN1, _OUT0, _OUT1) \ + { \ + _OUT0 = _INS(_IN0); \ + _OUT1 = _INS(_IN1); \ + } + +#define DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1) \ + { \ + _OUT0 = _INS(_IN0, _IN1); \ + _OUT1 = _INS(_IN2, _IN3); \ + } + +#define DUP2_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _OUT0, _OUT1) \ + { \ + _OUT0 = _INS(_IN0, _IN1, _IN2); \ + _OUT1 = _INS(_IN3, _IN4, _IN5); \ + } + +#define DUP4_ARG1(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1, _OUT2, _OUT3) \ + { \ + DUP2_ARG1(_INS, _IN0, _IN1, _OUT0, _OUT1); \ + DUP2_ARG1(_INS, _IN2, _IN3, _OUT2, _OUT3); \ + } + +#define DUP4_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _IN6, _IN7, _OUT0, \ + _OUT1, _OUT2, _OUT3) \ + { \ + DUP2_ARG2(_INS, _IN0, _IN1, _IN2, _IN3, _OUT0, _OUT1); \ + DUP2_ARG2(_INS, _IN4, _IN5, _IN6, _IN7, _OUT2, _OUT3); \ + } + +#define DUP4_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _IN6, _IN7, _IN8, \ + _IN9, _IN10, _IN11, _OUT0, _OUT1, _OUT2, _OUT3) \ + { \ + DUP2_ARG3(_INS, _IN0, _IN1, _IN2, _IN3, _IN4, _IN5, _OUT0, _OUT1); \ + DUP2_ARG3(_INS, _IN6, _IN7, _IN8, _IN9, _IN10, _IN11, _OUT2, _OUT3); \ + } + +#ifdef __loongarch_sx +#include +/* + * ============================================================================= + * Description : Dot product & addition of byte vector elements + * Arguments : Inputs - in_c, in_h, in_l + * Outputs - out + * Return Type - halfword + * Details : Signed byte elements from in_h are multiplied by + * signed byte elements from in_l, and then added adjacent to + * each other to get results with the twice size of input. + * Then the results plus to signed half-word elements from in_c. + * Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l) + * in_c : 1,2,3,4, 1,2,3,4 + * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 + * out : 23,40,41,26, 23,40,41,26 + * ============================================================================= + */ +static inline __m128i __lsx_vdp2add_h_b(__m128i in_c, + __m128i in_h, + __m128i in_l) { + __m128i out; + + out = __lsx_vmaddwev_h_b(in_c, in_h, in_l); + out = __lsx_vmaddwod_h_b(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product & addition of byte vector elements + * Arguments : Inputs - in_c, in_h, in_l + * Outputs - out + * Return Type - halfword + * Details : Unsigned byte elements from in_h are multiplied by + * unsigned byte elements from in_l, and then added adjacent to + * each other to get results with the twice size of input. + * The results plus to signed half-word elements from in_c. + * Example : out = __lsx_vdp2add_h_bu(in_c, in_h, in_l) + * in_c : 1,2,3,4, 1,2,3,4 + * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 + * out : 23,40,41,26, 23,40,41,26 + * ============================================================================= + */ +static inline __m128i __lsx_vdp2add_h_bu(__m128i in_c, + __m128i in_h, + __m128i in_l) { + __m128i out; + + out = __lsx_vmaddwev_h_bu(in_c, in_h, in_l); + out = __lsx_vmaddwod_h_bu(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product & addition of byte vector elements + * Arguments : Inputs - in_c, in_h, in_l + * Outputs - out + * Return Type - halfword + * Details : Unsigned byte elements from in_h are multiplied by + * signed byte elements from in_l, and then added adjacent to + * each other to get results with the twice size of input. + * The results plus to signed half-word elements from in_c. + * Example : out = __lsx_vdp2add_h_bu_b(in_c, in_h, in_l) + * in_c : 1,1,1,1, 1,1,1,1 + * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * in_l : -1,-2,-3,-4, -5,-6,-7,-8, 1,2,3,4, 5,6,7,8 + * out : -4,-24,-60,-112, 6,26,62,114 + * ============================================================================= + */ +static inline __m128i __lsx_vdp2add_h_bu_b(__m128i in_c, + __m128i in_h, + __m128i in_l) { + __m128i out; + + out = __lsx_vmaddwev_h_bu_b(in_c, in_h, in_l); + out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product & addition of half-word vector elements + * Arguments : Inputs - in_c, in_h, in_l + * Outputs - out + * Return Type - __m128i + * Details : Signed half-word elements from in_h are multiplied by + * signed half-word elements from in_l, and then added adjacent to + * each other to get results with the twice size of input. + * Then the results plus to signed word elements from in_c. + * Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l) + * in_c : 1,2,3,4 + * in_h : 1,2,3,4, 5,6,7,8 + * in_l : 8,7,6,5, 4,3,2,1 + * out : 23,40,41,26 + * ============================================================================= + */ +static inline __m128i __lsx_vdp2add_w_h(__m128i in_c, + __m128i in_h, + __m128i in_l) { + __m128i out; + + out = __lsx_vmaddwev_w_h(in_c, in_h, in_l); + out = __lsx_vmaddwod_w_h(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of byte vector elements + * Arguments : Inputs - in_h, in_l + * Outputs - out + * Return Type - halfword + * Details : Signed byte elements from in_h are multiplied by + * signed byte elements from in_l, and then added adjacent to + * each other to get results with the twice size of input. + * Example : out = __lsx_vdp2_h_b(in_h, in_l) + * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 + * out : 22,38,38,22, 22,38,38,22 + * ============================================================================= + */ +static inline __m128i __lsx_vdp2_h_b(__m128i in_h, __m128i in_l) { + __m128i out; + + out = __lsx_vmulwev_h_b(in_h, in_l); + out = __lsx_vmaddwod_h_b(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of byte vector elements + * Arguments : Inputs - in_h, in_l + * Outputs - out + * Return Type - halfword + * Details : Unsigned byte elements from in_h are multiplied by + * unsigned byte elements from in_l, and then added adjacent to + * each other to get results with the twice size of input. + * Example : out = __lsx_vdp2_h_bu(in_h, in_l) + * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 + * out : 22,38,38,22, 22,38,38,22 + * ============================================================================= + */ +static inline __m128i __lsx_vdp2_h_bu(__m128i in_h, __m128i in_l) { + __m128i out; + + out = __lsx_vmulwev_h_bu(in_h, in_l); + out = __lsx_vmaddwod_h_bu(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of byte vector elements + * Arguments : Inputs - in_h, in_l + * Outputs - out + * Return Type - halfword + * Details : Unsigned byte elements from in_h are multiplied by + * signed byte elements from in_l, and then added adjacent to + * each other to get results with the twice size of input. + * Example : out = __lsx_vdp2_h_bu_b(in_h, in_l) + * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,-1 + * out : 22,38,38,22, 22,38,38,6 + * ============================================================================= + */ +static inline __m128i __lsx_vdp2_h_bu_b(__m128i in_h, __m128i in_l) { + __m128i out; + + out = __lsx_vmulwev_h_bu_b(in_h, in_l); + out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of byte vector elements + * Arguments : Inputs - in_h, in_l + * Outputs - out + * Return Type - halfword + * Details : Signed byte elements from in_h are multiplied by + * signed byte elements from in_l, and then added adjacent to + * each other to get results with the twice size of input. + * Example : out = __lsx_vdp2_w_h(in_h, in_l) + * in_h : 1,2,3,4, 5,6,7,8 + * in_l : 8,7,6,5, 4,3,2,1 + * out : 22,38,38,22 + * ============================================================================= + */ +static inline __m128i __lsx_vdp2_w_h(__m128i in_h, __m128i in_l) { + __m128i out; + + out = __lsx_vmulwev_w_h(in_h, in_l); + out = __lsx_vmaddwod_w_h(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Clip all halfword elements of input vector between min & max + * out = ((_in) < (min)) ? (min) : (((_in) > (max)) ? (max) : + * (_in)) + * Arguments : Inputs - _in (input vector) + * - min (min threshold) + * - max (max threshold) + * Outputs - out (output vector with clipped elements) + * Return Type - signed halfword + * Example : out = __lsx_vclip_h(_in) + * _in : -8,2,280,249, -8,255,280,249 + * min : 1,1,1,1, 1,1,1,1 + * max : 9,9,9,9, 9,9,9,9 + * out : 1,2,9,9, 1,9,9,9 + * ============================================================================= + */ +static inline __m128i __lsx_vclip_h(__m128i _in, __m128i min, __m128i max) { + __m128i out; + + out = __lsx_vmax_h(min, _in); + out = __lsx_vmin_h(max, out); + return out; +} + +/* + * ============================================================================= + * Description : Set each element of vector between 0 and 255 + * Arguments : Inputs - _in + * Outputs - out + * Return Type - halfword + * Details : Signed byte elements from _in are clamped between 0 and 255. + * Example : out = __lsx_vclip255_h(_in) + * _in : -8,255,280,249, -8,255,280,249 + * out : 0,255,255,249, 0,255,255,249 + * ============================================================================= + */ +static inline __m128i __lsx_vclip255_h(__m128i _in) { + __m128i out; + + out = __lsx_vmaxi_h(_in, 0); + out = __lsx_vsat_hu(out, 7); + return out; +} + +/* + * ============================================================================= + * Description : Set each element of vector between 0 and 255 + * Arguments : Inputs - _in + * Outputs - out + * Return Type - word + * Details : Signed byte elements from _in are clamped between 0 and 255. + * Example : out = __lsx_vclip255_w(_in) + * _in : -8,255,280,249 + * out : 0,255,255,249 + * ============================================================================= + */ +static inline __m128i __lsx_vclip255_w(__m128i _in) { + __m128i out; + + out = __lsx_vmaxi_w(_in, 0); + out = __lsx_vsat_wu(out, 7); + return out; +} + +/* + * ============================================================================= + * Description : Swap two variables + * Arguments : Inputs - _in0, _in1 + * Outputs - _in0, _in1 (in-place) + * Details : Swapping of two input variables using xor + * Example : LSX_SWAP(_in0, _in1) + * _in0 : 1,2,3,4 + * _in1 : 5,6,7,8 + * _in0(out) : 5,6,7,8 + * _in1(out) : 1,2,3,4 + * ============================================================================= + */ +#define LSX_SWAP(_in0, _in1) \ + { \ + _in0 = __lsx_vxor_v(_in0, _in1); \ + _in1 = __lsx_vxor_v(_in0, _in1); \ + _in0 = __lsx_vxor_v(_in0, _in1); \ + } + +/* + * ============================================================================= + * Description : Transpose 4x4 block with word elements in vectors + * Arguments : Inputs - in0, in1, in2, in3 + * Outputs - out0, out1, out2, out3 + * Details : + * Example : + * 1, 2, 3, 4 1, 5, 9,13 + * 5, 6, 7, 8 to 2, 6,10,14 + * 9,10,11,12 =====> 3, 7,11,15 + * 13,14,15,16 4, 8,12,16 + * ============================================================================= + */ +#define LSX_TRANSPOSE4x4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + __m128i _t0, _t1, _t2, _t3; \ + \ + _t0 = __lsx_vilvl_w(_in1, _in0); \ + _t1 = __lsx_vilvh_w(_in1, _in0); \ + _t2 = __lsx_vilvl_w(_in3, _in2); \ + _t3 = __lsx_vilvh_w(_in3, _in2); \ + _out0 = __lsx_vilvl_d(_t2, _t0); \ + _out1 = __lsx_vilvh_d(_t2, _t0); \ + _out2 = __lsx_vilvl_d(_t3, _t1); \ + _out3 = __lsx_vilvh_d(_t3, _t1); \ + } + +/* + * ============================================================================= + * Description : Transpose 8x8 block with byte elements in vectors + * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 + * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, + * _out7 + * Details : The rows of the matrix become columns, and the columns + * become rows. + * Example : LSX_TRANSPOSE8x8_B + * _in0 : 00,01,02,03,04,05,06,07, 00,00,00,00,00,00,00,00 + * _in1 : 10,11,12,13,14,15,16,17, 00,00,00,00,00,00,00,00 + * _in2 : 20,21,22,23,24,25,26,27, 00,00,00,00,00,00,00,00 + * _in3 : 30,31,32,33,34,35,36,37, 00,00,00,00,00,00,00,00 + * _in4 : 40,41,42,43,44,45,46,47, 00,00,00,00,00,00,00,00 + * _in5 : 50,51,52,53,54,55,56,57, 00,00,00,00,00,00,00,00 + * _in6 : 60,61,62,63,64,65,66,67, 00,00,00,00,00,00,00,00 + * _in7 : 70,71,72,73,74,75,76,77, 00,00,00,00,00,00,00,00 + * + * _ out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00 + * _ out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00 + * _ out2 : 02,12,22,32,42,52,62,72, 00,00,00,00,00,00,00,00 + * _ out3 : 03,13,23,33,43,53,63,73, 00,00,00,00,00,00,00,00 + * _ out4 : 04,14,24,34,44,54,64,74, 00,00,00,00,00,00,00,00 + * _ out5 : 05,15,25,35,45,55,65,75, 00,00,00,00,00,00,00,00 + * _ out6 : 06,16,26,36,46,56,66,76, 00,00,00,00,00,00,00,00 + * _ out7 : 07,17,27,37,47,57,67,77, 00,00,00,00,00,00,00,00 + * ============================================================================= + */ +#define LSX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + __m128i zero = {0}; \ + __m128i shuf8 = {0x0F0E0D0C0B0A0908, 0x1716151413121110}; \ + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ + \ + _t0 = __lsx_vilvl_b(_in2, _in0); \ + _t1 = __lsx_vilvl_b(_in3, _in1); \ + _t2 = __lsx_vilvl_b(_in6, _in4); \ + _t3 = __lsx_vilvl_b(_in7, _in5); \ + _t4 = __lsx_vilvl_b(_t1, _t0); \ + _t5 = __lsx_vilvh_b(_t1, _t0); \ + _t6 = __lsx_vilvl_b(_t3, _t2); \ + _t7 = __lsx_vilvh_b(_t3, _t2); \ + _out0 = __lsx_vilvl_w(_t6, _t4); \ + _out2 = __lsx_vilvh_w(_t6, _t4); \ + _out4 = __lsx_vilvl_w(_t7, _t5); \ + _out6 = __lsx_vilvh_w(_t7, _t5); \ + _out1 = __lsx_vshuf_b(zero, _out0, shuf8); \ + _out3 = __lsx_vshuf_b(zero, _out2, shuf8); \ + _out5 = __lsx_vshuf_b(zero, _out4, shuf8); \ + _out7 = __lsx_vshuf_b(zero, _out6, shuf8); \ + } + +/* + * ============================================================================= + * Description : Transpose 8x8 block with half-word elements in vectors + * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + * Details : + * Example : + * 00,01,02,03,04,05,06,07 00,10,20,30,40,50,60,70 + * 10,11,12,13,14,15,16,17 01,11,21,31,41,51,61,71 + * 20,21,22,23,24,25,26,27 02,12,22,32,42,52,62,72 + * 30,31,32,33,34,35,36,37 to 03,13,23,33,43,53,63,73 + * 40,41,42,43,44,45,46,47 ======> 04,14,24,34,44,54,64,74 + * 50,51,52,53,54,55,56,57 05,15,25,35,45,55,65,75 + * 60,61,62,63,64,65,66,67 06,16,26,36,46,56,66,76 + * 70,71,72,73,74,75,76,77 07,17,27,37,47,57,67,77 + * ============================================================================= + */ +#define LSX_TRANSPOSE8x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + __m128i _s0, _s1, _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ + \ + _s0 = __lsx_vilvl_h(_in6, _in4); \ + _s1 = __lsx_vilvl_h(_in7, _in5); \ + _t0 = __lsx_vilvl_h(_s1, _s0); \ + _t1 = __lsx_vilvh_h(_s1, _s0); \ + _s0 = __lsx_vilvh_h(_in6, _in4); \ + _s1 = __lsx_vilvh_h(_in7, _in5); \ + _t2 = __lsx_vilvl_h(_s1, _s0); \ + _t3 = __lsx_vilvh_h(_s1, _s0); \ + _s0 = __lsx_vilvl_h(_in2, _in0); \ + _s1 = __lsx_vilvl_h(_in3, _in1); \ + _t4 = __lsx_vilvl_h(_s1, _s0); \ + _t5 = __lsx_vilvh_h(_s1, _s0); \ + _s0 = __lsx_vilvh_h(_in2, _in0); \ + _s1 = __lsx_vilvh_h(_in3, _in1); \ + _t6 = __lsx_vilvl_h(_s1, _s0); \ + _t7 = __lsx_vilvh_h(_s1, _s0); \ + \ + _out0 = __lsx_vpickev_d(_t0, _t4); \ + _out2 = __lsx_vpickev_d(_t1, _t5); \ + _out4 = __lsx_vpickev_d(_t2, _t6); \ + _out6 = __lsx_vpickev_d(_t3, _t7); \ + _out1 = __lsx_vpickod_d(_t0, _t4); \ + _out3 = __lsx_vpickod_d(_t1, _t5); \ + _out5 = __lsx_vpickod_d(_t2, _t6); \ + _out7 = __lsx_vpickod_d(_t3, _t7); \ + } + +/* + * ============================================================================= + * Description : Transpose input 8x4 byte block into 4x8 + * Arguments : Inputs - _in0, _in1, _in2, _in3 (input 8x4 byte block) + * Outputs - _out0, _out1, _out2, _out3 (output 4x8 byte block) + * Return Type - as per RTYPE + * Details : The rows of the matrix become columns, and the columns become + * rows. + * Example : LSX_TRANSPOSE8x4_B + * _in0 : 00,01,02,03,00,00,00,00, 00,00,00,00,00,00,00,00 + * _in1 : 10,11,12,13,00,00,00,00, 00,00,00,00,00,00,00,00 + * _in2 : 20,21,22,23,00,00,00,00, 00,00,00,00,00,00,00,00 + * _in3 : 30,31,32,33,00,00,00,00, 00,00,00,00,00,00,00,00 + * _in4 : 40,41,42,43,00,00,00,00, 00,00,00,00,00,00,00,00 + * _in5 : 50,51,52,53,00,00,00,00, 00,00,00,00,00,00,00,00 + * _in6 : 60,61,62,63,00,00,00,00, 00,00,00,00,00,00,00,00 + * _in7 : 70,71,72,73,00,00,00,00, 00,00,00,00,00,00,00,00 + * + * _out0 : 00,10,20,30,40,50,60,70, 00,00,00,00,00,00,00,00 + * _out1 : 01,11,21,31,41,51,61,71, 00,00,00,00,00,00,00,00 + * _out2 : 02,12,22,32,42,52,62,72, 00,00,00,00,00,00,00,00 + * _out3 : 03,13,23,33,43,53,63,73, 00,00,00,00,00,00,00,00 + * ============================================================================= + */ +#define LSX_TRANSPOSE8x4_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3) \ + { \ + __m128i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ + \ + _tmp0_m = __lsx_vpackev_w(_in4, _in0); \ + _tmp1_m = __lsx_vpackev_w(_in5, _in1); \ + _tmp2_m = __lsx_vilvl_b(_tmp1_m, _tmp0_m); \ + _tmp0_m = __lsx_vpackev_w(_in6, _in2); \ + _tmp1_m = __lsx_vpackev_w(_in7, _in3); \ + \ + _tmp3_m = __lsx_vilvl_b(_tmp1_m, _tmp0_m); \ + _tmp0_m = __lsx_vilvl_h(_tmp3_m, _tmp2_m); \ + _tmp1_m = __lsx_vilvh_h(_tmp3_m, _tmp2_m); \ + \ + _out0 = __lsx_vilvl_w(_tmp1_m, _tmp0_m); \ + _out2 = __lsx_vilvh_w(_tmp1_m, _tmp0_m); \ + _out1 = __lsx_vilvh_d(_out2, _out0); \ + _out3 = __lsx_vilvh_d(_out0, _out2); \ + } + +/* + * ============================================================================= + * Description : Transpose 16x8 block with byte elements in vectors + * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, in8 + * in9, in10, in11, in12, in13, in14, in15 + * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + * Details : + * Example : + * 000,001,002,003,004,005,006,007 + * 008,009,010,011,012,013,014,015 + * 016,017,018,019,020,021,022,023 + * 024,025,026,027,028,029,030,031 + * 032,033,034,035,036,037,038,039 + * 040,041,042,043,044,045,046,047 000,008,...,112,120 + * 048,049,050,051,052,053,054,055 001,009,...,113,121 + * 056,057,058,059,060,061,062,063 to 002,010,...,114,122 + * 064,068,066,067,068,069,070,071 =====> 003,011,...,115,123 + * 072,073,074,075,076,077,078,079 004,012,...,116,124 + * 080,081,082,083,084,085,086,087 005,013,...,117,125 + * 088,089,090,091,092,093,094,095 006,014,...,118,126 + * 096,097,098,099,100,101,102,103 007,015,...,119,127 + * 104,105,106,107,108,109,110,111 + * 112,113,114,115,116,117,118,119 + * 120,121,122,123,124,125,126,127 + * ============================================================================= + */ +#define LSX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ + _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ + _out6, _out7) \ + { \ + __m128i _tmp0, _tmp1, _tmp2, _tmp3, _tmp4, _tmp5, _tmp6, _tmp7; \ + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ + DUP4_ARG2(__lsx_vilvl_b, _in2, _in0, _in3, _in1, _in6, _in4, _in7, _in5, \ + _tmp0, _tmp1, _tmp2, _tmp3); \ + DUP4_ARG2(__lsx_vilvl_b, _in10, _in8, _in11, _in9, _in14, _in12, _in15, \ + _in13, _tmp4, _tmp5, _tmp6, _tmp7); \ + DUP2_ARG2(__lsx_vilvl_b, _tmp1, _tmp0, _tmp3, _tmp2, _t0, _t2); \ + DUP2_ARG2(__lsx_vilvh_b, _tmp1, _tmp0, _tmp3, _tmp2, _t1, _t3); \ + DUP2_ARG2(__lsx_vilvl_b, _tmp5, _tmp4, _tmp7, _tmp6, _t4, _t6); \ + DUP2_ARG2(__lsx_vilvh_b, _tmp5, _tmp4, _tmp7, _tmp6, _t5, _t7); \ + DUP2_ARG2(__lsx_vilvl_w, _t2, _t0, _t3, _t1, _tmp0, _tmp4); \ + DUP2_ARG2(__lsx_vilvh_w, _t2, _t0, _t3, _t1, _tmp2, _tmp6); \ + DUP2_ARG2(__lsx_vilvl_w, _t6, _t4, _t7, _t5, _tmp1, _tmp5); \ + DUP2_ARG2(__lsx_vilvh_w, _t6, _t4, _t7, _t5, _tmp3, _tmp7); \ + DUP2_ARG2(__lsx_vilvl_d, _tmp1, _tmp0, _tmp3, _tmp2, _out0, _out2); \ + DUP2_ARG2(__lsx_vilvh_d, _tmp1, _tmp0, _tmp3, _tmp2, _out1, _out3); \ + DUP2_ARG2(__lsx_vilvl_d, _tmp5, _tmp4, _tmp7, _tmp6, _out4, _out6); \ + DUP2_ARG2(__lsx_vilvh_d, _tmp5, _tmp4, _tmp7, _tmp6, _out5, _out7); \ + } + +/* + * ============================================================================= + * Description : Butterfly of 4 input vectors + * Arguments : Inputs - in0, in1, in2, in3 + * Outputs - out0, out1, out2, out3 + * Details : Butterfly operation + * Example : + * out0 = in0 + in3; + * out1 = in1 + in2; + * out2 = in1 - in2; + * out3 = in0 - in3; + * ============================================================================= + */ +#define LSX_BUTTERFLY_4_B(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + _out0 = __lsx_vadd_b(_in0, _in3); \ + _out1 = __lsx_vadd_b(_in1, _in2); \ + _out2 = __lsx_vsub_b(_in1, _in2); \ + _out3 = __lsx_vsub_b(_in0, _in3); \ + } +#define LSX_BUTTERFLY_4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + _out0 = __lsx_vadd_h(_in0, _in3); \ + _out1 = __lsx_vadd_h(_in1, _in2); \ + _out2 = __lsx_vsub_h(_in1, _in2); \ + _out3 = __lsx_vsub_h(_in0, _in3); \ + } +#define LSX_BUTTERFLY_4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + _out0 = __lsx_vadd_w(_in0, _in3); \ + _out1 = __lsx_vadd_w(_in1, _in2); \ + _out2 = __lsx_vsub_w(_in1, _in2); \ + _out3 = __lsx_vsub_w(_in0, _in3); \ + } +#define LSX_BUTTERFLY_4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + _out0 = __lsx_vadd_d(_in0, _in3); \ + _out1 = __lsx_vadd_d(_in1, _in2); \ + _out2 = __lsx_vsub_d(_in1, _in2); \ + _out3 = __lsx_vsub_d(_in0, _in3); \ + } + +/* + * ============================================================================= + * Description : Butterfly of 8 input vectors + * Arguments : Inputs - _in0, _in1, _in2, _in3, ~ + * Outputs - _out0, _out1, _out2, _out3, ~ + * Details : Butterfly operation + * Example : + * _out0 = _in0 + _in7; + * _out1 = _in1 + _in6; + * _out2 = _in2 + _in5; + * _out3 = _in3 + _in4; + * _out4 = _in3 - _in4; + * _out5 = _in2 - _in5; + * _out6 = _in1 - _in6; + * _out7 = _in0 - _in7; + * ============================================================================= + */ +#define LSX_BUTTERFLY_8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + _out0 = __lsx_vadd_b(_in0, _in7); \ + _out1 = __lsx_vadd_b(_in1, _in6); \ + _out2 = __lsx_vadd_b(_in2, _in5); \ + _out3 = __lsx_vadd_b(_in3, _in4); \ + _out4 = __lsx_vsub_b(_in3, _in4); \ + _out5 = __lsx_vsub_b(_in2, _in5); \ + _out6 = __lsx_vsub_b(_in1, _in6); \ + _out7 = __lsx_vsub_b(_in0, _in7); \ + } + +#define LSX_BUTTERFLY_8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + _out0 = __lsx_vadd_h(_in0, _in7); \ + _out1 = __lsx_vadd_h(_in1, _in6); \ + _out2 = __lsx_vadd_h(_in2, _in5); \ + _out3 = __lsx_vadd_h(_in3, _in4); \ + _out4 = __lsx_vsub_h(_in3, _in4); \ + _out5 = __lsx_vsub_h(_in2, _in5); \ + _out6 = __lsx_vsub_h(_in1, _in6); \ + _out7 = __lsx_vsub_h(_in0, _in7); \ + } + +#define LSX_BUTTERFLY_8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + _out0 = __lsx_vadd_w(_in0, _in7); \ + _out1 = __lsx_vadd_w(_in1, _in6); \ + _out2 = __lsx_vadd_w(_in2, _in5); \ + _out3 = __lsx_vadd_w(_in3, _in4); \ + _out4 = __lsx_vsub_w(_in3, _in4); \ + _out5 = __lsx_vsub_w(_in2, _in5); \ + _out6 = __lsx_vsub_w(_in1, _in6); \ + _out7 = __lsx_vsub_w(_in0, _in7); \ + } + +#define LSX_BUTTERFLY_8_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + _out0 = __lsx_vadd_d(_in0, _in7); \ + _out1 = __lsx_vadd_d(_in1, _in6); \ + _out2 = __lsx_vadd_d(_in2, _in5); \ + _out3 = __lsx_vadd_d(_in3, _in4); \ + _out4 = __lsx_vsub_d(_in3, _in4); \ + _out5 = __lsx_vsub_d(_in2, _in5); \ + _out6 = __lsx_vsub_d(_in1, _in6); \ + _out7 = __lsx_vsub_d(_in0, _in7); \ + } + +#endif // LSX + +#ifdef __loongarch_asx +#include +/* + * ============================================================================= + * Description : Dot product of byte vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - signed halfword + * Details : Unsigned byte elements from in_h are multiplied with + * unsigned byte elements from in_l producing a result + * twice the size of input i.e. signed halfword. + * Then this multiplied results of adjacent odd-even elements + * are added to the out vector + * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2_h_bu(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvmulwev_h_bu(in_h, in_l); + out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of byte vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - signed halfword + * Details : Signed byte elements from in_h are multiplied with + * signed byte elements from in_l producing a result + * twice the size of input i.e. signed halfword. + * Then this multiplication results of adjacent odd-even elements + * are added to the out vector + * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2_h_b(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvmulwev_h_b(in_h, in_l); + out = __lasx_xvmaddwod_h_b(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of halfword vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - signed word + * Details : Signed halfword elements from in_h are multiplied with + * signed halfword elements from in_l producing a result + * twice the size of input i.e. signed word. + * Then this multiplied results of adjacent odd-even elements + * are added to the out vector. + * Example : out = __lasx_xvdp2_w_h(in_h, in_l) + * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 + * out : 22,38,38,22, 22,38,38,22 + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2_w_h(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvmulwev_w_h(in_h, in_l); + out = __lasx_xvmaddwod_w_h(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of word vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - signed double + * Details : Signed word elements from in_h are multiplied with + * signed word elements from in_l producing a result + * twice the size of input i.e. signed double-word. + * Then this multiplied results of adjacent odd-even elements + * are added to the out vector. + * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2_d_w(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvmulwev_d_w(in_h, in_l); + out = __lasx_xvmaddwod_d_w(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of halfword vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - signed word + * Details : Unsigned halfword elements from in_h are multiplied with + * signed halfword elements from in_l producing a result + * twice the size of input i.e. unsigned word. + * Multiplication result of adjacent odd-even elements + * are added to the out vector + * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2_w_hu_h(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvmulwev_w_hu_h(in_h, in_l); + out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product & addition of byte vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - halfword + * Details : Signed byte elements from in_h are multiplied with + * signed byte elements from in_l producing a result + * twice the size of input i.e. signed halfword. + * Then this multiplied results of adjacent odd-even elements + * are added to the in_c vector. + * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2add_h_b(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i out; + + out = __lasx_xvmaddwev_h_b(in_c, in_h, in_l); + out = __lasx_xvmaddwod_h_b(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product & addition of byte vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - halfword + * Details : Unsigned byte elements from in_h are multiplied with + * unsigned byte elements from in_l producing a result + * twice the size of input i.e. signed halfword. + * Then this multiplied results of adjacent odd-even elements + * are added to the in_c vector. + * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2add_h_bu(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i out; + + out = __lasx_xvmaddwev_h_bu(in_c, in_h, in_l); + out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product & addition of byte vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - halfword + * Details : Unsigned byte elements from in_h are multiplied with + * signed byte elements from in_l producing a result + * twice the size of input i.e. signed halfword. + * Then this multiplied results of adjacent odd-even elements + * are added to the in_c vector. + * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2add_h_bu_b(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i out; + + out = __lasx_xvmaddwev_h_bu_b(in_c, in_h, in_l); + out = __lasx_xvmaddwod_h_bu_b(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of halfword vector elements + * Arguments : Inputs - in_c, in_h, in_l + * Output - out + * Return Type - per RTYPE + * Details : Signed halfword elements from in_h are multiplied with + * signed halfword elements from in_l producing a result + * twice the size of input i.e. signed word. + * Multiplication result of adjacent odd-even elements + * are added to the in_c vector. + * Example : out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) + * in_c : 1,2,3,4, 1,2,3,4 + * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8, + * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1, + * out : 23,40,41,26, 23,40,41,26 + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2add_w_h(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i out; + + out = __lasx_xvmaddwev_w_h(in_c, in_h, in_l); + out = __lasx_xvmaddwod_w_h(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of halfword vector elements + * Arguments : Inputs - in_c, in_h, in_l + * Output - out + * Return Type - signed word + * Details : Unsigned halfword elements from in_h are multiplied with + * unsigned halfword elements from in_l producing a result + * twice the size of input i.e. signed word. + * Multiplication result of adjacent odd-even elements + * are added to the in_c vector. + * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2add_w_hu(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i out; + + out = __lasx_xvmaddwev_w_hu(in_c, in_h, in_l); + out = __lasx_xvmaddwod_w_hu(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of halfword vector elements + * Arguments : Inputs - in_c, in_h, in_l + * Output - out + * Return Type - signed word + * Details : Unsigned halfword elements from in_h are multiplied with + * signed halfword elements from in_l producing a result + * twice the size of input i.e. signed word. + * Multiplication result of adjacent odd-even elements + * are added to the in_c vector + * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i out; + + out = __lasx_xvmaddwev_w_hu_h(in_c, in_h, in_l); + out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); + return out; +} + +/* + * ============================================================================= + * Description : Vector Unsigned Dot Product and Subtract + * Arguments : Inputs - in_c, in_h, in_l + * Output - out + * Return Type - signed halfword + * Details : Unsigned byte elements from in_h are multiplied with + * unsigned byte elements from in_l producing a result + * twice the size of input i.e. signed halfword. + * Multiplication result of adjacent odd-even elements + * are added together and subtracted from double width elements + * in_c vector. + * Example : See out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i out; + + out = __lasx_xvmulwev_h_bu(in_h, in_l); + out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); + out = __lasx_xvsub_h(in_c, out); + return out; +} + +/* + * ============================================================================= + * Description : Vector Signed Dot Product and Subtract + * Arguments : Inputs - in_c, in_h, in_l + * Output - out + * Return Type - signed word + * Details : Signed halfword elements from in_h are multiplied with + * Signed halfword elements from in_l producing a result + * twice the size of input i.e. signed word. + * Multiplication result of adjacent odd-even elements + * are added together and subtracted from double width elements + * in_c vector. + * Example : out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) + * in_c : 0,0,0,0, 0,0,0,0 + * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1 + * in_l : 2,1,1,0, 1,0,0,0, 0,0,1,0, 1,0,0,1 + * out : -7,-3,0,0, 0,-1,0,-1 + * ============================================================================= + */ +static inline __m256i __lasx_xvdp2sub_w_h(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i out; + + out = __lasx_xvmulwev_w_h(in_h, in_l); + out = __lasx_xvmaddwod_w_h(out, in_h, in_l); + out = __lasx_xvsub_w(in_c, out); + return out; +} + +/* + * ============================================================================= + * Description : Dot product of halfword vector elements + * Arguments : Inputs - in_h, in_l + * Output - out + * Return Type - signed word + * Details : Signed halfword elements from in_h are multiplied with + * signed halfword elements from in_l producing a result + * four times the size of input i.e. signed doubleword. + * Then this multiplication results of four adjacent elements + * are added together and stored to the out vector. + * Example : out = __lasx_xvdp4_d_h(in_h, in_l) + * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,-1, 0,0,0,1 + * in_l : -2,1,1,0, 1,0,0,0, 0,0,1, 0, 1,0,0,1 + * out : -2,0,1,1 + * ============================================================================= + */ +static inline __m256i __lasx_xvdp4_d_h(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvmulwev_w_h(in_h, in_l); + out = __lasx_xvmaddwod_w_h(out, in_h, in_l); + out = __lasx_xvhaddw_d_w(out, out); + return out; +} + +/* + * ============================================================================= + * Description : The high half of the vector elements are expanded and + * added after being doubled. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_h vector and the in_l vector are added after the + * higher half of the two-fold sign extension (signed byte + * to signed halfword) and stored to the out vector. + * Example : See out = __lasx_xvaddwh_w_h(in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvaddwh_h_b(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvilvh_b(in_h, in_l); + out = __lasx_xvhaddw_h_b(out, out); + return out; +} + +/* + * ============================================================================= + * Description : The high half of the vector elements are expanded and + * added after being doubled. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_h vector and the in_l vector are added after the + * higher half of the two-fold sign extension (signed halfword + * to signed word) and stored to the out vector. + * Example : out = __lasx_xvaddwh_w_h(in_h, in_l) + * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 + * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 + * out : 1,0,0,-1, 1,0,0, 2 + * ============================================================================= + */ +static inline __m256i __lasx_xvaddwh_w_h(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvilvh_h(in_h, in_l); + out = __lasx_xvhaddw_w_h(out, out); + return out; +} + +/* + * ============================================================================= + * Description : The low half of the vector elements are expanded and + * added after being doubled. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_h vector and the in_l vector are added after the + * lower half of the two-fold sign extension (signed byte + * to signed halfword) and stored to the out vector. + * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvaddwl_h_b(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvilvl_b(in_h, in_l); + out = __lasx_xvhaddw_h_b(out, out); + return out; +} + +/* + * ============================================================================= + * Description : The low half of the vector elements are expanded and + * added after being doubled. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_h vector and the in_l vector are added after the + * lower half of the two-fold sign extension (signed halfword + * to signed word) and stored to the out vector. + * Example : out = __lasx_xvaddwl_w_h(in_h, in_l) + * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 + * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 + * out : 5,-1,4,2, 1,0,2,-1 + * ============================================================================= + */ +static inline __m256i __lasx_xvaddwl_w_h(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvilvl_h(in_h, in_l); + out = __lasx_xvhaddw_w_h(out, out); + return out; +} + +/* + * ============================================================================= + * Description : The low half of the vector elements are expanded and + * added after being doubled. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The out vector and the out vector are added after the + * lower half of the two-fold zero extension (unsigned byte + * to unsigned halfword) and stored to the out vector. + * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvaddwl_h_bu(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvilvl_b(in_h, in_l); + out = __lasx_xvhaddw_hu_bu(out, out); + return out; +} + +/* + * ============================================================================= + * Description : The low half of the vector elements are expanded and + * added after being doubled. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_l vector after double zero extension (unsigned byte to + * signed halfword),added to the in_h vector. + * Example : See out = __lasx_xvaddw_w_w_h(in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvaddw_h_h_bu(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvsllwil_hu_bu(in_l, 0); + out = __lasx_xvadd_h(in_h, out); + return out; +} + +/* + * ============================================================================= + * Description : The low half of the vector elements are expanded and + * added after being doubled. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_l vector after double sign extension (signed halfword to + * signed word), added to the in_h vector. + * Example : out = __lasx_xvaddw_w_w_h(in_h, in_l) + * in_h : 0, 1,0,0, -1,0,0,1, + * in_l : 2,-1,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1, + * out : 2, 0,1,2, -1,0,1,1, + * ============================================================================= + */ +static inline __m256i __lasx_xvaddw_w_w_h(__m256i in_h, __m256i in_l) { + __m256i out; + + out = __lasx_xvsllwil_w_h(in_l, 0); + out = __lasx_xvadd_w(in_h, out); + return out; +} + +/* + * ============================================================================= + * Description : Multiplication and addition calculation after expansion + * of the lower half of the vector. + * Arguments : Inputs - in_c, in_h, in_l + * Output - out + * Details : The in_h vector and the in_l vector are multiplied after + * the lower half of the two-fold sign extension (signed halfword + * to signed word), and the result is added to the vector in_c, + * then stored to the out vector. + * Example : out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l) + * in_c : 1,2,3,4, 5,6,7,8 + * in_h : 1,2,3,4, 1,2,3,4, 5,6,7,8, 5,6,7,8 + * in_l : 200, 300, 400, 500, 2000, 3000, 4000, 5000, + * -200,-300,-400,-500, -2000,-3000,-4000,-5000 + * out : 201, 602,1203,2004, -995, -1794,-2793,-3992 + * ============================================================================= + */ +static inline __m256i __lasx_xvmaddwl_w_h(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i tmp0, tmp1, out; + + tmp0 = __lasx_xvsllwil_w_h(in_h, 0); + tmp1 = __lasx_xvsllwil_w_h(in_l, 0); + tmp0 = __lasx_xvmul_w(tmp0, tmp1); + out = __lasx_xvadd_w(tmp0, in_c); + return out; +} + +/* + * ============================================================================= + * Description : Multiplication and addition calculation after expansion + * of the higher half of the vector. + * Arguments : Inputs - in_c, in_h, in_l + * Output - out + * Details : The in_h vector and the in_l vector are multiplied after + * the higher half of the two-fold sign extension (signed + * halfword to signed word), and the result is added to + * the vector in_c, then stored to the out vector. + * Example : See out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l) + * ============================================================================= + */ +static inline __m256i __lasx_xvmaddwh_w_h(__m256i in_c, + __m256i in_h, + __m256i in_l) { + __m256i tmp0, tmp1, out; + + tmp0 = __lasx_xvilvh_h(in_h, in_h); + tmp1 = __lasx_xvilvh_h(in_l, in_l); + tmp0 = __lasx_xvmulwev_w_h(tmp0, tmp1); + out = __lasx_xvadd_w(tmp0, in_c); + return out; +} + +/* + * ============================================================================= + * Description : Multiplication calculation after expansion of the lower + * half of the vector. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_h vector and the in_l vector are multiplied after + * the lower half of the two-fold sign extension (signed + * halfword to signed word), then stored to the out vector. + * Example : out = __lasx_xvmulwl_w_h(in_h, in_l) + * in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 + * in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1 + * out : 6,1,3,0, 0,0,1,0 + * ============================================================================= + */ +static inline __m256i __lasx_xvmulwl_w_h(__m256i in_h, __m256i in_l) { + __m256i tmp0, tmp1, out; + + tmp0 = __lasx_xvsllwil_w_h(in_h, 0); + tmp1 = __lasx_xvsllwil_w_h(in_l, 0); + out = __lasx_xvmul_w(tmp0, tmp1); + return out; +} + +/* + * ============================================================================= + * Description : Multiplication calculation after expansion of the lower + * half of the vector. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_h vector and the in_l vector are multiplied after + * the lower half of the two-fold sign extension (signed + * halfword to signed word), then stored to the out vector. + * Example : out = __lasx_xvmulwh_w_h(in_h, in_l) + * in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 + * in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1 + * out : 0,0,0,0, 0,0,0,1 + * ============================================================================= + */ +static inline __m256i __lasx_xvmulwh_w_h(__m256i in_h, __m256i in_l) { + __m256i tmp0, tmp1, out; + + tmp0 = __lasx_xvilvh_h(in_h, in_h); + tmp1 = __lasx_xvilvh_h(in_l, in_l); + out = __lasx_xvmulwev_w_h(tmp0, tmp1); + return out; +} + +/* + * ============================================================================= + * Description : The low half of the vector elements are added to the high half + * after being doubled, then saturated. + * Arguments : Inputs - in_h, in_l + * Output - out + * Details : The in_h vector adds the in_l vector after the lower half of + * the two-fold zero extension (unsigned byte to unsigned + * halfword) and then saturated. The results are stored to the out + * vector. + * Example : out = __lasx_xvsaddw_hu_hu_bu(in_h, in_l) + * in_h : 2,65532,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1 + * in_l : 3,6,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1, 3,18,3,0, 0,0,0,1, 0,0,1,1, + * 0,0,0,1 + * out : 5,65535,4,2, 1,0,0,1, 3,18,4,0, 1,0,0,2, + * ============================================================================= + */ +static inline __m256i __lasx_xvsaddw_hu_hu_bu(__m256i in_h, __m256i in_l) { + __m256i tmp1, out; + __m256i zero = {0}; + + tmp1 = __lasx_xvilvl_b(zero, in_l); + out = __lasx_xvsadd_hu(in_h, tmp1); + return out; +} + +/* + * ============================================================================= + * Description : Clip all halfword elements of input vector between min & max + * out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in)) + * Arguments : Inputs - in (input vector) + * - min (min threshold) + * - max (max threshold) + * Outputs - in (output vector with clipped elements) + * Return Type - signed halfword + * Example : out = __lasx_xvclip_h(in, min, max) + * in : -8,2,280,249, -8,255,280,249, 4,4,4,4, 5,5,5,5 + * min : 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1 + * max : 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9 + * out : 1,2,9,9, 1,9,9,9, 4,4,4,4, 5,5,5,5 + * ============================================================================= + */ +static inline __m256i __lasx_xvclip_h(__m256i in, __m256i min, __m256i max) { + __m256i out; + + out = __lasx_xvmax_h(min, in); + out = __lasx_xvmin_h(max, out); + return out; +} + +/* + * ============================================================================= + * Description : Clip all signed halfword elements of input vector + * between 0 & 255 + * Arguments : Inputs - in (input vector) + * Outputs - out (output vector with clipped elements) + * Return Type - signed halfword + * Example : See out = __lasx_xvclip255_w(in) + * ============================================================================= + */ +static inline __m256i __lasx_xvclip255_h(__m256i in) { + __m256i out; + + out = __lasx_xvmaxi_h(in, 0); + out = __lasx_xvsat_hu(out, 7); + return out; +} + +/* + * ============================================================================= + * Description : Clip all signed word elements of input vector + * between 0 & 255 + * Arguments : Inputs - in (input vector) + * Output - out (output vector with clipped elements) + * Return Type - signed word + * Example : out = __lasx_xvclip255_w(in) + * in : -8,255,280,249, -8,255,280,249 + * out : 0,255,255,249, 0,255,255,249 + * ============================================================================= + */ +static inline __m256i __lasx_xvclip255_w(__m256i in) { + __m256i out; + + out = __lasx_xvmaxi_w(in, 0); + out = __lasx_xvsat_wu(out, 7); + return out; +} + +/* + * ============================================================================= + * Description : Indexed halfword element values are replicated to all + * elements in output vector. If 'idx < 8' use xvsplati_l_*, + * if 'idx >= 8' use xvsplati_h_*. + * Arguments : Inputs - in, idx + * Output - out + * Details : Idx element value from in vector is replicated to all + * elements in out vector. + * Valid index range for halfword operation is 0-7 + * Example : out = __lasx_xvsplati_l_h(in, idx) + * in : 20,10,11,12, 13,14,15,16, 0,0,2,0, 0,0,0,0 + * idx : 0x02 + * out : 11,11,11,11, 11,11,11,11, 11,11,11,11, 11,11,11,11 + * ============================================================================= + */ +static inline __m256i __lasx_xvsplati_l_h(__m256i in, int idx) { + __m256i out; + + out = __lasx_xvpermi_q(in, in, 0x02); + out = __lasx_xvreplve_h(out, idx); + return out; +} + +/* + * ============================================================================= + * Description : Indexed halfword element values are replicated to all + * elements in output vector. If 'idx < 8' use xvsplati_l_*, + * if 'idx >= 8' use xvsplati_h_*. + * Arguments : Inputs - in, idx + * Output - out + * Details : Idx element value from in vector is replicated to all + * elements in out vector. + * Valid index range for halfword operation is 0-7 + * Example : out = __lasx_xvsplati_h_h(in, idx) + * in : 20,10,11,12, 13,14,15,16, 0,2,0,0, 0,0,0,0 + * idx : 0x09 + * out : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2 + * ============================================================================= + */ +static inline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) { + __m256i out; + + out = __lasx_xvpermi_q(in, in, 0x13); + out = __lasx_xvreplve_h(out, idx); + return out; +} + +/* + * ============================================================================= + * Description : Transpose 4x4 block with double-word elements in vectors + * Arguments : Inputs - _in0, _in1, _in2, _in3 + * Outputs - _out0, _out1, _out2, _out3 + * Example : LASX_TRANSPOSE4x4_D + * _in0 : 1,2,3,4 + * _in1 : 1,2,3,4 + * _in2 : 1,2,3,4 + * _in3 : 1,2,3,4 + * + * _out0 : 1,1,1,1 + * _out1 : 2,2,2,2 + * _out2 : 3,3,3,3 + * _out3 : 4,4,4,4 + * ============================================================================= + */ +#define LASX_TRANSPOSE4x4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \ + _out3) \ + { \ + __m256i _tmp0, _tmp1, _tmp2, _tmp3; \ + _tmp0 = __lasx_xvilvl_d(_in1, _in0); \ + _tmp1 = __lasx_xvilvh_d(_in1, _in0); \ + _tmp2 = __lasx_xvilvl_d(_in3, _in2); \ + _tmp3 = __lasx_xvilvh_d(_in3, _in2); \ + _out0 = __lasx_xvpermi_q(_tmp2, _tmp0, 0x20); \ + _out2 = __lasx_xvpermi_q(_tmp2, _tmp0, 0x31); \ + _out1 = __lasx_xvpermi_q(_tmp3, _tmp1, 0x20); \ + _out3 = __lasx_xvpermi_q(_tmp3, _tmp1, 0x31); \ + } + +/* + * ============================================================================= + * Description : Transpose 8x8 block with word elements in vectors + * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 + * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, + * _out7 + * Example : LASX_TRANSPOSE8x8_W + * _in0 : 1,2,3,4,5,6,7,8 + * _in1 : 2,2,3,4,5,6,7,8 + * _in2 : 3,2,3,4,5,6,7,8 + * _in3 : 4,2,3,4,5,6,7,8 + * _in4 : 5,2,3,4,5,6,7,8 + * _in5 : 6,2,3,4,5,6,7,8 + * _in6 : 7,2,3,4,5,6,7,8 + * _in7 : 8,2,3,4,5,6,7,8 + * + * _out0 : 1,2,3,4,5,6,7,8 + * _out1 : 2,2,2,2,2,2,2,2 + * _out2 : 3,3,3,3,3,3,3,3 + * _out3 : 4,4,4,4,4,4,4,4 + * _out4 : 5,5,5,5,5,5,5,5 + * _out5 : 6,6,6,6,6,6,6,6 + * _out6 : 7,7,7,7,7,7,7,7 + * _out7 : 8,8,8,8,8,8,8,8 + * ============================================================================= + */ +#define LASX_TRANSPOSE8x8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + __m256i _s0_m, _s1_m; \ + __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ + __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ + \ + _s0_m = __lasx_xvilvl_w(_in2, _in0); \ + _s1_m = __lasx_xvilvl_w(_in3, _in1); \ + _tmp0_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ + _tmp1_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ + _s0_m = __lasx_xvilvh_w(_in2, _in0); \ + _s1_m = __lasx_xvilvh_w(_in3, _in1); \ + _tmp2_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ + _tmp3_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ + _s0_m = __lasx_xvilvl_w(_in6, _in4); \ + _s1_m = __lasx_xvilvl_w(_in7, _in5); \ + _tmp4_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ + _tmp5_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ + _s0_m = __lasx_xvilvh_w(_in6, _in4); \ + _s1_m = __lasx_xvilvh_w(_in7, _in5); \ + _tmp6_m = __lasx_xvilvl_w(_s1_m, _s0_m); \ + _tmp7_m = __lasx_xvilvh_w(_s1_m, _s0_m); \ + _out0 = __lasx_xvpermi_q(_tmp4_m, _tmp0_m, 0x20); \ + _out1 = __lasx_xvpermi_q(_tmp5_m, _tmp1_m, 0x20); \ + _out2 = __lasx_xvpermi_q(_tmp6_m, _tmp2_m, 0x20); \ + _out3 = __lasx_xvpermi_q(_tmp7_m, _tmp3_m, 0x20); \ + _out4 = __lasx_xvpermi_q(_tmp4_m, _tmp0_m, 0x31); \ + _out5 = __lasx_xvpermi_q(_tmp5_m, _tmp1_m, 0x31); \ + _out6 = __lasx_xvpermi_q(_tmp6_m, _tmp2_m, 0x31); \ + _out7 = __lasx_xvpermi_q(_tmp7_m, _tmp3_m, 0x31); \ + } + +/* + * ============================================================================= + * Description : Transpose input 16x8 byte block + * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, + * _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15 + * (input 16x8 byte block) + * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, + * _out7 (output 8x16 byte block) + * Details : The rows of the matrix become columns, and the columns become + * rows. + * Example : See LASX_TRANSPOSE16x8_H + * ============================================================================= + */ +#define LASX_TRANSPOSE16x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ + _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ + _out6, _out7) \ + { \ + __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ + __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ + \ + _tmp0_m = __lasx_xvilvl_b(_in2, _in0); \ + _tmp1_m = __lasx_xvilvl_b(_in3, _in1); \ + _tmp2_m = __lasx_xvilvl_b(_in6, _in4); \ + _tmp3_m = __lasx_xvilvl_b(_in7, _in5); \ + _tmp4_m = __lasx_xvilvl_b(_in10, _in8); \ + _tmp5_m = __lasx_xvilvl_b(_in11, _in9); \ + _tmp6_m = __lasx_xvilvl_b(_in14, _in12); \ + _tmp7_m = __lasx_xvilvl_b(_in15, _in13); \ + _out0 = __lasx_xvilvl_b(_tmp1_m, _tmp0_m); \ + _out1 = __lasx_xvilvh_b(_tmp1_m, _tmp0_m); \ + _out2 = __lasx_xvilvl_b(_tmp3_m, _tmp2_m); \ + _out3 = __lasx_xvilvh_b(_tmp3_m, _tmp2_m); \ + _out4 = __lasx_xvilvl_b(_tmp5_m, _tmp4_m); \ + _out5 = __lasx_xvilvh_b(_tmp5_m, _tmp4_m); \ + _out6 = __lasx_xvilvl_b(_tmp7_m, _tmp6_m); \ + _out7 = __lasx_xvilvh_b(_tmp7_m, _tmp6_m); \ + _tmp0_m = __lasx_xvilvl_w(_out2, _out0); \ + _tmp2_m = __lasx_xvilvh_w(_out2, _out0); \ + _tmp4_m = __lasx_xvilvl_w(_out3, _out1); \ + _tmp6_m = __lasx_xvilvh_w(_out3, _out1); \ + _tmp1_m = __lasx_xvilvl_w(_out6, _out4); \ + _tmp3_m = __lasx_xvilvh_w(_out6, _out4); \ + _tmp5_m = __lasx_xvilvl_w(_out7, _out5); \ + _tmp7_m = __lasx_xvilvh_w(_out7, _out5); \ + _out0 = __lasx_xvilvl_d(_tmp1_m, _tmp0_m); \ + _out1 = __lasx_xvilvh_d(_tmp1_m, _tmp0_m); \ + _out2 = __lasx_xvilvl_d(_tmp3_m, _tmp2_m); \ + _out3 = __lasx_xvilvh_d(_tmp3_m, _tmp2_m); \ + _out4 = __lasx_xvilvl_d(_tmp5_m, _tmp4_m); \ + _out5 = __lasx_xvilvh_d(_tmp5_m, _tmp4_m); \ + _out6 = __lasx_xvilvl_d(_tmp7_m, _tmp6_m); \ + _out7 = __lasx_xvilvh_d(_tmp7_m, _tmp6_m); \ + } + +/* + * ============================================================================= + * Description : Transpose input 16x8 byte block + * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, + * _in8, _in9, _in10, _in11, _in12, _in13, _in14, _in15 + * (input 16x8 byte block) + * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, + * _out7 (output 8x16 byte block) + * Details : The rows of the matrix become columns, and the columns become + * rows. + * Example : LASX_TRANSPOSE16x8_H + * _in0 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in1 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in2 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in3 : 4,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in4 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in5 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in6 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in7 : 8,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in8 : 9,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in9 : 1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in10 : 0,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in11 : 2,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in12 : 3,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in13 : 7,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in14 : 5,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * _in15 : 6,2,3,4,5,6,7,8,0,0,0,0,0,0,0,0 + * + * _out0 : 1,2,3,4,5,6,7,8,9,1,0,2,3,7,5,6 + * _out1 : 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2 + * _out2 : 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 + * _out3 : 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 + * _out4 : 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 + * _out5 : 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 + * _out6 : 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 + * _out7 : 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 + * ============================================================================= + */ +#define LASX_TRANSPOSE16x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _in8, _in9, _in10, _in11, _in12, _in13, _in14, \ + _in15, _out0, _out1, _out2, _out3, _out4, _out5, \ + _out6, _out7) \ + { \ + __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ + __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; \ + \ + _tmp0_m = __lasx_xvilvl_h(_in2, _in0); \ + _tmp1_m = __lasx_xvilvl_h(_in3, _in1); \ + _tmp2_m = __lasx_xvilvl_h(_in6, _in4); \ + _tmp3_m = __lasx_xvilvl_h(_in7, _in5); \ + _tmp4_m = __lasx_xvilvl_h(_in10, _in8); \ + _tmp5_m = __lasx_xvilvl_h(_in11, _in9); \ + _tmp6_m = __lasx_xvilvl_h(_in14, _in12); \ + _tmp7_m = __lasx_xvilvl_h(_in15, _in13); \ + _t0 = __lasx_xvilvl_h(_tmp1_m, _tmp0_m); \ + _t1 = __lasx_xvilvh_h(_tmp1_m, _tmp0_m); \ + _t2 = __lasx_xvilvl_h(_tmp3_m, _tmp2_m); \ + _t3 = __lasx_xvilvh_h(_tmp3_m, _tmp2_m); \ + _t4 = __lasx_xvilvl_h(_tmp5_m, _tmp4_m); \ + _t5 = __lasx_xvilvh_h(_tmp5_m, _tmp4_m); \ + _t6 = __lasx_xvilvl_h(_tmp7_m, _tmp6_m); \ + _t7 = __lasx_xvilvh_h(_tmp7_m, _tmp6_m); \ + _tmp0_m = __lasx_xvilvl_d(_t2, _t0); \ + _tmp2_m = __lasx_xvilvh_d(_t2, _t0); \ + _tmp4_m = __lasx_xvilvl_d(_t3, _t1); \ + _tmp6_m = __lasx_xvilvh_d(_t3, _t1); \ + _tmp1_m = __lasx_xvilvl_d(_t6, _t4); \ + _tmp3_m = __lasx_xvilvh_d(_t6, _t4); \ + _tmp5_m = __lasx_xvilvl_d(_t7, _t5); \ + _tmp7_m = __lasx_xvilvh_d(_t7, _t5); \ + _out0 = __lasx_xvpermi_q(_tmp1_m, _tmp0_m, 0x20); \ + _out1 = __lasx_xvpermi_q(_tmp3_m, _tmp2_m, 0x20); \ + _out2 = __lasx_xvpermi_q(_tmp5_m, _tmp4_m, 0x20); \ + _out3 = __lasx_xvpermi_q(_tmp7_m, _tmp6_m, 0x20); \ + \ + _tmp0_m = __lasx_xvilvh_h(_in2, _in0); \ + _tmp1_m = __lasx_xvilvh_h(_in3, _in1); \ + _tmp2_m = __lasx_xvilvh_h(_in6, _in4); \ + _tmp3_m = __lasx_xvilvh_h(_in7, _in5); \ + _tmp4_m = __lasx_xvilvh_h(_in10, _in8); \ + _tmp5_m = __lasx_xvilvh_h(_in11, _in9); \ + _tmp6_m = __lasx_xvilvh_h(_in14, _in12); \ + _tmp7_m = __lasx_xvilvh_h(_in15, _in13); \ + _t0 = __lasx_xvilvl_h(_tmp1_m, _tmp0_m); \ + _t1 = __lasx_xvilvh_h(_tmp1_m, _tmp0_m); \ + _t2 = __lasx_xvilvl_h(_tmp3_m, _tmp2_m); \ + _t3 = __lasx_xvilvh_h(_tmp3_m, _tmp2_m); \ + _t4 = __lasx_xvilvl_h(_tmp5_m, _tmp4_m); \ + _t5 = __lasx_xvilvh_h(_tmp5_m, _tmp4_m); \ + _t6 = __lasx_xvilvl_h(_tmp7_m, _tmp6_m); \ + _t7 = __lasx_xvilvh_h(_tmp7_m, _tmp6_m); \ + _tmp0_m = __lasx_xvilvl_d(_t2, _t0); \ + _tmp2_m = __lasx_xvilvh_d(_t2, _t0); \ + _tmp4_m = __lasx_xvilvl_d(_t3, _t1); \ + _tmp6_m = __lasx_xvilvh_d(_t3, _t1); \ + _tmp1_m = __lasx_xvilvl_d(_t6, _t4); \ + _tmp3_m = __lasx_xvilvh_d(_t6, _t4); \ + _tmp5_m = __lasx_xvilvl_d(_t7, _t5); \ + _tmp7_m = __lasx_xvilvh_d(_t7, _t5); \ + _out4 = __lasx_xvpermi_q(_tmp1_m, _tmp0_m, 0x20); \ + _out5 = __lasx_xvpermi_q(_tmp3_m, _tmp2_m, 0x20); \ + _out6 = __lasx_xvpermi_q(_tmp5_m, _tmp4_m, 0x20); \ + _out7 = __lasx_xvpermi_q(_tmp7_m, _tmp6_m, 0x20); \ + } + +/* + * ============================================================================= + * Description : Transpose 4x4 block with halfword elements in vectors + * Arguments : Inputs - _in0, _in1, _in2, _in3 + * Outputs - _out0, _out1, _out2, _out3 + * Return Type - signed halfword + * Details : The rows of the matrix become columns, and the columns become + * rows. + * Example : See LASX_TRANSPOSE8x8_H + * ============================================================================= + */ +#define LASX_TRANSPOSE4x4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, \ + _out3) \ + { \ + __m256i _s0_m, _s1_m; \ + \ + _s0_m = __lasx_xvilvl_h(_in1, _in0); \ + _s1_m = __lasx_xvilvl_h(_in3, _in2); \ + _out0 = __lasx_xvilvl_w(_s1_m, _s0_m); \ + _out2 = __lasx_xvilvh_w(_s1_m, _s0_m); \ + _out1 = __lasx_xvilvh_d(_out0, _out0); \ + _out3 = __lasx_xvilvh_d(_out2, _out2); \ + } + +/* + * ============================================================================= + * Description : Transpose input 8x8 byte block + * Arguments : Inputs - _in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7 + * (input 8x8 byte block) + * Outputs - _out0, _out1, _out2, _out3, _out4, _out5, _out6, + * _out7 (output 8x8 byte block) + * Example : See LASX_TRANSPOSE8x8_H + * ============================================================================= + */ +#define LASX_TRANSPOSE8x8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ + __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ + _tmp0_m = __lasx_xvilvl_b(_in2, _in0); \ + _tmp1_m = __lasx_xvilvl_b(_in3, _in1); \ + _tmp2_m = __lasx_xvilvl_b(_in6, _in4); \ + _tmp3_m = __lasx_xvilvl_b(_in7, _in5); \ + _tmp4_m = __lasx_xvilvl_b(_tmp1_m, _tmp0_m); \ + _tmp5_m = __lasx_xvilvh_b(_tmp1_m, _tmp0_m); \ + _tmp6_m = __lasx_xvilvl_b(_tmp3_m, _tmp2_m); \ + _tmp7_m = __lasx_xvilvh_b(_tmp3_m, _tmp2_m); \ + _out0 = __lasx_xvilvl_w(_tmp6_m, _tmp4_m); \ + _out2 = __lasx_xvilvh_w(_tmp6_m, _tmp4_m); \ + _out4 = __lasx_xvilvl_w(_tmp7_m, _tmp5_m); \ + _out6 = __lasx_xvilvh_w(_tmp7_m, _tmp5_m); \ + _out1 = __lasx_xvbsrl_v(_out0, 8); \ + _out3 = __lasx_xvbsrl_v(_out2, 8); \ + _out5 = __lasx_xvbsrl_v(_out4, 8); \ + _out7 = __lasx_xvbsrl_v(_out6, 8); \ + } + +/* + * ============================================================================= + * Description : Transpose 8x8 block with halfword elements in vectors. + * Arguments : Inputs - _in0, _in1, ~ + * Outputs - _out0, _out1, ~ + * Details : The rows of the matrix become columns, and the columns become + * rows. + * Example : LASX_TRANSPOSE8x8_H + * _in0 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * _in1 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8 + * _in2 : 8,2,3,4, 5,6,7,8, 8,2,3,4, 5,6,7,8 + * _in3 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * _in4 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8 + * _in5 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * _in6 : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 + * _in7 : 9,2,3,4, 5,6,7,8, 9,2,3,4, 5,6,7,8 + * + * _out0 : 1,8,8,1, 9,1,1,9, 1,8,8,1, 9,1,1,9 + * _out1 : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2 + * _out2 : 3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3 + * _out3 : 4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4,4,4 + * _out4 : 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5 + * _out5 : 6,6,6,6, 6,6,6,6, 6,6,6,6, 6,6,6,6 + * _out6 : 7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,7 + * _out7 : 8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8 + * ============================================================================= + */ +#define LASX_TRANSPOSE8x8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + __m256i _s0_m, _s1_m; \ + __m256i _tmp0_m, _tmp1_m, _tmp2_m, _tmp3_m; \ + __m256i _tmp4_m, _tmp5_m, _tmp6_m, _tmp7_m; \ + \ + _s0_m = __lasx_xvilvl_h(_in6, _in4); \ + _s1_m = __lasx_xvilvl_h(_in7, _in5); \ + _tmp0_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ + _tmp1_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ + _s0_m = __lasx_xvilvh_h(_in6, _in4); \ + _s1_m = __lasx_xvilvh_h(_in7, _in5); \ + _tmp2_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ + _tmp3_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ + \ + _s0_m = __lasx_xvilvl_h(_in2, _in0); \ + _s1_m = __lasx_xvilvl_h(_in3, _in1); \ + _tmp4_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ + _tmp5_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ + _s0_m = __lasx_xvilvh_h(_in2, _in0); \ + _s1_m = __lasx_xvilvh_h(_in3, _in1); \ + _tmp6_m = __lasx_xvilvl_h(_s1_m, _s0_m); \ + _tmp7_m = __lasx_xvilvh_h(_s1_m, _s0_m); \ + \ + _out0 = __lasx_xvpickev_d(_tmp0_m, _tmp4_m); \ + _out2 = __lasx_xvpickev_d(_tmp1_m, _tmp5_m); \ + _out4 = __lasx_xvpickev_d(_tmp2_m, _tmp6_m); \ + _out6 = __lasx_xvpickev_d(_tmp3_m, _tmp7_m); \ + _out1 = __lasx_xvpickod_d(_tmp0_m, _tmp4_m); \ + _out3 = __lasx_xvpickod_d(_tmp1_m, _tmp5_m); \ + _out5 = __lasx_xvpickod_d(_tmp2_m, _tmp6_m); \ + _out7 = __lasx_xvpickod_d(_tmp3_m, _tmp7_m); \ + } + +/* + * ============================================================================= + * Description : Butterfly of 4 input vectors + * Arguments : Inputs - _in0, _in1, _in2, _in3 + * Outputs - _out0, _out1, _out2, _out3 + * Details : Butterfly operation + * Example : LASX_BUTTERFLY_4 + * _out0 = _in0 + _in3; + * _out1 = _in1 + _in2; + * _out2 = _in1 - _in2; + * _out3 = _in0 - _in3; + * ============================================================================= + */ +#define LASX_BUTTERFLY_4_B(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + _out0 = __lasx_xvadd_b(_in0, _in3); \ + _out1 = __lasx_xvadd_b(_in1, _in2); \ + _out2 = __lasx_xvsub_b(_in1, _in2); \ + _out3 = __lasx_xvsub_b(_in0, _in3); \ + } +#define LASX_BUTTERFLY_4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + _out0 = __lasx_xvadd_h(_in0, _in3); \ + _out1 = __lasx_xvadd_h(_in1, _in2); \ + _out2 = __lasx_xvsub_h(_in1, _in2); \ + _out3 = __lasx_xvsub_h(_in0, _in3); \ + } +#define LASX_BUTTERFLY_4_W(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + _out0 = __lasx_xvadd_w(_in0, _in3); \ + _out1 = __lasx_xvadd_w(_in1, _in2); \ + _out2 = __lasx_xvsub_w(_in1, _in2); \ + _out3 = __lasx_xvsub_w(_in0, _in3); \ + } +#define LASX_BUTTERFLY_4_D(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \ + { \ + _out0 = __lasx_xvadd_d(_in0, _in3); \ + _out1 = __lasx_xvadd_d(_in1, _in2); \ + _out2 = __lasx_xvsub_d(_in1, _in2); \ + _out3 = __lasx_xvsub_d(_in0, _in3); \ + } + +/* + * ============================================================================= + * Description : Butterfly of 8 input vectors + * Arguments : Inputs - _in0, _in1, _in2, _in3, ~ + * Outputs - _out0, _out1, _out2, _out3, ~ + * Details : Butterfly operation + * Example : LASX_BUTTERFLY_8 + * _out0 = _in0 + _in7; + * _out1 = _in1 + _in6; + * _out2 = _in2 + _in5; + * _out3 = _in3 + _in4; + * _out4 = _in3 - _in4; + * _out5 = _in2 - _in5; + * _out6 = _in1 - _in6; + * _out7 = _in0 - _in7; + * ============================================================================= + */ +#define LASX_BUTTERFLY_8_B(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + _out0 = __lasx_xvadd_b(_in0, _in7); \ + _out1 = __lasx_xvadd_b(_in1, _in6); \ + _out2 = __lasx_xvadd_b(_in2, _in5); \ + _out3 = __lasx_xvadd_b(_in3, _in4); \ + _out4 = __lasx_xvsub_b(_in3, _in4); \ + _out5 = __lasx_xvsub_b(_in2, _in5); \ + _out6 = __lasx_xvsub_b(_in1, _in6); \ + _out7 = __lasx_xvsub_b(_in0, _in7); \ + } + +#define LASX_BUTTERFLY_8_H(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + _out0 = __lasx_xvadd_h(_in0, _in7); \ + _out1 = __lasx_xvadd_h(_in1, _in6); \ + _out2 = __lasx_xvadd_h(_in2, _in5); \ + _out3 = __lasx_xvadd_h(_in3, _in4); \ + _out4 = __lasx_xvsub_h(_in3, _in4); \ + _out5 = __lasx_xvsub_h(_in2, _in5); \ + _out6 = __lasx_xvsub_h(_in1, _in6); \ + _out7 = __lasx_xvsub_h(_in0, _in7); \ + } + +#define LASX_BUTTERFLY_8_W(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + _out0 = __lasx_xvadd_w(_in0, _in7); \ + _out1 = __lasx_xvadd_w(_in1, _in6); \ + _out2 = __lasx_xvadd_w(_in2, _in5); \ + _out3 = __lasx_xvadd_w(_in3, _in4); \ + _out4 = __lasx_xvsub_w(_in3, _in4); \ + _out5 = __lasx_xvsub_w(_in2, _in5); \ + _out6 = __lasx_xvsub_w(_in1, _in6); \ + _out7 = __lasx_xvsub_w(_in0, _in7); \ + } + +#define LASX_BUTTERFLY_8_D(_in0, _in1, _in2, _in3, _in4, _in5, _in6, _in7, \ + _out0, _out1, _out2, _out3, _out4, _out5, _out6, \ + _out7) \ + { \ + _out0 = __lasx_xvadd_d(_in0, _in7); \ + _out1 = __lasx_xvadd_d(_in1, _in6); \ + _out2 = __lasx_xvadd_d(_in2, _in5); \ + _out3 = __lasx_xvadd_d(_in3, _in4); \ + _out4 = __lasx_xvsub_d(_in3, _in4); \ + _out5 = __lasx_xvsub_d(_in2, _in5); \ + _out6 = __lasx_xvsub_d(_in1, _in6); \ + _out7 = __lasx_xvsub_d(_in0, _in7); \ + } + +#endif // LASX + +/* + * ============================================================================= + * Description : Print out elements in vector. + * Arguments : Inputs - RTYPE, _element_num, _in0, _enter + * Outputs - + * Details : Print out '_element_num' elements in 'RTYPE' vector '_in0', if + * '_enter' is TRUE, prefix "\nVP:" will be added first. + * Example : VECT_PRINT(v4i32,4,in0,1); // in0: 1,2,3,4 + * VP:1,2,3,4, + * ============================================================================= + */ +#define VECT_PRINT(RTYPE, element_num, in0, enter) \ + { \ + RTYPE _tmp0 = (RTYPE)in0; \ + int _i = 0; \ + if (enter) \ + printf("\nVP:"); \ + for (_i = 0; _i < element_num; _i++) \ + printf("%d,", _tmp0[_i]); \ + } + +#endif /* LOONGSON_INTRINSICS_H */ +#endif /* INCLUDE_LIBYUV_LOONGSON_INTRINSICS_H */ diff --git a/third_party/libyuv/include/libyuv/macros_msa.h b/third_party/libyuv/include/libyuv/macros_msa.h index 92ed21c385..b9a44fcced 100644 --- a/third_party/libyuv/include/libyuv/macros_msa.h +++ b/third_party/libyuv/include/libyuv/macros_msa.h @@ -12,15 +12,153 @@ #define INCLUDE_LIBYUV_MACROS_MSA_H_ #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#include #include +#include + +#if (__mips_isa_rev >= 6) +#define LW(psrc) \ + ({ \ + const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \ + uint32_t val_m; \ + asm volatile("lw %[val_m], %[psrc_lw_m] \n" \ + : [val_m] "=r"(val_m) \ + : [psrc_lw_m] "m"(*psrc_lw_m)); \ + val_m; \ + }) + +#if (__mips == 64) +#define LD(psrc) \ + ({ \ + const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ + uint64_t val_m = 0; \ + asm volatile("ld %[val_m], %[psrc_ld_m] \n" \ + : [val_m] "=r"(val_m) \ + : [psrc_ld_m] "m"(*psrc_ld_m)); \ + val_m; \ + }) +#else // !(__mips == 64) +#define LD(psrc) \ + ({ \ + const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ + uint32_t val0_m, val1_m; \ + uint64_t val_m = 0; \ + val0_m = LW(psrc_ld_m); \ + val1_m = LW(psrc_ld_m + 4); \ + val_m = (uint64_t)(val1_m); /* NOLINT */ \ + val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ + val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ + val_m; \ + }) +#endif // (__mips == 64) + +#define SW(val, pdst) \ + ({ \ + uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ + uint32_t val_m = (val); \ + asm volatile("sw %[val_m], %[pdst_sw_m] \n" \ + : [pdst_sw_m] "=m"(*pdst_sw_m) \ + : [val_m] "r"(val_m)); \ + }) + +#if (__mips == 64) +#define SD(val, pdst) \ + ({ \ + uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ + uint64_t val_m = (val); \ + asm volatile("sd %[val_m], %[pdst_sd_m] \n" \ + : [pdst_sd_m] "=m"(*pdst_sd_m) \ + : [val_m] "r"(val_m)); \ + }) +#else // !(__mips == 64) +#define SD(val, pdst) \ + ({ \ + uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ + uint32_t val0_m, val1_m; \ + val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ + val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ + SW(val0_m, pdst_sd_m); \ + SW(val1_m, pdst_sd_m + 4); \ + }) +#endif // !(__mips == 64) +#else // !(__mips_isa_rev >= 6) +#define LW(psrc) \ + ({ \ + uint8_t* psrc_lw_m = (uint8_t*)(psrc); \ + uint32_t val_lw_m; \ + \ + __asm__ volatile( \ + "lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \ + "lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \ + \ + : [val_lw_m] "=&r"(val_lw_m) \ + : [psrc_lw_m] "r"(psrc_lw_m)); \ + \ + val_lw_m; \ + }) -#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ -#define LD_UB(...) LD_B(v16u8, __VA_ARGS__) +#if (__mips == 64) +#define LD(psrc) \ + ({ \ + uint8_t* psrc_ld_m = (uint8_t*)(psrc); \ + uint64_t val_ld_m = 0; \ + \ + __asm__ volatile( \ + "ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \ + "ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \ + \ + : [val_ld_m] "=&r"(val_ld_m) \ + : [psrc_ld_m] "r"(psrc_ld_m)); \ + \ + val_ld_m; \ + }) +#else // !(__mips == 64) +#define LD(psrc) \ + ({ \ + const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ + uint32_t val0_m, val1_m; \ + uint64_t val_m = 0; \ + val0_m = LW(psrc_ld_m); \ + val1_m = LW(psrc_ld_m + 4); \ + val_m = (uint64_t)(val1_m); /* NOLINT */ \ + val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ + val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ + val_m; \ + }) +#endif // (__mips == 64) -#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ +#define SW(val, pdst) \ + ({ \ + uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ + uint32_t val_m = (val); \ + asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ + : [pdst_sw_m] "=m"(*pdst_sw_m) \ + : [val_m] "r"(val_m)); \ + }) + +#define SD(val, pdst) \ + ({ \ + uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ + uint32_t val0_m, val1_m; \ + val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ + val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ + SW(val0_m, pdst_sd_m); \ + SW(val1_m, pdst_sd_m + 4); \ + }) +#endif // (__mips_isa_rev >= 6) + +// TODO(fbarchard): Consider removing __VAR_ARGS versions. +#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ +#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__) + +#define LD_H(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ +#define LD_UH(...) LD_H(const v8u16, __VA_ARGS__) + +#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) +#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ +#define ST_UH(...) ST_H(v8u16, __VA_ARGS__) + /* Description : Load two vectors with 16 'byte' sized elements Arguments : Inputs - psrc, stride Outputs - out0, out1 @@ -28,17 +166,19 @@ Details : Load 16 byte elements in 'out0' from (psrc) Load 16 byte elements in 'out1' from (psrc + stride) */ -#define LD_B2(RTYPE, psrc, stride, out0, out1) { \ - out0 = LD_B(RTYPE, (psrc)); \ - out1 = LD_B(RTYPE, (psrc) + stride); \ -} -#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) - -#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \ - LD_B2(RTYPE, (psrc), stride, out0, out1); \ - LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \ -} -#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) +#define LD_B2(RTYPE, psrc, stride, out0, out1) \ + { \ + out0 = LD_B(RTYPE, (psrc)); \ + out1 = LD_B(RTYPE, (psrc) + stride); \ + } +#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__) + +#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \ + { \ + LD_B2(RTYPE, (psrc), stride, out0, out1); \ + LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ + } +#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__) /* Description : Store two vectors with stride each having 16 'byte' sized elements @@ -46,18 +186,33 @@ Details : Store 16 byte elements from 'in0' to (pdst) Store 16 byte elements from 'in1' to (pdst + stride) */ -#define ST_B2(RTYPE, in0, in1, pdst, stride) { \ - ST_B(RTYPE, in0, (pdst)); \ - ST_B(RTYPE, in1, (pdst) + stride); \ -} +#define ST_B2(RTYPE, in0, in1, pdst, stride) \ + { \ + ST_B(RTYPE, in0, (pdst)); \ + ST_B(RTYPE, in1, (pdst) + stride); \ + } #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) -# -#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \ - ST_B2(RTYPE, in0, in1, (pdst), stride); \ - ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ -} + +#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ + { \ + ST_B2(RTYPE, in0, in1, (pdst), stride); \ + ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ + } #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) -# + +/* Description : Store vectors of 8 halfword elements with stride + Arguments : Inputs - in0, in1, pdst, stride + Details : Store 8 halfword elements from 'in0' to (pdst) + Store 8 halfword elements from 'in1' to (pdst + stride) +*/ +#define ST_H2(RTYPE, in0, in1, pdst, stride) \ + { \ + ST_H(RTYPE, in0, (pdst)); \ + ST_H(RTYPE, in1, (pdst) + stride); \ + } +#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__) + +// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. /* Description : Shuffle byte vector elements as per mask vector Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 Outputs - out0, out1 @@ -65,12 +220,27 @@ Details : Byte elements from 'in0' & 'in1' are copied selectively to 'out0' as per control vector 'mask0' */ -#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ - out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \ - out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \ -} +#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ + { \ + out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \ + out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \ + } #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) -#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ +/* Description : Interleave both left and right half of input vectors + Arguments : Inputs - in0, in1 + Outputs - out0, out1 + Return Type - as per RTYPE + Details : Right half of byte elements from 'in0' and 'in1' are + interleaved and written to 'out0' +*/ +#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ + { \ + out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ + out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ + } +#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) + +#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ diff --git a/third_party/libyuv/include/libyuv/mjpeg_decoder.h b/third_party/libyuv/include/libyuv/mjpeg_decoder.h index 4975bae5b7..275f8d4c18 100644 --- a/third_party/libyuv/include/libyuv/mjpeg_decoder.h +++ b/third_party/libyuv/include/libyuv/mjpeg_decoder.h @@ -26,25 +26,24 @@ namespace libyuv { extern "C" { #endif -LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size); +LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size); #ifdef __cplusplus } // extern "C" #endif -static const uint32 kUnknownDataSize = 0xFFFFFFFF; +static const uint32_t kUnknownDataSize = 0xFFFFFFFF; enum JpegSubsamplingType { kJpegYuv420, kJpegYuv422, - kJpegYuv411, kJpegYuv444, kJpegYuv400, kJpegUnknown }; struct Buffer { - const uint8* data; + const uint8_t* data; int len; }; @@ -66,7 +65,7 @@ struct SetJmpErrorMgr; class LIBYUV_API MJpegDecoder { public: typedef void (*CallbackFunction)(void* opaque, - const uint8* const* data, + const uint8_t* const* data, const int* strides, int rows); @@ -86,7 +85,7 @@ class LIBYUV_API MJpegDecoder { // If return value is LIBYUV_TRUE, then the values for all the following // getters are populated. // src_len is the size of the compressed mjpeg frame in bytes. - LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len); + LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len); // Returns width of the last loaded frame in pixels. int GetWidth(); @@ -139,18 +138,22 @@ class LIBYUV_API MJpegDecoder { // at least GetComponentSize(i). The pointers in planes are incremented // to point to after the end of the written data. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. - LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height); + LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height); // Decodes the entire image and passes the data via repeated calls to a // callback function. Each call will get the data for a whole number of // image scanlines. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. - LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque, - int dst_width, int dst_height); + LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, + void* opaque, + int dst_width, + int dst_height); // The helper function which recognizes the jpeg sub-sampling type. static JpegSubsamplingType JpegSubsamplingTypeHelper( - int* subsample_x, int* subsample_y, int number_of_components); + int* subsample_x, + int* subsample_y, + int number_of_components); private: void AllocOutputBuffers(int num_outbufs); @@ -159,7 +162,7 @@ class LIBYUV_API MJpegDecoder { LIBYUV_BOOL StartDecode(); LIBYUV_BOOL FinishDecode(); - void SetScanlinePointers(uint8** data); + void SetScanlinePointers(uint8_t** data); LIBYUV_BOOL DecodeImcuRow(); int GetComponentScanlinePadding(int component); @@ -178,11 +181,11 @@ class LIBYUV_API MJpegDecoder { // Temporaries used to point to scanline outputs. int num_outbufs_; // Outermost size of all arrays below. - uint8*** scanlines_; + uint8_t*** scanlines_; int* scanlines_sizes_; // Temporary buffer used for decoding when we can't decode directly to the // output buffers. Large enough for just one iMCU row. - uint8** databuf_; + uint8_t** databuf_; int* databuf_strides_; }; diff --git a/third_party/libyuv/include/libyuv/planar_functions.h b/third_party/libyuv/include/libyuv/planar_functions.h index 1b57b29261..972ca9e324 100644 --- a/third_party/libyuv/include/libyuv/planar_functions.h +++ b/third_party/libyuv/include/libyuv/planar_functions.h @@ -22,102 +22,457 @@ namespace libyuv { extern "C" { #endif +// TODO(fbarchard): Move cpu macros to row.h +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) +#define LIBYUV_DISABLE_X86 +#endif +// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define LIBYUV_DISABLE_X86 +#endif +#endif +// The following are available on all x86 platforms: +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) +#define HAS_ARGBAFFINEROW_SSE2 +#endif + // Copy a plane of data. LIBYUV_API -void CopyPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); +void CopyPlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +LIBYUV_API +void CopyPlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height); LIBYUV_API -void CopyPlane_16(const uint16* src_y, int src_stride_y, - uint16* dst_y, int dst_stride_y, - int width, int height); +void Convert16To8Plane(const uint16_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int scale, // 16384 for 10 bits + int width, + int height); + +LIBYUV_API +void Convert8To16Plane(const uint8_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int scale, // 1024 for 10 bits + int width, + int height); // Set a plane of data to a 32 bit value. LIBYUV_API -void SetPlane(uint8* dst_y, int dst_stride_y, - int width, int height, - uint32 value); +void SetPlane(uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + uint32_t value); + +// Convert a plane of tiles of 16 x H to linear. +LIBYUV_API +void DetilePlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height); + +// Convert a UV plane of tiles of 16 x H into linear U and V planes. +LIBYUV_API +void DetileSplitUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + int tile_height); // Split interleaved UV plane into separate U and V planes. LIBYUV_API -void SplitUVPlane(const uint8* src_uv, int src_stride_uv, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +void SplitUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Merge separate U and V planes into one interleaved UV plane. LIBYUV_API -void MergeUVPlane(const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_uv, int dst_stride_uv, - int width, int height); +void MergeUVPlane(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Split interleaved msb UV plane into separate lsb U and V planes. +LIBYUV_API +void SplitUVPlane_16(const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + int depth); + +// Merge separate lsb U and V planes into one interleaved msb UV plane. +LIBYUV_API +void MergeUVPlane_16(const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height, + int depth); + +// Convert lsb plane to msb plane +LIBYUV_API +void ConvertToMSBPlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int depth); + +// Convert msb plane to lsb plane +LIBYUV_API +void ConvertToLSBPlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int depth); + +// Scale U and V to half width and height and merge into interleaved UV plane. +// width and height are source size, allowing odd sizes. +// Use for converting I444 or I422 to NV12. +LIBYUV_API +void HalfMergeUVPlane(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Swap U and V channels in interleaved UV plane. +LIBYUV_API +void SwapUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); + +// Split interleaved RGB plane into separate R, G and B planes. +LIBYUV_API +void SplitRGBPlane(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); + +// Merge separate R, G and B planes into one interleaved RGB plane. +LIBYUV_API +void MergeRGBPlane(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + uint8_t* dst_rgb, + int dst_stride_rgb, + int width, + int height); + +// Split interleaved ARGB plane into separate R, G, B and A planes. +// dst_a can be NULL to discard alpha plane. +LIBYUV_API +void SplitARGBPlane(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height); + +// Merge separate R, G, B and A planes into one interleaved ARGB plane. +// src_a can be NULL to fill opaque value to alpha. +LIBYUV_API +void MergeARGBPlane(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +// Merge separate 'depth' bit R, G and B planes stored in lsb +// into one interleaved XR30 plane. +// depth should in range [10, 16] +LIBYUV_API +void MergeXR30Plane(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + uint8_t* dst_ar30, + int dst_stride_ar30, + int width, + int height, + int depth); + +// Merge separate 'depth' bit R, G, B and A planes stored in lsb +// into one interleaved AR64 plane. +// src_a can be NULL to fill opaque value to alpha. +// depth should in range [1, 16] +LIBYUV_API +void MergeAR64Plane(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + const uint16_t* src_a, + int src_stride_a, + uint16_t* dst_ar64, + int dst_stride_ar64, + int width, + int height, + int depth); + +// Merge separate 'depth' bit R, G, B and A planes stored in lsb +// into one interleaved ARGB plane. +// src_a can be NULL to fill opaque value to alpha. +// depth should in range [8, 16] +LIBYUV_API +void MergeARGB16To8Plane(const uint16_t* src_r, + int src_stride_r, + const uint16_t* src_g, + int src_stride_g, + const uint16_t* src_b, + int src_stride_b, + const uint16_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int depth); // Copy I400. Supports inverting. LIBYUV_API -int I400ToI400(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); +int I400ToI400(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); #define J400ToJ400 I400ToI400 // Copy I422 to I422. #define I422ToI422 I422Copy LIBYUV_API -int I422Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I422Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Copy I444 to I444. #define I444ToI444 I444Copy LIBYUV_API -int I444Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I444Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Copy I210 to I210. +#define I210ToI210 I210Copy +LIBYUV_API +int I210Copy(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height); + +// Copy NV12. Supports inverting. +int NV12Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Copy NV21. Supports inverting. +int NV21Copy(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); // Convert YUY2 to I422. LIBYUV_API -int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int YUY2ToI422(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Convert UYVY to I422. LIBYUV_API -int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int UYVYToI422(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); + +LIBYUV_API +int YUY2ToNV12(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); LIBYUV_API -int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); +int UYVYToNV12(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); + +// Convert NV21 to NV12. +LIBYUV_API +int NV21ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_vu, + int src_stride_vu, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); LIBYUV_API -int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); +int YUY2ToY(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); // Convert I420 to I400. (calls CopyPlane ignoring u/v). LIBYUV_API -int I420ToI400(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - int width, int height); +int I420ToI400(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); // Alias #define J420ToJ400 I420ToI400 @@ -125,13 +480,20 @@ int I420ToI400(const uint8* src_y, int src_stride_y, // I420 mirror. LIBYUV_API -int I420Mirror(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I420Mirror(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Alias #define I400ToI400Mirror I400Mirror @@ -139,87 +501,134 @@ int I420Mirror(const uint8* src_y, int src_stride_y, // I400 mirror. A single plane is mirrored horizontally. // Pass negative height to achieve 180 degree rotation. LIBYUV_API -int I400Mirror(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); +int I400Mirror(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +// Alias +#define NV12ToNV12Mirror NV12Mirror + +// NV12 mirror. +LIBYUV_API +int NV12Mirror(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); // Alias #define ARGBToARGBMirror ARGBMirror // ARGB mirror. LIBYUV_API -int ARGBMirror(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBMirror(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); -// Convert NV12 to RGB565. -LIBYUV_API -int NV12ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); +// Alias +#define RGB24ToRGB24Mirror RGB24Mirror -// I422ToARGB is in convert_argb.h -// Convert I422 to BGRA. +// RGB24 mirror. LIBYUV_API -int I422ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height); - -// Convert I422 to ABGR. +int RGB24Mirror(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); + +// Mirror a plane of data. LIBYUV_API -int I422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); - -// Convert I422 to RGBA. +void MirrorPlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); + +// Mirror a plane of UV data. LIBYUV_API -int I422ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height); +void MirrorUVPlane(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height); // Alias #define RGB24ToRAW RAWToRGB24 LIBYUV_API -int RAWToRGB24(const uint8* src_raw, int src_stride_raw, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height); +int RAWToRGB24(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_rgb24, + int dst_stride_rgb24, + int width, + int height); // Draw a rectangle into I420. LIBYUV_API -int I420Rect(uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int x, int y, int width, int height, - int value_y, int value_u, int value_v); +int I420Rect(uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int x, + int y, + int width, + int height, + int value_y, + int value_u, + int value_v); // Draw a rectangle into ARGB. LIBYUV_API -int ARGBRect(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height, uint32 value); +int ARGBRect(uint8_t* dst_argb, + int dst_stride_argb, + int dst_x, + int dst_y, + int width, + int height, + uint32_t value); // Convert ARGB to gray scale ARGB. LIBYUV_API -int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBGrayTo(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Make a rectangle of ARGB gray scale. LIBYUV_API -int ARGBGray(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height); +int ARGBGray(uint8_t* dst_argb, + int dst_stride_argb, + int dst_x, + int dst_y, + int width, + int height); // Make a rectangle of ARGB Sepia tone. LIBYUV_API -int ARGBSepia(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height); +int ARGBSepia(uint8_t* dst_argb, + int dst_stride_argb, + int dst_x, + int dst_y, + int width, + int height); // Apply a matrix rotation to each ARGB pixel. // matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2. @@ -228,10 +637,13 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb, // The next 4 coefficients apply to B, G, R, A and produce R of the output. // The last 4 coefficients apply to B, G, R, A and produce A of the output. LIBYUV_API -int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const int8* matrix_argb, - int width, int height); +int ARGBColorMatrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + const int8_t* matrix_argb, + int width, + int height); // Deprecated. Use ARGBColorMatrix instead. // Apply a matrix rotation to each ARGB pixel. @@ -240,32 +652,47 @@ int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, // The next 4 coefficients apply to B, G, R, A and produce G of the output. // The last 4 coefficients apply to B, G, R, A and produce R of the output. LIBYUV_API -int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, - const int8* matrix_rgb, - int x, int y, int width, int height); +int RGBColorMatrix(uint8_t* dst_argb, + int dst_stride_argb, + const int8_t* matrix_rgb, + int dst_x, + int dst_y, + int width, + int height); // Apply a color table each ARGB pixel. // Table contains 256 ARGB values. LIBYUV_API -int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, int y, int width, int height); +int ARGBColorTable(uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* table_argb, + int dst_x, + int dst_y, + int width, + int height); // Apply a color table each ARGB pixel but preserve destination alpha. // Table contains 256 ARGB values. LIBYUV_API -int RGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, int y, int width, int height); +int RGBColorTable(uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* table_argb, + int dst_x, + int dst_y, + int width, + int height); // Apply a luma/color table each ARGB pixel but preserve destination alpha. // Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from // RGB (YJ style) and C is an 8 bit color component (R, G or B). LIBYUV_API -int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const uint8* luma_rgb_table, - int width, int height); +int ARGBLumaColorTable(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* luma, + int width, + int height); // Apply a 3 term polynomial to ARGB values. // poly points to a 4x4 matrix. The first row is constants. The 2nd row is @@ -276,54 +703,84 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, // A polynomial approximation can be dirived using software such as 'R'. LIBYUV_API -int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, +int ARGBPolynomial(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, const float* poly, - int width, int height); + int width, + int height); // Convert plane of 16 bit shorts to half floats. // Source values are multiplied by scale before storing as half float. LIBYUV_API -int HalfFloatPlane(const uint16* src_y, int src_stride_y, - uint16* dst_y, int dst_stride_y, +int HalfFloatPlane(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, float scale, - int width, int height); + int width, + int height); + +// Convert a buffer of bytes to floats, scale the values and store as floats. +LIBYUV_API +int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width); // Quantize a rectangle of ARGB. Alpha unaffected. // scale is a 16 bit fractional fixed point scaler between 0 and 65535. // interval_size should be a value between 1 and 255. // interval_offset should be a value between 0 and 255. LIBYUV_API -int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, - int scale, int interval_size, int interval_offset, - int x, int y, int width, int height); +int ARGBQuantize(uint8_t* dst_argb, + int dst_stride_argb, + int scale, + int interval_size, + int interval_offset, + int dst_x, + int dst_y, + int width, + int height); // Copy ARGB to ARGB. LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBCopy(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Copy Alpha channel of ARGB to alpha of ARGB. LIBYUV_API -int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBCopyAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Extract the alpha channel from ARGB. LIBYUV_API -int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb, - uint8* dst_a, int dst_stride_a, - int width, int height); +int ARGBExtractAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height); // Copy Y channel to Alpha of ARGB. LIBYUV_API -int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width); +int ARGBCopyYToAlpha(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); + +typedef void (*ARGBBlendRow)(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); // Get function to Alpha Blend ARGB pixels and store to destination. LIBYUV_API @@ -333,92 +790,143 @@ ARGBBlendRow GetARGBBlend(); // Source is pre-multiplied by alpha using ARGBAttenuate. // Alpha of destination is set to 255. LIBYUV_API -int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBBlend(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Alpha Blend plane and store to destination. // Source is not pre-multiplied by alpha. LIBYUV_API -int BlendPlane(const uint8* src_y0, int src_stride_y0, - const uint8* src_y1, int src_stride_y1, - const uint8* alpha, int alpha_stride, - uint8* dst_y, int dst_stride_y, - int width, int height); +int BlendPlane(const uint8_t* src_y0, + int src_stride_y0, + const uint8_t* src_y1, + int src_stride_y1, + const uint8_t* alpha, + int alpha_stride, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); // Alpha Blend YUV images and store to destination. // Source is not pre-multiplied by alpha. // Alpha is full width x height and subsampled to half size to apply to UV. LIBYUV_API -int I420Blend(const uint8* src_y0, int src_stride_y0, - const uint8* src_u0, int src_stride_u0, - const uint8* src_v0, int src_stride_v0, - const uint8* src_y1, int src_stride_y1, - const uint8* src_u1, int src_stride_u1, - const uint8* src_v1, int src_stride_v1, - const uint8* alpha, int alpha_stride, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); +int I420Blend(const uint8_t* src_y0, + int src_stride_y0, + const uint8_t* src_u0, + int src_stride_u0, + const uint8_t* src_v0, + int src_stride_v0, + const uint8_t* src_y1, + int src_stride_y1, + const uint8_t* src_u1, + int src_stride_u1, + const uint8_t* src_v1, + int src_stride_v1, + const uint8_t* alpha, + int alpha_stride, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height); // Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. LIBYUV_API -int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBMultiply(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Add ARGB image with ARGB image. Saturates to 255. LIBYUV_API -int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBAdd(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0. LIBYUV_API -int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBSubtract(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert I422 to YUY2. LIBYUV_API -int I422ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I422ToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height); // Convert I422 to UYVY. LIBYUV_API -int I422ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); +int I422ToUYVY(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height); // Convert unattentuated ARGB to preattenuated ARGB. LIBYUV_API -int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBAttenuate(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Convert preattentuated ARGB to unattenuated ARGB. LIBYUV_API -int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBUnattenuate(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Internal function - do not call directly. // Computes table of cumulative sum for image where the value is the sum // of all values above and to the left of the entry. Used by ARGBBlur. LIBYUV_API -int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, - int32* dst_cumsum, int dst_stride32_cumsum, - int width, int height); +int ARGBComputeCumulativeSum(const uint8_t* src_argb, + int src_stride_argb, + int32_t* dst_cumsum, + int dst_stride32_cumsum, + int width, + int height); // Blur ARGB image. // dst_cumsum table of width * (height + 1) * 16 bytes aligned to @@ -427,99 +935,157 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, // radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5. // Blur is optimized for radius of 5 (11x11) or less. LIBYUV_API -int ARGBBlur(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int32* dst_cumsum, int dst_stride32_cumsum, - int width, int height, int radius); +int ARGBBlur(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int32_t* dst_cumsum, + int dst_stride32_cumsum, + int width, + int height, + int radius); + +// Gaussian 5x5 blur a float plane. +// Coefficients of 1, 4, 6, 4, 1. +// Each destination pixel is a blur of the 5x5 +// pixels from the source. +// Source edges are clamped. +LIBYUV_API +int GaussPlane_F32(const float* src, + int src_stride, + float* dst, + int dst_stride, + int width, + int height); // Multiply ARGB image by ARGB value. LIBYUV_API -int ARGBShade(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height, uint32 value); +int ARGBShade(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + uint32_t value); // Interpolate between two images using specified amount of interpolation // (0 to 255) and store to destination. // 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 // and 255 means 1% src0 and 99% src1. LIBYUV_API -int InterpolatePlane(const uint8* src0, int src_stride0, - const uint8* src1, int src_stride1, - uint8* dst, int dst_stride, - int width, int height, int interpolation); +int InterpolatePlane(const uint8_t* src0, + int src_stride0, + const uint8_t* src1, + int src_stride1, + uint8_t* dst, + int dst_stride, + int width, + int height, + int interpolation); // Interpolate between two ARGB images using specified amount of interpolation // Internally calls InterpolatePlane with width * 4 (bpp). LIBYUV_API -int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height, int interpolation); +int ARGBInterpolate(const uint8_t* src_argb0, + int src_stride_argb0, + const uint8_t* src_argb1, + int src_stride_argb1, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height, + int interpolation); // Interpolate between two YUV images using specified amount of interpolation // Internally calls InterpolatePlane on each plane where the U and V planes // are half width and half height. LIBYUV_API -int I420Interpolate(const uint8* src0_y, int src0_stride_y, - const uint8* src0_u, int src0_stride_u, - const uint8* src0_v, int src0_stride_v, - const uint8* src1_y, int src1_stride_y, - const uint8* src1_u, int src1_stride_u, - const uint8* src1_v, int src1_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height, int interpolation); - -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) -#define LIBYUV_DISABLE_X86 -#endif -// MemorySanitizer does not support assembly code yet. http://crbug.com/344505 -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#define LIBYUV_DISABLE_X86 -#endif -#endif -// The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#define HAS_ARGBAFFINEROW_SSE2 -#endif +int I420Interpolate(const uint8_t* src0_y, + int src0_stride_y, + const uint8_t* src0_u, + int src0_stride_u, + const uint8_t* src0_v, + int src0_stride_v, + const uint8_t* src1_y, + int src1_stride_y, + const uint8_t* src1_u, + int src1_stride_u, + const uint8_t* src1_v, + int src1_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + int interpolation); // Row function for copying pixels from a source with a slope to a row // of destination. Useful for scaling, rotation, mirror, texture mapping. LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); +void ARGBAffineRow_C(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* uv_dudv, + int width); +// TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); +void ARGBAffineRow_SSE2(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* uv_dudv, + int width); // Shuffle ARGB channel order. e.g. BGRA to ARGB. -// shuffler is 16 bytes and must be aligned. +// shuffler is 16 bytes. +LIBYUV_API +int ARGBShuffle(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_argb, + int dst_stride_argb, + const uint8_t* shuffler, + int width, + int height); + +// Shuffle AR64 channel order. e.g. AR64 to AB64. +// shuffler is 16 bytes. LIBYUV_API -int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_argb, int dst_stride_argb, - const uint8* shuffler, int width, int height); +int AR64Shuffle(const uint16_t* src_ar64, + int src_stride_ar64, + uint16_t* dst_ar64, + int dst_stride_ar64, + const uint8_t* shuffler, + int width, + int height); // Sobel ARGB effect with planar output. LIBYUV_API -int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); +int ARGBSobelToPlane(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height); // Sobel ARGB effect. LIBYUV_API -int ARGBSobel(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBSobel(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); // Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB. LIBYUV_API -int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +int ARGBSobelXY(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/rotate.h b/third_party/libyuv/include/libyuv/rotate.h index 8a2da9a5aa..684ed5e6de 100644 --- a/third_party/libyuv/include/libyuv/rotate.h +++ b/third_party/libyuv/include/libyuv/rotate.h @@ -20,8 +20,8 @@ extern "C" { // Supported rotation. typedef enum RotationMode { - kRotate0 = 0, // No rotation. - kRotate90 = 90, // Rotate 90 degrees clockwise. + kRotate0 = 0, // No rotation. + kRotate90 = 90, // Rotate 90 degrees clockwise. kRotate180 = 180, // Rotate 180 degrees. kRotate270 = 270, // Rotate 270 degrees clockwise. @@ -33,81 +33,196 @@ typedef enum RotationMode { // Rotate I420 frame. LIBYUV_API -int I420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, enum RotationMode mode); +int I420Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + +// Rotate I422 frame. +LIBYUV_API +int I422Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + +// Rotate I444 frame. +LIBYUV_API +int I444Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); // Rotate NV12 input and store in I420. LIBYUV_API -int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, enum RotationMode mode); +int NV12ToI420Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + +// Convert Android420 to I420 with rotation. +// "rotation" can be 0, 90, 180 or 270. +LIBYUV_API +int Android420ToI420Rotate(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_pixel_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode rotation); // Rotate a plane by 0, 90, 180, or 270. LIBYUV_API -int RotatePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int src_width, int src_height, enum RotationMode mode); +int RotatePlane(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height, + enum RotationMode mode); // Rotate planes by 90, 180, 270. Deprecated. LIBYUV_API -void RotatePlane90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); - -LIBYUV_API -void RotatePlane180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); +void RotatePlane90(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); LIBYUV_API -void RotatePlane270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); +void RotatePlane180(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); LIBYUV_API -void RotateUV90(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void RotatePlane270(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); // Rotations for when U and V are interleaved. -// These functions take one input pointer and +// These functions take one UV input pointer and // split the data into two buffers while -// rotating them. Deprecated. +// rotating them. +// width and height expected to be half size for NV12. +LIBYUV_API +int SplitRotateUV(const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + enum RotationMode mode); + +LIBYUV_API +void SplitRotateUV90(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); + LIBYUV_API -void RotateUV180(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void SplitRotateUV180(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); LIBYUV_API -void RotateUV270(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void SplitRotateUV270(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); // The 90 and 270 functions are based on transposes. // Doing a transpose with reversing the read/write // order will result in a rotation by +- 90 degrees. // Deprecated. LIBYUV_API -void TransposePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); +void TransposePlane(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); LIBYUV_API -void TransposeUV(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); +void SplitTransposeUV(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/rotate_argb.h b/third_party/libyuv/include/libyuv/rotate_argb.h index 21fe7e1807..20432949ab 100644 --- a/third_party/libyuv/include/libyuv/rotate_argb.h +++ b/third_party/libyuv/include/libyuv/rotate_argb.h @@ -21,9 +21,13 @@ extern "C" { // Rotate ARGB frame LIBYUV_API -int ARGBRotate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int src_width, int src_height, enum RotationMode mode); +int ARGBRotate(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_argb, + int dst_stride_argb, + int src_width, + int src_height, + enum RotationMode mode); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/rotate_row.h b/third_party/libyuv/include/libyuv/rotate_row.h index 6abd201677..aa8528a925 100644 --- a/third_party/libyuv/include/libyuv/rotate_row.h +++ b/third_party/libyuv/include/libyuv/rotate_row.h @@ -18,100 +18,203 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif -// The following are available for Visual C and clangcl 32 bit: -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) +// The following are available for Visual C 32 bit: +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \ + !defined(__clang__) #define HAS_TRANSPOSEWX8_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif -// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__))) +// The following are available for GCC 32 or 64 bit: +#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__)) #define HAS_TRANSPOSEWX8_SSSE3 #endif -// The following are available for 64 bit GCC but not NaCL: -#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ - defined(__x86_64__) +// The following are available for 64 bit GCC: +#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) #define HAS_TRANSPOSEWX8_FAST_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_TRANSPOSEWX8_NEON #define HAS_TRANSPOSEUVWX8_NEON #endif -#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ - defined(__mips__) && \ - defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_TRANSPOSEWX8_DSPR2 -#define HAS_TRANSPOSEUVWX8_DSPR2 -#endif // defined(__mips__) - -void TransposeWxH_C(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width, int height); - -void TransposeWx8_C(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); - -void TransposeWx8_Any_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); - -void TransposeUVWxH_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); - -void TransposeUVWx8_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_DSPR2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); - -void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); -void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, int width); +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#define HAS_TRANSPOSEWX16_MSA +#define HAS_TRANSPOSEUVWX16_MSA +#endif + +#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) +#define HAS_TRANSPOSEWX16_LSX +#define HAS_TRANSPOSEUVWX16_LSX +#endif + +void TransposeWxH_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width, + int height); + +void TransposeWx8_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx16_C(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_Fast_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx16_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx16_LSX(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); + +void TransposeWx8_Any_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_Any_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx16_Any_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); +void TransposeWx16_Any_LSX(const uint8_t* src, + int src_stride, + uint8_t* dst, + int dst_stride, + int width); + +void TransposeUVWxH_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height); + +void TransposeUVWx8_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx16_C(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx8_SSE2(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx8_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx16_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx16_LSX(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); + +void TransposeUVWx8_Any_SSE2(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx8_Any_NEON(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx16_Any_MSA(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); +void TransposeUVWx16_Any_LSX(const uint8_t* src, + int src_stride, + uint8_t* dst_a, + int dst_stride_a, + uint8_t* dst_b, + int dst_stride_b, + int width); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/row.h b/third_party/libyuv/include/libyuv/row.h index b810221ec7..ae9f595c82 100644 --- a/third_party/libyuv/include/libyuv/row.h +++ b/third_party/libyuv/include/libyuv/row.h @@ -20,34 +20,20 @@ namespace libyuv { extern "C" { #endif -#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) - -#define align_buffer_64(var, size) \ - uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ - uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ - -#define free_aligned_buffer_64(var) \ - free(var##_mem); \ - var = 0 - -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif -// True if compiling for SSSE3 as a requirement. -#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) -#define LIBYUV_SSSE3_ONLY -#endif - -#if defined(__native_client__) -#define LIBYUV_DISABLE_NEON -#endif // clang >= 3.5.0 required for Arm64. #if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON) #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5)) @@ -69,9 +55,18 @@ extern "C" { #endif // clang >= 3.4 #endif // __clang__ +// clang >= 6.0.0 required for AVX512. +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) +// clang in xcode follows a different versioning scheme. +// TODO(fbarchard): fix xcode 9 ios b/789. +#if (__clang_major__ >= 7) && !defined(__APPLE__) +#define CLANG_HAS_AVX512 1 +#endif // clang >= 7 +#endif // __clang__ + // Visual C 2012 required for AVX2. -#if defined(_M_IX86) && !defined(__clang__) && \ - defined(_MSC_VER) && _MSC_VER >= 1700 +#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ + _MSC_VER >= 1700 #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 @@ -79,12 +74,14 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) // Conversions: -#define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ABGRTOUVROW_SSSE3 +#endif #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 +#define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_ARGBSETROW_X86 -#define HAS_ARGBSHUFFLEROW_SSE2 #define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 @@ -92,18 +89,19 @@ extern "C" { #define HAS_ARGBTORGB24ROW_SSSE3 #define HAS_ARGBTORGB565DITHERROW_SSE2 #define HAS_ARGBTORGB565ROW_SSE2 +#define HAS_ARGBTOYJROW_SSSE3 +#define HAS_ARGBTOYROW_SSSE3 +#define HAS_BGRATOYROW_SSSE3 +#if !defined(LIBYUV_BIT_EXACT) #define HAS_ARGBTOUV444ROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3 -#define HAS_ARGBTOYJROW_SSSE3 -#define HAS_ARGBTOYROW_SSSE3 -#define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_BGRATOUVROW_SSSE3 -#define HAS_BGRATOYROW_SSSE3 +#endif #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 #define HAS_H422TOARGBROW_SSSE3 -#define HAS_I400TOARGBROW_SSE2 +#define HAS_HALFFLOATROW_SSE2 #define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 @@ -117,18 +115,24 @@ extern "C" { #define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 #define HAS_MIRRORROW_SSSE3 -#define HAS_MIRRORUVROW_SSSE3 +#define HAS_MIRRORSPLITUVROW_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 +#define HAS_NV12TORGB24ROW_SSSE3 #define HAS_NV12TORGB565ROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 +#define HAS_NV21TORGB24ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTORGB24ROW_SSSE3 -#define HAS_RAWTOYROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 -#define HAS_RGB24TOYROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 -#define HAS_RGBATOUVROW_SSSE3 +#define HAS_RAWTOYROW_SSSE3 +#define HAS_RGB24TOYROW_SSSE3 #define HAS_RGBATOYROW_SSSE3 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_RGB24TOYJROW_SSSE3 +#define HAS_RAWTOYJROW_SSSE3 +#define HAS_RGBATOUVROW_SSSE3 +#endif #define HAS_SETROW_ERMS #define HAS_SETROW_X86 #define HAS_SPLITUVROW_SSE2 @@ -144,7 +148,9 @@ extern "C" { // Effects: #define HAS_ARGBADDROW_SSE2 #define HAS_ARGBAFFINEROW_SSE2 +#if !defined(LIBYUV_BIT_EXACT) #define HAS_ARGBATTENUATEROW_SSSE3 +#endif #define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBCOLORTABLEROW_X86 @@ -173,41 +179,39 @@ extern "C" { // The following functions fail on gcc/clang 32 bit with fpic and framepointer. // caveat: clangcl uses row_win.cc which works. -#if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \ - !defined(__i386__) || defined(_MSC_VER) -// TODO(fbarchard): fix build error on x86 debug -// https://code.google.com/p/libyuv/issues/detail?id=524 -#define HAS_I411TOARGBROW_SSSE3 +#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ + defined(_MSC_VER) // TODO(fbarchard): fix build error on android_full_debug=1 // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_SSSE3 +#define HAS_I444ALPHATOARGBROW_SSSE3 #endif #endif // The following are available on all x86 platforms, but // require VS2012, clang 3.4 or gcc 4.7. -// The code supports NaCL but requires a new compiler and validator. -#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \ - defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \ + defined(GCC_HAS_AVX2)) #define HAS_ARGBCOPYALPHAROW_AVX2 #define HAS_ARGBCOPYYTOALPHAROW_AVX2 +#define HAS_ARGBEXTRACTALPHAROW_AVX2 #define HAS_ARGBMIRRORROW_AVX2 #define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBTORGB565DITHERROW_AVX2 -#define HAS_ARGBTOUVJROW_AVX2 -#define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 +#define HAS_RAWTOYJROW_AVX2 +#define HAS_RGB24TOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ARGBTOUVJROW_AVX2 +#define HAS_ARGBTOUVROW_AVX2 +#endif #define HAS_COPYROW_AVX #define HAS_H422TOARGBROW_AVX2 -#define HAS_I400TOARGBROW_AVX2 -#if !(defined(_DEBUG) && defined(__i386__)) -// TODO(fbarchard): fix build error on android_full_debug=1 -// https://code.google.com/p/libyuv/issues/detail?id=517 -#define HAS_I422ALPHATOARGBROW_AVX2 -#endif -#define HAS_I411TOARGBROW_AVX2 +#define HAS_HALFFLOATROW_AVX2 +// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 @@ -220,8 +224,10 @@ extern "C" { #define HAS_MERGEUVROW_AVX2 #define HAS_MIRRORROW_AVX2 #define HAS_NV12TOARGBROW_AVX2 +#define HAS_NV12TORGB24ROW_AVX2 #define HAS_NV12TORGB565ROW_AVX2 #define HAS_NV21TOARGBROW_AVX2 +#define HAS_NV21TORGB24ROW_AVX2 #define HAS_SPLITUVROW_AVX2 #define HAS_UYVYTOARGBROW_AVX2 #define HAS_UYVYTOUV422ROW_AVX2 @@ -231,21 +237,30 @@ extern "C" { #define HAS_YUY2TOUV422ROW_AVX2 #define HAS_YUY2TOUVROW_AVX2 #define HAS_YUY2TOYROW_AVX2 -#define HAS_HALFFLOATROW_AVX2 // Effects: #define HAS_ARGBADDROW_AVX2 +#if !defined(LIBYUV_BIT_EXACT) #define HAS_ARGBATTENUATEROW_AVX2 +#endif #define HAS_ARGBMULTIPLYROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_BLENDPLANEROW_AVX2 + +#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ + defined(_MSC_VER) +// TODO(fbarchard): fix build error on android_full_debug=1 +// https://code.google.com/p/libyuv/issues/detail?id=517 +#define HAS_I422ALPHATOARGBROW_AVX2 +#define HAS_I444ALPHATOARGBROW_AVX2 +#endif #endif -// The following are available for AVX2 Visual C and clangcl 32 bit: +// The following are available for AVX2 Visual C 32 bit: // TODO(fbarchard): Port to gcc. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \ - (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \ + !defined(__clang__) && defined(VISUALC_HAS_AVX2) #define HAS_ARGB1555TOARGBROW_AVX2 #define HAS_ARGB4444TOARGBROW_AVX2 #define HAS_ARGBTOARGB1555ROW_AVX2 @@ -258,15 +273,138 @@ extern "C" { // The following are also available on x64 Visual C. #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \ (!defined(__clang__) || defined(__SSSE3__)) +#define HAS_I444ALPHATOARGBROW_SSSE3 +#define HAS_I444TOARGBROW_SSSE3 #define HAS_I422ALPHATOARGBROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 #endif -// The following are available on gcc x86 platforms: -// TODO(fbarchard): Port to Visual C. +// The following are available for gcc/clang x86 platforms: +// TODO(fbarchard): Port to Visual C +#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) +#define HAS_ABGRTOAR30ROW_SSSE3 +#define HAS_ARGBTOAR30ROW_SSSE3 +#define HAS_ARGBTOAR64ROW_SSSE3 +#define HAS_ARGBTOAB64ROW_SSSE3 +#define HAS_AR64TOARGBROW_SSSE3 +#define HAS_AB64TOARGBROW_SSSE3 +#define HAS_CONVERT16TO8ROW_SSSE3 +#define HAS_CONVERT8TO16ROW_SSE2 +#define HAS_DETILEROW_SSE2 +#define HAS_DETILESPLITUVROW_SSSE3 +#define HAS_HALFMERGEUVROW_SSSE3 +#define HAS_I210TOAR30ROW_SSSE3 +#define HAS_I210TOARGBROW_SSSE3 +#define HAS_I212TOAR30ROW_SSSE3 +#define HAS_I212TOARGBROW_SSSE3 +#define HAS_I400TOARGBROW_SSE2 +#define HAS_I422TOAR30ROW_SSSE3 +#define HAS_I410TOAR30ROW_SSSE3 +#define HAS_I410TOARGBROW_SSSE3 +#define HAS_MERGEARGBROW_SSE2 +#define HAS_MERGEXRGBROW_SSE2 +#define HAS_MERGERGBROW_SSSE3 +#define HAS_MIRRORUVROW_SSSE3 +#define HAS_NV21TOYUV24ROW_SSSE3 +#define HAS_P210TOAR30ROW_SSSE3 +#define HAS_P210TOARGBROW_SSSE3 +#define HAS_P410TOAR30ROW_SSSE3 +#define HAS_P410TOARGBROW_SSSE3 +#define HAS_RAWTORGBAROW_SSSE3 +#define HAS_RGB24MIRRORROW_SSSE3 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_RGBATOYJROW_SSSE3 +#endif +#define HAS_SPLITARGBROW_SSE2 +#define HAS_SPLITARGBROW_SSSE3 +#define HAS_SPLITXRGBROW_SSE2 +#define HAS_SPLITXRGBROW_SSSE3 +#define HAS_SPLITRGBROW_SSSE3 +#define HAS_SWAPUVROW_SSSE3 + +#if defined(__x86_64__) || !defined(__pic__) +// TODO(fbarchard): fix build error on android_full_debug=1 +// https://code.google.com/p/libyuv/issues/detail?id=517 +#define HAS_I210ALPHATOARGBROW_SSSE3 +#define HAS_I410ALPHATOARGBROW_SSSE3 +#endif +#endif + +// The following are available for AVX2 gcc/clang x86 platforms: +// TODO(fbarchard): Port to Visual C +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__)) && \ + (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#define HAS_ABGRTOAR30ROW_AVX2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ABGRTOUVROW_AVX2 +#define HAS_ABGRTOYROW_AVX2 +#endif +#define HAS_ARGBTOAR30ROW_AVX2 +#define HAS_ARGBTORAWROW_AVX2 +#define HAS_ARGBTORGB24ROW_AVX2 +#define HAS_ARGBTOAR64ROW_AVX2 +#define HAS_ARGBTOAB64ROW_AVX2 +#define HAS_AR64TOARGBROW_AVX2 +#define HAS_AB64TOARGBROW_AVX2 +#define HAS_CONVERT16TO8ROW_AVX2 +#define HAS_CONVERT8TO16ROW_AVX2 +#define HAS_DIVIDEROW_16_AVX2 +#define HAS_HALFMERGEUVROW_AVX2 +#define HAS_MERGEAR64ROW_AVX2 +#define HAS_MERGEARGB16TO8ROW_AVX2 +#define HAS_MERGEARGBROW_AVX2 +#define HAS_MERGEXR30ROW_AVX2 +#define HAS_MERGEXR64ROW_AVX2 +#define HAS_MERGEXRGB16TO8ROW_AVX2 +#define HAS_MERGEXRGBROW_AVX2 +#define HAS_NV21TOYUV24ROW_AVX2 +#define HAS_I210TOAR30ROW_AVX2 +#define HAS_I210TOARGBROW_AVX2 +#define HAS_I212TOAR30ROW_AVX2 +#define HAS_I212TOARGBROW_AVX2 +#define HAS_I400TOARGBROW_AVX2 +#define HAS_I410TOAR30ROW_AVX2 +#define HAS_I410TOARGBROW_AVX2 +#define HAS_P210TOAR30ROW_AVX2 +#define HAS_P210TOARGBROW_AVX2 +#define HAS_P410TOAR30ROW_AVX2 +#define HAS_P410TOARGBROW_AVX2 +#define HAS_I422TOAR30ROW_AVX2 +#define HAS_I422TOUYVYROW_AVX2 +#define HAS_I422TOYUY2ROW_AVX2 +#define HAS_MERGEUVROW_16_AVX2 +#define HAS_MIRRORUVROW_AVX2 +#define HAS_MULTIPLYROW_16_AVX2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_RGBATOYJROW_AVX2 +#endif +#define HAS_SPLITARGBROW_AVX2 +#define HAS_SPLITXRGBROW_AVX2 +#define HAS_SPLITUVROW_16_AVX2 +#define HAS_SWAPUVROW_AVX2 + +#if defined(__x86_64__) || !defined(__pic__) +// TODO(fbarchard): fix build error on android_full_debug=1 +// https://code.google.com/p/libyuv/issues/detail?id=517 +#define HAS_I210ALPHATOARGBROW_AVX2 +#define HAS_I410ALPHATOARGBROW_AVX2 +#endif +#endif + +// The following are available for AVX512 clang x86 platforms: +// TODO(fbarchard): Port to GCC and Visual C +// TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789 #if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) -#define HAS_HALFFLOATROW_SSE2 + (defined(__x86_64__) || defined(__i386__)) && (defined(CLANG_HAS_AVX512)) +#define HAS_ARGBTORGB24ROW_AVX512VBMI +#endif + +// The following are available for AVX512 clang x64 platforms: +// TODO(fbarchard): Port to x86 +#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) && \ + (defined(CLANG_HAS_AVX512)) +#define HAS_I422TOARGBROW_AVX512BW #endif // The following are available on Neon platforms: @@ -280,6 +418,7 @@ extern "C" { #define HAS_ARGB4444TOARGBROW_NEON #define HAS_ARGB4444TOUVROW_NEON #define HAS_ARGB4444TOYROW_NEON +#define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_ARGBSETROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON @@ -287,18 +426,29 @@ extern "C" { #define HAS_ARGBTORGB24ROW_NEON #define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON -#define HAS_ARGBTOUV411ROW_NEON +#define HAS_ARGBTOAR64ROW_NEON +#define HAS_ARGBTOAB64ROW_NEON +#define HAS_AR64TOARGBROW_NEON +#define HAS_AB64TOARGBROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYROW_NEON -#define HAS_ARGBEXTRACTALPHAROW_NEON +#define HAS_AYUVTOUVROW_NEON +#define HAS_AYUVTOVUROW_NEON +#define HAS_AYUVTOYROW_NEON #define HAS_BGRATOUVROW_NEON #define HAS_BGRATOYROW_NEON +#define HAS_BYTETOFLOATROW_NEON #define HAS_COPYROW_NEON +#define HAS_DETILEROW_NEON +#define HAS_DETILESPLITUVROW_NEON +#define HAS_DIVIDEROW_16_NEON +#define HAS_HALFFLOATROW_NEON +#define HAS_HALFMERGEUVROW_NEON #define HAS_I400TOARGBROW_NEON -#define HAS_I411TOARGBROW_NEON +#define HAS_I444ALPHATOARGBROW_NEON #define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON @@ -310,26 +460,50 @@ extern "C" { #define HAS_I422TOYUY2ROW_NEON #define HAS_I444TOARGBROW_NEON #define HAS_J400TOARGBROW_NEON +#define HAS_MERGEAR64ROW_NEON +#define HAS_MERGEARGB16TO8ROW_NEON +#define HAS_MERGEARGBROW_NEON +#define HAS_MERGEXR30ROW_NEON +#define HAS_MERGEXR64ROW_NEON +#define HAS_MERGEXRGB16TO8ROW_NEON +#define HAS_MERGEXRGBROW_NEON #define HAS_MERGEUVROW_NEON +#define HAS_MERGEUVROW_16_NEON #define HAS_MIRRORROW_NEON #define HAS_MIRRORUVROW_NEON +#define HAS_MIRRORSPLITUVROW_NEON +#define HAS_MULTIPLYROW_16_NEON #define HAS_NV12TOARGBROW_NEON +#define HAS_NV12TORGB24ROW_NEON #define HAS_NV12TORGB565ROW_NEON #define HAS_NV21TOARGBROW_NEON +#define HAS_NV21TORGB24ROW_NEON +#define HAS_NV21TOYUV24ROW_NEON #define HAS_RAWTOARGBROW_NEON #define HAS_RAWTORGB24ROW_NEON +#define HAS_RAWTORGBAROW_NEON #define HAS_RAWTOUVROW_NEON +#define HAS_RAWTOUVJROW_NEON +#define HAS_RAWTOYJROW_NEON #define HAS_RAWTOYROW_NEON #define HAS_RGB24TOARGBROW_NEON #define HAS_RGB24TOUVROW_NEON +#define HAS_RGB24TOUVJROW_NEON +#define HAS_RGB24TOYJROW_NEON #define HAS_RGB24TOYROW_NEON #define HAS_RGB565TOARGBROW_NEON #define HAS_RGB565TOUVROW_NEON #define HAS_RGB565TOYROW_NEON #define HAS_RGBATOUVROW_NEON +#define HAS_RGBATOYJROW_NEON #define HAS_RGBATOYROW_NEON #define HAS_SETROW_NEON +#define HAS_SPLITARGBROW_NEON +#define HAS_SPLITXRGBROW_NEON +#define HAS_SPLITRGBROW_NEON #define HAS_SPLITUVROW_NEON +#define HAS_SPLITUVROW_16_NEON +#define HAS_SWAPUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUVROW_NEON @@ -346,6 +520,7 @@ extern "C" { #define HAS_ARGBCOLORMATRIXROW_NEON #define HAS_ARGBGRAYROW_NEON #define HAS_ARGBMIRRORROW_NEON +#define HAS_RGB24MIRRORROW_NEON #define HAS_ARGBMULTIPLYROW_NEON #define HAS_ARGBQUANTIZEROW_NEON #define HAS_ARGBSEPIAROW_NEON @@ -360,22 +535,195 @@ extern "C" { #define HAS_SOBELYROW_NEON #endif -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \ - (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6) -#define HAS_COPYROW_MIPS -#if defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_I422TOARGBROW_DSPR2 -#define HAS_INTERPOLATEROW_DSPR2 -#define HAS_MIRRORROW_DSPR2 -#define HAS_MIRRORUVROW_DSPR2 -#define HAS_SPLITUVROW_DSPR2 -#endif +// The following are available on AArch64 platforms: +#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) +#define HAS_SCALESUMSAMPLES_NEON +#define HAS_GAUSSROW_F32_NEON +#define HAS_GAUSSCOL_F32_NEON #endif - #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) -#define HAS_MIRRORROW_MSA +#define HAS_ABGRTOUVROW_MSA +#define HAS_ABGRTOYROW_MSA +#define HAS_ARGB1555TOARGBROW_MSA +#define HAS_ARGB1555TOUVROW_MSA +#define HAS_ARGB1555TOYROW_MSA +#define HAS_ARGB4444TOARGBROW_MSA +#define HAS_ARGBADDROW_MSA +#define HAS_ARGBATTENUATEROW_MSA +#define HAS_ARGBBLENDROW_MSA +#define HAS_ARGBCOLORMATRIXROW_MSA +#define HAS_ARGBEXTRACTALPHAROW_MSA +#define HAS_ARGBGRAYROW_MSA #define HAS_ARGBMIRRORROW_MSA +#define HAS_ARGBMULTIPLYROW_MSA +#define HAS_ARGBQUANTIZEROW_MSA +#define HAS_ARGBSEPIAROW_MSA +#define HAS_ARGBSETROW_MSA +#define HAS_ARGBSHADEROW_MSA +#define HAS_ARGBSHUFFLEROW_MSA +#define HAS_ARGBSUBTRACTROW_MSA +#define HAS_ARGBTOARGB1555ROW_MSA +#define HAS_ARGBTOARGB4444ROW_MSA +#define HAS_ARGBTORAWROW_MSA +#define HAS_ARGBTORGB24ROW_MSA +#define HAS_ARGBTORGB565DITHERROW_MSA +#define HAS_ARGBTORGB565ROW_MSA +#define HAS_ARGBTOUV444ROW_MSA +#define HAS_ARGBTOUVJROW_MSA +#define HAS_ARGBTOUVROW_MSA +#define HAS_ARGBTOYJROW_MSA +#define HAS_ARGBTOYROW_MSA +#define HAS_BGRATOUVROW_MSA +#define HAS_BGRATOYROW_MSA +#define HAS_HALFFLOATROW_MSA +#define HAS_I400TOARGBROW_MSA +#define HAS_I422TOUYVYROW_MSA +#define HAS_I422TOYUY2ROW_MSA +#define HAS_I422TOARGBROW_MSA +#define HAS_I422TORGBAROW_MSA +#define HAS_I422ALPHATOARGBROW_MSA +#define HAS_I422TORGB24ROW_MSA +#define HAS_I422TORGB565ROW_MSA +#define HAS_I422TOARGB4444ROW_MSA +#define HAS_I422TOARGB1555ROW_MSA +#define HAS_NV12TOARGBROW_MSA +#define HAS_NV12TORGB565ROW_MSA +#define HAS_NV21TOARGBROW_MSA +#define HAS_YUY2TOARGBROW_MSA +#define HAS_UYVYTOARGBROW_MSA +#define HAS_I444TOARGBROW_MSA +#define HAS_INTERPOLATEROW_MSA +#define HAS_J400TOARGBROW_MSA +#define HAS_MERGEUVROW_MSA +#define HAS_MIRRORROW_MSA +#define HAS_MIRRORUVROW_MSA +#define HAS_MIRRORSPLITUVROW_MSA +#define HAS_RAWTOARGBROW_MSA +#define HAS_RAWTORGB24ROW_MSA +#define HAS_RAWTOUVROW_MSA +#define HAS_RAWTOYROW_MSA +#define HAS_RGB24TOARGBROW_MSA +#define HAS_RGB24TOUVROW_MSA +#define HAS_RGB24TOYROW_MSA +#define HAS_RGB565TOARGBROW_MSA +#define HAS_RGB565TOUVROW_MSA +#define HAS_RGB565TOYROW_MSA +#define HAS_RGBATOUVROW_MSA +#define HAS_RGBATOYROW_MSA +#define HAS_SETROW_MSA +#define HAS_SOBELROW_MSA +#define HAS_SOBELTOPLANEROW_MSA +#define HAS_SOBELXROW_MSA +#define HAS_SOBELXYROW_MSA +#define HAS_SOBELYROW_MSA +#define HAS_SPLITUVROW_MSA +#define HAS_UYVYTOUVROW_MSA +#define HAS_UYVYTOYROW_MSA +#define HAS_YUY2TOUV422ROW_MSA +#define HAS_YUY2TOUVROW_MSA +#define HAS_YUY2TOYROW_MSA +#endif + +#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) +#define HAS_ARGB4444TOARGBROW_LSX +#define HAS_ARGB1555TOARGBROW_LSX +#define HAS_RGB565TOARGBROW_LSX +#define HAS_RGB24TOARGBROW_LSX +#define HAS_RAWTOARGBROW_LSX +#define HAS_ARGB1555TOYROW_LSX +#define HAS_ARGB1555TOUVROW_LSX +#define HAS_RGB565TOYROW_LSX +#define HAS_RGB565TOUVROW_LSX +#define HAS_RGB24TOYROW_LSX +#define HAS_RGB24TOUVROW_LSX +#define HAS_RAWTOYROW_LSX +#define HAS_RAWTOUVROW_LSX +#define HAS_NV12TOARGBROW_LSX +#define HAS_NV12TORGB565ROW_LSX +#define HAS_NV21TOARGBROW_LSX +#define HAS_SOBELROW_LSX +#define HAS_SOBELTOPLANEROW_LSX +#define HAS_SOBELXYROW_LSX +#define HAS_ARGBTOYJROW_LSX +#define HAS_BGRATOYROW_LSX +#define HAS_BGRATOUVROW_LSX +#define HAS_ABGRTOYROW_LSX +#define HAS_ABGRTOUVROW_LSX +#define HAS_RGBATOYROW_LSX +#define HAS_RGBATOUVROW_LSX +#define HAS_ARGBTOUVJROW_LSX +#define HAS_I444TOARGBROW_LSX +#define HAS_I400TOARGBROW_LSX +#define HAS_J400TOARGBROW_LSX +#define HAS_YUY2TOARGBROW_LSX +#define HAS_UYVYTOARGBROW_LSX +#define HAS_INTERPOLATEROW_LSX +#define HAS_ARGBSETROW_LSX +#define HAS_RAWTORGB24ROW_LSX +#define HAS_MERGEUVROW_LSX +#define HAS_ARGBEXTRACTALPHAROW_LSX +#define HAS_ARGBBLENDROW_LSX +#define HAS_ARGBQUANTIZEROW_LSX +#define HAS_ARGBCOLORMATRIXROW_LSX +#define HAS_SPLITUVROW_LSX +#define HAS_SETROW_LSX +#define HAS_MIRRORSPLITUVROW_LSX +#endif + +#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx) +#define HAS_I422TOARGBROW_LASX +#define HAS_I422TORGBAROW_LASX +#define HAS_I422ALPHATOARGBROW_LASX +#define HAS_I422TOYUY2ROW_LASX +#define HAS_I422TOUYVYROW_LASX +#define HAS_MIRRORROW_LASX +#define HAS_MIRRORUVROW_LASX +#define HAS_ARGBMIRRORROW_LASX +#define HAS_I422TORGB24ROW_LASX +#define HAS_I422TORGB565ROW_LASX +#define HAS_I422TOARGB4444ROW_LASX +#define HAS_I422TOARGB1555ROW_LASX +#define HAS_YUY2TOUVROW_LASX +#define HAS_YUY2TOYROW_LASX +#define HAS_YUY2TOUV422ROW_LASX +#define HAS_UYVYTOYROW_LASX +#define HAS_UYVYTOUVROW_LASX +#define HAS_UYVYTOUV422ROW_LASX +#define HAS_ARGBTOYROW_LASX +#define HAS_ARGBTOUVROW_LASX +#define HAS_ARGBTORGB24ROW_LASX +#define HAS_ARGBTORAWROW_LASX +#define HAS_ARGBTORGB565ROW_LASX +#define HAS_ARGBTOARGB1555ROW_LASX +#define HAS_ARGBTOARGB4444ROW_LASX +#define HAS_ARGBTOUV444ROW_LASX +#define HAS_ARGBMULTIPLYROW_LASX +#define HAS_ARGBADDROW_LASX +#define HAS_ARGBSUBTRACTROW_LASX +#define HAS_ARGBATTENUATEROW_LASX +#define HAS_ARGBTORGB565DITHERROW_LASX +#define HAS_ARGBSHUFFLEROW_LASX +#define HAS_ARGBSHADEROW_LASX +#define HAS_ARGBGRAYROW_LASX +#define HAS_ARGBSEPIAROW_LASX +#define HAS_ARGB4444TOARGBROW_LASX +#define HAS_ARGB1555TOARGBROW_LASX +#define HAS_RGB565TOARGBROW_LASX +#define HAS_RGB24TOARGBROW_LASX +#define HAS_RAWTOARGBROW_LASX +#define HAS_ARGB1555TOYROW_LASX +#define HAS_ARGB1555TOUVROW_LASX +#define HAS_RGB565TOYROW_LASX +#define HAS_RGB565TOUVROW_LASX +#define HAS_RGB24TOYROW_LASX +#define HAS_RGB24TOUVROW_LASX +#define HAS_RAWTOYROW_LASX +#define HAS_RAWTOUVROW_LASX +#define HAS_NV12TOARGBROW_LASX +#define HAS_NV12TORGB565ROW_LASX +#define HAS_NV21TOARGBROW_LASX +#define HAS_ARGBTOYJROW_LASX +#define HAS_ARGBTOUVJROW_LASX #endif #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) @@ -384,18 +732,20 @@ extern "C" { #else #define SIMD_ALIGNED(var) __declspec(align(16)) var #endif -typedef __declspec(align(16)) int16 vec16[8]; -typedef __declspec(align(16)) int32 vec32[4]; -typedef __declspec(align(16)) int8 vec8[16]; -typedef __declspec(align(16)) uint16 uvec16[8]; -typedef __declspec(align(16)) uint32 uvec32[4]; -typedef __declspec(align(16)) uint8 uvec8[16]; -typedef __declspec(align(32)) int16 lvec16[16]; -typedef __declspec(align(32)) int32 lvec32[8]; -typedef __declspec(align(32)) int8 lvec8[32]; -typedef __declspec(align(32)) uint16 ulvec16[16]; -typedef __declspec(align(32)) uint32 ulvec32[8]; -typedef __declspec(align(32)) uint8 ulvec8[32]; +#define LIBYUV_NOINLINE __declspec(noinline) +typedef __declspec(align(16)) int16_t vec16[8]; +typedef __declspec(align(16)) int32_t vec32[4]; +typedef __declspec(align(16)) float vecf32[4]; +typedef __declspec(align(16)) int8_t vec8[16]; +typedef __declspec(align(16)) uint16_t uvec16[8]; +typedef __declspec(align(16)) uint32_t uvec32[4]; +typedef __declspec(align(16)) uint8_t uvec8[16]; +typedef __declspec(align(32)) int16_t lvec16[16]; +typedef __declspec(align(32)) int32_t lvec32[8]; +typedef __declspec(align(32)) int8_t lvec8[32]; +typedef __declspec(align(32)) uint16_t ulvec16[16]; +typedef __declspec(align(32)) uint32_t ulvec32[8]; +typedef __declspec(align(32)) uint8_t ulvec8[32]; #elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__)) // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. #if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2) @@ -403,83 +753,72 @@ typedef __declspec(align(32)) uint8 ulvec8[32]; #else #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #endif -typedef int16 __attribute__((vector_size(16))) vec16; -typedef int32 __attribute__((vector_size(16))) vec32; -typedef int8 __attribute__((vector_size(16))) vec8; -typedef uint16 __attribute__((vector_size(16))) uvec16; -typedef uint32 __attribute__((vector_size(16))) uvec32; -typedef uint8 __attribute__((vector_size(16))) uvec8; -typedef int16 __attribute__((vector_size(32))) lvec16; -typedef int32 __attribute__((vector_size(32))) lvec32; -typedef int8 __attribute__((vector_size(32))) lvec8; -typedef uint16 __attribute__((vector_size(32))) ulvec16; -typedef uint32 __attribute__((vector_size(32))) ulvec32; -typedef uint8 __attribute__((vector_size(32))) ulvec8; +#define LIBYUV_NOINLINE __attribute__((noinline)) +typedef int16_t __attribute__((vector_size(16))) vec16; +typedef int32_t __attribute__((vector_size(16))) vec32; +typedef float __attribute__((vector_size(16))) vecf32; +typedef int8_t __attribute__((vector_size(16))) vec8; +typedef uint16_t __attribute__((vector_size(16))) uvec16; +typedef uint32_t __attribute__((vector_size(16))) uvec32; +typedef uint8_t __attribute__((vector_size(16))) uvec8; +typedef int16_t __attribute__((vector_size(32))) lvec16; +typedef int32_t __attribute__((vector_size(32))) lvec32; +typedef int8_t __attribute__((vector_size(32))) lvec8; +typedef uint16_t __attribute__((vector_size(32))) ulvec16; +typedef uint32_t __attribute__((vector_size(32))) ulvec32; +typedef uint8_t __attribute__((vector_size(32))) ulvec8; #else #define SIMD_ALIGNED(var) var -typedef int16 vec16[8]; -typedef int32 vec32[4]; -typedef int8 vec8[16]; -typedef uint16 uvec16[8]; -typedef uint32 uvec32[4]; -typedef uint8 uvec8[16]; -typedef int16 lvec16[16]; -typedef int32 lvec32[8]; -typedef int8 lvec8[32]; -typedef uint16 ulvec16[16]; -typedef uint32 ulvec32[8]; -typedef uint8 ulvec8[32]; +#define LIBYUV_NOINLINE +typedef int16_t vec16[8]; +typedef int32_t vec32[4]; +typedef float vecf32[4]; +typedef int8_t vec8[16]; +typedef uint16_t uvec16[8]; +typedef uint32_t uvec32[4]; +typedef uint8_t uvec8[16]; +typedef int16_t lvec16[16]; +typedef int32_t lvec32[8]; +typedef int8_t lvec8[32]; +typedef uint16_t ulvec16[16]; +typedef uint32_t ulvec32[8]; +typedef uint8_t ulvec8[32]; #endif -#if defined(__aarch64__) -// This struct is for Arm64 color conversion. -struct YuvConstants { - uvec16 kUVToRB; - uvec16 kUVToRB2; - uvec16 kUVToG; - uvec16 kUVToG2; - vec16 kUVBiasBGR; - vec32 kYToRgb; -}; -#elif defined(__arm__) -// This struct is for ArmV7 color conversion. +#if defined(__aarch64__) || defined(__arm__) +// This struct is for ARM color conversion. struct YuvConstants { - uvec8 kUVToRB; - uvec8 kUVToG; - vec16 kUVBiasBGR; - vec32 kYToRgb; + uvec8 kUVCoeff; + vec16 kRGBCoeffBias; }; #else // This struct is for Intel color conversion. struct YuvConstants { - int8 kUVToB[32]; - int8 kUVToG[32]; - int8 kUVToR[32]; - int16 kUVBiasB[16]; - int16 kUVBiasG[16]; - int16 kUVBiasR[16]; - int16 kYToRgb[16]; + uint8_t kUVToB[32]; + uint8_t kUVToG[32]; + uint8_t kUVToR[32]; + int16_t kYToRgb[16]; + int16_t kYBiasToRgb[16]; }; // Offsets into YuvConstants structure -#define KUVTOB 0 -#define KUVTOG 32 -#define KUVTOR 64 -#define KUVBIASB 96 -#define KUVBIASG 128 -#define KUVBIASR 160 -#define KYTORGB 192 +#define KUVTOB 0 +#define KUVTOG 32 +#define KUVTOR 64 +#define KYTORGB 96 +#define KYBIASTORGB 128 + #endif -// Conversion matrix for YUV to RGB -extern const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants); // BT.601 -extern const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants); // JPeg -extern const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants); // BT.709 +#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) -// Conversion matrix for YVU to BGR -extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601 -extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg -extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 +#define align_buffer_64(var, size) \ + uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \ + uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ + +#define free_aligned_buffer_64(var) \ + free(var##_mem); \ + var = 0 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP @@ -493,1467 +832,4646 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 #else #define LABELALIGN #endif -#if defined(__native_client__) && defined(__x86_64__) -// r14 is used for MEMOP macros. -#define NACL_R14 "r14", -#define BUNDLELOCK ".bundle_lock\n" -#define BUNDLEUNLOCK ".bundle_unlock\n" -#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" -#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")" -#define MEMLEA(offset, base) #offset "(%q" #base ")" -#define MEMLEA3(offset, index, scale) \ - #offset "(,%q" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%q" #base ",%q" #index "," #scale ")" -#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15" -#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15" -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%%" #reg "\n" \ - BUNDLEUNLOCK -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " %%" #reg ",(%%r15,%%r14)\n" \ - BUNDLEUNLOCK -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%" #arg "\n" \ - BUNDLEUNLOCK -#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \ - BUNDLEUNLOCK -#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #op " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" \ - BUNDLEUNLOCK -#else // defined(__native_client__) && defined(__x86_64__) -#define NACL_R14 -#define BUNDLEALIGN -#define MEMACCESS(base) "(%" #base ")" -#define MEMACCESS2(offset, base) #offset "(%" #base ")" -#define MEMLEA(offset, base) #offset "(%" #base ")" -#define MEMLEA3(offset, index, scale) \ - #offset "(,%" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%" #base ",%" #index "," #scale ")" -#define MEMMOVESTRING(s, d) -#define MEMSTORESTRING(reg, d) -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n" -#define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \ - #reg2 "\n" -#define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \ - #op " $" #sel ",%%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" -#endif // defined(__native_client__) && defined(__x86_64__) - -#if defined(__arm__) || defined(__aarch64__) -#undef MEMACCESS -#if defined(__native_client__) -#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n" -#else -#define MEMACCESS(base) -#endif + +// Intel Code Analizer markers. Insert IACA_START IACA_END around code to be +// measured and then run with iaca -64 libyuv_unittest. +// IACA_ASM_START amd IACA_ASM_END are equivalents that can be used within +// inline assembly blocks. +// example of iaca: +// ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest + +#if defined(__x86_64__) || defined(__i386__) + +#define IACA_ASM_START \ + ".byte 0x0F, 0x0B\n" \ + " movl $111, %%ebx\n" \ + ".byte 0x64, 0x67, 0x90\n" + +#define IACA_ASM_END \ + " movl $222, %%ebx\n" \ + ".byte 0x64, 0x67, 0x90\n" \ + ".byte 0x0F, 0x0B\n" + +#define IACA_SSC_MARK(MARK_ID) \ + __asm__ __volatile__("\n\t movl $" #MARK_ID \ + ", %%ebx" \ + "\n\t .byte 0x64, 0x67, 0x90" \ + : \ + : \ + : "memory"); + +#define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B"); + +#else /* Visual C */ +#define IACA_UD_BYTES \ + { __asm _emit 0x0F __asm _emit 0x0B } + +#define IACA_SSC_MARK(x) \ + { __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 } + +#define IACA_VC64_START __writegsbyte(111, 111); +#define IACA_VC64_END __writegsbyte(222, 222); #endif -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +#define IACA_START \ + { \ + IACA_UD_BYTES \ + IACA_SSC_MARK(111) \ + } +#define IACA_END \ + { \ + IACA_SSC_MARK(222) \ + IACA_UD_BYTES \ + } + +void I444ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_NEON(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I444AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, +void I422ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, +void I422ToARGB1555Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void I422ToARGB4444Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, +void NV12ToRGB565Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void NV21ToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); - -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width); -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); -void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width); -void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width); -void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width); -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width); -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width); -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width); -void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width); -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width); -void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width); -void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width); -void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width); -void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width); -void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int width); -void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width); -void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int width); -void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width); -void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width); -void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, - int width); -void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, - int width); - -void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); -void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width); -void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width); -void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width); -void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width); -void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555, - int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width); -void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444, - int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width); -void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width); -void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width); -void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width); -void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV444Row_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV444Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV411Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); -void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width); -void MirrorRow_MSA(const uint8* src, uint8* dst, int width); -void MirrorRow_C(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_Any_MSA(const uint8* src, uint8* dst, int width); - -void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); - -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_Any_MSA(const uint8* src, uint8* dst, int width); - -void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width); -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void SplitUVRow_Any_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); - -void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); - -void CopyRow_SSE2(const uint8* src, uint8* dst, int count); -void CopyRow_AVX(const uint8* src, uint8* dst, int count); -void CopyRow_ERMS(const uint8* src, uint8* dst, int count); -void CopyRow_NEON(const uint8* src, uint8* dst, int count); -void CopyRow_MIPS(const uint8* src, uint8* dst, int count); -void CopyRow_C(const uint8* src, uint8* dst, int count); -void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count); -void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count); -void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count); - -void CopyRow_16_C(const uint16* src, uint16* dst, int count); - -void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - int width); - -void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width); -void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width); -void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width); -void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a, - int width); -void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a, - int width); - -void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, - int width); -void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, - int width); - -void SetRow_C(uint8* dst, uint8 v8, int count); -void SetRow_X86(uint8* dst, uint8 v8, int count); -void SetRow_ERMS(uint8* dst, uint8 v8, int count); -void SetRow_NEON(uint8* dst, uint8 v8, int count); -void SetRow_Any_X86(uint8* dst, uint8 v8, int count); -void SetRow_Any_NEON(uint8* dst, uint8 v8, int count); - -void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count); -void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count); -void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count); -void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count); - -// ARGBShufflers for BGRAToARGB etc. -void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); -void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); -void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); -void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); -void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); -void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); -void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); -void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); -void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int width); - -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width); -void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int width); -void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb, - int width); - -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width); -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, - int width); -void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width); -void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width); -void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width); -void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, - int width); -void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width); - -void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb, - int width); -void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int width); -void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb, - int width); -void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb, - int width); - -void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, - int width); -void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width); -void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width); -void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb, - int width); -void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb, - int width); -void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, - int width); - -void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); - -void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); -void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); -void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); - -void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); - -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); - -void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width); - -void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width); - -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422AlphaToARGBRow_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I411ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToARGBRow_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToRGB565Row_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void YUY2ToARGBRow_C(const uint8* src_yuy2, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void UYVYToARGBRow_C(const uint8* src_uyvy, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRGBARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRGB24Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB4444Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void NV12ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, +void NV21ToRGB24Row_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void NV21ToYUV24Row_NEON(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); +void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); + +void I422ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I444ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGBARow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I444ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422AlphaToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB24Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB24Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422AlphaToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_MSA(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void NV21ToARGBRow_MSA(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); + +void NV12ToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_LASX(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_LSX(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_LASX(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV21ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV21ToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_LASX(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, +void YUY2ToARGBRow_LSX(const uint8_t* src_yuy2, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_LSX(const uint8_t* src_uyvy, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); + +void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void ABGRToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGBAToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); +void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width); +void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); +void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width); +void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToUV444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void UYVYToARGBRow_AVX2(const uint8* src_uyvy, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, +void ARGBToUVRow_NEON(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_MSA(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void I422ToRGBARow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - const struct YuvConstants* yuvconstants, - int width); -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void ARGBToUVRow_MSA(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_LASX(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_LASX(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_NEON(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_NEON(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_NEON(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_NEON(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_NEON(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVJRow_NEON(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_MSA(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_MSA(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_MSA(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_MSA(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_MSA(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_MSA(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_LSX(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_LSX(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_LSX(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_LSX(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_LASX(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_LSX(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_LASX(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); +void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width); +void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width); +void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_y, + int width); +void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width); + +void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width); +void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width); +void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void RGB565ToYRow_LASX(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width); + +void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void BGRAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void ABGRToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGBAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGB24ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGB24ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RAWToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RAWToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width); +void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width); +void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB4444ToYRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void BGRAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void ARGBToUVRow_AVX2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_AVX2(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_AVX2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_SSSE3(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_SSSE3(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_SSSE3(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_SSSE3(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVJRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVJRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void BGRAToUVRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGBAToUVRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_C(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, + int src_stride_argb4444, + uint8_t* dst_u, + uint8_t* dst_v, + int width); + +void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); + +void ARGBToUV444Row_C(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); + +void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); + +void MirrorSplitUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MirrorSplitUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MirrorSplitUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MirrorSplitUVRow_LSX(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MirrorSplitUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); + +void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBMirrorRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_rgb24, + int width); +void RGB24MirrorRow_NEON(const uint8_t* src_rgb24, + uint8_t* dst_rgb24, + int width); +void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width); +void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB24MirrorRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void SplitUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_SSE2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_AVX2(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_LSX(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SplitUVRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileRow_C(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); + +void DetileRow_NEON(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_Any_NEON(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_SSE2(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_Any_SSE2(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileSplitUVRow_C(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileSplitUVRow_SSSE3(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileSplitUVRow_Any_SSSE3(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileSplitUVRow_NEON(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MergeUVRow_C(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width); +void MergeUVRow_SSE2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width); +void MergeUVRow_AVX2(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width); +void MergeUVRow_NEON(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width); +void MergeUVRow_MSA(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width); +void MergeUVRow_LSX(const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uv, + int width); +void MergeUVRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void MergeUVRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void MergeUVRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void MergeUVRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void MergeUVRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); + +void HalfMergeUVRow_C(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int width); + +void HalfMergeUVRow_NEON(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int width); + +void HalfMergeUVRow_SSSE3(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int width); + +void HalfMergeUVRow_AVX2(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int width); + +void SplitRGBRow_C(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitRGBRow_SSSE3(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitRGBRow_NEON(const uint8_t* src_rgb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitRGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); + +void MergeRGBRow_C(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width); +void MergeRGBRow_SSSE3(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width); +void MergeRGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width); +void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void MergeRGBRow_Any_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_rgb, + int width); +void MergeARGBRow_C(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); +void MergeARGBRow_SSE2(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); +void MergeARGBRow_AVX2(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); +void MergeARGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); +void MergeARGBRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + int width); +void MergeARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + int width); +void MergeARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + int width); +void SplitARGBRow_C(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_NEON(const uint8_t* src_rgba, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void MergeXRGBRow_C(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); +void MergeXRGBRow_SSE2(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); +void MergeXRGBRow_AVX2(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); +void MergeXRGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); +void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void MergeXRGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void MergeXRGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void SplitXRGBRow_C(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_NEON(const uint8_t* src_rgba, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); + +void MergeXR30Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_ar30, + int depth, + int width); +void MergeAR64Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint16_t* dst_ar64, + int depth, + int width); +void MergeARGB16To8Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR64Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint16_t* dst_ar64, + int depth, + int width); +void MergeXRGB16To8Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR30Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_ar30, + int depth, + int width); +void MergeAR64Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint16_t* dst_ar64, + int depth, + int width); +void MergeARGB16To8Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR64Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint16_t* dst_ar64, + int depth, + int width); +void MergeXRGB16To8Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR30Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_ar30, + int depth, + int width); +void MergeXR30Row_10_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_ar30, + int /* depth */, + int width); +void MergeAR64Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint16_t* dst_ar64, + int depth, + int width); +void MergeARGB16To8Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR64Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint16_t* dst_ar64, + int depth, + int width); +void MergeXRGB16To8Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR30Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeAR64Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + const uint16_t* a_buf, + uint16_t* dst_ptr, + int depth, + int width); +void MergeXR64Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint16_t* dst_ptr, + int depth, + int width); +void MergeARGB16To8Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeXRGB16To8Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeXR30Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeXR30Row_10_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeAR64Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + const uint16_t* a_buf, + uint16_t* dst_ptr, + int depth, + int width); +void MergeARGB16To8Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeXR64Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint16_t* dst_ptr, + int depth, + int width); +void MergeXRGB16To8Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); + +void MergeUVRow_16_C(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int depth, + int width); +void MergeUVRow_16_AVX2(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int depth, + int width); +void MergeUVRow_16_Any_AVX2(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int depth, + int width); +void MergeUVRow_16_NEON(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int depth, + int width); +void MergeUVRow_16_Any_NEON(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int depth, + int width); + +void SplitUVRow_16_C(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); +void SplitUVRow_16_AVX2(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); +void SplitUVRow_16_Any_AVX2(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); +void SplitUVRow_16_NEON(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); +void SplitUVRow_16_Any_NEON(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); + +void MultiplyRow_16_C(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void MultiplyRow_16_AVX2(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void MultiplyRow_16_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); +void MultiplyRow_16_NEON(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void MultiplyRow_16_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); + +void DivideRow_16_C(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void DivideRow_16_AVX2(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void DivideRow_16_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); +void DivideRow_16_NEON(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void DivideRow_16_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); + +void Convert8To16Row_C(const uint8_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void Convert8To16Row_SSE2(const uint8_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void Convert8To16Row_AVX2(const uint8_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void Convert8To16Row_Any_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); +void Convert8To16Row_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); + +void Convert16To8Row_C(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width); +void Convert16To8Row_SSSE3(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width); +void Convert16To8Row_AVX2(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width); +void Convert16To8Row_Any_SSSE3(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int scale, + int width); +void Convert16To8Row_Any_AVX2(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int scale, + int width); + +void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width); +void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width); +void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width); +void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count); +void CopyRow_C(const uint8_t* src, uint8_t* dst, int count); +void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); + +void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count); + +void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width); +void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb, + uint8_t* dst_a, + int width); +void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBExtractAlphaRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void SetRow_C(uint8_t* dst, uint8_t v8, int width); +void SetRow_MSA(uint8_t* dst, uint8_t v8, int width); +void SetRow_X86(uint8_t* dst, uint8_t v8, int width); +void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width); +void SetRow_NEON(uint8_t* dst, uint8_t v8, int width); +void SetRow_LSX(uint8_t* dst, uint8_t v8, int width); +void SetRow_Any_X86(uint8_t* dst_ptr, uint8_t v32, int width); +void SetRow_Any_NEON(uint8_t* dst_ptr, uint8_t v32, int width); +void SetRow_Any_LSX(uint8_t* dst_ptr, uint8_t v32, int width); + +void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width); +void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width); +void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width); +void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width); +void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width); +void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width); +void ARGBSetRow_LSX(uint8_t* dst_argb, uint32_t v32, int width); +void ARGBSetRow_Any_LSX(uint8_t* dst_ptr, uint32_t v32, int width); + +// ARGBShufflers for BGRAToARGB etc. +void ARGBShuffleRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); +void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); +void ARGBShuffleRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); +void ARGBShuffleRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); +void ARGBShuffleRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); +void ARGBShuffleRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); +void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); +void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); +void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); +void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); +void ARGBShuffleRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); + +void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width); +void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgba, int width); +void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); +void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); + +void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width); +void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width); +void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width); +void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); +void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); +void RGB565ToARGBRow_LSX(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); +void RGB565ToARGBRow_LASX(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); +void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB1555ToARGBRow_LSX(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB1555ToARGBRow_LASX(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); +void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); +void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); +void ARGB4444ToARGBRow_LASX(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); +void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width); +void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); +void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); +void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); +void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width); +void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width); +void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width); +void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width); + +void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToRGBARow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB1555ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB4444ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB1555ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB24ToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB24ToARGBRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGBARow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToARGBRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGB24Row_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB565ToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB565ToARGBRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB1555ToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB1555ToARGBRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB4444ToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB4444ToARGBRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); +void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); +void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); +void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); + +void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width); + +void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width); + +void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width); +void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, + uint8_t* dst, + const uint32_t dither4, + int width); +void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, + uint8_t* dst, + const uint32_t dither4, + int width); + +void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width); + +void ARGBToRGB24Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb24, + int width); +void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width); +void ARGBToRGB565Row_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb565, + int width); +void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb1555, + int width); +void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, + uint8_t* dst_argb4444, + int width); +void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width); +void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width); +void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width); + +void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToARGB4444Row_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); + +void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width); +void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width); + +void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width); +void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width); +void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width); +void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void AR64ShuffleRow_C(const uint8_t* src_ar64, + uint8_t* dst_ar64, + const uint8_t* shuffler, + int width); +void ARGBToAR64Row_SSSE3(const uint8_t* src_argb, + uint16_t* dst_ar64, + int width); +void ARGBToAB64Row_SSSE3(const uint8_t* src_argb, + uint16_t* dst_ab64, + int width); +void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64, + uint8_t* dst_argb, + int width); +void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64, + uint8_t* dst_argb, + int width); +void ARGBToAR64Row_AVX2(const uint8_t* src_argb, uint16_t* dst_ar64, int width); +void ARGBToAB64Row_AVX2(const uint8_t* src_argb, uint16_t* dst_ab64, int width); +void AR64ToARGBRow_AVX2(const uint16_t* src_ar64, uint8_t* dst_argb, int width); +void AB64ToARGBRow_AVX2(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width); +void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width); +void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width); +void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void ARGBToAB64Row_Any_SSSE3(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void AR64ToARGBRow_Any_SSSE3(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void AB64ToARGBRow_Any_SSSE3(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR64Row_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void ARGBToAB64Row_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void AR64ToARGBRow_Any_AVX2(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void AB64ToARGBRow_Any_AVX2(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR64Row_Any_NEON(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void ARGBToAB64Row_Any_NEON(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void AR64ToARGBRow_Any_NEON(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void AB64ToARGBRow_Any_NEON(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void J400ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); + +void I444ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I422ToAR30Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I210ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I212ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I212ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + const uint16_t* src_a, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + const uint16_t* src_a, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToYUV24Row_C(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); +void YUY2ToARGBRow_C(const uint8_t* src_yuy2, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_C(const uint8_t* src_uyvy, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void P210ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_uv, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_uv, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); + +void I422ToRGBARow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB24Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_AVX512BW(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); + +void I422ToAR30Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I210ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I212ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I212ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422ToAR30Row_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I210ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I212ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I212ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void NV12ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB24Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToRGB24Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_rgb24, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToYUV24Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); +void NV21ToYUV24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); +void NV12ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* vu_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); + +void P210ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P210ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); + +void I422ToRGBARow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgba, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB1555Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB1555Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, +void I422ToRGB24Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_AVX512BW(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToAR30Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I212ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I212ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I410ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I212ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I212ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - const uint8* a_buf, - uint8* dst_argb, +void I422AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void NV12ToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void NV12ToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV21ToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV21ToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV12ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB24Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToYUV24Row_Any_SSSE3(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); +void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); +void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_Any_AVX2(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB565Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P210ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, +void P410ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2, - uint8* dst_argb, +void P210ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P210ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy, - uint8* dst_argb, +void P410ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGBARow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToARGB4444Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToARGB4444Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToARGB1555Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToARGB1555Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToRGB565Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width); -void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width); +void I400ToARGBRow_C(const uint8_t* src_y, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_SSE2(const uint8_t* y_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_AVX2(const uint8_t* y_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_NEON(const uint8_t* src_y, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_MSA(const uint8_t* src_y, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_LSX(const uint8_t* src_y, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* param, + int width); +void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* param, + int width); +void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* param, + int width); +void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); // ARGB preattenuated alpha blend. -void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void ARGBBlendRow_SSSE3(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBBlendRow_NEON(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBBlendRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBBlendRow_LSX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBBlendRow_C(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); // Unattenuated planar alpha blend. -void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width); -void BlendPlaneRow_Any_SSSE3(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width); -void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width); -void BlendPlaneRow_Any_AVX2(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width); -void BlendPlaneRow_C(const uint8* src0, const uint8* src1, - const uint8* alpha, uint8* dst, int width); +void BlendPlaneRow_SSSE3(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width); +void BlendPlaneRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void BlendPlaneRow_AVX2(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width); +void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void BlendPlaneRow_C(const uint8_t* src0, + const uint8_t* src1, + const uint8_t* alpha, + uint8_t* dst, + int width); // ARGB multiply images. Same API as Blend, but these require // pointer and width alignment for SSE2. -void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void ARGBMultiplyRow_C(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_SSE2(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBMultiplyRow_AVX2(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBMultiplyRow_NEON(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBMultiplyRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBMultiplyRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); // ARGB add images. -void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void ARGBAddRow_C(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_SSE2(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBAddRow_AVX2(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBAddRow_NEON(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBAddRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBAddRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); + +// ARGB subtract images. Same API as Blend, but these require +// pointer and width alignment for SSE2. +void ARGBSubtractRow_C(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_SSE2(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBSubtractRow_AVX2(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBSubtractRow_NEON(const uint8_t* src_argb, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBSubtractRow_MSA(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void ARGBSubtractRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); + +void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB24Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB24Row_Any_AVX512VBMI(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); +void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); -// ARGB subtract images. Same API as Blend, but these require -// pointer and width alignment for SSE2. -void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); - -void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, +void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, +void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); +void ABGRToAR30Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR30Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); -void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); -void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); - -void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, +void ARGBToRGB24Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, +void ARGBToARGB4444Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); +void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); +void ARGBToRGB24Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB4444Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); +void ARGBToRGB565DitherRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); -void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width); -void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, +void ARGBToRGB24Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToRAWRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, +void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, - const uint32 dither4, int width); -void I444ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422AlphaToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - const uint8* src_a, - uint8* dst_argb, +void I444AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I411ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRGBARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB4444Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB4444Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGB1555Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGB1555Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, +void NV21ToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV12ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void NV12ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToYUV24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, - uint8* dst_argb, +void YUY2ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, +void P210ToARGBRow_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P210ToARGBRow_Any_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_Any_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_Any_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_Any_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I444ToARGBRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGBRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGBARow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); + +void NV12ToARGBRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); + +void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, int width); -void I422ToARGBRow_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - const struct YuvConstants* yuvconstants, +void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, + int stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUVRow_LASX(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToUVRow_C(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_C(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_NEON(const uint8_t* src_uyvy, + int stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, int width); +void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_MSA(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUVRow_LASX(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_LASX(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); + +void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToUVRow_C(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_C(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void UYVYToUV422Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width); +void AYUVToUVRow_C(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_uv, + int width); +void AYUVToVURow_C(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_vu, + int width); +void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width); +void AYUVToUVRow_NEON(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_uv, + int width); +void AYUVToVURow_NEON(const uint8_t* src_ayuv, + int src_stride_ayuv, + uint8_t* dst_vu, + int width); +void AYUVToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void AYUVToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_vu, + int width); +void AYUVToVURow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_vu, + int width); -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_C(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width); -void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_NEON(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); - -void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_C(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width); -void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int width); -void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width); - -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); +void I422ToYUY2Row_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, + int width); +void I422ToUYVYRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_frame, + int width); +void I422ToYUY2Row_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_SSE2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToYUY2Row_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToYUY2Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToYUY2Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToYUY2Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); +void I422ToUYVYRow_MSA(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToUYVYRow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); +void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); // Effects related row functions. -void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, +void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, +void ARGBAttenuateRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, +void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBAttenuateRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); // Inverse table for unattenuate, shared by C and SSE2. -extern const uint32 fixed_invtbl8[256]; -void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, +extern const uint32_t fixed_invtbl8[256]; +void ARGBUnattenuateRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); +void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, +void ARGBUnattenuateRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, int width); -void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); - -void ARGBSepiaRow_C(uint8* dst_argb, int width); -void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width); -void ARGBSepiaRow_NEON(uint8* dst_argb, int width); - -void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); -void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); - -void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); - -void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); - -void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); -void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); -void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); - -void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); -void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); -void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); +void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width); + +void ARGBSepiaRow_C(uint8_t* dst_argb, int width); +void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); +void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); +void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); +void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width); + +void ARGBColorMatrixRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); +void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); +void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); +void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); +void ARGBColorMatrixRow_LSX(const uint8_t* src_argb, + uint8_t* dst_argb, + const int8_t* matrix_argb, + int width); + +void ARGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); +void ARGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); + +void RGBColorTableRow_C(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); +void RGBColorTableRow_X86(uint8_t* dst_argb, + const uint8_t* table_argb, + int width); + +void ARGBQuantizeRow_C(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); +void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); +void ARGBQuantizeRow_NEON(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); +void ARGBQuantizeRow_MSA(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); +void ARGBQuantizeRow_LSX(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); + +void ARGBShadeRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); +void ARGBShadeRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); +void ARGBShadeRow_NEON(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); +void ARGBShadeRow_MSA(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); +void ARGBShadeRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); // Used for blur. -void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count); -void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width); +void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, + const int32_t* botleft, + int width, + int area, + uint8_t* dst, + int count); +void ComputeCumulativeSumRow_SSE2(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, + int width); -void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count); -void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width); +void CumulativeSumToAverageRow_C(const int32_t* tl, + const int32_t* bl, + int w, + int area, + uint8_t* dst, + int count); +void ComputeCumulativeSumRow_C(const uint8_t* row, + int32_t* cumsum, + const int32_t* previous_cumsum, + int width); LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); +void ARGBAffineRow_C(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* uv_dudv, + int width); LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); +void ARGBAffineRow_SSE2(const uint8_t* src_argb, + int src_argb_stride, + uint8_t* dst_argb, + const float* src_dudv, + int width); // Used for I420Scale, ARGBScale, and ARGBInterpolate. -void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, int source_y_fraction); -void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); +void InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); -void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); +void InterpolateRow_LSX(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); +void InterpolateRow_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, int source_y_fraction); -void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, int source_y_fraction); -void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void InterpolateRow_Any_AVX2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, int source_y_fraction); -void InterpolateRow_Any_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); +void InterpolateRow_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, + int source_y_fraction); +void InterpolateRow_Any_LSX(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, + int source_y_fraction); -void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr, - ptrdiff_t src_stride_ptr, - int width, int source_y_fraction); +void InterpolateRow_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); // Sobel images. -void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, - uint8* dst_sobelx, int width); -void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width); -void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width); -void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); - -void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width); -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width); -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, const float* poly, +void SobelXRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width); +void SobelXRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width); +void SobelXRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width); +void SobelXRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + const uint8_t* src_y2, + uint8_t* dst_sobelx, + int width); +void SobelYRow_C(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width); +void SobelYRow_SSE2(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width); +void SobelYRow_NEON(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width); +void SobelYRow_MSA(const uint8_t* src_y0, + const uint8_t* src_y1, + uint8_t* dst_sobely, + int width); +void SobelRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelRow_LSX(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelToPlaneRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width); +void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width); +void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width); +void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width); +void SobelToPlaneRow_LSX(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_y, + int width); +void SobelXYRow_C(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelXYRow_SSE2(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelXYRow_NEON(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelXYRow_MSA(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelXYRow_LSX(const uint8_t* src_sobelx, + const uint8_t* src_sobely, + uint8_t* dst_argb, + int width); +void SobelRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelToPlaneRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelToPlaneRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelXYRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelXYRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelXYRow_Any_MSA(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); +void SobelXYRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); + +void ARGBPolynomialRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, + int width); +void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, + int width); +void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_argb, + const float* poly, int width); // Scale and convert to half float. -void HalfFloatRow_C(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_Any_AVX2(const uint16* src, uint16* dst, float scale, +void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width); +void HalfFloatRow_SSE2(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_SSE2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void HalfFloatRow_AVX2(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, int width); -void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width); -void HalfFloatRow_Any_SSE2(const uint16* src, uint16* dst, float scale, +void HalfFloatRow_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloat1Row_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloat1Row_Any_F16C(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, int width); +void HalfFloat1Row_NEON(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloat1Row_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void HalfFloatRow_MSA(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_MSA(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void HalfFloatRow_LSX(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_LSX(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); +void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width); +void ByteToFloatRow_NEON(const uint8_t* src, + float* dst, + float scale, + int width); +void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr, + float* dst_ptr, + float param, + int width); -void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, - const uint8* luma, uint32 lumacoeff); -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, +void ARGBLumaColorTableRow_C(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + const uint8_t* luma, + uint32_t lumacoeff); +void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_argb, int width, - const uint8* luma, uint32 lumacoeff); + const uint8_t* luma, + uint32_t lumacoeff); + +float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width); +float ScaleMaxSamples_NEON(const float* src, + float* dst, + float scale, + int width); +float ScaleSumSamples_C(const float* src, float* dst, float scale, int width); +float ScaleSumSamples_NEON(const float* src, + float* dst, + float scale, + int width); +void ScaleSamples_C(const float* src, float* dst, float scale, int width); +void ScaleSamples_NEON(const float* src, float* dst, float scale, int width); + +void GaussRow_F32_NEON(const float* src, float* dst, int width); +void GaussRow_F32_C(const float* src, float* dst, int width); + +void GaussCol_F32_NEON(const float* src0, + const float* src1, + const float* src2, + const float* src3, + const float* src4, + float* dst, + int width); + +void GaussCol_F32_C(const float* src0, + const float* src1, + const float* src2, + const float* src3, + const float* src4, + float* dst, + int width); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/scale.h b/third_party/libyuv/include/libyuv/scale.h index ae14694598..443f89c2f9 100644 --- a/third_party/libyuv/include/libyuv/scale.h +++ b/third_party/libyuv/include/libyuv/scale.h @@ -20,25 +20,45 @@ extern "C" { // Supported filtering. typedef enum FilterMode { - kFilterNone = 0, // Point sample; Fastest. - kFilterLinear = 1, // Filter horizontally only. + kFilterNone = 0, // Point sample; Fastest. + kFilterLinear = 1, // Filter horizontally only. kFilterBilinear = 2, // Faster than box, but lower quality scaling down. - kFilterBox = 3 // Highest quality. + kFilterBox = 3 // Highest quality. } FilterModeEnum; // Scale a YUV plane. LIBYUV_API -void ScalePlane(const uint8* src, int src_stride, - int src_width, int src_height, - uint8* dst, int dst_stride, - int dst_width, int dst_height, +void ScalePlane(const uint8_t* src, + int src_stride, + int src_width, + int src_height, + uint8_t* dst, + int dst_stride, + int dst_width, + int dst_height, enum FilterMode filtering); LIBYUV_API -void ScalePlane_16(const uint16* src, int src_stride, - int src_width, int src_height, - uint16* dst, int dst_stride, - int dst_width, int dst_height, +void ScalePlane_16(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, + enum FilterMode filtering); + +// Sample is expected to be in the low 12 bits. +LIBYUV_API +void ScalePlane_12(const uint16_t* src, + int src_stride, + int src_width, + int src_height, + uint16_t* dst, + int dst_stride, + int dst_width, + int dst_height, enum FilterMode filtering); // Scales a YUV 4:2:0 image from the src width and height to the @@ -52,43 +72,240 @@ void ScalePlane_16(const uint16* src, int src_stride, // Returns 0 if successful. LIBYUV_API -int I420Scale(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int dst_width, int dst_height, +int I420Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, enum FilterMode filtering); LIBYUV_API -int I420Scale_16(const uint16* src_y, int src_stride_y, - const uint16* src_u, int src_stride_u, - const uint16* src_v, int src_stride_v, - int src_width, int src_height, - uint16* dst_y, int dst_stride_y, - uint16* dst_u, int dst_stride_u, - uint16* dst_v, int dst_stride_v, - int dst_width, int dst_height, +int I420Scale_16(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, enum FilterMode filtering); -#ifdef __cplusplus -// Legacy API. Deprecated. LIBYUV_API -int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, - int src_stride_y, int src_stride_u, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, uint8* dst_u, uint8* dst_v, - int dst_stride_y, int dst_stride_u, int dst_stride_v, - int dst_width, int dst_height, - LIBYUV_BOOL interpolate); +int I420Scale_12(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); + +// Scales a YUV 4:4:4 image from the src width and height to the +// dst width and height. +// If filtering is kFilterNone, a simple nearest-neighbor algorithm is +// used. This produces basic (blocky) quality at the fastest speed. +// If filtering is kFilterBilinear, interpolation is used to produce a better +// quality image, at the expense of speed. +// If filtering is kFilterBox, averaging is used to produce ever better +// quality image, at further expense of speed. +// Returns 0 if successful. + +LIBYUV_API +int I444Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); + +LIBYUV_API +int I444Scale_16(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); + +LIBYUV_API +int I444Scale_12(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); + +// Scales a YUV 4:2:2 image from the src width and height to the +// dst width and height. +// If filtering is kFilterNone, a simple nearest-neighbor algorithm is +// used. This produces basic (blocky) quality at the fastest speed. +// If filtering is kFilterBilinear, interpolation is used to produce a better +// quality image, at the expense of speed. +// If filtering is kFilterBox, averaging is used to produce ever better +// quality image, at further expense of speed. +// Returns 0 if successful. +LIBYUV_API +int I422Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); +LIBYUV_API +int I422Scale_16(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); + +LIBYUV_API +int I422Scale_12(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + int src_width, + int src_height, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int dst_width, + int dst_height, + enum FilterMode filtering); + +// Scales an NV12 image from the src width and height to the +// dst width and height. +// If filtering is kFilterNone, a simple nearest-neighbor algorithm is +// used. This produces basic (blocky) quality at the fastest speed. +// If filtering is kFilterBilinear, interpolation is used to produce a better +// quality image, at the expense of speed. +// kFilterBox is not supported for the UV channel and will be treated as +// bilinear. +// Returns 0 if successful. + +LIBYUV_API +int NV12Scale(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int dst_width, + int dst_height, + enum FilterMode filtering); + +#ifdef __cplusplus // Legacy API. Deprecated. LIBYUV_API -int ScaleOffset(const uint8* src_i420, int src_width, int src_height, - uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset, - LIBYUV_BOOL interpolate); +int Scale(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + int src_stride_y, + int src_stride_u, + int src_stride_v, + int src_width, + int src_height, + uint8_t* dst_y, + uint8_t* dst_u, + uint8_t* dst_v, + int dst_stride_y, + int dst_stride_u, + int dst_stride_v, + int dst_width, + int dst_height, + LIBYUV_BOOL interpolate); // For testing, allow disabling of specialized scalers. LIBYUV_API diff --git a/third_party/libyuv/include/libyuv/scale_argb.h b/third_party/libyuv/include/libyuv/scale_argb.h index 35cd191c0f..7641f18e34 100644 --- a/third_party/libyuv/include/libyuv/scale_argb.h +++ b/third_party/libyuv/include/libyuv/scale_argb.h @@ -20,32 +20,52 @@ extern "C" { #endif LIBYUV_API -int ARGBScale(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, +int ARGBScale(const uint8_t* src_argb, + int src_stride_argb, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + int dst_width, + int dst_height, enum FilterMode filtering); // Clipped scale takes destination rectangle coordinates for clip values. LIBYUV_API -int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, +int ARGBScaleClip(const uint8_t* src_argb, + int src_stride_argb, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, enum FilterMode filtering); // Scale with YUV conversion to ARGB and clipping. LIBYUV_API -int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint32 src_fourcc, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - uint32 dst_fourcc, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, +int YUVToARGBScaleClip(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint32_t src_fourcc, + int src_width, + int src_height, + uint8_t* dst_argb, + int dst_stride_argb, + uint32_t dst_fourcc, + int dst_width, + int dst_height, + int clip_x, + int clip_y, + int clip_width, + int clip_height, enum FilterMode filtering); #ifdef __cplusplus diff --git a/third_party/libyuv/include/libyuv/scale_rgb.h b/third_party/libyuv/include/libyuv/scale_rgb.h new file mode 100644 index 0000000000..d17c39fd6e --- /dev/null +++ b/third_party/libyuv/include/libyuv/scale_rgb.h @@ -0,0 +1,42 @@ +/* + * Copyright 2022 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef INCLUDE_LIBYUV_SCALE_RGB_H_ +#define INCLUDE_LIBYUV_SCALE_RGB_H_ + +#include "libyuv/basic_types.h" +#include "libyuv/scale.h" // For FilterMode + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +// RGB can be RAW, RGB24 or YUV24 +// RGB scales 24 bit images by converting a row at a time to ARGB +// and using ARGB row functions to scale, then convert to RGB. +// TODO(fbarchard): Allow input/output formats to be specified. +LIBYUV_API +int RGBScale(const uint8_t* src_rgb, + int src_stride_rgb, + int src_width, + int src_height, + uint8_t* dst_rgb, + int dst_stride_rgb, + int dst_width, + int dst_height, + enum FilterMode filtering); + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // INCLUDE_LIBYUV_SCALE_UV_H_ diff --git a/third_party/libyuv/include/libyuv/scale_row.h b/third_party/libyuv/include/libyuv/scale_row.h index 791fbf7d05..cc1c906195 100644 --- a/third_party/libyuv/include/libyuv/scale_row.h +++ b/third_party/libyuv/include/libyuv/scale_row.h @@ -19,17 +19,20 @@ namespace libyuv { extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) +#if defined(__pnacl__) || defined(__CLR_VER) || \ + (defined(__native_client__) && defined(__x86_64__)) || \ + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif +#if defined(__native_client__) +#define LIBYUV_DISABLE_NEON +#endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif - // GCC >= 4.7.0 required for AVX2. #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) @@ -45,8 +48,8 @@ extern "C" { #endif // __clang__ // Visual C 2012 required for AVX2. -#if defined(_M_IX86) && !defined(__clang__) && \ - defined(_MSC_VER) && _MSC_VER >= 1700 +#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ + _MSC_VER >= 1700 #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 @@ -55,6 +58,7 @@ extern "C" { (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) #define HAS_FIXEDDIV1_X86 #define HAS_FIXEDDIV_X86 +#define HAS_SCALEADDROW_SSE2 #define HAS_SCALEARGBCOLS_SSE2 #define HAS_SCALEARGBCOLSUP2_SSE2 #define HAS_SCALEARGBFILTERCOLS_SSSE3 @@ -66,23 +70,62 @@ extern "C" { #define HAS_SCALEROWDOWN34_SSSE3 #define HAS_SCALEROWDOWN38_SSSE3 #define HAS_SCALEROWDOWN4_SSSE3 -#define HAS_SCALEADDROW_SSE2 +#endif + +// The following are available for gcc/clang x86 platforms: +// TODO(fbarchard): Port to Visual C +#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) +#define HAS_SCALEUVROWDOWN2BOX_SSSE3 +#define HAS_SCALEROWUP2LINEAR_SSE2 +#define HAS_SCALEROWUP2LINEAR_SSSE3 +#define HAS_SCALEROWUP2BILINEAR_SSE2 +#define HAS_SCALEROWUP2BILINEAR_SSSE3 +#define HAS_SCALEROWUP2LINEAR_12_SSSE3 +#define HAS_SCALEROWUP2BILINEAR_12_SSSE3 +#define HAS_SCALEROWUP2LINEAR_16_SSE2 +#define HAS_SCALEROWUP2BILINEAR_16_SSE2 +#define HAS_SCALEUVROWUP2LINEAR_SSSE3 +#define HAS_SCALEUVROWUP2BILINEAR_SSSE3 +#define HAS_SCALEUVROWUP2LINEAR_16_SSE41 +#define HAS_SCALEUVROWUP2BILINEAR_16_SSE41 +#endif + +// The following are available for gcc/clang x86 platforms, but +// require clang 3.4 or gcc 4.7. +// TODO(fbarchard): Port to Visual C +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__)) && \ + (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#define HAS_SCALEUVROWDOWN2BOX_AVX2 +#define HAS_SCALEROWUP2LINEAR_AVX2 +#define HAS_SCALEROWUP2BILINEAR_AVX2 +#define HAS_SCALEROWUP2LINEAR_12_AVX2 +#define HAS_SCALEROWUP2BILINEAR_12_AVX2 +#define HAS_SCALEROWUP2LINEAR_16_AVX2 +#define HAS_SCALEROWUP2BILINEAR_16_AVX2 +#define HAS_SCALEUVROWUP2LINEAR_AVX2 +#define HAS_SCALEUVROWUP2BILINEAR_AVX2 +#define HAS_SCALEUVROWUP2LINEAR_16_AVX2 +#define HAS_SCALEUVROWUP2BILINEAR_16_AVX2 #endif // The following are available on all x86 platforms, but // require VS2012, clang 3.4 or gcc 4.7. // The code supports NaCL but requires a new compiler and validator. -#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \ - defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \ + defined(GCC_HAS_AVX2)) #define HAS_SCALEADDROW_AVX2 #define HAS_SCALEROWDOWN2_AVX2 #define HAS_SCALEROWDOWN4_AVX2 #endif // The following are available on Neon platforms: -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ +#if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) +#define HAS_SCALEADDROW_NEON #define HAS_SCALEARGBCOLS_NEON +#define HAS_SCALEARGBFILTERCOLS_NEON #define HAS_SCALEARGBROWDOWN2_NEON #define HAS_SCALEARGBROWDOWNEVEN_NEON #define HAS_SCALEFILTERCOLS_NEON @@ -90,410 +133,1559 @@ extern "C" { #define HAS_SCALEROWDOWN34_NEON #define HAS_SCALEROWDOWN38_NEON #define HAS_SCALEROWDOWN4_NEON -#define HAS_SCALEARGBFILTERCOLS_NEON +#define HAS_SCALEUVROWDOWN2BOX_NEON +#define HAS_SCALEUVROWDOWNEVEN_NEON +#define HAS_SCALEROWUP2LINEAR_NEON +#define HAS_SCALEROWUP2BILINEAR_NEON +#define HAS_SCALEROWUP2LINEAR_12_NEON +#define HAS_SCALEROWUP2BILINEAR_12_NEON +#define HAS_SCALEROWUP2LINEAR_16_NEON +#define HAS_SCALEROWUP2BILINEAR_16_NEON +#define HAS_SCALEUVROWUP2LINEAR_NEON +#define HAS_SCALEUVROWUP2BILINEAR_NEON +#define HAS_SCALEUVROWUP2LINEAR_16_NEON +#define HAS_SCALEUVROWUP2BILINEAR_16_NEON #endif -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ - defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_SCALEROWDOWN2_DSPR2 -#define HAS_SCALEROWDOWN4_DSPR2 -#define HAS_SCALEROWDOWN34_DSPR2 -#define HAS_SCALEROWDOWN38_DSPR2 +#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) +#define HAS_SCALEADDROW_MSA +#define HAS_SCALEARGBCOLS_MSA +#define HAS_SCALEARGBFILTERCOLS_MSA +#define HAS_SCALEARGBROWDOWN2_MSA +#define HAS_SCALEARGBROWDOWNEVEN_MSA +#define HAS_SCALEFILTERCOLS_MSA +#define HAS_SCALEROWDOWN2_MSA +#define HAS_SCALEROWDOWN34_MSA +#define HAS_SCALEROWDOWN38_MSA +#define HAS_SCALEROWDOWN4_MSA +#endif + +#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) +#define HAS_SCALEARGBROWDOWN2_LSX +#define HAS_SCALEARGBROWDOWNEVEN_LSX +#define HAS_SCALEROWDOWN2_LSX +#define HAS_SCALEROWDOWN4_LSX +#define HAS_SCALEROWDOWN38_LSX +#define HAS_SCALEFILTERCOLS_LSX +#define HAS_SCALEADDROW_LSX +#define HAS_SCALEARGBCOLS_LSX +#define HAS_SCALEARGBFILTERCOLS_LSX +#define HAS_SCALEROWDOWN34_LSX #endif // Scale ARGB vertically with bilinear interpolation. void ScalePlaneVertical(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int y, int dy, - int bpp, enum FilterMode filtering); + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint8_t* src_argb, + uint8_t* dst_argb, + int x, + int y, + int dy, + int bpp, + enum FilterMode filtering); void ScalePlaneVertical_16(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint16* src_argb, uint16* dst_argb, - int x, int y, int dy, - int wpp, enum FilterMode filtering); + int dst_width, + int dst_height, + int src_stride, + int dst_stride, + const uint16_t* src_argb, + uint16_t* dst_argb, + int x, + int y, + int dy, + int wpp, + enum FilterMode filtering); // Simplify the filtering based on scale factors. -enum FilterMode ScaleFilterReduce(int src_width, int src_height, - int dst_width, int dst_height, +enum FilterMode ScaleFilterReduce(int src_width, + int src_height, + int dst_width, + int dst_height, enum FilterMode filtering); // Divide num by div and return as 16.16 fixed point result. int FixedDiv_C(int num, int div); int FixedDiv_X86(int num, int div); +int FixedDiv_MIPS(int num, int div); // Divide num - 1 by div - 1 and return as 16.16 fixed point result. int FixedDiv1_C(int num, int div); int FixedDiv1_X86(int num, int div); +int FixedDiv1_MIPS(int num, int div); #ifdef HAS_FIXEDDIV_X86 #define FixedDiv FixedDiv_X86 #define FixedDiv1 FixedDiv1_X86 +#elif defined HAS_FIXEDDIV_MIPS +#define FixedDiv FixedDiv_MIPS +#define FixedDiv1 FixedDiv1_MIPS #else #define FixedDiv FixedDiv_C #define FixedDiv1 FixedDiv1_C #endif // Compute slope values for stepping. -void ScaleSlope(int src_width, int src_height, - int dst_width, int dst_height, +void ScaleSlope(int src_width, + int src_height, + int dst_width, + int dst_height, enum FilterMode filtering, - int* x, int* y, int* dx, int* dy); - -void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* d, int dst_width); -void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* d, int dst_width); -void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx); -void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int, int); -void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int, int); -void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx); -void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx); -void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst, int dst_width); -void ScaleRowDown38_3_Box_C(const uint8* src_ptr, + int* x, + int* y, + int* dx, + int* dy); + +void ScaleRowDown2_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown2Linear_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown2Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown4_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown4Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown34_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown34_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); +void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* d, + int dst_width); +void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); +void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* d, + int dst_width); + +void ScaleRowUp2_Linear_C(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_Any_C(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_Any_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_16_Any_C(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_16_Any_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); + +void ScaleCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleColsUp2_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int, + int); +void ScaleColsUp2_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int, + int); +void ScaleFilterCols_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleFilterCols_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleFilterCols64_C(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x32, + int dx); +void ScaleFilterCols64_16_C(uint16_t* dst_ptr, + const uint16_t* src_ptr, + int dst_width, + int x32, + int dx); +void ScaleRowDown38_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst, + int dst_width); +void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int dst_width); -void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width); -void ScaleARGBRowDown2_C(const uint8* src_argb, + uint16_t* dst_ptr, + int dst_width); +void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_16_C(const uint16_t* src_ptr, + uint32_t* dst_ptr, + int src_width); +void ScaleARGBRowDown2_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_C(const uint8* src_argb, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_C(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int, int); -void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); + uint8_t* dst_argb, + int dst_width); +void ScaleARGBCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x32, + int dx); +void ScaleARGBColsUp2_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int, + int); +void ScaleARGBFilterCols_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols64_C(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x32, + int dx); +void ScaleUVRowDown2_C(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Linear_C(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Box_C(const uint8_t* src_uv, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEven_C(const uint8_t* src_uv, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width); + +void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_Any_C(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_Any_C(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_16_Any_C(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_16_Any_C(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); + +void ScaleUVCols_C(uint8_t* dst_uv, + const uint8_t* src_uv, + int dst_width, + int x, + int dx); +void ScaleUVCols64_C(uint8_t* dst_uv, + const uint8_t* src_uv, + int dst_width, + int x32, + int dx); +void ScaleUVColsUp2_C(uint8_t* dst_uv, + const uint8_t* src_uv, + int dst_width, + int, + int); +void ScaleUVFilterCols_C(uint8_t* dst_uv, + const uint8_t* src_uv, + int dst_width, + int x, + int dx); +void ScaleUVFilterCols64_C(uint8_t* dst_uv, + const uint8_t* src_uv, + int dst_width, + int x32, + int dx); // Specialized scalers for x86. -void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, +void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); + +void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -void ScaleRowDown34_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_Any_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); + +void ScaleRowUp2_Linear_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_SSE2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_12_SSSE3(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_12_SSSE3(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_16_SSE2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_16_SSE2(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_12_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_12_AVX2(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_16_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_Any_SSE2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_12_Any_SSSE3(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_12_Any_SSSE3(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_16_Any_SSE2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_16_Any_SSSE3(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_Any_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_12_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_12_Any_AVX2(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_16_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_16_Any_AVX2(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); + +void ScaleRowDown2_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Odd_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Odd_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); + +void ScaleRowDown34_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_Any_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_Any_SSSE3(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width); + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); -void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); +void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); +void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); +void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleColsUp2_SSE2(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); // ARGB Column functions -void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols_Any_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols_Any_NEON(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); +void ScaleARGBCols_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBCols_NEON(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBCols_MSA(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); // ARGB Row functions -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleARGBRowDown2Linear_NEON(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleARGBRowDown2_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_Any_SSE2(const uint8* src_argb, +void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2_LSX(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Linear_LSX(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2Box_LSX(const uint8_t* src_argb, + ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Linear_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleARGBRowDown2Linear_Any_NEON(const uint8* src_argb, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Box_Any_SSE2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Linear_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); - -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Linear_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDown2Box_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_Any_SSE2(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_LSX(const uint8_t* src_argb, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEvenBox_LSX(const uint8_t* src_argb, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_argb, + int dst_width); +void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8* src_argb, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_Any_NEON(const uint8* src_argb, ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEven_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_Any_NEON(const uint8* src_argb, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEvenBox_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width); + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEven_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleARGBRowDownEvenBox_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_ptr, + int dst_width); + +// UV Row functions +void ScaleUVRowDown2_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Linear_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleUVRowDown2Linear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleUVRowDown2_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Linear_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2Box_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2Box_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2Linear_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2Linear_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDown2Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDownEven_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEvenBox_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEven_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEvenBox_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEvenBox_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_uv, + int dst_width); +void ScaleUVRowDownEven_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDownEvenBox_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDownEven_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDownEvenBox_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDownEven_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int32_t src_stepx, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowDownEvenBox_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + int src_stepx, + uint8_t* dst_ptr, + int dst_width); + +void ScaleUVRowUp2_Linear_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_Any_SSSE3(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_Any_AVX2(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_16_SSE41(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_16_Any_SSE41(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_16_Any_SSE41(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_16_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_16_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_16_Any_AVX2(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_16_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleUVRowUp2_Linear_16_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleUVRowUp2_Bilinear_16_Any_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); // ScaleRowDown2Box also used by planar functions // NEON downscalers with interpolation. // Note - not static due to reuse in convert for 444 to 420. -void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); - -void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown2_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); + +void ScaleRowDown4_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. -void ScaleRowDown34_NEON(const uint8* src_ptr, +void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); // 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, +void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); // 32x3 -> 12x1 -void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, +void ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, +void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); + uint8_t* dst_ptr, + int dst_width); + +void ScaleRowDown2_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Odd_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); // 32 -> 12 -void ScaleRowDown38_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown38_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); // 32x3 -> 12x1 -void ScaleRowDown38_3_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown38_3_Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); // 32x2 -> 12x1 -void ScaleRowDown38_2_Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -void ScaleAddRow_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width); -void ScaleAddRow_Any_NEON(const uint8* src_ptr, uint16* dst_ptr, int src_width); - -void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); - -void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); - -void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); +void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); + +void ScaleRowUp2_Linear_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_12_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_12_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_16_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_16_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_Any_NEON(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_12_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_12_Any_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); +void ScaleRowUp2_Linear_16_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int dst_width); +void ScaleRowUp2_Bilinear_16_Any_NEON(const uint16_t* src_ptr, + ptrdiff_t src_stride, + uint16_t* dst_ptr, + ptrdiff_t dst_stride, + int dst_width); + +void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleAddRow_Any_NEON(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); + +void ScaleFilterCols_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); + +void ScaleFilterCols_Any_NEON(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); + +void ScaleRowDown2_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleFilterCols_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleRowDown34_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); +void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); + +void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleAddRow_Any_MSA(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); +void ScaleFilterCols_Any_MSA(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); + +void ScaleRowDown2_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Linear_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown2Box_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown4Box_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown38_2_Box_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleAddRow_LSX(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); +void ScaleFilterCols_LSX(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_LSX(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleARGBCols_LSX(uint8_t* dst_argb, + const uint8_t* src_argb, + int dst_width, + int x, + int dx); +void ScaleRowDown34_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst, + int dst_width); +void ScaleRowDown34_0_Box_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); +void ScaleRowDown34_1_Box_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* d, + int dst_width); +void ScaleRowDown2_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Linear_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown2Box_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown4Box_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_2_Box_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown38_3_Box_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleAddRow_Any_LSX(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int src_width); +void ScaleFilterCols_Any_LSX(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBCols_Any_LSX(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleARGBFilterCols_Any_LSX(uint8_t* dst_ptr, + const uint8_t* src_ptr, + int dst_width, + int x, + int dx); +void ScaleRowDown34_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_0_Box_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); +void ScaleRowDown34_1_Box_Any_LSX(const uint8_t* src_ptr, + ptrdiff_t src_stride, + uint8_t* dst_ptr, + int dst_width); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/include/libyuv/scale_uv.h b/third_party/libyuv/include/libyuv/scale_uv.h new file mode 100644 index 0000000000..8e74e3195b --- /dev/null +++ b/third_party/libyuv/include/libyuv/scale_uv.h @@ -0,0 +1,51 @@ +/* + * Copyright 2020 The LibYuv Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef INCLUDE_LIBYUV_SCALE_UV_H_ +#define INCLUDE_LIBYUV_SCALE_UV_H_ + +#include "libyuv/basic_types.h" +#include "libyuv/scale.h" // For FilterMode + +#ifdef __cplusplus +namespace libyuv { +extern "C" { +#endif + +LIBYUV_API +int UVScale(const uint8_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint8_t* dst_uv, + int dst_stride_uv, + int dst_width, + int dst_height, + enum FilterMode filtering); + +// Scale a 16 bit UV image. +// This function is currently incomplete, it can't handle all cases. +LIBYUV_API +int UVScale_16(const uint16_t* src_uv, + int src_stride_uv, + int src_width, + int src_height, + uint16_t* dst_uv, + int dst_stride_uv, + int dst_width, + int dst_height, + enum FilterMode filtering); + +#ifdef __cplusplus +} // extern "C" +} // namespace libyuv +#endif + +#endif // INCLUDE_LIBYUV_SCALE_UV_H_ diff --git a/third_party/libyuv/include/libyuv/version.h b/third_party/libyuv/include/libyuv/version.h index 3a8f6337ca..a2d9fce0d3 100644 --- a/third_party/libyuv/include/libyuv/version.h +++ b/third_party/libyuv/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1622 +#define LIBYUV_VERSION 1822 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/third_party/libyuv/include/libyuv/video_common.h b/third_party/libyuv/include/libyuv/video_common.h index cb425426a2..32b8a5210b 100644 --- a/third_party/libyuv/include/libyuv/video_common.h +++ b/third_party/libyuv/include/libyuv/video_common.h @@ -28,13 +28,14 @@ extern "C" { // Needs to be a macro otherwise the OS X compiler complains when the kFormat* // constants are used in a switch. #ifdef __cplusplus -#define FOURCC(a, b, c, d) ( \ - (static_cast(a)) | (static_cast(b) << 8) | \ - (static_cast(c) << 16) | (static_cast(d) << 24)) +#define FOURCC(a, b, c, d) \ + ((static_cast(a)) | (static_cast(b) << 8) | \ + (static_cast(c) << 16) | /* NOLINT */ \ + (static_cast(d) << 24)) /* NOLINT */ #else -#define FOURCC(a, b, c, d) ( \ - ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \ - ((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */ +#define FOURCC(a, b, c, d) \ + (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \ + ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */ #endif // Some pages discussing FourCC codes: @@ -49,49 +50,69 @@ extern "C" { // Secondary formats are converted in 2 steps. // Auxilliary formats call primary converters. enum FourCC { - // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. + // 10 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. FOURCC_I420 = FOURCC('I', '4', '2', '0'), FOURCC_I422 = FOURCC('I', '4', '2', '2'), FOURCC_I444 = FOURCC('I', '4', '4', '4'), - FOURCC_I411 = FOURCC('I', '4', '1', '1'), FOURCC_I400 = FOURCC('I', '4', '0', '0'), FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), + FOURCC_I010 = FOURCC('I', '0', '1', '0'), // bt.601 10 bit 420 + FOURCC_I210 = FOURCC('I', '2', '1', '0'), // bt.601 10 bit 422 - // 2 Secondary YUV formats: row biplanar. + // 1 Secondary YUV format: row biplanar. deprecated. FOURCC_M420 = FOURCC('M', '4', '2', '0'), - FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated. - // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. + // 13 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc 2 64 bpp FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), + FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010. + FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit + FOURCC_AR64 = FOURCC('A', 'R', '6', '4'), // 16 bit per channel. + FOURCC_AB64 = FOURCC('A', 'B', '6', '4'), // ABGR version of 16 bit FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), - FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), + FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE. FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE. FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE. - // 4 Secondary RGB formats: 4 Bayer Patterns. deprecated. - FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), - FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), - FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), - FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), - // 1 Primary Compressed YUV format. FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), - // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. + // 14 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420. - FOURCC_J420 = FOURCC('J', '4', '2', '0'), - FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc - FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc + FOURCC_J420 = + FOURCC('J', '4', '2', '0'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_J422 = + FOURCC('J', '4', '2', '2'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_J444 = + FOURCC('J', '4', '4', '4'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_J400 = + FOURCC('J', '4', '0', '0'), // jpeg (bt.601 full), unofficial fourcc + FOURCC_F420 = FOURCC('F', '4', '2', '0'), // bt.709 full, unofficial fourcc + FOURCC_F422 = FOURCC('F', '4', '2', '2'), // bt.709 full, unofficial fourcc + FOURCC_F444 = FOURCC('F', '4', '4', '4'), // bt.709 full, unofficial fourcc + FOURCC_H420 = FOURCC('H', '4', '2', '0'), // bt.709, unofficial fourcc + FOURCC_H422 = FOURCC('H', '4', '2', '2'), // bt.709, unofficial fourcc + FOURCC_H444 = FOURCC('H', '4', '4', '4'), // bt.709, unofficial fourcc + FOURCC_U420 = FOURCC('U', '4', '2', '0'), // bt.2020, unofficial fourcc + FOURCC_U422 = FOURCC('U', '4', '2', '2'), // bt.2020, unofficial fourcc + FOURCC_U444 = FOURCC('U', '4', '4', '4'), // bt.2020, unofficial fourcc + FOURCC_F010 = FOURCC('F', '0', '1', '0'), // bt.709 full range 10 bit 420 + FOURCC_H010 = FOURCC('H', '0', '1', '0'), // bt.709 10 bit 420 + FOURCC_U010 = FOURCC('U', '0', '1', '0'), // bt.2020 10 bit 420 + FOURCC_F210 = FOURCC('F', '2', '1', '0'), // bt.709 full range 10 bit 422 + FOURCC_H210 = FOURCC('H', '2', '1', '0'), // bt.709 10 bit 422 + FOURCC_U210 = FOURCC('U', '2', '1', '0'), // bt.2020 10 bit 422 + FOURCC_P010 = FOURCC('P', '0', '1', '0'), + FOURCC_P210 = FOURCC('P', '2', '1', '0'), // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. @@ -112,7 +133,13 @@ enum FourCC { FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP. FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO. - // 1 Auxiliary compressed YUV format set aside for capturer. + // deprecated formats. Not supported, but defined for backward compatibility. + FOURCC_I411 = FOURCC('I', '4', '1', '1'), + FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), + FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), + FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), + FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), + FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), FOURCC_H264 = FOURCC('H', '2', '6', '4'), // Match any fourcc. @@ -130,14 +157,18 @@ enum FourCCBpp { FOURCC_BPP_NV12 = 12, FOURCC_BPP_YUY2 = 16, FOURCC_BPP_UYVY = 16, - FOURCC_BPP_M420 = 12, + FOURCC_BPP_M420 = 12, // deprecated FOURCC_BPP_Q420 = 12, FOURCC_BPP_ARGB = 32, FOURCC_BPP_BGRA = 32, FOURCC_BPP_ABGR = 32, FOURCC_BPP_RGBA = 32, + FOURCC_BPP_AR30 = 32, + FOURCC_BPP_AB30 = 32, + FOURCC_BPP_AR64 = 64, + FOURCC_BPP_AB64 = 64, FOURCC_BPP_24BG = 24, - FOURCC_BPP_RAW = 24, + FOURCC_BPP_RAW = 24, FOURCC_BPP_RGBP = 16, FOURCC_BPP_RGBO = 16, FOURCC_BPP_R444 = 16, @@ -152,6 +183,13 @@ enum FourCCBpp { FOURCC_BPP_J420 = 12, FOURCC_BPP_J400 = 8, FOURCC_BPP_H420 = 12, + FOURCC_BPP_H422 = 16, + FOURCC_BPP_I010 = 15, + FOURCC_BPP_I210 = 20, + FOURCC_BPP_H010 = 15, + FOURCC_BPP_H210 = 20, + FOURCC_BPP_P010 = 15, + FOURCC_BPP_P210 = 20, FOURCC_BPP_MJPG = 0, // 0 means unknown. FOURCC_BPP_H264 = 0, FOURCC_BPP_IYUV = 12, @@ -170,11 +208,11 @@ enum FourCCBpp { FOURCC_BPP_CM24 = 24, // Match any fourcc. - FOURCC_BPP_ANY = 0, // 0 means unknown. + FOURCC_BPP_ANY = 0, // 0 means unknown. }; // Converts fourcc aliases into canonical ones. -LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc); +LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc); #ifdef __cplusplus } // extern "C" diff --git a/third_party/libyuv/larch64/lib/libyuv.a b/third_party/libyuv/larch64/lib/libyuv.a index fdbcda2517..402463a694 100644 Binary files a/third_party/libyuv/larch64/lib/libyuv.a and b/third_party/libyuv/larch64/lib/libyuv.a differ