openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

247 lines
11 KiB

/*
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_MACROS_MSA_H_
#define INCLUDE_LIBYUV_MACROS_MSA_H_
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include <msa.h>
3 years ago
#include <stdint.h>
#if (__mips_isa_rev >= 6)
#define LW(psrc) \
({ \
const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
uint32_t val_m; \
asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
: [val_m] "=r"(val_m) \
: [psrc_lw_m] "m"(*psrc_lw_m)); \
val_m; \
})
#if (__mips == 64)
#define LD(psrc) \
({ \
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint64_t val_m = 0; \
asm volatile("ld %[val_m], %[psrc_ld_m] \n" \
: [val_m] "=r"(val_m) \
: [psrc_ld_m] "m"(*psrc_ld_m)); \
val_m; \
})
#else // !(__mips == 64)
#define LD(psrc) \
({ \
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint32_t val0_m, val1_m; \
uint64_t val_m = 0; \
val0_m = LW(psrc_ld_m); \
val1_m = LW(psrc_ld_m + 4); \
val_m = (uint64_t)(val1_m); /* NOLINT */ \
val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
val_m; \
})
#endif // (__mips == 64)
#define SW(val, pdst) \
({ \
uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
uint32_t val_m = (val); \
asm volatile("sw %[val_m], %[pdst_sw_m] \n" \
: [pdst_sw_m] "=m"(*pdst_sw_m) \
: [val_m] "r"(val_m)); \
})
#if (__mips == 64)
#define SD(val, pdst) \
({ \
uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
uint64_t val_m = (val); \
asm volatile("sd %[val_m], %[pdst_sd_m] \n" \
: [pdst_sd_m] "=m"(*pdst_sd_m) \
: [val_m] "r"(val_m)); \
})
#else // !(__mips == 64)
#define SD(val, pdst) \
({ \
uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
uint32_t val0_m, val1_m; \
val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
SW(val0_m, pdst_sd_m); \
SW(val1_m, pdst_sd_m + 4); \
})
#endif // !(__mips == 64)
#else // !(__mips_isa_rev >= 6)
#define LW(psrc) \
({ \
uint8_t* psrc_lw_m = (uint8_t*)(psrc); \
uint32_t val_lw_m; \
\
__asm__ volatile( \
"lwr %[val_lw_m], 0(%[psrc_lw_m]) \n\t" \
"lwl %[val_lw_m], 3(%[psrc_lw_m]) \n\t" \
\
: [val_lw_m] "=&r"(val_lw_m) \
: [psrc_lw_m] "r"(psrc_lw_m)); \
\
val_lw_m; \
})
3 years ago
#if (__mips == 64)
#define LD(psrc) \
({ \
uint8_t* psrc_ld_m = (uint8_t*)(psrc); \
uint64_t val_ld_m = 0; \
\
__asm__ volatile( \
"ldr %[val_ld_m], 0(%[psrc_ld_m]) \n\t" \
"ldl %[val_ld_m], 7(%[psrc_ld_m]) \n\t" \
\
: [val_ld_m] "=&r"(val_ld_m) \
: [psrc_ld_m] "r"(psrc_ld_m)); \
\
val_ld_m; \
})
#else // !(__mips == 64)
#define LD(psrc) \
({ \
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint32_t val0_m, val1_m; \
uint64_t val_m = 0; \
val0_m = LW(psrc_ld_m); \
val1_m = LW(psrc_ld_m + 4); \
val_m = (uint64_t)(val1_m); /* NOLINT */ \
val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
val_m; \
})
#endif // (__mips == 64)
3 years ago
#define SW(val, pdst) \
({ \
uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
uint32_t val_m = (val); \
asm volatile("usw %[val_m], %[pdst_sw_m] \n" \
: [pdst_sw_m] "=m"(*pdst_sw_m) \
: [val_m] "r"(val_m)); \
})
#define SD(val, pdst) \
({ \
uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
uint32_t val0_m, val1_m; \
val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
SW(val0_m, pdst_sd_m); \
SW(val1_m, pdst_sd_m + 4); \
})
#endif // (__mips_isa_rev >= 6)
// TODO(fbarchard): Consider removing __VAR_ARGS versions.
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__)
#define LD_H(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
#define LD_UH(...) LD_H(const v8u16, __VA_ARGS__)
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
3 years ago
#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
/* Description : Load two vectors with 16 'byte' sized elements
Arguments : Inputs - psrc, stride
Outputs - out0, out1
Return Type - as per RTYPE
Details : Load 16 byte elements in 'out0' from (psrc)
Load 16 byte elements in 'out1' from (psrc + stride)
*/
3 years ago
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
{ \
out0 = LD_B(RTYPE, (psrc)); \
out1 = LD_B(RTYPE, (psrc) + stride); \
}
#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__)
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
{ \
LD_B2(RTYPE, (psrc), stride, out0, out1); \
LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
}
#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__)
/* Description : Store two vectors with stride each having 16 'byte' sized
elements
Arguments : Inputs - in0, in1, pdst, stride
Details : Store 16 byte elements from 'in0' to (pdst)
Store 16 byte elements from 'in1' to (pdst + stride)
*/
3 years ago
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
{ \
ST_B(RTYPE, in0, (pdst)); \
ST_B(RTYPE, in1, (pdst) + stride); \
}
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
3 years ago
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
{ \
ST_B2(RTYPE, in0, in1, (pdst), stride); \
ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
}
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
3 years ago
/* Description : Store vectors of 8 halfword elements with stride
Arguments : Inputs - in0, in1, pdst, stride
Details : Store 8 halfword elements from 'in0' to (pdst)
Store 8 halfword elements from 'in1' to (pdst + stride)
*/
#define ST_H2(RTYPE, in0, in1, pdst, stride) \
{ \
ST_H(RTYPE, in0, (pdst)); \
ST_H(RTYPE, in1, (pdst) + stride); \
}
#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)
// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
/* Description : Shuffle byte vector elements as per mask vector
Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
Outputs - out0, out1
Return Type - as per RTYPE
Details : Byte elements from 'in0' & 'in1' are copied selectively to
'out0' as per control vector 'mask0'
*/
3 years ago
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
{ \
out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
}
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
3 years ago
/* Description : Interleave both left and right half of input vectors
Arguments : Inputs - in0, in1
Outputs - out0, out1
Return Type - as per RTYPE
Details : Right half of byte elements from 'in0' and 'in1' are
interleaved and written to 'out0'
*/
#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
{ \
out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
}
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
#endif // INCLUDE_LIBYUV_MACROS_MSA_H_