You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3742 lines
149 KiB
3742 lines
149 KiB
3 days ago
|
use crate::helpers::{extract_mantissa, f16_hi, f16_lo, ldexp, nth, sign_ext, IEEEClass, VOPModifier};
|
||
|
use crate::helpers::DEBUG;
|
||
|
use crate::state::{Register, Value, VecDataStore, WaveValue, VGPR};
|
||
|
use crate::todo_instr;
|
||
|
use half::{bf16, f16};
|
||
|
use crate::rdna3::{Instruction, decode};
|
||
|
use num_traits::Float;
|
||
|
|
||
|
/// Number of scalar general-purpose registers (SGPRs) addressable per wave.
pub const SGPR_COUNT: usize = 128;

/// Scalar operand encoding for the vector condition code (VCC) register pair
/// (the code reads `scalar_reg[107]` as its high half when used as a base).
pub const VCC: usize = 106;
/// Scalar operand encoding for the EXEC lane mask register.
pub const EXEC: usize = 126;
/// Scalar operand encoding for the null register (writes here are discarded).
pub const NULL_SRC: usize = 124;
/// Highest operand encoding that still addresses a plain SGPR.
pub const SGPR_SRC: usize = 105;

/// Number of vector general-purpose registers (VGPRs) per lane.
const VGPR_COUNT: usize = 256;
/// Operand encoding selecting the inline 32-bit literal constant
/// carried alongside the instruction word.
const SIMM_SRC: usize = 255;

// NOTE(review): presumably the encoding of `s_endpgm` — used as the
// end-of-program sentinel when scanning the instruction stream.
pub const END_PRG: u32 = 0xbfb00000;
|
||
|
|
||
|
pub struct Thread<'a> {
|
||
|
pub scalar_reg: &'a mut [u32; SGPR_COUNT],
|
||
|
pub scc: &'a mut u32, // SCC is physically an sgpr, unclear which one
|
||
|
|
||
|
pub vec_reg: &'a mut VGPR,
|
||
|
pub vcc: &'a mut WaveValue,
|
||
|
pub exec: &'a mut WaveValue,
|
||
|
|
||
|
pub lds: &'a mut VecDataStore,
|
||
|
pub sds: &'a mut VecDataStore,
|
||
|
|
||
|
pub pc_offset: usize,
|
||
|
pub stream: Vec<u32>,
|
||
|
pub simm: Option<u32>,
|
||
|
pub sgpr_co: &'a mut Option<(usize, WaveValue)>,
|
||
|
pub warp_size: usize,
|
||
|
pub scalar: bool,
|
||
|
}
|
||
|
|
||
|
impl<'a> Thread<'a> {
|
||
|
pub fn interpret(&mut self) -> Result<(), i32> {
|
||
|
let instruction = self.stream[self.pc_offset];
|
||
|
let decoded = decode(self.stream[self.pc_offset], self.stream.get(self.pc_offset+1));
|
||
|
if *DEBUG {
|
||
|
print!("{:?}", decoded);
|
||
|
}
|
||
|
if let Instruction::SMEM { sbase, sdata, op, offset, soffset, .. } = decoded {
|
||
|
let _ = self.u64_instr();
|
||
|
let soffset: u32 = self.val(soffset as usize);
|
||
|
|
||
|
// TODO: refactor vcc_lo to store in scalar register 106
|
||
|
let base_addr = match sbase as usize {
|
||
|
VCC => ((self.scalar_reg[107] as u64) << 32) | self.vcc.value as u64,
|
||
|
s => self.scalar_reg.read64(s),
|
||
|
};
|
||
|
let addr = (base_addr as i64 + offset as i64 + soffset as i64) as u64;
|
||
|
|
||
|
match op {
|
||
|
0..=4 => (0..2_usize.pow(op as u32)).for_each(|i| {
|
||
|
let ret = unsafe { *((addr + (4 * i as u64)) as *const u32) };
|
||
|
self.write_to_sdst(sdata as usize + i, ret);
|
||
|
}),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
self.scalar = true;
|
||
|
}
|
||
|
else if let Instruction::SOP1 { ssrc0, op, sdst } = decoded {
|
||
|
let src = ssrc0 as usize;
|
||
|
let sdst = sdst as usize;
|
||
|
|
||
|
match op {
|
||
|
1 => {
|
||
|
let s0 = self.val(src);
|
||
|
let ret = match op {
|
||
|
1 => s0,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
self.scalar_reg.write64(sdst as usize, ret);
|
||
|
}
|
||
|
_ => {
|
||
|
let s0 = self.val(src);
|
||
|
let ret = match op {
|
||
|
0 => s0,
|
||
|
10 => self.clz_i32_u32(s0),
|
||
|
12 => self.cls_i32(s0),
|
||
|
4 => s0.reverse_bits(),
|
||
|
14 => s0 as i8 as i32 as u32,
|
||
|
15 => s0 as i16 as i32 as u32,
|
||
|
16 | 18 => {
|
||
|
let sdst: u32 = self.val(sdst as usize);
|
||
|
if op == 16 {
|
||
|
sdst & !(1 << (s0 & 0x1f))
|
||
|
} else {
|
||
|
sdst | (1 << (s0 & 0x1f))
|
||
|
}
|
||
|
}
|
||
|
21 => {
|
||
|
let s0 = s0 as i32;
|
||
|
let ret = s0.abs();
|
||
|
*self.scc = (ret != 0) as u32;
|
||
|
ret as u32
|
||
|
}
|
||
|
30 => {
|
||
|
let ret = !s0;
|
||
|
*self.scc = (ret != 0) as u32;
|
||
|
ret
|
||
|
}
|
||
|
32 | 34 | 48 => {
|
||
|
let saveexec = self.exec.value;
|
||
|
self.exec.value = match op {
|
||
|
32 => s0 & saveexec,
|
||
|
34 => s0 | saveexec,
|
||
|
48 => s0 & !saveexec,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
*self.scc = (self.exec.value != 0) as u32;
|
||
|
saveexec
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
|
||
|
self.write_to_sdst(sdst, ret);
|
||
|
}
|
||
|
};
|
||
|
self.scalar = true;
|
||
|
}
|
||
|
else if let Instruction::SOPC { ssrc0, ssrc1, op } = decoded {
|
||
|
let s0 = ssrc0 as usize;
|
||
|
let s1 = ssrc1 as usize;
|
||
|
|
||
|
/// Evaluate one scalar-compare relation for the SOPC opcode table.
///
/// `offset` is the opcode of the first comparison in the group (e.g. 0 for
/// signed i32 compares, 6 for unsigned, 16 for u64), so the same relation
/// table serves every operand width: `op - offset` selects
/// 0 => ==, 1 => !=, 2 => >, 3 => >=, 4 => <, otherwise <=.
///
/// Note: `PartialOrd` already implies `PartialEq`, so a single bound suffices.
fn scmp<T: PartialOrd>(s0: T, s1: T, offset: u8, op: u8) -> bool {
    match op - offset {
        0 => s0 == s1,
        1 => s0 != s1,
        2 => s0 > s1,
        3 => s0 >= s1,
        4 => s0 < s1,
        _ => s0 <= s1,
    }
}
|
||
|
*self.scc = match op {
|
||
|
0..=5 => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(s0), self.val(s1));
|
||
|
scmp(s0 as i32, s1 as i32, 0, op)
|
||
|
}
|
||
|
6..=11 => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(s0), self.val(s1));
|
||
|
scmp(s0, s1, 6, op)
|
||
|
}
|
||
|
12 => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(s0), self.val(s1));
|
||
|
s0 & (1 << (s1 & 0x1F)) == 0
|
||
|
}
|
||
|
13 => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(s0), self.val(s1));
|
||
|
s0 & (1 << (s1 & 0x1F)) == 1
|
||
|
}
|
||
|
16 | 17 => {
|
||
|
let (s0, s1): (u64, u64) = (self.val(s0), self.val(s1));
|
||
|
scmp(s0, s1, 16, op)
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
} as u32;
|
||
|
self.scalar = true;
|
||
|
}
|
||
|
else if let Instruction::SOPP { simm16, op } = decoded {
|
||
|
|
||
|
match op {
|
||
|
32..=42 => {
|
||
|
let should_jump = match op {
|
||
|
32 => true,
|
||
|
33 => *self.scc == 0,
|
||
|
34 => *self.scc == 1,
|
||
|
35 => self.vcc.value == 0,
|
||
|
36 => self.vcc.value != 0,
|
||
|
37 => self.exec.value == 0,
|
||
|
38 => self.exec.value != 0,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if should_jump {
|
||
|
self.pc_offset = (self.pc_offset as i64 + simm16 as i64) as usize;
|
||
|
}
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
self.scalar = true;
|
||
|
}
|
||
|
else if let Instruction::SOPK { simm16, sdst, op } = decoded {
|
||
|
let simm = simm16 as u16;
|
||
|
let sdst = sdst as usize;
|
||
|
let s0: u32 = self.val(sdst);
|
||
|
|
||
|
match op {
|
||
|
0 => self.write_to_sdst(sdst, simm as i16 as i32 as u32),
|
||
|
3..=8 => {
|
||
|
let s1 = simm as i16 as i64;
|
||
|
let s0 = s0 as i32 as i64;
|
||
|
*self.scc = match op {
|
||
|
3 => s0 == s1,
|
||
|
4 => s0 != s1,
|
||
|
5 => s0 > s1,
|
||
|
7 => s0 < s1,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
} as u32
|
||
|
}
|
||
|
9..=14 => {
|
||
|
let s1 = simm as u16 as u32;
|
||
|
*self.scc = match op {
|
||
|
9 => s0 == s1,
|
||
|
10 => s0 != s1,
|
||
|
11 => s0 > s1,
|
||
|
12 => s0 >= s1,
|
||
|
13 => s0 < s1,
|
||
|
14 => s0 <= s1,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
} as u32
|
||
|
}
|
||
|
15 => {
|
||
|
let temp = s0 as i32;
|
||
|
let simm16 = simm as i16;
|
||
|
let dest = (temp as i64 + simm16 as i64) as i32;
|
||
|
self.write_to_sdst(sdst, dest as u32);
|
||
|
let temp_sign = ((temp >> 31) & 1) as u32;
|
||
|
let simm_sign = ((simm16 >> 15) & 1) as u32;
|
||
|
let dest_sign = ((dest >> 31) & 1) as u32;
|
||
|
*self.scc = ((temp_sign == simm_sign) && (temp_sign != dest_sign)) as u32;
|
||
|
}
|
||
|
16 => {
|
||
|
let simm16 = simm as i16;
|
||
|
let ret = (s0 as i32 * simm16 as i32) as u32;
|
||
|
self.write_to_sdst(sdst, ret);
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
self.scalar = true;
|
||
|
}
|
||
|
else if let Instruction::SOP2 { ssrc0, ssrc1, sdst, op } = decoded {
|
||
|
let s0 = ssrc0 as usize;
|
||
|
let s1 = ssrc1 as usize;
|
||
|
let sdst = sdst as usize;
|
||
|
|
||
|
match op {
|
||
|
23 | 25 | 27 => {
|
||
|
let (s0, s1): (u64, u64) = (self.val(s0), self.val(s1));
|
||
|
let ret = match op {
|
||
|
23 => s0 & s1,
|
||
|
25 => s0 | s1,
|
||
|
27 => s0 ^ s1,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
self.scalar_reg.write64(sdst as usize, ret);
|
||
|
*self.scc = (ret != 0) as u32;
|
||
|
}
|
||
|
9 | 13 | 11 | 40 | 41 => {
|
||
|
let (s0, s1): (u64, u32) = (self.val(s0), self.val(s1));
|
||
|
let ret = match op {
|
||
|
9 => {
|
||
|
let ret = s0 << (s1 & 0x3f);
|
||
|
(ret, Some(ret != 0))
|
||
|
}
|
||
|
11 => {
|
||
|
let ret = s0 >> (s1 & 0x3f);
|
||
|
(ret as u64, Some(ret != 0))
|
||
|
}
|
||
|
13 => {
|
||
|
let ret = (s0 as i64) >> (s1 & 0x3f);
|
||
|
(ret as u64, Some(ret != 0))
|
||
|
}
|
||
|
40 => {
|
||
|
let ret = (s0 >> (s1 & 0x3f)) & ((1 << ((s1 >> 16) & 0x7f)) - 1);
|
||
|
(ret as u64, Some(ret != 0))
|
||
|
}
|
||
|
41 => {
|
||
|
let s0 = s0 as i64;
|
||
|
let mut ret = (s0 >> (s1 & 0x3f)) & ((1 << ((s1 >> 16) & 0x7f)) - 1);
|
||
|
let shift = 64 - ((s1 >> 16) & 0x7f);
|
||
|
ret = (ret << shift) >> shift;
|
||
|
(ret as u64, Some(ret != 0))
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
self.scalar_reg.write64(sdst as usize, ret.0);
|
||
|
if let Some(val) = ret.1 {
|
||
|
*self.scc = val as u32
|
||
|
}
|
||
|
}
|
||
|
_ => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(s0), self.val(s1));
|
||
|
let ret = match op {
|
||
|
0 | 4 => {
|
||
|
let (s0, s1) = (s0 as u64, s1 as u64);
|
||
|
let ret = match op {
|
||
|
0 => s0 + s1,
|
||
|
4 => s0 + s1 + *self.scc as u64,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
(ret as u32, Some(ret >= 0x100000000))
|
||
|
}
|
||
|
1 => (s0 - s1, Some(s1 > s0)),
|
||
|
5 => (s0 - s1 - *self.scc, Some((s1 as u64 + *self.scc as u64) > s0 as u64)),
|
||
|
2 | 3 => {
|
||
|
let s0 = s0 as i32 as i64;
|
||
|
let s1 = s1 as i32 as i64;
|
||
|
let ret = match op {
|
||
|
2 => s0 + s1,
|
||
|
3 => s0 - s1,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
let overflow = (nth(s0 as u32, 31) == nth(s1 as u32, 31)) && (nth(s0 as u32, 31) != nth(ret as u32, 31));
|
||
|
|
||
|
(ret as i32 as u32, Some(overflow))
|
||
|
}
|
||
|
(8..=17) => {
|
||
|
let s1 = s1 & 0x1f;
|
||
|
let ret = match op {
|
||
|
8 => s0 << s1,
|
||
|
10 => s0 >> s1,
|
||
|
12 => ((s0 as i32) >> (s1 as i32)) as u32,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
(ret, Some(ret != 0))
|
||
|
}
|
||
|
(18..=21) => {
|
||
|
let scc = match op {
|
||
|
18 => (s0 as i32) < (s1 as i32),
|
||
|
19 => s0 < s1,
|
||
|
20 => (s0 as i32) > (s1 as i32),
|
||
|
21 => s0 > s1,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
let ret = match scc {
|
||
|
true => s0,
|
||
|
false => s1,
|
||
|
};
|
||
|
(ret, Some(scc))
|
||
|
}
|
||
|
(22..=26) | 34 | 36 => {
|
||
|
let ret = match op {
|
||
|
22 => s0 & s1,
|
||
|
24 => s0 | s1,
|
||
|
26 => s0 ^ s1,
|
||
|
34 => s0 & !s1,
|
||
|
36 => s0 | !s1,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
(ret, Some(ret != 0))
|
||
|
}
|
||
|
38 => {
|
||
|
let ret = (s0 >> (s1 & 0x1f)) & ((1 << ((s1 >> 16) & 0x7f)) - 1);
|
||
|
(ret, Some(ret != 0))
|
||
|
}
|
||
|
39 => {
|
||
|
let s0 = s0 as i32;
|
||
|
let mut ret = (s0 >> (s1 & 0x1f)) & ((1 << ((s1 >> 16) & 0x1f)) - 1);
|
||
|
let shift = 32 - ((s1 >> 16) & 0x7f);
|
||
|
ret = (ret << shift) >> shift;
|
||
|
(ret as u32, Some(ret != 0))
|
||
|
}
|
||
|
44 => (((s0 as i32) * (s1 as i32)) as u32, None),
|
||
|
45 => (((s0 as u64) * (s1 as u64) >> 32) as u32, None),
|
||
|
46 => ((((s0 as i32 as i64 * s1 as i32 as i64) as u64) >> 32u64) as i32 as u32, None),
|
||
|
48 => match *self.scc != 0 {
|
||
|
true => (s0, None),
|
||
|
false => (s1, None),
|
||
|
},
|
||
|
50..=53 => {
|
||
|
let (s0, s1) = match op {
|
||
|
50 => (s0 as u16, s1 as u16),
|
||
|
51 => (s0 as u16, (s1 >> 16) as u16),
|
||
|
52 => ((s0 >> 16) as u16, (s1 >> 16) as u16),
|
||
|
_ => ((s0 >> 16) as u16, s1 as u16),
|
||
|
};
|
||
|
(((s1 as u32) << 16) | (s0 as u32), None)
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
|
||
|
self.write_to_sdst(sdst, ret.0);
|
||
|
if let Some(val) = ret.1 {
|
||
|
*self.scc = val as u32
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
self.scalar = true;
|
||
|
}
|
||
|
// vopp
|
||
|
else if instruction >> 24 == 0b11001100 {
|
||
|
let instr = self.u64_instr();
|
||
|
let vdst = (instr & 0xff) as usize;
|
||
|
let clmp = (instr >> 15) & 0x1;
|
||
|
assert_eq!([clmp], [0]);
|
||
|
let op = (instr >> 16) & 0x7f;
|
||
|
|
||
|
let mut src = |x: usize| -> (u16, u16, u32) {
|
||
|
let val: u32 = self.val(x);
|
||
|
match x {
|
||
|
255 => {
|
||
|
let val_lo: u16 = self.val(x);
|
||
|
(val_lo, val_lo, val)
|
||
|
}
|
||
|
(240..=247) => {
|
||
|
let val_lo: u16 = self.val(x);
|
||
|
(val_lo, f16::from_bits(0).to_bits(), val)
|
||
|
}
|
||
|
_ => ((val & 0xffff) as u16, ((val >> 16) & 0xffff) as u16, val),
|
||
|
}
|
||
|
};
|
||
|
|
||
|
let s = [32, 41, 50].iter().map(|x| ((instr >> x) & 0x1ff) as usize).collect::<Vec<_>>();
|
||
|
let src_parts = s.iter().map(|x| src(*x)).collect::<Vec<_>>();
|
||
|
|
||
|
let b = |i: usize| (instr >> i) & 0x1 != 0;
|
||
|
let neg_hi = ((instr >> 8) & 0x7) as usize;
|
||
|
let neg = ((instr >> 61) & 0x7) as usize;
|
||
|
let opsel = [b(11), b(12), b(13)];
|
||
|
let opsel_hi = [b(59), b(60), b(14)];
|
||
|
|
||
|
match op {
|
||
|
0..=18 => {
|
||
|
let fxn = |x, y, z| -> u16 {
|
||
|
match op {
|
||
|
0 => x * y + z,
|
||
|
1 => x * y,
|
||
|
2 => x + y,
|
||
|
3 => x - y,
|
||
|
4 => y << (x & 0xf),
|
||
|
5 => y >> (x & 0xf),
|
||
|
6 => ((y as i16) >> ((x as i16) & 0xf)) as u16,
|
||
|
7 => i16::max(x as i16, y as i16) as u16,
|
||
|
8 => i16::min(x as i16, y as i16) as u16,
|
||
|
9 => x * y + z,
|
||
|
10 => x + y,
|
||
|
11 => x - y,
|
||
|
12 => u16::max(x, y),
|
||
|
13 => u16::min(x, y),
|
||
|
_ => {
|
||
|
let (x, y, z) = (f16::from_bits(x), f16::from_bits(y), f16::from_bits(z));
|
||
|
let ret = match op {
|
||
|
14 => f16::mul_add(x, y, z),
|
||
|
15 => x + y,
|
||
|
16 => x * y,
|
||
|
17 => f16::min(x, y),
|
||
|
18 => f16::max(x, y),
|
||
|
_ => unreachable!("op should be in range 0..=18, got {op}"),
|
||
|
};
|
||
|
ret.to_bits()
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
let src = |opsel: [bool; 3]| {
|
||
|
opsel
|
||
|
.iter()
|
||
|
.enumerate()
|
||
|
.map(|(i, sel)| {
|
||
|
if (14..=19).contains(&op) {
|
||
|
let half = |x, n| f16::from_bits(x).negate(i, n).to_bits();
|
||
|
match sel {
|
||
|
true => half(src_parts[i].1, neg),
|
||
|
false => half(src_parts[i].0, neg_hi),
|
||
|
}
|
||
|
} else {
|
||
|
match sel {
|
||
|
true => src_parts[i].1,
|
||
|
false => src_parts[i].0,
|
||
|
}
|
||
|
}
|
||
|
})
|
||
|
.collect::<Vec<u16>>()
|
||
|
};
|
||
|
let (src_hi, src_lo) = (src(opsel_hi), src(opsel));
|
||
|
let ret = ((fxn(src_hi[0], src_hi[1], src_hi[2]) as u32) << 16) | (fxn(src_lo[0], src_lo[1], src_lo[2]) as u32);
|
||
|
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret;
|
||
|
}
|
||
|
}
|
||
|
32..=34 => {
|
||
|
let src: Vec<f32> = src_parts
|
||
|
.iter()
|
||
|
.enumerate()
|
||
|
.map(|(i, (lo, hi, full))| {
|
||
|
if !opsel_hi[i] {
|
||
|
f32::from_bits(*full).absolute(i, neg_hi)
|
||
|
} else if opsel[i] {
|
||
|
f32::from(f16::from_bits(*hi)).absolute(i, neg_hi)
|
||
|
} else {
|
||
|
f32::from(f16::from_bits(*lo)).absolute(i, neg_hi)
|
||
|
}
|
||
|
})
|
||
|
.collect();
|
||
|
let ret = match op {
|
||
|
32 => f32::mul_add(src[0], src[1], src[2]).to_bits(),
|
||
|
33 | 34 => {
|
||
|
let ret = f16::from_f32(f32::mul_add(src[0], src[1], src[2])).to_bits();
|
||
|
match op {
|
||
|
33 => (self.vec_reg[vdst] & 0xffff0000) | (ret as u32),
|
||
|
34 => (self.vec_reg[vdst] & 0x0000ffff) | ((ret as u32) << 16),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret;
|
||
|
}
|
||
|
}
|
||
|
64..=69 => {
|
||
|
match op {
|
||
|
64 => {
|
||
|
let a = self.wmma_b16_16x16(s[0]).map(|v| f16::from_bits(v).to_f32());
|
||
|
let b = self.wmma_b16_16x16(s[1]).map(|v| f16::from_bits(v).to_f32());
|
||
|
let c = self.wmma_b32_16x16(s[2]).map(|v| f32::from_bits(v));
|
||
|
let ret = wmma(a.collect(), b.collect(), c.collect());
|
||
|
for (i, val) in ret.into_iter().enumerate() {
|
||
|
let lane = i % 32;
|
||
|
self.vec_reg.get_lane_mut(lane)[(i / 32) + vdst] = val.to_bits();
|
||
|
}
|
||
|
}
|
||
|
65 => {
|
||
|
let a = self.wmma_b16_16x16(s[0]).map(|v| bf16::from_bits(v).to_f32());
|
||
|
let b = self.wmma_b16_16x16(s[1]).map(|v| bf16::from_bits(v).to_f32());
|
||
|
let c = self.wmma_b32_16x16(s[2]).map(|v| f32::from_bits(v));
|
||
|
let ret = wmma(a.collect(), b.collect(), c.collect());
|
||
|
for (i, val) in ret.into_iter().enumerate() {
|
||
|
let register = (i / 32) + vdst;
|
||
|
let lane = i % 32;
|
||
|
self.vec_reg.get_lane_mut(lane)[register] = val.to_bits()
|
||
|
}
|
||
|
}
|
||
|
66 => {
|
||
|
let a = self.wmma_b16_16x16(s[0]).map(|v| f16::from_bits(v));
|
||
|
let b = self.wmma_b16_16x16(s[1]).map(|v| f16::from_bits(v));
|
||
|
let c = self.wmma_b32_16x16(s[2]).map(|v| f16::from_bits(v as u16));
|
||
|
let ret = wmma(a.collect(), b.collect(), c.collect());
|
||
|
for (i, val) in ret.into_iter().enumerate() {
|
||
|
let register = (i / 32) + vdst;
|
||
|
let lane = i % 32;
|
||
|
self.vec_reg.get_lane_mut(lane)[register].mut_lo16(val.to_bits());
|
||
|
}
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
self.scalar = true;
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
}
|
||
|
else if let Instruction::VOP1 { src, op, vdst } = decoded {
|
||
|
let s0 = src as usize;
|
||
|
let vdst = vdst as usize;
|
||
|
|
||
|
match op {
|
||
|
3 | 15 | 21 | 23 | 25 | 26 | 60 | 61 | 47 | 49 => {
|
||
|
let s0: u64 = self.val(s0);
|
||
|
match op {
|
||
|
3 | 15 | 21 | 23 | 25 | 26 | 60 | 61 | 47 | 49 => {
|
||
|
let s0 = f64::from_bits(s0);
|
||
|
match op {
|
||
|
23 | 25 | 26 | 61 | 47 | 49 => {
|
||
|
let ret = match op {
|
||
|
23 => f64::trunc(s0),
|
||
|
25 => {
|
||
|
let mut temp = f64::floor(s0 + 0.5);
|
||
|
if f64::floor(s0) % 2.0 != 0.0 && f64::fract(s0) == 0.5 {
|
||
|
temp -= 1.0;
|
||
|
}
|
||
|
temp
|
||
|
}
|
||
|
26 => f64::floor(s0),
|
||
|
47 => 1.0 / s0,
|
||
|
49 => 1.0 / f64::sqrt(s0),
|
||
|
61 => extract_mantissa(s0),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg.write64(vdst, ret.to_bits())
|
||
|
}
|
||
|
}
|
||
|
_ => {
|
||
|
let ret = match op {
|
||
|
3 => s0 as i32 as u32,
|
||
|
15 => (s0 as f32).to_bits(),
|
||
|
21 => s0 as u32,
|
||
|
60 => match (s0 == f64::INFINITY) || (s0 == f64::NEG_INFINITY) || s0.is_nan() {
|
||
|
true => 0,
|
||
|
false => (s0.exponent() as i32 - 1023 + 1) as u32,
|
||
|
},
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
}
|
||
|
84..=97 => {
|
||
|
let s0 = f16::from_bits(self.val(s0));
|
||
|
let ret = match op {
|
||
|
84 => f16::recip(s0),
|
||
|
85 => f16::sqrt(s0),
|
||
|
87 => f16::log2(s0),
|
||
|
88 => f16::exp2(s0),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret.to_bits() as u32;
|
||
|
}
|
||
|
}
|
||
|
_ => {
|
||
|
let s0: u32 = self.val(s0);
|
||
|
match op {
|
||
|
4 | 16 | 22 => {
|
||
|
let ret = match op {
|
||
|
4 => (s0 as i32 as f64).to_bits(),
|
||
|
22 => (s0 as f64).to_bits(),
|
||
|
16 => (f32::from_bits(s0) as f64).to_bits(),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg.write64(vdst, ret)
|
||
|
}
|
||
|
}
|
||
|
2 => {
|
||
|
let idx = self.exec.value.trailing_zeros() as usize;
|
||
|
self.scalar_reg[vdst] = self.vec_reg.get_lane(idx)[(instruction & 0x1ff) as usize - VGPR_COUNT];
|
||
|
}
|
||
|
_ => {
|
||
|
let ret = match op {
|
||
|
1 => s0,
|
||
|
5 => (s0 as i32 as f32).to_bits(),
|
||
|
6 => (s0 as f32).to_bits(),
|
||
|
7 => f32::from_bits(s0) as u32,
|
||
|
8 => f32::from_bits(s0) as i32 as u32,
|
||
|
10 => f16::from_f32(f32::from_bits(s0)).to_bits() as u32,
|
||
|
11 => f32::from(f16::from_bits(s0 as u16)).to_bits(),
|
||
|
17 => ((s0 & 0xff) as f32).to_bits(),
|
||
|
18 => (((s0 >> 8) & 0xff) as f32).to_bits(),
|
||
|
19 => (((s0 >> 16) & 0xff) as f32).to_bits(),
|
||
|
20 => (((s0 >> 24) & 0xff) as f32).to_bits(),
|
||
|
56 => s0.reverse_bits(),
|
||
|
57 => self.clz_i32_u32(s0),
|
||
|
33..=51 => {
|
||
|
let s0 = f32::from_bits(s0);
|
||
|
match op {
|
||
|
33 => s0.trunc(),
|
||
|
34 => {
|
||
|
let mut d0 = s0.trunc();
|
||
|
if s0 > 0.0 && s0 != d0 {
|
||
|
d0 += 1.0;
|
||
|
}
|
||
|
d0
|
||
|
}
|
||
|
35 => {
|
||
|
let mut temp = f32::floor(s0 + 0.5);
|
||
|
if f32::floor(s0) % 2.0 != 0.0 && f32::fract(s0) == 0.5 {
|
||
|
temp -= 1.0;
|
||
|
}
|
||
|
temp
|
||
|
}
|
||
|
36 => {
|
||
|
let mut d0 = s0.trunc();
|
||
|
if s0 < 0.0 && s0 != d0 {
|
||
|
d0 -= 1.0;
|
||
|
}
|
||
|
d0
|
||
|
}
|
||
|
37 => f32::exp2(s0),
|
||
|
39 => f32::log2(s0),
|
||
|
42 => 1.0 / s0,
|
||
|
43 => 1.0 / s0,
|
||
|
46 => 1.0 / f32::sqrt(s0),
|
||
|
51 => f32::sqrt(s0),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
.to_bits()
|
||
|
}
|
||
|
55 => !s0,
|
||
|
59 => self.cls_i32(s0),
|
||
|
80 => f16::from_f32(s0 as u16 as f32).to_bits() as u32,
|
||
|
81 => f16::from_f32(s0 as i16 as f32).to_bits() as u32,
|
||
|
82 => f32::from(f16::from_bits(s0 as u16)) as u32,
|
||
|
83 => f32::from(f16::from_bits(s0 as u16)) as i16 as u32,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// vopd
|
||
|
else if instruction >> 26 == 0b110010 {
|
||
|
let instr = self.u64_instr();
|
||
|
let sx = instr & 0x1ff;
|
||
|
let vx = (instr >> 9) & 0xff;
|
||
|
let srcx0 = self.val(sx as usize);
|
||
|
let vsrcx1 = self.vec_reg[(vx) as usize] as u32;
|
||
|
let opy = (instr >> 17) & 0x1f;
|
||
|
let sy = (instr >> 32) & 0x1ff;
|
||
|
let vy = (instr >> 41) & 0xff;
|
||
|
let opx = (instr >> 22) & 0xf;
|
||
|
let srcy0 = match sy {
|
||
|
255 => match sx {
|
||
|
255 => srcx0,
|
||
|
_ => self.val(sy as usize),
|
||
|
},
|
||
|
_ => self.val(sy as usize),
|
||
|
};
|
||
|
let vsrcy1 = self.vec_reg[(vy) as usize];
|
||
|
|
||
|
let vdstx = ((instr >> 56) & 0xff) as usize;
|
||
|
// LSB is the opposite of VDSTX[0]
|
||
|
let vdsty = (((instr >> 49) & 0x7f) << 1 | ((vdstx as u64 & 1) ^ 1)) as usize;
|
||
|
|
||
|
for (op, s0, s1, dst) in ([(opx, srcx0, vsrcx1, vdstx), (opy, srcy0, vsrcy1, vdsty)]).iter() {
|
||
|
let ret = match *op {
|
||
|
0 | 1 | 2 | 3 | 4 | 5 | 6 | 10 | 11 => {
|
||
|
let s0 = f32::from_bits(*s0 as u32);
|
||
|
let s1 = f32::from_bits(*s1 as u32);
|
||
|
match *op {
|
||
|
0 => f32::mul_add(s0, s1, f32::from_bits(self.vec_reg[*dst])),
|
||
|
1 => f32::mul_add(s0, s1, f32::from_bits(self.val(SIMM_SRC))),
|
||
|
2 => f32::mul_add(s0, f32::from_bits(self.val(SIMM_SRC)), s1),
|
||
|
3 => s0 * s1,
|
||
|
4 => s0 + s1,
|
||
|
5 => s0 - s1,
|
||
|
6 => s1 - s0,
|
||
|
10 => f32::max(s0, s1),
|
||
|
11 => f32::min(s0, s1),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
.to_bits()
|
||
|
}
|
||
|
8 => *s0,
|
||
|
9 => match self.vcc.read() {
|
||
|
true => *s1,
|
||
|
false => *s0,
|
||
|
},
|
||
|
16 => s0 + s1,
|
||
|
17 => s1 << s0,
|
||
|
18 => s0 & s1,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[*dst] = ret;
|
||
|
};
|
||
|
}
|
||
|
}
|
||
|
// vopc
|
||
|
else if let Instruction::VOPC { vsrc, src, op } = decoded {
|
||
|
let s0 = src as usize;
|
||
|
let s1 = vsrc as usize;
|
||
|
let op = op as u32;
|
||
|
|
||
|
let dest_offset = if op >= 128 { 128 } else { 0 };
|
||
|
let ret = match op {
|
||
|
(0..=15) | 125 | (128..=143) => {
|
||
|
let s0 = f16::from_bits(self.val(s0));
|
||
|
let s1 = f16::from_bits(self.vec_reg[s1] as u16);
|
||
|
match op {
|
||
|
125 => self.cmp_class_f16(s0, s1.to_bits()),
|
||
|
_ => self.cmpf(s0, s1, op - dest_offset),
|
||
|
}
|
||
|
}
|
||
|
(16..=31) | 126 | (144..=159) => {
|
||
|
let s0 = f32::from_bits(self.val(s0));
|
||
|
let s1 = f32::from_bits(self.vec_reg[s1]);
|
||
|
match op {
|
||
|
126 => self.cmp_class_f32(s0, s1.to_bits()),
|
||
|
_ => self.cmpf(s0, s1, op - 16 - dest_offset),
|
||
|
}
|
||
|
}
|
||
|
(32..=47) | 127 | (160..=174) => {
|
||
|
let s0 = self.val(s0);
|
||
|
match op {
|
||
|
127 => {
|
||
|
let s1 = self.val(s1);
|
||
|
self.cmp_class_f64(s0, s1)
|
||
|
}
|
||
|
_ => {
|
||
|
let s1 = f64::from_bits(self.vec_reg.read64(s1));
|
||
|
self.cmpf(s0, s1, op - 32 - dest_offset)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
(49..=54) | (177..=182) => {
|
||
|
let (s0, s1): (u16, u16) = (self.val(s0), self.vec_reg[s1] as u16);
|
||
|
self.cmpi(s0 as i16, s1 as i16, op - 48 - dest_offset)
|
||
|
}
|
||
|
(57..=62) | (185..=190) => {
|
||
|
let (s0, s1): (u16, u16) = (self.val(s0), self.vec_reg[s1] as u16);
|
||
|
self.cmpi(s0, s1, op - 56 - dest_offset)
|
||
|
}
|
||
|
(64..=71) | (192..=199) => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(s0), self.vec_reg[s1]);
|
||
|
self.cmpi(s0 as i32, s1 as i32, op - 64 - dest_offset)
|
||
|
}
|
||
|
(72..=79) | (200..=207) => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(s0), self.vec_reg[s1]);
|
||
|
self.cmpi(s0, s1, op - 72 - dest_offset)
|
||
|
}
|
||
|
(80..=87) | (208..=215) => {
|
||
|
let (s0, s1): (u64, u64) = (self.val(s0), self.vec_reg.read64(s1));
|
||
|
self.cmpi(s0 as i64, s1 as i64, op - 80 - dest_offset)
|
||
|
}
|
||
|
(88..=95) | (216..=223) => {
|
||
|
let (s0, s1): (u64, u64) = (self.val(s0), self.vec_reg.read64(s1));
|
||
|
self.cmpi(s0, s1, op - 88 - dest_offset)
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
|
||
|
if self.exec.read() {
|
||
|
match op >= 128 {
|
||
|
true => self.exec.set_lane(ret),
|
||
|
false => self.vcc.set_lane(ret),
|
||
|
};
|
||
|
}
|
||
|
}
|
||
|
else if let Instruction::VOP2 { vsrc, src, vdst, op } = decoded {
|
||
|
let s0 = src as usize;
|
||
|
let s1 = self.vec_reg[vsrc as usize];
|
||
|
let vdst = vdst as usize;
|
||
|
|
||
|
match op {
|
||
|
(50..=60) => {
|
||
|
let (s0, s1) = (f16::from_bits(self.val(s0)), f16::from_bits(s1 as u16));
|
||
|
let ret = match op {
|
||
|
50 => s0 + s1,
|
||
|
51 => s0 - s1,
|
||
|
53 => s0 * s1,
|
||
|
54 => f16::mul_add(s0, s1, f16::from_bits(self.vec_reg[vdst] as u16)),
|
||
|
55 => f16::mul_add(s0, f16::from_bits(self.val(SIMM_SRC)), s1),
|
||
|
56 => f16::mul_add(s0, s1, f16::from_bits(self.val(SIMM_SRC))),
|
||
|
57 => f16::max(s0, s1),
|
||
|
58 => f16::min(s0, s1),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret.to_bits() as u32;
|
||
|
}
|
||
|
}
|
||
|
_ => {
|
||
|
let s0 = self.val(s0);
|
||
|
let ret = match op {
|
||
|
1 => match self.vcc.read() {
|
||
|
true => s1,
|
||
|
false => s0,
|
||
|
},
|
||
|
2 => {
|
||
|
let mut acc = f32::from_bits(self.vec_reg[vdst]);
|
||
|
acc += f32::from(f16_lo(s0)) * f32::from(f16_lo(s1));
|
||
|
acc += f32::from(f16_hi(s0)) * f32::from(f16_hi(s1));
|
||
|
acc.to_bits()
|
||
|
}
|
||
|
|
||
|
3 | 4 | 5 | 8 | 15 | 16 | 43 | 44 | 45 => {
|
||
|
let (s0, s1) = (f32::from_bits(s0), f32::from_bits(s1));
|
||
|
match op {
|
||
|
3 => s0 + s1,
|
||
|
4 => s0 - s1,
|
||
|
5 => s1 - s0,
|
||
|
8 => s0 * s1,
|
||
|
15 => f32::min(s0, s1),
|
||
|
16 => f32::max(s0, s1),
|
||
|
43 => f32::mul_add(s0, s1, f32::from_bits(self.vec_reg[vdst])),
|
||
|
44 => f32::mul_add(s0, f32::from_bits(self.val(SIMM_SRC)), s1),
|
||
|
45 => f32::mul_add(s0, s1, f32::from_bits(self.val(SIMM_SRC))),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
.to_bits()
|
||
|
}
|
||
|
9 => {
|
||
|
let s0 = sign_ext((s0 & 0xffffff) as u64, 24) as i32;
|
||
|
let s1 = sign_ext((s1 & 0xffffff) as u64, 24) as i32;
|
||
|
(s0 * s1) as u32
|
||
|
}
|
||
|
17 | 18 | 26 => {
|
||
|
let (s0, s1) = (s0 as i32, s1 as i32);
|
||
|
(match op {
|
||
|
17 => i32::min(s0, s1),
|
||
|
18 => i32::max(s0, s1),
|
||
|
26 => s1 >> s0,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}) as u32
|
||
|
}
|
||
|
32 => {
|
||
|
let temp = s0 as u64 + s1 as u64 + self.vcc.read() as u64;
|
||
|
self.vcc.set_lane(temp >= 0x100000000);
|
||
|
temp as u32
|
||
|
}
|
||
|
33 | 34 => {
|
||
|
let temp = match op {
|
||
|
33 => s0 - s1 - self.vcc.read() as u32,
|
||
|
34 => s1 - s0 - self.vcc.read() as u32,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
self.vcc.set_lane((s1 as u64 + self.vcc.read() as u64) > s0 as u64);
|
||
|
temp
|
||
|
}
|
||
|
11 => s0 * s1,
|
||
|
19 => u32::min(s0, s1),
|
||
|
20 => u32::max(s0, s1),
|
||
|
24 => s1 << s0,
|
||
|
25 => s1 >> s0,
|
||
|
27 => s0 & s1,
|
||
|
28 => s0 | s1,
|
||
|
29 => s0 ^ s1,
|
||
|
37 => s0 + s1,
|
||
|
38 => s0 - s1,
|
||
|
39 => s1 - s0,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret;
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
}
|
||
|
// vop3
|
||
|
else if instruction >> 26 == 0b110101 {
|
||
|
let instr = self.u64_instr();
|
||
|
|
||
|
let op = ((instr >> 16) & 0x3ff) as u32;
|
||
|
match op {
|
||
|
764 | 765 | 288 | 289 | 290 | 766 | 768 | 769 => {
|
||
|
let vdst = (instr & 0xff) as usize;
|
||
|
let sdst = ((instr >> 8) & 0x7f) as usize;
|
||
|
let f = |i: u32| -> usize { ((instr >> i) & 0x1ff) as usize };
|
||
|
let (s0, s1, s2) = (f(32), f(41), f(50));
|
||
|
let mut carry_in = WaveValue::new(self.val(s2), self.warp_size);
|
||
|
carry_in.default_lane = self.vcc.default_lane;
|
||
|
let omod = (instr >> 59) & 0x3;
|
||
|
let _neg = (instr >> 61) & 0x7;
|
||
|
let clmp = (instr >> 15) & 0x1;
|
||
|
assert_eq!(omod, 0);
|
||
|
assert_eq!(clmp, 0);
|
||
|
|
||
|
let vcc = match op {
|
||
|
766 => {
|
||
|
let (s0, s1, s2): (u32, u32, u64) = (self.val(s0), self.val(s1), self.val(s2));
|
||
|
let (mul_result, overflow_mul) = (s0 as u64).overflowing_mul(s1 as u64);
|
||
|
let (ret, overflow_add) = mul_result.overflowing_add(s2);
|
||
|
let overflowed = overflow_mul || overflow_add;
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg.write64(vdst, ret);
|
||
|
}
|
||
|
overflowed
|
||
|
}
|
||
|
765 => {
|
||
|
assert!(f64::from_bits(self.val(s2)).exponent() <= 1076);
|
||
|
let ret = ldexp(self.val(s0), 128);
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg.write64(vdst, ret.to_bits());
|
||
|
}
|
||
|
false
|
||
|
}
|
||
|
_ => {
|
||
|
let (s0, s1, _s2): (u32, u32, u32) = (self.val(s0), self.val(s1), self.val(s2));
|
||
|
let (ret, vcc) = match op {
|
||
|
288 => {
|
||
|
let ret = s0 as u64 + s1 as u64 + carry_in.read() as u64;
|
||
|
(ret as u32, ret >= 0x100000000)
|
||
|
}
|
||
|
289 => {
|
||
|
let ret = (s0 as u64).wrapping_sub(s1 as u64).wrapping_sub(carry_in.read() as u64);
|
||
|
(ret as u32, s1 as u64 + (carry_in.read() as u64) > s0 as u64)
|
||
|
}
|
||
|
290 => {
|
||
|
let ret = (s1 as u64).wrapping_sub(s0 as u64).wrapping_sub(carry_in.read() as u64);
|
||
|
(ret as u32, s1 as u64 + (carry_in.read() as u64) > s0 as u64)
|
||
|
}
|
||
|
764 => (0, false), // NOTE: div scaling isn't required
|
||
|
768 => {
|
||
|
let ret = s0 as u64 + s1 as u64;
|
||
|
(ret as u32, ret >= 0x100000000)
|
||
|
}
|
||
|
769 => {
|
||
|
let ret = s0.wrapping_sub(s1);
|
||
|
(ret as u32, s1 > s0)
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret;
|
||
|
}
|
||
|
vcc
|
||
|
}
|
||
|
};
|
||
|
|
||
|
match sdst {
|
||
|
VCC => self.vcc.set_lane(vcc),
|
||
|
NULL_SRC => {}
|
||
|
_ => self.set_sgpr_co(sdst, vcc),
|
||
|
}
|
||
|
}
|
||
|
_ => {
|
||
|
let vdst = (instr & 0xff) as usize;
|
||
|
let abs = ((instr >> 8) & 0x7) as usize;
|
||
|
let opsel = ((instr >> 11) & 0xf) as usize;
|
||
|
let cm = (instr >> 15) & 0x1;
|
||
|
|
||
|
let s = |n: usize| ((instr >> n) & 0x1ff) as usize;
|
||
|
let src = (s(32), s(41), s(50));
|
||
|
|
||
|
let omod = (instr >> 59) & 0x3;
|
||
|
let neg = ((instr >> 61) & 0x7) as usize;
|
||
|
assert_eq!(omod, 0);
|
||
|
assert_eq!(cm, 0);
|
||
|
assert_eq!(opsel, 0);
|
||
|
|
||
|
match op {
|
||
|
// VOPC using VOP3 encoding
|
||
|
0..=255 => {
|
||
|
let dest_offset = if op >= 128 { 128 } else { 0 };
|
||
|
let ret = match op {
|
||
|
(0..=15) | 125 | (128..=143) => {
|
||
|
let (s0, s1) = (self.val(src.0), self.val(src.1));
|
||
|
let s0 = f16::from_bits(s0).negate(0, neg).absolute(0, abs);
|
||
|
let s1 = f16::from_bits(s1).negate(1, neg).absolute(1, abs);
|
||
|
match op {
|
||
|
125 => self.cmp_class_f16(s0, s1.to_bits()),
|
||
|
_ => self.cmpf(s0, s1, op - dest_offset),
|
||
|
}
|
||
|
}
|
||
|
(16..=31) | 126 | (144..=159) => {
|
||
|
let (s0, s1) = (self.val(src.0), self.val(src.1));
|
||
|
let s0 = f32::from_bits(s0).negate(0, neg).absolute(0, abs);
|
||
|
let s1 = f32::from_bits(s1).negate(1, neg).absolute(1, abs);
|
||
|
match op {
|
||
|
126 => self.cmp_class_f32(s0, s1.to_bits()),
|
||
|
_ => self.cmpf(s0, s1, op - 16 - dest_offset),
|
||
|
}
|
||
|
}
|
||
|
(32..=47) | 127 | (160..=174) => {
|
||
|
let s0: f64 = self.val(src.0);
|
||
|
let s0 = s0.negate(0, neg).absolute(0, abs);
|
||
|
match op {
|
||
|
127 => {
|
||
|
let s1 = self.val(src.1);
|
||
|
self.cmp_class_f64(s0, s1)
|
||
|
}
|
||
|
_ => {
|
||
|
let s1 = self.val(src.1);
|
||
|
let s1 = f64::from_bits(s1).negate(1, neg).absolute(1, abs);
|
||
|
self.cmpf(s0, s1, op - 32 - dest_offset)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
(49..=54) | (177..=182) => {
|
||
|
let (s0, s1): (u16, u16) = (self.val(src.0), self.val(src.1));
|
||
|
self.cmpi(s0 as i16, s1 as i16, op - 48 - dest_offset)
|
||
|
}
|
||
|
(57..=62) | (185..=190) => {
|
||
|
let (s0, s1): (u16, u16) = (self.val(src.0), self.val(src.1));
|
||
|
self.cmpi(s0, s1, op - 56 - dest_offset)
|
||
|
}
|
||
|
(64..=71) | (192..=199) => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(src.0), self.val(src.1));
|
||
|
self.cmpi(s0 as i32, s1 as i32, op - 64 - dest_offset)
|
||
|
}
|
||
|
(72..=79) | (200..=207) => {
|
||
|
let (s0, s1): (u32, u32) = (self.val(src.0), self.val(src.1));
|
||
|
self.cmpi(s0, s1, op - 72 - dest_offset)
|
||
|
}
|
||
|
(80..=87) | (208..=215) => {
|
||
|
let (s0, s1): (u64, u64) = (self.val(src.0), self.val(src.1));
|
||
|
self.cmpi(s0 as i64, s1 as i64, op - 80 - dest_offset)
|
||
|
}
|
||
|
(88..=95) | (216..=223) => {
|
||
|
let (s0, s1): (u64, u64) = (self.val(src.0), self.val(src.1));
|
||
|
self.cmpi(s0, s1, op - 88 - dest_offset)
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
|
||
|
if self.exec.read() {
|
||
|
match vdst {
|
||
|
0..=SGPR_SRC | 107 => self.set_sgpr_co(vdst, ret),
|
||
|
VCC => self.vcc.set_lane(ret),
|
||
|
EXEC => self.exec.set_lane(ret),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
828..=830 => {
|
||
|
let (s0, s1, _s2): (u32, u64, u64) = (self.val(src.0), self.val(src.1), self.val(src.2));
|
||
|
let shift = s0 & 0x3f;
|
||
|
let ret = match op {
|
||
|
828 => s1 << shift,
|
||
|
829 => s1 >> shift,
|
||
|
830 => ((s1 as i64) >> shift) as u64,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg.write64(vdst, ret)
|
||
|
}
|
||
|
}
|
||
|
407 | 532 | 552 | 568 | (807..=811) => {
|
||
|
let (_s0, _s1, _s2): (f64, f64, f64) = (self.val(src.0), self.val(src.1), self.val(src.2));
|
||
|
let s0 = _s0.negate(0, neg).absolute(0, abs);
|
||
|
let s1 = _s1.negate(1, neg).absolute(1, abs);
|
||
|
let s2 = _s2.negate(2, neg).absolute(2, abs);
|
||
|
let ret = match op {
|
||
|
407 => f64::trunc(s0),
|
||
|
532 => f64::mul_add(s0, s1, s2),
|
||
|
552 => {
|
||
|
assert!(s0.is_normal());
|
||
|
s0
|
||
|
}
|
||
|
807 => s0 + s1,
|
||
|
808 => s0 * s1,
|
||
|
809 => f64::min(s0, s1),
|
||
|
810 => f64::max(s0, s1),
|
||
|
811 => {
|
||
|
let s1: u32 = self.val(src.1);
|
||
|
s0 * 2f64.powi(s1 as i32)
|
||
|
}
|
||
|
568 => {
|
||
|
assert!(!self.vcc.read());
|
||
|
f64::mul_add(s0, s1, s2)
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
.to_bits();
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg.write64(vdst, ret)
|
||
|
}
|
||
|
}
|
||
|
306 | 309 | 310 | 313 | 596 | 584 | 585 | 588 => {
|
||
|
let (s0, s1, s2) = (self.val(src.0), self.val(src.1), self.val(src.2));
|
||
|
let s0 = f16::from_bits(s0).negate(0, neg).absolute(0, abs);
|
||
|
let s1 = f16::from_bits(s1).negate(1, neg).absolute(1, abs);
|
||
|
let s2 = f16::from_bits(s2).negate(1, neg).absolute(1, abs);
|
||
|
let ret = match op {
|
||
|
309 => s0 * s1,
|
||
|
310 => f16::mul_add(s0, s1, f16::from_bits(self.vec_reg[vdst] as u16)),
|
||
|
306 => s0 + s1,
|
||
|
584 => f16::mul_add(s0, s1, s2),
|
||
|
585 => f16::min(f16::min(s0, s1), s2),
|
||
|
588 => f16::max(f16::max(s0, s1), s2),
|
||
|
596 => s2 / s1,
|
||
|
313 => f16::max(s0, s1),
|
||
|
314 => f16::min(s0, s1),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
.to_bits();
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret as u32;
|
||
|
}
|
||
|
}
|
||
|
394 => {
|
||
|
let s0 = f32::from_bits(self.val(src.0)).negate(0, neg).absolute(0, abs);
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst].mut_lo16(f16::from_f32(s0).to_bits());
|
||
|
}
|
||
|
}
|
||
|
467 => {
|
||
|
let s0 = f16::from_bits(self.val(src.0)).negate(0, neg).absolute(0, abs);
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = s0.to_f32() as i16 as u32;
|
||
|
}
|
||
|
}
|
||
|
395 => {
|
||
|
let s0 = f16::from_bits(self.val(src.0)).negate(0, neg).absolute(0, abs);
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = f32::from(s0).to_bits();
|
||
|
}
|
||
|
}
|
||
|
399 => {
|
||
|
let s0: f64 = self.val(src.0);
|
||
|
let s0 = s0.negate(0, neg).absolute(0, abs);
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = (s0 as f32).to_bits();
|
||
|
}
|
||
|
}
|
||
|
785 => {
|
||
|
let (s0, s1) = (self.val(src.0), self.val(src.1));
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = (f16::from_bits(s1).to_bits() as u32) << 16 | f16::from_bits(s0).to_bits() as u32;
|
||
|
}
|
||
|
}
|
||
|
_ => {
|
||
|
let (s0, s1, s2) = (self.val(src.0), self.val(src.1), self.val(src.2));
|
||
|
match op {
|
||
|
865 => {
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg.get_lane_mut(s1 as usize)[vdst] = s0;
|
||
|
}
|
||
|
return Ok(());
|
||
|
}
|
||
|
864 => {
|
||
|
let val = self.vec_reg.get_lane(s1 as usize)[src.0 - VGPR_COUNT];
|
||
|
self.write_to_sdst(vdst, val);
|
||
|
return Ok(());
|
||
|
}
|
||
|
826 => {
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst].mut_lo16(((s1 as i16) >> (s0 & 0xf)) as u16);
|
||
|
}
|
||
|
return Ok(());
|
||
|
}
|
||
|
587 | 577 | 590 | 771 | 772 | 773 | 777 | 779 | 824 | 825 => {
|
||
|
let (s0, s1, s2) = (s0 as u16, s1 as u16, s2 as u16);
|
||
|
let ret = match op {
|
||
|
587 => u16::min(u16::min(s0, s1), s2),
|
||
|
590 => u16::max(u16::max(s0, s1), s2),
|
||
|
577 => s0 * s1 + s2,
|
||
|
771 => s0 + s1,
|
||
|
772 => s0 - s1,
|
||
|
773 => s0 * s1,
|
||
|
777 => u16::max(s0, s1),
|
||
|
779 => u16::min(s0, s1),
|
||
|
824 => s1 << s0,
|
||
|
825 => s1 >> s0,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst].mut_lo16(ret);
|
||
|
}
|
||
|
return Ok(());
|
||
|
}
|
||
|
586 | 589 | 778 | 780 | 781 | 782 => {
|
||
|
let (s0, s1, s2) = (s0 as i16, s1 as i16, s2 as i16);
|
||
|
let ret = match op {
|
||
|
586 => i16::min(i16::min(s0, s1), s2),
|
||
|
589 => i16::max(i16::max(s0, s1), s2),
|
||
|
778 => i16::max(s0, s1),
|
||
|
780 => i16::min(s0, s1),
|
||
|
781 => s0 + s1,
|
||
|
782 => s0 - s1,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst].mut_lo16(ret as u16);
|
||
|
}
|
||
|
return Ok(());
|
||
|
}
|
||
|
_ => {}
|
||
|
}
|
||
|
|
||
|
let ret = match op {
|
||
|
257 | 259 | 299 | 260 | 261 | 264 | 272 | 392 | 426 | 430 | 531 | 537 | 540 | 551 | 567 | 796 => {
|
||
|
let s0 = f32::from_bits(s0).negate(0, neg).absolute(0, abs);
|
||
|
let s1 = f32::from_bits(s1).negate(1, neg).absolute(1, abs);
|
||
|
let s2 = f32::from_bits(s2).negate(2, neg).absolute(2, abs);
|
||
|
match op {
|
||
|
259 => s0 + s1,
|
||
|
260 => s0 - s1,
|
||
|
261 => s1 - s0,
|
||
|
264 => s0 * s1,
|
||
|
272 => f32::max(s0, s1),
|
||
|
299 => f32::mul_add(s0, s1, f32::from_bits(self.vec_reg[vdst])),
|
||
|
426 => s0.recip(),
|
||
|
430 => 1.0 / f32::sqrt(s0),
|
||
|
531 => f32::mul_add(s0, s1, s2),
|
||
|
537 => f32::min(f32::min(s0, s1), s2),
|
||
|
540 => f32::max(f32::max(s0, s1), s2),
|
||
|
551 => s2 / s1,
|
||
|
567 => {
|
||
|
let ret = f32::mul_add(s0, s1, s2);
|
||
|
match self.vcc.read() {
|
||
|
true => 2.0_f32.powi(32) * ret,
|
||
|
false => ret,
|
||
|
}
|
||
|
}
|
||
|
796 => s0 * 2f32.powi(s1.to_bits() as i32),
|
||
|
// cnd_mask isn't a float only ALU but supports neg
|
||
|
257 => {
|
||
|
let mut cond = WaveValue::new(s2.to_bits(), self.warp_size);
|
||
|
cond.default_lane = self.vcc.default_lane;
|
||
|
match cond.read() {
|
||
|
true => s1,
|
||
|
false => s0,
|
||
|
}
|
||
|
}
|
||
|
392 => f32::from_bits(s0 as i32 as u32),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
.to_bits()
|
||
|
}
|
||
|
_ => {
|
||
|
if neg != 0 {
|
||
|
todo_instr!(instruction)?
|
||
|
}
|
||
|
match op {
|
||
|
529 => {
|
||
|
let s0 = s0 as i32;
|
||
|
let shift = 32 - (s2 & 0x1f);
|
||
|
let mask: i32 = 1 << (s2 & 0x1f);
|
||
|
let ret = (s0 >> (s1 & 0x1f)) & (mask.wrapping_sub(1));
|
||
|
((ret << shift) >> shift) as u32
|
||
|
}
|
||
|
522 | 541 | 538 | 544 | 814 => {
|
||
|
let (s0, s1, s2) = (s0 as i32, s1 as i32, s2 as i32);
|
||
|
|
||
|
(match op {
|
||
|
522 => {
|
||
|
let s0 = sign_ext((s0 & 0xffffff) as u64, 24) as i32;
|
||
|
let s1 = sign_ext((s1 & 0xffffff) as u64, 24) as i32;
|
||
|
s0 * s1 + s2
|
||
|
}
|
||
|
538 => i32::min(i32::min(s0, s1), s2),
|
||
|
541 => i32::max(i32::max(s0, s1), s2),
|
||
|
544 => {
|
||
|
if (i32::max(i32::max(s0, s1), s2)) == s0 {
|
||
|
i32::max(s1, s2)
|
||
|
} else if (i32::max(i32::max(s0, s1), s2)) == s1 {
|
||
|
i32::max(s0, s2)
|
||
|
} else {
|
||
|
i32::max(s0, s1)
|
||
|
}
|
||
|
}
|
||
|
814 => ((s0 as i64) * (s1 as i64) >> 32) as i32,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}) as u32
|
||
|
}
|
||
|
275 => u32::min(s0, s1),
|
||
|
276 => u32::max(s0, s1),
|
||
|
280 => s1 << s0,
|
||
|
281 => s1 >> s0,
|
||
|
283 => s0 & s1,
|
||
|
284 => s0 | s1,
|
||
|
285 => s0 ^ s1,
|
||
|
286 => !(s0 ^ s1),
|
||
|
523 => s0 * s1 + s2, // TODO 24 bit trunc
|
||
|
528 => (s0 >> s1) & ((1 << s2) - 1),
|
||
|
530 => (s0 & s1) | (!s0 & s2),
|
||
|
534 => {
|
||
|
let val = ((s0 as u64) << 32) | (s1 as u64);
|
||
|
let shift = (s2 & 0x1F) as u64;
|
||
|
((val >> shift) & 0xffffffff) as u32
|
||
|
}
|
||
|
542 => u32::max(u32::max(s0, s1), s2),
|
||
|
576 => s0 ^ s1 ^ s2,
|
||
|
580 => {
|
||
|
fn byte_permute(data: u64, sel: u32) -> u8 {
|
||
|
let bytes = data.to_ne_bytes();
|
||
|
match sel {
|
||
|
13..=u32::MAX => 0xff,
|
||
|
12 => 0x00,
|
||
|
11 => ((bytes[7] & 0x80) != 0) as u8 * 0xff,
|
||
|
10 => ((bytes[5] & 0x80) != 0) as u8 * 0xff,
|
||
|
9 => ((bytes[3] & 0x80) != 0) as u8 * 0xff,
|
||
|
8 => ((bytes[1] & 0x80) != 0) as u8 * 0xff,
|
||
|
_ => bytes[sel as usize],
|
||
|
}
|
||
|
}
|
||
|
let combined = ((s0 as u64) << 32) | s1 as u64;
|
||
|
let d0 = ((byte_permute(combined, s2 >> 24) as u32) << 24)
|
||
|
| ((byte_permute(combined, (s2 >> 16) & 0xFF) as u32) << 16)
|
||
|
| ((byte_permute(combined, (s2 >> 8) & 0xFF) as u32) << 8)
|
||
|
| (byte_permute(combined, s2 & 0xFF) as u32);
|
||
|
d0
|
||
|
}
|
||
|
581 => (s0 ^ s1) + s2,
|
||
|
582 => (s0 << s1) + s2,
|
||
|
583 => (s0 + s1) << s2,
|
||
|
597 => s0 + s1 + s2,
|
||
|
598 => (s0 << s1) | s2,
|
||
|
599 => (s0 & s1) | s2,
|
||
|
600 => s0 | s1 | s2,
|
||
|
798 => {
|
||
|
let mut ret = s1;
|
||
|
(0..=31).into_iter().for_each(|i| ret += nth(s0, i));
|
||
|
ret
|
||
|
}
|
||
|
812 => s0 * s1,
|
||
|
813 => ((s0 as u64) * (s1 as u64) >> 32) as u32,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
if self.exec.read() {
|
||
|
self.vec_reg[vdst] = ret;
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
}
|
||
|
}
|
||
|
} else if let Instruction::DS { op, gds, addr, data0, offset0, data1, offset1, vdst } = decoded {
|
||
|
let _ = self.u64_instr();
|
||
|
if gds {
|
||
|
return todo_instr!(instruction)?;
|
||
|
}
|
||
|
if !self.exec.read() {
|
||
|
return Ok(());
|
||
|
}
|
||
|
|
||
|
let [data0, data1, vdst] = [data0 as usize, data1 as usize, vdst as usize];
|
||
|
let lds_base = self.vec_reg[addr as usize];
|
||
|
let single_addr = || (lds_base + u16::from_le_bytes([offset0, offset1]) as u32) as usize;
|
||
|
let double_addr = |adj: u32| {
|
||
|
let addr0 = lds_base + offset0 as u32 * adj;
|
||
|
let addr1 = lds_base + offset1 as u32 * adj;
|
||
|
(addr0 as usize, addr1 as usize)
|
||
|
};
|
||
|
|
||
|
match op {
|
||
|
// load
|
||
|
54 | 118 | 254 | 255 => {
|
||
|
let dwords = match op {
|
||
|
255 => 4,
|
||
|
254 => 3,
|
||
|
118 => 2,
|
||
|
_ => 1,
|
||
|
};
|
||
|
(0..dwords).for_each(|i| {
|
||
|
self.vec_reg[vdst + i] = self.lds.read(single_addr() + 4 * i);
|
||
|
});
|
||
|
}
|
||
|
58 => self.vec_reg[vdst] = self.lds.read(single_addr()) as u8 as u32,
|
||
|
60 => self.vec_reg[vdst] = self.lds.read(single_addr()) as u16 as u32,
|
||
|
55 => {
|
||
|
let (addr0, addr1) = double_addr(4);
|
||
|
self.vec_reg[vdst] = self.lds.read(addr0);
|
||
|
self.vec_reg[vdst + 1] = self.lds.read(addr1);
|
||
|
}
|
||
|
119 => {
|
||
|
let (addr0, addr1) = double_addr(8);
|
||
|
self.vec_reg.write64(vdst, self.lds.read64(addr0));
|
||
|
self.vec_reg.write64(vdst + 2, self.lds.read64(addr1));
|
||
|
}
|
||
|
// store
|
||
|
13 | 77 | 222 | 223 => {
|
||
|
let dwords = match op {
|
||
|
223 => 4,
|
||
|
222 => 3,
|
||
|
77 => 2,
|
||
|
_ => 1,
|
||
|
};
|
||
|
(0..dwords).for_each(|i| {
|
||
|
self.lds.write(single_addr() + 4 * i, self.vec_reg[data0 + i]);
|
||
|
})
|
||
|
}
|
||
|
30 => {
|
||
|
let addr = single_addr();
|
||
|
if addr + 1 >= self.lds.data.len() {
|
||
|
self.lds.data.resize(self.lds.data.len() + addr + 2, 0);
|
||
|
}
|
||
|
self.lds.data[addr..addr + 1].iter_mut().enumerate().for_each(|(i, x)| {
|
||
|
*x = (self.vec_reg[data0] as u8).to_le_bytes()[i];
|
||
|
});
|
||
|
}
|
||
|
31 | 161 => {
|
||
|
let addr = single_addr();
|
||
|
if addr + 2 >= self.lds.data.len() {
|
||
|
self.lds.data.resize(self.lds.data.len() + addr + 3, 0);
|
||
|
}
|
||
|
let b32 = self.vec_reg[data0];
|
||
|
self.lds.data[addr..addr + 2].iter_mut().enumerate().for_each(|(i, x)| {
|
||
|
*x = (if op == 31 { b32 as u16 } else { ((b32 >> 16) & 0xffff) as u16 }).to_le_bytes()[i];
|
||
|
});
|
||
|
}
|
||
|
14 => {
|
||
|
let (addr0, addr1) = double_addr(4);
|
||
|
self.lds.write(addr0, self.vec_reg[data0]);
|
||
|
self.lds.write(addr1, self.vec_reg[data1]);
|
||
|
}
|
||
|
78 => {
|
||
|
let (addr0, addr1) = double_addr(8);
|
||
|
self.lds.write64(addr0, self.vec_reg.read64(data0));
|
||
|
self.lds.write64(addr1, self.vec_reg.read64(data1));
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
}
|
||
|
// global
|
||
|
// flat
|
||
|
else if instruction >> 26 == 0b110111 {
|
||
|
let instr = self.u64_instr();
|
||
|
if !self.exec.read() {
|
||
|
return Ok(());
|
||
|
}
|
||
|
let offset = sign_ext(instr & 0x1fff, 13);
|
||
|
let seg = (instr >> 16) & 0x3;
|
||
|
let op = ((instr >> 18) & 0x7f) as usize;
|
||
|
let addr = ((instr >> 32) & 0xff) as usize;
|
||
|
let data = ((instr >> 40) & 0xff) as usize;
|
||
|
let saddr = ((instr >> 48) & 0x7f) as usize;
|
||
|
let vdst = ((instr >> 56) & 0xff) as usize;
|
||
|
|
||
|
let saddr_val: u32 = self.val(saddr);
|
||
|
let saddr_off = saddr_val == 0x7F || saddr == NULL_SRC;
|
||
|
|
||
|
match seg {
|
||
|
1 => {
|
||
|
let sve = ((instr >> 50) & 0x1) != 0;
|
||
|
|
||
|
let addr = match (sve, saddr_off) {
|
||
|
(true, true) => offset as u64 as usize,
|
||
|
(false, false) => saddr_val as usize,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
match op {
|
||
|
// load
|
||
|
20..=23 => (0..op - 19).for_each(|i| {
|
||
|
self.vec_reg[vdst + i] = self.sds.read(addr + 4 * i);
|
||
|
}),
|
||
|
// store
|
||
|
26..=29 => (0..op - 25).for_each(|i| {
|
||
|
self.sds.write(addr + 4 * i, self.vec_reg[data + i]);
|
||
|
}),
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
}
|
||
|
}
|
||
|
2 => {
|
||
|
let addr = match saddr_off {
|
||
|
true => self.vec_reg.read64(addr) as i64 + (offset as i64),
|
||
|
false => {
|
||
|
let scalar_addr = self.scalar_reg.read64(saddr);
|
||
|
let vgpr_offset = self.vec_reg[addr];
|
||
|
scalar_addr as i64 + vgpr_offset as i64 + offset
|
||
|
}
|
||
|
} as u64;
|
||
|
|
||
|
unsafe {
|
||
|
match op {
|
||
|
// load
|
||
|
16 => self.vec_reg[vdst] = *(addr as *const u8) as u32,
|
||
|
17 => self.vec_reg[vdst] = *(addr as *const i8) as u32,
|
||
|
18 => self.vec_reg[vdst] = *(addr as *const u16) as u32,
|
||
|
19 => self.vec_reg[vdst] = *(addr as *const i16) as u32,
|
||
|
|
||
|
20..=23 => (0..op - 19).for_each(|i| {
|
||
|
self.vec_reg[vdst + i] = *((addr + 4 * i as u64) as *const u32);
|
||
|
}),
|
||
|
32 => self.vec_reg[vdst].mut_lo16(*(addr as *const u16)),
|
||
|
35 => self.vec_reg[vdst].mut_hi16(*(addr as *const u16)),
|
||
|
// store
|
||
|
24 => *(addr as *mut u8) = self.vec_reg[data] as u8,
|
||
|
25 => *(addr as *mut u16) = self.vec_reg[data] as u16,
|
||
|
26..=29 => (0..op - 25).for_each(|i| {
|
||
|
*((addr + 4 * i as u64) as u64 as *mut u32) = self.vec_reg[data + i];
|
||
|
}),
|
||
|
37 => *(addr as *mut u16) = ((self.vec_reg[data] >> 16) & 0xffff) as u16,
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
}
|
||
|
}
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
}
|
||
|
// mubuf
|
||
|
else if instruction >> 26 == 0b111000 {
|
||
|
let instr = self.u64_instr();
|
||
|
let op = ((instr >> 18) & 0x7f) as usize;
|
||
|
match op {
|
||
|
43 => {} // NOTE: remu doesn't have an l0 cache, it just has the software managed lds
|
||
|
_ => todo_instr!(instruction)?,
|
||
|
};
|
||
|
} else {
|
||
|
todo_instr!(instruction)?;
|
||
|
}
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
fn cmpf<T>(&self, s0: T, s1: T, offset: u32) -> bool
|
||
|
where
|
||
|
T: Float + std::fmt::Display,
|
||
|
{
|
||
|
return match offset {
|
||
|
0 => true,
|
||
|
1 => s0 < s1,
|
||
|
2 => s0 == s1,
|
||
|
3 => s0 <= s1,
|
||
|
4 => s0 > s1,
|
||
|
5 => s0 != s1,
|
||
|
6 => s0 >= s1,
|
||
|
7 => (!s0.is_nan()) && (!s1.is_nan()),
|
||
|
8 => s0.is_nan() || s1.is_nan(),
|
||
|
9 => !(s0 >= s1),
|
||
|
10 => !(s0 != s1),
|
||
|
11 => !(s0 > s1),
|
||
|
12 => !(s0 <= s1),
|
||
|
13 => !(s0 == s1),
|
||
|
14 => !(s0 < s1),
|
||
|
15 => true,
|
||
|
_ => panic!("invalid offset for float compare {offset}"),
|
||
|
};
|
||
|
}
|
||
|
fn cmp_class_f64(&self, s0: f64, s1: u32) -> bool {
|
||
|
let offset = match s0 {
|
||
|
_ if s0.is_nan() => 1,
|
||
|
_ if s0.is_infinite() => match s0.signum() == -1.0 {
|
||
|
true => 2,
|
||
|
false => 9,
|
||
|
},
|
||
|
_ if s0.exponent() > 0 => match s0.signum() == -1.0 {
|
||
|
true => 3,
|
||
|
false => 8,
|
||
|
},
|
||
|
_ if s0.abs() > 0.0 => match s0.signum() == -1.0 {
|
||
|
true => 4,
|
||
|
false => 7,
|
||
|
},
|
||
|
_ => match s0.signum() == -1.0 {
|
||
|
true => 5,
|
||
|
false => 6,
|
||
|
},
|
||
|
};
|
||
|
((s1 >> offset) & 1) != 0
|
||
|
}
|
||
|
fn cmp_class_f32(&self, s0: f32, s1: u32) -> bool {
|
||
|
let offset = match s0 {
|
||
|
_ if (s0 as f64).is_nan() => 1,
|
||
|
_ if s0.exponent() == 255 => match s0.signum() == -1.0 {
|
||
|
true => 2,
|
||
|
false => 9,
|
||
|
},
|
||
|
_ if s0.exponent() > 0 => match s0.signum() == -1.0 {
|
||
|
true => 3,
|
||
|
false => 8,
|
||
|
},
|
||
|
_ if s0.abs() as f64 > 0.0 => match s0.signum() == -1.0 {
|
||
|
true => 4,
|
||
|
false => 7,
|
||
|
},
|
||
|
_ => match s0.signum() == -1.0 {
|
||
|
true => 5,
|
||
|
false => 6,
|
||
|
},
|
||
|
};
|
||
|
((s1 >> offset) & 1) != 0
|
||
|
}
|
||
|
fn cmp_class_f16(&self, s0: f16, s1: u16) -> bool {
|
||
|
let offset = match s0 {
|
||
|
_ if (f64::from(s0)).is_nan() => 1,
|
||
|
_ if s0.exponent() == 31 => match s0.signum() == f16::NEG_ONE {
|
||
|
true => 2,
|
||
|
false => 9,
|
||
|
},
|
||
|
_ if s0.exponent() > 0 => match s0.signum() == f16::NEG_ONE {
|
||
|
true => 3,
|
||
|
false => 8,
|
||
|
},
|
||
|
_ if f64::from(s0.abs()) > 0.0 => match s0.signum() == f16::NEG_ONE {
|
||
|
true => 4,
|
||
|
false => 7,
|
||
|
},
|
||
|
_ => match s0.signum() == f16::NEG_ONE {
|
||
|
true => 5,
|
||
|
false => 6,
|
||
|
},
|
||
|
};
|
||
|
((s1 >> offset) & 1) != 0
|
||
|
}
|
||
|
fn cmpi<T>(&self, s0: T, s1: T, offset: u32) -> bool
|
||
|
where
|
||
|
T: PartialOrd + PartialEq,
|
||
|
{
|
||
|
return match offset {
|
||
|
0 => false,
|
||
|
1 => s0 < s1,
|
||
|
2 => s0 == s1,
|
||
|
3 => s0 <= s1,
|
||
|
4 => s0 > s1,
|
||
|
5 => s0 != s1,
|
||
|
6 => s0 >= s1,
|
||
|
7 => true,
|
||
|
_ => panic!("invalid offset for integer compare {offset}"),
|
||
|
};
|
||
|
}
|
||
|
fn cls_i32(&self, s0: u32) -> u32 {
|
||
|
let mut ret: i32 = -1;
|
||
|
let s0 = s0 as i32;
|
||
|
for i in (1..=31).into_iter() {
|
||
|
if s0 >> (31 - i as u32) != s0 >> 31 {
|
||
|
ret = i;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
ret as u32
|
||
|
}
|
||
|
fn clz_i32_u32(&self, s0: u32) -> u32 {
|
||
|
let mut ret: i32 = -1;
|
||
|
for i in (0..=31).into_iter() {
|
||
|
if s0 >> (31 - i as u32) == 1 {
|
||
|
ret = i;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
ret as u32
|
||
|
}
|
||
|
|
||
|
/* ALU utils */
|
||
|
/// Resolve operand source codes shared by every ALUSrc width:
/// VCC/EXEC aliases, null/zero sources, SCC, and the 32-bit inline
/// literal (code 255), which is fetched from the instruction stream
/// once, cached in `self.simm`, and the stream pointer advanced.
fn _common_srcs(&mut self, code: usize) -> u32 {
    match code {
        VCC => self.vcc.value,
        // vcc_hi: remu is wave32-only, so s107 behaves as a plain SGPR.
        107 => self.scalar_reg[code as usize],
        EXEC => self.exec.value,
        NULL_SRC | 128 => 0,
        // 253 = SCC as a readable source.
        253 => *self.scc as u32,
        // 255 = literal constant: the next dword in the stream.
        255 => match self.simm {
            None => {
                let val = self.stream[self.pc_offset + 1];
                // Cache the literal and skip its dword so repeated operand
                // resolution within one instruction consumes it only once.
                self.simm = Some(val);
                self.pc_offset += 1;
                val
            }
            Some(val) => val,
        },
        _ => todo!("resolve_src={code}"),
    }
}
|
||
|
fn write_to_sdst(&mut self, sdst_bf: usize, val: u32) {
|
||
|
match sdst_bf {
|
||
|
// NOTE: remu is only wave32, vcc_hi is treated as a regular SGPR
|
||
|
0..=SGPR_SRC | 107 => self.scalar_reg[sdst_bf] = val,
|
||
|
VCC => self.vcc.value = val,
|
||
|
126 => self.exec.value = val,
|
||
|
_ => todo!("write to sdst {}", sdst_bf),
|
||
|
}
|
||
|
}
|
||
|
/// Record a carry-out bit destined for SGPR `idx` for the current lane.
/// Reuses the pending `(idx, WaveValue)` pair when one exists, so carry
/// bits from different lanes of the same instruction accumulate into
/// one wave value rather than overwriting each other.
fn set_sgpr_co(&mut self, idx: usize, val: bool) {
    // Start from the previously accumulated wave value, or a fresh one.
    let mut wv = self.sgpr_co.map(|(_, wv)| wv).unwrap_or_else(|| WaveValue::new(0, self.warp_size));
    // Mirror the active-lane selection currently used for VCC.
    wv.default_lane = self.vcc.default_lane;
    wv.set_lane(val);
    *self.sgpr_co = Some((idx, wv));
}
|
||
|
|
||
|
fn u64_instr(&mut self) -> u64 {
|
||
|
let msb = self.stream[self.pc_offset + 1] as u64;
|
||
|
let instr = msb << 32 | self.stream[self.pc_offset] as u64;
|
||
|
self.pc_offset += 1;
|
||
|
return instr;
|
||
|
}
|
||
|
|
||
|
/// Iterate a 16x16 b16 WMMA source matrix: for each of the first 16
/// lanes, yield the low then high 16-bit halves of VGPRs vsrc..=vsrc+7.
fn wmma_b16_16x16(&'a self, vsrc: usize) -> impl Iterator<Item = u16> + 'a {
    (0..16).flat_map(move |i| {
        let lane = self.vec_reg.get_lane(i);
        (vsrc..=vsrc + 7).flat_map(move |j| {
            // `vsrc` is an operand code; subtract the VGPR base to index
            // into the per-lane register file.
            let val = lane[j - VGPR_COUNT];
            // Low half-word first, then high.
            [(val & 0xffff) as u16, (val >> 16) as u16]
        })
    })
}
|
||
|
|
||
|
/// Iterate a 16x16 b32 WMMA matrix: element i is read from lane i % 32,
/// register vsrc + i/32 (operand code minus the VGPR base).
fn wmma_b32_16x16(&'a self, vsrc: usize) -> impl Iterator<Item = u32> + 'a {
    (0..256).map(move |i| self.vec_reg.get_lane(i % 32)[(i / 32) + vsrc - VGPR_COUNT])
}
|
||
|
}
|
||
|
|
||
|
fn wmma<T: Float>(a: Vec<T>, b: Vec<T>, c: Vec<T>) -> [T; 256] {
|
||
|
let mut ret = [T::zero(); 256];
|
||
|
for row in 0..16 {
|
||
|
for col in 0..16 {
|
||
|
let mut sum = T::zero();
|
||
|
for k in 0..16 {
|
||
|
let a_val = a[row * 16 + k];
|
||
|
let b_val = b[col * 16 + k];
|
||
|
sum = sum + (a_val * b_val);
|
||
|
}
|
||
|
let c_val = c[row * 16 + col];
|
||
|
ret[row * 16 + col] = sum + c_val;
|
||
|
}
|
||
|
}
|
||
|
ret
|
||
|
}
|
||
|
|
||
|
/// Width-polymorphic resolution of an ALU source operand code into a
/// value (implemented for u16, u32, u64, and f64 on `Thread`).
pub trait ALUSrc<T> {
    /// Resolve operand `code`: SGPR, VGPR, inline constant, or literal.
    /// Takes `&mut self` because literal resolution advances the stream.
    fn val(&mut self, code: usize) -> T;
}
|
||
|
impl ALUSrc<u16> for Thread<'_> {
|
||
|
fn val(&mut self, code: usize) -> u16 {
|
||
|
match code {
|
||
|
0..=SGPR_SRC => self.scalar_reg[code] as u16,
|
||
|
VGPR_COUNT..=511 => self.vec_reg[code - VGPR_COUNT] as u16,
|
||
|
129..=192 => (code - 128) as u16,
|
||
|
193..=208 => ((code - 192) as i16 * -1) as u16,
|
||
|
240..=247 => f16::from_f32(
|
||
|
[
|
||
|
(240, 0.5_f32),
|
||
|
(241, -0.5_f32),
|
||
|
(242, 1_f32),
|
||
|
(243, -1.0_f32),
|
||
|
(244, 2.0_f32),
|
||
|
(245, -2.0_f32),
|
||
|
(246, 4.0_f32),
|
||
|
(247, -4.0_f32),
|
||
|
]
|
||
|
.iter()
|
||
|
.find(|x| x.0 == code)
|
||
|
.unwrap()
|
||
|
.1,
|
||
|
)
|
||
|
.to_bits(),
|
||
|
_ => self._common_srcs(code) as u16,
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
impl ALUSrc<u32> for Thread<'_> {
|
||
|
fn val(&mut self, code: usize) -> u32 {
|
||
|
match code {
|
||
|
0..=SGPR_SRC => self.scalar_reg[code],
|
||
|
VGPR_COUNT..=511 => self.vec_reg[code - VGPR_COUNT],
|
||
|
129..=192 => (code - 128) as u32,
|
||
|
193..=208 => ((code - 192) as i32 * -1) as u32,
|
||
|
240..=247 => [
|
||
|
(240, 0.5_f32),
|
||
|
(241, -0.5_f32),
|
||
|
(242, 1_f32),
|
||
|
(243, -1.0_f32),
|
||
|
(244, 2.0_f32),
|
||
|
(245, -2.0_f32),
|
||
|
(246, 4.0_f32),
|
||
|
(247, -4.0_f32),
|
||
|
]
|
||
|
.iter()
|
||
|
.find(|x| x.0 == code)
|
||
|
.unwrap()
|
||
|
.1
|
||
|
.to_bits(),
|
||
|
_ => self._common_srcs(code),
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
impl ALUSrc<u64> for Thread<'_> {
|
||
|
fn val(&mut self, code: usize) -> u64 {
|
||
|
match code {
|
||
|
0..=SGPR_SRC => self.scalar_reg.read64(code),
|
||
|
VGPR_COUNT..=511 => self.vec_reg.read64(code - VGPR_COUNT),
|
||
|
129..=192 => (code - 128) as u64,
|
||
|
193..=208 => ((code - 192) as i64 * -1) as u64,
|
||
|
240..=247 => [
|
||
|
(240, 0.5_f64),
|
||
|
(241, -0.5_f64),
|
||
|
(242, 1_f64),
|
||
|
(243, -1.0_f64),
|
||
|
(244, 2.0_f64),
|
||
|
(245, -2.0_f64),
|
||
|
(246, 4.0_f64),
|
||
|
(247, -4.0_f64),
|
||
|
]
|
||
|
.iter()
|
||
|
.find(|x| x.0 == code)
|
||
|
.unwrap()
|
||
|
.1
|
||
|
.to_bits(),
|
||
|
_ => self._common_srcs(code) as u64,
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
impl ALUSrc<f64> for Thread<'_> {
|
||
|
fn val(&mut self, code: usize) -> f64 {
|
||
|
let uret: u64 = self.val(code);
|
||
|
match code {
|
||
|
SIMM_SRC => f64::from_bits(uret << 32),
|
||
|
_ => f64::from_bits(uret),
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#[cfg(test)]
mod test_alu_utils {
    use super::*;

    // A plain SGPR destination code routes the value into scalar_reg.
    #[test]
    fn test_write_to_sdst_sgpr() {
        let mut thread = _helper_test_thread();
        thread.write_to_sdst(10, 200);
        assert_eq!(thread.scalar_reg[10], 200);
    }

    // Writing to VCC replaces the wave value.
    // NOTE(review): 195935983 appears to be the input masked to the warp
    // width — confirm against WaveValue's value semantics.
    #[test]
    fn test_write_to_sdst_vcc_val() {
        let mut thread = _helper_test_thread();
        let val = 0b1011101011011011111011101111;
        thread.write_to_sdst(VCC, val);
        assert_eq!(thread.vcc.value, 195935983);
    }

    // clz counts leading zero bits; input 0 yields 0xffffffff (hardware
    // convention), not 32.
    #[test]
    fn test_clz_i32_u32() {
        let thread = _helper_test_thread();
        assert_eq!(thread.clz_i32_u32(0x00000000), 0xffffffff);
        assert_eq!(thread.clz_i32_u32(0x0000cccc), 16);
        assert_eq!(thread.clz_i32_u32(0xffff3333), 0);
        assert_eq!(thread.clz_i32_u32(0x7fffffff), 1);
        assert_eq!(thread.clz_i32_u32(0x80000000), 0);
        assert_eq!(thread.clz_i32_u32(0xffffffff), 0);
    }

    // cls counts leading bits equal to the sign bit; all-matching inputs
    // (0, -1) yield 0xffffffff.
    #[test]
    fn test_cls_i32() {
        let thread = _helper_test_thread();
        assert_eq!(thread.cls_i32(0x00000000), 0xffffffff);
        assert_eq!(thread.cls_i32(0x0000cccc), 16);
        assert_eq!(thread.cls_i32(0xffff3333), 16);
        assert_eq!(thread.cls_i32(0x7fffffff), 1);
        assert_eq!(thread.cls_i32(0x80000000), 1);
    }

    // Carry-out writes for successive lanes accumulate mutations in the
    // same pending (idx, WaveValue) pair rather than overwriting it.
    #[test]
    fn test_sgpr_co_init() {
        let mut thread = _helper_test_thread();
        thread.vcc.default_lane = Some(0);
        thread.set_sgpr_co(10, true);
        thread.vcc.default_lane = Some(1);
        assert_eq!(thread.sgpr_co.unwrap().0, 10);
        assert_eq!(thread.sgpr_co.unwrap().1.mutations.unwrap()[0], true);
        thread.set_sgpr_co(10, true);
        assert_eq!(thread.sgpr_co.unwrap().0, 10);
        assert_eq!(thread.sgpr_co.unwrap().1.mutations.unwrap()[1], true);
        assert_eq!(thread.sgpr_co.unwrap().1.mutations.unwrap()[0], true);
    }
}
|
||
|
|
||
|
#[cfg(test)]
mod test_smem {
    use super::*;

    // s_load_b32 from a host buffer address into a plain SGPR.
    #[test]
    fn test_s_load_b32_simple() {
        let mut thread = _helper_test_thread();
        let mut buf = vec![0u8; 4];
        let a: u32 = 0xDEADBEEF;
        unsafe {
            // SAFETY: buf holds exactly 4 bytes, enough for one u32.
            *(buf.as_mut_ptr() as *mut u32) = a;
        }
        let base_addr = buf.as_ptr() as u64;
        thread.scalar_reg.write64(0, base_addr);
        r(&vec![0xF4000040, 0xF8000000, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[1], a);
        // Leak the buffer: the emulator dereferenced its raw pointer.
        std::mem::forget(buf);
    }

    // s_load_b32 with vcc_lo as the *destination* register.
    #[test]
    fn test_s_load_b32_vcc() {
        let mut thread = _helper_test_thread();
        let mut buf = vec![0u8; 4];
        let a: u32 = 0xDEADBEEF;
        unsafe {
            // SAFETY: buf holds exactly 4 bytes, enough for one u32.
            *(buf.as_mut_ptr() as *mut u32) = a;
        }
        let base_addr = buf.as_ptr() as u64;
        thread.scalar_reg.write64(0, base_addr);
        r(&vec![0xF4001A80, 0xF8000000, END_PRG], &mut thread);
        assert_eq!(thread.vcc.value, a);
        std::mem::forget(buf);
    }

    // s_load_b32 where the *address* is supplied through vcc.
    #[test]
    fn test_s_load_b32_vcc_addr() {
        let mut thread = _helper_test_thread();
        let mut buf = vec![0u8; 4];
        let a: u32 = 0xDEADBEEF;
        unsafe {
            // SAFETY: buf holds exactly 4 bytes, enough for one u32.
            *(buf.as_mut_ptr() as *mut u32) = a;
        }
        let addr = buf.as_ptr() as u64;
        // NOTE: vcc is an alias for s[106:107]
        thread.scalar_reg.write64(VCC, addr);
        // TODO: vcc_lo should just read from s106
        thread.vcc.value = (addr & 0xffffffff) as u32;
        r(&vec![0xF4000035, 0xF8000000, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[0], a);
        std::mem::forget(buf);
    }
}
|
||
|
|
||
|
#[cfg(test)]
mod test_sop1 {
    use super::*;

    // s_brev_b32: bit-reverse, 8 (bit 3) -> bit 28 = 268435456.
    #[test]
    fn test_s_brev_b32() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[5] = 8;
        r(&vec![0xBE850405, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[5], 268435456);
    }

    // s_mov_b64 copies an SGPR pair and marks the thread as scalar-only.
    #[test]
    fn test_s_mov_b64() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg.write64(16, 5236523008);
        r(&vec![0xBE880110, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg.read64(8), 5236523008);
        assert_eq!(thread.scalar, true);
    }

    // s_mov_b32 with exec_lo as the source operand.
    #[test]
    fn test_mov_exec() {
        let mut thread = _helper_test_thread();
        thread.exec.value = 0b11111111110111111110111111111111;
        r(&vec![0xBE80007E, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[0], 0b11111111110111111110111111111111);
    }

    // s_mov_b32 between plain SGPRs.
    #[test]
    fn test_s_mov_b32() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[15] = 42;
        r(&vec![0xbe82000f, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[2], 42);
    }

    // s_bitset0_b32: clear the bit indexed by the source operand.
    // Triples are [initial value, bit index, expected result].
    #[test]
    fn test_s_bitset0_b32() {
        [
            [
                0b11111111111111111111111111111111,
                0b00000000000000000000000000000001,
                0b11111111111111111111111111111101,
            ],
            [
                0b11111111111111111111111111111111,
                0b00000000000000000000000000000010,
                0b11111111111111111111111111111011,
            ],
        ]
        .iter()
        .for_each(|[a, b, ret]| {
            let mut thread = _helper_test_thread();
            thread.scalar_reg[20] = *a;
            thread.scalar_reg[10] = *b;
            r(&vec![0xBE94100A, END_PRG], &mut thread);
            assert_eq!(thread.scalar_reg[20], *ret);
        });
    }

    // s_bitset1_b32: set the bit indexed by the source operand.
    // Triples are [initial value, bit index, expected result].
    #[test]
    fn test_s_bitset1_b32() {
        [
            [
                0b00000000000000000000000000000000,
                0b00000000000000000000000000000001,
                0b00000000000000000000000000000010,
            ],
            [
                0b00000000000000000000000000000000,
                0b00000000000000000000000000000010,
                0b00000000000000000000000000000100,
            ],
        ]
        .iter()
        .for_each(|[a, b, ret]| {
            let mut thread = _helper_test_thread();
            thread.scalar_reg[20] = *a;
            thread.scalar_reg[10] = *b;
            r(&vec![0xbe94120a, END_PRG], &mut thread);
            assert_eq!(thread.scalar_reg[20], *ret);
        });
    }

    // s_not_b32: bitwise NOT; SCC reflects whether the result is non-zero.
    // Triples are [input, expected result, expected SCC].
    #[test]
    fn test_s_not_b32() {
        [[0, 4294967295, 1], [1, 4294967294, 1], [u32::MAX, 0, 0]]
            .iter()
            .for_each(|[a, ret, scc]| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[10] = *a;
                r(&vec![0xBE8A1E0A, END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[10], *ret);
                assert_eq!(*thread.scc, *scc);
            });
    }
}
|
||
|
|
||
|
/// Tests for SOPK-format scalar instructions (16-bit immediate in the word).
#[cfg(test)]
mod test_sopk {
    use super::*;

    /// Compare-with-immediate where, per the test name, the 16-bit immediate
    /// 0xcd14 should be zero-extended for the first opcode and the result
    /// should flip under the second (sign-extending) opcode.
    #[test]
    fn test_cmp_zero_extend() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[20] = 0xcd14;
        r(&vec![0xB494CD14, END_PRG], &mut thread);
        assert_eq!(*thread.scc, 1);

        r(&vec![0xB194CD14, END_PRG], &mut thread);
        assert_eq!(*thread.scc, 0);
    }

    /// Compare-with-immediate where 0x2db4 has bit 15 clear, so sign
    /// extension leaves it unchanged and equal to s6.
    #[test]
    fn test_cmp_sign_extend() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[6] = 0x2db4;
        r(&vec![0xB1862DB4, END_PRG], &mut thread);
        assert_eq!(*thread.scc, 1);

        // NOTE(review): this re-runs the identical instruction word, unlike
        // test_cmp_zero_extend which uses a second opcode for its second
        // assertion — confirm whether a different encoding was intended here.
        r(&vec![0xB1862DB4, END_PRG], &mut thread);
        assert_eq!(*thread.scc, 1);
    }
}
|
||
|
|
||
|
/// Tests for SOP2-format scalar ALU instructions (two sources, one
/// destination, SCC side effect).
#[cfg(test)]
mod test_sop2 {
    use super::*;

    /// s_xor_b32 with EXEC as a source operand:
    /// 0xFFFFFFFF ^ 0b1001..0010 == 1840700269 (0x6DB6DB6D).
    #[test]
    fn test_xor_exec() {
        let mut thread = _helper_test_thread();
        thread.exec.value = 0b10010010010010010010010010010010;
        thread.scalar_reg[2] = 0b11111111111111111111111111111111;
        r(&vec![0x8D02027E, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[2], 1840700269);
    }

    /// s_add_u32: unsigned add; SCC carries out (MAX + 10 wraps to 9 with
    /// carry, MAX + 0 does not carry).
    #[test]
    fn test_s_add_u32() {
        [[10, 20, 30, 0], [u32::MAX, 10, 9, 1], [u32::MAX, 0, u32::MAX, 0]]
            .iter()
            .for_each(|[a, b, expected, scc]| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[2] = *a;
                thread.scalar_reg[6] = *b;
                r(&vec![0x80060206, END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[6], *expected);
                assert_eq!(*thread.scc, *scc);
            });
    }

    /// s_addc_u32: add with carry-in read from SCC and carry-out written
    /// back to SCC.
    #[test]
    fn test_s_addc_u32() {
        [[10, 20, 31, 1, 0], [10, 20, 30, 0, 0], [u32::MAX, 10, 10, 1, 1]]
            .iter()
            .for_each(|[a, b, expected, scc_before, scc_after]| {
                let mut thread = _helper_test_thread();
                *thread.scc = *scc_before;
                thread.scalar_reg[7] = *a;
                thread.scalar_reg[3] = *b;
                r(&vec![0x82070307, END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[7], *expected);
                assert_eq!(*thread.scc, *scc_after);
            });
    }

    /// s_add_i32: signed add; SCC signals signed overflow (MAX + 1 wraps to
    /// i32::MIN with SCC=1). Sources are s14/s10, destination is s6.
    #[test]
    fn test_s_add_i32() {
        [[-10, 20, 10, 0], [i32::MAX, 1, -2147483648, 1]]
            .iter()
            .for_each(|[a, b, expected, scc]| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[14] = *a as u32;
                thread.scalar_reg[10] = *b as u32;
                r(&vec![0x81060E0A, END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[6], *expected as u32);
                assert_eq!(*thread.scc, *scc as u32);
            });
    }

    /// s_sub_i32: signed subtract; SCC signals signed overflow
    /// (MAX - (-1) overflows to i32::MIN).
    #[test]
    fn test_s_sub_i32() {
        [[-10, 20, -30, 0], [i32::MAX, -1, -2147483648, 1]]
            .iter()
            .for_each(|[a, b, expected, scc]| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[13] = *a as u32;
                thread.scalar_reg[8] = *b as u32;
                r(&vec![0x818C080D, END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[12], *expected as u32);
                assert_eq!(*thread.scc, *scc as u32);
            });
    }

    /// s_lshl_b32 with an inline-constant shift amount; SCC is 1 iff the
    /// result is non-zero (second case shifts 0 and gets SCC=0).
    #[test]
    fn test_s_lshl_b32() {
        [[20, 40, 1], [0, 0, 0]].iter().for_each(|[a, expected, scc]| {
            let mut thread = _helper_test_thread();
            thread.scalar_reg[15] = *a as u32;
            r(&vec![0x8408810F, END_PRG], &mut thread);
            assert_eq!(thread.scalar_reg[8], *expected as u32);
            assert_eq!(*thread.scc, *scc as u32);
        });
    }

    /// s_lshl_b64 on the register pair s[2:3]: (u64::MAX - 30) << 2, checked
    /// half by half.
    #[test]
    fn test_s_lshl_b64() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg.write64(2, u64::MAX - 30);
        r(&vec![0x84828202, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[2], 4294967172);
        assert_eq!(thread.scalar_reg[3], 4294967295);
        assert_eq!(*thread.scc, 1);
    }

    /// s_ashr_i32 by 31: a positive value shifts down to 0, so SCC is 0.
    #[test]
    fn test_s_ashr_i32() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[2] = 36855;
        r(&vec![0x86039F02, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[3], 0);
        assert_eq!(*thread.scc, 0);
    }

    /// VCC_LO used as a plain scalar source: 0x55 & 29 == 21.
    #[test]
    fn test_source_vcc() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[10] = 0x55;
        thread.vcc.value = 29;
        r(&vec![0x8B140A6A, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[20], 21);
    }

    /// s_min_i32: signed minimum of -42 and -92; SCC is set (per the ISA it
    /// records which source was selected — not re-derived here).
    #[test]
    fn test_s_min_i32() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[2] = -42i32 as u32;
        thread.scalar_reg[3] = -92i32 as u32;
        r(&vec![0x89020203, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[2], -92i32 as u32);
        assert_eq!(*thread.scc, 1);
    }

    /// s_mul_hi_u32: high 32 bits of the unsigned 64-bit product.
    #[test]
    fn test_s_mul_hi_u32() {
        [[u32::MAX, 10, 9], [u32::MAX / 2, 4, 1]].iter().for_each(|[a, b, expected]| {
            let mut thread = _helper_test_thread();
            thread.scalar_reg[0] = *a;
            thread.scalar_reg[8] = *b;
            r(&vec![0x96810800, END_PRG], &mut thread);
            assert_eq!(thread.scalar_reg[1], *expected);
        });
    }

    /// s_mul_hi_i32: high 32 bits of the signed 64-bit product
    /// (2 * -2 == -4 has a high word of -1).
    #[test]
    fn test_s_mul_hi_i32() {
        [[(u64::MAX) as i32, (u64::MAX / 2) as i32, 0], [2, -2, -1]]
            .iter()
            .for_each(|[a, b, expected]| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[0] = *a as u32;
                thread.scalar_reg[8] = *b as u32;
                r(&vec![0x97010800, END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[1], *expected as u32);
            });
    }

    /// s_mul_i32: low 32 bits of the product; signed inputs multiply
    /// correctly (-10 * -10 == 100).
    #[test]
    fn test_s_mul_i32() {
        [[40, 2, 80], [-10, -10, 100]].iter().for_each(|[a, b, expected]| {
            let mut thread = _helper_test_thread();
            thread.scalar_reg[0] = *a as u32;
            thread.scalar_reg[6] = *b as u32;
            r(&vec![0x96000600, END_PRG], &mut thread);
            assert_eq!(thread.scalar_reg[0], *expected as u32);
        });
    }

    /// s_bfe_u64: unsigned bit-field extract from the pair s[6:7]; the
    /// 32-bit literal 524288 (0x80000) encodes the offset/width field.
    #[test]
    fn test_s_bfe_u64() {
        [[2, 4, 2, 0], [800, 400, 32, 0], [-10i32 as u32, 3, 246, 0], [u32::MAX, u32::MAX, 255, 0]]
            .iter()
            .for_each(|[a_lo, a_hi, ret_lo, ret_hi]| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[6] = *a_lo;
                thread.scalar_reg[7] = *a_hi;
                r(&vec![0x940cff06, 524288, END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[12], *ret_lo);
                assert_eq!(thread.scalar_reg[13], *ret_hi);
            });
    }

    /// s_bfe_i64: signed bit-field extract; the extracted field is
    /// sign-extended into the full 64-bit pair (-2 stays -2/-1).
    #[test]
    fn test_s_bfe_i64() {
        [[131073, 0, 1, 0, 0x100000], [-2, 0, -2, -1, 524288], [2, 0, 2, 0, 524288]]
            .iter()
            .for_each(|[a_lo, a_hi, ret_lo, ret_hi, shift]| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[6] = *a_lo as u32;
                thread.scalar_reg[7] = *a_hi as u32;
                r(&vec![0x948cff06, *shift as u32, END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[12], *ret_lo as u32);
                assert_eq!(thread.scalar_reg[13], *ret_hi as u32);
            });
    }

    /// s_bfe_u32 with literal 0x00080008 (offset 8, width 8): extracts the
    /// second byte of the source.
    #[test]
    fn test_s_bfe_u32() {
        [
            [67305985, 2],
            [0b100000000110111111100000001, 0b1111111],
            [0b100000000100000000000000001, 0b0],
            [0b100000000111000000000000001, 0b10000000],
            [0b100000000111111111100000001, 0b11111111],
        ]
        .iter()
        .for_each(|[a, ret]| {
            let mut thread = _helper_test_thread();
            thread.scalar_reg[0] = *a;
            r(&vec![0x9303FF00, 0x00080008, END_PRG], &mut thread);
            assert_eq!(thread.scalar_reg[3], *ret);
        });
    }

    /// s_pack_{ll,lh,hh,hl}_b32_b16: packs 16-bit halves of s0 (0x12345678)
    /// and the 32-bit literal (0x9ABCDEF0) into s3, one opcode per variant.
    #[test]
    fn test_s_pack_xx_b32_b16() {
        let mut thread = _helper_test_thread();
        // ll
        thread.scalar_reg[0] = 0x12345678;
        r(&vec![0x9903ff00, 0x9ABCDEF0, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[3], 0xdef05678);
        // lh
        r(&vec![0x9983ff00, 0x9ABCDEF0, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[3], 0x9abc5678);
        // hh (0x9ABC1234)
        r(&vec![0x9a03ff00, 0x9ABCDEF0, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[3], 2596016692);
        // hl (0xDEF01234)
        r(&vec![0x9a83ff00, 0x9ABCDEF0, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[3], 3740275252);
    }
}
|
||
|
|
||
|
/// Tests for SOPC-format scalar compare instructions (result goes to SCC).
#[cfg(test)]
mod test_sopc {
    use super::*;

    /// Bit-compare via SCC: one source supplies the word, the other the bit
    /// index. The cases pin SCC for bit set, bit clear, and a high bit.
    /// NOTE(review): expected SCC values don't obviously match a plain
    /// "bit is zero" reading of the name — the operand order of 0xBF0C0304
    /// decides which register is the bit index; confirm against the decoder.
    #[test]
    fn test_s_bitcmp0_b32() {
        [[0b00, 0b1, 0], [0b01, 0b1, 1], [0b10, 0b1, 1], [0b10000000, 0b1, 0]]
            .iter()
            .for_each(|[s0, s1, scc]| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[3] = *s0;
                thread.scalar_reg[4] = *s1;
                r(&vec![0xBF0C0304, END_PRG], &mut thread);
                assert_eq!(*thread.scc, *scc);
            })
    }
}
|
||
|
|
||
|
/// Tests for VOPD dual-issue instructions: an X half and a Y half encoded
/// and executed together, optionally sharing a 32-bit literal.
#[cfg(test)]
mod test_vopd {
    use super::*;

    /// 32-bit literal consumed only by the X half (a multiply); the Y half
    /// moves the inline constant 0 into v1.
    #[test]
    fn test_inline_const_vopx_only() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = f32::to_bits(0.5);
        let constant = f32::from_bits(0x39a8b099);
        thread.vec_reg[1] = 10;
        r(&vec![0xC8D000FF, 0x00000080, 0x39A8B099, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[0]), 0.5 * constant);
        assert_eq!(thread.vec_reg[1], 0);
    }

    /// 32-bit literal consumed only by the Y half; the second program mixes
    /// a literal operand with VGPR operands across both halves.
    #[test]
    fn test_inline_const_vopy_only() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 10;
        thread.vec_reg[1] = 10;
        r(&vec![0xCA100080, 0x000000FF, 0x3E15F480, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 0);
        assert_eq!(thread.vec_reg[1], 0x3e15f480);

        let mut thread = _helper_test_thread();
        thread.vec_reg[18] = f32::to_bits(2.0);
        thread.vec_reg[32] = f32::to_bits(4.0);
        thread.vec_reg[7] = 10;
        r(&vec![0xC9204112, 0x00060EFF, 0x0000006E, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[0]), 2.0f32 + 4.0f32);
        assert_eq!(thread.vec_reg[7], 120);
    }

    /// Both halves reference the same 32-bit literal (0x3e800000 == 0.25).
    #[test]
    fn test_inline_const_shared() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[2] = f32::to_bits(2.0);
        thread.vec_reg[3] = f32::to_bits(4.0);
        let constant = f32::from_bits(0x3e800000);
        r(&vec![0xC8C604FF, 0x020206FF, 0x3E800000, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[2]), 2.0 * constant);
        assert_eq!(f32::from_bits(thread.vec_reg[3]), 4.0 * constant);
    }

    /// Shared immediate with fmac-style halves: both destinations get
    /// src * simm + dst.
    #[test]
    fn test_simm_op_shared_1() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[23] = f32::to_bits(4.0);
        thread.vec_reg[12] = f32::to_bits(2.0);

        thread.vec_reg[13] = f32::to_bits(10.0);
        thread.vec_reg[24] = f32::to_bits(3.0);

        let simm = f32::from_bits(0x3e000000);
        r(&vec![0xC8841917, 0x0C0C1B18, 0x3E000000, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[12]), 4.0 * simm + 2.0);
        assert_eq!(f32::from_bits(thread.vec_reg[13]), 3.0 * simm + 10.0);
    }

    /// Same shape as test_simm_op_shared_1 but with the literal operand in a
    /// different source slot (simm == 0.125 == 0x3E000000).
    #[test]
    fn test_simm_op_shared_2() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[29] = f32::to_bits(4.0);
        thread.vec_reg[10] = f32::to_bits(2.0);

        thread.vec_reg[11] = f32::to_bits(10.0);
        thread.vec_reg[26] = f32::to_bits(6.5);

        let simm = 0.125;
        r(&vec![0xC880151D, 0x0A0A34FF, 0x3E000000, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[10]), 4.0 * simm + 2.0);
        assert_eq!(f32::from_bits(thread.vec_reg[11]), simm * 6.5 + 10.0);
    }

    /// X half adds the inline constant 0 (leaving v0 at 10.5); Y half moves
    /// 0 into v1.
    #[test]
    fn test_add_mov() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = f32::to_bits(10.5);
        r(&vec![0xC9100300, 0x00000080, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[0]), 10.5);
        assert_eq!(thread.vec_reg[1], 0);
    }

    /// Dual issue of a max and an add: v0 becomes 5.0 + 2.0, v1 keeps 2.0.
    #[test]
    fn test_max_add() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = f32::to_bits(5.0);
        thread.vec_reg[3] = f32::to_bits(2.0);
        thread.vec_reg[1] = f32::to_bits(2.0);
        r(&vec![0xCA880280, 0x01000700, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[0]), 7.0);
        assert_eq!(f32::from_bits(thread.vec_reg[1]), 2.0);
    }
}
|
||
|
/// Tests for VOP1-format vector instructions (single source, single dest).
#[cfg(test)]
mod test_vop1 {
    use super::*;
    use float_cmp::approx_eq;

    /// v_cvt_f32_f64 (VOP3 encoding). The second run feeds -2.0 yet expects
    /// +2.0 — the encoding appears to carry an |src| (abs) source modifier.
    /// NOTE(review): confirm the abs bit in 0xD58F0101 against the decoder.
    #[test]
    fn test_v_cvt_f32_f64() {
        let mut thread = _helper_test_thread();
        thread.vec_reg.write64(0, 2.0f64.to_bits());
        r(&vec![0xD58F0101, 0x00000100, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[1]), 2.0);
        thread.vec_reg.write64(0, (-2.0f64).to_bits());
        r(&vec![0xD58F0101, 0x00000100, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[1]), 2.0);
    }

    /// v_mov_b32 of the inline constant 0 into three destination VGPRs.
    #[test]
    fn test_v_mov_b32_srrc_const0() {
        let mut thread = _helper_test_thread();
        r(&vec![0x7e000280, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 0);
        r(&vec![0x7e020280, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[1], 0);
        r(&vec![0x7e040280, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 0);
    }

    /// v_mov_b32 sourcing a scalar register into a VGPR.
    #[test]
    fn test_v_mov_b32_srrc_register() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[6] = 31;
        r(&vec![0x7e020206, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[1], 31);
    }

    /// Runs the exp (2^x) VOP1 op on `val` in v6 and returns the f32 result
    /// left in v6.
    fn helper_test_fexp(val: f32) -> f32 {
        let mut thread = _helper_test_thread();
        thread.vec_reg[6] = val.to_bits();
        r(&vec![0x7E0C4B06, END_PRG], &mut thread);
        f32::from_bits(thread.vec_reg[6])
    }

    /// exp must be within f32::EPSILON of 2^x for small integer inputs.
    #[test]
    fn test_fexp_1ulp() {
        let test_values = [-2.0, -1.0, 0.0, 1.0, 2.0, 3.0];
        for &val in test_values.iter() {
            let expected = (2.0_f32).powf(val);
            assert!((helper_test_fexp(val) - expected).abs() <= f32::EPSILON);
        }
    }

    /// exp edge cases by bit pattern: 2^-inf == 0, 2^-0 == 1, 2^+inf == +inf.
    #[test]
    fn test_fexp_flush_denormals() {
        assert_eq!(helper_test_fexp(f32::from_bits(0xff800000)), 0.0);
        assert_eq!(helper_test_fexp(f32::from_bits(0x80000000)), 1.0);
        assert_eq!(helper_test_fexp(f32::from_bits(0x7f800000)), f32::from_bits(0x7f800000));
    }

    /// f32 -> i32 conversion truncates toward zero (10.42 -> 10,
    /// -20.08 -> -20).
    #[test]
    fn test_cast_f32_i32() {
        let mut thread = _helper_test_thread();
        [(10.42, 10i32), (-20.08, -20i32)].iter().for_each(|(src, expected)| {
            thread.scalar_reg[2] = f32::to_bits(*src);
            r(&vec![0x7E001002, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[0] as i32, *expected);
        })
    }

    /// u32 -> f32 conversion: 2 becomes 2.0f32 (bits 1073741824).
    #[test]
    fn test_cast_f32_u32() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[4] = 2;
        r(&vec![0x7E000C04, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 1073741824);
    }

    /// f32 -> u32 conversion of a large float bit pattern.
    #[test]
    fn test_cast_u32_f32() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 1325400062;
        r(&vec![0x7E000F00, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 2147483392);
    }

    /// i32 -> f32 conversion for positive and negative inputs.
    #[test]
    fn test_cast_i32_f32() {
        let mut thread = _helper_test_thread();
        [(10.0, 10i32), (-20.0, -20i32)].iter().for_each(|(expected, src)| {
            thread.vec_reg[0] = *src as u32;
            r(&vec![0x7E000B00, END_PRG], &mut thread);
            assert_eq!(f32::from_bits(thread.vec_reg[0]), *expected);
        })
    }

    /// v_readfirstlane_b32 with the thread's default lane configuration.
    #[test]
    fn test_v_readfirstlane_b32_basic() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 2147483392;
        r(&vec![0x7E060500, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[3], 2147483392);
    }

    /// v_readfirstlane_b32 lane selection: the first lane enabled in EXEC
    /// wins; with EXEC empty the assertions pin a fallback to lane 0.
    #[test]
    fn test_v_readfirstlane_b32_fancy() {
        let mut thread = _helper_test_thread();
        thread.vec_reg.get_lane_mut(0)[13] = 44;
        thread.vec_reg.get_lane_mut(1)[13] = 22;
        thread.exec.value = 0b00000000000000000000000000000010;
        thread.exec.default_lane = Some(2);
        r(&vec![0x7E1A050D, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[13], 22);

        // EXEC empty: expected value comes from lane 0.
        thread.exec.value = 0b00000000000000000000000000000000;
        thread.exec.default_lane = Some(1);
        r(&vec![0x7E1A050D, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[13], 44);

        // Only lane 31 enabled: its value is read.
        thread.exec.value = 0b10000000000000000000000000000000;
        thread.vec_reg.get_lane_mut(31)[13] = 88;
        thread.exec.default_lane = Some(1);
        r(&vec![0x7E1A050D, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[13], 88);
    }

    /// v_cls_i32 across zero, positive, and negative bit patterns; an input
    /// of 0 yields 0xffffffff (-1).
    #[test]
    fn test_v_cls_i32() {
        // Runs the instruction with `val` in v2 and returns v4.
        fn t(val: u32) -> u32 {
            let mut thread = _helper_test_thread();
            thread.vec_reg[2] = val;
            r(&vec![0x7E087702, END_PRG], &mut thread);
            return thread.vec_reg[4];
        }

        assert_eq!(t(0x00000000), 0xffffffff);
        assert_eq!(t(0x40000000), 1);
        assert_eq!(t(0x80000000), 1);
        assert_eq!(t(0x0fffffff), 4);
        assert_eq!(t(0xffff0000), 16);
        assert_eq!(t(0xfffffffe), 31);
    }

    /// v_rndne_f32: round to nearest, ties to even.
    #[test]
    fn test_v_rndne_f32() {
        [
            [1.2344, 1.0],
            [2.3, 2.0], // [0.5f32, 0.0f32],
            [0.51, 1.0],
            [f32::from_bits(1186963295), f32::from_bits(1186963456)],
        ]
        .iter()
        .for_each(|[a, ret]| {
            let mut thread = _helper_test_thread();
            thread.vec_reg[0] = f32::to_bits(*a);
            r(&vec![0x7E024700, END_PRG], &mut thread);
            assert_eq!(f32::from_bits(thread.vec_reg[1]), *ret);
        })
    }

    /// v_rndne_f64 on an f64 split across v[0:1]; the expected result bits
    /// (0, 0x40080000) encode 3.0.
    #[test]
    fn test_v_rndne_f64() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 0x652b82fe;
        thread.vec_reg[1] = 0x40071547;
        r(&vec![0x7E043300, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 0);
        assert_eq!(thread.vec_reg[3], 1074266112);
    }

    /// v_cvt_i32_f64: 3.0 (f64 bits in v[2:3]) converts to the integer 3.
    #[test]
    fn test_v_cvt_i32_f64() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[2] = 0;
        thread.vec_reg[3] = 0x40080000;
        r(&vec![0x7E080702, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 3);
    }

    /// v_frexp_mant_f64: mantissa normalized into [0.5, 1); NaN propagates.
    #[test]
    fn test_v_frexp_mant_f64() {
        [[2.0, 0.5], [1.0, 0.5], [0.54, 0.54], [f64::NAN, f64::NAN]]
            .iter()
            .for_each(|[x, expected]| {
                let mut thread = _helper_test_thread();
                thread.vec_reg.write64(0, f64::to_bits(*x));
                r(&vec![0x7E047B00, END_PRG], &mut thread);
                let ret = f64::from_bits(thread.vec_reg.read64(2));
                if ret.is_nan() {
                    // NaN != NaN, so compare class membership instead.
                    assert!(ret.is_nan() && expected.is_nan());
                } else {
                    assert_eq!(f64::from_bits(thread.vec_reg.read64(2)), *expected)
                }
            })
    }

    /// v_rcp_f64: reciprocal of 2.0 (bits 0/1073741824) is 0.5
    /// (bits 0/1071644672).
    #[test]
    fn test_v_rcp_f64() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 0;
        thread.vec_reg[1] = 1073741824;
        r(&vec![0x7E045F00, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 0);
        assert_eq!(thread.vec_reg[3], 1071644672);
    }

    /// v_rsq_f32: 1/sqrt(4.0) == 0.5 exactly.
    #[test]
    fn test_v_rsq_f32() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = f32::to_bits(4.0);
        r(&vec![0x7E005D00, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[0]), 0.5);
    }

    /// v_frexp_exp_i32_f64: binary exponent of the input; -inf maps to 0.
    #[test]
    fn test_v_frexp_exp_i32_f64() {
        [(3573412790272.0, 42), (69.0, 7), (2.0, 2), (f64::NEG_INFINITY, 0)]
            .iter()
            .for_each(|(x, ret)| {
                let mut thread = _helper_test_thread();
                thread.vec_reg.write64(0, f64::to_bits(*x));
                r(&vec![0x7E047900, END_PRG], &mut thread);
                assert_eq!(thread.vec_reg[2], *ret);
            })
    }

    /// v_rsq_f64 is an estimate, so compare approximately (1/sqrt(2) ~ 0.707).
    #[test]
    fn test_v_rsq_f64() {
        [(2.0, 0.707)].iter().for_each(|(x, ret)| {
            let mut thread = _helper_test_thread();
            thread.vec_reg.write64(0, f64::to_bits(*x));
            println!("{} {}", thread.vec_reg[0], thread.vec_reg[1]);
            r(&vec![0x7E046300, END_PRG], &mut thread);
            assert!(approx_eq!(f64, f64::from_bits(thread.vec_reg.read64(2)), *ret, (0.01, 2)));
        })
    }
}
|
||
|
|
||
|
/// Tests for VOPC-format vector compares (per-lane results written to
/// VCC/EXEC) and the cmp_class helper methods.
#[cfg(test)]
mod test_vopc {
    use super::*;

    /// v_cmp_gt_i32 against the inline constant -1: true for -4, false
    /// for +4.
    #[test]
    fn test_v_cmp_gt_i32() {
        let mut thread = _helper_test_thread();

        thread.vec_reg[1] = (4_i32 * -1) as u32;
        r(&vec![0x7c8802c1, END_PRG], &mut thread);
        assert_eq!(thread.vcc.read(), true);

        thread.vec_reg[1] = 4;
        r(&vec![0x7c8802c1, END_PRG], &mut thread);
        assert_eq!(thread.vcc.read(), false);
    }

    /// v_cmpx_nlt_f32: "not less than" writes its per-lane result to EXEC.
    #[test]
    fn test_v_cmpx_nlt_f32() {
        let mut thread = _helper_test_thread();
        thread.exec.value = 0b010011;
        thread.vec_reg[0] = f32::to_bits(0.9);
        thread.vec_reg[3] = f32::to_bits(0.4);
        r(&vec![0x7D3C0700, END_PRG], &mut thread);
        assert_eq!(thread.exec.read(), true);
    }

    /// v_cmpx_gt_i32 (e32) against the 32-bit literal 0x41 (65), signed:
    /// false when v3 == 100, true when v3 == -20.
    #[test]
    fn test_v_cmpx_gt_i32_e32() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[3] = 100;
        r(&vec![0x7D8806FF, 0x00000041, END_PRG], &mut thread);
        assert_eq!(thread.exec.read(), false);

        thread.vec_reg[3] = -20i32 as u32;
        r(&vec![0x7D8806FF, 0x00000041, END_PRG], &mut thread);
        assert_eq!(thread.exec.read(), true);
    }

    /// cmp_class_f32: each bit of the mask selects an IEEE class; the helper
    /// returns whether the value's class bit is present in the mask. The
    /// assertions pin NaN, +/-inf, normals, subnormals, and signed zeros.
    #[test]
    fn test_cmp_class_f32() {
        let thread = _helper_test_thread();
        assert!(!thread.cmp_class_f32(f32::NAN, 0b00001));
        assert!(thread.cmp_class_f32(f32::NAN, 0b00010));

        assert!(thread.cmp_class_f32(f32::INFINITY, 0b00000000000000000000001000000000));
        assert!(!thread.cmp_class_f32(f32::INFINITY, 0b00000000000000000000000000000010));

        assert!(thread.cmp_class_f32(f32::NEG_INFINITY, 0b00000000000000000000000000000100));
        assert!(!thread.cmp_class_f32(f32::NEG_INFINITY, 0b00000000000000000000010000000000));

        assert!(!thread.cmp_class_f32(0.752, 0b00000000000000000000000000000000));
        assert!(thread.cmp_class_f32(0.752, 0b00000000000000000000000100000000));

        assert!(!thread.cmp_class_f32(-0.752, 0b00000000000000000000010000000000));
        assert!(thread.cmp_class_f32(-0.752, 0b00000000000000000000010000001000));

        // 1.0e-42 is an f32 subnormal.
        assert!(!thread.cmp_class_f32(1.0e-42, 0b11111111111111111111111101111111));
        assert!(thread.cmp_class_f32(1.0e-42, 0b00000000000000000000000010000000));

        assert!(thread.cmp_class_f32(-1.0e-42, 0b00000000000000000000000000010000));
        assert!(!thread.cmp_class_f32(-1.0e-42, 0b11111111111111111111111111101111));

        assert!(thread.cmp_class_f32(-0.0, 0b00000000000000000000000000100000));
        assert!(thread.cmp_class_f32(0.0, 0b00000000000000000000000001000000));
    }

    /// cmp_class_f64: same mask layout as the f32 variant. Note 1.0e-42 is a
    /// perfectly normal f64, which is why the subnormal expectations differ
    /// from test_cmp_class_f32.
    #[test]
    fn test_cmp_class_f64() {
        let thread = _helper_test_thread();

        assert!(!thread.cmp_class_f64(f64::NAN, 0b00001));
        assert!(thread.cmp_class_f64(f64::NAN, 0b00010));

        assert!(thread.cmp_class_f64(f64::INFINITY, 0b00000000000000000000001000000000));
        assert!(!thread.cmp_class_f64(f64::INFINITY, 0b00000000000000000000000000000010));

        assert!(thread.cmp_class_f64(f64::NEG_INFINITY, 0b00000000000000000000000000000100));
        assert!(!thread.cmp_class_f64(f64::NEG_INFINITY, 0b00000000000000000000010000000000));

        assert!(!thread.cmp_class_f64(0.752, 0b00000000000000000000000000000000));
        assert!(thread.cmp_class_f64(0.752, 0b00000000000000000000000100000000));

        assert!(!thread.cmp_class_f64(-1.0e-42, 0b00000000000000000000000000010000));
        assert!(thread.cmp_class_f64(-1.0e-42, 0b11111111111111111111111111101111));

        assert!(thread.cmp_class_f64(-0.0, 0b00000000000000000000000000100000));
        assert!(thread.cmp_class_f64(0.0, 0b00000000000000000000000001000000));
    }
}
|
||
|
/// Tests for VOP2-format vector instructions (two sources, one destination).
#[cfg(test)]
mod test_vop2 {
    use super::*;

    /// v_add_f32_e32 with a scalar-register source: 42.0 + 1.0 == 43.0.
    #[test]
    fn test_v_add_f32_e32() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[2] = f32::to_bits(42.0);
        thread.vec_reg[0] = f32::to_bits(1.0);
        r(&vec![0x06000002, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[0]), 43.0);
    }

    /// v_and_b32 with an inline constant: 15 & 2 == 2.
    #[test]
    fn test_v_and_b32() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[10] = 15;
        r(&vec![0x36141482, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[10], 2);
    }

    /// v_mul_f32_e32: 21.0 * 2.0 into a third VGPR.
    #[test]
    fn test_v_mul_f32_e32() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[2] = f32::to_bits(21.0);
        thread.vec_reg[4] = f32::to_bits(2.0);
        r(&vec![0x10060504, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[3]), 42.0);
    }

    /// v_ashrrev_i32 by 31: all-ones stays -1 under an arithmetic shift.
    #[test]
    fn test_v_ashrrev_i32() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 4294967295;
        r(&vec![0x3402009F, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[1] as i32, -1);
    }

    /// v_mul_i32_i24: only the low 24 bits of each source participate; bit
    /// 23 acts as a sign bit (third case yields a negative product).
    #[test]
    fn test_v_mul_i32_i24() {
        [
            [18, 0x64, 1800],
            [0b10000000000000000000000000, 0b1, 0],
            [0b100000000000000000000000, 0b1, 0b11111111100000000000000000000000],
        ]
        .iter()
        .for_each(|[a, b, ret]| {
            let mut thread = _helper_test_thread();
            thread.vec_reg[1] = *a;
            r(&vec![0x124E02FF, *b, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[39], *ret);
        });
    }

    /// v_add_nc_u32 with an inline constant source: 7 + 56 == 63.
    #[test]
    fn test_v_add_nc_u32_const() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[18] = 7;
        r(&vec![0x4A3024B8, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[24], 63);
    }

    /// v_add_nc_u32 on a bit pattern that reads as negative: wrapping
    /// unsigned add (-17 + 7 == -10 in two's complement).
    #[test]
    fn test_v_add_nc_u32_sint() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[14] = 7;
        thread.vec_reg[6] = 4294967279;
        r(&vec![0x4A0C1D06, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[6], 4294967286);
    }
}
|
||
|
|
||
|
/// Tests for VOP3SD-format instructions: vector ops that also write a
/// per-lane carry/condition mask to a scalar destination.
#[cfg(test)]
mod test_vopsd {
    use super::*;

    /// v_add_co_u32 with the carry destination (s10) starting at zero: the
    /// executing lane (1) overflows, so bit 1 of the mask is set.
    #[test]
    fn test_v_add_co_u32_scalar_co_zero() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[10] = 0;
        thread.vcc.default_lane = Some(1);
        thread.vec_reg.default_lane = Some(1);
        thread.vec_reg[10] = u32::MAX;
        thread.vec_reg[20] = 20;
        r(&vec![0xD7000A0A, 0x0002290A, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[10], 19);
        assert_eq!(thread.scalar_reg[10], 2);
    }

    /// Same add, but the carry destination starts as all-ones: only the
    /// executing lane's bit ends up in the per-thread result.
    #[test]
    fn test_v_add_co_u32_scalar_co_override() {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[10] = 0b11111111111111111111111111111111;
        thread.vcc.default_lane = Some(2);
        thread.vec_reg.default_lane = Some(2);
        thread.vec_reg[10] = u32::MAX;
        thread.vec_reg[20] = 20;
        r(&vec![0xD7000A0A, 0x0002290A, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[10], 19);
        // NOTE: the co mask only writes to the bit that it needs to write, then at the _wave_
        // level, the final result accumulates
        assert_eq!(thread.scalar_reg[10], 0b100);
    }

    /// v_add_co_ci_u32: add with carry-in taken per lane from the mask in
    /// s20 (0b10) and carry-out written back; lane 1 sees carry-in 1 and
    /// overflows.
    #[test]
    fn test_v_add_co_ci_u32() {
        [[0, 0, 0b0], [1, -1i32 as usize, 0b10]].iter().for_each(|[lane_id, result, carry_out]| {
            let mut thread = _helper_test_thread();
            thread.vcc.default_lane = Some(*lane_id);
            thread.vec_reg.default_lane = Some(*lane_id);
            thread.scalar_reg[20] = 0b10;
            thread.vec_reg[1] = 2;
            thread.vec_reg[2] = 2;
            r(&vec![0xD5211401, 0x00520501, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[1], *result as u32);
            assert_eq!(thread.scalar_reg[20], *carry_out as u32);
        })
    }

    /// v_sub_co_ci_u32: subtract with per-lane borrow-in from s20 (0b1010)
    /// and borrow-out written to the same mask.
    #[test]
    fn test_v_sub_co_ci_u32() {
        [[3, 2, 0b1000], [2, 0, 0b100]].iter().for_each(|[lane_id, result, carry_out]| {
            let mut thread = _helper_test_thread();
            thread.vcc.default_lane = Some(*lane_id);
            thread.vec_reg.default_lane = Some(*lane_id);
            thread.scalar_reg[20] = 0b1010;
            thread.vec_reg[1] = *lane_id as u32;
            thread.vec_reg[2] = u32::MAX - 1;
            r(&vec![0xD5201401, 0x00520501, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[1], *result as u32);
            assert_eq!(thread.scalar_reg[20], *carry_out as u32);
        })
    }

    /// v_mad_u64_u32: 64-bit multiply-add into v[6:7]; the scalar
    /// destination (s13) receives the carry out of the 64-bit accumulate
    /// (0 when the sum just reaches u64::MAX, 1 when it wraps).
    #[test]
    fn test_v_mad_u64_u32() {
        let mut thread = _helper_test_thread();
        thread.vec_reg.write64(3, u64::MAX - 3);
        thread.scalar_reg[13] = 3;
        thread.scalar_reg[10] = 1;
        r(&vec![0xD6FE0D06, 0x040C140D, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg.read64(6), u64::MAX);
        assert_eq!(thread.scalar_reg[13], 0);

        thread.vec_reg.write64(3, u64::MAX - 3);
        thread.scalar_reg[13] = 4;
        thread.scalar_reg[10] = 1;
        r(&vec![0xD6FE0D06, 0x040C140D, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[6], 0);
        assert_eq!(thread.vec_reg[7], 0);
        assert_eq!(thread.scalar_reg[13], 1);
    }

    /// v_add_co_u32: MAX + 3 wraps to 2; the carry bit for lane 1 lands in
    /// s13.
    #[test]
    fn test_v_add_co_u32() {
        let mut thread = _helper_test_thread();
        thread.vcc.default_lane = Some(1);
        thread.vec_reg[2] = u32::MAX;
        thread.vec_reg[3] = 3;
        r(&vec![0xD7000D02, 0x00020503, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 2);
        assert_eq!(thread.scalar_reg[13], 0b10);
    }

    /// v_sub_co_u32: borrow goes to VCC (100 - 200 wraps with borrow set).
    #[test]
    fn test_v_sub_co_u32() {
        [[69, 0, 69, 0], [100, 200, 4294967196, 1]].iter().for_each(|[a, b, ret, scc]| {
            let mut thread = _helper_test_thread();
            thread.vec_reg[4] = *a;
            thread.vec_reg[15] = *b;
            r(&vec![0xD7016A04, 0x00021F04, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[4], *ret);
            assert_eq!(thread.vcc.read(), *scc != 0);
        })
    }

    /// With the executing lane masked off in EXEC, the destination VGPR must
    /// stay untouched while the carry mask bit is still produced.
    #[test]
    fn test_return_value_exec_zero() {
        let mut thread = _helper_test_thread();
        thread.exec.value = 0b11111111111111111111111111111101;
        thread.vcc.default_lane = Some(1);
        thread.exec.default_lane = Some(1);
        thread.vec_reg[2] = u32::MAX;
        thread.vec_reg[3] = 3;
        r(&vec![0xD7000D02, 0x00020503, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], u32::MAX);
        assert_eq!(thread.scalar_reg[13], 0b10);
    }

    /// v_div_scale_f64.
    /// NOTE(review): v[6:7] are overwritten with literal bit patterns AFTER
    /// the instruction runs and BEFORE the assertion, so the assert only
    /// round-trips those literals back to `v` — the instruction's actual
    /// output is never inspected. Confirm whether the two writes are a
    /// leftover debugging aid that should be removed (which would make this
    /// a real test) or a deliberate placeholder.
    #[test]
    fn test_v_div_scale_f64() {
        let mut thread = _helper_test_thread();
        let v = -0.41614683654714246;
        thread.vec_reg.write64(0, f64::to_bits(v));
        thread.vec_reg.write64(2, f64::to_bits(v));
        thread.vec_reg.write64(4, f64::to_bits(0.909));
        r(&vec![0xD6FD7C06, 0x04120500, END_PRG], &mut thread);
        thread.vec_reg[6] = 1465086470;
        thread.vec_reg[7] = 3218776614;
        let ret = f64::from_bits(thread.vec_reg.read64(6));
        assert_eq!(ret, v);
    }
}
|
||
|
|
||
|
// Tests for VOP3-encoded vector ALU instructions. Each test assembles a raw
// program (32-bit instruction words terminated by END_PRG), runs it with
// `r`, and asserts on the resulting register state. The instruction names
// in the test names describe what the hex words encode.
#[cfg(test)]
mod test_vop3 {
    use super::*;
    use float_cmp::approx_eq;

    // Runs a two-source VOP3 op `op` with f32 inputs in s0 and s6 and
    // returns the f32 result from v0.
    fn helper_test_vop3(op: u32, a: f32, b: f32) -> f32 {
        let mut thread = _helper_test_thread();
        thread.scalar_reg[0] = f32::to_bits(a);
        thread.scalar_reg[6] = f32::to_bits(b);
        r(&vec![op, 0x00000006, END_PRG], &mut thread);
        return f32::from_bits(thread.vec_reg[0]);
    }

    #[test]
    fn test_v_add_f32() {
        assert_eq!(helper_test_vop3(0xd5030000, 0.4, 0.2), 0.6);
    }

    #[test]
    fn test_v_mul_f16() {
        // Operands live in the low 16 bits of v1/v2; result is read back
        // from the low half of v0.
        let mut thread = _helper_test_thread();
        thread.vec_reg[1].mut_lo16(f16::from_f32(2.0).to_bits());
        thread.vec_reg[2].mut_lo16(f16::from_f32(4.0).to_bits());
        r(&vec![0xD5350000, 0x00020501, END_PRG], &mut thread);
        assert_eq!(f16::from_bits(thread.vec_reg[0] as u16), f16::from_f32(8.0));
    }

    #[test]
    fn test_v_max_f32() {
        // Both argument orders: max must pick the larger either way.
        assert_eq!(helper_test_vop3(0xd5100000, 0.4, 0.2), 0.4);
        assert_eq!(helper_test_vop3(0xd5100000, 0.2, 0.8), 0.8);
    }

    #[test]
    fn test_v_mul_f32() {
        assert_eq!(helper_test_vop3(0xd5080000, 0.4, 0.2), 0.4 * 0.2);
    }

    #[test]
    fn test_signed_src() {
        // v0, max(s2, s2)
        let mut thread = _helper_test_thread();
        thread.scalar_reg[2] = f32::to_bits(0.5);
        r(&vec![0xd5100000, 0x00000402, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[0]), 0.5);

        // v1, max(-s2, -s2) — the second word carries the source-negate
        // modifier bits, so the result must come back negated.
        let mut thread = _helper_test_thread();
        thread.scalar_reg[2] = f32::to_bits(0.5);
        r(&vec![0xd5100001, 0x60000402, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[1]), -0.5);
    }

    #[test]
    fn test_cnd_mask_cond_src_sgpr() {
        // cndmask with the condition taken from s3: bit set selects 1,
        // bit clear selects 0.
        let mut thread = _helper_test_thread();
        thread.scalar_reg[3] = 0b001;
        r(&vec![0xD5010000, 0x000D0280, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 1);

        thread.scalar_reg[3] = 0b00;
        r(&vec![0xD5010000, 0x000D0280, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 0);
    }

    #[test]
    fn test_cnd_mask_cond_src_vcclo() {
        // Condition sourced from vcc_lo (per the test name); with the default
        // lane's VCC bit clear, v2 keeps its original value.
        let mut thread = _helper_test_thread();
        thread.vec_reg[2] = 20;
        thread.vec_reg[0] = 100;
        r(&vec![0xD5010002, 0x41AA0102, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 20);
    }

    #[test]
    fn test_cnd_mask_float_const() {
        // VCC bit 1 is set. Observing through lane 0 selects the first
        // operand (0); through lane 1 it selects the inline 1.0 constant.
        let mut thread = _helper_test_thread();
        thread.vcc.value = 0b00000010;
        thread.vcc.default_lane = Some(0);
        r(&vec![0xD5010003, 0x01A9E480, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[3], 0);

        thread.vcc.value = 0b00000010;
        thread.vcc.default_lane = Some(1);
        r(&vec![0xD5010003, 0x01A9E480, END_PRG], &mut thread);
        assert_eq!(f32::from_bits(thread.vec_reg[3]), 1.0);
    }

    #[test]
    fn test_v_cndmask_b32_e64_neg() {
        // With the condition false and a neg modifier on the selected source,
        // the result is the sign-flipped input.
        [[0.0f32, -0.0], [-0.0f32, 0.0], [1.0f32, -1.0], [-1.0f32, 1.0]].iter().for_each(|[input, ret]| {
            let mut thread = _helper_test_thread();
            thread.scalar_reg[0] = false as u32;
            thread.vec_reg[3] = input.to_bits();
            r(&vec![0xD5010003, 0x2001FF03, 0x80000000, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[3], ret.to_bits());
        });
    }

    #[test]
    fn test_v_mul_hi_i32() {
        // Signed high-half multiply: a negative operand must produce a
        // negative (sign-extended) high word, a small positive one yields 0.
        let mut thread = _helper_test_thread();
        thread.vec_reg[2] = -2i32 as u32;
        r(&vec![0xD72E0003, 0x000204FF, 0x2E8BA2E9, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[3] as i32, -1);

        thread.vec_reg[2] = 2;
        r(&vec![0xD72E0003, 0x000204FF, 0x2E8BA2E9, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[3], 0);
    }

    #[test]
    fn test_v_writelane_b32() {
        // Writes an SGPR value into v4 of a specific lane (lane 0, then lane 1).
        let mut thread = _helper_test_thread();
        thread.scalar_reg[8] = 25056;
        r(&vec![0xD7610004, 0x00010008, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg.get_lane(0)[4], 25056);

        thread.scalar_reg[9] = 25056;
        r(&vec![0xD7610004, 0x00010209, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg.get_lane(1)[4], 25056);
    }

    #[test]
    fn test_v_readlane_b32() {
        // Reads v4 of lane 15 into VCC (destination 0x6A); low bit set => true.
        let mut thread = _helper_test_thread();
        thread.vec_reg.get_lane_mut(15)[4] = 0b1111;
        r(&vec![0xD760006A, 0x00011F04, END_PRG], &mut thread);
        assert_eq!(thread.vcc.read(), true);
    }

    #[test]
    fn test_v_lshlrev_b64() {
        // 64-bit shift-left with the shift amount in v4: 100 << 2 == 400.
        let mut thread = _helper_test_thread();
        thread.vec_reg.write64(2, 100);
        thread.vec_reg[4] = 2;
        r(&vec![0xD73C0002, 0x00020504, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg.read64(2), 400);
    }

    #[test]
    fn test_v_lshrrev_b64() {
        // 64-bit logical shift-right: 100 >> 2 == 25.
        let mut thread = _helper_test_thread();
        thread.vec_reg.write64(2, 100);
        thread.vec_reg[4] = 2;
        r(&vec![0xd73d0002, 0x00020504, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg.read64(2), 25);
    }

    #[test]
    fn test_v_add_f64_neg_modifier() {
        // f64 add of v[0:1] and v[2:3] with a negate modifier in the second
        // word; expected result checked as raw low/high dwords.
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 0x652b82fe;
        thread.vec_reg[1] = 0x40071547;
        thread.vec_reg[2] = 0;
        thread.vec_reg[3] = 0x40080000;
        r(&vec![0xD7270004, 0x40020500, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 1519362112);
        assert_eq!(thread.vec_reg[5], 3216856851);
    }

    #[test]
    fn test_v_cvt_f32_f16_abs_modifier() {
        // f16 -> f32 convert with the |abs| modifier: both +0.4 and -0.4
        // convert to approximately +0.4.
        [[0.4, 0.4], [-0.4, 0.4]].iter().for_each(|[a, ret]| {
            let mut thread = _helper_test_thread();
            thread.vec_reg[1] = f16::from_f32_const(*a).to_bits() as u32;
            r(&vec![0xD58B0102, 0x00000101, END_PRG], &mut thread);
            assert!(approx_eq!(f32, f32::from_bits(thread.vec_reg[2]), *ret, (0.01, 2)));
        });
    }

    #[test]
    fn test_v_alignbit_b32() {
        // Extracts 32 bits from the 64-bit concatenation of s4:s10 shifted
        // by the amount in v0.
        let mut thread = _helper_test_thread();
        thread.scalar_reg[4] = 5340353;
        thread.scalar_reg[10] = 3072795146;
        thread.vec_reg[0] = 8;
        r(&vec![0xD6160001, 0x04001404, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[1], 3250005794);
    }

    #[test]
    fn test_v_bfe_i32() {
        // Signed bit-field extract. Three encodings with different
        // offset/width operands (second instruction word differs).
        // Width 1 at offset 0: bit 0 is the sign bit of the field.
        [
            [0b00000000000000000000000000000001, -1],
            [0b00000000000000000000000000000000, 0],
            [0b00000000000000000000000000000010, 0],
        ]
        .iter()
        .for_each(|[a, ret]| {
            let mut thread = _helper_test_thread();
            thread.vec_reg[2] = *a as u32;
            r(&vec![0xD6110005, 0x02050102, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[5] as i32, *ret);
        });

        // Two-bit field: 0b10 sign-extends to -2, 0b01 stays 1.
        [
            [0b00000000000000000000000000000010, -2],
            [0b00000000000000000000000000000001, 1],
            [0b00000000000000000000000000000100, 0],
        ]
        .iter()
        .for_each(|[a, ret]| {
            let mut thread = _helper_test_thread();
            thread.vec_reg[2] = *a as u32;
            r(&vec![0xD6110005, 0x02090102, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[5] as i32, *ret);
        });

        // Wide field: values round-trip unchanged, all-ones stays -1.
        [
            [0b00100000000000000000000000000000, 0b100000000000000000000000000000],
            [0b00000000000000001000000000000000, 0b1000000000000000],
            [-1, -1],
        ]
        .iter()
        .for_each(|[a, ret]| {
            let mut thread = _helper_test_thread();
            thread.vec_reg[2] = *a as u32;
            r(&vec![0xD6110005, 0x03050102, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[5] as i32, *ret);
        });
    }

    #[test]
    fn test_v_ashrrev_i16() {
        let mut thread = _helper_test_thread();
        // 16-bit arithmetic shift right by 1: the sign bit of the low half
        // is replicated; the upper 16 bits of the source are ignored.
        [
            [0b10000000000000000000000000000000, 0],
            [0b10000000000000000000000000000111, 3],
            [0b0000000000000000, 0],
            [0b1000000000000000, 0b1100000000000000],
            [0b0100000000000000, 0b0010000000000000],
            [0b0010000000000000, 0b0001000000000000],
            [0b1010000000000000, 0b1101000000000000],
            [0b1110000000000000, 0b1111000000000000],
            [0b0110000000000000, 0b0011000000000000],
        ]
        .iter()
        .for_each(|[a, ret]| {
            thread.vec_reg[2] = *a;
            thread.scalar_reg[1] = 1;
            r(&vec![0xd73a0005, 0b11000001100000010000000001, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[5], *ret);
        });

        // Shift amounts: 0b1111 and 0b11111 give the same result, i.e. the
        // amount is effectively masked to the 16-bit range.
        [
            [0b1000000000000000, 0b1111, 0b1111111111111111],
            [0b1000000000000000, 0b11111, 0b1111111111111111],
            [0b1000000000000000, 0b0111, 0b1111111100000000],
        ]
        .iter()
        .for_each(|[a, shift, ret]| {
            thread.vec_reg[2] = *a;
            thread.scalar_reg[1] = *shift;
            r(&vec![0xd73a0005, 0b11000001100000010000000001, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[5], *ret);
        });

        // Only the low 16 bits of the destination are written; the
        // pre-existing high half of v5 is preserved.
        thread.vec_reg[5] = 0b11100000000000001111111111111111;
        thread.vec_reg[2] = 0b0100000000000000;
        thread.scalar_reg[1] = 1;
        r(&vec![0xd73a0005, 0b11000001100000010000000001, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[5], 0b11100000000000000010000000000000);
    }

    #[test]
    fn test_v_add_nc_u16() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[5] = 10;
        thread.vec_reg[8] = 20;
        r(&vec![0xD7030005, 0x00021105, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[5], 30);
    }

    #[test]
    fn test_v_mul_lo_u16() {
        // 16-bit multiply (2 * 5 per the encoded operands). The second run
        // shows only the low 16 bits of v15 are written: the high bit set
        // beforehand survives.
        let mut thread = _helper_test_thread();
        thread.vec_reg[5] = 2;
        thread.vec_reg[15] = 0;
        r(&vec![0xD705000F, 0x00010B05, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[15], 10);

        thread.vec_reg[5] = 2;
        thread.vec_reg[15] = 0b10000000000000000000000000000000;
        r(&vec![0xD705000F, 0x00010B05, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[15], 0b10000000000000000000000000000000 + 10);
    }

    #[test]
    fn test_v_cmp_gt_u16() {
        // Unsigned 16-bit compare writing into s5: with these operands the
        // result bit stays clear.
        let mut thread = _helper_test_thread();
        thread.vec_reg[1] = 52431;
        thread.scalar_reg[5] = 0;
        r(&vec![0xD43C0005, 0x000202FF, 0x00003334, END_PRG], &mut thread);
        assert_eq!(thread.scalar_reg[5], 0);
    }

    #[test]
    fn test_v_cmp_ngt_f32_abs() {
        // "not greater than" with |abs| on the source: sign of x must not
        // affect the outcome.
        [(0.5f32, 0.5f32, 1), (-0.5, 0.5, 1), (0.1, 0.2, 0), (-0.1, 0.2, 0)]
            .iter()
            .for_each(|(x, y, ret)| {
                let mut thread = _helper_test_thread();
                thread.scalar_reg[2] = x.to_bits();
                r(&vec![0xD41B0203, 0x000004FF, y.to_bits(), END_PRG], &mut thread);
                assert_eq!(thread.scalar_reg[3], *ret);
            })
    }
    #[test]
    fn test_fma() {
        // v_fma_f32 (3-operand, literal first source) and v_fmac_f32
        // (accumulating form) must agree on the same inputs.
        fn v_fma_f32(a: u32, b: u32, c: u32, ret: u32) {
            let mut thread = _helper_test_thread();
            thread.vec_reg[1] = b;
            thread.scalar_reg[3] = c;
            r(&vec![0xD6130000, 0x000E02FF, a, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[0], ret);
        }
        fn v_fmac_f32(a: u32, b: u32, c: u32, ret: u32) {
            let mut thread = _helper_test_thread();
            thread.scalar_reg[1] = a;
            thread.scalar_reg[2] = b;
            thread.vec_reg[0] = c;
            r(&vec![0xd52b0000, 0x401, END_PRG], &mut thread);
            assert_eq!(thread.vec_reg[0], ret);
        }
        [[0xbfc90fda, 1186963456, 1192656896, 3204127872]].iter().for_each(|[a, b, c, ret]| {
            v_fma_f32(*a, *b, *c, *ret);
            v_fmac_f32(*a, *b, *c, *ret);
        })
    }

    #[test]
    fn test_v_perm_b32() {
        // Byte permute of v0:v1 with the selector supplied as a literal.
        let mut thread = _helper_test_thread();
        thread.vec_reg[1] = 15944;
        thread.vec_reg[0] = 84148480;
        r(&vec![0xD644000F, 0x03FE0101, 0x05040100, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[15], 1044906240);
    }

    #[test]
    fn test_v_mul_f64() {
        // f64 multiply of v[0:1] by v[2:3]; result checked as raw dwords.
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 0x5a8fa040;
        thread.vec_reg[1] = 0xbfbd5713;
        thread.vec_reg[2] = 0x3b39803f;
        thread.vec_reg[3] = 0x3c7abc9e;
        r(&vec![0xD7280004, 0x00020500, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 1602589062);
        assert_eq!(thread.vec_reg[5], 3158868912);
    }

    #[test]
    fn test_v_fma_f64() {
        // f64 fused multiply-add: v[6:7] = v[0:1] * v[2:3] + v[4:5].
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 0x5a8fa040;
        thread.vec_reg[1] = 0xbfbd5713;
        thread.vec_reg[2] = 0xfefa39ef;
        thread.vec_reg[3] = 0x3fe62e42;
        thread.vec_reg[4] = 0x5f859186;
        thread.vec_reg[5] = 0xbc4883b0;
        r(&vec![0xD6140006, 0x04120500, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[6], 3883232879);
        assert_eq!(thread.vec_reg[7], 3216266823);
    }

    #[test]
    fn test_v_fma_f64_const() {
        // Same FMA but with an inline-constant addend (encoded in word 2).
        let mut thread = _helper_test_thread();
        thread.vec_reg[0] = 0xf690ecbf;
        thread.vec_reg[1] = 0x3fdf2b4f;
        thread.vec_reg[2] = 0xe7756e6f;
        thread.vec_reg[3] = 0xbfb45647;
        r(&vec![0xD6140004, 0x03CA0500, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 962012421);
        assert_eq!(thread.vec_reg[5], 1072612110);
    }

    #[test]
    fn test_v_ldexp_f64() {
        // ldexp: 5.0 * 2^3 == 40.0, exponent taken from v2.
        let mut thread = _helper_test_thread();
        thread.vec_reg.write64(0, f64::to_bits(5.0));
        thread.vec_reg[2] = 3;
        thread.vec_reg[3] = 3;
        r(&vec![0xD72B0000, 0x00020500, END_PRG], &mut thread);
        let val = f64::from_bits(thread.vec_reg.read64(0));
        assert_eq!(val, 40.0);
    }

    #[test]
    fn test_simm_resolve_int_in_double_op() {
        // The 32-bit literal 0xFFFFFFE0 must be treated as the integer -32
        // (not reinterpreted as float bits) when used in a 64-bit op.
        let mut thread = _helper_test_thread();
        thread.vec_reg.write64(0, 3.0f64.to_bits());
        let simm = 0xFFFFFFE0;
        r(&vec![0xD72B0002, 0x0001FF00, simm, END_PRG], &mut thread);
        assert_eq!(f64::from_bits(thread.vec_reg.read64(2)), 3.0 * 2.0.powi(-32));
    }

    #[test]
    fn test_simm_resolve_double_in_double_op() {
        // The literal 0x40080000 is the high dword of the f64 3.0; the op
        // must widen it to the full double (2.0 * 3.0 == 6.0).
        let mut thread = _helper_test_thread();
        thread.vec_reg.write64(0, 2.0f64.to_bits());
        let simm = 0x40080000;
        r(&vec![0xD7280000, 0x000200FF, simm, END_PRG], &mut thread);
        assert_eq!(f64::from_bits(thread.vec_reg.read64(0)), 6.0);
    }
}
|
||
|
|
||
|
// Tests for packed / mixed-precision (VOP3P-style) instructions. Tests vary
// opsel / neg bits between otherwise-identical encoded words and assert on
// the packed 16-bit halves of the destination register.
#[cfg(test)]
mod test_vopp {
    use super::*;

    #[test]
    fn test_v_fma_mix_f32() {
        // Mixed f16/f32 FMA: the two runs differ only in the f16 payload of
        // v1 (15360 vs 48128, i.e. sign-flipped halves) yet the selected
        // operand yields the same f32 result bits.
        let mut thread = _helper_test_thread();
        thread.vec_reg[2] = 1065353216;
        thread.scalar_reg[2] = 3217620992;
        thread.vec_reg[1] = 15360;
        r(&vec![0xCC204403, 0x04040502, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[3], 3205627904);

        thread.vec_reg[2] = 1065353216;
        thread.scalar_reg[2] = 3217620992;
        thread.vec_reg[1] = 48128;
        r(&vec![0xCC204403, 0x04040502, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[3], 3205627904);
    }

    #[test]
    fn test_packed_opsel_000_op_000() {
        // Packed op with all opsel bits clear: both 16-bit lanes of v4 are
        // produced from the low halves of the sources.
        let mut thread = _helper_test_thread();
        thread.vec_reg[1] = 1;
        thread.vec_reg[2] = 2;
        thread.vec_reg[3] = 3;
        r(&vec![0xCC090004, 0x040E0501, 0xBFB00000, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b1010000000000000101);
    }

    #[test]
    fn test_packed_opsel_001_op_100() {
        // Same sources as above, different opsel field in the opcode words,
        // so the halves are sourced differently.
        let mut thread = _helper_test_thread();
        thread.vec_reg[1] = 1;
        thread.vec_reg[2] = 2;
        thread.vec_reg[3] = 3;
        r(&vec![0xCC092004, 0x0C0E0501, 0xBFB00000, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b110000000000000010);
    }

    #[test]
    fn test_packed_inline_const_int() {
        // Integer inline constants in packed ops under different opsel
        // combinations, plus (last run) a 32-bit literal source.
        let mut thread = _helper_test_thread();
        thread.vec_reg[1] = 1;
        thread.vec_reg[2] = 2;
        thread.vec_reg[3] = 3;

        r(&vec![0xCC090004, 0x020E0501, 0xBFB00000, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b1010000000000000101);

        r(&vec![0xCC090804, 0x0A0E0501, 0xBFB00000, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b110000000000000011);

        r(&vec![0xCC096004, 0x020E0501, 0xBFB00000, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b100000000000000010);

        r(&vec![0xCC090004, 0x03FE0501, 0x00000080, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 8519810);
    }

    #[test]
    fn test_pk_fma_f16_inline_const() {
        // Packed f16 FMA with the 16-bit constant 0xA400 as a literal
        // operand; successive runs flip neg/opsel bits in the encoding and
        // expect correspondingly different packed results.
        let mut thread = _helper_test_thread();
        thread.vec_reg[2] = 0x393a35f6;
        thread.vec_reg[3] = 0x2800;

        r(&vec![0xCC0E0004, 0x03FE0702, 0x0000A400, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 2618596372);

        r(&vec![0xCC0E0004, 0x0BFE0702, 0x0000A400, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 485006356);

        r(&vec![0xCC0E0004, 0x1BFE0702, 0x0000A400, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 2751503380);

        r(&vec![0xCC0E0804, 0x03FE0702, 0x0000A400, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 2618563816);

        r(&vec![0xCC0E1804, 0x03FE0702, 0x0000A400, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 2618598400);
    }

    #[test]
    fn test_v_fma_mixhilo_f16() {
        // fma_mixlo writes only the low 16 bits of v14 (high half preserved);
        // fma_mixhi writes only the high 16 bits (low half preserved).
        let mut thread = _helper_test_thread();
        thread.vec_reg[11] = 1065353216;
        thread.vec_reg[7] = 3047825943;
        thread.vec_reg[16] = 3047825943;

        thread.vec_reg[14] = 0b10101010101010101111111111111111;
        r(&vec![0xCC21000E, 0x04420F0B, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[14], 0b10101010101010101000000000101011);

        thread.vec_reg[14] = 0b10101010101010101111111111111111;
        r(&vec![0xCC22000E, 0x04420F0B, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[14], 0b10000000001010111111111111111111);
    }

    #[test]
    fn test_v_pk_lshlrev_b16() {
        // Packed 16-bit shift-left with per-half shift amounts; the literal
        // word variants show the shift amount is taken modulo 16 (0x1F
        // behaves like 0xF) and extra bits in the literal halves are ignored.
        let mut thread = _helper_test_thread();
        thread.vec_reg[3] = 0b1010101011101101;

        r(&vec![0xCC044004, 0x0002068E, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b1000000000000000100000000000000);

        r(&vec![0xCC044004, 0x1002068E, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b100000000000000);

        r(&vec![0xCC044004, 0x100206FF, 0x00010002, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b1010101110110100);
        r(&vec![0xCC044004, 0x100206FF, 0x05012002, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b1010101110110100);

        r(&vec![0xCC044004, 0x100206FF, 0x0503E00F, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b1000000000000000);
        r(&vec![0xCC044004, 0x100206FF, 0x0503E007, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b111011010000000);
        r(&vec![0xCC044004, 0x100206FF, 0x0503E01F, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[4], 0b1000000000000000);
    }

    #[test]
    fn test_pk_fma_with_neg() {
        // Packed f16 FMA over two (a, b, c) lanes; later runs flip neg /
        // neg_hi bits in the opcode words and expect changed results only
        // where the negation applies.
        let mut thread = _helper_test_thread();
        let a1 = f16::from_f32(1.0);
        let b1 = f16::from_f32(2.0);
        let c1 = f16::from_f32(3.0);

        let a2 = f16::from_f32(4.0);
        let b2 = f16::from_f32(5.0);
        let c2 = f16::from_f32(6.0);

        // Pack lane 1 into the high half, lane 2 into the low half.
        thread.vec_reg[0] = (a1.to_bits() as u32) << 16 | (a2.to_bits() as u32);
        thread.vec_reg[9] = (b1.to_bits() as u32) << 16 | (b2.to_bits() as u32);
        thread.vec_reg[10] = (c1.to_bits() as u32) << 16 | (c2.to_bits() as u32);

        r(&vec![0xCC0E3805, 0x042A1300, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[5], 1317029120);

        r(&vec![0xCC0E3805, 0x242A1300, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[5], 1317026816);

        r(&vec![0xCC0E3B05, 0x042A1300, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[5], 1317029120);

        r(&vec![0xCC0E3905, 0x042A1300, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[5], 3405792512);
    }

    #[test]
    fn test_pk_add_f16_with_float_const() {
        // Packed f16 add with an inline float constant source; each run
        // varies opsel/neg bits in the encoding.
        let mut thread = _helper_test_thread();
        let a1 = f16::from_f32(5.0);
        let a2 = f16::from_f32(10.0);

        thread.vec_reg[1] = (a1.to_bits() as u32) << 16 | (a2.to_bits() as u32);
        r(&vec![0xCC0F4002, 0x0001E501, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 1233144192);

        r(&vec![0xCC0F5002, 0x0001E501, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 1233144064);

        r(&vec![0xCC0F5002, 0x1001E501, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 1224755456);

        r(&vec![0xCC0F5802, 0x1801E501, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[2], 1157645568);
    }
}
|
||
|
|
||
|
// Tests for FLAT/SCRATCH/GLOBAL memory instructions.
#[cfg(test)]
mod test_flat {
    use super::*;
    use std::alloc::{alloc, dealloc, handle_alloc_error, Layout};

    /// Stores v13 and v14 to two scratch slots, then loads each back into
    /// the other register — the values must end up swapped.
    #[test]
    fn test_scratch_swap_values() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[13] = 42;
        thread.vec_reg[14] = 10;
        r(
            &vec![
                0xDC690096, 0x007C0D00, 0xDC69001E, 0x007C0E00, 0xDC51001E, 0x0D7C0000, 0xDC510096, 0x0E7C0000, END_PRG,
            ],
            &mut thread,
        );
        assert_eq!(thread.vec_reg[13], 10);
        assert_eq!(thread.vec_reg[14], 42);
    }

    /// Scratch store followed by a load: loading with a non-matching
    /// instruction offset must not return the value just stored, while the
    /// matching offset does.
    #[test]
    fn test_scratch_load_dword_offset() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[14] = 14;
        thread.vec_reg[15] = 23;
        r(&vec![0xDC6D000A, 0x007C0E00, 0xDC51000A, 0x0E7C0000, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[14], 14);

        r(&vec![0xDC6D000A, 0x007C0E00, 0xDC51000E, 0x0E7C0000, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[14], 23);
    }

    /// global_load_d16_hi_b16: loads a 16-bit value from the address held in
    /// v[10:11] into the HIGH half of v13, leaving the low half untouched.
    #[test]
    fn test_global_load_d16_hi_b16() {
        let mut thread = _helper_test_thread();
        thread.vec_reg[13] = 0b10101011101101001111111111111111;
        let layout = Layout::new::<u16>();
        // SAFETY: `layout` is the non-zero-sized layout of a u16; the
        // pointer is null-checked before it is written through, and it is
        // deallocated with the same layout after the program has read it.
        let ptr = unsafe { alloc(layout) };
        if ptr.is_null() {
            handle_alloc_error(layout)
        }
        unsafe {
            *(ptr as *mut u16) = 42;
            thread.vec_reg.write64(10, ptr as u64);
        }
        r(&vec![0xDC8E0000, 0x0D7C000A, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[13], 0b00000000001010101111111111111111);
        // BUG FIX: the original test never freed this allocation.
        unsafe { dealloc(ptr, layout) };
    }
}
|
||
|
|
||
|
// Tests for LDS (local data share) load/store instructions.
#[cfg(test)]
mod test_lds {
    use super::*;

    #[test]
    fn test_ds_load_offset() {
        // The load address is v9 (0) plus the offset field of the
        // instruction word: 0x100 = 256, 0x320 = 800, 0x003 = 3.
        let mut thread = _helper_test_thread();
        thread.lds.write(256, 69);
        thread.vec_reg[9] = 0;
        r(&vec![0xD8D80100, 0x01000009, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[1], 69);

        thread.lds.write(800, 69);
        thread.vec_reg[9] = 0;
        r(&vec![0xD8D80320, 0x01000009, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[1], 69);

        thread.lds.write(3, 69);
        thread.vec_reg[9] = 0;
        r(&vec![0xD8D80003, 0x01000009, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[1], 69);
    }

    #[test]
    fn test_ds_load_dwords() {
        // Two-dword load into v[0:1]: 858993459300 == (200 << 32) | 100.
        let mut thread = _helper_test_thread();
        thread.lds.write(0, 100);
        thread.lds.write(4, 200);
        thread.vec_reg[9] = 0;
        r(&vec![0xD9D80000, 0x00000009, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg.read64(0), 858993459300);

        // Four-dword load into v0..v3.
        thread.lds.write(0, 1);
        thread.lds.write(4, 2);
        thread.lds.write(8, 3);
        thread.lds.write(12, 4);
        thread.vec_reg[9] = 0;
        r(&vec![0xDBFC0000, 0x00000009, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 1);
        assert_eq!(thread.vec_reg[1], 2);
        assert_eq!(thread.vec_reg[2], 3);
        assert_eq!(thread.vec_reg[3], 4);
    }

    #[test]
    fn test_ds_load_u8() {
        // Byte load: only the low byte of the stored dword is returned
        // (264 & 0xFF == 8); the offset field selects the byte address.
        let mut thread = _helper_test_thread();
        thread.lds.write(0, 17);
        thread.vec_reg[0] = 0;
        r(&vec![0xD8E80000, 0x00000100, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 17);

        thread.lds.write(0, 264);
        thread.vec_reg[0] = 0;
        r(&vec![0xD8E80000, 0x00000100, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 8);

        thread.lds.write(8, 23);
        thread.vec_reg[0] = 0;
        r(&vec![0xD8E80008, 0x00000100, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 23);

        thread.lds.write(16, 29);
        thread.vec_reg[0] = 0;
        r(&vec![0xD8E80010, 0x00000100, END_PRG], &mut thread);
        assert_eq!(thread.vec_reg[0], 29);
    }

    #[test]
    fn test_ds_store_dwords() {
        // Store v9 at address v0 + offset 0x3E8 (1000).
        let mut thread = _helper_test_thread();
        thread.vec_reg[9] = 69;
        thread.vec_reg[0] = 0;
        r(&vec![0xD83403E8, 0x00000900, END_PRG], &mut thread);
        assert_eq!(thread.lds.read(1000), 69);
    }

    #[test]
    fn test_ds_store_half() {
        // First store writes the HIGH half of v9 (4.3) at address v0 (0);
        // second store writes the LOW half (1.2) at address v1 (2).
        let mut thread = _helper_test_thread();
        thread.vec_reg[9].mut_lo16(f16::from_f32(1.2).to_bits());
        thread.vec_reg[9].mut_hi16(f16::from_f32(4.3).to_bits());
        thread.vec_reg[0] = 0;
        thread.vec_reg[1] = 2;
        r(&vec![0xDA840000, 0x00000900, 0xD87C0000, 0x00000901, END_PRG], &mut thread);
        assert_eq!(thread.lds.read(0) as u16, f16::from_f32(4.3).to_bits());
        assert_eq!(thread.lds.read(2) as u16, f16::from_f32(1.2).to_bits());
    }
}
|
||
|
#[allow(dead_code)]
|
||
|
fn r(prg: &Vec<u32>, thread: &mut Thread) {
|
||
|
let mut pc = 0;
|
||
|
let instructions = prg.to_vec();
|
||
|
thread.pc_offset = 0;
|
||
|
if thread.exec.value == 0 {
|
||
|
thread.exec.value = u32::MAX;
|
||
|
}
|
||
|
|
||
|
loop {
|
||
|
if instructions[pc] == END_PRG {
|
||
|
break;
|
||
|
}
|
||
|
if instructions[pc] == 0xbfb60003 || instructions[pc] >> 20 == 0xbf8 {
|
||
|
pc += 1;
|
||
|
continue;
|
||
|
}
|
||
|
thread.pc_offset = 0;
|
||
|
thread.stream = instructions[pc..instructions.len()].to_vec();
|
||
|
thread.interpret().unwrap();
|
||
|
thread.simm = None;
|
||
|
if thread.vcc.mutations.is_some() {
|
||
|
thread.vcc.apply_muts();
|
||
|
thread.vcc.mutations = None;
|
||
|
}
|
||
|
if thread.exec.mutations.is_some() {
|
||
|
thread.exec.apply_muts();
|
||
|
thread.exec.mutations = None;
|
||
|
}
|
||
|
if let Some((idx, mut wv)) = thread.sgpr_co {
|
||
|
wv.apply_muts();
|
||
|
thread.scalar_reg[*idx] = wv.value;
|
||
|
}
|
||
|
if *DEBUG {
|
||
|
println!()
|
||
|
}
|
||
|
pc = ((pc as isize) + 1 + (thread.pc_offset as isize)) as usize;
|
||
|
}
|
||
|
}
|
||
|
fn _helper_test_thread() -> Thread<'static> {
|
||
|
let static_lds: &'static mut VecDataStore = Box::leak(Box::new(VecDataStore::new()));
|
||
|
let static_sgpr: &'static mut [u32; SGPR_COUNT] = Box::leak(Box::new([0; SGPR_COUNT]));
|
||
|
let static_vgpr: &'static mut VGPR = Box::leak(Box::new(VGPR::new()));
|
||
|
let static_scc: &'static mut u32 = Box::leak(Box::new(0));
|
||
|
let static_exec: &'static mut WaveValue = Box::leak(Box::new(WaveValue::new(u32::MAX, 32)));
|
||
|
let static_vcc: &'static mut WaveValue = Box::leak(Box::new(WaveValue::new(0, 32)));
|
||
|
let static_sds: &'static mut VecDataStore = Box::leak(Box::new(VecDataStore::new()));
|
||
|
let static_co: &'static mut Option<(usize, WaveValue)> = Box::leak(Box::new(None));
|
||
|
|
||
|
let thread = Thread {
|
||
|
scalar_reg: static_sgpr,
|
||
|
vec_reg: static_vgpr,
|
||
|
scc: static_scc,
|
||
|
vcc: static_vcc,
|
||
|
exec: static_exec,
|
||
|
lds: static_lds,
|
||
|
sds: static_sds,
|
||
|
simm: None,
|
||
|
pc_offset: 0,
|
||
|
stream: vec![],
|
||
|
sgpr_co: static_co,
|
||
|
warp_size: 32,
|
||
|
scalar: false,
|
||
|
};
|
||
|
thread.vec_reg.default_lane = Some(0);
|
||
|
thread.vcc.default_lane = Some(0);
|
||
|
thread.exec.default_lane = Some(0);
|
||
|
return thread;
|
||
|
}
|