[LA64_DYNAREC] Added more 660F opcodes by xiangzhai · Pull Request #2127 · ptitSeb/box64 · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

[LA64_DYNAREC] Added more 660F opcodes #2127

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
241 changes: 240 additions & 1 deletion src/dynarec/la64/dynarec_la64_660f.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
uint8_t eb1, eb2;
int64_t j64;
uint64_t tmp64u, tmp64u2;
int v0, v1;
int v0, v1, v2;
int q0, q1;
int d0, d1, d2;
int64_t fixedaddress, gdoffset;
Expand Down Expand Up @@ -316,6 +316,79 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
GETEX(q1, 0, 0);
VSIGNCOV_W(q0, q1, q0);
break;
case 0x0B:
// PMULHRSW: per signed 16-bit lane, Gx = ((((Gx * Ex) >> 14) + 1) >> 1), keeping 16 bits.
// The 32-bit intermediates do not fit in 128 bits, so the work is done in 256-bit (XV*) scratch registers.
INST_NAME("PMULHRSW Gx,Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
// Widen the eight 16-bit lanes of each operand to eight 32-bit lanes.
VEXT2XV_W_H(v0, q0);
VEXT2XV_W_H(v1, q1);
// Full 32-bit products, then the rounding sequence: >> 14, + 1 (the final >> 1 is folded into the narrowing below).
// A logical shift is enough here: only bits [16:1] of the sum end up in the result.
XVMUL_W(v0, v0, v1);
XVSRLI_W(v0, v0, 14);
XVADDI_WU(v0, v0, 1);
// Shift right by 1 and narrow 32 -> 16 bits; each 128-bit half keeps its four results in its low 64 bits.
XVSRLNI_H_W(v0, v0, 1);
// Gather 64-bit elements 0 and 2 of v0 into the low 128 bits of q0 (selector 0b1000 -> elements 0, 2, 0, 0).
XVPERMI_D(q0, v0, 0b1000);
break;
case 0x1C:
// PABSB: Gx = absolute value of each signed byte of Ex.
// Gx is fetched with GETGX_empty — presumably because its old value is not needed; confirm in the helper.
INST_NAME("PABSB Gx,Ex");
nextop = F8;
GETEX(q1, 0, 0);
GETGX_empty(q0);
v0 = fpu_get_scratch(dyn);
// v0 = 0, then |Ex - 0| per byte gives |Ex|.
VXOR_V(v0, v0, v0);
VABSD_B(q0, q1, v0);
break;
case 0x1D:
// PABSW: Gx = absolute value of each signed 16-bit lane of Ex.
// Gx is fetched with GETGX_empty — presumably because its old value is not needed; confirm in the helper.
INST_NAME("PABSW Gx,Ex");
nextop = F8;
GETEX(q1, 0, 0);
GETGX_empty(q0);
v0 = fpu_get_scratch(dyn);
// v0 = 0, then |Ex - 0| per 16-bit lane gives |Ex|.
VXOR_V(v0, v0, v0);
VABSD_H(q0, q1, v0);
break;
case 0x2B:
// PACKUSDW: pack signed 32-bit lanes into unsigned 16-bit lanes with saturation.
// The low 64 bits of the result come from Gx, the high 64 bits from Ex.
INST_NAME("PACKUSDW Gx, Ex"); // SSE4 opcode!
nextop = F8;
GETEX(q1, 0, 0);
GETGX(q0, 1);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
// Clamp negative lanes of Gx to 0: v0 is the "lane < 0" mask, then q0 = ~v0 & q0.
VSLTI_W(v0, q0, 0);
VANDN_V(q0, v0, q0);
// Narrow 32 -> 16 with unsigned saturation (shift amount 0); the packed words land in the low 64 bits of q0.
VSSRANI_HU_W(q0, q0, 0);
if (q0 == q1) {
// Gx and Ex are the same register: the high half is just a copy of the already packed low half.
VEXTRINS_D(q0, q0, VEXTRINS_IMM_4_0(1, 0));
} else {
// Pack Ex the same way into scratch v1 (Ex itself is left untouched) ...
VSLTI_W(v1, q1, 0);
VANDN_V(v1, v1, q1);
VSSRANI_HU_W(v1, v1, 0);
// ... and insert its low 64 bits as the high 64 bits of Gx.
VEXTRINS_D(q0, v1, VEXTRINS_IMM_4_0(1, 0));
}
break;
case 0x3A:
// PMINUW: Gx = min(Gx, Ex) per unsigned 16-bit lane.
INST_NAME("PMINUW Gx, Ex"); // SSE4 opcode!
nextop = F8;
GETEX(q1, 0, 0);
GETGX(q0, 1);
VMIN_HU(q0, q0, q1);
break;
case 0x3D:
// PMAXSD: Gx = max(Gx, Ex) per signed 32-bit lane.
INST_NAME("PMAXSD Gx, Ex"); // SSE4 opcode!
nextop = F8;
GETEX(q1, 0, 0);
GETGX(q0, 1);
VMAX_W(q0, q0, q1);
break;
case 0x40:
INST_NAME("PMULLD Gx, Ex"); 10000 // SSE4 opcode!
nextop = F8;
GETEX(q1, 0, 0);
GETGX(q0, 1);
VMUL_W(q0, q0, q1);
break;
case 0xDB:
INST_NAME("AESIMC Gx, Ex"); // AES-NI
nextop = F8;
Expand Down Expand Up @@ -418,6 +491,63 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
VOR_V(q0, q0, d0);
}
break;
case 0x0E:
// PBLENDW: for every bit i set in imm8, 16-bit lane i of Gx is replaced by lane i of Ex;
// lanes whose bit is clear keep their Gx value.
INST_NAME("PBLENDW Gx, Ex, Ib");
nextop = F8;
GETGX(q0, 1);
// Last GETEX argument is 1 here (0 in the two-operand cases) — presumably the count of
// immediate bytes that follow the ModRM operand; confirm against the GETEX helper.
GETEX(q1, 0, 1);
u8 = F8;
// NOTE(review): i32 is never read in this case; this store looks like a leftover — confirm and drop.
i32 = 0;
// Blending a register with itself changes nothing, so emit no code in that case.
if (q0 != q1) {
if (u8 == 0xff) {
// Every lane selected: plain register copy (x AND x == x).
VAND_V(q0, q1, q1);
} else {
// Greedy lowering: cover the mask with the widest element inserts first and clear the
// handled bits from u8, so each remaining bit costs at most one 16-bit insert.
/* 64bits */
if ((u8 & 0xf) == 0xf) {
VEXTRINS_D(q0, q1, VEXTRINS_IMM_4_0(0, 0));
u8 &= ~0xf;
}
if ((u8 & 0xf0) == 0xf0) {
VEXTRINS_D(q0, q1, VEXTRINS_IMM_4_0(1, 1));
u8 &= ~0xf0;
}
/* 32bits */
if ((u8 & 0x3) == 0x3) {
VEXTRINS_W(q0, q1, VEXTRINS_IMM_4_0(0, 0));
u8 &= ~0x3;
}
if ((u8 & 0xc) == 0xc) {
VEXTRINS_W(q0, q1, VEXTRINS_IMM_4_0(1, 1));
u8 &= ~0xc;
}
if ((u8 & 0x30) == 0x30) {
VEXTRINS_W(q0, q1, VEXTRINS_IMM_4_0(2, 2));
u8 &= ~0x30;
}
if ((u8 & 0xc0) == 0xc0) {
VEXTRINS_W(q0, q1, VEXTRINS_IMM_4_0(3, 3));
u8 &= ~0xc0;
}
/* 16bits */
// Whatever bits are still set are isolated lanes: one insert per lane, same index on both sides.
if (u8 & 0x1)
VEXTRINS_H(q0, q1, VEXTRINS_IMM_4_0(0, 0));
if (u8 & 0x2)
VEXTRINS_H(q0, q1, VEXTRINS_IMM_4_0(1, 1));
if (u8 & 0x4)
VEXTRINS_H(q0, q1, VEXTRINS_IMM_4_0(2, 2));
if (u8 & 0x8)
VEXTRINS_H(q0, q1, VEXTRINS_IMM_4_0(3, 3));
if (u8 & 0x10)
VEXTRINS_H(q0, q1, VEXTRINS_IMM_4_0(4, 4));
if (u8 & 0x20)
VEXTRINS_H(q0, q1, VEXTRINS_IMM_4_0(5, 5));
if (u8 & 0x40)
VEXTRINS_H(q0, q1, VEXTRINS_IMM_4_0(6, 6));
if (u8 & 0x80)
VEXTRINS_H(q0, q1, VEXTRINS_IMM_4_0(7, 7));
}
}
break;
case 0x16:
if (rex.w) {
INST_NAME("PEXTRQ Ed, Gx, Ib");
Expand Down Expand Up @@ -1166,6 +1296,19 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
u8 = F8;
VSHUF4I_D(v0, v1, 0x8 | (u8 & 1) | ((u8 & 2) << 1));
break;
case 0xD1:
// PSRLW: logical right shift of every 16-bit lane of Gx by the count held in the low 64 bits of Ex.
// A count above 15 must yield all zeroes.
INST_NAME("PSRLW Gx,Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
// v1 = all-ones when the full 64-bit count is <= 15 (unsigned), all-zeroes otherwise.
VREPLVEI_D(v0, q1, 0);
VSLEI_DU(v1, v0, 15);
// Broadcast the low 16 bits of the count to every lane and shift lane by lane.
VREPLVEI_H(v0, q1, 0);
VSRL_H(q0, q0, v0);
// Force the result to zero when the count was out of range (the lane shift alone presumably
// only looks at the low bits of the count).
VAND_V(q0, q0, v1);
break;
case 0xD2:
INST_NAME("PSRLD Gx, Ex");
nextop = F8;
Expand Down Expand Up @@ -1242,13 +1385,27 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
MOVFR2GR_D(x1, v0);
BSTRPICK_D(gd, x1, 15, 0);
break;
case 0xD8:
// PSUBUSB: Gx = Gx - Ex per unsigned byte, saturating at 0.
INST_NAME("PSUBUSB Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
VSSUB_BU(q0, q0, q1);
break;
case 0xD9:
// PSUBUSW: Gx = Gx - Ex per unsigned 16-bit lane, saturating at 0.
INST_NAME("PSUBUSW Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
VSSUB_HU(q0, q0, q1);
break;
case 0xDA:
// PMINUB: Gx = min(Gx, Ex) per unsigned byte.
INST_NAME("PMINUB Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
VMIN_BU(q0, q0, q1);
break;
case 0xDB:
INST_NAME("PAND Gx,Ex");
nextop = F8;
Expand All @@ -1263,6 +1420,20 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
GETEX(q1, 0, 0);
VSADD_BU(q0, q0, q1);
break;
case 0xDD:
// PADDUSW: Gx = Gx + Ex per unsigned 16-bit lane, saturating at 0xFFFF.
INST_NAME("PADDUSW Gx,Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
VSADD_HU(q0, q0, q1);
break;
case 0xDE:
// PMAXUB: Gx = max(Gx, Ex) per unsigned byte.
INST_NAME("PMAXUB Gx, Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
VMAX_BU(q0, q0, q1);
break;
case 0xDF:
INST_NAME("PANDN Gx,Ex");
nextop = F8;
Expand All @@ -1287,6 +1458,21 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
VREPLVEI_H(v0, v0, 0);
VSRA_H(q0, q0, v0);
break;
case 0xE2:
// PSRAD: arithmetic right shift of every 32-bit lane of Gx by the count held in the low 64 bits of Ex.
// A count above 31 must fill each lane with copies of its sign bit.
INST_NAME("PSRAD Gx,Ex");
nextop = F8;
GETGX(q0, 1);
GETEX(q1, 0, 0);
v0 = fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
v2 = fpu_get_scratch(dyn);
// v1 = all-ones when the full 64-bit count is <= 31 (unsigned), all-zeroes otherwise.
VREPLVEI_D(v0, q1, 0);
VSLEI_DU(v1, v0, 31);
// Broadcast the low 32 bits of the count to every lane.
VREPLVEI_W(v0, q1, 0);
// v2 = saturated answer (sign bit replicated across the lane); must be taken before q0 is overwritten.
VSRAI_W(v2, q0, 31);
VSRA_W(q0, q0, v0);
// Bitwise select on mask v1: keep the shifted value where the count was in range, else the sign fill.
VBITSEL_V(q0, v2, q0, v1);
break;
case 0xE3:
INST_NAME("PAVGW Gx,Ex");
nextop = F8;
Expand Down Expand Up @@ -1328,13 +1514,55 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
VST(v0, ed, fixedaddress);
}
break;
case 0xE8:
// PSUBSB: Gx = Gx - Ex per signed byte, with signed saturation.
// Note the register naming in this case: v0 holds Gx and q0 holds Ex.
INST_NAME("PSUBSB Gx,Ex");
nextop = F8;
GETGX(v0, 1);
GETEX(q0, 0, 0);
VSSUB_B(v0, v0, q0);
break;
case 0xE9:
// PSUBSW: Gx = Gx - Ex per signed 16-bit lane, with signed saturation.
// Note the register naming in this case: v0 holds Gx and q0 holds Ex.
INST_NAME("PSUBSW Gx,Ex");
nextop = F8;
GETGX(v0, 1);
GETEX(q0, 0, 0);
VSSUB_H(v0, v0, q0);
break;
case 0xEA:
// PMINSW: Gx = min(Gx, Ex) per signed 16-bit lane.
// Note the register naming in this case: v0 holds Gx and q0 holds Ex.
INST_NAME("PMINSW Gx,Ex");
nextop = F8;
GETGX(v0, 1);
GETEX(q0, 0, 0);
VMIN_H(v0, v0, q0);
break;
case 0xEB:
// POR: Gx = Gx | Ex over the whole 128-bit register.
// Note the register naming in this case: v0 holds Gx and q0 holds Ex.
INST_NAME("POR Gx,Ex");
nextop = F8;
GETGX(v0, 1);
GETEX(q0, 0, 0);
VOR_V(v0, v0, q0);
break;
case 0xEC:
// PADDSB: Gx = Gx + Ex per signed byte, with signed saturation.
// Note the register naming in this case: v0 holds Gx and q0 holds Ex.
INST_NAME("PADDSB Gx,Ex");
nextop = F8;
GETGX(v0, 1);
GETEX(q0, 0, 0);
VSADD_B(v0, v0, q0);
break;
case 0xED:
// PADDSW: Gx = Gx + Ex per signed 16-bit lane, with signed saturation.
// Note the register naming in this case: v0 holds Gx and q0 holds Ex.
INST_NAME("PADDSW Gx,Ex");
nextop = F8;
GETGX(v0, 1);
GETEX(q0, 0, 0);
VSADD_H(v0, v0, q0);
break;
case 0xEE:
// PMAXSW: Gx = max(Gx, Ex) per signed 16-bit lane.
// Note the register naming in this case: v0 holds Gx and q0 holds Ex.
INST_NAME("PMAXSW Gx,Ex");
nextop = F8;
GETGX(v0, 1);
GETEX(q0, 0, 0);
VMAX_H(v0, v0, q0);
break;
case 0xEF:
INST_NAME("PXOR Gx,Ex");
nextop = F8;
Expand All @@ -1356,6 +1584,17 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
GETEX(v1, 0, 0);
VMULWEV_D_WU(v0, v0, v1);
break;
case 0xF5:
// PMADDWD: multiply the signed 16-bit lanes of Gx and Ex pairwise, then add each pair of
// adjacent 32-bit products, giving four 32-bit sums in Gx.
// Note the register naming in this case: v0/v1 hold Gx/Ex, q0/q1 are scratch.
INST_NAME("PMADDWD Gx, Ex");
nextop = F8;
GETGX(v0, 1);
GETEX(v1, 0, 0);
q0 = fpu_get_scratch(dyn);
q1 = fpu_get_scratch(dyn);
// q0 = widened products of the even-indexed 16-bit lanes, q1 = those of the odd-indexed lanes.
VMULWEV_W_H(q0, v0, v1);
VMULWOD_W_H(q1, v0, v1);
// Even + odd product of each pair lands in the matching 32-bit lane of Gx.
VADD_W(v0, q0, q1);
break;
case 0xF6:
INST_NAME("PSADBW Gx, Ex");
nextop = F8;
Expand Down
2 changes: 2 additions & 0 deletions src/dynarec/la64/dynarec_la64_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,8 @@
ed = i; \
}

// Build the 8-bit immediate used by the VEXTRINS_{B,H,W,D} emitters:
// n (masked to 4 bits) goes to bits 7:4 and m (masked to 4 bits) to bits 3:0.
// In the LSX VEXTRINS encoding the high nibble is the destination element index
// and the low nibble the source element index.
// Arguments are parenthesised so expression arguments (e.g. `a | b`) are masked as a whole.
#define VEXTRINS_IMM_4_0(n, m) ((((n) & 0xf) << 4) | ((m) & 0xf))

// Get GX as a quad (might use x1)
#define GETGX(a, w) \
gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \
Expand Down
6 changes: 6 additions & 0 deletions src/dynarec/la64/la64_emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1287,6 +1287,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VBITSET_H(vd, vj, vk) EMIT(type_3R(0b01110001000011101, vk, vj, vd))
#define VBITSET_W(vd, vj, vk) EMIT(type_3R(0b01110001000011110, vk, vj, vd))
#define VBITSET_D(vd, vj, vk) EMIT(type_3R(0b01110001000011111, vk, vj, vd))
// Four-register bitwise select: each result bit comes from vk where the matching bit of va is 1,
// and from vj where it is 0, i.e. vd = (va & vk) | (~va & vj).
#define VBITSEL_V(vd, vj, vk, va) EMIT(type_4R(0b000011010001, va, vk, vj, vd))
#define VBITREV_B(vd, vj, vk) EMIT(type_3R(0b01110001000100000, vk, vj, vd))
#define VBITREV_H(vd, vj, vk) EMIT(type_3R(0b01110001000100001, vk, vj, vd))
#define VBITREV_W(vd, vj, vk) EMIT(type_3R(0b01110001000100010, vk, vj, vd))
Expand Down Expand Up @@ -1369,9 +1370,11 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VSLE_HU(vd, vj, vk) EMIT(type_3R(0b01110000000001001, vk, vj, vd))
#define VSLE_WU(vd, vj, vk) EMIT(type_3R(0b01110000000001010, vk, vj, vd))
#define VSLE_DU(vd, vj, vk) EMIT(type_3R(0b01110000000001011, vk, vj, vd))
// Immediate form of VSLE_DU: per 64-bit lane, all-ones when vj <= imm5 (unsigned compare), else zero.
#define VSLEI_DU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100001011, imm5, vj, vd))
#define VSLT_B(vd, vj, vk) EMIT(type_3R(0b01110000000001100, vk, vj, vd))
#define VSLT_H(vd, vj, vk) EMIT(type_3R(0b01110000000001101, vk, vj, vd))
#define VSLT_W(vd, vj, vk) EMIT(type_3R(0b01110000000001110, vk, vj, vd))
// Immediate form of VSLT_W: per 32-bit lane, all-ones when vj < imm5 (signed compare), else zero.
#define VSLTI_W(vd, vj, imm5) EMIT(type_2RI5(0b01110010100001110, imm5, vj, vd))
#define VSLT_D(vd, vj, vk) EMIT(type_3R(0b01110000000001111, vk, vj, vd))
#define VSLT_BU(vd, vj, vk) EMIT(type_3R(0b01110000000010000, vk, vj, vd))
#define VSLT_HU(vd, vj, vk) EMIT(type_3R(0b01110000000010001, vk, vj, vd))
Expand Down Expand Up @@ -1818,6 +1821,9 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VEXT2XV_WU_HU(vd, vj) EMIT(type_2R(0b0111011010011111001101, vj, vd))
#define VEXT2XV_DU_HU(vd, vj) EMIT(type_2R(0b0111011010011111001110, vj, vd))
#define VEXT2XV_DU_WU(vd, vj) EMIT(type_2R(0b0111011010011111001111, vj, vd))
// 256-bit (LASX) helpers:
// XVADDI_WU: add the unsigned 5-bit immediate to every 32-bit lane.
#define XVADDI_WU(vd, vj, imm5) EMIT(type_2RI5(0b01110110100010110, imm5, vj, vd))
// XVSRLNI_H_W: logical right shift of 32-bit lanes by imm5, then narrow to 16 bits; within each
// 128-bit half the narrowed vj lanes fill the low 64 bits and the narrowed vd lanes the high 64 bits.
#define XVSRLNI_H_W(vd, vj, imm5) EMIT(type_2RI5(0b01110111010000001, imm5, vj, vd))
// XVSRLI_W: logical right shift of every 32-bit lane by imm5.
#define XVSRLI_W(vd, vj, imm5) EMIT(type_2RI5(0b01110111001100001, imm5, vj, vd))

////////////////////////////////////////////////////////////////////////////////
// (undocumented) LBT extension instructions
Expand Down
Loading
0