From 3985d22bcc7575eedd82e2983a5e5c54f527b035 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Fri, 24 May 2024 03:59:14 +0800 Subject: [PATCH] [LA64_DYNAREC] Added more opcodes --- src/dynarec/la64/dynarec_la64_00.c | 54 ++++- src/dynarec/la64/dynarec_la64_0f.c | 31 +++ src/dynarec/la64/dynarec_la64_66.c | 37 +++ src/dynarec/la64/dynarec_la64_660f.c | 45 ++++ src/dynarec/la64/dynarec_la64_emit_math.c | 281 +++++++++++++++++++++- src/dynarec/la64/dynarec_la64_f0.c | 44 ++++ src/dynarec/la64/dynarec_la64_f20f.c | 2 + src/dynarec/la64/dynarec_la64_f30f.c | 1 + src/dynarec/la64/dynarec_la64_helper.h | 8 + src/dynarec/la64/la64_emitter.h | 3 + 10 files changed, 498 insertions(+), 8 deletions(-) diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 4ed4e271ee..586d23920a 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -167,6 +167,16 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0x18: + INST_NAME("SBB Eb, Gb"); + READFLAGS(X_CF); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETEB(x1, 0); + GETGB(x2); + emit_sbb8(dyn, ninst, x1, x2, x4, x5, x6); + EBBACK(); + break; case 0x19: INST_NAME("SBB Ed, Gd"); READFLAGS(X_CF); @@ -321,6 +331,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(0); emit_xor32(dyn, ninst, rex, gd, ed, x3, x4); break; + case 0x34: + INST_NAME("XOR AL, Ib"); + SETFLAGS(X_ALL, SF_SET_PENDING); + u8 = F8; + ANDI(x1, xRAX, 0xff); + emit_xor8c(dyn, ninst, x1, u8, x3, x4); + BSTRINS_D(xRAX, x1, 7, 0); + break; case 0x35: INST_NAME("XOR EAX, Id"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -2052,6 +2070,35 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0xFC: + INST_NAME("CLD"); + BSTRINS_D(xFlags, xZR, F_DF, F_DF); + break; + case 0xFD: + INST_NAME("STD"); + ORI(xFlags, xFlags, 1 << F_DF); + break; + case 0xFE: + 
nextop = F8; + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("INC Eb"); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + GETEB(x1, 0); + emit_inc8(dyn, ninst, ed, x2, x4, x5); + EBBACK(); + break; + case 1: + INST_NAME("DEC Eb"); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + GETEB(x1, 0); + emit_dec8(dyn, ninst, ed, x2, x4, x5); + EBBACK(); + break; + default: + DEFAULT; + } + break; case 0xFF: nextop = F8; switch ((nextop >> 3) & 7) { @@ -2071,12 +2118,9 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 2: INST_NAME("CALL Ed"); - PASS2IF((box64_dynarec_safeflags > 1) || ((ninst && dyn->insts[ninst - 1].x64.set_flags) || ((ninst > 1) && dyn->insts[ninst - 2].x64.set_flags)), 1) - { + PASS2IF ((box64_dynarec_safeflags > 1) || ((ninst && dyn->insts[ninst - 1].x64.set_flags) || ((ninst > 1) && dyn->insts[ninst - 2].x64.set_flags)), 1) { READFLAGS(X_PEND); // that's suspicious - } - else - { + } else { SETFLAGS(X_ALL, SF_SET); // Hack to put flag in "don't care" state } GETEDz(0); diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 87c51c8165..6d056fa249 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -417,6 +417,13 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(q1, 1); VFCVTL_D_S(q1, q0); break; + case 0x5B: + INST_NAME("CVTDQ2PS Gx, Ex"); + nextop = F8; + GETEX(q0, 0, 0); + GETGX_empty(q1); + VFFINT_S_W(q1, q0); + break; case 0x5C: INST_NAME("SUBPS Gx, Ex"); nextop = F8; @@ -424,6 +431,13 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(v0, 1); VFSUB_S(v0, v0, q0); break; + case 0x5E: + INST_NAME("DIVPS Gx, Ex"); + nextop = F8; + GETEX(q0, 0, 0); + GETGX(v0, 1); + VFDIV_S(v0, v0, q0); + break; #define GO(GETFLAGS, NO, YES, F, I) \ if (box64_dynarec_test == 2) { NOTEST(x1); } \ @@ -791,6 +805,23 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t 
addr, uintptr_t ip, int ni } if (!rex.w) ZEROUP(gd); break; + case 0xC2: + INST_NAME("CMPPS Gx, Ex, Ib"); + nextop = F8; + GETGX(v0, 1); + GETEX(v1, 0, 1); + u8 = F8; + switch (u8 & 7) { + case 0: VFCMP_S(v0, v0, v1, cEQ); break; // Equal + case 1: VFCMP_S(v0, v0, v1, cLT); break; // Less than + case 2: VFCMP_S(v0, v0, v1, cLE); break; // Less or equal + case 3: VFCMP_S(v0, v0, v1, cUN); break; // NaN + case 4: VFCMP_S(v0, v0, v1, cUNE); break; // Not Equal or unordered + case 5: VFCMP_S(v0, v1, v0, cULE); break; // Greater or equal or unordered + case 6: VFCMP_S(v0, v1, v0, cULT); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) + case 7: VFCMP_S(v0, v0, v1, cOR); break; // not NaN + } + break; case 0xC6: INST_NAME("SHUFPS Gx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c index 88d03ddc86..8e7764d03f 100644 --- a/src/dynarec/la64/dynarec_la64_66.c +++ b/src/dynarec/la64/dynarec_la64_66.c @@ -324,6 +324,22 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMWRITELOCK(lock); } break; + case 0x8B: + INST_NAME("MOV Gw, Ew"); + nextop = F8; + GETGD; + if (MODREG) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + if (ed != gd) { + BSTRINS_D(gd, ed, 15, 0); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); + SMREADLOCK(lock); + LD_HU(x1, ed, fixedaddress); + BSTRINS_D(gd, x1, 15, 0); + } + break; case 0x90: case 0x91: case 0x92: @@ -528,6 +544,27 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni DEFAULT; } break; + case 0xFF: + nextop = F8; + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("INC Ew"); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + GETEW(x1, 0); + emit_inc16(dyn, ninst, x1, x2, x4, x5); + EWBACK; + break; + case 1: + INST_NAME("DEC Ew"); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + GETEW(x1, 0); + emit_dec16(dyn, ninst, x1, x2, x4,
x5, x6); + EWBACK; + break; + default: + DEFAULT; + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index aea7b81528..c9057c1ece 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -51,6 +51,20 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int MAYUSE(j64); switch (opcode) { + case 0x12: + INST_NAME("MOVLPD Gx, Eq"); + nextop = F8; + GETGX(v0, 1); + if (MODREG) { + DEFAULT; + return addr; + } + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); + v1 = fpu_get_scratch(dyn); + FLD_D(v1, wback, fixedaddress); + VEXTRINS_D(v0, v1, 0); + break; case 0x14: INST_NAME("UNPCKLPD Gx, Ex"); nextop = F8; @@ -86,6 +100,21 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VLD(v0, ed, fixedaddress); } break; + case 0x29: + INST_NAME("MOVAPD Ex,Gx"); + nextop = F8; + GETG; + v0 = sse_get_reg(dyn, ninst, x1, gd, 0); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); + VOR_V(v1, v0, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); + VST(v0, ed, fixedaddress); + SMWRITE2(); + } + break; case 0x2E: // no special check... 
case 0x2F: @@ -283,6 +312,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VXOR_V(q0, q0, q1); } break; + case 0x58: + INST_NAME("ADDPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0, 0); + GETGX(q1, 1); + // TODO: fastnan handling + VFADD_D(q1, q1, q0); + break; case 0x5A: INST_NAME("CVTPD2PS Gx, Ex"); nextop = F8; @@ -294,6 +331,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VXOR_V(v0, v0, v0); VEXTRINS_D(v0, q0, 0); break; + case 0x5C: + INST_NAME("SUBPD Gx, Ex"); + nextop = F8; + GETEX(q0, 0, 0); + GETGX(q1, 1); + // TODO: fastnan handling + VFSUB_D(q1, q1, q0); + break; case 0x60: INST_NAME("PUNPCKLBW Gx,Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c index cd94673e4b..ad2054673e 100644 --- a/src/dynarec/la64/dynarec_la64_emit_math.c +++ b/src/dynarec/la64/dynarec_la64_emit_math.c @@ -1127,6 +1127,141 @@ void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } } +// emit INC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch +void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_inc8); + } else IFX (X_ALL) { + SET_DFNONE(); + } + IFXA (X_AF | X_OF, !la64_lbt) { + ORI(s3, s1, 1); // s3 = op1 | op2 + ANDI(s4, s1, 1); // s5 = op1 & op2 + } + + IFXA (X_ALL, la64_lbt) { + X64_INC_B(s1); + } + + ADDI_W(s1, s1, 1); + + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) { + ANDI(s1, s1, 0xff); + return; + } + + IFX (X_ALL) { + // preserving CF + MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF)); + ANDN(xFlags, xFlags, s4); + } + IFX (X_AF | X_OF) { + ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) + OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) + IFX (X_AF) { + ANDI(s2, s3, 0x08); // AF: cc & 0x08 + 
BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX (X_OF) { + SRLI_D(s3, s3, 6); + SRLI_D(s2, s3, 1); + XOR(s3, s3, s2); + ANDI(s3, s3, 1); // OF: xor of two MSB's of cc + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF); + } + } + IFX (X_SF) { + ANDI(s2, s1, 0x80); + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + ANDI(s1, s1, 0xff); + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s2); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } +} + +// emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch +void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_inc16); + } else IFX (X_ZF | X_OF | X_AF | X_SF | X_PF) { + SET_DFNONE(); + } + IFXA (X_AF | X_OF, !la64_lbt) { + ORI(s3, s1, 1); // s3 = op1 | op2 + ANDI(s4, s1, 1); // s4 = op1 & op2 + } + + IFXA (X_ALL, la64_lbt) { + X64_INC_H(s1); + } + + ADDI_D(s1, s1, 1); + + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) { + BSTRPICK_D(s1, s1, 15, 0); + return; + } + + IFX (X_ALL) { + // preserving CF + MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF)); + ANDN(xFlags, xFlags, s4); + } + + IFX (X_AF | X_OF) { + ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) + OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) + IFX (X_AF) { + ANDI(s4, s3, 0x08); // AF: cc & 0x08 + BEQZ(s4, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX (X_OF) { + SRLI_D(s3, s3, 14); + SRLI_D(s4, s3, 1); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); // OF: xor of two MSB's of cc + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF); + } + } + + BSTRPICK_D(s1, s1, 15, 0); + + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_SF) { + SRLI_D(s3, s1, 15); + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + // emit INC32 instruction, from s1, store result in s1 using 
s3 and s4 as scratch void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { @@ -1156,7 +1291,10 @@ void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (la64_lbt) { + if (!rex.w) ZEROUP(s1); + return; + } IFX (X_ALL) { // preserving CF @@ -1196,6 +1334,141 @@ void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } } +// emit DEC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch +void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) +{ + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_dec8); + } else IFX (X_ALL) { + SET_DFNONE(); + } + IFXA (X_AF | X_OF, !la64_lbt) { + NOR(s4, xZR, s1); // s4 = ~op1 + ORI(s3, s4, 1); // s3 = ~op1 | op2 + ANDI(s4, s4, 1); // s4 = ~op1 & op2 + } + + IFXA (X_ALL, la64_lbt) { + X64_DEC_B(s1); + } + + ADDI_W(s1, s1, -1); + + IFX (X_PEND) { + ST_B(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) { + ANDI(s1, s1, 0xff); + return; + } + + IFX (X_ALL) { + // preserving CF + MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF)); + ANDN(xFlags, xFlags, s4); + } + IFX (X_AF | X_OF) { + AND(s3, s1, s3); // s3 = res & (~op1 | op2) + OR(s3, s3, s4); // cc = (res & (~op1 | op2)) | (~op1 & op2) + IFX (X_AF) { + ANDI(s2, s3, 0x08); // AF: cc & 0x08 + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX (X_OF) { + SRLI_D(s3, s3, 6); + SRLI_D(s2, s3, 1); + XOR(s3, s3, s2); + ANDI(s3, s3, 1); // OF: xor of two MSB's of cc + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF); + } + } + IFX (X_SF) { + ANDI(s2, s1, 0x80); + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + ANDI(s1, s1, 0xff); + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s2); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } +} + +// emit DEC16 instruction, from s1, store result in 
s1 using s3 and s4 as scratch +void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) +{ + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, d_dec16); + } else IFX (X_ALL) { + SET_DFNONE(); + } + IFX (X_AF | X_OF) { + NOR(s5, xZR, s1); + ORI(s3, s5, 1); // s3 = ~op1 | op2 + ANDI(s5, s5, 1); // s5 = ~op1 & op2 + } + + IFXA (X_ALL, la64_lbt) { + X64_DEC_H(s1); + } + + ADDI_W(s1, s1, -1); + + IFX (X_PEND) { + ST_H(s1, xEmu, offsetof(x64emu_t, res)); + } + + if (la64_lbt) { + BSTRPICK_D(s1, s1, 15, 0); + return; + } + + IFX (X_ALL) { + // preserving CF + MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF)); + ANDN(xFlags, xFlags, s4); + } + + IFX (X_AF | X_OF) { + AND(s3, s1, s3); // s3 = res & (~op1 | op2) + OR(s3, s3, s5); // cc = (res & (~op1 | op2)) | (~op1 & op2) + IFX (X_AF) { + ANDI(s2, s3, 0x08); // AF: cc & 0x08 + BEQZ(s2, 8); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX (X_OF) { + SRLI_D(s3, s3, 14); + SRLI_D(s2, s3, 1); + XOR(s3, s3, s2); + ANDI(s3, s3, 1); // OF: xor of two MSB's of cc + BEQZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_OF); + } + } + SLLI_W(s1, s1, 16); + IFX (X_SF) { + BGE(s1, xZR, 8); + ORI(xFlags, xFlags, 1 << F_SF); + } + SRLI_W(s1, s1, 16); + IFX (X_PF) { + emit_pf(dyn, ninst, s1, s3, s2); + } + IFX (X_ZF) { + BNEZ(s1, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } +} + // emit DEC32 instruction, from s1, store result in s1 using s3 and s4 as scratch void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { @@ -1219,14 +1492,17 @@ void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } } ADDIxw(s1, s1, -1); IFX (X_PEND) { SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (la64_lbt) { + if (!rex.w) ZEROUP(s1); + return; + } IFX (X_ALL) { // preserving CF diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c index 550713c4c5..bfad81dfed
100644 --- a/src/dynarec/la64/dynarec_la64_f0.c +++ b/src/dynarec/la64/dynarec_la64_f0.c @@ -382,6 +382,50 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMDMB(); } break; + case 0xFF: + nextop = F8; + + switch ((nextop >> 3) & 7) { + case 0: + INST_NAME("LOCK INC Ed"); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SMDMB(); + if (MODREG) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + emit_inc32(dyn, ninst, rex, ed, x3, x4, x5, x6); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); + MARKLOCK; + LLxw(x1, wback, 0); + ADDIxw(x4, x1, 1); + SCxw(x4, wback, 0); + BEQZ_MARKLOCK(x4); + IFX (X_ALL | X_PEND) + emit_inc32(dyn, ninst, rex, x1, x3, x4, x5, x6); + } + break; + case 1: + INST_NAME("LOCK DEC Ed"); + SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING); + SMDMB(); + if (MODREG) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + emit_dec32(dyn, ninst, rex, ed, x3, x4, x5, x6); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); + MARKLOCK; + LLxw(x1, wback, 0); + ADDIxw(x4, x1, -1); + SCxw(x4, wback, 0); + BEQZ_MARKLOCK(x4); + IFX (X_ALL | X_PEND) + emit_dec32(dyn, ninst, rex, x1, x3, x4, x5, x6); + } + break; + default: + DEFAULT; + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c index b6e060ea08..eb03439f9b 100644 --- a/src/dynarec/la64/dynarec_la64_f20f.c +++ b/src/dynarec/la64/dynarec_la64_f20f.c @@ -108,6 +108,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } else { FTINTRZ_W_D(d1, q0); MOVFR2GR_S(gd, d1); + ZEROUP(gd); } if (!rex.w) ZEROUP(gd); if (!box64_dynarec_fastround) { @@ -138,6 +139,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } else { FTINT_W_D(d1, q0); MOVFR2GR_S(gd, d1); + ZEROUP(gd); } x87_restoreround(dyn, ninst, u8); if (!box64_dynarec_fastround) { diff --git
a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c index c42310a7d7..5e1f094062 100644 --- a/src/dynarec/la64/dynarec_la64_f30f.c +++ b/src/dynarec/la64/dynarec_la64_f30f.c @@ -114,6 +114,7 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int } else { FTINTRZ_W_S(d1, d0); MOVFR2GR_S(gd, d1); + ZEROUP(gd); } if (!rex.w) ZEROUP(gd); if (!box64_dynarec_fastround) { diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index cb74bcdf47..fbc91e3492 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -764,7 +764,11 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define emit_sbb32 STEPNAME(emit_sbb32) #define emit_neg8 STEPNAME(emit_neg8) #define emit_neg32 STEPNAME(emit_neg32) +#define emit_inc8 STEPNAME(emit_inc8) +#define emit_inc16 STEPNAME(emit_inc16) #define emit_inc32 STEPNAME(emit_inc32) +#define emit_dec8 STEPNAME(emit_dec8) +#define emit_dec16 STEPNAME(emit_dec16) #define emit_dec32 STEPNAME(emit_dec32) #define emit_or32 STEPNAME(emit_or32) #define emit_or32c STEPNAME(emit_or32c) @@ -855,7 +859,11 @@ void emit_sbb16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_neg8(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4); void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3); +void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); +void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int 
s1, int s2, int s3, int s4, int s5); void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index d5388f2c47..6201eda8c2 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -1357,6 +1357,9 @@ LSX instruction starts with V, LASX instruction starts with XV. #define VLD(vd, rj, imm12) EMIT(type_2RI12(0b0010110000, imm12, rj, vd)) #define VST(vd, rj, imm12) EMIT(type_2RI12(0b0010110001, imm12, rj, vd)) +#define VFCMP_S(vd, vj, vk, cond) EMIT(type_4R(0b000011000101, cond, vk, vj, vd)) +#define VFCMP_D(vd, vj, vk, cond) EMIT(type_4R(0b000011000110, cond, vk, vj, vd)) + #define XVADD_B(vd, vj, vk) EMIT(type_3R(0b01110100000010100, vk, vj, vd)) #define XVADD_H(vd, vj, vk) EMIT(type_3R(0b01110100000010101, vk, vj, vd)) #define XVADD_W(vd, vj, vk) EMIT(type_3R(0b01110100000010110, vk, vj, vd))