From 52cb4bd82e5f452f46ea6e9e89541eb2c5c749a0 Mon Sep 17 00:00:00 2001 From: zhaixiang Date: Sat, 7 Dec 2024 10:21:02 +0800 Subject: [PATCH 1/2] [LA64_DYNAREC] Added AES opcodes --- src/dynarec/la64/dynarec_la64_660f.c | 54 ++++++++++++++++++++++++++ src/dynarec/la64/dynarec_la64_helper.h | 40 +++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index 4c42c32719..08e43b60a4 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -316,6 +316,60 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(q1, 0, 0); VSIGNCOV_W(q0, q1, q0); break; + case 0xDB: + INST_NAME("AESIMC Gx, Ex"); // AES-NI + nextop = F8; + GETGX_(); + GETEX_(x2, 0, 8); + SSE_LOOP_MV_Q(x3); + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(native_aesimc, -1); + break; + case 0xDC: + INST_NAME("AESENC Gx, Ex"); // AES-NI + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(native_aese, -1); + GETGX_(); + GETEX_(x2, 0, 8); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + break; + case 0xDD: + INST_NAME("AESENCLAST Gx, Ex"); // AES-NI + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(native_aeselast, -1); + GETGX_(); + GETEX_(x2, 0, 8); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + break; + case 0xDE: + INST_NAME("AESDEC Gx, Ex"); // AES-NI + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(native_aesd, -1); + GETGX_(); + GETEX_(x2, 0, 8); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + break; + case 0xDF: + INST_NAME("AESDECLAST Gx, Ex"); // AES-NI + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(native_aesdlast, -1); + GETGX_(); + GETEX_(x2, 0, 8); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index cbfdc4ef52..242fe78044 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -324,6 +324,13 @@ gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ a = sse_get_reg_empty(dyn, ninst, x1, gd) +// Will get pointer to GX in general register a, will purge SS or SD if loaded. can use gback as load address +#define GETGX_() \ + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ + sse_forget_reg(dyn, ninst, gd); \ + gback = xEmu; \ + gdoffset = offsetof(x64emu_t, xmm[gd]) + // Get EX as a quad, (x1 is used) #define GETEX(a, w, D) \ if (MODREG) { \ @@ -335,6 +342,19 @@ VLD(a, ed, fixedaddress); \ } +// Get Ex address in general register a, will purge SS or SD if it's reg and is loaded. May use x3. Use wback as load address! +#define GETEX_(a, D, I12) \ + if (MODREG) { \ + ed = (nextop & 7) + (rex.b << 3); \ + sse_forget_reg(dyn, ninst, ed); \ + fixedaddress = offsetof(x64emu_t, xmm[ed]); \ + wback = xEmu; \ + } else { \ + SMREAD(); \ + ed = 16; \ + addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, I12, D); \ + } + // Put Back EX if it was a memory and not an emm register #define PUTEX(a) \ if (!MODREG) { \ @@ -388,6 +408,26 @@ BEQZ(s, 4 + 4); \ SUB_D(r, xZR, r); +#define SSE_LOOP_MV_Q_ITEM(s, i) \ + LD_D(s, wback, fixedaddress + i * 8); \ + ST_D(s, gback, gdoffset + i * 8); + +// Loop for SSE opcode that moves 64bits value from wback to gback, use s as scratch. +#define SSE_LOOP_MV_Q(s) \ + SSE_LOOP_MV_Q_ITEM(s, 0) \ + SSE_LOOP_MV_Q_ITEM(s, 1) + +#define SSE_LOOP_Q_ITEM(GX1, EX1, F, i) \ + LD_D(GX1, gback, gdoffset + i * 8); \ + LD_D(EX1, wback, fixedaddress + i * 8); \ + F; \ + ST_D(GX1, gback, gdoffset + i * 8); + +// Loop for SSE opcode that use 64bits value and write to GX. +#define SSE_LOOP_Q(GX1, EX1, F) \ + SSE_LOOP_Q_ITEM(GX1, EX1, F, 0) \ + SSE_LOOP_Q_ITEM(GX1, EX1, F, 1) + // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 #define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0) From 481b40bef93662b8fdfd9e8ade28231492d48510 Mon Sep 17 00:00:00 2001 From: zhaixiang Date: Sat, 7 Dec 2024 15:49:27 +0800 Subject: [PATCH 2/2] [LA64_DYNAREC] Use LSX instruction instead --- src/dynarec/la64/dynarec_la64_660f.c | 52 ++++++++++++++++++-------- src/dynarec/la64/dynarec_la64_helper.h | 40 -------------------- 2 files changed, 37 insertions(+), 55 deletions(-) diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index 08e43b60a4..4daba8bf96 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -319,9 +319,11 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xDB: INST_NAME("AESIMC Gx, Ex"); // AES-NI nextop = F8; - GETGX_(); - GETEX_(x2, 0, 8); - SSE_LOOP_MV_Q(x3); + GETEX(q1, 0, 0); + GETGX_empty(q0); + if (q0 != q1) { + VOR_V(q0, q1, q1); + } sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aesimc, -1); @@ -330,45 +332,65 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("AESENC Gx, Ex"); // AES-NI nextop = F8; GETG; + GETEX(q1, 0, 0); + if (MODREG && (gd == (nextop & 7) + (rex.b << 3))) { + d0 = fpu_get_scratch(dyn); + VOR_V(d0, q1, q1); + } else + d0 = -1; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aese, -1); - GETGX_(); - GETEX_(x2, 0, 8); - SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + GETGX(q0, 1); + VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); break; case 0xDD: INST_NAME("AESENCLAST Gx, Ex"); // AES-NI nextop = F8; GETG; + GETEX(q1, 0, 0); + if (MODREG && (gd == (nextop & 7) + (rex.b << 3))) { + d0 = fpu_get_scratch(dyn); + VOR_V(d0, q1, q1); + } else + d0 = -1; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aeselast, -1); - GETGX_(); - GETEX_(x2, 0, 8); - SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + GETGX(q0, 1); + VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); break; case 0xDE: INST_NAME("AESDEC Gx, Ex"); // AES-NI nextop = F8; GETG; + GETEX(q1, 0, 0); + if (MODREG && (gd == (nextop & 7) + (rex.b << 3))) { + d0 = fpu_get_scratch(dyn); + VOR_V(d0, q1, q1); + } else + d0 = -1; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aesd, -1); - GETGX_(); - GETEX_(x2, 0, 8); - SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + GETGX(q0, 1); + VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); break; case 0xDF: INST_NAME("AESDECLAST Gx, Ex"); // AES-NI nextop = F8; GETG; + GETEX(q1, 0, 0); + if (MODREG && (gd == (nextop & 7) + (rex.b << 3))) { + d0 = fpu_get_scratch(dyn); + VOR_V(d0, q1, q1); + } else + d0 = -1; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aesdlast, -1); - GETGX_(); - GETEX_(x2, 0, 8); - SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); + GETGX(q0, 1); + VXOR_V(q0, q0, (d0 != -1) ? d0 : q1); break; default: DEFAULT; diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 242fe78044..cbfdc4ef52 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -324,13 +324,6 @@ gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ a = sse_get_reg_empty(dyn, ninst, x1, gd) -// Will get pointer to GX in general register a, will purge SS or SD if loaded. can use gback as load address -#define GETGX_() \ - gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ - sse_forget_reg(dyn, ninst, gd); \ - gback = xEmu; \ - gdoffset = offsetof(x64emu_t, xmm[gd]) - // Get EX as a quad, (x1 is used) #define GETEX(a, w, D) \ if (MODREG) { \ @@ -342,19 +335,6 @@ VLD(a, ed, fixedaddress); \ } -// Get Ex address in general register a, will purge SS or SD if it's reg and is loaded. May use x3. Use wback as load address! -#define GETEX_(a, D, I12) \ - if (MODREG) { \ - ed = (nextop & 7) + (rex.b << 3); \ - sse_forget_reg(dyn, ninst, ed); \ - fixedaddress = offsetof(x64emu_t, xmm[ed]); \ - wback = xEmu; \ - } else { \ - SMREAD(); \ - ed = 16; \ - addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, I12, D); \ - } - // Put Back EX if it was a memory and not an emm register #define PUTEX(a) \ if (!MODREG) { \ @@ -408,26 +388,6 @@ BEQZ(s, 4 + 4); \ SUB_D(r, xZR, r); -#define SSE_LOOP_MV_Q_ITEM(s, i) \ - LD_D(s, wback, fixedaddress + i * 8); \ - ST_D(s, gback, gdoffset + i * 8); - -// Loop for SSE opcode that moves 64bits value from wback to gback, use s as scratch. -#define SSE_LOOP_MV_Q(s) \ - SSE_LOOP_MV_Q_ITEM(s, 0) \ - SSE_LOOP_MV_Q_ITEM(s, 1) - -#define SSE_LOOP_Q_ITEM(GX1, EX1, F, i) \ - LD_D(GX1, gback, gdoffset + i * 8); \ - LD_D(EX1, wback, fixedaddress + i * 8); \ - F; \ - ST_D(GX1, gback, gdoffset + i * 8); - -// Loop for SSE opcode that use 64bits value and write to GX. -#define SSE_LOOP_Q(GX1, EX1, F) \ - SSE_LOOP_Q_ITEM(GX1, EX1, F, 0) \ - SSE_LOOP_Q_ITEM(GX1, EX1, F, 1) - // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 #define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0)