[RV64_DYNAREC] Added A5 REP MOVSD opcode by ksco · Pull Request #622 · ptitSeb/box64 · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

[RV64_DYNAREC] Added A5 REP MOVSD opcode #622

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions src/dynarec/rv64/dynarec_rv64_00.c
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,37 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
ZEROUP(xRDX);
}
break;
case 0xA5:
if(rep) {
INST_NAME("REP MOVSD");
CBZ_NEXT(xRCX);
ANDI(x1, xFlags, 1<<F_DF);
BNEZ_MARK2(x1);
MARK; // Part with DF==0
LDxw(x1, xRSI, 0);
ADDI(xRSI, xRSI, rex.w?8:4);
SDxw(x1, xRDI, 0);
ADDI(xRDI, xRDI, rex.w?8:4);
SUBI(xRCX, xRCX, 1);
BNEZ_MARK(xRCX);
B_NEXT_nocond;
MARK2; // Part with DF==1
LDxw(x1, xRSI, 0);
SUBI(xRSI, xRSI, rex.w?8:4);
SDxw(x1, xRDI, 0);
SUBI(xRDI, xRDI, rex.w?8:4);
SUBI(xRCX, xRCX, 1);
BNEZ_MARK2(xRCX);
// done
} else {
INST_NAME("MOVSD");
GETDIR(x3, x1, rex.w?8:4);
LDxw(x1, xRSI, 0);
SDxw(x1, xRDI, 0);
ADD(xRSI, xRSI, x3);
ADD(xRDI, xRDI, x3);
}
break;
case 0xA8:
INST_NAME("TEST AL, Ib");
SETFLAGS(X_ALL, SF_SET_PENDING);
Expand All @@ -689,13 +720,13 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
MARK; // Part with DF==0
SDxw(xRAX, xRDI, 0);
ADDI(xRDI, xRDI, rex.w?8:4);
ADDI(xRCX, xRCX, -1);
SUBI(xRCX, xRCX, 1);
BNEZ_MARK(xRCX);
B_NEXT_nocond;
MARK2; // Part with DF==1
SDxw(xRAX, xRDI, 0);
ADDI(xRDI, xRDI, rex.w?-8:-4);
ADDI(xRCX, xRCX, -1);
SUBI(xRDI, xRDI, rex.w?8:4);
SUBI(xRCX, xRCX, 1);
BNEZ_MARK2(xRCX);
// done
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/rv64/dynarec_rv64_d9.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
INST_NAME("FDECSTP");
fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
LW(x2, xEmu, offsetof(x64emu_t, top));
ADDI(x2, x2, -1);
SUBI(x2, x2, 1);
ANDI(x2, x2, 7);
SW(x2, xEmu, offsetof(x64emu_t, top));
break;
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/rv64/dynarec_rv64_emit_math.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
{
// special case when doing math on RSP and only PEND is needed: ignoring it!
if (c > -2048 && c <= 2048) {
ADDI(s1, s1, -c);
SUBI(s1, s1, c);
} else {
MOV64xw(s2, c);
SUBxw(s1, s1, s2);
Expand Down
18 changes: 9 additions & 9 deletions src/dynarec/rv64/dynarec_rv64_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
}
fpu_pushcache(dyn, ninst, reg, 0);
if(ret!=-2) {
ADDI(xSP, xSP, -16); // RV64 stack needs to be 16byte aligned
SUBI(xSP, xSP, 16); // RV64 stack needs to be 16byte aligned
SD(xEmu, xSP, 0);
SD(savereg, xSP, 8);
// x5..x8, x10..x17, x28..x31 those needs to be saved by caller
Expand Down Expand Up @@ -447,7 +447,7 @@ void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w)
fpu_pushcache(dyn, ninst, x3, 1);
// x5..x8, x10..x17, x28..x31 those needs to be saved by caller
// RDI, RSI, RDX, RCX, R8, R9 are used for function call
ADDI(xSP, xSP, -16);
SUBI(xSP, xSP, 16);
SD(xEmu, xSP, 0);
SD(xRIP, xSP, 8); // ARM64 stack needs to be 16byte aligned
STORE_REG(R12);
Expand Down Expand Up @@ -560,7 +560,7 @@ void x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch)
SW(scratch, xEmu, offsetof(x64emu_t, fpu_stack));
// Sub x87stack to top, with and 7
LW(scratch, xEmu, offsetof(x64emu_t, top));
ADDI(scratch, scratch, -a);
SUBI(scratch, scratch, a);
ANDI(scratch, scratch, 7);
SW(scratch, xEmu, offsetof(x64emu_t, top));
// reset x87stack, but not the stack count of extcache
Expand Down Expand Up @@ -674,7 +674,7 @@ void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in
// new tag to fulls
ADDI(s3, xZR, 0);
for (int i=0; i<a; ++i) {
ADDI(s2, s2, -1);
SUBI(s2, s2, 1);
ANDI(s2, s2, 7); // (emu->top + st)&7
SLLI(s1, s2, 2);
ADD(s1, xEmu, s1);
Expand Down Expand Up @@ -970,7 +970,7 @@ int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
SLLI(s1, s1, 1);
ADDI(s2, xZR, 3);
BGE(s1, s2, 4+8);
ADDI(s1, s1, -4);
SUBI(s1, s1, 4);
XORI(s3, s1, 0b11);
// transform done (is there a faster way?)
FSRM(s3); // exange RM with current
Expand All @@ -991,7 +991,7 @@ int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
SLLI(s1, s1, 1);
ADDI(s2, xZR, 3);
BGE(s1, s2, 4+8);
ADDI(s1, s1, -4);
SUBI(s1, s1, 4);
XORI(s3, s1, 0b11);
// transform done (is there a faster way?)
FSRM(s3); // exange RM with current
Expand Down Expand Up @@ -1213,7 +1213,7 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
++n;
if(n) {
MESSAGE(LOG_DUMP, "\tPush x87/MMX Cache (%d)------\n", n);
ADDI(xSP, xSP, -8*((n+1)&~1));
SUBI(xSP, xSP, 8*((n+1)&~1));
int p = 0;
for(int i=17; i<24; ++i)
if(dyn->e.extcache[i].v!=0) {
Expand Down Expand Up @@ -1520,7 +1520,7 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in
ADDI(s1, xEmu, offsetof(x64emu_t, p_regs));
SLLI(s3, s3, 2);
for (int i=0; i<a; ++i) {
ADDI(s3, s3, -1<<2);
SUBI(s3, s3, 1<<2);
ANDI(s3, s3, 7<<2);
ADD(s3, s1, s3);
SW(s2, s3, 0); // that slot is full
Expand Down Expand Up @@ -1788,4 +1788,4 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst)
dyn->e.news = 0;
dyn->e.stack_push = 0;
dyn->e.swapped = 0;
}
}
0