Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[a64] Implement an ARM64 backend #2259

Draft
wants to merge 144 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
144 commits
Select commit Hold shift + click to select a range
1746177
[Build] Add Windows ARM64 support
Wunkolo Apr 27, 2024
a6d9113
[Base] Add Windows-ARM64 exception handling
Wunkolo Apr 28, 2024
1874f0c
[CPU] Add Windows ARM64 stack-walker
Wunkolo Apr 28, 2024
b48ec84
[ImGui] Stub ARM64 host debug text
Wunkolo Apr 28, 2024
f254848
[Base] Disable AVX check on ARM64
Wunkolo Apr 28, 2024
fe9c98e
[CPU] Disable x64 backend on ARM64
Wunkolo Apr 28, 2024
045441a
[Base] Add Windows-ARM64 `bit_count` implementation
Wunkolo Apr 28, 2024
f2b05ea
[CPU] Stub ARM64 to Null CPU backend
Wunkolo Apr 29, 2024
aa4a3e0
[UI] Fix divide-by-zero hazard
Wunkolo Apr 29, 2024
a0f6cd7
[Build] Link SDL2 to xenia-app
Wunkolo Jun 23, 2024
ffc966c
[CPU] Add ARM64 backend build target
Wunkolo Apr 29, 2024
59bc265
[a64] Integrate `oaknut` submodule
Wunkolo Apr 30, 2024
2284ed4
[Base] Add ARM64 utility functions
Wunkolo Apr 30, 2024
9960ef9
[CPU] Implement ARM64 CPU backend
Wunkolo May 1, 2024
39429aa
[a64] Fix `BYTE_SWAP_V128`
Wunkolo May 3, 2024
b9571cf
[a64] Implement `OPCODE_EXTRACT`
Wunkolo May 3, 2024
652b7a1
[a64] Implement `OPCODE_SPLAT`
Wunkolo May 3, 2024
10310d7
[a64] Implement `OPCODE_INSERT`
Wunkolo May 3, 2024
61feb6a
[a64] Implement `OPCODE_LOAD_VECTOR_SHL`
Wunkolo May 3, 2024
1b574be
[a64] Implement `OPCODE_LOAD_VECTOR_SHR`
Wunkolo May 3, 2024
72380bf
[a64] Implement `OPCODE_PACK`(D3DCOLOR)
Wunkolo May 3, 2024
6770682
[a64] Implement `OPCODE_VECTOR_SHA`
Wunkolo May 4, 2024
10cba8e
[a64] Implement `OPCODE_{SHR,SHA}`
Wunkolo May 4, 2024
defb68e
[a64] Fix StackLayout
Wunkolo May 4, 2024
124f684
[a64] Fix Guest-To-Host native calls
Wunkolo May 4, 2024
8aa4b93
[a64] Fix memory address generation
Wunkolo May 4, 2024
6a0e6a9
[a64] Fix indirect and external calls
Wunkolo May 4, 2024
3d345d7
[a64] Fix overwriting of return-value registers
Wunkolo May 4, 2024
07a4df8
[a64] Implement `OPCODE_VECTOR_SHL`
Wunkolo May 4, 2024
88ed113
[a64] Remove volatile storing of X0/Q0
Wunkolo May 4, 2024
7feea4c
[a64] Implement `OPCODE_VECTOR_{SHR,SHA}`
Wunkolo May 4, 2024
3ac5121
[a64] Implement `OPCODE_VECTOR_ROTATE_LEFT`
Wunkolo May 4, 2024
ebd1f84
[a64] Implement `OPCODE_VECTOR_MIN`
Wunkolo May 4, 2024
584c34c
[a64] Implement `OPCODE_VECTOR_MAX`
Wunkolo May 4, 2024
35e8a80
[a64] Implement `OPCODE_VECTOR_ADD`
Wunkolo May 4, 2024
e62f3f3
[a64] Fix native vector calls
Wunkolo May 4, 2024
3b2612b
[a64] Implement `OPCODE_PACK`(FLOAT16)
Wunkolo May 4, 2024
e5fd3d3
[a64] Implement `OPCODE_PACK`(SHORT)
Wunkolo May 4, 2024
8257740
[a64] Implement HIR Branch labeling
Wunkolo May 4, 2024
725ea3d
[a64] Implement control sequences
Wunkolo May 4, 2024
5b8ac36
[a64] Fix ResolveFunction thunk
Wunkolo May 4, 2024
65288d5
[a64] Fix resetting of labels during Emplace
Wunkolo May 4, 2024
dfa5bdb
[a64] Fix ResolveFunctionThunk call
Wunkolo May 4, 2024
a1741bf
[a64] Pad code cache with `0x00` bytes
Wunkolo May 5, 2024
9b70ea0
[a64] Draft Windows-ARM64 stack unwinding data
Wunkolo May 6, 2024
17987ca
[a64] Use `X4` for address-generation veneer
Wunkolo May 6, 2024
9ec4b68
[a64] Optimize Volatile/NonVolatile push/pop
Wunkolo May 6, 2024
c428d79
[a64] Refactor thunk prolog/epilog
Wunkolo May 6, 2024
6a5f461
[a64] Update Membase and Context register
Wunkolo May 6, 2024
5bff71f
[a64] Fix emitted function prolog/epilog
Wunkolo May 6, 2024
b5d55e1
[a64] Refactor XSP to SP
Wunkolo May 6, 2024
018e484
[a64] Implement `OPCODE_{LOAD,STORE}_MMIO`
Wunkolo May 6, 2024
8b4b713
[a64] Remove redundant zero-extension during address computation
Wunkolo May 6, 2024
2b3147b
[a64] Fix `CallIndirect` return address
Wunkolo May 6, 2024
4f5c640
[a64] Refactor `REV{32,64}` to `REV`
Wunkolo May 6, 2024
8836eb2
[a64] Implement `OPCODE_MEMSET`
Wunkolo May 6, 2024
8a1e343
[a64] Implement `OPCODE_MEMORY_BARRIER`
Wunkolo May 6, 2024
d656c5b
[a64] Implement `OPCODE_{LOAD,STORE}_LOCAL`
Wunkolo May 6, 2024
cf6c2c2
[a64] Implement `OPCODE_ATOMIC_EXCHANGE`
Wunkolo May 6, 2024
647d26c
[a64] Implement `OPCODE_ATOMIC_COMPARE_EXCHANGE`
Wunkolo May 6, 2024
52b2593
[a64] Fix `ComputeMemoryAddress{Offset}` register stomp
Wunkolo May 7, 2024
0f9769b
[a64] Refactor `REV{16,32}` to `REV`
Wunkolo May 7, 2024
49f9edb
[a64] Reorganize guest register allocation
Wunkolo May 7, 2024
906d0c6
[a64] Remove standard prolog/epilog from thunks
Wunkolo May 7, 2024
540344f
[a64] Fix `EmitGetCurrentThreadId` type
Wunkolo May 7, 2024
ba924fe
[a64] Fix immediates being too large
Wunkolo May 7, 2024
e4d3b2a
[a64] Increase function code size to 1MiB
Wunkolo May 7, 2024
c6a7270
[a64] Fix external function call arguments
Wunkolo May 7, 2024
b18f2ff
[a64] Fix up-casting zero/sign extensions
Wunkolo May 8, 2024
47665fd
[a64] Compute memory offsets as 32-bit registers
Wunkolo May 8, 2024
2d093ae
[a64] Use `offsetof` to reload membase
Wunkolo May 8, 2024
fd32c0e
[a64] Fix 32-bit store
Wunkolo May 8, 2024
dc6666d
[a64] Update guest calling conventions
Wunkolo May 8, 2024
6e83e2a
[a64] Fix instruction constant generation
Wunkolo May 9, 2024
fbc306f
[a64] Implement multi-arch capstone support
Wunkolo May 9, 2024
c495fe7
[PPC] Add a64 backend testing support
Wunkolo May 9, 2024
31b2ccd
[a64] Protect address-generation from imm-overflow
Wunkolo May 9, 2024
6f0ff9e
[a64] Preserve X0 when resolving functions
Wunkolo May 10, 2024
1bdc243
[a64] Fix ADDC carry-bit assignment
Wunkolo May 10, 2024
866ce97
[a64] Fix signed MUL_HI
Wunkolo May 10, 2024
50d7ad5
[a64] Fix non-const MUL_I32
Wunkolo May 10, 2024
b532ab5
[a64] Implement `PERMUTE_V128`(int8)
Wunkolo May 10, 2024
c4b2638
[a64] Implement `PERMUTE_I32`
Wunkolo May 10, 2024
f73c8fe
[a64] Implement `OPCODE_SWIZZLE`
Wunkolo May 10, 2024
046e8ed
[a64] Fix `SELECT` register usage
Wunkolo May 10, 2024
f5e14d6
[a64] Fix `SET_ROUNDING_MODE_I32` exception
Wunkolo May 10, 2024
737f2b5
[UI] Implement Arm64 host register info
Wunkolo May 10, 2024
3adb86c
[a64] Implement `OPCODE_VECTOR_SUB`
Wunkolo May 10, 2024
87cca91
[a64] Fix `PERMUTE_V128` out-of-index case
Wunkolo May 11, 2024
2e2f47f
[a64] Fix `AND_NOT_V128`
Wunkolo May 11, 2024
207e2c1
[a64] Implement `VECTOR_COMPARE_{EQ,UGT,UGE,SGT,SGE}_V128`
Wunkolo May 11, 2024
de040f0
[a64] Fix `OPCODE_SPLAT`
Wunkolo May 11, 2024
1ad0d7e
[a64] Fix `SELECT_V128_V128`
Wunkolo May 11, 2024
edfd2f2
[a64] Implement `OPCODE_VECTOR_AVERAGE`
Wunkolo May 11, 2024
6b4ff8b
[CPU] Fix multi-arch cpu-test support
Wunkolo May 11, 2024
42d41a5
[a64 Fix floating-point `BRANCH_FALSE`
Wunkolo May 11, 2024
be0c793
[a64] Refactor `OPCODE_ATOMIC_COMPARE_EXCHANGE`
Wunkolo May 11, 2024
28b629e
[a64] Fix `OPCODE_MAX`
Wunkolo May 12, 2024
41eeae1
[a64] Fix `MUL_HI_I32` operands
Wunkolo May 12, 2024
e2d141e
[a64] Fix `OPCODE_VECTOR_SHA`(constant)
Wunkolo May 12, 2024
0e2f756
[a64] Implement `VECTOR_CONVERT_{F2I,I2F}`
Wunkolo May 12, 2024
1919dda
[a64] Fix `OPCODE_VECTOR_CONVERT_{I2F,F2I}`
Wunkolo May 13, 2024
d3d3ea3
[a64] Fix `FPCR` starting bit index
Wunkolo May 13, 2024
7eca228
[a64] Fix `VECTOR_CONVERT_F2I` rounding
Wunkolo May 13, 2024
684904c
[a64] Implement `PERMUTE_V128`(int16)
Wunkolo May 13, 2024
b9d0752
[a64] Optimize `OPCODE_MUL_ADD`
Wunkolo May 13, 2024
bec248c
[a64] Fix `OPCODE_CNTLZ`
Wunkolo May 13, 2024
c33f543
[a64] Implement `kDebugInfoTraceFunctions` and `kDebugInfoTraceFuncti…
Wunkolo May 13, 2024
f1235be
[a64] Fix `ATOMIC_COMPARE_EXCHANGE_I32` comparison type
Wunkolo May 13, 2024
a542265
[a64] Implement memory tracing
Wunkolo May 13, 2024
eb0736e
[a64] Reduce function prolog/epilog to 16 bytes
Wunkolo May 16, 2024
f7bd0c8
[a64] Implement guest-debugger stalk-walks
Wunkolo May 16, 2024
c3efaaa
[a64] Implement instruction stepping.
Wunkolo May 16, 2024
a7ae117
[a64] Implement `b` `bl` `br` `blr` `cbnz` `cbz` instruction-stepping
Wunkolo May 17, 2024
e2d1e5d
[a64] Optimize vector-constant generation
Wunkolo May 21, 2024
6e2910b
[a64] Optimize memory-address calculation
Wunkolo May 21, 2024
9b5a690
[a64] Optimize `OPCODE_MEMSET`
Wunkolo May 21, 2024
7c094dc
[a64] Implement `OPCODE_LOAD_CLOCk` `clock_source_raw`
Wunkolo May 21, 2024
40d908b
[a64] Implement `OPCODE_PACK`(2101010, 4202020, 8-in-16, 16-in-32)
Wunkolo May 23, 2024
6478623
[a64] Fix `OPCODE_PACK` saturation edge-cases
Wunkolo May 23, 2024
96d444d
[a64] Implement `OPCODE_UNPACK`
Wunkolo May 23, 2024
06daedf
[a64] Implement `LSE` and `FP16C` detection
Wunkolo May 25, 2024
2d72b40
[a64] Optimize `OPCODE_{UN}PACK`(float16) with `F16C`
Wunkolo May 25, 2024
4ff43ae
[a64] Fix `OPCODE_PACK`(short)
Wunkolo May 25, 2024
fc1a13d
[a64] Optimize bulk VConst access with relative addressing
Wunkolo May 25, 2024
bf12583
[a64] Optimize constant vector byte-splats
Wunkolo May 25, 2024
63f31d5
[a64] Fix `OPCODE_SWIZZLE` register-aliasing
Wunkolo May 27, 2024
3b1a696
[a64] Implement raw clock source
Wunkolo May 27, 2024
cba92a2
[a64] Remove `VOne` constant in favor of `FMOV`
Wunkolo May 27, 2024
7b9f791
[a64] Add arch-agnostic documentation configurations
Wunkolo May 27, 2024
818a773
[a64] Optimize zero MovMem64
Wunkolo May 27, 2024
f830f79
[a64] Implement `OPCODE_DID_SATURATE`
Wunkolo May 27, 2024
8f6c0ad
[a64] Detect `MOVI` utilizations for vector-element splats(u8,u16,u32)
Wunkolo May 28, 2024
4655bc1
[a64] Optimize constant-loads with `FMOV`
Wunkolo May 28, 2024
151700d
[a64] Implement armv8.0 atomic operations
Wunkolo May 29, 2024
164f1e4
[a64] Remove x64 reference implementations
Wunkolo May 31, 2024
1127fd9
[a64] Implement `OPCODE_CACHE_CONTROL`
Wunkolo Jun 2, 2024
02edbd2
[a64] Fix out-of-bounds `OPCODE_VECTOR_SHL`(all-same) case
Wunkolo Jun 3, 2024
2953e2e
[a64] Use VectorCodeGenerator rather than CodeBlock+CodeGenerator
Wunkolo Jun 8, 2024
3acd0a3
[a64] Replace instances of `MOV`+`DUP-splats to `MOVI`
Wunkolo Jun 12, 2024
539a03d
[a64] Optimize `OPCODE_SPLAT` byte-constants
Wunkolo Jun 12, 2024
9c8b067
[a64] Optimize `OPCODE_SPLAT` with `MOVI`/`FMOV`
Wunkolo Jun 13, 2024
9c572c3
[a64] Remove redundant `OPCODE_DOT_PRODUCT_{3,4}` lane-isolation
Wunkolo Jun 16, 2024
a8b9cd8
[a64] Implement support for large stack sizes
Wunkolo Jun 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,6 @@
[submodule "third_party/VulkanMemoryAllocator"]
path = third_party/VulkanMemoryAllocator
url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git
[submodule "third_party/oaknut"]
path = third_party/oaknut
url = https://github.com/merryhime/oaknut.git
27 changes: 21 additions & 6 deletions premake5.lua
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ filter("configurations:Checked")
defines({
"DEBUG",
})
filter({"configurations:Checked", "platforms:Windows"})
filter({"configurations:Checked", "platforms:Windows-*"})
buildoptions({
"/RTCsu", -- Full Run-Time Checks.
})
Expand Down Expand Up @@ -153,7 +153,7 @@ filter("platforms:Android-*")
"log",
})

filter("platforms:Windows")
filter("platforms:Windows-*")
system("windows")
toolset("msc")
buildoptions({
Expand All @@ -179,8 +179,12 @@ filter("platforms:Windows")
"_CRT_SECURE_NO_WARNINGS",
"WIN32",
"_WIN64=1",
"_AMD64=1",
})
filter("architecture:x86_64")
defines({
"_AMD64=1",
})
filter({})
linkoptions({
"/ignore:4006", -- Ignores complaints about empty obj files.
"/ignore:4221",
Expand All @@ -198,7 +202,7 @@ filter("platforms:Windows")
})

-- Embed the manifest for things like dependencies and DPI awareness.
filter({"platforms:Windows", "kind:ConsoleApp or WindowedApp"})
filter({"platforms:Windows-*", "kind:ConsoleApp or WindowedApp"})
files({
"src/xenia/base/app_win32.manifest"
})
Expand Down Expand Up @@ -228,7 +232,12 @@ workspace("xenia")
["ARCHS"] = "x86_64"
})
elseif os.istarget("windows") then
platforms({"Windows"})
platforms({"Windows-ARM64", "Windows-x86_64"})
filter("platforms:Windows-ARM64")
architecture("ARM64")
filter("platforms:Windows-x86_64")
architecture("x86_64")
filter({})
-- 10.0.15063.0: ID3D12GraphicsCommandList1::SetSamplePositions.
-- 10.0.19041.0: D3D12_HEAP_FLAG_CREATE_NOT_ZEROED.
-- 10.0.22000.0: DWMWA_WINDOW_CORNER_PREFERENCE.
Expand Down Expand Up @@ -284,7 +293,13 @@ workspace("xenia")
include("src/xenia/apu/nop")
include("src/xenia/base")
include("src/xenia/cpu")
include("src/xenia/cpu/backend/x64")

filter("architecture:x86_64")
include("src/xenia/cpu/backend/x64")
filter("architecture:ARM64")
include("src/xenia/cpu/backend/a64")
filter({})

include("src/xenia/debug/ui")
include("src/xenia/gpu")
include("src/xenia/gpu/null")
Expand Down
14 changes: 10 additions & 4 deletions src/xenia/app/premake5.lua
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ project("xenia-app")
"libavcodec",
"libavutil",
"mspack",
"SDL2",
"snappy",
"xxhash",
})
Expand Down Expand Up @@ -72,13 +73,18 @@ project("xenia-app")
"xenia-cpu-backend-x64",
})

filter("architecture:ARM64")
links({
"xenia-cpu-backend-a64",
})

-- TODO(Triang3l): The emulator itself on Android.
filter("platforms:not Android-*")
files({
"xenia_main.cc",
})

filter("platforms:Windows")
filter("platforms:Windows-*")
files({
"main_resources.rc",
})
Expand All @@ -104,7 +110,7 @@ project("xenia-app")
"SDL2",
})

filter("platforms:Windows")
filter("platforms:Windows-*")
links({
"xenia-apu-xaudio2",
"xenia-gpu-d3d12",
Expand All @@ -113,13 +119,13 @@ project("xenia-app")
"xenia-ui-d3d12",
})

filter({"platforms:Windows", SINGLE_LIBRARY_FILTER})
filter({"platforms:Windows-*", SINGLE_LIBRARY_FILTER})
links({
"xenia-gpu-d3d12-trace-viewer",
"xenia-ui-window-d3d12-demo",
})

filter("platforms:Windows")
filter("platforms:Windows-*")
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-app.vcxproj.user"
if not os.isfile(user_file) then
Expand Down
5 changes: 3 additions & 2 deletions src/xenia/base/clock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ DEFINE_bool(clock_no_scaling, false,
"Guest system time is directly pulled from host.",
"CPU");
DEFINE_bool(clock_source_raw, false,
"Use the RDTSC instruction as the time source. "
"Host CPU must support invariant TSC.",
"On x64, Use the RDTSC instruction as the time source. Requires "
"invariant TSC. "
"On a64, Use the CNTVCT_EL0 register as the time source",
"CPU");

namespace xe {
Expand Down
2 changes: 2 additions & 0 deletions src/xenia/base/clock.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#if XE_ARCH_AMD64
#define XE_CLOCK_RAW_AVAILABLE 1
#elif XE_ARCH_ARM64
#define XE_CLOCK_RAW_AVAILABLE 1
#endif

DECLARE_bool(clock_no_scaling);
Expand Down
50 changes: 50 additions & 0 deletions src/xenia/base/clock_a64.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/base/clock.h"
#include "xenia/base/platform.h"

#if XE_ARCH_ARM64 && XE_CLOCK_RAW_AVAILABLE

#include "xenia/base/logging.h"

#ifdef _MSC_VER
#include <arm64_neon.h>
#include <intrin.h>
#else
#include <arm_neon.h>
#endif

// Wrap all these different cpu compiler intrinsics.
#if XE_COMPILER_MSVC
constexpr int32_t CNTFRQ_EL0 = ARM64_SYSREG(3, 3, 14, 0, 0);
constexpr int32_t CNTVCT_EL0 = ARM64_SYSREG(3, 3, 14, 0, 2);
#define xe_cpu_mrs(reg) _ReadStatusReg(reg)
#elif XE_COMPILER_CLANG || XE_COMPILER_GNUC
constexpr int32_t CNTFRQ_EL0 = 0b11'011'1110'0000'000;
constexpr int32_t CNTVCT_EL0 = 0b11'011'1110'0000'010;

uint64_t xe_cpu_mrs(uint32_t reg) {
uint64_t result;
__asm__ volatile("mrs \t%0," #reg : "=r"(result));
return result;
}
#else
#error \
"No cpu instruction wrappers xe_cpu_mrs(CNTVCT_EL0); for current compiler implemented."
#endif

namespace xe {

uint64_t Clock::host_tick_frequency_raw() { return xe_cpu_mrs(CNTFRQ_EL0); }
uint64_t Clock::host_tick_count_raw() { return xe_cpu_mrs(CNTVCT_EL0); }

} // namespace xe

#endif
33 changes: 33 additions & 0 deletions src/xenia/base/exception_handler_win.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,22 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
}

HostThreadContext thread_context;

#if XE_ARCH_AMD64
thread_context.rip = ex_info->ContextRecord->Rip;
thread_context.eflags = ex_info->ContextRecord->EFlags;
std::memcpy(thread_context.int_registers, &ex_info->ContextRecord->Rax,
sizeof(thread_context.int_registers));
std::memcpy(thread_context.xmm_registers, &ex_info->ContextRecord->Xmm0,
sizeof(thread_context.xmm_registers));
#elif XE_ARCH_ARM64
thread_context.pc = ex_info->ContextRecord->Pc;
thread_context.cpsr = ex_info->ContextRecord->Cpsr;
std::memcpy(thread_context.x, &ex_info->ContextRecord->X,
sizeof(thread_context.x));
std::memcpy(thread_context.v, &ex_info->ContextRecord->V,
sizeof(thread_context.v));
#endif

// https://msdn.microsoft.com/en-us/library/ms679331(v=vs.85).aspx
// https://msdn.microsoft.com/en-us/library/aa363082(v=vs.85).aspx
Expand Down Expand Up @@ -78,6 +88,7 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
if (handlers_[i].first(&ex, handlers_[i].second)) {
// Exception handled.
#if XE_ARCH_AMD64
ex_info->ContextRecord->Rip = thread_context.rip;
ex_info->ContextRecord->EFlags = thread_context.eflags;
uint32_t modified_register_index;
Expand All @@ -98,6 +109,28 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
&thread_context.xmm_registers[modified_register_index],
sizeof(vec128_t));
}
#elif XE_ARCH_ARM64
ex_info->ContextRecord->Pc = thread_context.pc;
ex_info->ContextRecord->Cpsr = thread_context.cpsr;
uint32_t modified_register_index;
uint16_t modified_int_registers_remaining = ex.modified_x_registers();
while (xe::bit_scan_forward(modified_int_registers_remaining,
&modified_register_index)) {
modified_int_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
ex_info->ContextRecord->X[modified_register_index] =
thread_context.x[modified_register_index];
}
uint16_t modified_xmm_registers_remaining = ex.modified_v_registers();
while (xe::bit_scan_forward(modified_xmm_registers_remaining,
&modified_register_index)) {
modified_xmm_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
std::memcpy(&ex_info->ContextRecord->V + modified_register_index,
&thread_context.v[modified_register_index],
sizeof(vec128_t));
}
#endif
return EXCEPTION_CONTINUE_EXECUTION;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/xenia/base/host_thread_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ std::string HostThreadContext::GetStringFromValue(HostRegister reg,
case Arm64Register::kPc:
return hex ? string_util::to_hex_string(pc) : std::to_string(pc);
case Arm64Register::kPstate:
return hex ? string_util::to_hex_string(pstate) : std::to_string(pstate);
return hex ? string_util::to_hex_string(cpsr) : std::to_string(cpsr);
case Arm64Register::kFpsr:
return hex ? string_util::to_hex_string(fpsr) : std::to_string(fpsr);
case Arm64Register::kFpcr:
Expand Down
2 changes: 1 addition & 1 deletion src/xenia/base/host_thread_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ class HostThreadContext {
uint64_t x[31];
uint64_t sp;
uint64_t pc;
uint64_t pstate;
uint32_t cpsr;
uint32_t fpsr;
uint32_t fpcr;
vec128_t v[32];
Expand Down
4 changes: 4 additions & 0 deletions src/xenia/base/main_init_win.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

#include <cstdlib>

#if XE_ARCH_AMD64

// Includes Windows headers, so it goes after platform_win.h.
#include "third_party/xbyak/xbyak/xbyak_util.h"

Expand Down Expand Up @@ -39,3 +41,5 @@ class StartupAvxCheck {
#pragma warning(suppress : 4073)
#pragma init_seg(lib)
static StartupAvxCheck gStartupAvxCheck;

#endif
27 changes: 27 additions & 0 deletions src/xenia/base/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

#if XE_ARCH_AMD64
#include <xmmintrin.h>
#elif XE_ARCH_ARM64
#include <arm64_neon.h>
#endif

namespace xe {
Expand Down Expand Up @@ -133,10 +135,17 @@ constexpr inline uint32_t bit_count(T v) {
}
#else
#if XE_COMPILER_MSVC || XE_COMPILER_INTEL
#if XE_ARCH_AMD64
inline uint32_t bit_count(uint32_t v) { return __popcnt(v); }
inline uint32_t bit_count(uint64_t v) {
return static_cast<uint32_t>(__popcnt64(v));
}
#elif XE_ARCH_ARM64
inline uint32_t bit_count(uint32_t v) { return _CountOneBits(v); }
inline uint32_t bit_count(uint64_t v) {
return static_cast<uint32_t>(_CountOneBits64(v));
}
#endif
#elif XE_COMPILER_GCC || XE_COMPILER_CLANG
static_assert(sizeof(unsigned int) == sizeof(uint32_t));
static_assert(sizeof(unsigned long long) == sizeof(uint64_t));
Expand Down Expand Up @@ -376,6 +385,24 @@ template <int N>
int64_t m128_i64(const __m128& v) {
return m128_i64<N>(_mm_castps_pd(v));
}
#elif XE_ARCH_ARM64
// Utilities for NEON values.
template <int N>
float m128_f32(const float32x4_t& v) {
return vgetq_lane_f32(v, N);
}
template <int N>
int32_t m128_i32(const int32x4_t& v) {
return vgetq_lane_s32(v, N);
}
template <int N>
double m128_f64(const float64x2_t& v) {
return vgetq_lane_f64(v, N);
}
template <int N>
int64_t m128_i64(const int64x2_t& v) {
return vgetq_lane_s64(v, N);
}
#endif

// Similar to the C++ implementation of XMConvertFloatToHalf and
Expand Down
8 changes: 8 additions & 0 deletions src/xenia/base/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@
#define XE_ARCH_PPC 1
#endif

#ifdef XE_ARCH_AMD64
#define XE_HOST_ARCH_NAME "x64"
#elif XE_ARCH_ARM64
#define XE_HOST_ARCH_NAME "a64"
#elif XE_ARCH_PPC
#define XE_HOST_ARCH_NAME "ppc"
#endif

#if XE_PLATFORM_WIN32
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX // Don't want windows.h including min/max macros.
Expand Down
Loading