This commit is contained in:
rtldg 2025-03-07 14:48:31 +00:00
parent a884302c14
commit 2e7ae4687b
5 changed files with 262 additions and 225 deletions

View File

@ -199,7 +199,7 @@ namespace SourceHook
// SH tries to auto-detect these
// If you want to override SH's auto-detection, pass them in yourself
PassFlag_RetMem = (1<<6), /**< Object is returned in memory (through hidden first param) */
PassFlag_RetReg = (1<<7) /**< Object is returned in EAX(:EDX)/RAX(x86_64) */
PassFlag_RetReg = (1<<7) /**< Object is returned in EAX(:EDX) (x86) / RAX(:RDX)/XMM0(:XMM1) (x86_64) */
};
size_t size; //!< Size of the data being passed

View File

@ -582,6 +582,10 @@ namespace SourceHook
// Emits a single 0xC3 byte into the output buffer: the x86 near-return (`ret`) opcode.
void retn() {
this->write_ubyte(0xC3);
}
// Emits a single 0xC9 byte into the output buffer: the x86 `leave` opcode,
// which restores RSP and RBP (same as `mov rsp, rbp` followed by `pop rbp`).
void leave() {
this->write_ubyte(0xC9);
}
};
}
}

View File

@ -199,7 +199,7 @@ namespace SourceHook
// SH tries to auto-detect these
// If you want to override SH's auto-detection, pass them in yourself
PassFlag_RetMem = (1<<6), /**< Object is returned in memory (through hidden first param) */
PassFlag_RetReg = (1<<7) /**< Object is returned in EAX(:EDX)/RAX(x86_64) */
PassFlag_RetReg = (1<<7) /**< Object is returned in EAX(:EDX) (x86) / RAX(:RDX)/XMM0(:XMM1) (x86_64) */
};
size_t size; //!< Size of the data being passed

View File

@ -18,7 +18,7 @@
#include <stdio.h>
#include "sourcehook_impl.h"
#include "sourcehook_hookmangen.h"
#if defined( PLATFORM_64BITS ) && !defined( _LINUX )
#if defined( PLATFORM_64BITS )
#include "sourcehook_hookmangen_x86_64.h"
typedef SourceHook::Impl::x64GenContext SHGenContext;
#else
@ -61,9 +61,6 @@ namespace SourceHook
HookManagerPubFunc CHookManagerAutoGen::MakeHookMan(const ProtoInfo *proto, int vtbl_offs, int vtbl_idx)
{
#if defined( PLATFORM_64BITS ) && defined( _LINUX )
return nullptr;
#else
CProto mproto(proto);
for (auto iter = m_Contexts.begin(); iter != m_Contexts.end(); ++iter)
{
@ -85,7 +82,6 @@ namespace SourceHook
m_Contexts.emplace_back(std::move(sctx));
}
return pubFunc;
#endif
}
void CHookManagerAutoGen::ReleaseHookMan(HookManagerPubFunc pubFunc)

View File

@ -41,6 +41,10 @@ extern SourceMM::IMetamodSourceProvider *provider;
# define MSVC_ONLY(x)
#endif
// On Windows: this is the start of the shadow space.
// On Linux: this is the start of the stack args.
const std::int32_t OffsetToCallerStack = 16;
using namespace SourceHook::Asm;
namespace SourceHook
@ -72,10 +76,6 @@ namespace SourceHook
}
}
// Shadow space
MSVC_ONLY(jit.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(jit.sub(rsp, 8));
MSVC_ONLY(jit.mov(rcx, reinterpret_cast<std::uint64_t>(provider)));
GCC_ONLY(jit.mov(rdi, reinterpret_cast<std::uint64_t>(provider)));
@ -98,11 +98,6 @@ namespace SourceHook
jit.mov(rax, rax(mfi2.vtbloffs));
jit.mov(rax, rax(sizeof(void*) * mfi2.vtblindex));
jit.call(rax);
// Free Linux stack alignment
GCC_ONLY(jit.add(rsp, 8));
// Free shadow space
MSVC_ONLY(jit.add(rsp, 40));
}
x64GenContext::x64GenContext()
@ -313,72 +308,111 @@ namespace SourceHook
void* x64GenContext::GenerateHookFunc()
{
const auto& retInfo = m_Proto.GetRet();
//m_HookFunc.breakpoint();
m_HookFunc.breakpoint();
// For the time being, we assume that the xmm0-xmm15 registers
// are only used to store 64 bits worth of data, despite being
// able to store up to 128 bits
// RBP is a general purpose register on x86_64
// we will therefore use it on both linux and windows
// to refer to our space in the stack where we grew
// Linux uses RBP as the frame pointer while Windows mostly doesn't*.
// It's a good register to index stack variables so we'll still use it like a Linux frame pointer.
// It'll probably help Accelerator's crash-logging too (at least on Linux).
//
// *: MSVC does not support the frame pointer option (/Oy-) in x64!
// alloca() and some exception handling things will use it though.
// Their usage is also weird: https://stackoverflow.com/q/75722486
// Save our frame pointer.
// This also realigns the stack to 16 bytes.
m_HookFunc.push(rbp);
m_HookFunc.mov(rbp, rsp);
// *********** stack frame *************
// MSVC ONLY START
// rbp + 40 end of shadow space
// rbp + 8 start of shadow space
// rbp + ?? end of stack args
// rbp + 48 start of stack args
// rbp + 40 shadow space 4
// rbp + 32 shadow space 3
// rbp + 24 shadow space 2
// rbp + 16 shadow space 1
// MSVC ONLY END
//
// rbp - 0 beginning of (old) rsp
// rbp - 8 saved old rbp value
// GCC ONLY START
// rbp + ?? end of stack args
// rbp + 16 start of stack args
// GCC ONLY END
//
// rbp + 8 return address
// rbp - 0 original rbp
// rbp - 8 this ptr
// rbp - 16 vfnptr_origentry
// rbp - 24 status
// rbp - 32 prev_res
// rbp - 40 cur_res
// rbp - 48 iter
// rbp - 56 context
// rbp - 64 this ptr
// [Non void functions:]
// rbp - 64 - sizeof(returntype) original return
// rbp - 64 - sizeof(returntype) * 2 override return
// rbp - 64 - sizeof(returntype) * 3 plugin return
// rbp - 64 ret ptr
// rbp - 72 memret ptr
// rbp - 80 - sizeof(returntype) original return
// rbp - 80 - sizeof(returntype) * 2 override return
// rbp - 80 - sizeof(returntype) * 3 plugin return
//
// - 64 end of unused padding
// - 128 start of unused padding
//
// MSVC ONLY START
// - 128 end of 80 bytes of shadow space
// - 208 start of 80 bytes of shadow space
// MSVC ONLY END
//
// GCC ONLY START
// - 128 end of stack arg copies
// - ??? start of stack arg copies
// GCC ONLY END
const std::int8_t v_original_rbp = AddVarToFrame(SIZE_PTR); // -8
const std::int8_t v_this = AddVarToFrame(SIZE_PTR); // -8
const std::int8_t v_vfnptr_origentry = AddVarToFrame(SIZE_PTR); // -16
const std::int8_t v_status = AddVarToFrame(SIZE_PTR /*sizeof(META_RES)*/); // -24
const std::int8_t v_prev_res = AddVarToFrame(SIZE_PTR /*sizeof(META_RES)*/); // -32
const std::int8_t v_cur_res = AddVarToFrame(SIZE_PTR /*sizeof(META_RES)*/); // -40
const std::int8_t v_iter = AddVarToFrame(SIZE_PTR); // -48
const std::int8_t v_pContext = AddVarToFrame(SIZE_PTR); // -56
const std::int8_t v_this = AddVarToFrame(SIZE_PTR); // -64
// Non void return, track the values
std::int32_t v_ret_ptr = 0;
std::int32_t v_memret_ptr = 0;
std::int8_t v_ret_ptr = 0;
std::int8_t v_memret_ptr = 0;
std::int32_t v_orig_ret = 0;
std::int32_t v_override_ret = 0;
std::int32_t v_plugin_ret = 0;
std::int32_t v_mem_ret = 0;
if (m_Proto.GetRet().size != 0)
{
v_ret_ptr = AddVarToFrame(SIZE_PTR);
v_memret_ptr = AddVarToFrame(SIZE_PTR);
v_orig_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16)); // 16 bytes aligned
v_ret_ptr = AddVarToFrame(SIZE_PTR); // -64
v_memret_ptr = AddVarToFrame(SIZE_PTR); // -72
// Did you know that 80 is 5*16? I'm gonna be sick...
v_orig_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16)); // -80 // 16 bytes aligned
v_override_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16));
v_plugin_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16));
v_mem_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16));
}
std::int32_t stack_frame_size = ComputeVarsSize();
// TODO: Only added to *maybe* prevent crashes when people get their parameters wrong.
// Probably shouldn't be kept...
std::int32_t v_padding_after_ret = AddVarToFrame(64);
#if SH_COMP == SH_COMP_MSVC
// Regular shadow space + 6 stack args for CallSetupHookLoop.
// Don't actually use this variable, just index `rsp` instead.
std::int32_t v_local_shadow_space = AddVarToFrame(32 + 6*8);
#endif
#if SH_COMP == SH_COMP_MSVC
std::int32_t stack_frame_size = AlignSize(ComputeVarsSize(), 16);
m_HookFunc.sub(rsp, stack_frame_size);
// Store rbp where it should be
m_HookFunc.mov(rsp(stack_frame_size - SIZE_PTR), rbp);
m_HookFunc.lea(rbp, rsp(stack_frame_size));
// MSVC ONLY - Save the registers into shadow space
#if SH_COMP == SH_COMP_MSVC
const x86_64_Reg params_reg[] = { rcx, rdx, r8, r9 };
const x86_64_FloatReg params_floatreg[] = { xmm0, xmm1, xmm2, xmm3 };
@ -386,40 +420,165 @@ namespace SourceHook
// retrieve this ptr
m_HookFunc.mov(rbp(v_this), params_reg[reg_index]);
m_HookFunc.mov(rbp(reg_index * 8 + 8), params_reg[reg_index]);
m_HookFunc.mov(rbp(reg_index * 8 + OffsetToCallerStack), params_reg[reg_index]);
reg_index++;
// Non standard return size, a ptr has been passed into rcx. Shifting all the parameters
if ((retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem) {
m_HookFunc.mov(rbp(reg_index * 8 + 8), params_reg[reg_index]);
m_HookFunc.mov(rbp(reg_index * 8 + OffsetToCallerStack), params_reg[reg_index]);
m_HookFunc.mov(rbp(v_memret_ptr), params_reg[reg_index]);
reg_index++;
}
// START DEBUG HELPERS
m_HookFunc.mov(rax, m_Proto.GetNumOfParams());
m_HookFunc.mov(rax, reg_index);
// END DEBUG HELPERS
m_HookFunc.mov(rax, retInfo.size);
for (int i = 0; i < m_Proto.GetNumOfParams() && reg_index < 4; reg_index++, i++) {
auto& info = m_Proto.GetParam(i);
if (info.type == PassInfo::PassType_Float && (info.flags & PassInfo::PassFlag_ByRef) != PassInfo::PassFlag_ByRef) {
m_HookFunc.movsd(rbp(reg_index * 8 + 8), params_floatreg[reg_index]);
m_HookFunc.movsd(rbp(reg_index * 8 + OffsetToCallerStack), params_floatreg[reg_index]);
} else {
m_HookFunc.mov(rbp(reg_index * 8 + 8), params_reg[reg_index]);
m_HookFunc.mov(rbp(reg_index * 8 + OffsetToCallerStack), params_reg[reg_index]);
}
}
#else
const x86_64_Reg params_reg[] = { rdi, rsi, rdx, rcx, r8, r9 };
const x86_64_FloatReg params_floatreg[] = { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 };
int num_reg = sizeof(params_reg) / sizeof(params_reg[0]);
int num_floatreg = sizeof(params_floatreg) / sizeof(params_floatreg[0]);
int reg_index = 0;
int floatreg_index = 0;
m_HookFunc.mov(rbp(v_this), rdi);
reg_index++;
// Retmem is in RDI even when thiscall!
if ((retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem) {
m_HookFunc.mov(rbp(v_memret_ptr), params_reg[reg_index]);
reg_index++;
}
m_HookFunc.mov(rbp(v_this), params_reg[reg_index]);
reg_index++;
// Linux objects can be passed
// - inline on the stack
// - unpacked into registers
// - as a pointer
// This is pretty complicated and we can't know what will happen without knowing the object layout.
// And we don't know the object layout...
//
// From Agner Fog's Calling conventions pdf:
// >Objects with inheritance, member functions, or constructors can be passed in registers [and on stack].
// >Objects with copy constructor, destructor, or virtual are passed by pointers.
//
// For now, we'll assume that any object passed inline on the stack can be safely copied around.
// This probably doesn't hold in 100% of cases but for now it's the easiest.
//
// Another option would be to restructure our stack frame:
// - move all offsets and data we use like 32KiB or 64KiB deeper into the stack
// - `pop` the real return address off the stack and store it in the deep stack (until we need it)
// This would allow us to use the original objects that are inlined on the stack in place.
// TODO: Allow 2 floats per custom_register.
// Allow 2 ints per custom_register too!
// This helps with objects being unpacked into registers.
// We're going to backup ALL parameter registers.
std::int32_t v_sysv_floatreg = AddVarToFrame(num_floatreg * 8);
std::int32_t v_sysv_reg = AddVarToFrame(num_reg * 8);
auto orig_reg_index = reg_index;
// Next we need to figure out how much stack space is needed for stack args...
std::int32_t stack_args_size = 0;
for (int i = 0; i < m_Proto.GetNumOfParams(); ++i) {
const IntPassInfo &pi = m_Proto.GetParam(i);
if (pi.type == PassInfo::PassType_Basic) {
if (++reg_index >= num_reg) {
stack_args_size += 8;
}
} else if (pi.type == PassInfo::PassType_Float) {
if (++floatreg_index >= num_floatreg) {
stack_args_size += 8;
}
} else if (pi.type == PassInfo::PassType_Object) {
if (pi.flags & PassInfo::PassFlag_ByRef) {
if (++reg_index >= num_reg) {
stack_args_size += 8;
}
} else {
stack_args_size += pi.size;
}
}
}
std::int32_t v_sysv_stack_copy = AddVarToFrame(stack_args_size);
std::int32_t stack_frame_size = AlignSize(ComputeVarsSize(), 16);
m_HookFunc.sub(rsp, stack_frame_size);
for (int i = 0; i < num_floatreg; i++) {
m_HookFunc.movsd(rbp(v_sysv_floatreg + i*8), params_floatreg[i]);
}
for (int i = 0; i < num_reg; i++) {
m_HookFunc.mov(rbp(v_sysv_reg + i*8), params_reg[i]);
}
// Now let's copy stack params!
reg_index = orig_reg_index;
floatreg_index = 0;
std::int32_t stack_offset = 0;
for (int i = 0; i < m_Proto.GetNumOfParams(); ++i) {
const IntPassInfo &pi = m_Proto.GetParam(i);
if (pi.type == PassInfo::PassType_Basic) {
if (++reg_index >= num_reg) {
m_HookFunc.lea(rax, rbp(OffsetToCallerStack + stack_offset));
m_HookFunc.mov(rax, rax());
m_HookFunc.mov(rsp(stack_offset), rax);
stack_offset += 8;
}
} else if (pi.type == PassInfo::PassType_Float) {
if (++floatreg_index >= num_floatreg) {
m_HookFunc.lea(rax, rbp(OffsetToCallerStack + stack_offset));
m_HookFunc.mov(rax, rax());
m_HookFunc.mov(rsp(stack_offset), rax);
stack_offset += 8;
}
} else if (pi.type == PassInfo::PassType_Object) {
if (pi.flags & PassInfo::PassFlag_ByRef) {
if (++reg_index >= num_reg) {
m_HookFunc.lea(rax, rbp(OffsetToCallerStack + stack_offset));
m_HookFunc.mov(rax, rax());
m_HookFunc.mov(rsp(stack_offset), rax);
stack_offset += 8;
}
} else {
if (pi.pAssignOperator || pi.pCopyCtor) {
// 1st parameter (this)
m_HookFunc.lea(rdi, rbp(OffsetToCallerStack + stack_offset));
// 2nd parameter (copy)
m_HookFunc.lea(rsi, rsp(stack_offset));
// Move address and call
m_HookFunc.mov(rax, reinterpret_cast<std::uint64_t>(
pi.pAssignOperator ? pi.pAssignOperator : pi.pCopyCtor));
m_HookFunc.call(rax);
} else {
// from
m_HookFunc.lea(rsi, rbp(OffsetToCallerStack + stack_offset));
// to
m_HookFunc.lea(rdi, rsp(stack_offset));
// size
m_HookFunc.mov(rcx, pi.size);
// do the copy
m_HookFunc.rep_movs_bytes();
}
stack_offset += pi.size;
}
}
}
#endif
// From this point on, no matter what, RSP should be aligned on a 16-byte boundary
@ -430,11 +589,6 @@ namespace SourceHook
std::int32_t v_ret_vals[] = {v_orig_ret, v_override_ret, v_plugin_ret};
for (int i = 0; i < 3; i++) {
// Shadow space
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
// First param is this
MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_ret_vals[i])));
GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_ret_vals[i])));
@ -442,11 +596,6 @@ namespace SourceHook
// We've saved (or not) r8 value, use the freed register to store function ptr
m_HookFunc.mov(r8, reinterpret_cast<std::uint64_t>(retInfo.pNormalCtor));
m_HookFunc.call(r8);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
}
@ -496,30 +645,24 @@ namespace SourceHook
stack_index++;
}
// TODO: Linux will need this to be using offsets into the original stack
for (int i = 0; i < m_Proto.GetNumOfParams(); ++i, ++stack_index) {
// Shadow space
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
const IntPassInfo &pi = m_Proto.GetParam(i);
if (pi.type == PassInfo::PassType_Object && (pi.flags & PassInfo::PassFlag_ODtor) &&
(pi.flags & PassInfo::PassFlag_ByVal)) {
// Every non trivial types are passed as a pointer to a special dedicated space
MSVC_ONLY(m_HookFunc.mov(rcx, rbp(8 + stack_index * 8)));
GCC_ONLY(m_HookFunc.mov(rdi, rbp(8 + stack_index * 8)));
// All non-trivial types are passed as a pointer to a special dedicated space
MSVC_ONLY(m_HookFunc.mov(rcx, rbp(OffsetToCallerStack + stack_index * 8)));
GCC_ONLY(m_HookFunc.mov(rdi, rbp(OffsetToCallerStack + stack_index * 8)));
m_HookFunc.mov(rax, reinterpret_cast<std::uint64_t>(pi.pDtor));
m_HookFunc.call(rax);
}
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
DoReturn(v_ret_ptr, v_memret_ptr);
// TODO: VERY IMPORTANT! The destructor below can clobber RAX/XMM0!
// It should be safe to move DoReturn() right before .leave().
// FIGURE IT OUT THOUGH!!!!
// From then on, rax cannot be used as a general register
// Use r8 or r9 instead
@ -528,10 +671,6 @@ namespace SourceHook
{
std::int32_t v_ret_vals[] = {v_orig_ret, v_override_ret, v_plugin_ret};
// Shadow space
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
for (int i = 0; i < 3; i++) {
// First param is this
MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_ret_vals[i])));
@ -540,16 +679,11 @@ namespace SourceHook
m_HookFunc.mov(r8, reinterpret_cast<std::uint64_t>(retInfo.pDtor));
m_HookFunc.call(r8);
}
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
// Restore rbp
m_HookFunc.mov(rbp, rbp(v_original_rbp));
// Free the stack frame
m_HookFunc.add(rsp, stack_frame_size);
// Restore RSP and RBP
// (same as `mov rsp, rbp` + `pop rbp`)
m_HookFunc.leave();
m_HookFunc.retn();
@ -584,8 +718,8 @@ namespace SourceHook
}
// Allocate the necessary stack space
MSVC_ONLY(m_HookFunc.sub(rsp, 88)); // shadow space (32 bytes) + 6 stack arguments (48 bytes) + 8 bytes
// TODO: GCC_ONLY(m_HookFunk.sub(rsp, 8 + ?));
// (we already allocated enough shadow space for Windows)
GCC_ONLY(m_HookFunc.sub(rsp, 32));
// 1st parameter (this)
GCC_ONLY(m_HookFunc.mov(rdi, reinterpret_cast<std::uintptr_t>(m_SHPtr)));
@ -615,28 +749,34 @@ namespace SourceHook
MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_status)));
MSVC_ONLY(m_HookFunc.mov(rsp(0x28), rax));
// 7th argument - META_RES* prevResPtr
MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_prev_res)));
m_HookFunc.lea(rax, rbp(v_prev_res));
MSVC_ONLY(m_HookFunc.mov(rsp(0x30), rax));
GCC_ONLY(m_HookFunc.mov(rsp(0x00), rax));
// 8th argument - META_RES* curResPtr
MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_cur_res)));
m_HookFunc.lea(rax, rbp(v_cur_res));
MSVC_ONLY(m_HookFunc.mov(rsp(0x38), rax));
GCC_ONLY(m_HookFunc.mov(rsp(0x08), rax));
if (m_Proto.GetRet().size == 0) // void return function
{
// nullptr
m_HookFunc.xor_reg(rax, rax);
// 9th argument - const void* origRetPtr
MSVC_ONLY(m_HookFunc.mov(rsp(0x40), rax));
GCC_ONLY(m_HookFunc.mov(rsp(0x10), rax));
// 10th argument - void* overrideRetPtr
MSVC_ONLY(m_HookFunc.mov(rsp(0x48), rax));
GCC_ONLY(m_HookFunc.mov(rsp(0x18), rax));
}
else
{
// 9th argument - const void* origRetPtr
MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_orig_ret)));
m_HookFunc.lea(rax, rbp(v_orig_ret));
MSVC_ONLY(m_HookFunc.mov(rsp(0x40), rax));
GCC_ONLY(m_HookFunc.mov(rsp(0x10), rax));
// 10th argument - void* overrideRetPtr
MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_override_ret)));
m_HookFunc.lea(rax, rbp(v_override_ret));
MSVC_ONLY(m_HookFunc.mov(rsp(0x48), rax));
GCC_ONLY(m_HookFunc.mov(rsp(0x18), rax));
}
// Retrieve the function address
@ -646,9 +786,8 @@ namespace SourceHook
// Store the return value
m_HookFunc.mov(rbp(v_pContext), rax);
// TODO: GCC_ONLY(m_HookFunc.add(rsp, 8 + ?));
// Restore the rsp value
MSVC_ONLY(m_HookFunc.add(rsp, 88));
GCC_ONLY(m_HookFunc.add(rsp, 32));
}
// Extension of MAKE_DELEG macro
@ -719,11 +858,6 @@ namespace SourceHook
m_HookFunc.mov(rax, rax()); // *this (vtable)
m_HookFunc.mov(rax, rax(getNext.vtblindex * SIZE_PTR)); // vtable[vtblindex]
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
GCC_ONLY(m_HookFunc.mov(rdi, rbp(v_pContext)));
MSVC_ONLY(m_HookFunc.mov(rcx, rbp(v_pContext)));
m_HookFunc.call(rax); // pContext->GetNext()
@ -731,11 +865,6 @@ namespace SourceHook
// store into iter
m_HookFunc.mov(rbp(v_iter), rax);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
// null check iter
m_HookFunc.test(rax, rax);
m_HookFunc.jz(0x0); // Leave loop if nullptr
@ -779,11 +908,6 @@ namespace SourceHook
std::int32_t earlyLoopBack = m_HookFunc.get_outputpos() - startLoop;
m_HookFunc.rewrite<std::int32_t>(m_HookFunc.get_outputpos() - sizeof(std::int32_t), -earlyLoopBack);
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
m_HookFunc.mov(rax, rbp(v_pContext));
m_HookFunc.mov(rax, rax()); // *this (vtable)
m_HookFunc.mov(rax, rax(getOverrideRetPtrMfi.vtblindex * SIZE_PTR)); // vtable[vtblindex]
@ -792,10 +916,6 @@ namespace SourceHook
MSVC_ONLY(m_HookFunc.mov(rcx, rbp(v_pContext)));
m_HookFunc.call(rax); // pContext->GetOverrideRetPtr()
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
MSVC_ONLY(m_HookFunc.add(rsp, 40));
// *reinterpret_cast<my_rettype*>(pContext->GetOverrideRetPtr()) = plugin_ret;
// byref is always a pointer underneath
@ -809,11 +929,6 @@ namespace SourceHook
// custom assignment operator, so call it
if (retInfo.pAssignOperator)
{
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
// 1st parameter (this)
GCC_ONLY(m_HookFunc.mov(rdi, rax));
MSVC_ONLY(m_HookFunc.mov(rcx, rax));
@ -825,11 +940,6 @@ namespace SourceHook
// Move address and call
m_HookFunc.mov(rax, reinterpret_cast<std::uint64_t>(retInfo.pAssignOperator));
m_HookFunc.call(rax);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
else
{
@ -887,11 +997,6 @@ namespace SourceHook
m_HookFunc.je(0x0);
auto statusCmpOff = m_HookFunc.get_outputpos();
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
m_HookFunc.mov(rax, rbp(v_pContext));
// 1st parameter (this)
@ -903,11 +1008,6 @@ namespace SourceHook
m_HookFunc.call(rax); // pContext->ShouldCallOrig()
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
// Don't have the lower register yet, so this will do for now
m_HookFunc.test(rax, 0x1);
m_HookFunc.jz(0x0);
@ -943,11 +1043,6 @@ namespace SourceHook
// custom assignment operator, so call it
if (retInfo.pAssignOperator)
{
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
// 1st parameter (this)
GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_orig_ret)));
MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_orig_ret)));
@ -959,11 +1054,6 @@ namespace SourceHook
// Move address and call
m_HookFunc.mov(rax, reinterpret_cast<std::uint64_t>(retInfo.pAssignOperator));
m_HookFunc.call(rax);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
else
{
@ -1021,26 +1111,21 @@ namespace SourceHook
}
// Allocate the shadow space
m_HookFunc.sub(rsp, 32);
stackSpace += 32;
int parameters_on_stack = m_Proto.GetNumOfParams() - parameter_index;
m_HookFunc.sub(rsp, parameters_on_stack * 8);
stackSpace += parameters_on_stack * 8;
// If this number is even we need to allocate an extra 8 bytes
if (parameters_on_stack % 2 == 0) {
m_HookFunc.sub(rsp, 8);
stackSpace += 8;
}
// And it needs to be 16-byte aligned...
m_HookFunc.sub(rsp, AlignSize(stackSpace, 16));
for (int i = 0; parameter_index < m_Proto.GetNumOfParams(); parameter_index++, i++) {
m_HookFunc.mov(rax, rbp(40 + (8 * i))); // We need to skip the shadow space + return address
m_HookFunc.mov(rax, rbp(OffsetToCallerStack + (8 * 4) + (8 * i))); // We need to skip the shadow space
m_HookFunc.mov(rsp(32 + (8 * i)), rax);
}
return stackSpace;
#else
// TODO: Fix this up for objects passed inline on the stack
const x86_64_Reg params_reg[] = { rdi, rsi, rdx, rcx, r8, r9 };
const x86_64_FloatReg params_floatreg[] = { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 };
const std::uint8_t num_reg = sizeof(params_reg) / sizeof(params_reg[0]);
@ -1049,16 +1134,17 @@ namespace SourceHook
int reg_index = 0;
int floatreg_index = 0;
// setup this parameter
m_HookFunc.mov(params_reg[reg_index], rbp(v_this));
reg_index++;
// TODO: What am I doing here?
// Non standard return
if (retInfo.size != 0 && (retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem) {
m_HookFunc.lea(params_reg[reg_index], rbp(v_ret));
reg_index++;
}
// setup this parameter
m_HookFunc.mov(params_reg[reg_index], rbp(v_this));
reg_index++;
// TODO: Doesn't handle custom_register at all........
int parameters_on_stack = 0;
@ -1083,15 +1169,16 @@ namespace SourceHook
// Actually push registers to stack...
for (int i = 0, pushed_stack_parameters = 0; i < m_Proto.GetNumOfParams(); i++) {
auto& info = m_Proto.GetParam(i);
// TODO: These offsets are WRONG
if (info.type == PassInfo::PassType_Float && (info.flags & PassInfo::PassFlag_ByRef) != PassInfo::PassFlag_ByRef) {
if (++floatreg_index >= num_floatreg) {
m_HookFunc.mov(rax, rbp(8 + (8 * pushed_stack_parameters)));
m_HookFunc.mov(rax, rbp(OffsetToCallerStack + (8 * pushed_stack_parameters)));
m_HookFunc.mov(rsp(0 + (8 * pushed_stack_parameters)), rax);
pushed_stack_parameters++;
}
} else {
if (++reg_index >= num_reg) {
m_HookFunc.mov(rax, rbp(8 + (8 * pushed_stack_parameters)));
m_HookFunc.mov(rax, rbp(OffsetToCallerStack + (8 * pushed_stack_parameters)));
m_HookFunc.mov(rsp(0 + (8 * pushed_stack_parameters)), rax);
pushed_stack_parameters++;
}
@ -1114,8 +1201,9 @@ namespace SourceHook
return;
}
// ByVal
// TODO: handle Vector3f into XMM0 & XMM1 for Linux64 here....
// ByVal
if (retInfo.type == PassInfo::PassType_Float) {
m_HookFunc.movsd(rbp(v_ret), xmm0);
} else if (retInfo.type == PassInfo::PassType_Basic) {
@ -1123,11 +1211,6 @@ namespace SourceHook
} else if ((retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem) {
if (MemRetWithTempObj()) {
if (retInfo.pAssignOperator) {
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
// 1st parameter (this)
GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_ret)));
MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_ret)));
@ -1139,11 +1222,6 @@ namespace SourceHook
// Move address and call
m_HookFunc.mov(rax, reinterpret_cast<std::uint64_t>(retInfo.pAssignOperator));
m_HookFunc.call(rax);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
else {
m_HookFunc.push(rdi);
@ -1162,11 +1240,6 @@ namespace SourceHook
}
if (retInfo.pDtor) {
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
// 1st parameter (this)
GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_mem_ret)));
MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_mem_ret)));
@ -1174,11 +1247,6 @@ namespace SourceHook
// Move address and call
m_HookFunc.mov(rax, reinterpret_cast<std::uint64_t>(retInfo.pDtor));
m_HookFunc.call(rax);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
} else {
@ -1228,28 +1296,17 @@ namespace SourceHook
m_HookFunc.mov(rax, rax(getOrigRetPtrMfi.vtblindex * SIZE_PTR));
m_HookFunc.mov(r8, r8(getOverrideRetPtrMfi.vtblindex * SIZE_PTR));
m_HookFunc.xor_reg(r9, r9);
m_HookFunc.mov(r9, rbp(v_status));
m_HookFunc.cmp(r9, MRES_OVERRIDE);
m_HookFunc.cmovge(rax, r8);
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
// 1st parameter (this)
GCC_ONLY(m_HookFunc.mov(rdi, rbp(v_pContext)));
MSVC_ONLY(m_HookFunc.mov(rcx, rbp(v_pContext)));
m_HookFunc.call(rax);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
m_HookFunc.mov(rbp(v_retptr), rax);
}
@ -1262,6 +1319,8 @@ namespace SourceHook
m_HookFunc.mov(r8, rbp(v_retptr));
// TODO: handle Vector3f into XMM0 & XMM1 for Linux64 here....
if (retInfo.flags & PassInfo::PassFlag_ByRef) {
m_HookFunc.mov(rax, r8());
return;
@ -1281,11 +1340,6 @@ namespace SourceHook
// *memret_outaddr = plugin_ret
if (retInfo.pCopyCtor)
{
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
// 1st parameter (this)
GCC_ONLY(m_HookFunc.mov(rdi, rbp(v_memret_outaddr)));
MSVC_ONLY(m_HookFunc.mov(rcx, rbp(v_memret_outaddr)));
@ -1297,11 +1351,6 @@ namespace SourceHook
// Move address and call
m_HookFunc.mov(rax, reinterpret_cast<std::uint64_t>(retInfo.pCopyCtor));
m_HookFunc.call(rax);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
else
{
@ -1337,11 +1386,6 @@ namespace SourceHook
}
}
// Shadow space 32 bytes + 8 bytes
MSVC_ONLY(m_HookFunc.sub(rsp, 40));
// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8));
// 1st parameter (this)
GCC_ONLY(m_HookFunc.mov(rdi, reinterpret_cast<std::uintptr_t>(m_SHPtr)));
MSVC_ONLY(m_HookFunc.mov(rcx, reinterpret_cast<std::uintptr_t>(m_SHPtr)));
@ -1353,11 +1397,6 @@ namespace SourceHook
// Move address and call
m_HookFunc.mov(rax, (*reinterpret_cast<std::uintptr_t**>(m_SHPtr))[mfi.vtblindex]);
m_HookFunc.call(rax);
// Free Linux stack alignment
GCC_ONLY(m_HookFunc.add(rsp, 8));
// Free shadow space
MSVC_ONLY(m_HookFunc.add(rsp, 40));
}
bool x64GenContext::MemRetWithTempObj() {
@ -1401,13 +1440,13 @@ namespace SourceHook
// If the user says nothing, auto-detect
if ((pi.flags & (PassInfo::PassFlag_RetMem | PassInfo::PassFlag_RetReg)) == 0)
{
bool hasSpecialFunctions = (pi.flags & (PassInfo::PassFlag_OCtor|PassInfo::PassFlag_ODtor|PassInfo::PassFlag_CCtor)) != 0;
#if SH_COMP == SH_COMP_MSVC
// MSVC has various criteria for passing in memory
// if object doesn't fit on 8, 16, 32, or 64 bits. It's in memory
// if object has a constructor or destructor. It's in memory
bool unconventionalsize = (pi.size == 3 || (pi.size != 8 && pi.size > 4));
bool hasSpecialFunctions = (pi.flags & (PassInfo::PassFlag_OCtor|PassInfo::PassFlag_ODtor|PassInfo::PassFlag_CCtor)) != 0;
if (unconventionalsize || hasSpecialFunctions) {
pi.flags |= PassInfo::PassFlag_RetMem;
@ -1445,8 +1484,11 @@ namespace SourceHook
// Result: we cannot detect if it should be register or memory without knowing the layout of the object.
bool tooBig = (pi.size > (8 * 8));
bool hasSpecialFunctions = (pi.flags & (PassInfo::PassFlag_ODtor|PassInfo::PassFlag_CCtor)) != 0;
if (hasSpecialFunctions || tooBig)
bool probablyVector = (pi.size == 12);
if (hasSpecialFunctions || tooBig || probablyVector)
{
pi.flags |= PassInfo::PassFlag_RetMem;
return true;
@ -1468,7 +1510,7 @@ namespace SourceHook
return true;
}
void x64GenContext::AutoDetectParamFlags()
bool x64GenContext::AutoDetectParamFlags()
{
}
@ -1487,14 +1529,13 @@ namespace SourceHook
// hi->SetInfo(HOOKMAN_VERSION, m_VtblOffs, m_VtblIdx, m_Proto.GetProto(), m_HookfuncVfnptr)
// }
// prologue
MSVC_ONLY(m_PubFunc.sub(rsp, 0x38)); // Shadow space 32 bytes + 2 * 8 bytes (for our parameters) + 8 bytes
// TODO: GCC_ONLY(m_PubFunc.sub(rsp, 8+?));
// Save our frame pointer. (somewhat needlessly on Windows...)
// This also realigns the stack to 16 bytes.
m_PubFunc.push(rbp);
m_PubFunc.mov(rbp, rsp);
// Frame pointer! We like working callstacks when debugging crashes!
// TODO: Might mean we don't have to `sub rsp, 8`?
GCC_ONLY(m_PubFunc.push(rbp));
GCC_ONLY(m_PubFunc.mov(rbp, rsp));
// prologue
MSVC_ONLY(m_PubFunc.sub(rsp, 0x30)); // Shadow space 32 bytes + 2 * 8 bytes (for our parameters)
// Both the Microsoft and AMD64 (System V) calling conventions use r8 and r9 to pass arguments
// Therefore they do not need to be preserved across function calls
@ -1569,13 +1610,9 @@ namespace SourceHook
m_PubFunc.rewrite<std::int32_t>(jumpOff - sizeof(std::int32_t), endOff);
// epilogue
// Free Linux stack alignment
// TODO: GCC_ONLY(m_HookFunc.add(rsp, 8 + ?));
// Free shadow space & parameter space & stack alignment
MSVC_ONLY(m_PubFunc.add(rsp, 0x38));
GCC_ONLY(m_PubFunc.pop(rbp));
// Restore RSP and RBP
// (same as `mov rsp, rbp` + `pop rbp`)
m_PubFunc.leave();
// Return 0
m_PubFunc.xor_reg(rax, rax);