/* ======== SourceHook ======== * Copyright (C) 2024 Metamod:Source Development Team * No warranties of any kind * * License: zlib/libpng * * Author(s): André "Kenzzer" Benoist * ============================ */ // recommended literature: // https://www.felixcloutier.com/x86/ // http://ref.x86asm.net/coder64.html // https://defuse.ca/online-x86-assembler.htm // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention // https://refspecs.linuxbase.orgz/elf/x86_64-abi-0.99.pdf #include #include #include "sourcehook_impl.h" #include "sourcehook_hookmangen.h" #include "sourcehook_hookmangen_x86_64.h" #include "sourcehook_pibuilder.h" #include "metamod_oslink.h" #include "metamod.h" #include #include #include #include extern SourceHook::ISourceHook *g_SHPtr; extern SourceMM::IMetamodSourceProvider *provider; #if SH_COMP == SH_COMP_MSVC # define GCC_ONLY(x) # define MSVC_ONLY(x) x #elif SH_COMP == SH_COMP_GCC # define GCC_ONLY(x) x # define MSVC_ONLY(x) #endif using namespace SourceHook::Asm; namespace SourceHook { namespace Impl { void PrintDebug(x64JitWriter& jit, const char* message) { static MemFuncInfo mfi = {false, -1, -1, -1}; if (mfi.vtblindex == -1) { GetFuncInfo(&SourceMM::IMetamodSourceProvider::ConsolePrint, mfi); if (!mfi.isVirtual || mfi.thisptroffs != 0 || mfi.vtbloffs != 0 || mfi.vtblindex < 0) { mfi.vtblindex = -1; SH_ASSERT(0, ("Couldn't retrieve details of SourceMM::IMetamodSourceProvider::ConsolePrint!")); return; } } static MemFuncInfo mfi2 = {false, -1, -1, -1}; if (mfi2.vtblindex == -1) { GetFuncInfo(&SourceMM::IMetamodSourceProvider::LogMessage, mfi2); if (!mfi2.isVirtual || mfi2.thisptroffs != 0 || mfi2.vtbloffs != 0 || mfi2.vtblindex < 0) { mfi2.vtblindex = -1; SH_ASSERT(0, ("Couldn't retrieve details of SourceMM::IMetamodSourceProvider::ConsolePrint!")); return; } } // Shadow space MSVC_ONLY(jit.sub(rsp, 40)); // We need to keep it aligned to 16 bytes on Linux too... 
GCC_ONLY(jit.sub(rsp, 8)); MSVC_ONLY(jit.mov(rcx, reinterpret_cast(provider))); GCC_ONLY(jit.mov(rdi, reinterpret_cast(provider))); MSVC_ONLY(jit.mov(rdx, reinterpret_cast(message))); GCC_ONLY(jit.mov(rsi, reinterpret_cast(message))); jit.mov(rax, reinterpret_cast(provider)); jit.mov(rax, rax(mfi.vtbloffs)); jit.mov(rax, rax(sizeof(void*) * mfi.vtblindex)); jit.call(rax); MSVC_ONLY(jit.mov(rcx, reinterpret_cast(provider))); GCC_ONLY(jit.mov(rdi, reinterpret_cast(provider))); MSVC_ONLY(jit.mov(rdx, reinterpret_cast(message))); GCC_ONLY(jit.mov(rsi, reinterpret_cast(message))); jit.mov(rax, reinterpret_cast(provider)); jit.mov(rax, rax(mfi2.vtbloffs)); jit.mov(rax, rax(sizeof(void*) * mfi2.vtblindex)); jit.call(rax); // Free Linux stack alignment GCC_ONLY(jit.add(rsp, 8)); // Free shadow space MSVC_ONLY(jit.add(rsp, 40)); } x64GenContext::x64GenContext() : m_GeneratedPubFunc(nullptr), m_VtblOffs(0), m_VtblIdx(666), m_SHPtr((ISourceHook*)0x1122334455667788), m_pHI(nullptr), m_HookfuncVfnptr(nullptr), m_HookFunc_FrameOffset(0), m_HookFunc_FrameVarsSize(0) { m_pHI = new void*; *m_pHI = (void*)0x77777777; m_HookfuncVfnptr = new void*; m_BuiltPI = new ProtoInfo; m_BuiltPI_Params = nullptr; m_BuiltPI_Params2 = nullptr; } x64GenContext::x64GenContext(const ProtoInfo *proto, int vtbl_offs, int vtbl_idx, ISourceHook *pSHPtr) : m_GeneratedPubFunc(nullptr), m_OrigProto(proto), m_Proto(proto), m_VtblOffs(vtbl_offs), m_VtblIdx(vtbl_idx), m_SHPtr(pSHPtr), m_pHI(nullptr), m_HookfuncVfnptr(nullptr), m_HookFunc_FrameOffset(0), m_HookFunc_FrameVarsSize(0) { m_pHI = new void*; *m_pHI = (void*)0x77777777; // Magic number for debugging m_HookfuncVfnptr = new void*; m_BuiltPI = new ProtoInfo; m_BuiltPI_Params = nullptr; m_BuiltPI_Params2 = nullptr; } x64GenContext::~x64GenContext() { //Clear(); delete m_pHI; delete m_HookfuncVfnptr; delete m_BuiltPI; } void x64GenContext::Clear() { m_HookFunc.clear(); m_PubFunc.clear(); if (m_BuiltPI_Params) { delete [] m_BuiltPI_Params; m_BuiltPI_Params 
= NULL; } if (m_BuiltPI_Params2) { delete [] m_BuiltPI_Params2; m_BuiltPI_Params2 = NULL; } } void x64GenContext::BuildProtoInfo() { m_BuiltPI->convention = m_Proto.GetConvention(); m_BuiltPI->numOfParams = m_Proto.GetNumOfParams(); m_BuiltPI->retPassInfo.size = m_Proto.GetRet().size; m_BuiltPI->retPassInfo.type = m_Proto.GetRet().type; m_BuiltPI->retPassInfo.flags = m_Proto.GetRet().flags; m_BuiltPI->retPassInfo2.pNormalCtor = m_Proto.GetRet().pNormalCtor; m_BuiltPI->retPassInfo2.pCopyCtor = m_Proto.GetRet().pCopyCtor; m_BuiltPI->retPassInfo2.pDtor = m_Proto.GetRet().pDtor; m_BuiltPI->retPassInfo2.pAssignOperator = m_Proto.GetRet().pAssignOperator; if (m_BuiltPI_Params) delete m_BuiltPI_Params; m_BuiltPI_Params = new PassInfo[m_BuiltPI->numOfParams + 1]; if (m_BuiltPI_Params2) delete m_BuiltPI_Params2; m_BuiltPI_Params2 = new PassInfo::V2Info[m_BuiltPI->numOfParams + 1]; m_BuiltPI_Params[0].size = 1; // Version 1 m_BuiltPI_Params[0].type = 0; m_BuiltPI_Params[0].flags = 0; for (int i = 0; i < m_Proto.GetNumOfParams(); ++i) { m_BuiltPI_Params[i+1].size = m_Proto.GetParam(i).size; m_BuiltPI_Params[i+1].type = m_Proto.GetParam(i).type; m_BuiltPI_Params[i+1].flags = m_Proto.GetParam(i).flags; m_BuiltPI_Params2[i+1].pNormalCtor = m_Proto.GetParam(i).pNormalCtor; m_BuiltPI_Params2[i+1].pCopyCtor = m_Proto.GetParam(i).pCopyCtor; m_BuiltPI_Params2[i+1].pDtor = m_Proto.GetParam(i).pDtor; m_BuiltPI_Params2[i+1].pAssignOperator = m_Proto.GetParam(i).pAssignOperator; } m_BuiltPI->paramsPassInfo = m_BuiltPI_Params; m_BuiltPI->paramsPassInfo2 = m_BuiltPI_Params2; } std::int32_t x64GenContext::AddVarToFrame(std::int32_t size) { m_HookFunc_FrameOffset -= size; m_HookFunc_FrameVarsSize += size; return m_HookFunc_FrameOffset; } std::int32_t x64GenContext::ComputeVarsSize() { return m_HookFunc_FrameVarsSize; } std::int32_t x64GenContext::GetRealSize(const IntPassInfo& info) { if ((info.flags & PassInfo::PassFlag_ByRef) == PassInfo::PassFlag_ByRef) { return SIZE_PTR; } return 
static_cast(info.size); } std::int32_t x64GenContext::AlignSize(std::int32_t x, std::int32_t boundary) { if (x % boundary != 0) x = (x & ~(boundary-1)) + boundary; return x; } // Computes size on the stack std::int32_t x64GenContext::GetParamStackSize(const IntPassInfo &info) { // Align up to 8 byte boundaries return AlignSize(GetRealSize(info), SIZE_PTR); } HookManagerPubFunc x64GenContext::Generate() { Clear(); // Check conditions: // -1) good proto version // 0) we don't support unknown passtypes, convention, ... // 1) we don't support functions which return objects by value or take parameters by value // that have a constructor, a destructor or an overloaded assignment op // (we wouldn't know how to call it!) if (m_Proto.GetVersion() < 1) { return nullptr; } // Detect the pass flags (if they're missing) for return and parameters type if (!AutoDetectRetType()) { return nullptr; } AutoDetectParamFlags(); // Calling conventions are gone on x86_64, there's only one to call all functions // however act as if they still exist to avoid code duplication on the user's side // TO-DO: Handle microsoft's vectorcall if ((m_Proto.GetConvention() & (~ProtoInfo::CallConv_HasVafmt)) != ProtoInfo::CallConv_ThisCall) { return nullptr; } // Non void return, ensure we support it if (m_Proto.GetRet().size != 0 && !PassInfoSupported(m_Proto.GetRet(), true)) { return nullptr; } // Ensure we support each param for (int i = 0; i < m_Proto.GetNumOfParams(); ++i) { if (!PassInfoSupported(m_Proto.GetParam(i), false)) { return nullptr; } } BuildProtoInfo(); GenerateHookFunc(); return fastdelegate::detail::horrible_cast(GeneratePubFunc()); } bool x64GenContext::PassInfoSupported(const IntPassInfo& pi, bool is_ret) { if (pi.type != PassInfo::PassType_Basic && pi.type != PassInfo::PassType_Float && pi.type != PassInfo::PassType_Object) { return false; } if (pi.type == PassInfo::PassType_Object && (pi.flags & PassInfo::PassFlag_ByVal)) { if ((pi.flags & PassInfo::PassFlag_CCtor) && 
!pi.pCopyCtor)
				{
					return false;
				}
				if ((pi.flags & PassInfo::PassFlag_ODtor) && !pi.pDtor)
				{
					return false;
				}
				if ((pi.flags & PassInfo::PassFlag_AssignOp) && !pi.pAssignOperator)
				{
					return false;
				}
				if ((pi.flags & PassInfo::PassFlag_OCtor) && !pi.pNormalCtor)
				{
					return false;
				}
			}
			if ((pi.flags & (PassInfo::PassFlag_ByVal | PassInfo::PassFlag_ByRef)) == 0)
			{
				return false; // Neither byval nor byref!
			}
			return true;
		}

		// Emits the actual hook function: prologue + frame variables, parameter
		// spill, hook-loop setup, pre hooks, original call, post hooks, return
		// handling and epilogue. Returns a pointer to the generated code.
		// NOTE(review): several reinterpret_cast/static_cast template arguments in
		// this function were lost in this copy of the file -- restore from upstream.
		void* x64GenContext::GenerateHookFunc()
		{
			const auto& retInfo = m_Proto.GetRet();
			//m_HookFunc.breakpoint();

			// For the time being, we only consider xmm0-xmm15 registers
			// are only used to store 64bits worth of data, despite being
			// able to store up to 128bits

			// RBP is a general purpose register on x86_64
			// we will therefore use it on both linux and windows
			// to refer to our space in the stack where we grew

			// *********** stack frame *************
			// MSVC ONLY START
			// rbp + 40   end of shadow space
			// rbp + 8    start of shadow space
			// MSVC ONLY END
			//
			// rbp - 0    begining of (old) rsp
			// rbp - 8    saved old rbp value
			// rbp - 16   vfnptr_origentry
			// rbp - 24   status
			// rbp - 32   prev_res
			// rbp - 40   cur_res
			// rbp - 48   iter
			// rbp - 56   context
			// rbp - 64   this ptr
			// [Non void functions:]
			// rbp - 64 - sizeof(returntype)      original return
			// rbp - 64 - sizeof(returntype) * 2  override return
			// rbp - 64 - sizeof(returntype) * 3  plugin return
			const std::int8_t v_original_rbp = AddVarToFrame(SIZE_PTR); // -8
			const std::int8_t v_vfnptr_origentry = AddVarToFrame(SIZE_PTR); // -16
			const std::int8_t v_status = AddVarToFrame(SIZE_PTR /*sizeof(META_RES)*/); // -24
			const std::int8_t v_prev_res = AddVarToFrame(SIZE_PTR /*sizeof(META_RES)*/); // -32
			const std::int8_t v_cur_res = AddVarToFrame(SIZE_PTR /*sizeof(META_RES)*/); // -40
			const std::int8_t v_iter = AddVarToFrame(SIZE_PTR); // -48
			const std::int8_t v_pContext = AddVarToFrame(SIZE_PTR); // -56
			const std::int8_t v_this = AddVarToFrame(SIZE_PTR); // -64

			// Non void return, track the values
			std::int32_t v_ret_ptr = 0;
			std::int32_t v_memret_ptr = 0;
			std::int32_t v_orig_ret = 0;
			std::int32_t v_override_ret = 0;
			std::int32_t v_plugin_ret = 0;
			std::int32_t v_mem_ret = 0;
			if (m_Proto.GetRet().size != 0)
			{
				v_ret_ptr = AddVarToFrame(SIZE_PTR);
				v_memret_ptr = AddVarToFrame(SIZE_PTR);
				v_orig_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16)); // 16 bytes aligned
				v_override_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16));
				v_plugin_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16));
				v_mem_ret = AddVarToFrame(AlignSize(GetParamStackSize(retInfo), 16));
			}

			// Prologue: grow the frame, save caller rbp at the top of it, and point
			// rbp at the old rsp so all v_* offsets are rbp-relative.
			std::int32_t stack_frame_size = ComputeVarsSize();
			m_HookFunc.sub(rsp, stack_frame_size);
			// Store rbp where it should be
			m_HookFunc.mov(rsp(stack_frame_size - SIZE_PTR), rbp);
			m_HookFunc.lea(rbp, rsp(stack_frame_size));

			// MSVC ONLY - Save the registers into shadow space
#if SH_COMP == SH_COMP_MSVC
			const x86_64_Reg params_reg[] = { rcx, rdx, r8, r9 };
			const x86_64_FloatReg params_floatreg[] = { xmm0, xmm1, xmm2, xmm3 };
			int reg_index = 0;

			// retrieve this ptr
			m_HookFunc.mov(rbp(v_this), params_reg[reg_index]);
			m_HookFunc.mov(rbp(reg_index * 8 + 8), params_reg[reg_index]);
			reg_index++;

			// Non standard return size, a ptr has been passed into rcx. Shifting all the parameters
			if ((retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem)
			{
				m_HookFunc.mov(rbp(reg_index * 8 + 8), params_reg[reg_index]);
				m_HookFunc.mov(rbp(v_memret_ptr), params_reg[reg_index]);
				reg_index++;
			}

			// NOTE(review): these three movs overwrite rax back-to-back and the
			// result is never read -- they look like leftover debug emission;
			// confirm before removing.
			m_HookFunc.mov(rax, m_Proto.GetNumOfParams());
			m_HookFunc.mov(rax, reg_index);
			m_HookFunc.mov(rax, retInfo.size);

			// Spill the remaining register parameters into the caller's shadow space
			for (int i = 0; i < m_Proto.GetNumOfParams() && reg_index < 4; reg_index++, i++)
			{
				auto& info = m_Proto.GetParam(i);
				if (info.type == PassInfo::PassType_Float && (info.flags & PassInfo::PassFlag_ByRef) != PassInfo::PassFlag_ByRef)
				{
					m_HookFunc.movsd(rbp(reg_index * 8 + 8), params_floatreg[reg_index]);
				}
				else
				{
					m_HookFunc.mov(rbp(reg_index * 8 + 8), params_reg[reg_index]);
				}
			}
#else
			const x86_64_Reg params_reg[] = { rdi, rsi, rdx, rcx, r8, r9 };
			int reg_index = 0;
			// NOTE(review): unlike the MSVC path, only the this ptr (and the sret
			// pointer) are saved here -- the remaining SysV register parameters are
			// not spilled anywhere; looks incomplete, confirm against upstream.
			m_HookFunc.mov(rbp(v_this), rdi);
			reg_index++;
			if ((retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem)
			{
				m_HookFunc.mov(rbp(v_memret_ptr), params_reg[reg_index]);
				reg_index++;
			}
#endif
			// From this point on, no matter what. RSP should be aligned on 16 bytes boundary

			// If return value has a constructor, call it on all three return slots
			if ((retInfo.flags & PassInfo::PassFlag_ByVal) && retInfo.pNormalCtor != nullptr)
			{
				std::int32_t v_ret_vals[] = {v_orig_ret, v_override_ret, v_plugin_ret};
				for (int i = 0; i < 3; i++)
				{
					// Shadow space
					MSVC_ONLY(m_HookFunc.sub(rsp, 40));
					// We need to keep it aligned to 16 bytes on Linux too...
					GCC_ONLY(m_HookFunc.sub(rsp, 8));

					// First param is this
					MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_ret_vals[i])));
					GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_ret_vals[i])));

					// We've saved (or not) r8 value, use the freed register to store function ptr
					m_HookFunc.mov(r8, reinterpret_cast(retInfo.pNormalCtor));
					m_HookFunc.call(r8);

					// Free Linux stack alignment
					GCC_ONLY(m_HookFunc.add(rsp, 8));
					// Free shadow space
					MSVC_ONLY(m_HookFunc.add(rsp, 40));
				}
			}

			// status = prev_res = cur_res = MRES_IGNORED
			m_HookFunc.mov(rbp(v_status), MRES_IGNORED);
			m_HookFunc.mov(rbp(v_cur_res), MRES_IGNORED);
			m_HookFunc.mov(rbp(v_prev_res), MRES_IGNORED);

			// ********************** SetupHookLoop **********************
			//PrintDebug(m_HookFunc, "Call - SetupHookLoop\n");
			CallSetupHookLoop(v_orig_ret, v_override_ret, v_cur_res, v_prev_res, v_status, v_vfnptr_origentry, v_this, v_pContext);
			//PrintDebug(m_HookFunc, "Call - SetupHookLoop - END\n");

			// ********************** call pre hooks **********************
			//PrintDebug(m_HookFunc, "Call - CallHooks [PRE]\n");
			GenerateCallHooks(v_status, v_prev_res, v_cur_res, v_iter, v_pContext, v_plugin_ret, v_mem_ret);
			//PrintDebug(m_HookFunc, "Call - CallHooks [PRE] - END\n");

			// ********************** call orig func **********************
			//PrintDebug(m_HookFunc, "Call - CallOrig\n");
			GenerateCallOrig(v_status, v_pContext, v_this, v_vfnptr_origentry, v_orig_ret, v_override_ret, v_mem_ret);
			//PrintDebug(m_HookFunc, "Call - CallOrig - END\n");

			// ********************** call post hooks **********************
			//PrintDebug(m_HookFunc, "Call - Hooks [POST]\n");
			GenerateCallHooks(v_status, v_prev_res, v_cur_res, v_iter, v_pContext, v_plugin_ret, v_mem_ret);
			//PrintDebug(m_HookFunc, "Call - Hooks [POST] - END\n");

			// ********************** end context and return **********************
			PrepareReturn(v_status, v_pContext, v_ret_ptr);
			CallEndContext(v_pContext);

			// Call destructors of byval object params which have a destructor
			int stack_index = 1; // account this pointer
			if ((retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem)
			{
				// Non trivial return value
				stack_index++;
			}
			for (int i = 0; i < m_Proto.GetNumOfParams(); ++i, ++stack_index)
			{
				// Shadow space
				MSVC_ONLY(m_HookFunc.sub(rsp, 40));
				// We need to keep it aligned to 16 bytes on Linux too...
				GCC_ONLY(m_HookFunc.sub(rsp, 8));

				const IntPassInfo &pi = m_Proto.GetParam(i);
				if (pi.type == PassInfo::PassType_Object && (pi.flags & PassInfo::PassFlag_ODtor) && (pi.flags & PassInfo::PassFlag_ByVal))
				{
					// Every non trivial types are passed as a pointer to a special dedicated space
					MSVC_ONLY(m_HookFunc.mov(rcx, rbp(8 + stack_index * 8)));
					GCC_ONLY(m_HookFunc.mov(rdi, rbp(8 + stack_index * 8)));
					m_HookFunc.mov(rax, reinterpret_cast(pi.pDtor));
					m_HookFunc.call(rax);
				}

				// Free Linux stack alignment
				GCC_ONLY(m_HookFunc.add(rsp, 8));
				// Free shadow space
				MSVC_ONLY(m_HookFunc.add(rsp, 40));
			}

			DoReturn(v_ret_ptr, v_memret_ptr);
			// From then on, rax cannot be used as a general register
			// Use r8 or r9 instead

			// If return value type has a destructor, call it on all three return slots
			if ((retInfo.flags & PassInfo::PassFlag_ByVal) && retInfo.pDtor != nullptr)
			{
				std::int32_t v_ret_vals[] = {v_orig_ret, v_override_ret, v_plugin_ret};

				// Shadow space
				MSVC_ONLY(m_HookFunc.sub(rsp, 40));
				// We need to keep it aligned to 16 bytes on Linux too...
				GCC_ONLY(m_HookFunc.sub(rsp, 8));

				for (int i = 0; i < 3; i++)
				{
					// First param is this
					MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_ret_vals[i])));
					GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_ret_vals[i])));

					m_HookFunc.mov(r8, reinterpret_cast(retInfo.pDtor));
					m_HookFunc.call(r8);
				}

				// Free Linux stack alignment
				GCC_ONLY(m_HookFunc.add(rsp, 8));
				// Free shadow space
				MSVC_ONLY(m_HookFunc.add(rsp, 40));
			}

			// Restore rbp
			m_HookFunc.mov(rbp, rbp(v_original_rbp));
			// Free the stack frame
			m_HookFunc.add(rsp, stack_frame_size);
			m_HookFunc.retn();

			// Store pointer for later use
			// m_HookfuncVfnPtr is a pointer to a void* because SH expects a pointer
			// into the hookman's vtable
			*m_HookfuncVfnptr = reinterpret_cast(m_HookFunc.GetData());

			m_HookFunc.SetRE();

			return m_HookFunc.GetData();
		}

		// Emits the call to shptr->SetupHookLoop(...), storing the returned
		// IHookContext* into v_pContext.
		void x64GenContext::CallSetupHookLoop(int v_orig_ret, int v_override_ret,
			int v_cur_res, int v_prev_res, int v_status, int v_vfnptr_origentry,
			int v_this, int v_pContext)
		{
			// IHookContext *shptr->SetupHookLoop(IHookManagerInfo *hi, void *vfnptr, void *thisptr, void **origCallAddr, META_RES *statusPtr,
			//	META_RES *prevResPtr, META_RES *curResPtr, const void *origRetPtr, void *overrideRetPtr);
			static MemFuncInfo mfi = {false, -1, -1, -1};
			if (mfi.vtblindex == -1)
			{
				GetFuncInfo(&ISourceHook::SetupHookLoop, mfi);
				// The function is somehow not virtual, or has a non trivial this ptr
				if (!mfi.isVirtual || mfi.thisptroffs != 0 || mfi.vtbloffs != 0 || mfi.vtblindex < 0)
				{
					mfi.vtblindex = -1; // Ensure we go through there again on subsequent calls
					SH_ASSERT(0, ("Couldn't retrieve details of ISourceHook::SetupHookLoop!"));
					return;
				}
			}

			// Allocate the necessary stack space
			MSVC_ONLY(m_HookFunc.sub(rsp, 88)); // shadow space (32 bytes) + 6 stack arguments (48 bytes) + 8 bytes
			// NOTE(review): the GCC path never allocates stack space nor passes
			// arguments 7-10 (see TODOs below) -- SetupHookLoop takes 10 arguments
			// but SysV only has 6 parameter registers; looks incomplete.
			// TODO: GCC_ONLY(m_HookFunc.sub(rsp, 8 + ?));

			// 1st parameter (this)
			GCC_ONLY(m_HookFunc.mov(rdi, reinterpret_cast(m_SHPtr)));
			MSVC_ONLY(m_HookFunc.mov(rcx, reinterpret_cast(m_SHPtr)));
			// 2nd parameter - IHookManagerInfo* hi
			GCC_ONLY(m_HookFunc.mov(rsi, reinterpret_cast(m_pHI)));
			GCC_ONLY(m_HookFunc.mov(rsi, rsi())); // hi = *m_pHI
			MSVC_ONLY(m_HookFunc.mov(rdx, reinterpret_cast(m_pHI)));
			MSVC_ONLY(m_HookFunc.mov(rdx, rdx()));
			// 3rd parameter - void* vfnptr
			GCC_ONLY(m_HookFunc.mov(rdx, rbp(v_this)));
			GCC_ONLY(m_HookFunc.mov(rdx, rdx(m_VtblOffs))); // *(this + m_VtblOffs)
			GCC_ONLY(m_HookFunc.add(rdx, SIZE_PTR * m_VtblIdx)); // vtable + m_VtblIdx
			MSVC_ONLY(m_HookFunc.mov(r8, rbp(v_this)));
			MSVC_ONLY(m_HookFunc.mov(r8, r8(m_VtblOffs))); // *(this + m_VtblOffs)
			MSVC_ONLY(m_HookFunc.add(r8, SIZE_PTR * m_VtblIdx)); // vtable + m_VtblIdx
			// 4th parameter - void* thisptr
			GCC_ONLY(m_HookFunc.mov(rcx, rbp(v_this)));
			MSVC_ONLY(m_HookFunc.mov(r9, rbp(v_this)));
			// 5th argument - void** original call address
			GCC_ONLY(m_HookFunc.lea(r8, rbp(v_vfnptr_origentry)));
			MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_vfnptr_origentry)));
			MSVC_ONLY(m_HookFunc.mov(rsp(0x20), rax)); // first stack slot above the shadow space
			// 6th argument - META_RES* statusPtr
			GCC_ONLY(m_HookFunc.lea(r9, rbp(v_status)));
			MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_status)));
			MSVC_ONLY(m_HookFunc.mov(rsp(0x28), rax));
			// 7th argument - META_RES* prevResPtr
			MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_prev_res)));
			MSVC_ONLY(m_HookFunc.mov(rsp(0x30), rax));
			// 8th argument - META_RES* curResPtr
			MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_cur_res)));
			MSVC_ONLY(m_HookFunc.mov(rsp(0x38), rax));

			if (m_Proto.GetRet().size == 0) // void return function
			{
				// nullptr
				m_HookFunc.xor_reg(rax, rax);
				// 9th argument - const void* origRetPtr
				MSVC_ONLY(m_HookFunc.mov(rsp(0x40), rax));
				// 10th argument - void* overrideRetPtr
				MSVC_ONLY(m_HookFunc.mov(rsp(0x48), rax));
			}
			else
			{
				// 9th argument - const void* origRetPtr
				MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_orig_ret)));
				MSVC_ONLY(m_HookFunc.mov(rsp(0x40), rax));
				// 10th argument - void* overrideRetPtr
				MSVC_ONLY(m_HookFunc.lea(rax, rbp(v_override_ret)));
				MSVC_ONLY(m_HookFunc.mov(rsp(0x48), rax));
			}

			// Retrieve the function address
			// NOTE(review): the cast's template argument was lost in this copy of
			// the file (likely void***) -- this resolves SetupHookLoop's address at
			// generation time and embeds it as an immediate.
			m_HookFunc.mov(rax, (*reinterpret_cast(m_SHPtr))[mfi.vtblindex]);
			m_HookFunc.call(rax);

			// Store the return value
			m_HookFunc.mov(rbp(v_pContext), rax);

			// TODO: GCC_ONLY(m_HookFunc.add(rsp, 8 + ?));
			// Restore the rsp value
			MSVC_ONLY(m_HookFunc.add(rsp, 88));
		}

		// Extension of MAKE_DELEG macro
		// Minimal delegate interface used only to recover the vtable index of the
		// plugin callback (Call).
		struct IMyDelegate : ::SourceHook::ISHDelegate
		{
			virtual void Call() = 0;
		};

		// Emits the hook-iteration loop: repeatedly fetches the next delegate from
		// the context, calls it, folds its META_RES into status, and copies the
		// plugin return into the override slot when cur_res >= MRES_OVERRIDE.
		// Emitted twice (pre and post hooks) by GenerateHookFunc.
		void x64GenContext::GenerateCallHooks(int v_status, int v_prev_res,
			int v_cur_res, int v_iter, int v_pContext, int v_plugin_ret, int v_mem_ret)
		{
			static MemFuncInfo getNext = {false, -1, -1, -1};
			if (getNext.vtblindex == -1)
			{
				GetFuncInfo(&IHookContext::GetNext, getNext);
				// The function is somehow not virtual, or has a non trivial this ptr
				if (!getNext.isVirtual || getNext.thisptroffs != 0 || getNext.vtbloffs != 0 || getNext.vtblindex < 0)
				{
					getNext.vtblindex = -1; // Ensure we go through there again on subsequent calls
					SH_ASSERT(0, ("Unexpected compilation of IHookContext::GetNext!"));
					return;
				}
			}
			static MemFuncInfo callMfi = {false, -1, -1, -1};
			if (callMfi.vtblindex == -1)
			{
				GetFuncInfo(&IMyDelegate::Call, callMfi);
				// The function is somehow not virtual, or has a non trivial this ptr
				if (!callMfi.isVirtual || callMfi.thisptroffs != 0 || callMfi.vtbloffs != 0 || callMfi.vtblindex < 0)
				{
					callMfi.vtblindex = -1; // Ensure we go through there again on subsequent calls
					SH_ASSERT(0, ("Unexpected compilation of IMyDelegate::Call!"));
					return;
				}
			}
			static MemFuncInfo getOverrideRetPtrMfi = {false, -1, -1, -1};
			if (getOverrideRetPtrMfi.vtblindex == -1)
			{
				GetFuncInfo(&IHookContext::GetOverrideRetPtr, getOverrideRetPtrMfi);
				// The function is somehow not virtual, or has a non trivial this ptr
				if (!getOverrideRetPtrMfi.isVirtual || getOverrideRetPtrMfi.thisptroffs != 0 || getOverrideRetPtrMfi.vtbloffs != 0 || getOverrideRetPtrMfi.vtblindex < 0)
				{
					getOverrideRetPtrMfi.vtblindex = -1; // Ensure we go through there again on subsequent calls
					SH_ASSERT(0, ("Unexpected compilation of IHookContext::GetOverrideRetPtr!"));
					return;
				}
			}

			//prev_res = MRES_IGNORED;
			//while ( (iter = static_cast(pContext->GetNext())) )
			//{
			//	cur_res = MRES_IGNORED;
			//	plugin_ret = iter->Call params;
			//	prev_res = cur_res;
			//	if (cur_res > status)
			//		status = cur_res;
			//	if (cur_res >= MRES_OVERRIDE)
			//		*reinterpret_cast(pContext->GetOverrideRetPtr()) = plugin_ret;
			//}

			// prev_res = MRES_IGNORED;
			//m_HookFunc.breakpoint();
			m_HookFunc.mov(rbp(v_prev_res), MRES_IGNORED);
			auto startLoop = m_HookFunc.get_outputpos();

			// while ( (iter = static_cast(pContext->GetNext())) )
			m_HookFunc.mov(rax, rbp(v_pContext));
			m_HookFunc.mov(rax, rax()); // *this (vtable)
			m_HookFunc.mov(rax, rax(getNext.vtblindex * SIZE_PTR)); // vtable[vtblindex]

			// Shadow space 32 bytes + 8 bytes
			MSVC_ONLY(m_HookFunc.sub(rsp, 40));
			// We need to keep it aligned to 16 bytes on Linux too...
			GCC_ONLY(m_HookFunc.sub(rsp, 8));
			GCC_ONLY(m_HookFunc.mov(rdi, rbp(v_pContext)));
			MSVC_ONLY(m_HookFunc.mov(rcx, rbp(v_pContext)));
			m_HookFunc.call(rax); // pContext->GetNext()
			// store into iter
			m_HookFunc.mov(rbp(v_iter), rax);
			// Free Linux stack alignment
			GCC_ONLY(m_HookFunc.add(rsp, 8));
			// Free shadow space
			MSVC_ONLY(m_HookFunc.add(rsp, 40));

			// null check iter; the jz target is patched at the end of this function
			m_HookFunc.test(rax, rax);
			m_HookFunc.jz(0x0); // Leave loop if nullptr
			std::int32_t jumpOff = m_HookFunc.get_outputpos();

			// cur_res = MRES_IGNORED;
			m_HookFunc.mov(rbp(v_cur_res), MRES_IGNORED);
			// prev_res = cur_res;
			// NOTE(review): this copies cur_res (just reset to MRES_IGNORED) into
			// prev_res BEFORE the delegate call, whereas the pseudocode above
			// updates prev_res after Call -- previous hook's result appears lost;
			// confirm against upstream.
			m_HookFunc.mov(rax, rbp(v_cur_res));
			m_HookFunc.mov(rbp(v_prev_res), rax);

			// call
			std::int32_t stackSpace = PushParameters(v_iter, MemRetWithTempObj() ? v_mem_ret : v_plugin_ret);
			m_HookFunc.mov(rax, rbp(v_iter));
			m_HookFunc.mov(rax, rax()); // *this (vtable)
			m_HookFunc.mov(rax, rax(callMfi.vtblindex * SIZE_PTR)); // vtable[vtblindex] iter -> Call
			m_HookFunc.call(rax);
			// epilog free the stack
			m_HookFunc.add(rsp, stackSpace);

			SaveReturnValue(v_mem_ret, v_plugin_ret);

			// status = max(status, cur_res) via cmovg
			// if (cur_res > status)
			m_HookFunc.mov(rax, rbp(v_cur_res));
			m_HookFunc.cmp(rax, rbp(v_status));
			// status = cur_res;
			m_HookFunc.mov(rax, rbp(v_status));
			m_HookFunc.cmovg(rax, rbp(v_cur_res));
			m_HookFunc.mov(rbp(v_status), rax);

			// Are we dealing with a non void function ?
			auto retInfo = m_Proto.GetRet();
			if (retInfo.size != 0)
			{
				// if (cur_res >= MRES_OVERRIDE)
				m_HookFunc.mov(rax, MRES_OVERRIDE);
				m_HookFunc.cmp(rbp(v_cur_res), rax);
				// jl patched immediately below to loop straight back to startLoop
				m_HookFunc.jl(0x0);
				std::int32_t earlyLoopBack = m_HookFunc.get_outputpos() - startLoop;
				m_HookFunc.rewrite(m_HookFunc.get_outputpos() - sizeof(std::int32_t), -earlyLoopBack);

				// Shadow space 32 bytes + 8 bytes
				MSVC_ONLY(m_HookFunc.sub(rsp, 40));
				// We need to keep it aligned to 16 bytes on Linux too...
				GCC_ONLY(m_HookFunc.sub(rsp, 8));
				m_HookFunc.mov(rax, rbp(v_pContext));
				m_HookFunc.mov(rax, rax()); // *this (vtable)
				m_HookFunc.mov(rax, rax(getOverrideRetPtrMfi.vtblindex * SIZE_PTR)); // vtable[vtblindex]
				GCC_ONLY(m_HookFunc.mov(rdi, rbp(v_pContext)));
				MSVC_ONLY(m_HookFunc.mov(rcx, rbp(v_pContext)));
				m_HookFunc.call(rax); // pContext->GetOverrideRetPtr()
				// Free Linux stack alignment
				GCC_ONLY(m_HookFunc.add(rsp, 8));
				MSVC_ONLY(m_HookFunc.add(rsp, 40));

				// *reinterpret_cast(pContext->GetOverrideRetPtr()) = plugin_ret;
				// byref is always a pointer underneath
				if (retInfo.flags & PassInfo::PassFlag_ByRef)
				{
					m_HookFunc.mov(r8, rbp(v_plugin_ret));
					m_HookFunc.mov(rax(), r8);
				}
				else
				{
					// custom assignment operator, so call it
					if (retInfo.pAssignOperator)
					{
						// Shadow space 32 bytes + 8 bytes
						MSVC_ONLY(m_HookFunc.sub(rsp, 40));
						// We need to keep it aligned to 16 bytes on Linux too...
						GCC_ONLY(m_HookFunc.sub(rsp, 8));
						// 1st parameter (this) -- rax still holds GetOverrideRetPtr()'s result
						GCC_ONLY(m_HookFunc.mov(rdi, rax));
						MSVC_ONLY(m_HookFunc.mov(rcx, rax));
						// 2nd parameter (copy)
						GCC_ONLY(m_HookFunc.lea(rsi, rbp(v_plugin_ret)));
						MSVC_ONLY(m_HookFunc.lea(rdx, rbp(v_plugin_ret)));
						// Move address and call
						m_HookFunc.mov(rax, reinterpret_cast(retInfo.pAssignOperator));
						m_HookFunc.call(rax);
						// Free Linux stack alignment
						GCC_ONLY(m_HookFunc.add(rsp, 8));
						// Free shadow space
						MSVC_ONLY(m_HookFunc.add(rsp, 40));
					}
					else
					{
						// Trivially copyable: raw byte copy (rep movsb) into the
						// override slot; rdi/rsi/rcx are preserved around it.
						m_HookFunc.push(rdi);
						m_HookFunc.push(rsi);
						m_HookFunc.push(rcx);
						m_HookFunc.mov(rcx, retInfo.size);
						m_HookFunc.mov(rdi, rax);
						m_HookFunc.lea(rsi, rbp(v_plugin_ret));
						m_HookFunc.rep_movs_bytes();
						m_HookFunc.pop(rcx);
						m_HookFunc.pop(rsi);
						m_HookFunc.pop(rdi);
					}
				}
			}

			// Unconditional loop back to startLoop, then patch the loop-exit jz
			// to land here (first instruction after the loop).
			m_HookFunc.jump(0x0);
			std::int32_t loopBack = m_HookFunc.get_outputpos() - startLoop;
			m_HookFunc.rewrite(m_HookFunc.get_outputpos() - sizeof(std::int32_t), -loopBack);
			m_HookFunc.rewrite(jumpOff - sizeof(std::int32_t), m_HookFunc.get_outputpos() - jumpOff);
		}

		// Emits: if (status != MRES_SUPERCEDE && pContext->ShouldCallOrig()) call
		// the original function (address stored in v_vfnptr_origentry), otherwise
		// copy the override return into the original-return slot.
		void x64GenContext::GenerateCallOrig(int v_status, int v_pContext, int v_this,
			int v_vfnptr_origentry, int v_orig_ret, int v_override_ret, int v_place_for_memret)
		{
			static MemFuncInfo shouldCallOrigMfi = {false, -1, -1, -1};
			if (shouldCallOrigMfi.vtblindex == -1)
			{
				GetFuncInfo(&IHookContext::ShouldCallOrig, shouldCallOrigMfi);
				// The function is somehow not virtual, or has a non trivial this ptr
				if (!shouldCallOrigMfi.isVirtual || shouldCallOrigMfi.thisptroffs != 0 || shouldCallOrigMfi.vtbloffs != 0 || shouldCallOrigMfi.vtblindex < 0)
				{
					shouldCallOrigMfi.vtblindex = -1; // Ensure we go through there again on subsequent calls
					SH_ASSERT(0, ("Unexpected compilation of IHookContext::ShouldCallOrig!"));
					return;
				}
			}

			//if (status != MRES_SUPERCEDE && pContext->ShouldCallOrig())
			//{
			//	rettype (EmptyClass::*mfp)paramtypes;
			//	SH_SETUP_MFP(mfp);
			//	orig_ret = (reinterpret_cast(this)->*mfp)params;
			//}
			//else
			//	orig_ret = override_ret;
			//m_HookFunc.breakpoint();
			m_HookFunc.mov(rax, rbp(v_status));
			m_HookFunc.cmp(rax, MRES_SUPERCEDE);
			// je target patched below to jump to the else branch
			m_HookFunc.je(0x0);
			auto statusCmpOff = m_HookFunc.get_outputpos();

			// Shadow space 32 bytes + 8 bytes
			MSVC_ONLY(m_HookFunc.sub(rsp, 40));
			// We need to keep it aligned to 16 bytes on Linux too...
			GCC_ONLY(m_HookFunc.sub(rsp, 8));
			m_HookFunc.mov(rax, rbp(v_pContext));
			// 1st parameter (this)
			GCC_ONLY(m_HookFunc.mov(rdi, rax));
			MSVC_ONLY(m_HookFunc.mov(rcx, rax));
			m_HookFunc.mov(rax, rax());
			m_HookFunc.mov(rax, rax(SIZE_PTR * shouldCallOrigMfi.vtblindex));
			m_HookFunc.call(rax); // pContext->ShouldCallOrig()
			// Free Linux stack alignment
			GCC_ONLY(m_HookFunc.add(rsp, 8));
			// Free shadow space
			MSVC_ONLY(m_HookFunc.add(rsp, 40));

			// Don't have the lower register yet, so this will do for now
			m_HookFunc.test(rax, 0x1);
			m_HookFunc.jz(0x0);
			auto shouldCallOff = m_HookFunc.get_outputpos();

			// original call
			std::int32_t stackSpace = PushParameters(v_this, MemRetWithTempObj() ? v_place_for_memret : v_orig_ret);
			m_HookFunc.mov(rax, rbp(v_vfnptr_origentry));
			m_HookFunc.call(rax);
			// epilog free the stack
			m_HookFunc.add(rsp, stackSpace);

			SaveReturnValue(v_place_for_memret, v_orig_ret);

			// skip over the else branch; patched at the end of this function
			m_HookFunc.jump(0x0);
			auto callOriginalOff = m_HookFunc.get_outputpos();

			// else
			auto elseStartOff = m_HookFunc.get_outputpos();
			// Patch both "don't call original" jumps to land on the else branch.
			// NOTE(review): the static_cast template arguments were lost in this
			// copy of the file (likely std::int32_t) -- restore from upstream.
			m_HookFunc.rewrite(statusCmpOff - sizeof(std::int32_t), static_cast(elseStartOff - statusCmpOff));
			m_HookFunc.rewrite(shouldCallOff - sizeof(std::int32_t), static_cast(elseStartOff - shouldCallOff));

			// orig_ret = override_ret;
			auto retInfo = m_Proto.GetRet();
			if (retInfo.size != 0)
			{
				if (retInfo.flags & PassInfo::PassFlag_ByRef)
				{
					m_HookFunc.mov(rax, rbp(v_override_ret));
					m_HookFunc.mov(rbp(v_orig_ret), rax);
				}
				else
				{
					// custom assignment operator, so call it
					if (retInfo.pAssignOperator)
					{
						// Shadow space 32 bytes + 8 bytes
						MSVC_ONLY(m_HookFunc.sub(rsp, 40));
						// We need to keep it aligned to 16 bytes on Linux too...
						GCC_ONLY(m_HookFunc.sub(rsp, 8));
						// 1st parameter (this)
						GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_orig_ret)));
						MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_orig_ret)));
						// 2nd parameter (copy)
						GCC_ONLY(m_HookFunc.lea(rsi, rbp(v_override_ret)));
						MSVC_ONLY(m_HookFunc.lea(rdx, rbp(v_override_ret)));
						// Move address and call
						m_HookFunc.mov(rax, reinterpret_cast(retInfo.pAssignOperator));
						m_HookFunc.call(rax);
						// Free Linux stack alignment
						GCC_ONLY(m_HookFunc.add(rsp, 8));
						// Free shadow space
						MSVC_ONLY(m_HookFunc.add(rsp, 40));
					}
					else
					{
						// Trivially copyable: raw byte copy with rep movsb
						m_HookFunc.push(rdi);
						m_HookFunc.push(rsi);
						m_HookFunc.push(rcx);
						m_HookFunc.mov(rcx, retInfo.size);
						m_HookFunc.lea(rdi, rbp(v_orig_ret));
						m_HookFunc.lea(rsi, rbp(v_override_ret));
						m_HookFunc.rep_movs_bytes();
						m_HookFunc.pop(rcx);
						m_HookFunc.pop(rsi);
						m_HookFunc.pop(rdi);
					}
				}
			}
			// Patch the "called original" jump to skip the else branch entirely.
			m_HookFunc.rewrite(callOriginalOff - sizeof(std::int32_t), static_cast(m_HookFunc.get_outputpos() - callOriginalOff));
		}

		// Emits the argument setup (registers + stack) for an upcoming call to a
		// thiscall-style target, re-reading the spilled parameters from the hook
		// function's own incoming stack area. Returns the number of bytes pushed
		// on the stack; the caller must add that back to rsp after the call.
		std::int32_t x64GenContext::PushParameters(int v_this, int v_ret)
		{
			auto retInfo = m_Proto.GetRet();
			std::int32_t stackSpace = 0;
#if SH_COMP == SH_COMP_MSVC
			const x86_64_Reg params_reg[] = { rcx, rdx, r8, r9 };
			const x86_64_FloatReg params_floatreg[] = { xmm0, xmm1, xmm2, xmm3 };
			int reg_index = 0;

			// setup this parameter
			m_HookFunc.mov(params_reg[reg_index], rbp(v_this));
			reg_index++;

			// Non standard return
			if (retInfo.size != 0 && (retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem)
			{
				m_HookFunc.lea(params_reg[reg_index], rbp(v_ret));
				reg_index++;
			}

			// We've backed up the parameters into the shadow space
			int parameter_index = 0;
			for (; parameter_index < m_Proto.GetNumOfParams() && reg_index < 4; reg_index++, parameter_index++)
			{
				auto& info = m_Proto.GetParam(parameter_index);
				if (info.type == PassInfo::PassType_Float && (info.flags & PassInfo::PassFlag_ByRef) != PassInfo::PassFlag_ByRef)
				{
					m_HookFunc.movsd(params_floatreg[reg_index], rbp(reg_index * 8 + 8));
				}
				else
				{
					m_HookFunc.mov(params_reg[reg_index], rbp(reg_index * 8 + 8));
				}
			}

			// Allocate the shadow space
			m_HookFunc.sub(rsp, 32);
			stackSpace += 32;

			int parameters_on_stack = m_Proto.GetNumOfParams() - parameter_index;
			m_HookFunc.sub(rsp, parameters_on_stack * 8);
			stackSpace += parameters_on_stack * 8;
			// If this number is even we need to allocate an extra 8 bytes
			// NOTE(review): padding on an EVEN count looks inverted relative to the
			// usual 16-byte call-site alignment rule -- confirm the frame math.
			if (parameters_on_stack % 2 == 0)
			{
				m_HookFunc.sub(rsp, 8);
				stackSpace += 8;
			}

			// Copy the spilled stack parameters into the new outgoing area
			for (int i = 0; parameter_index < m_Proto.GetNumOfParams(); parameter_index++, i++)
			{
				m_HookFunc.mov(rax, rbp(40 + (8 * i))); // We need to skip the shadow space + return address
				m_HookFunc.mov(rsp(32 + (8 * i)), rax);
			}
			return stackSpace;
#else
			const x86_64_Reg params_reg[] = { rdi, rsi, rdx, rcx, r8, r9 };
			const x86_64_FloatReg params_floatreg[] = { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 };
			const std::uint8_t num_reg = sizeof(params_reg) / sizeof(params_reg[0]);
			const std::uint8_t num_floatreg = sizeof(params_floatreg) / sizeof(params_floatreg[0]);
			int reg_index = 0;
			int floatreg_index = 0;

			// setup this parameter
			m_HookFunc.mov(params_reg[reg_index], rbp(v_this));
			reg_index++;

			// Non standard return
			if (retInfo.size != 0 && (retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem)
			{
				m_HookFunc.lea(params_reg[reg_index], rbp(v_ret));
				reg_index++;
			}

			// TODO: Doesn't handle custom_register at all........
			// NOTE(review): this path only counts register/stack assignment; no code
			// ever loads the SysV parameter registers themselves (and the
			// pre-increment makes the last register index unusable) -- the GCC
			// branch appears to be work in progress; confirm against upstream.
			int parameters_on_stack = 0;
			// Pass to calculate stack space...
			for (int i = 0, tmp_reg_index = reg_index, tmp_floatreg_index = floatreg_index; i < m_Proto.GetNumOfParams(); i++)
			{
				auto& info = m_Proto.GetParam(i);
				if (info.type == PassInfo::PassType_Float && (info.flags & PassInfo::PassFlag_ByRef) != PassInfo::PassFlag_ByRef)
				{
					if (++tmp_floatreg_index >= num_floatreg)
					{
						parameters_on_stack++;
					}
				}
				else
				{
					if (++tmp_reg_index >= num_reg)
					{
						parameters_on_stack++;
					}
				}
			}

			stackSpace = AlignSize(parameters_on_stack * 8, 16);
			m_HookFunc.sub(rsp, stackSpace);

			// Actually push registers to stack...
			for (int i = 0, pushed_stack_parameters = 0; i < m_Proto.GetNumOfParams(); i++)
			{
				auto& info = m_Proto.GetParam(i);
				if (info.type == PassInfo::PassType_Float && (info.flags & PassInfo::PassFlag_ByRef) != PassInfo::PassFlag_ByRef)
				{
					if (++floatreg_index >= num_floatreg)
					{
						m_HookFunc.mov(rax, rbp(8 + (8 * pushed_stack_parameters)));
						m_HookFunc.mov(rsp(0 + (8 * pushed_stack_parameters)), rax);
						pushed_stack_parameters++;
					}
				}
				else
				{
					if (++reg_index >= num_reg)
					{
						m_HookFunc.mov(rax, rbp(8 + (8 * pushed_stack_parameters)));
						m_HookFunc.mov(rsp(0 + (8 * pushed_stack_parameters)), rax);
						pushed_stack_parameters++;
					}
				}
			}
			return stackSpace;
#endif
		}

		// Emits code that captures the just-returned value (rax / xmm0 / sret
		// buffer) into the frame slot v_ret, invoking the assignment operator for
		// non-trivial in-memory returns. (Definition continues past this excerpt.)
		void x64GenContext::SaveReturnValue(int v_mem_ret, int v_ret)
		{
			const auto& retInfo = m_Proto.GetRet();
			// void, early return
			if (retInfo.size == 0)
			{
				return;
			}

			// byref is always a pointer underneath, returned in rax
			if ((retInfo.flags & PassInfo::PassFlag_ByRef) == PassInfo::PassFlag_ByRef)
			{
				m_HookFunc.mov(rbp(v_ret), rax);
				return;
			}
			// ByVal
			if (retInfo.type == PassInfo::PassType_Float)
			{
				m_HookFunc.movsd(rbp(v_ret), xmm0);
			}
			else if (retInfo.type == PassInfo::PassType_Basic)
			{
				m_HookFunc.mov(rbp(v_ret), rax);
			}
			else if ((retInfo.flags & PassInfo::PassFlag_RetMem) == PassInfo::PassFlag_RetMem)
			{
				if (MemRetWithTempObj())
				{
					if (retInfo.pAssignOperator)
					{
						// Shadow space 32 bytes + 8 bytes
						MSVC_ONLY(m_HookFunc.sub(rsp, 40));
						// We need to keep it aligned to 16 bytes on Linux too...
GCC_ONLY(m_HookFunc.sub(rsp, 8)); // 1st parameter (this) GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_ret))); MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_ret))); // 2nd parameter (copy) GCC_ONLY(m_HookFunc.lea(rsi, rbp(v_mem_ret))); MSVC_ONLY(m_HookFunc.lea(rdx, rbp(v_mem_ret))); // Move address and call m_HookFunc.mov(rax, reinterpret_cast(retInfo.pAssignOperator)); m_HookFunc.call(rax); // Free Linux stack alignment GCC_ONLY(m_HookFunc.add(rsp, 8)); // Free shadow space MSVC_ONLY(m_HookFunc.add(rsp, 40)); } else { m_HookFunc.push(rdi); m_HookFunc.push(rsi); m_HookFunc.push(rcx); m_HookFunc.mov(rcx, retInfo.size); m_HookFunc.lea(rdi, rbp(v_ret)); m_HookFunc.lea(rsi, rbp(v_mem_ret)); m_HookFunc.rep_movs_bytes(); m_HookFunc.pop(rcx); m_HookFunc.pop(rsi); m_HookFunc.pop(rdi); } if (retInfo.pDtor) { // Shadow space 32 bytes + 8 bytes MSVC_ONLY(m_HookFunc.sub(rsp, 40)); // We need to keep it aligned to 16 bytes on Linux too... GCC_ONLY(m_HookFunc.sub(rsp, 8)); // 1st parameter (this) GCC_ONLY(m_HookFunc.lea(rdi, rbp(v_mem_ret))); MSVC_ONLY(m_HookFunc.lea(rcx, rbp(v_mem_ret))); // Move address and call m_HookFunc.mov(rax, reinterpret_cast(retInfo.pDtor)); m_HookFunc.call(rax); // Free Linux stack alignment GCC_ONLY(m_HookFunc.add(rsp, 8)); // Free shadow space MSVC_ONLY(m_HookFunc.add(rsp, 40)); } } else { // Already copied in the proper location return; } } else { SH_ASSERT(0, ("Unknown handling of return type!")); return; } } void x64GenContext::PrepareReturn(int v_status, int v_pContext, int v_retptr) { const auto& retInfo = m_Proto.GetRet(); if (retInfo.size == 0) { return; } static MemFuncInfo getOverrideRetPtrMfi = {false, -1, -1, -1}; if (getOverrideRetPtrMfi.vtblindex == -1) { GetFuncInfo(&IHookContext::GetOverrideRetPtr, getOverrideRetPtrMfi); if (!getOverrideRetPtrMfi.isVirtual || getOverrideRetPtrMfi.thisptroffs != 0 || getOverrideRetPtrMfi.vtbloffs != 0 || getOverrideRetPtrMfi.vtblindex < 0) { getOverrideRetPtrMfi.vtblindex = -1; SH_ASSERT(0, ("Unexpected compilation of 
IHookContext::GetOverrideRetPtr!")); return; } } static MemFuncInfo getOrigRetPtrMfi = {false, -1, -1, -1}; if (getOrigRetPtrMfi.vtblindex == -1) { GetFuncInfo(&IHookContext::GetOrigRetPtr, getOrigRetPtrMfi); if (!getOrigRetPtrMfi.isVirtual || getOrigRetPtrMfi.thisptroffs != 0 || getOrigRetPtrMfi.vtbloffs != 0 || getOrigRetPtrMfi.vtblindex < 0) { getOrigRetPtrMfi.vtblindex = -1; SH_ASSERT(0, ("Unexpected compilation of IHookContext::GetOverrideRetPtr!")); return; } } //const my_rettype *retptr = reinterpret_cast( //(status >= MRES_OVERRIDE) ? pContext->GetOverrideRetPtr() : pContext->GetOrigRetPtr()); m_HookFunc.mov(rax, rbp(v_pContext)); m_HookFunc.mov(rax, rax()); m_HookFunc.mov(r8, rax); m_HookFunc.mov(rax, rax(getOrigRetPtrMfi.vtblindex * SIZE_PTR)); m_HookFunc.mov(r8, r8(getOverrideRetPtrMfi.vtblindex * SIZE_PTR)); m_HookFunc.xor_reg(r9, r9); m_HookFunc.mov(r9, rbp(v_status)); m_HookFunc.cmp(r9, MRES_OVERRIDE); m_HookFunc.cmovge(rax, r8); // Shadow space 32 bytes + 8 bytes MSVC_ONLY(m_HookFunc.sub(rsp, 40)); // We need to keep it aligned to 16 bytes on Linux too... 
GCC_ONLY(m_HookFunc.sub(rsp, 8)); // 1st parameter (this) GCC_ONLY(m_HookFunc.mov(rdi, rbp(v_pContext))); MSVC_ONLY(m_HookFunc.mov(rcx, rbp(v_pContext))); m_HookFunc.call(rax); // Free Linux stack alignment GCC_ONLY(m_HookFunc.add(rsp, 8)); // Free shadow space MSVC_ONLY(m_HookFunc.add(rsp, 40)); m_HookFunc.mov(rbp(v_retptr), rax); } void x64GenContext::DoReturn(int v_retptr, int v_memret_outaddr) { const auto& retInfo = m_Proto.GetRet(); if (retInfo.size == 0) { return; } m_HookFunc.mov(r8, rbp(v_retptr)); if (retInfo.flags & PassInfo::PassFlag_ByRef) { m_HookFunc.mov(rax, r8()); return; } // else: byval if (retInfo.type == PassInfo::PassType_Float) { m_HookFunc.movsd(xmm0, r8()); } else if (retInfo.type == PassInfo::PassType_Basic || ((retInfo.type == PassInfo::PassType_Object) && (retInfo.flags & PassInfo::PassFlag_RetReg)) ) { m_HookFunc.mov(rax, r8()); } if (retInfo.flags & PassInfo::PassFlag_RetMem) { // *memret_outaddr = plugin_ret if (retInfo.pCopyCtor) { // Shadow space 32 bytes + 8 bytes MSVC_ONLY(m_HookFunc.sub(rsp, 40)); // We need to keep it aligned to 16 bytes on Linux too... 
// (continuation of DoReturn: memory return, copy-constructor branch)
			GCC_ONLY(m_HookFunc.sub(rsp, 8));
			// 1st parameter (this)
			GCC_ONLY(m_HookFunc.mov(rdi, rbp(v_memret_outaddr)));
			MSVC_ONLY(m_HookFunc.mov(rcx, rbp(v_memret_outaddr)));
			// 2nd parameter (copy)
			GCC_ONLY(m_HookFunc.mov(rsi, r8));
			MSVC_ONLY(m_HookFunc.mov(rdx, r8));
			// Move address and call
			// NOTE(review): cast target type lost in extraction — left as found.
			m_HookFunc.mov(rax, reinterpret_cast(retInfo.pCopyCtor));
			m_HookFunc.call(rax);
			// Free Linux stack alignment
			GCC_ONLY(m_HookFunc.add(rsp, 8));
			// Free shadow space
			MSVC_ONLY(m_HookFunc.add(rsp, 40));
		}
		else
		{
			// Trivial copy: save/restore the registers rep movsb clobbers
			m_HookFunc.push(rdi);
			m_HookFunc.push(rsi);
			m_HookFunc.push(rcx);
			m_HookFunc.mov(rcx, retInfo.size);
			m_HookFunc.mov(rdi, rbp(v_memret_outaddr));
			m_HookFunc.mov(rsi, r8);
			m_HookFunc.rep_movs_bytes();
			m_HookFunc.pop(rcx);
			m_HookFunc.pop(rsi);
			m_HookFunc.pop(rdi);
		}
		// memory-return convention: rax carries the buffer address back to the caller
		m_HookFunc.mov(rax, rbp(v_memret_outaddr));
	}
}

// Emits the code that calls m_SHPtr->EndContext(pContext) to close the hook
// context when the generated hook function finishes.
// v_pContext: frame offset holding the IHookContext pointer.
void x64GenContext::CallEndContext(int v_pContext)
{
	// Resolve ISourceHook::EndContext's vtable slot once, lazily.
	static MemFuncInfo mfi = {false, -1, -1, -1};
	if (mfi.vtblindex == -1)
	{
		GetFuncInfo(&ISourceHook::EndContext, mfi);
		// The function is somehow not virtual, or has a non trivial this ptr
		if (!mfi.isVirtual || mfi.thisptroffs != 0 || mfi.vtbloffs != 0 || mfi.vtblindex < 0)
		{
			mfi.vtblindex = -1; // Ensure we go through there again on subsequent calls
			SH_ASSERT(0, ("Couldn't retrieve details of ISourceHook::EndContext!"));
			return;
		}
	}
	// Shadow space 32 bytes + 8 bytes
	MSVC_ONLY(m_HookFunc.sub(rsp, 40));
	// We need to keep it aligned to 16 bytes on Linux too...
	GCC_ONLY(m_HookFunc.sub(rsp, 8));
	// 1st parameter (this)
	// NOTE(review): cast target types below were lost in extraction — left as found.
	GCC_ONLY(m_HookFunc.mov(rdi, reinterpret_cast(m_SHPtr)));
	MSVC_ONLY(m_HookFunc.mov(rcx, reinterpret_cast(m_SHPtr)));
	// 2nd param
	GCC_ONLY(m_HookFunc.mov(rsi, rbp(v_pContext)));
	MSVC_ONLY(m_HookFunc.mov(rdx, rbp(v_pContext)));
	// Move address and call: the EndContext entry is read from m_SHPtr's vtable
	// at generation time and baked in as an immediate.
	m_HookFunc.mov(rax, (*reinterpret_cast(m_SHPtr))[mfi.vtblindex]);
	m_HookFunc.call(rax);
	// Free Linux stack alignment
	GCC_ONLY(m_HookFunc.add(rsp, 8));
	// Free shadow space
	MSVC_ONLY(m_HookFunc.add(rsp, 40));
}

// True when a memory-returned object must first be built in a temporary slot:
// i.e. it has a destructor or an assignment operator that must run on the copy.
bool x64GenContext::MemRetWithTempObj()
{
	const auto& retInfo = m_Proto.GetRet();
	// Memory return AND (has destructor OR has assign operator)
	return ((retInfo.flags & PassInfo::PassFlag_RetMem)
		&& (retInfo.flags & (PassInfo::PassFlag_ODtor | PassInfo::PassFlag_AssignOp)));
}

// Decides, per platform ABI, whether the return value travels in registers
// (PassFlag_RetReg) or in caller-provided memory (PassFlag_RetMem), updating
// the proto's return PassInfo flags. Returns false when the answer cannot be
// determined automatically (GCC object returns with unknown layout).
bool x64GenContext::AutoDetectRetType()
{
	auto& pi = m_Proto.GetRet();
	// Void return, ignore
	if (pi.size == 0)
	{
		return true;
	}
	// Only relevant for byval types
	if (pi.flags & PassInfo::PassFlag_ByVal)
	{
		// Basic + float:
		if (pi.type == PassInfo::PassType_Basic || pi.type == PassInfo::PassType_Float)
		{
			// <= 8 bytes:
			// _always_ in registers, no matter what the user says
			if (pi.size <= 8)
			{
				pi.flags &= ~PassInfo::PassFlag_RetMem;
				pi.flags |= PassInfo::PassFlag_RetReg;
			}
			else
			{
				// Does this even exist? No idea, if it does: in memory!
				pi.flags &= ~PassInfo::PassFlag_RetReg;
				pi.flags |= PassInfo::PassFlag_RetMem;
			}
		}
		// Object:
		else if (pi.type == PassInfo::PassType_Object)
		{
			// If the user says nothing, auto-detect
			if ((pi.flags & (PassInfo::PassFlag_RetMem | PassInfo::PassFlag_RetReg)) == 0)
			{
				bool hasSpecialFunctions = (pi.flags & (PassInfo::PassFlag_OCtor|PassInfo::PassFlag_ODtor|PassInfo::PassFlag_CCtor)) != 0;
#if SH_COMP == SH_COMP_MSVC
				// MSVC has various criteria for passing in memory
				// if object doesn't fit on 8, 16, 32, or 64 bits. It's in memory
				// if object has a constructor or destructor. It's in memory
				bool unconventionalsize = (pi.size == 3 || (pi.size != 8 && pi.size > 4));
				if (unconventionalsize || hasSpecialFunctions)
				{
					pi.flags |= PassInfo::PassFlag_RetMem;
				}
				else
				{
					pi.flags |= PassInfo::PassFlag_RetReg;
				}
#elif SH_COMP == SH_COMP_GCC
				// "If the size of an object is larger than eight eightbytes, or it contains unaligned fields, it has class MEMORY".
				//
				// "If a C++ object is non-trivial for the purpose of calls, as specified in the C++ ABI[16], it is passed by invisible reference (the object is replaced in the parameter list by a pointer that has class INTEGER)[17]."
				// "[17]An object whose type is non-trivial for the purpose of calls cannot be passed by value because such objects must have the same address in the caller and the callee. Similar issues apply when returning an object from a function."
				//
				// source: System V AMD64 psABI section 3.2.3 Parameter Passing
				// https://gitlab.com/x86-psABIs/x86-64-ABI/-/jobs/artifacts/master/raw/x86-64-ABI/abi.pdf?job=build)
				//
				// "A type is considered non-trivial for the purposes of call if:
				// - it has a non-trivial copy constructor, move constructor, or destructor, or
				// - all of its copy and move constructors are deleted."
				// source: https://itanium-cxx-abi.github.io/cxx-abi/abi.html (yes, System V copied this definition from Itanium...)
				//
				//
				// typedef struct __attribute__((packed)) { char a; int b; } thing; // = memory (5 bytes)
				//
				// typedef struct __attribute__((packed)) { int a; char b; } thing; // = register (5 bytes)
				//
				// typedef struct __attribute__((packed)) { char a; short b; char c; } thing; // = memory (6 bytes)
				//
				// typedef struct __attribute__((packed)) { char a; short b; int c; char d; } thing; // = memory (8 bytes)
				//
				//
				// Result: we cannot detect if it should be register or memory without knowing the layout of the object.
// (continuation of AutoDetectRetType: GCC object-return branch)
				bool tooBig = (pi.size > (8 * 8)); // larger than eight eightbytes -> class MEMORY
				if (hasSpecialFunctions || tooBig)
				{
					pi.flags |= PassInfo::PassFlag_RetMem;
					return true;
				}
				else
				{
					// layout unknown: cannot decide register vs memory, give up
					return false;
				}
#endif
			}
		}
	}
	else
	{
		// byref: make sure that the flag is _not_ set
		pi.flags &= ~PassInfo::PassFlag_RetMem;
		pi.flags |= PassInfo::PassFlag_RetReg;
	}
	return true;
}

// Parameter-flag auto-detection: intentionally empty on x86-64 (no-op).
void x64GenContext::AutoDetectParamFlags()
{
}

// Generates the hook manager's public function. Returns the executable
// buffer (m_PubFunc) on success, nullptr when SetInfo's vtable slot could
// not be resolved.
void* x64GenContext::GeneratePubFunc()
{
	// The pubfunc is a static cdecl function.
	// C Code:
	// int HookManPubFunc(
	//     bool store,             // rdi (AMD) rcx (microsoft)
	//     IHookManagerInfo *hi    // rsi (AMD) rdx (microsoft)
	// )
	// {
	//     if (store)
	//         *m_pHI = hi;
	//     if (hi)
	//         hi->SetInfo(HOOKMAN_VERSION, m_VtblOffs, m_VtblIdx, m_Proto.GetProto(), m_HookfuncVfnptr)
	// }

	// prologue
	MSVC_ONLY(m_PubFunc.sub(rsp, 0x38)); // Shadow space 32 bytes + 2 * 8 bytes (for our parameters) + 8 bytes
	// TODO: GCC_ONLY(m_PubFunc.sub(rsp, 8+?));
	// Frame pointer! We like working callstacks when debugging crashes!
	// TODO: Might mean we don't have to `sub rsp, 8`?
	GCC_ONLY(m_PubFunc.push(rbp));
	GCC_ONLY(m_PubFunc.mov(rbp, rsp));

	// Both Microsoft and AMD uses r8 and r9 as argument parameters
	// Therefore they need not to be preserved across function calls
	// Let's use them as local variables, this will make writing the
	// rest of the function much easier

	// Store 'store' into r8
	GCC_ONLY(m_PubFunc.mov(r8, rdi));
	MSVC_ONLY(m_PubFunc.mov(r8, rcx));
	// Store 'hi' into r9
	GCC_ONLY(m_PubFunc.mov(r9, rsi));
	MSVC_ONLY(m_PubFunc.mov(r9, rdx));

	// If 'store' is true, store hi into rax
	m_PubFunc.test(r8, 0x1);
	m_PubFunc.jz(0x0); // placeholder rel32, back-patched right below
	auto storeOff = m_PubFunc.get_outputpos();
	// NOTE(review): cast target types on the reinterpret_casts in this function
	// were lost in extraction — left as found.
	m_PubFunc.mov(rax, reinterpret_cast(m_pHI));
	m_PubFunc.mov(rax(), r9); // *m_pHI = hi
	m_PubFunc.rewrite(storeOff - sizeof(std::int32_t), m_PubFunc.get_outputpos() - storeOff);

	// If 'hi' is not null, call SetInfo
	m_PubFunc.test(r9, r9);
	m_PubFunc.jz(0x0);
	// We will write the real offset later
	auto jumpOff = m_PubFunc.get_outputpos();

	// Resolve IHookManagerInfo::SetInfo's vtable slot once, lazily.
	static MemFuncInfo mfi = {false, -1, -1, -1};
	if (mfi.vtblindex == -1)
	{
		GetFuncInfo(&IHookManagerInfo::SetInfo, mfi);
		// The function is somehow not virtual, or has a non trivial this ptr
		if (!mfi.isVirtual || mfi.thisptroffs != 0 || mfi.vtbloffs != 0 || mfi.vtblindex < 0)
		{
			mfi.vtblindex = -1; // Ensure we go through there again on subsequent calls
			return nullptr;
		}
	}

	// Obtain the vtable
	m_PubFunc.mov(rax, r9());
	m_PubFunc.mov(rax, rax(SIZE_PTR * mfi.vtblindex)); // rax = &SetInfo

	// 1st parameter (this)
	GCC_ONLY(m_PubFunc.mov(rdi, r9));
	MSVC_ONLY(m_PubFunc.mov(rcx, r9));
	// 2nd parameter
	GCC_ONLY(m_PubFunc.mov(rsi, SH_HOOKMAN_VERSION));
	MSVC_ONLY(m_PubFunc.mov(rdx, SH_HOOKMAN_VERSION));
	// 3rd parameter
	GCC_ONLY(m_PubFunc.mov(rdx, m_VtblOffs));
	MSVC_ONLY(m_PubFunc.mov(r8, m_VtblOffs));
	// 4th parameter
	GCC_ONLY(m_PubFunc.mov(rcx, m_VtblIdx));
	MSVC_ONLY(m_PubFunc.mov(r9, m_VtblIdx));
	// 5th argument (MSVC: beyond the 4 registers, goes to the stack above the shadow space)
	GCC_ONLY(m_PubFunc.mov(r8, reinterpret_cast(m_BuiltPI)));
	MSVC_ONLY(m_PubFunc.mov(r10, reinterpret_cast(m_BuiltPI)));
	MSVC_ONLY(m_PubFunc.mov(rsp(0x20), r10));
	// 6th argument
	GCC_ONLY(m_PubFunc.mov(r9, reinterpret_cast(m_HookfuncVfnptr)));
	MSVC_ONLY(m_PubFunc.mov(r10, reinterpret_cast(m_HookfuncVfnptr)));
	MSVC_ONLY(m_PubFunc.mov(rsp(0x28), r10));

	m_PubFunc.call(rax);

	// Now that we've written the conditional branch
	// we can move set the offset at our earlier jump
	std::int32_t endOff = static_cast(m_PubFunc.get_outputpos()) - jumpOff;
	m_PubFunc.rewrite(jumpOff - sizeof(std::int32_t), endOff);

	// epilogue
	// Free Linux stack alignment
	// TODO: GCC_ONLY(m_HookFunc.add(rsp, 8 + ?));
	// Free shadow space & parameter space & stack alignment
	MSVC_ONLY(m_PubFunc.add(rsp, 0x38));
	GCC_ONLY(m_PubFunc.pop(rbp));
	// Return 0
	m_PubFunc.xor_reg(rax, rax);
	m_PubFunc.retn();

	m_PubFunc.SetRE(); // mark the buffer read+execute
	return m_PubFunc;
}

// Returns the cached public hook-manager function, generating it on first use.
// Generate() is defined elsewhere in this file — presumably it drives
// GeneratePubFunc and the hookfunc generation; verify against the full file.
HookManagerPubFunc x64GenContext::GetPubFunc()
{
	if (m_GeneratedPubFunc == nullptr)
	{
		// Try generating the function
		m_GeneratedPubFunc = Generate();
	}
	return m_GeneratedPubFunc;
}

// True when this context was generated for exactly the same prototype,
// vtable offset and vtable index.
bool x64GenContext::Equal(const CProto &proto, int vtbl_offs, int vtbl_idx)
{
	return (m_OrigProto.ExactlyEqual(proto) && m_VtblOffs == vtbl_offs && m_VtblIdx == vtbl_idx);
}

// True when 'other' is the public function generated by this context.
bool x64GenContext::Equal(HookManagerPubFunc other)
{
	return m_GeneratedPubFunc == other;
}

} // namespace Impl
} // namespace SourceHook