/* vim: ts=4 sw=4 sts=4 et tw=78
 * Copyright (c) 2011 James R. McKaskill. See license in ffi.h
 */
|.if X64
|.arch x64
|.else
|.arch x86
|.endif

|.actionlist build_actionlist
|.globalnames globnames
|.externnames extnames

|.if not X64
|.define RET_H, edx // for int64_t returns
|.define RET_L, eax
|.endif

|.if X64WIN
|
|.macro call_rrrp, func, arg0, arg1, arg2, arg3
| mov64 r9, arg3
| mov r8, arg2
| mov rdx, arg1
| mov rcx, arg0
| call func
|.endmacro
|.macro call_rrrr, func, arg0, arg1, arg2, arg3
| mov r9, arg3
| mov r8, arg2
| mov rdx, arg1
| mov rcx, arg0
| call func
|.endmacro
|
|.macro call_rrp, func, arg0, arg1, arg2
| mov64 r8, arg2
| mov rdx, arg1
| mov rcx, arg0
| call func
|.endmacro
|.macro call_rrr, func, arg0, arg1, arg2
| mov r8, arg2
| mov rdx, arg1
| mov rcx, arg0
| call func
|.endmacro
|
|.macro call_rp, func, arg0, arg1
| mov64 rdx, arg1
| mov rcx, arg0
| call func
|.endmacro
|.macro call_rr, func, arg0, arg1
| mov rdx, arg1
| mov rcx, arg0
| call func
|.endmacro
|
|.macro call_r, func, arg0
| mov rcx, arg0
| call func
|.endmacro
|
|.elif X64
|
| // the 5 and 6 arg forms are only used on posix x64
|.macro call_rrrrrr, func, arg0, arg1, arg2, arg3, arg4, arg5
| mov r9, arg5
| mov r8, arg4
| mov rcx, arg3
| mov rdx, arg2
| mov rsi, arg1
| mov rdi, arg0
| call func
|.endmacro
|.macro call_rrrrr, func, arg0, arg1, arg2, arg3, arg4
| mov r8, arg4
| mov rcx, arg3
| mov rdx, arg2
| mov rsi, arg1
| mov rdi, arg0
| call func
|.endmacro
|
|.macro call_rrrp, func, arg0, arg1, arg2, arg3
| mov64 rcx, arg3
| mov rdx, arg2
| mov rsi, arg1
| mov rdi, arg0
| call func
|.endmacro
|.macro call_rrrr, func, arg0, arg1, arg2, arg3
| mov rcx, arg3
| mov rdx, arg2
| mov rsi, arg1
| mov rdi, arg0
| call func
|.endmacro
|
|.macro call_rrp, func, arg0, arg1, arg2
| mov64 rdx, arg2
| mov rsi, arg1
| mov rdi, arg0
| call func
|.endmacro
|.macro call_rrr, func, arg0, arg1, arg2
| mov rdx, arg2
| mov rsi, arg1
| mov rdi, arg0
| call func
|.endmacro
|
|.macro call_rp, func, arg0, arg1
| mov64 rsi, arg1
| mov rdi, arg0
| call func
|.endmacro
|.macro call_rr, func, arg0, arg1
| mov rsi, arg1
| mov rdi, arg0
| call func
|.endmacro
|
|.macro call_r, func, arg0
| mov rdi, arg0
| call func
|.endmacro
|
|.else
| // define the 64bit registers to the 32 bit counterparts, so the common
| // code can use r*x for all pointers
|.define rax, eax
|.define rcx, ecx
|.define rdx, edx
|.define rsp, esp
|.define rbp, ebp
|.define rdi, edi
|.define rsi, esi
|.define mov64, mov
|
|.macro call_rrrr, func, arg0, arg1, arg2, arg3
| mov dword [rsp+12], arg3
| mov dword [rsp+8], arg2
| mov dword [rsp+4], arg1
| mov dword [rsp], arg0
| call func
|.endmacro
|.macro call_rrr, func, arg0, arg1, arg2
| mov dword [rsp+8], arg2
| mov dword [rsp+4], arg1
| mov dword [rsp], arg0
| call func
|.endmacro
|.macro call_rr, func, arg0, arg1
| mov dword [rsp+4], arg1
| mov dword [rsp], arg0
| call func
|.endmacro
|.macro call_r, func, arg0
| mov dword [rsp], arg0
| call func
|.endmacro
|
|.define call_rrrp, call_rrrr
|.define call_rrp, call_rrr
|.define call_rp, call_rr
|
|.endif

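/* Size of the per-function jump thunk written by compile_extern_jump below:
 * on 64 bit an 8 byte function pointer followed by a 6 byte "jmp [rip-14]"
 * stub, on 32 bit just the 4 byte function pointer. */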
#if defined _WIN64 || defined __amd64__
#define JUMP_SIZE 14
#else
#define JUMP_SIZE 4
#endif

#define MIN_BRANCH INT32_MIN
#define MAX_BRANCH INT32_MAX
#define BRANCH_OFF 4

static void compile_extern_jump(struct jit* jit, lua_State* L, cfunction func, uint8_t* code)
{
    /* The jump code is the function pointer followed by a stub to call the
     * function pointer. The stub exists in 64 bit so we can jump to functions
     * with an offset greater than 2 GB.
     *
     * Note we have to manually set this up since there are commands buffered
     * in the jit state and dynasm doesn't support rip relative addressing.
     *
     * eg on 64 bit:
     * 0-8: function ptr
     * 8-14: jmp aword [rip-14]
     *
     * for 32 bit we only set the function ptr as it can always fit in a 32
     * bit displacement
     */
#if defined _WIN64 || defined __amd64__
    *(cfunction*) code = func;
    code[8] = 0xFF; /* FF /4 operand for jmp */
    code[9] = 0x25; /* RIP displacement */
    *(int32_t*) &code[10] = -14;
#else
    *(cfunction*) code = func;
#endif
}

void compile_globals(struct jit* jit, lua_State* L)
{
    struct jit* Dst = jit;
    int* perr = &jit->last_errno;
    dasm_setup(Dst, build_actionlist);

    /* Note: since the return code uses EBP to reset the stack pointer, we
     * don't have to track the amount of stack space used. It also means we
     * can handle stdcall and cdecl with the same code.
     */

    /* Note the various call_* functions want 32 bytes of 16 byte aligned
     * stack
     */

    |.if X64
    |.define L_ARG, r12
    |.define TOP, r13
    |.else
    |.define L_ARG, rdi
    |.define TOP, rsi
    |.endif

    |.macro epilog
    |.if X64
    | mov TOP, [rbp-16]
    | mov L_ARG, [rbp-8]
    |.else
    | mov TOP, [rbp-8]
    | mov L_ARG, [rbp-4]
    |.endif
    | mov rsp, rbp
    | pop rbp
    | ret
    |.endmacro

    |.macro get_errno // note trashes registers
    | call extern GetLastError
    | mov64 rcx, perr
    | mov dword [rcx], eax
    |.endmacro

    /* the general idea for the return functions is:
     * 1) Save return value on stack
     * 2) Call get_errno (this trashes the registers hence #1)
     * 3) Unpack return value from stack
     * 4) Call lua push function
     * 5) Set eax to number of returned args (0 or 1)
     * 6) Call return which pops our stack frame
     */

    |->lua_return_arg:
    | mov eax, 1
    | epilog

    |->lua_return_void:
    | get_errno
    | mov eax, 0
    | epilog

    |->lua_return_double:
    |.if X64
    | movq qword [rsp+32], xmm0
    |.else
    | fstp qword [rsp+4] // note get_errno doesn't require any stack on x86
    |.endif
    |
    | get_errno
    |
    |.if X64WIN
    | movq xmm1, qword [rsp+32]
    | mov rcx, L_ARG
    |.elif X64
    | movq xmm0, qword [rsp+32]
    | mov rdi, L_ARG
    |.else
    | mov [rsp], L_ARG
    |.endif
    | call extern lua_pushnumber
    | jmp ->lua_return_arg

    |->lua_return_bool:
    | movzx eax, al
    | mov [rsp+32], eax
    | get_errno
    | mov eax, [rsp+32]
    | call_rr extern lua_pushboolean, L_ARG, rax
    | jmp ->lua_return_arg

    |->lua_return_int:
    | mov [rsp+32], eax
    | get_errno
    | mov eax, [rsp+32]
    | call_rr extern push_int, L_ARG, rax
    | jmp ->lua_return_arg

    |->lua_return_uint:
    | mov [rsp+32], eax
    | get_errno
    | mov eax, [rsp+32]
    | call_rr extern push_uint, L_ARG, rax
    | jmp ->lua_return_arg

    |->too_few_arguments:
    | mov ax, 0
    | call_rp extern luaL_error, L_ARG, &"too few arguments"

    |->too_many_arguments:
    | mov ax, 0
    | call_rp extern luaL_error, L_ARG, &"too many arguments"

    |->save_registers:
    | // use rbp relative so we store values in the outer stack frame
    |.if X64WIN
    | // use the provided shadow space for int registers above prev rbp and
    | // return address
    | mov [rbp+16], rcx
    | mov [rbp+24], rdx
    | mov [rbp+32], r8
    | mov [rbp+40], r9
    | // use the extra space we added for float registers
    | // -16 to store underneath previous value of L_ARG
    | movq qword [rbp-16], xmm0
    | movq qword [rbp-24], xmm1
    | movq qword [rbp-32], xmm2
    | movq qword [rbp-40], xmm3
    |.elif X64
    | movq qword [rbp-16], xmm0
    | movq qword [rbp-24], xmm1
    | movq qword [rbp-32], xmm2
    | movq qword [rbp-40], xmm3
    | movq qword [rbp-48], xmm4
    | movq qword [rbp-56], xmm5
    | movq qword [rbp-64], xmm6
    | movq qword [rbp-72], xmm7
    | mov [rbp-80], rdi
    | mov [rbp-88], rsi
    | mov [rbp-96], rdx
    | mov [rbp-104], rcx
    | mov [rbp-112], r8
    | mov [rbp-120], r9
    |.else
    | // fastcall, -8 to store underneath previous value of L_ARG
    | mov [rbp-8], ecx
    | mov [rbp-12], edx
    |.endif
    | ret

    compile(Dst, L, NULL, LUA_NOREF);
}

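/* Number of argument bytes popped by the callee when a callback returns,
 * used as the immediate for the "ret" at the end of a generated callback:
 * the packed size of the declared arguments for stdcall/fastcall, plus the
 * hidden complex double return pointer on 32 bit; 0 for plain cdecl. */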
int x86_return_size(lua_State* L, int usr, const struct ctype* ct)
{
    int ret = 0;
    const struct ctype* mt;

    if (ct->calling_convention != C_CALL) {
        size_t i;
        size_t argn = lua_rawlen(L, usr);
        for (i = 1; i <= argn; i++) {
            lua_rawgeti(L, usr, (int) i);
            mt = (const struct ctype*) lua_touserdata(L, -1);

            if (mt->pointers) {
                ret += sizeof(void*);
            } else {
                switch (mt->type) {
                case DOUBLE_TYPE:
                case COMPLEX_FLOAT_TYPE:
                case INT64_TYPE:
                    ret += 8;
                    break;
                case COMPLEX_DOUBLE_TYPE:
                    ret += 16;
                    break;
                case INTPTR_TYPE:
                    ret += sizeof(intptr_t);
                    break;
                case FUNCTION_PTR_TYPE:
                    ret += sizeof(cfunction);
                    break;
                case BOOL_TYPE:
                case FLOAT_TYPE:
                case INT8_TYPE:
                case INT16_TYPE:
                case INT32_TYPE:
                case ENUM_TYPE:
                    ret += 4;
                    break;
                default:
                    return luaL_error(L, "NYI - argument type");
                }
            }

            lua_pop(L, 1);
        }
    }

#if !defined _WIN64 && !defined __amd64__
    lua_rawgeti(L, usr, 0);
    mt = (const struct ctype*) lua_touserdata(L, -1);
    if (!mt->pointers && mt->type == COMPLEX_DOUBLE_TYPE) {
        ret += sizeof(void*);
    }
    lua_pop(L, 1);
#endif

    return ret;
}

#ifdef _WIN64
#define MAX_REGISTERS(ct) 4 /* rcx, rdx, r8, r9 */

#elif defined __amd64__
#define MAX_INT_REGISTERS(ct) 6 /* rdi, rsi, rdx, rcx, r8, r9 */
#define MAX_FLOAT_REGISTERS(ct) 8 /* xmm0-7 */

#else
#define MAX_INT_REGISTERS(ct) ((ct)->calling_convention == FAST_CALL ? 2 /* ecx, edx */ : 0)
#define MAX_FLOAT_REGISTERS(ct) 0
#endif

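/* Tracks which argument registers have been used while marshalling a call
 * or callback, plus the running offset (off) of the next stack-passed
 * argument. On Win64 a single positional counter covers both int and float
 * slots; on POSIX x64 the int and sse registers are counted separately. */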
struct reg_alloc {
#ifdef _WIN64
    int regs;
    int is_float[4];
    int is_int[4];
#else
    int floats;
    int ints;
#endif
    int off;
};

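/* Stack space reserved for spilling argument registers before the call:
 * 4 slots of 8 bytes on Win64, 6 int + 8 sse slots on POSIX x64, and two
 * 4 byte fastcall slots (rounded up to 16) on x86. */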
#ifdef _WIN64
#define REGISTER_STACK_SPACE(ct) (4*8)
#elif defined __amd64__
#define REGISTER_STACK_SPACE(ct) (14*8)
#else
#define REGISTER_STACK_SPACE(ct) ALIGN_UP(((ct)->calling_convention == FAST_CALL ? 2*4 : 0), 15)
#endif

/* Fastcall:
 * Uses ecx, edx as first two int registers
 * Everything else on the stack (including 64 bit ints)
 * No overflow stack space
 * Pops the stack before returning
 * Returns int in eax, float in ST0
 * We use the same register allocation logic as posix x64 with 2 int regs and 0 float regs
 */

static void get_int(Dst_DECL, const struct ctype* ct, struct reg_alloc* reg, int is_int64)
{
    /* grab the register from the shadow space */
#ifdef _WIN64
    if (reg->regs < MAX_REGISTERS(ct)) {
        | mov rcx, [rbp + 16 + 8*reg->regs]
        reg->regs++;
    }
#elif __amd64__
    if (reg->ints < MAX_INT_REGISTERS(ct)) {
        | mov rcx, [rbp - 80 - 8*reg->ints]
        reg->ints++;
    }
#else
    if (!is_int64 && reg->ints < MAX_INT_REGISTERS(ct)) {
        | mov ecx, [rbp - 8 - 4*reg->ints]
        reg->ints++;
    }
#endif
    else if (is_int64) {
        |.if X64
        | mov rcx, [rbp + reg->off]
        |.else
        | mov rcx, [rbp + reg->off]
        | mov rdx, [rbp + reg->off + 4]
        |.endif
        reg->off += 8;
    } else {
        | mov ecx, [rbp + reg->off]
        reg->off += 4;
    }
}

static void add_int(Dst_DECL, const struct ctype* ct, struct reg_alloc* reg, int is_int64)
{
#ifdef _WIN64
    if (reg->regs < MAX_REGISTERS(ct)) {
        | mov [rsp + 32 + 8*(reg->regs)], rax
        reg->is_int[reg->regs++] = 1;
    }
#elif __amd64__
    if (reg->ints < MAX_INT_REGISTERS(ct)) {
        | mov [rsp + 32 + 8*reg->ints], rax
        reg->ints++;
    }
#else
    if (!is_int64 && reg->ints < MAX_INT_REGISTERS(ct)) {
        | mov [rsp + 32 + 4*reg->ints], rax
        reg->ints++;
    }
#endif

    else if (is_int64) {
        |.if X64
        | mov [rsp + reg->off], rax
        |.else
        | mov [rsp + reg->off], RET_L
        | mov [rsp + reg->off + 4], RET_H
        |.endif
        reg->off += 8;
    } else {
        | mov [rsp+reg->off], eax
        reg->off += 4;
    }
}

static void get_float(Dst_DECL, const struct ctype* ct, struct reg_alloc* reg, int is_double)
{
#if !defined _WIN64 && !defined __amd64__
    assert(MAX_FLOAT_REGISTERS(ct) == 0);
    if (is_double) {
        | fld qword [rbp + reg->off]
        reg->off += 8;
    } else {
        | fld dword [rbp + reg->off]
        reg->off += 4;
    }
#else
    int off;

#ifdef _WIN64
    if (reg->regs < MAX_REGISTERS(ct)) {
        off = -16 - 8*reg->regs;
        reg->regs++;
    }
#else
    if (reg->floats < MAX_FLOAT_REGISTERS(ct)) {
        off = -16 - 8*reg->floats;
        reg->floats++;
    }
#endif
    else {
        off = reg->off;
        reg->off += is_double ? 8 : 4;
    }

    if (is_double) {
        | movq xmm0, qword [rbp + off]
    } else {
        | cvtss2sd xmm0, dword [rbp + off]
    }
#endif
}

static void add_float(Dst_DECL, const struct ctype* ct, struct reg_alloc* reg, int is_double)
{
#if !defined _WIN64 && !defined __amd64__
    assert(MAX_FLOAT_REGISTERS(ct) == 0);
    if (is_double) {
        | fstp qword [rsp + reg->off]
        reg->off += 8;
    } else {
        | fstp dword [rsp + reg->off]
        reg->off += 4;
    }
#else

#ifdef _WIN64
    if (reg->regs < MAX_REGISTERS(ct)) {
        if (is_double) {
            | movq qword [rsp + 32 + 8*(reg->regs)], xmm0
        } else {
            | cvtsd2ss xmm0, xmm0
            | movq qword [rsp + 32 + 8*(reg->regs)], xmm0
        }
        reg->is_float[reg->regs++] = 1;
    }
#else
    if (reg->floats < MAX_FLOAT_REGISTERS(ct)) {
        if (is_double) {
            | movq qword [rsp + 32 + 8*(MAX_INT_REGISTERS(ct) + reg->floats)], xmm0
        } else {
            | cvtsd2ss xmm0, xmm0
            | movq qword [rsp + 32 + 8*(MAX_INT_REGISTERS(ct) + reg->floats)], xmm0
        }
        reg->floats++;
    }
#endif

    else if (is_double) {
        | movq qword [rsp + reg->off], xmm0
        reg->off += 8;
    } else {
        | cvtsd2ss xmm0, xmm0
        | movd dword [rsp + reg->off], xmm0
        reg->off += 4;
    }
#endif
}

#if defined _WIN64 || defined __amd64__
#define add_pointer(jit, ct, reg) add_int(jit, ct, reg, 1)
#define get_pointer(jit, ct, reg) get_int(jit, ct, reg, 1)
#else
#define add_pointer(jit, ct, reg) add_int(jit, ct, reg, 0)
#define get_pointer(jit, ct, reg) get_int(jit, ct, reg, 0)
#endif

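/* Generates a native stub for a Lua function so it can be handed to C code
 * as a plain function pointer. The emitted code saves the incoming argument
 * registers, loads the hardcoded lua_State*, fetches the Lua function (kept
 * in the registry via ref) onto its stack, converts each C argument to a
 * Lua value using the ctype info stored as upvalues, invokes it through
 * lua_callk, and converts the single return value (if any) back into the C
 * return registers. */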
cfunction compile_callback(lua_State* L, int fidx, int ct_usr, const struct ctype* ct)
{
    int i, nargs;
    cfunction* pf;
    struct ctype ct2 = *ct;
    const struct ctype* mt;
    struct reg_alloc reg;
    int num_upvals = 0;
    int top = lua_gettop(L);
    struct jit* Dst = get_jit(L);
    int ref;
    int hidden_arg_off = 0;

    ct_usr = lua_absindex(L, ct_usr);
    fidx = lua_absindex(L, fidx);

    assert(lua_isnil(L, fidx) || lua_isfunction(L, fidx));

    memset(&reg, 0, sizeof(reg));
#ifdef _WIN64
    reg.off = 16 + REGISTER_STACK_SPACE(ct); /* stack registers are above the shadow space */
#elif __amd64__
    reg.off = 16;
#else
    reg.off = 8;
#endif

    dasm_setup(Dst, build_actionlist);

    // add a table to store ctype and function upvalues
    // callback_set assumes the first value is the lua function
    nargs = (int) lua_rawlen(L, ct_usr);
    lua_newtable(L);
    lua_pushvalue(L, -1);
    ref = luaL_ref(L, LUA_REGISTRYINDEX);

    if (ct->has_var_arg) {
        luaL_error(L, "can't create callbacks with varargs");
    }

    // setup a stack frame to hold args for the call into lua_call

    | push rbp
    | mov rbp, rsp
    | push L_ARG
    | // stack is 4 or 8 (mod 16) (L_ARG, rbp, rip)
    |.if X64
    | // 8 to realign, 16 for return vars, 32 for local calls, rest to save registers
    | sub rsp, 8 + 16 + 32 + REGISTER_STACK_SPACE(ct)
    | call ->save_registers
    |.else
    | // 4 to realign, 16 for return vars, 32 for local calls, rest to save registers
    | sub rsp, 4 + 16 + 32 + REGISTER_STACK_SPACE(ct)
    if (ct->calling_convention == FAST_CALL) {
        | call ->save_registers
    }
    |.endif

    // hardcode the lua_State* value into the assembly
    | mov64 L_ARG, L

    /* get the upval table */
    | call_rrr extern lua_rawgeti, L_ARG, LUA_REGISTRYINDEX, ref

    /* get the lua function */
    lua_pushvalue(L, fidx);
    lua_rawseti(L, -2, ++num_upvals);
    assert(num_upvals == CALLBACK_FUNC_USR_IDX);
    | call_rrr extern lua_rawgeti, L_ARG, -1, num_upvals

#if !defined _WIN64 && !defined __amd64__
    lua_rawgeti(L, ct_usr, 0);
    mt = (const struct ctype*) lua_touserdata(L, -1);
    if (!mt->pointers && mt->type == COMPLEX_DOUBLE_TYPE) {
        hidden_arg_off = reg.off;
        reg.off += sizeof(void*);
    }
    lua_pop(L, 1);
#else
    (void) hidden_arg_off;
#endif

    for (i = 1; i <= nargs; i++) {
        lua_rawgeti(L, ct_usr, i);
        mt = (const struct ctype*) lua_touserdata(L, -1);

        if (mt->pointers) {
            lua_getuservalue(L, -1);
            lua_rawseti(L, -3, ++num_upvals); /* usr value */
            lua_rawseti(L, -2, ++num_upvals); /* mt */
            /* on the lua stack in the callback:
             * upval tbl, lua func, i-1 args
             */
            | call_rrr extern lua_rawgeti, L_ARG, -i-1, num_upvals-1
            | call_rrp extern push_cdata, L_ARG, -1, mt
            get_pointer(Dst, ct, &reg);
            | mov [rax], rcx
            | call_rr, extern lua_remove, L_ARG, -2
        } else {
            switch (mt->type) {
            case INT64_TYPE:
                lua_getuservalue(L, -1);
                lua_rawseti(L, -3, ++num_upvals); /* mt */
                lua_pop(L, 1);
                | call_rrp extern push_cdata, L_ARG, 0, mt
                get_int(Dst, ct, &reg, 1);
                |.if X64
                | mov [rax], rcx
                |.else
                | mov [rax], ecx
                | mov [rax+4], edx
                |.endif
                break;

            case INTPTR_TYPE:
                lua_getuservalue(L, -1);
                lua_rawseti(L, -3, ++num_upvals); /* mt */
                lua_pop(L, 1);
                | call_rrp extern push_cdata, L_ARG, 0, mt
                get_pointer(Dst, ct, &reg);
                | mov [rax], rcx
                break;

            case COMPLEX_FLOAT_TYPE:
                lua_pop(L, 1);
#if defined _WIN64 || defined __amd64__
                /* complex floats are two floats packed into a double */
                | call_rrp extern push_cdata, L_ARG, 0, mt
                get_float(Dst, ct, &reg, 1);
                | movq qword [rax], xmm0
#else
                /* complex floats are real followed by imag on the stack */
                | call_rrp extern push_cdata, L_ARG, 0, mt
                get_float(Dst, ct, &reg, 0);
                | fstp dword [rax]
                get_float(Dst, ct, &reg, 0);
                | fstp dword [rax+4]
#endif
                break;

            case COMPLEX_DOUBLE_TYPE:
                lua_pop(L, 1);
                | call_rrp extern push_cdata, L_ARG, 0, mt
                /* real */
                get_float(Dst, ct, &reg, 1);
                |.if X64
                | movq qword [rax], xmm0
                |.else
                | fstp qword [rax]
                |.endif
                /* imag */
                get_float(Dst, ct, &reg, 1);
                |.if X64
                | movq qword [rax+8], xmm0
                |.else
                | fstp qword [rax+8]
                |.endif
                break;

            case FLOAT_TYPE:
            case DOUBLE_TYPE:
                lua_pop(L, 1);
                get_float(Dst, ct, &reg, mt->type == DOUBLE_TYPE);
                |.if X64WIN
                | movq xmm1, xmm0
                | mov rcx, L_ARG
                |.elif X64
                | // for 64bit xmm0 is already set
                | mov rdi, L_ARG
                |.else
                | fstp qword [rsp+4]
                | mov [rsp], L_ARG
                |.endif
                | call extern lua_pushnumber
                break;

            case BOOL_TYPE:
                lua_pop(L, 1);
                get_int(Dst, ct, &reg, 0);
                | movzx ecx, cl
                | call_rr extern lua_pushboolean, L_ARG, rcx
                break;

            case INT8_TYPE:
                lua_pop(L, 1);
                get_int(Dst, ct, &reg, 0);
                if (mt->is_unsigned) {
                    | movzx ecx, cl
                } else {
                    | movsx ecx, cl
                }
                | call_rr extern push_int, L_ARG, rcx
                break;

            case INT16_TYPE:
                lua_pop(L, 1);
                get_int(Dst, ct, &reg, 0);
                if (mt->is_unsigned) {
                    | movzx ecx, cx
                } else {
                    | movsx ecx, cx
                }
                | call_rr extern push_int, L_ARG, rcx
                break;

            case ENUM_TYPE:
            case INT32_TYPE:
                lua_pop(L, 1);
                get_int(Dst, ct, &reg, 0);
                if (mt->is_unsigned) {
                    | call_rr extern push_uint, L_ARG, rcx
                } else {
                    | call_rr extern push_int, L_ARG, rcx
                }
                break;

            default:
                luaL_error(L, "NYI: callback arg type");
            }
        }
    }

    lua_rawgeti(L, ct_usr, 0);
    mt = (const struct ctype*) lua_touserdata(L, -1);

    | call_rrrp extern lua_callk, L_ARG, nargs, (mt->pointers || mt->type != VOID_TYPE) ? 1 : 0, 0

    // Unpack the return argument if not "void", also clean-up the lua stack
    // to remove the return argument and bind table. Use lua_settop rather
    // than lua_pop as lua_pop is implemented as a macro.
    if (mt->pointers) {
        lua_getuservalue(L, -1);
        lua_rawseti(L, -3, ++num_upvals); /* usr value */
        lua_rawseti(L, -2, ++num_upvals); /* mt */
        | call_rrr extern lua_rawgeti, L_ARG, -2, num_upvals-1
        | call_rrrp extern check_typed_pointer, L_ARG, -2, -1, mt
        | mov [rsp+32], rax
        | call_rr extern lua_settop, L_ARG, -4
        | mov rax, [rsp+32]

    } else {
        switch (mt->type) {
        case ENUM_TYPE:
            lua_getuservalue(L, -1);
            lua_rawseti(L, -3, ++num_upvals); /* usr value */
            lua_rawseti(L, -2, ++num_upvals); /* mt */
            | call_rrr extern lua_rawgeti, L_ARG, -2, num_upvals-1
            | call_rrrp, extern check_enum, L_ARG, -2, -1, mt
            | mov [rsp+32], eax
            | call_rr extern lua_settop, L_ARG, -4
            | mov eax, [rsp+32]
            break;

        case VOID_TYPE:
            lua_pop(L, 1);
            | call_rr extern lua_settop, L_ARG, -2
            break;

        case BOOL_TYPE:
        case INT8_TYPE:
        case INT16_TYPE:
        case INT32_TYPE:
            lua_pop(L, 1);
            if (mt->is_unsigned) {
                | call_rr extern check_uint32, L_ARG, -1
            } else {
                | call_rr extern check_int32, L_ARG, -1
            }
            | mov [rsp+32], eax
            | call_rr extern lua_settop, L_ARG, -3
            | mov eax, [rsp+32]
            break;

        case INT64_TYPE:
            lua_pop(L, 1);

            if (mt->is_unsigned) {
                | call_rr extern check_uint64, L_ARG, -1
            } else {
                | call_rr extern check_int64, L_ARG, -1
            }

            |.if X64
            | mov [rsp+32], rax
            |.else
            | mov [rsp+32], RET_L
            | mov [rsp+36], RET_H
            |.endif
            | call_rr extern lua_settop, L_ARG, -3
            |.if X64
            | mov rax, [rsp+32]
            |.else
            | mov RET_L, [rsp+32]
            | mov RET_H, [rsp+36]
            |.endif
            break;

        case INTPTR_TYPE:
            lua_pop(L, 1);
            | call_rr extern check_uintptr, L_ARG, -1
            | mov [rsp+32], rax
            | call_rr extern lua_settop, L_ARG, -3
            | mov rax, [rsp+32]
            break;

        case FLOAT_TYPE:
        case DOUBLE_TYPE:
            lua_pop(L, 1);
            | call_rr extern check_double, L_ARG, -1
            |.if X64
            | movq qword [rsp+32], xmm0
            | call_rr extern lua_settop, L_ARG, -3
            if (mt->type == FLOAT_TYPE) {
                | cvtsd2ss xmm0, qword [rsp+32]
            } else {
                | movq xmm0, qword [rsp+32]
            }
            |.else
            | fstp qword [rsp+32]
            | call_rr extern lua_settop, L_ARG, -3
            | fld qword [rsp+32]
            |.endif
            break;

        case COMPLEX_FLOAT_TYPE:
            lua_pop(L, 1);
#if !defined HAVE_COMPLEX
            luaL_error(L, "ffi lib compiled without complex number support");
#endif
            /* on 64 bit complex floats are two floats packed into a double,
             * on 32 bit returned complex floats use eax and edx */
            | call_rr extern check_complex_float, L_ARG, -1
            |
            |.if X64
            | movq qword [rsp+32], xmm0
            |.else
            | mov [rsp+32], eax
            | mov [rsp+36], edx
            |.endif
            |
            | call_rr extern lua_settop, L_ARG, -3
            |
            |.if X64
            | movq xmm0, qword [rsp+32]
            |.else
            | mov eax, [rsp+32]
            | mov edx, [rsp+36]
            |.endif
            break;

        case COMPLEX_DOUBLE_TYPE:
            lua_pop(L, 1);
#if !defined HAVE_COMPLEX
            luaL_error(L, "ffi lib compiled without complex number support");
#endif
            /* on 64 bit, returned complex doubles use xmm0, xmm1, on 32 bit
             * there is a hidden first parameter that points to 16 bytes where
             * the returned arg is stored which is popped by the called
             * function */
#if defined _WIN64 || defined __amd64__
            | call_rr extern check_complex_double, L_ARG, -1
            | movq qword [rsp+32], xmm0
            | movq qword [rsp+40], xmm1
            | call_rr extern lua_settop, L_ARG, -3
            | movq xmm0, qword [rsp+32]
            | movq xmm1, qword [rsp+40]
#else
            | mov rcx, [rbp + hidden_arg_off]
            | call_rrr extern check_complex_double, rcx, L_ARG, -1
            | sub rsp, 4 // to realign from popped hidden arg
            | call_rr extern lua_settop, L_ARG, -3
#endif
            break;

        default:
            luaL_error(L, "NYI: callback return type");
        }
    }

    |.if X64
    | mov L_ARG, [rbp-8]
    |.else
    | mov L_ARG, [rbp-4]
    |.endif
    | mov rsp, rbp
    | pop rbp
    | ret x86_return_size(L, ct_usr, ct)

    lua_pop(L, 1); /* upval table - already in registry */
    assert(lua_gettop(L) == top);

    ct2.is_jitted = 1;
    pf = (cfunction*) push_cdata(L, ct_usr, &ct2);
    *pf = compile(Dst, L, NULL, ref);

    assert(lua_gettop(L) == top + 1);

    return *pf;
}

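/* Wraps the C function func in a generated stub and pushes it as a Lua C
 * closure. The stub checks the argument count, converts each Lua argument
 * to its C representation (registers and/or stack) using the ctype info
 * held in the closure upvalues, restores the saved errno/LastError, calls
 * the target and routes the return value through the lua_return_* helpers
 * defined in compile_globals. */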
void compile_function(lua_State* L, cfunction func, int ct_usr, const struct ctype* ct)
{
    size_t i, nargs;
    int num_upvals;
    const struct ctype* mbr_ct;
    struct jit* Dst = get_jit(L);
    struct reg_alloc reg;
    void* p;
    int top = lua_gettop(L);
    int* perr = &Dst->last_errno;

    ct_usr = lua_absindex(L, ct_usr);

    memset(&reg, 0, sizeof(reg));
    reg.off = 32 + REGISTER_STACK_SPACE(ct);

    dasm_setup(Dst, build_actionlist);

    p = push_cdata(L, ct_usr, ct);
    *(cfunction*) p = func;
    num_upvals = 1;

    nargs = lua_rawlen(L, ct_usr);

    if (ct->calling_convention != C_CALL && ct->has_var_arg) {
        luaL_error(L, "vararg is only allowed with the c calling convention");
    }

    | push rbp
    | mov rbp, rsp
    | push L_ARG
    | push TOP
    | // stack is 0 (mod 16) (TOP, L_ARG, rbp, rip)
    |
    | // Get L from our arguments and allocate some stack for lua_gettop
    |.if X64WIN
    | mov L_ARG, rcx
    | sub rsp, 32 // shadow space
    |.elif X64
    | mov L_ARG, rdi
    |.else
    | mov L_ARG, [rbp + 8]
    | sub rsp, 16
    |.endif
    |
    | call_r extern lua_gettop, L_ARG
    | mov TOP, rax // no need for movzxd rax, eax - high word guaranteed to be zero by x86-64
    | cmp rax, nargs
    | jl ->too_few_arguments

    if (!ct->has_var_arg) {
        | jg ->too_many_arguments
    }

    /* no need to zero extend eax returned by lua_gettop to rax as x86-64
     * guarantees that the upper 32 bits will be zero */
    | shl rax, 4 // reserve 16 bytes per argument - this maintains the alignment mod 16
    | sub rsp, rax
    | sub rsp, 32 + REGISTER_STACK_SPACE(ct) // reserve an extra 32 to call local functions

#if !defined _WIN64 && !defined __amd64__
    /* Returned complex doubles require a hidden first parameter where the
     * data is stored, which is popped by the calling code. */
    lua_rawgeti(L, ct_usr, 0);
    mbr_ct = (const struct ctype*) lua_touserdata(L, -1);
    if (!mbr_ct->pointers && mbr_ct->type == COMPLEX_DOUBLE_TYPE) {
        /* we can allocate more space for arguments as long as no add_*
         * function has been called yet, mbr_ct will be added as an upvalue in
         * the return processing later */
        | call_rrp extern push_cdata, L_ARG, 0, mbr_ct
        | sub rsp, 16
        add_pointer(Dst, ct, &reg);
    }
    lua_pop(L, 1);
#endif

    for (i = 1; i <= nargs; i++) {
        lua_rawgeti(L, ct_usr, (int) i);
        mbr_ct = (const struct ctype*) lua_touserdata(L, -1);

        if (mbr_ct->pointers) {
            lua_getuservalue(L, -1);
            num_upvals += 2;
            | call_rrrp extern check_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
            add_pointer(Dst, ct, &reg);
        } else {
            switch (mbr_ct->type) {
            case FUNCTION_PTR_TYPE:
                lua_getuservalue(L, -1);
                num_upvals += 2;
                | call_rrrp extern check_typed_cfunction, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
                add_pointer(Dst, ct, &reg);
                break;

            case ENUM_TYPE:
                lua_getuservalue(L, -1);
                num_upvals += 2;
                | call_rrrp, extern check_enum, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct
                add_int(Dst, ct, &reg, 0);
                break;

            case INT8_TYPE:
                | call_rr extern check_int32, L_ARG, i
                if (mbr_ct->is_unsigned) {
                    | movzx eax, al
                } else {
                    | movsx eax, al
                }
                add_int(Dst, ct, &reg, 0);
                lua_pop(L, 1);
                break;

            case INT16_TYPE:
                | call_rr extern check_int32, L_ARG, i
                if (mbr_ct->is_unsigned) {
                    | movzx eax, ax
                } else {
                    | movsx eax, ax
                }
                add_int(Dst, ct, &reg, 0);
                lua_pop(L, 1);
                break;

            case BOOL_TYPE:
                | call_rr extern check_int32, L_ARG, i
                | cmp eax, 0
                | setne al
                | movzx eax, al
                add_int(Dst, ct, &reg, 0);
                lua_pop(L, 1);
                break;

            case INT32_TYPE:
                if (mbr_ct->is_unsigned) {
                    | call_rr extern check_uint32, L_ARG, i
                } else {
                    | call_rr extern check_int32, L_ARG, i
                }
                add_int(Dst, ct, &reg, 0);
                lua_pop(L, 1);
                break;

            case INTPTR_TYPE:
                | call_rr extern check_uintptr, L_ARG, i
                add_pointer(Dst, ct, &reg);
                lua_pop(L, 1);
                break;

            case INT64_TYPE:
                if (mbr_ct->is_unsigned) {
                    | call_rr extern check_uint64, L_ARG, i
                } else {
                    | call_rr extern check_int64, L_ARG, i
                }
                add_int(Dst, ct, &reg, 1);
                lua_pop(L, 1);
                break;

            case DOUBLE_TYPE:
                | call_rr extern check_double, L_ARG, i
                add_float(Dst, ct, &reg, 1);
                lua_pop(L, 1);
                break;

            case COMPLEX_DOUBLE_TYPE:
                /* on 64 bit, returned complex doubles use xmm0, xmm1, on 32 bit
                 * there is a hidden first parameter that points to 16 bytes where
                 * the returned arg is stored (this is popped by the called
                 * function) */
#if defined _WIN64 || defined __amd64__
                | call_rr extern check_complex_double, L_ARG, i
                add_float(Dst, ct, &reg, 1);
                | movq xmm0, xmm1
                add_float(Dst, ct, &reg, 1);
#else
                | lea rax, [rsp+reg.off]
                | sub rsp, 4
                | call_rrr extern check_complex_double, rax, L_ARG, i
                reg.off += 16;
#endif
                lua_pop(L, 1);
                break;

            case FLOAT_TYPE:
                | call_rr extern check_double, L_ARG, i
                add_float(Dst, ct, &reg, 0);
                lua_pop(L, 1);
                break;

            case COMPLEX_FLOAT_TYPE:
#if defined _WIN64 || defined __amd64__
                | call_rr extern check_complex_float, L_ARG, i
                /* complex floats are two floats packed into a double */
                add_float(Dst, ct, &reg, 1);
#else
                /* returned complex floats use eax and edx */
                | call_rr extern check_complex_float, L_ARG, i
                | mov [rsp], eax
                | fld dword [rsp]
                add_float(Dst, ct, &reg, 0);
                | mov [rsp], edx
                | fld dword [rsp]
                add_float(Dst, ct, &reg, 0);
#endif
                lua_pop(L, 1);
                break;

            default:
                luaL_error(L, "NYI: call arg type");
            }
        }
    }

    if (ct->has_var_arg) {
#ifdef _WIN64
        |.if X64WIN
        if (reg.regs < MAX_REGISTERS(ct)) {
            assert(reg.regs == nargs);
            | cmp TOP, MAX_REGISTERS(ct)
            | jle >1
            | // unpack onto stack
            | mov rax, rsp
            | add rax, 32 + 8*MAX_REGISTERS(ct)
            | call_rrrr extern unpack_varargs_stack, L_ARG, MAX_REGISTERS(ct)+1, TOP, rax
            | // unpack to registers
            | mov rax, rsp
            | add rax, 32 + 8*(reg.regs)
            | call_rrrr extern unpack_varargs_reg, L_ARG, nargs+1, MAX_REGISTERS(ct), rax
            | jmp >2
            |1:
            | // unpack just to registers
            | mov rax, rsp
            | add rax, 32 + 8*(reg.regs)
            | call_rrrr extern unpack_varargs_reg, L_ARG, nargs+1, TOP, rax
            |2:
        } else {
            | // unpack just to stack
            | mov rax, rsp
            | add rax, reg.off
            | call_rrrr extern unpack_varargs_stack, L_ARG, nargs+1, TOP, rax
        }

        for (i = nargs; i < MAX_REGISTERS(ct); i++) {
            reg.is_int[i] = reg.is_float[i] = 1;
        }
        reg.regs = MAX_REGISTERS(ct);
#elif defined __amd64__
        |.elif X64
        if (reg.floats < MAX_FLOAT_REGISTERS(ct)) {
            | mov rax, rsp
            | add rax, 32 + 8*(MAX_INT_REGISTERS(ct) + reg.floats)
            | call_rrrrr extern unpack_varargs_float, L_ARG, nargs+1, TOP, MAX_FLOAT_REGISTERS(ct) - reg.floats, rax
        }

        if (reg.ints < MAX_INT_REGISTERS(ct)) {
            | mov rax, rsp
            | add rax, 32 + 8*(reg.ints)
            | call_rrrrr extern unpack_varargs_int, L_ARG, nargs+1, TOP, MAX_INT_REGISTERS(ct) - reg.ints, rax
        }

        | mov rax, rsp
        | add rax, reg.off
        | call_rrrrrr extern unpack_varargs_stack_skip, L_ARG, nargs+1, TOP, MAX_INT_REGISTERS(ct) - reg.ints, MAX_FLOAT_REGISTERS(ct) - reg.floats, rax

        reg.floats = MAX_FLOAT_REGISTERS(ct);
        reg.ints = MAX_INT_REGISTERS(ct);
#else
        |.else
        | mov rax, rsp
        | add rax, reg.off
        | call_rrrr extern unpack_varargs_stack, L_ARG, nargs+1, TOP, rax
        |.endif
#endif
    }

    | mov64 rcx, perr
    | mov eax, dword [rcx]
    | call_r extern SetLastError, rax

    /* remove the stack space to call local functions */
    |.if X32WIN
    | add rsp, 28 // SetLastError will have already popped 4
    |.else
    | add rsp, 32
    |.endif

#ifdef _WIN64
    |.if X64WIN
    switch (reg.regs) {
    case 4:
        if (reg.is_float[3]) {
            | movq xmm3, qword [rsp + 8*3]
        }
        if (reg.is_int[3]) {
            | mov r9, [rsp + 8*3]
        }
    case 3:
        if (reg.is_float[2]) {
            | movq xmm2, qword [rsp + 8*2]
        }
        if (reg.is_int[2]) {
            | mov r8, [rsp + 8*2]
        }
    case 2:
        if (reg.is_float[1]) {
            | movq xmm1, qword [rsp + 8*1]
        }
        if (reg.is_int[1]) {
            | mov rdx, [rsp + 8*1]
        }
    case 1:
        if (reg.is_float[0]) {
            | movq xmm0, qword [rsp]
        }
        if (reg.is_int[0]) {
            | mov rcx, [rsp]
        }
    case 0:
        break;
    }

    /* don't remove the space for the registers as we need 32 bytes of register overflow space */
    assert(REGISTER_STACK_SPACE(ct) == 32);

#elif defined __amd64__
    |.elif X64
    switch (reg.floats) {
    case 8:
        | movq xmm7, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+7)]
    case 7:
        | movq xmm6, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+6)]
    case 6:
        | movq xmm5, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+5)]
    case 5:
        | movq xmm4, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+4)]
    case 4:
        | movq xmm3, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+3)]
    case 3:
        | movq xmm2, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+2)]
    case 2:
        | movq xmm1, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+1)]
    case 1:
        | movq xmm0, qword [rsp + 8*(MAX_INT_REGISTERS(ct))]
    case 0:
        break;
    }

    switch (reg.ints) {
    case 6:
        | mov r9, [rsp + 8*5]
    case 5:
        | mov r8, [rsp + 8*4]
    case 4:
        | mov rcx, [rsp + 8*3]
    case 3:
        | mov rdx, [rsp + 8*2]
    case 2:
        | mov rsi, [rsp + 8*1]
    case 1:
        | mov rdi, [rsp]
    case 0:
        break;
    }

    | add rsp, REGISTER_STACK_SPACE(ct)
#else
    |.else
    if (ct->calling_convention == FAST_CALL) {
        switch (reg.ints) {
        case 2:
            | mov edx, [rsp + 4]
        case 1:
            | mov ecx, [rsp]
        case 0:
            break;
        }

        | add rsp, REGISTER_STACK_SPACE(ct)
    }
    |.endif
#endif

#ifdef __amd64__
    if (ct->has_var_arg) {
        /* al stores an upper limit on the number of float registers; note that
         * it's allowed to be more than the actual number of float registers used
         * as long as it's 0-8 */
        |.if X64 and not X64WIN
        | mov al, 8
        |.endif
    }
#endif

    | call extern FUNCTION
    | sub rsp, 48 // 32 to be able to call local functions, 16 so we can store some local variables

    /* note on windows X86 the stack may be only aligned to 4 (stdcall will
     * have popped a multiple of 4 bytes), but we don't need 16 byte alignment on
     * that platform
     */

    lua_rawgeti(L, ct_usr, 0);
    mbr_ct = (const struct ctype*) lua_touserdata(L, -1);

    if (mbr_ct->pointers || mbr_ct->type == INTPTR_TYPE) {
        lua_getuservalue(L, -1);
        num_upvals += 2;
        | mov [rsp+32], rax // save the pointer
        | get_errno
        | call_rrp extern push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct
        | mov rcx, [rsp+32]
        | mov [rax], rcx // *(void**) cdata = val
        | jmp ->lua_return_arg

    } else {
        switch (mbr_ct->type) {
        case FUNCTION_PTR_TYPE:
            lua_getuservalue(L, -1);
            num_upvals += 2;
            | mov [rsp+32], rax // save the function pointer
            | get_errno
            | call_rrp extern push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct
            | mov rcx, [rsp+32]
            | mov [rax], rcx // *(cfunction**) cdata = val
            | jmp ->lua_return_arg
            break;

        case INT64_TYPE:
            num_upvals++;
            | // save the return value
            |.if X64
            | mov [rsp+32], rax
            |.else
            | mov [rsp+36], edx // high
            | mov [rsp+32], eax // low
            |.endif
            |
            | get_errno
            | call_rrp extern push_cdata, L_ARG, 0, mbr_ct
            |
            | // *(int64_t*) cdata = val
            |.if X64
            | mov rcx, [rsp+32]
            | mov [rax], rcx
            |.else
            | mov rcx, [rsp+36]
            | mov rdx, [rsp+32]
            | mov [rax+4], rcx
            | mov [rax], rdx
            |.endif
            |
            | jmp ->lua_return_arg
            break;

        case COMPLEX_FLOAT_TYPE:
            num_upvals++;
            |.if X64
            | // complex floats are returned as two floats packed into xmm0
            | movq qword [rsp+32], xmm0
            |.else
            | // complex floats are returned as floats in eax and edx
            | mov [rsp+32], eax
            | mov [rsp+36], edx
            |.endif
            |
            | get_errno
            | call_rrp extern push_cdata, L_ARG, 0, mbr_ct
            |
            | // ((complex_float*) cdata) = val
            |.if X64
            | mov rcx, [rsp+32]
            | mov [rax], rcx
            |.else
            | mov ecx, [rsp+32]
            | mov [rax], ecx
            | mov ecx, [rsp+36]
            | mov [rax+4], ecx
            |.endif
            |
            | jmp ->lua_return_arg
            break;

        case COMPLEX_DOUBLE_TYPE:
            num_upvals++;
            |.if X64
            | // complex doubles are returned as xmm0 and xmm1
            | movq qword [rsp+40], xmm1
            | movq qword [rsp+32], xmm0
            |
            | get_errno
            | call_rrp extern push_cdata, L_ARG, 0, mbr_ct
            |
            | // ((complex_double*) cdata)->real = val0
            | // ((complex_double*) cdata)->imag = val1
            | mov rcx, [rsp+40]
            | mov [rax+8], rcx
            | mov rcx, [rsp+32]
            | mov [rax], rcx
            |
            |.else
            | // On 32 bit we have already handled this by pushing a new cdata
            | // and handing the cdata ptr in as the hidden first param, but
            | // still need to add mbr_ct as an upval as it's used earlier.
            | // Hidden param was popped by called function, we need to realign.
            | sub rsp, 4
            | get_errno
            |.endif
            |
            | jmp ->lua_return_arg
            break;

        case VOID_TYPE:
            lua_pop(L, 1);
            | jmp ->lua_return_void
            break;

        case BOOL_TYPE:
            lua_pop(L, 1);
            | jmp ->lua_return_bool
            break;

        case INT8_TYPE:
            lua_pop(L, 1);
            if (mbr_ct->is_unsigned) {
                | movzx eax, al
            } else {
                | movsx eax, al
            }
            | jmp ->lua_return_int
            break;

        case INT16_TYPE:
            lua_pop(L, 1);
            if (mbr_ct->is_unsigned) {
                | movzx eax, ax
            } else {
                | movsx eax, ax
            }
            | jmp ->lua_return_int
            break;

        case INT32_TYPE:
        case ENUM_TYPE:
            lua_pop(L, 1);
            if (mbr_ct->is_unsigned) {
                | jmp ->lua_return_uint
            } else {
                | jmp ->lua_return_int
            }
            break;

        case FLOAT_TYPE:
            lua_pop(L, 1);
            |.if X64
            | cvtss2sd xmm0, xmm0
            |.endif
            | jmp ->lua_return_double
            break;

        case DOUBLE_TYPE:
            lua_pop(L, 1);
            | jmp ->lua_return_double
            break;

        default:
            luaL_error(L, "NYI: call return type");
        }
    }

    assert(lua_gettop(L) == top + num_upvals);
    {
        cfunction f = compile(Dst, L, func, LUA_NOREF);
        /* add a callback as an upval so that the jitted code gets cleaned up when
         * the function gets gc'd */
        push_callback(L, f);
        lua_pushcclosure(L, (lua_CFunction) f, num_upvals+1);
    }
}