From 14a234f3ed5b654b416e3fa92dbc684cf482681c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Madar=C3=A1sz?= Date: Fri, 12 Apr 2024 21:19:31 +0200 Subject: [PATCH] tools: luaffi --- tools/luaffi/MAKE.bat | 118 ++ tools/luaffi/Makefile | 54 + tools/luaffi/README.md | 112 + tools/luaffi/call.c | 280 +++ tools/luaffi/call_arm.dasc | 639 ++++++ tools/luaffi/call_x86.dasc | 1594 ++++++++++++++ tools/luaffi/ctype.c | 268 +++ tools/luaffi/dynasm/dasm_arm.h | 455 ++++ tools/luaffi/dynasm/dasm_arm.lua | 952 +++++++++ tools/luaffi/dynasm/dasm_ppc.h | 408 ++++ tools/luaffi/dynasm/dasm_ppc.lua | 1225 +++++++++++ tools/luaffi/dynasm/dasm_proto.h | 83 + tools/luaffi/dynasm/dasm_x64.lua | 12 + tools/luaffi/dynasm/dasm_x86.h | 470 +++++ tools/luaffi/dynasm/dasm_x86.lua | 1931 +++++++++++++++++ tools/luaffi/dynasm/dynasm.lua | 1096 ++++++++++ tools/luaffi/ffi.c | 3334 ++++++++++++++++++++++++++++++ tools/luaffi/ffi.h | 450 ++++ tools/luaffi/parser.c | 2552 +++++++++++++++++++++++ tools/luaffi/test.c | 677 ++++++ tools/luaffi/test.lua | 890 ++++++++ 21 files changed, 17600 insertions(+) create mode 100644 tools/luaffi/MAKE.bat create mode 100644 tools/luaffi/Makefile create mode 100644 tools/luaffi/README.md create mode 100644 tools/luaffi/call.c create mode 100644 tools/luaffi/call_arm.dasc create mode 100644 tools/luaffi/call_x86.dasc create mode 100644 tools/luaffi/ctype.c create mode 100644 tools/luaffi/dynasm/dasm_arm.h create mode 100644 tools/luaffi/dynasm/dasm_arm.lua create mode 100644 tools/luaffi/dynasm/dasm_ppc.h create mode 100644 tools/luaffi/dynasm/dasm_ppc.lua create mode 100644 tools/luaffi/dynasm/dasm_proto.h create mode 100644 tools/luaffi/dynasm/dasm_x64.lua create mode 100644 tools/luaffi/dynasm/dasm_x86.h create mode 100644 tools/luaffi/dynasm/dasm_x86.lua create mode 100644 tools/luaffi/dynasm/dynasm.lua create mode 100644 tools/luaffi/ffi.c create mode 100644 tools/luaffi/ffi.h create mode 100644 tools/luaffi/parser.c create mode 100644 
tools/luaffi/test.c create mode 100644 tools/luaffi/test.lua diff --git a/tools/luaffi/MAKE.bat b/tools/luaffi/MAKE.bat new file mode 100644 index 0000000..4df4645 --- /dev/null +++ b/tools/luaffi/MAKE.bat @@ -0,0 +1,118 @@ +@echo off +cd /d "%~dp0" + +if "%1"=="tidy" ( + del *.exe 1>nul 2>nul + del *.dll 1>nul 2>nul + del *.lib 1>nul 2>nul + del *.obj 1>nul 2>nul + del *.exp 1>nul 2>nul + del *.ilk 1>nul 2>nul + del *.pdb 1>nul 2>nul + del *.def 1>nul 2>nul + del call_*.h 1>nul 2>nul + rd /q /s lua 1>nul 2>nul + exit /b +) + +if "%1"=="dll" ( + + if not exist lua git clone https://github.com/lua/lua + if not exist lua.dll ( + cl lua\onelua.c -Ilua /Felua.exe + + cl lua\onelua.c -Ilua /Felua.dll /DLUA_BUILD_AS_DLL /LD + echo LIBRARY LUA > lua.def + echo EXPORTS >> lua.def + for /f "skip=19 tokens=4" %%A in ('dumpbin /exports lua.dll') do echo %%A >> lua.def + lib /def:lua.def /out:lua.lib /machine:x64 + ) + + lua dynasm\dynasm.lua -LNE -D X32WIN -o call_x86.h call_x86.dasc + lua dynasm\dynasm.lua -LNE -D X64 -o call_x64.h call_x86.dasc + lua dynasm\dynasm.lua -LNE -D X64 -D X64WIN -o call_x64win.h call_x86.dasc + lua dynasm\dynasm.lua -LNE -o call_arm.h call_arm.dasc + rem COMMON and OPTIONS are assigned inside this parenthesized block, so they are read back through CALL with doubled percent signs to get the run-time value instead of the empty parse-time value + set COMMON=/nologo /Zi /D_CRT_SECURE_NO_DEPRECATE /DLUA_FFI_BUILD_AS_DLL + + if "%1"=="rel" ( + call set OPTIONS=/MD /Ox %%COMMON%% + ) else ( + call set OPTIONS=/MDd /Od %%COMMON%% + ) + + call CL %%OPTIONS%% /I. /Ilua /DLUA_DLL_NAME="lua.dll" call.c ctype.c ffi.c parser.c lua.lib /Feffi.dll /LD + + call CL %%OPTIONS%% /Gd test.c /Fe"test_cdecl.dll" /LD + call CL %%OPTIONS%% /Gz test.c /Fe"test_stdcall.dll" /LD + call CL %%OPTIONS%% /Gr test.c /Fe"test_fastcall.dll" /LD + + lua test.lua + + del *.exp *.ilk *.pdb *.obj *.manifest 2> nul 1> nul + + exit /b +) + +if "%1"=="3rd" ( + + echo #ifndef LUAFFI_H > 3rd_luaffi.h + echo #define LUAFFI_H >> 3rd_luaffi.h + echo. >> 3rd_luaffi.h + + type ffi.h >> 3rd_luaffi.h + echo. >> 3rd_luaffi.h + + type "dynasm\dasm_proto.h" >> 3rd_luaffi.h + echo. 
>> 3rd_luaffi.h + + echo #endif >> 3rd_luaffi.h + echo. >> 3rd_luaffi.h + + + echo #ifdef LUAFFI_C >> 3rd_luaffi.h + echo. >> 3rd_luaffi.h + + echo static cfunction compile(Dst_DECL, lua_State* L, cfunction func, int ref^); >> 3rd_luaffi.h + echo. >> 3rd_luaffi.h + + echo #if defined __arm__ ^|^| defined __arm ^|^| defined __ARM__ ^|^| defined __ARM ^|^| defined ARM ^|^| defined _ARM_ ^|^| defined ARMV4I ^|^| defined _M_ARM >> 3rd_luaffi.h + type "dynasm\dasm_arm.h" >> 3rd_luaffi.h + echo #else >> 3rd_luaffi.h + type "dynasm\dasm_x86.h" >> 3rd_luaffi.h + echo #endif >> 3rd_luaffi.h + echo. >> 3rd_luaffi.h + + echo #if defined __arm__ ^|^| defined __arm ^|^| defined __ARM__ ^|^| defined __ARM ^|^| defined ARM ^|^| defined _ARM_ ^|^| defined ARMV4I ^|^| defined _M_ARM >> 3rd_luaffi.h + type "call_arm.h" >> 3rd_luaffi.h + echo #elif defined _WIN64 >> 3rd_luaffi.h + type "call_x64win.h" >> 3rd_luaffi.h + echo #elif defined __amd64__ >> 3rd_luaffi.h + type "call_x64.h" >> 3rd_luaffi.h + echo #else >> 3rd_luaffi.h + type "call_x86.h" >> 3rd_luaffi.h + echo #endif >> 3rd_luaffi.h + echo. >> 3rd_luaffi.h + + type call.c >> 3rd_luaffi.h + type ctype.c >> 3rd_luaffi.h + type parser.c >> 3rd_luaffi.h + type ffi.c >> 3rd_luaffi.h + echo. >> 3rd_luaffi.h + + echo #endif >> 3rd_luaffi.h + echo. 
>> 3rd_luaffi.h + + ..\fart -- 3rd_luaffi.h "#pragma once" "//#pragma once" + ..\fart -- 3rd_luaffi.h "#include \"" "//#include \"" + ..\fart -- 3rd_luaffi.h "# include" "//# include" + ..\fart -- 3rd_luaffi.h "dasm_State*" "struct dasm_State*" + ..\fart -- 3rd_luaffi.h "EXPORT" "LUAFFI_EXPORT" + ..\fart -- 3rd_luaffi.h "ALIGN_UP" "LUAFFI_ALIGN_UP" + ..\fart -- 3rd_luaffi.h "get_int" "LUAFFI_get_int" +) + +if not exist "..\fart.exe" echo ..\fart.exe not found && exit /b +if not exist "call_x86.h" call make dll +if not exist "3rd_luaffi.h" call make 3rd +if exist "3rd_luaffi.h" move /y 3rd_luaffi.h ..\..\engine\split && call make tidy diff --git a/tools/luaffi/Makefile b/tools/luaffi/Makefile new file mode 100644 index 0000000..181af30 --- /dev/null +++ b/tools/luaffi/Makefile @@ -0,0 +1,54 @@ +.PHONY: all clean test + +PKG_CONFIG=pkg-config +LUA=lua + +#LUA_CFLAGS=`$(PKG_CONFIG) --cflags lua5.2 2>/dev/null || $(PKG_CONFIG) --cflags lua` +LUA_CFLAGS=-I$(PWD)/../lua-5.2.4/src +SOCFLAGS=-fPIC +SOCC=$(CC) -shared $(SOCFLAGS) +CFLAGS=-fPIC -g -Wall -Werror $(LUA_CFLAGS) -fvisibility=hidden -Wno-unused-function --std=gnu99 + +MODNAME=ffi +MODSO=$(MODNAME).so + +all: + if [ `uname` = "Darwin" ]; then $(MAKE) macosx; else $(MAKE) posix; fi + +test: + if [ `uname` = "Darwin" ]; then $(MAKE) test_macosx; else $(MAKE) test_posix; fi + +macosx: + $(MAKE) posix "SOCC=MACOSX_DEPLOYMENT_TARGET=10.3 $(CC) -dynamiclib -single_module -undefined dynamic_lookup $(SOCFLAGS)" + +test_macosx: + $(MAKE) test_posix "SOCC=MACOSX_DEPLOYMENT_TARGET=10.3 $(CC) -dynamiclib -single_module -undefined dynamic_lookup $(SOCFLAGS)" + +posix: $(MODSO) test_cdecl.so + +clean: + rm -f *.o *.so call_*.h + +call_x86.h: call_x86.dasc dynasm/*.lua + $(LUA) dynasm/dynasm.lua -LN -o $@ $< + +call_x64.h: call_x86.dasc dynasm/*.lua + $(LUA) dynasm/dynasm.lua -D X64 -LN -o $@ $< + +call_x64win.h: call_x86.dasc dynasm/*.lua + $(LUA) dynasm/dynasm.lua -D X64 -D X64WIN -LN -o $@ $< + +%.o: %.c *.h dynasm/*.h 
call_x86.h call_x64.h call_x64win.h + $(CC) $(CFLAGS) -o $@ -c $< + +$(MODSO): ffi.o ctype.o parser.o call.o + $(SOCC) $^ -o $@ + +test_cdecl.so: test.o + $(SOCC) $^ -o $@ + +test_posix: test_cdecl.so $(MODSO) + LD_LIBRARY_PATH=./ $(LUA) test.lua + + + diff --git a/tools/luaffi/README.md b/tools/luaffi/README.md new file mode 100644 index 0000000..873a120 --- /dev/null +++ b/tools/luaffi/README.md @@ -0,0 +1,112 @@ +About +----- +This is a library for calling C functions and manipulating C types from lua. It +is designed to be interface compatible with the FFI library in luajit (see +http://luajit.org/ext_ffi.html). It can parse C function declarations and +struct definitions that have been directly copied out of C header files and +into lua source as a string. + +License +------- +Copyright (c) 2011 James R. McKaskill. +MIT same as Lua 5.1. See full license text in ffi.h. + +Source +------ +https://github.com/jmckaskill/luaffi + +Platforms +--------- +Currently supported: +- windows x86/x64 +- linux x86/x64 +- windows CE ARM little endian (ARMv4+) +- OSX x86/x64 + +Currently only dll builds are supported (ie no static). + +Runs with both Lua 5.1 and Lua 5.2 beta. + +Build +----- + +On windows use msvcbuild.bat in a visual studio cmd prompt. Available targets are: +- nothing or release: default release build +- debug: debug build +- test: build and run the test debug build +- test-release: build and run the test release build +- clean: cleanup object files + +Edit msvcbuild.bat if your lua exe, lib, lua include path, or lua dll name +differ from c:\Lua5.1 and lua5.1.dll. + +The build script does not build for CE as this is non-trivial and very +dependent on which CE profile (or even a custom one) is used. Instead, to build on CE, +add generate_call_h.bat as a pre-build event and then build *.c with UNDER_CE +defined plus whatever defines windows.h requires. + +On posix use make. 
Available targets are: +- nothing or all: default release build +- debug: debug build +- test: build and run the test build +- clean: cleanup object files +- macosx: release build for Mac OSX + +Edit the Makefile if your lua exe differs from `lua` or if you can't get +the include and lib arguments from pkg-config. + +Known Issues +------------ +- Has not been bullet proof tested +- Casting is different from luajit. For the moment this follows C++ + - ffi.cast is equivalent to a C cast in C++ (T t = (T) f) + - ffi.new and ctype() are equivalent to an implicit cast in C++ (T t = f) + - since this follows C++ semantics void* does not cast to T* (an explicit + cast using ffi.cast is required) +- Comparing a ctype pointer to nil doesn't work the same as luajit. This is + unfixable with the current metamethod semantics. Instead use ffi.C.NULL +- Constant expressions can't handle non-integer intermediate values (eg + offsetof won't work because it manipulates pointers) +- Not all metamethods work with lua 5.1 (eg char* + number). This is due to + the way metamethods are looked up with mixed types in Lua 5.1. If you need +this, upgrade to Lua 5.2 or use boxed numbers (uint64_t and uintptr_t). +- All bitfields are treated as unsigned (does anyone even use signed + bitfields?). Note that "int s:8" is unsigned on unix x86/x64, but signed on +windows. + +Todo +---- +See Github issues for the most up-to-date list. +- Fix arm support - broken since the callback refactor +- Vectors +- C++ reference types +- Subtracting one pointer from another +- Variable sized members in unions (is this needed?) + +How it works +------------ +Types are represented by a struct ctype structure and an associated user value +table. The table is shared between all related types for structs, unions, and +functions. Its members have the types of struct members, function argument +types, etc. 
The struct ctype structure then contains the modifications from +the base type (eg number of pointers, array size, etc). + +Types are pushed into lua as a userdata containing the struct ctype with a +user value (or fenv in 5.1) set to the shared type table. + +Boxed cdata types are pushed into lua as a userdata containing the struct +cdata structure (which contains the struct ctype of the data as its header) +followed by the boxed data. + +The functions in ffi.c provide the cdata and ctype metatables and ffi.* +functions which manipulate these two types. + +C functions (and function pointers) are pushed into lua as a lua c function +with the function pointer cdata as the first upvalue. The actual code is JITed +using dynasm (see call_x86.dasc). The JITed code does the following in order: +1. Calls the needed unpack functions in ffi.c placing each argument on the HW stack +2. Updates errno +3. Performs the c call +4. Retrieves errno +5. Pushes the result back into lua from the HW register or stack + diff --git a/tools/luaffi/call.c b/tools/luaffi/call.c new file mode 100644 index 0000000..36f9aab --- /dev/null +++ b/tools/luaffi/call.c @@ -0,0 +1,280 @@ +/* vim: ts=4 sw=4 sts=4 et tw=78 + * Copyright (c) 2011 James R. McKaskill. 
See license in ffi.h + */ +#include "ffi.h" + +static cfunction compile(Dst_DECL, lua_State* L, cfunction func, int ref); + +static void* reserve_code(struct jit* jit, lua_State* L, size_t sz); +static void commit_code(struct jit* jit, void* p, size_t sz); + +static void push_int(lua_State* L, int val) +{ lua_pushnumber(L, val); } + +static void push_uint(lua_State* L, unsigned int val) +{ lua_pushnumber(L, val); } + +static void push_float(lua_State* L, float val) +{ lua_pushnumber(L, val); } + +#ifndef _WIN32 +static int GetLastError(void) +{ return errno; } +static void SetLastError(int err) +{ errno = err; } +#endif + +#ifdef NDEBUG +#define shred(a,b,c) +#else +#define shred(p,s,e) memset((uint8_t*)(p)+(s),0xCC,(e)-(s)) +#endif + + +#ifdef _WIN64 +#include "dynasm/dasm_x86.h" +#include "call_x64win.h" +#elif defined __amd64__ +#include "dynasm/dasm_x86.h" +#include "call_x64.h" +#elif defined __arm__ || defined __arm || defined __ARM__ || defined __ARM || defined ARM || defined _ARM_ || defined ARMV4I || defined _M_ARM +#include "dynasm/dasm_arm.h" +#include "call_arm.h" +#else +#include "dynasm/dasm_x86.h" +#include "call_x86.h" +#endif + +struct jit_head { + size_t size; + int ref; + uint8_t jump[JUMP_SIZE]; +}; + +#define LINKTABLE_MAX_SIZE (sizeof(extnames) / sizeof(extnames[0]) * (JUMP_SIZE)) + +static cfunction compile(struct jit* jit, lua_State* L, cfunction func, int ref) +{ + struct jit_head* code; + size_t codesz; + int err; + + dasm_checkstep(jit, -1); + if ((err = dasm_link(jit, &codesz)) != 0) { + char buf[32]; + sprintf(buf, "%x", err); + luaL_error(L, "dasm_link error %s", buf); + } + + codesz += sizeof(struct jit_head); + code = (struct jit_head*) reserve_code(jit, L, codesz); + code->ref = ref; + code->size = codesz; + compile_extern_jump(jit, L, func, code->jump); + + if ((err = dasm_encode(jit, code+1)) != 0) { + char buf[32]; + sprintf(buf, "%x", err); + commit_code(jit, code, 0); + luaL_error(L, "dasm_encode error %s", buf); + } + + 
commit_code(jit, code, codesz); + return (cfunction) (code+1); +} + +typedef uint8_t jump_t[JUMP_SIZE]; + +int get_extern(struct jit* jit, uint8_t* addr, int idx, int type) +{ + struct page* page = jit->pages[jit->pagenum-1]; + jump_t* jumps = (jump_t*) (page+1); + struct jit_head* h = (struct jit_head*) ((uint8_t*) page + page->off); + uint8_t* jmp; + ptrdiff_t off; + + if (idx == jit->function_extern) { + jmp = h->jump; + } else { + jmp = jumps[idx]; + } + + /* compensate for room taken up for the offset so that we can work rip + * relative */ + addr += BRANCH_OFF; + + /* see if we can fit the offset in the branch displacement, if not use the + * jump instruction */ + off = *(uint8_t**) jmp - addr; + + if (MIN_BRANCH <= off && off <= MAX_BRANCH) { + return (int32_t) off; + } else { + return (int32_t)(jmp + sizeof(uint8_t*) - addr); + } +} + +#if LUA_VERSION_NUM >= 503 +LUA_API void lua_remove_compat (lua_State *L, int idx) { + lua_remove(L, idx); +} +#endif + +static void* reserve_code(struct jit* jit, lua_State* L, size_t sz) +{ + struct page* page; + size_t off = (jit->pagenum > 0) ? jit->pages[jit->pagenum-1]->off : 0; + size_t size = (jit->pagenum > 0) ? jit->pages[jit->pagenum-1]->size : 0; + + if (off + sz >= size) { + int i; + uint8_t* pdata; + cfunction func; + + /* need to create a new page */ + jit->pages = (struct page**) realloc(jit->pages, (++jit->pagenum) * sizeof(jit->pages[0])); + + size = ALIGN_UP(sz + LINKTABLE_MAX_SIZE + sizeof(struct page), jit->align_page_size); + + page = (struct page*) AllocPage(size); + jit->pages[jit->pagenum-1] = page; + pdata = (uint8_t*) page; + page->size = size; + page->off = sizeof(struct page); + + lua_newtable(L); + +#define ADDFUNC(DLL, NAME) \ + lua_pushliteral(L, #NAME); \ + func = DLL ? (cfunction) GetProcAddressA(DLL, #NAME) : NULL; \ + func = func ? 
func : (cfunction) &NAME; \ + lua_pushcfunction(L, (lua_CFunction) func); \ + lua_rawset(L, -3) + + ADDFUNC(NULL, check_double); + ADDFUNC(NULL, check_float); + ADDFUNC(NULL, check_uint64); + ADDFUNC(NULL, check_int64); + ADDFUNC(NULL, check_int32); + ADDFUNC(NULL, check_uint32); + ADDFUNC(NULL, check_uintptr); + ADDFUNC(NULL, check_enum); + ADDFUNC(NULL, check_typed_pointer); + ADDFUNC(NULL, check_typed_cfunction); + ADDFUNC(NULL, check_complex_double); + ADDFUNC(NULL, check_complex_float); + ADDFUNC(NULL, unpack_varargs_stack); + ADDFUNC(NULL, unpack_varargs_stack_skip); + ADDFUNC(NULL, unpack_varargs_reg); + ADDFUNC(NULL, unpack_varargs_float); + ADDFUNC(NULL, unpack_varargs_int); + ADDFUNC(NULL, push_cdata); + ADDFUNC(NULL, push_int); + ADDFUNC(NULL, push_uint); + ADDFUNC(NULL, push_float); + ADDFUNC(jit->kernel32_dll, SetLastError); + ADDFUNC(jit->kernel32_dll, GetLastError); + ADDFUNC(jit->lua_dll, luaL_error); + ADDFUNC(jit->lua_dll, lua_pushnumber); + ADDFUNC(jit->lua_dll, lua_pushboolean); + ADDFUNC(jit->lua_dll, lua_gettop); + ADDFUNC(jit->lua_dll, lua_rawgeti); + ADDFUNC(jit->lua_dll, lua_pushnil); + ADDFUNC(jit->lua_dll, lua_callk); + ADDFUNC(jit->lua_dll, lua_settop); +#if LUA_VERSION_NUM >= 503 + lua_pushliteral(L, "lua_remove"); + lua_pushcfunction(L, (lua_CFunction) lua_remove_compat); + lua_rawset(L, -3); +#else + ADDFUNC(jit->lua_dll, lua_remove); +#endif +#undef ADDFUNC + + for (i = 0; extnames[i] != NULL; i++) { + + if (strcmp(extnames[i], "FUNCTION") == 0) { + shred(pdata + page->off, 0, JUMP_SIZE); + jit->function_extern = i; + + } else { + lua_getfield(L, -1, extnames[i]); + func = (cfunction) lua_tocfunction(L, -1); + + if (func == NULL) { + luaL_error(L, "internal error: missing link for %s", extnames[i]); + } + + compile_extern_jump(jit, L, func, pdata + page->off); + lua_pop(L, 1); + } + + page->off += JUMP_SIZE; + } + + page->freed = page->off; + lua_pop(L, 1); + + } else { + page = jit->pages[jit->pagenum-1]; + EnableWrite(page, 
page->size); + } + + return (uint8_t*) page + page->off; +} + +static void commit_code(struct jit* jit, void* code, size_t sz) +{ + struct page* page = jit->pages[jit->pagenum-1]; + page->off += sz; + EnableExecute(page, page->size); + { +#if 0 + FILE* out = fopen("\\Hard Disk\\out.bin", "wb"); + fwrite(page, page->off, 1, out); + fclose(out); +#endif + } +} + +/* push_func_ref pushes a copy of the upval table embedded in the compiled + * function func. + */ +void push_func_ref(lua_State* L, cfunction func) +{ + struct jit_head* h = ((struct jit_head*) func) - 1; + lua_rawgeti(L, LUA_REGISTRYINDEX, h->ref); +} + +void free_code(struct jit* jit, lua_State* L, cfunction func) +{ + size_t i; + struct jit_head* h = ((struct jit_head*) func) - 1; + for (i = 0; i < jit->pagenum; i++) { + struct page* p = jit->pages[i]; + + if ((uint8_t*) h < (uint8_t*) p || (uint8_t*) p + p->size <= (uint8_t*) h) { + continue; + } + + luaL_unref(L, LUA_REGISTRYINDEX, h->ref); + + EnableWrite(p, p->size); + p->freed += h->size; + + shred(h, 0, h->size); + + if (p->freed < p->off) { + EnableExecute(p, p->size); + return; + } + + FreePage(p, p->size); + memmove(&jit->pages[i], &jit->pages[i+1], (jit->pagenum - (i+1)) * sizeof(jit->pages[0])); + jit->pagenum--; + return; + } + + assert(!"couldn't find func in the jit pages"); +} + diff --git a/tools/luaffi/call_arm.dasc b/tools/luaffi/call_arm.dasc new file mode 100644 index 0000000..2d54863 --- /dev/null +++ b/tools/luaffi/call_arm.dasc @@ -0,0 +1,639 @@ +/* vim: ts=4 sw=4 sts=4 et tw=78 + * Copyright (c) 2011 James R. McKaskill. 
See license in ffi.h + */ +|.arch arm +|.actionlist build_actionlist +|.globalnames globnames +|.externnames extnames + +#define JUMP_SIZE 8 +#define MIN_BRANCH ((INT32_MIN) >> 8) +#define MAX_BRANCH ((INT32_MAX) >> 8) +#define BRANCH_OFF 4 + +static void compile_extern_jump(struct jit* jit, lua_State* L, function_t func, uint8_t* code) +{ + /* The jump code is the function pointer followed by a stub to call the + * function pointer. The stub exists so we can jump to functions with an + * offset greater than 32MB. + * + * Note we have to manually set this up since there are commands buffered + * in the jit state. + */ + *(function_t*) code = func; + /* ldr pc, [pc - 12] */ + *(uint32_t*) &code[4] = 0xE51FF00CU; +} + +|.define TOP, r4 +|.define L_ARG, r5 +|.define DATA, r6 +|.define DATA2, r7 + +|.macro load32, reg, val +| ldr reg, [pc] +| b >5 +|.long val +|5: +|.endmacro + +|.macro lcall, func +| mov r0, L_ARG +| bl func +|.endmacro + +void compile_globals(struct jit* jit, lua_State* L) +{ + (void) jit; +} + +function_t push_callback(struct jit* jit, lua_State* L, int fidx, int ct_usr, const struct ctype* ct) +{ + struct jit* Dst = jit; + int i, nargs, num_upvals, ref; + const struct ctype* mt; + + int top = lua_gettop(L); + + ct_usr = lua_absindex(L, ct_usr); + fidx = lua_absindex(L, fidx); + nargs = (int) lua_rawlen(L, ct_usr); + + dasm_setup(Dst, build_actionlist); + + lua_newtable(L); + lua_pushvalue(L, -1); + ref = luaL_ref(L, LUA_REGISTRYINDEX); + num_upvals = 0; + + if (ct->has_var_arg) { + luaL_error(L, "can't create callbacks with varargs"); + } + + /* prolog and get the upval table */ + | mov r12, sp + | push {r0, r1, r2, r3} // do this first so that r0-r3 is right before stack bound arguments + | push {TOP, L_ARG, DATA, DATA2, r12, lr} + | sub DATA, r12, #16 // points to r0 on stack + | ldr L_ARG, [pc, #8] + | ldr r2, [pc, #8] + | ldr r1, [pc, #8] + | b >1 + |.long L, ref, LUA_REGISTRYINDEX + |1: + | lcall extern lua_rawgeti + + /* get the lua function 
*/ + lua_pushvalue(L, fidx); + lua_rawseti(L, -2, ++num_upvals); + | mov r2, #num_upvals + | mvn r1, #0 // -1 + | lcall extern lua_rawgeti + + for (i = 1; i <= nargs; i++) { + lua_rawgeti(L, ct_usr, i); + mt = (const struct ctype*) lua_touserdata(L, -1); + + if (mt->pointers) { + lua_getuservalue(L, -1); + lua_rawseti(L, -3, ++num_upvals); /* usr value */ + lua_rawseti(L, -2, ++num_upvals); /* mt */ + + | mov r2, #num_upvals-1 // usr value + | mvn r1, #i // -i-1, stack is upval table, func, i-1 args + | lcall extern lua_rawgeti + | load32 r2, mt + | mvn r1, #0 // -1 + | lcall extern push_cdata + | ldr r2, [DATA], #4 + | str r2, [r0] + | mvn r1, #1 // -2 + | lcall extern lua_remove // remove the usr value + + } else { + switch (mt->type) { + case INT64_TYPE: + case UINT64_TYPE: + lua_rawseti(L, -2, ++num_upvals); /* mt */ + | lcall extern lua_pushnil + | load32 r2, mt + | mvn r1, #0 // -1 + | lcall extern push_cdata + | ldr r2, [DATA], #4 + | ldr r3, [DATA], #4 + | str r2, [r0] + | str r3, [r0, #4] + | mvn r1, #1 // -2 + | lcall extern lua_remove // remove the nil usr + break; + + case UINTPTR_TYPE: + lua_rawseti(L, -2, ++num_upvals); /* mt */ + | lcall extern lua_pushnil + | load32 r2, mt + | mvn r1, #0 // -1 + | lcall extern push_cdata + | ldr r2, [DATA], #4 + | str r2, [r0] + | mvn r1, #1 // -2 + | lcall extern lua_remove // remove the nil usr + break; + + case BOOL_TYPE: + lua_pop(L, 1); + | ldr r1, [DATA], #4 + | lcall extern lua_pushboolean + break; + + case INT8_TYPE: + lua_pop(L, 1); + | ldr r1, [DATA], #4 + | mov r1, r1, lsl #24 + | mov r1, r1, asr #24 + | lcall extern push_int + break; + + case UINT8_TYPE: + lua_pop(L, 1); + | ldr r1, [DATA], #4 + | and r1, r1, #0xFF + | lcall extern push_uint + break; + + case INT16_TYPE: + lua_pop(L, 1); + | ldr r1, [DATA], #4 + | mov r1, r1, lsl #16 + | mov r1, r1, asr #16 + | lcall extern push_int + break; + + case UINT16_TYPE: + lua_pop(L, 1); + | ldr r1, [DATA], #4 + | mov r1, r1, lsl #16 + | mov r1, r1, lsr #16 + | 
lcall extern push_uint + break; + + case ENUM_TYPE: + case INT32_TYPE: + lua_pop(L, 1); + | ldr r1, [DATA], #4 + | lcall extern push_int + break; + + case UINT32_TYPE: + lua_pop(L, 1); + | ldr r1, [DATA], #4 + | lcall extern push_uint + break; + + case FLOAT_TYPE: + lua_pop(L, 1); + | ldr r1, [DATA], #4 + | lcall extern push_float + break; + + case DOUBLE_TYPE: + lua_pop(L, 1); + | ldmia DATA!, {r1, r2} + | lcall extern lua_pushnumber + break; + + default: + luaL_error(L, "NYI: callback arg type"); + } + } + } + + lua_rawgeti(L, ct_usr, 0); + mt = (const struct ctype*) lua_touserdata(L, -1); + + | mov r3, #0 + | mov r2, #((mt->pointers || mt->type != VOID_TYPE) ? 1 : 0) + | mov r1, #nargs + | lcall extern lua_callk + + if (mt->pointers) { + lua_getuservalue(L, -1); + lua_rawseti(L, -3, ++num_upvals); /* usr value */ + lua_rawseti(L, -2, ++num_upvals); /* mt */ + + | mov r2, #num_upvals-1 // usr value + | mvn r1, #1 // -2 stack is (upval table, ret val) + | lcall extern lua_rawgeti + | load32 r3, mt + | mvn r2, #0 // -1 - ct_usr (mvn of 0 yields -1; a plain mov would pass stack index 0) + | mvn r1, #1 // -2 - val + | lcall extern to_typed_pointer + | mov DATA, r0 + | mvn r1, #3 // -4 - remove 3 (upval table, ret val, usr value) + | lcall extern lua_settop + | mov r0, DATA + } else { + switch (mt->type) { + case ENUM_TYPE: + lua_getuservalue(L, -1); + lua_rawseti(L, -3, ++num_upvals); /* usr value */ + lua_rawseti(L, -2, ++num_upvals); /* mt */ + + | mov r2, #num_upvals-1 // usr value + | mvn r1, #1 // -2 stack is (upval table, ret val) + | lcall extern lua_rawgeti + | load32 r3, mt + | mvn r2, #0 // -1 - ct_usr + | mvn r1, #1 // -2 - val + | lcall extern to_enum + | mov DATA, r0 + | mvn r1, #3 // -4 - remove 3 (upval table, ret val, usr value) + | lcall extern lua_settop + | mov r0, DATA + break; + + case VOID_TYPE: + | mvn r1, #1 // -2 + | lcall extern lua_settop + lua_pop(L, 1); + break; + + case BOOL_TYPE: + case INT8_TYPE: + case INT16_TYPE: + case INT32_TYPE: + | mvn r1, #0 // -1 + | lcall extern to_int32 + goto 
single; + + case UINT8_TYPE: + case UINT16_TYPE: + case UINT32_TYPE: + | mvn r1, #0 // -1 + | lcall extern to_uint32 + goto single; + + case INT64_TYPE: + | mvn r1, #0 // -1 + | lcall extern to_int64 + goto dual; + + case UINT64_TYPE: + | mvn r1, #0 // -1 + | lcall extern to_uint64 + goto dual; + + case UINTPTR_TYPE: + | mvn r1, #0 // -1 + | lcall extern to_uintptr + goto single; + + case FLOAT_TYPE: + | mvn r1, #0 // -1 + | lcall extern to_float + goto single; + + case DOUBLE_TYPE: + | mvn r1, #0 // -1 + | lcall extern to_double + goto dual; + + single: + | mov DATA, r0 + | mvn r1, #2 // -3 + | lcall extern lua_settop + | mov r0, DATA + lua_pop(L, 1); + break; + + dual: + | mov DATA, r0 + | mov DATA2, r1 + | mvn r1, #2 // -3 + | lcall extern lua_settop + | mov r0, DATA + | mov r1, DATA2 + lua_pop(L, 1); + break; + + default: + luaL_error(L, "NYI: callback return type"); + } + } + + | ldmia sp, {TOP, L_ARG, DATA, DATA2, sp, pc} + + lua_pop(L, 1); /* upval table - already in registry */ + assert(lua_gettop(L) == top); + + { + void* p; + struct ctype ft; + function_t func; + + func = compile(jit, L, NULL, ref); + + ft = *ct; + ft.is_jitted = 1; + p = push_cdata(L, ct_usr, &ft); + *(function_t*) p = func; + + assert(lua_gettop(L) == top + 1); + + return func; + } +} + +void push_function(struct jit* jit, lua_State* L, function_t func, int ct_usr, const struct ctype* ct) +{ + struct jit* Dst = jit; + int i, nargs, num_upvals; + const struct ctype* mt; + void* p; + + int top = lua_gettop(L); + + ct_usr = lua_absindex(L, ct_usr); + nargs = (int) lua_rawlen(L, ct_usr); + + p = push_cdata(L, ct_usr, ct); + *(function_t*) p = func; + num_upvals = 1; + + dasm_setup(Dst, build_actionlist); + + | mov r12, sp + | push {r0} + | push {TOP, L_ARG, DATA, DATA2, r11, r12, lr} + | sub r11, r12, #4 + | mov L_ARG, r0 + | lcall extern lua_gettop + | mov TOP, r0 + | cmp TOP, #nargs + | // these should really be in globals - but for some reason dynasm breaks when you do that + if 
(ct->has_var_arg) { + | bge >1 + | load32 r1, "too few arguments" + | lcall extern luaL_error + |1: + } else { + | beq >1 + | load32 r1, "incorrect number of arguments" + | lcall extern luaL_error + |1: + } + + /* reserve enough stack space for all of the arguments (8 bytes per + * argument for double and maintains alignment). Add an extra 16 bytes so + * that the pop {r0, r1, r2, r3} doesn't clean out our stack frame */ + | sub sp, sp, TOP, lsl #3 + | sub sp, sp, #16 + | mov DATA, sp + + for (i = 1; i <= nargs; i++) { + lua_rawgeti(L, ct_usr, i); + mt = (const struct ctype*) lua_touserdata(L, -1); + + if (mt->pointers || mt->type == FUNCTION_PTR_TYPE || mt->type == ENUM_TYPE) { + lua_getuservalue(L, -1); + num_upvals += 2; + + | ldr r3, [pc, #4] + | ldr r2, [pc, #4] + | b >1 + |.long mt, lua_upvalueindex(num_upvals) + |1: + | mov r1, #i + | mov r0, L_ARG + + if (mt->pointers) { + | bl extern to_typed_pointer + } else if (mt->type == FUNCTION_PTR_TYPE) { + | bl extern to_typed_function + } else if (mt->type == ENUM_TYPE) { + | bl extern to_enum + } + + | str r0, [DATA], #4 + + } else { + lua_pop(L, 1); + | mov r1, #i + + switch (mt->type) { + case INT8_TYPE: + | lcall extern to_int32 + | mov r0, r0, lsl #24 + | mov r0, r0, asr #24 + | str r0, [DATA], #4 + break; + + case INT16_TYPE: + | lcall extern to_int32 + | mov r0, r0, lsl #16 + | mov r0, r0, asr #16 + | str r0, [DATA], #4 + break; + + case INT32_TYPE: + | lcall extern to_int32 + | str r0, [DATA], #4 + break; + + case UINT8_TYPE: + | lcall extern to_uint32 + | and r0, r0, #0xFF + | str r0, [DATA], #4 + break; + + case UINT16_TYPE: + | lcall extern to_uint32 + | mov r0, r0, lsl #16 + | mov r0, r0, lsr #16 + | str r0, [DATA], #4 + break; + + case UINT32_TYPE: + | lcall extern to_uint32 + | str r0, [DATA], #4 + break; + + case INT64_TYPE: + | lcall extern to_int64 + | str r0, [DATA], #4 + | str r1, [DATA], #4 + break; + + case UINT64_TYPE: + | lcall extern to_uint64 + | str r0, [DATA], #4 + | str r1, [DATA], #4 + 
break; + + case DOUBLE_TYPE: + | lcall extern to_double + | str r0, [DATA], #4 + | str r1, [DATA], #4 + break; + + case UINTPTR_TYPE: + | lcall extern to_uintptr + | str r0, [DATA], #4 + break; + + case FLOAT_TYPE: + | lcall extern to_float + | str r0, [DATA], #4 + break; + + default: + luaL_error(L, "NYI: call arg type"); + } + } + } + + if (ct->has_var_arg) { + | mov r3, DATA + | mov r2, TOP + | mov r1, #nargs+1 + | lcall extern unpack_varargs_stack + } + + | load32 r0, &jit->last_errno + | ldr r0, [r0] + | bl extern SetLastError + + | pop {r0, r1, r2, r3} // this pop is balanced with the sub sp, #16 + | bl extern FUNCTION + + |.macro get_errno + | bl extern GetLastError + | load32 r1, &jit->last_errno + | str r0, [r1] + |.endmacro + + |.macro return + | ldmdb r11, {TOP, L_ARG, DATA, r11, sp, pc} + |.endmacro + + lua_rawgeti(L, ct_usr, 0); + mt = (const struct ctype*) lua_touserdata(L, -1); + + if (mt->pointers) { + lua_getuservalue(L, -1); + num_upvals += 2; + | mov DATA, r0 + | get_errno + | ldr r2, [pc, #4] + | ldr r1, [pc, #4] + | b >1 + |.long mt, lua_upvalueindex(num_upvals) + |1: + | lcall extern push_cdata + | str DATA, [r0] + | mov r0, #1 + | return + + } else { + switch (mt->type) { + case INT64_TYPE: + case UINT64_TYPE: + num_upvals++; + | mov DATA, r0 + | mov DATA2, r1 + | get_errno + | lcall extern lua_pushnil + | load32 r2, mt + | mvn r1, #0 // -1 + | lcall extern push_cdata + | str DATA, [r0] + | str DATA2, [r0, #4] + | mov r0, #1 + | return + break; + + case UINTPTR_TYPE: + num_upvals++; + | mov DATA, r0 + | get_errno + | lcall extern lua_pushnil + | load32 r2, mt + | mvn r1, #0 // -1 + | lcall extern push_cdata + | str DATA, [r0] + | mov r0, #1 + | return + break; + + case VOID_TYPE: + lua_pop(L, 1); + | get_errno + | mov r0, #0 + | return + break; + + case BOOL_TYPE: + lua_pop(L, 1); + | mov DATA, r0 + | get_errno + | mov r1, DATA + | lcall extern lua_pushboolean + | mov r0, #1 + | return + break; + + case INT8_TYPE: + case INT16_TYPE: + case 
INT32_TYPE: + case ENUM_TYPE: + lua_pop(L, 1); + | mov DATA, r0 + | get_errno + | mov r1, DATA + | lcall extern push_int + | mov r0, #1 + | return + break; + + case UINT8_TYPE: + case UINT16_TYPE: + case UINT32_TYPE: + lua_pop(L, 1); + | mov DATA, r0 + | get_errno + | mov r1, DATA + | lcall extern push_uint + | mov r0, #1 + | return + break; + + case FLOAT_TYPE: + lua_pop(L, 1); + | mov DATA, r0 + | get_errno + | mov r1, DATA + | lcall extern push_float + | mov r0, #1 + | return + break; + + case DOUBLE_TYPE: + lua_pop(L, 1); + | mov DATA, r0 + | mov DATA2, r1 + | get_errno + | mov r2, DATA2 + | mov r1, DATA + | lcall extern lua_pushnumber + | mov r0, #1 + | return + break; + + default: + luaL_error(L, "NYI: call return type"); + } + } + + assert(lua_gettop(L) == top + num_upvals); + lua_pushcclosure(L, (lua_CFunction) compile(jit, L, func, LUA_NOREF), num_upvals); +} + diff --git a/tools/luaffi/call_x86.dasc b/tools/luaffi/call_x86.dasc new file mode 100644 index 0000000..546bd1f --- /dev/null +++ b/tools/luaffi/call_x86.dasc @@ -0,0 +1,1594 @@ +/* vim: ts=4 sw=4 sts=4 et tw=78 + * Copyright (c) 2011 James R. McKaskill. 
See license in ffi.h + */ +|.if X64 +|.arch x64 +|.else +|.arch x86 +|.endif + +|.actionlist build_actionlist +|.globalnames globnames +|.externnames extnames + +|.if not X64 +|.define RET_H, edx // for int64_t returns +|.define RET_L, eax +|.endif + +|.if X64WIN +| +|.macro call_rrrp, func, arg0, arg1, arg2, arg3 +| mov64 r9, arg3 +| mov r8, arg2 +| mov rdx, arg1 +| mov rcx, arg0 +| call func +|.endmacro +|.macro call_rrrr, func, arg0, arg1, arg2, arg3 +| mov r9, arg3 +| mov r8, arg2 +| mov rdx, arg1 +| mov rcx, arg0 +| call func +|.endmacro +| +|.macro call_rrp, func, arg0, arg1, arg2 +| mov64 r8, arg2 +| mov rdx, arg1 +| mov rcx, arg0 +| call func +|.endmacro +|.macro call_rrr, func, arg0, arg1, arg2 +| mov r8, arg2 +| mov rdx, arg1 +| mov rcx, arg0 +| call func +|.endmacro +| +|.macro call_rp, func, arg0, arg1 +| mov64 rdx, arg1 +| mov rcx, arg0 +| call func +|.endmacro +|.macro call_rr, func, arg0, arg1 +| mov rdx, arg1 +| mov rcx, arg0 +| call func +|.endmacro +| +|.macro call_r, func, arg0 +| mov rcx, arg0 +| call func +|.endmacro +| +|.elif X64 +| +| // the 5 and 6 arg forms are only used on posix x64 +|.macro call_rrrrrr, func, arg0, arg1, arg2, arg3, arg4, arg5 +| mov r9, arg5 +| mov r8, arg4 +| mov rcx, arg3 +| mov rdx, arg2 +| mov rsi, arg1 +| mov rdi, arg0 +| call func +|.endmacro +|.macro call_rrrrr, func, arg0, arg1, arg2, arg3, arg4 +| mov r8, arg4 +| mov rcx, arg3 +| mov rdx, arg2 +| mov rsi, arg1 +| mov rdi, arg0 +| call func +|.endmacro +| +|.macro call_rrrp, func, arg0, arg1, arg2, arg3 +| mov64 rcx, arg3 +| mov rdx, arg2 +| mov rsi, arg1 +| mov rdi, arg0 +| call func +|.endmacro +|.macro call_rrrr, func, arg0, arg1, arg2, arg3 +| mov rcx, arg3 +| mov rdx, arg2 +| mov rsi, arg1 +| mov rdi, arg0 +| call func +|.endmacro +| +|.macro call_rrp, func, arg0, arg1, arg2 +| mov64 rdx, arg2 +| mov rsi, arg1 +| mov rdi, arg0 +| call func +|.endmacro +|.macro call_rrr, func, arg0, arg1, arg2 +| mov rdx, arg2 +| mov rsi, arg1 +| mov rdi, arg0 +| call func 
+|.endmacro +| +|.macro call_rp, func, arg0, arg1 +| mov64 rsi, arg1 +| mov rdi, arg0 +| call func +|.endmacro +|.macro call_rr, func, arg0, arg1 +| mov rsi, arg1 +| mov rdi, arg0 +| call func +|.endmacro +| +|.macro call_r, func, arg0 +| mov rdi, arg0 +| call func +|.endmacro +| +|.else +| // define the 64bit registers to the 32 bit counterparts, so the common +| // code can use r*x for all pointers +|.define rax, eax +|.define rcx, ecx +|.define rdx, edx +|.define rsp, esp +|.define rbp, ebp +|.define rdi, edi +|.define rsi, esi +|.define mov64, mov +| +|.macro call_rrrr, func, arg0, arg1, arg2, arg3 +| mov dword [rsp+12], arg3 +| mov dword [rsp+8], arg2 +| mov dword [rsp+4], arg1 +| mov dword [rsp], arg0 +| call func +|.endmacro +|.macro call_rrr, func, arg0, arg1, arg2 +| mov dword [rsp+8], arg2 +| mov dword [rsp+4], arg1 +| mov dword [rsp], arg0 +| call func +|.endmacro +|.macro call_rr, func, arg0, arg1 +| mov dword [rsp+4], arg1 +| mov dword [rsp], arg0 +| call func +|.endmacro +|.macro call_r, func, arg0 +| mov dword [rsp], arg0 +| call func +|.endmacro +| +|.define call_rrrp, call_rrrr +|.define call_rrp, call_rrr +|.define call_rp, call_rr +| +|.endif + +#if defined _WIN64 || defined __amd64__ +#define JUMP_SIZE 14 +#else +#define JUMP_SIZE 4 +#endif + +#define MIN_BRANCH INT32_MIN +#define MAX_BRANCH INT32_MAX +#define BRANCH_OFF 4 + +static void compile_extern_jump(struct jit* jit, lua_State* L, cfunction func, uint8_t* code) +{ + /* The jump code is the function pointer followed by a stub to call the + * function pointer. The stub exists in 64 bit so we can jump to functions + * with an offset greater than 2 GB. + * + * Note we have to manually set this up since there are commands buffered + * in the jit state and dynasm doesn't support rip relative addressing. 
+ * + * eg on 64 bit: + * 0-8: function ptr + * 8-14: jmp aword [rip-14] + * + * for 32 bit we only set the function ptr as it can always fit in a 32 + * bit displacement + */ +#if defined _WIN64 || defined __amd64__ + *(cfunction*) code = func; + code[8] = 0xFF; /* FF /4 operand for jmp */ + code[9] = 0x25; /* RIP displacement */ + *(int32_t*) &code[10] = -14; +#else + *(cfunction*) code = func; +#endif +} + +void compile_globals(struct jit* jit, lua_State* L) +{ + struct jit* Dst = jit; + int* perr = &jit->last_errno; + dasm_setup(Dst, build_actionlist); + + /* Note: since the return code uses EBP to reset the stack pointer, we + * don't have to track the amount of stack space used. It also means we + * can handle stdcall and cdecl with the same code. + */ + + /* Note the various call_* functions want 32 bytes of 16 byte aligned + * stack + */ + + |.if X64 + |.define L_ARG, r12 + |.define TOP, r13 + |.else + |.define L_ARG, rdi + |.define TOP, rsi + |.endif + + |.macro epilog + |.if X64 + | mov TOP, [rbp-16] + | mov L_ARG, [rbp-8] + |.else + | mov TOP, [rbp-8] + | mov L_ARG, [rbp-4] + |.endif + | mov rsp, rbp + | pop rbp + | ret + |.endmacro + + |.macro get_errno // note trashes registers + | call extern GetLastError + | mov64 rcx, perr + | mov dword [rcx], eax + |.endmacro + + /* the general idea for the return functions is: + * 1) Save return value on stack + * 2) Call get_errno (this trashes the registers hence #1) + * 3) Unpack return value from stack + * 4) Call lua push function + * 5) Set eax to number of returned args (0 or 1) + * 6) Call return which pops our stack frame + */ + + |->lua_return_arg: + | mov eax, 1 + | epilog + + |->lua_return_void: + | get_errno + | mov eax, 0 + | epilog + + |->lua_return_double: + |.if X64 + | movq qword [rsp+32], xmm0 + |.else + | fstp qword [rsp+4] // note get_errno doesn't require any stack on x86 + |.endif + | + | get_errno + | + |.if X64WIN + | movq xmm1, qword [rsp+32] + | mov rcx, L_ARG + |.elif X64 + | movq xmm0, 
qword [rsp+32] + | mov rdi, L_ARG + |.else + | mov [rsp], L_ARG + |.endif + | call extern lua_pushnumber + | jmp ->lua_return_arg + + |->lua_return_bool: + | movzx eax, al + | mov [rsp+32], eax + | get_errno + | mov eax, [rsp+32] + | call_rr extern lua_pushboolean, L_ARG, rax + | jmp ->lua_return_arg + + |->lua_return_int: + | mov [rsp+32], eax + | get_errno + | mov eax, [rsp+32] + | call_rr extern push_int, L_ARG, rax + | jmp ->lua_return_arg + + |->lua_return_uint: + | mov [rsp+32], eax + | get_errno + | mov eax, [rsp+32] + | call_rr extern push_uint, L_ARG, rax + | jmp ->lua_return_arg + + |->too_few_arguments: + | mov ax, 0 + | call_rp extern luaL_error, L_ARG, &"too few arguments" + + |->too_many_arguments: + | mov ax, 0 + | call_rp extern luaL_error, L_ARG, &"too many arguments" + + |->save_registers: + | // use rbp relative so we store values in the outer stack frame + |.if X64WIN + | // use the provided shadow space for int registers above prev rbp and + | // return address + | mov [rbp+16], rcx + | mov [rbp+24], rdx + | mov [rbp+32], r8 + | mov [rbp+40], r9 + | // use the extra space we added for float registers + | // -16 to store underneath previous value of L_ARG + | movq qword [rbp-16], xmm0 + | movq qword [rbp-24], xmm1 + | movq qword [rbp-32], xmm2 + | movq qword [rbp-40], xmm3 + |.elif X64 + | movq qword [rbp-16], xmm0 + | movq qword [rbp-24], xmm1 + | movq qword [rbp-32], xmm2 + | movq qword [rbp-40], xmm3 + | movq qword [rbp-48], xmm4 + | movq qword [rbp-56], xmm5 + | movq qword [rbp-64], xmm6 + | movq qword [rbp-72], xmm7 + | mov [rbp-80], rdi + | mov [rbp-88], rsi + | mov [rbp-96], rdx + | mov [rbp-104], rcx + | mov [rbp-112], r8 + | mov [rbp-120], r9 + |.else + | // fastcall, -8 to store underneath previous value of L_ARG + | mov [rbp-8], ecx + | mov [rbp-12], edx + |.endif + | ret + + compile(Dst, L, NULL, LUA_NOREF); +} + +int x86_return_size(lua_State* L, int usr, const struct ctype* ct) +{ + int ret = 0; + const struct ctype* mt; + + if 
(ct->calling_convention != C_CALL) { + size_t i; + size_t argn = lua_rawlen(L, usr); + for (i = 1; i <= argn; i++) { + lua_rawgeti(L, usr, (int) i); + mt = (const struct ctype*) lua_touserdata(L, -1); + + if (mt->pointers) { + ret += sizeof(void*); + } else { + switch (mt->type) { + case DOUBLE_TYPE: + case COMPLEX_FLOAT_TYPE: + case INT64_TYPE: + ret += 8; + break; + case COMPLEX_DOUBLE_TYPE: + ret += 16; + break; + case INTPTR_TYPE: + ret += sizeof(intptr_t); + break; + case FUNCTION_PTR_TYPE: + ret += sizeof(cfunction); + break; + case BOOL_TYPE: + case FLOAT_TYPE: + case INT8_TYPE: + case INT16_TYPE: + case INT32_TYPE: + case ENUM_TYPE: + ret += 4; + break; + default: + return luaL_error(L, "NYI - argument type"); + } + } + + lua_pop(L, 1); + } + } + +#if !defined _WIN64 && !defined __amd64__ + lua_rawgeti(L, usr, 0); + mt = (const struct ctype*) lua_touserdata(L, -1); + if (!mt->pointers && mt->type == COMPLEX_DOUBLE_TYPE) { + ret += sizeof(void*); + } + lua_pop(L, 1); +#endif + + return ret; +} + +#ifdef _WIN64 +#define MAX_REGISTERS(ct) 4 /* rcx, rdx, r8, r9 */ + +#elif defined __amd64__ +#define MAX_INT_REGISTERS(ct) 6 /* rdi, rsi, rdx, rcx, r8, r9 */ +#define MAX_FLOAT_REGISTERS(ct) 8 /* xmm0-7 */ + +#else +#define MAX_INT_REGISTERS(ct) ((ct)->calling_convention == FAST_CALL ? 2 /* ecx, edx */ : 0) +#define MAX_FLOAT_REGISTERS(ct) 0 +#endif + +struct reg_alloc { +#ifdef _WIN64 + int regs; + int is_float[4]; + int is_int[4]; +#else + int floats; + int ints; +#endif + int off; +}; + +#ifdef _WIN64 +#define REGISTER_STACK_SPACE(ct) (4*8) +#elif defined __amd64__ +#define REGISTER_STACK_SPACE(ct) (14*8) +#else +#define REGISTER_STACK_SPACE(ct) ALIGN_UP(((ct)->calling_convention == FAST_CALL ? 
2*4 : 0), 15) +#endif + +/* Fastcall: + * Uses ecx, edx as first two int registers + * Everything else on stack (include 64bit ints) + * No overflow stack space + * Pops the stack before returning + * Returns int in eax, float in ST0 + * We use the same register allocation logic as posix x64 with 2 int regs and 0 float regs + */ + +static void get_int(Dst_DECL, const struct ctype* ct, struct reg_alloc* reg, int is_int64) +{ + /* grab the register from the shadow space */ +#ifdef _WIN64 + if (reg->regs < MAX_REGISTERS(ct)) { + | mov rcx, [rbp + 16 + 8*reg->regs] + reg->regs++; + } +#elif __amd64__ + if (reg->ints < MAX_INT_REGISTERS(ct)) { + | mov rcx, [rbp - 80 - 8*reg->ints] + reg->ints++; + } +#else + if (!is_int64 && reg->ints < MAX_INT_REGISTERS(ct)) { + | mov ecx, [rbp - 8 - 4*reg->ints] + reg->ints++; + } +#endif + else if (is_int64) { + |.if X64 + | mov rcx, [rbp + reg->off] + |.else + | mov rcx, [rbp + reg->off] + | mov rdx, [rbp + reg->off + 4] + |.endif + reg->off += 8; + } else { + | mov ecx, [rbp + reg->off] + reg->off += 4; + } +} + +static void add_int(Dst_DECL, const struct ctype* ct, struct reg_alloc* reg, int is_int64) +{ +#ifdef _WIN64 + if (reg->regs < MAX_REGISTERS(ct)) { + | mov [rsp + 32 + 8*(reg->regs)], rax + reg->is_int[reg->regs++] = 1; + } +#elif __amd64__ + if (reg->ints < MAX_INT_REGISTERS(ct)) { + | mov [rsp + 32 + 8*reg->ints], rax + reg->ints++; + } +#else + if (!is_int64 && reg->ints < MAX_INT_REGISTERS(ct)) { + | mov [rsp + 32 + 4*reg->ints], rax + reg->ints++; + } +#endif + + else if (is_int64) { + |.if X64 + | mov [rsp + reg->off], rax + |.else + | mov [rsp + reg->off], RET_L + | mov [rsp + reg->off + 4], RET_H + |.endif + reg->off += 8; + } else { + | mov [rsp+reg->off], eax + reg->off += 4; + } +} + +static void get_float(Dst_DECL, const struct ctype* ct, struct reg_alloc* reg, int is_double) +{ +#if !defined _WIN64 && !defined __amd64__ + assert(MAX_FLOAT_REGISTERS(ct) == 0); + if (is_double) { + | fld qword [rbp + reg->off] + 
reg->off += 8; + } else { + | fld dword [rbp + reg->off] + reg->off += 4; + } +#else + int off; + +#ifdef _WIN64 + if (reg->regs < MAX_REGISTERS(ct)) { + off = -16 - 8*reg->regs; + reg->regs++; + } +#else + if (reg->floats < MAX_FLOAT_REGISTERS(ct)) { + off = -16 - 8*reg->floats; + reg->floats++; + } +#endif + else { + off = reg->off; + reg->off += is_double ? 8 : 4; + } + + if (is_double) { + | movq xmm0, qword [rbp + off] + } else { + | cvtss2sd xmm0, dword [rbp + off] + } +#endif +} + +static void add_float(Dst_DECL, const struct ctype* ct, struct reg_alloc* reg, int is_double) +{ +#if !defined _WIN64 && !defined __amd64__ + assert(MAX_FLOAT_REGISTERS(ct) == 0); + if (is_double) { + | fstp qword [rsp + reg->off] + reg->off += 8; + } else { + | fstp dword [rsp + reg->off] + reg->off += 4; + } +#else + +#ifdef _WIN64 + if (reg->regs < MAX_REGISTERS(ct)) { + if (is_double) { + | movq qword [rsp + 32 + 8*(reg->regs)], xmm0 + } else { + | cvtsd2ss xmm0, xmm0 + | movq qword [rsp + 32 + 8*(reg->regs)], xmm0 + } + reg->is_float[reg->regs++] = 1; + } +#else + if (reg->floats < MAX_FLOAT_REGISTERS(ct)) { + if (is_double) { + | movq qword [rsp + 32 + 8*(MAX_INT_REGISTERS(ct) + reg->floats)], xmm0 + } else { + | cvtsd2ss xmm0, xmm0 + | movq qword [rsp + 32 + 8*(MAX_INT_REGISTERS(ct) + reg->floats)], xmm0 + } + reg->floats++; + } +#endif + + else if (is_double) { + | movq qword [rsp + reg->off], xmm0 + reg->off += 8; + } else { + | cvtsd2ss xmm0, xmm0 + | movd dword [rsp + reg->off], xmm0 + reg->off += 4; + } +#endif +} + +#if defined _WIN64 || defined __amd64__ +#define add_pointer(jit, ct, reg) add_int(jit, ct, reg, 1) +#define get_pointer(jit, ct, reg) get_int(jit, ct, reg, 1) +#else +#define add_pointer(jit, ct, reg) add_int(jit, ct, reg, 0) +#define get_pointer(jit, ct, reg) get_int(jit, ct, reg, 0) +#endif + +cfunction compile_callback(lua_State* L, int fidx, int ct_usr, const struct ctype* ct) +{ + int i, nargs; + cfunction* pf; + struct ctype ct2 = *ct; + const 
struct ctype* mt; + struct reg_alloc reg; + int num_upvals = 0; + int top = lua_gettop(L); + struct jit* Dst = get_jit(L); + int ref; + int hidden_arg_off = 0; + + ct_usr = lua_absindex(L, ct_usr); + fidx = lua_absindex(L, fidx); + + assert(lua_isnil(L, fidx) || lua_isfunction(L, fidx)); + + memset(&reg, 0, sizeof(reg)); +#ifdef _WIN64 + reg.off = 16 + REGISTER_STACK_SPACE(ct); /* stack registers are above the shadow space */ +#elif __amd64__ + reg.off = 16; +#else + reg.off = 8; +#endif + + dasm_setup(Dst, build_actionlist); + + // add a table to store ctype and function upvalues + // callback_set assumes the first value is the lua function + nargs = (int) lua_rawlen(L, ct_usr); + lua_newtable(L); + lua_pushvalue(L, -1); + ref = luaL_ref(L, LUA_REGISTRYINDEX); + + if (ct->has_var_arg) { + luaL_error(L, "can't create callbacks with varargs"); + } + + // setup a stack frame to hold args for the call into lua_call + + | push rbp + | mov rbp, rsp + | push L_ARG + | // stack is 4 or 8 (mod 16) (L_ARG, rbp, rip) + |.if X64 + | // 8 to realign, 16 for return vars, 32 for local calls, rest to save registers + | sub rsp, 8 + 16 + 32 + REGISTER_STACK_SPACE(ct) + | call ->save_registers + |.else + | // 4 to realign, 16 for return vars, 32 for local calls, rest to save registers + | sub rsp, 4 + 16 + 32 + REGISTER_STACK_SPACE(ct) + if (ct->calling_convention == FAST_CALL) { + | call ->save_registers + } + |.endif + + // hardcode the lua_State* value into the assembly + | mov64 L_ARG, L + + /* get the upval table */ + | call_rrr extern lua_rawgeti, L_ARG, LUA_REGISTRYINDEX, ref + + /* get the lua function */ + lua_pushvalue(L, fidx); + lua_rawseti(L, -2, ++num_upvals); + assert(num_upvals == CALLBACK_FUNC_USR_IDX); + | call_rrr extern lua_rawgeti, L_ARG, -1, num_upvals + +#if !defined _WIN64 && !defined __amd64__ + lua_rawgeti(L, ct_usr, 0); + mt = (const struct ctype*) lua_touserdata(L, -1); + if (!mt->pointers && mt->type == COMPLEX_DOUBLE_TYPE) { + hidden_arg_off = reg.off; +
reg.off += sizeof(void*); + } + lua_pop(L, 1); +#else + (void) hidden_arg_off; +#endif + + for (i = 1; i <= nargs; i++) { + lua_rawgeti(L, ct_usr, i); + mt = (const struct ctype*) lua_touserdata(L, -1); + + if (mt->pointers) { + lua_getuservalue(L, -1); + lua_rawseti(L, -3, ++num_upvals); /* usr value */ + lua_rawseti(L, -2, ++num_upvals); /* mt */ + /* on the lua stack in the callback: + * upval tbl, lua func, i-1 args + */ + | call_rrr extern lua_rawgeti, L_ARG, -i-1, num_upvals-1 + | call_rrp extern push_cdata, L_ARG, -1, mt + get_pointer(Dst, ct, &reg); + | mov [rax], rcx + | call_rr, extern lua_remove, L_ARG, -2 + } else { + switch (mt->type) { + case INT64_TYPE: + lua_getuservalue(L, -1); + lua_rawseti(L, -3, ++num_upvals); /* mt */ + lua_pop(L, 1); + | call_rrp extern push_cdata, L_ARG, 0, mt + get_int(Dst, ct, &reg, 1); + |.if X64 + | mov [rax], rcx + |.else + | mov [rax], ecx + | mov [rax+4], edx + |.endif + break; + + case INTPTR_TYPE: + lua_getuservalue(L, -1); + lua_rawseti(L, -3, ++num_upvals); /* mt */ + lua_pop(L, 1); + | call_rrp extern push_cdata, L_ARG, 0, mt + get_pointer(Dst, ct, &reg); + | mov [rax], rcx + break; + + case COMPLEX_FLOAT_TYPE: + lua_pop(L, 1); +#if defined _WIN64 || defined __amd64__ + /* complex floats are two floats packed into a double */ + | call_rrp extern push_cdata, L_ARG, 0, mt + get_float(Dst, ct, &reg, 1); + | movq qword [rax], xmm0 +#else + /* complex floats are real followed by imag on the stack */ + | call_rrp extern push_cdata, L_ARG, 0, mt + get_float(Dst, ct, &reg, 0); + | fstp dword [rax] + get_float(Dst, ct, &reg, 0); + | fstp dword [rax+4] +#endif + break; + + case COMPLEX_DOUBLE_TYPE: + lua_pop(L, 1); + | call_rrp extern push_cdata, L_ARG, 0, mt + /* real */ + get_float(Dst, ct, &reg, 1); + |.if X64 + | movq qword [rax], xmm0 + |.else + | fstp qword [rax] + |.endif + /* imag */ + get_float(Dst, ct, &reg, 1); + |.if X64 + | movq qword [rax+8], xmm0 + |.else + | fstp qword [rax+8] + |.endif + break; + + case FLOAT_TYPE: + case
DOUBLE_TYPE: + lua_pop(L, 1); + get_float(Dst, ct, &reg, mt->type == DOUBLE_TYPE); + |.if X64WIN + | movq xmm1, xmm0 + | mov rcx, L_ARG + |.elif X64 + | // for 64bit xmm0 is already set + | mov rdi, L_ARG + |.else + | fstp qword [rsp+4] + | mov [rsp], L_ARG + |.endif + | call extern lua_pushnumber + break; + + case BOOL_TYPE: + lua_pop(L, 1); + get_int(Dst, ct, &reg, 0); + | movzx ecx, cl + | call_rr extern lua_pushboolean, L_ARG, rcx + break; + + case INT8_TYPE: + lua_pop(L, 1); + get_int(Dst, ct, &reg, 0); + if (mt->is_unsigned) { + | movzx ecx, cl + } else { + | movsx ecx, cl + } + | call_rr extern push_int, L_ARG, rcx + break; + + case INT16_TYPE: + lua_pop(L, 1); + get_int(Dst, ct, &reg, 0); + if (mt->is_unsigned) { + | movzx ecx, cx + } else { + | movsx ecx, cx + } + | call_rr extern push_int, L_ARG, rcx + break; + + case ENUM_TYPE: + case INT32_TYPE: + lua_pop(L, 1); + get_int(Dst, ct, &reg, 0); + if (mt->is_unsigned) { + | call_rr extern push_uint, L_ARG, rcx + } else { + | call_rr extern push_int, L_ARG, rcx + } + break; + + default: + luaL_error(L, "NYI: callback arg type"); + } + } + } + + lua_rawgeti(L, ct_usr, 0); + mt = (const struct ctype*) lua_touserdata(L, -1); + + | call_rrrp extern lua_callk, L_ARG, nargs, (mt->pointers || mt->type != VOID_TYPE) ? 1 : 0, 0 + + // Unpack the return argument if not "void", also clean-up the lua stack + // to remove the return argument and bind table. Use lua_settop rather + // than lua_pop as lua_pop is implemented as a macro.
+ if (mt->pointers) { + lua_getuservalue(L, -1); + lua_rawseti(L, -3, ++num_upvals); /* usr value */ + lua_rawseti(L, -2, ++num_upvals); /* mt */ + | call_rrr extern lua_rawgeti, L_ARG, -2, num_upvals-1 + | call_rrrp extern check_typed_pointer, L_ARG, -2, -1, mt + | mov [rsp+32], rax + | call_rr extern lua_settop, L_ARG, -4 + | mov rax, [rsp+32] + + } else { + switch (mt->type) { + case ENUM_TYPE: + lua_getuservalue(L, -1); + lua_rawseti(L, -3, ++num_upvals); /* usr value */ + lua_rawseti(L, -2, ++num_upvals); /* mt */ + | call_rrr extern lua_rawgeti, L_ARG, -2, num_upvals-1 + | call_rrrp, extern check_enum, L_ARG, -2, -1, mt + | mov [rsp+32], eax + | call_rr extern lua_settop, L_ARG, -4 + | mov eax, [rsp+32] + break; + + case VOID_TYPE: + lua_pop(L, 1); + | call_rr extern lua_settop, L_ARG, -2 + break; + + case BOOL_TYPE: + case INT8_TYPE: + case INT16_TYPE: + case INT32_TYPE: + lua_pop(L, 1); + if (mt->is_unsigned) { + | call_rr extern check_uint32, L_ARG, -1 + } else { + | call_rr extern check_int32, L_ARG, -1 + } + | mov [rsp+32], eax + | call_rr extern lua_settop, L_ARG, -3 + | mov eax, [rsp+32] + break; + + case INT64_TYPE: + lua_pop(L, 1); + + if (mt->is_unsigned) { + | call_rr extern check_uint64, L_ARG, -1 + } else { + | call_rr extern check_int64, L_ARG, -1 + } + + |.if X64 + | mov [rsp+32], rax + |.else + | mov [rsp+32], RET_L + | mov [rsp+36], RET_H + |.endif + | call_rr extern lua_settop, L_ARG, -3 + |.if X64 + | mov rax, [rsp+32] + |.else + | mov RET_L, [rsp+32] + | mov RET_H, [rsp+36] + |.endif + break; + + case INTPTR_TYPE: + lua_pop(L, 1); + | call_rr extern check_uintptr, L_ARG, -1 + | mov [rsp+32], rax + | call_rr extern lua_settop, L_ARG, -3 + | mov rax, [rsp+32] + break; + + case FLOAT_TYPE: + case DOUBLE_TYPE: + lua_pop(L, 1); + | call_rr extern check_double, L_ARG, -1 + |.if X64 + | movq qword [rsp+32], xmm0 + | call_rr extern lua_settop, L_ARG, -3 + if (mt->type == FLOAT_TYPE) { + | cvtsd2ss xmm0, qword [rsp+32] + } else { + | movq xmm0, 
qword [rsp+32] + } + |.else + | fstp qword [rsp+32] + | call_rr extern lua_settop, L_ARG, -3 + | fld qword [rsp+32] + |.endif + break; + + case COMPLEX_FLOAT_TYPE: + lua_pop(L, 1); +#if !defined HAVE_COMPLEX + luaL_error(L, "ffi lib compiled without complex number support"); +#endif + /* on 64 bit complex floats are two floats packed into a double, + * on 32 bit returned complex floats use eax and edx */ + | call_rr extern check_complex_float, L_ARG, -1 + | + |.if X64 + | movq qword [rsp+32], xmm0 + |.else + | mov [rsp+32], eax + | mov [rsp+36], edx + |.endif + | + | call_rr extern lua_settop, L_ARG, -3 + | + |.if X64 + | movq xmm0, qword [rsp+32] + |.else + | mov eax, [rsp+32] + | mov edx, [rsp+36] + |.endif + break; + + case COMPLEX_DOUBLE_TYPE: + lua_pop(L, 1); +#if !defined HAVE_COMPLEX + luaL_error(L, "ffi lib compiled without complex number support"); +#endif + /* on 64 bit, returned complex doubles use xmm0, xmm1, on 32 bit + * there is a hidden first parameter that points to 16 bytes where + * the returned arg is stored which is popped by the called + * function */ +#if defined _WIN64 || defined __amd64__ + | call_rr extern check_complex_double, L_ARG, -1 + | movq qword [rsp+32], xmm0 + | movq qword [rsp+40], xmm1 + | call_rr extern lua_settop, L_ARG, -3 + | movq xmm0, qword [rsp+32] + | movq xmm1, qword [rsp+40] +#else + | mov rcx, [rbp + hidden_arg_off] + | call_rrr extern check_complex_double, rcx, L_ARG, -1 + | sub rsp, 4 // to realign from popped hidden arg + | call_rr extern lua_settop, L_ARG, -3 +#endif + break; + + default: + luaL_error(L, "NYI: callback return type"); + } + } + + |.if X64 + | mov L_ARG, [rbp-8] + |.else + | mov L_ARG, [rbp-4] + |.endif + | mov rsp, rbp + | pop rbp + | ret x86_return_size(L, ct_usr, ct) + + lua_pop(L, 1); /* upval table - already in registry */ + assert(lua_gettop(L) == top); + + ct2.is_jitted = 1; + pf = (cfunction*) push_cdata(L, ct_usr, &ct2); + *pf = compile(Dst, L, NULL, ref); + + assert(lua_gettop(L) == top + 
1); + + return *pf; +} + +void compile_function(lua_State* L, cfunction func, int ct_usr, const struct ctype* ct) +{ + size_t i, nargs; + int num_upvals; + const struct ctype* mbr_ct; + struct jit* Dst = get_jit(L); + struct reg_alloc reg; + void* p; + int top = lua_gettop(L); + int* perr = &Dst->last_errno; + + ct_usr = lua_absindex(L, ct_usr); + + memset(&reg, 0, sizeof(reg)); + reg.off = 32 + REGISTER_STACK_SPACE(ct); + + dasm_setup(Dst, build_actionlist); + + p = push_cdata(L, ct_usr, ct); + *(cfunction*) p = func; + num_upvals = 1; + + nargs = lua_rawlen(L, ct_usr); + + if (ct->calling_convention != C_CALL && ct->has_var_arg) { + luaL_error(L, "vararg is only allowed with the c calling convention"); + } + + | push rbp + | mov rbp, rsp + | push L_ARG + | push TOP + | // stack is 0 (mod 16) (TOP, L_ARG, rbp, rip) + | + | // Get L from our arguments and allocate some stack for lua_gettop + |.if X64WIN + | mov L_ARG, rcx + | sub rsp, 32 // shadow space + |.elif X64 + | mov L_ARG, rdi + |.else + | mov L_ARG, [rbp + 8] + | sub rsp, 16 + |.endif + | + | call_r extern lua_gettop, L_ARG + | mov TOP, rax // no need for movzxd rax, eax - high word guaranteed to be zero by x86-64 + | cmp rax, nargs + | jl ->too_few_arguments + + if (!ct->has_var_arg) { + | jg ->too_many_arguments + } + + /* no need to zero extend eax returned by lua_gettop to rax as x86-64 + * guarantees that the upper 32 bits will be zero */ + | shl rax, 4 // reserve 16 bytes per argument - this maintains the alignment mod 16 + | sub rsp, rax + | sub rsp, 32 + REGISTER_STACK_SPACE(ct) // reserve an extra 32 to call local functions + +#if !defined _WIN64 && !defined __amd64__ + /* Returned complex doubles require a hidden first parameter where the + * data is stored, which is popped by the calling code.
*/ + lua_rawgeti(L, ct_usr, 0); + mbr_ct = (const struct ctype*) lua_touserdata(L, -1); + if (!mbr_ct->pointers && mbr_ct->type == COMPLEX_DOUBLE_TYPE) { + /* we can allocate more space for arguments as long as no add_* + * function has been called yet, mbr_ct will be added as an upvalue in + * the return processing later */ + | call_rrp extern push_cdata, L_ARG, 0, mbr_ct + | sub rsp, 16 + add_pointer(Dst, ct, &reg); + } + lua_pop(L, 1); +#endif + + for (i = 1; i <= nargs; i++) { + lua_rawgeti(L, ct_usr, (int) i); + mbr_ct = (const struct ctype*) lua_touserdata(L, -1); + + if (mbr_ct->pointers) { + lua_getuservalue(L, -1); + num_upvals += 2; + | call_rrrp extern check_typed_pointer, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct + add_pointer(Dst, ct, &reg); + } else { + switch (mbr_ct->type) { + case FUNCTION_PTR_TYPE: + lua_getuservalue(L, -1); + num_upvals += 2; + | call_rrrp extern check_typed_cfunction, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct + add_pointer(Dst, ct, &reg); + break; + + case ENUM_TYPE: + lua_getuservalue(L, -1); + num_upvals += 2; + | call_rrrp, extern check_enum, L_ARG, i, lua_upvalueindex(num_upvals), mbr_ct + add_int(Dst, ct, &reg, 0); + break; + + case INT8_TYPE: + | call_rr extern check_int32, L_ARG, i + if (mbr_ct->is_unsigned) { + | movzx eax, al + } else { + | movsx eax, al + } + add_int(Dst, ct, &reg, 0); + lua_pop(L, 1); + break; + + case INT16_TYPE: + | call_rr extern check_int32, L_ARG, i + if (mbr_ct->is_unsigned) { + | movzx eax, ax + } else { + | movsx eax, ax + } + add_int(Dst, ct, &reg, 0); + lua_pop(L, 1); + break; + + case BOOL_TYPE: + | call_rr extern check_int32, L_ARG, i + | cmp eax, 0 + | setne al + | movzx eax, al + add_int(Dst, ct, &reg, 0); + lua_pop(L, 1); + break; + + case INT32_TYPE: + if (mbr_ct->is_unsigned) { + | call_rr extern check_uint32, L_ARG, i + } else { + | call_rr extern check_int32, L_ARG, i + } + add_int(Dst, ct, &reg, 0); + lua_pop(L, 1); + break; + + case INTPTR_TYPE: + | call_rr extern check_uintptr, L_ARG, i +
add_pointer(Dst, ct, &reg); + lua_pop(L, 1); + break; + + case INT64_TYPE: + if (mbr_ct->is_unsigned) { + | call_rr extern check_uint64, L_ARG, i + } else { + | call_rr extern check_int64, L_ARG, i + } + add_int(Dst, ct, &reg, 1); + lua_pop(L, 1); + break; + + case DOUBLE_TYPE: + | call_rr extern check_double, L_ARG, i + add_float(Dst, ct, &reg, 1); + lua_pop(L, 1); + break; + + case COMPLEX_DOUBLE_TYPE: + /* on 64 bit, returned complex doubles use xmm0, xmm1, on 32 bit + * there is a hidden first parameter that points to 16 bytes where + * the returned arg is stored (this is popped by the called + * function) */ +#if defined _WIN64 || defined __amd64__ + | call_rr extern check_complex_double, L_ARG, i + add_float(Dst, ct, &reg, 1); + | movq xmm0, xmm1 + add_float(Dst, ct, &reg, 1); +#else + | lea rax, [rsp+reg.off] + | sub rsp, 4 + | call_rrr extern check_complex_double, rax, L_ARG, i + reg.off += 16; +#endif + lua_pop(L, 1); + break; + + case FLOAT_TYPE: + | call_rr extern check_double, L_ARG, i + add_float(Dst, ct, &reg, 0); + lua_pop(L, 1); + break; + + case COMPLEX_FLOAT_TYPE: +#if defined _WIN64 || defined __amd64__ + | call_rr extern check_complex_float, L_ARG, i + /* complex floats are two floats packed into a double */ + add_float(Dst, ct, &reg, 1); +#else + /* returned complex floats use eax and edx */ + | call_rr extern check_complex_float, L_ARG, i + | mov [rsp], eax + | fld dword [rsp] + add_float(Dst, ct, &reg, 0); + | mov [rsp], edx + | fld dword [rsp] + add_float(Dst, ct, &reg, 0); +#endif + lua_pop(L, 1); + break; + + default: + luaL_error(L, "NYI: call arg type"); + } + } + } + + if (ct->has_var_arg) { +#ifdef _WIN64 + |.if X64WIN + if (reg.regs < MAX_REGISTERS(ct)) { + assert(reg.regs == nargs); + | cmp TOP, MAX_REGISTERS(ct) + | jle >1 + | // unpack onto stack + | mov rax, rsp + | add rax, 32 + 8*MAX_REGISTERS(ct) + | call_rrrr extern unpack_varargs_stack, L_ARG, MAX_REGISTERS(ct)+1, TOP, rax + | // unpack to registers + | mov rax, rsp + | add rax, 32 + 8*(reg.regs) + |
call_rrrr extern unpack_varargs_reg, L_ARG, nargs+1, MAX_REGISTERS(ct), rax + | jmp >2 + |1: + | // unpack just to registers + | mov rax, rsp + | add rax, 32 + 8*(reg.regs) + | call_rrrr extern unpack_varargs_reg, L_ARG, nargs+1, TOP, rax + |2: + } else { + | // unpack just to stack + | mov rax, rsp + | add rax, reg.off + | call_rrrr extern unpack_varargs_stack, L_ARG, nargs+1, TOP, rax + } + + for (i = nargs; i < MAX_REGISTERS(ct); i++) { + reg.is_int[i] = reg.is_float[i] = 1; + } + reg.regs = MAX_REGISTERS(ct); +#elif defined __amd64__ + |.elif X64 + if (reg.floats < MAX_FLOAT_REGISTERS(ct)) { + | mov rax, rsp + | add rax, 32 + 8*(MAX_INT_REGISTERS(ct) + reg.floats) + | call_rrrrr extern unpack_varargs_float, L_ARG, nargs+1, TOP, MAX_FLOAT_REGISTERS(ct) - reg.floats, rax + } + + if (reg.ints < MAX_INT_REGISTERS(ct)) { + | mov rax, rsp + | add rax, 32 + 8*(reg.ints) + | call_rrrrr extern unpack_varargs_int, L_ARG, nargs+1, TOP, MAX_INT_REGISTERS(ct) - reg.ints, rax + } + + | mov rax, rsp + | add rax, reg.off + | call_rrrrrr extern unpack_varargs_stack_skip, L_ARG, nargs+1, TOP, MAX_INT_REGISTERS(ct) - reg.ints, MAX_FLOAT_REGISTERS(ct) - reg.floats, rax + + reg.floats = MAX_FLOAT_REGISTERS(ct); + reg.ints = MAX_INT_REGISTERS(ct); +#else + |.else + | mov rax, rsp + | add rax, reg.off + | call_rrrr extern unpack_varargs_stack, L_ARG, nargs+1, TOP, rax + |.endif +#endif + } + + | mov64 rcx, perr + | mov eax, dword [rcx] + | call_r extern SetLastError, rax + + /* remove the stack space to call local functions */ + |.if X32WIN + | add rsp, 28 // SetLastError will have already popped 4 + |.else + | add rsp, 32 + |.endif + +#ifdef _WIN64 + |.if X64WIN + switch (reg.regs) { + case 4: + if (reg.is_float[3]) { + | movq xmm3, qword [rsp + 8*3] + } + if (reg.is_int[3]) { + | mov r9, [rsp + 8*3] + } + case 3: + if (reg.is_float[2]) { + | movq xmm2, qword [rsp + 8*2] + } + if (reg.is_int[2]) { + | mov r8, [rsp + 8*2] + } + case 2: + if (reg.is_float[1]) { + | movq xmm1, qword 
[rsp + 8*1] + } + if (reg.is_int[1]) { + | mov rdx, [rsp + 8*1] + } + case 1: + if (reg.is_float[0]) { + | movq xmm0, qword [rsp] + } + if (reg.is_int[0]) { + | mov rcx, [rsp] + } + case 0: + break; + } + + /* don't remove the space for the registers as we need 32 bytes of register overflow space */ + assert(REGISTER_STACK_SPACE(ct) == 32); + +#elif defined __amd64__ + |.elif X64 + switch (reg.floats) { + case 8: + | movq xmm7, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+7)] + case 7: + | movq xmm6, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+6)] + case 6: + | movq xmm5, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+5)] + case 5: + | movq xmm4, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+4)] + case 4: + | movq xmm3, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+3)] + case 3: + | movq xmm2, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+2)] + case 2: + | movq xmm1, qword [rsp + 8*(MAX_INT_REGISTERS(ct)+1)] + case 1: + | movq xmm0, qword [rsp + 8*(MAX_INT_REGISTERS(ct))] + case 0: + break; + } + + switch (reg.ints) { + case 6: + | mov r9, [rsp + 8*5] + case 5: + | mov r8, [rsp + 8*4] + case 4: + | mov rcx, [rsp + 8*3] + case 3: + | mov rdx, [rsp + 8*2] + case 2: + | mov rsi, [rsp + 8*1] + case 1: + | mov rdi, [rsp] + case 0: + break; + } + + | add rsp, REGISTER_STACK_SPACE(ct) +#else + |.else + if (ct->calling_convention == FAST_CALL) { + switch (reg.ints) { + case 2: + | mov edx, [rsp + 4] + case 1: + | mov ecx, [rsp] + case 0: + break; + } + + | add rsp, REGISTER_STACK_SPACE(ct) + } + |.endif +#endif + +#ifdef __amd64__ + if (ct->has_var_arg) { + /* al stores an upper limit on the number of float register, note that + * its allowed to be more than the actual number of float registers used as + * long as its 0-8 */ + |.if X64 and not X64WIN + | mov al, 8 + |.endif + } +#endif + + | call extern FUNCTION + | sub rsp, 48 // 32 to be able to call local functions, 16 so we can store some local variables + + /* note on windows X86 the stack may be only aligned to 4 (stdcall will + * have popped a multiple of 4 
bytes), but we don't need 16 byte alignment on + * that platform + */ + + lua_rawgeti(L, ct_usr, 0); + mbr_ct = (const struct ctype*) lua_touserdata(L, -1); + + if (mbr_ct->pointers || mbr_ct->type == INTPTR_TYPE) { + lua_getuservalue(L, -1); + num_upvals += 2; + | mov [rsp+32], rax // save the pointer + | get_errno + | call_rrp extern push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct + | mov rcx, [rsp+32] + | mov [rax], rcx // *(void**) cdata = val + | jmp ->lua_return_arg + + } else { + switch (mbr_ct->type) { + case FUNCTION_PTR_TYPE: + lua_getuservalue(L, -1); + num_upvals += 2; + | mov [rsp+32], rax // save the function pointer + | get_errno + | call_rrp extern push_cdata, L_ARG, lua_upvalueindex(num_upvals), mbr_ct + | mov rcx, [rsp+32] + | mov [rax], rcx // *(cfunction**) cdata = val + | jmp ->lua_return_arg + break; + + case INT64_TYPE: + num_upvals++; + | // save the return value + |.if X64 + | mov [rsp+32], rax + |.else + | mov [rsp+36], edx // high + | mov [rsp+32], eax // low + |.endif + | + | get_errno + | call_rrp extern push_cdata, L_ARG, 0, mbr_ct + | + | // *(int64_t*) cdata = val + |.if X64 + | mov rcx, [rsp+32] + | mov [rax], rcx + |.else + | mov rcx, [rsp+36] + | mov rdx, [rsp+32] + | mov [rax+4], rcx + | mov [rax], rdx + |.endif + | + | jmp ->lua_return_arg + break; + + case COMPLEX_FLOAT_TYPE: + num_upvals++; + |.if X64 + | // complex floats are returned as two floats packed into xmm0 + | movq qword [rsp+32], xmm0 + |.else + | // complex floats are returned as floats in eax and edx + | mov [rsp+32], eax + | mov [rsp+36], edx + |.endif + | + | get_errno + | call_rrp extern push_cdata, L_ARG, 0, mbr_ct + | + | // ((complex_float*) cdata) = val + |.if X64 + | mov rcx, [rsp+32] + | mov [rax], rcx + |.else + | mov ecx, [rsp+32] + | mov [rax], ecx + | mov ecx, [rsp+36] + | mov [rax+4], ecx + |.endif + | + | jmp ->lua_return_arg + break; + + case COMPLEX_DOUBLE_TYPE: + num_upvals++; + |.if X64 + | // complex doubles are returned as xmm0 and 
xmm1 + | movq qword [rsp+40], xmm1 + | movq qword [rsp+32], xmm0 + | + | get_errno + | call_rrp extern push_cdata, L_ARG, 0, mbr_ct + | + | // ((complex_double*) cdata)->real = val0 + | // ((complex_double*) cdata)->imag = val1 + | mov rcx, [rsp+40] + | mov [rax+8], rcx + | mov rcx, [rsp+32] + | mov [rax], rcx + | + |.else + | // On 32 bit we have already handled this by pushing a new cdata + | // and handing the cdata ptr in as the hidden first param, but + | // still need to add mbr_ct as an upval as its used earlier. + | // Hidden param was popped by called function, we need to realign. + | sub rsp, 4 + | get_errno + |.endif + | + | jmp ->lua_return_arg + break; + + case VOID_TYPE: + lua_pop(L, 1); + | jmp ->lua_return_void + break; + + case BOOL_TYPE: + lua_pop(L, 1); + | jmp ->lua_return_bool + break; + + case INT8_TYPE: + lua_pop(L, 1); + if (mbr_ct->is_unsigned) { + | movzx eax, al + } else { + | movsx eax, al + } + | jmp ->lua_return_int + break; + + case INT16_TYPE: + lua_pop(L, 1); + if (mbr_ct->is_unsigned) { + | movzx eax, ax + } else { + | movsx eax, ax + } + | jmp ->lua_return_int + break; + + case INT32_TYPE: + case ENUM_TYPE: + lua_pop(L, 1); + if (mbr_ct->is_unsigned) { + | jmp ->lua_return_uint + } else { + | jmp ->lua_return_int + } + break; + + case FLOAT_TYPE: + lua_pop(L, 1); + |.if X64 + | cvtss2sd xmm0, xmm0 + |.endif + | jmp ->lua_return_double + break; + + case DOUBLE_TYPE: + lua_pop(L, 1); + | jmp ->lua_return_double + break; + + default: + luaL_error(L, "NYI: call return type"); + } + } + + assert(lua_gettop(L) == top + num_upvals); + { + cfunction f = compile(Dst, L, func, LUA_NOREF); + /* add a callback as an upval so that the jitted code gets cleaned up when + * the function gets gc'd */ + push_callback(L, f); + lua_pushcclosure(L, (lua_CFunction) f, num_upvals+1); + } +} + diff --git a/tools/luaffi/ctype.c b/tools/luaffi/ctype.c new file mode 100644 index 0000000..3b72ca6 --- /dev/null +++ b/tools/luaffi/ctype.c @@ -0,0 +1,268 @@ +/* 
vim: ts=4 sw=4 sts=4 et tw=78 + * Copyright (c) 2011 James R. McKaskill. See license in ffi.h + */ +#include "ffi.h" + +static int to_define_key; + +static void update_on_definition(lua_State* L, int ct_usr, int ct_idx) +{ + ct_usr = lua_absindex(L, ct_usr); + ct_idx = lua_absindex(L, ct_idx); + + lua_pushlightuserdata(L, &to_define_key); + lua_rawget(L, ct_usr); + + if (lua_isnil(L, -1)) { + lua_pop(L, 1); /* pop the nil */ + + /* {} */ + lua_newtable(L); + + /* {__mode='k'} */ + lua_newtable(L); + lua_pushliteral(L, "k"); + lua_setfield(L, -2, "__mode"); + + /* setmetatable({}, {__mode='k'}) */ + lua_setmetatable(L, -2); + + /* usr[TO_UPDATE_KEY] = setmetatable({}, {__mode='k'}) */ + lua_pushlightuserdata(L, &to_define_key); + lua_pushvalue(L, -2); + lua_rawset(L, ct_usr); + + /* leave the table on the stack */ + } + + /* to_update[ctype or cdata] = true */ + lua_pushvalue(L, ct_idx); + lua_pushboolean(L, 1); + lua_rawset(L, -3); + + /* pop the to_update table */ + lua_pop(L, 1); +} + +void set_defined(lua_State* L, int ct_usr, struct ctype* ct) +{ + ct_usr = lua_absindex(L, ct_usr); + + ct->is_defined = 1; + + /* update ctypes and cdatas that were created before the definition came in */ + lua_pushlightuserdata(L, &to_define_key); + lua_rawget(L, ct_usr); + + if (!lua_isnil(L, -1)) { + lua_pushnil(L); + + while (lua_next(L, -2)) { + struct ctype* upd = (struct ctype*) lua_touserdata(L, -2); + upd->base_size = ct->base_size; + upd->align_mask = ct->align_mask; + upd->is_defined = 1; + upd->is_variable_struct = ct->is_variable_struct; + upd->variable_increment = ct->variable_increment; + assert(!upd->variable_size_known); + lua_pop(L, 1); + } + + lua_pop(L, 1); + /* usr[TO_UPDATE_KEY] = nil */ + lua_pushlightuserdata(L, &to_define_key); + lua_pushnil(L); + lua_rawset(L, ct_usr); + } else { + lua_pop(L, 1); + } +} + +struct ctype* push_ctype(lua_State* L, int ct_usr, const struct ctype* ct) +{ + struct ctype* ret; + ct_usr = lua_absindex(L, ct_usr); + + ret = 
(struct ctype*) lua_newuserdata(L, sizeof(struct ctype)); + *ret = *ct; + + push_upval(L, &ctype_mt_key); + lua_setmetatable(L, -2); + +#if LUA_VERSION_NUM == 501 + if (!ct_usr || lua_isnil(L, ct_usr)) { + push_upval(L, &niluv_key); + lua_setfenv(L, -2); + } +#endif + + if (ct_usr && !lua_isnil(L, ct_usr)) { + lua_pushvalue(L, ct_usr); + lua_setuservalue(L, -2); + } + + if (!ct->is_defined && ct_usr && !lua_isnil(L, ct_usr)) { + update_on_definition(L, ct_usr, -1); + } + + return ret; +} + +size_t ctype_size(lua_State* L, const struct ctype* ct) +{ + if (ct->pointers - ct->is_array) { + return sizeof(void*) * (ct->is_array ? ct->array_size : 1); + + } else if (!ct->is_defined || ct->type == VOID_TYPE) { + return luaL_error(L, "can't calculate size of an undefined type"); + + } else if (ct->variable_size_known) { + assert(ct->is_variable_struct && !ct->is_array); + return ct->base_size + ct->variable_increment; + + } else if (ct->is_variable_array || ct->is_variable_struct) { + return luaL_error(L, "internal error: calc size of variable type with unknown size"); + + } else { + return ct->base_size * (ct->is_array ? ct->array_size : 1); + } +} + +void* push_cdata(lua_State* L, int ct_usr, const struct ctype* ct) +{ + struct cdata* cd; + size_t sz = ct->is_reference ? sizeof(void*) : ctype_size(L, ct); + ct_usr = lua_absindex(L, ct_usr); + + /* This is to stop valgrind from complaining. Bitfields are accessed in 8 + * byte chunks so that the code doesn't have to deal with different access + * patterns, but this means that occasionally it will read past the end of + * the struct. As its not setting the bits past the end (only reading and + * then writing the bits back) and the read is aligned its a non-issue, + * but valgrind complains nonetheless. 
+ */ + if (ct->has_bitfield) { + sz = ALIGN_UP(sz, 7); + } + + cd = (struct cdata*) lua_newuserdata(L, sizeof(struct cdata) + sz); + *(struct ctype*) &cd->type = *ct; + memset(cd+1, 0, sz); + + /* TODO: handle cases where lua_newuserdata returns a pointer that is not + * aligned */ +#if 0 + assert((uintptr_t) (cd + 1) % 8 == 0); +#endif + +#if LUA_VERSION_NUM == 501 + if (!ct_usr || lua_isnil(L, ct_usr)) { + push_upval(L, &niluv_key); + lua_setfenv(L, -2); + } +#endif + + if (ct_usr && !lua_isnil(L, ct_usr)) { + lua_pushvalue(L, ct_usr); + lua_setuservalue(L, -2); + } + + push_upval(L, &cdata_mt_key); + lua_setmetatable(L, -2); + + if (!ct->is_defined && ct_usr && !lua_isnil(L, ct_usr)) { + update_on_definition(L, ct_usr, -1); + } + + return cd+1; +} + +void push_callback(lua_State* L, cfunction f) +{ + cfunction* pf = (cfunction*) lua_newuserdata(L, sizeof(cfunction)); + *pf = f; + + push_upval(L, &callback_mt_key); + lua_setmetatable(L, -2); +} + +/* returns the value as a ctype, pushes the user value onto the stack */ +void check_ctype(lua_State* L, int idx, struct ctype* ct) +{ + if (lua_isstring(L, idx)) { + struct parser P; + P.line = 1; + P.prev = P.next = lua_tostring(L, idx); + P.align_mask = DEFAULT_ALIGN_MASK; + parse_type(L, &P, ct); + parse_argument(L, &P, -1, ct, NULL, NULL); + lua_remove(L, -2); /* remove the user value from parse_type */ + + } else if (lua_getmetatable(L, idx)) { + if (!equals_upval(L, -1, &ctype_mt_key) + && !equals_upval(L, -1, &cdata_mt_key)) { + goto err; + } + + lua_pop(L, 1); /* pop the metatable */ + *ct = *(struct ctype*) lua_touserdata(L, idx); + lua_getuservalue(L, idx); + + } else { + goto err; + } + + return; + +err: + luaL_error(L, "expected cdata, ctype or string for arg #%d", idx); +} + +/* to_cdata returns the struct cdata* and pushes the user value onto the + * stack. If the index is not a cdata then a nil is pushed, NULL is returned, + * and ct->type is set to INVALID_TYPE (the rest of ct is not touched).
Also dereferences + * references */ +void* to_cdata(lua_State* L, int idx, struct ctype* ct) +{ + struct cdata* cd; + + ct->type = INVALID_TYPE; + if (!lua_isuserdata(L, idx) || !lua_getmetatable(L, idx)) { + lua_pushnil(L); + return NULL; + } + + if (!equals_upval(L, -1, &cdata_mt_key)) { + lua_pop(L, 1); /* mt */ + lua_pushnil(L); + return NULL; + } + + lua_pop(L, 1); /* mt */ + cd = (struct cdata*) lua_touserdata(L, idx); + *ct = cd->type; + lua_getuservalue(L, idx); + + if (ct->is_reference) { + ct->is_reference = 0; + return *(void**) (cd+1); + + } else if (ct->pointers && !ct->is_array) { + return *(void**) (cd+1); + + } else { + return cd + 1; + } +} + +/* check_cdata returns the struct cdata* and pushes the user value onto the + * stack. Also dereferences references. */ +void* check_cdata(lua_State* L, int idx, struct ctype* ct) +{ + void* p = to_cdata(L, idx, ct); + if (ct->type == INVALID_TYPE) { + luaL_error(L, "expected cdata for arg #%d", idx); + } + return p; +} + diff --git a/tools/luaffi/dynasm/dasm_arm.h b/tools/luaffi/dynasm/dasm_arm.h new file mode 100644 index 0000000..71a4979 --- /dev/null +++ b/tools/luaffi/dynasm/dasm_arm.h @@ -0,0 +1,455 @@ +/* +** DynASM ARM encoding engine. +** Copyright (C) 2005-2011 Mike Pall. All rights reserved. +** Released under the MIT/X license. See dynasm.lua for full copyright notice. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "arm" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, + DASM_LONG, DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes.
Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. 
*/ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imm12(unsigned int n) +{ + int i; + for (i = 0; i < 16; i++, n = (n << 2) | (n >> 30)) + if (n <= 255) return (int)(n + (i << 8)); + return -1; +} + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + if (n >= 0) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */ + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_LONG: + ofs += 4; + b[pos++] = n; + break; + case DASM_IMM: + case DASM_IMM16: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); + if ((ins & 0x8000)) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMML8: + case DASM_IMML12: + CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) : + (((-n)>>((ins>>5)&31)) == 0), RANGE_I); + b[pos++] = n; + break; + case DASM_IMM12: + CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_LONG: case DASM_IMM: case DASM_IMM12: case DASM_IMM16: + case DASM_IMML8: case DASM_IMML12: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. 
*/ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) - 4; + patchrel: + if ((ins & 0x800) == 0) { + CK((n & 3) == 0 && ((n+0x02000000) >> 26) == 0, RANGE_REL); + cp[-1] |= ((n >> 2) & 0x00ffffff); + } else if ((ins & 0x1000)) { + CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL); + goto patchimml8; + } else { + CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL); + goto patchimml12; + } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_LONG: + *cp++ = n; + break; + case DASM_IMM: + cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMM12: + cp[-1] |= dasm_imm12((unsigned int)n); + break; + case DASM_IMM16: + cp[-1] |= ((n & 0xf000) << 4) | (n & 0x0fff); + break; + case DASM_IMML8: patchimml8: + cp[-1] |= n >= 0 ? 
(0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) : + ((-n & 0x0f) | ((-n & 0xf0) << 4)); + break; + case DASM_IMML12: patchimml12: + cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/tools/luaffi/dynasm/dasm_arm.lua b/tools/luaffi/dynasm/dasm_arm.lua new file mode 100644 index 0000000..a54230d --- /dev/null +++ b/tools/luaffi/dynasm/dasm_arm.lua @@ -0,0 +1,952 @@ +------------------------------------------------------------------------------ +-- DynASM ARM module. +-- +-- Copyright (C) 2005-2011 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "arm", + description = "DynASM ARM module", + version = "1.3.0", + vernum = 10300, + release = "2011-05-05", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. 
+local _M = { _info = _info } + +-- Cache library functions in locals. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list, in action number order. +-- CHECK: Keep this in sync with the action enum in dasm_arm.h! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "LONG", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with DASM_MAXSECPOS in dasm_arm.h! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number (0-based: list index minus one). +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Return x formatted as hex, truncated to its last 8 digits. +local function tohex(x) + return sub(format("%08x", x), -8) -- Avoid 64 bit portability problem in Lua. +end + +-- Dump action names and numbers (hex and decimal) to out. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array.
+local function writeactions(out, name) -- out: output file handle, name: C identifier of the array. + local nn = #actlist + if nn == 0 then nn = 1; actlist[1] = map_action.STOP end -- Empty list: emit a lone STOP word (slot 1, since actlist[nn] is read below). + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. Must be an integer in 0..0xffffffff. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put word, preceded by an ESC action if it is <= 0x000fffff (so its upper bits are not decoded as an action number). +local function wputw(n) + if n <= 0x000fffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word (an empty-string placeholder, to be filled by wputpos). +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. A word needing escape becomes an ESC action with the word inserted right after it. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + if n <= 0x000fffff then + insert(actlist, pos+1, n) + n = map_action.ESC * 0x10000 + end + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global label names to out, in label number order (numbering starts at 20). +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum: one prefixed entry per label, then prefix.."_MAX". +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names as a C string array terminated by a null pointer. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +-- map_extern_ is the reverse table (number -> name); numbering starts at 0. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern label names to out, in label number order. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names.
-- Write the extern label names as a NULL-terminated C string array called
-- `name` to the output object `out`.
-- NOTE(review): next_extern and map_extern_ are defined earlier in this file
-- (not visible here); presumably the extern count and the number -> name
-- table -- confirm against their definitions.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  for i=0,next_extern-1 do
    out:write(" \"", map_extern_[i], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.

-- Ext. register name -> int. name.
local map_archdef = { sp = "r13", lr = "r14", pc = "r15", }

-- Int. register name -> ext. name.
local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", }

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).

-- Reverse defines for registers.
-- Returns the external alias (sp/lr/pc) for r13/r14/r15 and the unchanged
-- string for everything else.
function _M.revdef(s)
  return map_reg_rev[s] or s
end

-- Shift mnemonic -> 2 bit shift type (scaled by 32 in parse_shift).
local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, }

-- Condition suffix -> condition code. hs/lo are aliases of cs/cc.
-- The code replaces the leading hex digit of a template (see mergemaps).
local map_cond = {
  eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
  hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
  hs = 2, lo = 3,
}

------------------------------------------------------------------------------

-- Template strings for ARM instructions.
--
-- Keys are "<mnemonic>_<number of operands>". The first 8 characters of a
-- template are the base opcode in hex; the remaining characters are consumed
-- one by one by the ".template__" handler, which adds operand bits:
--   D  register, scaled by 4096 (bits 12..15)
--   N  register, scaled by 65536 (bits 16..19)
--   S  register, scaled by 256 (bits 8..11)
--   M  register, unscaled (bits 0..3)
--   P  "#imm" (rotated 8 bit immediate, sets 0x02000000) or a register
--   p  shift operand (immediate or register shift amount)
--   L  load/store address operand(s), handled by parse_load
--   B  branch target label
--   C  blx operand: register or label
--   n  base register (scaled by 65536) with optional "!" writeback
--   R  register list "{...}"
--   W  16 bit immediate (split 4:12)
--   v  5 bit immediate at bit 7
--   w  5 bit immediate at bit 7 or a shift register (scaled by 256, plus 16)
--   X  5 bit immediate at bit 16
--   K  16 bit immediate (split 12:4)
--   T  24 bit immediate
--   s  ignored by the handler; marks templates that also get an "s" variant
local map_op = {
  -- Basic data processing instructions.
  and_3 = "e0000000DNPs",
  eor_3 = "e0200000DNPs",
  sub_3 = "e0400000DNPs",
  rsb_3 = "e0600000DNPs",
  add_3 = "e0800000DNPs",
  adc_3 = "e0a00000DNPs",
  sbc_3 = "e0c00000DNPs",
  rsc_3 = "e0e00000DNPs",
  tst_2 = "e1100000NP",
  teq_2 = "e1300000NP",
  cmp_2 = "e1500000NP",
  cmn_2 = "e1700000NP",
  orr_3 = "e1800000DNPs",
  mov_2 = "e1a00000DPs",
  bic_3 = "e1c00000DNPs",
  mvn_2 = "e1e00000DPs",

  and_4 = "e0000000DNMps",
  eor_4 = "e0200000DNMps",
  sub_4 = "e0400000DNMps",
  rsb_4 = "e0600000DNMps",
  add_4 = "e0800000DNMps",
  adc_4 = "e0a00000DNMps",
  sbc_4 = "e0c00000DNMps",
  rsc_4 = "e0e00000DNMps",
  tst_3 = "e1100000NMp",
  teq_3 = "e1300000NMp",
  cmp_3 = "e1500000NMp",
  cmn_3 = "e1700000NMp",
  orr_4 = "e1800000DNMps",
  mov_3 = "e1a00000DMps",
  bic_4 = "e1c00000DNMps",
  mvn_3 = "e1e00000DMps",

  lsl_3 = "e1a00000DMws",
  lsr_3 = "e1a00020DMws",
  asr_3 = "e1a00040DMws",
  ror_3 = "e1a00060DMws",
  rrx_2 = "e1a00060DMs",

  -- Multiply and multiply-accumulate.
  mul_3 = "e0000090NMSs",
  mla_4 = "e0200090NMSDs",
  umaal_4 = "e0400090DNMSs", -- v6
  mls_4 = "e0600090DNMSs", -- v6T2
  umull_4 = "e0800090DNMSs",
  umlal_4 = "e0a00090DNMSs",
  smull_4 = "e0c00090DNMSs",
  smlal_4 = "e0e00090DNMSs",

  -- Halfword multiply and multiply-accumulate.
  smlabb_4 = "e1000080NMSD", -- v5TE
  smlatb_4 = "e10000a0NMSD", -- v5TE
  smlabt_4 = "e10000c0NMSD", -- v5TE
  smlatt_4 = "e10000e0NMSD", -- v5TE
  smlawb_4 = "e1200080NMSD", -- v5TE
  smulwb_3 = "e12000a0NMS", -- v5TE
  smlawt_4 = "e12000c0NMSD", -- v5TE
  smulwt_3 = "e12000e0NMS", -- v5TE
  smlalbb_4 = "e1400080NMSD", -- v5TE
  smlaltb_4 = "e14000a0NMSD", -- v5TE
  smlalbt_4 = "e14000c0NMSD", -- v5TE
  smlaltt_4 = "e14000e0NMSD", -- v5TE
  smulbb_3 = "e1600080NMS", -- v5TE
  smultb_3 = "e16000a0NMS", -- v5TE
  smulbt_3 = "e16000c0NMS", -- v5TE
  smultt_3 = "e16000e0NMS", -- v5TE

  -- Miscellaneous data processing instructions.
  clz_2 = "e16f0f10DM", -- v5T
  rev_2 = "e6bf0f30DM", -- v6
  rev16_2 = "e6bf0fb0DM", -- v6
  revsh_2 = "e6ff0fb0DM", -- v6
  sel_3 = "e6800fb0DNM", -- v6
  usad8_3 = "e780f010NMS", -- v6
  usada8_4 = "e7800010NMSD", -- v6
  rbit_2 = "e6ff0f30DM", -- v6T2
  movw_2 = "e3000000DW", -- v6T2
  movt_2 = "e3400000DW", -- v6T2
  -- Note: the X encodes width-1, not width.
  sbfx_4 = "e7a00050DMvX", -- v6T2
  ubfx_4 = "e7e00050DMvX", -- v6T2
  -- Note: the X encodes the msb field, not the width.
  bfc_3 = "e7c0001fDvX", -- v6T2
  bfi_4 = "e7c00010DMvX", -- v6T2

  -- Packing and unpacking instructions.
  pkhbt_3 = "e6800010DNM", pkhbt_4 = "e6800010DNMv", -- v6
  pkhtb_3 = "e6800050DNM", pkhtb_4 = "e6800050DNMv", -- v6
  sxtab_3 = "e6a00070DNM", sxtab_4 = "e6a00070DNMv", -- v6
  sxtab16_3 = "e6800070DNM", sxtab16_4 = "e6800070DNMv", -- v6
  sxtah_3 = "e6b00070DNM", sxtah_4 = "e6b00070DNMv", -- v6
  sxtb_2 = "e6af0070DM", sxtb_3 = "e6af0070DMv", -- v6
  sxtb16_2 = "e68f0070DM", sxtb16_3 = "e68f0070DMv", -- v6
  sxth_2 = "e6bf0070DM", sxth_3 = "e6bf0070DMv", -- v6
  uxtab_3 = "e6e00070DNM", uxtab_4 = "e6e00070DNMv", -- v6
  uxtab16_3 = "e6c00070DNM", uxtab16_4 = "e6c00070DNMv", -- v6
  uxtah_3 = "e6f00070DNM", uxtah_4 = "e6f00070DNMv", -- v6
  uxtb_2 = "e6ef0070DM", uxtb_3 = "e6ef0070DMv", -- v6
  uxtb16_2 = "e6cf0070DM", uxtb16_3 = "e6cf0070DMv", -- v6
  uxth_2 = "e6ff0070DM", uxth_3 = "e6ff0070DMv", -- v6

  -- Saturating instructions.
  qadd_3 = "e1000050DMN", -- v5TE
  qsub_3 = "e1200050DMN", -- v5TE
  qdadd_3 = "e1400050DMN", -- v5TE
  qdsub_3 = "e1600050DMN", -- v5TE
  -- Note: the X for ssat* encodes sat_imm-1, not sat_imm.
  ssat_3 = "e6a00010DXM", ssat_4 = "e6a00010DXMp", -- v6
  usat_3 = "e6e00010DXM", usat_4 = "e6e00010DXMp", -- v6
  ssat16_3 = "e6a00f30DXM", -- v6
  usat16_3 = "e6e00f30DXM", -- v6

  -- Parallel addition and subtraction.
  sadd16_3 = "e6100f10DNM", -- v6
  sasx_3 = "e6100f30DNM", -- v6
  ssax_3 = "e6100f50DNM", -- v6
  ssub16_3 = "e6100f70DNM", -- v6
  sadd8_3 = "e6100f90DNM", -- v6
  ssub8_3 = "e6100ff0DNM", -- v6
  qadd16_3 = "e6200f10DNM", -- v6
  qasx_3 = "e6200f30DNM", -- v6
  qsax_3 = "e6200f50DNM", -- v6
  qsub16_3 = "e6200f70DNM", -- v6
  qadd8_3 = "e6200f90DNM", -- v6
  qsub8_3 = "e6200ff0DNM", -- v6
  shadd16_3 = "e6300f10DNM", -- v6
  shasx_3 = "e6300f30DNM", -- v6
  shsax_3 = "e6300f50DNM", -- v6
  shsub16_3 = "e6300f70DNM", -- v6
  shadd8_3 = "e6300f90DNM", -- v6
  shsub8_3 = "e6300ff0DNM", -- v6
  uadd16_3 = "e6500f10DNM", -- v6
  uasx_3 = "e6500f30DNM", -- v6
  usax_3 = "e6500f50DNM", -- v6
  usub16_3 = "e6500f70DNM", -- v6
  uadd8_3 = "e6500f90DNM", -- v6
  usub8_3 = "e6500ff0DNM", -- v6
  uqadd16_3 = "e6600f10DNM", -- v6
  uqasx_3 = "e6600f30DNM", -- v6
  uqsax_3 = "e6600f50DNM", -- v6
  uqsub16_3 = "e6600f70DNM", -- v6
  uqadd8_3 = "e6600f90DNM", -- v6
  uqsub8_3 = "e6600ff0DNM", -- v6
  uhadd16_3 = "e6700f10DNM", -- v6
  uhasx_3 = "e6700f30DNM", -- v6
  uhsax_3 = "e6700f50DNM", -- v6
  uhsub16_3 = "e6700f70DNM", -- v6
  uhadd8_3 = "e6700f90DNM", -- v6
  uhsub8_3 = "e6700ff0DNM", -- v6

  -- Load/store instructions.
  str_2 = "e4000000DL", str_3 = "e4000000DL", str_4 = "e4000000DL",
  strb_2 = "e4400000DL", strb_3 = "e4400000DL", strb_4 = "e4400000DL",
  ldr_2 = "e4100000DL", ldr_3 = "e4100000DL", ldr_4 = "e4100000DL",
  ldrb_2 = "e4500000DL", ldrb_3 = "e4500000DL", ldrb_4 = "e4500000DL",
  strh_2 = "e00000b0DL", strh_3 = "e00000b0DL",
  ldrh_2 = "e01000b0DL", ldrh_3 = "e01000b0DL",
  ldrd_2 = "e00000d0DL", ldrd_3 = "e00000d0DL", -- v5TE
  ldrsb_2 = "e01000d0DL", ldrsb_3 = "e01000d0DL",
  strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE
  ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL",

  ldm_2 = "e8900000nR", ldmia_2 = "e8900000nR", ldmfd_2 = "e8900000nR",
  ldmda_2 = "e8100000nR", ldmfa_2 = "e8100000nR",
  ldmdb_2 = "e9100000nR", ldmea_2 = "e9100000nR",
  ldmib_2 = "e9900000nR", ldmed_2 = "e9900000nR",
  stm_2 = "e8800000nR", stmia_2 = "e8800000nR", stmfd_2 = "e8800000nR",
  stmda_2 = "e8000000nR", stmfa_2 = "e8000000nR",
  stmdb_2 = "e9000000nR", stmea_2 = "e9000000nR",
  stmib_2 = "e9800000nR", stmed_2 = "e9800000nR",
  pop_1 = "e8bd0000R", push_1 = "e92d0000R",

  -- Branch instructions.
  b_1 = "ea000000B",
  bl_1 = "eb000000B",
  blx_1 = "e12fff30C",
  bx_1 = "e12fff10M",

  -- Miscellaneous instructions.
  nop_0 = "e1a00000",
  mrs_1 = "e10f0000D",
  bkpt_1 = "e1200070K", -- v5T
  svc_1 = "ef000000T", swi_1 = "ef000000T",
  ud_0 = "e7f001f0",

  -- NYI: Advanced SIMD and VFP instructions.

  -- NYI instructions, since I have no need for them right now:
  -- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh
  -- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe
  -- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb
  -- stc, ldc, mcr, mcr2, mrc, mrc2, mcrr, mcrr2, mrrc, mrrc2, cdp, cdp2
}

-- Add mnemonics for "s" variants.
-- For every template ending in "s", register a second mnemonic with an "s"
-- inserted before the "_<n>" suffix (e.g. add_3 -> adds_3). The variant's
-- opcode has its third hex digit incremented by one and the trailing "s"
-- marker removed.
do
  local t = {}
  for k,v in pairs(map_op) do
    if sub(v, -1) == "s" then
      local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2)
      t[sub(k, 1, -3).."s"..sub(k, -2)] = v2
    end
  end
  -- Merge afterwards: new keys must not be added while traversing map_op.
  for k,v in pairs(t) do
    map_op[k] = v
  end
end

------------------------------------------------------------------------------

-- Parse a general purpose register operand.
-- Accepts "rN" (N = 0..15), a type name with a default register, or
-- "type:rN" (type with register override).
-- Returns the register number and the type entry (nil for a plain register).
local function parse_gpr(expr)
  local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local r = match(expr, "^r(1?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 15 then return r, tp end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a register with an optional leading "+" or "-".
-- Returns the register number and true if the sign was "-".
local function parse_gpr_pm(expr)
  local pm, expr2 = match(expr, "^([+-]?)(.*)$")
  return parse_gpr(expr2), (pm == "-")
end

-- Parse a register list "{r0, r1, ...}" into a bit mask (bit N = rN).
-- Duplicate registers are rejected.
local function parse_reglist(reglist)
  reglist = match(reglist, "^{%s*([^}]*)}$")
  if not reglist then werror("register list expected") end
  local rr = 0
  for p in gmatch(reglist..",", "%s*([^,]*),") do
    local rbit = 2^parse_gpr(gsub(p, "%s+$", ""))
    -- Test whether bit rbit is already set in rr (no bit library needed).
    if ((rr - (rr % rbit)) / rbit) % 2 ~= 0 then
      werror("duplicate register `"..p.."'")
    end
    rr = rr + rbit
  end
  return rr
end

-- Parse a "#imm" operand for a `bits` wide field placed at bit `shift`.
-- The value must be a multiple of 2^scale and is divided by it first.
-- Signed fields accept -2^(bits-1) .. 2^(bits-1)-1 (two's complement),
-- unsigned fields accept 0 .. 2^bits-1.
-- A non-numeric expression is deferred to runtime via an IMM action whose
-- argument packs signed/scale/bits/shift; 0 is returned in that case.
local function parse_imm(imm, bits, shift, scale, signed)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = tonumber(imm)
  if n then
    if n % 2^scale == 0 then
      n = n / 2^scale
      if signed then
        if n >= 0 then
          if n < 2^(bits-1) then return n*2^shift end
        else
          -- Lower bound is exactly -2^(bits-1). Anything smaller would wrap
          -- around and alias a positive encoding.
          if n >= -(2^(bits-1)) then return (n+2^bits)*2^shift end
        end
      else
        if n >= 0 and n <= 2^bits-1 then return n*2^shift end
      end
    end
    werror("out of range immediate `"..imm.."'")
  else
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
end

-- Encode a data processing immediate (8 bit value plus 4 bit rotate field).
-- `imm` is the text after the "#". Tries all 16 rotations; m is rotated
-- right by two bits per iteration. Non-numeric expressions are deferred
-- via an IMM12 action.
local function parse_imm12(imm)
  local n = tonumber(imm)
  if n then
    local m = n
    for i=0,-15,-1 do
      if m >= 0 and m <= 255 and n % 1 == 0 then return m + (i%16) * 256 end
      local t = m % 4
      m = (m - t) / 4 + t * 2^30
    end
    werror("out of range immediate `"..imm.."'")
  else
    waction("IMM12", 0, imm)
    return 0
  end
end

-- Encode a 16 bit "#imm" operand: the upper 4 bits are moved up by 4 bit
-- positions, the lower 12 bits stay in place. Non-numeric expressions are
-- deferred via an IMM16 action.
local function parse_imm16(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = tonumber(imm)
  if n then
    if n >= 0 and n <= 65535 and n % 1 == 0 then
      local t = n % 4096
      return (n - t) * 16 + t
    end
    werror("out of range immediate `"..imm.."'")
  else
    waction("IMM16", 32*16, imm)
    return 0
  end
end

-- Encode a load/store offset (`imm` is the text after the "#").
-- ext = true:  magnitude up to 255, split into two nibbles.
-- ext = false: magnitude up to 4095.
-- Non-negative offsets add 0x00800000. Non-numeric expressions are deferred
-- via an IMML8/IMML12 action.
local function parse_imm_load(imm, ext)
  local n = tonumber(imm)
  if n then
    if ext then
      if n >= -255 and n <= 255 then
        local up = 0x00800000
        if n < 0 then n = -n; up = 0 end
        return (n-(n%16))*16+(n%16) + up
      end
    else
      if n >= -4095 and n <= 4095 then
        if n >= 0 then return n+0x00800000 end
        return -n
      end
    end
    werror("out of range immediate `"..imm.."'")
  else
    waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12), imm)
    return 0
  end
end

-- Parse a shift operand: "rrx", "<shift> #imm" or, if gprok is true,
-- "<shift> <reg>". Returns the bits to add to the opcode.
local function parse_shift(shift, gprok)
  if shift == "rrx" then
    return 3 * 32
  else
    local s, s2 = match(shift, "^(%S+)%s*(.*)$")
    s = map_shift[s]
    if not s then werror("expected shift operand") end
    if sub(s2, 1, 1) == "#" then
      return parse_imm(s2, 5, 7, 0, false) + s * 32
    else
      if not gprok then werror("expected immediate shift operand") end
      return parse_gpr(s2) * 256 + s * 32 + 16
    end
  end
end

-- Parse a label. `def` selects definition syntax (true) or reference
-- syntax (false). Returns the action mode ("PC", "LG" or "EXT"), the label
-- number and, for PC labels, the C expression.
local function parse_label(label, def)
  local prefix = sub(label, 1, 2)
  -- =>label (pc label reference)
  if prefix == "=>" then
    return "PC", 0, sub(label, 3)
  end
  -- ->name (global label reference)
  if prefix == "->" then
    return "LG", map_global[sub(label, 3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference)
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then -- Fwd: 1-9, Bkwd: 11-19.
      return "LG", lnum + (dir == ">" and 0 or 10)
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end

-- Parse the address operand(s) of a load/store instruction, starting at
-- params[n], and return the updated opcode.
-- Handles label references, "type:reg->field" style operands (tail is
-- passed through the type's Dt macro), "[reg]", "[reg, #imm]",
-- "[reg, +-reg {, shift}]", the "!" writeback suffix and the post-indexed
-- forms "[reg], #imm" and "[reg], +-reg {, shift}".
local function parse_load(params, nparams, n, op)
  local oplo = op % 256
  -- ext: templates with a non-zero low opcode byte use the 8 bit offset form.
  local ext, ldrd = (oplo ~= 0), (oplo == 208)
  local d
  if (ldrd or oplo == 240) then
    -- Low byte 0xd0/0xf0: the destination register field must be even.
    d = ((op - (op % 4096)) / 4096) % 16
    if d % 2 ~= 0 then werror("odd destination register") end
  end
  local pn = params[n]
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  local p2 = params[n+1]
  if not p1 then
    if not p2 then
      if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then
        local mode, n, s = parse_label(pn, false)
        waction("REL_"..mode, n + (ext and 0x1800 or 0x0800), s, 1)
        return op + 15 * 65536 + 0x01000000 + (ext and 0x00400000 or 0)
      end
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local d, tp = parse_gpr(reg)
        if tp then
          waction(ext and "IMML8" or "IMML12", 32768 + 32*(ext and 8 or 12),
                  format(tp.ctypefmt, tailr))
          return op + d * 65536 + 0x01000000 + (ext and 0x00400000 or 0)
        end
      end
    end
    werror("expected address operand")
  end
  if wb == "!" then op = op + 0x00200000 end
  if p2 then
    -- Post-indexed form: "[reg], offset".
    if wb == "!" then werror("bad use of '!'") end
    local p3 = params[n+2]
    op = op + parse_gpr(p1) * 65536
    local imm = match(p2, "^#(.*)$")
    if imm then
      local m = parse_imm_load(imm, ext)
      if p3 then werror("too many parameters") end
      op = op + m + (ext and 0x00400000 or 0)
    else
      local m, neg = parse_gpr_pm(p2)
      if ldrd and (m == d or m-1 == d) then werror("register conflict") end
      op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
      if p3 then op = op + parse_shift(p3) end
    end
  else
    -- Offset / pre-indexed form: everything is inside the brackets.
    local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$")
    op = op + parse_gpr(p1a) * 65536 + 0x01000000
    if p2 ~= "" then
      local imm = match(p2, "^,%s*#(.*)$")
      if imm then
        local m = parse_imm_load(imm, ext)
        op = op + m + (ext and 0x00400000 or 0)
      else
        local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$")
        local m, neg = parse_gpr_pm(p2a)
        if ldrd and (m == d or m-1 == d) then werror("register conflict") end
        op = op + m + (neg and 0 or 0x00800000) + (ext and 0 or 0x02000000)
        if p3 ~= "" then
          if ext then werror("too many parameters") end
          op = op + parse_shift(p3)
        end
      end
    else
      if wb == "!" then werror("bad use of '!'") end
      op = op + (ext and 0x00c00000 or 0x00800000)
    end
  end
  return op
end

------------------------------------------------------------------------------

-- Handle opcodes defined with template strings.
-- Called without params it returns the operand part of the template.
-- Otherwise the first 8 hex digits form the base opcode and each following
-- template character consumes/encodes one operand (see the branches below).
map_op[".template__"] = function(params, template, nparams)
  if not params then return sub(template, 9) end
  local op = tonumber(sub(template, 1, 8), 16)
  local n = 1

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 3 positions.
  if secpos+3 > maxsecpos then wflush() end
  local pos = wpos()

  -- Process each character.
  for p in gmatch(sub(template, 9), ".") do
    if p == "D" then
      op = op + parse_gpr(params[n]) * 4096; n = n + 1
    elseif p == "N" then
      op = op + parse_gpr(params[n]) * 65536; n = n + 1
    elseif p == "S" then
      op = op + parse_gpr(params[n]) * 256; n = n + 1
    elseif p == "M" then
      op = op + parse_gpr(params[n]); n = n + 1
    elseif p == "P" then
      local imm = match(params[n], "^#(.*)$")
      if imm then
        op = op + parse_imm12(imm) + 0x02000000
      else
        op = op + parse_gpr(params[n])
      end
      n = n + 1
    elseif p == "p" then
      op = op + parse_shift(params[n], true); n = n + 1
    elseif p == "L" then
      op = parse_load(params, nparams, n, op)
    elseif p == "B" then
      local mode, n, s = parse_label(params[n], false)
      waction("REL_"..mode, n, s, 1)
    elseif p == "C" then -- blx gpr vs. blx label.
      local p = params[n]
      if match(p, "^([%w_]+):(r1?[0-9])$") or match(p, "^r(1?[0-9])$") then
        op = op + parse_gpr(p)
      else
        if op < 0xe0000000 then werror("unconditional instruction") end
        local mode, n, s = parse_label(p, false)
        waction("REL_"..mode, n, s, 1)
        op = 0xfa000000
      end
    elseif p == "n" then
      local r, wb = match(params[n], "^([^!]*)(!?)$")
      op = op + parse_gpr(r) * 65536 + (wb == "!" and 0x00200000 or 0)
      n = n + 1
    elseif p == "R" then
      op = op + parse_reglist(params[n]); n = n + 1
    elseif p == "W" then
      op = op + parse_imm16(params[n]); n = n + 1
    elseif p == "v" then
      op = op + parse_imm(params[n], 5, 7, 0, false); n = n + 1
    elseif p == "w" then
      local imm = match(params[n], "^#(.*)$")
      if imm then
        op = op + parse_imm(params[n], 5, 7, 0, false); n = n + 1
      else
        -- Consume the operand here too, like every other operand branch.
        op = op + parse_gpr(params[n]) * 256 + 16; n = n + 1
      end
    elseif p == "X" then
      op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1
    elseif p == "K" then
      local imm = tonumber(match(params[n], "^#(.*)$")); n = n + 1
      if not imm or imm % 1 ~= 0 or imm < 0 or imm > 0xffff then
        werror("bad immediate operand")
      end
      -- Split 12:4: the low nibble stays, the rest moves up by 4 bits.
      local t = imm % 16
      op = op + (imm - t) * 16 + t
    elseif p == "T" then
      op = op + parse_imm(params[n], 24, 0, 0, false); n = n + 1
    elseif p == "s" then
      -- Ignored.
    else
      assert(false)
    end
  end
  wputpos(pos, op)
end

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, name) end)
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local prefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, prefix) end)
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, name) end)
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, name) end)
end

------------------------------------------------------------------------------

-- Label pseudo-opcode (converted from trailing colon form).
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  if secpos+1 > maxsecpos then wflush() end
  local mode, n, s = parse_label(params[1], true)
  if mode == "EXT" then werror("bad label definition") end
  waction("LABEL_"..mode, n, s, 1)
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
-- Numeric operands are emitted as literal words (negative values are
-- wrapped to unsigned 32 bit); anything else is deferred via a LONG action.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _,p in ipairs(params) do
    local n = tonumber(p)
    if n then
      if n < 0 then n = n + 2^32 end
      wputw(n)
      if secpos+2 > maxsecpos then wflush() end
    else
      waction("LONG", 0, format("(uintptr_t)(%s)", p))
    end
  end
end

-- Alignment pseudo-opcode.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    for i=1,8 do
      x = x / 2
      if x == 1 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- ".type name, ctype [, reg]": register a C type under `name`, optionally
-- bound to a default register, and emit the matching Dt<N>() offset macro.
-- Called without params it returns the operand description for the help text.
local function define_type(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name = params[1]
  local ctype = params[2]
  local reg = params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local idx = ctypenum + 1
  local entry = {}
  entry.ctype = ctype
  entry.ctypefmt = format("Dt%X(%%s)", idx)
  entry.reg = reg
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", idx, ctype))
  ctypenum = idx
end
map_op[".type_3"] = define_type
map_op[".type_2"] = define_type

-- Dump type definitions (sorted by type name).
local function dumptypes(out, lvl)
  local names, count = {}, 0
  for tname in pairs(map_type) do
    count = count + 1
    names[count] = tname
  end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, count do
    local tname = names[i]
    local entry = map_type[tname]
    out:write(format(" %-20s %-20s %s\n", tname, entry.ctype, entry.reg or ""))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Set the current section.
function _M.section(num)
  waction("SECTION", num)
  -- SECTION is a terminal action, so flush without appending STOP.
  wflush(true)
end

------------------------------------------------------------------------------

-- Dump architecture description.
function _M.dumparch(out)
  local header = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(header)
  dumpactions(out)
end

-- Dump all user defined elements: types, then globals, then externs.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end

------------------------------------------------------------------------------

-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = function(t, k) + local v = map_coreop[k] + if v then return v end + local cc = sub(k, -4, -3) + local cv = map_cond[cc] + if cv then + local v = rawget(t, sub(k, 1, -5)..sub(k, -2)) + if type(v) == "string" then return format("%x%s", cv, sub(v, 2)) end + end + end }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/tools/luaffi/dynasm/dasm_ppc.h b/tools/luaffi/dynasm/dasm_ppc.h new file mode 100644 index 0000000..e928ffe --- /dev/null +++ b/tools/luaffi/dynasm/dasm_ppc.h @@ -0,0 +1,408 @@ +/* +** DynASM PPC encoding engine. +** Copyright (C) 2005-2011 Mike Pall. All rights reserved. +** Released under the MIT/X license. See dynasm.lua for full copyright notice. +*/ + +#include +#include +#include +#include + +#define DASM_ARCH "ppc" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. 
*/ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. 
*/ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + if (n >= 0) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */ + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). 
*/ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); + patchrel: + CK((n & 3) == 0 && + (((n+4) + ((ins & 2048) ? 0x00008000 : 0x02000000)) >> + ((ins & 2048) ? 16 : 26)) == 0, RANGE_REL); + cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc)); + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= ((n>>((ins>>10)&31)) & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. 
*/ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + return D->status; +} +#endif + diff --git a/tools/luaffi/dynasm/dasm_ppc.lua b/tools/luaffi/dynasm/dasm_ppc.lua new file mode 100644 index 0000000..bb6de01 --- /dev/null +++ b/tools/luaffi/dynasm/dasm_ppc.lua @@ -0,0 +1,1225 @@ +------------------------------------------------------------------------------ +-- DynASM PPC module. +-- +-- Copyright (C) 2005-2011 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "ppc", + description = "DynASM PPC module", + version = "1.3.0", + vernum = 10300, + release = "2011-05-05", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! 
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number.
-- Numbers are zero-based positions in action_names.
local map_action = {}
for n,name in ipairs(action_names) do
  map_action[name] = n-1
end

-- Action list buffer.
local actlist = {}

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1

------------------------------------------------------------------------------

-- Return 8 digit hex number.
local function tohex(x)
  return sub(format("%08x", x), -8) -- Avoid 64 bit portability problem in Lua.
end

-- Dump action names and numbers.
-- `out` is any object with a file-like :write() method.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
-- `out` is a file-like object, `name` the C identifier of the array.
-- An empty action list is emitted as a single STOP word so that the
-- generated C array is never zero-sized.
local function writeactions(out, name)
  local words = actlist
  local nn = #words
  if nn == 0 then
    -- The previous code stored STOP at index 0 but then read index 1 (nil),
    -- which made tohex() raise. Use a private one-element list instead and
    -- leave the shared action list untouched.
    words = { map_action.STOP }
    nn = 1
  end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(words[i]), ",\n"))
  end
  assert(out:write("0x", tohex(words[nn]), "\n};\n\n"))
end

------------------------------------------------------------------------------

-- Add word to action list.
-- The word must be an integral value in the unsigned 32 bit range.
local function wputxw(n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
--   action: action name (key of map_action).
--   val:    value stored in the low 16 bits of the action word (default 0).
--   a:      optional C expression appended to the dasm_put() argument list.
--   num:    number of buffer positions consumed (default 1 when `a` is set).
local function waction(action, val, a, num)
  local w = assert(map_action[action], "bad action name `"..action.."'")
  wputxw(w * 0x10000 + (val or 0))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end

-- Flush action list (intervening C code or buffer pos overflow).
-- Emits one dasm_put() line covering everything queued since the previous
-- flush. A STOP action is appended first unless `term` says the queued
-- actions already end with a terminal one.
local function wflush(term)
  if #actlist ~= actargs[1] then -- Otherwise there is nothing to flush.
    if not term then waction("STOP") end -- Terminate action list.
    local arglist = concat(actargs, ", ")
    wline(format("dasm_put(Dst, %s);", arglist), true)
    -- The action list offset is the first argument of the next dasm_put()
    -- and occupies a buffer position of its own.
    actargs = { #actlist }
    secpos = 1
  end
end

-- Put escaped word: small values get an ESC action in front of them.
local function wputw(n)
  local needs_escape = n <= 0xffffff
  if needs_escape then waction("ESC") end
  wputxw(n)
end

-- Reserve a slot in the action list and return its index.
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end

-- Store a word into a slot previously reserved with wpos().
local function wputpos(pos, n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  actlist[pos] = n
end

------------------------------------------------------------------------------

-- Global label name -> global label number. Numbers are handed out on the
-- first lookup of a name, starting at 20.
local next_global = 20
local map_global = setmetatable({}, { __index = function(t, name)
  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
  local num = next_global
  if num > 2047 then werror("too many global labels") end
  t[name] = num
  next_global = num + 1
  return num
end})

-- Dump global labels in numbering order.
local function dumpglobals(out, lvl)
  local names = {}
  for label, num in pairs(map_global) do names[num] = label end
  out:write("Global labels:\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(format(" %s\n", names[num]))
  end
  out:write("\n")
end

-- Write the global labels as a C enum, each name prefixed with `prefix`,
-- closed by a <prefix>_MAX entry.
local function writeglobals(out, prefix)
  local names = {}
  for label, num in pairs(map_global) do names[num] = label end
  out:write("enum {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(" ", prefix, names[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end

-- Write global label names.
-- Emits a NULL-terminated C array called `name` holding the global label
-- names in numbering order.
local function writeglobalnames(out, name)
  local labels = {}
  for label, num in pairs(map_global) do labels[num] = label end
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(" \"", labels[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. Numbers are handed out on the
-- first lookup of a name, starting at 0. map_extern_ is the reverse map.
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, { __index = function(t, name)
  -- No restrictions on the name for now.
  local num = next_extern
  if num > 2047 then werror("too many extern labels") end
  map_extern_[num] = name
  t[name] = num
  next_extern = num + 1
  return num
end})

-- Dump extern labels in numbering order.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local last = next_extern - 1
  for num = 0, last do
    out:write(format(" %s\n", map_extern_[num]))
  end
  out:write("\n")
end

-- Emits a NULL-terminated C array called `name` holding the extern label
-- names in numbering order.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_extern - 1
  for num = 0, last do
    out:write(" \"", map_extern_[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
local map_archdef = { sp = "r1" } -- Ext. register name -> int. name.

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).

-- Reverse defines for registers: r1 is shown as sp, anything else is
-- returned unchanged.
function _M.revdef(s)
  return s == "r1" and "sp" or s
end

-- Condition name -> condition number.
local map_cond = {
  lt = 0,
  gt = 1,
  eq = 2,
  so = 3,
  ge = 4,
  le = 5,
  ne = 6,
  ns = 7,
}

------------------------------------------------------------------------------

-- Template strings for PPC instructions.
+local map_op = { + tdi_3 = "08000000ARI", + twi_3 = "0c000000ARI", + mulli_3 = "1c000000RRI", + subfic_3 = "20000000RRI", + cmplwi_3 = "28000000XRU", + cmplwi_2 = "28000000-RU", + cmpldi_3 = "28200000XRU", + cmpldi_2 = "28200000-RU", + cmpwi_3 = "2c000000XRI", + cmpwi_2 = "2c000000-RI", + cmpdi_3 = "2c200000XRI", + cmpdi_2 = "2c200000-RI", + addic_3 = "30000000RRI", + ["addic._3"] = "34000000RRI", + addi_3 = "38000000RR0I", + li_2 = "38000000RI", + la_2 = "38000000RD", + addis_3 = "3c000000RR0I", + lis_2 = "3c000000RI", + lus_2 = "3c000000RU", + bc_3 = "40000000AAK", + bcl_3 = "40000001AAK", + bdnz_1 = "42000000K", + bdz_1 = "42400000K", + sc_0 = "44000000", + b_1 = "48000000J", + bl_1 = "48000001J", + rlwimi_5 = "50000000RR~AAA.", + rlwinm_5 = "54000000RR~AAA.", + rlwnm_5 = "5c000000RR~RAA.", + ori_3 = "60000000RR~U", + nop_0 = "60000000", + oris_3 = "64000000RR~U", + xori_3 = "68000000RR~U", + xoris_3 = "6c000000RR~U", + ["andi._3"] = "70000000RR~U", + ["andis._3"] = "74000000RR~U", + lwz_2 = "80000000RD", + lwzu_2 = "84000000RD", + lbz_2 = "88000000RD", + lbzu_2 = "8c000000RD", + stw_2 = "90000000RD", + stwu_2 = "94000000RD", + stb_2 = "98000000RD", + stbu_2 = "9c000000RD", + lhz_2 = "a0000000RD", + lhzu_2 = "a4000000RD", + lha_2 = "a8000000RD", + lhau_2 = "ac000000RD", + sth_2 = "b0000000RD", + sthu_2 = "b4000000RD", + lmw_2 = "b8000000RD", + stmw_2 = "bc000000RD", + lfs_2 = "c0000000FD", + lfsu_2 = "c4000000FD", + lfd_2 = "c8000000FD", + lfdu_2 = "cc000000FD", + stfs_2 = "d0000000FD", + stfsu_2 = "d4000000FD", + stfd_2 = "d8000000FD", + stfdu_2 = "dc000000FD", + ld_2 = "e8000000RD", -- NYI: displacement must be divisible by 4. 
+ ldu_2 = "e8000001RD", + lwa_2 = "e8000002RD", + std_2 = "f8000000RD", + stdu_2 = "f8000001RD", + + -- Primary opcode 19: + mcrf_2 = "4c000000XX", + isync_0 = "4c00012c", + crnor_3 = "4c000042CCC", + crnot_2 = "4c000042CC=", + crandc_3 = "4c000102CCC", + crxor_3 = "4c000182CCC", + crclr_1 = "4c000182C==", + crnand_3 = "4c0001c2CCC", + crand_3 = "4c000202CCC", + creqv_3 = "4c000242CCC", + crset_1 = "4c000242C==", + crorc_3 = "4c000342CCC", + cror_3 = "4c000382CCC", + crmove_2 = "4c000382CC=", + bclr_2 = "4c000020AA", + bclrl_2 = "4c000021AA", + bcctr_2 = "4c000420AA", + bcctrl_2 = "4c000421AA", + blr_0 = "4e800020", + blrl_0 = "4e800021", + bctr_0 = "4e800420", + bctrl_0 = "4e800421", + + -- Primary opcode 31: + cmpw_3 = "7c000000XRR", + cmpw_2 = "7c000000-RR", + cmpd_3 = "7c200000XRR", + cmpd_2 = "7c200000-RR", + tw_3 = "7c000008ARR", + subfc_3 = "7c000010RRR.", + subc_3 = "7c000010RRR~.", + mulhdu_3 = "7c000012RRR.", + addc_3 = "7c000014RRR.", + mulhwu_3 = "7c000016RRR.", + isel_4 = "7c00001eRRRC", + isellt_3 = "7c00001eRRR", + iselgt_3 = "7c00005eRRR", + iseleq_3 = "7c00009eRRR", + mfcr_1 = "7c000026R", + -- NYI: mtcrf, mtocrf, mfocrf + lwarx_3 = "7c000028RR0R", + ldx_3 = "7c00002aRR0R", + lwzx_3 = "7c00002eRR0R", + slw_3 = "7c000030RR~R.", + cntlzw_2 = "7c000034RR~", + sld_3 = "7c000036RR~R.", + and_3 = "7c000038RR~R.", + cmplw_3 = "7c000040XRR", + cmplw_2 = "7c000040-RR", + cmpld_3 = "7c200040XRR", + cmpld_2 = "7c200040-RR", + subf_3 = "7c000050RRR.", + sub_3 = "7c000050RRR~.", + ldux_3 = "7c00006aRR0R", + dcbst_2 = "7c00006c-RR", + lwzux_3 = "7c00006eRR0R", + cntlzd_2 = "7c000074RR~", + andc_3 = "7c000078RR~R.", + td_3 = "7c000088ARR", + mulhd_3 = "7c000092RRR.", + mulhw_3 = "7c000096RRR.", + ldarx_3 = "7c0000a8RR0R", + dcbf_2 = "7c0000ac-RR", + lbzx_3 = "7c0000aeRR0R", + neg_2 = "7c0000d0RR.", + lbzux_3 = "7c0000eeRR0R", + popcntb_2 = "7c0000f4RR~", + not_2 = "7c0000f8RR~%.", + nor_3 = "7c0000f8RR~R.", + subfe_3 = "7c000110RRR.", + sube_3 = "7c000110RRR~.", 
+ adde_3 = "7c000114RRR.", + stdx_3 = "7c00012aRR0R", + stwcx_3 = "7c00012cRR0R.", + stwx_3 = "7c00012eRR0R", + prtyw_2 = "7c000134RR~", + stdux_3 = "7c00016aRR0R", + stwux_3 = "7c00016eRR0R", + prtyd_2 = "7c000174RR~", + subfze_2 = "7c000190RR.", + addze_2 = "7c000194RR.", + stdcx_3 = "7c0001acRR0R.", + stbx_3 = "7c0001aeRR0R", + subfme_2 = "7c0001d0RR.", + mulld_3 = "7c0001d2RRR.", + addme_2 = "7c0001d4RR.", + mullw_3 = "7c0001d6RRR.", + dcbtst_2 = "7c0001ec-RR", + stbux_3 = "7c0001eeRR0R", + add_3 = "7c000214RRR.", + dcbt_2 = "7c00022c-RR", + lhzx_3 = "7c00022eRR0R", + eqv_3 = "7c000238RR~R.", + eciwx_3 = "7c00026cRR0R", + lhzux_3 = "7c00026eRR0R", + xor_3 = "7c000278RR~R.", + mfspefscr_1 = "7c0082a6R", + mfxer_1 = "7c0102a6R", + mflr_1 = "7c0802a6R", + mfctr_1 = "7c0902a6R", + lwax_3 = "7c0002aaRR0R", + lhax_3 = "7c0002aeRR0R", + mftb_1 = "7c0c42e6R", + mftbu_1 = "7c0d42e6R", + lwaux_3 = "7c0002eaRR0R", + lhaux_3 = "7c0002eeRR0R", + sthx_3 = "7c00032eRR0R", + orc_3 = "7c000338RR~R.", + ecowx_3 = "7c00036cRR0R", + sthux_3 = "7c00036eRR0R", + or_3 = "7c000378RR~R.", + mr_2 = "7c000378RR~%.", + divdu_3 = "7c000392RRR.", + divwu_3 = "7c000396RRR.", + mtspefscr_1 = "7c0083a6R", + mtxer_1 = "7c0103a6R", + mtlr_1 = "7c0803a6R", + mtctr_1 = "7c0903a6R", + dcbi_2 = "7c0003ac-RR", + nand_3 = "7c0003b8RR~R.", + divd_3 = "7c0003d2RRR.", + divw_3 = "7c0003d6RRR.", + cmpb_3 = "7c0003f8RR~R.", + mcrxr_1 = "7c000400X", + subfco_3 = "7c000410RRR.", + subco_3 = "7c000410RRR~.", + addco_3 = "7c000414RRR.", + ldbrx_3 = "7c000428RR0R", + lswx_3 = "7c00042aRR0R", + lwbrx_3 = "7c00042cRR0R", + lfsx_3 = "7c00042eFR0R", + srw_3 = "7c000430RR~R.", + srd_3 = "7c000436RR~R.", + subfo_3 = "7c000450RRR.", + subo_3 = "7c000450RRR~.", + lfsux_3 = "7c00046eFR0R", + lswi_3 = "7c0004aaRR0A", + sync_0 = "7c0004ac", + lwsync_0 = "7c2004ac", + ptesync_0 = "7c4004ac", + lfdx_3 = "7c0004aeFR0R", + nego_2 = "7c0004d0RR.", + lfdux_3 = "7c0004eeFR0R", + subfeo_3 = "7c000510RRR.", + subeo_3 = 
"7c000510RRR~.", + addeo_3 = "7c000514RRR.", + stdbrx_3 = "7c000528RR0R", + stswx_3 = "7c00052aRR0R", + stwbrx_3 = "7c00052cRR0R", + stfsx_3 = "7c00052eFR0R", + stfsux_3 = "7c00056eFR0R", + subfzeo_2 = "7c000590RR.", + addzeo_2 = "7c000594RR.", + stswi_3 = "7c0005aaRR0A", + stfdx_3 = "7c0005aeFR0R", + subfmeo_2 = "7c0005d0RR.", + mulldo_3 = "7c0005d2RRR.", + addmeo_2 = "7c0005d4RR.", + mullwo_3 = "7c0005d6RRR.", + dcba_2 = "7c0005ec-RR", + stfdux_3 = "7c0005eeFR0R", + addo_3 = "7c000614RRR.", + lhbrx_3 = "7c00062cRR0R", + sraw_3 = "7c000630RR~R.", + srad_3 = "7c000634RR~R.", + srawi_3 = "7c000670RR~A.", + eieio_0 = "7c0006ac", + lfiwax_3 = "7c0006aeFR0R", + sthbrx_3 = "7c00072cRR0R", + extsh_2 = "7c000734RR~.", + extsb_2 = "7c000774RR~.", + divduo_3 = "7c000792RRR.", + divwou_3 = "7c000796RRR.", + icbi_2 = "7c0007ac-RR", + stfiwx_3 = "7c0007aeFR0R", + extsw_2 = "7c0007b4RR~.", + divdo_3 = "7c0007d2RRR.", + divwo_3 = "7c0007d6RRR.", + dcbz_2 = "7c0007ec-RR", + + -- Primary opcode 59: + fdivs_3 = "ec000024FFF.", + fsubs_3 = "ec000028FFF.", + fadds_3 = "ec00002aFFF.", + fsqrts_2 = "ec00002cF-F.", + fres_2 = "ec000030F-F.", + fmuls_3 = "ec000032FF-F.", + frsqrtes_2 = "ec000034F-F.", + fmsubs_4 = "ec000038FFFF~.", + fmadds_4 = "ec00003aFFFF~.", + fnmsubs_4 = "ec00003cFFFF~.", + fnmadds_4 = "ec00003eFFFF~.", + + -- Primary opcode 63: + fdiv_3 = "fc000024FFF.", + fsub_3 = "fc000028FFF.", + fadd_3 = "fc00002aFFF.", + fsqrt_2 = "fc00002cF-F.", + fsel_4 = "fc00002eFFFF~.", + fre_2 = "fc000030F-F.", + fmul_3 = "fc000032FF-F.", + frsqrte_2 = "fc000034F-F.", + fmsub_4 = "fc000038FFFF~.", + fmadd_4 = "fc00003aFFFF~.", + fnmsub_4 = "fc00003cFFFF~.", + fnmadd_4 = "fc00003eFFFF~.", + fcmpu_3 = "fc000000XFF", + fcpsgn_3 = "fc000010FFF.", + fcmpo_3 = "fc000040XFF", + mtfsb1_1 = "fc00004cA", + fneg_2 = "fc000050F-F.", + mcrfs_2 = "fc000080XX", + mtfsb0_1 = "fc00008cA", + fmr_2 = "fc000090F-F.", + frsp_2 = "fc000018F-F.", + fctiw_2 = "fc00001cF-F.", + fctiwz_2 = "fc00001eF-F.", + 
mtfsfi_2 = "fc00010cAA", -- NYI: upshift. + fnabs_2 = "fc000110F-F.", + fabs_2 = "fc000210F-F.", + frin_2 = "fc000310F-F.", + friz_2 = "fc000350F-F.", + frip_2 = "fc000390F-F.", + frim_2 = "fc0003d0F-F.", + mffs_1 = "fc00048eF.", + mtfsf_1 = "fc00058eF.", + fctid_2 = "fc00065cF-F.", + fctidz_2 = "fc00065eF-F.", + fcfid_2 = "fc00069cF-F.", + + -- Primary opcode 4, SPE APU extension: + evaddw_3 = "10000200RRR", + evaddiw_3 = "10000202RAR~", + evsubw_3 = "10000204RRR~", + evsubiw_3 = "10000206RAR~", + evabs_2 = "10000208RR", + evneg_2 = "10000209RR", + evextsb_2 = "1000020aRR", + evextsh_2 = "1000020bRR", + evrndw_2 = "1000020cRR", + evcntlzw_2 = "1000020dRR", + evcntlsw_2 = "1000020eRR", + brinc_3 = "1000020fRRR", + evand_3 = "10000211RRR", + evandc_3 = "10000212RRR", + evxor_3 = "10000216RRR", + evor_3 = "10000217RRR", + evmr_2 = "10000217RR=", + evnor_3 = "10000218RRR", + evnot_2 = "10000218RR=", + eveqv_3 = "10000219RRR", + evorc_3 = "1000021bRRR", + evnand_3 = "1000021eRRR", + evsrwu_3 = "10000220RRR", + evsrws_3 = "10000221RRR", + evsrwiu_3 = "10000222RRA", + evsrwis_3 = "10000223RRA", + evslw_3 = "10000224RRR", + evslwi_3 = "10000226RRA", + evrlw_3 = "10000228RRR", + evsplati_2 = "10000229RS", + evrlwi_3 = "1000022aRRA", + evsplatfi_2 = "1000022bRS", + evmergehi_3 = "1000022cRRR", + evmergelo_3 = "1000022dRRR", + evcmpgtu_3 = "10000230XRR", + evcmpgtu_2 = "10000230-RR", + evcmpgts_3 = "10000231XRR", + evcmpgts_2 = "10000231-RR", + evcmpltu_3 = "10000232XRR", + evcmpltu_2 = "10000232-RR", + evcmplts_3 = "10000233XRR", + evcmplts_2 = "10000233-RR", + evcmpeq_3 = "10000234XRR", + evcmpeq_2 = "10000234-RR", + evsel_4 = "10000278RRRW", + evsel_3 = "10000278RRR", + evfsadd_3 = "10000280RRR", + evfssub_3 = "10000281RRR", + evfsabs_2 = "10000284RR", + evfsnabs_2 = "10000285RR", + evfsneg_2 = "10000286RR", + evfsmul_3 = "10000288RRR", + evfsdiv_3 = "10000289RRR", + evfscmpgt_3 = "1000028cXRR", + evfscmpgt_2 = "1000028c-RR", + evfscmplt_3 = "1000028dXRR", + evfscmplt_2 = 
"1000028d-RR", + evfscmpeq_3 = "1000028eXRR", + evfscmpeq_2 = "1000028e-RR", + evfscfui_2 = "10000290R-R", + evfscfsi_2 = "10000291R-R", + evfscfuf_2 = "10000292R-R", + evfscfsf_2 = "10000293R-R", + evfsctui_2 = "10000294R-R", + evfsctsi_2 = "10000295R-R", + evfsctuf_2 = "10000296R-R", + evfsctsf_2 = "10000297R-R", + evfsctuiz_2 = "10000298R-R", + evfsctsiz_2 = "1000029aR-R", + evfststgt_3 = "1000029cXRR", + evfststgt_2 = "1000029c-RR", + evfststlt_3 = "1000029dXRR", + evfststlt_2 = "1000029d-RR", + evfststeq_3 = "1000029eXRR", + evfststeq_2 = "1000029e-RR", + efsadd_3 = "100002c0RRR", + efssub_3 = "100002c1RRR", + efsabs_2 = "100002c4RR", + efsnabs_2 = "100002c5RR", + efsneg_2 = "100002c6RR", + efsmul_3 = "100002c8RRR", + efsdiv_3 = "100002c9RRR", + efscmpgt_3 = "100002ccXRR", + efscmpgt_2 = "100002cc-RR", + efscmplt_3 = "100002cdXRR", + efscmplt_2 = "100002cd-RR", + efscmpeq_3 = "100002ceXRR", + efscmpeq_2 = "100002ce-RR", + efscfd_2 = "100002cfR-R", + efscfui_2 = "100002d0R-R", + efscfsi_2 = "100002d1R-R", + efscfuf_2 = "100002d2R-R", + efscfsf_2 = "100002d3R-R", + efsctui_2 = "100002d4R-R", + efsctsi_2 = "100002d5R-R", + efsctuf_2 = "100002d6R-R", + efsctsf_2 = "100002d7R-R", + efsctuiz_2 = "100002d8R-R", + efsctsiz_2 = "100002daR-R", + efststgt_3 = "100002dcXRR", + efststgt_2 = "100002dc-RR", + efststlt_3 = "100002ddXRR", + efststlt_2 = "100002dd-RR", + efststeq_3 = "100002deXRR", + efststeq_2 = "100002de-RR", + efdadd_3 = "100002e0RRR", + efdsub_3 = "100002e1RRR", + efdcfuid_2 = "100002e2R-R", + efdcfsid_2 = "100002e3R-R", + efdabs_2 = "100002e4RR", + efdnabs_2 = "100002e5RR", + efdneg_2 = "100002e6RR", + efdmul_3 = "100002e8RRR", + efddiv_3 = "100002e9RRR", + efdctuidz_2 = "100002eaR-R", + efdctsidz_2 = "100002ebR-R", + efdcmpgt_3 = "100002ecXRR", + efdcmpgt_2 = "100002ec-RR", + efdcmplt_3 = "100002edXRR", + efdcmplt_2 = "100002ed-RR", + efdcmpeq_3 = "100002eeXRR", + efdcmpeq_2 = "100002ee-RR", + efdcfs_2 = "100002efR-R", + efdcfui_2 = "100002f0R-R", + 
efdcfsi_2 = "100002f1R-R", + efdcfuf_2 = "100002f2R-R", + efdcfsf_2 = "100002f3R-R", + efdctui_2 = "100002f4R-R", + efdctsi_2 = "100002f5R-R", + efdctuf_2 = "100002f6R-R", + efdctsf_2 = "100002f7R-R", + efdctuiz_2 = "100002f8R-R", + efdctsiz_2 = "100002faR-R", + efdtstgt_3 = "100002fcXRR", + efdtstgt_2 = "100002fc-RR", + efdtstlt_3 = "100002fdXRR", + efdtstlt_2 = "100002fd-RR", + efdtsteq_3 = "100002feXRR", + efdtsteq_2 = "100002fe-RR", + evlddx_3 = "10000300RR0R", + evldd_2 = "10000301R8", + evldwx_3 = "10000302RR0R", + evldw_2 = "10000303R8", + evldhx_3 = "10000304RR0R", + evldh_2 = "10000305R8", + evlwhex_3 = "10000310RR0R", + evlwhe_2 = "10000311R4", + evlwhoux_3 = "10000314RR0R", + evlwhou_2 = "10000315R4", + evlwhosx_3 = "10000316RR0R", + evlwhos_2 = "10000317R4", + evstddx_3 = "10000320RR0R", + evstdd_2 = "10000321R8", + evstdwx_3 = "10000322RR0R", + evstdw_2 = "10000323R8", + evstdhx_3 = "10000324RR0R", + evstdh_2 = "10000325R8", + evstwhex_3 = "10000330RR0R", + evstwhe_2 = "10000331R4", + evstwhox_3 = "10000334RR0R", + evstwho_2 = "10000335R4", + evstwwex_3 = "10000338RR0R", + evstwwe_2 = "10000339R4", + evstwwox_3 = "1000033cRR0R", + evstwwo_2 = "1000033dR4", + evmhessf_3 = "10000403RRR", + evmhossf_3 = "10000407RRR", + evmheumi_3 = "10000408RRR", + evmhesmi_3 = "10000409RRR", + evmhesmf_3 = "1000040bRRR", + evmhoumi_3 = "1000040cRRR", + evmhosmi_3 = "1000040dRRR", + evmhosmf_3 = "1000040fRRR", + evmhessfa_3 = "10000423RRR", + evmhossfa_3 = "10000427RRR", + evmheumia_3 = "10000428RRR", + evmhesmia_3 = "10000429RRR", + evmhesmfa_3 = "1000042bRRR", + evmhoumia_3 = "1000042cRRR", + evmhosmia_3 = "1000042dRRR", + evmhosmfa_3 = "1000042fRRR", + evmwhssf_3 = "10000447RRR", + evmwlumi_3 = "10000448RRR", + evmwhumi_3 = "1000044cRRR", + evmwhsmi_3 = "1000044dRRR", + evmwhsmf_3 = "1000044fRRR", + evmwssf_3 = "10000453RRR", + evmwumi_3 = "10000458RRR", + evmwsmi_3 = "10000459RRR", + evmwsmf_3 = "1000045bRRR", + evmwhssfa_3 = "10000467RRR", + evmwlumia_3 = 
"10000468RRR", + evmwhumia_3 = "1000046cRRR", + evmwhsmia_3 = "1000046dRRR", + evmwhsmfa_3 = "1000046fRRR", + evmwssfa_3 = "10000473RRR", + evmwumia_3 = "10000478RRR", + evmwsmia_3 = "10000479RRR", + evmwsmfa_3 = "1000047bRRR", + evmra_2 = "100004c4RR", + evdivws_3 = "100004c6RRR", + evdivwu_3 = "100004c7RRR", + evmwssfaa_3 = "10000553RRR", + evmwumiaa_3 = "10000558RRR", + evmwsmiaa_3 = "10000559RRR", + evmwsmfaa_3 = "1000055bRRR", + evmwssfan_3 = "100005d3RRR", + evmwumian_3 = "100005d8RRR", + evmwsmian_3 = "100005d9RRR", + evmwsmfan_3 = "100005dbRRR", + evmergehilo_3 = "1000022eRRR", + evmergelohi_3 = "1000022fRRR", + evlhhesplatx_3 = "10000308RR0R", + evlhhesplat_2 = "10000309R2", + evlhhousplatx_3 = "1000030cRR0R", + evlhhousplat_2 = "1000030dR2", + evlhhossplatx_3 = "1000030eRR0R", + evlhhossplat_2 = "1000030fR2", + evlwwsplatx_3 = "10000318RR0R", + evlwwsplat_2 = "10000319R4", + evlwhsplatx_3 = "1000031cRR0R", + evlwhsplat_2 = "1000031dR4", + evaddusiaaw_2 = "100004c0RR", + evaddssiaaw_2 = "100004c1RR", + evsubfusiaaw_2 = "100004c2RR", + evsubfssiaaw_2 = "100004c3RR", + evaddumiaaw_2 = "100004c8RR", + evaddsmiaaw_2 = "100004c9RR", + evsubfumiaaw_2 = "100004caRR", + evsubfsmiaaw_2 = "100004cbRR", + evmheusiaaw_3 = "10000500RRR", + evmhessiaaw_3 = "10000501RRR", + evmhessfaaw_3 = "10000503RRR", + evmhousiaaw_3 = "10000504RRR", + evmhossiaaw_3 = "10000505RRR", + evmhossfaaw_3 = "10000507RRR", + evmheumiaaw_3 = "10000508RRR", + evmhesmiaaw_3 = "10000509RRR", + evmhesmfaaw_3 = "1000050bRRR", + evmhoumiaaw_3 = "1000050cRRR", + evmhosmiaaw_3 = "1000050dRRR", + evmhosmfaaw_3 = "1000050fRRR", + evmhegumiaa_3 = "10000528RRR", + evmhegsmiaa_3 = "10000529RRR", + evmhegsmfaa_3 = "1000052bRRR", + evmhogumiaa_3 = "1000052cRRR", + evmhogsmiaa_3 = "1000052dRRR", + evmhogsmfaa_3 = "1000052fRRR", + evmwlusiaaw_3 = "10000540RRR", + evmwlssiaaw_3 = "10000541RRR", + evmwlumiaaw_3 = "10000548RRR", + evmwlsmiaaw_3 = "10000549RRR", + evmheusianw_3 = "10000580RRR", + evmhessianw_3 = 
"10000581RRR", + evmhessfanw_3 = "10000583RRR", + evmhousianw_3 = "10000584RRR", + evmhossianw_3 = "10000585RRR", + evmhossfanw_3 = "10000587RRR", + evmheumianw_3 = "10000588RRR", + evmhesmianw_3 = "10000589RRR", + evmhesmfanw_3 = "1000058bRRR", + evmhoumianw_3 = "1000058cRRR", + evmhosmianw_3 = "1000058dRRR", + evmhosmfanw_3 = "1000058fRRR", + evmhegumian_3 = "100005a8RRR", + evmhegsmian_3 = "100005a9RRR", + evmhegsmfan_3 = "100005abRRR", + evmhogumian_3 = "100005acRRR", + evmhogsmian_3 = "100005adRRR", + evmhogsmfan_3 = "100005afRRR", + evmwlusianw_3 = "100005c0RRR", + evmwlssianw_3 = "100005c1RRR", + evmwlumianw_3 = "100005c8RRR", + evmwlsmianw_3 = "100005c9RRR", + + -- NYI: some 64 bit PowerPC and Book E instructions: + -- rldicl, rldicr, rldic, rldimi, rldcl, rldcr, sradi, 64 bit ext. add/sub, + -- extended addressing branches, cache management, loads and stores +} + +-- Add mnemonics for "." variants. +do + local t = {} + for k,v in pairs(map_op) do + if sub(v, -1) == "." then + local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) + t[sub(k, 1, -3).."."..sub(k, -2)] = v2 + end + end + for k,v in pairs(t) do + map_op[k] = v + end +end + +-- Add more branch mnemonics. 
for cond,c in pairs(map_cond) do
  local b1 = "b"..cond
  -- (c%4) selects the condition bit; the first four conditions (lt/gt/eq/so)
  -- additionally set 0x01000000 in the opcode word.
  local c1 = (c%4)*0x00010000 + (c < 4 and 0x01000000 or 0)
  -- bX[l]: one operand (label) or two operands (CR field, label).
  map_op[b1.."_1"] = tohex(0x40800000 + c1).."K"
  map_op[b1.."l_1"] = tohex(0x40800001 + c1).."K"
  map_op[b1.."_2"] = tohex(0x40800000 + c1).."-XK"
  map_op[b1.."l_2"] = tohex(0x40800001 + c1).."-XK"
  -- bXlr[l] and bXctr[l] without operands.
  map_op[b1.."lr_0"] = tohex(0x4c800020 + c1)
  map_op[b1.."lrl_0"] = tohex(0x4c800021 + c1)
  map_op[b1.."ctr_0"] = tohex(0x4c800420 + c1)
  map_op[b1.."ctrl_0"] = tohex(0x4c800421 + c1)
  -- bXlr[l] and bXctr[l] with an explicit CR field operand.
  map_op[b1.."lr_1"] = tohex(0x4c800020 + c1).."-X"
  map_op[b1.."lrl_1"] = tohex(0x4c800021 + c1).."-X"
  map_op[b1.."ctr_1"] = tohex(0x4c800420 + c1).."-X"
  map_op[b1.."ctrl_1"] = tohex(0x4c800421 + c1).."-X"
end

------------------------------------------------------------------------------

-- Parse a general purpose register operand: "rN" (N = 0..31), a type name
-- with a default register, or "type:rN". Returns the register number and,
-- if a type was involved, its map_type entry.
local function parse_gpr(expr)
  local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local r = match(expr, "^r([1-3]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 31 then return r, tp end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a floating point register operand "fN" (N = 0..31).
local function parse_fpr(expr)
  local r = match(expr, "^f([1-3]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 31 then return r end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a condition register field operand "crN" (N = 0..7).
local function parse_cr(expr)
  local r = match(expr, "^cr([0-7])$")
  if r then return tonumber(r) end
  werror("bad condition register name `"..expr.."'")
end

-- Parse a condition bit operand of the form "4*crN+xx" where xx is one of
-- lt/gt/eq/so. Returns the bit number N*4 + condition index.
local function parse_cond(expr)
  local r, cond = match(expr, "^4%*cr([0-7])%+(%w%w)$")
  if r then
    r = tonumber(r)
    local c = map_cond[cond]
    if c and c < 4 then return r*4+c end
  end
  werror("bad condition bit name `"..expr.."'")
end

-- Parse an immediate operand.
--   imm:    operand text (numeric literal or arbitrary C expression).
--   bits:   width of the encoded field.
--   shift:  bit position of the field inside the instruction word.
--   scale:  log2 of the required alignment; the value is divided by 2^scale.
--   signed: whether the field is a two's complement value.
-- A numeric literal is range-checked and returned already shifted into place.
-- Any other expression is deferred to runtime through an IMM action and 0 is
-- returned for the static part of the instruction.
local function parse_imm(imm, bits, shift, scale, signed)
  local n = tonumber(imm)
  if n then
    if n % 2^scale == 0 then
      n = n / 2^scale
      if signed then
        if n >= 0 then
          if n < 2^(bits-1) then return n*2^shift end
        else
          -- Lowest representable value is exactly -2^(bits-1). The former
          -- bound was one lower and silently wrapped that value around to
          -- the largest positive encoding.
          if n >= -(2^(bits-1)) then return (n+2^bits)*2^shift end
        end
      else
        if n >= 0 and n <= 2^bits-1 then return n*2^shift end
      end
    end
    werror("out of range immediate `"..imm.."'")
  elseif match(imm, "^r([1-3]?[0-9])$") or
         match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
    werror("expected immediate operand, got register")
  else
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
end

-- Parse a displacement operand: either "imm(reg)" with a signed 16 bit
-- immediate, or "type[:reg]<tail>" which is resolved through the type's
-- Dt macro at runtime. r0 is rejected as base register.
local function parse_disp(disp)
  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
  if imm then
    local r = parse_gpr(reg)
    if r == 0 then werror("cannot use r0 in displacement") end
    return r*65536 + parse_imm(imm, 16, 0, 0, true)
  end
  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
  if reg and tailr ~= "" then
    local r, tp = parse_gpr(reg)
    if r == 0 then werror("cannot use r0 in displacement") end
    if tp then
      waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr))
      return r*65536
    end
  end
  werror("bad displacement `"..disp.."'")
end

-- Same as parse_disp, but for an unsigned 5 bit displacement placed at bit
-- position 11 and scaled by 2^scale.
local function parse_u5disp(disp, scale)
  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
  if imm then
    local r = parse_gpr(reg)
    if r == 0 then werror("cannot use r0 in displacement") end
    return r*65536 + parse_imm(imm, 5, 11, scale, false)
  end
  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
  if reg and tailr ~= "" then
    local r, tp = parse_gpr(reg)
    if r == 0 then werror("cannot use r0 in displacement") end
    if tp then
      waction("IMM", scale*1024+5*32+11, format(tp.ctypefmt, tailr))
      return r*65536
    end
  end
  werror("bad displacement `"..disp.."'")
end

-- Parse a label. `def` selects definition syntax (true) or reference
-- syntax (false). Returns the action mode suffix ("PC", "LG" or "EXT"),
-- the label number and, for PC labels, the C expression.
local function parse_label(label, def)
  local prefix = sub(label, 1, 2)
  -- =>label (pc label reference)
  if prefix == "=>" then
    return "PC", 0, sub(label, 3)
  end
  -- ->name (global label reference)
  if prefix == "->" then
    return "LG", map_global[sub(label, 3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference)
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then -- Fwd: 1-9, Bkwd: 11-19.
      -- Convert explicitly instead of relying on string arithmetic.
      return "LG", tonumber(lnum) + (dir == ">" and 0 or 10)
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end

------------------------------------------------------------------------------

-- Handle opcodes defined with template strings.
-- The first 8 characters of a template are the base opcode in hex, the rest
-- is a sequence of operand codes processed left to right. Called without
-- params it returns the operand code string.
map_op[".template__"] = function(params, template, nparams)
  if not params then return sub(template, 9) end
  local op = tonumber(sub(template, 1, 8), 16)
  -- n: index of the next operand; rs: bit position of the last 5 bit field.
  local n, rs = 1, 26

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 3 positions (rlwinm).
  if secpos+3 > maxsecpos then wflush() end
  local pos = wpos()

  -- Process each character.
  for p in gmatch(sub(template, 9), ".") do
    if p == "R" then
      rs = rs - 5; op = op + parse_gpr(params[n]) * 2^rs; n = n + 1
    elseif p == "F" then
      rs = rs - 5; op = op + parse_fpr(params[n]) * 2^rs; n = n + 1
    elseif p == "A" then
      rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
    elseif p == "S" then
      rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, true); n = n + 1
    elseif p == "I" then
      op = op + parse_imm(params[n], 16, 0, 0, true); n = n + 1
    elseif p == "U" then
      op = op + parse_imm(params[n], 16, 0, 0, false); n = n + 1
    elseif p == "D" then
      op = op + parse_disp(params[n]); n = n + 1
    elseif p == "2" then
      op = op + parse_u5disp(params[n], 1); n = n + 1
    elseif p == "4" then
      op = op + parse_u5disp(params[n], 2); n = n + 1
    elseif p == "8" then
      op = op + parse_u5disp(params[n], 3); n = n + 1
    elseif p == "C" then
      rs = rs - 5; op = op + parse_cond(params[n]) * 2^rs; n = n + 1
    elseif p == "X" then
      rs = rs - 5; op = op + parse_cr(params[n]) * 2^(rs+2); n = n + 1
    elseif p == "W" then
      op = op + parse_cr(params[n]); n = n + 1
    elseif p == "J" or p == "K" then
      -- Keep the label number in its own local: it used to shadow the
      -- operand counter `n`, so the increment below never reached it.
      local mode, lnum, s = parse_label(params[n], false)
      if p == "K" then lnum = lnum + 2048 end
      waction("REL_"..mode, lnum, s, 1)
      n = n + 1
    elseif p == "0" then
      -- Reject r0 in the field parsed last.
      local mm = 2^rs
      local t = op % mm
      if ((op - t) / mm) % 32 == 0 then werror("cannot use r0") end
    elseif p == "=" or p == "%" then
      -- Duplicate an earlier 5 bit field into the next field position.
      local mm = 2^(rs + (p == "%" and 5 or 0))
      local t = ((op - op % mm) / mm) % 32
      rs = rs - 5
      op = op + t * 2^rs
    elseif p == "~" then
      -- Swap the two most recently parsed 5 bit fields.
      local mm = 2^rs
      local t1l = op % mm
      local t1h = (op - t1l) / mm
      local t2l = t1h % 32
      local t2h = (t1h - t2l) / 32
      local t3l = t2h % 32
      op = ((t2h - t3l + t2l)*32 + t3l)*mm + t1l
    elseif p == "-" then
      rs = rs - 5
    elseif p == "." then
      -- Ignored.
    else
      assert(false)
    end
  end
  wputpos(pos, op)
end

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, name) end)
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local prefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, prefix) end)
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, name) end)
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, name) end)
end

------------------------------------------------------------------------------

-- Label pseudo-opcode (converted from trailing colon form).
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  if secpos+1 > maxsecpos then wflush() end
  local mode, n, s = parse_label(params[1], true)
  if mode == "EXT" then werror("bad label definition") end
  waction("LABEL_"..mode, n, s, 1)
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
-- Each operand must be a numeric literal; negative values are wrapped into
-- the unsigned 32 bit range.
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _,p in ipairs(params) do
    local n = tonumber(p)
    if not n then werror("bad immediate `"..p.."'") end
    if n < 0 then n = n + 2^32 end
    wputw(n)
    if secpos+2 > maxsecpos then wflush() end
  end
end

-- Alignment pseudo-opcode.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    for i=1,8 do
      x = x / 2
      if x == 1 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Registers the type in map_type, defines "#name" as sizeof(ctype) and emits
-- the DtN() accessor macro used for field displacements.
map_op[".type_3"] = function(params, nparams)
  if not params then
    return nparams == 2 and "name, ctype" or "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  local tp = map_type[name]
  if tp then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  map_type[name] = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]

-- Dump type definitions.
+local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/tools/luaffi/dynasm/dasm_proto.h b/tools/luaffi/dynasm/dasm_proto.h new file mode 100644 index 0000000..dc9ed51 --- /dev/null +++ b/tools/luaffi/dynasm/dasm_proto.h @@ -0,0 +1,83 @@ +/* +** DynASM encoding engine prototypes. +** Copyright (C) 2005-2011 Mike Pall. All rights reserved. +** Released under the MIT/X license. See dynasm.lua for full copyright notice. 
+*/ + +#ifndef _DASM_PROTO_H +#define _DASM_PROTO_H + +#include +#include + +#define DASM_IDENT "DynASM 1.3.0" +#define DASM_VERSION 10300 /* 1.3.0 */ + +#ifndef Dst_DECL +#define Dst_DECL dasm_State **Dst +#endif + +#ifndef Dst_REF +#define Dst_REF (*Dst) +#endif + +#ifndef DASM_FDEF +#define DASM_FDEF extern +#endif + +#ifndef DASM_M_GROW +#define DASM_M_GROW(ctx, t, p, sz, need) \ + do { \ + size_t _sz = (sz), _need = (need); \ + if (_sz < _need) { \ + if (_sz < 16) _sz = 16; \ + while (_sz < _need) _sz += _sz; \ + (p) = (t *)realloc((p), _sz); \ + if ((p) == NULL) exit(1); \ + (sz) = _sz; \ + } \ + } while(0) +#endif + +#ifndef DASM_M_FREE +#define DASM_M_FREE(ctx, p, sz) free(p) +#endif + +/* Internal DynASM encoder state. */ +typedef struct dasm_State dasm_State; + + +/* Initialize and free DynASM state. */ +DASM_FDEF void dasm_init(Dst_DECL, int maxsection); +DASM_FDEF void dasm_free(Dst_DECL); + +/* Setup global array. Must be called before dasm_setup(). */ +DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl); + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc); + +/* Setup encoder. */ +DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist); + +/* Feed encoder with actions. Calls are generated by pre-processor. */ +DASM_FDEF void dasm_put(Dst_DECL, int start, ...); + +/* Link sections and return the resulting size. */ +DASM_FDEF int dasm_link(Dst_DECL, size_t *szp); + +/* Encode sections into buffer. */ +DASM_FDEF int dasm_encode(Dst_DECL, void *buffer); + +/* Get PC label offset. */ +DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc); + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. 
*/ +DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch); +#else +#define dasm_checkstep(a, b) 0 +#endif + + +#endif /* _DASM_PROTO_H */ diff --git a/tools/luaffi/dynasm/dasm_x64.lua b/tools/luaffi/dynasm/dasm_x64.lua new file mode 100644 index 0000000..73e01e9 --- /dev/null +++ b/tools/luaffi/dynasm/dasm_x64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM x64 module. +-- +-- Copyright (C) 2005-2011 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined x86/x64 module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +x64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_x86") diff --git a/tools/luaffi/dynasm/dasm_x86.h b/tools/luaffi/dynasm/dasm_x86.h new file mode 100644 index 0000000..23e213c --- /dev/null +++ b/tools/luaffi/dynasm/dasm_x86.h @@ -0,0 +1,470 @@ +/* +** DynASM x86 encoding engine. +** Copyright (C) 2005-2011 Mike Pall. All rights reserved. +** Released under the MIT/X license. See dynasm.lua for full copyright notice. +*/ + +#include +#include +#include +#include + +#define DASM_ARCH "x86" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. DASM_STOP must be 255. */ +enum { + DASM_DISP = 233, + DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB, + DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC, + DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN, + DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. 
*/ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_VREG 0x15000000 +#define DASM_S_UNDEF_L 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned char *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. 
*/ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. 
*/ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs, mrm = 4; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + int action = *p++; + if (action < DASM_DISP) { + ofs++; + } else if (action <= DASM_REL_A) { + int n = va_arg(ap, int); + b[pos++] = n; + switch (action) { + case DASM_DISP: + if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } + case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; + case DASM_REL_A: /* Assumes ptrdiff_t is int. 
!x64 */ + case DASM_IMM_D: ofs += 4; break; + case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; + case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; + case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; + case DASM_SPACE: p++; ofs += n; break; + case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ + case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); + if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue; + } + mrm = 4; + } else { + int *pl, n; + switch (action) { + case DASM_REL_LG: + case DASM_IMM_LG: + n = *p++; pl = D->lglabels + n; + if (n <= 246) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */ + pl -= 246; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + ofs += 4; /* Maximum offset needed. */ + if (action == DASM_REL_LG || action == DASM_REL_PC) + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_ALIGN: + ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */ + b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ + break; + case DASM_EXTERN: p += 2; ofs += 4; break; + case DASM_ESC: p++; ofs++; break; + case DASM_MARK: mrm = p[-2]; break; + case DASM_SECTION: + n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n]; + case DASM_STOP: goto stop; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + int op, action = *p++; + switch (action) { + case DASM_REL_LG: p++; op = p[-3]; goto rel_pc; + case DASM_REL_PC: op = p[-2]; rel_pc: { + int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); + if (shrink) { /* Shrinkable branch opcode? */ + int lofs, lpos = b[pos]; + if (lpos < 0) goto noshrink; /* Ext global? */ + lofs = *DASM_POS2PTR(D, lpos); + if (lpos > pos) { /* Fwd label: add cumulative section offsets. */ + int i; + for (i = secnum; i < DASM_POS2SEC(lpos); i++) + lofs += D->sections[i].ofs; + } else { + lofs -= ofs; /* Bkwd label: unfix offset. */ + } + lofs -= b[pos+1]; /* Short branch ok? 
*/ + if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */ + else { noshrink: shrink = 0; } /* No, cannot shrink op. */ + } + b[pos+1] = shrink; + pos += 2; + break; + } + case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; + case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: + case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: + case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; + case DASM_LABEL_LG: p++; + case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ + case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ + case DASM_EXTERN: p += 2; break; + case DASM_ESC: p++; break; + case DASM_MARK: break; + case DASM_SECTION: case DASM_STOP: goto stop; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#define dasmb(x) *cp++ = (unsigned char)(x) +#ifndef DASM_ALIGNED_WRITES +#define dasmw(x) \ + do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) +#define dasmd(x) \ + do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) +#else +#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) +#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + unsigned char *base = (unsigned char *)buffer; + unsigned char *cp = base; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + unsigned char *mark = NULL; + while (1) { + int action = *p++; + int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? 
*b++ : 0; + switch (action) { + case DASM_DISP: if (!mark) mark = cp; { + unsigned char *mm = mark; + if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL; + if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7; + if (mrm != 5) { mm[-1] -= 0x80; break; } } + if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; + } + case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; + case DASM_IMM_DB: if (((n+128)&-256) == 0) { + db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; + } else mark = NULL; + case DASM_IMM_D: wd: dasmd(n); break; + case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; + case DASM_IMM_W: dasmw(n); break; + case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } + case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; + b++; n = (int)(ptrdiff_t)D->globals[-n]; + case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ + case DASM_REL_PC: rel_pc: { + int shrink = *b++; + int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; } + n = *pb - ((int)(cp-base) + 4-shrink); + if (shrink == 0) goto wd; + if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb; + goto wb; + } + case DASM_IMM_LG: + p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } + case DASM_IMM_PC: { + int *pb = DASM_POS2PTR(D, n); + n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); + goto wd; + } + case DASM_LABEL_LG: { + int idx = *p++; + if (idx >= 10) + D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? 
*b : n)); + break; + } + case DASM_LABEL_PC: case DASM_SETLABEL: break; + case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; } + case DASM_ALIGN: + n = *p++; + while (((cp-base) & n)) *cp++ = 0x90; /* nop */ + break; + case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; + case DASM_MARK: mark = cp; break; + case DASM_ESC: action = *p++; + default: *cp++ = action; break; + case DASM_SECTION: case DASM_STOP: goto stop; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); + return D->status; +} +#endif + diff --git a/tools/luaffi/dynasm/dasm_x86.lua b/tools/luaffi/dynasm/dasm_x86.lua new file mode 100644 index 0000000..f5c8c52 --- /dev/null +++ b/tools/luaffi/dynasm/dasm_x86.lua @@ -0,0 +1,1931 @@ +------------------------------------------------------------------------------ +-- DynASM x86/x64 module. +-- +-- Copyright (C) 2005-2011 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. 
+------------------------------------------------------------------------------ + +local x64 = x64 + +-- Module information: +local _info = { + arch = x64 and "x64" or "x86", + description = "DynASM x86/x64 module", + version = "1.3.0", + vernum = 10300, + release = "2011-05-05", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub +local concat, sort = table.concat, table.sort + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + -- int arg, 1 buffer pos: + "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", + -- action arg (1 byte), int arg, 1 buffer pos (reg/num): + "VREG", "SPACE", -- !x64: VREG support NYI. + -- ptrdiff_t arg, 1 buffer pos (address): !x64 + "SETLABEL", "REL_A", + -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): + "REL_LG", "REL_PC", + -- action arg (1 byte) or int arg, 1 buffer pos (link): + "IMM_LG", "IMM_PC", + -- action arg (1 byte) or int arg, 1 buffer pos (offset): + "LABEL_LG", "LABEL_PC", + -- action arg (1 byte), 1 buffer pos (offset): + "ALIGN", + -- action args (2 bytes), no buffer pos. + "EXTERN", + -- action arg (1 byte), no buffer pos. + "ESC", + -- no action arg, no buffer pos. + "MARK", + -- action arg (1 byte), no buffer pos, terminal action: + "SECTION", + -- no args, no buffer pos, terminal action: + "STOP" +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! 
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number (dynamically generated below). +local map_action = {} +-- First action number. Everything below does not need to be escaped. +local actfirst = 256-#action_names + +-- Action list buffer and string (only used to remove dupes). +local actlist = {} +local actstr = "" + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Compute action numbers for action names. +for n,name in ipairs(action_names) do + local num = actfirst + n - 1 + map_action[name] = num +end + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + local last = actlist[nn] or 255 + actlist[nn] = nil -- Remove last byte. + if nn == 0 then nn = 1 end + out:write("static const unsigned char ", name, "[", nn, "] = {\n") + local s = " " + for n,b in ipairs(actlist) do + s = s..b.."," + if #s >= 75 then + assert(out:write(s, "\n")) + s = " " + end + end + out:write(s, last, "\n};\n\n") -- Add last byte back. +end + +------------------------------------------------------------------------------ + +-- Add byte to action list. +local function wputxb(n) + assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. 
+local function waction(action, a, num) + wputxb(assert(map_action[action], "bad action name `"..action.."'")) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Add call to embedded DynASM C code. +local function wcall(func, args) + wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) +end + +-- Delete duplicate action list chunks. A tad slow, but so what. +local function dedupechunk(offset) + local al, as = actlist, actstr + local chunk = char(unpack(al, offset+1, #al)) + local orig = find(as, chunk, 1, true) + if orig then + actargs[1] = orig-1 -- Replace with original offset. + for i=offset+1,#al do al[i] = nil end -- Kill dupe. + else + actstr = as..chunk + end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + local offset = actargs[1] + if #actlist == offset then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + dedupechunk(offset) + wcall("put", actargs) -- Add call to dasm_put(). + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped byte. +local function wputb(n) + if n >= actfirst then waction("ESC") end -- Need to escape byte. + wputxb(n) +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 10 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end + local n = next_global + if n > 246 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. 
+local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=10,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=10,next_global-1 do + out:write(" ", prefix, gsub(t[i], "@.*", ""), ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=10,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = -1 +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n < -256 then werror("too many extern labels") end + next_extern = n - 1 + t[name] = n + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + local t = {} + for name, n in pairs(map_extern) do t[-n] = name end + out:write("Extern labels:\n") + for i=1,-next_extern-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + local t = {} + for name, n in pairs(map_extern) do t[-n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=1,-next_extern-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = {} -- Ext. 
register name -> int. name. +local map_reg_rev = {} -- Int. register name -> ext. name. +local map_reg_num = {} -- Int. register name -> register number. +local map_reg_opsize = {} -- Int. register name -> operand size. +local map_reg_valid_base = {} -- Int. register name -> valid base register? +local map_reg_valid_index = {} -- Int. register name -> valid index register? +local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex. +local reg_list = {} -- Canonical list of int. register names. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for _PTx macros). + +local addrsize = x64 and "q" or "d" -- Size for address operands. + +-- Helper functions to fill register maps. +local function mkrmap(sz, cl, names) + local cname = format("@%s", sz) + reg_list[#reg_list+1] = cname + map_archdef[cl] = cname + map_reg_rev[cname] = cl + map_reg_num[cname] = -1 + map_reg_opsize[cname] = sz + if sz == addrsize or sz == "d" then + map_reg_valid_base[cname] = true + map_reg_valid_index[cname] = true + end + if names then + for n,name in ipairs(names) do + local iname = format("@%s%x", sz, n-1) + reg_list[#reg_list+1] = iname + map_archdef[name] = iname + map_reg_rev[iname] = name + map_reg_num[iname] = n-1 + map_reg_opsize[iname] = sz + if sz == "b" and n > 4 then map_reg_needrex[iname] = false end + if sz == addrsize or sz == "d" then + map_reg_valid_base[iname] = true + map_reg_valid_index[iname] = true + end + end + end + for i=0,(x64 and sz ~= "f") and 15 or 7 do + local needrex = sz == "b" and i > 3 + local iname = format("@%s%x%s", sz, i, needrex and "R" or "") + if needrex then map_reg_needrex[iname] = true end + local name + if sz == "o" then name = format("xmm%d", i) + elseif sz == "f" then name = format("st%d", i) + else name = format("r%d%s", i, sz == addrsize and "" or sz) end + map_archdef[name] = iname + if not map_reg_rev[iname] then + reg_list[#reg_list+1] = iname + map_reg_rev[iname] = name + 
map_reg_num[iname] = i + map_reg_opsize[iname] = sz + if sz == addrsize or sz == "d" then + map_reg_valid_base[iname] = true + map_reg_valid_index[iname] = true + end + end + end + reg_list[#reg_list+1] = "" +end + +-- Integer registers (qword, dword, word and byte sized). +if x64 then + mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"}) +end +mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"}) +mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) +mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) +map_reg_valid_index[map_archdef.esp] = false +if x64 then map_reg_valid_index[map_archdef.rsp] = false end +map_archdef["Ra"] = "@"..addrsize + +-- FP registers (internally tword sized, but use "f" as operand size). +mkrmap("f", "Rf") + +-- SSE registers (oword sized, but qword and dword accessible). +mkrmap("o", "xmm") + +-- Operand size prefixes to codes. +local map_opsize = { + byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", + aword = addrsize, +} + +-- Operand size code to number. +local map_opsizenum = { + b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, +} + +-- Operand size code to name. +local map_opsizename = { + b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", + f = "fpword", +} + +-- Valid index register scale factors. +local map_xsc = { + ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3, +} + +-- Condition codes. +local map_cc = { + o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7, + s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15, + c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7, + pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15, +} + + +-- Reverse defines for registers. 
+function _M.revdef(s) + return gsub(s, "@%w+", map_reg_rev) +end + +-- Dump register names and numbers +local function dumpregs(out) + out:write("Register names, sizes and internal numbers:\n") + for _,reg in ipairs(reg_list) do + if reg == "" then + out:write("\n") + else + local name = map_reg_rev[reg] + local num = map_reg_num[reg] + local opsize = map_opsizename[map_reg_opsize[reg]] + out:write(format(" %-5s %-8s %s\n", name, opsize, + num < 0 and "(variable)" or num)) + end + end +end + +------------------------------------------------------------------------------ + +-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC). +local function wputlabel(aprefix, imm, num) + if type(imm) == "number" then + if imm < 0 then + waction("EXTERN") + wputxb(aprefix == "IMM_" and 0 or 1) + imm = -imm-1 + else + waction(aprefix.."LG", nil, num); + end + wputxb(imm) + else + waction(aprefix.."PC", imm, num) + end +end + +-- Put signed byte or arg. +local function wputsbarg(n) + if type(n) == "number" then + if n < -128 or n > 127 then + werror("signed immediate byte out of range") + end + if n < 0 then n = n + 256 end + wputb(n) + else waction("IMM_S", n) end +end + +-- Put unsigned byte or arg. +local function wputbarg(n) + if type(n) == "number" then + if n < 0 or n > 255 then + werror("unsigned immediate byte out of range") + end + wputb(n) + else waction("IMM_B", n) end +end + +-- Put unsigned word or arg. +local function wputwarg(n) + if type(n) == "number" then + if n < 0 or n > 65535 then + werror("unsigned immediate word out of range") + end + local r = n%256; n = (n-r)/256; wputb(r); wputb(n); + else waction("IMM_W", n) end +end + +-- Put signed or unsigned dword or arg. 
-- Emit a dword: numbers go out as four bytes (low byte first, negative values
-- wrapped by 2^32), a table is treated as a label reference ({ label }) and
-- anything else becomes an IMM_D action.
local function wputdarg(n)
  local kind = type(n)
  if kind == "table" then
    wputlabel("IMM_", n[1], 1)
  elseif kind ~= "number" then
    waction("IMM_D", n)
  else
    if n < 0 then n = n + 4294967296 end
    -- Peel off the three low bytes; what remains is the top byte.
    for _ = 1, 3 do
      local lo = n % 256
      wputb(lo)
      n = (n - lo) / 256
    end
    wputb(n)
  end
end

-- Put operand-size dependent number or arg. A missing size, "d" and "q" all
-- use the dword form; "w", "b" and "s" select word, byte and signed byte.
local function wputszarg(sz, n)
  if not sz or sz == "d" or sz == "q" then
    wputdarg(n)
    return
  end
  if sz == "w" then
    wputwarg(n)
  elseif sz == "b" then
    wputbarg(n)
  elseif sz == "s" then
    wputsbarg(n)
  else
    werror("bad operand size")
  end
end

-- Put multi-byte opcode with operand-size dependent modifications.
--   sz   operand size code: "w" adds the 0x66 prefix, "b" decrements the
--        final opcode byte by one.
--   op   opcode as a number, most significant byte emitted first.
--   rex  REX bits (0 = no REX byte); only accepted when x64 is set.
local function wputop(sz, op, rex)
  if rex ~= 0 and not x64 then werror("bad operand size") end
  if sz == "w" then wputb(102) end

  -- Emit the pending REX byte (0x40 + low four bits) and mark it as done.
  local function flushrex()
    wputb(64 + rex % 16)
    rex = 0
  end

  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then
    local low = op % 4294967296
    wputb((op - low) / 4294967296)
    op = low
  end
  if op >= 16777216 then
    local low = op % 16777216
    wputb((op - low) / 16777216)
    op = low
  end
  if op >= 65536 then
    if rex ~= 0 then
      -- For 0F 3A xx / 0F 38 xx opcodes REX goes in front of the 0F byte.
      local lead = op - op % 256
      if lead == 0x0f3a00 or lead == 0x0f3800 then flushrex() end
    end
    local low = op % 65536
    wputb((op - low) / 65536)
    op = low
  end
  if op >= 256 then
    local low = op % 256
    local high = (op - low) / 256
    -- For 0F xx opcodes REX goes in front of the 0F byte.
    if high == 15 and rex ~= 0 then flushrex() end
    wputb(high)
    op = low
  end
  if rex ~= 0 then flushrex() end
  if sz == "b" then op = op - 1 end
  wputb(op)
end

-- Put ModRM or SIB formatted byte: m in the top two bits, then the low three
-- bits of s and of rm. The vs and vrm parameters are accepted but not used.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  local spare, base = s % 8, rm % 8
  wputb(m * 64 + spare * 8 + base)
end

-- Put ModRM/SIB plus optional displacement.
-- Emit the ModRM byte, an optional SIB byte and the displacement for one
-- register or memory operand.
--   t      operand table; this function reads mode, reg, xreg, xsc, disp,
--          vreg and vxreg from it.
--   imark  marker character; "I" requests a MARK action after the ModRM byte.
--          dopattern passes the last character of the pattern string.
--   s      value for the spare (reg) field of ModRM; negative becomes 0.
--   vsreg  optional variable register expression for the spare field.
-- Negative reg/xreg numbers denote variable registers: the field is encoded
-- as 0 and a VREG action is emitted right after the byte that holds it.
local function wputmrmsib(t, imark, s, vsreg)
  local vreg, vxreg
  local reg, xreg = t.reg, t.xreg
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    if vsreg then waction("VREG", vsreg); wputxb(2) end
    if vreg then waction("VREG", vreg); wputxb(0) end
    return
  end

  local disp = t.disp
  local tdisp = type(disp)
  -- No base register?
  if not reg then
    local riprel = false
    if xreg then
      -- Indexed mode with index register only.
      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
      wputmodrm(0, s, 4)
      if imark == "I" then waction("MARK") end
      if vsreg then waction("VREG", vsreg); wputxb(2) end
      wputmodrm(t.xsc, xreg, 5)
      if vxreg then waction("VREG", vxreg); wputxb(3) end
    else
      -- Pure 32 bit displacement.
      if x64 and tdisp ~= "table" then
        wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
        if imark == "I" then waction("MARK") end
        wputmodrm(0, 4, 5)
      else
        -- On x64 a table displacement (label reference) is emitted
        -- rip-relative; otherwise this is the plain [disp] form.
        riprel = x64
        wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
        if imark == "I" then waction("MARK") end
      end
      if vsreg then waction("VREG", vsreg); wputxb(2) end
    end
    if riprel then -- Emit rip-relative displacement.
      if match("UWSiI", imark) then
        werror("NYI: rip-relative displacement followed by immediate")
      end
      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
      wputlabel("REL_", disp[1], 2)
    else
      wputdarg(disp)
    end
    return
  end

  -- m is the ModRM mod field: 0 = no displacement, 1 = 8 bit, 2 = 32 bit.
  -- It stays nil when the size is not decided here (non-numeric,
  -- non-table displacement, or zero displacement with a variable base
  -- register); a DISP action is emitted for that case below.
  local m
  if tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and (reg%8) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then m = 1
    else m = 2 end
  elseif tdisp == "table" then
    m = 2
  end

  -- Index register present or esp as base register: need SIB encoding.
  if xreg or (reg%8) == 4 then
    wputmodrm(m or 2, s, 4) -- ModRM.
    if m == nil or imark == "I" then waction("MARK") end
    if vsreg then waction("VREG", vsreg); wputxb(2) end
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    if vxreg then waction("VREG", vxreg); wputxb(3) end
    if vreg then waction("VREG", vreg); wputxb(1) end
  else
    wputmodrm(m or 2, s, reg) -- ModRM.
    if (imark == "I" and (m == 1 or m == 2)) or
       (m == nil and (vsreg or vreg)) then waction("MARK") end
    if vsreg then waction("VREG", vsreg); wputxb(2) end
    if vreg then waction("VREG", vreg); wputxb(1) end
  end

  -- Put displacement.
  if m == 1 then wputsbarg(disp)
  elseif m == 2 then wputdarg(disp)
  elseif m == nil then waction("DISP", disp) end
end

------------------------------------------------------------------------------

-- Return human-readable operand mode string: the opcode name followed by,
-- for each operand, the first character of its mode plus its operand size
-- ("?" when the operand has no size). Used for error messages.
local function opmodestr(op, args)
  local m = {}
  for i=1,#args do
    local a = args[i]
    m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
  end
  return op.." "..concat(m, ",")
end

-- Convert number to valid integer or nil.
-- Returns nil when expr does not convert with tonumber(). Reports an error
-- via werror() when the number has a fractional part or lies outside
-- -2147483648..4294967295 (signed and unsigned 32 bit ranges combined).
local function toint(expr)
  local n = tonumber(expr)
  if n then
    if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
      werror("bad integer number `"..expr.."'")
    end
    return n
  end
end

-- Parse immediate expression.
-- Returns two values: an operand mode string ("iPJ", "iJ" or "iI") and the
-- associated value (C expression string, label expression/number, or the
-- unchanged expression text).
local function immexpr(expr)
  -- &expr (pointer)
  if sub(expr, 1, 1) == "&" then
    return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
  end

  local prefix = sub(expr, 1, 2)
  -- =>expr (pc label reference)
  if prefix == "=>" then
    return "iJ", sub(expr, 3)
  end
  -- ->name (global label reference)
  if prefix == "->" then
    return "iJ", map_global[sub(expr, 3)]
  end

  -- [<>][1-9] (local label reference)
  local dir, lnum = match(expr, "^([<>])([1-9])$")
  if dir then -- Fwd: 247-255, Bkwd: 1-9.
    -- lnum is a digit string; the addition coerces it to a number.
    return "iJ", lnum + (dir == ">" and 246 or 0)
  end

  -- extern name (value looked up in map_extern)
  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then
    return "iJ", map_extern[extname]
  end

  -- expr (interpreted as immediate)
  return "iI", expr
end

-- Parse displacement expression: +-num, +-expr, +-opsize*num
-- Returns a number for constant displacements, a one-element table
-- { label } for label references, or the signed expression string.
local function dispexpr(expr)
  local disp = expr == "" and 0 or toint(expr)
  if disp then return disp end
  local c, dispt = match(expr, "^([+-])%s*(.+)$")
  if c == "+" then
    -- Drop a leading "+"; a leading "-" has to stay part of the expression.
    expr = dispt
  elseif not c then
    werror("bad displacement expression `"..expr.."'")
  end
  -- +-opsize*num: scale the number by the byte size of the named opsize.
  local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$")
  local ops, imm = map_opsize[opsize], toint(tailops)
  if ops and imm then
    if c == "-" then imm = -imm end
    return imm*map_opsizenum[ops]
  end
  local mode, iexpr = immexpr(dispt)
  if mode == "iJ" then
    if c == "-" then werror("cannot invert label reference") end
    return { iexpr }
  end
  return expr -- Need to return original signed expression.
end

-- Parse register or type expression.
-- Accepts a register id, a type name, or "type:@reg" (type with a register
-- override). Returns the register id, its number from map_reg_num (nil if
-- unknown) and, for type expressions only, the map_type entry.
local function rtexpr(expr)
  if not expr then return end
  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    local rnum = map_reg_num[reg]
    if not rnum then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    if not map_reg_valid_base[reg] then
      werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
    end
    return reg, rnum, tp
  end
  return expr, map_reg_num[expr]
end

-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-- Parse one operand string into a table describing it.
-- Fields set here (depending on the operand form): mode, opsize, reg, vreg,
-- xreg, vxreg, xsc, disp, imm, needrex. The mode string is what the
-- templates match against ("rm...", "xm...", "i...", "f...").
-- The body is a single-pass `repeat ... until true` so that `break` can be
-- used as an early exit once the operand has been classified.
local function parseoperand(param)
  local t = {}

  -- Split off a leading operand size override (e.g. "dword [eax]").
  -- The prefix is only consumed if it names a known operand size.
  local expr = param
  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  if opsize then
    t.opsize = map_opsize[opsize]
    if t.opsize then expr = tailops end
  end

  -- br holds the text between [ and ] for memory operands, nil otherwise.
  local br = match(expr, "^%[%s*(.-)%s*%]$")
  repeat
    if br then
      t.mode = "xm"

      -- [disp]
      t.disp = toint(br)
      if t.disp then
        -- The "O" (pure offset) mode flag is only added when not on x64.
        t.mode = x64 and "xm" or "xmO"
        break
      end

      -- [reg...]
      local tp
      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if not t.reg then
        -- [expr]
        t.mode = x64 and "xm" or "xmO"
        t.disp = dispexpr("+"..br)
        break
      end

      -- Register number -1 marks a variable register: the parenthesized
      -- expression following it is stored in vreg.
      if t.reg == -1 then
        t.vreg, tailr = match(tailr, "^(%b())(.*)$")
        if not t.vreg then werror("bad variable register expression") end
      end

      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        if not map_reg_valid_index[reg] then
          werror("bad index register `"..map_reg_rev[reg].."'")
        end
        -- The register parsed above is really the index: move it over.
        t.xsc = map_xsc[xsc]
        t.xreg = t.reg
        t.vxreg = t.vreg
        t.reg = nil
        t.vreg = nil
        t.disp = dispexpr(tailsc)
        break
      end
      if not map_reg_valid_base[reg] then
        werror("bad base register `"..map_reg_rev[reg].."'")
      end

      -- [reg] or [reg+-disp]
      t.disp = toint(tailr) or (tailr == "" and 0)
      if t.disp then break end

      -- [reg+xreg...]
      local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
      xreg, t.xreg, tp = rtexpr(xreg)
      if not t.xreg then
        -- [reg+-expr]
        t.disp = dispexpr(tailr)
        break
      end
      if not map_reg_valid_index[xreg] then
        werror("bad index register `"..map_reg_rev[xreg].."'")
      end

      if t.xreg == -1 then
        t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
        if not t.vxreg then werror("bad variable register expression") end
      end

      -- [reg+xreg*xsc...]
      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        t.xsc = map_xsc[xsc]
        tailx = tailsc
      end

      -- [...] or [...+-disp] or [...+-expr]
      t.disp = dispexpr(tailx)
    else
      -- imm or opsize*imm
      local imm = toint(expr)
      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
        -- "opsize*num": the size prefix was a multiplier, not an override.
        imm = toint(sub(expr, 2))
        if imm then
          imm = imm * map_opsizenum[t.opsize]
          t.opsize = nil
        end
      end
      if imm then
        if t.opsize then werror("bad operand size override") end
        local m = "i"
        if imm == 1 then m = m.."1" end
        -- Fold 0xffffff80..0xffffffff to -128..-1 so it can match as "S".
        if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
        if imm >= -128 and imm <= 127 then m = m.."S" end
        t.imm = imm
        t.mode = m
        break
      end

      local tp
      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if t.reg then
        if t.reg == -1 then
          t.vreg, tailr = match(tailr, "^(%b())(.*)$")
          if not t.vreg then werror("bad variable register expression") end
        end
        -- reg
        if tailr == "" then
          if t.opsize then werror("bad operand size override") end
          t.opsize = map_reg_opsize[reg]
          if t.opsize == "f" then
            -- FP register: register number 0 additionally gets the "F" flag.
            t.mode = t.reg == 0 and "fF" or "f"
          else
            if reg == "@w4" or (x64 and reg == "@d4") then
              wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
            end
            -- Register number 0 gets "R", register id "@b1" gets "C".
            t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
          end
          t.needrex = map_reg_needrex[reg]
          break
        end

        -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
        if not tp then werror("bad operand `"..param.."'") end
        t.mode = "xm"
        t.disp = format(tp.ctypefmt, tailr)
      else
        t.mode, t.imm = immexpr(expr)
        -- Modes ending in "J" (label/pointer) always use the address size.
        if sub(t.mode, -1) == "J" then
          if t.opsize and t.opsize ~= addrsize then
            werror("bad operand size override")
          end
          t.opsize = addrsize
        end
      end
    end
  until true
  return t
end

------------------------------------------------------------------------------
-- x86 Template String Description
-- ===============================
--
-- Each template string is a list of [match:]pattern pairs,
-- separated by "|". The first match wins.
No match means a +-- bad or unsupported combination of operand modes or sizes. +-- +-- The match part and the ":" is omitted if the operation has +-- no operands. Otherwise the first N characters are matched +-- against the mode strings of each of the N operands. +-- +-- The mode string for each operand type is (see parseoperand()): +-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl +-- FP register: "f", +"F" for st0 +-- Index operand: "xm", +"O" for [disp] (pure offset) +-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1, +-- +"I" for arg, +"P" for pointer +-- Any: +"J" for valid jump targets +-- +-- So a match character "m" (mixed) matches both an integer register +-- and an index operand (to be encoded with the ModRM/SIB scheme). +-- But "r" matches only a register and "x" only an index operand +-- (e.g. for FP memory access operations). +-- +-- The operand size match string starts right after the mode match +-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty. +-- The effective data size of the operation is matched against this list. +-- +-- If only the regular "b", "w", "d", "q", "t" operand sizes are +-- present, then all operands must be the same size. Unspecified sizes +-- are ignored, but at least one operand must have a size or the pattern +-- won't match (use the "byte", "word", "dword", "qword", "tword" +-- operand size overrides. E.g.: mov dword [eax], 1). +-- +-- If the list has a "1" or "2" prefix, the operand size is taken +-- from the respective operand and any other operand sizes are ignored. +-- If the list contains only ".", all operand sizes are ignored. +-- If the list has a "/" prefix, the concatenated (mixed) operand sizes +-- are compared to the match. +-- +-- E.g. "rrdw" matches for either two dword registers or two word +-- registers. "Fx2dq" matches an st0 operand plus an index operand +-- pointing to a dword (float) or qword (double). 
--
-- Every character after the ":" is part of the pattern string:
--   Hex chars are accumulated to form the opcode (left to right).
--   "n"       disables the standard opcode mods
--             (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
--   "X"       Force REX.W.
--   "r"/"R"   adds the reg. number from the 1st/2nd operand to the opcode.
--   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
--             The spare 3 bits are either filled with the last hex digit or
--             the result from a previous "r"/"R". The opcode is restored.
--
-- All of the following characters force a flush of the opcode:
--   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
--   "S"       stores a signed 8 bit immediate from the last operand.
--   "U"       stores an unsigned 8 bit immediate from the last operand.
--   "W"       stores an unsigned 16 bit immediate from the last operand.
--   "i"       stores an operand sized immediate from the last operand.
--   "I"       ditto, but generates an action code to optionally modify
--             the opcode (+2) for a signed 8 bit immediate.
--   "J"       generates one of the REL action codes from the last operand.
--
------------------------------------------------------------------------------

-- Template strings for x86 instructions. Ordered by first opcode byte.
-- Unimplemented opcodes (deliberate omissions) are marked with *.
--
-- Keys have the form name_N, where N is the number of operands the template
-- applies to (e.g. ret_0 and ret_1). Entries written as `x64 and "..."` or
-- `not x64 and "..."` evaluate to false/nil for the other architecture.
-- A match part followed by an empty pattern (e.g. "rx/oq:") reuses the
-- pattern of the preceding alternative.
local map_op = {
  -- 00-05: add...
  -- 06: *push es
  -- 07: *pop es
  -- 08-0D: or...
  -- 0E: *push cs
  -- 0F: two byte opcode prefix
  -- 10-15: adc...
  -- 16: *push ss
  -- 17: *pop ss
  -- 18-1D: sbb...
  -- 1E: *push ds
  -- 1F: *pop ds
  -- 20-25: and...
  es_0 = "26",
  -- 27: *daa
  -- 28-2D: sub...
  cs_0 = "2E",
  -- 2F: *das
  -- 30-35: xor...
  ss_0 = "36",
  -- 37: *aaa
  -- 38-3D: cmp...
  ds_0 = "3E",
  -- 3F: *aas
  inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
  dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
  push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
            "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
  pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
  -- 60: *pusha, *pushad, *pushaw
  -- 61: *popa, *popad, *popaw
  -- 62: *bound rdw,x
  -- 63: x86: *arpl mw,rw
  movsxd_2 = x64 and "rm/qd:63rM",
  fs_0 = "64",
  gs_0 = "65",
  o16_0 = "66",
  a16_0 = not x64 and "67" or nil,
  a32_0 = x64 and "67",
  -- 68: push idw
  -- 69: imul rdw,mdw,idw
  -- 6A: push ib
  -- 6B: imul rdw,mdw,S
  -- 6C: *insb
  -- 6D: *insd, *insw
  -- 6E: *outsb
  -- 6F: *outsd, *outsw
  -- 70-7F: jcc lb
  -- 80: add... mb,i
  -- 81: add... mdw,i
  -- 82: *undefined
  -- 83: add... mdw,S
  test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
  -- 86: xchg rb,mb
  -- 87: xchg rdw,mdw
  -- 88: mov mb,r
  -- 89: mov mdw,r
  -- 8A: mov r,mb
  -- 8B: mov r,mdw
  -- 8C: *mov mdw,seg
  lea_2 = "rx1dq:8DrM",
  -- 8E: *mov seg,mdw
  -- 8F: pop mdw
  nop_0 = "90",
  xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
  cbw_0 = "6698",
  cwde_0 = "98",
  cdqe_0 = "4898",
  cwd_0 = "6699",
  cdq_0 = "99",
  cqo_0 = "4899",
  -- 9A: *call iw:idw
  wait_0 = "9B",
  fwait_0 = "9B",
  pushf_0 = "9C",
  pushfd_0 = not x64 and "9C",
  pushfq_0 = x64 and "9C",
  popf_0 = "9D",
  popfd_0 = not x64 and "9D",
  popfq_0 = x64 and "9D",
  sahf_0 = "9E",
  lahf_0 = "9F",
  mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
  movsb_0 = "A4",
  movsw_0 = "66A5",
  movsd_0 = "A5",
  cmpsb_0 = "A6",
  cmpsw_0 = "66A7",
  cmpsd_0 = "A7",
  -- A8: test Rb,i
  -- A9: test Rdw,i
  stosb_0 = "AA",
  stosw_0 = "66AB",
  stosd_0 = "AB",
  lodsb_0 = "AC",
  lodsw_0 = "66AD",
  lodsd_0 = "AD",
  scasb_0 = "AE",
  scasw_0 = "66AF",
  scasd_0 = "AF",
  -- B0-B7: mov rb,i
  -- B8-BF: mov rdw,i
  -- C0: rol... mb,i
  -- C1: rol... mdw,i
  ret_1 = "i.:nC2W",
  ret_0 = "C3",
  -- C4: *les rdw,mq
  -- C5: *lds rdw,mq
  -- C6: mov mb,i
  -- C7: mov mdw,i
  -- C8: *enter iw,ib
  leave_0 = "C9",
  -- CA: *retf iw
  -- CB: *retf
  int3_0 = "CC",
  int_1 = "i.:nCDU",
  into_0 = "CE",
  -- CF: *iret
  -- D0: rol... mb,1
  -- D1: rol... mdw,1
  -- D2: rol... mb,cl
  -- D3: rol... mdw,cl
  -- D4: *aam ib
  -- D5: *aad ib
  -- D6: *salc
  -- D7: *xlat
  -- D8-DF: floating point ops
  -- E0: *loopne
  -- E1: *loope
  -- E2: *loop
  -- E3: *jcxz, *jecxz
  -- E4: *in Rb,ib
  -- E5: *in Rdw,ib
  -- E6: *out ib,Rb
  -- E7: *out ib,Rdw
  call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
  jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
  -- EA: *jmp iw:idw
  -- EB: jmp ib
  -- EC: *in Rb,dx
  -- ED: *in Rdw,dx
  -- EE: *out dx,Rb
  -- EF: *out dx,Rdw
  -- F0: *lock
  int1_0 = "F1",
  repne_0 = "F2",
  repnz_0 = "F2",
  rep_0 = "F3",
  repe_0 = "F3",
  repz_0 = "F3",
  -- F4: *hlt
  cmc_0 = "F5",
  -- F6: test... mb,i; div... mb
  -- F7: test... mdw,i; div... mdw
  clc_0 = "F8",
  stc_0 = "F9",
  -- FA: *cli
  cld_0 = "FC",
  std_0 = "FD",
  -- FE: inc... mb
  -- FF: inc... mdw

  -- misc ops
  not_1 = "m:F72m",
  neg_1 = "m:F73m",
  mul_1 = "m:F74m",
  imul_1 = "m:F75m",
  div_1 = "m:F76m",
  idiv_1 = "m:F77m",

  imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
  imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",

  movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
  movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",

  bswap_1 = "rqd:0FC8r",
  bsf_2 = "rmqdw:0FBCrM",
  bsr_2 = "rmqdw:0FBDrM",
  bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
  btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
  btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
  bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",

  rdtsc_0 = "0F31", -- P1+
  cpuid_0 = "0FA2", -- P1+

  -- floating point ops
  fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
  fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
  fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",

  fpop_0 = "DDD8", -- Alias for fstp st0.

  fist_1 = "xw:nDF2m|xd:DB2m",
  fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
  fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",

  fxch_0 = "D9C9",
  fxch_1 = "ff:D9C8r",
  fxch_2 = "fFf:D9C8r|Fff:D9C8R",

  fucom_1 = "ff:DDE0r",
  fucom_2 = "Fff:DDE0R",
  fucomp_1 = "ff:DDE8r",
  fucomp_2 = "Fff:DDE8R",
  fucomi_1 = "ff:DBE8r", -- P6+
  fucomi_2 = "Fff:DBE8R", -- P6+
  fucomip_1 = "ff:DFE8r", -- P6+
  fucomip_2 = "Fff:DFE8R", -- P6+
  fcomi_1 = "ff:DBF0r", -- P6+
  fcomi_2 = "Fff:DBF0R", -- P6+
  fcomip_1 = "ff:DFF0r", -- P6+
  fcomip_2 = "Fff:DFF0R", -- P6+
  fucompp_0 = "DAE9",
  fcompp_0 = "DED9",

  fldcw_1 = "xw:nD95m",
  fstcw_1 = "xw:n9BD97m",
  fnstcw_1 = "xw:nD97m",
  fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
  fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
  fclex_0 = "9BDBE2",
  fnclex_0 = "DBE2",

  fnop_0 = "D9D0",
  -- D9D1-D9DF: unassigned

  fchs_0 = "D9E0",
  fabs_0 = "D9E1",
  -- D9E2: unassigned
  -- D9E3: unassigned
  ftst_0 = "D9E4",
  fxam_0 = "D9E5",
  -- D9E6: unassigned
  -- D9E7: unassigned
  fld1_0 = "D9E8",
  fldl2t_0 = "D9E9",
  fldl2e_0 = "D9EA",
  fldpi_0 = "D9EB",
  fldlg2_0 = "D9EC",
  fldln2_0 = "D9ED",
  fldz_0 = "D9EE",
  -- D9EF: unassigned

  f2xm1_0 = "D9F0",
  fyl2x_0 = "D9F1",
  fptan_0 = "D9F2",
  fpatan_0 = "D9F3",
  fxtract_0 = "D9F4",
  fprem1_0 = "D9F5",
  fdecstp_0 = "D9F6",
  fincstp_0 = "D9F7",
  fprem_0 = "D9F8",
  fyl2xp1_0 = "D9F9",
  fsqrt_0 = "D9FA",
  fsincos_0 = "D9FB",
  frndint_0 = "D9FC",
  fscale_0 = "D9FD",
  fsin_0 = "D9FE",
  fcos_0 = "D9FF",

  -- SSE, SSE2
  andnpd_2 = "rmo:660F55rM",
  andnps_2 = "rmo:0F55rM",
  andpd_2 = "rmo:660F54rM",
  andps_2 = "rmo:0F54rM",
  clflush_1 = "x.:0FAE7m",
  cmppd_3 = "rmio:660FC2rMU",
  cmpps_3 = "rmio:0FC2rMU",
  cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:",
  cmpss_3 = "rrio:F30FC2rMU|rxi/od:",
  comisd_2 = "rro:660F2FrM|rx/oq:",
  comiss_2 = "rro:0F2FrM|rx/od:",
  cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
  cvtdq2ps_2 = "rmo:0F5BrM",
  cvtpd2dq_2 = "rmo:F20FE6rM",
  cvtpd2ps_2 = "rmo:660F5ArM",
  cvtpi2pd_2 = "rx/oq:660F2ArM",
  cvtpi2ps_2 = "rx/oq:0F2ArM",
  cvtps2dq_2 = "rmo:660F5BrM",
  cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
  cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
  cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
  cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
  cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
  cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
  -- cvtss2si is F3 0F 2D (F2 0F 2C would encode cvttsd2si).
  cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
  cvttpd2dq_2 = "rmo:660FE6rM",
  cvttps2dq_2 = "rmo:F30F5BrM",
  cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
  cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
  ldmxcsr_1 = "xd:0FAE2m",
  lfence_0 = "0FAEE8",
  maskmovdqu_2 = "rro:660FF7rM",
  mfence_0 = "0FAEF0",
  movapd_2 = "rmo:660F28rM|mro:660F29Rm",
  movaps_2 = "rmo:0F28rM|mro:0F29Rm",
  movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
  movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
  movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
  movhlps_2 = "rro:0F12rM",
  movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
  movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
  movlhps_2 = "rro:0F16rM",
  movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
  movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
  movmskpd_2 = "rr/do:660F50rM",
  movmskps_2 = "rr/do:0F50rM",
  movntdq_2 = "xro:660FE7Rm",
  movnti_2 = "xrqd:0FC3Rm",
  movntpd_2 = "xro:660F2BRm",
  movntps_2 = "xro:0F2BRm",
  movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
  movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
  movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
  movupd_2 = "rmo:660F10rM|mro:660F11Rm",
  movups_2 = "rmo:0F10rM|mro:0F11Rm",
  orpd_2 = "rmo:660F56rM",
  orps_2 = "rmo:0F56rM",
  packssdw_2 = "rmo:660F6BrM",
  packsswb_2 = "rmo:660F63rM",
  packuswb_2 = "rmo:660F67rM",
  paddb_2 = "rmo:660FFCrM",
  paddd_2 = "rmo:660FFErM",
  paddq_2 = "rmo:660FD4rM",
  paddsb_2 = "rmo:660FECrM",
  paddsw_2 = "rmo:660FEDrM",
  paddusb_2 = "rmo:660FDCrM",
  paddusw_2 = "rmo:660FDDrM",
  paddw_2 = "rmo:660FFDrM",
  pand_2 = "rmo:660FDBrM",
  pandn_2 = "rmo:660FDFrM",
  pause_0 = "F390",
  pavgb_2 = "rmo:660FE0rM",
  pavgw_2 = "rmo:660FE3rM",
  pcmpeqb_2 = "rmo:660F74rM",
  pcmpeqd_2 = "rmo:660F76rM",
  pcmpeqw_2 = "rmo:660F75rM",
  pcmpgtb_2 = "rmo:660F64rM",
  pcmpgtd_2 = "rmo:660F66rM",
  pcmpgtw_2 = "rmo:660F65rM",
  -- Mem op: SSE4.1 only. In that form the memory operand (1st) goes into
  -- ModRM r/m and the xmm register (2nd) into the reg field, hence "Rm".
  pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU",
  pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
  pmaddwd_2 = "rmo:660FF5rM",
  pmaxsw_2 = "rmo:660FEErM",
  pmaxub_2 = "rmo:660FDErM",
  pminsw_2 = "rmo:660FEArM",
  pminub_2 = "rmo:660FDArM",
  pmovmskb_2 = "rr/do:660FD7rM",
  pmulhuw_2 = "rmo:660FE4rM",
  pmulhw_2 = "rmo:660FE5rM",
  pmullw_2 = "rmo:660FD5rM",
  pmuludq_2 = "rmo:660FF4rM",
  por_2 = "rmo:660FEBrM",
  prefetchnta_1 = "xb:n0F180m",
  prefetcht0_1 = "xb:n0F181m",
  prefetcht1_1 = "xb:n0F182m",
  prefetcht2_1 = "xb:n0F183m",
  psadbw_2 = "rmo:660FF6rM",
  pshufd_3 = "rmio:660F70rMU",
  pshufhw_3 = "rmio:F30F70rMU",
  pshuflw_3 = "rmio:F20F70rMU",
  pslld_2 = "rmo:660FF2rM|rio:660F726mU",
  pslldq_2 = "rio:660F737mU",
  psllq_2 = "rmo:660FF3rM|rio:660F736mU",
  psllw_2 = "rmo:660FF1rM|rio:660F716mU",
  psrad_2 = "rmo:660FE2rM|rio:660F724mU",
  psraw_2 = "rmo:660FE1rM|rio:660F714mU",
  psrld_2 = "rmo:660FD2rM|rio:660F722mU",
  psrldq_2 = "rio:660F733mU",
  psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
  psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
  psubb_2 = "rmo:660FF8rM",
  psubd_2 = "rmo:660FFArM",
  psubq_2 = "rmo:660FFBrM",
  psubsb_2 = "rmo:660FE8rM",
  psubsw_2 = "rmo:660FE9rM",
  psubusb_2 = "rmo:660FD8rM",
  psubusw_2 = "rmo:660FD9rM",
  psubw_2 = "rmo:660FF9rM",
  punpckhbw_2 = "rmo:660F68rM",
  punpckhdq_2 = "rmo:660F6ArM",
  punpckhqdq_2 = "rmo:660F6DrM",
  punpckhwd_2 = "rmo:660F69rM",
  punpcklbw_2 = "rmo:660F60rM",
  punpckldq_2 = "rmo:660F62rM",
  punpcklqdq_2 = "rmo:660F6CrM",
  punpcklwd_2 = "rmo:660F61rM",
  pxor_2 = "rmo:660FEFrM",
  rcpps_2 = "rmo:0F53rM",
  rcpss_2 = "rro:F30F53rM|rx/od:",
  rsqrtps_2 = "rmo:0F52rM",
  rsqrtss_2 = "rmo:F30F52rM",
  sfence_0 = "0FAEF8",
  shufpd_3 = "rmio:660FC6rMU",
  shufps_3 = "rmio:0FC6rMU",
  stmxcsr_1 = "xd:0FAE3m",
  ucomisd_2 = "rro:660F2ErM|rx/oq:",
  ucomiss_2 = "rro:0F2ErM|rx/od:",
  unpckhpd_2 = "rmo:660F15rM",
  unpckhps_2 = "rmo:0F15rM",
  unpcklpd_2 = "rmo:660F14rM",
  unpcklps_2 = "rmo:0F14rM",
  xorpd_2 = "rmo:660F57rM",
  xorps_2 = "rmo:0F57rM",

  -- SSE3 ops
  fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
  addsubpd_2 = "rmo:660FD0rM",
  addsubps_2 = "rmo:F20FD0rM",
  haddpd_2 = "rmo:660F7CrM",
  haddps_2 = "rmo:F20F7CrM",
  hsubpd_2 = "rmo:660F7DrM",
  hsubps_2 = "rmo:F20F7DrM",
  lddqu_2 = "rxo:F20FF0rM",
  movddup_2 = "rmo:F20F12rM",
  movshdup_2 = "rmo:F30F16rM",
  movsldup_2 = "rmo:F30F12rM",

  -- SSSE3 ops
  pabsb_2 = "rmo:660F381CrM",
  pabsd_2 = "rmo:660F381ErM",
  pabsw_2 = "rmo:660F381DrM",
  palignr_3 = "rmio:660F3A0FrMU",
  phaddd_2 = "rmo:660F3802rM",
  phaddsw_2 = "rmo:660F3803rM",
  phaddw_2 = "rmo:660F3801rM",
  phsubd_2 = "rmo:660F3806rM",
  phsubsw_2 = "rmo:660F3807rM",
  phsubw_2 = "rmo:660F3805rM",
  pmaddubsw_2 = "rmo:660F3804rM",
  pmulhrsw_2 = "rmo:660F380BrM",
  pshufb_2 = "rmo:660F3800rM",
  psignb_2 = "rmo:660F3808rM",
  psignd_2 = "rmo:660F380ArM",
  psignw_2 = "rmo:660F3809rM",

  -- SSE4.1 ops
  blendpd_3 = "rmio:660F3A0DrMU",
  blendps_3 = "rmio:660F3A0CrMU",
  blendvpd_3 = "rmRo:660F3815rM",
  blendvps_3 = "rmRo:660F3814rM",
  dppd_3 = "rmio:660F3A41rMU",
  dpps_3 = "rmio:660F3A40rMU",
  extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
  -- insertps is 66 0F 3A 21 (66 0F 3A 41 is dppd, see above).
  insertps_3 = "rrio:660F3A21rMU|rxi/od:",
  movntdqa_2 = "rmo:660F382ArM",
  mpsadbw_3 = "rmio:660F3A42rMU",
  packusdw_2 = "rmo:660F382BrM",
  pblendvb_3 = "rmRo:660F3810rM",
  pblendw_3 = "rmio:660F3A0ErMU",
  pcmpeqq_2 = "rmo:660F3829rM",
  pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
  pextrd_3 = "mri/do:660F3A16RmU",
  pextrq_3 = "mri/qo:660F3A16RmU",
  -- pextrw is SSE2, mem operand is SSE4.1 only
  phminposuw_2 = "rmo:660F3841rM",
  pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
  pinsrd_3 = "rmi/od:660F3A22rMU",
  pinsrq_3 = "rmi/oq:660F3A22rXMU",
  pmaxsb_2 = "rmo:660F383CrM",
  pmaxsd_2 = "rmo:660F383DrM",
  pmaxud_2 = "rmo:660F383FrM",
  pmaxuw_2 = "rmo:660F383ErM",
  pminsb_2 = "rmo:660F3838rM",
  pminsd_2 = "rmo:660F3839rM",
  pminud_2 = "rmo:660F383BrM",
  pminuw_2 = "rmo:660F383ArM",
  pmovsxbd_2 = "rro:660F3821rM|rx/od:",
  pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
  pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
  pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
  pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
  pmovsxwq_2 = "rro:660F3824rM|rx/od:",
  pmovzxbd_2 = "rro:660F3831rM|rx/od:",
  pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
  pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
  pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
  pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
  pmovzxwq_2 = "rro:660F3834rM|rx/od:",
  pmuldq_2 = "rmo:660F3828rM",
  pmulld_2 = "rmo:660F3840rM",
  ptest_2 = "rmo:660F3817rM",
  roundpd_3 = "rmio:660F3A09rMU",
  roundps_3 = "rmio:660F3A08rMU",
  roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
  roundss_3 = "rrio:660F3A0ArMU|rxi/od:",

  -- SSE4.2 ops
  crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
  pcmpestri_3 = "rmio:660F3A61rMU",
  pcmpestrm_3 = "rmio:660F3A60rMU",
  pcmpgtq_2 = "rmo:660F3837rM",
  pcmpistri_3 = "rmio:660F3A63rMU",
  pcmpistrm_3 = "rmio:660F3A62rMU",
  popcnt_2 = "rmqdw:F30FB8rM",

  -- SSE4a
  extrq_2 = "rro:660F79rM",
  extrq_3 = "riio:660F780mUU",
  insertq_2 = "rro:F20F79rM",
  insertq_4 = "rriio:F20F78rMUU",
  lzcnt_2 = "rmqdw:F30FBDrM",
  movntsd_2 = "xr/qo:nF20F2BRm",
  movntss_2 = "xr/do:F30F2BRm",
  -- popcnt is also in SSE4.2
}

------------------------------------------------------------------------------

-- Arithmetic ops.
-- n is the operation number: it selects the opcode row (n*8 + 1/3/5) for the
-- register and accumulator forms and the /n spare field for the 81/83 forms.
for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
                     ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
  local n8 = n * 8
  map_op[name.."_2"] = format(
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
    1+n8, 3+n8, n, n, 5+n8, n)
end

-- Shift ops.
-- n is the /n spare field; sal shares its encoding with shl.
for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
                     shl = 4, shr = 5, sar = 7, sal = 4 } do
  map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
end

-- Conditional ops.
-- For every condition code in map_cc generate the jcc, setcc and cmovcc
-- templates; n is the condition number placed in the low opcode nibble.
for cc,n in pairs(map_cc) do
  map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X
  map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
  map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
end

-- FP arithmetic ops.
-- nc is the second opcode byte (0xC0 + n*8) for the st0,stx register form.
-- nr is the byte used for the DC/DE forms: identical for n < 4, and swapped
-- between the sub/subr and div/divr pairs for n >= 4.
for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
                     sub = 4, subr = 5, div = 6, divr = 7 } do
  local nc = 192 + n * 8
  local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
  local fn = "f"..name
  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
  if n == 2 or n == 3 then
    -- com/comp: no "stx, st0" form and no popping variants are generated.
    map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
  else
    map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
    map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
    map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
  end
  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
end

-- FP conditional moves.
-- 56000 is 0xDAC0; conditions 0-3 yield DAC0+8*n, conditions 4-7 add 0x100
-- and therefore yield DBC0+8*(n-4).
for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
  local n4 = n % 4
  local nc = 56000 + n4 * 8 + (n-n4) * 64
  map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+
  map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
end

-- SSE FP arithmetic ops.
-- n is the low nibble of the 0F 5x opcode; the four variants differ only in
-- their prefix (none, F3, 66, F2).
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
                     sub = 12, min = 13, div = 14, max = 15 } do
  map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
  map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
  map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
  map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
end

------------------------------------------------------------------------------

-- Process pattern string.
--   pat      pattern part of a template (the text after the ":").
--   args     parsed operands (tables returned by parseoperand).
--   sz       effective operand size code; also used for "i"/"I" immediates.
--   op       opcode name, only used in error messages.
--   needrex  truthy adds 16 to the rex value handed to wputop.
-- The pattern is interpreted one character at a time; hex digits accumulate
-- in `opcode`, which is flushed through wputop by "m"/"M" or by any other
-- non-modifier character (including the "|" terminator appended below).
local function dopattern(pat, args, sz, op, needrex)
  local digit, addin
  local opcode = 0
  local szov = sz   -- Size passed to wputop; cleared by "n".
  local narg = 1    -- Index of the next operand to use for an immediate.
  local rex = 0

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 5 positions.
  if secpos+5 > maxsecpos then wflush() end

  -- Process each character.
  for c in gmatch(pat.."|", ".") do
    if match(c, "%x") then -- Hex digit.
      -- Convert "0"-"9", "A"-"F" or "a"-"f" to its value 0-15.
      digit = byte(c) - 48
      if digit > 48 then digit = digit - 39
      elseif digit > 16 then digit = digit - 7 end
      opcode = opcode*16 + digit
      addin = nil
    elseif c == "n" then -- Disable operand size mods for opcode.
      szov = nil
    elseif c == "X" then -- Force REX.W.
      rex = 8
    elseif c == "r" then -- Merge 1st operand regno. into opcode.
      addin = args[1]; opcode = opcode + (addin.reg % 8)
      if narg < 2 then narg = 2 end
    elseif c == "R" then -- Merge 2nd operand regno. into opcode.
      addin = args[2]; opcode = opcode + (addin.reg % 8)
      narg = 3
    elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
      -- s becomes the spare (reg) field of the ModRM byte.
      local s
      if addin then
        s = addin.reg
        opcode = opcode - (s%8) -- Undo regno opcode merge.
      else
        s = opcode % 16 -- Undo last digit.
        opcode = (opcode - s) / 16
      end
      local nn = c == "m" and 1 or 2
      local t = args[nn]
      if narg <= nn then narg = nn + 1 end
      -- Collect REX bits: +8 for qword size, +1/+2/+4 when the base, index
      -- or spare register number is above 7.
      if szov == "q" and rex == 0 then rex = rex + 8 end
      if t.reg and t.reg > 7 then rex = rex + 1 end
      if t.xreg and t.xreg > 7 then rex = rex + 2 end
      if s > 7 then rex = rex + 4 end
      if needrex then rex = rex + 16 end
      wputop(szov, opcode, rex); opcode = nil
      local imark = sub(pat, -1) -- Force a mark (ugly).
      -- Put ModRM/SIB with regno/last digit as spare.
      wputmrmsib(t, imark, s, addin and addin.vreg)
      addin = nil
    else
      if opcode then -- Flush opcode.
        if szov == "q" and rex == 0 then rex = rex + 8 end
        if needrex then rex = rex + 16 end
        if addin and addin.reg == -1 then
          -- Variable register: (-1 % 8) == 7 was merged above, so take it
          -- out again and emit a VREG action after the opcode instead.
          wputop(szov, opcode - 7, rex)
          waction("VREG", addin.vreg); wputxb(0)
        else
          if addin and addin.reg > 7 then rex = rex + 1 end
          wputop(szov, opcode, rex)
        end
        opcode = nil
      end
      if c == "|" then break end
      if c == "o" then -- Offset (pure 32 bit displacement).
        wputdarg(args[1].disp); if narg < 2 then narg = 2 end
      elseif c == "O" then
        wputdarg(args[2].disp); narg = 3
      else
        -- Anything else is an immediate operand.
        local a = args[narg]
        narg = narg + 1
        local mode, imm = a.mode, a.imm
        if mode == "iJ" and not match("iIJ", c) then
          werror("bad operand size for label")
        end
        if c == "S" then
          wputsbarg(imm)
        elseif c == "U" then
          wputbarg(imm)
        elseif c == "W" then
          wputwarg(imm)
        elseif c == "i" or c == "I" then
          if mode == "iJ" then
            wputlabel("IMM_", imm, 1)
          elseif mode == "iI" and c == "I" then
            waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
          else
            wputszarg(sz, imm)
          end
        elseif c == "J" then
          if mode == "iPJ" then
            waction("REL_A", imm) -- !x64 (secpos)
          else
            wputlabel("REL_", imm, 2)
          end
        else
          werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
        end
      end
    end
  end
end

------------------------------------------------------------------------------

-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
  f = "stx", F = "st0", J = "lbl", ["1"] = "1",
  I = "#", S = "#", O = "#",
}

-- Return a table/string showing all possible operand modes.
-- For nparams == 0 the result is the empty string; otherwise it is a list
-- with one "mode, mode, ..." string per template alternative. Alternatives
-- containing a suppressed mode ('#' in map_modename) are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local t = {}
  for tm in gmatch(template, "[^%|]+") do
    local s = map_modename[sub(tm, 1, 1)]
    -- Only the first result of gsub (the string) takes part in the concat.
    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
      return ", "..map_modename[c]
    end)
    if not match(s, "#") then t[#t+1] = s end
  end
  return t
end

-- Match operand modes against mode match part of template.
-- The i-th character of tm has to occur in the mode string of the i-th
-- operand. Returns true on success and nothing otherwise.
local function matchtm(tm, args)
  for i=1,#args do
    if not match(args[i].mode, sub(tm, i, i)) then return end
  end
  return true
end

-- Handle opcodes defined with template strings.
-- Without params this returns the help output of templatehelp(). Otherwise
-- the operands are parsed, the first template alternative whose modes and
-- operand sizes fit is emitted via dopattern(), and an error is raised via
-- werror() if no alternative fits.
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Zero-operand opcodes have no match part.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Determine common operand size (coerce undefined size) or flag as mixed.
  local sz, szmix, needrex
  for i,p in ipairs(params) do
    args[i] = parseoperand(p)
    local nsz = args[i].opsize
    if nsz then
      if sz and sz ~= nsz then szmix = true else sz = nsz end
    end
    -- needrex has to agree between all operands that define it.
    local nrex = args[i].needrex
    if nrex ~= nil then
      if needrex == nil then
        needrex = nrex
      elseif needrex ~= nrex then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Try all match:pattern pairs (separated by '|').
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- Split off size match (starts after mode match) and pattern string.
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    -- An empty pattern reuses the pattern of the previous alternative.
    if pat == "" then pat = lastpat else lastpat = pat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then -- Match both operand sizes.
        if args[1].opsize == sub(szm, 2, 2) and
           args[2].opsize == sub(szm, 3, 3) then
          dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
          return
        end
      else -- Match common operand size.
        local szp = sz
        if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
        if prefix == "1" then szp = args[1].opsize; szmix = nil
        elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
          return
        end
      end
      -- Modes fit but sizes did not: remember this for the error message.
      gotmatch = true
    end
  end

  local msg = "bad operand mode"
  if gotmatch then
    if szmix then
      msg = "mixed operand size"
    else
      msg = sz and "bad operand size" or "missing operand size"
    end
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end

------------------------------------------------------------------------------

-- x64-specific opcode for 64 bit immediates and displacements.
+if x64 then
+  -- mov64 takes exactly two operands in one of three shapes:
+  --   "[disp], reg"   "reg, [disp]"   "reg, imm"
+  -- Called with params == nil it returns those three help strings.
+  -- The 64 bit value is always emitted as two IMM_D actions (low half
+  -- first, then the value shifted right by 32).
+  function map_op.mov64_2(params)
+    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
+    if secpos+2 > maxsecpos then wflush() end
+    -- Captures the text between a leading "[" and a trailing "]".
+    local bracketed = "^%[%s*(.-)%s*%]$"
+    local first, second = params[1], params[2]
+    local opcode, sz, rex
+    local op64 = match(first, bracketed)
+    if op64 then
+      -- "[disp], reg": the register operand must have mode "rmR".
+      local a = parseoperand(second)
+      if a.mode ~= "rmR" then werror("bad operand mode") end
+      sz = a.opsize
+      rex = sz == "q" and 8 or 0
+      opcode = 0xa3
+    else
+      op64 = match(second, bracketed)
+      local a = parseoperand(first)
+      if not op64 then
+        -- "reg, imm": only 64 bit registers are accepted; sz stays nil.
+        if not (sub(a.mode, 1, 1) == "r" and a.opsize == "q") then
+          werror("bad operand mode")
+        end
+        op64 = second
+        opcode = 0xb8 + (a.reg%8) -- !x64: no VREG support.
+        rex = a.reg > 7 and 9 or 8
+      else
+        -- "reg, [disp]": the register operand must have mode "rmR".
+        if a.mode ~= "rmR" then werror("bad operand mode") end
+        sz = a.opsize
+        rex = sz == "q" and 8 or 0
+        opcode = 0xa1
+      end
+    end
+    wputop(sz, opcode, rex)
+    waction("IMM_D", format("(unsigned int)((uintptr_t)(%s))", op64))
+    waction("IMM_D", format("(unsigned int)(((uintptr_t)(%s))>>32)", op64))
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+-- The element size is the second character of the opcode name
+-- (".byte" -> "b", ".sbyte" -> "s", ".word" -> "w", ".dword" -> "d");
+-- "a" (".aword") is replaced by addrsize. Called with params == nil it
+-- returns the help string.
+local function op_data(params)
+  if not params then return "imm..." end
+  local sz = sub(params.op, 2, 2)
+  if sz == "a" then sz = addrsize end
+  for _,operand in ipairs(params) do
+    local a = parseoperand(operand)
+    -- Every operand must be an immediate whose size, if it has one,
+    -- agrees with the element size.
+    local is_imm = sub(a.mode, 1, 1) == "i"
+    local size_ok = not a.opsize or a.opsize == sz
+    if not (is_imm and size_ok) then
+      werror("bad mode or size in `"..operand.."'")
+    end
+    if a.mode == "iJ" then
+      wputlabel("IMM_", a.imm, 1)
+    else
+      wputszarg(sz, a.imm)
+    end
+    -- Flush between elements so long data lists cannot overrun the buffer.
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- All data pseudo-opcodes share the same variadic handler.
+for _,opname in ipairs{ ".byte_*", ".sbyte_*", ".word_*", ".dword_*", ".aword_*" } do
+  map_op[opname] = op_data
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+-- The four marker pseudo-opcodes below do not write anything directly.
+-- They queue a callback with wline; the callback later receives the output
+-- stream and calls the matching write* function with the given name.
+-- With params == nil each returns its help string.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local prefix = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local name = params[1] -- No syntax check. You get to keep the pieces.
+  wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+-- params[1] is the label, the optional params[2] an address to assign to
+-- it. The same handler is registered for one and two parameters.
+map_op[".label_2"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
+  if secpos+2 > maxsecpos then wflush() end
+  local a = parseoperand(params[1])
+  local mode, imm = a.mode, a.imm
+  -- A numeric imm is emitted as a single byte after LABEL_LG; a
+  -- non-numeric imm with mode "iJ" is passed to LABEL_PC as an expression.
+  if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
+    -- Local label (1: ... 9:) or global label (->global:).
+    waction("LABEL_LG", nil, 1)
+    wputxb(imm)
+  elseif mode == "iJ" then
+    -- PC label (=>pcexpr:).
+    waction("LABEL_PC", imm)
+  else
+    werror("bad label definition")
+  end
+  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
+  local addr = params[2]
+  if addr then
+    -- This inner "a" shadows the label operand parsed above.
+    local a = parseoperand(addr)
+    if a.mode == "iPJ" then
+      waction("SETLABEL", a.imm)
+    else
+      werror("bad label assignment")
+    end
+  end
+end
+map_op[".label_1"] = map_op[".label_2"]
+
+------------------------------------------------------------------------------
+
+-- Alignment pseudo-opcode.
+-- The argument is either a number or a name looked up through map_opsize
+-- and map_opsizenum (both defined outside this excerpt).
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
+  if align then
+    local x = align
+    -- Must be a power of 2 in the range (2 ... 256).
+    -- Halve up to 8 times; reaching exactly 1 proves align == 2^i.
+    for i=1,8 do
+      x = x / 2
+      if x == 1 then
+        waction("ALIGN", nil, 1)
+        wputxb(align-1) -- Action byte is 2**n-1.
+        return
+      end
+    end
+  end
+  werror("bad alignment")
+end
+
+-- Spacing pseudo-opcode.
+-- params[1] is handed to the SPACE action unchanged; the optional filler
+-- must convert to a number in 0..255 and defaults to 0.
+map_op[".space_2"] = function(params)
+  if not params then return "num [, filler]" end
+  if secpos+1 > maxsecpos then wflush() end
+  waction("SPACE", params[1])
+  local fill = params[2]
+  if fill then
+    fill = tonumber(fill)
+    if not fill or fill < 0 or fill > 255 then werror("bad filler") end
+  end
+  wputxb(fill or 0)
+end
+map_op[".space_1"] = map_op[".space_2"]
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+-- params are name, ctype and an optional base register. The handler is
+-- registered for two and three parameters; nparams only selects the help
+-- text when params == nil.
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    return nparams == 2 and "name, ctype" or "name, ctype, reg"
+  end
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  local tp = map_type[name]
+  if tp then
+    werror("duplicate type `"..name.."'")
+  end
+  if reg and not map_reg_valid_base[reg] then
+    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
+  end
+  -- Add #type to defines. A bit unclean to put it in map_archdef.
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Add new type and emit shortcut define.
+  -- ctypenum is only advanced after the define line has been queued.
+  local num = ctypenum + 1
+  map_type[name] = {
+    ctype = ctype,
+    ctypefmt = format("Dt%X(%%s)", num),
+    reg = reg,
+  }
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+-- Writes one line per entry of map_type, sorted by name. lvl is accepted
+-- for symmetry with the other dump functions but not used here.
+local function dumptypes(out, lvl)
+  local t = {}
+  for name in pairs(map_type) do t[#t+1] = name end
+  sort(t)
+  out:write("Type definitions:\n")
+  for _,name in ipairs(t) do
+    local tp = map_type[name]
+    local reg = tp.reg and map_reg_rev[tp.reg] or ""
+    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+-- Emits a SECTION action followed by the section number as one byte.
+function _M.section(num)
+  waction("SECTION")
+  wputxb(num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpregs(out)
+  dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+-- Stores the core's line/error/fatal/warning writers in this module's
+-- wline, werror, wfatal and wwarn and hands back this module's wflush.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww
+  return wflush
+end
+
+-- Setup the arch-specific module.
+-- Only records the arch name and the option table.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+  -- Lookups that miss in map_op fall back to the core pseudo-opcodes;
+  -- lookups that miss in the core's map_def fall back to map_archdef.
+  setmetatable(map_op, { __index = map_coreop })
+  setmetatable(map_def, { __index = map_archdef })
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/tools/luaffi/dynasm/dynasm.lua b/tools/luaffi/dynasm/dynasm.lua
new file mode 100644
index 0000000..2c5062d
--- /dev/null
+++ b/tools/luaffi/dynasm/dynasm.lua
@@ -0,0 +1,1096 @@
+------------------------------------------------------------------------------
+-- DynASM. A dynamic assembler for code generation engines.
+-- Originally designed and implemented for LuaJIT.
+--
+-- Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+-- See below for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Application information.
+local _info = {
+  name = "DynASM",
+  description = "A dynamic assembler for code generation engines",
+  version = "1.3.0",
+  vernum = 10300,
+  release = "2011-05-05",
+  author = "Mike Pall",
+  url = "http://luajit.org/dynasm.html",
+  license = "MIT",
+  copyright = [[
+Copyright (C) 2005-2011 Mike Pall. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+]],
+}
+
+-- Cache library functions.
+local type, pairs, ipairs = type, pairs, ipairs
+local pcall, error, assert = pcall, error, assert
+local _s = string
+local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub
+local format, rep, upper = _s.format, _s.rep, _s.upper
+local _t = table
+local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort
+local exit = os.exit
+local io = io
+local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr
+
+------------------------------------------------------------------------------
+
+-- Program options.
+local g_opt = {}
+
+-- Global state for current file.
+local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch
+-- Number of errors seen so far, or the string "fatal" after a fatal error.
+local g_errcount = 0
+
+-- Write buffer for output file.
+-- While g_capbuffer is set, wline appends to it instead of g_wbuffer.
+local g_wbuffer, g_capbuffer
+
+------------------------------------------------------------------------------
+
+-- Write an output line (or callback function) to the buffer.
+-- With needindent set the current indentation is prepended, so it must
+-- only be used for string lines. Each call advances g_synclineno by one.
+local function wline(line, needindent)
+  local buf = g_capbuffer or g_wbuffer
+  buf[#buf+1] = needindent and g_indent..line or line
+  g_synclineno = g_synclineno + 1
+end
+
+-- Write assembler line as a comment, if requested.
+-- The line is wrapped in g_opt.comment / g_opt.endcomment.
+local function wcomment(aline)
+  if g_opt.comment then
+    wline(g_opt.comment..aline..g_opt.endcomment, true)
+  end
+end
+
+-- Resync CPP line numbers.
+-- Emits a "# <line> "<file>"" marker when the output line counter has
+-- drifted from the input line number and g_opt.cpp is set.
+local function wsync()
+  if g_synclineno ~= g_lineno and g_opt.cpp then
+    wline("# "..g_lineno..' "'..g_fname..'"')
+    -- Overrides the increment wline just made: the marker line itself
+    -- does not count.
+    g_synclineno = g_lineno
+  end
+end
+
+-- Dummy action flush function. Replaced with arch-specific function later.
+local function wflush(term)
+end
+
+-- Dump all buffered output lines.
+-- Writes every entry of buf to out. String entries are written followed
+-- by a newline (write failures raise through assert). Any other entry is
+-- treated as a callback and invoked with the output stream.
+local function wdumplines(out, buf)
+  for _,entry in ipairs(buf) do
+    if type(entry) ~= "string" then
+      -- Special callback to dynamically insert lines after end of processing.
+      entry(out)
+    else
+      assert(out:write(entry, "\n"))
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Emit an error. Processing continues with next statement.
+-- Raises a Lua error (level 0, so no position is prepended) carrying the
+-- file name, line number, message and offending source line. The
+-- g_opt.vserror flag only switches the "file(line) :" / "file:line:" prefix.
+local function werror(msg)
+  local fmt = g_opt.vserror and "%s(%s) : error: %s:\n%s"
+                             or "%s:%s: error: %s:\n%s"
+  error(format(fmt, g_fname, g_lineno, msg, g_curline), 0)
+end
+
+-- Emit a fatal error. Processing stops.
+-- Marks the error counter as "fatal" before raising, which makes
+-- wprinterr return true for this error.
+local function wfatal(msg)
+  g_errcount = "fatal"
+  werror(msg)
+end
+
+-- Print a warning. Processing continues.
+local function wwarn(msg)
+  local text = format("%s:%s: warning: %s:\n%s\n",
+                      g_fname, g_lineno, msg, g_curline)
+  stderr:write(text)
+end
+
+-- Print caught error message. But suppress excessive errors.
+-- Returns true only when the error counter has been switched to "fatal";
+-- regular errors return nothing.
+local function wprinterr(...)
+  if type(g_errcount) ~= "number" then
+    -- Fatal error.
+    stderr:write(...)
+    return true -- Stop processing.
+  end
+  -- Regular error.
+  local count = g_errcount + 1
+  g_errcount = count
+  if count < 21 then -- Seems to be a reasonable limit.
+    stderr:write(...)
+  elseif count == 21 then
+    stderr:write(g_fname,
+      ":*: warning: too many errors (suppressed further messages).\n")
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Map holding all option handlers.
+local opt_map = {}
+local opt_current
+
+-- Print error and exit with error status.
+-- The message parts are written unchanged after a fixed prefix.
+local function opterror(...)
+  stderr:write("dynasm.lua: ERROR: ", ...)
+  stderr:write("\n")
+  exit(1)
+end
+
+-- Get option parameter.
+-- Returns args[args.argn] and advances args.argn by one. Reports an error
+-- naming opt_current (and exits via opterror) when no argument is left.
+local function optparam(args)
+  local argn = args.argn
+  local p = args[argn]
+  if not p then
+    opterror("missing parameter for option `", opt_current, "'.")
+  end
+  args.argn = argn + 1
+  return p
+end
+
+------------------------------------------------------------------------------
+
+-- Core pseudo-opcodes.
+local map_coreop = {}
+-- Dummy opcode map. Replaced by arch-specific map.
+local map_op = {}
+
+-- Forward declarations.
+local dostmt
+local readfile
+
+------------------------------------------------------------------------------
+
+-- Map for defines (initially empty, chains to arch-specific map).
+local map_def = {}
+
+-- Pseudo-opcode to define a substitution.
+-- The substitution text defaults to "1". An existing define of the same
+-- name is overwritten; only the name syntax is checked here.
+map_coreop[".define_2"] = function(params, nparams)
+  if not params then return nparams == 1 and "name" or "name, subst" end
+  local name, def = params[1], params[2] or "1"
+  if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end
+  map_def[name] = def
+end
+map_coreop[".define_1"] = map_coreop[".define_2"]
+
+-- Define a substitution on the command line.
+-- Accepts "name=subst" (subst may be empty) or a bare "name", which is
+-- defined as "1".
+function opt_map.D(args)
+  local namesubst = optparam(args)
+  local name, subst = match(namesubst, "^([%a_][%w_]*)=(.*)$")
+  if name then
+    map_def[name] = subst
+  elseif match(namesubst, "^[%a_][%w_]*$") then
+    map_def[namesubst] = "1"
+  else
+    opterror("bad define")
+  end
+end
+
+-- Undefine a substitution on the command line.
+function opt_map.U(args)
+  local name = optparam(args)
+  if match(name, "^[%a_][%w_]*$") then
+    map_def[name] = nil
+  else
+    opterror("bad define")
+  end
+end
+
+-- Helper for definesubst.
+-- gotsubst holds the last word replaced during the current gsub pass, or
+-- false if the pass replaced nothing.
+local gotsubst
+
+local function definesubst_one(word)
+  local subst = map_def[word]
+  if subst then gotsubst = word; return subst else return word end
+end
+
+-- Iteratively substitute defines.
+-- Each pass replaces every word (optionally prefixed with '#') that has
+-- an entry in map_def. Passes repeat until one changes nothing.
+local function definesubst(stmt)
+  -- Limit number of iterations.
+  for i=1,100 do
+    gotsubst = false
+    stmt = gsub(stmt, "#?[%w_]+", definesubst_one)
+    if not gotsubst then break end
+  end
+  -- Still substituting after 100 passes: treat it as a recursive define.
+  if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end
+  return stmt
+end
+
+-- Dump all defines.
+-- Names are sorted; when an arch module is loaded its revdef function is
+-- applied to each substitution before printing. lvl is not used here.
+local function dumpdefines(out, lvl)
+  local t = {}
+  for name in pairs(map_def) do
+    t[#t+1] = name
+  end
+  sort(t)
+  out:write("Defines:\n")
+  for _,name in ipairs(t) do
+    local subst = map_def[name]
+    if g_arch then subst = g_arch.revdef(subst) end
+    out:write(format(" %-20s %s\n", name, subst))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for conditional assembly.
+-- condstack[lvl] holds the state of the open conditional at that level:
+-- falsy while no branch has been taken, the (truthy) condition result
+-- once one has, or the string "else" after .else was seen.
+local condlevel = 0
+local condstack = {}
+
+-- Compile source as a chunk that uses env as its global environment.
+-- Uses loadstring + setfenv when setfenv exists, otherwise load with an
+-- explicit environment argument. Returns the function, or nil plus an
+-- error message.
+local function loadin(source, env)
+  if setfenv then
+    local func, err = loadstring(source)
+    if func then
+      setfenv(func, env)
+    end
+    return func, err
+  else
+    return load(source, nil, nil, env)
+  end
+end
+
+-- Evaluate condition with a Lua expression. Substitutions already performed.
+-- Returns a boolean; the number 0 counts as false. Compile or runtime
+-- errors in the expression are reported through wfatal.
+local function cond_eval(cond)
+  -- No globals. All unknown identifiers evaluate to nil.
+  local func, err = loadin("return "..cond, {})
+  if func then
+    local ok, res = pcall(func)
+    if ok then
+      if res == 0 then return false end -- Oh well.
+      return not not res
+    end
+    err = res
+  end
+  wfatal("bad condition: "..err)
+end
+
+-- Skip statements until next conditional pseudo-opcode at the same level.
+-- Temporarily replaces dostmt with a filter that only counts nested
+-- .if/.endif pairs. At nesting level 0 the first .elif/.else/.endif
+-- restores the saved dostmt and is passed on to it.
+local function stmtskip()
+  local dostmt_save = dostmt
+  local lvl = 0
+  dostmt = function(stmt)
+    local op = match(stmt, "^%s*(%S+)")
+    if op == ".if" then
+      lvl = lvl + 1
+    elseif lvl ~= 0 then
+      if op == ".endif" then lvl = lvl - 1 end
+    elseif op == ".elif" or op == ".else" or op == ".endif" then
+      dostmt = dostmt_save
+      dostmt(stmt)
+    end
+  end
+end
+
+-- Pseudo-opcodes for conditional assembly.
+-- .if: opens a new conditional level and records the condition result.
+-- A false condition skips statements up to the matching
+-- .elif/.else/.endif.
+map_coreop[".if_1"] = function(params)
+  if not params then return "condition" end
+  local lvl = condlevel + 1
+  local res = cond_eval(params[1])
+  condlevel = lvl
+  condstack[lvl] = res
+  if not res then stmtskip() end
+end
+
+-- .elif: only evaluated while no earlier branch of this level was taken.
+-- If a branch was already taken, or the condition is false, the
+-- following statements are skipped.
+map_coreop[".elif_1"] = function(params)
+  if not params then return "condition" end
+  if condlevel == 0 then wfatal(".elif without .if") end
+  local lvl = condlevel
+  local res = condstack[lvl]
+  if res then
+    if res == "else" then wfatal(".elif after .else") end
+  else
+    res = cond_eval(params[1])
+    if res then
+      condstack[lvl] = res
+      return
+    end
+  end
+  stmtskip()
+end
+
+-- .else: marks the level as "else" and skips its body if an earlier
+-- branch of this level was already taken.
+map_coreop[".else_0"] = function(params)
+  if condlevel == 0 then wfatal(".else without .if") end
+  local lvl = condlevel
+  local res = condstack[lvl]
+  condstack[lvl] = "else"
+  if res then
+    if res == "else" then wfatal(".else after .else") end
+    stmtskip()
+  end
+end
+
+-- .endif: closes the innermost conditional level.
+map_coreop[".endif_0"] = function(params)
+  local lvl = condlevel
+  if lvl == 0 then wfatal(".endif without .if") end
+  condlevel = lvl - 1
+end
+
+-- Check for unfinished conditionals.
+local function checkconds()
+  if g_errcount ~= "fatal" and condlevel ~= 0 then
+    wprinterr(g_fname, ":*: error: unbalanced conditional\n")
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Search for a file in the given path and open it for reading.
+-- path is a list of directories; an empty entry means "use name as is".
+-- On success g_fname is set to the opened name and the handle is
+-- returned. Returns nothing if the file is not found anywhere.
+local function pathopen(path, name)
+  -- A backslash anywhere in package.path selects "\" as separator.
+  local dirsep = match(package.path, "\\") and "\\" or "/"
+  for _,p in ipairs(path) do
+    local fullname = p == "" and name or p..dirsep..name
+    local fin = io.open(fullname, "r")
+    if fin then
+      g_fname = fullname
+      return fin
+    end
+  end
+end
+
+-- Include a file.
+map_coreop[".include_1"] = function(params)
+  if not params then return "filename" end
+  local name = params[1]
+  -- Save state. Ugly, I know. but upvalues are fast.
+  local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent
+  -- Read the included file.
+  -- wfatal raises, so readfile is never called with a missing handle.
+  local fatal = readfile(pathopen(g_opt.include, name) or
+                         wfatal("include file `"..name.."' not found"))
+  -- Restore state.
+  g_synclineno = -1
+  g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi
+  if fatal then wfatal("in include file") end
+end
+
+-- Make .include and conditionals initially available, too.
+map_op[".include_1"] = map_coreop[".include_1"]
+map_op[".if_1"] = map_coreop[".if_1"]
+map_op[".elif_1"] = map_coreop[".elif_1"]
+map_op[".else_0"] = map_coreop[".else_0"]
+map_op[".endif_0"] = map_coreop[".endif_0"]
+
+------------------------------------------------------------------------------
+
+-- Support variables for macros.
+-- mac_capture is the active statement-capture function while a macro
+-- body is being recorded; mac_lineno/mac_name describe that macro.
+-- mac_active guards against recursive invocation; mac_list keeps the
+-- opcode-map keys of all defined macros.
+local mac_capture, mac_lineno, mac_name
+local mac_active = {}
+local mac_list = {}
+
+-- Pseudo-opcode to define a macro.
+-- mparams is the macro name followed by its parameter names. All
+-- statements up to .endmacro are recorded verbatim; the macro is then
+-- registered in map_op under "<name>_<number of parameters>".
+map_coreop[".macro_*"] = function(mparams)
+  if not mparams then return "name [, params...]" end
+  -- Split off and validate macro name.
+  local name = remove(mparams, 1)
+  if not name then werror("missing macro name") end
+  if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]+$")) then
+    wfatal("bad macro name `"..name.."'")
+  end
+  -- Validate macro parameter names.
+  local mdup = {}
+  for _,mp in ipairs(mparams) do
+    if not match(mp, "^[%a_][%w_]*$") then
+      wfatal("bad macro parameter name `"..mp.."'")
+    end
+    if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end
+    mdup[mp] = true
+  end
+  -- Check for duplicate or recursive macro definitions.
+  local opname = name.."_"..#mparams
+  if map_op[opname] or map_op[name.."_*"] then
+    wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)")
+  end
+  if mac_capture then wfatal("recursive macro definition") end
+
+  -- Enable statement capture.
+  local lines = {}
+  mac_lineno = g_lineno
+  mac_name = name
+  mac_capture = function(stmt) -- Statement capture function.
+    -- Stop macro definition with .endmacro pseudo-opcode.
+    -- The dot is escaped so that only a literal ".endmacro" terminates
+    -- the body; an unescaped "." would accept any leading character.
+    if not match(stmt, "^%s*%.endmacro%s*$") then
+      lines[#lines+1] = stmt
+      return
+    end
+    mac_capture = nil
+    mac_lineno = nil
+    mac_name = nil
+    mac_list[#mac_list+1] = opname
+    -- Add macro-op definition.
+    -- Called without params it returns the parameter names and the
+    -- captured lines (used by dumpmacros).
+    map_op[opname] = function(params)
+      if not params then return mparams, lines end
+      -- Protect against recursive macro invocation.
+      if mac_active[opname] then wfatal("recursive macro invocation") end
+      mac_active[opname] = true
+      -- Setup substitution map.
+      local subst = {}
+      for i,mp in ipairs(mparams) do subst[mp] = params[i] end
+      local mcom
+      if g_opt.maccomment and g_opt.comment then
+        mcom = " MACRO "..name.." ("..#mparams..")"
+        wcomment("{"..mcom)
+      end
+      -- Loop through all captured statements
+      for _,stmt in ipairs(lines) do
+        -- Substitute macro parameters.
+        local st = gsub(stmt, "[%w_]+", subst)
+        st = definesubst(st)
+        st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b.
+        if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end
+        -- Emit statement. Use a protected call for better diagnostics.
+        local ok, err = pcall(dostmt, st)
+        if not ok then
+          -- Add the captured statement to the error.
+          wprinterr(err, "\n", g_indent, "| ", stmt,
+                    "\t[MACRO ", name, " (", #mparams, ")]\n")
+        end
+      end
+      if mcom then wcomment("}"..mcom) end
+      mac_active[opname] = nil
+    end
+  end
+end
+
+-- An .endmacro pseudo-opcode outside of a macro definition is an error.
+map_coreop[".endmacro_0"] = function(params)
+  wfatal(".endmacro without .macro")
+end
+
+-- Dump all macros and their contents (with -PP only).
+-- Lists every macro with its parameter names; for lvl > 1 the captured
+-- body lines are printed as well.
+local function dumpmacros(out, lvl)
+  sort(mac_list)
+  out:write("Macros:\n")
+  for _,opname in ipairs(mac_list) do
+    -- Strip the "_<count>" suffix. The count may have two digits, since
+    -- a statement can carry up to 16 parameters.
+    local name = match(opname, "^(.*)_%d+$") or opname
+    local params, lines = map_op[opname]()
+    out:write(format(" %-20s %s\n", name, concat(params, ", ")))
+    if lvl > 1 then
+      for _,line in ipairs(lines) do
+        out:write(" |", line, "\n")
+      end
+      out:write("\n")
+    end
+  end
+  out:write("\n")
+end
+
+-- Check for unfinished macro definitions.
+-- Reports a .macro that was still being captured when the input ended.
+local function checkmacros()
+  if not mac_capture then return end
+  wprinterr(g_fname, ":", mac_lineno,
+            ": error: unfinished .macro `", mac_name ,"'\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for captures.
+-- cap_name/cap_lineno describe the capture that is currently open (nil
+-- when none is). cap_buffers maps capture names to their line buffers and
+-- cap_used records which names were referenced by .dumpcapture.
+local cap_lineno, cap_name
+local cap_buffers = {}
+local cap_used = {}
+
+-- Start a capture.
+-- Output lines are redirected into the named buffer until .endcapture.
+-- Reopening a name appends to its existing buffer.
+map_coreop[".capture_1"] = function(params)
+  if not params then return "name" end
+  wflush()
+  local capname = params[1]
+  if not match(capname, "^[%a_][%w_]*$") then
+    wfatal("bad capture name `"..capname.."'")
+  end
+  if cap_name then
+    wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
+  end
+  cap_name, cap_lineno = capname, g_lineno
+  -- Create or continue a capture buffer and start the output line capture.
+  local target = cap_buffers[capname]
+  if not target then
+    target = {}
+    cap_buffers[capname] = target
+  end
+  g_capbuffer = target
+  g_synclineno = 0
+end
+
+-- Stop a capture.
+-- Pending actions are flushed first, so they still land in the buffer.
+map_coreop[".endcapture_0"] = function(params)
+  wflush()
+  if not cap_name then wfatal(".endcapture without a valid .capture") end
+  cap_name, cap_lineno = nil, nil
+  g_capbuffer = nil
+  g_synclineno = 0
+end
+
+-- Dump a capture buffer.
+-- Queues a callback instead of copying lines, so the buffer is looked up
+-- when the output is written and captures made later in the input are
+-- included.
+map_coreop[".dumpcapture_1"] = function(params)
+  if not params then return "name" end
+  wflush()
+  local capname = params[1]
+  if not match(capname, "^[%a_][%w_]*$") then
+    wfatal("bad capture name `"..capname.."'")
+  end
+  cap_used[capname] = true
+  local function emit(out)
+    local captured = cap_buffers[capname]
+    if captured then wdumplines(out, captured) end
+  end
+  wline(emit)
+  g_synclineno = 0
+end
+
+-- Dump all captures and their buffers (with -PP only).
+-- Lists every capture buffer with its entry count; for lvl > 1 the
+-- buffered lines are printed between two bars. Names are sorted so the
+-- listing is stable, like the other dump functions in this file.
+local function dumpcaptures(out, lvl)
+  local names = {}
+  for name in pairs(cap_buffers) do names[#names+1] = name end
+  sort(names)
+  out:write("Captures:\n")
+  for _,name in ipairs(names) do
+    local buf = cap_buffers[name]
+    out:write(format(" %-20s %4s)\n", name, "("..#buf))
+    if lvl > 1 then
+      local bar = rep("=", 76)
+      out:write(" ", bar, "\n")
+      for _,line in ipairs(buf) do
+        if type(line) == "string" then
+          out:write(" ", line, "\n")
+        else
+          -- wline also stores callback functions in capture buffers;
+          -- they cannot be passed to write, so print a placeholder.
+          out:write(" <deferred output callback>\n")
+        end
+      end
+      out:write(" ", bar, "\n\n")
+    end
+  end
+  out:write("\n")
+end
+
+-- Check for unfinished or unused captures.
+-- An open capture is reported alone; otherwise every buffer that was
+-- never referenced by .dumpcapture is reported.
+local function checkcaptures()
+  if cap_name then
+    wprinterr(g_fname, ":", cap_lineno,
+              ": error: unfinished .capture `", cap_name,"'\n")
+    return
+  end
+  for name in pairs(cap_buffers) do
+    if not cap_used[name] then
+      wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n")
+    end
+  end
+end
+
+------------------------------------------------------------------------------
+
+-- Section names.
+local map_sections = {}
+
+-- Pseudo-opcode to define code sections.
+-- TODO: Data sections, BSS sections. Needs extra C code and API.
+-- May be used only once. For every name it emits a
+-- "#define DASM_SECTION_<NAME> <index>" line (index starting at 0) and
+-- registers a zero-parameter pseudo-opcode ".<name>" that switches to
+-- that section. The section count is emitted as DASM_MAXSECTION.
+map_coreop[".section_*"] = function(params)
+  if not params then return "name..." end
+  if #map_sections > 0 then werror("duplicate section definition") end
+  wflush()
+  for sn,name in ipairs(params) do
+    local opname = "."..name.."_0"
+    -- Reject names that are malformed or would hide an existing opcode.
+    if not match(name, "^[%a][%w_]*$") or
+       map_op[opname] or map_op["."..name.."_*"] then
+      werror("bad section name `"..name.."'")
+    end
+    map_sections[#map_sections+1] = name
+    wline(format("#define DASM_SECTION_%s\t%d", upper(name), sn-1))
+    map_op[opname] = function(params) g_arch.section(sn-1) end
+  end
+  wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections))
+end
+
+-- Dump all sections.
+local function dumpsections(out, lvl)
+  out:write("Sections:\n")
+  for _,name in ipairs(map_sections) do
+    out:write(format(" %s\n", name))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Load architecture-specific module.
+-- Requires the module "dasm_<arch>" and wires it into the core: the
+-- module receives the core writers and returns its flush function, is
+-- given the option table, and finally supplies the merged opcode and
+-- define maps. Returns an error string on failure, nothing on success.
+local function loadarch(arch)
+  if not match(arch, "^[%w_]+$") then return "bad arch name" end
+  local loaded, mod = pcall(require, "dasm_"..arch)
+  if not loaded then return "cannot load module: "..mod end
+  g_arch = mod
+  wflush = mod.passcb(wline, werror, wfatal, wwarn)
+  mod.setup(arch, g_opt)
+  map_op, map_def = mod.mergemaps(map_coreop, map_def)
+end
+
+-- Dump architecture description.
+-- Loads the arch named by the option argument (unless one is already
+-- loaded), prints the version banner and the arch's own description,
+-- then lists every opcode with its operand help. Exits with status 0.
+function opt_map.dumparch(args)
+  local archname = optparam(args)
+  if not g_arch then
+    local err = loadarch(archname)
+    if err then opterror(err) end
+  end
+
+  -- Collect the keys of both maps and sort them.
+  local opnames = {}
+  for key in pairs(map_coreop) do opnames[#opnames+1] = key end
+  for key in pairs(map_op) do opnames[#opnames+1] = key end
+  sort(opnames)
+
+  local out = stdout
+  out:write(format("%s version %s, released %s, %s\n",
+    _info.name, _info.version, _info.release, _info.url))
+  g_arch.dumparch(out)
+
+  -- Names starting with "." are listed first; the first name without a
+  -- leading dot switches to the "Opcodes" heading.
+  local pseudo = true
+  out:write("Pseudo-Opcodes:\n")
+  for _,sname in ipairs(opnames) do
+    -- Keys have the form "<name>_<digit>" or "<name>_*".
+    local name, nparam = match(sname, "^(.+)_([0-9%*])$")
+    if name then
+      if pseudo and sub(name, 1, 1) ~= "." then
+        out:write("\nOpcodes:\n")
+        pseudo = false
+      end
+      local f = map_op[sname]
+      if nparam ~= "*" then nparam = nparam + 0 end
+      -- String entries are templates; functions describe themselves when
+      -- called with nil params.
+      local s
+      if nparam == 0 then
+        s = ""
+      elseif type(f) == "string" then
+        s = map_op[".template__"](nil, f, nparam)
+      else
+        s = f(nil, nparam)
+      end
+      if type(s) ~= "table" then
+        out:write(format(" %-12s %s\n", name, s))
+      else
+        for _,s2 in ipairs(s) do
+          out:write(format(" %-12s %s\n", name, s2))
+        end
+      end
+    end
+  end
+  out:write("\n")
+  exit(0)
+end
+
+-- Pseudo-opcode to set the architecture.
+-- Only initially available (map_op is replaced when called).
+map_op[".arch_1"] = function(params)
+  if not params then return "name" end
+  local failure = loadarch(params[1])
+  if failure then wfatal(failure) end
+end
+
+-- Dummy .arch pseudo-opcode to improve the error report.
map_coreop[".arch_1"] = function(params)
  -- Called with params == nil (see opt_map.dumparch) to obtain the parameter
  -- description.
  if not params then return "name" end
  wfatal("duplicate .arch statement")
end

------------------------------------------------------------------------------

-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'.
-- Accepts any number of parameters and does nothing with them.
map_coreop[".nop_*"] = function(params)
  if not params then return "[ignored...]" end
end

-- Pseudo-opcodes to raise errors.
-- .error passes its single parameter to werror, .fatal to wfatal.
map_coreop[".error_1"] = function(params)
  if not params then return "message" end
  werror(params[1])
end

map_coreop[".fatal_1"] = function(params)
  if not params then return "message" end
  wfatal(params[1])
end

-- Dump all user defined elements.
-- Does nothing when the dump level g_opt.dumpdef is 0; otherwise writes
-- sections, defines, architecture-specific definitions (if an architecture is
-- loaded), macros and captures to `out`, in that order.
local function dumpdef(out)
  local lvl = g_opt.dumpdef
  if lvl == 0 then return end
  dumpsections(out, lvl)
  dumpdefines(out, lvl)
  if g_arch then g_arch.dumpdef(out, lvl) end
  dumpmacros(out, lvl)
  dumpcaptures(out, lvl)
end

------------------------------------------------------------------------------

-- Helper for splitstmt.
-- splitlvl is a stack of the closing brackets that are still expected, kept
-- as a string with the innermost bracket first.
local splitlvl

-- gsub callback for splitstmt; `c` is one of , ( ) [ ] { }.
-- Opening brackets push their closer, closing brackets must match the top of
-- the stack. A comma outside of all brackets is replaced by a NUL separator
-- padded with blanks; every other character is returned unchanged.
local function splitstmt_one(c)
  if c == "(" then
    splitlvl = ")"..splitlvl
  elseif c == "[" then
    splitlvl = "]"..splitlvl
  elseif c == "{" then
    splitlvl = "}"..splitlvl
  elseif c == ")" or c == "]" or c == "}" then
    if sub(splitlvl, 1, 1) ~= c then werror("unbalanced (), [] or {}") end
    splitlvl = sub(splitlvl, 2)
  elseif splitlvl == "" then
    return " \0 "
  end
  return c
end

-- Split statement into (pseudo-)opcode and params.
-- Returns the opcode string and a table holding the parameters in its array
-- part and the opcode in field `op`. A "name:" label statement is returned as
-- ".label", {name} (without the `op` field).
local function splitstmt(stmt)
  -- Convert label with trailing-colon into .label statement.
  local label = match(stmt, "^%s*(.+):%s*$")
  if label then return ".label", {label} end

  -- Split at commas, but obey parentheses, brackets and braces.
  splitlvl = ""
  stmt = gsub(stmt, "[,%(%)%[%]{}]", splitstmt_one)
  -- Anything left on the stack is an opener that was never closed. Braces
  -- are tracked too, so use the same message as splitstmt_one.
  if splitlvl ~= "" then werror("unbalanced (), [] or {}") end

  -- Split off opcode.
  local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$")
  if not op then werror("bad statement syntax") end

  -- Split parameters at the NUL separators and strip trailing blanks.
  local params = {}
  for p in gmatch(other, "%s*(%Z+)%z?") do
    -- Parentheses keep only the string result of gsub (drop the count).
    params[#params+1] = (gsub(p, "%s+$", ""))
  end
  if #params > 16 then werror("too many parameters") end

  params.op = op
  return op, params
end

-- Process a single statement.
-- Order matters: an active macro capture sees the raw statement before any
-- define substitution is applied.
dostmt = function(stmt)
  -- Ignore empty statements.
  if match(stmt, "^%s*$") then return end

  -- Capture macro defs before substitution.
  if mac_capture then return mac_capture(stmt) end
  stmt = definesubst(stmt)

  -- Emit C code without parsing the line.
  if sub(stmt, 1, 1) == "|" then
    local tail = sub(stmt, 2)
    wflush()
    if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end
    return
  end

  -- Split into (pseudo-)opcode and params.
  local op, params = splitstmt(stmt)

  -- Get opcode handler (matching # of parameters or generic handler).
  local f = map_op[op.."_"..#params] or map_op[op.."_*"]
  if not f then
    if not g_arch then wfatal("first statement must be .arch") end
    -- Improve error report: the opcode exists with another parameter count.
    for i=0,9 do
      if map_op[op.."_"..i] then
        werror("wrong number of parameters for `"..op.."'")
      end
    end
    werror("unknown statement `"..op.."'")
  end

  -- Call opcode handler or special handler for template strings.
  if type(f) == "string" then
    map_op[".template__"](params, f)
  else
    f(params)
  end
end

-- Process a single line.
-- Lines whose first non-blank character is "|" are assembler lines; all
-- other lines are passed through as C code.
local function doline(line)
  if g_opt.flushline then wflush() end

  -- Assembler line?
  local indent, aline = match(line, "^(%s*)%|(.*)$")
  if not aline then
    -- No, plain C code line, need to flush first.
    wflush()
    wsync()
    wline(line, false)
    return
  end

  g_indent = indent -- Remember current line indentation.

  -- Emit C code (even from macros). Avoids echo and line parsing.
  if sub(aline, 1, 1) == "|" then
    if not mac_capture then
      wsync()
    elseif g_opt.comment then
      wsync()
      wcomment(aline)
    end
    dostmt(aline)
    return
  end

  -- Echo assembler line as a comment.
  if g_opt.comment then
    wsync()
    wcomment(aline)
  end

  -- Strip assembler comments.
  aline = gsub(aline, "//.*$", "")

  -- Split line into statements at semicolons.
  if match(aline, ";") then
    for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end
  else
    dostmt(aline)
  end
end

------------------------------------------------------------------------------

-- Write DynASM header.
-- Writes a C comment naming the DynASM and architecture versions and the
-- input file, followed by a preprocessor check of DASM_VERSION against
-- _info.vernum.
local function dasmhead(out)
  out:write(format([[
/*
** This file has been pre-processed with DynASM.
** %s
** DynASM version %s, DynASM %s version %s
** DO NOT EDIT! The original file is in "%s".
*/

#if DASM_VERSION != %d
#error "Version mismatch between DynASM and included encoding engine"
#endif

]], _info.url,
    _info.version, g_arch._info.arch, g_arch._info.version,
    g_fname, _info.vernum))
end

-- Read input file.
-- Resets the line state, runs doline on every line under pcall and closes the
-- file unless it is stdin. Returns true early if wprinterr returns a true
-- value for a caught error.
readfile = function(fin)
  g_indent = ""
  g_lineno = 0
  g_synclineno = -1

  -- Process all lines.
  for line in fin:lines() do
    g_lineno = g_lineno + 1
    g_curline = line
    local ok, err = pcall(doline, line)
    if not ok and wprinterr(err, "\n") then return true end
  end
  wflush()

  -- Close input file.
  assert(fin == stdin or fin:close())
end

-- Write output file.
-- A nil or "-" name selects stdout. The optional definition dump goes to
-- stderr when the output went to stdout, and to stdout otherwise.
local function writefile(outfile)
  local fout

  -- Open output file.
  if outfile == nil or outfile == "-" then
    fout = stdout
  else
    fout = assert(io.open(outfile, "w"))
  end

  -- Write all buffered lines
  wdumplines(fout, g_wbuffer)

  -- Close output file.
  assert(fout == stdout or fout:close())

  -- Optionally dump definitions.
  dumpdef(fout == stdout and stderr or stdout)
end

-- Translate an input file to an output file.
local function translate(infile, outfile)
  -- Start from an empty output buffer and a fresh line state.
  g_wbuffer = {}
  g_indent = ""
  g_lineno = 0
  g_synclineno = -1

  -- The header is queued as a function value, not as text.
  wline(dasmhead)

  -- Select the input stream; "-" means stdin.
  local fin
  if infile ~= "-" then
    g_fname = infile
    fin = assert(io.open(infile, "r"))
  else
    g_fname = "(stdin)"
    fin = stdin
  end
  readfile(fin)

  -- Post-read checks.
  if not g_arch then
    wprinterr(g_fname, ":*: error: missing .arch directive\n")
  end
  checkconds()
  checkmacros()
  checkcaptures()

  -- Any recorded error suppresses the output file.
  if g_errcount ~= 0 then
    local plural = ""
    if type(g_errcount) == "number" and g_errcount > 1 then plural = "s" end
    stderr:write(g_fname, ":*: info: ", g_errcount, " error", plural,
      " in input file -- no output file generated.\n")
    dumpdef(stderr)
    exit(1)
  end

  writefile(outfile)
end

------------------------------------------------------------------------------

-- Option handler: print the usage text and exit with status 0.
function opt_map.help()
  stdout:write("DynASM -- ", _info.description, ".\n")
  stdout:write("DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n")
  stdout:write[[

Usage: dynasm [OPTION]... INFILE.dasc|-

 -h, --help Display this help text.
 -V, --version Display version and copyright information.

 -o, --outfile FILE Output file name (default is stdout).
 -I, --include DIR Add directory to the include search path.

 -c, --ccomment Use /* */ comments for assembler lines.
 -C, --cppcomment Use // comments for assembler lines (default).
 -N, --nocomment Suppress assembler lines in output.
 -M, --maccomment Show macro expansions as comments (default off).

 -L, --nolineno Suppress CPP line number information in output.
 -F, --flushline Flush action list for every line.

 -E, --vserror Use Visual Studio style errors file(line) vs file:line

 -D NAME[=SUBST] Define a substitution.
 -U NAME Undefine a substitution.

 -P, --dumpdef Dump defines, macros, etc. Repeat for more output.
 -A, --dumparch ARCH Load architecture ARCH and dump description.
]]
  exit(0)
end

-- Option handler: print name, version, release, URL and copyright, then exit
-- with status 0.
function opt_map.version()
  local text = format("%s version %s, released %s\n%s\n\n%s",
    _info.name, _info.version, _info.release, _info.url, _info.copyright)
  stdout:write(text)
  exit(0)
end

-- Simple option handlers; each one records its setting in g_opt.
function opt_map.outfile(args)
  g_opt.outfile = optparam(args)
end

function opt_map.include(args)
  -- New directories go to the front of the list.
  insert(g_opt.include, 1, optparam(args))
end

function opt_map.ccomment()
  g_opt.comment = "/*|"
  g_opt.endcomment = " */"
end

function opt_map.cppcomment()
  g_opt.comment = "//|"
  g_opt.endcomment = ""
end

function opt_map.nocomment()
  g_opt.comment = false
end

function opt_map.maccomment()
  g_opt.maccomment = true
end

function opt_map.nolineno()
  g_opt.cpp = false
end

function opt_map.flushline()
  g_opt.flushline = true
end

function opt_map.vserror()
  g_opt.vserror = true
end

function opt_map.dumpdef()
  -- Each occurrence raises the dump level by one.
  g_opt.dumpdef = g_opt.dumpdef + 1
end

------------------------------------------------------------------------------

-- Maps each short option letter to the name of its long option.
local opt_alias = {
  h = "help", ["?"] = "help", V = "version",
  o = "outfile", I = "include",
  c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment",
  L = "nolineno", F = "flushline",
  E = "vserror",
  P = "dumpdef", A = "dumparch",
}

-- Handle one option name (given without its leading dashes).
-- Records the dashed spelling in opt_current, looks the handler up by long
-- name first and by alias second, and calls it with the argument list.
local function parseopt(opt, args)
  if #opt == 1 then
    opt_current = "-"..opt
  else
    opt_current = "--"..opt
  end
  local handler = opt_map[opt]
  if not handler then handler = opt_map[opt_alias[opt]] end
  if not handler then
    opterror("unrecognized option `", opt_current, "'. Try `--help'.\n")
  end
  handler(args)
end

-- Parse the command line and run the translation.
-- Leading arguments starting with "-" (and at least one more character) are
-- options; exactly one remaining argument, the input file, is expected.
local function parseargs(args)
  -- Defaults, possibly overridden by the option handlers below.
  g_opt.comment = "//|"
  g_opt.endcomment = ""
  g_opt.cpp = true
  g_opt.dumpdef = 0
  g_opt.include = { "" }

  -- args.argn is the cursor into the argument list; the handlers receive
  -- the same table.
  args.argn = 1
  while true do
    local a = args[args.argn]
    if not a then break end
    local dash, body = match(a, "^%-(%-?)(.+)")
    if not body then break end
    args.argn = args.argn + 1
    if dash ~= "" then
      -- "--name": one long option.
      parseopt(body, args)
    else
      -- "-abc": every character is a short option of its own.
      for ch in gmatch(body, ".") do parseopt(ch, args) end
    end
  end

  -- With no input file a requested definition dump is still honoured; any
  -- other argument count shows the help text.
  local nargs = #args - args.argn + 1
  if nargs ~= 1 then
    if nargs == 0 and g_opt.dumpdef > 0 then return dumpdef(stdout) end
    opt_map.help()
  end

  -- Translate a single input file to a single output file
  -- TODO: Handle multiple files?
  translate(args[args.argn], g_opt.outfile)
end

------------------------------------------------------------------------------

-- Prepend the directory part of the script path (arg[0]) to package.path so
-- that modules next to dynasm.lua are found first.
local arg = arg
if arg and arg[0] then
  local dir = match(arg[0], "^(.*[/\\])")
  if dir then
    package.path = dir.."?.lua;"..package.path
  end
end

-- Start DynASM.
parseargs{...}

------------------------------------------------------------------------------

diff --git a/tools/luaffi/ffi.c b/tools/luaffi/ffi.c new file mode 100644 index 0000000..70c621d --- /dev/null +++ b/tools/luaffi/ffi.c @@ -0,0 +1,3334 @@ +/* vim: ts=4 sw=4 sts=4 et tw=78 + * Copyright (c) 2011 James R. McKaskill.
See license in ffi.h + */ +#include "ffi.h" +#include +#include + +/* Set to 1 to get extra debugging on print */ +#define DEBUG_TOSTRING 0 + +int jit_key; +int ctype_mt_key; +int cdata_mt_key; +int callback_mt_key; +int cmodule_mt_key; +int constants_key; +int types_key; +int gc_key; +int callbacks_key; +int functions_key; +int abi_key; +int next_unnamed_key; +int niluv_key; +int asmname_key; + +void push_upval(lua_State* L, int* key) +{ + lua_pushlightuserdata(L, key); + lua_rawget(L, LUA_REGISTRYINDEX); +} + +void set_upval(lua_State* L, int* key) +{ + lua_pushlightuserdata(L, key); + lua_insert(L, -2); + lua_rawset(L, LUA_REGISTRYINDEX); +} + +int equals_upval(lua_State* L, int idx, int* key) +{ + int ret; + lua_pushvalue(L, idx); + push_upval(L, key); + ret = lua_rawequal(L, -2, -1); + lua_pop(L, 2); + return ret; +} + +struct jit* get_jit(lua_State* L) +{ + struct jit* jit; + push_upval(L, &jit_key); + jit = (struct jit*) lua_touserdata(L, -1); + jit->L = L; + lua_pop(L, 1); /* still in registry */ + return jit; +} + +static int type_error(lua_State* L, int idx, const char* to_type, int to_usr, const struct ctype* to_ct) +{ + luaL_Buffer B; + struct ctype ft; + + assert(to_type || (to_usr && to_ct)); + if (to_usr) { + to_usr = lua_absindex(L, to_usr); + } + + idx = lua_absindex(L, idx); + + luaL_buffinit(L, &B); + to_cdata(L, idx, &ft); + + if (ft.type != INVALID_TYPE) { + push_type_name(L, -1, &ft); + lua_pushfstring(L, "unable to convert argument %d from cdata<%s> to cdata<", idx, lua_tostring(L, -1)); + lua_remove(L, -2); + luaL_addvalue(&B); + } else { + lua_pushfstring(L, "unable to convert argument %d from lua<%s> to cdata<", idx, luaL_typename(L, idx)); + luaL_addvalue(&B); + } + + if (to_ct) { + push_type_name(L, to_usr, to_ct); + luaL_addvalue(&B); + } else { + luaL_addstring(&B, to_type); + } + + luaL_addchar(&B, '>'); + + luaL_pushresult(&B); + return lua_error(L); +} + +static int64_t check_intptr(lua_State* L, int idx, void* p, struct ctype* ct) 
+{ + if (ct->type == INVALID_TYPE) { + int64_t ret; + memset(ct, 0, sizeof(*ct)); + ct->base_size = 8; + ct->type = INT64_TYPE; + ct->is_defined = 1; + ret = luaL_checknumber(L, idx); + return ret; + + } else if (ct->pointers) { + return (intptr_t) p; + } + + switch (ct->type) { + case INTPTR_TYPE: + case FUNCTION_PTR_TYPE: + return *(intptr_t*) p; + + case INT64_TYPE: + return *(int64_t*) p; + + case INT32_TYPE: + return ct->is_unsigned ? (int64_t) *(uint32_t*) p : (int64_t) *(int32_t*) p; + + case INT16_TYPE: + return ct->is_unsigned ? (int64_t) *(uint16_t*) p : (int64_t) *(int16_t*) p; + + case INT8_TYPE: + return ct->is_unsigned ? (int64_t) *(uint8_t*) p : (int64_t) *(int8_t*) p; + + default: + type_error(L, idx, "intptr_t", 0, NULL); + return 0; + } +} + +#define TO_NUMBER(TYPE, ALLOW_POINTERS) \ + TYPE real = 0, imag = 0; \ + void* p; \ + struct ctype ct; \ + \ + switch (lua_type(L, idx)) { \ + case LUA_TBOOLEAN: \ + real = (TYPE) lua_toboolean(L, idx); \ + break; \ + \ + case LUA_TNUMBER: \ + real = (TYPE) lua_tonumber(L, idx); \ + break; \ + \ + case LUA_TSTRING: \ + if (!ALLOW_POINTERS) { \ + type_error(L, idx, #TYPE, 0, NULL); \ + } \ + real = (TYPE) (intptr_t) lua_tostring(L, idx); \ + break; \ + \ + case LUA_TLIGHTUSERDATA: \ + if (!ALLOW_POINTERS) { \ + type_error(L, idx, #TYPE, 0, NULL); \ + } \ + real = (TYPE) (intptr_t) lua_topointer(L, idx); \ + break; \ + \ + case LUA_TUSERDATA: \ + p = to_cdata(L, idx, &ct); \ + \ + if (ct.type == INVALID_TYPE) { \ + if (!ALLOW_POINTERS) { \ + type_error(L, idx, #TYPE, 0, NULL); \ + } \ + real = (TYPE) (intptr_t) p; \ + } else if (ct.pointers || ct.type == STRUCT_TYPE || ct.type == UNION_TYPE) {\ + if (!ALLOW_POINTERS) { \ + type_error(L, idx, #TYPE, 0, NULL); \ + } \ + real = (TYPE) (intptr_t) p; \ + } else if (ct.type == COMPLEX_DOUBLE_TYPE) { \ + real = (TYPE) creal(*(complex_double*) p); \ + imag = (TYPE) cimag(*(complex_double*) p); \ + } else if (ct.type == COMPLEX_FLOAT_TYPE) { \ + real = (TYPE) 
crealf(*(complex_float*) p); \ + imag = (TYPE) cimagf(*(complex_float*) p); \ + } else if (ct.type == DOUBLE_TYPE) { \ + real = (TYPE) *(double*) p; \ + } else if (ct.type == FLOAT_TYPE) { \ + real = (TYPE) *(float*) p; \ + } else { \ + real = check_intptr(L, idx, p, &ct); \ + } \ + lua_pop(L, 1); \ + break; \ + \ + case LUA_TNIL: \ + real = (TYPE) 0; \ + break; \ + \ + default: \ + type_error(L, idx, #TYPE, 0, NULL); \ + } \ + +static int64_t cast_int64(lua_State* L, int idx, int is_cast) +{ TO_NUMBER(int64_t, is_cast); (void) imag; return real; } + +static uint64_t cast_uint64(lua_State* L, int idx, int is_cast) +{ TO_NUMBER(uint64_t, is_cast); (void) imag; return real; } + +int32_t check_int32(lua_State* L, int idx) +{ return (int32_t) cast_int64(L, idx, 0); } + +uint32_t check_uint32(lua_State* L, int idx) +{ return (uint32_t) cast_uint64(L, idx, 0); } + +int64_t check_int64(lua_State* L, int idx) +{ return cast_int64(L, idx, 0); } + +uint64_t check_uint64(lua_State* L, int idx) +{ return cast_uint64(L, idx, 0); } + +static void do_check_double(lua_State* L, int idx, double* preal, double* pimag) +{ + TO_NUMBER(double, 0); + if (preal) *preal = real; + if (pimag) *pimag = imag; +} + +double check_double(lua_State* L, int idx) +{ double ret; do_check_double(L, idx, &ret, NULL); return ret; } + +float check_float(lua_State* L, int idx) +{ double ret; do_check_double(L, idx, &ret, NULL); return ret; } + +uintptr_t check_uintptr(lua_State* L, int idx) +{ TO_NUMBER(uintptr_t, 1); (void) imag; return real; } + +#ifdef HAVE_COMPLEX +complex_double check_complex_double(lua_State* L, int idx) +{ double real, imag; do_check_double(L, idx, &real, &imag); return real + imag * 1i; } + +complex_float check_complex_float(lua_State* L, int idx) +{ double real, imag; do_check_double(L, idx, &real, &imag); return real + imag * 1i; } + +#else +complex_double check_complex_double(lua_State* L, int idx) +{ + complex_double c; + do_check_double(L, idx, &c.real, &c.imag); + return c; 
+} + +complex_float check_complex_float(lua_State* L, int idx) +{ + complex_double d; + complex_float f; + do_check_double(L, idx, &d.real, &d.imag); + f.real = d.real; + f.imag = d.imag; + return f; +} +#endif + +static size_t unpack_vararg(lua_State* L, int i, char* to) +{ + void* p; + struct ctype ct; + + switch (lua_type(L, i)) { + case LUA_TBOOLEAN: + *(int*) to = lua_toboolean(L, i); + return sizeof(int); + + case LUA_TNUMBER: + *(double*) to = lua_tonumber(L, i); + return sizeof(double); + + case LUA_TSTRING: + *(const char**) to = lua_tostring(L, i); + return sizeof(const char*); + + case LUA_TLIGHTUSERDATA: + *(void**) to = lua_touserdata(L, i); + return sizeof(void*); + + case LUA_TUSERDATA: + p = to_cdata(L, i, &ct); + + if (ct.type == INVALID_TYPE) { + *(void**) to = p; + return sizeof(void*); + } + + lua_pop(L, 1); + + if (ct.pointers || ct.type == INTPTR_TYPE) { + *(void**) to = p; + return sizeof(void*); + + } else if (ct.type == INT32_TYPE) { + *(int32_t*) to = *(int32_t*) p; + return sizeof(int32_t); + + } else if (ct.type == INT64_TYPE) { + *(int64_t*) to = *(int64_t*) p; + return sizeof(int64_t); + } + goto err; + + case LUA_TNIL: + *(void**) to = NULL; + return sizeof(void*); + + default: + goto err; + } + +err: + return type_error(L, i, "vararg", 0, NULL); +} + +void unpack_varargs_stack(lua_State* L, int first, int last, char* to) +{ + int i; + + for (i = first; i <= last; i++) { + to += unpack_vararg(L, i, to); + } +} + +void unpack_varargs_stack_skip(lua_State* L, int first, int last, int ints_to_skip, int floats_to_skip, char* to) +{ + int i; + + for (i = first; i <= last; i++) { + int type = lua_type(L, i); + + if (type == LUA_TNUMBER && --floats_to_skip >= 0) { + continue; + } else if (type != LUA_TNUMBER && --ints_to_skip >= 0) { + continue; + } + + to += unpack_vararg(L, i, to); + } +} + +void unpack_varargs_float(lua_State* L, int first, int last, int max, char* to) +{ + int i; + + for (i = first; i <= last && max > 0; i++) { + if 
(lua_type(L, i) == LUA_TNUMBER) { + unpack_vararg(L, i, to); + to += sizeof(double); + max--; + } + } +} + +void unpack_varargs_int(lua_State* L, int first, int last, int max, char* to) +{ + int i; + + for (i = first; i <= last && max > 0; i++) { + if (lua_type(L, i) != LUA_TNUMBER) { + unpack_vararg(L, i, to); + to += sizeof(void*); + max--; + } + } +} + +void unpack_varargs_reg(lua_State* L, int first, int last, char* to) +{ + int i; + + for (i = first; i <= last; i++) { + unpack_vararg(L, i, to); + to += sizeof(double); + } +} + +/* to_enum tries to convert a value at idx to the enum type indicated by to_ct + * and uv to_usr. For strings this means it will do a string lookup for the + * enum type. It leaves the stack unchanged. Will throw an error if the type + * at idx can't be conerted. + */ +int32_t check_enum(lua_State* L, int idx, int to_usr, const struct ctype* to_ct) +{ + int32_t ret; + + switch (lua_type(L, idx)) { + case LUA_TSTRING: + /* lookup string in to_usr to find value */ + to_usr = lua_absindex(L, to_usr); + lua_pushvalue(L, idx); + lua_rawget(L, to_usr); + + if (lua_isnil(L, -1)) { + goto err; + } + + ret = (int32_t) lua_tointeger(L, -1); + lua_pop(L, 1); + return ret; + + case LUA_TUSERDATA: + return check_int32(L, idx); + + case LUA_TNIL: + return (int32_t) 0; + + case LUA_TNUMBER: + return (int32_t) lua_tointeger(L, idx); + + default: + goto err; + } + +err: + return type_error(L, idx, NULL, to_usr, to_ct); +} + +/* to_pointer tries converts a value at idx to a pointer. It fills out ct and + * pushes the uv of the found type. It will throw a lua error if it can not + * convert the value to a pointer. 
*/ +static void* check_pointer(lua_State* L, int idx, struct ctype* ct) +{ + void* p; + memset(ct, 0, sizeof(*ct)); + ct->pointers = 1; + idx = lua_absindex(L, idx); + + switch (lua_type(L, idx)) { + case LUA_TNIL: + ct->type = VOID_TYPE; + ct->is_null = 1; + lua_pushnil(L); + return NULL; + + case LUA_TNUMBER: + ct->type = INTPTR_TYPE; + ct->is_unsigned = 1; + ct->pointers = 0; + lua_pushnil(L); + return (void*) (uintptr_t) lua_tonumber(L, idx); + + case LUA_TLIGHTUSERDATA: + ct->type = VOID_TYPE; + lua_pushnil(L); + return lua_touserdata(L, idx); + + case LUA_TSTRING: + ct->type = INT8_TYPE; + ct->is_unsigned = IS_CHAR_UNSIGNED; + ct->is_array = 1; + ct->base_size = 1; + ct->const_mask = 2; + lua_pushnil(L); + return (void*) lua_tolstring(L, idx, &ct->array_size); + + case LUA_TUSERDATA: + p = to_cdata(L, idx, ct); + + if (ct->type == INVALID_TYPE) { + /* some other type of user data */ + ct->type = VOID_TYPE; + return lua_touserdata(L, idx); + } else if (ct->type == STRUCT_TYPE || ct->type == UNION_TYPE) { + return p; + } else { + return (void*) (intptr_t) check_intptr(L, idx, p, ct); + } + break; + } + + type_error(L, idx, "pointer", 0, NULL); + return NULL; +} + +static int is_void_ptr(const struct ctype* ct) +{ + return ct->type == VOID_TYPE + && ct->pointers == 1; +} + +static int is_same_type(lua_State* L, int usr1, int usr2, const struct ctype* t1, const struct ctype* t2) +{ + if (t1->type != t2->type) { + return 0; + } + +#if LUA_VERSION_NUM == 501 + if (lua_isnil(L, usr1) != lua_isnil(L, usr2)) { + int ret; + usr1 = lua_absindex(L, usr1); + usr2 = lua_absindex(L, usr2); + push_upval(L, &niluv_key); + + ret = lua_rawequal(L, usr1, -1) + || lua_rawequal(L, usr2, -1); + + lua_pop(L, 1); + + if (ret) { + return 1; + } + } +#endif + + return lua_rawequal(L, usr1, usr2); +} + +static void set_struct(lua_State* L, int idx, void* to, int to_usr, const struct ctype* tt, int check_pointers); + +/* to_typed_pointer converts a value at idx to a type tt with target 
uv to_usr + * checking all types. May push a temporary value so that it can create + * structs on the fly. */ +void* check_typed_pointer(lua_State* L, int idx, int to_usr, const struct ctype* tt) +{ + struct ctype ft; + void* p; + + to_usr = lua_absindex(L, to_usr); + idx = lua_absindex(L, idx); + + if (tt->pointers == 1 && (tt->type == STRUCT_TYPE || tt->type == UNION_TYPE) && lua_type(L, idx) == LUA_TTABLE) { + /* need to construct a struct of the target type */ + struct ctype ct = *tt; + ct.pointers = ct.is_array = 0; + p = push_cdata(L, to_usr, &ct); + set_struct(L, idx, p, to_usr, &ct, 1); + return p; + } + + p = check_pointer(L, idx, &ft); + + if (tt->pointers == 1 && ft.pointers == 0 && (ft.type == STRUCT_TYPE || ft.type == UNION_TYPE)) { + /* auto dereference structs */ + ft.pointers = 1; + ft.const_mask <<= 1; + } + + if (is_void_ptr(tt)) { + /* any pointer can convert to void* */ + goto suc; + + } else if (ft.is_null) { + /* NULL can convert to any pointer */ + goto suc; + + } else if (!is_same_type(L, to_usr, -1, tt, &ft)) { + /* the base type is different */ + goto err; + + } else if (tt->pointers != ft.pointers) { + goto err; + + } else if (ft.const_mask & ~tt->const_mask) { + /* for every const in from it must be in to, there are further rules + * for const casting (see the c++ spec), but they are hard to test + * quickly */ + goto err; + } + +suc: + return p; + +err: + type_error(L, idx, NULL, to_usr, tt); + return NULL; +} + +/* to_cfunction converts a value at idx with usr table at to_usr and type tt + * into a function. Leaves the stack unchanged. */ +static cfunction check_cfunction(lua_State* L, int idx, int to_usr, const struct ctype* tt, int check_pointers) +{ + void* p; + struct ctype ft; + cfunction f; + int top = lua_gettop(L); + + idx = lua_absindex(L, idx); + to_usr = lua_absindex(L, to_usr); + + switch (lua_type(L, idx)) { + case LUA_TFUNCTION: + /* Function cdatas are pinned and must be manually cleaned up by + * calling func:free(). 
*/ + push_upval(L, &callbacks_key); + f = compile_callback(L, idx, to_usr, tt); + lua_pushboolean(L, 1); + lua_rawset(L, -3); + lua_pop(L, 1); /* callbacks tbl */ + return f; + + case LUA_TNIL: + return NULL; + + case LUA_TLIGHTUSERDATA: + if (check_pointers) { + goto err; + } else { + return (cfunction) lua_touserdata(L, idx); + } + + case LUA_TUSERDATA: + p = to_cdata(L, idx, &ft); + assert(lua_gettop(L) == top + 1); + + if (ft.type == INVALID_TYPE) { + if (check_pointers) { + goto err; + } else { + lua_pop(L, 1); + return (cfunction) lua_touserdata(L, idx); + } + + } else if (ft.is_null) { + lua_pop(L, 1); + return NULL; + + } else if (!check_pointers && (ft.pointers || ft.type == INTPTR_TYPE)) { + lua_pop(L, 1); + return (cfunction) *(void**) p; + + } else if (ft.type != FUNCTION_PTR_TYPE) { + goto err; + + } else if (!check_pointers) { + lua_pop(L, 1); + return *(cfunction*) p; + + } else if (ft.calling_convention != tt->calling_convention) { + goto err; + + } else if (!is_same_type(L, -1, to_usr, &ft, tt)) { + goto err; + + } else { + lua_pop(L, 1); + return *(cfunction*) p; + } + + default: + goto err; + } + +err: + type_error(L, idx, NULL, to_usr, tt); + return NULL; +} + +/* to_type_cfunction converts a value at idx with uv at to_usr and type tt to + * a cfunction. Leaves the stack unchanged. 
*/ +cfunction check_typed_cfunction(lua_State* L, int idx, int to_usr, const struct ctype* tt) +{ return check_cfunction(L, idx, to_usr, tt, 1); } + +static void set_value(lua_State* L, int idx, void* to, int to_usr, const struct ctype* tt, int check_pointers); + +static void set_array(lua_State* L, int idx, void* to, int to_usr, const struct ctype* tt, int check_pointers) +{ + size_t i, sz, esz; + struct ctype et; + + idx = lua_absindex(L, idx); + to_usr = lua_absindex(L, to_usr); + + switch (lua_type(L, idx)) { + case LUA_TSTRING: + if (tt->pointers == 1 && tt->type == INT8_TYPE) { + const char* str = lua_tolstring(L, idx, &sz); + + if (!tt->is_variable_array && sz >= tt->array_size) { + memcpy(to, str, tt->array_size); + } else { + /* include nul terminator */ + memcpy(to, str, sz+1); + } + } else { + goto err; + } + break; + + case LUA_TTABLE: + et = *tt; + et.pointers--; + et.const_mask >>= 1; + et.is_array = 0; + esz = et.pointers ? sizeof(void*) : et.base_size; + + lua_rawgeti(L, idx, 2); + + if (tt->is_variable_array) { + /* we have no idea how big the array is, so set values based off + * how many items were given to us */ + lua_pop(L, 1); + for (i = 0; i < lua_rawlen(L, idx); i++) { + lua_rawgeti(L, idx, (int) i + 1); + set_value(L, -1, (char*) to + esz * i, to_usr, &et, check_pointers); + lua_pop(L, 1); + } + + } else if (lua_isnil(L, -1)) { + /* there is no second element, so we set the whole array to the + * first element (or nil - ie 0) if there is no first element) */ + lua_pop(L, 1); + lua_rawgeti(L, idx, 1); + + if (lua_isnil(L, -1)) { + memset(to, 0, ctype_size(L, tt)); + } else { + /* if its still variable we have no idea how many values to set */ + for (i = 0; i < tt->array_size; i++) { + set_value(L, -1, (char*) to + esz * i, to_usr, &et, check_pointers); + } + } + + lua_pop(L, 1); + + } else { + /* there is a second element, so we set each element using the + * equiv index in the table initializer */ + lua_pop(L, 1); + for (i = 0; i < 
tt->array_size; i++) { + lua_rawgeti(L, idx, (int) (i+1)); + + if (lua_isnil(L, -1)) { + /* we've hit the end of the values provided in the + * initializer, so memset the rest to zero */ + lua_pop(L, 1); + memset((char*) to + esz * i, 0, (tt->array_size - i) * esz); + break; + + } else { + set_value(L, -1, (char*) to + esz * i, to_usr, &et, check_pointers); + lua_pop(L, 1); + } + } + } + break; + + default: + goto err; + } + + return; + +err: + type_error(L, idx, NULL, to_usr, tt); +} + +/* pops the member key from the stack, leaves the member user value on the + * stack. Returns the member offset. Returns -ve if the member can not be + * found. */ +static ptrdiff_t get_member(lua_State* L, int usr, const struct ctype* ct, struct ctype* mt) +{ + ptrdiff_t off; + lua_rawget(L, usr); + + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + return -1; + } + + *mt = *(const struct ctype*) lua_touserdata(L, -1); + lua_getuservalue(L, -1); + lua_replace(L, -2); + + if (mt->is_variable_array && ct->variable_size_known) { + /* eg char mbr[?] */ + size_t sz = (mt->pointers > 1) ? 
sizeof(void*) : mt->base_size; + assert(ct->is_variable_struct && mt->is_array); + mt->array_size = ct->variable_increment / sz; + mt->is_variable_array = 0; + + } else if (mt->is_variable_struct && ct->variable_size_known) { + /* eg struct {char a; char b[?]} mbr; */ + assert(ct->is_variable_struct); + mt->variable_size_known = 1; + mt->variable_increment = ct->variable_increment; + } + + off = mt->offset; + mt->offset = 0; + return off; +} + +static void set_struct(lua_State* L, int idx, void* to, int to_usr, const struct ctype* tt, int check_pointers) +{ + int have_first = 0; + int have_other = 0; + struct ctype mt; + void* p; + + to_usr = lua_absindex(L, to_usr); + idx = lua_absindex(L, idx); + + switch (lua_type(L, idx)) { + case LUA_TTABLE: + /* match up to the members based off the table initializers key - this + * will match both numbered and named members in the user table + * we need a special case for when no entries in the initializer - + * zero initialize the c struct, and only one entry in the initializer + * - set all members to this value */ + memset(to, 0, ctype_size(L, tt)); + lua_pushnil(L); + while (lua_next(L, idx)) { + ptrdiff_t off; + + if (!have_first && lua_tonumber(L, -2) == 1 && lua_tonumber(L, -1) != 0) { + have_first = 1; + } else if (!have_other && (lua_type(L, -2) != LUA_TNUMBER || lua_tonumber(L, -2) != 1)) { + have_other = 1; + } + + lua_pushvalue(L, -2); + off = get_member(L, to_usr, tt, &mt); + assert(off >= 0); + set_value(L, -2, (char*) to + off, -1, &mt, check_pointers); + + /* initializer value, mt usr */ + lua_pop(L, 2); + } + + /* if we only had a single non zero value then initialize all members to that value */ + if (!have_other && have_first && tt->type != UNION_TYPE) { + size_t i, sz; + ptrdiff_t off; + lua_rawgeti(L, idx, 1); + sz = lua_rawlen(L, to_usr); + + for (i = 2; i < sz; i++) { + lua_pushnumber(L, i); + off = get_member(L, to_usr, tt, &mt); + assert(off >= 0); + set_value(L, -2, (char*) to + off, -1, &mt, 
check_pointers); + lua_pop(L, 1); /* mt usr */ + } + + lua_pop(L, 1); /* initializer table */ + } + break; + + case LUA_TUSERDATA: + if (check_pointers) { + p = check_typed_pointer(L, idx, to_usr, tt); + } else { + struct ctype ct; + p = check_pointer(L, idx, &ct); + } + memcpy(to, p, tt->base_size); + lua_pop(L, 1); + break; + + default: + goto err; + } + + return; + +err: + type_error(L, idx, NULL, to_usr, tt); +} + +static void set_value(lua_State* L, int idx, void* to, int to_usr, const struct ctype* tt, int check_pointers) +{ + int top = lua_gettop(L); + + if (tt->is_array) { + set_array(L, idx, to, to_usr, tt, check_pointers); + + } else if (tt->pointers) { + union { + uint8_t c[sizeof(void*)]; + void* p; + } u; + + if (lua_istable(L, idx)) { + luaL_error(L, "Can't set a pointer member to a struct that's about to be freed"); + } + + if (check_pointers) { + u.p = check_typed_pointer(L, idx, to_usr, tt); + } else { + struct ctype ct; + u.p = check_pointer(L, idx, &ct); + } + +#ifndef ALLOW_MISALIGNED_ACCESS + if ((uintptr_t) to & PTR_ALIGN_MASK) { + memcpy(to, u.c, sizeof(void*)); + } else +#endif + { + *(void**) to = u.p; + } + + lua_pop(L, 1); + + } else if (tt->is_bitfield) { + + uint64_t hi_mask = UINT64_C(0) - (UINT64_C(1) << (tt->bit_offset + tt->bit_size)); + uint64_t low_mask = (UINT64_C(1) << tt->bit_offset) - UINT64_C(1); + uint64_t val = check_uint64(L, idx); + val &= (UINT64_C(1) << tt->bit_size) - 1; + val <<= tt->bit_offset; + *(uint64_t*) to = val | (*(uint64_t*) to & (hi_mask | low_mask)); + + } else if (tt->type == STRUCT_TYPE || tt->type == UNION_TYPE) { + set_struct(L, idx, to, to_usr, tt, check_pointers); + + } else { + +#ifndef ALLOW_MISALIGNED_ACCESS + union { + uint8_t c[8]; + _Bool b; + uint64_t u64; + float f; + double d; + cfunction func; + } misalign; + + void* origto = to; + + if ((uintptr_t) origto & (tt->base_size - 1)) { + to = misalign.c; + } +#endif + + switch (tt->type) { + case BOOL_TYPE: + *(_Bool*) to = (cast_int64(L, idx, 
!check_pointers) != 0); + break; + case INT8_TYPE: + if (tt->is_unsigned) { + *(uint8_t*) to = (uint8_t) cast_uint64(L, idx, !check_pointers); + } else { + *(int8_t*) to = (int8_t) cast_int64(L, idx, !check_pointers); + } + break; + case INT16_TYPE: + if (tt->is_unsigned) { + *(uint16_t*) to = (uint16_t) cast_uint64(L, idx, !check_pointers); + } else { + *(int16_t*) to = (int16_t) cast_int64(L, idx, !check_pointers); + } + break; + case INT32_TYPE: + if (tt->is_unsigned) { + *(uint32_t*) to = (uint32_t) cast_uint64(L, idx, !check_pointers); + } else { + *(int32_t*) to = (int32_t) cast_int64(L, idx, !check_pointers); + } + break; + case INT64_TYPE: + if (tt->is_unsigned) { + *(uint64_t*) to = cast_uint64(L, idx, !check_pointers); + } else { + *(int64_t*) to = cast_int64(L, idx, !check_pointers); + } + break; + case FLOAT_TYPE: + *(float*) to = (float) check_double(L, idx); + break; + case DOUBLE_TYPE: + *(double*) to = check_double(L, idx); + break; + case COMPLEX_FLOAT_TYPE: + *(complex_float*) to = check_complex_float(L, idx); + break; + case COMPLEX_DOUBLE_TYPE: + *(complex_double*) to = check_complex_double(L, idx); + break; + case INTPTR_TYPE: + *(uintptr_t*) to = check_uintptr(L, idx); + break; + case ENUM_TYPE: + *(int32_t*) to = check_enum(L, idx, to_usr, tt); + break; + case FUNCTION_PTR_TYPE: + *(cfunction*) to = check_cfunction(L, idx, to_usr, tt, check_pointers); + break; + default: + goto err; + } + +#ifndef ALLOW_MISALIGNED_ACCESS + if ((uintptr_t) origto & (tt->base_size - 1)) { + memcpy(origto, misalign.c, tt->base_size); + } +#endif + } + + assert(lua_gettop(L) == top); + return; +err: + type_error(L, idx, NULL, to_usr, tt); +} + +static int ffi_typeof(lua_State* L) +{ + struct ctype ct; + check_ctype(L, 1, &ct); + push_ctype(L, -1, &ct); + return 1; +} + +static void setmintop(lua_State* L, int idx) +{ + if (lua_gettop(L) < idx) { + lua_settop(L, idx); + } +} + +/* warning: in the case that it finds an array size, it removes that index */ +static 
void get_variable_array_size(lua_State* L, int idx, struct ctype* ct) +{ + /* we only care about the variable buisness for the variable array + * directly ie ffi.new('char[?]') or the struct that contains the variable + * array ffi.new('struct {char v[?]}'). A pointer to the struct doesn't + * care about the variable size (it treats it as a zero sized array). */ + + if (ct->is_variable_array) { + assert(ct->is_array); + ct->array_size = (size_t) luaL_checknumber(L, idx); + ct->is_variable_array = 0; + lua_remove(L, idx); + + } else if (ct->is_variable_struct && !ct->variable_size_known) { + assert(ct->type == STRUCT_TYPE && !ct->is_array); + ct->variable_increment *= (size_t) luaL_checknumber(L, idx); + ct->variable_size_known = 1; + lua_remove(L, idx); + } +} + +static int try_set_value(lua_State* L) +{ + void* p = lua_touserdata(L, 2); + struct ctype* ct = (struct ctype*) lua_touserdata(L, 4); + int check_ptrs = lua_toboolean(L, 5); + set_value(L, 1, p, 3, ct, check_ptrs); + return 0; +} + +static int do_new(lua_State* L, int is_cast) +{ + int cargs, i; + void* p; + struct ctype ct; + int check_ptrs = !is_cast; + + check_ctype(L, 1, &ct); + + /* don't push a callback when we have a c function, as cb:set needs a + * compiled callback from a lua function to work */ + if (!ct.pointers && ct.type == FUNCTION_PTR_TYPE && (lua_isnil(L, 2) || lua_isfunction(L, 2))) { + /* Function cdatas are pinned and must be manually cleaned up by + * calling func:free(). 
*/ + compile_callback(L, 2, -1, &ct); + push_upval(L, &callbacks_key); + lua_pushvalue(L, -2); + lua_pushboolean(L, 1); + lua_rawset(L, -3); + lua_pop(L, 1); /* callbacks tbl */ + return 1; + } + + /* this removes the vararg argument if its needed, and errors if its invalid */ + if (!is_cast) { + get_variable_array_size(L, 2, &ct); + } + + p = push_cdata(L, -1, &ct); + + /* if the user mt has a __gc function then call ffi.gc on this value */ + if (push_user_mt(L, -2, &ct)) { + push_upval(L, &gc_key); + lua_pushvalue(L, -3); + + /* user_mt.__gc */ + lua_pushliteral(L, "__gc"); + lua_rawget(L, -4); + + lua_rawset(L, -3); /* gc_upval[cdata] = user_mt.__gc */ + lua_pop(L, 2); /* user_mt and gc_upval */ + } + + /* stack is: + * ctype arg + * ctor args ... 0+ + * ctype usr + * cdata + */ + + cargs = lua_gettop(L) - 3; + + if (cargs == 0) { + return 1; + } + + if (cargs == 1) { + /* try packed form first + * packed: ffi.new('int[3]', {1}) + * unpacked: ffi.new('int[3]', 1) + */ + lua_pushcfunction(L, &try_set_value); + lua_pushvalue(L, 2); /* ctor arg */ + lua_pushlightuserdata(L, p); + lua_pushvalue(L, -5); /* ctype usr */ + lua_pushlightuserdata(L, &ct); + lua_pushboolean(L, check_ptrs); + + if (!lua_pcall(L, 5, 0, 0)) { + return 1; + } + + /* remove any errors */ + lua_settop(L, 4); + } + + /* if we have more than 2 ctor arguments then they must be unpacked, e.g. 
+ * ffi.new('int[3]', 1, 2, 3) */ + lua_createtable(L, cargs, 0); + lua_replace(L, 1); + for (i = 1; i <= cargs; i++) { + lua_pushvalue(L, i + 1); + lua_rawseti(L, 1, i); + } + assert(lua_gettop(L) == cargs + 3); + set_value(L, 1, p, -2, &ct, check_ptrs); + + return 1; +} + +static int ffi_new(lua_State* L) +{ return do_new(L, 0); } + +static int ffi_cast(lua_State* L) +{ return do_new(L, 1); } + +static int ctype_new(lua_State* L) +{ return do_new(L, 0); } + +static int ctype_call(lua_State* L) +{ + struct ctype ct; + int top = lua_gettop(L); + + check_ctype(L, 1, &ct); + + if (push_user_mt(L, -1, &ct)) { + lua_pushstring(L, "__new"); + lua_rawget(L, -2); + if (!lua_isnil(L, -1)) { + lua_insert(L, 1); // function at bottom of stack under args + lua_pop(L, 2); + lua_call(L, top, 1); + return 1; + } + lua_pop(L, 2); + } + lua_pop(L, 1); + + assert(lua_gettop(L) == top); + return do_new(L, 0); +} + +static int ffi_sizeof(lua_State* L) +{ + struct ctype ct; + check_ctype(L, 1, &ct); + get_variable_array_size(L, 2, &ct); + lua_pushnumber(L, ctype_size(L, &ct)); + return 1; +} + +static int ffi_alignof(lua_State* L) +{ + struct ctype ct, mt; + lua_settop(L, 2); + check_ctype(L, 1, &ct); + + /* if no member is specified then we return the alignment of the type */ + if (lua_isnil(L, 2)) { + lua_pushnumber(L, ct.align_mask + 1); + return 1; + } + + /* get the alignment of the member */ + lua_pushvalue(L, 2); + if (get_member(L, -2, &ct, &mt) < 0) { + push_type_name(L, 3, &ct); + return luaL_error(L, "type %s has no member %s", lua_tostring(L, -1), lua_tostring(L, 2)); + } + + lua_pushnumber(L, mt.align_mask + 1); + return 1; +} + +static int ffi_offsetof(lua_State* L) +{ + ptrdiff_t off; + struct ctype ct, mt; + lua_settop(L, 2); + check_ctype(L, 1, &ct); + + lua_pushvalue(L, 2); + off = get_member(L, -2, &ct, &mt); /* this replaces the member key at -1 with the mbr usr value */ + if (off < 0) { + push_type_name(L, 3, &ct); + return luaL_error(L, "type %s has no member 
%s", lua_tostring(L, -1), lua_tostring(L, 2)); + } + + lua_pushnumber(L, off); + + if (!mt.is_bitfield) { + return 1; + } + + lua_pushnumber(L, mt.bit_offset); + lua_pushnumber(L, mt.bit_size); + return 3; +} + +static int ffi_istype(lua_State* L) +{ + struct ctype tt, ft; + check_ctype(L, 1, &tt); + to_cdata(L, 2, &ft); + + if (ft.type == INVALID_TYPE) { + goto fail; + } + + if (!is_same_type(L, 3, 4, &tt, &ft)) { + goto fail; + } + + if (tt.pointers != ft.pointers) { + goto fail; + } + + if (tt.is_array != ft.is_array) { + goto fail; + } + + if (tt.is_array && tt.array_size != ft.array_size) { + goto fail; + } + + if (tt.calling_convention != ft.calling_convention) { + goto fail; + } + + lua_pushboolean(L, 1); + return 1; + +fail: + lua_pushboolean(L, 0); + return 1; +} + +static int cdata_gc(lua_State* L) +{ + struct ctype ct; + check_cdata(L, 1, &ct); + lua_settop(L, 1); + + /* call the gc func if there is any registered */ + lua_pushvalue(L, 1); + lua_rawget(L, lua_upvalueindex(2)); + if (!lua_isnil(L, -1)) { + lua_pushvalue(L, 1); + lua_pcall(L, 1, 0, 0); + } + + /* unset the closure */ + lua_pushvalue(L, 1); + lua_pushnil(L); + lua_rawset(L, lua_upvalueindex(1)); + + return 0; +} + +static int callback_free(lua_State* L) +{ + cfunction* p = (cfunction*) lua_touserdata(L, 1); + free_code(get_jit(L), L, *p); + return 0; +} + +static int cdata_free(lua_State* L) +{ + struct ctype ct; + cfunction* p = (cfunction*) check_cdata(L, 1, &ct); + lua_settop(L, 1); + + /* unset the closure */ + lua_pushvalue(L, 1); + lua_pushnil(L); + lua_rawset(L, lua_upvalueindex(1)); + + if (ct.is_jitted) { + free_code(get_jit(L), L, *p); + *p = NULL; + } + + return 0; +} + +static int cdata_set(lua_State* L) +{ + struct ctype ct; + cfunction* p = (cfunction*) check_cdata(L, 1, &ct); + luaL_checktype(L, 2, LUA_TFUNCTION); + + if (!ct.is_jitted) { + luaL_error(L, "can't set the function for a non-lua callback"); + } + + if (*p == NULL) { + luaL_error(L, "can't set the function for a 
free'd callback"); + } + + push_func_ref(L, *p); + lua_pushvalue(L, 2); + lua_rawseti(L, -2, CALLBACK_FUNC_USR_IDX); + + /* remove the closure for this callback as it embeds the function pointer + * value */ + lua_pushvalue(L, 1); + lua_pushboolean(L, 1); + lua_rawset(L, lua_upvalueindex(1)); + + return 0; +} + +static int cdata_call(lua_State* L) +{ + struct ctype ct; + int top = lua_gettop(L); + cfunction* p = (cfunction*) check_cdata(L, 1, &ct); + + if (push_user_mt(L, -1, &ct)) { + lua_pushliteral(L, "__call"); + lua_rawget(L, -2); + + if (!lua_isnil(L, -1)) { + lua_insert(L, 1); + lua_pop(L, 2); /* ct_usr, user_mt */ + lua_call(L, lua_gettop(L) - 1, LUA_MULTRET); + return lua_gettop(L); + } + } + if (ct.pointers || ct.type != FUNCTION_PTR_TYPE) { + return luaL_error(L, "only function callbacks are callable"); + } + + lua_pushvalue(L, 1); + lua_rawget(L, lua_upvalueindex(1)); + + if (!lua_isfunction(L, -1)) { + lua_pop(L, 1); + compile_function(L, *p, -1, &ct); + + assert(lua_gettop(L) == top + 2); /* uv, closure */ + + /* closures[func] = closure */ + lua_pushvalue(L, 1); + lua_pushvalue(L, -2); + lua_rawset(L, lua_upvalueindex(1)); + + lua_replace(L, 1); + } else { + lua_replace(L, 1); + } + + lua_pop(L, 1); /* uv */ + assert(lua_gettop(L) == top); + + lua_call(L, lua_gettop(L) - 1, LUA_MULTRET); + return lua_gettop(L); +} + +static int user_mt_key; + +static int ffi_metatype(lua_State* L) +{ + struct ctype ct; + lua_settop(L, 2); + + check_ctype(L, 1, &ct); + if (lua_type(L, 2) != LUA_TTABLE && lua_type(L, 2) != LUA_TNIL) { + return luaL_argerror(L, 2, "metatable must be a table or nil"); + } + + lua_pushlightuserdata(L, &user_mt_key); + lua_pushvalue(L, 2); + lua_rawset(L, 3); /* user[user_mt_key] = mt */ + + /* return the passed in ctype */ + push_ctype(L, 3, &ct); + return 1; +} + +/* push_user_mt returns 1 if the type has a user metatable and pushes it onto + * the stack, otherwise it returns 0 and pushes nothing */ +int push_user_mt(lua_State* L, int 
ct_usr, const struct ctype* ct) +{ + if (ct->type != STRUCT_TYPE && ct->type != UNION_TYPE) { + return 0; + } + + ct_usr = lua_absindex(L, ct_usr); + lua_pushlightuserdata(L, &user_mt_key); + lua_rawget(L, ct_usr); + + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + return 0; + } + return 1; +} + +static int ffi_gc(lua_State* L) +{ + struct ctype ct; + lua_settop(L, 2); + check_cdata(L, 1, &ct); + + push_upval(L, &gc_key); + lua_pushvalue(L, 1); + lua_pushvalue(L, 2); + lua_rawset(L, -3); + + /* return the cdata back */ + lua_settop(L, 1); + return 1; +} + +/* lookup_cdata_index returns the offset of the found type and user value on + * the stack if valid. Otherwise returns -ve and doesn't touch the stack. + */ +static ptrdiff_t lookup_cdata_index(lua_State* L, int idx, int ct_usr, struct ctype* ct) +{ + struct ctype mt; + ptrdiff_t off; + + ct_usr = lua_absindex(L, ct_usr); + + switch (lua_type(L, idx)) { + case LUA_TNUMBER: + /* possibilities are array, pointer */ + + if (!ct->pointers || is_void_ptr(ct)) { + return -1; + } + + ct->is_array = 0; + ct->pointers--; + ct->const_mask >>= 1; + + lua_pushvalue(L, ct_usr); + + return (ct->pointers ? 
sizeof(void*) : ct->base_size) * lua_tonumber(L, 2); + + case LUA_TSTRING: + /* possibilities are struct/union, pointer to struct/union */ + + if ((ct->type != STRUCT_TYPE && ct->type != UNION_TYPE) || ct->is_array || ct->pointers > 1) { + return -1; + } + + lua_pushvalue(L, idx); + off = get_member(L, ct_usr, ct, &mt); + if (off < 0) { + return -1; + } + + *ct = mt; + return off; + + default: + return -1; + } +} + +static int cdata_newindex(lua_State* L) +{ + struct ctype tt; + char* to; + ptrdiff_t off; + + lua_settop(L, 3); + + to = (char*) check_cdata(L, 1, &tt); + off = lookup_cdata_index(L, 2, -1, &tt); + + if (off < 0) { + if (!push_user_mt(L, -1, &tt)) { + goto err; + } + + lua_pushliteral(L, "__newindex"); + lua_rawget(L, -2); + + if (lua_isnil(L, -1)) { + goto err; + } + + lua_insert(L, 1); + lua_settop(L, 4); + lua_call(L, 3, LUA_MULTRET); + return lua_gettop(L); + } + + if (tt.const_mask & 1) { + return luaL_error(L, "can't set const data"); + } + + set_value(L, 3, to + off, -1, &tt, 1); + return 0; + +err: + push_type_name(L, 4, &tt); + return luaL_error(L, "type %s has no member %s", lua_tostring(L, -1), lua_tostring(L, 2)); +} + +static int cdata_index(lua_State* L) +{ + void* to; + struct ctype ct; + char* data; + ptrdiff_t off; + + lua_settop(L, 2); + data = (char*) check_cdata(L, 1, &ct); + assert(lua_gettop(L) == 3); + + if (!ct.pointers) { + switch (ct.type) { + case FUNCTION_PTR_TYPE: + /* Callbacks use the same metatable as standard cdata values, but have set + * and free members. So instead of mt.__index = mt, we do the equiv here. */ + lua_getmetatable(L, 1); + lua_pushvalue(L, 2); + lua_rawget(L, -2); + return 1; + + /* This provides the .re and .im virtual members */ + case COMPLEX_DOUBLE_TYPE: + case COMPLEX_FLOAT_TYPE: + if (!lua_isstring(L, 2)) { + luaL_error(L, "invalid member for complex number"); + + } else if (strcmp(lua_tostring(L, 2), "re") == 0) { + lua_pushnumber(L, ct.type == COMPLEX_DOUBLE_TYPE ? 
creal(*(complex_double*) data) : crealf(*(complex_float*) data)); + + } else if (strcmp(lua_tostring(L, 2), "im") == 0) { + lua_pushnumber(L, ct.type == COMPLEX_DOUBLE_TYPE ? cimag(*(complex_double*) data) : cimagf(*(complex_float*) data)); + + } else { + luaL_error(L, "invalid member for complex number"); + } + return 1; + } + } + + off = lookup_cdata_index(L, 2, -1, &ct); + + if (off < 0) { + assert(lua_gettop(L) == 3); + if (!push_user_mt(L, -1, &ct)) { + goto err; + } + + lua_pushliteral(L, "__index"); + lua_rawget(L, -2); + + if (lua_isnil(L, -1)) { + goto err; + } + + if (lua_istable(L, -1)) { + lua_pushvalue(L, 2); + lua_gettable(L, -2); + return 1; + } + + lua_insert(L, 1); + lua_settop(L, 3); + lua_call(L, 2, LUA_MULTRET); + return lua_gettop(L); + +err: + push_type_name(L, 3, &ct); + return luaL_error(L, "type %s has no member %s", lua_tostring(L, -1), lua_tostring(L, 2)); + } + + assert(lua_gettop(L) == 4); /* ct, key, ct_usr, mbr_usr */ + data += off; + + if (ct.is_array) { + /* push a reference to the array */ + ct.is_reference = 1; + to = push_cdata(L, -1, &ct); + *(void**) to = data; + return 1; + + } else if (ct.is_bitfield) { + + if (ct.type == INT64_TYPE) { + struct ctype rt; + uint64_t val = *(uint64_t*) data; + val >>= ct.bit_offset; + val &= (UINT64_C(1) << ct.bit_size) - 1; + + memset(&rt, 0, sizeof(rt)); + rt.base_size = 8; + rt.type = INT64_TYPE; + rt.is_unsigned = 1; + rt.is_defined = 1; + + to = push_cdata(L, 0, &rt); + *(uint64_t*) to = val; + + return 1; + + } else if (ct.type == BOOL_TYPE) { + uint64_t val = *(uint64_t*) data; + lua_pushboolean(L, (int) (val & (UINT64_C(1) << ct.bit_offset))); + return 1; + + } else { + uint64_t val = *(uint64_t*) data; + val >>= ct.bit_offset; + val &= (UINT64_C(1) << ct.bit_size) - 1; + lua_pushnumber(L, val); + return 1; + } + + } else if (ct.pointers) { +#ifndef ALLOW_MISALIGNED_ACCESS + union { + uint8_t c[8]; + void* p; + } misalignbuf; + + if ((uintptr_t) data & PTR_ALIGN_MASK) { + 
memcpy(misalignbuf.c, data, sizeof(void*)); + data = misalignbuf.c; + } +#endif + to = push_cdata(L, -1, &ct); + *(void**) to = *(void**) data; + return 1; + + } else if (ct.type == STRUCT_TYPE || ct.type == UNION_TYPE) { + /* push a reference to the member */ + ct.is_reference = 1; + to = push_cdata(L, -1, &ct); + *(void**) to = data; + return 1; + + } else if (ct.type == FUNCTION_PTR_TYPE) { + cfunction* pf = (cfunction*) push_cdata(L, -1, &ct); + *pf = *(cfunction*) data; + return 1; + + } else { +#ifndef ALLOW_MISALIGNED_ACCESS + union { + uint8_t c[8]; + double d; + float f; + uint64_t u64; + } misalignbuf; + + assert(ct.base_size <= 8); + + if ((uintptr_t) data & (ct.base_size - 1)) { + memcpy(misalignbuf.c, data, ct.base_size); + data = misalignbuf.c; + } +#endif + + switch (ct.type) { + case BOOL_TYPE: + lua_pushboolean(L, *(_Bool*) data); + break; + case INT8_TYPE: + lua_pushnumber(L, ct.is_unsigned ? (lua_Number) *(uint8_t*) data : (lua_Number) *(int8_t*) data); + break; + case INT16_TYPE: + lua_pushnumber(L, ct.is_unsigned ? (lua_Number) *(uint16_t*) data : (lua_Number) *(int16_t*) data); + break; + case ENUM_TYPE: + case INT32_TYPE: + lua_pushnumber(L, ct.is_unsigned ? 
(lua_Number) *(uint32_t*) data : (lua_Number) *(int32_t*) data); + break; + case INT64_TYPE: + to = push_cdata(L, -1, &ct); + *(int64_t*) to = *(int64_t*) data; + break; + case INTPTR_TYPE: + to = push_cdata(L, -1, &ct); + *(intptr_t*) to = *(intptr_t*) data; + break; + case FLOAT_TYPE: + lua_pushnumber(L, *(float*) data); + break; + case DOUBLE_TYPE: + lua_pushnumber(L, *(double*) data); + break; + default: + luaL_error(L, "internal error: invalid member type"); + } + + return 1; + } +} + +static complex_double check_complex(lua_State* L, int idx, void* p, struct ctype* ct) +{ + if (ct->type == INVALID_TYPE) { + double d = luaL_checknumber(L, idx); +#ifdef HAVE_COMPLEX + return d; +#else + complex_double c; + c.real = d; + c.imag = 0; + return c; +#endif + } else if (ct->type == COMPLEX_DOUBLE_TYPE) { + return *(complex_double*) p; + } else if (ct->type == COMPLEX_FLOAT_TYPE) { + complex_float* f = (complex_float*) p; +#ifdef HAVE_COMPLEX + return *f; +#else + complex_double d; + d.real = f->real; + d.imag = f->imag; + return d; +#endif + } else { + complex_double dummy; + type_error(L, idx, "complex", 0, NULL); + memset(&dummy, 0, sizeof(dummy)); + return dummy; + } +} + +static int rank(const struct ctype* ct) +{ + if (ct->pointers) { + return 5; + } + + switch (ct->type) { + case COMPLEX_DOUBLE_TYPE: + return 7; + case COMPLEX_FLOAT_TYPE: + return 6; + case INTPTR_TYPE: + return sizeof(intptr_t) >= sizeof(int64_t) ? 4 : 1; + case INT64_TYPE: + return ct->is_unsigned ? 
3 : 2; + case INT32_TYPE: + case INT16_TYPE: + case INT8_TYPE: + return 2; + default: + return 0; + } +} + +static void push_complex(lua_State* L, complex_double res, int ct_usr, const struct ctype* ct) +{ + if (ct->type == COMPLEX_DOUBLE_TYPE) { + complex_double* p = (complex_double*) push_cdata(L, ct_usr, ct); + *p = res; + } else { + complex_float* p = (complex_float*) push_cdata(L, ct_usr, ct); +#ifdef HAVE_COMPLEX + *p = (complex float) res; +#else + p->real = (float) res.real; + p->imag = (float) res.imag; +#endif + } +} + +static void push_number(lua_State* L, int64_t val, int ct_usr, const struct ctype* ct) +{ + if ((ct->pointers || ct->type == INTPTR_TYPE) && sizeof(intptr_t) != sizeof(int64_t)) { + intptr_t* p = (intptr_t*) push_cdata(L, ct_usr, ct); + *p = val; + } else { + int64_t* p = (int64_t*) push_cdata(L, ct_usr, ct); + *p = val; + } +} + +static int call_user_op(lua_State* L, const char* opfield, int idx, int ct_usr, const struct ctype* ct) +{ + idx = lua_absindex(L, idx); + + if (push_user_mt(L, ct_usr, ct)) { + lua_pushstring(L, opfield); + lua_rawget(L, -2); + if (!lua_isnil(L, -1)) { + int top = lua_gettop(L); + lua_pushvalue(L, idx); + lua_call(L, 1, LUA_MULTRET); + return lua_gettop(L) - top + 1; + } + lua_pop(L, 2); + } + return -1; +} + +static int cdata_unm(lua_State* L) +{ + struct ctype ct; + void* p; + int64_t val; + int ret; + + lua_settop(L, 1); + p = to_cdata(L, 1, &ct); + + ret = call_user_op(L, "__unm", 1, 2, &ct); + if (ret >= 0) { + return ret; + } + + val = check_intptr(L, 1, p, &ct); + + if (ct.pointers) { + luaL_error(L, "can't negate a pointer value"); + } else { + memset(&ct, 0, sizeof(ct)); + ct.type = INT64_TYPE; + ct.base_size = 8; + ct.is_defined = 1; + push_number(L, -val, 0, &ct); + } + + return 1; +} + +/* returns -ve if no binop was called otherwise returns the number of return + * arguments */ +static int call_user_binop(lua_State* L, const char* opfield, int lidx, int lusr, const struct ctype* lt, int ridx, int 
rusr, const struct ctype* rt) +{ + lidx = lua_absindex(L, lidx); + ridx = lua_absindex(L, ridx); + + if (push_user_mt(L, lusr, lt)) { + lua_pushstring(L, opfield); + lua_rawget(L, -2); + + if (!lua_isnil(L, -1)) { + int top = lua_gettop(L); + lua_pushvalue(L, lidx); + lua_pushvalue(L, ridx); + lua_call(L, 2, LUA_MULTRET); + return lua_gettop(L) - top + 1; + } + + lua_pop(L, 2); /* user_mt and user_mt.op */ + } + + if (push_user_mt(L, rusr, rt)) { + lua_pushstring(L, opfield); + lua_rawget(L, -2); + + if (!lua_isnil(L, -1)) { + int top = lua_gettop(L); + lua_pushvalue(L, lidx); + lua_pushvalue(L, ridx); + lua_call(L, 2, LUA_MULTRET); + return lua_gettop(L) - top + 1; + } + + lua_pop(L, 2); /* user_mt and user_mt.op */ + } + + return -1; +} + +static int cdata_concat(lua_State* L) +{ + struct ctype lt, rt; + int ret; + + lua_settop(L, 2); + to_cdata(L, 1, <); + to_cdata(L, 2, &rt); + + ret = call_user_binop(L, "__concat", 1, 3, <, 2, 4, &rt); + if (ret >= 0) { + return ret; + } + + return luaL_error(L, "NYI"); +} + +static int cdata_len(lua_State* L) +{ + struct ctype ct; + int ret; + + lua_settop(L, 1); + to_cdata(L, 1, &ct); + + ret = call_user_op(L, "__len", 1, 2, &ct); + if (ret >= 0) { + return ret; + } + + push_type_name(L, 2, &ct); + return luaL_error(L, "type %s does not implement the __len metamethod", lua_tostring(L, -1)); +} + +static int cdata_pairs(lua_State* L) +{ + struct ctype ct; + int ret; + + lua_settop(L, 1); + to_cdata(L, 1, &ct); + + ret = call_user_op(L, "__pairs", 1, 2, &ct); + if (ret >= 0) { + return ret; + } + + push_type_name(L, 2, &ct); + return luaL_error(L, "type %s does not implement the __pairs metamethod", lua_tostring(L, -1)); +} + +static int cdata_ipairs(lua_State* L) +{ + struct ctype ct; + int ret; + + lua_settop(L, 1); + to_cdata(L, 1, &ct); + + ret = call_user_op(L, "__ipairs", 1, 2, &ct); + if (ret >= 0) { + return ret; + } + + push_type_name(L, 2, &ct); + return luaL_error(L, "type %s does not implement the __ipairs 
metamethod", lua_tostring(L, -1)); +} + +static int cdata_add(lua_State* L) +{ + struct ctype lt, rt, ct; + void *lp, *rp; + int ct_usr; + int ret; + + lua_settop(L, 2); + + lp = to_cdata(L, 1, <); + rp = to_cdata(L, 2, &rt); + assert(lua_gettop(L) == 4); + + ret = call_user_binop(L, "__add", 1, 3, <, 2, 4, &rt); + if (ret >= 0) { + return ret; + } + assert(lua_gettop(L) == 4); + + ct_usr = rank(<) > rank(&rt) ? 3 : 4; + ct = rank(<) > rank(&rt) ? lt : rt; + + if (IS_COMPLEX(ct.type)) { + complex_double left, right, res; + + left = check_complex(L, 1, lp, <); + right = check_complex(L, 2, rp, &rt); + assert(lua_gettop(L) == 4); + +#ifdef HAVE_COMPLEX + res = left + right; +#else + res.real = left.real + right.real; + res.imag = left.imag + right.imag; +#endif + + push_complex(L, res, ct_usr, &ct); + return 1; + + } else { + int64_t left = check_intptr(L, 1, lp, <); + int64_t right = check_intptr(L, 2, rp, &rt); + assert(lua_gettop(L) == 4); + + /* note due to 2s complement it doesn't matter if we do the addition as int or uint, + * but the result needs to be uint64_t if either of the sources are */ + + if (lt.pointers && rt.pointers) { + luaL_error(L, "can't add two pointers"); + + } else if (lt.pointers) { + int64_t res = left + (lt.pointers > 1 ? sizeof(void*) : lt.base_size) * right; + lt.is_array = 0; + push_number(L, res, 3, <); + + } else if (rt.pointers) { + int64_t res = right + (rt.pointers > 1 ? sizeof(void*) : rt.base_size) * left; + rt.is_array = 0; + push_number(L, res, 4, &rt); + + } else { + push_number(L, left + right, ct_usr, &ct); + } + + return 1; + } +} + +static int cdata_sub(lua_State* L) +{ + struct ctype lt, rt, ct; + void *lp, *rp; + int ct_usr; + int ret; + + lua_settop(L, 2); + + lp = to_cdata(L, 1, <); + rp = to_cdata(L, 2, &rt); + + ret = call_user_binop(L, "__sub", 1, 3, <, 2, 4, &rt); + if (ret >= 0) { + return ret; + } + + ct_usr = rank(<) > rank(&rt) ? 3 : 4; + ct = rank(<) > rank(&rt) ? 
lt : rt; + + if (IS_COMPLEX(ct.type)) { + complex_double left, right, res; + + left = check_complex(L, 1, lp, <); + right = check_complex(L, 2, rp, &rt); + +#ifdef HAVE_COMPLEX + res = left - right; +#else + res.real = left.real - right.real; + res.imag = left.imag - right.imag; +#endif + + push_complex(L, res, ct_usr, &ct); + return 1; + + } else { + int64_t left = check_intptr(L, 1, lp, <); + int64_t right = check_intptr(L, 2, rp, &rt); + + if (rt.pointers) { + luaL_error(L, "NYI: can't subtract a pointer value"); + + } else if (lt.pointers) { + int64_t res = left - (lt.pointers > 1 ? sizeof(void*) : lt.base_size) * right; + lt.is_array = 0; + push_number(L, res, 3, <); + + } else { + int64_t res = left - right; + push_number(L, res, ct_usr, &ct); + } + + return 1; + } +} + +/* TODO fix for unsigned */ +#define NUMBER_ONLY_BINOP(OPSTR, DO_NORMAL, DO_COMPLEX) \ + struct ctype lt, rt, ct; \ + void *lp, *rp; \ + int ct_usr; \ + int ret; \ + \ + lua_settop(L, 2); \ + \ + lp = to_cdata(L, 1, <); \ + rp = to_cdata(L, 2, &rt); \ + \ + ret = call_user_binop(L, OPSTR, 1, 3, <, 2, 4, &rt); \ + if (ret >= 0) { \ + return ret; \ + } \ + \ + ct_usr = rank(<) > rank(&rt) ? 3 : 4; \ + ct = rank(<) > rank(&rt) ? 
lt : rt; \ + \ + if (IS_COMPLEX(ct.type)) { \ + complex_double res; \ + complex_double left = check_complex(L, 1, lp, <); \ + complex_double right = check_complex(L, 2, rp, &rt); \ + \ + DO_COMPLEX(left, right, res); \ + push_complex(L, res, ct_usr, &ct); \ + \ + } else if (lt.pointers || rt.pointers) { \ + luaL_error(L, "can't operate on a pointer value"); \ + \ + } else { \ + int64_t res; \ + int64_t left = check_intptr(L, 1, lp, <); \ + int64_t right = check_intptr(L, 2, rp, &rt); \ + \ + DO_NORMAL(left, right, res); \ + push_number(L, res, ct_usr, &ct); \ + } \ + \ + return 1 + +#define MUL(l,r,s) s = l * r +#define DIV(l,r,s) s = l / r +#define MOD(l,r,s) s = l % r +#define POW(l,r,s) s = pow(l, r) + +#ifdef HAVE_COMPLEX +#define MULC(l,r,s) s = l * r +#define DIVC(l,r,s) s = l / r +#define MODC(l,r,s) (void) l, (void) r, memset(&s, 0, sizeof(s)), luaL_error(L, "NYI: complex mod") +#define POWC(l,r,s) s = cpow(l, r) +#else +#define MULC(l,r,s) s.real = l.real * r.real - l.imag * r.imag, s.imag = l.real * r.imag + l.imag * r.real +#define DIVC(l,r,s) s.real = (l.real * r.real + l.imag * r.imag) / (r.real * r.real + r.imag * r.imag), \ + s.imag = (l.imag * r.real - l.real * r.imag) / (r.real * r.real + r.imag * r.imag) +#define MODC(l,r,s) (void) l, (void) r, memset(&s, 0, sizeof(s)), luaL_error(L, "NYI: complex mod") +#define POWC(l,r,s) (void) l, (void) r, memset(&s, 0, sizeof(s)), luaL_error(L, "NYI: complex pow") +#endif + +static int cdata_mul(lua_State* L) +{ NUMBER_ONLY_BINOP("__mul", MUL, MULC); } + +static int cdata_div(lua_State* L) +{ NUMBER_ONLY_BINOP("__div", DIV, DIVC); } + +static int cdata_mod(lua_State* L) +{ NUMBER_ONLY_BINOP("__mod", MOD, MODC); } + +static int cdata_pow(lua_State* L) +{ NUMBER_ONLY_BINOP("__pow", POW, POWC); } + +#define COMPARE_BINOP(OPSTR, OP, OPC) \ + struct ctype lt, rt; \ + void *lp, *rp; \ + int ret, res; \ + \ + lua_settop(L, 2); \ + \ + lp = to_cdata(L, 1, <); \ + rp = to_cdata(L, 2, &rt); \ + \ + ret = 
call_user_binop(L, OPSTR, 1, 3, <, 2, 4, &rt); \ + if (ret >= 0) { \ + return ret; \ + } \ + \ + if (IS_COMPLEX(lt.type) || IS_COMPLEX(rt.type)) { \ + complex_double left = check_complex(L, 1, lp, <); \ + complex_double right = check_complex(L, 2, rp, &rt); \ + \ + res = OPC(left, right); \ + \ + lua_pushboolean(L, res); \ + \ + } else { \ + int64_t left = check_intptr(L, 1, lp, <); \ + int64_t right = check_intptr(L, 2, rp, &rt); \ + \ + if (lt.pointers && rt.pointers) { \ + if (is_void_ptr(<) || is_void_ptr(&rt) || is_same_type(L, 3, 4, <, &rt)) { \ + res = OP((uint64_t) left, (uint64_t) right); \ + } else { \ + goto err; \ + } \ + \ + } else if (lt.is_null && rt.type == FUNCTION_PTR_TYPE) { \ + res = OP((uint64_t) left, (uint64_t) right); \ + \ + } else if (rt.is_null && lt.type == FUNCTION_PTR_TYPE) { \ + res = OP((uint64_t) left, (uint64_t) right); \ + \ + } else if (lt.pointers && rt.type == INTPTR_TYPE && rt.is_unsigned) {\ + res = OP((uint64_t) left, (uint64_t) right); \ + \ + } else if (rt.pointers && lt.type == INTPTR_TYPE && lt.is_unsigned) {\ + res = OP((uint64_t) left, (uint64_t) right); \ + \ + } else if (rt.pointers || lt.pointers) { \ + goto err; \ + \ + } else if (lt.is_unsigned && rt.is_unsigned) { \ + res = OP((uint64_t) left, (uint64_t) right); \ + \ + } else if (lt.is_unsigned) { \ + res = OP((int64_t) (uint64_t) left, right); \ + \ + } else if (rt.is_unsigned) { \ + res = OP(left, (int64_t) (uint64_t) right); \ + \ + } else { \ + res = OP(left, right); \ + } \ + \ + lua_pushboolean(L, res); \ + } \ + return 1 + +#define EQ(l, r) (l) == (r) +#define LT(l, r) (l) < (r) +#define LE(l, r) (l) <= (r) + +#ifdef HAVE_COMPLEX +#define EQC(l, r) (l) == (r) +#else +#define EQC(l, r) (l).real == (r).real && (l).imag == (r).imag +#endif + +#define LEC(l, r) EQC(l, r), luaL_error(L, "complex numbers are non-orderable") +#define LTC(l, r) EQC(l, r), luaL_error(L, "complex numbers are non-orderable") + +static int cdata_eq(lua_State* L) +{ + 
COMPARE_BINOP("__eq", EQ, EQC); +err: + lua_pushboolean(L, 0); + return 1; +} + +static int cdata_lt(lua_State* L) +{ + COMPARE_BINOP("__lt", LT, LTC); +err: + lua_getuservalue(L, 1); + lua_getuservalue(L, 2); + push_type_name(L, -2, <); + push_type_name(L, -2, <); + return luaL_error(L, "trying to compare incompatible types %s and %s", lua_tostring(L, -2), lua_tostring(L, -1)); +} + +static int cdata_le(lua_State* L) +{ + COMPARE_BINOP("__le", LE, LEC); +err: + lua_getuservalue(L, 1); + lua_getuservalue(L, 2); + push_type_name(L, -2, <); + push_type_name(L, -2, <); + return luaL_error(L, "trying to compare incompatible types %s and %s", lua_tostring(L, -2), lua_tostring(L, -1)); +} + +static const char* etype_tostring(int type) +{ + switch (type) { + case VOID_TYPE: return "void"; + case DOUBLE_TYPE: return "double"; + case FLOAT_TYPE: return "float"; + case COMPLEX_DOUBLE_TYPE: return "complex double"; + case COMPLEX_FLOAT_TYPE: return "complex float"; + case BOOL_TYPE: return "bool"; + case INT8_TYPE: return "int8"; + case INT16_TYPE: return "int16"; + case INT32_TYPE: return "int32"; + case INT64_TYPE: return "int64"; + case INTPTR_TYPE: return "intptr"; + case ENUM_TYPE: return "enum"; + case UNION_TYPE: return "union"; + case STRUCT_TYPE: return "struct"; + case FUNCTION_PTR_TYPE: return "function ptr"; + case FUNCTION_TYPE: return "function"; + default: return "invalid"; + } +} + +static void print_type(lua_State* L, const struct ctype* ct) +{ + lua_pushfstring(L, " sz %d %d %d align %d ptr %d %d %d type %s%s %d %d %d name %d call %d %d var %d %d %d bit %d %d %d %d jit %d", + /* sz */ + ct->base_size, + ct->array_size, + ct->offset, + /* align */ + ct->align_mask, + /* ptr */ + ct->is_array, + ct->pointers, + ct->const_mask, + /* type */ + ct->is_unsigned ? 
"u" : "", + etype_tostring(ct->type), + ct->is_reference, + ct->is_defined, + ct->is_null, + /* name */ + ct->has_member_name, + /* call */ + ct->calling_convention, + ct->has_var_arg, + /* var */ + ct->is_variable_array, + ct->is_variable_struct, + ct->variable_size_known, + /* bit */ + ct->is_bitfield, + ct->has_bitfield, + ct->bit_offset, + ct->bit_size, + /* jit */ + ct->is_jitted); +} + +static int ctype_tostring(lua_State* L) +{ + struct ctype ct; + assert(lua_type(L, 1) == LUA_TUSERDATA); + lua_settop(L, 1); + check_ctype(L, 1, &ct); + assert(lua_gettop(L) == 2); + push_type_name(L, -1, &ct); + lua_pushfstring(L, "ctype<%s> %p", lua_tostring(L, -1), lua_topointer(L, 1)); + + if (DEBUG_TOSTRING) { + print_type(L, &ct); + lua_concat(L, 2); + } + + return 1; +} + +static int cdata_tostring(lua_State* L) +{ + struct ctype ct; + char buf[64]; + void* p; + int ret; + + lua_settop(L, 1); + p = to_cdata(L, 1, &ct); + + ret = call_user_op(L, "__tostring", 1, 2, &ct); + if (ret >= 0) { + return ret; + } + + if (ct.pointers > 0 || ct.type == STRUCT_TYPE || ct.type == UNION_TYPE) { + push_type_name(L, -1, &ct); + lua_pushfstring(L, "cdata<%s>: %p", lua_tostring(L, -1), p); + + if (DEBUG_TOSTRING) { + print_type(L, &ct); + lua_concat(L, 2); + } + + return 1; + } + + switch (ct.type) { + case COMPLEX_DOUBLE_TYPE: + { + complex_double c = *(complex_double*) p; + if (cimag(c) != 0) { + lua_pushfstring(L, "%f+%fi", creal(c), cimag(c)); + } else { + lua_pushfstring(L, "%f", creal(c)); + } + } + return 1; + + case COMPLEX_FLOAT_TYPE: + { + complex_float c = *(complex_float*) p; + if (cimagf(c) != 0) { + lua_pushfstring(L, "%f+%fi", crealf(c), cimagf(c)); + } else { + lua_pushfstring(L, "%f", crealf(c)); + } + } + return 1; + + case FUNCTION_PTR_TYPE: + p = *(void**) p; + push_type_name(L, -1, &ct); + lua_pushfstring(L, "cdata<%s>: %p", lua_tostring(L, -1), *(void**) p); + return 1; + + case INTPTR_TYPE: + lua_pushfstring(L, "%p", *(uintptr_t*) p); + return 1; + + case 
INT64_TYPE: + sprintf(buf, ct.is_unsigned ? "%"PRIu64 : "%"PRId64, *(uint64_t*) p); + lua_pushstring(L, buf); + return 1; + + default: + sprintf(buf, ct.is_unsigned ? "%"PRId64 : "%"PRId64, (int64_t) check_intptr(L, 1, p, &ct)); + lua_pushstring(L, buf); + return 1; + } +} + +static int ffi_errno(lua_State* L) +{ + struct jit* jit = get_jit(L); + + if (!lua_isnoneornil(L, 1)) { + lua_pushnumber(L, jit->last_errno); + jit->last_errno = luaL_checknumber(L, 1); + } else { + lua_pushnumber(L, jit->last_errno); + } + + return 1; +} + +static int ffi_number(lua_State* L) +{ + struct ctype ct; + void* data = to_cdata(L, 1, &ct); + + if (ct.type != INVALID_TYPE) { + lua_pushnumber(L, check_intptr(L, 1, data, &ct)); + return 1; + } else { + /* call the old _G.tonumber, we use an upvalue as _G.tonumber is set + * to this function */ + lua_pushvalue(L, lua_upvalueindex(1)); + lua_insert(L, 1); + lua_call(L, lua_gettop(L)-1, LUA_MULTRET); + return lua_gettop(L); + } +} + +static int ffi_string(lua_State* L) +{ + struct ctype ct; + char* data; + lua_settop(L, 2); + + data = (char*) check_cdata(L, 1, &ct); + + if (is_void_ptr(&ct)) { + lua_pushlstring(L, data, (size_t) luaL_checknumber(L, 2)); + return 1; + + } else if (ct.type == INT8_TYPE && ct.pointers == 1) { + size_t sz; + + if (!lua_isnil(L, 2)) { + sz = (size_t) luaL_checknumber(L, 2); + + } else if (ct.is_array && !ct.is_variable_array) { + char* nul = memchr(data, '\0', ct.array_size); + sz = nul ? 
nul - data : ct.array_size; + + } else { + sz = strlen(data); + } + + lua_pushlstring(L, data, sz); + return 1; + } + + return luaL_error(L, "unable to convert cdata to string"); +} + +static int ffi_copy(lua_State* L) +{ + struct ctype ft, tt; + char *to, *from; + + setmintop(L, 3); + to = (char*) check_pointer(L, 1, &tt); + from = (char*) check_pointer(L, 2, &ft); + + if (!lua_isnoneornil(L, 3)) { + memcpy(to, from, (size_t) luaL_checknumber(L, 3)); + + } else if (ft.type == INT8_TYPE && ft.pointers == 1) { + size_t sz = ft.is_array ? ft.array_size : strlen(from); + memcpy(to, from, sz); + to[sz] = '\0'; + } + + return 0; +} + +static int ffi_fill(lua_State* L) +{ + struct ctype ct; + void* to; + size_t sz; + int val = 0; + + setmintop(L, 3); + to = check_pointer(L, 1, &ct); + sz = (size_t) luaL_checknumber(L, 2); + + if (!lua_isnoneornil(L, 3)) { + val = (int) luaL_checkinteger(L, 3); + } + + memset(to, val, sz); + return 0; +} + +static int ffi_abi(lua_State* L) +{ + luaL_checkstring(L, 1); + push_upval(L, &abi_key); + lua_pushvalue(L, 1); + lua_rawget(L, -2); + lua_pushboolean(L, lua_toboolean(L, -1)); + return 1; +} + +static int ffi_load(lua_State* L) +{ + const char* libname = luaL_checkstring(L, 1); + void** lib = (void**) lua_newuserdata(L, sizeof(void*)); + + *lib = LoadLibraryA(libname); + +#ifdef LIB_FORMAT_1 + if (!*lib) { + libname = lua_pushfstring(L, LIB_FORMAT_1, lua_tostring(L, 1)); + *lib = LoadLibraryA(libname); + lua_pop(L, 1); + } +#endif + +#ifdef LIB_FORMAT_2 + if (!*lib) { + libname = lua_pushfstring(L, LIB_FORMAT_2, lua_tostring(L, 1)); + *lib = LoadLibraryA(libname); + lua_pop(L, 1); + } +#endif + + if (!*lib) { + return luaL_error(L, "could not load library %s", lua_tostring(L, 1)); + } + + lua_newtable(L); + lua_setuservalue(L, -2); + + push_upval(L, &cmodule_mt_key); + lua_setmetatable(L, -2); + return 1; +} + +static void* find_symbol(lua_State* L, int modidx, const char* asmname) +{ + size_t i; + void** libs; + size_t num; + void* 
sym = NULL; + + libs = (void**) lua_touserdata(L, modidx); + num = lua_rawlen(L, modidx) / sizeof(void*); + + for (i = 0; i < num && sym == NULL; i++) { + if (libs[i]) { + sym = GetProcAddressA(libs[i], asmname); + } + } + + return sym; +} + +/* pushes the user table */ +static void* lookup_global(lua_State* L, int modidx, int nameidx, const char** pname, struct ctype* ct) +{ + int top = lua_gettop(L); + void* sym; + + modidx = lua_absindex(L, modidx); + nameidx = lua_absindex(L, nameidx); + + *pname = luaL_checkstring(L, nameidx); + + /* get the ctype */ + push_upval(L, &functions_key); + lua_pushvalue(L, nameidx); + lua_rawget(L, -2); + if (lua_isnil(L, -1)) { + luaL_error(L, "missing declaration for function/global %s", *pname); + return NULL; + } + + /* leave just the ct_usr on the stack */ + *ct = *(const struct ctype*) lua_touserdata(L, -1); + lua_getuservalue(L, -1); + lua_replace(L, top + 1); + lua_pop(L, 1); + + assert(lua_gettop(L) == top + 1); + + /* get the assembly name */ + push_upval(L, &asmname_key); + lua_pushvalue(L, nameidx); + lua_rawget(L, -2); + if (lua_isstring(L, -1)) { + *pname = lua_tostring(L, -1); + } + lua_pop(L, 2); + + sym = find_symbol(L, modidx, *pname); + + assert(lua_gettop(L) == top + 1); + return sym; +} + +static int cmodule_index(lua_State* L) +{ + const char* asmname; + struct ctype ct; + void *sym; + + lua_settop(L, 2); + + /* see if we have already loaded the function */ + lua_getuservalue(L, 1); + lua_pushvalue(L, 2); + lua_rawget(L, -2); + if (!lua_isnil(L, -1)) { + return 1; + } + lua_pop(L, 2); + + /* check the constants table */ + push_upval(L, &constants_key); + lua_pushvalue(L, 2); + lua_rawget(L, -2); + if (!lua_isnil(L, -1)) { + return 1; + } + lua_pop(L, 2); + + /* lookup_global pushes the ct_usr */ + sym = lookup_global(L, 1, 2, &asmname, &ct); + +#if defined _WIN32 && !defined _WIN64 && (defined __i386__ || defined _M_IX86) + if (!sym && ct.type == FUNCTION_TYPE) { + ct.calling_convention = STD_CALL; + 
lua_pushfstring(L, "_%s@%d", asmname, x86_return_size(L, -1, &ct)); + sym = find_symbol(L, 1, lua_tostring(L, -1)); + lua_pop(L, 1); + } + + if (!sym && ct.type == FUNCTION_TYPE) { + ct.calling_convention = FAST_CALL; + lua_pushfstring(L, "@%s@%d", asmname, x86_return_size(L, -1, &ct)); + sym = find_symbol(L, 1, lua_tostring(L, -1)); + lua_pop(L, 1); + } +#endif + + if (!sym) { + return luaL_error(L, "failed to find function/global %s", asmname); + } + + assert(lua_gettop(L) == 3); /* module, name, ct_usr */ + + if (ct.type == FUNCTION_TYPE) { + compile_function(L, (cfunction) sym, -1, &ct); + assert(lua_gettop(L) == 4); /* module, name, ct_usr, function */ + + /* set module usr value[luaname] = function to cache for next time */ + lua_getuservalue(L, 1); + lua_pushvalue(L, 2); + lua_pushvalue(L, -3); + lua_rawset(L, -3); + lua_pop(L, 1); /* module uv */ + return 1; + } + + /* extern const char* foo; and extern const char foo[]; */ + if (ct.pointers == 1 && ct.type == INT8_TYPE) { + char* str = (char*) sym; + if (!ct.is_array) { + str = *(char**) sym; + } + lua_pushstring(L, str); + return 1; + } + + /* extern struct foo foo[], extern void* foo[]; and extern struct foo foo; */ + if (ct.is_array || (!ct.pointers && (ct.type == UNION_TYPE || ct.type == STRUCT_TYPE))) { + void* p; + ct.is_reference = 1; + p = push_cdata(L, -1, &ct); + *(void**) p = sym; + return 1; + } + + /* extern void* foo; and extern void (*foo)(); */ + if (ct.pointers || ct.type == FUNCTION_PTR_TYPE) { + void* p = push_cdata(L, -1, &ct); + *(void**) p = *(void**) sym; + return 1; + } + + switch (ct.type) { + case COMPLEX_DOUBLE_TYPE: + case COMPLEX_FLOAT_TYPE: + case INTPTR_TYPE: + case INT64_TYPE: + { + /* TODO: complex float/double need to be references if .re and + * .imag are setable */ + void* p = push_cdata(L, -1, &ct); + memcpy(p, sym, ct.base_size); + return 1; + } + + case DOUBLE_TYPE: + lua_pushnumber(L, *(double*) sym); + return 1; + + case FLOAT_TYPE: + lua_pushnumber(L, *(float*) 
sym); + return 1; + + case BOOL_TYPE: + lua_pushboolean(L, *(bool*) sym); + return 1; + + case INT8_TYPE: + lua_pushnumber(L, ct.is_unsigned ? (lua_Number) *(uint8_t*) sym : (lua_Number) *(int8_t*) sym); + return 1; + + case INT16_TYPE: + lua_pushnumber(L, ct.is_unsigned ? (lua_Number) *(uint16_t*) sym : (lua_Number) *(int16_t*) sym); + return 1; + + case INT32_TYPE: + case ENUM_TYPE: + lua_pushnumber(L, ct.is_unsigned ? (lua_Number) *(uint32_t*) sym : (lua_Number) *(int32_t*) sym); + return 1; + } + + return luaL_error(L, "NYI - global value type"); +} + +static int cmodule_newindex(lua_State* L) +{ + const char* name; + void* sym; + struct ctype ct; + + lua_settop(L, 3); + + /* pushes the ct_usr */ + sym = lookup_global(L, 1, 2, &name, &ct); + assert(lua_gettop(L) == 4); /* module, name, value, ct_usr */ + + if (sym == NULL) { + return luaL_error(L, "failed to find global %s", name); + } + + if (ct.type == FUNCTION_TYPE || ct.is_array || (ct.const_mask & 1)) { + return luaL_error(L, "can not set global %s", name); + } + + set_value(L, 3, sym, -1, &ct, 1); + return 0; +} + +static int jit_gc(lua_State* L) +{ + size_t i; + struct jit* jit = get_jit(L); + dasm_free(jit); + for (i = 0; i < jit->pagenum; i++) { + FreePage(jit->pages[i], jit->pages[i]->size); + } + free(jit->globals); + return 0; +} + +static int ffi_debug(lua_State* L) +{ + lua_newtable(L); + push_upval(L, &ctype_mt_key); + lua_setfield(L, -2, "ctype_mt"); + push_upval(L, &cdata_mt_key); + lua_setfield(L, -2, "cdata_mt"); + push_upval(L, &cmodule_mt_key); + lua_setfield(L, -2, "cmodule_mt"); + push_upval(L, &constants_key); + lua_setfield(L, -2, "constants"); + push_upval(L, &types_key); + lua_setfield(L, -2, "types"); + push_upval(L, &jit_key); + lua_setfield(L, -2, "jit"); + push_upval(L, &gc_key); + lua_setfield(L, -2, "gc"); + push_upval(L, &callbacks_key); + lua_setfield(L, -2, "callbacks"); + push_upval(L, &functions_key); + lua_setfield(L, -2, "functions"); + push_upval(L, &abi_key); + 
lua_setfield(L, -2, "abi"); + push_upval(L, &next_unnamed_key); + lua_setfield(L, -2, "next_unnamed"); + return 1; +} + +static int do64(lua_State* L, int is_unsigned) +{ + lua_Number low, high; + struct ctype ct; + int64_t val; + + lua_settop(L, 2); + + if (!lua_isnil(L, 2)) { + high = luaL_checknumber(L, 1); + low = luaL_checknumber(L, 2); + } else { + high = 0; + low = luaL_checknumber(L, 1); + } + + val = ((int64_t) (uint32_t) high << 32) | (int64_t) (uint32_t) low; + + if (!is_unsigned && (high < 0 || low < 0)) { + val = -val; + } + + memset(&ct, 0, sizeof(ct)); + ct.type = INT64_TYPE; + ct.is_unsigned = is_unsigned; + ct.is_defined = 1; + ct.base_size = sizeof(int64_t); + push_number(L, (int64_t) val, 0, &ct); + + return 1; +} + +static int ffi_i64(lua_State* L) +{ return do64(L, 0); } + +static int ffi_u64(lua_State* L) +{ return do64(L, 1); } + +static const luaL_Reg cdata_mt[] = { + {"__gc", &cdata_gc}, + {"__call", &cdata_call}, + {"free", &cdata_free}, + {"set", &cdata_set}, + {"__index", &cdata_index}, + {"__newindex", &cdata_newindex}, + {"__add", &cdata_add}, + {"__sub", &cdata_sub}, + {"__mul", &cdata_mul}, + {"__div", &cdata_div}, + {"__mod", &cdata_mod}, + {"__pow", &cdata_pow}, + {"__unm", &cdata_unm}, + {"__eq", &cdata_eq}, + {"__lt", &cdata_lt}, + {"__le", &cdata_le}, + {"__tostring", &cdata_tostring}, + {"__concat", &cdata_concat}, + {"__len", &cdata_len}, + {"__pairs", &cdata_pairs}, + {"__ipairs", &cdata_ipairs}, + {NULL, NULL} +}; + +static const luaL_Reg callback_mt[] = { + {"__gc", &callback_free}, + {NULL, NULL} +}; + +static const luaL_Reg ctype_mt[] = { + {"__call", &ctype_call}, + {"__new", &ctype_new}, + {"__tostring", &ctype_tostring}, + {NULL, NULL} +}; + +static const luaL_Reg cmodule_mt[] = { + {"__index", &cmodule_index}, + {"__newindex", &cmodule_newindex}, + {NULL, NULL} +}; + +static const luaL_Reg jit_mt[] = { + {"__gc", &jit_gc}, + {NULL, NULL} +}; + +static const luaL_Reg ffi_reg[] = { + {"cdef", &ffi_cdef}, + {"load", 
&ffi_load}, + {"new", &ffi_new}, + {"typeof", &ffi_typeof}, + {"cast", &ffi_cast}, + {"metatype", &ffi_metatype}, + {"gc", &ffi_gc}, + {"sizeof", &ffi_sizeof}, + {"alignof", &ffi_alignof}, + {"offsetof", &ffi_offsetof}, + {"istype", &ffi_istype}, + {"errno", &ffi_errno}, + {"string", &ffi_string}, + {"copy", &ffi_copy}, + {"fill", &ffi_fill}, + {"abi", &ffi_abi}, + {"debug", &ffi_debug}, + {"i64", &ffi_i64}, + {"u64", &ffi_u64}, + {NULL, NULL} +}; + +/* leaves the usr table on the stack */ +static void push_builtin(lua_State* L, struct ctype* ct, const char* name, int type, int size, int align, int is_unsigned) +{ + memset(ct, 0, sizeof(*ct)); + ct->type = type; + ct->base_size = size; + ct->align_mask = align; + ct->is_defined = 1; + ct->is_unsigned = is_unsigned; + + push_upval(L, &types_key); + push_ctype(L, 0, ct); + lua_setfield(L, -2, name); + lua_pop(L, 1); /* types */ +} + +static void push_builtin_undef(lua_State* L, struct ctype* ct, const char* name, int type) +{ + memset(ct, 0, sizeof(*ct)); + ct->type = type; + + push_upval(L, &types_key); + push_ctype(L, 0, ct); + lua_setfield(L, -2, name); + lua_pop(L, 1); /* types */ +} + +static void add_typedef(lua_State* L, const char* from, const char* to) +{ + struct ctype ct; + struct parser P; + P.line = 1; + P.align_mask = DEFAULT_ALIGN_MASK; + P.next = P.prev = from; + + push_upval(L, &types_key); + parse_type(L, &P, &ct); + parse_argument(L, &P, -1, &ct, NULL, NULL); + push_ctype(L, -1, &ct); + + /* stack is at +4: types, type usr, arg usr, ctype */ + + lua_setfield(L, -4, to); + lua_pop(L, 3); /* types, type usr, arg usr */ +} + +static int setup_upvals(lua_State* L) +{ + struct jit* jit = get_jit(L); + + /* jit setup */ + { + dasm_init(jit, 64); +#ifdef _WIN32 + { + SYSTEM_INFO si; + GetSystemInfo(&si); + jit->align_page_size = si.dwAllocationGranularity - 1; + } +#else + jit->align_page_size = sysconf(_SC_PAGE_SIZE) - 1; +#endif + jit->globals = (void**) malloc(64 * sizeof(void*)); + 
dasm_setupglobal(jit, jit->globals, 64); + compile_globals(jit, L); + } + + /* ffi.C - userdata holding the module handles that find_symbol searches in order */ + { +#ifdef _WIN32 + size_t sz = sizeof(HMODULE) * 6; + HMODULE* libs = lua_newuserdata(L, sz); + memset(libs, 0, sz); + + /* exe - store the handle in slot 0 so that symbols exported by the host + * executable resolve through ffi.C, as libs[0] does in the !_WIN32 branch */ + libs[0] = GetModuleHandle(NULL); + /* lua dll */ +#ifdef LUA_DLL_NAME +#define STR2(tok) #tok +#define STR(tok) STR2(tok) + libs[1] = LoadLibraryA(STR(LUA_DLL_NAME)); +#undef STR +#undef STR2 +#endif + + /* crt */ +#ifdef UNDER_CE + libs[2] = LoadLibraryA("coredll.dll"); +#else + GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, (char*) &_fmode, &libs[2]); + libs[3] = LoadLibraryA("kernel32.dll"); + libs[4] = LoadLibraryA("user32.dll"); + libs[5] = LoadLibraryA("gdi32.dll"); +#endif + + jit->lua_dll = libs[1]; + jit->kernel32_dll = libs[3]; + +#else /* !_WIN32 */ + size_t sz = sizeof(void*) * 5; + void** libs = lua_newuserdata(L, sz); + memset(libs, 0, sz); + + libs[0] = LoadLibraryA(NULL); /* exe */ + libs[1] = LoadLibraryA("libc.so"); +#ifdef __GNUC__ + libs[2] = LoadLibraryA("libgcc.so"); +#endif + libs[3] = LoadLibraryA("libm.so"); + libs[4] = LoadLibraryA("libdl.so"); +#endif + + lua_newtable(L); + lua_setuservalue(L, -2); + + push_upval(L, &cmodule_mt_key); + lua_setmetatable(L, -2); + + lua_setfield(L, 1, "C"); + } + + /* setup builtin types */ + { + complex_double* pc; + struct {char ch; uint16_t v;} a16; + struct {char ch; uint32_t v;} a32; + struct {char ch; uint64_t v;} a64; + struct {char ch; float v;} af; + struct {char ch; double v;} ad; +#ifdef HAVE_LONG_DOUBLE + struct {char ch; long double v;} ald; +#endif + struct {char ch; uintptr_t v;} aptr; + struct ctype ct; + struct {char ch; complex_float v;} cf; + struct {char ch; complex_double v;} cd; +#if defined HAVE_LONG_DOUBLE && defined HAVE_COMPLEX + struct {char ch; complex long double v;} cld; +#endif + + push_builtin(L, &ct, "void", VOID_TYPE, 0, 0, 0); + push_builtin(L, &ct, "bool", BOOL_TYPE, sizeof(_Bool), sizeof(_Bool) -1, 1); + push_builtin(L, &ct, "uint8_t", INT8_TYPE, 
sizeof(uint8_t), 0, 1); + push_builtin(L, &ct, "int8_t", INT8_TYPE, sizeof(int8_t), 0, 0); + push_builtin(L, &ct, "uint16_t", INT16_TYPE, sizeof(uint16_t), ALIGNOF(a16), 1); + push_builtin(L, &ct, "int16_t", INT16_TYPE, sizeof(int16_t), ALIGNOF(a16), 0); + push_builtin(L, &ct, "uint32_t", INT32_TYPE, sizeof(uint32_t), ALIGNOF(a32), 1); + push_builtin(L, &ct, "int32_t", INT32_TYPE, sizeof(int32_t), ALIGNOF(a32), 0); + push_builtin(L, &ct, "uint64_t", INT64_TYPE, sizeof(uint64_t), ALIGNOF(a64), 1); + push_builtin(L, &ct, "int64_t", INT64_TYPE, sizeof(int64_t), ALIGNOF(a64), 0); + push_builtin(L, &ct, "float", FLOAT_TYPE, sizeof(float), ALIGNOF(af), 0); + push_builtin(L, &ct, "double", DOUBLE_TYPE, sizeof(double), ALIGNOF(ad), 0); +#ifdef HAVE_LONG_DOUBLE + push_builtin(L, &ct, "long double", LONG_DOUBLE_TYPE, sizeof(long double), ALIGNOF(ald), 0); +#else + push_builtin_undef(L, &ct, "long double", LONG_DOUBLE_TYPE); +#endif + push_builtin(L, &ct, "uintptr_t", INTPTR_TYPE, sizeof(uintptr_t), ALIGNOF(aptr), 1); + push_builtin(L, &ct, "intptr_t", INTPTR_TYPE, sizeof(uintptr_t), ALIGNOF(aptr), 0); + push_builtin(L, &ct, "complex float", COMPLEX_FLOAT_TYPE, sizeof(complex_float), ALIGNOF(cf), 0); + push_builtin(L, &ct, "complex double", COMPLEX_DOUBLE_TYPE, sizeof(complex_double), ALIGNOF(cd), 0); +#if defined HAVE_LONG_DOUBLE && defined HAVE_COMPLEX + push_builtin(L, &ct, "complex long double", COMPLEX_LONG_DOUBLE_TYPE, sizeof(complex long double), ALIGNOF(cld), 0); +#else + push_builtin_undef(L, &ct, "complex long double", COMPLEX_LONG_DOUBLE_TYPE); +#endif + + /* add NULL and i constants */ + push_upval(L, &constants_key); + + memset(&ct, 0, sizeof(ct)); + ct.type = VOID_TYPE; + ct.is_defined = 1; + ct.pointers = 1; + ct.is_null = 1; + + push_cdata(L, 0, &ct); + lua_setfield(L, -2, "NULL"); + + memset(&ct, 0, sizeof(ct)); + ct.type = COMPLEX_DOUBLE_TYPE; + ct.is_defined = 1; + ct.base_size = sizeof(complex_double); + pc = (complex_double*) push_cdata(L, 0, &ct); 
+#ifdef HAVE_COMPLEX + *pc = 1i; +#else + pc->real = 0; + pc->imag = 1; +#endif + lua_setfield(L, -2, "i"); + + lua_pop(L, 1); /* constants */ + } + + assert(lua_gettop(L) == 1); + + /* setup builtin typedefs */ + { + add_typedef(L, "bool", "_Bool"); + + if (sizeof(uint32_t) == sizeof(size_t)) { + add_typedef(L, "uint32_t", "size_t"); + add_typedef(L, "int32_t", "ssize_t"); + } else if (sizeof(uint64_t) == sizeof(size_t)) { + add_typedef(L, "uint64_t", "size_t"); + add_typedef(L, "int64_t", "ssize_t"); + } + + if (sizeof(int32_t) == sizeof(intptr_t)) { + add_typedef(L, "int32_t", "intptr_t"); + add_typedef(L, "int32_t", "ptrdiff_t"); + } else if (sizeof(int64_t) == sizeof(intptr_t)) { + add_typedef(L, "int64_t", "intptr_t"); + add_typedef(L, "int64_t", "ptrdiff_t"); + } + + if (sizeof(uint8_t) == sizeof(wchar_t)) { + add_typedef(L, "uint8_t", "wchar_t"); + } else if (sizeof(uint16_t) == sizeof(wchar_t)) { + add_typedef(L, "uint16_t", "wchar_t"); + } else if (sizeof(uint32_t) == sizeof(wchar_t)) { + add_typedef(L, "uint32_t", "wchar_t"); + } + + if (sizeof(va_list) == sizeof(char*)) { + add_typedef(L, "char*", "va_list"); + } else { + struct {char ch; va_list v;} av; + lua_pushfstring(L, "struct {char data[%d] __attribute__((align(%d)));}", (int) sizeof(va_list), (int) ALIGNOF(av) + 1); + add_typedef(L, lua_tostring(L, -1), "va_list"); + lua_pop(L, 1); + } + + add_typedef(L, "va_list", "__builtin_va_list"); + add_typedef(L, "va_list", "__gnuc_va_list"); + } + + assert(lua_gettop(L) == 1); + + /* setup ABI params table */ + push_upval(L, &abi_key); + +#if defined ARCH_X86 || defined ARCH_ARM + lua_pushboolean(L, 1); + lua_setfield(L, -2, "32bit"); +#elif defined ARCH_X64 + lua_pushboolean(L, 1); + lua_setfield(L, -2, "64bit"); +#else +#error +#endif + +#if defined ARCH_X86 || defined ARCH_X64 || defined ARCH_ARM + lua_pushboolean(L, 1); + lua_setfield(L, -2, "le"); +#else +#error +#endif + +#if defined ARCH_X86 || defined ARCH_X64 + lua_pushboolean(L, 1); + 
lua_setfield(L, -2, "fpu"); +#elif defined ARCH_ARM + lua_pushboolean(L, 1); + lua_setfield(L, -2, "softfp"); +#else +#error +#endif + lua_pop(L, 1); /* abi tbl */ + + + /* GC table - shouldn't pin cdata values */ + push_upval(L, &gc_key); + lua_newtable(L); + lua_pushliteral(L, "k"); + lua_setfield(L, -2, "__mode"); + lua_setmetatable(L, -2); + lua_pop(L, 1); /* gc table */ + + + /* ffi.os */ +#if defined OS_CE + lua_pushliteral(L, "WindowsCE"); +#elif defined OS_WIN + lua_pushliteral(L, "Windows"); +#elif defined OS_OSX + lua_pushliteral(L, "OSX"); +#elif defined OS_LINUX + lua_pushliteral(L, "Linux"); +#elif defined OS_BSD + lua_pushliteral(L, "BSD"); +#elif defined OS_POSIX + lua_pushliteral(L, "POSIX"); +#else + lua_pushliteral(L, "Other"); +#endif + lua_setfield(L, 1, "os"); + + + /* ffi.arch */ +#if defined ARCH_X86 + lua_pushliteral(L, "x86"); +#elif defined ARCH_X64 + lua_pushliteral(L, "x64"); +#elif defined ARCH_ARM + lua_pushliteral(L, "arm"); +#else +# error +#endif + lua_setfield(L, 1, "arch"); + + assert(lua_gettop(L) == 1); + + return 0; +} + +static void setup_mt(lua_State* L, const luaL_Reg* mt, int upvals) +{ + lua_pushboolean(L, 1); + lua_setfield(L, -upvals-2, "__metatable"); + luaL_setfuncs(L, mt, upvals); +} + +int luaopen_ffi(lua_State* L) +{ + lua_settop(L, 0); + + lua_newtable(L); + set_upval(L, &niluv_key); + + lua_newtable(L); + setup_mt(L, ctype_mt, 0); + set_upval(L, &ctype_mt_key); + + lua_newtable(L); + set_upval(L, &callbacks_key); + + lua_newtable(L); + set_upval(L, &gc_key); + + lua_newtable(L); + push_upval(L, &callbacks_key); + push_upval(L, &gc_key); + setup_mt(L, cdata_mt, 2); + set_upval(L, &cdata_mt_key); + + lua_newtable(L); + setup_mt(L, callback_mt, 0); + set_upval(L, &callback_mt_key); + + lua_newtable(L); + setup_mt(L, cmodule_mt, 0); + set_upval(L, &cmodule_mt_key); + + memset(lua_newuserdata(L, sizeof(struct jit)), 0, sizeof(struct jit)); + lua_newtable(L); + setup_mt(L, jit_mt, 0); + lua_setmetatable(L, -2); + 
set_upval(L, &jit_key); + + lua_newtable(L); + set_upval(L, &constants_key); + + lua_newtable(L); + set_upval(L, &types_key); + + lua_newtable(L); + set_upval(L, &functions_key); + + lua_newtable(L); + set_upval(L, &asmname_key); + + lua_newtable(L); + set_upval(L, &abi_key); + + lua_pushinteger(L, 1); + set_upval(L, &next_unnamed_key); + + assert(lua_gettop(L) == 0); + + /* ffi table */ + lua_newtable(L); + luaL_setfuncs(L, ffi_reg, 0); + + /* setup_upvals(ffi tbl) */ + lua_pushcfunction(L, &setup_upvals); + lua_pushvalue(L, 1); + lua_call(L, 1, 0); + + assert(lua_gettop(L) == 1); + + lua_getglobal(L, "tonumber"); + lua_pushcclosure(L, &ffi_number, 1); + lua_pushvalue(L, -1); + lua_setglobal(L, "tonumber"); + lua_setfield(L, -2, "number"); /* ffi.number */ + + return 1; +} diff --git a/tools/luaffi/ffi.h b/tools/luaffi/ffi.h new file mode 100644 index 0000000..efdcf37 --- /dev/null +++ b/tools/luaffi/ffi.h @@ -0,0 +1,450 @@ +/* vim: ts=4 sw=4 sts=4 et tw=78 + * + * Copyright (c) 2011 James R. McKaskill + * + * This software is licensed under the stock MIT license: + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * ---------------------------------------------------------------------------- + */ + +#pragma once + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +# include +# include +} +# define EXTERN_C extern "C" +#else +# include +# include +# define EXTERN_C extern +#endif + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#endif + +#if __STDC_VERSION__+0 >= 199901L && !defined(__TINYC__) //< @r-lyeh: handle tcc case +#include +#define HAVE_COMPLEX +#define HAVE_LONG_DOUBLE +#endif + +#ifndef NDEBUG +#define DASM_CHECKS +#endif + +struct jit; +#define Dst_DECL struct jit* Dst +#define Dst_REF (Dst->ctx) +#define DASM_EXTERN(a,b,c,d) get_extern(a,b,c,d) + +#include "dynasm/dasm_proto.h" + +#if defined LUA_FFI_BUILD_AS_DLL +# define EXPORT __declspec(dllexport) +#elif defined __GNUC__ +# define EXPORT __attribute__((visibility("default"))) +#else +# define EXPORT +#endif + +EXTERN_C EXPORT int luaopen_ffi(lua_State* L); + +/* Convert a stack-relative (negative) index into an absolute one. + * Zero, positive indices and pseudo-indices (LUA_REGISTRYINDEX and everything + * below it) are returned unchanged, hence the strict comparison. + */ +static int lua_absindex2(lua_State* L, int idx) { + return (LUA_REGISTRYINDEX < idx && idx < 0) + ? lua_gettop(L) + idx + 1 + : idx; +} +/* use our own version of lua_absindex such that lua_absindex(L, 0) == 0 */ +#define lua_absindex(L, idx) lua_absindex2(L, idx) + +#if LUA_VERSION_NUM == 501 +static void lua_callk(lua_State *L, int nargs, int nresults, int ctx, lua_CFunction k) +{ + lua_call(L, nargs, nresults); +} +/* +** set functions from list 'l' into table at top - 'nup'; each +** function gets the 'nup' elements at the top as upvalues. +** Returns with only the table at the stack. 
+*/ +static void luaL_setfuncs (lua_State *L, const luaL_Reg *l, int nup) { + luaL_checkstack(L, nup, "too many upvalues"); + for (; l && l->name; l++) { /* fill the table with given functions */ + int i; + for (i = 0; i < nup; i++) /* copy upvalues to the top */ + lua_pushvalue(L, -nup); + lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */ + lua_setfield(L, -(nup + 2), l->name); + } + lua_pop(L, nup); /* remove upvalues */ +} +#define lua_setuservalue lua_setfenv +#define lua_getuservalue lua_getfenv +#define lua_rawlen lua_objlen +static char* luaL_prepbuffsize(luaL_Buffer* B, size_t sz) { + if (sz > LUAL_BUFFERSIZE) { + luaL_error(B->L, "string too long"); + } + return luaL_prepbuffer(B); +} +#endif + +/* architectures */ +#if defined _WIN32 && defined UNDER_CE +# define OS_CE +#elif defined _WIN32 +# define OS_WIN +#elif defined __APPLE__ && defined __MACH__ +# define OS_OSX +#elif defined __linux__ +# define OS_LINUX +#elif defined __FreeBSD__ || defined __OpenBSD__ || defined __NetBSD__ +# define OS_BSD +#elif defined unix || defined __unix__ || defined __unix || defined _POSIX_VERSION || defined _XOPEN_VERSION +# define OS_POSIX +#endif + +/* architecture */ +#if defined __i386__ || defined _M_IX86 +# define ARCH_X86 +#elif defined __amd64__ || defined _M_X64 +# define ARCH_X64 +#elif defined __arm__ || defined __ARM__ || defined ARM || defined __ARM || defined __arm +# define ARCH_ARM +#elif defined OS_LINUX && defined __TINYC__ //< @r-lyeh: tcc+linux +# define ARCH_X64 //< @r-lyeh: tcc+linux +#else +# error +#endif + + +#ifdef _WIN32 + +# ifdef UNDER_CE + static void* DoLoadLibraryA(const char* name) { + wchar_t buf[MAX_PATH]; + int sz = MultiByteToWideChar(CP_UTF8, 0, name, -1, buf, 512); + if (sz > 0) { + buf[sz] = 0; + return LoadLibraryW(buf); + } else { + return NULL; + } + } +# define LoadLibraryA DoLoadLibraryA +# else +# define GetProcAddressA GetProcAddress +# endif + +# define LIB_FORMAT_1 "%s.dll" +# define AllocPage(size) 
VirtualAlloc(NULL, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE) +# define FreePage(data, size) VirtualFree(data, 0, MEM_RELEASE) +# define EnableExecute(data, size) do {DWORD old; VirtualProtect(data, size, PAGE_EXECUTE, &old); FlushInstructionCache(GetCurrentProcess(), data, size);} while (0) +# define EnableWrite(data, size) do {DWORD old; VirtualProtect(data, size, PAGE_READWRITE, &old);} while (0) + +#else +# define LIB_FORMAT_1 "%s.so" +# define LIB_FORMAT_2 "lib%s.so" +# define LoadLibraryA(name) dlopen(name, RTLD_LAZY | RTLD_GLOBAL) +# define GetProcAddressA(lib, name) dlsym(lib, name) +# define AllocPage(size) mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0) +# define FreePage(data, size) munmap(data, size) +# define EnableExecute(data, size) mprotect(data, size, PROT_READ|PROT_EXEC) +# define EnableWrite(data, size) mprotect(data, size, PROT_READ|PROT_WRITE) +#endif + +#if defined ARCH_X86 || defined ARCH_X64 +#define ALLOW_MISALIGNED_ACCESS +#endif + +struct token; + +struct parser { + int line; + const char* next; + const char* prev; + unsigned align_mask; +}; + +struct page { + size_t size; + size_t off; + size_t freed; +}; + +struct jit { + lua_State* L; + int32_t last_errno; + dasm_State* ctx; + size_t pagenum; + struct page** pages; + size_t align_page_size; + void** globals; + int function_extern; + void* lua_dll; + void* kernel32_dll; +}; + +#define ALIGN_DOWN(PTR, MASK) \ + (((uintptr_t) (PTR)) & (~ ((uintptr_t) (MASK)) )) +#define ALIGN_UP(PTR, MASK) \ + (( ((uintptr_t) (PTR)) + ((uintptr_t) (MASK)) ) & (~ ((uintptr_t) (MASK)) )) + +/* struct cdata/struct ctype */ + +#define PTR_ALIGN_MASK (sizeof(void*) - 1) +#define FUNCTION_ALIGN_MASK (sizeof(void (*)()) - 1) +#define DEFAULT_ALIGN_MASK 7 + +#ifdef OS_OSX +/* TODO: figure out why the alignof trick doesn't work on OS X */ +#define ALIGNED_DEFAULT 7 +#elif defined __GNUC__ +#define ALIGNED_DEFAULT (__alignof__(void* __attribute__((aligned))) - 1) +#else +#define 
ALIGNED_DEFAULT PTR_ALIGN_MASK +#endif + +extern int jit_key; +extern int ctype_mt_key; +extern int cdata_mt_key; +extern int cmodule_mt_key; +extern int callback_mt_key; +extern int constants_key; +extern int types_key; +extern int gc_key; +extern int callbacks_key; +extern int functions_key; +extern int abi_key; +extern int next_unnamed_key; +extern int niluv_key; +extern int asmname_key; + +int equals_upval(lua_State* L, int idx, int* key); +void push_upval(lua_State* L, int* key); +void set_upval(lua_State* L, int* key); +struct jit* get_jit(lua_State* L); + +/* both ctype and cdata are stored as userdatas + * + * usr value is a table shared between the related subtypes which has: + * name -> member ctype (for structs and unions) + * +ves -> member ctype - in memory order (for structs) + * +ves -> argument ctype (for function prototypes) + * 0 -> return ctype (for function prototypes) + * light userdata -> misc + */ + +enum { + C_CALL, + STD_CALL, + FAST_CALL, +}; + +enum { + INVALID_TYPE, + VOID_TYPE, + FLOAT_TYPE, + DOUBLE_TYPE, + LONG_DOUBLE_TYPE, + COMPLEX_FLOAT_TYPE, + COMPLEX_DOUBLE_TYPE, + COMPLEX_LONG_DOUBLE_TYPE, + BOOL_TYPE, + INT8_TYPE, + INT16_TYPE, + INT32_TYPE, + INT64_TYPE, + INTPTR_TYPE, + ENUM_TYPE, + UNION_TYPE, + STRUCT_TYPE, + FUNCTION_TYPE, + FUNCTION_PTR_TYPE, +}; + +#define IS_CHAR_UNSIGNED (((char) -1) > 0) +#define IS_COMPLEX(type) ((type) == COMPLEX_FLOAT_TYPE || (type) == COMPLEX_DOUBLE_TYPE) + +#define POINTER_BITS 2 +#define POINTER_MAX ((1 << POINTER_BITS) - 1) + +#define ALIGNOF(S) ((int) ((char*) &S.v - (char*) &S - 1)) + +/* Note: if adding a new member that is associated with a struct/union + * definition then it needs to be copied over in ctype.c:set_defined for when + * we create types based off of the declaration alone. + * + * Since this is used as a header for every ctype and cdata, and we create a + * ton of them on the stack, we try and minimise its size. 
+ */ +struct ctype { + size_t base_size; /* size of the base type in bytes */ + + union { + /* valid if is_bitfield */ + struct { + /* size of bitfield in bits */ + unsigned bit_size : 7; + /* offset within the current byte between 0-63 */ + unsigned bit_offset : 6; + }; + /* Valid if is_array */ + size_t array_size; + /* Valid for is_variable_struct or is_variable_array. If + * variable_size_known (only used for is_variable_struct) then this is + * the total increment otherwise this is the per element increment. + */ + size_t variable_increment; + }; + size_t offset; + unsigned align_mask : 4; /* as (align bytes - 1) eg 7 gives 8 byte alignment */ + unsigned pointers : POINTER_BITS; /* number of dereferences to get to the base type including +1 for arrays */ + unsigned const_mask : POINTER_MAX + 1; /* const pointer mask, LSB is current pointer, +1 for the whether the base type is const */ + unsigned type : 5; /* value given by type enum above */ + unsigned is_reference : 1; + unsigned is_array : 1; + unsigned is_defined : 1; + unsigned is_null : 1; + unsigned has_member_name : 1; + unsigned calling_convention : 2; + unsigned has_var_arg : 1; + unsigned is_variable_array : 1; /* set for variable array types where we don't know the variable size yet */ + unsigned is_variable_struct : 1; + unsigned variable_size_known : 1; /* used for variable structs after we know the variable size */ + unsigned is_bitfield : 1; + unsigned has_bitfield : 1; + unsigned is_jitted : 1; + unsigned is_packed : 1; + unsigned is_unsigned : 1; +}; + +#ifdef _MSC_VER +__declspec(align(16)) +#endif +struct cdata { + const struct ctype type +#ifdef __GNUC__ + __attribute__ ((aligned(16))) +#endif + ; +}; + +typedef void (*cfunction)(void); + +#ifdef HAVE_COMPLEX +typedef double complex complex_double; +typedef float complex complex_float; +#else +typedef struct { + double real, imag; +} complex_double; + +typedef struct { + float real, imag; +} complex_float; + +static double 
creal(complex_double c) { + return c.real; +} +static float crealf(complex_float c) { + return c.real; +} + +static double cimag(complex_double c) { + return c.imag; +} +static float cimagf(complex_float c) { + return c.imag; +} +#endif + +#define CALLBACK_FUNC_USR_IDX 1 + +void set_defined(lua_State* L, int ct_usr, struct ctype* ct); +struct ctype* push_ctype(lua_State* L, int ct_usr, const struct ctype* ct); +void* push_cdata(lua_State* L, int ct_usr, const struct ctype* ct); /* called from asm */ +void push_callback(lua_State* L, cfunction f); +void check_ctype(lua_State* L, int idx, struct ctype* ct); +void* to_cdata(lua_State* L, int idx, struct ctype* ct); +void* check_cdata(lua_State* L, int idx, struct ctype* ct); +size_t ctype_size(lua_State* L, const struct ctype* ct); + +int parse_type(lua_State* L, struct parser* P, struct ctype* type); +void parse_argument(lua_State* L, struct parser* P, int ct_usr, struct ctype* type, struct token* name, struct parser* asmname); +void push_type_name(lua_State* L, int usr, const struct ctype* ct); + +int push_user_mt(lua_State* L, int ct_usr, const struct ctype* ct); + +int ffi_cdef(lua_State* L); + +void push_func_ref(lua_State* L, cfunction func); +void free_code(struct jit* jit, lua_State* L, cfunction func); +int x86_return_size(lua_State* L, int usr, const struct ctype* ct); +void compile_function(lua_State* L, cfunction f, int ct_usr, const struct ctype* ct); +cfunction compile_callback(lua_State* L, int fidx, int ct_usr, const struct ctype* ct); +void compile_globals(struct jit* jit, lua_State* L); +int get_extern(struct jit* jit, uint8_t* addr, int idx, int type); + +/* WARNING: assembly needs to be updated for prototype changes of these functions */ +int check_bool(lua_State* L, int idx); +double check_double(lua_State* L, int idx); +double check_complex_imag(lua_State* L, int idx); +float check_float(lua_State* L, int idx); +uint64_t check_uint64(lua_State* L, int idx); +int64_t check_int64(lua_State* L, int 
idx); +int32_t check_int32(lua_State* L, int idx); +uint32_t check_uint32(lua_State* L, int idx); +uintptr_t check_uintptr(lua_State* L, int idx); +int32_t check_enum(lua_State* L, int idx, int to_usr, const struct ctype* tt); +/* these two will always push a value so that we can create structs/functions on the fly */ +void* check_typed_pointer(lua_State* L, int idx, int to_usr, const struct ctype* tt); +cfunction check_typed_cfunction(lua_State* L, int idx, int to_usr, const struct ctype* tt); +complex_double check_complex_double(lua_State* L, int idx); +complex_float check_complex_float(lua_State* L, int idx); + +void unpack_varargs_stack(lua_State* L, int first, int last, char* to); +void unpack_varargs_reg(lua_State* L, int first, int last, char* to); + +void unpack_varargs_stack_skip(lua_State* L, int first, int last, int ints_to_skip, int floats_to_skip, char* to); +void unpack_varargs_float(lua_State* L, int first, int last, int max, char* to); +void unpack_varargs_int(lua_State* L, int first, int last, int max, char* to); + + + diff --git a/tools/luaffi/parser.c b/tools/luaffi/parser.c new file mode 100644 index 0000000..cf0e408 --- /dev/null +++ b/tools/luaffi/parser.c @@ -0,0 +1,2552 @@ +/* vim: ts=4 sw=4 sts=4 et tw=78 + * Copyright (c) 2011 James R. McKaskill. 
See license in ffi.h + */ +#include "ffi.h" + +#define IS_CONST(tok) (IS_LITERAL(tok, "const") || IS_LITERAL(tok, "__const") || IS_LITERAL(tok, "__const__")) +#define IS_VOLATILE(tok) (IS_LITERAL(tok, "volatile") || IS_LITERAL(tok, "__volatile") || IS_LITERAL(tok, "__volatile__")) +#define IS_RESTRICT(tok) (IS_LITERAL(tok, "restrict") || IS_LITERAL(tok, "__restrict") || IS_LITERAL(tok, "__restrict__")) + +enum etoken { + TOK_NIL, + TOK_NUMBER, + TOK_STRING, + TOK_TOKEN, + + /* the order of these values must match the token strings in tok3, tok2 and tok1 below */ + + TOK_3_BEGIN, + TOK_VA_ARG, + + TOK_2_BEGIN, + TOK_LEFT_SHIFT, TOK_RIGHT_SHIFT, TOK_LOGICAL_AND, TOK_LOGICAL_OR, TOK_LESS_EQUAL, + TOK_GREATER_EQUAL, TOK_EQUAL, TOK_NOT_EQUAL, + + TOK_1_BEGIN, + TOK_OPEN_CURLY, TOK_CLOSE_CURLY, TOK_SEMICOLON, TOK_COMMA, TOK_COLON, + TOK_ASSIGN, TOK_OPEN_PAREN, TOK_CLOSE_PAREN, TOK_OPEN_SQUARE, TOK_CLOSE_SQUARE, + TOK_DOT, TOK_AMPERSAND, TOK_LOGICAL_NOT, TOK_BITWISE_NOT, TOK_MINUS, + TOK_PLUS, TOK_STAR, TOK_DIVIDE, TOK_MODULUS, TOK_LESS, + TOK_GREATER, TOK_BITWISE_XOR, TOK_BITWISE_OR, TOK_QUESTION, TOK_POUND, + + TOK_REFERENCE = TOK_AMPERSAND, + TOK_MULTIPLY = TOK_STAR, + TOK_BITWISE_AND = TOK_AMPERSAND, +}; + +struct token { + enum etoken type; + int64_t integer; + const char* str; + size_t size; +}; + +#define IS_LITERAL(TOK, STR) \ + (((TOK).size == sizeof(STR) - 1) && 0 == memcmp((TOK).str, STR, sizeof(STR) - 1)) + +/* the order of tokens _must_ match the order of the etoken enum values */ + +static char tok3[][4] = { + "...", /* unused ">>=", "<<=", */ +}; + +static char tok2[][3] = { + "<<", ">>", "&&", "||", "<=", + ">=", "==", "!=", + /* unused "+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=", "++", "--", "->", "::", */ +}; + +static char tok1[] = { + '{', '}', ';', ',', ':', + '=', '(', ')', '[', ']', + '.', '&', '!', '~', '-', + '+', '*', '/', '%', '<', + '>', '^', '|', '?', '#' +}; + +static int next_token(lua_State* L, struct parser* P, struct token* tok) +{ + size_t i; + const char*
s = P->next; + + /* UTF8 BOM */ + if (s[0] == '\xEF' && s[1] == '\xBB' && s[2] == '\xBF') { + s += 3; + } + + /* consume whitespace and comments */ + for (;;) { + /* consume whitespace */ + while(*s == '\t' || *s == '\n' || *s == ' ' || *s == '\v' || *s == '\r') { + if (*s == '\n') { + P->line++; + } + s++; + } + + /* consume comments */ + if (*s == '/' && *(s+1) == '/') { + + const char* eol = strchr(s, '\n'); + /* a line comment on the last line need not end in a newline; stop + * at the terminating NUL so the next pass reports TOK_NIL */ + s = eol ? eol : s + strlen(s); + + } else if (*s == '/' && *(s+1) == '*') { + s += 2; + + for (;;) { + if (s[0] == '\0') { + luaL_error(L, "non-terminated comment"); + } else if (s[0] == '*' && s[1] == '/') { + s += 2; + break; + } else if (s[0] == '\n') { + P->line++; + } + s++; + } + + } else if (*s == '\0') { + tok->type = TOK_NIL; + return 0; + + } else { + break; + } + } + + P->prev = s; + + for (i = 0; i < sizeof(tok3) / sizeof(tok3[0]); i++) { + if (s[0] == tok3[i][0] && s[1] == tok3[i][1] && s[2] == tok3[i][2]) { + tok->type = (enum etoken) (TOK_3_BEGIN + 1 + i); + P->next = s + 3; + goto end; + } + } + + for (i = 0; i < sizeof(tok2) / sizeof(tok2[0]); i++) { + if (s[0] == tok2[i][0] && s[1] == tok2[i][1]) { + tok->type = (enum etoken) (TOK_2_BEGIN + 1 + i); + P->next = s + 2; + goto end; + } + } + + for (i = 0; i < sizeof(tok1) / sizeof(tok1[0]); i++) { + if (s[0] == tok1[i]) { + tok->type = (enum etoken) (TOK_1_BEGIN + 1 + i); + P->next = s + 1; + goto end; + } + } + + if (*s == '.' || *s == '-' || ('0' <= *s && *s <= '9')) { + /* number - NOTE(review): '.' and '-' are listed in tok1 and are consumed by the loop above, so only a leading digit reaches this branch */ + tok->type = TOK_NUMBER; + + /* split out the negative case so we get the full range of bits for + * unsigned (eg to support 0xFFFFFFFF where sizeof(long) == 4) + */ + if (*s == '-') { + tok->integer = strtol(s, (char**) &s, 0); + } else { + tok->integer = strtoul(s, (char**) &s, 0); + } + + while (*s == 'u' || *s == 'U' || *s == 'l' || *s == 'L') { + s++; + } + + P->next = s; + goto end; + + } else if (*s == '\'' || *s == '\"') { + /* "..." or '...'
*/ + char quote = *s; + s++; /* jump over " */ + + tok->type = TOK_STRING; + tok->str = s; + + while (*s != quote) { + + if (*s == '\0' || (*s == '\\' && *(s+1) == '\0')) { + return luaL_error(L, "string not finished"); + } + + if (*s == '\\') { + s++; + } + + s++; + } + + tok->size = s - tok->str; + s++; /* jump over " */ + P->next = s; + goto end; + + } else if (('a' <= *s && *s <= 'z') || ('A' <= *s && *s <= 'Z') || *s == '_') { + /* identifier or keyword: [A-Za-z_][A-Za-z0-9_]* */ + tok->type = TOK_TOKEN; + tok->str = s; + + while (('a' <= *s && *s <= 'z') || ('A' <= *s && *s <= 'Z') || *s == '_' || ('0' <= *s && *s <= '9')) { + s++; + } + + tok->size = s - tok->str; + P->next = s; + goto end; + + } else { + return luaL_error(L, "invalid character on line %d", P->line); + } + +end: + /*fprintf(stderr, "token %d %d %.*s %.10s\n", tok->type, (int) tok->size, (tok->type == TOK_TOKEN || tok->type == TOK_STRING) ? (int) tok->size : 0, tok->str, P->next);*/ + return 1; +} + +static void require_token(lua_State* L, struct parser* P, struct token* tok) +{ + if (!next_token(L, P, tok)) { + luaL_error(L, "unexpected end"); + } +} + +static void check_token(lua_State* L, struct parser* P, int type, const char* str, const char* err, ...) +{ + struct token tok; + if (!next_token(L, P, &tok) || tok.type != type || (tok.type == TOK_TOKEN && (tok.size != strlen(str) || memcmp(tok.str, str, tok.size) != 0))) { + va_list ap; + va_start(ap, err); + lua_pushvfstring(L, err, ap); + lua_error(L); + } +} + +static void put_back(struct parser* P) +{ P->next = P->prev; } + + +int64_t calculate_constant(lua_State* L, struct parser* P); + +static int g_name_key; +static int g_front_name_key; +static int g_back_name_key; + +#ifndef max +#define max(a,b) ((a) < (b) ? (b) : (a)) +#endif + +#ifndef min +#define min(a,b) ((a) < (b) ? (a) : (b)) +#endif + +enum test {TEST}; + +/* Parses an enum definition from after the open curly through to the close + * curly.
Expects the user table to be on the top of the stack + */ +static int parse_enum(lua_State* L, struct parser* P, struct ctype* type) +{ + struct token tok; + int value = -1; + int ct_usr = lua_gettop(L); + + for (;;) { + require_token(L, P, &tok); + + assert(lua_gettop(L) == ct_usr); + + if (tok.type == TOK_CLOSE_CURLY) { + break; + } else if (tok.type != TOK_TOKEN) { + return luaL_error(L, "unexpected token in enum at line %d", P->line); + } + + lua_pushlstring(L, tok.str, tok.size); + + require_token(L, P, &tok); + + if (tok.type == TOK_COMMA || tok.type == TOK_CLOSE_CURLY) { + /* we have an auto calculated enum value */ + value++; + } else if (tok.type == TOK_ASSIGN) { + /* we have an explicit enum value */ + value = (int) calculate_constant(L, P); + require_token(L, P, &tok); + } else { + return luaL_error(L, "unexpected token in enum at line %d", P->line); + } + + assert(lua_gettop(L) == ct_usr + 1); + + /* add the enum value to the constants table */ + push_upval(L, &constants_key); + lua_pushvalue(L, -2); + lua_pushnumber(L, value); + lua_rawset(L, -3); + lua_pop(L, 1); + + assert(lua_gettop(L) == ct_usr + 1); + + /* add the enum value to the enum usr value table */ + lua_pushnumber(L, value); + lua_rawset(L, ct_usr); + + if (tok.type == TOK_CLOSE_CURLY) { + break; + } else if (tok.type != TOK_COMMA) { + return luaL_error(L, "unexpected token in enum at line %d", P->line); + } + } + + type->base_size = sizeof(enum test); + type->align_mask = sizeof(enum test) - 1; + + assert(lua_gettop(L) == ct_usr); + return 0; +} + +static void calculate_member_position(lua_State* L, struct parser* P, struct ctype* ct, struct ctype* mt, int* pbit_offset, int* pbitfield_type) +{ + int bit_offset = *pbit_offset; + + if (ct->type == UNION_TYPE) { + size_t msize; + + if (mt->is_variable_struct || mt->is_variable_array) { + luaL_error(L, "NYI: variable sized members in unions"); + return; + + } else if (mt->is_bitfield) { + msize = (mt->align_mask + 1); +#ifdef _WIN32 + /* MSVC 
has a bug where it doesn't update the alignment of + * a union for bitfield members. */ + mt->align_mask = 0; +#endif + + } else if (mt->is_array) { + msize = mt->array_size * (mt->pointers > 1 ? sizeof(void*) : mt->base_size); + + } else { + msize = mt->pointers ? sizeof(void*) : mt->base_size; + } + + ct->base_size = max(ct->base_size, msize); + + } else if (mt->is_bitfield) { + if (mt->has_member_name && mt->bit_size == 0) { + luaL_error(L, "zero length bitfields must be unnamed on line %d", P->line); + } + +#if defined _WIN32 + /* MSVC uses a separate storage unit for each size. This is aligned + * before the first bitfield. :0 finishes up the storage unit using + * the greater alignment of the storage unit or the type used with the + * :0. This is equivalent to the :0 always creating a new storage + * unit, but not necessarily using it yet. + */ + + if (*pbitfield_type == -1 && mt->bit_size == 0) { + /* a :0 that does not follow a bitfield is ignored */ + return; + } + + { + int different_storage = mt->align_mask != *pbitfield_type; + int no_room_left = bit_offset + mt->bit_size > (mt->align_mask + 1) * CHAR_BIT; + + if (different_storage || no_room_left || !mt->bit_size) { + ct->base_size += (bit_offset + CHAR_BIT - 1) / CHAR_BIT; + bit_offset = 0; + if (*pbitfield_type >= 0) { + ct->base_size = ALIGN_UP(ct->base_size, *pbitfield_type); + } + ct->base_size = ALIGN_UP(ct->base_size, mt->align_mask); + } + } + + mt->bit_offset = bit_offset; + mt->offset = ct->base_size; + + *pbitfield_type = mt->align_mask; + bit_offset += mt->bit_size; + +#elif defined OS_OSX + /* OSX doesn't use containers and bitfields are not aligned.
So + * bitfields never add any padding, except for :0 which still forces + * an alignment based off the type used with the :0 */ + if (mt->bit_size) { + mt->offset = ct->base_size; + mt->bit_offset = bit_offset; + bit_offset += mt->bit_size; + ct->base_size += bit_offset / CHAR_BIT; + bit_offset = bit_offset % CHAR_BIT; + } else { + ct->base_size += (bit_offset + CHAR_BIT - 1) / CHAR_BIT; + ct->base_size = ALIGN_UP(ct->base_size, mt->align_mask); + bit_offset = 0; + } + + if (!mt->has_member_name) { + /* unnamed bitfields don't update the struct alignment */ + mt->align_mask = 0; + } + +#elif defined __GNUC__ || defined __TINYC__ //< @r-lyeh: tcc case + /* GCC tries to pack bitfields in as close as much as possible, but + * still making sure that they don't cross alignment boundaries. + * :0 forces an alignment based off the type used with the :0 + */ + + int bits_used = (ct->base_size - ALIGN_DOWN(ct->base_size, mt->align_mask)) * CHAR_BIT + bit_offset; + int need_to_realign = bits_used + mt->bit_size > mt->base_size * CHAR_BIT; + + if (!mt->is_packed && (!mt->bit_size || need_to_realign)) { + ct->base_size += (bit_offset + CHAR_BIT - 1) / CHAR_BIT; + ct->base_size = ALIGN_UP(ct->base_size, mt->align_mask); + bit_offset = 0; + } + + mt->bit_offset = bit_offset; + mt->offset = ct->base_size; + + bit_offset += mt->bit_size; + ct->base_size += bit_offset / CHAR_BIT; + bit_offset = bit_offset % CHAR_BIT; + + /* unnamed bitfields don't update the struct alignment */ + if (!mt->has_member_name) { + mt->align_mask = 0; + } +#else +#error +#endif + + } else { + /* finish up the current bitfield storage unit */ + ct->base_size += (bit_offset + CHAR_BIT - 1) / CHAR_BIT; + bit_offset = 0; + + if (*pbitfield_type >= 0) { + ct->base_size = ALIGN_UP(ct->base_size, *pbitfield_type); + } + + *pbitfield_type = -1; + + ct->base_size = ALIGN_UP(ct->base_size, mt->align_mask); + mt->offset = ct->base_size; + + if (mt->is_variable_array) { + ct->is_variable_struct = 1; + 
ct->variable_increment = mt->pointers > 1 ? sizeof(void*) : mt->base_size; + + } else if (mt->is_variable_struct) { + assert(!mt->variable_size_known && !mt->is_array && !mt->pointers); + ct->base_size += mt->base_size; + ct->is_variable_struct = 1; + ct->variable_increment = mt->variable_increment; + + } else if (mt->is_array) { + ct->base_size += mt->array_size * (mt->pointers > 1 ? sizeof(void*) : mt->base_size); + + } else { + ct->base_size += mt->pointers ? sizeof(void*) : mt->base_size; + } + } + + /* increase the outer struct/union alignment if needed */ + if (mt->align_mask > (int) ct->align_mask) { + ct->align_mask = mt->align_mask; + } + + if (mt->has_bitfield || mt->is_bitfield) { + ct->has_bitfield = 1; + } + + *pbit_offset = bit_offset; +} + +static int copy_submembers(lua_State* L, int to_usr, int from_usr, const struct ctype* ft, int* midx) +{ + struct ctype ct; + int i, sublen; + + from_usr = lua_absindex(L, from_usr); + to_usr = lua_absindex(L, to_usr); + + /* integer keys */ + sublen = (int) lua_rawlen(L, from_usr); + for (i = 1; i <= sublen; i++) { + lua_rawgeti(L, from_usr, i); + + ct = *(const struct ctype*) lua_touserdata(L, -1); + ct.offset += ft->offset; + lua_getuservalue(L, -1); + + push_ctype(L, -1, &ct); + lua_rawseti(L, to_usr, (*midx)++); + + lua_pop(L, 2); /* ctype, user value */ + } + + /* string keys */ + lua_pushnil(L); + while (lua_next(L, from_usr)) { + if (lua_type(L, -2) == LUA_TSTRING) { + struct ctype ct = *(const struct ctype*) lua_touserdata(L, -1); + ct.offset += ft->offset; + lua_getuservalue(L, -1); + + /* uservalue[sub_mname] = new_sub_mtype */ + lua_pushvalue(L, -3); + push_ctype(L, -2, &ct); + lua_rawset(L, to_usr); + + lua_pop(L, 1); /* remove submember user value */ + } + lua_pop(L, 1); + } + + return 0; +} + +static int add_member(lua_State* L, int ct_usr, int mname, int mbr_usr, const struct ctype* mt, int* midx) +{ + ct_usr = lua_absindex(L, ct_usr); + mname = lua_absindex(L, mname); + + push_ctype(L, mbr_usr, 
mt); + + /* usrvalue[mbr index] = pushed mtype */ + lua_pushvalue(L, -1); + lua_rawseti(L, ct_usr, (*midx)++); + + /* set usrvalue[mname] = pushed mtype */ + lua_pushvalue(L, mname); + lua_pushvalue(L, -2); + lua_rawset(L, ct_usr); + + /* set usrvalue[mtype] = mname */ + lua_pushvalue(L, -1); + lua_pushvalue(L, mname); + lua_rawset(L, ct_usr); + + lua_pop(L, 1); + + return 0; +} + +/* Parses a struct from after the open curly through to the close curly. + */ +static int parse_struct(lua_State* L, struct parser* P, int tmp_usr, const struct ctype* ct) +{ + struct token tok; + int midx = 1; + int top = lua_gettop(L); + + tmp_usr = lua_absindex(L, tmp_usr); + + /* parse members */ + for (;;) { + struct ctype mbase; + + assert(lua_gettop(L) == top); + + /* see if we're at the end of the struct */ + require_token(L, P, &tok); + if (tok.type == TOK_CLOSE_CURLY) { + break; + } else if (ct->is_variable_struct) { + return luaL_error(L, "can't have members after a variable sized member on line %d", P->line); + } else { + put_back(P); + } + + /* members are of the form + * , , ; + * eg struct foo bar, *bar2[2]; + * mbase is 'struct foo' + * mtype is '' then '*[2]' + * mname is 'bar' then 'bar2' + */ + + parse_type(L, P, &mbase); + + for (;;) { + struct token mname; + struct ctype mt = mbase; + + memset(&mname, 0, sizeof(mname)); + + if (ct->is_variable_struct) { + return luaL_error(L, "can't have members after a variable sized member on line %d", P->line); + } + + assert(lua_gettop(L) == top + 1); + parse_argument(L, P, -1, &mt, &mname, NULL); + assert(lua_gettop(L) == top + 2); + + if (!mt.is_defined && (mt.pointers - mt.is_array) == 0) { + return luaL_error(L, "member type is undefined on line %d", P->line); + } + + if (mt.type == VOID_TYPE && (mt.pointers - mt.is_array) == 0) { + return luaL_error(L, "member type can not be void on line %d", P->line); + } + + mt.has_member_name = (mname.size > 0); + lua_pushlstring(L, mname.str, mname.size); + + add_member(L, tmp_usr, -1, 
-2, &mt, &midx); + + /* pop the usr value from parse_argument and the member name */ + lua_pop(L, 2); + assert(lua_gettop(L) == top + 1); + + require_token(L, P, &tok); + if (tok.type == TOK_SEMICOLON) { + break; + } else if (tok.type != TOK_COMMA) { + luaL_error(L, "unexpected token in struct definition on line %d", P->line); + } + } + + /* pop the usr value from parse_type */ + lua_pop(L, 1); + } + + assert(lua_gettop(L) == top); + return 0; +} + +static int calculate_struct_offsets(lua_State* L, struct parser* P, int ct_usr, struct ctype* ct, int tmp_usr) +{ + int i; + int midx = 1; + int sz = (int) lua_rawlen(L, tmp_usr); + int bit_offset = 0; + int bitfield_type = -1; + + ct_usr = lua_absindex(L, ct_usr); + tmp_usr = lua_absindex(L, tmp_usr); + + for (i = 1; i <= sz; i++) { + struct ctype mt; + + /* get the member type */ + lua_rawgeti(L, tmp_usr, i); + mt = *(const struct ctype*) lua_touserdata(L, -1); + + /* get the member user table */ + lua_getuservalue(L, -1); + + /* get the member name */ + lua_pushvalue(L, -2); + lua_rawget(L, tmp_usr); + + calculate_member_position(L, P, ct, &mt, &bit_offset, &bitfield_type); + + if (mt.has_member_name) { + assert(!lua_isnil(L, -1)); + add_member(L, ct_usr, -1, -2, &mt, &midx); + + } else if (mt.type == STRUCT_TYPE || mt.type == UNION_TYPE) { + /* With an unnamed member we copy all of the submembers into our + * usr value adjusting the offset as necessary. Note ctypes are + * immutable so need to push a new ctype to update the offset. + */ + copy_submembers(L, ct_usr, -2, &mt, &midx); + + } else { + /* We ignore unnamed members that aren't structs or unions.
These + * are there just to change the padding */ + } + + lua_pop(L, 3); + } + + /* finish up the current bitfield storage unit */ + ct->base_size += (bit_offset + CHAR_BIT - 1) / CHAR_BIT; + + /* only void is allowed 0 size */ + if (ct->base_size == 0) { + ct->base_size = 1; + } + + ct->base_size = ALIGN_UP(ct->base_size, ct->align_mask); + return 0; +} + +/* copy over attributes that could be specified before the typedef eg + * __attribute__((packed)) const type_t */ +static void instantiate_typedef(struct parser* P, struct ctype* tt, const struct ctype* ft) +{ + struct ctype pt = *tt; + *tt = *ft; + + tt->const_mask |= pt.const_mask; + tt->is_packed = pt.is_packed; + + if (tt->is_packed) { + tt->align_mask = 0; + } else { + /* Instantiate the typedef in the current packing. This may be + * further updated if a pointer is added or another alignment + * attribute is applied. If pt.align_mask is already non-zero then an + * increased alignment via __declspec(aligned(#)) has been set. */ + tt->align_mask = max(min(P->align_mask, tt->align_mask), pt.align_mask); + } +} + +/* this parses a struct, union or enum starting with the optional + * name before the opening brace + * leaves the type usr value on the stack + */ +static int parse_record(lua_State* L, struct parser* P, struct ctype* ct) +{ + struct token tok; + int top = lua_gettop(L); + + require_token(L, P, &tok); + + /* name is optional */ + if (tok.type == TOK_TOKEN) { + /* declaration */ + lua_pushlstring(L, tok.str, tok.size); + + assert(lua_gettop(L) == top+1); + + /* lookup the name to see if we've seen this type before */ + push_upval(L, &types_key); + lua_pushvalue(L, -2); + lua_rawget(L, top+2); + + assert(lua_gettop(L) == top+3); + + if (lua_isnil(L, -1)) { + lua_pop(L, 1); /* pop the nil usr value */ + lua_newtable(L); /* the new usr table */ + + /* stack layout is: + * top+1: record name + * top+2: types table + * top+3: new usr table + */ + + lua_pushlightuserdata(L, &g_name_key); + lua_pushvalue(L,
top+1); + lua_rawset(L, top+3); /* usr[name_key] = name */ + + lua_pushvalue(L, top+1); + push_ctype(L, top+3, ct); + lua_rawset(L, top+2); /* types[name] = new_ctype */ + + } else { + /* get the existing declared type */ + const struct ctype* prevt = (const struct ctype*) lua_touserdata(L, top+3); + + if (prevt->type != ct->type) { + lua_getuservalue(L, top+3); + push_type_name(L, -1, ct); + push_type_name(L, top+4, prevt); /* top+4 is the usr table pushed by lua_getuservalue above; top+3 is the ctype userdata itself */ + luaL_error(L, "type '%s' previously declared as '%s'", lua_tostring(L, -2), lua_tostring(L, -1)); + } + + instantiate_typedef(P, ct, prevt); + + /* replace the ctype with its usr value */ + lua_getuservalue(L, -1); + lua_replace(L, -2); + } + + /* remove the extra name and types table */ + lua_replace(L, -3); + lua_pop(L, 1); + + assert(lua_gettop(L) == top + 1 && lua_istable(L, -1)); + + /* if a name is given then we may be at the end of the string + * eg for ffi.new('struct foo') + */ + if (!next_token(L, P, &tok)) { + return 0; + } + + } else { + /* create a new unnamed record */ + int num; + + /* get the next unnamed number */ + push_upval(L, &next_unnamed_key); + num = lua_tointeger(L, -1); + lua_pop(L, 1); + + /* increment the unnamed upval */ + lua_pushinteger(L, num + 1); + set_upval(L, &next_unnamed_key); + + lua_newtable(L); /* the new usr table - leave on stack */ + + /* usr[name_key] = num */ + lua_pushlightuserdata(L, &g_name_key); + lua_pushfstring(L, "%d", num); + lua_rawset(L, -3); + } + + if (tok.type != TOK_OPEN_CURLY) { + /* this may just be a declaration or use of the type as an argument or + * member */ + put_back(P); + return 0; + } + + if (ct->is_defined) { + return luaL_error(L, "redefinition in line %d", P->line); + } + + assert(lua_gettop(L) == top + 1 && lua_istable(L, -1)); + + if (ct->type == ENUM_TYPE) { + parse_enum(L, P, ct); + } else { + /* we do a two stage parse, where we parse the content first and build up + * the temp user table. We then iterate over that to calculate the offsets + * and fill out ct_usr.
This is so we can handle out of order members + * (eg vtable) and attributes specified at the end of the struct. + */ + lua_newtable(L); + parse_struct(L, P, -1, ct); + calculate_struct_offsets(L, P, -2, ct, -1); + assert(lua_gettop(L) == top + 2 && lua_istable(L, -1)); + lua_pop(L, 1); + } + + assert(lua_gettop(L) == top + 1 && lua_istable(L, -1)); + set_defined(L, -1, ct); + assert(lua_gettop(L) == top + 1); + return 0; +} + +/* parses a single or multi word built-in type name, and pushes the resulting type name onto the stack */ +static int parse_type_name(lua_State* L, struct parser* P) +{ + struct token tok; + int flags = 0; + + enum { + UNSIGNED = 0x01, + SIGNED = 0x02, + LONG = 0x04, + SHORT = 0x08, + INT = 0x10, + CHAR = 0x20, + LONG_LONG = 0x40, + INT8 = 0x80, + INT16 = 0x100, + INT32 = 0x200, + INT64 = 0x400, + DOUBLE = 0x800, + FLOAT = 0x1000, + COMPLEX = 0x2000, + }; + + require_token(L, P, &tok); + + /* we have to manually decode the builtin types since they can take up + * more than one token + */ + for (;;) { + if (tok.type != TOK_TOKEN) { + break; + } else if (IS_LITERAL(tok, "unsigned")) { + flags |= UNSIGNED; + } else if (IS_LITERAL(tok, "signed")) { + flags |= SIGNED; + } else if (IS_LITERAL(tok, "short")) { + flags |= SHORT; + } else if (IS_LITERAL(tok, "char")) { + flags |= CHAR; + } else if (IS_LITERAL(tok, "long")) { + flags |= (flags & LONG) ?
LONG_LONG : LONG; + } else if (IS_LITERAL(tok, "int")) { + flags |= INT; + } else if (IS_LITERAL(tok, "__int8")) { + flags |= INT8; + } else if (IS_LITERAL(tok, "__int16")) { + flags |= INT16; + } else if (IS_LITERAL(tok, "__int32")) { + flags |= INT32; + } else if (IS_LITERAL(tok, "__int64")) { + flags |= INT64; + } else if (IS_LITERAL(tok, "double")) { + flags |= DOUBLE; + } else if (IS_LITERAL(tok, "float")) { + flags |= FLOAT; + } else if (IS_LITERAL(tok, "complex") || IS_LITERAL(tok, "_Complex")) { + flags |= COMPLEX; + } else if (IS_LITERAL(tok, "register")) { + /* ignore */ + } else { + break; + } + + if (!next_token(L, P, &tok)) { + break; + } + } + + if (flags) { + put_back(P); + } + + if (flags & CHAR) { + if (flags & SIGNED) { + lua_pushliteral(L, "int8_t"); + } else if (flags & UNSIGNED) { + lua_pushliteral(L, "uint8_t"); + } else { + lua_pushstring(L, (((char) -1) > 0) ? "uint8_t" : "int8_t"); + } + + } else if (flags & INT8) { + lua_pushstring(L, (flags & UNSIGNED) ? "uint8_t" : "int8_t"); + } else if (flags & INT16) { + lua_pushstring(L, (flags & UNSIGNED) ? "uint16_t" : "int16_t"); + } else if (flags & INT32) { + lua_pushstring(L, (flags & UNSIGNED) ? "uint32_t" : "int32_t"); + } else if (flags & (INT64 | LONG_LONG)) { + lua_pushstring(L, (flags & UNSIGNED) ? "uint64_t" : "int64_t"); + + } else if (flags & COMPLEX) { + if (flags & LONG) { + lua_pushliteral(L, "complex long double"); + } else if (flags & FLOAT) { + lua_pushliteral(L, "complex float"); + } else { + lua_pushliteral(L, "complex double"); + } + + } else if (flags & DOUBLE) { + if (flags & LONG) { + lua_pushliteral(L, "long double"); + } else { + lua_pushliteral(L, "double"); + } + + } else if (flags & FLOAT) { + lua_pushliteral(L, "float"); + + } else if (flags & SHORT) { +#define SHORT_TYPE(u) (sizeof(short) == sizeof(int64_t) ? u "int64_t" : sizeof(short) == sizeof(int32_t) ? 
u "int32_t" : u "int16_t") + if (flags & UNSIGNED) { + lua_pushstring(L, SHORT_TYPE("u")); + } else { + lua_pushstring(L, SHORT_TYPE("")); + } +#undef SHORT_TYPE + + } else if (flags & LONG) { +#define LONG_TYPE(u) (sizeof(long) == sizeof(int64_t) ? u "int64_t" : u "int32_t") + if (flags & UNSIGNED) { + lua_pushstring(L, LONG_TYPE("u")); + } else { + lua_pushstring(L, LONG_TYPE("")); + } +#undef LONG_TYPE + + } else if (flags) { +#define INT_TYPE(u) (sizeof(int) == sizeof(int64_t) ? u "int64_t" : sizeof(int) == sizeof(int32_t) ? u "int32_t" : u "int16_t") + if (flags & UNSIGNED) { + lua_pushstring(L, INT_TYPE("u")); + } else { + lua_pushstring(L, INT_TYPE("")); + } +#undef INT_TYPE + + } else { + lua_pushlstring(L, tok.str, tok.size); + } + + return 0; +} + +/* parse_attribute parses a token to see if it is an attribute. It may then + * parse some following tokens to decode the attribute setting the appropriate + * fields in ct. It will return 1 if the token was used (and possibly some + * more following it) or 0 if not. If the token was used, the next token must + * be retrieved using next_token/require_token. 
+ */ +static int parse_attribute(lua_State* L, struct parser* P, struct token* tok, struct ctype* ct, struct parser* asmname) +{ + if (tok->type != TOK_TOKEN) { + return 0; + + } else if (asmname && (IS_LITERAL(*tok, "__asm__") || IS_LITERAL(*tok, "__asm"))) { + check_token(L, P, TOK_OPEN_PAREN, NULL, "unexpected token after __asm__ on line %d", P->line); + *asmname = *P; + + require_token(L, P, tok); + while (tok->type == TOK_STRING) { + require_token(L, P, tok); + } + + if (tok->type != TOK_CLOSE_PAREN) { + luaL_error(L, "unexpected token after __asm__ on line %d", P->line); + } + return 1; + + } else if (IS_LITERAL(*tok, "__attribute__") || IS_LITERAL(*tok, "__declspec")) { + int parens = 1; + check_token(L, P, TOK_OPEN_PAREN, NULL, "expected parenthesis after __attribute__ or __declspec on line %d", P->line); + + for (;;) { + require_token(L, P, tok); + if (tok->type == TOK_OPEN_PAREN) { + parens++; + } else if (tok->type == TOK_CLOSE_PAREN) { + if (--parens == 0) { + break; + } + + } else if (tok->type != TOK_TOKEN) { + /* ignore unknown symbols within parentheses */ + + } else if (IS_LITERAL(*tok, "align") || IS_LITERAL(*tok, "aligned") || IS_LITERAL(*tok, "__aligned__")) { + unsigned align = 0; + require_token(L, P, tok); + + switch (tok->type) { + case TOK_CLOSE_PAREN: + align = ALIGNED_DEFAULT; + put_back(P); + break; + + case TOK_OPEN_PAREN: + require_token(L, P, tok); + + if (tok->type != TOK_NUMBER) { + luaL_error(L, "expected align(#) on line %d", P->line); + } + + switch (tok->integer) { + case 1: align = 0; break; + case 2: align = 1; break; + case 4: align = 3; break; + case 8: align = 7; break; + case 16: align = 15; break; + default: + luaL_error(L, "unsupported align size on line %d", P->line); + } + + check_token(L, P, TOK_CLOSE_PAREN, NULL, "expected align(#) on line %d", P->line); + break; + + default: + luaL_error(L, "expected align(#) on line %d", P->line); + } + + /* __attribute__(aligned(#)) is only supposed to increase alignment */ + 
ct->align_mask = max(align, ct->align_mask); + + } else if (IS_LITERAL(*tok, "packed") || IS_LITERAL(*tok, "__packed__")) { + ct->align_mask = 0; + ct->is_packed = 1; + + } else if (IS_LITERAL(*tok, "mode") || IS_LITERAL(*tok, "__mode__")) { + + check_token(L, P, TOK_OPEN_PAREN, NULL, "expected mode(MODE) on line %d", P->line); + + require_token(L, P, tok); + if (tok->type != TOK_TOKEN) { + luaL_error(L, "expected mode(MODE) on line %d", P->line); + } + + if (ct->type == FLOAT_TYPE || ct->type == DOUBLE_TYPE) { + struct {char ch; float v;} af; + struct {char ch; double v;} ad; + + if (IS_LITERAL(*tok, "SF") || IS_LITERAL(*tok, "__SF__")) { + ct->type = FLOAT_TYPE; + ct->base_size = sizeof(float); + ct->align_mask = ALIGNOF(af); + + } else if (IS_LITERAL(*tok, "DF") || IS_LITERAL(*tok, "__DF__")) { + ct->type = DOUBLE_TYPE; + ct->base_size = sizeof(double); + ct->align_mask = ALIGNOF(ad); + + } else { + luaL_error(L, "unexpected mode on line %d", P->line); + } + + } else { + struct {char ch; uint16_t v;} a16; + struct {char ch; uint32_t v;} a32; + struct {char ch; uint64_t v;} a64; + + if (IS_LITERAL(*tok, "QI") || IS_LITERAL(*tok, "__QI__") + || IS_LITERAL(*tok, "byte") || IS_LITERAL(*tok, "__byte__") + ) { + ct->type = INT8_TYPE; + ct->base_size = sizeof(uint8_t); + ct->align_mask = 0; + + } else if (IS_LITERAL(*tok, "HI") || IS_LITERAL(*tok, "__HI__")) { + ct->type = INT16_TYPE; + ct->base_size = sizeof(uint16_t); + ct->align_mask = ALIGNOF(a16); + + } else if (IS_LITERAL(*tok, "SI") || IS_LITERAL(*tok, "__SI__") +#if defined ARCH_X86 || defined ARCH_ARM + || IS_LITERAL(*tok, "word") || IS_LITERAL(*tok, "__word__") + || IS_LITERAL(*tok, "pointer") || IS_LITERAL(*tok, "__pointer__") +#endif + ) { + ct->type = INT32_TYPE; + ct->base_size = sizeof(uint32_t); + ct->align_mask = ALIGNOF(a32); + + } else if (IS_LITERAL(*tok, "DI") || IS_LITERAL(*tok, "__DI__") +#if defined ARCH_X64 + || IS_LITERAL(*tok, "word") || IS_LITERAL(*tok, "__word__") + || IS_LITERAL(*tok, 
"pointer") || IS_LITERAL(*tok, "__pointer__") +#endif + ) { + ct->type = INT64_TYPE; + ct->base_size = sizeof(uint64_t); + ct->align_mask = ALIGNOF(a64); + + } else { + luaL_error(L, "unexpected mode on line %d", P->line); + } + } + + check_token(L, P, TOK_CLOSE_PAREN, NULL, "expected mode(MODE) on line %d", P->line); + + } else if (IS_LITERAL(*tok, "cdecl") || IS_LITERAL(*tok, "__cdecl__")) { + ct->calling_convention = C_CALL; + + } else if (IS_LITERAL(*tok, "fastcall") || IS_LITERAL(*tok, "__fastcall__")) { + ct->calling_convention = FAST_CALL; + + } else if (IS_LITERAL(*tok, "stdcall") || IS_LITERAL(*tok, "__stdcall__")) { + ct->calling_convention = STD_CALL; + } + /* ignore unknown tokens within parentheses */ + } + return 1; + + } else if (IS_LITERAL(*tok, "__cdecl")) { + ct->calling_convention = C_CALL; + return 1; + + } else if (IS_LITERAL(*tok, "__fastcall")) { + ct->calling_convention = FAST_CALL; + return 1; + + } else if (IS_LITERAL(*tok, "__stdcall")) { + ct->calling_convention = STD_CALL; + return 1; + + } else if (IS_LITERAL(*tok, "__extension__") || IS_LITERAL(*tok, "extern")) { + /* ignore */ + return 1; + + } else { + return 0; + } +} + +/* parses out the base type of a type expression in a function declaration, + * struct definition, typedef etc + * + * leaves the usr value of the type on the stack + */ +int parse_type(lua_State* L, struct parser* P, struct ctype* ct) +{ + struct token tok; + int top = lua_gettop(L); + + memset(ct, 0, sizeof(*ct)); + + require_token(L, P, &tok); + + /* get function attributes before the return type */ + while (parse_attribute(L, P, &tok, ct, NULL)) { + require_token(L, P, &tok); + } + + /* get const/volatile before the base type */ + for (;;) { + if (tok.type != TOK_TOKEN) { + return luaL_error(L, "unexpected value before type name on line %d", P->line); + + } else if (IS_CONST(tok)) { + ct->const_mask = 1; + require_token(L, P, &tok); + + } else if (IS_VOLATILE(tok) || IS_RESTRICT(tok)) { + /* ignored for now */ 
+ require_token(L, P, &tok); + + } else { + break; + } + } + + /* get base type */ + if (tok.type != TOK_TOKEN) { + return luaL_error(L, "unexpected value before type name on line %d", P->line); + + } else if (IS_LITERAL(tok, "struct")) { + ct->type = STRUCT_TYPE; + parse_record(L, P, ct); + + } else if (IS_LITERAL(tok, "union")) { + ct->type = UNION_TYPE; + parse_record(L, P, ct); + + } else if (IS_LITERAL(tok, "enum")) { + ct->type = ENUM_TYPE; + parse_record(L, P, ct); + + } else { + put_back(P); + + /* lookup type */ + push_upval(L, &types_key); + parse_type_name(L, P); + lua_rawget(L, -2); + lua_remove(L, -2); + + if (lua_isnil(L, -1)) { + lua_pushlstring(L, tok.str, tok.size); + return luaL_error(L, "unknown type %s on line %d", lua_tostring(L, -1), P->line); + } + + instantiate_typedef(P, ct, (const struct ctype*) lua_touserdata(L, -1)); + + /* we only want the usr tbl from the ctype in the types tbl */ + lua_getuservalue(L, -1); + lua_replace(L, -2); + } + + while (next_token(L, P, &tok)) { + if (tok.type != TOK_TOKEN) { + put_back(P); + break; + + } else if (IS_CONST(tok) || IS_VOLATILE(tok)) { + /* ignore for now */ + + } else { + put_back(P); + break; + } + } + + assert(lua_gettop(L) == top + 1 && (lua_istable(L, -1) || lua_isnil(L, -1))); + return 0; +} + +enum name_type { + BOTH, + FRONT, + BACK, +}; + +static void append_type_name(luaL_Buffer* B, int usr, const struct ctype* ct, enum name_type type) +{ + size_t i; + lua_State* L = B->L; + + usr = lua_absindex(L, usr); + + if (type == FRONT || type == BOTH) { + if (ct->type != FUNCTION_PTR_TYPE && (ct->const_mask & (1 << ct->pointers))) { + luaL_addstring(B, "const "); + } + + if (ct->is_unsigned) { + luaL_addstring(B, "unsigned "); + } + + switch (ct->type) { + case ENUM_TYPE: + luaL_addstring(B, "enum "); + goto get_name; + + case STRUCT_TYPE: + luaL_addstring(B, "struct "); + goto get_name; + + case UNION_TYPE: + luaL_addstring(B, "union "); + goto get_name; + + get_name: + lua_pushlightuserdata(L, 
&g_name_key); + lua_rawget(L, usr); + luaL_addvalue(B); + break; + + case FUNCTION_TYPE: + case FUNCTION_PTR_TYPE: + lua_pushlightuserdata(L, &g_front_name_key); + lua_rawget(L, usr); + luaL_addvalue(B); + break; + + case VOID_TYPE: + luaL_addstring(B, "void"); + break; + case BOOL_TYPE: + luaL_addstring(B, "bool"); + break; + case DOUBLE_TYPE: + luaL_addstring(B, "double"); + break; + case LONG_DOUBLE_TYPE: + luaL_addstring(B, "long double"); + break; + case FLOAT_TYPE: + luaL_addstring(B, "float"); + break; + case COMPLEX_LONG_DOUBLE_TYPE: + luaL_addstring(B, "long complex double"); + break; + case COMPLEX_DOUBLE_TYPE: + luaL_addstring(B, "complex double"); + break; + case COMPLEX_FLOAT_TYPE: + luaL_addstring(B, "complex float"); + break; + case INT8_TYPE: + luaL_addstring(B, "char"); + break; + case INT16_TYPE: + luaL_addstring(B, "short"); + break; + case INT32_TYPE: + luaL_addstring(B, "int"); + break; + case INT64_TYPE: + luaL_addstring(B, "long long"); + break; + + case INTPTR_TYPE: + if (sizeof(intptr_t) == sizeof(int32_t)) { + luaL_addstring(B, "long"); + } else if (sizeof(intptr_t) == sizeof(int64_t)) { + luaL_addstring(B, "long long"); + } else { + luaL_error(L, "internal error - bad type"); + } + break; + + default: + luaL_error(L, "internal error - bad type %d", ct->type); + } + + for (i = 0; i < ct->pointers - ct->is_array; i++) { + luaL_addchar(B, '*'); + if (ct->const_mask & (1 << (ct->pointers - i - 1))) { + luaL_addstring(B, " const"); + } + } + } + + if (type == BOTH || type == BACK) { + if (ct->is_reference) { + luaL_addstring(B, "(&)"); + } + + if (ct->is_variable_array && !ct->variable_size_known) { + luaL_addstring(B, "[?]"); + } else if (ct->is_array) { + lua_pushfstring(L, "[%d]", (int) ct->array_size); + luaL_addvalue(B); + } + + if (ct->type == FUNCTION_PTR_TYPE || ct->type == FUNCTION_TYPE) { + lua_pushlightuserdata(L, &g_back_name_key); + lua_rawget(L, usr); + luaL_addvalue(B); + } + + if (ct->is_bitfield) { + lua_pushfstring(L, " : 
%d", (int) ct->bit_size); + luaL_addvalue(B); + } + } +} + +void push_type_name(lua_State* L, int usr, const struct ctype* ct) +{ + luaL_Buffer B; + usr = lua_absindex(L, usr); + luaL_buffinit(L, &B); + append_type_name(&B, usr, ct, BOTH); + luaL_pushresult(&B); +} + +static void push_function_type_strings(lua_State* L, int usr, const struct ctype* ct) +{ + size_t i, args; + luaL_Buffer B; + int top = lua_gettop(L); + const struct ctype* ret_ct; + + int arg_ct = top+3; + int arg_usr = top+4; + int ret_usr = top+6; + + usr = lua_absindex(L, usr); + + /* return type */ + lua_settop(L, top+4); /* room for two returns and two temp positions */ + lua_rawgeti(L, usr, 0); + lua_getuservalue(L, -1); + ret_ct = (const struct ctype*) lua_touserdata(L, -2); + + luaL_buffinit(L, &B); + append_type_name(&B, ret_usr, ret_ct, FRONT); + + if (ret_ct->type != FUNCTION_TYPE && ret_ct->type != FUNCTION_PTR_TYPE) { + luaL_addchar(&B, ' '); + } + + switch (ct->calling_convention) { + case STD_CALL: + luaL_addstring(&B, "(__stdcall *"); + break; + case FAST_CALL: + luaL_addstring(&B, "(__fastcall *"); + break; + case C_CALL: + luaL_addstring(&B, "(*"); + break; + default: + luaL_error(L, "internal error - unknown calling convention"); + } + + luaL_pushresult(&B); + lua_replace(L, top+1); + + luaL_buffinit(L, &B); + luaL_addstring(&B, ")("); + + /* arguments */ + args = lua_rawlen(L, usr); + for (i = 1; i <= args; i++) { + if (i > 1) { + luaL_addstring(&B, ", "); + } + + /* note push the arg and user value below the indexes used by the buffer + * and use indexes relative to top to avoid problems due to the buffer + * system pushing a variable number of arguments onto the stack */ + lua_rawgeti(L, usr, (int) i); + lua_replace(L, arg_ct); + lua_getuservalue(L, arg_ct); + lua_replace(L, arg_usr); + append_type_name(&B, arg_usr, (const struct ctype*) lua_touserdata(L, arg_ct), BOTH); + } + + luaL_addstring(&B, ")"); + append_type_name(&B, ret_usr, ret_ct, BACK); + luaL_pushresult(&B); + 
lua_replace(L, top+2); + + lua_settop(L, top+2); + assert(lua_isstring(L, top+1) && lua_isstring(L, top+2)); +} + +/* parses from after the opening paranthesis to after the closing parenthesis */ +static void parse_function_arguments(lua_State* L, struct parser* P, int ct_usr, struct ctype* ct) +{ + struct token tok; + int args = 0; + int top = lua_gettop(L); + + ct_usr = lua_absindex(L, ct_usr); + + for (;;) { + require_token(L, P, &tok); + + if (tok.type == TOK_CLOSE_PAREN) { + break; + } + + if (args) { + if (tok.type != TOK_COMMA) { + luaL_error(L, "unexpected token in function argument %d on line %d", args, P->line); + } + + require_token(L, P, &tok); + } + + if (tok.type == TOK_VA_ARG) { + ct->has_var_arg = true; + check_token(L, P, TOK_CLOSE_PAREN, "", "unexpected token after ... in function on line %d", P->line); + break; + + } else if (tok.type == TOK_TOKEN) { + struct ctype at; + + put_back(P); + parse_type(L, P, &at); + parse_argument(L, P, -1, &at, NULL, NULL); + + assert(lua_gettop(L) == top + 2); + + /* array arguments are just treated as their base pointer type */ + at.is_array = 0; + + /* check for the c style int func(void) and error on other uses of arguments of type void */ + if (at.type == VOID_TYPE && at.pointers == 0) { + if (args) { + luaL_error(L, "can't have argument of type void on line %d", P->line); + } + + check_token(L, P, TOK_CLOSE_PAREN, "", "unexpected void in function on line %d", P->line); + lua_pop(L, 2); + break; + } + + push_ctype(L, -1, &at); + lua_rawseti(L, ct_usr, ++args); + + lua_pop(L, 2); /* parse_type and parse_argument at_usr */ + + } else { + luaL_error(L, "unexpected token in function argument %d on line %d", args+1, P->line); + } + } + + assert(lua_gettop(L) == top); +} + +static int max_bitfield_size(int type) +{ + switch (type) { + case BOOL_TYPE: + return 1; + case INT8_TYPE: + return 8; + case INT16_TYPE: + return 16; + case INT32_TYPE: + case ENUM_TYPE: + return 32; + case INT64_TYPE: + return 64; + default: + 
return -1; + } +} + +static struct ctype* parse_argument2(lua_State* L, struct parser* P, int ct_usr, struct ctype* ct, struct token* name, struct parser* asmname); + +/* parses from after the first ( in a function declaration or function pointer + * can be one of: + * void foo(...) before ... + * void (foo)(...) before foo + * void (* <>)(...) before <> which is the inner type + */ +static struct ctype* parse_function(lua_State* L, struct parser* P, int ct_usr, struct ctype* ct, struct token* name, struct parser* asmname) +{ + /* We have a function pointer or a function. The usr table will + * get replaced by the canonical one (if there is one) in + * find_canonical_usr after all the arguments and returns have + * been parsed. */ + struct token tok; + int top = lua_gettop(L); + struct ctype* ret; + + lua_newtable(L); + ret = push_ctype(L, ct_usr, ct); + lua_rawseti(L, -2, 0); + ct_usr = lua_gettop(L); + + memset(ct, 0, sizeof(*ct)); + ct->base_size = sizeof(void (*)()); + ct->align_mask = min(FUNCTION_ALIGN_MASK, P->align_mask); + ct->type = FUNCTION_TYPE; + ct->is_defined = 1; + + if (name->type == TOK_NIL) { + for (;;) { + require_token(L, P, &tok); + + if (tok.type == TOK_STAR) { + + if (ct->type == FUNCTION_TYPE) { + ct->type = FUNCTION_PTR_TYPE; + } else if (ct->pointers == POINTER_MAX) { + luaL_error(L, "maximum number of pointer derefs reached - use a struct to break up the pointers on line %d", P->line); + } else { + ct->pointers++; + ct->const_mask <<= 1; + } + + } else if (parse_attribute(L, P, &tok, ct, asmname)) { + /* parse_attribute sets the appropriate fields */ + + } else { + /* call parse_argument to handle the inner contents + * e.g. the <> in "void (* <>) (...)". Note that the + * inner contents can itself be a function, a function + * ptr, array, etc (e.g. "void (*signal(int sig, void + * (*func)(int)))(int)" ). 
+ */ + put_back(P); + ct = parse_argument2(L, P, ct_usr, ct, name, asmname); + break; + } + } + + check_token(L, P, TOK_CLOSE_PAREN, NULL, "unexpected token in function on line %d", P->line); + check_token(L, P, TOK_OPEN_PAREN, NULL, "unexpected token in function on line %d", P->line); + } + + parse_function_arguments(L, P, ct_usr, ct); + + /* if we have an inner function then set the outer function ptr as its + * return type and return the inner function + * e.g. for void (* )(int) inner is + * surrounded by <>, return type is void (*)(int) + */ + if (lua_gettop(L) == ct_usr+1) { + lua_replace(L, ct_usr); + } + + assert(lua_gettop(L) == top + 1 && lua_istable(L, -1)); + return ret; +} + +static struct ctype* parse_argument2(lua_State* L, struct parser* P, int ct_usr, struct ctype* ct, struct token* name, struct parser* asmname) +{ + struct token tok; + int top = lua_gettop(L); + int ft_usr = 0; + + luaL_checkstack(L, 10, "function too complex"); + ct_usr = lua_absindex(L, ct_usr); + + for (;;) { + if (!next_token(L, P, &tok)) { + /* we've reached the end of the string */ + break; + + } else if (tok.type == TOK_STAR) { + if (ct->pointers == POINTER_MAX) { + luaL_error(L, "maximum number of pointer derefs reached - use a struct to break up the pointers on line %d", P->line); + } + + ct->pointers++; + ct->const_mask <<= 1; + + /* __declspec(align(#)) may come before the type in a member */ + if (!ct->is_packed) { + ct->align_mask = max(min(PTR_ALIGN_MASK, P->align_mask), ct->align_mask); + } + + } else if (tok.type == TOK_REFERENCE) { + luaL_error(L, "NYI: c++ reference types"); + + } else if (parse_attribute(L, P, &tok, ct, asmname)) { + /* parse attribute has filled out appropriate fields in type */ + + } else if (tok.type == TOK_OPEN_PAREN) { + ct = parse_function(L, P, ct_usr, ct, name, asmname); + ft_usr = lua_gettop(L); + + } else if (tok.type == TOK_OPEN_SQUARE) { + /* array */ + if (ct->pointers == POINTER_MAX) { + luaL_error(L, "maximum number of pointer 
derefs reached - use a struct to break up the pointers"); + } + ct->is_array = 1; + ct->pointers++; + ct->const_mask <<= 1; + require_token(L, P, &tok); + + if (ct->pointers == 1 && !ct->is_defined) { + luaL_error(L, "array of undefined type on line %d", P->line); + } + + if (ct->is_variable_struct || ct->is_variable_array) { + luaL_error(L, "can't have an array of a variably sized type on line %d", P->line); + } + + if (tok.type == TOK_QUESTION) { + ct->is_variable_array = 1; + ct->variable_increment = (ct->pointers > 1) ? sizeof(void*) : ct->base_size; + check_token(L, P, TOK_CLOSE_SQUARE, "", "invalid character in array on line %d", P->line); + + } else if (tok.type == TOK_CLOSE_SQUARE) { + ct->array_size = 0; + + } else if (tok.type == TOK_TOKEN && IS_RESTRICT(tok)) { + /* odd gcc extension foo[__restrict] for arguments */ + ct->array_size = 0; + check_token(L, P, TOK_CLOSE_SQUARE, "", "invalid character in array on line %d", P->line); + + } else { + int64_t asize; + put_back(P); + asize = calculate_constant(L, P); + if (asize < 0) { + luaL_error(L, "array size can not be negative on line %d", P->line); + } + ct->array_size = (size_t) asize; + check_token(L, P, TOK_CLOSE_SQUARE, "", "invalid character in array on line %d", P->line); + } + + } else if (tok.type == TOK_COLON) { + int64_t bsize = calculate_constant(L, P); + + if (ct->pointers || bsize < 0 || bsize > max_bitfield_size(ct->type)) { + luaL_error(L, "invalid bitfield on line %d", P->line); + } + + ct->is_bitfield = 1; + ct->bit_size = (unsigned) bsize; + + } else if (tok.type != TOK_TOKEN) { + /* we've reached the end of the declaration */ + put_back(P); + break; + + } else if (IS_CONST(tok)) { + ct->const_mask |= 1; + + } else if (IS_VOLATILE(tok) || IS_RESTRICT(tok)) { + /* ignored for now */ + + } else { + *name = tok; + } + } + + assert((ft_usr == 0 && lua_gettop(L) == top) || (lua_gettop(L) == top + 1 && ft_usr == top + 1 && (lua_istable(L, -1) || lua_isnil(L, -1)))); + return ct; +} + +static 
void find_canonical_usr(lua_State* L, int ct_usr, const struct ctype *ct)
{
    /* Makes the usr table of a function / function pointer type canonical.
     *
     * Non-function types are left untouched.  For function types the full
     * type string (front .. back, as produced by push_function_type_strings)
     * is looked up in the types table:
     *  - if an entry exists, the stack slot ct_usr is replaced with that
     *    entry's user value;
     *  - otherwise the front, back and full names are stored in the usr
     *    table at ct_usr and a new ctype is registered in the types table
     *    under the full name.
     *
     * The Lua stack height is the same on exit as on entry.
     */
    struct ctype ret_type;
    int base = lua_gettop(L);
    int types_idx, front_idx, back_idx, full_idx;
    int has_name;

    if (!(ct->type == FUNCTION_PTR_TYPE || ct->type == FUNCTION_TYPE)) {
        return;
    }

    luaL_checkstack(L, 10, "function too complex");
    ct_usr = lua_absindex(L, ct_usr);

    /* a usr table that already carries a name is already canonical */
    lua_pushlightuserdata(L, &g_name_key);
    lua_rawget(L, ct_usr);
    has_name = !lua_isnil(L, -1);
    lua_pop(L, 1);
    assert(base == lua_gettop(L));

    if (has_name) {
        return;
    }

    /* canonize the return type (stored at index 0 of the usr table) first,
     * then store it back as a fresh ctype */
    lua_rawgeti(L, ct_usr, 0);
    ret_type = *(struct ctype*) lua_touserdata(L, -1);
    lua_getuservalue(L, -1);
    find_canonical_usr(L, -1, &ret_type);
    push_ctype(L, -1, &ret_type);
    lua_rawseti(L, ct_usr, 0);
    lua_pop(L, 2); /* return ctype and its usr */

    assert(base == lua_gettop(L));

    /* build the type strings and look the full one up in the types table */
    push_upval(L, &types_key);
    types_idx = lua_gettop(L);
    front_idx = types_idx + 1;
    back_idx = types_idx + 2;
    full_idx = types_idx + 3;

    push_function_type_strings(L, ct_usr, ct);  /* -> front, back */
    lua_pushvalue(L, front_idx);
    lua_pushvalue(L, back_idx);
    lua_concat(L, 2);                           /* -> full name */

    lua_pushvalue(L, full_idx);
    lua_rawget(L, types_idx);                   /* -> looked up value */

    assert(lua_gettop(L) == types_idx + 4 && types_idx == base + 1);

    if (!lua_isnil(L, -1)) {
        /* known type: adopt its usr table in place of ours */
        lua_getuservalue(L, -1);
        lua_replace(L, ct_usr);
        lua_pop(L, 1);

    } else {
        /* new type: name our usr table and register it */
        lua_pop(L, 1);

        lua_pushlightuserdata(L, &g_front_name_key);
        lua_pushvalue(L, front_idx);
        lua_rawset(L, ct_usr);

        lua_pushlightuserdata(L, &g_back_name_key);
        lua_pushvalue(L, back_idx);
        lua_rawset(L, ct_usr);

        lua_pushlightuserdata(L, &g_name_key);
        lua_pushvalue(L, full_idx);
        lua_rawset(L, ct_usr);

        lua_pushvalue(L, full_idx);
        push_ctype(L, ct_usr, ct);
        lua_rawset(L, types_idx);
    }

    lua_pop(L, 4); /* types, front, back, full name */
    assert(base == lua_gettop(L) && types_idx == base + 1);
}


/* parses after the main base type of a typedef, function argument or
 * struct/union member
 * eg for const void* bar[3] the base type is
void with the subtype so far of + * const, this parses the "* bar[3]" and updates the type argument + * + * ct_usr and type must be as filled out by parse_type + * + * pushes the updated user value on the top of the stack + */ +void parse_argument(lua_State* L, struct parser* P, int ct_usr, struct ctype* ct, struct token* pname, struct parser* asmname) +{ + struct token tok, name; + int top = lua_gettop(L); + + memset(&name, 0, sizeof(name)); + parse_argument2(L, P, ct_usr, ct, &name, asmname); + + for (;;) { + if (!next_token(L, P, &tok)) { + break; + } else if (parse_attribute(L, P, &tok, ct, asmname)) { + /* parse_attribute sets the appropriate fields */ + } else { + put_back(P); + break; + } + } + + if (lua_gettop(L) == top) { + lua_pushvalue(L, ct_usr); + } + + find_canonical_usr(L, -1, ct); + + if (pname) { + *pname = name; + } +} + +static void parse_typedef(lua_State* L, struct parser* P) +{ + struct token tok; + struct ctype base_type; + int top = lua_gettop(L); + + parse_type(L, P, &base_type); + + for (;;) { + struct ctype arg_type = base_type; + struct token name; + + memset(&name, 0, sizeof(name)); + + assert(lua_gettop(L) == top + 1); + parse_argument(L, P, -1, &arg_type, &name, NULL); + assert(lua_gettop(L) == top + 2); + + if (!name.size) { + luaL_error(L, "Can't have a typedef without a name on line %d", P->line); + } else if (arg_type.is_variable_array) { + luaL_error(L, "Can't typedef a variable length array on line %d", P->line); + } + + push_upval(L, &types_key); + lua_pushlstring(L, name.str, name.size); + push_ctype(L, -3, &arg_type); + lua_rawset(L, -3); + lua_pop(L, 2); /* types and parse_argument usr tbl */ + + require_token(L, P, &tok); + + if (tok.type == TOK_SEMICOLON) { + break; + } else if (tok.type != TOK_COMMA) { + luaL_error(L, "Unexpected character in typedef on line %d", P->line); + } + } + + lua_pop(L, 1); /* parse_type usr tbl */ + assert(lua_gettop(L) == top); +} + +static bool is_hex(char ch) +{ return ('0' <= ch && ch <= 
'9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'); } + +static bool is_digit(char ch) +{ return '0' <= ch && ch <= '9'; } + +static int from_hex(char ch) +{ + if (ch >= 'a') { + return ch - 'a' + 10; + } else if (ch >= 'A') { + return ch - 'A' + 10; + } else { + return ch - '0'; + } +} + +static void push_strings(lua_State* L, struct parser* P) +{ + luaL_Buffer B; + luaL_buffinit(L, &B); + + for (;;) { + const char *p, *e; + char *t, *s; + struct token tok; + + require_token(L, P, &tok); + if (tok.type != TOK_STRING) { + break; + } + + p = tok.str; + e = p + tok.size; + + t = luaL_prepbuffsize(&B, tok.size); + s = t; + + while (p < e) { + if (*p == '\\') { + if (++p == e) { + luaL_error(L, "parse error in string"); + } + switch (*p) { + case '\\': *(t++) = '\\'; p++; break; + case '\"': *(t++) = '\"'; p++; break; + case '\'': *(t++) = '\''; p++; break; + case 'n': *(t++) = '\n'; p++; break; + case 'r': *(t++) = '\r'; p++; break; + case 'b': *(t++) = '\b'; p++; break; + case 't': *(t++) = '\t'; p++; break; + case 'f': *(t++) = '\f'; p++; break; + case 'a': *(t++) = '\a'; p++; break; + case 'v': *(t++) = '\v'; p++; break; + case 'e': *(t++) = 0x1B; p++; break; + case 'x': + { + uint8_t u; + p++; + if (p + 2 > e || !is_hex(p[0]) || !is_hex(p[1])) { + luaL_error(L, "parse error in string"); + } + u = (from_hex(p[0]) << 4) | from_hex(p[1]); + *(t++) = *(char*) &u; + p += 2; + break; + } + default: + { + uint8_t u; + const char* e2 = min(p + 3, e); + if (!is_digit(*p)) { + luaL_error(L, "parse error in string"); + } + u = *p - '0'; + p++; + while (is_digit(*p) && p < e2) { + u = 10*u + *p-'0'; + p++; + } + *(t++) = *(char*) &u; + break; + } + } + } else { + *(t++) = *(p++); + } + } + + luaL_addsize(&B, t-s); + } + + luaL_pushresult(&B); +} + +#define END 0 +#define PRAGMA_POP 1 + +static int parse_root(lua_State* L, struct parser* P) +{ + int top = lua_gettop(L); + struct token tok; + + while (next_token(L, P, &tok)) { + /* we can have: + * struct definition + 
* enum definition + * union definition + * struct/enum/union declaration + * typedef + * function declaration + * pragma pack + */ + + assert(lua_gettop(L) == top); + + if (tok.type == TOK_SEMICOLON) { + /* empty semicolon in root continue on */ + + } else if (tok.type == TOK_POUND) { + + check_token(L, P, TOK_TOKEN, "pragma", "unexpected pre processor directive on line %d", P->line); + check_token(L, P, TOK_TOKEN, "pack", "unexpected pre processor directive on line %d", P->line); + check_token(L, P, TOK_OPEN_PAREN, "", "invalid pack directive on line %d", P->line); + + require_token(L, P, &tok); + + if (tok.type == TOK_NUMBER) { + if (tok.integer != 1 && tok.integer != 2 && tok.integer != 4 && tok.integer != 8 && tok.integer != 16) { + luaL_error(L, "pack directive with invalid pack size on line %d", P->line); + } + + P->align_mask = (unsigned) (tok.integer - 1); + check_token(L, P, TOK_CLOSE_PAREN, "", "invalid pack directive on line %d", P->line); + + } else if (tok.type == TOK_TOKEN && IS_LITERAL(tok, "push")) { + int line = P->line; + unsigned previous_alignment = P->align_mask; + + check_token(L, P, TOK_CLOSE_PAREN, "", "invalid pack directive on line %d", P->line); + + if (parse_root(L, P) != PRAGMA_POP) { + luaL_error(L, "reached end of string without a pragma pop to match the push on line %d", line); + } + + P->align_mask = previous_alignment; + + } else if (tok.type == TOK_TOKEN && IS_LITERAL(tok, "pop")) { + check_token(L, P, TOK_CLOSE_PAREN, "", "invalid pack directive on line %d", P->line); + return PRAGMA_POP; + + } else { + luaL_error(L, "invalid pack directive on line %d", P->line); + } + + + } else if (tok.type != TOK_TOKEN) { + return luaL_error(L, "unexpected character on line %d", P->line); + + } else if (IS_LITERAL(tok, "__extension__")) { + /* ignore */ + continue; + + } else if (IS_LITERAL(tok, "extern")) { + /* ignore extern as data and functions can only be extern */ + continue; + + } else if (IS_LITERAL(tok, "typedef")) { + 
parse_typedef(L, P); + + } else if (IS_LITERAL(tok, "static")) { + struct ctype at; + + int64_t val; + require_token(L, P, &tok); + if (!IS_CONST(tok)) { + luaL_error(L, "expected 'static const int' on line %d", P->line); + } + + parse_type(L, P, &at); + + require_token(L, P, &tok); + if (tok.type != TOK_TOKEN) { + luaL_error(L, "expected constant name after 'static const int' on line %d", P->line); + } + + check_token(L, P, TOK_ASSIGN, "", "expected = after 'static const int ' on line %d", P->line); + + val = calculate_constant(L, P); + + check_token(L, P, TOK_SEMICOLON, "", "expected ; after 'static const int' definition on line %d", P->line); + + push_upval(L, &constants_key); + lua_pushlstring(L, tok.str, tok.size); + + switch (at.type) { + case INT8_TYPE: + case INT16_TYPE: + case INT32_TYPE: + if (at.is_unsigned) + lua_pushnumber(L, (unsigned int) val); + else + lua_pushnumber(L, (int) val); + break; + + default: + luaL_error(L, "expected a valid 8-, 16-, or 32-bit signed or unsigned integer type after 'static const' on line %d", P->line); + } + + lua_rawset(L, -3); + lua_pop(L, 2); /*constants and type*/ + + } else { + /* type declaration, type definition, or function declaration */ + struct ctype type; + struct token name; + struct parser asmname; + + memset(&name, 0, sizeof(name)); + memset(&asmname, 0, sizeof(asmname)); + + put_back(P); + parse_type(L, P, &type); + + for (;;) { + parse_argument(L, P, -1, &type, &name, &asmname); + + if (name.size) { + /* global/function declaration */ + + /* set asmname_tbl[name] = asmname */ + if (asmname.next) { + push_upval(L, &asmname_key); + lua_pushlstring(L, name.str, name.size); + push_strings(L, &asmname); + lua_rawset(L, -3); + lua_pop(L, 1); /* asmname upval */ + } + + push_upval(L, &functions_key); + lua_pushlstring(L, name.str, name.size); + push_ctype(L, -3, &type); + lua_rawset(L, -3); + lua_pop(L, 1); /* functions upval */ + } else { + /* type declaration/definition - already been processed */ + } + + 
lua_pop(L, 1); + + require_token(L, P, &tok); + + if (tok.type == TOK_SEMICOLON) { + break; + } else if (tok.type != TOK_COMMA) { + luaL_error(L, "missing semicolon on line %d", P->line); + } + } + + lua_pop(L, 1); + } + } + + return END; +} + +int ffi_cdef(lua_State* L) +{ + struct parser P; + + P.line = 1; + P.prev = P.next = luaL_checkstring(L, 1); + P.align_mask = DEFAULT_ALIGN_MASK; + + if (parse_root(L, &P) == PRAGMA_POP) { + luaL_error(L, "pragma pop without an associated push on line %d", P.line); + } + + return 0; +} + +/* calculate_constant handles operator precedence by having a number of + * recursive commands each of which computes the result at that level of + * precedence and above. calculate_constant1 is the highest precedence + */ + +static int try_cast(lua_State* L) +{ + struct parser* P = (struct parser*) lua_touserdata(L, 1); + struct ctype ct; + struct token name, tok; + memset(&name, 0, sizeof(name)); + + parse_type(L, P, &ct); + parse_argument(L, P, -1, &ct, &name, NULL); + + require_token(L, P, &tok); + if (tok.type != TOK_CLOSE_PAREN || name.size) { + return luaL_error(L, "invalid cast"); + } + + if (ct.pointers || ct.type != INT32_TYPE) { + return luaL_error(L, "unsupported cast on line %d", P->line); + } + + return 0; +} + +static int64_t calculate_constant2(lua_State* L, struct parser* P, struct token* tok); + +/* () */ +static int64_t calculate_constant1(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t ret; + + if (tok->type == TOK_NUMBER) { + ret = tok->integer; + next_token(L, P, tok); + return ret; + + } else if (tok->type == TOK_TOKEN) { + /* look up name in constants table */ + push_upval(L, &constants_key); + lua_pushlstring(L, tok->str, tok->size); + lua_rawget(L, -2); + lua_remove(L, -2); /* constants table */ + + if (!lua_isnumber(L, -1)) { + lua_pushlstring(L, tok->str, tok->size); + luaL_error(L, "use of undefined constant %s on line %d", lua_tostring(L, -1), P->line); + } + + ret = (int64_t) lua_tonumber(L, 
-1); + lua_pop(L, 1); + next_token(L, P, tok); + return ret; + + } else if (tok->type == TOK_OPEN_PAREN) { + struct parser before_cast = *P; + int top = lua_gettop(L); + + /* see if this is a numeric cast, which we ignore */ + lua_pushcfunction(L, &try_cast); + lua_pushlightuserdata(L, P); + if (!lua_pcall(L, 1, 0, 0)) { + next_token(L, P, tok); + return calculate_constant2(L, P, tok); + } + lua_settop(L, top); + + *P = before_cast; + ret = calculate_constant(L, P); + + require_token(L, P, tok); + if (tok->type != TOK_CLOSE_PAREN) { + luaL_error(L, "error whilst parsing constant at line %d", P->line); + } + + next_token(L, P, tok); + return ret; + + } else { + return luaL_error(L, "unexpected token whilst parsing constant at line %d", P->line); + } +} + +/* ! and ~, unary + and -, and sizeof */ +static int64_t calculate_constant2(lua_State* L, struct parser* P, struct token* tok) +{ + if (tok->type == TOK_LOGICAL_NOT) { + require_token(L, P, tok); + return !calculate_constant2(L, P, tok); + + } else if (tok->type == TOK_BITWISE_NOT) { + require_token(L, P, tok); + return ~calculate_constant2(L, P, tok); + + } else if (tok->type == TOK_PLUS) { + require_token(L, P, tok); + return calculate_constant2(L, P, tok); + + } else if (tok->type == TOK_MINUS) { + require_token(L, P, tok); + return -calculate_constant2(L, P, tok); + + } else if (tok->type == TOK_TOKEN && + (IS_LITERAL(*tok, "sizeof") + || IS_LITERAL(*tok, "alignof") + || IS_LITERAL(*tok, "__alignof__") + || IS_LITERAL(*tok, "__alignof"))) { + + bool issize = IS_LITERAL(*tok, "sizeof"); + struct ctype type; + + require_token(L, P, tok); + if (tok->type != TOK_OPEN_PAREN) { + luaL_error(L, "invalid sizeof at line %d", P->line); + } + + parse_type(L, P, &type); + parse_argument(L, P, -1, &type, NULL, NULL); + lua_pop(L, 2); + + require_token(L, P, tok); + if (tok->type != TOK_CLOSE_PAREN) { + luaL_error(L, "invalid sizeof at line %d", P->line); + } + + next_token(L, P, tok); + + return issize ? 
ctype_size(L, &type) : type.align_mask + 1; + + } else { + return calculate_constant1(L, P, tok); + } +} + +/* binary * / and % (left associative) */ +static int64_t calculate_constant3(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant2(L, P, tok); + + for (;;) { + if (tok->type == TOK_MULTIPLY) { + require_token(L, P, tok); + left *= calculate_constant2(L, P, tok); + + } else if (tok->type == TOK_DIVIDE) { + require_token(L, P, tok); + left /= calculate_constant2(L, P, tok); + + } else if (tok->type == TOK_MODULUS) { + require_token(L, P, tok); + left %= calculate_constant2(L, P, tok); + + } else { + return left; + } + } +} + +/* binary + and - (left associative) */ +static int64_t calculate_constant4(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant3(L, P, tok); + + for (;;) { + if (tok->type == TOK_PLUS) { + require_token(L, P, tok); + left += calculate_constant3(L, P, tok); + + } else if (tok->type == TOK_MINUS) { + require_token(L, P, tok); + left -= calculate_constant3(L, P, tok); + + } else { + return left; + } + } +} + +/* binary << and >> (left associative) */ +static int64_t calculate_constant5(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant4(L, P, tok); + + for (;;) { + if (tok->type == TOK_LEFT_SHIFT) { + require_token(L, P, tok); + left <<= calculate_constant4(L, P, tok); + + } else if (tok->type == TOK_RIGHT_SHIFT) { + require_token(L, P, tok); + left >>= calculate_constant4(L, P, tok); + + } else { + return left; + } + } +} + +/* binary <, <=, >, and >= (left associative) */ +static int64_t calculate_constant6(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant5(L, P, tok); + + for (;;) { + if (tok->type == TOK_LESS) { + require_token(L, P, tok); + left = (left < calculate_constant5(L, P, tok)); + + } else if (tok->type == TOK_LESS_EQUAL) { + require_token(L, P, tok); + left = (left <= 
calculate_constant5(L, P, tok)); + + } else if (tok->type == TOK_GREATER) { + require_token(L, P, tok); + left = (left > calculate_constant5(L, P, tok)); + + } else if (tok->type == TOK_GREATER_EQUAL) { + require_token(L, P, tok); + left = (left >= calculate_constant5(L, P, tok)); + + } else { + return left; + } + } +} + +/* binary ==, != (left associative) */ +static int64_t calculate_constant7(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant6(L, P, tok); + + for (;;) { + if (tok->type == TOK_EQUAL) { + require_token(L, P, tok); + left = (left == calculate_constant6(L, P, tok)); + + } else if (tok->type == TOK_NOT_EQUAL) { + require_token(L, P, tok); + left = (left != calculate_constant6(L, P, tok)); + + } else { + return left; + } + } +} + +/* binary & (left associative) */ +static int64_t calculate_constant8(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant7(L, P, tok); + + for (;;) { + if (tok->type == TOK_BITWISE_AND) { + require_token(L, P, tok); + left = (left & calculate_constant7(L, P, tok)); + + } else { + return left; + } + } +} + +/* binary ^ (left associative) */ +static int64_t calculate_constant9(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant8(L, P, tok); + + for (;;) { + if (tok->type == TOK_BITWISE_XOR) { + require_token(L, P, tok); + left = (left ^ calculate_constant8(L, P, tok)); + + } else { + return left; + } + } +} + +/* binary | (left associative) */ +static int64_t calculate_constant10(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant9(L, P, tok); + + for (;;) { + if (tok->type == TOK_BITWISE_OR) { + require_token(L, P, tok); + left = (left | calculate_constant9(L, P, tok)); + + } else { + return left; + } + } +} + +/* binary && (left associative) */ +static int64_t calculate_constant11(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant10(L, 
P, tok); + + for (;;) { + if (tok->type == TOK_LOGICAL_AND) { + require_token(L, P, tok); + left = (left && calculate_constant10(L, P, tok)); + + } else { + return left; + } + } +} + +/* binary || (left associative) */ +static int64_t calculate_constant12(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant11(L, P, tok); + + for (;;) { + if (tok->type == TOK_LOGICAL_OR) { + require_token(L, P, tok); + left = (left || calculate_constant11(L, P, tok)); + + } else { + return left; + } + } +} + +/* ternary ?: (right associative) */ +static int64_t calculate_constant13(lua_State* L, struct parser* P, struct token* tok) +{ + int64_t left = calculate_constant12(L, P, tok); + + if (tok->type == TOK_QUESTION) { + int64_t middle, right; + require_token(L, P, tok); + middle = calculate_constant13(L, P, tok); + if (tok->type != TOK_COLON) { + luaL_error(L, "invalid ternery (? :) in constant on line %d", P->line); + } + require_token(L, P, tok); + right = calculate_constant13(L, P, tok); + return left ? middle : right; + + } else { + return left; + } +} + +int64_t calculate_constant(lua_State* L, struct parser* P) +{ + struct token tok; + int64_t ret; + require_token(L, P, &tok); + ret = calculate_constant13(L, P, &tok); + + if (tok.type != TOK_NIL) { + put_back(P); + } + + return ret; +} + + + + diff --git a/tools/luaffi/test.c b/tools/luaffi/test.c new file mode 100644 index 0000000..6f065b9 --- /dev/null +++ b/tools/luaffi/test.c @@ -0,0 +1,677 @@ +/* vim: ts=4 sw=4 sts=4 et tw=78 + * Copyright (c) 2011 James R. McKaskill. 
See license in ffi.h + */ +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <inttypes.h> +#include <stddef.h> +#include <stdarg.h> + +#ifdef _WIN32 +#include <windows.h> +#else +#include <errno.h> +#endif + +#if __STDC_VERSION__+0 >= 199901L +#include <complex.h> +#define HAVE_COMPLEX +#endif + +#ifdef __cplusplus +# define EXTERN_C extern "C" +#else +# define EXTERN_C extern +#endif + +#ifdef _WIN32 +#define EXPORT EXTERN_C __declspec(dllexport) +#elif defined __GNUC__ +#define EXPORT EXTERN_C __attribute__((visibility("default"))) +#else +#define EXPORT EXTERN_C +#endif + +enum e8 { + FOO8, + BAR8, +}; +enum e16 { + FOO16 = 1 << 8, + BAR16, + BIG16 = 1 << 14, +}; +enum e32 { + FOO32 = 1 << 16, + BAR32, + BIG32 = 1 << 30, +}; + +EXPORT bool have_complex(); + +bool have_complex() +{ +#ifdef HAVE_COMPLEX + return 1; +#else + return 0; +#endif +} + +EXPORT bool is_msvc; + +bool is_msvc = +#ifdef _MSC_VER + 1; +#else + 0; +#endif + +EXPORT int test_pow(int v); +int test_pow(int v) +{ return v * v; } + +#define ADD(TYPE, NAME) \ + EXPORT TYPE NAME(TYPE a, TYPE b); \ + TYPE NAME(TYPE a, TYPE b) { return a + b; } + +ADD(int8_t, add_i8) +ADD(uint8_t, add_u8) +ADD(int16_t, add_i16) +ADD(uint16_t, add_u16) +ADD(int32_t, add_i32) +ADD(uint32_t, add_u32) +ADD(int64_t, add_i64) +ADD(uint64_t, add_u64) +ADD(double, add_d) +ADD(float, add_f) +#ifdef HAVE_COMPLEX +ADD(double complex, add_dc) +ADD(float complex, add_fc) +#endif + +EXPORT enum e8 inc_e8(enum e8 v); +EXPORT enum e16 inc_e16(enum e16 v); +EXPORT enum e32 inc_e32(enum e32 v); +enum e8 inc_e8(enum e8 v) {return v+1;} +enum e16 inc_e16(enum e16 v) {return v+1;} +enum e32 inc_e32(enum e32 v) {return v+1;} + +EXPORT _Bool not_b(_Bool v); +EXPORT _Bool not_b2(_Bool v); + +_Bool not_b(_Bool v) {return !v;} +_Bool not_b2(_Bool v) {return !v;} + +#define PRINT(TYPE, NAME, FORMAT) \ + EXPORT int NAME(char* buf, TYPE val); \ + int NAME(char* buf, TYPE val) {return sprintf(buf, "%" FORMAT, val);} + +PRINT(int8_t, print_i8, PRId8) +PRINT(uint8_t, print_u8, PRIu8) +PRINT(int16_t, print_i16, PRId16) 
+PRINT(uint16_t, print_u16, PRIu16) +PRINT(int32_t, print_i32, PRId32) +PRINT(uint32_t, print_u32, PRIu32) +PRINT(int64_t, print_i64, PRId64) +PRINT(uint64_t, print_u64, PRIu64) +PRINT(double, print_d, "g") +PRINT(float, print_f, "g") +PRINT(const char*, print_s, "s") +PRINT(void*, print_p, "p") +PRINT(enum e8, print_e8, "d") +PRINT(enum e16, print_e16, "d") +PRINT(enum e32, print_e32, "d") + +#ifdef HAVE_COMPLEX +EXPORT int print_dc(char* buf, double complex val); +EXPORT int print_fc(char* buf, float complex val); +int print_dc(char* buf, double complex val) {return sprintf(buf, "%g+%gi", creal(val), cimag(val));} +int print_fc(char* buf, float complex val) {return sprintf(buf, "%g+%gi", creal(val), cimag(val));} +#endif + +EXPORT int print_b(char* buf, _Bool val); +EXPORT int print_b2(char* buf, _Bool val); +int print_b(char* buf, _Bool val) {return sprintf(buf, "%s", val ? "true" : "false");} +int print_b2(char* buf, _Bool val) {return sprintf(buf, "%s", val ? "true" : "false");} + +EXPORT bool (*ret_fp(bool (*val)(bool)))(bool); +bool (*ret_fp(bool (*val)(bool)))(bool) +{return val;} + +#define OFFSETOF(STRUCT, MEMBER) ((int) ((char*) &STRUCT.MEMBER - (char*) &S - 1)) + +#define ALIGN_UP(VALUE, ALIGNMENT, SUFFIX) \ + struct align_##ALIGNMENT##_##SUFFIX { \ + char pad; \ + VALUE; \ + }; \ + EXPORT int print_align_##ALIGNMENT##_##SUFFIX(char* buf, struct align_##ALIGNMENT##_##SUFFIX* p); \ + int print_align_##ALIGNMENT##_##SUFFIX(char* buf, struct align_##ALIGNMENT##_##SUFFIX* p) { \ + struct {char ch; struct align_##ALIGNMENT##_##SUFFIX v;} s; \ + int off = sprintf(buf, "size %d offset %d align %d value ", \ + (int) sizeof(s.v), \ + (int) (((char*) &p->v) - (char*) p), \ + (int) (((char*) &s.v) - (char*) &s)); \ + return print_##SUFFIX(buf+off, p->v); \ + } + +#ifdef HAVE_COMPLEX +#define COMPLEX_ALIGN(ALIGNMENT, ATTR) \ + ALIGN_UP(ATTR(double complex), ALIGNMENT, dc) \ + ALIGN_UP(ATTR(float complex), ALIGNMENT, fc) +#else +#define COMPLEX_ALIGN(ALIGNMENT, 
ATTR) +#endif + +/* MSVC doesn't support __declspec(aligned(#)) on enums see C4329 */ +#define ENUM_ALIGN2(ALIGNMENT, ATTR) \ + ALIGN_UP(ATTR(enum e8), ALIGNMENT, e8) \ + ALIGN_UP(ATTR(enum e16), ALIGNMENT, e16) \ + ALIGN_UP(ATTR(enum e32), ALIGNMENT, e32) \ + +#ifdef _MSC_VER +#define ENUM_ALIGN(ALIGNMENT, ATTR) +#else +#define ENUM_ALIGN(ALIGNMENT, ATTR) ENUM_ALIGN2(ALIGNMENT, ATTR) +#endif + +#define ALIGN2(ALIGNMENT, ATTR) \ + ALIGN_UP(ATTR(uint16_t), ALIGNMENT, u16) \ + ALIGN_UP(ATTR(uint32_t), ALIGNMENT, u32) \ + ALIGN_UP(ATTR(uint64_t), ALIGNMENT, u64) \ + ALIGN_UP(ATTR(float), ALIGNMENT, f) \ + ALIGN_UP(ATTR(double), ALIGNMENT, d) \ + ALIGN_UP(ATTR(const char*), ALIGNMENT, s) \ + ALIGN_UP(ATTR(void*), ALIGNMENT, p) \ + ALIGN_UP(ATTR(_Bool), ALIGNMENT, b) \ + ALIGN_UP(ATTR(_Bool), ALIGNMENT, b2) \ + ENUM_ALIGN(ALIGNMENT, ATTR) \ + COMPLEX_ALIGN(ALIGNMENT, ATTR) + +#define NO_ATTR(TYPE) TYPE v + +#ifdef _MSC_VER +#define ALIGN_NO_ATTR(ALIGNMENT) \ + ALIGN2(ALIGNMENT, NO_ATTR) \ + ENUM_ALIGN2(ALIGNMENT, NO_ATTR) +#else +#define ALIGN_NO_ATTR(ALIGNMENT) \ + ALIGN2(ALIGNMENT, NO_ATTR) +#endif + +ALIGN_NO_ATTR(0) + +#pragma pack(push) +#pragma pack(1) +ALIGN_NO_ATTR(1) +#pragma pack(2) +ALIGN_NO_ATTR(2) +#pragma pack(4) +ALIGN_NO_ATTR(4) +#pragma pack(8) +ALIGN_NO_ATTR(8) +#pragma pack(16) +ALIGN_NO_ATTR(16) +#pragma pack(pop) + +#ifdef _MSC_VER +#define ATTR_(TYPE, ALIGN) __declspec(align(ALIGN)) TYPE v +#else +#define ATTR_(TYPE, ALIGN) TYPE v __attribute__((aligned(ALIGN))) +#endif + +#define ATTR1(TYPE) ATTR_(TYPE, 1) +#define ATTR2(TYPE) ATTR_(TYPE, 2) +#define ATTR4(TYPE) ATTR_(TYPE, 4) +#define ATTR8(TYPE) ATTR_(TYPE, 8) +#define ATTR16(TYPE) ATTR_(TYPE, 16) + +#define ATTR_DEF(TYPE) TYPE v __attribute__((aligned)) + +ALIGN2(attr_1, ATTR1) +ALIGN2(attr_2, ATTR2) +ALIGN2(attr_4, ATTR4) +ALIGN2(attr_8, ATTR8) +ALIGN2(attr_16, ATTR16) + +#ifndef _MSC_VER +ALIGN2(attr_def, ATTR_DEF) +#endif + +#ifdef _MSC_VER +#define alignof(type) __alignof(type) +#else 
+#define alignof(type) __alignof__(type) +#endif + +EXPORT int max_alignment(); + +int max_alignment() +{ return alignof(struct align_attr_16_p); } + +/* bit_fields1.cpp */ +/* compile with: /LD */ +struct Date { + unsigned short nWeekDay : 3; /* 0..7 (3 bits) */ + unsigned short nMonthDay : 6; /* 0..31 (6 bits) */ + unsigned short nMonth : 5; /* 0..12 (5 bits) */ + unsigned short nYear : 8; /* 0..100 (8 bits) */ +}; + +EXPORT int print_date(size_t* sz, size_t* align, char* buf, struct Date* d); + +int print_date(size_t* sz, size_t* align, char* buf, struct Date* d) { + *sz = sizeof(struct Date); + *align = alignof(struct Date); + return sprintf(buf, "%d %d %d %d", d->nWeekDay, d->nMonthDay, d->nMonth, d->nYear); +} + +/* bit_fields2.cpp */ +/* compile with: /LD */ +struct Date2 { + unsigned nWeekDay : 3; /* 0..7 (3 bits) */ + unsigned nMonthDay : 6; /* 0..31 (6 bits) */ + unsigned : 0; /* Force alignment to next boundary. */ + unsigned nMonth : 5; /* 0..12 (5 bits) */ + unsigned nYear : 8; /* 0..100 (8 bits) */ +}; + +EXPORT int print_date2(size_t* sz, size_t* align, char* buf, struct Date2* d); + +int print_date2(size_t* sz, size_t* align, char* buf, struct Date2* d) { + *sz = sizeof(struct Date2); + *align = alignof(struct Date2); + return sprintf(buf, "%d %d %d %d", d->nWeekDay, d->nMonthDay, d->nMonth, d->nYear); +} + +// Examples from SysV X86 ABI +struct sysv1 { + int j:5; + int k:6; + int m:7; +}; + +EXPORT int print_sysv1(size_t* sz, size_t* align, char* buf, struct sysv1* s); + +int print_sysv1(size_t* sz, size_t* align, char* buf, struct sysv1* s) { + *sz = sizeof(struct sysv1); + *align = alignof(struct sysv1); + return sprintf(buf, "%d %d %d", s->j, s->k, s->m); +} + +struct sysv2 { + short s:9; + int j:9; + char c; + short t:9; + short u:9; + char d; +}; + +EXPORT int print_sysv2(size_t* sz, size_t* align, char* buf, struct sysv2* s); + +int print_sysv2(size_t* sz, size_t* align, char* buf, struct sysv2* s) { + *sz = sizeof(struct sysv2); + *align = 
alignof(struct sysv2); + return sprintf(buf, "%d %d %d %d %d %d", s->s, s->j, s->c, s->t, s->u, s->d); +} + +struct sysv3 { + char c; + short s:8; +}; + +EXPORT int print_sysv3(size_t* sz, size_t* align, char* buf, struct sysv3* s); + +int print_sysv3(size_t* sz, size_t* align, char* buf, struct sysv3* s) { + *sz = sizeof(struct sysv3); + *align = alignof(struct sysv3); + return sprintf(buf, "%d %d", s->c, s->s); +} + +union sysv4 { + char c; + short s:8; +}; + +EXPORT int print_sysv4(size_t* sz, size_t* align, char* buf, union sysv4* s); + +int print_sysv4(size_t* sz, size_t* align, char* buf, union sysv4* s) { + *sz = sizeof(union sysv4); + *align = alignof(union sysv4); + return sprintf(buf, "%d", s->s); +} + +struct sysv5 { + char c; + int :0; + char d; + short :9; + char e; + char :0; +}; + +EXPORT int print_sysv5(size_t* sz, size_t* align, char* buf, struct sysv5* s); + +int print_sysv5(size_t* sz, size_t* align, char* buf, struct sysv5* s) { + *sz = sizeof(struct sysv5); + *align = alignof(struct sysv5); + return sprintf(buf, "%d %d %d", s->c, s->d, s->e); +} + +struct sysv6 { + char c; + int :0; + char d; + int :9; + char e; +}; + +EXPORT int print_sysv6(size_t* sz, size_t* align, char* buf, struct sysv6* s); + +int print_sysv6(size_t* sz, size_t* align, char* buf, struct sysv6* s) { + *sz = sizeof(struct sysv6); + *align = alignof(struct sysv6); + return sprintf(buf, "%d %d %d", s->c, s->d, s->e); +} + +struct sysv7 { + int j:9; + short s:9; + char c; + short t:9; + short u:9; +}; + +EXPORT int print_sysv7(size_t* sz, size_t* align, char* buf, struct sysv7* s); + +int print_sysv7(size_t* sz, size_t* align, char* buf, struct sysv7* s) { + *sz = sizeof(struct sysv7); + *align = alignof(struct sysv7); + return sprintf(buf, "%d %d %d %d %d", s->j, s->s, s->c, s->t, s->u); +} + +/* Now some targeting bitfield tests */ + +/* Bitfield alignment */ +#define BITALIGN(TNUM,BNUM) \ + struct ba_##TNUM##_##BNUM { \ + char a; \ + uint##TNUM##_t b : BNUM; \ + }; \ + 
EXPORT int print_ba_##TNUM##_##BNUM(size_t* sz, size_t* align, char* buf, struct ba_##TNUM##_##BNUM* s); \ + int print_ba_##TNUM##_##BNUM(size_t* sz, size_t* align, char* buf, struct ba_##TNUM##_##BNUM* s) { \ + *sz = sizeof(struct ba_##TNUM##_##BNUM); \ + *align = alignof(struct ba_##TNUM##_##BNUM); \ + return sprintf(buf, "%d %d", (int) s->a, (int) s->b); \ + } + +BITALIGN(8,7) +BITALIGN(16,7) +BITALIGN(16,15) +BITALIGN(32,7) +BITALIGN(32,15) +BITALIGN(32,31) +BITALIGN(64,7) +BITALIGN(64,15) +BITALIGN(64,31) +BITALIGN(64,63) + +/* Do unsigned and signed coallesce */ +#define BITCOALESCE(NUM) \ + struct bc##NUM { \ + uint##NUM##_t a : 3; \ + int##NUM##_t b : 3; \ + }; \ + EXPORT int print_bc##NUM(size_t* sz, size_t* align, char* buf, struct bc##NUM* s); \ + int print_bc##NUM(size_t* sz, size_t* align, char* buf, struct bc##NUM* s) { \ + *sz = sizeof(struct bc##NUM); \ + *align = alignof(struct bc##NUM); \ + return sprintf(buf, "%d %d", (int) s->a, (int) s->b); \ + } + +BITCOALESCE(8) +BITCOALESCE(16) +BITCOALESCE(32) +BITCOALESCE(64) + +// Do different sizes coallesce +struct bdsz { + uint8_t a : 3; + uint16_t b : 3; + uint32_t c : 3; + uint64_t d : 3; +}; + +EXPORT int print_bdsz(size_t* sz, size_t* align, char* buf, struct bdsz* s); +int print_bdsz(size_t* sz, size_t* align, char* buf, struct bdsz* s) { + *sz = sizeof(struct bdsz); + *align = alignof(struct bdsz); + return sprintf(buf, "%d %d %d %d", (int) s->a, (int) s->b, (int) s->c, (int) s->d); +} + +// Does coallesence upgrade the storage unit +struct bcup { + uint8_t a : 7; + uint16_t b : 9; + uint32_t c : 17; + uint64_t d : 33; +}; + +EXPORT int print_bcup(size_t* sz, size_t* align, char* buf, struct bcup* s); +int print_bcup(size_t* sz, size_t* align, char* buf, struct bcup* s) { + *sz = sizeof(struct bcup); + *align = alignof(struct bcup); + return sprintf(buf, "%d %d %d %"PRIu64, (int) s->a, (int) s->b, (int) s->c, (uint64_t) s->d); +} + +// Is unaligned access allowed +struct buna { + uint32_t a : 31; 
+ uint32_t b : 31; +}; + +EXPORT int print_buna(size_t* sz, size_t* align, char* buf, struct buna* s); +int print_buna(size_t* sz, size_t* align, char* buf, struct buna* s) { + *sz = sizeof(struct buna); + *align = alignof(struct buna); + return sprintf(buf, "%d %d", (int) s->a, (int) s->b); +} + +/* What does a lone :0 do */ +#define BITLONEZERO(NUM) \ + struct blz##NUM { \ + uint##NUM##_t a; \ + uint##NUM##_t :0; \ + uint##NUM##_t b; \ + }; \ + EXPORT int print_##blz##NUM(size_t* sz, size_t* align, char* buf, struct blz##NUM* s); \ + int print_blz##NUM(size_t* sz, size_t* align, char* buf, struct blz##NUM* s) { \ + *sz = sizeof(struct blz##NUM); \ + *align = alignof(struct blz##NUM); \ + return sprintf(buf, "%d %d", (int) s->a, (int) s->b); \ + } + +BITLONEZERO(8) +BITLONEZERO(16) +BITLONEZERO(32) +BITLONEZERO(64) + +/* What does a :0 or unnamed :# of the same or different type do */ +#define BITZERO(NUM, ZNUM, BNUM) \ + struct bz_##NUM##_##ZNUM##_##BNUM { \ + uint8_t a; \ + uint##NUM##_t b : 3; \ + uint##ZNUM##_t :BNUM; \ + uint##NUM##_t c : 3; \ + }; \ + EXPORT int print_bz_##NUM##_##ZNUM##_##BNUM(size_t* sz, size_t* align, char* buf, struct bz_##NUM##_##ZNUM##_##BNUM* s); \ + int print_bz_##NUM##_##ZNUM##_##BNUM(size_t* sz, size_t* align, char* buf, struct bz_##NUM##_##ZNUM##_##BNUM* s) { \ + *sz = sizeof(struct bz_##NUM##_##ZNUM##_##BNUM); \ + *align = alignof(struct bz_##NUM##_##ZNUM##_##BNUM); \ + return sprintf(buf, "%d %d %d", (int) s->a, (int) s->b, (int) s->c); \ + } + +BITZERO(8,8,0) +BITZERO(8,8,7) +BITZERO(8,16,0) +BITZERO(8,16,7) +BITZERO(8,16,15) +BITZERO(8,32,0) +BITZERO(8,32,7) +BITZERO(8,32,15) +BITZERO(8,32,31) +BITZERO(8,64,0) +BITZERO(8,64,7) +BITZERO(8,64,15) +BITZERO(8,64,31) +BITZERO(8,64,63) +BITZERO(16,8,0) +BITZERO(16,8,7) +BITZERO(16,16,0) +BITZERO(16,16,7) +BITZERO(16,16,15) +BITZERO(16,32,0) +BITZERO(16,32,7) +BITZERO(16,32,15) +BITZERO(16,32,31) +BITZERO(16,64,0) +BITZERO(16,64,7) +BITZERO(16,64,15) +BITZERO(16,64,31) 
+BITZERO(16,64,63) +BITZERO(32,8,0) +BITZERO(32,8,7) +BITZERO(32,16,0) +BITZERO(32,16,7) +BITZERO(32,16,15) +BITZERO(32,32,0) +BITZERO(32,32,7) +BITZERO(32,32,15) +BITZERO(32,32,31) +BITZERO(32,64,0) +BITZERO(32,64,7) +BITZERO(32,64,15) +BITZERO(32,64,31) +BITZERO(32,64,63) +BITZERO(64,8,0) +BITZERO(64,8,7) +BITZERO(64,16,0) +BITZERO(64,16,7) +BITZERO(64,16,15) +BITZERO(64,32,0) +BITZERO(64,32,7) +BITZERO(64,32,15) +BITZERO(64,32,31) +BITZERO(64,64,0) +BITZERO(64,64,7) +BITZERO(64,64,15) +BITZERO(64,64,31) +BITZERO(64,64,63) + +#define CALL(TYPE, SUFFIX) \ + EXPORT TYPE call_##SUFFIX(TYPE (*func)(TYPE), TYPE arg); \ + TYPE call_##SUFFIX(TYPE (*func)(TYPE), TYPE arg) { \ + return func(arg); \ + } + +CALL(int, i) +CALL(float, f) +CALL(double, d) +CALL(const char*, s) +CALL(_Bool, b) +CALL(enum e8, e8) +CALL(enum e16, e16) +CALL(enum e32, e32) +#ifdef HAVE_COMPLEX +CALL(double complex, dc) +CALL(float complex, fc) +#endif + +struct fptr { +#ifdef _MSC_VER + int (__cdecl *p)(int); +#else + int (*p)(int); +#endif +}; + +EXPORT int call_fptr(struct fptr* s, int val); + +int call_fptr(struct fptr* s, int val) { + return (s->p)(val); +} + +EXPORT bool g_b; +EXPORT int8_t g_i8; +EXPORT int16_t g_i16; +EXPORT int32_t g_i32; +EXPORT int64_t g_i64; +EXPORT uint8_t g_u8; +EXPORT uint16_t g_u16; +EXPORT uint32_t g_u32; +EXPORT uint64_t g_u64; +EXPORT float g_f; +EXPORT double g_d; +#ifdef HAVE_COMPLEX +EXPORT double complex g_dc; +EXPORT float complex g_fc; +#endif +EXPORT bool (*g_fp)(bool); +EXPORT const char g_s[]; +EXPORT const char* g_sp; +EXPORT void* g_p; +EXPORT enum e8 g_e8; +EXPORT enum e16 g_e16; +EXPORT enum e32 g_e32; +EXPORT struct Date g_date; + +bool g_b = true; +int8_t g_i8 = -8; +int16_t g_i16 = -16; +int32_t g_i32 = -32; +int64_t g_i64 = -64; +uint8_t g_u8 = 8; +uint16_t g_u16 = 16; +uint32_t g_u32 = 32; +uint64_t g_u64 = 64; +float g_f = 3; +double g_d = 5; +#ifdef HAVE_COMPLEX +double complex g_dc = 7+8i; +float complex g_fc = 6+9i; +#endif +bool 
(*g_fp)(bool) = &not_b; +void* g_p = (void*) &not_b; +const char g_s[] = "g_s"; +const char* g_sp = "g_sp"; +enum e8 g_e8 = FOO8; +enum e16 g_e16 = FOO16; +enum e32 g_e32 = FOO32; +struct Date g_date = {1,2,3,4}; + +EXPORT void set_errno(int val); +EXPORT int get_errno(void); + +void set_errno(int val) { +#ifdef _WIN32 + SetLastError(val); +#else + errno = val; +#endif +} + +int get_errno(void) { +#ifdef _WIN32 + return GetLastError(); +#else + return errno; +#endif +} + +EXPORT int va_list_size, va_list_align; +int va_list_size = sizeof(va_list); +int va_list_align = alignof(va_list); + diff --git a/tools/luaffi/test.lua b/tools/luaffi/test.lua new file mode 100644 index 0000000..2e2d2f5 --- /dev/null +++ b/tools/luaffi/test.lua @@ -0,0 +1,890 @@ +-- vim: ts=4 sw=4 sts=4 et tw=78 +-- Copyright (c) 2011 James R. McKaskill. See license in ffi.h + +io.stdout:setvbuf('no') +local ffi = require 'ffi' +local dlls = {} + +local num_ok = 0 +local num_err = 0 +local assert = function(a, hint1, hint2) + num_ok = (num_ok or 0) + (a == true and 1 or 0) + num_err = (num_err or 0) + (a ~= true and 1 or 0) + if a ~= true then + print('F'..num_err..'/T'..(num_ok+num_err), 'L'..debug.getinfo(2).currentline, debug.getinfo(2).name or '', hint1 or '', hint2 or '') + end + --return _G.assert(a) +end + +dlls.__cdecl = ffi.load('test_cdecl') + +if ffi.arch == 'x86' and ffi.os == 'Windows' then + dlls.__stdcall = ffi.load('test_stdcall') + dlls.__fastcall = ffi.load('test_fastcall') +end + +local function check(a, b) + return assert(a == b, a, b) +end + +print('Running test') + +ffi.cdef [[ +enum e8 { + FOO8, + BAR8, +}; +enum e16 { + FOO16 = 1 << 8, + BAR16, + BIG16 = 1 << 14, +}; +enum e32 { + FOO32 = 1 << 16, + BAR32, + BIG32 = 1 << 30, +}; +int max_alignment(); +bool is_msvc, is_msvc2 __asm__("is_msvc"); +bool have_complex(void); +bool have_complex2() __asm__("have" /*foo*/ "\x5F" "complex"); // 5F is _ + +int8_t add_i8(int8_t a, int8_t b); +uint8_t add_u8(uint8_t a, uint8_t b); +int16_t 
add_i16(int16_t a, int16_t b); +uint16_t add_u16(uint16_t a, uint16_t b); +int32_t add_i32(int32_t a, int32_t b); +uint32_t add_u32(uint32_t a, uint32_t b); +int64_t add_i64(int64_t a, int64_t b); +uint64_t add_u64(uint64_t a, uint64_t b); +double add_d(double a, double b); +float add_f(float a, float b); +double complex add_dc(double complex a, double complex b); +float complex add_fc(float complex a, float complex b); +enum e8 inc_e8(enum e8); +enum e16 inc_e16(enum e16); +enum e32 inc_e32(enum e32); +bool not_b(bool v); +_Bool not_b2(_Bool v); +typedef bool (*fp)(bool); +fp ret_fp(fp v); +bool (*ret_fp2(bool (*)(bool)))(bool) __asm("ret_fp"); + +int print_i8(char* buf, int8_t val); +int print_u8(char* buf, uint8_t val); +int print_i16(char* buf, int16_t val); +int print_u16(char* buf, uint16_t val); +int print_i32(char* buf, int32_t val); +int print_u32(char* buf, uint32_t val); +int print_i64(char* buf, int64_t val); +int print_u64(char* buf, uint64_t val); +int print_s(char* buf, const char* val); +int print_b(char* buf, bool val); +int print_b2(char* buf, _Bool val); +int print_d(char* buf, double val); +int print_f(char* buf, float val); +int print_p(char* buf, void* val); +int print_dc(char* buf, double complex val); +int print_fc(char* buf, float complex val); +int print_e8(char* buf, enum e8 val); +int print_e16(char* buf, enum e16 val); +int print_e32(char* buf, enum e32 val); +int sprintf(char* buf, const char* format, ...); + +// Examples from MSDN + +// bit_fields1.cpp +// compile with: /LD +struct Date { + unsigned short nWeekDay : 3; // 0..7 (3 bits) + unsigned short nMonthDay : 6; // 0..31 (6 bits) + unsigned short nMonth : 5; // 0..12 (5 bits) + unsigned short nYear : 8; // 0..100 (8 bits) +}; + +// bit_fields2.cpp +// compile with: /LD +struct Date2 { + unsigned nWeekDay : 3; // 0..7 (3 bits) + unsigned nMonthDay : 6; // 0..31 (6 bits) + unsigned : 0; // Force alignment to next boundary. 
+ unsigned nMonth : 5; // 0..12 (5 bits) + unsigned nYear : 8; // 0..100 (8 bits) +}; + +// For checking the alignment of short bitfields +struct Date3 { + char pad; + unsigned short nWeekDay : 3; // 0..7 (3 bits) + unsigned short nMonthDay : 6; // 0..31 (6 bits) + unsigned short nMonth : 5; // 0..12 (5 bits) + unsigned short nYear : 8; // 0..100 (8 bits) +}; + +// For checking the alignment and container of int64 bitfields +struct bit64 { + char pad; + uint64_t a : 15; + uint64_t b : 14; + uint64_t c : 13; + uint64_t d : 12; +}; + +// Examples from SysV X86 ABI +struct sysv1 { + int j:5; + int k:6; + int m:7; +}; + +struct sysv2 { + short s:9; + int j:9; + char c; + short t:9; + short u:9; + char d; +}; + +struct sysv3 { + char c; + short s:8; +}; + +union sysv4 { + char c; + short s:8; +}; + +struct sysv5 { + char c; + int :0; + char d; + short :9; + char e; + char :0; +}; + +struct sysv6 { + char c; + int :0; + char d; + int :9; + char e; +}; + +struct sysv7 { + int j:9; + short s:9; + char c; + short t:9; + short u:9; +}; + +int print_date(size_t* sz, size_t* align, char* buf, struct Date* s); +int print_date2(size_t* sz, size_t* align, char* buf, struct Date2* s); +int print_date3(size_t* sz, size_t* align, char* buf, struct Date3* d); +int print_bit64(size_t* sz, size_t* align, char* buf, struct bit64* d); +int print_sysv1(size_t* sz, size_t* align, char* buf, struct sysv1* s); +int print_sysv2(size_t* sz, size_t* align, char* buf, struct sysv2* s); +int print_sysv3(size_t* sz, size_t* align, char* buf, struct sysv3* s); +int print_sysv4(size_t* sz, size_t* align, char* buf, union sysv4* s); +int print_sysv5(size_t* sz, size_t* align, char* buf, struct sysv5* s); +int print_sysv6(size_t* sz, size_t* align, char* buf, struct sysv6* s); +int print_sysv7(size_t* sz, size_t* align, char* buf, struct sysv7* s); + +struct fptr { + int (__cdecl *p)(int); +}; +int call_fptr(struct fptr* s, int val); + +bool g_b; +int8_t g_i8; +int16_t g_i16; +int32_t g_i32; +int64_t 
g_i64; +uint8_t g_u8; +uint16_t g_u16; +uint32_t g_u32; +uint64_t g_u64; +float g_f; +double g_d; +double complex g_dc; +float complex g_fc; +bool (*g_fp)(bool); +const char g_s[]; +const char* g_sp; +void* g_p; +enum e8 g_e8; +enum e16 g_e16; +enum e32 g_e32; +struct Date g_date; + +void set_errno(int val); +int get_errno(void); +]] + +local align = [[ +struct align_ALIGN_SUFFIX { + char pad; + TYPE v; +}; + +int print_align_ALIGN_SUFFIX(char* buf, struct align_ALIGN_SUFFIX* p); +]] + +local palign = [[ +#pragma pack(push) +#pragma pack(ALIGN) +]] .. align .. [[ +#pragma pack(pop) +]] + +local bitfields = [[ +struct bcTNUM { + uintTNUM_t a : 3; + intTNUM_t b : 3; +}; +struct blzTNUM { + uintTNUM_t a; + uintTNUM_t :0; + uintTNUM_t b; +}; +int print_bcTNUM(size_t* sz, size_t* align, char* buf, struct bcTNUM* s); +int print_blzTNUM(size_t* sz, size_t* align, char* buf, struct blzTNUM* s); +]] + +local bitalign = [[ +struct ba_TNUM_BNUM { + char a; + uintTNUM_t b : BNUM; +}; +struct bu_TNUM_BNUM { + char a; + uintTNUM_t :BNUM; + char b; +}; +int print_ba_TNUM_BNUM(size_t* sz, size_t* align, char* buf, struct ba_TNUM_BNUM* s); +]] + +local bitzero = [[ +struct bz_TNUM_ZNUM_BNUM { + uint8_t a; + uintTNUM_t b : 3; + uintZNUM_t :BNUM; + uintTNUM_t c : 3; +}; +int print_bz_TNUM_ZNUM_BNUM(size_t* sz, size_t* align, char* buf, struct bz_TNUM_ZNUM_BNUM* s); +]] + +local i = ffi.C.i +local test_values = { + ['void*'] = ffi.new('char[3]'), + ['const char*'] = 'foo', + float = 3.4, + double = 5.6, + uint16_t = 65000, + uint32_t = ffi.new('uint32_t', 700000056), + uint64_t = 12345678901234, + bool = true, + _Bool = false, + ['float complex'] = 3+4*i, + ['double complex'] = 5+6*i, + ['enum e8'] = ffi.C.FOO8, + ['enum e16'] = ffi.C.FOO16, + ['enum e32'] = ffi.C.FOO32, +} + +local types = { + b = 'bool', + b2 = '_Bool', + d = 'double', + f = 'float', + u64 = 'uint64_t', + u32 = 'uint32_t', + u16 = 'uint16_t', + s = 'const char*', + p = 'void*', + e8 = 'enum e8', + e16 = 'enum e16', 
+ e32 = 'enum e32', +} + +local buf = ffi.new('char[256]') + +local function checkbuf(kind, ret) + local str = tostring(test_values[kind]):gsub('^cdata%b<>: ', '') + + if type(test_values[kind])=='number' and tonumber(str) % 1 == 0 then -- if not decimal place... + str = tostring(math.floor(tonumber(str))) + end + + check(ffi.string(buf), str) + check(ret, #str) +end + +local function checkalign(kind, v, ret) + --print(v) + local str = tostring(test_values[kind]):gsub('^cdata%b<>: ', '') + + if type(test_values[kind])=='number' and tonumber(str) % 1 == 0 then -- if not decimal place... + str = tostring(math.floor(tonumber(str))) + end + + check(ffi.string(buf), ('size %d offset %d align %d value %s'):format(ffi.sizeof(v), ffi.offsetof(v, 'v'), ffi.alignof(v, 'v'), str)) + check(ret, #str) +end + +local u64 = ffi.typeof('uint64_t') +local i64 = ffi.typeof('int64_t') + +local first = true + +for convention,c in pairs(dlls) do + check(c.add_i8(1,1), 2) + check(c.add_i8(256,1), 1) + check(c.add_i8(127,1), -128) + check(c.add_i8(-120,120), 0) + check(c.add_u8(255,1), 0) + check(c.add_u8(120,120), 240) + check(c.add_i16(2000,4000), 6000) + check(c.add_d(20, 12), 32) + check(c.add_f(40, 32), 72) + check(c.not_b(true), false) + check(c.not_b2(false), true) + check(c.inc_e8(c.FOO8), c.BAR8) + check(c.inc_e8('FOO8'), c.BAR8) + check(c.inc_e16(c.FOO16), c.BAR16) + check(c.inc_e32(c.FOO32), c.BAR32) + check(c.ret_fp(c.g_fp), c.g_fp) + check(c.ret_fp2(c.g_fp), c.g_fp) + + if c.have_complex() then + check(c.add_dc(3+4*i, 4+5*i), 7+9*i) + check(c.add_fc(2+4*i, 6+8*i), 8+12*i) + types.dc = 'double complex' + types.fc = 'float complex' + else + types.dc = nil + types.fc = nil + end + check((3+4*i).re, 3) + check((3+4*i).im, 4) + check(ffi.new('complex float', 2+8*i).re, 2) + check(ffi.new('complex float', 5+6*i).im, 6) + + check(c.have_complex(), c.have_complex2()) + check(c.is_msvc, c.is_msvc2) + + check(c.g_b, true) + check(c.g_i8, -8) + check(c.g_i16, -16) + check(c.g_i32, -32) 
+ check(c.g_i64, i64(-64)) + check(c.g_u8, 8) + check(c.g_u16, 16) + check(c.g_u32, 32) + check(c.g_u64, u64(64)) + check(c.g_f, 3) + check(c.g_d, 5) + if c.have_complex() then + check(c.g_dc, 7 + 8*i) + check(c.g_fc, 6 + 9*i) + end + check(ffi.cast('void*', c.g_fp), c.g_p) + check(c.g_s, 'g_s') + check(c.g_sp, 'g_sp') + check(c.g_e8, c.FOO8) + check(c.g_e16, c.FOO16) + check(c.g_e32, c.FOO32) + check(c.g_date.nWeekDay, 1) + check(c.g_date.nMonthDay, 2) + check(c.g_date.nMonth, 3) + check(c.g_date.nYear, 4) + + c.g_b = false; check(c.g_b, false) + c.g_i8 = -108; check(c.g_i8, -108) + c.g_i16 = -1016; check(c.g_i16, -1016) + c.g_i32 = -1032; check(c.g_i32, -1032) + c.g_i64 = -1064; check(c.g_i64, i64(-1064)) + c.g_u8 = 208; check(c.g_u8, 208) + c.g_u16 = 2016; check(c.g_u16, 2016) + c.g_u32 = 2032; check(c.g_u32, 2032) + c.g_u64 = 2064; check(c.g_u64, u64(2064)) + c.g_f = 13; check(c.g_f, 13) + c.g_d = 15; check(c.g_d, 15) + if c.have_complex() then + c.g_dc = 17+18*i; check(c.g_dc, 17+18*i) + c.g_fc = 16+19*i; check(c.g_fc, 16+19*i) + end + c.g_sp = 'foo'; check(c.g_sp, 'foo') + c.g_e8 = c.BAR8; check(c.g_e8, c.BAR8) + c.g_e16 = c.BAR16; check(c.g_e16, c.BAR16) + c.g_e32 = c.BAR32; check(c.g_e32, c.BAR32) + c.g_date.nWeekDay = 3; check(c.g_date.nWeekDay, 3) + + local align_attr = c.is_msvc and [[ + struct align_attr_ALIGN_SUFFIX { + char pad; + __declspec(align(ALIGN)) TYPE v; + }; + + int print_align_attr_ALIGN_SUFFIX(char* buf, struct align_attr_ALIGN_SUFFIX* p); + ]] or [[ + struct align_attr_ALIGN_SUFFIX { + char pad; + TYPE v __attribute__(aligned(ALIGN)); + }; + + int print_align_attr_ALIGN_SUFFIX(char* buf, struct align_attr_ALIGN_SUFFIX* p); + ]] + + for suffix, type in pairs(types) do + local test = test_values[type] + --print('checkbuf', suffix, type, buf, test) + checkbuf(type, c['print_' .. suffix](buf, test)) + + if first then + ffi.cdef(align:gsub('SUFFIX', suffix):gsub('TYPE', type):gsub('ALIGN', 0)) + end + + local v = ffi.new('struct align_0_' .. 
suffix, {0, test}) + checkalign(type, v, c['print_align_0_' .. suffix](buf, v)) + + for _,align in ipairs{1,2,4,8,16} do + if align > c.max_alignment() then + break + end + + if first then + ffi.cdef(palign:gsub('SUFFIX', suffix):gsub('TYPE', type):gsub('ALIGN', align)) + ffi.cdef(align_attr:gsub('SUFFIX', suffix):gsub('TYPE', type):gsub('ALIGN', align)) + end + + local v = ffi.new('struct align_' .. align .. '_' .. suffix, {0, test}) + checkalign(type, v, c['print_align_' .. align .. '_' .. suffix](buf, v)) + + -- MSVC doesn't support aligned attributes on enums + if not type:match('^enum e[0-9]*$') or not c.is_msvc then + local v2 = ffi.new('struct align_attr_' .. align .. '_' .. suffix, {0, test}) + checkalign(type, v2, c['print_align_attr_' .. align .. '_' .. suffix](buf, v2)) + end + end + + if not c.is_msvc then + if first then + local h = [[ + struct align_attr_def_SUFFIX { + char pad; + TYPE v __attribute__(aligned); + }; + int print_align_attr_def_SUFFIX(char* buf, struct align_attr_def_SUFFIX* p); + ]] + ffi.cdef(h:gsub('SUFFIX', suffix):gsub('TYPE', type)) + end + + local v = ffi.new('struct align_attr_def_' .. suffix, {0, test}) + checkalign(type, v, c['print_align_attr_def_' .. 
suffix](buf, v)) + end + end + + local psz = ffi.new('size_t[1]') + local palign = ffi.new('size_t[1]') + local function check_align(type, test, ret) + --print('check_align', type, test, ret, ffi.string(buf), psz[0], palign[0]) + check(tonumber(palign[0]), ffi.alignof(type)) + check(tonumber(psz[0]), ffi.sizeof(type)) + check(ret, #test) + check(test, ffi.string(buf)) + end + + for _, tnum in ipairs{8, 16, 32, 64} do + if first then + ffi.cdef(bitfields:gsub('TNUM',tnum)) + end + + check_align('struct bc'..tnum, '1 2', c['print_bc'..tnum](psz, palign, buf, {1,2})) + check_align('struct blz'..tnum, '1 2', c['print_blz'..tnum](psz, palign, buf, {1,2})) + + for _, znum in ipairs{8, 16, 32, 64} do + for _, bnum in ipairs{7, 15, 31, 63} do + if bnum > znum then + break + end + if first then + ffi.cdef(bitzero:gsub('TNUM',tnum):gsub('ZNUM',znum):gsub('BNUM', bnum)) + end + check_align('struct bz_'..tnum..'_'..znum..'_'..bnum, '1 2 3', c['print_bz_'..tnum..'_'..znum..'_'..bnum](psz, palign, buf, {1,2,3})) + end + end + + for _, bnum in ipairs{7, 15, 31, 63} do + if bnum > tnum then + break + end + if first then + ffi.cdef(bitalign:gsub('TNUM',tnum):gsub('BNUM',bnum)) + end + check_align('struct ba_'..tnum..'_'..bnum, '1 2', c['print_ba_'..tnum..'_'..bnum](psz, palign, buf, {1,2})) + end + end + + check_align('struct Date', '1 2 3 4', c.print_date(psz, palign, buf, {1,2,3,4})) + check_align('struct Date2', '1 2 3 4', c.print_date2(psz, palign, buf, {1,2,3,4})) + check_align('struct sysv1', '1 2 3', c.print_sysv1(psz, palign, buf, {1,2,3})) + check_align('struct sysv2', '1 2 3 4 5 6', c.print_sysv2(psz, palign, buf, {1,2,3,4,5,6})) + check_align('struct sysv3', '1 2', c.print_sysv3(psz, palign, buf, {1,2})) + check_align('union sysv4', '1', c.print_sysv4(psz, palign, buf, {1})) + check_align('struct sysv5', '1 2 3', c.print_sysv5(psz, palign, buf, {1,2,3})) + check_align('struct sysv6', '1 2 3', c.print_sysv6(psz, palign, buf, {1,2,3})) + check_align('struct sysv7', '1 2 3 
4 5', c.print_sysv7(psz, palign, buf, {1,2,3,4,5})) + + local cbs = [[ + typedef const char* (*__cdecl sfunc)(const char*); + int call_i(int (*__cdecl func)(int), int arg); + float call_f(float (*__cdecl func)(float), float arg); + double call_d(double (*__cdecl func)(double), double arg); + const char* call_s(sfunc func, const char* arg); + _Bool call_b(_Bool (*__cdecl func)(_Bool), _Bool arg); + double complex call_dc(double complex (*__cdecl func)(double complex), double complex arg); + float complex call_fc(float complex (*__cdecl func)(float complex), float complex arg); + enum e8 call_e8(enum e8 (*__cdecl func)(enum e8), enum e8 arg); + enum e16 call_e16(enum e16 (*__cdecl func)(enum e16), enum e16 arg); + enum e32 call_e32(enum e32 (*__cdecl func)(enum e32), enum e32 arg); + ]] + + ffi.cdef(cbs:gsub('__cdecl', convention)) + + local u3 = ffi.new('uint64_t', 3) + check(c.call_i(function(a) return 2*a end, 3), 6) + assert(math.abs(c.call_d(function(a) return 2*a end, 3.2) - 6.4) < 0.0000000001) + assert(math.abs(c.call_f(function(a) return 2*a end, 3.2) - 6.4) < 0.000001) + check(ffi.string(c.call_s(function(s) return s + u3 end, 'foobar')), 'bar') + check(c.call_b(function(v) return not v end, true), false) + check(c.call_e8(function(v) return v + 1 end, c.FOO8), c.BAR8) + check(c.call_e16(function(v) return v + 1 end, c.FOO16), c.BAR16) + check(c.call_e32(function(v) return v + 1 end, c.FOO32), c.BAR32) + + if c.have_complex() then + check(c.call_dc(function(v) return v + 2+3*i end, 4+6*i), 6+9*i) + check(c.call_fc(function(v) return v + 1+2*i end, 7+4*i), 8+6*i) + end + + local u2 = ffi.new('uint64_t', 2) + local cb = ffi.new('sfunc', function(s) return s + u3 end) + check(ffi.string(cb('foobar')), 'bar') + check(ffi.string(c.call_s(cb, 'foobar')), 'bar') + cb:set(function(s) return s + u2 end) + check(ffi.string(c.call_s(cb, 'foobar')), 'obar') + + local fp = ffi.new('struct fptr') + assert(fp.p == ffi.C.NULL) + fp.p = function(a) return 2*a end + 
assert(fp.p ~= ffi.C.NULL) + check(c.call_fptr(fp, 4), 8) + local suc, err = pcall(function() fp.p:set(function() end) end) + assert(not suc) + check(err:gsub('^.*: ',''), "can't set the function for a non-lua callback") + + check(c.call_fptr({function(a) return 3*a end}, 5), 15) + + local suc, err = pcall(c.call_s, function(s) error(ffi.string(s), 0) end, 'my error') + check(suc, false) + check(err, 'my error') + + check(ffi.errno(), c.get_errno()) + c.set_errno(3) + check(ffi.errno(), 3) + check(c.get_errno(), 3) + check(ffi.errno(4), 3) + check(ffi.errno(), 4) + check(c.get_errno(), 4) + + local gccattr = { + __cdecl = 'int test_pow(int v) __attribute__((cdecl));', + __stdcall = 'int test_pow(int v) __attribute__(stdcall);', + __fastcall = '__attribute__(fastcall) int test_pow(int v);', + } + + ffi.cdef(gccattr[convention]) + check(c.test_pow(5), 25) + + ffi.cdef [[ + int va_list_size, va_list_align; + int vsnprintf(char* buf, size_t sz, const char* fmt, va_list ap); + ]] + ffi.new('va_list') + assert(ffi.debug().functions.vsnprintf ~= nil) + assert(ffi.istype('va_list', ffi.new('__builtin_va_list'))) + assert(ffi.istype('va_list', ffi.new('__gnuc_va_list'))) + check(ffi.sizeof('va_list'), c.va_list_size) + check(ffi.alignof('va_list'), c.va_list_align) + + first = false +end + +local c = ffi.C + +if ffi.os ~= 'Windows' then +assert(c.sprintf(buf, "%g", 5.3) == 3 and ffi.string(buf) == '5.3') +assert(c.sprintf(buf, "%d", false) == 1 and ffi.string(buf) == '0') +assert(c.sprintf(buf, "%d%g", false, 6.7) == 4 and ffi.string(buf) == '06.7') +end + +assert(ffi.sizeof('uint32_t[?]', 32) == 32 * 4) +assert(ffi.sizeof(ffi.new('uint32_t[?]', 32)) == 32 * 4) + +ffi.cdef [[ +struct vls { + struct { + char a; + struct { + char b; + char v[?]; + } c; + } d; +}; +struct vls2 { + char pad; + union { + uint8_t a; + uint16_t b; + }; +}; +]] + +assert(ffi.sizeof('struct vls', 3) == 5) +assert(ffi.sizeof(ffi.new('struct vls', 4).d.c) == 5) +assert(ffi.offsetof('struct vls2', 'a') 
== 2) +assert(ffi.sizeof('struct vls2') == 4) + +ffi.cdef [[ static const int DUMMY = 8 << 2; ]] +assert(ffi.C.DUMMY == 32) + +ffi.new('struct {const char* foo;}', {'foo'}) + +assert(not pcall(function() + ffi.new('struct {char* foo;}', {'ff'}) +end)) + +local mt = {} +local vls = ffi.new(ffi.metatype('struct vls', mt), 1) + +assert(not pcall(function() return vls.key end)) + +mt.__index = function(vls, key) + return function(vls, a, b) + return 'in index ' .. key .. ' ' .. vls.d.a .. ' ' .. a .. ' ' .. b + end +end + +vls.d.a = 3 +check(vls:key('a', 'b'), 'in index key 3.0 a b') + +assert(not pcall(function() vls.k = 3 end)) + +mt.__newindex = function(vls, key, val) + error('in newindex ' .. key .. ' ' .. vls.d.a .. ' ' .. val, 0) +end + +vls.d.a = 4 +local suc, err = pcall(function() vls.key = 'val' end) +assert(not suc) +check(err, 'in newindex key 4.0 val') + +mt.__add = function(vls, a) return vls.d.a + a end +mt.__sub = function(vls, a) return vls.d.a - a end +mt.__mul = function(vls, a) return vls.d.a * a end +mt.__div = function(vls, a) return vls.d.a / a end +mt.__mod = function(vls, a) return vls.d.a % a end +mt.__pow = function(vls, a) return vls.d.a ^ a end +mt.__eq = function(vls, a) return u64(vls.d.a) == a end +mt.__lt = function(vls, a) return u64(vls.d.a) < a end +mt.__le = function(vls, a) return u64(vls.d.a) <= a end +mt.__call = function(vls, a, b) return '__call', vls.d.a .. a .. (b or 'nil') end +mt.__unm = function(vls) return -vls.d.a end +mt.__concat = function(vls, a) return vls.d.a .. a end +mt.__len = function(vls) return vls.d.a end +mt.__tostring = function(vls) return 'string ' .. 
vls.d.a end + +vls.d.a = 5 +check(vls + 5, 10) +check(vls - 5, 0) +check(vls * 5, 25) +check(vls / 5, 1) +check(vls % 3, 2) +check(vls ^ 3, 125) +check(vls == u64(4), false) +check(vls == u64(5), true) +check(vls == u64(6), false) +check(vls < u64(4), false) +check(vls < u64(5), false) +check(vls < u64(6), true) +check(vls <= u64(4), false) +check(vls <= u64(5), true) +check(vls <= u64(6), true) +check(-vls, -5) +local a,b = vls('6') +check(a, '__call') +check(b, '5.06nil') +check(tostring(vls), 'string 5.0') + +if _VERSION ~= 'Lua 5.1' then + check(vls .. 'str', '5.0str') + check(#vls, 5) +end + +check(tostring(1+3*i), '1.0+3.0i') +check(tostring((1+3*i)*(2+4*i)), '-10.0+10.0i') +check(tostring((3+2*i)*(3-2*i)), '13.0') + +-- Should ignore unknown attributes +ffi.cdef [[ +typedef int ALenum; +__attribute__((dllimport)) void __attribute__((__cdecl__)) alEnable( ALenum capability ); +]] + +check(ffi.sizeof('struct {char foo[alignof(uint64_t)];}'), ffi.alignof('uint64_t')) + +-- Long double is not supported yet but it should be parsed +ffi.cdef('long double foo(long double val);') +check(tostring(ffi.debug().functions.foo):match('ctype(%b<>)'), '<long double (*)(long double)>') + +ffi.cdef [[ +typedef int byte1 __attribute__(mode(QI)); +typedef int byte2 __attribute__(mode(HI)); +typedef int byte4 __attribute__(mode(SI)); +typedef int byte8 __attribute__(mode(DI)); +typedef unsigned ubyte8 __attribute__(mode(DI)); +typedef int word __attribute__(mode(word)); +typedef int pointer __attribute__(mode(pointer)); +typedef int byte __attribute__(mode(byte)); +typedef float float4 __attribute__(mode(SF)); +typedef float float8 __attribute__(mode(DF)); +]] +assert(ffi.istype('int8_t', ffi.new('byte1'))) +assert(ffi.istype('int16_t', ffi.new('byte2'))) +assert(ffi.istype('int32_t', ffi.new('byte4'))) +assert(ffi.istype('int64_t', ffi.new('byte8'))) +assert(ffi.istype('uint64_t', ffi.new('ubyte8'))) +check(ffi.sizeof('void*'), ffi.sizeof('pointer')) +check(ffi.alignof('void*'), ffi.alignof('pointer')) 
-- Tail of the luaffi regression script: remaining `mode` attribute typedef
-- checks, parser corner cases, ffi.istype/cast/new/copy/fill behaviour and
-- the metatype (__new, __pairs, __ipairs, __index, __len) tests, followed by
-- the pass/fail summary. Relies on `ffi`, `check`, `num_ok` and `num_err`
-- being set up earlier in the file.

-- `word` must have pointer size/alignment; `byte`, `float4` and `float8`
-- must resolve to int8_t, float and double respectively.
check(ffi.sizeof('void*'), ffi.sizeof('word'))
check(ffi.alignof('void*'), ffi.alignof('word'))
assert(ffi.istype('int8_t', ffi.new('byte')))
assert(ffi.istype('float', ffi.new('float4')))
assert(ffi.istype('double', ffi.new('float8')))

-- A `register` parameter must be accepted by the parser and must not show
-- up in the printed function type. `%b<>` always returns the text including
-- its angle brackets, so the expected value has to spell the full type.
ffi.cdef('void register_foo(register int val);')
check(tostring(ffi.debug().functions.register_foo):match('%b<>'), '<void (*)(int)>')

-- Declarations carrying an __asm label (built from adjacent string
-- literals) must parse.
ffi.cdef [[
    typedef struct __sFILE FILE;
    FILE *fopen(const char * , const char * ) __asm("_" "fopen" );
]]

-- ffi.istype distinguishes values, pointers and array lengths, but ignores
-- a `const` qualifier on the value being tested.
assert(not ffi.istype('int', ffi.new('int*')))
assert(not ffi.istype('int[]', ffi.new('int*')))
assert(not ffi.istype('int[3]', ffi.new('int*')))
assert(not ffi.istype('int[3]', ffi.new('int[2]')))
assert(ffi.istype('const int[3]', ffi.new('const int[3]')))
assert(ffi.istype('int[3]', ffi.new('const int[3]')))

-- Crazy function pointer that takes an int and a function pointer and returns
-- a function pointer. Type of &signal.
check(tostring(ffi.typeof('void (*foo(int, void(*)(int)))(int)')):match('%b<>'), '<void (*(*)(int, void (*)(int)))(int)>')

-- Make sure we pass all arguments to tonumber
check(tonumber('FE', 16), 0xFE)

-- Allow casts from pointer to numeric types; ffi.new must still reject them.
ffi.cast('long', ffi.C.NULL)
ffi.cast('int8_t', ffi.C.NULL)
assert(not pcall(function() ffi.new('long', ffi.C.NULL) end))

-- ffi.new and ffi.cast allow unpacked struct/arrays
assert(ffi.new('int[3]', 1)[0] == 1)
assert(ffi.new('int[3]', {1})[0] == 1)
assert(ffi.new('int[3]', 1, 2)[1] == 2)
assert(ffi.new('int[3]', {1, 2})[1] == 2)

-- Variable-length member viewed through a pointer onto a fixed char[4]:
-- writes through `v.ch` must land in `d`.
ffi.cdef[[
struct var {
    char ch[?];
};
]]
local d = ffi.new('char[4]')
local v = ffi.cast('struct var*', d)
v.ch = {1,2,3,4}
assert(v.ch[3] == 4)
v.ch = "bar"
assert(v.ch[3] == 0)
assert(v.ch[2] == string.byte('r'))
assert(d[1] == string.byte('a'))

ffi.cast('char*', 1)

-- 2 arg form of ffi.copy
ffi.copy(d, 'bar')

-- unsigned should be ignored for pointer rules
ffi.cdef[[
int strncmp(const signed char *s1, const unsigned char *s2, size_t n);
]]
assert(ffi.C.strncmp("two", "three", 3) ~= 0)

-- ffi.fill with an explicit byte value, then with the default (zero).
ffi.fill(d, 3, 1)
assert(d[2] == 1)
ffi.fill(d, 3)
assert(d[2] == 0)

-- tests for __new
ffi.cdef[[
struct newtest {
    int a;
    int b;
    int c;
};
]]

-- __new that allocates first and assigns the fields afterwards.
local tp = ffi.metatype("struct newtest", {
    __new = function(ct, x, y, z)
        local obj = ffi.new(ct)
        obj.a, obj.b, obj.c = x, y, z
        return obj
    end,
})
v = tp(1, 2, 3)
assert(v.a == 1 and v.b == 2 and v.c == 3)

-- __new that forwards a table initializer to ffi.new.
tp = ffi.metatype("struct newtest", {
    __new = function(ct, x, y, z)
        return ffi.new(ct, {a = x, b = y, c = z})
    end,
})
v = tp(1, 2, 3)
assert(v.a == 1 and v.b == 2 and v.c == 3)

-- tests for __pairs and __ipairs; not iterating just testing what is returned
tp = ffi.metatype("struct newtest", {
    __pairs = function(s) return s.a, s.b end,
    __ipairs = function(s) return s.b, s.c end,
})
v = tp(1, 2, 3)
-- Declared local so the script does not leak `x`/`y` into the global table.
local x, y = pairs(v)
assert(x == 1 and y == 2)
-- NOTE(review): stock Lua 5.4 `ipairs` no longer consults __ipairs; confirm
-- this expectation against the Lua version the build actually targets.
x, y = ipairs(v)
assert(x == 2 and y == 3)

-- test for pointer to struct having same metamethods
ffi.cdef "struct ptest {int a, b;};"
tp = ffi.metatype("struct ptest", {
    __index = function(s, k) return k end,
    __len = function(s) return 3 end,
})

local a = tp(1, 2)
assert(a.banana == "banana")
assert(#a == 3)
local b = ffi.new("int[2]")
local c = ffi.cast("struct ptest *", b)
assert(c.banana == "banana") -- should have same methods
assert(#c == 3)


print('Done: '..num_ok..'/'..(num_ok+num_err)..' passed')