Untitled diff

Created Diff never expires
131 removals
307 lines
127 additions
305 lines
/*
/*
* ia32abicc.c
* x64win64ia32abicc.c
*
*
* Support for Call-outs and Call-backs from the Plugin.
* Support for Call-outs and Call-backs from the Plugin on x86_64 on Windows.
* Written by Eliot Miranda 11/07.
* here referred to as x64win64ABI
*/
*/


/* null if compiled on other than x86, to get around gnu make bugs or
/* null if compiled on other than x64, to get around gnu make bugs or
* misunderstandings on our part.
* misunderstandings on our part.
*/
*/
#if i386|i486|i586|i686
#if x86_64|x64|__x86_64|__x86_64__|_M_AMD64|_M_X64


#if defined(_MSC_VER) || defined(__MINGW32__)
#if defined(_MSC_VER) || defined(__MINGW32__)
# include "windows.h" /* for GetSystemInfo & VirtualAlloc */
# include "windows.h" /* for GetSystemInfo & VirtualAlloc */
#elif __APPLE__ && __MACH__
# include <sys/mman.h> /* for mprotect */
# if OBJC_DEBUG /* define this to get debug info for struct objc_class et al */
# include <objc/objc.h>
# include <objc/objc-class.h>

/* Debug-only hook, compiled only under OBJC_DEBUG: a global typed as a pointer
 * to struct objc_class so that debug info for that struct gets emitted.
 */
struct objc_class *baz;

/* Store p in baz. */
void setbaz(void *p) { baz = p; }
/* Answer the pointer most recently stored via setbaz (null initially). */
void *getbaz(void) { return baz; }
# endif
# include <stdlib.h> /* for valloc */
# include <sys/mman.h> /* for mprotect */
#else
#else
# include <stdlib.h> /* for valloc */
# error Non windows should use the SystemV ABI, not the win64 ABI
# include <sys/mman.h> /* for mprotect */
#endif
#endif


# include <stdlib.h> /* for valloc */
#include <string.h> /* for memcpy et al */
#include <string.h> /* for memcpy et al */
#include <setjmp.h>
#include <setjmp.h>
#include <stdio.h> /* for fprintf(stderr,...) */
#include <stdio.h> /* for fprintf(stderr,...) */


#include "sqMemoryAccess.h"
#include "sqMemoryAccess.h"
#include "vmCallback.h"
#include "vmCallback.h"
#include "sqAssert.h"
#include "sqAssert.h"
#include "sqVirtualMachine.h"
#include "sqVirtualMachine.h"
#include "ia32abi.h"
#include "ia32abi.h"


/* Fallback min() for toolchains whose headers do not already provide one.
 * N.B. the selected argument is evaluated twice; avoid side effects.
 */
#ifndef min
# define min(a,b) ((a) < (b) ? (a) : (b))
#endif


/* Presumably the number of leading arguments passed in registers; the
 * thunkEntry comment in this file speaks of 4 register arguments (int or
 * floating-point).  TODO confirm against the included call-out headers.
 */
#define NUM_REG_ARGS 4

/* The interpreter proxy through which this file calls into the VM.  When
 * SQUEAK_BUILTIN_PLUGIN is defined the variable is only declared here
 * (extern); otherwise this line is its tentative definition.
 */
#ifdef SQUEAK_BUILTIN_PLUGIN
extern
#endif
struct VirtualMachine* interpreterProxy;


/* Stack-pointer access (setsp/getsp) and stack-alignment configuration.
 *
 * NOTE(review): the lines in this region look like an interleaved old/new
 * diff rendering.  setsp and getsp are each defined twice with different
 * bodies (32-bit esp vs 64-bit rsp), the #if/#endif pairs are doubled, and
 * STACK_ALIGN_BYTES is defined unconditionally to 32 after the conditional
 * definitions to 16.  Resolve to a single variant before compiling.
 */
/* MSVC spells alloca as _alloca. */
#ifdef _MSC_VER
# define alloca _alloca
#endif
/* GNU C: read/write the hardware stack pointer via inline asm. */
#if __GNUC__
#if __GNUC__
# define setsp(sp) asm volatile ("movl %0,%%esp" : : "m"(sp))
# define setsp(sp) asm volatile ("movq %0,%%rsp" : : "m"(sp))
# define getsp() ({ void *sp; asm volatile ("movl %%esp,%0" : "=r"(sp) : ); sp;})
# define getsp() ({ void *sp; asm volatile ("movq %%rsp,%0" : "=r"(sp) : ); sp;})
#endif
#if __APPLE__ && __MACH__ && __i386__
# define STACK_ALIGN_BYTES 16
#elif __linux__ && __i386__
# define STACK_ALIGN_BYTES 16
#elif defined(_WIN32) && __SSE2__
/* using sse2 instructions requires 16-byte stack alignment but on win32 there's
* no guarantee that libraries preserve alignment so compensate on callback.
*/
# define STACK_ALIGN_HACK 1
# define STACK_ALIGN_BYTES 16
#endif
#endif
#define STACK_ALIGN_BYTES 32 /* 32 if a 256-bit argument is passed; 16 otherwise */


/* Fallback when no compiler-specific setsp was defined above: a no-op
 * expression that ignores its argument.
 */
#if !defined(setsp)
#if !defined(setsp)
# define setsp(ignored) 0
# define setsp(ignored) 0
#endif
#endif


/* Power-of-two alignment helpers.
 *   moduloPOT(m,v)            rounds v up to the next multiple of m.
 *   alignModuloPOT(m,v)       same, for a pointer-sized value, answering void *.
 *   RoundUpPowerOfTwo(v,m)    rounds v up to the next multiple of m.
 *   IsAlignedPowerOfTwo(v,m)  true when v is a multiple of m.
 * All assume the modulus is a power of two (they mask with modulus - 1).
 *
 * NOTE(review): these lines look interleaved from a diff rendering; the
 * line continuation ending the RoundUpPowerOfTwo line is followed by the
 * alignModuloPOT #define rather than by RoundUpPowerOfTwo's own replacement
 * text, which appears one line further down.  Untangle before compiling.
 */
#define moduloPOT(m,v) (((v)+(m)-1) & ~((m)-1))
#define RoundUpPowerOfTwo(value, modulus) \
#define alignModuloPOT(m,v) ((void *)moduloPOT(m,(unsigned long)(v)))
(((value) + (modulus) - 1) & ~((modulus) - 1))

#define IsAlignedPowerOfTwo(value, modulus) \
(((value) & ((modulus) - 1)) == 0)


/* Accessors for Alien objects.
 *
 * NOTE(review): every macro in this region appears twice, apparently from an
 * interleaved old/new diff rendering.  Most pairs are identical, but
 * sizeField differs (long vs long long), and the multi-line macros have their
 * continuation lines interleaved.  Resolve to one variant before compiling.
 */
/* Class tests: does anOop's class include the behavior of Alien / UnsafeAlien? */
#define objIsAlien(anOop) (interpreterProxy->includesBehaviorThatOf(interpreterProxy->fetchClassOf(anOop), interpreterProxy->classAlien()))
#define objIsAlien(anOop) (interpreterProxy->includesBehaviorThatOf(interpreterProxy->fetchClassOf(anOop), interpreterProxy->classAlien()))
#define objIsUnsafeAlien(anOop) (interpreterProxy->includesBehaviorThatOf(interpreterProxy->fetchClassOf(anOop), interpreterProxy->classUnsafeAlien()))
#define objIsUnsafeAlien(anOop) (interpreterProxy->includesBehaviorThatOf(interpreterProxy->fetchClassOf(anOop), interpreterProxy->classUnsafeAlien()))


/* sizeField: the signed size word located BaseHeaderSize bytes into the object.
 * dataPtr:   the address BytesPerOop bytes past that size word.
 */
#define sizeField(alien) (*(long *)pointerForOop((sqInt)(alien) + BaseHeaderSize))
#define sizeField(alien) (*(long long *)pointerForOop((sqInt)(alien) + BaseHeaderSize))
#define dataPtr(alien) pointerForOop((sqInt)(alien) + BaseHeaderSize + BytesPerOop)
#define dataPtr(alien) pointerForOop((sqInt)(alien) + BaseHeaderSize + BytesPerOop)
#if 0 /* obsolete after adding pointer Aliens with size field == 0 */
# define isIndirectOrPointer(alien) (sizeField(alien) <= 0)
# define startOfData(alien) (isIndirectOrPointer(alien) \
? *(void **)dataPtr(alien) \
: (void *)dataPtr(alien))
#endif
/* A negative size marks an indirect Alien: its data slot holds a pointer to
 * the data rather than the data itself, so the start-of-data macros
 * dereference dataPtr in that case.
 */
#define isIndirect(alien) (sizeField(alien) < 0)
#define isIndirect(alien) (sizeField(alien) < 0)
#define startOfParameterData(alien) (isIndirect(alien) \
#define startOfParameterData(alien) (isIndirect(alien) \
? *(void **)dataPtr(alien) \
? *(void **)dataPtr(alien) \
: (void *)dataPtr(alien))
: (void *)dataPtr(alien))
#define isIndirectSize(size) ((size) < 0)
#define isIndirectSize(size) ((size) < 0)
#define startOfDataWithSize(alien,size) (isIndirectSize(size) \
#define startOfDataWithSize(alien,size) (isIndirectSize(size) \
? *(void **)dataPtr(alien) \
? *(void **)dataPtr(alien) \
: (void *)dataPtr(alien))
: (void *)dataPtr(alien))


/* Tagged small-integer tests and decoding.
 *
 * NOTE(review): two incompatible variants are interleaved here, apparently
 * from a diff rendering: a 1-bit tag (test the low bit, shift right by 1)
 * and a 3-bit tag (low three bits equal 1, shift right by 3).  Keep only the
 * pair that matches the target object format.
 */
#define isSmallInt(oop) ((oop)&1)
#define isSmallInt(oop) (((oop)&7)==1)
#define intVal(oop) (((long)(oop))>>1)
#define intVal(oop) (((long long)(oop))>>3)

extern void loadFloatRegs(double,double,double,double);

/* Overlays a long long and a double in the same storage, so one value can be
 * read back as the other without a pointer cast.
 */
typedef union {
	long long	i;	/* integer view */
	double		d;	/* floating-point view */
} int64_or_double;


/*
/*
* Call a foreign function that answers an integral result in %eax (and
* Call a foreign function that answers an integral result in %rax
* possibly %edx) according to IA32-ish ABI rules.
* according to x64-ish ABI rules.
*/
*/
/* callIA32IntegralReturn: call-out whose callee answers an integral result.
 *
 * The visible body only declares locals; the calling logic comes from the
 * #included "business" header(s), which are not part of this file.  SIGNATURE
 * is a macro defined elsewhere (presumably ia32abi.h -- TODO confirm).
 *
 * f0..fF are 16 function-pointer variables covering every combination of
 * long long (rcx, rdx, r8, r9) or double (xmm0..xmm3) in the first four
 * argument positions; the hex digit's bit n is set when position n is double.
 *
 * NOTE(review): this region looks like an interleaved old/new diff rendering.
 * The function header appears twice, both `(*f)(), r` declarations and
 * `long long r` are present, and both "dabusiness.h" and
 * "dax64win64business.h" are included.  Resolve to one variant before
 * compiling.
 */
sqInt
sqInt callIA32IntegralReturn(SIGNATURE) {
callIA32IntegralReturn(SIGNATURE) {
long long (*f0)(long long rcx, long long rdx, long long r8, long long r9);
#ifdef _MSC_VER
long long (*f1)(double xmm0, long long rdx, long long r8, long long r9);
__int64 (*f)(), r;
long long (*f2)(long long rcx, double xmm1, long long r8, long long r9);
#else
long long (*f3)(double xmm0, double xmm1, long long r8, long long r9);
long long (*f)(), r;
long long (*f4)(long long rcx, long long rdx, double xmm2, long long r9);
#endif
long long (*f5)(double xmm0, long long rdx, double xmm2, long long r9);
#include "dabusiness.h"
long long (*f6)(long long rcx, double xmm1, double xmm2, long long r9);
long long (*f7)(double xmm0, double xmm1, double xmm2, long long r9);
long long (*f8)(long long rcx, long long rdx, long long r8, double xmm3);
long long (*f9)(double xmm0, long long rdx, long long r8, double xmm3);
long long (*fA)(long long rcx, double xmm1, long long r8, double xmm3);
long long (*fB)(double xmm0, double xmm1, long long r8, double xmm3);
long long (*fC)(long long rcx, long long rdx, double xmm2, double xmm3);
long long (*fD)(double xmm0, long long rdx, double xmm2, double xmm3);
long long (*fE)(long long rcx, double xmm1, double xmm2, double xmm3);
long long (*fF)(double xmm0, double xmm1, double xmm2, double xmm3);
/* r: presumably receives the callee's result inside the included header. */
long long r;
#include "dax64win64business.h"
}
}


/*
/*
* Call a foreign function that answers a single-precision floating-point
* Call a foreign function that answers a single-precision floating-point
* result in %f0 according to IA32-ish ABI rules.
* result in %xmm0 according to x64-ish ABI rules.
*/
*/
/* callIA32FloatReturn: call-out whose callee answers a float result.
 *
 * The visible body only declares locals; the calling logic comes from the
 * #included "business" header(s), which are not part of this file.  SIGNATURE
 * is a macro defined elsewhere (presumably ia32abi.h -- TODO confirm).
 *
 * f0..fF are 16 float-returning function-pointer variables covering every
 * combination of long long (rcx, rdx, r8, r9) or double (xmm0..xmm3) in the
 * first four argument positions; the hex digit's bit n is set when position n
 * is double.
 *
 * NOTE(review): this region looks like an interleaved old/new diff rendering.
 * The function header appears twice, both `float (*f)(), r` and `float r` are
 * declared, and both "dabusiness.h" and "dax64win64business.h" are included.
 * Resolve to one variant before compiling.
 */
sqInt
sqInt callIA32FloatReturn(SIGNATURE) {
callIA32FloatReturn(SIGNATURE) { float (*f)(), r;
float (*f0)(long long rcx, long long rdx, long long r8, long long r9);
#include "dabusiness.h"
float (*f1)(double xmm0, long long rdx, long long r8, long long r9);
float (*f2)(long long rcx, double xmm1, long long r8, long long r9);
float (*f3)(double xmm0, double xmm1, long long r8, long long r9);
float (*f4)(long long rcx, long long rdx, double xmm2, long long r9);
float (*f5)(double xmm0, long long rdx, double xmm2, long long r9);
float (*f6)(long long rcx, double xmm1, double xmm2, long long r9);
float (*f7)(double xmm0, double xmm1, double xmm2, long long r9);
float (*f8)(long long rcx, long long rdx, long long r8, double xmm3);
float (*f9)(double xmm0, long long rdx, long long r8, double xmm3);
float (*fA)(long long rcx, double xmm1, long long r8, double xmm3);
float (*fB)(double xmm0, double xmm1, long long r8, double xmm3);
float (*fC)(long long rcx, long long rdx, double xmm2, double xmm3);
float (*fD)(double xmm0, long long rdx, double xmm2, double xmm3);
float (*fE)(long long rcx, double xmm1, double xmm2, double xmm3);
float (*fF)(double xmm0, double xmm1, double xmm2, double xmm3);
/* r: presumably receives the callee's result inside the included header. */
float r;
#include "dax64win64business.h"
}
}


/*
/*
* Call a foreign function that answers a double-precision floating-point
* Call a foreign function that answers a double-precision floating-point
* result in %f0 according to IA32-ish ABI rules.
* result in %xmm0 according to x64-ish ABI rules.
*/
*/
/* callIA32DoubleReturn: call-out whose callee answers a double result.
 *
 * The visible body only declares locals; the calling logic comes from the
 * #included "business" header(s), which are not part of this file.  SIGNATURE
 * is a macro defined elsewhere (presumably ia32abi.h -- TODO confirm).
 *
 * f0..fF are 16 double-returning function-pointer variables covering every
 * combination of long long (rcx, rdx, r8, r9) or double (xmm0..xmm3) in the
 * first four argument positions; the hex digit's bit n is set when position n
 * is double.
 *
 * NOTE(review): this region looks like an interleaved old/new diff rendering.
 * The function header appears twice, both `double (*f)(), r` and `double r`
 * are declared, and both "dabusiness.h" and "dax64win64business.h" are
 * included.  Resolve to one variant before compiling.
 */
sqInt
sqInt callIA32DoubleReturn(SIGNATURE) {
callIA32DoubleReturn(SIGNATURE) { double (*f)(), r;
double (*f0)(long long rcx, long long rdx, long long r8, long long r9);
#include "dabusiness.h"
double (*f1)(double xmm0, long long rdx, long long r8, long long r9);
double (*f2)(long long rcx, double xmm1, long long r8, long long r9);
double (*f3)(double xmm0, double xmm1, long long r8, long long r9);
double (*f4)(long long rcx, long long rdx, double xmm2, long long r9);
double (*f5)(double xmm0, long long rdx, double xmm2, long long r9);
double (*f6)(long long rcx, double xmm1, double xmm2, long long r9);
double (*f7)(double xmm0, double xmm1, double xmm2, long long r9);
double (*f8)(long long rcx, long long rdx, long long r8, double xmm3);
double (*f9)(double xmm0, long long rdx, long long r8, double xmm3);
double (*fA)(long long rcx, double xmm1, long long r8, double xmm3);
double (*fB)(double xmm0, double xmm1, long long r8, double xmm3);
double (*fC)(long long rcx, long long rdx, double xmm2, double xmm3);
double (*fD)(double xmm0, long long rdx, double xmm2, double xmm3);
double (*fE)(long long rcx, double xmm1, double xmm2, double xmm3);
double (*fF)(double xmm0, double xmm1, double xmm2, double xmm3);
/* r: presumably receives the callee's result inside the included header. */
double r;
#include "dax64win64business.h"
}
}


/* Queueing order for callback returns. To ensure that callback returns occur
/* Queueing order for callback returns. To ensure that callback returns occur
* in LIFO order we provide mostRecentCallbackContext which is tested by the
* in LIFO order we provide mostRecentCallbackContext which is tested by the
* return primitive primReturnFromContextThrough. Note that in the threaded VM
* return primitive primReturnFromContextThrough. Note that in the threaded VM
* this does not have to be thread-specific or locked since it is within the
* this does not have to be thread-specific or locked since it is within the
* bounds of the ownVM/disownVM pair.
* bounds of the ownVM/disownVM pair.
*/
*/
/* The innermost callback context currently being serviced; initially null.
 * thunkEntry saves the previous value, installs its own context, and restores
 * the previous value on the way out.
 */
static VMCallbackContext *mostRecentCallbackContext = 0;


/* Answer the current value of mostRecentCallbackContext (null if none has
 * been installed).
 */
VMCallbackContext *
getMostRecentCallbackContext(void) { return mostRecentCallbackContext; }


/* Read / write the most-recent-callback-context slot.  getRMCC ignores its
 * argument; setRMCC stores its argument (converted to void *) and yields the
 * stored value.
 */
#define getRMCC(t) mostRecentCallbackContext
#define setRMCC(t) (mostRecentCallbackContext = (void *)(t))


/*
/*
* Entry-point for call-back thunks. Args are thunk address and stack pointer,
* Entry-point for call-back thunks. Args are thunk address and stack pointer,
* where the stack pointer is pointing one word below the return address of the
* where the stack pointer is pointing one word below the return address of the
* thunk's callee, 4 bytes below the thunk's first argument. The stack is:
* thunk's callee, 4 bytes below the thunk's first argument. The stack is:
* callback
* callback
* arguments
* arguments
* retpc (thunk) <--\
* retpc (thunk) <--\
* address of retpc-/ <--\
* address of retpc-/ <--\
* address of address of ret pc-/
* address of address of ret pc-/
* thunkp
* thunkp
* esp->retpc (thunkEntry)
* space for saving 4 registers rcx,rdx,r8,r9
*
* rsp->retpc (thunkEntry)
* The stack pointer is pushed twice to keep the stack alignment to 16 bytes, a
* requirement on platforms using SSE2 such as Mac OS X, and harmless elsewhere.
*
*
* This function's roles are to use setjmp/longjmp to save the call point
* This function's roles are to use setjmp/longjmp to save the call point
* and return to it, to correct C stack pointer alignment if necessary (see
* and return to it, and to return any of the various values from the callback.
* STACK_ALIGN_HACK), and to return any of the various values from the callback.
*
*
* Looking forward to support for x86-64, which typically has 6 register
* To support x86-64, which has 4 register arguments (int or floating-point)
* arguments, the function would take 8 arguments, the 6 register args as
* the function takes 6 arguments, the 4 register args as long longs,
* longs, followed by the thunkp and stackp passed on the stack. The register
* followed by the thunkp and stackp passed on the stack. The register
* args would get copied into a struct on the stack. A pointer to the struct
* args get copied into a struct on the stack. A pointer to the struct is then
* is then passed as an element of the VMCallbackContext.
* passed as an element of the VMCallbackContext.
*/
*/
/* thunkEntry: C entry point reached from a callback thunk.
 *
 * Visible flow: take ownership of the VM (ownVM), record the call point with
 * setjmp in vmcc.trampoline, fill in the VMCallbackContext and hand it to
 * sendInvokeCallbackContext.  A non-zero setjmp result names the kind of
 * value to return (retword, retword64, retdouble, retstruct).  Answers -1 if
 * the VM cannot be owned or the callback send returns instead of jumping
 * back, and 0 for an unrecognised return type.
 *
 * NOTE(review): this region looks like an interleaved old/new diff rendering.
 * Two signatures, two sets of locals and two variants of several statements
 * (32-bit esp/edx/x87 asm vs 64-bit intargs/fpargs handling) alternate line
 * by line, and the new-side register-argument copying is interleaved with
 * the old-side STACK_ALIGN_HACK block.  Resolve to one variant before
 * compiling.
 *
 * NOTE(review): previousCallbackContext is assigned after setjmp returns 0
 * and read after the non-zero return, but is not declared volatile; the C
 * standard leaves such an automatic object indeterminate after longjmp.
 */
long

thunkEntry(void *thunkp, sqIntptr_t *stackp)
long long
thunkEntry(long long rcx, long long rdx,
long long r8, long long r9,
void *thunkp, sqIntptr_t *stackp)
{
{
VMCallbackContext vmcc;
VMCallbackContext vmcc;
VMCallbackContext *previousCallbackContext;
VMCallbackContext *previousCallbackContext;
int flags, returnType;
long long flags, returnType;
long long intargs[4];
double fpargs[4];


#if STACK_ALIGN_HACK
intargs[0] = rcx;
{ void *sp = getsp();
intargs[1] = rdx;
int offset = (unsigned long)sp & (STACK_ALIGN_BYTES - 1);
intargs[2] = r8;
if (offset) {
intargs[3] = r9;
# if _MSC_VER
_asm sub esp, dword ptr offset;
extern void saveFloatRegsWin64(long long xmm0,long long xmm1,long long xmm2, long long xmm3,double *fpargs); /* fake passing long long args */
# elif __GNUC__
saveFloatRegsWin64(rcx,rdx,r8,r9,fpargs); /* the callee expects double parameters that it will retrieve thru registers */
asm("sub %0,%%esp" : : "m"(offset));
# else
# error need to subtract offset from esp
# endif
sp = getsp();
assert(!((unsigned long)sp & (STACK_ALIGN_BYTES - 1)));
}
}
#endif /* STACK_ALIGN_HACK */


/* A negative result from ownVM means the VM could not be owned; give up. */
if ((flags = interpreterProxy->ownVM(0)) < 0) {
if ((flags = interpreterProxy->ownVM(0)) < 0) {
fprintf(stderr,"Warning; callback failed to own the VM\n");
fprintf(stderr,"Warning; callback failed to own the VM\n");
return -1;
return -1;
}
}


/* setjmp answers 0 on the direct call; the block below then dispatches the
 * callback.  Reaching the statements after sendInvokeCallbackContext means
 * the send returned normally instead of jumping back, which is treated as a
 * failure.
 */
if (!(returnType = setjmp(vmcc.trampoline))) {
if (!(returnType = setjmp(vmcc.trampoline))) {
previousCallbackContext = getRMCC();
previousCallbackContext = getRMCC();
setRMCC(&vmcc);
setRMCC(&vmcc);
vmcc.thunkp = thunkp;
vmcc.thunkp = thunkp;
vmcc.stackp = stackp + 2; /* skip address of retpc & retpc (thunk) */
vmcc.stackp = stackp + 2; /* skip address of retpc & retpc (thunk) */
vmcc.intregargsp = 0;
vmcc.intregargsp = intargs;
vmcc.floatregargsp = 0;
vmcc.floatregargsp = fpargs;
interpreterProxy->sendInvokeCallbackContext(&vmcc);
interpreterProxy->sendInvokeCallbackContext(&vmcc);
fprintf(stderr,"Warning; callback failed to invoke\n");
fprintf(stderr,"Warning; callback failed to invoke\n");
setRMCC(previousCallbackContext);
setRMCC(previousCallbackContext);
interpreterProxy->disownVM(flags);
interpreterProxy->disownVM(flags);
return -1;
return -1;
}
}
/* Non-zero setjmp result: restore the previous context and release the VM
 * before answering the callback's value.
 */
setRMCC(previousCallbackContext);
setRMCC(previousCallbackContext);
interpreterProxy->disownVM(flags);
interpreterProxy->disownVM(flags);


/* returnType selects which member of vmcc.rvs holds the result. */
switch (returnType) {
switch (returnType) {


case retword: return vmcc.rvs.valword;
case retword: return vmcc.rvs.valword;


case retword64: {
case retword64: return (((unsigned long long)vmcc.rvs.valleint64.high) << 32) | (unsigned int)vmcc.rvs.valleint64.low;
long vhigh = vmcc.rvs.valleint64.high;
#if _MSC_VER
_asm mov edx, dword ptr vhigh;
#elif __GNUC__
asm("mov %0,%%edx" : : "m"(vhigh));
#else
# error need to load edx with vmcc.rvs.valleint64.high on this compiler
#endif
return vmcc.rvs.valleint64.low;
}


case retdouble: {
case retdouble:
double valflt64 = vmcc.rvs.valflt64;
fakeReturnDouble( vmcc.rvs.valflt64 );
#if _MSC_VER
return 0;
_asm fld qword ptr valflt64;
#elif __GNUC__
asm("fldl %0" : : "m"(valflt64));
#else
# error need to load %f0 with vmcc.rvs.valflt64 on this compiler
#endif
return 0;
}


/* Struct results are copied to the address held in stackp[1], which is also
 * the value answered.
 */
case retstruct: memcpy( (void *)(stackp[1]),
case retstruct: memcpy( (void *)(stackp[1]),
vmcc.rvs.valstruct.addr,
vmcc.rvs.valstruct.addr,
vmcc.rvs.valstruct.size);
vmcc.rvs.valstruct.size);
return stackp[1];
return stackp[1];
}
}
/* No case matched: warn and answer 0. */
fprintf(stderr,"Warning; invalid callback return type\n");
fprintf(stderr,"Warning; invalid callback return type\n");
return 0;
return 0;
}
}


/*
/*
* Thunk allocation support. Since thunks must be executable and some OSs
* Thunk allocation support. Since thunks must be executable and some OSs
* may not provide default execute permission on memory returned by malloc
* may not provide default execute permission on memory returned by malloc
* we must provide memory that is guaranteed to be executable. The abstraction
* we must provide memory that is guaranteed to be executable. The abstraction
* is to answer an Alien that references an executable piece of memory that
* is to answer an Alien that references an executable piece of memory that
* is some (possibly unitary) multiple of the pagesize.
* is some (possibly unitary) multiple of the pagesize.
*
*
* We assume the Smalltalk image code will manage subdividing the executable
* We assume the Smalltalk image code will manage subdividing the executable
* page amongst thunks so there is no need to free these pages, since the image
* page amongst thunks so there is no need to free these pages, since the image
* will recycle parts of the page for reclaimed thunks.
* will recycle parts of the page for reclaimed thunks.
*/
*/
#if defined(_MSC_VER) || defined(__MINGW32__)
/* Cached system page size; 0 until allocateExecutablePage first queries it
 * via GetSystemInfo.
 */
static unsigned long pagesize = 0;
#endif


/* allocateExecutablePage: answer one page of zeroed memory with read, write
 * and execute permission, or 0 on failure.
 *
 * size: out-parameter that receives the page size in bytes; it is written
 *       only when the allocation succeeds.
 *
 * On MSVC/MinGW the page comes from VirtualAlloc, with the page size cached
 * in the file-scope pagesize.  Otherwise it comes from valloc, is cleared
 * with memset and made executable with mprotect; if mprotect fails the
 * memory is freed.
 *
 * NOTE(review): every line appears twice here, apparently from an interleaved
 * old/new diff rendering; the only differing pair is the non-Windows
 * pagesize declaration (long vs long long).  Resolve to one variant before
 * compiling.
 */
void *
void *
allocateExecutablePage(long *size)
allocateExecutablePage(long *size)
{
{
void *mem;
void *mem;


#if defined(_MSC_VER) || defined(__MINGW32__)
#if defined(_MSC_VER) || defined(__MINGW32__)
#if !defined(MEM_TOP_DOWN)
#if !defined(MEM_TOP_DOWN)
# define MEM_TOP_DOWN 0x100000
# define MEM_TOP_DOWN 0x100000
#endif
#endif
/* Query the system page size once and cache it. */
if (!pagesize) {
if (!pagesize) {
SYSTEM_INFO sysinf;
SYSTEM_INFO sysinf;


GetSystemInfo(&sysinf);
GetSystemInfo(&sysinf);


pagesize = sysinf.dwPageSize;
pagesize = sysinf.dwPageSize;
}
}
/* N.B. VirtualAlloc MEM_COMMIT initializes the memory returned to zero. */
/* N.B. VirtualAlloc MEM_COMMIT initializes the memory returned to zero. */
mem = VirtualAlloc( 0,
mem = VirtualAlloc( 0,
pagesize,
pagesize,
MEM_COMMIT | MEM_TOP_DOWN,
MEM_COMMIT | MEM_TOP_DOWN,
PAGE_EXECUTE_READWRITE);
PAGE_EXECUTE_READWRITE);
if (mem)
if (mem)
*size = pagesize;
*size = pagesize;
#else
#else
long pagesize = getpagesize();
long long pagesize = getpagesize();


if (!(mem = valloc(pagesize)))
if (!(mem = valloc(pagesize)))
return 0;
return 0;


/* Zero the page to match the VirtualAlloc path, then make it executable. */
memset(mem, 0, pagesize);
memset(mem, 0, pagesize);
if (mprotect(mem, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
if (mprotect(mem, pagesize, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
free(mem);
free(mem);
return 0;
return 0;
}
}
*size = pagesize;
*size = pagesize;
#endif
#endif
return mem;
return mem;
}
}
#endif /* i386|i486|i586|i686 */
#endif /* x86_64|x64|__x86_64|__x86_64__ */