1 #define LIBFFI_ASM
  2 #include <fficonfig.h>
  3 #include <ffi.h>
  4 #include <ffi_cfi.h>
  5 #include "asmnames.h"
  6 
  7 #if defined(HAVE_AS_CFI_PSEUDO_OP)
  8         .cfi_sections   .debug_frame
  9 #endif
 10 
 11 #ifdef X86_WIN64
    /* Win64 build: SEH(...) passes its arguments through unchanged, so the
       unwind directives wrapped in it below are emitted.  arg0..arg3 name
       the first four integer argument registers of the Microsoft x64
       calling convention (rcx, rdx, r8, r9).  */
 12 #define SEH(...) __VA_ARGS__
 13 #define arg0	rcx
 14 #define arg1	rdx
 15 #define arg2	r8
 16 #define arg3	r9
 17 #else
    /* Any other build: SEH(...) expands to nothing and arg0..arg3 name the
       first four integer argument registers of the System V AMD64
       calling convention (rdi, rsi, rdx, rcx).  */
 18 #define SEH(...)
 19 #define arg0	rdi
 20 #define arg1	rsi
 21 #define arg2	rdx
 22 #define arg3	rcx
 23 #endif
 24 
 25 /* This macro allows the safe creation of jump tables without an
 26    actual table.  The entry points into the table are all 8 bytes.
 27    The use of ORG asserts that we're at the correct location.  */
 28 /* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
    /* E(BASE, X) is written in front of every slot of the return-type
       dispatch code in ffi_call_win64; X is the FFI_TYPE_* value that
       selects the slot.  Both variants pad to an 8-byte boundary; the
       second variant additionally carries an ORG to BASE + X * 8.
       NOTE(review): every use in this file passes `0b` as BASE, but no
       such label is defined here (the table label is ffi_call_win64_tab),
       and the instruction lines in this file use `;` to start a comment.
       If `;` is also a comment character on the expanded line, the ORG
       part is never seen by the assembler and only the ALIGN takes
       effect -- confirm before relying on the position check.  */
 29 #if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
 30 # define E(BASE, X)	ALIGN 8
 31 #else
 32 # define E(BASE, X)	ALIGN 8; ORG BASE + X * 8
 33 #endif
 34 
 35 	.CODE
    	/* External code symbols called from this file: abort is called from
    	   the unsupported slots of the dispatch code and from the
    	   out-of-range path in ffi_call_win64; ffi_closure_win64_inner is
    	   called from ffi_closure_win64.  PLT() and C() are not defined in
    	   this file -- presumably they come from asmnames.h (confirm).  */
 36 	extern PLT(C(abort)):near
 37 	extern C(ffi_closure_win64_inner):near
 38 
 39 /* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
 40 
 41    A bit of trickiness here -- FRAME is the base of the stack frame
 42    for this function.  This has been allocated by ffi_call.  We also
 43    deallocate some of the stack that has been alloca'd.  */
    /* Inputs (named through the arg0..arg2 macros):
         arg0 = stack  -- becomes rsp; its first four 8-byte slots are
                          loaded into rcx/rdx/r8/r9 and xmm0..xmm3
         arg1 = frame  -- becomes rbp
         arg2 = r10    -- copied into r10 before the call
       Frame fields as this code uses them:
         [frame +  0]  written here: the caller's rbp
         [frame +  8]  written here: this function's return address
         [frame + 16]  read: address that is called
         [frame + 24]  read (32 bits): index into the dispatch slots below
         [frame + 32]  read: pointer the result is stored through
       Because rbp = frame and the first two frame words hold the saved rbp
       and the return address, LEAVE + RET in the epilogue macro returns
       straight to the original caller and discards the switched stack.  */
 44 
 45 	ALIGN	8
 46 	PUBLIC	C(ffi_call_win64)
 47 
 48 	; SEH(.safesh ffi_call_win64)
 49 C(ffi_call_win64) proc SEH(frame)
 50 	cfi_startproc
 51 	/* Set up the local stack frame and install it in rbp/rsp.  */
    	/* rax = return address; it is copied to frame+8 and the old rbp to
    	   frame+0 before rbp is repointed at the frame.  */
 52 	mov	RAX, [RSP] ; 	movq	(%rsp), %rax
 53 	mov [arg1], RBP ; movq	%rbp, (arg1)
 54 	mov [arg1 + 8], RAX;	movq	%rax, 8(arg1)
 55 	mov	 RBP, arg1; movq	arg1, %rbp
 56 	cfi_def_cfa(rbp, 16)
 57 	cfi_rel_offset(rbp, 0)
 58 	SEH(.pushreg rbp)
 59 	SEH(.setframe rbp, 0)
 60 	SEH(.endprolog)
    	/* Switch to the argument stack passed in arg0.  */
 61 	mov	RSP, arg0 ;	movq	arg0, %rsp
 62 
    	/* Third argument goes to r10; this must happen before r8 (arg2 on
    	   Win64) is overwritten by the slot loads below.  */
 63 	mov	R10, arg2 ; movq	arg2, %r10
 64 
 65 	/* Load all slots into both general and xmm registers.  */
 66 	mov	RCX, [RSP] ;	movq	(%rsp), %rcx
 67 	movsd XMM0, qword ptr [RSP] ; movsd	(%rsp), %xmm0
 68 	mov	RDX, [RSP + 8] ;movq	8(%rsp), %rdx
 69 	movsd XMM1, qword ptr [RSP + 8];	movsd	8(%rsp), %xmm1
 70 	mov R8, [RSP + 16] ; movq	16(%rsp), %r8
 71 	movsd	XMM2, qword ptr [RSP + 16] ; movsd	16(%rsp), %xmm2
 72 	mov	R9, [RSP + 24] ; movq	24(%rsp), %r9
 73 	movsd	XMM3, qword ptr [RSP + 24] ;movsd	24(%rsp), %xmm3
 74 
    	/* Indirect call through the pointer stored at frame+16.  */
 75 	CALL qword ptr [RBP + 16] ; call	*16(%rbp)
 76 
    	/* Dispatch on the 32-bit value at frame+24: ecx = index, r8 = result
    	   pointer, r10 = table base + index * 8.  The LEA sits between CMP
    	   and JA but does not modify flags, so JA still tests the CMP; the
    	   compare is unsigned, so any index above FFI_TYPE_SMALL_STRUCT_4B
    	   goes to L99.  */
 77 	mov	 ECX, [RBP + 24] ; movl	24(%rbp), %ecx
 78 	mov	R8, [RBP + 32] ; movq	32(%rbp), %r8
 79 	LEA	R10, ffi_call_win64_tab ; leaq	0f(%rip), %r10
 80 	CMP	ECX, FFI_TYPE_SMALL_STRUCT_4B ; cmpl	$FFI_TYPE_SMALL_STRUCT_4B, %ecx
 81 	LEA	R10, [R10 + RCX*8] ; leaq	(%r10, %rcx, 8), %r10
 82 	JA	L99 ; ja	99f
 83 	JMP	R10 ; jmp	*%r10
 84 
 85 /* Below, we're space constrained most of the time.  Thus we eschew the
 86    modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).  */
    /* epilogue: LEAVE reloads rsp from rbp (the frame) and pops the saved
       rbp from frame+0; RET then consumes the return address at frame+8.
       The cfi_* lines bracket the RET so the unwind state is restored for
       whatever code follows the macro expansion.  */
 87 epilogue macro
 88 	LEAVE
 89 	cfi_remember_state
 90 	cfi_def_cfa(rsp, 8)
 91 	cfi_restore(rbp)
 92 	RET
 93 	cfi_restore_state
 94 endm
 95 
    	/* Dispatch slots, one per FFI_TYPE_* value, each starting on an
    	   8-byte boundary.  On entry to a slot: rax / xmm0 hold the callee's
    	   return value and r8 is the result pointer loaded from frame+32.
    	   The dispatcher computes base + index * 8, so each slot's code must
    	   not grow past 8 bytes.  */
 96 	ALIGN 8
 97 ffi_call_win64_tab LABEL NEAR
 98 E(0b, FFI_TYPE_VOID)
    	/* Nothing to store.  */
 99 	epilogue
100 E(0b, FFI_TYPE_INT)
    	/* Sign-extend the 32-bit result and store 8 bytes.  */
101 	movsxd rax, eax ; movslq	%eax, %rax
102 	mov qword ptr [r8], rax; movq	%rax, (%r8)
103 	epilogue
104 E(0b, FFI_TYPE_FLOAT)
    	/* Store the low 4 bytes of xmm0.  */
105 	movss dword ptr [r8], xmm0 ; movss	%xmm0, (%r8)
106 	epilogue
107 E(0b, FFI_TYPE_DOUBLE)
    	/* Store the low 8 bytes of xmm0.  */
108 	movsd qword ptr[r8], xmm0; movsd	%xmm0, (%r8)
109 	epilogue
110 E(0b, FFI_TYPE_LONGDOUBLE)
    	/* Not handled by this dispatcher: abort.  */
111 	call	PLT(C(abort))
112 E(0b, FFI_TYPE_UINT8)
    	/* Zero-extend al (the 32-bit write also clears the upper half of
    	   rax) and store 8 bytes.  */
113 	movzx eax, al ;movzbl	%al, %eax
114 	mov qword ptr[r8], rax; movq	%rax, (%r8)
115 	epilogue
116 E(0b, FFI_TYPE_SINT8)
    	/* Sign-extend al to 64 bits, then reuse the store + epilogue at L98
    	   (the UINT64 slot) -- presumably to stay within the 8-byte slot.  */
117 	movsx rax, al ; movsbq	%al, %rax
118 	jmp	L98
119 E(0b, FFI_TYPE_UINT16)
    	/* Zero-extend ax and store 8 bytes.  */
120 	movzx eax, ax ; movzwl	%ax, %eax
121 	mov qword ptr[r8], rax; movq	%rax, (%r8)
122 	epilogue
123 E(0b, FFI_TYPE_SINT16)
    	/* Sign-extend ax to 64 bits, then reuse the store + epilogue at L98.  */
124 	movsx rax, ax; movswq	%ax, %rax
125 	jmp	L98
126 E(0b, FFI_TYPE_UINT32)
    	/* Writing eax to itself clears the upper 32 bits of rax.  */
127 	mov eax, eax; movl	%eax, %eax
128 	mov qword ptr[r8], rax ; movq	%rax, (%r8)
129 	epilogue
130 E(0b, FFI_TYPE_SINT32)
    	/* Sign-extend the 32-bit result and store 8 bytes.  */
131 	movsxd rax, eax; movslq	%eax, %rax
132 	mov qword ptr [r8], rax; movq	%rax, (%r8)
133 	epilogue
134 E(0b, FFI_TYPE_UINT64)
    	/* L98 is also the shared tail for the SINT8 and SINT16 slots.  */
135 L98 LABEL near
136 	mov qword ptr [r8], rax ; movq	%rax, (%r8)
137 	epilogue
138 E(0b, FFI_TYPE_SINT64)
139 	mov qword ptr [r8], rax;movq	%rax, (%r8)
140 	epilogue
141 E(0b, FFI_TYPE_STRUCT)
    	/* Nothing is stored by this slot.  */
142 	epilogue
143 E(0b, FFI_TYPE_POINTER)
144 	mov qword ptr [r8], rax ;movq	%rax, (%r8)
145 	epilogue
146 E(0b, FFI_TYPE_COMPLEX)
    	/* Not handled by this dispatcher: abort.  */
147 	call	PLT(C(abort))
148 E(0b, FFI_TYPE_SMALL_STRUCT_1B)
    	/* Small structs: store exactly 1, 2 or 4 bytes from rax.  */
149 	mov byte ptr [r8], al ; movb	%al, (%r8)
150 	epilogue
151 E(0b, FFI_TYPE_SMALL_STRUCT_2B)
152 	mov word ptr [r8], ax ; movw	%ax, (%r8)
153 	epilogue
154 E(0b, FFI_TYPE_SMALL_STRUCT_4B)
155 	mov dword ptr [r8], eax ; movl	%eax, (%r8)
156 	epilogue
157 
    	/* Out-of-range index (taken by the JA above).  The epilogue after
    	   the call is reached only if abort returns.  */
158 	align	8
159 L99 LABEL near
160 	call	PLT(C(abort))
161 
162 	epilogue
163 
164 	cfi_endproc
165 	C(ffi_call_win64) endp
166 
167 
168 /* 32 bytes of outgoing register stack space, 8 bytes of alignment,
169    16 bytes of result, 32 bytes of xmm registers.  */
    /* ffi_clo_FS    = total frame size subtracted from rsp (88 bytes)
       ffi_clo_OFF_R = rsp-relative offset of the 16-byte result area (40)
       ffi_clo_OFF_X = rsp-relative offset of the saved xmm0..xmm3 (56)
       The offsets follow the layout order given in the comment above.  */
170 #define ffi_clo_FS	(32+8+16+32)
171 #define ffi_clo_OFF_R	(32+8)
172 #define ffi_clo_OFF_X	(32+8+16)
173 
174 	align	8
175 	PUBLIC	C(ffi_go_closure_win64)
176 
    	/* Entry point for Go-style closures.  On entry r10 points at the
    	   closure object.  The four integer argument registers are saved in
    	   the 32 bytes just above the return address, then the cif is
    	   loaded from [r10 + 8], the function from [r10 + 16], and the
    	   closure pointer itself is passed as user_data.  Control then
    	   joins ffi_closure_win64 at ffi_closure_win64_2, which allocates
    	   the frame and makes the call; this procedure does not adjust rsp
    	   and does not return on its own.  */
177 C(ffi_go_closure_win64) proc
178 	cfi_startproc
179 	/* Save all integer arguments into the incoming reg stack space.  */
180 	mov qword ptr [rsp + 8], rcx; movq	%rcx, 8(%rsp)
181 	mov qword ptr [rsp + 16], rdx; movq	%rdx, 16(%rsp)
182 	mov qword ptr [rsp + 24], r8; movq	%r8, 24(%rsp)
183 	mov qword ptr [rsp + 32], r9 ;movq	%r9, 32(%rsp)
184 
    	/* Set up rcx/rdx/r8 exactly as ffi_closure_win64 does before the
    	   shared label, using the Go closure field offsets.  */
185 	mov rcx, qword ptr [r10 + 8]; movq	8(%r10), %rcx			/* load cif */
186 	mov rdx, qword ptr [r10 + 16];  movq	16(%r10), %rdx			/* load fun */
187 	mov r8, r10 ; movq	%r10, %r8			/* closure is user_data */
    	/* Tail-jump into the shared body; the label lives inside
    	   ffi_closure_win64.  */
188 	jmp	ffi_closure_win64_2
189 	cfi_endproc
190 	C(ffi_go_closure_win64) endp
191 
192 	align	8
193 
    	/* Closure entry point.  On entry r10 points at the closure; cif, fun
    	   and user_data are read from the three 8-byte fields that start
    	   FFI_TRAMPOLINE_SIZE bytes into it.  The incoming integer argument
    	   registers are saved above the return address and xmm0..xmm3 in
    	   the local frame, then ffi_closure_win64_inner is called with
    	   rcx = cif, rdx = fun, r8 = user_data and r9 = address of the
    	   result area.  After the call the first 8 bytes of the result area
    	   are returned in both rax and xmm0.
    	   NOTE(review): FRAME is written literally here, whereas
    	   ffi_call_win64 uses SEH(frame); if X86_WIN64 were undefined the
    	   .allocstack/.endprolog below would vanish while FRAME stayed --
    	   confirm whether this should be SEH(FRAME).  */
194 PUBLIC C(ffi_closure_win64)
195 C(ffi_closure_win64) PROC FRAME
196 	cfi_startproc
197 	/* Save all integer arguments into the incoming reg stack space.  */
198 	mov qword ptr [rsp + 8], rcx; movq	%rcx, 8(%rsp)
199 	mov qword ptr [rsp + 16], rdx;	movq	%rdx, 16(%rsp)
200 	mov qword ptr [rsp + 24], r8; 	movq	%r8, 24(%rsp)
201 	mov qword ptr [rsp + 32], r9;	movq	%r9, 32(%rsp)
202 
203 	mov rcx, qword ptr [FFI_TRAMPOLINE_SIZE + r10]	;movq	FFI_TRAMPOLINE_SIZE(%r10), %rcx		/* load cif */
204 	mov rdx, qword ptr [FFI_TRAMPOLINE_SIZE + 8 + r10] ;	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rdx	/* load fun */
205 	mov r8, qword ptr [FFI_TRAMPOLINE_SIZE+16+r10] ;movq	FFI_TRAMPOLINE_SIZE+16(%r10), %r8	/* load user_data */
    	/* Shared body: ffi_go_closure_win64 jumps here with rcx, rdx and r8
    	   already set and the integer arguments already saved.  */
206 ffi_closure_win64_2 LABEL near
    	/* Allocate the local frame.  After this the saved integer arguments
    	   sit at rsp + ffi_clo_FS + 8 and upward.  */
207 	sub rsp, ffi_clo_FS ;subq	$ffi_clo_FS, %rsp
208 	cfi_adjust_cfa_offset(ffi_clo_FS)
209 	SEH(.allocstack ffi_clo_FS)
210 	SEH(.endprolog)
211 
212 	/* Save all sse arguments into the stack frame.  */
213 	movsd qword ptr [ffi_clo_OFF_X + rsp], xmm0	; movsd	%xmm0, ffi_clo_OFF_X(%rsp)
214 	movsd qword ptr [ffi_clo_OFF_X+8+rsp], xmm1 ; movsd	%xmm1, ffi_clo_OFF_X+8(%rsp)
215 	movsd qword ptr [ffi_clo_OFF_X+16+rsp], xmm2 ; movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
216 	movsd qword ptr [ffi_clo_OFF_X+24+rsp], xmm3 ; movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
217 
    	/* r9 = address of the result area; the saved xmm registers follow
    	   it directly at ffi_clo_OFF_X.  The callee's prototype is not
    	   visible in this file.  */
218 	lea	r9, [ffi_clo_OFF_R + rsp] ; leaq	ffi_clo_OFF_R(%rsp), %r9
219 	call C(ffi_closure_win64_inner)
220 
221 	/* Load the result into both possible result registers.  */
222 
223 	mov rax, qword ptr [ffi_clo_OFF_R + rsp] ;movq    ffi_clo_OFF_R(%rsp), %rax
224 	movsd xmm0, qword ptr [rsp + ffi_clo_OFF_R] ;movsd   ffi_clo_OFF_R(%rsp), %xmm0
225 
    	/* Release the frame and return to the closure's caller.  */
226 	add rsp, ffi_clo_FS ;addq	$ffi_clo_FS, %rsp
227 	cfi_adjust_cfa_offset(-ffi_clo_FS)
228 	ret
229 
230 	cfi_endproc
231 	C(ffi_closure_win64) endp
232 
233 #if defined __ELF__ && defined __linux__
    	/* Emitted only when both __ELF__ and __linux__ are defined: an
    	   empty .note.GNU-stack section.  */
234 	.section	.note.GNU-stack,"",@progbits
235 #endif
    /* Close the code segment and end the source file.
       NOTE(review): no matching `_text segment` line is visible in this
       file (the code section is opened with .CODE) -- confirm that this
       is the segment name that directive opens.  */
236 _text ends
237 end