1 /* -----------------------------------------------------------------------
  2    sysv.S - Copyright (c) 2017  Anthony Green
  3           - Copyright (c) 2013  The Written Word, Inc.
  4           - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc.
  5 
  6    X86 Foreign Function Interface
  7 
  8    Permission is hereby granted, free of charge, to any person obtaining
  9    a copy of this software and associated documentation files (the
 10    ``Software''), to deal in the Software without restriction, including
 11    without limitation the rights to use, copy, modify, merge, publish,
 12    distribute, sublicense, and/or sell copies of the Software, and to
 13    permit persons to whom the Software is furnished to do so, subject to
 14    the following conditions:
 15 
 16    The above copyright notice and this permission notice shall be included
 17    in all copies or substantial portions of the Software.
 18 
 19    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
 20    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 21    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 22    NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 23    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 24    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 25    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 26    DEALINGS IN THE SOFTWARE.
 27    ----------------------------------------------------------------------- */
 28 
 29 #ifndef __x86_64__
 30 #ifdef _MSC_VER
 31 
 32 #define LIBFFI_ASM
 33 #include <fficonfig.h>
 34 #include <ffi.h>
 35 #include <ffi_cfi.h>
 36 #include "internal.h"
 37 
 38 #define C2(X, Y)  X ## Y
 39 #define C1(X, Y)  C2(X, Y)
 40 #define L(X)     C1(L, X)
    /* C2 pastes its two arguments into a single token; C1 adds one level
       of indirection so that the arguments are macro-expanded before they
       are pasted.  L(X) therefore names the label "L" followed by X,
       e.g. L(UW0) becomes LUW0.  */
 41 # define ENDF(X) X ENDP
    /* ENDF(X) closes procedure X by emitting "X ENDP".  */
 42
 43 /* This macro allows the safe creation of jump tables without an
 44    actual table.  The entry points into the table are all 8 bytes.
 45    The use of ORG asserts that we're at the correct location.  */
 46 /* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
    /* E(BASE, X) is placed in front of every table slot.  Both variants
       begin with ALIGN 8; the second one additionally spells out
       "ORG BASE + X * 8" on the same line.
       NOTE(review): MASM starts a comment at ';', so the ORG half of the
       second variant is presumably never assembled and the slot order is
       not actually checked -- confirm before relying on it.  */
 47 #if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
 48 # define E(BASE, X)	ALIGN 8
 49 #else
 50 # define E(BASE, X)	ALIGN 8; ORG BASE + X * 8
 51 #endif
 52
    /* Assembler set-up: 686 instruction set, flat memory model.  */
 53     .686P
 54     .MODEL FLAT
 55
    /* ffi_closure_inner is defined elsewhere; the closure entry points
       below call it through FFI_CLOSURE_CALL_INNER.  */
 56 EXTRN	@ffi_closure_inner@8:PROC
    /* Everything that follows is emitted into the _TEXT segment.  */
 57 _TEXT SEGMENT
 58 
 59 /* This is declared as
 60 
 61    void ffi_call_i386(struct call_frame *frame, char *argp)
 62         __attribute__((fastcall));
 63 
 64    Thus the arguments are present in
 65 
 66         ecx: frame
 67         edx: argp
 68 */
 69 
 70 ALIGN 16
 71 PUBLIC @ffi_call_i386@8
 72 @ffi_call_i386@8 PROC
    /* Call the function described by FRAME using the outgoing argument
       area ARGP, then store the returned value.

       In:   ecx = frame, edx = argp.  When HAVE_FASTCALL is 0 both are
             loaded from the stack ([esp+4], [esp+8]) instead.

       Frame slots touched here (byte offsets from FRAME):
              0          receives the caller's ebp
              4          receives the return address
              8          address of the function to call; reused after
                         the call as the save slot for ebx
             12          return type code (masked with X86_RET_TYPE_MASK)
             16          address that receives the result
             20+R_xxx*4  values loaded into eax / edx / ecx before the call

       Out:  the result is written through the pointer at FRAME+16 by the
             store_table slot selected by the return type code; ebx, ebp
             and esp are restored before returning.

       Every store_table slot is 8 bytes long (see the E macro), so the
       slot address is L(store_table) + code * 8.  */
 73 L(UW0):
 74 	cfi_startproc
 75  #if !HAVE_FASTCALL
 76 	mov	ecx, [esp+4]		/* frame */
 77 	mov	edx, [esp+8]		/* argp */
 78  #endif
 79 	mov	eax, [esp]		/* move the return address */
 80 	mov	[ecx], ebp		/* store ebp into local frame */
 81 	mov	[ecx+4], eax		/* store retaddr into local frame */
 82
 83 	/* New stack frame based off ebp.  This is an itty bit of unwind
 84 	   trickery in that the CFA *has* changed.  There is no easy way
 85 	   to describe it correctly on entry to the function.  Fortunately,
 86 	   it doesn't matter too much since at all points we can correctly
 87 	   unwind back to ffi_call.  Note that the location to which we
 88 	   moved the return address is (the new) CFA-4, so from the
 89 	   perspective of the unwind info, it hasn't moved.  */
 90 	mov	ebp, ecx
 91 L(UW1):
 92 	// cfi_def_cfa(%ebp, 8)
 93 	// cfi_rel_offset(%ebp, 0)
 94
 95 	mov	esp, edx		/* set outgoing argument stack */
 96 	mov	eax, [20+R_EAX*4+ebp]	/* set register arguments */
 97 	mov	edx, [20+R_EDX*4+ebp]
 98 	mov	ecx, [20+R_ECX*4+ebp]
 99
100 	call	dword ptr [ebp+8]
101
102 	mov	ecx, [12+ebp]		/* load return type code */
103 	mov	[ebp+8], ebx		/* preserve %ebx */
104 L(UW2):
105 	// cfi_rel_offset(%ebx, 8)
106
107 	and	ecx, X86_RET_TYPE_MASK
108 	lea	ebx, [L(store_table) + ecx * 8]	/* ebx = slot for this code */
109 	mov	ecx, [ebp+16]		/* load result address */
110 	jmp	ebx
111
112 	ALIGN	8
113 L(store_table):
114 E(L(store_table), X86_RET_FLOAT)
115 	fstp	DWORD PTR [ecx]
116 	jmp	L(e1)
117 E(L(store_table), X86_RET_DOUBLE)
118 	fstp	QWORD PTR [ecx]
119 	jmp	L(e1)
120 E(L(store_table), X86_RET_LDOUBLE)
121 	fstp	QWORD PTR [ecx]
122 	jmp	L(e1)
123 E(L(store_table), X86_RET_SINT8)
124 	movsx	eax, al
125 	mov	[ecx], eax
126 	jmp	L(e1)
127 E(L(store_table), X86_RET_SINT16)
128 	movsx	eax, ax
129 	mov	[ecx], eax
130 	jmp	L(e1)
131 E(L(store_table), X86_RET_UINT8)
132 	movzx	eax, al
133 	mov	[ecx], eax
134 	jmp	L(e1)
135 E(L(store_table), X86_RET_UINT16)
136 	movzx	eax, ax
137 	mov	[ecx], eax
138 	jmp	L(e1)
139 E(L(store_table), X86_RET_INT64)
140 	mov	[ecx+4], edx		/* high half; low half stored below */
141 	/* fallthru */
142 E(L(store_table), X86_RET_INT32)
143 	mov	[ecx], eax
144 	/* fallthru */
145 E(L(store_table), X86_RET_VOID)
146 L(e1):
147 	mov	ebx, [ebp+8]		/* restore %ebx saved after the call */
148 	mov	esp, ebp
149 	pop	ebp
150 L(UW3):
151 	// cfi_remember_state
152 	// cfi_def_cfa(%esp, 4)
153 	// cfi_restore(%ebx)
154 	// cfi_restore(%ebp)
155 	ret
156 L(UW4):
157 	// cfi_restore_state
158
159 E(L(store_table), X86_RET_STRUCTPOP)
160 	jmp	L(e1)
161 E(L(store_table), X86_RET_STRUCTARG)
162 	jmp	L(e1)
163 E(L(store_table), X86_RET_STRUCT_1B)
164 	mov	[ecx], al
165 	jmp	L(e1)
166 E(L(store_table), X86_RET_STRUCT_2B)
167 	mov	[ecx], ax
168 	jmp	L(e1)
169
170 	/* Fill out the table so that bad values are predictable.  */
171 E(L(store_table), X86_RET_UNUSED14)
172 	int 3
173 E(L(store_table), X86_RET_UNUSED15)
174 	int 3
175
176 L(UW5):
177 	// cfi_endproc
178 ENDF(@ffi_call_i386@8)
179 
180 /* The inner helper is declared as
181 
182    void ffi_closure_inner(struct closure_frame *frame, char *argp)
183 	__attribute__((fastcall))
184 
185    Thus the arguments are placed in
186 
187 	ecx:	frame
188 	edx:	argp
189 */
190 
191 /* Macros to help setting up the closure_data structure.  */
192 
193 #if HAVE_FASTCALL
    /* closure_FS is the number of bytes the closure entry points subtract
       from esp; closure_CF is the offset from esp at which the closure
       frame data starts inside that area.
       Without fastcall, FFI_CLOSURE_PREP_CALL stores the two arguments of
       ffi_closure_inner at [esp] and [esp+4], so the frame data starts
       8 bytes higher and closure_CF is 8.
       NOTE(review): the trailing "+ 4" / "+ 12" are presumably padding
       for stack alignment -- confirm.  */
194 # define closure_FS	(40 + 4)
195 # define closure_CF	0
196 #else
197 # define closure_FS	(8 + 40 + 12)
198 # define closure_CF	8
199 #endif
200 
201 FFI_CLOSURE_SAVE_REGS MACRO
    	/* Spill the incoming eax, edx and ecx into the closure frame.
    	   The register area starts at closure_CF+16 and is indexed by
    	   R_EAX / R_EDX / R_ECX.  No register is modified.  */
202 	mov	[esp+closure_CF+16+R_EAX*4], eax
203 	mov	[esp+closure_CF+16+R_EDX*4], edx
204 	mov	[esp+closure_CF+16+R_ECX*4], ecx
205 ENDM
206 
207 FFI_CLOSURE_COPY_TRAMP_DATA MACRO
    	/* In:  eax = address of the ffi_closure.
    	   Copies the three words that follow the trampoline (cif, fun,
    	   user_data) into the closure frame at closure_CF+28/+32/+36.
    	   Out: edx = cif, ecx = fun, eax = user_data.  */
208 	mov	edx, [eax+FFI_TRAMPOLINE_SIZE]		/* cif */
209 	mov	ecx, [eax+FFI_TRAMPOLINE_SIZE+4]	/* fun */
210 	mov	eax, [eax+FFI_TRAMPOLINE_SIZE+8]	/* user_data */
211 	mov	[esp+closure_CF+28], edx
212 	mov	[esp+closure_CF+32], ecx
213 	mov	[esp+closure_CF+36], eax
214 ENDM
215 
216 #if HAVE_FASTCALL
217 FFI_CLOSURE_PREP_CALL MACRO
    	/* Set up the two arguments of ffi_closure_inner in registers:
    	   ecx = closure data (closure_CF is 0 here, so that is esp),
    	   edx = address just above the return address, i.e. the
    	   caller's stacked arguments.  */
218 	mov	ecx, esp			/* closure_data */
219 	lea	edx, [esp+closure_FS+4]		/* incoming stack */
220 ENDM
221 #else
222 FFI_CLOSURE_PREP_CALL MACRO
    	/* Same two values as the fastcall variant, additionally stored
    	   in the two outgoing stack slots at [esp] and [esp+4].  */
223 	lea	ecx, [esp+closure_CF]		/* closure_data */
224 	lea	edx, [esp+closure_FS+4]		/* incoming stack */
225 	mov	[esp], ecx
226 	mov	[esp+4], edx
227 ENDM
228 #endif
229 
230 FFI_CLOSURE_CALL_INNER MACRO UWN
    	/* Call the C helper.  The UWN argument is accepted but not used
    	   in the macro body.  */
231 	call	@ffi_closure_inner@8
232 ENDM
233 
234 FFI_CLOSURE_MASK_AND_JUMP MACRO LABEL
    	/* In:  eax = value returned by ffi_closure_inner.
    	   Jumps to the 8-byte slot LABEL + (eax & X86_RET_TYPE_MASK) * 8.
    	   Before jumping, eax is reloaded with the first word of the
    	   closure frame so that slots returning an integer find it
    	   already in place.  Clobbers edx.  */
235 	and	eax, X86_RET_TYPE_MASK
236 	lea	edx, [LABEL+eax*8]
237 	mov	eax, [esp+closure_CF]		/* optimistic load */
238 	jmp	edx
239 ENDM
240 
241 ALIGN 16
242 PUBLIC ffi_go_closure_EAX
243 ffi_go_closure_EAX PROC C
    /* Go closure entry point with the closure address in eax.
       Allocates the closure frame, saves the argument registers, copies
       cif ([eax+4]) and fun ([eax+8]) into the frame, records the closure
       itself as user_data and continues at L(do_closure_i386).  */
244 L(UW6):
245 	// cfi_startproc
246 	sub	esp, closure_FS
247 L(UW7):
248 	// cfi_def_cfa_offset(closure_FS + 4)
249 	FFI_CLOSURE_SAVE_REGS
250 	mov	edx, [eax+4]			/* cif */
251 	mov	ecx, [eax+8]			/* fun */
252 	mov	[esp+closure_CF+28], edx
253 	mov	[esp+closure_CF+32], ecx
254 	mov	[esp+closure_CF+36], eax	/* user_data = closure */
255 	jmp	L(do_closure_i386)
256 L(UW8):
257 	// cfi_endproc
258 ENDF(ffi_go_closure_EAX)
259 
260 ALIGN 16
261 PUBLIC ffi_go_closure_ECX
262 ffi_go_closure_ECX PROC C
    /* Go closure entry point with the closure address in ecx.
       Allocates the closure frame, saves the argument registers, copies
       cif ([ecx+4]) and fun ([ecx+8]) into the frame, records the closure
       itself as user_data and continues at L(do_closure_i386).  */
263 L(UW9):
264 	// cfi_startproc
265 	sub	esp, closure_FS
266 L(UW10):
267 	// cfi_def_cfa_offset(closure_FS + 4)
268 	FFI_CLOSURE_SAVE_REGS
269 	mov	edx, [ecx+4]			/* cif */
270 	mov	eax, [ecx+8]			/* fun */
271 	mov	[esp+closure_CF+28], edx
272 	mov	[esp+closure_CF+32], eax
273 	mov	[esp+closure_CF+36], ecx	/* user_data = closure */
274 	jmp	L(do_closure_i386)
275 L(UW11):
276 	// cfi_endproc
277 ENDF(ffi_go_closure_ECX)
278 
279 /* The closure entry points are reached from the ffi_closure trampoline.
280    On entry, %eax contains the address of the ffi_closure.  */
281 
282 ALIGN 16
283 PUBLIC ffi_closure_i386
284 ffi_closure_i386 PROC C
    /* Closure entry point reached from the trampoline; eax holds the
       address of the ffi_closure.

       Allocates closure_FS bytes, saves eax/edx/ecx, copies cif, fun and
       user_data out of the closure, calls ffi_closure_inner and then
       dispatches through load_table2 on the low bits of the value it
       returned.  Each slot is 8 bytes; it loads the return value from the
       closure frame and releases the frame.  The STRUCTPOP slot returns
       with "ret 4"; every other slot ends in a plain "ret".

       L(do_closure_i386) is also the continuation of the Go closure
       entry points, which have already filled the frame.  */
285 L(UW12):
286 	// cfi_startproc
287 	sub	esp, closure_FS
288 L(UW13):
289 	// cfi_def_cfa_offset(closure_FS + 4)
290
291 	FFI_CLOSURE_SAVE_REGS
292 	FFI_CLOSURE_COPY_TRAMP_DATA
293
294 	/* Entry point from preceding Go closures.  */
295 L(do_closure_i386)::
296
297 	FFI_CLOSURE_PREP_CALL
298 	FFI_CLOSURE_CALL_INNER(14)
299 	FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,2))
300
301 	ALIGN	8
302 L(load_table2):
303 E(L(load_table2), X86_RET_FLOAT)
304 	fld	DWORD PTR [esp+closure_CF]
305 	jmp	L(e2)
306 E(L(load_table2), X86_RET_DOUBLE)
307 	fld	QWORD PTR [esp+closure_CF]
308 	jmp	L(e2)
309 E(L(load_table2), X86_RET_LDOUBLE)
310 	fld	QWORD PTR [esp+closure_CF]
311 	jmp	L(e2)
312 E(L(load_table2), X86_RET_SINT8)
313 	movsx	eax, al
314 	jmp	L(e2)
315 E(L(load_table2), X86_RET_SINT16)
316 	movsx	eax, ax
317 	jmp	L(e2)
318 E(L(load_table2), X86_RET_UINT8)
319 	movzx	eax, al
320 	jmp	L(e2)
321 E(L(load_table2), X86_RET_UINT16)
322 	movzx	eax, ax
323 	jmp	L(e2)
324 E(L(load_table2), X86_RET_INT64)
325 	mov	edx, [esp+closure_CF+4]		/* high half; eax already loaded */
326 	jmp	L(e2)
327 E(L(load_table2), X86_RET_INT32)
328 	nop
329 	/* fallthru */
330 E(L(load_table2), X86_RET_VOID)
331 L(e2):
332 	add	esp, closure_FS
333 L(UW16):
334 	// cfi_adjust_cfa_offset(-closure_FS)
335 	ret
336 L(UW17):
337 	// cfi_adjust_cfa_offset(closure_FS)
338 E(L(load_table2), X86_RET_STRUCTPOP)
339 	add	esp, closure_FS
340 L(UW18):
341 	// cfi_adjust_cfa_offset(-closure_FS)
342 	ret	4
343 L(UW19):
344 	// cfi_adjust_cfa_offset(closure_FS)
345 E(L(load_table2), X86_RET_STRUCTARG)
346 	jmp	L(e2)
347 E(L(load_table2), X86_RET_STRUCT_1B)
348 	movzx	eax, al
349 	jmp	L(e2)
350 E(L(load_table2), X86_RET_STRUCT_2B)
351 	movzx	eax, ax
352 	jmp	L(e2)
353
354 	/* Fill out the table so that bad values are predictable.  */
355 E(L(load_table2), X86_RET_UNUSED14)
356 	int 3
357 E(L(load_table2), X86_RET_UNUSED15)
358 	int 3
359
360 L(UW20):
361 	// cfi_endproc
362 ENDF(ffi_closure_i386)
363 
364 ALIGN 16
365 PUBLIC	ffi_go_closure_STDCALL
366 ffi_go_closure_STDCALL PROC C
    /* Go closure entry point for the stdcall-style return path, with the
       closure address in ecx.  Allocates the closure frame, saves the
       argument registers, copies cif ([ecx+4]) and fun ([ecx+8]) into the
       frame, records the closure itself as user_data and continues at
       L(do_closure_STDCALL).  */
367 L(UW21):
368 	// cfi_startproc
369 	sub	esp, closure_FS
370 L(UW22):
371 	// cfi_def_cfa_offset(closure_FS + 4)
372 	FFI_CLOSURE_SAVE_REGS
373 	mov	edx, [ecx+4]			/* cif */
374 	mov	eax, [ecx+8]			/* fun */
375 	mov	[esp+closure_CF+28], edx
376 	mov	[esp+closure_CF+32], eax
377 	mov	[esp+closure_CF+36], ecx	/* user_data = closure */
378 	jmp	L(do_closure_STDCALL)
379 L(UW23):
380 	// cfi_endproc
381 ENDF(ffi_go_closure_STDCALL)
382 
383 /* For REGISTER, we have no available parameter registers, and so we
384    enter here having pushed the closure onto the stack.  */
385 
386 ALIGN 16
387 PUBLIC ffi_closure_REGISTER
388 ffi_closure_REGISTER PROC C
    /* Entry point used when the closure address arrives on the stack:
       on entry [esp] is the return address and [esp+4] the closure.

       Only closure_FS-4 bytes are allocated because the pushed closure
       word already occupies one slot.  After saving the registers the
       closure is loaded into eax and the return address is moved up into
       the slot the closure occupied, which gives the same frame layout as
       ffi_closure_STDCALL.  Continues at L(do_closure_REGISTER).  */
389 L(UW24):
390 	// cfi_startproc
391 	// cfi_def_cfa(%esp, 8)
392 	// cfi_offset(%eip, -8)
393 	sub	esp, closure_FS-4
394 L(UW25):
395 	// cfi_def_cfa_offset(closure_FS + 4)
396 	FFI_CLOSURE_SAVE_REGS
397 	mov	ecx, [esp+closure_FS-4]		/* return address */
398 	mov	eax, [esp+closure_FS]		/* closure */
399 	mov	[esp+closure_FS], ecx		/* return address takes its slot */
400 	jmp	L(do_closure_REGISTER)
401 L(UW26):
402 	// cfi_endproc
403 ENDF(ffi_closure_REGISTER)
404 
405 /* For STDCALL (and others), we need to pop N bytes of arguments off
406    the stack following the closure.  The amount needing to be popped
407    is returned to us from ffi_closure_inner.  */
408 
409 ALIGN 16
410 PUBLIC ffi_closure_STDCALL
411 ffi_closure_STDCALL PROC C
    /* Closure entry point for conventions in which the callee pops its
       own arguments; eax holds the address of the ffi_closure.

       After ffi_closure_inner returns, eax carries the return type code
       in its low bits and the number of bytes to pop above
       X86_RET_POP_SHIFT.  The return address is copied to
       esp + closure_FS + pop_count and ecx is left pointing at that copy,
       so each 8-byte load_table3 slot finishes with
       "mov esp, ecx" / "ret".

       L(do_closure_REGISTER) and L(do_closure_STDCALL) are the
       continuation points for ffi_closure_REGISTER and
       ffi_go_closure_STDCALL.  */
412 L(UW27):
413 	// cfi_startproc
414 	sub	esp, closure_FS
415 L(UW28):
416 	// cfi_def_cfa_offset(closure_FS + 4)
417
418 	FFI_CLOSURE_SAVE_REGS
419
420 	/* Entry point from ffi_closure_REGISTER.  */
421 L(do_closure_REGISTER)::
422
423 	FFI_CLOSURE_COPY_TRAMP_DATA
424
425 	/* Entry point from preceding Go closure.  */
426 L(do_closure_STDCALL)::
427
428 	FFI_CLOSURE_PREP_CALL
429 	FFI_CLOSURE_CALL_INNER(29)
430
431 	mov	ecx, eax
432 	shr	ecx, X86_RET_POP_SHIFT		/* isolate pop count */
433 	lea	ecx, [esp+closure_FS+ecx]	/* compute popped esp */
434 	mov	edx, [esp+closure_FS]		/* move return address */
435 	mov	[ecx], edx
436
437 	/* From this point on, the value of %esp upon return is %ecx+4,
438 	   and we've copied the return address to %ecx to make return easy.
439 	   There's no point in representing this in the unwind info, as
440 	   there is always a window between the mov and the ret which
441 	   will be wrong from one point of view or another.  */
442
443 	FFI_CLOSURE_MASK_AND_JUMP  L(C1(load_table,3))
444
445 	ALIGN	8
446 L(load_table3):
447 E(L(load_table3), X86_RET_FLOAT)
448 	fld	DWORD PTR [esp+closure_CF]
449 	mov	esp, ecx
450 	ret
451 E(L(load_table3), X86_RET_DOUBLE)
452 	fld	QWORD PTR [esp+closure_CF]
453 	mov	esp, ecx
454 	ret
455 E(L(load_table3), X86_RET_LDOUBLE)
456 	fld	QWORD PTR [esp+closure_CF]
457 	mov	esp, ecx
458 	ret
459 E(L(load_table3), X86_RET_SINT8)
460 	movsx	eax, al
461 	mov	esp, ecx
462 	ret
463 E(L(load_table3), X86_RET_SINT16)
464 	movsx	eax, ax
465 	mov	esp, ecx
466 	ret
467 E(L(load_table3), X86_RET_UINT8)
468 	movzx	eax, al
469 	mov	esp, ecx
470 	ret
471 E(L(load_table3), X86_RET_UINT16)
472 	movzx	eax, ax
473 	mov	esp, ecx
474 	ret
475 E(L(load_table3), X86_RET_INT64)
476 	mov	edx, [esp+closure_CF+4]		/* high half; eax already loaded */
477 	mov	esp, ecx
478 	ret
479 E(L(load_table3), X86_RET_INT32)
480 	mov	esp, ecx
481 	ret
482 E(L(load_table3), X86_RET_VOID)
483 	mov	esp, ecx
484 	ret
485 E(L(load_table3), X86_RET_STRUCTPOP)
486 	mov	esp, ecx
487 	ret
488 E(L(load_table3), X86_RET_STRUCTARG)
489 	mov	esp, ecx
490 	ret
491 E(L(load_table3), X86_RET_STRUCT_1B)
492 	movzx	eax, al
493 	mov	esp, ecx
494 	ret
495 E(L(load_table3), X86_RET_STRUCT_2B)
496 	movzx	eax, ax
497 	mov	esp, ecx
498 	ret
499
500 	/* Fill out the table so that bad values are predictable.  */
501 E(L(load_table3), X86_RET_UNUSED14)
502 	int 3
503 E(L(load_table3), X86_RET_UNUSED15)
504 	int 3
505
506 L(UW31):
507 	// cfi_endproc
508 ENDF(ffi_closure_STDCALL)
509 
510 #if !FFI_NO_RAW_API
511 
512 #define raw_closure_S_FS	(16+16+12)
513
514 ALIGN 16
515 PUBLIC ffi_closure_raw_SYSV
516 ffi_closure_raw_SYSV PROC C
    /* Raw closure entry point; eax holds the address of the ffi_closure.

       Frame (raw_closure_S_FS bytes below the return address):
         [esp+0]   cif         \
         [esp+4]   &res         |  the four stack arguments passed to
         [esp+8]   raw_args     |  cl->fun
         [esp+12]  user_data   /
         [esp+16]  result buffer (res)
         [esp+raw_closure_S_FS-4]  saved ebx

       ebx keeps cl->cif across the call so that cif->flags (offset 20)
       can be read afterwards.  The masked flags select an 8-byte slot of
       load_table4, which loads the result from the buffer and returns.
       The STRUCTPOP slot returns with "ret 4".  */
517 L(UW32):
518 	// cfi_startproc
519 	sub	esp, raw_closure_S_FS
520 L(UW33):
521 	// cfi_def_cfa_offset(raw_closure_S_FS + 4)
522 	mov	[esp+raw_closure_S_FS-4], ebx
523 L(UW34):
524 	// cfi_rel_offset(%ebx, raw_closure_S_FS-4)
525
526 	mov	edx, [eax+FFI_TRAMPOLINE_SIZE+8]	/* load cl->user_data */
527 	mov	[esp+12], edx
528 	lea	edx, [esp+raw_closure_S_FS+4]		/* load raw_args */
529 	mov	[esp+8], edx
530 	lea	edx, [esp+16]				/* load &res */
531 	mov	[esp+4], edx
532 	mov	ebx, [eax+FFI_TRAMPOLINE_SIZE]		/* load cl->cif */
533 	mov	[esp], ebx
534 	call	DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4]	/* call cl->fun */
535
536 	mov	eax, [ebx+20]				/* load cif->flags */
537 	and	eax, X86_RET_TYPE_MASK
538 // #ifdef __PIC__
539 // 	call	__x86.get_pc_thunk.bx
540 // L(pc4):
541 // 	lea 	ecx, L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
542 // #else
    	/* Slots are 8 bytes each, so the index must be scaled by 8.  */
543 	lea	ecx, [L(load_table4)+eax*8]
544 // #endif
545 	mov	ebx, [esp+raw_closure_S_FS-4]
546 L(UW35):
547 	// cfi_restore(%ebx)
548 	mov	eax, [esp+16]				/* Optimistic load */
    	/* ecx is the address of the slot itself, not of a pointer to it.  */
549 	jmp	ecx
550
551 	ALIGN	8
552 L(load_table4):
553 E(L(load_table4), X86_RET_FLOAT)
554 	fld	DWORD PTR [esp+16]
555 	jmp	L(e4)
556 E(L(load_table4), X86_RET_DOUBLE)
557 	fld	QWORD PTR [esp+16]
558 	jmp	L(e4)
559 E(L(load_table4), X86_RET_LDOUBLE)
560 	fld	QWORD PTR [esp+16]
561 	jmp	L(e4)
562 E(L(load_table4), X86_RET_SINT8)
563 	movsx	eax, al
564 	jmp	L(e4)
565 E(L(load_table4), X86_RET_SINT16)
566 	movsx	eax, ax
567 	jmp	L(e4)
568 E(L(load_table4), X86_RET_UINT8)
569 	movzx	eax, al
570 	jmp	L(e4)
571 E(L(load_table4), X86_RET_UINT16)
572 	movzx	eax, ax
573 	jmp	L(e4)
574 E(L(load_table4), X86_RET_INT64)
575 	mov	edx, [esp+16+4]				/* high half; eax already loaded */
576 	jmp	L(e4)
577 E(L(load_table4), X86_RET_INT32)
578 	nop
579 	/* fallthru */
580 E(L(load_table4), X86_RET_VOID)
581 L(e4):
582 	add	esp, raw_closure_S_FS
583 L(UW36):
584 	// cfi_adjust_cfa_offset(-raw_closure_S_FS)
585 	ret
586 L(UW37):
587 	// cfi_adjust_cfa_offset(raw_closure_S_FS)
588 E(L(load_table4), X86_RET_STRUCTPOP)
589 	add	esp, raw_closure_S_FS
590 L(UW38):
591 	// cfi_adjust_cfa_offset(-raw_closure_S_FS)
592 	ret	4
593 L(UW39):
594 	// cfi_adjust_cfa_offset(raw_closure_S_FS)
595 E(L(load_table4), X86_RET_STRUCTARG)
596 	jmp	L(e4)
597 E(L(load_table4), X86_RET_STRUCT_1B)
598 	movzx	eax, al
599 	jmp	L(e4)
600 E(L(load_table4), X86_RET_STRUCT_2B)
601 	movzx	eax, ax
602 	jmp	L(e4)
603
604 	/* Fill out the table so that bad values are predictable.  */
605 E(L(load_table4), X86_RET_UNUSED14)
606 	int 3
607 E(L(load_table4), X86_RET_UNUSED15)
608 	int 3
609
610 L(UW40):
611 	// cfi_endproc
612 ENDF(ffi_closure_raw_SYSV)
613 
614 #define raw_closure_T_FS	(16+16+8)
615
616 ALIGN 16
617 PUBLIC ffi_closure_raw_THISCALL
618 ffi_closure_raw_THISCALL PROC C
    /* Raw closure entry point for thiscall; eax holds the address of the
       ffi_closure and ecx the first argument.

       The return address is popped, ecx is pushed in its place and the
       return address is pushed again, so that ecx becomes the first
       stacked argument.  From there on the frame mirrors
       ffi_closure_raw_SYSV:
         [esp+0]   cif         \
         [esp+4]   &res         |  the four stack arguments passed to
         [esp+8]   raw_args     |  cl->fun
         [esp+12]  user_data   /
         [esp+16]  result buffer (res)
         [esp+raw_closure_T_FS-4]  saved ebx

       Because of the extra pushed word, the ordinary exit uses "ret 4"
       and the STRUCTPOP slot uses "ret 8".  */
619 L(UW41):
620 	// cfi_startproc
621 	/* Rearrange the stack such that %ecx is the first argument.
622 	   This means moving the return address.  */
623 	pop	edx
624 L(UW42):
625 	// cfi_def_cfa_offset(0)
626 	// cfi_register(%eip, %edx)
627 	push	ecx
628 L(UW43):
629 	// cfi_adjust_cfa_offset(4)
630 	push	edx
631 L(UW44):
632 	// cfi_adjust_cfa_offset(4)
633 	// cfi_rel_offset(%eip, 0)
634 	sub	esp, raw_closure_T_FS
635 L(UW45):
636 	// cfi_adjust_cfa_offset(raw_closure_T_FS)
637 	mov	[esp+raw_closure_T_FS-4], ebx
638 L(UW46):
639 	// cfi_rel_offset(%ebx, raw_closure_T_FS-4)
640
641 	mov	edx, [eax+FFI_TRAMPOLINE_SIZE+8]	/* load cl->user_data */
642 	mov	[esp+12], edx
643 	lea	edx, [esp+raw_closure_T_FS+4]		/* load raw_args */
644 	mov	[esp+8], edx
645 	lea	edx, [esp+16]				/* load &res */
646 	mov	[esp+4], edx
647 	mov	ebx, [eax+FFI_TRAMPOLINE_SIZE]		/* load cl->cif */
648 	mov	[esp], ebx
649 	call	DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4]	/* call cl->fun */
650
651 	mov	eax, [ebx+20]				/* load cif->flags */
652 	and	eax, X86_RET_TYPE_MASK
653 // #ifdef __PIC__
654 // 	call	__x86.get_pc_thunk.bx
655 // L(pc5):
656 // 	leal	L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
657 // #else
658 	lea	ecx, [L(load_table5)+eax*8]
659 //#endif
660 	mov	ebx, [esp+raw_closure_T_FS-4]
661 L(UW47):
662 	// cfi_restore(%ebx)
663 	mov	eax, [esp+16]				/* Optimistic load */
    	/* ecx is the address of the slot itself, not of a pointer to it.  */
664 	jmp	ecx
665
    	/* The table base must be 8-aligned: slot N lives at base + N*8 and
    	   each E() realigns to 8.  */
666 	ALIGN	8
667 L(load_table5):
668 E(L(load_table5), X86_RET_FLOAT)
669 	fld	DWORD PTR [esp+16]
670 	jmp	L(e5)
671 E(L(load_table5), X86_RET_DOUBLE)
672 	fld	QWORD PTR [esp+16]
673 	jmp	L(e5)
674 E(L(load_table5), X86_RET_LDOUBLE)
675 	fld	QWORD PTR [esp+16]
676 	jmp	L(e5)
677 E(L(load_table5), X86_RET_SINT8)
678 	movsx	eax, al
679 	jmp	L(e5)
680 E(L(load_table5), X86_RET_SINT16)
681 	movsx	eax, ax
682 	jmp	L(e5)
683 E(L(load_table5), X86_RET_UINT8)
684 	movzx	eax, al
685 	jmp	L(e5)
686 E(L(load_table5), X86_RET_UINT16)
687 	movzx	eax, ax
688 	jmp	L(e5)
689 E(L(load_table5), X86_RET_INT64)
690 	mov	edx, [esp+16+4]				/* high half; eax already loaded */
691 	jmp	L(e5)
692 E(L(load_table5), X86_RET_INT32)
693 	nop
694 	/* fallthru */
695 E(L(load_table5), X86_RET_VOID)
696 L(e5):
697 	add	esp, raw_closure_T_FS
698 L(UW48):
699 	// cfi_adjust_cfa_offset(-raw_closure_T_FS)
700 	/* Remove the extra %ecx argument we pushed.  */
701 	ret	4
702 L(UW49):
703 	// cfi_adjust_cfa_offset(raw_closure_T_FS)
704 E(L(load_table5), X86_RET_STRUCTPOP)
705 	add	esp, raw_closure_T_FS
706 L(UW50):
707 	// cfi_adjust_cfa_offset(-raw_closure_T_FS)
708 	ret	8
709 L(UW51):
710 	// cfi_adjust_cfa_offset(raw_closure_T_FS)
711 E(L(load_table5), X86_RET_STRUCTARG)
712 	jmp	L(e5)
713 E(L(load_table5), X86_RET_STRUCT_1B)
714 	movzx	eax, al
715 	jmp	L(e5)
716 E(L(load_table5), X86_RET_STRUCT_2B)
717 	movzx	eax, ax
718 	jmp	L(e5)
719
720 	/* Fill out the table so that bad values are predictable.  */
721 E(L(load_table5), X86_RET_UNUSED14)
722 	int 3
723 E(L(load_table5), X86_RET_UNUSED15)
724 	int 3
725
726 L(UW52):
727 	// cfi_endproc
728 ENDF(ffi_closure_raw_THISCALL)
729 
730 #endif /* !FFI_NO_RAW_API */
731 
732 #ifdef X86_DARWIN
    /* COMDAT(X) switches to a per-symbol text section for X -- a
       "coalesced" section on Darwin, a "comdat" group on ELF -- and marks
       X with FFI_HIDDEN.  On any other target it expands to nothing.
       NOTE(review): the only uses visible in this file are inside the
       commented-out PIC thunk code that follows, and the directives are
       written in GNU assembler syntax -- confirm whether this macro is
       still needed in the MSVC build.  */
733 # define COMDAT(X)							\
734         .section __TEXT,__text,coalesced,pure_instructions;		\
735         .weak_definition X;						\
736         FFI_HIDDEN(X)
737 #elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
738 # define COMDAT(X)							\
739 	.section .text.X,"axG",@progbits,X,comdat;			\
740 	PUBLIC	X;							\
741 	FFI_HIDDEN(X)
742 #else
743 # define COMDAT(X)
744 #endif
745 
746 // #if defined(__PIC__)
747 // 	COMDAT(C(__x86.get_pc_thunk.bx))
748 // C(__x86.get_pc_thunk.bx):
749 // 	movl	(%esp), %ebx
750 // 	ret
751 // ENDF(C(__x86.get_pc_thunk.bx))
752 // # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
753 // 	COMDAT(C(__x86.get_pc_thunk.dx))
754 // C(__x86.get_pc_thunk.dx):
755 // 	movl	(%esp), %edx
756 // 	ret
757 // ENDF(C(__x86.get_pc_thunk.dx))
758 // #endif /* DARWIN || HIDDEN */
759 // #endif /* __PIC__ */
760 
761 #if 0
762 /* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */
763 
764 #ifdef __APPLE__
765 .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
766 EHFrame0:
767 #elif defined(X86_WIN32)
768 .section .eh_frame,"r"
769 #elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
770 .section .eh_frame,EH_FRAME_FLAGS,@unwind
771 #else
772 .section .eh_frame,EH_FRAME_FLAGS,@progbits
773 #endif
774 
775 #ifdef HAVE_AS_X86_PCREL
776 # define PCREL(X)	X - .
777 #else
778 # define PCREL(X)	X@rel
779 #endif
780 
781 /* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */
782 #define ADV(N, P)	.byte 2, L(N)-L(P)
783 
784 	.balign 4
785 L(CIE):
786 	.set	L(set0),L(ECIE)-L(SCIE)
787 	.long	L(set0)			/* CIE Length */
788 L(SCIE):
789 	.long	0			/* CIE Identifier Tag */
790 	.byte	1			/* CIE Version */
791 	.ascii	"zR\0"			/* CIE Augmentation */
792 	.byte	1			/* CIE Code Alignment Factor */
793 	.byte	0x7c			/* CIE Data Alignment Factor */
794 	.byte	0x8			/* CIE RA Column */
795 	.byte	1			/* Augmentation size */
796 	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
797 	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp offset 4 */
798 	.byte	0x80+8, 1		/* DW_CFA_offset, %eip offset 1*-4 */
799 	.balign 4
800 L(ECIE):
801 
802 	.set	L(set1),L(EFDE1)-L(SFDE1)
803 	.long	L(set1)			/* FDE Length */
804 L(SFDE1):
805 	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */
806 	.long	PCREL(L(UW0))		/* Initial location */
807 	.long	L(UW5)-L(UW0)		/* Address range */
808 	.byte	0			/* Augmentation size */
809 	ADV(UW1, UW0)
810 	.byte	0xc, 5, 8		/* DW_CFA_def_cfa, %ebp 8 */
811 	.byte	0x80+5, 2		/* DW_CFA_offset, %ebp 2*-4 */
812 	ADV(UW2, UW1)
813 	.byte	0x80+3, 0		/* DW_CFA_offset, %ebx 0*-4 */
814 	ADV(UW3, UW2)
815 	.byte	0xa			/* DW_CFA_remember_state */
816 	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp 4 */
817 	.byte	0xc0+3			/* DW_CFA_restore, %ebx */
818 	.byte	0xc0+5			/* DW_CFA_restore, %ebp */
819 	ADV(UW4, UW3)
820 	.byte	0xb			/* DW_CFA_restore_state */
821 	.balign	4
822 L(EFDE1):
823 
824 	.set	L(set2),L(EFDE2)-L(SFDE2)
825 	.long	L(set2)			/* FDE Length */
826 L(SFDE2):
827 	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */
828 	.long	PCREL(L(UW6))		/* Initial location */
829 	.long	L(UW8)-L(UW6)		/* Address range */
830 	.byte	0			/* Augmentation size */
831 	ADV(UW7, UW6)
832 	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
833 	.balign	4
834 L(EFDE2):
835 
836 	.set	L(set3),L(EFDE3)-L(SFDE3)
837 	.long	L(set3)			/* FDE Length */
838 L(SFDE3):
839 	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */
840 	.long	PCREL(L(UW9))		/* Initial location */
841 	.long	L(UW11)-L(UW9)		/* Address range */
842 	.byte	0			/* Augmentation size */
843 	ADV(UW10, UW9)
844 	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
845 	.balign	4
846 L(EFDE3):
847 
848 	.set	L(set4),L(EFDE4)-L(SFDE4)
849 	.long	L(set4)			/* FDE Length */
850 L(SFDE4):
851 	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */
852 	.long	PCREL(L(UW12))		/* Initial location */
853 	.long	L(UW20)-L(UW12)		/* Address range */
854 	.byte	0			/* Augmentation size */
855 	ADV(UW13, UW12)
856 	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
857 #ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
858 	ADV(UW14, UW13)
859 	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
860 	ADV(UW15, UW14)
861 	.byte	0xc0+3			/* DW_CFA_restore %ebx */
862 	ADV(UW16, UW15)
863 #else
864 	ADV(UW16, UW13)
865 #endif
866 	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
867 	ADV(UW17, UW16)
868 	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
869 	ADV(UW18, UW17)
870 	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
871 	ADV(UW19, UW18)
872 	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
873 	.balign	4
874 L(EFDE4):
875 
876 	.set	L(set5),L(EFDE5)-L(SFDE5)
877 	.long	L(set5)			/* FDE Length */
878 L(SFDE5):
879 	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */
880 	.long	PCREL(L(UW21))		/* Initial location */
881 	.long	L(UW23)-L(UW21)		/* Address range */
882 	.byte	0			/* Augmentation size */
883 	ADV(UW22, UW21)
884 	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
885 	.balign	4
886 L(EFDE5):
887 
888 	.set	L(set6),L(EFDE6)-L(SFDE6)
889 	.long	L(set6)			/* FDE Length */
890 L(SFDE6):
891 	.long	L(SFDE6)-L(CIE)		/* FDE CIE offset */
892 	.long	PCREL(L(UW24))		/* Initial location */
893 	.long	L(UW26)-L(UW24)		/* Address range */
894 	.byte	0			/* Augmentation size */
895 	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
896 	.byte	0x80+8, 2		/* DW_CFA_offset %eip, 2*-4 */
897 	ADV(UW25, UW24)
898 	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
899 	.balign	4
900 L(EFDE6):
901 
902 	.set	L(set7),L(EFDE7)-L(SFDE7)
903 	.long	L(set7)			/* FDE Length */
904 L(SFDE7):
905 	.long	L(SFDE7)-L(CIE)		/* FDE CIE offset */
906 	.long	PCREL(L(UW27))		/* Initial location */
907 	.long	L(UW31)-L(UW27)		/* Address range */
908 	.byte	0			/* Augmentation size */
909 	ADV(UW28, UW27)
910 	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
911 #ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
912 	ADV(UW29, UW28)
913 	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
914 	ADV(UW30, UW29)
915 	.byte	0xc0+3			/* DW_CFA_restore %ebx */
916 #endif
917 	.balign	4
918 L(EFDE7):
919 
920 #if !FFI_NO_RAW_API
921 	.set	L(set8),L(EFDE8)-L(SFDE8)
922 	.long	L(set8)			/* FDE Length */
923 L(SFDE8):
924 	.long	L(SFDE8)-L(CIE)		/* FDE CIE offset */
925 	.long	PCREL(L(UW32))		/* Initial location */
926 	.long	L(UW40)-L(UW32)		/* Address range */
927 	.byte	0			/* Augmentation size */
928 	ADV(UW33, UW32)
929 	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
930 	ADV(UW34, UW33)
931 	.byte	0x80+3, 2		/* DW_CFA_offset %ebx 2*-4 */
932 	ADV(UW35, UW34)
933 	.byte	0xc0+3			/* DW_CFA_restore %ebx */
934 	ADV(UW36, UW35)
935 	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
936 	ADV(UW37, UW36)
937 	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
938 	ADV(UW38, UW37)
939 	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
940 	ADV(UW39, UW38)
941 	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
942 	.balign	4
943 L(EFDE8):
944 
945 	.set	L(set9),L(EFDE9)-L(SFDE9)
946 	.long	L(set9)			/* FDE Length */
947 L(SFDE9):
948 	.long	L(SFDE9)-L(CIE)		/* FDE CIE offset */
949 	.long	PCREL(L(UW41))		/* Initial location */
950 	.long	L(UW52)-L(UW41)		/* Address range */
951 	.byte	0			/* Augmentation size */
952 	ADV(UW42, UW41)
953 	.byte	0xe, 0			/* DW_CFA_def_cfa_offset */
954 	.byte	0x9, 8, 2		/* DW_CFA_register %eip, %edx */
955 	ADV(UW43, UW42)
956 	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
957 	ADV(UW44, UW43)
958 	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
959 	.byte	0x80+8, 2		/* DW_CFA_offset %eip 2*-4 */
960 	ADV(UW45, UW44)
961 	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
962 	ADV(UW46, UW45)
963 	.byte	0x80+3, 3		/* DW_CFA_offset %ebx 3*-4 */
964 	ADV(UW47, UW46)
965 	.byte	0xc0+3			/* DW_CFA_restore %ebx */
966 	ADV(UW48, UW47)
967 	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
968 	ADV(UW49, UW48)
969 	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
970 	ADV(UW50, UW49)
971 	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
972 	ADV(UW51, UW50)
973 	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
974 	.balign	4
975 L(EFDE9):
976 #endif /* !FFI_NO_RAW_API */
977 
978 #ifdef _WIN32
979 	.def	 @feat.00;
980 	.scl	3;
981 	.type	0;
982 	.endef
983 	PUBLIC	@feat.00
984 @feat.00 = 1
985 #endif
986 
987 #endif /* if 0 */
988 #endif /* ifdef _MSC_VER */
989
990 #if defined __ELF__ && defined __linux__
991 	.section	.note.GNU-stack,"",@progbits
992 #endif
993 #endif /* ifndef __x86_64__ */
994 
995 END