/* -----------------------------------------------------------------------
   ffi64.c - Copyright (c) 2013 The Written Word, Inc.
             Copyright (c) 2011 Anthony Green
             Copyright (c) 2008, 2010 Red Hat, Inc.
             Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>

   x86-64 Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
28 ----------------------------------------------------------------------- */ 29 30 #include <ffi.h> 31 #include <ffi_common.h> 32 33 #include <stdlib.h> 34 #include <stdarg.h> 35 36 #ifdef __x86_64__ 37 38 #define MAX_GPR_REGS 6 39 #define MAX_SSE_REGS 8 40 41 #if defined(__INTEL_COMPILER) 42 #include "xmmintrin.h" 43 #define UINT128 __m128 44 #else 45 #if defined(__SUNPRO_C) 46 #include <sunmedia_types.h> 47 #define UINT128 __m128i 48 #else 49 #define UINT128 __int128_t 50 #endif 51 #endif 52 53 union big_int_union 54 { 55 UINT32 i32; 56 UINT64 i64; 57 UINT128 i128; 58 }; 59 60 struct register_args 61 { 62 /* Registers for argument passing. */ 63 UINT64 gpr[MAX_GPR_REGS]; 64 union big_int_union sse[MAX_SSE_REGS]; 65 }; 66 67 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, 68 void *raddr, void (*fnaddr)(void), unsigned ssecount); 69 70 /* All reference to register classes here is identical to the code in 71 gcc/config/i386/i386.c. Do *not* change one without the other. */ 72 73 /* Register class used for passing given 64bit part of the argument. 74 These represent classes as documented by the PS ABI, with the 75 exception of SSESF, SSEDF classes, that are basically SSE class, 76 just gcc will use SF or DFmode move instead of DImode to avoid 77 reformatting penalties. 78 79 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves 80 whenever possible (upper half does contain padding). */ 81 enum x86_64_reg_class 82 { 83 X86_64_NO_CLASS, 84 X86_64_INTEGER_CLASS, 85 X86_64_INTEGERSI_CLASS, 86 X86_64_SSE_CLASS, 87 X86_64_SSESF_CLASS, 88 X86_64_SSEDF_CLASS, 89 X86_64_SSEUP_CLASS, 90 X86_64_X87_CLASS, 91 X86_64_X87UP_CLASS, 92 X86_64_COMPLEX_X87_CLASS, 93 X86_64_MEMORY_CLASS 94 }; 95 96 #define MAX_CLASSES 4 97 98 #define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS) 99 100 /* x86-64 register passing implementation. See x86-64 ABI for details. 
Goal 101 of this code is to classify each 8bytes of incoming argument by the register 102 class and assign registers accordingly. */ 103 104 /* Return the union class of CLASS1 and CLASS2. 105 See the x86-64 PS ABI for details. */ 106 107 static enum x86_64_reg_class 108 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) 109 { 110 /* Rule #1: If both classes are equal, this is the resulting class. */ 111 if (class1 == class2) 112 return class1; 113 114 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 115 the other class. */ 116 if (class1 == X86_64_NO_CLASS) 117 return class2; 118 if (class2 == X86_64_NO_CLASS) 119 return class1; 120 121 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 122 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 123 return X86_64_MEMORY_CLASS; 124 125 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 126 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 127 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 128 return X86_64_INTEGERSI_CLASS; 129 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 130 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 131 return X86_64_INTEGER_CLASS; 132 133 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, 134 MEMORY is used. */ 135 if (class1 == X86_64_X87_CLASS 136 || class1 == X86_64_X87UP_CLASS 137 || class1 == X86_64_COMPLEX_X87_CLASS 138 || class2 == X86_64_X87_CLASS 139 || class2 == X86_64_X87UP_CLASS 140 || class2 == X86_64_COMPLEX_X87_CLASS) 141 return X86_64_MEMORY_CLASS; 142 143 /* Rule #6: Otherwise class SSE is used. */ 144 return X86_64_SSE_CLASS; 145 } 146 147 /* Classify the argument of type TYPE and mode MODE. 148 CLASSES will be filled by the register class used to pass each word 149 of the operand. The number of words is returned. 
In case the parameter 150 should be passed in memory, 0 is returned. As a special case for zero 151 sized containers, classes[0] will be NO_CLASS and 1 is returned. 152 153 See the x86-64 PS ABI for details. 154 */ 155 static size_t 156 classify_argument (ffi_type *type, enum x86_64_reg_class classes[], 157 size_t byte_offset) 158 { 159 switch (type->type) 160 { 161 case FFI_TYPE_UINT8: 162 case FFI_TYPE_SINT8: 163 case FFI_TYPE_UINT16: 164 case FFI_TYPE_SINT16: 165 case FFI_TYPE_UINT32: 166 case FFI_TYPE_SINT32: 167 case FFI_TYPE_UINT64: 168 case FFI_TYPE_SINT64: 169 case FFI_TYPE_POINTER: 170 { 171 size_t size = byte_offset + type->size; 172 173 if (size <= 4) 174 { 175 classes[0] = X86_64_INTEGERSI_CLASS; 176 return 1; 177 } 178 else if (size <= 8) 179 { 180 classes[0] = X86_64_INTEGER_CLASS; 181 return 1; 182 } 183 else if (size <= 12) 184 { 185 classes[0] = X86_64_INTEGER_CLASS; 186 classes[1] = X86_64_INTEGERSI_CLASS; 187 return 2; 188 } 189 else if (size <= 16) 190 { 191 classes[0] = classes[1] = X86_64_INTEGERSI_CLASS; 192 return 2; 193 } 194 else 195 FFI_ASSERT (0); 196 } 197 case FFI_TYPE_FLOAT: 198 if (!(byte_offset % 8)) 199 classes[0] = X86_64_SSESF_CLASS; 200 else 201 classes[0] = X86_64_SSE_CLASS; 202 return 1; 203 case FFI_TYPE_DOUBLE: 204 classes[0] = X86_64_SSEDF_CLASS; 205 return 1; 206 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 207 case FFI_TYPE_LONGDOUBLE: 208 classes[0] = X86_64_X87_CLASS; 209 classes[1] = X86_64_X87UP_CLASS; 210 return 2; 211 #endif 212 case FFI_TYPE_STRUCT: 213 { 214 const size_t UNITS_PER_WORD = 8; 215 size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 216 ffi_type **ptr; 217 int i; 218 enum x86_64_reg_class subclasses[MAX_CLASSES]; 219 220 /* If the struct is larger than 32 bytes, pass it on the stack. */ 221 if (type->size > 32) 222 return 0; 223 224 for (i = 0; i < words; i++) 225 classes[i] = X86_64_NO_CLASS; 226 227 /* Zero sized arrays or structures are NO_CLASS. 
We return 0 to 228 signalize memory class, so handle it as special case. */ 229 if (!words) 230 { 231 classes[0] = X86_64_NO_CLASS; 232 return 1; 233 } 234 235 /* Merge the fields of structure. */ 236 for (ptr = type->elements; *ptr != NULL; ptr++) 237 { 238 size_t num; 239 240 byte_offset = ALIGN (byte_offset, (*ptr)->alignment); 241 242 num = classify_argument (*ptr, subclasses, byte_offset % 8); 243 if (num == 0) 244 return 0; 245 for (i = 0; i < num; i++) 246 { 247 size_t pos = byte_offset / 8; 248 classes[i + pos] = 249 merge_classes (subclasses[i], classes[i + pos]); 250 } 251 252 byte_offset += (*ptr)->size; 253 } 254 255 if (words > 2) 256 { 257 /* When size > 16 bytes, if the first one isn't 258 X86_64_SSE_CLASS or any other ones aren't 259 X86_64_SSEUP_CLASS, everything should be passed in 260 memory. */ 261 if (classes[0] != X86_64_SSE_CLASS) 262 return 0; 263 264 for (i = 1; i < words; i++) 265 if (classes[i] != X86_64_SSEUP_CLASS) 266 return 0; 267 } 268 269 /* Final merger cleanup. */ 270 for (i = 0; i < words; i++) 271 { 272 /* If one class is MEMORY, everything should be passed in 273 memory. */ 274 if (classes[i] == X86_64_MEMORY_CLASS) 275 return 0; 276 277 /* The X86_64_SSEUP_CLASS should be always preceded by 278 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ 279 if (classes[i] == X86_64_SSEUP_CLASS 280 && classes[i - 1] != X86_64_SSE_CLASS 281 && classes[i - 1] != X86_64_SSEUP_CLASS) 282 { 283 /* The first one should never be X86_64_SSEUP_CLASS. */ 284 FFI_ASSERT (i != 0); 285 classes[i] = X86_64_SSE_CLASS; 286 } 287 288 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, 289 everything should be passed in memory. */ 290 if (classes[i] == X86_64_X87UP_CLASS 291 && (classes[i - 1] != X86_64_X87_CLASS)) 292 { 293 /* The first one should never be X86_64_X87UP_CLASS. */ 294 FFI_ASSERT (i != 0); 295 return 0; 296 } 297 } 298 return words; 299 } 300 301 default: 302 FFI_ASSERT(0); 303 } 304 return 0; /* Never reached. 
*/ 305 } 306 307 /* Examine the argument and return set number of register required in each 308 class. Return zero iff parameter should be passed in memory, otherwise 309 the number of registers. */ 310 311 static size_t 312 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], 313 _Bool in_return, int *pngpr, int *pnsse) 314 { 315 size_t n; 316 int i, ngpr, nsse; 317 318 n = classify_argument (type, classes, 0); 319 if (n == 0) 320 return 0; 321 322 ngpr = nsse = 0; 323 for (i = 0; i < n; ++i) 324 switch (classes[i]) 325 { 326 case X86_64_INTEGER_CLASS: 327 case X86_64_INTEGERSI_CLASS: 328 ngpr++; 329 break; 330 case X86_64_SSE_CLASS: 331 case X86_64_SSESF_CLASS: 332 case X86_64_SSEDF_CLASS: 333 nsse++; 334 break; 335 case X86_64_NO_CLASS: 336 case X86_64_SSEUP_CLASS: 337 break; 338 case X86_64_X87_CLASS: 339 case X86_64_X87UP_CLASS: 340 case X86_64_COMPLEX_X87_CLASS: 341 return in_return != 0; 342 default: 343 abort (); 344 } 345 346 *pngpr = ngpr; 347 *pnsse = nsse; 348 349 return n; 350 } 351 352 /* Perform machine dependent cif processing. */ 353 354 ffi_status 355 ffi_prep_cif_machdep (ffi_cif *cif) 356 { 357 int gprcount, ssecount, i, avn, ngpr, nsse, flags; 358 enum x86_64_reg_class classes[MAX_CLASSES]; 359 size_t bytes, n; 360 361 gprcount = ssecount = 0; 362 363 flags = cif->rtype->type; 364 if (flags != FFI_TYPE_VOID) 365 { 366 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); 367 if (n == 0) 368 { 369 /* The return value is passed in memory. A pointer to that 370 memory is the first argument. Allocate a register for it. */ 371 gprcount++; 372 /* We don't have to do anything in asm for the return. */ 373 flags = FFI_TYPE_VOID; 374 } 375 else if (flags == FFI_TYPE_STRUCT) 376 { 377 /* Mark which registers the result appears in. 
*/ 378 _Bool sse0 = SSE_CLASS_P (classes[0]); 379 _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); 380 if (sse0 && !sse1) 381 flags |= 1 << 8; 382 else if (!sse0 && sse1) 383 flags |= 1 << 9; 384 else if (sse0 && sse1) 385 flags |= 1 << 10; 386 /* Mark the true size of the structure. */ 387 flags |= cif->rtype->size << 12; 388 } 389 } 390 391 /* Go over all arguments and determine the way they should be passed. 392 If it's in a register and there is space for it, let that be so. If 393 not, add it's size to the stack byte count. */ 394 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++) 395 { 396 if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0 397 || gprcount + ngpr > MAX_GPR_REGS 398 || ssecount + nsse > MAX_SSE_REGS) 399 { 400 long align = cif->arg_types[i]->alignment; 401 402 if (align < 8) 403 align = 8; 404 405 bytes = ALIGN (bytes, align); 406 bytes += cif->arg_types[i]->size; 407 } 408 else 409 { 410 gprcount += ngpr; 411 ssecount += nsse; 412 } 413 } 414 if (ssecount) 415 flags |= 1 << 11; 416 cif->flags = flags; 417 cif->bytes = (unsigned)ALIGN (bytes, 8); 418 419 return FFI_OK; 420 } 421 422 void 423 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) 424 { 425 enum x86_64_reg_class classes[MAX_CLASSES]; 426 char *stack, *argp; 427 ffi_type **arg_types; 428 int gprcount, ssecount, ngpr, nsse, i, avn; 429 _Bool ret_in_memory; 430 struct register_args *reg_args; 431 432 /* Can't call 32-bit mode from 64-bit mode. */ 433 FFI_ASSERT (cif->abi == FFI_UNIX64); 434 435 /* If the return value is a struct and we don't have a return value 436 address then we need to make one. Note the setting of flags to 437 VOID above in ffi_prep_cif_machdep. */ 438 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT 439 && (cif->flags & 0xff) == FFI_TYPE_VOID); 440 if (rvalue == NULL && ret_in_memory) 441 rvalue = alloca (cif->rtype->size); 442 443 /* Allocate the space for the arguments, plus 4 words of temp space. 
*/ 444 stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); 445 reg_args = (struct register_args *) stack; 446 argp = stack + sizeof (struct register_args); 447 448 gprcount = ssecount = 0; 449 450 /* If the return value is passed in memory, add the pointer as the 451 first integer argument. */ 452 if (ret_in_memory) 453 reg_args->gpr[gprcount++] = (unsigned long) rvalue; 454 455 avn = cif->nargs; 456 arg_types = cif->arg_types; 457 458 for (i = 0; i < avn; ++i) 459 { 460 size_t n, size = arg_types[i]->size; 461 462 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); 463 if (n == 0 464 || gprcount + ngpr > MAX_GPR_REGS 465 || ssecount + nsse > MAX_SSE_REGS) 466 { 467 long align = arg_types[i]->alignment; 468 469 /* Stack arguments are *always* at least 8 byte aligned. */ 470 if (align < 8) 471 align = 8; 472 473 /* Pass this argument in memory. */ 474 argp = (void *) ALIGN (argp, align); 475 memcpy (argp, avalue[i], size); 476 argp += size; 477 } 478 else 479 { 480 /* The argument is passed entirely in registers. */ 481 char *a = (char *) avalue[i]; 482 int j; 483 484 for (j = 0; j < n; j++, a += 8, size -= 8) 485 { 486 switch (classes[j]) 487 { 488 case X86_64_INTEGER_CLASS: 489 case X86_64_INTEGERSI_CLASS: 490 /* Sign-extend integer arguments passed in general 491 purpose registers, to cope with the fact that 492 LLVM incorrectly assumes that this will be done 493 (the x86-64 PS ABI does not specify this). */ 494 switch (arg_types[i]->type) 495 { 496 case FFI_TYPE_SINT8: 497 *(SINT64 *)®_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); 498 break; 499 case FFI_TYPE_SINT16: 500 *(SINT64 *)®_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); 501 break; 502 case FFI_TYPE_SINT32: 503 *(SINT64 *)®_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); 504 break; 505 default: 506 reg_args->gpr[gprcount] = 0; 507 memcpy (®_args->gpr[gprcount], a, size < 8 ? 
size : 8); 508 } 509 gprcount++; 510 break; 511 case X86_64_SSE_CLASS: 512 case X86_64_SSEDF_CLASS: 513 reg_args->sse[ssecount++].i64 = *(UINT64 *) a; 514 break; 515 case X86_64_SSESF_CLASS: 516 reg_args->sse[ssecount++].i32 = *(UINT32 *) a; 517 break; 518 default: 519 abort(); 520 } 521 } 522 } 523 } 524 525 ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), 526 cif->flags, rvalue, fn, ssecount); 527 } 528 529 530 extern void ffi_closure_unix64(void); 531 532 ffi_status 533 ffi_prep_closure_loc (ffi_closure* closure, 534 ffi_cif* cif, 535 void (*fun)(ffi_cif*, void*, void**, void*), 536 void *user_data, 537 void *codeloc) 538 { 539 volatile unsigned short *tramp; 540 541 /* Sanity check on the cif ABI. */ 542 { 543 int abi = cif->abi; 544 if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))) 545 return FFI_BAD_ABI; 546 } 547 548 tramp = (volatile unsigned short *) &closure->tramp[0]; 549 550 tramp[0] = 0xbb49; /* mov <code>, %r11 */ 551 *((unsigned long long * volatile) &tramp[1]) 552 = (unsigned long) ffi_closure_unix64; 553 tramp[5] = 0xba49; /* mov <data>, %r10 */ 554 *((unsigned long long * volatile) &tramp[6]) 555 = (unsigned long) codeloc; 556 557 /* Set the carry bit iff the function uses any sse registers. 558 This is clc or stc, together with the first byte of the jmp. */ 559 tramp[10] = cif->flags & (1 << 11) ? 
0x49f9 : 0x49f8; 560 561 tramp[11] = 0xe3ff; /* jmp *%r11 */ 562 563 closure->cif = cif; 564 closure->fun = fun; 565 closure->user_data = user_data; 566 567 return FFI_OK; 568 } 569 570 int 571 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue, 572 struct register_args *reg_args, char *argp) 573 { 574 ffi_cif *cif; 575 void **avalue; 576 ffi_type **arg_types; 577 long i, avn; 578 int gprcount, ssecount, ngpr, nsse; 579 int ret; 580 581 cif = closure->cif; 582 avalue = alloca(cif->nargs * sizeof(void *)); 583 gprcount = ssecount = 0; 584 585 ret = cif->rtype->type; 586 if (ret != FFI_TYPE_VOID) 587 { 588 enum x86_64_reg_class classes[MAX_CLASSES]; 589 size_t n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); 590 if (n == 0) 591 { 592 /* The return value goes in memory. Arrange for the closure 593 return value to go directly back to the original caller. */ 594 rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++]; 595 /* We don't have to do anything in asm for the return. */ 596 ret = FFI_TYPE_VOID; 597 } 598 else if (ret == FFI_TYPE_STRUCT && n == 2) 599 { 600 /* Mark which register the second word of the structure goes in. */ 601 _Bool sse0 = SSE_CLASS_P (classes[0]); 602 _Bool sse1 = SSE_CLASS_P (classes[1]); 603 if (!sse0 && sse1) 604 ret |= 1 << 8; 605 else if (sse0 && !sse1) 606 ret |= 1 << 9; 607 } 608 } 609 610 avn = cif->nargs; 611 arg_types = cif->arg_types; 612 613 for (i = 0; i < avn; ++i) 614 { 615 enum x86_64_reg_class classes[MAX_CLASSES]; 616 size_t n; 617 618 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); 619 if (n == 0 620 || gprcount + ngpr > MAX_GPR_REGS 621 || ssecount + nsse > MAX_SSE_REGS) 622 { 623 long align = arg_types[i]->alignment; 624 625 /* Stack arguments are *always* at least 8 byte aligned. */ 626 if (align < 8) 627 align = 8; 628 629 /* Pass this argument in memory. 
*/ 630 argp = (void *) ALIGN (argp, align); 631 avalue[i] = argp; 632 argp += arg_types[i]->size; 633 } 634 /* If the argument is in a single register, or two consecutive 635 integer registers, then we can use that address directly. */ 636 else if (n == 1 637 || (n == 2 && !(SSE_CLASS_P (classes[0]) 638 || SSE_CLASS_P (classes[1])))) 639 { 640 /* The argument is in a single register. */ 641 if (SSE_CLASS_P (classes[0])) 642 { 643 avalue[i] = ®_args->sse[ssecount]; 644 ssecount += n; 645 } 646 else 647 { 648 avalue[i] = ®_args->gpr[gprcount]; 649 gprcount += n; 650 } 651 } 652 /* Otherwise, allocate space to make them consecutive. */ 653 else 654 { 655 char *a = alloca (16); 656 int j; 657 658 avalue[i] = a; 659 for (j = 0; j < n; j++, a += 8) 660 { 661 if (SSE_CLASS_P (classes[j])) 662 memcpy (a, ®_args->sse[ssecount++], 8); 663 else 664 memcpy (a, ®_args->gpr[gprcount++], 8); 665 } 666 } 667 } 668 669 /* Invoke the closure. */ 670 closure->fun (cif, rvalue, avalue, closure->user_data); 671 672 /* Tell assembly how to perform return type promotions. */ 673 return ret; 674 } 675 676 #endif /* __x86_64__ */