9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "code/codeCache.hpp"
30 #include "code/debugInfoRec.hpp"
31 #include "code/icBuffer.hpp"
32 #include "code/vtableStubs.hpp"
33 #include "gc/shared/barrierSetAssembler.hpp"
34 #include "interpreter/interpreter.hpp"
35 #include "interpreter/interp_masm.hpp"
36 #include "logging/log.hpp"
37 #include "memory/resourceArea.hpp"
38 #include "nativeInst_aarch64.hpp"
39 #include "oops/compiledICHolder.hpp"
40 #include "oops/klass.inline.hpp"
41 #include "runtime/safepointMechanism.hpp"
42 #include "runtime/sharedRuntime.hpp"
43 #include "runtime/vframeArray.hpp"
44 #include "utilities/align.hpp"
45 #include "vmreg_aarch64.inline.hpp"
46 #ifdef COMPILER1
47 #include "c1/c1_Runtime1.hpp"
48 #endif
274 case T_SHORT:
275 case T_INT:
276 if (int_args < Argument::n_int_register_parameters_j) {
277 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
278 } else {
279 regs[i].set1(VMRegImpl::stack2reg(stk_args));
280 stk_args += 2;
281 }
282 break;
283 case T_VOID:
284 // halves of T_LONG or T_DOUBLE
285 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
286 regs[i].set_bad();
287 break;
288 case T_LONG:
289 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
290 // fall through
291 case T_OBJECT:
292 case T_ARRAY:
293 case T_ADDRESS:
294 if (int_args < Argument::n_int_register_parameters_j) {
295 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
296 } else {
297 regs[i].set2(VMRegImpl::stack2reg(stk_args));
298 stk_args += 2;
299 }
300 break;
301 case T_FLOAT:
302 if (fp_args < Argument::n_float_register_parameters_j) {
303 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
304 } else {
305 regs[i].set1(VMRegImpl::stack2reg(stk_args));
306 stk_args += 2;
307 }
308 break;
309 case T_DOUBLE:
310 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
311 if (fp_args < Argument::n_float_register_parameters_j) {
312 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
313 } else {
314 regs[i].set2(VMRegImpl::stack2reg(stk_args));
315 stk_args += 2;
316 }
317 break;
318 default:
319 ShouldNotReachHere();
320 break;
321 }
322 }
323
324 return align_up(stk_args, 2);
325 }
326
// Patch the callers callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  // Method::_code is non-NULL only when the method has compiled code;
  // if it is NULL there is nothing to patch, so skip the runtime call.
  __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
  __ cbz(rscratch1, L);

  // We are at an arbitrary call site, so preserve the complete register
  // state around the call into the VM.
  __ enter();
  __ push_CPU_state();

  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

#ifndef PRODUCT
  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif

  __ mov(c_rarg0, rmethod);
  __ mov(c_rarg1, lr); // the return address identifies the callsite to fix up
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
  __ blr(rscratch1);
  __ maybe_isb();

  __ pop_CPU_state();
  // restore sp
  __ leave();
  __ bind(L);
}
356
// Generate a C2I adapter: repack arguments from the compiled (register +
// outgoing-stack) layout into the interpreter's expression-stack layout,
// then jump to the interpreter entry of the method in rmethod.
static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all. We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one). Check for a
  // compiled target. If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  int words_pushed = 0;

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need.

  int extraspace = total_args_passed * Interpreter::stackElementSize;

  // Preserve the compiled caller's SP in r13 (senderSP); the interpreter
  // uses it to repair the caller's stack on return.
  __ mov(r13, sp);

  // stack is aligned, keep it that way
  extraspace = align_up(extraspace, 2*wordSize);

  if (extraspace)
    __ sub(sp, sp, extraspace);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // T_VOID marks the unused half of a T_LONG/T_DOUBLE pair.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // offset to start parameters
    int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
    int next_off = st_off - Interpreter::stackElementSize;

    // Say 4 args:
    // i   st_off
    // 0   32 T_LONG
    // 1   24 T_VOID
    // 2   16 T_OBJECT
    // 3    8 T_BOOL
    // -    0 return address
    //
    // However to make things extra confusing: because we can fit a long/double in
    // a single slot on a 64 bit vm and it would be silly to break them up, the interpreter
    // leaves one slot empty and only stores to a single slot. In this case the
    // slot that is occupied is the T_VOID slot. See, I said it was confusing.

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // memory to memory use rscratch1
      // ld_off is rebiased by extraspace because sp has already been lowered.
      int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
                    + extraspace
                    + words_pushed * wordSize);
      if (!r_2->is_valid()) {
        // sign extend??
        __ ldrw(rscratch1, Address(sp, ld_off));
        __ str(rscratch1, Address(sp, st_off));

      } else {

        __ ldr(rscratch1, Address(sp, ld_off));

        // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // ld_off == LSW, ld_off+wordSize == MSW
          // st_off == MSW, next_off == LSW
          __ str(rscratch1, Address(sp, next_off));
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov(rscratch1, 0xdeadffffdeadaaaaul);
          __ str(rscratch1, Address(sp, st_off));
#endif /* ASSERT */
        } else {
          __ str(rscratch1, Address(sp, st_off));
        }
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        // must be only an int (or less ) so move only 32bits to slot
        // why not sign extend??
        __ str(r, Address(sp, st_off));
      } else {
        // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // long/double in gpr
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov(rscratch1, 0xdeadffffdeadaaabul);
          __ str(rscratch1, Address(sp, st_off));
#endif /* ASSERT */
          __ str(r, Address(sp, next_off));
        } else {
          __ str(r, Address(sp, st_off));
        }
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      if (!r_2->is_valid()) {
        // only a float use just part of the slot
        __ strs(r_1->as_FloatRegister(), Address(sp, st_off));
      } else {
#ifdef ASSERT
        // Overwrite the unused slot with known junk
        __ mov(rscratch1, 0xdeadffffdeadaaacul);
        __ str(rscratch1, Address(sp, st_off));
#endif /* ASSERT */
        __ strd(r_1->as_FloatRegister(), Address(sp, next_off));
      }
    }
  }

  __ mov(esp, sp); // Interp expects args on caller's expression stack

  __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset())));
  __ br(rscratch1);
}
488
489
490 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
491 int total_args_passed,
492 int comp_args_on_stack,
493 const BasicType *sig_bt,
494 const VMRegPair *regs) {
495
496 // Note: r13 contains the senderSP on entry. We must preserve it since
497 // we may do a i2c -> c2i transition if we lose a race where compiled
498 // code goes non-entrant while we get args ready.
499
500 // In addition we use r13 to locate all the interpreter args because
501 // we must align the stack to 16 bytes.
502
503 // Adapters are frameless.
504
505 // An i2c adapter is frameless because the *caller* frame, which is
506 // interpreted, routinely repairs its own esp (from
507 // interpreter_frame_last_sp), even if a callee has modified the
508 // stack pointer. It also recalculates and aligns sp.
509
510 // A c2i adapter is frameless because the *callee* frame, which is
511 // interpreted, routinely repairs its caller's sp (from sender_sp,
512 // which is set up via the senderSP register).
513
514 // In other words, if *either* the caller or callee is interpreted, we can
534 range_check(masm, rax, r11,
535 Interpreter::code()->code_start(), Interpreter::code()->code_end(),
536 L_ok);
537 if (StubRoutines::code1() != NULL)
538 range_check(masm, rax, r11,
539 StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
540 L_ok);
541 if (StubRoutines::code2() != NULL)
542 range_check(masm, rax, r11,
543 StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
544 L_ok);
545 const char* msg = "i2c adapter must return to an interpreter frame";
546 __ block_comment(msg);
547 __ stop(msg);
548 __ bind(L_ok);
549 __ block_comment("} verify_i2ce ");
550 #endif
551 }
552
553 // Cut-out for having no stack args.
554 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
555 if (comp_args_on_stack) {
556 __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
557 __ andr(sp, rscratch1, -16);
558 }
559
560 // Will jump to the compiled code just as if compiled code was doing it.
561 // Pre-load the register-jump target early, to schedule it better.
562 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
563
564 #if INCLUDE_JVMCI
565 if (EnableJVMCI || UseAOT) {
566 // check if this call should be routed towards a specific entry point
567 __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
568 Label no_alternative_target;
569 __ cbz(rscratch2, no_alternative_target);
570 __ mov(rscratch1, rscratch2);
571 __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
572 __ bind(no_alternative_target);
573 }
574 #endif // INCLUDE_JVMCI
575
576 // Now generate the shuffle code.
577 for (int i = 0; i < total_args_passed; i++) {
578 if (sig_bt[i] == T_VOID) {
579 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
580 continue;
581 }
582
583 // Pick up 0, 1 or 2 words from SP+offset.
584
585 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
586 "scrambled load targets?");
587 // Load in argument order going down.
588 int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
589 // Point to interpreter value (vs. tag)
590 int next_off = ld_off - Interpreter::stackElementSize;
591 //
592 //
593 //
594 VMReg r_1 = regs[i].first();
595 VMReg r_2 = regs[i].second();
596 if (!r_1->is_valid()) {
597 assert(!r_2->is_valid(), "");
598 continue;
599 }
600 if (r_1->is_stack()) {
601 // Convert stack slot to an SP offset (+ wordSize to account for return address )
602 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
603 if (!r_2->is_valid()) {
604 // sign extend???
605 __ ldrsw(rscratch2, Address(esp, ld_off));
606 __ str(rscratch2, Address(sp, st_off));
607 } else {
608 //
609 // We are using two optoregs. This can be either T_OBJECT,
610 // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
611 // two slots but only uses one for thr T_LONG or T_DOUBLE case
612 // So we must adjust where to pick up the data to match the
613 // interpreter.
614 //
615 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
616 // are accessed as negative so LSW is at LOW address
617
618 // ld_off is MSW so get LSW
619 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
620 next_off : ld_off;
621 __ ldr(rscratch2, Address(esp, offset));
622 // st_off is LSW (i.e. reg.first())
623 __ str(rscratch2, Address(sp, st_off));
624 }
625 } else if (r_1->is_Register()) { // Register argument
626 Register r = r_1->as_Register();
627 if (r_2->is_valid()) {
628 //
629 // We are using two VMRegs. This can be either T_OBJECT,
630 // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
631 // two slots but only uses one for thr T_LONG or T_DOUBLE case
632 // So we must adjust where to pick up the data to match the
633 // interpreter.
634
635 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
636 next_off : ld_off;
637
638 // this can be a misaligned move
639 __ ldr(r, Address(esp, offset));
640 } else {
641 // sign extend and use a full word?
642 __ ldrw(r, Address(esp, ld_off));
643 }
644 } else {
645 if (!r_2->is_valid()) {
646 __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
647 } else {
648 __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
649 }
650 }
651 }
652
653 // 6243940 We might end up in handle_wrong_method if
654 // the callee is deoptimized as we race thru here. If that
655 // happens we don't want to take a safepoint because the
656 // caller frame will look interpreted and arguments are now
657 // "compiled" so it is much better to make this transition
658 // invisible to the stack walking code. Unfortunately if
659 // we try and find the callee by normal means a safepoint
660 // is possible. So we stash the desired callee in the thread
661 // and the vm will find there should this case occur.
662
663 __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));
664
665 __ br(rscratch1);
666 }
667
668 // ---------------------------------------------------------------
669 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
670 int total_args_passed,
671 int comp_args_on_stack,
672 const BasicType *sig_bt,
673 const VMRegPair *regs,
674 AdapterFingerPrint* fingerprint) {
675 address i2c_entry = __ pc();
676
677 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
678
679 address c2i_unverified_entry = __ pc();
680 Label skip_fixup;
681
682 Label ok;
683
684 Register holder = rscratch2;
685 Register receiver = j_rarg0;
686 Register tmp = r10; // A call-clobbered register not used for arg passing
687
688 // -------------------------------------------------------------------------
689 // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls
690 // to the interpreter. The args start out packed in the compiled layout. They
691 // need to be unpacked into the interpreter layout. This will almost always
692 // require some stack space. We grow the current (compiled) stack, then repack
693 // the args. We finally end in a jump to the generic interpreter entry point.
694 // On exit from the interpreter, the interpreter will restore our SP (lest the
695 // compiled code, which relys solely on SP and not FP, get sick).
696
697 {
698 __ block_comment("c2i_unverified_entry {");
699 __ load_klass(rscratch1, receiver);
700 __ ldr(tmp, Address(holder, CompiledICHolder::holder_klass_offset()));
701 __ cmp(rscratch1, tmp);
702 __ ldr(rmethod, Address(holder, CompiledICHolder::holder_metadata_offset()));
703 __ br(Assembler::EQ, ok);
704 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
705
706 __ bind(ok);
707 // Method might have been compiled since the call site was patched to
708 // interpreted; if that is the case treat it as a miss so we can get
709 // the call site corrected.
710 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
711 __ cbz(rscratch1, skip_fixup);
712 __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
713 __ block_comment("} c2i_unverified_entry");
714 }
715
716 address c2i_entry = __ pc();
717
718 // Class initialization barrier for static methods
719 address c2i_no_clinit_check_entry = NULL;
720 if (VM_Version::supports_fast_class_init_checks()) {
721 Label L_skip_barrier;
722
723 { // Bypass the barrier for non-static methods
724 __ ldrw(rscratch1, Address(rmethod, Method::access_flags_offset()));
725 __ andsw(zr, rscratch1, JVM_ACC_STATIC);
726 __ br(Assembler::EQ, L_skip_barrier); // non-static
727 }
728
729 __ load_method_holder(rscratch2, rmethod);
730 __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
731 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
732
733 __ bind(L_skip_barrier);
734 c2i_no_clinit_check_entry = __ pc();
735 }
736
737 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
738 bs->c2i_entry_barrier(masm);
739
740 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
741
742 __ flush();
743 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
744 }
745
746 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
747 VMRegPair *regs,
748 VMRegPair *regs2,
749 int total_args_passed) {
750 assert(regs2 == NULL, "not needed on AArch64");
751
752 // We return the amount of VMRegImpl stack slots we need to reserve for all
753 // the arguments NOT counting out_preserve_stack_slots.
754
755 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
756 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
757 };
758 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
759 c_farg0, c_farg1, c_farg2, c_farg3,
760 c_farg4, c_farg5, c_farg6, c_farg7
761 };
762
763 uint int_args = 0;
766
767 for (int i = 0; i < total_args_passed; i++) {
768 switch (sig_bt[i]) {
769 case T_BOOLEAN:
770 case T_CHAR:
771 case T_BYTE:
772 case T_SHORT:
773 case T_INT:
774 if (int_args < Argument::n_int_register_parameters_c) {
775 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
776 } else {
777 regs[i].set1(VMRegImpl::stack2reg(stk_args));
778 stk_args += 2;
779 }
780 break;
781 case T_LONG:
782 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
783 // fall through
784 case T_OBJECT:
785 case T_ARRAY:
786 case T_ADDRESS:
787 case T_METADATA:
788 if (int_args < Argument::n_int_register_parameters_c) {
789 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
790 } else {
791 regs[i].set2(VMRegImpl::stack2reg(stk_args));
792 stk_args += 2;
793 }
794 break;
795 case T_FLOAT:
796 if (fp_args < Argument::n_float_register_parameters_c) {
797 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
798 } else {
799 regs[i].set1(VMRegImpl::stack2reg(stk_args));
800 stk_args += 2;
801 }
802 break;
803 case T_DOUBLE:
804 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
805 if (fp_args < Argument::n_float_register_parameters_c) {
1617 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1618 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1619 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1620 }
1621 #endif /* ASSERT */
1622 switch (in_sig_bt[i]) {
1623 case T_ARRAY:
1624 if (is_critical_native) {
1625 unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1626 c_arg++;
1627 #ifdef ASSERT
1628 if (out_regs[c_arg].first()->is_Register()) {
1629 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1630 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1631 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1632 }
1633 #endif
1634 int_args++;
1635 break;
1636 }
1637 case T_OBJECT:
1638 assert(!is_critical_native, "no oop arguments");
1639 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1640 ((i == 0) && (!is_static)),
1641 &receiver_offset);
1642 int_args++;
1643 break;
1644 case T_VOID:
1645 break;
1646
1647 case T_FLOAT:
1648 float_move(masm, in_regs[i], out_regs[c_arg]);
1649 float_args++;
1650 break;
1651
1652 case T_DOUBLE:
1653 assert( i + 1 < total_in_args &&
1654 in_sig_bt[i + 1] == T_VOID &&
1655 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1656 double_move(masm, in_regs[i], out_regs[c_arg]);
1804
1805 rt_call(masm, native_func);
1806
1807 __ bind(native_return);
1808
1809 intptr_t return_pc = (intptr_t) __ pc();
1810 oop_maps->add_gc_map(return_pc - start, map);
1811
1812 // Unpack native results.
1813 switch (ret_type) {
1814 case T_BOOLEAN: __ c2bool(r0); break;
1815 case T_CHAR : __ ubfx(r0, r0, 0, 16); break;
1816 case T_BYTE : __ sbfx(r0, r0, 0, 8); break;
1817 case T_SHORT : __ sbfx(r0, r0, 0, 16); break;
1818 case T_INT : __ sbfx(r0, r0, 0, 32); break;
1819 case T_DOUBLE :
1820 case T_FLOAT :
1821 // Result is in v0 we'll save as needed
1822 break;
1823 case T_ARRAY: // Really a handle
1824 case T_OBJECT: // Really a handle
1825 break; // can't de-handlize until after safepoint check
1826 case T_VOID: break;
1827 case T_LONG: break;
1828 default : ShouldNotReachHere();
1829 }
1830
1831 // Switch thread to "native transition" state before reading the synchronization state.
1832 // This additional state is necessary because reading and testing the synchronization
1833 // state is not atomic w.r.t. GC, as this scenario demonstrates:
1834 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1835 // VM thread changes sync state to synchronizing and suspends threads for GC.
1836 // Thread A is resumed to finish this native method, but doesn't block here since it
1837 // didn't see any synchronization is progress, and escapes.
1838 __ mov(rscratch1, _thread_in_native_trans);
1839
1840 __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
1841
1842 // Force this write out before the read below
1843 __ dmb(Assembler::ISH);
3029 #ifdef ASSERT
3030 __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
3031 __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
3032 #endif
3033 // Clear the exception oop so GC no longer processes it as a root.
3034 __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
3035
3036 // r0: exception oop
3037 // r8: exception handler
3038 // r4: exception pc
3039 // Jump to handler
3040
3041 __ br(r8);
3042
3043 // Make sure all code is generated
3044 masm->flush();
3045
3046 // Set exception blob
3047 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
3048 }
3049 #endif // COMPILER2
|
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27 #include "asm/macroAssembler.hpp"
28 #include "asm/macroAssembler.inline.hpp"
29 #include "classfile/symbolTable.hpp"
30 #include "code/codeCache.hpp"
31 #include "code/debugInfoRec.hpp"
32 #include "code/icBuffer.hpp"
33 #include "code/vtableStubs.hpp"
34 #include "gc/shared/barrierSetAssembler.hpp"
35 #include "interpreter/interpreter.hpp"
36 #include "interpreter/interp_masm.hpp"
37 #include "logging/log.hpp"
38 #include "memory/resourceArea.hpp"
39 #include "nativeInst_aarch64.hpp"
40 #include "oops/compiledICHolder.hpp"
41 #include "oops/klass.inline.hpp"
42 #include "runtime/safepointMechanism.hpp"
43 #include "runtime/sharedRuntime.hpp"
44 #include "runtime/vframeArray.hpp"
45 #include "utilities/align.hpp"
46 #include "vmreg_aarch64.inline.hpp"
47 #ifdef COMPILER1
48 #include "c1/c1_Runtime1.hpp"
49 #endif
275 case T_SHORT:
276 case T_INT:
277 if (int_args < Argument::n_int_register_parameters_j) {
278 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
279 } else {
280 regs[i].set1(VMRegImpl::stack2reg(stk_args));
281 stk_args += 2;
282 }
283 break;
284 case T_VOID:
285 // halves of T_LONG or T_DOUBLE
286 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
287 regs[i].set_bad();
288 break;
289 case T_LONG:
290 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
291 // fall through
292 case T_OBJECT:
293 case T_ARRAY:
294 case T_ADDRESS:
295 case T_INLINE_TYPE:
296 if (int_args < Argument::n_int_register_parameters_j) {
297 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
298 } else {
299 regs[i].set2(VMRegImpl::stack2reg(stk_args));
300 stk_args += 2;
301 }
302 break;
303 case T_FLOAT:
304 if (fp_args < Argument::n_float_register_parameters_j) {
305 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
306 } else {
307 regs[i].set1(VMRegImpl::stack2reg(stk_args));
308 stk_args += 2;
309 }
310 break;
311 case T_DOUBLE:
312 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
313 if (fp_args < Argument::n_float_register_parameters_j) {
314 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
315 } else {
316 regs[i].set2(VMRegImpl::stack2reg(stk_args));
317 stk_args += 2;
318 }
319 break;
320 default:
321 ShouldNotReachHere();
322 break;
323 }
324 }
325
326 return align_up(stk_args, 2);
327 }
328
329
// Upper bounds on the number of registers usable for Java value returns.
// Only 6 integer registers are usable because r1 and r2 are excluded from
// the return convention (see the comment in java_return_convention below).
// const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
const uint SharedRuntime::java_return_convention_max_int = 6;
const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
333
334 int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) {
335
336 // Create the mapping between argument positions and
337 // registers.
338 // r1, r2 used to address klasses and states, exclude it from return convention to avoid colision
339
340 static const Register INT_ArgReg[java_return_convention_max_int] = {
341 r0 /* j_rarg7 */, j_rarg6, j_rarg5, j_rarg4, j_rarg3, j_rarg2
342 };
343
344 static const FloatRegister FP_ArgReg[java_return_convention_max_float] = {
345 j_farg0, j_farg1, j_farg2, j_farg3, j_farg4, j_farg5, j_farg6, j_farg7
346 };
347
348 uint int_args = 0;
349 uint fp_args = 0;
350
351 for (int i = 0; i < total_args_passed; i++) {
352 switch (sig_bt[i]) {
353 case T_BOOLEAN:
354 case T_CHAR:
355 case T_BYTE:
356 case T_SHORT:
357 case T_INT:
358 if (int_args < SharedRuntime::java_return_convention_max_int) {
359 regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
360 int_args ++;
361 } else {
362 // Should we have gurantee here?
363 return -1;
364 }
365 break;
366 case T_VOID:
367 // halves of T_LONG or T_DOUBLE
368 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
369 regs[i].set_bad();
370 break;
371 case T_LONG:
372 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
373 // fall through
374 case T_OBJECT:
375 case T_ARRAY:
376 case T_ADDRESS:
377 // Should T_METADATA be added to java_calling_convention as well ?
378 case T_METADATA:
379 case T_INLINE_TYPE:
380 if (int_args < SharedRuntime::java_return_convention_max_int) {
381 regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
382 int_args ++;
383 } else {
384 return -1;
385 }
386 break;
387 case T_FLOAT:
388 if (fp_args < SharedRuntime::java_return_convention_max_float) {
389 regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
390 fp_args ++;
391 } else {
392 return -1;
393 }
394 break;
395 case T_DOUBLE:
396 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
397 if (fp_args < Argument::n_float_register_parameters_j) {
398 regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
399 fp_args ++;
400 } else {
401 return -1;
402 }
403 break;
404 default:
405 ShouldNotReachHere();
406 break;
407 }
408 }
409
410 return int_args + fp_args;
411 }
412
// Patch the callers callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  // Method::_code is non-NULL only when the method has compiled code;
  // if it is NULL there is nothing to patch, so skip the runtime call.
  __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
  __ cbz(rscratch1, L);

  // We are at an arbitrary call site, so preserve the complete register
  // state around the call into the VM.
  __ enter();
  __ push_CPU_state();

  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

#ifndef PRODUCT
  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif

  __ mov(c_rarg0, rmethod);
  __ mov(c_rarg1, lr); // the return address identifies the callsite to fix up
  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
  __ blr(rscratch1);
  __ maybe_isb();

  __ pop_CPU_state();
  // restore sp
  __ leave();
  __ bind(L);
}
442
443 // For each inline type argument, sig includes the list of fields of
444 // the inline type. This utility function computes the number of
445 // arguments for the call if inline types are passed by reference (the
446 // calling convention the interpreter expects).
447 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
448 int total_args_passed = 0;
449 if (InlineTypePassFieldsAsArgs) {
450 for (int i = 0; i < sig_extended->length(); i++) {
451 BasicType bt = sig_extended->at(i)._bt;
452 if (SigEntry::is_reserved_entry(sig_extended, i)) {
453 // Ignore reserved entry
454 } else if (bt == T_INLINE_TYPE) {
455 // In sig_extended, an inline type argument starts with:
456 // T_INLINE_TYPE, followed by the types of the fields of the
457 // inline type and T_VOID to mark the end of the value
458 // type. Inline types are flattened so, for instance, in the
459 // case of an inline type with an int field and an inline type
460 // field that itself has 2 fields, an int and a long:
461 // T_INLINE_TYPE T_INT T_INLINE_TYPE T_INT T_LONG T_VOID (second
462 // slot for the T_LONG) T_VOID (inner T_INLINE_TYPE) T_VOID
463 // (outer T_INLINE_TYPE)
464 total_args_passed++;
465 int vt = 1;
466 do {
467 i++;
468 BasicType bt = sig_extended->at(i)._bt;
469 BasicType prev_bt = sig_extended->at(i-1)._bt;
470 if (bt == T_INLINE_TYPE) {
471 vt++;
472 } else if (bt == T_VOID &&
473 prev_bt != T_LONG &&
474 prev_bt != T_DOUBLE) {
475 vt--;
476 }
477 } while (vt != 0);
478 } else {
479 total_args_passed++;
480 }
481 }
482 } else {
483 total_args_passed = sig_extended->length();
484 }
485
486 return total_args_passed;
487 }
488
489
// Store a single compiled-convention argument (described by reg_pair) into
// the interpreter-frame slot `to`. `extraspace` rebiasses SP-relative loads
// for the area already carved out below the original SP. `bt` is only used
// by the assert; the stores are driven entirely by the register pair.
static void gen_c2i_adapter_helper(MacroAssembler* masm, BasicType bt, const VMRegPair& reg_pair, int extraspace, const Address& to) {

  assert(bt != T_INLINE_TYPE || !InlineTypePassFieldsAsArgs, "no inline type here");

  // Say 4 args:
  // i   st_off
  // 0   32 T_LONG
  // 1   24 T_VOID
  // 2   16 T_OBJECT
  // 3    8 T_BOOL
  // -    0 return address
  //
  // However to make things extra confusing: because we can fit a long/double in
  // a single slot on a 64 bit vm and it would be silly to break them up, the interpreter
  // leaves one slot empty and only stores to a single slot. In this case the
  // slot that is occupied is the T_VOID slot. See, I said it was confusing.

  // int next_off = st_off - Interpreter::stackElementSize;

  VMReg r_1 = reg_pair.first();
  VMReg r_2 = reg_pair.second();

  if (!r_1->is_valid()) {
    assert(!r_2->is_valid(), "");
    return;
  }

  if (r_1->is_stack()) {
    // memory to memory use rscratch1
    // words_pushed is always 0 so we don't use it.
    int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace /* + word_pushed * wordSize */);
    if (!r_2->is_valid()) {
      // sign extend??
      __ ldrw(rscratch1, Address(sp, ld_off));
      __ str(rscratch1, to);

    } else {
      __ ldr(rscratch1, Address(sp, ld_off));
      __ str(rscratch1, to);
    }
  } else if (r_1->is_Register()) {
    Register r = r_1->as_Register();
    __ str(r, to);
  } else {
    assert(r_1->is_FloatRegister(), "");
    if (!r_2->is_valid()) {
      // only a float use just part of the slot
      __ strs(r_1->as_FloatRegister(), to);
    } else {
      __ strd(r_1->as_FloatRegister(), to);
    }
  }
}
543
544 static void gen_c2i_adapter(MacroAssembler *masm,
545 const GrowableArray<SigEntry>* sig_extended,
546 const VMRegPair *regs,
547 Label& skip_fixup,
548 address start,
549 OopMapSet* oop_maps,
550 int& frame_complete,
551 int& frame_size_in_words,
552 bool alloc_inline_receiver) {
553
554 // Before we get into the guts of the C2I adapter, see if we should be here
555 // at all. We've come from compiled code and are attempting to jump to the
556 // interpreter, which means the caller made a static call to get here
557 // (vcalls always get a compiled target if there is one). Check for a
558 // compiled target. If there is one, we need to patch the caller's call.
559 patch_callers_callsite(masm);
560
561 __ bind(skip_fixup);
562
563 bool has_inline_argument = false;
564
565 if (InlineTypePassFieldsAsArgs) {
566 // Is there an inline type argument?
567 for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
568 has_inline_argument = (sig_extended->at(i)._bt == T_INLINE_TYPE);
569 }
570 if (has_inline_argument) {
571 // There is at least an inline type argument: we're coming from
572 // compiled code so we have no buffers to back the inline types
573 // Allocate the buffers here with a runtime call.
574 OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
575
576 frame_complete = __ offset();
577 address the_pc = __ pc();
578
579 __ set_last_Java_frame(noreg, noreg, the_pc, rscratch1);
580
581 __ mov(c_rarg0, rthread);
582 __ mov(c_rarg1, r1);
583 __ mov(c_rarg2, (int64_t)alloc_inline_receiver);
584
585 __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types)));
586 __ blr(rscratch1);
587
588 oop_maps->add_gc_map((int)(__ pc() - start), map);
589 __ reset_last_Java_frame(false);
590
591 RegisterSaver::restore_live_registers(masm);
592
593 Label no_exception;
594 __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
595 __ cbz(r0, no_exception);
596
597 __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
598 __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
599 __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
600
601 __ bind(no_exception);
602
603 // We get an array of objects from the runtime call
604 __ get_vm_result(r10, rthread);
605 __ get_vm_result_2(r1, rthread); // TODO: required to keep the callee Method live?
606 }
607 }
608
609 int words_pushed = 0;
610
611 // Since all args are passed on the stack, total_args_passed *
612 // Interpreter::stackElementSize is the space we need.
613
614 int total_args_passed = compute_total_args_passed_int(sig_extended);
615 int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
616
617 // stack is aligned, keep it that way
618 extraspace = align_up(extraspace, 2 * wordSize);
619
620 __ mov(r13, sp);
621
622 if (extraspace)
623 __ sub(sp, sp, extraspace);
624
625 // Now write the args into the outgoing interpreter space
626
627 int ignored = 0, next_vt_arg = 0, next_arg_int = 0;
628 bool has_oop_field = false;
629
630 for (int next_arg_comp = 0; next_arg_comp < total_args_passed; next_arg_comp++) {
631 BasicType bt = sig_extended->at(next_arg_comp)._bt;
632 // offset to start parameters
633 int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize;
634
635 if (!InlineTypePassFieldsAsArgs || bt != T_INLINE_TYPE) {
636
637 if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) {
638 continue; // Ignore reserved entry
639 }
640
641 if (bt == T_VOID) {
642 assert(next_arg_comp > 0 && (sig_extended->at(next_arg_comp - 1)._bt == T_LONG || sig_extended->at(next_arg_comp - 1)._bt == T_DOUBLE), "missing half");
643 next_arg_int ++;
644 continue;
645 }
646
647 int next_off = st_off - Interpreter::stackElementSize;
648 int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
649
650 gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp], extraspace, Address(sp, offset));
651 next_arg_int ++;
652 } else {
653 ignored++;
654 // get the buffer from the just allocated pool of buffers
655 int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_INLINE_TYPE);
656 __ load_heap_oop(rscratch1, Address(r10, index));
657 next_vt_arg++;
658 next_arg_int++;
659 int vt = 1;
660 // write fields we get from compiled code in registers/stack
661 // slots to the buffer: we know we are done with that inline type
662 // argument when we hit the T_VOID that acts as an end of value
663 // type delimiter for this inline type. Inline types are flattened
664 // so we might encounter embedded inline types. Each entry in
665 // sig_extended contains a field offset in the buffer.
666 do {
667 next_arg_comp++;
668 BasicType bt = sig_extended->at(next_arg_comp)._bt;
669 BasicType prev_bt = sig_extended->at(next_arg_comp - 1)._bt;
670 if (bt == T_INLINE_TYPE) {
671 vt++;
672 ignored++;
673 } else if (bt == T_VOID && prev_bt != T_LONG && prev_bt != T_DOUBLE) {
674 vt--;
675 ignored++;
676 } else if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) {
677 // Ignore reserved entry
678 } else {
679 int off = sig_extended->at(next_arg_comp)._offset;
680 assert(off > 0, "offset in object should be positive");
681
682 bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
683 has_oop_field = has_oop_field || is_oop;
684
685 gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp - ignored], extraspace, Address(r11, off));
686 }
687 } while (vt != 0);
688 // pass the buffer to the interpreter
689 __ str(rscratch1, Address(sp, st_off));
690 }
691
692 }
693
694 // If an inline type was allocated and initialized, apply post barrier to all oop fields
695 if (has_inline_argument && has_oop_field) {
696 __ push(r13); // save senderSP
697 __ push(r1); // save callee
698 // Allocate argument register save area
699 if (frame::arg_reg_save_area_bytes != 0) {
700 __ sub(sp, sp, frame::arg_reg_save_area_bytes);
701 }
702 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::apply_post_barriers), rthread, r10);
703 // De-allocate argument register save area
704 if (frame::arg_reg_save_area_bytes != 0) {
705 __ add(sp, sp, frame::arg_reg_save_area_bytes);
706 }
707 __ pop(r1); // restore callee
708 __ pop(r13); // restore sender SP
709 }
710
711 __ mov(esp, sp); // Interp expects args on caller's expression stack
712
713 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset())));
714 __ br(rscratch1);
715 }
716
717 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack, const GrowableArray<SigEntry>* sig, const VMRegPair *regs) {
718
719
720 // Note: r13 contains the senderSP on entry. We must preserve it since
721 // we may do a i2c -> c2i transition if we lose a race where compiled
722 // code goes non-entrant while we get args ready.
723
724 // In addition we use r13 to locate all the interpreter args because
725 // we must align the stack to 16 bytes.
726
727 // Adapters are frameless.
728
729 // An i2c adapter is frameless because the *caller* frame, which is
730 // interpreted, routinely repairs its own esp (from
731 // interpreter_frame_last_sp), even if a callee has modified the
732 // stack pointer. It also recalculates and aligns sp.
733
734 // A c2i adapter is frameless because the *callee* frame, which is
735 // interpreted, routinely repairs its caller's sp (from sender_sp,
736 // which is set up via the senderSP register).
737
738 // In other words, if *either* the caller or callee is interpreted, we can
758 range_check(masm, rax, r11,
759 Interpreter::code()->code_start(), Interpreter::code()->code_end(),
760 L_ok);
761 if (StubRoutines::code1() != NULL)
762 range_check(masm, rax, r11,
763 StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
764 L_ok);
765 if (StubRoutines::code2() != NULL)
766 range_check(masm, rax, r11,
767 StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
768 L_ok);
769 const char* msg = "i2c adapter must return to an interpreter frame";
770 __ block_comment(msg);
771 __ stop(msg);
772 __ bind(L_ok);
773 __ block_comment("} verify_i2ce ");
774 #endif
775 }
776
777 // Cut-out for having no stack args.
778 int comp_words_on_stack = 0;
779 if (comp_args_on_stack) {
780 comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord;
781 __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
782 __ andr(sp, rscratch1, -16);
783 }
784
785 // Will jump to the compiled code just as if compiled code was doing it.
786 // Pre-load the register-jump target early, to schedule it better.
787 __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
788
789 #if INCLUDE_JVMCI
790 if (EnableJVMCI || UseAOT) {
791 // check if this call should be routed towards a specific entry point
792 __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
793 Label no_alternative_target;
794 __ cbz(rscratch2, no_alternative_target);
795 __ mov(rscratch1, rscratch2);
796 __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
797 __ bind(no_alternative_target);
798 }
799 #endif // INCLUDE_JVMCI
800
801 int total_args_passed = sig->length();
802
803 // Now generate the shuffle code.
804 for (int i = 0; i < total_args_passed; i++) {
805 BasicType bt = sig->at(i)._bt;
806
807 assert(bt != T_INLINE_TYPE, "i2c adapter doesn't unpack inline typ args");
808 if (bt == T_VOID) {
809 assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
810 continue;
811 }
812
813 // Pick up 0, 1 or 2 words from SP+offset.
814 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
815
816 // Load in argument order going down.
817 int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
818 // Point to interpreter value (vs. tag)
819 int next_off = ld_off - Interpreter::stackElementSize;
820 //
821 //
822 //
823 VMReg r_1 = regs[i].first();
824 VMReg r_2 = regs[i].second();
825 if (!r_1->is_valid()) {
826 assert(!r_2->is_valid(), "");
827 continue;
828 }
829 if (r_1->is_stack()) {
830 // Convert stack slot to an SP offset (+ wordSize to account for return address )
831 int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
832 if (!r_2->is_valid()) {
833 // sign extend???
834 __ ldrsw(rscratch2, Address(esp, ld_off));
835 __ str(rscratch2, Address(sp, st_off));
836 } else {
837 //
838 // We are using two optoregs. This can be either T_OBJECT,
839 // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
840 // two slots but only uses one for thr T_LONG or T_DOUBLE case
841 // So we must adjust where to pick up the data to match the
842 // interpreter.
843 //
844 // Interpreter local[n] == MSW, local[n+1] == LSW however locals
845 // are accessed as negative so LSW is at LOW address
846
847 // ld_off is MSW so get LSW
848 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
849 __ ldr(rscratch2, Address(esp, offset));
850 // st_off is LSW (i.e. reg.first())
851 __ str(rscratch2, Address(sp, st_off));
852 }
853 } else if (r_1->is_Register()) { // Register argument
854 Register r = r_1->as_Register();
855 if (r_2->is_valid()) {
856 //
857 // We are using two VMRegs. This can be either T_OBJECT,
858 // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
859 // two slots but only uses one for thr T_LONG or T_DOUBLE case
860 // So we must adjust where to pick up the data to match the
861 // interpreter.
862
863 const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
864
865 // this can be a misaligned move
866 __ ldr(r, Address(esp, offset));
867 } else {
868 // sign extend and use a full word?
869 __ ldrw(r, Address(esp, ld_off));
870 }
871 } else {
872 if (!r_2->is_valid()) {
873 __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
874 } else {
875 __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
876 }
877 }
878 }
879
880
881 // 6243940 We might end up in handle_wrong_method if
882 // the callee is deoptimized as we race thru here. If that
883 // happens we don't want to take a safepoint because the
884 // caller frame will look interpreted and arguments are now
885 // "compiled" so it is much better to make this transition
886 // invisible to the stack walking code. Unfortunately if
887 // we try and find the callee by normal means a safepoint
888 // is possible. So we stash the desired callee in the thread
889 // and the vm will find there should this case occur.
890
891 __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));
892 __ br(rscratch1);
893 }
894
// Emit the inline-cache (receiver klass) check used by the c2i unverified
// entry: compares the receiver's klass against the CompiledICHolder's
// expected klass, jumping to the IC-miss stub on mismatch. On a hit,
// rmethod is loaded with the callee Method*; if the method has since been
// compiled (non-null code), we also go to the IC-miss stub so the call
// site gets re-patched, otherwise control falls through to skip_fixup.
static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {

  Label ok;

  Register holder = rscratch2;
  Register receiver = j_rarg0;
  Register tmp = r10;  // A call-clobbered register not used for arg passing

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls
  // to the interpreter. The args start out packed in the compiled layout. They
  // need to be unpacked into the interpreter layout. This will almost always
  // require some stack space. We grow the current (compiled) stack, then repack
  // the args. We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relys solely on SP and not FP, get sick).

  {
    __ block_comment("c2i_unverified_entry {");
    __ load_klass(rscratch1, receiver);
    __ ldr(tmp, Address(holder, CompiledICHolder::holder_klass_offset()));
    __ cmp(rscratch1, tmp);
    // Load Method* before branching: the ldr does not disturb the flags set
    // by the cmp above.
    __ ldr(rmethod, Address(holder, CompiledICHolder::holder_metadata_offset()));
    __ br(Assembler::EQ, ok);
    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
    __ cbz(rscratch1, skip_fixup);
    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
    __ block_comment("} c2i_unverified_entry");
  }
}
931
932
933 // ---------------------------------------------------------------
934 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
935 int comp_args_on_stack,
936 const GrowableArray<SigEntry>* sig,
937 const VMRegPair* regs,
938 const GrowableArray<SigEntry>* sig_cc,
939 const VMRegPair* regs_cc,
940 const GrowableArray<SigEntry>* sig_cc_ro,
941 const VMRegPair* regs_cc_ro,
942 AdapterFingerPrint* fingerprint,
943 AdapterBlob*& new_adapter) {
944
945 address i2c_entry = __ pc();
946 gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
947
948 address c2i_unverified_entry = __ pc();
949 Label skip_fixup;
950
951 gen_inline_cache_check(masm, skip_fixup);
952
953 OopMapSet* oop_maps = new OopMapSet();
954 int frame_complete = CodeOffsets::frame_never_safe;
955 int frame_size_in_words = 0;
956
957 // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
958 address c2i_inline_ro_entry = __ pc();
959 if (regs_cc != regs_cc_ro) {
960 Label unused;
961 gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
962 skip_fixup = unused;
963 }
964
965 // Scalarized c2i adapter
966 address c2i_entry = __ pc();
967
968 // Class initialization barrier for static methods
969 address c2i_no_clinit_check_entry = NULL;
970
971 if (VM_Version::supports_fast_class_init_checks()) {
972 Label L_skip_barrier;
973 { // Bypass the barrier for non-static methods
974 Register flags = rscratch1;
975 __ ldrw(flags, Address(rmethod, Method::access_flags_offset()));
976 __ tst(flags, JVM_ACC_STATIC);
977 __ br(Assembler::NE, L_skip_barrier); // non-static
978 }
979
980 Register klass = rscratch1;
981 __ load_method_holder(klass, rmethod);
982 // We pass rthread to this function on x86
983 __ clinit_barrier(klass, rscratch2, &L_skip_barrier /*L_fast_path*/);
984
985 __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
986
987 __ bind(L_skip_barrier);
988 c2i_no_clinit_check_entry = __ pc();
989 }
990
991 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
992 bs->c2i_entry_barrier(masm);
993
994 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
995
996 address c2i_unverified_inline_entry = c2i_unverified_entry;
997
998 // Non-scalarized c2i adapter
999 address c2i_inline_entry = c2i_entry;
1000 if (regs != regs_cc) {
1001 Label inline_entry_skip_fixup;
1002 c2i_unverified_inline_entry = __ pc();
1003 gen_inline_cache_check(masm, inline_entry_skip_fixup);
1004
1005 c2i_inline_entry = __ pc();
1006 Label unused;
1007 gen_c2i_adapter(masm, sig, regs, inline_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
1008 }
1009
1010 __ flush();
1011
1012 // The c2i adapter might safepoint and trigger a GC. The caller must make sure that
1013 // the GC knows about the location of oop argument locations passed to the c2i adapter.
1014
1015 bool caller_must_gc_arguments = (regs != regs_cc);
1016 new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words + 10, oop_maps, caller_must_gc_arguments);
1017
1018 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry);
1019 }
1020
1021 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
1022 VMRegPair *regs,
1023 VMRegPair *regs2,
1024 int total_args_passed) {
1025 assert(regs2 == NULL, "not needed on AArch64");
1026
1027 // We return the amount of VMRegImpl stack slots we need to reserve for all
1028 // the arguments NOT counting out_preserve_stack_slots.
1029
1030 static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
1031 c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
1032 };
1033 static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
1034 c_farg0, c_farg1, c_farg2, c_farg3,
1035 c_farg4, c_farg5, c_farg6, c_farg7
1036 };
1037
1038 uint int_args = 0;
1041
1042 for (int i = 0; i < total_args_passed; i++) {
1043 switch (sig_bt[i]) {
1044 case T_BOOLEAN:
1045 case T_CHAR:
1046 case T_BYTE:
1047 case T_SHORT:
1048 case T_INT:
1049 if (int_args < Argument::n_int_register_parameters_c) {
1050 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
1051 } else {
1052 regs[i].set1(VMRegImpl::stack2reg(stk_args));
1053 stk_args += 2;
1054 }
1055 break;
1056 case T_LONG:
1057 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1058 // fall through
1059 case T_OBJECT:
1060 case T_ARRAY:
1061 case T_INLINE_TYPE:
1062 case T_ADDRESS:
1063 case T_METADATA:
1064 if (int_args < Argument::n_int_register_parameters_c) {
1065 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
1066 } else {
1067 regs[i].set2(VMRegImpl::stack2reg(stk_args));
1068 stk_args += 2;
1069 }
1070 break;
1071 case T_FLOAT:
1072 if (fp_args < Argument::n_float_register_parameters_c) {
1073 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
1074 } else {
1075 regs[i].set1(VMRegImpl::stack2reg(stk_args));
1076 stk_args += 2;
1077 }
1078 break;
1079 case T_DOUBLE:
1080 assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
1081 if (fp_args < Argument::n_float_register_parameters_c) {
1893 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1894 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1895 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1896 }
1897 #endif /* ASSERT */
1898 switch (in_sig_bt[i]) {
1899 case T_ARRAY:
1900 if (is_critical_native) {
1901 unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1902 c_arg++;
1903 #ifdef ASSERT
1904 if (out_regs[c_arg].first()->is_Register()) {
1905 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1906 } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1907 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1908 }
1909 #endif
1910 int_args++;
1911 break;
1912 }
1913 case T_INLINE_TYPE:
1914 case T_OBJECT:
1915 assert(!is_critical_native, "no oop arguments");
1916 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1917 ((i == 0) && (!is_static)),
1918 &receiver_offset);
1919 int_args++;
1920 break;
1921 case T_VOID:
1922 break;
1923
1924 case T_FLOAT:
1925 float_move(masm, in_regs[i], out_regs[c_arg]);
1926 float_args++;
1927 break;
1928
1929 case T_DOUBLE:
1930 assert( i + 1 < total_in_args &&
1931 in_sig_bt[i + 1] == T_VOID &&
1932 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1933 double_move(masm, in_regs[i], out_regs[c_arg]);
2081
2082 rt_call(masm, native_func);
2083
2084 __ bind(native_return);
2085
2086 intptr_t return_pc = (intptr_t) __ pc();
2087 oop_maps->add_gc_map(return_pc - start, map);
2088
2089 // Unpack native results.
2090 switch (ret_type) {
2091 case T_BOOLEAN: __ c2bool(r0); break;
2092 case T_CHAR : __ ubfx(r0, r0, 0, 16); break;
2093 case T_BYTE : __ sbfx(r0, r0, 0, 8); break;
2094 case T_SHORT : __ sbfx(r0, r0, 0, 16); break;
2095 case T_INT : __ sbfx(r0, r0, 0, 32); break;
2096 case T_DOUBLE :
2097 case T_FLOAT :
2098 // Result is in v0 we'll save as needed
2099 break;
2100 case T_ARRAY: // Really a handle
2101 case T_INLINE_TYPE:
2102 case T_OBJECT: // Really a handle
2103 break; // can't de-handlize until after safepoint check
2104 case T_VOID: break;
2105 case T_LONG: break;
2106 default : ShouldNotReachHere();
2107 }
2108
2109 // Switch thread to "native transition" state before reading the synchronization state.
2110 // This additional state is necessary because reading and testing the synchronization
2111 // state is not atomic w.r.t. GC, as this scenario demonstrates:
2112 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2113 // VM thread changes sync state to synchronizing and suspends threads for GC.
2114 // Thread A is resumed to finish this native method, but doesn't block here since it
2115 // didn't see any synchronization is progress, and escapes.
2116 __ mov(rscratch1, _thread_in_native_trans);
2117
2118 __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
2119
2120 // Force this write out before the read below
2121 __ dmb(Assembler::ISH);
3307 #ifdef ASSERT
3308 __ str(zr, Address(rthread, JavaThread::exception_handler_pc_offset()));
3309 __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
3310 #endif
3311 // Clear the exception oop so GC no longer processes it as a root.
3312 __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
3313
3314 // r0: exception oop
3315 // r8: exception handler
3316 // r4: exception pc
3317 // Jump to handler
3318
3319 __ br(r8);
3320
3321 // Make sure all code is generated
3322 masm->flush();
3323
3324 // Set exception blob
3325 _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
3326 }
3327
3328 BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
3329 BufferBlob* buf = BufferBlob::create("inline types pack/unpack", 16 * K);
3330 CodeBuffer buffer(buf);
3331 short buffer_locs[20];
3332 buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
3333 sizeof(buffer_locs)/sizeof(relocInfo));
3334
3335 MacroAssembler _masm(&buffer);
3336 MacroAssembler* masm = &_masm;
3337
3338 const Array<SigEntry>* sig_vk = vk->extended_sig();
3339 const Array<VMRegPair>* regs = vk->return_regs();
3340
3341 int pack_fields_off = __ offset();
3342
3343 int j = 1;
3344 for (int i = 0; i < sig_vk->length(); i++) {
3345 BasicType bt = sig_vk->at(i)._bt;
3346 if (bt == T_INLINE_TYPE) {
3347 continue;
3348 }
3349 if (bt == T_VOID) {
3350 if (sig_vk->at(i-1)._bt == T_LONG ||
3351 sig_vk->at(i-1)._bt == T_DOUBLE) {
3352 j++;
3353 }
3354 continue;
3355 }
3356 int off = sig_vk->at(i)._offset;
3357 VMRegPair pair = regs->at(j);
3358 VMReg r_1 = pair.first();
3359 VMReg r_2 = pair.second();
3360 Address to(r0, off);
3361 if (bt == T_FLOAT) {
3362 __ strs(r_1->as_FloatRegister(), to);
3363 } else if (bt == T_DOUBLE) {
3364 __ strd(r_1->as_FloatRegister(), to);
3365 } else if (bt == T_OBJECT || bt == T_ARRAY) {
3366 Register val = r_1->as_Register();
3367 assert_different_registers(r0, val);
3368 // We don't need barriers because the destination is a newly allocated object.
3369 // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp.
3370 if (UseCompressedOops) {
3371 __ encode_heap_oop(val);
3372 __ str(val, to);
3373 } else {
3374 __ str(val, to);
3375 }
3376 } else {
3377 assert(is_java_primitive(bt), "unexpected basic type");
3378 assert_different_registers(r0, r_1->as_Register());
3379 size_t size_in_bytes = type2aelembytes(bt);
3380 __ store_sized_value(to, r_1->as_Register(), size_in_bytes);
3381 }
3382 j++;
3383 }
3384 assert(j == regs->length(), "missed a field?");
3385
3386 __ ret(lr);
3387
3388 int unpack_fields_off = __ offset();
3389
3390 j = 1;
3391 for (int i = 0; i < sig_vk->length(); i++) {
3392 BasicType bt = sig_vk->at(i)._bt;
3393 if (bt == T_INLINE_TYPE) {
3394 continue;
3395 }
3396 if (bt == T_VOID) {
3397 if (sig_vk->at(i-1)._bt == T_LONG ||
3398 sig_vk->at(i-1)._bt == T_DOUBLE) {
3399 j++;
3400 }
3401 continue;
3402 }
3403 int off = sig_vk->at(i)._offset;
3404 VMRegPair pair = regs->at(j);
3405 VMReg r_1 = pair.first();
3406 VMReg r_2 = pair.second();
3407 Address from(r0, off);
3408 if (bt == T_FLOAT) {
3409 __ ldrs(r_1->as_FloatRegister(), from);
3410 } else if (bt == T_DOUBLE) {
3411 __ ldrd(r_1->as_FloatRegister(), from);
3412 } else if (bt == T_OBJECT || bt == T_ARRAY) {
3413 assert_different_registers(r0, r_1->as_Register());
3414 __ load_heap_oop(r_1->as_Register(), from);
3415 } else {
3416 assert(is_java_primitive(bt), "unexpected basic type");
3417 assert_different_registers(r0, r_1->as_Register());
3418
3419 size_t size_in_bytes = type2aelembytes(bt);
3420 __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
3421 }
3422 j++;
3423 }
3424 assert(j == regs->length(), "missed a field?");
3425
3426 __ ret(lr);
3427
3428 __ flush();
3429
3430 return BufferedInlineTypeBlob::create(&buffer, pack_fields_off, unpack_fields_off);
3431 }
3432 #endif // COMPILER2
|