< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page

  850     st->print("# stack alignment check");
  851 #endif
  852   }
  853   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  854     st->print("\n\t");
  855     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
  856     st->print("\n\t");
  857     st->print("je      fast_entry\t");
  858     st->print("\n\t");
  859     st->print("call    #nmethod_entry_barrier_stub\t");
  860     st->print("\n\tfast_entry:");
  861   }
  862   st->cr();
  863 }
  864 #endif
  865 
// Emit the method prologue: an optional class-initialization barrier for
// static methods of not-yet-initialized holders, followed by the verified
// entry (stack bang plus frame setup).
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Fast path falls through to L_skip_barrier when the holder is usable;
    // otherwise re-dispatch through the wrong-method stub.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Bang the stack only when required; the final argument flags a runtime
  // stub compilation (stub_function() != NULL).
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
  899 
// Prologue length depends on too many runtime conditions to tabulate;
// fall back to the generic walk over the emitted instructions.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
  905 
// Upper bound on relocation entries the prologue may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
  910 
  911 //=============================================================================
  912 #ifndef PRODUCT
  913 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  914 {
  915   Compile* C = ra_->C;
  916   if (generate_vzeroupper(C)) {
  917     st->print("vzeroupper");
  918     st->cr(); st->print("\t");
  919   }
  920 
  921   int framesize = C->output()->frame_size_in_bytes();
  922   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  923   // Remove word for return adr already pushed
  924   // and RBP
  925   framesize -= 2*wordSize;

  933   if (do_polling() && C->is_method_compilation()) {
  934     st->print("\t");
  935     st->print_cr("movq    rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
  936                  "testl   rax, [rscratch1]\t"
  937                  "# Safepoint: poll for GC");
  938   }
  939 }
  940 #endif
  941 
// Emit the method epilogue: optional vzeroupper, frame teardown
// (addq rsp, #framesize; popq rbp), reserved-stack check, and the
// return safepoint poll.
void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    // Hand-encoded "addq rsp, #framesize": REX.W + 83 /0 ib for an 8-bit
    // immediate, REX.W + 81 /0 id for a 32-bit immediate.
    emit_opcode(cbuf, Assembler::REX_W);
    if (framesize < 0x80) {
      emit_opcode(cbuf, 0x83); // addq rsp, #framesize
      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
      emit_d8(cbuf, framesize);
    } else {
      emit_opcode(cbuf, 0x81); // addq rsp, #framesize
      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
      emit_d32(cbuf, framesize);
    }
  }

  // popq rbp (single-byte opcode 0x58 + register encoding)
  emit_opcode(cbuf, 0x58 | RBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    MacroAssembler _masm(&cbuf);
    // Return poll: load the thread-local polling page address and touch it;
    // the relocation marks this site as a poll_return for the safepoint code.
    __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
    __ relocate(relocInfo::poll_return_type);
    __ testl(rax, Address(rscratch1, 0));
  }
}
  988 
// Epilogue length is condition-dependent; use the generic computation.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
  994 
// Upper bound on relocation entries the epilogue may emit
// (e.g. the poll_return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
  999 
// No special scheduling for the epilogue; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
 1004 
 1005 //=============================================================================
 1006 
// Register class of an OptoReg; used by the spill/copy code to pick
// the correct move encoding.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general-purpose register
  rc_float,  // floating-point register (presumably XMM on x86_64 — confirm in rc_class)
  rc_stack   // stack slot
};
 1013 
 1014 static enum RC rc_class(OptoReg::Name reg)

 1510     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1511     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1512     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1513     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1514     emit_d32(cbuf, offset);
 1515   } else {
 1516     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1517     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1518     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1519     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1520     emit_d8(cbuf, offset);
 1521   }
 1522 }
 1523 
 1524 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1525 {
 1526   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1527   return (offset < 0x80) ? 5 : 8; // REX
 1528 }
 1529 




























 1530 //=============================================================================
 1531 #ifndef PRODUCT
// Debug-format of the unverified entry point: the inline-cache klass check
// that jumps to the ic-miss stub on mismatch (mirrors emit() below).
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  if (UseCompressedClassPointers) {
    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
  } else {
    st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
                 "# Inline cache check");
  }
  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
  st->print_cr("\tnop\t# nops to align entry point");
}
 1545 #endif
 1546 
 1547 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1548 {
 1549   MacroAssembler masm(&cbuf);

 1552     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1553     masm.cmpptr(rax, rscratch1);
 1554   } else {
 1555     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1556   }
 1557 
 1558   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1559 
 1560   /* WARNING these NOPs are critical so that verified entry point is properly
 1561      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1562   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1563   if (OptoBreakpoint) {
 1564     // Leave space for int3
 1565     nops_cnt -= 1;
 1566   }
 1567   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1568   if (nops_cnt > 0)
 1569     masm.nop(nops_cnt);
 1570 }
 1571 
// UEP length varies with compressed-klass mode and alignment nops;
// use the generic computation.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 1577 
 1578 
 1579 //=============================================================================
 1580 
// Map a register number to its FPU-chunk offset.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  return regnum - 32; // The FP registers are in the second chunk
}
 1585 
 1586 // This is UltraSparc specific, true just means we have fast l2f conversion
// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
 1590 
 1591 // Is this branch offset short enough that a short branch can be used?
 1592 //
 1593 // NOTE: If the platform does not provide any short branch variants, then
 1594 //       this method should return false for offset 0.
 1595 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1596   // The passed offset is relative to address of the branch.
 1597   // On 86 a branch displacement is calculated relative to address
 1598   // of a next instruction.

 3841   %}
 3842 %}
 3843 
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches [$reg + $off + $idx << $scale]. The predicate requires the int
// index's type to have a non-negative low bound, so the ConvI2L cannot
// observe a negative value.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 3860 
















// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base register, so we can't free up R12 even when
// CompressedOops::base() == NULL.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Only when decoding is exactly "R12 + (narrow << 3)".
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 3878 
 3879 // Indirect Memory Operand
 3880 operand indirectNarrow(rRegN reg)

 4183     equal(0x4, "e");
 4184     not_equal(0x5, "ne");
 4185     less(0x2, "b");
 4186     greater_equal(0x3, "nb");
 4187     less_equal(0x6, "be");
 4188     greater(0x7, "nbe");
 4189     overflow(0x0, "o");
 4190     no_overflow(0x1, "no");
 4191   %}
 4192 %}
 4193 
 4194 //----------OPERAND CLASSES----------------------------------------------------
 4195 // Operand Classes are groups of operands that are used as to simplify
 4196 // instruction definitions by not requiring the AD writer to specify separate
 4197 // instructions for every form of operand when the instruction accepts
 4198 // multiple operand types with the same basic encoding and format.  The classic
 4199 // case of this is memory operands.
 4200 
// Every supported memory addressing mode, normal and narrow-oop forms alike;
// instructions declared with a "memory" operand accept any of these.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4207 
 4208 //----------PIPELINE-----------------------------------------------------------
 4209 // Rules which define the behavior of the target architectures pipeline.
 4210 pipeline %{
 4211 
 4212 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-sized (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
 4223 

 6667   format %{ "MEMBAR-storestore (empty encoding)" %}
 6668   ins_encode( );
 6669   ins_pipe(empty);
 6670 %}
 6671 
 6672 //----------Move Instructions--------------------------------------------------
 6673 
// Reinterpret a machine word (long) as a pointer; the move is elided
// entirely when the allocator assigns the same register to both operands.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6686 













// Reinterpret a pointer as a machine word (long); the move is elided
// entirely when the allocator assigns the same register to both operands.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6699 



























// Convert oop into int for vectors alignment masking
// (movl implicitly zeroes the upper 32 bits, matching ConvL2I semantics).
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl    $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6711 
 6712 // Convert compressed oop into int for vectors alignment masking
 6713 // in case of 32bit oops (heap < 4Gb).
 6714 instruct convN2I(rRegI dst, rRegN src)
 6715 %{
 6716   predicate(CompressedOops::shift() == 0);
 6717   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6718 
 6719   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}

10893   ins_encode %{
10894     __ movdl($dst$$XMMRegister, $src$$Register);
10895   %}
10896   ins_pipe( pipe_slow );
10897 %}
10898 
// Bitwise move of a long GPR into a double XMM register (raw bits,
// no numeric conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10909 
10910 
10911 // =======================================================================
10912 // fast clearing of an array
// Fast clear of a small array (is_large() false). The format template below
// mirrors the code generated by MacroAssembler::clear_mem for the
// UseFastStosb / UseXMMForObjInit / rep-stosq variants.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // false => small-array variant (includes the short-length fast path).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe(pipe_slow);
%}
10971 
// Fast clear of a large array (is_large() true): same code shapes as
// rep_stos but without the short-length fast path.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // true => large-array variant of clear_mem.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe(pipe_slow);
%}
11021 
// Compare two Latin-1 (byte[]) strings; fixed registers are dictated by
// the string_compare stub calling convention.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11037 

// Set flags from (src &amp; con) without materializing the AND result
// (testl, opcode F7 /0).
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
11571 
// Set flags from (src &amp; mem) with the AND folded into a testl
// against memory (opcode 85).
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11581 











11582 // Unsigned compare Instructions; really, same as signed except they
11583 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare: identical encoding to the signed cmpl, but the
// rFlagsRegU result restricts consumers to unsigned branch conditions.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11593 
11594 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11595 %{
11596   match(Set cr (CmpU op1 op2));
11597 
11598   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11599   opcode(0x81,0x07); /* Opcode 81 /7 */
11600   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11601   ins_pipe(ialu_cr_reg_imm);

// Set flags from (src &amp; mem) via a 64-bit testq against memory
// (REX.W + 85).
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11883 
// Same as testL_reg_mem, but matches a pointer source that goes through
// CastP2X — the raw pointer bits are tested directly.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11893 











11894 // Manifest a CmpL result in an integer register.  Very painful.
11895 // This is the test to avoid.
// Manifest a three-way CmpL result (-1/0/1) in an integer register.
// Very painful; this is the test to avoid.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode(cmpl3_flag(src1, src2, dst));
  ins_pipe(pipe_slow);
%}
11911 
11912 // Unsigned long compare Instructions; really, same as signed long except they
11913 // produce an rFlagsRegU instead of rFlagsReg.

12541 
12542   ins_cost(300);
12543   format %{ "call,runtime " %}
12544   ins_encode(clear_avx, Java_To_Runtime(meth));
12545   ins_pipe(pipe_slow);
12546 %}
12547 
// Call runtime without safepoint. clear_avx is emitted first — presumably a
// vzeroupper before leaving compiled code (see generate_vzeroupper usage
// elsewhere in this file) — confirm in the encoding class definition.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12559 
12560 // Call runtime without safepoint















// Leaf runtime call, no safepoint, no FP arguments/results.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12571 
12572 // Return Instruction
12573 // Remove the return address & jump to it.
12574 // Notice: We always emit a nop after a ret to make sure there is room
12575 // for safepoint patching
12576 instruct Ret()
12577 %{
12578   match(Return);
12579 
12580   format %{ "ret" %}
12581   opcode(0xC3);
12582   ins_encode(OpcP);

  850     st->print("# stack alignment check");
  851 #endif
  852   }
  853   if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
  854     st->print("\n\t");
  855     st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
  856     st->print("\n\t");
  857     st->print("je      fast_entry\t");
  858     st->print("\n\t");
  859     st->print("call    #nmethod_entry_barrier_stub\t");
  860     st->print("\n\tfast_entry:");
  861   }
  862   st->cr();
  863 }
  864 #endif
  865 
// Emit the method prologue: optional class-initialization barrier,
// verified entry (frame setup), bind of the verified-entry label that
// MachVEPNode jumps to, then the nmethod entry barrier.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Fast path falls through to L_skip_barrier when the holder is usable;
    // otherwise re-dispatch through the wrong-method stub.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  __ verified_entry(C);
  // Target of MachVEPNode's jump after inline-type argument unpacking.
  __ bind(*_verified_entry);

  // Runtime stubs do not get an nmethod entry barrier.
  if (C->stub_function() == NULL) {
    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->nmethod_entry_barrier(&_masm);
  }

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
  902 






// Upper bound on relocation entries the prologue may emit.
int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
  907 
  908 //=============================================================================
  909 #ifndef PRODUCT
  910 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  911 {
  912   Compile* C = ra_->C;
  913   if (generate_vzeroupper(C)) {
  914     st->print("vzeroupper");
  915     st->cr(); st->print("\t");
  916   }
  917 
  918   int framesize = C->output()->frame_size_in_bytes();
  919   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  920   // Remove word for return adr already pushed
  921   // and RBP
  922   framesize -= 2*wordSize;

  930   if (do_polling() && C->is_method_compilation()) {
  931     st->print("\t");
  932     st->print_cr("movq    rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
  933                  "testl   rax, [rscratch1]\t"
  934                  "# Safepoint: poll for GC");
  935   }
  936 }
  937 #endif
  938 
// Emit the method epilogue: optional vzeroupper, frame removal via
// MacroAssembler::remove_frame, reserved-stack check, and the return
// safepoint poll.
void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  // Subtract two words to account for return address and rbp
  int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
  // remove_frame is also handed needs_stack_repair()/sp_inc_offset() —
  // presumably to undo frames extended at runtime for inline-type
  // calling-convention repair; confirm against MacroAssembler::remove_frame.
  __ remove_frame(initial_framesize, C->needs_stack_repair(), C->output()->sp_inc_offset());

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    MacroAssembler _masm(&cbuf);
    // Return poll: load the thread-local polling page address and touch it;
    // the relocation marks this site as a poll_return for the safepoint code.
    __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
    __ relocate(relocInfo::poll_return_type);
    __ testl(rax, Address(rscratch1, 0));
  }
}
  965 






// Upper bound on relocation entries the epilogue may emit
// (e.g. the poll_return relocation).
int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}
  970 
// No special scheduling for the epilogue; use the default pipeline class.
const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
  975 
  976 //=============================================================================
  977 
// Register class of an OptoReg; used by the spill/copy code to pick
// the correct move encoding.
enum RC {
  rc_bad,    // not a register
  rc_int,    // general-purpose register
  rc_float,  // floating-point register (presumably XMM on x86_64 — confirm in rc_class)
  rc_stack   // stack slot
};
  984 
  985 static enum RC rc_class(OptoReg::Name reg)

 1481     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1482     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1483     emit_rm(cbuf, 0x2, reg & 7, 0x04);
 1484     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1485     emit_d32(cbuf, offset);
 1486   } else {
 1487     emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
 1488     emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
 1489     emit_rm(cbuf, 0x1, reg & 7, 0x04);
 1490     emit_rm(cbuf, 0x0, 0x04, RSP_enc);
 1491     emit_d8(cbuf, offset);
 1492   }
 1493 }
 1494 
 1495 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 1496 {
 1497   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1498   return (offset < 0x80) ? 5 : 8; // REX
 1499 }
 1500 
 1501 //=============================================================================
 1502 #ifndef PRODUCT
 1503 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1504 {
 1505   st->print_cr("MachVEPNode");
 1506 }
 1507 #endif
 1508 
 1509 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1510 {
 1511   MacroAssembler masm(&cbuf);
 1512   if (!_verified) {  
 1513     uint insts_size = cbuf.insts_size();
 1514     if (UseCompressedClassPointers) {
 1515       masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1516       masm.cmpptr(rax, rscratch1);
 1517     } else {
 1518       masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1519     }
 1520     masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1521   } else {
 1522     // Unpack inline type args passed as oop and then jump to
 1523     // the verified entry point (skipping the unverified entry).
 1524     masm.unpack_inline_args(ra_->C, _receiver_only);
 1525     masm.jmp(*_verified_entry);
 1526   }
 1527 }
 1528 
 1529 //=============================================================================
 1530 #ifndef PRODUCT
 1531 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1532 {
 1533   if (UseCompressedClassPointers) {
 1534     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 1535     st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
 1536     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
 1537   } else {
 1538     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
 1539                  "# Inline cache check");
 1540   }
 1541   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 1542   st->print_cr("\tnop\t# nops to align entry point");
 1543 }
 1544 #endif
 1545 
 1546 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 1547 {
 1548   MacroAssembler masm(&cbuf);

 1551     masm.load_klass(rscratch1, j_rarg0, rscratch2);
 1552     masm.cmpptr(rax, rscratch1);
 1553   } else {
 1554     masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
 1555   }
 1556 
 1557   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1558 
 1559   /* WARNING these NOPs are critical so that verified entry point is properly
 1560      4 bytes aligned for patching by NativeJump::patch_verified_entry() */
 1561   int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
 1562   if (OptoBreakpoint) {
 1563     // Leave space for int3
 1564     nops_cnt -= 1;
 1565   }
 1566   nops_cnt &= 0x3; // Do not add nops if code is aligned.
 1567   if (nops_cnt > 0)
 1568     masm.nop(nops_cnt);
 1569 }
 1570 







 1571 //=============================================================================
 1572 
 1573 int Matcher::regnum_to_fpu_offset(int regnum)
 1574 {
 1575   return regnum - 32; // The FP registers are in the second chunk
 1576 }
 1577 
 1578 // This is UltraSparc specific, true just means we have fast l2f conversion
 1579 const bool Matcher::convL2FSupported(void) {
 1580   return true;
 1581 }
 1582 
 1583 // Is this branch offset short enough that a short branch can be used?
 1584 //
 1585 // NOTE: If the platform does not provide any short branch variants, then
 1586 //       this method should return false for offset 0.
 1587 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1588   // The passed offset is relative to address of the branch.
 1589   // On 86 a branch displacement is calculated relative to address
 1590   // of a next instruction.

 3833   %}
 3834 %}
 3835 
 3836 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Only match when the int index's type lower bound is >= 0, i.e. the index
  // is provably non-negative, so the ConvI2L can be folded into the address.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 3852 
 3853 // Indirect Narrow Oop Operand
operand indCompressedOop(rRegN reg) %{
  // Only usable when compressed oops decode as base(R12) + (narrow_oop << 3).
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  op_cost(10);
  format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp(0x0);
  %}
%}
 3868 
 3869 // Indirect Narrow Oop Plus Offset Operand
 3870 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 3871 // we can't free r12 even with CompressedOops::base() == NULL.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  // Same addressing mode as indCompressedOop, plus a 32-bit displacement.
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 3886 
 3887 // Indirect Memory Operand
 3888 operand indirectNarrow(rRegN reg)

 4191     equal(0x4, "e");
 4192     not_equal(0x5, "ne");
 4193     less(0x2, "b");
 4194     greater_equal(0x3, "nb");
 4195     less_equal(0x6, "be");
 4196     greater(0x7, "nbe");
 4197     overflow(0x0, "o");
 4198     no_overflow(0x1, "no");
 4199   %}
 4200 %}
 4201 
 4202 //----------OPERAND CLASSES----------------------------------------------------
 4203 // Operand Classes are groups of operands that are used as to simplify
 4204 // instruction definitions by not requiring the AD writer to specify separate
 4205 // instructions for every form of operand when the instruction accepts
 4206 // multiple operand types with the same basic encoding and format.  The classic
 4207 // case of this is memory operands.
 4208 
// All addressing-mode operands (plain, indexed, scaled, narrow-oop) that an
// instruction taking a generic "memory" operand is allowed to match.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOop, indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 4215 
 4216 //----------PIPELINE-----------------------------------------------------------
 4217 // Rules which define the behavior of the target architectures pipeline.
 4218 pipeline %{
 4219 
 4220 //----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
 4231 

 6675   format %{ "MEMBAR-storestore (empty encoding)" %}
 6676   ins_encode( );
 6677   ins_pipe(empty);
 6678 %}
 6679 
 6680 //----------Move Instructions--------------------------------------------------
 6681 
// Reinterpret a raw long value as a pointer; at most a register move.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    // Elide the move when source and destination registers coincide.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6694 
// Reinterpret a narrow (compressed) pointer's bits as a long.
instruct castN2X(rRegL dst, rRegN src)
%{
  // NOTE(review): matches CastP2X with a narrow register operand — confirm
  // this is the intended rule for compressed inputs.
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when source and destination registers coincide.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6707 
// Reinterpret a pointer's bits as a long; at most a register move.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    // Elide the move when source and destination registers coincide.
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6720 
// Reinterpret a compressed pointer's 32 bits as an int (32-bit move).
instruct castN2I(rRegI dst, rRegN src)
%{
  match(Set dst (CastN2I src));

  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    // Elide the move when source and destination registers coincide.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6733 
// Reinterpret an int's 32 bits as a compressed pointer (32-bit move).
instruct castI2N(rRegN dst, rRegI src)
%{
  match(Set dst (CastI2N src));

  format %{ "movl    $dst, $src\t# int -> compressed ptr" %}
  ins_encode %{
    // Elide the move when source and destination registers coincide.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6746 
 6747 
 6748 // Convert oop into int for vectors alignment masking
// Convert oop into int for vectors alignment masking: take the low 32 bits
// of the pointer with a 32-bit move.
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl    $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 6759 
 6760 // Convert compressed oop into int for vectors alignment masking
 6761 // in case of 32bit oops (heap < 4Gb).
 6762 instruct convN2I(rRegI dst, rRegN src)
 6763 %{
 6764   predicate(CompressedOops::shift() == 0);
 6765   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 6766 
 6767   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}

10941   ins_encode %{
10942     __ movdl($dst$$XMMRegister, $src$$Register);
10943   %}
10944   ins_pipe( pipe_slow );
10945 %}
10946 
// Bitwise move of a long GPR into an XMM register (MoveL2D: no conversion).
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10957 
10958 
10959 // =======================================================================
10960 // fast clearing of an array
// ClearArray, not known large at compile time: fill cnt words at base with
// val.  Short lengths use an inline store loop; longer ones fall through to
// the large-clear code generated by MacroAssembler::clear_mem (last two
// args: is_large=false, word_copy_only=false).
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);

  format %{ $$template

    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, false);
  %}
  ins_pipe(pipe_slow);
%}
11019 
// Same as rep_stos, but for ClearArray nodes restricted to word-sized
// stores (word_copy_only), so the byte-granular "rep stosb" fast path is
// excluded (clear_mem is called with word_copy_only=true).
instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);

  format %{ $$template
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, false, true);
  %}
  ins_pipe(pipe_slow);
%}
11075 
// ClearArray known large at compile time: skip the short-length inline loop
// and go straight to the bulk-clear code (clear_mem with is_large=true,
// word_copy_only=false).
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);

  format %{ $$template
    if (UseFastStosb) {

       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {

       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, false);
  %}
  ins_pipe(pipe_slow);
%}
11124 
// ClearArray known large AND restricted to word-sized stores: no "rep stosb"
// byte path; clear_mem is called with is_large=true, word_copy_only=true.
// (Fix: stripped trailing whitespace, matching the sibling rep_stos rules.)
instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                        Universe dummy, rFlagsReg cr)
%{
  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
  match(Set dummy (ClearArray (Binary cnt base) val));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);

  format %{ $$template
    if (UseXMMForObjInit) {
       $$emit$$"movdq   $tmp, $val\n\t"
       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu $tmp,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
                 $tmp$$XMMRegister, true, true);
  %}
  ins_pipe(pipe_slow);
%}
11170 
// Intrinsic byte[]/byte[] (Latin-1 x Latin-1) string comparison; result in
// rax, all inputs are killed and tmp1 is clobbered.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11186 

// Set flags from (src & con) == 0 using TEST with a 32-bit immediate.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
  ins_pipe(ialu_cr_reg_imm);
%}
11720 
// Set flags from (src & *mem) == 0 without materializing the load result.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
11730 
11731 // Fold array properties check
// Fold array properties check: test a narrow-klass field in memory against
// an immediate mask without loading it into a register first.
instruct testI_mem_imm(rFlagsReg cr, memory mem, immI con, immI0 zero)
%{
  match(Set cr (CmpI (AndI (CastN2I (LoadNKlass mem)) con) zero));

  format %{ "testl   $mem, $con" %}
  opcode(0xF7, 0x00);
  ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(con));
  ins_pipe(ialu_mem_imm);
%}
11741 
11742 // Unsigned compare Instructions; really, same as signed except they
11743 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned 32-bit register-register compare; same CMP encoding as the
// signed form but produces rFlagsRegU so unsigned branches are selected.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
  ins_pipe(ialu_cr_reg_reg);
%}
11753 
11754 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11755 %{
11756   match(Set cr (CmpU op1 op2));
11757 
11758   format %{ "cmpl    $op1, $op2\t# unsigned" %}
11759   opcode(0x81,0x07); /* Opcode 81 /7 */
11760   ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11761   ins_pipe(ialu_cr_reg_imm);

// Set flags from (src & *mem) == 0 for longs (64-bit TEST against memory).
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
12043 
// Same as testL_reg_mem, but the register operand arrives as a pointer
// reinterpreted to a long (CastP2X).
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
12053 
12054 // Fold array properties check
// Fold array properties check: TEST a klass word loaded from memory against
// a register mask without a separate load.
instruct testL_reg_mem3(rFlagsReg cr, memory mem, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X (LoadKlass mem)) src) zero));

  format %{ "testq   $src, $mem\t# test array properties" %}
  opcode(0x85);
  ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
  ins_pipe(ialu_cr_reg_mem);
%}
12064 
12065 // Manifest a CmpL result in an integer register.  Very painful.
12066 // This is the test to avoid.
// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.  Produces -1 / 0 / 1 in dst (see template:
// -1 when src1 < src2, otherwise setne yields 0 or 1).
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode(cmpl3_flag(src1, src2, dst));
  ins_pipe(pipe_slow);
%}
12082 
12083 // Unsigned long compare Instructions; really, same as signed long except they
12084 // produce an rFlagsRegU instead of rFlagsReg.

12712 
12713   ins_cost(300);
12714   format %{ "call,runtime " %}
12715   ins_encode(clear_avx, Java_To_Runtime(meth));
12716   ins_pipe(pipe_slow);
12717 %}
12718 
12719 // Call runtime without safepoint
// Call runtime without safepoint: direct leaf call; clears AVX state first
// (clear_avx) to avoid SSE/AVX transition penalties in the callee.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12730 
12731 // Call runtime without safepoint
12732 // entry point is null, target holds the address to call
// Call runtime without safepoint
// entry point is null, target holds the address to call: indirect
// register call (no clear_avx, matching the NoFP contract).
instruct CallLeafNoFPInDirect(rRegP target)
%{
  predicate(n->as_Call()->entry_point() == NULL);
  match(CallLeafNoFP target);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime indirect " %}
  ins_encode %{
     __ call($target$$Register);
  %}

  ins_pipe(pipe_slow);
%}
12746 
// Direct leaf runtime call with a known entry point (counterpart of the
// indirect rule above, selected when entry_point() is non-null).
instruct CallLeafNoFPDirect(method meth)
%{
  predicate(n->as_Call()->entry_point() != NULL);
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
12758 
12759 // Return Instruction
12760 // Remove the return address & jump to it.
12761 // Notice: We always emit a nop after a ret to make sure there is room
12762 // for safepoint patching
12763 instruct Ret()
12764 %{
12765   match(Return);
12766 
12767   format %{ "ret" %}
12768   opcode(0xC3);
12769   ins_encode(OpcP);
< prev index next >