< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page
*** 865,13 ***
  
  void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
    Compile* C = ra_->C;
    MacroAssembler _masm(&cbuf);
  
-   int framesize = C->output()->frame_size_in_bytes();
-   int bangsize = C->output()->bang_size_in_bytes();
- 
    if (C->clinit_barrier_on_entry()) {
      assert(VM_Version::supports_fast_class_init_checks(), "sanity");
      assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
  
      Label L_skip_barrier;
--- 865,10 ---

*** 883,11 ***
      __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  
      __ bind(L_skip_barrier);
    }
  
!   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
  
    C->output()->set_frame_complete(cbuf.insts_size());
  
    if (C->has_mach_constant_base_node()) {
      // NOTE: We set the table base offset here because users might be
--- 880,17 ---
      __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
  
      __ bind(L_skip_barrier);
    }
  
!   __ verified_entry(C);
+   __ bind(*_verified_entry);
+ 
+   if (C->stub_function() == NULL) {
+     BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+     bs->nmethod_entry_barrier(&_masm);
+   }
  
    C->output()->set_frame_complete(cbuf.insts_size());
  
    if (C->has_mach_constant_base_node()) {
      // NOTE: We set the table base offset here because users might be

*** 895,16 ***
      ConstantTable& constant_table = C->output()->constant_table();
      constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
    }
  }
  
- uint MachPrologNode::size(PhaseRegAlloc* ra_) const
- {
-   return MachNode::size(ra_); // too many variables; just compute it
-                               // the hard way
- }
- 
  int MachPrologNode::reloc() const
  {
    return 0; // a large enough number
  }
  
--- 898,10 ---

*** 948,33 ***
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
  
!   int framesize = C->output()->frame_size_in_bytes();
!   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
!   // Remove word for return adr already pushed
-   // and RBP
-   framesize -= 2*wordSize;
- 
-   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
- 
-   if (framesize) {
-     emit_opcode(cbuf, Assembler::REX_W);
-     if (framesize < 0x80) {
-       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
-       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
-       emit_d8(cbuf, framesize);
-     } else {
-       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
-       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
-       emit_d32(cbuf, framesize);
-     }
-   }
- 
-   // popq rbp
-   emit_opcode(cbuf, 0x58 | RBP_enc);
  
    if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
      __ reserved_stack_check();
    }
  
--- 945,13 ---
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
  
!   // Subtract two words to account for return address and rbp
!   int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
!   __ remove_frame(initial_framesize, C->needs_stack_repair(), C->output()->sp_inc_offset());
  
    if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
      __ reserved_stack_check();
    }
  

*** 984,16 ***
      __ relocate(relocInfo::poll_return_type);
      __ testl(rax, Address(rscratch1, 0));
    }
  }
  
- uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
- {
-   return MachNode::size(ra_); // too many variables; just compute it
-                               // the hard way
- }
- 
  int MachEpilogNode::reloc() const
  {
    return 2; // a large enough number
  }
  
--- 961,10 ---

*** 1525,10 ***
--- 1496,38 ---
  {
    int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
    return (offset < 0x80) ? 5 : 8; // REX
  }
  
+ //=============================================================================
+ #ifndef PRODUCT  // format() is compiled only when PRODUCT is not defined
+ void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+ {
+   st->print_cr("MachVEPNode");  // prints only the node name; ra_ is not consulted
+ }
+ #endif  // !PRODUCT
+ 
+ void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const  // emits either the klass check (!_verified) or the arg-unpacking entry (_verified)
+ {
+   MacroAssembler masm(&cbuf);
+   if (!_verified) {
+     // Unverified entry: compare the klass of the object in j_rarg0 against rax.
+     if (UseCompressedClassPointers) {
+       masm.load_klass(rscratch1, j_rarg0, rscratch2);
+       masm.cmpptr(rax, rscratch1);
+     } else {
+       masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
+     }
+     masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));  // klass mismatch -> IC miss stub
+   } else {
+     // Unpack inline type args passed as oop and then jump to
+     // the verified entry point (skipping the unverified entry).
+     masm.unpack_inline_args(ra_->C, _receiver_only);
+     masm.jmp(*_verified_entry);
+   }
+ }
+ 
  //=============================================================================
  #ifndef PRODUCT
  void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
  {
    if (UseCompressedClassPointers) {

*** 1567,17 ***
    nops_cnt &= 0x3; // Do not add nops if code is aligned.
    if (nops_cnt > 0)
      masm.nop(nops_cnt);
  }
  
- uint MachUEPNode::size(PhaseRegAlloc* ra_) const
- {
-   return MachNode::size(ra_); // too many variables; just compute it
-                               // the hard way
- }
- 
- 
  //=============================================================================
  
  int Matcher::regnum_to_fpu_offset(int regnum)
  {
    return regnum - 32; // The FP registers are in the second chunk
--- 1566,10 ---

*** 3856,10 ***
--- 3848,26 ---
      scale($scale);
      disp($off);
    %}
  %}
  
+ // Indirect Narrow Oop Operand: memory operand that folds a DecodeN of a narrow-oop register into the address [R12 + reg << 3], with no displacement
+ operand indCompressedOop(rRegN reg) %{
+   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));  // only usable when the oop shift equals the x8 scale encoded below
+   constraint(ALLOC_IN_RC(ptr_reg));
+   match(DecodeN reg);  // the decode is absorbed into the addressing mode
+ 
+   op_cost(10);
+   format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
+   interface(MEMORY_INTER) %{
+     base(0xc); // R12 (presumably holds the compressed-oop heap base -- TODO confirm)
+     index($reg);  // the narrow-oop register is used as the index
+     scale(0x3);   // index << 3, matching the times_8 predicate and the format string
+     disp(0x0);    // no displacement; indCompressedOopOffset is the variant that takes an immL32 offset
+   %}
+ %}
+ 
  // Indirect Narrow Oop Plus Offset Operand
  // Note: x86 architecture doesn't support "scale * index + offset" without a base
  // we can't free r12 even with CompressedOops::base() == NULL.
  operand indCompressedOopOffset(rRegN reg, immL32 off) %{
    predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));

*** 4198,11 ***
  // multiple operand types with the same basic encoding and format.  The classic
  // case of this is memory operands.
  
  opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
!                indCompressedOopOffset,
                 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
                 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
                 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
  
  //----------PIPELINE-----------------------------------------------------------
--- 4206,11 ---
  // multiple operand types with the same basic encoding and format.  The classic
  // case of this is memory operands.
  
  opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
!                indCompressedOop, indCompressedOopOffset,
                 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
                 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
                 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
  
  //----------PIPELINE-----------------------------------------------------------

*** 6682,10 ***
--- 6690,23 ---
      }
    %}
    ins_pipe(ialu_reg_reg); // XXX
  %}
  
+ instruct castN2X(rRegL dst, rRegN src)
+ %{
+   match(Set dst (CastP2X src));  // CastP2X whose input is in a narrow-oop register (rRegN); castP2X handles the rRegP form. NOTE(review): format text is the same as castP2X's ("ptr -> long") -- confirm that is intended
+ 
+   format %{ "movq    $dst, $src\t# ptr -> long" %}
+   ins_encode %{
+     if ($dst$$reg != $src$$reg) {  // nothing is emitted when dst and src share a register
+       __ movptr($dst$$Register, $src$$Register);
+     }
+   %}
+   ins_pipe(ialu_reg_reg); // XXX
+ %}
+ 
  instruct castP2X(rRegL dst, rRegP src)
  %{
    match(Set dst (CastP2X src));
  
    format %{ "movq    $dst, $src\t# ptr -> long" %}

*** 6695,10 ***
--- 6716,37 ---
      }
    %}
    ins_pipe(ialu_reg_reg); // XXX
  %}
  
+ instruct castN2I(rRegI dst, rRegN src)
+ %{
+   match(Set dst (CastN2I src));  // reinterpret a narrow-oop register value as an int
+ 
+   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
+   ins_encode %{
+     if ($dst$$reg != $src$$reg) {  // nothing is emitted when dst and src share a register
+       __ movl($dst$$Register, $src$$Register);
+     }
+   %}
+   ins_pipe(ialu_reg_reg); // XXX
+ %}
+ 
+ instruct castI2N(rRegN dst, rRegI src)
+ %{
+   match(Set dst (CastI2N src));  // reinterpret an int register value as a narrow oop (inverse direction of castN2I)
+ 
+   format %{ "movl    $dst, $src\t# int -> compressed ptr" %}
+   ins_encode %{
+     if ($dst$$reg != $src$$reg) {  // nothing is emitted when dst and src share a register
+       __ movl($dst$$Register, $src$$Register);
+     }
+   %}
+   ins_pipe(ialu_reg_reg); // XXX
+ %}
+ 
+ 
  // Convert oop into int for vectors alignment masking
  instruct convP2I(rRegI dst, rRegP src)
  %{
    match(Set dst (ConvL2I (CastP2X src)));
  

*** 10908,19 ***
  %}
  
  
  // =======================================================================
  // fast clearing of an array
! instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                    Universe dummy, rFlagsReg cr)
  %{
!   predicate(!((ClearArrayNode*)n)->is_large());
!   match(Set dummy (ClearArray cnt base));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  
    format %{ $$template
-     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
      $$emit$$"jg      LARGE\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"js      DONE\t# Zero length\n\t"
      $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
--- 10956,18 ---
  %}
  
  
  // =======================================================================
  // fast clearing of an array
! instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                    Universe dummy, rFlagsReg cr)
  %{
!   predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
!   match(Set dummy (ClearArray (Binary cnt base) val));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
  
    format %{ $$template
      $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
      $$emit$$"jg      LARGE\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"js      DONE\t# Zero length\n\t"
      $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"

*** 10930,23 ***
      $$emit$$"# LARGE:\n\t"
      if (UseFastStosb) {
         $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
         $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
      } else if (UseXMMForObjInit) {
!        $$emit$$"mov     rdi,rax\n\t"
!        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
         $$emit$$"jmpq    L_zero_64_bytes\n\t"
         $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
!        $$emit$$"vmovdqu ymm0,(rax)\n\t"
!        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
         $$emit$$"add     0x40,rax\n\t"
         $$emit$$"# L_zero_64_bytes:\n\t"
         $$emit$$"sub     0x8,rcx\n\t"
         $$emit$$"jge     L_loop\n\t"
         $$emit$$"add     0x4,rcx\n\t"
         $$emit$$"jl      L_tail\n\t"
!        $$emit$$"vmovdqu ymm0,(rax)\n\t"
         $$emit$$"add     0x20,rax\n\t"
         $$emit$$"sub     0x4,rcx\n\t"
         $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
         $$emit$$"add     0x4,rcx\n\t"
         $$emit$$"jle     L_end\n\t"
--- 10977,24 ---
      $$emit$$"# LARGE:\n\t"
      if (UseFastStosb) {
         $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
         $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
      } else if (UseXMMForObjInit) {
!        $$emit$$"movdq   $tmp, $val\n\t"
!        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
         $$emit$$"jmpq    L_zero_64_bytes\n\t"
         $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
!        $$emit$$"vmovdqu $tmp,(rax)\n\t"
!        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
         $$emit$$"add     0x40,rax\n\t"
         $$emit$$"# L_zero_64_bytes:\n\t"
         $$emit$$"sub     0x8,rcx\n\t"
         $$emit$$"jge     L_loop\n\t"
         $$emit$$"add     0x4,rcx\n\t"
         $$emit$$"jl      L_tail\n\t"
!        $$emit$$"vmovdqu $tmp,(rax)\n\t"
         $$emit$$"add     0x20,rax\n\t"
         $$emit$$"sub     0x4,rcx\n\t"
         $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
         $$emit$$"add     0x4,rcx\n\t"
         $$emit$$"jle     L_end\n\t"

*** 10961,42 ***
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
      }
      $$emit$$"# DONE"
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
!                  $tmp$$XMMRegister, false);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                          Universe dummy, rFlagsReg cr)
  %{
!   predicate(((ClearArrayNode*)n)->is_large());
!   match(Set dummy (ClearArray cnt base));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  
    format %{ $$template
      if (UseFastStosb) {
-        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
         $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
      } else if (UseXMMForObjInit) {
!        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
!        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
         $$emit$$"jmpq    L_zero_64_bytes\n\t"
         $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
!        $$emit$$"vmovdqu ymm0,(rax)\n\t"
!        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
         $$emit$$"add     0x40,rax\n\t"
         $$emit$$"# L_zero_64_bytes:\n\t"
         $$emit$$"sub     0x8,rcx\n\t"
         $$emit$$"jge     L_loop\n\t"
         $$emit$$"add     0x4,rcx\n\t"
         $$emit$$"jl      L_tail\n\t"
!        $$emit$$"vmovdqu ymm0,(rax)\n\t"
         $$emit$$"add     0x20,rax\n\t"
         $$emit$$"sub     0x4,rcx\n\t"
         $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
         $$emit$$"add     0x4,rcx\n\t"
         $$emit$$"jle     L_end\n\t"
--- 11009,98 ---
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
      }
      $$emit$$"# DONE"
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
!                  $tmp$$XMMRegister, false, false);
    %}
    ins_pipe(pipe_slow);
  %}
  
! instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
+                   Universe dummy, rFlagsReg cr)
+ %{
+   predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());  // non-large ClearArray flagged word_copy_only; the listing below has no rep stosb variant
+   match(Set dummy (ClearArray (Binary cnt base) val));  // val (pinned to rax) is the fill value supplied by the node
+   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);  // tmp is the only XMM register this instruct declares, so the listing names $tmp throughout
+ 
+   format %{ $$template
+     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
+     $$emit$$"jg      LARGE\n\t"
+     $$emit$$"dec     rcx\n\t"
+     $$emit$$"js      DONE\t# Zero length\n\t"
+     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
+     $$emit$$"dec     rcx\n\t"
+     $$emit$$"jge     LOOP\n\t"
+     $$emit$$"jmp     DONE\n\t"
+     $$emit$$"# LARGE:\n\t"
+     if (UseXMMForObjInit) {
+        $$emit$$"movdq   $tmp, $val\n\t"
+        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+        $$emit$$"jmpq    L_zero_64_bytes\n\t"
+        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+        $$emit$$"add     0x40,rax\n\t"
+        $$emit$$"# L_zero_64_bytes:\n\t"
+        $$emit$$"sub     0x8,rcx\n\t"
+        $$emit$$"jge     L_loop\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jl      L_tail\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"add     0x20,rax\n\t"
+        $$emit$$"sub     0x4,rcx\n\t"
+        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jle     L_end\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+        $$emit$$"vmovq   $tmp,(rax)\n\t"
+        $$emit$$"add     0x8,rax\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"jge     L_sloop\n\t"
+        $$emit$$"# L_end:\n\t"
+     } else {
+        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
+     }
+     $$emit$$"# DONE"
+   %}
+   ins_encode %{
+     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                  $tmp$$XMMRegister, false, true);  // trailing flags follow the predicate: not large, word-copy-only (inferred from the sibling rep_stos* instructs)
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                          Universe dummy, rFlagsReg cr)
  %{
!   predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
!   match(Set dummy (ClearArray (Binary cnt base) val));
!   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
  
    format %{ $$template
      if (UseFastStosb) {
         $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
         $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
      } else if (UseXMMForObjInit) {
!        $$emit$$"movdq   $tmp, $val\n\t"
!        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
         $$emit$$"jmpq    L_zero_64_bytes\n\t"
         $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
!        $$emit$$"vmovdqu $tmp,(rax)\n\t"
!        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
         $$emit$$"add     0x40,rax\n\t"
         $$emit$$"# L_zero_64_bytes:\n\t"
         $$emit$$"sub     0x8,rcx\n\t"
         $$emit$$"jge     L_loop\n\t"
         $$emit$$"add     0x4,rcx\n\t"
         $$emit$$"jl      L_tail\n\t"
!        $$emit$$"vmovdqu $tmp,(rax)\n\t"
         $$emit$$"add     0x20,rax\n\t"
         $$emit$$"sub     0x4,rcx\n\t"
         $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
         $$emit$$"add     0x4,rcx\n\t"
         $$emit$$"jle     L_end\n\t"

*** 11006,17 ***
         $$emit$$"add     0x8,rax\n\t"
         $$emit$$"dec     rcx\n\t"
         $$emit$$"jge     L_sloop\n\t"
         $$emit$$"# L_end:\n\t"
      } else {
-        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
      }
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
!                  $tmp$$XMMRegister, true);
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
--- 11110,62 ---
         $$emit$$"add     0x8,rax\n\t"
         $$emit$$"dec     rcx\n\t"
         $$emit$$"jge     L_sloop\n\t"
         $$emit$$"# L_end:\n\t"
      } else {
         $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
      }
    %}
    ins_encode %{
!     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
!                  $tmp$$XMMRegister, true, false);
+   %}
+   ins_pipe(pipe_slow);
+ %}
+ 
+ instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
+                         Universe dummy, rFlagsReg cr)
+ %{
+   predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());  // large ClearArray flagged word_copy_only; the listing below has no rep stosb variant
+   match(Set dummy (ClearArray (Binary cnt base) val));  // val (pinned to rax) is the fill value supplied by the node
+   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);  // tmp is the only XMM register this instruct declares, so the listing names $tmp throughout
+ 
+   format %{ $$template
+     if (UseXMMForObjInit) {
+        $$emit$$"movdq   $tmp, $val\n\t"
+        $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+        $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+        $$emit$$"jmpq    L_zero_64_bytes\n\t"
+        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+        $$emit$$"add     0x40,rax\n\t"
+        $$emit$$"# L_zero_64_bytes:\n\t"
+        $$emit$$"sub     0x8,rcx\n\t"
+        $$emit$$"jge     L_loop\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jl      L_tail\n\t"
+        $$emit$$"vmovdqu $tmp,(rax)\n\t"
+        $$emit$$"add     0x20,rax\n\t"
+        $$emit$$"sub     0x4,rcx\n\t"
+        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+        $$emit$$"add     0x4,rcx\n\t"
+        $$emit$$"jle     L_end\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+        $$emit$$"vmovq   $tmp,(rax)\n\t"
+        $$emit$$"add     0x8,rax\n\t"
+        $$emit$$"dec     rcx\n\t"
+        $$emit$$"jge     L_sloop\n\t"
+        $$emit$$"# L_end:\n\t"
+     } else {
+        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
+     }
+   %}
+   ins_encode %{
+     __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                  $tmp$$XMMRegister, true, true);  // trailing flags follow the predicate: large, word-copy-only (inferred from the sibling rep_stos* instructs)
    %}
    ins_pipe(pipe_slow);
  %}
  
  instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,

*** 11577,10 ***
--- 11726,21 ---
    opcode(0x85);
    ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
    ins_pipe(ialu_cr_reg_mem);
  %}
  
+ // Fold array properties check: LoadNKlass + CastN2I + AndI with an immediate + CmpI against zero collapse into one testl on the memory operand
+ instruct testI_mem_imm(rFlagsReg cr, memory mem, immI con, immI0 zero)
+ %{
+   match(Set cr (CmpI (AndI (CastN2I (LoadNKlass mem)) con) zero));  // only the flags are produced; the loaded narrow klass never needs a register
+ 
+   format %{ "testl   $mem, $con" %}
+   opcode(0xF7, 0x00);  // primary opcode 0xF7 with ModRM extension /0 (x86 TEST r/m32, imm32)
+   ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(con));  // optional REX, opcode, ModRM/memory bytes, then the 32-bit immediate
+   ins_pipe(ialu_mem_imm);
+ %}
+ 
  // Unsigned compare Instructions; really, same as signed except they
  // produce an rFlagsRegU instead of rFlagsReg.
  instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
  %{
    match(Set cr (CmpU op1 op2));

*** 11889,10 ***
--- 12049,21 ---
    opcode(0x85);
    ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
    ins_pipe(ialu_cr_reg_mem);
  %}
  
+ // Fold array properties check: LoadKlass + CastP2X + AndL with a register + CmpL against zero collapse into one testq on the memory operand
+ instruct testL_reg_mem3(rFlagsReg cr, memory mem, rRegL src, immL0 zero)
+ %{
+   match(Set cr (CmpL (AndL (CastP2X (LoadKlass mem)) src) zero));  // only the flags are produced; the loaded klass pointer never needs a register
+ 
+   format %{ "testq   $src, $mem\t# test array properties" %}
+   opcode(0x85);  // x86 TEST r/m, r; the wide REX prefix below makes it the 64-bit form
+   ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
+   ins_pipe(ialu_cr_reg_mem);
+ %}
+ 
  // Manifest a CmpL result in an integer register.  Very painful.
  // This is the test to avoid.
  instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
  %{
    match(Set dst (CmpL3 src1 src2));

*** 12556,12 ***
--- 12727,28 ---
    ins_encode(clear_avx, Java_To_Runtime(meth));
    ins_pipe(pipe_slow);
  %}
  
  // Call runtime without safepoint
+ // entry point is null, target holds the address to call
+ instruct CallLeafNoFPInDirect(rRegP target)
+ %{
+   predicate(n->as_Call()->entry_point() == NULL);  // chosen only when the call node has no entry point; CallLeafNoFPDirect carries the complementary != NULL predicate
+   match(CallLeafNoFP target);  // the call address arrives as a register input
+ 
+   ins_cost(300);
+   format %{ "call_leaf_nofp,runtime indirect " %}
+   ins_encode %{
+      __ call($target$$Register);  // indirect call through the register holding the target address
+   %}
+ 
+   ins_pipe(pipe_slow);
+ %}
+ 
  instruct CallLeafNoFPDirect(method meth)
  %{
+   predicate(n->as_Call()->entry_point() != NULL);
    match(CallLeafNoFP);
    effect(USE meth);
  
    ins_cost(300);
    format %{ "call_leaf_nofp,runtime " %}
< prev index next >