850 st->print("# stack alignment check");
851 #endif
852 }
853 if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
854 st->print("\n\t");
855 st->print("cmpl [r15_thread + #disarmed_offset], #disarmed_value\t");
856 st->print("\n\t");
857 st->print("je fast_entry\t");
858 st->print("\n\t");
859 st->print("call #nmethod_entry_barrier_stub\t");
860 st->print("\n\tfast_entry:");
861 }
862 st->cr();
863 }
864 #endif
865
// Emit the compiled-method prologue: an optional class-initialization
// barrier (fast path skips to L_skip_barrier, slow path tail-calls the
// wrong-method stub), then the verified entry (stack bang + frame setup),
// then record frame completion and the constant-table base offset.
866 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
867 Compile* C = ra_->C;
868 MacroAssembler _masm(&cbuf);
869
870 int framesize = C->output()->frame_size_in_bytes();
871 int bangsize = C->output()->bang_size_in_bytes();
872
873 if (C->clinit_barrier_on_entry()) {
874 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
875 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
876
877 Label L_skip_barrier;
878 Register klass = rscratch1;
879
// Load the holder klass and branch past the slow path if it is initialized.
880 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
881 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
882
883 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
884
885 __ bind(L_skip_barrier);
886 }
887
// Bang size of 0 suppresses the stack bang; the final flag tells
// verified_entry whether this compilation is a stub.
888 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
889
890 C->output()->set_frame_complete(cbuf.insts_size());
891
892 if (C->has_mach_constant_base_node()) {
893 // NOTE: We set the table base offset here because users might be
894 // emitted before MachConstantBaseNode.
895 ConstantTable& constant_table = C->output()->constant_table();
896 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
897 }
898 }
899
// Prologue size depends on too many inputs (barrier, bang, frame size),
// so defer to the generic MachNode::size() computation.
900 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
901 {
902 return MachNode::size(ra_); // too many variables; just compute it
903 // the hard way
904 }
905
// Estimated relocation-entry count for the prologue.
906 int MachPrologNode::reloc() const
907 {
908 return 0; // a large enough number
909 }
910
911 //=============================================================================
912 #ifndef PRODUCT
// Debug-only pretty printer for the epilogue: optional vzeroupper, frame
// teardown, and the return safepoint poll sequence.
913 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
914 {
915 Compile* C = ra_->C;
916 if (generate_vzeroupper(C)) {
917 st->print("vzeroupper");
918 st->cr(); st->print("\t");
919 }
920
921 int framesize = C->output()->frame_size_in_bytes();
922 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
923 // Remove word for return adr already pushed
924 // and RBP
925 framesize -= 2*wordSize;
933 if (do_polling() && C->is_method_compilation()) {
934 st->print("\t");
935 st->print_cr("movq rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
936 "testl rax, [rscratch1]\t"
937 "# Safepoint: poll for GC");
938 }
939 }
940 #endif
941
// Emit the epilogue: clear AVX upper state when wide vectors were used,
// release the frame (addq rsp, then popq rbp), optional reserved-stack
// check, and the safepoint return poll.
942 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
943 {
944 Compile* C = ra_->C;
945 MacroAssembler _masm(&cbuf);
946
947 if (generate_vzeroupper(C)) {
948 // Clear upper bits of YMM registers when current compiled code uses
949 // wide vectors to avoid AVX <-> SSE transition penalty during call.
950 __ vzeroupper();
951 }
952
953 int framesize = C->output()->frame_size_in_bytes();
954 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
955 // Remove word for return adr already pushed
956 // and RBP
957 framesize -= 2*wordSize;
958
959 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
960
// Release the frame with addq rsp, imm — 8-bit immediate (opcode 0x83)
// when it fits, otherwise 32-bit immediate (opcode 0x81).
961 if (framesize) {
962 emit_opcode(cbuf, Assembler::REX_W);
963 if (framesize < 0x80) {
964 emit_opcode(cbuf, 0x83); // addq rsp, #framesize
965 emit_rm(cbuf, 0x3, 0x00, RSP_enc);
966 emit_d8(cbuf, framesize);
967 } else {
968 emit_opcode(cbuf, 0x81); // addq rsp, #framesize
969 emit_rm(cbuf, 0x3, 0x00, RSP_enc);
970 emit_d32(cbuf, framesize);
971 }
972 }
973
974 // popq rbp
975 emit_opcode(cbuf, 0x58 | RBP_enc);
976
977 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
978 __ reserved_stack_check();
979 }
980
// Return poll: load the polling page address from the thread and touch it;
// the poll_return relocation marks the read for the safepoint machinery.
981 if (do_polling() && C->is_method_compilation()) {
982 MacroAssembler _masm(&cbuf);
983 __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
984 __ relocate(relocInfo::poll_return_type);
985 __ testl(rax, Address(rscratch1, 0));
986 }
987 }
988
// Epilogue size varies with frame size and options; use the generic
// MachNode::size() computation.
989 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
990 {
991 return MachNode::size(ra_); // too many variables; just compute it
992 // the hard way
993 }
994
// Upper bound on relocation entries for the epilogue (the return poll
// emits a poll_return relocation).
995 int MachEpilogNode::reloc() const
996 {
997 return 2; // a large enough number
998 }
999
// The epilogue uses the default pipeline class.
1000 const Pipeline* MachEpilogNode::pipeline() const
1001 {
1002 return MachNode::pipeline_class();
1003 }
1004
1005 //=============================================================================
1006
// Register classes used by rc_class() below to classify an OptoReg:
// invalid, general-purpose, float/XMM, or stack slot.
1007 enum RC {
1008 rc_bad,
1009 rc_int,
1010 rc_float,
1011 rc_stack
1012 };
1013
1014 static enum RC rc_class(OptoReg::Name reg)
1510 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1511 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1512 emit_rm(cbuf, 0x2, reg & 7, 0x04);
1513 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1514 emit_d32(cbuf, offset);
1515 } else {
1516 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1517 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1518 emit_rm(cbuf, 0x1, reg & 7, 0x04);
1519 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1520 emit_d8(cbuf, offset);
1521 }
1522 }
1523
// Byte size of the LEA emitted above: REX + LEA with an 8-bit displacement
// is 5 bytes; with a 32-bit displacement it is 8 bytes.
1524 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1525 {
1526 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1527 return (offset < 0x80) ? 5 : 8; // REX
1528 }
1529
1530 //=============================================================================
1531 #ifndef PRODUCT
// Debug-only pretty printer for the unverified entry point: inline-cache
// klass check of the receiver in j_rarg0 against rax.
1532 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1533 {
1534 if (UseCompressedClassPointers) {
1535 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1536 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1537 st->print_cr("\tcmpq rax, rscratch1\t # Inline cache check");
1538 } else {
1539 st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1540 "# Inline cache check");
1541 }
1542 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1543 st->print_cr("\tnop\t# nops to align entry point");
1544 }
1545 #endif
1546
1547 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1548 {
1549 MacroAssembler masm(&cbuf);
1552 masm.load_klass(rscratch1, j_rarg0, rscratch2);
1553 masm.cmpptr(rax, rscratch1);
1554 } else {
1555 masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1556 }
1557
1558 masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1559
1560 /* WARNING these NOPs are critical so that verified entry point is properly
1561 4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1562 int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1563 if (OptoBreakpoint) {
1564 // Leave space for int3
1565 nops_cnt -= 1;
1566 }
1567 nops_cnt &= 0x3; // Do not add nops if code is aligned.
1568 if (nops_cnt > 0)
1569 masm.nop(nops_cnt);
1570 }
1571
// UEP size varies with compressed-class mode and alignment nops; use the
// generic MachNode::size() computation.
1572 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
1573 {
1574 return MachNode::size(ra_); // too many variables; just compute it
1575 // the hard way
1576 }
1577
1578
1579 //=============================================================================
1580
// Map a register number to its FPU offset; the FP registers occupy the
// second chunk of the register file (after the first 32 entries).
1581 int Matcher::regnum_to_fpu_offset(int regnum)
1582 {
1583 return regnum - 32; // The FP registers are in the second chunk
1584 }
1585
// Historical (SPARC-era) hook: "true" only means the platform has a fast
// long->float conversion, which x86_64 does.
1586 // This is UltraSparc specific, true just means we have fast l2f conversion
1587 const bool Matcher::convL2FSupported(void) {
1588 return true;
1589 }
1590
1591 // Is this branch offset short enough that a short branch can be used?
1592 //
1593 // NOTE: If the platform does not provide any short branch variants, then
1594 // this method should return false for offset 0.
1595 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1596 // The passed offset is relative to address of the branch.
1597 // On 86 a branch displacement is calculated relative to address
1598 // of a next instruction.
3841 %}
3842 %}
3843
3844 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches base + disp32 + (int index converted to long) << scale, but only
// when the index is provably non-negative (predicate checks _lo >= 0 on the
// ConvI2L input's type).
3845 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3846 %{
3847 constraint(ALLOC_IN_RC(ptr_reg));
3848 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3849 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3850
3851 op_cost(10);
3852 format %{"[$reg + $off + $idx << $scale]" %}
3853 interface(MEMORY_INTER) %{
3854 base($reg);
3855 index($idx);
3856 scale($scale);
3857 disp($off);
3858 %}
3859 %}
3860
3861 // Indirect Narrow Oop Plus Offset Operand
3862 // Note: x86 architecture doesn't support "scale * index + offset" without a base
3863 // we can't free r12 even with CompressedOops::base() == NULL.
// Addresses a compressed oop as R12 (heap base) + narrow_oop << 3 + disp;
// valid only when the compressed-oop shift is 3 (times_8).
3864 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3865 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
3866 constraint(ALLOC_IN_RC(ptr_reg));
3867 match(AddP (DecodeN reg) off);
3868
3869 op_cost(10);
3870 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3871 interface(MEMORY_INTER) %{
3872 base(0xc); // R12
3873 index($reg);
3874 scale(0x3);
3875 disp($off);
3876 %}
3877 %}
3878
3879 // Indirect Memory Operand
3880 operand indirectNarrow(rRegN reg)
4183 equal(0x4, "e");
4184 not_equal(0x5, "ne");
4185 less(0x2, "b");
4186 greater_equal(0x3, "nb");
4187 less_equal(0x6, "be");
4188 greater(0x7, "nbe");
4189 overflow(0x0, "o");
4190 no_overflow(0x1, "no");
4191 %}
4192 %}
4193
4194 //----------OPERAND CLASSES----------------------------------------------------
4195 // Operand Classes are groups of operands that are used as to simplify
4196 // instruction definitions by not requiring the AD writer to specify separate
4197 // instructions for every form of operand when the instruction accepts
4198 // multiple operand types with the same basic encoding and format. The classic
4199 // case of this is memory operands.
4200
// All operand addressing forms accepted wherever an instruction pattern
// uses the generic "memory" operand class.
4201 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4202 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
4203 indCompressedOopOffset,
4204 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4205 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4206 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
4207
4208 //----------PIPELINE-----------------------------------------------------------
4209 // Rules which define the behavior of the target architectures pipeline.
4210 pipeline %{
4211
4212 //----------ATTRIBUTES---------------------------------------------------------
// Pipeline attributes describing instruction size and fetch behavior.
4213 attributes %{
4214 variable_size_instructions; // Instructions are variable length (x86)
4215 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
4216 instruction_unit_size = 1; // An instruction is 1 byte long
4217 instruction_fetch_unit_size = 16; // The processor fetches one line
4218 instruction_fetch_units = 1; // of 16 bytes
4219
4220 // List of nop instructions
4221 nops( MachNop );
4222 %}
4223
6667 format %{ "MEMBAR-storestore (empty encoding)" %}
6668 ins_encode( );
6669 ins_pipe(empty);
6670 %}
6671
6672 //----------Move Instructions--------------------------------------------------
6673
// Reinterpret a long register as a pointer (CastX2P); emits a movq only
// when source and destination registers differ.
6674 instruct castX2P(rRegP dst, rRegL src)
6675 %{
6676 match(Set dst (CastX2P src));
6677
6678 format %{ "movq $dst, $src\t# long->ptr" %}
6679 ins_encode %{
6680 if ($dst$$reg != $src$$reg) {
6681 __ movptr($dst$$Register, $src$$Register);
6682 }
6683 %}
6684 ins_pipe(ialu_reg_reg); // XXX
6685 %}
6686
// Reinterpret a pointer register as a long (CastP2X); emits a movq only
// when source and destination registers differ.
6687 instruct castP2X(rRegL dst, rRegP src)
6688 %{
6689 match(Set dst (CastP2X src));
6690
6691 format %{ "movq $dst, $src\t# ptr -> long" %}
6692 ins_encode %{
6693 if ($dst$$reg != $src$$reg) {
6694 __ movptr($dst$$Register, $src$$Register);
6695 }
6696 %}
6697 ins_pipe(ialu_reg_reg); // XXX
6698 %}
6699
6700 // Convert oop into int for vectors alignment masking
// Truncate a pointer to 32 bits (ConvL2I of CastP2X); used for vector
// alignment masking per the comment above.
6701 instruct convP2I(rRegI dst, rRegP src)
6702 %{
6703 match(Set dst (ConvL2I (CastP2X src)));
6704
6705 format %{ "movl $dst, $src\t# ptr -> int" %}
6706 ins_encode %{
6707 __ movl($dst$$Register, $src$$Register);
6708 %}
6709 ins_pipe(ialu_reg_reg); // XXX
6710 %}
6711
6712 // Convert compressed oop into int for vectors alignment masking
6713 // in case of 32bit oops (heap < 4Gb).
6714 instruct convN2I(rRegI dst, rRegN src)
6715 %{
6716 predicate(CompressedOops::shift() == 0);
6717 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6718
6719 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
10893 ins_encode %{
10894 __ movdl($dst$$XMMRegister, $src$$Register);
10895 %}
10896 ins_pipe( pipe_slow );
10897 %}
10898
// Bitwise move of a long GPR into an XMM register (MoveL2D) via movdq;
// no value conversion is performed.
10899 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10900 match(Set dst (MoveL2D src));
10901 effect(DEF dst, USE src);
10902 ins_cost(100);
10903 format %{ "movd $dst,$src\t# MoveL2D" %}
10904 ins_encode %{
10905 __ movdq($dst$$XMMRegister, $src$$Register);
10906 %}
10907 ins_pipe( pipe_slow );
10908 %}
10909
10910
10911 // =======================================================================
10912 // fast clearing of an array
// ClearArray for arrays not known to be large: delegates to
// MacroAssembler::clear_mem with large=false. The $$template block only
// shapes the debug disassembly text, mirroring clear_mem's strategies
// (short inline loop, rep stosb, XMM/YMM loop, or rep stosq).
10913 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10914 Universe dummy, rFlagsReg cr)
10915 %{
10916 predicate(!((ClearArrayNode*)n)->is_large());
10917 match(Set dummy (ClearArray cnt base));
10918 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10919
10920 format %{ $$template
10921 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10922 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10923 $$emit$$"jg LARGE\n\t"
10924 $$emit$$"dec rcx\n\t"
10925 $$emit$$"js DONE\t# Zero length\n\t"
10926 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10927 $$emit$$"dec rcx\n\t"
10928 $$emit$$"jge LOOP\n\t"
10929 $$emit$$"jmp DONE\n\t"
10930 $$emit$$"# LARGE:\n\t"
10931 if (UseFastStosb) {
10932 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10933 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10934 } else if (UseXMMForObjInit) {
10935 $$emit$$"mov rdi,rax\n\t"
10936 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10937 $$emit$$"jmpq L_zero_64_bytes\n\t"
10938 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10939 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10940 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10941 $$emit$$"add 0x40,rax\n\t"
10942 $$emit$$"# L_zero_64_bytes:\n\t"
10943 $$emit$$"sub 0x8,rcx\n\t"
10944 $$emit$$"jge L_loop\n\t"
10945 $$emit$$"add 0x4,rcx\n\t"
10946 $$emit$$"jl L_tail\n\t"
10947 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10948 $$emit$$"add 0x20,rax\n\t"
10949 $$emit$$"sub 0x4,rcx\n\t"
10950 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10951 $$emit$$"add 0x4,rcx\n\t"
10952 $$emit$$"jle L_end\n\t"
10953 $$emit$$"dec rcx\n\t"
10954 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10955 $$emit$$"vmovq xmm0,(rax)\n\t"
10956 $$emit$$"add 0x8,rax\n\t"
10957 $$emit$$"dec rcx\n\t"
10958 $$emit$$"jge L_sloop\n\t"
10959 $$emit$$"# L_end:\n\t"
10960 } else {
10961 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10962 }
10963 $$emit$$"# DONE"
10964 %}
10965 ins_encode %{
10966 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10967 $tmp$$XMMRegister, false);
10968 %}
10969 ins_pipe(pipe_slow);
10970 %}
10971
// ClearArray for arrays known to be large: same as rep_stos but delegates
// to MacroAssembler::clear_mem with large=true, skipping the short-length
// inline loop.
10972 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10973 Universe dummy, rFlagsReg cr)
10974 %{
10975 predicate(((ClearArrayNode*)n)->is_large());
10976 match(Set dummy (ClearArray cnt base));
10977 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10978
10979 format %{ $$template
10980 if (UseFastStosb) {
10981 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10982 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10983 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
10984 } else if (UseXMMForObjInit) {
10985 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
10986 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10987 $$emit$$"jmpq L_zero_64_bytes\n\t"
10988 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10989 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10990 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10991 $$emit$$"add 0x40,rax\n\t"
10992 $$emit$$"# L_zero_64_bytes:\n\t"
10993 $$emit$$"sub 0x8,rcx\n\t"
10994 $$emit$$"jge L_loop\n\t"
10995 $$emit$$"add 0x4,rcx\n\t"
10996 $$emit$$"jl L_tail\n\t"
10997 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10998 $$emit$$"add 0x20,rax\n\t"
10999 $$emit$$"sub 0x4,rcx\n\t"
11000 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11001 $$emit$$"add 0x4,rcx\n\t"
11002 $$emit$$"jle L_end\n\t"
11003 $$emit$$"dec rcx\n\t"
11004 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11005 $$emit$$"vmovq xmm0,(rax)\n\t"
11006 $$emit$$"add 0x8,rax\n\t"
11007 $$emit$$"dec rcx\n\t"
11008 $$emit$$"jge L_sloop\n\t"
11009 $$emit$$"# L_end:\n\t"
11010 } else {
11011 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
11012 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11013 }
11014 %}
11015 ins_encode %{
11016 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11017 $tmp$$XMMRegister, true);
11018 %}
11019 ins_pipe(pipe_slow);
11020 %}
11021
// Intrinsic byte[] (Latin1/Latin1) string comparison: StrComp with LL
// encoding, delegating to MacroAssembler::string_compare. Register
// assignments are pinned by the fixed-register operand classes.
11022 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11023 rax_RegI result, legRegD tmp1, rFlagsReg cr)
11024 %{
11025 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11026 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11027 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11028
11029 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11030 ins_encode %{
11031 __ string_compare($str1$$Register, $str2$$Register,
11032 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11033 $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11034 %}
11035 ins_pipe( pipe_slow );
11036 %}
11037
// Set flags from (src AND con) compared against zero, using testl reg,imm32
// (opcode 0xF7 /0) instead of materializing the AND result.
11562 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11563 %{
11564 match(Set cr (CmpI (AndI src con) zero));
11565
11566 format %{ "testl $src, $con" %}
11567 opcode(0xF7, 0x00);
11568 ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11569 ins_pipe(ialu_cr_reg_imm);
11570 %}
11571
// Set flags from (src AND loaded memory) compared against zero, using
// testl reg,mem (opcode 0x85) without a separate load.
11572 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11573 %{
11574 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11575
11576 format %{ "testl $src, $mem" %}
11577 opcode(0x85);
11578 ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11579 ins_pipe(ialu_cr_reg_mem);
11580 %}
11581
11582 // Unsigned compare Instructions; really, same as signed except they
11583 // produce an rFlagsRegU instead of rFlagsReg.
// 32-bit unsigned register-register compare (CmpU) via cmpl; produces
// rFlagsRegU so unsigned branch conditions are used.
11584 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11585 %{
11586 match(Set cr (CmpU op1 op2));
11587
11588 format %{ "cmpl $op1, $op2\t# unsigned" %}
11589 opcode(0x3B); /* Opcode 3B /r */
11590 ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11591 ins_pipe(ialu_cr_reg_reg);
11592 %}
11593
11594 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11595 %{
11596 match(Set cr (CmpU op1 op2));
11597
11598 format %{ "cmpl $op1, $op2\t# unsigned" %}
11599 opcode(0x81,0x07); /* Opcode 81 /7 */
11600 ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11601 ins_pipe(ialu_cr_reg_imm);
// Set flags from (long src AND loaded memory) compared against zero via
// testq reg,mem (REX.W + 0x85).
11874 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
11875 %{
11876 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
11877
11878 format %{ "testq $src, $mem" %}
11879 opcode(0x85);
11880 ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11881 ins_pipe(ialu_cr_reg_mem);
11882 %}
11883
// Same as testL_reg_mem but with a pointer source viewed as a long via
// CastP2X; identical testq encoding.
11884 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
11885 %{
11886 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
11887
11888 format %{ "testq $src, $mem" %}
11889 opcode(0x85);
11890 ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
11891 ins_pipe(ialu_cr_reg_mem);
11892 %}
11893
11894 // Manifest a CmpL result in an integer register. Very painful.
11895 // This is the test to avoid.
// Materialize a three-way long compare (CmpL3: -1/0/1) into an integer
// register; clobbers flags.
11896 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
11897 %{
11898 match(Set dst (CmpL3 src1 src2));
11899 effect(KILL flags);
11900
11901 ins_cost(275); // XXX
11902 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
11903 "movl $dst, -1\n\t"
11904 "jl,s done\n\t"
11905 "setne $dst\n\t"
11906 "movzbl $dst, $dst\n\t"
11907 "done:" %}
11908 ins_encode(cmpl3_flag(src1, src2, dst));
11909 ins_pipe(pipe_slow);
11910 %}
11911
11912 // Unsigned long compare Instructions; really, same as signed long except they
11913 // produce an rFlagsRegU instead of rFlagsReg.
12541
12542 ins_cost(300);
12543 format %{ "call,runtime " %}
12544 ins_encode(clear_avx, Java_To_Runtime(meth));
12545 ins_pipe(pipe_slow);
12546 %}
12547
12548 // Call runtime without safepoint
// Leaf runtime call (no safepoint, per the comment above); clears AVX
// state before calling out.
12549 instruct CallLeafDirect(method meth)
12550 %{
12551 match(CallLeaf);
12552 effect(USE meth);
12553
12554 ins_cost(300);
12555 format %{ "call_leaf,runtime " %}
12556 ins_encode(clear_avx, Java_To_Runtime(meth));
12557 ins_pipe(pipe_slow);
12558 %}
12559
12560 // Call runtime without safepoint
// Leaf runtime call, no-FP variant (CallLeafNoFP); same encoding as
// CallLeafDirect.
12561 instruct CallLeafNoFPDirect(method meth)
12562 %{
12563 match(CallLeafNoFP);
12564 effect(USE meth);
12565
12566 ins_cost(300);
12567 format %{ "call_leaf_nofp,runtime " %}
12568 ins_encode(clear_avx, Java_To_Runtime(meth));
12569 ins_pipe(pipe_slow);
12570 %}
12571
12572 // Return Instruction
12573 // Remove the return address & jump to it.
12574 // Notice: We always emit a nop after a ret to make sure there is room
12575 // for safepoint patching
12576 instruct Ret()
12577 %{
12578 match(Return);
12579
12580 format %{ "ret" %}
12581 opcode(0xC3);
12582 ins_encode(OpcP);
|
850 st->print("# stack alignment check");
851 #endif
852 }
853 if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
854 st->print("\n\t");
855 st->print("cmpl [r15_thread + #disarmed_offset], #disarmed_value\t");
856 st->print("\n\t");
857 st->print("je fast_entry\t");
858 st->print("\n\t");
859 st->print("call #nmethod_entry_barrier_stub\t");
860 st->print("\n\tfast_entry:");
861 }
862 st->cr();
863 }
864 #endif
865
// Emit the prologue (variant): optional class-init barrier, then
// verified_entry(C) for frame setup, bind the verified entry label, and an
// nmethod entry barrier for non-stub compilations, before recording frame
// completion and the constant-table base offset.
866 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
867 Compile* C = ra_->C;
868 MacroAssembler _masm(&cbuf);
869
870 if (C->clinit_barrier_on_entry()) {
871 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
872 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
873
874 Label L_skip_barrier;
875 Register klass = rscratch1;
876
877 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
878 __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);
879
880 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
881
882 __ bind(L_skip_barrier);
883 }
884
885 __ verified_entry(C);
886 __ bind(*_verified_entry);
887
// Stubs skip the nmethod entry barrier; compiled Java methods get one.
888 if (C->stub_function() == NULL) {
889 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
890 bs->nmethod_entry_barrier(&_masm);
891 }
892
893 C->output()->set_frame_complete(cbuf.insts_size());
894
895 if (C->has_mach_constant_base_node()) {
896 // NOTE: We set the table base offset here because users might be
897 // emitted before MachConstantBaseNode.
898 ConstantTable& constant_table = C->output()->constant_table();
899 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
900 }
901 }
902
// Estimated relocation-entry count for the prologue.
903 int MachPrologNode::reloc() const
904 {
905 return 0; // a large enough number
906 }
907
908 //=============================================================================
909 #ifndef PRODUCT
// Debug-only pretty printer for the epilogue: optional vzeroupper, frame
// teardown, and the return safepoint poll sequence.
910 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
911 {
912 Compile* C = ra_->C;
913 if (generate_vzeroupper(C)) {
914 st->print("vzeroupper");
915 st->cr(); st->print("\t");
916 }
917
918 int framesize = C->output()->frame_size_in_bytes();
919 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
920 // Remove word for return adr already pushed
921 // and RBP
922 framesize -= 2*wordSize;
930 if (do_polling() && C->is_method_compilation()) {
931 st->print("\t");
932 st->print_cr("movq rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
933 "testl rax, [rscratch1]\t"
934 "# Safepoint: poll for GC");
935 }
936 }
937 #endif
938
// Emit the epilogue (variant): optional vzeroupper, remove_frame (which
// also handles stack repair when needed), optional reserved-stack check,
// and the safepoint return poll.
939 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
940 {
941 Compile* C = ra_->C;
942 MacroAssembler _masm(&cbuf);
943
944 if (generate_vzeroupper(C)) {
945 // Clear upper bits of YMM registers when current compiled code uses
946 // wide vectors to avoid AVX <-> SSE transition penalty during call.
947 __ vzeroupper();
948 }
949
950 // Subtract two words to account for return address and rbp
951 int initial_framesize = C->output()->frame_size_in_bytes() - 2*wordSize;
952 __ remove_frame(initial_framesize, C->needs_stack_repair(), C->output()->sp_inc_offset());
953
954 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
955 __ reserved_stack_check();
956 }
957
// Return poll: load the polling page address from the thread and touch it;
// the poll_return relocation marks the read for the safepoint machinery.
958 if (do_polling() && C->is_method_compilation()) {
959 MacroAssembler _masm(&cbuf);
960 __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
961 __ relocate(relocInfo::poll_return_type);
962 __ testl(rax, Address(rscratch1, 0));
963 }
964 }
965
// Upper bound on relocation entries for the epilogue.
966 int MachEpilogNode::reloc() const
967 {
968 return 2; // a large enough number
969 }
970
// The epilogue uses the default pipeline class.
971 const Pipeline* MachEpilogNode::pipeline() const
972 {
973 return MachNode::pipeline_class();
974 }
975
976 //=============================================================================
977
// Register classes used by rc_class() below to classify an OptoReg:
// invalid, general-purpose, float/XMM, or stack slot.
978 enum RC {
979 rc_bad,
980 rc_int,
981 rc_float,
982 rc_stack
983 };
984
985 static enum RC rc_class(OptoReg::Name reg)
1481 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1482 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1483 emit_rm(cbuf, 0x2, reg & 7, 0x04);
1484 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1485 emit_d32(cbuf, offset);
1486 } else {
1487 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1488 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1489 emit_rm(cbuf, 0x1, reg & 7, 0x04);
1490 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1491 emit_d8(cbuf, offset);
1492 }
1493 }
1494
// Byte size of the LEA emitted above: REX + LEA with an 8-bit displacement
// is 5 bytes; with a 32-bit displacement it is 8 bytes.
1495 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1496 {
1497 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1498 return (offset < 0x80) ? 5 : 8; // REX
1499 }
1500
1501 //=============================================================================
1502 #ifndef PRODUCT
// Debug-only placeholder printer for the inline-type entry point.
1503 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1504 {
1505 st->print_cr("MachVEPNode");
1506 }
1507 #endif
1508
// Emit the inline-type entry point. Unverified variant: inline-cache klass
// check of the receiver against rax, jumping to the ic-miss stub on
// mismatch. Verified variant: unpack inline-type arguments passed as oops
// and jump to the verified entry.
1509 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1510 {
1511 MacroAssembler masm(&cbuf);
1512 if (!_verified) {
// NOTE(review): insts_size is unused in this block — presumably left over
// from alignment logic like MachUEPNode's; confirm before removing.
1513 uint insts_size = cbuf.insts_size();
1514 if (UseCompressedClassPointers) {
1515 masm.load_klass(rscratch1, j_rarg0, rscratch2);
1516 masm.cmpptr(rax, rscratch1);
1517 } else {
1518 masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1519 }
1520 masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1521 } else {
1522 // Unpack inline type args passed as oop and then jump to
1523 // the verified entry point (skipping the unverified entry).
1524 masm.unpack_inline_args(ra_->C, _receiver_only);
1525 masm.jmp(*_verified_entry);
1526 }
1527 }
1528
1529 //=============================================================================
1530 #ifndef PRODUCT
// Debug-only pretty printer for the unverified entry point: inline-cache
// klass check of the receiver in j_rarg0 against rax.
1531 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1532 {
1533 if (UseCompressedClassPointers) {
1534 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1535 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1536 st->print_cr("\tcmpq rax, rscratch1\t # Inline cache check");
1537 } else {
1538 st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1539 "# Inline cache check");
1540 }
1541 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1542 st->print_cr("\tnop\t# nops to align entry point");
1543 }
1544 #endif
1545
1546 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1547 {
1548 MacroAssembler masm(&cbuf);
1551 masm.load_klass(rscratch1, j_rarg0, rscratch2);
1552 masm.cmpptr(rax, rscratch1);
1553 } else {
1554 masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1555 }
1556
1557 masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1558
1559 /* WARNING these NOPs are critical so that verified entry point is properly
1560 4 bytes aligned for patching by NativeJump::patch_verified_entry() */
1561 int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
1562 if (OptoBreakpoint) {
1563 // Leave space for int3
1564 nops_cnt -= 1;
1565 }
1566 nops_cnt &= 0x3; // Do not add nops if code is aligned.
1567 if (nops_cnt > 0)
1568 masm.nop(nops_cnt);
1569 }
1570
1571 //=============================================================================
1572
// Map a register number to its FPU offset; the FP registers occupy the
// second chunk of the register file (after the first 32 entries).
1573 int Matcher::regnum_to_fpu_offset(int regnum)
1574 {
1575 return regnum - 32; // The FP registers are in the second chunk
1576 }
1577
// Historical (SPARC-era) hook: "true" only means the platform has a fast
// long->float conversion, which x86_64 does.
1578 // This is UltraSparc specific, true just means we have fast l2f conversion
1579 const bool Matcher::convL2FSupported(void) {
1580 return true;
1581 }
1582
1583 // Is this branch offset short enough that a short branch can be used?
1584 //
1585 // NOTE: If the platform does not provide any short branch variants, then
1586 // this method should return false for offset 0.
1587 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1588 // The passed offset is relative to address of the branch.
1589 // On 86 a branch displacement is calculated relative to address
1590 // of a next instruction.
3833 %}
3834 %}
3835
3836 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches base + disp32 + (int index converted to long) << scale, but only
// when the index is provably non-negative (predicate checks _lo >= 0 on the
// ConvI2L input's type).
3837 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
3838 %{
3839 constraint(ALLOC_IN_RC(ptr_reg));
3840 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
3841 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
3842
3843 op_cost(10);
3844 format %{"[$reg + $off + $idx << $scale]" %}
3845 interface(MEMORY_INTER) %{
3846 base($reg);
3847 index($idx);
3848 scale($scale);
3849 disp($off);
3850 %}
3851 %}
3852
3853 // Indirect Narrow Oop Operand
// Addresses a compressed oop directly as R12 (heap base) + narrow_oop << 3
// with no displacement; valid only when the compressed-oop shift is 3.
3854 operand indCompressedOop(rRegN reg) %{
3855 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
3856 constraint(ALLOC_IN_RC(ptr_reg));
3857 match(DecodeN reg);
3858
3859 op_cost(10);
3860 format %{"[R12 + $reg << 3] (compressed oop addressing)" %}
3861 interface(MEMORY_INTER) %{
3862 base(0xc); // R12
3863 index($reg);
3864 scale(0x3);
3865 disp(0x0);
3866 %}
3867 %}
3868
3869 // Indirect Narrow Oop Plus Offset Operand
3870 // Note: x86 architecture doesn't support "scale * index + offset" without a base
3871 // we can't free r12 even with CompressedOops::base() == NULL.
// Addresses a compressed oop as R12 (heap base) + narrow_oop << 3 + disp;
// valid only when the compressed-oop shift is 3 (times_8).
3872 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
3873 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
3874 constraint(ALLOC_IN_RC(ptr_reg));
3875 match(AddP (DecodeN reg) off);
3876
3877 op_cost(10);
3878 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
3879 interface(MEMORY_INTER) %{
3880 base(0xc); // R12
3881 index($reg);
3882 scale(0x3);
3883 disp($off);
3884 %}
3885 %}
3886
3887 // Indirect Memory Operand
3888 operand indirectNarrow(rRegN reg)
4191 equal(0x4, "e");
4192 not_equal(0x5, "ne");
4193 less(0x2, "b");
4194 greater_equal(0x3, "nb");
4195 less_equal(0x6, "be");
4196 greater(0x7, "nbe");
4197 overflow(0x0, "o");
4198 no_overflow(0x1, "no");
4199 %}
4200 %}
4201
4202 //----------OPERAND CLASSES----------------------------------------------------
4203 // Operand Classes are groups of operands that are used to simplify
4204 // instruction definitions by not requiring the AD writer to specify separate
4205 // instructions for every form of operand when the instruction accepts
4206 // multiple operand types with the same basic encoding and format. The classic
4207 // case of this is memory operands.
4208
// The "memory" class covers every addressing-mode operand an instruction may
// take, including the compressed-oop (Narrow) variants declared above.
4209 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
4210 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
4211 indCompressedOop, indCompressedOopOffset,
4212 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
4213 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
4214 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
4215
4216 //----------PIPELINE-----------------------------------------------------------
4217 // Rules which define the behavior of the target architectures pipeline.
4218 pipeline %{
4219
4220 //----------ATTRIBUTES---------------------------------------------------------
// Scheduling attributes for the x86_64 pipeline model.
4221 attributes %{
4222 variable_size_instructions; // Variable size instructions (x86 encodings vary in length)
4223 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
4224 instruction_unit_size = 1; // An instruction is 1 byte long
4225 instruction_fetch_unit_size = 16; // The processor fetches one line
4226 instruction_fetch_units = 1; // of 16 bytes
4227
4228 // List of nop instructions
4229 nops( MachNop );
4230 %}
4231
6675 format %{ "MEMBAR-storestore (empty encoding)" %}
6676 ins_encode( );
6677 ins_pipe(empty);
6678 %}
6679
6680 //----------Move Instructions--------------------------------------------------
6681
// Reinterpret a long value as a pointer (CastX2P). Pure register copy;
// the move is elided when source and destination are the same register.
6682 instruct castX2P(rRegP dst, rRegL src)
6683 %{
6684 match(Set dst (CastX2P src));
6685
6686 format %{ "movq $dst, $src\t# long->ptr" %}
6687 ins_encode %{
6688 if ($dst$$reg != $src$$reg) {
6689 __ movptr($dst$$Register, $src$$Register);
6690 }
6691 %}
6692 ins_pipe(ialu_reg_reg); // XXX
6693 %}
6694
// CastP2X applied to a narrow (compressed) oop register: copies the raw
// bits of $src into a long register without decoding. Move elided when
// src == dst. NOTE(review): format comment says "ptr" though $src is a
// narrow oop (rRegN) — presumably intentional shorthand; confirm upstream.
6695 instruct castN2X(rRegL dst, rRegN src)
6696 %{
6697 match(Set dst (CastP2X src));
6698
6699 format %{ "movq $dst, $src\t# ptr -> long" %}
6700 ins_encode %{
6701 if ($dst$$reg != $src$$reg) {
6702 __ movptr($dst$$Register, $src$$Register);
6703 }
6704 %}
6705 ins_pipe(ialu_reg_reg); // XXX
6706 %}
6707
// Reinterpret a pointer as a long (CastP2X). Pure register copy; elided
// when source and destination registers coincide.
6708 instruct castP2X(rRegL dst, rRegP src)
6709 %{
6710 match(Set dst (CastP2X src));
6711
6712 format %{ "movq $dst, $src\t# ptr -> long" %}
6713 ins_encode %{
6714 if ($dst$$reg != $src$$reg) {
6715 __ movptr($dst$$Register, $src$$Register);
6716 }
6717 %}
6718 ins_pipe(ialu_reg_reg); // XXX
6719 %}
6720
// Reinterpret a compressed (narrow) oop as a 32-bit int (CastN2I).
// 32-bit register copy; elided when src == dst.
6721 instruct castN2I(rRegI dst, rRegN src)
6722 %{
6723 match(Set dst (CastN2I src));
6724
6725 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
6726 ins_encode %{
6727 if ($dst$$reg != $src$$reg) {
6728 __ movl($dst$$Register, $src$$Register);
6729 }
6730 %}
6731 ins_pipe(ialu_reg_reg); // XXX
6732 %}
6733
// Reinterpret a 32-bit int as a compressed (narrow) oop (CastI2N).
// Inverse of castN2I above; 32-bit register copy, elided when src == dst.
6734 instruct castI2N(rRegN dst, rRegI src)
6735 %{
6736 match(Set dst (CastI2N src));
6737
6738 format %{ "movl $dst, $src\t# int -> compressed ptr" %}
6739 ins_encode %{
6740 if ($dst$$reg != $src$$reg) {
6741 __ movl($dst$$Register, $src$$Register);
6742 }
6743 %}
6744 ins_pipe(ialu_reg_reg); // XXX
6745 %}
6746
6747
6748 // Convert oop into int for vectors alignment masking
// Matches ConvL2I(CastP2X p): movl truncates the pointer to its low
// 32 bits, which is all the alignment mask needs.
6749 instruct convP2I(rRegI dst, rRegP src)
6750 %{
6751 match(Set dst (ConvL2I (CastP2X src)));
6752
6753 format %{ "movl $dst, $src\t# ptr -> int" %}
6754 ins_encode %{
6755 __ movl($dst$$Register, $src$$Register);
6756 %}
6757 ins_pipe(ialu_reg_reg); // XXX
6758 %}
6759
6760 // Convert compressed oop into int for vectors alignment masking
6761 // in case of 32bit oops (heap < 4Gb).
6762 instruct convN2I(rRegI dst, rRegN src)
6763 %{
6764 predicate(CompressedOops::shift() == 0);
6765 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
6766
6767 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
10941 ins_encode %{
10942 __ movdl($dst$$XMMRegister, $src$$Register);
10943 %}
10944 ins_pipe( pipe_slow );
10945 %}
10946
// Bitwise move of a 64-bit general register into an XMM register
// (MoveL2D): movdq copies the raw bits, no numeric conversion.
10947 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10948 match(Set dst (MoveL2D src));
10949 effect(DEF dst, USE src);
10950 ins_cost(100);
10951 format %{ "movd $dst,$src\t# MoveL2D" %}
10952 ins_encode %{
10953 __ movdq($dst$$XMMRegister, $src$$Register);
10954 %}
10955 ins_pipe( pipe_slow );
10956 %}
10957
10958
10959 // =======================================================================
10960 // fast clearing of an array
// Small-array variant (predicate excludes is_large and word_copy_only).
// Fills the memory at $base for $cnt words with $val; delegates to
// MacroAssembler::clear_mem with is_large=false, word_copy=false.
// USE_KILL: rcx ($cnt) and rdi ($base) are consumed and clobbered.
// The $$template format below mirrors the code paths clear_mem selects
// (short inline loop, rep stosb, XMM 64-byte loop, or rep stosq).
10961 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10962 Universe dummy, rFlagsReg cr)
10963 %{
10964 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
10965 match(Set dummy (ClearArray (Binary cnt base) val));
10966 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10967
10968 format %{ $$template
10969 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10970 $$emit$$"jg LARGE\n\t"
10971 $$emit$$"dec rcx\n\t"
10972 $$emit$$"js DONE\t# Zero length\n\t"
10973 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10974 $$emit$$"dec rcx\n\t"
10975 $$emit$$"jge LOOP\n\t"
10976 $$emit$$"jmp DONE\n\t"
10977 $$emit$$"# LARGE:\n\t"
10978 if (UseFastStosb) {
10979 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10980 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10981 } else if (UseXMMForObjInit) {
10982 $$emit$$"movdq $tmp, $val\n\t"
10983 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10984 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10985 $$emit$$"jmpq L_zero_64_bytes\n\t"
10986 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10987 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10988 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10989 $$emit$$"add 0x40,rax\n\t"
10990 $$emit$$"# L_zero_64_bytes:\n\t"
10991 $$emit$$"sub 0x8,rcx\n\t"
10992 $$emit$$"jge L_loop\n\t"
10993 $$emit$$"add 0x4,rcx\n\t"
10994 $$emit$$"jl L_tail\n\t"
10995 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10996 $$emit$$"add 0x20,rax\n\t"
10997 $$emit$$"sub 0x4,rcx\n\t"
10998 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10999 $$emit$$"add 0x4,rcx\n\t"
11000 $$emit$$"jle L_end\n\t"
11001 $$emit$$"dec rcx\n\t"
11002 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11003 $$emit$$"vmovq xmm0,(rax)\n\t"
11004 $$emit$$"add 0x8,rax\n\t"
11005 $$emit$$"dec rcx\n\t"
11006 $$emit$$"jge L_sloop\n\t"
11007 $$emit$$"# L_end:\n\t"
11008 } else {
11009 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
11010 }
11011 $$emit$$"# DONE"
11012 %}
11013 ins_encode %{
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy=false)
11014 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11015 $tmp$$XMMRegister, false, false);
11016 %}
11017 ins_pipe(pipe_slow);
11018 %}
11019
// Small-array ClearArray variant for nodes flagged word_copy_only().
// Same shape as rep_stos above but the format/encoding never use the
// rep-stosb byte path (clear_mem is called with word_copy=true).
// USE_KILL: rcx ($cnt) and rdi ($base) are consumed and clobbered.
11020 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11021 Universe dummy, rFlagsReg cr)
11022 %{
11023 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
11024 match(Set dummy (ClearArray (Binary cnt base) val));
11025 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
11026
11027 format %{ $$template
11028 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
11029 $$emit$$"jg LARGE\n\t"
11030 $$emit$$"dec rcx\n\t"
11031 $$emit$$"js DONE\t# Zero length\n\t"
11032 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
11033 $$emit$$"dec rcx\n\t"
11034 $$emit$$"jge LOOP\n\t"
11035 $$emit$$"jmp DONE\n\t"
11036 $$emit$$"# LARGE:\n\t"
11037 if (UseXMMForObjInit) {
11038 $$emit$$"movdq $tmp, $val\n\t"
11039 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11040 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11041 $$emit$$"jmpq L_zero_64_bytes\n\t"
11042 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11043 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11044 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11045 $$emit$$"add 0x40,rax\n\t"
11046 $$emit$$"# L_zero_64_bytes:\n\t"
11047 $$emit$$"sub 0x8,rcx\n\t"
11048 $$emit$$"jge L_loop\n\t"
11049 $$emit$$"add 0x4,rcx\n\t"
11050 $$emit$$"jl L_tail\n\t"
11051 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11052 $$emit$$"add 0x20,rax\n\t"
11053 $$emit$$"sub 0x4,rcx\n\t"
11054 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11055 $$emit$$"add 0x4,rcx\n\t"
11056 $$emit$$"jle L_end\n\t"
11057 $$emit$$"dec rcx\n\t"
11058 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11059 $$emit$$"vmovq xmm0,(rax)\n\t"
11060 $$emit$$"add 0x8,rax\n\t"
11061 $$emit$$"dec rcx\n\t"
11062 $$emit$$"jge L_sloop\n\t"
11063 $$emit$$"# L_end:\n\t"
11064 } else {
11065 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
11066 }
11067 $$emit$$"# DONE"
11068 %}
11069 ins_encode %{
// clear_mem(base, cnt, val, tmp, is_large=false, word_copy=true)
11070 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11071 $tmp$$XMMRegister, false, true);
11072 %}
11073 ins_pipe(pipe_slow);
11074 %}
11075
// Large-array ClearArray variant (is_large() true, word copy not required).
// Skips the short-length inline loop entirely; goes straight to the bulk
// path (rep stosb, XMM 64-byte loop, or rep stosq).
// USE_KILL: rcx ($cnt) and rdi ($base) are consumed and clobbered.
11076 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11077 Universe dummy, rFlagsReg cr)
11078 %{
11079 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
11080 match(Set dummy (ClearArray (Binary cnt base) val));
11081 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
11082
11083 format %{ $$template
11084 if (UseFastStosb) {
11085 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
11086 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
11087 } else if (UseXMMForObjInit) {
11088 $$emit$$"movdq $tmp, $val\n\t"
11089 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11090 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11091 $$emit$$"jmpq L_zero_64_bytes\n\t"
11092 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11093 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11094 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11095 $$emit$$"add 0x40,rax\n\t"
11096 $$emit$$"# L_zero_64_bytes:\n\t"
11097 $$emit$$"sub 0x8,rcx\n\t"
11098 $$emit$$"jge L_loop\n\t"
11099 $$emit$$"add 0x4,rcx\n\t"
11100 $$emit$$"jl L_tail\n\t"
11101 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11102 $$emit$$"add 0x20,rax\n\t"
11103 $$emit$$"sub 0x4,rcx\n\t"
11104 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11105 $$emit$$"add 0x4,rcx\n\t"
11106 $$emit$$"jle L_end\n\t"
11107 $$emit$$"dec rcx\n\t"
11108 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11109 $$emit$$"vmovq xmm0,(rax)\n\t"
11110 $$emit$$"add 0x8,rax\n\t"
11111 $$emit$$"dec rcx\n\t"
11112 $$emit$$"jge L_sloop\n\t"
11113 $$emit$$"# L_end:\n\t"
11114 } else {
11115 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11116 }
11117 %}
11118 ins_encode %{
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy=false)
11119 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11120 $tmp$$XMMRegister, true, false);
11121 %}
11122 ins_pipe(pipe_slow);
11123 %}
11124
// Large-array ClearArray variant that must copy whole words
// (is_large() && word_copy_only()): no rep-stosb byte path.
// USE_KILL: rcx ($cnt) and rdi ($base) are consumed and clobbered.
11125 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11126 Universe dummy, rFlagsReg cr)
11127 %{
11128 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
11129 match(Set dummy (ClearArray (Binary cnt base) val));
11130 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
11131
11132 format %{ $$template
11133 if (UseXMMForObjInit) {
11134 $$emit$$"movdq $tmp, $val\n\t"
11135 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11136 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11137 $$emit$$"jmpq L_zero_64_bytes\n\t"
11138 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11139 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11140 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11141 $$emit$$"add 0x40,rax\n\t"
11142 $$emit$$"# L_zero_64_bytes:\n\t"
11143 $$emit$$"sub 0x8,rcx\n\t"
11144 $$emit$$"jge L_loop\n\t"
11145 $$emit$$"add 0x4,rcx\n\t"
11146 $$emit$$"jl L_tail\n\t"
11147 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11148 $$emit$$"add 0x20,rax\n\t"
11149 $$emit$$"sub 0x4,rcx\n\t"
11150 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11151 $$emit$$"add 0x4,rcx\n\t"
11152 $$emit$$"jle L_end\n\t"
11153 $$emit$$"dec rcx\n\t"
11154 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11155 $$emit$$"vmovq xmm0,(rax)\n\t"
11156 $$emit$$"add 0x8,rax\n\t"
11157 $$emit$$"dec rcx\n\t"
11158 $$emit$$"jge L_sloop\n\t"
11159 $$emit$$"# L_end:\n\t"
11160 } else {
11161 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11162 }
11163 %}
11164 ins_encode %{
// clear_mem(base, cnt, val, tmp, is_large=true, word_copy=true)
11165 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11166 $tmp$$XMMRegister, true, true);
11167 %}
11168 ins_pipe(pipe_slow);
11169 %}
11170
// Intrinsic byte[]-vs-byte[] (Latin-1/Latin-1) string comparison.
// Selected by the StrComp node's LL encoding; result in rax.
// All four input registers are consumed (USE_KILL); $tmp1 is scratch.
11171 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11172 rax_RegI result, legRegD tmp1, rFlagsReg cr)
11173 %{
11174 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11175 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11176 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11177
11178 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11179 ins_encode %{
11180 __ string_compare($str1$$Register, $str2$$Register,
11181 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11182 $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11183 %}
11184 ins_pipe( pipe_slow );
11185 %}
11186
// Set flags from (src & con) without materializing the AND result:
// TEST r/m32, imm32 (opcode 0xF7 /0).
11711 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
11712 %{
11713 match(Set cr (CmpI (AndI src con) zero));
11714
11715 format %{ "testl $src, $con" %}
11716 opcode(0xF7, 0x00);
11717 ins_encode(REX_reg(src), OpcP, reg_opc(src), Con32(con));
11718 ins_pipe(ialu_cr_reg_imm);
11719 %}
11720
// Set flags from (src & *mem) by folding the load into TEST r/m32, r32
// (opcode 0x85); no AND result register is needed.
11721 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
11722 %{
11723 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
11724
11725 format %{ "testl $src, $mem" %}
11726 opcode(0x85);
11727 ins_encode(REX_reg_mem(src, mem), OpcP, reg_mem(src, mem));
11728 ins_pipe(ialu_cr_reg_mem);
11729 %}
11730
11731 // Fold array properties check
// Tests immediate bits directly against a narrow-klass field in memory:
// the CastN2I(LoadNKlass mem) is folded into TEST m32, imm32 (0xF7 /0),
// so no load into a register is emitted.
11732 instruct testI_mem_imm(rFlagsReg cr, memory mem, immI con, immI0 zero)
11733 %{
11734 match(Set cr (CmpI (AndI (CastN2I (LoadNKlass mem)) con) zero));
11735
11736 format %{ "testl $mem, $con" %}
11737 opcode(0xF7, 0x00);
11738 ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con32(con));
11739 ins_pipe(ialu_mem_imm);
11740 %}
11741
11742 // Unsigned compare Instructions; really, same as signed except they
11743 // produce an rFlagsRegU instead of rFlagsReg.
// Register-register unsigned compare: CMP r32, r/m32 (opcode 0x3B).
11744 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
11745 %{
11746 match(Set cr (CmpU op1 op2));
11747
11748 format %{ "cmpl $op1, $op2\t# unsigned" %}
11749 opcode(0x3B); /* Opcode 3B /r */
11750 ins_encode(REX_reg_reg(op1, op2), OpcP, reg_reg(op1, op2));
11751 ins_pipe(ialu_cr_reg_reg);
11752 %}
11753
11754 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
11755 %{
11756 match(Set cr (CmpU op1 op2));
11757
11758 format %{ "cmpl $op1, $op2\t# unsigned" %}
11759 opcode(0x81,0x07); /* Opcode 81 /7 */
11760 ins_encode(OpcSErm(op1, op2), Con8or32(op2));
11761 ins_pipe(ialu_cr_reg_imm);
// 64-bit flags-from-AND with a memory operand: folds the LoadL into
// TEST r/m64, r64 (opcode 0x85 with REX.W).
12034 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
12035 %{
12036 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
12037
12038 format %{ "testq $src, $mem" %}
12039 opcode(0x85);
12040 ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12041 ins_pipe(ialu_cr_reg_mem);
12042 %}
12043
// Same as testL_reg_mem but the register operand arrives as a pointer
// reinterpreted via CastP2X; identical TEST r/m64, r64 encoding.
12044 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
12045 %{
12046 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
12047
12048 format %{ "testq $src, $mem" %}
12049 opcode(0x85);
12050 ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12051 ins_pipe(ialu_cr_reg_mem);
12052 %}
12053
12054 // Fold array properties check
// Tests $src bits against a klass pointer loaded from memory; the
// CastP2X(LoadKlass mem) is folded into TEST r/m64, r64 (0x85 + REX.W).
12055 instruct testL_reg_mem3(rFlagsReg cr, memory mem, rRegL src, immL0 zero)
12056 %{
12057 match(Set cr (CmpL (AndL (CastP2X (LoadKlass mem)) src) zero));
12058
12059 format %{ "testq $src, $mem\t# test array properties" %}
12060 opcode(0x85);
12061 ins_encode(REX_reg_mem_wide(src, mem), OpcP, reg_mem(src, mem));
12062 ins_pipe(ialu_cr_reg_mem);
12063 %}
12064
12065 // Manifest a CmpL result in an integer register. Very painful.
12066 // This is the test to avoid.
// Produces -1/0/+1 in $dst for src1 <, ==, > src2 (see the format's
// cmp/movl -1/jl/setne/movzbl sequence); flags are clobbered (KILL).
12067 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
12068 %{
12069 match(Set dst (CmpL3 src1 src2));
12070 effect(KILL flags);
12071
12072 ins_cost(275); // XXX
12073 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
12074 "movl $dst, -1\n\t"
12075 "jl,s done\n\t"
12076 "setne $dst\n\t"
12077 "movzbl $dst, $dst\n\t"
12078 "done:" %}
12079 ins_encode(cmpl3_flag(src1, src2, dst));
12080 ins_pipe(pipe_slow);
12081 %}
12082
12083 // Unsigned long compare Instructions; really, same as signed long except they
12084 // produce an rFlagsRegU instead of rFlagsReg.
12712
12713 ins_cost(300);
12714 format %{ "call,runtime " %}
12715 ins_encode(clear_avx, Java_To_Runtime(meth));
12716 ins_pipe(pipe_slow);
12717 %}
12718
12719 // Call runtime without safepoint
// Direct leaf call into the runtime: clears AVX state (clear_avx) before
// transferring control so the callee sees clean SIMD state.
12720 instruct CallLeafDirect(method meth)
12721 %{
12722 match(CallLeaf);
12723 effect(USE meth);
12724
12725 ins_cost(300);
12726 format %{ "call_leaf,runtime " %}
12727 ins_encode(clear_avx, Java_To_Runtime(meth));
12728 ins_pipe(pipe_slow);
12729 %}
12730
12731 // Call runtime without safepoint
12732 // entry point is null, target holds the address to call
// Indirect leaf call: used when the CallLeafNoFP node carries no static
// entry point (predicate checks entry_point() == NULL); the destination
// address is taken from $target at runtime.
12733 instruct CallLeafNoFPInDirect(rRegP target)
12734 %{
12735 predicate(n->as_Call()->entry_point() == NULL);
12736 match(CallLeafNoFP target);
12737
12738 ins_cost(300);
12739 format %{ "call_leaf_nofp,runtime indirect " %}
12740 ins_encode %{
12741 __ call($target$$Register);
12742 %}
12743
12744 ins_pipe(pipe_slow);
12745 %}
12746
// Direct counterpart of CallLeafNoFPInDirect: selected when the call node
// has a static entry point (predicate checks entry_point() != NULL).
// Clears AVX state before the runtime transition, like CallLeafDirect.
12747 instruct CallLeafNoFPDirect(method meth)
12748 %{
12749 predicate(n->as_Call()->entry_point() != NULL);
12750 match(CallLeafNoFP);
12751 effect(USE meth);
12752
12753 ins_cost(300);
12754 format %{ "call_leaf_nofp,runtime " %}
12755 ins_encode(clear_avx, Java_To_Runtime(meth));
12756 ins_pipe(pipe_slow);
12757 %}
12758
12759 // Return Instruction
12760 // Remove the return address & jump to it.
12761 // Notice: We always emit a nop after a ret to make sure there is room
12762 // for safepoint patching
12763 instruct Ret()
12764 %{
12765 match(Return);
12766
12767 format %{ "ret" %}
12768 opcode(0xC3);
12769 ins_encode(OpcP);
|