diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index b5bfcb0fcedf2..1501934d48f05 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -4450,7 +4450,7 @@ void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_ addi(offset, offset, receiver_step); bdnz(L_loop_search_receiver); - // Fast: no receiver, but profile is full + // Fast: no receiver, but profile is not full if (count != noreg) { mtctr(count); } else { diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index 38698370faa2f..08f922a0b9ae1 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -2413,32 +2413,9 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { } void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, - Register recv, Register tmp1, Label* update_done) { - uint i; - for (i = 0; i < VirtualCallData::row_limit(); i++) { - Label next_test; - // See if the receiver is receiver[n]. - Address receiver_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))); - __ z_cg(recv, receiver_addr); - __ z_brne(next_test); - Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); - __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1); - __ branch_optimized(Assembler::bcondAlways, *update_done); - __ bind(next_test); - } - - // Didn't find receiver; find next empty slot and fill it in. 
- for (i = 0; i < VirtualCallData::row_limit(); i++) { - Label next_test; - Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))); - __ z_ltg(Z_R0_scratch, recv_addr); - __ z_brne(next_test); - __ z_stg(recv, recv_addr); - __ load_const_optimized(tmp1, DataLayout::counter_increment); - __ z_stg(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)), mdo); - __ branch_optimized(Assembler::bcondAlways, *update_done); - __ bind(next_test); - } + Register recv, Register tmp1) { + int mdp_offset = md->byte_offset_of_slot(data, in_ByteSize(0)); + __ profile_receiver_type(recv, mdo, mdp_offset, tmp1); } void LIR_Assembler::setup_md_access(ciMethod* method, int bci, @@ -2510,13 +2487,9 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ branch_optimized(Assembler::bcondAlways, *obj_is_null); __ bind(not_null); - NearLabel update_done; Register recv = k_RInfo; __ load_klass(recv, obj); - type_profile_helper(mdo, md, data, recv, Rtmp1, &update_done); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - __ add2mem_64(counter_addr, DataLayout::counter_increment, Rtmp1); - __ bind(update_done); + type_profile_helper(mdo, md, data, recv, Rtmp1); } else { __ compareU64_and_branch(obj, (intptr_t) 0, Assembler::bcondEqual, *obj_is_null); } @@ -2606,13 +2579,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { __ branch_optimized(Assembler::bcondAlways, done); __ bind(not_null); - NearLabel update_done; Register recv = k_RInfo; __ load_klass(recv, value); - type_profile_helper(mdo, md, data, recv, Rtmp1, &update_done); - Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); - __ add2mem_64(counter_addr, DataLayout::counter_increment, Rtmp1); - __ bind(update_done); + type_profile_helper(mdo, md, data, recv, Rtmp1); } else { __ compareU64_and_branch(value, (intptr_t) 0, Assembler::bcondEqual, done); } @@ 
-2772,11 +2741,8 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { // statically update the MethodData* rather than needing to do // dynamic tests on the receiver type. - // NOTE: we should probably put a lock around this search to - // avoid collisions by concurrent compilations. ciVirtualCallData* vc_data = (ciVirtualCallData*) data; - uint i; - for (i = 0; i < VirtualCallData::row_limit(); i++) { + for (uint i = 0; i < VirtualCallData::row_limit(); i++) { ciKlass* receiver = vc_data->receiver(i); if (known_klass->equals(receiver)) { Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); @@ -2784,32 +2750,13 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { return; } } - - // Receiver type not found in profile data. Select an empty slot. - - // Note that this is less efficient than it should be because it - // always does a write to the receiver part of the - // VirtualCallData rather than just the first time. - for (i = 0; i < VirtualCallData::row_limit(); i++) { - ciKlass* receiver = vc_data->receiver(i); - if (receiver == nullptr) { - Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); - metadata2reg(known_klass->constant_encoding(), tmp1); - __ z_stg(tmp1, recv_addr); - Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); - __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1); - return; - } - } + // Receiver type is not found in profile data. + // Fall back to runtime helper to handle the rest at runtime. + metadata2reg(known_klass->constant_encoding(), recv); } else { __ load_klass(recv, recv); - NearLabel update_done; - type_profile_helper(mdo, md, data, recv, tmp1, &update_done); - // Receiver did not match any saved receiver and there is no empty row for it. - // Increment total counter to indicate polymorphic case. 
- __ add2mem_64(counter_addr, DataLayout::counter_increment, tmp1); - __ bind(update_done); } + type_profile_helper(mdo, md, data, recv, tmp1); } else { // static call __ add2mem_64(counter_addr, DataLayout::counter_increment, tmp1); diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.hpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.hpp index 9fcf7c0c221bf..73d25f77d05dc 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.hpp @@ -30,7 +30,7 @@ // Record the type of the receiver in ReceiverTypeData. void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, - Register recv, Register tmp1, Label* update_done); + Register recv, Register tmp1); // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot. void setup_md_access(ciMethod* method, int bci, ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias); diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp index cc8ca7a1f477c..d50cb833e6839 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.cpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp @@ -1267,7 +1267,7 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver, test_method_data_pointer(mdp, profile_continue); // Record the receiver type. - record_klass_in_profile(receiver, mdp, reg2); + profile_receiver_type(receiver, mdp, 0, reg2); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); @@ -1275,125 +1275,6 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver, } } -// This routine creates a state machine for updating the multi-row -// type profile at a virtual call site (or other type-sensitive bytecode). -// The machine visits each row (of receiver/count) until the receiver type -// is found, or until it runs out of rows. 
At the same time, it remembers -// the location of the first empty row. (An empty row records null for its -// receiver, and can be allocated for a newly-observed receiver type.) -// Because there are two degrees of freedom in the state, a simple linear -// search will not work; it must be a decision tree. Hence this helper -// function is recursive, to generate the required tree structured code. -// It's the interpreter, so we are trading off code space for speed. -// See below for example code. -void InterpreterMacroAssembler::record_klass_in_profile_helper( - Register receiver, Register mdp, - Register reg2, int start_row, - Label& done) { - if (TypeProfileWidth == 0) { - increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); - return; - } - - int last_row = VirtualCallData::row_limit() - 1; - assert(start_row <= last_row, "must be work left to do"); - // Test this row for both the receiver and for null. - // Take any of three different outcomes: - // 1. found receiver => increment count and goto done - // 2. found null => keep looking for case 1, maybe allocate this cell - // 3. found something else => keep looking for cases 1 and 2 - // Case 3 is handled by a recursive call. - for (int row = start_row; row <= last_row; row++) { - NearLabel next_test; - bool test_for_null_also = (row == start_row); - - // See if the receiver is receiver[n]. - int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); - test_mdp_data_at(mdp, recvr_offset, receiver, - (test_for_null_also ? reg2 : noreg), - next_test); - // (Reg2 now contains the receiver from the CallData.) - - // The receiver is receiver[n]. Increment count[n]. - int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); - increment_mdp_data_at(mdp, count_offset); - z_bru(done); - bind(next_test); - - if (test_for_null_also) { - Label found_null; - // Failed the equality check on receiver[n]... Test for null. 
- z_ltgr(reg2, reg2); - if (start_row == last_row) { - // The only thing left to do is handle the null case. - z_brz(found_null); - // Receiver did not match any saved receiver and there is no empty row for it. - // Increment total counter to indicate polymorphic case. - increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); - z_bru(done); - bind(found_null); - break; - } - // Since null is rare, make it be the branch-taken case. - z_brz(found_null); - - // Put all the "Case 3" tests here. - record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done); - - // Found a null. Keep searching for a matching receiver, - // but remember that this is an empty (unused) slot. - bind(found_null); - } - } - - // In the fall-through case, we found no matching receiver, but we - // observed the receiver[start_row] is null. - - // Fill in the receiver field and increment the count. - int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); - set_mdp_data_at(mdp, recvr_offset, receiver); - int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); - load_const_optimized(reg2, DataLayout::counter_increment); - set_mdp_data_at(mdp, count_offset, reg2); - if (start_row > 0) { - z_bru(done); - } -} - -// Example state machine code for three profile rows: -// // main copy of decision tree, rooted at row[1] -// if (row[0].rec == rec) { row[0].incr(); goto done; } -// if (row[0].rec != nullptr) { -// // inner copy of decision tree, rooted at row[1] -// if (row[1].rec == rec) { row[1].incr(); goto done; } -// if (row[1].rec != nullptr) { -// // degenerate decision tree, rooted at row[2] -// if (row[2].rec == rec) { row[2].incr(); goto done; } -// if (row[2].rec != nullptr) { count.incr(); goto done; } // overflow -// row[2].init(rec); goto done; -// } else { -// // remember row[1] is empty -// if (row[2].rec == rec) { row[2].incr(); goto done; } -// row[1].init(rec); goto done; -// } -// } else { -// // remember row[0] is empty 
-// if (row[1].rec == rec) { row[1].incr(); goto done; } -// if (row[2].rec == rec) { row[2].incr(); goto done; } -// row[0].init(rec); goto done; -// } -// done: - -void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, - Register mdp, Register reg2) { - assert(ProfileInterpreter, "must be profiling"); - Label done; - - record_klass_in_profile_helper(receiver, mdp, reg2, 0, done); - - bind (done); -} - void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { if (ProfileInterpreter) { NearLabel profile_continue; @@ -1462,7 +1343,7 @@ void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); // Record the object type. - record_klass_in_profile(klass, mdp, reg2); + profile_receiver_type(klass, mdp, 0, reg2); } update_mdp_by_constant(mdp, mdp_delta); diff --git a/src/hotspot/cpu/s390/interp_masm_s390.hpp b/src/hotspot/cpu/s390/interp_masm_s390.hpp index b816185b06550..a210588d06240 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.hpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.hpp @@ -280,12 +280,6 @@ class InterpreterMacroAssembler: public MacroAssembler { Register test_value_out, Label& not_equal_continue); - void record_klass_in_profile(Register receiver, Register mdp, - Register reg2); - void record_klass_in_profile_helper(Register receiver, Register mdp, - Register reg2, int start_row, - Label& done); - void update_mdp_by_offset(Register mdp_in, int offset_of_offset); void update_mdp_by_offset(Register mdp_in, Register dataidx, int offset_of_disp); void update_mdp_by_constant(Register mdp_in, int constant); diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 4d2bbe796fb43..ea75d483e5f0f 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -39,6 +39,7 @@ #include "oops/compressedKlass.inline.hpp" #include 
"oops/compressedOops.inline.hpp" #include "oops/klass.inline.hpp" +#include "oops/methodData.hpp" #include "prims/methodHandles.hpp" #include "registerSaver_s390.hpp" #include "runtime/icache.hpp" @@ -6766,3 +6767,156 @@ void MacroAssembler::load_on_condition_imm_64(Register dst, int64_t i2, branch_c bind(done); } } + +// Handle the receiver type profile update given the "recv" klass. +// +// Normally updates the ReceiverTypeData (RD) that starts at "mdp" + "mdp_offset". +// If there are no matching or claimable receiver entries in RD, updates +// the polymorphic counter. +// +// This code is expected to be run by either the interpreter or JIT-ed code, without +// extra synchronization. For safety, receiver cells are claimed atomically, which +// avoids grossly misrepresenting the profiles under concurrent updates. For speed, +// counter updates are not atomic. The "scratch" register and Z_R0_scratch are used as temporaries. +// +void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_offset, Register scratch) { + Register r0_tmp = Z_R0_scratch; // cannot be used in address calculation + assert_different_registers(recv, mdp, scratch, r0_tmp); + + int base_receiver_offset = in_bytes(ReceiverTypeData::receiver_offset(0)); + int end_receiver_offset = in_bytes(ReceiverTypeData::receiver_offset(ReceiverTypeData::row_limit())); + int poly_count_offset = in_bytes(CounterData::count_offset()); + int receiver_step = in_bytes(ReceiverTypeData::receiver_offset(1)) - base_receiver_offset; + int receiver_to_count_step = in_bytes(ReceiverTypeData::receiver_count_offset(0)) - base_receiver_offset; + + // Adjust for MDP offsets. + base_receiver_offset += mdp_offset; + end_receiver_offset += mdp_offset; + poly_count_offset += mdp_offset; + +#ifdef ASSERT + // We are about to walk the MDO slots without asking for offsets. + // Check that our math hits all the right spots.
+ for (uint c = 0; c < ReceiverTypeData::row_limit(); c++) { + int real_recv_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_offset(c)); + int real_count_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_count_offset(c)); + int offset = base_receiver_offset + receiver_step*c; + int count_offset = offset + receiver_to_count_step; + assert(offset == real_recv_offset, "receiver slot math"); + assert(count_offset == real_count_offset, "receiver count math"); + } + int real_poly_count_offset = mdp_offset + in_bytes(CounterData::count_offset()); + assert(poly_count_offset == real_poly_count_offset, "poly counter math"); +#endif + + // Corner case: no profile table. Increment poly counter and exit. + if (ReceiverTypeData::row_limit() == 0) { + add2mem_64(Address(mdp, poly_count_offset), DataLayout::counter_increment, scratch); + return; + } + + NearLabel L_loop_search_receiver, L_loop_search_empty; + NearLabel L_restart, L_found_recv, L_found_empty, L_count_update; + Register offset = scratch; + + // The code here recognizes three major cases: + // A. Fastest: receiver found in the table + // B. Fast: no receiver in the table, and the table is full + // C. Slow: no receiver in the table, free slots in the table + // + // The case A performance is most important, as perfectly-behaved code would end up + // there, especially with larger TypeProfileWidth. The case B performance is + // important as well, this is where the bulk of code would land for normally megamorphic + // cases. The case C performance is not essential, its job is to deal with installation + // races, so we optimize for code density instead. Case C needs to make sure that receiver + // rows are only claimed once. This makes sure we never overwrite a row for another + // receiver and never duplicate the receivers in the list, making the profile type-accurate. + // + // It is very tempting to handle these cases in a single loop, and claim the first slot + // without checking the rest of the table.
But, profiling code should tolerate free slots + // in the table, as class unloading can clear them. After such cleanup, the receiver + // we need might be _after_ the free slot. Therefore, we need to let at least one full scan + // complete before trying to install new slots. Splitting the code in several tight + // loops also helpfully optimizes for cases A and B. + // + // This code is effectively: + // + // restart: + // // Fastest: receiver is already installed + // for (i = 0; i < receiver_count(); i++) { + // if (receiver(i) == recv) goto found_recv(i); + // } + // + // // Fast: no receiver, but profile is not full + // for (i = 0; i < receiver_count(); i++) { + // if (receiver(i) == null) goto found_null(i); + // } + // goto polymorphic + // + // // Slow: try to install receiver + // found_null(i): + // CAS(&receiver(i), null, recv); + // goto restart + // + // polymorphic: + // count++; + // return + // + // found_recv(i): + // *receiver_count(i)++ + // + + bind(L_restart); + + // Fastest: receiver is already installed + load_const_optimized(offset, base_receiver_offset); + + bind(L_loop_search_receiver); + z_cg(recv, Address(mdp, offset)); + z_bre(L_found_recv); + add2reg(offset, receiver_step); + compare64_and_branch(offset, end_receiver_offset, bcondNotEqual, L_loop_search_receiver); + + // Fast: no receiver, but profile is not full + load_const_optimized(offset, base_receiver_offset); + + bind(L_loop_search_empty); + z_ltg(r0_tmp, Address(mdp, offset)); + z_brz(L_found_empty); + add2reg(offset, receiver_step); + compare64_and_branch(offset, end_receiver_offset, bcondNotEqual, L_loop_search_empty); + + // Fast (case B): receiver is not found and the table is full. + // Increment polymorphic counter instead of receiver slot. + load_const_optimized(offset, poly_count_offset); + z_bru(L_count_update); + + // Slow (case C): try to install receiver + bind(L_found_empty); + + { + // Atomically swing receiver slot: null -> recv. + // Use compare-and-swap to claim the slot.
+ Register receiver_addr = offset; + z_agr(receiver_addr, mdp); // receiver_addr = mdp + offset + + // r0_tmp is used as expected value (0), recv is the new value + z_lghi(r0_tmp, 0); + z_csg(r0_tmp, recv, 0, receiver_addr); + } + + // CAS success means the slot now has the receiver we want. CAS failure means + // something had claimed the slot concurrently: it can be the same receiver we want, + // or something else. Since this is a slow path, we can optimize for code density, + // and just restart the search from the beginning. + z_bru(L_restart); + + // Found a receiver, convert its slot offset to the corresponding count offset. + bind(L_found_recv); + add2reg(offset, receiver_to_count_step); + + // Finally, update the counter + bind(L_count_update); + z_agr(offset, mdp); + add2mem_64(Address(offset), DataLayout::counter_increment, r0_tmp); +} diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp index 34389917cef71..8e2834ba9b703 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp @@ -1111,6 +1111,8 @@ class MacroAssembler: public Assembler { void load_on_condition_imm_32(Register dst, int64_t i2, branch_condition cc); void load_on_condition_imm_64(Register dst, int64_t i2, branch_condition cc); + + void profile_receiver_type(Register recv, Register mdp, int mdp_offset, Register tmp1); }; #ifdef ASSERT