1 /*
  2  * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 
 26 #include "precompiled.hpp"
 27 #include "classfile/altHashing.hpp"
 28 #include "classfile/classLoaderData.hpp"
 29 #include "gc/shared/collectedHeap.hpp"
 30 #include "logging/log.hpp"
 31 #include "logging/logStream.hpp"
 32 #include "memory/allocation.inline.hpp"
 33 #include "memory/metaspaceShared.hpp"
 34 #include "memory/resourceArea.hpp"
 35 #include "memory/universe.hpp"
 36 #include "oops/symbol.hpp"
 37 #include "runtime/atomic.hpp"
 38 #include "runtime/os.hpp"
 39 #include "runtime/signature.hpp"
 40 #include "utilities/utf8.hpp"
 41 
 42 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
 43   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
 44   assert(refcount >= 0, "negative refcount");
 45   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
 46   uint32_t hi = hash;
 47   uint32_t lo = refcount;
 48   return (hi << 16) | lo;
 49 }
 50 
 51 Symbol::Symbol(const u1* name, int length, int refcount) {
 52   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
 53   _length = length;
 54   _body[0] = 0;  // in case length == 0
 55   for (int i = 0; i < length; i++) {
 56     byte_at_put(i, name[i]);
 57   }
 58 }
 59 
 60 void* Symbol::operator new(size_t sz, int len) throw() {
 61 #if INCLUDE_CDS
 62  if (DumpSharedSpaces) {
 63     // To get deterministic output from -Xshare:dump, we ensure that Symbols are allocated in
 64     // increasing addresses. When the symbols are copied into the archive, we preserve their
 65     // relative address order (see SortedSymbolClosure in metaspaceShared.cpp)
 66     //
 67     // We cannot use arena because arena chunks are allocated by the OS. As a result, for example,
 68     // the archived symbol of "java/lang/Object" may sometimes be lower than "java/lang/String", and
 69     // sometimes be higher. This would cause non-deterministic contents in the archive.
 70    DEBUG_ONLY(static void* last = 0);
 71    void* p = (void*)MetaspaceShared::symbol_space_alloc(size(len)*wordSize);
 72    assert(p > last, "must increase monotonically");
 73    DEBUG_ONLY(last = p);
 74    return p;
 75  }
 76 #endif
 77   int alloc_size = size(len)*wordSize;
 78   address res = (address) AllocateHeap(alloc_size, mtSymbol);
 79   return res;
 80 }
 81 
 82 void* Symbol::operator new(size_t sz, int len, Arena* arena) throw() {
 83   int alloc_size = size(len)*wordSize;
 84   address res = (address)arena->Amalloc_4(alloc_size);
 85   return res;
 86 }
 87 
 88 void Symbol::operator delete(void *p) {
 89   assert(((Symbol*)p)->refcount() == 0, "should not call this");
 90   FreeHeap(p);
 91 }
 92 
 93 #if INCLUDE_CDS
 94 void Symbol::update_identity_hash() {
 95   // This is called at a safepoint during dumping of a static CDS archive. The caller should have
 96   // called os::init_random() with a deterministic seed and then iterate all archived Symbols in
 97   // a deterministic order.
 98   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 99   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), PERM_REFCOUNT);
100 }
101 
102 void Symbol::set_permanent() {
103   // This is called at a safepoint during dumping of a dynamic CDS archive.
104   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
105   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
106 }
107 #endif
108 
109 bool Symbol::is_Q_signature() const {
110   int len = utf8_length();
111   return len > 2 && char_at(0) == JVM_SIGNATURE_INLINE_TYPE && char_at(len - 1) == JVM_SIGNATURE_ENDCLASS;
112 }
113 
114 bool Symbol::is_Q_array_signature() const {
115   int l = utf8_length();
116   if (l < 2 || char_at(0) != JVM_SIGNATURE_ARRAY || char_at(l - 1) != JVM_SIGNATURE_ENDCLASS) {
117     return false;
118   }
119   for (int i = 1; i < (l - 2); i++) {
120     char c = char_at(i);
121     if (c == JVM_SIGNATURE_INLINE_TYPE) {
122       return true;
123     }
124     if (c != JVM_SIGNATURE_ARRAY) {
125       return false;
126     }
127   }
128   return false;
129 }
130 
131 bool Symbol::is_Q_method_signature() const {
132   assert(SignatureVerifier::is_valid_method_signature(this), "must be");
133   int len = utf8_length();
134   if (len > 4 && char_at(0) == JVM_SIGNATURE_FUNC) {
135     for (int i=1; i<len-3; i++) { // Must end with ")Qx;", where x is at least one character or more.
136       if (char_at(i) == JVM_SIGNATURE_ENDFUNC && char_at(i+1) == JVM_SIGNATURE_INLINE_TYPE) {
137         return true;
138       }
139     }
140   }
141   return false;
142 }
143 
144 Symbol* Symbol::fundamental_name(TRAPS) {
145   if ((char_at(0) == JVM_SIGNATURE_INLINE_TYPE || char_at(0) == JVM_SIGNATURE_CLASS) && ends_with(JVM_SIGNATURE_ENDCLASS)) {
146     return SymbolTable::new_symbol(this, 1, utf8_length() - 1);
147   } else {
148     // reference count is incremented to be consistent with the behavior with
149     // the SymbolTable::new_symbol() call above
150     this->increment_refcount();
151     return this;
152   }
153 }
154 
155 bool Symbol::is_same_fundamental_type(Symbol* s) const {
156   if (this == s) return true;
157   if (utf8_length() < 3) return false;
158   int offset1, offset2, len;
159   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
160     if (char_at(0) != JVM_SIGNATURE_INLINE_TYPE && char_at(0) != JVM_SIGNATURE_CLASS) return false;
161     offset1 = 1;
162     len = utf8_length() - 2;
163   } else {
164     offset1 = 0;
165     len = utf8_length();
166   }
167   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
168     if (s->char_at(0) != JVM_SIGNATURE_INLINE_TYPE && s->char_at(0) != JVM_SIGNATURE_CLASS) return false;
169     offset2 = 1;
170   } else {
171     offset2 = 0;
172   }
173   if ((offset2 + len) > s->utf8_length()) return false;
174   if ((utf8_length() - offset1 * 2) != (s->utf8_length() - offset2 * 2))
175     return false;
176   int l = len;
177   while (l-- > 0) {
178     if (char_at(offset1 + l) != s->char_at(offset2 + l))
179       return false;
180   }
181   return true;
182 }
183 
184 // ------------------------------------------------------------------
185 // Symbol::index_of
186 //
187 // Finds if the given string is a substring of this symbol's utf8 bytes.
188 // Return -1 on failure.  Otherwise return the first index where str occurs.
189 int Symbol::index_of_at(int i, const char* str, int len) const {
190   assert(i >= 0 && i <= utf8_length(), "oob");
191   if (len <= 0)  return 0;
192   char first_char = str[0];
193   address bytes = (address) ((Symbol*)this)->base();
194   address limit = bytes + utf8_length() - len;  // inclusive limit
195   address scan = bytes + i;
196   if (scan > limit)
197     return -1;
198   for (; scan <= limit; scan++) {
199     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
200     if (scan == NULL)
201       return -1;  // not found
202     assert(scan >= bytes+i && scan <= limit, "scan oob");
203     if (len <= 2
204         ? (char) scan[len-1] == str[len-1]
205         : memcmp(scan+1, str+1, len-1) == 0) {
206       return (int)(scan - bytes);
207     }
208   }
209   return -1;
210 }
211 
212 
213 char* Symbol::as_C_string(char* buf, int size) const {
214   if (size > 0) {
215     int len = MIN2(size - 1, utf8_length());
216     for (int i = 0; i < len; i++) {
217       buf[i] = char_at(i);
218     }
219     buf[len] = '\0';
220   }
221   return buf;
222 }
223 
224 char* Symbol::as_C_string() const {
225   int len = utf8_length();
226   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
227   return as_C_string(str, len + 1);
228 }
229 
230 void Symbol::print_utf8_on(outputStream* st) const {
231   st->print("%s", as_C_string());
232 }
233 
234 void Symbol::print_symbol_on(outputStream* st) const {
235   char *s;
236   st = st ? st : tty;
237   {
238     // ResourceMark may not affect st->print(). If st is a string
239     // stream it could resize, using the same resource arena.
240     ResourceMark rm;
241     s = as_quoted_ascii();
242     s = os::strdup(s);
243   }
244   if (s == NULL) {
245     st->print("(null)");
246   } else {
247     st->print("%s", s);
248     os::free(s);
249   }
250 }
251 
252 char* Symbol::as_quoted_ascii() const {
253   const char *ptr = (const char *)&_body[0];
254   int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
255   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
256   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
257   return result;
258 }
259 
260 jchar* Symbol::as_unicode(int& length) const {
261   Symbol* this_ptr = (Symbol*)this;
262   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
263   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
264   if (length > 0) {
265     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
266   }
267   return result;
268 }
269 
270 const char* Symbol::as_klass_external_name(char* buf, int size) const {
271   if (size > 0) {
272     char* str    = as_C_string(buf, size);
273     int   length = (int)strlen(str);
274     // Turn all '/'s into '.'s (also for array klasses)
275     for (int index = 0; index < length; index++) {
276       if (str[index] == JVM_SIGNATURE_SLASH) {
277         str[index] = JVM_SIGNATURE_DOT;
278       }
279     }
280     return str;
281   } else {
282     return buf;
283   }
284 }
285 
286 const char* Symbol::as_klass_external_name() const {
287   char* str    = as_C_string();
288   int   length = (int)strlen(str);
289   // Turn all '/'s into '.'s (also for array klasses)
290   for (int index = 0; index < length; index++) {
291     if (str[index] == JVM_SIGNATURE_SLASH) {
292       str[index] = JVM_SIGNATURE_DOT;
293     }
294   }
295   return str;
296 }
297 
298 static void print_class(outputStream *os, const SignatureStream& ss) {
299   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
300   for (int i = sb; i < se; ++i) {
301     int ch = ss.raw_char_at(i);
302     if (ch == JVM_SIGNATURE_SLASH) {
303       os->put(JVM_SIGNATURE_DOT);
304     } else {
305       os->put(ch);
306     }
307   }
308 }
309 
310 static void print_array(outputStream *os, SignatureStream& ss) {
311   int dimensions = ss.skip_array_prefix();
312   assert(dimensions > 0, "");
313   if (ss.is_reference()) {
314     print_class(os, ss);
315   } else {
316     os->print("%s", type2name(ss.type()));
317   }
318   for (int i = 0; i < dimensions; ++i) {
319     os->print("[]");
320   }
321 }
322 
323 void Symbol::print_as_signature_external_return_type(outputStream *os) {
324   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
325     if (ss.at_return_type()) {
326       if (ss.is_array()) {
327         print_array(os, ss);
328       } else if (ss.is_reference()) {
329         print_class(os, ss);
330       } else {
331         os->print("%s", type2name(ss.type()));
332       }
333     }
334   }
335 }
336 
337 void Symbol::print_as_signature_external_parameters(outputStream *os) {
338   bool first = true;
339   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
340     if (ss.at_return_type()) break;
341     if (!first) { os->print(", "); }
342     if (ss.is_array()) {
343       print_array(os, ss);
344     } else if (ss.is_reference()) {
345       print_class(os, ss);
346     } else {
347       os->print("%s", type2name(ss.type()));
348     }
349     first = false;
350   }
351 }
352 
353 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
354 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
355 // lookup to avoid reviving a dead Symbol.
356 bool Symbol::try_increment_refcount() {
357   uint32_t found = _hash_and_refcount;
358   while (true) {
359     uint32_t old_value = found;
360     int refc = extract_refcount(old_value);
361     if (refc == PERM_REFCOUNT) {
362       return true;  // sticky max or created permanent
363     } else if (refc == 0) {
364       return false; // dead, can't revive.
365     } else {
366       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
367       if (found == old_value) {
368         return true; // successfully updated.
369       }
370       // refcount changed, try again.
371     }
372   }
373 }
374 
375 // The increment_refcount() is called when not doing lookup. It is assumed that you
376 // have a symbol with a non-zero refcount and it can't become zero while referenced by
377 // this caller.
378 void Symbol::increment_refcount() {
379   if (!try_increment_refcount()) {
380 #ifdef ASSERT
381     print();
382     fatal("refcount has gone to zero");
383 #endif
384   }
385 #ifndef PRODUCT
386   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
387     NOT_PRODUCT(Atomic::inc(&_total_count);)
388   }
389 #endif
390 }
391 
392 // Decrement refcount potentially while racing increment, so we need
393 // to check the value after attempting to decrement so that if another
394 // thread increments to PERM_REFCOUNT the value is not decremented.
395 void Symbol::decrement_refcount() {
396   uint32_t found = _hash_and_refcount;
397   while (true) {
398     uint32_t old_value = found;
399     int refc = extract_refcount(old_value);
400     if (refc == PERM_REFCOUNT) {
401       return;  // refcount is permanent, permanent is sticky
402     } else if (refc == 0) {
403 #ifdef ASSERT
404       print();
405       fatal("refcount underflow");
406 #endif
407       return;
408     } else {
409       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
410       if (found == old_value) {
411         return;  // successfully updated.
412       }
413       // refcount changed, try again.
414     }
415   }
416 }
417 
418 void Symbol::make_permanent() {
419   uint32_t found = _hash_and_refcount;
420   while (true) {
421     uint32_t old_value = found;
422     int refc = extract_refcount(old_value);
423     if (refc == PERM_REFCOUNT) {
424       return;  // refcount is permanent, permanent is sticky
425     } else if (refc == 0) {
426 #ifdef ASSERT
427       print();
428       fatal("refcount underflow");
429 #endif
430       return;
431     } else {
432       int hash = extract_hash(old_value);
433       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
434       if (found == old_value) {
435         return;  // successfully updated.
436       }
437       // refcount changed, try again.
438     }
439   }
440 }
441 
442 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
443   if (log_is_enabled(Trace, cds)) {
444     LogStream trace_stream(Log(cds)::trace());
445     trace_stream.print("Iter(Symbol): %p ", this);
446     print_value_on(&trace_stream);
447     trace_stream.cr();
448   }
449 }
450 
451 void Symbol::print_on(outputStream* st) const {
452   st->print("Symbol: '");
453   print_symbol_on(st);
454   st->print("'");
455   st->print(" count %d", refcount());
456 }
457 
458 void Symbol::print() const { print_on(tty); }
459 
460 // The print_value functions are present in all builds, to support the
461 // disassembler and error reporting.
462 void Symbol::print_value_on(outputStream* st) const {
463   st->print("'");
464   for (int i = 0; i < utf8_length(); i++) {
465     st->print("%c", char_at(i));
466   }
467   st->print("'");
468 }
469 
470 void Symbol::print_value() const { print_value_on(tty); }
471 
472 bool Symbol::is_valid(Symbol* s) {
473   if (!is_aligned(s, sizeof(MetaWord))) return false;
474   if ((size_t)s < os::min_page_size()) return false;
475 
476   if (!os::is_readable_range(s, s + 1)) return false;
477 
478   // Symbols are not allocated in Java heap.
479   if (Universe::heap()->is_in(s)) return false;
480 
481   int len = s->utf8_length();
482   if (len < 0) return false;
483 
484   jbyte* bytes = (jbyte*) s->bytes();
485   return os::is_readable_range(bytes, bytes + len);
486 }
487 
488 void Symbol::print_Qvalue_on(outputStream* st) const {
489   if (this == NULL) {
490     st->print("NULL");
491   } else {
492     st->print("'Q");
493     for (int i = 0; i < utf8_length(); i++) {
494       st->print("%c", char_at(i));
495     }
496     st->print(";'");
497   }
498 }
499 
// Counter defined only in non-product builds and initialized to zero.
// increment_refcount() bumps it for symbols that are not permanent.
// SymbolTable prints this in its statistics
NOT_PRODUCT(size_t Symbol::_total_count = 0;)