/* Shared functions related to mangling names for the GNU compiler
   for the Java(TM) language.
   Copyright (C) 2001-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.

Java and all Java-based marks are trademarks or registered trademarks
of Sun Microsystems, Inc. in the United States and other countries.
The Free Software Foundation is independent of Sun Microsystems, Inc.  */

/* Written by Alexandre Petit-Bianco */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "jcf.h"
#include "tree.h"
#include "java-tree.h"
#include "obstack.h"
#include "diagnostic-core.h"

static void append_unicode_mangled_name (const char *, int);
#ifndef HAVE_AS_UTF8
static int  unicode_mangling_length (const char *, int);
#endif

extern struct obstack *mangle_obstack;

/* Compare the Utf8 string STR, which is LENGTH bytes long, against
   the NUL-terminated string NAME.

   One character is fetched from STR with UTF8_GET for every character
   of NAME.  At the first mismatch the difference (STR character minus
   NAME character) is returned.  When all of NAME matched, the result
   is 0 if STR was consumed exactly and 1 if part of STR is left.  */

static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *end = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; expected++)
    {
      int decoded = UTF8_GET (str, end);
      int diff = decoded - *expected;

      if (diff != 0)
        return diff;
    }

  if (str != end)
    return 1;
  return 0;
}

/* A sorted list of all C++ keywords.  If you change this, be sure
   also to change the list in
   libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java.
*/ static const char *const cxx_keywords[] = { "_Complex", "__alignof", "__alignof__", "__asm", "__asm__", "__attribute", "__attribute__", "__builtin_va_arg", "__complex", "__complex__", "__const", "__const__", "__extension__", "__imag", "__imag__", "__inline", "__inline__", "__label__", "__null", "__real", "__real__", "__restrict", "__restrict__", "__signed", "__signed__", "__typeof", "__typeof__", "__volatile", "__volatile__", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", "catch", "char", "class", "compl", "const", "const_cast", "continue", "default", "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", "float", "for", "friend", "goto", "if", "inline", "int", "long", "mutable", "namespace", "new", "not", "not_eq", "operator", "or", "or_eq", "private", "protected", "public", "register", "reinterpret_cast", "return", "short", "signed", "sizeof", "static", "static_cast", "struct", "switch", "template", "this", "throw", "true", "try", "typedef", "typeid", "typename", "typeof", "union", "unsigned", "using", "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq" }; /* Return true if NAME is a C++ keyword. */ int cxx_keyword_p (const char *name, int length) { int last = ARRAY_SIZE (cxx_keywords); int first = 0; int mid = (last + first) / 2; int old = -1; for (mid = (last + first) / 2; mid != old; old = mid, mid = (last + first) / 2) { int kwl = strlen (cxx_keywords[mid]); int min_length = kwl > length ? length : kwl; int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]); if (r == 0) { int i; /* We've found a match if all the remaining characters are `$'. */ for (i = min_length; i < length && name[i] == '$'; ++i) ; if (i == length) return 1; r = 1; } if (r < 0) last = mid; else first = mid; } return 0; } /* If NAME happens to be a C++ keyword, add `$'. 
*/ #define MANGLE_CXX_KEYWORDS(NAME, LEN) \ do \ { \ if (cxx_keyword_p ((NAME), (LEN))) \ { \ char *tmp_buf = (char *)alloca ((LEN)+1); \ memcpy (tmp_buf, (NAME), (LEN)); \ tmp_buf[LEN]= '$'; \ (NAME) = tmp_buf; \ (LEN)++; \ } \ } \ while (0) /* If the assembler doesn't support UTF8 in symbol names, some characters might need to be escaped. */ #ifndef HAVE_AS_UTF8 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string appropriately mangled (with Unicode escapes if needed) to MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so frequently that they could be cached. */ void append_gpp_mangled_name (const char *name, int len) { int encoded_len, needs_escapes; char buf[6]; MANGLE_CXX_KEYWORDS (name, len); encoded_len = unicode_mangling_length (name, len); needs_escapes = encoded_len > 0; sprintf (buf, "%d", (needs_escapes ? encoded_len : len)); obstack_grow (mangle_obstack, buf, strlen (buf)); if (needs_escapes) append_unicode_mangled_name (name, len); else obstack_grow (mangle_obstack, name, len); } /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK. Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in which case `__U' will be mangled `__U_'. */ static void append_unicode_mangled_name (const char *name, int len) { const unsigned char *ptr; const unsigned char *limit = (const unsigned char *)name + len; int uuU = 0; for (ptr = (const unsigned char *) name; ptr < limit; ) { int ch = UTF8_GET(ptr, limit); if ((ISALNUM (ch) && ch != 'U') || ch == '$') { obstack_1grow (mangle_obstack, ch); uuU = 0; } /* Everything else needs encoding */ else { char buf [9]; if (ch == '_' || ch == 'U') { /* Prepare to recognize __U */ if (ch == '_' && (uuU < 3)) { uuU++; obstack_1grow (mangle_obstack, ch); } /* We recognize __U that we wish to encode __U_. Finish the encoding. 
*/ else if (ch == 'U' && (uuU == 2)) { uuU = 0; obstack_grow (mangle_obstack, "U_", 2); } /* Otherwise, just reset uuU and emit the character we have. */ else { uuU = 0; obstack_1grow (mangle_obstack, ch); } continue; } sprintf (buf, "__U%x_", ch); obstack_grow (mangle_obstack, buf, strlen (buf)); uuU = 0; } } } /* Assuming (NAME, LEN) is a Utf8-encoding string, calculate the length of the string as mangled (a la g++) including Unicode escapes. If no escapes are needed, return 0. */ static int unicode_mangling_length (const char *name, int len) { const unsigned char *ptr; const unsigned char *limit = (const unsigned char *)name + len; int need_escapes = 0; /* Whether we need an escape or not */ int num_chars = 0; /* Number of characters in the mangled name */ int uuU = 0; /* Help us to find __U. 0: '_', 1: '__' */ for (ptr = (const unsigned char *) name; ptr < limit; ) { int ch = UTF8_GET(ptr, limit); if (ch < 0) error ("internal error - invalid Utf8 name"); if ((ISALNUM (ch) && ch != 'U') || ch == '$') { num_chars++; uuU = 0; } /* Everything else needs encoding */ else { int encoding_length = 2; if (ch == '_' || ch == 'U') { /* It's always at least one character. */ num_chars++; /* Prepare to recognize __U */ if (ch == '_' && (uuU < 3)) uuU++; /* We recognize __U that we wish to encode __U_, we count one more character. */ else if (ch == 'U' && (uuU == 2)) { num_chars++; need_escapes = 1; uuU = 0; } /* Otherwise, just reset uuU */ else uuU = 0; continue; } if (ch > 0xff) encoding_length++; if (ch > 0xfff) encoding_length++; num_chars += (4 + encoding_length); need_escapes = 1; uuU = 0; } } if (need_escapes) return num_chars; else return 0; } #else /* The assembler supports UTF8, we don't use escapes. Mangling is simply NAME. is the number of UTF8 encoded characters that are found in NAME. Note that `java', `lang' and `Object' are used so frequently that they could be cached. 
*/ void append_gpp_mangled_name (const char *name, int len) { const unsigned char *ptr; const unsigned char *limit; int encoded_len; char buf [6]; MANGLE_CXX_KEYWORDS (name, len); limit = (const unsigned char *)name + len; /* Compute the length of the string we wish to mangle. */ for (encoded_len = 0, ptr = (const unsigned char *) name; ptr < limit; encoded_len++) { int ch = UTF8_GET(ptr, limit); if (ch < 0) error ("internal error - invalid Utf8 name"); } sprintf (buf, "%d", encoded_len); obstack_grow (mangle_obstack, buf, strlen (buf)); obstack_grow (mangle_obstack, name, len); } #endif /* HAVE_AS_UTF8 */