summary refs log tree commit diff stats
path: root/runtime/check_jni.cc
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/check_jni.cc')
-rw-r--r-- runtime/check_jni.cc 30
1 files changed, 23 insertions, 7 deletions
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index e45d3a383..6ec0949ce 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -1095,6 +1095,8 @@ class ScopedCheck {
return true;
}
+ // Checks whether |bytes| is valid modified UTF-8. We also accept 4-byte UTF-8
+ // sequences in place of encoded surrogate pairs.
static uint8_t CheckUtfBytes(const char* bytes, const char** errorKind) {
while (*bytes != '\0') {
uint8_t utf8 = *(bytes++);
@@ -1114,14 +1116,26 @@ class ScopedCheck {
case 0x09:
case 0x0a:
case 0x0b:
- case 0x0f:
- /*
- * Bit pattern 10xx or 1111, which are illegal start bytes.
- * Note: 1111 is valid for normal UTF-8, but not the
- * Modified UTF-8 used here.
- */
+ // Bit patterns 10xx, which are illegal start bytes.
*errorKind = "start";
return utf8;
+ case 0x0f:
+ // Bit pattern 1111, which might be the start of a 4 byte sequence.
+ if ((utf8 & 0x08) == 0) {
+ // Bit pattern 1111 0xxx, which is the start of a 4 byte sequence.
+ // We consume one continuation byte here, and fall through to consume two more.
+ utf8 = *(bytes++);
+ if ((utf8 & 0xc0) != 0x80) {
+ *errorKind = "continuation";
+ return utf8;
+ }
+ } else {
+ *errorKind = "start";
+ return utf8;
+ }
+
+ // Fall through to the cases below to consume two more continuation bytes.
+ FALLTHROUGH_INTENDED;
case 0x0e:
// Bit pattern 1110, so there are two additional bytes.
utf8 = *(bytes++);
@@ -1129,7 +1143,9 @@ class ScopedCheck {
*errorKind = "continuation";
return utf8;
}
- FALLTHROUGH_INTENDED; // Fall-through to take care of the final byte.
+
+ // Fall through to consume one more continuation byte.
+ FALLTHROUGH_INTENDED;
case 0x0c:
case 0x0d:
// Bit pattern 110x, so there is one additional byte.