diff options
Diffstat (limited to 'runtime/check_jni.cc')
-rw-r--r-- | runtime/check_jni.cc | 30 |
1 files changed, 23 insertions, 7 deletions
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc index e45d3a383..6ec0949ce 100644 --- a/runtime/check_jni.cc +++ b/runtime/check_jni.cc @@ -1095,6 +1095,8 @@ class ScopedCheck { return true; } + // Checks whether |bytes| is valid modified UTF-8. We also accept 4 byte UTF + // sequences in place of encoded surrogate pairs. static uint8_t CheckUtfBytes(const char* bytes, const char** errorKind) { while (*bytes != '\0') { uint8_t utf8 = *(bytes++); @@ -1114,14 +1116,26 @@ class ScopedCheck { case 0x09: case 0x0a: case 0x0b: - case 0x0f: - /* - * Bit pattern 10xx or 1111, which are illegal start bytes. - * Note: 1111 is valid for normal UTF-8, but not the - * Modified UTF-8 used here. - */ + // Bit patterns 10xx, which are illegal start bytes. *errorKind = "start"; return utf8; + case 0x0f: + // Bit pattern 1111, which might be the start of a 4 byte sequence. + if ((utf8 & 0x08) == 0) { + // Bit pattern 1111 0xxx, which is the start of a 4 byte sequence. + // We consume one continuation byte here, and fall through to consume two more. + utf8 = *(bytes++); + if ((utf8 & 0xc0) != 0x80) { + *errorKind = "continuation"; + return utf8; + } + } else { + *errorKind = "start"; + return utf8; + } + + // Fall through to the cases below to consume two more continuation bytes. + FALLTHROUGH_INTENDED; case 0x0e: // Bit pattern 1110, so there are two additional bytes. utf8 = *(bytes++); @@ -1129,7 +1143,9 @@ class ScopedCheck { *errorKind = "continuation"; return utf8; } - FALLTHROUGH_INTENDED; // Fall-through to take care of the final byte. + + // Fall through to consume one more continuation byte. + FALLTHROUGH_INTENDED; case 0x0c: case 0x0d: // Bit pattern 110x, so there is one additional byte. |