diff options
Diffstat (limited to 'runtime/utf_test.cc')
-rw-r--r-- | runtime/utf_test.cc | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 8048bbdbe0..94a6ea57e2 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -19,6 +19,8 @@
 #include "common_runtime_test.h"
 #include "utf-inl.h"
 
+#include <vector>
+
 namespace art {
 
 class UtfTest : public CommonRuntimeTest {};
@@ -110,4 +112,52 @@ TEST_F(UtfTest, CountModifiedUtf8Chars) {
   EXPECT_EQ(2u, CountModifiedUtf8Chars(reinterpret_cast<const char *>(kSurrogateEncoding)));
 }
 
+// Checks that CountUtf8Bytes() reports expected.size() for the UTF-16 |input| and that
+// ConvertUtf16ToModifiedUtf8() emits exactly the bytes in |expected|.
+static void AssertConversion(const std::vector<uint16_t>& input,
+                             const std::vector<uint8_t>& expected) {
+  ASSERT_EQ(expected.size(), CountUtf8Bytes(input.data(), input.size()));
+  std::vector<uint8_t> output(expected.size());
+  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(output.data()), input.data(), input.size());
+  EXPECT_EQ(expected, output);
+}
+
+TEST_F(UtfTest, CountAndConvertUtf8Bytes) {
+  // Surrogate pairs will be converted into 4 byte sequences.
+  AssertConversion({ 0xd801, 0xdc00 }, { 0xf0, 0x90, 0x90, 0x80 });
+
+  // Unpaired trailing surrogates (0xdc00-0xdfff) are emitted as three byte encodings.
+  AssertConversion({ 0xdef0 }, { 0xed, 0xbb, 0xb0 });
+  AssertConversion({ 0xdcff }, { 0xed, 0xb3, 0xbf });
+  // Two byte encoding.
+  AssertConversion({ 0x0101 }, { 0xc4, 0x81 });
+
+  // Two byte special case : 0 must use an overlong encoding.
+  AssertConversion({ 0x0101, 0x0000 }, { 0xc4, 0x81, 0xc0, 0x80 });
+
+  // One byte encoding.
+  AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
+
+  AssertConversion({
+      0xd802, 0xdc02,  // Surrogate pair
+      0xdef0, 0xdcff,  // Three byte encodings
+      0x0101, 0x0000,  // Two byte encodings
+      'p'   , 'p'      // One byte encoding
+    }, {
+      0xf0, 0x90, 0xa0, 0x82,
+      0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
+      0xc4, 0x81, 0xc0, 0x80,
+      0x70, 0x70
+    });
+}
+
+TEST_F(UtfTest, CountAndConvertUtf8Bytes_UnpairedSurrogate) {
+  // Unpaired leading surrogate at the end of input.
+  AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 });
+  // Unpaired (or incorrectly paired) surrogates in the middle of the input.
+  AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' });
+  AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' });
+  AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' });
+}
+
 }  // namespace art