summaryrefslogtreecommitdiffstats
path: root/runtime/jni_internal_test.cc
diff options
context:
space:
mode:
authorNarayan Kamath <narayan@google.com>2015-02-13 11:49:22 +0000
committerNarayan Kamath <narayan@google.com>2015-03-25 08:46:51 +0000
commite16dad1d6388b0305f13e2171308a77f42e7c682 (patch)
treea56d002ba622ecce7f1310e8fac56adca6547d5a /runtime/jni_internal_test.cc
parentebfd062af849a915bd75eebd81c6fdea15b1d8d5 (diff)
downloadandroid_art-e16dad1d6388b0305f13e2171308a77f42e7c682.tar.gz
android_art-e16dad1d6388b0305f13e2171308a77f42e7c682.tar.bz2
android_art-e16dad1d6388b0305f13e2171308a77f42e7c682.zip
Emit 4 byte UTF-sequences in place of encoded surrogate pairs.
Symmetric with a5afcfc73141e5e378d79a326d0 which converts 4 byte UTF-8 sequences to surrogate pairs. bug: 18848397 Change-Id: I42adc275b7e0df0cbbd9d8a799e8b0447d8f5cae
Diffstat (limited to 'runtime/jni_internal_test.cc')
-rw-r--r--runtime/jni_internal_test.cc36
1 files changed, 25 insertions, 11 deletions
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 10482142da..5516eab4f8 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1355,24 +1355,38 @@ TEST_F(JniInternalTest, NewStringUTF) {
s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80");
EXPECT_NE(s, nullptr);
EXPECT_EQ(2, env_->GetStringLength(s));
- // Note that this uses 2 x 3 byte UTF sequences, one
- // for each half of the surrogate pair.
- EXPECT_EQ(6, env_->GetStringUTFLength(s));
+
+ // The surrogate pair gets encoded into a 4 byte UTF sequence..
+ EXPECT_EQ(4, env_->GetStringUTFLength(s));
const char* chars = env_->GetStringUTFChars(s, nullptr);
- EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80", chars);
+ EXPECT_STREQ("\xf0\x90\x90\x80", chars);
env_->ReleaseStringUTFChars(s, chars);
+ // .. but is stored as is in the utf-16 representation.
+ const jchar* jchars = env_->GetStringChars(s, nullptr);
+ EXPECT_EQ(0xd801, jchars[0]);
+ EXPECT_EQ(0xdc00, jchars[1]);
+ env_->ReleaseStringChars(s, jchars);
+
// 4 byte UTF sequence appended to an encoded surrogate pair.
s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80 \xf0\x9f\x8f\xa0");
EXPECT_NE(s, nullptr);
+
+ // The 4 byte sequence {0xf0, 0x9f, 0x8f, 0xa0} is converted into a surrogate
+ // pair {0xd83c, 0xdfe0}.
EXPECT_EQ(5, env_->GetStringLength(s));
- EXPECT_EQ(13, env_->GetStringUTFLength(s));
+ jchars = env_->GetStringChars(s, nullptr);
+ // The first surrogate pair, encoded as such in the input.
+ EXPECT_EQ(0xd801, jchars[0]);
+ EXPECT_EQ(0xdc00, jchars[1]);
+ // The second surrogate pair, from the 4 byte UTF sequence in the input.
+ EXPECT_EQ(0xd83c, jchars[3]);
+ EXPECT_EQ(0xdfe0, jchars[4]);
+ env_->ReleaseStringChars(s, jchars);
+
+ EXPECT_EQ(9, env_->GetStringUTFLength(s));
chars = env_->GetStringUTFChars(s, nullptr);
- // The 4 byte sequence {0xf0, 0x9f, 0x8f, 0xa0} is converted into a surrogate
- // pair {0xd83c, 0xdfe0} which is then converted into a two three byte
- // sequences {0xed 0xa0, 0xbc} and {0xed, 0xbf, 0xa0}, one for each half of
- // the surrogate pair.
- EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80 \xed\xa0\xbc\xed\xbf\xa0", chars);
+ EXPECT_STREQ("\xf0\x90\x90\x80 \xf0\x9f\x8f\xa0", chars);
env_->ReleaseStringUTFChars(s, chars);
// A string with 1, 2, 3 and 4 byte UTF sequences with spaces
@@ -1380,7 +1394,7 @@ TEST_F(JniInternalTest, NewStringUTF) {
s = env_->NewStringUTF("\x24 \xc2\xa2 \xe2\x82\xac \xf0\x9f\x8f\xa0");
EXPECT_NE(s, nullptr);
EXPECT_EQ(8, env_->GetStringLength(s));
- EXPECT_EQ(15, env_->GetStringUTFLength(s));
+ EXPECT_EQ(13, env_->GetStringUTFLength(s));
}
TEST_F(JniInternalTest, NewString) {