diff options
-rw-r--r-- | OWNERS | 7 | ||||
-rw-r--r-- | android_icu4j/src/main/java/android/icu/text/Bidi.java | 141 | ||||
-rw-r--r-- | android_icu4j/src/main/tests/android/icu/dev/test/bidi/TestBidi.java | 20 | ||||
-rw-r--r-- | android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java | 34 | ||||
-rw-r--r-- | icu4c/source/Android.bp | 8 | ||||
-rw-r--r-- | icu4c/source/common/Android.bp | 13 | ||||
-rw-r--r-- | icu4c/source/common/listformatter.cpp | 117 | ||||
-rw-r--r-- | icu4c/source/common/unicode/listformatter.h | 2 | ||||
-rw-r--r-- | icu4c/source/i18n/Android.bp | 13 | ||||
-rw-r--r-- | icu4c/source/test/intltest/listformattertest.cpp | 45 | ||||
-rw-r--r-- | icu4c/source/test/intltest/listformattertest.h | 3 | ||||
-rw-r--r-- | icu4c/source/test/intltest/measfmttest.cpp | 2 | ||||
-rw-r--r-- | icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java | 141 | ||||
-rw-r--r-- | icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java | 20 | ||||
-rw-r--r-- | icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java | 34 |
15 files changed, 418 insertions, 182 deletions
@@ -0,0 +1,7 @@ +icu-team+reviews@google.com +mscherer@google.com +roubert@google.com + +android-libcore-team+review@google.com +jsauer@google.com +nfuller@google.com diff --git a/android_icu4j/src/main/java/android/icu/text/Bidi.java b/android_icu4j/src/main/java/android/icu/text/Bidi.java index b1090ae69..2234e98b4 100644 --- a/android_icu4j/src/main/java/android/icu/text/Bidi.java +++ b/android_icu4j/src/main/java/android/icu/text/Bidi.java @@ -2639,28 +2639,29 @@ public class Bidi { return dirct; } - /* + /** * Use a pre-specified embedding levels array: * - * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), + * <p>Adjust the directional properties for overrides (->LEVEL_OVERRIDE), * ignore all explicit codes (X9), * and check all the preset levels. * - * Recalculate the flags to have them reflect the real properties + * <p>Recalculate the flags to have them reflect the real properties * after taking the explicit embeddings into account. */ private byte checkExplicitLevels() { - byte dirProp; - int i; int isolateCount = 0; this.flags = 0; /* collect all directionalities in the text */ - byte level; this.isolateCount = 0; - for (i = 0; i < length; ++i) { - level = levels[i]; - dirProp = dirProps[i]; + int currentParaIndex = 0; + int currentParaLimit = paras_limit[0]; + byte currentParaLevel = paraLevel; + + for (int i = 0; i < length; ++i) { + byte level = levels[i]; + byte dirProp = dirProps[i]; if (dirProp == LRI || dirProp == RLI) { isolateCount++; if (isolateCount > this.isolateCount) @@ -2670,21 +2671,40 @@ public class Bidi { isolateCount--; else if (dirProp == B) isolateCount = 0; - if ((level & LEVEL_OVERRIDE) != 0) { + + // optimized version of byte currentParaLevel = GetParaLevelAt(i); + if (defaultParaLevel != 0 && + i == currentParaLimit && (currentParaIndex + 1) < paraCount) { + currentParaLevel = paras_level[++currentParaIndex]; + currentParaLimit = paras_limit[currentParaIndex]; + } + + int overrideFlag = level & LEVEL_OVERRIDE; + level &= ~LEVEL_OVERRIDE; + if (level < currentParaLevel || MAX_EXPLICIT_LEVEL < level) { + if (level == 0) { + if (dirProp == B) { + // Paragraph separators are ok with explicit level 0. + // Prevents reordering of paragraphs. + } else { + // Treat explicit level 0 as a wildcard for the paragraph level. + // Avoid making the caller guess what the paragraph level would be. + level = currentParaLevel; + levels[i] = (byte)(level | overrideFlag); + } + } else { + // 1 <= level < currentParaLevel or MAX_EXPLICIT_LEVEL < level + throw new IllegalArgumentException("level " + level + + " out of bounds at " + i); + } + } + if (overrideFlag != 0) { /* keep the override flag in levels[i] but adjust the flags */ - level &= ~LEVEL_OVERRIDE; /* make the range check below simpler */ flags |= DirPropFlagO(level); } else { /* set the flags */ flags |= DirPropFlagE(level) | DirPropFlag(dirProp); } - if ((level < GetParaLevelAt(i) && - !((0 == level) && (dirProp == B))) || - (MAX_EXPLICIT_LEVEL < level)) { - /* level out of bounds */ - throw new IllegalArgumentException("level " + level + - " out of bounds at " + i); - } } if ((flags & MASK_EMBEDDING) != 0) flags |= DirPropFlagLR(paraLevel); @@ -3780,24 +3800,22 @@ public class Bidi { /** * Perform the Unicode Bidi algorithm. It is defined in the - * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, - * version 13, - * also described in The Unicode Standard, Version 4.0 .<p> + * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>. * - * This method takes a piece of plain text containing one or more paragraphs, + * <p>This method takes a piece of plain text containing one or more paragraphs, * with or without externally specified embedding levels from <i>styled</i> - * text and computes the left-right-directionality of each character.<p> + * text and computes the left-right-directionality of each character.</p> * - * If the entire text is all of the same directionality, then + * <p>If the entire text is all of the same directionality, then * the method may not perform all the steps described by the algorithm, * i.e., some levels may not be the same as if all steps were performed. * This is not relevant for unidirectional text.<br> * For example, in pure LTR text with numbers the numbers would get * a resolved level of 2 higher than the surrounding text according to * the algorithm. This implementation may set all resolved levels to - * the same value in such a case.<p> + * the same value in such a case.</p> * - * The text can be composed of multiple paragraphs. Occurrence of a block + * <p>The text can be composed of multiple paragraphs. Occurrence of a block * separator in the text terminates a paragraph, and whatever comes next starts * a new paragraph. The exception to this rule is when a Carriage Return (CR) * is followed by a Line Feed (LF). Both CR and LF are block separators, but @@ -3805,7 +3823,7 @@ public class Bidi { * preceding paragraph, and a new paragraph will be started by a character * coming after the LF. * - * Although the text is passed here as a <code>String</code>, it is + * <p>Although the text is passed here as a <code>String</code>, it is * stored internally as an array of characters. Therefore the * documentation will refer to indexes of the characters in the text. * @@ -3830,11 +3848,14 @@ public class Bidi { * A level overrides the directional property of its corresponding * (same index) character if the level has the * <code>LEVEL_OVERRIDE</code> bit set.<br><br> - * Except for that bit, it must be + * Aside from that bit, it must be * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, - * with one exception: a level of zero may be specified for a - * paragraph separator even if <code>paraLevel>0</code> when multiple - * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> + * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if <code>LEVEL_OVERRIDE</code> + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.<br><br> * <strong>Caution: </strong>A reference to this array, not a copy * of the levels, will be stored in the <code>Bidi</code> object; * the <code>embeddingLevels</code> @@ -3864,24 +3885,22 @@ public class Bidi { /** * Perform the Unicode Bidi algorithm. It is defined in the - * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, - * version 13, - * also described in The Unicode Standard, Version 4.0 .<p> + * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>. * - * This method takes a piece of plain text containing one or more paragraphs, + * <p>This method takes a piece of plain text containing one or more paragraphs, * with or without externally specified embedding levels from <i>styled</i> - * text and computes the left-right-directionality of each character.<p> + * text and computes the left-right-directionality of each character.</p> * - * If the entire text is all of the same directionality, then + * <p>If the entire text is all of the same directionality, then * the method may not perform all the steps described by the algorithm, * i.e., some levels may not be the same as if all steps were performed. * This is not relevant for unidirectional text.<br> * For example, in pure LTR text with numbers the numbers would get * a resolved level of 2 higher than the surrounding text according to * the algorithm. This implementation may set all resolved levels to - * the same value in such a case.<p> + * the same value in such a case.</p> * - * The text can be composed of multiple paragraphs. Occurrence of a block + * <p>The text can be composed of multiple paragraphs. Occurrence of a block * separator in the text terminates a paragraph, and whatever comes next starts * a new paragraph. The exception to this rule is when a Carriage Return (CR) * is followed by a Line Feed (LF). Both CR and LF are block separators, but @@ -3889,7 +3908,7 @@ public class Bidi { * preceding paragraph, and a new paragraph will be started by a character * coming after the LF. * - * The text is stored internally as an array of characters. Therefore the + * <p>The text is stored internally as an array of characters. Therefore the * documentation will refer to indexes of the characters in the text. * * @param chars contains the text that the Bidi algorithm will be performed @@ -3913,11 +3932,14 @@ public class Bidi { * A level overrides the directional property of its corresponding * (same index) character if the level has the * <code>LEVEL_OVERRIDE</code> bit set.<br><br> - * Except for that bit, it must be + * Aside from that bit, it must be * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, - * with one exception: a level of zero may be specified for a - * paragraph separator even if <code>paraLevel>0</code> when multiple - * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> + * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if <code>LEVEL_OVERRIDE</code> + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.<br><br> * <strong>Caution: </strong>A reference to this array, not a copy * of the levels, will be stored in the <code>Bidi</code> object; * the <code>embeddingLevels</code> @@ -5216,13 +5238,19 @@ public class Bidi { /** * Create Bidi from the given text, embedding, and direction information. - * The embeddings array may be null. If present, the values represent - * embedding level information. Negative values from -1 to -61 indicate - * overrides at the absolute value of the level. Positive values from 1 to - * 61 indicate embeddings. Where values are zero, the base embedding level - * as determined by the base direction is assumed.<p> * - * Note: this constructor calls setPara() internally. + * <p>The embeddings array may be null. If present, the values represent + * embedding level information. + * Negative values from -1 to -{@link #MAX_EXPLICIT_LEVEL} + * indicate overrides at the absolute value of the level. + * Positive values from 1 to {@link #MAX_EXPLICIT_LEVEL} indicate embeddings. + * Where values are zero, the base embedding level + * as determined by the base direction is assumed, + * except for paragraph separators which remain at 0 to prevent reordering of paragraphs.</p> + * + * <p>Note: This constructor calls setPara() internally, + * after converting the java.text.Bidi-style embeddings with negative overrides + * into ICU-style embeddings with bit fields for {@link #LEVEL_OVERRIDE} and the level. * * @param text an array containing the paragraph of text to process. * @param textStart the index into the text array of the start of the @@ -5275,22 +5303,23 @@ public class Bidi { if (embeddings == null) { paraEmbeddings = null; } else { + // Convert from java.text.Bidi embeddings to ICU setPara() levels: + // Copy to the start of a new array and convert java.text negative overrides + // to ICU bit-field-and-mask overrides. + // A copy of the embeddings is always required because + // setPara() may modify its embeddings. paraEmbeddings = new byte[paragraphLength]; byte lev; for (int i = 0; i < paragraphLength; i++) { lev = embeddings[i + embStart]; if (lev < 0) { lev = (byte)((- lev) | LEVEL_OVERRIDE); - } else if (lev == 0) { - lev = paraLvl; - if (paraLvl > MAX_EXPLICIT_LEVEL) { - lev &= 1; - } } + // setPara() lifts level 0 up to the resolved paragraph level. paraEmbeddings[i] = lev; } } - if (textStart == 0 && embStart == 0 && paragraphLength == text.length) { + if (textStart == 0 && paragraphLength == text.length) { setPara(text, paraLvl, paraEmbeddings); } else { char[] paraText = new char[paragraphLength]; diff --git a/android_icu4j/src/main/tests/android/icu/dev/test/bidi/TestBidi.java b/android_icu4j/src/main/tests/android/icu/dev/test/bidi/TestBidi.java index 88dee912a..dda7e7718 100644 --- a/android_icu4j/src/main/tests/android/icu/dev/test/bidi/TestBidi.java +++ b/android_icu4j/src/main/tests/android/icu/dev/test/bidi/TestBidi.java @@ -529,7 +529,7 @@ public class TestBidi extends BidiFmwk { bidi.setReorderingMode(Bidi.REORDER_RUNS_ONLY); bidi.setPara("a \u05d0 b \u05d1 c \u05d2 d ", Bidi.LTR, null); assertEquals("\nWrong number of runs #4", 14, bidi.countRuns()); - + /* test testGetBaseDirection to verify fast string direction detection function */ /* mixed start with L */ String mixedEnglishFirst = "\u0061\u0627\u0032\u06f3\u0061\u0034"; @@ -569,7 +569,7 @@ public class TestBidi extends BidiFmwk { assertEquals("\nWrong direction through fast detection #12", Bidi.NEUTRAL, Bidi.getBaseDirection(allArabicDigits)); /* null string */ String nullString = null; - assertEquals("\nWrong direction through fast detection #13", Bidi.NEUTRAL, Bidi.getBaseDirection(nullString)); + assertEquals("\nWrong direction through fast detection #13", Bidi.NEUTRAL, Bidi.getBaseDirection(nullString)); /* first L (English) others are R (Hebrew etc.) */ String startEnglishOthersHebrew = "\u0071\u0590\u05D5\u05EA\u05F1"; assertEquals("\nWrong direction through fast detection #14", Bidi.LTR, Bidi.getBaseDirection(startEnglishOthersHebrew)); @@ -577,4 +577,20 @@ public class TestBidi extends BidiFmwk { String lastHebrewOthersEnglishDigit = "\u0031\u0032\u0033\u05F1"; assertEquals("\nWrong direction through fast detection #15", Bidi.RTL, Bidi.getBaseDirection(lastHebrewOthersEnglishDigit)); } + + @Test + public void testExplicitLevel0() { + // The following used to fail with an error, see ICU ticket #12922. + String text = "\u202d\u05d0"; + byte[] embeddings = new byte[2]; // all 0 + int flags = Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT; // 0x7e + Bidi bidi = new Bidi(text.toCharArray(), 0, embeddings, 0, text.length(), flags); + assertEquals("resolved level at 0", 1, bidi.getLevelAt(0)); + assertEquals("resolved level at 1", 1, bidi.getLevelAt(1)); + + flags = java.text.Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT; // -2 + java.text.Bidi jb = new java.text.Bidi(text.toCharArray(), 0, embeddings, 0, text.length(), flags); + assertEquals("java.text resolved level at 0", 1, jb.getLevelAt(0)); + assertEquals("java.text resolved level at 1", 1, jb.getLevelAt(1)); + } } diff --git a/android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java b/android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java index 00ad5a108..7332a7f03 100644 --- a/android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java +++ b/android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java @@ -56,6 +56,40 @@ public class ListFormatterTest extends TestFmwk { } } + // Tests resource loading and inheritance when region sublocale + // has only partial data for the listPattern element (overriding + // some of the parent data). #12994 + String[] EnglishGBTestData = { + "", + "A", + "A and B", + "A, B and C", + "A, B, C and D", + "A, B, C, D and E" + }; + + @Test + public void TestEnglishGB() { + checkData(ListFormatter.getInstance(new ULocale("en_GB")), EnglishGBTestData); + } + + // Tests resource loading and inheritance when region sublocale + // has only partial data for the listPattern element (overriding + // some of the parent data). #12994 + String[] ChineseTradHKTestData = { + "", + "A", + "A\u53CAB", + "A\u3001B\u53CAC", + "A\u3001B\u3001C\u53CAD", + "A\u3001B\u3001C\u3001D\u53CAE" + }; + + @Test + public void TestChineseTradHK() { + checkData(ListFormatter.getInstance(new ULocale("zh_Hant_HK")), ChineseTradHKTestData); + } + String[] JapaneseTestData = { "", "A", diff --git a/icu4c/source/Android.bp b/icu4c/source/Android.bp index 437f48cd3..8a1c3cfd6 100644 --- a/icu4c/source/Android.bp +++ b/icu4c/source/Android.bp @@ -19,14 +19,8 @@ subdirs = [ cc_library_static { name: "libicuuc_stubdata", + vendor_available: true, host_supported: true, srcs: ["stubdata/stubdata.c"], local_include_dirs: ["common"], } - -cc_library_static { - name: "libicuuc_stubdata_ndk", - sdk_version: "9", - srcs: ["stubdata/stubdata.c"], - local_include_dirs: ["common"], -} diff --git a/icu4c/source/common/Android.bp b/icu4c/source/common/Android.bp index 6bacb5003..8ec554761 100644 --- a/icu4c/source/common/Android.bp +++ b/icu4c/source/common/Android.bp @@ -259,16 +259,3 @@ cc_library_shared { defaults: ["libicuuc_defaults"], static_libs: ["libicuuc_stubdata"], } - -// -// Build as a static library against the NDK -// -cc_library_static { - name: "libicuuc_static", - sdk_version: "9", - stl: "stlport_static", - defaults: ["libicuuc_defaults"], - static_libs: ["libicuuc_stubdata_ndk"], - - cflags: ["-Os"], // Using -Os over -O3 actually cuts down the final executable size by a few dozen kilobytes -} diff --git a/icu4c/source/common/listformatter.cpp b/icu4c/source/common/listformatter.cpp index 9225c22f1..263d5eb7b 100644 --- a/icu4c/source/common/listformatter.cpp +++ b/icu4c/source/common/listformatter.cpp @@ -25,6 +25,7 @@ #include "charstr.h" #include "ucln_cmn.h" #include "uresimp.h" +#include "resource.h" U_NAMESPACE_BEGIN @@ -78,17 +79,6 @@ uprv_deleteListFormatInternal(void *obj) { U_CDECL_END -static ListFormatInternal* loadListFormatInternal( - const Locale& locale, - const char* style, - UErrorCode& errorCode); - -static void getStringByKey( - const UResourceBundle* rb, - const char* key, - UnicodeString& result, - UErrorCode& errorCode); - ListFormatter::ListFormatter(const ListFormatter& other) : owned(other.owned), data(other.data) { if (other.owned != NULL) { @@ -171,30 +161,100 @@ const ListFormatInternal* ListFormatter::getListFormatInternal( return result; } -static ListFormatInternal* loadListFormatInternal( +static const UChar solidus = 0x2F; +static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/" +enum { + kAliasPrefixLen = UPRV_LENGTHOF(aliasPrefix), + kStyleLenMax = 24 // longest currently is 14 +}; + +struct ListFormatter::ListPatternsSink : public ResourceSink { + UnicodeString two, start, middle, end; + char aliasedStyle[kStyleLenMax+1] = {0}; + + ListPatternsSink() {} + virtual ~ListPatternsSink(); + + void setAliasedStyle(UnicodeString alias) { + int32_t startIndex = alias.indexOf(aliasPrefix, kAliasPrefixLen, 0); + if (startIndex < 0) { + return; + } + startIndex += kAliasPrefixLen; + int32_t endIndex = alias.indexOf(solidus, startIndex); + if (endIndex < 0) { + endIndex = alias.length(); + } + alias.extract(startIndex, endIndex-startIndex, aliasedStyle, kStyleLenMax+1, US_INV); + aliasedStyle[kStyleLenMax] = 0; + } + + void handleValueForPattern(ResourceValue &value, UnicodeString &pattern, UErrorCode &errorCode) { + if (pattern.isEmpty()) { + if (value.getType() == URES_ALIAS) { + if (aliasedStyle[0] == 0) { + setAliasedStyle(value.getAliasUnicodeString(errorCode)); + } + } else { + pattern = value.getUnicodeString(errorCode); + } + } + } + + virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, + UErrorCode &errorCode) { + aliasedStyle[0] = 0; + if (value.getType() == URES_ALIAS) { + setAliasedStyle(value.getAliasUnicodeString(errorCode)); + return; + } + ResourceTable listPatterns = value.getTable(errorCode); + for (int i = 0; U_SUCCESS(errorCode) && listPatterns.getKeyAndValue(i, key, value); ++i) { + if (uprv_strcmp(key, "2") == 0) { + handleValueForPattern(value, two, errorCode); + } else if (uprv_strcmp(key, "end") == 0) { + handleValueForPattern(value, end, errorCode); + } else if (uprv_strcmp(key, "middle") == 0) { + handleValueForPattern(value, middle, errorCode); + } else if (uprv_strcmp(key, "start") == 0) { + handleValueForPattern(value, start, errorCode); + } + } + } +}; + +// Virtual destructors must be defined out of line. +ListFormatter::ListPatternsSink::~ListPatternsSink() {} + +ListFormatInternal* ListFormatter::loadListFormatInternal( const Locale& locale, const char * style, UErrorCode& errorCode) { UResourceBundle* rb = ures_open(NULL, locale.getName(), &errorCode); - if (U_FAILURE(errorCode)) { - ures_close(rb); - return NULL; - } rb = ures_getByKeyWithFallback(rb, "listPattern", rb, &errorCode); - rb = ures_getByKeyWithFallback(rb, style, rb, &errorCode); - if (U_FAILURE(errorCode)) { ures_close(rb); return NULL; } - UnicodeString two, start, middle, end; - getStringByKey(rb, "2", two, errorCode); - getStringByKey(rb, "start", start, errorCode); - getStringByKey(rb, "middle", middle, errorCode); - getStringByKey(rb, "end", end, errorCode); + ListFormatter::ListPatternsSink sink; + char currentStyle[kStyleLenMax+1]; + uprv_strncpy(currentStyle, style, kStyleLenMax); + currentStyle[kStyleLenMax] = 0; + + for (;;) { + ures_getAllItemsWithFallback(rb, currentStyle, sink, errorCode); + if (U_FAILURE(errorCode) || sink.aliasedStyle[0] == 0 || uprv_strcmp(currentStyle, sink.aliasedStyle) == 0) { + break; + } + uprv_strcpy(currentStyle, sink.aliasedStyle); + } ures_close(rb); if (U_FAILURE(errorCode)) { return NULL; } - ListFormatInternal* result = new ListFormatInternal(two, start, middle, end, errorCode); + if (sink.two.isEmpty() || sink.start.isEmpty() || sink.middle.isEmpty() || sink.end.isEmpty()) { + errorCode = U_MISSING_RESOURCE_ERROR; + return NULL; + } + ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, errorCode); if (result == NULL) { errorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; @@ -206,15 +266,6 @@ static ListFormatInternal* loadListFormatInternal( return result; } -static void getStringByKey(const UResourceBundle* rb, const char* key, UnicodeString& result, UErrorCode& errorCode) { - int32_t len; - const UChar* ustr = ures_getStringByKeyWithFallback(rb, key, &len, &errorCode); - if (U_FAILURE(errorCode)) { - return; - } - result.setTo(ustr, len); -} - ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) { Locale locale; // The default locale. return createInstance(locale, errorCode); diff --git a/icu4c/source/common/unicode/listformatter.h b/icu4c/source/common/unicode/listformatter.h index f2c898881..93eb7f360 100644 --- a/icu4c/source/common/unicode/listformatter.h +++ b/icu4c/source/common/unicode/listformatter.h @@ -157,6 +157,8 @@ class U_COMMON_API ListFormatter : public UObject{ private: static void initializeHash(UErrorCode& errorCode); static const ListFormatInternal* getListFormatInternal(const Locale& locale, const char *style, UErrorCode& errorCode); + struct ListPatternsSink; + static ListFormatInternal* loadListFormatInternal(const Locale& locale, const char* style, UErrorCode& errorCode); ListFormatter(); diff --git a/icu4c/source/i18n/Android.bp b/icu4c/source/i18n/Android.bp index 253bad864..ef84d970d 100644 --- a/icu4c/source/i18n/Android.bp +++ b/icu4c/source/i18n/Android.bp @@ -251,16 +251,3 @@ cc_library_shared { "-lpthread", ], } - -// -// Build as a static library against the NDK -// -cc_library_static { - name: "libicui18n_static", - defaults: ["libicui18n_defaults"], - sdk_version: "9", - stl: "stlport_static", - static_libs: ["libicuuc_static"], - - cflags: ["-Os"], // Using -Os over -O3 actually cuts down the final executable size by a few dozen kilobytes -} diff --git a/icu4c/source/test/intltest/listformattertest.cpp b/icu4c/source/test/intltest/listformattertest.cpp index b7aaea216..ad0afa54b 100644 --- a/icu4c/source/test/intltest/listformattertest.cpp +++ b/icu4c/source/test/intltest/listformattertest.cpp @@ -147,6 +147,48 @@ void ListFormatterTest::TestEnglishUS() { CheckFourCases("en_US", one, two, three, four, results); } +// Tests resource loading and inheritance when region sublocale +// has only partial data for the listPattern element (overriding +// some of the parent data). #12994 +void ListFormatterTest::TestEnglishGB() { + UnicodeString results[4] = { + one, + one + " and " + two, + one + ", " + two + " and " + three, + one + ", " + two + ", " + three + " and " + four + }; + + CheckFourCases("en_GB", one, two, three, four, results); +} + +// Tests resource loading and inheritance when region sublocale +// has only partial data for the listPattern element (overriding +// some of the parent data). #12994 +void ListFormatterTest::TestNynorsk() { + UnicodeString results[4] = { + one, + one + " og " + two, + one + ", " + two + " og " + three, + one + ", " + two + ", " + three + " og " + four + }; + + CheckFourCases("nn", one, two, three, four, results); +} + +// Tests resource loading and inheritance when region sublocale +// has only partial data for the listPattern element (overriding +// some of the parent data). #12994 +void ListFormatterTest::TestChineseTradHK() { + UnicodeString results[4] = { + one, + one + "\u53CA" + two, + one + "\u3001" + two + "\u53CA" + three, + one + "\u3001" + two + "\u3001" + three + "\u53CA" + four + }; + + CheckFourCases("zh_Hant_HK", one, two, three, four, results); +} + // Formatting in Russian. // "\\u0438" is used before the last element, and all elements up to (but not including) the penultimate are followed by a comma. void ListFormatterTest::TestRussian() { @@ -229,6 +271,9 @@ void ListFormatterTest::runIndexedTest(int32_t index, UBool exec, case 6: name = "TestZulu"; if (exec) TestZulu(); break; case 7: name = "TestOutOfOrderPatterns"; if (exec) TestOutOfOrderPatterns(); break; case 8: name = "Test9946"; if (exec) Test9946(); break; + case 9: name = "TestEnglishGB"; if (exec) TestEnglishGB(); break; + case 10: name = "TestNynorsk"; if (exec) TestNynorsk(); break; + case 11: name = "TestChineseTradHK"; if (exec) TestChineseTradHK(); break; default: name = ""; break; } diff --git a/icu4c/source/test/intltest/listformattertest.h b/icu4c/source/test/intltest/listformattertest.h index 1281306c1..70686d703 100644 --- a/icu4c/source/test/intltest/listformattertest.h +++ b/icu4c/source/test/intltest/listformattertest.h @@ -33,6 +33,9 @@ class ListFormatterTest : public IntlTest { void TestBogus(); void TestEnglish(); void TestEnglishUS(); + void TestEnglishGB(); + void TestNynorsk(); + void TestChineseTradHK(); void TestRussian(); void TestMalayalam(); void TestZulu(); diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 510146b01..801e5902b 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -1645,6 +1645,8 @@ void MeasureFormatTest::TestManyLocaleDurations() { helperTestManyLocaleDurations("de", UMEASFMT_WIDTH_NUMERIC, measures, UPRV_LENGTHOF(measures), "5:37"); helperTestManyLocaleDurations("en", UMEASFMT_WIDTH_NARROW, measures, UPRV_LENGTHOF(measures), "5h 37m"); helperTestManyLocaleDurations("en", UMEASFMT_WIDTH_NUMERIC, measures, UPRV_LENGTHOF(measures), "5:37"); + helperTestManyLocaleDurations("en_GB", UMEASFMT_WIDTH_NARROW, measures, UPRV_LENGTHOF(measures), "5h 37m"); + helperTestManyLocaleDurations("en_GB", UMEASFMT_WIDTH_NUMERIC, measures, UPRV_LENGTHOF(measures), "5:37"); helperTestManyLocaleDurations("es", UMEASFMT_WIDTH_NARROW, measures, UPRV_LENGTHOF(measures), "5h 37min"); helperTestManyLocaleDurations("es", UMEASFMT_WIDTH_NUMERIC, measures, UPRV_LENGTHOF(measures), "5:37"); helperTestManyLocaleDurations("fi", UMEASFMT_WIDTH_NARROW, measures, UPRV_LENGTHOF(measures), "5t 37min"); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java index 62a276dc7..2fb22188a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java @@ -2674,28 +2674,29 @@ public class Bidi { return dirct; } - /* + /** * Use a pre-specified embedding levels array: * - * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), + * <p>Adjust the directional properties for overrides (->LEVEL_OVERRIDE), * ignore all explicit codes (X9), * and check all the preset levels. * - * Recalculate the flags to have them reflect the real properties + * <p>Recalculate the flags to have them reflect the real properties * after taking the explicit embeddings into account. */ private byte checkExplicitLevels() { - byte dirProp; - int i; int isolateCount = 0; this.flags = 0; /* collect all directionalities in the text */ - byte level; this.isolateCount = 0; - for (i = 0; i < length; ++i) { - level = levels[i]; - dirProp = dirProps[i]; + int currentParaIndex = 0; + int currentParaLimit = paras_limit[0]; + byte currentParaLevel = paraLevel; + + for (int i = 0; i < length; ++i) { + byte level = levels[i]; + byte dirProp = dirProps[i]; if (dirProp == LRI || dirProp == RLI) { isolateCount++; if (isolateCount > this.isolateCount) @@ -2705,21 +2706,40 @@ public class Bidi { isolateCount--; else if (dirProp == B) isolateCount = 0; - if ((level & LEVEL_OVERRIDE) != 0) { + + // optimized version of byte currentParaLevel = GetParaLevelAt(i); + if (defaultParaLevel != 0 && + i == currentParaLimit && (currentParaIndex + 1) < paraCount) { + currentParaLevel = paras_level[++currentParaIndex]; + currentParaLimit = paras_limit[currentParaIndex]; + } + + int overrideFlag = level & LEVEL_OVERRIDE; + level &= ~LEVEL_OVERRIDE; + if (level < currentParaLevel || MAX_EXPLICIT_LEVEL < level) { + if (level == 0) { + if (dirProp == B) { + // Paragraph separators are ok with explicit level 0. + // Prevents reordering of paragraphs. + } else { + // Treat explicit level 0 as a wildcard for the paragraph level. + // Avoid making the caller guess what the paragraph level would be. + level = currentParaLevel; + levels[i] = (byte)(level | overrideFlag); + } + } else { + // 1 <= level < currentParaLevel or MAX_EXPLICIT_LEVEL < level + throw new IllegalArgumentException("level " + level + + " out of bounds at " + i); + } + } + if (overrideFlag != 0) { /* keep the override flag in levels[i] but adjust the flags */ - level &= ~LEVEL_OVERRIDE; /* make the range check below simpler */ flags |= DirPropFlagO(level); } else { /* set the flags */ flags |= DirPropFlagE(level) | DirPropFlag(dirProp); } - if ((level < GetParaLevelAt(i) && - !((0 == level) && (dirProp == B))) || - (MAX_EXPLICIT_LEVEL < level)) { - /* level out of bounds */ - throw new IllegalArgumentException("level " + level + - " out of bounds at " + i); - } } if ((flags & MASK_EMBEDDING) != 0) flags |= DirPropFlagLR(paraLevel); @@ -3816,24 +3836,22 @@ public class Bidi { /** * Perform the Unicode Bidi algorithm. It is defined in the - * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, - * version 13, - * also described in The Unicode Standard, Version 4.0 .<p> + * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>. * - * This method takes a piece of plain text containing one or more paragraphs, + * <p>This method takes a piece of plain text containing one or more paragraphs, * with or without externally specified embedding levels from <i>styled</i> - * text and computes the left-right-directionality of each character.<p> + * text and computes the left-right-directionality of each character.</p> * - * If the entire text is all of the same directionality, then + * <p>If the entire text is all of the same directionality, then * the method may not perform all the steps described by the algorithm, * i.e., some levels may not be the same as if all steps were performed. * This is not relevant for unidirectional text.<br> * For example, in pure LTR text with numbers the numbers would get * a resolved level of 2 higher than the surrounding text according to * the algorithm. This implementation may set all resolved levels to - * the same value in such a case.<p> + * the same value in such a case.</p> * - * The text can be composed of multiple paragraphs. Occurrence of a block + * <p>The text can be composed of multiple paragraphs. Occurrence of a block * separator in the text terminates a paragraph, and whatever comes next starts * a new paragraph. The exception to this rule is when a Carriage Return (CR) * is followed by a Line Feed (LF). Both CR and LF are block separators, but @@ -3841,7 +3859,7 @@ public class Bidi { * preceding paragraph, and a new paragraph will be started by a character * coming after the LF. * - * Although the text is passed here as a <code>String</code>, it is + * <p>Although the text is passed here as a <code>String</code>, it is * stored internally as an array of characters. Therefore the * documentation will refer to indexes of the characters in the text. * @@ -3866,11 +3884,14 @@ public class Bidi { * A level overrides the directional property of its corresponding * (same index) character if the level has the * <code>LEVEL_OVERRIDE</code> bit set.<br><br> - * Except for that bit, it must be + * Aside from that bit, it must be * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, - * with one exception: a level of zero may be specified for a - * paragraph separator even if <code>paraLevel>0</code> when multiple - * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> + * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if <code>LEVEL_OVERRIDE</code> + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.<br><br> * <strong>Caution: </strong>A reference to this array, not a copy * of the levels, will be stored in the <code>Bidi</code> object; * the <code>embeddingLevels</code> @@ -3901,24 +3922,22 @@ public class Bidi { /** * Perform the Unicode Bidi algorithm. It is defined in the - * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, - * version 13, - * also described in The Unicode Standard, Version 4.0 .<p> + * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>. * - * This method takes a piece of plain text containing one or more paragraphs, + * <p>This method takes a piece of plain text containing one or more paragraphs, * with or without externally specified embedding levels from <i>styled</i> - * text and computes the left-right-directionality of each character.<p> + * text and computes the left-right-directionality of each character.</p> * - * If the entire text is all of the same directionality, then + * <p>If the entire text is all of the same directionality, then * the method may not perform all the steps described by the algorithm, * i.e., some levels may not be the same as if all steps were performed. * This is not relevant for unidirectional text.<br> * For example, in pure LTR text with numbers the numbers would get * a resolved level of 2 higher than the surrounding text according to * the algorithm. This implementation may set all resolved levels to - * the same value in such a case.<p> + * the same value in such a case.</p> * - * The text can be composed of multiple paragraphs. Occurrence of a block + * <p>The text can be composed of multiple paragraphs. Occurrence of a block * separator in the text terminates a paragraph, and whatever comes next starts * a new paragraph. The exception to this rule is when a Carriage Return (CR) * is followed by a Line Feed (LF). Both CR and LF are block separators, but @@ -3926,7 +3945,7 @@ public class Bidi { * preceding paragraph, and a new paragraph will be started by a character * coming after the LF. * - * The text is stored internally as an array of characters. Therefore the + * <p>The text is stored internally as an array of characters. Therefore the * documentation will refer to indexes of the characters in the text. * * @param chars contains the text that the Bidi algorithm will be performed @@ -3950,11 +3969,14 @@ public class Bidi { * A level overrides the directional property of its corresponding * (same index) character if the level has the * <code>LEVEL_OVERRIDE</code> bit set.<br><br> - * Except for that bit, it must be + * Aside from that bit, it must be * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, - * with one exception: a level of zero may be specified for a - * paragraph separator even if <code>paraLevel>0</code> when multiple - * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> + * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if <code>LEVEL_OVERRIDE</code> + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.<br><br> * <strong>Caution: </strong>A reference to this array, not a copy * of the levels, will be stored in the <code>Bidi</code> object; * the <code>embeddingLevels</code> @@ -5294,13 +5316,19 @@ public class Bidi { /** * Create Bidi from the given text, embedding, and direction information. - * The embeddings array may be null. If present, the values represent - * embedding level information. Negative values from -1 to -61 indicate - * overrides at the absolute value of the level. Positive values from 1 to - * 61 indicate embeddings. Where values are zero, the base embedding level - * as determined by the base direction is assumed.<p> * - * Note: this constructor calls setPara() internally. + * <p>The embeddings array may be null. If present, the values represent + * embedding level information. + * Negative values from -1 to -{@link #MAX_EXPLICIT_LEVEL} + * indicate overrides at the absolute value of the level. + * Positive values from 1 to {@link #MAX_EXPLICIT_LEVEL} indicate embeddings. + * Where values are zero, the base embedding level + * as determined by the base direction is assumed, + * except for paragraph separators which remain at 0 to prevent reordering of paragraphs.</p> + * + * <p>Note: This constructor calls setPara() internally, + * after converting the java.text.Bidi-style embeddings with negative overrides + * into ICU-style embeddings with bit fields for {@link #LEVEL_OVERRIDE} and the level. * * @param text an array containing the paragraph of text to process. * @param textStart the index into the text array of the start of the @@ -5354,22 +5382,23 @@ public class Bidi { if (embeddings == null) { paraEmbeddings = null; } else { + // Convert from java.text.Bidi embeddings to ICU setPara() levels: + // Copy to the start of a new array and convert java.text negative overrides + // to ICU bit-field-and-mask overrides. + // A copy of the embeddings is always required because + // setPara() may modify its embeddings. paraEmbeddings = new byte[paragraphLength]; byte lev; for (int i = 0; i < paragraphLength; i++) { lev = embeddings[i + embStart]; if (lev < 0) { lev = (byte)((- lev) | LEVEL_OVERRIDE); - } else if (lev == 0) { - lev = paraLvl; - if (paraLvl > MAX_EXPLICIT_LEVEL) { - lev &= 1; - } } + // setPara() lifts level 0 up to the resolved paragraph level. paraEmbeddings[i] = lev; } } - if (textStart == 0 && embStart == 0 && paragraphLength == text.length) { + if (textStart == 0 && paragraphLength == text.length) { setPara(text, paraLvl, paraEmbeddings); } else { char[] paraText = new char[paragraphLength]; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java index 9fe50a65a..af155ed8d 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/bidi/TestBidi.java @@ -526,7 +526,7 @@ public class TestBidi extends BidiFmwk { bidi.setReorderingMode(Bidi.REORDER_RUNS_ONLY); bidi.setPara("a \u05d0 b \u05d1 c \u05d2 d ", Bidi.LTR, null); assertEquals("\nWrong number of runs #4", 14, bidi.countRuns()); - + /* test testGetBaseDirection to verify fast string direction detection function */ /* mixed start with L */ String mixedEnglishFirst = "\u0061\u0627\u0032\u06f3\u0061\u0034"; @@ -566,7 +566,7 @@ public class TestBidi extends BidiFmwk { assertEquals("\nWrong direction through fast detection #12", Bidi.NEUTRAL, Bidi.getBaseDirection(allArabicDigits)); /* null string */ String nullString = null; - assertEquals("\nWrong direction through fast detection #13", Bidi.NEUTRAL, Bidi.getBaseDirection(nullString)); + assertEquals("\nWrong direction through fast detection #13", Bidi.NEUTRAL, Bidi.getBaseDirection(nullString)); /* first L (English) others are R (Hebrew etc.) */ String startEnglishOthersHebrew = "\u0071\u0590\u05D5\u05EA\u05F1"; assertEquals("\nWrong direction through fast detection #14", Bidi.LTR, Bidi.getBaseDirection(startEnglishOthersHebrew)); @@ -574,4 +574,20 @@ public class TestBidi extends BidiFmwk { String lastHebrewOthersEnglishDigit = "\u0031\u0032\u0033\u05F1"; assertEquals("\nWrong direction through fast detection #15", Bidi.RTL, Bidi.getBaseDirection(lastHebrewOthersEnglishDigit)); } + + @Test + public void testExplicitLevel0() { + // The following used to fail with an error, see ICU ticket #12922. + String text = "\u202d\u05d0"; + byte[] embeddings = new byte[2]; // all 0 + int flags = Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT; // 0x7e + Bidi bidi = new Bidi(text.toCharArray(), 0, embeddings, 0, text.length(), flags); + assertEquals("resolved level at 0", 1, bidi.getLevelAt(0)); + assertEquals("resolved level at 1", 1, bidi.getLevelAt(1)); + + flags = java.text.Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT; // -2 + java.text.Bidi jb = new java.text.Bidi(text.toCharArray(), 0, embeddings, 0, text.length(), flags); + assertEquals("java.text resolved level at 0", 1, jb.getLevelAt(0)); + assertEquals("java.text resolved level at 1", 1, jb.getLevelAt(1)); + } } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java index 2a42f7c6b..9f52775b3 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java @@ -53,6 +53,40 @@ public class ListFormatterTest extends TestFmwk { } } + // Tests resource loading and inheritance when region sublocale + // has only partial data for the listPattern element (overriding + // some of the parent data). #12994 + String[] EnglishGBTestData = { + "", + "A", + "A and B", + "A, B and C", + "A, B, C and D", + "A, B, C, D and E" + }; + + @Test + public void TestEnglishGB() { + checkData(ListFormatter.getInstance(new ULocale("en_GB")), EnglishGBTestData); + } + + // Tests resource loading and inheritance when region sublocale + // has only partial data for the listPattern element (overriding + // some of the parent data). #12994 + String[] ChineseTradHKTestData = { + "", + "A", + "A\u53CAB", + "A\u3001B\u53CAC", + "A\u3001B\u3001C\u53CAD", + "A\u3001B\u3001C\u3001D\u53CAE" + }; + + @Test + public void TestChineseTradHK() { + checkData(ListFormatter.getInstance(new ULocale("zh_Hant_HK")), ChineseTradHKTestData); + } + String[] JapaneseTestData = { "", "A", |