diff options
Diffstat (limited to 'icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java')
-rw-r--r-- | icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java | 141 |
1 files changed, 85 insertions, 56 deletions
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java index 62a276dc7..2fb22188a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Bidi.java @@ -2674,28 +2674,29 @@ public class Bidi { return dirct; } - /* + /** * Use a pre-specified embedding levels array: * - * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), + * <p>Adjust the directional properties for overrides (->LEVEL_OVERRIDE), * ignore all explicit codes (X9), * and check all the preset levels. * - * Recalculate the flags to have them reflect the real properties + * <p>Recalculate the flags to have them reflect the real properties * after taking the explicit embeddings into account. */ private byte checkExplicitLevels() { - byte dirProp; - int i; int isolateCount = 0; this.flags = 0; /* collect all directionalities in the text */ - byte level; this.isolateCount = 0; - for (i = 0; i < length; ++i) { - level = levels[i]; - dirProp = dirProps[i]; + int currentParaIndex = 0; + int currentParaLimit = paras_limit[0]; + byte currentParaLevel = paraLevel; + + for (int i = 0; i < length; ++i) { + byte level = levels[i]; + byte dirProp = dirProps[i]; if (dirProp == LRI || dirProp == RLI) { isolateCount++; if (isolateCount > this.isolateCount) @@ -2705,21 +2706,40 @@ public class Bidi { isolateCount--; else if (dirProp == B) isolateCount = 0; - if ((level & LEVEL_OVERRIDE) != 0) { + + // optimized version of byte currentParaLevel = GetParaLevelAt(i); + if (defaultParaLevel != 0 && + i == currentParaLimit && (currentParaIndex + 1) < paraCount) { + currentParaLevel = paras_level[++currentParaIndex]; + currentParaLimit = paras_limit[currentParaIndex]; + } + + int overrideFlag = level & LEVEL_OVERRIDE; + level &= ~LEVEL_OVERRIDE; + if (level < currentParaLevel || MAX_EXPLICIT_LEVEL < level) { + if (level == 0) { + if (dirProp == B) { + // Paragraph separators are ok with explicit level 0. + // Prevents reordering of paragraphs. + } else { + // Treat explicit level 0 as a wildcard for the paragraph level. + // Avoid making the caller guess what the paragraph level would be. + level = currentParaLevel; + levels[i] = (byte)(level | overrideFlag); + } + } else { + // 1 <= level < currentParaLevel or MAX_EXPLICIT_LEVEL < level + throw new IllegalArgumentException("level " + level + + " out of bounds at " + i); + } + } + if (overrideFlag != 0) { /* keep the override flag in levels[i] but adjust the flags */ - level &= ~LEVEL_OVERRIDE; /* make the range check below simpler */ flags |= DirPropFlagO(level); } else { /* set the flags */ flags |= DirPropFlagE(level) | DirPropFlag(dirProp); } - if ((level < GetParaLevelAt(i) && - !((0 == level) && (dirProp == B))) || - (MAX_EXPLICIT_LEVEL < level)) { - /* level out of bounds */ - throw new IllegalArgumentException("level " + level + - " out of bounds at " + i); - } } if ((flags & MASK_EMBEDDING) != 0) flags |= DirPropFlagLR(paraLevel); @@ -3816,24 +3836,22 @@ public class Bidi { /** * Perform the Unicode Bidi algorithm. It is defined in the - * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, - * version 13, - * also described in The Unicode Standard, Version 4.0 .<p> + * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>. * - * This method takes a piece of plain text containing one or more paragraphs, + * <p>This method takes a piece of plain text containing one or more paragraphs, * with or without externally specified embedding levels from <i>styled</i> - * text and computes the left-right-directionality of each character.<p> + * text and computes the left-right-directionality of each character.</p> * - * If the entire text is all of the same directionality, then + * <p>If the entire text is all of the same directionality, then * the method may not perform all the steps described by the algorithm, * i.e., some levels may not be the same as if all steps were performed. * This is not relevant for unidirectional text.<br> * For example, in pure LTR text with numbers the numbers would get * a resolved level of 2 higher than the surrounding text according to * the algorithm. This implementation may set all resolved levels to - * the same value in such a case.<p> + * the same value in such a case.</p> * - * The text can be composed of multiple paragraphs. Occurrence of a block + * <p>The text can be composed of multiple paragraphs. Occurrence of a block * separator in the text terminates a paragraph, and whatever comes next starts * a new paragraph. The exception to this rule is when a Carriage Return (CR) * is followed by a Line Feed (LF). Both CR and LF are block separators, but @@ -3841,7 +3859,7 @@ public class Bidi { * preceding paragraph, and a new paragraph will be started by a character * coming after the LF. * - * Although the text is passed here as a <code>String</code>, it is + * <p>Although the text is passed here as a <code>String</code>, it is * stored internally as an array of characters. Therefore the * documentation will refer to indexes of the characters in the text. * @@ -3866,11 +3884,14 @@ public class Bidi { * A level overrides the directional property of its corresponding * (same index) character if the level has the * <code>LEVEL_OVERRIDE</code> bit set.<br><br> - * Except for that bit, it must be + * Aside from that bit, it must be * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, - * with one exception: a level of zero may be specified for a - * paragraph separator even if <code>paraLevel>0</code> when multiple - * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> + * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if <code>LEVEL_OVERRIDE</code> + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.<br><br> * <strong>Caution: </strong>A reference to this array, not a copy * of the levels, will be stored in the <code>Bidi</code> object; * the <code>embeddingLevels</code> @@ -3901,24 +3922,22 @@ public class Bidi { /** * Perform the Unicode Bidi algorithm. It is defined in the - * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, - * version 13, - * also described in The Unicode Standard, Version 4.0 .<p> + * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>. * - * This method takes a piece of plain text containing one or more paragraphs, + * <p>This method takes a piece of plain text containing one or more paragraphs, * with or without externally specified embedding levels from <i>styled</i> - * text and computes the left-right-directionality of each character.<p> + * text and computes the left-right-directionality of each character.</p> * - * If the entire text is all of the same directionality, then + * <p>If the entire text is all of the same directionality, then * the method may not perform all the steps described by the algorithm, * i.e., some levels may not be the same as if all steps were performed. * This is not relevant for unidirectional text.<br> * For example, in pure LTR text with numbers the numbers would get * a resolved level of 2 higher than the surrounding text according to * the algorithm. This implementation may set all resolved levels to - * the same value in such a case.<p> + * the same value in such a case.</p> * - * The text can be composed of multiple paragraphs. Occurrence of a block + * <p>The text can be composed of multiple paragraphs. Occurrence of a block * separator in the text terminates a paragraph, and whatever comes next starts * a new paragraph. The exception to this rule is when a Carriage Return (CR) * is followed by a Line Feed (LF). Both CR and LF are block separators, but @@ -3926,7 +3945,7 @@ public class Bidi { * preceding paragraph, and a new paragraph will be started by a character * coming after the LF. * - * The text is stored internally as an array of characters. Therefore the + * <p>The text is stored internally as an array of characters. Therefore the * documentation will refer to indexes of the characters in the text. * * @param chars contains the text that the Bidi algorithm will be performed @@ -3950,11 +3969,14 @@ public class Bidi { * A level overrides the directional property of its corresponding * (same index) character if the level has the * <code>LEVEL_OVERRIDE</code> bit set.<br><br> - * Except for that bit, it must be + * Aside from that bit, it must be * <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>, - * with one exception: a level of zero may be specified for a - * paragraph separator even if <code>paraLevel>0</code> when multiple - * paragraphs are submitted in the same call to <code>setPara()</code>.<br><br> + * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if <code>LEVEL_OVERRIDE</code> + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.<br><br> * <strong>Caution: </strong>A reference to this array, not a copy * of the levels, will be stored in the <code>Bidi</code> object; * the <code>embeddingLevels</code> @@ -5294,13 +5316,19 @@ public class Bidi { /** * Create Bidi from the given text, embedding, and direction information. - * The embeddings array may be null. If present, the values represent - * embedding level information. Negative values from -1 to -61 indicate - * overrides at the absolute value of the level. Positive values from 1 to - * 61 indicate embeddings. Where values are zero, the base embedding level - * as determined by the base direction is assumed.<p> * - * Note: this constructor calls setPara() internally. + * <p>The embeddings array may be null. If present, the values represent + * embedding level information. + * Negative values from -1 to -{@link #MAX_EXPLICIT_LEVEL} + * indicate overrides at the absolute value of the level. + * Positive values from 1 to {@link #MAX_EXPLICIT_LEVEL} indicate embeddings. + * Where values are zero, the base embedding level + * as determined by the base direction is assumed, + * except for paragraph separators which remain at 0 to prevent reordering of paragraphs.</p> + * + * <p>Note: This constructor calls setPara() internally, + * after converting the java.text.Bidi-style embeddings with negative overrides + * into ICU-style embeddings with bit fields for {@link #LEVEL_OVERRIDE} and the level. * * @param text an array containing the paragraph of text to process. * @param textStart the index into the text array of the start of the @@ -5354,22 +5382,23 @@ public class Bidi { if (embeddings == null) { paraEmbeddings = null; } else { + // Convert from java.text.Bidi embeddings to ICU setPara() levels: + // Copy to the start of a new array and convert java.text negative overrides + // to ICU bit-field-and-mask overrides. + // A copy of the embeddings is always required because + // setPara() may modify its embeddings. paraEmbeddings = new byte[paragraphLength]; byte lev; for (int i = 0; i < paragraphLength; i++) { lev = embeddings[i + embStart]; if (lev < 0) { lev = (byte)((- lev) | LEVEL_OVERRIDE); - } else if (lev == 0) { - lev = paraLvl; - if (paraLvl > MAX_EXPLICIT_LEVEL) { - lev &= 1; - } } + // setPara() lifts level 0 up to the resolved paragraph level. paraEmbeddings[i] = lev; } } - if (textStart == 0 && embStart == 0 && paragraphLength == text.length) { + if (textStart == 0 && paragraphLength == text.length) { setPara(text, paraLvl, paraEmbeddings); } else { char[] paraText = new char[paragraphLength]; |