diff options
Diffstat (limited to 'src/com/android/launcher3/locale/HanziToPinyin.java')
-rw-r--r-- | src/com/android/launcher3/locale/HanziToPinyin.java | 186 |
1 files changed, 186 insertions, 0 deletions
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.launcher3.locale;

import android.text.TextUtils;
import android.util.Log;

import java.util.ArrayList;

import libcore.icu.Transliterator;

/**
 * An object to convert a Chinese character to its corresponding pinyin string.
 * For characters with multiple possible pinyin strings, only one is selected
 * according to the ICU Transliterator class. Polyphones are not supported in this
 * implementation.
 *
 * <p>Obtain the shared instance through {@link #getInstance()}. If the transliterator
 * could not be created, {@link #hasChineseTransliterator()} returns {@code false},
 * {@link #transliterate(String)} returns {@code null} and {@link #getTokens(String)}
 * returns an empty list.
 */
public class HanziToPinyin {
    private static final String TAG = "HanziToPinyin";

    /** Lazily created singleton; every access is guarded by the class lock. */
    private static HanziToPinyin sInstance;

    /** Transliterator used for non-Latin characters; null when construction failed. */
    private final Transliterator mPinyinTransliterator;

    /** Transliterator used for extended Latin characters; null when construction failed. */
    private final Transliterator mAsciiTransliterator;

    /**
     * One unit of tokenized input: either a single character that was given a pinyin
     * reading, or a run of consecutive characters sharing the same non-pinyin type.
     */
    public static class Token {
        /**
         * Separator between target string for each source char
         */
        public static final String SEPARATOR = " ";

        public static final int LATIN = 1;
        public static final int PINYIN = 2;
        public static final int UNKNOWN = 3;

        public Token() {
        }

        public Token(int type, String source, String target) {
            this.type = type;
            this.source = source;
            this.target = target;
        }

        /**
         * Type of this token: {@link #LATIN}, {@link #PINYIN} or {@link #UNKNOWN}.
         */
        public int type;
        /**
         * Original string before translation.
         */
        public String source;
        /**
         * Translated string of source. For Han, target is the corresponding pinyin.
         * Otherwise target is the original string in source.
         */
        public String target;
    }

    /**
     * Creates both transliterators. A {@link RuntimeException} from either constructor
     * is logged and leaves the transliterator(s) not yet created as {@code null}.
     */
    private HanziToPinyin() {
        // Build into locals first so the fields can be final while a partial failure
        // still keeps whichever transliterator was created before the exception.
        Transliterator pinyin = null;
        Transliterator ascii = null;
        try {
            pinyin = new Transliterator("Han-Latin/Names; Latin-Ascii; Any-Upper");
            ascii = new Transliterator("Latin-Ascii");
        } catch (RuntimeException e) {
            // Pass the exception along so the underlying cause shows up in the log.
            Log.w(TAG, "Han-Latin/Names transliterator data is missing,"
                    + " HanziToPinyin is disabled", e);
        }
        mPinyinTransliterator = pinyin;
        mAsciiTransliterator = ascii;
    }

    /**
     * @return {@code true} if the pinyin transliterator was created successfully.
     */
    public boolean hasChineseTransliterator() {
        return mPinyinTransliterator != null;
    }

    /**
     * Returns the shared instance, creating it on first use.
     */
    public static HanziToPinyin getInstance() {
        synchronized (HanziToPinyin.class) {
            if (sInstance == null) {
                sInstance = new HanziToPinyin();
            }
            return sInstance;
        }
    }

    /**
     * Classifies a single character and fills in {@code token} with its type, source
     * and target. Only called once {@link #hasChineseTransliterator()} is known to be
     * true, so {@code mPinyinTransliterator} is non-null here.
     *
     * @param character the UTF-16 code unit to classify
     * @param token     the token to overwrite with the result
     */
    private void tokenize(char character, Token token) {
        token.source = Character.toString(character);

        // ASCII
        if (character < 128) {
            token.type = Token.LATIN;
            token.target = token.source;
            return;
        }

        // Extended Latin. Transcode these to ASCII equivalents.
        // The second range is the Latin Extended Additional block, U+1E00..U+1EFF;
        // the upper bound is inclusive so that U+1EFF is treated as Latin as well.
        if (character < 0x250 || (0x1e00 <= character && character <= 0x1eff)) {
            token.type = Token.LATIN;
            token.target = mAsciiTransliterator == null
                    ? token.source
                    : mAsciiTransliterator.transliterate(token.source);
            return;
        }

        token.type = Token.PINYIN;
        token.target = mPinyinTransliterator.transliterate(token.source);
        // An empty or unchanged result means the transliterator produced no reading.
        if (TextUtils.isEmpty(token.target)
                || TextUtils.equals(token.source, token.target)) {
            token.type = Token.UNKNOWN;
            token.target = token.source;
        }
    }

    /**
     * Runs the whole input through the pinyin transliterator.
     *
     * @param input the string to transliterate
     * @return the transliterated string, or {@code null} if there is no Chinese
     *         transliterator or {@code input} is null or empty
     */
    public String transliterate(final String input) {
        if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
            return null;
        }
        return mPinyinTransliterator.transliterate(input);
    }

    /**
     * Convert the input to an array of tokens. A sequence of Latin or unknown characters
     * without space is put into a single Token; one Hanzi character which has pinyin is
     * treated as a Token of its own. The input is processed one {@code char} at a time.
     * If there is no Chinese transliterator, or the input is null or empty, an empty
     * token array is returned.
     *
     * @param input the string to tokenize
     * @return the tokens in input order; never {@code null}
     */
    public ArrayList<Token> getTokens(final String input) {
        ArrayList<Token> tokens = new ArrayList<Token>();
        if (!hasChineseTransliterator() || TextUtils.isEmpty(input)) {
            // return empty tokens.
            return tokens;
        }

        final int inputLength = input.length();
        final StringBuilder sb = new StringBuilder();
        int tokenType = Token.LATIN;
        Token token = new Token();

        // Go through the input, create a new token when
        // a. Token type changed
        // b. Get the Pinyin of current character.
        // c. current character is space.
        for (int i = 0; i < inputLength; i++) {
            final char character = input.charAt(i);
            if (Character.isSpaceChar(character)) {
                flushPending(sb, tokens, tokenType);
                continue;
            }

            tokenize(character, token);
            // Capture the type now: the PINYIN branch swaps in a fresh Token below.
            final int currentType = token.type;
            if (currentType == Token.PINYIN) {
                flushPending(sb, tokens, tokenType);
                tokens.add(token);
                // The added token now belongs to the list; use a new scratch token.
                token = new Token();
            } else {
                if (tokenType != currentType) {
                    flushPending(sb, tokens, tokenType);
                }
                sb.append(token.target);
            }
            tokenType = currentType;
        }
        flushPending(sb, tokens, tokenType);
        return tokens;
    }

    /**
     * Emits the buffered characters, if any, as one token of the given type and clears
     * the buffer. Does nothing when the buffer is empty.
     */
    private void flushPending(
            final StringBuilder sb, final ArrayList<Token> tokens, final int tokenType) {
        if (sb.length() == 0) {
            return;
        }
        String str = sb.toString();
        tokens.add(new Token(tokenType, str, str));
        sb.setLength(0);
    }
}