aboutsummaryrefslogtreecommitdiffstats
path: root/common/transforms/Persian-Latin-BGN.xml
blob: c7ee6c17923e5f955422d338650a8df12ef15954 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision: 13787 $"/>
	<transforms>
		<transform source="fa" target="fa_Latn" variant="BGN" direction="forward" draft="contributed" alias="Persian-Latin/BGN fa-Latn-t-fa-m0-bgn">
			<tRule><![CDATA[
#
########################################################################
# BGN/PCGN 1956 System
#
# This system was adopted by the BGN in 1946 and by the PCGN in 1958.
# It is used for the romanization of geographic names in Iran and
# for Persian-language names in Afghanistan.
#
# Originally prepared by Michael Everson <everson@evertype.com>
########################################################################
#
# MINIMAL FILTER: Persian-Latin
#

# Global filter: only characters in this set are offered to the rules
# below; every other character is passed through unchanged.
:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویيَُِّْ٠١٢٣٤٥٦٧٨٩پچژگی]] ;
# Normalize to NFKD before the rules run (NFC is the form named for the
# reverse direction).
:: NFKD (NFC) ;
#
#
########################################################################


#
########################################################################
#
# Define All Transformation Variables
#
########################################################################
#

# Closing single quotation mark: emitted for hamza and as the first part
# of the romanization of alef with madda.
$alef = ’;
# Opening single quotation mark: emitted for ain.
$ayin = ‘;
# Combining mark appended to the Latin output of source characters that
# would otherwise share their romanization with another source character
# (Arabic-Indic digits vs. Extended Arabic-Indic digits, KAF vs. KEHEH,
# stand-alone Arabic decimal/thousands separators).
$disambig =  ̱ ;
#
#
# Use this $wordBoundary until bug 2034 is fixed in ICU:
# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
#
# Matches any single character that is not a letter, a mark or a number.

$wordBoundary =  [^[:L:][:M:][:N:]] ;
#
#
########################################################################

# non-letters
#
# In the first two rules only the text inside { } is replaced; the
# [:Nd:] on either side is context, so the plain ',' / '.' output is used
# only when the separator stands between two decimal digits.  Anywhere
# else the separator is emitted with $disambig appended.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
#  ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
# Arabic-Indic digits (U+0660..U+0669) carry $disambig; the Extended
# Arabic-Indic digits further down map to the bare ASCII digit.
٠ ↔ 0 $disambig ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 $disambig ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 $disambig ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 $disambig ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 $disambig ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 $disambig ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 $disambig ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 $disambig ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 $disambig ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 $disambig ; # ARABIC-INDIC DIGIT NINE
۰ ↔ 0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE

#
########################################################################
#
# Rules moved to front to avoid masking
#
########################################################################


#
########################################################################
#
# BGN Page 89 Rule 4
#
# The character sequences كه , زه , سه , and گه may be romanized k·h, z·h,
# s·h, and g·h in order to differentiate those romanizations from the
# digraphs kh, zh, sh, and gh.
#
########################################################################
#

# Consonant + HEH pairs whose plain romanization would read as one of the
# digraphs kh, zh, sh or gh are written with a middle dot instead.
# Both kaf code points are covered: ARABIC KAF (U+0643) and the Persian
# KEHEH (U+06A9), since both are romanized with k in the main table.
كه → k·h ; # ARABIC LETTER KAF + HEH
که → k·h ; # ARABIC LETTER KEHEH + HEH
زه → z·h ; # ARABIC LETTER ZAIN + HEH
سه → s·h ; # ARABIC LETTER SEEN + HEH
گه → g·h ; # ARABIC LETTER GAF + HEH
#
#
########################################################################
#
# End Rule 4
#
########################################################################



#
########################################################################
#
# BGN Page 91 Rule 7
#
# Doubles consonant sounds are represented in Arabic script by
# placing a shaddah ( ّ ) over a consonant character. In romanization
# the letter should be doubled. [The remainder of this rule deals with
# the definite article and is lexical.]
#
########################################################################
#

# Consonant + SHADDA: emit the consonant's romanization twice.
# Each doubled value must be exactly twice the single-letter value used
# in the main table of this file (so DAD doubles to ẕẕ and WAW to vv in
# this Persian system).  These rules sit ahead of the main table so the
# two-character sequence is consumed before the bare consonant rule can
# take the letter on its own.
بّ → bb ; # ARABIC LETTER BEH + SHADDA
پّ → pp ; # ARABIC LETTER PEH + SHADDA
تّ → tt ; # ARABIC LETTER TEH + SHADDA
ثّ → s̄s̄ ; # ARABIC LETTER THEH + SHADDA
جّ → jj ; # ARABIC LETTER JEEM + SHADDA
چّ → chch ; # ARABIC LETTER TCHEH + SHADDA
حّ → ḥḥ ; # ARABIC LETTER HAH + SHADDA
خّ → khkh ; # ARABIC LETTER KHAH + SHADDA
دّ → dd ; # ARABIC LETTER DAL + SHADDA
ذّ → z̄z̄ ; # ARABIC LETTER THAL + SHADDA
رّ → rr ; # ARABIC LETTER REH + SHADDA
زّ → zz ; # ARABIC LETTER ZAIN + SHADDA
ژّ → zhzh ; # ARABIC LETTER JEH + SHADDA
سّ → ss ; # ARABIC LETTER SEEN + SHADDA
شّ → shsh ; # ARABIC LETTER SHEEN + SHADDA
صّ → ṣṣ ; # ARABIC LETTER SAD + SHADDA
ضّ → ẕẕ ; # ARABIC LETTER DAD + SHADDA
طّ → ṭṭ ; # ARABIC LETTER TAH + SHADDA
ظّ → ẓẓ ; # ARABIC LETTER ZAH + SHADDA
عّ → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA
غّ → ghgh ; # ARABIC LETTER GHAIN + SHADDA
فّ → ff ; # ARABIC LETTER FEH + SHADDA
قّ → qq ; # ARABIC LETTER QAF + SHADDA
كّ → kk ; # ARABIC LETTER KAF + SHADDA
کّ → kk ; # ARABIC LETTER KEHEH + SHADDA
گّ → gg ; # ARABIC LETTER GAF + SHADDA
لّ → ll ; # ARABIC LETTER LAM + SHADDA
مّ → mm ; # ARABIC LETTER MEEM + SHADDA
نّ → nn ; # ARABIC LETTER NOON + SHADDA
هّ → hh ; # ARABIC LETTER HEH + SHADDA
وّ → vv ; # ARABIC LETTER WAW + SHADDA
یّ → yy ; # ARABIC LETTER FARSI YEH + SHADDA
#
#
########################################################################
#
# End Rule 7
#
########################################################################



#
########################################################################
#
# Start of Transformations
#
########################################################################
#

# Main letter table.  In a rule of the form  context{key → output  only
# the key is replaced; the part before '{' must merely precede it.
#
# Word-initial hamza and alef are dropped (empty output); hamza elsewhere
# becomes $alef.
$wordBoundary{ء →  ; # ARABIC LETTER HAMZA
ء → $alef ; # ARABIC LETTER HAMZA
$wordBoundary{ا →  ; # ARABIC LETTER ALEF
آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE
ب → b ; # ARABIC LETTER BEH
پ → p ; # ARABIC LETTER PEH
ت → t ; # ARABIC LETTER TEH
ة → h ; # ARABIC LETTER TEH MARBUTA
ث → s̄ ; # ARABIC LETTER THEH
ج → j ; # ARABIC LETTER JEEM
چ → ch ; # ARABIC LETTER TCHEH
ح → ḥ ; # ARABIC LETTER HAH
خ → kh ; # ARABIC LETTER KHAH
د → d ; # ARABIC LETTER DAL
ذ → z̄ ; # ARABIC LETTER THAL
ر → r ; # ARABIC LETTER REH
ز → z ; # ARABIC LETTER ZAIN
ژ → zh ; # ARABIC LETTER JEH
س → s ; # ARABIC LETTER SEEN
ش → sh ; # ARABIC LETTER SHEEN
ص → ṣ ; # ARABIC LETTER SAD
ض → ẕ ; # ARABIC LETTER DAD
ط → ṭ ; # ARABIC LETTER TAH
ظ → ẓ ; # ARABIC LETTER ZAH
ع → $ayin ; # ARABIC LETTER AIN
غ → gh ; # ARABIC LETTER GHAIN
ف → f ; # ARABIC LETTER FEH
ق → q ; # ARABIC LETTER QAF
# KEHEH is the unmarked source of k; ARABIC KAF carries $disambig so the
# two stay distinguishable in the Latin output.
ک ↔ k ; # ARABIC LETTER KEHEH
ك ↔ k $disambig ; # ARABIC LETTER KAF
گ → g ; # ARABIC LETTER GAF
ل → l ; # ARABIC LETTER LAM
م → m ; # ARABIC LETTER MEEM
ن → n ; # ARABIC LETTER NOON
ه → h ; # ARABIC LETTER HEH
و → v ; # ARABIC LETTER WAW
ی → y ; # ARABIC LETTER FARSI YEH
# ARABIC YEH (U+064A) is accepted by the filter and used in the kasra rule
# below, so give it the same consonant value as FARSI YEH instead of
# leaving it untransliterated.
ي → y ; # ARABIC LETTER YEH

# Vowel marks.  The multi-character sequences come before the bare mark
# so the longer match wins at the position of the mark.
َا → ā ; # ARABIC FATHA + ALEF
َی → á ; # ARABIC FATHA + FARSI YEH
َوْ → ow ; # ARABIC FATHA + WAW + SUKUN
َ → a ; # ARABIC FATHA

# Kasra + yeh is ī for either yeh code point; without the FARSI YEH rule
# the sequence would fall through to kasra (e) followed by y.
ِي → ī ; # ARABIC KASRA + YEH
ِی → ī ; # ARABIC KASRA + FARSI YEH
ِ → e ; # ARABIC KASRA

ُو → ū ; # ARABIC DAMMA + WAW
ُ → o ; # ARABIC DAMMA

# Sukun marks the absence of a vowel and produces no output.
ْ →  ; # ARABIC SUKUN
# Recompose the result (NFD is the form named for the reverse direction).
::NFC (NFD) ;

#
#
########################################################################

			]]></tRule>
		</transform>
	</transforms>
</supplementalData>