|
15 | 15 | * limitations under the License. |
16 | 16 | */ |
17 | 17 |
|
18 | | -// CONSONANTS |
19 | | -"ph" "" "" "f" // foreign |
20 | | -"sh" "" "" "S" // foreign |
21 | | -"kh" "" "" "x" // foreign |
22 | | - |
23 | | -"gli" "" "" "(gli|l[italian])" |
24 | | -"gni" "" "" "(gni|ni[italian+french])" |
25 | | -"gn" "" "[aeou]" "(n[italian+french]|nj[italian+french]|gn) |
26 | | -"gh" "" "" "g" // It + translit. from Arabic |
27 | | -"dh" "" "" "d" // translit. from Arabic |
28 | | -"bh" "" "" "d" // translit. from Arabic |
29 | | -"th" "" "" "t" // translit. from Arabic |
30 | | -"lh" "" "" "l" // Port |
31 | | -"nh" "" "" "nj" // Port |
32 | | - |
33 | | -"ig" "[aeiou]" "" "(ig|tS[spanish])" |
34 | | -"ix" "[aeiou]" "" "S" // Sp |
35 | | -"tx" "" "" "tS" // Sp |
36 | | -"tj" "" "$" "tS" // Sp |
37 | | -"tj" "" "" "dZ" // Sp |
38 | | -"tg" "" "" "(tg|dZ[spanish])" |
39 | | - |
40 | | -"gi" "" "[aeou]" "dZ" // Italian |
41 | | -"g" "" "y" "Z" // French |
42 | | -"gg" "" "[ei]" "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])" |
43 | | -"g" "" "[ei]" "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])" |
44 | | - |
45 | | -"guy" "" "" "gi" |
46 | | -"gue" "" "$" "(k[french]|ge)" |
47 | | -"gu" "" "[ei]" "(g|gv") // not It |
48 | | -"gu" "" "[ao]" "gv" // not It |
49 | | - |
50 | | -"ñ" "" "" "(n|nj)" |
51 | | -"ny" "" "" "nj" |
52 | | - |
53 | | -"sc" "" "[ei]" "(s|S[italian])" |
54 | | -"sç" "" "[aeiou]" "s" // not It |
55 | | -"ss" "" "" "s" |
56 | | -"ç" "" "" "s" // not It |
57 | | - |
58 | | -"ch" "" "[ei]" "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])" |
59 | | -"ch" "" "" "(S|tS[spanish]|dZ[spanish])" |
60 | | - |
61 | | -"ci" "" "[aeou]" "(tS[italian]|si)" |
62 | | -"cc" "" "[eiyéèê]" "(tS[italian]|ks[portuguese+french+spanish])" |
63 | | -"c" "" "[eiyéèê]" "(tS[italian]|s[portuguese+french+spanish])" |
64 | | - //array("c" "" "[aou]" "(k|C[".($portuguese+$spanish)."])" // "C" means that the actual letter could be "ç" (cedilla omitted) |
65 | | - |
66 | | -"s" "^" "" "s" |
67 | | -"s" "[aáuiíoóeéêy]" "[aáuiíoóeéêy]" "(s[spanish]|z[portuguese+french+italian])" |
68 | | -"s" "" "[dglmnrv]" "(z|Z[portuguese])" |
69 | | - |
70 | | -"z" "" "$" "(s|ts[italian]|S[portuguese])" // ts It, s/S/Z Port, s in Sp, z Fr |
71 | | -"z" "" "[bdgv]" "(z|dz[italian]|Z[portuguese])" // dz It, Z/z Port, z Sp & Fr |
72 | | -"z" "" "[ptckf]" "(s|ts[italian]|S[portuguese])" // ts It, s/S/z Port, z/s Sp |
73 | | -"z" "" "" "(z|dz[italian]|ts[italian]|s[spanish])" // ts/dz It, z Port & Fr, z/s Sp |
74 | | - |
75 | | -"que" "" "$" "(k[french]|ke)" |
76 | | -"qu" "" "[eiu]" "k" |
77 | | -"qu" "" "[ao]" "(kv|k)" // k is It |
78 | | - |
79 | | -"ex" "" "[aáuiíoóeéêy]" "(ez[portuguese]|eS[portuguese]|eks|egz)" |
80 | | -"ex" "" "[cs]" "(e[portuguese]|ek)" |
81 | | - |
82 | | -"m" "" "[cdglnrst]" "(m|n[portuguese])" |
83 | | -"m" "" "[bfpv]" "(m|n[portuguese+spanish])" |
84 | | -"m" "" "$" "(m|n[portuguese])" |
85 | | - |
86 | | -"b" "^" "" "(b|V[spanish])" |
87 | | -"v" "^" "" "(v|B[spanish])" |
88 | | - |
89 | | - // VOWELS |
90 | | -"eau" "" "" "o" // Fr |
91 | | - |
92 | | -"ouh" "" "[aioe]" "(v[french]|uh)" |
93 | | -"uh" "" "[aioe]" "(v|uh)" |
94 | | -"ou" "" "[aioe]" "v" // french |
95 | | -"uo" "" "" "(vo|o)" |
96 | | -"u" "" "[aie]" "v" |
97 | | - |
98 | | -"i" "[aáuoóeéê]" "" "j" |
99 | | -"i" "" "[aeou]" "j" |
100 | | -"y" "[aáuiíoóeéê]" "" "j" |
101 | | -"y" "" "[aeiíou]" "j" |
102 | | -"e" "" "$" "(e|E[french])" |
103 | | - |
104 | | -"ão" "" "" "(au|an)" // Port |
105 | | -"ãe" "" "" "(aj|an)" // Port |
106 | | -"ãi" "" "" "(aj|an)" // Port |
107 | | -"õe" "" "" "(oj|on)" // Port |
108 | | -"où" "" "" "u" // Fr |
109 | | -"ou" "" "" "(ou|u[french])" |
110 | | - |
111 | | -"â" "" "" "a" // Port & Fr |
112 | | -"à" "" "" "a" // Port |
113 | | -"á" "" "" "a" // Port & Sp |
114 | | -"ã" "" "" "(a|an)" // Port |
115 | | -"é" "" "" "e" |
116 | | -"ê" "" "" "e" // Port & Fr |
117 | | -"è" "" "" "e" // Sp & Fr & It |
118 | | -"í" "" "" "i" // Port & Sp |
119 | | -"î" "" "" "i" // Fr |
120 | | -"ô" "" "" "o" // Port & Fr |
121 | | -"ó" "" "" "o" // Port & Sp & It |
122 | | -"õ" "" "" "(o|on)" // Port |
123 | | -"ò" "" "" "o" // Sp & It |
124 | | -"ú" "" "" "u" // Port & Sp |
125 | | -"ü" "" "" "u" // Port & Sp |
126 | | - |
127 | | - // LATIN ALPHABET |
128 | | -"a" "" "" "a" |
129 | | -"b" "" "" "(b|v[spanish])" |
130 | | -"c" "" "" "k" |
131 | | -"d" "" "" "d" |
132 | | -"e" "" "" "e" |
133 | | -"f" "" "" "f" |
134 | | -"g" "" "" "g" |
135 | | -"h" "" "" "h" |
136 | | -"i" "" "" "i" |
137 | | -"j" "" "" "(x[spanish]|Z)" // not It |
138 | | -"k" "" "" "k" |
139 | | -"l" "" "" "l" |
140 | | -"m" "" "" "m" |
141 | | -"n" "" "" "n" |
142 | | -"o" "" "" "o" |
143 | | -"p" "" "" "p" |
144 | | -"q" "" "" "k" |
145 | | -"r" "" "" "r" |
146 | | -"s" "" "" "(s|S[portuguese])" |
147 | | -"t" "" "" "t" |
148 | | -"u" "" "" "u" |
149 | | -"v" "" "" "(v|b[spanish])" |
150 | | -"w" "" "" "v" // foreign |
151 | | -"x" "" "" "(ks|gz|S[portuguese+spanish])" // S/ks Port & Sp, gz Sp, It only ks |
152 | | -"y" "" "" "i" |
153 | | -"z" "" "" "z" |
| 18 | +// ASHKENAZIC |
| 19 | + |
| 20 | +// A, E, I, O, P, U should create variants, but a, e, i, o, u should not create any new variant |
| 21 | +// Q = ü ; Y = ä = ö |
| 22 | +// H = initial "H" in German/English |
| 23 | + |
| 24 | +// CONSONANTS |
| 25 | +"b" "" "" "(b|v[spanish])" |
| 26 | +"J" "" "" "z" // Argentina Spanish: "ll" = /Z/, but approximately /Z/ = /z/ |
| 27 | + |
| 28 | +// VOWELS |
| 29 | +// "ALL" DIPHTHONGS are interchangeable WITH ONE ANOTHER and with the monophthongs of which they are composed ("D" means "diphthong") |
| 30 | +// {a,o} are totally interchangeable if unstressed; in German "a/o" can actually be from "ä/ö" (which are equivalent to "e") |
| 31 | +// {i,e} are interchangeable if unstressed, while in German "u" can actually be from "ü" (which is equivalent to "i") |
| 32 | + |
| 33 | +"aiB" "" "[bp]" "(D|Dm)" |
| 34 | +"AiB" "" "[bp]" "(D|Dm)" |
| 35 | +"oiB" "" "[bp]" "(D|Dm)" |
| 36 | +"OiB" "" "[bp]" "(D|Dm)" |
| 37 | +"uiB" "" "[bp]" "(D|Dm)" |
| 38 | +"UiB" "" "[bp]" "(D|Dm)" |
| 39 | +"eiB" "" "[bp]" "(D|Dm)" |
| 40 | +"EiB" "" "[bp]" "(D|Dm)" |
| 41 | +"iiB" "" "[bp]" "(D|Dm)" |
| 42 | +"IiB" "" "[bp]" "(D|Dm)" |
| 43 | + |
| 44 | +"aiB" "" "[dgkstvz]" "(D|Dn)" |
| 45 | +"AiB" "" "[dgkstvz]" "(D|Dn)" |
| 46 | +"oiB" "" "[dgkstvz]" "(D|Dn)" |
| 47 | +"OiB" "" "[dgkstvz]" "(D|Dn)" |
| 48 | +"uiB" "" "[dgkstvz]" "(D|Dn)" |
| 49 | +"UiB" "" "[dgkstvz]" "(D|Dn)" |
| 50 | +"eiB" "" "[dgkstvz]" "(D|Dn)" |
| 51 | +"EiB" "" "[dgkstvz]" "(D|Dn)" |
| 52 | +"iiB" "" "[dgkstvz]" "(D|Dn)" |
| 53 | +"IiB" "" "[dgkstvz]" "(D|Dn)" |
| 54 | + |
| 55 | +"B" "" "[bp]" "(o|om[polish]|im[polish])" |
| 56 | +"B" "" "[dgkstvz]" "(a|o|on[polish]|in[polish])" |
| 57 | +"B" "" "" "(a|o)" |
| 58 | + |
| 59 | +"aiF" "" "[bp]" "(D|Dm)" |
| 60 | +"AiF" "" "[bp]" "(D|Dm)" |
| 61 | +"oiF" "" "[bp]" "(D|Dm)" |
| 62 | +"OiF" "" "[bp]" "(D|Dm)" |
| 63 | +"uiF" "" "[bp]" "(D|Dm)" |
| 64 | +"UiF" "" "[bp]" "(D|Dm)" |
| 65 | +"eiF" "" "[bp]" "(D|Dm)" |
| 66 | +"EiF" "" "[bp]" "(D|Dm)" |
| 67 | +"iiF" "" "[bp]" "(D|Dm)" |
| 68 | +"IiF" "" "[bp]" "(D|Dm)" |
| 69 | + |
| 70 | +"aiF" "" "[dgkstvz]" "(D|Dn)" |
| 71 | +"AiF" "" "[dgkstvz]" "(D|Dn)" |
| 72 | +"oiF" "" "[dgkstvz]" "(D|Dn)" |
| 73 | +"OiF" "" "[dgkstvz]" "(D|Dn)" |
| 74 | +"uiF" "" "[dgkstvz]" "(D|Dn)" |
| 75 | +"UiF" "" "[dgkstvz]" "(D|Dn)" |
| 76 | +"eiF" "" "[dgkstvz]" "(D|Dn)" |
| 77 | +"EiF" "" "[dgkstvz]" "(D|Dn)" |
| 78 | +"iiF" "" "[dgkstvz]" "(D|Dn)" |
| 79 | +"IiF" "" "[dgkstvz]" "(D|Dn)" |
| 80 | + |
| 81 | +"F" "" "[bp]" "(i|im[polish]|om[polish])" |
| 82 | +"F" "" "[dgkstvz]" "(i|in[polish]|on[polish])" |
| 83 | +"F" "" "" "i" |
| 84 | + |
| 85 | +"P" "" "" "(o|u)" |
| 86 | + |
| 87 | +"I" "[aeiouAEIBFOUQY]" "" "i" |
| 88 | +"I" "" "[^aeiouAEBFIOU]e" "(Q[german]|i|D[english])" // "line" |
| 89 | +"I" "" "$" "i" |
| 90 | +"I" "" "[^k]$" "i" |
| 91 | +"Ik" "[lr]" "$" "(ik|Qk[german])" |
| 92 | +"Ik" "" "$" "ik" |
| 93 | +"sIts" "" "$" "(sits|sQts[german])" |
| 94 | +"Its" "" "$" "its" |
| 95 | +"I" "" "" "(Q[german]|i)" |
| 96 | + |
| 97 | +"lE" "[bdfgkmnprsStvzZ]" "$" "(li|il[english])" // Apple < Appel |
| 98 | +"lE" "[bdfgkmnprsStvzZ]" "" "(li|il[english]|lY[german])" // Applebaum < Appelbaum |
| 99 | + |
| 100 | +"au" "" "" "(D|a|u)" |
| 101 | +"ou" "" "" "(D|o|u)" |
| 102 | + |
| 103 | +"ai" "" "" "(D|a|i)" |
| 104 | +"Ai" "" "" "(D|a|i)" |
| 105 | +"oi" "" "" "(D|o|i)" |
| 106 | +"Oi" "" "" "(D|o|i)" |
| 107 | +"ui" "" "" "(D|u|i)" |
| 108 | +"Ui" "" "" "(D|u|i)" |
| 109 | +"ei" "" "" "(D|i)" |
| 110 | +"Ei" "" "" "(D|i)" |
| 111 | + |
| 112 | +"iA" "" "$" "(ia|io)" |
| 113 | +"iA" "" "" "(ia|io|iY[german])" |
| 114 | +"A" "" "[^aeiouAEBFIOU]e" "(a|o|Y[german]|D[english])" // "plane" |
| 115 | + |
| 116 | +"E" "i[^aeiouAEIOU]" "" "(i|Y[german]|[english])" // Wineberg (vineberg/vajneberg) --> vajnberg |
| 117 | +"E" "a[^aeiouAEIOU]" "" "(i|Y[german]|[english])" // Shaneberg (shaneberg/shejneberg) --> shejnberg |
| 118 | + |
| 119 | +"e" "" "[fklmnprstv]$" "i" |
| 120 | +"e" "" "ts$" "i" |
| 121 | +"e" "" "$" "i" |
| 122 | +"e" "[DaoiuAOIUQY]" "" "i" |
| 123 | +"e" "" "[aoAOQY]" "i" |
| 124 | +"e" "" "" "(i|Y[german])" |
| 125 | + |
| 126 | +"E" "" "[fklmnprst]$" "i" |
| 127 | +"E" "" "ts$" "i" |
| 128 | +"E" "" "$" "i" |
| 129 | +"E" "[DaoiuAOIUQY]" "" "i" |
| 130 | +"E" "" "[aoAOQY]" "i" |
| 131 | +"E" "" "" "(i|Y[german])" |
| 132 | + |
| 133 | +"a" "" "" "(a|o)" |
| 134 | + |
| 135 | +"O" "" "[fklmnprstv]$" "o" |
| 136 | +"O" "" "ts$" "o" |
| 137 | +"O" "" "$" "o" |
| 138 | +"O" "[oeiuQY]" "" "o" |
| 139 | +"O" "" "" "(o|Y[german])" |
| 140 | + |
| 141 | +"A" "" "[fklmnprst]$" "(a|o)" |
| 142 | +"A" "" "ts$" "(a|o)" |
| 143 | +"A" "" "$" "(a|o)" |
| 144 | +"A" "[oeiuQY]" "" "(a|o)" |
| 145 | +"A" "" "" "(a|o|Y[german])" |
| 146 | + |
| 147 | +"U" "" "$" "u" |
| 148 | +"U" "[DoiuQY]" "" "u" |
| 149 | +"U" "" "[^k]$" "u" |
| 150 | +"Uk" "[lr]" "$" "(uk|Qk[german])" |
| 151 | +"Uk" "" "$" "uk" |
| 152 | + |
| 153 | +"sUts" "" "$" "(suts|sQts[german])" |
| 154 | +"Uts" "" "$" "uts" |
| 155 | +"U" "" "" "(u|Q[german])" |
0 commit comments