|
1 |
| -# phonetic_english.h - phonetic transformation rules for use with phonetic.c |
2 |
| -# Copyright (C) 2000 Bjoern Jacke |
3 |
| -# |
4 |
| -# This rule set is based on Lawrence Phillips' original metaphone |
5 |
| -# algorithm with modifications made by Michael Kuhn in his |
6 |
| -# C implementation, more modifications by Bjoern Jacke when |
7 |
| -# converting the algorithm to a rule set and minor |
8 |
| -# touch ups by Kevin Atkinson |
9 |
| -# |
10 |
| -# This library is free software; you can redistribute it and/or |
11 |
| -# modify it under the terms of the GNU Lesser General Public |
12 |
| -# License version 2.1 as published by the Free Software Foundation; |
13 |
| -# |
14 |
| -# This library is distributed in the hope that it will be useful, |
15 |
| -# but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
| -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 |
| -# Lesser General Public License for more details. |
18 |
| -# |
19 |
| -# You should have received a copy of the GNU Lesser General Public |
20 |
| -# License along with this library; if not, write to the Free Software |
21 |
| -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
| -# |
23 |
| -# Bjoern Jacke may be reached by email at bjoern.jacke@gmx.de |
24 |
| -# |
25 |
| -# Changelog: |
26 |
| -# |
27 |
| -# 2000-01-05 Bjoern Jacke <bjoern.jacke@gmx.de> |
28 |
| -# - first version with translation rules derived from |
29 |
| -# metaphone.cc distributed with aspell 0.28.3 |
30 |
| -# - "TH" is now represented as "@" because "0" is a |
31 |
| -# meta character |
32 |
| -# - removed TH(!vowel) --> T; always use TH --> @ instead |
33 |
| -# - dropped "^AE" -> "E" (redundant) |
34 |
| -# - "ing" is transformed to "N", not "NK" |
35 |
| -# - "SCH(EO)" transforms to "SK" now |
36 |
| -# - added R --> SILENT if (after a vowel) and no (vowel or |
37 |
| -# "y" follows) like in "Marcy" or "abort" |
38 |
| -# - H is SILENT in RH at beginning of words |
39 |
| -# - H is SILENT if vowel leads and "Y" follows |
40 |
| -# - some ".OUGH.." --> ...F exceptions added |
41 |
| -# - "^V" transforms to "W" |
42 |
| -# 2000-01-07 Kevin Atkinson <kevinatk@home.com> |
43 |
| -# Converted from header to data file. |
44 |
| -# |
45 |
| - |
46 | 1 | version 1.1
|
47 |
| - |
48 |
| -AH(AEIOUY)-^ *H |
49 |
| -AR(AEIOUY)-^ *R |
50 |
| -A(HR)^ * |
51 |
| -A^ * |
52 |
| -AH(AEIOUY)- H |
53 |
| -AR(AEIOUY)- R |
54 |
| -A(HR) _ |
55 |
| -BB- _ |
56 |
| -B B |
57 |
| -CQ- _ |
58 |
| -CIA X |
59 |
| -CH X |
60 |
| -C(EIY)- S |
61 |
| -CK K |
62 |
| -COUGH^ KF |
63 |
| -CC< C |
64 |
| -C K |
65 |
| -DG(EIY) K |
66 |
| -DD- _ |
67 |
| -D T |
68 |
| -É< E |
69 |
| -EH(AEIOUY)-^ *H |
70 |
| -ER(AEIOUY)-^ *R |
71 |
| -E(HR)^ * |
72 |
| -ENOUGH^$ *NF |
73 |
| -E^ * |
74 |
| -EH(AEIOUY)- H |
75 |
| -ER(AEIOUY)- R |
76 |
| -E(HR) _ |
77 |
| -FF- _ |
78 |
| -F F |
79 |
| -GN^ N |
80 |
| -GN$ N |
81 |
| -GNS$ NS |
82 |
| -GNED$ N |
83 |
| -GH(AEIOUY)- K |
84 |
| -GH _ |
85 |
| -GG9 K |
86 |
| -G K |
87 |
| -H H |
88 |
| -IH(AEIOUY)-^ *H |
89 |
| -IR(AEIOUY)-^ *R |
90 |
| -I(HR)^ * |
91 |
| -I^ * |
92 |
| -ING6 N |
93 |
| -IH(AEIOUY)- H |
94 |
| -IR(AEIOUY)- R |
95 |
| -I(HR) _ |
96 |
| -J K |
97 |
| -KN^ N |
98 |
| -KK- _ |
99 |
| -K K |
100 |
| -LAUGH^ LF |
101 |
| -LL- _ |
102 |
| -L L |
103 |
| -MB$ M |
104 |
| -MM M |
105 |
| -M M |
106 |
| -NN- _ |
107 |
| -N N |
108 |
| -OH(AEIOUY)-^ *H |
109 |
| -OR(AEIOUY)-^ *R |
110 |
| -O(HR)^ * |
111 |
| -O^ * |
112 |
| -OH(AEIOUY)- H |
113 |
| -OR(AEIOUY)- R |
114 |
| -O(HR) _ |
115 |
| -PH F |
116 |
| -PN^ N |
117 |
| -PP- _ |
118 |
| -P P |
119 |
| -Q K |
120 |
| -RH^ R |
121 |
| -ROUGH^ RF |
122 |
| -RR- _ |
123 |
| -R R |
124 |
| -SCH(EOU)- SK |
125 |
| -SC(IEY)- S |
126 |
| -SH X |
127 |
| -SI(AO)- X |
128 |
| -SS- _ |
129 |
| -S S |
130 |
| -TI(AO)- X |
131 |
| -TH @ |
132 |
| -TCH-- _ |
133 |
| -TOUGH^ TF |
134 |
| -TT- _ |
135 |
| -T T |
136 |
| -UH(AEIOUY)-^ *H |
137 |
| -UR(AEIOUY)-^ *R |
138 |
| -U(HR)^ * |
139 |
| -U^ * |
140 |
| -UH(AEIOUY)- H |
141 |
| -UR(AEIOUY)- R |
142 |
| -U(HR) _ |
143 |
| -V^ W |
144 |
| -V F |
145 |
| -WR^ R |
146 |
| -WH^ W |
147 |
| -W(AEIOU)- W |
148 |
| -X^ S |
149 |
| -X KS |
150 |
| -Y(AEIOU)- Y |
151 |
| -ZZ- _ |
152 |
| -Z S |
153 |
| - |
154 |
| -#The rules in a different view: |
155 |
| -# |
156 |
| -# Exceptions: |
157 |
| -# |
158 |
| -# Beginning of word: "gn", "kn-", "pn-", "wr-" ----> drop first letter |
159 |
| -# "Aebersold", "Gnagy", "Knuth", "Pniewski", "Wright" |
160 |
| -# |
161 |
| -# Beginning of word: "x" ----> change to "s" |
162 |
| -# as in "Deng Xiaopeng" |
163 |
| -# |
164 |
| -# Beginning of word: "wh-" ----> change to "w" |
165 |
| -# as in "Whalen" |
166 |
| -# Beginning of word: leading vowels are transformed to "*" |
167 |
| -# |
168 |
| -# "[crt]ough" and "enough" are handled separately because of "F" sound |
169 |
| -# |
170 |
| -# |
171 |
| -# A --> A at beginning |
172 |
| -# _ otherwise |
173 |
| -# |
174 |
| -# B --> B unless at the end of word after "m", as in "dumb", "McComb" |
175 |
| -# |
176 |
| -# C --> X (sh) if "-cia-" or "-ch-" |
177 |
| -# S if "-ci-", "-ce-", or "-cy-" |
178 |
| -# SILENT if "-sci-", "-sce-", or "-scy-", or "-cq-" |
179 |
| -# K otherwise, including in "-sch-" |
180 |
| -# |
181 |
| -# D --> K if in "-dge-", "-dgy-", or "-dgi-" |
182 |
| -# T otherwise |
183 |
| -# |
184 |
| -# E --> A at beginning |
185 |
| -# _ SILENT otherwise |
186 |
| -# |
187 |
| -# F --> F |
188 |
| -# |
189 |
| -# G --> SILENT if in "-gh-" and not at end or before a vowel |
190 |
| -# in "-gn" or "-gned" or "-gns" |
191 |
| -# in "-dge-" etc., as in above rule |
192 |
| -# K if before "i", or "e", or "y" if not double "gg" |
193 |
| -# |
194 |
| -# K otherwise (incl. "GG"!) |
195 |
| -# |
196 |
| -# H --> SILENT if after vowel and no vowel or "Y" follows |
197 |
| -# or after "-ch-", "-sh-", "-ph-", "-th-", "-gh-" |
198 |
| -# or after "rh-" at beginning |
199 |
| -# H otherwise |
200 |
| -# |
201 |
| -# I --> A at beginning |
202 |
| -# _ SILENT otherwise |
203 |
| -# |
204 |
| -# J --> K |
205 |
| -# |
206 |
| -# K --> SILENT if after "c" |
207 |
| -# K otherwise |
208 |
| -# |
209 |
| -# L --> L |
210 |
| -# |
211 |
| -# M --> M |
212 |
| -# |
213 |
| -# N --> N |
214 |
| -# |
215 |
| -# O --> A at beginning |
216 |
| -# _ SILENT otherwise |
217 |
| -# |
218 |
| -# P --> F if before "h" |
219 |
| -# P otherwise |
220 |
| -# |
221 |
| -# Q --> K |
222 |
| -# |
223 |
| -# R --> SILENT if after vowel and no vowel or "Y" follows |
224 |
| -# R otherwise |
225 |
| -# |
226 |
| -# S --> X (sh) if before "h" or in "-sio-" or "-sia-" |
227 |
| -# SK if followed by "ch(eo)" (SCH(EO)) |
228 |
| -# S otherwise |
229 |
| -# |
230 |
| -# T --> X (sh) if "-tia-" or "-tio-" |
231 |
| -# @ (th) if before "h" |
232 |
| -# silent if in "-tch-" |
233 |
| -# T otherwise |
234 |
| -# |
235 |
| -# U --> A at beginning |
236 |
| -# _ SILENT otherwise |
237 |
| -# |
238 |
| -# V --> W if first letter of word |
239 |
| -# F otherwise |
240 |
| -# |
241 |
| -# W --> SILENT if not followed by a vowel |
242 |
| -# W if followed by a vowel |
243 |
| -# |
244 |
| -# X --> KS |
245 |
| -# |
246 |
| -# Y --> SILENT if not followed by a vowel |
247 |
| -# Y if followed by a vowel |
248 |
| -# |
249 |
| -# Z --> S |
250 |
| - |
0 commit comments