generator.py 86 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296
  1. #============================================================================
  2. # This file is part of Pwman3.
  3. #
  4. # Pwman3 is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License, version 2
  6. # as published by the Free Software Foundation;
  7. #
  8. # Pwman3 is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with Pwman3; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  16. #============================================================================
  17. # Copyright (C) 2006 Ivan Kelly <ivan@ivankelly.net>
  18. #============================================================================
  19. """
  20. Functions to generate passwords.
  21. Based heavily on passogva.py (c) 2004 Mo-Tsuki, LLC.
  22. http://dev.mosuki.com/passogva/
  23. Usage:
  24. import pwman.util.generator as PwGen
  25. minlen = 6
  26. maxlen = 8
  27. (word, hyphenated_word) = PwGen.generate_password(minlen, maxlen)
  28. """
  29. import random
  30. class PasswordGenerationException(Exception):
  31. def __init__(self, message):
  32. self.message = message
  33. def __str__(self):
  34. return self.message
  35. def generate_password(minlen, maxlen, capitals = True, symbols = False, numerics = False):
  36. (password, hyphenated) = generate_password_shazel(minlen, maxlen)
  37. if (capitals):
  38. password = randomly_capitalize(password)
  39. if (symbols):
  40. password = leetify(password)
  41. elif (numerics):
  42. password = change_numerics(password)
  43. return (password, hyphenated)
  44. def randomly_capitalize(password):
  45. newpassword = str()
  46. for l in password:
  47. if (random.random() >= 0.5):
  48. l = l.upper()
  49. newpassword = newpassword + l
  50. return newpassword
  51. def leetify(password):
  52. newpassword = str()
  53. for l in password:
  54. if (random.random() >= 0.5):
  55. l = leetify_char(l)
  56. newpassword = newpassword + l
  57. return newpassword
  58. def change_numerics(password):
  59. newpassword = str()
  60. for l in password:
  61. if (random.random() >= 0.5):
  62. l = change_numerics_char(l)
  63. newpassword = newpassword + l
  64. return newpassword
  65. #
  66. # Dictionary of mappings for leetness
  67. #
  68. leetlist = {
  69. 'w': "\/\/", 'W': "\/\/", 'e': '3', 'E': '3', 't': '+', 'T': '7',
  70. 'i': '1', 'I': '1', 'o': '0', 'O': '0', 'A': '4', 's': '5', 'S': '$',
  71. 'g': '9', 'K': '|<', 'k': '|<', 'x': '><', 'X': '><', 'c': '<', 'C': '<',
  72. 'v': '\/', 'V': '\/', 'n': '|\|', 'N': '|\|', 'm': '|\/|', 'M': '|\/|'
  73. }
  74. def leetify_char(l):
  75. try:
  76. return leetlist[l]
  77. except KeyError:
  78. return l
  79. numericlist = {
  80. 'e': '3', 'E': '3', 'T': '7',
  81. 'i': '1', 'I': '1', 'o': '0', 'O': '0', 'A': '4', 's': '5', 'S': '5',
  82. 'g': '9', 'q': '9', 'l': '1'
  83. }
  84. def change_numerics_char(l):
  85. try:
  86. return numericlist[l]
  87. except KeyError:
  88. return l
  89. #
  90. # Beyond this point layeth Steve Hazel's code
  91. # Steven Hazel <sah@mosuki.com>
  92. #
  93. # I've added exceptions
  94. #
  95. MIN_LENGTH_PASSWORD = 6
  96. MAX_LENGTH_PASSWORD = 14
  97. grams = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
  98. 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
  99. 'z', 'ch', 'gh', 'ph', 'rh', 'sh', 'th', 'wh', 'qu', 'ck')
  100. vowel_grams = ('a', 'e', 'i', 'o', 'u', 'y')
  101. occurrence_frequencies = {
  102. 'a' : 10, 'b' : 8, 'c' : 12, 'd' : 12,
  103. 'e' : 12, 'f' : 8, 'g' : 8, 'h' : 6,
  104. 'i' : 10, 'j' : 8, 'k' : 8, 'l' : 6,
  105. 'm' : 6, 'n' : 10, 'o' : 10, 'p' : 6,
  106. 'r' : 10, 's' : 8, 't' : 10, 'u' : 6,
  107. 'v' : 8, 'w' : 8, 'x' : 1, 'y' : 8,
  108. 'z' : 1, 'ch' : 1, 'gh' : 1, 'ph' : 1,
  109. 'rh' : 1, 'sh' : 2, 'th' : 1, 'wh' : 1,
  110. 'qu' : 1, 'ck' : 1}
  111. numbers = []
  112. for gram in grams:
  113. for i in range(occurrence_frequencies[gram]):
  114. numbers.append(gram)
  115. vowel_numbers = []
  116. for gram in vowel_grams:
  117. for i in range(occurrence_frequencies[gram]):
  118. vowel_numbers.append(gram)
  119. #
  120. # Bit flags
  121. #
  122. MAX_UNACCEPTABLE = 20
  123. # gram rules:
  124. NOT_BEGIN_SYLLABLE = 0x08
  125. NO_FINAL_SPLIT = 0x04
  126. VOWEL = 0x02
  127. ALTERNATE_VOWEL = 0x01
  128. NO_SPECIAL_RULE = 0x00
  129. # digram rules:
  130. BEGIN = 0x80
  131. NOT_BEGIN = 0x40
  132. BREAK = 0x20
  133. PREFIX = 0x10
  134. ILLEGAL_PAIR = 0x08
  135. SUFFIX = 0x04
  136. END = 0x02
  137. NOT_END = 0x01
  138. ANY_COMBINATION = 0x00
  139. gram_rules = dict()
  140. for gram in grams:
  141. gram_rules[ gram ] = NO_SPECIAL_RULE
  142. for gram in vowel_grams:
  143. gram_rules[ gram ] = VOWEL
  144. gram_rules['e'] |= NO_FINAL_SPLIT
  145. gram_rules['y'] |= ALTERNATE_VOWEL
  146. gram_rules['x'] = NOT_BEGIN_SYLLABLE
  147. gram_rules['ck'] = NOT_BEGIN_SYLLABLE
  148. digram_rules = dict()
  149. ###############################################################################
  150. # BEGIN DIGRAM RULES
  151. ###############################################################################
  152. digram_rules['a'] = dict()
  153. digram_rules['a']['a'] = ILLEGAL_PAIR
  154. digram_rules['a']['b'] = ANY_COMBINATION
  155. digram_rules['a']['c'] = ANY_COMBINATION
  156. digram_rules['a']['d'] = ANY_COMBINATION
  157. digram_rules['a']['e'] = ILLEGAL_PAIR
  158. digram_rules['a']['f'] = ANY_COMBINATION
  159. digram_rules['a']['g'] = ANY_COMBINATION
  160. digram_rules['a']['h'] = NOT_BEGIN | BREAK | NOT_END
  161. digram_rules['a']['i'] = ANY_COMBINATION
  162. digram_rules['a']['j'] = ANY_COMBINATION
  163. digram_rules['a']['k'] = ANY_COMBINATION
  164. digram_rules['a']['l'] = ANY_COMBINATION
  165. digram_rules['a']['m'] = ANY_COMBINATION
  166. digram_rules['a']['n'] = ANY_COMBINATION
  167. digram_rules['a']['o'] = ILLEGAL_PAIR
  168. digram_rules['a']['p'] = ANY_COMBINATION
  169. digram_rules['a']['r'] = ANY_COMBINATION
  170. digram_rules['a']['s'] = ANY_COMBINATION
  171. digram_rules['a']['t'] = ANY_COMBINATION
  172. digram_rules['a']['u'] = ANY_COMBINATION
  173. digram_rules['a']['v'] = ANY_COMBINATION
  174. digram_rules['a']['w'] = ANY_COMBINATION
  175. digram_rules['a']['x'] = ANY_COMBINATION
  176. digram_rules['a']['y'] = ANY_COMBINATION
  177. digram_rules['a']['z'] = ANY_COMBINATION
  178. digram_rules['a']['ch'] = ANY_COMBINATION
  179. digram_rules['a']['gh'] = ILLEGAL_PAIR
  180. digram_rules['a']['ph'] = ANY_COMBINATION
  181. digram_rules['a']['rh'] = ILLEGAL_PAIR
  182. digram_rules['a']['sh'] = ANY_COMBINATION
  183. digram_rules['a']['th'] = ANY_COMBINATION
  184. digram_rules['a']['wh'] = ILLEGAL_PAIR
  185. digram_rules['a']['qu'] = BREAK | NOT_END
  186. digram_rules['a']['ck'] = ANY_COMBINATION
  187. digram_rules['b'] = dict()
  188. digram_rules['b']['a'] = ANY_COMBINATION
  189. digram_rules['b']['b'] = NOT_BEGIN | BREAK | NOT_END
  190. digram_rules['b']['c'] = NOT_BEGIN | BREAK | NOT_END
  191. digram_rules['b']['d'] = NOT_BEGIN | BREAK | NOT_END
  192. digram_rules['b']['e'] = ANY_COMBINATION
  193. digram_rules['b']['f'] = NOT_BEGIN | BREAK | NOT_END
  194. digram_rules['b']['g'] = NOT_BEGIN | BREAK | NOT_END
  195. digram_rules['b']['h'] = NOT_BEGIN | BREAK | NOT_END
  196. digram_rules['b']['i'] = ANY_COMBINATION
  197. digram_rules['b']['j'] = NOT_BEGIN | BREAK | NOT_END
  198. digram_rules['b']['k'] = NOT_BEGIN | BREAK | NOT_END
  199. digram_rules['b']['l'] = BEGIN | SUFFIX | NOT_END
  200. digram_rules['b']['m'] = NOT_BEGIN | BREAK | NOT_END
  201. digram_rules['b']['n'] = NOT_BEGIN | BREAK | NOT_END
  202. digram_rules['b']['o'] = ANY_COMBINATION
  203. digram_rules['b']['p'] = NOT_BEGIN | BREAK | NOT_END
  204. digram_rules['b']['r'] = BEGIN | END
  205. digram_rules['b']['s'] = NOT_BEGIN
  206. digram_rules['b']['t'] = NOT_BEGIN | BREAK | NOT_END
  207. digram_rules['b']['u'] = ANY_COMBINATION
  208. digram_rules['b']['v'] = NOT_BEGIN | BREAK | NOT_END
  209. digram_rules['b']['w'] = NOT_BEGIN | BREAK | NOT_END
  210. digram_rules['b']['x'] = ILLEGAL_PAIR
  211. digram_rules['b']['y'] = ANY_COMBINATION
  212. digram_rules['b']['z'] = NOT_BEGIN | BREAK | NOT_END
  213. digram_rules['b']['ch'] = NOT_BEGIN | BREAK | NOT_END
  214. digram_rules['b']['gh'] = ILLEGAL_PAIR
  215. digram_rules['b']['ph'] = NOT_BEGIN | BREAK | NOT_END
  216. digram_rules['b']['rh'] = ILLEGAL_PAIR
  217. digram_rules['b']['sh'] = NOT_BEGIN | BREAK | NOT_END
  218. digram_rules['b']['th'] = NOT_BEGIN | BREAK | NOT_END
  219. digram_rules['b']['wh'] = ILLEGAL_PAIR
  220. digram_rules['b']['qu'] = NOT_BEGIN | BREAK | NOT_END
  221. digram_rules['b']['ck'] = ILLEGAL_PAIR
  222. digram_rules['c'] = dict()
  223. digram_rules['c']['a'] = ANY_COMBINATION
  224. digram_rules['c']['b'] = NOT_BEGIN | BREAK | NOT_END
  225. digram_rules['c']['c'] = NOT_BEGIN | BREAK | NOT_END
  226. digram_rules['c']['d'] = NOT_BEGIN | BREAK | NOT_END
  227. digram_rules['c']['e'] = ANY_COMBINATION
  228. digram_rules['c']['f'] = NOT_BEGIN | BREAK | NOT_END
  229. digram_rules['c']['g'] = NOT_BEGIN | BREAK | NOT_END
  230. digram_rules['c']['h'] = NOT_BEGIN | BREAK | NOT_END
  231. digram_rules['c']['i'] = ANY_COMBINATION
  232. digram_rules['c']['j'] = NOT_BEGIN | BREAK | NOT_END
  233. digram_rules['c']['k'] = NOT_BEGIN | BREAK | NOT_END
  234. digram_rules['c']['l'] = SUFFIX | NOT_END
  235. digram_rules['c']['m'] = NOT_BEGIN | BREAK | NOT_END
  236. digram_rules['c']['n'] = NOT_BEGIN | BREAK | NOT_END
  237. digram_rules['c']['o'] = ANY_COMBINATION
  238. digram_rules['c']['p'] = NOT_BEGIN | BREAK | NOT_END
  239. digram_rules['c']['r'] = NOT_END
  240. digram_rules['c']['s'] = NOT_BEGIN | END
  241. digram_rules['c']['t'] = NOT_BEGIN | PREFIX
  242. digram_rules['c']['u'] = ANY_COMBINATION
  243. digram_rules['c']['v'] = NOT_BEGIN | BREAK | NOT_END
  244. digram_rules['c']['w'] = NOT_BEGIN | BREAK | NOT_END
  245. digram_rules['c']['x'] = ILLEGAL_PAIR
  246. digram_rules['c']['y'] = ANY_COMBINATION
  247. digram_rules['c']['z'] = NOT_BEGIN | BREAK | NOT_END
  248. digram_rules['c']['ch'] = ILLEGAL_PAIR
  249. digram_rules['c']['gh'] = ILLEGAL_PAIR
  250. digram_rules['c']['ph'] = NOT_BEGIN | BREAK | NOT_END
  251. digram_rules['c']['rh'] = ILLEGAL_PAIR
  252. digram_rules['c']['sh'] = NOT_BEGIN | BREAK | NOT_END
  253. digram_rules['c']['th'] = NOT_BEGIN | BREAK | NOT_END
  254. digram_rules['c']['wh'] = ILLEGAL_PAIR
  255. digram_rules['c']['qu'] = NOT_BEGIN | SUFFIX | NOT_END
  256. digram_rules['c']['ck'] = ILLEGAL_PAIR
  257. digram_rules['d'] = dict()
  258. digram_rules['d']['a'] = ANY_COMBINATION
  259. digram_rules['d']['b'] = NOT_BEGIN | BREAK | NOT_END
  260. digram_rules['d']['c'] = NOT_BEGIN | BREAK | NOT_END
  261. digram_rules['d']['d'] = NOT_BEGIN
  262. digram_rules['d']['e'] = ANY_COMBINATION
  263. digram_rules['d']['f'] = NOT_BEGIN | BREAK | NOT_END
  264. digram_rules['d']['g'] = NOT_BEGIN | BREAK | NOT_END
  265. digram_rules['d']['h'] = NOT_BEGIN | BREAK | NOT_END
  266. digram_rules['d']['i'] = ANY_COMBINATION
  267. digram_rules['d']['j'] = NOT_BEGIN | BREAK | NOT_END
  268. digram_rules['d']['k'] = NOT_BEGIN | BREAK | NOT_END
  269. digram_rules['d']['l'] = NOT_BEGIN | BREAK | NOT_END
  270. digram_rules['d']['m'] = NOT_BEGIN | BREAK | NOT_END
  271. digram_rules['d']['n'] = NOT_BEGIN | BREAK | NOT_END
  272. digram_rules['d']['o'] = ANY_COMBINATION
  273. digram_rules['d']['p'] = NOT_BEGIN | BREAK | NOT_END
  274. digram_rules['d']['r'] = BEGIN | NOT_END
  275. digram_rules['d']['s'] = NOT_BEGIN | END
  276. digram_rules['d']['t'] = NOT_BEGIN | BREAK | NOT_END
  277. digram_rules['d']['u'] = ANY_COMBINATION
  278. digram_rules['d']['v'] = NOT_BEGIN | BREAK | NOT_END
  279. digram_rules['d']['w'] = NOT_BEGIN | BREAK | NOT_END
  280. digram_rules['d']['x'] = ILLEGAL_PAIR
  281. digram_rules['d']['y'] = ANY_COMBINATION
  282. digram_rules['d']['z'] = NOT_BEGIN | BREAK | NOT_END
  283. digram_rules['d']['ch'] = NOT_BEGIN | BREAK | NOT_END
  284. digram_rules['d']['gh'] = NOT_BEGIN | BREAK | NOT_END
  285. digram_rules['d']['ph'] = NOT_BEGIN | BREAK | NOT_END
  286. digram_rules['d']['rh'] = ILLEGAL_PAIR
  287. digram_rules['d']['sh'] = NOT_BEGIN | NOT_END
  288. digram_rules['d']['th'] = NOT_BEGIN | PREFIX
  289. digram_rules['d']['wh'] = ILLEGAL_PAIR
  290. digram_rules['d']['qu'] = NOT_BEGIN | BREAK | NOT_END
  291. digram_rules['d']['ck'] = ILLEGAL_PAIR
  292. digram_rules['e'] = dict()
  293. digram_rules['e']['a'] = ANY_COMBINATION
  294. digram_rules['e']['b'] = ANY_COMBINATION
  295. digram_rules['e']['c'] = ANY_COMBINATION
  296. digram_rules['e']['d'] = ANY_COMBINATION
  297. digram_rules['e']['e'] = ANY_COMBINATION
  298. digram_rules['e']['f'] = ANY_COMBINATION
  299. digram_rules['e']['g'] = ANY_COMBINATION
  300. digram_rules['e']['h'] = NOT_BEGIN | BREAK | NOT_END
  301. digram_rules['e']['i'] = NOT_END
  302. digram_rules['e']['j'] = ANY_COMBINATION
  303. digram_rules['e']['k'] = ANY_COMBINATION
  304. digram_rules['e']['l'] = ANY_COMBINATION
  305. digram_rules['e']['m'] = ANY_COMBINATION
  306. digram_rules['e']['n'] = ANY_COMBINATION
  307. digram_rules['e']['o'] = BREAK
  308. digram_rules['e']['p'] = ANY_COMBINATION
  309. digram_rules['e']['r'] = ANY_COMBINATION
  310. digram_rules['e']['s'] = ANY_COMBINATION
  311. digram_rules['e']['t'] = ANY_COMBINATION
  312. digram_rules['e']['u'] = ANY_COMBINATION
  313. digram_rules['e']['v'] = ANY_COMBINATION
  314. digram_rules['e']['w'] = ANY_COMBINATION
  315. digram_rules['e']['x'] = ANY_COMBINATION
  316. digram_rules['e']['y'] = ANY_COMBINATION
  317. digram_rules['e']['z'] = ANY_COMBINATION
  318. digram_rules['e']['ch'] = ANY_COMBINATION
  319. digram_rules['e']['gh'] = NOT_BEGIN | BREAK | NOT_END
  320. digram_rules['e']['ph'] = ANY_COMBINATION
  321. digram_rules['e']['rh'] = ILLEGAL_PAIR
  322. digram_rules['e']['sh'] = ANY_COMBINATION
  323. digram_rules['e']['th'] = ANY_COMBINATION
  324. digram_rules['e']['wh'] = ILLEGAL_PAIR
  325. digram_rules['e']['qu'] = BREAK | NOT_END
  326. digram_rules['e']['ck'] = ANY_COMBINATION
  327. digram_rules['f'] = dict()
  328. digram_rules['f']['a'] = ANY_COMBINATION
  329. digram_rules['f']['b'] = NOT_BEGIN | BREAK | NOT_END
  330. digram_rules['f']['c'] = NOT_BEGIN | BREAK | NOT_END
  331. digram_rules['f']['d'] = NOT_BEGIN | BREAK | NOT_END
  332. digram_rules['f']['e'] = ANY_COMBINATION
  333. digram_rules['f']['f'] = NOT_BEGIN
  334. digram_rules['f']['g'] = NOT_BEGIN | BREAK | NOT_END
  335. digram_rules['f']['h'] = NOT_BEGIN | BREAK | NOT_END
  336. digram_rules['f']['i'] = ANY_COMBINATION
  337. digram_rules['f']['j'] = NOT_BEGIN | BREAK | NOT_END
  338. digram_rules['f']['k'] = NOT_BEGIN | BREAK | NOT_END
  339. digram_rules['f']['l'] = BEGIN | SUFFIX | NOT_END
  340. digram_rules['f']['m'] = NOT_BEGIN | BREAK | NOT_END
  341. digram_rules['f']['n'] = NOT_BEGIN | BREAK | NOT_END
  342. digram_rules['f']['o'] = ANY_COMBINATION
  343. digram_rules['f']['p'] = NOT_BEGIN | BREAK | NOT_END
  344. digram_rules['f']['r'] = BEGIN | NOT_END
  345. digram_rules['f']['s'] = NOT_BEGIN
  346. digram_rules['f']['t'] = NOT_BEGIN
  347. digram_rules['f']['u'] = ANY_COMBINATION
  348. digram_rules['f']['v'] = NOT_BEGIN | BREAK | NOT_END
  349. digram_rules['f']['w'] = NOT_BEGIN | BREAK | NOT_END
  350. digram_rules['f']['x'] = ILLEGAL_PAIR
  351. digram_rules['f']['y'] = NOT_BEGIN
  352. digram_rules['f']['z'] = NOT_BEGIN | BREAK | NOT_END
  353. digram_rules['f']['ch'] = NOT_BEGIN | BREAK | NOT_END
  354. digram_rules['f']['gh'] = NOT_BEGIN | BREAK | NOT_END
  355. digram_rules['f']['ph'] = NOT_BEGIN | BREAK | NOT_END
  356. digram_rules['f']['rh'] = ILLEGAL_PAIR
  357. digram_rules['f']['sh'] = NOT_BEGIN | BREAK | NOT_END
  358. digram_rules['f']['th'] = NOT_BEGIN | BREAK | NOT_END
  359. digram_rules['f']['wh'] = ILLEGAL_PAIR
  360. digram_rules['f']['qu'] = NOT_BEGIN | BREAK | NOT_END
  361. digram_rules['f']['ck'] = ILLEGAL_PAIR
  362. digram_rules['g'] = dict()
  363. digram_rules['g']['a'] = ANY_COMBINATION
  364. digram_rules['g']['b'] = NOT_BEGIN | BREAK | NOT_END
  365. digram_rules['g']['c'] = NOT_BEGIN | BREAK | NOT_END
  366. digram_rules['g']['d'] = NOT_BEGIN | BREAK | NOT_END
  367. digram_rules['g']['e'] = ANY_COMBINATION
  368. digram_rules['g']['f'] = NOT_BEGIN | BREAK | NOT_END
  369. digram_rules['g']['g'] = NOT_BEGIN
  370. digram_rules['g']['h'] = NOT_BEGIN | BREAK | NOT_END
  371. digram_rules['g']['i'] = ANY_COMBINATION
  372. digram_rules['g']['j'] = NOT_BEGIN | BREAK | NOT_END
  373. digram_rules['g']['k'] = ILLEGAL_PAIR
  374. digram_rules['g']['l'] = BEGIN | SUFFIX | NOT_END
  375. digram_rules['g']['m'] = NOT_BEGIN | BREAK | NOT_END
  376. digram_rules['g']['n'] = NOT_BEGIN | BREAK | NOT_END
  377. digram_rules['g']['o'] = ANY_COMBINATION
  378. digram_rules['g']['p'] = NOT_BEGIN | BREAK | NOT_END
  379. digram_rules['g']['r'] = BEGIN | NOT_END
  380. digram_rules['g']['s'] = NOT_BEGIN | END
  381. digram_rules['g']['t'] = NOT_BEGIN | BREAK | NOT_END
  382. digram_rules['g']['u'] = ANY_COMBINATION
  383. digram_rules['g']['v'] = NOT_BEGIN | BREAK | NOT_END
  384. digram_rules['g']['w'] = NOT_BEGIN | BREAK | NOT_END
  385. digram_rules['g']['x'] = ILLEGAL_PAIR
  386. digram_rules['g']['y'] = NOT_BEGIN
  387. digram_rules['g']['z'] = NOT_BEGIN | BREAK | NOT_END
  388. digram_rules['g']['ch'] = NOT_BEGIN | BREAK | NOT_END
  389. digram_rules['g']['gh'] = ILLEGAL_PAIR
  390. digram_rules['g']['ph'] = NOT_BEGIN | BREAK | NOT_END
  391. digram_rules['g']['rh'] = ILLEGAL_PAIR
  392. digram_rules['g']['sh'] = NOT_BEGIN
  393. digram_rules['g']['th'] = NOT_BEGIN
  394. digram_rules['g']['wh'] = ILLEGAL_PAIR
  395. digram_rules['g']['qu'] = NOT_BEGIN | BREAK | NOT_END
  396. digram_rules['g']['ck'] = ILLEGAL_PAIR
  397. digram_rules['h'] = dict()
  398. digram_rules['h']['a'] = ANY_COMBINATION
  399. digram_rules['h']['b'] = NOT_BEGIN | BREAK | NOT_END
  400. digram_rules['h']['c'] = NOT_BEGIN | BREAK | NOT_END
  401. digram_rules['h']['d'] = NOT_BEGIN | BREAK | NOT_END
  402. digram_rules['h']['e'] = ANY_COMBINATION
  403. digram_rules['h']['f'] = NOT_BEGIN | BREAK | NOT_END
  404. digram_rules['h']['g'] = NOT_BEGIN | BREAK | NOT_END
  405. digram_rules['h']['h'] = ILLEGAL_PAIR
  406. digram_rules['h']['i'] = ANY_COMBINATION
  407. digram_rules['h']['j'] = NOT_BEGIN | BREAK | NOT_END
  408. digram_rules['h']['k'] = NOT_BEGIN | BREAK | NOT_END
  409. digram_rules['h']['l'] = NOT_BEGIN | BREAK | NOT_END
  410. digram_rules['h']['m'] = NOT_BEGIN | BREAK | NOT_END
  411. digram_rules['h']['n'] = NOT_BEGIN | BREAK | NOT_END
  412. digram_rules['h']['o'] = ANY_COMBINATION
  413. digram_rules['h']['p'] = NOT_BEGIN | BREAK | NOT_END
  414. digram_rules['h']['r'] = NOT_BEGIN | BREAK | NOT_END
  415. digram_rules['h']['s'] = NOT_BEGIN | BREAK | NOT_END
  416. digram_rules['h']['t'] = NOT_BEGIN | BREAK | NOT_END
  417. digram_rules['h']['u'] = ANY_COMBINATION
  418. digram_rules['h']['v'] = NOT_BEGIN | BREAK | NOT_END
  419. digram_rules['h']['w'] = NOT_BEGIN | BREAK | NOT_END
  420. digram_rules['h']['x'] = ILLEGAL_PAIR
  421. digram_rules['h']['y'] = ANY_COMBINATION
  422. digram_rules['h']['z'] = NOT_BEGIN | BREAK | NOT_END
  423. digram_rules['h']['ch'] = NOT_BEGIN | BREAK | NOT_END
  424. digram_rules['h']['gh'] = NOT_BEGIN | BREAK | NOT_END
  425. digram_rules['h']['ph'] = NOT_BEGIN | BREAK | NOT_END
  426. digram_rules['h']['rh'] = ILLEGAL_PAIR
  427. digram_rules['h']['sh'] = NOT_BEGIN | BREAK | NOT_END
  428. digram_rules['h']['th'] = NOT_BEGIN | BREAK | NOT_END
  429. digram_rules['h']['wh'] = ILLEGAL_PAIR
  430. digram_rules['h']['qu'] = NOT_BEGIN | BREAK | NOT_END
  431. digram_rules['h']['ck'] = ILLEGAL_PAIR
  432. digram_rules['i'] = dict()
  433. digram_rules['i']['a'] = ANY_COMBINATION
  434. digram_rules['i']['b'] = ANY_COMBINATION
  435. digram_rules['i']['c'] = ANY_COMBINATION
  436. digram_rules['i']['d'] = ANY_COMBINATION
  437. digram_rules['i']['e'] = NOT_BEGIN
  438. digram_rules['i']['f'] = ANY_COMBINATION
  439. digram_rules['i']['g'] = ANY_COMBINATION
  440. digram_rules['i']['h'] = NOT_BEGIN | BREAK | NOT_END
  441. digram_rules['i']['i'] = ILLEGAL_PAIR
  442. digram_rules['i']['j'] = ANY_COMBINATION
  443. digram_rules['i']['k'] = ANY_COMBINATION
  444. digram_rules['i']['l'] = ANY_COMBINATION
  445. digram_rules['i']['m'] = ANY_COMBINATION
  446. digram_rules['i']['n'] = ANY_COMBINATION
  447. digram_rules['i']['o'] = BREAK
  448. digram_rules['i']['p'] = ANY_COMBINATION
  449. digram_rules['i']['r'] = ANY_COMBINATION
  450. digram_rules['i']['s'] = ANY_COMBINATION
  451. digram_rules['i']['t'] = ANY_COMBINATION
  452. digram_rules['i']['u'] = NOT_BEGIN | BREAK | NOT_END
  453. digram_rules['i']['v'] = ANY_COMBINATION
  454. digram_rules['i']['w'] = NOT_BEGIN | BREAK | NOT_END
  455. digram_rules['i']['x'] = ANY_COMBINATION
  456. digram_rules['i']['y'] = NOT_BEGIN | BREAK | NOT_END
  457. digram_rules['i']['z'] = ANY_COMBINATION
  458. digram_rules['i']['ch'] = ANY_COMBINATION
  459. digram_rules['i']['gh'] = NOT_BEGIN
  460. digram_rules['i']['ph'] = ANY_COMBINATION
  461. digram_rules['i']['rh'] = ILLEGAL_PAIR
  462. digram_rules['i']['sh'] = ANY_COMBINATION
  463. digram_rules['i']['th'] = ANY_COMBINATION
  464. digram_rules['i']['wh'] = ILLEGAL_PAIR
  465. digram_rules['i']['qu'] = BREAK | NOT_END
  466. digram_rules['i']['ck'] = ANY_COMBINATION
  467. digram_rules['j'] = dict()
  468. digram_rules['j']['a'] = ANY_COMBINATION
  469. digram_rules['j']['b'] = NOT_BEGIN | BREAK | NOT_END
  470. digram_rules['j']['c'] = NOT_BEGIN | BREAK | NOT_END
  471. digram_rules['j']['d'] = NOT_BEGIN | BREAK | NOT_END
  472. digram_rules['j']['e'] = ANY_COMBINATION
  473. digram_rules['j']['f'] = NOT_BEGIN | BREAK | NOT_END
  474. digram_rules['j']['g'] = ILLEGAL_PAIR
  475. digram_rules['j']['h'] = NOT_BEGIN | BREAK | NOT_END
  476. digram_rules['j']['i'] = ANY_COMBINATION
  477. digram_rules['j']['j'] = ILLEGAL_PAIR
  478. digram_rules['j']['k'] = NOT_BEGIN | BREAK | NOT_END
  479. digram_rules['j']['l'] = NOT_BEGIN | BREAK | NOT_END
  480. digram_rules['j']['m'] = NOT_BEGIN | BREAK | NOT_END
  481. digram_rules['j']['n'] = NOT_BEGIN | BREAK | NOT_END
  482. digram_rules['j']['o'] = ANY_COMBINATION
  483. digram_rules['j']['p'] = NOT_BEGIN | BREAK | NOT_END
  484. digram_rules['j']['r'] = NOT_BEGIN | BREAK | NOT_END
  485. digram_rules['j']['s'] = NOT_BEGIN | BREAK | NOT_END
  486. digram_rules['j']['t'] = NOT_BEGIN | BREAK | NOT_END
  487. digram_rules['j']['u'] = ANY_COMBINATION
  488. digram_rules['j']['v'] = NOT_BEGIN | BREAK | NOT_END
  489. digram_rules['j']['w'] = NOT_BEGIN | BREAK | NOT_END
  490. digram_rules['j']['x'] = ILLEGAL_PAIR
  491. digram_rules['j']['y'] = NOT_BEGIN
  492. digram_rules['j']['z'] = NOT_BEGIN | BREAK | NOT_END
  493. digram_rules['j']['ch'] = NOT_BEGIN | BREAK | NOT_END
  494. digram_rules['j']['gh'] = NOT_BEGIN | BREAK | NOT_END
  495. digram_rules['j']['ph'] = NOT_BEGIN | BREAK | NOT_END
  496. digram_rules['j']['rh'] = ILLEGAL_PAIR
  497. digram_rules['j']['sh'] = NOT_BEGIN | BREAK | NOT_END
  498. digram_rules['j']['th'] = NOT_BEGIN | BREAK | NOT_END
  499. digram_rules['j']['wh'] = ILLEGAL_PAIR
  500. digram_rules['j']['qu'] = NOT_BEGIN | BREAK | NOT_END
  501. digram_rules['j']['ck'] = ILLEGAL_PAIR
  # 'k' row of the digram table: digram_rules['k'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['k'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': SUFFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': BEGIN | SUFFIX | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': SUFFIX | NOT_END,
      's': NOT_BEGIN | END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | PREFIX,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'l' row of the digram table: digram_rules['l'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['l'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | PREFIX,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | PREFIX,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | PREFIX,
      'g': NOT_BEGIN | PREFIX,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | PREFIX,
      'k': NOT_BEGIN | PREFIX,
      'l': NOT_BEGIN | PREFIX,
      'm': NOT_BEGIN | PREFIX,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | PREFIX,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN | PREFIX,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | PREFIX,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | PREFIX,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | PREFIX,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | PREFIX,
      'th': NOT_BEGIN | PREFIX,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'm' row of the digram table: digram_rules['m'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['m'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | PREFIX,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN,
      'th': NOT_BEGIN,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'n' row of the digram table: digram_rules['n'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['n'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | PREFIX,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | PREFIX,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | PREFIX,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | PREFIX,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN,
      'th': NOT_BEGIN,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': NOT_BEGIN | PREFIX,
  }
  # 'o' row of the digram table: digram_rules['o'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['o'] = {
      'a': ANY_COMBINATION,
      'b': ANY_COMBINATION,
      'c': ANY_COMBINATION,
      'd': ANY_COMBINATION,
      'e': ILLEGAL_PAIR,
      'f': ANY_COMBINATION,
      'g': ANY_COMBINATION,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': ANY_COMBINATION,
      'k': ANY_COMBINATION,
      'l': ANY_COMBINATION,
      'm': ANY_COMBINATION,
      'n': ANY_COMBINATION,
      'o': ANY_COMBINATION,
      'p': ANY_COMBINATION,
      'r': ANY_COMBINATION,
      's': ANY_COMBINATION,
      't': ANY_COMBINATION,
      'u': ANY_COMBINATION,
      'v': ANY_COMBINATION,
      'w': ANY_COMBINATION,
      'x': ANY_COMBINATION,
      'y': ANY_COMBINATION,
      'z': ANY_COMBINATION,
      'ch': ANY_COMBINATION,
      'gh': NOT_BEGIN,
      'ph': ANY_COMBINATION,
      'rh': ILLEGAL_PAIR,
      'sh': ANY_COMBINATION,
      'th': ANY_COMBINATION,
      'wh': ILLEGAL_PAIR,
      'qu': BREAK | NOT_END,
      'ck': ANY_COMBINATION,
  }
  # 'p' row of the digram table: digram_rules['p'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['p'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': SUFFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | PREFIX,
      'r': NOT_END,
      's': NOT_BEGIN | END,
      't': NOT_BEGIN | END,
      'u': NOT_BEGIN | END,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'r' row of the digram table: digram_rules['r'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['r'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | PREFIX,
      'c': NOT_BEGIN | PREFIX,
      'd': NOT_BEGIN | PREFIX,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | PREFIX,
      'g': NOT_BEGIN | PREFIX,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | PREFIX,
      'k': NOT_BEGIN | PREFIX,
      'l': NOT_BEGIN | PREFIX,
      'm': NOT_BEGIN | PREFIX,
      'n': NOT_BEGIN | PREFIX,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | PREFIX,
      'r': NOT_BEGIN | PREFIX,
      's': NOT_BEGIN | PREFIX,
      't': NOT_BEGIN | PREFIX,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | PREFIX,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | PREFIX,
      'ch': NOT_BEGIN | PREFIX,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | PREFIX,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | PREFIX,
      'th': NOT_BEGIN | PREFIX,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | PREFIX | NOT_END,
      'ck': NOT_BEGIN | PREFIX,
  }
  # 's' row of the digram table: digram_rules['s'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['s'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': ANY_COMBINATION,
      'l': BEGIN | SUFFIX | NOT_END,
      'm': SUFFIX | NOT_END,
      'n': PREFIX | SUFFIX | NOT_END,
      'o': ANY_COMBINATION,
      'p': ANY_COMBINATION,
      'r': NOT_BEGIN | NOT_END,
      's': NOT_BEGIN | PREFIX,
      't': ANY_COMBINATION,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': BEGIN | SUFFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': BEGIN | SUFFIX | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': SUFFIX | NOT_END,
      'ck': NOT_BEGIN,
  }
  # 't' row of the digram table: digram_rules['t'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['t'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_END,
      's': NOT_BEGIN | END,
      't': NOT_BEGIN | PREFIX,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': BEGIN | SUFFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'u' row of the digram table: digram_rules['u'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['u'] = {
      'a': NOT_BEGIN | BREAK | NOT_END,
      'b': ANY_COMBINATION,
      'c': ANY_COMBINATION,
      'd': ANY_COMBINATION,
      'e': NOT_BEGIN,
      'f': ANY_COMBINATION,
      'g': ANY_COMBINATION,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': NOT_BEGIN | BREAK | NOT_END,
      'j': ANY_COMBINATION,
      'k': ANY_COMBINATION,
      'l': ANY_COMBINATION,
      'm': ANY_COMBINATION,
      'n': ANY_COMBINATION,
      'o': NOT_BEGIN | BREAK,
      'p': ANY_COMBINATION,
      'r': ANY_COMBINATION,
      's': ANY_COMBINATION,
      't': ANY_COMBINATION,
      'u': ILLEGAL_PAIR,
      'v': ANY_COMBINATION,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ANY_COMBINATION,
      'y': NOT_BEGIN | BREAK | NOT_END,
      'z': ANY_COMBINATION,
      'ch': ANY_COMBINATION,
      'gh': NOT_BEGIN | PREFIX,
      'ph': ANY_COMBINATION,
      'rh': ILLEGAL_PAIR,
      'sh': ANY_COMBINATION,
      'th': ANY_COMBINATION,
      'wh': ILLEGAL_PAIR,
      'qu': BREAK | NOT_END,
      'ck': ANY_COMBINATION,
  }
  # 'v' row of the digram table: digram_rules['v'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['v'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'w' row of the digram table: digram_rules['w'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['w'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | PREFIX,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | PREFIX | END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | PREFIX,
      'g': NOT_BEGIN | PREFIX | END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | PREFIX,
      'l': NOT_BEGIN | PREFIX | SUFFIX,
      'm': NOT_BEGIN | PREFIX,
      'n': NOT_BEGIN | PREFIX,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | PREFIX,
      'r': BEGIN | SUFFIX | NOT_END,
      's': NOT_BEGIN | PREFIX,
      't': NOT_BEGIN | PREFIX,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | PREFIX,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': NOT_BEGIN | PREFIX,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | PREFIX,
      'ch': NOT_BEGIN,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN,
      'th': NOT_BEGIN,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': NOT_BEGIN,
  }
  # 'x' row of the digram table: digram_rules['x'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['x'] = {
      'a': NOT_BEGIN,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': NOT_BEGIN,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': NOT_BEGIN,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': NOT_BEGIN,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': NOT_BEGIN,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'y' row of the digram table: digram_rules['y'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['y'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN,
      'c': NOT_BEGIN | NOT_END,
      'd': NOT_BEGIN,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | NOT_END,
      'g': NOT_BEGIN,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': BEGIN | NOT_END,
      'j': NOT_BEGIN | NOT_END,
      'k': NOT_BEGIN,
      'l': NOT_BEGIN | NOT_END,
      'm': NOT_BEGIN,
      'n': NOT_BEGIN,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': NOT_BEGIN,
      'y': ILLEGAL_PAIR,
      'z': NOT_BEGIN,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'z' row of the digram table: digram_rules['z'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['z'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': SUFFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'ch' row of the digram table: digram_rules['ch'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['ch'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': ILLEGAL_PAIR,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'gh' row of the digram table: digram_rules['gh'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['gh'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'c': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'd': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'g': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'h': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'i': BEGIN | NOT_END,
      'j': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'k': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'l': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'n': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'o': BEGIN | NOT_END,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      's': NOT_BEGIN | PREFIX,
      't': NOT_BEGIN | PREFIX,
      'u': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'v': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'w': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'z': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'ch': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'gh': ILLEGAL_PAIR,
      'ph': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'th': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # 'ph' row of the digram table: digram_rules['ph'][unit] -> rule flags.
  # There is no bare 'q' key; 'qu' is listed as a unit instead.
  digram_rules['ph'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': BEGIN | SUFFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | NOT_END,
      'w': NOT_BEGIN | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': ILLEGAL_PAIR,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  1132. digram_rules['rh'] = dict()
  1133. digram_rules['rh']['a'] = BEGIN | NOT_END
  1134. digram_rules['rh']['b'] = ILLEGAL_PAIR
  1135. digram_rules['rh']['c'] = ILLEGAL_PAIR
  1136. digram_rules['rh']['d'] = ILLEGAL_PAIR
  1137. digram_rules['rh']['e'] = BEGIN | NOT_END
  1138. digram_rules['rh']['f'] = ILLEGAL_PAIR
  1139. digram_rules['rh']['g'] = ILLEGAL_PAIR
  1140. digram_rules['rh']['h'] = ILLEGAL_PAIR
  1141. digram_rules['rh']['i'] = BEGIN | NOT_END
  1142. digram_rules['rh']['j'] = ILLEGAL_PAIR
  1143. digram_rules['rh']['k'] = ILLEGAL_PAIR
  1144. digram_rules['rh']['l'] = ILLEGAL_PAIR
  1145. digram_rules['rh']['m'] = ILLEGAL_PAIR
  1146. digram_rules['rh']['n'] = ILLEGAL_PAIR
  1147. digram_rules['rh']['o'] = BEGIN | NOT_END
  1148. digram_rules['rh']['p'] = ILLEGAL_PAIR
  1149. digram_rules['rh']['r'] = ILLEGAL_PAIR
  1150. digram_rules['rh']['s'] = ILLEGAL_PAIR
  1151. digram_rules['rh']['t'] = ILLEGAL_PAIR
  1152. digram_rules['rh']['u'] = BEGIN | NOT_END
  1153. digram_rules['rh']['v'] = ILLEGAL_PAIR
  1154. digram_rules['rh']['w'] = ILLEGAL_PAIR
  1155. digram_rules['rh']['x'] = ILLEGAL_PAIR
  1156. digram_rules['rh']['y'] = BEGIN | NOT_END
  1157. digram_rules['rh']['z'] = ILLEGAL_PAIR
  1158. digram_rules['rh']['ch'] = ILLEGAL_PAIR
  1159. digram_rules['rh']['gh'] = ILLEGAL_PAIR
  1160. digram_rules['rh']['ph'] = ILLEGAL_PAIR
  1161. digram_rules['rh']['rh'] = ILLEGAL_PAIR
  1162. digram_rules['rh']['sh'] = ILLEGAL_PAIR
  1163. digram_rules['rh']['th'] = ILLEGAL_PAIR
  1164. digram_rules['rh']['wh'] = ILLEGAL_PAIR
  1165. digram_rules['rh']['qu'] = ILLEGAL_PAIR
  1166. digram_rules['rh']['ck'] = ILLEGAL_PAIR
  1167. digram_rules['sh'] = dict()
  1168. digram_rules['sh']['a'] = ANY_COMBINATION
  1169. digram_rules['sh']['b'] = NOT_BEGIN | BREAK | NOT_END
  1170. digram_rules['sh']['c'] = NOT_BEGIN | BREAK | NOT_END
  1171. digram_rules['sh']['d'] = NOT_BEGIN | BREAK | NOT_END
  1172. digram_rules['sh']['e'] = ANY_COMBINATION
  1173. digram_rules['sh']['f'] = NOT_BEGIN | BREAK | NOT_END
  1174. digram_rules['sh']['g'] = NOT_BEGIN | BREAK | NOT_END
  1175. digram_rules['sh']['h'] = ILLEGAL_PAIR
  1176. digram_rules['sh']['i'] = ANY_COMBINATION
  1177. digram_rules['sh']['j'] = NOT_BEGIN | BREAK | NOT_END
  1178. digram_rules['sh']['k'] = NOT_BEGIN
  1179. digram_rules['sh']['l'] = BEGIN | SUFFIX | NOT_END
  1180. digram_rules['sh']['m'] = BEGIN | SUFFIX | NOT_END
  1181. digram_rules['sh']['n'] = BEGIN | SUFFIX | NOT_END
  1182. digram_rules['sh']['o'] = ANY_COMBINATION
  1183. digram_rules['sh']['p'] = NOT_BEGIN
  1184. digram_rules['sh']['r'] = BEGIN | SUFFIX | NOT_END
  1185. digram_rules['sh']['s'] = NOT_BEGIN | BREAK | NOT_END
  1186. digram_rules['sh']['t'] = SUFFIX
  1187. digram_rules['sh']['u'] = ANY_COMBINATION
  1188. digram_rules['sh']['v'] = NOT_BEGIN | BREAK | NOT_END
  1189. digram_rules['sh']['w'] = SUFFIX | NOT_END
  1190. digram_rules['sh']['x'] = ILLEGAL_PAIR
  1191. digram_rules['sh']['y'] = ANY_COMBINATION
  1192. digram_rules['sh']['z'] = NOT_BEGIN | BREAK | NOT_END
  1193. digram_rules['sh']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1194. digram_rules['sh']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1195. digram_rules['sh']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1196. digram_rules['sh']['rh'] = ILLEGAL_PAIR
  1197. digram_rules['sh']['sh'] = ILLEGAL_PAIR
  1198. digram_rules['sh']['th'] = NOT_BEGIN | BREAK | NOT_END
  1199. digram_rules['sh']['wh'] = ILLEGAL_PAIR
  1200. digram_rules['sh']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1201. digram_rules['sh']['ck'] = ILLEGAL_PAIR
  1202. digram_rules['th'] = dict()
  1203. digram_rules['th']['a'] = ANY_COMBINATION
  1204. digram_rules['th']['b'] = NOT_BEGIN | BREAK | NOT_END
  1205. digram_rules['th']['c'] = NOT_BEGIN | BREAK | NOT_END
  1206. digram_rules['th']['d'] = NOT_BEGIN | BREAK | NOT_END
  1207. digram_rules['th']['e'] = ANY_COMBINATION
  1208. digram_rules['th']['f'] = NOT_BEGIN | BREAK | NOT_END
  1209. digram_rules['th']['g'] = NOT_BEGIN | BREAK | NOT_END
  1210. digram_rules['th']['h'] = NOT_BEGIN | BREAK | NOT_END
  1211. digram_rules['th']['i'] = ANY_COMBINATION
  1212. digram_rules['th']['j'] = NOT_BEGIN | BREAK | NOT_END
  1213. digram_rules['th']['k'] = NOT_BEGIN | BREAK | NOT_END
  1214. digram_rules['th']['l'] = NOT_BEGIN | BREAK | NOT_END
  1215. digram_rules['th']['m'] = NOT_BEGIN | BREAK | NOT_END
  1216. digram_rules['th']['n'] = NOT_BEGIN | BREAK | NOT_END
  1217. digram_rules['th']['o'] = ANY_COMBINATION
  1218. digram_rules['th']['p'] = NOT_BEGIN | BREAK | NOT_END
  1219. digram_rules['th']['r'] = NOT_END
  1220. digram_rules['th']['s'] = NOT_BEGIN | END
  1221. digram_rules['th']['t'] = NOT_BEGIN | BREAK | NOT_END
  1222. digram_rules['th']['u'] = ANY_COMBINATION
  1223. digram_rules['th']['v'] = NOT_BEGIN | BREAK | NOT_END
  1224. digram_rules['th']['w'] = SUFFIX | NOT_END
  1225. digram_rules['th']['x'] = ILLEGAL_PAIR
  1226. digram_rules['th']['y'] = ANY_COMBINATION
  1227. digram_rules['th']['z'] = NOT_BEGIN | BREAK | NOT_END
  1228. digram_rules['th']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1229. digram_rules['th']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1230. digram_rules['th']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1231. digram_rules['th']['rh'] = ILLEGAL_PAIR
  1232. digram_rules['th']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1233. digram_rules['th']['th'] = ILLEGAL_PAIR
  1234. digram_rules['th']['wh'] = ILLEGAL_PAIR
  1235. digram_rules['th']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1236. digram_rules['th']['ck'] = ILLEGAL_PAIR
  1237. digram_rules['wh'] = dict()
  1238. digram_rules['wh']['a'] = BEGIN | NOT_END
  1239. digram_rules['wh']['b'] = ILLEGAL_PAIR
  1240. digram_rules['wh']['c'] = ILLEGAL_PAIR
  1241. digram_rules['wh']['d'] = ILLEGAL_PAIR
  1242. digram_rules['wh']['e'] = BEGIN | NOT_END
  1243. digram_rules['wh']['f'] = ILLEGAL_PAIR
  1244. digram_rules['wh']['g'] = ILLEGAL_PAIR
  1245. digram_rules['wh']['h'] = ILLEGAL_PAIR
  1246. digram_rules['wh']['i'] = BEGIN | NOT_END
  1247. digram_rules['wh']['j'] = ILLEGAL_PAIR
  1248. digram_rules['wh']['k'] = ILLEGAL_PAIR
  1249. digram_rules['wh']['l'] = ILLEGAL_PAIR
  1250. digram_rules['wh']['m'] = ILLEGAL_PAIR
  1251. digram_rules['wh']['n'] = ILLEGAL_PAIR
  1252. digram_rules['wh']['o'] = BEGIN | NOT_END
  1253. digram_rules['wh']['p'] = ILLEGAL_PAIR
  1254. digram_rules['wh']['r'] = ILLEGAL_PAIR
  1255. digram_rules['wh']['s'] = ILLEGAL_PAIR
  1256. digram_rules['wh']['t'] = ILLEGAL_PAIR
  1257. digram_rules['wh']['u'] = ILLEGAL_PAIR
  1258. digram_rules['wh']['v'] = ILLEGAL_PAIR
  1259. digram_rules['wh']['w'] = ILLEGAL_PAIR
  1260. digram_rules['wh']['x'] = ILLEGAL_PAIR
  1261. digram_rules['wh']['y'] = BEGIN | NOT_END
  1262. digram_rules['wh']['z'] = ILLEGAL_PAIR
  1263. digram_rules['wh']['ch'] = ILLEGAL_PAIR
  1264. digram_rules['wh']['gh'] = ILLEGAL_PAIR
  1265. digram_rules['wh']['ph'] = ILLEGAL_PAIR
  1266. digram_rules['wh']['rh'] = ILLEGAL_PAIR
  1267. digram_rules['wh']['sh'] = ILLEGAL_PAIR
  1268. digram_rules['wh']['th'] = ILLEGAL_PAIR
  1269. digram_rules['wh']['wh'] = ILLEGAL_PAIR
  1270. digram_rules['wh']['qu'] = ILLEGAL_PAIR
  1271. digram_rules['wh']['ck'] = ILLEGAL_PAIR
  1272. digram_rules['qu'] = dict()
  1273. digram_rules['qu']['a'] = ANY_COMBINATION
  1274. digram_rules['qu']['b'] = ILLEGAL_PAIR
  1275. digram_rules['qu']['c'] = ILLEGAL_PAIR
  1276. digram_rules['qu']['d'] = ILLEGAL_PAIR
  1277. digram_rules['qu']['e'] = ANY_COMBINATION
  1278. digram_rules['qu']['f'] = ILLEGAL_PAIR
  1279. digram_rules['qu']['g'] = ILLEGAL_PAIR
  1280. digram_rules['qu']['h'] = ILLEGAL_PAIR
  1281. digram_rules['qu']['i'] = ANY_COMBINATION
  1282. digram_rules['qu']['j'] = ILLEGAL_PAIR
  1283. digram_rules['qu']['k'] = ILLEGAL_PAIR
  1284. digram_rules['qu']['l'] = ILLEGAL_PAIR
  1285. digram_rules['qu']['m'] = ILLEGAL_PAIR
  1286. digram_rules['qu']['n'] = ILLEGAL_PAIR
  1287. digram_rules['qu']['o'] = ANY_COMBINATION
  1288. digram_rules['qu']['p'] = ILLEGAL_PAIR
  1289. digram_rules['qu']['r'] = ILLEGAL_PAIR
  1290. digram_rules['qu']['s'] = ILLEGAL_PAIR
  1291. digram_rules['qu']['t'] = ILLEGAL_PAIR
  1292. digram_rules['qu']['u'] = ILLEGAL_PAIR
  1293. digram_rules['qu']['v'] = ILLEGAL_PAIR
  1294. digram_rules['qu']['w'] = ILLEGAL_PAIR
  1295. digram_rules['qu']['x'] = ILLEGAL_PAIR
  1296. digram_rules['qu']['y'] = ILLEGAL_PAIR
  1297. digram_rules['qu']['z'] = ILLEGAL_PAIR
  1298. digram_rules['qu']['ch'] = ILLEGAL_PAIR
  1299. digram_rules['qu']['gh'] = ILLEGAL_PAIR
  1300. digram_rules['qu']['ph'] = ILLEGAL_PAIR
  1301. digram_rules['qu']['rh'] = ILLEGAL_PAIR
  1302. digram_rules['qu']['sh'] = ILLEGAL_PAIR
  1303. digram_rules['qu']['th'] = ILLEGAL_PAIR
  1304. digram_rules['qu']['wh'] = ILLEGAL_PAIR
  1305. digram_rules['qu']['qu'] = ILLEGAL_PAIR
  1306. digram_rules['qu']['ck'] = ILLEGAL_PAIR
  1307. digram_rules['ck'] = dict()
  1308. digram_rules['ck']['a'] = NOT_BEGIN | BREAK | NOT_END
  1309. digram_rules['ck']['b'] = NOT_BEGIN | BREAK | NOT_END
  1310. digram_rules['ck']['c'] = NOT_BEGIN | BREAK | NOT_END
  1311. digram_rules['ck']['d'] = NOT_BEGIN | BREAK | NOT_END
  1312. digram_rules['ck']['e'] = NOT_BEGIN | BREAK | NOT_END
  1313. digram_rules['ck']['f'] = NOT_BEGIN | BREAK | NOT_END
  1314. digram_rules['ck']['g'] = NOT_BEGIN | BREAK | NOT_END
  1315. digram_rules['ck']['h'] = NOT_BEGIN | BREAK | NOT_END
  1316. digram_rules['ck']['i'] = NOT_BEGIN | BREAK | NOT_END
  1317. digram_rules['ck']['j'] = NOT_BEGIN | BREAK | NOT_END
  1318. digram_rules['ck']['k'] = NOT_BEGIN | BREAK | NOT_END
  1319. digram_rules['ck']['l'] = NOT_BEGIN | BREAK | NOT_END
  1320. digram_rules['ck']['m'] = NOT_BEGIN | BREAK | NOT_END
  1321. digram_rules['ck']['n'] = NOT_BEGIN | BREAK | NOT_END
  1322. digram_rules['ck']['o'] = NOT_BEGIN | BREAK | NOT_END
  1323. digram_rules['ck']['p'] = NOT_BEGIN | BREAK | NOT_END
  1324. digram_rules['ck']['r'] = NOT_BEGIN | BREAK | NOT_END
  1325. digram_rules['ck']['s'] = NOT_BEGIN
  1326. digram_rules['ck']['t'] = NOT_BEGIN | BREAK | NOT_END
  1327. digram_rules['ck']['u'] = NOT_BEGIN | BREAK | NOT_END
  1328. digram_rules['ck']['v'] = NOT_BEGIN | BREAK | NOT_END
  1329. digram_rules['ck']['w'] = NOT_BEGIN | BREAK | NOT_END
  1330. digram_rules['ck']['x'] = ILLEGAL_PAIR
  1331. digram_rules['ck']['y'] = NOT_BEGIN
  1332. digram_rules['ck']['z'] = NOT_BEGIN | BREAK | NOT_END
  1333. digram_rules['ck']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1334. digram_rules['ck']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1335. digram_rules['ck']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1336. digram_rules['ck']['rh'] = ILLEGAL_PAIR
  1337. digram_rules['ck']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1338. digram_rules['ck']['th'] = NOT_BEGIN | BREAK | NOT_END
  1339. digram_rules['ck']['wh'] = ILLEGAL_PAIR
  1340. digram_rules['ck']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1341. digram_rules['ck']['ck'] = ILLEGAL_PAIR
  1342. ###############################################################################
  1343. # END DIGRAM RULES
  1344. ###############################################################################
  1345. def marked(flag, first_unit, second_unit):
  1346. return digram_rules[first_unit][second_unit] & flag
  1347. # Generates a random word, as well as its hyphenated form. The
  1348. # length of the returned word will be between minlen and maxlen.
  1349. def generate_password_shazel(minlen = MIN_LENGTH_PASSWORD,
  1350. maxlen = MAX_LENGTH_PASSWORD):
  1351. if (minlen > maxlen):
  1352. raise PasswordGenerationException("minlen minlen is greater than maxlen maxlen.")
  1353. #
  1354. # Check for zero length words. This is technically not an error,
  1355. # so we take the short cut and return empty words.
  1356. #
  1357. if (maxlen == 0):
  1358. raise PasswordGenerationException("maxlen must be greater than 0.")
  1359. word = ''
  1360. for i in range(MAX_UNACCEPTABLE):
  1361. results = _random_word(random.randint(minlen, maxlen))
  1362. word = results[0]
  1363. hyphenated_word = results[1]
  1364. if (word != ''):
  1365. break
  1366. if (word == "" and (minlen > 0)):
  1367. raise PasswordGenerationException("failed to generate an acceptable random password.")
  1368. return (word, hyphenated_word)
  1369. # Selects a random element from an array.
  1370. def random_element(ar):
  1371. try:
  1372. keys = ar.keys()
  1373. except:
  1374. keys = range(len(ar))
  1375. return ar[ keys[random.randint(0, len(keys) - 1)] ]
  1376. # This is the routine that returns a random word. It collects random
  1377. # syllables until a predetermined word length is found. If a retry
  1378. # threshold is reached, another word is tried.
  1379. def _random_word(pwlen):
  1380. word = ''
  1381. word_syllables = []
  1382. max_retries = (4 * pwlen) + len(grams)
  1383. tries = 0 # count of retries.
  1384. # word_units used to be an array of indices into the 'rules' C-array.
  1385. # now it's an array of actual units (grams).
  1386. word_units = []
  1387. saved_pair = []
  1388. #
  1389. # Find syllables until the entire word is constructed.
  1390. #
  1391. while(len(word) < pwlen):
  1392. #
  1393. # Get the syllable and find its length.
  1394. #
  1395. new_syllable, syllable_units, saved_pair = get_syllable(pwlen - len(word), saved_pair)
  1396. #
  1397. # Append the syllable units to the word units.
  1398. #
  1399. word_units = word_units + syllable_units
  1400. #
  1401. # If the word has been improperly formed, throw out
  1402. # the syllable. The checks performed here are those
  1403. # that must be formed on a word basis. The other
  1404. # tests are performed entirely within the syllable.
  1405. # Otherwise, append the syllable to the word.
  1406. #
  1407. if not (
  1408. _improper_word(word_units)
  1409. or
  1410. (
  1411. word == ''
  1412. and
  1413. _have_initial_y(syllable_units)
  1414. )
  1415. or
  1416. (
  1417. len(word + new_syllable) == pwlen
  1418. and
  1419. _have_final_split(syllable_units)
  1420. )
  1421. ):
  1422. word = word + new_syllable
  1423. word_syllables.append(new_syllable)
  1424. #
  1425. # Keep track of the times we have tried to get syllables.
  1426. # If we have exceeded the threshold, start from scratch.
  1427. #
  1428. tries = tries + 1
  1429. if (tries > max_retries):
  1430. tries = 0
  1431. word = ''
  1432. word_syllables = []
  1433. word_units = []
  1434. return (word, '-'.join(word_syllables))
  1435. # Selects a gram (aka "unit"). This is the standard random unit
  1436. # generating routine for get_syllable().
  1437. #
  1438. # This routine attempts to return grams (units) with a distribution
  1439. # approaching that of the distribution of the units in English.
  1440. #
  1441. # The distribution of the units may be altered in this procedure
  1442. # without affecting the digram table or any other programs using the
  1443. # random_word function, as long as the set of grams (units) is kept
  1444. # consistent throughout this library.
  1445. def _random_unit(type):
  1446. if (type & VOWEL):
  1447. # Sometimes, we are asked to explicitly get a vowel (i.e., if
  1448. # a digram pair expects one following it). This is a
  1449. # shortcut to do that and avoid looping with rejected
  1450. # consonants.
  1451. return random_element(vowel_numbers)
  1452. else:
  1453. # Get any letter according to the English distribution.
  1454. return random_element(numbers)
  1455. # Check that the word does not contain illegal combinations
  1456. # that may span syllables. Specifically, these are:
  1457. #
  1458. # 1. An illegal pair of units between syllables.
  1459. # 2. Three consecutive vowel units.
  1460. # 3. Three consecutive consonant units.
  1461. #
  1462. # The checks are made against units (1 or 2 letters), not against
  1463. # the individual letters, so three consecutive units can have
  1464. # the length of 6 at most.
  1465. def _improper_word(units):
  1466. failure = 0
  1467. for unit_count in range(len(units)):
  1468. #
  1469. # Check for ILLEGAL_PAIR.
  1470. # This should have been caught for units within a syllable,
  1471. # but in some cases it would have gone unnoticed for units between syllables
  1472. # (e.g., when saved units in get_syllable() were not used).
  1473. #
  1474. if (unit_count > 0
  1475. and digram_rules[units[unit_count-1]][units[unit_count]]
  1476. & ILLEGAL_PAIR):
  1477. return 1 # Failure!
  1478. if (unit_count >= 2):
  1479. #
  1480. # Check for consecutive vowels or consonants. Because the
  1481. # initial y of a syllable is treated as a consonant rather
  1482. # than as a vowel, we exclude y from the first vowel in the
  1483. # vowel test. The only problem comes when y ends a syllable
  1484. # and two other vowels start the next, like fly-oint. Since
  1485. # such words are still pronounceable, we accept this.
  1486. #
  1487. #
  1488. # Vowel check.
  1489. #
  1490. if ((
  1491. (gram_rules[units[unit_count - 2]] & VOWEL)
  1492. and
  1493. not (gram_rules[units[unit_count - 2]] & ALTERNATE_VOWEL)
  1494. and
  1495. (gram_rules[units[unit_count - 1]] & VOWEL)
  1496. and
  1497. (gram_rules[units[unit_count ]] & VOWEL)
  1498. )
  1499. or
  1500. #
  1501. # Consonant check.
  1502. #
  1503. (
  1504. not (gram_rules[units[unit_count - 2]] & VOWEL)
  1505. and
  1506. not (gram_rules[units[unit_count - 1]] & VOWEL)
  1507. and
  1508. not (gram_rules[units[unit_count ]] & VOWEL)
  1509. )):
  1510. return 1 # Failure!
  1511. return 0 # success
  1512. # Treating y as a vowel is sometimes a problem. Some words get
  1513. # formed that look irregular. One special group is when y starts a
  1514. # word and is the only vowel in the first syllable. The word ycl is
  1515. # one example. We discard words like these.
  1516. def _have_initial_y(units):
  1517. vowel_count = 0
  1518. normal_vowel_count = 0
  1519. for unit_count in range(len(units)):
  1520. #
  1521. # Count vowels.
  1522. #
  1523. if (gram_rules[units[unit_count]] & VOWEL):
  1524. vowel_count = vowel_count + 1
  1525. #
  1526. # Count the vowels that are not:
  1527. # 1. 'y'
  1528. # 2. at the start of the word.
  1529. #
  1530. if (not (gram_rules[units[unit_count]] & ALTERNATE_VOWEL) or (unit_count > 0)):
  1531. normal_vowel_count = normal_vowel_count + 1
  1532. return (vowel_count <= 1) and (normal_vowel_count == 0)
  1533. # Besides the problem with the letter y, there is one with a silent e
  1534. # at the end of words, like face or nice. We allow this silent e,
  1535. # but we do not allow it as the only vowel at the end of the word or
  1536. # syllables like ble will be generated.
  1537. def _have_final_split(units):
  1538. vowel_count = 0
  1539. #
  1540. # Count all the vowels in the word.
  1541. #
  1542. for unit_count in range(len(units)):
  1543. if (gram_rules[units[unit_count]] & VOWEL):
  1544. vowel_count = vowel_count + 1
  1545. #
  1546. # Return TRUE iff the only vowel was e, found at the end if the word.
  1547. #
  1548. return ((vowel_count == 1)
  1549. and (gram_rules[units[len(units) - 1]] & NO_FINAL_SPLIT))
  1550. def digram_is_invalid(first_unit, second_unit, current_unit_num,
  1551. length_left, units_in_syllable, vowel_count):
  1552. #
  1553. # Reject ILLEGAL_PAIRS of units.
  1554. #
  1555. if (marked(ILLEGAL_PAIR,
  1556. first_unit,
  1557. second_unit)):
  1558. return 1
  1559. #
  1560. # Reject units that will be split between
  1561. # syllables when the syllable has no vowels
  1562. # in it.
  1563. #
  1564. if (marked(BREAK,
  1565. first_unit,
  1566. second_unit) and
  1567. (vowel_count == 0)):
  1568. return 1
  1569. #
  1570. # Reject a unit that will end a syllable when
  1571. # no previous unit was a vowel and neither is
  1572. # this one.
  1573. #
  1574. if (marked(END,
  1575. first_unit,
  1576. second_unit) and
  1577. (vowel_count == 0) and
  1578. not (gram_rules[second_unit] & VOWEL)):
  1579. return 1
  1580. if (current_unit_num == 1):
  1581. #
  1582. # Reject the unit if we are at the starting
  1583. # digram of a syllable and it does not fit.
  1584. #
  1585. if (marked(NOT_BEGIN,
  1586. first_unit,
  1587. second_unit)):
  1588. return 1
  1589. else:
  1590. # We are not at the start of a syllable.
  1591. #
  1592. # Do not allow syllables where the first letter is y
  1593. # and the next pair can begin a syllable. This may
  1594. # lead to splits where y is left alone in a syllable.
  1595. # Also, the combination does not sound to good even
  1596. # if not split.
  1597. #
  1598. if ((current_unit_num == 2) and
  1599. marked(BEGIN,
  1600. first_unit,
  1601. second_unit) and
  1602. (gram_rules[units_in_syllable[0]] &
  1603. ALTERNATE_VOWEL)):
  1604. return 1
  1605. #
  1606. # If this is the last unit of a word, we
  1607. # should reject any digram that cannot end a
  1608. # syllable.
  1609. #
  1610. if (marked(NOT_END,
  1611. first_unit,
  1612. second_unit) and
  1613. (length_left == 0)):
  1614. return 1
  1615. #
  1616. # Reject the unit if the digram it forms wants
  1617. # to break the syllable, but the resulting
  1618. # digram that would end the syllable is not
  1619. # allowed to end a syllable.
  1620. #
  1621. if (marked(BREAK,
  1622. first_unit,
  1623. second_unit) and
  1624. (digram_rules[units_in_syllable[current_unit_num-2]]
  1625. [first_unit] & NOT_END)):
  1626. return 1
  1627. #
  1628. # Reject the unit if the digram it forms
  1629. # expects a vowel preceding it and there
  1630. # is none.
  1631. #
  1632. if (marked(PREFIX,
  1633. first_unit,
  1634. second_unit) and
  1635. not (gram_rules[ units_in_syllable[current_unit_num-2] ] &
  1636. VOWEL)):
  1637. return 1
  1638. return 0
  1639. # Generate next unit to password, making sure that it follows these rules:
  1640. #
  1641. # 1. Each syllable must contain exactly 1 or 2 consecutive vowels,
  1642. # where y is considered a vowel.
  1643. #
  1644. # 2. Syllable end is determined as follows:
  1645. #
  1646. # a. Vowel is generated and previous unit is a consonant and
  1647. # syllable already has a vowel. In this case, new syllable is
  1648. # started and already contains a vowel.
  1649. # b. A pair determined to be a "break" pair is encountered.
  1650. # In this case new syllable is started with second unit of this pair.
  1651. # c. End of password is encountered.
  1652. # d. "begin" pair is encountered legally. New syllable is started
  1653. # with this pair.
  1654. # e. "end" pair is legally encountered. New syllable has nothing yet.
  1655. #
  1656. # 3. Try generating another unit if:
  1657. #
  1658. # a. third consecutive vowel and not y.
  1659. # b. "break" pair generated but no vowel yet in current or
  1660. # previous 2 units are "not_end".
  1661. # c. "begin" pair generated but no vowel in syllable preceding begin pair,
  1662. # or both previous 2 pairs are designated "not_end".
  1663. # d. "end" pair generated but no vowel in current syllable or in
  1664. # "end" pair.
  1665. # e. "not_begin" pair generated but new syllable must begin
  1666. # (because previous syllable ended as defined in 2 above).
  1667. # f. vowel is generated and 2a is satisfied, but no syllable break
  1668. # is possible in previous 3 pairs.
  1669. # g. Second and third units of syllable must begin, and first unit
  1670. # is "alternate_vowel".
  1671. def get_syllable(pwlen, saved_pair):
  1672. #
  1673. # This is needed if the saved_pair is tried and the syllable then
  1674. # discarded because of the retry limit. Since the saved_pair is OK and
  1675. # fits in nicely with the preceding syllable, we will always use it.
  1676. #
  1677. hold_saved_pair = saved_pair
  1678. max_retries = (4 * pwlen) + len(grams)
  1679. max_loops = 100
  1680. num_loops = 0
  1681. #
  1682. # Loop until valid syllable is found.
  1683. #
  1684. while True: # do: ftso python while: not PEP 315.
  1685. #
  1686. # Try for a new syllable. Initialize all pertinent
  1687. # syllable variables.
  1688. #
  1689. syllable = "" # string, returned
  1690. units_in_syllable = dict() # array of units, returned
  1691. # grams:
  1692. unit = ''
  1693. current_unit = 0
  1694. last_unit = ''
  1695. # numbers:
  1696. vowel_count = 0
  1697. tries = 0
  1698. length_left = pwlen
  1699. # flags:
  1700. rule_broken = 0
  1701. want_vowel = 0
  1702. want_another_unit = 1
  1703. saved_pair = hold_saved_pair
  1704. #
  1705. # This loop finds all the units for the syllable.
  1706. #
  1707. while True: # do: ftso python while: not PEP 315.
  1708. want_vowel = 0
  1709. #
  1710. # This loop continues until a valid unit is found for the
  1711. # current position within the syllable.
  1712. #
  1713. while True: # do: ftso python while: not PEP 315.
  1714. rule_broken = 0
  1715. #
  1716. # If there are saved units from the previous
  1717. # syllable, use them up first.
  1718. #
  1719. #
  1720. # If there were two saved units, the first is
  1721. # guaranteed (by checks performed in the previous
  1722. # syllable) to be valid. We ignore the checks and
  1723. # place it in this syllable manually.
  1724. #
  1725. if (len(saved_pair) == 2):
  1726. syllable = saved_pair.pop()
  1727. units_in_syllable[0] = syllable
  1728. if (gram_rules[syllable] & VOWEL):
  1729. vowel_count = vowel_count + 1
  1730. current_unit = current_unit + 1
  1731. length_left -= len(syllable)
  1732. if (len(saved_pair) > 0):
  1733. #
  1734. # The unit becomes the last unit checked in the
  1735. # previous syllable.
  1736. #
  1737. unit = saved_pair.pop()
  1738. #
  1739. # The saved units have been used. Do not try to
  1740. # reuse them in this syllable (unless this
  1741. # particular syllable is rejected at which point
  1742. # we start to rebuild it with these same saved
  1743. # units).
  1744. #
  1745. else:
  1746. #
  1747. # If we don't have to consider the saved units,
  1748. # we generate a random one.
  1749. #
  1750. if (want_vowel):
  1751. unit = _random_unit(VOWEL)
  1752. else:
  1753. unit = _random_unit(NO_SPECIAL_RULE)
  1754. length_left -= len(unit)
  1755. rule_broken = 0
  1756. #
  1757. # Prevent having a word longer than expected.
  1758. #
  1759. if (length_left < 0):
  1760. rule_broken = 1
  1761. #
  1762. # First unit of syllable. This is special because
  1763. # the digram tests require 2 units and we don't have
  1764. # that yet. Nevertheless, we can perform some
  1765. # checks.
  1766. #
  1767. if (current_unit == 0):
  1768. #
  1769. # If this shouldn't begin a syllable, don't use it.
  1770. #
  1771. if (gram_rules[unit] & NOT_BEGIN_SYLLABLE):
  1772. rule_broken = 1
  1773. elif (length_left == 0):
  1774. #
  1775. # If this is the last unit of a word, we have
  1776. # a one unit syllable. Since each syllable
  1777. # must have a vowel, we make sure the unit is
  1778. # a vowel. Otherwise, we discard it.
  1779. #
  1780. if (gram_rules[unit] & VOWEL):
  1781. want_another_unit = 0
  1782. else:
  1783. rule_broken = 1
  1784. else:
  1785. #
  1786. # We are not at the start of a syllable.
  1787. # Save the previous unit for later tests.
  1788. #
  1789. last_unit = units_in_syllable[current_unit-1]
  1790. #
  1791. # There are some digram tests that are
  1792. # universally true. We test them out.
  1793. #
  1794. if (digram_is_invalid(last_unit,
  1795. unit,
  1796. current_unit,
  1797. length_left,
  1798. units_in_syllable,
  1799. vowel_count)):
  1800. rule_broken = 1
  1801. #
  1802. # The following checks occur when the current
  1803. # unit is a vowel and we are not looking at a
  1804. # word ending with an e.
  1805. #
  1806. if (not rule_broken and
  1807. (gram_rules[unit] & VOWEL) and
  1808. ((length_left > 0)
  1809. or not (gram_rules[last_unit] & NO_FINAL_SPLIT))):
  1810. #
  1811. # Don't allow 3 consecutive vowels in a
  1812. # syllable. Although some words formed
  1813. # like this are OK, like "beau", most are
  1814. # not.
  1815. #
  1816. if ((vowel_count > 1) and
  1817. (gram_rules[last_unit] & VOWEL)):
  1818. rule_broken = 1
  1819. #
  1820. # Check for the case of
  1821. # vowels-consonants-vowel, which is only
  1822. # legal if the last vowel is an e and we
  1823. # are the end of the word (which is not
  1824. # happening here due to a previous
  1825. # check).
  1826. #
  1827. elif ((vowel_count != 0) and not (gram_rules[last_unit] & VOWEL)):
  1828. #
  1829. # Try to save the vowel for the next
  1830. # syllable, but if the syllable left here
  1831. # is not proper (i.e., the resulting last
  1832. # digram cannot legally end it), just
  1833. # discard it and try for another.
  1834. #
  1835. if (digram_rules[ units_in_syllable[ current_unit - 2] ][last_unit] & NOT_END):
  1836. rule_broken = 1
  1837. else:
  1838. saved_pair = [unit]
  1839. want_another_unit = 0
  1840. #
  1841. # The unit picked and the digram formed are legal.
  1842. # We now determine if we can end the syllable. It may,
  1843. # in some cases, mean the last unit(s) may be deferred to
  1844. # the next syllable. We also check here to see if the
  1845. # digram formed expects a vowel to follow.
  1846. #
  1847. if (not rule_broken and want_another_unit):
  1848. if ((vowel_count != 0) and
  1849. (gram_rules[unit] & NO_FINAL_SPLIT) and
  1850. (length_left == 0) and
  1851. not (gram_rules[last_unit] & VOWEL)):
  1852. #
  1853. # This word ends in a silent e.
  1854. #
  1855. want_another_unit = 0
  1856. elif (marked(END,
  1857. last_unit,
  1858. unit)
  1859. or (length_left == 0)):
  1860. #
  1861. # This syllable ends either because the
  1862. # digram is a END pair or we would
  1863. # otherwise exceed the length of the
  1864. # word.
  1865. #
  1866. want_another_unit = 0
  1867. elif (vowel_count != 0 and length_left > 0):
  1868. #
  1869. # Since we have a vowel in the syllable
  1870. # already, if the digram calls for the end of the
  1871. # syllable, we can legally split it off. We also
  1872. # make sure that we are not at the end of the
  1873. # dangerous because that syllable may not have
  1874. # vowels, or it may not be a legal syllable end,
  1875. # and the retrying mechanism will loop infinitely
  1876. # with the same digram.
  1877. #
  1878. #
  1879. # If we must begin a syllable, we do so if
  1880. # the only vowel in THIS syllable is not part
  1881. # of the digram we are pushing to the next
  1882. # syllable.
  1883. #
  1884. if (marked(BEGIN,
  1885. last_unit,
  1886. unit) and
  1887. (current_unit > 1) and
  1888. not ((vowel_count == 1) and
  1889. (gram_rules[last_unit] & VOWEL))):
  1890. saved_pair = [unit, last_unit]
  1891. want_another_unit = 0
  1892. elif (
  1893. marked(BREAK,
  1894. last_unit,
  1895. unit)):
  1896. saved_pair = [unit]
  1897. want_another_unit = 0
  1898. elif (
  1899. marked(SUFFIX,
  1900. last_unit,
  1901. unit)):
  1902. want_vowel = 1
  1903. tries = tries + 1
  1904. #
  1905. # If this unit was illegal, redetermine the amount of
  1906. # letters left to go in the word.
  1907. #
  1908. if (rule_broken):
  1909. length_left += len(unit)
  1910. if not (rule_broken and tries <= max_retries):
  1911. break
  1912. #
  1913. # The unit fit OK.
  1914. #
  1915. if (tries <= max_retries):
  1916. #
  1917. # If the unit were a vowel, count it in. However, if
  1918. # the unit were a y and appear at the start of the
  1919. # syllable, treat it like a consonant (so that words
  1920. # like "year" can appear and not conflict with the 3
  1921. # consecutive vowel rule).
  1922. #
  1923. if (
  1924. (gram_rules[unit] & VOWEL)
  1925. and
  1926. ((current_unit > 0) or not (gram_rules[unit] & ALTERNATE_VOWEL))
  1927. ):
  1928. vowel_count = vowel_count + 1
  1929. #
  1930. # If a unit or units were to be saved, we must adjust
  1931. # the syllable formed. Otherwise, we append the
  1932. # current unit to the syllable.
  1933. #
  1934. if (len(saved_pair) == 2):
  1935. syllable = syllable[0:
  1936. len(syllable) -
  1937. len(last_unit)]
  1938. length_left += len(last_unit)
  1939. current_unit -= 2
  1940. elif (len(saved_pair) == 1):
  1941. current_unit = current_unit - 1
  1942. else:
  1943. units_in_syllable[ current_unit ] = unit
  1944. syllable = syllable + unit
  1945. else:
  1946. #
  1947. # Whoops! Too many tries. We set rule_broken so we
  1948. # can loop in the outer loop and try another
  1949. # syllable.
  1950. #
  1951. rule_broken = 1
  1952. current_unit = current_unit + 1
  1953. if not (tries <= max_retries and want_another_unit):
  1954. break
  1955. num_loops = num_loops + 1
  1956. if not ((rule_broken or _illegal_placement(units_in_syllable))):
  1957. break
  1958. return (syllable, units_in_syllable.values(), saved_pair)
  1959. # goes through an individual syllable and checks for illegal
  1960. # combinations of letters that go beyond looking at digrams.
  1961. #
  1962. # We look at things like 3 consecutive vowels or consonants, or
  1963. # syllables with consonants between vowels (unless one of them is the
  1964. # final silent e).
  def _illegal_placement(units):
      """Return 1 if the syllable breaks a placement rule, otherwise 0.

      These are the checks that go beyond looking at single digrams.
      ``units`` is indexed by integer position ``0 .. len(units) - 1``
      and every entry is looked up in the module-level ``gram_rules``
      table, whose values are tested as bit flags.

      A syllable is rejected when any of the following holds:

      * a vowel directly follows a consonant after an earlier vowel has
        already been counted (vowels split by consonants), unless that
        vowel is flagged ``NO_FINAL_SPLIT`` and is the last unit of the
        syllable (the final silent e);
      * three consecutive units are all consonants;
      * three consecutive units are all vowels, unless they are the
        first three units and the first one is flagged
        ``ALTERNATE_VOWEL`` (an initial y).

      An empty ``units`` is never illegal.
      """
      unit_total = len(units)
      # Index of the final unit.  The silent-e exception must compare
      # against this value: comparing against len(units) can never match
      # inside the loop below, which would disable the exception.
      last_index = unit_total - 1
      vowel_count = 0

      for index in range(unit_total):
          is_vowel = gram_rules[units[index]] & VOWEL

          if index >= 1:
              prev_is_vowel = gram_rules[units[index - 1]] & VOWEL

              #
              # Don't allow vowels to be split with consonants in a
              # single syllable, except for a final silent e.
              #
              final_silent_e = (
                  (gram_rules[units[index]] & NO_FINAL_SPLIT)
                  and index == last_index
              )
              if (not prev_is_vowel and is_vowel
                      and not final_silent_e and vowel_count):
                  return 1

              #
              # The remaining checks need at least 3 units.
              #
              if index >= 2:
                  first_is_vowel = gram_rules[units[index - 2]] & VOWEL

                  # Disallow 3 consecutive consonants.
                  if not (first_is_vowel or prev_is_vowel or is_vowel):
                      return 1

                  # Disallow 3 consecutive vowels, where the first is
                  # not a syllable-initial y.
                  initial_y = (
                      (gram_rules[units[0]] & ALTERNATE_VOWEL)
                      and index == 2
                  )
                  if (first_is_vowel and prev_is_vowel and is_vowel
                          and not initial_y):
                      return 1

          #
          # Count the vowels seen so far.  An initial y in a syllable of
          # more than one unit is treated as a consonant, so it is not
          # counted.
          #
          uncounted_initial_y = (
              (gram_rules[units[0]] & ALTERNATE_VOWEL)
              and index == 0
              and unit_total > 1
          )
          if is_vowel and not uncounted_initial_y:
              vowel_count += 1

      return 0