generator.py 86 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299
  1. #============================================================================
  2. # This file is part of Pwman3.
  3. #
  4. # Pwman3 is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License, version 2
  6. # as published by the Free Software Foundation;
  7. #
  8. # Pwman3 is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with Pwman3; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  16. #============================================================================
  17. # Copyright (C) 2012 Oz Nahum <nahumoz@gmail.com>
  18. #============================================================================
  19. #============================================================================
  20. # Copyright (C) 2006 Ivan Kelly <ivan@ivankelly.net>
  21. #============================================================================
  22. """
  23. Functions to generate passwords.
  24. Based heavily on passogva.py (c) 2004 Mo-Tsuki, LLC.
  25. http://dev.mosuki.com/passogva/
  26. Usage:
  27. import pwman.util.generator as PwGen
  28. minlen = 6
  29. maxlen = 8
  30. (word, hyphenated_word) = PwGen.generate_password(minlen, maxlen)
  31. """
  32. import random
  33. class PasswordGenerationException(Exception):
  34. def __init__(self, message):
  35. self.message = message
  36. def __str__(self):
  37. return self.message
  38. def generate_password(minlen, maxlen, capitals = True, symbols = False, numerics = False):
  39. (password, hyphenated) = generate_password_shazel(minlen, maxlen)
  40. if (capitals):
  41. password = randomly_capitalize(password)
  42. if (symbols):
  43. password = leetify(password)
  44. elif (numerics):
  45. password = change_numerics(password)
  46. return (password, hyphenated)
  47. def randomly_capitalize(password):
  48. newpassword = str()
  49. for l in password:
  50. if (random.random() >= 0.5):
  51. l = l.upper()
  52. newpassword = newpassword + l
  53. return newpassword
  54. def leetify(password):
  55. newpassword = str()
  56. for l in password:
  57. if (random.random() >= 0.5):
  58. l = leetify_char(l)
  59. newpassword = newpassword + l
  60. return newpassword
  61. def change_numerics(password):
  62. newpassword = str()
  63. for l in password:
  64. if (random.random() >= 0.5):
  65. l = change_numerics_char(l)
  66. newpassword = newpassword + l
  67. return newpassword
  68. #
  69. # Dictionary of mappings for leetness
  70. #
  71. leetlist = {
  72. 'w': "\/\/", 'W': "\/\/", 'e': '3', 'E': '3', 't': '+', 'T': '7',
  73. 'i': '1', 'I': '1', 'o': '0', 'O': '0', 'A': '4', 's': '5', 'S': '$',
  74. 'g': '9', 'K': '|<', 'k': '|<', 'x': '><', 'X': '><', 'c': '<', 'C': '<',
  75. 'v': '\/', 'V': '\/', 'n': '|\|', 'N': '|\|', 'm': '|\/|', 'M': '|\/|'
  76. }
  77. def leetify_char(l):
  78. try:
  79. return leetlist[l]
  80. except KeyError:
  81. return l
  82. numericlist = {
  83. 'e': '3', 'E': '3', 'T': '7',
  84. 'i': '1', 'I': '1', 'o': '0', 'O': '0', 'A': '4', 's': '5', 'S': '5',
  85. 'g': '9', 'q': '9', 'l': '1'
  86. }
  87. def change_numerics_char(l):
  88. try:
  89. return numericlist[l]
  90. except KeyError:
  91. return l
  92. #
  93. # Beyond this point layeth Steve Hazel's code
  94. # Steven Hazel <sah@mosuki.com>
  95. #
  96. # I've added exceptions
  97. #
  98. MIN_LENGTH_PASSWORD = 6
  99. MAX_LENGTH_PASSWORD = 14
  100. grams = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
  101. 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
  102. 'z', 'ch', 'gh', 'ph', 'rh', 'sh', 'th', 'wh', 'qu', 'ck')
  103. vowel_grams = ('a', 'e', 'i', 'o', 'u', 'y')
  104. occurrence_frequencies = {
  105. 'a' : 10, 'b' : 8, 'c' : 12, 'd' : 12,
  106. 'e' : 12, 'f' : 8, 'g' : 8, 'h' : 6,
  107. 'i' : 10, 'j' : 8, 'k' : 8, 'l' : 6,
  108. 'm' : 6, 'n' : 10, 'o' : 10, 'p' : 6,
  109. 'r' : 10, 's' : 8, 't' : 10, 'u' : 6,
  110. 'v' : 8, 'w' : 8, 'x' : 1, 'y' : 8,
  111. 'z' : 1, 'ch' : 1, 'gh' : 1, 'ph' : 1,
  112. 'rh' : 1, 'sh' : 2, 'th' : 1, 'wh' : 1,
  113. 'qu' : 1, 'ck' : 1}
  114. numbers = []
  115. for gram in grams:
  116. for i in range(occurrence_frequencies[gram]):
  117. numbers.append(gram)
  118. vowel_numbers = []
  119. for gram in vowel_grams:
  120. for i in range(occurrence_frequencies[gram]):
  121. vowel_numbers.append(gram)
  122. #
  123. # Bit flags
  124. #
  125. MAX_UNACCEPTABLE = 20
  126. # gram rules:
  127. NOT_BEGIN_SYLLABLE = 0x08
  128. NO_FINAL_SPLIT = 0x04
  129. VOWEL = 0x02
  130. ALTERNATE_VOWEL = 0x01
  131. NO_SPECIAL_RULE = 0x00
  132. # digram rules:
  133. BEGIN = 0x80
  134. NOT_BEGIN = 0x40
  135. BREAK = 0x20
  136. PREFIX = 0x10
  137. ILLEGAL_PAIR = 0x08
  138. SUFFIX = 0x04
  139. END = 0x02
  140. NOT_END = 0x01
  141. ANY_COMBINATION = 0x00
  142. gram_rules = dict()
  143. for gram in grams:
  144. gram_rules[ gram ] = NO_SPECIAL_RULE
  145. for gram in vowel_grams:
  146. gram_rules[ gram ] = VOWEL
  147. gram_rules['e'] |= NO_FINAL_SPLIT
  148. gram_rules['y'] |= ALTERNATE_VOWEL
  149. gram_rules['x'] = NOT_BEGIN_SYLLABLE
  150. gram_rules['ck'] = NOT_BEGIN_SYLLABLE
  151. digram_rules = dict()
  152. ###############################################################################
  153. # BEGIN DIGRAM RULES
  154. ###############################################################################
  155. digram_rules['a'] = dict()
  156. digram_rules['a']['a'] = ILLEGAL_PAIR
  157. digram_rules['a']['b'] = ANY_COMBINATION
  158. digram_rules['a']['c'] = ANY_COMBINATION
  159. digram_rules['a']['d'] = ANY_COMBINATION
  160. digram_rules['a']['e'] = ILLEGAL_PAIR
  161. digram_rules['a']['f'] = ANY_COMBINATION
  162. digram_rules['a']['g'] = ANY_COMBINATION
  163. digram_rules['a']['h'] = NOT_BEGIN | BREAK | NOT_END
  164. digram_rules['a']['i'] = ANY_COMBINATION
  165. digram_rules['a']['j'] = ANY_COMBINATION
  166. digram_rules['a']['k'] = ANY_COMBINATION
  167. digram_rules['a']['l'] = ANY_COMBINATION
  168. digram_rules['a']['m'] = ANY_COMBINATION
  169. digram_rules['a']['n'] = ANY_COMBINATION
  170. digram_rules['a']['o'] = ILLEGAL_PAIR
  171. digram_rules['a']['p'] = ANY_COMBINATION
  172. digram_rules['a']['r'] = ANY_COMBINATION
  173. digram_rules['a']['s'] = ANY_COMBINATION
  174. digram_rules['a']['t'] = ANY_COMBINATION
  175. digram_rules['a']['u'] = ANY_COMBINATION
  176. digram_rules['a']['v'] = ANY_COMBINATION
  177. digram_rules['a']['w'] = ANY_COMBINATION
  178. digram_rules['a']['x'] = ANY_COMBINATION
  179. digram_rules['a']['y'] = ANY_COMBINATION
  180. digram_rules['a']['z'] = ANY_COMBINATION
  181. digram_rules['a']['ch'] = ANY_COMBINATION
  182. digram_rules['a']['gh'] = ILLEGAL_PAIR
  183. digram_rules['a']['ph'] = ANY_COMBINATION
  184. digram_rules['a']['rh'] = ILLEGAL_PAIR
  185. digram_rules['a']['sh'] = ANY_COMBINATION
  186. digram_rules['a']['th'] = ANY_COMBINATION
  187. digram_rules['a']['wh'] = ILLEGAL_PAIR
  188. digram_rules['a']['qu'] = BREAK | NOT_END
  189. digram_rules['a']['ck'] = ANY_COMBINATION
  190. digram_rules['b'] = dict()
  191. digram_rules['b']['a'] = ANY_COMBINATION
  192. digram_rules['b']['b'] = NOT_BEGIN | BREAK | NOT_END
  193. digram_rules['b']['c'] = NOT_BEGIN | BREAK | NOT_END
  194. digram_rules['b']['d'] = NOT_BEGIN | BREAK | NOT_END
  195. digram_rules['b']['e'] = ANY_COMBINATION
  196. digram_rules['b']['f'] = NOT_BEGIN | BREAK | NOT_END
  197. digram_rules['b']['g'] = NOT_BEGIN | BREAK | NOT_END
  198. digram_rules['b']['h'] = NOT_BEGIN | BREAK | NOT_END
  199. digram_rules['b']['i'] = ANY_COMBINATION
  200. digram_rules['b']['j'] = NOT_BEGIN | BREAK | NOT_END
  201. digram_rules['b']['k'] = NOT_BEGIN | BREAK | NOT_END
  202. digram_rules['b']['l'] = BEGIN | SUFFIX | NOT_END
  203. digram_rules['b']['m'] = NOT_BEGIN | BREAK | NOT_END
  204. digram_rules['b']['n'] = NOT_BEGIN | BREAK | NOT_END
  205. digram_rules['b']['o'] = ANY_COMBINATION
  206. digram_rules['b']['p'] = NOT_BEGIN | BREAK | NOT_END
  207. digram_rules['b']['r'] = BEGIN | END
  208. digram_rules['b']['s'] = NOT_BEGIN
  209. digram_rules['b']['t'] = NOT_BEGIN | BREAK | NOT_END
  210. digram_rules['b']['u'] = ANY_COMBINATION
  211. digram_rules['b']['v'] = NOT_BEGIN | BREAK | NOT_END
  212. digram_rules['b']['w'] = NOT_BEGIN | BREAK | NOT_END
  213. digram_rules['b']['x'] = ILLEGAL_PAIR
  214. digram_rules['b']['y'] = ANY_COMBINATION
  215. digram_rules['b']['z'] = NOT_BEGIN | BREAK | NOT_END
  216. digram_rules['b']['ch'] = NOT_BEGIN | BREAK | NOT_END
  217. digram_rules['b']['gh'] = ILLEGAL_PAIR
  218. digram_rules['b']['ph'] = NOT_BEGIN | BREAK | NOT_END
  219. digram_rules['b']['rh'] = ILLEGAL_PAIR
  220. digram_rules['b']['sh'] = NOT_BEGIN | BREAK | NOT_END
  221. digram_rules['b']['th'] = NOT_BEGIN | BREAK | NOT_END
  222. digram_rules['b']['wh'] = ILLEGAL_PAIR
  223. digram_rules['b']['qu'] = NOT_BEGIN | BREAK | NOT_END
  224. digram_rules['b']['ck'] = ILLEGAL_PAIR
  225. digram_rules['c'] = dict()
  226. digram_rules['c']['a'] = ANY_COMBINATION
  227. digram_rules['c']['b'] = NOT_BEGIN | BREAK | NOT_END
  228. digram_rules['c']['c'] = NOT_BEGIN | BREAK | NOT_END
  229. digram_rules['c']['d'] = NOT_BEGIN | BREAK | NOT_END
  230. digram_rules['c']['e'] = ANY_COMBINATION
  231. digram_rules['c']['f'] = NOT_BEGIN | BREAK | NOT_END
  232. digram_rules['c']['g'] = NOT_BEGIN | BREAK | NOT_END
  233. digram_rules['c']['h'] = NOT_BEGIN | BREAK | NOT_END
  234. digram_rules['c']['i'] = ANY_COMBINATION
  235. digram_rules['c']['j'] = NOT_BEGIN | BREAK | NOT_END
  236. digram_rules['c']['k'] = NOT_BEGIN | BREAK | NOT_END
  237. digram_rules['c']['l'] = SUFFIX | NOT_END
  238. digram_rules['c']['m'] = NOT_BEGIN | BREAK | NOT_END
  239. digram_rules['c']['n'] = NOT_BEGIN | BREAK | NOT_END
  240. digram_rules['c']['o'] = ANY_COMBINATION
  241. digram_rules['c']['p'] = NOT_BEGIN | BREAK | NOT_END
  242. digram_rules['c']['r'] = NOT_END
  243. digram_rules['c']['s'] = NOT_BEGIN | END
  244. digram_rules['c']['t'] = NOT_BEGIN | PREFIX
  245. digram_rules['c']['u'] = ANY_COMBINATION
  246. digram_rules['c']['v'] = NOT_BEGIN | BREAK | NOT_END
  247. digram_rules['c']['w'] = NOT_BEGIN | BREAK | NOT_END
  248. digram_rules['c']['x'] = ILLEGAL_PAIR
  249. digram_rules['c']['y'] = ANY_COMBINATION
  250. digram_rules['c']['z'] = NOT_BEGIN | BREAK | NOT_END
  251. digram_rules['c']['ch'] = ILLEGAL_PAIR
  252. digram_rules['c']['gh'] = ILLEGAL_PAIR
  253. digram_rules['c']['ph'] = NOT_BEGIN | BREAK | NOT_END
  254. digram_rules['c']['rh'] = ILLEGAL_PAIR
  255. digram_rules['c']['sh'] = NOT_BEGIN | BREAK | NOT_END
  256. digram_rules['c']['th'] = NOT_BEGIN | BREAK | NOT_END
  257. digram_rules['c']['wh'] = ILLEGAL_PAIR
  258. digram_rules['c']['qu'] = NOT_BEGIN | SUFFIX | NOT_END
  259. digram_rules['c']['ck'] = ILLEGAL_PAIR
  260. digram_rules['d'] = dict()
  261. digram_rules['d']['a'] = ANY_COMBINATION
  262. digram_rules['d']['b'] = NOT_BEGIN | BREAK | NOT_END
  263. digram_rules['d']['c'] = NOT_BEGIN | BREAK | NOT_END
  264. digram_rules['d']['d'] = NOT_BEGIN
  265. digram_rules['d']['e'] = ANY_COMBINATION
  266. digram_rules['d']['f'] = NOT_BEGIN | BREAK | NOT_END
  267. digram_rules['d']['g'] = NOT_BEGIN | BREAK | NOT_END
  268. digram_rules['d']['h'] = NOT_BEGIN | BREAK | NOT_END
  269. digram_rules['d']['i'] = ANY_COMBINATION
  270. digram_rules['d']['j'] = NOT_BEGIN | BREAK | NOT_END
  271. digram_rules['d']['k'] = NOT_BEGIN | BREAK | NOT_END
  272. digram_rules['d']['l'] = NOT_BEGIN | BREAK | NOT_END
  273. digram_rules['d']['m'] = NOT_BEGIN | BREAK | NOT_END
  274. digram_rules['d']['n'] = NOT_BEGIN | BREAK | NOT_END
  275. digram_rules['d']['o'] = ANY_COMBINATION
  276. digram_rules['d']['p'] = NOT_BEGIN | BREAK | NOT_END
  277. digram_rules['d']['r'] = BEGIN | NOT_END
  278. digram_rules['d']['s'] = NOT_BEGIN | END
  279. digram_rules['d']['t'] = NOT_BEGIN | BREAK | NOT_END
  280. digram_rules['d']['u'] = ANY_COMBINATION
  281. digram_rules['d']['v'] = NOT_BEGIN | BREAK | NOT_END
  282. digram_rules['d']['w'] = NOT_BEGIN | BREAK | NOT_END
  283. digram_rules['d']['x'] = ILLEGAL_PAIR
  284. digram_rules['d']['y'] = ANY_COMBINATION
  285. digram_rules['d']['z'] = NOT_BEGIN | BREAK | NOT_END
  286. digram_rules['d']['ch'] = NOT_BEGIN | BREAK | NOT_END
  287. digram_rules['d']['gh'] = NOT_BEGIN | BREAK | NOT_END
  288. digram_rules['d']['ph'] = NOT_BEGIN | BREAK | NOT_END
  289. digram_rules['d']['rh'] = ILLEGAL_PAIR
  290. digram_rules['d']['sh'] = NOT_BEGIN | NOT_END
  291. digram_rules['d']['th'] = NOT_BEGIN | PREFIX
  292. digram_rules['d']['wh'] = ILLEGAL_PAIR
  293. digram_rules['d']['qu'] = NOT_BEGIN | BREAK | NOT_END
  294. digram_rules['d']['ck'] = ILLEGAL_PAIR
  295. digram_rules['e'] = dict()
  296. digram_rules['e']['a'] = ANY_COMBINATION
  297. digram_rules['e']['b'] = ANY_COMBINATION
  298. digram_rules['e']['c'] = ANY_COMBINATION
  299. digram_rules['e']['d'] = ANY_COMBINATION
  300. digram_rules['e']['e'] = ANY_COMBINATION
  301. digram_rules['e']['f'] = ANY_COMBINATION
  302. digram_rules['e']['g'] = ANY_COMBINATION
  303. digram_rules['e']['h'] = NOT_BEGIN | BREAK | NOT_END
  304. digram_rules['e']['i'] = NOT_END
  305. digram_rules['e']['j'] = ANY_COMBINATION
  306. digram_rules['e']['k'] = ANY_COMBINATION
  307. digram_rules['e']['l'] = ANY_COMBINATION
  308. digram_rules['e']['m'] = ANY_COMBINATION
  309. digram_rules['e']['n'] = ANY_COMBINATION
  310. digram_rules['e']['o'] = BREAK
  311. digram_rules['e']['p'] = ANY_COMBINATION
  312. digram_rules['e']['r'] = ANY_COMBINATION
  313. digram_rules['e']['s'] = ANY_COMBINATION
  314. digram_rules['e']['t'] = ANY_COMBINATION
  315. digram_rules['e']['u'] = ANY_COMBINATION
  316. digram_rules['e']['v'] = ANY_COMBINATION
  317. digram_rules['e']['w'] = ANY_COMBINATION
  318. digram_rules['e']['x'] = ANY_COMBINATION
  319. digram_rules['e']['y'] = ANY_COMBINATION
  320. digram_rules['e']['z'] = ANY_COMBINATION
  321. digram_rules['e']['ch'] = ANY_COMBINATION
  322. digram_rules['e']['gh'] = NOT_BEGIN | BREAK | NOT_END
  323. digram_rules['e']['ph'] = ANY_COMBINATION
  324. digram_rules['e']['rh'] = ILLEGAL_PAIR
  325. digram_rules['e']['sh'] = ANY_COMBINATION
  326. digram_rules['e']['th'] = ANY_COMBINATION
  327. digram_rules['e']['wh'] = ILLEGAL_PAIR
  328. digram_rules['e']['qu'] = BREAK | NOT_END
  329. digram_rules['e']['ck'] = ANY_COMBINATION
  330. digram_rules['f'] = dict()
  331. digram_rules['f']['a'] = ANY_COMBINATION
  332. digram_rules['f']['b'] = NOT_BEGIN | BREAK | NOT_END
  333. digram_rules['f']['c'] = NOT_BEGIN | BREAK | NOT_END
  334. digram_rules['f']['d'] = NOT_BEGIN | BREAK | NOT_END
  335. digram_rules['f']['e'] = ANY_COMBINATION
  336. digram_rules['f']['f'] = NOT_BEGIN
  337. digram_rules['f']['g'] = NOT_BEGIN | BREAK | NOT_END
  338. digram_rules['f']['h'] = NOT_BEGIN | BREAK | NOT_END
  339. digram_rules['f']['i'] = ANY_COMBINATION
  340. digram_rules['f']['j'] = NOT_BEGIN | BREAK | NOT_END
  341. digram_rules['f']['k'] = NOT_BEGIN | BREAK | NOT_END
  342. digram_rules['f']['l'] = BEGIN | SUFFIX | NOT_END
  343. digram_rules['f']['m'] = NOT_BEGIN | BREAK | NOT_END
  344. digram_rules['f']['n'] = NOT_BEGIN | BREAK | NOT_END
  345. digram_rules['f']['o'] = ANY_COMBINATION
  346. digram_rules['f']['p'] = NOT_BEGIN | BREAK | NOT_END
  347. digram_rules['f']['r'] = BEGIN | NOT_END
  348. digram_rules['f']['s'] = NOT_BEGIN
  349. digram_rules['f']['t'] = NOT_BEGIN
  350. digram_rules['f']['u'] = ANY_COMBINATION
  351. digram_rules['f']['v'] = NOT_BEGIN | BREAK | NOT_END
  352. digram_rules['f']['w'] = NOT_BEGIN | BREAK | NOT_END
  353. digram_rules['f']['x'] = ILLEGAL_PAIR
  354. digram_rules['f']['y'] = NOT_BEGIN
  355. digram_rules['f']['z'] = NOT_BEGIN | BREAK | NOT_END
  356. digram_rules['f']['ch'] = NOT_BEGIN | BREAK | NOT_END
  357. digram_rules['f']['gh'] = NOT_BEGIN | BREAK | NOT_END
  358. digram_rules['f']['ph'] = NOT_BEGIN | BREAK | NOT_END
  359. digram_rules['f']['rh'] = ILLEGAL_PAIR
  360. digram_rules['f']['sh'] = NOT_BEGIN | BREAK | NOT_END
  361. digram_rules['f']['th'] = NOT_BEGIN | BREAK | NOT_END
  362. digram_rules['f']['wh'] = ILLEGAL_PAIR
  363. digram_rules['f']['qu'] = NOT_BEGIN | BREAK | NOT_END
  364. digram_rules['f']['ck'] = ILLEGAL_PAIR
  365. digram_rules['g'] = dict()
  366. digram_rules['g']['a'] = ANY_COMBINATION
  367. digram_rules['g']['b'] = NOT_BEGIN | BREAK | NOT_END
  368. digram_rules['g']['c'] = NOT_BEGIN | BREAK | NOT_END
  369. digram_rules['g']['d'] = NOT_BEGIN | BREAK | NOT_END
  370. digram_rules['g']['e'] = ANY_COMBINATION
  371. digram_rules['g']['f'] = NOT_BEGIN | BREAK | NOT_END
  372. digram_rules['g']['g'] = NOT_BEGIN
  373. digram_rules['g']['h'] = NOT_BEGIN | BREAK | NOT_END
  374. digram_rules['g']['i'] = ANY_COMBINATION
  375. digram_rules['g']['j'] = NOT_BEGIN | BREAK | NOT_END
  376. digram_rules['g']['k'] = ILLEGAL_PAIR
  377. digram_rules['g']['l'] = BEGIN | SUFFIX | NOT_END
  378. digram_rules['g']['m'] = NOT_BEGIN | BREAK | NOT_END
  379. digram_rules['g']['n'] = NOT_BEGIN | BREAK | NOT_END
  380. digram_rules['g']['o'] = ANY_COMBINATION
  381. digram_rules['g']['p'] = NOT_BEGIN | BREAK | NOT_END
  382. digram_rules['g']['r'] = BEGIN | NOT_END
  383. digram_rules['g']['s'] = NOT_BEGIN | END
  384. digram_rules['g']['t'] = NOT_BEGIN | BREAK | NOT_END
  385. digram_rules['g']['u'] = ANY_COMBINATION
  386. digram_rules['g']['v'] = NOT_BEGIN | BREAK | NOT_END
  387. digram_rules['g']['w'] = NOT_BEGIN | BREAK | NOT_END
  388. digram_rules['g']['x'] = ILLEGAL_PAIR
  389. digram_rules['g']['y'] = NOT_BEGIN
  390. digram_rules['g']['z'] = NOT_BEGIN | BREAK | NOT_END
  391. digram_rules['g']['ch'] = NOT_BEGIN | BREAK | NOT_END
  392. digram_rules['g']['gh'] = ILLEGAL_PAIR
  393. digram_rules['g']['ph'] = NOT_BEGIN | BREAK | NOT_END
  394. digram_rules['g']['rh'] = ILLEGAL_PAIR
  395. digram_rules['g']['sh'] = NOT_BEGIN
  396. digram_rules['g']['th'] = NOT_BEGIN
  397. digram_rules['g']['wh'] = ILLEGAL_PAIR
  398. digram_rules['g']['qu'] = NOT_BEGIN | BREAK | NOT_END
  399. digram_rules['g']['ck'] = ILLEGAL_PAIR
  400. digram_rules['h'] = dict()
  401. digram_rules['h']['a'] = ANY_COMBINATION
  402. digram_rules['h']['b'] = NOT_BEGIN | BREAK | NOT_END
  403. digram_rules['h']['c'] = NOT_BEGIN | BREAK | NOT_END
  404. digram_rules['h']['d'] = NOT_BEGIN | BREAK | NOT_END
  405. digram_rules['h']['e'] = ANY_COMBINATION
  406. digram_rules['h']['f'] = NOT_BEGIN | BREAK | NOT_END
  407. digram_rules['h']['g'] = NOT_BEGIN | BREAK | NOT_END
  408. digram_rules['h']['h'] = ILLEGAL_PAIR
  409. digram_rules['h']['i'] = ANY_COMBINATION
  410. digram_rules['h']['j'] = NOT_BEGIN | BREAK | NOT_END
  411. digram_rules['h']['k'] = NOT_BEGIN | BREAK | NOT_END
  412. digram_rules['h']['l'] = NOT_BEGIN | BREAK | NOT_END
  413. digram_rules['h']['m'] = NOT_BEGIN | BREAK | NOT_END
  414. digram_rules['h']['n'] = NOT_BEGIN | BREAK | NOT_END
  415. digram_rules['h']['o'] = ANY_COMBINATION
  416. digram_rules['h']['p'] = NOT_BEGIN | BREAK | NOT_END
  417. digram_rules['h']['r'] = NOT_BEGIN | BREAK | NOT_END
  418. digram_rules['h']['s'] = NOT_BEGIN | BREAK | NOT_END
  419. digram_rules['h']['t'] = NOT_BEGIN | BREAK | NOT_END
  420. digram_rules['h']['u'] = ANY_COMBINATION
  421. digram_rules['h']['v'] = NOT_BEGIN | BREAK | NOT_END
  422. digram_rules['h']['w'] = NOT_BEGIN | BREAK | NOT_END
  423. digram_rules['h']['x'] = ILLEGAL_PAIR
  424. digram_rules['h']['y'] = ANY_COMBINATION
  425. digram_rules['h']['z'] = NOT_BEGIN | BREAK | NOT_END
  426. digram_rules['h']['ch'] = NOT_BEGIN | BREAK | NOT_END
  427. digram_rules['h']['gh'] = NOT_BEGIN | BREAK | NOT_END
  428. digram_rules['h']['ph'] = NOT_BEGIN | BREAK | NOT_END
  429. digram_rules['h']['rh'] = ILLEGAL_PAIR
  430. digram_rules['h']['sh'] = NOT_BEGIN | BREAK | NOT_END
  431. digram_rules['h']['th'] = NOT_BEGIN | BREAK | NOT_END
  432. digram_rules['h']['wh'] = ILLEGAL_PAIR
  433. digram_rules['h']['qu'] = NOT_BEGIN | BREAK | NOT_END
  434. digram_rules['h']['ck'] = ILLEGAL_PAIR
  435. digram_rules['i'] = dict()
  436. digram_rules['i']['a'] = ANY_COMBINATION
  437. digram_rules['i']['b'] = ANY_COMBINATION
  438. digram_rules['i']['c'] = ANY_COMBINATION
  439. digram_rules['i']['d'] = ANY_COMBINATION
  440. digram_rules['i']['e'] = NOT_BEGIN
  441. digram_rules['i']['f'] = ANY_COMBINATION
  442. digram_rules['i']['g'] = ANY_COMBINATION
  443. digram_rules['i']['h'] = NOT_BEGIN | BREAK | NOT_END
  444. digram_rules['i']['i'] = ILLEGAL_PAIR
  445. digram_rules['i']['j'] = ANY_COMBINATION
  446. digram_rules['i']['k'] = ANY_COMBINATION
  447. digram_rules['i']['l'] = ANY_COMBINATION
  448. digram_rules['i']['m'] = ANY_COMBINATION
  449. digram_rules['i']['n'] = ANY_COMBINATION
  450. digram_rules['i']['o'] = BREAK
  451. digram_rules['i']['p'] = ANY_COMBINATION
  452. digram_rules['i']['r'] = ANY_COMBINATION
  453. digram_rules['i']['s'] = ANY_COMBINATION
  454. digram_rules['i']['t'] = ANY_COMBINATION
  455. digram_rules['i']['u'] = NOT_BEGIN | BREAK | NOT_END
  456. digram_rules['i']['v'] = ANY_COMBINATION
  457. digram_rules['i']['w'] = NOT_BEGIN | BREAK | NOT_END
  458. digram_rules['i']['x'] = ANY_COMBINATION
  459. digram_rules['i']['y'] = NOT_BEGIN | BREAK | NOT_END
  460. digram_rules['i']['z'] = ANY_COMBINATION
  461. digram_rules['i']['ch'] = ANY_COMBINATION
  462. digram_rules['i']['gh'] = NOT_BEGIN
  463. digram_rules['i']['ph'] = ANY_COMBINATION
  464. digram_rules['i']['rh'] = ILLEGAL_PAIR
  465. digram_rules['i']['sh'] = ANY_COMBINATION
  466. digram_rules['i']['th'] = ANY_COMBINATION
  467. digram_rules['i']['wh'] = ILLEGAL_PAIR
  468. digram_rules['i']['qu'] = BREAK | NOT_END
  469. digram_rules['i']['ck'] = ANY_COMBINATION
  470. digram_rules['j'] = dict()
  471. digram_rules['j']['a'] = ANY_COMBINATION
  472. digram_rules['j']['b'] = NOT_BEGIN | BREAK | NOT_END
  473. digram_rules['j']['c'] = NOT_BEGIN | BREAK | NOT_END
  474. digram_rules['j']['d'] = NOT_BEGIN | BREAK | NOT_END
  475. digram_rules['j']['e'] = ANY_COMBINATION
  476. digram_rules['j']['f'] = NOT_BEGIN | BREAK | NOT_END
  477. digram_rules['j']['g'] = ILLEGAL_PAIR
  478. digram_rules['j']['h'] = NOT_BEGIN | BREAK | NOT_END
  479. digram_rules['j']['i'] = ANY_COMBINATION
  480. digram_rules['j']['j'] = ILLEGAL_PAIR
  481. digram_rules['j']['k'] = NOT_BEGIN | BREAK | NOT_END
  482. digram_rules['j']['l'] = NOT_BEGIN | BREAK | NOT_END
  483. digram_rules['j']['m'] = NOT_BEGIN | BREAK | NOT_END
  484. digram_rules['j']['n'] = NOT_BEGIN | BREAK | NOT_END
  485. digram_rules['j']['o'] = ANY_COMBINATION
  486. digram_rules['j']['p'] = NOT_BEGIN | BREAK | NOT_END
  487. digram_rules['j']['r'] = NOT_BEGIN | BREAK | NOT_END
  488. digram_rules['j']['s'] = NOT_BEGIN | BREAK | NOT_END
  489. digram_rules['j']['t'] = NOT_BEGIN | BREAK | NOT_END
  490. digram_rules['j']['u'] = ANY_COMBINATION
  491. digram_rules['j']['v'] = NOT_BEGIN | BREAK | NOT_END
  492. digram_rules['j']['w'] = NOT_BEGIN | BREAK | NOT_END
  493. digram_rules['j']['x'] = ILLEGAL_PAIR
  494. digram_rules['j']['y'] = NOT_BEGIN
  495. digram_rules['j']['z'] = NOT_BEGIN | BREAK | NOT_END
  496. digram_rules['j']['ch'] = NOT_BEGIN | BREAK | NOT_END
  497. digram_rules['j']['gh'] = NOT_BEGIN | BREAK | NOT_END
  498. digram_rules['j']['ph'] = NOT_BEGIN | BREAK | NOT_END
  499. digram_rules['j']['rh'] = ILLEGAL_PAIR
  500. digram_rules['j']['sh'] = NOT_BEGIN | BREAK | NOT_END
  501. digram_rules['j']['th'] = NOT_BEGIN | BREAK | NOT_END
  502. digram_rules['j']['wh'] = ILLEGAL_PAIR
  503. digram_rules['j']['qu'] = NOT_BEGIN | BREAK | NOT_END
  504. digram_rules['j']['ck'] = ILLEGAL_PAIR
  505. digram_rules['k'] = dict()
  506. digram_rules['k']['a'] = ANY_COMBINATION
  507. digram_rules['k']['b'] = NOT_BEGIN | BREAK | NOT_END
  508. digram_rules['k']['c'] = NOT_BEGIN | BREAK | NOT_END
  509. digram_rules['k']['d'] = NOT_BEGIN | BREAK | NOT_END
  510. digram_rules['k']['e'] = ANY_COMBINATION
  511. digram_rules['k']['f'] = NOT_BEGIN | BREAK | NOT_END
  512. digram_rules['k']['g'] = NOT_BEGIN | BREAK | NOT_END
  513. digram_rules['k']['h'] = NOT_BEGIN | BREAK | NOT_END
  514. digram_rules['k']['i'] = ANY_COMBINATION
  515. digram_rules['k']['j'] = NOT_BEGIN | BREAK | NOT_END
  516. digram_rules['k']['k'] = NOT_BEGIN | BREAK | NOT_END
  517. digram_rules['k']['l'] = SUFFIX | NOT_END
  518. digram_rules['k']['m'] = NOT_BEGIN | BREAK | NOT_END
  519. digram_rules['k']['n'] = BEGIN | SUFFIX | NOT_END
  520. digram_rules['k']['o'] = ANY_COMBINATION
  521. digram_rules['k']['p'] = NOT_BEGIN | BREAK | NOT_END
  522. digram_rules['k']['r'] = SUFFIX | NOT_END
  523. digram_rules['k']['s'] = NOT_BEGIN | END
  524. digram_rules['k']['t'] = NOT_BEGIN | BREAK | NOT_END
  525. digram_rules['k']['u'] = ANY_COMBINATION
  526. digram_rules['k']['v'] = NOT_BEGIN | BREAK | NOT_END
  527. digram_rules['k']['w'] = NOT_BEGIN | BREAK | NOT_END
  528. digram_rules['k']['x'] = ILLEGAL_PAIR
  529. digram_rules['k']['y'] = NOT_BEGIN
  530. digram_rules['k']['z'] = NOT_BEGIN | BREAK | NOT_END
  531. digram_rules['k']['ch'] = NOT_BEGIN | BREAK | NOT_END
  532. digram_rules['k']['gh'] = NOT_BEGIN | BREAK | NOT_END
  533. digram_rules['k']['ph'] = NOT_BEGIN | PREFIX
  534. digram_rules['k']['rh'] = ILLEGAL_PAIR
  535. digram_rules['k']['sh'] = NOT_BEGIN
  536. digram_rules['k']['th'] = NOT_BEGIN | BREAK | NOT_END
  537. digram_rules['k']['wh'] = ILLEGAL_PAIR
  538. digram_rules['k']['qu'] = NOT_BEGIN | BREAK | NOT_END
  539. digram_rules['k']['ck'] = ILLEGAL_PAIR
  540. digram_rules['l'] = dict()
  541. digram_rules['l']['a'] = ANY_COMBINATION
  542. digram_rules['l']['b'] = NOT_BEGIN | PREFIX
  543. digram_rules['l']['c'] = NOT_BEGIN | BREAK | NOT_END
  544. digram_rules['l']['d'] = NOT_BEGIN | PREFIX
  545. digram_rules['l']['e'] = ANY_COMBINATION
  546. digram_rules['l']['f'] = NOT_BEGIN | PREFIX
  547. digram_rules['l']['g'] = NOT_BEGIN | PREFIX
  548. digram_rules['l']['h'] = NOT_BEGIN | BREAK | NOT_END
  549. digram_rules['l']['i'] = ANY_COMBINATION
  550. digram_rules['l']['j'] = NOT_BEGIN | PREFIX
  551. digram_rules['l']['k'] = NOT_BEGIN | PREFIX
  552. digram_rules['l']['l'] = NOT_BEGIN | PREFIX
  553. digram_rules['l']['m'] = NOT_BEGIN | PREFIX
  554. digram_rules['l']['n'] = NOT_BEGIN | BREAK | NOT_END
  555. digram_rules['l']['o'] = ANY_COMBINATION
  556. digram_rules['l']['p'] = NOT_BEGIN | PREFIX
  557. digram_rules['l']['r'] = NOT_BEGIN | BREAK | NOT_END
  558. digram_rules['l']['s'] = NOT_BEGIN
  559. digram_rules['l']['t'] = NOT_BEGIN | PREFIX
  560. digram_rules['l']['u'] = ANY_COMBINATION
  561. digram_rules['l']['v'] = NOT_BEGIN | PREFIX
  562. digram_rules['l']['w'] = NOT_BEGIN | BREAK | NOT_END
  563. digram_rules['l']['x'] = ILLEGAL_PAIR
  564. digram_rules['l']['y'] = ANY_COMBINATION
  565. digram_rules['l']['z'] = NOT_BEGIN | BREAK | NOT_END
  566. digram_rules['l']['ch'] = NOT_BEGIN | PREFIX
  567. digram_rules['l']['gh'] = NOT_BEGIN | BREAK | NOT_END
  568. digram_rules['l']['ph'] = NOT_BEGIN | PREFIX
  569. digram_rules['l']['rh'] = ILLEGAL_PAIR
  570. digram_rules['l']['sh'] = NOT_BEGIN | PREFIX
  571. digram_rules['l']['th'] = NOT_BEGIN | PREFIX
  572. digram_rules['l']['wh'] = ILLEGAL_PAIR
  573. digram_rules['l']['qu'] = NOT_BEGIN | BREAK | NOT_END
  574. digram_rules['l']['ck'] = ILLEGAL_PAIR
  575. digram_rules['m'] = dict()
  576. digram_rules['m']['a'] = ANY_COMBINATION
  577. digram_rules['m']['b'] = NOT_BEGIN | BREAK | NOT_END
  578. digram_rules['m']['c'] = NOT_BEGIN | BREAK | NOT_END
  579. digram_rules['m']['d'] = NOT_BEGIN | BREAK | NOT_END
  580. digram_rules['m']['e'] = ANY_COMBINATION
  581. digram_rules['m']['f'] = NOT_BEGIN | BREAK | NOT_END
  582. digram_rules['m']['g'] = NOT_BEGIN | BREAK | NOT_END
  583. digram_rules['m']['h'] = NOT_BEGIN | BREAK | NOT_END
  584. digram_rules['m']['i'] = ANY_COMBINATION
  585. digram_rules['m']['j'] = NOT_BEGIN | BREAK | NOT_END
  586. digram_rules['m']['k'] = NOT_BEGIN | BREAK | NOT_END
  587. digram_rules['m']['l'] = NOT_BEGIN | BREAK | NOT_END
  588. digram_rules['m']['m'] = NOT_BEGIN
  589. digram_rules['m']['n'] = NOT_BEGIN | BREAK | NOT_END
  590. digram_rules['m']['o'] = ANY_COMBINATION
  591. digram_rules['m']['p'] = NOT_BEGIN
  592. digram_rules['m']['r'] = NOT_BEGIN | BREAK | NOT_END
  593. digram_rules['m']['s'] = NOT_BEGIN
  594. digram_rules['m']['t'] = NOT_BEGIN
  595. digram_rules['m']['u'] = ANY_COMBINATION
  596. digram_rules['m']['v'] = NOT_BEGIN | BREAK | NOT_END
  597. digram_rules['m']['w'] = NOT_BEGIN | BREAK | NOT_END
  598. digram_rules['m']['x'] = ILLEGAL_PAIR
  599. digram_rules['m']['y'] = ANY_COMBINATION
  600. digram_rules['m']['z'] = NOT_BEGIN | BREAK | NOT_END
  601. digram_rules['m']['ch'] = NOT_BEGIN | PREFIX
  602. digram_rules['m']['gh'] = NOT_BEGIN | BREAK | NOT_END
  603. digram_rules['m']['ph'] = NOT_BEGIN
  604. digram_rules['m']['rh'] = ILLEGAL_PAIR
  605. digram_rules['m']['sh'] = NOT_BEGIN
  606. digram_rules['m']['th'] = NOT_BEGIN
  607. digram_rules['m']['wh'] = ILLEGAL_PAIR
  608. digram_rules['m']['qu'] = NOT_BEGIN | BREAK | NOT_END
  609. digram_rules['m']['ck'] = ILLEGAL_PAIR
  610. digram_rules['n'] = dict()
  611. digram_rules['n']['a'] = ANY_COMBINATION
  612. digram_rules['n']['b'] = NOT_BEGIN | BREAK | NOT_END
  613. digram_rules['n']['c'] = NOT_BEGIN | BREAK | NOT_END
  614. digram_rules['n']['d'] = NOT_BEGIN
  615. digram_rules['n']['e'] = ANY_COMBINATION
  616. digram_rules['n']['f'] = NOT_BEGIN | BREAK | NOT_END
  617. digram_rules['n']['g'] = NOT_BEGIN | PREFIX
  618. digram_rules['n']['h'] = NOT_BEGIN | BREAK | NOT_END
  619. digram_rules['n']['i'] = ANY_COMBINATION
  620. digram_rules['n']['j'] = NOT_BEGIN | BREAK | NOT_END
  621. digram_rules['n']['k'] = NOT_BEGIN | PREFIX
  622. digram_rules['n']['l'] = NOT_BEGIN | BREAK | NOT_END
  623. digram_rules['n']['m'] = NOT_BEGIN | BREAK | NOT_END
  624. digram_rules['n']['n'] = NOT_BEGIN
  625. digram_rules['n']['o'] = ANY_COMBINATION
  626. digram_rules['n']['p'] = NOT_BEGIN | BREAK | NOT_END
  627. digram_rules['n']['r'] = NOT_BEGIN | BREAK | NOT_END
  628. digram_rules['n']['s'] = NOT_BEGIN
  629. digram_rules['n']['t'] = NOT_BEGIN
  630. digram_rules['n']['u'] = ANY_COMBINATION
  631. digram_rules['n']['v'] = NOT_BEGIN | BREAK | NOT_END
  632. digram_rules['n']['w'] = NOT_BEGIN | BREAK | NOT_END
  633. digram_rules['n']['x'] = ILLEGAL_PAIR
  634. digram_rules['n']['y'] = NOT_BEGIN
  635. digram_rules['n']['z'] = NOT_BEGIN | BREAK | NOT_END
  636. digram_rules['n']['ch'] = NOT_BEGIN | PREFIX
  637. digram_rules['n']['gh'] = NOT_BEGIN | BREAK | NOT_END
  638. digram_rules['n']['ph'] = NOT_BEGIN | PREFIX
  639. digram_rules['n']['rh'] = ILLEGAL_PAIR
  640. digram_rules['n']['sh'] = NOT_BEGIN
  641. digram_rules['n']['th'] = NOT_BEGIN
  642. digram_rules['n']['wh'] = ILLEGAL_PAIR
  643. digram_rules['n']['qu'] = NOT_BEGIN | BREAK | NOT_END
  644. digram_rules['n']['ck'] = NOT_BEGIN | PREFIX
  645. digram_rules['o'] = dict()
  646. digram_rules['o']['a'] = ANY_COMBINATION
  647. digram_rules['o']['b'] = ANY_COMBINATION
  648. digram_rules['o']['c'] = ANY_COMBINATION
  649. digram_rules['o']['d'] = ANY_COMBINATION
  650. digram_rules['o']['e'] = ILLEGAL_PAIR
  651. digram_rules['o']['f'] = ANY_COMBINATION
  652. digram_rules['o']['g'] = ANY_COMBINATION
  653. digram_rules['o']['h'] = NOT_BEGIN | BREAK | NOT_END
  654. digram_rules['o']['i'] = ANY_COMBINATION
  655. digram_rules['o']['j'] = ANY_COMBINATION
  656. digram_rules['o']['k'] = ANY_COMBINATION
  657. digram_rules['o']['l'] = ANY_COMBINATION
  658. digram_rules['o']['m'] = ANY_COMBINATION
  659. digram_rules['o']['n'] = ANY_COMBINATION
  660. digram_rules['o']['o'] = ANY_COMBINATION
  661. digram_rules['o']['p'] = ANY_COMBINATION
  662. digram_rules['o']['r'] = ANY_COMBINATION
  663. digram_rules['o']['s'] = ANY_COMBINATION
  664. digram_rules['o']['t'] = ANY_COMBINATION
  665. digram_rules['o']['u'] = ANY_COMBINATION
  666. digram_rules['o']['v'] = ANY_COMBINATION
  667. digram_rules['o']['w'] = ANY_COMBINATION
  668. digram_rules['o']['x'] = ANY_COMBINATION
  669. digram_rules['o']['y'] = ANY_COMBINATION
  670. digram_rules['o']['z'] = ANY_COMBINATION
  671. digram_rules['o']['ch'] = ANY_COMBINATION
  672. digram_rules['o']['gh'] = NOT_BEGIN
  673. digram_rules['o']['ph'] = ANY_COMBINATION
  674. digram_rules['o']['rh'] = ILLEGAL_PAIR
  675. digram_rules['o']['sh'] = ANY_COMBINATION
  676. digram_rules['o']['th'] = ANY_COMBINATION
  677. digram_rules['o']['wh'] = ILLEGAL_PAIR
  678. digram_rules['o']['qu'] = BREAK | NOT_END
  679. digram_rules['o']['ck'] = ANY_COMBINATION
  680. digram_rules['p'] = dict()
  681. digram_rules['p']['a'] = ANY_COMBINATION
  682. digram_rules['p']['b'] = NOT_BEGIN | BREAK | NOT_END
  683. digram_rules['p']['c'] = NOT_BEGIN | BREAK | NOT_END
  684. digram_rules['p']['d'] = NOT_BEGIN | BREAK | NOT_END
  685. digram_rules['p']['e'] = ANY_COMBINATION
  686. digram_rules['p']['f'] = NOT_BEGIN | BREAK | NOT_END
  687. digram_rules['p']['g'] = NOT_BEGIN | BREAK | NOT_END
  688. digram_rules['p']['h'] = NOT_BEGIN | BREAK | NOT_END
  689. digram_rules['p']['i'] = ANY_COMBINATION
  690. digram_rules['p']['j'] = NOT_BEGIN | BREAK | NOT_END
  691. digram_rules['p']['k'] = NOT_BEGIN | BREAK | NOT_END
  692. digram_rules['p']['l'] = SUFFIX | NOT_END
  693. digram_rules['p']['m'] = NOT_BEGIN | BREAK | NOT_END
  694. digram_rules['p']['n'] = NOT_BEGIN | BREAK | NOT_END
  695. digram_rules['p']['o'] = ANY_COMBINATION
  696. digram_rules['p']['p'] = NOT_BEGIN | PREFIX
  697. digram_rules['p']['r'] = NOT_END
  698. digram_rules['p']['s'] = NOT_BEGIN | END
  699. digram_rules['p']['t'] = NOT_BEGIN | END
  700. digram_rules['p']['u'] = NOT_BEGIN | END
  701. digram_rules['p']['v'] = NOT_BEGIN | BREAK | NOT_END
  702. digram_rules['p']['w'] = NOT_BEGIN | BREAK | NOT_END
  703. digram_rules['p']['x'] = ILLEGAL_PAIR
  704. digram_rules['p']['y'] = ANY_COMBINATION
  705. digram_rules['p']['z'] = NOT_BEGIN | BREAK | NOT_END
  706. digram_rules['p']['ch'] = NOT_BEGIN | BREAK | NOT_END
  707. digram_rules['p']['gh'] = NOT_BEGIN | BREAK | NOT_END
  708. digram_rules['p']['ph'] = NOT_BEGIN | BREAK | NOT_END
  709. digram_rules['p']['rh'] = ILLEGAL_PAIR
  710. digram_rules['p']['sh'] = NOT_BEGIN | BREAK | NOT_END
  711. digram_rules['p']['th'] = NOT_BEGIN | BREAK | NOT_END
  712. digram_rules['p']['wh'] = ILLEGAL_PAIR
  713. digram_rules['p']['qu'] = NOT_BEGIN | BREAK | NOT_END
  714. digram_rules['p']['ck'] = ILLEGAL_PAIR
  715. digram_rules['r'] = dict()
  716. digram_rules['r']['a'] = ANY_COMBINATION
  717. digram_rules['r']['b'] = NOT_BEGIN | PREFIX
  718. digram_rules['r']['c'] = NOT_BEGIN | PREFIX
  719. digram_rules['r']['d'] = NOT_BEGIN | PREFIX
  720. digram_rules['r']['e'] = ANY_COMBINATION
  721. digram_rules['r']['f'] = NOT_BEGIN | PREFIX
  722. digram_rules['r']['g'] = NOT_BEGIN | PREFIX
  723. digram_rules['r']['h'] = NOT_BEGIN | BREAK | NOT_END
  724. digram_rules['r']['i'] = ANY_COMBINATION
  725. digram_rules['r']['j'] = NOT_BEGIN | PREFIX
  726. digram_rules['r']['k'] = NOT_BEGIN | PREFIX
  727. digram_rules['r']['l'] = NOT_BEGIN | PREFIX
  728. digram_rules['r']['m'] = NOT_BEGIN | PREFIX
  729. digram_rules['r']['n'] = NOT_BEGIN | PREFIX
  730. digram_rules['r']['o'] = ANY_COMBINATION
  731. digram_rules['r']['p'] = NOT_BEGIN | PREFIX
  732. digram_rules['r']['r'] = NOT_BEGIN | PREFIX
  733. digram_rules['r']['s'] = NOT_BEGIN | PREFIX
  734. digram_rules['r']['t'] = NOT_BEGIN | PREFIX
  735. digram_rules['r']['u'] = ANY_COMBINATION
  736. digram_rules['r']['v'] = NOT_BEGIN | PREFIX
  737. digram_rules['r']['w'] = NOT_BEGIN | BREAK | NOT_END
  738. digram_rules['r']['x'] = ILLEGAL_PAIR
  739. digram_rules['r']['y'] = ANY_COMBINATION
  740. digram_rules['r']['z'] = NOT_BEGIN | PREFIX
  741. digram_rules['r']['ch'] = NOT_BEGIN | PREFIX
  742. digram_rules['r']['gh'] = NOT_BEGIN | BREAK | NOT_END
  743. digram_rules['r']['ph'] = NOT_BEGIN | PREFIX
  744. digram_rules['r']['rh'] = ILLEGAL_PAIR
  745. digram_rules['r']['sh'] = NOT_BEGIN | PREFIX
  746. digram_rules['r']['th'] = NOT_BEGIN | PREFIX
  747. digram_rules['r']['wh'] = ILLEGAL_PAIR
  748. digram_rules['r']['qu'] = NOT_BEGIN | PREFIX | NOT_END
  749. digram_rules['r']['ck'] = NOT_BEGIN | PREFIX
  750. digram_rules['s'] = dict()
  751. digram_rules['s']['a'] = ANY_COMBINATION
  752. digram_rules['s']['b'] = NOT_BEGIN | BREAK | NOT_END
  753. digram_rules['s']['c'] = NOT_END
  754. digram_rules['s']['d'] = NOT_BEGIN | BREAK | NOT_END
  755. digram_rules['s']['e'] = ANY_COMBINATION
  756. digram_rules['s']['f'] = NOT_BEGIN | BREAK | NOT_END
  757. digram_rules['s']['g'] = NOT_BEGIN | BREAK | NOT_END
  758. digram_rules['s']['h'] = NOT_BEGIN | BREAK | NOT_END
  759. digram_rules['s']['i'] = ANY_COMBINATION
  760. digram_rules['s']['j'] = NOT_BEGIN | BREAK | NOT_END
  761. digram_rules['s']['k'] = ANY_COMBINATION
  762. digram_rules['s']['l'] = BEGIN | SUFFIX | NOT_END
  763. digram_rules['s']['m'] = SUFFIX | NOT_END
  764. digram_rules['s']['n'] = PREFIX | SUFFIX | NOT_END
  765. digram_rules['s']['o'] = ANY_COMBINATION
  766. digram_rules['s']['p'] = ANY_COMBINATION
  767. digram_rules['s']['r'] = NOT_BEGIN | NOT_END
  768. digram_rules['s']['s'] = NOT_BEGIN | PREFIX
  769. digram_rules['s']['t'] = ANY_COMBINATION
  770. digram_rules['s']['u'] = ANY_COMBINATION
  771. digram_rules['s']['v'] = NOT_BEGIN | BREAK | NOT_END
  772. digram_rules['s']['w'] = BEGIN | SUFFIX | NOT_END
  773. digram_rules['s']['x'] = ILLEGAL_PAIR
  774. digram_rules['s']['y'] = ANY_COMBINATION
  775. digram_rules['s']['z'] = NOT_BEGIN | BREAK | NOT_END
  776. digram_rules['s']['ch'] = BEGIN | SUFFIX | NOT_END
  777. digram_rules['s']['gh'] = NOT_BEGIN | BREAK | NOT_END
  778. digram_rules['s']['ph'] = NOT_BEGIN | BREAK | NOT_END
  779. digram_rules['s']['rh'] = ILLEGAL_PAIR
  780. digram_rules['s']['sh'] = NOT_BEGIN | BREAK | NOT_END
  781. digram_rules['s']['th'] = NOT_BEGIN | BREAK | NOT_END
  782. digram_rules['s']['wh'] = ILLEGAL_PAIR
  783. digram_rules['s']['qu'] = SUFFIX | NOT_END
  784. digram_rules['s']['ck'] = NOT_BEGIN
  785. digram_rules['t'] = dict()
  786. digram_rules['t']['a'] = ANY_COMBINATION
  787. digram_rules['t']['b'] = NOT_BEGIN | BREAK | NOT_END
  788. digram_rules['t']['c'] = NOT_BEGIN | BREAK | NOT_END
  789. digram_rules['t']['d'] = NOT_BEGIN | BREAK | NOT_END
  790. digram_rules['t']['e'] = ANY_COMBINATION
  791. digram_rules['t']['f'] = NOT_BEGIN | BREAK | NOT_END
  792. digram_rules['t']['g'] = NOT_BEGIN | BREAK | NOT_END
  793. digram_rules['t']['h'] = NOT_BEGIN | BREAK | NOT_END
  794. digram_rules['t']['i'] = ANY_COMBINATION
  795. digram_rules['t']['j'] = NOT_BEGIN | BREAK | NOT_END
  796. digram_rules['t']['k'] = NOT_BEGIN | BREAK | NOT_END
  797. digram_rules['t']['l'] = NOT_BEGIN | BREAK | NOT_END
  798. digram_rules['t']['m'] = NOT_BEGIN | BREAK | NOT_END
  799. digram_rules['t']['n'] = NOT_BEGIN | BREAK | NOT_END
  800. digram_rules['t']['o'] = ANY_COMBINATION
  801. digram_rules['t']['p'] = NOT_BEGIN | BREAK | NOT_END
  802. digram_rules['t']['r'] = NOT_END
  803. digram_rules['t']['s'] = NOT_BEGIN | END
  804. digram_rules['t']['t'] = NOT_BEGIN | PREFIX
  805. digram_rules['t']['u'] = ANY_COMBINATION
  806. digram_rules['t']['v'] = NOT_BEGIN | BREAK | NOT_END
  807. digram_rules['t']['w'] = BEGIN | SUFFIX | NOT_END
  808. digram_rules['t']['x'] = ILLEGAL_PAIR
  809. digram_rules['t']['y'] = ANY_COMBINATION
  810. digram_rules['t']['z'] = NOT_BEGIN | BREAK | NOT_END
  811. digram_rules['t']['ch'] = NOT_BEGIN
  812. digram_rules['t']['gh'] = NOT_BEGIN | BREAK | NOT_END
  813. digram_rules['t']['ph'] = NOT_BEGIN | END
  814. digram_rules['t']['rh'] = ILLEGAL_PAIR
  815. digram_rules['t']['sh'] = NOT_BEGIN | END
  816. digram_rules['t']['th'] = NOT_BEGIN | BREAK | NOT_END
  817. digram_rules['t']['wh'] = ILLEGAL_PAIR
  818. digram_rules['t']['qu'] = NOT_BEGIN | BREAK | NOT_END
  819. digram_rules['t']['ck'] = ILLEGAL_PAIR
  820. digram_rules['u'] = dict()
  821. digram_rules['u']['a'] = NOT_BEGIN | BREAK | NOT_END
  822. digram_rules['u']['b'] = ANY_COMBINATION
  823. digram_rules['u']['c'] = ANY_COMBINATION
  824. digram_rules['u']['d'] = ANY_COMBINATION
  825. digram_rules['u']['e'] = NOT_BEGIN
  826. digram_rules['u']['f'] = ANY_COMBINATION
  827. digram_rules['u']['g'] = ANY_COMBINATION
  828. digram_rules['u']['h'] = NOT_BEGIN | BREAK | NOT_END
  829. digram_rules['u']['i'] = NOT_BEGIN | BREAK | NOT_END
  830. digram_rules['u']['j'] = ANY_COMBINATION
  831. digram_rules['u']['k'] = ANY_COMBINATION
  832. digram_rules['u']['l'] = ANY_COMBINATION
  833. digram_rules['u']['m'] = ANY_COMBINATION
  834. digram_rules['u']['n'] = ANY_COMBINATION
  835. digram_rules['u']['o'] = NOT_BEGIN | BREAK
  836. digram_rules['u']['p'] = ANY_COMBINATION
  837. digram_rules['u']['r'] = ANY_COMBINATION
  838. digram_rules['u']['s'] = ANY_COMBINATION
  839. digram_rules['u']['t'] = ANY_COMBINATION
  840. digram_rules['u']['u'] = ILLEGAL_PAIR
  841. digram_rules['u']['v'] = ANY_COMBINATION
  842. digram_rules['u']['w'] = NOT_BEGIN | BREAK | NOT_END
  843. digram_rules['u']['x'] = ANY_COMBINATION
  844. digram_rules['u']['y'] = NOT_BEGIN | BREAK | NOT_END
  845. digram_rules['u']['z'] = ANY_COMBINATION
  846. digram_rules['u']['ch'] = ANY_COMBINATION
  847. digram_rules['u']['gh'] = NOT_BEGIN | PREFIX
  848. digram_rules['u']['ph'] = ANY_COMBINATION
  849. digram_rules['u']['rh'] = ILLEGAL_PAIR
  850. digram_rules['u']['sh'] = ANY_COMBINATION
  851. digram_rules['u']['th'] = ANY_COMBINATION
  852. digram_rules['u']['wh'] = ILLEGAL_PAIR
  853. digram_rules['u']['qu'] = BREAK | NOT_END
  854. digram_rules['u']['ck'] = ANY_COMBINATION
  855. digram_rules['v'] = dict()
  856. digram_rules['v']['a'] = ANY_COMBINATION
  857. digram_rules['v']['b'] = NOT_BEGIN | BREAK | NOT_END
  858. digram_rules['v']['c'] = NOT_BEGIN | BREAK | NOT_END
  859. digram_rules['v']['d'] = NOT_BEGIN | BREAK | NOT_END
  860. digram_rules['v']['e'] = ANY_COMBINATION
  861. digram_rules['v']['f'] = NOT_BEGIN | BREAK | NOT_END
  862. digram_rules['v']['g'] = NOT_BEGIN | BREAK | NOT_END
  863. digram_rules['v']['h'] = NOT_BEGIN | BREAK | NOT_END
  864. digram_rules['v']['i'] = ANY_COMBINATION
  865. digram_rules['v']['j'] = NOT_BEGIN | BREAK | NOT_END
  866. digram_rules['v']['k'] = NOT_BEGIN | BREAK | NOT_END
  867. digram_rules['v']['l'] = NOT_BEGIN | BREAK | NOT_END
  868. digram_rules['v']['m'] = NOT_BEGIN | BREAK | NOT_END
  869. digram_rules['v']['n'] = NOT_BEGIN | BREAK | NOT_END
  870. digram_rules['v']['o'] = ANY_COMBINATION
  871. digram_rules['v']['p'] = NOT_BEGIN | BREAK | NOT_END
  872. digram_rules['v']['r'] = NOT_BEGIN | BREAK | NOT_END
  873. digram_rules['v']['s'] = NOT_BEGIN | BREAK | NOT_END
  874. digram_rules['v']['t'] = NOT_BEGIN | BREAK | NOT_END
  875. digram_rules['v']['u'] = ANY_COMBINATION
  876. digram_rules['v']['v'] = NOT_BEGIN | BREAK | NOT_END
  877. digram_rules['v']['w'] = NOT_BEGIN | BREAK | NOT_END
  878. digram_rules['v']['x'] = ILLEGAL_PAIR
  879. digram_rules['v']['y'] = NOT_BEGIN
  880. digram_rules['v']['z'] = NOT_BEGIN | BREAK | NOT_END
  881. digram_rules['v']['ch'] = NOT_BEGIN | BREAK | NOT_END
  882. digram_rules['v']['gh'] = NOT_BEGIN | BREAK | NOT_END
  883. digram_rules['v']['ph'] = NOT_BEGIN | BREAK | NOT_END
  884. digram_rules['v']['rh'] = ILLEGAL_PAIR
  885. digram_rules['v']['sh'] = NOT_BEGIN | BREAK | NOT_END
  886. digram_rules['v']['th'] = NOT_BEGIN | BREAK | NOT_END
  887. digram_rules['v']['wh'] = ILLEGAL_PAIR
  888. digram_rules['v']['qu'] = NOT_BEGIN | BREAK | NOT_END
  889. digram_rules['v']['ck'] = ILLEGAL_PAIR
  890. digram_rules['w'] = dict()
  891. digram_rules['w']['a'] = ANY_COMBINATION
  892. digram_rules['w']['b'] = NOT_BEGIN | PREFIX
  893. digram_rules['w']['c'] = NOT_BEGIN | BREAK | NOT_END
  894. digram_rules['w']['d'] = NOT_BEGIN | PREFIX | END
  895. digram_rules['w']['e'] = ANY_COMBINATION
  896. digram_rules['w']['f'] = NOT_BEGIN | PREFIX
  897. digram_rules['w']['g'] = NOT_BEGIN | PREFIX | END
  898. digram_rules['w']['h'] = NOT_BEGIN | BREAK | NOT_END
  899. digram_rules['w']['i'] = ANY_COMBINATION
  900. digram_rules['w']['j'] = NOT_BEGIN | BREAK | NOT_END
  901. digram_rules['w']['k'] = NOT_BEGIN | PREFIX
  902. digram_rules['w']['l'] = NOT_BEGIN | PREFIX | SUFFIX
  903. digram_rules['w']['m'] = NOT_BEGIN | PREFIX
  904. digram_rules['w']['n'] = NOT_BEGIN | PREFIX
  905. digram_rules['w']['o'] = ANY_COMBINATION
  906. digram_rules['w']['p'] = NOT_BEGIN | PREFIX
  907. digram_rules['w']['r'] = BEGIN | SUFFIX | NOT_END
  908. digram_rules['w']['s'] = NOT_BEGIN | PREFIX
  909. digram_rules['w']['t'] = NOT_BEGIN | PREFIX
  910. digram_rules['w']['u'] = ANY_COMBINATION
  911. digram_rules['w']['v'] = NOT_BEGIN | PREFIX
  912. digram_rules['w']['w'] = NOT_BEGIN | BREAK | NOT_END
  913. digram_rules['w']['x'] = NOT_BEGIN | PREFIX
  914. digram_rules['w']['y'] = ANY_COMBINATION
  915. digram_rules['w']['z'] = NOT_BEGIN | PREFIX
  916. digram_rules['w']['ch'] = NOT_BEGIN
  917. digram_rules['w']['gh'] = NOT_BEGIN | BREAK | NOT_END
  918. digram_rules['w']['ph'] = NOT_BEGIN
  919. digram_rules['w']['rh'] = ILLEGAL_PAIR
  920. digram_rules['w']['sh'] = NOT_BEGIN
  921. digram_rules['w']['th'] = NOT_BEGIN
  922. digram_rules['w']['wh'] = ILLEGAL_PAIR
  923. digram_rules['w']['qu'] = NOT_BEGIN | BREAK | NOT_END
  924. digram_rules['w']['ck'] = NOT_BEGIN
  925. digram_rules['x'] = dict()
  926. digram_rules['x']['a'] = NOT_BEGIN
  927. digram_rules['x']['b'] = NOT_BEGIN | BREAK | NOT_END
  928. digram_rules['x']['c'] = NOT_BEGIN | BREAK | NOT_END
  929. digram_rules['x']['d'] = NOT_BEGIN | BREAK | NOT_END
  930. digram_rules['x']['e'] = NOT_BEGIN
  931. digram_rules['x']['f'] = NOT_BEGIN | BREAK | NOT_END
  932. digram_rules['x']['g'] = NOT_BEGIN | BREAK | NOT_END
  933. digram_rules['x']['h'] = NOT_BEGIN | BREAK | NOT_END
  934. digram_rules['x']['i'] = NOT_BEGIN
  935. digram_rules['x']['j'] = NOT_BEGIN | BREAK | NOT_END
  936. digram_rules['x']['k'] = NOT_BEGIN | BREAK | NOT_END
  937. digram_rules['x']['l'] = NOT_BEGIN | BREAK | NOT_END
  938. digram_rules['x']['m'] = NOT_BEGIN | BREAK | NOT_END
  939. digram_rules['x']['n'] = NOT_BEGIN | BREAK | NOT_END
  940. digram_rules['x']['o'] = NOT_BEGIN
  941. digram_rules['x']['p'] = NOT_BEGIN | BREAK | NOT_END
  942. digram_rules['x']['r'] = NOT_BEGIN | BREAK | NOT_END
  943. digram_rules['x']['s'] = NOT_BEGIN | BREAK | NOT_END
  944. digram_rules['x']['t'] = NOT_BEGIN | BREAK | NOT_END
  945. digram_rules['x']['u'] = NOT_BEGIN
  946. digram_rules['x']['v'] = NOT_BEGIN | BREAK | NOT_END
  947. digram_rules['x']['w'] = NOT_BEGIN | BREAK | NOT_END
  948. digram_rules['x']['x'] = ILLEGAL_PAIR
  949. digram_rules['x']['y'] = NOT_BEGIN
  950. digram_rules['x']['z'] = NOT_BEGIN | BREAK | NOT_END
  951. digram_rules['x']['ch'] = NOT_BEGIN | BREAK | NOT_END
  952. digram_rules['x']['gh'] = NOT_BEGIN | BREAK | NOT_END
  953. digram_rules['x']['ph'] = NOT_BEGIN | BREAK | NOT_END
  954. digram_rules['x']['rh'] = ILLEGAL_PAIR
  955. digram_rules['x']['sh'] = NOT_BEGIN | BREAK | NOT_END
  956. digram_rules['x']['th'] = NOT_BEGIN | BREAK | NOT_END
  957. digram_rules['x']['wh'] = ILLEGAL_PAIR
  958. digram_rules['x']['qu'] = NOT_BEGIN | BREAK | NOT_END
  959. digram_rules['x']['ck'] = ILLEGAL_PAIR
  960. digram_rules['y'] = dict()
  961. digram_rules['y']['a'] = ANY_COMBINATION
  962. digram_rules['y']['b'] = NOT_BEGIN
  963. digram_rules['y']['c'] = NOT_BEGIN | NOT_END
  964. digram_rules['y']['d'] = NOT_BEGIN
  965. digram_rules['y']['e'] = ANY_COMBINATION
  966. digram_rules['y']['f'] = NOT_BEGIN | NOT_END
  967. digram_rules['y']['g'] = NOT_BEGIN
  968. digram_rules['y']['h'] = NOT_BEGIN | BREAK | NOT_END
  969. digram_rules['y']['i'] = BEGIN | NOT_END
  970. digram_rules['y']['j'] = NOT_BEGIN | NOT_END
  971. digram_rules['y']['k'] = NOT_BEGIN
  972. digram_rules['y']['l'] = NOT_BEGIN | NOT_END
  973. digram_rules['y']['m'] = NOT_BEGIN
  974. digram_rules['y']['n'] = NOT_BEGIN
  975. digram_rules['y']['o'] = ANY_COMBINATION
  976. digram_rules['y']['p'] = NOT_BEGIN
  977. digram_rules['y']['r'] = NOT_BEGIN | BREAK | NOT_END
  978. digram_rules['y']['s'] = NOT_BEGIN
  979. digram_rules['y']['t'] = NOT_BEGIN
  980. digram_rules['y']['u'] = ANY_COMBINATION
  981. digram_rules['y']['v'] = NOT_BEGIN | NOT_END
  982. digram_rules['y']['w'] = NOT_BEGIN | BREAK | NOT_END
  983. digram_rules['y']['x'] = NOT_BEGIN
  984. digram_rules['y']['y'] = ILLEGAL_PAIR
  985. digram_rules['y']['z'] = NOT_BEGIN
  986. digram_rules['y']['ch'] = NOT_BEGIN | BREAK | NOT_END
  987. digram_rules['y']['gh'] = NOT_BEGIN | BREAK | NOT_END
  988. digram_rules['y']['ph'] = NOT_BEGIN | BREAK | NOT_END
  989. digram_rules['y']['rh'] = ILLEGAL_PAIR
  990. digram_rules['y']['sh'] = NOT_BEGIN | BREAK | NOT_END
  991. digram_rules['y']['th'] = NOT_BEGIN | BREAK | NOT_END
  992. digram_rules['y']['wh'] = ILLEGAL_PAIR
  993. digram_rules['y']['qu'] = NOT_BEGIN | BREAK | NOT_END
  994. digram_rules['y']['ck'] = ILLEGAL_PAIR
  995. digram_rules['z'] = dict()
  996. digram_rules['z']['a'] = ANY_COMBINATION
  997. digram_rules['z']['b'] = NOT_BEGIN | BREAK | NOT_END
  998. digram_rules['z']['c'] = NOT_BEGIN | BREAK | NOT_END
  999. digram_rules['z']['d'] = NOT_BEGIN | BREAK | NOT_END
  1000. digram_rules['z']['e'] = ANY_COMBINATION
  1001. digram_rules['z']['f'] = NOT_BEGIN | BREAK | NOT_END
  1002. digram_rules['z']['g'] = NOT_BEGIN | BREAK | NOT_END
  1003. digram_rules['z']['h'] = NOT_BEGIN | BREAK | NOT_END
  1004. digram_rules['z']['i'] = ANY_COMBINATION
  1005. digram_rules['z']['j'] = NOT_BEGIN | BREAK | NOT_END
  1006. digram_rules['z']['k'] = NOT_BEGIN | BREAK | NOT_END
  1007. digram_rules['z']['l'] = NOT_BEGIN | BREAK | NOT_END
  1008. digram_rules['z']['m'] = NOT_BEGIN | BREAK | NOT_END
  1009. digram_rules['z']['n'] = NOT_BEGIN | BREAK | NOT_END
  1010. digram_rules['z']['o'] = ANY_COMBINATION
  1011. digram_rules['z']['p'] = NOT_BEGIN | BREAK | NOT_END
  1012. digram_rules['z']['r'] = NOT_BEGIN | NOT_END
  1013. digram_rules['z']['s'] = NOT_BEGIN | BREAK | NOT_END
  1014. digram_rules['z']['t'] = NOT_BEGIN
  1015. digram_rules['z']['u'] = ANY_COMBINATION
  1016. digram_rules['z']['v'] = NOT_BEGIN | BREAK | NOT_END
  1017. digram_rules['z']['w'] = SUFFIX | NOT_END
  1018. digram_rules['z']['x'] = ILLEGAL_PAIR
  1019. digram_rules['z']['y'] = ANY_COMBINATION
  1020. digram_rules['z']['z'] = NOT_BEGIN
  1021. digram_rules['z']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1022. digram_rules['z']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1023. digram_rules['z']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1024. digram_rules['z']['rh'] = ILLEGAL_PAIR
  1025. digram_rules['z']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1026. digram_rules['z']['th'] = NOT_BEGIN | BREAK | NOT_END
  1027. digram_rules['z']['wh'] = ILLEGAL_PAIR
  1028. digram_rules['z']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1029. digram_rules['z']['ck'] = ILLEGAL_PAIR
  1030. digram_rules['ch'] = dict()
  1031. digram_rules['ch']['a'] = ANY_COMBINATION
  1032. digram_rules['ch']['b'] = NOT_BEGIN | BREAK | NOT_END
  1033. digram_rules['ch']['c'] = NOT_BEGIN | BREAK | NOT_END
  1034. digram_rules['ch']['d'] = NOT_BEGIN | BREAK | NOT_END
  1035. digram_rules['ch']['e'] = ANY_COMBINATION
  1036. digram_rules['ch']['f'] = NOT_BEGIN | BREAK | NOT_END
  1037. digram_rules['ch']['g'] = NOT_BEGIN | BREAK | NOT_END
  1038. digram_rules['ch']['h'] = NOT_BEGIN | BREAK | NOT_END
  1039. digram_rules['ch']['i'] = ANY_COMBINATION
  1040. digram_rules['ch']['j'] = NOT_BEGIN | BREAK | NOT_END
  1041. digram_rules['ch']['k'] = NOT_BEGIN | BREAK | NOT_END
  1042. digram_rules['ch']['l'] = NOT_BEGIN | BREAK | NOT_END
  1043. digram_rules['ch']['m'] = NOT_BEGIN | BREAK | NOT_END
  1044. digram_rules['ch']['n'] = NOT_BEGIN | BREAK | NOT_END
  1045. digram_rules['ch']['o'] = ANY_COMBINATION
  1046. digram_rules['ch']['p'] = NOT_BEGIN | BREAK | NOT_END
  1047. digram_rules['ch']['r'] = NOT_END
  1048. digram_rules['ch']['s'] = NOT_BEGIN | BREAK | NOT_END
  1049. digram_rules['ch']['t'] = NOT_BEGIN | BREAK | NOT_END
  1050. digram_rules['ch']['u'] = ANY_COMBINATION
  1051. digram_rules['ch']['v'] = NOT_BEGIN | BREAK | NOT_END
  1052. digram_rules['ch']['w'] = NOT_BEGIN | NOT_END
  1053. digram_rules['ch']['x'] = ILLEGAL_PAIR
  1054. digram_rules['ch']['y'] = ANY_COMBINATION
  1055. digram_rules['ch']['z'] = NOT_BEGIN | BREAK | NOT_END
  1056. digram_rules['ch']['ch'] = ILLEGAL_PAIR
  1057. digram_rules['ch']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1058. digram_rules['ch']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1059. digram_rules['ch']['rh'] = ILLEGAL_PAIR
  1060. digram_rules['ch']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1061. digram_rules['ch']['th'] = NOT_BEGIN | BREAK | NOT_END
  1062. digram_rules['ch']['wh'] = ILLEGAL_PAIR
  1063. digram_rules['ch']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1064. digram_rules['ch']['ck'] = ILLEGAL_PAIR
  1065. digram_rules['gh'] = dict()
  1066. digram_rules['gh']['a'] = ANY_COMBINATION
  1067. digram_rules['gh']['b'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1068. digram_rules['gh']['c'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1069. digram_rules['gh']['d'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1070. digram_rules['gh']['e'] = ANY_COMBINATION
  1071. digram_rules['gh']['f'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1072. digram_rules['gh']['g'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1073. digram_rules['gh']['h'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1074. digram_rules['gh']['i'] = BEGIN | NOT_END
  1075. digram_rules['gh']['j'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1076. digram_rules['gh']['k'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1077. digram_rules['gh']['l'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1078. digram_rules['gh']['m'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1079. digram_rules['gh']['n'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1080. digram_rules['gh']['o'] = BEGIN | NOT_END
  1081. digram_rules['gh']['p'] = NOT_BEGIN | BREAK | NOT_END
  1082. digram_rules['gh']['r'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1083. digram_rules['gh']['s'] = NOT_BEGIN | PREFIX
  1084. digram_rules['gh']['t'] = NOT_BEGIN | PREFIX
  1085. digram_rules['gh']['u'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1086. digram_rules['gh']['v'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1087. digram_rules['gh']['w'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1088. digram_rules['gh']['x'] = ILLEGAL_PAIR
  1089. digram_rules['gh']['y'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1090. digram_rules['gh']['z'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1091. digram_rules['gh']['ch'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1092. digram_rules['gh']['gh'] = ILLEGAL_PAIR
  1093. digram_rules['gh']['ph'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1094. digram_rules['gh']['rh'] = ILLEGAL_PAIR
  1095. digram_rules['gh']['sh'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1096. digram_rules['gh']['th'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1097. digram_rules['gh']['wh'] = ILLEGAL_PAIR
  1098. digram_rules['gh']['qu'] = NOT_BEGIN | BREAK | PREFIX | NOT_END
  1099. digram_rules['gh']['ck'] = ILLEGAL_PAIR
  1100. digram_rules['ph'] = dict()
  1101. digram_rules['ph']['a'] = ANY_COMBINATION
  1102. digram_rules['ph']['b'] = NOT_BEGIN | BREAK | NOT_END
  1103. digram_rules['ph']['c'] = NOT_BEGIN | BREAK | NOT_END
  1104. digram_rules['ph']['d'] = NOT_BEGIN | BREAK | NOT_END
  1105. digram_rules['ph']['e'] = ANY_COMBINATION
  1106. digram_rules['ph']['f'] = NOT_BEGIN | BREAK | NOT_END
  1107. digram_rules['ph']['g'] = NOT_BEGIN | BREAK | NOT_END
  1108. digram_rules['ph']['h'] = NOT_BEGIN | BREAK | NOT_END
  1109. digram_rules['ph']['i'] = ANY_COMBINATION
  1110. digram_rules['ph']['j'] = NOT_BEGIN | BREAK | NOT_END
  1111. digram_rules['ph']['k'] = NOT_BEGIN | BREAK | NOT_END
  1112. digram_rules['ph']['l'] = BEGIN | SUFFIX | NOT_END
  1113. digram_rules['ph']['m'] = NOT_BEGIN | BREAK | NOT_END
  1114. digram_rules['ph']['n'] = NOT_BEGIN | BREAK | NOT_END
  1115. digram_rules['ph']['o'] = ANY_COMBINATION
  1116. digram_rules['ph']['p'] = NOT_BEGIN | BREAK | NOT_END
  1117. digram_rules['ph']['r'] = NOT_END
  1118. digram_rules['ph']['s'] = NOT_BEGIN
  1119. digram_rules['ph']['t'] = NOT_BEGIN
  1120. digram_rules['ph']['u'] = ANY_COMBINATION
  1121. digram_rules['ph']['v'] = NOT_BEGIN | NOT_END
  1122. digram_rules['ph']['w'] = NOT_BEGIN | NOT_END
  1123. digram_rules['ph']['x'] = ILLEGAL_PAIR
  1124. digram_rules['ph']['y'] = NOT_BEGIN
  1125. digram_rules['ph']['z'] = NOT_BEGIN | BREAK | NOT_END
  1126. digram_rules['ph']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1127. digram_rules['ph']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1128. digram_rules['ph']['ph'] = ILLEGAL_PAIR
  1129. digram_rules['ph']['rh'] = ILLEGAL_PAIR
  1130. digram_rules['ph']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1131. digram_rules['ph']['th'] = NOT_BEGIN | BREAK | NOT_END
  1132. digram_rules['ph']['wh'] = ILLEGAL_PAIR
  1133. digram_rules['ph']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1134. digram_rules['ph']['ck'] = ILLEGAL_PAIR
  1135. digram_rules['rh'] = dict()
  1136. digram_rules['rh']['a'] = BEGIN | NOT_END
  1137. digram_rules['rh']['b'] = ILLEGAL_PAIR
  1138. digram_rules['rh']['c'] = ILLEGAL_PAIR
  1139. digram_rules['rh']['d'] = ILLEGAL_PAIR
  1140. digram_rules['rh']['e'] = BEGIN | NOT_END
  1141. digram_rules['rh']['f'] = ILLEGAL_PAIR
  1142. digram_rules['rh']['g'] = ILLEGAL_PAIR
  1143. digram_rules['rh']['h'] = ILLEGAL_PAIR
  1144. digram_rules['rh']['i'] = BEGIN | NOT_END
  1145. digram_rules['rh']['j'] = ILLEGAL_PAIR
  1146. digram_rules['rh']['k'] = ILLEGAL_PAIR
  1147. digram_rules['rh']['l'] = ILLEGAL_PAIR
  1148. digram_rules['rh']['m'] = ILLEGAL_PAIR
  1149. digram_rules['rh']['n'] = ILLEGAL_PAIR
  1150. digram_rules['rh']['o'] = BEGIN | NOT_END
  1151. digram_rules['rh']['p'] = ILLEGAL_PAIR
  1152. digram_rules['rh']['r'] = ILLEGAL_PAIR
  1153. digram_rules['rh']['s'] = ILLEGAL_PAIR
  1154. digram_rules['rh']['t'] = ILLEGAL_PAIR
  1155. digram_rules['rh']['u'] = BEGIN | NOT_END
  1156. digram_rules['rh']['v'] = ILLEGAL_PAIR
  1157. digram_rules['rh']['w'] = ILLEGAL_PAIR
  1158. digram_rules['rh']['x'] = ILLEGAL_PAIR
  1159. digram_rules['rh']['y'] = BEGIN | NOT_END
  1160. digram_rules['rh']['z'] = ILLEGAL_PAIR
  1161. digram_rules['rh']['ch'] = ILLEGAL_PAIR
  1162. digram_rules['rh']['gh'] = ILLEGAL_PAIR
  1163. digram_rules['rh']['ph'] = ILLEGAL_PAIR
  1164. digram_rules['rh']['rh'] = ILLEGAL_PAIR
  1165. digram_rules['rh']['sh'] = ILLEGAL_PAIR
  1166. digram_rules['rh']['th'] = ILLEGAL_PAIR
  1167. digram_rules['rh']['wh'] = ILLEGAL_PAIR
  1168. digram_rules['rh']['qu'] = ILLEGAL_PAIR
  1169. digram_rules['rh']['ck'] = ILLEGAL_PAIR
  1170. digram_rules['sh'] = dict()
  1171. digram_rules['sh']['a'] = ANY_COMBINATION
  1172. digram_rules['sh']['b'] = NOT_BEGIN | BREAK | NOT_END
  1173. digram_rules['sh']['c'] = NOT_BEGIN | BREAK | NOT_END
  1174. digram_rules['sh']['d'] = NOT_BEGIN | BREAK | NOT_END
  1175. digram_rules['sh']['e'] = ANY_COMBINATION
  1176. digram_rules['sh']['f'] = NOT_BEGIN | BREAK | NOT_END
  1177. digram_rules['sh']['g'] = NOT_BEGIN | BREAK | NOT_END
  1178. digram_rules['sh']['h'] = ILLEGAL_PAIR
  1179. digram_rules['sh']['i'] = ANY_COMBINATION
  1180. digram_rules['sh']['j'] = NOT_BEGIN | BREAK | NOT_END
  1181. digram_rules['sh']['k'] = NOT_BEGIN
  1182. digram_rules['sh']['l'] = BEGIN | SUFFIX | NOT_END
  1183. digram_rules['sh']['m'] = BEGIN | SUFFIX | NOT_END
  1184. digram_rules['sh']['n'] = BEGIN | SUFFIX | NOT_END
  1185. digram_rules['sh']['o'] = ANY_COMBINATION
  1186. digram_rules['sh']['p'] = NOT_BEGIN
  1187. digram_rules['sh']['r'] = BEGIN | SUFFIX | NOT_END
  1188. digram_rules['sh']['s'] = NOT_BEGIN | BREAK | NOT_END
  1189. digram_rules['sh']['t'] = SUFFIX
  1190. digram_rules['sh']['u'] = ANY_COMBINATION
  1191. digram_rules['sh']['v'] = NOT_BEGIN | BREAK | NOT_END
  1192. digram_rules['sh']['w'] = SUFFIX | NOT_END
  1193. digram_rules['sh']['x'] = ILLEGAL_PAIR
  1194. digram_rules['sh']['y'] = ANY_COMBINATION
  1195. digram_rules['sh']['z'] = NOT_BEGIN | BREAK | NOT_END
  1196. digram_rules['sh']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1197. digram_rules['sh']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1198. digram_rules['sh']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1199. digram_rules['sh']['rh'] = ILLEGAL_PAIR
  1200. digram_rules['sh']['sh'] = ILLEGAL_PAIR
  1201. digram_rules['sh']['th'] = NOT_BEGIN | BREAK | NOT_END
  1202. digram_rules['sh']['wh'] = ILLEGAL_PAIR
  1203. digram_rules['sh']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1204. digram_rules['sh']['ck'] = ILLEGAL_PAIR
  1205. digram_rules['th'] = dict()
  1206. digram_rules['th']['a'] = ANY_COMBINATION
  1207. digram_rules['th']['b'] = NOT_BEGIN | BREAK | NOT_END
  1208. digram_rules['th']['c'] = NOT_BEGIN | BREAK | NOT_END
  1209. digram_rules['th']['d'] = NOT_BEGIN | BREAK | NOT_END
  1210. digram_rules['th']['e'] = ANY_COMBINATION
  1211. digram_rules['th']['f'] = NOT_BEGIN | BREAK | NOT_END
  1212. digram_rules['th']['g'] = NOT_BEGIN | BREAK | NOT_END
  1213. digram_rules['th']['h'] = NOT_BEGIN | BREAK | NOT_END
  1214. digram_rules['th']['i'] = ANY_COMBINATION
  1215. digram_rules['th']['j'] = NOT_BEGIN | BREAK | NOT_END
  1216. digram_rules['th']['k'] = NOT_BEGIN | BREAK | NOT_END
  1217. digram_rules['th']['l'] = NOT_BEGIN | BREAK | NOT_END
  1218. digram_rules['th']['m'] = NOT_BEGIN | BREAK | NOT_END
  1219. digram_rules['th']['n'] = NOT_BEGIN | BREAK | NOT_END
  1220. digram_rules['th']['o'] = ANY_COMBINATION
  1221. digram_rules['th']['p'] = NOT_BEGIN | BREAK | NOT_END
  1222. digram_rules['th']['r'] = NOT_END
  1223. digram_rules['th']['s'] = NOT_BEGIN | END
  1224. digram_rules['th']['t'] = NOT_BEGIN | BREAK | NOT_END
  1225. digram_rules['th']['u'] = ANY_COMBINATION
  1226. digram_rules['th']['v'] = NOT_BEGIN | BREAK | NOT_END
  1227. digram_rules['th']['w'] = SUFFIX | NOT_END
  1228. digram_rules['th']['x'] = ILLEGAL_PAIR
  1229. digram_rules['th']['y'] = ANY_COMBINATION
  1230. digram_rules['th']['z'] = NOT_BEGIN | BREAK | NOT_END
  1231. digram_rules['th']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1232. digram_rules['th']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1233. digram_rules['th']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1234. digram_rules['th']['rh'] = ILLEGAL_PAIR
  1235. digram_rules['th']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1236. digram_rules['th']['th'] = ILLEGAL_PAIR
  1237. digram_rules['th']['wh'] = ILLEGAL_PAIR
  1238. digram_rules['th']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1239. digram_rules['th']['ck'] = ILLEGAL_PAIR
  1240. digram_rules['wh'] = dict()
  1241. digram_rules['wh']['a'] = BEGIN | NOT_END
  1242. digram_rules['wh']['b'] = ILLEGAL_PAIR
  1243. digram_rules['wh']['c'] = ILLEGAL_PAIR
  1244. digram_rules['wh']['d'] = ILLEGAL_PAIR
  1245. digram_rules['wh']['e'] = BEGIN | NOT_END
  1246. digram_rules['wh']['f'] = ILLEGAL_PAIR
  1247. digram_rules['wh']['g'] = ILLEGAL_PAIR
  1248. digram_rules['wh']['h'] = ILLEGAL_PAIR
  1249. digram_rules['wh']['i'] = BEGIN | NOT_END
  1250. digram_rules['wh']['j'] = ILLEGAL_PAIR
  1251. digram_rules['wh']['k'] = ILLEGAL_PAIR
  1252. digram_rules['wh']['l'] = ILLEGAL_PAIR
  1253. digram_rules['wh']['m'] = ILLEGAL_PAIR
  1254. digram_rules['wh']['n'] = ILLEGAL_PAIR
  1255. digram_rules['wh']['o'] = BEGIN | NOT_END
  1256. digram_rules['wh']['p'] = ILLEGAL_PAIR
  1257. digram_rules['wh']['r'] = ILLEGAL_PAIR
  1258. digram_rules['wh']['s'] = ILLEGAL_PAIR
  1259. digram_rules['wh']['t'] = ILLEGAL_PAIR
  1260. digram_rules['wh']['u'] = ILLEGAL_PAIR
  1261. digram_rules['wh']['v'] = ILLEGAL_PAIR
  1262. digram_rules['wh']['w'] = ILLEGAL_PAIR
  1263. digram_rules['wh']['x'] = ILLEGAL_PAIR
  1264. digram_rules['wh']['y'] = BEGIN | NOT_END
  1265. digram_rules['wh']['z'] = ILLEGAL_PAIR
  1266. digram_rules['wh']['ch'] = ILLEGAL_PAIR
  1267. digram_rules['wh']['gh'] = ILLEGAL_PAIR
  1268. digram_rules['wh']['ph'] = ILLEGAL_PAIR
  1269. digram_rules['wh']['rh'] = ILLEGAL_PAIR
  1270. digram_rules['wh']['sh'] = ILLEGAL_PAIR
  1271. digram_rules['wh']['th'] = ILLEGAL_PAIR
  1272. digram_rules['wh']['wh'] = ILLEGAL_PAIR
  1273. digram_rules['wh']['qu'] = ILLEGAL_PAIR
  1274. digram_rules['wh']['ck'] = ILLEGAL_PAIR
  1275. digram_rules['qu'] = dict()
  1276. digram_rules['qu']['a'] = ANY_COMBINATION
  1277. digram_rules['qu']['b'] = ILLEGAL_PAIR
  1278. digram_rules['qu']['c'] = ILLEGAL_PAIR
  1279. digram_rules['qu']['d'] = ILLEGAL_PAIR
  1280. digram_rules['qu']['e'] = ANY_COMBINATION
  1281. digram_rules['qu']['f'] = ILLEGAL_PAIR
  1282. digram_rules['qu']['g'] = ILLEGAL_PAIR
  1283. digram_rules['qu']['h'] = ILLEGAL_PAIR
  1284. digram_rules['qu']['i'] = ANY_COMBINATION
  1285. digram_rules['qu']['j'] = ILLEGAL_PAIR
  1286. digram_rules['qu']['k'] = ILLEGAL_PAIR
  1287. digram_rules['qu']['l'] = ILLEGAL_PAIR
  1288. digram_rules['qu']['m'] = ILLEGAL_PAIR
  1289. digram_rules['qu']['n'] = ILLEGAL_PAIR
  1290. digram_rules['qu']['o'] = ANY_COMBINATION
  1291. digram_rules['qu']['p'] = ILLEGAL_PAIR
  1292. digram_rules['qu']['r'] = ILLEGAL_PAIR
  1293. digram_rules['qu']['s'] = ILLEGAL_PAIR
  1294. digram_rules['qu']['t'] = ILLEGAL_PAIR
  1295. digram_rules['qu']['u'] = ILLEGAL_PAIR
  1296. digram_rules['qu']['v'] = ILLEGAL_PAIR
  1297. digram_rules['qu']['w'] = ILLEGAL_PAIR
  1298. digram_rules['qu']['x'] = ILLEGAL_PAIR
  1299. digram_rules['qu']['y'] = ILLEGAL_PAIR
  1300. digram_rules['qu']['z'] = ILLEGAL_PAIR
  1301. digram_rules['qu']['ch'] = ILLEGAL_PAIR
  1302. digram_rules['qu']['gh'] = ILLEGAL_PAIR
  1303. digram_rules['qu']['ph'] = ILLEGAL_PAIR
  1304. digram_rules['qu']['rh'] = ILLEGAL_PAIR
  1305. digram_rules['qu']['sh'] = ILLEGAL_PAIR
  1306. digram_rules['qu']['th'] = ILLEGAL_PAIR
  1307. digram_rules['qu']['wh'] = ILLEGAL_PAIR
  1308. digram_rules['qu']['qu'] = ILLEGAL_PAIR
  1309. digram_rules['qu']['ck'] = ILLEGAL_PAIR
  1310. digram_rules['ck'] = dict()
  1311. digram_rules['ck']['a'] = NOT_BEGIN | BREAK | NOT_END
  1312. digram_rules['ck']['b'] = NOT_BEGIN | BREAK | NOT_END
  1313. digram_rules['ck']['c'] = NOT_BEGIN | BREAK | NOT_END
  1314. digram_rules['ck']['d'] = NOT_BEGIN | BREAK | NOT_END
  1315. digram_rules['ck']['e'] = NOT_BEGIN | BREAK | NOT_END
  1316. digram_rules['ck']['f'] = NOT_BEGIN | BREAK | NOT_END
  1317. digram_rules['ck']['g'] = NOT_BEGIN | BREAK | NOT_END
  1318. digram_rules['ck']['h'] = NOT_BEGIN | BREAK | NOT_END
  1319. digram_rules['ck']['i'] = NOT_BEGIN | BREAK | NOT_END
  1320. digram_rules['ck']['j'] = NOT_BEGIN | BREAK | NOT_END
  1321. digram_rules['ck']['k'] = NOT_BEGIN | BREAK | NOT_END
  1322. digram_rules['ck']['l'] = NOT_BEGIN | BREAK | NOT_END
  1323. digram_rules['ck']['m'] = NOT_BEGIN | BREAK | NOT_END
  1324. digram_rules['ck']['n'] = NOT_BEGIN | BREAK | NOT_END
  1325. digram_rules['ck']['o'] = NOT_BEGIN | BREAK | NOT_END
  1326. digram_rules['ck']['p'] = NOT_BEGIN | BREAK | NOT_END
  1327. digram_rules['ck']['r'] = NOT_BEGIN | BREAK | NOT_END
  1328. digram_rules['ck']['s'] = NOT_BEGIN
  1329. digram_rules['ck']['t'] = NOT_BEGIN | BREAK | NOT_END
  1330. digram_rules['ck']['u'] = NOT_BEGIN | BREAK | NOT_END
  1331. digram_rules['ck']['v'] = NOT_BEGIN | BREAK | NOT_END
  1332. digram_rules['ck']['w'] = NOT_BEGIN | BREAK | NOT_END
  1333. digram_rules['ck']['x'] = ILLEGAL_PAIR
  1334. digram_rules['ck']['y'] = NOT_BEGIN
  1335. digram_rules['ck']['z'] = NOT_BEGIN | BREAK | NOT_END
  1336. digram_rules['ck']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1337. digram_rules['ck']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1338. digram_rules['ck']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1339. digram_rules['ck']['rh'] = ILLEGAL_PAIR
  1340. digram_rules['ck']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1341. digram_rules['ck']['th'] = NOT_BEGIN | BREAK | NOT_END
  1342. digram_rules['ck']['wh'] = ILLEGAL_PAIR
  1343. digram_rules['ck']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1344. digram_rules['ck']['ck'] = ILLEGAL_PAIR
  1345. ###############################################################################
  1346. # END DIGRAM RULES
  1347. ###############################################################################
  1348. def marked(flag, first_unit, second_unit):
  1349. return digram_rules[first_unit][second_unit] & flag
  1350. # Generates a random word, as well as its hyphenated form. The
  1351. # length of the returned word will be between minlen and maxlen.
  1352. def generate_password_shazel(minlen = MIN_LENGTH_PASSWORD,
  1353. maxlen = MAX_LENGTH_PASSWORD):
  1354. if (minlen > maxlen):
  1355. raise PasswordGenerationException("minlen minlen is greater than maxlen maxlen.")
  1356. #
  1357. # Check for zero length words. This is technically not an error,
  1358. # so we take the short cut and return empty words.
  1359. #
  1360. if (maxlen == 0):
  1361. raise PasswordGenerationException("maxlen must be greater than 0.")
  1362. word = ''
  1363. for i in range(MAX_UNACCEPTABLE):
  1364. results = _random_word(random.randint(minlen, maxlen))
  1365. word = results[0]
  1366. hyphenated_word = results[1]
  1367. if (word != ''):
  1368. break
  1369. if (word == "" and (minlen > 0)):
  1370. raise PasswordGenerationException("failed to generate an acceptable random password.")
  1371. return (word, hyphenated_word)
  1372. # Selects a random element from an array.
  1373. def random_element(ar):
  1374. try:
  1375. keys = ar.keys()
  1376. except:
  1377. keys = range(len(ar))
  1378. return ar[ keys[random.randint(0, len(keys) - 1)] ]
  1379. # This is the routine that returns a random word. It collects random
  1380. # syllables until a predetermined word length is found. If a retry
  1381. # threshold is reached, another word is tried.
  1382. def _random_word(pwlen):
  1383. word = ''
  1384. word_syllables = []
  1385. max_retries = (4 * pwlen) + len(grams)
  1386. tries = 0 # count of retries.
  1387. # word_units used to be an array of indices into the 'rules' C-array.
  1388. # now it's an array of actual units (grams).
  1389. word_units = []
  1390. saved_pair = []
  1391. #
  1392. # Find syllables until the entire word is constructed.
  1393. #
  1394. while(len(word) < pwlen):
  1395. #
  1396. # Get the syllable and find its length.
  1397. #
  1398. new_syllable, syllable_units, saved_pair = get_syllable(pwlen - len(word), saved_pair)
  1399. #
  1400. # Append the syllable units to the word units.
  1401. #
  1402. word_units = word_units + syllable_units
  1403. #
  1404. # If the word has been improperly formed, throw out
  1405. # the syllable. The checks performed here are those
  1406. # that must be formed on a word basis. The other
  1407. # tests are performed entirely within the syllable.
  1408. # Otherwise, append the syllable to the word.
  1409. #
  1410. if not (
  1411. _improper_word(word_units)
  1412. or
  1413. (
  1414. word == ''
  1415. and
  1416. _have_initial_y(syllable_units)
  1417. )
  1418. or
  1419. (
  1420. len(word + new_syllable) == pwlen
  1421. and
  1422. _have_final_split(syllable_units)
  1423. )
  1424. ):
  1425. word = word + new_syllable
  1426. word_syllables.append(new_syllable)
  1427. #
  1428. # Keep track of the times we have tried to get syllables.
  1429. # If we have exceeded the threshold, start from scratch.
  1430. #
  1431. tries = tries + 1
  1432. if (tries > max_retries):
  1433. tries = 0
  1434. word = ''
  1435. word_syllables = []
  1436. word_units = []
  1437. return (word, '-'.join(word_syllables))
  1438. # Selects a gram (aka "unit"). This is the standard random unit
  1439. # generating routine for get_syllable().
  1440. #
  1441. # This routine attempts to return grams (units) with a distribution
  1442. # approaching that of the distribution of the units in English.
  1443. #
  1444. # The distribution of the units may be altered in this procedure
  1445. # without affecting the digram table or any other programs using the
  1446. # random_word function, as long as the set of grams (units) is kept
  1447. # consistent throughout this library.
  1448. def _random_unit(type):
  1449. if (type & VOWEL):
  1450. # Sometimes, we are asked to explicitly get a vowel (i.e., if
  1451. # a digram pair expects one following it). This is a
  1452. # shortcut to do that and avoid looping with rejected
  1453. # consonants.
  1454. return random_element(vowel_numbers)
  1455. else:
  1456. # Get any letter according to the English distribution.
  1457. return random_element(numbers)
  1458. # Check that the word does not contain illegal combinations
  1459. # that may span syllables. Specifically, these are:
  1460. #
  1461. # 1. An illegal pair of units between syllables.
  1462. # 2. Three consecutive vowel units.
  1463. # 3. Three consecutive consonant units.
  1464. #
  1465. # The checks are made against units (1 or 2 letters), not against
  1466. # the individual letters, so three consecutive units can have
  1467. # the length of 6 at most.
  1468. def _improper_word(units):
  1469. failure = 0
  1470. for unit_count in range(len(units)):
  1471. #
  1472. # Check for ILLEGAL_PAIR.
  1473. # This should have been caught for units within a syllable,
  1474. # but in some cases it would have gone unnoticed for units between syllables
  1475. # (e.g., when saved units in get_syllable() were not used).
  1476. #
  1477. if (unit_count > 0
  1478. and digram_rules[units[unit_count-1]][units[unit_count]]
  1479. & ILLEGAL_PAIR):
  1480. return 1 # Failure!
  1481. if (unit_count >= 2):
  1482. #
  1483. # Check for consecutive vowels or consonants. Because the
  1484. # initial y of a syllable is treated as a consonant rather
  1485. # than as a vowel, we exclude y from the first vowel in the
  1486. # vowel test. The only problem comes when y ends a syllable
  1487. # and two other vowels start the next, like fly-oint. Since
  1488. # such words are still pronounceable, we accept this.
  1489. #
  1490. #
  1491. # Vowel check.
  1492. #
  1493. if ((
  1494. (gram_rules[units[unit_count - 2]] & VOWEL)
  1495. and
  1496. not (gram_rules[units[unit_count - 2]] & ALTERNATE_VOWEL)
  1497. and
  1498. (gram_rules[units[unit_count - 1]] & VOWEL)
  1499. and
  1500. (gram_rules[units[unit_count ]] & VOWEL)
  1501. )
  1502. or
  1503. #
  1504. # Consonant check.
  1505. #
  1506. (
  1507. not (gram_rules[units[unit_count - 2]] & VOWEL)
  1508. and
  1509. not (gram_rules[units[unit_count - 1]] & VOWEL)
  1510. and
  1511. not (gram_rules[units[unit_count ]] & VOWEL)
  1512. )):
  1513. return 1 # Failure!
  1514. return 0 # success
  1515. # Treating y as a vowel is sometimes a problem. Some words get
  1516. # formed that look irregular. One special group is when y starts a
  1517. # word and is the only vowel in the first syllable. The word ycl is
  1518. # one example. We discard words like these.
  1519. def _have_initial_y(units):
  1520. vowel_count = 0
  1521. normal_vowel_count = 0
  1522. for unit_count in range(len(units)):
  1523. #
  1524. # Count vowels.
  1525. #
  1526. if (gram_rules[units[unit_count]] & VOWEL):
  1527. vowel_count = vowel_count + 1
  1528. #
  1529. # Count the vowels that are not:
  1530. # 1. 'y'
  1531. # 2. at the start of the word.
  1532. #
  1533. if (not (gram_rules[units[unit_count]] & ALTERNATE_VOWEL) or (unit_count > 0)):
  1534. normal_vowel_count = normal_vowel_count + 1
  1535. return (vowel_count <= 1) and (normal_vowel_count == 0)
  1536. # Besides the problem with the letter y, there is one with a silent e
  1537. # at the end of words, like face or nice. We allow this silent e,
  1538. # but we do not allow it as the only vowel at the end of the word or
  1539. # syllables like ble will be generated.
  1540. def _have_final_split(units):
  1541. vowel_count = 0
  1542. #
  1543. # Count all the vowels in the word.
  1544. #
  1545. for unit_count in range(len(units)):
  1546. if (gram_rules[units[unit_count]] & VOWEL):
  1547. vowel_count = vowel_count + 1
  1548. #
  1549. # Return TRUE iff the only vowel was e, found at the end if the word.
  1550. #
  1551. return ((vowel_count == 1)
  1552. and (gram_rules[units[len(units) - 1]] & NO_FINAL_SPLIT))
  1553. def digram_is_invalid(first_unit, second_unit, current_unit_num,
  1554. length_left, units_in_syllable, vowel_count):
  1555. #
  1556. # Reject ILLEGAL_PAIRS of units.
  1557. #
  1558. if (marked(ILLEGAL_PAIR,
  1559. first_unit,
  1560. second_unit)):
  1561. return 1
  1562. #
  1563. # Reject units that will be split between
  1564. # syllables when the syllable has no vowels
  1565. # in it.
  1566. #
  1567. if (marked(BREAK,
  1568. first_unit,
  1569. second_unit) and
  1570. (vowel_count == 0)):
  1571. return 1
  1572. #
  1573. # Reject a unit that will end a syllable when
  1574. # no previous unit was a vowel and neither is
  1575. # this one.
  1576. #
  1577. if (marked(END,
  1578. first_unit,
  1579. second_unit) and
  1580. (vowel_count == 0) and
  1581. not (gram_rules[second_unit] & VOWEL)):
  1582. return 1
  1583. if (current_unit_num == 1):
  1584. #
  1585. # Reject the unit if we are at the starting
  1586. # digram of a syllable and it does not fit.
  1587. #
  1588. if (marked(NOT_BEGIN,
  1589. first_unit,
  1590. second_unit)):
  1591. return 1
  1592. else:
  1593. # We are not at the start of a syllable.
  1594. #
  1595. # Do not allow syllables where the first letter is y
  1596. # and the next pair can begin a syllable. This may
  1597. # lead to splits where y is left alone in a syllable.
  1598. # Also, the combination does not sound to good even
  1599. # if not split.
  1600. #
  1601. if ((current_unit_num == 2) and
  1602. marked(BEGIN,
  1603. first_unit,
  1604. second_unit) and
  1605. (gram_rules[units_in_syllable[0]] &
  1606. ALTERNATE_VOWEL)):
  1607. return 1
  1608. #
  1609. # If this is the last unit of a word, we
  1610. # should reject any digram that cannot end a
  1611. # syllable.
  1612. #
  1613. if (marked(NOT_END,
  1614. first_unit,
  1615. second_unit) and
  1616. (length_left == 0)):
  1617. return 1
  1618. #
  1619. # Reject the unit if the digram it forms wants
  1620. # to break the syllable, but the resulting
  1621. # digram that would end the syllable is not
  1622. # allowed to end a syllable.
  1623. #
  1624. if (marked(BREAK,
  1625. first_unit,
  1626. second_unit) and
  1627. (digram_rules[units_in_syllable[current_unit_num-2]]
  1628. [first_unit] & NOT_END)):
  1629. return 1
  1630. #
  1631. # Reject the unit if the digram it forms
  1632. # expects a vowel preceding it and there
  1633. # is none.
  1634. #
  1635. if (marked(PREFIX,
  1636. first_unit,
  1637. second_unit) and
  1638. not (gram_rules[ units_in_syllable[current_unit_num-2] ] &
  1639. VOWEL)):
  1640. return 1
  1641. return 0
  1642. # Generate next unit to password, making sure that it follows these rules:
  1643. #
  1644. # 1. Each syllable must contain exactly 1 or 2 consecutive vowels,
  1645. # where y is considered a vowel.
  1646. #
  1647. # 2. Syllable end is determined as follows:
  1648. #
  1649. # a. Vowel is generated and previous unit is a consonant and
  1650. # syllable already has a vowel. In this case, new syllable is
  1651. # started and already contains a vowel.
  1652. # b. A pair determined to be a "break" pair is encountered.
  1653. # In this case new syllable is started with second unit of this pair.
  1654. # c. End of password is encountered.
  1655. # d. "begin" pair is encountered legally. New syllable is started
  1656. # with this pair.
  1657. # e. "end" pair is legally encountered. New syllable has nothing yet.
  1658. #
  1659. # 3. Try generating another unit if:
  1660. #
  1661. # a. third consecutive vowel and not y.
  1662. # b. "break" pair generated but no vowel yet in current or
  1663. # previous 2 units are "not_end".
  1664. # c. "begin" pair generated but no vowel in syllable preceding begin pair,
  1665. # or both previous 2 pairs are designated "not_end".
  1666. # d. "end" pair generated but no vowel in current syllable or in
  1667. # "end" pair.
  1668. # e. "not_begin" pair generated but new syllable must begin
  1669. # (because previous syllable ended as defined in 2 above).
  1670. # f. vowel is generated and 2a is satisfied, but no syllable break
  1671. # is possible in previous 3 pairs.
  1672. # g. Second and third units of syllable must begin, and first unit
  1673. # is "alternate_vowel".
  1674. def get_syllable(pwlen, saved_pair):
  1675. #
  1676. # This is needed if the saved_pair is tried and the syllable then
  1677. # discarded because of the retry limit. Since the saved_pair is OK and
  1678. # fits in nicely with the preceding syllable, we will always use it.
  1679. #
  1680. hold_saved_pair = saved_pair
  1681. max_retries = (4 * pwlen) + len(grams)
  1682. max_loops = 100
  1683. num_loops = 0
  1684. #
  1685. # Loop until valid syllable is found.
  1686. #
  1687. while True: # do: ftso python while: not PEP 315.
  1688. #
  1689. # Try for a new syllable. Initialize all pertinent
  1690. # syllable variables.
  1691. #
  1692. syllable = "" # string, returned
  1693. units_in_syllable = dict() # array of units, returned
  1694. # grams:
  1695. unit = ''
  1696. current_unit = 0
  1697. last_unit = ''
  1698. # numbers:
  1699. vowel_count = 0
  1700. tries = 0
  1701. length_left = pwlen
  1702. # flags:
  1703. rule_broken = 0
  1704. want_vowel = 0
  1705. want_another_unit = 1
  1706. saved_pair = hold_saved_pair
  1707. #
  1708. # This loop finds all the units for the syllable.
  1709. #
  1710. while True: # do: ftso python while: not PEP 315.
  1711. want_vowel = 0
  1712. #
  1713. # This loop continues until a valid unit is found for the
  1714. # current position within the syllable.
  1715. #
  1716. while True: # do: ftso python while: not PEP 315.
  1717. rule_broken = 0
  1718. #
  1719. # If there are saved units from the previous
  1720. # syllable, use them up first.
  1721. #
  1722. #
  1723. # If there were two saved units, the first is
  1724. # guaranteed (by checks performed in the previous
  1725. # syllable) to be valid. We ignore the checks and
  1726. # place it in this syllable manually.
  1727. #
  1728. if (len(saved_pair) == 2):
  1729. syllable = saved_pair.pop()
  1730. units_in_syllable[0] = syllable
  1731. if (gram_rules[syllable] & VOWEL):
  1732. vowel_count = vowel_count + 1
  1733. current_unit = current_unit + 1
  1734. length_left -= len(syllable)
  1735. if (len(saved_pair) > 0):
  1736. #
  1737. # The unit becomes the last unit checked in the
  1738. # previous syllable.
  1739. #
  1740. unit = saved_pair.pop()
  1741. #
  1742. # The saved units have been used. Do not try to
  1743. # reuse them in this syllable (unless this
  1744. # particular syllable is rejected at which point
  1745. # we start to rebuild it with these same saved
  1746. # units).
  1747. #
  1748. else:
  1749. #
  1750. # If we don't have to consider the saved units,
  1751. # we generate a random one.
  1752. #
  1753. if (want_vowel):
  1754. unit = _random_unit(VOWEL)
  1755. else:
  1756. unit = _random_unit(NO_SPECIAL_RULE)
  1757. length_left -= len(unit)
  1758. rule_broken = 0
  1759. #
  1760. # Prevent having a word longer than expected.
  1761. #
  1762. if (length_left < 0):
  1763. rule_broken = 1
  1764. #
  1765. # First unit of syllable. This is special because
  1766. # the digram tests require 2 units and we don't have
  1767. # that yet. Nevertheless, we can perform some
  1768. # checks.
  1769. #
  1770. if (current_unit == 0):
  1771. #
  1772. # If this shouldn't begin a syllable, don't use it.
  1773. #
  1774. if (gram_rules[unit] & NOT_BEGIN_SYLLABLE):
  1775. rule_broken = 1
  1776. elif (length_left == 0):
  1777. #
  1778. # If this is the last unit of a word, we have
  1779. # a one unit syllable. Since each syllable
  1780. # must have a vowel, we make sure the unit is
  1781. # a vowel. Otherwise, we discard it.
  1782. #
  1783. if (gram_rules[unit] & VOWEL):
  1784. want_another_unit = 0
  1785. else:
  1786. rule_broken = 1
  1787. else:
  1788. #
  1789. # We are not at the start of a syllable.
  1790. # Save the previous unit for later tests.
  1791. #
  1792. last_unit = units_in_syllable[current_unit-1]
  1793. #
  1794. # There are some digram tests that are
  1795. # universally true. We test them out.
  1796. #
  1797. if (digram_is_invalid(last_unit,
  1798. unit,
  1799. current_unit,
  1800. length_left,
  1801. units_in_syllable,
  1802. vowel_count)):
  1803. rule_broken = 1
  1804. #
  1805. # The following checks occur when the current
  1806. # unit is a vowel and we are not looking at a
  1807. # word ending with an e.
  1808. #
  1809. if (not rule_broken and
  1810. (gram_rules[unit] & VOWEL) and
  1811. ((length_left > 0)
  1812. or not (gram_rules[last_unit] & NO_FINAL_SPLIT))):
  1813. #
  1814. # Don't allow 3 consecutive vowels in a
  1815. # syllable. Although some words formed
  1816. # like this are OK, like "beau", most are
  1817. # not.
  1818. #
  1819. if ((vowel_count > 1) and
  1820. (gram_rules[last_unit] & VOWEL)):
  1821. rule_broken = 1
  1822. #
  1823. # Check for the case of
  1824. # vowels-consonants-vowel, which is only
  1825. # legal if the last vowel is an e and we
  1826. # are the end of the word (which is not
  1827. # happening here due to a previous
  1828. # check).
  1829. #
  1830. elif ((vowel_count != 0) and not (gram_rules[last_unit] & VOWEL)):
  1831. #
  1832. # Try to save the vowel for the next
  1833. # syllable, but if the syllable left here
  1834. # is not proper (i.e., the resulting last
  1835. # digram cannot legally end it), just
  1836. # discard it and try for another.
  1837. #
  1838. if (digram_rules[ units_in_syllable[ current_unit - 2] ][last_unit] & NOT_END):
  1839. rule_broken = 1
  1840. else:
  1841. saved_pair = [unit]
  1842. want_another_unit = 0
  1843. #
  1844. # The unit picked and the digram formed are legal.
  1845. # We now determine if we can end the syllable. It may,
  1846. # in some cases, mean the last unit(s) may be deferred to
  1847. # the next syllable. We also check here to see if the
  1848. # digram formed expects a vowel to follow.
  1849. #
  1850. if (not rule_broken and want_another_unit):
  1851. if ((vowel_count != 0) and
  1852. (gram_rules[unit] & NO_FINAL_SPLIT) and
  1853. (length_left == 0) and
  1854. not (gram_rules[last_unit] & VOWEL)):
  1855. #
  1856. # This word ends in a silent e.
  1857. #
  1858. want_another_unit = 0
  1859. elif (marked(END,
  1860. last_unit,
  1861. unit)
  1862. or (length_left == 0)):
  1863. #
  1864. # This syllable ends either because the
  1865. # digram is a END pair or we would
  1866. # otherwise exceed the length of the
  1867. # word.
  1868. #
  1869. want_another_unit = 0
  1870. elif (vowel_count != 0 and length_left > 0):
  1871. #
  1872. # Since we have a vowel in the syllable
  1873. # already, if the digram calls for the end of the
  1874. # syllable, we can legally split it off. We also
  1875. # make sure that we are not at the end of the
  1876. # dangerous because that syllable may not have
  1877. # vowels, or it may not be a legal syllable end,
  1878. # and the retrying mechanism will loop infinitely
  1879. # with the same digram.
  1880. #
  1881. #
  1882. # If we must begin a syllable, we do so if
  1883. # the only vowel in THIS syllable is not part
  1884. # of the digram we are pushing to the next
  1885. # syllable.
  1886. #
  1887. if (marked(BEGIN,
  1888. last_unit,
  1889. unit) and
  1890. (current_unit > 1) and
  1891. not ((vowel_count == 1) and
  1892. (gram_rules[last_unit] & VOWEL))):
  1893. saved_pair = [unit, last_unit]
  1894. want_another_unit = 0
  1895. elif (
  1896. marked(BREAK,
  1897. last_unit,
  1898. unit)):
  1899. saved_pair = [unit]
  1900. want_another_unit = 0
  1901. elif (
  1902. marked(SUFFIX,
  1903. last_unit,
  1904. unit)):
  1905. want_vowel = 1
  1906. tries = tries + 1
  1907. #
  1908. # If this unit was illegal, redetermine the amount of
  1909. # letters left to go in the word.
  1910. #
  1911. if (rule_broken):
  1912. length_left += len(unit)
  1913. if not (rule_broken and tries <= max_retries):
  1914. break
  1915. #
  1916. # The unit fit OK.
  1917. #
  1918. if (tries <= max_retries):
  1919. #
  1920. # If the unit were a vowel, count it in. However, if
  1921. # the unit were a y and appear at the start of the
  1922. # syllable, treat it like a consonant (so that words
  1923. # like "year" can appear and not conflict with the 3
  1924. # consecutive vowel rule).
  1925. #
  1926. if (
  1927. (gram_rules[unit] & VOWEL)
  1928. and
  1929. ((current_unit > 0) or not (gram_rules[unit] & ALTERNATE_VOWEL))
  1930. ):
  1931. vowel_count = vowel_count + 1
  1932. #
  1933. # If a unit or units were to be saved, we must adjust
  1934. # the syllable formed. Otherwise, we append the
  1935. # current unit to the syllable.
  1936. #
  1937. if (len(saved_pair) == 2):
  1938. syllable = syllable[0:
  1939. len(syllable) -
  1940. len(last_unit)]
  1941. length_left += len(last_unit)
  1942. current_unit -= 2
  1943. elif (len(saved_pair) == 1):
  1944. current_unit = current_unit - 1
  1945. else:
  1946. units_in_syllable[ current_unit ] = unit
  1947. syllable = syllable + unit
  1948. else:
  1949. #
  1950. # Whoops! Too many tries. We set rule_broken so we
  1951. # can loop in the outer loop and try another
  1952. # syllable.
  1953. #
  1954. rule_broken = 1
  1955. current_unit = current_unit + 1
  1956. if not (tries <= max_retries and want_another_unit):
  1957. break
  1958. num_loops = num_loops + 1
  1959. if not ((rule_broken or _illegal_placement(units_in_syllable))):
  1960. break
  1961. return (syllable, units_in_syllable.values(), saved_pair)
  1962. # _illegal_placement goes through an individual syllable and checks
  1963. # for illegal combinations of letters that go beyond looking at digrams.
  1964. #
  1965. # We look at things like 3 consecutive vowels or consonants, or
  1966. # syllables with consonants between vowels (unless one of them is the
  1967. # final silent e).
  1968. def _illegal_placement(units):
  1969. vowel_count = 0
  1970. failure = 0
  1971. for unit_count in range(len(units)):
  1972. if (failure):
  1973. break
  1974. if (unit_count >= 1):
  1975. #
  1976. # Don't allow vowels to be split with consonants in a
  1977. # single syllable. If we find such a combination (except
  1978. # for the silent e) we have to discard the syllable.
  1979. #
  1980. if (
  1981. (
  1982. not (gram_rules[units[unit_count-1]] & VOWEL)
  1983. and
  1984. (gram_rules[units[unit_count ]] & VOWEL)
  1985. and
  1986. not ((gram_rules[units[unit_count ]] & NO_FINAL_SPLIT) and (unit_count == len(units)))
  1987. and
  1988. vowel_count
  1989. )
  1990. or
  1991. #
  1992. # Perform these checks when we have at least 3 units.
  1993. #
  1994. (
  1995. (unit_count >= 2)
  1996. and
  1997. (
  1998. #
  1999. # Disallow 3 consecutive consonants.
  2000. #
  2001. (
  2002. not (gram_rules[units[unit_count-2]] & VOWEL)
  2003. and
  2004. not (gram_rules[units[unit_count-1]] & VOWEL)
  2005. and
  2006. not (gram_rules[units[unit_count]] & VOWEL)
  2007. )
  2008. or
  2009. #
  2010. # Disallow 3 consecutive vowels, where the
  2011. # first is not a y.
  2012. #
  2013. (
  2014. (gram_rules[units[unit_count-2]] & VOWEL)
  2015. and
  2016. not ((gram_rules[units[0]] & ALTERNATE_VOWEL)
  2017. and (unit_count == 2))
  2018. and
  2019. (gram_rules[units[unit_count-1]] & VOWEL)
  2020. and
  2021. (gram_rules[units[unit_count]] & VOWEL)
  2022. )
  2023. )
  2024. )
  2025. ):
  2026. failure = 1
  2027. #
  2028. # Count the vowels in the syllable. As mentioned somewhere
  2029. # above, exclude the initial y of a syllable. Instead, treat
  2030. # it as a consonant.
  2031. #
  2032. if (
  2033. (gram_rules[units[unit_count]] & VOWEL)
  2034. and
  2035. not (
  2036. (gram_rules[units[0]] & ALTERNATE_VOWEL)
  2037. and
  2038. (unit_count == 0)
  2039. and
  2040. (len(units) > 1)
  2041. )
  2042. ):
  2043. vowel_count = vowel_count + 1
  2044. return failure